Bug Summary

File: src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
Warning: line 4093, column 7
Value stored to 'NumLeftover' is never read
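
The deadcode.DeadStores checker fires when a value written to a variable is never read afterwards. As a hedged illustration of the pattern and its usual fix (this is not the code at line 4093; the function and names below are made up):

    // Hypothetical reduction of a dead-store finding.
    static int breakDown(int Size, int NarrowSize, int LeftoverSize) {
      int NumParts = Size / NarrowSize;            // read below: fine
      int NumLeftover = LeftoverSize / NarrowSize; // stored but never read: flagged
      return NumParts;
      // Typical fixes: delete the dead assignment, actually consume NumLeftover,
      // or cast the expression to (void) if the computation is kept deliberately.
    }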

Annotated Source Code

clang -cc1 -cc1 -triple amd64-unknown-openbsd7.0 -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name LegalizerHelper.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model static -mframe-pointer=all -relaxed-aliasing -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/usr/src/gnu/usr.bin/clang/libLLVM/obj -resource-dir /usr/local/lib/clang/13.0.0 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Analysis -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ASMParser -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/BinaryFormat -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Bitcode -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Bitcode -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Bitstream -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /include/llvm/CodeGen -I /include/llvm/CodeGen/PBQP -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/IR -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/IR -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Coroutines -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ProfileData/Coverage -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/CodeView -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/DWARF -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/MSF -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/PDB -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Demangle -I 
/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ExecutionEngine -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ExecutionEngine/JITLink -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ExecutionEngine/Orc -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend/OpenACC -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend/OpenMP -I /include/llvm/CodeGen/GlobalISel -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/IRReader -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/InstCombine -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/Transforms/InstCombine -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/LTO -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Linker -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/MC -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/MC/MCParser -I /include/llvm/CodeGen/MIRParser -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Object -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Option -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Passes -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ProfileData -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Scalar -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ADT -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Support -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/Symbolize -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Target -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Utils -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Vectorize -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/IPO -I 
/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include -I /usr/src/gnu/usr.bin/clang/libLLVM/../include -I /usr/src/gnu/usr.bin/clang/libLLVM/obj -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include -D NDEBUG -D __STDC_LIMIT_MACROS -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D LLVM_PREFIX="/usr" -internal-isystem /usr/include/c++/v1 -internal-isystem /usr/local/lib/clang/13.0.0/include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/usr/src/gnu/usr.bin/clang/libLLVM/obj -ferror-limit 19 -fvisibility-inlines-hidden -fwrapv -stack-protector 2 -fno-rtti -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-valloc -fno-builtin-free -fno-builtin-strdup -fno-builtin-strndup -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /home/ben/Projects/vmm/scan-build/2022-01-12-194120-40624-1 -x c++ /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp
1//===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file This file implements the LegalizerHelper class to legalize
10/// individual instructions and the LegalizeMachineIR wrapper pass for the
11/// primary legalization.
12//
13//===----------------------------------------------------------------------===//
14
15#include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
16#include "llvm/CodeGen/GlobalISel/CallLowering.h"
17#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
18#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
19#include "llvm/CodeGen/GlobalISel/LostDebugLocObserver.h"
20#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
21#include "llvm/CodeGen/GlobalISel/Utils.h"
22#include "llvm/CodeGen/MachineRegisterInfo.h"
23#include "llvm/CodeGen/TargetFrameLowering.h"
24#include "llvm/CodeGen/TargetInstrInfo.h"
25#include "llvm/CodeGen/TargetLowering.h"
26#include "llvm/CodeGen/TargetOpcodes.h"
27#include "llvm/CodeGen/TargetSubtargetInfo.h"
28#include "llvm/IR/Instructions.h"
29#include "llvm/Support/Debug.h"
30#include "llvm/Support/MathExtras.h"
31#include "llvm/Support/raw_ostream.h"
32
33#define DEBUG_TYPE "legalizer"
34
35using namespace llvm;
36using namespace LegalizeActions;
37using namespace MIPatternMatch;
38
39/// Try to break down \p OrigTy into \p NarrowTy sized pieces.
40///
41/// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
42/// with any leftover piece as type \p LeftoverTy
43///
44/// Returns -1 in the first element of the pair if the breakdown is not
45/// satisfiable.
46static std::pair<int, int>
47getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
48 assert(!LeftoverTy.isValid() && "this is an out argument");
49
50 unsigned Size = OrigTy.getSizeInBits();
51 unsigned NarrowSize = NarrowTy.getSizeInBits();
52 unsigned NumParts = Size / NarrowSize;
53 unsigned LeftoverSize = Size - NumParts * NarrowSize;
54 assert(Size > NarrowSize);
55
56 if (LeftoverSize == 0)
57 return {NumParts, 0};
58
59 if (NarrowTy.isVector()) {
60 unsigned EltSize = OrigTy.getScalarSizeInBits();
61 if (LeftoverSize % EltSize != 0)
62 return {-1, -1};
63 LeftoverTy = LLT::scalarOrVector(
64 ElementCount::getFixed(LeftoverSize / EltSize), EltSize);
65 } else {
66 LeftoverTy = LLT::scalar(LeftoverSize);
67 }
68
69 int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
70 return std::make_pair(NumParts, NumLeftover);
71}
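
A worked instance of the breakdown above, with sizes assumed purely for illustration (they mirror the s88 -> s64 + s24 example that appears later in narrowScalar):

    // OrigTy = s88, NarrowTy = s64:
    //   NumParts     = 88 / 64     = 1
    //   LeftoverSize = 88 - 1 * 64 = 24  -> LeftoverTy = s24
    //   NumLeftover  = 24 / 24     = 1   -> returns {1, 1}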
72
73static Type *getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty) {
74
75 if (!Ty.isScalar())
76 return nullptr;
77
78 switch (Ty.getSizeInBits()) {
79 case 16:
80 return Type::getHalfTy(Ctx);
81 case 32:
82 return Type::getFloatTy(Ctx);
83 case 64:
84 return Type::getDoubleTy(Ctx);
85 case 80:
86 return Type::getX86_FP80Ty(Ctx);
87 case 128:
88 return Type::getFP128Ty(Ctx);
89 default:
90 return nullptr;
91 }
92}
93
94LegalizerHelper::LegalizerHelper(MachineFunction &MF,
95 GISelChangeObserver &Observer,
96 MachineIRBuilder &Builder)
97 : MIRBuilder(Builder), Observer(Observer), MRI(MF.getRegInfo()),
98 LI(*MF.getSubtarget().getLegalizerInfo()),
99 TLI(*MF.getSubtarget().getTargetLowering()) { }
100
101LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
102 GISelChangeObserver &Observer,
103 MachineIRBuilder &B)
104 : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI),
105 TLI(*MF.getSubtarget().getTargetLowering()) { }
106
107LegalizerHelper::LegalizeResult
108LegalizerHelper::legalizeInstrStep(MachineInstr &MI,
109 LostDebugLocObserver &LocObserver) {
110 LLVM_DEBUG(dbgs() << "Legalizing: " << MI);
111
112 MIRBuilder.setInstrAndDebugLoc(MI);
113
114 if (MI.getOpcode() == TargetOpcode::G_INTRINSIC ||
115 MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS)
116 return LI.legalizeIntrinsic(*this, MI) ? Legalized : UnableToLegalize;
117 auto Step = LI.getAction(MI, MRI);
118 switch (Step.Action) {
119 case Legal:
120 LLVM_DEBUG(dbgs() << ".. Already legal\n");
121 return AlreadyLegal;
122 case Libcall:
123 LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
124 return libcall(MI, LocObserver);
125 case NarrowScalar:
126 LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
127 return narrowScalar(MI, Step.TypeIdx, Step.NewType);
128 case WidenScalar:
129 LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
130 return widenScalar(MI, Step.TypeIdx, Step.NewType);
131 case Bitcast:
132 LLVM_DEBUG(dbgs() << ".. Bitcast type\n");
133 return bitcast(MI, Step.TypeIdx, Step.NewType);
134 case Lower:
135 LLVM_DEBUG(dbgs() << ".. Lower\n");
136 return lower(MI, Step.TypeIdx, Step.NewType);
137 case FewerElements:
138 LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
139 return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
140 case MoreElements:
141 LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
142 return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
143 case Custom:
144 LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
145 return LI.legalizeCustom(*this, MI) ? Legalized : UnableToLegalize;
146 default:
147 LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
148 return UnableToLegalize;
149 }
150}
151
152void LegalizerHelper::extractParts(Register Reg, LLT Ty, int NumParts,
153 SmallVectorImpl<Register> &VRegs) {
154 for (int i = 0; i < NumParts; ++i)
155 VRegs.push_back(MRI.createGenericVirtualRegister(Ty));
156 MIRBuilder.buildUnmerge(VRegs, Reg);
157}
158
159bool LegalizerHelper::extractParts(Register Reg, LLT RegTy,
160 LLT MainTy, LLT &LeftoverTy,
161 SmallVectorImpl<Register> &VRegs,
162 SmallVectorImpl<Register> &LeftoverRegs) {
163 assert(!LeftoverTy.isValid() && "this is an out argument");
164
165 unsigned RegSize = RegTy.getSizeInBits();
166 unsigned MainSize = MainTy.getSizeInBits();
167 unsigned NumParts = RegSize / MainSize;
168 unsigned LeftoverSize = RegSize - NumParts * MainSize;
169
170 // Use an unmerge when possible.
171 if (LeftoverSize == 0) {
172 for (unsigned I = 0; I < NumParts; ++I)
173 VRegs.push_back(MRI.createGenericVirtualRegister(MainTy));
174 MIRBuilder.buildUnmerge(VRegs, Reg);
175 return true;
176 }
177
178 if (MainTy.isVector()) {
179 unsigned EltSize = MainTy.getScalarSizeInBits();
180 if (LeftoverSize % EltSize != 0)
181 return false;
182 LeftoverTy = LLT::scalarOrVector(
183 ElementCount::getFixed(LeftoverSize / EltSize), EltSize);
184 } else {
185 LeftoverTy = LLT::scalar(LeftoverSize);
186 }
187
188 // For irregular sizes, extract the individual parts.
189 for (unsigned I = 0; I != NumParts; ++I) {
190 Register NewReg = MRI.createGenericVirtualRegister(MainTy);
191 VRegs.push_back(NewReg);
192 MIRBuilder.buildExtract(NewReg, Reg, MainSize * I);
193 }
194
195 for (unsigned Offset = MainSize * NumParts; Offset < RegSize;
196 Offset += LeftoverSize) {
197 Register NewReg = MRI.createGenericVirtualRegister(LeftoverTy);
198 LeftoverRegs.push_back(NewReg);
199 MIRBuilder.buildExtract(NewReg, Reg, Offset);
200 }
201
202 return true;
203}
204
205void LegalizerHelper::insertParts(Register DstReg,
206 LLT ResultTy, LLT PartTy,
207 ArrayRef<Register> PartRegs,
208 LLT LeftoverTy,
209 ArrayRef<Register> LeftoverRegs) {
210 if (!LeftoverTy.isValid()) {
211 assert(LeftoverRegs.empty());
212
213 if (!ResultTy.isVector()) {
214 MIRBuilder.buildMerge(DstReg, PartRegs);
215 return;
216 }
217
218 if (PartTy.isVector())
219 MIRBuilder.buildConcatVectors(DstReg, PartRegs);
220 else
221 MIRBuilder.buildBuildVector(DstReg, PartRegs);
222 return;
223 }
224
225 SmallVector<Register> GCDRegs;
226 LLT GCDTy = getGCDType(getGCDType(ResultTy, LeftoverTy), PartTy);
227 for (auto PartReg : concat<const Register>(PartRegs, LeftoverRegs))
228 extractGCDType(GCDRegs, GCDTy, PartReg);
229 LLT ResultLCMTy = buildLCMMergePieces(ResultTy, LeftoverTy, GCDTy, GCDRegs);
230 buildWidenedRemergeToDst(DstReg, ResultLCMTy, GCDRegs);
231}
232
233/// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs.
234static void getUnmergeResults(SmallVectorImpl<Register> &Regs,
235 const MachineInstr &MI) {
236 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
237
238 const int StartIdx = Regs.size();
239 const int NumResults = MI.getNumOperands() - 1;
240 Regs.resize(Regs.size() + NumResults);
241 for (int I = 0; I != NumResults; ++I)
242 Regs[StartIdx + I] = MI.getOperand(I).getReg();
243}
244
245void LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts,
246 LLT GCDTy, Register SrcReg) {
247 LLT SrcTy = MRI.getType(SrcReg);
248 if (SrcTy == GCDTy) {
249 // If the source already evenly divides the result type, we don't need to do
250 // anything.
251 Parts.push_back(SrcReg);
252 } else {
253 // Need to split into common type sized pieces.
254 auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
255 getUnmergeResults(Parts, *Unmerge);
256 }
257}
258
259LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
260 LLT NarrowTy, Register SrcReg) {
261 LLT SrcTy = MRI.getType(SrcReg);
262 LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
263 extractGCDType(Parts, GCDTy, SrcReg);
264 return GCDTy;
265}
266
267LLT LegalizerHelper::buildLCMMergePieces(LLT DstTy, LLT NarrowTy, LLT GCDTy,
268 SmallVectorImpl<Register> &VRegs,
269 unsigned PadStrategy) {
270 LLT LCMTy = getLCMType(DstTy, NarrowTy);
271
272 int NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
273 int NumSubParts = NarrowTy.getSizeInBits() / GCDTy.getSizeInBits();
274 int NumOrigSrc = VRegs.size();
275
276 Register PadReg;
277
278 // Get a value we can use to pad the source value if the sources won't evenly
279 // cover the result type.
280 if (NumOrigSrc < NumParts * NumSubParts) {
281 if (PadStrategy == TargetOpcode::G_ZEXT)
282 PadReg = MIRBuilder.buildConstant(GCDTy, 0).getReg(0);
283 else if (PadStrategy == TargetOpcode::G_ANYEXT)
284 PadReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
285 else {
286 assert(PadStrategy == TargetOpcode::G_SEXT);
287
288 // Shift the sign bit of the low register through the high register.
289 auto ShiftAmt =
290 MIRBuilder.buildConstant(LLT::scalar(64), GCDTy.getSizeInBits() - 1);
291 PadReg = MIRBuilder.buildAShr(GCDTy, VRegs.back(), ShiftAmt).getReg(0);
292 }
293 }
294
295 // Registers for the final merge to be produced.
296 SmallVector<Register, 4> Remerge(NumParts);
297
298 // Registers needed for intermediate merges, which will be merged into a
299 // source for Remerge.
300 SmallVector<Register, 4> SubMerge(NumSubParts);
301
302 // Once we've fully read off the end of the original source bits, we can reuse
303 // the same high bits for remaining padding elements.
304 Register AllPadReg;
305
306 // Build merges to the LCM type to cover the original result type.
307 for (int I = 0; I != NumParts; ++I) {
308 bool AllMergePartsArePadding = true;
309
310 // Build the requested merges to the requested type.
311 for (int J = 0; J != NumSubParts; ++J) {
312 int Idx = I * NumSubParts + J;
313 if (Idx >= NumOrigSrc) {
314 SubMerge[J] = PadReg;
315 continue;
316 }
317
318 SubMerge[J] = VRegs[Idx];
319
320 // There are meaningful bits here we can't reuse later.
321 AllMergePartsArePadding = false;
322 }
323
324 // If we've filled up a complete piece with padding bits, we can directly
325 // emit the natural sized constant if applicable, rather than a merge of
326 // smaller constants.
327 if (AllMergePartsArePadding && !AllPadReg) {
328 if (PadStrategy == TargetOpcode::G_ANYEXT)
329 AllPadReg = MIRBuilder.buildUndef(NarrowTy).getReg(0);
330 else if (PadStrategy == TargetOpcode::G_ZEXT)
331 AllPadReg = MIRBuilder.buildConstant(NarrowTy, 0).getReg(0);
332
333 // If this is a sign extension, we can't materialize a trivial constant
334 // with the right type and have to produce a merge.
335 }
336
337 if (AllPadReg) {
338 // Avoid creating additional instructions if we're just adding additional
339 // copies of padding bits.
340 Remerge[I] = AllPadReg;
341 continue;
342 }
343
344 if (NumSubParts == 1)
345 Remerge[I] = SubMerge[0];
346 else
347 Remerge[I] = MIRBuilder.buildMerge(NarrowTy, SubMerge).getReg(0);
348
349 // In the sign extend padding case, re-use the first all-signbit merge.
350 if (AllMergePartsArePadding && !AllPadReg)
351 AllPadReg = Remerge[I];
352 }
353
354 VRegs = std::move(Remerge);
355 return LCMTy;
356}
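
To make the padding logic concrete, one assumed configuration (not taken from any particular call site): DstTy = s64, NarrowTy = s32, GCDTy = s16, three source pieces, PadStrategy = G_ZEXT:

    // LCMTy = s64, NumParts = 2, NumSubParts = 2, NumOrigSrc = 3
    // PadReg = G_CONSTANT s16, 0 (zero padding for zext)
    // I = 0: SubMerge = {VRegs[0], VRegs[1]} -> Remerge[0] = G_MERGE_VALUES s32
    // I = 1: SubMerge = {VRegs[2], PadReg}   -> Remerge[1] = G_MERGE_VALUES s32
    // VRegs is replaced by Remerge and the function returns LCMTy = s64;
    // the caller then merges those two s32 pieces into the s64 result.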
357
358void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
359 ArrayRef<Register> RemergeRegs) {
360 LLT DstTy = MRI.getType(DstReg);
361
362 // Create the merge to the widened source, and extract the relevant bits into
363 // the result.
364
365 if (DstTy == LCMTy) {
366 MIRBuilder.buildMerge(DstReg, RemergeRegs);
367 return;
368 }
369
370 auto Remerge = MIRBuilder.buildMerge(LCMTy, RemergeRegs);
371 if (DstTy.isScalar() && LCMTy.isScalar()) {
372 MIRBuilder.buildTrunc(DstReg, Remerge);
373 return;
374 }
375
376 if (LCMTy.isVector()) {
377 unsigned NumDefs = LCMTy.getSizeInBits() / DstTy.getSizeInBits();
378 SmallVector<Register, 8> UnmergeDefs(NumDefs);
379 UnmergeDefs[0] = DstReg;
380 for (unsigned I = 1; I != NumDefs; ++I)
381 UnmergeDefs[I] = MRI.createGenericVirtualRegister(DstTy);
382
383 MIRBuilder.buildUnmerge(UnmergeDefs,
384 MIRBuilder.buildMerge(LCMTy, RemergeRegs));
385 return;
386 }
387
388 llvm_unreachable("unhandled case");
389}
390
391static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
392#define RTLIBCASE_INT(LibcallPrefix) \
393 do { \
394 switch (Size) { \
395 case 32: \
396 return RTLIB::LibcallPrefix##32; \
397 case 64: \
398 return RTLIB::LibcallPrefix##64; \
399 case 128: \
400 return RTLIB::LibcallPrefix##128; \
401 default: \
402 llvm_unreachable("unexpected size"); \
403 } \
404 } while (0)
405
406#define RTLIBCASE(LibcallPrefix) \
407 do { \
408 switch (Size) { \
409 case 32: \
410 return RTLIB::LibcallPrefix##32; \
411 case 64: \
412 return RTLIB::LibcallPrefix##64; \
413 case 80: \
414 return RTLIB::LibcallPrefix##80; \
415 case 128: \
416 return RTLIB::LibcallPrefix##128; \
417 default: \
418 llvm_unreachable("unexpected size"); \
419 } \
420 } while (0)
421
422 switch (Opcode) {
423 case TargetOpcode::G_SDIV:
424 RTLIBCASE_INT(SDIV_I);
425 case TargetOpcode::G_UDIV:
426 RTLIBCASE_INT(UDIV_I);
427 case TargetOpcode::G_SREM:
428 RTLIBCASE_INT(SREM_I);
429 case TargetOpcode::G_UREM:
430 RTLIBCASE_INT(UREM_I);
431 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
432 RTLIBCASE_INT(CTLZ_I);
433 case TargetOpcode::G_FADD:
434 RTLIBCASE(ADD_F);
435 case TargetOpcode::G_FSUB:
436 RTLIBCASE(SUB_F);
437 case TargetOpcode::G_FMUL:
438 RTLIBCASE(MUL_F);
439 case TargetOpcode::G_FDIV:
440 RTLIBCASE(DIV_F);
441 case TargetOpcode::G_FEXP:
442 RTLIBCASE(EXP_F);
443 case TargetOpcode::G_FEXP2:
444 RTLIBCASE(EXP2_F);
445 case TargetOpcode::G_FREM:
446 RTLIBCASE(REM_F);
447 case TargetOpcode::G_FPOW:
448 RTLIBCASE(POW_F);
449 case TargetOpcode::G_FMA:
450 RTLIBCASE(FMA_F);
451 case TargetOpcode::G_FSIN:
452 RTLIBCASE(SIN_F);
453 case TargetOpcode::G_FCOS:
454 RTLIBCASE(COS_F);
455 case TargetOpcode::G_FLOG10:
456 RTLIBCASE(LOG10_F);
457 case TargetOpcode::G_FLOG:
458 RTLIBCASE(LOG_F);
459 case TargetOpcode::G_FLOG2:
460 RTLIBCASE(LOG2_F);
461 case TargetOpcode::G_FCEIL:
462 RTLIBCASE(CEIL_F);
463 case TargetOpcode::G_FFLOOR:
464 RTLIBCASE(FLOOR_F);
465 case TargetOpcode::G_FMINNUM:
466 RTLIBCASE(FMIN_F);
467 case TargetOpcode::G_FMAXNUM:
468 RTLIBCASE(FMAX_F);
469 case TargetOpcode::G_FSQRT:
470 RTLIBCASE(SQRT_F);
471 case TargetOpcode::G_FRINT:
472 RTLIBCASE(RINT_F);
473 case TargetOpcode::G_FNEARBYINT:
474 RTLIBCASE(NEARBYINT_F);
475 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
476 RTLIBCASE(ROUNDEVEN_F);
477 }
478 llvm_unreachable("Unknown libcall function");
479}
480
481/// True if an instruction is in tail position in its caller. Intended for
482/// legalizing libcalls as tail calls when possible.
483static bool isLibCallInTailPosition(MachineInstr &MI,
484 const TargetInstrInfo &TII,
485 MachineRegisterInfo &MRI) {
486 MachineBasicBlock &MBB = *MI.getParent();
487 const Function &F = MBB.getParent()->getFunction();
488
489 // Conservatively require the attributes of the call to match those of
490 // the return. Ignore NoAlias and NonNull because they don't affect the
491 // call sequence.
492 AttributeList CallerAttrs = F.getAttributes();
493 if (AttrBuilder(CallerAttrs, AttributeList::ReturnIndex)
494 .removeAttribute(Attribute::NoAlias)
495 .removeAttribute(Attribute::NonNull)
496 .hasAttributes())
497 return false;
498
499 // It's not safe to eliminate the sign / zero extension of the return value.
500 if (CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt) ||
501 CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt))
502 return false;
503
504 // Only tail call if the following instruction is a standard return or if we
505 // have a `thisreturn` callee, and a sequence like:
506 //
507 // G_MEMCPY %0, %1, %2
508 // $x0 = COPY %0
509 // RET_ReallyLR implicit $x0
510 auto Next = next_nodbg(MI.getIterator(), MBB.instr_end());
511 if (Next != MBB.instr_end() && Next->isCopy()) {
512 switch (MI.getOpcode()) {
513 default:
514 llvm_unreachable("unsupported opcode");
515 case TargetOpcode::G_BZERO:
516 return false;
517 case TargetOpcode::G_MEMCPY:
518 case TargetOpcode::G_MEMMOVE:
519 case TargetOpcode::G_MEMSET:
520 break;
521 }
522
523 Register VReg = MI.getOperand(0).getReg();
524 if (!VReg.isVirtual() || VReg != Next->getOperand(1).getReg())
525 return false;
526
527 Register PReg = Next->getOperand(0).getReg();
528 if (!PReg.isPhysical())
529 return false;
530
531 auto Ret = next_nodbg(Next, MBB.instr_end());
532 if (Ret == MBB.instr_end() || !Ret->isReturn())
533 return false;
534
535 if (Ret->getNumImplicitOperands() != 1)
536 return false;
537
538 if (PReg != Ret->getOperand(0).getReg())
539 return false;
540
541 // Skip over the COPY that we just validated.
542 Next = Ret;
543 }
544
545 if (Next == MBB.instr_end() || TII.isTailCall(*Next) || !Next->isReturn())
546 return false;
547
548 return true;
549}
550
551LegalizerHelper::LegalizeResult
552llvm::createLibcall(MachineIRBuilder &MIRBuilder, const char *Name,
553 const CallLowering::ArgInfo &Result,
554 ArrayRef<CallLowering::ArgInfo> Args,
555 const CallingConv::ID CC) {
556 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
557
558 CallLowering::CallLoweringInfo Info;
559 Info.CallConv = CC;
560 Info.Callee = MachineOperand::CreateES(Name);
561 Info.OrigRet = Result;
562 std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
563 if (!CLI.lowerCall(MIRBuilder, Info))
564 return LegalizerHelper::UnableToLegalize;
565
566 return LegalizerHelper::Legalized;
567}
568
569LegalizerHelper::LegalizeResult
570llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
571 const CallLowering::ArgInfo &Result,
572 ArrayRef<CallLowering::ArgInfo> Args) {
573 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
574 const char *Name = TLI.getLibcallName(Libcall);
575 const CallingConv::ID CC = TLI.getLibcallCallingConv(Libcall);
576 return createLibcall(MIRBuilder, Name, Result, Args, CC);
577}
578
579// Useful for libcalls where all operands have the same type.
580static LegalizerHelper::LegalizeResult
581simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
582 Type *OpType) {
583 auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
584
585 // FIXME: What does the original arg index mean here?
586 SmallVector<CallLowering::ArgInfo, 3> Args;
587 for (unsigned i = 1; i < MI.getNumOperands(); i++)
588 Args.push_back({MI.getOperand(i).getReg(), OpType, 0});
589 return createLibcall(MIRBuilder, Libcall,
590 {MI.getOperand(0).getReg(), OpType, 0}, Args);
591}
592
593LegalizerHelper::LegalizeResult
594llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
595 MachineInstr &MI, LostDebugLocObserver &LocObserver) {
596 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
597
598 SmallVector<CallLowering::ArgInfo, 3> Args;
599 // Add all the args, except for the last which is an imm denoting 'tail'.
600 for (unsigned i = 0; i < MI.getNumOperands() - 1; ++i) {
601 Register Reg = MI.getOperand(i).getReg();
602
603 // Need to derive an IR type for call lowering.
604 LLT OpLLT = MRI.getType(Reg);
605 Type *OpTy = nullptr;
606 if (OpLLT.isPointer())
607 OpTy = Type::getInt8PtrTy(Ctx, OpLLT.getAddressSpace());
608 else
609 OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
610 Args.push_back({Reg, OpTy, 0});
611 }
612
613 auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
614 auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
615 RTLIB::Libcall RTLibcall;
616 unsigned Opc = MI.getOpcode();
617 switch (Opc) {
618 case TargetOpcode::G_BZERO:
619 RTLibcall = RTLIB::BZERO;
620 break;
621 case TargetOpcode::G_MEMCPY:
622 RTLibcall = RTLIB::MEMCPY;
623 Args[0].Flags[0].setReturned();
624 break;
625 case TargetOpcode::G_MEMMOVE:
626 RTLibcall = RTLIB::MEMMOVE;
627 Args[0].Flags[0].setReturned();
628 break;
629 case TargetOpcode::G_MEMSET:
630 RTLibcall = RTLIB::MEMSET;
631 Args[0].Flags[0].setReturned();
632 break;
633 default:
634 llvm_unreachable("unsupported opcode");
635 }
636 const char *Name = TLI.getLibcallName(RTLibcall);
637
638 // Unsupported libcall on the target.
639 if (!Name) {
640 LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
641 << MIRBuilder.getTII().getName(Opc) << "\n");
642 return LegalizerHelper::UnableToLegalize;
643 }
644
645 CallLowering::CallLoweringInfo Info;
646 Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
647 Info.Callee = MachineOperand::CreateES(Name);
648 Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx), 0);
649 Info.IsTailCall = MI.getOperand(MI.getNumOperands() - 1).getImm() &&
650 isLibCallInTailPosition(MI, MIRBuilder.getTII(), MRI);
651
652 std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
653 if (!CLI.lowerCall(MIRBuilder, Info))
654 return LegalizerHelper::UnableToLegalize;
655
656 if (Info.LoweredTailCall) {
657 assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
658
659 // Check debug locations before removing the return.
660 LocObserver.checkpoint(true);
661
662 // We must have a return following the call (or debug insts) to get past
663 // isLibCallInTailPosition.
664 do {
665 MachineInstr *Next = MI.getNextNode();
666 assert(Next &&
667 (Next->isCopy() || Next->isReturn() || Next->isDebugInstr()) &&
668 "Expected instr following MI to be return or debug inst?");
669 // We lowered a tail call, so the call is now the return from the block.
670 // Delete the old return.
671 Next->eraseFromParent();
672 } while (MI.getNextNode());
673
674 // We expect to lose the debug location from the return.
675 LocObserver.checkpoint(false);
676 }
677
678 return LegalizerHelper::Legalized;
679}
680
681static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
682 Type *FromType) {
683 auto ToMVT = MVT::getVT(ToType);
684 auto FromMVT = MVT::getVT(FromType);
685
686 switch (Opcode) {
687 case TargetOpcode::G_FPEXT:
688 return RTLIB::getFPEXT(FromMVT, ToMVT);
689 case TargetOpcode::G_FPTRUNC:
690 return RTLIB::getFPROUND(FromMVT, ToMVT);
691 case TargetOpcode::G_FPTOSI:
692 return RTLIB::getFPTOSINT(FromMVT, ToMVT);
693 case TargetOpcode::G_FPTOUI:
694 return RTLIB::getFPTOUINT(FromMVT, ToMVT);
695 case TargetOpcode::G_SITOFP:
696 return RTLIB::getSINTTOFP(FromMVT, ToMVT);
697 case TargetOpcode::G_UITOFP:
698 return RTLIB::getUINTTOFP(FromMVT, ToMVT);
699 }
700 llvm_unreachable("Unsupported libcall function");
701}
702
703static LegalizerHelper::LegalizeResult
704conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
705 Type *FromType) {
706 RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
707 return createLibcall(MIRBuilder, Libcall,
708 {MI.getOperand(0).getReg(), ToType, 0},
709 {{MI.getOperand(1).getReg(), FromType, 0}});
710}
711
712LegalizerHelper::LegalizeResult
713LegalizerHelper::libcall(MachineInstr &MI, LostDebugLocObserver &LocObserver) {
714 LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
715 unsigned Size = LLTy.getSizeInBits();
716 auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
717
718 switch (MI.getOpcode()) {
719 default:
720 return UnableToLegalize;
721 case TargetOpcode::G_SDIV:
722 case TargetOpcode::G_UDIV:
723 case TargetOpcode::G_SREM:
724 case TargetOpcode::G_UREM:
725 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
726 Type *HLTy = IntegerType::get(Ctx, Size);
727 auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
728 if (Status != Legalized)
729 return Status;
730 break;
731 }
732 case TargetOpcode::G_FADD:
733 case TargetOpcode::G_FSUB:
734 case TargetOpcode::G_FMUL:
735 case TargetOpcode::G_FDIV:
736 case TargetOpcode::G_FMA:
737 case TargetOpcode::G_FPOW:
738 case TargetOpcode::G_FREM:
739 case TargetOpcode::G_FCOS:
740 case TargetOpcode::G_FSIN:
741 case TargetOpcode::G_FLOG10:
742 case TargetOpcode::G_FLOG:
743 case TargetOpcode::G_FLOG2:
744 case TargetOpcode::G_FEXP:
745 case TargetOpcode::G_FEXP2:
746 case TargetOpcode::G_FCEIL:
747 case TargetOpcode::G_FFLOOR:
748 case TargetOpcode::G_FMINNUM:
749 case TargetOpcode::G_FMAXNUM:
750 case TargetOpcode::G_FSQRT:
751 case TargetOpcode::G_FRINT:
752 case TargetOpcode::G_FNEARBYINT:
753 case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
754 Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
755 if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
756 LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
757 return UnableToLegalize;
758 }
759 auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
760 if (Status != Legalized)
761 return Status;
762 break;
763 }
764 case TargetOpcode::G_FPEXT:
765 case TargetOpcode::G_FPTRUNC: {
766 Type *FromTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(1).getReg()));
767 Type *ToTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
768 if (!FromTy || !ToTy)
769 return UnableToLegalize;
770 LegalizeResult Status = conversionLibcall(MI, MIRBuilder, ToTy, FromTy);
771 if (Status != Legalized)
772 return Status;
773 break;
774 }
775 case TargetOpcode::G_FPTOSI:
776 case TargetOpcode::G_FPTOUI: {
777 // FIXME: Support other types
778 unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
779 unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
780 if ((ToSize != 32 && ToSize != 64) || (FromSize != 32 && FromSize != 64))
781 return UnableToLegalize;
782 LegalizeResult Status = conversionLibcall(
783 MI, MIRBuilder,
784 ToSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx),
785 FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx));
786 if (Status != Legalized)
787 return Status;
788 break;
789 }
790 case TargetOpcode::G_SITOFP:
791 case TargetOpcode::G_UITOFP: {
792 // FIXME: Support other types
793 unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
794 unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
795 if ((FromSize != 32 && FromSize != 64) || (ToSize != 32 && ToSize != 64))
796 return UnableToLegalize;
797 LegalizeResult Status = conversionLibcall(
798 MI, MIRBuilder,
799 ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx),
800 FromSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx));
801 if (Status != Legalized)
802 return Status;
803 break;
804 }
805 case TargetOpcode::G_BZERO:
806 case TargetOpcode::G_MEMCPY:
807 case TargetOpcode::G_MEMMOVE:
808 case TargetOpcode::G_MEMSET: {
809 LegalizeResult Result =
810 createMemLibcall(MIRBuilder, *MIRBuilder.getMRI(), MI, LocObserver);
811 if (Result != Legalized)
812 return Result;
813 MI.eraseFromParent();
814 return Result;
815 }
816 }
817
818 MI.eraseFromParent();
819 return Legalized;
820}
821
822LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
823 unsigned TypeIdx,
824 LLT NarrowTy) {
825 uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
826 uint64_t NarrowSize = NarrowTy.getSizeInBits();
827
828 switch (MI.getOpcode()) {
829 default:
830 return UnableToLegalize;
831 case TargetOpcode::G_IMPLICIT_DEF: {
832 Register DstReg = MI.getOperand(0).getReg();
833 LLT DstTy = MRI.getType(DstReg);
834
835 // If SizeOp0 is not an exact multiple of NarrowSize, emit
836 // G_ANYEXT(G_IMPLICIT_DEF). Cast result to vector if needed.
837 // FIXME: Although this would also be legal for the general case, it causes
838 // a lot of regressions in the emitted code (superfluous COPYs, artifact
839 // combines not being hit). This seems to be a problem related to the
840 // artifact combiner.
841 if (SizeOp0 % NarrowSize != 0) {
842 LLT ImplicitTy = NarrowTy;
843 if (DstTy.isVector())
844 ImplicitTy = LLT::vector(DstTy.getElementCount(), ImplicitTy);
845
846 Register ImplicitReg = MIRBuilder.buildUndef(ImplicitTy).getReg(0);
847 MIRBuilder.buildAnyExt(DstReg, ImplicitReg);
848
849 MI.eraseFromParent();
850 return Legalized;
851 }
852
853 int NumParts = SizeOp0 / NarrowSize;
854
855 SmallVector<Register, 2> DstRegs;
856 for (int i = 0; i < NumParts; ++i)
857 DstRegs.push_back(MIRBuilder.buildUndef(NarrowTy).getReg(0));
858
859 if (DstTy.isVector())
860 MIRBuilder.buildBuildVector(DstReg, DstRegs);
861 else
862 MIRBuilder.buildMerge(DstReg, DstRegs);
863 MI.eraseFromParent();
864 return Legalized;
865 }
866 case TargetOpcode::G_CONSTANT: {
867 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
868 const APInt &Val = MI.getOperand(1).getCImm()->getValue();
869 unsigned TotalSize = Ty.getSizeInBits();
870 unsigned NarrowSize = NarrowTy.getSizeInBits();
871 int NumParts = TotalSize / NarrowSize;
872
873 SmallVector<Register, 4> PartRegs;
874 for (int I = 0; I != NumParts; ++I) {
875 unsigned Offset = I * NarrowSize;
876 auto K = MIRBuilder.buildConstant(NarrowTy,
877 Val.lshr(Offset).trunc(NarrowSize));
878 PartRegs.push_back(K.getReg(0));
879 }
880
881 LLT LeftoverTy;
882 unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
883 SmallVector<Register, 1> LeftoverRegs;
884 if (LeftoverBits != 0) {
885 LeftoverTy = LLT::scalar(LeftoverBits);
886 auto K = MIRBuilder.buildConstant(
887 LeftoverTy,
888 Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
889 LeftoverRegs.push_back(K.getReg(0));
890 }
891
892 insertParts(MI.getOperand(0).getReg(),
893 Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
894
895 MI.eraseFromParent();
896 return Legalized;
897 }
898 case TargetOpcode::G_SEXT:
899 case TargetOpcode::G_ZEXT:
900 case TargetOpcode::G_ANYEXT:
901 return narrowScalarExt(MI, TypeIdx, NarrowTy);
902 case TargetOpcode::G_TRUNC: {
903 if (TypeIdx != 1)
904 return UnableToLegalize;
905
906 uint64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
907 if (NarrowTy.getSizeInBits() * 2 != SizeOp1) {
908 LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n");
909 return UnableToLegalize;
910 }
911
912 auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
913 MIRBuilder.buildCopy(MI.getOperand(0), Unmerge.getReg(0));
914 MI.eraseFromParent();
915 return Legalized;
916 }
917
918 case TargetOpcode::G_FREEZE:
919 return reduceOperationWidth(MI, TypeIdx, NarrowTy);
920 case TargetOpcode::G_ADD:
921 case TargetOpcode::G_SUB:
922 case TargetOpcode::G_SADDO:
923 case TargetOpcode::G_SSUBO:
924 case TargetOpcode::G_SADDE:
925 case TargetOpcode::G_SSUBE:
926 case TargetOpcode::G_UADDO:
927 case TargetOpcode::G_USUBO:
928 case TargetOpcode::G_UADDE:
929 case TargetOpcode::G_USUBE:
930 return narrowScalarAddSub(MI, TypeIdx, NarrowTy);
931 case TargetOpcode::G_MUL:
932 case TargetOpcode::G_UMULH:
933 return narrowScalarMul(MI, NarrowTy);
934 case TargetOpcode::G_EXTRACT:
935 return narrowScalarExtract(MI, TypeIdx, NarrowTy);
936 case TargetOpcode::G_INSERT:
937 return narrowScalarInsert(MI, TypeIdx, NarrowTy);
938 case TargetOpcode::G_LOAD: {
939 auto &LoadMI = cast<GLoad>(MI);
940 Register DstReg = LoadMI.getDstReg();
941 LLT DstTy = MRI.getType(DstReg);
942 if (DstTy.isVector())
943 return UnableToLegalize;
944
945 if (8 * LoadMI.getMemSize() != DstTy.getSizeInBits()) {
946 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
947 MIRBuilder.buildLoad(TmpReg, LoadMI.getPointerReg(), LoadMI.getMMO());
948 MIRBuilder.buildAnyExt(DstReg, TmpReg);
949 LoadMI.eraseFromParent();
950 return Legalized;
951 }
952
953 return reduceLoadStoreWidth(LoadMI, TypeIdx, NarrowTy);
954 }
955 case TargetOpcode::G_ZEXTLOAD:
956 case TargetOpcode::G_SEXTLOAD: {
957 auto &LoadMI = cast<GExtLoad>(MI);
958 Register DstReg = LoadMI.getDstReg();
959 Register PtrReg = LoadMI.getPointerReg();
960
961 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
962 auto &MMO = LoadMI.getMMO();
963 unsigned MemSize = MMO.getSizeInBits();
964
965 if (MemSize == NarrowSize) {
966 MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
967 } else if (MemSize < NarrowSize) {
968 MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), TmpReg, PtrReg, MMO);
969 } else if (MemSize > NarrowSize) {
970 // FIXME: Need to split the load.
971 return UnableToLegalize;
972 }
973
974 if (isa<GZExtLoad>(LoadMI))
975 MIRBuilder.buildZExt(DstReg, TmpReg);
976 else
977 MIRBuilder.buildSExt(DstReg, TmpReg);
978
979 LoadMI.eraseFromParent();
980 return Legalized;
981 }
982 case TargetOpcode::G_STORE: {
983 auto &StoreMI = cast<GStore>(MI);
984
985 Register SrcReg = StoreMI.getValueReg();
986 LLT SrcTy = MRI.getType(SrcReg);
987 if (SrcTy.isVector())
988 return UnableToLegalize;
989
990 int NumParts = SizeOp0 / NarrowSize;
991 unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
992 unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
993 if (SrcTy.isVector() && LeftoverBits != 0)
994 return UnableToLegalize;
995
996 if (8 * StoreMI.getMemSize() != SrcTy.getSizeInBits()) {
997 Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
998 MIRBuilder.buildTrunc(TmpReg, SrcReg);
999 MIRBuilder.buildStore(TmpReg, StoreMI.getPointerReg(), StoreMI.getMMO());
1000 StoreMI.eraseFromParent();
1001 return Legalized;
1002 }
1003
1004 return reduceLoadStoreWidth(StoreMI, 0, NarrowTy);
1005 }
1006 case TargetOpcode::G_SELECT:
1007 return narrowScalarSelect(MI, TypeIdx, NarrowTy);
1008 case TargetOpcode::G_AND:
1009 case TargetOpcode::G_OR:
1010 case TargetOpcode::G_XOR: {
1011 // Legalize bitwise operation:
1012 // A = BinOp<Ty> B, C
1013 // into:
1014 // B1, ..., BN = G_UNMERGE_VALUES B
1015 // C1, ..., CN = G_UNMERGE_VALUES C
1016 // A1 = BinOp<Ty/N> B1, C1
1017 // ...
1018 // AN = BinOp<Ty/N> BN, CN
1019 // A = G_MERGE_VALUES A1, ..., AN
1020 return narrowScalarBasic(MI, TypeIdx, NarrowTy);
1021 }
1022 case TargetOpcode::G_SHL:
1023 case TargetOpcode::G_LSHR:
1024 case TargetOpcode::G_ASHR:
1025 return narrowScalarShift(MI, TypeIdx, NarrowTy);
1026 case TargetOpcode::G_CTLZ:
1027 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1028 case TargetOpcode::G_CTTZ:
1029 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1030 case TargetOpcode::G_CTPOP:
1031 if (TypeIdx == 1)
1032 switch (MI.getOpcode()) {
1033 case TargetOpcode::G_CTLZ:
1034 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
1035 return narrowScalarCTLZ(MI, TypeIdx, NarrowTy);
1036 case TargetOpcode::G_CTTZ:
1037 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
1038 return narrowScalarCTTZ(MI, TypeIdx, NarrowTy);
1039 case TargetOpcode::G_CTPOP:
1040 return narrowScalarCTPOP(MI, TypeIdx, NarrowTy);
1041 default:
1042 return UnableToLegalize;
1043 }
1044
1045 Observer.changingInstr(MI);
1046 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1047 Observer.changedInstr(MI);
1048 return Legalized;
1049 case TargetOpcode::G_INTTOPTR:
1050 if (TypeIdx != 1)
1051 return UnableToLegalize;
1052
1053 Observer.changingInstr(MI);
1054 narrowScalarSrc(MI, NarrowTy, 1);
1055 Observer.changedInstr(MI);
1056 return Legalized;
1057 case TargetOpcode::G_PTRTOINT:
1058 if (TypeIdx != 0)
1059 return UnableToLegalize;
1060
1061 Observer.changingInstr(MI);
1062 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
1063 Observer.changedInstr(MI);
1064 return Legalized;
1065 case TargetOpcode::G_PHI: {
1066 // FIXME: add support for when SizeOp0 isn't an exact multiple of
1067 // NarrowSize.
1068 if (SizeOp0 % NarrowSize != 0)
1069 return UnableToLegalize;
1070
1071 unsigned NumParts = SizeOp0 / NarrowSize;
1072 SmallVector<Register, 2> DstRegs(NumParts);
1073 SmallVector<SmallVector<Register, 2>, 2> SrcRegs(MI.getNumOperands() / 2);
1074 Observer.changingInstr(MI);
1075 for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
1076 MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB();
1077 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
1078 extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
1079 SrcRegs[i / 2]);
1080 }
1081 MachineBasicBlock &MBB = *MI.getParent();
1082 MIRBuilder.setInsertPt(MBB, MI);
1083 for (unsigned i = 0; i < NumParts; ++i) {
1084 DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
1085 MachineInstrBuilder MIB =
1086 MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
1087 for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
1088 MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
1089 }
1090 MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
1091 MIRBuilder.buildMerge(MI.getOperand(0), DstRegs);
1092 Observer.changedInstr(MI);
1093 MI.eraseFromParent();
1094 return Legalized;
1095 }
1096 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
1097 case TargetOpcode::G_INSERT_VECTOR_ELT: {
1098 if (TypeIdx != 2)
1099 return UnableToLegalize;
1100
1101 int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
1102 Observer.changingInstr(MI);
1103 narrowScalarSrc(MI, NarrowTy, OpIdx);
1104 Observer.changedInstr(MI);
1105 return Legalized;
1106 }
1107 case TargetOpcode::G_ICMP: {
1108 Register LHS = MI.getOperand(2).getReg();
1109 LLT SrcTy = MRI.getType(LHS);
1110 uint64_t SrcSize = SrcTy.getSizeInBits();
1111 CmpInst::Predicate Pred =
1112 static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
1113
1114 // TODO: Handle the non-equality case for weird sizes.
1115 if (NarrowSize * 2 != SrcSize && !ICmpInst::isEquality(Pred))
1116 return UnableToLegalize;
1117
1118 LLT LeftoverTy; // Example: s88 -> s64 (NarrowTy) + s24 (leftover)
1119 SmallVector<Register, 4> LHSPartRegs, LHSLeftoverRegs;
1120 if (!extractParts(LHS, SrcTy, NarrowTy, LeftoverTy, LHSPartRegs,
1121 LHSLeftoverRegs))
1122 return UnableToLegalize;
1123
1124 LLT Unused; // Matches LeftoverTy; G_ICMP LHS and RHS are the same type.
1125 SmallVector<Register, 4> RHSPartRegs, RHSLeftoverRegs;
1126 if (!extractParts(MI.getOperand(3).getReg(), SrcTy, NarrowTy, Unused,
1127 RHSPartRegs, RHSLeftoverRegs))
1128 return UnableToLegalize;
1129
1130 // We now have the LHS and RHS of the compare split into narrow-type
1131 // registers, plus potentially some leftover type.
1132 Register Dst = MI.getOperand(0).getReg();
1133 LLT ResTy = MRI.getType(Dst);
1134 if (ICmpInst::isEquality(Pred)) {
1135 // For each part on the LHS and RHS, keep track of the result of XOR-ing
1136 // them together. For each equal part, the result should be all 0s. For
1137 // each non-equal part, we'll get at least one 1.
1138 auto Zero = MIRBuilder.buildConstant(NarrowTy, 0);
1139 SmallVector<Register, 4> Xors;
1140 for (auto LHSAndRHS : zip(LHSPartRegs, RHSPartRegs)) {
1141 auto LHS = std::get<0>(LHSAndRHS);
1142 auto RHS = std::get<1>(LHSAndRHS);
1143 auto Xor = MIRBuilder.buildXor(NarrowTy, LHS, RHS).getReg(0);
1144 Xors.push_back(Xor);
1145 }
1146
1147 // Build a G_XOR for each leftover register. Each G_XOR must be widened
1148 // to the desired narrow type so that we can OR them together later.
1149 SmallVector<Register, 4> WidenedXors;
1150 for (auto LHSAndRHS : zip(LHSLeftoverRegs, RHSLeftoverRegs)) {
1151 auto LHS = std::get<0>(LHSAndRHS);
1152 auto RHS = std::get<1>(LHSAndRHS);
1153 auto Xor = MIRBuilder.buildXor(LeftoverTy, LHS, RHS).getReg(0);
1154 LLT GCDTy = extractGCDType(WidenedXors, NarrowTy, LeftoverTy, Xor);
1155 buildLCMMergePieces(LeftoverTy, NarrowTy, GCDTy, WidenedXors,
1156 /* PadStrategy = */ TargetOpcode::G_ZEXT);
1157 Xors.insert(Xors.end(), WidenedXors.begin(), WidenedXors.end());
1158 }
1159
1160 // Now, for each part we broke up, we know if they are equal/not equal
1161 // based off the G_XOR. We can OR these all together and compare against
1162 // 0 to get the result.
1163 assert(Xors.size() >= 2 && "Should have gotten at least two Xors?");
1164 auto Or = MIRBuilder.buildOr(NarrowTy, Xors[0], Xors[1]);
1165 for (unsigned I = 2, E = Xors.size(); I < E; ++I)
1166 Or = MIRBuilder.buildOr(NarrowTy, Or, Xors[I]);
1167 MIRBuilder.buildICmp(Pred, Dst, Or, Zero);
1168 } else {
1169 // TODO: Handle non-power-of-two types.
1170 assert(LHSPartRegs.size() == 2 && "Expected exactly 2 LHS part regs?");
1171 assert(RHSPartRegs.size() == 2 && "Expected exactly 2 RHS part regs?");
1172 Register LHSL = LHSPartRegs[0];
1173 Register LHSH = LHSPartRegs[1];
1174 Register RHSL = RHSPartRegs[0];
1175 Register RHSH = RHSPartRegs[1];
1176 MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, ResTy, LHSH, RHSH);
1177 MachineInstrBuilder CmpHEQ =
1178 MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy, LHSH, RHSH);
1179 MachineInstrBuilder CmpLU = MIRBuilder.buildICmp(
1180 ICmpInst::getUnsignedPredicate(Pred), ResTy, LHSL, RHSL);
1181 MIRBuilder.buildSelect(Dst, CmpHEQ, CmpLU, CmpH);
1182 }
1183 MI.eraseFromParent();
1184 return Legalized;
1185 }
1186 case TargetOpcode::G_SEXT_INREG: {
1187 if (TypeIdx != 0)
1188 return UnableToLegalize;
1189
1190 int64_t SizeInBits = MI.getOperand(2).getImm();
1191
1192 // So long as the new type has more bits than the bits we're extending we
1193 // don't need to break it apart.
1194 if (NarrowTy.getScalarSizeInBits() >= SizeInBits) {
1195 Observer.changingInstr(MI);
1196 // We don't lose any non-extension bits by truncating the src and
1197 // sign-extending the dst.
1198 MachineOperand &MO1 = MI.getOperand(1);
1199 auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1);
1200 MO1.setReg(TruncMIB.getReg(0));
1201
1202 MachineOperand &MO2 = MI.getOperand(0);
1203 Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
1204 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1205 MIRBuilder.buildSExt(MO2, DstExt);
1206 MO2.setReg(DstExt);
1207 Observer.changedInstr(MI);
1208 return Legalized;
1209 }
1210
1211 // Break it apart. Components below the extension point are unmodified. The
1212 // component containing the extension point becomes a narrower SEXT_INREG.
1213 // Components above it are ashr'd from the component containing the
1214 // extension point.
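// Editor's worked example of the split just described (not from the analyzed
// source; registers are hypothetical): narrowing
// %d:_(s64) = G_SEXT_INREG %s:_(s64), 16 with NarrowTy = s32:
//   %s0:_(s32), %s1:_(s32) = G_UNMERGE_VALUES %s  ; %s0 holds the ext point
//   %e0:_(s32) = G_SEXT_INREG %s0, 16             ; narrower SEXT_INREG
//   %e1:_(s32) = G_ASHR %e0, 31                   ; sign-fill the part above
//   %d:_(s64) = G_MERGE_VALUES %e0, %e1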
1215 if (SizeOp0 % NarrowSize != 0)
1216 return UnableToLegalize;
1217 int NumParts = SizeOp0 / NarrowSize;
1218
1219 // List the registers where the destination will be scattered.
1220 SmallVector<Register, 2> DstRegs;
1221 // List the registers where the source will be split.
1222 SmallVector<Register, 2> SrcRegs;
1223
1224 // Create all the temporary registers.
1225 for (int i = 0; i < NumParts; ++i) {
1226 Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
1227
1228 SrcRegs.push_back(SrcReg);
1229 }
1230
1231 // Explode the big arguments into smaller chunks.
1232 MIRBuilder.buildUnmerge(SrcRegs, MI.getOperand(1));
1233
1234 Register AshrCstReg =
1235 MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1)
1236 .getReg(0);
1237 Register FullExtensionReg = 0;
1238 Register PartialExtensionReg = 0;
1239
1240 // Do the operation on each small part.
1241 for (int i = 0; i < NumParts; ++i) {
1242 if ((i + 1) * NarrowTy.getScalarSizeInBits() < SizeInBits)
1243 DstRegs.push_back(SrcRegs[i]);
1244 else if (i * NarrowTy.getScalarSizeInBits() > SizeInBits) {
1245 assert(PartialExtensionReg &&
1246 "Expected to visit partial extension before full");
1247 if (FullExtensionReg) {
1248 DstRegs.push_back(FullExtensionReg);
1249 continue;
1250 }
1251 DstRegs.push_back(
1252 MIRBuilder.buildAShr(NarrowTy, PartialExtensionReg, AshrCstReg)
1253 .getReg(0));
1254 FullExtensionReg = DstRegs.back();
1255 } else {
1256 DstRegs.push_back(
1257 MIRBuilder
1258 .buildInstr(
1259 TargetOpcode::G_SEXT_INREG, {NarrowTy},
1260 {SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()})
1261 .getReg(0));
1262 PartialExtensionReg = DstRegs.back();
1263 }
1264 }
1265
1266 // Gather the destination registers into the final destination.
1267 Register DstReg = MI.getOperand(0).getReg();
1268 MIRBuilder.buildMerge(DstReg, DstRegs);
1269 MI.eraseFromParent();
1270 return Legalized;
1271 }
1272 case TargetOpcode::G_BSWAP:
1273 case TargetOpcode::G_BITREVERSE: {
1274 if (SizeOp0 % NarrowSize != 0)
1275 return UnableToLegalize;
1276
1277 Observer.changingInstr(MI);
1278 SmallVector<Register, 2> SrcRegs, DstRegs;
1279 unsigned NumParts = SizeOp0 / NarrowSize;
1280 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
1281
1282 for (unsigned i = 0; i < NumParts; ++i) {
1283 auto DstPart = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
1284 {SrcRegs[NumParts - 1 - i]});
1285 DstRegs.push_back(DstPart.getReg(0));
1286 }
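// Editor's sketch (hypothetical registers): for G_BSWAP of an s64 with
// NarrowTy = s32, indexing the source parts in reverse flips byte order
// across the halves as well as within them:
//   %lo:_(s32), %hi:_(s32) = G_UNMERGE_VALUES %src:_(s64)
//   %a:_(s32) = G_BSWAP %hi
//   %b:_(s32) = G_BSWAP %lo
//   %dst:_(s64) = G_MERGE_VALUES %a, %b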
1287
1288 MIRBuilder.buildMerge(MI.getOperand(0), DstRegs);
1289
1290 Observer.changedInstr(MI);
1291 MI.eraseFromParent();
1292 return Legalized;
1293 }
1294 case TargetOpcode::G_PTR_ADD:
1295 case TargetOpcode::G_PTRMASK: {
1296 if (TypeIdx != 1)
1297 return UnableToLegalize;
1298 Observer.changingInstr(MI);
1299 narrowScalarSrc(MI, NarrowTy, 2);
1300 Observer.changedInstr(MI);
1301 return Legalized;
1302 }
1303 case TargetOpcode::G_FPTOUI:
1304 case TargetOpcode::G_FPTOSI:
1305 return narrowScalarFPTOI(MI, TypeIdx, NarrowTy);
1306 case TargetOpcode::G_FPEXT:
1307 if (TypeIdx != 0)
1308 return UnableToLegalize;
1309 Observer.changingInstr(MI);
1310 narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_FPEXT);
1311 Observer.changedInstr(MI);
1312 return Legalized;
1313 }
1314}
1315
1316Register LegalizerHelper::coerceToScalar(Register Val) {
1317 LLT Ty = MRI.getType(Val);
1318 if (Ty.isScalar())
1319 return Val;
1320
1321 const DataLayout &DL = MIRBuilder.getDataLayout();
1322 LLT NewTy = LLT::scalar(Ty.getSizeInBits());
1323 if (Ty.isPointer()) {
1324 if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
1325 return Register();
1326 return MIRBuilder.buildPtrToInt(NewTy, Val).getReg(0);
1327 }
1328
1329 Register NewVal = Val;
1330
1331 assert(Ty.isVector());
1332 LLT EltTy = Ty.getElementType();
1333 if (EltTy.isPointer())
1334 NewVal = MIRBuilder.buildPtrToInt(NewTy, NewVal).getReg(0);
1335 return MIRBuilder.buildBitcast(NewTy, NewVal).getReg(0);
1336}
1337
1338void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
1339 unsigned OpIdx, unsigned ExtOpcode) {
1340 MachineOperand &MO = MI.getOperand(OpIdx);
1341 auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO});
1342 MO.setReg(ExtB.getReg(0));
1343}
1344
1345void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
1346 unsigned OpIdx) {
1347 MachineOperand &MO = MI.getOperand(OpIdx);
1348 auto ExtB = MIRBuilder.buildTrunc(NarrowTy, MO);
1349 MO.setReg(ExtB.getReg(0));
1350}
1351
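// Editor's note (added commentary): unlike the *Src helpers above, which
// extend or truncate an operand in place before MI, the *Dst helpers below
// create a fresh register of the new type for the def, move the insert point
// to just after MI, and emit the conversion from that def back into the
// original destination register.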
1352void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
1353 unsigned OpIdx, unsigned TruncOpcode) {
1354 MachineOperand &MO = MI.getOperand(OpIdx);
1355 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
1356 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1357 MIRBuilder.buildInstr(TruncOpcode, {MO}, {DstExt});
1358 MO.setReg(DstExt);
1359}
1360
1361void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
1362 unsigned OpIdx, unsigned ExtOpcode) {
1363 MachineOperand &MO = MI.getOperand(OpIdx);
1364 Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
1365 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1366 MIRBuilder.buildInstr(ExtOpcode, {MO}, {DstTrunc});
1367 MO.setReg(DstTrunc);
1368}
1369
1370void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy,
1371 unsigned OpIdx) {
1372 MachineOperand &MO = MI.getOperand(OpIdx);
1373 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1374 MO.setReg(widenWithUnmerge(WideTy, MO.getReg()));
1375}
1376
1377void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy,
1378 unsigned OpIdx) {
1379 MachineOperand &MO = MI.getOperand(OpIdx);
1380
1381 LLT OldTy = MRI.getType(MO.getReg());
1382 unsigned OldElts = OldTy.getNumElements();
1383 unsigned NewElts = MoreTy.getNumElements();
1384
1385 unsigned NumParts = NewElts / OldElts;
1386
1387 // Use concat_vectors if the result is a multiple of the number of elements.
1388 if (NumParts * OldElts == NewElts) {
1389 SmallVector<Register, 8> Parts;
1390 Parts.push_back(MO.getReg());
1391
1392 Register ImpDef = MIRBuilder.buildUndef(OldTy).getReg(0);
1393 for (unsigned I = 1; I != NumParts; ++I)
1394 Parts.push_back(ImpDef);
1395
1396 auto Concat = MIRBuilder.buildConcatVectors(MoreTy, Parts);
1397 MO.setReg(Concat.getReg(0));
1398 return;
1399 }
1400
1401 Register MoreReg = MRI.createGenericVirtualRegister(MoreTy);
1402 Register ImpDef = MIRBuilder.buildUndef(MoreTy).getReg(0);
1403 MIRBuilder.buildInsert(MoreReg, ImpDef, MO.getReg(), 0);
1404 MO.setReg(MoreReg);
1405}
1406
1407void LegalizerHelper::bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
1408 MachineOperand &Op = MI.getOperand(OpIdx);
1409 Op.setReg(MIRBuilder.buildBitcast(CastTy, Op).getReg(0));
1410}
1411
1412void LegalizerHelper::bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
1413 MachineOperand &MO = MI.getOperand(OpIdx);
1414 Register CastDst = MRI.createGenericVirtualRegister(CastTy);
1415 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
1416 MIRBuilder.buildBitcast(MO, CastDst);
1417 MO.setReg(CastDst);
1418}
1419
1420LegalizerHelper::LegalizeResult
1421LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
1422 LLT WideTy) {
1423 if (TypeIdx != 1)
1424 return UnableToLegalize;
1425
1426 Register DstReg = MI.getOperand(0).getReg();
1427 LLT DstTy = MRI.getType(DstReg);
1428 if (DstTy.isVector())
1429 return UnableToLegalize;
1430
1431 Register Src1 = MI.getOperand(1).getReg();
1432 LLT SrcTy = MRI.getType(Src1);
1433 const int DstSize = DstTy.getSizeInBits();
1434 const int SrcSize = SrcTy.getSizeInBits();
1435 const int WideSize = WideTy.getSizeInBits();
1436 const int NumMerge = (DstSize + WideSize - 1) / WideSize;
1437
1438 unsigned NumOps = MI.getNumOperands();
1439 unsigned NumSrc = MI.getNumOperands() - 1;
1440 unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
1441
1442 if (WideSize >= DstSize) {
1443 // Directly pack the bits in the target type.
1444 Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1).getReg(0);
1445
1446 for (unsigned I = 2; I != NumOps; ++I) {
1447 const unsigned Offset = (I - 1) * PartSize;
1448
1449 Register SrcReg = MI.getOperand(I).getReg();
1450 assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));
1451
1452 auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
1453
1454 Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
1455 MRI.createGenericVirtualRegister(WideTy);
1456
1457 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
1458 auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
1459 MIRBuilder.buildOr(NextResult, ResultReg, Shl);
1460 ResultReg = NextResult;
1461 }
1462
1463 if (WideSize > DstSize)
1464 MIRBuilder.buildTrunc(DstReg, ResultReg);
1465 else if (DstTy.isPointer())
1466 MIRBuilder.buildIntToPtr(DstReg, ResultReg);
1467
1468 MI.eraseFromParent();
1469 return Legalized;
1470 }
1471
1472 // Unmerge the original values to the GCD type, and recombine to the next
1473 // multiple greater than the original type.
1474 //
1475 // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
1476 // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
1477 // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
1478 // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
1479 // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
1480 // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
1481 // %12:_(s12) = G_MERGE_VALUES %10, %11
1482 //
1483 // Padding with undef if necessary:
1484 //
1485 // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
1486 // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
1487 // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
1488 // %7:_(s2) = G_IMPLICIT_DEF
1489 // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
1490 // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
1491 // %10:_(s12) = G_MERGE_VALUES %8, %9
1492
1493 const int GCD = greatestCommonDivisor(SrcSize, WideSize);
1494 LLT GCDTy = LLT::scalar(GCD);
1495
1496 SmallVector<Register, 8> Parts;
1497 SmallVector<Register, 8> NewMergeRegs;
1498 SmallVector<Register, 8> Unmerges;
1499 LLT WideDstTy = LLT::scalar(NumMerge * WideSize);
1500
1501 // Decompose the original operands if they don't evenly divide.
1502 for (int I = 1, E = MI.getNumOperands(); I != E; ++I) {
1503 Register SrcReg = MI.getOperand(I).getReg();
1504 if (GCD == SrcSize) {
1505 Unmerges.push_back(SrcReg);
1506 } else {
1507 auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
1508 for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
1509 Unmerges.push_back(Unmerge.getReg(J));
1510 }
1511 }
1512
1513 // Pad with undef to the next size that is a multiple of the requested size.
1514 if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
1515 Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
1516 for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I)
1517 Unmerges.push_back(UndefReg);
1518 }
1519
1520 const int PartsPerGCD = WideSize / GCD;
1521
1522 // Build merges of each piece.
1523 ArrayRef<Register> Slicer(Unmerges);
1524 for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
1525 auto Merge = MIRBuilder.buildMerge(WideTy, Slicer.take_front(PartsPerGCD));
1526 NewMergeRegs.push_back(Merge.getReg(0));
1527 }
1528
1529 // A truncate may be necessary if the requested type doesn't evenly divide the
1530 // original result type.
1531 if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) {
1532 MIRBuilder.buildMerge(DstReg, NewMergeRegs);
1533 } else {
1534 auto FinalMerge = MIRBuilder.buildMerge(WideDstTy, NewMergeRegs);
1535 MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
1536 }
1537
1538 MI.eraseFromParent();
1539 return Legalized;
1540}
1541
1542Register LegalizerHelper::widenWithUnmerge(LLT WideTy, Register OrigReg) {
1543 Register WideReg = MRI.createGenericVirtualRegister(WideTy);
1544 LLT OrigTy = MRI.getType(OrigReg);
1545 LLT LCMTy = getLCMType(WideTy, OrigTy);
1546
1547 const int NumMergeParts = LCMTy.getSizeInBits() / WideTy.getSizeInBits();
1548 const int NumUnmergeParts = LCMTy.getSizeInBits() / OrigTy.getSizeInBits();
1549
1550 Register UnmergeSrc = WideReg;
1551
1552 // Create a merge to the LCM type, padding with undef
1553 // %0:_(<3 x s32>) = G_FOO => <4 x s32>
1554 // =>
1555 // %1:_(<4 x s32>) = G_FOO
1556 // %2:_(<4 x s32>) = G_IMPLICIT_DEF
1557 // %3:_(<12 x s32>) = G_CONCAT_VECTORS %1, %2, %2
1558 // %0:_(<3 x s32>), %4:_, %5:_, %6:_ = G_UNMERGE_VALUES %3
1559 if (NumMergeParts > 1) {
1560 Register Undef = MIRBuilder.buildUndef(WideTy).getReg(0);
1561 SmallVector<Register, 8> MergeParts(NumMergeParts, Undef);
1562 MergeParts[0] = WideReg;
1563 UnmergeSrc = MIRBuilder.buildMerge(LCMTy, MergeParts).getReg(0);
1564 }
1565
1566 // Unmerge to the original register and pad with dead defs.
1567 SmallVector<Register, 8> UnmergeResults(NumUnmergeParts);
1568 UnmergeResults[0] = OrigReg;
1569 for (int I = 1; I != NumUnmergeParts; ++I)
1570 UnmergeResults[I] = MRI.createGenericVirtualRegister(OrigTy);
1571
1572 MIRBuilder.buildUnmerge(UnmergeResults, UnmergeSrc);
1573 return WideReg;
1574}
1575
1576LegalizerHelper::LegalizeResult
1577LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
1578 LLT WideTy) {
1579 if (TypeIdx != 0)
1580 return UnableToLegalize;
1581
1582 int NumDst = MI.getNumOperands() - 1;
1583 Register SrcReg = MI.getOperand(NumDst).getReg();
1584 LLT SrcTy = MRI.getType(SrcReg);
1585 if (SrcTy.isVector())
1586 return UnableToLegalize;
1587
1588 Register Dst0Reg = MI.getOperand(0).getReg();
1589 LLT DstTy = MRI.getType(Dst0Reg);
1590 if (!DstTy.isScalar())
1591 return UnableToLegalize;
1592
1593 if (WideTy.getSizeInBits() >= SrcTy.getSizeInBits()) {
1594 if (SrcTy.isPointer()) {
1595 const DataLayout &DL = MIRBuilder.getDataLayout();
1596 if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) {
1597 LLVM_DEBUG(
1598 dbgs() << "Not casting non-integral address space integer\n");
1599 return UnableToLegalize;
1600 }
1601
1602 SrcTy = LLT::scalar(SrcTy.getSizeInBits());
1603 SrcReg = MIRBuilder.buildPtrToInt(SrcTy, SrcReg).getReg(0);
1604 }
1605
1606 // Widen SrcTy to WideTy. This does not affect the result, but since the
1607 // user requested this size, it is probably better handled than SrcTy and
1608 // should reduce the total number of legalization artifacts.
1609 if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
1610 SrcTy = WideTy;
1611 SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
1612 }
1613
1614 // There's no unmerge type to target. Directly extract the bits from the
1615 // source type.
1616 unsigned DstSize = DstTy.getSizeInBits();
1617
1618 MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
1619 for (int I = 1; I != NumDst; ++I) {
1620 auto ShiftAmt = MIRBuilder.buildConstant(SrcTy, DstSize * I);
1621 auto Shr = MIRBuilder.buildLShr(SrcTy, SrcReg, ShiftAmt);
1622 MIRBuilder.buildTrunc(MI.getOperand(I), Shr);
1623 }
1624
1625 MI.eraseFromParent();
1626 return Legalized;
1627 }
1628
1629 // Extend the source to a wider type.
1630 LLT LCMTy = getLCMType(SrcTy, WideTy);
1631
1632 Register WideSrc = SrcReg;
1633 if (LCMTy.getSizeInBits() != SrcTy.getSizeInBits()) {
1634 // TODO: If this is an integral address space, cast to integer and anyext.
1635 if (SrcTy.isPointer()) {
1636 LLVM_DEBUG(dbgs() << "Widening pointer source types not implemented\n");
1637 return UnableToLegalize;
1638 }
1639
1640 WideSrc = MIRBuilder.buildAnyExt(LCMTy, WideSrc).getReg(0);
1641 }
1642
1643 auto Unmerge = MIRBuilder.buildUnmerge(WideTy, WideSrc);
1644
1645 // Create a sequence of unmerges and merges to the original results. Since we
1646 // may have widened the source, we will need to pad the results with dead defs
1647 // to cover the source register.
1648 // e.g. widen s48 to s64:
1649 // %1:_(s48), %2:_(s48) = G_UNMERGE_VALUES %0:_(s96)
1650 //
1651 // =>
1652 // %4:_(s192) = G_ANYEXT %0:_(s96)
1653 // %5:_(s64), %6, %7 = G_UNMERGE_VALUES %4 ; Requested unmerge
1654 // ; unpack to GCD type, with extra dead defs
1655 // %8:_(s16), %9, %10, %11 = G_UNMERGE_VALUES %5:_(s64)
1656 // %12:_(s16), %13, dead %14, dead %15 = G_UNMERGE_VALUES %6:_(s64)
1657 // dead %16:_(s16), dead %17, dead %18, dead %19 = G_UNMERGE_VALUES %7:_(s64)
1658 // %1:_(s48) = G_MERGE_VALUES %8:_(s16), %9, %10 ; Remerge to destination
1659 // %2:_(s48) = G_MERGE_VALUES %11:_(s16), %12, %13 ; Remerge to destination
1660 const LLT GCDTy = getGCDType(WideTy, DstTy);
1661 const int NumUnmerge = Unmerge->getNumOperands() - 1;
1662 const int PartsPerRemerge = DstTy.getSizeInBits() / GCDTy.getSizeInBits();
1663
1664 // Directly unmerge to the destination without going through a GCD type
1665 // if possible.
1666 if (PartsPerRemerge == 1) {
1667 const int PartsPerUnmerge = WideTy.getSizeInBits() / DstTy.getSizeInBits();
1668
1669 for (int I = 0; I != NumUnmerge; ++I) {
1670 auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
1671
1672 for (int J = 0; J != PartsPerUnmerge; ++J) {
1673 int Idx = I * PartsPerUnmerge + J;
1674 if (Idx < NumDst)
1675 MIB.addDef(MI.getOperand(Idx).getReg());
1676 else {
1677 // Create dead def for excess components.
1678 MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
1679 }
1680 }
1681
1682 MIB.addUse(Unmerge.getReg(I));
1683 }
1684 } else {
1685 SmallVector<Register, 16> Parts;
1686 for (int J = 0; J != NumUnmerge; ++J)
1687 extractGCDType(Parts, GCDTy, Unmerge.getReg(J));
1688
1689 SmallVector<Register, 8> RemergeParts;
1690 for (int I = 0; I != NumDst; ++I) {
1691 for (int J = 0; J < PartsPerRemerge; ++J) {
1692 const int Idx = I * PartsPerRemerge + J;
1693 RemergeParts.emplace_back(Parts[Idx]);
1694 }
1695
1696 MIRBuilder.buildMerge(MI.getOperand(I).getReg(), RemergeParts);
1697 RemergeParts.clear();
1698 }
1699 }
1700
1701 MI.eraseFromParent();
1702 return Legalized;
1703}
1704
1705LegalizerHelper::LegalizeResult
1706LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
1707 LLT WideTy) {
1708 Register DstReg = MI.getOperand(0).getReg();
1709 Register SrcReg = MI.getOperand(1).getReg();
1710 LLT SrcTy = MRI.getType(SrcReg);
1711
1712 LLT DstTy = MRI.getType(DstReg);
1713 unsigned Offset = MI.getOperand(2).getImm();
1714
1715 if (TypeIdx == 0) {
1716 if (SrcTy.isVector() || DstTy.isVector())
1717 return UnableToLegalize;
1718
1719 SrcOp Src(SrcReg);
1720 if (SrcTy.isPointer()) {
1721 // Extracts from pointers can be handled only if they are really just
1722 // simple integers.
1723 const DataLayout &DL = MIRBuilder.getDataLayout();
1724 if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace()))
1725 return UnableToLegalize;
1726
1727 LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits());
1728 Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
1729 SrcTy = SrcAsIntTy;
1730 }
1731
1732 if (DstTy.isPointer())
1733 return UnableToLegalize;
1734
1735 if (Offset == 0) {
1736 // Avoid a shift in the degenerate case.
1737 MIRBuilder.buildTrunc(DstReg,
1738 MIRBuilder.buildAnyExtOrTrunc(WideTy, Src));
1739 MI.eraseFromParent();
1740 return Legalized;
1741 }
1742
1743 // Do a shift in the source type.
1744 LLT ShiftTy = SrcTy;
1745 if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
1746 Src = MIRBuilder.buildAnyExt(WideTy, Src);
1747 ShiftTy = WideTy;
1748 }
1749
1750 auto LShr = MIRBuilder.buildLShr(
1751 ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
1752 MIRBuilder.buildTrunc(DstReg, LShr);
1753 MI.eraseFromParent();
1754 return Legalized;
1755 }
1756
1757 if (SrcTy.isScalar()) {
1758 Observer.changingInstr(MI);
1759 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1760 Observer.changedInstr(MI);
1761 return Legalized;
1762 }
1763
1764 if (!SrcTy.isVector())
1765 return UnableToLegalize;
1766
1767 if (DstTy != SrcTy.getElementType())
1768 return UnableToLegalize;
1769
1770 if (Offset % SrcTy.getScalarSizeInBits() != 0)
1771 return UnableToLegalize;
1772
1773 Observer.changingInstr(MI);
1774 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1775
1776 MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
1777 Offset);
1778 widenScalarDst(MI, WideTy.getScalarType(), 0);
1779 Observer.changedInstr(MI);
1780 return Legalized;
1781}
1782
1783LegalizerHelper::LegalizeResult
1784LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
1785 LLT WideTy) {
1786 if (TypeIdx != 0 || WideTy.isVector())
1787 return UnableToLegalize;
1788 Observer.changingInstr(MI);
1789 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
1790 widenScalarDst(MI, WideTy);
1791 Observer.changedInstr(MI);
1792 return Legalized;
1793}
1794
1795LegalizerHelper::LegalizeResult
1796LegalizerHelper::widenScalarAddSubOverflow(MachineInstr &MI, unsigned TypeIdx,
1797 LLT WideTy) {
1798 if (TypeIdx == 1)
1799 return UnableToLegalize; // TODO
1800
1801 unsigned Opcode;
1802 unsigned ExtOpcode;
1803 Optional<Register> CarryIn = None;
1804 switch (MI.getOpcode()) {
1805 default:
1806 llvm_unreachable("Unexpected opcode!");
1807 case TargetOpcode::G_SADDO:
1808 Opcode = TargetOpcode::G_ADD;
1809 ExtOpcode = TargetOpcode::G_SEXT;
1810 break;
1811 case TargetOpcode::G_SSUBO:
1812 Opcode = TargetOpcode::G_SUB;
1813 ExtOpcode = TargetOpcode::G_SEXT;
1814 break;
1815 case TargetOpcode::G_UADDO:
1816 Opcode = TargetOpcode::G_ADD;
1817 ExtOpcode = TargetOpcode::G_ZEXT;
1818 break;
1819 case TargetOpcode::G_USUBO:
1820 Opcode = TargetOpcode::G_SUB;
1821 ExtOpcode = TargetOpcode::G_ZEXT;
1822 break;
1823 case TargetOpcode::G_SADDE:
1824 Opcode = TargetOpcode::G_UADDE;
1825 ExtOpcode = TargetOpcode::G_SEXT;
1826 CarryIn = MI.getOperand(4).getReg();
1827 break;
1828 case TargetOpcode::G_SSUBE:
1829 Opcode = TargetOpcode::G_USUBE;
1830 ExtOpcode = TargetOpcode::G_SEXT;
1831 CarryIn = MI.getOperand(4).getReg();
1832 break;
1833 case TargetOpcode::G_UADDE:
1834 Opcode = TargetOpcode::G_UADDE;
1835 ExtOpcode = TargetOpcode::G_ZEXT;
1836 CarryIn = MI.getOperand(4).getReg();
1837 break;
1838 case TargetOpcode::G_USUBE:
1839 Opcode = TargetOpcode::G_USUBE;
1840 ExtOpcode = TargetOpcode::G_ZEXT;
1841 CarryIn = MI.getOperand(4).getReg();
1842 break;
1843 }
1844
1845 auto LHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(2)});
1846 auto RHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(3)});
1847 // Do the arithmetic in the larger type.
1848 Register NewOp;
1849 if (CarryIn) {
1850 LLT CarryOutTy = MRI.getType(MI.getOperand(1).getReg());
1851 NewOp = MIRBuilder
1852 .buildInstr(Opcode, {WideTy, CarryOutTy},
1853 {LHSExt, RHSExt, *CarryIn})
1854 .getReg(0);
1855 } else {
1856 NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSExt, RHSExt}).getReg(0);
1857 }
1858 LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
1859 auto TruncOp = MIRBuilder.buildTrunc(OrigTy, NewOp);
1860 auto ExtOp = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {TruncOp});
1861 // There is no overflow if the ExtOp is the same as NewOp.
1862 MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1), NewOp, ExtOp);
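// Editor's worked example (values hypothetical): widening G_UADDO s8 to s32
// with inputs 250 and 10: NewOp = 260, the trunc to s8 gives 4, and the zext
// back to s32 gives 4 != 260, so the overflow bit is set as required.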
1863 // Now trunc the NewOp to the original result.
1864 MIRBuilder.buildTrunc(MI.getOperand(0), NewOp);
1865 MI.eraseFromParent();
1866 return Legalized;
1867}
1868
1869LegalizerHelper::LegalizeResult
1870LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx,
1871 LLT WideTy) {
1872 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SADDSAT ||
1873 MI.getOpcode() == TargetOpcode::G_SSUBSAT ||
1874 MI.getOpcode() == TargetOpcode::G_SSHLSAT;
1875 bool IsShift = MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
1876 MI.getOpcode() == TargetOpcode::G_USHLSAT;
1877 // We can convert this to:
1878 // 1. Any extend iN to iM
1879 // 2. SHL by M-N
1880 // 3. [US][ADD|SUB|SHL]SAT
1881 // 4. L/ASHR by M-N
1882 //
1883 // It may be more efficient to lower this to a min and a max operation in
1884 // the higher precision arithmetic if the promoted operation isn't legal,
1885 // but this decision is up to the target's lowering request.
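// Editor's sketch (hypothetical registers; shift amounts written as
// immediates for brevity): widening G_UADDSAT s8 -> s32, where SHLAmount
// is 24:
//   %a:_(s32) = G_ANYEXT %x:_(s8)
//   %la:_(s32) = G_SHL %a, 24        ; operate in the top byte
//   %b:_(s32) = G_ANYEXT %y:_(s8)
//   %lb:_(s32) = G_SHL %b, 24
//   %s:_(s32) = G_UADDSAT %la, %lb   ; saturates at bit 31 as desired
//   %r:_(s32) = G_LSHR %s, 24        ; unsigned op, so a logical shift
//   %dst:_(s8) = G_TRUNC %r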
1886 Register DstReg = MI.getOperand(0).getReg();
1887
1888 unsigned NewBits = WideTy.getScalarSizeInBits();
1889 unsigned SHLAmount = NewBits - MRI.getType(DstReg).getScalarSizeInBits();
1890
1891 // Shifts must zero-extend the RHS to preserve the unsigned quantity, and
1892 // must not left shift the RHS to preserve the shift amount.
1893 auto LHS = MIRBuilder.buildAnyExt(WideTy, MI.getOperand(1));
1894 auto RHS = IsShift ? MIRBuilder.buildZExt(WideTy, MI.getOperand(2))
1895 : MIRBuilder.buildAnyExt(WideTy, MI.getOperand(2));
1896 auto ShiftK = MIRBuilder.buildConstant(WideTy, SHLAmount);
1897 auto ShiftL = MIRBuilder.buildShl(WideTy, LHS, ShiftK);
1898 auto ShiftR = IsShift ? RHS : MIRBuilder.buildShl(WideTy, RHS, ShiftK);
1899
1900 auto WideInst = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy},
1901 {ShiftL, ShiftR}, MI.getFlags());
1902
1903 // Use a shift that will preserve the number of sign bits when the trunc is
1904 // folded away.
1905 auto Result = IsSigned ? MIRBuilder.buildAShr(WideTy, WideInst, ShiftK)
1906 : MIRBuilder.buildLShr(WideTy, WideInst, ShiftK);
1907
1908 MIRBuilder.buildTrunc(DstReg, Result);
1909 MI.eraseFromParent();
1910 return Legalized;
1911}
1912
1913LegalizerHelper::LegalizeResult
1914LegalizerHelper::widenScalarMulo(MachineInstr &MI, unsigned TypeIdx,
1915 LLT WideTy) {
1916 if (TypeIdx == 1)
1917 return UnableToLegalize;
1918
1919 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULO;
1920 Register Result = MI.getOperand(0).getReg();
1921 Register OriginalOverflow = MI.getOperand(1).getReg();
1922 Register LHS = MI.getOperand(2).getReg();
1923 Register RHS = MI.getOperand(3).getReg();
1924 LLT SrcTy = MRI.getType(LHS);
1925 LLT OverflowTy = MRI.getType(OriginalOverflow);
1926 unsigned SrcBitWidth = SrcTy.getScalarSizeInBits();
1927
1928 // To determine if the result overflowed in the larger type, we extend the
1929 // input to the larger type, do the multiply (checking if it overflows),
1930 // then also check the high bits of the result to see if overflow happened
1931 // there.
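// Editor's worked example (values hypothetical): for G_UMULO s8 widened to
// s16, 20 * 20 = 400 = 0x190; the high byte (0x01) is not the zero
// extension of the low byte (0x90), so the check below reports overflow.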
1932 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
1933 auto LeftOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {LHS});
1934 auto RightOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {RHS});
1935
1936 auto Mulo = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy, OverflowTy},
1937 {LeftOperand, RightOperand});
1938 auto Mul = Mulo->getOperand(0);
1939 MIRBuilder.buildTrunc(Result, Mul);
1940
1941 MachineInstrBuilder ExtResult;
1942 // Overflow occurred if it occurred in the larger type, or if the high part
1943 // of the result does not zero/sign-extend the low part. Check this second
1944 // possibility first.
1945 if (IsSigned) {
1946 // For signed, overflow occurred when the high part does not sign-extend
1947 // the low part.
1948 ExtResult = MIRBuilder.buildSExtInReg(WideTy, Mul, SrcBitWidth);
1949 } else {
1950 // Unsigned overflow occurred when the high part does not zero-extend the
1951 // low part.
1952 ExtResult = MIRBuilder.buildZExtInReg(WideTy, Mul, SrcBitWidth);
1953 }
1954
1955 // Multiplication cannot overflow if the WideTy is >= 2 * original width,
1956 // so we don't need to check the overflow result of the larger-type Mulo.
1957 if (WideTy.getScalarSizeInBits() < 2 * SrcBitWidth) {
1958 auto Overflow =
1959 MIRBuilder.buildICmp(CmpInst::ICMP_NE, OverflowTy, Mul, ExtResult);
1960 // Finally check if the multiplication in the larger type itself overflowed.
1961 MIRBuilder.buildOr(OriginalOverflow, Mulo->getOperand(1), Overflow);
1962 } else {
1963 MIRBuilder.buildICmp(CmpInst::ICMP_NE, OriginalOverflow, Mul, ExtResult);
1964 }
1965 MI.eraseFromParent();
1966 return Legalized;
1967}
1968
1969LegalizerHelper::LegalizeResult
1970LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
1971 switch (MI.getOpcode()) {
1972 default:
1973 return UnableToLegalize;
1974 case TargetOpcode::G_ATOMICRMW_XCHG:
1975 case TargetOpcode::G_ATOMICRMW_ADD:
1976 case TargetOpcode::G_ATOMICRMW_SUB:
1977 case TargetOpcode::G_ATOMICRMW_AND:
1978 case TargetOpcode::G_ATOMICRMW_OR:
1979 case TargetOpcode::G_ATOMICRMW_XOR:
1980 case TargetOpcode::G_ATOMICRMW_MIN:
1981 case TargetOpcode::G_ATOMICRMW_MAX:
1982 case TargetOpcode::G_ATOMICRMW_UMIN:
1983 case TargetOpcode::G_ATOMICRMW_UMAX:
1984 assert(TypeIdx == 0 && "atomicrmw with second scalar type");
1985 Observer.changingInstr(MI);
1986 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
1987 widenScalarDst(MI, WideTy, 0);
1988 Observer.changedInstr(MI);
1989 return Legalized;
1990 case TargetOpcode::G_ATOMIC_CMPXCHG:
1991 assert(TypeIdx == 0 && "G_ATOMIC_CMPXCHG with second scalar type");
1992 Observer.changingInstr(MI);
1993 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
1994 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
1995 widenScalarDst(MI, WideTy, 0);
1996 Observer.changedInstr(MI);
1997 return Legalized;
1998 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS:
1999 if (TypeIdx == 0) {
2000 Observer.changingInstr(MI);
2001 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2002 widenScalarSrc(MI, WideTy, 4, TargetOpcode::G_ANYEXT);
2003 widenScalarDst(MI, WideTy, 0);
2004 Observer.changedInstr(MI);
2005 return Legalized;
2006 }
2007 assert(TypeIdx == 1 &&
2008 "G_ATOMIC_CMPXCHG_WITH_SUCCESS with third scalar type");
2009 Observer.changingInstr(MI);
2010 widenScalarDst(MI, WideTy, 1);
2011 Observer.changedInstr(MI);
2012 return Legalized;
2013 case TargetOpcode::G_EXTRACT:
2014 return widenScalarExtract(MI, TypeIdx, WideTy);
2015 case TargetOpcode::G_INSERT:
2016 return widenScalarInsert(MI, TypeIdx, WideTy);
2017 case TargetOpcode::G_MERGE_VALUES:
2018 return widenScalarMergeValues(MI, TypeIdx, WideTy);
2019 case TargetOpcode::G_UNMERGE_VALUES:
2020 return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
2021 case TargetOpcode::G_SADDO:
2022 case TargetOpcode::G_SSUBO:
2023 case TargetOpcode::G_UADDO:
2024 case TargetOpcode::G_USUBO:
2025 case TargetOpcode::G_SADDE:
2026 case TargetOpcode::G_SSUBE:
2027 case TargetOpcode::G_UADDE:
2028 case TargetOpcode::G_USUBE:
2029 return widenScalarAddSubOverflow(MI, TypeIdx, WideTy);
2030 case TargetOpcode::G_UMULO:
2031 case TargetOpcode::G_SMULO:
2032 return widenScalarMulo(MI, TypeIdx, WideTy);
2033 case TargetOpcode::G_SADDSAT:
2034 case TargetOpcode::G_SSUBSAT:
2035 case TargetOpcode::G_SSHLSAT:
2036 case TargetOpcode::G_UADDSAT:
2037 case TargetOpcode::G_USUBSAT:
2038 case TargetOpcode::G_USHLSAT:
2039 return widenScalarAddSubShlSat(MI, TypeIdx, WideTy);
2040 case TargetOpcode::G_CTTZ:
2041 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
2042 case TargetOpcode::G_CTLZ:
2043 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
2044 case TargetOpcode::G_CTPOP: {
2045 if (TypeIdx == 0) {
2046 Observer.changingInstr(MI);
2047 widenScalarDst(MI, WideTy, 0);
2048 Observer.changedInstr(MI);
2049 return Legalized;
2050 }
2051
2052 Register SrcReg = MI.getOperand(1).getReg();
2053
2054 // First ZEXT the input.
2055 auto MIBSrc = MIRBuilder.buildZExt(WideTy, SrcReg);
2056 LLT CurTy = MRI.getType(SrcReg);
2057 if (MI.getOpcode() == TargetOpcode::G_CTTZ) {
2058 // The count is the same in the larger type except if the original
2059 // value was zero. This can be handled by setting the bit just off
2060 // the top of the original type.
2061 auto TopBit =
2062 APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
2063 MIBSrc = MIRBuilder.buildOr(
2064 WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
2065 }
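// Editor's worked example: G_CTTZ of an s8 zero computed in s32 would
// otherwise return 32; OR-ing in bit 8 (0x100) caps the count at 8, the
// correct answer for the original s8 type.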
2066
2067 // Perform the operation at the larger size.
2068 auto MIBNewOp = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy}, {MIBSrc});
2069 // This is already the correct result for CTPOP and the CTTZ variants.
2070 if (MI.getOpcode() == TargetOpcode::G_CTLZ ||
2071 MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
2072 // The correct result is NewOp - (difference in width between WideTy and CurTy).
2073 unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
2074 MIBNewOp = MIRBuilder.buildSub(
2075 WideTy, MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff));
2076 }
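// Editor's worked example: G_CTLZ of the s8 value 1 computed in s32 yields
// 31; subtracting the 24-bit width difference gives the expected 7.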
2077
2078 MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
2079 MI.eraseFromParent();
2080 return Legalized;
2081 }
2082 case TargetOpcode::G_BSWAP: {
2083 Observer.changingInstr(MI);
2084 Register DstReg = MI.getOperand(0).getReg();
2085
2086 Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
2087 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2088 Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
2089 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2090
2091 MI.getOperand(0).setReg(DstExt);
2092
2093 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2094
2095 LLT Ty = MRI.getType(DstReg);
2096 unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
2097 MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
2098 MIRBuilder.buildLShr(ShrReg, DstExt, ShiftAmtReg);
2099
2100 MIRBuilder.buildTrunc(DstReg, ShrReg);
2101 Observer.changedInstr(MI);
2102 return Legalized;
2103 }
2104 case TargetOpcode::G_BITREVERSE: {
2105 Observer.changingInstr(MI);
2106
2107 Register DstReg = MI.getOperand(0).getReg();
2108 LLT Ty = MRI.getType(DstReg);
2109 unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
2110
2111 Register DstExt = MRI.createGenericVirtualRegister(WideTy);
2112 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2113 MI.getOperand(0).setReg(DstExt);
2114 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
2115
2116 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, DiffBits);
2117 auto Shift = MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt);
2118 MIRBuilder.buildTrunc(DstReg, Shift);
2119 Observer.changedInstr(MI);
2120 return Legalized;
2121 }
2122 case TargetOpcode::G_FREEZE:
2123 Observer.changingInstr(MI);
2124 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2125 widenScalarDst(MI, WideTy);
2126 Observer.changedInstr(MI);
2127 return Legalized;
2128
2129 case TargetOpcode::G_ABS:
2130 Observer.changingInstr(MI);
2131 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2132 widenScalarDst(MI, WideTy);
2133 Observer.changedInstr(MI);
2134 return Legalized;
2135
2136 case TargetOpcode::G_ADD:
2137 case TargetOpcode::G_AND:
2138 case TargetOpcode::G_MUL:
2139 case TargetOpcode::G_OR:
2140 case TargetOpcode::G_XOR:
2141 case TargetOpcode::G_SUB:
2142 // Perform operation at larger width (any extension is fine here, high bits
2143 // don't affect the result) and then truncate the result back to the
2144 // original type.
2145 Observer.changingInstr(MI);
2146 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2147 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2148 widenScalarDst(MI, WideTy);
2149 Observer.changedInstr(MI);
2150 return Legalized;
2151
2152 case TargetOpcode::G_SBFX:
2153 case TargetOpcode::G_UBFX:
2154 Observer.changingInstr(MI);
2155
2156 if (TypeIdx == 0) {
2157 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2158 widenScalarDst(MI, WideTy);
2159 } else {
2160 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2161 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
2162 }
2163
2164 Observer.changedInstr(MI);
2165 return Legalized;
2166
2167 case TargetOpcode::G_SHL:
2168 Observer.changingInstr(MI);
2169
2170 if (TypeIdx == 0) {
2171 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2172 widenScalarDst(MI, WideTy);
2173 } else {
2174 assert(TypeIdx == 1);
2175 // The "number of bits to shift" operand must preserve its value as an
2176 // unsigned integer:
2177 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2178 }
2179
2180 Observer.changedInstr(MI);
2181 return Legalized;
2182
2183 case TargetOpcode::G_SDIV:
2184 case TargetOpcode::G_SREM:
2185 case TargetOpcode::G_SMIN:
2186 case TargetOpcode::G_SMAX:
2187 Observer.changingInstr(MI);
2188 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2189 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2190 widenScalarDst(MI, WideTy);
2191 Observer.changedInstr(MI);
2192 return Legalized;
2193
2194 case TargetOpcode::G_SDIVREM:
2195 Observer.changingInstr(MI);
2196 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2197 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
2198 widenScalarDst(MI, WideTy);
2199 widenScalarDst(MI, WideTy, 1);
2200 Observer.changedInstr(MI);
2201 return Legalized;
2202
2203 case TargetOpcode::G_ASHR:
2204 case TargetOpcode::G_LSHR:
2205 Observer.changingInstr(MI);
2206
2207 if (TypeIdx == 0) {
2208 unsigned CvtOp = MI.getOpcode() == TargetOpcode::G_ASHR ?
2209 TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
2210
2211 widenScalarSrc(MI, WideTy, 1, CvtOp);
2212 widenScalarDst(MI, WideTy);
2213 } else {
2214 assert(TypeIdx == 1);
2215 // The "number of bits to shift" operand must preserve its value as an
2216 // unsigned integer:
2217 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2218 }
2219
2220 Observer.changedInstr(MI);
2221 return Legalized;
2222 case TargetOpcode::G_UDIV:
2223 case TargetOpcode::G_UREM:
2224 case TargetOpcode::G_UMIN:
2225 case TargetOpcode::G_UMAX:
2226 Observer.changingInstr(MI);
2227 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
2228 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2229 widenScalarDst(MI, WideTy);
2230 Observer.changedInstr(MI);
2231 return Legalized;
2232
2233 case TargetOpcode::G_UDIVREM:
2234 Observer.changingInstr(MI);
2235 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2236 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ZEXT);
2237 widenScalarDst(MI, WideTy);
2238 widenScalarDst(MI, WideTy, 1);
2239 Observer.changedInstr(MI);
2240 return Legalized;
2241
2242 case TargetOpcode::G_SELECT:
2243 Observer.changingInstr(MI);
2244 if (TypeIdx == 0) {
2245 // Perform operation at larger width (any extension is fine here, high
2246 // bits don't affect the result) and then truncate the result back to the
2247 // original type.
2248 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2249 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
2250 widenScalarDst(MI, WideTy);
2251 } else {
2252 bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
2253 // Explicit extension is required here since high bits affect the result.
2254 widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
2255 }
2256 Observer.changedInstr(MI);
2257 return Legalized;
2258
2259 case TargetOpcode::G_FPTOSI:
2260 case TargetOpcode::G_FPTOUI:
2261 Observer.changingInstr(MI);
2262
2263 if (TypeIdx == 0)
2264 widenScalarDst(MI, WideTy);
2265 else
2266 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
2267
2268 Observer.changedInstr(MI);
2269 return Legalized;
2270 case TargetOpcode::G_SITOFP:
2271 Observer.changingInstr(MI);
2272
2273 if (TypeIdx == 0)
2274 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2275 else
2276 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
2277
2278 Observer.changedInstr(MI);
2279 return Legalized;
2280 case TargetOpcode::G_UITOFP:
2281 Observer.changingInstr(MI);
2282
2283 if (TypeIdx == 0)
2284 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2285 else
2286 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
2287
2288 Observer.changedInstr(MI);
2289 return Legalized;
2290 case TargetOpcode::G_LOAD:
2291 case TargetOpcode::G_SEXTLOAD:
2292 case TargetOpcode::G_ZEXTLOAD:
2293 Observer.changingInstr(MI);
2294 widenScalarDst(MI, WideTy);
2295 Observer.changedInstr(MI);
2296 return Legalized;
2297
2298 case TargetOpcode::G_STORE: {
2299 if (TypeIdx != 0)
2300 return UnableToLegalize;
2301
2302 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
2303 if (!Ty.isScalar())
2304 return UnableToLegalize;
2305
2306 Observer.changingInstr(MI);
2307
2308 unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
2309 TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
2310 widenScalarSrc(MI, WideTy, 0, ExtType);
2311
2312 Observer.changedInstr(MI);
2313 return Legalized;
2314 }
2315 case TargetOpcode::G_CONSTANT: {
2316 MachineOperand &SrcMO = MI.getOperand(1);
2317 LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
2318 unsigned ExtOpc = LI.getExtOpcodeForWideningConstant(
2319 MRI.getType(MI.getOperand(0).getReg()));
2320 assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
2321 ExtOpc == TargetOpcode::G_ANYEXT) &&
2322 "Illegal Extend");
2323 const APInt &SrcVal = SrcMO.getCImm()->getValue();
2324 const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
2325 ? SrcVal.sext(WideTy.getSizeInBits())
2326 : SrcVal.zext(WideTy.getSizeInBits());
2327 Observer.changingInstr(MI);
2328 SrcMO.setCImm(ConstantInt::get(Ctx, Val));
2329
2330 widenScalarDst(MI, WideTy);
2331 Observer.changedInstr(MI);
2332 return Legalized;
2333 }
2334 case TargetOpcode::G_FCONSTANT: {
2335 MachineOperand &SrcMO = MI.getOperand(1);
2336 LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
2337 APFloat Val = SrcMO.getFPImm()->getValueAPF();
2338 bool LosesInfo;
2339 switch (WideTy.getSizeInBits()) {
2340 case 32:
2341 Val.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
2342 &LosesInfo);
2343 break;
2344 case 64:
2345 Val.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
2346 &LosesInfo);
2347 break;
2348 default:
2349 return UnableToLegalize;
2350 }
2351
2352 assert(!LosesInfo && "extend should always be lossless");
2353
2354 Observer.changingInstr(MI);
2355 SrcMO.setFPImm(ConstantFP::get(Ctx, Val));
2356
2357 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2358 Observer.changedInstr(MI);
2359 return Legalized;
2360 }
2361 case TargetOpcode::G_IMPLICIT_DEF: {
2362 Observer.changingInstr(MI);
2363 widenScalarDst(MI, WideTy);
2364 Observer.changedInstr(MI);
2365 return Legalized;
2366 }
2367 case TargetOpcode::G_BRCOND:
2368 Observer.changingInstr(MI);
2369 widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false));
2370 Observer.changedInstr(MI);
2371 return Legalized;
2372
2373 case TargetOpcode::G_FCMP:
2374 Observer.changingInstr(MI);
2375 if (TypeIdx == 0)
2376 widenScalarDst(MI, WideTy);
2377 else {
2378 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
2379 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
2380 }
2381 Observer.changedInstr(MI);
2382 return Legalized;
2383
2384 case TargetOpcode::G_ICMP:
2385 Observer.changingInstr(MI);
2386 if (TypeIdx == 0)
2387 widenScalarDst(MI, WideTy);
2388 else {
2389 unsigned ExtOpcode = CmpInst::isSigned(static_cast<CmpInst::Predicate>(
2390 MI.getOperand(1).getPredicate()))
2391 ? TargetOpcode::G_SEXT
2392 : TargetOpcode::G_ZEXT;
2393 widenScalarSrc(MI, WideTy, 2, ExtOpcode);
2394 widenScalarSrc(MI, WideTy, 3, ExtOpcode);
2395 }
2396 Observer.changedInstr(MI);
2397 return Legalized;
2398
2399 case TargetOpcode::G_PTR_ADD:
2400 assert(TypeIdx == 1 && "unable to legalize pointer of G_PTR_ADD");
2401 Observer.changingInstr(MI);
2402 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2403 Observer.changedInstr(MI);
2404 return Legalized;
2405
2406 case TargetOpcode::G_PHI: {
2407 assert(TypeIdx == 0 && "Expecting only Idx 0");
2408
2409 Observer.changingInstr(MI);
2410 for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
2411 MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
2412 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
2413 widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
2414 }
2415
2416 MachineBasicBlock &MBB = *MI.getParent();
2417 MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
2418 widenScalarDst(MI, WideTy);
2419 Observer.changedInstr(MI);
2420 return Legalized;
2421 }
2422 case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
2423 if (TypeIdx == 0) {
2424 Register VecReg = MI.getOperand(1).getReg();
2425 LLT VecTy = MRI.getType(VecReg);
2426 Observer.changingInstr(MI);
2427
2428 widenScalarSrc(
2429 MI, LLT::vector(VecTy.getElementCount(), WideTy.getSizeInBits()), 1,
2430 TargetOpcode::G_SEXT);
2431
2432 widenScalarDst(MI, WideTy, 0);
2433 Observer.changedInstr(MI);
2434 return Legalized;
2435 }
2436
2437 if (TypeIdx != 2)
2438 return UnableToLegalize;
2439 Observer.changingInstr(MI);
2440 // TODO: Probably should be zext
2441 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
2442 Observer.changedInstr(MI);
2443 return Legalized;
2444 }
2445 case TargetOpcode::G_INSERT_VECTOR_ELT: {
2446 if (TypeIdx == 1) {
2447 Observer.changingInstr(MI);
2448
2449 Register VecReg = MI.getOperand(1).getReg();
2450 LLT VecTy = MRI.getType(VecReg);
2451 LLT WideVecTy = LLT::vector(VecTy.getElementCount(), WideTy);
2452
2453 widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_ANYEXT);
2454 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
2455 widenScalarDst(MI, WideVecTy, 0);
2456 Observer.changedInstr(MI);
2457 return Legalized;
2458 }
2459
2460 if (TypeIdx == 2) {
2461 Observer.changingInstr(MI);
2462 // TODO: Probably should be zext
2463 widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
2464 Observer.changedInstr(MI);
2465 return Legalized;
2466 }
2467
2468 return UnableToLegalize;
2469 }
2470 case TargetOpcode::G_FADD:
2471 case TargetOpcode::G_FMUL:
2472 case TargetOpcode::G_FSUB:
2473 case TargetOpcode::G_FMA:
2474 case TargetOpcode::G_FMAD:
2475 case TargetOpcode::G_FNEG:
2476 case TargetOpcode::G_FABS:
2477 case TargetOpcode::G_FCANONICALIZE:
2478 case TargetOpcode::G_FMINNUM:
2479 case TargetOpcode::G_FMAXNUM:
2480 case TargetOpcode::G_FMINNUM_IEEE:
2481 case TargetOpcode::G_FMAXNUM_IEEE:
2482 case TargetOpcode::G_FMINIMUM:
2483 case TargetOpcode::G_FMAXIMUM:
2484 case TargetOpcode::G_FDIV:
2485 case TargetOpcode::G_FREM:
2486 case TargetOpcode::G_FCEIL:
2487 case TargetOpcode::G_FFLOOR:
2488 case TargetOpcode::G_FCOS:
2489 case TargetOpcode::G_FSIN:
2490 case TargetOpcode::G_FLOG10:
2491 case TargetOpcode::G_FLOG:
2492 case TargetOpcode::G_FLOG2:
2493 case TargetOpcode::G_FRINT:
2494 case TargetOpcode::G_FNEARBYINT:
2495 case TargetOpcode::G_FSQRT:
2496 case TargetOpcode::G_FEXP:
2497 case TargetOpcode::G_FEXP2:
2498 case TargetOpcode::G_FPOW:
2499 case TargetOpcode::G_INTRINSIC_TRUNC:
2500 case TargetOpcode::G_INTRINSIC_ROUND:
2501 case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
2502 assert(TypeIdx == 0);
2503 Observer.changingInstr(MI);
2504
2505 for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
2506 widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);
2507
2508 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2509 Observer.changedInstr(MI);
2510 return Legalized;
2511 case TargetOpcode::G_FPOWI: {
2512 if (TypeIdx != 0)
2513 return UnableToLegalize;
2514 Observer.changingInstr(MI);
2515 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
2516 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
2517 Observer.changedInstr(MI);
2518 return Legalized;
2519 }
2520 case TargetOpcode::G_INTTOPTR:
2521 if (TypeIdx != 1)
2522 return UnableToLegalize;
2523
2524 Observer.changingInstr(MI);
2525 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
2526 Observer.changedInstr(MI);
2527 return Legalized;
2528 case TargetOpcode::G_PTRTOINT:
2529 if (TypeIdx != 0)
2530 return UnableToLegalize;
2531
2532 Observer.changingInstr(MI);
2533 widenScalarDst(MI, WideTy, 0);
2534 Observer.changedInstr(MI);
2535 return Legalized;
2536 case TargetOpcode::G_BUILD_VECTOR: {
2537 Observer.changingInstr(MI);
2538
2539 const LLT WideEltTy = TypeIdx == 1 ? WideTy : WideTy.getElementType();
2540 for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
2541 widenScalarSrc(MI, WideEltTy, I, TargetOpcode::G_ANYEXT);
2542
2543 // Avoid changing the result vector type if the source element type was
2544 // requested.
2545 if (TypeIdx == 1) {
2546 MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
2547 } else {
2548 widenScalarDst(MI, WideTy, 0);
2549 }
2550
2551 Observer.changedInstr(MI);
2552 return Legalized;
2553 }
2554 case TargetOpcode::G_SEXT_INREG:
2555 if (TypeIdx != 0)
2556 return UnableToLegalize;
2557
2558 Observer.changingInstr(MI);
2559 widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
2560 widenScalarDst(MI, WideTy, 0, TargetOpcode::G_TRUNC);
2561 Observer.changedInstr(MI);
2562 return Legalized;
2563 case TargetOpcode::G_PTRMASK: {
2564 if (TypeIdx != 1)
2565 return UnableToLegalize;
2566 Observer.changingInstr(MI);
2567 widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
2568 Observer.changedInstr(MI);
2569 return Legalized;
2570 }
2571 }
2572}
2573
2574static void getUnmergePieces(SmallVectorImpl<Register> &Pieces,
2575 MachineIRBuilder &B, Register Src, LLT Ty) {
2576 auto Unmerge = B.buildUnmerge(Ty, Src);
2577 for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
2578 Pieces.push_back(Unmerge.getReg(I));
2579}
2580
2581LegalizerHelper::LegalizeResult
2582LegalizerHelper::lowerBitcast(MachineInstr &MI) {
2583 Register Dst = MI.getOperand(0).getReg();
2584 Register Src = MI.getOperand(1).getReg();
2585 LLT DstTy = MRI.getType(Dst);
2586 LLT SrcTy = MRI.getType(Src);
2587
2588 if (SrcTy.isVector()) {
2589 LLT SrcEltTy = SrcTy.getElementType();
2590 SmallVector<Register, 8> SrcRegs;
2591
2592 if (DstTy.isVector()) {
2593 int NumDstElt = DstTy.getNumElements();
2594 int NumSrcElt = SrcTy.getNumElements();
2595
2596 LLT DstEltTy = DstTy.getElementType();
2597 LLT DstCastTy = DstEltTy; // Intermediate bitcast result type
2598 LLT SrcPartTy = SrcEltTy; // Original unmerge result type.
2599
2600 // If there's an element size mismatch, insert intermediate casts to match
2601 // the result element type.
2602 if (NumSrcElt < NumDstElt) { // Source element type is larger.
2603 // %1:_(<4 x s8>) = G_BITCAST %0:_(<2 x s16>)
2604 //
2605 // =>
2606 //
2607 // %2:_(s16), %3:_(s16) = G_UNMERGE_VALUES %0
2608 // %4:_(<2 x s8>) = G_BITCAST %2
2609 // %5:_(<2 x s8>) = G_BITCAST %3
2610 // %1:_(<4 x s8>) = G_CONCAT_VECTORS %4, %5
2611 DstCastTy = LLT::fixed_vector(NumDstElt / NumSrcElt, DstEltTy);
2612 SrcPartTy = SrcEltTy;
2613 } else if (NumSrcElt > NumDstElt) { // Source element type is smaller.
2614 //
2615 // %1:_(<2 x s16>) = G_BITCAST %0:_(<4 x s8>)
2616 //
2617 // =>
2618 //
2619 // %2:_(<2 x s8>), %3:_(<2 x s8>) = G_UNMERGE_VALUES %0
2620 // %4:_(s16) = G_BITCAST %2
2621 // %5:_(s16) = G_BITCAST %3
2622 // %1:_(<2 x s16>) = G_BUILD_VECTOR %4, %5
2623 SrcPartTy = LLT::fixed_vector(NumSrcElt / NumDstElt, SrcEltTy);
2624 DstCastTy = DstEltTy;
2625 }
2626
2627 getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcPartTy);
2628 for (Register &SrcReg : SrcRegs)
2629 SrcReg = MIRBuilder.buildBitcast(DstCastTy, SrcReg).getReg(0);
2630 } else
2631 getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcEltTy);
2632
2633 MIRBuilder.buildMerge(Dst, SrcRegs);
2634 MI.eraseFromParent();
2635 return Legalized;
2636 }
2637
2638 if (DstTy.isVector()) {
2639 SmallVector<Register, 8> SrcRegs;
2640 getUnmergePieces(SrcRegs, MIRBuilder, Src, DstTy.getElementType());
2641 MIRBuilder.buildMerge(Dst, SrcRegs);
2642 MI.eraseFromParent();
2643 return Legalized;
2644 }
2645
2646 return UnableToLegalize;
2647}
2648
2649/// Figure out the bit offset into a register when coercing a vector index for
2650 /// the wide element type. This is only for the case when promoting a
2651 /// vector to one with larger elements.
2652//
2653///
2654/// %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
2655/// %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
2656static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B,
2657 Register Idx,
2658 unsigned NewEltSize,
2659 unsigned OldEltSize) {
2660 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
2661 LLT IdxTy = B.getMRI()->getType(Idx);
2662
2663 // Now figure out the amount we need to shift to get the target bits.
2664 auto OffsetMask = B.buildConstant(
2665 IdxTy, ~(APInt::getAllOnesValue(IdxTy.getSizeInBits()) << Log2EltRatio));
2666 auto OffsetIdx = B.buildAnd(IdxTy, Idx, OffsetMask);
2667 return B.buildShl(IdxTy, OffsetIdx,
2668 B.buildConstant(IdxTy, Log2_32(OldEltSize))).getReg(0);
2669}
2670
2671/// Perform a G_EXTRACT_VECTOR_ELT in a different sized vector element. If this
2672/// is casting to a vector with a smaller element size, perform multiple element
2673/// extracts and merge the results. If this is coercing to a vector with larger
2674/// elements, index the bitcasted vector and extract the target element with bit
2675/// operations. This is intended to force the indexing in the native register
2676/// size for architectures that can dynamically index the register file.
2677LegalizerHelper::LegalizeResult
2678LegalizerHelper::bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx,
2679 LLT CastTy) {
2680 if (TypeIdx != 1)
2681 return UnableToLegalize;
2682
2683 Register Dst = MI.getOperand(0).getReg();
2684 Register SrcVec = MI.getOperand(1).getReg();
2685 Register Idx = MI.getOperand(2).getReg();
2686 LLT SrcVecTy = MRI.getType(SrcVec);
2687 LLT IdxTy = MRI.getType(Idx);
2688
2689 LLT SrcEltTy = SrcVecTy.getElementType();
2690 unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
2691 unsigned OldNumElts = SrcVecTy.getNumElements();
2692
2693 LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
2694 Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
2695
2696 const unsigned NewEltSize = NewEltTy.getSizeInBits();
2697 const unsigned OldEltSize = SrcEltTy.getSizeInBits();
2698 if (NewNumElts > OldNumElts) {
2699 // Decreasing the vector element size
2700 //
2701 // e.g. i64 = extract_vector_elt x:v2i64, y:i32
2702 // =>
2703 // v4i32:castx = bitcast x:v2i64
2704 //
2705 // i64 = bitcast
2706 // (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))),
2707 // (i32 (extract_vector_elt castx, (2 * y + 1)))
2708 //
2709 if (NewNumElts % OldNumElts != 0)
2710 return UnableToLegalize;
2711
2712 // Type of the intermediate result vector.
2713 const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;
2714 LLT MidTy =
2715 LLT::scalarOrVector(ElementCount::getFixed(NewEltsPerOldElt), NewEltTy);
2716
2717 auto NewEltsPerOldEltK = MIRBuilder.buildConstant(IdxTy, NewEltsPerOldElt);
2718
2719 SmallVector<Register, 8> NewOps(NewEltsPerOldElt);
2720 auto NewBaseIdx = MIRBuilder.buildMul(IdxTy, Idx, NewEltsPerOldEltK);
2721
2722 for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
2723 auto IdxOffset = MIRBuilder.buildConstant(IdxTy, I);
2724 auto TmpIdx = MIRBuilder.buildAdd(IdxTy, NewBaseIdx, IdxOffset);
2725 auto Elt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, TmpIdx);
2726 NewOps[I] = Elt.getReg(0);
2727 }
2728
2729 auto NewVec = MIRBuilder.buildBuildVector(MidTy, NewOps);
2730 MIRBuilder.buildBitcast(Dst, NewVec);
2731 MI.eraseFromParent();
2732 return Legalized;
2733 }
2734
2735 if (NewNumElts < OldNumElts) {
2736 if (NewEltSize % OldEltSize != 0)
2737 return UnableToLegalize;
2738
2739 // This only depends on powers of 2 because we use bit tricks to figure out
2740 // the bit offset we need to shift to get the target element. A general
2741 // expansion could emit division/multiply.
2742 if (!isPowerOf2_32(NewEltSize / OldEltSize))
2743 return UnableToLegalize;
2744
2745 // Increasing the vector element size.
2746 // %elt:_(small_elt) = G_EXTRACT_VECTOR_ELT %vec:_(<N x small_elt>), %idx
2747 //
2748 // =>
2749 //
2750 // %cast = G_BITCAST %vec
2751 // %scaled_idx = G_LSHR %idx, Log2(DstEltSize / SrcEltSize)
2752 // %wide_elt = G_EXTRACT_VECTOR_ELT %cast, %scaled_idx
2753 // %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
2754 // %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
2755 // %elt_bits = G_LSHR %wide_elt, %offset_bits
2756 // %elt = G_TRUNC %elt_bits
2757
2758 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
2759 auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
2760
2761 // Divide to get the index in the wider element type.
2762 auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
2763
2764 Register WideElt = CastVec;
2765 if (CastTy.isVector()) {
2766 WideElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
2767 ScaledIdx).getReg(0);
2768 }
2769
2770 // Compute the bit offset into the register of the target element.
2771 Register OffsetBits = getBitcastWiderVectorElementOffset(
2772 MIRBuilder, Idx, NewEltSize, OldEltSize);
2773
2774 // Shift the wide element to get the target element.
2775 auto ExtractedBits = MIRBuilder.buildLShr(NewEltTy, WideElt, OffsetBits);
2776 MIRBuilder.buildTrunc(Dst, ExtractedBits);
2777 MI.eraseFromParent();
2778 return Legalized;
2779 }
2780
2781 return UnableToLegalize;
2782}
2783
2784/// Emit code to insert \p InsertReg into \p TargetReg at \p OffsetBits,
2785/// while preserving the other bits in \p TargetReg.
2786///
2787/// (ZExt(InsertReg) << Offset) | (TargetReg & ~(LowBitsSet(InsertReg.size()) << Offset))
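/// For example, inserting an s8 at bit offset 16 of an s32 target:
///   EltMask        = 0x000000ff
///   ShiftedMask    = 0x00ff0000
///   InvShiftedMask = 0xff00ffff
///   result         = (TargetReg & 0xff00ffff) | (ZExt(InsertReg) << 16)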
2788static Register buildBitFieldInsert(MachineIRBuilder &B,
2789 Register TargetReg, Register InsertReg,
2790 Register OffsetBits) {
2791 LLT TargetTy = B.getMRI()->getType(TargetReg);
2792 LLT InsertTy = B.getMRI()->getType(InsertReg);
2793 auto ZextVal = B.buildZExt(TargetTy, InsertReg);
2794 auto ShiftedInsertVal = B.buildShl(TargetTy, ZextVal, OffsetBits);
2795
2796 // Produce a bitmask of the value to insert
2797 auto EltMask = B.buildConstant(
2798 TargetTy, APInt::getLowBitsSet(TargetTy.getSizeInBits(),
2799 InsertTy.getSizeInBits()));
2800 // Shift it into position
2801 auto ShiftedMask = B.buildShl(TargetTy, EltMask, OffsetBits);
2802 auto InvShiftedMask = B.buildNot(TargetTy, ShiftedMask);
2803
2804 // Clear out the bits in the wide element
2805 auto MaskedOldElt = B.buildAnd(TargetTy, TargetReg, InvShiftedMask);
2806
2807 // The value to insert has all zeros already, so stick it into the masked
2808 // wide element.
2809 return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0);
2810}
2811
2812/// Perform a G_INSERT_VECTOR_ELT in a different sized vector element. If this
2813/// is increasing the element size, perform the indexing in the target element
2814/// type, and use bit operations to insert at the element position. This is
2815/// intended for architectures that can dynamically index the register file and
2816/// want to force indexing in the native register size.
2817LegalizerHelper::LegalizeResult
2818LegalizerHelper::bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx,
2819 LLT CastTy) {
2820 if (TypeIdx != 0)
2821 return UnableToLegalize;
2822
2823 Register Dst = MI.getOperand(0).getReg();
2824 Register SrcVec = MI.getOperand(1).getReg();
2825 Register Val = MI.getOperand(2).getReg();
2826 Register Idx = MI.getOperand(3).getReg();
2827
2828 LLT VecTy = MRI.getType(Dst);
2829 LLT IdxTy = MRI.getType(Idx);
2830
2831 LLT VecEltTy = VecTy.getElementType();
2832 LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
2833 const unsigned NewEltSize = NewEltTy.getSizeInBits();
2834 const unsigned OldEltSize = VecEltTy.getSizeInBits();
2835
2836 unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
2837 unsigned OldNumElts = VecTy.getNumElements();
2838
2839 Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
2840 if (NewNumElts < OldNumElts) {
2841 if (NewEltSize % OldEltSize != 0)
2842 return UnableToLegalize;
2843
2844 // This only depends on powers of 2 because we use bit tricks to figure out
2845 // the bit offset we need to shift to get the target element. A general
2846 // expansion could emit division/multiply.
2847 if (!isPowerOf2_32(NewEltSize / OldEltSize))
2848 return UnableToLegalize;
2849
2850 const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
2851 auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
2852
2853 // Divide to get the index in the wider element type.
2854 auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
2855
2856 Register ExtractedElt = CastVec;
2857 if (CastTy.isVector()) {
2858 ExtractedElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
2859 ScaledIdx).getReg(0);
2860 }
2861
2862 // Compute the bit offset into the register of the target element.
2863 Register OffsetBits = getBitcastWiderVectorElementOffset(
2864 MIRBuilder, Idx, NewEltSize, OldEltSize);
2865
2866 Register InsertedElt = buildBitFieldInsert(MIRBuilder, ExtractedElt,
2867 Val, OffsetBits);
2868 if (CastTy.isVector()) {
2869 InsertedElt = MIRBuilder.buildInsertVectorElement(
2870 CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0);
2871 }
2872
2873 MIRBuilder.buildBitcast(Dst, InsertedElt);
2874 MI.eraseFromParent();
2875 return Legalized;
2876 }
2877
2878 return UnableToLegalize;
2879}
2880
2881LegalizerHelper::LegalizeResult LegalizerHelper::lowerLoad(GAnyLoad &LoadMI) {
2882 // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
2883 Register DstReg = LoadMI.getDstReg();
2884 Register PtrReg = LoadMI.getPointerReg();
2885 LLT DstTy = MRI.getType(DstReg);
2886 MachineMemOperand &MMO = LoadMI.getMMO();
2887 LLT MemTy = MMO.getMemoryType();
2888 MachineFunction &MF = MIRBuilder.getMF();
2889 if (MemTy.isVector())
2890 return UnableToLegalize;
2891
2892 unsigned MemSizeInBits = MemTy.getSizeInBits();
2893 unsigned MemStoreSizeInBits = 8 * MemTy.getSizeInBytes();
2894
2895 if (MemSizeInBits != MemStoreSizeInBits) {
2896 // Promote to a byte-sized load if not loading an integral number of
2897 // bytes. For example, promote EXTLOAD:i20 -> EXTLOAD:i24.
2898 LLT WideMemTy = LLT::scalar(MemStoreSizeInBits);
2899 MachineMemOperand *NewMMO =
2900 MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideMemTy);
2901
2902 Register LoadReg = DstReg;
2903 LLT LoadTy = DstTy;
2904
2905 // If this wasn't already an extending load, we need to widen the result
2906 // register to avoid creating a load with a narrower result than the source.
2907 if (MemStoreSizeInBits > DstTy.getSizeInBits()) {
2908 LoadTy = WideMemTy;
2909 LoadReg = MRI.createGenericVirtualRegister(WideMemTy);
2910 }
2911
2912 if (isa<GSExtLoad>(LoadMI)) {
2913 auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
2914 MIRBuilder.buildSExtInReg(LoadReg, NewLoad, MemSizeInBits);
2915 } else if (isa<GZExtLoad>(LoadMI) || WideMemTy == DstTy) {
2916 auto NewLoad = MIRBuilder.buildLoad(LoadTy, PtrReg, *NewMMO);
2917 // The extra bits are guaranteed to be zero, since we stored them that
2918 // way. A zext load from Wide thus automatically gives zext from MemVT.
2919 MIRBuilder.buildAssertZExt(LoadReg, NewLoad, MemSizeInBits);
2920 } else {
2921 MIRBuilder.buildLoad(LoadReg, PtrReg, *NewMMO);
2922 }
2923
2924 if (DstTy != LoadTy)
2925 MIRBuilder.buildTrunc(DstReg, LoadReg);
2926
2927 LoadMI.eraseFromParent();
2928 return Legalized;
2929 }
2930
2931 // This load needs splitting into power of 2 sized loads.
2932 if (DstTy.isVector())
2933 return UnableToLegalize;
2934 if (isPowerOf2_32(MemSizeInBits))
2935 return UnableToLegalize; // Don't know what we're being asked to do.
2936
2937 // Big endian lowering not implemented.
2938 if (MIRBuilder.getDataLayout().isBigEndian())
2939 return UnableToLegalize;
2940
2941 // Our strategy here is to generate anyextending loads for the smaller
2942 // types up to the next power-of-2 result type, and then combine the two
2943 // larger result values together, before truncating back down to the
2944 // non-pow-2 type.
2945 // E.g. v1 = i24 load =>
2946 // v2 = i32 zextload (2 byte)
2947 // v3 = i32 load (1 byte)
2948 // v4 = i32 shl v3, 16
2949 // v5 = i32 or v4, v2
2950 // v1 = i24 trunc v5
2951 // By doing this we generate the correct truncate which should get
2952 // combined away as an artifact with a matching extend.
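 // Note this relies on the little-endian layout checked above: the low
 // LargeSplitSize bits live at the original pointer and the remaining bits
 // live at byte offset LargeSplitSize / 8, which is why the second load is
 // shifted left before being OR'd in.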
2953 uint64_t LargeSplitSize = PowerOf2Floor(MemSizeInBits);
2954 uint64_t SmallSplitSize = MemSizeInBits - LargeSplitSize;
2955
2956 MachineMemOperand *LargeMMO =
2957 MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
2958 MachineMemOperand *SmallMMO =
2959 MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
2960
2961 LLT PtrTy = MRI.getType(PtrReg);
2962 unsigned AnyExtSize = PowerOf2Ceil(DstTy.getSizeInBits());
2963 LLT AnyExtTy = LLT::scalar(AnyExtSize);
2964 auto LargeLoad = MIRBuilder.buildLoadInstr(TargetOpcode::G_ZEXTLOAD, AnyExtTy,
2965 PtrReg, *LargeMMO);
2966
2967 auto OffsetCst = MIRBuilder.buildConstant(LLT::scalar(PtrTy.getSizeInBits()),
2968 LargeSplitSize / 8);
2969 Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
2970 auto SmallPtr = MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst);
2971 auto SmallLoad = MIRBuilder.buildLoadInstr(LoadMI.getOpcode(), AnyExtTy,
2972 SmallPtr, *SmallMMO);
2973
2974 auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
2975 auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
2976
2977 if (AnyExtTy == DstTy)
2978 MIRBuilder.buildOr(DstReg, Shift, LargeLoad);
2979 else {
2980 auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
2981 MIRBuilder.buildTrunc(DstReg, {Or});
2982 }
2983
2984 LoadMI.eraseFromParent();
2985 return Legalized;
2986}
2987
2988LegalizerHelper::LegalizeResult LegalizerHelper::lowerStore(GStore &StoreMI) {
2989 // Lower a non-power of 2 store into multiple pow-2 stores.
2990 // E.g. split an i24 store into an i16 store + i8 store.
2991 // We do this by first extending the stored value to the next largest power
2992 // of 2 type, and then using truncating stores to store the components.
2993 // By doing this, as with G_LOAD, we generate an extend that can be
2994 // artifact-combined away instead of leaving behind extracts.
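 //
 // e.g. for an s24 store:
 //   %ext = G_ANYEXT %val to s32
 //   G_STORE %ext, %ptr          ; s16 truncating store of the low bits
 //   %hi  = G_LSHR %ext, 16
 //   G_STORE %hi, %ptr + 2       ; s8 truncating store of the high bits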
2995 Register SrcReg = StoreMI.getValueReg();
2996 Register PtrReg = StoreMI.getPointerReg();
2997 LLT SrcTy = MRI.getType(SrcReg);
2998 MachineFunction &MF = MIRBuilder.getMF();
2999 MachineMemOperand &MMO = **StoreMI.memoperands_begin();
3000 LLT MemTy = MMO.getMemoryType();
3001
3002 if (SrcTy.isVector())
3003 return UnableToLegalize;
3004
3005 unsigned StoreWidth = MemTy.getSizeInBits();
3006 unsigned StoreSizeInBits = 8 * MemTy.getSizeInBytes();
3007
3008 if (StoreWidth != StoreSizeInBits) {
3009 // Promote to a byte-sized store with upper bits zero if not
3010 // storing an integral number of bytes. For example, promote
3011 // TRUNCSTORE:i1 X -> TRUNCSTORE:i8 (and X, 1)
3012 LLT WideTy = LLT::scalar(StoreSizeInBits);
3013
3014 if (StoreSizeInBits > SrcTy.getSizeInBits()) {
3015 // Avoid creating a store with a narrower source than result.
3016 SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
3017 SrcTy = WideTy;
3018 }
3019
3020 auto ZextInReg = MIRBuilder.buildZExtInReg(SrcTy, SrcReg, StoreWidth);
3021
3022 MachineMemOperand *NewMMO =
3023 MF.getMachineMemOperand(&MMO, MMO.getPointerInfo(), WideTy);
3024 MIRBuilder.buildStore(ZextInReg, PtrReg, *NewMMO);
3025 StoreMI.eraseFromParent();
3026 return Legalized;
3027 }
3028
3029 if (isPowerOf2_32(MemTy.getSizeInBits()))
3030 return UnableToLegalize; // Don't know what we're being asked to do.
3031
3032 // Extend to the next pow-2. If this store was itself the result of lowering,
3033 // e.g. an s56 store being broken into s32 + s24, we might have a stored type
3034 // that's wider than the stored size.
3035 const LLT NewSrcTy = LLT::scalar(NextPowerOf2(MemTy.getSizeInBits()));
3036 auto ExtVal = MIRBuilder.buildAnyExtOrTrunc(NewSrcTy, SrcReg);
3037
3038 // Obtain the smaller value by shifting away the larger value.
3039 uint64_t LargeSplitSize = PowerOf2Floor(MemTy.getSizeInBits());
3040 uint64_t SmallSplitSize = MemTy.getSizeInBits() - LargeSplitSize;
3041 auto ShiftAmt = MIRBuilder.buildConstant(NewSrcTy, LargeSplitSize);
3042 auto SmallVal = MIRBuilder.buildLShr(NewSrcTy, ExtVal, ShiftAmt);
3043
3044 // Generate the PtrAdd and truncating stores.
3045 LLT PtrTy = MRI.getType(PtrReg);
3046 auto OffsetCst = MIRBuilder.buildConstant(
3047 LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
3048 Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
3049 auto SmallPtr =
3050 MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst);
3051
3052 MachineMemOperand *LargeMMO =
3053 MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
3054 MachineMemOperand *SmallMMO =
3055 MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
3056 MIRBuilder.buildStore(ExtVal, PtrReg, *LargeMMO);
3057 MIRBuilder.buildStore(SmallVal, SmallPtr, *SmallMMO);
3058 StoreMI.eraseFromParent();
3059 return Legalized;
3060}
3061
3062LegalizerHelper::LegalizeResult
3063LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
3064 switch (MI.getOpcode()) {
3065 case TargetOpcode::G_LOAD: {
3066 if (TypeIdx != 0)
3067 return UnableToLegalize;
3068 MachineMemOperand &MMO = **MI.memoperands_begin();
3069
3070 // Not sure how to interpret a bitcast of an extending load.
3071 if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
3072 return UnableToLegalize;
3073
3074 Observer.changingInstr(MI);
3075 bitcastDst(MI, CastTy, 0);
3076 MMO.setType(CastTy);
3077 Observer.changedInstr(MI);
3078 return Legalized;
3079 }
3080 case TargetOpcode::G_STORE: {
3081 if (TypeIdx != 0)
3082 return UnableToLegalize;
3083
3084 MachineMemOperand &MMO = **MI.memoperands_begin();
3085
3086 // Not sure how to interpret a bitcast of a truncating store.
3087 if (MMO.getMemoryType().getSizeInBits() != CastTy.getSizeInBits())
3088 return UnableToLegalize;
3089
3090 Observer.changingInstr(MI);
3091 bitcastSrc(MI, CastTy, 0);
3092 MMO.setType(CastTy);
3093 Observer.changedInstr(MI);
3094 return Legalized;
3095 }
3096 case TargetOpcode::G_SELECT: {
3097 if (TypeIdx != 0)
3098 return UnableToLegalize;
3099
3100 if (MRI.getType(MI.getOperand(1).getReg()).isVector()) {
3101 LLVM_DEBUG(
3102     dbgs() << "bitcast action not implemented for vector select\n");
3103 return UnableToLegalize;
3104 }
3105
3106 Observer.changingInstr(MI);
3107 bitcastSrc(MI, CastTy, 2);
3108 bitcastSrc(MI, CastTy, 3);
3109 bitcastDst(MI, CastTy, 0);
3110 Observer.changedInstr(MI);
3111 return Legalized;
3112 }
3113 case TargetOpcode::G_AND:
3114 case TargetOpcode::G_OR:
3115 case TargetOpcode::G_XOR: {
3116 Observer.changingInstr(MI);
3117 bitcastSrc(MI, CastTy, 1);
3118 bitcastSrc(MI, CastTy, 2);
3119 bitcastDst(MI, CastTy, 0);
3120 Observer.changedInstr(MI);
3121 return Legalized;
3122 }
3123 case TargetOpcode::G_EXTRACT_VECTOR_ELT:
3124 return bitcastExtractVectorElt(MI, TypeIdx, CastTy);
3125 case TargetOpcode::G_INSERT_VECTOR_ELT:
3126 return bitcastInsertVectorElt(MI, TypeIdx, CastTy);
3127 default:
3128 return UnableToLegalize;
3129 }
3130}
3131
3132// Legalize an instruction by changing the opcode in place.
3133void LegalizerHelper::changeOpcode(MachineInstr &MI, unsigned NewOpcode) {
3134 Observer.changingInstr(MI);
3135 MI.setDesc(MIRBuilder.getTII().get(NewOpcode));
3136 Observer.changedInstr(MI);
3137}
3138
3139LegalizerHelper::LegalizeResult
3140LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
3141 using namespace TargetOpcode;
3142
3143 switch(MI.getOpcode()) {
3144 default:
3145 return UnableToLegalize;
3146 case TargetOpcode::G_BITCAST:
3147 return lowerBitcast(MI);
3148 case TargetOpcode::G_SREM:
3149 case TargetOpcode::G_UREM: {
3150 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3151 auto Quot =
3152 MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV, {Ty},
3153 {MI.getOperand(1), MI.getOperand(2)});
3154
3155 auto Prod = MIRBuilder.buildMul(Ty, Quot, MI.getOperand(2));
3156 MIRBuilder.buildSub(MI.getOperand(0), MI.getOperand(1), Prod);
3157 MI.eraseFromParent();
3158 return Legalized;
3159 }
3160 case TargetOpcode::G_SADDO:
3161 case TargetOpcode::G_SSUBO:
3162 return lowerSADDO_SSUBO(MI);
3163 case TargetOpcode::G_UMULH:
3164 case TargetOpcode::G_SMULH:
3165 return lowerSMULH_UMULH(MI);
3166 case TargetOpcode::G_SMULO:
3167 case TargetOpcode::G_UMULO: {
3168 // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
3169 // result.
3170 Register Res = MI.getOperand(0).getReg();
3171 Register Overflow = MI.getOperand(1).getReg();
3172 Register LHS = MI.getOperand(2).getReg();
3173 Register RHS = MI.getOperand(3).getReg();
3174 LLT Ty = MRI.getType(Res);
3175
3176 unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
3177 ? TargetOpcode::G_SMULH
3178 : TargetOpcode::G_UMULH;
3179
3180 Observer.changingInstr(MI);
3181 const auto &TII = MIRBuilder.getTII();
3182 MI.setDesc(TII.get(TargetOpcode::G_MUL));
3183 MI.RemoveOperand(1);
3184 Observer.changedInstr(MI);
3185
3186 auto HiPart = MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS});
3187 auto Zero = MIRBuilder.buildConstant(Ty, 0);
3188
3189 // Move insert point forward so we can use the Res register if needed.
3190 MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
3191
3192 // For *signed* multiply, overflow is detected by checking:
3193 // (hi != (lo >> bitwidth-1))
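    // e.g. for s8, 64 * 2 = 128 wraps to Res = -128, so lo >> 7 gives -1
    // while the high half of the full product is 0; the mismatch signals
    // overflow. For the unsigned case below, any nonzero high half means
    // overflow.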
3194 if (Opcode == TargetOpcode::G_SMULH) {
3195 auto ShiftAmt = MIRBuilder.buildConstant(Ty, Ty.getSizeInBits() - 1);
3196 auto Shifted = MIRBuilder.buildAShr(Ty, Res, ShiftAmt);
3197 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
3198 } else {
3199 MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
3200 }
3201 return Legalized;
3202 }
3203 case TargetOpcode::G_FNEG: {
3204 Register Res = MI.getOperand(0).getReg();
3205 LLT Ty = MRI.getType(Res);
3206
3207 // TODO: Handle vector types once we are able to
3208 // represent them.
3209 if (Ty.isVector())
3210 return UnableToLegalize;
3211 auto SignMask =
3212 MIRBuilder.buildConstant(Ty, APInt::getSignMask(Ty.getSizeInBits()));
3213 Register SubByReg = MI.getOperand(1).getReg();
3214 MIRBuilder.buildXor(Res, SubByReg, SignMask);
3215 MI.eraseFromParent();
3216 return Legalized;
3217 }
3218 case TargetOpcode::G_FSUB: {
3219 Register Res = MI.getOperand(0).getReg();
3220 LLT Ty = MRI.getType(Res);
3221
3222 // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
3223 // First, check if G_FNEG is marked as Lower. If so, we may
3224 // end up with an infinite loop as G_FSUB is used to legalize G_FNEG.
3225 if (LI.getAction({G_FNEG, {Ty}}).Action == Lower)
3226 return UnableToLegalize;
3227 Register LHS = MI.getOperand(1).getReg();
3228 Register RHS = MI.getOperand(2).getReg();
3229 Register Neg = MRI.createGenericVirtualRegister(Ty);
3230 MIRBuilder.buildFNeg(Neg, RHS);
3231 MIRBuilder.buildFAdd(Res, LHS, Neg, MI.getFlags());
3232 MI.eraseFromParent();
3233 return Legalized;
3234 }
3235 case TargetOpcode::G_FMAD:
3236 return lowerFMad(MI);
3237 case TargetOpcode::G_FFLOOR:
3238 return lowerFFloor(MI);
3239 case TargetOpcode::G_INTRINSIC_ROUND:
3240 return lowerIntrinsicRound(MI);
3241 case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
3242 // Since round even is the assumed rounding mode for unconstrained FP
3243 // operations, rint and roundeven are the same operation.
3244 changeOpcode(MI, TargetOpcode::G_FRINT);
3245 return Legalized;
3246 }
3247 case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
3248 Register OldValRes = MI.getOperand(0).getReg();
3249 Register SuccessRes = MI.getOperand(1).getReg();
3250 Register Addr = MI.getOperand(2).getReg();
3251 Register CmpVal = MI.getOperand(3).getReg();
3252 Register NewVal = MI.getOperand(4).getReg();
3253 MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal,
3254 **MI.memoperands_begin());
3255 MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal);
3256 MI.eraseFromParent();
3257 return Legalized;
3258 }
3259 case TargetOpcode::G_LOAD:
3260 case TargetOpcode::G_SEXTLOAD:
3261 case TargetOpcode::G_ZEXTLOAD:
3262 return lowerLoad(cast<GAnyLoad>(MI));
3263 case TargetOpcode::G_STORE:
3264 return lowerStore(cast<GStore>(MI));
3265 case TargetOpcode::G_CTLZ_ZERO_UNDEF:
3266 case TargetOpcode::G_CTTZ_ZERO_UNDEF:
3267 case TargetOpcode::G_CTLZ:
3268 case TargetOpcode::G_CTTZ:
3269 case TargetOpcode::G_CTPOP:
3270 return lowerBitCount(MI);
3271 case G_UADDO: {
3272 Register Res = MI.getOperand(0).getReg();
3273 Register CarryOut = MI.getOperand(1).getReg();
3274 Register LHS = MI.getOperand(2).getReg();
3275 Register RHS = MI.getOperand(3).getReg();
3276
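    // The add wrapped iff the truncated result is unsigned-less-than either
    // operand, so comparing Res against RHS yields the carry-out.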
3277 MIRBuilder.buildAdd(Res, LHS, RHS);
3278 MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, RHS);
3279
3280 MI.eraseFromParent();
3281 return Legalized;
3282 }
3283 case G_UADDE: {
3284 Register Res = MI.getOperand(0).getReg();
3285 Register CarryOut = MI.getOperand(1).getReg();
3286 Register LHS = MI.getOperand(2).getReg();
3287 Register RHS = MI.getOperand(3).getReg();
3288 Register CarryIn = MI.getOperand(4).getReg();
3289 LLT Ty = MRI.getType(Res);
3290
3291 auto TmpRes = MIRBuilder.buildAdd(Ty, LHS, RHS);
3292 auto ZExtCarryIn = MIRBuilder.buildZExt(Ty, CarryIn);
3293 MIRBuilder.buildAdd(Res, TmpRes, ZExtCarryIn);
3294 MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, LHS);
3295
3296 MI.eraseFromParent();
3297 return Legalized;
3298 }
3299 case G_USUBO: {
3300 Register Res = MI.getOperand(0).getReg();
3301 Register BorrowOut = MI.getOperand(1).getReg();
3302 Register LHS = MI.getOperand(2).getReg();
3303 Register RHS = MI.getOperand(3).getReg();
3304
3305 MIRBuilder.buildSub(Res, LHS, RHS);
3306 MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);
3307
3308 MI.eraseFromParent();
3309 return Legalized;
3310 }
3311 case G_USUBE: {
3312 Register Res = MI.getOperand(0).getReg();
3313 Register BorrowOut = MI.getOperand(1).getReg();
3314 Register LHS = MI.getOperand(2).getReg();
3315 Register RHS = MI.getOperand(3).getReg();
3316 Register BorrowIn = MI.getOperand(4).getReg();
3317 const LLT CondTy = MRI.getType(BorrowOut);
3318 const LLT Ty = MRI.getType(Res);
3319
3320 auto TmpRes = MIRBuilder.buildSub(Ty, LHS, RHS);
3321 auto ZExtBorrowIn = MIRBuilder.buildZExt(Ty, BorrowIn);
3322 MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
3323
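    // When LHS == RHS the subtraction itself cannot borrow, so the borrow-out
    // is just BorrowIn; otherwise it is exactly LHS <u RHS.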
3324 auto LHS_EQ_RHS = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, LHS, RHS);
3325 auto LHS_ULT_RHS = MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CondTy, LHS, RHS);
3326 MIRBuilder.buildSelect(BorrowOut, LHS_EQ_RHS, BorrowIn, LHS_ULT_RHS);
3327
3328 MI.eraseFromParent();
3329 return Legalized;
3330 }
3331 case G_UITOFP:
3332 return lowerUITOFP(MI);
3333 case G_SITOFP:
3334 return lowerSITOFP(MI);
3335 case G_FPTOUI:
3336 return lowerFPTOUI(MI);
3337 case G_FPTOSI:
3338 return lowerFPTOSI(MI);
3339 case G_FPTRUNC:
3340 return lowerFPTRUNC(MI);
3341 case G_FPOWI:
3342 return lowerFPOWI(MI);
3343 case G_SMIN:
3344 case G_SMAX:
3345 case G_UMIN:
3346 case G_UMAX:
3347 return lowerMinMax(MI);
3348 case G_FCOPYSIGN:
3349 return lowerFCopySign(MI);
3350 case G_FMINNUM:
3351 case G_FMAXNUM:
3352 return lowerFMinNumMaxNum(MI);
3353 case G_MERGE_VALUES:
3354 return lowerMergeValues(MI);
3355 case G_UNMERGE_VALUES:
3356 return lowerUnmergeValues(MI);
3357 case TargetOpcode::G_SEXT_INREG: {
3358 assert(MI.getOperand(2).isImm() && "Expected immediate");
3359 int64_t SizeInBits = MI.getOperand(2).getImm();
3360
3361 Register DstReg = MI.getOperand(0).getReg();
3362 Register SrcReg = MI.getOperand(1).getReg();
3363 LLT DstTy = MRI.getType(DstReg);
3364 Register TmpRes = MRI.createGenericVirtualRegister(DstTy);
3365
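    // e.g. a G_SEXT_INREG of the low 8 bits of an s32 value becomes a shift
    // left by 24 followed by an arithmetic shift right by 24.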
3366 auto MIBSz = MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - SizeInBits);
3367 MIRBuilder.buildShl(TmpRes, SrcReg, MIBSz->getOperand(0));
3368 MIRBuilder.buildAShr(DstReg, TmpRes, MIBSz->getOperand(0));
3369 MI.eraseFromParent();
3370 return Legalized;
3371 }
3372 case G_EXTRACT_VECTOR_ELT:
3373 case G_INSERT_VECTOR_ELT:
3374 return lowerExtractInsertVectorElt(MI);
3375 case G_SHUFFLE_VECTOR:
3376 return lowerShuffleVector(MI);
3377 case G_DYN_STACKALLOC:
3378 return lowerDynStackAlloc(MI);
3379 case G_EXTRACT:
3380 return lowerExtract(MI);
3381 case G_INSERT:
3382 return lowerInsert(MI);
3383 case G_BSWAP:
3384 return lowerBswap(MI);
3385 case G_BITREVERSE:
3386 return lowerBitreverse(MI);
3387 case G_READ_REGISTER:
3388 case G_WRITE_REGISTER:
3389 return lowerReadWriteRegister(MI);
3390 case G_UADDSAT:
3391 case G_USUBSAT: {
3392 // Try to make a reasonable guess about which lowering strategy to use. The
3393 // target can override this with custom lowering and calling the
3394 // implementation functions.
3395 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3396 if (LI.isLegalOrCustom({G_UMIN, Ty}))
3397 return lowerAddSubSatToMinMax(MI);
3398 return lowerAddSubSatToAddoSubo(MI);
3399 }
3400 case G_SADDSAT:
3401 case G_SSUBSAT: {
3402 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3403
3404 // FIXME: It would probably make more sense to see if G_SADDO is preferred,
3405 // since it's a shorter expansion. However, we would need to figure out the
3406 // preferred boolean type for the carry out for the query.
3407 if (LI.isLegalOrCustom({G_SMIN, Ty}) && LI.isLegalOrCustom({G_SMAX, Ty}))
3408 return lowerAddSubSatToMinMax(MI);
3409 return lowerAddSubSatToAddoSubo(MI);
3410 }
3411 case G_SSHLSAT:
3412 case G_USHLSAT:
3413 return lowerShlSat(MI);
3414 case G_ABS:
3415 return lowerAbsToAddXor(MI);
3416 case G_SELECT:
3417 return lowerSelect(MI);
3418 case G_SDIVREM:
3419 case G_UDIVREM:
3420 return lowerDIVREM(MI);
3421 case G_FSHL:
3422 case G_FSHR:
3423 return lowerFunnelShift(MI);
3424 case G_ROTL:
3425 case G_ROTR:
3426 return lowerRotate(MI);
3427 }
3428}
3429
3430Align LegalizerHelper::getStackTemporaryAlignment(LLT Ty,
3431 Align MinAlign) const {
3432 // FIXME: We're missing a way to go back from LLT to llvm::Type to query the
3433 // datalayout for the preferred alignment. Also there should be a target hook
3434 // for this to allow targets to reduce the alignment and ignore the
3435 // datalayout. e.g. AMDGPU should always use a 4-byte alignment, regardless of
3436 // the type.
3437 return std::max(Align(PowerOf2Ceil(Ty.getSizeInBytes())), MinAlign);
3438}
3439
3440MachineInstrBuilder
3441LegalizerHelper::createStackTemporary(TypeSize Bytes, Align Alignment,
3442 MachinePointerInfo &PtrInfo) {
3443 MachineFunction &MF = MIRBuilder.getMF();
3444 const DataLayout &DL = MIRBuilder.getDataLayout();
3445 int FrameIdx = MF.getFrameInfo().CreateStackObject(Bytes, Alignment, false);
3446
3447 unsigned AddrSpace = DL.getAllocaAddrSpace();
3448 LLT FramePtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
3449
3450 PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx);
3451 return MIRBuilder.buildFrameIndex(FramePtrTy, FrameIdx);
3452}
3453
3454static Register clampDynamicVectorIndex(MachineIRBuilder &B, Register IdxReg,
3455 LLT VecTy) {
3456 int64_t IdxVal;
3457 if (mi_match(IdxReg, *B.getMRI(), m_ICst(IdxVal)))
3458 return IdxReg;
3459
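  // Clamp a variable index into range: a power-of-2 element count only needs
  // a mask of the low Log2(NElts) bits; anything else falls back to an
  // unsigned min against NElts - 1.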
3460 LLT IdxTy = B.getMRI()->getType(IdxReg);
3461 unsigned NElts = VecTy.getNumElements();
3462 if (isPowerOf2_32(NElts)) {
3463 APInt Imm = APInt::getLowBitsSet(IdxTy.getSizeInBits(), Log2_32(NElts));
3464 return B.buildAnd(IdxTy, IdxReg, B.buildConstant(IdxTy, Imm)).getReg(0);
3465 }
3466
3467 return B.buildUMin(IdxTy, IdxReg, B.buildConstant(IdxTy, NElts - 1))
3468 .getReg(0);
3469}
3470
3471Register LegalizerHelper::getVectorElementPointer(Register VecPtr, LLT VecTy,
3472 Register Index) {
3473 LLT EltTy = VecTy.getElementType();
3474
3475 // Calculate the element offset and add it to the pointer.
3476 unsigned EltSize = EltTy.getSizeInBits() / 8; // FIXME: should be ABI size.
3477 assert(EltSize * 8 == EltTy.getSizeInBits() &&
3478        "Converting bits to bytes lost precision");
3479
3480 Index = clampDynamicVectorIndex(MIRBuilder, Index, VecTy);
3481
3482 LLT IdxTy = MRI.getType(Index);
3483 auto Mul = MIRBuilder.buildMul(IdxTy, Index,
3484 MIRBuilder.buildConstant(IdxTy, EltSize));
3485
3486 LLT PtrTy = MRI.getType(VecPtr);
3487 return MIRBuilder.buildPtrAdd(PtrTy, VecPtr, Mul).getReg(0);
3488}
3489
3490LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorImplicitDef(
3491 MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) {
3492 Register DstReg = MI.getOperand(0).getReg();
3493 LLT DstTy = MRI.getType(DstReg);
3494 LLT LCMTy = getLCMType(DstTy, NarrowTy);
3495
3496 unsigned NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
3497
3498 auto NewUndef = MIRBuilder.buildUndef(NarrowTy);
3499 SmallVector<Register, 8> Parts(NumParts, NewUndef.getReg(0));
3500
3501 buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
3502 MI.eraseFromParent();
3503 return Legalized;
3504}
3505
3506// Handle splitting vector operations which need to have the same number of
3507// elements in each type index, but each type index may have a different element
3508// type.
3509//
3510// e.g. <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
3511// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
3512// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
3513//
3514 // Also handles some irregular breakdown cases, e.g.
3515 //   <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
3516// <2 x s64> = G_SHL <2 x s64>, <2 x s32>
3517// s64 = G_SHL s64, s32
3518LegalizerHelper::LegalizeResult
3519LegalizerHelper::fewerElementsVectorMultiEltType(
3520 MachineInstr &MI, unsigned TypeIdx, LLT NarrowTyArg) {
3521 if (TypeIdx != 0)
3522 return UnableToLegalize;
3523
3524 const LLT NarrowTy0 = NarrowTyArg;
3525 const Register DstReg = MI.getOperand(0).getReg();
3526 LLT DstTy = MRI.getType(DstReg);
3527 LLT LeftoverTy0;
3528
3529 // All of the operands need to have the same number of elements, so if we can
3530 // determine a type breakdown for the result type, we can for all of the
3531 // source types.
3532 int NumParts = getNarrowTypeBreakDown(DstTy, NarrowTy0, LeftoverTy0).first;
3533 if (NumParts < 0)
3534 return UnableToLegalize;
3535
3536 SmallVector<MachineInstrBuilder, 4> NewInsts;
3537
3538 SmallVector<Register, 4> DstRegs, LeftoverDstRegs;
3539 SmallVector<Register, 4> PartRegs, LeftoverRegs;
3540
3541 for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
3542 Register SrcReg = MI.getOperand(I).getReg();
3543 LLT SrcTyI = MRI.getType(SrcReg);
3544 const auto NewEC = NarrowTy0.isVector() ? NarrowTy0.getElementCount()
3545 : ElementCount::getFixed(1);
3546 LLT NarrowTyI = LLT::scalarOrVector(NewEC, SrcTyI.getScalarType());
3547 LLT LeftoverTyI;
3548
3549 // Split this operand into the requested typed registers, and any leftover
3550 // required to reproduce the original type.
3551 if (!extractParts(SrcReg, SrcTyI, NarrowTyI, LeftoverTyI, PartRegs,
3552 LeftoverRegs))
3553 return UnableToLegalize;
3554
3555 if (I == 1) {
3556 // For the first operand, create an instruction for each part and setup
3557 // the result.
3558 for (Register PartReg : PartRegs) {
3559 Register PartDstReg = MRI.createGenericVirtualRegister(NarrowTy0);
3560 NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode())
3561 .addDef(PartDstReg)
3562 .addUse(PartReg));
3563 DstRegs.push_back(PartDstReg);
3564 }
3565
3566 for (Register LeftoverReg : LeftoverRegs) {
3567 Register PartDstReg = MRI.createGenericVirtualRegister(LeftoverTy0);
3568 NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode())
3569 .addDef(PartDstReg)
3570 .addUse(LeftoverReg));
3571 LeftoverDstRegs.push_back(PartDstReg);
3572 }
3573 } else {
3574 assert(NewInsts.size() == PartRegs.size() + LeftoverRegs.size());
3575
3576 // Add the newly created operand splits to the existing instructions. The
3577 // odd-sized pieces are ordered after the requested NarrowTyArg sized
3578 // pieces.
3579 unsigned InstCount = 0;
3580 for (unsigned J = 0, JE = PartRegs.size(); J != JE; ++J)
3581 NewInsts[InstCount++].addUse(PartRegs[J]);
3582 for (unsigned J = 0, JE = LeftoverRegs.size(); J != JE; ++J)
3583 NewInsts[InstCount++].addUse(LeftoverRegs[J]);
3584 }
3585
3586 PartRegs.clear();
3587 LeftoverRegs.clear();
3588 }
3589
3590 // Insert the newly built operations and rebuild the result register.
3591 for (auto &MIB : NewInsts)
3592 MIRBuilder.insertInstr(MIB);
3593
3594 insertParts(DstReg, DstTy, NarrowTy0, DstRegs, LeftoverTy0, LeftoverDstRegs);
3595
3596 MI.eraseFromParent();
3597 return Legalized;
3598}
3599
3600LegalizerHelper::LegalizeResult
3601LegalizerHelper::fewerElementsVectorCasts(MachineInstr &MI, unsigned TypeIdx,
3602 LLT NarrowTy) {
3603 if (TypeIdx != 0)
3604 return UnableToLegalize;
3605
3606 Register DstReg = MI.getOperand(0).getReg();
3607 Register SrcReg = MI.getOperand(1).getReg();
3608 LLT DstTy = MRI.getType(DstReg);
3609 LLT SrcTy = MRI.getType(SrcReg);
3610
3611 LLT NarrowTy0 = NarrowTy;
3612 LLT NarrowTy1;
3613 unsigned NumParts;
3614
3615 if (NarrowTy.isVector()) {
3616 // Uneven breakdown not handled.
3617 NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
3618 if (NumParts * NarrowTy.getNumElements() != DstTy.getNumElements())
3619 return UnableToLegalize;
3620
3621 NarrowTy1 = LLT::vector(NarrowTy.getElementCount(), SrcTy.getElementType());
3622 } else {
3623 NumParts = DstTy.getNumElements();
3624 NarrowTy1 = SrcTy.getElementType();
3625 }
3626
3627 SmallVector<Register, 4> SrcRegs, DstRegs;
3628 extractParts(SrcReg, NarrowTy1, NumParts, SrcRegs);
3629
3630 for (unsigned I = 0; I < NumParts; ++I) {
3631 Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
3632 MachineInstr *NewInst =
3633 MIRBuilder.buildInstr(MI.getOpcode(), {DstReg}, {SrcRegs[I]});
3634
3635 NewInst->setFlags(MI.getFlags());
3636 DstRegs.push_back(DstReg);
3637 }
3638
3639 if (NarrowTy.isVector())
3640 MIRBuilder.buildConcatVectors(DstReg, DstRegs);
3641 else
3642 MIRBuilder.buildBuildVector(DstReg, DstRegs);
3643
3644 MI.eraseFromParent();
3645 return Legalized;
3646}
3647
3648LegalizerHelper::LegalizeResult
3649LegalizerHelper::fewerElementsVectorCmp(MachineInstr &MI, unsigned TypeIdx,
3650 LLT NarrowTy) {
3651 Register DstReg = MI.getOperand(0).getReg();
3652 Register Src0Reg = MI.getOperand(2).getReg();
3653 LLT DstTy = MRI.getType(DstReg);
3654 LLT SrcTy = MRI.getType(Src0Reg);
3655
3656 unsigned NumParts;
3657 LLT NarrowTy0, NarrowTy1;
3658
3659 if (TypeIdx == 0) {
3660 unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
3661 unsigned OldElts = DstTy.getNumElements();
3662
3663 NarrowTy0 = NarrowTy;
3664 NumParts = NarrowTy.isVector() ? (OldElts / NewElts) : DstTy.getNumElements();
3665 NarrowTy1 = NarrowTy.isVector() ? LLT::vector(NarrowTy.getElementCount(),
3666 SrcTy.getScalarSizeInBits())
3667 : SrcTy.getElementType();
3668
3669 } else {
3670 unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
3671 unsigned OldElts = SrcTy.getNumElements();
3672
3673 NumParts = NarrowTy.isVector() ? (OldElts / NewElts) :
3674 NarrowTy.getNumElements();
3675 NarrowTy0 =
3676 LLT::vector(NarrowTy.getElementCount(), DstTy.getScalarSizeInBits());
3677 NarrowTy1 = NarrowTy;
3678 }
3679
3680 // FIXME: Don't know how to handle the situation where the small vectors
3681 // aren't all the same size yet.
3682 if (NarrowTy1.isVector() &&
3683 NarrowTy1.getNumElements() * NumParts != DstTy.getNumElements())
3684 return UnableToLegalize;
3685
3686 CmpInst::Predicate Pred
3687 = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
3688
3689 SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
3690 extractParts(MI.getOperand(2).getReg(), NarrowTy1, NumParts, Src1Regs);
3691 extractParts(MI.getOperand(3).getReg(), NarrowTy1, NumParts, Src2Regs);
3692
3693 for (unsigned I = 0; I < NumParts; ++I) {
3694 Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
3695 DstRegs.push_back(DstReg);
3696
3697 if (MI.getOpcode() == TargetOpcode::G_ICMP)
3698 MIRBuilder.buildICmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
3699 else {
3700 MachineInstr *NewCmp
3701 = MIRBuilder.buildFCmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
3702 NewCmp->setFlags(MI.getFlags());
3703 }
3704 }
3705
3706 if (NarrowTy1.isVector())
3707 MIRBuilder.buildConcatVectors(DstReg, DstRegs);
3708 else
3709 MIRBuilder.buildBuildVector(DstReg, DstRegs);
3710
3711 MI.eraseFromParent();
3712 return Legalized;
3713}
3714
3715LegalizerHelper::LegalizeResult
3716LegalizerHelper::fewerElementsVectorSelect(MachineInstr &MI, unsigned TypeIdx,
3717 LLT NarrowTy) {
3718 Register DstReg = MI.getOperand(0).getReg();
3719 Register CondReg = MI.getOperand(1).getReg();
3720
3721 unsigned NumParts = 0;
3722 LLT NarrowTy0, NarrowTy1;
3723
3724 LLT DstTy = MRI.getType(DstReg);
3725 LLT CondTy = MRI.getType(CondReg);
3726 unsigned Size = DstTy.getSizeInBits();
3727
3728 assert(TypeIdx == 0 || CondTy.isVector());
3729
3730 if (TypeIdx == 0) {
3731 NarrowTy0 = NarrowTy;
3732 NarrowTy1 = CondTy;
3733
3734 unsigned NarrowSize = NarrowTy0.getSizeInBits();
3735 // FIXME: Don't know how to handle the situation where the small vectors
3736 // aren't all the same size yet.
3737 if (Size % NarrowSize != 0)
3738 return UnableToLegalize;
3739
3740 NumParts = Size / NarrowSize;
3741
3742 // Need to break down the condition type
3743 if (CondTy.isVector()) {
3744 if (CondTy.getNumElements() == NumParts)
3745 NarrowTy1 = CondTy.getElementType();
3746 else
3747 NarrowTy1 =
3748 LLT::vector(CondTy.getElementCount().divideCoefficientBy(NumParts),
3749 CondTy.getScalarSizeInBits());
3750 }
3751 } else {
3752 NumParts = CondTy.getNumElements();
3753 if (NarrowTy.isVector()) {
3754 // TODO: Handle uneven breakdown.
3755 if (NumParts * NarrowTy.getNumElements() != CondTy.getNumElements())
3756 return UnableToLegalize;
3757
3758 return UnableToLegalize;
3759 } else {
3760 NarrowTy0 = DstTy.getElementType();
3761 NarrowTy1 = NarrowTy;
3762 }
3763 }
3764
3765 SmallVector<Register, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs;
3766 if (CondTy.isVector())
3767 extractParts(MI.getOperand(1).getReg(), NarrowTy1, NumParts, Src0Regs);
3768
3769 extractParts(MI.getOperand(2).getReg(), NarrowTy0, NumParts, Src1Regs);
3770 extractParts(MI.getOperand(3).getReg(), NarrowTy0, NumParts, Src2Regs);
3771
3772 for (unsigned i = 0; i < NumParts; ++i) {
3773 Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
3774 MIRBuilder.buildSelect(DstReg, CondTy.isVector() ? Src0Regs[i] : CondReg,
3775 Src1Regs[i], Src2Regs[i]);
3776 DstRegs.push_back(DstReg);
3777 }
3778
3779 if (NarrowTy0.isVector())
3780 MIRBuilder.buildConcatVectors(DstReg, DstRegs);
3781 else
3782 MIRBuilder.buildBuildVector(DstReg, DstRegs);
3783
3784 MI.eraseFromParent();
3785 return Legalized;
3786}
3787
3788LegalizerHelper::LegalizeResult
3789LegalizerHelper::fewerElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
3790 LLT NarrowTy) {
3791 const Register DstReg = MI.getOperand(0).getReg();
3792 LLT PhiTy = MRI.getType(DstReg);
3793 LLT LeftoverTy;
3794
3795 // All of the operands need to have the same number of elements, so if we can
3796 // determine a type breakdown for the result type, we can for all of the
3797 // source types.
3798 int NumParts, NumLeftover;
3799 std::tie(NumParts, NumLeftover)
3800 = getNarrowTypeBreakDown(PhiTy, NarrowTy, LeftoverTy);
3801 if (NumParts < 0)
3802 return UnableToLegalize;
3803
3804 SmallVector<Register, 4> DstRegs, LeftoverDstRegs;
3805 SmallVector<MachineInstrBuilder, 4> NewInsts;
3806
3807 const int TotalNumParts = NumParts + NumLeftover;
3808
3809 // Insert the new phis in the result block first.
3810 for (int I = 0; I != TotalNumParts; ++I) {
3811 LLT Ty = I < NumParts ? NarrowTy : LeftoverTy;
3812 Register PartDstReg = MRI.createGenericVirtualRegister(Ty);
3813 NewInsts.push_back(MIRBuilder.buildInstr(TargetOpcode::G_PHI)
3814 .addDef(PartDstReg));
3815 if (I < NumParts)
3816 DstRegs.push_back(PartDstReg);
3817 else
3818 LeftoverDstRegs.push_back(PartDstReg);
3819 }
3820
3821 MachineBasicBlock *MBB = MI.getParent();
3822 MIRBuilder.setInsertPt(*MBB, MBB->getFirstNonPHI());
3823 insertParts(DstReg, PhiTy, NarrowTy, DstRegs, LeftoverTy, LeftoverDstRegs);
3824
3825 SmallVector<Register, 4> PartRegs, LeftoverRegs;
3826
3827 // Insert code to extract the incoming values in each predecessor block.
3828 for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
3829 PartRegs.clear();
3830 LeftoverRegs.clear();
3831
3832 Register SrcReg = MI.getOperand(I).getReg();
3833 MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
3834 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
3835
3836 LLT Unused;
3837 if (!extractParts(SrcReg, PhiTy, NarrowTy, Unused, PartRegs,
3838 LeftoverRegs))
3839 return UnableToLegalize;
3840
3841 // Add the newly created operand splits to the existing instructions. The
3842 // odd-sized pieces are ordered after the requested NarrowTy sized
3843 // pieces.
3844 for (int J = 0; J != TotalNumParts; ++J) {
3845 MachineInstrBuilder MIB = NewInsts[J];
3846 MIB.addUse(J < NumParts ? PartRegs[J] : LeftoverRegs[J - NumParts]);
3847 MIB.addMBB(&OpMBB);
3848 }
3849 }
3850
3851 MI.eraseFromParent();
3852 return Legalized;
3853}
3854
3855LegalizerHelper::LegalizeResult
3856LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI,
3857 unsigned TypeIdx,
3858 LLT NarrowTy) {
3859 if (TypeIdx != 1)
3860 return UnableToLegalize;
3861
3862 const int NumDst = MI.getNumOperands() - 1;
3863 const Register SrcReg = MI.getOperand(NumDst).getReg();
3864 LLT SrcTy = MRI.getType(SrcReg);
3865
3866 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
3867
3868 // TODO: Create sequence of extracts.
3869 if (DstTy == NarrowTy)
3870 return UnableToLegalize;
3871
3872 LLT GCDTy = getGCDType(SrcTy, NarrowTy);
3873 if (DstTy == GCDTy) {
3874 // This would just be a copy of the same unmerge.
3875 // TODO: Create extracts, pad with undef and create intermediate merges.
3876 return UnableToLegalize;
3877 }
3878
3879 auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
3880 const int NumUnmerge = Unmerge->getNumOperands() - 1;
3881 const int PartsPerUnmerge = NumDst / NumUnmerge;
3882
3883 for (int I = 0; I != NumUnmerge; ++I) {
3884 auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
3885
3886 for (int J = 0; J != PartsPerUnmerge; ++J)
3887 MIB.addDef(MI.getOperand(I * PartsPerUnmerge + J).getReg());
3888 MIB.addUse(Unmerge.getReg(I));
3889 }
3890
3891 MI.eraseFromParent();
3892 return Legalized;
3893}
3894
3895LegalizerHelper::LegalizeResult
3896LegalizerHelper::fewerElementsVectorMulo(MachineInstr &MI, unsigned TypeIdx,
3897 LLT NarrowTy) {
3898 Register Result = MI.getOperand(0).getReg();
3899 Register Overflow = MI.getOperand(1).getReg();
3900 Register LHS = MI.getOperand(2).getReg();
3901 Register RHS = MI.getOperand(3).getReg();
3902
3903 LLT SrcTy = MRI.getType(LHS);
3904 if (!SrcTy.isVector())
3905 return UnableToLegalize;
3906
3907 LLT ElementType = SrcTy.getElementType();
3908 LLT OverflowElementTy = MRI.getType(Overflow).getElementType();
3909 const ElementCount NumResult = SrcTy.getElementCount();
3910 LLT GCDTy = getGCDType(SrcTy, NarrowTy);
3911
3912 // Unmerge the operands to smaller parts of GCD type.
3913 auto UnmergeLHS = MIRBuilder.buildUnmerge(GCDTy, LHS);
3914 auto UnmergeRHS = MIRBuilder.buildUnmerge(GCDTy, RHS);
3915
3916 const int NumOps = UnmergeLHS->getNumOperands() - 1;
3917 const ElementCount PartsPerUnmerge = NumResult.divideCoefficientBy(NumOps);
3918 LLT OverflowTy = LLT::scalarOrVector(PartsPerUnmerge, OverflowElementTy);
3919 LLT ResultTy = LLT::scalarOrVector(PartsPerUnmerge, ElementType);
3920
3921 // Perform the operation over unmerged parts.
3922 SmallVector<Register, 8> ResultParts;
3923 SmallVector<Register, 8> OverflowParts;
3924 for (int I = 0; I != NumOps; ++I) {
3925 Register Operand1 = UnmergeLHS->getOperand(I).getReg();
3926 Register Operand2 = UnmergeRHS->getOperand(I).getReg();
3927 auto PartMul = MIRBuilder.buildInstr(MI.getOpcode(), {ResultTy, OverflowTy},
3928 {Operand1, Operand2});
3929 ResultParts.push_back(PartMul->getOperand(0).getReg());
3930 OverflowParts.push_back(PartMul->getOperand(1).getReg());
3931 }
3932
3933 LLT ResultLCMTy = buildLCMMergePieces(SrcTy, NarrowTy, GCDTy, ResultParts);
3934 LLT OverflowLCMTy =
3935 LLT::scalarOrVector(ResultLCMTy.getElementCount(), OverflowElementTy);
3936
3937 // Recombine the pieces to the original result and overflow registers.
3938 buildWidenedRemergeToDst(Result, ResultLCMTy, ResultParts);
3939 buildWidenedRemergeToDst(Overflow, OverflowLCMTy, OverflowParts);
3940 MI.eraseFromParent();
3941 return Legalized;
3942}
3943
3944// Handle FewerElementsVector for a G_BUILD_VECTOR or G_CONCAT_VECTORS that
3945// produces a vector.
3946//
3947// Create a G_BUILD_VECTOR or G_CONCAT_VECTORS of NarrowTy pieces, padding with
3948// undef as necessary.
3949//
3950// %3:_(<3 x s16>) = G_BUILD_VECTOR %0, %1, %2
3951// -> <2 x s16>
3952//
3953// %4:_(s16) = G_IMPLICIT_DEF
3954// %5:_(<2 x s16>) = G_BUILD_VECTOR %0, %1
3955// %6:_(<2 x s16>) = G_BUILD_VECTOR %2, %4
3956// %7:_(<2 x s16>) = G_IMPLICIT_DEF
3957// %8:_(<6 x s16>) = G_CONCAT_VECTORS %5, %6, %7
3958// %3:_(<3 x s16>), %9:_(<3 x s16>) = G_UNMERGE_VALUES %8
3959LegalizerHelper::LegalizeResult
3960LegalizerHelper::fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx,
3961 LLT NarrowTy) {
3962 Register DstReg = MI.getOperand(0).getReg();
3963 LLT DstTy = MRI.getType(DstReg);
3964 LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
3965 LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
3966
3967 // Break into a common type
3968 SmallVector<Register, 16> Parts;
3969 for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
3970 extractGCDType(Parts, GCDTy, MI.getOperand(I).getReg());
3971
3972 // Build the requested new merge, padding with undef.
3973 LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts,
3974 TargetOpcode::G_ANYEXT);
3975
3976 // Pack into the original result register.
3977 buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
3978
3979 MI.eraseFromParent();
3980 return Legalized;
3981}
3982
3983LegalizerHelper::LegalizeResult
3984LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI,
3985 unsigned TypeIdx,
3986 LLT NarrowVecTy) {
3987 Register DstReg = MI.getOperand(0).getReg();
3988 Register SrcVec = MI.getOperand(1).getReg();
3989 Register InsertVal;
3990 bool IsInsert = MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;
3991
3992 assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) && "not a vector type index");
3993 if (IsInsert)
3994 InsertVal = MI.getOperand(2).getReg();
3995
3996 Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
3997
3998 // TODO: Handle total scalarization case.
3999 if (!NarrowVecTy.isVector())
4000 return UnableToLegalize;
4001
4002 LLT VecTy = MRI.getType(SrcVec);
4003
4004 // If the index is a constant, we can really break this down as you would
4005 // expect, and index into the target size pieces.
4006 int64_t IdxVal;
4007 auto MaybeCst =
4008 getConstantVRegValWithLookThrough(Idx, MRI, /*LookThroughInstrs*/ true,
4009 /*HandleFConstants*/ false);
4010 if (MaybeCst) {
4011 IdxVal = MaybeCst->Value.getSExtValue();
4012 // Avoid out of bounds indexing the pieces.
4013 if (IdxVal >= VecTy.getNumElements()) {
4014 MIRBuilder.buildUndef(DstReg);
4015 MI.eraseFromParent();
4016 return Legalized;
4017 }
4018
4019 SmallVector<Register, 8> VecParts;
4020 LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);
4021
4022 // Build a sequence of NarrowTy pieces in VecParts for this operand.
4023 LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
4024 TargetOpcode::G_ANYEXT);
4025
4026 unsigned NewNumElts = NarrowVecTy.getNumElements();
4027
4028 LLT IdxTy = MRI.getType(Idx);
4029 int64_t PartIdx = IdxVal / NewNumElts;
4030 auto NewIdx =
4031 MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx);
4032
4033 if (IsInsert) {
4034 LLT PartTy = MRI.getType(VecParts[PartIdx]);
4035
4036 // Use the adjusted index to insert into one of the subvectors.
4037 auto InsertPart = MIRBuilder.buildInsertVectorElement(
4038 PartTy, VecParts[PartIdx], InsertVal, NewIdx);
4039 VecParts[PartIdx] = InsertPart.getReg(0);
4040
4041 // Recombine the inserted subvector with the others to reform the result
4042 // vector.
4043 buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
4044 } else {
4045 MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx);
4046 }
4047
4048 MI.eraseFromParent();
4049 return Legalized;
4050 }
4051
4052 // With a variable index, we can't perform the operation in a smaller type, so
4053 // we're forced to expand this.
4054 //
4055 // TODO: We could emit a chain of compare/select to figure out which piece to
4056 // index.
4057 return lowerExtractInsertVectorElt(MI);
4058}
4059
4060LegalizerHelper::LegalizeResult
4061LegalizerHelper::reduceLoadStoreWidth(GLoadStore &LdStMI, unsigned TypeIdx,
4062 LLT NarrowTy) {
4063 // FIXME: Don't know how to handle secondary types yet.
4064 if (TypeIdx != 0)
4065 return UnableToLegalize;
4066
4067 // This implementation doesn't work for atomics. Give up instead of doing
4068 // something invalid.
4069 if (LdStMI.isAtomic())
4070 return UnableToLegalize;
4071
4072 bool IsLoad = isa<GLoad>(LdStMI);
4073 Register ValReg = LdStMI.getReg(0);
4074 Register AddrReg = LdStMI.getPointerReg();
4075 LLT ValTy = MRI.getType(ValReg);
4076
4077 // FIXME: Do we need a distinct NarrowMemory legalize action?
4078 if (ValTy.getSizeInBits() != 8 * LdStMI.getMemSize()) {
4079 LLVM_DEBUG(dbgs() << "Can't narrow extload/truncstore\n");
4080 return UnableToLegalize;
4081 }
4082
4083 int NumParts = -1;
4084 int NumLeftover = -1;
4085 LLT LeftoverTy;
4086 SmallVector<Register, 8> NarrowRegs, NarrowLeftoverRegs;
4087 if (IsLoad) {
4088 std::tie(NumParts, NumLeftover) = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
4089 } else {
4090 if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
4091 NarrowLeftoverRegs)) {
4092 NumParts = NarrowRegs.size();
4093 NumLeftover = NarrowLeftoverRegs.size();
Value stored to 'NumLeftover' is never read
4094 }
4095 }
4096
4097 if (NumParts == -1)
4098 return UnableToLegalize;
4099
4100 LLT PtrTy = MRI.getType(AddrReg);
4101 const LLT OffsetTy = LLT::scalar(PtrTy.getSizeInBits());
4102
4103 unsigned TotalSize = ValTy.getSizeInBits();
4104
4105 // Split the load/store into PartTy sized pieces starting at Offset. If this
4106 // is a load, return the new registers in ValRegs. For a store, each element
4107 // of ValRegs should be PartTy. Returns the next offset that needs to be
4108 // handled.
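  // e.g. an s96 value split with NarrowTy == s64 performs one s64 access at
  // byte offset 0, then the leftover s32 access at byte offset 8.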
4109 auto MMO = LdStMI.getMMO();
4110 auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
4111 unsigned Offset) -> unsigned {
4112 MachineFunction &MF = MIRBuilder.getMF();
4113 unsigned PartSize = PartTy.getSizeInBits();
4114 for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
4115 Offset += PartSize, ++Idx) {
4116 unsigned ByteOffset = Offset / 8;
4117 Register NewAddrReg;
4118
4119 MIRBuilder.materializePtrAdd(NewAddrReg, AddrReg, OffsetTy, ByteOffset);
4120
4121 MachineMemOperand *NewMMO =
4122 MF.getMachineMemOperand(&MMO, ByteOffset, PartTy);
4123
4124 if (IsLoad) {
4125 Register Dst = MRI.createGenericVirtualRegister(PartTy);
4126 ValRegs.push_back(Dst);
4127 MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
4128 } else {
4129 MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
4130 }
4131 }
4132
4133 return Offset;
4134 };
4135
4136 unsigned HandledOffset = splitTypePieces(NarrowTy, NarrowRegs, 0);
4137
4138 // Handle the rest of the register if this isn't an even type breakdown.
4139 if (LeftoverTy.isValid())
4140 splitTypePieces(LeftoverTy, NarrowLeftoverRegs, HandledOffset);
4141
4142 if (IsLoad) {
4143 insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
4144 LeftoverTy, NarrowLeftoverRegs);
4145 }
4146
4147 LdStMI.eraseFromParent();
4148 return Legalized;
4149}
4150
4151LegalizerHelper::LegalizeResult
4152LegalizerHelper::reduceOperationWidth(MachineInstr &MI, unsigned int TypeIdx,
4153 LLT NarrowTy) {
4154 assert(TypeIdx == 0 && "only one type index expected");
4155
4156 const unsigned Opc = MI.getOpcode();
4157 const int NumDefOps = MI.getNumExplicitDefs();
4158 const int NumSrcOps = MI.getNumOperands() - NumDefOps;
4159 const unsigned Flags = MI.getFlags();
4160 const unsigned NarrowSize = NarrowTy.getSizeInBits();
4161 const LLT NarrowScalarTy = LLT::scalar(NarrowSize);
4162
4163 assert(MI.getNumOperands() <= 4 && "expected instruction with either 1 "
4164                                    "result and 1-3 sources or 2 results and "
4165                                    "1-2 sources");
4166
4167 SmallVector<Register, 2> DstRegs;
4168 for (int I = 0; I < NumDefOps; ++I)
4169 DstRegs.push_back(MI.getOperand(I).getReg());
4170
4171 // First of all check whether we are narrowing (changing the element type)
4172 // or reducing the number of vector elements.
4173 const LLT DstTy = MRI.getType(DstRegs[0]);
4174 const bool IsNarrow = NarrowTy.getScalarType() != DstTy.getScalarType();
4175
4176 SmallVector<Register, 8> ExtractedRegs[3];
4177 SmallVector<Register, 8> Parts;
4178
4179 // Break down all the sources into NarrowTy pieces we can operate on. This may
4180 // involve creating merges to a wider type, padded with undef.
4181 for (int I = 0; I != NumSrcOps; ++I) {
4182 Register SrcReg = MI.getOperand(I + NumDefOps).getReg();
4183 LLT SrcTy = MRI.getType(SrcReg);
4184
4185 // The type to narrow SrcReg to. For narrowing, this is a smaller scalar.
4186 // For fewerElements, this is a smaller vector with the same element type.
4187 LLT OpNarrowTy;
4188 if (IsNarrow) {
4189 OpNarrowTy = NarrowScalarTy;
4190
4191 // In case of narrowing, we need to cast vectors to scalars for this to
4192 // work properly
4193 // FIXME: Can we do without the bitcast here if we're narrowing?
4194 if (SrcTy.isVector()) {
4195 SrcTy = LLT::scalar(SrcTy.getSizeInBits());
4196 SrcReg = MIRBuilder.buildBitcast(SrcTy, SrcReg).getReg(0);
4197 }
4198 } else {
4199 auto NarrowEC = NarrowTy.isVector() ? NarrowTy.getElementCount()
4200 : ElementCount::getFixed(1);
4201 OpNarrowTy = LLT::scalarOrVector(NarrowEC, SrcTy.getScalarType());
4202 }
4203
4204 LLT GCDTy = extractGCDType(ExtractedRegs[I], SrcTy, OpNarrowTy, SrcReg);
4205
4206 // Build a sequence of NarrowTy pieces in ExtractedRegs for this operand.
4207 buildLCMMergePieces(SrcTy, OpNarrowTy, GCDTy, ExtractedRegs[I],
4208 TargetOpcode::G_ANYEXT);
4209 }
4210
4211 SmallVector<Register, 8> ResultRegs[2];
4212
4213 // Input operands for each sub-instruction.
4214 SmallVector<SrcOp, 4> InputRegs(NumSrcOps, Register());
4215
4216 int NumParts = ExtractedRegs[0].size();
4217 const unsigned DstSize = DstTy.getSizeInBits();
4218 const LLT DstScalarTy = LLT::scalar(DstSize);
4219
4220 // Narrowing needs to use scalar types
4221 LLT DstLCMTy, NarrowDstTy;
4222 if (IsNarrow) {
4223 DstLCMTy = getLCMType(DstScalarTy, NarrowScalarTy);
4224 NarrowDstTy = NarrowScalarTy;
4225 } else {
4226 DstLCMTy = getLCMType(DstTy, NarrowTy);
4227 NarrowDstTy = NarrowTy;
4228 }
4229
4230 // We widened the source registers to satisfy merge/unmerge size
4231 // constraints. We'll have some extra fully undef parts.
4232 const int NumRealParts = (DstSize + NarrowSize - 1) / NarrowSize;
4233
4234 for (int I = 0; I != NumRealParts; ++I) {
4235 // Emit this instruction on each of the split pieces.
4236 for (int J = 0; J != NumSrcOps; ++J)
4237 InputRegs[J] = ExtractedRegs[J][I];
4238
4239 MachineInstrBuilder Inst;
4240 if (NumDefOps == 1)
4241 Inst = MIRBuilder.buildInstr(Opc, {NarrowDstTy}, InputRegs, Flags);
4242 else
4243 Inst = MIRBuilder.buildInstr(Opc, {NarrowDstTy, NarrowDstTy}, InputRegs,
4244 Flags);
4245
4246 for (int J = 0; J != NumDefOps; ++J)
4247 ResultRegs[J].push_back(Inst.getReg(J));
4248 }
4249
4250 // Fill out the widened result with undef instead of creating instructions
4251 // with undef inputs.
4252 int NumUndefParts = NumParts - NumRealParts;
4253 if (NumUndefParts != 0) {
4254 Register Undef = MIRBuilder.buildUndef(NarrowDstTy).getReg(0);
4255 for (int I = 0; I != NumDefOps; ++I)
4256 ResultRegs[I].append(NumUndefParts, Undef);
4257 }
4258
4259 // Extract the possibly padded result. Use a scratch register if we need to do
4260 // a final bitcast, otherwise use the original result register.
4261 Register MergeDstReg;
4262 for (int I = 0; I != NumDefOps; ++I) {
4263 if (IsNarrow && DstTy.isVector())
4264 MergeDstReg = MRI.createGenericVirtualRegister(DstScalarTy);
4265 else
4266 MergeDstReg = DstRegs[I];
4267
4268 buildWidenedRemergeToDst(MergeDstReg, DstLCMTy, ResultRegs[I]);
4269
4270 // Recast to vector if we narrowed a vector
4271 if (IsNarrow && DstTy.isVector())
4272 MIRBuilder.buildBitcast(DstRegs[I], MergeDstReg);
4273 }
4274
4275 MI.eraseFromParent();
4276 return Legalized;
4277}
4278
4279LegalizerHelper::LegalizeResult
4280LegalizerHelper::fewerElementsVectorSextInReg(MachineInstr &MI, unsigned TypeIdx,
4281 LLT NarrowTy) {
4282 Register DstReg = MI.getOperand(0).getReg();
4283 Register SrcReg = MI.getOperand(1).getReg();
4284 int64_t Imm = MI.getOperand(2).getImm();
4285
4286 LLT DstTy = MRI.getType(DstReg);
4287
4288 SmallVector<Register, 8> Parts;
4289 LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
4290 LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts);
4291
4292 for (Register &R : Parts)
4293 R = MIRBuilder.buildSExtInReg(NarrowTy, R, Imm).getReg(0);
4294
4295 buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
4296
4297 MI.eraseFromParent();
4298 return Legalized;
4299}
4300
4301LegalizerHelper::LegalizeResult
4302LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
4303 LLT NarrowTy) {
4304 using namespace TargetOpcode;
4305
4306 switch (MI.getOpcode()) {
4307 case G_IMPLICIT_DEF:
4308 return fewerElementsVectorImplicitDef(MI, TypeIdx, NarrowTy);
4309 case G_TRUNC:
4310 case G_AND:
4311 case G_OR:
4312 case G_XOR:
4313 case G_ADD:
4314 case G_SUB:
4315 case G_MUL:
4316 case G_PTR_ADD:
4317 case G_SMULH:
4318 case G_UMULH:
4319 case G_FADD:
4320 case G_FMUL:
4321 case G_FSUB:
4322 case G_FNEG:
4323 case G_FABS:
4324 case G_FCANONICALIZE:
4325 case G_FDIV:
4326 case G_FREM:
4327 case G_FMA:
4328 case G_FMAD:
4329 case G_FPOW:
4330 case G_FEXP:
4331 case G_FEXP2:
4332 case G_FLOG:
4333 case G_FLOG2:
4334 case G_FLOG10:
4335 case G_FNEARBYINT:
4336 case G_FCEIL:
4337 case G_FFLOOR:
4338 case G_FRINT:
4339 case G_INTRINSIC_ROUND:
4340 case G_INTRINSIC_ROUNDEVEN:
4341 case G_INTRINSIC_TRUNC:
4342 case G_FCOS:
4343 case G_FSIN:
4344 case G_FSQRT:
4345 case G_BSWAP:
4346 case G_BITREVERSE:
4347 case G_SDIV:
4348 case G_UDIV:
4349 case G_SREM:
4350 case G_UREM:
4351 case G_SDIVREM:
4352 case G_UDIVREM:
4353 case G_SMIN:
4354 case G_SMAX:
4355 case G_UMIN:
4356 case G_UMAX:
4357 case G_ABS:
4358 case G_FMINNUM:
4359 case G_FMAXNUM:
4360 case G_FMINNUM_IEEE:
4361 case G_FMAXNUM_IEEE:
4362 case G_FMINIMUM:
4363 case G_FMAXIMUM:
4364 case G_FSHL:
4365 case G_FSHR:
4366 case G_FREEZE:
4367 case G_SADDSAT:
4368 case G_SSUBSAT:
4369 case G_UADDSAT:
4370 case G_USUBSAT:
4371 return reduceOperationWidth(MI, TypeIdx, NarrowTy);
4372 case G_UMULO:
4373 case G_SMULO:
4374 return fewerElementsVectorMulo(MI, TypeIdx, NarrowTy);
4375 case G_SHL:
4376 case G_LSHR:
4377 case G_ASHR:
4378 case G_SSHLSAT:
4379 case G_USHLSAT:
4380 case G_CTLZ:
4381 case G_CTLZ_ZERO_UNDEF:
4382 case G_CTTZ:
4383 case G_CTTZ_ZERO_UNDEF:
4384 case G_CTPOP:
4385 case G_FCOPYSIGN:
4386 return fewerElementsVectorMultiEltType(MI, TypeIdx, NarrowTy);
4387 case G_ZEXT:
4388 case G_SEXT:
4389 case G_ANYEXT:
4390 case G_FPEXT:
4391 case G_FPTRUNC:
4392 case G_SITOFP:
4393 case G_UITOFP:
4394 case G_FPTOSI:
4395 case G_FPTOUI:
4396 case G_INTTOPTR:
4397 case G_PTRTOINT:
4398 case G_ADDRSPACE_CAST:
4399 return fewerElementsVectorCasts(MI, TypeIdx, NarrowTy);
4400 case G_ICMP:
4401 case G_FCMP:
4402 return fewerElementsVectorCmp(MI, TypeIdx, NarrowTy);
4403 case G_SELECT:
4404 return fewerElementsVectorSelect(MI, TypeIdx, NarrowTy);
4405 case G_PHI:
4406 return fewerElementsVectorPhi(MI, TypeIdx, NarrowTy);
4407 case G_UNMERGE_VALUES:
4408 return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
4409 case G_BUILD_VECTOR:
4410 assert(TypeIdx == 0 && "not a vector type index");
4411 return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
4412 case G_CONCAT_VECTORS:
4413 if (TypeIdx != 1) // TODO: This probably does work as expected already.
4414 return UnableToLegalize;
4415 return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
4416 case G_EXTRACT_VECTOR_ELT:
4417 case G_INSERT_VECTOR_ELT:
4418 return fewerElementsVectorExtractInsertVectorElt(MI, TypeIdx, NarrowTy);
4419 case G_LOAD:
4420 case G_STORE:
4421 return reduceLoadStoreWidth(cast<GLoadStore>(MI), TypeIdx, NarrowTy);
4422 case G_SEXT_INREG:
4423 return fewerElementsVectorSextInReg(MI, TypeIdx, NarrowTy);
4424 GISEL_VECREDUCE_CASES_NONSEQ
     // The macro expands to the non-sequential reduction cases:
     // case TargetOpcode::G_VECREDUCE_FADD: case TargetOpcode::G_VECREDUCE_FMUL:
     // case TargetOpcode::G_VECREDUCE_FMAX: case TargetOpcode::G_VECREDUCE_FMIN:
     // case TargetOpcode::G_VECREDUCE_ADD: case TargetOpcode::G_VECREDUCE_MUL:
     // case TargetOpcode::G_VECREDUCE_AND: case TargetOpcode::G_VECREDUCE_OR:
     // case TargetOpcode::G_VECREDUCE_XOR: case TargetOpcode::G_VECREDUCE_SMAX:
     // case TargetOpcode::G_VECREDUCE_SMIN: case TargetOpcode::G_VECREDUCE_UMAX:
     // case TargetOpcode::G_VECREDUCE_UMIN:
4425 return fewerElementsVectorReductions(MI, TypeIdx, NarrowTy);
4426 case G_SHUFFLE_VECTOR:
4427 return fewerElementsVectorShuffle(MI, TypeIdx, NarrowTy);
4428 default:
4429 return UnableToLegalize;
4430 }
4431}
4432
4433LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorShuffle(
4434 MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
4435 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
4436 if (TypeIdx != 0)
4437 return UnableToLegalize;
4438
4439 Register DstReg = MI.getOperand(0).getReg();
4440 Register Src1Reg = MI.getOperand(1).getReg();
4441 Register Src2Reg = MI.getOperand(2).getReg();
4442 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
4443 LLT DstTy = MRI.getType(DstReg);
4444 LLT Src1Ty = MRI.getType(Src1Reg);
4445 LLT Src2Ty = MRI.getType(Src2Reg);
4446 // The shuffle should be canonicalized by now.
4447 if (DstTy != Src1Ty)
4448 return UnableToLegalize;
4449 if (DstTy != Src2Ty)
4450 return UnableToLegalize;
4451
4452 if (!isPowerOf2_32(DstTy.getNumElements()))
4453 return UnableToLegalize;
4454
4455 // We only support splitting a shuffle into 2, so adjust NarrowTy accordingly.
4456 // Further legalization attempts will be needed to split further.
4457 NarrowTy =
4458 DstTy.changeElementCount(DstTy.getElementCount().divideCoefficientBy(2));
4459 unsigned NewElts = NarrowTy.getNumElements();
4460
4461 SmallVector<Register> SplitSrc1Regs, SplitSrc2Regs;
4462 extractParts(Src1Reg, NarrowTy, 2, SplitSrc1Regs);
4463 extractParts(Src2Reg, NarrowTy, 2, SplitSrc2Regs);
4464 Register Inputs[4] = {SplitSrc1Regs[0], SplitSrc1Regs[1], SplitSrc2Regs[0],
4465 SplitSrc2Regs[1]};
4466
4467 Register Hi, Lo;
4468
4469 // If Lo or Hi uses elements from at most two of the four input vectors, then
4470 // express it as a vector shuffle of those two inputs. Otherwise extract the
4471 // input elements by hand and construct the Lo/Hi output using a BUILD_VECTOR.
4472 SmallVector<int, 16> Ops;
4473 for (unsigned High = 0; High < 2; ++High) {
4474 Register &Output = High ? Hi : Lo;
4475
4476 // Build a shuffle mask for the output, discovering on the fly which
4477 // input vectors to use as shuffle operands (recorded in InputUsed).
4478 // If building a suitable shuffle vector proves too hard, then bail
4479 // out with useBuildVector set.
4480 unsigned InputUsed[2] = {-1U, -1U}; // Not yet discovered.
4481 unsigned FirstMaskIdx = High * NewElts;
4482 bool UseBuildVector = false;
4483 for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
4484 // The mask element. This indexes into the input.
4485 int Idx = Mask[FirstMaskIdx + MaskOffset];
4486
4487 // The input vector this mask element indexes into.
4488 unsigned Input = (unsigned)Idx / NewElts;
4489
4490 if (Input >= array_lengthof(Inputs)) {
4491 // The mask element does not index into any input vector.
4492 Ops.push_back(-1);
4493 continue;
4494 }
4495
4496 // Turn the index into an offset from the start of the input vector.
4497 Idx -= Input * NewElts;
4498
4499 // Find or create a shuffle vector operand to hold this input.
4500 unsigned OpNo;
4501 for (OpNo = 0; OpNo < array_lengthof(InputUsed); ++OpNo) {
4502 if (InputUsed[OpNo] == Input) {
4503 // This input vector is already an operand.
4504 break;
4505 } else if (InputUsed[OpNo] == -1U) {
4506 // Create a new operand for this input vector.
4507 InputUsed[OpNo] = Input;
4508 break;
4509 }
4510 }
4511
4512 if (OpNo >= array_lengthof(InputUsed)) {
4513 // More than two input vectors used! Give up on trying to create a
4514 // shuffle vector. Insert all elements into a BUILD_VECTOR instead.
4515 UseBuildVector = true;
4516 break;
4517 }
4518
4519 // Add the mask index for the new shuffle vector.
4520 Ops.push_back(Idx + OpNo * NewElts);
4521 }
4522
4523 if (UseBuildVector) {
4524 LLT EltTy = NarrowTy.getElementType();
4525 SmallVector<Register, 16> SVOps;
4526
4527 // Extract the input elements by hand.
4528 for (unsigned MaskOffset = 0; MaskOffset < NewElts; ++MaskOffset) {
4529 // The mask element. This indexes into the input.
4530 int Idx = Mask[FirstMaskIdx + MaskOffset];
4531
4532 // The input vector this mask element indexes into.
4533 unsigned Input = (unsigned)Idx / NewElts;
4534
4535 if (Input >= array_lengthof(Inputs)) {
4536 // The mask element is "undef" or indexes off the end of the input.
4537 SVOps.push_back(MIRBuilder.buildUndef(EltTy).getReg(0));
4538 continue;
4539 }
4540
4541 // Turn the index into an offset from the start of the input vector.
4542 Idx -= Input * NewElts;
4543
4544 // Extract the vector element by hand.
4545 SVOps.push_back(MIRBuilder
4546 .buildExtractVectorElement(
4547 EltTy, Inputs[Input],
4548 MIRBuilder.buildConstant(LLT::scalar(32), Idx))
4549 .getReg(0));
4550 }
4551
4552 // Construct the Lo/Hi output using a G_BUILD_VECTOR.
4553 Output = MIRBuilder.buildBuildVector(NarrowTy, SVOps).getReg(0);
4554 } else if (InputUsed[0] == -1U) {
4555 // No input vectors were used! The result is undefined.
4556 Output = MIRBuilder.buildUndef(NarrowTy).getReg(0);
4557 } else {
4558 Register Op0 = Inputs[InputUsed[0]];
4559 // If only one input was used, use an undefined vector for the other.
4560 Register Op1 = InputUsed[1] == -1U
4561 ? MIRBuilder.buildUndef(NarrowTy).getReg(0)
4562 : Inputs[InputUsed[1]];
4563 // At least one input vector was used. Create a new shuffle vector.
4564 Output = MIRBuilder.buildShuffleVector(NarrowTy, Op0, Op1, Ops).getReg(0);
4565 }
4566
4567 Ops.clear();
4568 }
4569
4570 MIRBuilder.buildConcatVectors(DstReg, {Lo, Hi});
4571 MI.eraseFromParent();
4572 return Legalized;
4573}
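// --- Illustrative sketch, not part of LegalizerHelper.cpp: how the loop
// above maps one half of a shuffle mask onto at most two of the four split
// inputs. Toy setup: two v4 sources split into four v2 pieces, with the
// interleaving mask <0,4,1,5>. No half needs more than two pieces here, so
// the build-vector fallback is never taken.
#include <cstdio>

int main() {
  const int NewElts = 2;
  const int Mask[4] = {0, 4, 1, 5};
  for (int High = 0; High < 2; ++High) {
    int InputUsed[2] = {-1, -1}; // not yet discovered
    for (int Off = 0; Off < NewElts; ++Off) {
      int Idx = Mask[High * NewElts + Off];
      int Input = Idx / NewElts; // which of the four split pieces
      int OpNo = (InputUsed[0] == Input || InputUsed[0] == -1) ? 0 : 1;
      InputUsed[OpNo] = Input;
      std::printf("half %d, elt %d: piece %d, sub-mask index %d\n", High, Off,
                  Input, Idx % NewElts + OpNo * NewElts);
    }
  }
  return 0;
}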
4574
4575LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorReductions(
4576 MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
4577 unsigned Opc = MI.getOpcode();
4578 assert(Opc != TargetOpcode::G_VECREDUCE_SEQ_FADD &&
4579 Opc != TargetOpcode::G_VECREDUCE_SEQ_FMUL &&
4580 "Sequential reductions not expected");
4581
4582 if (TypeIdx != 1)
4583 return UnableToLegalize;
4584
4585 // The semantics of the normal non-sequential reductions allow us to freely
4586 // re-associate the operation.
4587 Register SrcReg = MI.getOperand(1).getReg();
4588 LLT SrcTy = MRI.getType(SrcReg);
4589 Register DstReg = MI.getOperand(0).getReg();
4590 LLT DstTy = MRI.getType(DstReg);
4591
4592 if (SrcTy.getNumElements() % NarrowTy.getNumElements() != 0)
4593 return UnableToLegalize;
4594
4595 SmallVector<Register> SplitSrcs;
4596 const unsigned NumParts = SrcTy.getNumElements() / NarrowTy.getNumElements();
4597 extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs);
4598 SmallVector<Register> PartialReductions;
4599 for (unsigned Part = 0; Part < NumParts; ++Part) {
4600 PartialReductions.push_back(
4601 MIRBuilder.buildInstr(Opc, {DstTy}, {SplitSrcs[Part]}).getReg(0));
4602 }
4603
4604 unsigned ScalarOpc;
4605 switch (Opc) {
4606 case TargetOpcode::G_VECREDUCE_FADD:
4607 ScalarOpc = TargetOpcode::G_FADD;
4608 break;
4609 case TargetOpcode::G_VECREDUCE_FMUL:
4610 ScalarOpc = TargetOpcode::G_FMUL;
4611 break;
4612 case TargetOpcode::G_VECREDUCE_FMAX:
4613 ScalarOpc = TargetOpcode::G_FMAXNUM;
4614 break;
4615 case TargetOpcode::G_VECREDUCE_FMIN:
4616 ScalarOpc = TargetOpcode::G_FMINNUM;
4617 break;
4618 case TargetOpcode::G_VECREDUCE_ADD:
4619 ScalarOpc = TargetOpcode::G_ADD;
4620 break;
4621 case TargetOpcode::G_VECREDUCE_MUL:
4622 ScalarOpc = TargetOpcode::G_MUL;
4623 break;
4624 case TargetOpcode::G_VECREDUCE_AND:
4625 ScalarOpc = TargetOpcode::G_AND;
4626 break;
4627 case TargetOpcode::G_VECREDUCE_OR:
4628 ScalarOpc = TargetOpcode::G_OR;
4629 break;
4630 case TargetOpcode::G_VECREDUCE_XOR:
4631 ScalarOpc = TargetOpcode::G_XOR;
4632 break;
4633 case TargetOpcode::G_VECREDUCE_SMAX:
4634 ScalarOpc = TargetOpcode::G_SMAX;
4635 break;
4636 case TargetOpcode::G_VECREDUCE_SMIN:
4637 ScalarOpc = TargetOpcode::G_SMIN;
4638 break;
4639 case TargetOpcode::G_VECREDUCE_UMAX:
4640 ScalarOpc = TargetOpcode::G_UMAX;
4641 break;
4642 case TargetOpcode::G_VECREDUCE_UMIN:
4643 ScalarOpc = TargetOpcode::G_UMIN;
4644 break;
4645 default:
4646 LLVM_DEBUG(dbgs() << "Can't legalize: unknown reduction kind.\n");
4647 return UnableToLegalize;
4648 }
4649
4650 // If the types involved are powers of 2, we can generate intermediate vector
4651 // ops, before generating a final reduction operation.
4652 if (isPowerOf2_32(SrcTy.getNumElements()) &&
4653 isPowerOf2_32(NarrowTy.getNumElements())) {
4654 return tryNarrowPow2Reduction(MI, SrcReg, SrcTy, NarrowTy, ScalarOpc);
4655 }
4656
4657 Register Acc = PartialReductions[0];
4658 for (unsigned Part = 1; Part < NumParts; ++Part) {
4659 if (Part == NumParts - 1) {
4660 MIRBuilder.buildInstr(ScalarOpc, {DstReg},
4661 {Acc, PartialReductions[Part]});
4662 } else {
4663 Acc = MIRBuilder
4664 .buildInstr(ScalarOpc, {DstTy}, {Acc, PartialReductions[Part]})
4665 .getReg(0);
4666 }
4667 }
4668 MI.eraseFromParent();
4669 return Legalized;
4670}
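// --- Illustrative sketch, not part of LegalizerHelper.cpp: the
// split-then-fold path above, with toy integers standing in for MIR values.
// A <6 x s32> G_VECREDUCE_ADD splits into three <2 x s32> partial
// reductions, which are then folded together with scalar G_ADDs.
#include <cassert>

int main() {
  const int Src[6] = {1, 2, 3, 4, 5, 6};
  const int NumParts = 3, PartElts = 2;
  int Partial[NumParts];
  for (int P = 0; P < NumParts; ++P) // one reduction per NarrowTy piece
    Partial[P] = Src[P * PartElts] + Src[P * PartElts + 1];
  int Acc = Partial[0]; // fold the partials with the scalar opcode
  for (int P = 1; P < NumParts; ++P)
    Acc += Partial[P];
  assert(Acc == 21);
  return 0;
}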
4671
4672LegalizerHelper::LegalizeResult
4673LegalizerHelper::tryNarrowPow2Reduction(MachineInstr &MI, Register SrcReg,
4674 LLT SrcTy, LLT NarrowTy,
4675 unsigned ScalarOpc) {
4676 SmallVector<Register> SplitSrcs;
4677 // Split the sources into NarrowTy size pieces.
4678 extractParts(SrcReg, NarrowTy,
4679 SrcTy.getNumElements() / NarrowTy.getNumElements(), SplitSrcs);
4680 // We're going to do a tree reduction using vector operations until we have
4681 // one NarrowTy size value left.
4682 while (SplitSrcs.size() > 1) {
4683 SmallVector<Register> PartialRdxs;
4684 for (unsigned Idx = 0; Idx < SplitSrcs.size()-1; Idx += 2) {
4685 Register LHS = SplitSrcs[Idx];
4686 Register RHS = SplitSrcs[Idx + 1];
4687 // Create the intermediate vector op.
4688 Register Res =
4689 MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {LHS, RHS}).getReg(0);
4690 PartialRdxs.push_back(Res);
4691 }
4692 SplitSrcs = std::move(PartialRdxs);
4693 }
4694 // Finally generate the requested NarrowTy based reduction.
4695 Observer.changingInstr(MI);
4696 MI.getOperand(1).setReg(SplitSrcs[0]);
4697 Observer.changedInstr(MI);
4698 return Legalized;
4699}
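// --- Illustrative sketch, not part of LegalizerHelper.cpp: the pairwise
// tree above on toy data. Four <2 x s32> pieces of an original <8 x s32>
// are combined with the vector op until one piece remains; the original
// reduction instruction then runs on that single piece.
#include <array>
#include <cassert>
#include <vector>

int main() {
  std::vector<std::array<int, 2>> Pieces = {{1, 2}, {3, 4}, {5, 6}, {7, 8}};
  while (Pieces.size() > 1) {
    std::vector<std::array<int, 2>> Next;
    for (size_t I = 0; I + 1 < Pieces.size(); I += 2) // elementwise G_ADD
      Next.push_back({Pieces[I][0] + Pieces[I + 1][0],
                      Pieces[I][1] + Pieces[I + 1][1]});
    Pieces = std::move(Next);
  }
  assert(Pieces[0][0] + Pieces[0][1] == 36); // final reduce over one piece
  return 0;
}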
4700
4701LegalizerHelper::LegalizeResult
4702LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
4703 const LLT HalfTy, const LLT AmtTy) {
4704
4705 Register InL = MRI.createGenericVirtualRegister(HalfTy);
4706 Register InH = MRI.createGenericVirtualRegister(HalfTy);
4707 MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
4708
4709 if (Amt.isNullValue()) {
4710 MIRBuilder.buildMerge(MI.getOperand(0), {InL, InH});
4711 MI.eraseFromParent();
4712 return Legalized;
4713 }
4714
4715 LLT NVT = HalfTy;
4716 unsigned NVTBits = HalfTy.getSizeInBits();
4717 unsigned VTBits = 2 * NVTBits;
4718
4719 SrcOp Lo(Register(0)), Hi(Register(0));
4720 if (MI.getOpcode() == TargetOpcode::G_SHL) {
4721 if (Amt.ugt(VTBits)) {
4722 Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
4723 } else if (Amt.ugt(NVTBits)) {
4724 Lo = MIRBuilder.buildConstant(NVT, 0);
4725 Hi = MIRBuilder.buildShl(NVT, InL,
4726 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
4727 } else if (Amt == NVTBits) {
4728 Lo = MIRBuilder.buildConstant(NVT, 0);
4729 Hi = InL;
4730 } else {
4731 Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt));
4732 auto OrLHS =
4733 MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt));
4734 auto OrRHS = MIRBuilder.buildLShr(
4735 NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
4736 Hi = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
4737 }
4738 } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
4739 if (Amt.ugt(VTBits)) {
4740 Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
4741 } else if (Amt.ugt(NVTBits)) {
4742 Lo = MIRBuilder.buildLShr(NVT, InH,
4743 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
4744 Hi = MIRBuilder.buildConstant(NVT, 0);
4745 } else if (Amt == NVTBits) {
4746 Lo = InH;
4747 Hi = MIRBuilder.buildConstant(NVT, 0);
4748 } else {
4749 auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
4750
4751 auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
4752 auto OrRHS = MIRBuilder.buildShl(
4753 NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
4754
4755 Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
4756 Hi = MIRBuilder.buildLShr(NVT, InH, ShiftAmtConst);
4757 }
4758 } else {
4759 if (Amt.ugt(VTBits)) {
4760 Hi = Lo = MIRBuilder.buildAShr(
4761 NVT, InH, MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
4762 } else if (Amt.ugt(NVTBits)) {
4763 Lo = MIRBuilder.buildAShr(NVT, InH,
4764 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
4765 Hi = MIRBuilder.buildAShr(NVT, InH,
4766 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
4767 } else if (Amt == NVTBits) {
4768 Lo = InH;
4769 Hi = MIRBuilder.buildAShr(NVT, InH,
4770 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
4771 } else {
4772 auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
4773
4774 auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
4775 auto OrRHS = MIRBuilder.buildShl(
4776 NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
4777
4778 Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
4779 Hi = MIRBuilder.buildAShr(NVT, InH, ShiftAmtConst);
4780 }
4781 }
4782
4783 MIRBuilder.buildMerge(MI.getOperand(0), {Lo, Hi});
4784 MI.eraseFromParent();
4785
4786 return Legalized;
4787}
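// --- Illustrative sketch, not part of LegalizerHelper.cpp: the G_SHL
// constant cases above, evaluated on uint32_t halves (NVTBits = 32) of a
// 64-bit value. Out-of-range shifts are poison in MIR; here they are
// guarded away so the sketch stays well-defined C++.
#include <cassert>
#include <cstdint>

static void shl64(uint32_t InL, uint32_t InH, unsigned Amt, uint32_t &Lo,
                  uint32_t &Hi) {
  const unsigned NVTBits = 32;
  if (Amt >= 2 * NVTBits) {
    Lo = Hi = 0;
  } else if (Amt > NVTBits) {
    Lo = 0;
    Hi = InL << (Amt - NVTBits);
  } else if (Amt == NVTBits) {
    Lo = 0;
    Hi = InL;
  } else if (Amt == 0) {
    Lo = InL;
    Hi = InH;
  } else { // 0 < Amt < 32: Hi picks up the bits shifted out of Lo
    Lo = InL << Amt;
    Hi = (InH << Amt) | (InL >> (NVTBits - Amt));
  }
}

int main() {
  uint32_t Lo, Hi;
  shl64(0x89ABCDEFu, 0x01234567u, 8, Lo, Hi);
  assert(Lo == 0xABCDEF00u && Hi == 0x23456789u);
  return 0;
}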
4788
4789// TODO: Optimize if constant shift amount.
4790LegalizerHelper::LegalizeResult
4791LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx,
4792 LLT RequestedTy) {
4793 if (TypeIdx == 1) {
4794 Observer.changingInstr(MI);
4795 narrowScalarSrc(MI, RequestedTy, 2);
4796 Observer.changedInstr(MI);
4797 return Legalized;
4798 }
4799
4800 Register DstReg = MI.getOperand(0).getReg();
4801 LLT DstTy = MRI.getType(DstReg);
4802 if (DstTy.isVector())
4803 return UnableToLegalize;
4804
4805 Register Amt = MI.getOperand(2).getReg();
4806 LLT ShiftAmtTy = MRI.getType(Amt);
4807 const unsigned DstEltSize = DstTy.getScalarSizeInBits();
4808 if (DstEltSize % 2 != 0)
4809 return UnableToLegalize;
4810
4811 // Ignore the input type. We can only go to exactly half the size of the
4812 // input. If that isn't small enough, the resulting pieces will be further
4813 // legalized.
4814 const unsigned NewBitSize = DstEltSize / 2;
4815 const LLT HalfTy = LLT::scalar(NewBitSize);
4816 const LLT CondTy = LLT::scalar(1);
4817
4818 if (const MachineInstr *KShiftAmt =
4819 getOpcodeDef(TargetOpcode::G_CONSTANT, Amt, MRI)) {
4820 return narrowScalarShiftByConstant(
4821 MI, KShiftAmt->getOperand(1).getCImm()->getValue(), HalfTy, ShiftAmtTy);
4822 }
4823
4824 // TODO: Expand with known bits.
4825
4826 // Handle the fully general expansion by an unknown amount.
4827 auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);
4828
4829 Register InL = MRI.createGenericVirtualRegister(HalfTy);
4830 Register InH = MRI.createGenericVirtualRegister(HalfTy);
4831 MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
4832
4833 auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
4834 auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);
4835
4836 auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0);
4837 auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits);
4838 auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero);
4839
4840 Register ResultRegs[2];
4841 switch (MI.getOpcode()) {
4842 case TargetOpcode::G_SHL: {
4843 // Short: ShAmt < NewBitSize
4844 auto LoS = MIRBuilder.buildShl(HalfTy, InL, Amt);
4845
4846 auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
4847 auto HiOr = MIRBuilder.buildShl(HalfTy, InH, Amt);
4848 auto HiS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
4849
4850 // Long: ShAmt >= NewBitSize
4851 auto LoL = MIRBuilder.buildConstant(HalfTy, 0); // Lo part is zero.
4852 auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part.
4853
4854 auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
4855 auto Hi = MIRBuilder.buildSelect(
4856 HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));
4857
4858 ResultRegs[0] = Lo.getReg(0);
4859 ResultRegs[1] = Hi.getReg(0);
4860 break;
4861 }
4862 case TargetOpcode::G_LSHR:
4863 case TargetOpcode::G_ASHR: {
4864 // Short: ShAmt < NewBitSize
4865 auto HiS = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy}, {InH, Amt});
4866
4867 auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, Amt);
4868 auto HiOr = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
4869 auto LoS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
4870
4871 // Long: ShAmt >= NewBitSize
4872 MachineInstrBuilder HiL;
4873 if (MI.getOpcode() == TargetOpcode::G_LSHR) {
4874 HiL = MIRBuilder.buildConstant(HalfTy, 0); // Hi part is zero.
4875 } else {
4876 auto ShiftAmt = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1);
4877 HiL = MIRBuilder.buildAShr(HalfTy, InH, ShiftAmt); // Sign of Hi part.
4878 }
4879 auto LoL = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy},
4880 {InH, AmtExcess}); // Lo from Hi part.
4881
4882 auto Lo = MIRBuilder.buildSelect(
4883 HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
4884
4885 auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);
4886
4887 ResultRegs[0] = Lo.getReg(0);
4888 ResultRegs[1] = Hi.getReg(0);
4889 break;
4890 }
4891 default:
4892 llvm_unreachable("not a shift");
4893 }
4894
4895 MIRBuilder.buildMerge(DstReg, ResultRegs);
4896 MI.eraseFromParent();
4897 return Legalized;
4898}
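// --- Illustrative sketch, not part of LegalizerHelper.cpp: the general
// unknown-amount G_SHL expansion above, on uint32_t halves of a 64-bit
// value. IsShort picks between the Amt < 32 and Amt >= 32 forms; IsZero
// keeps Hi well-defined when Amt == 0 (AmtLack would be a full-width
// shift). MIR selects may evaluate both arms because an out-of-range G_SHL
// is poison, not UB; plain C++ must guard instead.
#include <cassert>
#include <cstdint>

static void shl64var(uint32_t InL, uint32_t InH, unsigned Amt, uint32_t &Lo,
                     uint32_t &Hi) {
  const unsigned NewBits = 32;
  bool IsShort = Amt < NewBits, IsZero = Amt == 0;

  uint32_t LoS = IsShort ? InL << Amt : 0;
  uint32_t HiS = (IsShort && !IsZero)
                     ? (InH << Amt) | (InL >> (NewBits - Amt))
                     : 0;
  uint32_t HiL = IsShort ? 0 : InL << ((Amt - NewBits) & 31); // Hi from Lo

  Lo = IsShort ? LoS : 0;                     // long Lo part is zero
  Hi = IsZero ? InH : (IsShort ? HiS : HiL);
}

int main() {
  uint32_t Lo, Hi;
  shl64var(0x89ABCDEFu, 0x01234567u, 40, Lo, Hi); // long case
  assert(Lo == 0 && Hi == 0xABCDEF00u);
  shl64var(0x89ABCDEFu, 0x01234567u, 0, Lo, Hi);  // zero case
  assert(Lo == 0x89ABCDEFu && Hi == 0x01234567u);
  return 0;
}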
4899
4900LegalizerHelper::LegalizeResult
4901LegalizerHelper::moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
4902 LLT MoreTy) {
4903 assert(TypeIdx == 0 && "Expecting only Idx 0");
4904
4905 Observer.changingInstr(MI);
4906 for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
4907 MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
4908 MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
4909 moreElementsVectorSrc(MI, MoreTy, I);
4910 }
4911
4912 MachineBasicBlock &MBB = *MI.getParent();
4913 MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
4914 moreElementsVectorDst(MI, MoreTy, 0);
4915 Observer.changedInstr(MI);
4916 return Legalized;
4917}
4918
4919LegalizerHelper::LegalizeResult
4920LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
4921 LLT MoreTy) {
4922 unsigned Opc = MI.getOpcode();
4923 switch (Opc) {
4924 case TargetOpcode::G_IMPLICIT_DEF:
4925 case TargetOpcode::G_LOAD: {
4926 if (TypeIdx != 0)
4927 return UnableToLegalize;
4928 Observer.changingInstr(MI);
4929 moreElementsVectorDst(MI, MoreTy, 0);
4930 Observer.changedInstr(MI);
4931 return Legalized;
4932 }
4933 case TargetOpcode::G_STORE:
4934 if (TypeIdx != 0)
4935 return UnableToLegalize;
4936 Observer.changingInstr(MI);
4937 moreElementsVectorSrc(MI, MoreTy, 0);
4938 Observer.changedInstr(MI);
4939 return Legalized;
4940 case TargetOpcode::G_AND:
4941 case TargetOpcode::G_OR:
4942 case TargetOpcode::G_XOR:
4943 case TargetOpcode::G_SMIN:
4944 case TargetOpcode::G_SMAX:
4945 case TargetOpcode::G_UMIN:
4946 case TargetOpcode::G_UMAX:
4947 case TargetOpcode::G_FMINNUM:
4948 case TargetOpcode::G_FMAXNUM:
4949 case TargetOpcode::G_FMINNUM_IEEE:
4950 case TargetOpcode::G_FMAXNUM_IEEE:
4951 case TargetOpcode::G_FMINIMUM:
4952 case TargetOpcode::G_FMAXIMUM: {
4953 Observer.changingInstr(MI);
4954 moreElementsVectorSrc(MI, MoreTy, 1);
4955 moreElementsVectorSrc(MI, MoreTy, 2);
4956 moreElementsVectorDst(MI, MoreTy, 0);
4957 Observer.changedInstr(MI);
4958 return Legalized;
4959 }
4960 case TargetOpcode::G_EXTRACT:
4961 if (TypeIdx != 1)
4962 return UnableToLegalize;
4963 Observer.changingInstr(MI);
4964 moreElementsVectorSrc(MI, MoreTy, 1);
4965 Observer.changedInstr(MI);
4966 return Legalized;
4967 case TargetOpcode::G_INSERT:
4968 case TargetOpcode::G_FREEZE:
4969 if (TypeIdx != 0)
4970 return UnableToLegalize;
4971 Observer.changingInstr(MI);
4972 moreElementsVectorSrc(MI, MoreTy, 1);
4973 moreElementsVectorDst(MI, MoreTy, 0);
4974 Observer.changedInstr(MI);
4975 return Legalized;
4976 case TargetOpcode::G_SELECT:
4977 if (TypeIdx != 0)
4978 return UnableToLegalize;
4979 if (MRI.getType(MI.getOperand(1).getReg()).isVector())
4980 return UnableToLegalize;
4981
4982 Observer.changingInstr(MI);
4983 moreElementsVectorSrc(MI, MoreTy, 2);
4984 moreElementsVectorSrc(MI, MoreTy, 3);
4985 moreElementsVectorDst(MI, MoreTy, 0);
4986 Observer.changedInstr(MI);
4987 return Legalized;
4988 case TargetOpcode::G_UNMERGE_VALUES: {
4989 if (TypeIdx != 1)
4990 return UnableToLegalize;
4991
4992 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
4993 int NumDst = MI.getNumOperands() - 1;
4994 moreElementsVectorSrc(MI, MoreTy, NumDst);
4995
4996 auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
4997 for (int I = 0; I != NumDst; ++I)
4998 MIB.addDef(MI.getOperand(I).getReg());
4999
5000 int NewNumDst = MoreTy.getSizeInBits() / DstTy.getSizeInBits();
5001 for (int I = NumDst; I != NewNumDst; ++I)
5002 MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
5003
5004 MIB.addUse(MI.getOperand(NumDst).getReg());
5005 MI.eraseFromParent();
5006 return Legalized;
5007 }
5008 case TargetOpcode::G_PHI:
5009 return moreElementsVectorPhi(MI, TypeIdx, MoreTy);
5010 case TargetOpcode::G_SHUFFLE_VECTOR:
5011 return moreElementsVectorShuffle(MI, TypeIdx, MoreTy);
5012 default:
5013 return UnableToLegalize;
5014 }
5015}
5016
5017LegalizerHelper::LegalizeResult
5018LegalizerHelper::moreElementsVectorShuffle(MachineInstr &MI,
5019 unsigned int TypeIdx, LLT MoreTy) {
5020 if (TypeIdx != 0)
5021 return UnableToLegalize;
5022
5023 Register DstReg = MI.getOperand(0).getReg();
5024 Register Src1Reg = MI.getOperand(1).getReg();
5025 Register Src2Reg = MI.getOperand(2).getReg();
5026 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
5027 LLT DstTy = MRI.getType(DstReg);
5028 LLT Src1Ty = MRI.getType(Src1Reg);
5029 LLT Src2Ty = MRI.getType(Src2Reg);
5030 unsigned NumElts = DstTy.getNumElements();
5031 unsigned WidenNumElts = MoreTy.getNumElements();
5032
5033 // Expect a canonicalized shuffle.
5034 if (DstTy != Src1Ty || DstTy != Src2Ty)
5035 return UnableToLegalize;
5036
5037 moreElementsVectorSrc(MI, MoreTy, 1);
5038 moreElementsVectorSrc(MI, MoreTy, 2);
5039
5040 // Adjust mask based on new input vector length.
5041 SmallVector<int, 16> NewMask;
5042 for (unsigned I = 0; I != NumElts; ++I) {
5043 int Idx = Mask[I];
5044 if (Idx < static_cast<int>(NumElts))
5045 NewMask.push_back(Idx);
5046 else
5047 NewMask.push_back(Idx - NumElts + WidenNumElts);
5048 }
5049 for (unsigned I = NumElts; I != WidenNumElts; ++I)
5050 NewMask.push_back(-1);
5051 moreElementsVectorDst(MI, MoreTy, 0);
5052 MIRBuilder.setInstrAndDebugLoc(MI);
5053 MIRBuilder.buildShuffleVector(MI.getOperand(0).getReg(),
5054 MI.getOperand(1).getReg(),
5055 MI.getOperand(2).getReg(), NewMask);
5056 MI.eraseFromParent();
5057 return Legalized;
5058}
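// --- Illustrative sketch, not part of LegalizerHelper.cpp: the mask
// adjustment above with NumElts = 2 widened to WidenNumElts = 4. Indexes
// into the second source shift up past the padding elements, and the new
// tail elements become undef (-1).
#include <cassert>

int main() {
  const int NumElts = 2, WidenNumElts = 4;
  const int Mask[2] = {1, 3}; // elt 1 of src1, elt 1 of src2
  int NewMask[4];
  for (int I = 0; I != NumElts; ++I)
    NewMask[I] =
        Mask[I] < NumElts ? Mask[I] : Mask[I] - NumElts + WidenNumElts;
  for (int I = NumElts; I != WidenNumElts; ++I)
    NewMask[I] = -1;
  assert(NewMask[0] == 1 && NewMask[1] == 5); // src2 elt 1 now sits at 4+1
  assert(NewMask[2] == -1 && NewMask[3] == -1);
  return 0;
}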
5059
5060void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
5061 ArrayRef<Register> Src1Regs,
5062 ArrayRef<Register> Src2Regs,
5063 LLT NarrowTy) {
5064 MachineIRBuilder &B = MIRBuilder;
5065 unsigned SrcParts = Src1Regs.size();
5066 unsigned DstParts = DstRegs.size();
5067
5068 unsigned DstIdx = 0; // Low bits of the result.
5069 Register FactorSum =
5070 B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
5071 DstRegs[DstIdx] = FactorSum;
5072
5073 unsigned CarrySumPrevDstIdx;
5074 SmallVector<Register, 4> Factors;
5075
5076 for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
5077 // Collect low parts of muls for DstIdx.
5078 for (unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
5079 i <= std::min(DstIdx, SrcParts - 1); ++i) {
5080 MachineInstrBuilder Mul =
5081 B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
5082 Factors.push_back(Mul.getReg(0));
5083 }
5084 // Collect high parts of muls from previous DstIdx.
5085 for (unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
5086 i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
5087 MachineInstrBuilder Umulh =
5088 B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
5089 Factors.push_back(Umulh.getReg(0));
5090 }
5091 // Add CarrySum from additions calculated for previous DstIdx.
5092 if (DstIdx != 1) {
5093 Factors.push_back(CarrySumPrevDstIdx);
5094 }
5095
5096 Register CarrySum;
5097 // Add all factors and accumulate all carries into CarrySum.
5098 if (DstIdx != DstParts - 1) {
5099 MachineInstrBuilder Uaddo =
5100 B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]);
5101 FactorSum = Uaddo.getReg(0);
5102 CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0);
5103 for (unsigned i = 2; i < Factors.size(); ++i) {
5104 MachineInstrBuilder Uaddo =
5105 B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]);
5106 FactorSum = Uaddo.getReg(0);
5107 MachineInstrBuilder Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1));
5108 CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
5109 }
5110 } else {
5111 // Since value for the next index is not calculated, neither is CarrySum.
5112 FactorSum = B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
5113 for (unsigned i = 2; i < Factors.size(); ++i)
5114 FactorSum = B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
5115 }
5116
5117 CarrySumPrevDstIdx = CarrySum;
5118 DstRegs[DstIdx] = FactorSum;
5119 Factors.clear();
5120 }
5121}
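// --- Illustrative sketch, not part of LegalizerHelper.cpp: the schoolbook
// scheme above for a 64-bit G_MUL over 32-bit limbs. Column 0 is a single
// buildMul; column 1 sums the low halves of the cross products (buildMul)
// with the high half of column 0's product (the buildUMulH term). Carries
// out of column 1 would feed column 2, which a 64-bit result discards.
#include <cassert>
#include <cstdint>

int main() {
  const uint64_t A = 0x0123456789ABCDEFull, B = 0xFEDCBA9876543210ull;
  const uint32_t a0 = (uint32_t)A, a1 = (uint32_t)(A >> 32);
  const uint32_t b0 = (uint32_t)B, b1 = (uint32_t)(B >> 32);

  const uint64_t p00 = (uint64_t)a0 * b0;
  const uint32_t d0 = (uint32_t)p00;                             // column 0
  const uint32_t d1 = (uint32_t)((uint64_t)a1 * b0) +            // mul lo
                      (uint32_t)((uint64_t)a0 * b1) +            // mul lo
                      (uint32_t)(p00 >> 32);                     // umulh
  assert((((uint64_t)d1 << 32) | d0) == A * B);
  return 0;
}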
5122
5123LegalizerHelper::LegalizeResult
5124LegalizerHelper::narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx,
5125 LLT NarrowTy) {
5126 if (TypeIdx != 0)
5127 return UnableToLegalize;
5128
5129 Register DstReg = MI.getOperand(0).getReg();
5130 LLT DstType = MRI.getType(DstReg);
5131 // FIXME: add support for vector types
5132 if (DstType.isVector())
5133 return UnableToLegalize;
5134
5135 unsigned Opcode = MI.getOpcode();
5136 unsigned OpO, OpE, OpF;
5137 switch (Opcode) {
5138 case TargetOpcode::G_SADDO:
5139 case TargetOpcode::G_SADDE:
5140 case TargetOpcode::G_UADDO:
5141 case TargetOpcode::G_UADDE:
5142 case TargetOpcode::G_ADD:
5143 OpO = TargetOpcode::G_UADDO;
5144 OpE = TargetOpcode::G_UADDE;
5145 OpF = TargetOpcode::G_UADDE;
5146 if (Opcode == TargetOpcode::G_SADDO || Opcode == TargetOpcode::G_SADDE)
5147 OpF = TargetOpcode::G_SADDE;
5148 break;
5149 case TargetOpcode::G_SSUBO:
5150 case TargetOpcode::G_SSUBE:
5151 case TargetOpcode::G_USUBO:
5152 case TargetOpcode::G_USUBE:
5153 case TargetOpcode::G_SUB:
5154 OpO = TargetOpcode::G_USUBO;
5155 OpE = TargetOpcode::G_USUBE;
5156 OpF = TargetOpcode::G_USUBE;
5157 if (Opcode == TargetOpcode::G_SSUBO || Opcode == TargetOpcode::G_SSUBE)
5158 OpF = TargetOpcode::G_SSUBE;
5159 break;
5160 default:
5161 llvm_unreachable("Unexpected add/sub opcode!");
5162 }
5163
5164 // 1 for a plain add/sub, 2 if this is an operation with a carry-out.
5165 unsigned NumDefs = MI.getNumExplicitDefs();
5166 Register Src1 = MI.getOperand(NumDefs).getReg();
5167 Register Src2 = MI.getOperand(NumDefs + 1).getReg();
5168 Register CarryDst, CarryIn;
5169 if (NumDefs == 2)
5170 CarryDst = MI.getOperand(1).getReg();
5171 if (MI.getNumOperands() == NumDefs + 3)
5172 CarryIn = MI.getOperand(NumDefs + 2).getReg();
5173
5174 LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
5175 LLT LeftoverTy, DummyTy;
5176 SmallVector<Register, 2> Src1Regs, Src2Regs, Src1Left, Src2Left, DstRegs;
5177 extractParts(Src1, RegTy, NarrowTy, LeftoverTy, Src1Regs, Src1Left);
5178 extractParts(Src2, RegTy, NarrowTy, DummyTy, Src2Regs, Src2Left);
5179
5180 int NarrowParts = Src1Regs.size();
5181 for (int I = 0, E = Src1Left.size(); I != E; ++I) {
5182 Src1Regs.push_back(Src1Left[I]);
5183 Src2Regs.push_back(Src2Left[I]);
5184 }
5185 DstRegs.reserve(Src1Regs.size());
5186
5187 for (int i = 0, e = Src1Regs.size(); i != e; ++i) {
5188 Register DstReg =
5189 MRI.createGenericVirtualRegister(MRI.getType(Src1Regs[i]));
5190 Register CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
5191 // Forward the final carry-out to the destination register
5192 if (i == e - 1 && CarryDst)
5193 CarryOut = CarryDst;
5194
5195 if (!CarryIn) {
5196 MIRBuilder.buildInstr(OpO, {DstReg, CarryOut},
5197 {Src1Regs[i], Src2Regs[i]});
5198 } else if (i == e - 1) {
5199 MIRBuilder.buildInstr(OpF, {DstReg, CarryOut},
5200 {Src1Regs[i], Src2Regs[i], CarryIn});
5201 } else {
5202 MIRBuilder.buildInstr(OpE, {DstReg, CarryOut},
5203 {Src1Regs[i], Src2Regs[i], CarryIn});
5204 }
5205
5206 DstRegs.push_back(DstReg);
5207 CarryIn = CarryOut;
5208 }
5209 insertParts(MI.getOperand(0).getReg(), RegTy, NarrowTy,
5210 makeArrayRef(DstRegs).take_front(NarrowParts), LeftoverTy,
5211 makeArrayRef(DstRegs).drop_front(NarrowParts));
5212
5213 MI.eraseFromParent();
5214 return Legalized;
5215}
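// --- Illustrative sketch, not part of LegalizerHelper.cpp: the ripple
// chain above for a 64-bit G_UADDO narrowed to 32-bit limbs. The low limb
// uses G_UADDO; its carry feeds a G_UADDE on the high limb, whose carry-out
// becomes the overflow result.
#include <cassert>
#include <cstdint>

static uint32_t uadde(uint32_t A, uint32_t B, bool CarryIn, bool &CarryOut) {
  uint32_t S = A + B;
  bool C1 = S < A;                      // carry from A + B
  uint32_t R = S + (CarryIn ? 1 : 0);
  CarryOut = C1 || (CarryIn && R == 0); // carry from either addition
  return R;
}

int main() {
  const uint64_t A = 0xFFFFFFFF00000001ull, B = 0x00000000FFFFFFFFull;
  bool C0, C1;
  uint32_t Lo = uadde((uint32_t)A, (uint32_t)B, false, C0);       // G_UADDO
  uint32_t Hi = uadde((uint32_t)(A >> 32), (uint32_t)(B >> 32), C0, C1);
  assert((((uint64_t)Hi << 32) | Lo) == A + B); // wraps identically
  assert(C1);                                   // 64-bit carry-out detected
  return 0;
}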
5216
5217LegalizerHelper::LegalizeResult
5218LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {
5219 Register DstReg = MI.getOperand(0).getReg();
5220 Register Src1 = MI.getOperand(1).getReg();
5221 Register Src2 = MI.getOperand(2).getReg();
5222
5223 LLT Ty = MRI.getType(DstReg);
5224 if (Ty.isVector())
5225 return UnableToLegalize;
5226
5227 unsigned SrcSize = MRI.getType(Src1).getSizeInBits();
5228 unsigned DstSize = Ty.getSizeInBits();
5229 unsigned NarrowSize = NarrowTy.getSizeInBits();
5230 if (DstSize % NarrowSize != 0 || SrcSize % NarrowSize != 0)
5231 return UnableToLegalize;
5232
5233 unsigned NumDstParts = DstSize / NarrowSize;
5234 unsigned NumSrcParts = SrcSize / NarrowSize;
5235 bool IsMulHigh = MI.getOpcode() == TargetOpcode::G_UMULH;
5236 unsigned DstTmpParts = NumDstParts * (IsMulHigh ? 2 : 1);
5237
5238 SmallVector<Register, 2> Src1Parts, Src2Parts;
5239 SmallVector<Register, 2> DstTmpRegs(DstTmpParts);
5240 extractParts(Src1, NarrowTy, NumSrcParts, Src1Parts);
5241 extractParts(Src2, NarrowTy, NumSrcParts, Src2Parts);
5242 multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);
5243
5244 // Take only high half of registers if this is high mul.
5245 ArrayRef<Register> DstRegs(
5246 IsMulHigh ? &DstTmpRegs[DstTmpParts / 2] : &DstTmpRegs[0], NumDstParts);
5247 MIRBuilder.buildMerge(DstReg, DstRegs);
5248 MI.eraseFromParent();
5249 return Legalized;
5250}
5251
5252LegalizerHelper::LegalizeResult
5253LegalizerHelper::narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx,
5254 LLT NarrowTy) {
5255 if (TypeIdx != 0)
5256 return UnableToLegalize;
5257
5258 bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI;
5259
5260 Register Src = MI.getOperand(1).getReg();
5261 LLT SrcTy = MRI.getType(Src);
5262
5263 // If all finite floats fit into the narrowed integer type, we can just swap
5264 // out the result type. This is practically only useful for conversions from
5265 // half to at least 16-bits, so just handle the one case.
5266 if (SrcTy.getScalarType() != LLT::scalar(16) ||
5267 NarrowTy.getScalarSizeInBits() < (IsSigned ? 17u : 16u))
5268 return UnableToLegalize;
5269
5270 Observer.changingInstr(MI);
5271 narrowScalarDst(MI, NarrowTy, 0,
5272 IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT);
5273 Observer.changedInstr(MI);
5274 return Legalized;
5275}
5276
5277LegalizerHelper::LegalizeResult
5278LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx,
5279 LLT NarrowTy) {
5280 if (TypeIdx != 1)
5281 return UnableToLegalize;
5282
5283 uint64_t NarrowSize = NarrowTy.getSizeInBits();
5284
5285 int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
5286 // FIXME: add support for when SizeOp1 isn't an exact multiple of
5287 // NarrowSize.
5288 if (SizeOp1 % NarrowSize != 0)
5289 return UnableToLegalize;
5290 int NumParts = SizeOp1 / NarrowSize;
5291
5292 SmallVector<Register, 2> SrcRegs, DstRegs;
5293 SmallVector<uint64_t, 2> Indexes;
5294 extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
5295
5296 Register OpReg = MI.getOperand(0).getReg();
5297 uint64_t OpStart = MI.getOperand(2).getImm();
5298 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
5299 for (int i = 0; i < NumParts; ++i) {
5300 unsigned SrcStart = i * NarrowSize;
5301
5302 if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
5303 // No part of the extract uses this subregister, ignore it.
5304 continue;
5305 } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
5306 // The entire subregister is extracted, forward the value.
5307 DstRegs.push_back(SrcRegs[i]);
5308 continue;
5309 }
5310
5311 // OpSegStart is where this destination segment would start in OpReg if it
5312 // extended infinitely in both directions.
5313 int64_t ExtractOffset;
5314 uint64_t SegSize;
5315 if (OpStart < SrcStart) {
5316 ExtractOffset = 0;
5317 SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
5318 } else {
5319 ExtractOffset = OpStart - SrcStart;
5320 SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
5321 }
5322
5323 Register SegReg = SrcRegs[i];
5324 if (ExtractOffset != 0 || SegSize != NarrowSize) {
5325 // A genuine extract is needed.
5326 SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
5327 MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
5328 }
5329
5330 DstRegs.push_back(SegReg);
5331 }
5332
5333 Register DstReg = MI.getOperand(0).getReg();
5334 if (MRI.getType(DstReg).isVector())
5335 MIRBuilder.buildBuildVector(DstReg, DstRegs);
5336 else if (DstRegs.size() > 1)
5337 MIRBuilder.buildMerge(DstReg, DstRegs);
5338 else
5339 MIRBuilder.buildCopy(DstReg, DstRegs[0]);
5340 MI.eraseFromParent();
5341 return Legalized;
5342}
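// --- Illustrative sketch, not part of LegalizerHelper.cpp: the
// ExtractOffset/SegSize arithmetic above for a G_EXTRACT of an s16 at bit
// offset 24 from an s64 split into four s16 pieces. The extract straddles
// pieces 1 and 2, taking 8 bits from each; pieces 0 and 3 are skipped.
#include <algorithm>
#include <cassert>
#include <cstdint>

int main() {
  const uint64_t NarrowSize = 16, OpStart = 24, OpSize = 16;
  for (int i = 0; i < 4; ++i) {
    uint64_t SrcStart = i * NarrowSize;
    if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize)
      continue; // this piece contributes nothing
    uint64_t ExtractOffset, SegSize;
    if (OpStart < SrcStart) {
      ExtractOffset = 0;
      SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
    } else {
      ExtractOffset = OpStart - SrcStart;
      SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
    }
    if (i == 1) assert(ExtractOffset == 8 && SegSize == 8); // high 8 bits
    if (i == 2) assert(ExtractOffset == 0 && SegSize == 8); // low 8 bits
  }
  return 0;
}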
5343
5344LegalizerHelper::LegalizeResult
5345LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx,
5346 LLT NarrowTy) {
5347 // FIXME: Don't know how to handle secondary types yet.
5348 if (TypeIdx != 0)
5349 return UnableToLegalize;
5350
5351 SmallVector<Register, 2> SrcRegs, LeftoverRegs, DstRegs;
5352 SmallVector<uint64_t, 2> Indexes;
5353 LLT RegTy = MRI.getType(MI.getOperand(0).getReg());
5354 LLT LeftoverTy;
5355 extractParts(MI.getOperand(1).getReg(), RegTy, NarrowTy, LeftoverTy, SrcRegs,
5356 LeftoverRegs);
5357
5358 for (Register Reg : LeftoverRegs)
5359 SrcRegs.push_back(Reg);
5360
5361 uint64_t NarrowSize = NarrowTy.getSizeInBits();
5362 Register OpReg = MI.getOperand(2).getReg();
5363 uint64_t OpStart = MI.getOperand(3).getImm();
5364 uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
5365 for (int I = 0, E = SrcRegs.size(); I != E; ++I) {
5366 unsigned DstStart = I * NarrowSize;
5367
5368 if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
5369 // The entire subregister is defined by this insert, forward the new
5370 // value.
5371 DstRegs.push_back(OpReg);
5372 continue;
5373 }
5374
5375 Register SrcReg = SrcRegs[I];
5376 if (MRI.getType(SrcRegs[I]) == LeftoverTy) {
5377 // The leftover reg is smaller than NarrowTy, so we need to extend it.
5378 SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
5379 MIRBuilder.buildAnyExt(SrcReg, SrcRegs[I]);
5380 }
5381
5382 if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
5383 // No part of the insert affects this subregister, forward the original.
5384 DstRegs.push_back(SrcReg);
5385 continue;
5386 }
5387
5388 // OpSegStart is where this destination segment would start in OpReg if it
5389 // extended infinitely in both directions.
5390 int64_t ExtractOffset, InsertOffset;
5391 uint64_t SegSize;
5392 if (OpStart < DstStart) {
5393 InsertOffset = 0;
5394 ExtractOffset = DstStart - OpStart;
5395 SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
5396 } else {
5397 InsertOffset = OpStart - DstStart;
5398 ExtractOffset = 0;
5399 SegSize =
5400 std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
5401 }
5402
5403 Register SegReg = OpReg;
5404 if (ExtractOffset != 0 || SegSize != OpSize) {
5405 // A genuine extract is needed.
5406 SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
5407 MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
5408 }
5409
5410 Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
5411 MIRBuilder.buildInsert(DstReg, SrcReg, SegReg, InsertOffset);
5412 DstRegs.push_back(DstReg);
5413 }
5414
5415 uint64_t WideSize = DstRegs.size() * NarrowSize;
5416 Register DstReg = MI.getOperand(0).getReg();
5417 if (WideSize > RegTy.getSizeInBits()) {
5418 Register MergeReg = MRI.createGenericVirtualRegister(LLT::scalar(WideSize));
5419 MIRBuilder.buildMerge(MergeReg, DstRegs);
5420 MIRBuilder.buildTrunc(DstReg, MergeReg);
5421 } else
5422 MIRBuilder.buildMerge(DstReg, DstRegs);
5423
5424 MI.eraseFromParent();
5425 return Legalized;
5426}
5427
5428LegalizerHelper::LegalizeResult
5429LegalizerHelper::narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx,
5430 LLT NarrowTy) {
5431 Register DstReg = MI.getOperand(0).getReg();
5432 LLT DstTy = MRI.getType(DstReg);
5433
5434 assert(MI.getNumOperands() == 3 && TypeIdx == 0);
5435
5436 SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
5437 SmallVector<Register, 4> Src0Regs, Src0LeftoverRegs;
5438 SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
5439 LLT LeftoverTy;
5440 if (!extractParts(MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
5441 Src0Regs, Src0LeftoverRegs))
5442 return UnableToLegalize;
5443
5444 LLT Unused;
5445 if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
5446 Src1Regs, Src1LeftoverRegs))
5447 llvm_unreachable("inconsistent extractParts result");
5448
5449 for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
5450 auto Inst = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
5451 {Src0Regs[I], Src1Regs[I]});
5452 DstRegs.push_back(Inst.getReg(0));
5453 }
5454
5455 for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
5456 auto Inst = MIRBuilder.buildInstr(
5457 MI.getOpcode(),
5458 {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
5459 DstLeftoverRegs.push_back(Inst.getReg(0));
5460 }
5461
5462 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
5463 LeftoverTy, DstLeftoverRegs);
5464
5465 MI.eraseFromParent();
5466 return Legalized;
5467}
5468
5469LegalizerHelper::LegalizeResult
5470LegalizerHelper::narrowScalarExt(MachineInstr &MI, unsigned TypeIdx,
5471 LLT NarrowTy) {
5472 if (TypeIdx != 0)
5473 return UnableToLegalize;
5474
5475 Register DstReg = MI.getOperand(0).getReg();
5476 Register SrcReg = MI.getOperand(1).getReg();
5477
5478 LLT DstTy = MRI.getType(DstReg);
5479 if (DstTy.isVector())
5480 return UnableToLegalize;
5481
5482 SmallVector<Register, 8> Parts;
5483 LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
5484 LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts, MI.getOpcode());
5485 buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
5486
5487 MI.eraseFromParent();
5488 return Legalized;
5489}
5490
5491LegalizerHelper::LegalizeResult
5492LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx,
5493 LLT NarrowTy) {
5494 if (TypeIdx != 0)
5495 return UnableToLegalize;
5496
5497 Register CondReg = MI.getOperand(1).getReg();
5498 LLT CondTy = MRI.getType(CondReg);
5499 if (CondTy.isVector()) // TODO: Handle vselect
5500 return UnableToLegalize;
5501
5502 Register DstReg = MI.getOperand(0).getReg();
5503 LLT DstTy = MRI.getType(DstReg);
5504
5505 SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
5506 SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
5507 SmallVector<Register, 4> Src2Regs, Src2LeftoverRegs;
5508 LLT LeftoverTy;
5509 if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
5510 Src1Regs, Src1LeftoverRegs))
5511 return UnableToLegalize;
5512
5513 LLT Unused;
5514 if (!extractParts(MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
5515 Src2Regs, Src2LeftoverRegs))
5516 llvm_unreachable("inconsistent extractParts result");
5517
5518 for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
5519 auto Select = MIRBuilder.buildSelect(NarrowTy,
5520 CondReg, Src1Regs[I], Src2Regs[I]);
5521 DstRegs.push_back(Select.getReg(0));
5522 }
5523
5524 for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
5525 auto Select = MIRBuilder.buildSelect(
5526 LeftoverTy, CondReg, Src1LeftoverRegs[I], Src2LeftoverRegs[I]);
5527 DstLeftoverRegs.push_back(Select.getReg(0));
5528 }
5529
5530 insertParts(DstReg, DstTy, NarrowTy, DstRegs,
5531 LeftoverTy, DstLeftoverRegs);
5532
5533 MI.eraseFromParent();
5534 return Legalized;
5535}
5536
5537LegalizerHelper::LegalizeResult
5538LegalizerHelper::narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx,
5539 LLT NarrowTy) {
5540 if (TypeIdx != 1)
5541 return UnableToLegalize;
5542
5543 Register DstReg = MI.getOperand(0).getReg();
5544 Register SrcReg = MI.getOperand(1).getReg();
5545 LLT DstTy = MRI.getType(DstReg);
5546 LLT SrcTy = MRI.getType(SrcReg);
5547 unsigned NarrowSize = NarrowTy.getSizeInBits();
5548
5549 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
5550 const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF;
5551
5552 MachineIRBuilder &B = MIRBuilder;
5553 auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
5554 // ctlz(Hi:Lo) -> Hi == 0 ? (NarrowSize + ctlz(Lo)) : ctlz(Hi)
5555 auto C_0 = B.buildConstant(NarrowTy, 0);
5556 auto HiIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
5557 UnmergeSrc.getReg(1), C_0);
5558 auto LoCTLZ = IsUndef ?
5559 B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0)) :
5560 B.buildCTLZ(DstTy, UnmergeSrc.getReg(0));
5561 auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
5562 auto HiIsZeroCTLZ = B.buildAdd(DstTy, LoCTLZ, C_NarrowSize);
5563 auto HiCTLZ = B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1));
5564 B.buildSelect(DstReg, HiIsZero, HiIsZeroCTLZ, HiCTLZ);
5565
5566 MI.eraseFromParent();
5567 return Legalized;
5568 }
5569
5570 return UnableToLegalize;
5571}
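// --- Illustrative sketch, not part of LegalizerHelper.cpp: the ctlz(Hi:Lo)
// select above on uint32_t halves, assuming a GCC/Clang-style
// __builtin_clz. The Hi != 0 arm may use the zero-undef form because the
// select only picks it when Hi is nonzero; the Lo arm here mirrors the
// defined-at-zero G_CTLZ.
#include <cassert>
#include <cstdint>

static unsigned ctlz64(uint32_t Lo, uint32_t Hi) {
  // __builtin_clz is undefined on 0, like G_CTLZ_ZERO_UNDEF.
  auto Clz32 = [](uint32_t V) { return V ? (unsigned)__builtin_clz(V) : 32u; };
  return Hi == 0 ? 32 + Clz32(Lo) : (unsigned)__builtin_clz(Hi);
}

int main() {
  assert(ctlz64(0x00010000u, 0) == 47);           // Hi zero: 32 + ctlz(Lo)
  assert(ctlz64(0xFFFFFFFFu, 0x00000001u) == 31); // Hi nonzero: ctlz(Hi)
  assert(ctlz64(0, 0) == 64);                     // fully zero source
  return 0;
}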
5572
5573LegalizerHelper::LegalizeResult
5574LegalizerHelper::narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx,
5575 LLT NarrowTy) {
5576 if (TypeIdx != 1)
5577 return UnableToLegalize;
5578
5579 Register DstReg = MI.getOperand(0).getReg();
5580 Register SrcReg = MI.getOperand(1).getReg();
5581 LLT DstTy = MRI.getType(DstReg);
5582 LLT SrcTy = MRI.getType(SrcReg);
5583 unsigned NarrowSize = NarrowTy.getSizeInBits();
5584
5585 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
5586 const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF;
5587
5588 MachineIRBuilder &B = MIRBuilder;
5589 auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
5590 // cttz(Hi:Lo) -> Lo == 0 ? (cttz(Hi) + NarrowSize) : cttz(Lo)
5591 auto C_0 = B.buildConstant(NarrowTy, 0);
5592 auto LoIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
5593 UnmergeSrc.getReg(0), C_0);
5594 auto HiCTTZ = IsUndef ?
5595 B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1)) :
5596 B.buildCTTZ(DstTy, UnmergeSrc.getReg(1));
5597 auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
5598 auto LoIsZeroCTTZ = B.buildAdd(DstTy, HiCTTZ, C_NarrowSize);
5599 auto LoCTTZ = B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0));
5600 B.buildSelect(DstReg, LoIsZero, LoIsZeroCTTZ, LoCTTZ);
5601
5602 MI.eraseFromParent();
5603 return Legalized;
5604 }
5605
5606 return UnableToLegalize;
5607}
5608
5609LegalizerHelper::LegalizeResult
5610LegalizerHelper::narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx,
5611 LLT NarrowTy) {
5612 if (TypeIdx != 1)
5613 return UnableToLegalize;
5614
5615 Register DstReg = MI.getOperand(0).getReg();
5616 LLT DstTy = MRI.getType(DstReg);
5617 LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
5618 unsigned NarrowSize = NarrowTy.getSizeInBits();
5619
5620 if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
5621 auto UnmergeSrc = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
5622
5623 auto LoCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(0));
5624 auto HiCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(1));
5625 MIRBuilder.buildAdd(DstReg, HiCTPOP, LoCTPOP);
5626
5627 MI.eraseFromParent();
5628 return Legalized;
5629 }
5630
5631 return UnableToLegalize;
5632}
5633
5634LegalizerHelper::LegalizeResult
5635LegalizerHelper::lowerBitCount(MachineInstr &MI) {
5636 unsigned Opc = MI.getOpcode();
5637 const auto &TII = MIRBuilder.getTII();
5638 auto isSupported = [this](const LegalityQuery &Q) {
5639 auto QAction = LI.getAction(Q).Action;
5640 return QAction == Legal || QAction == Libcall || QAction == Custom;
5641 };
5642 switch (Opc) {
5643 default:
5644 return UnableToLegalize;
5645 case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
5646 // This trivially expands to CTLZ.
5647 Observer.changingInstr(MI);
5648 MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
5649 Observer.changedInstr(MI);
5650 return Legalized;
5651 }
5652 case TargetOpcode::G_CTLZ: {
5653 Register DstReg = MI.getOperand(0).getReg();
5654 Register SrcReg = MI.getOperand(1).getReg();
5655 LLT DstTy = MRI.getType(DstReg);
5656 LLT SrcTy = MRI.getType(SrcReg);
5657 unsigned Len = SrcTy.getSizeInBits();
5658
5659 if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
5660 // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
5661 auto CtlzZU = MIRBuilder.buildCTLZ_ZERO_UNDEF(DstTy, SrcReg);
5662 auto ZeroSrc = MIRBuilder.buildConstant(SrcTy, 0);
5663 auto ICmp = MIRBuilder.buildICmp(
5664 CmpInst::ICMP_EQ, SrcTy.changeElementSize(1), SrcReg, ZeroSrc);
5665 auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
5666 MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CtlzZU);
5667 MI.eraseFromParent();
5668 return Legalized;
5669 }
5670 // for now, we do this:
5671 // NewLen = NextPowerOf2(Len);
5672 // x = x | (x >> 1);
5673 // x = x | (x >> 2);
5674 // ...
5675 // x = x | (x >>16);
5676 // x = x | (x >>32); // for 64-bit input
4677 // Up to NewLen/2
5678 // return Len - popcount(x);
5679 //
5680 // Ref: "Hacker's Delight" by Henry Warren
5681 Register Op = SrcReg;
5682 unsigned NewLen = PowerOf2Ceil(Len);
5683 for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
5684 auto MIBShiftAmt = MIRBuilder.buildConstant(SrcTy, 1ULL << i);
5685 auto MIBOp = MIRBuilder.buildOr(
5686 SrcTy, Op, MIRBuilder.buildLShr(SrcTy, Op, MIBShiftAmt));
5687 Op = MIBOp.getReg(0);
5688 }
5689 auto MIBPop = MIRBuilder.buildCTPOP(DstTy, Op);
5690 MIRBuilder.buildSub(MI.getOperand(0), MIRBuilder.buildConstant(DstTy, Len),
5691 MIBPop);
5692 MI.eraseFromParent();
5693 return Legalized;
5694 }
5695 case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
5696 // This trivially expands to CTTZ.
5697 Observer.changingInstr(MI);
5698 MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
5699 Observer.changedInstr(MI);
5700 return Legalized;
5701 }
5702 case TargetOpcode::G_CTTZ: {
5703 Register DstReg = MI.getOperand(0).getReg();
5704 Register SrcReg = MI.getOperand(1).getReg();
5705 LLT DstTy = MRI.getType(DstReg);
5706 LLT SrcTy = MRI.getType(SrcReg);
5707
5708 unsigned Len = SrcTy.getSizeInBits();
5709 if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
5710 // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
5711 // zero.
5712 auto CttzZU = MIRBuilder.buildCTTZ_ZERO_UNDEF(DstTy, SrcReg);
5713 auto Zero = MIRBuilder.buildConstant(SrcTy, 0);
5714 auto ICmp = MIRBuilder.buildICmp(
5715 CmpInst::ICMP_EQ, DstTy.changeElementSize(1), SrcReg, Zero);
5716 auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
5717 MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CttzZU);
5718 MI.eraseFromParent();
5719 return Legalized;
5720 }
5721 // for now, we use: { return popcount(~x & (x - 1)); }
5722 // unless the target has ctlz but not ctpop, in which case we use:
5723 // { return 32 - nlz(~x & (x-1)); }
5724 // Ref: "Hacker's Delight" by Henry Warren
5725 auto MIBCstNeg1 = MIRBuilder.buildConstant(SrcTy, -1);
5726 auto MIBNot = MIRBuilder.buildXor(SrcTy, SrcReg, MIBCstNeg1);
5727 auto MIBTmp = MIRBuilder.buildAnd(
5728 SrcTy, MIBNot, MIRBuilder.buildAdd(SrcTy, SrcReg, MIBCstNeg1));
5729 if (!isSupported({TargetOpcode::G_CTPOP, {SrcTy, SrcTy}}) &&
5730 isSupported({TargetOpcode::G_CTLZ, {SrcTy, SrcTy}})) {
5731 auto MIBCstLen = MIRBuilder.buildConstant(SrcTy, Len);
5732 MIRBuilder.buildSub(MI.getOperand(0), MIBCstLen,
5733 MIRBuilder.buildCTLZ(SrcTy, MIBTmp));
5734 MI.eraseFromParent();
5735 return Legalized;
5736 }
5737 MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
5738 MI.getOperand(1).setReg(MIBTmp.getReg(0));
5739 return Legalized;
5740 }
5741 case TargetOpcode::G_CTPOP: {
5742 Register SrcReg = MI.getOperand(1).getReg();
5743 LLT Ty = MRI.getType(SrcReg);
5744 unsigned Size = Ty.getSizeInBits();
5745 MachineIRBuilder &B = MIRBuilder;
5746
5747 // Count set bits in blocks of 2 bits. Default approach would be
5748 // B2Count = { val & 0x55555555 } + { (val >> 1) & 0x55555555 }
5749 // We use the following formula instead:
5750 // B2Count = val - { (val >> 1) & 0x55555555 }
5751 // since it gives the same result in blocks of 2 with one instruction fewer.
5752 auto C_1 = B.buildConstant(Ty, 1);
5753 auto B2Set1LoTo1Hi = B.buildLShr(Ty, SrcReg, C_1);
5754 APInt B2Mask1HiTo0 = APInt::getSplat(Size, APInt(8, 0x55));
5755 auto C_B2Mask1HiTo0 = B.buildConstant(Ty, B2Mask1HiTo0);
5756 auto B2Count1Hi = B.buildAnd(Ty, B2Set1LoTo1Hi, C_B2Mask1HiTo0);
5757 auto B2Count = B.buildSub(Ty, SrcReg, B2Count1Hi);
5758
5759 // To get the count in blocks of 4, add values from adjacent blocks of 2.
5760 // B4Count = { B2Count & 0x33333333 } + { (B2Count >> 2) & 0x33333333 }
5761 auto C_2 = B.buildConstant(Ty, 2);
5762 auto B4Set2LoTo2Hi = B.buildLShr(Ty, B2Count, C_2);
5763 APInt B4Mask2HiTo0 = APInt::getSplat(Size, APInt(8, 0x33));
5764 auto C_B4Mask2HiTo0 = B.buildConstant(Ty, B4Mask2HiTo0);
5765 auto B4HiB2Count = B.buildAnd(Ty, B4Set2LoTo2Hi, C_B4Mask2HiTo0);
5766 auto B4LoB2Count = B.buildAnd(Ty, B2Count, C_B4Mask2HiTo0);
5767 auto B4Count = B.buildAdd(Ty, B4HiB2Count, B4LoB2Count);
5768
5769 // For the count in blocks of 8 bits we don't have to mask the high 4 bits
5770 // before adding, since each count sits in the range {0,...,8} and 4 bits
5771 // suffice to hold it. After the addition the high 4 bits still hold the
5772 // count of set bits in the high 4-bit block; zero them to get the 8-bit result.
5773 // B8Count = { B4Count + (B4Count >> 4) } & 0x0F0F0F0F
5774 auto C_4 = B.buildConstant(Ty, 4);
5775 auto B8HiB4Count = B.buildLShr(Ty, B4Count, C_4);
5776 auto B8CountDirty4Hi = B.buildAdd(Ty, B8HiB4Count, B4Count);
5777 APInt B8Mask4HiTo0 = APInt::getSplat(Size, APInt(8, 0x0F));
5778 auto C_B8Mask4HiTo0 = B.buildConstant(Ty, B8Mask4HiTo0);
5779 auto B8Count = B.buildAnd(Ty, B8CountDirty4Hi, C_B8Mask4HiTo0);
5780
5781 assert(Size <= 128 && "Scalar size is too large for CTPOP lower algorithm");
5782 // 8 bits can hold CTPOP result of 128 bit int or smaller. Mul with this
5783 // bitmask will set 8 msb in ResTmp to sum of all B8Counts in 8 bit blocks.
5784 auto MulMask = B.buildConstant(Ty, APInt::getSplat(Size, APInt(8, 0x01)));
5785 auto ResTmp = B.buildMul(Ty, B8Count, MulMask);
5786
5787 // Shift count result from 8 high bits to low bits.
5788 auto C_SizeM8 = B.buildConstant(Ty, Size - 8);
5789 B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
5790
5791 MI.eraseFromParent();
5792 return Legalized;
5793 }
5794 }
5795}
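// --- Illustrative sketch, not part of LegalizerHelper.cpp: the G_CTPOP
// expansion above for a 32-bit value (Size = 32), using the same masks and
// the final multiply-and-shift that sums the per-byte counts into the top
// byte (shift count Size - 8 = 24).
#include <cassert>
#include <cstdint>

static unsigned ctpop32(uint32_t V) {
  uint32_t B2 = V - ((V >> 1) & 0x55555555u);                   // 2-bit sums
  uint32_t B4 = (B2 & 0x33333333u) + ((B2 >> 2) & 0x33333333u); // 4-bit sums
  uint32_t B8 = (B4 + (B4 >> 4)) & 0x0F0F0F0Fu;                 // 8-bit sums
  return (B8 * 0x01010101u) >> 24; // add all bytes into the top byte
}

int main() {
  assert(ctpop32(0) == 0);
  assert(ctpop32(0xFFFFFFFFu) == 32);
  assert(ctpop32(0x80000001u) == 2);
  return 0;
}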
5796
5797// Check that (every element of) Reg is undef or not an exact multiple of BW.
5798static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI,
5799 Register Reg, unsigned BW) {
5800 return matchUnaryPredicate(
5801 MRI, Reg,
5802 [=](const Constant *C) {
5803 // Null constant here means an undef.
5804 const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(C);
5805 return !CI || CI->getValue().urem(BW) != 0;
5806 },
5807 /*AllowUndefs*/ true);
5808}
5809
5810LegalizerHelper::LegalizeResult
5811LegalizerHelper::lowerFunnelShiftWithInverse(MachineInstr &MI) {
5812 Register Dst = MI.getOperand(0).getReg();
5813 Register X = MI.getOperand(1).getReg();
5814 Register Y = MI.getOperand(2).getReg();
5815 Register Z = MI.getOperand(3).getReg();
5816 LLT Ty = MRI.getType(Dst);
5817 LLT ShTy = MRI.getType(Z);
5818
5819 unsigned BW = Ty.getScalarSizeInBits();
5820
5821 if (!isPowerOf2_32(BW))
5822 return UnableToLegalize;
5823
5824 const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
5825 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
5826
5827 if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
5828 // fshl X, Y, Z -> fshr X, Y, -Z
5829 // fshr X, Y, Z -> fshl X, Y, -Z
5830 auto Zero = MIRBuilder.buildConstant(ShTy, 0);
5831 Z = MIRBuilder.buildSub(ShTy, Zero, Z).getReg(0);
5832 } else {
5833 // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
5834 // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
5835 auto One = MIRBuilder.buildConstant(ShTy, 1);
5836 if (IsFSHL) {
5837 Y = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
5838 X = MIRBuilder.buildLShr(Ty, X, One).getReg(0);
5839 } else {
5840 X = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
5841 Y = MIRBuilder.buildShl(Ty, Y, One).getReg(0);
5842 }
5843
5844 Z = MIRBuilder.buildNot(ShTy, Z).getReg(0);
5845 }
5846
5847 MIRBuilder.buildInstr(RevOpcode, {Dst}, {X, Y, Z});
5848 MI.eraseFromParent();
5849 return Legalized;
5850}
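// --- Illustrative sketch, not part of LegalizerHelper.cpp: the inversion
// identities above checked exhaustively for 8-bit fshl. When the amount is
// known nonzero mod BW, fshl(X, Y, Z) == fshr(X, Y, -Z); otherwise the
// pre-shift by one makes ~Z (= BW - 1 - Z mod BW) a safe reverse amount.
#include <cassert>
#include <cstdint>

static uint8_t fshr8(uint8_t X, uint8_t Y, unsigned Z) {
  unsigned C = Z % 8;
  return C == 0 ? Y : (uint8_t)((X << (8 - C)) | (Y >> C));
}

static uint8_t fshl8(uint8_t X, uint8_t Y, unsigned Z) {
  unsigned C = Z % 8;
  return C == 0 ? X : (uint8_t)((X << C) | (Y >> (8 - C)));
}

int main() {
  for (unsigned Z = 1; Z < 8; ++Z) // nonzero mod BW: direct inversion
    assert(fshl8(0xAB, 0xCD, Z) == fshr8(0xAB, 0xCD, (8 - Z) % 8));
  // General form: fshl X, Y, Z == fshr (srl X, 1), (fshr X, Y, 1), ~Z.
  for (unsigned Z = 0; Z < 8; ++Z)
    assert(fshl8(0xAB, 0xCD, Z) ==
           fshr8(0xAB >> 1, fshr8(0xAB, 0xCD, 1), ~Z & 7));
  return 0;
}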
5851
5852LegalizerHelper::LegalizeResult
5853LegalizerHelper::lowerFunnelShiftAsShifts(MachineInstr &MI) {
5854 Register Dst = MI.getOperand(0).getReg();
5855 Register X = MI.getOperand(1).getReg();
5856 Register Y = MI.getOperand(2).getReg();
5857 Register Z = MI.getOperand(3).getReg();
5858 LLT Ty = MRI.getType(Dst);
5859 LLT ShTy = MRI.getType(Z);
5860
5861 const unsigned BW = Ty.getScalarSizeInBits();
5862 const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
5863
5864 Register ShX, ShY;
5865 Register ShAmt, InvShAmt;
5866
5867 // FIXME: Emit optimized urem by constant instead of letting it expand later.
5868 if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
5869 // fshl: X << C | Y >> (BW - C)
5870 // fshr: X << (BW - C) | Y >> C
5871 // where C = Z % BW is not zero
5872 auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
5873 ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
5874 InvShAmt = MIRBuilder.buildSub(ShTy, BitWidthC, ShAmt).getReg(0);
5875 ShX = MIRBuilder.buildShl(Ty, X, IsFSHL ? ShAmt : InvShAmt).getReg(0);
5876 ShY = MIRBuilder.buildLShr(Ty, Y, IsFSHL ? InvShAmt : ShAmt).getReg(0);
5877 } else {
5878 // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
5879 // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
5880 auto Mask = MIRBuilder.buildConstant(ShTy, BW - 1);
5881 if (isPowerOf2_32(BW)) {
5882 // Z % BW -> Z & (BW - 1)
5883 ShAmt = MIRBuilder.buildAnd(ShTy, Z, Mask).getReg(0);
5884 // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
5885 auto NotZ = MIRBuilder.buildNot(ShTy, Z);
5886 InvShAmt = MIRBuilder.buildAnd(ShTy, NotZ, Mask).getReg(0);
5887 } else {
5888 auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
5889 ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
5890 InvShAmt = MIRBuilder.buildSub(ShTy, Mask, ShAmt).getReg(0);
5891 }
5892
5893 auto One = MIRBuilder.buildConstant(ShTy, 1);
5894 if (IsFSHL) {
5895 ShX = MIRBuilder.buildShl(Ty, X, ShAmt).getReg(0);
5896 auto ShY1 = MIRBuilder.buildLShr(Ty, Y, One);
5897 ShY = MIRBuilder.buildLShr(Ty, ShY1, InvShAmt).getReg(0);
5898 } else {
5899 auto ShX1 = MIRBuilder.buildShl(Ty, X, One);
5900 ShX = MIRBuilder.buildShl(Ty, ShX1, InvShAmt).getReg(0);
5901 ShY = MIRBuilder.buildLShr(Ty, Y, ShAmt).getReg(0);
5902 }
5903 }
5904
5905 MIRBuilder.buildOr(Dst, ShX, ShY);
5906 MI.eraseFromParent();
5907 return Legalized;
5908}
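
A sketch (ours) of the power-of-two branch of this expansion for a 32-bit G_FSHL; the name fshl32 and the test values are assumptions for illustration:

#include <cassert>
#include <cstdint>

static uint32_t fshl32(uint32_t X, uint32_t Y, uint32_t Z) {
  uint32_t ShAmt = Z & 31;      // Z % BW as an AND, since BW is a power of 2
  uint32_t InvShAmt = ~Z & 31;  // (BW - 1) - (Z % BW)
  return (X << ShAmt) | ((Y >> 1) >> InvShAmt); // the extra >> 1 keeps every
                                                // shift amount below BW
}

int main() {
  assert(fshl32(0x12345678u, 0x9ABCDEF0u, 8) == 0x3456789Au);
  assert(fshl32(0x12345678u, 0x9ABCDEF0u, 0) == 0x12345678u);
}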
5909
5910LegalizerHelper::LegalizeResult
5911LegalizerHelper::lowerFunnelShift(MachineInstr &MI) {
5912 // These operations approximately do the following (while avoiding undefined
5913 // shifts by BW):
5914 // G_FSHL: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
5915 // G_FSHR: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
5916 Register Dst = MI.getOperand(0).getReg();
5917 LLT Ty = MRI.getType(Dst);
5918 LLT ShTy = MRI.getType(MI.getOperand(3).getReg());
5919
5920 bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
5921 unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
5922
5923 // TODO: Use smarter heuristic that accounts for vector legalization.
5924 if (LI.getAction({RevOpcode, {Ty, ShTy}}).Action == Lower)
5925 return lowerFunnelShiftAsShifts(MI);
5926
5927 // This only works for powers of 2; fall back to shifts if it fails.
5928 LegalizerHelper::LegalizeResult Result = lowerFunnelShiftWithInverse(MI);
5929 if (Result == UnableToLegalize)
5930 return lowerFunnelShiftAsShifts(MI);
5931 return Result;
5932}
5933
5934LegalizerHelper::LegalizeResult
5935LegalizerHelper::lowerRotateWithReverseRotate(MachineInstr &MI) {
5936 Register Dst = MI.getOperand(0).getReg();
5937 Register Src = MI.getOperand(1).getReg();
5938 Register Amt = MI.getOperand(2).getReg();
5939 LLT AmtTy = MRI.getType(Amt);
5940 auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
5941 bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
5942 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
5943 auto Neg = MIRBuilder.buildSub(AmtTy, Zero, Amt);
5944 MIRBuilder.buildInstr(RevRot, {Dst}, {Src, Neg});
5945 MI.eraseFromParent();
5946 return Legalized;
5947}
5948
5949LegalizerHelper::LegalizeResult LegalizerHelper::lowerRotate(MachineInstr &MI) {
5950 Register Dst = MI.getOperand(0).getReg();
5951 Register Src = MI.getOperand(1).getReg();
5952 Register Amt = MI.getOperand(2).getReg();
5953 LLT DstTy = MRI.getType(Dst);
5954 LLT SrcTy = MRI.getType(Src);
5955 LLT AmtTy = MRI.getType(Amt);
5956
5957 unsigned EltSizeInBits = DstTy.getScalarSizeInBits();
5958 bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
5959
5960 MIRBuilder.setInstrAndDebugLoc(MI);
5961
5962 // If a rotate in the other direction is supported, use it.
5963 unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
5964 if (LI.isLegalOrCustom({RevRot, {DstTy, SrcTy}}) &&
5965 isPowerOf2_32(EltSizeInBits))
5966 return lowerRotateWithReverseRotate(MI);
5967
5968 auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
5969 unsigned ShOpc = IsLeft ? TargetOpcode::G_SHL : TargetOpcode::G_LSHR;
5970 unsigned RevShiftOpc = IsLeft ? TargetOpcode::G_LSHR : TargetOpcode::G_SHL;
5971 auto BitWidthMinusOneC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits - 1);
5972 Register ShVal;
5973 Register RevShiftVal;
5974 if (isPowerOf2_32(EltSizeInBits)) {
5975 // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
5976 // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
5977 auto NegAmt = MIRBuilder.buildSub(AmtTy, Zero, Amt);
5978 auto ShAmt = MIRBuilder.buildAnd(AmtTy, Amt, BitWidthMinusOneC);
5979 ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
5980 auto RevAmt = MIRBuilder.buildAnd(AmtTy, NegAmt, BitWidthMinusOneC);
5981 RevShiftVal =
5982 MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, RevAmt}).getReg(0);
5983 } else {
5984 // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
5985 // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
5986 auto BitWidthC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits);
5987 auto ShAmt = MIRBuilder.buildURem(AmtTy, Amt, BitWidthC);
5988 ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
5989 auto RevAmt = MIRBuilder.buildSub(AmtTy, BitWidthMinusOneC, ShAmt);
5990 auto One = MIRBuilder.buildConstant(AmtTy, 1);
5991 auto Inner = MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, One});
5992 RevShiftVal =
5993 MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Inner, RevAmt}).getReg(0);
5994 }
5995 MIRBuilder.buildOr(Dst, ShVal, RevShiftVal);
5996 MI.eraseFromParent();
5997 return Legalized;
5998}
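
The power-of-two rotate expansion, as a plain C++ sketch (ours):

#include <cassert>
#include <cstdint>

static uint32_t rotl32(uint32_t X, uint32_t C) {
  // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
  return (X << (C & 31)) | (X >> ((0u - C) & 31));
}

int main() {
  assert(rotl32(0x80000001u, 1) == 0x00000003u);
  assert(rotl32(0x12345678u, 0) == 0x12345678u);
}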
5999
6000// Expand s32 = G_UITOFP s64 using bit operations to an IEEE float
6001// representation.
6002LegalizerHelper::LegalizeResult
6003LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) {
6004 Register Dst = MI.getOperand(0).getReg();
6005 Register Src = MI.getOperand(1).getReg();
6006 const LLT S64 = LLT::scalar(64);
6007 const LLT S32 = LLT::scalar(32);
6008 const LLT S1 = LLT::scalar(1);
6009
6010 assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);
6011
6012 // unsigned cul2f(ulong u) {
6013 // uint lz = clz(u);
6014 // uint e = (u != 0) ? 127U + 63U - lz : 0;
6015 // u = (u << lz) & 0x7fffffffffffffffUL;
6016 // ulong t = u & 0xffffffffffUL;
6017 // uint v = (e << 23) | (uint)(u >> 40);
6018 // uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U);
6019 // return as_float(v + r);
6020 // }
6021
6022 auto Zero32 = MIRBuilder.buildConstant(S32, 0);
6023 auto Zero64 = MIRBuilder.buildConstant(S64, 0);
6024
6025 auto LZ = MIRBuilder.buildCTLZ_ZERO_UNDEF(S32, Src);
6026
6027 auto K = MIRBuilder.buildConstant(S32, 127U + 63U);
6028 auto Sub = MIRBuilder.buildSub(S32, K, LZ);
6029
6030 auto NotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, Src, Zero64);
6031 auto E = MIRBuilder.buildSelect(S32, NotZero, Sub, Zero32);
6032
6033 auto Mask0 = MIRBuilder.buildConstant(S64, (-1ULL) >> 1);
6034 auto ShlLZ = MIRBuilder.buildShl(S64, Src, LZ);
6035
6036 auto U = MIRBuilder.buildAnd(S64, ShlLZ, Mask0);
6037
6038 auto Mask1 = MIRBuilder.buildConstant(S64, 0xffffffffffULL);
6039 auto T = MIRBuilder.buildAnd(S64, U, Mask1);
6040
6041 auto UShl = MIRBuilder.buildLShr(S64, U, MIRBuilder.buildConstant(S64, 40));
6042 auto ShlE = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 23));
6043 auto V = MIRBuilder.buildOr(S32, ShlE, MIRBuilder.buildTrunc(S32, UShl));
6044
6045 auto C = MIRBuilder.buildConstant(S64, 0x8000000000ULL);
6046 auto RCmp = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, S1, T, C);
6047 auto TCmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, T, C);
6048 auto One = MIRBuilder.buildConstant(S32, 1);
6049
6050 auto VTrunc1 = MIRBuilder.buildAnd(S32, V, One);
6051 auto Select0 = MIRBuilder.buildSelect(S32, TCmp, VTrunc1, Zero32);
6052 auto R = MIRBuilder.buildSelect(S32, RCmp, One, Select0);
6053 MIRBuilder.buildAdd(Dst, V, R);
6054
6055 MI.eraseFromParent();
6056 return Legalized;
6057}
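
The cul2f pseudocode above compiles almost verbatim; this sketch (ours) peels off the zero case and uses the GCC/Clang builtin __builtin_clzll in place of G_CTLZ_ZERO_UNDEF:

#include <cassert>
#include <cstdint>
#include <cstring>

static float cul2f(uint64_t u) {
  if (u == 0)
    return 0.0f;
  unsigned lz = __builtin_clzll(u);
  uint32_t e = 127u + 63u - lz;
  u = (u << lz) & 0x7fffffffffffffffULL;
  uint64_t t = u & 0xffffffffffULL;           // the 40 dropped bits
  uint32_t v = (e << 23) | (uint32_t)(u >> 40);
  uint32_t r = t > 0x8000000000ULL ? 1u
             : (t == 0x8000000000ULL ? (v & 1u) : 0u); // round to nearest even
  uint32_t bits = v + r;
  float f;
  std::memcpy(&f, &bits, sizeof f);
  return f;
}

int main() {
  assert(cul2f(1) == 1.0f);
  assert(cul2f(~0ULL) == 18446744073709551616.0f); // 2^64 after rounding
}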
6058
6059LegalizerHelper::LegalizeResult LegalizerHelper::lowerUITOFP(MachineInstr &MI) {
6060 Register Dst = MI.getOperand(0).getReg();
6061 Register Src = MI.getOperand(1).getReg();
6062 LLT DstTy = MRI.getType(Dst);
6063 LLT SrcTy = MRI.getType(Src);
6064
6065 if (SrcTy == LLT::scalar(1)) {
6066 auto True = MIRBuilder.buildFConstant(DstTy, 1.0);
6067 auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
6068 MIRBuilder.buildSelect(Dst, Src, True, False);
6069 MI.eraseFromParent();
6070 return Legalized;
6071 }
6072
6073 if (SrcTy != LLT::scalar(64))
6074 return UnableToLegalize;
6075
6076 if (DstTy == LLT::scalar(32)) {
6077 // TODO: SelectionDAG has several alternative expansions to port which may
6078 // be more reasonable depending on the available instructions. If a target
6079 // has sitofp, does not have CTLZ, or can efficiently use f64 as an
6080 // intermediate type, this is probably worse.
6081 return lowerU64ToF32BitOps(MI);
6082 }
6083
6084 return UnableToLegalize;
6085}
6086
6087LegalizerHelper::LegalizeResult LegalizerHelper::lowerSITOFP(MachineInstr &MI) {
6088 Register Dst = MI.getOperand(0).getReg();
6089 Register Src = MI.getOperand(1).getReg();
6090 LLT DstTy = MRI.getType(Dst);
6091 LLT SrcTy = MRI.getType(Src);
6092
6093 const LLT S64 = LLT::scalar(64);
6094 const LLT S32 = LLT::scalar(32);
6095 const LLT S1 = LLT::scalar(1);
6096
6097 if (SrcTy == S1) {
6098 auto True = MIRBuilder.buildFConstant(DstTy, -1.0);
6099 auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
6100 MIRBuilder.buildSelect(Dst, Src, True, False);
6101 MI.eraseFromParent();
6102 return Legalized;
6103 }
6104
6105 if (SrcTy != S64)
6106 return UnableToLegalize;
6107
6108 if (DstTy == S32) {
6109 // signed cl2f(long l) {
6110 // long s = l >> 63;
6111 // float r = cul2f((l + s) ^ s);
6112 // return s ? -r : r;
6113 // }
6114 Register L = Src;
6115 auto SignBit = MIRBuilder.buildConstant(S64, 63);
6116 auto S = MIRBuilder.buildAShr(S64, L, SignBit);
6117
6118 auto LPlusS = MIRBuilder.buildAdd(S64, L, S);
6119 auto Xor = MIRBuilder.buildXor(S64, LPlusS, S);
6120 auto R = MIRBuilder.buildUITOFP(S32, Xor);
6121
6122 auto RNeg = MIRBuilder.buildFNeg(S32, R);
6123 auto SignNotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, S,
6124 MIRBuilder.buildConstant(S64, 0));
6125 MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R);
6126 MI.eraseFromParent();
6127 return Legalized;
6128 }
6129
6130 return UnableToLegalize;
6131}
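
A sketch (ours) of the same sign-folding, checked against the compiler's own conversion instead of cul2f:

#include <cassert>
#include <cstdint>

static float cl2f(int64_t l) {
  uint64_t s = (uint64_t)(l >> 63);        // 0 or ~0ULL (arithmetic shift)
  uint64_t mag = ((uint64_t)l + s) ^ s;    // |l|; wraps correctly for INT64_MIN
  float r = (float)mag;                    // stands in for the cul2f expansion
  return s ? -r : r;
}

int main() {
  assert(cl2f(-1) == -1.0f);
  assert(cl2f(INT64_MIN) == -9223372036854775808.0f); // -2^63
}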
6132
6133LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOUI(MachineInstr &MI) {
6134 Register Dst = MI.getOperand(0).getReg();
6135 Register Src = MI.getOperand(1).getReg();
6136 LLT DstTy = MRI.getType(Dst);
6137 LLT SrcTy = MRI.getType(Src);
6138 const LLT S64 = LLT::scalar(64);
6139 const LLT S32 = LLT::scalar(32);
6140
6141 if (SrcTy != S64 && SrcTy != S32)
6142 return UnableToLegalize;
6143 if (DstTy != S32 && DstTy != S64)
6144 return UnableToLegalize;
6145
6146 // FPTOSI gives the same result as FPTOUI for positive signed integers.
6147 // FPTOUI needs to deal with fp values that convert to unsigned integers
6148 // greater than or equal to 2^31 for float or 2^63 for double; call this 2^Exp.
6149
6150 APInt TwoPExpInt = APInt::getSignMask(DstTy.getSizeInBits());
6151 APFloat TwoPExpFP(SrcTy.getSizeInBits() == 32 ? APFloat::IEEEsingle()
6152 : APFloat::IEEEdouble(),
6153 APInt::getNullValue(SrcTy.getSizeInBits()));
6154 TwoPExpFP.convertFromAPInt(TwoPExpInt, false, APFloat::rmNearestTiesToEven);
6155
6156 MachineInstrBuilder FPTOSI = MIRBuilder.buildFPTOSI(DstTy, Src);
6157
6158 MachineInstrBuilder Threshold = MIRBuilder.buildFConstant(SrcTy, TwoPExpFP);
6159 // For fp values greater than or equal to Threshold (2^Exp), we use FPTOSI on
6160 // (Value - 2^Exp) and add 2^Exp back by setting the highest bit in the result.
6161 MachineInstrBuilder FSub = MIRBuilder.buildFSub(SrcTy, Src, Threshold);
6162 MachineInstrBuilder ResLowBits = MIRBuilder.buildFPTOSI(DstTy, FSub);
6163 MachineInstrBuilder ResHighBit = MIRBuilder.buildConstant(DstTy, TwoPExpInt);
6164 MachineInstrBuilder Res = MIRBuilder.buildXor(DstTy, ResLowBits, ResHighBit);
6165
6166 const LLT S1 = LLT::scalar(1);
6167
6168 MachineInstrBuilder FCMP =
6169 MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, S1, Src, Threshold);
6170 MIRBuilder.buildSelect(Dst, FCMP, FPTOSI, Res);
6171
6172 MI.eraseFromParent();
6173 return Legalized;
6174}
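
The threshold trick as a plain C++ sketch (ours), for f32 -> u32:

#include <cassert>
#include <cstdint>

static uint32_t fptoui32(float Src) {
  const float Threshold = 2147483648.0f;    // 2^31, exact in f32
  if (Src < Threshold)                      // the FCMP_ULT select above
    return (uint32_t)(int32_t)Src;          // plain FPTOSI is already correct
  // Subtract 2^31 so FPTOSI fits, then set the high bit back with the XOR.
  return (uint32_t)(int32_t)(Src - Threshold) ^ 0x80000000u;
}

int main() {
  assert(fptoui32(3.5f) == 3u);
  assert(fptoui32(3000000000.0f) == 3000000000u);
}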
6175
6176LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOSI(MachineInstr &MI) {
6177 Register Dst = MI.getOperand(0).getReg();
6178 Register Src = MI.getOperand(1).getReg();
6179 LLT DstTy = MRI.getType(Dst);
6180 LLT SrcTy = MRI.getType(Src);
6181 const LLT S64 = LLT::scalar(64);
6182 const LLT S32 = LLT::scalar(32);
6183
6184 // FIXME: Only f32 to i64 conversions are supported.
6185 if (SrcTy.getScalarType() != S32 || DstTy.getScalarType() != S64)
6186 return UnableToLegalize;
6187
6188 // Expand f32 -> i64 conversion
6189 // This algorithm comes from compiler-rt's implementation of fixsfdi:
6190 // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
6191
6192 unsigned SrcEltBits = SrcTy.getScalarSizeInBits();
6193
6194 auto ExponentMask = MIRBuilder.buildConstant(SrcTy, 0x7F800000);
6195 auto ExponentLoBit = MIRBuilder.buildConstant(SrcTy, 23);
6196
6197 auto AndExpMask = MIRBuilder.buildAnd(SrcTy, Src, ExponentMask);
6198 auto ExponentBits = MIRBuilder.buildLShr(SrcTy, AndExpMask, ExponentLoBit);
6199
6200 auto SignMask = MIRBuilder.buildConstant(SrcTy,
6201 APInt::getSignMask(SrcEltBits));
6202 auto AndSignMask = MIRBuilder.buildAnd(SrcTy, Src, SignMask);
6203 auto SignLowBit = MIRBuilder.buildConstant(SrcTy, SrcEltBits - 1);
6204 auto Sign = MIRBuilder.buildAShr(SrcTy, AndSignMask, SignLowBit);
6205 Sign = MIRBuilder.buildSExt(DstTy, Sign);
6206
6207 auto MantissaMask = MIRBuilder.buildConstant(SrcTy, 0x007FFFFF);
6208 auto AndMantissaMask = MIRBuilder.buildAnd(SrcTy, Src, MantissaMask);
6209 auto K = MIRBuilder.buildConstant(SrcTy, 0x00800000);
6210
6211 auto R = MIRBuilder.buildOr(SrcTy, AndMantissaMask, K);
6212 R = MIRBuilder.buildZExt(DstTy, R);
6213
6214 auto Bias = MIRBuilder.buildConstant(SrcTy, 127);
6215 auto Exponent = MIRBuilder.buildSub(SrcTy, ExponentBits, Bias);
6216 auto SubExponent = MIRBuilder.buildSub(SrcTy, Exponent, ExponentLoBit);
6217 auto ExponentSub = MIRBuilder.buildSub(SrcTy, ExponentLoBit, Exponent);
6218
6219 auto Shl = MIRBuilder.buildShl(DstTy, R, SubExponent);
6220 auto Srl = MIRBuilder.buildLShr(DstTy, R, ExponentSub);
6221
6222 const LLT S1 = LLT::scalar(1);
6223 auto CmpGt = MIRBuilder.buildICmp(CmpInst::ICMP_SGT,
6224 S1, Exponent, ExponentLoBit);
6225
6226 R = MIRBuilder.buildSelect(DstTy, CmpGt, Shl, Srl);
6227
6228 auto XorSign = MIRBuilder.buildXor(DstTy, R, Sign);
6229 auto Ret = MIRBuilder.buildSub(DstTy, XorSign, Sign);
6230
6231 auto ZeroSrcTy = MIRBuilder.buildConstant(SrcTy, 0);
6232
6233 auto ExponentLt0 = MIRBuilder.buildICmp(CmpInst::ICMP_SLT,
6234 S1, Exponent, ZeroSrcTy);
6235
6236 auto ZeroDstTy = MIRBuilder.buildConstant(DstTy, 0);
6237 MIRBuilder.buildSelect(Dst, ExponentLt0, ZeroDstTy, Ret);
6238
6239 MI.eraseFromParent();
6240 return Legalized;
6241}
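
A sketch (ours) of the same fixsfdi-style bit manipulation on the raw f32 bits; like the expansion, it leaves out-of-range inputs unspecified:

#include <cassert>
#include <cstdint>
#include <cstring>

static int64_t fixsfdi(float F) {
  uint32_t Bits;
  std::memcpy(&Bits, &F, sizeof Bits);
  int32_t Exponent = (int32_t)((Bits & 0x7F800000u) >> 23) - 127;
  if (Exponent < 0)                               // |F| < 1 truncates to 0
    return 0;
  int64_t Sign = (int32_t)Bits >> 31;             // 0 or -1
  int64_t R = (Bits & 0x007FFFFFu) | 0x00800000u; // mantissa with implicit 1
  R = Exponent > 23 ? R << (Exponent - 23) : R >> (23 - Exponent);
  return (R ^ Sign) - Sign;                       // fold the sign back in
}

int main() {
  assert(fixsfdi(-2.75f) == -2);
  assert(fixsfdi(16777216.0f) == 16777216);       // 2^24
}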
6242
6243// f64 -> f16 conversion using round-to-nearest-even rounding mode.
6244LegalizerHelper::LegalizeResult
6245LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) {
6246 Register Dst = MI.getOperand(0).getReg();
6247 Register Src = MI.getOperand(1).getReg();
6248
6249 if (MRI.getType(Src).isVector()) // TODO: Handle vectors directly.
6250 return UnableToLegalize;
6251
6252 const unsigned ExpMask = 0x7ff;
6253 const unsigned ExpBiasf64 = 1023;
6254 const unsigned ExpBiasf16 = 15;
6255 const LLT S32 = LLT::scalar(32);
6256 const LLT S1 = LLT::scalar(1);
6257
6258 auto Unmerge = MIRBuilder.buildUnmerge(S32, Src);
6259 Register U = Unmerge.getReg(0);
6260 Register UH = Unmerge.getReg(1);
6261
6262 auto E = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 20));
6263 E = MIRBuilder.buildAnd(S32, E, MIRBuilder.buildConstant(S32, ExpMask));
6264
6265 // Subtract the fp64 exponent bias (1023) to get the real exponent and
6266 // add the f16 bias (15) to get the biased exponent for the f16 format.
6267 E = MIRBuilder.buildAdd(
6268 S32, E, MIRBuilder.buildConstant(S32, -ExpBiasf64 + ExpBiasf16));
6269
6270 auto M = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 8));
6271 M = MIRBuilder.buildAnd(S32, M, MIRBuilder.buildConstant(S32, 0xffe));
6272
6273 auto MaskedSig = MIRBuilder.buildAnd(S32, UH,
6274 MIRBuilder.buildConstant(S32, 0x1ff));
6275 MaskedSig = MIRBuilder.buildOr(S32, MaskedSig, U);
6276
6277 auto Zero = MIRBuilder.buildConstant(S32, 0);
6278 auto SigCmpNE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, MaskedSig, Zero);
6279 auto Lo40Set = MIRBuilder.buildZExt(S32, SigCmpNE0);
6280 M = MIRBuilder.buildOr(S32, M, Lo40Set);
6281
6282 // (M != 0 ? 0x0200 : 0) | 0x7c00;
6283 auto Bits0x200 = MIRBuilder.buildConstant(S32, 0x0200);
6284 auto CmpM_NE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, M, Zero);
6285 auto SelectCC = MIRBuilder.buildSelect(S32, CmpM_NE0, Bits0x200, Zero);
6286
6287 auto Bits0x7c00 = MIRBuilder.buildConstant(S32, 0x7c00);
6288 auto I = MIRBuilder.buildOr(S32, SelectCC, Bits0x7c00);
6289
6290 // N = M | (E << 12);
6291 auto EShl12 = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 12));
6292 auto N = MIRBuilder.buildOr(S32, M, EShl12);
6293
6294 // B = clamp(1-E, 0, 13);
6295 auto One = MIRBuilder.buildConstant(S32, 1);
6296 auto OneSubExp = MIRBuilder.buildSub(S32, One, E);
6297 auto B = MIRBuilder.buildSMax(S32, OneSubExp, Zero);
6298 B = MIRBuilder.buildSMin(S32, B, MIRBuilder.buildConstant(S32, 13));
6299
6300 auto SigSetHigh = MIRBuilder.buildOr(S32, M,
6301 MIRBuilder.buildConstant(S32, 0x1000));
6302
6303 auto D = MIRBuilder.buildLShr(S32, SigSetHigh, B);
6304 auto D0 = MIRBuilder.buildShl(S32, D, B);
6305
6306 auto D0_NE_SigSetHigh = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1,
6307 D0, SigSetHigh);
6308 auto D1 = MIRBuilder.buildZExt(S32, D0_NE_SigSetHigh);
6309 D = MIRBuilder.buildOr(S32, D, D1);
6310
6311 auto CmpELtOne = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, S1, E, One);
6312 auto V = MIRBuilder.buildSelect(S32, CmpELtOne, D, N);
6313
6314 auto VLow3 = MIRBuilder.buildAnd(S32, V, MIRBuilder.buildConstant(S32, 7));
6315 V = MIRBuilder.buildLShr(S32, V, MIRBuilder.buildConstant(S32, 2));
6316
6317 auto VLow3Eq3 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, VLow3,
6318 MIRBuilder.buildConstant(S32, 3));
6319 auto V0 = MIRBuilder.buildZExt(S32, VLow3Eq3);
6320
6321 auto VLow3Gt5 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1, VLow3,
6322 MIRBuilder.buildConstant(S32, 5));
6323 auto V1 = MIRBuilder.buildZExt(S32, VLow3Gt5);
6324
6325 V1 = MIRBuilder.buildOr(S32, V0, V1);
6326 V = MIRBuilder.buildAdd(S32, V, V1);
6327
6328 auto CmpEGt30 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1,
6329 E, MIRBuilder.buildConstant(S32, 30));
6330 V = MIRBuilder.buildSelect(S32, CmpEGt30,
6331 MIRBuilder.buildConstant(S32, 0x7c00), V);
6332
6333 auto CmpEEq1039 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1,
6334 E, MIRBuilder.buildConstant(S32, 1039));
6335 V = MIRBuilder.buildSelect(S32, CmpEEq1039, I, V);
6336
6337 // Extract the sign bit.
6338 auto Sign = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 16));
6339 Sign = MIRBuilder.buildAnd(S32, Sign, MIRBuilder.buildConstant(S32, 0x8000));
6340
6341 // Insert the sign bit
6342 V = MIRBuilder.buildOr(S32, Sign, V);
6343
6344 MIRBuilder.buildTrunc(Dst, V);
6345 MI.eraseFromParent();
6346 return Legalized;
6347}
6348
6349LegalizerHelper::LegalizeResult
6350LegalizerHelper::lowerFPTRUNC(MachineInstr &MI) {
6351 Register Dst = MI.getOperand(0).getReg();
6352 Register Src = MI.getOperand(1).getReg();
6353
6354 LLT DstTy = MRI.getType(Dst);
6355 LLT SrcTy = MRI.getType(Src);
6356 const LLT S64 = LLT::scalar(64);
6357 const LLT S16 = LLT::scalar(16);
6358
6359 if (DstTy.getScalarType() == S16 && SrcTy.getScalarType() == S64)
6360 return lowerFPTRUNC_F64_TO_F16(MI);
6361
6362 return UnableToLegalize;
6363}
6364
6365// TODO: If RHS is a constant SelectionDAGBuilder expands this into a
6366// multiplication tree.
6367LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPOWI(MachineInstr &MI) {
6368 Register Dst = MI.getOperand(0).getReg();
6369 Register Src0 = MI.getOperand(1).getReg();
6370 Register Src1 = MI.getOperand(2).getReg();
6371 LLT Ty = MRI.getType(Dst);
6372
6373 auto CvtSrc1 = MIRBuilder.buildSITOFP(Ty, Src1);
6374 MIRBuilder.buildFPow(Dst, Src0, CvtSrc1, MI.getFlags());
6375 MI.eraseFromParent();
6376 return Legalized;
6377}
6378
6379static CmpInst::Predicate minMaxToCompare(unsigned Opc) {
6380 switch (Opc) {
6381 case TargetOpcode::G_SMIN:
6382 return CmpInst::ICMP_SLT;
6383 case TargetOpcode::G_SMAX:
6384 return CmpInst::ICMP_SGT;
6385 case TargetOpcode::G_UMIN:
6386 return CmpInst::ICMP_ULT;
6387 case TargetOpcode::G_UMAX:
6388 return CmpInst::ICMP_UGT;
6389 default:
6390 llvm_unreachable("not in integer min/max");
6391 }
6392}
6393
6394LegalizerHelper::LegalizeResult LegalizerHelper::lowerMinMax(MachineInstr &MI) {
6395 Register Dst = MI.getOperand(0).getReg();
6396 Register Src0 = MI.getOperand(1).getReg();
6397 Register Src1 = MI.getOperand(2).getReg();
6398
6399 const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode());
6400 LLT CmpType = MRI.getType(Dst).changeElementSize(1);
6401
6402 auto Cmp = MIRBuilder.buildICmp(Pred, CmpType, Src0, Src1);
6403 MIRBuilder.buildSelect(Dst, Cmp, Src0, Src1);
6404
6405 MI.eraseFromParent();
6406 return Legalized;
6407}
6408
6409LegalizerHelper::LegalizeResult
6410LegalizerHelper::lowerFCopySign(MachineInstr &MI) {
6411 Register Dst = MI.getOperand(0).getReg();
6412 Register Src0 = MI.getOperand(1).getReg();
6413 Register Src1 = MI.getOperand(2).getReg();
6414
6415 const LLT Src0Ty = MRI.getType(Src0);
6416 const LLT Src1Ty = MRI.getType(Src1);
6417
6418 const int Src0Size = Src0Ty.getScalarSizeInBits();
6419 const int Src1Size = Src1Ty.getScalarSizeInBits();
6420
6421 auto SignBitMask = MIRBuilder.buildConstant(
6422 Src0Ty, APInt::getSignMask(Src0Size));
6423
6424 auto NotSignBitMask = MIRBuilder.buildConstant(
6425 Src0Ty, APInt::getLowBitsSet(Src0Size, Src0Size - 1));
6426
6427 Register And0 = MIRBuilder.buildAnd(Src0Ty, Src0, NotSignBitMask).getReg(0);
6428 Register And1;
6429 if (Src0Ty == Src1Ty) {
6430 And1 = MIRBuilder.buildAnd(Src1Ty, Src1, SignBitMask).getReg(0);
6431 } else if (Src0Size > Src1Size) {
6432 auto ShiftAmt = MIRBuilder.buildConstant(Src0Ty, Src0Size - Src1Size);
6433 auto Zext = MIRBuilder.buildZExt(Src0Ty, Src1);
6434 auto Shift = MIRBuilder.buildShl(Src0Ty, Zext, ShiftAmt);
6435 And1 = MIRBuilder.buildAnd(Src0Ty, Shift, SignBitMask).getReg(0);
6436 } else {
6437 auto ShiftAmt = MIRBuilder.buildConstant(Src1Ty, Src1Size - Src0Size);
6438 auto Shift = MIRBuilder.buildLShr(Src1Ty, Src1, ShiftAmt);
6439 auto Trunc = MIRBuilder.buildTrunc(Src0Ty, Shift);
6440 And1 = MIRBuilder.buildAnd(Src0Ty, Trunc, SignBitMask).getReg(0);
6441 }
6442
6443 // Be careful about setting nsz/nnan/ninf on every instruction, since the
6444 // constants are a nan and -0.0, but the final result should preserve
6445 // everything.
6446 unsigned Flags = MI.getFlags();
6447 MIRBuilder.buildOr(Dst, And0, And1, Flags);
6448
6449 MI.eraseFromParent();
6450 return Legalized;
6451}
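
The same-width case of this masking, as a C++ sketch (ours) on raw f32 bits:

#include <cassert>
#include <cstdint>
#include <cstring>

static float copysignf32(float Mag, float Sgn) {
  uint32_t M, S;
  std::memcpy(&M, &Mag, sizeof M);
  std::memcpy(&S, &Sgn, sizeof S);
  uint32_t R = (M & 0x7FFFFFFFu)   // And0: clear the sign of Mag
             | (S & 0x80000000u);  // And1: keep only the sign of Sgn
  float Out;
  std::memcpy(&Out, &R, sizeof Out);
  return Out;
}

int main() {
  assert(copysignf32(3.0f, -1.0f) == -3.0f);
  assert(copysignf32(-3.0f, 1.0f) == 3.0f);
}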
6452
6453LegalizerHelper::LegalizeResult
6454LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) {
6455 unsigned NewOp = MI.getOpcode() == TargetOpcode::G_FMINNUM ?
6456 TargetOpcode::G_FMINNUM_IEEE : TargetOpcode::G_FMAXNUM_IEEE;
6457
6458 Register Dst = MI.getOperand(0).getReg();
6459 Register Src0 = MI.getOperand(1).getReg();
6460 Register Src1 = MI.getOperand(2).getReg();
6461 LLT Ty = MRI.getType(Dst);
6462
6463 if (!MI.getFlag(MachineInstr::FmNoNans)) {
6464 // Insert canonicalizes if it's possible we need to quiet to get correct
6465 // sNaN behavior.
6466
6467 // Note this must be done here, and not as an optimization combine in the
6468 // absence of a dedicated quiet-sNaN instruction, as we're using an
6469 // omni-purpose G_FCANONICALIZE.
6470 if (!isKnownNeverSNaN(Src0, MRI))
6471 Src0 = MIRBuilder.buildFCanonicalize(Ty, Src0, MI.getFlags()).getReg(0);
6472
6473 if (!isKnownNeverSNaN(Src1, MRI))
6474 Src1 = MIRBuilder.buildFCanonicalize(Ty, Src1, MI.getFlags()).getReg(0);
6475 }
6476
6477 // If there are no nans, it's safe to simply replace this with the non-IEEE
6478 // version.
6479 MIRBuilder.buildInstr(NewOp, {Dst}, {Src0, Src1}, MI.getFlags());
6480 MI.eraseFromParent();
6481 return Legalized;
6482}
6483
6484LegalizerHelper::LegalizeResult LegalizerHelper::lowerFMad(MachineInstr &MI) {
6485 // Expand G_FMAD a, b, c -> G_FADD (G_FMUL a, b), c
6486 Register DstReg = MI.getOperand(0).getReg();
6487 LLT Ty = MRI.getType(DstReg);
6488 unsigned Flags = MI.getFlags();
6489
6490 auto Mul = MIRBuilder.buildFMul(Ty, MI.getOperand(1), MI.getOperand(2),
6491 Flags);
6492 MIRBuilder.buildFAdd(DstReg, Mul, MI.getOperand(3), Flags);
6493 MI.eraseFromParent();
6494 return Legalized;
6495}
6496
6497LegalizerHelper::LegalizeResult
6498LegalizerHelper::lowerIntrinsicRound(MachineInstr &MI) {
6499 Register DstReg = MI.getOperand(0).getReg();
6500 Register X = MI.getOperand(1).getReg();
6501 const unsigned Flags = MI.getFlags();
6502 const LLT Ty = MRI.getType(DstReg);
6503 const LLT CondTy = Ty.changeElementSize(1);
6504
6505 // round(x) =>
6506 // t = trunc(x);
6507 // d = fabs(x - t);
6508 // o = copysign(1.0f, x);
6509 // return t + (d >= 0.5 ? o : 0.0);
6510
6511 auto T = MIRBuilder.buildIntrinsicTrunc(Ty, X, Flags);
6512
6513 auto Diff = MIRBuilder.buildFSub(Ty, X, T, Flags);
6514 auto AbsDiff = MIRBuilder.buildFAbs(Ty, Diff, Flags);
6515 auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
6516 auto One = MIRBuilder.buildFConstant(Ty, 1.0);
6517 auto Half = MIRBuilder.buildFConstant(Ty, 0.5);
6518 auto SignOne = MIRBuilder.buildFCopysign(Ty, One, X);
6519
6520 auto Cmp = MIRBuilder.buildFCmp(CmpInst::FCMP_OGE, CondTy, AbsDiff, Half,
6521 Flags);
6522 auto Sel = MIRBuilder.buildSelect(Ty, Cmp, SignOne, Zero, Flags);
6523
6524 MIRBuilder.buildFAdd(DstReg, T, Sel, Flags);
6525
6526 MI.eraseFromParent();
6527 return Legalized;
6528}
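
The pseudocode above, made runnable (function name and test values ours):

#include <cassert>
#include <cmath>

static float roundExpansion(float X) {
  float T = std::trunc(X);
  float D = std::fabs(X - T);          // distance to the truncation
  float O = std::copysign(1.0f, X);
  return T + (D >= 0.5f ? O : 0.0f);   // step away from zero on ties
}

int main() {
  assert(roundExpansion(2.5f) == 3.0f);
  assert(roundExpansion(-2.5f) == -3.0f);
  assert(roundExpansion(2.4f) == 2.0f);
}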
6529
6530LegalizerHelper::LegalizeResult
6531LegalizerHelper::lowerFFloor(MachineInstr &MI) {
6532 Register DstReg = MI.getOperand(0).getReg();
6533 Register SrcReg = MI.getOperand(1).getReg();
6534 unsigned Flags = MI.getFlags();
6535 LLT Ty = MRI.getType(DstReg);
6536 const LLT CondTy = Ty.changeElementSize(1);
6537
6538 // result = trunc(src);
6539 // if (src < 0.0 && src != result)
6540 // result += -1.0.
6541
6542 auto Trunc = MIRBuilder.buildIntrinsicTrunc(Ty, SrcReg, Flags);
6543 auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
6544
6545 auto Lt0 = MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, CondTy,
6546 SrcReg, Zero, Flags);
6547 auto NeTrunc = MIRBuilder.buildFCmp(CmpInst::FCMP_ONE, CondTy,
6548 SrcReg, Trunc, Flags);
6549 auto And = MIRBuilder.buildAnd(CondTy, Lt0, NeTrunc);
6550 auto AddVal = MIRBuilder.buildSITOFP(Ty, And);
6551
6552 MIRBuilder.buildFAdd(DstReg, Trunc, AddVal, Flags);
6553 MI.eraseFromParent();
6554 return Legalized;
6555}
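
A runnable sketch (ours) of the same expansion; the i1-to-float step is folded into a conditional:

#include <cassert>
#include <cmath>

static float floorExpansion(float Src) {
  float T = std::trunc(Src);
  bool NeedAdjust = Src < 0.0f && Src != T; // the AND of the two compares
  return T + (NeedAdjust ? -1.0f : 0.0f);   // SITOFP of i1 true is -1.0
}

int main() {
  assert(floorExpansion(-1.5f) == -2.0f);
  assert(floorExpansion(1.5f) == 1.0f);
  assert(floorExpansion(-2.0f) == -2.0f);
}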
6556
6557LegalizerHelper::LegalizeResult
6558LegalizerHelper::lowerMergeValues(MachineInstr &MI) {
6559 const unsigned NumOps = MI.getNumOperands();
6560 Register DstReg = MI.getOperand(0).getReg();
6561 Register Src0Reg = MI.getOperand(1).getReg();
6562 LLT DstTy = MRI.getType(DstReg);
6563 LLT SrcTy = MRI.getType(Src0Reg);
6564 unsigned PartSize = SrcTy.getSizeInBits();
6565
6566 LLT WideTy = LLT::scalar(DstTy.getSizeInBits());
6567 Register ResultReg = MIRBuilder.buildZExt(WideTy, Src0Reg).getReg(0);
6568
6569 for (unsigned I = 2; I != NumOps; ++I) {
6570 const unsigned Offset = (I - 1) * PartSize;
6571
6572 Register SrcReg = MI.getOperand(I).getReg();
6573 auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
6574
6575 Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
6576 MRI.createGenericVirtualRegister(WideTy);
6577
6578 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
6579 auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
6580 MIRBuilder.buildOr(NextResult, ResultReg, Shl);
6581 ResultReg = NextResult;
6582 }
6583
6584 if (DstTy.isPointer()) {
6585 if (MIRBuilder.getDataLayout().isNonIntegralAddressSpace(
6586 DstTy.getAddressSpace())) {
6587 LLVM_DEBUG(dbgs() << "Not casting nonintegral address space\n");
6588 return UnableToLegalize;
6589 }
6590
6591 MIRBuilder.buildIntToPtr(DstReg, ResultReg);
6592 }
6593
6594 MI.eraseFromParent();
6595 return Legalized;
6596}
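
A sketch (ours) of the zext/shl/or chain, merging four i8 parts into an i32:

#include <cassert>
#include <cstdint>

int main() {
  // Part 0 is zero-extended directly; every later part is zero-extended,
  // shifted to its offset, and OR'd in.
  uint8_t Parts[4] = {0x78, 0x56, 0x34, 0x12};
  uint32_t Result = Parts[0];
  for (unsigned I = 1; I != 4; ++I)
    Result |= (uint32_t)Parts[I] << (8 * I);
  assert(Result == 0x12345678u);
}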
6597
6598LegalizerHelper::LegalizeResult
6599LegalizerHelper::lowerUnmergeValues(MachineInstr &MI) {
6600 const unsigned NumDst = MI.getNumOperands() - 1;
6601 Register SrcReg = MI.getOperand(NumDst).getReg();
6602 Register Dst0Reg = MI.getOperand(0).getReg();
6603 LLT DstTy = MRI.getType(Dst0Reg);
6604 if (DstTy.isPointer())
6605 return UnableToLegalize; // TODO
6606
6607 SrcReg = coerceToScalar(SrcReg);
6608 if (!SrcReg)
6609 return UnableToLegalize;
6610
6611 // Expand scalarizing unmerge as bitcast to integer and shift.
6612 LLT IntTy = MRI.getType(SrcReg);
6613
6614 MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
6615
6616 const unsigned DstSize = DstTy.getSizeInBits();
6617 unsigned Offset = DstSize;
6618 for (unsigned I = 1; I != NumDst; ++I, Offset += DstSize) {
6619 auto ShiftAmt = MIRBuilder.buildConstant(IntTy, Offset);
6620 auto Shift = MIRBuilder.buildLShr(IntTy, SrcReg, ShiftAmt);
6621 MIRBuilder.buildTrunc(MI.getOperand(I), Shift);
6622 }
6623
6624 MI.eraseFromParent();
6625 return Legalized;
6626}
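
The inverse direction as a sketch (ours): shift right by each part's offset and truncate:

#include <cassert>
#include <cstdint>

int main() {
  // Part 0 is a plain truncate; each later part is the source shifted right
  // by its offset, then truncated.
  uint32_t Src = 0x12345678u;
  uint8_t Parts[4];
  Parts[0] = (uint8_t)Src;
  for (unsigned I = 1; I != 4; ++I)
    Parts[I] = (uint8_t)(Src >> (8 * I));
  assert(Parts[0] == 0x78 && Parts[1] == 0x56 && Parts[3] == 0x12);
}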
6627
6628/// Lower a vector extract or insert by writing the vector to a stack temporary
6629/// and reloading the element or vector.
6630///
6631/// %dst = G_EXTRACT_VECTOR_ELT %vec, %idx
6632/// =>
6633/// %stack_temp = G_FRAME_INDEX
6634/// G_STORE %vec, %stack_temp
6635/// %idx = clamp(%idx, %vec.getNumElements())
6636/// %element_ptr = G_PTR_ADD %stack_temp, %idx
6637/// %dst = G_LOAD %element_ptr
6638LegalizerHelper::LegalizeResult
6639LegalizerHelper::lowerExtractInsertVectorElt(MachineInstr &MI) {
6640 Register DstReg = MI.getOperand(0).getReg();
6641 Register SrcVec = MI.getOperand(1).getReg();
6642 Register InsertVal;
6643 if (MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
6644 InsertVal = MI.getOperand(2).getReg();
6645
6646 Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
6647
6648 LLT VecTy = MRI.getType(SrcVec);
6649 LLT EltTy = VecTy.getElementType();
6650 if (!EltTy.isByteSized()) { // Not implemented.
6651 LLVM_DEBUG(dbgs() << "Can't handle non-byte element vectors yet\n");
6652 return UnableToLegalize;
6653 }
6654
6655 unsigned EltBytes = EltTy.getSizeInBytes();
6656 Align VecAlign = getStackTemporaryAlignment(VecTy);
6657 Align EltAlign;
6658
6659 MachinePointerInfo PtrInfo;
6660 auto StackTemp = createStackTemporary(TypeSize::Fixed(VecTy.getSizeInBytes()),
6661 VecAlign, PtrInfo);
6662 MIRBuilder.buildStore(SrcVec, StackTemp, PtrInfo, VecAlign);
6663
6664 // Get the pointer to the element, and be sure not to hit undefined behavior
6665 // if the index is out of bounds.
6666 Register EltPtr = getVectorElementPointer(StackTemp.getReg(0), VecTy, Idx);
6667
6668 int64_t IdxVal;
6669 if (mi_match(Idx, MRI, m_ICst(IdxVal))) {
6670 int64_t Offset = IdxVal * EltBytes;
6671 PtrInfo = PtrInfo.getWithOffset(Offset);
6672 EltAlign = commonAlignment(VecAlign, Offset);
6673 } else {
6674 // We lose information with a variable offset.
6675 EltAlign = getStackTemporaryAlignment(EltTy);
6676 PtrInfo = MachinePointerInfo(MRI.getType(EltPtr).getAddressSpace());
6677 }
6678
6679 if (InsertVal) {
6680 // Write the inserted element
6681 MIRBuilder.buildStore(InsertVal, EltPtr, PtrInfo, EltAlign);
6682
6683 // Reload the whole vector.
6684 MIRBuilder.buildLoad(DstReg, StackTemp, PtrInfo, VecAlign);
6685 } else {
6686 MIRBuilder.buildLoad(DstReg, EltPtr, PtrInfo, EltAlign);
6687 }
6688
6689 MI.eraseFromParent();
6690 return Legalized;
6691}
6692
6693LegalizerHelper::LegalizeResult
6694LegalizerHelper::lowerShuffleVector(MachineInstr &MI) {
6695 Register DstReg = MI.getOperand(0).getReg();
6696 Register Src0Reg = MI.getOperand(1).getReg();
6697 Register Src1Reg = MI.getOperand(2).getReg();
6698 LLT Src0Ty = MRI.getType(Src0Reg);
6699 LLT DstTy = MRI.getType(DstReg);
6700 LLT IdxTy = LLT::scalar(32);
6701
6702 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
6703
6704 if (DstTy.isScalar()) {
6705 if (Src0Ty.isVector())
6706 return UnableToLegalize;
6707
6708 // This is just a SELECT.
6709 assert(Mask.size() == 1 && "Expected a single mask element");
6710 Register Val;
6711 if (Mask[0] < 0 || Mask[0] > 1)
6712 Val = MIRBuilder.buildUndef(DstTy).getReg(0);
6713 else
6714 Val = Mask[0] == 0 ? Src0Reg : Src1Reg;
6715 MIRBuilder.buildCopy(DstReg, Val);
6716 MI.eraseFromParent();
6717 return Legalized;
6718 }
6719
6720 Register Undef;
6721 SmallVector<Register, 32> BuildVec;
6722 LLT EltTy = DstTy.getElementType();
6723
6724 for (int Idx : Mask) {
6725 if (Idx < 0) {
6726 if (!Undef.isValid())
6727 Undef = MIRBuilder.buildUndef(EltTy).getReg(0);
6728 BuildVec.push_back(Undef);
6729 continue;
6730 }
6731
6732 if (Src0Ty.isScalar()) {
6733 BuildVec.push_back(Idx == 0 ? Src0Reg : Src1Reg);
6734 } else {
6735 int NumElts = Src0Ty.getNumElements();
6736 Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg;
6737 int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts;
6738 auto IdxK = MIRBuilder.buildConstant(IdxTy, ExtractIdx);
6739 auto Extract = MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK);
6740 BuildVec.push_back(Extract.getReg(0));
6741 }
6742 }
6743
6744 MIRBuilder.buildBuildVector(DstReg, BuildVec);
6745 MI.eraseFromParent();
6746 return Legalized;
6747}
6748
6749LegalizerHelper::LegalizeResult
6750LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) {
6751 const auto &MF = *MI.getMF();
6752 const auto &TFI = *MF.getSubtarget().getFrameLowering();
6753 if (TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp)
6754 return UnableToLegalize;
6755
6756 Register Dst = MI.getOperand(0).getReg();
6757 Register AllocSize = MI.getOperand(1).getReg();
6758 Align Alignment = assumeAligned(MI.getOperand(2).getImm());
6759
6760 LLT PtrTy = MRI.getType(Dst);
6761 LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
6762
6763 Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
6764 auto SPTmp = MIRBuilder.buildCopy(PtrTy, SPReg);
6765 SPTmp = MIRBuilder.buildCast(IntPtrTy, SPTmp);
6766
6767 // Subtract the final alloc from the SP. We use G_PTRTOINT here so we don't
6768 // have to generate an extra instruction to negate the alloc and then use
6769 // G_PTR_ADD to add the negative offset.
6770 auto Alloc = MIRBuilder.buildSub(IntPtrTy, SPTmp, AllocSize);
6771 if (Alignment > Align(1)) {
6772 APInt AlignMask(IntPtrTy.getSizeInBits(), Alignment.value(), true);
6773 AlignMask.negate();
6774 auto AlignCst = MIRBuilder.buildConstant(IntPtrTy, AlignMask);
6775 Alloc = MIRBuilder.buildAnd(IntPtrTy, Alloc, AlignCst);
6776 }
6777
6778 SPTmp = MIRBuilder.buildCast(PtrTy, Alloc);
6779 MIRBuilder.buildCopy(SPReg, SPTmp);
6780 MIRBuilder.buildCopy(Dst, SPTmp);
6781
6782 MI.eraseFromParent();
6783 return Legalized;
6784}
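
The align-down step as arithmetic (sketch ours): ANDing with the negated mask rounds the new SP down to a multiple of the alignment:

#include <cassert>
#include <cstdint>

int main() {
  uint64_t SP = 0x7fff1237;
  uint64_t Alignment = 16;
  uint64_t Aligned = SP & ~(Alignment - 1); // same as AND with -Alignment
  assert(Aligned == 0x7fff1230 && Aligned % 16 == 0);
}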
6785
6786LegalizerHelper::LegalizeResult
6787LegalizerHelper::lowerExtract(MachineInstr &MI) {
6788 Register Dst = MI.getOperand(0).getReg();
6789 Register Src = MI.getOperand(1).getReg();
6790 unsigned Offset = MI.getOperand(2).getImm();
6791
6792 LLT DstTy = MRI.getType(Dst);
6793 LLT SrcTy = MRI.getType(Src);
6794
6795 if (DstTy.isScalar() &&
6796 (SrcTy.isScalar() ||
6797 (SrcTy.isVector() && DstTy == SrcTy.getElementType()))) {
6798 LLT SrcIntTy = SrcTy;
6799 if (!SrcTy.isScalar()) {
6800 SrcIntTy = LLT::scalar(SrcTy.getSizeInBits());
6801 Src = MIRBuilder.buildBitcast(SrcIntTy, Src).getReg(0);
6802 }
6803
6804 if (Offset == 0)
6805 MIRBuilder.buildTrunc(Dst, Src);
6806 else {
6807 auto ShiftAmt = MIRBuilder.buildConstant(SrcIntTy, Offset);
6808 auto Shr = MIRBuilder.buildLShr(SrcIntTy, Src, ShiftAmt);
6809 MIRBuilder.buildTrunc(Dst, Shr);
6810 }
6811
6812 MI.eraseFromParent();
6813 return Legalized;
6814 }
6815
6816 return UnableToLegalize;
6817}
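
A sketch (ours) of the shift-then-truncate extract, pulling a 16-bit field at bit offset 8 out of an i64:

#include <cassert>
#include <cstdint>

int main() {
  uint64_t Src = 0x1122334455667788ULL;
  uint16_t Dst = (uint16_t)(Src >> 8); // lshr by Offset, then trunc
  assert(Dst == 0x6677);
}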
6818
6819LegalizerHelper::LegalizeResult LegalizerHelper::lowerInsert(MachineInstr &MI) {
6820 Register Dst = MI.getOperand(0).getReg();
6821 Register Src = MI.getOperand(1).getReg();
6822 Register InsertSrc = MI.getOperand(2).getReg();
6823 uint64_t Offset = MI.getOperand(3).getImm();
6824
6825 LLT DstTy = MRI.getType(Src);
6826 LLT InsertTy = MRI.getType(InsertSrc);
6827
6828 if (InsertTy.isVector() ||
6829 (DstTy.isVector() && DstTy.getElementType() != InsertTy))
6830 return UnableToLegalize;
6831
6832 const DataLayout &DL = MIRBuilder.getDataLayout();
6833 if ((DstTy.isPointer() &&
6834 DL.isNonIntegralAddressSpace(DstTy.getAddressSpace())) ||
6835 (InsertTy.isPointer() &&
6836 DL.isNonIntegralAddressSpace(InsertTy.getAddressSpace()))) {
6837 LLVM_DEBUG(dbgs() << "Not casting non-integral address space integer\n");
6838 return UnableToLegalize;
6839 }
6840
6841 LLT IntDstTy = DstTy;
6842
6843 if (!DstTy.isScalar()) {
6844 IntDstTy = LLT::scalar(DstTy.getSizeInBits());
6845 Src = MIRBuilder.buildCast(IntDstTy, Src).getReg(0);
6846 }
6847
6848 if (!InsertTy.isScalar()) {
6849 const LLT IntInsertTy = LLT::scalar(InsertTy.getSizeInBits());
6850 InsertSrc = MIRBuilder.buildPtrToInt(IntInsertTy, InsertSrc).getReg(0);
6851 }
6852
6853 Register ExtInsSrc = MIRBuilder.buildZExt(IntDstTy, InsertSrc).getReg(0);
6854 if (Offset != 0) {
6855 auto ShiftAmt = MIRBuilder.buildConstant(IntDstTy, Offset);
6856 ExtInsSrc = MIRBuilder.buildShl(IntDstTy, ExtInsSrc, ShiftAmt).getReg(0);
6857 }
6858
6859 APInt MaskVal = APInt::getBitsSetWithWrap(
6860 DstTy.getSizeInBits(), Offset + InsertTy.getSizeInBits(), Offset);
6861
6862 auto Mask = MIRBuilder.buildConstant(IntDstTy, MaskVal);
6863 auto MaskedSrc = MIRBuilder.buildAnd(IntDstTy, Src, Mask);
6864 auto Or = MIRBuilder.buildOr(IntDstTy, MaskedSrc, ExtInsSrc);
6865
6866 MIRBuilder.buildCast(Dst, Or);
6867 MI.eraseFromParent();
6868 return Legalized;
6869}
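
The matching insert, sketched (ours) with the complement of the wrap mask computed inline:

#include <cassert>
#include <cstdint>

int main() {
  // Zero-extend and shift the insert source, clear those bits in the
  // destination with the mask, then OR the field in.
  uint64_t Src = 0x1122334455667788ULL;
  uint16_t Ins = 0xABCD;
  uint64_t Mask = ~(0xFFFFULL << 8);  // bits outside the inserted field
  uint64_t Dst = (Src & Mask) | ((uint64_t)Ins << 8);
  assert(Dst == 0x1122334455ABCD88ULL);
}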
6870
6871LegalizerHelper::LegalizeResult
6872LegalizerHelper::lowerSADDO_SSUBO(MachineInstr &MI) {
6873 Register Dst0 = MI.getOperand(0).getReg();
6874 Register Dst1 = MI.getOperand(1).getReg();
6875 Register LHS = MI.getOperand(2).getReg();
6876 Register RHS = MI.getOperand(3).getReg();
6877 const bool IsAdd = MI.getOpcode() == TargetOpcode::G_SADDO;
6878
6879 LLT Ty = MRI.getType(Dst0);
6880 LLT BoolTy = MRI.getType(Dst1);
6881
6882 if (IsAdd)
6883 MIRBuilder.buildAdd(Dst0, LHS, RHS);
6884 else
6885 MIRBuilder.buildSub(Dst0, LHS, RHS);
6886
6887 // TODO: If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
6888
6889 auto Zero = MIRBuilder.buildConstant(Ty, 0);
6890
6891 // For an addition, the result should be less than one of the operands (LHS)
6892 // if and only if the other operand (RHS) is negative, otherwise there will
6893 // be overflow.
6894 // For a subtraction, the result should be less than one of the operands
6895 // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
6896 // otherwise there will be overflow.
6897 auto ResultLowerThanLHS =
6898 MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, Dst0, LHS);
6899 auto ConditionRHS = MIRBuilder.buildICmp(
6900 IsAdd ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGT, BoolTy, RHS, Zero);
6901
6902 MIRBuilder.buildXor(Dst1, ConditionRHS, ResultLowerThanLHS);
6903 MI.eraseFromParent();
6904 return Legalized;
6905}
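
The overflow test as a sketch (ours) for 32-bit G_SADDO, using unsigned arithmetic for the wrap-around add:

#include <cassert>
#include <cstdint>

static bool saddOverflow(int32_t LHS, int32_t RHS, int32_t &Dst0) {
  Dst0 = (int32_t)((uint32_t)LHS + (uint32_t)RHS); // wrap-around add
  bool ResultLowerThanLHS = Dst0 < LHS;
  bool RHSIsNegative = RHS < 0;
  return ResultLowerThanLHS != RHSIsNegative;      // the final G_XOR
}

int main() {
  int32_t R;
  assert(!saddOverflow(1, 2, R) && R == 3);
  assert(saddOverflow(INT32_MAX, 1, R));
  assert(saddOverflow(INT32_MIN, -1, R));
}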
6906
6907LegalizerHelper::LegalizeResult
6908LegalizerHelper::lowerAddSubSatToMinMax(MachineInstr &MI) {
6909 Register Res = MI.getOperand(0).getReg();
6910 Register LHS = MI.getOperand(1).getReg();
6911 Register RHS = MI.getOperand(2).getReg();
6912 LLT Ty = MRI.getType(Res);
6913 bool IsSigned;
6914 bool IsAdd;
6915 unsigned BaseOp;
6916 switch (MI.getOpcode()) {
6917 default:
6918 llvm_unreachable("unexpected addsat/subsat opcode");
6919 case TargetOpcode::G_UADDSAT:
6920 IsSigned = false;
6921 IsAdd = true;
6922 BaseOp = TargetOpcode::G_ADD;
6923 break;
6924 case TargetOpcode::G_SADDSAT:
6925 IsSigned = true;
6926 IsAdd = true;
6927 BaseOp = TargetOpcode::G_ADD;
6928 break;
6929 case TargetOpcode::G_USUBSAT:
6930 IsSigned = false;
6931 IsAdd = false;
6932 BaseOp = TargetOpcode::G_SUB;
6933 break;
6934 case TargetOpcode::G_SSUBSAT:
6935 IsSigned = true;
6936 IsAdd = false;
6937 BaseOp = TargetOpcode::G_SUB;
6938 break;
6939 }
6940
6941 if (IsSigned) {
6942 // sadd.sat(a, b) ->
6943 // hi = 0x7fffffff - smax(a, 0)
6944 // lo = 0x80000000 - smin(a, 0)
6945 // a + smin(smax(lo, b), hi)
6946 // ssub.sat(a, b) ->
6947 // lo = smax(a, -1) - 0x7fffffff
6948 // hi = smin(a, -1) - 0x80000000
6949 // a - smin(smax(lo, b), hi)
6950 // TODO: AMDGPU can use a "median of 3" instruction here:
6951 // a +/- med3(lo, b, hi)
6952 uint64_t NumBits = Ty.getScalarSizeInBits();
6953 auto MaxVal =
6954 MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(NumBits));
6955 auto MinVal =
6956 MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
6957 MachineInstrBuilder Hi, Lo;
6958 if (IsAdd) {
6959 auto Zero = MIRBuilder.buildConstant(Ty, 0);
6960 Hi = MIRBuilder.buildSub(Ty, MaxVal, MIRBuilder.buildSMax(Ty, LHS, Zero));
6961 Lo = MIRBuilder.buildSub(Ty, MinVal, MIRBuilder.buildSMin(Ty, LHS, Zero));
6962 } else {
6963 auto NegOne = MIRBuilder.buildConstant(Ty, -1);
6964 Lo = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMax(Ty, LHS, NegOne),
6965 MaxVal);
6966 Hi = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMin(Ty, LHS, NegOne),
6967 MinVal);
6968 }
6969 auto RHSClamped =
6970 MIRBuilder.buildSMin(Ty, MIRBuilder.buildSMax(Ty, Lo, RHS), Hi);
6971 MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, RHSClamped});
6972 } else {
6973 // uadd.sat(a, b) -> a + umin(~a, b)
6974 // usub.sat(a, b) -> a - umin(a, b)
6975 Register Not = IsAdd ? MIRBuilder.buildNot(Ty, LHS).getReg(0) : LHS;
6976 auto Min = MIRBuilder.buildUMin(Ty, Not, RHS);
6977 MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, Min});
6978 }
6979
6980 MI.eraseFromParent();
6981 return Legalized;
6982}
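
The signed clamp for sadd.sat, sketched (ours) with std::min/std::max standing in for G_SMIN/G_SMAX:

#include <algorithm>
#include <cassert>
#include <cstdint>

static int32_t saddSat(int32_t A, int32_t B) {
  int32_t Hi = INT32_MAX - std::max(A, 0);  // largest safe addend
  int32_t Lo = INT32_MIN - std::min(A, 0);  // smallest safe addend
  return A + std::min(std::max(Lo, B), Hi); // clamp B, then a plain add
}

int main() {
  assert(saddSat(INT32_MAX, 1) == INT32_MAX);
  assert(saddSat(INT32_MIN, -1) == INT32_MIN);
  assert(saddSat(1, 2) == 3);
}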
6983
6984LegalizerHelper::LegalizeResult
6985LegalizerHelper::lowerAddSubSatToAddoSubo(MachineInstr &MI) {
6986 Register Res = MI.getOperand(0).getReg();
6987 Register LHS = MI.getOperand(1).getReg();
6988 Register RHS = MI.getOperand(2).getReg();
6989 LLT Ty = MRI.getType(Res);
6990 LLT BoolTy = Ty.changeElementSize(1);
6991 bool IsSigned;
6992 bool IsAdd;
6993 unsigned OverflowOp;
6994 switch (MI.getOpcode()) {
6995 default:
6996 llvm_unreachable("unexpected addsat/subsat opcode");
6997 case TargetOpcode::G_UADDSAT:
6998 IsSigned = false;
6999 IsAdd = true;
7000 OverflowOp = TargetOpcode::G_UADDO;
7001 break;
7002 case TargetOpcode::G_SADDSAT:
7003 IsSigned = true;
7004 IsAdd = true;
7005 OverflowOp = TargetOpcode::G_SADDO;
7006 break;
7007 case TargetOpcode::G_USUBSAT:
7008 IsSigned = false;
7009 IsAdd = false;
7010 OverflowOp = TargetOpcode::G_USUBO;
7011 break;
7012 case TargetOpcode::G_SSUBSAT:
7013 IsSigned = true;
7014 IsAdd = false;
7015 OverflowOp = TargetOpcode::G_SSUBO;
7016 break;
7017 }
7018
7019 auto OverflowRes =
7020 MIRBuilder.buildInstr(OverflowOp, {Ty, BoolTy}, {LHS, RHS});
7021 Register Tmp = OverflowRes.getReg(0);
7022 Register Ov = OverflowRes.getReg(1);
7023 MachineInstrBuilder Clamp;
7024 if (IsSigned) {
7025 // sadd.sat(a, b) ->
7026 // {tmp, ov} = saddo(a, b)
7027 // ov ? (tmp >>s 31) + 0x80000000 : r
7028 // ssub.sat(a, b) ->
7029 // {tmp, ov} = ssubo(a, b)
7030 // ov ? (tmp >>s 31) + 0x80000000 : r
7031 uint64_t NumBits = Ty.getScalarSizeInBits();
7032 auto ShiftAmount = MIRBuilder.buildConstant(Ty, NumBits - 1);
7033 auto Sign = MIRBuilder.buildAShr(Ty, Tmp, ShiftAmount);
7034 auto MinVal =
7035 MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
7036 Clamp = MIRBuilder.buildAdd(Ty, Sign, MinVal);
7037 } else {
7038 // uadd.sat(a, b) ->
7039 // {tmp, ov} = uaddo(a, b)
7040 // ov ? 0xffffffff : tmp
7041 // usub.sat(a, b) ->
7042 // {tmp, ov} = usubo(a, b)
7043 // ov ? 0 : tmp
7044 Clamp = MIRBuilder.buildConstant(Ty, IsAdd ? -1 : 0);
7045 }
7046 MIRBuilder.buildSelect(Res, Ov, Clamp, Tmp);
7047
7048 MI.eraseFromParent();
7049 return Legalized;
7050}
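
The unsigned uadd.sat case sketched (ours); the carry-out of the plain add plays the role of the G_UADDO overflow bit:

#include <cassert>
#include <cstdint>

static uint32_t uaddSat(uint32_t A, uint32_t B) {
  uint32_t Tmp = A + B;           // the G_UADDO result
  bool Ov = Tmp < A;              // its carry-out
  return Ov ? 0xFFFFFFFFu : Tmp;  // Clamp is all-ones for uadd.sat
}

int main() {
  assert(uaddSat(0xFFFFFFF0u, 0x20u) == 0xFFFFFFFFu);
  assert(uaddSat(1u, 2u) == 3u);
}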
7051
7052LegalizerHelper::LegalizeResult
7053LegalizerHelper::lowerShlSat(MachineInstr &MI) {
7054 assert((MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
7055 MI.getOpcode() == TargetOpcode::G_USHLSAT) &&
7056 "Expected shlsat opcode!");
7057 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SSHLSAT;
7058 Register Res = MI.getOperand(0).getReg();
7059 Register LHS = MI.getOperand(1).getReg();
7060 Register RHS = MI.getOperand(2).getReg();
7061 LLT Ty = MRI.getType(Res);
7062 LLT BoolTy = Ty.changeElementSize(1);
7063
7064 unsigned BW = Ty.getScalarSizeInBits();
7065 auto Result = MIRBuilder.buildShl(Ty, LHS, RHS);
7066 auto Orig = IsSigned ? MIRBuilder.buildAShr(Ty, Result, RHS)
7067 : MIRBuilder.buildLShr(Ty, Result, RHS);
7068
7069 MachineInstrBuilder SatVal;
7070 if (IsSigned) {
7071 auto SatMin = MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(BW));
7072 auto SatMax = MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(BW));
7073 auto Cmp = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, LHS,
7074 MIRBuilder.buildConstant(Ty, 0));
7075 SatVal = MIRBuilder.buildSelect(Ty, Cmp, SatMin, SatMax);
7076 } else {
7077 SatVal = MIRBuilder.buildConstant(Ty, APInt::getMaxValue(BW));
7078 }
7079 auto Ov = MIRBuilder.buildICmp(CmpInst::ICMP_NE, BoolTy, LHS, Orig);
7080 MIRBuilder.buildSelect(Res, Ov, SatVal, Result);
7081
7082 MI.eraseFromParent();
7083 return Legalized;
7084}
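
The unsigned shlsat check as a sketch (ours): shift back down and compare:

#include <cassert>
#include <cstdint>

static uint32_t ushlSat(uint32_t LHS, uint32_t RHS) {
  uint32_t Result = LHS << RHS;
  uint32_t Orig = Result >> RHS;             // undo the shift
  return Orig != LHS ? 0xFFFFFFFFu : Result; // bits were lost: saturate
}

int main() {
  assert(ushlSat(1u, 4u) == 16u);
  assert(ushlSat(0x80000000u, 1u) == 0xFFFFFFFFu);
}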
7085
7086LegalizerHelper::LegalizeResult
7087LegalizerHelper::lowerBswap(MachineInstr &MI) {
7088 Register Dst = MI.getOperand(0).getReg();
7089 Register Src = MI.getOperand(1).getReg();
7090 const LLT Ty = MRI.getType(Src);
7091 unsigned SizeInBytes = (Ty.getScalarSizeInBits() + 7) / 8;
7092 unsigned BaseShiftAmt = (SizeInBytes - 1) * 8;
7093
7094 // Swap most and least significant byte, set remaining bytes in Res to zero.
7095 auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt);
7096 auto LSByteShiftedLeft = MIRBuilder.buildShl(Ty, Src, ShiftAmt);
7097 auto MSByteShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
7098 auto Res = MIRBuilder.buildOr(Ty, MSByteShiftedRight, LSByteShiftedLeft);
7099
7100 // Set i-th high/low byte in Res to i-th low/high byte from Src.
7101 for (unsigned i = 1; i < SizeInBytes / 2; ++i) {
7102 // AND with Mask leaves byte i unchanged and sets remaining bytes to 0.
7103 APInt APMask(SizeInBytes * 8, 0xFFULL << (i * 8));
7104 auto Mask = MIRBuilder.buildConstant(Ty, APMask);
7105 auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt - 16 * i);
7106 // Low byte shifted left to place of high byte: (Src & Mask) << ShiftAmt.
7107 auto LoByte = MIRBuilder.buildAnd(Ty, Src, Mask);
7108 auto LoShiftedLeft = MIRBuilder.buildShl(Ty, LoByte, ShiftAmt);
7109 Res = MIRBuilder.buildOr(Ty, Res, LoShiftedLeft);
7110 // High byte shifted right to place of low byte: (Src >> ShiftAmt) & Mask.
7111 auto SrcShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
7112 auto HiShiftedRight = MIRBuilder.buildAnd(Ty, SrcShiftedRight, Mask);
7113 Res = MIRBuilder.buildOr(Ty, Res, HiShiftedRight);
7114 }
7115 Res.getInstr()->getOperand(0).setReg(Dst);
7116
7117 MI.eraseFromParent();
7118 return Legalized;
7119}
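
The shift/mask/or byte swap for 32 bits, as a sketch (ours):

#include <cassert>
#include <cstdint>

static uint32_t bswap32(uint32_t Src) {
  uint32_t Res = (Src << 24) | (Src >> 24); // swap the outer bytes first
  Res |= (Src & 0x0000FF00u) << 8;          // byte 1 into byte 2's slot
  Res |= (Src >> 8) & 0x0000FF00u;          // byte 2 into byte 1's slot
  return Res;
}

int main() {
  assert(bswap32(0x12345678u) == 0x78563412u);
}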
7120
7121//{ (Src & Mask) >> N } | { (Src << N) & Mask }
7122static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B,
7123 MachineInstrBuilder Src, APInt Mask) {
7124 const LLT Ty = Dst.getLLTTy(*B.getMRI());
7125 MachineInstrBuilder C_N = B.buildConstant(Ty, N);
7126 MachineInstrBuilder MaskLoNTo0 = B.buildConstant(Ty, Mask);
7127 auto LHS = B.buildLShr(Ty, B.buildAnd(Ty, Src, MaskLoNTo0), C_N);
7128 auto RHS = B.buildAnd(Ty, B.buildShl(Ty, Src, C_N), MaskLoNTo0);
7129 return B.buildOr(Dst, LHS, RHS);
7130}
7131
7132LegalizerHelper::LegalizeResult
7133LegalizerHelper::lowerBitreverse(MachineInstr &MI) {
7134 Register Dst = MI.getOperand(0).getReg();
7135 Register Src = MI.getOperand(1).getReg();
7136 const LLT Ty = MRI.getType(Src);
7137 unsigned Size = Ty.getSizeInBits();
7138
7139 MachineInstrBuilder BSWAP =
7140 MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {Ty}, {Src});
7141
7142 // swap high and low 4 bits in 8 bit blocks 7654|3210 -> 3210|7654
7143 // [(val & 0xF0F0F0F0) >> 4] | [(val & 0x0F0F0F0F) << 4]
7144 // -> [(val & 0xF0F0F0F0) >> 4] | [(val << 4) & 0xF0F0F0F0]
7145 MachineInstrBuilder Swap4 =
7146 SwapN(4, Ty, MIRBuilder, BSWAP, APInt::getSplat(Size, APInt(8, 0xF0)));
7147
7148 // swap high and low 2 bits in 4 bit blocks 32|10 76|54 -> 10|32 54|76
7149 // [(val & 0xCCCCCCCC) >> 2] | [(val & 0x33333333) << 2]
7150 // -> [(val & 0xCCCCCCCC) >> 2] | [(val << 2) & 0xCCCCCCCC]
7151 MachineInstrBuilder Swap2 =
7152 SwapN(2, Ty, MIRBuilder, Swap4, APInt::getSplat(Size, APInt(8, 0xCC)));
7153
7154 // swap high and low 1 bit in 2 bit blocks 1|0 3|2 5|4 7|6 -> 0|1 2|3 4|5 6|7
7155 // [(val & 0xAAAAAAAA) >> 1] | [(val & 0x55555555) << 1]
7156 // -> [(val & 0xAAAAAAAA) >> 1] | [(val << 1) & 0xAAAAAAAA]
7157 SwapN(1, Dst, MIRBuilder, Swap2, APInt::getSplat(Size, APInt(8, 0xAA)));
7158
7159 MI.eraseFromParent();
7160 return Legalized;
7161}
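
A sketch (ours) of bswap followed by the three SwapN stages, with __builtin_bswap32 standing in for G_BSWAP:

#include <cassert>
#include <cstdint>

static uint32_t swapN(unsigned N, uint32_t Src, uint32_t Mask) {
  return ((Src & Mask) >> N) | ((Src << N) & Mask);
}

static uint32_t bitreverse32(uint32_t V) {
  V = __builtin_bswap32(V);      // stands in for the G_BSWAP above
  V = swapN(4, V, 0xF0F0F0F0u);  // swap nibbles within each byte
  V = swapN(2, V, 0xCCCCCCCCu);  // swap bit pairs within each nibble
  V = swapN(1, V, 0xAAAAAAAAu);  // swap adjacent bits
  return V;
}

int main() {
  assert(bitreverse32(0x00000001u) == 0x80000000u);
  assert(bitreverse32(0x000000FFu) == 0xFF000000u);
}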
7162
7163LegalizerHelper::LegalizeResult
7164LegalizerHelper::lowerReadWriteRegister(MachineInstr &MI) {
7165 MachineFunction &MF = MIRBuilder.getMF();
7166
7167 bool IsRead = MI.getOpcode() == TargetOpcode::G_READ_REGISTER;
7168 int NameOpIdx = IsRead ? 1 : 0;
7169 int ValRegIndex = IsRead ? 0 : 1;
7170
7171 Register ValReg = MI.getOperand(ValRegIndex).getReg();
7172 const LLT Ty = MRI.getType(ValReg);
7173 const MDString *RegStr = cast<MDString>(
7174 cast<MDNode>(MI.getOperand(NameOpIdx).getMetadata())->getOperand(0));
7175
7176 Register PhysReg = TLI.getRegisterByName(RegStr->getString().data(), Ty, MF);
7177 if (!PhysReg.isValid())
7178 return UnableToLegalize;
7179
7180 if (IsRead)
7181 MIRBuilder.buildCopy(ValReg, PhysReg);
7182 else
7183 MIRBuilder.buildCopy(PhysReg, ValReg);
7184
7185 MI.eraseFromParent();
7186 return Legalized;
7187}
7188
7189LegalizerHelper::LegalizeResult
7190LegalizerHelper::lowerSMULH_UMULH(MachineInstr &MI) {
7191 bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULH;
7192 unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
7193 Register Result = MI.getOperand(0).getReg();
7194 LLT OrigTy = MRI.getType(Result);
7195 auto SizeInBits = OrigTy.getScalarSizeInBits();
7196 LLT WideTy = OrigTy.changeElementSize(SizeInBits * 2);
7197
7198 auto LHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(1)});
7199 auto RHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(2)});
7200 auto Mul = MIRBuilder.buildMul(WideTy, LHS, RHS);
7201 unsigned ShiftOp = IsSigned ? TargetOpcode::G_ASHR : TargetOpcode::G_LSHR;
7202
7203 auto ShiftAmt = MIRBuilder.buildConstant(WideTy, SizeInBits);
7204 auto Shifted = MIRBuilder.buildInstr(ShiftOp, {WideTy}, {Mul, ShiftAmt});
7205 MIRBuilder.buildTrunc(Result, Shifted);
7206
7207 MI.eraseFromParent();
7208 return Legalized;
7209}
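
The widen-multiply-shift expansion sketched (ours) for 32-bit G_UMULH:

#include <cassert>
#include <cstdint>

static uint32_t umulh32(uint32_t A, uint32_t B) {
  uint64_t Wide = (uint64_t)A * (uint64_t)B; // zext both, multiply in 2x width
  return (uint32_t)(Wide >> 32);             // shift the high half down, trunc
}

int main() {
  assert(umulh32(0xFFFFFFFFu, 0xFFFFFFFFu) == 0xFFFFFFFEu);
  assert(umulh32(2u, 3u) == 0u);
}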
7210
7211LegalizerHelper::LegalizeResult LegalizerHelper::lowerSelect(MachineInstr &MI) {
7212 // Implement vector G_SELECT in terms of XOR, AND, OR.
7213 Register DstReg = MI.getOperand(0).getReg();
7214 Register MaskReg = MI.getOperand(1).getReg();
7215 Register Op1Reg = MI.getOperand(2).getReg();
7216 Register Op2Reg = MI.getOperand(3).getReg();
7217 LLT DstTy = MRI.getType(DstReg);
7218 LLT MaskTy = MRI.getType(MaskReg);
7219 LLT Op1Ty = MRI.getType(Op1Reg);
7220 if (!DstTy.isVector())
7221 return UnableToLegalize;
7222
7223 // Vector selects can have a scalar predicate. If so, splat it into a vector
7224 // and finish, so that later legalization attempts can try again.
7225 if (MaskTy.isScalar()) {
7226 Register MaskElt = MaskReg;
7227 if (MaskTy.getSizeInBits() < DstTy.getScalarSizeInBits())
7228 MaskElt = MIRBuilder.buildSExt(DstTy.getElementType(), MaskElt).getReg(0);
7229 // Generate a vector splat idiom to be pattern matched later.
7230 auto ShufSplat = MIRBuilder.buildShuffleSplat(DstTy, MaskElt);
7231 Observer.changingInstr(MI);
7232 MI.getOperand(1).setReg(ShufSplat.getReg(0));
7233 Observer.changedInstr(MI);
7234 return Legalized;
7235 }
7236
7237 if (MaskTy.getSizeInBits() != Op1Ty.getSizeInBits()) {
7238 return UnableToLegalize;
7239 }
7240
7241 auto NotMask = MIRBuilder.buildNot(MaskTy, MaskReg);
7242 auto NewOp1 = MIRBuilder.buildAnd(MaskTy, Op1Reg, MaskReg);
7243 auto NewOp2 = MIRBuilder.buildAnd(MaskTy, Op2Reg, NotMask);
7244 MIRBuilder.buildOr(DstReg, NewOp1, NewOp2);
7245 MI.eraseFromParent();
7246 return Legalized;
7247}
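
One lane of the mask arithmetic, sketched (ours); an all-ones or all-zeros mask lane picks Op1 or Op2 bitwise:

#include <cassert>
#include <cstdint>

int main() {
  uint32_t Mask = 0xFFFFFFFFu; // lane is "true"
  uint32_t Op1 = 0xAAAA5555u, Op2 = 0x12345678u;
  assert(((Op1 & Mask) | (Op2 & ~Mask)) == Op1);
  Mask = 0;                    // lane is "false"
  assert(((Op1 & Mask) | (Op2 & ~Mask)) == Op2);
}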
7248
7249LegalizerHelper::LegalizeResult LegalizerHelper::lowerDIVREM(MachineInstr &MI) {
7250 // Split DIVREM into individual instructions.
7251 unsigned Opcode = MI.getOpcode();
7252
7253 MIRBuilder.buildInstr(
7254 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SDIV
7255 : TargetOpcode::G_UDIV,
7256 {MI.getOperand(0).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
7257 MIRBuilder.buildInstr(
7258 Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SREM
7259 : TargetOpcode::G_UREM,
7260 {MI.getOperand(1).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
7261 MI.eraseFromParent();
7262 return Legalized;
7263}
7264
7265LegalizerHelper::LegalizeResult
7266LegalizerHelper::lowerAbsToAddXor(MachineInstr &MI) {
7267 // Expand %res = G_ABS %a into:
7268 // %v1 = G_ASHR %a, scalar_size-1
7269 // %v2 = G_ADD %a, %v1
7270 // %res = G_XOR %v2, %v1
7271 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
7272 Register OpReg = MI.getOperand(1).getReg();
7273 auto ShiftAmt =
7274 MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - 1);
7275 auto Shift = MIRBuilder.buildAShr(DstTy, OpReg, ShiftAmt);
7276 auto Add = MIRBuilder.buildAdd(DstTy, OpReg, Shift);
7277 MIRBuilder.buildXor(MI.getOperand(0).getReg(), Add, Shift);
7278 MI.eraseFromParent();
7279 return Legalized;
7280}
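
The shift/add/xor abs, sketched (ours) with the add done in unsigned arithmetic to keep the wrap-around well defined:

#include <cassert>
#include <cstdint>

static int32_t absExpansion(int32_t A) {
  int32_t V1 = A >> 31;                     // 0 for positive, -1 for negative
  uint32_t V2 = (uint32_t)A + (uint32_t)V1; // wrap-around add
  return (int32_t)(V2 ^ (uint32_t)V1);      // xor applies the two's-complement
}

int main() {
  assert(absExpansion(-5) == 5);
  assert(absExpansion(7) == 7);
}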
7281
7282LegalizerHelper::LegalizeResult
7283LegalizerHelper::lowerAbsToMaxNeg(MachineInstr &MI) {
7284 // Expand %res = G_ABS %a into:
7285 // %v1 = G_CONSTANT 0
7286 // %v2 = G_SUB %v1, %a
7287 // %res = G_SMAX %a, %v2
7288 Register SrcReg = MI.getOperand(1).getReg();
7289 LLT Ty = MRI.getType(SrcReg);
7290 auto Zero = MIRBuilder.buildConstant(Ty, 0).getReg(0);
7291 auto Sub = MIRBuilder.buildSub(Ty, Zero, SrcReg).getReg(0);
7292 MIRBuilder.buildSMax(MI.getOperand(0), SrcReg, Sub);
7293 MI.eraseFromParent();
7294 return Legalized;
7295}