File: | src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Support/Alignment.h |
Warning: | line 85, column 47 The result of the left shift is undefined due to shifting by '255', which is greater or equal to the width of type 'uint64_t' |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | //===-- X86FrameLowering.cpp - X86 Frame Information ----------------------===// | |||
2 | // | |||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |||
4 | // See https://llvm.org/LICENSE.txt for license information. | |||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |||
6 | // | |||
7 | //===----------------------------------------------------------------------===// | |||
8 | // | |||
9 | // This file contains the X86 implementation of TargetFrameLowering class. | |||
10 | // | |||
11 | //===----------------------------------------------------------------------===// | |||
12 | ||||
13 | #include "X86FrameLowering.h" | |||
14 | #include "X86InstrBuilder.h" | |||
15 | #include "X86InstrInfo.h" | |||
16 | #include "X86MachineFunctionInfo.h" | |||
17 | #include "X86ReturnProtectorLowering.h" | |||
18 | #include "X86Subtarget.h" | |||
19 | #include "X86TargetMachine.h" | |||
20 | #include "llvm/ADT/SmallSet.h" | |||
21 | #include "llvm/ADT/Statistic.h" | |||
22 | #include "llvm/Analysis/EHPersonalities.h" | |||
23 | #include "llvm/CodeGen/MachineFrameInfo.h" | |||
24 | #include "llvm/CodeGen/MachineFunction.h" | |||
25 | #include "llvm/CodeGen/MachineInstrBuilder.h" | |||
26 | #include "llvm/CodeGen/MachineModuleInfo.h" | |||
27 | #include "llvm/CodeGen/MachineRegisterInfo.h" | |||
28 | #include "llvm/CodeGen/WinEHFuncInfo.h" | |||
29 | #include "llvm/IR/DataLayout.h" | |||
30 | #include "llvm/IR/Function.h" | |||
31 | #include "llvm/MC/MCAsmInfo.h" | |||
32 | #include "llvm/MC/MCObjectFileInfo.h" | |||
33 | #include "llvm/MC/MCSymbol.h" | |||
34 | #include "llvm/Support/Debug.h" | |||
35 | #include "llvm/Target/TargetOptions.h" | |||
36 | #include <cstdlib> | |||
37 | ||||
38 | #define DEBUG_TYPE"x86-fl" "x86-fl" | |||
39 | ||||
40 | STATISTIC(NumFrameLoopProbe, "Number of loop stack probes used in prologue")static llvm::Statistic NumFrameLoopProbe = {"x86-fl", "NumFrameLoopProbe" , "Number of loop stack probes used in prologue"}; | |||
41 | STATISTIC(NumFrameExtraProbe,static llvm::Statistic NumFrameExtraProbe = {"x86-fl", "NumFrameExtraProbe" , "Number of extra stack probes generated in prologue"} | |||
42 | "Number of extra stack probes generated in prologue")static llvm::Statistic NumFrameExtraProbe = {"x86-fl", "NumFrameExtraProbe" , "Number of extra stack probes generated in prologue"}; | |||
43 | ||||
44 | using namespace llvm; | |||
45 | ||||
/// Construct the X86 frame lowering for a subtarget and cache the
/// frame-related facts (slot size, pointer width, stack register) that the
/// rest of this file queries repeatedly.
X86FrameLowering::X86FrameLowering(const X86Subtarget &STI,
                                   MaybeAlign StackAlignOverride)
    : TargetFrameLowering(StackGrowsDown, StackAlignOverride.valueOrOne(),
                          STI.is64Bit() ? -8 : -4),
      STI(STI), TII(*STI.getInstrInfo()), TRI(STI.getRegisterInfo()), RPL() {
  // Cache a bunch of frame-related predicates for this subtarget.
  SlotSize = TRI->getSlotSize();
  Is64Bit = STI.is64Bit();
  IsLP64 = STI.isTarget64BitLP64();
  // standard x86_64 and NaCl use 64-bit frame/stack pointers, x32 - 32-bit.
  Uses64BitFramePtr = STI.isTarget64BitLP64() || STI.isTargetNaCl64();
  StackPtr = TRI->getStackRegister();
  // Argument saving (getSaveArgs) is only honored on 64-bit targets.
  SaveArgs = Is64Bit ? STI.getSaveArgs() : 0;
}
60 | ||||
61 | bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { | |||
62 | return !MF.getFrameInfo().hasVarSizedObjects() && | |||
63 | !MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences() && | |||
64 | !MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall(); | |||
65 | } | |||
66 | ||||
67 | /// canSimplifyCallFramePseudos - If there is a reserved call frame, the | |||
68 | /// call frame pseudos can be simplified. Having a FP, as in the default | |||
69 | /// implementation, is not sufficient here since we can't always use it. | |||
70 | /// Use a more nuanced condition. | |||
71 | bool | |||
72 | X86FrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const { | |||
73 | return hasReservedCallFrame(MF) || | |||
74 | MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall() || | |||
75 | (hasFP(MF) && !TRI->hasStackRealignment(MF)) || | |||
76 | TRI->hasBasePointer(MF); | |||
77 | } | |||
78 | ||||
79 | // needsFrameIndexResolution - Do we need to perform FI resolution for | |||
80 | // this function. Normally, this is required only when the function | |||
81 | // has any stack objects. However, FI resolution actually has another job, | |||
82 | // not apparent from the title - it resolves callframesetup/destroy | |||
83 | // that were not simplified earlier. | |||
84 | // So, this is required for x86 functions that have push sequences even | |||
85 | // when there are no stack objects. | |||
86 | bool | |||
87 | X86FrameLowering::needsFrameIndexResolution(const MachineFunction &MF) const { | |||
88 | return MF.getFrameInfo().hasStackObjects() || | |||
89 | MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences(); | |||
90 | } | |||
91 | ||||
92 | /// hasFP - Return true if the specified function should have a dedicated frame | |||
93 | /// pointer register. This is true if the function has variable sized allocas | |||
94 | /// or if frame pointer elimination is disabled. | |||
95 | bool X86FrameLowering::hasFP(const MachineFunction &MF) const { | |||
96 | const MachineFrameInfo &MFI = MF.getFrameInfo(); | |||
97 | return (MF.getTarget().Options.DisableFramePointerElim(MF) || | |||
98 | TRI->hasStackRealignment(MF) || MFI.hasVarSizedObjects() || | |||
99 | MFI.isFrameAddressTaken() || MFI.hasOpaqueSPAdjustment() || | |||
100 | MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() || | |||
101 | MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall() || | |||
102 | MF.callsUnwindInit() || MF.hasEHFunclets() || MF.callsEHReturn() || | |||
103 | MFI.hasStackMap() || MFI.hasPatchPoint() || | |||
104 | MFI.hasCopyImplyingStackAdjustment() || | |||
105 | SaveArgs); | |||
106 | } | |||
107 | ||||
108 | static unsigned getSUBriOpcode(bool IsLP64, int64_t Imm) { | |||
109 | if (IsLP64) { | |||
110 | if (isInt<8>(Imm)) | |||
111 | return X86::SUB64ri8; | |||
112 | return X86::SUB64ri32; | |||
113 | } else { | |||
114 | if (isInt<8>(Imm)) | |||
115 | return X86::SUB32ri8; | |||
116 | return X86::SUB32ri; | |||
117 | } | |||
118 | } | |||
119 | ||||
120 | static unsigned getADDriOpcode(bool IsLP64, int64_t Imm) { | |||
121 | if (IsLP64) { | |||
122 | if (isInt<8>(Imm)) | |||
123 | return X86::ADD64ri8; | |||
124 | return X86::ADD64ri32; | |||
125 | } else { | |||
126 | if (isInt<8>(Imm)) | |||
127 | return X86::ADD32ri8; | |||
128 | return X86::ADD32ri; | |||
129 | } | |||
130 | } | |||
131 | ||||
132 | static unsigned getSUBrrOpcode(bool IsLP64) { | |||
133 | return IsLP64 ? X86::SUB64rr : X86::SUB32rr; | |||
134 | } | |||
135 | ||||
136 | static unsigned getADDrrOpcode(bool IsLP64) { | |||
137 | return IsLP64 ? X86::ADD64rr : X86::ADD32rr; | |||
138 | } | |||
139 | ||||
140 | static unsigned getANDriOpcode(bool IsLP64, int64_t Imm) { | |||
141 | if (IsLP64) { | |||
142 | if (isInt<8>(Imm)) | |||
143 | return X86::AND64ri8; | |||
144 | return X86::AND64ri32; | |||
145 | } | |||
146 | if (isInt<8>(Imm)) | |||
147 | return X86::AND32ri8; | |||
148 | return X86::AND32ri; | |||
149 | } | |||
150 | ||||
151 | static unsigned getLEArOpcode(bool IsLP64) { | |||
152 | return IsLP64 ? X86::LEA64r : X86::LEA32r; | |||
153 | } | |||
154 | ||||
155 | static bool isEAXLiveIn(MachineBasicBlock &MBB) { | |||
156 | for (MachineBasicBlock::RegisterMaskPair RegMask : MBB.liveins()) { | |||
157 | unsigned Reg = RegMask.PhysReg; | |||
158 | ||||
159 | if (Reg == X86::RAX || Reg == X86::EAX || Reg == X86::AX || | |||
160 | Reg == X86::AH || Reg == X86::AL) | |||
161 | return true; | |||
162 | } | |||
163 | ||||
164 | return false; | |||
165 | } | |||
166 | ||||
/// Check if the flags need to be preserved before the terminators.
/// This would be the case, if the eflags is live-in of the region
/// composed by the terminators or live-out of that region, without
/// being defined by a terminator.
static bool
flagsNeedToBePreservedBeforeTheTerminators(const MachineBasicBlock &MBB) {
  // Walk the terminators in order; the first EFLAGS use/def decides.
  for (const MachineInstr &MI : MBB.terminators()) {
    bool BreakNext = false;
    for (const MachineOperand &MO : MI.operands()) {
      if (!MO.isReg())
        continue;
      Register Reg = MO.getReg();
      if (Reg != X86::EFLAGS)
        continue;

      // This terminator needs an eflags that is not defined
      // by a previous another terminator:
      // EFLAGS is live-in of the region composed by the terminators.
      if (!MO.isDef())
        return true;
      // This terminator defines the eflags, i.e., we don't need to preserve it.
      // However, we still need to check this specific terminator does not
      // read a live-in value.
      BreakNext = true;
    }
    // We found a definition of the eflags, no need to preserve them.
    if (BreakNext)
      return false;
  }

  // None of the terminators use or define the eflags.
  // Check if they are live-out, that would imply we need to preserve them.
  for (const MachineBasicBlock *Succ : MBB.successors())
    if (Succ->isLiveIn(X86::EFLAGS))
      return true;

  return false;
}
205 | ||||
/// emitSPUpdate - Emit a series of instructions to increment / decrement the
/// stack pointer by a constant value. A negative NumBytes allocates stack
/// (FrameSetup); a positive one deallocates it (FrameDestroy).
void X86FrameLowering::emitSPUpdate(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator &MBBI,
                                    const DebugLoc &DL,
                                    int64_t NumBytes, bool InEpilogue) const {
  bool isSub = NumBytes < 0;
  uint64_t Offset = isSub ? -NumBytes : NumBytes;
  MachineInstr::MIFlag Flag =
      isSub ? MachineInstr::FrameSetup : MachineInstr::FrameDestroy;

  // Largest adjustment expressible with one 32-bit signed immediate.
  uint64_t Chunk = (1LL << 31) - 1;

  MachineFunction &MF = *MBB.getParent();
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const X86TargetLowering &TLI = *STI.getTargetLowering();
  const bool EmitInlineStackProbe = TLI.hasInlineStackProbe(MF);

  // It's ok to not take into account large chunks when probing, as the
  // allocation is split in smaller chunks anyway.
  if (EmitInlineStackProbe && !InEpilogue) {

    // This pseudo-instruction is going to be expanded, potentially using a
    // loop, by inlineStackProbe().
    BuildMI(MBB, MBBI, DL, TII.get(X86::STACKALLOC_W_PROBING)).addImm(Offset);
    return;
  } else if (Offset > Chunk) {
    // Rather than emit a long series of instructions for large offsets,
    // load the offset into a register and do one sub/add
    unsigned Reg = 0;
    unsigned Rax = (unsigned)(Is64Bit ? X86::RAX : X86::EAX);

    // When allocating, prefer the accumulator as scratch unless it is
    // live-in; otherwise scavenge any dead caller-saved register.
    if (isSub && !isEAXLiveIn(MBB))
      Reg = Rax;
    else
      Reg = TRI->findDeadCallerSavedReg(MBB, MBBI);

    unsigned MovRIOpc = Is64Bit ? X86::MOV64ri : X86::MOV32ri;
    unsigned AddSubRROpc =
        isSub ? getSUBrrOpcode(Is64Bit) : getADDrrOpcode(Is64Bit);
    if (Reg) {
      // mov $Offset, %reg ; add/sub %reg, %rsp
      BuildMI(MBB, MBBI, DL, TII.get(MovRIOpc), Reg)
          .addImm(Offset)
          .setMIFlag(Flag);
      MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AddSubRROpc), StackPtr)
                             .addReg(StackPtr)
                             .addReg(Reg);
      MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
      return;
    } else if (Offset > 8 * Chunk) {
      // If we would need more than 8 add or sub instructions (a >16GB stack
      // frame), it's worth spilling RAX to materialize this immediate.
      //   pushq %rax
      //   movabsq +-$Offset+-SlotSize, %rax
      //   addq %rsp, %rax
      //   xchg %rax, (%rsp)
      //   movq (%rsp), %rsp
      assert(Is64Bit && "can't have 32-bit 16GB stack frame");
      BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
          .addReg(Rax, RegState::Kill)
          .setMIFlag(Flag);
      // Subtract is not commutative, so negate the offset and always use add.
      // Subtract 8 less and add 8 more to account for the PUSH we just did.
      if (isSub)
        Offset = -(Offset - SlotSize);
      else
        Offset = Offset + SlotSize;
      BuildMI(MBB, MBBI, DL, TII.get(MovRIOpc), Rax)
          .addImm(Offset)
          .setMIFlag(Flag);
      MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(X86::ADD64rr), Rax)
                             .addReg(Rax)
                             .addReg(StackPtr);
      MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
      // Exchange the new SP in RAX with the top of the stack.
      addRegOffset(
          BuildMI(MBB, MBBI, DL, TII.get(X86::XCHG64rm), Rax).addReg(Rax),
          StackPtr, false, 0);
      // Load new SP from the top of the stack into RSP.
      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rm), StackPtr),
                   StackPtr, false, 0);
      return;
    }
  }

  // Emit the remaining adjustment in immediate-sized chunks.
  while (Offset) {
    uint64_t ThisVal = std::min(Offset, Chunk);
    if (ThisVal == SlotSize) {
      // Use push / pop for slot sized adjustments as a size optimization. We
      // need to find a dead register when using pop.
      unsigned Reg = isSub
                   ? (unsigned)(Is64Bit ? X86::RAX : X86::EAX)
                   : TRI->findDeadCallerSavedReg(MBB, MBBI);
      if (Reg) {
        unsigned Opc = isSub
                     ? (Is64Bit ? X86::PUSH64r : X86::PUSH32r)
                     : (Is64Bit ? X86::POP64r : X86::POP32r);
        BuildMI(MBB, MBBI, DL, TII.get(Opc))
            .addReg(Reg, getDefRegState(!isSub) | getUndefRegState(isSub))
            .setMIFlag(Flag);
        Offset -= ThisVal;
        continue;
      }
    }

    BuildStackAdjustment(MBB, MBBI, DL, isSub ? -ThisVal : ThisVal, InEpilogue)
        .setMIFlag(Flag);

    Offset -= ThisVal;
  }
}
317 | ||||
/// Emit a single SP adjustment of Offset bytes (negative = allocate,
/// positive = deallocate), using LEA when EFLAGS must stay intact and an
/// immediate ADD/SUB otherwise. Returns the builder for the emitted
/// instruction so the caller can attach MI flags.
MachineInstrBuilder X86FrameLowering::BuildStackAdjustment(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    const DebugLoc &DL, int64_t Offset, bool InEpilogue) const {
  assert(Offset != 0 && "zero offset stack adjustment requested");

  // On Atom, using LEA to adjust SP is preferred, but using it in the epilogue
  // is tricky.
  bool UseLEA;
  if (!InEpilogue) {
    // Check if inserting the prologue at the beginning
    // of MBB would require to use LEA operations.
    // We need to use LEA operations if EFLAGS is live in, because
    // it means an instruction will read it before it gets defined.
    UseLEA = STI.useLeaForSP() || MBB.isLiveIn(X86::EFLAGS);
  } else {
    // If we can use LEA for SP but we shouldn't, check that none
    // of the terminators uses the eflags. Otherwise we will insert
    // a ADD that will redefine the eflags and break the condition.
    // Alternatively, we could move the ADD, but this may not be possible
    // and is an optimization anyway.
    UseLEA = canUseLEAForSPInEpilogue(*MBB.getParent());
    if (UseLEA && !STI.useLeaForSP())
      UseLEA = flagsNeedToBePreservedBeforeTheTerminators(MBB);
    // If that assert breaks, that means we do not do the right thing
    // in canUseAsEpilogue.
    assert((UseLEA || !flagsNeedToBePreservedBeforeTheTerminators(MBB)) &&
           "We shouldn't have allowed this insertion point");
  }

  MachineInstrBuilder MI;
  if (UseLEA) {
    // lea Offset(%rsp), %rsp — adjusts SP without touching EFLAGS.
    MI = addRegOffset(BuildMI(MBB, MBBI, DL,
                              TII.get(getLEArOpcode(Uses64BitFramePtr)),
                              StackPtr),
                      StackPtr, false, Offset);
  } else {
    bool IsSub = Offset < 0;
    uint64_t AbsOffset = IsSub ? -Offset : Offset;
    const unsigned Opc = IsSub ? getSUBriOpcode(Uses64BitFramePtr, AbsOffset)
                               : getADDriOpcode(Uses64BitFramePtr, AbsOffset);
    MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
             .addReg(StackPtr)
             .addImm(AbsOffset);
    MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
  }
  return MI;
}
365 | ||||
/// mergeSPUpdates - Try to fold an SP add/sub/lea adjacent to MBBI (before
/// it when doMergeWithPrevious, after it otherwise) into the caller's
/// pending adjustment. On success, erases the matched instruction (and the
/// CFA-offset CFI directive paired with it, if any) and returns the signed
/// byte amount it applied to SP (positive for ADD, negative for SUB).
/// Returns 0 when nothing mergeable is found.
int X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator &MBBI,
                                     bool doMergeWithPrevious) const {
  // Nothing before the block start / after the block end to merge with.
  if ((doMergeWithPrevious && MBBI == MBB.begin()) ||
      (!doMergeWithPrevious && MBBI == MBB.end()))
    return 0;

  MachineBasicBlock::iterator PI = doMergeWithPrevious ? std::prev(MBBI) : MBBI;

  PI = skipDebugInstructionsBackward(PI, MBB.begin());
  // It is assumed that ADD/SUB/LEA instruction is succeded by one CFI
  // instruction, and that there are no DBG_VALUE or other instructions between
  // ADD/SUB/LEA and its corresponding CFI instruction.
  /* TODO: Add support for the case where there are multiple CFI instructions
    below the ADD/SUB/LEA, e.g.:
    ...
    add
    cfi_def_cfa_offset
    cfi_offset
    ...
  */
  if (doMergeWithPrevious && PI != MBB.begin() && PI->isCFIInstruction())
    PI = std::prev(PI);

  unsigned Opc = PI->getOpcode();
  int Offset = 0;

  if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
       Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
      PI->getOperand(0).getReg() == StackPtr){
    assert(PI->getOperand(1).getReg() == StackPtr);
    Offset = PI->getOperand(2).getImm();
  } else if ((Opc == X86::LEA32r || Opc == X86::LEA64_32r) &&
             PI->getOperand(0).getReg() == StackPtr &&
             PI->getOperand(1).getReg() == StackPtr &&
             PI->getOperand(2).getImm() == 1 &&
             PI->getOperand(3).getReg() == X86::NoRegister &&
             PI->getOperand(5).getReg() == X86::NoRegister) {
    // For LEAs we have: def = lea SP, FI, noreg, Offset, noreg.
    Offset = PI->getOperand(4).getImm();
  } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
              Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
             PI->getOperand(0).getReg() == StackPtr) {
    assert(PI->getOperand(1).getReg() == StackPtr);
    Offset = -PI->getOperand(2).getImm();
  } else
    return 0;

  // Erase the merged SP update; also drop the CFI directive that described
  // it (only the CFA-offset kinds are safe to remove here).
  PI = MBB.erase(PI);
  if (PI != MBB.end() && PI->isCFIInstruction()) {
    auto CIs = MBB.getParent()->getFrameInstructions();
    MCCFIInstruction CI = CIs[PI->getOperand(0).getCFIIndex()];
    if (CI.getOperation() == MCCFIInstruction::OpDefCfaOffset ||
        CI.getOperation() == MCCFIInstruction::OpAdjustCfaOffset)
      PI = MBB.erase(PI);
  }
  if (!doMergeWithPrevious)
    MBBI = skipDebugInstructionsForward(PI, MBB.end());

  return Offset;
}
427 | ||||
428 | void X86FrameLowering::BuildCFI(MachineBasicBlock &MBB, | |||
429 | MachineBasicBlock::iterator MBBI, | |||
430 | const DebugLoc &DL, | |||
431 | const MCCFIInstruction &CFIInst) const { | |||
432 | MachineFunction &MF = *MBB.getParent(); | |||
433 | unsigned CFIIndex = MF.addFrameInst(CFIInst); | |||
434 | BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) | |||
435 | .addCFIIndex(CFIIndex); | |||
436 | } | |||
437 | ||||
438 | /// Emits Dwarf Info specifying offsets of callee saved registers and | |||
439 | /// frame pointer. This is called only when basic block sections are enabled. | |||
440 | void X86FrameLowering::emitCalleeSavedFrameMoves( | |||
441 | MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const { | |||
442 | MachineFunction &MF = *MBB.getParent(); | |||
443 | if (!hasFP(MF)) { | |||
444 | emitCalleeSavedFrameMoves(MBB, MBBI, DebugLoc{}, true); | |||
445 | return; | |||
446 | } | |||
447 | const MachineModuleInfo &MMI = MF.getMMI(); | |||
448 | const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); | |||
449 | const Register FramePtr = TRI->getFrameRegister(MF); | |||
450 | const Register MachineFramePtr = | |||
451 | STI.isTarget64BitILP32() ? Register(getX86SubSuperRegister(FramePtr, 64)) | |||
452 | : FramePtr; | |||
453 | unsigned DwarfReg = MRI->getDwarfRegNum(MachineFramePtr, true); | |||
454 | // Offset = space for return address + size of the frame pointer itself. | |||
455 | unsigned Offset = (Is64Bit ? 8 : 4) + (Uses64BitFramePtr ? 8 : 4); | |||
456 | BuildCFI(MBB, MBBI, DebugLoc{}, | |||
457 | MCCFIInstruction::createOffset(nullptr, DwarfReg, -Offset)); | |||
458 | emitCalleeSavedFrameMoves(MBB, MBBI, DebugLoc{}, true); | |||
459 | } | |||
460 | ||||
461 | void X86FrameLowering::emitCalleeSavedFrameMoves( | |||
462 | MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, | |||
463 | const DebugLoc &DL, bool IsPrologue) const { | |||
464 | MachineFunction &MF = *MBB.getParent(); | |||
465 | MachineFrameInfo &MFI = MF.getFrameInfo(); | |||
466 | MachineModuleInfo &MMI = MF.getMMI(); | |||
467 | const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); | |||
468 | ||||
469 | // Add callee saved registers to move list. | |||
470 | const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); | |||
471 | if (CSI.empty()) return; | |||
472 | ||||
473 | // Calculate offsets. | |||
474 | for (std::vector<CalleeSavedInfo>::const_iterator | |||
475 | I = CSI.begin(), E = CSI.end(); I != E; ++I) { | |||
476 | int64_t Offset = MFI.getObjectOffset(I->getFrameIdx()); | |||
477 | unsigned Reg = I->getReg(); | |||
478 | unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true); | |||
479 | ||||
480 | if (IsPrologue) { | |||
481 | BuildCFI(MBB, MBBI, DL, | |||
482 | MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset)); | |||
483 | } else { | |||
484 | BuildCFI(MBB, MBBI, DL, | |||
485 | MCCFIInstruction::createRestore(nullptr, DwarfReg)); | |||
486 | } | |||
487 | } | |||
488 | } | |||
489 | ||||
490 | void X86FrameLowering::emitStackProbe(MachineFunction &MF, | |||
491 | MachineBasicBlock &MBB, | |||
492 | MachineBasicBlock::iterator MBBI, | |||
493 | const DebugLoc &DL, bool InProlog) const { | |||
494 | const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>(); | |||
495 | if (STI.isTargetWindowsCoreCLR()) { | |||
496 | if (InProlog) { | |||
497 | BuildMI(MBB, MBBI, DL, TII.get(X86::STACKALLOC_W_PROBING)) | |||
498 | .addImm(0 /* no explicit stack size */); | |||
499 | } else { | |||
500 | emitStackProbeInline(MF, MBB, MBBI, DL, false); | |||
501 | } | |||
502 | } else { | |||
503 | emitStackProbeCall(MF, MBB, MBBI, DL, InProlog); | |||
504 | } | |||
505 | } | |||
506 | ||||
507 | void X86FrameLowering::inlineStackProbe(MachineFunction &MF, | |||
508 | MachineBasicBlock &PrologMBB) const { | |||
509 | auto Where = llvm::find_if(PrologMBB, [](MachineInstr &MI) { | |||
510 | return MI.getOpcode() == X86::STACKALLOC_W_PROBING; | |||
511 | }); | |||
512 | if (Where != PrologMBB.end()) { | |||
513 | DebugLoc DL = PrologMBB.findDebugLoc(Where); | |||
514 | emitStackProbeInline(MF, PrologMBB, Where, DL, true); | |||
515 | Where->eraseFromParent(); | |||
516 | } | |||
517 | } | |||
518 | ||||
519 | void X86FrameLowering::emitStackProbeInline(MachineFunction &MF, | |||
520 | MachineBasicBlock &MBB, | |||
521 | MachineBasicBlock::iterator MBBI, | |||
522 | const DebugLoc &DL, | |||
523 | bool InProlog) const { | |||
524 | const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>(); | |||
525 | if (STI.isTargetWindowsCoreCLR() && STI.is64Bit()) | |||
526 | emitStackProbeInlineWindowsCoreCLR64(MF, MBB, MBBI, DL, InProlog); | |||
527 | else | |||
528 | emitStackProbeInlineGeneric(MF, MBB, MBBI, DL, InProlog); | |||
529 | } | |||
530 | ||||
/// Expand a STACKALLOC_W_PROBING pseudo (pointed at by MBBI) into either an
/// unrolled straight-line probe sequence or a probe loop, depending on the
/// allocation size.
void X86FrameLowering::emitStackProbeInlineGeneric(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog) const {
  // The pseudo's immediate operand carries the number of bytes to allocate.
  MachineInstr &AllocWithProbe = *MBBI;
  uint64_t Offset = AllocWithProbe.getOperand(0).getImm();

  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const X86TargetLowering &TLI = *STI.getTargetLowering();
  assert(!(STI.is64Bit() && STI.isTargetWindowsCoreCLR()) &&
         "different expansion expected for CoreCLR 64 bit");

  const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
  // Past 8 probes an explicit loop is emitted instead of unrolling.
  uint64_t ProbeChunk = StackProbeSize * 8;

  uint64_t MaxAlign =
      TRI->hasStackRealignment(MF) ? calculateMaxStackAlign(MF) : 0;

  // Synthesize a loop or unroll it, depending on the number of iterations.
  // BuildStackAlignAND ensures that only MaxAlign % StackProbeSize bits left
  // between the unaligned rsp and current rsp.
  if (Offset > ProbeChunk) {
    emitStackProbeInlineGenericLoop(MF, MBB, MBBI, DL, Offset,
                                    MaxAlign % StackProbeSize);
  } else {
    emitStackProbeInlineGenericBlock(MF, MBB, MBBI, DL, Offset,
                                     MaxAlign % StackProbeSize);
  }
}
559 | ||||
/// Unrolled inline stack probe: allocate Offset bytes in StackProbeSize
/// steps, touching (storing 0 to) the new stack top after each step so every
/// page is faulted in. AlignOffset accounts for bytes already consumed by
/// stack realignment. CFA-offset CFI is emitted per step when the function
/// has no frame pointer but needs DWARF info.
void X86FrameLowering::emitStackProbeInlineGenericBlock(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, uint64_t Offset,
    uint64_t AlignOffset) const {

  const bool NeedsDwarfCFI = needsDwarfCFI(MF);
  const bool HasFP = hasFP(MF);
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const X86TargetLowering &TLI = *STI.getTargetLowering();
  const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, Offset);
  const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
  const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);

  uint64_t CurrentOffset = 0;

  assert(AlignOffset < StackProbeSize);

  // If the offset is so small it fits within a page, there's nothing to do.
  if (StackProbeSize < Offset + AlignOffset) {

    // First step: allocate only up to the next page boundary, then probe it.
    MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
                           .addReg(StackPtr)
                           .addImm(StackProbeSize - AlignOffset)
                           .setMIFlag(MachineInstr::FrameSetup);
    if (!HasFP && NeedsDwarfCFI) {
      BuildCFI(MBB, MBBI, DL,
               MCCFIInstruction::createAdjustCfaOffset(
                   nullptr, StackProbeSize - AlignOffset));
    }
    MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.

    // Touch the newly allocated page: mov $0, (%rsp).
    addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
                     .setMIFlag(MachineInstr::FrameSetup),
                 StackPtr, false, 0)
        .addImm(0)
        .setMIFlag(MachineInstr::FrameSetup);
    NumFrameExtraProbe++;
    CurrentOffset = StackProbeSize - AlignOffset;
  }

  // For the next N - 1 pages, just probe. I tried to take advantage of
  // natural probes but it implies much more logic and there was very few
  // interesting natural probes to interleave.
  while (CurrentOffset + StackProbeSize < Offset) {
    MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
                           .addReg(StackPtr)
                           .addImm(StackProbeSize)
                           .setMIFlag(MachineInstr::FrameSetup);
    MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.

    if (!HasFP && NeedsDwarfCFI) {
      BuildCFI(
          MBB, MBBI, DL,
          MCCFIInstruction::createAdjustCfaOffset(nullptr, StackProbeSize));
    }
    // Touch the page just allocated.
    addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc))
                     .setMIFlag(MachineInstr::FrameSetup),
                 StackPtr, false, 0)
        .addImm(0)
        .setMIFlag(MachineInstr::FrameSetup);
    NumFrameExtraProbe++;
    CurrentOffset += StackProbeSize;
  }

  // No need to probe the tail, it is smaller than a Page.
  uint64_t ChunkSize = Offset - CurrentOffset;
  MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
                         .addReg(StackPtr)
                         .addImm(ChunkSize)
                         .setMIFlag(MachineInstr::FrameSetup);
  // No need to adjust Dwarf CFA offset here, the last position of the stack has
  // been defined
  MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
}
634 | ||||
635 | void X86FrameLowering::emitStackProbeInlineGenericLoop( | |||
636 | MachineFunction &MF, MachineBasicBlock &MBB, | |||
637 | MachineBasicBlock::iterator MBBI, const DebugLoc &DL, uint64_t Offset, | |||
638 | uint64_t AlignOffset) const { | |||
639 | assert(Offset && "null offset")((void)0); | |||
640 | ||||
641 | const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>(); | |||
642 | const X86TargetLowering &TLI = *STI.getTargetLowering(); | |||
643 | const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi; | |||
644 | const uint64_t StackProbeSize = TLI.getStackProbeSize(MF); | |||
645 | ||||
646 | if (AlignOffset) { | |||
647 | if (AlignOffset < StackProbeSize) { | |||
648 | // Perform a first smaller allocation followed by a probe. | |||
649 | const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr, AlignOffset); | |||
650 | MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(SUBOpc), StackPtr) | |||
651 | .addReg(StackPtr) | |||
652 | .addImm(AlignOffset) | |||
653 | .setMIFlag(MachineInstr::FrameSetup); | |||
654 | MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead. | |||
655 | ||||
656 | addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc)) | |||
657 | .setMIFlag(MachineInstr::FrameSetup), | |||
658 | StackPtr, false, 0) | |||
659 | .addImm(0) | |||
660 | .setMIFlag(MachineInstr::FrameSetup); | |||
661 | NumFrameExtraProbe++; | |||
662 | Offset -= AlignOffset; | |||
663 | } | |||
664 | } | |||
665 | ||||
666 | // Synthesize a loop | |||
667 | NumFrameLoopProbe++; | |||
668 | const BasicBlock *LLVM_BB = MBB.getBasicBlock(); | |||
669 | ||||
670 | MachineBasicBlock *testMBB = MF.CreateMachineBasicBlock(LLVM_BB); | |||
671 | MachineBasicBlock *tailMBB = MF.CreateMachineBasicBlock(LLVM_BB); | |||
672 | ||||
673 | MachineFunction::iterator MBBIter = ++MBB.getIterator(); | |||
674 | MF.insert(MBBIter, testMBB); | |||
675 | MF.insert(MBBIter, tailMBB); | |||
676 | ||||
677 | Register FinalStackProbed = Uses64BitFramePtr ? X86::R11 | |||
678 | : Is64Bit ? X86::R11D | |||
679 | : X86::EAX; | |||
680 | BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::COPY), FinalStackProbed) | |||
681 | .addReg(StackPtr) | |||
682 | .setMIFlag(MachineInstr::FrameSetup); | |||
683 | ||||
684 | // save loop bound | |||
685 | { | |||
686 | const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr, Offset); | |||
687 | BuildMI(MBB, MBBI, DL, TII.get(SUBOpc), FinalStackProbed) | |||
688 | .addReg(FinalStackProbed) | |||
689 | .addImm(Offset / StackProbeSize * StackProbeSize) | |||
690 | .setMIFlag(MachineInstr::FrameSetup); | |||
691 | } | |||
692 | ||||
693 | // allocate a page | |||
694 | { | |||
695 | const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr, StackProbeSize); | |||
696 | BuildMI(testMBB, DL, TII.get(SUBOpc), StackPtr) | |||
697 | .addReg(StackPtr) | |||
698 | .addImm(StackProbeSize) | |||
699 | .setMIFlag(MachineInstr::FrameSetup); | |||
700 | } | |||
701 | ||||
702 | // touch the page | |||
703 | addRegOffset(BuildMI(testMBB, DL, TII.get(MovMIOpc)) | |||
704 | .setMIFlag(MachineInstr::FrameSetup), | |||
705 | StackPtr, false, 0) | |||
706 | .addImm(0) | |||
707 | .setMIFlag(MachineInstr::FrameSetup); | |||
708 | ||||
709 | // cmp with stack pointer bound | |||
710 | BuildMI(testMBB, DL, TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr)) | |||
711 | .addReg(StackPtr) | |||
712 | .addReg(FinalStackProbed) | |||
713 | .setMIFlag(MachineInstr::FrameSetup); | |||
714 | ||||
715 | // jump | |||
716 | BuildMI(testMBB, DL, TII.get(X86::JCC_1)) | |||
717 | .addMBB(testMBB) | |||
718 | .addImm(X86::COND_NE) | |||
719 | .setMIFlag(MachineInstr::FrameSetup); | |||
720 | testMBB->addSuccessor(testMBB); | |||
721 | testMBB->addSuccessor(tailMBB); | |||
722 | ||||
723 | // BB management | |||
724 | tailMBB->splice(tailMBB->end(), &MBB, MBBI, MBB.end()); | |||
725 | tailMBB->transferSuccessorsAndUpdatePHIs(&MBB); | |||
726 | MBB.addSuccessor(testMBB); | |||
727 | ||||
728 | // handle tail | |||
729 | unsigned TailOffset = Offset % StackProbeSize; | |||
730 | if (TailOffset) { | |||
731 | const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, TailOffset); | |||
732 | BuildMI(*tailMBB, tailMBB->begin(), DL, TII.get(Opc), StackPtr) | |||
733 | .addReg(StackPtr) | |||
734 | .addImm(TailOffset) | |||
735 | .setMIFlag(MachineInstr::FrameSetup); | |||
736 | } | |||
737 | ||||
738 | // Update Live In information | |||
739 | recomputeLiveIns(*testMBB); | |||
740 | recomputeLiveIns(*tailMBB); | |||
741 | } | |||
742 | ||||
/// Emit an inline stack-probe sequence for Windows CoreCLR on x86-64.
///
/// RAX holds the requested stack adjustment on entry. The expansion probes
/// each page between the current stack limit (read from the thread
/// environment block via GS) and the new stack pointer, then performs the
/// real RSP adjustment in a continuation block. RSP itself is not modified
/// until all probing is complete.
///
/// When \p InProlog is true the sequence uses fixed physical registers
/// (RAX/RCX/RDX, spilling RCX/RDX to shadow slots if live-in) and every
/// emitted instruction is flagged FrameSetup; otherwise virtual registers
/// and a PHI are used and the register allocator does the rest.
void X86FrameLowering::emitStackProbeInlineWindowsCoreCLR64(
    MachineFunction &MF, MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog) const {
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  assert(STI.is64Bit() && "different expansion needed for 32 bit")((void)0);
  assert(STI.isTargetWindowsCoreCLR() && "custom expansion expects CoreCLR")((void)0);
  const TargetInstrInfo &TII = *STI.getInstrInfo();
  const BasicBlock *LLVM_BB = MBB.getBasicBlock();

  // RAX contains the number of bytes of desired stack adjustment.
  // The handling here assumes this value has already been updated so as to
  // maintain stack alignment.
  //
  // We need to exit with RSP modified by this amount and execute suitable
  // page touches to notify the OS that we're growing the stack responsibly.
  // All stack probing must be done without modifying RSP.
  //
  // MBB:
  //    SizeReg = RAX;
  //    ZeroReg = 0
  //    CopyReg = RSP
  //    Flags, TestReg = CopyReg - SizeReg
  //    FinalReg = !Flags.Ovf ? TestReg : ZeroReg
  //    LimitReg = gs magic thread env access
  //    if FinalReg >= LimitReg goto ContinueMBB
  // RoundBB:
  //    RoundReg = page address of FinalReg
  // LoopMBB:
  //    LoopReg = PHI(LimitReg,ProbeReg)
  //    ProbeReg = LoopReg - PageSize
  //    [ProbeReg] = 0
  //    if (ProbeReg > RoundReg) goto LoopMBB
  // ContinueMBB:
  //    RSP = RSP - RAX
  //    [rest of original MBB]

  // Set up the new basic blocks
  MachineBasicBlock *RoundMBB = MF.CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *ContinueMBB = MF.CreateMachineBasicBlock(LLVM_BB);

  MachineFunction::iterator MBBIter = std::next(MBB.getIterator());
  MF.insert(MBBIter, RoundMBB);
  MF.insert(MBBIter, LoopMBB);
  MF.insert(MBBIter, ContinueMBB);

  // Split MBB and move the tail portion down to ContinueMBB.
  // BeforeMBBI marks the instruction just before the expansion so the
  // inserted range can be flagged FrameSetup at the end.
  MachineBasicBlock::iterator BeforeMBBI = std::prev(MBBI);
  ContinueMBB->splice(ContinueMBB->begin(), &MBB, MBBI, MBB.end());
  ContinueMBB->transferSuccessorsAndUpdatePHIs(&MBB);

  // Some useful constants.  ThreadEnvironmentStackLimit is the GS-relative
  // displacement used in the MOV64rm below to read the stack limit.
  const int64_t ThreadEnvironmentStackLimit = 0x10;
  const int64_t PageSize = 0x1000;
  const int64_t PageMask = ~(PageSize - 1);

  // Registers we need. For the normal case we use virtual
  // registers. For the prolog expansion we use RAX, RCX and RDX.
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const TargetRegisterClass *RegClass = &X86::GR64RegClass;
  const Register SizeReg = InProlog ? X86::RAX
                                    : MRI.createVirtualRegister(RegClass),
                 ZeroReg = InProlog ? X86::RCX
                                    : MRI.createVirtualRegister(RegClass),
                 CopyReg = InProlog ? X86::RDX
                                    : MRI.createVirtualRegister(RegClass),
                 TestReg = InProlog ? X86::RDX
                                    : MRI.createVirtualRegister(RegClass),
                 FinalReg = InProlog ? X86::RDX
                                     : MRI.createVirtualRegister(RegClass),
                 RoundedReg = InProlog ? X86::RDX
                                       : MRI.createVirtualRegister(RegClass),
                 LimitReg = InProlog ? X86::RCX
                                     : MRI.createVirtualRegister(RegClass),
                 JoinReg = InProlog ? X86::RCX
                                    : MRI.createVirtualRegister(RegClass),
                 ProbeReg = InProlog ? X86::RCX
                                     : MRI.createVirtualRegister(RegClass);

  // SP-relative offsets where we can save RCX and RDX.
  int64_t RCXShadowSlot = 0;
  int64_t RDXShadowSlot = 0;

  // If inlining in the prolog, save RCX and RDX.
  if (InProlog) {
    // Compute the offsets. We need to account for things already
    // pushed onto the stack at this point: return address, frame
    // pointer (if used), and callee saves.
    X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
    const int64_t CalleeSaveSize = X86FI->getCalleeSavedFrameSize();
    const bool HasFP = hasFP(MF);

    // Check if we need to spill RCX and/or RDX.
    // Here we assume that no earlier prologue instruction changes RCX and/or
    // RDX, so checking the block live-ins is enough.
    const bool IsRCXLiveIn = MBB.isLiveIn(X86::RCX);
    const bool IsRDXLiveIn = MBB.isLiveIn(X86::RDX);
    // 8 bytes for the pushed return address, plus the frame pointer if any.
    int64_t InitSlot = 8 + CalleeSaveSize + (HasFP ? 8 : 0);
    // Assign the initial slot to both registers, then change RDX's slot if both
    // need to be spilled.
    if (IsRCXLiveIn)
      RCXShadowSlot = InitSlot;
    if (IsRDXLiveIn)
      RDXShadowSlot = InitSlot;
    if (IsRDXLiveIn && IsRCXLiveIn)
      RDXShadowSlot += 8;
    // Emit the saves if needed.
    if (IsRCXLiveIn)
      addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false,
                   RCXShadowSlot)
          .addReg(X86::RCX);
    if (IsRDXLiveIn)
      addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false,
                   RDXShadowSlot)
          .addReg(X86::RDX);
  } else {
    // Not in the prolog. Copy RAX to a virtual reg.
    BuildMI(&MBB, DL, TII.get(X86::MOV64rr), SizeReg).addReg(X86::RAX);
  }

  // Add code to MBB to check for overflow and set the new target stack pointer
  // to zero if so.
  // XOR with both operands Undef materializes zero without reading ZeroReg.
  BuildMI(&MBB, DL, TII.get(X86::XOR64rr), ZeroReg)
      .addReg(ZeroReg, RegState::Undef)
      .addReg(ZeroReg, RegState::Undef);
  BuildMI(&MBB, DL, TII.get(X86::MOV64rr), CopyReg).addReg(X86::RSP);
  BuildMI(&MBB, DL, TII.get(X86::SUB64rr), TestReg)
      .addReg(CopyReg)
      .addReg(SizeReg);
  // If the subtraction borrowed (COND_B), the allocation would underflow the
  // address space; clamp the target stack pointer to zero.
  BuildMI(&MBB, DL, TII.get(X86::CMOV64rr), FinalReg)
      .addReg(TestReg)
      .addReg(ZeroReg)
      .addImm(X86::COND_B);

  // FinalReg now holds final stack pointer value, or zero if
  // allocation would overflow. Compare against the current stack
  // limit from the thread environment block. Note this limit is the
  // lowest touched page on the stack, not the point at which the OS
  // will cause an overflow exception, so this is just an optimization
  // to avoid unnecessarily touching pages that are below the current
  // SP but already committed to the stack by the OS.
  BuildMI(&MBB, DL, TII.get(X86::MOV64rm), LimitReg)
      .addReg(0)
      .addImm(1)
      .addReg(0)
      .addImm(ThreadEnvironmentStackLimit)
      .addReg(X86::GS);
  BuildMI(&MBB, DL, TII.get(X86::CMP64rr)).addReg(FinalReg).addReg(LimitReg);
  // Jump if the desired stack pointer is at or above the stack limit.
  BuildMI(&MBB, DL, TII.get(X86::JCC_1)).addMBB(ContinueMBB).addImm(X86::COND_AE);

  // Add code to roundMBB to round the final stack pointer to a page boundary.
  RoundMBB->addLiveIn(FinalReg);
  BuildMI(RoundMBB, DL, TII.get(X86::AND64ri32), RoundedReg)
      .addReg(FinalReg)
      .addImm(PageMask);
  BuildMI(RoundMBB, DL, TII.get(X86::JMP_1)).addMBB(LoopMBB);

  // LimitReg now holds the current stack limit, RoundedReg page-rounded
  // final RSP value. Add code to loopMBB to decrement LimitReg page-by-page
  // and probe until we reach RoundedReg.
  if (!InProlog) {
    // Virtual-register form needs an explicit PHI; the prolog form reuses
    // RCX for LimitReg/JoinReg/ProbeReg, so no PHI is required there.
    BuildMI(LoopMBB, DL, TII.get(X86::PHI), JoinReg)
        .addReg(LimitReg)
        .addMBB(RoundMBB)
        .addReg(ProbeReg)
        .addMBB(LoopMBB);
  }

  LoopMBB->addLiveIn(JoinReg);
  addRegOffset(BuildMI(LoopMBB, DL, TII.get(X86::LEA64r), ProbeReg), JoinReg,
               false, -PageSize);

  // Probe by storing a byte onto the stack.
  BuildMI(LoopMBB, DL, TII.get(X86::MOV8mi))
      .addReg(ProbeReg)
      .addImm(1)
      .addReg(0)
      .addImm(0)
      .addReg(0)
      .addImm(0);

  LoopMBB->addLiveIn(RoundedReg);
  BuildMI(LoopMBB, DL, TII.get(X86::CMP64rr))
      .addReg(RoundedReg)
      .addReg(ProbeReg);
  BuildMI(LoopMBB, DL, TII.get(X86::JCC_1)).addMBB(LoopMBB).addImm(X86::COND_NE);

  MachineBasicBlock::iterator ContinueMBBI = ContinueMBB->getFirstNonPHI();

  // If in prolog, restore RDX and RCX.
  if (InProlog) {
    if (RCXShadowSlot) // It means we spilled RCX in the prologue.
      addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL,
                           TII.get(X86::MOV64rm), X86::RCX),
                   X86::RSP, false, RCXShadowSlot);
    if (RDXShadowSlot) // It means we spilled RDX in the prologue.
      addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL,
                           TII.get(X86::MOV64rm), X86::RDX),
                   X86::RSP, false, RDXShadowSlot);
  }

  // Now that the probing is done, add code to continueMBB to update
  // the stack pointer for real.
  ContinueMBB->addLiveIn(SizeReg);
  BuildMI(*ContinueMBB, ContinueMBBI, DL, TII.get(X86::SUB64rr), X86::RSP)
      .addReg(X86::RSP)
      .addReg(SizeReg);

  // Add the control flow edges we need.
  MBB.addSuccessor(ContinueMBB);
  MBB.addSuccessor(RoundMBB);
  RoundMBB->addSuccessor(LoopMBB);
  LoopMBB->addSuccessor(ContinueMBB);
  LoopMBB->addSuccessor(LoopMBB);

  // Mark all the instructions added to the prolog as frame setup.
  if (InProlog) {
    for (++BeforeMBBI; BeforeMBBI != MBB.end(); ++BeforeMBBI) {
      BeforeMBBI->setFlag(MachineInstr::FrameSetup);
    }
    for (MachineInstr &MI : *RoundMBB) {
      MI.setFlag(MachineInstr::FrameSetup);
    }
    for (MachineInstr &MI : *LoopMBB) {
      MI.setFlag(MachineInstr::FrameSetup);
    }
    for (MachineBasicBlock::iterator CMBBI = ContinueMBB->begin();
         CMBBI != ContinueMBBI; ++CMBBI) {
      CMBBI->setFlag(MachineInstr::FrameSetup);
    }
  }
}
976 | ||||
977 | void X86FrameLowering::emitStackProbeCall(MachineFunction &MF, | |||
978 | MachineBasicBlock &MBB, | |||
979 | MachineBasicBlock::iterator MBBI, | |||
980 | const DebugLoc &DL, | |||
981 | bool InProlog) const { | |||
982 | bool IsLargeCodeModel = MF.getTarget().getCodeModel() == CodeModel::Large; | |||
983 | ||||
984 | // FIXME: Add indirect thunk support and remove this. | |||
985 | if (Is64Bit && IsLargeCodeModel && STI.useIndirectThunkCalls()) | |||
986 | report_fatal_error("Emitting stack probe calls on 64-bit with the large " | |||
987 | "code model and indirect thunks not yet implemented."); | |||
988 | ||||
989 | unsigned CallOp; | |||
990 | if (Is64Bit) | |||
991 | CallOp = IsLargeCodeModel ? X86::CALL64r : X86::CALL64pcrel32; | |||
992 | else | |||
993 | CallOp = X86::CALLpcrel32; | |||
994 | ||||
995 | StringRef Symbol = STI.getTargetLowering()->getStackProbeSymbolName(MF); | |||
996 | ||||
997 | MachineInstrBuilder CI; | |||
998 | MachineBasicBlock::iterator ExpansionMBBI = std::prev(MBBI); | |||
999 | ||||
1000 | // All current stack probes take AX and SP as input, clobber flags, and | |||
1001 | // preserve all registers. x86_64 probes leave RSP unmodified. | |||
1002 | if (Is64Bit && MF.getTarget().getCodeModel() == CodeModel::Large) { | |||
1003 | // For the large code model, we have to call through a register. Use R11, | |||
1004 | // as it is scratch in all supported calling conventions. | |||
1005 | BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::R11) | |||
1006 | .addExternalSymbol(MF.createExternalSymbolName(Symbol)); | |||
1007 | CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp)).addReg(X86::R11); | |||
1008 | } else { | |||
1009 | CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp)) | |||
1010 | .addExternalSymbol(MF.createExternalSymbolName(Symbol)); | |||
1011 | } | |||
1012 | ||||
1013 | unsigned AX = Uses64BitFramePtr ? X86::RAX : X86::EAX; | |||
1014 | unsigned SP = Uses64BitFramePtr ? X86::RSP : X86::ESP; | |||
1015 | CI.addReg(AX, RegState::Implicit) | |||
1016 | .addReg(SP, RegState::Implicit) | |||
1017 | .addReg(AX, RegState::Define | RegState::Implicit) | |||
1018 | .addReg(SP, RegState::Define | RegState::Implicit) | |||
1019 | .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit); | |||
1020 | ||||
1021 | if (STI.isTargetWin64() || !STI.isOSWindows()) { | |||
1022 | // MSVC x32's _chkstk and cygwin/mingw's _alloca adjust %esp themselves. | |||
1023 | // MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp | |||
1024 | // themselves. They also does not clobber %rax so we can reuse it when | |||
1025 | // adjusting %rsp. | |||
1026 | // All other platforms do not specify a particular ABI for the stack probe | |||
1027 | // function, so we arbitrarily define it to not adjust %esp/%rsp itself. | |||
1028 | BuildMI(MBB, MBBI, DL, TII.get(getSUBrrOpcode(Uses64BitFramePtr)), SP) | |||
1029 | .addReg(SP) | |||
1030 | .addReg(AX); | |||
1031 | } | |||
1032 | ||||
1033 | if (InProlog) { | |||
1034 | // Apply the frame setup flag to all inserted instrs. | |||
1035 | for (++ExpansionMBBI; ExpansionMBBI != MBBI; ++ExpansionMBBI) | |||
1036 | ExpansionMBBI->setFlag(MachineInstr::FrameSetup); | |||
1037 | } | |||
1038 | } | |||
1039 | ||||
/// Compute the frame-pointer offset for the Win64 UWOP_SET_FPREG unwind
/// opcode, given the prologue's stack-pointer adjustment.
static unsigned calculateSetFPREG(uint64_t SPAdjust) {
  // Win64 ABI has a less restrictive limitation of 240; 128 works equally well
  // and might require smaller successive adjustments.
  constexpr uint64_t Win64MaxSEHOffset = 128;
  const uint64_t Clamped =
      SPAdjust > Win64MaxSEHOffset ? Win64MaxSEHOffset : SPAdjust;
  // Win64 ABI requires 16-byte alignment for the UWOP_SET_FPREG opcode.
  return Clamped & ~uint64_t(15);
}
1048 | ||||
1049 | // If we're forcing a stack realignment we can't rely on just the frame | |||
1050 | // info, we need to know the ABI stack alignment as well in case we | |||
1051 | // have a call out. Otherwise just make sure we have some alignment - we'll | |||
1052 | // go with the minimum SlotSize. | |||
1053 | uint64_t X86FrameLowering::calculateMaxStackAlign(const MachineFunction &MF) const { | |||
1054 | const MachineFrameInfo &MFI = MF.getFrameInfo(); | |||
1055 | Align MaxAlign = MFI.getMaxAlign(); // Desired stack alignment. | |||
1056 | Align StackAlign = getStackAlign(); | |||
1057 | if (MF.getFunction().hasFnAttribute("stackrealign")) { | |||
1058 | if (MFI.hasCalls()) | |||
1059 | MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign; | |||
1060 | else if (MaxAlign < SlotSize) | |||
1061 | MaxAlign = Align(SlotSize); | |||
1062 | } | |||
1063 | return MaxAlign.value(); | |||
1064 | } | |||
1065 | ||||
/// Align \p Reg down to \p MaxAlign by ANDing it with -MaxAlign.
///
/// When \p Reg is the stack pointer, inline stack probing is enabled, and the
/// alignment gap can reach a full probe page (MaxAlign >= StackProbeSize),
/// the plain AND is replaced with a probing loop (entry/head/body/foot blocks)
/// so that every page between the old and the aligned stack pointer is
/// touched before SP is finally moved.  Otherwise a single AND instruction
/// is emitted.
void X86FrameLowering::BuildStackAlignAND(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator MBBI,
                                          const DebugLoc &DL, unsigned Reg,
                                          uint64_t MaxAlign) const {
  // -MaxAlign is the mask that clears the low log2(MaxAlign) bits.
  uint64_t Val = -MaxAlign;
  unsigned AndOp = getANDriOpcode(Uses64BitFramePtr, Val);

  MachineFunction &MF = *MBB.getParent();
  const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>();
  const X86TargetLowering &TLI = *STI.getTargetLowering();
  const uint64_t StackProbeSize = TLI.getStackProbeSize(MF);
  const bool EmitInlineStackProbe = TLI.hasInlineStackProbe(MF);

  // We want to make sure that (in worst case) less than StackProbeSize bytes
  // are not probed after the AND. This assumption is used in
  // emitStackProbeInlineGeneric.
  if (Reg == StackPtr && EmitInlineStackProbe && MaxAlign >= StackProbeSize) {
    {
      NumFrameLoopProbe++;
      MachineBasicBlock *entryMBB =
          MF.CreateMachineBasicBlock(MBB.getBasicBlock());
      MachineBasicBlock *headMBB =
          MF.CreateMachineBasicBlock(MBB.getBasicBlock());
      MachineBasicBlock *bodyMBB =
          MF.CreateMachineBasicBlock(MBB.getBasicBlock());
      MachineBasicBlock *footMBB =
          MF.CreateMachineBasicBlock(MBB.getBasicBlock());

      // Insert the four new blocks before MBB; MBB becomes the join point.
      MachineFunction::iterator MBBIter = MBB.getIterator();
      MF.insert(MBBIter, entryMBB);
      MF.insert(MBBIter, headMBB);
      MF.insert(MBBIter, bodyMBB);
      MF.insert(MBBIter, footMBB);
      const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi;
      // Scratch register holding the aligned target stack pointer.
      Register FinalStackProbed = Uses64BitFramePtr ? X86::R11
                                  : Is64Bit         ? X86::R11D
                                                    : X86::EAX;

      // Setup entry block: compute the aligned SP into FinalStackProbed and
      // skip the probing loop entirely if SP is already aligned.
      {

        entryMBB->splice(entryMBB->end(), &MBB, MBB.begin(), MBBI);
        BuildMI(entryMBB, DL, TII.get(TargetOpcode::COPY), FinalStackProbed)
            .addReg(StackPtr)
            .setMIFlag(MachineInstr::FrameSetup);
        MachineInstr *MI =
            BuildMI(entryMBB, DL, TII.get(AndOp), FinalStackProbed)
                .addReg(FinalStackProbed)
                .addImm(Val)
                .setMIFlag(MachineInstr::FrameSetup);

        // The EFLAGS implicit def is dead.
        MI->getOperand(3).setIsDead();

        BuildMI(entryMBB, DL,
                TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
            .addReg(FinalStackProbed)
            .addReg(StackPtr)
            .setMIFlag(MachineInstr::FrameSetup);
        BuildMI(entryMBB, DL, TII.get(X86::JCC_1))
            .addMBB(&MBB)
            .addImm(X86::COND_E)
            .setMIFlag(MachineInstr::FrameSetup);
        entryMBB->addSuccessor(headMBB);
        entryMBB->addSuccessor(&MBB);
      }

      // Loop entry block: take the first page off SP and decide whether the
      // loop body is needed at all.

      {
        const unsigned SUBOpc =
            getSUBriOpcode(Uses64BitFramePtr, StackProbeSize);
        BuildMI(headMBB, DL, TII.get(SUBOpc), StackPtr)
            .addReg(StackPtr)
            .addImm(StackProbeSize)
            .setMIFlag(MachineInstr::FrameSetup);

        BuildMI(headMBB, DL,
                TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
            .addReg(FinalStackProbed)
            .addReg(StackPtr)
            .setMIFlag(MachineInstr::FrameSetup);

        // jump to the footer if we have already passed the aligned target
        BuildMI(headMBB, DL, TII.get(X86::JCC_1))
            .addMBB(footMBB)
            .addImm(X86::COND_B)
            .setMIFlag(MachineInstr::FrameSetup);

        headMBB->addSuccessor(bodyMBB);
        headMBB->addSuccessor(footMBB);
      }

      // setup loop body: touch the current page, then advance SP by one
      // probe-size step until it passes FinalStackProbed.
      {
        addRegOffset(BuildMI(bodyMBB, DL, TII.get(MovMIOpc))
                         .setMIFlag(MachineInstr::FrameSetup),
                     StackPtr, false, 0)
            .addImm(0)
            .setMIFlag(MachineInstr::FrameSetup);

        const unsigned SUBOpc =
            getSUBriOpcode(Uses64BitFramePtr, StackProbeSize);
        BuildMI(bodyMBB, DL, TII.get(SUBOpc), StackPtr)
            .addReg(StackPtr)
            .addImm(StackProbeSize)
            .setMIFlag(MachineInstr::FrameSetup);

        // cmp with stack pointer bound
        BuildMI(bodyMBB, DL,
                TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
            .addReg(FinalStackProbed)
            .addReg(StackPtr)
            .setMIFlag(MachineInstr::FrameSetup);

        // jump back while more pages remain
        BuildMI(bodyMBB, DL, TII.get(X86::JCC_1))
            .addMBB(bodyMBB)
            .addImm(X86::COND_B)
            .setMIFlag(MachineInstr::FrameSetup);
        bodyMBB->addSuccessor(bodyMBB);
        bodyMBB->addSuccessor(footMBB);
      }

      // setup loop footer: snap SP to the aligned value and touch the final
      // page.
      {
        BuildMI(footMBB, DL, TII.get(TargetOpcode::COPY), StackPtr)
            .addReg(FinalStackProbed)
            .setMIFlag(MachineInstr::FrameSetup);
        addRegOffset(BuildMI(footMBB, DL, TII.get(MovMIOpc))
                         .setMIFlag(MachineInstr::FrameSetup),
                     StackPtr, false, 0)
            .addImm(0)
            .setMIFlag(MachineInstr::FrameSetup);
        footMBB->addSuccessor(&MBB);
      }

      recomputeLiveIns(*headMBB);
      recomputeLiveIns(*bodyMBB);
      recomputeLiveIns(*footMBB);
      recomputeLiveIns(MBB);
    }
  } else {
    // No probing needed: a single AND aligns the register.
    MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AndOp), Reg)
                           .addReg(Reg)
                           .addImm(Val)
                           .setMIFlag(MachineInstr::FrameSetup);

    // The EFLAGS implicit def is dead.
    MI->getOperand(3).setIsDead();
  }
}
1218 | ||||
1219 | // FIXME: Get this from tablegen. | |||
1220 | static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv, | |||
1221 | const X86Subtarget &Subtarget) { | |||
1222 | assert(Subtarget.is64Bit())((void)0); | |||
1223 | ||||
1224 | if (Subtarget.isCallingConvWin64(CallConv)) { | |||
1225 | static const MCPhysReg GPR64ArgRegsWin64[] = { | |||
1226 | X86::RCX, X86::RDX, X86::R8, X86::R9 | |||
1227 | }; | |||
1228 | return makeArrayRef(std::begin(GPR64ArgRegsWin64), std::end(GPR64ArgRegsWin64)); | |||
1229 | } | |||
1230 | ||||
1231 | static const MCPhysReg GPR64ArgRegs64Bit[] = { | |||
1232 | X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9 | |||
1233 | }; | |||
1234 | return makeArrayRef(std::begin(GPR64ArgRegs64Bit), std::end(GPR64ArgRegs64Bit)); | |||
1235 | } | |||
1236 | ||||
1237 | bool X86FrameLowering::has128ByteRedZone(const MachineFunction& MF) const { | |||
1238 | // x86-64 (non Win64) has a 128 byte red zone which is guaranteed not to be | |||
1239 | // clobbered by any interrupt handler. | |||
1240 | assert(&STI == &MF.getSubtarget<X86Subtarget>() &&((void)0) | |||
1241 | "MF used frame lowering for wrong subtarget")((void)0); | |||
1242 | const Function &Fn = MF.getFunction(); | |||
1243 | const bool IsWin64CC = STI.isCallingConvWin64(Fn.getCallingConv()); | |||
1244 | return Is64Bit && !IsWin64CC && !Fn.hasFnAttribute(Attribute::NoRedZone); | |||
1245 | } | |||
1246 | ||||
1247 | bool X86FrameLowering::isWin64Prologue(const MachineFunction &MF) const { | |||
1248 | return MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); | |||
1249 | } | |||
1250 | ||||
1251 | bool X86FrameLowering::needsDwarfCFI(const MachineFunction &MF) const { | |||
1252 | return !isWin64Prologue(MF) && MF.needsFrameMoves(); | |||
1253 | } | |||
1254 | ||||
1255 | /// emitPrologue - Push callee-saved registers onto the stack, which | |||
1256 | /// automatically adjust the stack pointer. Adjust the stack pointer to allocate | |||
1257 | /// space for local variables. Also emit labels used by the exception handler to | |||
1258 | /// generate the exception handling frames. | |||
1259 | ||||
1260 | /* | |||
1261 | Here's a gist of what gets emitted: | |||
1262 | ||||
1263 | ; Establish frame pointer, if needed | |||
1264 | [if needs FP] | |||
1265 | push %rbp | |||
1266 | .cfi_def_cfa_offset 16 | |||
1267 | .cfi_offset %rbp, -16 | |||
        .seh_pushreg %rbp
1269 | mov %rsp, %rbp | |||
1270 | .cfi_def_cfa_register %rbp | |||
1271 | ||||
1272 | ; Spill general-purpose registers | |||
1273 | [for all callee-saved GPRs] | |||
1274 | pushq %<reg> | |||
1275 | [if not needs FP] | |||
1276 | .cfi_def_cfa_offset (offset from RETADDR) | |||
1277 | .seh_pushreg %<reg> | |||
1278 | ||||
1279 | ; If the required stack alignment > default stack alignment | |||
1280 | ; rsp needs to be re-aligned. This creates a "re-alignment gap" | |||
1281 | ; of unknown size in the stack frame. | |||
1282 | [if stack needs re-alignment] | |||
1283 | and $MASK, %rsp | |||
1284 | ||||
1285 | ; Allocate space for locals | |||
1286 | [if target is Windows and allocated space > 4096 bytes] | |||
1287 | ; Windows needs special care for allocations larger | |||
1288 | ; than one page. | |||
1289 | mov $NNN, %rax | |||
1290 | call ___chkstk_ms/___chkstk | |||
1291 | sub %rax, %rsp | |||
1292 | [else] | |||
1293 | sub $NNN, %rsp | |||
1294 | ||||
1295 | [if needs FP] | |||
1296 | .seh_stackalloc (size of XMM spill slots) | |||
1297 | .seh_setframe %rbp, SEHFrameOffset ; = size of all spill slots | |||
1298 | [else] | |||
1299 | .seh_stackalloc NNN | |||
1300 | ||||
1301 | ; Spill XMMs | |||
1302 | ; Note, that while only Windows 64 ABI specifies XMMs as callee-preserved, | |||
1303 | ; they may get spilled on any platform, if the current function | |||
1304 | ; calls @llvm.eh.unwind.init | |||
1305 | [if needs FP] | |||
1306 | [for all callee-saved XMM registers] | |||
1307 | movaps %<xmm reg>, -MMM(%rbp) | |||
1308 | [for all callee-saved XMM registers] | |||
1309 | .seh_savexmm %<xmm reg>, (-MMM + SEHFrameOffset) | |||
1310 | ; i.e. the offset relative to (%rbp - SEHFrameOffset) | |||
1311 | [else] | |||
1312 | [for all callee-saved XMM registers] | |||
1313 | movaps %<xmm reg>, KKK(%rsp) | |||
1314 | [for all callee-saved XMM registers] | |||
1315 | .seh_savexmm %<xmm reg>, KKK | |||
1316 | ||||
1317 | .seh_endprologue | |||
1318 | ||||
1319 | [if needs base pointer] | |||
1320 | mov %rsp, %rbx | |||
1321 | [if needs to restore base pointer] | |||
1322 | mov %rsp, -MMM(%rbp) | |||
1323 | ||||
1324 | ; Emit CFI info | |||
1325 | [if needs FP] | |||
1326 | [for all callee-saved registers] | |||
1327 | .cfi_offset %<reg>, (offset from %rbp) | |||
1328 | [else] | |||
1329 | .cfi_def_cfa_offset (offset from RETADDR) | |||
1330 | [for all callee-saved registers] | |||
1331 | .cfi_offset %<reg>, (offset from %rsp) | |||
1332 | ||||
1333 | Notes: | |||
1334 | - .seh directives are emitted only for Windows 64 ABI | |||
1335 | - .cv_fpo directives are emitted on win32 when emitting CodeView | |||
1336 | - .cfi directives are emitted for all other ABIs | |||
1337 | - for 32-bit code, substitute %e?? registers for %r?? | |||
1338 | */ | |||
1339 | ||||
1340 | void X86FrameLowering::emitPrologue(MachineFunction &MF, | |||
1341 | MachineBasicBlock &MBB) const { | |||
1342 | assert(&STI == &MF.getSubtarget<X86Subtarget>() &&((void)0) | |||
1343 | "MF used frame lowering for wrong subtarget")((void)0); | |||
1344 | MachineBasicBlock::iterator MBBI = MBB.begin(); | |||
1345 | MachineFrameInfo &MFI = MF.getFrameInfo(); | |||
1346 | const Function &Fn = MF.getFunction(); | |||
1347 | MachineModuleInfo &MMI = MF.getMMI(); | |||
1348 | X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); | |||
1349 | uint64_t MaxAlign = calculateMaxStackAlign(MF); // Desired stack alignment. | |||
| ||||
1350 | uint64_t StackSize = MFI.getStackSize(); // Number of bytes to allocate. | |||
1351 | bool IsFunclet = MBB.isEHFuncletEntry(); | |||
1352 | EHPersonality Personality = EHPersonality::Unknown; | |||
1353 | if (Fn.hasPersonalityFn()) | |||
1354 | Personality = classifyEHPersonality(Fn.getPersonalityFn()); | |||
1355 | bool FnHasClrFunclet = | |||
1356 | MF.hasEHFunclets() && Personality == EHPersonality::CoreCLR; | |||
1357 | bool IsClrFunclet = IsFunclet && FnHasClrFunclet; | |||
1358 | bool HasFP = hasFP(MF); | |||
1359 | bool IsWin64Prologue = isWin64Prologue(MF); | |||
1360 | bool NeedsWin64CFI = IsWin64Prologue && Fn.needsUnwindTableEntry(); | |||
1361 | // FIXME: Emit FPO data for EH funclets. | |||
1362 | bool NeedsWinFPO = | |||
1363 | !IsFunclet && STI.isTargetWin32() && MMI.getModule()->getCodeViewFlag(); | |||
1364 | bool NeedsWinCFI = NeedsWin64CFI || NeedsWinFPO; | |||
1365 | bool NeedsDwarfCFI = needsDwarfCFI(MF); | |||
1366 | Register FramePtr = TRI->getFrameRegister(MF); | |||
1367 | const Register MachineFramePtr = | |||
1368 | STI.isTarget64BitILP32() | |||
1369 | ? Register(getX86SubSuperRegister(FramePtr, 64)) : FramePtr; | |||
1370 | Register BasePtr = TRI->getBaseRegister(); | |||
1371 | bool HasWinCFI = false; | |||
1372 | ||||
1373 | // Debug location must be unknown since the first debug location is used | |||
1374 | // to determine the end of the prologue. | |||
1375 | DebugLoc DL; | |||
1376 | ||||
1377 | // Add RETADDR move area to callee saved frame size. | |||
1378 | int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); | |||
1379 | if (TailCallReturnAddrDelta && IsWin64Prologue) | |||
1380 | report_fatal_error("Can't handle guaranteed tail call under win64 yet"); | |||
1381 | ||||
1382 | if (TailCallReturnAddrDelta < 0) | |||
1383 | X86FI->setCalleeSavedFrameSize( | |||
1384 | X86FI->getCalleeSavedFrameSize() - TailCallReturnAddrDelta); | |||
1385 | ||||
1386 | const bool EmitStackProbeCall = | |||
1387 | STI.getTargetLowering()->hasStackProbeSymbol(MF); | |||
1388 | unsigned StackProbeSize = STI.getTargetLowering()->getStackProbeSize(MF); | |||
1389 | ||||
1390 | if (HasFP && X86FI->hasSwiftAsyncContext()) { | |||
1391 | BuildMI(MBB, MBBI, DL, TII.get(X86::BTS64ri8), | |||
1392 | MachineFramePtr) | |||
1393 | .addUse(MachineFramePtr) | |||
1394 | .addImm(60) | |||
1395 | .setMIFlag(MachineInstr::FrameSetup); | |||
1396 | } | |||
1397 | ||||
1398 | // Re-align the stack on 64-bit if the x86-interrupt calling convention is | |||
1399 | // used and an error code was pushed, since the x86-64 ABI requires a 16-byte | |||
1400 | // stack alignment. | |||
1401 | if (Fn.getCallingConv() == CallingConv::X86_INTR && Is64Bit && | |||
1402 | Fn.arg_size() == 2) { | |||
1403 | StackSize += 8; | |||
1404 | MFI.setStackSize(StackSize); | |||
1405 | emitSPUpdate(MBB, MBBI, DL, -8, /*InEpilogue=*/false); | |||
1406 | } | |||
1407 | ||||
1408 | // If this is x86-64 and the Red Zone is not disabled, if we are a leaf | |||
1409 | // function, and use up to 128 bytes of stack space, don't have a frame | |||
1410 | // pointer, calls, or dynamic alloca then we do not need to adjust the | |||
1411 | // stack pointer (we fit in the Red Zone). We also check that we don't | |||
1412 | // push and pop from the stack. | |||
1413 | if (has128ByteRedZone(MF) && !TRI->hasStackRealignment(MF) && | |||
1414 | !MFI.hasVarSizedObjects() && // No dynamic alloca. | |||
1415 | !MFI.adjustsStack() && // No calls. | |||
1416 | !EmitStackProbeCall && // No stack probes. | |||
1417 | !MFI.hasCopyImplyingStackAdjustment() && // Don't push and pop. | |||
1418 | !MF.shouldSplitStack()) { // Regular stack | |||
1419 | uint64_t MinSize = X86FI->getCalleeSavedFrameSize(); | |||
1420 | if (HasFP) MinSize += SlotSize; | |||
1421 | X86FI->setUsesRedZone(MinSize > 0 || StackSize > 0); | |||
1422 | StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0); | |||
1423 | MFI.setStackSize(StackSize); | |||
1424 | } | |||
1425 | ||||
1426 | // Insert stack pointer adjustment for later moving of return addr. Only | |||
1427 | // applies to tail call optimized functions where the callee argument stack | |||
1428 | // size is bigger than the callers. | |||
1429 | if (TailCallReturnAddrDelta < 0) { | |||
1430 | BuildStackAdjustment(MBB, MBBI, DL, TailCallReturnAddrDelta, | |||
1431 | /*InEpilogue=*/false) | |||
1432 | .setMIFlag(MachineInstr::FrameSetup); | |||
1433 | } | |||
1434 | ||||
1435 | // Mapping for machine moves: | |||
1436 | // | |||
1437 | // DST: VirtualFP AND | |||
1438 | // SRC: VirtualFP => DW_CFA_def_cfa_offset | |||
1439 | // ELSE => DW_CFA_def_cfa | |||
1440 | // | |||
1441 | // SRC: VirtualFP AND | |||
1442 | // DST: Register => DW_CFA_def_cfa_register | |||
1443 | // | |||
1444 | // ELSE | |||
1445 | // OFFSET < 0 => DW_CFA_offset_extended_sf | |||
1446 | // REG < 64 => DW_CFA_offset + Reg | |||
1447 | // ELSE => DW_CFA_offset_extended | |||
1448 | ||||
1449 | uint64_t NumBytes = 0; | |||
1450 | int stackGrowth = -SlotSize; | |||
1451 | ||||
1452 | // Find the funclet establisher parameter | |||
1453 | Register Establisher = X86::NoRegister; | |||
1454 | if (IsClrFunclet) | |||
1455 | Establisher = Uses64BitFramePtr ? X86::RCX : X86::ECX; | |||
1456 | else if (IsFunclet) | |||
1457 | Establisher = Uses64BitFramePtr ? X86::RDX : X86::EDX; | |||
1458 | ||||
1459 | if (IsWin64Prologue && IsFunclet && !IsClrFunclet) { | |||
1460 | // Immediately spill establisher into the home slot. | |||
1461 | // The runtime cares about this. | |||
1462 | // MOV64mr %rdx, 16(%rsp) | |||
1463 | unsigned MOVmr = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr; | |||
1464 | addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MOVmr)), StackPtr, true, 16) | |||
1465 | .addReg(Establisher) | |||
1466 | .setMIFlag(MachineInstr::FrameSetup); | |||
1467 | MBB.addLiveIn(Establisher); | |||
1468 | } | |||
1469 | ||||
1470 | if (HasFP) { | |||
1471 | assert(MF.getRegInfo().isReserved(MachineFramePtr) && "FP reserved")((void)0); | |||
1472 | ||||
1473 | // Calculate required stack adjustment. | |||
1474 | uint64_t FrameSize = StackSize - SlotSize; | |||
1475 | // If required, include space for extra hidden slot for stashing base pointer. | |||
1476 | if (X86FI->getRestoreBasePointer()) | |||
1477 | FrameSize += SlotSize; | |||
1478 | ||||
1479 | NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize(); | |||
1480 | ||||
1481 | // Callee-saved registers are pushed on stack before the stack is realigned. | |||
1482 | if (TRI->hasStackRealignment(MF) && !IsWin64Prologue) | |||
1483 | NumBytes = alignTo(NumBytes, MaxAlign); | |||
1484 | ||||
1485 | // Save EBP/RBP into the appropriate stack slot. | |||
1486 | BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r)) | |||
1487 | .addReg(MachineFramePtr, RegState::Kill) | |||
1488 | .setMIFlag(MachineInstr::FrameSetup); | |||
1489 | ||||
1490 | if (NeedsDwarfCFI) { | |||
1491 | // Mark the place where EBP/RBP was saved. | |||
1492 | // Define the current CFA rule to use the provided offset. | |||
1493 | assert(StackSize)((void)0); | |||
1494 | BuildCFI(MBB, MBBI, DL, | |||
1495 | MCCFIInstruction::cfiDefCfaOffset(nullptr, -2 * stackGrowth)); | |||
1496 | ||||
1497 | // Change the rule for the FramePtr to be an "offset" rule. | |||
1498 | unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true); | |||
1499 | BuildCFI(MBB, MBBI, DL, MCCFIInstruction::createOffset( | |||
1500 | nullptr, DwarfFramePtr, 2 * stackGrowth)); | |||
1501 | } | |||
1502 | ||||
1503 | if (NeedsWinCFI) { | |||
1504 | HasWinCFI = true; | |||
1505 | BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg)) | |||
1506 | .addImm(FramePtr) | |||
1507 | .setMIFlag(MachineInstr::FrameSetup); | |||
1508 | } | |||
1509 | ||||
1510 | if (!IsFunclet) { | |||
1511 | if (X86FI->hasSwiftAsyncContext()) { | |||
1512 | const auto &Attrs = MF.getFunction().getAttributes(); | |||
1513 | ||||
1514 | // Before we update the live frame pointer we have to ensure there's a | |||
1515 | // valid (or null) asynchronous context in its slot just before FP in | |||
1516 | // the frame record, so store it now. | |||
1517 | if (Attrs.hasAttrSomewhere(Attribute::SwiftAsync)) { | |||
1518 | // We have an initial context in r14, store it just before the frame | |||
1519 | // pointer. | |||
1520 | MBB.addLiveIn(X86::R14); | |||
1521 | BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r)) | |||
1522 | .addReg(X86::R14) | |||
1523 | .setMIFlag(MachineInstr::FrameSetup); | |||
1524 | } else { | |||
1525 | // No initial context, store null so that there's no pointer that | |||
1526 | // could be misused. | |||
1527 | BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64i8)) | |||
1528 | .addImm(0) | |||
1529 | .setMIFlag(MachineInstr::FrameSetup); | |||
1530 | } | |||
1531 | ||||
1532 | if (NeedsWinCFI) { | |||
1533 | HasWinCFI = true; | |||
1534 | BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg)) | |||
1535 | .addImm(X86::R14) | |||
1536 | .setMIFlag(MachineInstr::FrameSetup); | |||
1537 | } | |||
1538 | ||||
1539 | BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePtr) | |||
1540 | .addUse(X86::RSP) | |||
1541 | .addImm(1) | |||
1542 | .addUse(X86::NoRegister) | |||
1543 | .addImm(8) | |||
1544 | .addUse(X86::NoRegister) | |||
1545 | .setMIFlag(MachineInstr::FrameSetup); | |||
1546 | BuildMI(MBB, MBBI, DL, TII.get(X86::SUB64ri8), X86::RSP) | |||
1547 | .addUse(X86::RSP) | |||
1548 | .addImm(8) | |||
1549 | .setMIFlag(MachineInstr::FrameSetup); | |||
1550 | } | |||
1551 | ||||
1552 | if (!IsWin64Prologue && !IsFunclet) { | |||
1553 | // Update EBP with the new base value. | |||
1554 | if (!X86FI->hasSwiftAsyncContext()) | |||
1555 | BuildMI(MBB, MBBI, DL, | |||
1556 | TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr), | |||
1557 | FramePtr) | |||
1558 | .addReg(StackPtr) | |||
1559 | .setMIFlag(MachineInstr::FrameSetup); | |||
1560 | ||||
1561 | if (SaveArgs && !Fn.arg_empty()) { | |||
1562 | ArrayRef<MCPhysReg> GPRs = | |||
1563 | get64BitArgumentGPRs(Fn.getCallingConv(), STI); | |||
1564 | unsigned arg_size = Fn.arg_size(); | |||
1565 | unsigned RI = 0; | |||
1566 | int64_t SaveSize = 0; | |||
1567 | ||||
1568 | if (Fn.hasStructRetAttr()) { | |||
1569 | GPRs = GPRs.drop_front(1); | |||
1570 | arg_size--; | |||
1571 | } | |||
1572 | ||||
1573 | for (MCPhysReg Reg : GPRs) { | |||
1574 | if (++RI > arg_size) | |||
1575 | break; | |||
1576 | ||||
1577 | SaveSize += SlotSize; | |||
1578 | ||||
1579 | BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r)) | |||
1580 | .addReg(Reg) | |||
1581 | .setMIFlag(MachineInstr::FrameSetup); | |||
1582 | } | |||
1583 | ||||
1584 | // Realign the stack. PUSHes are the most space efficient. | |||
1585 | while (SaveSize % getStackAlignment()) { | |||
1586 | BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r)) | |||
1587 | .addReg(GPRs.front()) | |||
1588 | .setMIFlag(MachineInstr::FrameSetup); | |||
1589 | ||||
1590 | SaveSize += SlotSize; | |||
1591 | } | |||
1592 | ||||
1593 | //dlg StackSize -= SaveSize; | |||
1594 | //dlg MFI.setStackSize(StackSize); | |||
1595 | X86FI->setSaveArgSize(SaveSize); | |||
1596 | } | |||
1597 | ||||
1598 | if (NeedsDwarfCFI) { | |||
1599 | // Mark effective beginning of when frame pointer becomes valid. | |||
1600 | // Define the current CFA to use the EBP/RBP register. | |||
1601 | unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true); | |||
1602 | BuildCFI( | |||
1603 | MBB, MBBI, DL, | |||
1604 | MCCFIInstruction::createDefCfaRegister(nullptr, DwarfFramePtr)); | |||
1605 | } | |||
1606 | ||||
1607 | if (NeedsWinFPO) { | |||
1608 | // .cv_fpo_setframe $FramePtr | |||
1609 | HasWinCFI = true; | |||
1610 | BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame)) | |||
1611 | .addImm(FramePtr) | |||
1612 | .addImm(0) | |||
1613 | .setMIFlag(MachineInstr::FrameSetup); | |||
1614 | } | |||
1615 | } | |||
1616 | } | |||
1617 | } else { | |||
1618 | assert(!IsFunclet && "funclets without FPs not yet implemented")((void)0); | |||
1619 | NumBytes = StackSize - X86FI->getCalleeSavedFrameSize(); | |||
1620 | } | |||
1621 | ||||
1622 | // Update the offset adjustment, which is mainly used by codeview to translate | |||
1623 | // from ESP to VFRAME relative local variable offsets. | |||
1624 | if (!IsFunclet) { | |||
1625 | if (HasFP && TRI->hasStackRealignment(MF)) | |||
1626 | MFI.setOffsetAdjustment(-NumBytes); | |||
1627 | else | |||
1628 | MFI.setOffsetAdjustment(-StackSize); | |||
1629 | } | |||
1630 | ||||
1631 | // For EH funclets, only allocate enough space for outgoing calls. Save the | |||
1632 | // NumBytes value that we would've used for the parent frame. | |||
1633 | unsigned ParentFrameNumBytes = NumBytes; | |||
1634 | if (IsFunclet) | |||
1635 | NumBytes = getWinEHFuncletFrameSize(MF); | |||
1636 | ||||
1637 | // Skip the callee-saved push instructions. | |||
1638 | bool PushedRegs = false; | |||
1639 | int StackOffset = 2 * stackGrowth; | |||
1640 | ||||
1641 | while (MBBI != MBB.end() && | |||
1642 | MBBI->getFlag(MachineInstr::FrameSetup) && | |||
1643 | (MBBI->getOpcode() == X86::PUSH32r || | |||
1644 | MBBI->getOpcode() == X86::PUSH64r)) { | |||
1645 | PushedRegs = true; | |||
1646 | Register Reg = MBBI->getOperand(0).getReg(); | |||
1647 | ++MBBI; | |||
1648 | ||||
1649 | if (!HasFP && NeedsDwarfCFI) { | |||
1650 | // Mark callee-saved push instruction. | |||
1651 | // Define the current CFA rule to use the provided offset. | |||
1652 | assert(StackSize)((void)0); | |||
1653 | BuildCFI(MBB, MBBI, DL, | |||
1654 | MCCFIInstruction::cfiDefCfaOffset(nullptr, -StackOffset)); | |||
1655 | StackOffset += stackGrowth; | |||
1656 | } | |||
1657 | ||||
1658 | if (NeedsWinCFI) { | |||
1659 | HasWinCFI = true; | |||
1660 | BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg)) | |||
1661 | .addImm(Reg) | |||
1662 | .setMIFlag(MachineInstr::FrameSetup); | |||
1663 | } | |||
1664 | } | |||
1665 | ||||
1666 | // Realign stack after we pushed callee-saved registers (so that we'll be | |||
1667 | // able to calculate their offsets from the frame pointer). | |||
1668 | // Don't do this for Win64, it needs to realign the stack after the prologue. | |||
1669 | if (!IsWin64Prologue && !IsFunclet && TRI->hasStackRealignment(MF)) { | |||
1670 | assert(HasFP && "There should be a frame pointer if stack is realigned.")((void)0); | |||
1671 | BuildStackAlignAND(MBB, MBBI, DL, StackPtr, MaxAlign); | |||
1672 | ||||
1673 | if (NeedsWinCFI) { | |||
1674 | HasWinCFI = true; | |||
1675 | BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlign)) | |||
1676 | .addImm(MaxAlign) | |||
1677 | .setMIFlag(MachineInstr::FrameSetup); | |||
1678 | } | |||
1679 | } | |||
1680 | ||||
1681 | // If there is an SUB32ri of ESP immediately before this instruction, merge | |||
1682 | // the two. This can be the case when tail call elimination is enabled and | |||
1683 | // the callee has more arguments then the caller. | |||
1684 | NumBytes -= mergeSPUpdates(MBB, MBBI, true); | |||
1685 | ||||
1686 | // Adjust stack pointer: ESP -= numbytes. | |||
1687 | ||||
1688 | // Windows and cygwin/mingw require a prologue helper routine when allocating | |||
1689 | // more than 4K bytes on the stack. Windows uses __chkstk and cygwin/mingw | |||
1690 | // uses __alloca. __alloca and the 32-bit version of __chkstk will probe the | |||
1691 | // stack and adjust the stack pointer in one go. The 64-bit version of | |||
1692 | // __chkstk is only responsible for probing the stack. The 64-bit prologue is | |||
1693 | // responsible for adjusting the stack pointer. Touching the stack at 4K | |||
1694 | // increments is necessary to ensure that the guard pages used by the OS | |||
1695 | // virtual memory manager are allocated in correct sequence. | |||
1696 | uint64_t AlignedNumBytes = NumBytes; | |||
1697 | if (IsWin64Prologue && !IsFunclet && TRI->hasStackRealignment(MF)) | |||
1698 | AlignedNumBytes = alignTo(AlignedNumBytes, MaxAlign); | |||
1699 | if (AlignedNumBytes >= StackProbeSize && EmitStackProbeCall) { | |||
1700 | assert(!X86FI->getUsesRedZone() &&((void)0) | |||
1701 | "The Red Zone is not accounted for in stack probes")((void)0); | |||
1702 | ||||
1703 | // Check whether EAX is livein for this block. | |||
1704 | bool isEAXAlive = isEAXLiveIn(MBB); | |||
1705 | ||||
1706 | if (isEAXAlive) { | |||
1707 | if (Is64Bit) { | |||
1708 | // Save RAX | |||
1709 | BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r)) | |||
1710 | .addReg(X86::RAX, RegState::Kill) | |||
1711 | .setMIFlag(MachineInstr::FrameSetup); | |||
1712 | } else { | |||
1713 | // Save EAX | |||
1714 | BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r)) | |||
1715 | .addReg(X86::EAX, RegState::Kill) | |||
1716 | .setMIFlag(MachineInstr::FrameSetup); | |||
1717 | } | |||
1718 | } | |||
1719 | ||||
1720 | if (Is64Bit) { | |||
1721 | // Handle the 64-bit Windows ABI case where we need to call __chkstk. | |||
1722 | // Function prologue is responsible for adjusting the stack pointer. | |||
1723 | int64_t Alloc = isEAXAlive ? NumBytes - 8 : NumBytes; | |||
1724 | if (isUInt<32>(Alloc)) { | |||
1725 | BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX) | |||
1726 | .addImm(Alloc) | |||
1727 | .setMIFlag(MachineInstr::FrameSetup); | |||
1728 | } else if (isInt<32>(Alloc)) { | |||
1729 | BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri32), X86::RAX) | |||
1730 | .addImm(Alloc) | |||
1731 | .setMIFlag(MachineInstr::FrameSetup); | |||
1732 | } else { | |||
1733 | BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::RAX) | |||
1734 | .addImm(Alloc) | |||
1735 | .setMIFlag(MachineInstr::FrameSetup); | |||
1736 | } | |||
1737 | } else { | |||
1738 | // Allocate NumBytes-4 bytes on stack in case of isEAXAlive. | |||
1739 | // We'll also use 4 already allocated bytes for EAX. | |||
1740 | BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX) | |||
1741 | .addImm(isEAXAlive ? NumBytes - 4 : NumBytes) | |||
1742 | .setMIFlag(MachineInstr::FrameSetup); | |||
1743 | } | |||
1744 | ||||
1745 | // Call __chkstk, __chkstk_ms, or __alloca. | |||
1746 | emitStackProbe(MF, MBB, MBBI, DL, true); | |||
1747 | ||||
1748 | if (isEAXAlive) { | |||
1749 | // Restore RAX/EAX | |||
1750 | MachineInstr *MI; | |||
1751 | if (Is64Bit) | |||
1752 | MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV64rm), X86::RAX), | |||
1753 | StackPtr, false, NumBytes - 8); | |||
1754 | else | |||
1755 | MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm), X86::EAX), | |||
1756 | StackPtr, false, NumBytes - 4); | |||
1757 | MI->setFlag(MachineInstr::FrameSetup); | |||
1758 | MBB.insert(MBBI, MI); | |||
1759 | } | |||
1760 | } else if (NumBytes) { | |||
1761 | emitSPUpdate(MBB, MBBI, DL, -(int64_t)NumBytes, /*InEpilogue=*/false); | |||
1762 | } | |||
1763 | ||||
1764 | if (NeedsWinCFI && NumBytes) { | |||
1765 | HasWinCFI = true; | |||
1766 | BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc)) | |||
1767 | .addImm(NumBytes) | |||
1768 | .setMIFlag(MachineInstr::FrameSetup); | |||
1769 | } | |||
1770 | ||||
1771 | int SEHFrameOffset = 0; | |||
1772 | unsigned SPOrEstablisher; | |||
1773 | if (IsFunclet) { | |||
1774 | if (IsClrFunclet) { | |||
1775 | // The establisher parameter passed to a CLR funclet is actually a pointer | |||
1776 | // to the (mostly empty) frame of its nearest enclosing funclet; we have | |||
1777 | // to find the root function establisher frame by loading the PSPSym from | |||
1778 | // the intermediate frame. | |||
1779 | unsigned PSPSlotOffset = getPSPSlotOffsetFromSP(MF); | |||
1780 | MachinePointerInfo NoInfo; | |||
1781 | MBB.addLiveIn(Establisher); | |||
1782 | addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rm), Establisher), | |||
1783 | Establisher, false, PSPSlotOffset) | |||
1784 | .addMemOperand(MF.getMachineMemOperand( | |||
1785 | NoInfo, MachineMemOperand::MOLoad, SlotSize, Align(SlotSize))); | |||
1786 | ; | |||
1787 | // Save the root establisher back into the current funclet's (mostly | |||
1788 | // empty) frame, in case a sub-funclet or the GC needs it. | |||
1789 | addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mr)), StackPtr, | |||
1790 | false, PSPSlotOffset) | |||
1791 | .addReg(Establisher) | |||
1792 | .addMemOperand(MF.getMachineMemOperand( | |||
1793 | NoInfo, | |||
1794 | MachineMemOperand::MOStore | MachineMemOperand::MOVolatile, | |||
1795 | SlotSize, Align(SlotSize))); | |||
1796 | } | |||
1797 | SPOrEstablisher = Establisher; | |||
1798 | } else { | |||
1799 | SPOrEstablisher = StackPtr; | |||
1800 | } | |||
1801 | ||||
1802 | if (IsWin64Prologue && HasFP) { | |||
1803 | // Set RBP to a small fixed offset from RSP. In the funclet case, we base | |||
1804 | // this calculation on the incoming establisher, which holds the value of | |||
1805 | // RSP from the parent frame at the end of the prologue. | |||
1806 | SEHFrameOffset = calculateSetFPREG(ParentFrameNumBytes); | |||
1807 | if (SEHFrameOffset) | |||
1808 | addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePtr), | |||
1809 | SPOrEstablisher, false, SEHFrameOffset); | |||
1810 | else | |||
1811 | BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rr), FramePtr) | |||
1812 | .addReg(SPOrEstablisher); | |||
1813 | ||||
1814 | // If this is not a funclet, emit the CFI describing our frame pointer. | |||
1815 | if (NeedsWinCFI && !IsFunclet) { | |||
1816 | assert(!NeedsWinFPO && "this setframe incompatible with FPO data")((void)0); | |||
1817 | HasWinCFI = true; | |||
1818 | BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame)) | |||
1819 | .addImm(FramePtr) | |||
1820 | .addImm(SEHFrameOffset) | |||
1821 | .setMIFlag(MachineInstr::FrameSetup); | |||
1822 | if (isAsynchronousEHPersonality(Personality)) | |||
1823 | MF.getWinEHFuncInfo()->SEHSetFrameOffset = SEHFrameOffset; | |||
1824 | } | |||
1825 | } else if (IsFunclet && STI.is32Bit()) { | |||
1826 | // Reset EBP / ESI to something good for funclets. | |||
1827 | MBBI = restoreWin32EHStackPointers(MBB, MBBI, DL); | |||
1828 | // If we're a catch funclet, we can be returned to via catchret. Save ESP | |||
1829 | // into the registration node so that the runtime will restore it for us. | |||
1830 | if (!MBB.isCleanupFuncletEntry()) { | |||
1831 | assert(Personality == EHPersonality::MSVC_CXX)((void)0); | |||
1832 | Register FrameReg; | |||
1833 | int FI = MF.getWinEHFuncInfo()->EHRegNodeFrameIndex; | |||
1834 | int64_t EHRegOffset = getFrameIndexReference(MF, FI, FrameReg).getFixed(); | |||
1835 | // ESP is the first field, so no extra displacement is needed. | |||
1836 | addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32mr)), FrameReg, | |||
1837 | false, EHRegOffset) | |||
1838 | .addReg(X86::ESP); | |||
1839 | } | |||
1840 | } | |||
1841 | ||||
1842 | while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup)) { | |||
1843 | const MachineInstr &FrameInstr = *MBBI; | |||
1844 | ++MBBI; | |||
1845 | ||||
1846 | if (NeedsWinCFI) { | |||
1847 | int FI; | |||
1848 | if (unsigned Reg = TII.isStoreToStackSlot(FrameInstr, FI)) { | |||
1849 | if (X86::FR64RegClass.contains(Reg)) { | |||
1850 | int Offset; | |||
1851 | Register IgnoredFrameReg; | |||
1852 | if (IsWin64Prologue && IsFunclet) | |||
1853 | Offset = getWin64EHFrameIndexRef(MF, FI, IgnoredFrameReg); | |||
1854 | else | |||
1855 | Offset = | |||
1856 | getFrameIndexReference(MF, FI, IgnoredFrameReg).getFixed() + | |||
1857 | SEHFrameOffset; | |||
1858 | ||||
1859 | HasWinCFI = true; | |||
1860 | assert(!NeedsWinFPO && "SEH_SaveXMM incompatible with FPO data")((void)0); | |||
1861 | BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SaveXMM)) | |||
1862 | .addImm(Reg) | |||
1863 | .addImm(Offset) | |||
1864 | .setMIFlag(MachineInstr::FrameSetup); | |||
1865 | } | |||
1866 | } | |||
1867 | } | |||
1868 | } | |||
1869 | ||||
1870 | if (NeedsWinCFI && HasWinCFI) | |||
1871 | BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_EndPrologue)) | |||
1872 | .setMIFlag(MachineInstr::FrameSetup); | |||
1873 | ||||
1874 | if (FnHasClrFunclet && !IsFunclet) { | |||
1875 | // Save the so-called Initial-SP (i.e. the value of the stack pointer | |||
1876 | // immediately after the prolog) into the PSPSlot so that funclets | |||
1877 | // and the GC can recover it. | |||
1878 | unsigned PSPSlotOffset = getPSPSlotOffsetFromSP(MF); | |||
1879 | auto PSPInfo = MachinePointerInfo::getFixedStack( | |||
1880 | MF, MF.getWinEHFuncInfo()->PSPSymFrameIdx); | |||
1881 | addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mr)), StackPtr, false, | |||
1882 | PSPSlotOffset) | |||
1883 | .addReg(StackPtr) | |||
1884 | .addMemOperand(MF.getMachineMemOperand( | |||
1885 | PSPInfo, MachineMemOperand::MOStore | MachineMemOperand::MOVolatile, | |||
1886 | SlotSize, Align(SlotSize))); | |||
1887 | } | |||
1888 | ||||
1889 | // Realign stack after we spilled callee-saved registers (so that we'll be | |||
1890 | // able to calculate their offsets from the frame pointer). | |||
1891 | // Win64 requires aligning the stack after the prologue. | |||
1892 | if (IsWin64Prologue && TRI->hasStackRealignment(MF)) { | |||
1893 | assert(HasFP && "There should be a frame pointer if stack is realigned.")((void)0); | |||
1894 | BuildStackAlignAND(MBB, MBBI, DL, SPOrEstablisher, MaxAlign); | |||
1895 | } | |||
1896 | ||||
1897 | // We already dealt with stack realignment and funclets above. | |||
1898 | if (IsFunclet && STI.is32Bit()) | |||
1899 | return; | |||
1900 | ||||
1901 | // If we need a base pointer, set it up here. It's whatever the value | |||
1902 | // of the stack pointer is at this point. Any variable size objects | |||
1903 | // will be allocated after this, so we can still use the base pointer | |||
1904 | // to reference locals. | |||
1905 | if (TRI->hasBasePointer(MF)) { | |||
1906 | // Update the base pointer with the current stack pointer. | |||
1907 | unsigned Opc = Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr; | |||
1908 | BuildMI(MBB, MBBI, DL, TII.get(Opc), BasePtr) | |||
1909 | .addReg(SPOrEstablisher) | |||
1910 | .setMIFlag(MachineInstr::FrameSetup); | |||
1911 | if (X86FI->getRestoreBasePointer()) { | |||
1912 | // Stash value of base pointer. Saving RSP instead of EBP shortens | |||
1913 | // dependence chain. Used by SjLj EH. | |||
1914 | unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr; | |||
1915 | addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)), | |||
1916 | FramePtr, true, X86FI->getRestoreBasePointerOffset()) | |||
1917 | .addReg(SPOrEstablisher) | |||
1918 | .setMIFlag(MachineInstr::FrameSetup); | |||
1919 | } | |||
1920 | ||||
1921 | if (X86FI->getHasSEHFramePtrSave() && !IsFunclet) { | |||
1922 | // Stash the value of the frame pointer relative to the base pointer for | |||
1923 | // Win32 EH. This supports Win32 EH, which does the inverse of the above: | |||
1924 | // it recovers the frame pointer from the base pointer rather than the | |||
1925 | // other way around. | |||
1926 | unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr; | |||
1927 | Register UsedReg; | |||
1928 | int Offset = | |||
1929 | getFrameIndexReference(MF, X86FI->getSEHFramePtrSaveIndex(), UsedReg) | |||
1930 | .getFixed(); | |||
1931 | assert(UsedReg == BasePtr)((void)0); | |||
1932 | addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)), UsedReg, true, Offset) | |||
1933 | .addReg(FramePtr) | |||
1934 | .setMIFlag(MachineInstr::FrameSetup); | |||
1935 | } | |||
1936 | } | |||
1937 | ||||
1938 | if (((!HasFP && NumBytes) || PushedRegs) && NeedsDwarfCFI) { | |||
1939 | // Mark end of stack pointer adjustment. | |||
1940 | if (!HasFP && NumBytes) { | |||
1941 | // Define the current CFA rule to use the provided offset. | |||
1942 | assert(StackSize)((void)0); | |||
1943 | BuildCFI( | |||
1944 | MBB, MBBI, DL, | |||
1945 | MCCFIInstruction::cfiDefCfaOffset(nullptr, StackSize - stackGrowth)); | |||
1946 | } | |||
1947 | ||||
1948 | // Emit DWARF info specifying the offsets of the callee-saved registers. | |||
1949 | emitCalleeSavedFrameMoves(MBB, MBBI, DL, true); | |||
1950 | } | |||
1951 | ||||
1952 | // X86 Interrupt handling function cannot assume anything about the direction | |||
1953 | // flag (DF in EFLAGS register). Clear this flag by creating "cld" instruction | |||
1954 | // in each prologue of interrupt handler function. | |||
1955 | // | |||
1956 | // FIXME: Create "cld" instruction only in these cases: | |||
1957 | // 1. The interrupt handling function uses any of the "rep" instructions. | |||
1958 | // 2. Interrupt handling function calls another function. | |||
1959 | // | |||
1960 | if (Fn.getCallingConv() == CallingConv::X86_INTR) | |||
1961 | BuildMI(MBB, MBBI, DL, TII.get(X86::CLD)) | |||
1962 | .setMIFlag(MachineInstr::FrameSetup); | |||
1963 | ||||
1964 | // At this point we know if the function has WinCFI or not. | |||
1965 | MF.setHasWinCFI(HasWinCFI); | |||
1966 | } | |||
1967 | ||||
1968 | bool X86FrameLowering::canUseLEAForSPInEpilogue( | |||
1969 | const MachineFunction &MF) const { | |||
1970 | // We can't use LEA instructions for adjusting the stack pointer if we don't | |||
1971 | // have a frame pointer in the Win64 ABI. Only ADD instructions may be used | |||
1972 | // to deallocate the stack. | |||
1973 | // This means that we can use LEA for SP in two situations: | |||
1974 | // 1. We *aren't* using the Win64 ABI which means we are free to use LEA. | |||
1975 | // 2. We *have* a frame pointer which means we are permitted to use LEA. | |||
1976 | return !MF.getTarget().getMCAsmInfo()->usesWindowsCFI() || hasFP(MF); | |||
1977 | } | |||
1978 | ||||
1979 | static bool isFuncletReturnInstr(MachineInstr &MI) { | |||
1980 | switch (MI.getOpcode()) { | |||
1981 | case X86::CATCHRET: | |||
1982 | case X86::CLEANUPRET: | |||
1983 | return true; | |||
1984 | default: | |||
1985 | return false; | |||
1986 | } | |||
1987 | llvm_unreachable("impossible")__builtin_unreachable(); | |||
1988 | } | |||
1989 | ||||
1990 | // CLR funclets use a special "Previous Stack Pointer Symbol" slot on the | |||
1991 | // stack. It holds a pointer to the bottom of the root function frame. The | |||
1992 | // establisher frame pointer passed to a nested funclet may point to the | |||
1993 | // (mostly empty) frame of its parent funclet, but it will need to find | |||
1994 | // the frame of the root function to access locals. To facilitate this, | |||
1995 | // every funclet copies the pointer to the bottom of the root function | |||
1996 | // frame into a PSPSym slot in its own (mostly empty) stack frame. Using the | |||
1997 | // same offset for the PSPSym in the root function frame that's used in the | |||
1998 | // funclets' frames allows each funclet to dynamically accept any ancestor | |||
1999 | // frame as its establisher argument (the runtime doesn't guarantee the | |||
2000 | // immediate parent for some reason lost to history), and also allows the GC, | |||
2001 | // which uses the PSPSym for some bookkeeping, to find it in any funclet's | |||
2002 | // frame with only a single offset reported for the entire method. | |||
2003 | unsigned | |||
2004 | X86FrameLowering::getPSPSlotOffsetFromSP(const MachineFunction &MF) const { | |||
2005 | const WinEHFuncInfo &Info = *MF.getWinEHFuncInfo(); | |||
2006 | Register SPReg; | |||
2007 | int Offset = getFrameIndexReferencePreferSP(MF, Info.PSPSymFrameIdx, SPReg, | |||
2008 | /*IgnoreSPUpdates*/ true) | |||
2009 | .getFixed(); | |||
2010 | assert(Offset >= 0 && SPReg == TRI->getStackRegister())((void)0); | |||
2011 | return static_cast<unsigned>(Offset); | |||
2012 | } | |||
2013 | ||||
2014 | unsigned | |||
2015 | X86FrameLowering::getWinEHFuncletFrameSize(const MachineFunction &MF) const { | |||
2016 | const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); | |||
2017 | // This is the size of the pushed CSRs. | |||
2018 | unsigned CSSize = X86FI->getCalleeSavedFrameSize(); | |||
2019 | // This is the size of callee saved XMMs. | |||
2020 | const auto& WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo(); | |||
2021 | unsigned XMMSize = WinEHXMMSlotInfo.size() * | |||
2022 | TRI->getSpillSize(X86::VR128RegClass); | |||
2023 | // This is the amount of stack a funclet needs to allocate. | |||
2024 | unsigned UsedSize; | |||
2025 | EHPersonality Personality = | |||
2026 | classifyEHPersonality(MF.getFunction().getPersonalityFn()); | |||
2027 | if (Personality == EHPersonality::CoreCLR) { | |||
2028 | // CLR funclets need to hold enough space to include the PSPSym, at the | |||
2029 | // same offset from the stack pointer (immediately after the prolog) as it | |||
2030 | // resides at in the main function. | |||
2031 | UsedSize = getPSPSlotOffsetFromSP(MF) + SlotSize; | |||
2032 | } else { | |||
2033 | // Other funclets just need enough stack for outgoing call arguments. | |||
2034 | UsedSize = MF.getFrameInfo().getMaxCallFrameSize(); | |||
2035 | } | |||
2036 | // RBP is not included in the callee saved register block. After pushing RBP, | |||
2037 | // everything is 16 byte aligned. Everything we allocate before an outgoing | |||
2038 | // call must also be 16 byte aligned. | |||
2039 | unsigned FrameSizeMinusRBP = alignTo(CSSize + UsedSize, getStackAlign()); | |||
2040 | // Subtract out the size of the callee saved registers. This is how much stack | |||
2041 | // each funclet will allocate. | |||
2042 | return FrameSizeMinusRBP + XMMSize - CSSize; | |||
2043 | } | |||
2044 | ||||
2045 | static bool isTailCallOpcode(unsigned Opc) { | |||
2046 | return Opc == X86::TCRETURNri || Opc == X86::TCRETURNdi || | |||
2047 | Opc == X86::TCRETURNmi || | |||
2048 | Opc == X86::TCRETURNri64 || Opc == X86::TCRETURNdi64 || | |||
2049 | Opc == X86::TCRETURNmi64; | |||
2050 | } | |||
2051 | ||||
/// Emit the function epilogue into \p MBB: pop the frame pointer (if any),
/// deallocate the local stack area, and emit the matching DWARF CFI / Win64
/// unwind directives. Funclet epilogues deallocate the funclet frame size
/// computed by getWinEHFuncletFrameSize instead of the full frame.
void X86FrameLowering::emitEpilogue(MachineFunction &MF,
                                    MachineBasicBlock &MBB) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  MachineBasicBlock::iterator Terminator = MBB.getFirstTerminator();
  MachineBasicBlock::iterator MBBI = Terminator;
  DebugLoc DL;
  if (MBBI != MBB.end())
    DL = MBBI->getDebugLoc();
  // standard x86_64 and NaCl use 64-bit frame/stack pointers, x32 - 32-bit.
  const bool Is64BitILP32 = STI.isTarget64BitILP32();
  Register FramePtr = TRI->getFrameRegister(MF);
  Register MachineFramePtr =
      Is64BitILP32 ? Register(getX86SubSuperRegister(FramePtr, 64)) : FramePtr;

  bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
  bool NeedsWin64CFI =
      IsWin64Prologue && MF.getFunction().needsUnwindTableEntry();
  bool IsFunclet = MBBI == MBB.end() ? false : isFuncletReturnInstr(*MBBI);

  // Get the number of bytes to allocate from the FrameInfo.
  uint64_t StackSize = MFI.getStackSize();
  uint64_t MaxAlign = calculateMaxStackAlign(MF);
  unsigned CSSize = X86FI->getCalleeSavedFrameSize();
  bool HasFP = hasFP(MF);
  uint64_t NumBytes = 0;

  // DWARF CFI is only emitted for targets that are neither Darwin nor
  // Windows (those use their own unwind encodings).
  bool NeedsDwarfCFI = (!MF.getTarget().getTargetTriple().isOSDarwin() &&
                        !MF.getTarget().getTargetTriple().isOSWindows()) &&
                       MF.needsFrameMoves();

  // Compute the stack adjustment, mirroring the prologue's computation.
  if (IsFunclet) {
    assert(HasFP && "EH funclets without FP not yet implemented")((void)0);
    NumBytes = getWinEHFuncletFrameSize(MF);
  } else if (HasFP) {
    // Calculate required stack adjustment.
    uint64_t FrameSize = StackSize - SlotSize;
    NumBytes = FrameSize - CSSize;

    // Callee-saved registers were pushed on stack before the stack was
    // realigned.
    if (TRI->hasStackRealignment(MF) && !IsWin64Prologue)
      NumBytes = alignTo(FrameSize, MaxAlign);
  } else {
    NumBytes = StackSize - CSSize;
  }
  uint64_t SEHStackAllocAmt = NumBytes;

  // AfterPop is the position to insert .cfi_restore.
  MachineBasicBlock::iterator AfterPop = MBBI;
  if (HasFP) {
    if (X86FI->hasSwiftAsyncContext()) {
      // Discard the context.
      int Offset = 16 + mergeSPUpdates(MBB, MBBI, true);
      emitSPUpdate(MBB, MBBI, DL, Offset, /*InEpilogue*/true);
    }

    if (X86FI->getSaveArgSize()) {
      // LEAVE is effectively mov rbp,rsp; pop rbp
      BuildMI(MBB, MBBI, DL, TII.get(X86::LEAVE64))
        .setMIFlag(MachineInstr::FrameDestroy);
    } else {
      // Pop EBP.
      BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::POP64r : X86::POP32r),
              MachineFramePtr)
          .setMIFlag(MachineInstr::FrameDestroy);
    }

    // We need to reset FP to its untagged state on return. Bit 60 is currently
    // used to show the presence of an extended frame.
    if (X86FI->hasSwiftAsyncContext()) {
      BuildMI(MBB, MBBI, DL, TII.get(X86::BTR64ri8),
              MachineFramePtr)
          .addUse(MachineFramePtr)
          .addImm(60)
          .setMIFlag(MachineInstr::FrameDestroy);
    }

    if (NeedsDwarfCFI) {
      // After the FP pop, the CFA is defined relative to the stack pointer
      // again (one slot: the return address).
      unsigned DwarfStackPtr =
          TRI->getDwarfRegNum(Is64Bit ? X86::RSP : X86::ESP, true);
      BuildCFI(MBB, MBBI, DL,
               MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, SlotSize));
      if (!MBB.succ_empty() && !MBB.isReturnBlock()) {
        unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true);
        BuildCFI(MBB, AfterPop, DL,
                 MCCFIInstruction::createRestore(nullptr, DwarfFramePtr));
        --MBBI;
        --AfterPop;
      }
      --MBBI;
    }
  }

  MachineBasicBlock::iterator FirstCSPop = MBBI;
  // Skip the callee-saved pop instructions.
  while (MBBI != MBB.begin()) {
    MachineBasicBlock::iterator PI = std::prev(MBBI);
    unsigned Opc = PI->getOpcode();

    if (Opc != X86::DBG_VALUE && !PI->isTerminator()) {
      // Only instructions emitted by epilogue lowering (marked FrameDestroy)
      // are skipped over; anything else ends the scan.
      if ((Opc != X86::POP32r || !PI->getFlag(MachineInstr::FrameDestroy)) &&
          (Opc != X86::POP64r || !PI->getFlag(MachineInstr::FrameDestroy)) &&
          (Opc != X86::LEAVE64 || !PI->getFlag(MachineInstr::FrameDestroy)) &&
          (Opc != X86::BTR64ri8 || !PI->getFlag(MachineInstr::FrameDestroy)) &&
          (Opc != X86::ADD64ri8 || !PI->getFlag(MachineInstr::FrameDestroy)))
        break;
      FirstCSPop = PI;
    }

    --MBBI;
  }
  MBBI = FirstCSPop;

  if (IsFunclet && Terminator->getOpcode() == X86::CATCHRET)
    emitCatchRetReturnValue(MBB, FirstCSPop, &*Terminator);

  if (MBBI != MBB.end())
    DL = MBBI->getDebugLoc();

  // If there is an ADD32ri or SUB32ri of ESP immediately before this
  // instruction, merge the two instructions.
  if (NumBytes || MFI.hasVarSizedObjects())
    NumBytes += mergeSPUpdates(MBB, MBBI, true);

  // If dynamic alloca is used, then reset esp to point to the last callee-saved
  // slot before popping them off! Same applies for the case, when stack was
  // realigned. Don't do this if this was a funclet epilogue, since the funclets
  // will not do realignment or dynamic stack allocation.
  if (((TRI->hasStackRealignment(MF)) || MFI.hasVarSizedObjects()) &&
      !IsFunclet) {
    if (TRI->hasStackRealignment(MF))
      MBBI = FirstCSPop;
    unsigned SEHFrameOffset = calculateSetFPREG(SEHStackAllocAmt);
    uint64_t LEAAmount =
        IsWin64Prologue ? SEHStackAllocAmt - SEHFrameOffset : -CSSize;

    if (X86FI->hasSwiftAsyncContext())
      LEAAmount -= 16;

    // There are only two legal forms of epilogue:
    // - add SEHAllocationSize, %rsp
    // - lea SEHAllocationSize(%FramePtr), %rsp
    //
    // 'mov %FramePtr, %rsp' will not be recognized as an epilogue sequence.
    // However, we may use this sequence if we have a frame pointer because the
    // effects of the prologue can safely be undone.
    if (LEAAmount != 0) {
      unsigned Opc = getLEArOpcode(Uses64BitFramePtr);
      addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr),
                   FramePtr, false, LEAAmount);
      --MBBI;
    } else {
      unsigned Opc = (Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr);
      BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
        .addReg(FramePtr);
      --MBBI;
    }
  } else if (NumBytes) {
    // Adjust stack pointer back: ESP += numbytes.
    emitSPUpdate(MBB, MBBI, DL, NumBytes, /*InEpilogue=*/true);
    if (!hasFP(MF) && NeedsDwarfCFI) {
      // Define the current CFA rule to use the provided offset.
      BuildCFI(MBB, MBBI, DL,
               MCCFIInstruction::cfiDefCfaOffset(nullptr, CSSize + SlotSize));
    }
    --MBBI;
  }

  // Windows unwinder will not invoke function's exception handler if IP is
  // either in prologue or in epilogue. This behavior causes a problem when a
  // call immediately precedes an epilogue, because the return address points
  // into the epilogue. To cope with that, we insert an epilogue marker here,
  // then replace it with a 'nop' if it ends up immediately after a CALL in the
  // final emitted code.
  if (NeedsWin64CFI && MF.hasWinCFI())
    BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_Epilogue));

  if (!hasFP(MF) && NeedsDwarfCFI) {
    MBBI = FirstCSPop;
    int64_t Offset = -CSSize - SlotSize;
    // Mark callee-saved pop instruction.
    // Define the current CFA rule to use the provided offset.
    while (MBBI != MBB.end()) {
      MachineBasicBlock::iterator PI = MBBI;
      unsigned Opc = PI->getOpcode();
      ++MBBI;
      if (Opc == X86::POP32r || Opc == X86::POP64r) {
        // Each pop shrinks the frame by one slot; re-state the CFA offset.
        Offset += SlotSize;
        BuildCFI(MBB, MBBI, DL,
                 MCCFIInstruction::cfiDefCfaOffset(nullptr, -Offset));
      }
    }
  }

  // Emit DWARF info specifying the restores of the callee-saved registers.
  // For epilogue with return inside or being other block without successor,
  // no need to generate .cfi_restore for callee-saved registers.
  if (NeedsDwarfCFI && !MBB.succ_empty() && !MBB.isReturnBlock()) {
    emitCalleeSavedFrameMoves(MBB, AfterPop, DL, false);
  }

  if (Terminator == MBB.end() || !isTailCallOpcode(Terminator->getOpcode())) {
    // Add the return addr area delta back since we are not tail calling.
    int Offset = -1 * X86FI->getTCReturnAddrDelta();
    assert(Offset >= 0 && "TCDelta should never be positive")((void)0);
    if (Offset) {
      // Check for possible merge with preceding ADD instruction.
      Offset += mergeSPUpdates(MBB, Terminator, true);
      emitSPUpdate(MBB, Terminator, DL, Offset, /*InEpilogue=*/true);
    }
  }

  // Emit tilerelease for AMX kernel.
  const MachineRegisterInfo &MRI = MF.getRegInfo();
  const TargetRegisterClass *RC = TRI->getRegClass(X86::TILERegClassID);
  for (unsigned I = 0; I < RC->getNumRegs(); I++)
    if (!MRI.reg_nodbg_empty(X86::TMM0 + I)) {
      BuildMI(MBB, Terminator, DL, TII.get(X86::TILERELEASE));
      break;
    }
}
2274 | ||||
/// Compute the offset of frame index \p FI from the register it should be
/// addressed through, and report that register in \p FrameReg. The choice of
/// register depends on whether a base pointer and/or stack realignment is in
/// use; Win64 prologues additionally bias FP-relative offsets by FPDelta.
StackOffset X86FrameLowering::getFrameIndexReference(const MachineFunction &MF,
                                                     int FI,
                                                     Register &FrameReg) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();

  bool IsFixed = MFI.isFixedObjectIndex(FI);
  // We can't calculate offset from frame pointer if the stack is realigned,
  // so enforce usage of stack/base pointer. The base pointer is used when we
  // have dynamic allocas in addition to dynamic realignment.
  if (TRI->hasBasePointer(MF))
    FrameReg = IsFixed ? TRI->getFramePtr() : TRI->getBaseRegister();
  else if (TRI->hasStackRealignment(MF))
    FrameReg = IsFixed ? TRI->getFramePtr() : TRI->getStackRegister();
  else
    FrameReg = TRI->getFrameRegister(MF);

  // Offset will hold the offset from the stack pointer at function entry to the
  // object.
  // We need to factor in additional offsets applied during the prologue to the
  // frame, base, and stack pointer depending on which is used.
  int Offset = MFI.getObjectOffset(FI) - getOffsetOfLocalArea();
  const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  unsigned CSSize = X86FI->getCalleeSavedFrameSize();
  uint64_t StackSize = MFI.getStackSize();
  bool HasFP = hasFP(MF);
  bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
  int64_t FPDelta = 0;

  // In an x86 interrupt, remove the offset we added to account for the return
  // address from any stack object allocated in the caller's frame. Interrupts
  // do not have a standard return address. Fixed objects in the current frame,
  // such as SSE register spills, should not get this treatment.
  if (MF.getFunction().getCallingConv() == CallingConv::X86_INTR &&
      Offset >= 0) {
    Offset += getOffsetOfLocalArea();
  }

  if (IsWin64Prologue) {
    assert(!MFI.hasCalls() || (StackSize % 16) == 8)((void)0);

    // Calculate required stack adjustment.
    uint64_t FrameSize = StackSize - SlotSize;
    // If required, include space for extra hidden slot for stashing base pointer.
    if (X86FI->getRestoreBasePointer())
      FrameSize += SlotSize;
    uint64_t NumBytes = FrameSize - CSSize;

    uint64_t SEHFrameOffset = calculateSetFPREG(NumBytes);
    if (FI && FI == X86FI->getFAIndex())
      return StackOffset::getFixed(-SEHFrameOffset);

    // FPDelta is the offset from the "traditional" FP location of the old base
    // pointer followed by return address and the location required by the
    // restricted Win64 prologue.
    // Add FPDelta to all offsets below that go through the frame pointer.
    FPDelta = FrameSize - SEHFrameOffset;
    assert((!MFI.hasCalls() || (FPDelta % 16) == 0) &&((void)0)
           "FPDelta isn't aligned per the Win64 ABI!")((void)0);
  }

  // Non-fixed objects sit below the saved-argument area (OpenBSD extension).
  if (FI >= 0)
    Offset -= X86FI->getSaveArgSize();

  if (TRI->hasBasePointer(MF)) {
    assert(HasFP && "VLAs and dynamic stack realign, but no FP?!")((void)0);
    if (FI < 0) {
      // Skip the saved EBP.
      return StackOffset::getFixed(Offset + SlotSize + FPDelta);
    } else {
      assert(isAligned(MFI.getObjectAlign(FI), -(Offset + StackSize)))((void)0);
      return StackOffset::getFixed(Offset + StackSize);
    }
  } else if (TRI->hasStackRealignment(MF)) {
    if (FI < 0) {
      // Skip the saved EBP.
      return StackOffset::getFixed(Offset + SlotSize + FPDelta);
    } else {
      assert(isAligned(MFI.getObjectAlign(FI), -(Offset + StackSize)))((void)0);
      return StackOffset::getFixed(Offset + StackSize);
    }
    // FIXME: Support tail calls
  } else {
    if (!HasFP)
      return StackOffset::getFixed(Offset + StackSize);

    // Skip the saved EBP.
    Offset += SlotSize;

    // Skip the RETADDR move area
    int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
    if (TailCallReturnAddrDelta < 0)
      Offset -= TailCallReturnAddrDelta;
  }

  return StackOffset::getFixed(Offset + FPDelta);
}
2371 | ||||
2372 | int X86FrameLowering::getWin64EHFrameIndexRef(const MachineFunction &MF, int FI, | |||
2373 | Register &FrameReg) const { | |||
2374 | const MachineFrameInfo &MFI = MF.getFrameInfo(); | |||
2375 | const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); | |||
2376 | const auto& WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo(); | |||
2377 | const auto it = WinEHXMMSlotInfo.find(FI); | |||
2378 | ||||
2379 | if (it == WinEHXMMSlotInfo.end()) | |||
2380 | return getFrameIndexReference(MF, FI, FrameReg).getFixed(); | |||
2381 | ||||
2382 | FrameReg = TRI->getStackRegister(); | |||
2383 | return alignDown(MFI.getMaxCallFrameSize(), getStackAlign().value()) + | |||
2384 | it->second; | |||
2385 | } | |||
2386 | ||||
2387 | StackOffset | |||
2388 | X86FrameLowering::getFrameIndexReferenceSP(const MachineFunction &MF, int FI, | |||
2389 | Register &FrameReg, | |||
2390 | int Adjustment) const { | |||
2391 | const MachineFrameInfo &MFI = MF.getFrameInfo(); | |||
2392 | FrameReg = TRI->getStackRegister(); | |||
2393 | return StackOffset::getFixed(MFI.getObjectOffset(FI) - | |||
2394 | getOffsetOfLocalArea() + Adjustment); | |||
2395 | } | |||
2396 | ||||
/// Like getFrameIndexReference, but prefer answering relative to the stack
/// pointer (post-prologue) when that is legal for \p FI; falls back to the
/// generic lowering when realignment or mid-function SP updates make an
/// SP-relative answer unreliable.
StackOffset
X86FrameLowering::getFrameIndexReferencePreferSP(const MachineFunction &MF,
                                                 int FI, Register &FrameReg,
                                                 bool IgnoreSPUpdates) const {

  const MachineFrameInfo &MFI = MF.getFrameInfo();
  // Does not include any dynamic realign.
  const uint64_t StackSize = MFI.getStackSize();
  // LLVM arranges the stack as follows:
  //   ...
  //   ARG2
  //   ARG1
  //   RETADDR
  //   PUSH RBP   <-- RBP points here
  //   PUSH CSRs
  //   ~~~~~~~    <-- possible stack realignment (non-win64)
  //   ...
  //   STACK OBJECTS
  //   ...        <-- RSP after prologue points here
  //   ~~~~~~~    <-- possible stack realignment (win64)
  //
  // if (hasVarSizedObjects()):
  //   ...        <-- "base pointer" (ESI/RBX) points here
  //   DYNAMIC ALLOCAS
  //   ...        <-- RSP points here
  //
  // Case 1: In the simple case of no stack realignment and no dynamic
  // allocas, both "fixed" stack objects (arguments and CSRs) are addressable
  // with fixed offsets from RSP.
  //
  // Case 2: In the case of stack realignment with no dynamic allocas, fixed
  // stack objects are addressed with RBP and regular stack objects with RSP.
  //
  // Case 3: In the case of dynamic allocas and stack realignment, RSP is used
  // to address stack arguments for outgoing calls and nothing else. The "base
  // pointer" points to local variables, and RBP points to fixed objects.
  //
  // In cases 2 and 3, we can only answer for non-fixed stack objects, and the
  // answer we give is relative to the SP after the prologue, and not the
  // SP in the middle of the function.

  if (MFI.isFixedObjectIndex(FI) && TRI->hasStackRealignment(MF) &&
      !STI.isTargetWin64())
    return getFrameIndexReference(MF, FI, FrameReg);

  // If !hasReservedCallFrame the function might have SP adjustment in the
  // body. So, even though the offset is statically known, it depends on where
  // we are in the function.
  if (!IgnoreSPUpdates && !hasReservedCallFrame(MF))
    return getFrameIndexReference(MF, FI, FrameReg);

  // We don't handle tail calls, and shouldn't be seeing them either.
  assert(MF.getInfo<X86MachineFunctionInfo>()->getTCReturnAddrDelta() >= 0 &&((void)0)
         "we don't handle this case!")((void)0);

  // This is how the math works out:
  //
  //  %rsp grows (i.e. gets lower) left to right. Each box below is
  //  one word (eight bytes). Obj0 is the stack slot we're trying to
  //  get to.
  //
  //    ----------------------------------
  //    | BP | Obj0 | Obj1 | ... | ObjN |
  //    ----------------------------------
  //    ^    ^      ^                   ^
  //    A    B      C                   E
  //
  // A is the incoming stack pointer.
  // (B - A) is the local area offset (-8 for x86-64) [1]
  // (C - A) is the Offset returned by MFI.getObjectOffset for Obj0 [2]
  //
  // |(E - B)| is the StackSize (absolute value, positive). For a
  // stack that grown down, this works out to be (B - E). [3]
  //
  // E is also the value of %rsp after stack has been set up, and we
  // want (C - E) -- the value we can add to %rsp to get to Obj0.  Now
  // (C - E) == (C - A) - (B - A) + (B - E)
  //            { Using [1], [2] and [3] above }
  //         == getObjectOffset - LocalAreaOffset + StackSize
  return getFrameIndexReferenceSP(MF, FI, FrameReg, StackSize);
}
2479 | ||||
/// Assign fixed spill slots to the callee-saved registers in \p CSI,
/// mirroring the push order emitPrologue uses. GPRs get SlotSize slots
/// (counted into CalleeSavedFrameSize); XMM/other registers get aligned
/// slots below the GPR area, and VR128 slots are additionally recorded for
/// Win64 EH funclet addressing. Always returns true (slots fully assigned).
bool X86FrameLowering::assignCalleeSavedSpillSlots(
    MachineFunction &MF, const TargetRegisterInfo *TRI,
    std::vector<CalleeSavedInfo> &CSI) const {
  MachineFrameInfo &MFI = MF.getFrameInfo();
  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();

  unsigned CalleeSavedFrameSize = 0;
  unsigned XMMCalleeSavedFrameSize = 0;
  auto &WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo();
  // Offsets grow downwards from the local area (negative on x86).
  int SpillSlotOffset = getOffsetOfLocalArea() + X86FI->getTCReturnAddrDelta();

  int64_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();

  if (TailCallReturnAddrDelta < 0) {
    // create RETURNADDR area
    //   arg
    //   arg
    //   RETADDR
    //   { ...
    //     RETADDR area
    //     ...
    //   }
    //   [EBP]
    MFI.CreateFixedObject(-TailCallReturnAddrDelta,
                          TailCallReturnAddrDelta - SlotSize, true);
  }

  // Spill the BasePtr if it's used.
  if (this->TRI->hasBasePointer(MF)) {
    // Allocate a spill slot for EBP if we have a base pointer and EH funclets.
    if (MF.hasEHFunclets()) {
      int FI = MFI.CreateSpillStackObject(SlotSize, Align(SlotSize));
      X86FI->setHasSEHFramePtrSave(true);
      X86FI->setSEHFramePtrSaveIndex(FI);
    }
  }

  if (hasFP(MF)) {
    // emitPrologue always spills frame register the first thing.
    SpillSlotOffset -= SlotSize;
    MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);

    // The async context lives directly before the frame pointer, and we
    // allocate a second slot to preserve stack alignment.
    if (X86FI->hasSwiftAsyncContext()) {
      SpillSlotOffset -= SlotSize;
      MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
      SpillSlotOffset -= SlotSize;
    }

    // Since emitPrologue and emitEpilogue will handle spilling and restoring of
    // the frame register, we can delete it from CSI list and not have to worry
    // about avoiding it later.
    Register FPReg = TRI->getFrameRegister(MF);
    for (unsigned i = 0; i < CSI.size(); ++i) {
      if (TRI->regsOverlap(CSI[i].getReg(),FPReg)) {
        CSI.erase(CSI.begin() + i);
        break;
      }
    }
  }

  // Assign slots for GPRs. It increases frame size.
  // Iterated in reverse so slot order matches the prologue's push order.
  for (unsigned i = CSI.size(); i != 0; --i) {
    unsigned Reg = CSI[i - 1].getReg();

    if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
      continue;

    SpillSlotOffset -= SlotSize;
    CalleeSavedFrameSize += SlotSize;

    int SlotIndex = MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset);
    CSI[i - 1].setFrameIdx(SlotIndex);
  }

  X86FI->setCalleeSavedFrameSize(CalleeSavedFrameSize);
  MFI.setCVBytesOfCalleeSavedRegisters(CalleeSavedFrameSize);

  // Assign slots for XMMs.
  for (unsigned i = CSI.size(); i != 0; --i) {
    unsigned Reg = CSI[i - 1].getReg();
    if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
      continue;

    // If this is k-register make sure we lookup via the largest legal type.
    MVT VT = MVT::Other;
    if (X86::VK16RegClass.contains(Reg))
      VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;

    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
    unsigned Size = TRI->getSpillSize(*RC);
    Align Alignment = TRI->getSpillAlign(*RC);
    // ensure alignment
    assert(SpillSlotOffset < 0 && "SpillSlotOffset should always < 0 on X86")((void)0);
    SpillSlotOffset = -alignTo(-SpillSlotOffset, Alignment);

    // spill into slot
    SpillSlotOffset -= Size;
    int SlotIndex = MFI.CreateFixedSpillStackObject(Size, SpillSlotOffset);
    CSI[i - 1].setFrameIdx(SlotIndex);
    MFI.ensureMaxAlignment(Alignment);

    // Save the start offset and size of XMM in stack frame for funclets.
    if (X86::VR128RegClass.contains(Reg)) {
      WinEHXMMSlotInfo[SlotIndex] = XMMCalleeSavedFrameSize;
      XMMCalleeSavedFrameSize += Size;
    }
  }

  return true;
}
2592 | ||||
/// Emit the callee-saved register saves before \p MI: GPRs are PUSHed
/// (marked FrameSetup), other registers (XMM, mask registers) are stored to
/// the spill slots assigned in assignCalleeSavedSpillSlots. Returns true to
/// indicate the target handled the spills itself.
bool X86FrameLowering::spillCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
  DebugLoc DL = MBB.findDebugLoc(MI);

  // Don't save CSRs in 32-bit EH funclets. The caller saves EBX, EBP, ESI, EDI
  // for us, and there are no XMM CSRs on Win32.
  if (MBB.isEHFuncletEntry() && STI.is32Bit() && STI.isOSWindows())
    return true;

  // Push GPRs. It increases frame size.
  const MachineFunction &MF = *MBB.getParent();
  unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r;
  // Reverse iteration so the pushes match the slot assignment order.
  for (unsigned i = CSI.size(); i != 0; --i) {
    unsigned Reg = CSI[i - 1].getReg();

    if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg))
      continue;

    const MachineRegisterInfo &MRI = MF.getRegInfo();
    bool isLiveIn = MRI.isLiveIn(Reg);
    if (!isLiveIn)
      MBB.addLiveIn(Reg);

    // Decide whether we can add a kill flag to the use.
    bool CanKill = !isLiveIn;
    // Check if any subregister is live-in
    if (CanKill) {
      for (MCRegAliasIterator AReg(Reg, TRI, false); AReg.isValid(); ++AReg) {
        if (MRI.isLiveIn(*AReg)) {
          CanKill = false;
          break;
        }
      }
    }

    // Do not set a kill flag on values that are also marked as live-in. This
    // happens with the @llvm-returnaddress intrinsic and with arguments
    // passed in callee saved registers.
    // Omitting the kill flags is conservatively correct even if the live-in
    // is not used after all.
    BuildMI(MBB, MI, DL, TII.get(Opc)).addReg(Reg, getKillRegState(CanKill))
      .setMIFlag(MachineInstr::FrameSetup);
  }

  // Make XMM regs spilled. X86 does not have ability of push/pop XMM.
  // It can be done by spilling XMMs to stack frame.
  for (unsigned i = CSI.size(); i != 0; --i) {
    unsigned Reg = CSI[i-1].getReg();
    if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg))
      continue;

    // If this is k-register make sure we lookup via the largest legal type.
    MVT VT = MVT::Other;
    if (X86::VK16RegClass.contains(Reg))
      VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;

    // Add the callee-saved register as live-in. It's killed at the spill.
    MBB.addLiveIn(Reg);
    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);

    TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i - 1].getFrameIdx(), RC,
                            TRI);
    // Tag the just-emitted store (the instruction before MI) as frame setup.
    --MI;
    MI->setFlag(MachineInstr::FrameSetup);
    ++MI;
  }

  return true;
}
2663 | ||||
/// Materialize the CATCHRET return value: load the address of the catchret
/// target block into EAX/RAX before \p MBBI, and mark the target block as
/// address-taken since it is now referenced by value, not just as a branch
/// destination.
void X86FrameLowering::emitCatchRetReturnValue(MachineBasicBlock &MBB,
                                               MachineBasicBlock::iterator MBBI,
                                               MachineInstr *CatchRet) const {
  // SEH shouldn't use catchret.
  assert(!isAsynchronousEHPersonality(classifyEHPersonality(((void)0)
      MBB.getParent()->getFunction().getPersonalityFn())) &&((void)0)
         "SEH should not use CATCHRET")((void)0);
  const DebugLoc &DL = CatchRet->getDebugLoc();
  MachineBasicBlock *CatchRetTarget = CatchRet->getOperand(0).getMBB();

  // Fill EAX/RAX with the address of the target block.
  if (STI.is64Bit()) {
    // LEA64r CatchRetTarget(%rip), %rax
    BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), X86::RAX)
        .addReg(X86::RIP)
        .addImm(0)
        .addReg(0)
        .addMBB(CatchRetTarget)
        .addReg(0);
  } else {
    // MOV32ri $CatchRetTarget, %eax
    BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX)
        .addMBB(CatchRetTarget);
  }

  // Record that we've taken the address of CatchRetTarget and no longer just
  // reference it in a terminator.
  CatchRetTarget->setHasAddressTaken();
}
2693 | ||||
/// Emit the callee-saved register restores before \p MI: XMM/mask registers
/// are reloaded from their spill slots, then GPRs are POPped (marked
/// FrameDestroy). Returns false only when \p CSI is empty (nothing handled);
/// otherwise true, including the Windows funclet cases where restores are
/// intentionally skipped to match spillCalleeSavedRegisters.
bool X86FrameLowering::restoreCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
  if (CSI.empty())
    return false;

  if (MI != MBB.end() && isFuncletReturnInstr(*MI) && STI.isOSWindows()) {
    // Don't restore CSRs in 32-bit EH funclets. Matches
    // spillCalleeSavedRegisters.
    if (STI.is32Bit())
      return true;
    // Don't restore CSRs before an SEH catchret. SEH except blocks do not form
    // funclets. emitEpilogue transforms these to normal jumps.
    if (MI->getOpcode() == X86::CATCHRET) {
      const Function &F = MBB.getParent()->getFunction();
      bool IsSEH = isAsynchronousEHPersonality(
          classifyEHPersonality(F.getPersonalityFn()));
      if (IsSEH)
        return true;
    }
  }

  DebugLoc DL = MBB.findDebugLoc(MI);

  // Reload XMMs from stack frame.
  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
    unsigned Reg = CSI[i].getReg();
    if (X86::GR64RegClass.contains(Reg) ||
        X86::GR32RegClass.contains(Reg))
      continue;

    // If this is k-register make sure we lookup via the largest legal type.
    MVT VT = MVT::Other;
    if (X86::VK16RegClass.contains(Reg))
      VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1;

    const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT);
    TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RC, TRI);
  }

  // POP GPRs.
  // Forward iteration here mirrors the reverse-order pushes in the prologue.
  unsigned Opc = STI.is64Bit() ? X86::POP64r : X86::POP32r;
  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
    unsigned Reg = CSI[i].getReg();
    if (!X86::GR64RegClass.contains(Reg) &&
        !X86::GR32RegClass.contains(Reg))
      continue;

    BuildMI(MBB, MI, DL, TII.get(Opc), Reg)
        .setMIFlag(MachineInstr::FrameDestroy);
  }
  return true;
}
2747 | ||||
2748 | void X86FrameLowering::determineCalleeSaves(MachineFunction &MF, | |||
2749 | BitVector &SavedRegs, | |||
2750 | RegScavenger *RS) const { | |||
2751 | TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); | |||
2752 | ||||
2753 | // Spill the BasePtr if it's used. | |||
2754 | if (TRI->hasBasePointer(MF)){ | |||
2755 | Register BasePtr = TRI->getBaseRegister(); | |||
2756 | if (STI.isTarget64BitILP32()) | |||
2757 | BasePtr = getX86SubSuperRegister(BasePtr, 64); | |||
2758 | SavedRegs.set(BasePtr); | |||
2759 | } | |||
2760 | } | |||
2761 | ||||
2762 | static bool | |||
2763 | HasNestArgument(const MachineFunction *MF) { | |||
2764 | const Function &F = MF->getFunction(); | |||
2765 | for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); | |||
2766 | I != E; I++) { | |||
2767 | if (I->hasNestAttr() && !I->use_empty()) | |||
2768 | return true; | |||
2769 | } | |||
2770 | return false; | |||
2771 | } | |||
2772 | ||||
2773 | /// GetScratchRegister - Get a temp register for performing work in the | |||
2774 | /// segmented stack and the Erlang/HiPE stack prologue. Depending on platform | |||
2775 | /// and the properties of the function either one or two registers will be | |||
2776 | /// needed. Set primary to true for the first register, false for the second. | |||
2777 | static unsigned | |||
2778 | GetScratchRegister(bool Is64Bit, bool IsLP64, const MachineFunction &MF, bool Primary) { | |||
2779 | CallingConv::ID CallingConvention = MF.getFunction().getCallingConv(); | |||
2780 | ||||
2781 | // Erlang stuff. | |||
2782 | if (CallingConvention == CallingConv::HiPE) { | |||
2783 | if (Is64Bit) | |||
2784 | return Primary ? X86::R14 : X86::R13; | |||
2785 | else | |||
2786 | return Primary ? X86::EBX : X86::EDI; | |||
2787 | } | |||
2788 | ||||
2789 | if (Is64Bit) { | |||
2790 | if (IsLP64) | |||
2791 | return Primary ? X86::R11 : X86::R12; | |||
2792 | else | |||
2793 | return Primary ? X86::R11D : X86::R12D; | |||
2794 | } | |||
2795 | ||||
2796 | bool IsNested = HasNestArgument(&MF); | |||
2797 | ||||
2798 | if (CallingConvention == CallingConv::X86_FastCall || | |||
2799 | CallingConvention == CallingConv::Fast || | |||
2800 | CallingConvention == CallingConv::Tail) { | |||
2801 | if (IsNested) | |||
2802 | report_fatal_error("Segmented stacks does not support fastcall with " | |||
2803 | "nested function."); | |||
2804 | return Primary ? X86::EAX : X86::ECX; | |||
2805 | } | |||
2806 | if (IsNested) | |||
2807 | return Primary ? X86::EDX : X86::EAX; | |||
2808 | return Primary ? X86::ECX : X86::EAX; | |||
2809 | } | |||
2810 | ||||
// The stack limit in the TCB is set to this many bytes above the actual stack
// limit. Frames smaller than this slack can therefore be checked by comparing
// SP directly against the stored limit, without first computing SP-StackSize.
static const uint64_t kSplitStackAvailable = 256;
2814 | ||||
/// Insert a split-stack check ahead of the prologue. Two new blocks are
/// emitted before \p PrologueMBB:
///   checkMBB - compares SP (or SP - StackSize for larger frames) against the
///              stacklet limit kept at a platform-specific TLS offset and
///              jumps to the normal prologue when there is enough room;
///   allocMBB - otherwise passes the frame and argument sizes to __morestack
///              (in r10/r11 on 64-bit, pushed on the stack on 32-bit), which
///              allocates a new stacklet.
void X86FrameLowering::adjustForSegmentedStacks(
    MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
  MachineFrameInfo &MFI = MF.getFrameInfo();
  uint64_t StackSize;
  unsigned TlsReg, TlsOffset;
  DebugLoc DL;

  // To support shrink-wrapping we would need to insert the new blocks
  // at the right place and update the branches to PrologueMBB.
  assert(&(*MF.begin()) == &PrologueMBB && "Shrink-wrapping not supported yet")((void)0);

  unsigned ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true);
  assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&((void)0)
         "Scratch register is live-in")((void)0);

  if (MF.getFunction().isVarArg())
    report_fatal_error("Segmented stacks do not support vararg functions.");
  if (!STI.isTargetLinux() && !STI.isTargetDarwin() && !STI.isTargetWin32() &&
      !STI.isTargetWin64() && !STI.isTargetFreeBSD() &&
      !STI.isTargetDragonFly())
    report_fatal_error("Segmented stacks not supported on this platform.");

  // Eventually StackSize will be calculated by a link-time pass; which will
  // also decide whether checking code needs to be injected into this particular
  // prologue.
  StackSize = MFI.getStackSize();

  // Do not generate a prologue for leaf functions with a stack of size zero.
  // For non-leaf functions we have to allow for the possibility that the
  // call is to a non-split function, as in PR37807. This function could also
  // take the address of a non-split function. When the linker tries to adjust
  // its non-existent prologue, it would fail with an error. Mark the object
  // file so that such failures are not errors. See this Go language bug-report
  // https://go-review.googlesource.com/c/go/+/148819/
  if (StackSize == 0 && !MFI.hasTailCall()) {
    MF.getMMI().setHasNosplitStack(true);
    return;
  }

  MachineBasicBlock *allocMBB = MF.CreateMachineBasicBlock();
  MachineBasicBlock *checkMBB = MF.CreateMachineBasicBlock();
  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  bool IsNested = false;

  // We need to know if the function has a nest argument only in 64 bit mode.
  if (Is64Bit)
    IsNested = HasNestArgument(&MF);

  // The MOV R10, RAX needs to be in a different block, since the RET we emit in
  // allocMBB needs to be last (terminating) instruction.

  // Both new blocks run before the original prologue, so anything live into
  // the prologue is live into them as well.
  for (const auto &LI : PrologueMBB.liveins()) {
    allocMBB->addLiveIn(LI);
    checkMBB->addLiveIn(LI);
  }

  // R10(D) carries the static chain and must survive into allocMBB.
  if (IsNested)
    allocMBB->addLiveIn(IsLP64 ? X86::R10 : X86::R10D);

  // Function entry becomes: checkMBB -> allocMBB -> original prologue.
  MF.push_front(allocMBB);
  MF.push_front(checkMBB);

  // When the frame size is less than 256 we just compare the stack
  // boundary directly to the value of the stack pointer, per gcc.
  bool CompareStackPointer = StackSize < kSplitStackAvailable;

  // Read the limit off the current stacklet off the stack_guard location.
  if (Is64Bit) {
    if (STI.isTargetLinux()) {
      TlsReg = X86::FS;
      TlsOffset = IsLP64 ? 0x70 : 0x40;
    } else if (STI.isTargetDarwin()) {
      TlsReg = X86::GS;
      TlsOffset = 0x60 + 90*8; // See pthread_machdep.h. Steal TLS slot 90.
    } else if (STI.isTargetWin64()) {
      TlsReg = X86::GS;
      TlsOffset = 0x28; // pvArbitrary, reserved for application use
    } else if (STI.isTargetFreeBSD()) {
      TlsReg = X86::FS;
      TlsOffset = 0x18;
    } else if (STI.isTargetDragonFly()) {
      TlsReg = X86::FS;
      TlsOffset = 0x20; // use tls_tcb.tcb_segstack
    } else {
      report_fatal_error("Segmented stacks not supported on this platform.");
    }

    // Either compare SP directly, or materialize SP - StackSize into the
    // scratch register first.
    if (CompareStackPointer)
      ScratchReg = IsLP64 ? X86::RSP : X86::ESP;
    else
      BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::LEA64r : X86::LEA64_32r), ScratchReg).addReg(X86::RSP)
        .addImm(1).addReg(0).addImm(-StackSize).addReg(0);

    BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::CMP64rm : X86::CMP32rm)).addReg(ScratchReg)
      .addReg(0).addImm(1).addReg(0).addImm(TlsOffset).addReg(TlsReg);
  } else {
    if (STI.isTargetLinux()) {
      TlsReg = X86::GS;
      TlsOffset = 0x30;
    } else if (STI.isTargetDarwin()) {
      TlsReg = X86::GS;
      TlsOffset = 0x48 + 90*4;
    } else if (STI.isTargetWin32()) {
      TlsReg = X86::FS;
      TlsOffset = 0x14; // pvArbitrary, reserved for application use
    } else if (STI.isTargetDragonFly()) {
      TlsReg = X86::FS;
      TlsOffset = 0x10; // use tls_tcb.tcb_segstack
    } else if (STI.isTargetFreeBSD()) {
      report_fatal_error("Segmented stacks not supported on FreeBSD i386.");
    } else {
      report_fatal_error("Segmented stacks not supported on this platform.");
    }

    if (CompareStackPointer)
      ScratchReg = X86::ESP;
    else
      BuildMI(checkMBB, DL, TII.get(X86::LEA32r), ScratchReg).addReg(X86::ESP)
        .addImm(1).addReg(0).addImm(-StackSize).addReg(0);

    if (STI.isTargetLinux() || STI.isTargetWin32() || STI.isTargetWin64() ||
        STI.isTargetDragonFly()) {
      BuildMI(checkMBB, DL, TII.get(X86::CMP32rm)).addReg(ScratchReg)
        .addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg);
    } else if (STI.isTargetDarwin()) {

      // TlsOffset doesn't fit into a mod r/m byte so we need an extra register.
      unsigned ScratchReg2;
      bool SaveScratch2;
      if (CompareStackPointer) {
        // The primary scratch register is available for holding the TLS offset.
        ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, true);
        SaveScratch2 = false;
      } else {
        // Need to use a second register to hold the TLS offset
        ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, false);

        // Unfortunately, with fastcc the second scratch register may hold an
        // argument.
        SaveScratch2 = MF.getRegInfo().isLiveIn(ScratchReg2);
      }

      // If Scratch2 is live-in then it needs to be saved.
      assert((!MF.getRegInfo().isLiveIn(ScratchReg2) || SaveScratch2) &&((void)0)
             "Scratch register is live-in and not saved")((void)0);

      if (SaveScratch2)
        BuildMI(checkMBB, DL, TII.get(X86::PUSH32r))
          .addReg(ScratchReg2, RegState::Kill);

      BuildMI(checkMBB, DL, TII.get(X86::MOV32ri), ScratchReg2)
        .addImm(TlsOffset);
      BuildMI(checkMBB, DL, TII.get(X86::CMP32rm))
        .addReg(ScratchReg)
        .addReg(ScratchReg2).addImm(1).addReg(0)
        .addImm(0)
        .addReg(TlsReg);

      if (SaveScratch2)
        BuildMI(checkMBB, DL, TII.get(X86::POP32r), ScratchReg2);
    }
  }

  // This jump is taken if SP >= (Stacklet Limit + Stack Space required).
  // It jumps to normal execution of the function body.
  BuildMI(checkMBB, DL, TII.get(X86::JCC_1)).addMBB(&PrologueMBB).addImm(X86::COND_A);

  // On 32 bit we first push the arguments size and then the frame size. On 64
  // bit, we pass the stack frame size in r10 and the argument size in r11.
  if (Is64Bit) {
    // Functions with nested arguments use R10, so it needs to be saved across
    // the call to _morestack

    const unsigned RegAX = IsLP64 ? X86::RAX : X86::EAX;
    const unsigned Reg10 = IsLP64 ? X86::R10 : X86::R10D;
    const unsigned Reg11 = IsLP64 ? X86::R11 : X86::R11D;
    const unsigned MOVrr = IsLP64 ? X86::MOV64rr : X86::MOV32rr;
    const unsigned MOVri = IsLP64 ? X86::MOV64ri : X86::MOV32ri;

    if (IsNested)
      BuildMI(allocMBB, DL, TII.get(MOVrr), RegAX).addReg(Reg10);

    BuildMI(allocMBB, DL, TII.get(MOVri), Reg10)
      .addImm(StackSize);
    BuildMI(allocMBB, DL, TII.get(MOVri), Reg11)
      .addImm(X86FI->getArgumentStackSize());
  } else {
    BuildMI(allocMBB, DL, TII.get(X86::PUSHi32))
      .addImm(X86FI->getArgumentStackSize());
    BuildMI(allocMBB, DL, TII.get(X86::PUSHi32))
      .addImm(StackSize);
  }

  // __morestack is in libgcc
  if (Is64Bit && MF.getTarget().getCodeModel() == CodeModel::Large) {
    // Under the large code model, we cannot assume that __morestack lives
    // within 2^31 bytes of the call site, so we cannot use pc-relative
    // addressing. We cannot perform the call via a temporary register,
    // as the rax register may be used to store the static chain, and all
    // other suitable registers may be either callee-save or used for
    // parameter passing. We cannot use the stack at this point either
    // because __morestack manipulates the stack directly.
    //
    // To avoid these issues, perform an indirect call via a read-only memory
    // location containing the address.
    //
    // This solution is not perfect, as it assumes that the .rodata section
    // is laid out within 2^31 bytes of each function body, but this seems
    // to be sufficient for JIT.
    // FIXME: Add retpoline support and remove the error here..
    if (STI.useIndirectThunkCalls())
      report_fatal_error("Emitting morestack calls on 64-bit with the large "
                         "code model and thunks not yet implemented.");
    BuildMI(allocMBB, DL, TII.get(X86::CALL64m))
        .addReg(X86::RIP)
        .addImm(0)
        .addReg(0)
        .addExternalSymbol("__morestack_addr")
        .addReg(0);
    MF.getMMI().setUsesMorestackAddr(true);
  } else {
    if (Is64Bit)
      BuildMI(allocMBB, DL, TII.get(X86::CALL64pcrel32))
        .addExternalSymbol("__morestack");
    else
      BuildMI(allocMBB, DL, TII.get(X86::CALLpcrel32))
        .addExternalSymbol("__morestack");
  }

  // Terminator: returns past the check; the R10 variant additionally restores
  // the static chain saved in RAX above (per the pseudo's name — the actual
  // expansion is defined elsewhere).
  if (IsNested)
    BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET_RESTORE_R10));
  else
    BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET));

  allocMBB->addSuccessor(&PrologueMBB);

  // The overflow path is marked maximally cold.
  checkMBB->addSuccessor(allocMBB, BranchProbability::getZero());
  checkMBB->addSuccessor(&PrologueMBB, BranchProbability::getOne());

#ifdef EXPENSIVE_CHECKS
  MF.verify();
#endif
}
3058 | ||||
3059 | /// Lookup an ERTS parameter in the !hipe.literals named metadata node. | |||
3060 | /// HiPE provides Erlang Runtime System-internal parameters, such as PCB offsets | |||
3061 | /// to fields it needs, through a named metadata node "hipe.literals" containing | |||
3062 | /// name-value pairs. | |||
3063 | static unsigned getHiPELiteral( | |||
3064 | NamedMDNode *HiPELiteralsMD, const StringRef LiteralName) { | |||
3065 | for (int i = 0, e = HiPELiteralsMD->getNumOperands(); i != e; ++i) { | |||
3066 | MDNode *Node = HiPELiteralsMD->getOperand(i); | |||
3067 | if (Node->getNumOperands() != 2) continue; | |||
3068 | MDString *NodeName = dyn_cast<MDString>(Node->getOperand(0)); | |||
3069 | ValueAsMetadata *NodeVal = dyn_cast<ValueAsMetadata>(Node->getOperand(1)); | |||
3070 | if (!NodeName || !NodeVal) continue; | |||
3071 | ConstantInt *ValConst = dyn_cast_or_null<ConstantInt>(NodeVal->getValue()); | |||
3072 | if (ValConst && NodeName->getString() == LiteralName) { | |||
3073 | return ValConst->getZExtValue(); | |||
3074 | } | |||
3075 | } | |||
3076 | ||||
3077 | report_fatal_error("HiPE literal " + LiteralName | |||
3078 | + " required but not provided"); | |||
3079 | } | |||
3080 | ||||
3081 | // Return true if there are no non-ehpad successors to MBB and there are no | |||
3082 | // non-meta instructions between MBBI and MBB.end(). | |||
3083 | static bool blockEndIsUnreachable(const MachineBasicBlock &MBB, | |||
3084 | MachineBasicBlock::const_iterator MBBI) { | |||
3085 | return llvm::all_of( | |||
3086 | MBB.successors(), | |||
3087 | [](const MachineBasicBlock *Succ) { return Succ->isEHPad(); }) && | |||
3088 | std::all_of(MBBI, MBB.end(), [](const MachineInstr &MI) { | |||
3089 | return MI.isMetaInstruction(); | |||
3090 | }); | |||
3091 | } | |||
3092 | ||||
/// Erlang programs may need a special prologue to handle the stack size they
/// might need at runtime. That is because Erlang/OTP does not implement a C
/// stack but uses a custom implementation of hybrid stack/heap architecture.
/// (for more information see Eric Stenman's Ph.D. thesis:
/// http://publications.uu.se/uu/fulltext/nbn_se_uu_diva-2688.pdf)
///
/// CheckStack:
///   temp0 = sp - MaxStack
///   if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
/// OldStart:
///   ...
/// IncStack:
///   call inc_stack   # doubles the stack space
///   temp0 = sp - MaxStack
///   if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
void X86FrameLowering::adjustForHiPEPrologue(
    MachineFunction &MF, MachineBasicBlock &PrologueMBB) const {
  MachineFrameInfo &MFI = MF.getFrameInfo();
  DebugLoc DL;

  // To support shrink-wrapping we would need to insert the new blocks
  // at the right place and update the branches to PrologueMBB.
  assert(&(*MF.begin()) == &PrologueMBB && "Shrink-wrapping not supported yet")((void)0);

  // HiPE-specific values: the ERTS parameters (leaf words, P_NSP_LIMIT
  // offset) are supplied through the "hipe.literals" named metadata.
  NamedMDNode *HiPELiteralsMD = MF.getMMI().getModule()
    ->getNamedMetadata("hipe.literals");
  if (!HiPELiteralsMD)
    report_fatal_error(
        "Can't generate HiPE prologue without runtime parameters");
  const unsigned HipeLeafWords
    = getHiPELiteral(HiPELiteralsMD,
                     Is64Bit ? "AMD64_LEAF_WORDS" : "X86_LEAF_WORDS");
  // Number of arguments passed in registers by the HiPE convention.
  const unsigned CCRegisteredArgs = Is64Bit ? 6 : 5;
  const unsigned Guaranteed = HipeLeafWords * SlotSize;
  unsigned CallerStkArity = MF.getFunction().arg_size() > CCRegisteredArgs ?
                            MF.getFunction().arg_size() - CCRegisteredArgs : 0;
  unsigned MaxStack = MFI.getStackSize() + CallerStkArity*SlotSize + SlotSize;

  assert(STI.isTargetLinux() &&((void)0)
         "HiPE prologue is only supported on Linux operating systems.")((void)0);

  // Compute the largest caller's frame that is needed to fit the callees'
  // frames. This 'MaxStack' is computed from:
  //
  // a) the fixed frame size, which is the space needed for all spilled temps,
  // b) outgoing on-stack parameter areas, and
  // c) the minimum stack space this function needs to make available for the
  //    functions it calls (a tunable ABI property).
  if (MFI.hasCalls()) {
    unsigned MoreStackForCalls = 0;

    for (auto &MBB : MF) {
      for (auto &MI : MBB) {
        if (!MI.isCall())
          continue;

        // Get callee operand.
        const MachineOperand &MO = MI.getOperand(0);

        // Only take account of global function calls (no closures etc.).
        if (!MO.isGlobal())
          continue;

        const Function *F = dyn_cast<Function>(MO.getGlobal());
        if (!F)
          continue;

        // Do not update 'MaxStack' for primitive and built-in functions
        // (encoded with names either starting with "erlang."/"bif_" or not
        // having a ".", such as a simple <Module>.<Function>.<Arity>, or an
        // "_", such as the BIF "suspend_0") as they are executed on another
        // stack.
        if (F->getName().find("erlang.") != StringRef::npos ||
            F->getName().find("bif_") != StringRef::npos ||
            F->getName().find_first_of("._") == StringRef::npos)
          continue;

        // Account for the callee's on-stack arity against the guaranteed
        // leaf-word area.
        unsigned CalleeStkArity =
          F->arg_size() > CCRegisteredArgs ? F->arg_size()-CCRegisteredArgs : 0;
        if (HipeLeafWords - 1 > CalleeStkArity)
          MoreStackForCalls = std::max(MoreStackForCalls,
                               (HipeLeafWords - 1 - CalleeStkArity) * SlotSize);
      }
    }
    MaxStack += MoreStackForCalls;
  }

  // If the stack frame needed is larger than the guaranteed then runtime checks
  // and calls to "inc_stack_0" BIF should be inserted in the assembly prologue.
  if (MaxStack > Guaranteed) {
    MachineBasicBlock *stackCheckMBB = MF.CreateMachineBasicBlock();
    MachineBasicBlock *incStackMBB = MF.CreateMachineBasicBlock();

    // Both new blocks execute before the original prologue; propagate its
    // live-in set.
    for (const auto &LI : PrologueMBB.liveins()) {
      stackCheckMBB->addLiveIn(LI);
      incStackMBB->addLiveIn(LI);
    }

    // Entry order becomes: stackCheckMBB -> incStackMBB -> original prologue.
    MF.push_front(incStackMBB);
    MF.push_front(stackCheckMBB);

    unsigned ScratchReg, SPReg, PReg, SPLimitOffset;
    unsigned LEAop, CMPop, CALLop;
    SPLimitOffset = getHiPELiteral(HiPELiteralsMD, "P_NSP_LIMIT");
    if (Is64Bit) {
      SPReg = X86::RSP;
      PReg = X86::RBP;
      LEAop = X86::LEA64r;
      CMPop = X86::CMP64rm;
      CALLop = X86::CALL64pcrel32;
    } else {
      SPReg = X86::ESP;
      PReg = X86::EBP;
      LEAop = X86::LEA32r;
      CMPop = X86::CMP32rm;
      CALLop = X86::CALLpcrel32;
    }

    ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true);
    assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&((void)0)
           "HiPE prologue scratch register is live-in")((void)0);

    // Create new MBB for StackCheck: ScratchReg = SP - MaxStack, compared
    // against the limit; fall through to IncStack when below it.
    addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(LEAop), ScratchReg),
                 SPReg, false, -MaxStack);
    // SPLimitOffset is in a fixed heap location (pointed by BP).
    addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(CMPop))
                 .addReg(ScratchReg), PReg, false, SPLimitOffset);
    BuildMI(stackCheckMBB, DL, TII.get(X86::JCC_1)).addMBB(&PrologueMBB).addImm(X86::COND_AE);

    // Create new MBB for IncStack: call inc_stack_0 and then repeat the
    // same check, looping back until enough stack has been grown.
    BuildMI(incStackMBB, DL, TII.get(CALLop)).
      addExternalSymbol("inc_stack_0");
    addRegOffset(BuildMI(incStackMBB, DL, TII.get(LEAop), ScratchReg),
                 SPReg, false, -MaxStack);
    addRegOffset(BuildMI(incStackMBB, DL, TII.get(CMPop))
                 .addReg(ScratchReg), PReg, false, SPLimitOffset);
    BuildMI(incStackMBB, DL, TII.get(X86::JCC_1)).addMBB(incStackMBB).addImm(X86::COND_LE);

    // Branch weights: growing the stack is the 1-in-100 cold path.
    stackCheckMBB->addSuccessor(&PrologueMBB, {99, 100});
    stackCheckMBB->addSuccessor(incStackMBB, {1, 100});
    incStackMBB->addSuccessor(&PrologueMBB, {99, 100});
    incStackMBB->addSuccessor(incStackMBB, {1, 100});
  }
#ifdef EXPENSIVE_CHECKS
  MF.verify();
#endif
}
3242 | ||||
3243 | bool X86FrameLowering::adjustStackWithPops(MachineBasicBlock &MBB, | |||
3244 | MachineBasicBlock::iterator MBBI, | |||
3245 | const DebugLoc &DL, | |||
3246 | int Offset) const { | |||
3247 | if (Offset <= 0) | |||
3248 | return false; | |||
3249 | ||||
3250 | if (Offset % SlotSize) | |||
3251 | return false; | |||
3252 | ||||
3253 | int NumPops = Offset / SlotSize; | |||
3254 | // This is only worth it if we have at most 2 pops. | |||
3255 | if (NumPops != 1 && NumPops != 2) | |||
3256 | return false; | |||
3257 | ||||
3258 | // Handle only the trivial case where the adjustment directly follows | |||
3259 | // a call. This is the most common one, anyway. | |||
3260 | if (MBBI == MBB.begin()) | |||
3261 | return false; | |||
3262 | MachineBasicBlock::iterator Prev = std::prev(MBBI); | |||
3263 | if (!Prev->isCall() || !Prev->getOperand(1).isRegMask()) | |||
3264 | return false; | |||
3265 | ||||
3266 | unsigned Regs[2]; | |||
3267 | unsigned FoundRegs = 0; | |||
3268 | ||||
3269 | const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); | |||
3270 | const MachineOperand &RegMask = Prev->getOperand(1); | |||
3271 | ||||
3272 | auto &RegClass = | |||
3273 | Is64Bit ? X86::GR64_NOREX_NOSPRegClass : X86::GR32_NOREX_NOSPRegClass; | |||
3274 | // Try to find up to NumPops free registers. | |||
3275 | for (auto Candidate : RegClass) { | |||
3276 | // Poor man's liveness: | |||
3277 | // Since we're immediately after a call, any register that is clobbered | |||
3278 | // by the call and not defined by it can be considered dead. | |||
3279 | if (!RegMask.clobbersPhysReg(Candidate)) | |||
3280 | continue; | |||
3281 | ||||
3282 | // Don't clobber reserved registers | |||
3283 | if (MRI.isReserved(Candidate)) | |||
3284 | continue; | |||
3285 | ||||
3286 | bool IsDef = false; | |||
3287 | for (const MachineOperand &MO : Prev->implicit_operands()) { | |||
3288 | if (MO.isReg() && MO.isDef() && | |||
3289 | TRI->isSuperOrSubRegisterEq(MO.getReg(), Candidate)) { | |||
3290 | IsDef = true; | |||
3291 | break; | |||
3292 | } | |||
3293 | } | |||
3294 | ||||
3295 | if (IsDef) | |||
3296 | continue; | |||
3297 | ||||
3298 | Regs[FoundRegs++] = Candidate; | |||
3299 | if (FoundRegs == (unsigned)NumPops) | |||
3300 | break; | |||
3301 | } | |||
3302 | ||||
3303 | if (FoundRegs == 0) | |||
3304 | return false; | |||
3305 | ||||
3306 | // If we found only one free register, but need two, reuse the same one twice. | |||
3307 | while (FoundRegs < (unsigned)NumPops) | |||
3308 | Regs[FoundRegs++] = Regs[0]; | |||
3309 | ||||
3310 | for (int i = 0; i < NumPops; ++i) | |||
3311 | BuildMI(MBB, MBBI, DL, | |||
3312 | TII.get(STI.is64Bit() ? X86::POP64r : X86::POP32r), Regs[i]); | |||
3313 | ||||
3314 | return true; | |||
3315 | } | |||
3316 | ||||
/// Lower a call-frame setup/destroy pseudo: erase it and, when the call frame
/// is not reserved in the prologue, emit the real SP adjustment plus the CFI
/// needed to describe it. Returns the iterator following the erased pseudo.
MachineBasicBlock::iterator X86FrameLowering::
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator I) const {
  bool reserveCallFrame = hasReservedCallFrame(MF);
  unsigned Opcode = I->getOpcode();
  bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode();
  DebugLoc DL = I->getDebugLoc(); // copy DebugLoc as I will be erased.
  uint64_t Amount = TII.getFrameSize(*I);
  // InternalAmt: the part of the adjustment already performed inside the call
  // sequence itself (argument pushes on setup, callee-popped bytes on destroy).
  uint64_t InternalAmt = (isDestroy || Amount) ? TII.getFrameAdjustment(*I) : 0;
  I = MBB.erase(I);
  auto InsertPos = skipDebugInstructionsForward(I, MBB.end());

  // Try to avoid emitting dead SP adjustments if the block end is unreachable,
  // typically because the function is marked noreturn (abort, throw,
  // assert_fail, etc).
  if (isDestroy && blockEndIsUnreachable(MBB, I))
    return I;

  if (!reserveCallFrame) {
    // If the stack pointer can be changed after prologue, turn the
    // adjcallstackup instruction into a 'sub ESP, <amt>' and the
    // adjcallstackdown instruction into 'add ESP, <amt>'

    // We need to keep the stack aligned properly. To do this, we round the
    // amount of space needed for the outgoing arguments up to the next
    // alignment boundary.
    Amount = alignTo(Amount, getStackAlign());

    const Function &F = MF.getFunction();
    bool WindowsCFI = MF.getTarget().getMCAsmInfo()->usesWindowsCFI();
    bool DwarfCFI = !WindowsCFI && MF.needsFrameMoves();

    // If we have any exception handlers in this function, and we adjust
    // the SP before calls, we may need to indicate this to the unwinder
    // using GNU_ARGS_SIZE. Note that this may be necessary even when
    // Amount == 0, because the preceding function may have set a non-0
    // GNU_ARGS_SIZE.
    // TODO: We don't need to reset this between subsequent functions,
    // if it didn't change.
    bool HasDwarfEHHandlers = !WindowsCFI && !MF.getLandingPads().empty();

    if (HasDwarfEHHandlers && !isDestroy &&
        MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences())
      BuildCFI(MBB, InsertPos, DL,
               MCCFIInstruction::createGnuArgsSize(nullptr, Amount));

    if (Amount == 0)
      return I;

    // Factor out the amount that gets handled inside the sequence
    // (Pushes of argument for frame setup, callee pops for frame destroy)
    Amount -= InternalAmt;

    // TODO: This is needed only if we require precise CFA.
    // If this is a callee-pop calling convention, emit a CFA adjust for
    // the amount the callee popped.
    if (isDestroy && InternalAmt && DwarfCFI && !hasFP(MF))
      BuildCFI(MBB, InsertPos, DL,
               MCCFIInstruction::createAdjustCfaOffset(nullptr, -InternalAmt));

    // Add Amount to SP to destroy a frame, or subtract to setup.
    int64_t StackAdjustment = isDestroy ? Amount : -Amount;

    if (StackAdjustment) {
      // Merge with any previous or following adjustment instruction. Note: the
      // instructions merged with here do not have CFI, so their stack
      // adjustments do not feed into CfaAdjustment.
      StackAdjustment += mergeSPUpdates(MBB, InsertPos, true);
      StackAdjustment += mergeSPUpdates(MBB, InsertPos, false);

      if (StackAdjustment) {
        // When the function is marked minsize, first try to fold the
        // adjustment into POPs of dead registers; fall back to add/sub.
        if (!(F.hasMinSize() &&
              adjustStackWithPops(MBB, InsertPos, DL, StackAdjustment)))
          BuildStackAdjustment(MBB, InsertPos, DL, StackAdjustment,
                               /*InEpilogue=*/false);
      }
    }

    if (DwarfCFI && !hasFP(MF)) {
      // If we don't have FP, but need to generate unwind information,
      // we need to set the correct CFA offset after the stack adjustment.
      // How much we adjust the CFA offset depends on whether we're emitting
      // CFI only for EH purposes or for debugging. EH only requires the CFA
      // offset to be correct at each call site, while for debugging we want
      // it to be more precise.

      int64_t CfaAdjustment = -StackAdjustment;
      // TODO: When not using precise CFA, we also need to adjust for the
      // InternalAmt here.
      if (CfaAdjustment) {
        BuildCFI(MBB, InsertPos, DL,
                 MCCFIInstruction::createAdjustCfaOffset(nullptr,
                                                         CfaAdjustment));
      }
    }

    return I;
  }

  if (InternalAmt) {
    // Reserved call frame: callee-popped bytes still need compensating.
    // Walk back to the call and insert the adjustment just before it.
    MachineBasicBlock::iterator CI = I;
    MachineBasicBlock::iterator B = MBB.begin();
    while (CI != B && !std::prev(CI)->isCall())
      --CI;
    BuildStackAdjustment(MBB, CI, DL, -InternalAmt, /*InEpilogue=*/false);
  }

  return I;
}
3426 | ||||
3427 | bool X86FrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const { | |||
3428 | assert(MBB.getParent() && "Block is not attached to a function!")((void)0); | |||
3429 | const MachineFunction &MF = *MBB.getParent(); | |||
3430 | if (!MBB.isLiveIn(X86::EFLAGS)) | |||
3431 | return true; | |||
3432 | ||||
3433 | const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); | |||
3434 | return !TRI->hasStackRealignment(MF) && !X86FI->hasSwiftAsyncContext(); | |||
3435 | } | |||
3436 | ||||
3437 | bool X86FrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const { | |||
3438 | assert(MBB.getParent() && "Block is not attached to a function!")((void)0); | |||
3439 | ||||
3440 | // Win64 has strict requirements in terms of epilogue and we are | |||
3441 | // not taking a chance at messing with them. | |||
3442 | // I.e., unless this block is already an exit block, we can't use | |||
3443 | // it as an epilogue. | |||
3444 | if (STI.isTargetWin64() && !MBB.succ_empty() && !MBB.isReturnBlock()) | |||
3445 | return false; | |||
3446 | ||||
3447 | // Swift async context epilogue has a BTR instruction that clobbers parts of | |||
3448 | // EFLAGS. | |||
3449 | const MachineFunction &MF = *MBB.getParent(); | |||
3450 | if (MF.getInfo<X86MachineFunctionInfo>()->hasSwiftAsyncContext()) | |||
3451 | return !flagsNeedToBePreservedBeforeTheTerminators(MBB); | |||
3452 | ||||
3453 | if (canUseLEAForSPInEpilogue(*MBB.getParent())) | |||
3454 | return true; | |||
3455 | ||||
3456 | // If we cannot use LEA to adjust SP, we may need to use ADD, which | |||
3457 | // clobbers the EFLAGS. Check that we do not need to preserve it, | |||
3458 | // otherwise, conservatively assume this is not | |||
3459 | // safe to insert the epilogue here. | |||
3460 | return !flagsNeedToBePreservedBeforeTheTerminators(MBB); | |||
3461 | } | |||
3462 | ||||
3463 | bool X86FrameLowering::enableShrinkWrapping(const MachineFunction &MF) const { | |||
3464 | // If we may need to emit frameless compact unwind information, give | |||
3465 | // up as this is currently broken: PR25614. | |||
3466 | bool CompactUnwind = | |||
3467 | MF.getMMI().getContext().getObjectFileInfo()->getCompactUnwindSection() != | |||
3468 | nullptr; | |||
3469 | return (MF.getFunction().hasFnAttribute(Attribute::NoUnwind) || hasFP(MF) || | |||
3470 | !CompactUnwind) && | |||
3471 | // The lowering of segmented stack and HiPE only support entry | |||
3472 | // blocks as prologue blocks: PR26107. This limitation may be | |||
3473 | // lifted if we fix: | |||
3474 | // - adjustForSegmentedStacks | |||
3475 | // - adjustForHiPEPrologue | |||
3476 | MF.getFunction().getCallingConv() != CallingConv::HiPE && | |||
3477 | !MF.shouldSplitStack(); | |||
3478 | } | |||
3479 | ||||
// Emit the instructions that restore ESP (optionally) and EBP/ESI when
// control returns from a Win32 EH funclet back to the parent function.
// \p MBBI is the insertion point, \p RestoreSP requests the extra ESP reload
// (used for SEH personalities). Returns the (unchanged) insertion iterator.
MachineBasicBlock::iterator X86FrameLowering::restoreWin32EHStackPointers(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    const DebugLoc &DL, bool RestoreSP) const {
  assert(STI.isTargetWindowsMSVC() && "funclets only supported in MSVC env")((void)0);
  assert(STI.isTargetWin32() && "EBP/ESI restoration only required on win32")((void)0);
  assert(STI.is32Bit() && !Uses64BitFramePtr &&((void)0)
         "restoring EBP/ESI on non-32-bit target")((void)0);

  MachineFunction &MF = *MBB.getParent();
  Register FramePtr = TRI->getFrameRegister(MF);
  Register BasePtr = TRI->getBaseRegister();
  WinEHFuncInfo &FuncInfo = *MF.getWinEHFuncInfo();
  X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
  MachineFrameInfo &MFI = MF.getFrameInfo();

  // FIXME: Don't set FrameSetup flag in catchret case.

  // The EH registration node's frame slot anchors all the offsets below.
  int FI = FuncInfo.EHRegNodeFrameIndex;
  int EHRegSize = MFI.getObjectSize(FI);

  if (RestoreSP) {
    // MOV32rm -EHRegSize(%ebp), %esp
    addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32rm), X86::ESP),
                 X86::EBP, true, -EHRegSize)
        .setMIFlag(MachineInstr::FrameSetup);
  }

  Register UsedReg;
  int EHRegOffset = getFrameIndexReference(MF, FI, UsedReg).getFixed();
  // End of the registration object, relative to the register chosen by
  // getFrameIndexReference; also published to later consumers via FuncInfo.
  int EndOffset = -EHRegOffset - EHRegSize;
  FuncInfo.EHRegNodeEndOffset = EndOffset;

  if (UsedReg == FramePtr) {
    // The EH node is addressed off the frame pointer: rebuild EBP by adding
    // the end offset to it.
    // ADD $offset, %ebp
    unsigned ADDri = getADDriOpcode(false, EndOffset);
    BuildMI(MBB, MBBI, DL, TII.get(ADDri), FramePtr)
        .addReg(FramePtr)
        .addImm(EndOffset)
        .setMIFlag(MachineInstr::FrameSetup)
        // Operand 3 is the implicit EFLAGS def of ADD; mark it dead since
        // nothing reads the flags here.
        ->getOperand(3)
        .setIsDead();
    assert(EndOffset >= 0 &&((void)0)
           "end of registration object above normal EBP position!")((void)0);
  } else if (UsedReg == BasePtr) {
    // The EH node is addressed off the base pointer: first recompute ESI,
    // then reload the saved EBP through it.
    // LEA offset(%ebp), %esi
    addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA32r), BasePtr),
                 FramePtr, false, EndOffset)
        .setMIFlag(MachineInstr::FrameSetup);
    // MOV32rm SavedEBPOffset(%esi), %ebp
    assert(X86FI->getHasSEHFramePtrSave())((void)0);
    int Offset =
        getFrameIndexReference(MF, X86FI->getSEHFramePtrSaveIndex(), UsedReg)
            .getFixed();
    assert(UsedReg == BasePtr)((void)0);
    addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32rm), FramePtr),
                 UsedReg, true, Offset)
        .setMIFlag(MachineInstr::FrameSetup);
  } else {
    llvm_unreachable("32-bit frames with WinEH must use FramePtr or BasePtr")__builtin_unreachable();
  }
  return MBBI;
}
3542 | ||||
// The CFA starts one stack slot away from the entry stack pointer
// (the slot size is taken from the register info).
int X86FrameLowering::getInitialCFAOffset(const MachineFunction &MF) const {
  return TRI->getSlotSize();
}
3546 | ||||
// Initial CFA register: the stack pointer, expressed as its DWARF register
// number. NOTE(review): getDwarfRegNum returns a DWARF number, which is then
// implicitly wrapped in Register here — confirm callers expect the DWARF
// numbering rather than an MC register.
Register
X86FrameLowering::getInitialCFARegister(const MachineFunction &MF) const {
  return TRI->getDwarfRegNum(StackPtr, true);
}
3551 | ||||
3552 | namespace { | |||
3553 | // Struct used by orderFrameObjects to help sort the stack objects. | |||
3554 | struct X86FrameSortingObject { | |||
3555 | bool IsValid = false; // true if we care about this Object. | |||
3556 | unsigned ObjectIndex = 0; // Index of Object into MFI list. | |||
3557 | unsigned ObjectSize = 0; // Size of Object in bytes. | |||
3558 | Align ObjectAlignment = Align(1); // Alignment of Object in bytes. | |||
3559 | unsigned ObjectNumUses = 0; // Object static number of uses. | |||
3560 | }; | |||
3561 | ||||
3562 | // The comparison function we use for std::sort to order our local | |||
3563 | // stack symbols. The current algorithm is to use an estimated | |||
3564 | // "density". This takes into consideration the size and number of | |||
3565 | // uses each object has in order to roughly minimize code size. | |||
3566 | // So, for example, an object of size 16B that is referenced 5 times | |||
3567 | // will get higher priority than 4 4B objects referenced 1 time each. | |||
3568 | // It's not perfect and we may be able to squeeze a few more bytes out of | |||
3569 | // it (for example : 0(esp) requires fewer bytes, symbols allocated at the | |||
3570 | // fringe end can have special consideration, given their size is less | |||
3571 | // important, etc.), but the algorithmic complexity grows too much to be | |||
3572 | // worth the extra gains we get. This gets us pretty close. | |||
3573 | // The final order leaves us with objects with highest priority going | |||
3574 | // at the end of our list. | |||
3575 | struct X86FrameSortingComparator { | |||
3576 | inline bool operator()(const X86FrameSortingObject &A, | |||
3577 | const X86FrameSortingObject &B) const { | |||
3578 | uint64_t DensityAScaled, DensityBScaled; | |||
3579 | ||||
3580 | // For consistency in our comparison, all invalid objects are placed | |||
3581 | // at the end. This also allows us to stop walking when we hit the | |||
3582 | // first invalid item after it's all sorted. | |||
3583 | if (!A.IsValid) | |||
3584 | return false; | |||
3585 | if (!B.IsValid) | |||
3586 | return true; | |||
3587 | ||||
3588 | // The density is calculated by doing : | |||
3589 | // (double)DensityA = A.ObjectNumUses / A.ObjectSize | |||
3590 | // (double)DensityB = B.ObjectNumUses / B.ObjectSize | |||
3591 | // Since this approach may cause inconsistencies in | |||
3592 | // the floating point <, >, == comparisons, depending on the floating | |||
3593 | // point model with which the compiler was built, we're going | |||
3594 | // to scale both sides by multiplying with | |||
3595 | // A.ObjectSize * B.ObjectSize. This ends up factoring away | |||
3596 | // the division and, with it, the need for any floating point | |||
3597 | // arithmetic. | |||
3598 | DensityAScaled = static_cast<uint64_t>(A.ObjectNumUses) * | |||
3599 | static_cast<uint64_t>(B.ObjectSize); | |||
3600 | DensityBScaled = static_cast<uint64_t>(B.ObjectNumUses) * | |||
3601 | static_cast<uint64_t>(A.ObjectSize); | |||
3602 | ||||
3603 | // If the two densities are equal, prioritize highest alignment | |||
3604 | // objects. This allows for similar alignment objects | |||
3605 | // to be packed together (given the same density). | |||
3606 | // There's room for improvement here, also, since we can pack | |||
3607 | // similar alignment (different density) objects next to each | |||
3608 | // other to save padding. This will also require further | |||
3609 | // complexity/iterations, and the overall gain isn't worth it, | |||
3610 | // in general. Something to keep in mind, though. | |||
3611 | if (DensityAScaled == DensityBScaled) | |||
3612 | return A.ObjectAlignment < B.ObjectAlignment; | |||
3613 | ||||
3614 | return DensityAScaled < DensityBScaled; | |||
3615 | } | |||
3616 | }; | |||
3617 | } // namespace | |||
3618 | ||||
3619 | // Order the symbols in the local stack. | |||
3620 | // We want to place the local stack objects in some sort of sensible order. | |||
3621 | // The heuristic we use is to try and pack them according to static number | |||
3622 | // of uses and size of object in order to minimize code size. | |||
3623 | void X86FrameLowering::orderFrameObjects( | |||
3624 | const MachineFunction &MF, SmallVectorImpl<int> &ObjectsToAllocate) const { | |||
3625 | const MachineFrameInfo &MFI = MF.getFrameInfo(); | |||
3626 | ||||
3627 | // Don't waste time if there's nothing to do. | |||
3628 | if (ObjectsToAllocate.empty()) | |||
3629 | return; | |||
3630 | ||||
3631 | // Create an array of all MFI objects. We won't need all of these | |||
3632 | // objects, but we're going to create a full array of them to make | |||
3633 | // it easier to index into when we're counting "uses" down below. | |||
3634 | // We want to be able to easily/cheaply access an object by simply | |||
3635 | // indexing into it, instead of having to search for it every time. | |||
3636 | std::vector<X86FrameSortingObject> SortingObjects(MFI.getObjectIndexEnd()); | |||
3637 | ||||
3638 | // Walk the objects we care about and mark them as such in our working | |||
3639 | // struct. | |||
3640 | for (auto &Obj : ObjectsToAllocate) { | |||
3641 | SortingObjects[Obj].IsValid = true; | |||
3642 | SortingObjects[Obj].ObjectIndex = Obj; | |||
3643 | SortingObjects[Obj].ObjectAlignment = MFI.getObjectAlign(Obj); | |||
3644 | // Set the size. | |||
3645 | int ObjectSize = MFI.getObjectSize(Obj); | |||
3646 | if (ObjectSize == 0) | |||
3647 | // Variable size. Just use 4. | |||
3648 | SortingObjects[Obj].ObjectSize = 4; | |||
3649 | else | |||
3650 | SortingObjects[Obj].ObjectSize = ObjectSize; | |||
3651 | } | |||
3652 | ||||
3653 | // Count the number of uses for each object. | |||
3654 | for (auto &MBB : MF) { | |||
3655 | for (auto &MI : MBB) { | |||
3656 | if (MI.isDebugInstr()) | |||
3657 | continue; | |||
3658 | for (const MachineOperand &MO : MI.operands()) { | |||
3659 | // Check to see if it's a local stack symbol. | |||
3660 | if (!MO.isFI()) | |||
3661 | continue; | |||
3662 | int Index = MO.getIndex(); | |||
3663 | // Check to see if it falls within our range, and is tagged | |||
3664 | // to require ordering. | |||
3665 | if (Index >= 0 && Index < MFI.getObjectIndexEnd() && | |||
3666 | SortingObjects[Index].IsValid) | |||
3667 | SortingObjects[Index].ObjectNumUses++; | |||
3668 | } | |||
3669 | } | |||
3670 | } | |||
3671 | ||||
3672 | // Sort the objects using X86FrameSortingAlgorithm (see its comment for | |||
3673 | // info). | |||
3674 | llvm::stable_sort(SortingObjects, X86FrameSortingComparator()); | |||
3675 | ||||
3676 | // Now modify the original list to represent the final order that | |||
3677 | // we want. The order will depend on whether we're going to access them | |||
3678 | // from the stack pointer or the frame pointer. For SP, the list should | |||
3679 | // end up with the END containing objects that we want with smaller offsets. | |||
3680 | // For FP, it should be flipped. | |||
3681 | int i = 0; | |||
3682 | for (auto &Obj : SortingObjects) { | |||
3683 | // All invalid items are sorted at the end, so it's safe to stop. | |||
3684 | if (!Obj.IsValid) | |||
3685 | break; | |||
3686 | ObjectsToAllocate[i++] = Obj.ObjectIndex; | |||
3687 | } | |||
3688 | ||||
3689 | // Flip it if we're accessing off of the FP. | |||
3690 | if (!TRI->hasStackRealignment(MF) && hasFP(MF)) | |||
3691 | std::reverse(ObjectsToAllocate.begin(), ObjectsToAllocate.end()); | |||
3692 | } | |||
3693 | ||||
3694 | ||||
3695 | unsigned X86FrameLowering::getWinEHParentFrameOffset(const MachineFunction &MF) const { | |||
3696 | // RDX, the parent frame pointer, is homed into 16(%rsp) in the prologue. | |||
3697 | unsigned Offset = 16; | |||
3698 | // RBP is immediately pushed. | |||
3699 | Offset += SlotSize; | |||
3700 | // All callee-saved registers are then pushed. | |||
3701 | Offset += MF.getInfo<X86MachineFunctionInfo>()->getCalleeSavedFrameSize(); | |||
3702 | // Every funclet allocates enough stack space for the largest outgoing call. | |||
3703 | Offset += getWinEHFuncletFrameSize(MF); | |||
3704 | return Offset; | |||
3705 | } | |||
3706 | ||||
3707 | void X86FrameLowering::processFunctionBeforeFrameFinalized( | |||
3708 | MachineFunction &MF, RegScavenger *RS) const { | |||
3709 | // Mark the function as not having WinCFI. We will set it back to true in | |||
3710 | // emitPrologue if it gets called and emits CFI. | |||
3711 | MF.setHasWinCFI(false); | |||
3712 | ||||
3713 | // If we are using Windows x64 CFI, ensure that the stack is always 8 byte | |||
3714 | // aligned. The format doesn't support misaligned stack adjustments. | |||
3715 | if (MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) | |||
3716 | MF.getFrameInfo().ensureMaxAlignment(Align(SlotSize)); | |||
3717 | ||||
3718 | // If this function isn't doing Win64-style C++ EH, we don't need to do | |||
3719 | // anything. | |||
3720 | if (STI.is64Bit() && MF.hasEHFunclets() && | |||
3721 | classifyEHPersonality(MF.getFunction().getPersonalityFn()) == | |||
3722 | EHPersonality::MSVC_CXX) { | |||
3723 | adjustFrameForMsvcCxxEh(MF); | |||
3724 | } | |||
3725 | } | |||
3726 | ||||
// Lay out the MSVC C++ EH catch objects and the UnwindHelp slot at fixed
// offsets below the last fixed object, then emit the store that initializes
// UnwindHelp at function entry.
void X86FrameLowering::adjustFrameForMsvcCxxEh(MachineFunction &MF) const {
  // Win64 C++ EH needs to allocate the UnwindHelp object at some fixed offset
  // relative to RSP after the prologue. Find the offset of the last fixed
  // object, so that we can allocate a slot immediately following it. If there
  // were no fixed objects, use offset -SlotSize, which is immediately after the
  // return address. Fixed objects have negative frame indices.
  MachineFrameInfo &MFI = MF.getFrameInfo();
  WinEHFuncInfo &EHInfo = *MF.getWinEHFuncInfo();
  int64_t MinFixedObjOffset = -SlotSize;
  for (int I = MFI.getObjectIndexBegin(); I < 0; ++I)
    MinFixedObjOffset = std::min(MinFixedObjOffset, MFI.getObjectOffset(I));

  // Give every catch object a fixed offset just below the current minimum,
  // keeping each one aligned to its own requirement.
  for (WinEHTryBlockMapEntry &TBME : EHInfo.TryBlockMap) {
    for (WinEHHandlerType &H : TBME.HandlerArray) {
      int FrameIndex = H.CatchObj.FrameIndex;
      // NOTE(review): "INT_MAX2147483647" is the preprocessed rendering of
      // the INT_MAX sentinel meaning "no catch object frame index".
      if (FrameIndex != INT_MAX2147483647) {
        // Ensure alignment.
        unsigned Align = MFI.getObjectAlign(FrameIndex).value();
        MinFixedObjOffset -= std::abs(MinFixedObjOffset) % Align;
        MinFixedObjOffset -= MFI.getObjectSize(FrameIndex);
        MFI.setObjectOffset(FrameIndex, MinFixedObjOffset);
      }
    }
  }

  // Ensure alignment (8 bytes) for the UnwindHelp slot itself.
  MinFixedObjOffset -= std::abs(MinFixedObjOffset) % 8;
  int64_t UnwindHelpOffset = MinFixedObjOffset - SlotSize;
  int UnwindHelpFI =
      MFI.CreateFixedObject(SlotSize, UnwindHelpOffset, /*IsImmutable=*/false);
  EHInfo.UnwindHelpFrameIdx = UnwindHelpFI;

  // Store -2 into UnwindHelp on function entry. We have to scan forwards past
  // other frame setup instructions.
  MachineBasicBlock &MBB = MF.front();
  auto MBBI = MBB.begin();
  while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup))
    ++MBBI;

  DebugLoc DL = MBB.findDebugLoc(MBBI);
  addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mi32)),
                    UnwindHelpFI)
      .addImm(-2);
}
3771 | ||||
// Returns the return-protector lowering implementation owned by this
// frame-lowering object (the RPL member).
const ReturnProtectorLowering *X86FrameLowering::getReturnProtector() const {
  return &RPL;
}
3775 | ||||
3776 | void X86FrameLowering::processFunctionBeforeFrameIndicesReplaced( | |||
3777 | MachineFunction &MF, RegScavenger *RS) const { | |||
3778 | if (STI.is32Bit() && MF.hasEHFunclets()) | |||
3779 | restoreWinEHStackPointersInParent(MF); | |||
3780 | } | |||
3781 | ||||
3782 | void X86FrameLowering::restoreWinEHStackPointersInParent( | |||
3783 | MachineFunction &MF) const { | |||
3784 | // 32-bit functions have to restore stack pointers when control is transferred | |||
3785 | // back to the parent function. These blocks are identified as eh pads that | |||
3786 | // are not funclet entries. | |||
3787 | bool IsSEH = isAsynchronousEHPersonality( | |||
3788 | classifyEHPersonality(MF.getFunction().getPersonalityFn())); | |||
3789 | for (MachineBasicBlock &MBB : MF) { | |||
3790 | bool NeedsRestore = MBB.isEHPad() && !MBB.isEHFuncletEntry(); | |||
3791 | if (NeedsRestore) | |||
3792 | restoreWin32EHStackPointers(MBB, MBB.begin(), DebugLoc(), | |||
3793 | /*RestoreSP=*/IsSEH); | |||
3794 | } | |||
3795 | } |
1 | //===-- llvm/Support/Alignment.h - Useful alignment functions ---*- C++ -*-===// | |||
2 | // | |||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |||
4 | // See https://llvm.org/LICENSE.txt for license information. | |||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |||
6 | // | |||
7 | //===----------------------------------------------------------------------===// | |||
8 | // | |||
9 | // This file contains types to represent alignments. | |||
10 | // They are instrumented to guarantee some invariants are preserved and prevent | |||
11 | // invalid manipulations. | |||
12 | // | |||
13 | // - Align represents an alignment in bytes, it is always set and always a valid | |||
14 | // power of two, its minimum value is 1 which means no alignment requirements. | |||
15 | // | |||
16 | // - MaybeAlign is an optional type, it may be undefined or set. When it's set | |||
17 | // you can get the underlying Align type by using the getValue() method. | |||
18 | // | |||
19 | //===----------------------------------------------------------------------===// | |||
20 | ||||
21 | #ifndef LLVM_SUPPORT_ALIGNMENT_H_ | |||
22 | #define LLVM_SUPPORT_ALIGNMENT_H_ | |||
23 | ||||
24 | #include "llvm/ADT/Optional.h" | |||
25 | #include "llvm/Support/MathExtras.h" | |||
26 | #include <cassert> | |||
27 | #ifndef NDEBUG1 | |||
28 | #include <string> | |||
29 | #endif // NDEBUG | |||
30 | ||||
31 | namespace llvm { | |||
32 | ||||
33 | #define ALIGN_CHECK_ISPOSITIVE(decl) \ | |||
34 | assert(decl > 0 && (#decl " should be defined"))((void)0) | |||
35 | ||||
/// This struct is a compact representation of a valid (non-zero power of two)
/// alignment.
/// It is suitable for use as static global constants.
struct Align {
private:
  uint8_t ShiftValue = 0; /// The log2 of the required alignment.
                          /// ShiftValue is less than 64 by construction.

  friend struct MaybeAlign;
  friend unsigned Log2(Align);
  friend bool operator==(Align Lhs, Align Rhs);
  friend bool operator!=(Align Lhs, Align Rhs);
  friend bool operator<=(Align Lhs, Align Rhs);
  friend bool operator>=(Align Lhs, Align Rhs);
  friend bool operator<(Align Lhs, Align Rhs);
  friend bool operator>(Align Lhs, Align Rhs);
  friend unsigned encode(struct MaybeAlign A);
  friend struct MaybeAlign decodeMaybeAlign(unsigned Value);

  /// A trivial type to allow construction of constexpr Align.
  /// This is currently needed to workaround a bug in GCC 5.3 which prevents
  /// definition of constexpr assign operators.
  /// https://stackoverflow.com/questions/46756288/explicitly-defaulted-function-cannot-be-declared-as-constexpr-because-the-implic
  /// FIXME: Remove this, make all assign operators constexpr and introduce user
  /// defined literals when we don't have to support GCC 5.3 anymore.
  /// https://llvm.org/docs/GettingStarted.html#getting-a-modern-host-c-toolchain
  struct LogValue {
    uint8_t Log;
  };

public:
  /// Default is byte-aligned.
  constexpr Align() = default;
  /// Do not perform checks in case of copy/move construct/assign, because the
  /// checks have been performed when building `Other`.
  constexpr Align(const Align &Other) = default;
  constexpr Align(Align &&Other) = default;
  Align &operator=(const Align &Other) = default;
  Align &operator=(Align &&Other) = default;

  /// Construct from a byte count; \p Value must be a non-zero power of two.
  /// (The asserts are compiled out in this build — see ((void)0).)
  explicit Align(uint64_t Value) {
    assert(Value > 0 && "Value must not be 0")((void)0);
    assert(llvm::isPowerOf2_64(Value) && "Alignment is not a power of 2")((void)0);
    ShiftValue = Log2_64(Value);
    assert(ShiftValue < 64 && "Broken invariant")((void)0);
  }

  /// This is a hole in the type system and should not be abused.
  /// Needed to interact with C for instance.
  /// NOTE(review): this shift is well-defined only while ShiftValue < 64.
  /// decodeMaybeAlign below can write an unvalidated ShiftValue (up to 254),
  /// which is what makes the static analyzer flag this line for a shift by
  /// 255 — confirm all encode()/decodeMaybeAlign() callers round-trip values
  /// produced by encode().
  uint64_t value() const { return uint64_t(1) << ShiftValue; }

  /// Allow constructions of constexpr Align.
  template <size_t kValue> constexpr static LogValue Constant() {
    return LogValue{static_cast<uint8_t>(CTLog2<kValue>())};
  }

  /// Allow constructions of constexpr Align from types.
  /// Compile time equivalent to Align(alignof(T)).
  template <typename T> constexpr static LogValue Of() {
    return Constant<std::alignment_of<T>::value>();
  }

  /// Constexpr constructor from LogValue type.
  constexpr Align(LogValue CA) : ShiftValue(CA.Log) {}
};
101 | ||||
102 | /// Treats the value 0 as a 1, so Align is always at least 1. | |||
103 | inline Align assumeAligned(uint64_t Value) { | |||
104 | return Value ? Align(Value) : Align(); | |||
105 | } | |||
106 | ||||
/// This struct is a compact representation of a valid (power of two) or
/// undefined (0) alignment. It is an Optional<Align> with a checked
/// constructor from a raw byte count.
struct MaybeAlign : public llvm::Optional<Align> {
private:
  using UP = llvm::Optional<Align>;

public:
  /// Default is undefined.
  MaybeAlign() = default;
  /// Do not perform checks in case of copy/move construct/assign, because the
  /// checks have been performed when building `Other`.
  MaybeAlign(const MaybeAlign &Other) = default;
  MaybeAlign &operator=(const MaybeAlign &Other) = default;
  MaybeAlign(MaybeAlign &&Other) = default;
  MaybeAlign &operator=(MaybeAlign &&Other) = default;

  /// Use llvm::Optional<Align> constructor.
  using UP::UP;

  /// Construct from a byte count: 0 yields the undefined alignment, any
  /// other value must be a power of two (assert compiled out in this build).
  explicit MaybeAlign(uint64_t Value) {
    assert((Value == 0 || llvm::isPowerOf2_64(Value)) &&((void)0)
           "Alignment is neither 0 nor a power of 2")((void)0);
    if (Value)
      emplace(Value);
  }

  /// For convenience, returns a valid alignment or 1 if undefined.
  Align valueOrOne() const { return hasValue() ? getValue() : Align(); }
};
136 | ||||
137 | /// Checks that SizeInBytes is a multiple of the alignment. | |||
138 | inline bool isAligned(Align Lhs, uint64_t SizeInBytes) { | |||
139 | return SizeInBytes % Lhs.value() == 0; | |||
140 | } | |||
141 | ||||
142 | /// Checks that Addr is a multiple of the alignment. | |||
143 | inline bool isAddrAligned(Align Lhs, const void *Addr) { | |||
144 | return isAligned(Lhs, reinterpret_cast<uintptr_t>(Addr)); | |||
145 | } | |||
146 | ||||
147 | /// Returns a multiple of A needed to store `Size` bytes. | |||
148 | inline uint64_t alignTo(uint64_t Size, Align A) { | |||
149 | const uint64_t Value = A.value(); | |||
150 | // The following line is equivalent to `(Size + Value - 1) / Value * Value`. | |||
151 | ||||
152 | // The division followed by a multiplication can be thought of as a right | |||
153 | // shift followed by a left shift which zeros out the extra bits produced in | |||
154 | // the bump; `~(Value - 1)` is a mask where all those bits being zeroed out | |||
155 | // are just zero. | |||
156 | ||||
157 | // Most compilers can generate this code but the pattern may be missed when | |||
158 | // multiple functions gets inlined. | |||
159 | return (Size + Value - 1) & ~(Value - 1U); | |||
160 | } | |||
161 | ||||
162 | /// If non-zero \p Skew is specified, the return value will be a minimal integer | |||
163 | /// that is greater than or equal to \p Size and equal to \p A * N + \p Skew for | |||
164 | /// some integer N. If \p Skew is larger than \p A, its value is adjusted to '\p | |||
165 | /// Skew mod \p A'. | |||
166 | /// | |||
167 | /// Examples: | |||
168 | /// \code | |||
169 | /// alignTo(5, Align(8), 7) = 7 | |||
170 | /// alignTo(17, Align(8), 1) = 17 | |||
171 | /// alignTo(~0LL, Align(8), 3) = 3 | |||
172 | /// \endcode | |||
173 | inline uint64_t alignTo(uint64_t Size, Align A, uint64_t Skew) { | |||
174 | const uint64_t Value = A.value(); | |||
175 | Skew %= Value; | |||
176 | return ((Size + Value - 1 - Skew) & ~(Value - 1U)) + Skew; | |||
177 | } | |||
178 | ||||
179 | /// Returns a multiple of A needed to store `Size` bytes. | |||
180 | /// Returns `Size` if current alignment is undefined. | |||
181 | inline uint64_t alignTo(uint64_t Size, MaybeAlign A) { | |||
182 | return A ? alignTo(Size, A.getValue()) : Size; | |||
183 | } | |||
184 | ||||
185 | /// Aligns `Addr` to `Alignment` bytes, rounding up. | |||
186 | inline uintptr_t alignAddr(const void *Addr, Align Alignment) { | |||
187 | uintptr_t ArithAddr = reinterpret_cast<uintptr_t>(Addr); | |||
188 | assert(static_cast<uintptr_t>(ArithAddr + Alignment.value() - 1) >=((void)0) | |||
189 | ArithAddr &&((void)0) | |||
190 | "Overflow")((void)0); | |||
191 | return alignTo(ArithAddr, Alignment); | |||
192 | } | |||
193 | ||||
194 | /// Returns the offset to the next integer (mod 2**64) that is greater than | |||
195 | /// or equal to \p Value and is a multiple of \p Align. | |||
196 | inline uint64_t offsetToAlignment(uint64_t Value, Align Alignment) { | |||
197 | return alignTo(Value, Alignment) - Value; | |||
198 | } | |||
199 | ||||
200 | /// Returns the necessary adjustment for aligning `Addr` to `Alignment` | |||
201 | /// bytes, rounding up. | |||
202 | inline uint64_t offsetToAlignedAddr(const void *Addr, Align Alignment) { | |||
203 | return offsetToAlignment(reinterpret_cast<uintptr_t>(Addr), Alignment); | |||
204 | } | |||
205 | ||||
/// Returns the log2 of the alignment (reads Align's private ShiftValue via
/// friendship).
inline unsigned Log2(Align A) { return A.ShiftValue; }

/// Returns the alignment that satisfies both alignments.
/// Same semantic as MinAlign: since both are powers of two, the smaller one
/// divides the larger, so the minimum satisfies both.
inline Align commonAlignment(Align A, Align B) { return std::min(A, B); }

/// Returns the alignment that satisfies both the alignment \p A and the
/// arbitrary byte offset \p Offset. Same semantic as MinAlign.
inline Align commonAlignment(Align A, uint64_t Offset) {
  return Align(MinAlign(A.value(), Offset));
}
218 | ||||
/// Returns the alignment that satisfies both alignments, where either side
/// may be undefined; an undefined side is ignored. Same semantic as MinAlign.
inline MaybeAlign commonAlignment(MaybeAlign A, MaybeAlign B) {
  return A && B ? commonAlignment(*A, *B) : A ? A : B;
}

/// Returns the alignment that satisfies both alignments.
/// Same semantic as MinAlign.
/// NOTE(review): \p A is dereferenced unconditionally here, so this overload
/// requires \p A to hold a value — calling it with an undefined MaybeAlign
/// dereferences an empty Optional. Confirm all callers guarantee A is set.
inline MaybeAlign commonAlignment(MaybeAlign A, uint64_t Offset) {
  return MaybeAlign(MinAlign((*A).value(), Offset));
}
230 | ||||
231 | /// Returns a representation of the alignment that encodes undefined as 0. | |||
232 | inline unsigned encode(MaybeAlign A) { return A ? A->ShiftValue + 1 : 0; } | |||
233 | ||||
234 | /// Dual operation of the encode function above. | |||
235 | inline MaybeAlign decodeMaybeAlign(unsigned Value) { | |||
236 | if (Value == 0) | |||
237 | return MaybeAlign(); | |||
238 | Align Out; | |||
239 | Out.ShiftValue = Value - 1; | |||
240 | return Out; | |||
241 | } | |||
242 | ||||
/// Returns a representation of the alignment, the encoded value is positive
/// by definition: a set alignment always encodes as ShiftValue + 1, never 0.
inline unsigned encode(Align A) { return encode(MaybeAlign(A)); }
246 | ||||
247 | /// Comparisons between Align and scalars. Rhs must be positive. | |||
248 | inline bool operator==(Align Lhs, uint64_t Rhs) { | |||
249 | ALIGN_CHECK_ISPOSITIVE(Rhs); | |||
250 | return Lhs.value() == Rhs; | |||
251 | } | |||
252 | inline bool operator!=(Align Lhs, uint64_t Rhs) { | |||
253 | ALIGN_CHECK_ISPOSITIVE(Rhs); | |||
254 | return Lhs.value() != Rhs; | |||
255 | } | |||
256 | inline bool operator<=(Align Lhs, uint64_t Rhs) { | |||
257 | ALIGN_CHECK_ISPOSITIVE(Rhs); | |||
258 | return Lhs.value() <= Rhs; | |||
259 | } | |||
260 | inline bool operator>=(Align Lhs, uint64_t Rhs) { | |||
261 | ALIGN_CHECK_ISPOSITIVE(Rhs); | |||
262 | return Lhs.value() >= Rhs; | |||
263 | } | |||
264 | inline bool operator<(Align Lhs, uint64_t Rhs) { | |||
265 | ALIGN_CHECK_ISPOSITIVE(Rhs); | |||
266 | return Lhs.value() < Rhs; | |||
267 | } | |||
268 | inline bool operator>(Align Lhs, uint64_t Rhs) { | |||
269 | ALIGN_CHECK_ISPOSITIVE(Rhs); | |||
270 | return Lhs.value() > Rhs; | |||
271 | } | |||
272 | ||||
273 | /// Comparisons between MaybeAlign and scalars. | |||
274 | inline bool operator==(MaybeAlign Lhs, uint64_t Rhs) { | |||
275 | return Lhs ? (*Lhs).value() == Rhs : Rhs == 0; | |||
276 | } | |||
277 | inline bool operator!=(MaybeAlign Lhs, uint64_t Rhs) { | |||
278 | return Lhs ? (*Lhs).value() != Rhs : Rhs != 0; | |||
279 | } | |||
280 | ||||
281 | /// Comparisons operators between Align. | |||
282 | inline bool operator==(Align Lhs, Align Rhs) { | |||
283 | return Lhs.ShiftValue == Rhs.ShiftValue; | |||
284 | } | |||
285 | inline bool operator!=(Align Lhs, Align Rhs) { | |||
286 | return Lhs.ShiftValue != Rhs.ShiftValue; | |||
287 | } | |||
288 | inline bool operator<=(Align Lhs, Align Rhs) { | |||
289 | return Lhs.ShiftValue <= Rhs.ShiftValue; | |||
290 | } | |||
291 | inline bool operator>=(Align Lhs, Align Rhs) { | |||
292 | return Lhs.ShiftValue >= Rhs.ShiftValue; | |||
293 | } | |||
294 | inline bool operator<(Align Lhs, Align Rhs) { | |||
295 | return Lhs.ShiftValue < Rhs.ShiftValue; | |||
296 | } | |||
297 | inline bool operator>(Align Lhs, Align Rhs) { | |||
298 | return Lhs.ShiftValue > Rhs.ShiftValue; | |||
299 | } | |||
300 | ||||
// Don't allow relational comparisons with MaybeAlign.
// An unset MaybeAlign has no natural position in the ordering of alignments,
// so every '<', '<=', '>' and '>=' overload that mixes MaybeAlign with Align
// (or compares two MaybeAligns) is explicitly deleted; only the (in)equality
// operators defined above are meaningful for MaybeAlign.
bool operator<=(Align Lhs, MaybeAlign Rhs) = delete;
bool operator>=(Align Lhs, MaybeAlign Rhs) = delete;
bool operator<(Align Lhs, MaybeAlign Rhs) = delete;
bool operator>(Align Lhs, MaybeAlign Rhs) = delete;

bool operator<=(MaybeAlign Lhs, Align Rhs) = delete;
bool operator>=(MaybeAlign Lhs, Align Rhs) = delete;
bool operator<(MaybeAlign Lhs, Align Rhs) = delete;
bool operator>(MaybeAlign Lhs, Align Rhs) = delete;

bool operator<=(MaybeAlign Lhs, MaybeAlign Rhs) = delete;
bool operator>=(MaybeAlign Lhs, MaybeAlign Rhs) = delete;
bool operator<(MaybeAlign Lhs, MaybeAlign Rhs) = delete;
bool operator>(MaybeAlign Lhs, MaybeAlign Rhs) = delete;
316 | ||||
317 | inline Align operator*(Align Lhs, uint64_t Rhs) { | |||
318 | assert(Rhs > 0 && "Rhs must be positive")((void)0); | |||
319 | return Align(Lhs.value() * Rhs); | |||
320 | } | |||
321 | ||||
322 | inline MaybeAlign operator*(MaybeAlign Lhs, uint64_t Rhs) { | |||
323 | assert(Rhs > 0 && "Rhs must be positive")((void)0); | |||
324 | return Lhs ? Lhs.getValue() * Rhs : MaybeAlign(); | |||
325 | } | |||
326 | ||||
327 | inline Align operator/(Align Lhs, uint64_t Divisor) { | |||
328 | assert(llvm::isPowerOf2_64(Divisor) &&((void)0) | |||
329 | "Divisor must be positive and a power of 2")((void)0); | |||
330 | assert(Lhs != 1 && "Can't halve byte alignment")((void)0); | |||
331 | return Align(Lhs.value() / Divisor); | |||
332 | } | |||
333 | ||||
334 | inline MaybeAlign operator/(MaybeAlign Lhs, uint64_t Divisor) { | |||
335 | assert(llvm::isPowerOf2_64(Divisor) &&((void)0) | |||
336 | "Divisor must be positive and a power of 2")((void)0); | |||
337 | return Lhs ? Lhs.getValue() / Divisor : MaybeAlign(); | |||
338 | } | |||
339 | ||||
340 | inline Align max(MaybeAlign Lhs, Align Rhs) { | |||
341 | return Lhs && *Lhs > Rhs ? *Lhs : Rhs; | |||
342 | } | |||
343 | ||||
344 | inline Align max(Align Lhs, MaybeAlign Rhs) { | |||
345 | return Rhs && *Rhs > Lhs ? *Rhs : Lhs; | |||
346 | } | |||
347 | ||||
348 | #ifndef NDEBUG1 | |||
349 | // For usage in LLVM_DEBUG macros. | |||
350 | inline std::string DebugStr(const Align &A) { | |||
351 | return std::to_string(A.value()); | |||
352 | } | |||
353 | // For usage in LLVM_DEBUG macros. | |||
354 | inline std::string DebugStr(const MaybeAlign &MA) { | |||
355 | if (MA) | |||
356 | return std::to_string(MA->value()); | |||
357 | return "None"; | |||
358 | } | |||
359 | #endif // NDEBUG | |||
360 | ||||
361 | #undef ALIGN_CHECK_ISPOSITIVE | |||
362 | ||||
363 | } // namespace llvm | |||
364 | ||||
365 | #endif // LLVM_SUPPORT_ALIGNMENT_H_ |