| File: | src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Support/Alignment.h |
| Warning: | line 85, column 47 The result of the left shift is undefined due to shifting by '255', which is greater or equal to the width of type 'uint64_t' |
| 1 | //===-- X86FrameLowering.cpp - X86 Frame Information ----------------------===// | |||
| 2 | // | |||
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |||
| 4 | // See https://llvm.org/LICENSE.txt for license information. | |||
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |||
| 6 | // | |||
| 7 | //===----------------------------------------------------------------------===// | |||
| 8 | // | |||
| 9 | // This file contains the X86 implementation of TargetFrameLowering class. | |||
| 10 | // | |||
| 11 | //===----------------------------------------------------------------------===// | |||
| 12 | ||||
| 13 | #include "X86FrameLowering.h" | |||
| 14 | #include "X86InstrBuilder.h" | |||
| 15 | #include "X86InstrInfo.h" | |||
| 16 | #include "X86MachineFunctionInfo.h" | |||
| 17 | #include "X86ReturnProtectorLowering.h" | |||
| 18 | #include "X86Subtarget.h" | |||
| 19 | #include "X86TargetMachine.h" | |||
| 20 | #include "llvm/ADT/SmallSet.h" | |||
| 21 | #include "llvm/ADT/Statistic.h" | |||
| 22 | #include "llvm/Analysis/EHPersonalities.h" | |||
| 23 | #include "llvm/CodeGen/MachineFrameInfo.h" | |||
| 24 | #include "llvm/CodeGen/MachineFunction.h" | |||
| 25 | #include "llvm/CodeGen/MachineInstrBuilder.h" | |||
| 26 | #include "llvm/CodeGen/MachineModuleInfo.h" | |||
| 27 | #include "llvm/CodeGen/MachineRegisterInfo.h" | |||
| 28 | #include "llvm/CodeGen/WinEHFuncInfo.h" | |||
| 29 | #include "llvm/IR/DataLayout.h" | |||
| 30 | #include "llvm/IR/Function.h" | |||
| 31 | #include "llvm/MC/MCAsmInfo.h" | |||
| 32 | #include "llvm/MC/MCObjectFileInfo.h" | |||
| 33 | #include "llvm/MC/MCSymbol.h" | |||
| 34 | #include "llvm/Support/Debug.h" | |||
| 35 | #include "llvm/Target/TargetOptions.h" | |||
| 36 | #include <cstdlib> | |||
| 37 | ||||
| 38 | #define DEBUG_TYPE "x86-fl" | |||
| 39 | ||||
| 40 | STATISTIC(NumFrameLoopProbe, "Number of loop stack probes used in prologue"); | |||
| 41 | STATISTIC(NumFrameExtraProbe, | |||
| 42 |           "Number of extra stack probes generated in prologue"); | |||
| 43 | ||||
| 44 | using namespace llvm; | |||
| 45 | ||||
| 46 | X86FrameLowering::X86FrameLowering(const X86Subtarget &STI, | |||
| 47 | MaybeAlign StackAlignOverride) | |||
| 48 | : TargetFrameLowering(StackGrowsDown, StackAlignOverride.valueOrOne(), | |||
| 49 | STI.is64Bit() ? -8 : -4), | |||
| 50 | STI(STI), TII(*STI.getInstrInfo()), TRI(STI.getRegisterInfo()), RPL() { | |||
| 51 | // Cache a bunch of frame-related predicates for this subtarget. | |||
| 52 | SlotSize = TRI->getSlotSize(); | |||
| 53 | Is64Bit = STI.is64Bit(); | |||
| 54 | IsLP64 = STI.isTarget64BitLP64(); | |||
| 55 | // Standard x86-64 and NaCl use 64-bit frame/stack pointers; x32 uses 32-bit. | |||
| 56 | Uses64BitFramePtr = STI.isTarget64BitLP64() || STI.isTargetNaCl64(); | |||
| 57 | StackPtr = TRI->getStackRegister(); | |||
| 58 | SaveArgs = Is64Bit ? STI.getSaveArgs() : 0; | |||
| 59 | } | |||
| 60 | ||||
| 61 | bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { | |||
| 62 | return !MF.getFrameInfo().hasVarSizedObjects() && | |||
| 63 | !MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences() && | |||
| 64 | !MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall(); | |||
| 65 | } | |||
| 66 | ||||
| 67 | /// canSimplifyCallFramePseudos - If there is a reserved call frame, the | |||
| 68 | /// call frame pseudos can be simplified. Having a FP, as in the default | |||
| 69 | /// implementation, is not sufficient here since we can't always use it. | |||
| 70 | /// Use a more nuanced condition. | |||
| 71 | bool | |||
| 72 | X86FrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const { | |||
| 73 | return hasReservedCallFrame(MF) || | |||
| 74 | MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall() || | |||
| 75 | (hasFP(MF) && !TRI->hasStackRealignment(MF)) || | |||
| 76 | TRI->hasBasePointer(MF); | |||
| 77 | } | |||
| 78 | ||||
| 79 | // needsFrameIndexResolution - Do we need to perform FI resolution for | |||
| 80 | // this function? Normally, this is required only when the function | |||
| 81 | // has any stack objects. However, FI resolution actually has another job, | |||
| 82 | // not apparent from the title - it resolves callframesetup/destroy | |||
| 83 | // that were not simplified earlier. | |||
| 84 | // So, this is required for x86 functions that have push sequences even | |||
| 85 | // when there are no stack objects. | |||
| 86 | bool | |||
| 87 | X86FrameLowering::needsFrameIndexResolution(const MachineFunction &MF) const { | |||
| 88 | return MF.getFrameInfo().hasStackObjects() || | |||
| 89 | MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences(); | |||
| 90 | } | |||
| 91 | ||||
| 92 | /// hasFP - Return true if the specified function should have a dedicated frame | |||
| 93 | /// pointer register. This is true if the function has variable sized allocas | |||
| 94 | /// or if frame pointer elimination is disabled. | |||
| 95 | bool X86FrameLowering::hasFP(const MachineFunction &MF) const { | |||
| 96 | const MachineFrameInfo &MFI = MF.getFrameInfo(); | |||
| 97 | return (MF.getTarget().Options.DisableFramePointerElim(MF) || | |||
| 98 | TRI->hasStackRealignment(MF) || MFI.hasVarSizedObjects() || | |||
| 99 | MFI.isFrameAddressTaken() || MFI.hasOpaqueSPAdjustment() || | |||
| 100 | MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() || | |||
| 101 | MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall() || | |||
| 102 | MF.callsUnwindInit() || MF.hasEHFunclets() || MF.callsEHReturn() || | |||
| 103 | MFI.hasStackMap() || MFI.hasPatchPoint() || | |||
| 104 | MFI.hasCopyImplyingStackAdjustment() || | |||
| 105 | SaveArgs); | |||
| 106 | } | |||
| 107 | ||||
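| | // The following helpers pick the opcode variant matching the pointer width | |||
| | // (64-bit for LP64, 32-bit otherwise) and, for the immediate forms, the | |||
| | // imm8 vs imm32 encoding based on the size of the constant. | |||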
| 108 | static unsigned getSUBriOpcode(bool IsLP64, int64_t Imm) { | |||
| 109 | if (IsLP64) { | |||
| 110 | if (isInt<8>(Imm)) | |||
| 111 | return X86::SUB64ri8; | |||
| 112 | return X86::SUB64ri32; | |||
| 113 | } else { | |||
| 114 | if (isInt<8>(Imm)) | |||
| 115 | return X86::SUB32ri8; | |||
| 116 | return X86::SUB32ri; | |||
| 117 | } | |||
| 118 | } | |||
| 119 | ||||
| 120 | static unsigned getADDriOpcode(bool IsLP64, int64_t Imm) { | |||
| 121 | if (IsLP64) { | |||
| 122 | if (isInt<8>(Imm)) | |||
| 123 | return X86::ADD64ri8; | |||
| 124 | return X86::ADD64ri32; | |||
| 125 | } else { | |||
| 126 | if (isInt<8>(Imm)) | |||
| 127 | return X86::ADD32ri8; | |||
| 128 | return X86::ADD32ri; | |||
| 129 | } | |||
| 130 | } | |||
| 131 | ||||
| 132 | static unsigned getSUBrrOpcode(bool IsLP64) { | |||
| 133 | return IsLP64 ? X86::SUB64rr : X86::SUB32rr; | |||
| 134 | } | |||
| 135 | ||||
| 136 | static unsigned getADDrrOpcode(bool IsLP64) { | |||
| 137 | return IsLP64 ? X86::ADD64rr : X86::ADD32rr; | |||
| 138 | } | |||
| 139 | ||||
| 140 | static unsigned getANDriOpcode(bool IsLP64, int64_t Imm) { | |||
| 141 | if (IsLP64) { | |||
| 142 | if (isInt<8>(Imm)) | |||
| 143 | return X86::AND64ri8; | |||
| 144 | return X86::AND64ri32; | |||
| 145 | } | |||
| 146 | if (isInt<8>(Imm)) | |||
| 147 | return X86::AND32ri8; | |||
| 148 | return X86::AND32ri; | |||
| 149 | } | |||
| 150 | ||||
| 151 | static unsigned getLEArOpcode(bool IsLP64) { | |||
| 152 | return IsLP64 ? X86::LEA64r : X86::LEA32r; | |||
| 153 | } | |||
| 154 | ||||
| 155 | static bool isEAXLiveIn(MachineBasicBlock &MBB) { | |||
| 156 | for (MachineBasicBlock::RegisterMaskPair RegMask : MBB.liveins()) { | |||
| 157 | unsigned Reg = RegMask.PhysReg; | |||
| 158 | ||||
| 159 | if (Reg == X86::RAX || Reg == X86::EAX || Reg == X86::AX || | |||
| 160 | Reg == X86::AH || Reg == X86::AL) | |||
| 161 | return true; | |||
| 162 | } | |||
| 163 | ||||
| 164 | return false; | |||
| 165 | } | |||
| 166 | ||||
| 167 | /// Check if the flags need to be preserved before the terminators. | |||
| 168 | /// This would be the case, if the eflags is live-in of the region | |||
| 169 | /// composed by the terminators or live-out of that region, without | |||
| 170 | /// being defined by a terminator. | |||
| 171 | static bool | |||
| 172 | flagsNeedToBePreservedBeforeTheTerminators(const MachineBasicBlock &MBB) { | |||
| 173 | for (const MachineInstr &MI : MBB.terminators()) { | |||
| 174 | bool BreakNext = false; | |||
| 175 | for (const MachineOperand &MO : MI.operands()) { | |||
| 176 | if (!MO.isReg()) | |||
| 177 | continue; | |||
| 178 | Register Reg = MO.getReg(); | |||
| 179 | if (Reg != X86::EFLAGS) | |||
| 180 | continue; | |||
| 181 | ||||
| 182 | // This terminator needs an eflags that is not defined | |||
| 183 | // by a previous terminator: | |||
| 184 | // EFLAGS is live-in of the region composed by the terminators. | |||
| 185 | if (!MO.isDef()) | |||
| 186 | return true; | |||
| 187 | // This terminator defines the eflags, i.e., we don't need to preserve it. | |||
| 188 | // However, we still need to check this specific terminator does not | |||
| 189 | // read a live-in value. | |||
| 190 | BreakNext = true; | |||
| 191 | } | |||
| 192 | // We found a definition of the eflags, no need to preserve them. | |||
| 193 | if (BreakNext) | |||
| 194 | return false; | |||
| 195 | } | |||
| 196 | ||||
| 197 | // None of the terminators use or define the eflags. | |||
| 198 | // Check if they are live-out, that would imply we need to preserve them. | |||
| 199 | for (const MachineBasicBlock *Succ : MBB.successors()) | |||
| 200 | if (Succ->isLiveIn(X86::EFLAGS)) | |||
| 201 | return true; | |||
| 202 | ||||
| 203 | return false; | |||
| 204 | } | |||
| 205 | ||||
| 206 | /// emitSPUpdate - Emit a series of instructions to increment / decrement the | |||
| 207 | /// stack pointer by a constant value. | |||
| 208 | void X86FrameLowering::emitSPUpdate(MachineBasicBlock &MBB, | |||
| 209 | MachineBasicBlock::iterator &MBBI, | |||
| 210 | const DebugLoc &DL, | |||
| 211 | int64_t NumBytes, bool InEpilogue) const { | |||
| 212 | bool isSub = NumBytes < 0; | |||
| 213 | uint64_t Offset = isSub ? -NumBytes : NumBytes; | |||
| 214 | MachineInstr::MIFlag Flag = | |||
| 215 | isSub ? MachineInstr::FrameSetup : MachineInstr::FrameDestroy; | |||
| 216 | ||||
| 217 | uint64_t Chunk = (1LL << 31) - 1; | |||
| 218 | ||||
| 219 | MachineFunction &MF = *MBB.getParent(); | |||
| 220 | const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>(); | |||
| 221 | const X86TargetLowering &TLI = *STI.getTargetLowering(); | |||
| 222 | const bool EmitInlineStackProbe = TLI.hasInlineStackProbe(MF); | |||
| 223 | ||||
| 224 | // It's OK not to take large chunks into account when probing, as the | |||
| 225 | // allocation is split into smaller chunks anyway. | |||
| 226 | if (EmitInlineStackProbe && !InEpilogue) { | |||
| 227 | ||||
| 228 | // This pseudo-instruction is going to be expanded, potentially using a | |||
| 229 | // loop, by inlineStackProbe(). | |||
| 230 | BuildMI(MBB, MBBI, DL, TII.get(X86::STACKALLOC_W_PROBING)).addImm(Offset); | |||
| 231 | return; | |||
| 232 | } else if (Offset > Chunk) { | |||
| 233 | // Rather than emit a long series of instructions for large offsets, | |||
| 234 | // load the offset into a register and do one sub/add | |||
| 235 | unsigned Reg = 0; | |||
| 236 | unsigned Rax = (unsigned)(Is64Bit ? X86::RAX : X86::EAX); | |||
| 237 | ||||
| 238 | if (isSub && !isEAXLiveIn(MBB)) | |||
| 239 | Reg = Rax; | |||
| 240 | else | |||
| 241 | Reg = TRI->findDeadCallerSavedReg(MBB, MBBI); | |||
| 242 | ||||
| 243 | unsigned MovRIOpc = Is64Bit ? X86::MOV64ri : X86::MOV32ri; | |||
| 244 | unsigned AddSubRROpc = | |||
| 245 | isSub ? getSUBrrOpcode(Is64Bit) : getADDrrOpcode(Is64Bit); | |||
| 246 | if (Reg) { | |||
| 247 | BuildMI(MBB, MBBI, DL, TII.get(MovRIOpc), Reg) | |||
| 248 | .addImm(Offset) | |||
| 249 | .setMIFlag(Flag); | |||
| 250 | MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AddSubRROpc), StackPtr) | |||
| 251 | .addReg(StackPtr) | |||
| 252 | .addReg(Reg); | |||
| 253 | MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead. | |||
| 254 | return; | |||
| 255 | } else if (Offset > 8 * Chunk) { | |||
| 256 | // If we would need more than 8 add or sub instructions (a >16GB stack | |||
| 257 | // frame), it's worth spilling RAX to materialize this immediate. | |||
| 258 | // pushq %rax | |||
| 259 | // movabsq +-$Offset+-SlotSize, %rax | |||
| 260 | // addq %rsp, %rax | |||
| 261 | // xchg %rax, (%rsp) | |||
| 262 | // movq (%rsp), %rsp | |||
| 263 | assert(Is64Bit && "can't have 32-bit 16GB stack frame"); | |||
| 264 | BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r)) | |||
| 265 | .addReg(Rax, RegState::Kill) | |||
| 266 | .setMIFlag(Flag); | |||
| 267 | // Subtract is not commutative, so negate the offset and always use add. | |||
| 268 | // Subtract 8 less and add 8 more to account for the PUSH we just did. | |||
| 269 | if (isSub) | |||
| 270 | Offset = -(Offset - SlotSize); | |||
| 271 | else | |||
| 272 | Offset = Offset + SlotSize; | |||
| 273 | BuildMI(MBB, MBBI, DL, TII.get(MovRIOpc), Rax) | |||
| 274 | .addImm(Offset) | |||
| 275 | .setMIFlag(Flag); | |||
| 276 | MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(X86::ADD64rr), Rax) | |||
| 277 | .addReg(Rax) | |||
| 278 | .addReg(StackPtr); | |||
| 279 | MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead. | |||
| 280 | // Exchange the new SP in RAX with the top of the stack. | |||
| 281 | addRegOffset( | |||
| 282 | BuildMI(MBB, MBBI, DL, TII.get(X86::XCHG64rm), Rax).addReg(Rax), | |||
| 283 | StackPtr, false, 0); | |||
| 284 | // Load new SP from the top of the stack into RSP. | |||
| 285 | addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rm), StackPtr), | |||
| 286 | StackPtr, false, 0); | |||
| 287 | return; | |||
| 288 | } | |||
| 289 | } | |||
| 290 | ||||
| 291 | while (Offset) { | |||
| 292 | uint64_t ThisVal = std::min(Offset, Chunk); | |||
| 293 | if (ThisVal == SlotSize) { | |||
| 294 | // Use push / pop for slot sized adjustments as a size optimization. We | |||
| 295 | // need to find a dead register when using pop. | |||
| 296 | unsigned Reg = isSub | |||
| 297 | ? (unsigned)(Is64Bit ? X86::RAX : X86::EAX) | |||
| 298 | : TRI->findDeadCallerSavedReg(MBB, MBBI); | |||
| 299 | if (Reg) { | |||
| 300 | unsigned Opc = isSub | |||
| 301 | ? (Is64Bit ? X86::PUSH64r : X86::PUSH32r) | |||
| 302 | : (Is64Bit ? X86::POP64r : X86::POP32r); | |||
| 303 | BuildMI(MBB, MBBI, DL, TII.get(Opc)) | |||
| 304 | .addReg(Reg, getDefRegState(!isSub) | getUndefRegState(isSub)) | |||
| 305 | .setMIFlag(Flag); | |||
| 306 | Offset -= ThisVal; | |||
| 307 | continue; | |||
| 308 | } | |||
| 309 | } | |||
| 310 | ||||
| 311 | BuildStackAdjustment(MBB, MBBI, DL, isSub ? -ThisVal : ThisVal, InEpilogue) | |||
| 312 | .setMIFlag(Flag); | |||
| 313 | ||||
| 314 | Offset -= ThisVal; | |||
| 315 | } | |||
| 316 | } | |||
| 317 | ||||
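| | // Emit a single instruction that adjusts the stack pointer by Offset bytes, | |||
| | // using LEA when EFLAGS must stay intact (or the subtarget prefers LEA for | |||
| | // SP updates) and ADD/SUB otherwise. | |||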
| 318 | MachineInstrBuilder X86FrameLowering::BuildStackAdjustment( | |||
| 319 | MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, | |||
| 320 | const DebugLoc &DL, int64_t Offset, bool InEpilogue) const { | |||
| 321 | assert(Offset != 0 && "zero offset stack adjustment requested"); | |||
| 322 | ||||
| 323 | // On Atom, using LEA to adjust SP is preferred, but using it in the epilogue | |||
| 324 | // is tricky. | |||
| 325 | bool UseLEA; | |||
| 326 | if (!InEpilogue) { | |||
| 327 | // Check if inserting the prologue at the beginning | |||
| 328 | // of MBB would require to use LEA operations. | |||
| 329 | // We need to use LEA operations if EFLAGS is live in, because | |||
| 330 | // it means an instruction will read it before it gets defined. | |||
| 331 | UseLEA = STI.useLeaForSP() || MBB.isLiveIn(X86::EFLAGS); | |||
| 332 | } else { | |||
| 333 | // If we can use LEA for SP but we shouldn't, check that none | |||
| 334 | // of the terminators uses the eflags. Otherwise we will insert | |||
| 335 | // a ADD that will redefine the eflags and break the condition. | |||
| 336 | // Alternatively, we could move the ADD, but this may not be possible | |||
| 337 | // and is an optimization anyway. | |||
| 338 | UseLEA = canUseLEAForSPInEpilogue(*MBB.getParent()); | |||
| 339 | if (UseLEA && !STI.useLeaForSP()) | |||
| 340 | UseLEA = flagsNeedToBePreservedBeforeTheTerminators(MBB); | |||
| 341 | // If that assert breaks, that means we do not do the right thing | |||
| 342 | // in canUseAsEpilogue. | |||
| 343 | assert((UseLEA || !flagsNeedToBePreservedBeforeTheTerminators(MBB)) && | |||
| 344 |        "We shouldn't have allowed this insertion point"); | |||
| 345 | } | |||
| 346 | ||||
| 347 | MachineInstrBuilder MI; | |||
| 348 | if (UseLEA) { | |||
| 349 | MI = addRegOffset(BuildMI(MBB, MBBI, DL, | |||
| 350 | TII.get(getLEArOpcode(Uses64BitFramePtr)), | |||
| 351 | StackPtr), | |||
| 352 | StackPtr, false, Offset); | |||
| 353 | } else { | |||
| 354 | bool IsSub = Offset < 0; | |||
| 355 | uint64_t AbsOffset = IsSub ? -Offset : Offset; | |||
| 356 | const unsigned Opc = IsSub ? getSUBriOpcode(Uses64BitFramePtr, AbsOffset) | |||
| 357 | : getADDriOpcode(Uses64BitFramePtr, AbsOffset); | |||
| 358 | MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr) | |||
| 359 | .addReg(StackPtr) | |||
| 360 | .addImm(AbsOffset); | |||
| 361 | MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead. | |||
| 362 | } | |||
| 363 | return MI; | |||
| 364 | } | |||
| 365 | ||||
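| | // Look at the instruction immediately before (or after) MBBI for an | |||
| | // ADD/SUB/LEA that adjusts the stack pointer; if one is found, erase it | |||
| | // (along with its def_cfa_offset/adjust_cfa_offset CFI, if present) and | |||
| | // return the adjustment so the caller can fold it into its own SP update. | |||
| | // Returns 0 if there is nothing to merge. | |||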
| 366 | int X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB, | |||
| 367 | MachineBasicBlock::iterator &MBBI, | |||
| 368 | bool doMergeWithPrevious) const { | |||
| 369 | if ((doMergeWithPrevious && MBBI == MBB.begin()) || | |||
| 370 | (!doMergeWithPrevious && MBBI == MBB.end())) | |||
| 371 | return 0; | |||
| 372 | ||||
| 373 | MachineBasicBlock::iterator PI = doMergeWithPrevious ? std::prev(MBBI) : MBBI; | |||
| 374 | ||||
| 375 | PI = skipDebugInstructionsBackward(PI, MBB.begin()); | |||
| 376 | // It is assumed that the ADD/SUB/LEA instruction is succeeded by one CFI | |||
| 377 | // instruction, and that there are no DBG_VALUE or other instructions between | |||
| 378 | // ADD/SUB/LEA and its corresponding CFI instruction. | |||
| 379 | /* TODO: Add support for the case where there are multiple CFI instructions | |||
| 380 | below the ADD/SUB/LEA, e.g.: | |||
| 381 | ... | |||
| 382 | add | |||
| 383 | cfi_def_cfa_offset | |||
| 384 | cfi_offset | |||
| 385 | ... | |||
| 386 | */ | |||
| 387 | if (doMergeWithPrevious && PI != MBB.begin() && PI->isCFIInstruction()) | |||
| 388 | PI = std::prev(PI); | |||
| 389 | ||||
| 390 | unsigned Opc = PI->getOpcode(); | |||
| 391 | int Offset = 0; | |||
| 392 | ||||
| 393 | if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 || | |||
| 394 | Opc == X86::ADD32ri || Opc == X86::ADD32ri8) && | |||
| 395 | PI->getOperand(0).getReg() == StackPtr){ | |||
| 396 | assert(PI->getOperand(1).getReg() == StackPtr); | |||
| 397 | Offset = PI->getOperand(2).getImm(); | |||
| 398 | } else if ((Opc == X86::LEA32r || Opc == X86::LEA64_32r) && | |||
| 399 | PI->getOperand(0).getReg() == StackPtr && | |||
| 400 | PI->getOperand(1).getReg() == StackPtr && | |||
| 401 | PI->getOperand(2).getImm() == 1 && | |||
| 402 | PI->getOperand(3).getReg() == X86::NoRegister && | |||
| 403 | PI->getOperand(5).getReg() == X86::NoRegister) { | |||
| 404 | // For LEAs we have: def = lea SP, FI, noreg, Offset, noreg. | |||
| 405 | Offset = PI->getOperand(4).getImm(); | |||
| 406 | } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 || | |||
| 407 | Opc == X86::SUB32ri || Opc == X86::SUB32ri8) && | |||
| 408 | PI->getOperand(0).getReg() == StackPtr) { | |||
| 409 | assert(PI->getOperand(1).getReg() == StackPtr); | |||
| 410 | Offset = -PI->getOperand(2).getImm(); | |||
| 411 | } else | |||
| 412 | return 0; | |||
| 413 | ||||
| 414 | PI = MBB.erase(PI); | |||
| 415 | if (PI != MBB.end() && PI->isCFIInstruction()) { | |||
| 416 | auto CIs = MBB.getParent()->getFrameInstructions(); | |||
| 417 | MCCFIInstruction CI = CIs[PI->getOperand(0).getCFIIndex()]; | |||
| 418 | if (CI.getOperation() == MCCFIInstruction::OpDefCfaOffset || | |||
| 419 | CI.getOperation() == MCCFIInstruction::OpAdjustCfaOffset) | |||
| 420 | PI = MBB.erase(PI); | |||
| 421 | } | |||
| 422 | if (!doMergeWithPrevious) | |||
| 423 | MBBI = skipDebugInstructionsForward(PI, MBB.end()); | |||
| 424 | ||||
| 425 | return Offset; | |||
| 426 | } | |||
| 427 | ||||
| 428 | void X86FrameLowering::BuildCFI(MachineBasicBlock &MBB, | |||
| 429 | MachineBasicBlock::iterator MBBI, | |||
| 430 | const DebugLoc &DL, | |||
| 431 | const MCCFIInstruction &CFIInst) const { | |||
| 432 | MachineFunction &MF = *MBB.getParent(); | |||
| 433 | unsigned CFIIndex = MF.addFrameInst(CFIInst); | |||
| 434 | BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) | |||
| 435 | .addCFIIndex(CFIIndex); | |||
| 436 | } | |||
| 437 | ||||
| 438 | /// Emits Dwarf Info specifying offsets of callee saved registers and | |||
| 439 | /// frame pointer. This is called only when basic block sections are enabled. | |||
| 440 | void X86FrameLowering::emitCalleeSavedFrameMoves( | |||
| 441 | MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) const { | |||
| 442 | MachineFunction &MF = *MBB.getParent(); | |||
| 443 | if (!hasFP(MF)) { | |||
| 444 | emitCalleeSavedFrameMoves(MBB, MBBI, DebugLoc{}, true); | |||
| 445 | return; | |||
| 446 | } | |||
| 447 | const MachineModuleInfo &MMI = MF.getMMI(); | |||
| 448 | const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); | |||
| 449 | const Register FramePtr = TRI->getFrameRegister(MF); | |||
| 450 | const Register MachineFramePtr = | |||
| 451 | STI.isTarget64BitILP32() ? Register(getX86SubSuperRegister(FramePtr, 64)) | |||
| 452 | : FramePtr; | |||
| 453 | unsigned DwarfReg = MRI->getDwarfRegNum(MachineFramePtr, true); | |||
| 454 | // Offset = space for return address + size of the frame pointer itself. | |||
| 455 | unsigned Offset = (Is64Bit ? 8 : 4) + (Uses64BitFramePtr ? 8 : 4); | |||
| 456 | BuildCFI(MBB, MBBI, DebugLoc{}, | |||
| 457 | MCCFIInstruction::createOffset(nullptr, DwarfReg, -Offset)); | |||
| 458 | emitCalleeSavedFrameMoves(MBB, MBBI, DebugLoc{}, true); | |||
| 459 | } | |||
| 460 | ||||
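| | // Emit one CFI directive per callee-saved register: an offset directive in | |||
| | // the prologue, a restore directive in the epilogue. | |||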
| 461 | void X86FrameLowering::emitCalleeSavedFrameMoves( | |||
| 462 | MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, | |||
| 463 | const DebugLoc &DL, bool IsPrologue) const { | |||
| 464 | MachineFunction &MF = *MBB.getParent(); | |||
| 465 | MachineFrameInfo &MFI = MF.getFrameInfo(); | |||
| 466 | MachineModuleInfo &MMI = MF.getMMI(); | |||
| 467 | const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); | |||
| 468 | ||||
| 469 | // Add callee saved registers to move list. | |||
| 470 | const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); | |||
| 471 | if (CSI.empty()) return; | |||
| 472 | ||||
| 473 | // Calculate offsets. | |||
| 474 | for (std::vector<CalleeSavedInfo>::const_iterator | |||
| 475 | I = CSI.begin(), E = CSI.end(); I != E; ++I) { | |||
| 476 | int64_t Offset = MFI.getObjectOffset(I->getFrameIdx()); | |||
| 477 | unsigned Reg = I->getReg(); | |||
| 478 | unsigned DwarfReg = MRI->getDwarfRegNum(Reg, true); | |||
| 479 | ||||
| 480 | if (IsPrologue) { | |||
| 481 | BuildCFI(MBB, MBBI, DL, | |||
| 482 | MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset)); | |||
| 483 | } else { | |||
| 484 | BuildCFI(MBB, MBBI, DL, | |||
| 485 | MCCFIInstruction::createRestore(nullptr, DwarfReg)); | |||
| 486 | } | |||
| 487 | } | |||
| 488 | } | |||
| 489 | ||||
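| | // Entry point for stack probing. Windows CoreCLR targets get an inline | |||
| | // expansion (deferred via a STACKALLOC_W_PROBING pseudo when emitted in the | |||
| | // prologue); all other targets call the stack probe function. | |||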
| 490 | void X86FrameLowering::emitStackProbe(MachineFunction &MF, | |||
| 491 | MachineBasicBlock &MBB, | |||
| 492 | MachineBasicBlock::iterator MBBI, | |||
| 493 | const DebugLoc &DL, bool InProlog) const { | |||
| 494 | const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>(); | |||
| 495 | if (STI.isTargetWindowsCoreCLR()) { | |||
| 496 | if (InProlog) { | |||
| 497 | BuildMI(MBB, MBBI, DL, TII.get(X86::STACKALLOC_W_PROBING)) | |||
| 498 | .addImm(0 /* no explicit stack size */); | |||
| 499 | } else { | |||
| 500 | emitStackProbeInline(MF, MBB, MBBI, DL, false); | |||
| 501 | } | |||
| 502 | } else { | |||
| 503 | emitStackProbeCall(MF, MBB, MBBI, DL, InProlog); | |||
| 504 | } | |||
| 505 | } | |||
| 506 | ||||
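| | // Expand the STACKALLOC_W_PROBING pseudo left in the prologue into the | |||
| | // actual inline probing sequence. | |||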
| 507 | void X86FrameLowering::inlineStackProbe(MachineFunction &MF, | |||
| 508 | MachineBasicBlock &PrologMBB) const { | |||
| 509 | auto Where = llvm::find_if(PrologMBB, [](MachineInstr &MI) { | |||
| 510 | return MI.getOpcode() == X86::STACKALLOC_W_PROBING; | |||
| 511 | }); | |||
| 512 | if (Where != PrologMBB.end()) { | |||
| 513 | DebugLoc DL = PrologMBB.findDebugLoc(Where); | |||
| 514 | emitStackProbeInline(MF, PrologMBB, Where, DL, true); | |||
| 515 | Where->eraseFromParent(); | |||
| 516 | } | |||
| 517 | } | |||
| 518 | ||||
| 519 | void X86FrameLowering::emitStackProbeInline(MachineFunction &MF, | |||
| 520 | MachineBasicBlock &MBB, | |||
| 521 | MachineBasicBlock::iterator MBBI, | |||
| 522 | const DebugLoc &DL, | |||
| 523 | bool InProlog) const { | |||
| 524 | const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>(); | |||
| 525 | if (STI.isTargetWindowsCoreCLR() && STI.is64Bit()) | |||
| 526 | emitStackProbeInlineWindowsCoreCLR64(MF, MBB, MBBI, DL, InProlog); | |||
| 527 | else | |||
| 528 | emitStackProbeInlineGeneric(MF, MBB, MBBI, DL, InProlog); | |||
| 529 | } | |||
| 530 | ||||
| 531 | void X86FrameLowering::emitStackProbeInlineGeneric( | |||
| 532 | MachineFunction &MF, MachineBasicBlock &MBB, | |||
| 533 | MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog) const { | |||
| 534 | MachineInstr &AllocWithProbe = *MBBI; | |||
| 535 | uint64_t Offset = AllocWithProbe.getOperand(0).getImm(); | |||
| 536 | ||||
| 537 | const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>(); | |||
| 538 | const X86TargetLowering &TLI = *STI.getTargetLowering(); | |||
| 539 | assert(!(STI.is64Bit() && STI.isTargetWindowsCoreCLR()) && | |||
| 540 |        "different expansion expected for CoreCLR 64 bit"); | |||
| 541 | ||||
| 542 | const uint64_t StackProbeSize = TLI.getStackProbeSize(MF); | |||
| 543 | uint64_t ProbeChunk = StackProbeSize * 8; | |||
| 544 | ||||
| 545 | uint64_t MaxAlign = | |||
| 546 | TRI->hasStackRealignment(MF) ? calculateMaxStackAlign(MF) : 0; | |||
| 547 | ||||
| 548 | // Synthesize a loop or unroll it, depending on the number of iterations. | |||
| 549 | // BuildStackAlignAND ensures that at most MaxAlign % StackProbeSize bytes | |||
| 550 | // are left between the unaligned rsp and the current rsp. | |||
| 551 | if (Offset > ProbeChunk) { | |||
| 552 | emitStackProbeInlineGenericLoop(MF, MBB, MBBI, DL, Offset, | |||
| 553 | MaxAlign % StackProbeSize); | |||
| 554 | } else { | |||
| 555 | emitStackProbeInlineGenericBlock(MF, MBB, MBBI, DL, Offset, | |||
| 556 | MaxAlign % StackProbeSize); | |||
| 557 | } | |||
| 558 | } | |||
| 559 | ||||
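| | // Unrolled expansion used for smaller allocations: emit an explicit SUB plus | |||
| | // a store to the new stack top for each page, then a final SUB for the | |||
| | // sub-page tail, which needs no probe. | |||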
| 560 | void X86FrameLowering::emitStackProbeInlineGenericBlock( | |||
| 561 | MachineFunction &MF, MachineBasicBlock &MBB, | |||
| 562 | MachineBasicBlock::iterator MBBI, const DebugLoc &DL, uint64_t Offset, | |||
| 563 | uint64_t AlignOffset) const { | |||
| 564 | ||||
| 565 | const bool NeedsDwarfCFI = needsDwarfCFI(MF); | |||
| 566 | const bool HasFP = hasFP(MF); | |||
| 567 | const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>(); | |||
| 568 | const X86TargetLowering &TLI = *STI.getTargetLowering(); | |||
| 569 | const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, Offset); | |||
| 570 | const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi; | |||
| 571 | const uint64_t StackProbeSize = TLI.getStackProbeSize(MF); | |||
| 572 | ||||
| 573 | uint64_t CurrentOffset = 0; | |||
| 574 | ||||
| 575 | assert(AlignOffset < StackProbeSize); | |||
| 576 | ||||
| 577 | // If the offset is so small it fits within a page, there's nothing to do. | |||
| 578 | if (StackProbeSize < Offset + AlignOffset) { | |||
| 579 | ||||
| 580 | MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr) | |||
| 581 | .addReg(StackPtr) | |||
| 582 | .addImm(StackProbeSize - AlignOffset) | |||
| 583 | .setMIFlag(MachineInstr::FrameSetup); | |||
| 584 | if (!HasFP && NeedsDwarfCFI) { | |||
| 585 | BuildCFI(MBB, MBBI, DL, | |||
| 586 | MCCFIInstruction::createAdjustCfaOffset( | |||
| 587 | nullptr, StackProbeSize - AlignOffset)); | |||
| 588 | } | |||
| 589 | MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead. | |||
| 590 | ||||
| 591 | addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc)) | |||
| 592 | .setMIFlag(MachineInstr::FrameSetup), | |||
| 593 | StackPtr, false, 0) | |||
| 594 | .addImm(0) | |||
| 595 | .setMIFlag(MachineInstr::FrameSetup); | |||
| 596 | NumFrameExtraProbe++; | |||
| 597 | CurrentOffset = StackProbeSize - AlignOffset; | |||
| 598 | } | |||
| 599 | ||||
| 600 | // For the next N - 1 pages, just probe. I tried to take advantage of | |||
| 601 | // natural probes, but it implies much more logic and there were very few | |||
| 602 | // interesting natural probes to interleave. | |||
| 603 | while (CurrentOffset + StackProbeSize < Offset) { | |||
| 604 | MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr) | |||
| 605 | .addReg(StackPtr) | |||
| 606 | .addImm(StackProbeSize) | |||
| 607 | .setMIFlag(MachineInstr::FrameSetup); | |||
| 608 | MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead. | |||
| 609 | ||||
| 610 | if (!HasFP && NeedsDwarfCFI) { | |||
| 611 | BuildCFI( | |||
| 612 | MBB, MBBI, DL, | |||
| 613 | MCCFIInstruction::createAdjustCfaOffset(nullptr, StackProbeSize)); | |||
| 614 | } | |||
| 615 | addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc)) | |||
| 616 | .setMIFlag(MachineInstr::FrameSetup), | |||
| 617 | StackPtr, false, 0) | |||
| 618 | .addImm(0) | |||
| 619 | .setMIFlag(MachineInstr::FrameSetup); | |||
| 620 | NumFrameExtraProbe++; | |||
| 621 | CurrentOffset += StackProbeSize; | |||
| 622 | } | |||
| 623 | ||||
| 624 | // No need to probe the tail, it is smaller than a page. | |||
| 625 | uint64_t ChunkSize = Offset - CurrentOffset; | |||
| 626 | MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr) | |||
| 627 | .addReg(StackPtr) | |||
| 628 | .addImm(ChunkSize) | |||
| 629 | .setMIFlag(MachineInstr::FrameSetup); | |||
| 630 | // No need to adjust Dwarf CFA offset here, the last position of the stack has | |||
| 631 | // been defined | |||
| 632 | MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead. | |||
| 633 | } | |||
| 634 | ||||
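| | // Loop expansion used for larger allocations: compute the final probed SP in | |||
| | // a scratch register, then allocate and touch one page per iteration until | |||
| | // SP reaches that bound, and finally allocate the remaining sub-page tail | |||
| | // without a probe. | |||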
| 635 | void X86FrameLowering::emitStackProbeInlineGenericLoop( | |||
| 636 | MachineFunction &MF, MachineBasicBlock &MBB, | |||
| 637 | MachineBasicBlock::iterator MBBI, const DebugLoc &DL, uint64_t Offset, | |||
| 638 | uint64_t AlignOffset) const { | |||
| 639 | assert(Offset && "null offset"); | |||
| 640 | ||||
| 641 | const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>(); | |||
| 642 | const X86TargetLowering &TLI = *STI.getTargetLowering(); | |||
| 643 | const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi; | |||
| 644 | const uint64_t StackProbeSize = TLI.getStackProbeSize(MF); | |||
| 645 | ||||
| 646 | if (AlignOffset) { | |||
| 647 | if (AlignOffset < StackProbeSize) { | |||
| 648 | // Perform a first smaller allocation followed by a probe. | |||
| 649 | const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr, AlignOffset); | |||
| 650 | MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(SUBOpc), StackPtr) | |||
| 651 | .addReg(StackPtr) | |||
| 652 | .addImm(AlignOffset) | |||
| 653 | .setMIFlag(MachineInstr::FrameSetup); | |||
| 654 | MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead. | |||
| 655 | ||||
| 656 | addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MovMIOpc)) | |||
| 657 | .setMIFlag(MachineInstr::FrameSetup), | |||
| 658 | StackPtr, false, 0) | |||
| 659 | .addImm(0) | |||
| 660 | .setMIFlag(MachineInstr::FrameSetup); | |||
| 661 | NumFrameExtraProbe++; | |||
| 662 | Offset -= AlignOffset; | |||
| 663 | } | |||
| 664 | } | |||
| 665 | ||||
| 666 | // Synthesize a loop | |||
| 667 | NumFrameLoopProbe++; | |||
| 668 | const BasicBlock *LLVM_BB = MBB.getBasicBlock(); | |||
| 669 | ||||
| 670 | MachineBasicBlock *testMBB = MF.CreateMachineBasicBlock(LLVM_BB); | |||
| 671 | MachineBasicBlock *tailMBB = MF.CreateMachineBasicBlock(LLVM_BB); | |||
| 672 | ||||
| 673 | MachineFunction::iterator MBBIter = ++MBB.getIterator(); | |||
| 674 | MF.insert(MBBIter, testMBB); | |||
| 675 | MF.insert(MBBIter, tailMBB); | |||
| 676 | ||||
| 677 | Register FinalStackProbed = Uses64BitFramePtr ? X86::R11 | |||
| 678 | : Is64Bit ? X86::R11D | |||
| 679 | : X86::EAX; | |||
| 680 | BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::COPY), FinalStackProbed) | |||
| 681 | .addReg(StackPtr) | |||
| 682 | .setMIFlag(MachineInstr::FrameSetup); | |||
| 683 | ||||
| 684 | // save loop bound | |||
| 685 | { | |||
| 686 | const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr, Offset); | |||
| 687 | BuildMI(MBB, MBBI, DL, TII.get(SUBOpc), FinalStackProbed) | |||
| 688 | .addReg(FinalStackProbed) | |||
| 689 | .addImm(Offset / StackProbeSize * StackProbeSize) | |||
| 690 | .setMIFlag(MachineInstr::FrameSetup); | |||
| 691 | } | |||
| 692 | ||||
| 693 | // allocate a page | |||
| 694 | { | |||
| 695 | const unsigned SUBOpc = getSUBriOpcode(Uses64BitFramePtr, StackProbeSize); | |||
| 696 | BuildMI(testMBB, DL, TII.get(SUBOpc), StackPtr) | |||
| 697 | .addReg(StackPtr) | |||
| 698 | .addImm(StackProbeSize) | |||
| 699 | .setMIFlag(MachineInstr::FrameSetup); | |||
| 700 | } | |||
| 701 | ||||
| 702 | // touch the page | |||
| 703 | addRegOffset(BuildMI(testMBB, DL, TII.get(MovMIOpc)) | |||
| 704 | .setMIFlag(MachineInstr::FrameSetup), | |||
| 705 | StackPtr, false, 0) | |||
| 706 | .addImm(0) | |||
| 707 | .setMIFlag(MachineInstr::FrameSetup); | |||
| 708 | ||||
| 709 | // cmp with stack pointer bound | |||
| 710 | BuildMI(testMBB, DL, TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr)) | |||
| 711 | .addReg(StackPtr) | |||
| 712 | .addReg(FinalStackProbed) | |||
| 713 | .setMIFlag(MachineInstr::FrameSetup); | |||
| 714 | ||||
| 715 | // jump | |||
| 716 | BuildMI(testMBB, DL, TII.get(X86::JCC_1)) | |||
| 717 | .addMBB(testMBB) | |||
| 718 | .addImm(X86::COND_NE) | |||
| 719 | .setMIFlag(MachineInstr::FrameSetup); | |||
| 720 | testMBB->addSuccessor(testMBB); | |||
| 721 | testMBB->addSuccessor(tailMBB); | |||
| 722 | ||||
| 723 | // BB management | |||
| 724 | tailMBB->splice(tailMBB->end(), &MBB, MBBI, MBB.end()); | |||
| 725 | tailMBB->transferSuccessorsAndUpdatePHIs(&MBB); | |||
| 726 | MBB.addSuccessor(testMBB); | |||
| 727 | ||||
| 728 | // handle tail | |||
| 729 | unsigned TailOffset = Offset % StackProbeSize; | |||
| 730 | if (TailOffset) { | |||
| 731 | const unsigned Opc = getSUBriOpcode(Uses64BitFramePtr, TailOffset); | |||
| 732 | BuildMI(*tailMBB, tailMBB->begin(), DL, TII.get(Opc), StackPtr) | |||
| 733 | .addReg(StackPtr) | |||
| 734 | .addImm(TailOffset) | |||
| 735 | .setMIFlag(MachineInstr::FrameSetup); | |||
| 736 | } | |||
| 737 | ||||
| 738 | // Update Live In information | |||
| 739 | recomputeLiveIns(*testMBB); | |||
| 740 | recomputeLiveIns(*tailMBB); | |||
| 741 | } | |||
| 742 | ||||
| 743 | void X86FrameLowering::emitStackProbeInlineWindowsCoreCLR64( | |||
| 744 | MachineFunction &MF, MachineBasicBlock &MBB, | |||
| 745 | MachineBasicBlock::iterator MBBI, const DebugLoc &DL, bool InProlog) const { | |||
| 746 | const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>(); | |||
| 747 | assert(STI.is64Bit() && "different expansion needed for 32 bit"); | |||
| 748 | assert(STI.isTargetWindowsCoreCLR() && "custom expansion expects CoreCLR"); | |||
| 749 | const TargetInstrInfo &TII = *STI.getInstrInfo(); | |||
| 750 | const BasicBlock *LLVM_BB = MBB.getBasicBlock(); | |||
| 751 | ||||
| 752 | // RAX contains the number of bytes of desired stack adjustment. | |||
| 753 | // The handling here assumes this value has already been updated so as to | |||
| 754 | // maintain stack alignment. | |||
| 755 | // | |||
| 756 | // We need to exit with RSP modified by this amount and execute suitable | |||
| 757 | // page touches to notify the OS that we're growing the stack responsibly. | |||
| 758 | // All stack probing must be done without modifying RSP. | |||
| 759 | // | |||
| 760 | // MBB: | |||
| 761 | // SizeReg = RAX; | |||
| 762 | // ZeroReg = 0 | |||
| 763 | // CopyReg = RSP | |||
| 764 | // Flags, TestReg = CopyReg - SizeReg | |||
| 765 | // FinalReg = !Flags.Ovf ? TestReg : ZeroReg | |||
| 766 | // LimitReg = gs magic thread env access | |||
| 767 | // if FinalReg >= LimitReg goto ContinueMBB | |||
| 768 | // RoundBB: | |||
| 769 | // RoundReg = page address of FinalReg | |||
| 770 | // LoopMBB: | |||
| 771 | // LoopReg = PHI(LimitReg,ProbeReg) | |||
| 772 | // ProbeReg = LoopReg - PageSize | |||
| 773 | // [ProbeReg] = 0 | |||
| 774 | // if (ProbeReg > RoundReg) goto LoopMBB | |||
| 775 | // ContinueMBB: | |||
| 776 | // RSP = RSP - RAX | |||
| 777 | // [rest of original MBB] | |||
| 778 | ||||
| 779 | // Set up the new basic blocks | |||
| 780 | MachineBasicBlock *RoundMBB = MF.CreateMachineBasicBlock(LLVM_BB); | |||
| 781 | MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB); | |||
| 782 | MachineBasicBlock *ContinueMBB = MF.CreateMachineBasicBlock(LLVM_BB); | |||
| 783 | ||||
| 784 | MachineFunction::iterator MBBIter = std::next(MBB.getIterator()); | |||
| 785 | MF.insert(MBBIter, RoundMBB); | |||
| 786 | MF.insert(MBBIter, LoopMBB); | |||
| 787 | MF.insert(MBBIter, ContinueMBB); | |||
| 788 | ||||
| 789 | // Split MBB and move the tail portion down to ContinueMBB. | |||
| 790 | MachineBasicBlock::iterator BeforeMBBI = std::prev(MBBI); | |||
| 791 | ContinueMBB->splice(ContinueMBB->begin(), &MBB, MBBI, MBB.end()); | |||
| 792 | ContinueMBB->transferSuccessorsAndUpdatePHIs(&MBB); | |||
| 793 | ||||
| 794 | // Some useful constants | |||
| 795 | const int64_t ThreadEnvironmentStackLimit = 0x10; | |||
| 796 | const int64_t PageSize = 0x1000; | |||
| 797 | const int64_t PageMask = ~(PageSize - 1); | |||
| 798 | ||||
| 799 | // Registers we need. For the normal case we use virtual | |||
| 800 | // registers. For the prolog expansion we use RAX, RCX and RDX. | |||
| 801 | MachineRegisterInfo &MRI = MF.getRegInfo(); | |||
| 802 | const TargetRegisterClass *RegClass = &X86::GR64RegClass; | |||
| 803 | const Register SizeReg = InProlog ? X86::RAX | |||
| 804 | : MRI.createVirtualRegister(RegClass), | |||
| 805 | ZeroReg = InProlog ? X86::RCX | |||
| 806 | : MRI.createVirtualRegister(RegClass), | |||
| 807 | CopyReg = InProlog ? X86::RDX | |||
| 808 | : MRI.createVirtualRegister(RegClass), | |||
| 809 | TestReg = InProlog ? X86::RDX | |||
| 810 | : MRI.createVirtualRegister(RegClass), | |||
| 811 | FinalReg = InProlog ? X86::RDX | |||
| 812 | : MRI.createVirtualRegister(RegClass), | |||
| 813 | RoundedReg = InProlog ? X86::RDX | |||
| 814 | : MRI.createVirtualRegister(RegClass), | |||
| 815 | LimitReg = InProlog ? X86::RCX | |||
| 816 | : MRI.createVirtualRegister(RegClass), | |||
| 817 | JoinReg = InProlog ? X86::RCX | |||
| 818 | : MRI.createVirtualRegister(RegClass), | |||
| 819 | ProbeReg = InProlog ? X86::RCX | |||
| 820 | : MRI.createVirtualRegister(RegClass); | |||
| 821 | ||||
| 822 | // SP-relative offsets where we can save RCX and RDX. | |||
| 823 | int64_t RCXShadowSlot = 0; | |||
| 824 | int64_t RDXShadowSlot = 0; | |||
| 825 | ||||
| 826 | // If inlining in the prolog, save RCX and RDX. | |||
| 827 | if (InProlog) { | |||
| 828 | // Compute the offsets. We need to account for things already | |||
| 829 | // pushed onto the stack at this point: return address, frame | |||
| 830 | // pointer (if used), and callee saves. | |||
| 831 | X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); | |||
| 832 | const int64_t CalleeSaveSize = X86FI->getCalleeSavedFrameSize(); | |||
| 833 | const bool HasFP = hasFP(MF); | |||
| 834 | ||||
| 835 | // Check if we need to spill RCX and/or RDX. | |||
| 836 | // Here we assume that no earlier prologue instruction changes RCX and/or | |||
| 837 | // RDX, so checking the block live-ins is enough. | |||
| 838 | const bool IsRCXLiveIn = MBB.isLiveIn(X86::RCX); | |||
| 839 | const bool IsRDXLiveIn = MBB.isLiveIn(X86::RDX); | |||
| 840 | int64_t InitSlot = 8 + CalleeSaveSize + (HasFP ? 8 : 0); | |||
| 841 | // Assign the initial slot to both registers, then change RDX's slot if both | |||
| 842 | // need to be spilled. | |||
| 843 | if (IsRCXLiveIn) | |||
| 844 | RCXShadowSlot = InitSlot; | |||
| 845 | if (IsRDXLiveIn) | |||
| 846 | RDXShadowSlot = InitSlot; | |||
| 847 | if (IsRDXLiveIn && IsRCXLiveIn) | |||
| 848 | RDXShadowSlot += 8; | |||
| 849 | // Emit the saves if needed. | |||
| 850 | if (IsRCXLiveIn) | |||
| 851 | addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false, | |||
| 852 | RCXShadowSlot) | |||
| 853 | .addReg(X86::RCX); | |||
| 854 | if (IsRDXLiveIn) | |||
| 855 | addRegOffset(BuildMI(&MBB, DL, TII.get(X86::MOV64mr)), X86::RSP, false, | |||
| 856 | RDXShadowSlot) | |||
| 857 | .addReg(X86::RDX); | |||
| 858 | } else { | |||
| 859 | // Not in the prolog. Copy RAX to a virtual reg. | |||
| 860 | BuildMI(&MBB, DL, TII.get(X86::MOV64rr), SizeReg).addReg(X86::RAX); | |||
| 861 | } | |||
| 862 | ||||
| 863 | // Add code to MBB to check for overflow and set the new target stack pointer | |||
| 864 | // to zero if so. | |||
| 865 | BuildMI(&MBB, DL, TII.get(X86::XOR64rr), ZeroReg) | |||
| 866 | .addReg(ZeroReg, RegState::Undef) | |||
| 867 | .addReg(ZeroReg, RegState::Undef); | |||
| 868 | BuildMI(&MBB, DL, TII.get(X86::MOV64rr), CopyReg).addReg(X86::RSP); | |||
| 869 | BuildMI(&MBB, DL, TII.get(X86::SUB64rr), TestReg) | |||
| 870 | .addReg(CopyReg) | |||
| 871 | .addReg(SizeReg); | |||
| 872 | BuildMI(&MBB, DL, TII.get(X86::CMOV64rr), FinalReg) | |||
| 873 | .addReg(TestReg) | |||
| 874 | .addReg(ZeroReg) | |||
| 875 | .addImm(X86::COND_B); | |||
| 876 | ||||
| 877 | // FinalReg now holds final stack pointer value, or zero if | |||
| 878 | // allocation would overflow. Compare against the current stack | |||
| 879 | // limit from the thread environment block. Note this limit is the | |||
| 880 | // lowest touched page on the stack, not the point at which the OS | |||
| 881 | // will cause an overflow exception, so this is just an optimization | |||
| 882 | // to avoid unnecessarily touching pages that are below the current | |||
| 883 | // SP but already committed to the stack by the OS. | |||
| 884 | BuildMI(&MBB, DL, TII.get(X86::MOV64rm), LimitReg) | |||
| 885 | .addReg(0) | |||
| 886 | .addImm(1) | |||
| 887 | .addReg(0) | |||
| 888 | .addImm(ThreadEnvironmentStackLimit) | |||
| 889 | .addReg(X86::GS); | |||
| 890 | BuildMI(&MBB, DL, TII.get(X86::CMP64rr)).addReg(FinalReg).addReg(LimitReg); | |||
| 891 | // Jump if the desired stack pointer is at or above the stack limit. | |||
| 892 | BuildMI(&MBB, DL, TII.get(X86::JCC_1)).addMBB(ContinueMBB).addImm(X86::COND_AE); | |||
| 893 | ||||
| 894 | // Add code to roundMBB to round the final stack pointer to a page boundary. | |||
| 895 | RoundMBB->addLiveIn(FinalReg); | |||
| 896 | BuildMI(RoundMBB, DL, TII.get(X86::AND64ri32), RoundedReg) | |||
| 897 | .addReg(FinalReg) | |||
| 898 | .addImm(PageMask); | |||
| 899 | BuildMI(RoundMBB, DL, TII.get(X86::JMP_1)).addMBB(LoopMBB); | |||
| 900 | ||||
| 901 | // LimitReg now holds the current stack limit, RoundedReg page-rounded | |||
| 902 | // final RSP value. Add code to loopMBB to decrement LimitReg page-by-page | |||
| 903 | // and probe until we reach RoundedReg. | |||
| 904 | if (!InProlog) { | |||
| 905 | BuildMI(LoopMBB, DL, TII.get(X86::PHI), JoinReg) | |||
| 906 | .addReg(LimitReg) | |||
| 907 | .addMBB(RoundMBB) | |||
| 908 | .addReg(ProbeReg) | |||
| 909 | .addMBB(LoopMBB); | |||
| 910 | } | |||
| 911 | ||||
| 912 | LoopMBB->addLiveIn(JoinReg); | |||
| 913 | addRegOffset(BuildMI(LoopMBB, DL, TII.get(X86::LEA64r), ProbeReg), JoinReg, | |||
| 914 | false, -PageSize); | |||
| 915 | ||||
| 916 | // Probe by storing a byte onto the stack. | |||
| 917 | BuildMI(LoopMBB, DL, TII.get(X86::MOV8mi)) | |||
| 918 | .addReg(ProbeReg) | |||
| 919 | .addImm(1) | |||
| 920 | .addReg(0) | |||
| 921 | .addImm(0) | |||
| 922 | .addReg(0) | |||
| 923 | .addImm(0); | |||
| 924 | ||||
| 925 | LoopMBB->addLiveIn(RoundedReg); | |||
| 926 | BuildMI(LoopMBB, DL, TII.get(X86::CMP64rr)) | |||
| 927 | .addReg(RoundedReg) | |||
| 928 | .addReg(ProbeReg); | |||
| 929 | BuildMI(LoopMBB, DL, TII.get(X86::JCC_1)).addMBB(LoopMBB).addImm(X86::COND_NE); | |||
| 930 | ||||
| 931 | MachineBasicBlock::iterator ContinueMBBI = ContinueMBB->getFirstNonPHI(); | |||
| 932 | ||||
| 933 | // If in prolog, restore RDX and RCX. | |||
| 934 | if (InProlog) { | |||
| 935 | if (RCXShadowSlot) // It means we spilled RCX in the prologue. | |||
| 936 | addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL, | |||
| 937 | TII.get(X86::MOV64rm), X86::RCX), | |||
| 938 | X86::RSP, false, RCXShadowSlot); | |||
| 939 | if (RDXShadowSlot) // It means we spilled RDX in the prologue. | |||
| 940 | addRegOffset(BuildMI(*ContinueMBB, ContinueMBBI, DL, | |||
| 941 | TII.get(X86::MOV64rm), X86::RDX), | |||
| 942 | X86::RSP, false, RDXShadowSlot); | |||
| 943 | } | |||
| 944 | ||||
| 945 | // Now that the probing is done, add code to continueMBB to update | |||
| 946 | // the stack pointer for real. | |||
| 947 | ContinueMBB->addLiveIn(SizeReg); | |||
| 948 | BuildMI(*ContinueMBB, ContinueMBBI, DL, TII.get(X86::SUB64rr), X86::RSP) | |||
| 949 | .addReg(X86::RSP) | |||
| 950 | .addReg(SizeReg); | |||
| 951 | ||||
| 952 | // Add the control flow edges we need. | |||
| 953 | MBB.addSuccessor(ContinueMBB); | |||
| 954 | MBB.addSuccessor(RoundMBB); | |||
| 955 | RoundMBB->addSuccessor(LoopMBB); | |||
| 956 | LoopMBB->addSuccessor(ContinueMBB); | |||
| 957 | LoopMBB->addSuccessor(LoopMBB); | |||
| 958 | ||||
| 959 | // Mark all the instructions added to the prolog as frame setup. | |||
| 960 | if (InProlog) { | |||
| 961 | for (++BeforeMBBI; BeforeMBBI != MBB.end(); ++BeforeMBBI) { | |||
| 962 | BeforeMBBI->setFlag(MachineInstr::FrameSetup); | |||
| 963 | } | |||
| 964 | for (MachineInstr &MI : *RoundMBB) { | |||
| 965 | MI.setFlag(MachineInstr::FrameSetup); | |||
| 966 | } | |||
| 967 | for (MachineInstr &MI : *LoopMBB) { | |||
| 968 | MI.setFlag(MachineInstr::FrameSetup); | |||
| 969 | } | |||
| 970 | for (MachineBasicBlock::iterator CMBBI = ContinueMBB->begin(); | |||
| 971 | CMBBI != ContinueMBBI; ++CMBBI) { | |||
| 972 | CMBBI->setFlag(MachineInstr::FrameSetup); | |||
| 973 | } | |||
| 974 | } | |||
| 975 | } | |||
| 976 | ||||
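| | // Emit a call to the target's stack probe function (e.g. __chkstk). The | |||
| | // requested size is passed in [ER]AX; on targets whose probe routine does | |||
| | // not adjust the stack pointer itself, subtract the size from SP afterwards. | |||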
| 977 | void X86FrameLowering::emitStackProbeCall(MachineFunction &MF, | |||
| 978 | MachineBasicBlock &MBB, | |||
| 979 | MachineBasicBlock::iterator MBBI, | |||
| 980 | const DebugLoc &DL, | |||
| 981 | bool InProlog) const { | |||
| 982 | bool IsLargeCodeModel = MF.getTarget().getCodeModel() == CodeModel::Large; | |||
| 983 | ||||
| 984 | // FIXME: Add indirect thunk support and remove this. | |||
| 985 | if (Is64Bit && IsLargeCodeModel && STI.useIndirectThunkCalls()) | |||
| 986 | report_fatal_error("Emitting stack probe calls on 64-bit with the large " | |||
| 987 | "code model and indirect thunks not yet implemented."); | |||
| 988 | ||||
| 989 | unsigned CallOp; | |||
| 990 | if (Is64Bit) | |||
| 991 | CallOp = IsLargeCodeModel ? X86::CALL64r : X86::CALL64pcrel32; | |||
| 992 | else | |||
| 993 | CallOp = X86::CALLpcrel32; | |||
| 994 | ||||
| 995 | StringRef Symbol = STI.getTargetLowering()->getStackProbeSymbolName(MF); | |||
| 996 | ||||
| 997 | MachineInstrBuilder CI; | |||
| 998 | MachineBasicBlock::iterator ExpansionMBBI = std::prev(MBBI); | |||
| 999 | ||||
| 1000 | // All current stack probes take AX and SP as input, clobber flags, and | |||
| 1001 | // preserve all registers. x86_64 probes leave RSP unmodified. | |||
| 1002 | if (Is64Bit && MF.getTarget().getCodeModel() == CodeModel::Large) { | |||
| 1003 | // For the large code model, we have to call through a register. Use R11, | |||
| 1004 | // as it is scratch in all supported calling conventions. | |||
| 1005 | BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::R11) | |||
| 1006 | .addExternalSymbol(MF.createExternalSymbolName(Symbol)); | |||
| 1007 | CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp)).addReg(X86::R11); | |||
| 1008 | } else { | |||
| 1009 | CI = BuildMI(MBB, MBBI, DL, TII.get(CallOp)) | |||
| 1010 | .addExternalSymbol(MF.createExternalSymbolName(Symbol)); | |||
| 1011 | } | |||
| 1012 | ||||
| 1013 | unsigned AX = Uses64BitFramePtr ? X86::RAX : X86::EAX; | |||
| 1014 | unsigned SP = Uses64BitFramePtr ? X86::RSP : X86::ESP; | |||
| 1015 | CI.addReg(AX, RegState::Implicit) | |||
| 1016 | .addReg(SP, RegState::Implicit) | |||
| 1017 | .addReg(AX, RegState::Define | RegState::Implicit) | |||
| 1018 | .addReg(SP, RegState::Define | RegState::Implicit) | |||
| 1019 | .addReg(X86::EFLAGS, RegState::Define | RegState::Implicit); | |||
| 1020 | ||||
| 1021 | if (STI.isTargetWin64() || !STI.isOSWindows()) { | |||
| 1022 | // MSVC x32's _chkstk and cygwin/mingw's _alloca adjust %esp themselves. | |||
| 1023 | // MSVC x64's __chkstk and cygwin/mingw's ___chkstk_ms do not adjust %rsp | |||
| 1024 | // themselves. They also do not clobber %rax, so we can reuse it when | |||
| 1025 | // adjusting %rsp. | |||
| 1026 | // All other platforms do not specify a particular ABI for the stack probe | |||
| 1027 | // function, so we arbitrarily define it to not adjust %esp/%rsp itself. | |||
| 1028 | BuildMI(MBB, MBBI, DL, TII.get(getSUBrrOpcode(Uses64BitFramePtr)), SP) | |||
| 1029 | .addReg(SP) | |||
| 1030 | .addReg(AX); | |||
| 1031 | } | |||
| 1032 | ||||
| 1033 | if (InProlog) { | |||
| 1034 | // Apply the frame setup flag to all inserted instrs. | |||
| 1035 | for (++ExpansionMBBI; ExpansionMBBI != MBBI; ++ExpansionMBBI) | |||
| 1036 | ExpansionMBBI->setFlag(MachineInstr::FrameSetup); | |||
| 1037 | } | |||
| 1038 | } | |||
| 1039 | ||||
| 1040 | static unsigned calculateSetFPREG(uint64_t SPAdjust) { | |||
| 1041 | // Win64 ABI has a less restrictive limitation of 240; 128 works equally well | |||
| 1042 | // and might require smaller successive adjustments. | |||
| 1043 | const uint64_t Win64MaxSEHOffset = 128; | |||
| 1044 | uint64_t SEHFrameOffset = std::min(SPAdjust, Win64MaxSEHOffset); | |||
| 1045 | // Win64 ABI requires 16-byte alignment for the UWOP_SET_FPREG opcode. | |||
| 1046 | return SEHFrameOffset & -16; | |||
| 1047 | } | |||
| 1048 | ||||
| 1049 | // If we're forcing a stack realignment we can't rely on just the frame | |||
| 1050 | // info, we need to know the ABI stack alignment as well in case we | |||
| 1051 | // have a call out. Otherwise just make sure we have some alignment - we'll | |||
| 1052 | // go with the minimum SlotSize. | |||
| 1053 | uint64_t X86FrameLowering::calculateMaxStackAlign(const MachineFunction &MF) const { | |||
| 1054 | const MachineFrameInfo &MFI = MF.getFrameInfo(); | |||
| 1055 | Align MaxAlign = MFI.getMaxAlign(); // Desired stack alignment. | |||
| 1056 | Align StackAlign = getStackAlign(); | |||
| 1057 | if (MF.getFunction().hasFnAttribute("stackrealign")) { | |||
| 1058 | if (MFI.hasCalls()) | |||
| 1059 | MaxAlign = (StackAlign > MaxAlign) ? StackAlign : MaxAlign; | |||
| 1060 | else if (MaxAlign < SlotSize) | |||
| 1061 | MaxAlign = Align(SlotSize); | |||
| 1062 | } | |||
| 1063 | return MaxAlign.value(); | |||
| 1064 | } | |||
| 1065 | ||||
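| | // Realign Reg by ANDing it with -MaxAlign. When inline stack probing is | |||
| | // enabled and the realignment of the stack pointer could skip a whole page | |||
| | // (MaxAlign >= StackProbeSize), emit a probing loop that walks SP down to | |||
| | // the aligned value page by page instead of adjusting it in one step. | |||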
| 1066 | void X86FrameLowering::BuildStackAlignAND(MachineBasicBlock &MBB, | |||
| 1067 | MachineBasicBlock::iterator MBBI, | |||
| 1068 | const DebugLoc &DL, unsigned Reg, | |||
| 1069 | uint64_t MaxAlign) const { | |||
| 1070 | uint64_t Val = -MaxAlign; | |||
| 1071 | unsigned AndOp = getANDriOpcode(Uses64BitFramePtr, Val); | |||
| 1072 | ||||
| 1073 | MachineFunction &MF = *MBB.getParent(); | |||
| 1074 | const X86Subtarget &STI = MF.getSubtarget<X86Subtarget>(); | |||
| 1075 | const X86TargetLowering &TLI = *STI.getTargetLowering(); | |||
| 1076 | const uint64_t StackProbeSize = TLI.getStackProbeSize(MF); | |||
| 1077 | const bool EmitInlineStackProbe = TLI.hasInlineStackProbe(MF); | |||
| 1078 | ||||
| 1079 | // We want to make sure that (in the worst case) fewer than StackProbeSize | |||
| 1080 | // bytes are left unprobed after the AND. This assumption is used in | |||
| 1081 | // emitStackProbeInlineGeneric. | |||
| 1082 | if (Reg == StackPtr && EmitInlineStackProbe && MaxAlign >= StackProbeSize) { | |||
| 1083 | { | |||
| 1084 | NumFrameLoopProbe++; | |||
| 1085 | MachineBasicBlock *entryMBB = | |||
| 1086 | MF.CreateMachineBasicBlock(MBB.getBasicBlock()); | |||
| 1087 | MachineBasicBlock *headMBB = | |||
| 1088 | MF.CreateMachineBasicBlock(MBB.getBasicBlock()); | |||
| 1089 | MachineBasicBlock *bodyMBB = | |||
| 1090 | MF.CreateMachineBasicBlock(MBB.getBasicBlock()); | |||
| 1091 | MachineBasicBlock *footMBB = | |||
| 1092 | MF.CreateMachineBasicBlock(MBB.getBasicBlock()); | |||
| 1093 | ||||
| 1094 | MachineFunction::iterator MBBIter = MBB.getIterator(); | |||
| 1095 | MF.insert(MBBIter, entryMBB); | |||
| 1096 | MF.insert(MBBIter, headMBB); | |||
| 1097 | MF.insert(MBBIter, bodyMBB); | |||
| 1098 | MF.insert(MBBIter, footMBB); | |||
| 1099 | const unsigned MovMIOpc = Is64Bit ? X86::MOV64mi32 : X86::MOV32mi; | |||
| 1100 | Register FinalStackProbed = Uses64BitFramePtr ? X86::R11 | |||
| 1101 | : Is64Bit ? X86::R11D | |||
| 1102 | : X86::EAX; | |||
| 1103 | ||||
| 1104 | // Setup entry block | |||
| 1105 | { | |||
| 1106 | ||||
| 1107 | entryMBB->splice(entryMBB->end(), &MBB, MBB.begin(), MBBI); | |||
| 1108 | BuildMI(entryMBB, DL, TII.get(TargetOpcode::COPY), FinalStackProbed) | |||
| 1109 | .addReg(StackPtr) | |||
| 1110 | .setMIFlag(MachineInstr::FrameSetup); | |||
| 1111 | MachineInstr *MI = | |||
| 1112 | BuildMI(entryMBB, DL, TII.get(AndOp), FinalStackProbed) | |||
| 1113 | .addReg(FinalStackProbed) | |||
| 1114 | .addImm(Val) | |||
| 1115 | .setMIFlag(MachineInstr::FrameSetup); | |||
| 1116 | ||||
| 1117 | // The EFLAGS implicit def is dead. | |||
| 1118 | MI->getOperand(3).setIsDead(); | |||
| 1119 | ||||
| 1120 | BuildMI(entryMBB, DL, | |||
| 1121 | TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr)) | |||
| 1122 | .addReg(FinalStackProbed) | |||
| 1123 | .addReg(StackPtr) | |||
| 1124 | .setMIFlag(MachineInstr::FrameSetup); | |||
| 1125 | BuildMI(entryMBB, DL, TII.get(X86::JCC_1)) | |||
| 1126 | .addMBB(&MBB) | |||
| 1127 | .addImm(X86::COND_E) | |||
| 1128 | .setMIFlag(MachineInstr::FrameSetup); | |||
| 1129 | entryMBB->addSuccessor(headMBB); | |||
| 1130 | entryMBB->addSuccessor(&MBB); | |||
| 1131 | } | |||
| 1132 | ||||
| 1133 | // Loop entry block | |||
| 1134 | ||||
| 1135 | { | |||
| 1136 | const unsigned SUBOpc = | |||
| 1137 | getSUBriOpcode(Uses64BitFramePtr, StackProbeSize); | |||
| 1138 | BuildMI(headMBB, DL, TII.get(SUBOpc), StackPtr) | |||
| 1139 | .addReg(StackPtr) | |||
| 1140 | .addImm(StackProbeSize) | |||
| 1141 | .setMIFlag(MachineInstr::FrameSetup); | |||
| 1142 | ||||
| 1143 | BuildMI(headMBB, DL, | |||
| 1144 | TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr)) | |||
| 1145 | .addReg(FinalStackProbed) | |||
| 1146 | .addReg(StackPtr) | |||
| 1147 | .setMIFlag(MachineInstr::FrameSetup); | |||
| 1148 | ||||
| 1149 | // jump | |||
| 1150 | BuildMI(headMBB, DL, TII.get(X86::JCC_1)) | |||
| 1151 | .addMBB(footMBB) | |||
| 1152 | .addImm(X86::COND_B) | |||
| 1153 | .setMIFlag(MachineInstr::FrameSetup); | |||
| 1154 | ||||
| 1155 | headMBB->addSuccessor(bodyMBB); | |||
| 1156 | headMBB->addSuccessor(footMBB); | |||
| 1157 | } | |||
| 1158 | ||||
| 1159 | // setup loop body | |||
| 1160 | { | |||
| 1161 | addRegOffset(BuildMI(bodyMBB, DL, TII.get(MovMIOpc)) | |||
| 1162 | .setMIFlag(MachineInstr::FrameSetup), | |||
| 1163 | StackPtr, false, 0) | |||
| 1164 | .addImm(0) | |||
| 1165 | .setMIFlag(MachineInstr::FrameSetup); | |||
| 1166 | ||||
| 1167 | const unsigned SUBOpc = | |||
| 1168 | getSUBriOpcode(Uses64BitFramePtr, StackProbeSize); | |||
| 1169 | BuildMI(bodyMBB, DL, TII.get(SUBOpc), StackPtr) | |||
| 1170 | .addReg(StackPtr) | |||
| 1171 | .addImm(StackProbeSize) | |||
| 1172 | .setMIFlag(MachineInstr::FrameSetup); | |||
| 1173 | ||||
| 1174 | // cmp with stack pointer bound | |||
| 1175 | BuildMI(bodyMBB, DL, | |||
| 1176 | TII.get(Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr)) | |||
| 1177 | .addReg(FinalStackProbed) | |||
| 1178 | .addReg(StackPtr) | |||
| 1179 | .setMIFlag(MachineInstr::FrameSetup); | |||
| 1180 | ||||
| 1181 | // jump | |||
| 1182 | BuildMI(bodyMBB, DL, TII.get(X86::JCC_1)) | |||
| 1183 | .addMBB(bodyMBB) | |||
| 1184 | .addImm(X86::COND_B) | |||
| 1185 | .setMIFlag(MachineInstr::FrameSetup); | |||
| 1186 | bodyMBB->addSuccessor(bodyMBB); | |||
| 1187 | bodyMBB->addSuccessor(footMBB); | |||
| 1188 | } | |||
| 1189 | ||||
| 1190 | // setup loop footer | |||
| 1191 | { | |||
| 1192 | BuildMI(footMBB, DL, TII.get(TargetOpcode::COPY), StackPtr) | |||
| 1193 | .addReg(FinalStackProbed) | |||
| 1194 | .setMIFlag(MachineInstr::FrameSetup); | |||
| 1195 | addRegOffset(BuildMI(footMBB, DL, TII.get(MovMIOpc)) | |||
| 1196 | .setMIFlag(MachineInstr::FrameSetup), | |||
| 1197 | StackPtr, false, 0) | |||
| 1198 | .addImm(0) | |||
| 1199 | .setMIFlag(MachineInstr::FrameSetup); | |||
| 1200 | footMBB->addSuccessor(&MBB); | |||
| 1201 | } | |||
| 1202 | ||||
| 1203 | recomputeLiveIns(*headMBB); | |||
| 1204 | recomputeLiveIns(*bodyMBB); | |||
| 1205 | recomputeLiveIns(*footMBB); | |||
| 1206 | recomputeLiveIns(MBB); | |||
| 1207 | } | |||
| 1208 | } else { | |||
| 1209 | MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AndOp), Reg) | |||
| 1210 | .addReg(Reg) | |||
| 1211 | .addImm(Val) | |||
| 1212 | .setMIFlag(MachineInstr::FrameSetup); | |||
| 1213 | ||||
| 1214 | // The EFLAGS implicit def is dead. | |||
| 1215 | MI->getOperand(3).setIsDead(); | |||
| 1216 | } | |||
| 1217 | } | |||
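| | // Editor's sketch (not part of the original source): on a 64-bit target the | |||
| | // loop-probing path built above expands, roughly, to the following sequence, | |||
| | // where r11 holds the aligned target stack pointer: | |||
| | //   r11 = rsp & MASK              ; entry: compute the aligned SP | |||
| | //   if (r11 == rsp) goto done     ;        alignment does not move SP | |||
| | //   loop: rsp -= StackProbeSize   ; head/body: walk down one page at a time, | |||
| | //         mov qword ptr [rsp], 0  ;            touching each page, until rsp | |||
| | //         ...                     ;            reaches r11 | |||
| | //   rsp = r11                     ; foot: land on the aligned target | |||
| | //   mov qword ptr [rsp], 0        ;       and touch the final page | |||
| | // done: | |||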
| 1218 | ||||
| 1219 | // FIXME: Get this from tablegen. | |||
| 1220 | static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv, | |||
| 1221 | const X86Subtarget &Subtarget) { | |||
| 1222 | assert(Subtarget.is64Bit())((void)0); | |||
| 1223 | ||||
| 1224 | if (Subtarget.isCallingConvWin64(CallConv)) { | |||
| 1225 | static const MCPhysReg GPR64ArgRegsWin64[] = { | |||
| 1226 | X86::RCX, X86::RDX, X86::R8, X86::R9 | |||
| 1227 | }; | |||
| 1228 | return makeArrayRef(std::begin(GPR64ArgRegsWin64), std::end(GPR64ArgRegsWin64)); | |||
| 1229 | } | |||
| 1230 | ||||
| 1231 | static const MCPhysReg GPR64ArgRegs64Bit[] = { | |||
| 1232 | X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9 | |||
| 1233 | }; | |||
| 1234 | return makeArrayRef(std::begin(GPR64ArgRegs64Bit), std::end(GPR64ArgRegs64Bit)); | |||
| 1235 | } | |||
| 1236 | ||||
| 1237 | bool X86FrameLowering::has128ByteRedZone(const MachineFunction& MF) const { | |||
| 1238 | // x86-64 (non Win64) has a 128 byte red zone which is guaranteed not to be | |||
| 1239 | // clobbered by any interrupt handler. | |||
| 1240 | assert(&STI == &MF.getSubtarget<X86Subtarget>() &&((void)0) | |||
| 1241 | "MF used frame lowering for wrong subtarget")((void)0); | |||
| 1242 | const Function &Fn = MF.getFunction(); | |||
| 1243 | const bool IsWin64CC = STI.isCallingConvWin64(Fn.getCallingConv()); | |||
| 1244 | return Is64Bit && !IsWin64CC && !Fn.hasFnAttribute(Attribute::NoRedZone); | |||
| 1245 | } | |||
| 1246 | ||||
| 1247 | bool X86FrameLowering::isWin64Prologue(const MachineFunction &MF) const { | |||
| 1248 | return MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); | |||
| 1249 | } | |||
| 1250 | ||||
| 1251 | bool X86FrameLowering::needsDwarfCFI(const MachineFunction &MF) const { | |||
| 1252 | return !isWin64Prologue(MF) && MF.needsFrameMoves(); | |||
| 1253 | } | |||
| 1254 | ||||
| 1255 | /// emitPrologue - Push callee-saved registers onto the stack, which | |||
| 1256 | /// automatically adjusts the stack pointer. Adjust the stack pointer to allocate | |||
| 1257 | /// space for local variables. Also emit labels used by the exception handler to | |||
| 1258 | /// generate the exception handling frames. | |||
| 1259 | ||||
| 1260 | /* | |||
| 1261 | Here's a gist of what gets emitted: | |||
| 1262 | ||||
| 1263 | ; Establish frame pointer, if needed | |||
| 1264 | [if needs FP] | |||
| 1265 | push %rbp | |||
| 1266 | .cfi_def_cfa_offset 16 | |||
| 1267 | .cfi_offset %rbp, -16 | |||
| 1268 | .seh_pushreg %rbp | |||
| 1269 | mov %rsp, %rbp | |||
| 1270 | .cfi_def_cfa_register %rbp | |||
| 1271 | ||||
| 1272 | ; Spill general-purpose registers | |||
| 1273 | [for all callee-saved GPRs] | |||
| 1274 | pushq %<reg> | |||
| 1275 | [if not needs FP] | |||
| 1276 | .cfi_def_cfa_offset (offset from RETADDR) | |||
| 1277 | .seh_pushreg %<reg> | |||
| 1278 | ||||
| 1279 | ; If the required stack alignment > default stack alignment | |||
| 1280 | ; rsp needs to be re-aligned. This creates a "re-alignment gap" | |||
| 1281 | ; of unknown size in the stack frame. | |||
| 1282 | [if stack needs re-alignment] | |||
| 1283 | and $MASK, %rsp | |||
| 1284 | ||||
| 1285 | ; Allocate space for locals | |||
| 1286 | [if target is Windows and allocated space > 4096 bytes] | |||
| 1287 | ; Windows needs special care for allocations larger | |||
| 1288 | ; than one page. | |||
| 1289 | mov $NNN, %rax | |||
| 1290 | call ___chkstk_ms/___chkstk | |||
| 1291 | sub %rax, %rsp | |||
| 1292 | [else] | |||
| 1293 | sub $NNN, %rsp | |||
| 1294 | ||||
| 1295 | [if needs FP] | |||
| 1296 | .seh_stackalloc (size of XMM spill slots) | |||
| 1297 | .seh_setframe %rbp, SEHFrameOffset ; = size of all spill slots | |||
| 1298 | [else] | |||
| 1299 | .seh_stackalloc NNN | |||
| 1300 | ||||
| 1301 | ; Spill XMMs | |||
| 1302 | ; Note that while only the Windows 64 ABI specifies XMMs as callee-preserved, | |||
| 1303 | ; they may get spilled on any platform if the current function | |||
| 1304 | ; calls @llvm.eh.unwind.init | |||
| 1305 | [if needs FP] | |||
| 1306 | [for all callee-saved XMM registers] | |||
| 1307 | movaps %<xmm reg>, -MMM(%rbp) | |||
| 1308 | [for all callee-saved XMM registers] | |||
| 1309 | .seh_savexmm %<xmm reg>, (-MMM + SEHFrameOffset) | |||
| 1310 | ; i.e. the offset relative to (%rbp - SEHFrameOffset) | |||
| 1311 | [else] | |||
| 1312 | [for all callee-saved XMM registers] | |||
| 1313 | movaps %<xmm reg>, KKK(%rsp) | |||
| 1314 | [for all callee-saved XMM registers] | |||
| 1315 | .seh_savexmm %<xmm reg>, KKK | |||
| 1316 | ||||
| 1317 | .seh_endprologue | |||
| 1318 | ||||
| 1319 | [if needs base pointer] | |||
| 1320 | mov %rsp, %rbx | |||
| 1321 | [if needs to restore base pointer] | |||
| 1322 | mov %rsp, -MMM(%rbp) | |||
| 1323 | ||||
| 1324 | ; Emit CFI info | |||
| 1325 | [if needs FP] | |||
| 1326 | [for all callee-saved registers] | |||
| 1327 | .cfi_offset %<reg>, (offset from %rbp) | |||
| 1328 | [else] | |||
| 1329 | .cfi_def_cfa_offset (offset from RETADDR) | |||
| 1330 | [for all callee-saved registers] | |||
| 1331 | .cfi_offset %<reg>, (offset from %rsp) | |||
| 1332 | ||||
| 1333 | Notes: | |||
| 1334 | - .seh directives are emitted only for Windows 64 ABI | |||
| 1335 | - .cv_fpo directives are emitted on win32 when emitting CodeView | |||
| 1336 | - .cfi directives are emitted for all other ABIs | |||
| 1337 | - for 32-bit code, substitute %e?? registers for %r?? | |||
| 1338 | */ | |||
| 1339 | ||||
| 1340 | void X86FrameLowering::emitPrologue(MachineFunction &MF, | |||
| 1341 | MachineBasicBlock &MBB) const { | |||
| 1342 | assert(&STI == &MF.getSubtarget<X86Subtarget>() &&((void)0) | |||
| 1343 | "MF used frame lowering for wrong subtarget")((void)0); | |||
| 1344 | MachineBasicBlock::iterator MBBI = MBB.begin(); | |||
| 1345 | MachineFrameInfo &MFI = MF.getFrameInfo(); | |||
| 1346 | const Function &Fn = MF.getFunction(); | |||
| 1347 | MachineModuleInfo &MMI = MF.getMMI(); | |||
| 1348 | X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); | |||
| 1349 | uint64_t MaxAlign = calculateMaxStackAlign(MF); // Desired stack alignment. | |||
| 1350 | uint64_t StackSize = MFI.getStackSize(); // Number of bytes to allocate. | |||
| 1351 | bool IsFunclet = MBB.isEHFuncletEntry(); | |||
| 1352 | EHPersonality Personality = EHPersonality::Unknown; | |||
| 1353 | if (Fn.hasPersonalityFn()) | |||
| 1354 | Personality = classifyEHPersonality(Fn.getPersonalityFn()); | |||
| 1355 | bool FnHasClrFunclet = | |||
| 1356 | MF.hasEHFunclets() && Personality == EHPersonality::CoreCLR; | |||
| 1357 | bool IsClrFunclet = IsFunclet && FnHasClrFunclet; | |||
| 1358 | bool HasFP = hasFP(MF); | |||
| 1359 | bool IsWin64Prologue = isWin64Prologue(MF); | |||
| 1360 | bool NeedsWin64CFI = IsWin64Prologue && Fn.needsUnwindTableEntry(); | |||
| 1361 | // FIXME: Emit FPO data for EH funclets. | |||
| 1362 | bool NeedsWinFPO = | |||
| 1363 | !IsFunclet && STI.isTargetWin32() && MMI.getModule()->getCodeViewFlag(); | |||
| 1364 | bool NeedsWinCFI = NeedsWin64CFI || NeedsWinFPO; | |||
| 1365 | bool NeedsDwarfCFI = needsDwarfCFI(MF); | |||
| 1366 | Register FramePtr = TRI->getFrameRegister(MF); | |||
| 1367 | const Register MachineFramePtr = | |||
| 1368 | STI.isTarget64BitILP32() | |||
| 1369 | ? Register(getX86SubSuperRegister(FramePtr, 64)) : FramePtr; | |||
| 1370 | Register BasePtr = TRI->getBaseRegister(); | |||
| 1371 | bool HasWinCFI = false; | |||
| 1372 | ||||
| 1373 | // Debug location must be unknown since the first debug location is used | |||
| 1374 | // to determine the end of the prologue. | |||
| 1375 | DebugLoc DL; | |||
| 1376 | ||||
| 1377 | // Add RETADDR move area to callee saved frame size. | |||
| 1378 | int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); | |||
| 1379 | if (TailCallReturnAddrDelta && IsWin64Prologue) | |||
| 1380 | report_fatal_error("Can't handle guaranteed tail call under win64 yet"); | |||
| 1381 | ||||
| 1382 | if (TailCallReturnAddrDelta < 0) | |||
| 1383 | X86FI->setCalleeSavedFrameSize( | |||
| 1384 | X86FI->getCalleeSavedFrameSize() - TailCallReturnAddrDelta); | |||
| 1385 | ||||
| 1386 | const bool EmitStackProbeCall = | |||
| 1387 | STI.getTargetLowering()->hasStackProbeSymbol(MF); | |||
| 1388 | unsigned StackProbeSize = STI.getTargetLowering()->getStackProbeSize(MF); | |||
| 1389 | ||||
| 1390 | if (HasFP && X86FI->hasSwiftAsyncContext()) { | |||
| 1391 | BuildMI(MBB, MBBI, DL, TII.get(X86::BTS64ri8), | |||
| 1392 | MachineFramePtr) | |||
| 1393 | .addUse(MachineFramePtr) | |||
| 1394 | .addImm(60) | |||
| 1395 | .setMIFlag(MachineInstr::FrameSetup); | |||
| 1396 | } | |||
| 1397 | ||||
| 1398 | // Re-align the stack on 64-bit if the x86-interrupt calling convention is | |||
| 1399 | // used and an error code was pushed, since the x86-64 ABI requires a 16-byte | |||
| 1400 | // stack alignment. | |||
| 1401 | if (Fn.getCallingConv() == CallingConv::X86_INTR && Is64Bit && | |||
| 1402 | Fn.arg_size() == 2) { | |||
| 1403 | StackSize += 8; | |||
| 1404 | MFI.setStackSize(StackSize); | |||
| 1405 | emitSPUpdate(MBB, MBBI, DL, -8, /*InEpilogue=*/false); | |||
| 1406 | } | |||
| 1407 | ||||
| 1408 | // If this is x86-64, the Red Zone is not disabled, and we are a leaf | |||
| 1409 | // function that uses at most 128 bytes of stack space and has no frame | |||
| 1410 | // pointer, calls, or dynamic allocas, then we do not need to adjust the | |||
| 1411 | // stack pointer (we fit in the Red Zone). We also check that we don't | |||
| 1412 | // push and pop from the stack. | |||
| 1413 | if (has128ByteRedZone(MF) && !TRI->hasStackRealignment(MF) && | |||
| 1414 | !MFI.hasVarSizedObjects() && // No dynamic alloca. | |||
| 1415 | !MFI.adjustsStack() && // No calls. | |||
| 1416 | !EmitStackProbeCall && // No stack probes. | |||
| 1417 | !MFI.hasCopyImplyingStackAdjustment() && // Don't push and pop. | |||
| 1418 | !MF.shouldSplitStack()) { // Regular stack | |||
| 1419 | uint64_t MinSize = X86FI->getCalleeSavedFrameSize(); | |||
| 1420 | if (HasFP) MinSize += SlotSize; | |||
| 1421 | X86FI->setUsesRedZone(MinSize > 0 || StackSize > 0); | |||
| 1422 | StackSize = std::max(MinSize, StackSize > 128 ? StackSize - 128 : 0); | |||
| 1423 | MFI.setStackSize(StackSize); | |||
| 1424 | } | |||
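| | // Worked example (editorial note, not part of the original source): on | |||
| | // x86-64 SysV, a leaf function with 40 bytes of locals, no frame pointer | |||
| | // and no pushed CSRs satisfies the checks above; MinSize is 0 and, since | |||
| | // 40 <= 128, StackSize becomes max(0, 0) = 0, so no "sub $N, %rsp" is | |||
| | // emitted and the locals simply live in the red zone below %rsp. | |||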
| 1425 | ||||
| 1426 | // Insert stack pointer adjustment for later moving of return addr. Only | |||
| 1427 | // applies to tail call optimized functions where the callee argument stack | |||
| 1428 | // size is bigger than the caller's. | |||
| 1429 | if (TailCallReturnAddrDelta < 0) { | |||
| 1430 | BuildStackAdjustment(MBB, MBBI, DL, TailCallReturnAddrDelta, | |||
| 1431 | /*InEpilogue=*/false) | |||
| 1432 | .setMIFlag(MachineInstr::FrameSetup); | |||
| 1433 | } | |||
| 1434 | ||||
| 1435 | // Mapping for machine moves: | |||
| 1436 | // | |||
| 1437 | // DST: VirtualFP AND | |||
| 1438 | // SRC: VirtualFP => DW_CFA_def_cfa_offset | |||
| 1439 | // ELSE => DW_CFA_def_cfa | |||
| 1440 | // | |||
| 1441 | // SRC: VirtualFP AND | |||
| 1442 | // DST: Register => DW_CFA_def_cfa_register | |||
| 1443 | // | |||
| 1444 | // ELSE | |||
| 1445 | // OFFSET < 0 => DW_CFA_offset_extended_sf | |||
| 1446 | // REG < 64 => DW_CFA_offset + Reg | |||
| 1447 | // ELSE => DW_CFA_offset_extended | |||
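| | // Illustrative (editorial note, not part of the original source): for the | |||
| | // common x86-64 case below, pushing %rbp yields DW_CFA_def_cfa_offset 16 | |||
| | // followed by DW_CFA_offset %rbp, -16, and the later "mov %rsp, %rbp" | |||
| | // yields DW_CFA_def_cfa_register %rbp. | |||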
| 1448 | ||||
| 1449 | uint64_t NumBytes = 0; | |||
| 1450 | int stackGrowth = -SlotSize; | |||
| 1451 | ||||
| 1452 | // Find the funclet establisher parameter | |||
| 1453 | Register Establisher = X86::NoRegister; | |||
| 1454 | if (IsClrFunclet) | |||
| 1455 | Establisher = Uses64BitFramePtr ? X86::RCX : X86::ECX; | |||
| 1456 | else if (IsFunclet) | |||
| 1457 | Establisher = Uses64BitFramePtr ? X86::RDX : X86::EDX; | |||
| 1458 | ||||
| 1459 | if (IsWin64Prologue && IsFunclet && !IsClrFunclet) { | |||
| 1460 | // Immediately spill establisher into the home slot. | |||
| 1461 | // The runtime cares about this. | |||
| 1462 | // MOV64mr %rdx, 16(%rsp) | |||
| 1463 | unsigned MOVmr = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr; | |||
| 1464 | addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(MOVmr)), StackPtr, true, 16) | |||
| 1465 | .addReg(Establisher) | |||
| 1466 | .setMIFlag(MachineInstr::FrameSetup); | |||
| 1467 | MBB.addLiveIn(Establisher); | |||
| 1468 | } | |||
| 1469 | ||||
| 1470 | if (HasFP) { | |||
| 1471 | assert(MF.getRegInfo().isReserved(MachineFramePtr) && "FP reserved")((void)0); | |||
| 1472 | ||||
| 1473 | // Calculate required stack adjustment. | |||
| 1474 | uint64_t FrameSize = StackSize - SlotSize; | |||
| 1475 | // If required, include space for an extra hidden slot for stashing the base pointer. | |||
| 1476 | if (X86FI->getRestoreBasePointer()) | |||
| 1477 | FrameSize += SlotSize; | |||
| 1478 | ||||
| 1479 | NumBytes = FrameSize - X86FI->getCalleeSavedFrameSize(); | |||
| 1480 | ||||
| 1481 | // Callee-saved registers are pushed on stack before the stack is realigned. | |||
| 1482 | if (TRI->hasStackRealignment(MF) && !IsWin64Prologue) | |||
| 1483 | NumBytes = alignTo(NumBytes, MaxAlign); | |||
| 1484 | ||||
| 1485 | // Save EBP/RBP into the appropriate stack slot. | |||
| 1486 | BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::PUSH64r : X86::PUSH32r)) | |||
| 1487 | .addReg(MachineFramePtr, RegState::Kill) | |||
| 1488 | .setMIFlag(MachineInstr::FrameSetup); | |||
| 1489 | ||||
| 1490 | if (NeedsDwarfCFI) { | |||
| 1491 | // Mark the place where EBP/RBP was saved. | |||
| 1492 | // Define the current CFA rule to use the provided offset. | |||
| 1493 | assert(StackSize)((void)0); | |||
| 1494 | BuildCFI(MBB, MBBI, DL, | |||
| 1495 | MCCFIInstruction::cfiDefCfaOffset(nullptr, -2 * stackGrowth)); | |||
| 1496 | ||||
| 1497 | // Change the rule for the FramePtr to be an "offset" rule. | |||
| 1498 | unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true); | |||
| 1499 | BuildCFI(MBB, MBBI, DL, MCCFIInstruction::createOffset( | |||
| 1500 | nullptr, DwarfFramePtr, 2 * stackGrowth)); | |||
| 1501 | } | |||
| 1502 | ||||
| 1503 | if (NeedsWinCFI) { | |||
| 1504 | HasWinCFI = true; | |||
| 1505 | BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg)) | |||
| 1506 | .addImm(FramePtr) | |||
| 1507 | .setMIFlag(MachineInstr::FrameSetup); | |||
| 1508 | } | |||
| 1509 | ||||
| 1510 | if (!IsFunclet) { | |||
| 1511 | if (X86FI->hasSwiftAsyncContext()) { | |||
| 1512 | const auto &Attrs = MF.getFunction().getAttributes(); | |||
| 1513 | ||||
| 1514 | // Before we update the live frame pointer we have to ensure there's a | |||
| 1515 | // valid (or null) asynchronous context in its slot just before FP in | |||
| 1516 | // the frame record, so store it now. | |||
| 1517 | if (Attrs.hasAttrSomewhere(Attribute::SwiftAsync)) { | |||
| 1518 | // We have an initial context in r14, store it just before the frame | |||
| 1519 | // pointer. | |||
| 1520 | MBB.addLiveIn(X86::R14); | |||
| 1521 | BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r)) | |||
| 1522 | .addReg(X86::R14) | |||
| 1523 | .setMIFlag(MachineInstr::FrameSetup); | |||
| 1524 | } else { | |||
| 1525 | // No initial context, store null so that there's no pointer that | |||
| 1526 | // could be misused. | |||
| 1527 | BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64i8)) | |||
| 1528 | .addImm(0) | |||
| 1529 | .setMIFlag(MachineInstr::FrameSetup); | |||
| 1530 | } | |||
| 1531 | ||||
| 1532 | if (NeedsWinCFI) { | |||
| 1533 | HasWinCFI = true; | |||
| 1534 | BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg)) | |||
| 1535 | .addImm(X86::R14) | |||
| 1536 | .setMIFlag(MachineInstr::FrameSetup); | |||
| 1537 | } | |||
| 1538 | ||||
| 1539 | BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePtr) | |||
| 1540 | .addUse(X86::RSP) | |||
| 1541 | .addImm(1) | |||
| 1542 | .addUse(X86::NoRegister) | |||
| 1543 | .addImm(8) | |||
| 1544 | .addUse(X86::NoRegister) | |||
| 1545 | .setMIFlag(MachineInstr::FrameSetup); | |||
| 1546 | BuildMI(MBB, MBBI, DL, TII.get(X86::SUB64ri8), X86::RSP) | |||
| 1547 | .addUse(X86::RSP) | |||
| 1548 | .addImm(8) | |||
| 1549 | .setMIFlag(MachineInstr::FrameSetup); | |||
| 1550 | } | |||
| 1551 | ||||
| 1552 | if (!IsWin64Prologue && !IsFunclet) { | |||
| 1553 | // Update EBP with the new base value. | |||
| 1554 | if (!X86FI->hasSwiftAsyncContext()) | |||
| 1555 | BuildMI(MBB, MBBI, DL, | |||
| 1556 | TII.get(Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr), | |||
| 1557 | FramePtr) | |||
| 1558 | .addReg(StackPtr) | |||
| 1559 | .setMIFlag(MachineInstr::FrameSetup); | |||
| 1560 | ||||
| 1561 | if (SaveArgs && !Fn.arg_empty()) { | |||
| 1562 | ArrayRef<MCPhysReg> GPRs = | |||
| 1563 | get64BitArgumentGPRs(Fn.getCallingConv(), STI); | |||
| 1564 | unsigned arg_size = Fn.arg_size(); | |||
| 1565 | unsigned RI = 0; | |||
| 1566 | int64_t SaveSize = 0; | |||
| 1567 | ||||
| 1568 | if (Fn.hasStructRetAttr()) { | |||
| 1569 | GPRs = GPRs.drop_front(1); | |||
| 1570 | arg_size--; | |||
| 1571 | } | |||
| 1572 | ||||
| 1573 | for (MCPhysReg Reg : GPRs) { | |||
| 1574 | if (++RI > arg_size) | |||
| 1575 | break; | |||
| 1576 | ||||
| 1577 | SaveSize += SlotSize; | |||
| 1578 | ||||
| 1579 | BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r)) | |||
| 1580 | .addReg(Reg) | |||
| 1581 | .setMIFlag(MachineInstr::FrameSetup); | |||
| 1582 | } | |||
| 1583 | ||||
| 1584 | // Realign the stack. PUSHes are the most space efficient. | |||
| 1585 | while (SaveSize % getStackAlignment()) { | |||
| 1586 | BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r)) | |||
| 1587 | .addReg(GPRs.front()) | |||
| 1588 | .setMIFlag(MachineInstr::FrameSetup); | |||
| 1589 | ||||
| 1590 | SaveSize += SlotSize; | |||
| 1591 | } | |||
| 1592 | ||||
| 1593 | //dlg StackSize -= SaveSize; | |||
| 1594 | //dlg MFI.setStackSize(StackSize); | |||
| 1595 | X86FI->setSaveArgSize(SaveSize); | |||
| 1596 | } | |||
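| | // Worked example (editorial note, not part of the original source): for a | |||
| | // SysV function taking three integer arguments and no sret pointer, the | |||
| | // loop above pushes %rdi, %rsi and %rdx (SaveSize = 24); the realignment | |||
| | // loop then pushes %rdi once more, so SaveSize ends up as 32, a multiple | |||
| | // of the (typically 16-byte) stack alignment recorded via setSaveArgSize. | |||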
| 1597 | ||||
| 1598 | if (NeedsDwarfCFI) { | |||
| 1599 | // Mark effective beginning of when frame pointer becomes valid. | |||
| 1600 | // Define the current CFA to use the EBP/RBP register. | |||
| 1601 | unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true); | |||
| 1602 | BuildCFI( | |||
| 1603 | MBB, MBBI, DL, | |||
| 1604 | MCCFIInstruction::createDefCfaRegister(nullptr, DwarfFramePtr)); | |||
| 1605 | } | |||
| 1606 | ||||
| 1607 | if (NeedsWinFPO) { | |||
| 1608 | // .cv_fpo_setframe $FramePtr | |||
| 1609 | HasWinCFI = true; | |||
| 1610 | BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame)) | |||
| 1611 | .addImm(FramePtr) | |||
| 1612 | .addImm(0) | |||
| 1613 | .setMIFlag(MachineInstr::FrameSetup); | |||
| 1614 | } | |||
| 1615 | } | |||
| 1616 | } | |||
| 1617 | } else { | |||
| 1618 | assert(!IsFunclet && "funclets without FPs not yet implemented")((void)0); | |||
| 1619 | NumBytes = StackSize - X86FI->getCalleeSavedFrameSize(); | |||
| 1620 | } | |||
| 1621 | ||||
| 1622 | // Update the offset adjustment, which is mainly used by codeview to translate | |||
| 1623 | // from ESP to VFRAME relative local variable offsets. | |||
| 1624 | if (!IsFunclet) { | |||
| 1625 | if (HasFP && TRI->hasStackRealignment(MF)) | |||
| 1626 | MFI.setOffsetAdjustment(-NumBytes); | |||
| 1627 | else | |||
| 1628 | MFI.setOffsetAdjustment(-StackSize); | |||
| 1629 | } | |||
| 1630 | ||||
| 1631 | // For EH funclets, only allocate enough space for outgoing calls. Save the | |||
| 1632 | // NumBytes value that we would've used for the parent frame. | |||
| 1633 | unsigned ParentFrameNumBytes = NumBytes; | |||
| 1634 | if (IsFunclet) | |||
| 1635 | NumBytes = getWinEHFuncletFrameSize(MF); | |||
| 1636 | ||||
| 1637 | // Skip the callee-saved push instructions. | |||
| 1638 | bool PushedRegs = false; | |||
| 1639 | int StackOffset = 2 * stackGrowth; | |||
| 1640 | ||||
| 1641 | while (MBBI != MBB.end() && | |||
| 1642 | MBBI->getFlag(MachineInstr::FrameSetup) && | |||
| 1643 | (MBBI->getOpcode() == X86::PUSH32r || | |||
| 1644 | MBBI->getOpcode() == X86::PUSH64r)) { | |||
| 1645 | PushedRegs = true; | |||
| 1646 | Register Reg = MBBI->getOperand(0).getReg(); | |||
| 1647 | ++MBBI; | |||
| 1648 | ||||
| 1649 | if (!HasFP && NeedsDwarfCFI) { | |||
| 1650 | // Mark callee-saved push instruction. | |||
| 1651 | // Define the current CFA rule to use the provided offset. | |||
| 1652 | assert(StackSize)((void)0); | |||
| 1653 | BuildCFI(MBB, MBBI, DL, | |||
| 1654 | MCCFIInstruction::cfiDefCfaOffset(nullptr, -StackOffset)); | |||
| 1655 | StackOffset += stackGrowth; | |||
| 1656 | } | |||
| 1657 | ||||
| 1658 | if (NeedsWinCFI) { | |||
| 1659 | HasWinCFI = true; | |||
| 1660 | BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_PushReg)) | |||
| 1661 | .addImm(Reg) | |||
| 1662 | .setMIFlag(MachineInstr::FrameSetup); | |||
| 1663 | } | |||
| 1664 | } | |||
| 1665 | ||||
| 1666 | // Realign stack after we pushed callee-saved registers (so that we'll be | |||
| 1667 | // able to calculate their offsets from the frame pointer). | |||
| 1668 | // Don't do this for Win64, it needs to realign the stack after the prologue. | |||
| 1669 | if (!IsWin64Prologue && !IsFunclet && TRI->hasStackRealignment(MF)) { | |||
| 1670 | assert(HasFP && "There should be a frame pointer if stack is realigned.")((void)0); | |||
| 1671 | BuildStackAlignAND(MBB, MBBI, DL, StackPtr, MaxAlign); | |||
| 1672 | ||||
| 1673 | if (NeedsWinCFI) { | |||
| 1674 | HasWinCFI = true; | |||
| 1675 | BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlign)) | |||
| 1676 | .addImm(MaxAlign) | |||
| 1677 | .setMIFlag(MachineInstr::FrameSetup); | |||
| 1678 | } | |||
| 1679 | } | |||
| 1680 | ||||
| 1681 | // If there is an SUB32ri of ESP immediately before this instruction, merge | |||
| 1682 | // the two. This can be the case when tail call elimination is enabled and | |||
| 1683 | // the callee has more arguments than the caller. | |||
| 1684 | NumBytes -= mergeSPUpdates(MBB, MBBI, true); | |||
| 1685 | ||||
| 1686 | // Adjust stack pointer: ESP -= numbytes. | |||
| 1687 | ||||
| 1688 | // Windows and cygwin/mingw require a prologue helper routine when allocating | |||
| 1689 | // more than 4K bytes on the stack. Windows uses __chkstk and cygwin/mingw | |||
| 1690 | // uses __alloca. __alloca and the 32-bit version of __chkstk will probe the | |||
| 1691 | // stack and adjust the stack pointer in one go. The 64-bit version of | |||
| 1692 | // __chkstk is only responsible for probing the stack. The 64-bit prologue is | |||
| 1693 | // responsible for adjusting the stack pointer. Touching the stack at 4K | |||
| 1694 | // increments is necessary to ensure that the guard pages used by the OS | |||
| 1695 | // virtual memory manager are allocated in correct sequence. | |||
| 1696 | uint64_t AlignedNumBytes = NumBytes; | |||
| 1697 | if (IsWin64Prologue && !IsFunclet && TRI->hasStackRealignment(MF)) | |||
| 1698 | AlignedNumBytes = alignTo(AlignedNumBytes, MaxAlign); | |||
| 1699 | if (AlignedNumBytes >= StackProbeSize && EmitStackProbeCall) { | |||
| 1700 | assert(!X86FI->getUsesRedZone() &&((void)0) | |||
| 1701 | "The Red Zone is not accounted for in stack probes")((void)0); | |||
| 1702 | ||||
| 1703 | // Check whether EAX is livein for this block. | |||
| 1704 | bool isEAXAlive = isEAXLiveIn(MBB); | |||
| 1705 | ||||
| 1706 | if (isEAXAlive) { | |||
| 1707 | if (Is64Bit) { | |||
| 1708 | // Save RAX | |||
| 1709 | BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r)) | |||
| 1710 | .addReg(X86::RAX, RegState::Kill) | |||
| 1711 | .setMIFlag(MachineInstr::FrameSetup); | |||
| 1712 | } else { | |||
| 1713 | // Save EAX | |||
| 1714 | BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH32r)) | |||
| 1715 | .addReg(X86::EAX, RegState::Kill) | |||
| 1716 | .setMIFlag(MachineInstr::FrameSetup); | |||
| 1717 | } | |||
| 1718 | } | |||
| 1719 | ||||
| 1720 | if (Is64Bit) { | |||
| 1721 | // Handle the 64-bit Windows ABI case where we need to call __chkstk. | |||
| 1722 | // Function prologue is responsible for adjusting the stack pointer. | |||
| 1723 | int64_t Alloc = isEAXAlive ? NumBytes - 8 : NumBytes; | |||
| 1724 | if (isUInt<32>(Alloc)) { | |||
| 1725 | BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX) | |||
| 1726 | .addImm(Alloc) | |||
| 1727 | .setMIFlag(MachineInstr::FrameSetup); | |||
| 1728 | } else if (isInt<32>(Alloc)) { | |||
| 1729 | BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri32), X86::RAX) | |||
| 1730 | .addImm(Alloc) | |||
| 1731 | .setMIFlag(MachineInstr::FrameSetup); | |||
| 1732 | } else { | |||
| 1733 | BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64ri), X86::RAX) | |||
| 1734 | .addImm(Alloc) | |||
| 1735 | .setMIFlag(MachineInstr::FrameSetup); | |||
| 1736 | } | |||
| 1737 | } else { | |||
| 1738 | // Allocate NumBytes-4 bytes on stack in case of isEAXAlive. | |||
| 1739 | // We'll also use 4 already allocated bytes for EAX. | |||
| 1740 | BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX) | |||
| 1741 | .addImm(isEAXAlive ? NumBytes - 4 : NumBytes) | |||
| 1742 | .setMIFlag(MachineInstr::FrameSetup); | |||
| 1743 | } | |||
| 1744 | ||||
| 1745 | // Call __chkstk, __chkstk_ms, or __alloca. | |||
| 1746 | emitStackProbe(MF, MBB, MBBI, DL, true); | |||
| 1747 | ||||
| 1748 | if (isEAXAlive) { | |||
| 1749 | // Restore RAX/EAX | |||
| 1750 | MachineInstr *MI; | |||
| 1751 | if (Is64Bit) | |||
| 1752 | MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV64rm), X86::RAX), | |||
| 1753 | StackPtr, false, NumBytes - 8); | |||
| 1754 | else | |||
| 1755 | MI = addRegOffset(BuildMI(MF, DL, TII.get(X86::MOV32rm), X86::EAX), | |||
| 1756 | StackPtr, false, NumBytes - 4); | |||
| 1757 | MI->setFlag(MachineInstr::FrameSetup); | |||
| 1758 | MBB.insert(MBBI, MI); | |||
| 1759 | } | |||
| 1760 | } else if (NumBytes) { | |||
| 1761 | emitSPUpdate(MBB, MBBI, DL, -(int64_t)NumBytes, /*InEpilogue=*/false); | |||
| 1762 | } | |||
| 1763 | ||||
| 1764 | if (NeedsWinCFI && NumBytes) { | |||
| 1765 | HasWinCFI = true; | |||
| 1766 | BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_StackAlloc)) | |||
| 1767 | .addImm(NumBytes) | |||
| 1768 | .setMIFlag(MachineInstr::FrameSetup); | |||
| 1769 | } | |||
| 1770 | ||||
| 1771 | int SEHFrameOffset = 0; | |||
| 1772 | unsigned SPOrEstablisher; | |||
| 1773 | if (IsFunclet) { | |||
| 1774 | if (IsClrFunclet) { | |||
| 1775 | // The establisher parameter passed to a CLR funclet is actually a pointer | |||
| 1776 | // to the (mostly empty) frame of its nearest enclosing funclet; we have | |||
| 1777 | // to find the root function establisher frame by loading the PSPSym from | |||
| 1778 | // the intermediate frame. | |||
| 1779 | unsigned PSPSlotOffset = getPSPSlotOffsetFromSP(MF); | |||
| 1780 | MachinePointerInfo NoInfo; | |||
| 1781 | MBB.addLiveIn(Establisher); | |||
| 1782 | addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rm), Establisher), | |||
| 1783 | Establisher, false, PSPSlotOffset) | |||
| 1784 | .addMemOperand(MF.getMachineMemOperand( | |||
| 1785 | NoInfo, MachineMemOperand::MOLoad, SlotSize, Align(SlotSize))); | |||
| 1786 | ||||
| 1787 | // Save the root establisher back into the current funclet's (mostly | |||
| 1788 | // empty) frame, in case a sub-funclet or the GC needs it. | |||
| 1789 | addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mr)), StackPtr, | |||
| 1790 | false, PSPSlotOffset) | |||
| 1791 | .addReg(Establisher) | |||
| 1792 | .addMemOperand(MF.getMachineMemOperand( | |||
| 1793 | NoInfo, | |||
| 1794 | MachineMemOperand::MOStore | MachineMemOperand::MOVolatile, | |||
| 1795 | SlotSize, Align(SlotSize))); | |||
| 1796 | } | |||
| 1797 | SPOrEstablisher = Establisher; | |||
| 1798 | } else { | |||
| 1799 | SPOrEstablisher = StackPtr; | |||
| 1800 | } | |||
| 1801 | ||||
| 1802 | if (IsWin64Prologue && HasFP) { | |||
| 1803 | // Set RBP to a small fixed offset from RSP. In the funclet case, we base | |||
| 1804 | // this calculation on the incoming establisher, which holds the value of | |||
| 1805 | // RSP from the parent frame at the end of the prologue. | |||
| 1806 | SEHFrameOffset = calculateSetFPREG(ParentFrameNumBytes); | |||
| 1807 | if (SEHFrameOffset) | |||
| 1808 | addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), FramePtr), | |||
| 1809 | SPOrEstablisher, false, SEHFrameOffset); | |||
| 1810 | else | |||
| 1811 | BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64rr), FramePtr) | |||
| 1812 | .addReg(SPOrEstablisher); | |||
| 1813 | ||||
| 1814 | // If this is not a funclet, emit the CFI describing our frame pointer. | |||
| 1815 | if (NeedsWinCFI && !IsFunclet) { | |||
| 1816 | assert(!NeedsWinFPO && "this setframe incompatible with FPO data")((void)0); | |||
| 1817 | HasWinCFI = true; | |||
| 1818 | BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SetFrame)) | |||
| 1819 | .addImm(FramePtr) | |||
| 1820 | .addImm(SEHFrameOffset) | |||
| 1821 | .setMIFlag(MachineInstr::FrameSetup); | |||
| 1822 | if (isAsynchronousEHPersonality(Personality)) | |||
| 1823 | MF.getWinEHFuncInfo()->SEHSetFrameOffset = SEHFrameOffset; | |||
| 1824 | } | |||
| 1825 | } else if (IsFunclet && STI.is32Bit()) { | |||
| 1826 | // Reset EBP / ESI to something good for funclets. | |||
| 1827 | MBBI = restoreWin32EHStackPointers(MBB, MBBI, DL); | |||
| 1828 | // If we're a catch funclet, we can be returned to via catchret. Save ESP | |||
| 1829 | // into the registration node so that the runtime will restore it for us. | |||
| 1830 | if (!MBB.isCleanupFuncletEntry()) { | |||
| 1831 | assert(Personality == EHPersonality::MSVC_CXX)((void)0); | |||
| 1832 | Register FrameReg; | |||
| 1833 | int FI = MF.getWinEHFuncInfo()->EHRegNodeFrameIndex; | |||
| 1834 | int64_t EHRegOffset = getFrameIndexReference(MF, FI, FrameReg).getFixed(); | |||
| 1835 | // ESP is the first field, so no extra displacement is needed. | |||
| 1836 | addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32mr)), FrameReg, | |||
| 1837 | false, EHRegOffset) | |||
| 1838 | .addReg(X86::ESP); | |||
| 1839 | } | |||
| 1840 | } | |||
| 1841 | ||||
| 1842 | while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup)) { | |||
| 1843 | const MachineInstr &FrameInstr = *MBBI; | |||
| 1844 | ++MBBI; | |||
| 1845 | ||||
| 1846 | if (NeedsWinCFI) { | |||
| 1847 | int FI; | |||
| 1848 | if (unsigned Reg = TII.isStoreToStackSlot(FrameInstr, FI)) { | |||
| 1849 | if (X86::FR64RegClass.contains(Reg)) { | |||
| 1850 | int Offset; | |||
| 1851 | Register IgnoredFrameReg; | |||
| 1852 | if (IsWin64Prologue && IsFunclet) | |||
| 1853 | Offset = getWin64EHFrameIndexRef(MF, FI, IgnoredFrameReg); | |||
| 1854 | else | |||
| 1855 | Offset = | |||
| 1856 | getFrameIndexReference(MF, FI, IgnoredFrameReg).getFixed() + | |||
| 1857 | SEHFrameOffset; | |||
| 1858 | ||||
| 1859 | HasWinCFI = true; | |||
| 1860 | assert(!NeedsWinFPO && "SEH_SaveXMM incompatible with FPO data")((void)0); | |||
| 1861 | BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_SaveXMM)) | |||
| 1862 | .addImm(Reg) | |||
| 1863 | .addImm(Offset) | |||
| 1864 | .setMIFlag(MachineInstr::FrameSetup); | |||
| 1865 | } | |||
| 1866 | } | |||
| 1867 | } | |||
| 1868 | } | |||
| 1869 | ||||
| 1870 | if (NeedsWinCFI && HasWinCFI) | |||
| 1871 | BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_EndPrologue)) | |||
| 1872 | .setMIFlag(MachineInstr::FrameSetup); | |||
| 1873 | ||||
| 1874 | if (FnHasClrFunclet && !IsFunclet) { | |||
| 1875 | // Save the so-called Initial-SP (i.e. the value of the stack pointer | |||
| 1876 | // immediately after the prolog) into the PSPSlot so that funclets | |||
| 1877 | // and the GC can recover it. | |||
| 1878 | unsigned PSPSlotOffset = getPSPSlotOffsetFromSP(MF); | |||
| 1879 | auto PSPInfo = MachinePointerInfo::getFixedStack( | |||
| 1880 | MF, MF.getWinEHFuncInfo()->PSPSymFrameIdx); | |||
| 1881 | addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mr)), StackPtr, false, | |||
| 1882 | PSPSlotOffset) | |||
| 1883 | .addReg(StackPtr) | |||
| 1884 | .addMemOperand(MF.getMachineMemOperand( | |||
| 1885 | PSPInfo, MachineMemOperand::MOStore | MachineMemOperand::MOVolatile, | |||
| 1886 | SlotSize, Align(SlotSize))); | |||
| 1887 | } | |||
| 1888 | ||||
| 1889 | // Realign stack after we spilled callee-saved registers (so that we'll be | |||
| 1890 | // able to calculate their offsets from the frame pointer). | |||
| 1891 | // Win64 requires aligning the stack after the prologue. | |||
| 1892 | if (IsWin64Prologue && TRI->hasStackRealignment(MF)) { | |||
| 1893 | assert(HasFP && "There should be a frame pointer if stack is realigned.")((void)0); | |||
| 1894 | BuildStackAlignAND(MBB, MBBI, DL, SPOrEstablisher, MaxAlign); | |||
| 1895 | } | |||
| 1896 | ||||
| 1897 | // We already dealt with stack realignment and funclets above. | |||
| 1898 | if (IsFunclet && STI.is32Bit()) | |||
| 1899 | return; | |||
| 1900 | ||||
| 1901 | // If we need a base pointer, set it up here. It's whatever the value | |||
| 1902 | // of the stack pointer is at this point. Any variable size objects | |||
| 1903 | // will be allocated after this, so we can still use the base pointer | |||
| 1904 | // to reference locals. | |||
| 1905 | if (TRI->hasBasePointer(MF)) { | |||
| 1906 | // Update the base pointer with the current stack pointer. | |||
| 1907 | unsigned Opc = Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr; | |||
| 1908 | BuildMI(MBB, MBBI, DL, TII.get(Opc), BasePtr) | |||
| 1909 | .addReg(SPOrEstablisher) | |||
| 1910 | .setMIFlag(MachineInstr::FrameSetup); | |||
| 1911 | if (X86FI->getRestoreBasePointer()) { | |||
| 1912 | // Stash value of base pointer. Saving RSP instead of EBP shortens | |||
| 1913 | // dependence chain. Used by SjLj EH. | |||
| 1914 | unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr; | |||
| 1915 | addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)), | |||
| 1916 | FramePtr, true, X86FI->getRestoreBasePointerOffset()) | |||
| 1917 | .addReg(SPOrEstablisher) | |||
| 1918 | .setMIFlag(MachineInstr::FrameSetup); | |||
| 1919 | } | |||
| 1920 | ||||
| 1921 | if (X86FI->getHasSEHFramePtrSave() && !IsFunclet) { | |||
| 1922 | // Stash the value of the frame pointer relative to the base pointer for | |||
| 1923 | // Win32 EH. This supports Win32 EH, which does the inverse of the above: | |||
| 1924 | // it recovers the frame pointer from the base pointer rather than the | |||
| 1925 | // other way around. | |||
| 1926 | unsigned Opm = Uses64BitFramePtr ? X86::MOV64mr : X86::MOV32mr; | |||
| 1927 | Register UsedReg; | |||
| 1928 | int Offset = | |||
| 1929 | getFrameIndexReference(MF, X86FI->getSEHFramePtrSaveIndex(), UsedReg) | |||
| 1930 | .getFixed(); | |||
| 1931 | assert(UsedReg == BasePtr)((void)0); | |||
| 1932 | addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opm)), UsedReg, true, Offset) | |||
| 1933 | .addReg(FramePtr) | |||
| 1934 | .setMIFlag(MachineInstr::FrameSetup); | |||
| 1935 | } | |||
| 1936 | } | |||
| 1937 | ||||
| 1938 | if (((!HasFP && NumBytes) || PushedRegs) && NeedsDwarfCFI) { | |||
| 1939 | // Mark end of stack pointer adjustment. | |||
| 1940 | if (!HasFP && NumBytes) { | |||
| 1941 | // Define the current CFA rule to use the provided offset. | |||
| 1942 | assert(StackSize)((void)0); | |||
| 1943 | BuildCFI( | |||
| 1944 | MBB, MBBI, DL, | |||
| 1945 | MCCFIInstruction::cfiDefCfaOffset(nullptr, StackSize - stackGrowth)); | |||
| 1946 | } | |||
| 1947 | ||||
| 1948 | // Emit DWARF info specifying the offsets of the callee-saved registers. | |||
| 1949 | emitCalleeSavedFrameMoves(MBB, MBBI, DL, true); | |||
| 1950 | } | |||
| 1951 | ||||
| 1952 | // X86 Interrupt handling function cannot assume anything about the direction | |||
| 1953 | // flag (DF in EFLAGS register). Clear this flag by creating "cld" instruction | |||
| 1954 | // in each prologue of interrupt handler function. | |||
| 1955 | // | |||
| 1956 | // FIXME: Create "cld" instruction only in these cases: | |||
| 1957 | // 1. The interrupt handling function uses any of the "rep" instructions. | |||
| 1958 | // 2. Interrupt handling function calls another function. | |||
| 1959 | // | |||
| 1960 | if (Fn.getCallingConv() == CallingConv::X86_INTR) | |||
| 1961 | BuildMI(MBB, MBBI, DL, TII.get(X86::CLD)) | |||
| 1962 | .setMIFlag(MachineInstr::FrameSetup); | |||
| 1963 | ||||
| 1964 | // At this point we know if the function has WinCFI or not. | |||
| 1965 | MF.setHasWinCFI(HasWinCFI); | |||
| 1966 | } | |||
| 1967 | ||||
| 1968 | bool X86FrameLowering::canUseLEAForSPInEpilogue( | |||
| 1969 | const MachineFunction &MF) const { | |||
| 1970 | // We can't use LEA instructions for adjusting the stack pointer if we don't | |||
| 1971 | // have a frame pointer in the Win64 ABI. Only ADD instructions may be used | |||
| 1972 | // to deallocate the stack. | |||
| 1973 | // This means that we can use LEA for SP in two situations: | |||
| 1974 | // 1. We *aren't* using the Win64 ABI which means we are free to use LEA. | |||
| 1975 | // 2. We *have* a frame pointer which means we are permitted to use LEA. | |||
| 1976 | return !MF.getTarget().getMCAsmInfo()->usesWindowsCFI() || hasFP(MF); | |||
| 1977 | } | |||
| 1978 | ||||
| 1979 | static bool isFuncletReturnInstr(MachineInstr &MI) { | |||
| 1980 | switch (MI.getOpcode()) { | |||
| 1981 | case X86::CATCHRET: | |||
| 1982 | case X86::CLEANUPRET: | |||
| 1983 | return true; | |||
| 1984 | default: | |||
| 1985 | return false; | |||
| 1986 | } | |||
| 1987 | llvm_unreachable("impossible")__builtin_unreachable(); | |||
| 1988 | } | |||
| 1989 | ||||
| 1990 | // CLR funclets use a special "Previous Stack Pointer Symbol" slot on the | |||
| 1991 | // stack. It holds a pointer to the bottom of the root function frame. The | |||
| 1992 | // establisher frame pointer passed to a nested funclet may point to the | |||
| 1993 | // (mostly empty) frame of its parent funclet, but it will need to find | |||
| 1994 | // the frame of the root function to access locals. To facilitate this, | |||
| 1995 | // every funclet copies the pointer to the bottom of the root function | |||
| 1996 | // frame into a PSPSym slot in its own (mostly empty) stack frame. Using the | |||
| 1997 | // same offset for the PSPSym in the root function frame that's used in the | |||
| 1998 | // funclets' frames allows each funclet to dynamically accept any ancestor | |||
| 1999 | // frame as its establisher argument (the runtime doesn't guarantee the | |||
| 2000 | // immediate parent for some reason lost to history), and also allows the GC, | |||
| 2001 | // which uses the PSPSym for some bookkeeping, to find it in any funclet's | |||
| 2002 | // frame with only a single offset reported for the entire method. | |||
| 2003 | unsigned | |||
| 2004 | X86FrameLowering::getPSPSlotOffsetFromSP(const MachineFunction &MF) const { | |||
| 2005 | const WinEHFuncInfo &Info = *MF.getWinEHFuncInfo(); | |||
| 2006 | Register SPReg; | |||
| 2007 | int Offset = getFrameIndexReferencePreferSP(MF, Info.PSPSymFrameIdx, SPReg, | |||
| 2008 | /*IgnoreSPUpdates*/ true) | |||
| 2009 | .getFixed(); | |||
| 2010 | assert(Offset >= 0 && SPReg == TRI->getStackRegister())((void)0); | |||
| 2011 | return static_cast<unsigned>(Offset); | |||
| 2012 | } | |||
| 2013 | ||||
| 2014 | unsigned | |||
| 2015 | X86FrameLowering::getWinEHFuncletFrameSize(const MachineFunction &MF) const { | |||
| 2016 | const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); | |||
| 2017 | // This is the size of the pushed CSRs. | |||
| 2018 | unsigned CSSize = X86FI->getCalleeSavedFrameSize(); | |||
| 2019 | // This is the size of callee saved XMMs. | |||
| 2020 | const auto& WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo(); | |||
| 2021 | unsigned XMMSize = WinEHXMMSlotInfo.size() * | |||
| 2022 | TRI->getSpillSize(X86::VR128RegClass); | |||
| 2023 | // This is the amount of stack a funclet needs to allocate. | |||
| 2024 | unsigned UsedSize; | |||
| 2025 | EHPersonality Personality = | |||
| 2026 | classifyEHPersonality(MF.getFunction().getPersonalityFn()); | |||
| 2027 | if (Personality == EHPersonality::CoreCLR) { | |||
| 2028 | // CLR funclets need to hold enough space to include the PSPSym, at the | |||
| 2029 | // same offset from the stack pointer (immediately after the prolog) as it | |||
| 2030 | // resides at in the main function. | |||
| 2031 | UsedSize = getPSPSlotOffsetFromSP(MF) + SlotSize; | |||
| 2032 | } else { | |||
| 2033 | // Other funclets just need enough stack for outgoing call arguments. | |||
| 2034 | UsedSize = MF.getFrameInfo().getMaxCallFrameSize(); | |||
| 2035 | } | |||
| 2036 | // RBP is not included in the callee saved register block. After pushing RBP, | |||
| 2037 | // everything is 16 byte aligned. Everything we allocate before an outgoing | |||
| 2038 | // call must also be 16 byte aligned. | |||
| 2039 | unsigned FrameSizeMinusRBP = alignTo(CSSize + UsedSize, getStackAlign()); | |||
| 2040 | // Subtract out the size of the callee saved registers. This is how much stack | |||
| 2041 | // each funclet will allocate. | |||
| 2042 | return FrameSizeMinusRBP + XMMSize - CSSize; | |||
| 2043 | } | |||
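| | // Worked example (editorial note, not part of the original source): with | |||
| | // CSSize = 40, UsedSize = 32 bytes of outgoing call arguments and no XMM | |||
| | // spill slots, alignTo(40 + 32, 16) = 80, so each funclet allocates | |||
| | // 80 + 0 - 40 = 40 bytes of stack. | |||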
| 2044 | ||||
| 2045 | static bool isTailCallOpcode(unsigned Opc) { | |||
| 2046 | return Opc == X86::TCRETURNri || Opc == X86::TCRETURNdi || | |||
| 2047 | Opc == X86::TCRETURNmi || | |||
| 2048 | Opc == X86::TCRETURNri64 || Opc == X86::TCRETURNdi64 || | |||
| 2049 | Opc == X86::TCRETURNmi64; | |||
| 2050 | } | |||
| 2051 | ||||
| 2052 | void X86FrameLowering::emitEpilogue(MachineFunction &MF, | |||
| 2053 | MachineBasicBlock &MBB) const { | |||
| 2054 | const MachineFrameInfo &MFI = MF.getFrameInfo(); | |||
| 2055 | X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); | |||
| 2056 | MachineBasicBlock::iterator Terminator = MBB.getFirstTerminator(); | |||
| 2057 | MachineBasicBlock::iterator MBBI = Terminator; | |||
| 2058 | DebugLoc DL; | |||
| 2059 | if (MBBI != MBB.end()) | |||
| 2060 | DL = MBBI->getDebugLoc(); | |||
| 2061 | // standard x86_64 and NaCl use 64-bit frame/stack pointers, x32 - 32-bit. | |||
| 2062 | const bool Is64BitILP32 = STI.isTarget64BitILP32(); | |||
| 2063 | Register FramePtr = TRI->getFrameRegister(MF); | |||
| 2064 | Register MachineFramePtr = | |||
| 2065 | Is64BitILP32 ? Register(getX86SubSuperRegister(FramePtr, 64)) : FramePtr; | |||
| 2066 | ||||
| 2067 | bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); | |||
| 2068 | bool NeedsWin64CFI = | |||
| 2069 | IsWin64Prologue && MF.getFunction().needsUnwindTableEntry(); | |||
| 2070 | bool IsFunclet = MBBI == MBB.end() ? false : isFuncletReturnInstr(*MBBI); | |||
| 2071 | ||||
| 2072 | // Get the number of bytes to allocate from the FrameInfo. | |||
| 2073 | uint64_t StackSize = MFI.getStackSize(); | |||
| 2074 | uint64_t MaxAlign = calculateMaxStackAlign(MF); | |||
| 2075 | unsigned CSSize = X86FI->getCalleeSavedFrameSize(); | |||
| 2076 | bool HasFP = hasFP(MF); | |||
| 2077 | uint64_t NumBytes = 0; | |||
| 2078 | ||||
| 2079 | bool NeedsDwarfCFI = (!MF.getTarget().getTargetTriple().isOSDarwin() && | |||
| 2080 | !MF.getTarget().getTargetTriple().isOSWindows()) && | |||
| 2081 | MF.needsFrameMoves(); | |||
| 2082 | ||||
| 2083 | if (IsFunclet) { | |||
| 2084 | assert(HasFP && "EH funclets without FP not yet implemented")((void)0); | |||
| 2085 | NumBytes = getWinEHFuncletFrameSize(MF); | |||
| 2086 | } else if (HasFP) { | |||
| 2087 | // Calculate required stack adjustment. | |||
| 2088 | uint64_t FrameSize = StackSize - SlotSize; | |||
| 2089 | NumBytes = FrameSize - CSSize; | |||
| 2090 | ||||
| 2091 | // Callee-saved registers were pushed on stack before the stack was | |||
| 2092 | // realigned. | |||
| 2093 | if (TRI->hasStackRealignment(MF) && !IsWin64Prologue) | |||
| 2094 | NumBytes = alignTo(FrameSize, MaxAlign); | |||
| 2095 | } else { | |||
| 2096 | NumBytes = StackSize - CSSize; | |||
| 2097 | } | |||
| 2098 | uint64_t SEHStackAllocAmt = NumBytes; | |||
| 2099 | ||||
| 2100 | // AfterPop is the position to insert .cfi_restore. | |||
| 2101 | MachineBasicBlock::iterator AfterPop = MBBI; | |||
| 2102 | if (HasFP) { | |||
| 2103 | if (X86FI->hasSwiftAsyncContext()) { | |||
| 2104 | // Discard the context. | |||
| 2105 | int Offset = 16 + mergeSPUpdates(MBB, MBBI, true); | |||
| 2106 | emitSPUpdate(MBB, MBBI, DL, Offset, /*InEpilogue*/true); | |||
| 2107 | } | |||
| 2108 | ||||
| 2109 | if (X86FI->getSaveArgSize()) { | |||
| 2110 | // LEAVE is effectively mov rbp,rsp; pop rbp | |||
| 2111 | BuildMI(MBB, MBBI, DL, TII.get(X86::LEAVE64)) | |||
| 2112 | .setMIFlag(MachineInstr::FrameDestroy); | |||
| 2113 | } else { | |||
| 2114 | // Pop EBP. | |||
| 2115 | BuildMI(MBB, MBBI, DL, TII.get(Is64Bit ? X86::POP64r : X86::POP32r), | |||
| 2116 | MachineFramePtr) | |||
| 2117 | .setMIFlag(MachineInstr::FrameDestroy); | |||
| 2118 | } | |||
| 2119 | ||||
| 2120 | // We need to reset FP to its untagged state on return. Bit 60 is currently | |||
| 2121 | // used to show the presence of an extended frame. | |||
| 2122 | if (X86FI->hasSwiftAsyncContext()) { | |||
| 2123 | BuildMI(MBB, MBBI, DL, TII.get(X86::BTR64ri8), | |||
| 2124 | MachineFramePtr) | |||
| 2125 | .addUse(MachineFramePtr) | |||
| 2126 | .addImm(60) | |||
| 2127 | .setMIFlag(MachineInstr::FrameDestroy); | |||
| 2128 | } | |||
| 2129 | ||||
| 2130 | if (NeedsDwarfCFI) { | |||
| 2131 | unsigned DwarfStackPtr = | |||
| 2132 | TRI->getDwarfRegNum(Is64Bit ? X86::RSP : X86::ESP, true); | |||
| 2133 | BuildCFI(MBB, MBBI, DL, | |||
| 2134 | MCCFIInstruction::cfiDefCfa(nullptr, DwarfStackPtr, SlotSize)); | |||
| 2135 | if (!MBB.succ_empty() && !MBB.isReturnBlock()) { | |||
| 2136 | unsigned DwarfFramePtr = TRI->getDwarfRegNum(MachineFramePtr, true); | |||
| 2137 | BuildCFI(MBB, AfterPop, DL, | |||
| 2138 | MCCFIInstruction::createRestore(nullptr, DwarfFramePtr)); | |||
| 2139 | --MBBI; | |||
| 2140 | --AfterPop; | |||
| 2141 | } | |||
| 2142 | --MBBI; | |||
| 2143 | } | |||
| 2144 | } | |||
| 2145 | ||||
| 2146 | MachineBasicBlock::iterator FirstCSPop = MBBI; | |||
| 2147 | // Skip the callee-saved pop instructions. | |||
| 2148 | while (MBBI != MBB.begin()) { | |||
| 2149 | MachineBasicBlock::iterator PI = std::prev(MBBI); | |||
| 2150 | unsigned Opc = PI->getOpcode(); | |||
| 2151 | ||||
| 2152 | if (Opc != X86::DBG_VALUE && !PI->isTerminator()) { | |||
| 2153 | if ((Opc != X86::POP32r || !PI->getFlag(MachineInstr::FrameDestroy)) && | |||
| 2154 | (Opc != X86::POP64r || !PI->getFlag(MachineInstr::FrameDestroy)) && | |||
| 2155 | (Opc != X86::LEAVE64 || !PI->getFlag(MachineInstr::FrameDestroy)) && | |||
| 2156 | (Opc != X86::BTR64ri8 || !PI->getFlag(MachineInstr::FrameDestroy)) && | |||
| 2157 | (Opc != X86::ADD64ri8 || !PI->getFlag(MachineInstr::FrameDestroy))) | |||
| 2158 | break; | |||
| 2159 | FirstCSPop = PI; | |||
| 2160 | } | |||
| 2161 | ||||
| 2162 | --MBBI; | |||
| 2163 | } | |||
| 2164 | MBBI = FirstCSPop; | |||
| 2165 | ||||
| 2166 | if (IsFunclet && Terminator->getOpcode() == X86::CATCHRET) | |||
| 2167 | emitCatchRetReturnValue(MBB, FirstCSPop, &*Terminator); | |||
| 2168 | ||||
| 2169 | if (MBBI != MBB.end()) | |||
| 2170 | DL = MBBI->getDebugLoc(); | |||
| 2171 | ||||
| 2172 | // If there is an ADD32ri or SUB32ri of ESP immediately before this | |||
| 2173 | // instruction, merge the two instructions. | |||
| 2174 | if (NumBytes || MFI.hasVarSizedObjects()) | |||
| 2175 | NumBytes += mergeSPUpdates(MBB, MBBI, true); | |||
| 2176 | ||||
| 2177 | // If dynamic alloca is used, then reset esp to point to the last callee-saved | |||
| 2178 | // slot before popping them off! The same applies when the stack was | |||
| 2179 | // realigned. Don't do this if this was a funclet epilogue, since the funclets | |||
| 2180 | // will not do realignment or dynamic stack allocation. | |||
| 2181 | if (((TRI->hasStackRealignment(MF)) || MFI.hasVarSizedObjects()) && | |||
| 2182 | !IsFunclet) { | |||
| 2183 | if (TRI->hasStackRealignment(MF)) | |||
| 2184 | MBBI = FirstCSPop; | |||
| 2185 | unsigned SEHFrameOffset = calculateSetFPREG(SEHStackAllocAmt); | |||
| 2186 | uint64_t LEAAmount = | |||
| 2187 | IsWin64Prologue ? SEHStackAllocAmt - SEHFrameOffset : -CSSize; | |||
| 2188 | ||||
| 2189 | if (X86FI->hasSwiftAsyncContext()) | |||
| 2190 | LEAAmount -= 16; | |||
| 2191 | ||||
| 2192 | // There are only two legal forms of epilogue: | |||
| 2193 | // - add SEHAllocationSize, %rsp | |||
| 2194 | // - lea SEHAllocationSize(%FramePtr), %rsp | |||
| 2195 | // | |||
| 2196 | // 'mov %FramePtr, %rsp' will not be recognized as an epilogue sequence. | |||
| 2197 | // However, we may use this sequence if we have a frame pointer because the | |||
| 2198 | // effects of the prologue can safely be undone. | |||
| 2199 | if (LEAAmount != 0) { | |||
| 2200 | unsigned Opc = getLEArOpcode(Uses64BitFramePtr); | |||
| 2201 | addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr), | |||
| 2202 | FramePtr, false, LEAAmount); | |||
| 2203 | --MBBI; | |||
| 2204 | } else { | |||
| 2205 | unsigned Opc = (Uses64BitFramePtr ? X86::MOV64rr : X86::MOV32rr); | |||
| 2206 | BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr) | |||
| 2207 | .addReg(FramePtr); | |||
| 2208 | --MBBI; | |||
| 2209 | } | |||
| 2210 | } else if (NumBytes) { | |||
| 2211 | // Adjust stack pointer back: ESP += numbytes. | |||
| 2212 | emitSPUpdate(MBB, MBBI, DL, NumBytes, /*InEpilogue=*/true); | |||
| 2213 | if (!hasFP(MF) && NeedsDwarfCFI) { | |||
| 2214 | // Define the current CFA rule to use the provided offset. | |||
| 2215 | BuildCFI(MBB, MBBI, DL, | |||
| 2216 | MCCFIInstruction::cfiDefCfaOffset(nullptr, CSSize + SlotSize)); | |||
| 2217 | } | |||
| 2218 | --MBBI; | |||
| 2219 | } | |||
| 2220 | ||||
| 2221 | // The Windows unwinder will not invoke a function's exception handler if IP is | |||
| 2222 | // either in the prologue or in the epilogue. This behavior causes a problem when a | |||
| 2223 | // call immediately precedes an epilogue, because the return address points | |||
| 2224 | // into the epilogue. To cope with that, we insert an epilogue marker here, | |||
| 2225 | // then replace it with a 'nop' if it ends up immediately after a CALL in the | |||
| 2226 | // final emitted code. | |||
| 2227 | if (NeedsWin64CFI && MF.hasWinCFI()) | |||
| 2228 | BuildMI(MBB, MBBI, DL, TII.get(X86::SEH_Epilogue)); | |||
| 2229 | ||||
| 2230 | if (!hasFP(MF) && NeedsDwarfCFI) { | |||
| 2231 | MBBI = FirstCSPop; | |||
| 2232 | int64_t Offset = -CSSize - SlotSize; | |||
| 2233 | // Mark callee-saved pop instruction. | |||
| 2234 | // Define the current CFA rule to use the provided offset. | |||
| 2235 | while (MBBI != MBB.end()) { | |||
| 2236 | MachineBasicBlock::iterator PI = MBBI; | |||
| 2237 | unsigned Opc = PI->getOpcode(); | |||
| 2238 | ++MBBI; | |||
| 2239 | if (Opc == X86::POP32r || Opc == X86::POP64r) { | |||
| 2240 | Offset += SlotSize; | |||
| 2241 | BuildCFI(MBB, MBBI, DL, | |||
| 2242 | MCCFIInstruction::cfiDefCfaOffset(nullptr, -Offset)); | |||
| 2243 | } | |||
| 2244 | } | |||
| 2245 | } | |||
| 2246 | ||||
| 2247 | // Emit DWARF info specifying the restores of the callee-saved registers. | |||
| 2248 | // For epilogue with return inside or being other block without successor, | |||
| 2249 | // no need to generate .cfi_restore for callee-saved registers. | |||
| 2250 | if (NeedsDwarfCFI && !MBB.succ_empty() && !MBB.isReturnBlock()) { | |||
| 2251 | emitCalleeSavedFrameMoves(MBB, AfterPop, DL, false); | |||
| 2252 | } | |||
| 2253 | ||||
| 2254 | if (Terminator == MBB.end() || !isTailCallOpcode(Terminator->getOpcode())) { | |||
| 2255 | // Add the return addr area delta back since we are not tail calling. | |||
| 2256 | int Offset = -1 * X86FI->getTCReturnAddrDelta(); | |||
| 2257 | assert(Offset >= 0 && "TCDelta should never be positive")((void)0); | |||
| 2258 | if (Offset) { | |||
| 2259 | // Check for possible merge with preceding ADD instruction. | |||
| 2260 | Offset += mergeSPUpdates(MBB, Terminator, true); | |||
| 2261 | emitSPUpdate(MBB, Terminator, DL, Offset, /*InEpilogue=*/true); | |||
| 2262 | } | |||
| 2263 | } | |||
| 2264 | ||||
| 2265 | // Emit tilerelease for AMX kernel. | |||
| 2266 | const MachineRegisterInfo &MRI = MF.getRegInfo(); | |||
| 2267 | const TargetRegisterClass *RC = TRI->getRegClass(X86::TILERegClassID); | |||
| 2268 | for (unsigned I = 0; I < RC->getNumRegs(); I++) | |||
| 2269 | if (!MRI.reg_nodbg_empty(X86::TMM0 + I)) { | |||
| 2270 | BuildMI(MBB, Terminator, DL, TII.get(X86::TILERELEASE)); | |||
| 2271 | break; | |||
| 2272 | } | |||
| 2273 | } | |||
| 2274 | ||||
| 2275 | StackOffset X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, | |||
| 2276 | int FI, | |||
| 2277 | Register &FrameReg) const { | |||
| 2278 | const MachineFrameInfo &MFI = MF.getFrameInfo(); | |||
| 2279 | ||||
| 2280 | bool IsFixed = MFI.isFixedObjectIndex(FI); | |||
| 2281 | // We can't calculate offset from frame pointer if the stack is realigned, | |||
| 2282 | // so enforce usage of stack/base pointer. The base pointer is used when we | |||
| 2283 | // have dynamic allocas in addition to dynamic realignment. | |||
| 2284 | if (TRI->hasBasePointer(MF)) | |||
| 2285 | FrameReg = IsFixed ? TRI->getFramePtr() : TRI->getBaseRegister(); | |||
| 2286 | else if (TRI->hasStackRealignment(MF)) | |||
| 2287 | FrameReg = IsFixed ? TRI->getFramePtr() : TRI->getStackRegister(); | |||
| 2288 | else | |||
| 2289 | FrameReg = TRI->getFrameRegister(MF); | |||
| 2290 | ||||
| 2291 | // Offset will hold the offset from the stack pointer at function entry to the | |||
| 2292 | // object. | |||
| 2293 | // We need to factor in additional offsets applied during the prologue to the | |||
| 2294 | // frame, base, and stack pointer depending on which is used. | |||
| 2295 | int Offset = MFI.getObjectOffset(FI) - getOffsetOfLocalArea(); | |||
| 2296 | const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); | |||
| 2297 | unsigned CSSize = X86FI->getCalleeSavedFrameSize(); | |||
| 2298 | uint64_t StackSize = MFI.getStackSize(); | |||
| 2299 | bool HasFP = hasFP(MF); | |||
| 2300 | bool IsWin64Prologue = MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); | |||
| 2301 | int64_t FPDelta = 0; | |||
| 2302 | ||||
| 2303 | // In an x86 interrupt, remove the offset we added to account for the return | |||
| 2304 | // address from any stack object allocated in the caller's frame. Interrupts | |||
| 2305 | // do not have a standard return address. Fixed objects in the current frame, | |||
| 2306 | // such as SSE register spills, should not get this treatment. | |||
| 2307 | if (MF.getFunction().getCallingConv() == CallingConv::X86_INTR && | |||
| 2308 | Offset >= 0) { | |||
| 2309 | Offset += getOffsetOfLocalArea(); | |||
| 2310 | } | |||
| 2311 | ||||
| 2312 | if (IsWin64Prologue) { | |||
| 2313 | assert(!MFI.hasCalls() || (StackSize % 16) == 8)((void)0); | |||
| 2314 | ||||
| 2315 | // Calculate required stack adjustment. | |||
| 2316 | uint64_t FrameSize = StackSize - SlotSize; | |||
| 2317 | // If required, include space for an extra hidden slot for stashing the base pointer. | |||
| 2318 | if (X86FI->getRestoreBasePointer()) | |||
| 2319 | FrameSize += SlotSize; | |||
| 2320 | uint64_t NumBytes = FrameSize - CSSize; | |||
| 2321 | ||||
| 2322 | uint64_t SEHFrameOffset = calculateSetFPREG(NumBytes); | |||
| 2323 | if (FI && FI == X86FI->getFAIndex()) | |||
| 2324 | return StackOffset::getFixed(-SEHFrameOffset); | |||
| 2325 | ||||
| 2326 | // FPDelta is the offset from the "traditional" FP location of the old base | |||
| 2327 | // pointer followed by return address and the location required by the | |||
| 2328 | // restricted Win64 prologue. | |||
| 2329 | // Add FPDelta to all offsets below that go through the frame pointer. | |||
| 2330 | FPDelta = FrameSize - SEHFrameOffset; | |||
| 2331 | assert((!MFI.hasCalls() || (FPDelta % 16) == 0) &&((void)0) | |||
| 2332 | "FPDelta isn't aligned per the Win64 ABI!")((void)0); | |||
| 2333 | } | |||
| 2334 | ||||
| 2335 | if (FI >= 0) | |||
| 2336 | Offset -= X86FI->getSaveArgSize(); | |||
| 2337 | ||||
| 2338 | if (TRI->hasBasePointer(MF)) { | |||
| 2339 | assert(HasFP && "VLAs and dynamic stack realign, but no FP?!")((void)0); | |||
| 2340 | if (FI < 0) { | |||
| 2341 | // Skip the saved EBP. | |||
| 2342 | return StackOffset::getFixed(Offset + SlotSize + FPDelta); | |||
| 2343 | } else { | |||
| 2344 | assert(isAligned(MFI.getObjectAlign(FI), -(Offset + StackSize)))((void)0); | |||
| 2345 | return StackOffset::getFixed(Offset + StackSize); | |||
| 2346 | } | |||
| 2347 | } else if (TRI->hasStackRealignment(MF)) { | |||
| 2348 | if (FI < 0) { | |||
| 2349 | // Skip the saved EBP. | |||
| 2350 | return StackOffset::getFixed(Offset + SlotSize + FPDelta); | |||
| 2351 | } else { | |||
| 2352 | assert(isAligned(MFI.getObjectAlign(FI), -(Offset + StackSize)))((void)0); | |||
| 2353 | return StackOffset::getFixed(Offset + StackSize); | |||
| 2354 | } | |||
| 2355 | // FIXME: Support tail calls | |||
| 2356 | } else { | |||
| 2357 | if (!HasFP) | |||
| 2358 | return StackOffset::getFixed(Offset + StackSize); | |||
| 2359 | ||||
| 2360 | // Skip the saved EBP. | |||
| 2361 | Offset += SlotSize; | |||
| 2362 | ||||
| 2363 | // Skip the RETADDR move area | |||
| 2364 | int TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); | |||
| 2365 | if (TailCallReturnAddrDelta < 0) | |||
| 2366 | Offset -= TailCallReturnAddrDelta; | |||
| 2367 | } | |||
| 2368 | ||||
| 2369 | return StackOffset::getFixed(Offset + FPDelta); | |||
| 2370 | } | |||
| 2371 | ||||
| 2372 | int X86FrameLowering::getWin64EHFrameIndexRef(const MachineFunction &MF, int FI, | |||
| 2373 | Register &FrameReg) const { | |||
| 2374 | const MachineFrameInfo &MFI = MF.getFrameInfo(); | |||
| 2375 | const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); | |||
| 2376 | const auto& WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo(); | |||
| 2377 | const auto it = WinEHXMMSlotInfo.find(FI); | |||
| 2378 | ||||
| 2379 | if (it == WinEHXMMSlotInfo.end()) | |||
| 2380 | return getFrameIndexReference(MF, FI, FrameReg).getFixed(); | |||
| 2381 | ||||
| 2382 | FrameReg = TRI->getStackRegister(); | |||
| 2383 | return alignDown(MFI.getMaxCallFrameSize(), getStackAlign().value()) + | |||
| 2384 | it->second; | |||
| 2385 | } | |||
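The slot address returned just above is SP-relative: the maximum outgoing-call area is rounded down to the stack alignment and the per-register offset recorded in WinEHXMMSlotInfo is added on top. A minimal standalone sketch with made-up numbers (not the LLVM API) follows.

// Sketch only: hypothetical frame values, assumed power-of-two alignment.
#include <cassert>
#include <cstdint>

static uint64_t alignDownTo(uint64_t Value, uint64_t Alignment) {
  return Value & ~(Alignment - 1); // Alignment is assumed to be a power of two.
}

int main() {
  const uint64_t MaxCallFrameSize = 40; // hypothetical
  const uint64_t StackAlignment = 16;
  const uint64_t XMMSlotOffset = 16;    // second 16-byte slot from WinEHXMMSlotInfo
  // The spill slot would be addressed as 48(%rsp) in this made-up frame.
  assert(alignDownTo(MaxCallFrameSize, StackAlignment) + XMMSlotOffset == 48);
  return 0;
}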
| 2386 | ||||
| 2387 | StackOffset | |||
| 2388 | X86FrameLowering::getFrameIndexReferenceSP(const MachineFunction &MF, int FI, | |||
| 2389 | Register &FrameReg, | |||
| 2390 | int Adjustment) const { | |||
| 2391 | const MachineFrameInfo &MFI = MF.getFrameInfo(); | |||
| 2392 | FrameReg = TRI->getStackRegister(); | |||
| 2393 | return StackOffset::getFixed(MFI.getObjectOffset(FI) - | |||
| 2394 | getOffsetOfLocalArea() + Adjustment); | |||
| 2395 | } | |||
| 2396 | ||||
| 2397 | StackOffset | |||
| 2398 | X86FrameLowering::getFrameIndexReferencePreferSP(const MachineFunction &MF, | |||
| 2399 | int FI, Register &FrameReg, | |||
| 2400 | bool IgnoreSPUpdates) const { | |||
| 2401 | ||||
| 2402 | const MachineFrameInfo &MFI = MF.getFrameInfo(); | |||
| 2403 | // Does not include any dynamic realign. | |||
| 2404 | const uint64_t StackSize = MFI.getStackSize(); | |||
| 2405 | // LLVM arranges the stack as follows: | |||
| 2406 | // ... | |||
| 2407 | // ARG2 | |||
| 2408 | // ARG1 | |||
| 2409 | // RETADDR | |||
| 2410 | // PUSH RBP <-- RBP points here | |||
| 2411 | // PUSH CSRs | |||
| 2412 | // ~~~~~~~ <-- possible stack realignment (non-win64) | |||
| 2413 | // ... | |||
| 2414 | // STACK OBJECTS | |||
| 2415 | // ... <-- RSP after prologue points here | |||
| 2416 | // ~~~~~~~ <-- possible stack realignment (win64) | |||
| 2417 | // | |||
| 2418 | // if (hasVarSizedObjects()): | |||
| 2419 | // ... <-- "base pointer" (ESI/RBX) points here | |||
| 2420 | // DYNAMIC ALLOCAS | |||
| 2421 | // ... <-- RSP points here | |||
| 2422 | // | |||
| 2423 | // Case 1: In the simple case of no stack realignment and no dynamic | |||
| 2424 | // allocas, both "fixed" stack objects (arguments and CSRs) are addressable | |||
| 2425 | // with fixed offsets from RSP. | |||
| 2426 | // | |||
| 2427 | // Case 2: In the case of stack realignment with no dynamic allocas, fixed | |||
| 2428 | // stack objects are addressed with RBP and regular stack objects with RSP. | |||
| 2429 | // | |||
| 2430 | // Case 3: In the case of dynamic allocas and stack realignment, RSP is used | |||
| 2431 | // to address stack arguments for outgoing calls and nothing else. The "base | |||
| 2432 | // pointer" points to local variables, and RBP points to fixed objects. | |||
| 2433 | // | |||
| 2434 | // In cases 2 and 3, we can only answer for non-fixed stack objects, and the | |||
| 2435 | // answer we give is relative to the SP after the prologue, and not the | |||
| 2436 | // SP in the middle of the function. | |||
| 2437 | ||||
| 2438 | if (MFI.isFixedObjectIndex(FI) && TRI->hasStackRealignment(MF) && | |||
| 2439 | !STI.isTargetWin64()) | |||
| 2440 | return getFrameIndexReference(MF, FI, FrameReg); | |||
| 2441 | ||||
| 2442 | // If !hasReservedCallFrame, the function might have SP adjustment in the | |||
| 2443 | // body. So, even though the offset is statically known, it depends on where | |||
| 2444 | // we are in the function. | |||
| 2445 | if (!IgnoreSPUpdates && !hasReservedCallFrame(MF)) | |||
| 2446 | return getFrameIndexReference(MF, FI, FrameReg); | |||
| 2447 | ||||
| 2448 | // We don't handle tail calls, and shouldn't be seeing them either. | |||
| 2449 | assert(MF.getInfo<X86MachineFunctionInfo>()->getTCReturnAddrDelta() >= 0 &&((void)0) | |||
| 2450 | "we don't handle this case!")((void)0); | |||
| 2451 | ||||
| 2452 | // This is how the math works out: | |||
| 2453 | // | |||
| 2454 | // %rsp grows (i.e. gets lower) left to right. Each box below is | |||
| 2455 | // one word (eight bytes). Obj0 is the stack slot we're trying to | |||
| 2456 | // get to. | |||
| 2457 | // | |||
| 2458 | // ---------------------------------- | |||
| 2459 | // | BP | Obj0 | Obj1 | ... | ObjN | | |||
| 2460 | // ---------------------------------- | |||
| 2461 | // ^ ^ ^ ^ | |||
| 2462 | // A B C E | |||
| 2463 | // | |||
| 2464 | // A is the incoming stack pointer. | |||
| 2465 | // (B - A) is the local area offset (-8 for x86-64) [1] | |||
| 2466 | // (C - A) is the Offset returned by MFI.getObjectOffset for Obj0 [2] | |||
| 2467 | // | |||
| 2468 | // |(E - B)| is the StackSize (absolute value, positive). For a | |||
| 2469 | // stack that grows down, this works out to be (B - E). [3] | |||
| 2470 | // | |||
| 2471 | // E is also the value of %rsp after stack has been set up, and we | |||
| 2472 | // want (C - E) -- the value we can add to %rsp to get to Obj0. Now | |||
| 2473 | // (C - E) == (C - A) - (B - A) + (B - E) | |||
| 2474 | // { Using [1], [2] and [3] above } | |||
| 2475 | // == getObjectOffset - LocalAreaOffset + StackSize | |||
| 2476 | ||||
| 2477 | return getFrameIndexReferenceSP(MF, FI, FrameReg, StackSize); | |||
| 2478 | } | |||
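As a sanity check on the identity derived in the comment above, here is a tiny self-contained example with hypothetical numbers, assuming x86-64's -8 local-area offset; it is an illustration, not the LLVM API.

#include <cassert>
#include <cstdint>

int main() {
  const int64_t LocalAreaOffset = -8; // (B - A) on x86-64
  const int64_t ObjectOffset = -24;   // (C - A), what MFI.getObjectOffset would report
  const int64_t StackSize = 32;       // (B - E), frame size established by the prologue
  // Offset to add to %rsp (after the prologue) to reach Obj0:
  const int64_t OffsetFromSP = ObjectOffset - LocalAreaOffset + StackSize;
  assert(OffsetFromSP == 16); // Obj0 would be addressed as 16(%rsp)
  return 0;
}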
| 2479 | ||||
| 2480 | bool X86FrameLowering::assignCalleeSavedSpillSlots( | |||
| 2481 | MachineFunction &MF, const TargetRegisterInfo *TRI, | |||
| 2482 | std::vector<CalleeSavedInfo> &CSI) const { | |||
| 2483 | MachineFrameInfo &MFI = MF.getFrameInfo(); | |||
| 2484 | X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); | |||
| 2485 | ||||
| 2486 | unsigned CalleeSavedFrameSize = 0; | |||
| 2487 | unsigned XMMCalleeSavedFrameSize = 0; | |||
| 2488 | auto &WinEHXMMSlotInfo = X86FI->getWinEHXMMSlotInfo(); | |||
| 2489 | int SpillSlotOffset = getOffsetOfLocalArea() + X86FI->getTCReturnAddrDelta(); | |||
| 2490 | ||||
| 2491 | int64_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta(); | |||
| 2492 | ||||
| 2493 | if (TailCallReturnAddrDelta < 0) { | |||
| 2494 | // create RETURNADDR area | |||
| 2495 | // arg | |||
| 2496 | // arg | |||
| 2497 | // RETADDR | |||
| 2498 | // { ... | |||
| 2499 | // RETADDR area | |||
| 2500 | // ... | |||
| 2501 | // } | |||
| 2502 | // [EBP] | |||
| 2503 | MFI.CreateFixedObject(-TailCallReturnAddrDelta, | |||
| 2504 | TailCallReturnAddrDelta - SlotSize, true); | |||
| 2505 | } | |||
| 2506 | ||||
| 2507 | // Spill the BasePtr if it's used. | |||
| 2508 | if (this->TRI->hasBasePointer(MF)) { | |||
| 2509 | // Allocate a spill slot for EBP if we have a base pointer and EH funclets. | |||
| 2510 | if (MF.hasEHFunclets()) { | |||
| 2511 | int FI = MFI.CreateSpillStackObject(SlotSize, Align(SlotSize)); | |||
| 2512 | X86FI->setHasSEHFramePtrSave(true); | |||
| 2513 | X86FI->setSEHFramePtrSaveIndex(FI); | |||
| 2514 | } | |||
| 2515 | } | |||
| 2516 | ||||
| 2517 | if (hasFP(MF)) { | |||
| 2518 | // emitPrologue always spills the frame register first. | |||
| 2519 | SpillSlotOffset -= SlotSize; | |||
| 2520 | MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset); | |||
| 2521 | ||||
| 2522 | // The async context lives directly before the frame pointer, and we | |||
| 2523 | // allocate a second slot to preserve stack alignment. | |||
| 2524 | if (X86FI->hasSwiftAsyncContext()) { | |||
| 2525 | SpillSlotOffset -= SlotSize; | |||
| 2526 | MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset); | |||
| 2527 | SpillSlotOffset -= SlotSize; | |||
| 2528 | } | |||
| 2529 | ||||
| 2530 | // Since emitPrologue and emitEpilogue will handle spilling and restoring of | |||
| 2531 | // the frame register, we can delete it from the CSI list and not have to worry | |||
| 2532 | // about avoiding it later. | |||
| 2533 | Register FPReg = TRI->getFrameRegister(MF); | |||
| 2534 | for (unsigned i = 0; i < CSI.size(); ++i) { | |||
| 2535 | if (TRI->regsOverlap(CSI[i].getReg(),FPReg)) { | |||
| 2536 | CSI.erase(CSI.begin() + i); | |||
| 2537 | break; | |||
| 2538 | } | |||
| 2539 | } | |||
| 2540 | } | |||
| 2541 | ||||
| 2542 | // Assign slots for GPRs. It increases frame size. | |||
| 2543 | for (unsigned i = CSI.size(); i != 0; --i) { | |||
| 2544 | unsigned Reg = CSI[i - 1].getReg(); | |||
| 2545 | ||||
| 2546 | if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg)) | |||
| 2547 | continue; | |||
| 2548 | ||||
| 2549 | SpillSlotOffset -= SlotSize; | |||
| 2550 | CalleeSavedFrameSize += SlotSize; | |||
| 2551 | ||||
| 2552 | int SlotIndex = MFI.CreateFixedSpillStackObject(SlotSize, SpillSlotOffset); | |||
| 2553 | CSI[i - 1].setFrameIdx(SlotIndex); | |||
| 2554 | } | |||
| 2555 | ||||
| 2556 | X86FI->setCalleeSavedFrameSize(CalleeSavedFrameSize); | |||
| 2557 | MFI.setCVBytesOfCalleeSavedRegisters(CalleeSavedFrameSize); | |||
| 2558 | ||||
| 2559 | // Assign slots for XMMs. | |||
| 2560 | for (unsigned i = CSI.size(); i != 0; --i) { | |||
| 2561 | unsigned Reg = CSI[i - 1].getReg(); | |||
| 2562 | if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg)) | |||
| 2563 | continue; | |||
| 2564 | ||||
| 2565 | // If this is k-register make sure we lookup via the largest legal type. | |||
| 2566 | MVT VT = MVT::Other; | |||
| 2567 | if (X86::VK16RegClass.contains(Reg)) | |||
| 2568 | VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1; | |||
| 2569 | ||||
| 2570 | const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT); | |||
| 2571 | unsigned Size = TRI->getSpillSize(*RC); | |||
| 2572 | Align Alignment = TRI->getSpillAlign(*RC); | |||
| 2573 | // ensure alignment | |||
| 2574 | assert(SpillSlotOffset < 0 && "SpillSlotOffset should always < 0 on X86")((void)0); | |||
| 2575 | SpillSlotOffset = -alignTo(-SpillSlotOffset, Alignment); | |||
| 2576 | ||||
| 2577 | // spill into slot | |||
| 2578 | SpillSlotOffset -= Size; | |||
| 2579 | int SlotIndex = MFI.CreateFixedSpillStackObject(Size, SpillSlotOffset); | |||
| 2580 | CSI[i - 1].setFrameIdx(SlotIndex); | |||
| 2581 | MFI.ensureMaxAlignment(Alignment); | |||
| 2582 | ||||
| 2583 | // Save the start offset and size of XMM in stack frame for funclets. | |||
| 2584 | if (X86::VR128RegClass.contains(Reg)) { | |||
| 2585 | WinEHXMMSlotInfo[SlotIndex] = XMMCalleeSavedFrameSize; | |||
| 2586 | XMMCalleeSavedFrameSize += Size; | |||
| 2587 | } | |||
| 2588 | } | |||
| 2589 | ||||
| 2590 | return true; | |||
| 2591 | } | |||
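The slot-assignment loops above walk SpillSlotOffset downward from the local area, re-aligning before each non-GPR spill via SpillSlotOffset = -alignTo(-SpillSlotOffset, Alignment). A small standalone sketch of that arithmetic for a hypothetical frame (not the LLVM API):

#include <cassert>
#include <cstdint>

static uint64_t alignUpTo(uint64_t Value, uint64_t Alignment) {
  return (Value + Alignment - 1) & ~(Alignment - 1); // power-of-two alignment assumed
}

int main() {
  int64_t SpillSlotOffset = -8;      // local area offset on x86-64, no tail-call delta
  SpillSlotOffset -= 8;              // saved frame pointer          -> -16
  SpillSlotOffset -= 8;              // first GPR CSR                -> -24
  SpillSlotOffset -= 8;              // second GPR CSR               -> -32
  // Re-align for a 16-byte XMM spill, then allocate its 16 bytes.
  SpillSlotOffset = -(int64_t)alignUpTo(-SpillSlotOffset, 16); // already aligned: -32
  SpillSlotOffset -= 16;                                       // XMM slot starts at -48
  assert(SpillSlotOffset == -48);
  return 0;
}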
| 2592 | ||||
| 2593 | bool X86FrameLowering::spillCalleeSavedRegisters( | |||
| 2594 | MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, | |||
| 2595 | ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const { | |||
| 2596 | DebugLoc DL = MBB.findDebugLoc(MI); | |||
| 2597 | ||||
| 2598 | // Don't save CSRs in 32-bit EH funclets. The caller saves EBX, EBP, ESI, EDI | |||
| 2599 | // for us, and there are no XMM CSRs on Win32. | |||
| 2600 | if (MBB.isEHFuncletEntry() && STI.is32Bit() && STI.isOSWindows()) | |||
| 2601 | return true; | |||
| 2602 | ||||
| 2603 | // Push GPRs. It increases frame size. | |||
| 2604 | const MachineFunction &MF = *MBB.getParent(); | |||
| 2605 | unsigned Opc = STI.is64Bit() ? X86::PUSH64r : X86::PUSH32r; | |||
| 2606 | for (unsigned i = CSI.size(); i != 0; --i) { | |||
| 2607 | unsigned Reg = CSI[i - 1].getReg(); | |||
| 2608 | ||||
| 2609 | if (!X86::GR64RegClass.contains(Reg) && !X86::GR32RegClass.contains(Reg)) | |||
| 2610 | continue; | |||
| 2611 | ||||
| 2612 | const MachineRegisterInfo &MRI = MF.getRegInfo(); | |||
| 2613 | bool isLiveIn = MRI.isLiveIn(Reg); | |||
| 2614 | if (!isLiveIn) | |||
| 2615 | MBB.addLiveIn(Reg); | |||
| 2616 | ||||
| 2617 | // Decide whether we can add a kill flag to the use. | |||
| 2618 | bool CanKill = !isLiveIn; | |||
| 2619 | // Check if any subregister is live-in | |||
| 2620 | if (CanKill) { | |||
| 2621 | for (MCRegAliasIterator AReg(Reg, TRI, false); AReg.isValid(); ++AReg) { | |||
| 2622 | if (MRI.isLiveIn(*AReg)) { | |||
| 2623 | CanKill = false; | |||
| 2624 | break; | |||
| 2625 | } | |||
| 2626 | } | |||
| 2627 | } | |||
| 2628 | ||||
| 2629 | // Do not set a kill flag on values that are also marked as live-in. This | |||
| 2630 | // happens with the @llvm.returnaddress intrinsic and with arguments | |||
| 2631 | // passed in callee saved registers. | |||
| 2632 | // Omitting the kill flags is conservatively correct even if the live-in | |||
| 2633 | // is not used after all. | |||
| 2634 | BuildMI(MBB, MI, DL, TII.get(Opc)).addReg(Reg, getKillRegState(CanKill)) | |||
| 2635 | .setMIFlag(MachineInstr::FrameSetup); | |||
| 2636 | } | |||
| 2637 | ||||
| 2638 | // Spill the XMM registers. X86 has no push/pop instructions for XMM registers, | |||
| 2639 | // so they are spilled to slots in the stack frame instead. | |||
| 2640 | for (unsigned i = CSI.size(); i != 0; --i) { | |||
| 2641 | unsigned Reg = CSI[i-1].getReg(); | |||
| 2642 | if (X86::GR64RegClass.contains(Reg) || X86::GR32RegClass.contains(Reg)) | |||
| 2643 | continue; | |||
| 2644 | ||||
| 2645 | // If this is k-register make sure we lookup via the largest legal type. | |||
| 2646 | MVT VT = MVT::Other; | |||
| 2647 | if (X86::VK16RegClass.contains(Reg)) | |||
| 2648 | VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1; | |||
| 2649 | ||||
| 2650 | // Add the callee-saved register as live-in. It's killed at the spill. | |||
| 2651 | MBB.addLiveIn(Reg); | |||
| 2652 | const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT); | |||
| 2653 | ||||
| 2654 | TII.storeRegToStackSlot(MBB, MI, Reg, true, CSI[i - 1].getFrameIdx(), RC, | |||
| 2655 | TRI); | |||
| 2656 | --MI; | |||
| 2657 | MI->setFlag(MachineInstr::FrameSetup); | |||
| 2658 | ++MI; | |||
| 2659 | } | |||
| 2660 | ||||
| 2661 | return true; | |||
| 2662 | } | |||
| 2663 | ||||
| 2664 | void X86FrameLowering::emitCatchRetReturnValue(MachineBasicBlock &MBB, | |||
| 2665 | MachineBasicBlock::iterator MBBI, | |||
| 2666 | MachineInstr *CatchRet) const { | |||
| 2667 | // SEH shouldn't use catchret. | |||
| 2668 | assert(!isAsynchronousEHPersonality(classifyEHPersonality(((void)0) | |||
| 2669 | MBB.getParent()->getFunction().getPersonalityFn())) &&((void)0) | |||
| 2670 | "SEH should not use CATCHRET")((void)0); | |||
| 2671 | const DebugLoc &DL = CatchRet->getDebugLoc(); | |||
| 2672 | MachineBasicBlock *CatchRetTarget = CatchRet->getOperand(0).getMBB(); | |||
| 2673 | ||||
| 2674 | // Fill EAX/RAX with the address of the target block. | |||
| 2675 | if (STI.is64Bit()) { | |||
| 2676 | // LEA64r CatchRetTarget(%rip), %rax | |||
| 2677 | BuildMI(MBB, MBBI, DL, TII.get(X86::LEA64r), X86::RAX) | |||
| 2678 | .addReg(X86::RIP) | |||
| 2679 | .addImm(0) | |||
| 2680 | .addReg(0) | |||
| 2681 | .addMBB(CatchRetTarget) | |||
| 2682 | .addReg(0); | |||
| 2683 | } else { | |||
| 2684 | // MOV32ri $CatchRetTarget, %eax | |||
| 2685 | BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32ri), X86::EAX) | |||
| 2686 | .addMBB(CatchRetTarget); | |||
| 2687 | } | |||
| 2688 | ||||
| 2689 | // Record that we've taken the address of CatchRetTarget and no longer just | |||
| 2690 | // reference it in a terminator. | |||
| 2691 | CatchRetTarget->setHasAddressTaken(); | |||
| 2692 | } | |||
| 2693 | ||||
| 2694 | bool X86FrameLowering::restoreCalleeSavedRegisters( | |||
| 2695 | MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, | |||
| 2696 | MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const { | |||
| 2697 | if (CSI.empty()) | |||
| 2698 | return false; | |||
| 2699 | ||||
| 2700 | if (MI != MBB.end() && isFuncletReturnInstr(*MI) && STI.isOSWindows()) { | |||
| 2701 | // Don't restore CSRs in 32-bit EH funclets. Matches | |||
| 2702 | // spillCalleeSavedRegisters. | |||
| 2703 | if (STI.is32Bit()) | |||
| 2704 | return true; | |||
| 2705 | // Don't restore CSRs before an SEH catchret. SEH except blocks do not form | |||
| 2706 | // funclets. emitEpilogue transforms these to normal jumps. | |||
| 2707 | if (MI->getOpcode() == X86::CATCHRET) { | |||
| 2708 | const Function &F = MBB.getParent()->getFunction(); | |||
| 2709 | bool IsSEH = isAsynchronousEHPersonality( | |||
| 2710 | classifyEHPersonality(F.getPersonalityFn())); | |||
| 2711 | if (IsSEH) | |||
| 2712 | return true; | |||
| 2713 | } | |||
| 2714 | } | |||
| 2715 | ||||
| 2716 | DebugLoc DL = MBB.findDebugLoc(MI); | |||
| 2717 | ||||
| 2718 | // Reload XMMs from stack frame. | |||
| 2719 | for (unsigned i = 0, e = CSI.size(); i != e; ++i) { | |||
| 2720 | unsigned Reg = CSI[i].getReg(); | |||
| 2721 | if (X86::GR64RegClass.contains(Reg) || | |||
| 2722 | X86::GR32RegClass.contains(Reg)) | |||
| 2723 | continue; | |||
| 2724 | ||||
| 2725 | // If this is k-register make sure we lookup via the largest legal type. | |||
| 2726 | MVT VT = MVT::Other; | |||
| 2727 | if (X86::VK16RegClass.contains(Reg)) | |||
| 2728 | VT = STI.hasBWI() ? MVT::v64i1 : MVT::v16i1; | |||
| 2729 | ||||
| 2730 | const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg, VT); | |||
| 2731 | TII.loadRegFromStackSlot(MBB, MI, Reg, CSI[i].getFrameIdx(), RC, TRI); | |||
| 2732 | } | |||
| 2733 | ||||
| 2734 | // POP GPRs. | |||
| 2735 | unsigned Opc = STI.is64Bit() ? X86::POP64r : X86::POP32r; | |||
| 2736 | for (unsigned i = 0, e = CSI.size(); i != e; ++i) { | |||
| 2737 | unsigned Reg = CSI[i].getReg(); | |||
| 2738 | if (!X86::GR64RegClass.contains(Reg) && | |||
| 2739 | !X86::GR32RegClass.contains(Reg)) | |||
| 2740 | continue; | |||
| 2741 | ||||
| 2742 | BuildMI(MBB, MI, DL, TII.get(Opc), Reg) | |||
| 2743 | .setMIFlag(MachineInstr::FrameDestroy); | |||
| 2744 | } | |||
| 2745 | return true; | |||
| 2746 | } | |||
| 2747 | ||||
| 2748 | void X86FrameLowering::determineCalleeSaves(MachineFunction &MF, | |||
| 2749 | BitVector &SavedRegs, | |||
| 2750 | RegScavenger *RS) const { | |||
| 2751 | TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); | |||
| 2752 | ||||
| 2753 | // Spill the BasePtr if it's used. | |||
| 2754 | if (TRI->hasBasePointer(MF)){ | |||
| 2755 | Register BasePtr = TRI->getBaseRegister(); | |||
| 2756 | if (STI.isTarget64BitILP32()) | |||
| 2757 | BasePtr = getX86SubSuperRegister(BasePtr, 64); | |||
| 2758 | SavedRegs.set(BasePtr); | |||
| 2759 | } | |||
| 2760 | } | |||
| 2761 | ||||
| 2762 | static bool | |||
| 2763 | HasNestArgument(const MachineFunction *MF) { | |||
| 2764 | const Function &F = MF->getFunction(); | |||
| 2765 | for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); | |||
| 2766 | I != E; I++) { | |||
| 2767 | if (I->hasNestAttr() && !I->use_empty()) | |||
| 2768 | return true; | |||
| 2769 | } | |||
| 2770 | return false; | |||
| 2771 | } | |||
| 2772 | ||||
| 2773 | /// GetScratchRegister - Get a temp register for performing work in the | |||
| 2774 | /// segmented stack and the Erlang/HiPE stack prologue. Depending on platform | |||
| 2775 | /// and the properties of the function either one or two registers will be | |||
| 2776 | /// needed. Set primary to true for the first register, false for the second. | |||
| 2777 | static unsigned | |||
| 2778 | GetScratchRegister(bool Is64Bit, bool IsLP64, const MachineFunction &MF, bool Primary) { | |||
| 2779 | CallingConv::ID CallingConvention = MF.getFunction().getCallingConv(); | |||
| 2780 | ||||
| 2781 | // Erlang stuff. | |||
| 2782 | if (CallingConvention == CallingConv::HiPE) { | |||
| 2783 | if (Is64Bit) | |||
| 2784 | return Primary ? X86::R14 : X86::R13; | |||
| 2785 | else | |||
| 2786 | return Primary ? X86::EBX : X86::EDI; | |||
| 2787 | } | |||
| 2788 | ||||
| 2789 | if (Is64Bit) { | |||
| 2790 | if (IsLP64) | |||
| 2791 | return Primary ? X86::R11 : X86::R12; | |||
| 2792 | else | |||
| 2793 | return Primary ? X86::R11D : X86::R12D; | |||
| 2794 | } | |||
| 2795 | ||||
| 2796 | bool IsNested = HasNestArgument(&MF); | |||
| 2797 | ||||
| 2798 | if (CallingConvention == CallingConv::X86_FastCall || | |||
| 2799 | CallingConvention == CallingConv::Fast || | |||
| 2800 | CallingConvention == CallingConv::Tail) { | |||
| 2801 | if (IsNested) | |||
| 2802 | report_fatal_error("Segmented stacks does not support fastcall with " | |||
| 2803 | "nested function."); | |||
| 2804 | return Primary ? X86::EAX : X86::ECX; | |||
| 2805 | } | |||
| 2806 | if (IsNested) | |||
| 2807 | return Primary ? X86::EDX : X86::EAX; | |||
| 2808 | return Primary ? X86::ECX : X86::EAX; | |||
| 2809 | } | |||
| 2810 | ||||
| 2811 | // The stack limit in the TCB is set to this many bytes above the actual stack | |||
| 2812 | // limit. | |||
| 2813 | static const uint64_t kSplitStackAvailable = 256; | |||
| 2814 | ||||
| 2815 | void X86FrameLowering::adjustForSegmentedStacks( | |||
| 2816 | MachineFunction &MF, MachineBasicBlock &PrologueMBB) const { | |||
| 2817 | MachineFrameInfo &MFI = MF.getFrameInfo(); | |||
| 2818 | uint64_t StackSize; | |||
| 2819 | unsigned TlsReg, TlsOffset; | |||
| 2820 | DebugLoc DL; | |||
| 2821 | ||||
| 2822 | // To support shrink-wrapping we would need to insert the new blocks | |||
| 2823 | // at the right place and update the branches to PrologueMBB. | |||
| 2824 | assert(&(*MF.begin()) == &PrologueMBB && "Shrink-wrapping not supported yet")((void)0); | |||
| 2825 | ||||
| 2826 | unsigned ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true); | |||
| 2827 | assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&((void)0) | |||
| 2828 | "Scratch register is live-in")((void)0); | |||
| 2829 | ||||
| 2830 | if (MF.getFunction().isVarArg()) | |||
| 2831 | report_fatal_error("Segmented stacks do not support vararg functions."); | |||
| 2832 | if (!STI.isTargetLinux() && !STI.isTargetDarwin() && !STI.isTargetWin32() && | |||
| 2833 | !STI.isTargetWin64() && !STI.isTargetFreeBSD() && | |||
| 2834 | !STI.isTargetDragonFly()) | |||
| 2835 | report_fatal_error("Segmented stacks not supported on this platform."); | |||
| 2836 | ||||
| 2837 | // Eventually StackSize will be calculated by a link-time pass, which will | |||
| 2838 | // also decide whether checking code needs to be injected into this particular | |||
| 2839 | // prologue. | |||
| 2840 | StackSize = MFI.getStackSize(); | |||
| 2841 | ||||
| 2842 | // Do not generate a prologue for leaf functions with a stack of size zero. | |||
| 2843 | // For non-leaf functions we have to allow for the possibility that the | |||
| 2844 | // call is to a non-split function, as in PR37807. This function could also | |||
| 2845 | // take the address of a non-split function. When the linker tries to adjust | |||
| 2846 | // its non-existent prologue, it would fail with an error. Mark the object | |||
| 2847 | // file so that such failures are not errors. See this Go language bug-report | |||
| 2848 | // https://go-review.googlesource.com/c/go/+/148819/ | |||
| 2849 | if (StackSize == 0 && !MFI.hasTailCall()) { | |||
| 2850 | MF.getMMI().setHasNosplitStack(true); | |||
| 2851 | return; | |||
| 2852 | } | |||
| 2853 | ||||
| 2854 | MachineBasicBlock *allocMBB = MF.CreateMachineBasicBlock(); | |||
| 2855 | MachineBasicBlock *checkMBB = MF.CreateMachineBasicBlock(); | |||
| 2856 | X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); | |||
| 2857 | bool IsNested = false; | |||
| 2858 | ||||
| 2859 | // We need to know if the function has a nest argument only in 64 bit mode. | |||
| 2860 | if (Is64Bit) | |||
| 2861 | IsNested = HasNestArgument(&MF); | |||
| 2862 | ||||
| 2863 | // The MOV R10, RAX needs to be in a different block, since the RET we emit in | |||
| 2864 | // allocMBB needs to be the last (terminating) instruction. | |||
| 2865 | ||||
| 2866 | for (const auto &LI : PrologueMBB.liveins()) { | |||
| 2867 | allocMBB->addLiveIn(LI); | |||
| 2868 | checkMBB->addLiveIn(LI); | |||
| 2869 | } | |||
| 2870 | ||||
| 2871 | if (IsNested) | |||
| 2872 | allocMBB->addLiveIn(IsLP64 ? X86::R10 : X86::R10D); | |||
| 2873 | ||||
| 2874 | MF.push_front(allocMBB); | |||
| 2875 | MF.push_front(checkMBB); | |||
| 2876 | ||||
| 2877 | // When the frame size is less than 256 we just compare the stack | |||
| 2878 | // boundary directly to the value of the stack pointer, per gcc. | |||
| 2879 | bool CompareStackPointer = StackSize < kSplitStackAvailable; | |||
| 2880 | ||||
| 2881 | // Read the limit of the current stacklet from the stack_guard location. | |||
| 2882 | if (Is64Bit) { | |||
| 2883 | if (STI.isTargetLinux()) { | |||
| 2884 | TlsReg = X86::FS; | |||
| 2885 | TlsOffset = IsLP64 ? 0x70 : 0x40; | |||
| 2886 | } else if (STI.isTargetDarwin()) { | |||
| 2887 | TlsReg = X86::GS; | |||
| 2888 | TlsOffset = 0x60 + 90*8; // See pthread_machdep.h. Steal TLS slot 90. | |||
| 2889 | } else if (STI.isTargetWin64()) { | |||
| 2890 | TlsReg = X86::GS; | |||
| 2891 | TlsOffset = 0x28; // pvArbitrary, reserved for application use | |||
| 2892 | } else if (STI.isTargetFreeBSD()) { | |||
| 2893 | TlsReg = X86::FS; | |||
| 2894 | TlsOffset = 0x18; | |||
| 2895 | } else if (STI.isTargetDragonFly()) { | |||
| 2896 | TlsReg = X86::FS; | |||
| 2897 | TlsOffset = 0x20; // use tls_tcb.tcb_segstack | |||
| 2898 | } else { | |||
| 2899 | report_fatal_error("Segmented stacks not supported on this platform."); | |||
| 2900 | } | |||
| 2901 | ||||
| 2902 | if (CompareStackPointer) | |||
| 2903 | ScratchReg = IsLP64 ? X86::RSP : X86::ESP; | |||
| 2904 | else | |||
| 2905 | BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::LEA64r : X86::LEA64_32r), ScratchReg).addReg(X86::RSP) | |||
| 2906 | .addImm(1).addReg(0).addImm(-StackSize).addReg(0); | |||
| 2907 | ||||
| 2908 | BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::CMP64rm : X86::CMP32rm)).addReg(ScratchReg) | |||
| 2909 | .addReg(0).addImm(1).addReg(0).addImm(TlsOffset).addReg(TlsReg); | |||
| 2910 | } else { | |||
| 2911 | if (STI.isTargetLinux()) { | |||
| 2912 | TlsReg = X86::GS; | |||
| 2913 | TlsOffset = 0x30; | |||
| 2914 | } else if (STI.isTargetDarwin()) { | |||
| 2915 | TlsReg = X86::GS; | |||
| 2916 | TlsOffset = 0x48 + 90*4; | |||
| 2917 | } else if (STI.isTargetWin32()) { | |||
| 2918 | TlsReg = X86::FS; | |||
| 2919 | TlsOffset = 0x14; // pvArbitrary, reserved for application use | |||
| 2920 | } else if (STI.isTargetDragonFly()) { | |||
| 2921 | TlsReg = X86::FS; | |||
| 2922 | TlsOffset = 0x10; // use tls_tcb.tcb_segstack | |||
| 2923 | } else if (STI.isTargetFreeBSD()) { | |||
| 2924 | report_fatal_error("Segmented stacks not supported on FreeBSD i386."); | |||
| 2925 | } else { | |||
| 2926 | report_fatal_error("Segmented stacks not supported on this platform."); | |||
| 2927 | } | |||
| 2928 | ||||
| 2929 | if (CompareStackPointer) | |||
| 2930 | ScratchReg = X86::ESP; | |||
| 2931 | else | |||
| 2932 | BuildMI(checkMBB, DL, TII.get(X86::LEA32r), ScratchReg).addReg(X86::ESP) | |||
| 2933 | .addImm(1).addReg(0).addImm(-StackSize).addReg(0); | |||
| 2934 | ||||
| 2935 | if (STI.isTargetLinux() || STI.isTargetWin32() || STI.isTargetWin64() || | |||
| 2936 | STI.isTargetDragonFly()) { | |||
| 2937 | BuildMI(checkMBB, DL, TII.get(X86::CMP32rm)).addReg(ScratchReg) | |||
| 2938 | .addReg(0).addImm(0).addReg(0).addImm(TlsOffset).addReg(TlsReg); | |||
| 2939 | } else if (STI.isTargetDarwin()) { | |||
| 2940 | ||||
| 2941 | // TlsOffset doesn't fit into a mod r/m byte so we need an extra register. | |||
| 2942 | unsigned ScratchReg2; | |||
| 2943 | bool SaveScratch2; | |||
| 2944 | if (CompareStackPointer) { | |||
| 2945 | // The primary scratch register is available for holding the TLS offset. | |||
| 2946 | ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, true); | |||
| 2947 | SaveScratch2 = false; | |||
| 2948 | } else { | |||
| 2949 | // Need to use a second register to hold the TLS offset | |||
| 2950 | ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, false); | |||
| 2951 | ||||
| 2952 | // Unfortunately, with fastcc the second scratch register may hold an | |||
| 2953 | // argument. | |||
| 2954 | SaveScratch2 = MF.getRegInfo().isLiveIn(ScratchReg2); | |||
| 2955 | } | |||
| 2956 | ||||
| 2957 | // If Scratch2 is live-in then it needs to be saved. | |||
| 2958 | assert((!MF.getRegInfo().isLiveIn(ScratchReg2) || SaveScratch2) &&((void)0) | |||
| 2959 | "Scratch register is live-in and not saved")((void)0); | |||
| 2960 | ||||
| 2961 | if (SaveScratch2) | |||
| 2962 | BuildMI(checkMBB, DL, TII.get(X86::PUSH32r)) | |||
| 2963 | .addReg(ScratchReg2, RegState::Kill); | |||
| 2964 | ||||
| 2965 | BuildMI(checkMBB, DL, TII.get(X86::MOV32ri), ScratchReg2) | |||
| 2966 | .addImm(TlsOffset); | |||
| 2967 | BuildMI(checkMBB, DL, TII.get(X86::CMP32rm)) | |||
| 2968 | .addReg(ScratchReg) | |||
| 2969 | .addReg(ScratchReg2).addImm(1).addReg(0) | |||
| 2970 | .addImm(0) | |||
| 2971 | .addReg(TlsReg); | |||
| 2972 | ||||
| 2973 | if (SaveScratch2) | |||
| 2974 | BuildMI(checkMBB, DL, TII.get(X86::POP32r), ScratchReg2); | |||
| 2975 | } | |||
| 2976 | } | |||
| 2977 | ||||
| 2978 | // This jump is taken if SP >= (Stacklet Limit + Stack Space required). | |||
| 2979 | // It jumps to normal execution of the function body. | |||
| 2980 | BuildMI(checkMBB, DL, TII.get(X86::JCC_1)).addMBB(&PrologueMBB).addImm(X86::COND_A); | |||
| 2981 | ||||
| 2982 | // On 32 bit we first push the arguments size and then the frame size. On 64 | |||
| 2983 | // bit, we pass the stack frame size in r10 and the argument size in r11. | |||
| 2984 | if (Is64Bit) { | |||
| 2985 | // Functions with nested arguments use R10, so it needs to be saved across | |||
| 2986 | // the call to _morestack | |||
| 2987 | ||||
| 2988 | const unsigned RegAX = IsLP64 ? X86::RAX : X86::EAX; | |||
| 2989 | const unsigned Reg10 = IsLP64 ? X86::R10 : X86::R10D; | |||
| 2990 | const unsigned Reg11 = IsLP64 ? X86::R11 : X86::R11D; | |||
| 2991 | const unsigned MOVrr = IsLP64 ? X86::MOV64rr : X86::MOV32rr; | |||
| 2992 | const unsigned MOVri = IsLP64 ? X86::MOV64ri : X86::MOV32ri; | |||
| 2993 | ||||
| 2994 | if (IsNested) | |||
| 2995 | BuildMI(allocMBB, DL, TII.get(MOVrr), RegAX).addReg(Reg10); | |||
| 2996 | ||||
| 2997 | BuildMI(allocMBB, DL, TII.get(MOVri), Reg10) | |||
| 2998 | .addImm(StackSize); | |||
| 2999 | BuildMI(allocMBB, DL, TII.get(MOVri), Reg11) | |||
| 3000 | .addImm(X86FI->getArgumentStackSize()); | |||
| 3001 | } else { | |||
| 3002 | BuildMI(allocMBB, DL, TII.get(X86::PUSHi32)) | |||
| 3003 | .addImm(X86FI->getArgumentStackSize()); | |||
| 3004 | BuildMI(allocMBB, DL, TII.get(X86::PUSHi32)) | |||
| 3005 | .addImm(StackSize); | |||
| 3006 | } | |||
| 3007 | ||||
| 3008 | // __morestack is in libgcc | |||
| 3009 | if (Is64Bit && MF.getTarget().getCodeModel() == CodeModel::Large) { | |||
| 3010 | // Under the large code model, we cannot assume that __morestack lives | |||
| 3011 | // within 2^31 bytes of the call site, so we cannot use pc-relative | |||
| 3012 | // addressing. We cannot perform the call via a temporary register, | |||
| 3013 | // as the rax register may be used to store the static chain, and all | |||
| 3014 | // other suitable registers may be either callee-save or used for | |||
| 3015 | // parameter passing. We cannot use the stack at this point either | |||
| 3016 | // because __morestack manipulates the stack directly. | |||
| 3017 | // | |||
| 3018 | // To avoid these issues, perform an indirect call via a read-only memory | |||
| 3019 | // location containing the address. | |||
| 3020 | // | |||
| 3021 | // This solution is not perfect, as it assumes that the .rodata section | |||
| 3022 | // is laid out within 2^31 bytes of each function body, but this seems | |||
| 3023 | // to be sufficient for JIT. | |||
| 3024 | // FIXME: Add retpoline support and remove the error here. | |||
| 3025 | if (STI.useIndirectThunkCalls()) | |||
| 3026 | report_fatal_error("Emitting morestack calls on 64-bit with the large " | |||
| 3027 | "code model and thunks not yet implemented."); | |||
| 3028 | BuildMI(allocMBB, DL, TII.get(X86::CALL64m)) | |||
| 3029 | .addReg(X86::RIP) | |||
| 3030 | .addImm(0) | |||
| 3031 | .addReg(0) | |||
| 3032 | .addExternalSymbol("__morestack_addr") | |||
| 3033 | .addReg(0); | |||
| 3034 | MF.getMMI().setUsesMorestackAddr(true); | |||
| 3035 | } else { | |||
| 3036 | if (Is64Bit) | |||
| 3037 | BuildMI(allocMBB, DL, TII.get(X86::CALL64pcrel32)) | |||
| 3038 | .addExternalSymbol("__morestack"); | |||
| 3039 | else | |||
| 3040 | BuildMI(allocMBB, DL, TII.get(X86::CALLpcrel32)) | |||
| 3041 | .addExternalSymbol("__morestack"); | |||
| 3042 | } | |||
| 3043 | ||||
| 3044 | if (IsNested) | |||
| 3045 | BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET_RESTORE_R10)); | |||
| 3046 | else | |||
| 3047 | BuildMI(allocMBB, DL, TII.get(X86::MORESTACK_RET)); | |||
| 3048 | ||||
| 3049 | allocMBB->addSuccessor(&PrologueMBB); | |||
| 3050 | ||||
| 3051 | checkMBB->addSuccessor(allocMBB, BranchProbability::getZero()); | |||
| 3052 | checkMBB->addSuccessor(&PrologueMBB, BranchProbability::getOne()); | |||
| 3053 | ||||
| 3054 | #ifdef EXPENSIVE_CHECKS | |||
| 3055 | MF.verify(); | |||
| 3056 | #endif | |||
| 3057 | } | |||
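A small sketch (assumed constants, not the LLVM API) of the choice made in the prologue above: for frames smaller than the 256-byte slack kept above the recorded stacklet limit, the check compares the stack pointer itself against the TLS limit slot; larger frames first compute SP - StackSize with an LEA and compare that value instead.

#include <cassert>
#include <cstdint>

constexpr uint64_t kSplitStackAvailable = 256;

// Returns true when the cheaper "compare RSP directly" form can be used.
bool canCompareStackPointerDirectly(uint64_t StackSize) {
  return StackSize < kSplitStackAvailable;
}

int main() {
  assert(canCompareStackPointerDirectly(64));    // small frame: CMP uses RSP itself
  assert(!canCompareStackPointerDirectly(4096)); // large frame: LEA -StackSize(RSP) first
  return 0;
}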
| 3058 | ||||
| 3059 | /// Lookup an ERTS parameter in the !hipe.literals named metadata node. | |||
| 3060 | /// HiPE provides Erlang Runtime System-internal parameters, such as PCB offsets | |||
| 3061 | /// to fields it needs, through a named metadata node "hipe.literals" containing | |||
| 3062 | /// name-value pairs. | |||
| 3063 | static unsigned getHiPELiteral( | |||
| 3064 | NamedMDNode *HiPELiteralsMD, const StringRef LiteralName) { | |||
| 3065 | for (int i = 0, e = HiPELiteralsMD->getNumOperands(); i != e; ++i) { | |||
| 3066 | MDNode *Node = HiPELiteralsMD->getOperand(i); | |||
| 3067 | if (Node->getNumOperands() != 2) continue; | |||
| 3068 | MDString *NodeName = dyn_cast<MDString>(Node->getOperand(0)); | |||
| 3069 | ValueAsMetadata *NodeVal = dyn_cast<ValueAsMetadata>(Node->getOperand(1)); | |||
| 3070 | if (!NodeName || !NodeVal) continue; | |||
| 3071 | ConstantInt *ValConst = dyn_cast_or_null<ConstantInt>(NodeVal->getValue()); | |||
| 3072 | if (ValConst && NodeName->getString() == LiteralName) { | |||
| 3073 | return ValConst->getZExtValue(); | |||
| 3074 | } | |||
| 3075 | } | |||
| 3076 | ||||
| 3077 | report_fatal_error("HiPE literal " + LiteralName | |||
| 3078 | + " required but not provided"); | |||
| 3079 | } | |||
| 3080 | ||||
| 3081 | // Return true if there are no non-ehpad successors to MBB and there are no | |||
| 3082 | // non-meta instructions between MBBI and MBB.end(). | |||
| 3083 | static bool blockEndIsUnreachable(const MachineBasicBlock &MBB, | |||
| 3084 | MachineBasicBlock::const_iterator MBBI) { | |||
| 3085 | return llvm::all_of( | |||
| 3086 | MBB.successors(), | |||
| 3087 | [](const MachineBasicBlock *Succ) { return Succ->isEHPad(); }) && | |||
| 3088 | std::all_of(MBBI, MBB.end(), [](const MachineInstr &MI) { | |||
| 3089 | return MI.isMetaInstruction(); | |||
| 3090 | }); | |||
| 3091 | } | |||
| 3092 | ||||
| 3093 | /// Erlang programs may need a special prologue to handle the stack size they | |||
| 3094 | /// might need at runtime. That is because Erlang/OTP does not implement a C | |||
| 3095 | /// stack but uses a custom hybrid stack/heap architecture. | |||
| 3096 | /// (for more information see Eric Stenman's Ph.D. thesis: | |||
| 3097 | /// http://publications.uu.se/uu/fulltext/nbn_se_uu_diva-2688.pdf) | |||
| 3098 | /// | |||
| 3099 | /// CheckStack: | |||
| 3100 | /// temp0 = sp - MaxStack | |||
| 3101 | /// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart | |||
| 3102 | /// OldStart: | |||
| 3103 | /// ... | |||
| 3104 | /// IncStack: | |||
| 3105 | /// call inc_stack # doubles the stack space | |||
| 3106 | /// temp0 = sp - MaxStack | |||
| 3107 | /// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart | |||
| 3108 | void X86FrameLowering::adjustForHiPEPrologue( | |||
| 3109 | MachineFunction &MF, MachineBasicBlock &PrologueMBB) const { | |||
| 3110 | MachineFrameInfo &MFI = MF.getFrameInfo(); | |||
| 3111 | DebugLoc DL; | |||
| 3112 | ||||
| 3113 | // To support shrink-wrapping we would need to insert the new blocks | |||
| 3114 | // at the right place and update the branches to PrologueMBB. | |||
| 3115 | assert(&(*MF.begin()) == &PrologueMBB && "Shrink-wrapping not supported yet")((void)0); | |||
| 3116 | ||||
| 3117 | // HiPE-specific values | |||
| 3118 | NamedMDNode *HiPELiteralsMD = MF.getMMI().getModule() | |||
| 3119 | ->getNamedMetadata("hipe.literals"); | |||
| 3120 | if (!HiPELiteralsMD) | |||
| 3121 | report_fatal_error( | |||
| 3122 | "Can't generate HiPE prologue without runtime parameters"); | |||
| 3123 | const unsigned HipeLeafWords | |||
| 3124 | = getHiPELiteral(HiPELiteralsMD, | |||
| 3125 | Is64Bit ? "AMD64_LEAF_WORDS" : "X86_LEAF_WORDS"); | |||
| 3126 | const unsigned CCRegisteredArgs = Is64Bit ? 6 : 5; | |||
| 3127 | const unsigned Guaranteed = HipeLeafWords * SlotSize; | |||
| 3128 | unsigned CallerStkArity = MF.getFunction().arg_size() > CCRegisteredArgs ? | |||
| 3129 | MF.getFunction().arg_size() - CCRegisteredArgs : 0; | |||
| 3130 | unsigned MaxStack = MFI.getStackSize() + CallerStkArity*SlotSize + SlotSize; | |||
| 3131 | ||||
| 3132 | assert(STI.isTargetLinux() &&((void)0) | |||
| 3133 | "HiPE prologue is only supported on Linux operating systems.")((void)0); | |||
| 3134 | ||||
| 3135 | // Compute the largest caller's frame that is needed to fit the callees' | |||
| 3136 | // frames. This 'MaxStack' is computed from: | |||
| 3137 | // | |||
| 3138 | // a) the fixed frame size, which is the space needed for all spilled temps, | |||
| 3139 | // b) outgoing on-stack parameter areas, and | |||
| 3140 | // c) the minimum stack space this function needs to make available for the | |||
| 3141 | // functions it calls (a tunable ABI property). | |||
| 3142 | if (MFI.hasCalls()) { | |||
| 3143 | unsigned MoreStackForCalls = 0; | |||
| 3144 | ||||
| 3145 | for (auto &MBB : MF) { | |||
| 3146 | for (auto &MI : MBB) { | |||
| 3147 | if (!MI.isCall()) | |||
| 3148 | continue; | |||
| 3149 | ||||
| 3150 | // Get callee operand. | |||
| 3151 | const MachineOperand &MO = MI.getOperand(0); | |||
| 3152 | ||||
| 3153 | // Only take account of global function calls (no closures etc.). | |||
| 3154 | if (!MO.isGlobal()) | |||
| 3155 | continue; | |||
| 3156 | ||||
| 3157 | const Function *F = dyn_cast<Function>(MO.getGlobal()); | |||
| 3158 | if (!F) | |||
| 3159 | continue; | |||
| 3160 | ||||
| 3161 | // Do not update 'MaxStack' for primitive and built-in functions | |||
| 3162 | // (encoded with names either starting with "erlang."/"bif_" or not | |||
| 3163 | // having a ".", such as a simple <Module>.<Function>.<Arity>, or an | |||
| 3164 | // "_", such as the BIF "suspend_0") as they are executed on another | |||
| 3165 | // stack. | |||
| 3166 | if (F->getName().find("erlang.") != StringRef::npos || | |||
| 3167 | F->getName().find("bif_") != StringRef::npos || | |||
| 3168 | F->getName().find_first_of("._") == StringRef::npos) | |||
| 3169 | continue; | |||
| 3170 | ||||
| 3171 | unsigned CalleeStkArity = | |||
| 3172 | F->arg_size() > CCRegisteredArgs ? F->arg_size()-CCRegisteredArgs : 0; | |||
| 3173 | if (HipeLeafWords - 1 > CalleeStkArity) | |||
| 3174 | MoreStackForCalls = std::max(MoreStackForCalls, | |||
| 3175 | (HipeLeafWords - 1 - CalleeStkArity) * SlotSize); | |||
| 3176 | } | |||
| 3177 | } | |||
| 3178 | MaxStack += MoreStackForCalls; | |||
| 3179 | } | |||
| 3180 | ||||
| 3181 | // If the stack frame needed is larger than the guaranteed size, runtime checks | |||
| 3182 | // and calls to the "inc_stack_0" BIF should be inserted in the assembly prologue. | |||
| 3183 | if (MaxStack > Guaranteed) { | |||
| 3184 | MachineBasicBlock *stackCheckMBB = MF.CreateMachineBasicBlock(); | |||
| 3185 | MachineBasicBlock *incStackMBB = MF.CreateMachineBasicBlock(); | |||
| 3186 | ||||
| 3187 | for (const auto &LI : PrologueMBB.liveins()) { | |||
| 3188 | stackCheckMBB->addLiveIn(LI); | |||
| 3189 | incStackMBB->addLiveIn(LI); | |||
| 3190 | } | |||
| 3191 | ||||
| 3192 | MF.push_front(incStackMBB); | |||
| 3193 | MF.push_front(stackCheckMBB); | |||
| 3194 | ||||
| 3195 | unsigned ScratchReg, SPReg, PReg, SPLimitOffset; | |||
| 3196 | unsigned LEAop, CMPop, CALLop; | |||
| 3197 | SPLimitOffset = getHiPELiteral(HiPELiteralsMD, "P_NSP_LIMIT"); | |||
| 3198 | if (Is64Bit) { | |||
| 3199 | SPReg = X86::RSP; | |||
| 3200 | PReg = X86::RBP; | |||
| 3201 | LEAop = X86::LEA64r; | |||
| 3202 | CMPop = X86::CMP64rm; | |||
| 3203 | CALLop = X86::CALL64pcrel32; | |||
| 3204 | } else { | |||
| 3205 | SPReg = X86::ESP; | |||
| 3206 | PReg = X86::EBP; | |||
| 3207 | LEAop = X86::LEA32r; | |||
| 3208 | CMPop = X86::CMP32rm; | |||
| 3209 | CALLop = X86::CALLpcrel32; | |||
| 3210 | } | |||
| 3211 | ||||
| 3212 | ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true); | |||
| 3213 | assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&((void)0) | |||
| 3214 | "HiPE prologue scratch register is live-in")((void)0); | |||
| 3215 | ||||
| 3216 | // Create new MBB for StackCheck: | |||
| 3217 | addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(LEAop), ScratchReg), | |||
| 3218 | SPReg, false, -MaxStack); | |||
| 3219 | // SPLimitOffset is in a fixed heap location (pointed by BP). | |||
| 3220 | addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(CMPop)) | |||
| 3221 | .addReg(ScratchReg), PReg, false, SPLimitOffset); | |||
| 3222 | BuildMI(stackCheckMBB, DL, TII.get(X86::JCC_1)).addMBB(&PrologueMBB).addImm(X86::COND_AE); | |||
| 3223 | ||||
| 3224 | // Create new MBB for IncStack: | |||
| 3225 | BuildMI(incStackMBB, DL, TII.get(CALLop)). | |||
| 3226 | addExternalSymbol("inc_stack_0"); | |||
| 3227 | addRegOffset(BuildMI(incStackMBB, DL, TII.get(LEAop), ScratchReg), | |||
| 3228 | SPReg, false, -MaxStack); | |||
| 3229 | addRegOffset(BuildMI(incStackMBB, DL, TII.get(CMPop)) | |||
| 3230 | .addReg(ScratchReg), PReg, false, SPLimitOffset); | |||
| 3231 | BuildMI(incStackMBB, DL, TII.get(X86::JCC_1)).addMBB(incStackMBB).addImm(X86::COND_LE); | |||
| 3232 | ||||
| 3233 | stackCheckMBB->addSuccessor(&PrologueMBB, {99, 100}); | |||
| 3234 | stackCheckMBB->addSuccessor(incStackMBB, {1, 100}); | |||
| 3235 | incStackMBB->addSuccessor(&PrologueMBB, {99, 100}); | |||
| 3236 | incStackMBB->addSuccessor(incStackMBB, {1, 100}); | |||
| 3237 | } | |||
| 3238 | #ifdef EXPENSIVE_CHECKS | |||
| 3239 | MF.verify(); | |||
| 3240 | #endif | |||
| 3241 | } | |||
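To make the Guaranteed/MaxStack comparison above concrete, here is a worked sketch with hypothetical values. The leaf-words literal is normally read from the !hipe.literals metadata, so the number used below is purely an assumption, and the sketch is not the LLVM API.

#include <algorithm>
#include <cassert>
#include <cstdint>

int main() {
  const unsigned SlotSize = 8, CCRegisteredArgs = 6;   // 64-bit HiPE convention
  const unsigned HipeLeafWords = 24;                   // assumed ERTS literal
  const unsigned Guaranteed = HipeLeafWords * SlotSize;          // 192
  const unsigned StackSize = 40, CallerArgs = 8;                 // hypothetical caller
  const unsigned CallerStkArity =
      CallerArgs > CCRegisteredArgs ? CallerArgs - CCRegisteredArgs : 0; // 2
  unsigned MaxStack = StackSize + CallerStkArity * SlotSize + SlotSize;  // 64

  // One global (non-BIF) callee with 7 arguments -> stack arity 1.
  const unsigned CalleeStkArity = 1;
  unsigned MoreStackForCalls = 0;
  if (HipeLeafWords - 1 > CalleeStkArity)
    MoreStackForCalls = std::max(
        MoreStackForCalls, (HipeLeafWords - 1 - CalleeStkArity) * SlotSize); // 176
  MaxStack += MoreStackForCalls;                                            // 240

  assert(MaxStack > Guaranteed); // the inc_stack_0 check blocks would be emitted
  return 0;
}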
| 3242 | ||||
| 3243 | bool X86FrameLowering::adjustStackWithPops(MachineBasicBlock &MBB, | |||
| 3244 | MachineBasicBlock::iterator MBBI, | |||
| 3245 | const DebugLoc &DL, | |||
| 3246 | int Offset) const { | |||
| 3247 | if (Offset <= 0) | |||
| 3248 | return false; | |||
| 3249 | ||||
| 3250 | if (Offset % SlotSize) | |||
| 3251 | return false; | |||
| 3252 | ||||
| 3253 | int NumPops = Offset / SlotSize; | |||
| 3254 | // This is only worth it if we have at most 2 pops. | |||
| 3255 | if (NumPops != 1 && NumPops != 2) | |||
| 3256 | return false; | |||
| 3257 | ||||
| 3258 | // Handle only the trivial case where the adjustment directly follows | |||
| 3259 | // a call. This is the most common one, anyway. | |||
| 3260 | if (MBBI == MBB.begin()) | |||
| 3261 | return false; | |||
| 3262 | MachineBasicBlock::iterator Prev = std::prev(MBBI); | |||
| 3263 | if (!Prev->isCall() || !Prev->getOperand(1).isRegMask()) | |||
| 3264 | return false; | |||
| 3265 | ||||
| 3266 | unsigned Regs[2]; | |||
| 3267 | unsigned FoundRegs = 0; | |||
| 3268 | ||||
| 3269 | const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); | |||
| 3270 | const MachineOperand &RegMask = Prev->getOperand(1); | |||
| 3271 | ||||
| 3272 | auto &RegClass = | |||
| 3273 | Is64Bit ? X86::GR64_NOREX_NOSPRegClass : X86::GR32_NOREX_NOSPRegClass; | |||
| 3274 | // Try to find up to NumPops free registers. | |||
| 3275 | for (auto Candidate : RegClass) { | |||
| 3276 | // Poor man's liveness: | |||
| 3277 | // Since we're immediately after a call, any register that is clobbered | |||
| 3278 | // by the call and not defined by it can be considered dead. | |||
| 3279 | if (!RegMask.clobbersPhysReg(Candidate)) | |||
| 3280 | continue; | |||
| 3281 | ||||
| 3282 | // Don't clobber reserved registers | |||
| 3283 | if (MRI.isReserved(Candidate)) | |||
| 3284 | continue; | |||
| 3285 | ||||
| 3286 | bool IsDef = false; | |||
| 3287 | for (const MachineOperand &MO : Prev->implicit_operands()) { | |||
| 3288 | if (MO.isReg() && MO.isDef() && | |||
| 3289 | TRI->isSuperOrSubRegisterEq(MO.getReg(), Candidate)) { | |||
| 3290 | IsDef = true; | |||
| 3291 | break; | |||
| 3292 | } | |||
| 3293 | } | |||
| 3294 | ||||
| 3295 | if (IsDef) | |||
| 3296 | continue; | |||
| 3297 | ||||
| 3298 | Regs[FoundRegs++] = Candidate; | |||
| 3299 | if (FoundRegs == (unsigned)NumPops) | |||
| 3300 | break; | |||
| 3301 | } | |||
| 3302 | ||||
| 3303 | if (FoundRegs == 0) | |||
| 3304 | return false; | |||
| 3305 | ||||
| 3306 | // If we found only one free register, but need two, reuse the same one twice. | |||
| 3307 | while (FoundRegs < (unsigned)NumPops) | |||
| 3308 | Regs[FoundRegs++] = Regs[0]; | |||
| 3309 | ||||
| 3310 | for (int i = 0; i < NumPops; ++i) | |||
| 3311 | BuildMI(MBB, MBBI, DL, | |||
| 3312 | TII.get(STI.is64Bit() ? X86::POP64r : X86::POP32r), Regs[i]); | |||
| 3313 | ||||
| 3314 | return true; | |||
| 3315 | } | |||
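A minimal sketch (not the LLVM API) of the profitability test applied above: the adjustment must be a positive multiple of the slot size and require at most two pops into dead registers; anything else falls back to a regular ADD/LEA of the stack pointer.

#include <cassert>

bool worthUsingPops(int Offset, int SlotSize) {
  if (Offset <= 0 || Offset % SlotSize)
    return false;
  const int NumPops = Offset / SlotSize;
  return NumPops == 1 || NumPops == 2;
}

int main() {
  assert(worthUsingPops(16, 8));   // two POP64r into call-clobbered registers
  assert(!worthUsingPops(24, 8));  // three pops: fall back to ADD
  assert(!worthUsingPops(12, 8));  // not slot-aligned
  return 0;
}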
| 3316 | ||||
| 3317 | MachineBasicBlock::iterator X86FrameLowering:: | |||
| 3318 | eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, | |||
| 3319 | MachineBasicBlock::iterator I) const { | |||
| 3320 | bool reserveCallFrame = hasReservedCallFrame(MF); | |||
| 3321 | unsigned Opcode = I->getOpcode(); | |||
| 3322 | bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode(); | |||
| 3323 | DebugLoc DL = I->getDebugLoc(); // copy DebugLoc as I will be erased. | |||
| 3324 | uint64_t Amount = TII.getFrameSize(*I); | |||
| 3325 | uint64_t InternalAmt = (isDestroy || Amount) ? TII.getFrameAdjustment(*I) : 0; | |||
| 3326 | I = MBB.erase(I); | |||
| 3327 | auto InsertPos = skipDebugInstructionsForward(I, MBB.end()); | |||
| 3328 | ||||
| 3329 | // Try to avoid emitting dead SP adjustments if the block end is unreachable, | |||
| 3330 | // typically because the function is marked noreturn (abort, throw, | |||
| 3331 | // assert_fail, etc). | |||
| 3332 | if (isDestroy && blockEndIsUnreachable(MBB, I)) | |||
| 3333 | return I; | |||
| 3334 | ||||
| 3335 | if (!reserveCallFrame) { | |||
| 3336 | // If the stack pointer can be changed after prologue, turn the | |||
| 3337 | // adjcallstackdown instruction into a 'sub ESP, <amt>' and the | |||
| 3338 | // adjcallstackup instruction into an 'add ESP, <amt>'. | |||
| 3339 | ||||
| 3340 | // We need to keep the stack aligned properly. To do this, we round the | |||
| 3341 | // amount of space needed for the outgoing arguments up to the next | |||
| 3342 | // alignment boundary. | |||
| 3343 | Amount = alignTo(Amount, getStackAlign()); | |||
| 3344 | ||||
| 3345 | const Function &F = MF.getFunction(); | |||
| 3346 | bool WindowsCFI = MF.getTarget().getMCAsmInfo()->usesWindowsCFI(); | |||
| 3347 | bool DwarfCFI = !WindowsCFI && MF.needsFrameMoves(); | |||
| 3348 | ||||
| 3349 | // If we have any exception handlers in this function, and we adjust | |||
| 3350 | // the SP before calls, we may need to indicate this to the unwinder | |||
| 3351 | // using GNU_ARGS_SIZE. Note that this may be necessary even when | |||
| 3352 | // Amount == 0, because the preceding function may have set a non-0 | |||
| 3353 | // GNU_ARGS_SIZE. | |||
| 3354 | // TODO: We don't need to reset this between subsequent functions, | |||
| 3355 | // if it didn't change. | |||
| 3356 | bool HasDwarfEHHandlers = !WindowsCFI && !MF.getLandingPads().empty(); | |||
| 3357 | ||||
| 3358 | if (HasDwarfEHHandlers && !isDestroy && | |||
| 3359 | MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences()) | |||
| 3360 | BuildCFI(MBB, InsertPos, DL, | |||
| 3361 | MCCFIInstruction::createGnuArgsSize(nullptr, Amount)); | |||
| 3362 | ||||
| 3363 | if (Amount == 0) | |||
| 3364 | return I; | |||
| 3365 | ||||
| 3366 | // Factor out the amount that gets handled inside the sequence | |||
| 3367 | // (Pushes of argument for frame setup, callee pops for frame destroy) | |||
| 3368 | Amount -= InternalAmt; | |||
| 3369 | ||||
| 3370 | // TODO: This is needed only if we require precise CFA. | |||
| 3371 | // If this is a callee-pop calling convention, emit a CFA adjust for | |||
| 3372 | // the amount the callee popped. | |||
| 3373 | if (isDestroy && InternalAmt && DwarfCFI && !hasFP(MF)) | |||
| 3374 | BuildCFI(MBB, InsertPos, DL, | |||
| 3375 | MCCFIInstruction::createAdjustCfaOffset(nullptr, -InternalAmt)); | |||
| 3376 | ||||
| 3377 | // Add Amount to SP to destroy a frame, or subtract to setup. | |||
| 3378 | int64_t StackAdjustment = isDestroy ? Amount : -Amount; | |||
| 3379 | ||||
| 3380 | if (StackAdjustment) { | |||
| 3381 | // Merge with any previous or following adjustment instruction. Note: the | |||
| 3382 | // instructions merged with here do not have CFI, so their stack | |||
| 3383 | // adjustments do not feed into CfaAdjustment. | |||
| 3384 | StackAdjustment += mergeSPUpdates(MBB, InsertPos, true); | |||
| 3385 | StackAdjustment += mergeSPUpdates(MBB, InsertPos, false); | |||
| 3386 | ||||
| 3387 | if (StackAdjustment) { | |||
| 3388 | if (!(F.hasMinSize() && | |||
| 3389 | adjustStackWithPops(MBB, InsertPos, DL, StackAdjustment))) | |||
| 3390 | BuildStackAdjustment(MBB, InsertPos, DL, StackAdjustment, | |||
| 3391 | /*InEpilogue=*/false); | |||
| 3392 | } | |||
| 3393 | } | |||
| 3394 | ||||
| 3395 | if (DwarfCFI && !hasFP(MF)) { | |||
| 3396 | // If we don't have FP, but need to generate unwind information, | |||
| 3397 | // we need to set the correct CFA offset after the stack adjustment. | |||
| 3398 | // How much we adjust the CFA offset depends on whether we're emitting | |||
| 3399 | // CFI only for EH purposes or for debugging. EH only requires the CFA | |||
| 3400 | // offset to be correct at each call site, while for debugging we want | |||
| 3401 | // it to be more precise. | |||
| 3402 | ||||
| 3403 | int64_t CfaAdjustment = -StackAdjustment; | |||
| 3404 | // TODO: When not using precise CFA, we also need to adjust for the | |||
| 3405 | // InternalAmt here. | |||
| 3406 | if (CfaAdjustment) { | |||
| 3407 | BuildCFI(MBB, InsertPos, DL, | |||
| 3408 | MCCFIInstruction::createAdjustCfaOffset(nullptr, | |||
| 3409 | CfaAdjustment)); | |||
| 3410 | } | |||
| 3411 | } | |||
| 3412 | ||||
| 3413 | return I; | |||
| 3414 | } | |||
| 3415 | ||||
| 3416 | if (InternalAmt) { | |||
| 3417 | MachineBasicBlock::iterator CI = I; | |||
| 3418 | MachineBasicBlock::iterator B = MBB.begin(); | |||
| 3419 | while (CI != B && !std::prev(CI)->isCall()) | |||
| 3420 | --CI; | |||
| 3421 | BuildStackAdjustment(MBB, CI, DL, -InternalAmt, /*InEpilogue=*/false); | |||
| 3422 | } | |||
| 3423 | ||||
| 3424 | return I; | |||
| 3425 | } | |||
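A worked sketch (hypothetical numbers, not the LLVM API) of the adjustment computed above for a frame-destroy pseudo: the outgoing-argument area is rounded up to the stack alignment and the callee-popped portion (InternalAmt) is subtracted before the ADD/SUB of the stack pointer is emitted.

#include <cassert>
#include <cstdint>

static uint64_t alignUpTo(uint64_t Value, uint64_t Alignment) {
  return (Value + Alignment - 1) & ~(Alignment - 1); // power-of-two alignment assumed
}

int main() {
  const bool IsDestroy = true;      // adjcallstackup (frame destroy)
  uint64_t Amount = 20;             // bytes of stack arguments pushed for the call
  const uint64_t InternalAmt = 4;   // bytes popped by the callee itself (e.g. ret $4)
  Amount = alignUpTo(Amount, 16);   // 32, keeps the stack aligned
  Amount -= InternalAmt;            // 28, the part we must pop ourselves
  const int64_t StackAdjustment =
      IsDestroy ? (int64_t)Amount : -(int64_t)Amount;
  assert(StackAdjustment == 28);    // emitted as 'add esp, 28' (or pops, if cheaper)
  return 0;
}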
| 3426 | ||||
| 3427 | bool X86FrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const { | |||
| 3428 | assert(MBB.getParent() && "Block is not attached to a function!")((void)0); | |||
| 3429 | const MachineFunction &MF = *MBB.getParent(); | |||
| 3430 | if (!MBB.isLiveIn(X86::EFLAGS)) | |||
| 3431 | return true; | |||
| 3432 | ||||
| 3433 | const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); | |||
| 3434 | return !TRI->hasStackRealignment(MF) && !X86FI->hasSwiftAsyncContext(); | |||
| 3435 | } | |||
| 3436 | ||||
| 3437 | bool X86FrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const { | |||
| 3438 | assert(MBB.getParent() && "Block is not attached to a function!")((void)0); | |||
| 3439 | ||||
| 3440 | // Win64 has strict requirements on the epilogue, and we are not | |||
| 3441 | // taking any chances messing with them. | |||
| 3442 | // I.e., unless this block is already an exit block, we can't use | |||
| 3443 | // it as an epilogue. | |||
| 3444 | if (STI.isTargetWin64() && !MBB.succ_empty() && !MBB.isReturnBlock()) | |||
| 3445 | return false; | |||
| 3446 | ||||
| 3447 | // Swift async context epilogue has a BTR instruction that clobbers parts of | |||
| 3448 | // EFLAGS. | |||
| 3449 | const MachineFunction &MF = *MBB.getParent(); | |||
| 3450 | if (MF.getInfo<X86MachineFunctionInfo>()->hasSwiftAsyncContext()) | |||
| 3451 | return !flagsNeedToBePreservedBeforeTheTerminators(MBB); | |||
| 3452 | ||||
| 3453 | if (canUseLEAForSPInEpilogue(*MBB.getParent())) | |||
| 3454 | return true; | |||
| 3455 | ||||
| 3456 | // If we cannot use LEA to adjust SP, we may need to use ADD, which | |||
| 3457 | // clobbers EFLAGS. Check that we do not need to preserve it; | |||
| 3458 | // otherwise, conservatively assume it is not safe | |||
| 3459 | // to insert the epilogue here. | |||
| 3460 | return !flagsNeedToBePreservedBeforeTheTerminators(MBB); | |||
| 3461 | } | |||
| 3462 | ||||
| 3463 | bool X86FrameLowering::enableShrinkWrapping(const MachineFunction &MF) const { | |||
| 3464 | // If we may need to emit frameless compact unwind information, give | |||
| 3465 | // up as this is currently broken: PR25614. | |||
| 3466 | bool CompactUnwind = | |||
| 3467 | MF.getMMI().getContext().getObjectFileInfo()->getCompactUnwindSection() != | |||
| 3468 | nullptr; | |||
| 3469 | return (MF.getFunction().hasFnAttribute(Attribute::NoUnwind) || hasFP(MF) || | |||
| 3470 | !CompactUnwind) && | |||
| 3471 | // The lowering of segmented stack and HiPE only support entry | |||
| 3472 | // blocks as prologue blocks: PR26107. This limitation may be | |||
| 3473 | // lifted if we fix: | |||
| 3474 | // - adjustForSegmentedStacks | |||
| 3475 | // - adjustForHiPEPrologue | |||
| 3476 | MF.getFunction().getCallingConv() != CallingConv::HiPE && | |||
| 3477 | !MF.shouldSplitStack(); | |||
| 3478 | } | |||
| 3479 | ||||
| 3480 | MachineBasicBlock::iterator X86FrameLowering::restoreWin32EHStackPointers( | |||
| 3481 | MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, | |||
| 3482 | const DebugLoc &DL, bool RestoreSP) const { | |||
| 3483 | assert(STI.isTargetWindowsMSVC() && "funclets only supported in MSVC env")((void)0); | |||
| 3484 | assert(STI.isTargetWin32() && "EBP/ESI restoration only required on win32")((void)0); | |||
| 3485 | assert(STI.is32Bit() && !Uses64BitFramePtr &&((void)0) | |||
| 3486 | "restoring EBP/ESI on non-32-bit target")((void)0); | |||
| 3487 | ||||
| 3488 | MachineFunction &MF = *MBB.getParent(); | |||
| 3489 | Register FramePtr = TRI->getFrameRegister(MF); | |||
| 3490 | Register BasePtr = TRI->getBaseRegister(); | |||
| 3491 | WinEHFuncInfo &FuncInfo = *MF.getWinEHFuncInfo(); | |||
| 3492 | X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>(); | |||
| 3493 | MachineFrameInfo &MFI = MF.getFrameInfo(); | |||
| 3494 | ||||
| 3495 | // FIXME: Don't set FrameSetup flag in catchret case. | |||
| 3496 | ||||
| 3497 | int FI = FuncInfo.EHRegNodeFrameIndex; | |||
| 3498 | int EHRegSize = MFI.getObjectSize(FI); | |||
| 3499 | ||||
| 3500 | if (RestoreSP) { | |||
| 3501 | // MOV32rm -EHRegSize(%ebp), %esp | |||
| 3502 | addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32rm), X86::ESP), | |||
| 3503 | X86::EBP, true, -EHRegSize) | |||
| 3504 | .setMIFlag(MachineInstr::FrameSetup); | |||
| 3505 | } | |||
| 3506 | ||||
| 3507 | Register UsedReg; | |||
| 3508 | int EHRegOffset = getFrameIndexReference(MF, FI, UsedReg).getFixed(); | |||
| 3509 | int EndOffset = -EHRegOffset - EHRegSize; | |||
| 3510 | FuncInfo.EHRegNodeEndOffset = EndOffset; | |||
| 3511 | ||||
| 3512 | if (UsedReg == FramePtr) { | |||
| 3513 | // ADD $offset, %ebp | |||
| 3514 | unsigned ADDri = getADDriOpcode(false, EndOffset); | |||
| 3515 | BuildMI(MBB, MBBI, DL, TII.get(ADDri), FramePtr) | |||
| 3516 | .addReg(FramePtr) | |||
| 3517 | .addImm(EndOffset) | |||
| 3518 | .setMIFlag(MachineInstr::FrameSetup) | |||
| 3519 | ->getOperand(3) | |||
| 3520 | .setIsDead(); | |||
| 3521 | assert(EndOffset >= 0 && | |||
| 3522 | "end of registration object above normal EBP position!"); | |||
| 3523 | } else if (UsedReg == BasePtr) { | |||
| 3524 | // LEA offset(%ebp), %esi | |||
| 3525 | addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::LEA32r), BasePtr), | |||
| 3526 | FramePtr, false, EndOffset) | |||
| 3527 | .setMIFlag(MachineInstr::FrameSetup); | |||
| 3528 | // MOV32rm SavedEBPOffset(%esi), %ebp | |||
| 3529 | assert(X86FI->getHasSEHFramePtrSave()); | |||
| 3530 | int Offset = | |||
| 3531 | getFrameIndexReference(MF, X86FI->getSEHFramePtrSaveIndex(), UsedReg) | |||
| 3532 | .getFixed(); | |||
| 3533 | assert(UsedReg == BasePtr); | |||
| 3534 | addRegOffset(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV32rm), FramePtr), | |||
| 3535 | UsedReg, true, Offset) | |||
| 3536 | .setMIFlag(MachineInstr::FrameSetup); | |||
| 3537 | } else { | |||
| 3538 | llvm_unreachable("32-bit frames with WinEH must use FramePtr or BasePtr"); | |||
| 3539 | } | |||
| 3540 | return MBBI; | |||
| 3541 | } | |||
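The EndOffset arithmetic in the FramePtr branch above is easier to follow with concrete numbers. The sketch below is standalone and uses hypothetical EHRegOffset/EHRegSize values; nothing here is computed by the pass itself.

// Standalone sketch of the EndOffset arithmetic used above; the concrete
// EHRegOffset/EHRegSize values are hypothetical and only illustrate the signs.
#include <cassert>

int main() {
  int EHRegSize = 24;    // hypothetical size of the EH registration object
  int EHRegOffset = -40; // hypothetical frame-pointer-relative offset (below EBP)
  // The registration object spans [EHRegOffset, EHRegOffset + EHRegSize), so
  // its end lies EndOffset bytes below the frame pointer.
  int EndOffset = -EHRegOffset - EHRegSize; // -(-40) - 24 == 16
  assert(EndOffset >= 0 && "end of registration object above normal EBP position!");
  return 0;
}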
| 3542 | ||||
| 3543 | int X86FrameLowering::getInitialCFAOffset(const MachineFunction &MF) const { | |||
| 3544 | return TRI->getSlotSize(); | |||
| 3545 | } | |||
| 3546 | ||||
| 3547 | Register | |||
| 3548 | X86FrameLowering::getInitialCFARegister(const MachineFunction &MF) const { | |||
| 3549 | return TRI->getDwarfRegNum(StackPtr, true); | |||
| 3550 | } | |||
| 3551 | ||||
| 3552 | namespace { | |||
| 3553 | // Struct used by orderFrameObjects to help sort the stack objects. | |||
| 3554 | struct X86FrameSortingObject { | |||
| 3555 | bool IsValid = false; // true if we care about this Object. | |||
| 3556 | unsigned ObjectIndex = 0; // Index of Object into MFI list. | |||
| 3557 | unsigned ObjectSize = 0; // Size of Object in bytes. | |||
| 3558 | Align ObjectAlignment = Align(1); // Alignment of Object in bytes. | |||
| 3559 | unsigned ObjectNumUses = 0; // Object static number of uses. | |||
| 3560 | }; | |||
| 3561 | ||||
| 3562 | // The comparison function we use for std::sort to order our local | |||
| 3563 | // stack symbols. The current algorithm is to use an estimated | |||
| 3564 | // "density". This takes into consideration the size and number of | |||
| 3565 | // uses each object has in order to roughly minimize code size. | |||
| 3566 | // So, for example, an object of size 16B that is referenced 5 times | |||
| 3567 | // will get higher priority than 4 4B objects referenced 1 time each. | |||
| 3568 | // It's not perfect and we may be able to squeeze a few more bytes out of | |||
| 3569 | // it (for example : 0(esp) requires fewer bytes, symbols allocated at the | |||
| 3570 | // fringe end can have special consideration, given their size is less | |||
| 3571 | // important, etc.), but the algorithmic complexity grows too much to be | |||
| 3572 | // worth the extra gains we get. This gets us pretty close. | |||
| 3573 | // The final order leaves us with objects with highest priority going | |||
| 3574 | // at the end of our list. | |||
| 3575 | struct X86FrameSortingComparator { | |||
| 3576 | inline bool operator()(const X86FrameSortingObject &A, | |||
| 3577 | const X86FrameSortingObject &B) const { | |||
| 3578 | uint64_t DensityAScaled, DensityBScaled; | |||
| 3579 | ||||
| 3580 | // For consistency in our comparison, all invalid objects are placed | |||
| 3581 | // at the end. This also allows us to stop walking when we hit the | |||
| 3582 | // first invalid item after it's all sorted. | |||
| 3583 | if (!A.IsValid) | |||
| 3584 | return false; | |||
| 3585 | if (!B.IsValid) | |||
| 3586 | return true; | |||
| 3587 | ||||
| 3588 | // The density is calculated by doing : | |||
| 3589 | // (double)DensityA = A.ObjectNumUses / A.ObjectSize | |||
| 3590 | // (double)DensityB = B.ObjectNumUses / B.ObjectSize | |||
| 3591 | // Since this approach may cause inconsistencies in | |||
| 3592 | // the floating point <, >, == comparisons, depending on the floating | |||
| 3593 | // point model with which the compiler was built, we're going | |||
| 3594 | // to scale both sides by multiplying with | |||
| 3595 | // A.ObjectSize * B.ObjectSize. This ends up factoring away | |||
| 3596 | // the division and, with it, the need for any floating point | |||
| 3597 | // arithmetic. | |||
| 3598 | DensityAScaled = static_cast<uint64_t>(A.ObjectNumUses) * | |||
| 3599 | static_cast<uint64_t>(B.ObjectSize); | |||
| 3600 | DensityBScaled = static_cast<uint64_t>(B.ObjectNumUses) * | |||
| 3601 | static_cast<uint64_t>(A.ObjectSize); | |||
| 3602 | ||||
| 3603 | // If the two densities are equal, prioritize highest alignment | |||
| 3604 | // objects. This allows for similar alignment objects | |||
| 3605 | // to be packed together (given the same density). | |||
| 3606 | // There's room for improvement here, also, since we can pack | |||
| 3607 | // similar alignment (different density) objects next to each | |||
| 3608 | // other to save padding. This will also require further | |||
| 3609 | // complexity/iterations, and the overall gain isn't worth it, | |||
| 3610 | // in general. Something to keep in mind, though. | |||
| 3611 | if (DensityAScaled == DensityBScaled) | |||
| 3612 | return A.ObjectAlignment < B.ObjectAlignment; | |||
| 3613 | ||||
| 3614 | return DensityAScaled < DensityBScaled; | |||
| 3615 | } | |||
| 3616 | }; | |||
| 3617 | } // namespace | |||
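To make the cross-multiplication trick in the comparator concrete, here is a small standalone sketch that replays the size-16/5-uses versus size-4/1-use example from the comment above; the Obj struct is a simplified stand-in, not the LLVM type.

// Standalone sketch of the cross-multiplied density comparison; Obj is a
// simplified stand-in for X86FrameSortingObject, not the LLVM type itself.
#include <cassert>
#include <cstdint>

struct Obj {
  uint64_t Size;
  uint64_t NumUses;
};

// Returns true if A has strictly lower density (lower priority) than B.
// Comparing NumUses/Size via cross-multiplication avoids floating point,
// exactly as in the comparator above.
static bool lowerDensity(const Obj &A, const Obj &B) {
  return A.NumUses * B.Size < B.NumUses * A.Size;
}

int main() {
  Obj Big   = {16, 5}; // density 5/16
  Obj Small = {4, 1};  // density 1/4 == 4/16
  // The 16-byte object referenced 5 times is denser, so it sorts toward the
  // end of the list (highest priority).
  assert(lowerDensity(Small, Big));
  assert(!lowerDensity(Big, Small));
  return 0;
}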
| 3618 | ||||
| 3619 | // Order the symbols in the local stack. | |||
| 3620 | // We want to place the local stack objects in some sort of sensible order. | |||
| 3621 | // The heuristic we use is to try and pack them according to static number | |||
| 3622 | // of uses and size of object in order to minimize code size. | |||
| 3623 | void X86FrameLowering::orderFrameObjects( | |||
| 3624 | const MachineFunction &MF, SmallVectorImpl<int> &ObjectsToAllocate) const { | |||
| 3625 | const MachineFrameInfo &MFI = MF.getFrameInfo(); | |||
| 3626 | ||||
| 3627 | // Don't waste time if there's nothing to do. | |||
| 3628 | if (ObjectsToAllocate.empty()) | |||
| 3629 | return; | |||
| 3630 | ||||
| 3631 | // Create an array of all MFI objects. We won't need all of these | |||
| 3632 | // objects, but we're going to create a full array of them to make | |||
| 3633 | // it easier to index into when we're counting "uses" down below. | |||
| 3634 | // We want to be able to easily/cheaply access an object by simply | |||
| 3635 | // indexing into it, instead of having to search for it every time. | |||
| 3636 | std::vector<X86FrameSortingObject> SortingObjects(MFI.getObjectIndexEnd()); | |||
| 3637 | ||||
| 3638 | // Walk the objects we care about and mark them as such in our working | |||
| 3639 | // struct. | |||
| 3640 | for (auto &Obj : ObjectsToAllocate) { | |||
| 3641 | SortingObjects[Obj].IsValid = true; | |||
| 3642 | SortingObjects[Obj].ObjectIndex = Obj; | |||
| 3643 | SortingObjects[Obj].ObjectAlignment = MFI.getObjectAlign(Obj); | |||
| 3644 | // Set the size. | |||
| 3645 | int ObjectSize = MFI.getObjectSize(Obj); | |||
| 3646 | if (ObjectSize == 0) | |||
| 3647 | // Variable size. Just use 4. | |||
| 3648 | SortingObjects[Obj].ObjectSize = 4; | |||
| 3649 | else | |||
| 3650 | SortingObjects[Obj].ObjectSize = ObjectSize; | |||
| 3651 | } | |||
| 3652 | ||||
| 3653 | // Count the number of uses for each object. | |||
| 3654 | for (auto &MBB : MF) { | |||
| 3655 | for (auto &MI : MBB) { | |||
| 3656 | if (MI.isDebugInstr()) | |||
| 3657 | continue; | |||
| 3658 | for (const MachineOperand &MO : MI.operands()) { | |||
| 3659 | // Check to see if it's a local stack symbol. | |||
| 3660 | if (!MO.isFI()) | |||
| 3661 | continue; | |||
| 3662 | int Index = MO.getIndex(); | |||
| 3663 | // Check to see if it falls within our range, and is tagged | |||
| 3664 | // to require ordering. | |||
| 3665 | if (Index >= 0 && Index < MFI.getObjectIndexEnd() && | |||
| 3666 | SortingObjects[Index].IsValid) | |||
| 3667 | SortingObjects[Index].ObjectNumUses++; | |||
| 3668 | } | |||
| 3669 | } | |||
| 3670 | } | |||
| 3671 | ||||
| 3672 | // Sort the objects using X86FrameSortingComparator (see its comment for | |||
| 3673 | // info). | |||
| 3674 | llvm::stable_sort(SortingObjects, X86FrameSortingComparator()); | |||
| 3675 | ||||
| 3676 | // Now modify the original list to represent the final order that | |||
| 3677 | // we want. The order will depend on whether we're going to access them | |||
| 3678 | // from the stack pointer or the frame pointer. For SP, the list should | |||
| 3679 | // end up with the END containing objects that we want with smaller offsets. | |||
| 3680 | // For FP, it should be flipped. | |||
| 3681 | int i = 0; | |||
| 3682 | for (auto &Obj : SortingObjects) { | |||
| 3683 | // All invalid items are sorted at the end, so it's safe to stop. | |||
| 3684 | if (!Obj.IsValid) | |||
| 3685 | break; | |||
| 3686 | ObjectsToAllocate[i++] = Obj.ObjectIndex; | |||
| 3687 | } | |||
| 3688 | ||||
| 3689 | // Flip it if we're accessing off of the FP. | |||
| 3690 | if (!TRI->hasStackRealignment(MF) && hasFP(MF)) | |||
| 3691 | std::reverse(ObjectsToAllocate.begin(), ObjectsToAllocate.end()); | |||
| 3692 | } | |||
| 3693 | ||||
| 3694 | ||||
| 3695 | unsigned X86FrameLowering::getWinEHParentFrameOffset(const MachineFunction &MF) const { | |||
| 3696 | // RDX, the parent frame pointer, is homed into 16(%rsp) in the prologue. | |||
| 3697 | unsigned Offset = 16; | |||
| 3698 | // RBP is immediately pushed. | |||
| 3699 | Offset += SlotSize; | |||
| 3700 | // All callee-saved registers are then pushed. | |||
| 3701 | Offset += MF.getInfo<X86MachineFunctionInfo>()->getCalleeSavedFrameSize(); | |||
| 3702 | // Every funclet allocates enough stack space for the largest outgoing call. | |||
| 3703 | Offset += getWinEHFuncletFrameSize(MF); | |||
| 3704 | return Offset; | |||
| 3705 | } | |||
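The returned offset is just an accumulation of fixed contributions. A hedged worked example with hypothetical sizes (8-byte slots, 24 bytes of callee-saved registers, a 32-byte funclet frame):

// Hypothetical x86-64 numbers, only to illustrate the accumulation above.
#include <cassert>

int main() {
  unsigned Offset = 16; // home slot for RDX, the parent frame pointer
  Offset += 8;          // SlotSize: RBP is pushed immediately
  Offset += 24;         // hypothetical callee-saved register area
  Offset += 32;         // hypothetical funclet frame for outgoing calls
  assert(Offset == 80);
  return 0;
}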
| 3706 | ||||
| 3707 | void X86FrameLowering::processFunctionBeforeFrameFinalized( | |||
| 3708 | MachineFunction &MF, RegScavenger *RS) const { | |||
| 3709 | // Mark the function as not having WinCFI. We will set it back to true in | |||
| 3710 | // emitPrologue if it gets called and emits CFI. | |||
| 3711 | MF.setHasWinCFI(false); | |||
| 3712 | ||||
| 3713 | // If we are using Windows x64 CFI, ensure that the stack is always 8 byte | |||
| 3714 | // aligned. The format doesn't support misaligned stack adjustments. | |||
| 3715 | if (MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) | |||
| 3716 | MF.getFrameInfo().ensureMaxAlignment(Align(SlotSize)); | |||
| 3717 | ||||
| 3718 | // If this function isn't doing Win64-style C++ EH, we don't need to do | |||
| 3719 | // anything. | |||
| 3720 | if (STI.is64Bit() && MF.hasEHFunclets() && | |||
| 3721 | classifyEHPersonality(MF.getFunction().getPersonalityFn()) == | |||
| 3722 | EHPersonality::MSVC_CXX) { | |||
| 3723 | adjustFrameForMsvcCxxEh(MF); | |||
| 3724 | } | |||
| 3725 | } | |||
| 3726 | ||||
| 3727 | void X86FrameLowering::adjustFrameForMsvcCxxEh(MachineFunction &MF) const { | |||
| 3728 | // Win64 C++ EH needs to allocate the UnwindHelp object at some fixed offset | |||
| 3729 | // relative to RSP after the prologue. Find the offset of the last fixed | |||
| 3730 | // object, so that we can allocate a slot immediately following it. If there | |||
| 3731 | // were no fixed objects, use offset -SlotSize, which is immediately after the | |||
| 3732 | // return address. Fixed objects have negative frame indices. | |||
| 3733 | MachineFrameInfo &MFI = MF.getFrameInfo(); | |||
| 3734 | WinEHFuncInfo &EHInfo = *MF.getWinEHFuncInfo(); | |||
| 3735 | int64_t MinFixedObjOffset = -SlotSize; | |||
| 3736 | for (int I = MFI.getObjectIndexBegin(); I < 0; ++I) | |||
| 3737 | MinFixedObjOffset = std::min(MinFixedObjOffset, MFI.getObjectOffset(I)); | |||
| 3738 | ||||
| 3739 | for (WinEHTryBlockMapEntry &TBME : EHInfo.TryBlockMap) { | |||
| 3740 | for (WinEHHandlerType &H : TBME.HandlerArray) { | |||
| 3741 | int FrameIndex = H.CatchObj.FrameIndex; | |||
| 3742 | if (FrameIndex != INT_MAX) { | |||
| 3743 | // Ensure alignment. | |||
| 3744 | unsigned Align = MFI.getObjectAlign(FrameIndex).value(); | |||
| 3745 | MinFixedObjOffset -= std::abs(MinFixedObjOffset) % Align; | |||
| 3746 | MinFixedObjOffset -= MFI.getObjectSize(FrameIndex); | |||
| 3747 | MFI.setObjectOffset(FrameIndex, MinFixedObjOffset); | |||
| 3748 | } | |||
| 3749 | } | |||
| 3750 | } | |||
| 3751 | ||||
| 3752 | // Ensure alignment. | |||
| 3753 | MinFixedObjOffset -= std::abs(MinFixedObjOffset) % 8; | |||
| 3754 | int64_t UnwindHelpOffset = MinFixedObjOffset - SlotSize; | |||
| 3755 | int UnwindHelpFI = | |||
| 3756 | MFI.CreateFixedObject(SlotSize, UnwindHelpOffset, /*IsImmutable=*/false); | |||
| 3757 | EHInfo.UnwindHelpFrameIdx = UnwindHelpFI; | |||
| 3758 | ||||
| 3759 | // Store -2 into UnwindHelp on function entry. We have to scan forwards past | |||
| 3760 | // other frame setup instructions. | |||
| 3761 | MachineBasicBlock &MBB = MF.front(); | |||
| 3762 | auto MBBI = MBB.begin(); | |||
| 3763 | while (MBBI != MBB.end() && MBBI->getFlag(MachineInstr::FrameSetup)) | |||
| 3764 | ++MBBI; | |||
| 3765 | ||||
| 3766 | DebugLoc DL = MBB.findDebugLoc(MBBI); | |||
| 3767 | addFrameReference(BuildMI(MBB, MBBI, DL, TII.get(X86::MOV64mi32)), | |||
| 3768 | UnwindHelpFI) | |||
| 3769 | .addImm(-2); | |||
| 3770 | } | |||
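The `MinFixedObjOffset -= std::abs(MinFixedObjOffset) % Align` steps above round a negative RSP-relative offset down to the requested boundary. A minimal sketch with hypothetical numbers:

// Sketch of the negative-offset alignment used when placing catch objects and
// UnwindHelp; the starting offset, alignment, and slot size are hypothetical.
#include <cassert>
#include <cstdint>
#include <cstdlib>

int main() {
  int64_t MinFixedObjOffset = -20; // hypothetical offset of the last fixed object
  const unsigned Alignment = 8;
  const unsigned SlotSize = 8;
  // Round the (negative) offset down to the next Alignment boundary: -20 -> -24.
  MinFixedObjOffset -= std::abs(MinFixedObjOffset) % Alignment;
  assert(MinFixedObjOffset == -24);
  // UnwindHelp then goes in the next slot below that.
  int64_t UnwindHelpOffset = MinFixedObjOffset - SlotSize;
  assert(UnwindHelpOffset == -32);
  return 0;
}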
| 3771 | ||||
| 3772 | const ReturnProtectorLowering *X86FrameLowering::getReturnProtector() const { | |||
| 3773 | return &RPL; | |||
| 3774 | } | |||
| 3775 | ||||
| 3776 | void X86FrameLowering::processFunctionBeforeFrameIndicesReplaced( | |||
| 3777 | MachineFunction &MF, RegScavenger *RS) const { | |||
| 3778 | if (STI.is32Bit() && MF.hasEHFunclets()) | |||
| 3779 | restoreWinEHStackPointersInParent(MF); | |||
| 3780 | } | |||
| 3781 | ||||
| 3782 | void X86FrameLowering::restoreWinEHStackPointersInParent( | |||
| 3783 | MachineFunction &MF) const { | |||
| 3784 | // 32-bit functions have to restore stack pointers when control is transferred | |||
| 3785 | // back to the parent function. These blocks are identified as eh pads that | |||
| 3786 | // are not funclet entries. | |||
| 3787 | bool IsSEH = isAsynchronousEHPersonality( | |||
| 3788 | classifyEHPersonality(MF.getFunction().getPersonalityFn())); | |||
| 3789 | for (MachineBasicBlock &MBB : MF) { | |||
| 3790 | bool NeedsRestore = MBB.isEHPad() && !MBB.isEHFuncletEntry(); | |||
| 3791 | if (NeedsRestore) | |||
| 3792 | restoreWin32EHStackPointers(MBB, MBB.begin(), DebugLoc(), | |||
| 3793 | /*RestoreSP=*/IsSEH); | |||
| 3794 | } | |||
| 3795 | } |
| 1 | //===-- llvm/Support/Alignment.h - Useful alignment functions ---*- C++ -*-===// | |||
| 2 | // | |||
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |||
| 4 | // See https://llvm.org/LICENSE.txt for license information. | |||
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |||
| 6 | // | |||
| 7 | //===----------------------------------------------------------------------===// | |||
| 8 | // | |||
| 9 | // This file contains types to represent alignments. | |||
| 10 | // They are instrumented to guarantee some invariants are preserved and prevent | |||
| 11 | // invalid manipulations. | |||
| 12 | // | |||
| 13 | // - Align represents an alignment in bytes; it is always set and always a valid | |||
| 14 | // power of two. Its minimum value is 1, which means no alignment requirement. | |||
| 15 | // | |||
| 16 | // - MaybeAlign is an optional type, it may be undefined or set. When it's set | |||
| 17 | // you can get the underlying Align type by using the getValue() method. | |||
| 18 | // | |||
| 19 | //===----------------------------------------------------------------------===// | |||
| 20 | ||||
| 21 | #ifndef LLVM_SUPPORT_ALIGNMENT_H_ | |||
| 22 | #define LLVM_SUPPORT_ALIGNMENT_H_ | |||
| 23 | ||||
| 24 | #include "llvm/ADT/Optional.h" | |||
| 25 | #include "llvm/Support/MathExtras.h" | |||
| 26 | #include <cassert> | |||
| 27 | #ifndef NDEBUG | |||
| 28 | #include <string> | |||
| 29 | #endif // NDEBUG | |||
| 30 | ||||
| 31 | namespace llvm { | |||
| 32 | ||||
| 33 | #define ALIGN_CHECK_ISPOSITIVE(decl) \ | |||
| 34 | assert(decl > 0 && (#decl " should be defined")) | |||
| 35 | ||||
| 36 | /// This struct is a compact representation of a valid (non-zero power of two) | |||
| 37 | /// alignment. | |||
| 38 | /// It is suitable for use as static global constants. | |||
| 39 | struct Align { | |||
| 40 | private: | |||
| 41 | uint8_t ShiftValue = 0; /// The log2 of the required alignment. | |||
| 42 | /// ShiftValue is less than 64 by construction. | |||
| 43 | ||||
| 44 | friend struct MaybeAlign; | |||
| 45 | friend unsigned Log2(Align); | |||
| 46 | friend bool operator==(Align Lhs, Align Rhs); | |||
| 47 | friend bool operator!=(Align Lhs, Align Rhs); | |||
| 48 | friend bool operator<=(Align Lhs, Align Rhs); | |||
| 49 | friend bool operator>=(Align Lhs, Align Rhs); | |||
| 50 | friend bool operator<(Align Lhs, Align Rhs); | |||
| 51 | friend bool operator>(Align Lhs, Align Rhs); | |||
| 52 | friend unsigned encode(struct MaybeAlign A); | |||
| 53 | friend struct MaybeAlign decodeMaybeAlign(unsigned Value); | |||
| 54 | ||||
| 55 | /// A trivial type to allow construction of constexpr Align. | |||
| 56 | /// This is currently needed to work around a bug in GCC 5.3 which prevents | |||
| 57 | /// definition of constexpr assign operators. | |||
| 58 | /// https://stackoverflow.com/questions/46756288/explicitly-defaulted-function-cannot-be-declared-as-constexpr-because-the-implic | |||
| 59 | /// FIXME: Remove this, make all assign operators constexpr and introduce user | |||
| 60 | /// defined literals when we don't have to support GCC 5.3 anymore. | |||
| 61 | /// https://llvm.org/docs/GettingStarted.html#getting-a-modern-host-c-toolchain | |||
| 62 | struct LogValue { | |||
| 63 | uint8_t Log; | |||
| 64 | }; | |||
| 65 | ||||
| 66 | public: | |||
| 67 | /// Default is byte-aligned. | |||
| 68 | constexpr Align() = default; | |||
| 69 | /// Do not perform checks in case of copy/move construct/assign, because the | |||
| 70 | /// checks have been performed when building `Other`. | |||
| 71 | constexpr Align(const Align &Other) = default; | |||
| 72 | constexpr Align(Align &&Other) = default; | |||
| 73 | Align &operator=(const Align &Other) = default; | |||
| 74 | Align &operator=(Align &&Other) = default; | |||
| 75 | ||||
| 76 | explicit Align(uint64_t Value) { | |||
| 77 | assert(Value > 0 && "Value must not be 0"); | |||
| 78 | assert(llvm::isPowerOf2_64(Value) && "Alignment is not a power of 2"); | |||
| 79 | ShiftValue = Log2_64(Value); | |||
| 80 | assert(ShiftValue < 64 && "Broken invariant"); | |||
| 81 | } | |||
| 82 | ||||
| 83 | /// This is a hole in the type system and should not be abused. | |||
| 84 | /// Needed to interact with C for instance. | |||
| 85 | uint64_t value() const { return uint64_t(1) << ShiftValue; } | |||
| 86 | ||||
| 87 | /// Allow constructions of constexpr Align. | |||
| 88 | template <size_t kValue> constexpr static LogValue Constant() { | |||
| 89 | return LogValue{static_cast<uint8_t>(CTLog2<kValue>())}; | |||
| 90 | } | |||
| 91 | ||||
| 92 | /// Allow constructions of constexpr Align from types. | |||
| 93 | /// Compile time equivalent to Align(alignof(T)). | |||
| 94 | template <typename T> constexpr static LogValue Of() { | |||
| 95 | return Constant<std::alignment_of<T>::value>(); | |||
| 96 | } | |||
| 97 | ||||
| 98 | /// Constexpr constructor from LogValue type. | |||
| 99 | constexpr Align(LogValue CA) : ShiftValue(CA.Log) {} | |||
| 100 | }; | |||
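Align therefore stores only the log2 of the alignment in a single byte, and value() reconstructs the byte count by shifting 1 left by ShiftValue, which is well-defined only while the ShiftValue < 64 invariant asserted in the constructor holds. A minimal usage sketch, assuming this header is on the include path:

// Minimal usage sketch of Align, assuming llvm/Support/Alignment.h is on the
// include path; the values are illustrative only.
#include "llvm/Support/Alignment.h"
#include <cassert>

int main() {
  llvm::Align A(16);               // stores ShiftValue = 4 internally
  assert(llvm::Log2(A) == 4);      // the stored log2
  assert(A.value() == 16);         // uint64_t(1) << 4
  assert(llvm::isAligned(A, 64));  // 64 is a multiple of 16
  assert(!llvm::isAligned(A, 20)); // 20 is not
  return 0;
}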
| 101 | ||||
| 102 | /// Treats the value 0 as a 1, so Align is always at least 1. | |||
| 103 | inline Align assumeAligned(uint64_t Value) { | |||
| 104 | return Value ? Align(Value) : Align(); | |||
| 105 | } | |||
| 106 | ||||
| 107 | /// This struct is a compact representation of a valid (power of two) or | |||
| 108 | /// undefined (0) alignment. | |||
| 109 | struct MaybeAlign : public llvm::Optional<Align> { | |||
| 110 | private: | |||
| 111 | using UP = llvm::Optional<Align>; | |||
| 112 | ||||
| 113 | public: | |||
| 114 | /// Default is undefined. | |||
| 115 | MaybeAlign() = default; | |||
| 116 | /// Do not perform checks in case of copy/move construct/assign, because the | |||
| 117 | /// checks have been performed when building `Other`. | |||
| 118 | MaybeAlign(const MaybeAlign &Other) = default; | |||
| 119 | MaybeAlign &operator=(const MaybeAlign &Other) = default; | |||
| 120 | MaybeAlign(MaybeAlign &&Other) = default; | |||
| 121 | MaybeAlign &operator=(MaybeAlign &&Other) = default; | |||
| 122 | ||||
| 123 | /// Use llvm::Optional<Align> constructor. | |||
| 124 | using UP::UP; | |||
| 125 | ||||
| 126 | explicit MaybeAlign(uint64_t Value) { | |||
| 127 | assert((Value == 0 || llvm::isPowerOf2_64(Value)) && | |||
| 128 | "Alignment is neither 0 nor a power of 2"); | |||
| 129 | if (Value) | |||
| 130 | emplace(Value); | |||
| 131 | } | |||
| 132 | ||||
| 133 | /// For convenience, returns a valid alignment or 1 if undefined. | |||
| 134 | Align valueOrOne() const { return hasValue() ? getValue() : Align(); } | |||
| 135 | }; | |||
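A quick illustration of the set-or-undefined semantics of MaybeAlign (a minimal sketch with the same include-path assumption as above; the values are arbitrary):

// Minimal MaybeAlign sketch; values are illustrative only.
#include "llvm/Support/Alignment.h"
#include <cassert>

int main() {
  llvm::MaybeAlign Unset(0); // 0 means "undefined"
  assert(!Unset.hasValue());
  assert(Unset.valueOrOne() == llvm::Align(1));

  llvm::MaybeAlign Set(16); // any non-zero power of two is "set"
  assert(Set.hasValue() && Set.getValue() == llvm::Align(16));
  return 0;
}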
| 136 | ||||
| 137 | /// Checks that SizeInBytes is a multiple of the alignment. | |||
| 138 | inline bool isAligned(Align Lhs, uint64_t SizeInBytes) { | |||
| 139 | return SizeInBytes % Lhs.value() == 0; | |||
| 140 | } | |||
| 141 | ||||
| 142 | /// Checks that Addr is a multiple of the alignment. | |||
| 143 | inline bool isAddrAligned(Align Lhs, const void *Addr) { | |||
| 144 | return isAligned(Lhs, reinterpret_cast<uintptr_t>(Addr)); | |||
| 145 | } | |||
| 146 | ||||
| 147 | /// Returns a multiple of A needed to store `Size` bytes. | |||
| 148 | inline uint64_t alignTo(uint64_t Size, Align A) { | |||
| 149 | const uint64_t Value = A.value(); | |||
| 150 | // The following line is equivalent to `(Size + Value - 1) / Value * Value`. | |||
| 151 | ||||
| 152 | // The division followed by a multiplication can be thought of as a right | |||
| 153 | // shift followed by a left shift which zeros out the extra bits produced in | |||
| 154 | // the bump; `~(Value - 1)` is a mask where all those bits being zeroed out | |||
| 155 | // are just zero. | |||
| 156 | ||||
| 157 | // Most compilers can generate this code but the pattern may be missed when | |||
| 158 | // multiple functions get inlined. | |||
| 159 | return (Size + Value - 1) & ~(Value - 1U); | |||
| 160 | } | |||
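A small standalone check that the mask form used above agrees with the divide-then-multiply form for a power-of-two alignment (the inputs are arbitrary illustrative values):

// Standalone check that (Size + Value - 1) & ~(Value - 1) matches the
// divide-then-multiply formulation for a power-of-two Value.
#include <cassert>
#include <cstdint>

static uint64_t alignToMask(uint64_t Size, uint64_t Value) {
  return (Size + Value - 1) & ~(Value - 1U);
}

static uint64_t alignToDiv(uint64_t Size, uint64_t Value) {
  return (Size + Value - 1) / Value * Value;
}

int main() {
  // 5 rounds up to the next multiple of 8, 17 rounds up to 24, 16 stays 16.
  assert(alignToMask(5, 8) == 8 && alignToDiv(5, 8) == 8);
  assert(alignToMask(17, 8) == 24 && alignToDiv(17, 8) == 24);
  assert(alignToMask(16, 8) == 16 && alignToDiv(16, 8) == 16);
  return 0;
}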
| 161 | ||||
| 162 | /// If non-zero \p Skew is specified, the return value will be a minimal integer | |||
| 163 | /// that is greater than or equal to \p Size and equal to \p A * N + \p Skew for | |||
| 164 | /// some integer N. If \p Skew is larger than \p A, its value is adjusted to '\p | |||
| 165 | /// Skew mod \p A'. | |||
| 166 | /// | |||
| 167 | /// Examples: | |||
| 168 | /// \code | |||
| 169 | /// alignTo(5, Align(8), 7) = 7 | |||
| 170 | /// alignTo(17, Align(8), 1) = 17 | |||
| 171 | /// alignTo(~0LL, Align(8), 3) = 3 | |||
| 172 | /// \endcode | |||
| 173 | inline uint64_t alignTo(uint64_t Size, Align A, uint64_t Skew) { | |||
| 174 | const uint64_t Value = A.value(); | |||
| 175 | Skew %= Value; | |||
| 176 | return ((Size + Value - 1 - Skew) & ~(Value - 1U)) + Skew; | |||
| 177 | } | |||
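The three documented examples can be asserted directly; the sketch below only replays the values from the comment above (same include-path assumption):

// Checks the documented alignTo-with-skew examples; nothing here is new,
// it only exercises the values shown in the \code block above.
#include "llvm/Support/Alignment.h"
#include <cassert>
#include <cstdint>

int main() {
  assert(llvm::alignTo(5, llvm::Align(8), 7) == 7);
  assert(llvm::alignTo(17, llvm::Align(8), 1) == 17);
  assert(llvm::alignTo(~0ULL, llvm::Align(8), 3) == 3); // wraps mod 2**64
  return 0;
}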
| 178 | ||||
| 179 | /// Returns a multiple of A needed to store `Size` bytes. | |||
| 180 | /// Returns `Size` if current alignment is undefined. | |||
| 181 | inline uint64_t alignTo(uint64_t Size, MaybeAlign A) { | |||
| 182 | return A ? alignTo(Size, A.getValue()) : Size; | |||
| 183 | } | |||
| 184 | ||||
| 185 | /// Aligns `Addr` to `Alignment` bytes, rounding up. | |||
| 186 | inline uintptr_t alignAddr(const void *Addr, Align Alignment) { | |||
| 187 | uintptr_t ArithAddr = reinterpret_cast<uintptr_t>(Addr); | |||
| 188 | assert(static_cast<uintptr_t>(ArithAddr + Alignment.value() - 1) >= | |||
| 189 | ArithAddr && | |||
| 190 | "Overflow"); | |||
| 191 | return alignTo(ArithAddr, Alignment); | |||
| 192 | } | |||
| 193 | ||||
| 194 | /// Returns the offset to the next integer (mod 2**64) that is greater than | |||
| 195 | /// or equal to \p Value and is a multiple of \p Align. | |||
| 196 | inline uint64_t offsetToAlignment(uint64_t Value, Align Alignment) { | |||
| 197 | return alignTo(Value, Alignment) - Value; | |||
| 198 | } | |||
| 199 | ||||
| 200 | /// Returns the necessary adjustment for aligning `Addr` to `Alignment` | |||
| 201 | /// bytes, rounding up. | |||
| 202 | inline uint64_t offsetToAlignedAddr(const void *Addr, Align Alignment) { | |||
| 203 | return offsetToAlignment(reinterpret_cast<uintptr_t>(Addr), Alignment); | |||
| 204 | } | |||
| 205 | ||||
| 206 | /// Returns the log2 of the alignment. | |||
| 207 | inline unsigned Log2(Align A) { return A.ShiftValue; } | |||
| 208 | ||||
| 209 | /// Returns the alignment that satisfies both alignments. | |||
| 210 | /// Same semantic as MinAlign. | |||
| 211 | inline Align commonAlignment(Align A, Align B) { return std::min(A, B); } | |||
| 212 | ||||
| 213 | /// Returns the alignment that satisfies both alignments. | |||
| 214 | /// Same semantic as MinAlign. | |||
| 215 | inline Align commonAlignment(Align A, uint64_t Offset) { | |||
| 216 | return Align(MinAlign(A.value(), Offset)); | |||
| 217 | } | |||
| 218 | ||||
| 219 | /// Returns the alignment that satisfies both alignments. | |||
| 220 | /// Same semantic as MinAlign. | |||
| 221 | inline MaybeAlign commonAlignment(MaybeAlign A, MaybeAlign B) { | |||
| 222 | return A && B ? commonAlignment(*A, *B) : A ? A : B; | |||
| 223 | } | |||
| 224 | ||||
| 225 | /// Returns the alignment that satisfies both alignments. | |||
| 226 | /// Same semantic as MinAlign. | |||
| 227 | inline MaybeAlign commonAlignment(MaybeAlign A, uint64_t Offset) { | |||
| 228 | return MaybeAlign(MinAlign((*A).value(), Offset)); | |||
| 229 | } | |||
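All three commonAlignment overloads keep the weaker of the two guarantees (MinAlign semantics). A hedged sketch with arbitrary values:

// commonAlignment keeps the weaker guarantee; values are illustrative only.
#include "llvm/Support/Alignment.h"
#include <cassert>
#include <cstdint>

int main() {
  using namespace llvm;
  // Two explicit alignments: the smaller one wins.
  assert(commonAlignment(Align(4), Align(16)) == Align(4));
  // An offset of 20 bytes from a 16-byte aligned base is only 4-byte aligned.
  assert(commonAlignment(Align(16), uint64_t(20)) == Align(4));
  // An 8-byte offset from a 16-byte aligned base stays 8-byte aligned.
  assert(commonAlignment(Align(16), uint64_t(8)) == Align(8));
  return 0;
}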
| 230 | ||||
| 231 | /// Returns a representation of the alignment that encodes undefined as 0. | |||
| 232 | inline unsigned encode(MaybeAlign A) { return A ? A->ShiftValue + 1 : 0; } | |||
| 233 | ||||
| 234 | /// Dual operation of the encode function above. | |||
| 235 | inline MaybeAlign decodeMaybeAlign(unsigned Value) { | |||
| 236 | if (Value == 0) | |||
| 237 | return MaybeAlign(); | |||
| 238 | Align Out; | |||
| 239 | Out.ShiftValue = Value - 1; | |||
| 240 | return Out; | |||
| 241 | } | |||
| 242 | ||||
| 243 | /// Returns a representation of the alignment, the encoded value is positive by | |||
| 244 | /// definition. | |||
| 245 | inline unsigned encode(Align A) { return encode(MaybeAlign(A)); } | |||
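The encoding shifts the stored log2 up by one so that 0 can stand for the undefined MaybeAlign. A round-trip sketch (same include-path assumption; the values are illustrative):

// Round-trip sketch for encode/decodeMaybeAlign.
#include "llvm/Support/Alignment.h"
#include <cassert>

int main() {
  using namespace llvm;
  assert(encode(MaybeAlign()) == 0);        // undefined encodes as 0
  assert(encode(Align(1)) == 1);            // log2(1) + 1
  assert(encode(Align(8)) == 4);            // log2(8) + 1
  MaybeAlign Decoded = decodeMaybeAlign(4);
  assert(Decoded && Decoded->value() == 8); // and decodes back
  assert(!decodeMaybeAlign(0).hasValue());  // 0 decodes to undefined
  return 0;
}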
| 246 | ||||
| 247 | /// Comparisons between Align and scalars. Rhs must be positive. | |||
| 248 | inline bool operator==(Align Lhs, uint64_t Rhs) { | |||
| 249 | ALIGN_CHECK_ISPOSITIVE(Rhs); | |||
| 250 | return Lhs.value() == Rhs; | |||
| 251 | } | |||
| 252 | inline bool operator!=(Align Lhs, uint64_t Rhs) { | |||
| 253 | ALIGN_CHECK_ISPOSITIVE(Rhs); | |||
| 254 | return Lhs.value() != Rhs; | |||
| 255 | } | |||
| 256 | inline bool operator<=(Align Lhs, uint64_t Rhs) { | |||
| 257 | ALIGN_CHECK_ISPOSITIVE(Rhs); | |||
| 258 | return Lhs.value() <= Rhs; | |||
| 259 | } | |||
| 260 | inline bool operator>=(Align Lhs, uint64_t Rhs) { | |||
| 261 | ALIGN_CHECK_ISPOSITIVE(Rhs); | |||
| 262 | return Lhs.value() >= Rhs; | |||
| 263 | } | |||
| 264 | inline bool operator<(Align Lhs, uint64_t Rhs) { | |||
| 265 | ALIGN_CHECK_ISPOSITIVE(Rhs); | |||
| 266 | return Lhs.value() < Rhs; | |||
| 267 | } | |||
| 268 | inline bool operator>(Align Lhs, uint64_t Rhs) { | |||
| 269 | ALIGN_CHECK_ISPOSITIVE(Rhs); | |||
| 270 | return Lhs.value() > Rhs; | |||
| 271 | } | |||
| 272 | ||||
| 273 | /// Comparisons between MaybeAlign and scalars. | |||
| 274 | inline bool operator==(MaybeAlign Lhs, uint64_t Rhs) { | |||
| 275 | return Lhs ? (*Lhs).value() == Rhs : Rhs == 0; | |||
| 276 | } | |||
| 277 | inline bool operator!=(MaybeAlign Lhs, uint64_t Rhs) { | |||
| 278 | return Lhs ? (*Lhs).value() != Rhs : Rhs != 0; | |||
| 279 | } | |||
| 280 | ||||
| 281 | /// Comparisons operators between Align. | |||
| 282 | inline bool operator==(Align Lhs, Align Rhs) { | |||
| 283 | return Lhs.ShiftValue == Rhs.ShiftValue; | |||
| 284 | } | |||
| 285 | inline bool operator!=(Align Lhs, Align Rhs) { | |||
| 286 | return Lhs.ShiftValue != Rhs.ShiftValue; | |||
| 287 | } | |||
| 288 | inline bool operator<=(Align Lhs, Align Rhs) { | |||
| 289 | return Lhs.ShiftValue <= Rhs.ShiftValue; | |||
| 290 | } | |||
| 291 | inline bool operator>=(Align Lhs, Align Rhs) { | |||
| 292 | return Lhs.ShiftValue >= Rhs.ShiftValue; | |||
| 293 | } | |||
| 294 | inline bool operator<(Align Lhs, Align Rhs) { | |||
| 295 | return Lhs.ShiftValue < Rhs.ShiftValue; | |||
| 296 | } | |||
| 297 | inline bool operator>(Align Lhs, Align Rhs) { | |||
| 298 | return Lhs.ShiftValue > Rhs.ShiftValue; | |||
| 299 | } | |||
| 300 | ||||
| 301 | // Don't allow relational comparisons with MaybeAlign. | |||
| 302 | bool operator<=(Align Lhs, MaybeAlign Rhs) = delete; | |||
| 303 | bool operator>=(Align Lhs, MaybeAlign Rhs) = delete; | |||
| 304 | bool operator<(Align Lhs, MaybeAlign Rhs) = delete; | |||
| 305 | bool operator>(Align Lhs, MaybeAlign Rhs) = delete; | |||
| 306 | ||||
| 307 | bool operator<=(MaybeAlign Lhs, Align Rhs) = delete; | |||
| 308 | bool operator>=(MaybeAlign Lhs, Align Rhs) = delete; | |||
| 309 | bool operator<(MaybeAlign Lhs, Align Rhs) = delete; | |||
| 310 | bool operator>(MaybeAlign Lhs, Align Rhs) = delete; | |||
| 311 | ||||
| 312 | bool operator<=(MaybeAlign Lhs, MaybeAlign Rhs) = delete; | |||
| 313 | bool operator>=(MaybeAlign Lhs, MaybeAlign Rhs) = delete; | |||
| 314 | bool operator<(MaybeAlign Lhs, MaybeAlign Rhs) = delete; | |||
| 315 | bool operator>(MaybeAlign Lhs, MaybeAlign Rhs) = delete; | |||
| 316 | ||||
| 317 | inline Align operator*(Align Lhs, uint64_t Rhs) { | |||
| 318 | assert(Rhs > 0 && "Rhs must be positive"); | |||
| 319 | return Align(Lhs.value() * Rhs); | |||
| 320 | } | |||
| 321 | ||||
| 322 | inline MaybeAlign operator*(MaybeAlign Lhs, uint64_t Rhs) { | |||
| 323 | assert(Rhs > 0 && "Rhs must be positive"); | |||
| 324 | return Lhs ? Lhs.getValue() * Rhs : MaybeAlign(); | |||
| 325 | } | |||
| 326 | ||||
| 327 | inline Align operator/(Align Lhs, uint64_t Divisor) { | |||
| 328 | assert(llvm::isPowerOf2_64(Divisor) && | |||
| 329 | "Divisor must be positive and a power of 2"); | |||
| 330 | assert(Lhs != 1 && "Can't halve byte alignment"); | |||
| 331 | return Align(Lhs.value() / Divisor); | |||
| 332 | } | |||
| 333 | ||||
| 334 | inline MaybeAlign operator/(MaybeAlign Lhs, uint64_t Divisor) { | |||
| 335 | assert(llvm::isPowerOf2_64(Divisor) && | |||
| 336 | "Divisor must be positive and a power of 2"); | |||
| 337 | return Lhs ? Lhs.getValue() / Divisor : MaybeAlign(); | |||
| 338 | } | |||
| 339 | ||||
| 340 | inline Align max(MaybeAlign Lhs, Align Rhs) { | |||
| 341 | return Lhs && *Lhs > Rhs ? *Lhs : Rhs; | |||
| 342 | } | |||
| 343 | ||||
| 344 | inline Align max(Align Lhs, MaybeAlign Rhs) { | |||
| 345 | return Rhs && *Rhs > Lhs ? *Rhs : Lhs; | |||
| 346 | } | |||
| 347 | ||||
| 348 | #ifndef NDEBUG | |||
| 349 | // For usage in LLVM_DEBUG macros. | |||
| 350 | inline std::string DebugStr(const Align &A) { | |||
| 351 | return std::to_string(A.value()); | |||
| 352 | } | |||
| 353 | // For usage in LLVM_DEBUG macros. | |||
| 354 | inline std::string DebugStr(const MaybeAlign &MA) { | |||
| 355 | if (MA) | |||
| 356 | return std::to_string(MA->value()); | |||
| 357 | return "None"; | |||
| 358 | } | |||
| 359 | #endif // NDEBUG | |||
| 360 | ||||
| 361 | #undef ALIGN_CHECK_ISPOSITIVE | |||
| 362 | ||||
| 363 | } // namespace llvm | |||
| 364 | ||||
| 365 | #endif // LLVM_SUPPORT_ALIGNMENT_H_ |