| File: | src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/CodeGen/SelectionDAGNodes.h |
| Warning: | line 1110, column 10 Called C++ object pointer is null |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
| 1 | //===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===// | ||||
| 2 | // | ||||
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||||
| 4 | // See https://llvm.org/LICENSE.txt for license information. | ||||
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||||
| 6 | // | ||||
| 7 | //==-----------------------------------------------------------------------===// | ||||
| 8 | // | ||||
| 9 | /// \file | ||||
| 10 | /// Defines an instruction selector for the AMDGPU target. | ||||
| 11 | // | ||||
| 12 | //===----------------------------------------------------------------------===// | ||||
| 13 | |||||
| 14 | #include "AMDGPU.h" | ||||
| 15 | #include "AMDGPUTargetMachine.h" | ||||
| 16 | #include "SIMachineFunctionInfo.h" | ||||
| 17 | #include "llvm/Analysis/LegacyDivergenceAnalysis.h" | ||||
| 18 | #include "llvm/Analysis/ValueTracking.h" | ||||
| 19 | #include "llvm/CodeGen/FunctionLoweringInfo.h" | ||||
| 20 | #include "llvm/CodeGen/SelectionDAG.h" | ||||
| 21 | #include "llvm/CodeGen/SelectionDAGISel.h" | ||||
| 22 | #include "llvm/CodeGen/SelectionDAGNodes.h" | ||||
| 23 | #include "llvm/IR/IntrinsicsAMDGPU.h" | ||||
| 24 | #include "llvm/InitializePasses.h" | ||||
| 25 | |||||
| 26 | #ifdef EXPENSIVE_CHECKS | ||||
| 27 | #include "llvm/Analysis/LoopInfo.h" | ||||
| 28 | #include "llvm/IR/Dominators.h" | ||||
| 29 | #endif | ||||
| 30 | |||||
| 31 | #define DEBUG_TYPE"isel" "isel" | ||||
| 32 | |||||
| 33 | using namespace llvm; | ||||
| 34 | |||||
namespace llvm {

// Forward declaration only. NOTE(review): R600InstrInfo is not obviously
// referenced in the visible portion of this file — possibly a leftover from
// before the R600 selector split; confirm before removing.
class R600InstrInfo;

} // end namespace llvm
| 40 | |||||
| 41 | //===----------------------------------------------------------------------===// | ||||
| 42 | // Instruction Selector Implementation | ||||
| 43 | //===----------------------------------------------------------------------===// | ||||
| 44 | |||||
| 45 | namespace { | ||||
| 46 | |||||
| 47 | static bool isNullConstantOrUndef(SDValue V) { | ||||
| 48 | if (V.isUndef()) | ||||
| 49 | return true; | ||||
| 50 | |||||
| 51 | ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V); | ||||
| 52 | return Const != nullptr && Const->isNullValue(); | ||||
| 53 | } | ||||
| 54 | |||||
| 55 | static bool getConstantValue(SDValue N, uint32_t &Out) { | ||||
| 56 | // This is only used for packed vectors, where ussing 0 for undef should | ||||
| 57 | // always be good. | ||||
| 58 | if (N.isUndef()) { | ||||
| 59 | Out = 0; | ||||
| 60 | return true; | ||||
| 61 | } | ||||
| 62 | |||||
| 63 | if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) { | ||||
| 64 | Out = C->getAPIntValue().getSExtValue(); | ||||
| 65 | return true; | ||||
| 66 | } | ||||
| 67 | |||||
| 68 | if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) { | ||||
| 69 | Out = C->getValueAPF().bitcastToAPInt().getSExtValue(); | ||||
| 70 | return true; | ||||
| 71 | } | ||||
| 72 | |||||
| 73 | return false; | ||||
| 74 | } | ||||
| 75 | |||||
| 76 | // TODO: Handle undef as zero | ||||
| 77 | static SDNode *packConstantV2I16(const SDNode *N, SelectionDAG &DAG, | ||||
| 78 | bool Negate = false) { | ||||
| 79 | assert(N->getOpcode() == ISD::BUILD_VECTOR && N->getNumOperands() == 2)((void)0); | ||||
| 80 | uint32_t LHSVal, RHSVal; | ||||
| 81 | if (getConstantValue(N->getOperand(0), LHSVal) && | ||||
| 82 | getConstantValue(N->getOperand(1), RHSVal)) { | ||||
| 83 | SDLoc SL(N); | ||||
| 84 | uint32_t K = Negate ? | ||||
| 85 | (-LHSVal & 0xffff) | (-RHSVal << 16) : | ||||
| 86 | (LHSVal & 0xffff) | (RHSVal << 16); | ||||
| 87 | return DAG.getMachineNode(AMDGPU::S_MOV_B32, SL, N->getValueType(0), | ||||
| 88 | DAG.getTargetConstant(K, SL, MVT::i32)); | ||||
| 89 | } | ||||
| 90 | |||||
| 91 | return nullptr; | ||||
| 92 | } | ||||
| 93 | |||||
| 94 | static SDNode *packNegConstantV2I16(const SDNode *N, SelectionDAG &DAG) { | ||||
| 95 | return packConstantV2I16(N, DAG, true); | ||||
| 96 | } | ||||
| 97 | |||||
| 98 | /// AMDGPU specific code to select AMDGPU machine instructions for | ||||
| 99 | /// SelectionDAG operations. | ||||
/// AMDGPU specific code to select AMDGPU machine instructions for
/// SelectionDAG operations.
class AMDGPUDAGToDAGISel : public SelectionDAGISel {
  // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can
  // make the right decision when generating code for different targets.
  const GCNSubtarget *Subtarget;

  // Default FP mode for the current function.
  AMDGPU::SIModeRegisterDefaults Mode;

  // Snapshot of AMDGPUTargetMachine::EnableLateStructurizeCFG taken in the
  // constructor below.
  bool EnableLateStructurizeCFG;

  // Instructions that will be lowered with a final instruction that zeros the
  // high result bits.
  bool fp16SrcZerosHighBits(unsigned Opc) const;

public:
  // NOTE(review): the initializer dereferences TM, whose default is nullptr —
  // presumably the default-argument form is never actually used; confirm.
  explicit AMDGPUDAGToDAGISel(TargetMachine *TM = nullptr,
                              CodeGenOpt::Level OptLevel = CodeGenOpt::Default)
    : SelectionDAGISel(*TM, OptLevel) {
    EnableLateStructurizeCFG = AMDGPUTargetMachine::EnableLateStructurizeCFG;
  }
  ~AMDGPUDAGToDAGISel() override = default;

  // Declare the analyses this pass consumes so the legacy pass manager
  // schedules and preserves them.
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<AMDGPUArgumentUsageInfo>();
    AU.addRequired<LegacyDivergenceAnalysis>();
#ifdef EXPENSIVE_CHECKS
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<LoopInfoWrapperPass>();
#endif
    SelectionDAGISel::getAnalysisUsage(AU);
  }

  // Rewrite a v2i16/v2f16 BUILD_VECTOR fed by a 16-bit load into a d16 hi/lo
  // load; returns true if the DAG changed (see definition below).
  bool matchLoadD16FromBuildVector(SDNode *N) const;

  bool runOnMachineFunction(MachineFunction &MF) override;
  void PreprocessISelDAG() override;
  void Select(SDNode *N) override;
  StringRef getPassName() const override;
  void PostprocessISelDAG() override;

protected:
  void SelectBuildVector(SDNode *N, unsigned RegClassID);

private:
  std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const;
  bool isNoNanSrc(SDValue N) const;
  bool isInlineImmediate(const SDNode *N, bool Negated = false) const;
  bool isNegInlineImmediate(const SDNode *N) const {
    return isInlineImmediate(N, true);
  }

  // Inline-immediate legality checks per operand width; all honor the
  // subtarget's 1/(2*pi) inline-constant support.
  bool isInlineImmediate16(int64_t Imm) const {
    return AMDGPU::isInlinableLiteral16(Imm, Subtarget->hasInv2PiInlineImm());
  }

  bool isInlineImmediate32(int64_t Imm) const {
    return AMDGPU::isInlinableLiteral32(Imm, Subtarget->hasInv2PiInlineImm());
  }

  bool isInlineImmediate64(int64_t Imm) const {
    return AMDGPU::isInlinableLiteral64(Imm, Subtarget->hasInv2PiInlineImm());
  }

  bool isInlineImmediate(const APFloat &Imm) const {
    return Subtarget->getInstrInfo()->isInlineConstant(Imm);
  }

  bool isVGPRImm(const SDNode *N) const;
  bool isUniformLoad(const SDNode *N) const;
  bool isUniformBr(const SDNode *N) const;

  bool isBaseWithConstantOffset64(SDValue Addr, SDValue &LHS,
                                  SDValue &RHS) const;

  MachineSDNode *buildSMovImm64(SDLoc &DL, uint64_t Val, EVT VT) const;

  // Helpers that glue a CopyToReg of M0 ahead of nodes that implicitly read
  // M0 (LDS/GDS operations).
  SDNode *glueCopyToOp(SDNode *N, SDValue NewChain, SDValue Glue) const;
  SDNode *glueCopyToM0(SDNode *N, SDValue Val) const;
  SDNode *glueCopyToM0LDSInit(SDNode *N) const;

  const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const;
  virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset);
  virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset);

  // DS (LDS) addressing-mode matchers.
  bool isDSOffsetLegal(SDValue Base, unsigned Offset) const;
  bool isDSOffset2Legal(SDValue Base, unsigned Offset0, unsigned Offset1,
                        unsigned Size) const;
  bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const;
  bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
                                 SDValue &Offset1) const;
  bool SelectDS128Bit8ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0,
                                  SDValue &Offset1) const;
  bool SelectDSReadWrite2(SDValue Ptr, SDValue &Base, SDValue &Offset0,
                          SDValue &Offset1, unsigned Size) const;

  // MUBUF (buffer) addressing-mode matchers.
  bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                   SDValue &SOffset, SDValue &Offset, SDValue &Offen,
                   SDValue &Idxen, SDValue &Addr64) const;
  bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
                         SDValue &SOffset, SDValue &Offset) const;
  bool SelectMUBUFScratchOffen(SDNode *Parent,
                               SDValue Addr, SDValue &RSrc, SDValue &VAddr,
                               SDValue &SOffset, SDValue &ImmOffset) const;
  bool SelectMUBUFScratchOffset(SDNode *Parent,
                                SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                                SDValue &Offset) const;

  bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
                         SDValue &Offset) const;

  // FLAT / GLOBAL / SCRATCH addressing-mode matchers.
  bool SelectFlatOffsetImpl(SDNode *N, SDValue Addr, SDValue &VAddr,
                            SDValue &Offset, uint64_t FlatVariant) const;
  bool SelectFlatOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
                        SDValue &Offset) const;
  bool SelectGlobalOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
                          SDValue &Offset) const;
  bool SelectScratchOffset(SDNode *N, SDValue Addr, SDValue &VAddr,
                           SDValue &Offset) const;
  bool SelectGlobalSAddr(SDNode *N, SDValue Addr, SDValue &SAddr,
                         SDValue &VOffset, SDValue &Offset) const;
  bool SelectScratchSAddr(SDNode *N, SDValue Addr, SDValue &SAddr,
                          SDValue &Offset) const;

  // SMRD (scalar memory) addressing-mode matchers.
  bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset,
                        bool &Imm) const;
  SDValue Expand32BitAddress(SDValue Addr) const;
  bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset,
                  bool &Imm) const;
  bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const;
  bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const;
  bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const;
  bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const;

  // VOP3 source-modifier (neg/abs) and clamp/omod matchers.
  bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods,
                          bool AllowAbs = true) const;
  bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3BMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3NoMods(SDValue In, SDValue &Src) const;
  bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                       SDValue &Clamp, SDValue &Omod) const;
  bool SelectVOP3BMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                        SDValue &Clamp, SDValue &Omod) const;
  bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods,
                         SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3OMods(SDValue In, SDValue &Src,
                       SDValue &Clamp, SDValue &Omod) const;

  bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;

  bool SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const;

  bool SelectVOP3OpSelMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;
  bool SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src, unsigned &Mods) const;
  bool SelectVOP3PMadMixMods(SDValue In, SDValue &Src, SDValue &SrcMods) const;

  SDValue getHi16Elt(SDValue In) const;

  SDValue getMaterializedScalarImm32(int64_t Val, const SDLoc &DL) const;

  // Custom selection entry points for nodes the generated matcher defers.
  void SelectADD_SUB_I64(SDNode *N);
  void SelectAddcSubb(SDNode *N);
  void SelectUADDO_USUBO(SDNode *N);
  void SelectDIV_SCALE(SDNode *N);
  void SelectMAD_64_32(SDNode *N);
  void SelectFMA_W_CHAIN(SDNode *N);
  void SelectFMUL_W_CHAIN(SDNode *N);

  SDNode *getS_BFE(unsigned Opcode, const SDLoc &DL, SDValue Val,
                   uint32_t Offset, uint32_t Width);
  void SelectS_BFEFromShifts(SDNode *N);
  void SelectS_BFE(SDNode *N);
  bool isCBranchSCC(const SDNode *N) const;
  void SelectBRCOND(SDNode *N);
  void SelectFMAD_FMA(SDNode *N);
  void SelectATOMIC_CMP_SWAP(SDNode *N);
  void SelectDSAppendConsume(SDNode *N, unsigned IntrID);
  void SelectDS_GWS(SDNode *N, unsigned IntrID);
  void SelectInterpP1F16(SDNode *N);
  void SelectINTRINSIC_W_CHAIN(SDNode *N);
  void SelectINTRINSIC_WO_CHAIN(SDNode *N);
  void SelectINTRINSIC_VOID(SDNode *N);

protected:
  // Include the pieces autogenerated from the target description.
#include "AMDGPUGenDAGISel.inc"
};
| 288 | |||||
/// R600 (pre-GCN) variant of the AMDGPU DAG-to-DAG selector.
class R600DAGToDAGISel : public AMDGPUDAGToDAGISel {
  // NOTE(review): shadows the (private) base-class member of the same name;
  // presumably intentional since the base member is inaccessible here.
  const R600Subtarget *Subtarget;

  bool isConstantLoad(const MemSDNode *N, int cbID) const;
  bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr);
  bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg,
                                       SDValue& Offset);
public:
  explicit R600DAGToDAGISel(TargetMachine *TM, CodeGenOpt::Level OptLevel) :
      AMDGPUDAGToDAGISel(TM, OptLevel) {}

  void Select(SDNode *N) override;

  // R600-specific addressing-mode overrides.
  bool SelectADDRIndirect(SDValue Addr, SDValue &Base,
                          SDValue &Offset) override;
  bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                          SDValue &Offset) override;

  bool runOnMachineFunction(MachineFunction &MF) override;

  // R600 does not use the D16 preprocessing done by the base class.
  void PreprocessISelDAG() override {}

protected:
  // Include the pieces autogenerated from the target description.
#include "R600GenDAGISel.inc"
};
| 315 | |||||
| 316 | static SDValue stripBitcast(SDValue Val) { | ||||
| 317 | return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val; | ||||
| 318 | } | ||||
| 319 | |||||
| 320 | // Figure out if this is really an extract of the high 16-bits of a dword. | ||||
| 321 | static bool isExtractHiElt(SDValue In, SDValue &Out) { | ||||
| 322 | In = stripBitcast(In); | ||||
| 323 | |||||
| 324 | if (In.getOpcode() == ISD::EXTRACT_VECTOR_ELT) { | ||||
| 325 | if (ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(In.getOperand(1))) { | ||||
| 326 | if (!Idx->isOne()) | ||||
| 327 | return false; | ||||
| 328 | Out = In.getOperand(0); | ||||
| 329 | return true; | ||||
| 330 | } | ||||
| 331 | } | ||||
| 332 | |||||
| 333 | if (In.getOpcode() != ISD::TRUNCATE) | ||||
| 334 | return false; | ||||
| 335 | |||||
| 336 | SDValue Srl = In.getOperand(0); | ||||
| 337 | if (Srl.getOpcode() == ISD::SRL) { | ||||
| 338 | if (ConstantSDNode *ShiftAmt = dyn_cast<ConstantSDNode>(Srl.getOperand(1))) { | ||||
| 339 | if (ShiftAmt->getZExtValue() == 16) { | ||||
| 340 | Out = stripBitcast(Srl.getOperand(0)); | ||||
| 341 | return true; | ||||
| 342 | } | ||||
| 343 | } | ||||
| 344 | } | ||||
| 345 | |||||
| 346 | return false; | ||||
| 347 | } | ||||
| 348 | |||||
| 349 | // Look through operations that obscure just looking at the low 16-bits of the | ||||
| 350 | // same register. | ||||
| 351 | static SDValue stripExtractLoElt(SDValue In) { | ||||
| 352 | if (In.getOpcode() == ISD::EXTRACT_VECTOR_ELT) { | ||||
| 353 | if (ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(In.getOperand(1))) { | ||||
| 354 | if (Idx->isNullValue() && In.getValueSizeInBits() <= 32) | ||||
| 355 | return In.getOperand(0); | ||||
| 356 | } | ||||
| 357 | } | ||||
| 358 | |||||
| 359 | if (In.getOpcode() == ISD::TRUNCATE) { | ||||
| 360 | SDValue Src = In.getOperand(0); | ||||
| 361 | if (Src.getValueType().getSizeInBits() == 32) | ||||
| 362 | return stripBitcast(Src); | ||||
| 363 | } | ||||
| 364 | |||||
| 365 | return In; | ||||
| 366 | } | ||||
| 367 | |||||
| 368 | } // end anonymous namespace | ||||
| 369 | |||||
// Legacy pass-manager registration for amdgpu-isel and its required analyses.
// NOTE(review): the text fused after each macro invocation below is
// macro-expansion residue from the analyzer dump, not hand-written code; the
// underlying source is the standard INITIALIZE_PASS_* sequence — confirm
// against the pristine file before editing.
INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISel, "amdgpu-isel",static void *initializeAMDGPUDAGToDAGISelPassOnce(PassRegistry &Registry) {
"AMDGPU DAG->DAG Pattern Instruction Selection", false, false)static void *initializeAMDGPUDAGToDAGISelPassOnce(PassRegistry &Registry) {
INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo)initializeAMDGPUArgumentUsageInfoPass(Registry);
INITIALIZE_PASS_DEPENDENCY(AMDGPUPerfHintAnalysis)initializeAMDGPUPerfHintAnalysisPass(Registry);
INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)initializeLegacyDivergenceAnalysisPass(Registry);
#ifdef EXPENSIVE_CHECKS
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)initializeDominatorTreeWrapperPassPass(Registry);
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)initializeLoopInfoWrapperPassPass(Registry);
#endif
INITIALIZE_PASS_END(AMDGPUDAGToDAGISel, "amdgpu-isel",PassInfo *PI = new PassInfo( "AMDGPU DAG->DAG Pattern Instruction Selection" , "amdgpu-isel", &AMDGPUDAGToDAGISel::ID, PassInfo::NormalCtor_t (callDefaultCtor<AMDGPUDAGToDAGISel>), false, false); Registry .registerPass(*PI, true); return PI; } static llvm::once_flag InitializeAMDGPUDAGToDAGISelPassFlag; void llvm::initializeAMDGPUDAGToDAGISelPass (PassRegistry &Registry) { llvm::call_once(InitializeAMDGPUDAGToDAGISelPassFlag , initializeAMDGPUDAGToDAGISelPassOnce, std::ref(Registry)); }
"AMDGPU DAG->DAG Pattern Instruction Selection", false, false)PassInfo *PI = new PassInfo( "AMDGPU DAG->DAG Pattern Instruction Selection" , "amdgpu-isel", &AMDGPUDAGToDAGISel::ID, PassInfo::NormalCtor_t (callDefaultCtor<AMDGPUDAGToDAGISel>), false, false); Registry .registerPass(*PI, true); return PI; } static llvm::once_flag InitializeAMDGPUDAGToDAGISelPassFlag; void llvm::initializeAMDGPUDAGToDAGISelPass (PassRegistry &Registry) { llvm::call_once(InitializeAMDGPUDAGToDAGISelPassFlag , initializeAMDGPUDAGToDAGISelPassOnce, std::ref(Registry)); }
| 381 | |||||
| 382 | /// This pass converts a legalized DAG into a AMDGPU-specific | ||||
| 383 | // DAG, ready for instruction scheduling. | ||||
| 384 | FunctionPass *llvm::createAMDGPUISelDag(TargetMachine *TM, | ||||
| 385 | CodeGenOpt::Level OptLevel) { | ||||
| 386 | return new AMDGPUDAGToDAGISel(TM, OptLevel); | ||||
| 387 | } | ||||
| 388 | |||||
| 389 | /// This pass converts a legalized DAG into a R600-specific | ||||
| 390 | // DAG, ready for instruction scheduling. | ||||
| 391 | FunctionPass *llvm::createR600ISelDag(TargetMachine *TM, | ||||
| 392 | CodeGenOpt::Level OptLevel) { | ||||
| 393 | return new R600DAGToDAGISel(TM, OptLevel); | ||||
| 394 | } | ||||
| 395 | |||||
// Cache per-function state (GCN subtarget and default FP mode) before
// delegating to the base SelectionDAGISel pass.
bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
#ifdef EXPENSIVE_CHECKS
  // Under expensive checks, verify every loop is in LCSSA form first.
  DominatorTree & DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  LoopInfo * LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
  for (auto &L : LI->getLoopsInPreorder()) {
    assert(L->isLCSSAForm(DT))((void)0);
  }
#endif
  Subtarget = &MF.getSubtarget<GCNSubtarget>();
  // Capture the function's default floating-point mode register settings.
  Mode = AMDGPU::SIModeRegisterDefaults(MF.getFunction());
  return SelectionDAGISel::runOnMachineFunction(MF);
}
| 408 | |||||
// Returns true if the instruction selected for opcode \p Opc is known to
// zero the high 16 bits of its 32-bit result when operating on f16 sources,
// for the current subtarget generation.
bool AMDGPUDAGToDAGISel::fp16SrcZerosHighBits(unsigned Opc) const {
  // XXX - only need to list legal operations.
  switch (Opc) {
  case ISD::FADD:
  case ISD::FSUB:
  case ISD::FMUL:
  case ISD::FDIV:
  case ISD::FREM:
  case ISD::FCANONICALIZE:
  case ISD::UINT_TO_FP:
  case ISD::SINT_TO_FP:
  case ISD::FABS:
    // Fabs is lowered to a bit operation, but it's an and which will clear the
    // high bits anyway.
  case ISD::FSQRT:
  case ISD::FSIN:
  case ISD::FCOS:
  case ISD::FPOWI:
  case ISD::FPOW:
  case ISD::FLOG:
  case ISD::FLOG2:
  case ISD::FLOG10:
  case ISD::FEXP:
  case ISD::FEXP2:
  case ISD::FCEIL:
  case ISD::FTRUNC:
  case ISD::FRINT:
  case ISD::FNEARBYINT:
  case ISD::FROUND:
  case ISD::FFLOOR:
  case ISD::FMINNUM:
  case ISD::FMAXNUM:
  case AMDGPUISD::FRACT:
  case AMDGPUISD::CLAMP:
  case AMDGPUISD::COS_HW:
  case AMDGPUISD::SIN_HW:
  case AMDGPUISD::FMIN3:
  case AMDGPUISD::FMAX3:
  case AMDGPUISD::FMED3:
  case AMDGPUISD::FMAD_FTZ:
  case AMDGPUISD::RCP:
  case AMDGPUISD::RSQ:
  case AMDGPUISD::RCP_IFLAG:
  case AMDGPUISD::LDEXP:
    // On gfx10, all 16-bit instructions preserve the high bits.
    return Subtarget->getGeneration() <= AMDGPUSubtarget::GFX9;
  case ISD::FP_ROUND:
    // We may select fptrunc (fma/mad) to mad_mixlo, which does not zero the
    // high bits on gfx9.
    // TODO: If we had the source node we could see if the source was fma/mad
    return Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS;
  case ISD::FMA:
  case ISD::FMAD:
  case AMDGPUISD::DIV_FIXUP:
    return Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS;
  default:
    // fcopysign, select and others may be lowered to 32-bit bit operations
    // which don't zero the high bits.
    return false;
  }
}
| 470 | |||||
| 471 | bool AMDGPUDAGToDAGISel::matchLoadD16FromBuildVector(SDNode *N) const { | ||||
| 472 | assert(Subtarget->d16PreservesUnusedBits())((void)0); | ||||
| 473 | MVT VT = N->getValueType(0).getSimpleVT(); | ||||
| 474 | if (VT != MVT::v2i16 && VT != MVT::v2f16) | ||||
| 475 | return false; | ||||
| 476 | |||||
| 477 | SDValue Lo = N->getOperand(0); | ||||
| 478 | SDValue Hi = N->getOperand(1); | ||||
| 479 | |||||
| 480 | LoadSDNode *LdHi = dyn_cast<LoadSDNode>(stripBitcast(Hi)); | ||||
| 481 | |||||
| 482 | // build_vector lo, (load ptr) -> load_d16_hi ptr, lo | ||||
| 483 | // build_vector lo, (zextload ptr from i8) -> load_d16_hi_u8 ptr, lo | ||||
| 484 | // build_vector lo, (sextload ptr from i8) -> load_d16_hi_i8 ptr, lo | ||||
| 485 | |||||
| 486 | // Need to check for possible indirect dependencies on the other half of the | ||||
| 487 | // vector to avoid introducing a cycle. | ||||
| 488 | if (LdHi && Hi.hasOneUse() && !LdHi->isPredecessorOf(Lo.getNode())) { | ||||
| 489 | SDVTList VTList = CurDAG->getVTList(VT, MVT::Other); | ||||
| 490 | |||||
| 491 | SDValue TiedIn = CurDAG->getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Lo); | ||||
| 492 | SDValue Ops[] = { | ||||
| 493 | LdHi->getChain(), LdHi->getBasePtr(), TiedIn | ||||
| 494 | }; | ||||
| 495 | |||||
| 496 | unsigned LoadOp = AMDGPUISD::LOAD_D16_HI; | ||||
| 497 | if (LdHi->getMemoryVT() == MVT::i8) { | ||||
| 498 | LoadOp = LdHi->getExtensionType() == ISD::SEXTLOAD ? | ||||
| 499 | AMDGPUISD::LOAD_D16_HI_I8 : AMDGPUISD::LOAD_D16_HI_U8; | ||||
| 500 | } else { | ||||
| 501 | assert(LdHi->getMemoryVT() == MVT::i16)((void)0); | ||||
| 502 | } | ||||
| 503 | |||||
| 504 | SDValue NewLoadHi = | ||||
| 505 | CurDAG->getMemIntrinsicNode(LoadOp, SDLoc(LdHi), VTList, | ||||
| 506 | Ops, LdHi->getMemoryVT(), | ||||
| 507 | LdHi->getMemOperand()); | ||||
| 508 | |||||
| 509 | CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewLoadHi); | ||||
| 510 | CurDAG->ReplaceAllUsesOfValueWith(SDValue(LdHi, 1), NewLoadHi.getValue(1)); | ||||
| 511 | return true; | ||||
| 512 | } | ||||
| 513 | |||||
| 514 | // build_vector (load ptr), hi -> load_d16_lo ptr, hi | ||||
| 515 | // build_vector (zextload ptr from i8), hi -> load_d16_lo_u8 ptr, hi | ||||
| 516 | // build_vector (sextload ptr from i8), hi -> load_d16_lo_i8 ptr, hi | ||||
| 517 | LoadSDNode *LdLo = dyn_cast<LoadSDNode>(stripBitcast(Lo)); | ||||
| 518 | if (LdLo
| ||||
| 519 | SDValue TiedIn = getHi16Elt(Hi); | ||||
| 520 | if (!TiedIn || LdLo->isPredecessorOf(TiedIn.getNode())) | ||||
| 521 | return false; | ||||
| 522 | |||||
| 523 | SDVTList VTList = CurDAG->getVTList(VT, MVT::Other); | ||||
| 524 | unsigned LoadOp = AMDGPUISD::LOAD_D16_LO; | ||||
| 525 | if (LdLo->getMemoryVT() == MVT::i8) { | ||||
| 526 | LoadOp = LdLo->getExtensionType() == ISD::SEXTLOAD ? | ||||
| 527 | AMDGPUISD::LOAD_D16_LO_I8 : AMDGPUISD::LOAD_D16_LO_U8; | ||||
| 528 | } else { | ||||
| 529 | assert(LdLo->getMemoryVT() == MVT::i16)((void)0); | ||||
| 530 | } | ||||
| 531 | |||||
| 532 | TiedIn = CurDAG->getNode(ISD::BITCAST, SDLoc(N), VT, TiedIn); | ||||
| 533 | |||||
| 534 | SDValue Ops[] = { | ||||
| 535 | LdLo->getChain(), LdLo->getBasePtr(), TiedIn | ||||
| 536 | }; | ||||
| 537 | |||||
| 538 | SDValue NewLoadLo = | ||||
| 539 | CurDAG->getMemIntrinsicNode(LoadOp, SDLoc(LdLo), VTList, | ||||
| 540 | Ops, LdLo->getMemoryVT(), | ||||
| 541 | LdLo->getMemOperand()); | ||||
| 542 | |||||
| 543 | CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewLoadLo); | ||||
| 544 | CurDAG->ReplaceAllUsesOfValueWith(SDValue(LdLo, 1), NewLoadLo.getValue(1)); | ||||
| 545 | return true; | ||||
| 546 | } | ||||
| 547 | |||||
| 548 | return false; | ||||
| 549 | } | ||||
| 550 | |||||
| 551 | void AMDGPUDAGToDAGISel::PreprocessISelDAG() { | ||||
| 552 | if (!Subtarget->d16PreservesUnusedBits()) | ||||
| |||||
| 553 | return; | ||||
| 554 | |||||
| 555 | SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end(); | ||||
| 556 | |||||
| 557 | bool MadeChange = false; | ||||
| 558 | while (Position != CurDAG->allnodes_begin()) { | ||||
| 559 | SDNode *N = &*--Position; | ||||
| 560 | if (N->use_empty()) | ||||
| 561 | continue; | ||||
| 562 | |||||
| 563 | switch (N->getOpcode()) { | ||||
| 564 | case ISD::BUILD_VECTOR: | ||||
| 565 | MadeChange |= matchLoadD16FromBuildVector(N); | ||||
| 566 | break; | ||||
| 567 | default: | ||||
| 568 | break; | ||||
| 569 | } | ||||
| 570 | } | ||||
| 571 | |||||
| 572 | if (MadeChange) { | ||||
| 573 | CurDAG->RemoveDeadNodes(); | ||||
| 574 | LLVM_DEBUG(dbgs() << "After PreProcess:\n";do { } while (false) | ||||
| 575 | CurDAG->dump();)do { } while (false); | ||||
| 576 | } | ||||
| 577 | } | ||||
| 578 | |||||
| 579 | bool AMDGPUDAGToDAGISel::isNoNanSrc(SDValue N) const { | ||||
| 580 | if (TM.Options.NoNaNsFPMath) | ||||
| 581 | return true; | ||||
| 582 | |||||
| 583 | // TODO: Move into isKnownNeverNaN | ||||
| 584 | if (N->getFlags().hasNoNaNs()) | ||||
| 585 | return true; | ||||
| 586 | |||||
| 587 | return CurDAG->isKnownNeverNaN(N); | ||||
| 588 | } | ||||
| 589 | |||||
| 590 | bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N, | ||||
| 591 | bool Negated) const { | ||||
| 592 | if (N->isUndef()) | ||||
| 593 | return true; | ||||
| 594 | |||||
| 595 | const SIInstrInfo *TII = Subtarget->getInstrInfo(); | ||||
| 596 | if (Negated) { | ||||
| 597 | if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) | ||||
| 598 | return TII->isInlineConstant(-C->getAPIntValue()); | ||||
| 599 | |||||
| 600 | if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) | ||||
| 601 | return TII->isInlineConstant(-C->getValueAPF().bitcastToAPInt()); | ||||
| 602 | |||||
| 603 | } else { | ||||
| 604 | if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) | ||||
| 605 | return TII->isInlineConstant(C->getAPIntValue()); | ||||
| 606 | |||||
| 607 | if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) | ||||
| 608 | return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt()); | ||||
| 609 | } | ||||
| 610 | |||||
| 611 | return false; | ||||
| 612 | } | ||||
| 613 | |||||
| 614 | /// Determine the register class for \p OpNo | ||||
/// \returns The register class of the virtual register that will be used for
/// the given operand number \OpNo or NULL if the register class cannot be
/// determined.
const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N,
                                                                  unsigned OpNo) const {
  if (!N->isMachineOpcode()) {
    if (N->getOpcode() == ISD::CopyToReg) {
      // For CopyToReg the class is that of the destination register
      // (operand 1 of the node).
      Register Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg();
      if (Reg.isVirtual()) {
        MachineRegisterInfo &MRI = CurDAG->getMachineFunction().getRegInfo();
        return MRI.getRegClass(Reg);
      }

      // Physical register: ask the target register info.
      const SIRegisterInfo *TRI
        = static_cast<const GCNSubtarget *>(Subtarget)->getRegisterInfo();
      return TRI->getPhysRegClass(Reg);
    }

    // Other target-independent nodes carry no register class information.
    return nullptr;
  }

  switch (N->getMachineOpcode()) {
  default: {
    // Generic machine instruction: look the class up in the MC operand info.
    const MCInstrDesc &Desc =
        Subtarget->getInstrInfo()->get(N->getMachineOpcode());
    // OpNo counts use operands only, so skip over the defs.
    unsigned OpIdx = Desc.getNumDefs() + OpNo;
    if (OpIdx >= Desc.getNumOperands())
      return nullptr;
    int RegClass = Desc.OpInfo[OpIdx].RegClass;
    if (RegClass == -1)
      return nullptr;

    return Subtarget->getRegisterInfo()->getRegClass(RegClass);
  }
  case AMDGPU::REG_SEQUENCE: {
    // REG_SEQUENCE operand 0 is the super-register class ID; each value
    // operand is followed by its subregister index, so the class for value
    // operand OpNo is the subclass of the super class supporting that subreg.
    unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    const TargetRegisterClass *SuperRC =
        Subtarget->getRegisterInfo()->getRegClass(RCID);

    SDValue SubRegOp = N->getOperand(OpNo + 1);
    unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue();
    return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC,
                                                               SubRegIdx);
  }
  }
}
| 661 | |||||
| 662 | SDNode *AMDGPUDAGToDAGISel::glueCopyToOp(SDNode *N, SDValue NewChain, | ||||
| 663 | SDValue Glue) const { | ||||
| 664 | SmallVector <SDValue, 8> Ops; | ||||
| 665 | Ops.push_back(NewChain); // Replace the chain. | ||||
| 666 | for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i) | ||||
| 667 | Ops.push_back(N->getOperand(i)); | ||||
| 668 | |||||
| 669 | Ops.push_back(Glue); | ||||
| 670 | return CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops); | ||||
| 671 | } | ||||
| 672 | |||||
| 673 | SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N, SDValue Val) const { | ||||
| 674 | const SITargetLowering& Lowering = | ||||
| 675 | *static_cast<const SITargetLowering*>(getTargetLowering()); | ||||
| 676 | |||||
| 677 | assert(N->getOperand(0).getValueType() == MVT::Other && "Expected chain")((void)0); | ||||
| 678 | |||||
| 679 | SDValue M0 = Lowering.copyToM0(*CurDAG, N->getOperand(0), SDLoc(N), Val); | ||||
| 680 | return glueCopyToOp(N, M0, M0.getValue(1)); | ||||
| 681 | } | ||||
| 682 | |||||
| 683 | SDNode *AMDGPUDAGToDAGISel::glueCopyToM0LDSInit(SDNode *N) const { | ||||
| 684 | unsigned AS = cast<MemSDNode>(N)->getAddressSpace(); | ||||
| 685 | if (AS == AMDGPUAS::LOCAL_ADDRESS) { | ||||
| 686 | if (Subtarget->ldsRequiresM0Init()) | ||||
| 687 | return glueCopyToM0(N, CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32)); | ||||
| 688 | } else if (AS == AMDGPUAS::REGION_ADDRESS) { | ||||
| 689 | MachineFunction &MF = CurDAG->getMachineFunction(); | ||||
| 690 | unsigned Value = MF.getInfo<SIMachineFunctionInfo>()->getGDSSize(); | ||||
| 691 | return | ||||
| 692 | glueCopyToM0(N, CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i32)); | ||||
| 693 | } | ||||
| 694 | return N; | ||||
| 695 | } | ||||
| 696 | |||||
| 697 | MachineSDNode *AMDGPUDAGToDAGISel::buildSMovImm64(SDLoc &DL, uint64_t Imm, | ||||
| 698 | EVT VT) const { | ||||
| 699 | SDNode *Lo = CurDAG->getMachineNode( | ||||
| 700 | AMDGPU::S_MOV_B32, DL, MVT::i32, | ||||
| 701 | CurDAG->getTargetConstant(Imm & 0xFFFFFFFF, DL, MVT::i32)); | ||||
| 702 | SDNode *Hi = | ||||
| 703 | CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, | ||||
| 704 | CurDAG->getTargetConstant(Imm >> 32, DL, MVT::i32)); | ||||
| 705 | const SDValue Ops[] = { | ||||
| 706 | CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32), | ||||
| 707 | SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32), | ||||
| 708 | SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)}; | ||||
| 709 | |||||
| 710 | return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, VT, Ops); | ||||
| 711 | } | ||||
| 712 | |||||
// Select a BUILD_VECTOR / SCALAR_TO_VECTOR into a REG_SEQUENCE of the given
// register class, pairing each element value with its subregister index.
void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
  EVT VT = N->getValueType(0);
  unsigned NumVectorElts = VT.getVectorNumElements();
  EVT EltVT = VT.getVectorElementType();
  SDLoc DL(N);
  SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);

  // A single-element "vector" is just a copy into the requested class.
  if (NumVectorElts == 1) {
    CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0),
                         RegClass);
    return;
  }

  assert(NumVectorElts <= 32 && "Vectors with more than 32 elements not "
                                "supported yet");
  // 32 = Max Num Vector Elements
  // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
  // 1 = Vector Register Class
  SmallVector<SDValue, 32 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);

  bool IsGCN = CurDAG->getSubtarget().getTargetTriple().getArch() ==
               Triple::amdgcn;
  RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
  bool IsRegSeq = true;
  unsigned NOps = N->getNumOperands();
  for (unsigned i = 0; i < NOps; i++) {
    // XXX: Why is this here?
    if (isa<RegisterSDNode>(N->getOperand(i))) {
      IsRegSeq = false;
      break;
    }
    unsigned Sub = IsGCN ? SIRegisterInfo::getSubRegFromChannel(i)
                         : R600RegisterInfo::getSubRegFromChannel(i);
    RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
    RegSeqArgs[1 + (2 * i) + 1] = CurDAG->getTargetConstant(Sub, DL, MVT::i32);
  }
  if (NOps != NumVectorElts) {
    // Fill in the missing undef elements if this was a scalar_to_vector.
    assert(N->getOpcode() == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts);
    MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
                                                   DL, EltVT);
    for (unsigned i = NOps; i < NumVectorElts; ++i) {
      unsigned Sub = IsGCN ? SIRegisterInfo::getSubRegFromChannel(i)
                           : R600RegisterInfo::getSubRegFromChannel(i);
      RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
      RegSeqArgs[1 + (2 * i) + 1] =
          CurDAG->getTargetConstant(Sub, DL, MVT::i32);
    }
  }

  // NOTE(review): when IsRegSeq is false, N is handed to SelectCode and then
  // still passed to SelectNodeTo below — confirm this fall-through is
  // intentional; it looks suspicious but mirrors the upstream code.
  if (!IsRegSeq)
    SelectCode(N);
  CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs);
}
| 767 | |||||
// Main instruction-selection entry point: dispatch node N to a custom
// selector where AMDGPU needs special handling, otherwise fall through to
// the tablegen-generated SelectCode.
void AMDGPUDAGToDAGISel::Select(SDNode *N) {
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return;   // Already selected.
  }

  // isa<MemSDNode> almost works but is slightly too permissive for some DS
  // intrinsics.
  if (Opc == ISD::LOAD || Opc == ISD::STORE || isa<AtomicSDNode>(N) ||
      (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC ||
       Opc == ISD::ATOMIC_LOAD_FADD ||
       Opc == AMDGPUISD::ATOMIC_LOAD_FMIN ||
       Opc == AMDGPUISD::ATOMIC_LOAD_FMAX)) {
    // Memory nodes may need M0 initialized for LDS/GDS first; N may be
    // morphed to carry the glued copy.
    N = glueCopyToM0LDSInit(N);
    SelectCode(N);
    return;
  }

  switch (Opc) {
  default:
    break;
  // We are selecting i64 ADD here instead of custom lower it during
  // DAG legalization, so we can fold some i64 ADDs used for address
  // calculation into the LOAD and STORE instructions.
  case ISD::ADDC:
  case ISD::ADDE:
  case ISD::SUBC:
  case ISD::SUBE: {
    if (N->getValueType(0) != MVT::i64)
      break;

    SelectADD_SUB_I64(N);
    return;
  }
  case ISD::ADDCARRY:
  case ISD::SUBCARRY:
    if (N->getValueType(0) != MVT::i32)
      break;

    SelectAddcSubb(N);
    return;
  case ISD::UADDO:
  case ISD::USUBO: {
    SelectUADDO_USUBO(N);
    return;
  }
  case AMDGPUISD::FMUL_W_CHAIN: {
    SelectFMUL_W_CHAIN(N);
    return;
  }
  case AMDGPUISD::FMA_W_CHAIN: {
    SelectFMA_W_CHAIN(N);
    return;
  }

  case ISD::SCALAR_TO_VECTOR:
  case ISD::BUILD_VECTOR: {
    EVT VT = N->getValueType(0);
    unsigned NumVectorElts = VT.getVectorNumElements();
    if (VT.getScalarSizeInBits() == 16) {
      // Try to pack a two-element 16-bit build_vector of constants into a
      // single 32-bit immediate move.
      if (Opc == ISD::BUILD_VECTOR && NumVectorElts == 2) {
        if (SDNode *Packed = packConstantV2I16(N, *CurDAG)) {
          ReplaceNode(N, Packed);
          return;
        }
      }

      break;
    }

    // 32-bit elements: build a REG_SEQUENCE over an SGPR class wide enough
    // for the whole vector.
    assert(VT.getVectorElementType().bitsEq(MVT::i32));
    unsigned RegClassID =
        SIRegisterInfo::getSGPRClassForBitWidth(NumVectorElts * 32)->getID();
    SelectBuildVector(N, RegClassID);
    return;
  }
  case ISD::BUILD_PAIR: {
    // Combine the two halves into one register pair via REG_SEQUENCE.
    SDValue RC, SubReg0, SubReg1;
    SDLoc DL(N);
    if (N->getValueType(0) == MVT::i128) {
      RC = CurDAG->getTargetConstant(AMDGPU::SGPR_128RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
    } else if (N->getValueType(0) == MVT::i64) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
    } else {
      llvm_unreachable("Unhandled value type for BUILD_PAIR");
    }
    const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                            N->getOperand(1), SubReg1 };
    ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                          N->getValueType(0), Ops));
    return;
  }

  case ISD::Constant:
  case ISD::ConstantFP: {
    // 64-bit non-inline immediates are materialized with a pair of S_MOVs.
    if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
      break;

    uint64_t Imm;
    if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
      Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
    else {
      ConstantSDNode *C = cast<ConstantSDNode>(N);
      Imm = C->getZExtValue();
    }

    SDLoc DL(N);
    ReplaceNode(N, buildSMovImm64(DL, Imm, N->getValueType(0)));
    return;
  }
  case AMDGPUISD::BFE_I32:
  case AMDGPUISD::BFE_U32: {
    // There is a scalar version available, but unlike the vector version which
    // has a separate operand for the offset and width, the scalar version packs
    // the width and offset into a single operand. Try to move to the scalar
    // version if the offsets are constant, so that we can try to keep extended
    // loads of kernel arguments in SGPRs.

    // TODO: Technically we could try to pattern match scalar bitshifts of
    // dynamic values, but it's probably not useful.
    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!Offset)
      break;

    ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
    if (!Width)
      break;

    bool Signed = Opc == AMDGPUISD::BFE_I32;

    uint32_t OffsetVal = Offset->getZExtValue();
    uint32_t WidthVal = Width->getZExtValue();

    ReplaceNode(N, getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32,
                            SDLoc(N), N->getOperand(0), OffsetVal, WidthVal));
    return;
  }
  case AMDGPUISD::DIV_SCALE: {
    SelectDIV_SCALE(N);
    return;
  }
  case AMDGPUISD::MAD_I64_I32:
  case AMDGPUISD::MAD_U64_U32: {
    SelectMAD_64_32(N);
    return;
  }
  case ISD::CopyToReg: {
    // Legalize the node's operands, then let generated code select it.
    const SITargetLowering& Lowering =
      *static_cast<const SITargetLowering*>(getTargetLowering());
    N = Lowering.legalizeTargetIndependentNode(N, *CurDAG);
    break;
  }
  case ISD::AND:
  case ISD::SRL:
  case ISD::SRA:
  case ISD::SIGN_EXTEND_INREG:
    // These may be selectable as scalar bitfield extracts.
    if (N->getValueType(0) != MVT::i32)
      break;

    SelectS_BFE(N);
    return;
  case ISD::BRCOND:
    SelectBRCOND(N);
    return;
  case ISD::FMAD:
  case ISD::FMA:
    SelectFMAD_FMA(N);
    return;
  case AMDGPUISD::ATOMIC_CMP_SWAP:
    SelectATOMIC_CMP_SWAP(N);
    return;
  case AMDGPUISD::CVT_PKRTZ_F16_F32:
  case AMDGPUISD::CVT_PKNORM_I16_F32:
  case AMDGPUISD::CVT_PKNORM_U16_F32:
  case AMDGPUISD::CVT_PK_U16_U32:
  case AMDGPUISD::CVT_PK_I16_I32: {
    // Hack around using a legal type if f16 is illegal.
    if (N->getValueType(0) == MVT::i32) {
      MVT NewVT = Opc == AMDGPUISD::CVT_PKRTZ_F16_F32 ? MVT::v2f16 : MVT::v2i16;
      N = CurDAG->MorphNodeTo(N, N->getOpcode(), CurDAG->getVTList(NewVT),
                              { N->getOperand(0), N->getOperand(1) });
      SelectCode(N);
      return;
    }

    break;
  }
  case ISD::INTRINSIC_W_CHAIN: {
    SelectINTRINSIC_W_CHAIN(N);
    return;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    SelectINTRINSIC_WO_CHAIN(N);
    return;
  }
  case ISD::INTRINSIC_VOID: {
    SelectINTRINSIC_VOID(N);
    return;
  }
  }

  // Everything else goes through the generated matcher.
  SelectCode(N);
}
| 976 | |||||
| 977 | bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const { | ||||
| 978 | const BasicBlock *BB = FuncInfo->MBB->getBasicBlock(); | ||||
| 979 | const Instruction *Term = BB->getTerminator(); | ||||
| 980 | return Term->getMetadata("amdgpu.uniform") || | ||||
| 981 | Term->getMetadata("structurizecfg.uniform"); | ||||
| 982 | } | ||||
| 983 | |||||
// Recognize a base+constant-offset address hidden behind the split-64-bit-`or`
// pattern described below; on success N0 gets the base and N1 the offset.
static bool getBaseWithOffsetUsingSplitOR(SelectionDAG &DAG, SDValue Addr,
                                          SDValue &N0, SDValue &N1) {
  if (Addr.getValueType() == MVT::i64 && Addr.getOpcode() == ISD::BITCAST &&
      Addr.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
    // As we split 64-bit `or` earlier, it's complicated pattern to match, i.e.
    // (i64 (bitcast (v2i32 (build_vector
    //                        (or (extract_vector_elt V, 0), OFFSET),
    //                        (extract_vector_elt V, 1)))))
    SDValue Lo = Addr.getOperand(0).getOperand(0);
    if (Lo.getOpcode() == ISD::OR && DAG.isBaseWithConstantOffset(Lo)) {
      SDValue BaseLo = Lo.getOperand(0);
      SDValue BaseHi = Addr.getOperand(0).getOperand(1);
      // Check that split base (Lo and Hi) are extracted from the same one.
      if (BaseLo.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
          BaseHi.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
          BaseLo.getOperand(0) == BaseHi.getOperand(0) &&
          // Lo is statically extracted from index 0.
          isa<ConstantSDNode>(BaseLo.getOperand(1)) &&
          BaseLo.getConstantOperandVal(1) == 0 &&
          // Hi is statically extracted from index 1.
          isa<ConstantSDNode>(BaseHi.getOperand(1)) &&
          BaseHi.getConstantOperandVal(1) == 1) {
        N0 = BaseLo.getOperand(0).getOperand(0);
        N1 = Lo.getOperand(1);
        return true;
      }
    }
  }
  return false;
}
| 1014 | |||||
| 1015 | bool AMDGPUDAGToDAGISel::isBaseWithConstantOffset64(SDValue Addr, SDValue &LHS, | ||||
| 1016 | SDValue &RHS) const { | ||||
| 1017 | if (CurDAG->isBaseWithConstantOffset(Addr)) { | ||||
| 1018 | LHS = Addr.getOperand(0); | ||||
| 1019 | RHS = Addr.getOperand(1); | ||||
| 1020 | return true; | ||||
| 1021 | } | ||||
| 1022 | |||||
| 1023 | if (getBaseWithOffsetUsingSplitOR(*CurDAG, Addr, LHS, RHS)) { | ||||
| 1024 | assert(LHS && RHS && isa<ConstantSDNode>(RHS))((void)0); | ||||
| 1025 | return true; | ||||
| 1026 | } | ||||
| 1027 | |||||
| 1028 | return false; | ||||
| 1029 | } | ||||
| 1030 | |||||
// Human-readable pass name reported by the pass manager.
StringRef AMDGPUDAGToDAGISel::getPassName() const {
  return "AMDGPU DAG->DAG Pattern Instruction Selection";
}
| 1034 | |||||
| 1035 | //===----------------------------------------------------------------------===// | ||||
| 1036 | // Complex Patterns | ||||
| 1037 | //===----------------------------------------------------------------------===// | ||||
| 1038 | |||||
// Complex-pattern hook for VTX read addressing; this implementation never
// matches (always returns false), leaving Base/Offset untouched.
bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  return false;
}
| 1043 | |||||
| 1044 | bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base, | ||||
| 1045 | SDValue &Offset) { | ||||
| 1046 | ConstantSDNode *C; | ||||
| 1047 | SDLoc DL(Addr); | ||||
| 1048 | |||||
| 1049 | if ((C = dyn_cast<ConstantSDNode>(Addr))) { | ||||
| 1050 | Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32); | ||||
| 1051 | Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32); | ||||
| 1052 | } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) && | ||||
| 1053 | (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) { | ||||
| 1054 | Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32); | ||||
| 1055 | Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32); | ||||
| 1056 | } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) && | ||||
| 1057 | (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) { | ||||
| 1058 | Base = Addr.getOperand(0); | ||||
| 1059 | Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32); | ||||
| 1060 | } else { | ||||
| 1061 | Base = Addr; | ||||
| 1062 | Offset = CurDAG->getTargetConstant(0, DL, MVT::i32); | ||||
| 1063 | } | ||||
| 1064 | |||||
| 1065 | return true; | ||||
| 1066 | } | ||||
| 1067 | |||||
| 1068 | SDValue AMDGPUDAGToDAGISel::getMaterializedScalarImm32(int64_t Val, | ||||
| 1069 | const SDLoc &DL) const { | ||||
| 1070 | SDNode *Mov = CurDAG->getMachineNode( | ||||
| 1071 | AMDGPU::S_MOV_B32, DL, MVT::i32, | ||||
| 1072 | CurDAG->getTargetConstant(Val, DL, MVT::i32)); | ||||
| 1073 | return SDValue(Mov, 0); | ||||
| 1074 | } | ||||
| 1075 | |||||
// FIXME: Should only handle addcarry/subcarry
// Select a 64-bit add/sub by splitting it into two 32-bit halves linked
// through the carry: lo-half op, then carry-consuming hi-half op, with the
// results reassembled via REG_SEQUENCE.
void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  unsigned Opcode = N->getOpcode();
  // ADDE/SUBE consume an incoming carry; they and ADDC/SUBC produce one.
  bool ConsumeCarry = (Opcode == ISD::ADDE || Opcode == ISD::SUBE);
  bool ProduceCarry =
      ConsumeCarry || Opcode == ISD::ADDC || Opcode == ISD::SUBC;
  bool IsAdd = Opcode == ISD::ADD || Opcode == ISD::ADDC || Opcode == ISD::ADDE;

  SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
  SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);

  // Extract the lo/hi 32-bit halves of both operands.
  SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub0);
  SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub1);

  SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub0);
  SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub1);

  SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);

  // Indexed as [carry-in?][divergent?][is-add?]: scalar vs. vector forms of
  // the plain and carry-consuming 32-bit add/sub opcodes.
  static const unsigned OpcMap[2][2][2] = {
      {{AMDGPU::S_SUB_U32, AMDGPU::S_ADD_U32},
       {AMDGPU::V_SUB_CO_U32_e32, AMDGPU::V_ADD_CO_U32_e32}},
      {{AMDGPU::S_SUBB_U32, AMDGPU::S_ADDC_U32},
       {AMDGPU::V_SUBB_U32_e32, AMDGPU::V_ADDC_U32_e32}}};

  unsigned Opc = OpcMap[0][N->isDivergent()][IsAdd];
  unsigned CarryOpc = OpcMap[1][N->isDivergent()][IsAdd];

  // Lo half: plain op, or carry-consuming op fed by the incoming carry.
  SDNode *AddLo;
  if (!ConsumeCarry) {
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };
    AddLo = CurDAG->getMachineNode(Opc, DL, VTList, Args);
  } else {
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0), N->getOperand(2) };
    AddLo = CurDAG->getMachineNode(CarryOpc, DL, VTList, Args);
  }
  // Hi half always consumes the lo half's carry (glue result 1).
  SDValue AddHiArgs[] = {
    SDValue(Hi0, 0),
    SDValue(Hi1, 0),
    SDValue(AddLo, 1)
  };
  SDNode *AddHi = CurDAG->getMachineNode(CarryOpc, DL, VTList, AddHiArgs);

  // Recombine the halves into a 64-bit result.
  SDValue RegSequenceArgs[] = {
    CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
    SDValue(AddLo,0),
    Sub0,
    SDValue(AddHi,0),
    Sub1,
  };
  SDNode *RegSequence = CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
                                               MVT::i64, RegSequenceArgs);

  if (ProduceCarry) {
    // Replace the carry-use
    ReplaceUses(SDValue(N, 1), SDValue(AddHi, 1));
  }

  // Replace the remaining uses.
  ReplaceNode(N, RegSequence);
}
| 1145 | |||||
| 1146 | void AMDGPUDAGToDAGISel::SelectAddcSubb(SDNode *N) { | ||||
| 1147 | SDLoc DL(N); | ||||
| 1148 | SDValue LHS = N->getOperand(0); | ||||
| 1149 | SDValue RHS = N->getOperand(1); | ||||
| 1150 | SDValue CI = N->getOperand(2); | ||||
| 1151 | |||||
| 1152 | if (N->isDivergent()) { | ||||
| 1153 | unsigned Opc = N->getOpcode() == ISD::ADDCARRY ? AMDGPU::V_ADDC_U32_e64 | ||||
| 1154 | : AMDGPU::V_SUBB_U32_e64; | ||||
| 1155 | CurDAG->SelectNodeTo( | ||||
| 1156 | N, Opc, N->getVTList(), | ||||
| 1157 | {LHS, RHS, CI, | ||||
| 1158 | CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/}); | ||||
| 1159 | } else { | ||||
| 1160 | unsigned Opc = N->getOpcode() == ISD::ADDCARRY ? AMDGPU::S_ADD_CO_PSEUDO | ||||
| 1161 | : AMDGPU::S_SUB_CO_PSEUDO; | ||||
| 1162 | CurDAG->SelectNodeTo(N, Opc, N->getVTList(), {LHS, RHS, CI}); | ||||
| 1163 | } | ||||
| 1164 | } | ||||
| 1165 | |||||
| 1166 | void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) { | ||||
| 1167 | // The name of the opcodes are misleading. v_add_i32/v_sub_i32 have unsigned | ||||
| 1168 | // carry out despite the _i32 name. These were renamed in VI to _U32. | ||||
| 1169 | // FIXME: We should probably rename the opcodes here. | ||||
| 1170 | bool IsAdd = N->getOpcode() == ISD::UADDO; | ||||
| 1171 | bool IsVALU = N->isDivergent(); | ||||
| 1172 | |||||
| 1173 | for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end(); UI != E; | ||||
| 1174 | ++UI) | ||||
| 1175 | if (UI.getUse().getResNo() == 1) { | ||||
| 1176 | if ((IsAdd && (UI->getOpcode() != ISD::ADDCARRY)) || | ||||
| 1177 | (!IsAdd && (UI->getOpcode() != ISD::SUBCARRY))) { | ||||
| 1178 | IsVALU = true; | ||||
| 1179 | break; | ||||
| 1180 | } | ||||
| 1181 | } | ||||
| 1182 | |||||
| 1183 | if (IsVALU) { | ||||
| 1184 | unsigned Opc = IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64; | ||||
| 1185 | |||||
| 1186 | CurDAG->SelectNodeTo( | ||||
| 1187 | N, Opc, N->getVTList(), | ||||
| 1188 | {N->getOperand(0), N->getOperand(1), | ||||
| 1189 | CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/}); | ||||
| 1190 | } else { | ||||
| 1191 | unsigned Opc = N->getOpcode() == ISD::UADDO ? AMDGPU::S_UADDO_PSEUDO | ||||
| 1192 | : AMDGPU::S_USUBO_PSEUDO; | ||||
| 1193 | |||||
| 1194 | CurDAG->SelectNodeTo(N, Opc, N->getVTList(), | ||||
| 1195 | {N->getOperand(0), N->getOperand(1)}); | ||||
| 1196 | } | ||||
| 1197 | } | ||||
| 1198 | |||||
// Select FMA_W_CHAIN into V_FMA_F32_e64, gathering the VOP3 source modifiers
// for each of the three sources.  The Ops layout is fixed by the instruction:
// src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, omod
void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
  SDLoc SL(N);
  // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, omod
  SDValue Ops[10];

  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  SelectVOP3Mods(N->getOperand(3), Ops[5], Ops[4]);
  // Ops[8]/Ops[9] carry N's operand 0 (presumably the chain, given the
  // _W_CHAIN opcode) and operand 4 — confirm against the node definition.
  Ops[8] = N->getOperand(0);
  Ops[9] = N->getOperand(4);

  CurDAG->SelectNodeTo(N, AMDGPU::V_FMA_F32_e64, N->getVTList(), Ops);
}
| 1212 | |||||
// Select FMUL_W_CHAIN into V_MUL_F32_e64, gathering VOP3 source modifiers for
// both sources.  The Ops layout is fixed by the instruction:
// src0_modifiers, src0, src1_modifiers, src1, clamp, omod
void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) {
  SDLoc SL(N);
  // src0_modifiers, src0, src1_modifiers, src1, clamp, omod
  SDValue Ops[8];

  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  // Ops[6]/Ops[7] carry N's operand 0 (presumably the chain, given the
  // _W_CHAIN opcode) and operand 3 — confirm against the node definition.
  Ops[6] = N->getOperand(0);
  Ops[7] = N->getOperand(3);

  CurDAG->SelectNodeTo(N, AMDGPU::V_MUL_F32_e64, N->getVTList(), Ops);
}
| 1225 | |||||
| 1226 | // We need to handle this here because tablegen doesn't support matching | ||||
| 1227 | // instructions with multiple outputs. | ||||
| 1228 | void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) { | ||||
| 1229 | SDLoc SL(N); | ||||
| 1230 | EVT VT = N->getValueType(0); | ||||
| 1231 | |||||
| 1232 | assert(VT == MVT::f32 || VT == MVT::f64)((void)0); | ||||
| 1233 | |||||
| 1234 | unsigned Opc | ||||
| 1235 | = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64_e64 : AMDGPU::V_DIV_SCALE_F32_e64; | ||||
| 1236 | |||||
| 1237 | // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, | ||||
| 1238 | // omod | ||||
| 1239 | SDValue Ops[8]; | ||||
| 1240 | SelectVOP3BMods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]); | ||||
| 1241 | SelectVOP3BMods(N->getOperand(1), Ops[3], Ops[2]); | ||||
| 1242 | SelectVOP3BMods(N->getOperand(2), Ops[5], Ops[4]); | ||||
| 1243 | CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops); | ||||
| 1244 | } | ||||
| 1245 | |||||
| 1246 | // We need to handle this here because tablegen doesn't support matching | ||||
| 1247 | // instructions with multiple outputs. | ||||
| 1248 | void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) { | ||||
| 1249 | SDLoc SL(N); | ||||
| 1250 | bool Signed = N->getOpcode() == AMDGPUISD::MAD_I64_I32; | ||||
| 1251 | unsigned Opc = Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64; | ||||
| 1252 | |||||
| 1253 | SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1); | ||||
| 1254 | SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), | ||||
| 1255 | Clamp }; | ||||
| 1256 | CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops); | ||||
| 1257 | } | ||||
| 1258 | |||||
| 1259 | bool AMDGPUDAGToDAGISel::isDSOffsetLegal(SDValue Base, unsigned Offset) const { | ||||
| 1260 | if (!isUInt<16>(Offset)) | ||||
| 1261 | return false; | ||||
| 1262 | |||||
| 1263 | if (!Base || Subtarget->hasUsableDSOffset() || | ||||
| 1264 | Subtarget->unsafeDSOffsetFoldingEnabled()) | ||||
| 1265 | return true; | ||||
| 1266 | |||||
| 1267 | // On Southern Islands instruction with a negative base value and an offset | ||||
| 1268 | // don't seem to work. | ||||
| 1269 | return CurDAG->SignBitIsZero(Base); | ||||
| 1270 | } | ||||
| 1271 | |||||
// Match Addr as (Base, 16-bit Offset) for single-offset DS instructions.
// Handles base+constant, (sub C, x), and pure-constant addresses; always
// succeeds via the (Addr, 0) fallback.
bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
                                              SDValue &Offset) const {
  SDLoc DL(Addr);
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (isDSOffsetLegal(N0, C1->getSExtValue())) {
      // (add n0, c0)
      Base = N0;
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      int64_t ByteOffset = C->getSExtValue();
      // First check the offset alone (null base) to avoid building nodes
      // needlessly.
      if (isDSOffsetLegal(SDValue(), ByteOffset)) {
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, ByteOffset)) {
          SmallVector<SDValue, 3> Opnds;
          Opnds.push_back(Zero);
          Opnds.push_back(Addr.getOperand(1));

          // FIXME: Select to VOP3 version for with-carry.
          unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
          if (Subtarget->hasAddNoCarry()) {
            SubOp = AMDGPU::V_SUB_U32_e64;
            Opnds.push_back(
                CurDAG->getTargetConstant(0, {}, MVT::i1)); // clamp bit
          }

          MachineSDNode *MachineSub =
              CurDAG->getMachineNode(SubOp, DL, MVT::i32, Opnds);

          Base = SDValue(MachineSub, 0);
          Offset = CurDAG->getTargetConstant(ByteOffset, DL, MVT::i16);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    // If we have a constant address, prefer to put the constant into the
    // offset. This can save moves to load the constant address since multiple
    // operations can share the zero base address register, and enables merging
    // into read2 / write2 instructions.

    SDLoc DL(Addr);

    if (isDSOffsetLegal(SDValue(), CAddr->getZExtValue())) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                      DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // default case
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
  return true;
}
| 1343 | |||||
| 1344 | bool AMDGPUDAGToDAGISel::isDSOffset2Legal(SDValue Base, unsigned Offset0, | ||||
| 1345 | unsigned Offset1, | ||||
| 1346 | unsigned Size) const { | ||||
| 1347 | if (Offset0 % Size != 0 || Offset1 % Size != 0) | ||||
| 1348 | return false; | ||||
| 1349 | if (!isUInt<8>(Offset0 / Size) || !isUInt<8>(Offset1 / Size)) | ||||
| 1350 | return false; | ||||
| 1351 | |||||
| 1352 | if (!Base || Subtarget->hasUsableDSOffset() || | ||||
| 1353 | Subtarget->unsafeDSOffsetFoldingEnabled()) | ||||
| 1354 | return true; | ||||
| 1355 | |||||
| 1356 | // On Southern Islands instruction with a negative base value and an offset | ||||
| 1357 | // don't seem to work. | ||||
| 1358 | return CurDAG->SignBitIsZero(Base); | ||||
| 1359 | } | ||||
| 1360 | |||||
| 1361 | // TODO: If offset is too big, put low 16-bit into offset. | ||||
| 1362 | bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base, | ||||
| 1363 | SDValue &Offset0, | ||||
| 1364 | SDValue &Offset1) const { | ||||
| 1365 | return SelectDSReadWrite2(Addr, Base, Offset0, Offset1, 4); | ||||
| 1366 | } | ||||
| 1367 | |||||
| 1368 | bool AMDGPUDAGToDAGISel::SelectDS128Bit8ByteAligned(SDValue Addr, SDValue &Base, | ||||
| 1369 | SDValue &Offset0, | ||||
| 1370 | SDValue &Offset1) const { | ||||
| 1371 | return SelectDSReadWrite2(Addr, Base, Offset0, Offset1, 8); | ||||
| 1372 | } | ||||
| 1373 | |||||
// Select a base register plus a pair of 8-bit offsets (encoded in units of
// Size bytes) for a DS read2/write2-style access. The second element always
// sits Size bytes after the first. Always succeeds: the final default case
// uses Addr itself as the base with offsets {0, 1}.
bool AMDGPUDAGToDAGISel::SelectDSReadWrite2(SDValue Addr, SDValue &Base,
                                            SDValue &Offset0, SDValue &Offset1,
                                            unsigned Size) const {
  SDLoc DL(Addr);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    unsigned OffsetValue0 = C1->getZExtValue();
    // Second element immediately follows the first.
    unsigned OffsetValue1 = OffsetValue0 + Size;

    // (add n0, c0)
    if (isDSOffset2Legal(N0, OffsetValue0, OffsetValue1, Size)) {
      Base = N0;
      // Encoded offsets are in units of Size bytes.
      Offset0 = CurDAG->getTargetConstant(OffsetValue0 / Size, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(OffsetValue1 / Size, DL, MVT::i8);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C =
            dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      unsigned OffsetValue0 = C->getZExtValue();
      unsigned OffsetValue1 = OffsetValue0 + Size;

      // First check with a null base (encodability only) before building any
      // nodes.
      if (isDSOffset2Legal(SDValue(), OffsetValue0, OffsetValue1, Size)) {
        SDLoc DL(Addr);
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffset2Legal. We need to emit the selected
        // node here, so this is thrown away.
        SDValue Sub =
            CurDAG->getNode(ISD::SUB, DL, MVT::i32, Zero, Addr.getOperand(1));

        if (isDSOffset2Legal(Sub, OffsetValue0, OffsetValue1, Size)) {
          SmallVector<SDValue, 3> Opnds;
          Opnds.push_back(Zero);
          Opnds.push_back(Addr.getOperand(1));
          unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
          if (Subtarget->hasAddNoCarry()) {
            // Carry-less form takes an additional clamp operand.
            SubOp = AMDGPU::V_SUB_U32_e64;
            Opnds.push_back(
                CurDAG->getTargetConstant(0, {}, MVT::i1)); // clamp bit
          }

          MachineSDNode *MachineSub = CurDAG->getMachineNode(
              SubOp, DL, MVT::getIntegerVT(Size * 8), Opnds);

          Base = SDValue(MachineSub, 0);
          Offset0 = CurDAG->getTargetConstant(OffsetValue0 / Size, DL, MVT::i8);
          Offset1 = CurDAG->getTargetConstant(OffsetValue1 / Size, DL, MVT::i8);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    unsigned OffsetValue0 = CAddr->getZExtValue();
    unsigned OffsetValue1 = OffsetValue0 + Size;

    if (isDSOffset2Legal(SDValue(), OffsetValue0, OffsetValue1, Size)) {
      // Fold the whole constant address into the offsets and use a zero base,
      // so multiple accesses can share the base register.
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero =
          CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset0 = CurDAG->getTargetConstant(OffsetValue0 / Size, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(OffsetValue1 / Size, DL, MVT::i8);
      return true;
    }
  }

  // default case

  Base = Addr;
  Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
  Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
  return true;
}
| 1453 | |||||
// Common MUBUF addressing-mode matcher. Decomposes Addr into the MUBUF
// operand set (Ptr, VAddr, SOffset, Offset) and the addressing-mode flag
// operands (Offen, Idxen, Addr64). Returns false only when the subtarget
// prefers flat instructions for global access.
bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr, SDValue &VAddr,
                                     SDValue &SOffset, SDValue &Offset,
                                     SDValue &Offen, SDValue &Idxen,
                                     SDValue &Addr64) const {
  // Subtarget prefers to use flat instruction
  // FIXME: This should be a pattern predicate and not reach here
  if (Subtarget->useFlatForGlobal())
    return false;

  SDLoc DL(Addr);

  // Default all mode bits / soffset to 0; the cases below override them.
  Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);

  // Peel off a constant offset first: N0 is the remaining base, C1 the
  // offset (only kept if it fits in 32 bits unsigned).
  ConstantSDNode *C1 = nullptr;
  SDValue N0 = Addr;
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    C1 = cast<ConstantSDNode>(Addr.getOperand(1));
    if (isUInt<32>(C1->getZExtValue()))
      N0 = Addr.getOperand(0);
    else
      C1 = nullptr;
  }

  if (N0.getOpcode() == ISD::ADD) {
    // (add N2, N3) -> addr64, or
    // (add (add N2, N3), C1) -> addr64
    SDValue N2 = N0.getOperand(0);
    SDValue N3 = N0.getOperand(1);
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);

    // Put the uniform (non-divergent) part in the resource pointer and the
    // divergent part in vaddr, where the hardware expects it.
    if (N2->isDivergent()) {
      if (N3->isDivergent()) {
        // Both N2 and N3 are divergent. Use N0 (the result of the add) as the
        // addr64, and construct the resource from a 0 address.
        Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0);
        VAddr = N0;
      } else {
        // N2 is divergent, N3 is not.
        Ptr = N3;
        VAddr = N2;
      }
    } else {
      // N2 is not divergent.
      Ptr = N2;
      VAddr = N3;
    }
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  } else if (N0->isDivergent()) {
    // N0 is divergent. Use it as the addr64, and construct the resource from a
    // 0 address.
    Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0);
    VAddr = N0;
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
  } else {
    // N0 -> offset, or
    // (N0 + C1) -> offset
    VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
    Ptr = N0;
  }

  if (!C1) {
    // No offset.
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    return true;
  }

  if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue())) {
    // Legal offset for instruction.
    Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
    return true;
  }

  // Illegal offset, store it in soffset.
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  SOffset =
      SDValue(CurDAG->getMachineNode(
                  AMDGPU::S_MOV_B32, DL, MVT::i32,
                  CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
              0);
  return true;
}
| 1538 | |||||
| 1539 | bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, | ||||
| 1540 | SDValue &VAddr, SDValue &SOffset, | ||||
| 1541 | SDValue &Offset) const { | ||||
| 1542 | SDValue Ptr, Offen, Idxen, Addr64; | ||||
| 1543 | |||||
| 1544 | // addr64 bit was removed for volcanic islands. | ||||
| 1545 | // FIXME: This should be a pattern predicate and not reach here | ||||
| 1546 | if (!Subtarget->hasAddr64()) | ||||
| 1547 | return false; | ||||
| 1548 | |||||
| 1549 | if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64)) | ||||
| 1550 | return false; | ||||
| 1551 | |||||
| 1552 | ConstantSDNode *C = cast<ConstantSDNode>(Addr64); | ||||
| 1553 | if (C->getSExtValue()) { | ||||
| 1554 | SDLoc DL(Addr); | ||||
| 1555 | |||||
| 1556 | const SITargetLowering& Lowering = | ||||
| 1557 | *static_cast<const SITargetLowering*>(getTargetLowering()); | ||||
| 1558 | |||||
| 1559 | SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0); | ||||
| 1560 | return true; | ||||
| 1561 | } | ||||
| 1562 | |||||
| 1563 | return false; | ||||
| 1564 | } | ||||
| 1565 | |||||
| 1566 | std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const { | ||||
| 1567 | SDLoc DL(N); | ||||
| 1568 | |||||
| 1569 | auto *FI = dyn_cast<FrameIndexSDNode>(N); | ||||
| 1570 | SDValue TFI = | ||||
| 1571 | FI ? CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0)) : N; | ||||
| 1572 | |||||
| 1573 | // We rebase the base address into an absolute stack address and hence | ||||
| 1574 | // use constant 0 for soffset. This value must be retained until | ||||
| 1575 | // frame elimination and eliminateFrameIndex will choose the appropriate | ||||
| 1576 | // frame register if need be. | ||||
| 1577 | return std::make_pair(TFI, CurDAG->getTargetConstant(0, DL, MVT::i32)); | ||||
| 1578 | } | ||||
| 1579 | |||||
// Match a MUBUF scratch (private) access in offen mode: produce the scratch
// resource descriptor, a VGPR address, an SGPR soffset and a 12-bit immediate
// offset. Always succeeds via the fall-through case.
bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
                                                 SDValue Addr, SDValue &Rsrc,
                                                 SDValue &VAddr, SDValue &SOffset,
                                                 SDValue &ImmOffset) const {

  SDLoc DL(Addr);
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  // The scratch rsrc is fixed per-function.
  Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);

  if (ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    int64_t Imm = CAddr->getSExtValue();
    const int64_t NullPtr =
        AMDGPUTargetMachine::getNullPointerValue(AMDGPUAS::PRIVATE_ADDRESS);
    // Don't fold null pointer.
    if (Imm != NullPtr) {
      // Split the constant: high bits materialized into a VGPR, low 12 bits
      // carried in the immediate offset field.
      SDValue HighBits = CurDAG->getTargetConstant(Imm & ~4095, DL, MVT::i32);
      MachineSDNode *MovHighBits = CurDAG->getMachineNode(
        AMDGPU::V_MOV_B32_e32, DL, MVT::i32, HighBits);
      VAddr = SDValue(MovHighBits, 0);

      SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
      ImmOffset = CurDAG->getTargetConstant(Imm & 4095, DL, MVT::i16);
      return true;
    }
  }

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    // (add n0, c1)

    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);

    // Offsets in vaddr must be positive if range checking is enabled.
    //
    // The total computation of vaddr + soffset + offset must not overflow. If
    // vaddr is negative, even if offset is 0 the sgpr offset add will end up
    // overflowing.
    //
    // Prior to gfx9, MUBUF instructions with the vaddr offset enabled would
    // always perform a range check. If a negative vaddr base index was used,
    // this would fail the range check. The overall address computation would
    // compute a valid address, but this doesn't happen due to the range
    // check. For out-of-bounds MUBUF loads, a 0 is returned.
    //
    // Therefore it should be safe to fold any VGPR offset on gfx9 into the
    // MUBUF vaddr, but not on older subtargets which can only do this if the
    // sign bit is known 0.
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue()) &&
        (!Subtarget->privateMemoryResourceIsRangeChecked() ||
         CurDAG->SignBitIsZero(N0))) {
      std::tie(VAddr, SOffset) = foldFrameIndex(N0);
      ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // (node)
  std::tie(VAddr, SOffset) = foldFrameIndex(Addr);
  ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  return true;
}
| 1644 | |||||
| 1645 | static bool IsCopyFromSGPR(const SIRegisterInfo &TRI, SDValue Val) { | ||||
| 1646 | if (Val.getOpcode() != ISD::CopyFromReg) | ||||
| 1647 | return false; | ||||
| 1648 | auto RC = | ||||
| 1649 | TRI.getPhysRegClass(cast<RegisterSDNode>(Val.getOperand(1))->getReg()); | ||||
| 1650 | return RC && TRI.isSGPRClass(RC); | ||||
| 1651 | } | ||||
| 1652 | |||||
// Match a MUBUF scratch access in pure-offset mode (no vaddr): the address is
// an SGPR, an SGPR plus a legal immediate, or a bare legal immediate.
bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
                                                  SDValue Addr,
                                                  SDValue &SRsrc,
                                                  SDValue &SOffset,
                                                  SDValue &Offset) const {
  const SIRegisterInfo *TRI =
      static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
  SDLoc DL(Addr);

  // CopyFromReg <sgpr>
  if (IsCopyFromSGPR(*TRI, Addr)) {
    SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
    SOffset = Addr;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    return true;
  }

  // CAddr holds the immediate part for both remaining accepted forms.
  ConstantSDNode *CAddr;
  if (Addr.getOpcode() == ISD::ADD) {
    // Add (CopyFromReg <sgpr>) <constant>
    CAddr = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
    if (!CAddr || !SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue()))
      return false;
    if (!IsCopyFromSGPR(*TRI, Addr.getOperand(0)))
      return false;

    SOffset = Addr.getOperand(0);
  } else if ((CAddr = dyn_cast<ConstantSDNode>(Addr)) &&
             SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue())) {
    // <constant>
    SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  } else {
    return false;
  }

  SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);

  Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
  return true;
}
| 1695 | |||||
| 1696 | bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, | ||||
| 1697 | SDValue &SOffset, SDValue &Offset | ||||
| 1698 | ) const { | ||||
| 1699 | SDValue Ptr, VAddr, Offen, Idxen, Addr64; | ||||
| 1700 | const SIInstrInfo *TII = | ||||
| 1701 | static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo()); | ||||
| 1702 | |||||
| 1703 | if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64)) | ||||
| 1704 | return false; | ||||
| 1705 | |||||
| 1706 | if (!cast<ConstantSDNode>(Offen)->getSExtValue() && | ||||
| 1707 | !cast<ConstantSDNode>(Idxen)->getSExtValue() && | ||||
| 1708 | !cast<ConstantSDNode>(Addr64)->getSExtValue()) { | ||||
| 1709 | uint64_t Rsrc = TII->getDefaultRsrcDataFormat() | | ||||
| 1710 | APInt::getAllOnesValue(32).getZExtValue(); // Size | ||||
| 1711 | SDLoc DL(Addr); | ||||
| 1712 | |||||
| 1713 | const SITargetLowering& Lowering = | ||||
| 1714 | *static_cast<const SITargetLowering*>(getTargetLowering()); | ||||
| 1715 | |||||
| 1716 | SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0); | ||||
| 1717 | return true; | ||||
| 1718 | } | ||||
| 1719 | return false; | ||||
| 1720 | } | ||||
| 1721 | |||||
// Find a load or store from corresponding pattern root.
// Roots may be build_vector, bitconvert or their combinations.
static MemSDNode* findMemSDNode(SDNode *N) {
  // Strip any bitcast wrapping the root first.
  N = AMDGPUTargetLowering::stripBitcast(SDValue(N,0)).getNode();
  if (MemSDNode *MN = dyn_cast<MemSDNode>(N))
    return MN;
  // Otherwise the root must be a build_vector; one of its (possibly bitcast)
  // operands is the memory access.
  assert(isa<BuildVectorSDNode>(N))((void)0);
  for (SDValue V : N->op_values())
    if (MemSDNode *MN =
          dyn_cast<MemSDNode>(AMDGPUTargetLowering::stripBitcast(V)))
      return MN;
  llvm_unreachable("cannot find MemSDNode in the pattern!")__builtin_unreachable();
}
| 1735 | |||||
// Select a flat-family (FLAT / global / scratch, per FlatVariant) address as
// vaddr + immediate offset. Folds a legal constant offset into the
// instruction's offset field; if the constant is too large, splits it and
// adds the remainder back to the address. Always succeeds.
bool AMDGPUDAGToDAGISel::SelectFlatOffsetImpl(SDNode *N, SDValue Addr,
                                              SDValue &VAddr, SDValue &Offset,
                                              uint64_t FlatVariant) const {
  int64_t OffsetVal = 0;

  unsigned AS = findMemSDNode(N)->getAddressSpace();

  // Some subtargets cannot use the offset field with flat/global address
  // spaces at all; in that case no folding is attempted.
  bool CanHaveFlatSegmentOffsetBug =
      Subtarget->hasFlatSegmentOffsetBug() &&
      FlatVariant == SIInstrFlags::FLAT &&
      (AS == AMDGPUAS::FLAT_ADDRESS || AS == AMDGPUAS::GLOBAL_ADDRESS);

  if (Subtarget->hasFlatInstOffsets() && !CanHaveFlatSegmentOffsetBug) {
    SDValue N0, N1;
    if (isBaseWithConstantOffset64(Addr, N0, N1)) {
      int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();

      const SIInstrInfo *TII = Subtarget->getInstrInfo();
      if (TII->isLegalFLATOffset(COffsetVal, AS, FlatVariant)) {
        // Whole offset fits in the instruction's offset field.
        Addr = N0;
        OffsetVal = COffsetVal;
      } else {
        // If the offset doesn't fit, put the low bits into the offset field and
        // add the rest.
        //
        // For a FLAT instruction the hardware decides whether to access
        // global/scratch/shared memory based on the high bits of vaddr,
        // ignoring the offset field, so we have to ensure that when we add
        // remainder to vaddr it still points into the same underlying object.
        // The easiest way to do that is to make sure that we split the offset
        // into two pieces that are both >= 0 or both <= 0.

        SDLoc DL(N);
        uint64_t RemainderOffset;

        std::tie(OffsetVal, RemainderOffset) =
            TII->splitFlatOffset(COffsetVal, AS, FlatVariant);

        SDValue AddOffsetLo =
            getMaterializedScalarImm32(Lo_32(RemainderOffset), DL);
        SDValue Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);

        if (Addr.getValueType().getSizeInBits() == 32) {
          // 32-bit address: a single VALU add of the remainder.
          SmallVector<SDValue, 3> Opnds;
          Opnds.push_back(N0);
          Opnds.push_back(AddOffsetLo);
          unsigned AddOp = AMDGPU::V_ADD_CO_U32_e32;
          if (Subtarget->hasAddNoCarry()) {
            AddOp = AMDGPU::V_ADD_U32_e64;
            Opnds.push_back(Clamp);
          }
          Addr = SDValue(CurDAG->getMachineNode(AddOp, DL, MVT::i32, Opnds), 0);
        } else {
          // TODO: Should this try to use a scalar add pseudo if the base address
          // is uniform and saddr is usable?
          // 64-bit address: split into sub0/sub1, do a 32-bit add with carry
          // out, an add-with-carry for the high half, then reassemble.
          SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
          SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);

          SDNode *N0Lo = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                                DL, MVT::i32, N0, Sub0);
          SDNode *N0Hi = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                                DL, MVT::i32, N0, Sub1);

          SDValue AddOffsetHi =
              getMaterializedScalarImm32(Hi_32(RemainderOffset), DL);

          SDVTList VTs = CurDAG->getVTList(MVT::i32, MVT::i1);

          SDNode *Add =
              CurDAG->getMachineNode(AMDGPU::V_ADD_CO_U32_e64, DL, VTs,
                                     {AddOffsetLo, SDValue(N0Lo, 0), Clamp});

          SDNode *Addc = CurDAG->getMachineNode(
              AMDGPU::V_ADDC_U32_e64, DL, VTs,
              {AddOffsetHi, SDValue(N0Hi, 0), SDValue(Add, 1), Clamp});

          SDValue RegSequenceArgs[] = {
              CurDAG->getTargetConstant(AMDGPU::VReg_64RegClassID, DL, MVT::i32),
              SDValue(Add, 0), Sub0, SDValue(Addc, 0), Sub1};

          Addr = SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
                                                MVT::i64, RegSequenceArgs),
                         0);
        }
      }
    }
  }

  VAddr = Addr;
  Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i16);
  return true;
}
| 1828 | |||||
| 1829 | bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDNode *N, SDValue Addr, | ||||
| 1830 | SDValue &VAddr, | ||||
| 1831 | SDValue &Offset) const { | ||||
| 1832 | return SelectFlatOffsetImpl(N, Addr, VAddr, Offset, SIInstrFlags::FLAT); | ||||
| 1833 | } | ||||
| 1834 | |||||
| 1835 | bool AMDGPUDAGToDAGISel::SelectGlobalOffset(SDNode *N, SDValue Addr, | ||||
| 1836 | SDValue &VAddr, | ||||
| 1837 | SDValue &Offset) const { | ||||
| 1838 | return SelectFlatOffsetImpl(N, Addr, VAddr, Offset, SIInstrFlags::FlatGlobal); | ||||
| 1839 | } | ||||
| 1840 | |||||
| 1841 | bool AMDGPUDAGToDAGISel::SelectScratchOffset(SDNode *N, SDValue Addr, | ||||
| 1842 | SDValue &VAddr, | ||||
| 1843 | SDValue &Offset) const { | ||||
| 1844 | return SelectFlatOffsetImpl(N, Addr, VAddr, Offset, | ||||
| 1845 | SIInstrFlags::FlatScratch); | ||||
| 1846 | } | ||||
| 1847 | |||||
| 1848 | // If this matches zero_extend i32:x, return x | ||||
| 1849 | static SDValue matchZExtFromI32(SDValue Op) { | ||||
| 1850 | if (Op.getOpcode() != ISD::ZERO_EXTEND) | ||||
| 1851 | return SDValue(); | ||||
| 1852 | |||||
| 1853 | SDValue ExtSrc = Op.getOperand(0); | ||||
| 1854 | return (ExtSrc.getValueType() == MVT::i32) ? ExtSrc : SDValue(); | ||||
| 1855 | } | ||||
| 1856 | |||||
// Match (64-bit SGPR base) + (zext vgpr offset) + sext(imm offset)
// On success fills SAddr (scalar base), VOffset (32-bit VGPR offset) and
// Offset (immediate), materializing a zero VGPR offset when needed.
bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N,
                                           SDValue Addr,
                                           SDValue &SAddr,
                                           SDValue &VOffset,
                                           SDValue &Offset) const {
  int64_t ImmOffset = 0;

  // Match the immediate offset first, which canonically is moved as low as
  // possible.

  SDValue LHS, RHS;
  if (isBaseWithConstantOffset64(Addr, LHS, RHS)) {
    int64_t COffsetVal = cast<ConstantSDNode>(RHS)->getSExtValue();
    const SIInstrInfo *TII = Subtarget->getInstrInfo();

    if (TII->isLegalFLATOffset(COffsetVal, AMDGPUAS::GLOBAL_ADDRESS,
                               SIInstrFlags::FlatGlobal)) {
      Addr = LHS;
      ImmOffset = COffsetVal;
    } else if (!LHS->isDivergent()) {
      if (COffsetVal > 0) {
        SDLoc SL(N);
        // saddr + large_offset -> saddr +
        //                         (voffset = large_offset & ~MaxOffset) +
        //                         (large_offset & MaxOffset);
        int64_t SplitImmOffset, RemainderOffset;
        std::tie(SplitImmOffset, RemainderOffset) = TII->splitFlatOffset(
            COffsetVal, AMDGPUAS::GLOBAL_ADDRESS, SIInstrFlags::FlatGlobal);

        if (isUInt<32>(RemainderOffset)) {
          // Remainder fits in a 32-bit VGPR immediate move.
          SDNode *VMov = CurDAG->getMachineNode(
              AMDGPU::V_MOV_B32_e32, SL, MVT::i32,
              CurDAG->getTargetConstant(RemainderOffset, SDLoc(), MVT::i32));
          VOffset = SDValue(VMov, 0);
          SAddr = LHS;
          Offset = CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i16);
          return true;
        }
      }

      // We are adding a 64 bit SGPR and a constant. If constant bus limit
      // is 1 we would need to perform 1 or 2 extra moves for each half of
      // the constant and it is better to do a scalar add and then issue a
      // single VALU instruction to materialize zero. Otherwise it is less
      // instructions to perform VALU adds with immediates or inline literals.
      unsigned NumLiterals =
          !TII->isInlineConstant(APInt(32, COffsetVal & 0xffffffff)) +
          !TII->isInlineConstant(APInt(32, COffsetVal >> 32));
      if (Subtarget->getConstantBusLimit(AMDGPU::V_ADD_U32_e64) > NumLiterals)
        return false;
    }
  }

  // Match the variable offset.
  if (Addr.getOpcode() == ISD::ADD) {
    LHS = Addr.getOperand(0);
    RHS = Addr.getOperand(1);

    if (!LHS->isDivergent()) {
      // add (i64 sgpr), (zero_extend (i32 vgpr))
      if (SDValue ZextRHS = matchZExtFromI32(RHS)) {
        SAddr = LHS;
        VOffset = ZextRHS;
      }
    }

    // NOTE(review): SAddr is an out-parameter and is read here before being
    // assigned on every path in this function — this appears to rely on the
    // caller passing in a null SDValue; verify at call sites.
    if (!SAddr && !RHS->isDivergent()) {
      // add (zero_extend (i32 vgpr)), (i64 sgpr)
      if (SDValue ZextLHS = matchZExtFromI32(LHS)) {
        SAddr = RHS;
        VOffset = ZextLHS;
      }
    }

    if (SAddr) {
      Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i16);
      return true;
    }
  }

  // A divergent, undef, or constant base cannot be used as saddr.
  if (Addr->isDivergent() || Addr.getOpcode() == ISD::UNDEF ||
      isa<ConstantSDNode>(Addr))
    return false;

  // It's cheaper to materialize a single 32-bit zero for vaddr than the two
  // moves required to copy a 64-bit SGPR to VGPR.
  SAddr = Addr;
  SDNode *VMov =
      CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, SDLoc(Addr), MVT::i32,
                             CurDAG->getTargetConstant(0, SDLoc(), MVT::i32));
  VOffset = SDValue(VMov, 0);
  Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i16);
  return true;
}
| 1952 | |||||
| 1953 | static SDValue SelectSAddrFI(SelectionDAG *CurDAG, SDValue SAddr) { | ||||
| 1954 | if (auto FI = dyn_cast<FrameIndexSDNode>(SAddr)) { | ||||
| 1955 | SAddr = CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0)); | ||||
| 1956 | } else if (SAddr.getOpcode() == ISD::ADD && | ||||
| 1957 | isa<FrameIndexSDNode>(SAddr.getOperand(0))) { | ||||
| 1958 | // Materialize this into a scalar move for scalar address to avoid | ||||
| 1959 | // readfirstlane. | ||||
| 1960 | auto FI = cast<FrameIndexSDNode>(SAddr.getOperand(0)); | ||||
| 1961 | SDValue TFI = CurDAG->getTargetFrameIndex(FI->getIndex(), | ||||
| 1962 | FI->getValueType(0)); | ||||
| 1963 | SAddr = SDValue(CurDAG->getMachineNode(AMDGPU::S_ADD_I32, SDLoc(SAddr), | ||||
| 1964 | MVT::i32, TFI, SAddr.getOperand(1)), | ||||
| 1965 | 0); | ||||
| 1966 | } | ||||
| 1967 | |||||
| 1968 | return SAddr; | ||||
| 1969 | } | ||||
| 1970 | |||||
// Match (32-bit SGPR base) + sext(imm offset)
// Fails only for divergent addresses; otherwise folds a legal offset into
// Offset and emits an S_ADD_I32 for any remainder.
bool AMDGPUDAGToDAGISel::SelectScratchSAddr(SDNode *Parent, SDValue Addr,
                                            SDValue &SAddr,
                                            SDValue &Offset) const {
  if (Addr->isDivergent())
    return false;

  SDLoc DL(Addr);

  int64_t COffsetVal = 0;

  // Peel a constant offset off the address if present.
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    COffsetVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
    SAddr = Addr.getOperand(0);
  } else {
    SAddr = Addr;
  }

  // Turn a frame-index base into its target form.
  SAddr = SelectSAddrFI(CurDAG, SAddr);

  const SIInstrInfo *TII = Subtarget->getInstrInfo();

  if (!TII->isLegalFLATOffset(COffsetVal, AMDGPUAS::PRIVATE_ADDRESS,
                              SIInstrFlags::FlatScratch)) {
    // Offset too large: keep the legal part in the offset field and add the
    // remainder to the scalar base.
    int64_t SplitImmOffset, RemainderOffset;
    std::tie(SplitImmOffset, RemainderOffset) = TII->splitFlatOffset(
        COffsetVal, AMDGPUAS::PRIVATE_ADDRESS, SIInstrFlags::FlatScratch);

    COffsetVal = SplitImmOffset;

    // A frame-index base needs the remainder materialized in a register;
    // otherwise it can be an inline operand of the add.
    SDValue AddOffset =
        SAddr.getOpcode() == ISD::TargetFrameIndex
            ? getMaterializedScalarImm32(Lo_32(RemainderOffset), DL)
            : CurDAG->getTargetConstant(RemainderOffset, DL, MVT::i32);
    SAddr = SDValue(CurDAG->getMachineNode(AMDGPU::S_ADD_I32, DL, MVT::i32,
                                           SAddr, AddOffset),
                    0);
  }

  Offset = CurDAG->getTargetConstant(COffsetVal, DL, MVT::i16);

  return true;
}
| 2014 | |||||
// Select the offset operand of an SMRD load. On success, Offset is either an
// encoded immediate (Imm set to true), a 32-bit register offset, or an
// S_MOV_B32-materialized literal.
// NOTE(review): Imm is only assigned on some successful paths (it stays
// untouched on the literal/S_MOV_B32 returns) — presumably callers
// pre-initialize it to false; verify at call sites.
bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode,
                                          SDValue &Offset, bool &Imm) const {
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode);
  if (!C) {
    // Non-constant: accept a 32-bit scalar-integer offset directly...
    if (ByteOffsetNode.getValueType().isScalarInteger() &&
        ByteOffsetNode.getValueType().getSizeInBits() == 32) {
      Offset = ByteOffsetNode;
      Imm = false;
      return true;
    }
    // ...or look through a zero_extend from 32 bits.
    if (ByteOffsetNode.getOpcode() == ISD::ZERO_EXTEND) {
      if (ByteOffsetNode.getOperand(0).getValueType().getSizeInBits() == 32) {
        Offset = ByteOffsetNode.getOperand(0);
        Imm = false;
        return true;
      }
    }
    return false;
  }

  SDLoc SL(ByteOffsetNode);
  // GFX9 and GFX10 have signed byte immediate offsets.
  int64_t ByteOffset = C->getSExtValue();
  Optional<int64_t> EncodedOffset =
      AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset, false);
  if (EncodedOffset) {
    Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32);
    Imm = true;
    return true;
  }

  // SGPR and literal offsets are unsigned.
  if (ByteOffset < 0)
    return false;

  EncodedOffset = AMDGPU::getSMRDEncodedLiteralOffset32(*Subtarget, ByteOffset);
  if (EncodedOffset) {
    Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32);
    return true;
  }

  if (!isUInt<32>(ByteOffset) && !isInt<32>(ByteOffset))
    return false;

  // Fall back to materializing the offset into an SGPR.
  SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32);
  Offset = SDValue(
      CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, C32Bit), 0);

  return true;
}
| 2065 | |||||
| 2066 | SDValue AMDGPUDAGToDAGISel::Expand32BitAddress(SDValue Addr) const { | ||||
| 2067 | if (Addr.getValueType() != MVT::i32) | ||||
| 2068 | return Addr; | ||||
| 2069 | |||||
| 2070 | // Zero-extend a 32-bit address. | ||||
| 2071 | SDLoc SL(Addr); | ||||
| 2072 | |||||
| 2073 | const MachineFunction &MF = CurDAG->getMachineFunction(); | ||||
| 2074 | const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>(); | ||||
| 2075 | unsigned AddrHiVal = Info->get32BitAddressHighBits(); | ||||
| 2076 | SDValue AddrHi = CurDAG->getTargetConstant(AddrHiVal, SL, MVT::i32); | ||||
| 2077 | |||||
| 2078 | const SDValue Ops[] = { | ||||
| 2079 | CurDAG->getTargetConstant(AMDGPU::SReg_64_XEXECRegClassID, SL, MVT::i32), | ||||
| 2080 | Addr, | ||||
| 2081 | CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32), | ||||
| 2082 | SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, AddrHi), | ||||
| 2083 | 0), | ||||
| 2084 | CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32), | ||||
| 2085 | }; | ||||
| 2086 | |||||
| 2087 | return SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, SL, MVT::i64, | ||||
| 2088 | Ops), 0); | ||||
| 2089 | } | ||||
| 2090 | |||||
/// Split \p Addr into an SMRD base (\p SBase) and offset (\p Offset).
/// Always succeeds: if no base+offset decomposition applies, the whole
/// address becomes the base with a zero immediate offset.
bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
                                    SDValue &Offset, bool &Imm) const {
  SDLoc SL(Addr);

  // A 32-bit (address + offset) should not cause unsigned 32-bit integer
  // wraparound, because s_load instructions perform the addition in 64 bits.
  if ((Addr.getValueType() != MVT::i32 ||
       Addr->getFlags().hasNoUnsignedWrap())) {
    // N0/N1 start out as null SDValues; every path below that reads them is
    // guarded by the explicit `N0 && N1` check, so neither is dereferenced
    // while null (the analyzer warning on SDValue::getValueType is about
    // this region — the guard covers it).
    SDValue N0, N1;
    // Extract the base and offset if possible.
    if (CurDAG->isBaseWithConstantOffset(Addr) ||
        Addr.getOpcode() == ISD::ADD) {
      N0 = Addr.getOperand(0);
      N1 = Addr.getOperand(1);
    } else if (getBaseWithOffsetUsingSplitOR(*CurDAG, Addr, N0, N1)) {
      assert(N0 && N1 && isa<ConstantSDNode>(N1));
    }
    if (N0 && N1) {
      if (SelectSMRDOffset(N1, Offset, Imm)) {
        SBase = Expand32BitAddress(N0);
        return true;
      }
    }
  }
  // Fallback: whole address as base, zero immediate offset.
  SBase = Expand32BitAddress(Addr);
  Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
  Imm = true;
  return true;
}
| 2120 | |||||
| 2121 | bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase, | ||||
| 2122 | SDValue &Offset) const { | ||||
| 2123 | bool Imm = false; | ||||
| 2124 | return SelectSMRD(Addr, SBase, Offset, Imm) && Imm; | ||||
| 2125 | } | ||||
| 2126 | |||||
| 2127 | bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase, | ||||
| 2128 | SDValue &Offset) const { | ||||
| 2129 | |||||
| 2130 | assert(Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS)((void)0); | ||||
| 2131 | |||||
| 2132 | bool Imm = false; | ||||
| 2133 | if (!SelectSMRD(Addr, SBase, Offset, Imm)) | ||||
| 2134 | return false; | ||||
| 2135 | |||||
| 2136 | return !Imm && isa<ConstantSDNode>(Offset); | ||||
| 2137 | } | ||||
| 2138 | |||||
| 2139 | bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase, | ||||
| 2140 | SDValue &Offset) const { | ||||
| 2141 | bool Imm = false; | ||||
| 2142 | return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm && | ||||
| 2143 | !isa<ConstantSDNode>(Offset); | ||||
| 2144 | } | ||||
| 2145 | |||||
| 2146 | bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr, | ||||
| 2147 | SDValue &Offset) const { | ||||
| 2148 | if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr)) { | ||||
| 2149 | // The immediate offset for S_BUFFER instructions is unsigned. | ||||
| 2150 | if (auto Imm = | ||||
| 2151 | AMDGPU::getSMRDEncodedOffset(*Subtarget, C->getZExtValue(), true)) { | ||||
| 2152 | Offset = CurDAG->getTargetConstant(*Imm, SDLoc(Addr), MVT::i32); | ||||
| 2153 | return true; | ||||
| 2154 | } | ||||
| 2155 | } | ||||
| 2156 | |||||
| 2157 | return false; | ||||
| 2158 | } | ||||
| 2159 | |||||
| 2160 | bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr, | ||||
| 2161 | SDValue &Offset) const { | ||||
| 2162 | assert(Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS)((void)0); | ||||
| 2163 | |||||
| 2164 | if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr)) { | ||||
| 2165 | if (auto Imm = AMDGPU::getSMRDEncodedLiteralOffset32(*Subtarget, | ||||
| 2166 | C->getZExtValue())) { | ||||
| 2167 | Offset = CurDAG->getTargetConstant(*Imm, SDLoc(Addr), MVT::i32); | ||||
| 2168 | return true; | ||||
| 2169 | } | ||||
| 2170 | } | ||||
| 2171 | |||||
| 2172 | return false; | ||||
| 2173 | } | ||||
| 2174 | |||||
| 2175 | bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index, | ||||
| 2176 | SDValue &Base, | ||||
| 2177 | SDValue &Offset) const { | ||||
| 2178 | SDLoc DL(Index); | ||||
| 2179 | |||||
| 2180 | if (CurDAG->isBaseWithConstantOffset(Index)) { | ||||
| 2181 | SDValue N0 = Index.getOperand(0); | ||||
| 2182 | SDValue N1 = Index.getOperand(1); | ||||
| 2183 | ConstantSDNode *C1 = cast<ConstantSDNode>(N1); | ||||
| 2184 | |||||
| 2185 | // (add n0, c0) | ||||
| 2186 | // Don't peel off the offset (c0) if doing so could possibly lead | ||||
| 2187 | // the base (n0) to be negative. | ||||
| 2188 | // (or n0, |c0|) can never change a sign given isBaseWithConstantOffset. | ||||
| 2189 | if (C1->getSExtValue() <= 0 || CurDAG->SignBitIsZero(N0) || | ||||
| 2190 | (Index->getOpcode() == ISD::OR && C1->getSExtValue() >= 0)) { | ||||
| 2191 | Base = N0; | ||||
| 2192 | Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32); | ||||
| 2193 | return true; | ||||
| 2194 | } | ||||
| 2195 | } | ||||
| 2196 | |||||
| 2197 | if (isa<ConstantSDNode>(Index)) | ||||
| 2198 | return false; | ||||
| 2199 | |||||
| 2200 | Base = Index; | ||||
| 2201 | Offset = CurDAG->getTargetConstant(0, DL, MVT::i32); | ||||
| 2202 | return true; | ||||
| 2203 | } | ||||
| 2204 | |||||
| 2205 | SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, const SDLoc &DL, | ||||
| 2206 | SDValue Val, uint32_t Offset, | ||||
| 2207 | uint32_t Width) { | ||||
| 2208 | // Transformation function, pack the offset and width of a BFE into | ||||
| 2209 | // the format expected by the S_BFE_I32 / S_BFE_U32. In the second | ||||
| 2210 | // source, bits [5:0] contain the offset and bits [22:16] the width. | ||||
| 2211 | uint32_t PackedVal = Offset | (Width << 16); | ||||
| 2212 | SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32); | ||||
| 2213 | |||||
| 2214 | return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst); | ||||
| 2215 | } | ||||
| 2216 | |||||
| 2217 | void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) { | ||||
| 2218 | // "(a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c) | ||||
| 2219 | // "(a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c) | ||||
| 2220 | // Predicate: 0 < b <= c < 32 | ||||
| 2221 | |||||
| 2222 | const SDValue &Shl = N->getOperand(0); | ||||
| 2223 | ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1)); | ||||
| 2224 | ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1)); | ||||
| 2225 | |||||
| 2226 | if (B && C) { | ||||
| 2227 | uint32_t BVal = B->getZExtValue(); | ||||
| 2228 | uint32_t CVal = C->getZExtValue(); | ||||
| 2229 | |||||
| 2230 | if (0 < BVal && BVal <= CVal && CVal < 32) { | ||||
| 2231 | bool Signed = N->getOpcode() == ISD::SRA; | ||||
| 2232 | unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32; | ||||
| 2233 | |||||
| 2234 | ReplaceNode(N, getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0), CVal - BVal, | ||||
| 2235 | 32 - CVal)); | ||||
| 2236 | return; | ||||
| 2237 | } | ||||
| 2238 | } | ||||
| 2239 | SelectCode(N); | ||||
| 2240 | } | ||||
| 2241 | |||||
/// Try to select AND/SRL/SRA/sign_extend_inreg patterns as a single scalar
/// bitfield extract (S_BFE_*). Falls through to SelectCode for anything that
/// does not match.
void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
  switch (N->getOpcode()) {
  case ISD::AND:
    if (N->getOperand(0).getOpcode() == ISD::SRL) {
      // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
      // Predicate: isMask(mask)
      const SDValue &Srl = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        uint32_t MaskVal = Mask->getZExtValue();

        if (isMask_32(MaskVal)) {
          // The field width is the number of set bits in the mask.
          uint32_t WidthVal = countPopulation(MaskVal);

          ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
                                  Srl.getOperand(0), ShiftVal, WidthVal));
          return;
        }
      }
    }
    break;
  case ISD::SRL:
    if (N->getOperand(0).getOpcode() == ISD::AND) {
      // "(a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
      // Predicate: isMask(mask >> b)
      const SDValue &And = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        // Shift the mask first; only the surviving bits form the field.
        uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;

        if (isMask_32(MaskVal)) {
          uint32_t WidthVal = countPopulation(MaskVal);

          ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
                                  And.getOperand(0), ShiftVal, WidthVal));
          return;
        }
      }
    } else if (N->getOperand(0).getOpcode() == ISD::SHL) {
      // shl+srl pairs are handled by the shared shift-pair matcher.
      SelectS_BFEFromShifts(N);
      return;
    }
    break;
  case ISD::SRA:
    if (N->getOperand(0).getOpcode() == ISD::SHL) {
      // shl+sra pairs become a signed BFE via the shared matcher.
      SelectS_BFEFromShifts(N);
      return;
    }
    break;

  case ISD::SIGN_EXTEND_INREG: {
    // sext_inreg (srl x, 16), i8 -> bfe_i32 x, 16, 8
    SDValue Src = N->getOperand(0);
    if (Src.getOpcode() != ISD::SRL)
      break;

    const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
    if (!Amt)
      break;

    // The sext_inreg VT gives the field width directly.
    unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
    ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_I32, SDLoc(N), Src.getOperand(0),
                            Amt->getZExtValue(), Width));
    return;
  }
  }

  SelectCode(N);
}
| 2317 | |||||
| 2318 | bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const { | ||||
| 2319 | assert(N->getOpcode() == ISD::BRCOND)((void)0); | ||||
| 2320 | if (!N->hasOneUse()) | ||||
| 2321 | return false; | ||||
| 2322 | |||||
| 2323 | SDValue Cond = N->getOperand(1); | ||||
| 2324 | if (Cond.getOpcode() == ISD::CopyToReg) | ||||
| 2325 | Cond = Cond.getOperand(2); | ||||
| 2326 | |||||
| 2327 | if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse()) | ||||
| 2328 | return false; | ||||
| 2329 | |||||
| 2330 | MVT VT = Cond.getOperand(0).getSimpleValueType(); | ||||
| 2331 | if (VT == MVT::i32) | ||||
| 2332 | return true; | ||||
| 2333 | |||||
| 2334 | if (VT == MVT::i64) { | ||||
| 2335 | auto ST = static_cast<const GCNSubtarget *>(Subtarget); | ||||
| 2336 | |||||
| 2337 | ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get(); | ||||
| 2338 | return (CC == ISD::SETEQ || CC == ISD::SETNE) && ST->hasScalarCompareEq64(); | ||||
| 2339 | } | ||||
| 2340 | |||||
| 2341 | return false; | ||||
| 2342 | } | ||||
| 2343 | |||||
/// Select a BRCOND into S_CBRANCH_SCC1 (uniform, SCC-based) or
/// S_CBRANCH_VCCNZ (divergent, VCC-based), wiring the condition into the
/// appropriate physical register.
void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
  SDValue Cond = N->getOperand(1);

  // An undef condition: branch unconditionally via a pseudo so later passes
  // can pick either behavior.
  if (Cond.isUndef()) {
    CurDAG->SelectNodeTo(N, AMDGPU::SI_BR_UNDEF, MVT::Other,
                         N->getOperand(2), N->getOperand(0));
    return;
  }

  const GCNSubtarget *ST = static_cast<const GCNSubtarget *>(Subtarget);
  const SIRegisterInfo *TRI = ST->getRegisterInfo();

  bool UseSCCBr = isCBranchSCC(N) && isUniformBr(N);
  unsigned BrOp = UseSCCBr ? AMDGPU::S_CBRANCH_SCC1 : AMDGPU::S_CBRANCH_VCCNZ;
  Register CondReg = UseSCCBr ? AMDGPU::SCC : TRI->getVCC();
  SDLoc SL(N);

  if (!UseSCCBr) {
    // This is the case that we are selecting to S_CBRANCH_VCCNZ.  We have not
    // analyzed what generates the vcc value, so we do not know whether vcc
    // bits for disabled lanes are 0.  Thus we need to mask out bits for
    // disabled lanes.
    //
    // For the case that we select S_CBRANCH_SCC1 and it gets
    // changed to S_CBRANCH_VCCNZ in SIFixSGPRCopies, SIFixSGPRCopies calls
    // SIInstrInfo::moveToVALU which inserts the S_AND).
    //
    // We could add an analysis of what generates the vcc value here and omit
    // the S_AND when is unnecessary. But it would be better to add a separate
    // pass after SIFixSGPRCopies to do the unnecessary S_AND removal, so it
    // catches both cases.
    Cond = SDValue(CurDAG->getMachineNode(ST->isWave32() ? AMDGPU::S_AND_B32
                                                         : AMDGPU::S_AND_B64,
                     SL, MVT::i1,
                     CurDAG->getRegister(ST->isWave32() ? AMDGPU::EXEC_LO
                                                        : AMDGPU::EXEC,
                                         MVT::i1),
                    Cond),
                   0);
  }

  // Copy the (possibly masked) condition into SCC/VCC, then branch on it.
  SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, CondReg, Cond);
  CurDAG->SelectNodeTo(N, BrOp, MVT::Other,
                       N->getOperand(2), // Basic Block
                       VCC.getValue(0));
}
| 2390 | |||||
/// Select f32 FMAD/FMA into the mixed-precision V_MAD_MIX_F32 /
/// V_FMA_MIX_F32 forms when an operand is converted from f16.
void AMDGPUDAGToDAGISel::SelectFMAD_FMA(SDNode *N) {
  MVT VT = N->getSimpleValueType(0);
  bool IsFMA = N->getOpcode() == ISD::FMA;
  // Only handle f32 here, and only pair FMA with fma-mix subtargets and FMAD
  // with mad-mix subtargets; every other combination goes through the normal
  // tablegen patterns.
  if (VT != MVT::f32 || (!Subtarget->hasMadMixInsts() &&
                         !Subtarget->hasFmaMixInsts()) ||
      ((IsFMA && Subtarget->hasMadMixInsts()) ||
       (!IsFMA && Subtarget->hasFmaMixInsts()))) {
    SelectCode(N);
    return;
  }

  SDValue Src0 = N->getOperand(0);
  SDValue Src1 = N->getOperand(1);
  SDValue Src2 = N->getOperand(2);
  unsigned Src0Mods, Src1Mods, Src2Mods;

  // Avoid using v_mad_mix_f32/v_fma_mix_f32 unless there is actually an operand
  // using the conversion from f16.
  bool Sel0 = SelectVOP3PMadMixModsImpl(Src0, Src0, Src0Mods);
  bool Sel1 = SelectVOP3PMadMixModsImpl(Src1, Src1, Src1Mods);
  bool Sel2 = SelectVOP3PMadMixModsImpl(Src2, Src2, Src2Mods);

  assert((IsFMA || !Mode.allFP32Denormals()) &&
         "fmad selected with denormals enabled");
  // TODO: We can select this with f32 denormals enabled if all the sources are
  // converted from f16 (in which case fmad isn't legal).

  if (Sel0 || Sel1 || Sel2) {
    // For dummy operands.
    SDValue Zero = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
    SDValue Ops[] = {
      CurDAG->getTargetConstant(Src0Mods, SDLoc(), MVT::i32), Src0,
      CurDAG->getTargetConstant(Src1Mods, SDLoc(), MVT::i32), Src1,
      CurDAG->getTargetConstant(Src2Mods, SDLoc(), MVT::i32), Src2,
      CurDAG->getTargetConstant(0, SDLoc(), MVT::i1), // clamp
      Zero, Zero // omod / op_sel placeholders — confirm against instruction def
    };

    CurDAG->SelectNodeTo(N,
                         IsFMA ? AMDGPU::V_FMA_MIX_F32 : AMDGPU::V_MAD_MIX_F32,
                         MVT::f32, Ops);
  } else {
    SelectCode(N);
  }
}
| 2436 | |||||
| 2437 | // This is here because there isn't a way to use the generated sub0_sub1 as the | ||||
| 2438 | // subreg index to EXTRACT_SUBREG in tablegen. | ||||
void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
  // Custom selection for buffer atomic cmpswap: the RTN result lives in the
  // low half of the data register pair, so we must extract sub0/sub0_sub1
  // after selection. Flat-address cmpswaps go through normal patterns.
  MemSDNode *Mem = cast<MemSDNode>(N);
  unsigned AS = Mem->getAddressSpace();
  if (AS == AMDGPUAS::FLAT_ADDRESS) {
    SelectCode(N);
    return;
  }

  MVT VT = N->getSimpleValueType(0);
  bool Is32 = (VT == MVT::i32);
  SDLoc SL(N);

  MachineSDNode *CmpSwap = nullptr;
  // First try the ADDR64 (vector address) MUBUF form where available.
  if (Subtarget->hasAddr64()) {
    SDValue SRsrc, VAddr, SOffset, Offset;

    if (SelectMUBUFAddr64(Mem->getBasePtr(), SRsrc, VAddr, SOffset, Offset)) {
      unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN :
        AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN;
      SDValue CmpVal = Mem->getOperand(2);
      SDValue CPol = CurDAG->getTargetConstant(AMDGPU::CPol::GLC, SL, MVT::i32);

      // XXX - Do we care about glue operands?

      SDValue Ops[] = {CmpVal, VAddr, SRsrc, SOffset, Offset, CPol,
                       Mem->getChain()};

      CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
    }
  }

  // Fall back to the offset-only MUBUF form.
  if (!CmpSwap) {
    SDValue SRsrc, SOffset, Offset;
    if (SelectMUBUFOffset(Mem->getBasePtr(), SRsrc, SOffset, Offset)) {
      unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN :
        AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_OFFSET_RTN;

      SDValue CmpVal = Mem->getOperand(2);
      SDValue CPol = CurDAG->getTargetConstant(AMDGPU::CPol::GLC, SL, MVT::i32);
      SDValue Ops[] = {CmpVal, SRsrc, SOffset, Offset, CPol, Mem->getChain()};

      CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
    }
  }

  // Neither addressing mode matched; let table-driven selection handle it.
  if (!CmpSwap) {
    SelectCode(N);
    return;
  }

  // Preserve the memory operand on the selected machine node.
  MachineMemOperand *MMO = Mem->getMemOperand();
  CurDAG->setNodeMemRefs(CmpSwap, {MMO});

  // The result is the low sub-register of the returned data pair.
  unsigned SubReg = Is32 ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
  SDValue Extract
    = CurDAG->getTargetExtractSubreg(SubReg, SL, VT, SDValue(CmpSwap, 0));

  ReplaceUses(SDValue(N, 0), Extract);
  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 1));
  CurDAG->RemoveDeadNode(N);
}
| 2500 | |||||
/// Select ds_append/ds_consume intrinsics, moving the pointer (or its base)
/// into M0 via glue and folding a legal constant offset into the
/// instruction's offset field.
void AMDGPUDAGToDAGISel::SelectDSAppendConsume(SDNode *N, unsigned IntrID) {
  // The address is assumed to be uniform, so if it ends up in a VGPR, it will
  // be copied to an SGPR with readfirstlane.
  unsigned Opc = IntrID == Intrinsic::amdgcn_ds_append ?
    AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;

  SDValue Chain = N->getOperand(0);
  SDValue Ptr = N->getOperand(2);
  MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
  MachineMemOperand *MMO = M->getMemOperand();
  bool IsGDS = M->getAddressSpace() == AMDGPUAS::REGION_ADDRESS;

  // Offset stays null unless a DS-legal constant offset is peeled off below.
  SDValue Offset;
  if (CurDAG->isBaseWithConstantOffset(Ptr)) {
    SDValue PtrBase = Ptr.getOperand(0);
    SDValue PtrOffset = Ptr.getOperand(1);

    const APInt &OffsetVal = cast<ConstantSDNode>(PtrOffset)->getAPIntValue();
    if (isDSOffsetLegal(PtrBase, OffsetVal.getZExtValue())) {
      // Base goes to M0; the constant becomes the immediate offset. Note
      // glueCopyToM0 returns a replacement node for N.
      N = glueCopyToM0(N, PtrBase);
      Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i32);
    }
  }

  // SDValue's boolean conversion tests the underlying node pointer: no
  // offset was folded, so the whole pointer goes to M0 with offset 0.
  if (!Offset) {
    N = glueCopyToM0(N, Ptr);
    Offset = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
  }

  SDValue Ops[] = {
    Offset,
    CurDAG->getTargetConstant(IsGDS, SDLoc(), MVT::i32),
    Chain,
    N->getOperand(N->getNumOperands() - 1) // New glue
  };

  SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
}
| 2540 | |||||
| 2541 | static unsigned gwsIntrinToOpcode(unsigned IntrID) { | ||||
| 2542 | switch (IntrID) { | ||||
| 2543 | case Intrinsic::amdgcn_ds_gws_init: | ||||
| 2544 | return AMDGPU::DS_GWS_INIT; | ||||
| 2545 | case Intrinsic::amdgcn_ds_gws_barrier: | ||||
| 2546 | return AMDGPU::DS_GWS_BARRIER; | ||||
| 2547 | case Intrinsic::amdgcn_ds_gws_sema_v: | ||||
| 2548 | return AMDGPU::DS_GWS_SEMA_V; | ||||
| 2549 | case Intrinsic::amdgcn_ds_gws_sema_br: | ||||
| 2550 | return AMDGPU::DS_GWS_SEMA_BR; | ||||
| 2551 | case Intrinsic::amdgcn_ds_gws_sema_p: | ||||
| 2552 | return AMDGPU::DS_GWS_SEMA_P; | ||||
| 2553 | case Intrinsic::amdgcn_ds_gws_sema_release_all: | ||||
| 2554 | return AMDGPU::DS_GWS_SEMA_RELEASE_ALL; | ||||
| 2555 | default: | ||||
| 2556 | llvm_unreachable("not a gws intrinsic")__builtin_unreachable(); | ||||
| 2557 | } | ||||
| 2558 | } | ||||
| 2559 | |||||
/// Select a GWS intrinsic into its DS_GWS_* instruction, splitting the
/// offset between M0 (set up via glue) and the immediate offset field.
void AMDGPUDAGToDAGISel::SelectDS_GWS(SDNode *N, unsigned IntrID) {
  if (IntrID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
      !Subtarget->hasGWSSemaReleaseAll()) {
    // Let this error.
    SelectCode(N);
    return;
  }

  // Chain, intrinsic ID, vsrc, offset
  const bool HasVSrc = N->getNumOperands() == 4;
  assert(HasVSrc || N->getNumOperands() == 3);

  SDLoc SL(N);
  SDValue BaseOffset = N->getOperand(HasVSrc ? 3 : 2);
  int ImmOffset = 0;
  MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
  MachineMemOperand *MMO = M->getMemOperand();

  // Don't worry if the offset ends up in a VGPR. Only one lane will have
  // effect, so SIFixSGPRCopies will validly insert readfirstlane.

  // The resource id offset is computed as (<isa opaque base> + M0[21:16] +
  // offset field) % 64. Some versions of the programming guide omit the m0
  // part, or claim it's from offset 0.
  if (ConstantSDNode *ConstOffset = dyn_cast<ConstantSDNode>(BaseOffset)) {
    // If we have a constant offset, try to use the 0 in m0 as the base.
    // TODO: Look into changing the default m0 initialization value. If the
    // default -1 only set the low 16-bits, we could leave it as-is and add 1 to
    // the immediate offset.
    glueCopyToM0(N, CurDAG->getTargetConstant(0, SL, MVT::i32));
    ImmOffset = ConstOffset->getZExtValue();
  } else {
    // Peel a constant addend into the immediate field when present.
    if (CurDAG->isBaseWithConstantOffset(BaseOffset)) {
      ImmOffset = BaseOffset.getConstantOperandVal(1);
      BaseOffset = BaseOffset.getOperand(0);
    }

    // Prefer to do the shift in an SGPR since it should be possible to use m0
    // as the result directly. If it's already an SGPR, it will be eliminated
    // later.
    SDNode *SGPROffset
      = CurDAG->getMachineNode(AMDGPU::V_READFIRSTLANE_B32, SL, MVT::i32,
                               BaseOffset);
    // Shift to offset in m0
    SDNode *M0Base
      = CurDAG->getMachineNode(AMDGPU::S_LSHL_B32, SL, MVT::i32,
                               SDValue(SGPROffset, 0),
                               CurDAG->getTargetConstant(16, SL, MVT::i32));
    glueCopyToM0(N, SDValue(M0Base, 0));
  }

  SDValue Chain = N->getOperand(0);
  SDValue OffsetField = CurDAG->getTargetConstant(ImmOffset, SL, MVT::i32);

  const unsigned Opc = gwsIntrinToOpcode(IntrID);
  SmallVector<SDValue, 5> Ops;
  if (HasVSrc)
    Ops.push_back(N->getOperand(2));
  Ops.push_back(OffsetField);
  Ops.push_back(Chain);

  SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
}
| 2624 | |||||
/// Select amdgcn.interp.p1.f16. On 16-bank-LDS subtargets this expands to a
/// V_INTERP_MOV_F32 + V_INTERP_P1LV_F16 pair glued to a copy into M0;
/// everywhere else the single-instruction tablegen pattern applies.
void AMDGPUDAGToDAGISel::SelectInterpP1F16(SDNode *N) {
  if (Subtarget->getLDSBankCount() != 16) {
    // This is a single instruction with a pattern.
    SelectCode(N);
    return;
  }

  SDLoc DL(N);

  // This requires 2 instructions. It is possible to write a pattern to support
  // this, but the generated isel emitter doesn't correctly deal with multiple
  // output instructions using the same physical register input. The copy to m0
  // is incorrectly placed before the second instruction.
  //
  // TODO: Match source modifiers.
  //
  // def : Pat <
  //   (int_amdgcn_interp_p1_f16
  //    (VOP3Mods f32:$src0, i32:$src0_modifiers),
  //                             (i32 timm:$attrchan), (i32 timm:$attr),
  //                             (i1 timm:$high), M0),
  //   (V_INTERP_P1LV_F16 $src0_modifiers, VGPR_32:$src0, timm:$attr,
  //       timm:$attrchan, 0,
  //       (V_INTERP_MOV_F32 2, timm:$attr, timm:$attrchan), timm:$high)> {
  //   let Predicates = [has16BankLDS];
  // }

  // 16 bank LDS
  // Copy operand 5 (the M0 value) into the physical M0 register; the glue
  // output orders both interp instructions after this copy.
  SDValue ToM0 = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL, AMDGPU::M0,
                                      N->getOperand(5), SDValue());

  SDVTList VTs = CurDAG->getVTList(MVT::f32, MVT::Other);

  SDNode *InterpMov =
    CurDAG->getMachineNode(AMDGPU::V_INTERP_MOV_F32, DL, VTs, {
        CurDAG->getTargetConstant(2, DL, MVT::i32), // P0
        N->getOperand(3),  // Attr
        N->getOperand(2),  // Attrchan
        ToM0.getValue(1) // In glue
  });

  SDNode *InterpP1LV =
    CurDAG->getMachineNode(AMDGPU::V_INTERP_P1LV_F16, DL, MVT::f32, {
        CurDAG->getTargetConstant(0, DL, MVT::i32), // $src0_modifiers
        N->getOperand(1), // Src0
        N->getOperand(3), // Attr
        N->getOperand(2), // Attrchan
        CurDAG->getTargetConstant(0, DL, MVT::i32), // $src2_modifiers
        SDValue(InterpMov, 0), // Src2 - holds two f16 values selected by high
        N->getOperand(4), // high
        CurDAG->getTargetConstant(0, DL, MVT::i1), // $clamp
        CurDAG->getTargetConstant(0, DL, MVT::i32), // $omod
        SDValue(InterpMov, 1)
  });

  CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), SDValue(InterpP1LV, 0));
}
| 2682 | |||||
| 2683 | void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) { | ||||
| 2684 | unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); | ||||
| 2685 | switch (IntrID) { | ||||
| 2686 | case Intrinsic::amdgcn_ds_append: | ||||
| 2687 | case Intrinsic::amdgcn_ds_consume: { | ||||
| 2688 | if (N->getValueType(0) != MVT::i32) | ||||
| 2689 | break; | ||||
| 2690 | SelectDSAppendConsume(N, IntrID); | ||||
| 2691 | return; | ||||
| 2692 | } | ||||
| 2693 | } | ||||
| 2694 | |||||
| 2695 | SelectCode(N); | ||||
| 2696 | } | ||||
| 2697 | |||||
| 2698 | void AMDGPUDAGToDAGISel::SelectINTRINSIC_WO_CHAIN(SDNode *N) { | ||||
| 2699 | unsigned IntrID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); | ||||
| 2700 | unsigned Opcode; | ||||
| 2701 | switch (IntrID) { | ||||
| 2702 | case Intrinsic::amdgcn_wqm: | ||||
| 2703 | Opcode = AMDGPU::WQM; | ||||
| 2704 | break; | ||||
| 2705 | case Intrinsic::amdgcn_softwqm: | ||||
| 2706 | Opcode = AMDGPU::SOFT_WQM; | ||||
| 2707 | break; | ||||
| 2708 | case Intrinsic::amdgcn_wwm: | ||||
| 2709 | case Intrinsic::amdgcn_strict_wwm: | ||||
| 2710 | Opcode = AMDGPU::STRICT_WWM; | ||||
| 2711 | break; | ||||
| 2712 | case Intrinsic::amdgcn_strict_wqm: | ||||
| 2713 | Opcode = AMDGPU::STRICT_WQM; | ||||
| 2714 | break; | ||||
| 2715 | case Intrinsic::amdgcn_interp_p1_f16: | ||||
| 2716 | SelectInterpP1F16(N); | ||||
| 2717 | return; | ||||
| 2718 | default: | ||||
| 2719 | SelectCode(N); | ||||
| 2720 | return; | ||||
| 2721 | } | ||||
| 2722 | |||||
| 2723 | SDValue Src = N->getOperand(1); | ||||
| 2724 | CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), {Src}); | ||||
| 2725 | } | ||||
| 2726 | |||||
| 2727 | void AMDGPUDAGToDAGISel::SelectINTRINSIC_VOID(SDNode *N) { | ||||
| 2728 | unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); | ||||
| 2729 | switch (IntrID) { | ||||
| 2730 | case Intrinsic::amdgcn_ds_gws_init: | ||||
| 2731 | case Intrinsic::amdgcn_ds_gws_barrier: | ||||
| 2732 | case Intrinsic::amdgcn_ds_gws_sema_v: | ||||
| 2733 | case Intrinsic::amdgcn_ds_gws_sema_br: | ||||
| 2734 | case Intrinsic::amdgcn_ds_gws_sema_p: | ||||
| 2735 | case Intrinsic::amdgcn_ds_gws_sema_release_all: | ||||
| 2736 | SelectDS_GWS(N, IntrID); | ||||
| 2737 | return; | ||||
| 2738 | default: | ||||
| 2739 | break; | ||||
| 2740 | } | ||||
| 2741 | |||||
| 2742 | SelectCode(N); | ||||
| 2743 | } | ||||
| 2744 | |||||
| 2745 | bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src, | ||||
| 2746 | unsigned &Mods, | ||||
| 2747 | bool AllowAbs) const { | ||||
| 2748 | Mods = 0; | ||||
| 2749 | Src = In; | ||||
| 2750 | |||||
| 2751 | if (Src.getOpcode() == ISD::FNEG) { | ||||
| 2752 | Mods |= SISrcMods::NEG; | ||||
| 2753 | Src = Src.getOperand(0); | ||||
| 2754 | } | ||||
| 2755 | |||||
| 2756 | if (AllowAbs && Src.getOpcode() == ISD::FABS) { | ||||
| 2757 | Mods |= SISrcMods::ABS; | ||||
| 2758 | Src = Src.getOperand(0); | ||||
| 2759 | } | ||||
| 2760 | |||||
| 2761 | return true; | ||||
| 2762 | } | ||||
| 2763 | |||||
| 2764 | bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src, | ||||
| 2765 | SDValue &SrcMods) const { | ||||
| 2766 | unsigned Mods; | ||||
| 2767 | if (SelectVOP3ModsImpl(In, Src, Mods)) { | ||||
| 2768 | SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32); | ||||
| 2769 | return true; | ||||
| 2770 | } | ||||
| 2771 | |||||
| 2772 | return false; | ||||
| 2773 | } | ||||
| 2774 | |||||
| 2775 | bool AMDGPUDAGToDAGISel::SelectVOP3BMods(SDValue In, SDValue &Src, | ||||
| 2776 | SDValue &SrcMods) const { | ||||
| 2777 | unsigned Mods; | ||||
| 2778 | if (SelectVOP3ModsImpl(In, Src, Mods, /* AllowAbs */ false)) { | ||||
| 2779 | SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32); | ||||
| 2780 | return true; | ||||
| 2781 | } | ||||
| 2782 | |||||
| 2783 | return false; | ||||
| 2784 | } | ||||
| 2785 | |||||
// Match VOP3 modifiers, but only succeed when the stripped source is known
// not to be a NaN. Note SelectVOP3Mods itself always matches; only the
// no-NaN check can fail here.
bool AMDGPUDAGToDAGISel::SelectVOP3Mods_NNaN(SDValue In, SDValue &Src,
                                             SDValue &SrcMods) const {
  SelectVOP3Mods(In, Src, SrcMods);
  return isNoNanSrc(Src);
}
| 2791 | |||||
| 2792 | bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const { | ||||
| 2793 | if (In.getOpcode() == ISD::FABS || In.getOpcode() == ISD::FNEG) | ||||
| 2794 | return false; | ||||
| 2795 | |||||
| 2796 | Src = In; | ||||
| 2797 | return true; | ||||
| 2798 | } | ||||
| 2799 | |||||
| 2800 | bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src, | ||||
| 2801 | SDValue &SrcMods, SDValue &Clamp, | ||||
| 2802 | SDValue &Omod) const { | ||||
| 2803 | SDLoc DL(In); | ||||
| 2804 | Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1); | ||||
| 2805 | Omod = CurDAG->getTargetConstant(0, DL, MVT::i1); | ||||
| 2806 | |||||
| 2807 | return SelectVOP3Mods(In, Src, SrcMods); | ||||
| 2808 | } | ||||
| 2809 | |||||
| 2810 | bool AMDGPUDAGToDAGISel::SelectVOP3BMods0(SDValue In, SDValue &Src, | ||||
| 2811 | SDValue &SrcMods, SDValue &Clamp, | ||||
| 2812 | SDValue &Omod) const { | ||||
| 2813 | SDLoc DL(In); | ||||
| 2814 | Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1); | ||||
| 2815 | Omod = CurDAG->getTargetConstant(0, DL, MVT::i1); | ||||
| 2816 | |||||
| 2817 | return SelectVOP3BMods(In, Src, SrcMods); | ||||
| 2818 | } | ||||
| 2819 | |||||
| 2820 | bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src, | ||||
| 2821 | SDValue &Clamp, SDValue &Omod) const { | ||||
| 2822 | Src = In; | ||||
| 2823 | |||||
| 2824 | SDLoc DL(In); | ||||
| 2825 | Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1); | ||||
| 2826 | Omod = CurDAG->getTargetConstant(0, DL, MVT::i1); | ||||
| 2827 | |||||
| 2828 | return true; | ||||
| 2829 | } | ||||
| 2830 | |||||
| 2831 | bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src, | ||||
| 2832 | SDValue &SrcMods) const { | ||||
| 2833 | unsigned Mods = 0; | ||||
| 2834 | Src = In; | ||||
| 2835 | |||||
| 2836 | if (Src.getOpcode() == ISD::FNEG) { | ||||
| 2837 | Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI); | ||||
| 2838 | Src = Src.getOperand(0); | ||||
| 2839 | } | ||||
| 2840 | |||||
| 2841 | if (Src.getOpcode() == ISD::BUILD_VECTOR) { | ||||
| 2842 | unsigned VecMods = Mods; | ||||
| 2843 | |||||
| 2844 | SDValue Lo = stripBitcast(Src.getOperand(0)); | ||||
| 2845 | SDValue Hi = stripBitcast(Src.getOperand(1)); | ||||
| 2846 | |||||
| 2847 | if (Lo.getOpcode() == ISD::FNEG) { | ||||
| 2848 | Lo = stripBitcast(Lo.getOperand(0)); | ||||
| 2849 | Mods ^= SISrcMods::NEG; | ||||
| 2850 | } | ||||
| 2851 | |||||
| 2852 | if (Hi.getOpcode() == ISD::FNEG) { | ||||
| 2853 | Hi = stripBitcast(Hi.getOperand(0)); | ||||
| 2854 | Mods ^= SISrcMods::NEG_HI; | ||||
| 2855 | } | ||||
| 2856 | |||||
| 2857 | if (isExtractHiElt(Lo, Lo)) | ||||
| 2858 | Mods |= SISrcMods::OP_SEL_0; | ||||
| 2859 | |||||
| 2860 | if (isExtractHiElt(Hi, Hi)) | ||||
| 2861 | Mods |= SISrcMods::OP_SEL_1; | ||||
| 2862 | |||||
| 2863 | unsigned VecSize = Src.getValueSizeInBits(); | ||||
| 2864 | Lo = stripExtractLoElt(Lo); | ||||
| 2865 | Hi = stripExtractLoElt(Hi); | ||||
| 2866 | |||||
| 2867 | if (Lo.getValueSizeInBits() > VecSize) { | ||||
| 2868 | Lo = CurDAG->getTargetExtractSubreg( | ||||
| 2869 | (VecSize > 32) ? AMDGPU::sub0_sub1 : AMDGPU::sub0, SDLoc(In), | ||||
| 2870 | MVT::getIntegerVT(VecSize), Lo); | ||||
| 2871 | } | ||||
| 2872 | |||||
| 2873 | if (Hi.getValueSizeInBits() > VecSize) { | ||||
| 2874 | Hi = CurDAG->getTargetExtractSubreg( | ||||
| 2875 | (VecSize > 32) ? AMDGPU::sub0_sub1 : AMDGPU::sub0, SDLoc(In), | ||||
| 2876 | MVT::getIntegerVT(VecSize), Hi); | ||||
| 2877 | } | ||||
| 2878 | |||||
| 2879 | assert(Lo.getValueSizeInBits() <= VecSize &&((void)0) | ||||
| 2880 | Hi.getValueSizeInBits() <= VecSize)((void)0); | ||||
| 2881 | |||||
| 2882 | if (Lo == Hi && !isInlineImmediate(Lo.getNode())) { | ||||
| 2883 | // Really a scalar input. Just select from the low half of the register to | ||||
| 2884 | // avoid packing. | ||||
| 2885 | |||||
| 2886 | if (VecSize == 32 || VecSize == Lo.getValueSizeInBits()) { | ||||
| 2887 | Src = Lo; | ||||
| 2888 | } else { | ||||
| 2889 | assert(Lo.getValueSizeInBits() == 32 && VecSize == 64)((void)0); | ||||
| 2890 | |||||
| 2891 | SDLoc SL(In); | ||||
| 2892 | SDValue Undef = SDValue( | ||||
| 2893 | CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, SL, | ||||
| 2894 | Lo.getValueType()), 0); | ||||
| 2895 | auto RC = Lo->isDivergent() ? AMDGPU::VReg_64RegClassID | ||||
| 2896 | : AMDGPU::SReg_64RegClassID; | ||||
| 2897 | const SDValue Ops[] = { | ||||
| 2898 | CurDAG->getTargetConstant(RC, SL, MVT::i32), | ||||
| 2899 | Lo, CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32), | ||||
| 2900 | Undef, CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32) }; | ||||
| 2901 | |||||
| 2902 | Src = SDValue(CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, SL, | ||||
| 2903 | Src.getValueType(), Ops), 0); | ||||
| 2904 | } | ||||
| 2905 | SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32); | ||||
| 2906 | return true; | ||||
| 2907 | } | ||||
| 2908 | |||||
| 2909 | if (VecSize == 64 && Lo == Hi && isa<ConstantFPSDNode>(Lo)) { | ||||
| 2910 | uint64_t Lit = cast<ConstantFPSDNode>(Lo)->getValueAPF() | ||||
| 2911 | .bitcastToAPInt().getZExtValue(); | ||||
| 2912 | if (AMDGPU::isInlinableLiteral32(Lit, Subtarget->hasInv2PiInlineImm())) { | ||||
| 2913 | Src = CurDAG->getTargetConstant(Lit, SDLoc(In), MVT::i64);; | ||||
| 2914 | SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32); | ||||
| 2915 | return true; | ||||
| 2916 | } | ||||
| 2917 | } | ||||
| 2918 | |||||
| 2919 | Mods = VecMods; | ||||
| 2920 | } | ||||
| 2921 | |||||
| 2922 | // Packed instructions do not have abs modifiers. | ||||
| 2923 | Mods |= SISrcMods::OP_SEL_1; | ||||
| 2924 | |||||
| 2925 | SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32); | ||||
| 2926 | return true; | ||||
| 2927 | } | ||||
| 2928 | |||||
// Match a source for an op_sel-capable operand. No modifiers are folded yet;
// a zero modifier mask is emitted.
bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src,
                                         SDValue &SrcMods) const {
  Src = In;
  // FIXME: Handle op_sel
  SrcMods = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32);
  return true;
}
| 2936 | |||||
// Match an op_sel source with modifiers; for now this just defers to the
// ordinary VOP3 modifier matcher.
bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(SDValue In, SDValue &Src,
                                             SDValue &SrcMods) const {
  // FIXME: Handle op_sel
  return SelectVOP3Mods(In, Src, SrcMods);
}
| 2942 | |||||
// The return value is not whether the match is possible (which it always is),
// but whether or not a conversion is really used: true means an f16->f32
// FP_EXTEND was folded into the mad-mix operand, false means the source is
// used as-is.
bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
                                                   unsigned &Mods) const {
  Mods = 0;
  SelectVOP3ModsImpl(In, Src, Mods);

  if (Src.getOpcode() == ISD::FP_EXTEND) {
    Src = Src.getOperand(0);
    assert(Src.getValueType() == MVT::f16);
    Src = stripBitcast(Src);

    // Be careful about folding modifiers if we already have an abs. fneg is
    // applied last, so we don't want to apply an earlier fneg.
    if ((Mods & SISrcMods::ABS) == 0) {
      unsigned ModsTmp;
      SelectVOP3ModsImpl(Src, Src, ModsTmp);

      // An inner fneg combines with (cancels or adds to) the outer one.
      if ((ModsTmp & SISrcMods::NEG) != 0)
        Mods ^= SISrcMods::NEG;

      if ((ModsTmp & SISrcMods::ABS) != 0)
        Mods |= SISrcMods::ABS;
    }

    // op_sel/op_sel_hi decide the source type and source.
    // If the source's op_sel_hi is set, it indicates to do a conversion from
    // fp16. If the source's op_sel is set, it picks the high half of the
    // source register.

    Mods |= SISrcMods::OP_SEL_1;
    if (isExtractHiElt(Src, Src)) {
      Mods |= SISrcMods::OP_SEL_0;

      // TODO: Should we try to look for neg/abs here?
    }

    return true;
  }

  return false;
}
| 2985 | |||||
// Select a mad-mix source and modifier mask. The impl always "matches"; its
// boolean result only reports whether an f16->f32 conversion was folded,
// which is irrelevant here, so this always returns true.
bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(SDValue In, SDValue &Src,
                                               SDValue &SrcMods) const {
  unsigned Mods = 0;
  SelectVOP3PMadMixModsImpl(In, Src, Mods);
  SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32);
  return true;
}
| 2993 | |||||
// Return a value usable as the high 16-bit half of a packed operand:
// undef stays undef, integer/FP constants are shifted into the high half of
// an i32, and an explicit extract of a high half is looked through.
// Returns an empty SDValue if no such form is available.
// NOTE(review): the two dyn_cast conditions were truncated in the analyzed
// dump; reconstructed from the uses of C below (getZExtValue /
// getValueAPF) — confirm against upstream.
SDValue AMDGPUDAGToDAGISel::getHi16Elt(SDValue In) const {
  if (In.isUndef())
    return CurDAG->getUNDEF(MVT::i32);

  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(In)) {
    SDLoc SL(In);
    return CurDAG->getConstant(C->getZExtValue() << 16, SL, MVT::i32);
  }

  if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(In)) {
    SDLoc SL(In);
    return CurDAG->getConstant(
      C->getValueAPF().bitcastToAPInt().getZExtValue() << 16, SL, MVT::i32);
  }

  SDValue Src;
  if (isExtractHiElt(In, Src))
    return Src;

  return SDValue();
}
| 3015 | |||||
// Return true if immediate node N must be materialized in a VGPR: i.e. at
// least one of its uses strictly requires a VGPR operand and cannot be
// commuted into an SGPR-or-VGPR (VS_32) slot. Only the first 10 uses are
// inspected, to bound compile time; past that limit we conservatively
// return false.
bool AMDGPUDAGToDAGISel::isVGPRImm(const SDNode * N) const {
  assert(CurDAG->getTarget().getTargetTriple().getArch() == Triple::amdgcn);

  const SIRegisterInfo *SIRI =
    static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
  const SIInstrInfo * SII =
    static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());

  unsigned Limit = 0;
  bool AllUsesAcceptSReg = true;
  for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
    Limit < 10 && U != E; ++U, ++Limit) {
    const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo());

    // If the register class is unknown, it could be an unknown
    // register class that needs to be an SGPR, e.g. an inline asm
    // constraint
    if (!RC || SIRI->isSGPRClass(RC))
      return false;

    if (RC != &AMDGPU::VS_32RegClass) {
      AllUsesAcceptSReg = false;
      SDNode * User = *U;
      if (User->isMachineOpcode()) {
        unsigned Opc = User->getMachineOpcode();
        MCInstrDesc Desc = SII->get(Opc);
        if (Desc.isCommutable()) {
          // Try commuting the use: if the operand would land in a VS_32
          // slot after commuting, an SGPR is still acceptable.
          unsigned OpIdx = Desc.getNumDefs() + U.getOperandNo();
          unsigned CommuteIdx1 = TargetInstrInfo::CommuteAnyOperandIndex;
          if (SII->findCommutedOpIndices(Desc, OpIdx, CommuteIdx1)) {
            unsigned CommutedOpNo = CommuteIdx1 - Desc.getNumDefs();
            const TargetRegisterClass *CommutedRC = getOperandRegClass(*U, CommutedOpNo);
            if (CommutedRC == &AMDGPU::VS_32RegClass)
              AllUsesAcceptSReg = true;
          }
        }
      }
      // If "AllUsesAcceptSReg == false" so far we haven't succeeded
      // commuting current user. This means have at least one use
      // that strictly require VGPR. Thus, we will not attempt to commute
      // other user instructions.
      if (!AllUsesAcceptSReg)
        break;
    }
  }
  return !AllUsesAcceptSReg && (Limit < 10);
}
| 3063 | |||||
| 3064 | bool AMDGPUDAGToDAGISel::isUniformLoad(const SDNode * N) const { | ||||
| 3065 | auto Ld = cast<LoadSDNode>(N); | ||||
| 3066 | |||||
| 3067 | return Ld->getAlignment() >= 4 && | ||||
| 3068 | ( | ||||
| 3069 | ( | ||||
| 3070 | ( | ||||
| 3071 | Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS || | ||||
| 3072 | Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT | ||||
| 3073 | ) | ||||
| 3074 | && | ||||
| 3075 | !N->isDivergent() | ||||
| 3076 | ) | ||||
| 3077 | || | ||||
| 3078 | ( | ||||
| 3079 | Subtarget->getScalarizeGlobalBehavior() && | ||||
| 3080 | Ld->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && | ||||
| 3081 | Ld->isSimple() && | ||||
| 3082 | !N->isDivergent() && | ||||
| 3083 | static_cast<const SITargetLowering *>( | ||||
| 3084 | getTargetLowering())->isMemOpHasNoClobberedMemOperand(N) | ||||
| 3085 | ) | ||||
| 3086 | ); | ||||
| 3087 | } | ||||
| 3088 | |||||
// Run target-specific folding over the selected machine nodes until a fixed
// point is reached: each sweep gives PostISelFolding a chance to combine or
// replace MachineSDNodes, and the loop repeats while anything changed.
void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
  const AMDGPUTargetLowering& Lowering =
    *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
  bool IsModified = false;
  do {
    IsModified = false;

    // Go over all selected nodes and try to fold them a bit more
    SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_begin();
    while (Position != CurDAG->allnodes_end()) {
      // Advance the iterator before folding: Node may be replaced below.
      SDNode *Node = &*Position++;
      MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(Node);
      if (!MachineNode)
        continue;

      SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
      if (ResNode != Node) {
        if (ResNode)
          ReplaceUses(Node, ResNode);
        IsModified = true;
      }
    }
    // Clean up nodes left dead by the replacements before the next sweep.
    CurDAG->RemoveDeadNodes();
  } while (IsModified);
}
| 3114 | |||||
// Cache the R600 subtarget for this function, then run the common
// SelectionDAG instruction-selection machinery.
bool R600DAGToDAGISel::runOnMachineFunction(MachineFunction &MF) {
  Subtarget = &MF.getSubtarget<R600Subtarget>();
  return SelectionDAGISel::runOnMachineFunction(MF);
}
| 3119 | |||||
| 3120 | bool R600DAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const { | ||||
| 3121 | if (!N->readMem()) | ||||
| 3122 | return false; | ||||
| 3123 | if (CbId == -1) | ||||
| 3124 | return N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS || | ||||
| 3125 | N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT; | ||||
| 3126 | |||||
| 3127 | return N->getAddressSpace() == AMDGPUAS::CONSTANT_BUFFER_0 + CbId; | ||||
| 3128 | } | ||||
| 3129 | |||||
| 3130 | bool R600DAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr, | ||||
| 3131 | SDValue& IntPtr) { | ||||
| 3132 | if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) { | ||||
| 3133 | IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr), | ||||
| 3134 | true); | ||||
| 3135 | return true; | ||||
| 3136 | } | ||||
| 3137 | return false; | ||||
| 3138 | } | ||||
| 3139 | |||||
| 3140 | bool R600DAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr, | ||||
| 3141 | SDValue& BaseReg, SDValue &Offset) { | ||||
| 3142 | if (!isa<ConstantSDNode>(Addr)) { | ||||
| 3143 | BaseReg = Addr; | ||||
| 3144 | Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true); | ||||
| 3145 | return true; | ||||
| 3146 | } | ||||
| 3147 | return false; | ||||
| 3148 | } | ||||
| 3149 | |||||
| 3150 | void R600DAGToDAGISel::Select(SDNode *N) { | ||||
| 3151 | unsigned int Opc = N->getOpcode(); | ||||
| 3152 | if (N->isMachineOpcode()) { | ||||
| 3153 | N->setNodeId(-1); | ||||
| 3154 | return; // Already selected. | ||||
| 3155 | } | ||||
| 3156 | |||||
| 3157 | switch (Opc) { | ||||
| 3158 | default: break; | ||||
| 3159 | case AMDGPUISD::BUILD_VERTICAL_VECTOR: | ||||
| 3160 | case ISD::SCALAR_TO_VECTOR: | ||||
| 3161 | case ISD::BUILD_VECTOR: { | ||||
| 3162 | EVT VT = N->getValueType(0); | ||||
| 3163 | unsigned NumVectorElts = VT.getVectorNumElements(); | ||||
| 3164 | unsigned RegClassID; | ||||
| 3165 | // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG | ||||
| 3166 | // that adds a 128 bits reg copy when going through TwoAddressInstructions | ||||
| 3167 | // pass. We want to avoid 128 bits copies as much as possible because they | ||||
| 3168 | // can't be bundled by our scheduler. | ||||
| 3169 | switch(NumVectorElts) { | ||||
| 3170 | case 2: RegClassID = R600::R600_Reg64RegClassID; break; | ||||
| 3171 | case 4: | ||||
| 3172 | if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR) | ||||
| 3173 | RegClassID = R600::R600_Reg128VerticalRegClassID; | ||||
| 3174 | else | ||||
| 3175 | RegClassID = R600::R600_Reg128RegClassID; | ||||
| 3176 | break; | ||||
| 3177 | default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR")__builtin_unreachable(); | ||||
| 3178 | } | ||||
| 3179 | SelectBuildVector(N, RegClassID); | ||||
| 3180 | return; | ||||
| 3181 | } | ||||
| 3182 | } | ||||
| 3183 | |||||
| 3184 | SelectCode(N); | ||||
| 3185 | } | ||||
| 3186 | |||||
| 3187 | bool R600DAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base, | ||||
| 3188 | SDValue &Offset) { | ||||
| 3189 | ConstantSDNode *C; | ||||
| 3190 | SDLoc DL(Addr); | ||||
| 3191 | |||||
| 3192 | if ((C = dyn_cast<ConstantSDNode>(Addr))) { | ||||
| 3193 | Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32); | ||||
| 3194 | Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32); | ||||
| 3195 | } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) && | ||||
| 3196 | (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) { | ||||
| 3197 | Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32); | ||||
| 3198 | Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32); | ||||
| 3199 | } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) && | ||||
| 3200 | (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) { | ||||
| 3201 | Base = Addr.getOperand(0); | ||||
| 3202 | Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32); | ||||
| 3203 | } else { | ||||
| 3204 | Base = Addr; | ||||
| 3205 | Offset = CurDAG->getTargetConstant(0, DL, MVT::i32); | ||||
| 3206 | } | ||||
| 3207 | |||||
| 3208 | return true; | ||||
| 3209 | } | ||||
| 3210 | |||||
| 3211 | bool R600DAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base, | ||||
| 3212 | SDValue &Offset) { | ||||
| 3213 | ConstantSDNode *IMMOffset; | ||||
| 3214 | |||||
| 3215 | if (Addr.getOpcode() == ISD::ADD | ||||
| 3216 | && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) | ||||
| 3217 | && isInt<16>(IMMOffset->getZExtValue())) { | ||||
| 3218 | |||||
| 3219 | Base = Addr.getOperand(0); | ||||
| 3220 | Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr), | ||||
| 3221 | MVT::i32); | ||||
| 3222 | return true; | ||||
| 3223 | // If the pointer address is constant, we can move it to the offset field. | ||||
| 3224 | } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr)) | ||||
| 3225 | && isInt<16>(IMMOffset->getZExtValue())) { | ||||
| 3226 | Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), | ||||
| 3227 | SDLoc(CurDAG->getEntryNode()), | ||||
| 3228 | R600::ZERO, MVT::i32); | ||||
| 3229 | Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr), | ||||
| 3230 | MVT::i32); | ||||
| 3231 | return true; | ||||
| 3232 | } | ||||
| 3233 | |||||
| 3234 | // Default case, no offset | ||||
| 3235 | Base = Addr; | ||||
| 3236 | Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32); | ||||
| 3237 | return true; | ||||
| 3238 | } |
| 1 | //===- llvm/CodeGen/SelectionDAGNodes.h - SelectionDAG Nodes ----*- C++ -*-===// | |||
| 2 | // | |||
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |||
| 4 | // See https://llvm.org/LICENSE.txt for license information. | |||
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |||
| 6 | // | |||
| 7 | //===----------------------------------------------------------------------===// | |||
| 8 | // | |||
| 9 | // This file declares the SDNode class and derived classes, which are used to | |||
| 10 | // represent the nodes and operations present in a SelectionDAG. These nodes | |||
| 11 | // and operations are machine code level operations, with some similarities to | |||
| 12 | // the GCC RTL representation. | |||
| 13 | // | |||
| 14 | // Clients should include the SelectionDAG.h file instead of this file directly. | |||
| 15 | // | |||
| 16 | //===----------------------------------------------------------------------===// | |||
| 17 | ||||
| 18 | #ifndef LLVM_CODEGEN_SELECTIONDAGNODES_H | |||
| 19 | #define LLVM_CODEGEN_SELECTIONDAGNODES_H | |||
| 20 | ||||
| 21 | #include "llvm/ADT/APFloat.h" | |||
| 22 | #include "llvm/ADT/ArrayRef.h" | |||
| 23 | #include "llvm/ADT/BitVector.h" | |||
| 24 | #include "llvm/ADT/FoldingSet.h" | |||
| 25 | #include "llvm/ADT/GraphTraits.h" | |||
| 26 | #include "llvm/ADT/SmallPtrSet.h" | |||
| 27 | #include "llvm/ADT/SmallVector.h" | |||
| 28 | #include "llvm/ADT/ilist_node.h" | |||
| 29 | #include "llvm/ADT/iterator.h" | |||
| 30 | #include "llvm/ADT/iterator_range.h" | |||
| 31 | #include "llvm/CodeGen/ISDOpcodes.h" | |||
| 32 | #include "llvm/CodeGen/MachineMemOperand.h" | |||
| 33 | #include "llvm/CodeGen/Register.h" | |||
| 34 | #include "llvm/CodeGen/ValueTypes.h" | |||
| 35 | #include "llvm/IR/Constants.h" | |||
| 36 | #include "llvm/IR/DebugLoc.h" | |||
| 37 | #include "llvm/IR/Instruction.h" | |||
| 38 | #include "llvm/IR/Instructions.h" | |||
| 39 | #include "llvm/IR/Metadata.h" | |||
| 40 | #include "llvm/IR/Operator.h" | |||
| 41 | #include "llvm/Support/AlignOf.h" | |||
| 42 | #include "llvm/Support/AtomicOrdering.h" | |||
| 43 | #include "llvm/Support/Casting.h" | |||
| 44 | #include "llvm/Support/ErrorHandling.h" | |||
| 45 | #include "llvm/Support/MachineValueType.h" | |||
| 46 | #include "llvm/Support/TypeSize.h" | |||
| 47 | #include <algorithm> | |||
| 48 | #include <cassert> | |||
| 49 | #include <climits> | |||
| 50 | #include <cstddef> | |||
| 51 | #include <cstdint> | |||
| 52 | #include <cstring> | |||
| 53 | #include <iterator> | |||
| 54 | #include <string> | |||
| 55 | #include <tuple> | |||
| 56 | ||||
| 57 | namespace llvm { | |||
| 58 | ||||
| 59 | class APInt; | |||
| 60 | class Constant; | |||
| 61 | template <typename T> struct DenseMapInfo; | |||
| 62 | class GlobalValue; | |||
| 63 | class MachineBasicBlock; | |||
| 64 | class MachineConstantPoolValue; | |||
| 65 | class MCSymbol; | |||
| 66 | class raw_ostream; | |||
| 67 | class SDNode; | |||
| 68 | class SelectionDAG; | |||
| 69 | class Type; | |||
| 70 | class Value; | |||
| 71 | ||||
| 72 | void checkForCycles(const SDNode *N, const SelectionDAG *DAG = nullptr, | |||
| 73 | bool force = false); | |||
| 74 | ||||
| 75 | /// This represents a list of ValueType's that has been intern'd by | |||
| 76 | /// a SelectionDAG. Instances of this simple value class are returned by | |||
| 77 | /// SelectionDAG::getVTList(...). | |||
| 78 | /// | |||
struct SDVTList {
  const EVT *VTs;       // Pointer to the interned array of value types.
  unsigned int NumVTs;  // Number of entries in the VTs array.
};
| 83 | ||||
| 84 | namespace ISD { | |||
| 85 | ||||
| 86 | /// Node predicates | |||
| 87 | ||||
| 88 | /// If N is a BUILD_VECTOR or SPLAT_VECTOR node whose elements are all the | |||
| 89 | /// same constant or undefined, return true and return the constant value in | |||
| 90 | /// \p SplatValue. | |||
| 91 | bool isConstantSplatVector(const SDNode *N, APInt &SplatValue); | |||
| 92 | ||||
| 93 | /// Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where | |||
| 94 | /// all of the elements are ~0 or undef. If \p BuildVectorOnly is set to | |||
| 95 | /// true, it only checks BUILD_VECTOR. | |||
| 96 | bool isConstantSplatVectorAllOnes(const SDNode *N, | |||
| 97 | bool BuildVectorOnly = false); | |||
| 98 | ||||
| 99 | /// Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where | |||
| 100 | /// all of the elements are 0 or undef. If \p BuildVectorOnly is set to true, it | |||
| 101 | /// only checks BUILD_VECTOR. | |||
| 102 | bool isConstantSplatVectorAllZeros(const SDNode *N, | |||
| 103 | bool BuildVectorOnly = false); | |||
| 104 | ||||
| 105 | /// Return true if the specified node is a BUILD_VECTOR where all of the | |||
| 106 | /// elements are ~0 or undef. | |||
| 107 | bool isBuildVectorAllOnes(const SDNode *N); | |||
| 108 | ||||
| 109 | /// Return true if the specified node is a BUILD_VECTOR where all of the | |||
| 110 | /// elements are 0 or undef. | |||
| 111 | bool isBuildVectorAllZeros(const SDNode *N); | |||
| 112 | ||||
| 113 | /// Return true if the specified node is a BUILD_VECTOR node of all | |||
| 114 | /// ConstantSDNode or undef. | |||
| 115 | bool isBuildVectorOfConstantSDNodes(const SDNode *N); | |||
| 116 | ||||
| 117 | /// Return true if the specified node is a BUILD_VECTOR node of all | |||
| 118 | /// ConstantFPSDNode or undef. | |||
| 119 | bool isBuildVectorOfConstantFPSDNodes(const SDNode *N); | |||
| 120 | ||||
| 121 | /// Return true if the node has at least one operand and all operands of the | |||
| 122 | /// specified node are ISD::UNDEF. | |||
| 123 | bool allOperandsUndef(const SDNode *N); | |||
| 124 | ||||
| 125 | } // end namespace ISD | |||
| 126 | ||||
| 127 | //===----------------------------------------------------------------------===// | |||
| 128 | /// Unlike LLVM values, Selection DAG nodes may return multiple | |||
| 129 | /// values as the result of a computation. Many nodes return multiple values, | |||
| 130 | /// from loads (which define a token and a return value) to ADDC (which returns | |||
| 131 | /// a result and a carry value), to calls (which may return an arbitrary number | |||
| 132 | /// of values). | |||
| 133 | /// | |||
| 134 | /// As such, each use of a SelectionDAG computation must indicate the node that | |||
| 135 | /// computes it as well as which return value to use from that node. This pair | |||
| 136 | /// of information is represented with the SDValue value type. | |||
| 137 | /// | |||
class SDValue {
  friend struct DenseMapInfo<SDValue>;

  SDNode *Node = nullptr; // The node defining the value we are using.
  unsigned ResNo = 0;     // Which return value of the node we are using.

public:
  SDValue() = default;
  SDValue(SDNode *node, unsigned resno);

  /// get the index which selects a specific result in the SDNode
  unsigned getResNo() const { return ResNo; }

  /// get the SDNode which holds the desired result
  SDNode *getNode() const { return Node; }

  /// set the SDNode
  void setNode(SDNode *N) { Node = N; }

  // Member access forwards to the underlying node; Node may be null, so
  // callers must check (e.g. via operator bool) before dereferencing.
  inline SDNode *operator->() const { return Node; }

  // Two SDValues are equal only when they name the same result of the same
  // node.
  bool operator==(const SDValue &O) const {
    return Node == O.Node && ResNo == O.ResNo;
  }
  bool operator!=(const SDValue &O) const {
    return !operator==(O);
  }
  // Lexicographic (Node, ResNo) ordering, usable in sorted containers.
  bool operator<(const SDValue &O) const {
    return std::tie(Node, ResNo) < std::tie(O.Node, O.ResNo);
  }
  // True when this refers to some node's result (i.e. Node is non-null).
  explicit operator bool() const {
    return Node != nullptr;
  }

  /// Return result number R of the same node.
  SDValue getValue(unsigned R) const {
    return SDValue(Node, R);
  }

  /// Return true if this node is an operand of N.
  bool isOperandOf(const SDNode *N) const;

  /// Return the ValueType of the referenced return value.
  inline EVT getValueType() const;

  /// Return the simple ValueType of the referenced return value.
  MVT getSimpleValueType() const {
    return getValueType().getSimpleVT();
  }

  /// Returns the size of the value in bits.
  ///
  /// If the value type is a scalable vector type, the scalable property will
  /// be set and the runtime size will be a positive integer multiple of the
  /// base size.
  TypeSize getValueSizeInBits() const {
    return getValueType().getSizeInBits();
  }

  /// Returns the size in bits of the value's scalar (element) type.
  uint64_t getScalarValueSizeInBits() const {
    return getValueType().getScalarType().getFixedSizeInBits();
  }

  // Forwarding methods - These forward to the corresponding methods in SDNode.
  inline unsigned getOpcode() const;
  inline unsigned getNumOperands() const;
  inline const SDValue &getOperand(unsigned i) const;
  inline uint64_t getConstantOperandVal(unsigned i) const;
  inline const APInt &getConstantOperandAPInt(unsigned i) const;
  inline bool isTargetMemoryOpcode() const;
  inline bool isTargetOpcode() const;
  inline bool isMachineOpcode() const;
  inline bool isUndef() const;
  inline unsigned getMachineOpcode() const;
  inline const DebugLoc &getDebugLoc() const;
  inline void dump() const;
  inline void dump(const SelectionDAG *G) const;
  inline void dumpr() const;
  inline void dumpr(const SelectionDAG *G) const;

  /// Return true if this operand (which must be a chain) reaches the
  /// specified operand without crossing any side-effecting instructions.
  /// In practice, this looks through token factors and non-volatile loads.
  /// In order to remain efficient, this only
  /// looks a couple of nodes in, it does not do an exhaustive search.
  bool reachesChainWithoutSideEffects(SDValue Dest,
                                      unsigned Depth = 2) const;

  /// Return true if there are no nodes using value ResNo of Node.
  inline bool use_empty() const;

  /// Return true if there is exactly one node using value ResNo of Node.
  inline bool hasOneUse() const;
};
| 231 | ||||
| 232 | template<> struct DenseMapInfo<SDValue> { | |||
| 233 | static inline SDValue getEmptyKey() { | |||
| 234 | SDValue V; | |||
| 235 | V.ResNo = -1U; | |||
| 236 | return V; | |||
| 237 | } | |||
| 238 | ||||
| 239 | static inline SDValue getTombstoneKey() { | |||
| 240 | SDValue V; | |||
| 241 | V.ResNo = -2U; | |||
| 242 | return V; | |||
| 243 | } | |||
| 244 | ||||
| 245 | static unsigned getHashValue(const SDValue &Val) { | |||
| 246 | return ((unsigned)((uintptr_t)Val.getNode() >> 4) ^ | |||
| 247 | (unsigned)((uintptr_t)Val.getNode() >> 9)) + Val.getResNo(); | |||
| 248 | } | |||
| 249 | ||||
| 250 | static bool isEqual(const SDValue &LHS, const SDValue &RHS) { | |||
| 251 | return LHS == RHS; | |||
| 252 | } | |||
| 253 | }; | |||
| 254 | ||||
| 255 | /// Allow casting operators to work directly on | |||
| 256 | /// SDValues as if they were SDNode*'s. | |||
| 257 | template<> struct simplify_type<SDValue> { | |||
| 258 | using SimpleType = SDNode *; | |||
| 259 | ||||
| 260 | static SimpleType getSimplifiedValue(SDValue &Val) { | |||
| 261 | return Val.getNode(); | |||
| 262 | } | |||
| 263 | }; | |||
| 264 | template<> struct simplify_type<const SDValue> { | |||
| 265 | using SimpleType = /*const*/ SDNode *; | |||
| 266 | ||||
| 267 | static SimpleType getSimplifiedValue(const SDValue &Val) { | |||
| 268 | return Val.getNode(); | |||
| 269 | } | |||
| 270 | }; | |||
| 271 | ||||
/// Represents a use of a SDNode. This class holds an SDValue,
/// which records the SDNode being used and the result number, a
/// pointer to the SDNode using the value, and Next and Prev pointers,
/// which link together all the uses of an SDNode.
///
class SDUse {
  /// Val - The value being used.
  SDValue Val;
  /// User - The user of this value.
  SDNode *User = nullptr;
  /// Prev, Next - Pointers to the uses list of the SDNode referred by
  /// this operand. Prev points at the link (Next field or list head) of the
  /// preceding entry, making this an intrusive doubly-linked list.
  SDUse **Prev = nullptr;
  SDUse *Next = nullptr;

public:
  SDUse() = default;
  // SDUse objects are linked into lists by address, so copying/assigning
  // them would corrupt the use lists.
  SDUse(const SDUse &U) = delete;
  SDUse &operator=(const SDUse &) = delete;

  /// Normally SDUse will just implicitly convert to an SDValue that it holds.
  operator const SDValue&() const { return Val; }

  /// If implicit conversion to SDValue doesn't work, the get() method returns
  /// the SDValue.
  const SDValue &get() const { return Val; }

  /// This returns the SDNode that contains this Use.
  SDNode *getUser() { return User; }

  /// Get the next SDUse in the use list.
  SDUse *getNext() const { return Next; }

  /// Convenience function for get().getNode().
  SDNode *getNode() const { return Val.getNode(); }
  /// Convenience function for get().getResNo().
  unsigned getResNo() const { return Val.getResNo(); }
  /// Convenience function for get().getValueType().
  EVT getValueType() const { return Val.getValueType(); }

  /// Convenience function for get().operator==
  bool operator==(const SDValue &V) const {
    return Val == V;
  }

  /// Convenience function for get().operator!=
  bool operator!=(const SDValue &V) const {
    return Val != V;
  }

  /// Convenience function for get().operator<
  bool operator<(const SDValue &V) const {
    return Val < V;
  }

private:
  friend class SelectionDAG;
  friend class SDNode;
  // TODO: unfriend HandleSDNode once we fix its operand handling.
  friend class HandleSDNode;

  void setUser(SDNode *p) { User = p; }

  /// Remove this use from its existing use list, assign it the
  /// given value, and add it to the new value's node's use list.
  inline void set(const SDValue &V);
  /// Like set, but only supports initializing a newly-allocated
  /// SDUse with a non-null value.
  inline void setInitial(const SDValue &V);
  /// Like set, but only sets the Node portion of the value,
  /// leaving the ResNo portion unmodified.
  inline void setNode(SDNode *N);

  // Splice this use onto the front of the list headed by *List.
  // Order matters: Next must be captured before *List is overwritten.
  void addToList(SDUse **List) {
    Next = *List;
    if (Next) Next->Prev = &Next;
    Prev = List;
    *List = this;
  }

  // Unlink this use from its current list. Prev must be valid, i.e. the
  // use must currently be on a list; Val/User are left untouched.
  void removeFromList() {
    *Prev = Next;
    if (Next) Next->Prev = Prev;
  }
};
| 357 | ||||
| 358 | /// simplify_type specializations - Allow casting operators to work directly on | |||
| 359 | /// SDValues as if they were SDNode*'s. | |||
| 360 | template<> struct simplify_type<SDUse> { | |||
| 361 | using SimpleType = SDNode *; | |||
| 362 | ||||
| 363 | static SimpleType getSimplifiedValue(SDUse &Val) { | |||
| 364 | return Val.getNode(); | |||
| 365 | } | |||
| 366 | }; | |||
| 367 | ||||
/// These are IR-level optimization flags that may be propagated to SDNodes.
/// TODO: This data structure should be shared by the IR optimizer and the
/// the backend.
struct SDNodeFlags {
private:
  // Single-bit flags; declaration order determines bit packing, so do not
  // reorder.
  bool NoUnsignedWrap : 1;
  bool NoSignedWrap : 1;
  bool Exact : 1;
  bool NoNaNs : 1;
  bool NoInfs : 1;
  bool NoSignedZeros : 1;
  bool AllowReciprocal : 1;
  bool AllowContract : 1;
  bool ApproximateFuncs : 1;
  bool AllowReassociation : 1;

  // We assume instructions do not raise floating-point exceptions by default,
  // and only those marked explicitly may do so.  We could choose to represent
  // this via a positive "FPExcept" flags like on the MI level, but having a
  // negative "NoFPExcept" flag here (that defaults to true) makes the flag
  // intersection logic more straightforward.
  bool NoFPExcept : 1;

public:
  /// Default constructor turns off all optimization flags.
  SDNodeFlags()
      : NoUnsignedWrap(false), NoSignedWrap(false), Exact(false), NoNaNs(false),
        NoInfs(false), NoSignedZeros(false), AllowReciprocal(false),
        AllowContract(false), ApproximateFuncs(false),
        AllowReassociation(false), NoFPExcept(false) {}

  /// Propagate the fast-math-flags from an IR FPMathOperator.
  /// Note: only FP-related flags are copied; NUW/NSW/Exact and NoFPExcept
  /// are left unchanged.
  void copyFMF(const FPMathOperator &FPMO) {
    setNoNaNs(FPMO.hasNoNaNs());
    setNoInfs(FPMO.hasNoInfs());
    setNoSignedZeros(FPMO.hasNoSignedZeros());
    setAllowReciprocal(FPMO.hasAllowReciprocal());
    setAllowContract(FPMO.hasAllowContract());
    setApproximateFuncs(FPMO.hasApproxFunc());
    setAllowReassociation(FPMO.hasAllowReassoc());
  }

  // These are mutators for each flag.
  void setNoUnsignedWrap(bool b) { NoUnsignedWrap = b; }
  void setNoSignedWrap(bool b) { NoSignedWrap = b; }
  void setExact(bool b) { Exact = b; }
  void setNoNaNs(bool b) { NoNaNs = b; }
  void setNoInfs(bool b) { NoInfs = b; }
  void setNoSignedZeros(bool b) { NoSignedZeros = b; }
  void setAllowReciprocal(bool b) { AllowReciprocal = b; }
  void setAllowContract(bool b) { AllowContract = b; }
  void setApproximateFuncs(bool b) { ApproximateFuncs = b; }
  void setAllowReassociation(bool b) { AllowReassociation = b; }
  void setNoFPExcept(bool b) { NoFPExcept = b; }

  // These are accessors for each flag.
  bool hasNoUnsignedWrap() const { return NoUnsignedWrap; }
  bool hasNoSignedWrap() const { return NoSignedWrap; }
  bool hasExact() const { return Exact; }
  bool hasNoNaNs() const { return NoNaNs; }
  bool hasNoInfs() const { return NoInfs; }
  bool hasNoSignedZeros() const { return NoSignedZeros; }
  bool hasAllowReciprocal() const { return AllowReciprocal; }
  bool hasAllowContract() const { return AllowContract; }
  bool hasApproximateFuncs() const { return ApproximateFuncs; }
  bool hasAllowReassociation() const { return AllowReassociation; }
  bool hasNoFPExcept() const { return NoFPExcept; }

  /// Clear any flags in this flag set that aren't also set in Flags. All
  /// flags will be cleared if Flags are undefined.
  void intersectWith(const SDNodeFlags Flags) {
    NoUnsignedWrap &= Flags.NoUnsignedWrap;
    NoSignedWrap &= Flags.NoSignedWrap;
    Exact &= Flags.Exact;
    NoNaNs &= Flags.NoNaNs;
    NoInfs &= Flags.NoInfs;
    NoSignedZeros &= Flags.NoSignedZeros;
    AllowReciprocal &= Flags.AllowReciprocal;
    AllowContract &= Flags.AllowContract;
    ApproximateFuncs &= Flags.ApproximateFuncs;
    AllowReassociation &= Flags.AllowReassociation;
    NoFPExcept &= Flags.NoFPExcept;
  }
};
| 452 | ||||
/// Represents one node in the SelectionDAG.
///
class SDNode : public FoldingSetNode, public ilist_node<SDNode> {
private:
  /// The operation that this node performs.
  /// Non-negative values are pre-isel (ISD / target-ISD) opcodes; negative
  /// values encode post-isel machine opcodes (see isMachineOpcode()).
  int16_t NodeType;

protected:
  // We define a set of mini-helper classes to help us interpret the bits in our
  // SubclassData.  These are designed to fit within a uint16_t so they pack
  // with NodeType.

#if defined(_AIX) && (!defined(__GNUC__) || defined(__clang__))
// Except for GCC; by default, AIX compilers store bit-fields in 4-byte words
// and give the `pack` pragma push semantics.
#define BEGIN_TWO_BYTE_PACK() _Pragma("pack(2)")
#define END_TWO_BYTE_PACK() _Pragma("pack(pop)")
#else
#define BEGIN_TWO_BYTE_PACK()
#define END_TWO_BYTE_PACK()
#endif
| 474 | ||||
BEGIN_TWO_BYTE_PACK()
  // Bits shared by every SDNode. The anonymous `uint16_t : N;` members in the
  // classes below skip over bits owned by less-derived layers, so all of these
  // views overlay the same 16-bit storage without clashing.
  class SDNodeBitfields {
    friend class SDNode;
    friend class MemIntrinsicSDNode;
    friend class MemSDNode;
    friend class SelectionDAG;

    uint16_t HasDebugValue : 1;
    uint16_t IsMemIntrinsic : 1;
    uint16_t IsDivergent : 1;
  };
  enum { NumSDNodeBits = 3 };

  class ConstantSDNodeBitfields {
    friend class ConstantSDNode;

    uint16_t : NumSDNodeBits;

    uint16_t IsOpaque : 1;
  };

  class MemSDNodeBitfields {
    friend class MemSDNode;
    friend class MemIntrinsicSDNode;
    friend class AtomicSDNode;

    uint16_t : NumSDNodeBits;

    uint16_t IsVolatile : 1;
    uint16_t IsNonTemporal : 1;
    uint16_t IsDereferenceable : 1;
    uint16_t IsInvariant : 1;
  };
  enum { NumMemSDNodeBits = NumSDNodeBits + 4 };

  class LSBaseSDNodeBitfields {
    friend class LSBaseSDNode;
    friend class MaskedLoadStoreSDNode;
    friend class MaskedGatherScatterSDNode;

    uint16_t : NumMemSDNodeBits;

    // This storage is shared between disparate class hierarchies to hold an
    // enumeration specific to the class hierarchy in use.
    //   LSBaseSDNode => enum ISD::MemIndexedMode
    //   MaskedLoadStoreBaseSDNode => enum ISD::MemIndexedMode
    //   MaskedGatherScatterSDNode => enum ISD::MemIndexType
    uint16_t AddressingMode : 3;
  };
  enum { NumLSBaseSDNodeBits = NumMemSDNodeBits + 3 };

  class LoadSDNodeBitfields {
    friend class LoadSDNode;
    friend class MaskedLoadSDNode;
    friend class MaskedGatherSDNode;

    uint16_t : NumLSBaseSDNodeBits;

    uint16_t ExtTy : 2; // enum ISD::LoadExtType
    uint16_t IsExpanding : 1;
  };

  class StoreSDNodeBitfields {
    friend class StoreSDNode;
    friend class MaskedStoreSDNode;
    friend class MaskedScatterSDNode;

    uint16_t : NumLSBaseSDNodeBits;

    uint16_t IsTruncating : 1;
    uint16_t IsCompressing : 1;
  };

  // All bitfield views alias the same two bytes of storage.
  union {
    char RawSDNodeBits[sizeof(uint16_t)];
    SDNodeBitfields SDNodeBits;
    ConstantSDNodeBitfields ConstantSDNodeBits;
    MemSDNodeBitfields MemSDNodeBits;
    LSBaseSDNodeBitfields LSBaseSDNodeBits;
    LoadSDNodeBitfields LoadSDNodeBits;
    StoreSDNodeBitfields StoreSDNodeBits;
  };
END_TWO_BYTE_PACK()
#undef BEGIN_TWO_BYTE_PACK
#undef END_TWO_BYTE_PACK

  // RawSDNodeBits must cover the entirety of the union.  This means that all of
  // the union's members must have size <= RawSDNodeBits.  We write the RHS as
  // "2" instead of sizeof(RawSDNodeBits) because MSVC can't handle the latter.
  static_assert(sizeof(SDNodeBitfields) <= 2, "field too wide");
  static_assert(sizeof(ConstantSDNodeBitfields) <= 2, "field too wide");
  static_assert(sizeof(MemSDNodeBitfields) <= 2, "field too wide");
  static_assert(sizeof(LSBaseSDNodeBitfields) <= 2, "field too wide");
  static_assert(sizeof(LoadSDNodeBitfields) <= 2, "field too wide");
  static_assert(sizeof(StoreSDNodeBitfields) <= 2, "field too wide");
| 570 | ||||
private:
  friend class SelectionDAG;
  // TODO: unfriend HandleSDNode once we fix its operand handling.
  friend class HandleSDNode;

  /// Unique id per SDNode in the DAG.
  int NodeId = -1;

  /// The values that are used by this operation.
  SDUse *OperandList = nullptr;

  /// The types of the values this node defines.  SDNode's may
  /// define multiple values simultaneously.
  const EVT *ValueList;

  /// List of uses for this SDNode.
  SDUse *UseList = nullptr;

  /// The number of entries in the Operand/Value list.
  unsigned short NumOperands = 0;
  unsigned short NumValues;

  // The ordering of the SDNodes. It roughly corresponds to the ordering of the
  // original LLVM instructions.
  // This is used for turning off scheduling, because we'll forgo
  // the normal scheduling algorithms and output the instructions according to
  // this ordering.
  unsigned IROrder;

  /// Source line information.
  DebugLoc debugLoc;

  /// Return a pointer to the specified value type.
  static const EVT *getValueTypeList(EVT VT);

  /// Optimization flags (fast-math, wrap behavior, ...) for this node.
  SDNodeFlags Flags;

public:
  /// Unique and persistent id per SDNode in the DAG.
  /// Used for debug printing.
  uint16_t PersistentId;
| 612 | ||||
  //===--------------------------------------------------------------------===//
  // Accessors
  //

  /// Return the SelectionDAG opcode value for this node. For
  /// pre-isel nodes (those for which isMachineOpcode returns false), these
  /// are the opcode values in the ISD and <target>ISD namespaces. For
  /// post-isel opcodes, see getMachineOpcode.
  unsigned getOpcode() const { return (unsigned short)NodeType; }

  /// Test if this node has a target-specific opcode (in the
  /// \<target\>ISD namespace).
  bool isTargetOpcode() const { return NodeType >= ISD::BUILTIN_OP_END; }

  /// Test if this node has a target-specific opcode that may raise
  /// FP exceptions (in the \<target\>ISD namespace and greater than
  /// FIRST_TARGET_STRICTFP_OPCODE).  Note that all target memory
  /// opcode are currently automatically considered to possibly raise
  /// FP exceptions as well.
  bool isTargetStrictFPOpcode() const {
    return NodeType >= ISD::FIRST_TARGET_STRICTFP_OPCODE;
  }

  /// Test if this node has a target-specific
  /// memory-referencing opcode (in the \<target\>ISD namespace and
  /// greater than FIRST_TARGET_MEMORY_OPCODE).
  bool isTargetMemoryOpcode() const {
    return NodeType >= ISD::FIRST_TARGET_MEMORY_OPCODE;
  }

  /// Return true if the type of the node type undefined.
  bool isUndef() const { return NodeType == ISD::UNDEF; }

  /// Test if this node is a memory intrinsic (with valid pointer information).
  /// INTRINSIC_W_CHAIN and INTRINSIC_VOID nodes are sometimes created for
  /// non-memory intrinsics (with chains) that are not really instances of
  /// MemSDNode. For such nodes, we need some extra state to determine the
  /// proper classof relationship.
  bool isMemIntrinsic() const {
    return (NodeType == ISD::INTRINSIC_W_CHAIN ||
            NodeType == ISD::INTRINSIC_VOID) &&
           SDNodeBits.IsMemIntrinsic;
  }

  /// Test if this node is a strict floating point pseudo-op.
  /// The case list is generated from ConstrainedOps.def, so it stays in sync
  /// with the set of constrained FP operations.
  bool isStrictFPOpcode() {
    switch (NodeType) {
      default:
        return false;
      case ISD::STRICT_FP16_TO_FP:
      case ISD::STRICT_FP_TO_FP16:
#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN)               \
      case ISD::STRICT_##DAGN:
#include "llvm/IR/ConstrainedOps.def"
        return true;
    }
  }

  /// Test if this node has a post-isel opcode, directly
  /// corresponding to a MachineInstr opcode.
  bool isMachineOpcode() const { return NodeType < 0; }

  /// This may only be called if isMachineOpcode returns
  /// true. It returns the MachineInstr opcode value that the node's opcode
  /// corresponds to.
  unsigned getMachineOpcode() const {
    assert(isMachineOpcode() && "Not a MachineInstr opcode!");
    // Machine opcodes are stored as the bitwise complement of the opcode.
    return ~NodeType;
  }
| 682 | ||||
  bool getHasDebugValue() const { return SDNodeBits.HasDebugValue; }
  void setHasDebugValue(bool b) { SDNodeBits.HasDebugValue = b; }

  bool isDivergent() const { return SDNodeBits.IsDivergent; }

  /// Return true if there are no uses of this node.
  bool use_empty() const { return UseList == nullptr; }

  /// Return true if there is exactly one use of this node.
  bool hasOneUse() const { return hasSingleElement(uses()); }

  /// Return the number of uses of this node. This method takes
  /// time proportional to the number of uses.
  size_t use_size() const { return std::distance(use_begin(), use_end()); }

  /// Return the unique node id.
  int getNodeId() const { return NodeId; }

  /// Set unique node id.
  void setNodeId(int Id) { NodeId = Id; }

  /// Return the node ordering.
  unsigned getIROrder() const { return IROrder; }

  /// Set the node ordering.
  void setIROrder(unsigned Order) { IROrder = Order; }

  /// Return the source location info.
  const DebugLoc &getDebugLoc() const { return debugLoc; }

  /// Set source location info.  Try to avoid this, putting
  /// it in the constructor is preferable.
  void setDebugLoc(DebugLoc dl) { debugLoc = std::move(dl); }
| 716 | ||||
  /// This class provides iterator support for SDUse
  /// operands that use a specific SDNode.
  /// A default-constructed (null Op) iterator is the end iterator.
  class use_iterator {
    friend class SDNode;

    SDUse *Op = nullptr;

    explicit use_iterator(SDUse *op) : Op(op) {}

  public:
    using iterator_category = std::forward_iterator_tag;
    using value_type = SDUse;
    using difference_type = std::ptrdiff_t;
    using pointer = value_type *;
    using reference = value_type &;

    use_iterator() = default;
    use_iterator(const use_iterator &I) : Op(I.Op) {}

    bool operator==(const use_iterator &x) const {
      return Op == x.Op;
    }
    bool operator!=(const use_iterator &x) const {
      return !operator==(x);
    }

    /// Return true if this iterator is at the end of uses list.
    bool atEnd() const { return Op == nullptr; }

    // Iterator traversal: forward iteration only.
    use_iterator &operator++() {          // Preincrement
      assert(Op && "Cannot increment end iterator!");
      Op = Op->getNext();
      return *this;
    }

    use_iterator operator++(int) {        // Postincrement
      use_iterator tmp = *this; ++*this; return tmp;
    }

    /// Retrieve a pointer to the current user node.
    /// Note: dereferencing yields the *using* SDNode, not the SDUse.
    SDNode *operator*() const {
      assert(Op && "Cannot dereference end iterator!");
      return Op->getUser();
    }

    SDNode *operator->() const { return operator*(); }

    SDUse &getUse() const { return *Op; }

    /// Retrieve the operand # of this use in its user.
    unsigned getOperandNo() const {
      assert(Op && "Cannot dereference end iterator!");
      return (unsigned)(Op - Op->getUser()->OperandList);
    }
  };
| 773 | ||||
  /// Provide iteration support to walk over all uses of an SDNode.
  use_iterator use_begin() const {
    return use_iterator(UseList);
  }

  static use_iterator use_end() { return use_iterator(nullptr); }

  inline iterator_range<use_iterator> uses() {
    return make_range(use_begin(), use_end());
  }
  inline iterator_range<use_iterator> uses() const {
    return make_range(use_begin(), use_end());
  }
| 787 | ||||
  /// Return true if there are exactly NUSES uses of the indicated value.
  /// This method ignores uses of other values defined by this operation.
  bool hasNUsesOfValue(unsigned NUses, unsigned Value) const;

  /// Return true if there are any use of the indicated value.
  /// This method ignores uses of other values defined by this operation.
  bool hasAnyUseOfValue(unsigned Value) const;

  /// Return true if this node is the only use of N.
  bool isOnlyUserOf(const SDNode *N) const;

  /// Return true if this node is an operand of N.
  bool isOperandOf(const SDNode *N) const;

  /// Return true if this node is a predecessor of N.
  /// NOTE: Implemented on top of hasPredecessor and every bit as
  /// expensive. Use carefully.
  bool isPredecessorOf(const SDNode *N) const {
    return N->hasPredecessor(this);
  }

  /// Return true if N is a predecessor of this node.
  /// N is either an operand of this node, or can be reached by recursively
  /// traversing up the operands.
  /// NOTE: This is an expensive method. Use it carefully.
  bool hasPredecessor(const SDNode *N) const;
| 814 | ||||
  /// Returns true if N is a predecessor of any node in Worklist. This
  /// helper keeps Visited and Worklist sets externally to allow unions
  /// searches to be performed in parallel, caching of results across
  /// queries and incremental addition to Worklist. Stops early if N is
  /// found but will resume. Remember to clear Visited and Worklists
  /// if DAG changes. MaxSteps gives a maximum number of nodes to visit before
  /// giving up. The TopologicalPrune flag signals that positive NodeIds are
  /// topologically ordered (Operands have strictly smaller node id) and search
  /// can be pruned leveraging this.
  static bool hasPredecessorHelper(const SDNode *N,
                                   SmallPtrSetImpl<const SDNode *> &Visited,
                                   SmallVectorImpl<const SDNode *> &Worklist,
                                   unsigned int MaxSteps = 0,
                                   bool TopologicalPrune = false) {
    SmallVector<const SDNode *, 8> DeferredNodes;
    // A previously-visited N means an earlier query already reached it.
    if (Visited.count(N))
      return true;

    // Node Id's are assigned in three places: As a topological
    // ordering (> 0), during legalization (results in values set to
    // 0), new nodes (set to -1). If N has a topological id then we
    // know that all nodes with ids smaller than it cannot be
    // successors and we need not check them. Filter out all nodes
    // that can't be matches. We add them to the worklist before exit
    // in case of multiple calls. Note that during selection the topological id
    // may be violated if a node's predecessor is selected before it. We mark
    // this at selection negating the id of unselected successors and
    // restricting topological pruning to positive ids.

    int NId = N->getNodeId();
    // If we Invalidated the Id, reconstruct original NId.
    if (NId < -1)
      NId = -(NId + 1);

    bool Found = false;
    while (!Worklist.empty()) {
      const SDNode *M = Worklist.pop_back_val();
      int MId = M->getNodeId();
      // Defer (rather than drop) pruned nodes so a later call can resume.
      if (TopologicalPrune && M->getOpcode() != ISD::TokenFactor && (NId > 0) &&
          (MId > 0) && (MId < NId)) {
        DeferredNodes.push_back(M);
        continue;
      }
      for (const SDValue &OpV : M->op_values()) {
        SDNode *Op = OpV.getNode();
        if (Visited.insert(Op).second)
          Worklist.push_back(Op);
        if (Op == N)
          Found = true;
      }
      if (Found)
        break;
      if (MaxSteps != 0 && Visited.size() >= MaxSteps)
        break;
    }
    // Push deferred nodes back on worklist.
    Worklist.append(DeferredNodes.begin(), DeferredNodes.end());
    // If we bailed early, conservatively return found.
    if (MaxSteps != 0 && Visited.size() >= MaxSteps)
      return true;
    return Found;
  }
| 877 | ||||
  /// Return true if all the users of N are contained in Nodes.
  /// NOTE: Requires at least one match, but doesn't require them all.
  static bool areOnlyUsersOf(ArrayRef<const SDNode *> Nodes, const SDNode *N);

  /// Return the number of values used by this operation.
  unsigned getNumOperands() const { return NumOperands; }

  /// Return the maximum number of operands that a SDNode can hold.
  static constexpr size_t getMaxNumOperands() {
    return std::numeric_limits<decltype(SDNode::NumOperands)>::max();
  }

  /// Helper method returns the integer value of a ConstantSDNode operand.
  inline uint64_t getConstantOperandVal(unsigned Num) const;

  /// Helper method returns the APInt of a ConstantSDNode operand.
  inline const APInt &getConstantOperandAPInt(unsigned Num) const;

  /// Return the operand at index Num; asserts that Num is in range.
  const SDValue &getOperand(unsigned Num) const {
    assert(Num < NumOperands && "Invalid child # of SDNode!");
    return OperandList[Num];
  }

  // Operands are stored as SDUse objects; iterate over them directly.
  using op_iterator = SDUse *;

  op_iterator op_begin() const { return OperandList; }
  op_iterator op_end() const { return OperandList+NumOperands; }
  ArrayRef<SDUse> ops() const { return makeArrayRef(op_begin(), op_end()); }
| 906 | ||||
  /// Iterator for directly iterating over the operand SDValue's.
  struct value_op_iterator
      : iterator_adaptor_base<value_op_iterator, op_iterator,
                              std::random_access_iterator_tag, SDValue,
                              ptrdiff_t, value_op_iterator *,
                              value_op_iterator *> {
    explicit value_op_iterator(SDUse *U = nullptr)
      : iterator_adaptor_base(U) {}

    // Dereference to the SDValue held by the underlying SDUse.
    const SDValue &operator*() const { return I->get(); }
  };

  /// Range over the operand values (as SDValues) of this node.
  iterator_range<value_op_iterator> op_values() const {
    return make_range(value_op_iterator(op_begin()),
                      value_op_iterator(op_end()));
  }
| 923 | ||||
| 924 | SDVTList getVTList() const { | |||
| 925 | SDVTList X = { ValueList, NumValues }; | |||
| 926 | return X; | |||
| 927 | } | |||
| 928 | ||||
| 929 | /// If this node has a glue operand, return the node | |||
| 930 | /// to which the glue operand points. Otherwise return NULL. | |||
| 931 | SDNode *getGluedNode() const { | |||
| 932 | if (getNumOperands() != 0 && | |||
| 933 | getOperand(getNumOperands()-1).getValueType() == MVT::Glue) | |||
| 934 | return getOperand(getNumOperands()-1).getNode(); | |||
| 935 | return nullptr; | |||
| 936 | } | |||
| 937 | ||||
| 938 | /// If this node has a glue value with a user, return | |||
| 939 | /// the user (there is at most one). Otherwise return NULL. | |||
| 940 | SDNode *getGluedUser() const { | |||
| 941 | for (use_iterator UI = use_begin(), UE = use_end(); UI != UE; ++UI) | |||
| 942 | if (UI.getUse().get().getValueType() == MVT::Glue) | |||
| 943 | return *UI; | |||
| 944 | return nullptr; | |||
| 945 | } | |||
| 946 | ||||
| 947 | /// Return this node's SDNodeFlags (fast-math and related flags). | |||
| 947 | SDNodeFlags getFlags() const { return Flags; } | |||
| 948 | void setFlags(SDNodeFlags NewFlags) { Flags = NewFlags; } | |||
| 949 | ||||
| 950 | /// Clear any flags in this node that aren't also set in Flags. | |||
| 951 | /// If Flags is not in a defined state then this has no effect. | |||
| 952 | void intersectFlagsWith(const SDNodeFlags Flags); | |||
| 953 | ||||
| 954 | /// Return the number of values defined/returned by this operator. | |||
| 955 | unsigned getNumValues() const { return NumValues; } | |||
| 956 | ||||
| 957 | /// Return the type of a specified result. | |||
| 958 | EVT getValueType(unsigned ResNo) const { | |||
| 959 | assert(ResNo < NumValues && "Illegal result number!")((void)0); | |||
| 960 | return ValueList[ResNo]; | |||
| 961 | } | |||
| 962 | ||||
| 963 | /// Return the type of a specified result as a simple type. | |||
| 964 | MVT getSimpleValueType(unsigned ResNo) const { | |||
| 965 | return getValueType(ResNo).getSimpleVT(); | |||
| 966 | } | |||
| 967 | ||||
| 968 | /// Returns MVT::getSizeInBits(getValueType(ResNo)). | |||
| 969 | /// | |||
| 970 | /// If the value type is a scalable vector type, the scalable property will | |||
| 971 | /// be set and the runtime size will be a positive integer multiple of the | |||
| 972 | /// base size. | |||
| 973 | TypeSize getValueSizeInBits(unsigned ResNo) const { | |||
| 974 | return getValueType(ResNo).getSizeInBits(); | |||
| 975 | } | |||
| 976 | ||||
| 977 | using value_iterator = const EVT *; | |||
| 978 | ||||
| 979 | /// Iterate the result types of this node. | |||
| 979 | value_iterator value_begin() const { return ValueList; } | |||
| 980 | value_iterator value_end() const { return ValueList+NumValues; } | |||
| 981 | iterator_range<value_iterator> values() const { | |||
| 982 | return llvm::make_range(value_begin(), value_end()); | |||
| 983 | } | |||
| 984 | ||||
| 985 | /// Return the opcode of this operation for printing. | |||
| 986 | std::string getOperationName(const SelectionDAG *G = nullptr) const; | |||
| 987 | /// Return a printable name for the given indexed addressing mode. | |||
| 987 | static const char* getIndexedModeName(ISD::MemIndexedMode AM); | |||
| 988 | /// Print only the result types / only the detail portion of this node. | |||
| 988 | void print_types(raw_ostream &OS, const SelectionDAG *G) const; | |||
| 989 | void print_details(raw_ostream &OS, const SelectionDAG *G) const; | |||
| 990 | void print(raw_ostream &OS, const SelectionDAG *G = nullptr) const; | |||
| 991 | void printr(raw_ostream &OS, const SelectionDAG *G = nullptr) const; | |||
| 992 | ||||
| 993 | /// Print a SelectionDAG node and all children down to | |||
| 994 | /// the leaves. The given SelectionDAG allows target-specific nodes | |||
| 995 | /// to be printed in human-readable form. Unlike printr, this will | |||
| 996 | /// print the whole DAG, including children that appear multiple | |||
| 997 | /// times. | |||
| 998 | /// | |||
| 999 | void printrFull(raw_ostream &O, const SelectionDAG *G = nullptr) const; | |||
| 1000 | ||||
| 1001 | /// Print a SelectionDAG node and children up to | |||
| 1002 | /// depth "depth." The given SelectionDAG allows target-specific | |||
| 1003 | /// nodes to be printed in human-readable form. Unlike printr, this | |||
| 1004 | /// will print children that appear multiple times wherever they are | |||
| 1005 | /// used. | |||
| 1006 | /// | |||
| 1007 | void printrWithDepth(raw_ostream &O, const SelectionDAG *G = nullptr, | |||
| 1008 | unsigned depth = 100) const; | |||
| 1009 | ||||
| 1010 | /// Dump this node, for debugging. | |||
| 1011 | void dump() const; | |||
| 1012 | ||||
| 1013 | /// Dump (recursively) this node and its use-def subgraph. | |||
| 1014 | void dumpr() const; | |||
| 1015 | ||||
| 1016 | /// Dump this node, for debugging. | |||
| 1017 | /// The given SelectionDAG allows target-specific nodes to be printed | |||
| 1018 | /// in human-readable form. | |||
| 1019 | void dump(const SelectionDAG *G) const; | |||
| 1020 | ||||
| 1021 | /// Dump (recursively) this node and its use-def subgraph. | |||
| 1022 | /// The given SelectionDAG allows target-specific nodes to be printed | |||
| 1023 | /// in human-readable form. | |||
| 1024 | void dumpr(const SelectionDAG *G) const; | |||
| 1025 | ||||
| 1026 | /// printrFull to dbgs(). The given SelectionDAG allows | |||
| 1027 | /// target-specific nodes to be printed in human-readable form. | |||
| 1028 | /// Unlike dumpr, this will print the whole DAG, including children | |||
| 1029 | /// that appear multiple times. | |||
| 1030 | void dumprFull(const SelectionDAG *G = nullptr) const; | |||
| 1031 | ||||
| 1032 | /// printrWithDepth to dbgs(). The given | |||
| 1033 | /// SelectionDAG allows target-specific nodes to be printed in | |||
| 1034 | /// human-readable form. Unlike dumpr, this will print children | |||
| 1035 | /// that appear multiple times wherever they are used. | |||
| 1036 | /// | |||
| 1037 | void dumprWithDepth(const SelectionDAG *G = nullptr, | |||
| 1038 | unsigned depth = 100) const; | |||
| 1039 | ||||
| 1040 | /// Gather unique data for the node. | |||
| 1041 | void Profile(FoldingSetNodeID &ID) const; | |||
| 1042 | ||||
| 1043 | /// This method should only be used by the SDUse class. | |||
| 1044 | void addUse(SDUse &U) { U.addToList(&UseList); } | |||
| 1045 | ||||
| 1046 | protected: | |||
| 1047 | /// Return a singleton SDVTList wrapping the uniqued storage for \p VT. | |||
| 1047 | static SDVTList getSDVTList(EVT VT) { | |||
| 1048 | SDVTList Ret = { getValueTypeList(VT), 1 }; | |||
| 1049 | return Ret; | |||
| 1050 | } | |||
| 1051 | ||||
| 1052 | /// Create an SDNode. | |||
| 1053 | /// | |||
| 1054 | /// SDNodes are created without any operands, and never own the operand | |||
| 1055 | /// storage. To add operands, see SelectionDAG::createOperands. | |||
| 1056 | SDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTs) | |||
| 1057 | : NodeType(Opc), ValueList(VTs.VTs), NumValues(VTs.NumVTs), | |||
| 1058 | IROrder(Order), debugLoc(std::move(dl)) { | |||
| 1059 | // All subclass bitfields start zeroed; subclasses set what they need. | |||
| 1059 | memset(&RawSDNodeBits, 0, sizeof(RawSDNodeBits)); | |||
| 1060 | assert(debugLoc.hasTrivialDestructor() && "Expected trivial destructor")((void)0); | |||
| 1061 | assert(NumValues == VTs.NumVTs &&((void)0) | |||
| 1062 | "NumValues wasn't wide enough for its operands!")((void)0); | |||
| 1063 | } | |||
| 1064 | ||||
| 1065 | /// Release the operands and set this node to have zero operands. | |||
| 1066 | void DropOperands(); | |||
| 1067 | }; | |||
| 1068 | ||||
| 1069 | /// Wrapper class for IR location info (IR ordering and DebugLoc) to be passed | |||
| 1070 | /// into SDNode creation functions. | |||
| 1071 | /// When an SDNode is created from the DAGBuilder, the DebugLoc is extracted | |||
| 1072 | /// from the original Instruction, and IROrder is the ordinal position of | |||
| 1073 | /// the instruction. | |||
| 1074 | /// When an SDNode is created after the DAG is being built, both DebugLoc and | |||
| 1075 | /// the IROrder are propagated from the original SDNode. | |||
| 1076 | /// So SDLoc class provides two constructors besides the default one, one to | |||
| 1077 | /// be used by the DAGBuilder, the other to be used by others. | |||
| 1078 | class SDLoc { | |||
| 1079 | private: | |||
| 1080 | DebugLoc DL; | |||
| 1081 | int IROrder = 0; | |||
| 1082 | ||||
| 1083 | public: | |||
| 1084 | SDLoc() = default; | |||
| 1085 | /// Propagate location info from an existing node or value. | |||
| 1085 | SDLoc(const SDNode *N) : DL(N->getDebugLoc()), IROrder(N->getIROrder()) {} | |||
| 1086 | SDLoc(const SDValue V) : SDLoc(V.getNode()) {} | |||
| 1087 | /// DAGBuilder form: take the DebugLoc from the IR instruction (which may | |||
| 1087 | /// be null) and the instruction's ordinal position as the IROrder. | |||
| 1087 | SDLoc(const Instruction *I, int Order) : IROrder(Order) { | |||
| 1088 | assert(Order >= 0 && "bad IROrder")((void)0); | |||
| 1089 | if (I) | |||
| 1090 | DL = I->getDebugLoc(); | |||
| 1091 | } | |||
| 1092 | ||||
| 1093 | unsigned getIROrder() const { return IROrder; } | |||
| 1094 | const DebugLoc &getDebugLoc() const { return DL; } | |||
| 1095 | }; | |||
| 1096 | ||||
| 1097 | // Define inline functions from the SDValue class. | |||
| 1098 | ||||
| 1099 | /// Construct an SDValue referring to result number \p resno of \p node. | |||
| 1099 | inline SDValue::SDValue(SDNode *node, unsigned resno) | |||
| 1100 | : Node(node), ResNo(resno) { | |||
| 1101 | // Explicitly check for !ResNo to avoid use-after-free, because there are | |||
| 1102 | // callers that use SDValue(N, 0) with a deleted N to indicate successful | |||
| 1103 | // combines. | |||
| 1104 | assert((!Node || !ResNo || ResNo < Node->getNumValues()) &&((void)0) | |||
| 1105 | "Invalid result number for the given node!")((void)0); | |||
| 1106 | assert(ResNo < -2U && "Cannot use result numbers reserved for DenseMaps.")((void)0); | |||
| 1107 | } | |||
| 1108 | ||||
| 1109 | inline unsigned SDValue::getOpcode() const { | |||
| 1110 | return Node->getOpcode(); | |||
| ||||
| 1111 | } | |||
| 1112 | ||||
| 1113 | // The remaining SDValue members forward to the underlying SDNode (Node must | |||
| 1113 | // be non-null; none of these check it). | |||
| 1113 | inline EVT SDValue::getValueType() const { | |||
| 1114 | return Node->getValueType(ResNo); | |||
| 1115 | } | |||
| 1116 | ||||
| 1117 | inline unsigned SDValue::getNumOperands() const { | |||
| 1118 | return Node->getNumOperands(); | |||
| 1119 | } | |||
| 1120 | ||||
| 1121 | inline const SDValue &SDValue::getOperand(unsigned i) const { | |||
| 1122 | return Node->getOperand(i); | |||
| 1123 | } | |||
| 1124 | ||||
| 1125 | inline uint64_t SDValue::getConstantOperandVal(unsigned i) const { | |||
| 1126 | return Node->getConstantOperandVal(i); | |||
| 1127 | } | |||
| 1128 | ||||
| 1129 | inline const APInt &SDValue::getConstantOperandAPInt(unsigned i) const { | |||
| 1130 | return Node->getConstantOperandAPInt(i); | |||
| 1131 | } | |||
| 1132 | ||||
| 1133 | inline bool SDValue::isTargetOpcode() const { | |||
| 1134 | return Node->isTargetOpcode(); | |||
| 1135 | } | |||
| 1136 | ||||
| 1137 | inline bool SDValue::isTargetMemoryOpcode() const { | |||
| 1138 | return Node->isTargetMemoryOpcode(); | |||
| 1139 | } | |||
| 1140 | ||||
| 1141 | inline bool SDValue::isMachineOpcode() const { | |||
| 1142 | return Node->isMachineOpcode(); | |||
| 1143 | } | |||
| 1144 | ||||
| 1145 | inline unsigned SDValue::getMachineOpcode() const { | |||
| 1146 | return Node->getMachineOpcode(); | |||
| 1147 | } | |||
| 1148 | ||||
| 1149 | inline bool SDValue::isUndef() const { | |||
| 1150 | return Node->isUndef(); | |||
| 1151 | } | |||
| 1152 | ||||
| 1153 | // Use queries are per-result (ResNo), not per-node. | |||
| 1153 | inline bool SDValue::use_empty() const { | |||
| 1154 | return !Node->hasAnyUseOfValue(ResNo); | |||
| 1155 | } | |||
| 1156 | ||||
| 1157 | inline bool SDValue::hasOneUse() const { | |||
| 1158 | return Node->hasNUsesOfValue(1, ResNo); | |||
| 1159 | } | |||
| 1160 | ||||
| 1161 | inline const DebugLoc &SDValue::getDebugLoc() const { | |||
| 1162 | return Node->getDebugLoc(); | |||
| 1163 | } | |||
| 1164 | ||||
| 1165 | inline void SDValue::dump() const { | |||
| 1166 | return Node->dump(); | |||
| 1167 | } | |||
| 1168 | ||||
| 1169 | inline void SDValue::dump(const SelectionDAG *G) const { | |||
| 1170 | return Node->dump(G); | |||
| 1171 | } | |||
| 1172 | ||||
| 1173 | inline void SDValue::dumpr() const { | |||
| 1174 | return Node->dumpr(); | |||
| 1175 | } | |||
| 1176 | ||||
| 1177 | inline void SDValue::dumpr(const SelectionDAG *G) const { | |||
| 1178 | return Node->dumpr(G); | |||
| 1179 | } | |||
| 1180 | ||||
| 1181 | // Define inline functions from the SDUse class. | |||
| 1182 | ||||
| 1183 | /// Point this use at \p V, maintaining the target node's use list. | |||
| 1183 | inline void SDUse::set(const SDValue &V) { | |||
| 1184 | if (Val.getNode()) removeFromList(); | |||
| 1185 | Val = V; | |||
| 1186 | if (V.getNode()) V.getNode()->addUse(*this); | |||
| 1187 | } | |||
| 1188 | ||||
| 1189 | /// Like set(), but for a freshly constructed use: no old value to unlink, | |||
| 1189 | /// and \p V must reference a non-null node (dereferenced unchecked below). | |||
| 1189 | inline void SDUse::setInitial(const SDValue &V) { | |||
| 1190 | Val = V; | |||
| 1191 | V.getNode()->addUse(*this); | |||
| 1192 | } | |||
| 1193 | ||||
| 1194 | /// Retarget this use at node \p N (which may be null), keeping ResNo. | |||
| 1194 | inline void SDUse::setNode(SDNode *N) { | |||
| 1195 | if (Val.getNode()) removeFromList(); | |||
| 1196 | Val.setNode(N); | |||
| 1197 | if (N) N->addUse(*this); | |||
| 1198 | } | |||
| 1199 | ||||
| 1200 | /// This class is used to form a handle around another node that | |||
| 1201 | /// is persistent and is updated across invocations of replaceAllUsesWith on its | |||
| 1202 | /// operand. This node should be directly created by end-users and not added to | |||
| 1203 | /// the AllNodes list. | |||
| 1204 | class HandleSDNode : public SDNode { | |||
| 1205 | SDUse Op; | |||
| 1206 | ||||
| 1207 | public: | |||
| 1208 | explicit HandleSDNode(SDValue X) | |||
| 1209 | : SDNode(ISD::HANDLENODE, 0, DebugLoc(), getSDVTList(MVT::Other)) { | |||
| 1210 | // HandleSDNodes are never inserted into the DAG, so they won't be | |||
| 1211 | // auto-numbered. Use ID 65535 as a sentinel. | |||
| 1212 | PersistentId = 0xffff; | |||
| 1213 | ||||
| 1214 | // Manually set up the operand list. This node type is special in that it's | |||
| 1215 | // always stack allocated and SelectionDAG does not manage its operands. | |||
| 1216 | // TODO: This should either (a) not be in the SDNode hierarchy, or (b) not | |||
| 1217 | // be so special. | |||
| 1218 | Op.setUser(this); | |||
| 1219 | Op.setInitial(X); | |||
| 1220 | NumOperands = 1; | |||
| 1221 | OperandList = &Op; | |||
| 1222 | } | |||
| 1223 | ~HandleSDNode(); | |||
| 1224 | ||||
| 1225 | /// The (possibly updated) value this handle tracks. | |||
| 1225 | const SDValue &getValue() const { return Op; } | |||
| 1226 | }; | |||
| 1227 | ||||
| 1228 | /// SDNode for an ISD::ADDRSPACECAST, carrying the source and destination | |||
| 1228 | /// address-space numbers alongside the cast operand. | |||
| 1228 | class AddrSpaceCastSDNode : public SDNode { | |||
| 1229 | private: | |||
| 1230 | unsigned SrcAddrSpace; | |||
| 1231 | unsigned DestAddrSpace; | |||
| 1232 | ||||
| 1233 | public: | |||
| 1234 | AddrSpaceCastSDNode(unsigned Order, const DebugLoc &dl, EVT VT, | |||
| 1235 | unsigned SrcAS, unsigned DestAS); | |||
| 1236 | ||||
| 1237 | unsigned getSrcAddressSpace() const { return SrcAddrSpace; } | |||
| 1238 | unsigned getDestAddressSpace() const { return DestAddrSpace; } | |||
| 1239 | ||||
| 1240 | // Methods to support isa and dyn_cast. | |||
| 1240 | static bool classof(const SDNode *N) { | |||
| 1241 | return N->getOpcode() == ISD::ADDRSPACECAST; | |||
| 1242 | } | |||
| 1243 | }; | |||
| 1244 | ||||
| 1245 | /// This is an abstract virtual class for memory operations. | |||
| 1246 | class MemSDNode : public SDNode { | |||
| 1247 | private: | |||
| 1248 | // VT of in-memory value. | |||
| 1249 | EVT MemoryVT; | |||
| 1250 | ||||
| 1251 | protected: | |||
| 1252 | /// Memory reference information. | |||
| 1253 | MachineMemOperand *MMO; | |||
| 1254 | ||||
| 1255 | public: | |||
| 1256 | MemSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl, SDVTList VTs, | |||
| 1257 | EVT memvt, MachineMemOperand *MMO); | |||
| 1258 | ||||
| 1259 | // Most predicates below simply delegate to the MachineMemOperand (MMO), | |||
| 1259 | // which is assumed non-null for the lifetime of the node. | |||
| 1259 | bool readMem() const { return MMO->isLoad(); } | |||
| 1260 | bool writeMem() const { return MMO->isStore(); } | |||
| 1261 | ||||
| 1262 | /// Returns alignment and volatility of the memory access | |||
| 1263 | Align getOriginalAlign() const { return MMO->getBaseAlign(); } | |||
| 1264 | Align getAlign() const { return MMO->getAlign(); } | |||
| 1265 | // FIXME: Remove once transition to getAlign is over. | |||
| 1266 | unsigned getAlignment() const { return MMO->getAlign().value(); } | |||
| 1267 | ||||
| 1268 | /// Return the SubclassData value, without HasDebugValue. This contains an | |||
| 1269 | /// encoding of the volatile flag, as well as bits used by subclasses. This | |||
| 1270 | /// function should only be used to compute a FoldingSetNodeID value. | |||
| 1271 | /// The HasDebugValue bit is masked out because CSE map needs to match | |||
| 1272 | /// nodes with debug info with nodes without debug info. Same is about | |||
| 1273 | /// isDivergent bit. | |||
| 1274 | unsigned getRawSubclassData() const { | |||
| 1275 | uint16_t Data; | |||
| 1276 | // Anonymous union lets us reinterpret the raw bits as the bitfield | |||
| 1276 | // struct without violating strict aliasing (memcpy in, memcpy out). | |||
| 1276 | union { | |||
| 1277 | char RawSDNodeBits[sizeof(uint16_t)]; | |||
| 1278 | SDNodeBitfields SDNodeBits; | |||
| 1279 | }; | |||
| 1280 | memcpy(&RawSDNodeBits, &this->RawSDNodeBits, sizeof(this->RawSDNodeBits)); | |||
| 1281 | SDNodeBits.HasDebugValue = 0; | |||
| 1282 | SDNodeBits.IsDivergent = false; | |||
| 1283 | memcpy(&Data, &RawSDNodeBits, sizeof(RawSDNodeBits)); | |||
| 1284 | return Data; | |||
| 1285 | } | |||
| 1286 | ||||
| 1287 | bool isVolatile() const { return MemSDNodeBits.IsVolatile; } | |||
| 1288 | bool isNonTemporal() const { return MemSDNodeBits.IsNonTemporal; } | |||
| 1289 | bool isDereferenceable() const { return MemSDNodeBits.IsDereferenceable; } | |||
| 1290 | bool isInvariant() const { return MemSDNodeBits.IsInvariant; } | |||
| 1291 | ||||
| 1292 | // Returns the offset from the location of the access. | |||
| 1293 | int64_t getSrcValueOffset() const { return MMO->getOffset(); } | |||
| 1294 | ||||
| 1295 | /// Returns the AA info that describes the dereference. | |||
| 1296 | AAMDNodes getAAInfo() const { return MMO->getAAInfo(); } | |||
| 1297 | ||||
| 1298 | /// Returns the Ranges that describes the dereference. | |||
| 1299 | const MDNode *getRanges() const { return MMO->getRanges(); } | |||
| 1300 | ||||
| 1301 | /// Returns the synchronization scope ID for this memory operation. | |||
| 1302 | SyncScope::ID getSyncScopeID() const { return MMO->getSyncScopeID(); } | |||
| 1303 | ||||
| 1304 | /// Return the atomic ordering requirements for this memory operation. For | |||
| 1305 | /// cmpxchg atomic operations, return the atomic ordering requirements when | |||
| 1306 | /// store occurs. | |||
| 1307 | AtomicOrdering getSuccessOrdering() const { | |||
| 1308 | return MMO->getSuccessOrdering(); | |||
| 1309 | } | |||
| 1310 | ||||
| 1311 | /// Return a single atomic ordering that is at least as strong as both the | |||
| 1312 | /// success and failure orderings for an atomic operation. (For operations | |||
| 1313 | /// other than cmpxchg, this is equivalent to getSuccessOrdering().) | |||
| 1314 | AtomicOrdering getMergedOrdering() const { return MMO->getMergedOrdering(); } | |||
| 1315 | ||||
| 1316 | /// Return true if the memory operation ordering is Unordered or higher. | |||
| 1317 | bool isAtomic() const { return MMO->isAtomic(); } | |||
| 1318 | ||||
| 1319 | /// Returns true if the memory operation doesn't imply any ordering | |||
| 1320 | /// constraints on surrounding memory operations beyond the normal memory | |||
| 1321 | /// aliasing rules. | |||
| 1322 | bool isUnordered() const { return MMO->isUnordered(); } | |||
| 1323 | ||||
| 1324 | /// Returns true if the memory operation is neither atomic or volatile. | |||
| 1325 | bool isSimple() const { return !isAtomic() && !isVolatile(); } | |||
| 1326 | ||||
| 1327 | /// Return the type of the in-memory value. | |||
| 1328 | EVT getMemoryVT() const { return MemoryVT; } | |||
| 1329 | ||||
| 1330 | /// Return a MachineMemOperand object describing the memory | |||
| 1331 | /// reference performed by operation. | |||
| 1332 | MachineMemOperand *getMemOperand() const { return MMO; } | |||
| 1333 | ||||
| 1334 | const MachinePointerInfo &getPointerInfo() const { | |||
| 1335 | return MMO->getPointerInfo(); | |||
| 1336 | } | |||
| 1337 | ||||
| 1338 | /// Return the address space for the associated pointer | |||
| 1339 | unsigned getAddressSpace() const { | |||
| 1340 | return getPointerInfo().getAddrSpace(); | |||
| 1341 | } | |||
| 1342 | ||||
| 1343 | /// Update this MemSDNode's MachineMemOperand information | |||
| 1344 | /// to reflect the alignment of NewMMO, if it has a greater alignment. | |||
| 1345 | /// This must only be used when the new alignment applies to all users of | |||
| 1346 | /// this MachineMemOperand. | |||
| 1347 | void refineAlignment(const MachineMemOperand *NewMMO) { | |||
| 1348 | MMO->refineAlignment(NewMMO); | |||
| 1349 | } | |||
| 1350 | ||||
| 1351 | const SDValue &getChain() const { return getOperand(0); } | |||
| 1352 | ||||
| 1353 | /// Return the base-pointer operand; its operand index depends on the | |||
| 1353 | /// opcode's operand layout (stores carry the stored value first, gathers | |||
| 1353 | /// and scatters carry additional mask/index operands). | |||
| 1353 | const SDValue &getBasePtr() const { | |||
| 1354 | switch (getOpcode()) { | |||
| 1355 | case ISD::STORE: | |||
| 1356 | case ISD::MSTORE: | |||
| 1357 | return getOperand(2); | |||
| 1358 | case ISD::MGATHER: | |||
| 1359 | case ISD::MSCATTER: | |||
| 1360 | return getOperand(3); | |||
| 1361 | default: | |||
| 1362 | return getOperand(1); | |||
| 1363 | } | |||
| 1364 | } | |||
| 1365 | ||||
| 1366 | // Methods to support isa and dyn_cast | |||
| 1367 | static bool classof(const SDNode *N) { | |||
| 1368 | // For some targets, we lower some target intrinsics to a MemIntrinsicNode | |||
| 1369 | // with either an intrinsic or a target opcode. | |||
| 1370 | switch (N->getOpcode()) { | |||
| 1371 | case ISD::LOAD: | |||
| 1372 | case ISD::STORE: | |||
| 1373 | case ISD::PREFETCH: | |||
| 1374 | case ISD::ATOMIC_CMP_SWAP: | |||
| 1375 | case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: | |||
| 1376 | case ISD::ATOMIC_SWAP: | |||
| 1377 | case ISD::ATOMIC_LOAD_ADD: | |||
| 1378 | case ISD::ATOMIC_LOAD_SUB: | |||
| 1379 | case ISD::ATOMIC_LOAD_AND: | |||
| 1380 | case ISD::ATOMIC_LOAD_CLR: | |||
| 1381 | case ISD::ATOMIC_LOAD_OR: | |||
| 1382 | case ISD::ATOMIC_LOAD_XOR: | |||
| 1383 | case ISD::ATOMIC_LOAD_NAND: | |||
| 1384 | case ISD::ATOMIC_LOAD_MIN: | |||
| 1385 | case ISD::ATOMIC_LOAD_MAX: | |||
| 1386 | case ISD::ATOMIC_LOAD_UMIN: | |||
| 1387 | case ISD::ATOMIC_LOAD_UMAX: | |||
| 1388 | case ISD::ATOMIC_LOAD_FADD: | |||
| 1389 | case ISD::ATOMIC_LOAD_FSUB: | |||
| 1390 | case ISD::ATOMIC_LOAD: | |||
| 1391 | case ISD::ATOMIC_STORE: | |||
| 1392 | case ISD::MLOAD: | |||
| 1393 | case ISD::MSTORE: | |||
| 1394 | case ISD::MGATHER: | |||
| 1395 | case ISD::MSCATTER: | |||
| 1396 | return true; | |||
| 1397 | default: | |||
| 1398 | return N->isMemIntrinsic() || N->isTargetMemoryOpcode(); | |||
| 1399 | } | |||
| 1400 | } | |||
| 1401 | }; | |||
| 1402 | ||||
| 1403 | /// This is an SDNode representing atomic operations. | |||
| 1404 | class AtomicSDNode : public MemSDNode { | |||
| 1405 | public: | |||
| 1406 | AtomicSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl, SDVTList VTL, | |||
| 1407 | EVT MemVT, MachineMemOperand *MMO) | |||
| 1408 | : MemSDNode(Opc, Order, dl, VTL, MemVT, MMO) { | |||
| 1409 | assert(((Opc != ISD::ATOMIC_LOAD && Opc != ISD::ATOMIC_STORE) ||((void)0) | |||
| 1410 | MMO->isAtomic()) && "then why are we using an AtomicSDNode?")((void)0); | |||
| 1411 | } | |||
| 1412 | ||||
| 1413 | const SDValue &getBasePtr() const { return getOperand(1); } | |||
| 1414 | const SDValue &getVal() const { return getOperand(2); } | |||
| 1415 | ||||
| 1416 | /// Returns true if this SDNode represents cmpxchg atomic operation, false | |||
| 1417 | /// otherwise. | |||
| 1418 | bool isCompareAndSwap() const { | |||
| 1419 | unsigned Op = getOpcode(); | |||
| 1420 | return Op == ISD::ATOMIC_CMP_SWAP || | |||
| 1421 | Op == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS; | |||
| 1422 | } | |||
| 1423 | ||||
| 1424 | /// For cmpxchg atomic operations, return the atomic ordering requirements | |||
| 1425 | /// when store does not occur. | |||
| 1426 | AtomicOrdering getFailureOrdering() const { | |||
| 1427 | assert(isCompareAndSwap() && "Must be cmpxchg operation")((void)0); | |||
| 1428 | return MMO->getFailureOrdering(); | |||
| 1429 | } | |||
| 1430 | ||||
| 1431 | // Methods to support isa and dyn_cast | |||
| 1432 | static bool classof(const SDNode *N) { | |||
| 1433 | return N->getOpcode() == ISD::ATOMIC_CMP_SWAP || | |||
| 1434 | N->getOpcode() == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS || | |||
| 1435 | N->getOpcode() == ISD::ATOMIC_SWAP || | |||
| 1436 | N->getOpcode() == ISD::ATOMIC_LOAD_ADD || | |||
| 1437 | N->getOpcode() == ISD::ATOMIC_LOAD_SUB || | |||
| 1438 | N->getOpcode() == ISD::ATOMIC_LOAD_AND || | |||
| 1439 | N->getOpcode() == ISD::ATOMIC_LOAD_CLR || | |||
| 1440 | N->getOpcode() == ISD::ATOMIC_LOAD_OR || | |||
| 1441 | N->getOpcode() == ISD::ATOMIC_LOAD_XOR || | |||
| 1442 | N->getOpcode() == ISD::ATOMIC_LOAD_NAND || | |||
| 1443 | N->getOpcode() == ISD::ATOMIC_LOAD_MIN || | |||
| 1444 | N->getOpcode() == ISD::ATOMIC_LOAD_MAX || | |||
| 1445 | N->getOpcode() == ISD::ATOMIC_LOAD_UMIN || | |||
| 1446 | N->getOpcode() == ISD::ATOMIC_LOAD_UMAX || | |||
| 1447 | N->getOpcode() == ISD::ATOMIC_LOAD_FADD || | |||
| 1448 | N->getOpcode() == ISD::ATOMIC_LOAD_FSUB || | |||
| 1449 | N->getOpcode() == ISD::ATOMIC_LOAD || | |||
| 1450 | N->getOpcode() == ISD::ATOMIC_STORE; | |||
| 1451 | } | |||
| 1452 | }; | |||
| 1453 | ||||
| 1454 | /// This SDNode is used for target intrinsics that touch | |||
| 1455 | /// memory and need an associated MachineMemOperand. Its opcode may be | |||
| 1456 | /// INTRINSIC_VOID, INTRINSIC_W_CHAIN, PREFETCH, or a target-specific opcode | |||
| 1457 | /// with a value not less than FIRST_TARGET_MEMORY_OPCODE. | |||
| 1458 | class MemIntrinsicSDNode : public MemSDNode { | |||
| 1459 | public: | |||
| 1460 | MemIntrinsicSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl, | |||
| 1461 | SDVTList VTs, EVT MemoryVT, MachineMemOperand *MMO) | |||
| 1462 | : MemSDNode(Opc, Order, dl, VTs, MemoryVT, MMO) { | |||
| 1463 | SDNodeBits.IsMemIntrinsic = true; | |||
| 1464 | } | |||
| 1465 | ||||
| 1466 | // Methods to support isa and dyn_cast | |||
| 1467 | static bool classof(const SDNode *N) { | |||
| 1468 | // We lower some target intrinsics to their target opcode | |||
| 1469 | // early a node with a target opcode can be of this class | |||
| 1470 | return N->isMemIntrinsic() || | |||
| 1471 | N->getOpcode() == ISD::PREFETCH || | |||
| 1472 | N->isTargetMemoryOpcode(); | |||
| 1473 | } | |||
| 1474 | }; | |||
| 1475 | ||||
| 1476 | /// This SDNode is used to implement the code generator | |||
| 1477 | /// support for the llvm IR shufflevector instruction. It combines elements | |||
| 1478 | /// from two input vectors into a new input vector, with the selection and | |||
| 1479 | /// ordering of elements determined by an array of integers, referred to as | |||
| 1480 | /// the shuffle mask. For input vectors of width N, mask indices of 0..N-1 | |||
| 1481 | /// refer to elements from the LHS input, and indices from N to 2N-1 the RHS. | |||
| 1482 | /// An index of -1 is treated as undef, such that the code generator may put | |||
| 1483 | /// any value in the corresponding element of the result. | |||
| 1484 | class ShuffleVectorSDNode : public SDNode { | |||
| 1485 | // The memory for Mask is owned by the SelectionDAG's OperandAllocator, and | |||
| 1486 | // is freed when the SelectionDAG object is destroyed. | |||
| 1487 | const int *Mask; | |||
| 1488 | ||||
| 1489 | protected: | |||
| 1490 | friend class SelectionDAG; | |||
| 1491 | ||||
| 1492 | ShuffleVectorSDNode(EVT VT, unsigned Order, const DebugLoc &dl, const int *M) | |||
| 1493 | : SDNode(ISD::VECTOR_SHUFFLE, Order, dl, getSDVTList(VT)), Mask(M) {} | |||
| 1494 | ||||
| 1495 | public: | |||
| 1496 | ArrayRef<int> getMask() const { | |||
| 1497 | EVT VT = getValueType(0); | |||
| 1498 | return makeArrayRef(Mask, VT.getVectorNumElements()); | |||
| 1499 | } | |||
| 1500 | ||||
| 1501 | int getMaskElt(unsigned Idx) const { | |||
| 1502 | assert(Idx < getValueType(0).getVectorNumElements() && "Idx out of range!")((void)0); | |||
| 1503 | return Mask[Idx]; | |||
| 1504 | } | |||
| 1505 | ||||
| 1506 | bool isSplat() const { return isSplatMask(Mask, getValueType(0)); } | |||
| 1507 | ||||
| 1508 | int getSplatIndex() const { | |||
| 1509 | assert(isSplat() && "Cannot get splat index for non-splat!")((void)0); | |||
| 1510 | EVT VT = getValueType(0); | |||
| 1511 | for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) | |||
| 1512 | if (Mask[i] >= 0) | |||
| 1513 | return Mask[i]; | |||
| 1514 | ||||
| 1515 | // We can choose any index value here and be correct because all elements | |||
| 1516 | // are undefined. Return 0 for better potential for callers to simplify. | |||
| 1517 | return 0; | |||
| 1518 | } | |||
| 1519 | ||||
| 1520 | static bool isSplatMask(const int *Mask, EVT VT); | |||
| 1521 | ||||
| 1522 | /// Change values in a shuffle permute mask assuming | |||
| 1523 | /// the two vector operands have swapped position. | |||
| 1524 | static void commuteMask(MutableArrayRef<int> Mask) { | |||
| 1525 | unsigned NumElems = Mask.size(); | |||
| 1526 | for (unsigned i = 0; i != NumElems; ++i) { | |||
| 1527 | int idx = Mask[i]; | |||
| 1528 | if (idx < 0) | |||
| 1529 | continue; | |||
| 1530 | else if (idx < (int)NumElems) | |||
| 1531 | Mask[i] = idx + NumElems; | |||
| 1532 | else | |||
| 1533 | Mask[i] = idx - NumElems; | |||
| 1534 | } | |||
| 1535 | } | |||
| 1536 | ||||
| 1537 | static bool classof(const SDNode *N) { | |||
| 1538 | return N->getOpcode() == ISD::VECTOR_SHUFFLE; | |||
| 1539 | } | |||
| 1540 | }; | |||
| 1541 | ||||
| 1542 | /// SDNode wrapping an integer ConstantInt, for ISD::Constant and | |||
| 1542 | /// ISD::TargetConstant. | |||
| 1542 | class ConstantSDNode : public SDNode { | |||
| 1543 | friend class SelectionDAG; | |||
| 1544 | ||||
| 1545 | const ConstantInt *Value; | |||
| 1546 | ||||
| 1547 | ConstantSDNode(bool isTarget, bool isOpaque, const ConstantInt *val, EVT VT) | |||
| 1548 | : SDNode(isTarget ? ISD::TargetConstant : ISD::Constant, 0, DebugLoc(), | |||
| 1549 | getSDVTList(VT)), | |||
| 1550 | Value(val) { | |||
| 1551 | ConstantSDNodeBits.IsOpaque = isOpaque; | |||
| 1552 | } | |||
| 1553 | ||||
| 1554 | public: | |||
| 1555 | // Accessors delegate to the wrapped ConstantInt. | |||
| 1555 | const ConstantInt *getConstantIntValue() const { return Value; } | |||
| 1556 | const APInt &getAPIntValue() const { return Value->getValue(); } | |||
| 1557 | uint64_t getZExtValue() const { return Value->getZExtValue(); } | |||
| 1558 | int64_t getSExtValue() const { return Value->getSExtValue(); } | |||
| 1559 | uint64_t getLimitedValue(uint64_t Limit = UINT64_MAX0xffffffffffffffffULL) { | |||
| 1560 | return Value->getLimitedValue(Limit); | |||
| 1561 | } | |||
| 1562 | MaybeAlign getMaybeAlignValue() const { return Value->getMaybeAlignValue(); } | |||
| 1563 | Align getAlignValue() const { return Value->getAlignValue(); } | |||
| 1564 | ||||
| 1565 | bool isOne() const { return Value->isOne(); } | |||
| 1566 | bool isNullValue() const { return Value->isZero(); } | |||
| 1567 | bool isAllOnesValue() const { return Value->isMinusOne(); } | |||
| 1568 | bool isMaxSignedValue() const { return Value->isMaxValue(true); } | |||
| 1569 | bool isMinSignedValue() const { return Value->isMinValue(true); } | |||
| 1570 | ||||
| 1571 | bool isOpaque() const { return ConstantSDNodeBits.IsOpaque; } | |||
| 1572 | ||||
| 1573 | static bool classof(const SDNode *N) { | |||
| 1574 | return N->getOpcode() == ISD::Constant || | |||
| 1575 | N->getOpcode() == ISD::TargetConstant; | |||
| 1576 | } | |||
| 1577 | }; | |||
| 1578 | ||||
| 1579 | // Asserts via cast<> if operand Num is not actually a ConstantSDNode. | |||
| 1579 | uint64_t SDNode::getConstantOperandVal(unsigned Num) const { | |||
| 1580 | return cast<ConstantSDNode>(getOperand(Num))->getZExtValue(); | |||
| 1581 | } | |||
| 1582 | ||||
| 1583 | // Asserts via cast<> if operand Num is not actually a ConstantSDNode. | |||
| 1583 | const APInt &SDNode::getConstantOperandAPInt(unsigned Num) const { | |||
| 1584 | return cast<ConstantSDNode>(getOperand(Num))->getAPIntValue(); | |||
| 1585 | } | |||
| 1586 | ||||
| 1587 | /// SDNode wrapping a floating-point ConstantFP, for ISD::ConstantFP and | |||
| 1587 | /// ISD::TargetConstantFP. | |||
| 1587 | class ConstantFPSDNode : public SDNode { | |||
| 1588 | friend class SelectionDAG; | |||
| 1589 | ||||
| 1590 | const ConstantFP *Value; | |||
| 1591 | ||||
| 1592 | ConstantFPSDNode(bool isTarget, const ConstantFP *val, EVT VT) | |||
| 1593 | : SDNode(isTarget ? ISD::TargetConstantFP : ISD::ConstantFP, 0, | |||
| 1594 | DebugLoc(), getSDVTList(VT)), | |||
| 1595 | Value(val) {} | |||
| 1596 | ||||
| 1597 | public: | |||
| 1598 | const APFloat& getValueAPF() const { return Value->getValueAPF(); } | |||
| 1599 | const ConstantFP *getConstantFPValue() const { return Value; } | |||
| 1600 | ||||
| 1601 | /// Return true if the value is positive or negative zero. | |||
| 1602 | bool isZero() const { return Value->isZero(); } | |||
| 1603 | ||||
| 1604 | /// Return true if the value is a NaN. | |||
| 1605 | bool isNaN() const { return Value->isNaN(); } | |||
| 1606 | ||||
| 1607 | /// Return true if the value is an infinity | |||
| 1608 | bool isInfinity() const { return Value->isInfinity(); } | |||
| 1609 | ||||
| 1610 | /// Return true if the value is negative. | |||
| 1611 | bool isNegative() const { return Value->isNegative(); } | |||
| 1612 | ||||
| 1613 | /// We don't rely on operator== working on double values, as | |||
| 1614 | /// it returns true for things that are clearly not equal, like -0.0 and 0.0. | |||
| 1615 | /// As such, this method can be used to do an exact bit-for-bit comparison of | |||
| 1616 | /// two floating point values. | |||
| 1617 | ||||
| 1618 | /// We leave the version with the double argument here because it's just so | |||
| 1619 | /// convenient to write "2.0" and the like. Without this function we'd | |||
| 1620 | /// have to duplicate its logic everywhere it's called. | |||
| 1621 | bool isExactlyValue(double V) const { | |||
| 1622 | return Value->getValueAPF().isExactlyValue(V); | |||
| 1623 | } | |||
| 1624 | bool isExactlyValue(const APFloat& V) const; | |||
| 1625 | ||||
| 1626 | /// Return true if \p Val can be represented exactly in type \p VT. | |||
| 1626 | static bool isValueValidForType(EVT VT, const APFloat& Val); | |||
| 1627 | ||||
| 1628 | static bool classof(const SDNode *N) { | |||
| 1629 | return N->getOpcode() == ISD::ConstantFP || | |||
| 1630 | N->getOpcode() == ISD::TargetConstantFP; | |||
| 1631 | } | |||
| 1632 | }; | |||
| 1633 | ||||
| 1634 | /// Returns true if \p V is a constant integer zero. | |||
| 1635 | bool isNullConstant(SDValue V); | |||
| 1636 | ||||
| 1637 | /// Returns true if \p V is an FP constant with a value of positive zero. | |||
| 1638 | bool isNullFPConstant(SDValue V); | |||
| 1639 | ||||
| 1640 | /// Returns true if \p V is an integer constant with all bits set. | |||
| 1641 | bool isAllOnesConstant(SDValue V); | |||
| 1642 | ||||
| 1643 | /// Returns true if \p V is a constant integer one. | |||
| 1644 | bool isOneConstant(SDValue V); | |||
| 1645 | ||||
| 1646 | /// Return the non-bitcasted source operand of \p V if it exists. | |||
| 1647 | /// If \p V is not a bitcasted value, it is returned as-is. | |||
| 1648 | SDValue peekThroughBitcasts(SDValue V); | |||
| 1649 | ||||
| 1650 | /// Return the non-bitcasted and one-use source operand of \p V if it exists. | |||
| 1651 | /// If \p V is not a bitcasted one-use value, it is returned as-is. | |||
| 1652 | SDValue peekThroughOneUseBitcasts(SDValue V); | |||
| 1653 | ||||
| 1654 | /// Return the non-extracted vector source operand of \p V if it exists. | |||
| 1655 | /// If \p V is not an extracted subvector, it is returned as-is. | |||
| 1656 | SDValue peekThroughExtractSubvectors(SDValue V); | |||
| 1657 | ||||
| 1658 | /// Returns true if \p V is a bitwise not operation. Assumes that an all ones | |||
| 1659 | /// constant is canonicalized to be operand 1. | |||
| 1660 | bool isBitwiseNot(SDValue V, bool AllowUndefs = false); | |||
| 1661 | ||||
| 1662 | /// Returns the SDNode if it is a constant splat BuildVector or constant int. | |||
| 1663 | ConstantSDNode *isConstOrConstSplat(SDValue N, bool AllowUndefs = false, | |||
| 1664 | bool AllowTruncation = false); | |||
| 1665 | ||||
| 1666 | /// Returns the SDNode if it is a demanded constant splat BuildVector or | |||
| 1667 | /// constant int. | |||
| 1668 | ConstantSDNode *isConstOrConstSplat(SDValue N, const APInt &DemandedElts, | |||
| 1669 | bool AllowUndefs = false, | |||
| 1670 | bool AllowTruncation = false); | |||
| 1671 | ||||
| 1672 | /// Returns the SDNode if it is a constant splat BuildVector or constant float. | |||
| 1673 | ConstantFPSDNode *isConstOrConstSplatFP(SDValue N, bool AllowUndefs = false); | |||
| 1674 | ||||
| 1675 | /// Returns the SDNode if it is a demanded constant splat BuildVector or | |||
| 1676 | /// constant float. | |||
| 1677 | ConstantFPSDNode *isConstOrConstSplatFP(SDValue N, const APInt &DemandedElts, | |||
| 1678 | bool AllowUndefs = false); | |||
| 1679 | ||||
| 1680 | /// Return true if the value is a constant 0 integer or a splatted vector of | |||
| 1681 | /// a constant 0 integer (with no undefs by default). | |||
| 1682 | /// Build vector implicit truncation is not an issue for null values. | |||
| 1683 | bool isNullOrNullSplat(SDValue V, bool AllowUndefs = false); | |||
| 1684 | ||||
| 1685 | /// Return true if the value is a constant 1 integer or a splatted vector of a | |||
| 1686 | /// constant 1 integer (with no undefs). | |||
| 1687 | /// Does not permit build vector implicit truncation. | |||
| 1688 | bool isOneOrOneSplat(SDValue V, bool AllowUndefs = false); | |||
| 1689 | ||||
| 1690 | /// Return true if the value is a constant -1 integer or a splatted vector of a | |||
| 1691 | /// constant -1 integer (with no undefs). | |||
| 1692 | /// Does not permit build vector implicit truncation. | |||
| 1693 | bool isAllOnesOrAllOnesSplat(SDValue V, bool AllowUndefs = false); | |||
| 1694 | ||||
| 1695 | /// Return true if \p V is either a integer or FP constant. | |||
| 1696 | inline bool isIntOrFPConstant(SDValue V) { | |||
| 1697 | return isa<ConstantSDNode>(V) || isa<ConstantFPSDNode>(V); | |||
| 1698 | } | |||
| 1699 | ||||
/// An SDNode wrapping the address of a GlobalValue plus a constant byte
/// offset and target-specific flags. Covers both the generic and target
/// forms of GlobalAddress and GlobalTLSAddress.
class GlobalAddressSDNode : public SDNode {
  friend class SelectionDAG;

  const GlobalValue *TheGlobal; // The referenced IR global.
  int64_t Offset;               // Constant byte offset added to the address.
  unsigned TargetFlags;         // Target-specific relocation/modifier flags.

  GlobalAddressSDNode(unsigned Opc, unsigned Order, const DebugLoc &DL,
                      const GlobalValue *GA, EVT VT, int64_t o,
                      unsigned TF);

public:
  const GlobalValue *getGlobal() const { return TheGlobal; }
  int64_t getOffset() const { return Offset; }
  unsigned getTargetFlags() const { return TargetFlags; }
  // Return the address space this GlobalAddress belongs to.
  unsigned getAddressSpace() const;

  // Supports isa<>/cast<>/dyn_cast<> for all four address opcodes.
  static bool classof(const SDNode *N) {
    return N->getOpcode() == ISD::GlobalAddress ||
           N->getOpcode() == ISD::TargetGlobalAddress ||
           N->getOpcode() == ISD::GlobalTLSAddress ||
           N->getOpcode() == ISD::TargetGlobalTLSAddress;
  }
};
| 1725 | ||||
/// An SDNode wrapping a stack frame slot index (ISD::FrameIndex or
/// ISD::TargetFrameIndex).
class FrameIndexSDNode : public SDNode {
  friend class SelectionDAG;

  int FI; // Index into the MachineFrameInfo frame object list.

  FrameIndexSDNode(int fi, EVT VT, bool isTarg)
    : SDNode(isTarg ? ISD::TargetFrameIndex : ISD::FrameIndex,
      0, DebugLoc(), getSDVTList(VT)), FI(fi) {
  }

public:
  int getIndex() const { return FI; }

  // Supports isa<>/cast<>/dyn_cast<> for both target and non-target forms.
  static bool classof(const SDNode *N) {
    return N->getOpcode() == ISD::FrameIndex ||
           N->getOpcode() == ISD::TargetFrameIndex;
  }
};
| 1744 | ||||
/// This SDNode is used for LIFETIME_START/LIFETIME_END values, which indicate
/// the offset and size that are started/ended in the underlying FrameIndex.
class LifetimeSDNode : public SDNode {
  friend class SelectionDAG;
  int64_t Size;   // Byte size of the live range; see getSize() precondition.
  int64_t Offset; // -1 if offset is unknown.

  LifetimeSDNode(unsigned Opcode, unsigned Order, const DebugLoc &dl,
                 SDVTList VTs, int64_t Size, int64_t Offset)
      : SDNode(Opcode, Order, dl, VTs), Size(Size), Offset(Offset) {}
public:
  /// Operand 1 is the FrameIndexSDNode this lifetime marker refers to.
  int64_t getFrameIndex() const {
    return cast<FrameIndexSDNode>(getOperand(1))->getIndex();
  }

  /// True when a concrete offset was recorded (Offset >= 0).
  bool hasOffset() const { return Offset >= 0; }
  int64_t getOffset() const {
    assert(hasOffset() && "offset is unknown")((void)0);
    return Offset;
  }
  // Size is only meaningful when the offset is known too, hence the same
  // precondition as getOffset().
  int64_t getSize() const {
    assert(hasOffset() && "offset is unknown")((void)0);
    return Size;
  }

  // Methods to support isa and dyn_cast
  static bool classof(const SDNode *N) {
    return N->getOpcode() == ISD::LIFETIME_START ||
           N->getOpcode() == ISD::LIFETIME_END;
  }
};
| 1776 | ||||
/// This SDNode is used for PSEUDO_PROBE values, which are the function guid and
/// the index of the basic block being probed. A pseudo probe serves as a place
/// holder and will be removed at the end of compilation. It does not have any
/// operand because we do not want the instruction selection to deal with any.
class PseudoProbeSDNode : public SDNode {
  friend class SelectionDAG;
  uint64_t Guid;       // GUID of the enclosing function.
  uint64_t Index;      // Index of the probed basic block.
  uint32_t Attributes; // Probe attribute bits.

  PseudoProbeSDNode(unsigned Opcode, unsigned Order, const DebugLoc &Dl,
                    SDVTList VTs, uint64_t Guid, uint64_t Index, uint32_t Attr)
      : SDNode(Opcode, Order, Dl, VTs), Guid(Guid), Index(Index),
        Attributes(Attr) {}

public:
  uint64_t getGuid() const { return Guid; }
  uint64_t getIndex() const { return Index; }
  uint32_t getAttributes() const { return Attributes; }

  // Methods to support isa and dyn_cast
  static bool classof(const SDNode *N) {
    return N->getOpcode() == ISD::PSEUDO_PROBE;
  }
};
| 1802 | ||||
/// An SDNode wrapping a jump-table index (ISD::JumpTable or
/// ISD::TargetJumpTable).
class JumpTableSDNode : public SDNode {
  friend class SelectionDAG;

  int JTI;              // Index into the MachineJumpTableInfo table list.
  unsigned TargetFlags; // Target-specific relocation/modifier flags.

  JumpTableSDNode(int jti, EVT VT, bool isTarg, unsigned TF)
    : SDNode(isTarg ? ISD::TargetJumpTable : ISD::JumpTable,
      0, DebugLoc(), getSDVTList(VT)), JTI(jti), TargetFlags(TF) {
  }

public:
  int getIndex() const { return JTI; }
  unsigned getTargetFlags() const { return TargetFlags; }

  // Supports isa<>/cast<>/dyn_cast<> for both target and non-target forms.
  static bool classof(const SDNode *N) {
    return N->getOpcode() == ISD::JumpTable ||
           N->getOpcode() == ISD::TargetJumpTable;
  }
};
| 1823 | ||||
/// An SDNode wrapping a constant-pool entry. The entry is either a plain IR
/// Constant or a target-specific MachineConstantPoolValue; which one is
/// encoded in the sign bit of Offset (see isMachineConstantPoolEntry()).
class ConstantPoolSDNode : public SDNode {
  friend class SelectionDAG;

  union {
    const Constant *ConstVal;
    MachineConstantPoolValue *MachineCPVal;
  } Val;
  int Offset;  // It's a MachineConstantPoolValue if top bit is set.
  Align Alignment;  // Minimum alignment requirement of CP.
  unsigned TargetFlags;

  ConstantPoolSDNode(bool isTarget, const Constant *c, EVT VT, int o,
                     Align Alignment, unsigned TF)
      : SDNode(isTarget ? ISD::TargetConstantPool : ISD::ConstantPool, 0,
               DebugLoc(), getSDVTList(VT)),
        Offset(o), Alignment(Alignment), TargetFlags(TF) {
    assert(Offset >= 0 && "Offset is too large")((void)0);
    Val.ConstVal = c;
  }

  ConstantPoolSDNode(bool isTarget, MachineConstantPoolValue *v, EVT VT, int o,
                     Align Alignment, unsigned TF)
      : SDNode(isTarget ? ISD::TargetConstantPool : ISD::ConstantPool, 0,
               DebugLoc(), getSDVTList(VT)),
        Offset(o), Alignment(Alignment), TargetFlags(TF) {
    assert(Offset >= 0 && "Offset is too large")((void)0);
    Val.MachineCPVal = v;
    // Set the top (sign) bit of Offset to mark this as a machine CP value.
    Offset |= 1 << (sizeof(unsigned)*CHAR_BIT8-1);
  }

public:
  // The union holds a MachineConstantPoolValue iff the top bit of Offset is
  // set, which makes the signed value negative.
  bool isMachineConstantPoolEntry() const {
    return Offset < 0;
  }

  const Constant *getConstVal() const {
    assert(!isMachineConstantPoolEntry() && "Wrong constantpool type")((void)0);
    return Val.ConstVal;
  }

  MachineConstantPoolValue *getMachineCPVal() const {
    assert(isMachineConstantPoolEntry() && "Wrong constantpool type")((void)0);
    return Val.MachineCPVal;
  }

  // Strip the discriminator bit to recover the actual offset.
  int getOffset() const {
    return Offset & ~(1 << (sizeof(unsigned)*CHAR_BIT8-1));
  }

  // Return the alignment of this constant pool object, which is either 0 (for
  // default alignment) or the desired value.
  Align getAlign() const { return Alignment; }
  unsigned getTargetFlags() const { return TargetFlags; }

  Type *getType() const;

  // Supports isa<>/cast<>/dyn_cast<> for both target and non-target forms.
  static bool classof(const SDNode *N) {
    return N->getOpcode() == ISD::ConstantPool ||
           N->getOpcode() == ISD::TargetConstantPool;
  }
};
| 1885 | ||||
/// Completely target-dependent object reference.
class TargetIndexSDNode : public SDNode {
  friend class SelectionDAG;

  unsigned TargetFlags; // Target-specific modifier flags.
  int Index;            // Target-defined index value.
  int64_t Offset;       // Byte offset applied to the referenced object.

public:
  TargetIndexSDNode(int Idx, EVT VT, int64_t Ofs, unsigned TF)
      : SDNode(ISD::TargetIndex, 0, DebugLoc(), getSDVTList(VT)),
        TargetFlags(TF), Index(Idx), Offset(Ofs) {}

  unsigned getTargetFlags() const { return TargetFlags; }
  int getIndex() const { return Index; }
  int64_t getOffset() const { return Offset; }

  // Supports isa<>/cast<>/dyn_cast<>.
  static bool classof(const SDNode *N) {
    return N->getOpcode() == ISD::TargetIndex;
  }
};
| 1907 | ||||
/// An SDNode wrapping a MachineBasicBlock reference (ISD::BasicBlock), used
/// e.g. as a branch target.
class BasicBlockSDNode : public SDNode {
  friend class SelectionDAG;

  MachineBasicBlock *MBB;

  /// Debug info is meaningful and potentially useful here, but we create
  /// blocks out of order when they're jumped to, which makes it a bit
  /// harder. Let's see if we need it first.
  explicit BasicBlockSDNode(MachineBasicBlock *mbb)
    : SDNode(ISD::BasicBlock, 0, DebugLoc(), getSDVTList(MVT::Other)), MBB(mbb)
  {}

public:
  MachineBasicBlock *getBasicBlock() const { return MBB; }

  // Supports isa<>/cast<>/dyn_cast<>.
  static bool classof(const SDNode *N) {
    return N->getOpcode() == ISD::BasicBlock;
  }
};
| 1927 | ||||
| 1928 | /// A "pseudo-class" with methods for operating on BUILD_VECTORs. | |||
| 1929 | class BuildVectorSDNode : public SDNode { | |||
| 1930 | public: | |||
| 1931 | // These are constructed as SDNodes and then cast to BuildVectorSDNodes. | |||
| 1932 | explicit BuildVectorSDNode() = delete; | |||
| 1933 | ||||
| 1934 | /// Check if this is a constant splat, and if so, find the | |||
| 1935 | /// smallest element size that splats the vector. If MinSplatBits is | |||
| 1936 | /// nonzero, the element size must be at least that large. Note that the | |||
| 1937 | /// splat element may be the entire vector (i.e., a one element vector). | |||
| 1938 | /// Returns the splat element value in SplatValue. Any undefined bits in | |||
| 1939 | /// that value are zero, and the corresponding bits in the SplatUndef mask | |||
| 1940 | /// are set. The SplatBitSize value is set to the splat element size in | |||
| 1941 | /// bits. HasAnyUndefs is set to true if any bits in the vector are | |||
| 1942 | /// undefined. isBigEndian describes the endianness of the target. | |||
| 1943 | bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, | |||
| 1944 | unsigned &SplatBitSize, bool &HasAnyUndefs, | |||
| 1945 | unsigned MinSplatBits = 0, | |||
| 1946 | bool isBigEndian = false) const; | |||
| 1947 | ||||
| 1948 | /// Returns the demanded splatted value or a null value if this is not a | |||
| 1949 | /// splat. | |||
| 1950 | /// | |||
| 1951 | /// The DemandedElts mask indicates the elements that must be in the splat. | |||
| 1952 | /// If passed a non-null UndefElements bitvector, it will resize it to match | |||
| 1953 | /// the vector width and set the bits where elements are undef. | |||
| 1954 | SDValue getSplatValue(const APInt &DemandedElts, | |||
| 1955 | BitVector *UndefElements = nullptr) const; | |||
| 1956 | ||||
| 1957 | /// Returns the splatted value or a null value if this is not a splat. | |||
| 1958 | /// | |||
| 1959 | /// If passed a non-null UndefElements bitvector, it will resize it to match | |||
| 1960 | /// the vector width and set the bits where elements are undef. | |||
| 1961 | SDValue getSplatValue(BitVector *UndefElements = nullptr) const; | |||
| 1962 | ||||
| 1963 | /// Find the shortest repeating sequence of values in the build vector. | |||
| 1964 | /// | |||
| 1965 | /// e.g. { u, X, u, X, u, u, X, u } -> { X } | |||
| 1966 | /// { X, Y, u, Y, u, u, X, u } -> { X, Y } | |||
| 1967 | /// | |||
| 1968 | /// Currently this must be a power-of-2 build vector. | |||
| 1969 | /// The DemandedElts mask indicates the elements that must be present, | |||
| 1970 | /// undemanded elements in Sequence may be null (SDValue()). If passed a | |||
| 1971 | /// non-null UndefElements bitvector, it will resize it to match the original | |||
| 1972 | /// vector width and set the bits where elements are undef. If result is | |||
| 1973 | /// false, Sequence will be empty. | |||
| 1974 | bool getRepeatedSequence(const APInt &DemandedElts, | |||
| 1975 | SmallVectorImpl<SDValue> &Sequence, | |||
| 1976 | BitVector *UndefElements = nullptr) const; | |||
| 1977 | ||||
| 1978 | /// Find the shortest repeating sequence of values in the build vector. | |||
| 1979 | /// | |||
| 1980 | /// e.g. { u, X, u, X, u, u, X, u } -> { X } | |||
| 1981 | /// { X, Y, u, Y, u, u, X, u } -> { X, Y } | |||
| 1982 | /// | |||
| 1983 | /// Currently this must be a power-of-2 build vector. | |||
| 1984 | /// If passed a non-null UndefElements bitvector, it will resize it to match | |||
| 1985 | /// the original vector width and set the bits where elements are undef. | |||
| 1986 | /// If result is false, Sequence will be empty. | |||
| 1987 | bool getRepeatedSequence(SmallVectorImpl<SDValue> &Sequence, | |||
| 1988 | BitVector *UndefElements = nullptr) const; | |||
| 1989 | ||||
| 1990 | /// Returns the demanded splatted constant or null if this is not a constant | |||
| 1991 | /// splat. | |||
| 1992 | /// | |||
| 1993 | /// The DemandedElts mask indicates the elements that must be in the splat. | |||
| 1994 | /// If passed a non-null UndefElements bitvector, it will resize it to match | |||
| 1995 | /// the vector width and set the bits where elements are undef. | |||
| 1996 | ConstantSDNode * | |||
| 1997 | getConstantSplatNode(const APInt &DemandedElts, | |||
| 1998 | BitVector *UndefElements = nullptr) const; | |||
| 1999 | ||||
| 2000 | /// Returns the splatted constant or null if this is not a constant | |||
| 2001 | /// splat. | |||
| 2002 | /// | |||
| 2003 | /// If passed a non-null UndefElements bitvector, it will resize it to match | |||
| 2004 | /// the vector width and set the bits where elements are undef. | |||
| 2005 | ConstantSDNode * | |||
| 2006 | getConstantSplatNode(BitVector *UndefElements = nullptr) const; | |||
| 2007 | ||||
| 2008 | /// Returns the demanded splatted constant FP or null if this is not a | |||
| 2009 | /// constant FP splat. | |||
| 2010 | /// | |||
| 2011 | /// The DemandedElts mask indicates the elements that must be in the splat. | |||
| 2012 | /// If passed a non-null UndefElements bitvector, it will resize it to match | |||
| 2013 | /// the vector width and set the bits where elements are undef. | |||
| 2014 | ConstantFPSDNode * | |||
| 2015 | getConstantFPSplatNode(const APInt &DemandedElts, | |||
| 2016 | BitVector *UndefElements = nullptr) const; | |||
| 2017 | ||||
| 2018 | /// Returns the splatted constant FP or null if this is not a constant | |||
| 2019 | /// FP splat. | |||
| 2020 | /// | |||
| 2021 | /// If passed a non-null UndefElements bitvector, it will resize it to match | |||
| 2022 | /// the vector width and set the bits where elements are undef. | |||
| 2023 | ConstantFPSDNode * | |||
| 2024 | getConstantFPSplatNode(BitVector *UndefElements = nullptr) const; | |||
| 2025 | ||||
| 2026 | /// If this is a constant FP splat and the splatted constant FP is an | |||
| 2027 | /// exact power or 2, return the log base 2 integer value. Otherwise, | |||
| 2028 | /// return -1. | |||
| 2029 | /// | |||
| 2030 | /// The BitWidth specifies the necessary bit precision. | |||
| 2031 | int32_t getConstantFPSplatPow2ToLog2Int(BitVector *UndefElements, | |||
| 2032 | uint32_t BitWidth) const; | |||
| 2033 | ||||
| 2034 | bool isConstant() const; | |||
| 2035 | ||||
| 2036 | static bool classof(const SDNode *N) { | |||
| 2037 | return N->getOpcode() == ISD::BUILD_VECTOR; | |||
| 2038 | } | |||
| 2039 | }; | |||
| 2040 | ||||
/// An SDNode that holds an arbitrary LLVM IR Value. This is
/// used when the SelectionDAG needs to make a simple reference to something
/// in the LLVM IR representation.
///
class SrcValueSDNode : public SDNode {
  friend class SelectionDAG;

  const Value *V; // The referenced IR value; not owned by this node.

  /// Create a SrcValue for a general value.
  explicit SrcValueSDNode(const Value *v)
    : SDNode(ISD::SRCVALUE, 0, DebugLoc(), getSDVTList(MVT::Other)), V(v) {}

public:
  /// Return the contained Value.
  const Value *getValue() const { return V; }

  // Supports isa<>/cast<>/dyn_cast<>.
  static bool classof(const SDNode *N) {
    return N->getOpcode() == ISD::SRCVALUE;
  }
};
| 2062 | ||||
/// An SDNode that references an IR metadata node (ISD::MDNODE_SDNODE).
class MDNodeSDNode : public SDNode {
  friend class SelectionDAG;

  const MDNode *MD; // The referenced metadata; not owned by this node.

  explicit MDNodeSDNode(const MDNode *md)
  : SDNode(ISD::MDNODE_SDNODE, 0, DebugLoc(), getSDVTList(MVT::Other)), MD(md)
  {}

public:
  const MDNode *getMD() const { return MD; }

  // Supports isa<>/cast<>/dyn_cast<>.
  static bool classof(const SDNode *N) {
    return N->getOpcode() == ISD::MDNODE_SDNODE;
  }
};
| 2079 | ||||
/// An SDNode wrapping a physical or virtual register (ISD::Register).
class RegisterSDNode : public SDNode {
  friend class SelectionDAG;

  Register Reg;

  RegisterSDNode(Register reg, EVT VT)
    : SDNode(ISD::Register, 0, DebugLoc(), getSDVTList(VT)), Reg(reg) {}

public:
  Register getReg() const { return Reg; }

  // Supports isa<>/cast<>/dyn_cast<>.
  static bool classof(const SDNode *N) {
    return N->getOpcode() == ISD::Register;
  }
};
| 2095 | ||||
/// An SDNode wrapping a register mask, e.g. the set of registers clobbered by
/// a call (ISD::RegisterMask).
class RegisterMaskSDNode : public SDNode {
  friend class SelectionDAG;

  // The memory for RegMask is not owned by the node.
  const uint32_t *RegMask;

  RegisterMaskSDNode(const uint32_t *mask)
    : SDNode(ISD::RegisterMask, 0, DebugLoc(), getSDVTList(MVT::Untyped)),
      RegMask(mask) {}

public:
  const uint32_t *getRegMask() const { return RegMask; }

  // Supports isa<>/cast<>/dyn_cast<>.
  static bool classof(const SDNode *N) {
    return N->getOpcode() == ISD::RegisterMask;
  }
};
| 2113 | ||||
/// An SDNode wrapping an IR BlockAddress plus a constant byte offset and
/// target flags (ISD::BlockAddress or ISD::TargetBlockAddress).
class BlockAddressSDNode : public SDNode {
  friend class SelectionDAG;

  const BlockAddress *BA; // The referenced basic-block address.
  int64_t Offset;         // Constant byte offset added to the address.
  unsigned TargetFlags;   // Target-specific relocation/modifier flags.

  BlockAddressSDNode(unsigned NodeTy, EVT VT, const BlockAddress *ba,
                     int64_t o, unsigned Flags)
    : SDNode(NodeTy, 0, DebugLoc(), getSDVTList(VT)),
             BA(ba), Offset(o), TargetFlags(Flags) {}

public:
  const BlockAddress *getBlockAddress() const { return BA; }
  int64_t getOffset() const { return Offset; }
  unsigned getTargetFlags() const { return TargetFlags; }

  // Supports isa<>/cast<>/dyn_cast<> for both target and non-target forms.
  static bool classof(const SDNode *N) {
    return N->getOpcode() == ISD::BlockAddress ||
           N->getOpcode() == ISD::TargetBlockAddress;
  }
};
| 2136 | ||||
/// An SDNode wrapping an MCSymbol label, used for EH_LABEL and
/// ANNOTATION_LABEL nodes.
class LabelSDNode : public SDNode {
  friend class SelectionDAG;

  MCSymbol *Label;

  LabelSDNode(unsigned Opcode, unsigned Order, const DebugLoc &dl, MCSymbol *L)
      : SDNode(Opcode, Order, dl, getSDVTList(MVT::Other)), Label(L) {
    // Only the two label opcodes accepted by classof() are valid here.
    assert(LabelSDNode::classof(this) && "not a label opcode")((void)0);
  }

public:
  MCSymbol *getLabel() const { return Label; }

  // Supports isa<>/cast<>/dyn_cast<> for both label opcodes.
  static bool classof(const SDNode *N) {
    return N->getOpcode() == ISD::EH_LABEL ||
           N->getOpcode() == ISD::ANNOTATION_LABEL;
  }
};
| 2155 | ||||
/// An SDNode referencing an external symbol by name (ISD::ExternalSymbol or
/// ISD::TargetExternalSymbol), e.g. a runtime library call.
class ExternalSymbolSDNode : public SDNode {
  friend class SelectionDAG;

  const char *Symbol;   // NUL-terminated symbol name; not owned by the node.
  unsigned TargetFlags; // Target-specific relocation/modifier flags.

  ExternalSymbolSDNode(bool isTarget, const char *Sym, unsigned TF, EVT VT)
      : SDNode(isTarget ? ISD::TargetExternalSymbol : ISD::ExternalSymbol, 0,
               DebugLoc(), getSDVTList(VT)),
        Symbol(Sym), TargetFlags(TF) {}

public:
  const char *getSymbol() const { return Symbol; }
  unsigned getTargetFlags() const { return TargetFlags; }

  // Supports isa<>/cast<>/dyn_cast<> for both target and non-target forms.
  static bool classof(const SDNode *N) {
    return N->getOpcode() == ISD::ExternalSymbol ||
           N->getOpcode() == ISD::TargetExternalSymbol;
  }
};
| 2176 | ||||
/// An SDNode wrapping an MCSymbol reference (ISD::MCSymbol).
class MCSymbolSDNode : public SDNode {
  friend class SelectionDAG;

  MCSymbol *Symbol;

  MCSymbolSDNode(MCSymbol *Symbol, EVT VT)
      : SDNode(ISD::MCSymbol, 0, DebugLoc(), getSDVTList(VT)), Symbol(Symbol) {}

public:
  MCSymbol *getMCSymbol() const { return Symbol; }

  // Supports isa<>/cast<>/dyn_cast<>.
  static bool classof(const SDNode *N) {
    return N->getOpcode() == ISD::MCSymbol;
  }
};
| 2192 | ||||
/// An SDNode carrying an ISD::CondCode, used to parameterize comparison
/// operations (ISD::CONDCODE).
class CondCodeSDNode : public SDNode {
  friend class SelectionDAG;

  ISD::CondCode Condition;

  explicit CondCodeSDNode(ISD::CondCode Cond)
    : SDNode(ISD::CONDCODE, 0, DebugLoc(), getSDVTList(MVT::Other)),
      Condition(Cond) {}

public:
  ISD::CondCode get() const { return Condition; }

  // Supports isa<>/cast<>/dyn_cast<>.
  static bool classof(const SDNode *N) {
    return N->getOpcode() == ISD::CONDCODE;
  }
};
| 2209 | ||||
/// This class is used to represent EVT's, which are used
/// to parameterize some operations.
class VTSDNode : public SDNode {
  friend class SelectionDAG;

  EVT ValueType; // The carried value type (the node's own type is MVT::Other).

  explicit VTSDNode(EVT VT)
    : SDNode(ISD::VALUETYPE, 0, DebugLoc(), getSDVTList(MVT::Other)),
      ValueType(VT) {}

public:
  EVT getVT() const { return ValueType; }

  // Supports isa<>/cast<>/dyn_cast<>.
  static bool classof(const SDNode *N) {
    return N->getOpcode() == ISD::VALUETYPE;
  }
};
| 2228 | ||||
/// Base class for LoadSDNode and StoreSDNode
class LSBaseSDNode : public MemSDNode {
public:
  LSBaseSDNode(ISD::NodeType NodeTy, unsigned Order, const DebugLoc &dl,
               SDVTList VTs, ISD::MemIndexedMode AM, EVT MemVT,
               MachineMemOperand *MMO)
      : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {
    // AM is stored in a bitfield; the assert guards against truncation.
    LSBaseSDNodeBits.AddressingMode = AM;
    assert(getAddressingMode() == AM && "Value truncated")((void)0);
  }

  // Loads carry the offset as operand 2, stores as operand 3 (see the
  // getOffset() accessors in LoadSDNode/StoreSDNode below).
  const SDValue &getOffset() const {
    return getOperand(getOpcode() == ISD::LOAD ? 2 : 3);
  }

  /// Return the addressing mode for this load or store:
  /// unindexed, pre-inc, pre-dec, post-inc, or post-dec.
  ISD::MemIndexedMode getAddressingMode() const {
    return static_cast<ISD::MemIndexedMode>(LSBaseSDNodeBits.AddressingMode);
  }

  /// Return true if this is a pre/post inc/dec load/store.
  bool isIndexed() const { return getAddressingMode() != ISD::UNINDEXED; }

  /// Return true if this is NOT a pre/post inc/dec load/store.
  bool isUnindexed() const { return getAddressingMode() == ISD::UNINDEXED; }

  // Supports isa<>/cast<>/dyn_cast<> for both LOAD and STORE.
  static bool classof(const SDNode *N) {
    return N->getOpcode() == ISD::LOAD ||
           N->getOpcode() == ISD::STORE;
  }
};
| 2261 | ||||
/// This class is used to represent ISD::LOAD nodes.
class LoadSDNode : public LSBaseSDNode {
  friend class SelectionDAG;

  LoadSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
             ISD::MemIndexedMode AM, ISD::LoadExtType ETy, EVT MemVT,
             MachineMemOperand *MMO)
      : LSBaseSDNode(ISD::LOAD, Order, dl, VTs, AM, MemVT, MMO) {
    LoadSDNodeBits.ExtTy = ETy;
    assert(readMem() && "Load MachineMemOperand is not a load!")((void)0);
    assert(!writeMem() && "Load MachineMemOperand is a store!")((void)0);
  }

public:
  /// Return whether this is a plain node,
  /// or one of the varieties of value-extending loads.
  ISD::LoadExtType getExtensionType() const {
    return static_cast<ISD::LoadExtType>(LoadSDNodeBits.ExtTy);
  }

  // Operand layout: (Chain, BasePtr, Offset).
  const SDValue &getBasePtr() const { return getOperand(1); }
  const SDValue &getOffset() const { return getOperand(2); }

  // Supports isa<>/cast<>/dyn_cast<>.
  static bool classof(const SDNode *N) {
    return N->getOpcode() == ISD::LOAD;
  }
};
| 2289 | ||||
/// This class is used to represent ISD::STORE nodes.
class StoreSDNode : public LSBaseSDNode {
  friend class SelectionDAG;

  StoreSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
              ISD::MemIndexedMode AM, bool isTrunc, EVT MemVT,
              MachineMemOperand *MMO)
      : LSBaseSDNode(ISD::STORE, Order, dl, VTs, AM, MemVT, MMO) {
    StoreSDNodeBits.IsTruncating = isTrunc;
    assert(!readMem() && "Store MachineMemOperand is a load!")((void)0);
    assert(writeMem() && "Store MachineMemOperand is not a store!")((void)0);
  }

public:
  /// Return true if the op does a truncation before store.
  /// For integers this is the same as doing a TRUNCATE and storing the result.
  /// For floats, it is the same as doing an FP_ROUND and storing the result.
  bool isTruncatingStore() const { return StoreSDNodeBits.IsTruncating; }
  void setTruncatingStore(bool Truncating) {
    StoreSDNodeBits.IsTruncating = Truncating;
  }

  // Operand layout: (Chain, Value, BasePtr, Offset).
  const SDValue &getValue() const { return getOperand(1); }
  const SDValue &getBasePtr() const { return getOperand(2); }
  const SDValue &getOffset() const { return getOperand(3); }

  // Supports isa<>/cast<>/dyn_cast<>.
  static bool classof(const SDNode *N) {
    return N->getOpcode() == ISD::STORE;
  }
};
| 2320 | ||||
/// This base class is used to represent MLOAD and MSTORE nodes
class MaskedLoadStoreSDNode : public MemSDNode {
public:
  friend class SelectionDAG;

  /// Construct a masked load/store; \p NodeTy selects MLOAD vs MSTORE.
  MaskedLoadStoreSDNode(ISD::NodeType NodeTy, unsigned Order,
                        const DebugLoc &dl, SDVTList VTs,
                        ISD::MemIndexedMode AM, EVT MemVT,
                        MachineMemOperand *MMO)
      : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {
    // The addressing mode is packed into the shared LSBaseSDNodeBits
    // bitfield; the assert catches a value too wide for the bitfield.
    LSBaseSDNodeBits.AddressingMode = AM;
    assert(getAddressingMode() == AM && "Value truncated")((void)0);
  }

  // MaskedLoadSDNode (Chain, ptr, offset, mask, passthru)
  // MaskedStoreSDNode (Chain, data, ptr, offset, mask)
  // Mask is a vector of i1 elements
  //
  // The operand index therefore depends on which of the two node kinds
  // this is, hence the opcode checks below.
  const SDValue &getOffset() const {
    return getOperand(getOpcode() == ISD::MLOAD ? 2 : 3);
  }
  const SDValue &getMask() const {
    return getOperand(getOpcode() == ISD::MLOAD ? 3 : 4);
  }

  /// Return the addressing mode for this load or store:
  /// unindexed, pre-inc, pre-dec, post-inc, or post-dec.
  ISD::MemIndexedMode getAddressingMode() const {
    return static_cast<ISD::MemIndexedMode>(LSBaseSDNodeBits.AddressingMode);
  }

  /// Return true if this is a pre/post inc/dec load/store.
  bool isIndexed() const { return getAddressingMode() != ISD::UNINDEXED; }

  /// Return true if this is NOT a pre/post inc/dec load/store.
  bool isUnindexed() const { return getAddressingMode() == ISD::UNINDEXED; }

  /// LLVM-style RTTI: covers both MLOAD and MSTORE nodes.
  static bool classof(const SDNode *N) {
    return N->getOpcode() == ISD::MLOAD ||
           N->getOpcode() == ISD::MSTORE;
  }
};
| 2362 | ||||
/// This class is used to represent an MLOAD node
class MaskedLoadSDNode : public MaskedLoadStoreSDNode {
public:
  friend class SelectionDAG;

  /// Construct an ISD::MLOAD node. \p ETy is the per-element extension kind
  /// and \p IsExpanding marks an expanding load (the load-side dual of a
  /// compressing store; see MaskedStoreSDNode::isCompressingStore).
  MaskedLoadSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
                   ISD::MemIndexedMode AM, ISD::LoadExtType ETy,
                   bool IsExpanding, EVT MemVT, MachineMemOperand *MMO)
      : MaskedLoadStoreSDNode(ISD::MLOAD, Order, dl, VTs, AM, MemVT, MMO) {
    LoadSDNodeBits.ExtTy = ETy;
    LoadSDNodeBits.IsExpanding = IsExpanding;
  }

  /// Return whether this is a plain load or a value-extending load.
  ISD::LoadExtType getExtensionType() const {
    return static_cast<ISD::LoadExtType>(LoadSDNodeBits.ExtTy);
  }

  // Operand layout: (Chain, ptr, offset, mask, passthru) -- see the
  // comment on MaskedLoadStoreSDNode.
  const SDValue &getBasePtr() const { return getOperand(1); }
  const SDValue &getOffset() const { return getOperand(2); }
  const SDValue &getMask() const { return getOperand(3); }
  const SDValue &getPassThru() const { return getOperand(4); }

  /// LLVM-style RTTI: a MaskedLoadSDNode is exactly an ISD::MLOAD node.
  static bool classof(const SDNode *N) {
    return N->getOpcode() == ISD::MLOAD;
  }

  /// True if this load expands contiguous active elements into masked lanes.
  bool isExpandingLoad() const { return LoadSDNodeBits.IsExpanding; }
};
| 2391 | ||||
/// This class is used to represent an MSTORE node
class MaskedStoreSDNode : public MaskedLoadStoreSDNode {
public:
  friend class SelectionDAG;

  /// Construct an ISD::MSTORE node. \p isTrunc records truncation before
  /// store, \p isCompressing the compress-to-contiguous behavior described
  /// on isCompressingStore().
  MaskedStoreSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
                    ISD::MemIndexedMode AM, bool isTrunc, bool isCompressing,
                    EVT MemVT, MachineMemOperand *MMO)
      : MaskedLoadStoreSDNode(ISD::MSTORE, Order, dl, VTs, AM, MemVT, MMO) {
    StoreSDNodeBits.IsTruncating = isTrunc;
    StoreSDNodeBits.IsCompressing = isCompressing;
  }

  /// Return true if the op does a truncation before store.
  /// For integers this is the same as doing a TRUNCATE and storing the result.
  /// For floats, it is the same as doing an FP_ROUND and storing the result.
  bool isTruncatingStore() const { return StoreSDNodeBits.IsTruncating; }

  /// Returns true if the op does a compression to the vector before storing.
  /// The node contiguously stores the active elements (integers or floats)
  /// in src (those with their respective bit set in writemask k) to unaligned
  /// memory at base_addr.
  bool isCompressingStore() const { return StoreSDNodeBits.IsCompressing; }

  // Operand layout: (Chain, data, ptr, offset, mask) -- see the comment on
  // MaskedLoadStoreSDNode.
  const SDValue &getValue() const { return getOperand(1); }
  const SDValue &getBasePtr() const { return getOperand(2); }
  const SDValue &getOffset() const { return getOperand(3); }
  const SDValue &getMask() const { return getOperand(4); }

  /// LLVM-style RTTI: a MaskedStoreSDNode is exactly an ISD::MSTORE node.
  static bool classof(const SDNode *N) {
    return N->getOpcode() == ISD::MSTORE;
  }
};
| 2425 | ||||
/// This is a base class used to represent
/// MGATHER and MSCATTER nodes
///
class MaskedGatherScatterSDNode : public MemSDNode {
public:
  friend class SelectionDAG;

  MaskedGatherScatterSDNode(ISD::NodeType NodeTy, unsigned Order,
                            const DebugLoc &dl, SDVTList VTs, EVT MemVT,
                            MachineMemOperand *MMO, ISD::MemIndexType IndexType)
      : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) {
    // The index type is packed into the shared LSBaseSDNodeBits
    // AddressingMode bitfield (gather/scatter reuses that storage); the
    // assert catches a value too wide for the bitfield.
    LSBaseSDNodeBits.AddressingMode = IndexType;
    assert(getIndexType() == IndexType && "Value truncated")((void)0);
  }

  /// How is Index applied to BasePtr when computing addresses.
  ISD::MemIndexType getIndexType() const {
    return static_cast<ISD::MemIndexType>(LSBaseSDNodeBits.AddressingMode);
  }
  void setIndexType(ISD::MemIndexType IndexType) {
    LSBaseSDNodeBits.AddressingMode = IndexType;
  }
  /// True if the index is pre-scaled (SIGNED_SCALED / UNSIGNED_SCALED).
  bool isIndexScaled() const {
    return (getIndexType() == ISD::SIGNED_SCALED) ||
           (getIndexType() == ISD::UNSIGNED_SCALED);
  }
  /// True if the index is interpreted as signed.
  bool isIndexSigned() const {
    return (getIndexType() == ISD::SIGNED_SCALED) ||
           (getIndexType() == ISD::SIGNED_UNSCALED);
  }

  // In the both nodes address is Op1, mask is Op2:
  // MaskedGatherSDNode (Chain, passthru, mask, base, index, scale)
  // MaskedScatterSDNode (Chain, value, mask, base, index, scale)
  // Mask is a vector of i1 elements
  const SDValue &getBasePtr() const { return getOperand(3); }
  const SDValue &getIndex() const { return getOperand(4); }
  const SDValue &getMask() const { return getOperand(2); }
  const SDValue &getScale() const { return getOperand(5); }

  /// LLVM-style RTTI: covers both MGATHER and MSCATTER nodes.
  static bool classof(const SDNode *N) {
    return N->getOpcode() == ISD::MGATHER ||
           N->getOpcode() == ISD::MSCATTER;
  }
};
| 2471 | ||||
| 2472 | /// This class is used to represent an MGATHER node | |||
| 2473 | /// | |||
| 2474 | class MaskedGatherSDNode : public MaskedGatherScatterSDNode { | |||
| 2475 | public: | |||
| 2476 | friend class SelectionDAG; | |||
| 2477 | ||||
| 2478 | MaskedGatherSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs, | |||
| 2479 | EVT MemVT, MachineMemOperand *MMO, | |||
| 2480 | ISD::MemIndexType IndexType, ISD::LoadExtType ETy) | |||
| 2481 | : MaskedGatherScatterSDNode(ISD::MGATHER, Order, dl, VTs, MemVT, MMO, | |||
| 2482 | IndexType) { | |||
| 2483 | LoadSDNodeBits.ExtTy = ETy; | |||
| 2484 | } | |||
| 2485 | ||||
| 2486 | const SDValue &getPassThru() const { return getOperand(1); } | |||
| 2487 | ||||
| 2488 | ISD::LoadExtType getExtensionType() const { | |||
| 2489 | return ISD::LoadExtType(LoadSDNodeBits.ExtTy); | |||
| 2490 | } | |||
| 2491 | ||||
| 2492 | static bool classof(const SDNode *N) { | |||
| 2493 | return N->getOpcode() == ISD::MGATHER; | |||
| 2494 | } | |||
| 2495 | }; | |||
| 2496 | ||||
/// This class is used to represent an MSCATTER node
///
class MaskedScatterSDNode : public MaskedGatherScatterSDNode {
public:
  friend class SelectionDAG;

  /// Construct an ISD::MSCATTER node. \p IsTrunc records whether the op
  /// truncates values before storing them.
  MaskedScatterSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
                      EVT MemVT, MachineMemOperand *MMO,
                      ISD::MemIndexType IndexType, bool IsTrunc)
      : MaskedGatherScatterSDNode(ISD::MSCATTER, Order, dl, VTs, MemVT, MMO,
                                  IndexType) {
    StoreSDNodeBits.IsTruncating = IsTrunc;
  }

  /// Return true if the op does a truncation before store.
  /// For integers this is the same as doing a TRUNCATE and storing the result.
  /// For floats, it is the same as doing an FP_ROUND and storing the result.
  bool isTruncatingStore() const { return StoreSDNodeBits.IsTruncating; }

  /// The vector of values being scattered (operand 1; see the operand
  /// layout comment on MaskedGatherScatterSDNode).
  const SDValue &getValue() const { return getOperand(1); }

  /// LLVM-style RTTI: a MaskedScatterSDNode is exactly an ISD::MSCATTER node.
  static bool classof(const SDNode *N) {
    return N->getOpcode() == ISD::MSCATTER;
  }
};
| 2522 | ||||
/// An SDNode that represents everything that will be needed
/// to construct a MachineInstr. These nodes are created during the
/// instruction selection proper phase.
///
/// Note that the only supported way to set the `memoperands` is by calling the
/// `SelectionDAG::setNodeMemRefs` function as the memory management happens
/// inside the DAG rather than in the node.
class MachineSDNode : public SDNode {
private:
  friend class SelectionDAG;

  MachineSDNode(unsigned Opc, unsigned Order, const DebugLoc &DL, SDVTList VTs)
      : SDNode(Opc, Order, DL, VTs) {}

  // We use a pointer union between a single `MachineMemOperand` pointer and
  // a pointer to an array of `MachineMemOperand` pointers. This is null when
  // the number of these is zero, the single pointer variant used when the
  // number is one, and the array is used for larger numbers.
  //
  // The array is allocated via the `SelectionDAG`'s allocator and so will
  // always live until the DAG is cleaned up and doesn't require ownership here.
  //
  // We can't use something simpler like `TinyPtrVector` here because `SDNode`
  // subclasses aren't managed in a conforming C++ manner. See the comments on
  // `SelectionDAG::MorphNodeTo` which details what all goes on, but the
  // constraint here is that these don't manage memory with their constructor or
  // destructor and can be initialized to a good state even if they start off
  // uninitialized.
  PointerUnion<MachineMemOperand *, MachineMemOperand **> MemRefs = {};

  // Note that this could be folded into the above `MemRefs` member if doing so
  // is advantageous at some point. We don't need to store this in most cases.
  // However, at the moment this doesn't appear to make the allocation any
  // smaller and makes the code somewhat simpler to read.
  int NumMemRefs = 0;

public:
  using mmo_iterator = ArrayRef<MachineMemOperand *>::const_iterator;

  /// Return a non-owning view of this node's memory references. The view's
  /// underlying storage is either the inline pointer slot of `MemRefs` (one
  /// operand) or the DAG-allocated array (several operands).
  ArrayRef<MachineMemOperand *> memoperands() const {
    // Special case the common cases.
    if (NumMemRefs == 0)
      return {};
    if (NumMemRefs == 1)
      // getAddrOfPtr1 exposes the union's in-place pointer storage so we
      // can hand out a one-element ArrayRef without allocating.
      return makeArrayRef(MemRefs.getAddrOfPtr1(), 1);

    // Otherwise we have an actual array.
    return makeArrayRef(MemRefs.get<MachineMemOperand **>(), NumMemRefs);
  }
  mmo_iterator memoperands_begin() const { return memoperands().begin(); }
  mmo_iterator memoperands_end() const { return memoperands().end(); }
  bool memoperands_empty() const { return memoperands().empty(); }

  /// Clear out the memory reference descriptor list.
  /// (The array storage, if any, stays owned by the DAG's allocator.)
  void clearMemRefs() {
    MemRefs = nullptr;
    NumMemRefs = 0;
  }

  /// LLVM-style RTTI: any node carrying a target machine opcode.
  static bool classof(const SDNode *N) {
    return N->isMachineOpcode();
  }
};
| 2586 | ||||
/// An SDNode that records if a register contains a value that is guaranteed to
/// be aligned accordingly.
class AssertAlignSDNode : public SDNode {
  Align Alignment; // Alignment the wrapped value is asserted to have.

public:
  AssertAlignSDNode(unsigned Order, const DebugLoc &DL, EVT VT, Align A)
      : SDNode(ISD::AssertAlign, Order, DL, getSDVTList(VT)), Alignment(A) {}

  /// Return the asserted alignment.
  Align getAlign() const { return Alignment; }

  /// LLVM-style RTTI: an AssertAlignSDNode is exactly an ISD::AssertAlign node.
  static bool classof(const SDNode *N) {
    return N->getOpcode() == ISD::AssertAlign;
  }
};
| 2602 | ||||
/// Forward iterator over an SDNode's operand nodes, used by the
/// GraphTraits specialization below to walk the DAG.
class SDNodeIterator {
  const SDNode *Node; // Node whose operands are iterated.
  unsigned Operand;   // Current operand index.

  SDNodeIterator(const SDNode *N, unsigned Op) : Node(N), Operand(Op) {}

public:
  using iterator_category = std::forward_iterator_tag;
  using value_type = SDNode;
  using difference_type = std::ptrdiff_t;
  using pointer = value_type *;
  using reference = value_type &;

  // NOTE: only the operand index is compared; comparing iterators of two
  // different nodes is not meaningful (operator- below asserts the nodes
  // match).
  bool operator==(const SDNodeIterator& x) const {
    return Operand == x.Operand;
  }
  bool operator!=(const SDNodeIterator& x) const { return !operator==(x); }

  /// Dereference yields the operand's SDNode, not the SDValue.
  pointer operator*() const {
    return Node->getOperand(Operand).getNode();
  }
  pointer operator->() const { return operator*(); }

  SDNodeIterator& operator++() {            // Preincrement
    ++Operand;
    return *this;
  }
  SDNodeIterator operator++(int) { // Postincrement
    SDNodeIterator tmp = *this; ++*this; return tmp;
  }
  size_t operator-(SDNodeIterator Other) const {
    assert(Node == Other.Node &&((void)0)
           "Cannot compare iterators of two different nodes!")((void)0);
    return Operand - Other.Operand;
  }

  static SDNodeIterator begin(const SDNode *N) { return SDNodeIterator(N, 0); }
  static SDNodeIterator end  (const SDNode *N) {
    return SDNodeIterator(N, N->getNumOperands());
  }

  unsigned getOperand() const { return Operand; }
  const SDNode *getNode() const { return Node; }
};
| 2647 | ||||
/// GraphTraits specialization so generic graph algorithms (and graph
/// printers) can traverse a SelectionDAG via operand edges.
template <> struct GraphTraits<SDNode*> {
  using NodeRef = SDNode *;
  using ChildIteratorType = SDNodeIterator;

  static NodeRef getEntryNode(SDNode *N) { return N; }

  static ChildIteratorType child_begin(NodeRef N) {
    return SDNodeIterator::begin(N);
  }

  static ChildIteratorType child_end(NodeRef N) {
    return SDNodeIterator::end(N);
  }
};
| 2662 | ||||
/// A representation of the largest SDNode, for use in sizeof().
///
/// This needs to be a union because the largest node differs on 32 bit systems
/// with 4 and 8 byte pointer alignment, respectively.
/// (AlignedCharArrayUnion provides raw storage large and aligned enough for
/// any of the listed types.)
using LargestSDNode = AlignedCharArrayUnion<AtomicSDNode, TargetIndexSDNode,
                                            BlockAddressSDNode,
                                            GlobalAddressSDNode,
                                            PseudoProbeSDNode>;

/// The SDNode class with the greatest alignment requirement.
using MostAlignedSDNode = GlobalAddressSDNode;
| 2674 | ||||
| 2675 | namespace ISD { | |||
| 2676 | ||||
| 2677 | /// Returns true if the specified node is a non-extending and unindexed load. | |||
| 2678 | inline bool isNormalLoad(const SDNode *N) { | |||
| 2679 | const LoadSDNode *Ld = dyn_cast<LoadSDNode>(N); | |||
| 2680 | return Ld && Ld->getExtensionType() == ISD::NON_EXTLOAD && | |||
| 2681 | Ld->getAddressingMode() == ISD::UNINDEXED; | |||
| 2682 | } | |||
| 2683 | ||||
| 2684 | /// Returns true if the specified node is a non-extending load. | |||
| 2685 | inline bool isNON_EXTLoad(const SDNode *N) { | |||
| 2686 | return isa<LoadSDNode>(N) && | |||
| 2687 | cast<LoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD; | |||
| 2688 | } | |||
| 2689 | ||||
| 2690 | /// Returns true if the specified node is a EXTLOAD. | |||
| 2691 | inline bool isEXTLoad(const SDNode *N) { | |||
| 2692 | return isa<LoadSDNode>(N) && | |||
| 2693 | cast<LoadSDNode>(N)->getExtensionType() == ISD::EXTLOAD; | |||
| 2694 | } | |||
| 2695 | ||||
| 2696 | /// Returns true if the specified node is a SEXTLOAD. | |||
| 2697 | inline bool isSEXTLoad(const SDNode *N) { | |||
| 2698 | return isa<LoadSDNode>(N) && | |||
| 2699 | cast<LoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD; | |||
| 2700 | } | |||
| 2701 | ||||
| 2702 | /// Returns true if the specified node is a ZEXTLOAD. | |||
| 2703 | inline bool isZEXTLoad(const SDNode *N) { | |||
| 2704 | return isa<LoadSDNode>(N) && | |||
| 2705 | cast<LoadSDNode>(N)->getExtensionType() == ISD::ZEXTLOAD; | |||
| 2706 | } | |||
| 2707 | ||||
| 2708 | /// Returns true if the specified node is an unindexed load. | |||
| 2709 | inline bool isUNINDEXEDLoad(const SDNode *N) { | |||
| 2710 | return isa<LoadSDNode>(N) && | |||
| 2711 | cast<LoadSDNode>(N)->getAddressingMode() == ISD::UNINDEXED; | |||
| 2712 | } | |||
| 2713 | ||||
| 2714 | /// Returns true if the specified node is a non-truncating | |||
| 2715 | /// and unindexed store. | |||
| 2716 | inline bool isNormalStore(const SDNode *N) { | |||
| 2717 | const StoreSDNode *St = dyn_cast<StoreSDNode>(N); | |||
| 2718 | return St && !St->isTruncatingStore() && | |||
| 2719 | St->getAddressingMode() == ISD::UNINDEXED; | |||
| 2720 | } | |||
| 2721 | ||||
| 2722 | /// Returns true if the specified node is an unindexed store. | |||
| 2723 | inline bool isUNINDEXEDStore(const SDNode *N) { | |||
| 2724 | return isa<StoreSDNode>(N) && | |||
| 2725 | cast<StoreSDNode>(N)->getAddressingMode() == ISD::UNINDEXED; | |||
| 2726 | } | |||
| 2727 | ||||
/// Attempt to match a unary predicate against a scalar/splat constant or
/// every element of a constant BUILD_VECTOR.
/// If AllowUndefs is true, then UNDEF elements will pass nullptr to Match.
bool matchUnaryPredicate(SDValue Op,
                         std::function<bool(ConstantSDNode *)> Match,
                         bool AllowUndefs = false);

/// Attempt to match a binary predicate against a pair of scalar/splat
/// constants or every element of a pair of constant BUILD_VECTORs.
/// If AllowUndefs is true, then UNDEF elements will pass nullptr to Match.
/// If AllowTypeMismatch is true then RetType + ArgTypes don't need to match.
bool matchBinaryPredicate(
    SDValue LHS, SDValue RHS,
    std::function<bool(ConstantSDNode *, ConstantSDNode *)> Match,
    bool AllowUndefs = false, bool AllowTypeMismatch = false);
| 2743 | ||||
| 2744 | /// Returns true if the specified value is the overflow result from one | |||
| 2745 | /// of the overflow intrinsic nodes. | |||
| 2746 | inline bool isOverflowIntrOpRes(SDValue Op) { | |||
| 2747 | unsigned Opc = Op.getOpcode(); | |||
| 2748 | return (Op.getResNo() == 1 && | |||
| 2749 | (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO || | |||
| 2750 | Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO)); | |||
| 2751 | } | |||
| 2752 | ||||
| 2753 | } // end namespace ISD | |||
| 2754 | ||||
| 2755 | } // end namespace llvm | |||
| 2756 | ||||
| 2757 | #endif // LLVM_CODEGEN_SELECTIONDAGNODES_H |