File: | src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/CodeGen/SelectionDAGNodes.h |
Warning: | line 1110, column 10 Called C++ object pointer is null |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | //===-- AMDGPUISelDAGToDAG.cpp - A dag to dag inst selector for AMDGPU ----===// | ||||
2 | // | ||||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||||
4 | // See https://llvm.org/LICENSE.txt for license information. | ||||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||||
6 | // | ||||
7 | //==-----------------------------------------------------------------------===// | ||||
8 | // | ||||
9 | /// \file | ||||
10 | /// Defines an instruction selector for the AMDGPU target. | ||||
11 | // | ||||
12 | //===----------------------------------------------------------------------===// | ||||
13 | |||||
14 | #include "AMDGPU.h" | ||||
15 | #include "AMDGPUTargetMachine.h" | ||||
16 | #include "SIMachineFunctionInfo.h" | ||||
17 | #include "llvm/Analysis/LegacyDivergenceAnalysis.h" | ||||
18 | #include "llvm/Analysis/ValueTracking.h" | ||||
19 | #include "llvm/CodeGen/FunctionLoweringInfo.h" | ||||
20 | #include "llvm/CodeGen/SelectionDAG.h" | ||||
21 | #include "llvm/CodeGen/SelectionDAGISel.h" | ||||
22 | #include "llvm/CodeGen/SelectionDAGNodes.h" | ||||
23 | #include "llvm/IR/IntrinsicsAMDGPU.h" | ||||
24 | #include "llvm/InitializePasses.h" | ||||
25 | |||||
26 | #ifdef EXPENSIVE_CHECKS | ||||
27 | #include "llvm/Analysis/LoopInfo.h" | ||||
28 | #include "llvm/IR/Dominators.h" | ||||
29 | #endif | ||||
30 | |||||
31 | #define DEBUG_TYPE"isel" "isel" | ||||
32 | |||||
33 | using namespace llvm; | ||||
34 | |||||
35 | namespace llvm { | ||||
36 | |||||
37 | class R600InstrInfo; | ||||
38 | |||||
39 | } // end namespace llvm | ||||
40 | |||||
41 | //===----------------------------------------------------------------------===// | ||||
42 | // Instruction Selector Implementation | ||||
43 | //===----------------------------------------------------------------------===// | ||||
44 | |||||
45 | namespace { | ||||
46 | |||||
47 | static bool isNullConstantOrUndef(SDValue V) { | ||||
48 | if (V.isUndef()) | ||||
49 | return true; | ||||
50 | |||||
51 | ConstantSDNode *Const = dyn_cast<ConstantSDNode>(V); | ||||
52 | return Const != nullptr && Const->isNullValue(); | ||||
53 | } | ||||
54 | |||||
55 | static bool getConstantValue(SDValue N, uint32_t &Out) { | ||||
56 | // This is only used for packed vectors, where ussing 0 for undef should | ||||
57 | // always be good. | ||||
58 | if (N.isUndef()) { | ||||
59 | Out = 0; | ||||
60 | return true; | ||||
61 | } | ||||
62 | |||||
63 | if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) { | ||||
64 | Out = C->getAPIntValue().getSExtValue(); | ||||
65 | return true; | ||||
66 | } | ||||
67 | |||||
68 | if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) { | ||||
69 | Out = C->getValueAPF().bitcastToAPInt().getSExtValue(); | ||||
70 | return true; | ||||
71 | } | ||||
72 | |||||
73 | return false; | ||||
74 | } | ||||
75 | |||||
76 | // TODO: Handle undef as zero | ||||
77 | static SDNode *packConstantV2I16(const SDNode *N, SelectionDAG &DAG, | ||||
78 | bool Negate = false) { | ||||
79 | assert(N->getOpcode() == ISD::BUILD_VECTOR && N->getNumOperands() == 2)((void)0); | ||||
80 | uint32_t LHSVal, RHSVal; | ||||
81 | if (getConstantValue(N->getOperand(0), LHSVal) && | ||||
82 | getConstantValue(N->getOperand(1), RHSVal)) { | ||||
83 | SDLoc SL(N); | ||||
84 | uint32_t K = Negate ? | ||||
85 | (-LHSVal & 0xffff) | (-RHSVal << 16) : | ||||
86 | (LHSVal & 0xffff) | (RHSVal << 16); | ||||
87 | return DAG.getMachineNode(AMDGPU::S_MOV_B32, SL, N->getValueType(0), | ||||
88 | DAG.getTargetConstant(K, SL, MVT::i32)); | ||||
89 | } | ||||
90 | |||||
91 | return nullptr; | ||||
92 | } | ||||
93 | |||||
94 | static SDNode *packNegConstantV2I16(const SDNode *N, SelectionDAG &DAG) { | ||||
95 | return packConstantV2I16(N, DAG, true); | ||||
96 | } | ||||
97 | |||||
98 | /// AMDGPU specific code to select AMDGPU machine instructions for | ||||
99 | /// SelectionDAG operations. | ||||
100 | class AMDGPUDAGToDAGISel : public SelectionDAGISel { | ||||
101 | // Subtarget - Keep a pointer to the AMDGPU Subtarget around so that we can | ||||
102 | // make the right decision when generating code for different targets. | ||||
103 | const GCNSubtarget *Subtarget; | ||||
104 | |||||
105 | // Default FP mode for the current function. | ||||
106 | AMDGPU::SIModeRegisterDefaults Mode; | ||||
107 | |||||
108 | bool EnableLateStructurizeCFG; | ||||
109 | |||||
110 | // Instructions that will be lowered with a final instruction that zeros the | ||||
111 | // high result bits. | ||||
112 | bool fp16SrcZerosHighBits(unsigned Opc) const; | ||||
113 | |||||
114 | public: | ||||
115 | explicit AMDGPUDAGToDAGISel(TargetMachine *TM = nullptr, | ||||
116 | CodeGenOpt::Level OptLevel = CodeGenOpt::Default) | ||||
117 | : SelectionDAGISel(*TM, OptLevel) { | ||||
118 | EnableLateStructurizeCFG = AMDGPUTargetMachine::EnableLateStructurizeCFG; | ||||
119 | } | ||||
120 | ~AMDGPUDAGToDAGISel() override = default; | ||||
121 | |||||
122 | void getAnalysisUsage(AnalysisUsage &AU) const override { | ||||
123 | AU.addRequired<AMDGPUArgumentUsageInfo>(); | ||||
124 | AU.addRequired<LegacyDivergenceAnalysis>(); | ||||
125 | #ifdef EXPENSIVE_CHECKS | ||||
126 | AU.addRequired<DominatorTreeWrapperPass>(); | ||||
127 | AU.addRequired<LoopInfoWrapperPass>(); | ||||
128 | #endif | ||||
129 | SelectionDAGISel::getAnalysisUsage(AU); | ||||
130 | } | ||||
131 | |||||
132 | bool matchLoadD16FromBuildVector(SDNode *N) const; | ||||
133 | |||||
134 | bool runOnMachineFunction(MachineFunction &MF) override; | ||||
135 | void PreprocessISelDAG() override; | ||||
136 | void Select(SDNode *N) override; | ||||
137 | StringRef getPassName() const override; | ||||
138 | void PostprocessISelDAG() override; | ||||
139 | |||||
140 | protected: | ||||
141 | void SelectBuildVector(SDNode *N, unsigned RegClassID); | ||||
142 | |||||
143 | private: | ||||
144 | std::pair<SDValue, SDValue> foldFrameIndex(SDValue N) const; | ||||
145 | bool isNoNanSrc(SDValue N) const; | ||||
146 | bool isInlineImmediate(const SDNode *N, bool Negated = false) const; | ||||
147 | bool isNegInlineImmediate(const SDNode *N) const { | ||||
148 | return isInlineImmediate(N, true); | ||||
149 | } | ||||
150 | |||||
151 | bool isInlineImmediate16(int64_t Imm) const { | ||||
152 | return AMDGPU::isInlinableLiteral16(Imm, Subtarget->hasInv2PiInlineImm()); | ||||
153 | } | ||||
154 | |||||
155 | bool isInlineImmediate32(int64_t Imm) const { | ||||
156 | return AMDGPU::isInlinableLiteral32(Imm, Subtarget->hasInv2PiInlineImm()); | ||||
157 | } | ||||
158 | |||||
159 | bool isInlineImmediate64(int64_t Imm) const { | ||||
160 | return AMDGPU::isInlinableLiteral64(Imm, Subtarget->hasInv2PiInlineImm()); | ||||
161 | } | ||||
162 | |||||
163 | bool isInlineImmediate(const APFloat &Imm) const { | ||||
164 | return Subtarget->getInstrInfo()->isInlineConstant(Imm); | ||||
165 | } | ||||
166 | |||||
167 | bool isVGPRImm(const SDNode *N) const; | ||||
168 | bool isUniformLoad(const SDNode *N) const; | ||||
169 | bool isUniformBr(const SDNode *N) const; | ||||
170 | |||||
171 | bool isBaseWithConstantOffset64(SDValue Addr, SDValue &LHS, | ||||
172 | SDValue &RHS) const; | ||||
173 | |||||
174 | MachineSDNode *buildSMovImm64(SDLoc &DL, uint64_t Val, EVT VT) const; | ||||
175 | |||||
176 | SDNode *glueCopyToOp(SDNode *N, SDValue NewChain, SDValue Glue) const; | ||||
177 | SDNode *glueCopyToM0(SDNode *N, SDValue Val) const; | ||||
178 | SDNode *glueCopyToM0LDSInit(SDNode *N) const; | ||||
179 | |||||
180 | const TargetRegisterClass *getOperandRegClass(SDNode *N, unsigned OpNo) const; | ||||
181 | virtual bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, SDValue &Offset); | ||||
182 | virtual bool SelectADDRIndirect(SDValue Addr, SDValue &Base, SDValue &Offset); | ||||
183 | bool isDSOffsetLegal(SDValue Base, unsigned Offset) const; | ||||
184 | bool isDSOffset2Legal(SDValue Base, unsigned Offset0, unsigned Offset1, | ||||
185 | unsigned Size) const; | ||||
186 | bool SelectDS1Addr1Offset(SDValue Ptr, SDValue &Base, SDValue &Offset) const; | ||||
187 | bool SelectDS64Bit4ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0, | ||||
188 | SDValue &Offset1) const; | ||||
189 | bool SelectDS128Bit8ByteAligned(SDValue Ptr, SDValue &Base, SDValue &Offset0, | ||||
190 | SDValue &Offset1) const; | ||||
191 | bool SelectDSReadWrite2(SDValue Ptr, SDValue &Base, SDValue &Offset0, | ||||
192 | SDValue &Offset1, unsigned Size) const; | ||||
193 | bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr, | ||||
194 | SDValue &SOffset, SDValue &Offset, SDValue &Offen, | ||||
195 | SDValue &Idxen, SDValue &Addr64) const; | ||||
196 | bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr, | ||||
197 | SDValue &SOffset, SDValue &Offset) const; | ||||
198 | bool SelectMUBUFScratchOffen(SDNode *Parent, | ||||
199 | SDValue Addr, SDValue &RSrc, SDValue &VAddr, | ||||
200 | SDValue &SOffset, SDValue &ImmOffset) const; | ||||
201 | bool SelectMUBUFScratchOffset(SDNode *Parent, | ||||
202 | SDValue Addr, SDValue &SRsrc, SDValue &Soffset, | ||||
203 | SDValue &Offset) const; | ||||
204 | |||||
205 | bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset, | ||||
206 | SDValue &Offset) const; | ||||
207 | |||||
208 | bool SelectFlatOffsetImpl(SDNode *N, SDValue Addr, SDValue &VAddr, | ||||
209 | SDValue &Offset, uint64_t FlatVariant) const; | ||||
210 | bool SelectFlatOffset(SDNode *N, SDValue Addr, SDValue &VAddr, | ||||
211 | SDValue &Offset) const; | ||||
212 | bool SelectGlobalOffset(SDNode *N, SDValue Addr, SDValue &VAddr, | ||||
213 | SDValue &Offset) const; | ||||
214 | bool SelectScratchOffset(SDNode *N, SDValue Addr, SDValue &VAddr, | ||||
215 | SDValue &Offset) const; | ||||
216 | bool SelectGlobalSAddr(SDNode *N, SDValue Addr, SDValue &SAddr, | ||||
217 | SDValue &VOffset, SDValue &Offset) const; | ||||
218 | bool SelectScratchSAddr(SDNode *N, SDValue Addr, SDValue &SAddr, | ||||
219 | SDValue &Offset) const; | ||||
220 | |||||
221 | bool SelectSMRDOffset(SDValue ByteOffsetNode, SDValue &Offset, | ||||
222 | bool &Imm) const; | ||||
223 | SDValue Expand32BitAddress(SDValue Addr) const; | ||||
224 | bool SelectSMRD(SDValue Addr, SDValue &SBase, SDValue &Offset, | ||||
225 | bool &Imm) const; | ||||
226 | bool SelectSMRDImm(SDValue Addr, SDValue &SBase, SDValue &Offset) const; | ||||
227 | bool SelectSMRDImm32(SDValue Addr, SDValue &SBase, SDValue &Offset) const; | ||||
228 | bool SelectSMRDSgpr(SDValue Addr, SDValue &SBase, SDValue &Offset) const; | ||||
229 | bool SelectSMRDBufferImm(SDValue Addr, SDValue &Offset) const; | ||||
230 | bool SelectSMRDBufferImm32(SDValue Addr, SDValue &Offset) const; | ||||
231 | bool SelectMOVRELOffset(SDValue Index, SDValue &Base, SDValue &Offset) const; | ||||
232 | |||||
233 | bool SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, SDValue &SrcMods) const; | ||||
234 | bool SelectVOP3ModsImpl(SDValue In, SDValue &Src, unsigned &SrcMods, | ||||
235 | bool AllowAbs = true) const; | ||||
236 | bool SelectVOP3Mods(SDValue In, SDValue &Src, SDValue &SrcMods) const; | ||||
237 | bool SelectVOP3BMods(SDValue In, SDValue &Src, SDValue &SrcMods) const; | ||||
238 | bool SelectVOP3NoMods(SDValue In, SDValue &Src) const; | ||||
239 | bool SelectVOP3Mods0(SDValue In, SDValue &Src, SDValue &SrcMods, | ||||
240 | SDValue &Clamp, SDValue &Omod) const; | ||||
241 | bool SelectVOP3BMods0(SDValue In, SDValue &Src, SDValue &SrcMods, | ||||
242 | SDValue &Clamp, SDValue &Omod) const; | ||||
243 | bool SelectVOP3NoMods0(SDValue In, SDValue &Src, SDValue &SrcMods, | ||||
244 | SDValue &Clamp, SDValue &Omod) const; | ||||
245 | |||||
246 | bool SelectVOP3OMods(SDValue In, SDValue &Src, | ||||
247 | SDValue &Clamp, SDValue &Omod) const; | ||||
248 | |||||
249 | bool SelectVOP3PMods(SDValue In, SDValue &Src, SDValue &SrcMods) const; | ||||
250 | |||||
251 | bool SelectVOP3OpSel(SDValue In, SDValue &Src, SDValue &SrcMods) const; | ||||
252 | |||||
253 | bool SelectVOP3OpSelMods(SDValue In, SDValue &Src, SDValue &SrcMods) const; | ||||
254 | bool SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src, unsigned &Mods) const; | ||||
255 | bool SelectVOP3PMadMixMods(SDValue In, SDValue &Src, SDValue &SrcMods) const; | ||||
256 | |||||
257 | SDValue getHi16Elt(SDValue In) const; | ||||
258 | |||||
259 | SDValue getMaterializedScalarImm32(int64_t Val, const SDLoc &DL) const; | ||||
260 | |||||
261 | void SelectADD_SUB_I64(SDNode *N); | ||||
262 | void SelectAddcSubb(SDNode *N); | ||||
263 | void SelectUADDO_USUBO(SDNode *N); | ||||
264 | void SelectDIV_SCALE(SDNode *N); | ||||
265 | void SelectMAD_64_32(SDNode *N); | ||||
266 | void SelectFMA_W_CHAIN(SDNode *N); | ||||
267 | void SelectFMUL_W_CHAIN(SDNode *N); | ||||
268 | |||||
269 | SDNode *getS_BFE(unsigned Opcode, const SDLoc &DL, SDValue Val, | ||||
270 | uint32_t Offset, uint32_t Width); | ||||
271 | void SelectS_BFEFromShifts(SDNode *N); | ||||
272 | void SelectS_BFE(SDNode *N); | ||||
273 | bool isCBranchSCC(const SDNode *N) const; | ||||
274 | void SelectBRCOND(SDNode *N); | ||||
275 | void SelectFMAD_FMA(SDNode *N); | ||||
276 | void SelectATOMIC_CMP_SWAP(SDNode *N); | ||||
277 | void SelectDSAppendConsume(SDNode *N, unsigned IntrID); | ||||
278 | void SelectDS_GWS(SDNode *N, unsigned IntrID); | ||||
279 | void SelectInterpP1F16(SDNode *N); | ||||
280 | void SelectINTRINSIC_W_CHAIN(SDNode *N); | ||||
281 | void SelectINTRINSIC_WO_CHAIN(SDNode *N); | ||||
282 | void SelectINTRINSIC_VOID(SDNode *N); | ||||
283 | |||||
284 | protected: | ||||
285 | // Include the pieces autogenerated from the target description. | ||||
286 | #include "AMDGPUGenDAGISel.inc" | ||||
287 | }; | ||||
288 | |||||
289 | class R600DAGToDAGISel : public AMDGPUDAGToDAGISel { | ||||
290 | const R600Subtarget *Subtarget; | ||||
291 | |||||
292 | bool isConstantLoad(const MemSDNode *N, int cbID) const; | ||||
293 | bool SelectGlobalValueConstantOffset(SDValue Addr, SDValue& IntPtr); | ||||
294 | bool SelectGlobalValueVariableOffset(SDValue Addr, SDValue &BaseReg, | ||||
295 | SDValue& Offset); | ||||
296 | public: | ||||
297 | explicit R600DAGToDAGISel(TargetMachine *TM, CodeGenOpt::Level OptLevel) : | ||||
298 | AMDGPUDAGToDAGISel(TM, OptLevel) {} | ||||
299 | |||||
300 | void Select(SDNode *N) override; | ||||
301 | |||||
302 | bool SelectADDRIndirect(SDValue Addr, SDValue &Base, | ||||
303 | SDValue &Offset) override; | ||||
304 | bool SelectADDRVTX_READ(SDValue Addr, SDValue &Base, | ||||
305 | SDValue &Offset) override; | ||||
306 | |||||
307 | bool runOnMachineFunction(MachineFunction &MF) override; | ||||
308 | |||||
309 | void PreprocessISelDAG() override {} | ||||
310 | |||||
311 | protected: | ||||
312 | // Include the pieces autogenerated from the target description. | ||||
313 | #include "R600GenDAGISel.inc" | ||||
314 | }; | ||||
315 | |||||
316 | static SDValue stripBitcast(SDValue Val) { | ||||
317 | return Val.getOpcode() == ISD::BITCAST ? Val.getOperand(0) : Val; | ||||
318 | } | ||||
319 | |||||
320 | // Figure out if this is really an extract of the high 16-bits of a dword. | ||||
321 | static bool isExtractHiElt(SDValue In, SDValue &Out) { | ||||
322 | In = stripBitcast(In); | ||||
323 | |||||
324 | if (In.getOpcode() == ISD::EXTRACT_VECTOR_ELT) { | ||||
325 | if (ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(In.getOperand(1))) { | ||||
326 | if (!Idx->isOne()) | ||||
327 | return false; | ||||
328 | Out = In.getOperand(0); | ||||
329 | return true; | ||||
330 | } | ||||
331 | } | ||||
332 | |||||
333 | if (In.getOpcode() != ISD::TRUNCATE) | ||||
334 | return false; | ||||
335 | |||||
336 | SDValue Srl = In.getOperand(0); | ||||
337 | if (Srl.getOpcode() == ISD::SRL) { | ||||
338 | if (ConstantSDNode *ShiftAmt = dyn_cast<ConstantSDNode>(Srl.getOperand(1))) { | ||||
339 | if (ShiftAmt->getZExtValue() == 16) { | ||||
340 | Out = stripBitcast(Srl.getOperand(0)); | ||||
341 | return true; | ||||
342 | } | ||||
343 | } | ||||
344 | } | ||||
345 | |||||
346 | return false; | ||||
347 | } | ||||
348 | |||||
349 | // Look through operations that obscure just looking at the low 16-bits of the | ||||
350 | // same register. | ||||
351 | static SDValue stripExtractLoElt(SDValue In) { | ||||
352 | if (In.getOpcode() == ISD::EXTRACT_VECTOR_ELT) { | ||||
353 | if (ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(In.getOperand(1))) { | ||||
354 | if (Idx->isNullValue() && In.getValueSizeInBits() <= 32) | ||||
355 | return In.getOperand(0); | ||||
356 | } | ||||
357 | } | ||||
358 | |||||
359 | if (In.getOpcode() == ISD::TRUNCATE) { | ||||
360 | SDValue Src = In.getOperand(0); | ||||
361 | if (Src.getValueType().getSizeInBits() == 32) | ||||
362 | return stripBitcast(Src); | ||||
363 | } | ||||
364 | |||||
365 | return In; | ||||
366 | } | ||||
367 | |||||
368 | } // end anonymous namespace | ||||
369 | |||||
370 | INITIALIZE_PASS_BEGIN(AMDGPUDAGToDAGISel, "amdgpu-isel",static void *initializeAMDGPUDAGToDAGISelPassOnce(PassRegistry &Registry) { | ||||
371 | "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)static void *initializeAMDGPUDAGToDAGISelPassOnce(PassRegistry &Registry) { | ||||
372 | INITIALIZE_PASS_DEPENDENCY(AMDGPUArgumentUsageInfo)initializeAMDGPUArgumentUsageInfoPass(Registry); | ||||
373 | INITIALIZE_PASS_DEPENDENCY(AMDGPUPerfHintAnalysis)initializeAMDGPUPerfHintAnalysisPass(Registry); | ||||
374 | INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)initializeLegacyDivergenceAnalysisPass(Registry); | ||||
375 | #ifdef EXPENSIVE_CHECKS | ||||
376 | INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)initializeDominatorTreeWrapperPassPass(Registry); | ||||
377 | INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)initializeLoopInfoWrapperPassPass(Registry); | ||||
378 | #endif | ||||
379 | INITIALIZE_PASS_END(AMDGPUDAGToDAGISel, "amdgpu-isel",PassInfo *PI = new PassInfo( "AMDGPU DAG->DAG Pattern Instruction Selection" , "amdgpu-isel", &AMDGPUDAGToDAGISel::ID, PassInfo::NormalCtor_t (callDefaultCtor<AMDGPUDAGToDAGISel>), false, false); Registry .registerPass(*PI, true); return PI; } static llvm::once_flag InitializeAMDGPUDAGToDAGISelPassFlag; void llvm::initializeAMDGPUDAGToDAGISelPass (PassRegistry &Registry) { llvm::call_once(InitializeAMDGPUDAGToDAGISelPassFlag , initializeAMDGPUDAGToDAGISelPassOnce, std::ref(Registry)); } | ||||
380 | "AMDGPU DAG->DAG Pattern Instruction Selection", false, false)PassInfo *PI = new PassInfo( "AMDGPU DAG->DAG Pattern Instruction Selection" , "amdgpu-isel", &AMDGPUDAGToDAGISel::ID, PassInfo::NormalCtor_t (callDefaultCtor<AMDGPUDAGToDAGISel>), false, false); Registry .registerPass(*PI, true); return PI; } static llvm::once_flag InitializeAMDGPUDAGToDAGISelPassFlag; void llvm::initializeAMDGPUDAGToDAGISelPass (PassRegistry &Registry) { llvm::call_once(InitializeAMDGPUDAGToDAGISelPassFlag , initializeAMDGPUDAGToDAGISelPassOnce, std::ref(Registry)); } | ||||
381 | |||||
382 | /// This pass converts a legalized DAG into a AMDGPU-specific | ||||
383 | // DAG, ready for instruction scheduling. | ||||
384 | FunctionPass *llvm::createAMDGPUISelDag(TargetMachine *TM, | ||||
385 | CodeGenOpt::Level OptLevel) { | ||||
386 | return new AMDGPUDAGToDAGISel(TM, OptLevel); | ||||
387 | } | ||||
388 | |||||
389 | /// This pass converts a legalized DAG into a R600-specific | ||||
390 | // DAG, ready for instruction scheduling. | ||||
391 | FunctionPass *llvm::createR600ISelDag(TargetMachine *TM, | ||||
392 | CodeGenOpt::Level OptLevel) { | ||||
393 | return new R600DAGToDAGISel(TM, OptLevel); | ||||
394 | } | ||||
395 | |||||
396 | bool AMDGPUDAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { | ||||
397 | #ifdef EXPENSIVE_CHECKS | ||||
398 | DominatorTree & DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); | ||||
399 | LoopInfo * LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); | ||||
400 | for (auto &L : LI->getLoopsInPreorder()) { | ||||
401 | assert(L->isLCSSAForm(DT))((void)0); | ||||
402 | } | ||||
403 | #endif | ||||
404 | Subtarget = &MF.getSubtarget<GCNSubtarget>(); | ||||
405 | Mode = AMDGPU::SIModeRegisterDefaults(MF.getFunction()); | ||||
406 | return SelectionDAGISel::runOnMachineFunction(MF); | ||||
407 | } | ||||
408 | |||||
409 | bool AMDGPUDAGToDAGISel::fp16SrcZerosHighBits(unsigned Opc) const { | ||||
410 | // XXX - only need to list legal operations. | ||||
411 | switch (Opc) { | ||||
412 | case ISD::FADD: | ||||
413 | case ISD::FSUB: | ||||
414 | case ISD::FMUL: | ||||
415 | case ISD::FDIV: | ||||
416 | case ISD::FREM: | ||||
417 | case ISD::FCANONICALIZE: | ||||
418 | case ISD::UINT_TO_FP: | ||||
419 | case ISD::SINT_TO_FP: | ||||
420 | case ISD::FABS: | ||||
421 | // Fabs is lowered to a bit operation, but it's an and which will clear the | ||||
422 | // high bits anyway. | ||||
423 | case ISD::FSQRT: | ||||
424 | case ISD::FSIN: | ||||
425 | case ISD::FCOS: | ||||
426 | case ISD::FPOWI: | ||||
427 | case ISD::FPOW: | ||||
428 | case ISD::FLOG: | ||||
429 | case ISD::FLOG2: | ||||
430 | case ISD::FLOG10: | ||||
431 | case ISD::FEXP: | ||||
432 | case ISD::FEXP2: | ||||
433 | case ISD::FCEIL: | ||||
434 | case ISD::FTRUNC: | ||||
435 | case ISD::FRINT: | ||||
436 | case ISD::FNEARBYINT: | ||||
437 | case ISD::FROUND: | ||||
438 | case ISD::FFLOOR: | ||||
439 | case ISD::FMINNUM: | ||||
440 | case ISD::FMAXNUM: | ||||
441 | case AMDGPUISD::FRACT: | ||||
442 | case AMDGPUISD::CLAMP: | ||||
443 | case AMDGPUISD::COS_HW: | ||||
444 | case AMDGPUISD::SIN_HW: | ||||
445 | case AMDGPUISD::FMIN3: | ||||
446 | case AMDGPUISD::FMAX3: | ||||
447 | case AMDGPUISD::FMED3: | ||||
448 | case AMDGPUISD::FMAD_FTZ: | ||||
449 | case AMDGPUISD::RCP: | ||||
450 | case AMDGPUISD::RSQ: | ||||
451 | case AMDGPUISD::RCP_IFLAG: | ||||
452 | case AMDGPUISD::LDEXP: | ||||
453 | // On gfx10, all 16-bit instructions preserve the high bits. | ||||
454 | return Subtarget->getGeneration() <= AMDGPUSubtarget::GFX9; | ||||
455 | case ISD::FP_ROUND: | ||||
456 | // We may select fptrunc (fma/mad) to mad_mixlo, which does not zero the | ||||
457 | // high bits on gfx9. | ||||
458 | // TODO: If we had the source node we could see if the source was fma/mad | ||||
459 | return Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS; | ||||
460 | case ISD::FMA: | ||||
461 | case ISD::FMAD: | ||||
462 | case AMDGPUISD::DIV_FIXUP: | ||||
463 | return Subtarget->getGeneration() == AMDGPUSubtarget::VOLCANIC_ISLANDS; | ||||
464 | default: | ||||
465 | // fcopysign, select and others may be lowered to 32-bit bit operations | ||||
466 | // which don't zero the high bits. | ||||
467 | return false; | ||||
468 | } | ||||
469 | } | ||||
470 | |||||
471 | bool AMDGPUDAGToDAGISel::matchLoadD16FromBuildVector(SDNode *N) const { | ||||
472 | assert(Subtarget->d16PreservesUnusedBits())((void)0); | ||||
473 | MVT VT = N->getValueType(0).getSimpleVT(); | ||||
474 | if (VT != MVT::v2i16 && VT != MVT::v2f16) | ||||
475 | return false; | ||||
476 | |||||
477 | SDValue Lo = N->getOperand(0); | ||||
478 | SDValue Hi = N->getOperand(1); | ||||
479 | |||||
480 | LoadSDNode *LdHi = dyn_cast<LoadSDNode>(stripBitcast(Hi)); | ||||
481 | |||||
482 | // build_vector lo, (load ptr) -> load_d16_hi ptr, lo | ||||
483 | // build_vector lo, (zextload ptr from i8) -> load_d16_hi_u8 ptr, lo | ||||
484 | // build_vector lo, (sextload ptr from i8) -> load_d16_hi_i8 ptr, lo | ||||
485 | |||||
486 | // Need to check for possible indirect dependencies on the other half of the | ||||
487 | // vector to avoid introducing a cycle. | ||||
488 | if (LdHi && Hi.hasOneUse() && !LdHi->isPredecessorOf(Lo.getNode())) { | ||||
489 | SDVTList VTList = CurDAG->getVTList(VT, MVT::Other); | ||||
490 | |||||
491 | SDValue TiedIn = CurDAG->getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), VT, Lo); | ||||
492 | SDValue Ops[] = { | ||||
493 | LdHi->getChain(), LdHi->getBasePtr(), TiedIn | ||||
494 | }; | ||||
495 | |||||
496 | unsigned LoadOp = AMDGPUISD::LOAD_D16_HI; | ||||
497 | if (LdHi->getMemoryVT() == MVT::i8) { | ||||
498 | LoadOp = LdHi->getExtensionType() == ISD::SEXTLOAD ? | ||||
499 | AMDGPUISD::LOAD_D16_HI_I8 : AMDGPUISD::LOAD_D16_HI_U8; | ||||
500 | } else { | ||||
501 | assert(LdHi->getMemoryVT() == MVT::i16)((void)0); | ||||
502 | } | ||||
503 | |||||
504 | SDValue NewLoadHi = | ||||
505 | CurDAG->getMemIntrinsicNode(LoadOp, SDLoc(LdHi), VTList, | ||||
506 | Ops, LdHi->getMemoryVT(), | ||||
507 | LdHi->getMemOperand()); | ||||
508 | |||||
509 | CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewLoadHi); | ||||
510 | CurDAG->ReplaceAllUsesOfValueWith(SDValue(LdHi, 1), NewLoadHi.getValue(1)); | ||||
511 | return true; | ||||
512 | } | ||||
513 | |||||
514 | // build_vector (load ptr), hi -> load_d16_lo ptr, hi | ||||
515 | // build_vector (zextload ptr from i8), hi -> load_d16_lo_u8 ptr, hi | ||||
516 | // build_vector (sextload ptr from i8), hi -> load_d16_lo_i8 ptr, hi | ||||
517 | LoadSDNode *LdLo = dyn_cast<LoadSDNode>(stripBitcast(Lo)); | ||||
518 | if (LdLo
| ||||
519 | SDValue TiedIn = getHi16Elt(Hi); | ||||
520 | if (!TiedIn || LdLo->isPredecessorOf(TiedIn.getNode())) | ||||
521 | return false; | ||||
522 | |||||
523 | SDVTList VTList = CurDAG->getVTList(VT, MVT::Other); | ||||
524 | unsigned LoadOp = AMDGPUISD::LOAD_D16_LO; | ||||
525 | if (LdLo->getMemoryVT() == MVT::i8) { | ||||
526 | LoadOp = LdLo->getExtensionType() == ISD::SEXTLOAD ? | ||||
527 | AMDGPUISD::LOAD_D16_LO_I8 : AMDGPUISD::LOAD_D16_LO_U8; | ||||
528 | } else { | ||||
529 | assert(LdLo->getMemoryVT() == MVT::i16)((void)0); | ||||
530 | } | ||||
531 | |||||
532 | TiedIn = CurDAG->getNode(ISD::BITCAST, SDLoc(N), VT, TiedIn); | ||||
533 | |||||
534 | SDValue Ops[] = { | ||||
535 | LdLo->getChain(), LdLo->getBasePtr(), TiedIn | ||||
536 | }; | ||||
537 | |||||
538 | SDValue NewLoadLo = | ||||
539 | CurDAG->getMemIntrinsicNode(LoadOp, SDLoc(LdLo), VTList, | ||||
540 | Ops, LdLo->getMemoryVT(), | ||||
541 | LdLo->getMemOperand()); | ||||
542 | |||||
543 | CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), NewLoadLo); | ||||
544 | CurDAG->ReplaceAllUsesOfValueWith(SDValue(LdLo, 1), NewLoadLo.getValue(1)); | ||||
545 | return true; | ||||
546 | } | ||||
547 | |||||
548 | return false; | ||||
549 | } | ||||
550 | |||||
551 | void AMDGPUDAGToDAGISel::PreprocessISelDAG() { | ||||
552 | if (!Subtarget->d16PreservesUnusedBits()) | ||||
| |||||
553 | return; | ||||
554 | |||||
555 | SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end(); | ||||
556 | |||||
557 | bool MadeChange = false; | ||||
558 | while (Position != CurDAG->allnodes_begin()) { | ||||
559 | SDNode *N = &*--Position; | ||||
560 | if (N->use_empty()) | ||||
561 | continue; | ||||
562 | |||||
563 | switch (N->getOpcode()) { | ||||
564 | case ISD::BUILD_VECTOR: | ||||
565 | MadeChange |= matchLoadD16FromBuildVector(N); | ||||
566 | break; | ||||
567 | default: | ||||
568 | break; | ||||
569 | } | ||||
570 | } | ||||
571 | |||||
572 | if (MadeChange) { | ||||
573 | CurDAG->RemoveDeadNodes(); | ||||
574 | LLVM_DEBUG(dbgs() << "After PreProcess:\n";do { } while (false) | ||||
575 | CurDAG->dump();)do { } while (false); | ||||
576 | } | ||||
577 | } | ||||
578 | |||||
579 | bool AMDGPUDAGToDAGISel::isNoNanSrc(SDValue N) const { | ||||
580 | if (TM.Options.NoNaNsFPMath) | ||||
581 | return true; | ||||
582 | |||||
583 | // TODO: Move into isKnownNeverNaN | ||||
584 | if (N->getFlags().hasNoNaNs()) | ||||
585 | return true; | ||||
586 | |||||
587 | return CurDAG->isKnownNeverNaN(N); | ||||
588 | } | ||||
589 | |||||
590 | bool AMDGPUDAGToDAGISel::isInlineImmediate(const SDNode *N, | ||||
591 | bool Negated) const { | ||||
592 | if (N->isUndef()) | ||||
593 | return true; | ||||
594 | |||||
595 | const SIInstrInfo *TII = Subtarget->getInstrInfo(); | ||||
596 | if (Negated) { | ||||
597 | if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) | ||||
598 | return TII->isInlineConstant(-C->getAPIntValue()); | ||||
599 | |||||
600 | if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) | ||||
601 | return TII->isInlineConstant(-C->getValueAPF().bitcastToAPInt()); | ||||
602 | |||||
603 | } else { | ||||
604 | if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N)) | ||||
605 | return TII->isInlineConstant(C->getAPIntValue()); | ||||
606 | |||||
607 | if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N)) | ||||
608 | return TII->isInlineConstant(C->getValueAPF().bitcastToAPInt()); | ||||
609 | } | ||||
610 | |||||
611 | return false; | ||||
612 | } | ||||
613 | |||||
614 | /// Determine the register class for \p OpNo | ||||
615 | /// \returns The register class of the virtual register that will be used for | ||||
616 | /// the given operand number \OpNo or NULL if the register class cannot be | ||||
617 | /// determined. | ||||
618 | const TargetRegisterClass *AMDGPUDAGToDAGISel::getOperandRegClass(SDNode *N, | ||||
619 | unsigned OpNo) const { | ||||
620 | if (!N->isMachineOpcode()) { | ||||
621 | if (N->getOpcode() == ISD::CopyToReg) { | ||||
622 | Register Reg = cast<RegisterSDNode>(N->getOperand(1))->getReg(); | ||||
623 | if (Reg.isVirtual()) { | ||||
624 | MachineRegisterInfo &MRI = CurDAG->getMachineFunction().getRegInfo(); | ||||
625 | return MRI.getRegClass(Reg); | ||||
626 | } | ||||
627 | |||||
628 | const SIRegisterInfo *TRI | ||||
629 | = static_cast<const GCNSubtarget *>(Subtarget)->getRegisterInfo(); | ||||
630 | return TRI->getPhysRegClass(Reg); | ||||
631 | } | ||||
632 | |||||
633 | return nullptr; | ||||
634 | } | ||||
635 | |||||
636 | switch (N->getMachineOpcode()) { | ||||
637 | default: { | ||||
638 | const MCInstrDesc &Desc = | ||||
639 | Subtarget->getInstrInfo()->get(N->getMachineOpcode()); | ||||
640 | unsigned OpIdx = Desc.getNumDefs() + OpNo; | ||||
641 | if (OpIdx >= Desc.getNumOperands()) | ||||
642 | return nullptr; | ||||
643 | int RegClass = Desc.OpInfo[OpIdx].RegClass; | ||||
644 | if (RegClass == -1) | ||||
645 | return nullptr; | ||||
646 | |||||
647 | return Subtarget->getRegisterInfo()->getRegClass(RegClass); | ||||
648 | } | ||||
649 | case AMDGPU::REG_SEQUENCE: { | ||||
650 | unsigned RCID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); | ||||
651 | const TargetRegisterClass *SuperRC = | ||||
652 | Subtarget->getRegisterInfo()->getRegClass(RCID); | ||||
653 | |||||
654 | SDValue SubRegOp = N->getOperand(OpNo + 1); | ||||
655 | unsigned SubRegIdx = cast<ConstantSDNode>(SubRegOp)->getZExtValue(); | ||||
656 | return Subtarget->getRegisterInfo()->getSubClassWithSubReg(SuperRC, | ||||
657 | SubRegIdx); | ||||
658 | } | ||||
659 | } | ||||
660 | } | ||||
661 | |||||
662 | SDNode *AMDGPUDAGToDAGISel::glueCopyToOp(SDNode *N, SDValue NewChain, | ||||
663 | SDValue Glue) const { | ||||
664 | SmallVector <SDValue, 8> Ops; | ||||
665 | Ops.push_back(NewChain); // Replace the chain. | ||||
666 | for (unsigned i = 1, e = N->getNumOperands(); i != e; ++i) | ||||
667 | Ops.push_back(N->getOperand(i)); | ||||
668 | |||||
669 | Ops.push_back(Glue); | ||||
670 | return CurDAG->MorphNodeTo(N, N->getOpcode(), N->getVTList(), Ops); | ||||
671 | } | ||||
672 | |||||
673 | SDNode *AMDGPUDAGToDAGISel::glueCopyToM0(SDNode *N, SDValue Val) const { | ||||
674 | const SITargetLowering& Lowering = | ||||
675 | *static_cast<const SITargetLowering*>(getTargetLowering()); | ||||
676 | |||||
677 | assert(N->getOperand(0).getValueType() == MVT::Other && "Expected chain")((void)0); | ||||
678 | |||||
679 | SDValue M0 = Lowering.copyToM0(*CurDAG, N->getOperand(0), SDLoc(N), Val); | ||||
680 | return glueCopyToOp(N, M0, M0.getValue(1)); | ||||
681 | } | ||||
682 | |||||
683 | SDNode *AMDGPUDAGToDAGISel::glueCopyToM0LDSInit(SDNode *N) const { | ||||
684 | unsigned AS = cast<MemSDNode>(N)->getAddressSpace(); | ||||
685 | if (AS == AMDGPUAS::LOCAL_ADDRESS) { | ||||
686 | if (Subtarget->ldsRequiresM0Init()) | ||||
687 | return glueCopyToM0(N, CurDAG->getTargetConstant(-1, SDLoc(N), MVT::i32)); | ||||
688 | } else if (AS == AMDGPUAS::REGION_ADDRESS) { | ||||
689 | MachineFunction &MF = CurDAG->getMachineFunction(); | ||||
690 | unsigned Value = MF.getInfo<SIMachineFunctionInfo>()->getGDSSize(); | ||||
691 | return | ||||
692 | glueCopyToM0(N, CurDAG->getTargetConstant(Value, SDLoc(N), MVT::i32)); | ||||
693 | } | ||||
694 | return N; | ||||
695 | } | ||||
696 | |||||
697 | MachineSDNode *AMDGPUDAGToDAGISel::buildSMovImm64(SDLoc &DL, uint64_t Imm, | ||||
698 | EVT VT) const { | ||||
699 | SDNode *Lo = CurDAG->getMachineNode( | ||||
700 | AMDGPU::S_MOV_B32, DL, MVT::i32, | ||||
701 | CurDAG->getTargetConstant(Imm & 0xFFFFFFFF, DL, MVT::i32)); | ||||
702 | SDNode *Hi = | ||||
703 | CurDAG->getMachineNode(AMDGPU::S_MOV_B32, DL, MVT::i32, | ||||
704 | CurDAG->getTargetConstant(Imm >> 32, DL, MVT::i32)); | ||||
705 | const SDValue Ops[] = { | ||||
706 | CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32), | ||||
707 | SDValue(Lo, 0), CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32), | ||||
708 | SDValue(Hi, 0), CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32)}; | ||||
709 | |||||
710 | return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, VT, Ops); | ||||
711 | } | ||||
712 | |||||
// Lower BUILD_VECTOR / SCALAR_TO_VECTOR into a REG_SEQUENCE over the register
// class identified by RegClassID, placing element i into subregister
// channel i. Missing trailing elements (scalar_to_vector) become IMPLICIT_DEF.
void AMDGPUDAGToDAGISel::SelectBuildVector(SDNode *N, unsigned RegClassID) {
  EVT VT = N->getValueType(0);
  unsigned NumVectorElts = VT.getVectorNumElements();
  EVT EltVT = VT.getVectorElementType();
  SDLoc DL(N);
  SDValue RegClass = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);

  // A 1-element vector is just a copy into the requested register class.
  if (NumVectorElts == 1) {
    CurDAG->SelectNodeTo(N, AMDGPU::COPY_TO_REGCLASS, EltVT, N->getOperand(0),
                         RegClass);
    return;
  }

  assert(NumVectorElts <= 32 && "Vectors with more than 32 elements not "((void)0)
                                "supported yet")((void)0);
  // 32 = Max Num Vector Elements
  // 2 = 2 REG_SEQUENCE operands per element (value, subreg index)
  // 1 = Vector Register Class
  SmallVector<SDValue, 32 * 2 + 1> RegSeqArgs(NumVectorElts * 2 + 1);

  // Subregister numbering differs between the GCN and R600 register files.
  bool IsGCN = CurDAG->getSubtarget().getTargetTriple().getArch() ==
               Triple::amdgcn;
  RegSeqArgs[0] = CurDAG->getTargetConstant(RegClassID, DL, MVT::i32);
  bool IsRegSeq = true;
  unsigned NOps = N->getNumOperands();
  for (unsigned i = 0; i < NOps; i++) {
    // XXX: Why is this here?
    if (isa<RegisterSDNode>(N->getOperand(i))) {
      IsRegSeq = false;
      break;
    }
    unsigned Sub = IsGCN ? SIRegisterInfo::getSubRegFromChannel(i)
                         : R600RegisterInfo::getSubRegFromChannel(i);
    // Slots 1+2i / 2+2i hold the element value and its subreg index.
    RegSeqArgs[1 + (2 * i)] = N->getOperand(i);
    RegSeqArgs[1 + (2 * i) + 1] = CurDAG->getTargetConstant(Sub, DL, MVT::i32);
  }
  if (NOps != NumVectorElts) {
    // Fill in the missing undef elements if this was a scalar_to_vector.
    assert(N->getOpcode() == ISD::SCALAR_TO_VECTOR && NOps < NumVectorElts)((void)0);
    MachineSDNode *ImpDef = CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,
                                                   DL, EltVT);
    for (unsigned i = NOps; i < NumVectorElts; ++i) {
      unsigned Sub = IsGCN ? SIRegisterInfo::getSubRegFromChannel(i)
                           : R600RegisterInfo::getSubRegFromChannel(i);
      RegSeqArgs[1 + (2 * i)] = SDValue(ImpDef, 0);
      RegSeqArgs[1 + (2 * i) + 1] =
          CurDAG->getTargetConstant(Sub, DL, MVT::i32);
    }
  }

  // NOTE(review): when a RegisterSDNode operand is found (!IsRegSeq),
  // SelectCode(N) already replaces N, yet control still falls through to
  // SelectNodeTo below. Confirm a `return` is not missing after SelectCode.
  if (!IsRegSeq)
    SelectCode(N);
  CurDAG->SelectNodeTo(N, AMDGPU::REG_SEQUENCE, N->getVTList(), RegSeqArgs);
}
767 | |||||
// Top-level instruction-selection dispatch: handles the node kinds that need
// custom selection (multi-result instructions, M0 initialization, 64-bit
// immediates, carry arithmetic, ...) and defers everything else to the
// tablegen-generated matcher via SelectCode.
void AMDGPUDAGToDAGISel::Select(SDNode *N) {
  unsigned int Opc = N->getOpcode();
  // Machine nodes were produced by an earlier selection step; leave them.
  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return; // Already selected.
  }

  // isa<MemSDNode> almost works but is slightly too permissive for some DS
  // intrinsics.
  if (Opc == ISD::LOAD || Opc == ISD::STORE || isa<AtomicSDNode>(N) ||
      (Opc == AMDGPUISD::ATOMIC_INC || Opc == AMDGPUISD::ATOMIC_DEC ||
       Opc == ISD::ATOMIC_LOAD_FADD ||
       Opc == AMDGPUISD::ATOMIC_LOAD_FMIN ||
       Opc == AMDGPUISD::ATOMIC_LOAD_FMAX)) {
    // Memory operations may need M0 set up for LDS/GDS access (see
    // glueCopyToM0LDSInit), then go through normal table selection.
    N = glueCopyToM0LDSInit(N);
    SelectCode(N);
    return;
  }

  switch (Opc) {
  default:
    break;
  // We are selecting i64 ADD here instead of custom lower it during
  // DAG legalization, so we can fold some i64 ADDs used for address
  // calculation into the LOAD and STORE instructions.
  case ISD::ADDC:
  case ISD::ADDE:
  case ISD::SUBC:
  case ISD::SUBE: {
    if (N->getValueType(0) != MVT::i64)
      break;

    SelectADD_SUB_I64(N);
    return;
  }
  case ISD::ADDCARRY:
  case ISD::SUBCARRY:
    if (N->getValueType(0) != MVT::i32)
      break;

    SelectAddcSubb(N);
    return;
  case ISD::UADDO:
  case ISD::USUBO: {
    SelectUADDO_USUBO(N);
    return;
  }
  case AMDGPUISD::FMUL_W_CHAIN: {
    SelectFMUL_W_CHAIN(N);
    return;
  }
  case AMDGPUISD::FMA_W_CHAIN: {
    SelectFMA_W_CHAIN(N);
    return;
  }

  case ISD::SCALAR_TO_VECTOR:
  case ISD::BUILD_VECTOR: {
    EVT VT = N->getValueType(0);
    unsigned NumVectorElts = VT.getVectorNumElements();
    // 16-bit element vectors: only try to pack constant v2i16/v2f16 pairs;
    // everything else falls through to table selection.
    if (VT.getScalarSizeInBits() == 16) {
      if (Opc == ISD::BUILD_VECTOR && NumVectorElts == 2) {
        if (SDNode *Packed = packConstantV2I16(N, *CurDAG)) {
          ReplaceNode(N, Packed);
          return;
        }
      }

      break;
    }

    assert(VT.getVectorElementType().bitsEq(MVT::i32))((void)0);
    // NOTE(review): the static analyzer reports a null dereference through
    // here -- getSGPRClassForBitWidth can return null for widths it does not
    // handle, and the result is dereferenced unchecked. Confirm every
    // NumVectorElts reaching this point maps to a valid SGPR class.
    unsigned RegClassID =
        SIRegisterInfo::getSGPRClassForBitWidth(NumVectorElts * 32)->getID();
    SelectBuildVector(N, RegClassID);
    return;
  }
  case ISD::BUILD_PAIR: {
    // Combine two halves into one register with a REG_SEQUENCE; only i128
    // (two 64-bit halves) and i64 (two 32-bit halves) are expected here.
    SDValue RC, SubReg0, SubReg1;
    SDLoc DL(N);
    if (N->getValueType(0) == MVT::i128) {
      RC = CurDAG->getTargetConstant(AMDGPU::SGPR_128RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0_sub1, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub2_sub3, DL, MVT::i32);
    } else if (N->getValueType(0) == MVT::i64) {
      RC = CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32);
      SubReg0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
      SubReg1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);
    } else {
      llvm_unreachable("Unhandled value type for BUILD_PAIR")__builtin_unreachable();
    }
    const SDValue Ops[] = { RC, N->getOperand(0), SubReg0,
                            N->getOperand(1), SubReg1 };
    ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
                                          N->getValueType(0), Ops));
    return;
  }

  case ISD::Constant:
  case ISD::ConstantFP: {
    // Only 64-bit constants that can't be encoded as inline immediates need
    // the two-S_MOV_B32 expansion; the rest go through table selection.
    if (N->getValueType(0).getSizeInBits() != 64 || isInlineImmediate(N))
      break;

    uint64_t Imm;
    if (ConstantFPSDNode *FP = dyn_cast<ConstantFPSDNode>(N))
      Imm = FP->getValueAPF().bitcastToAPInt().getZExtValue();
    else {
      ConstantSDNode *C = cast<ConstantSDNode>(N);
      Imm = C->getZExtValue();
    }

    SDLoc DL(N);
    ReplaceNode(N, buildSMovImm64(DL, Imm, N->getValueType(0)));
    return;
  }
  case AMDGPUISD::BFE_I32:
  case AMDGPUISD::BFE_U32: {
    // There is a scalar version available, but unlike the vector version which
    // has a separate operand for the offset and width, the scalar version packs
    // the width and offset into a single operand. Try to move to the scalar
    // version if the offsets are constant, so that we can try to keep extended
    // loads of kernel arguments in SGPRs.

    // TODO: Technically we could try to pattern match scalar bitshifts of
    // dynamic values, but it's probably not useful.
    ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
    if (!Offset)
      break;

    ConstantSDNode *Width = dyn_cast<ConstantSDNode>(N->getOperand(2));
    if (!Width)
      break;

    bool Signed = Opc == AMDGPUISD::BFE_I32;

    uint32_t OffsetVal = Offset->getZExtValue();
    uint32_t WidthVal = Width->getZExtValue();

    ReplaceNode(N, getS_BFE(Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32,
                            SDLoc(N), N->getOperand(0), OffsetVal, WidthVal));
    return;
  }
  case AMDGPUISD::DIV_SCALE: {
    SelectDIV_SCALE(N);
    return;
  }
  case AMDGPUISD::MAD_I64_I32:
  case AMDGPUISD::MAD_U64_U32: {
    SelectMAD_64_32(N);
    return;
  }
  case ISD::CopyToReg: {
    // Legalize operands of the target-independent node, then let the table
    // matcher select it (note the break, not return).
    const SITargetLowering& Lowering =
        *static_cast<const SITargetLowering*>(getTargetLowering());
    N = Lowering.legalizeTargetIndependentNode(N, *CurDAG);
    break;
  }
  case ISD::AND:
  case ISD::SRL:
  case ISD::SRA:
  case ISD::SIGN_EXTEND_INREG:
    // These may fold into an S_BFE bitfield extract when i32-typed.
    if (N->getValueType(0) != MVT::i32)
      break;

    SelectS_BFE(N);
    return;
  case ISD::BRCOND:
    SelectBRCOND(N);
    return;
  case ISD::FMAD:
  case ISD::FMA:
    SelectFMAD_FMA(N);
    return;
  case AMDGPUISD::ATOMIC_CMP_SWAP:
    SelectATOMIC_CMP_SWAP(N);
    return;
  case AMDGPUISD::CVT_PKRTZ_F16_F32:
  case AMDGPUISD::CVT_PKNORM_I16_F32:
  case AMDGPUISD::CVT_PKNORM_U16_F32:
  case AMDGPUISD::CVT_PK_U16_U32:
  case AMDGPUISD::CVT_PK_I16_I32: {
    // Hack around using a legal type if f16 is illegal.
    if (N->getValueType(0) == MVT::i32) {
      MVT NewVT = Opc == AMDGPUISD::CVT_PKRTZ_F16_F32 ? MVT::v2f16 : MVT::v2i16;
      N = CurDAG->MorphNodeTo(N, N->getOpcode(), CurDAG->getVTList(NewVT),
                              { N->getOperand(0), N->getOperand(1) });
      SelectCode(N);
      return;
    }

    break;
  }
  case ISD::INTRINSIC_W_CHAIN: {
    SelectINTRINSIC_W_CHAIN(N);
    return;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    SelectINTRINSIC_WO_CHAIN(N);
    return;
  }
  case ISD::INTRINSIC_VOID: {
    SelectINTRINSIC_VOID(N);
    return;
  }
  }

  // Fall back to the tablegen-generated matcher.
  SelectCode(N);
}
976 | |||||
977 | bool AMDGPUDAGToDAGISel::isUniformBr(const SDNode *N) const { | ||||
978 | const BasicBlock *BB = FuncInfo->MBB->getBasicBlock(); | ||||
979 | const Instruction *Term = BB->getTerminator(); | ||||
980 | return Term->getMetadata("amdgpu.uniform") || | ||||
981 | Term->getMetadata("structurizecfg.uniform"); | ||||
982 | } | ||||
983 | |||||
// Recognize a 64-bit base+constant-offset address that was obscured by the
// earlier splitting of a 64-bit `or`: the address is a bitcast of a v2i32
// build_vector whose low lane or's in the offset. On success, N0 receives the
// base (the source of the extracted vector) and N1 the constant-offset value.
static bool getBaseWithOffsetUsingSplitOR(SelectionDAG &DAG, SDValue Addr,
                                          SDValue &N0, SDValue &N1) {
  if (Addr.getValueType() == MVT::i64 && Addr.getOpcode() == ISD::BITCAST &&
      Addr.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
    // As we split 64-bit `or` earlier, it's complicated pattern to match, i.e.
    // (i64 (bitcast (v2i32 (build_vector
    //                        (or (extract_vector_elt V, 0), OFFSET),
    //                        (extract_vector_elt V, 1)))))
    SDValue Lo = Addr.getOperand(0).getOperand(0);
    if (Lo.getOpcode() == ISD::OR && DAG.isBaseWithConstantOffset(Lo)) {
      SDValue BaseLo = Lo.getOperand(0);
      SDValue BaseHi = Addr.getOperand(0).getOperand(1);
      // Check that split base (Lo and Hi) are extracted from the same one.
      if (BaseLo.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
          BaseHi.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
          BaseLo.getOperand(0) == BaseHi.getOperand(0) &&
          // Lo is statically extracted from index 0.
          isa<ConstantSDNode>(BaseLo.getOperand(1)) &&
          BaseLo.getConstantOperandVal(1) == 0 &&
          // Hi is statically extracted from index 1. (The previous comment
          // said "index 0", contradicting the check below.)
          isa<ConstantSDNode>(BaseHi.getOperand(1)) &&
          BaseHi.getConstantOperandVal(1) == 1) {
        N0 = BaseLo.getOperand(0).getOperand(0);
        N1 = Lo.getOperand(1);
        return true;
      }
    }
  }
  return false;
}
1014 | |||||
1015 | bool AMDGPUDAGToDAGISel::isBaseWithConstantOffset64(SDValue Addr, SDValue &LHS, | ||||
1016 | SDValue &RHS) const { | ||||
1017 | if (CurDAG->isBaseWithConstantOffset(Addr)) { | ||||
1018 | LHS = Addr.getOperand(0); | ||||
1019 | RHS = Addr.getOperand(1); | ||||
1020 | return true; | ||||
1021 | } | ||||
1022 | |||||
1023 | if (getBaseWithOffsetUsingSplitOR(*CurDAG, Addr, LHS, RHS)) { | ||||
1024 | assert(LHS && RHS && isa<ConstantSDNode>(RHS))((void)0); | ||||
1025 | return true; | ||||
1026 | } | ||||
1027 | |||||
1028 | return false; | ||||
1029 | } | ||||
1030 | |||||
// Human-readable pass name reported by the legacy pass manager.
StringRef AMDGPUDAGToDAGISel::getPassName() const {
  return "AMDGPU DAG->DAG Pattern Instruction Selection";
}
1034 | |||||
1035 | //===----------------------------------------------------------------------===// | ||||
1036 | // Complex Patterns | ||||
1037 | //===----------------------------------------------------------------------===// | ||||
1038 | |||||
// Complex-pattern hook for VTX_READ addressing; this implementation never
// matches (always returns false), so patterns using it are rejected here.
bool AMDGPUDAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                                            SDValue &Offset) {
  return false;
}
1043 | |||||
1044 | bool AMDGPUDAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base, | ||||
1045 | SDValue &Offset) { | ||||
1046 | ConstantSDNode *C; | ||||
1047 | SDLoc DL(Addr); | ||||
1048 | |||||
1049 | if ((C = dyn_cast<ConstantSDNode>(Addr))) { | ||||
1050 | Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32); | ||||
1051 | Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32); | ||||
1052 | } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) && | ||||
1053 | (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) { | ||||
1054 | Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32); | ||||
1055 | Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32); | ||||
1056 | } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) && | ||||
1057 | (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) { | ||||
1058 | Base = Addr.getOperand(0); | ||||
1059 | Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32); | ||||
1060 | } else { | ||||
1061 | Base = Addr; | ||||
1062 | Offset = CurDAG->getTargetConstant(0, DL, MVT::i32); | ||||
1063 | } | ||||
1064 | |||||
1065 | return true; | ||||
1066 | } | ||||
1067 | |||||
1068 | SDValue AMDGPUDAGToDAGISel::getMaterializedScalarImm32(int64_t Val, | ||||
1069 | const SDLoc &DL) const { | ||||
1070 | SDNode *Mov = CurDAG->getMachineNode( | ||||
1071 | AMDGPU::S_MOV_B32, DL, MVT::i32, | ||||
1072 | CurDAG->getTargetConstant(Val, DL, MVT::i32)); | ||||
1073 | return SDValue(Mov, 0); | ||||
1074 | } | ||||
1075 | |||||
// FIXME: Should only handle addcarry/subcarry
// Expand a 64-bit add/sub (with optional carry-in/out) into two 32-bit ops:
// the low halves use the plain add/sub, the high halves use the carry-in
// variant fed by the low op's carry, and the two results are recombined with
// a REG_SEQUENCE. Scalar or VALU opcodes are chosen by node divergence.
void AMDGPUDAGToDAGISel::SelectADD_SUB_I64(SDNode *N) {
  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  unsigned Opcode = N->getOpcode();
  // ADDE/SUBE take a carry-in (operand 2); ADDC/SUBC only produce one.
  bool ConsumeCarry = (Opcode == ISD::ADDE || Opcode == ISD::SUBE);
  bool ProduceCarry =
      ConsumeCarry || Opcode == ISD::ADDC || Opcode == ISD::SUBC;
  bool IsAdd = Opcode == ISD::ADD || Opcode == ISD::ADDC || Opcode == ISD::ADDE;

  SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
  SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);

  // Split both operands into 32-bit low/high halves.
  SDNode *Lo0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub0);
  SDNode *Hi0 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, LHS, Sub1);

  SDNode *Lo1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub0);
  SDNode *Hi1 = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                       DL, MVT::i32, RHS, Sub1);

  SDVTList VTList = CurDAG->getVTList(MVT::i32, MVT::Glue);

  // Indexed as [carry-in?][divergent?][is-add?].
  static const unsigned OpcMap[2][2][2] = {
      {{AMDGPU::S_SUB_U32, AMDGPU::S_ADD_U32},
       {AMDGPU::V_SUB_CO_U32_e32, AMDGPU::V_ADD_CO_U32_e32}},
      {{AMDGPU::S_SUBB_U32, AMDGPU::S_ADDC_U32},
       {AMDGPU::V_SUBB_U32_e32, AMDGPU::V_ADDC_U32_e32}}};

  unsigned Opc = OpcMap[0][N->isDivergent()][IsAdd];
  unsigned CarryOpc = OpcMap[1][N->isDivergent()][IsAdd];

  // Low half: plain op, or carry-op when a carry-in must be consumed.
  SDNode *AddLo;
  if (!ConsumeCarry) {
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0) };
    AddLo = CurDAG->getMachineNode(Opc, DL, VTList, Args);
  } else {
    SDValue Args[] = { SDValue(Lo0, 0), SDValue(Lo1, 0), N->getOperand(2) };
    AddLo = CurDAG->getMachineNode(CarryOpc, DL, VTList, Args);
  }
  // High half always consumes the low half's carry (glue result 1).
  SDValue AddHiArgs[] = {
    SDValue(Hi0, 0),
    SDValue(Hi1, 0),
    SDValue(AddLo, 1)
  };
  SDNode *AddHi = CurDAG->getMachineNode(CarryOpc, DL, VTList, AddHiArgs);

  SDValue RegSequenceArgs[] = {
    CurDAG->getTargetConstant(AMDGPU::SReg_64RegClassID, DL, MVT::i32),
    SDValue(AddLo,0),
    Sub0,
    SDValue(AddHi,0),
    Sub1,
  };
  SDNode *RegSequence = CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
                                               MVT::i64, RegSequenceArgs);

  if (ProduceCarry) {
    // Replace the carry-use
    ReplaceUses(SDValue(N, 1), SDValue(AddHi, 1));
  }

  // Replace the remaining uses.
  ReplaceNode(N, RegSequence);
}
1145 | |||||
1146 | void AMDGPUDAGToDAGISel::SelectAddcSubb(SDNode *N) { | ||||
1147 | SDLoc DL(N); | ||||
1148 | SDValue LHS = N->getOperand(0); | ||||
1149 | SDValue RHS = N->getOperand(1); | ||||
1150 | SDValue CI = N->getOperand(2); | ||||
1151 | |||||
1152 | if (N->isDivergent()) { | ||||
1153 | unsigned Opc = N->getOpcode() == ISD::ADDCARRY ? AMDGPU::V_ADDC_U32_e64 | ||||
1154 | : AMDGPU::V_SUBB_U32_e64; | ||||
1155 | CurDAG->SelectNodeTo( | ||||
1156 | N, Opc, N->getVTList(), | ||||
1157 | {LHS, RHS, CI, | ||||
1158 | CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/}); | ||||
1159 | } else { | ||||
1160 | unsigned Opc = N->getOpcode() == ISD::ADDCARRY ? AMDGPU::S_ADD_CO_PSEUDO | ||||
1161 | : AMDGPU::S_SUB_CO_PSEUDO; | ||||
1162 | CurDAG->SelectNodeTo(N, Opc, N->getVTList(), {LHS, RHS, CI}); | ||||
1163 | } | ||||
1164 | } | ||||
1165 | |||||
1166 | void AMDGPUDAGToDAGISel::SelectUADDO_USUBO(SDNode *N) { | ||||
1167 | // The name of the opcodes are misleading. v_add_i32/v_sub_i32 have unsigned | ||||
1168 | // carry out despite the _i32 name. These were renamed in VI to _U32. | ||||
1169 | // FIXME: We should probably rename the opcodes here. | ||||
1170 | bool IsAdd = N->getOpcode() == ISD::UADDO; | ||||
1171 | bool IsVALU = N->isDivergent(); | ||||
1172 | |||||
1173 | for (SDNode::use_iterator UI = N->use_begin(), E = N->use_end(); UI != E; | ||||
1174 | ++UI) | ||||
1175 | if (UI.getUse().getResNo() == 1) { | ||||
1176 | if ((IsAdd && (UI->getOpcode() != ISD::ADDCARRY)) || | ||||
1177 | (!IsAdd && (UI->getOpcode() != ISD::SUBCARRY))) { | ||||
1178 | IsVALU = true; | ||||
1179 | break; | ||||
1180 | } | ||||
1181 | } | ||||
1182 | |||||
1183 | if (IsVALU) { | ||||
1184 | unsigned Opc = IsAdd ? AMDGPU::V_ADD_CO_U32_e64 : AMDGPU::V_SUB_CO_U32_e64; | ||||
1185 | |||||
1186 | CurDAG->SelectNodeTo( | ||||
1187 | N, Opc, N->getVTList(), | ||||
1188 | {N->getOperand(0), N->getOperand(1), | ||||
1189 | CurDAG->getTargetConstant(0, {}, MVT::i1) /*clamp bit*/}); | ||||
1190 | } else { | ||||
1191 | unsigned Opc = N->getOpcode() == ISD::UADDO ? AMDGPU::S_UADDO_PSEUDO | ||||
1192 | : AMDGPU::S_USUBO_PSEUDO; | ||||
1193 | |||||
1194 | CurDAG->SelectNodeTo(N, Opc, N->getVTList(), | ||||
1195 | {N->getOperand(0), N->getOperand(1)}); | ||||
1196 | } | ||||
1197 | } | ||||
1198 | |||||
// Select FMA_W_CHAIN to V_FMA_F32_e64. The Ops array is positional and must
// match the e64 operand order exactly.
void AMDGPUDAGToDAGISel::SelectFMA_W_CHAIN(SDNode *N) {
  SDLoc SL(N);
  // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp, omod
  SDValue Ops[10];

  // Each SelectVOP3Mods* call fills a (modifiers, value) pair; the Mods0
  // variant additionally fills clamp (Ops[6]) and omod (Ops[7]).
  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  SelectVOP3Mods(N->getOperand(3), Ops[5], Ops[4]);
  // Ops[8]/Ops[9] carry over N's operands 0 and 4 -- presumably the chain
  // and glue of the _W_CHAIN node; confirm against the node's construction.
  Ops[8] = N->getOperand(0);
  Ops[9] = N->getOperand(4);

  CurDAG->SelectNodeTo(N, AMDGPU::V_FMA_F32_e64, N->getVTList(), Ops);
}
1212 | |||||
// Select FMUL_W_CHAIN to V_MUL_F32_e64. The Ops array is positional and must
// match the e64 operand order exactly.
void AMDGPUDAGToDAGISel::SelectFMUL_W_CHAIN(SDNode *N) {
  SDLoc SL(N);
  // src0_modifiers, src0, src1_modifiers, src1, clamp, omod
  SDValue Ops[8];

  // (modifiers, value) pairs for the two sources; Mods0 also fills clamp
  // (Ops[4]) and omod (Ops[5]).
  SelectVOP3Mods0(N->getOperand(1), Ops[1], Ops[0], Ops[4], Ops[5]);
  SelectVOP3Mods(N->getOperand(2), Ops[3], Ops[2]);
  // Ops[6]/Ops[7] carry over N's operands 0 and 3 -- presumably the chain
  // and glue of the _W_CHAIN node; confirm against the node's construction.
  Ops[6] = N->getOperand(0);
  Ops[7] = N->getOperand(3);

  CurDAG->SelectNodeTo(N, AMDGPU::V_MUL_F32_e64, N->getVTList(), Ops);
}
1225 | |||||
// We need to handle this here because tablegen doesn't support matching
// instructions with multiple outputs.
// Select DIV_SCALE to V_DIV_SCALE_F32/F64_e64; the Ops array is positional
// and must match the e64 operand order exactly.
void AMDGPUDAGToDAGISel::SelectDIV_SCALE(SDNode *N) {
  SDLoc SL(N);
  EVT VT = N->getValueType(0);

  assert(VT == MVT::f32 || VT == MVT::f64)((void)0);

  unsigned Opc
    = (VT == MVT::f64) ? AMDGPU::V_DIV_SCALE_F64_e64 : AMDGPU::V_DIV_SCALE_F32_e64;

  // src0_modifiers, src0, src1_modifiers, src1, src2_modifiers, src2, clamp,
  // omod
  SDValue Ops[8];
  // (modifiers, value) pairs for the three sources; the BMods0 variant also
  // fills clamp (Ops[6]) and omod (Ops[7]).
  SelectVOP3BMods0(N->getOperand(0), Ops[1], Ops[0], Ops[6], Ops[7]);
  SelectVOP3BMods(N->getOperand(1), Ops[3], Ops[2]);
  SelectVOP3BMods(N->getOperand(2), Ops[5], Ops[4]);
  CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
}
1245 | |||||
1246 | // We need to handle this here because tablegen doesn't support matching | ||||
1247 | // instructions with multiple outputs. | ||||
1248 | void AMDGPUDAGToDAGISel::SelectMAD_64_32(SDNode *N) { | ||||
1249 | SDLoc SL(N); | ||||
1250 | bool Signed = N->getOpcode() == AMDGPUISD::MAD_I64_I32; | ||||
1251 | unsigned Opc = Signed ? AMDGPU::V_MAD_I64_I32_e64 : AMDGPU::V_MAD_U64_U32_e64; | ||||
1252 | |||||
1253 | SDValue Clamp = CurDAG->getTargetConstant(0, SL, MVT::i1); | ||||
1254 | SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), | ||||
1255 | Clamp }; | ||||
1256 | CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops); | ||||
1257 | } | ||||
1258 | |||||
1259 | bool AMDGPUDAGToDAGISel::isDSOffsetLegal(SDValue Base, unsigned Offset) const { | ||||
1260 | if (!isUInt<16>(Offset)) | ||||
1261 | return false; | ||||
1262 | |||||
1263 | if (!Base || Subtarget->hasUsableDSOffset() || | ||||
1264 | Subtarget->unsafeDSOffsetFoldingEnabled()) | ||||
1265 | return true; | ||||
1266 | |||||
1267 | // On Southern Islands instruction with a negative base value and an offset | ||||
1268 | // don't seem to work. | ||||
1269 | return CurDAG->SignBitIsZero(Base); | ||||
1270 | } | ||||
1271 | |||||
// Match a DS address as (Base, 16-bit immediate Offset). Always succeeds;
// the fallback uses the whole address as Base with a zero offset.
bool AMDGPUDAGToDAGISel::SelectDS1Addr1Offset(SDValue Addr, SDValue &Base,
                                              SDValue &Offset) const {
  SDLoc DL(Addr);
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (isDSOffsetLegal(N0, C1->getSExtValue())) {
      // (add n0, c0)
      Base = N0;
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      int64_t ByteOffset = C->getSExtValue();
      if (isDSOffsetLegal(SDValue(), ByteOffset)) {
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub = CurDAG->getNode(ISD::SUB, DL, MVT::i32,
                                      Zero, Addr.getOperand(1));

        if (isDSOffsetLegal(Sub, ByteOffset)) {
          SmallVector<SDValue, 3> Opnds;
          Opnds.push_back(Zero);
          Opnds.push_back(Addr.getOperand(1));

          // FIXME: Select to VOP3 version for with-carry.
          unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
          if (Subtarget->hasAddNoCarry()) {
            // No-carry targets use the e64 form, which takes a clamp bit.
            SubOp = AMDGPU::V_SUB_U32_e64;
            Opnds.push_back(
                CurDAG->getTargetConstant(0, {}, MVT::i1)); // clamp bit
          }

          MachineSDNode *MachineSub =
              CurDAG->getMachineNode(SubOp, DL, MVT::i32, Opnds);

          Base = SDValue(MachineSub, 0);
          Offset = CurDAG->getTargetConstant(ByteOffset, DL, MVT::i16);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    // If we have a constant address, prefer to put the constant into the
    // offset. This can save moves to load the constant address since multiple
    // operations can share the zero base address register, and enables merging
    // into read2 / write2 instructions.

    SDLoc DL(Addr);

    if (isDSOffsetLegal(SDValue(), CAddr->getZExtValue())) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero = CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32,
                                                      DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // default case
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i16);
  return true;
}
1343 | |||||
1344 | bool AMDGPUDAGToDAGISel::isDSOffset2Legal(SDValue Base, unsigned Offset0, | ||||
1345 | unsigned Offset1, | ||||
1346 | unsigned Size) const { | ||||
1347 | if (Offset0 % Size != 0 || Offset1 % Size != 0) | ||||
1348 | return false; | ||||
1349 | if (!isUInt<8>(Offset0 / Size) || !isUInt<8>(Offset1 / Size)) | ||||
1350 | return false; | ||||
1351 | |||||
1352 | if (!Base || Subtarget->hasUsableDSOffset() || | ||||
1353 | Subtarget->unsafeDSOffsetFoldingEnabled()) | ||||
1354 | return true; | ||||
1355 | |||||
1356 | // On Southern Islands instruction with a negative base value and an offset | ||||
1357 | // don't seem to work. | ||||
1358 | return CurDAG->SignBitIsZero(Base); | ||||
1359 | } | ||||
1360 | |||||
// TODO: If offset is too big, put low 16-bit into offset.
// ds_read2_b32 / ds_write2_b32 addressing: two 32-bit elements, offsets
// scaled by 4 bytes.
bool AMDGPUDAGToDAGISel::SelectDS64Bit4ByteAligned(SDValue Addr, SDValue &Base,
                                                   SDValue &Offset0,
                                                   SDValue &Offset1) const {
  return SelectDSReadWrite2(Addr, Base, Offset0, Offset1, 4);
}
1367 | |||||
// ds_read2_b64 / ds_write2_b64 addressing: two 64-bit elements, offsets
// scaled by 8 bytes.
bool AMDGPUDAGToDAGISel::SelectDS128Bit8ByteAligned(SDValue Addr, SDValue &Base,
                                                    SDValue &Offset0,
                                                    SDValue &Offset1) const {
  return SelectDSReadWrite2(Addr, Base, Offset0, Offset1, 8);
}
1373 | |||||
// Match an address for a DS read2/write2-style access of two elements of
// \p Size bytes each. On success \p Base holds the register address and
// \p Offset0 / \p Offset1 the two element offsets, encoded as i8 target
// constants in units of \p Size. Always succeeds: the fallback uses the
// address as-is with offsets {0, 1}.
bool AMDGPUDAGToDAGISel::SelectDSReadWrite2(SDValue Addr, SDValue &Base,
                                            SDValue &Offset0, SDValue &Offset1,
                                            unsigned Size) const {
  SDLoc DL(Addr);

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    unsigned OffsetValue0 = C1->getZExtValue();
    // The second element immediately follows the first in memory.
    unsigned OffsetValue1 = OffsetValue0 + Size;

    // (add n0, c0)
    if (isDSOffset2Legal(N0, OffsetValue0, OffsetValue1, Size)) {
      Base = N0;
      // Offsets are encoded in element-size units.
      Offset0 = CurDAG->getTargetConstant(OffsetValue0 / Size, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(OffsetValue1 / Size, DL, MVT::i8);
      return true;
    }
  } else if (Addr.getOpcode() == ISD::SUB) {
    // sub C, x -> add (sub 0, x), C
    if (const ConstantSDNode *C =
            dyn_cast<ConstantSDNode>(Addr.getOperand(0))) {
      unsigned OffsetValue0 = C->getZExtValue();
      unsigned OffsetValue1 = OffsetValue0 + Size;

      // First check legality of the offsets with no base (null SDValue).
      if (isDSOffset2Legal(SDValue(), OffsetValue0, OffsetValue1, Size)) {
        SDLoc DL(Addr);
        SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);

        // XXX - This is kind of hacky. Create a dummy sub node so we can check
        // the known bits in isDSOffsetLegal. We need to emit the selected node
        // here, so this is thrown away.
        SDValue Sub =
            CurDAG->getNode(ISD::SUB, DL, MVT::i32, Zero, Addr.getOperand(1));

        if (isDSOffset2Legal(Sub, OffsetValue0, OffsetValue1, Size)) {
          SmallVector<SDValue, 3> Opnds;
          Opnds.push_back(Zero);
          Opnds.push_back(Addr.getOperand(1));
          // Pre-gfx9 uses the carry-out form; gfx9+ has a no-carry sub that
          // takes an extra clamp operand.
          unsigned SubOp = AMDGPU::V_SUB_CO_U32_e32;
          if (Subtarget->hasAddNoCarry()) {
            SubOp = AMDGPU::V_SUB_U32_e64;
            Opnds.push_back(
                CurDAG->getTargetConstant(0, {}, MVT::i1)); // clamp bit
          }

          // Materialize (0 - x) as a machine node and use it as the base.
          MachineSDNode *MachineSub = CurDAG->getMachineNode(
              SubOp, DL, MVT::getIntegerVT(Size * 8), Opnds);

          Base = SDValue(MachineSub, 0);
          Offset0 = CurDAG->getTargetConstant(OffsetValue0 / Size, DL, MVT::i8);
          Offset1 = CurDAG->getTargetConstant(OffsetValue1 / Size, DL, MVT::i8);
          return true;
        }
      }
    }
  } else if (const ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    // Pure constant address: fold the whole value into the offsets and use a
    // materialized zero as the base register.
    unsigned OffsetValue0 = CAddr->getZExtValue();
    unsigned OffsetValue1 = OffsetValue0 + Size;

    if (isDSOffset2Legal(SDValue(), OffsetValue0, OffsetValue1, Size)) {
      SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
      MachineSDNode *MovZero =
          CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, DL, MVT::i32, Zero);
      Base = SDValue(MovZero, 0);
      Offset0 = CurDAG->getTargetConstant(OffsetValue0 / Size, DL, MVT::i8);
      Offset1 = CurDAG->getTargetConstant(OffsetValue1 / Size, DL, MVT::i8);
      return true;
    }
  }

  // default case: address as the base, element offsets {0, 1}.

  Base = Addr;
  Offset0 = CurDAG->getTargetConstant(0, DL, MVT::i8);
  Offset1 = CurDAG->getTargetConstant(1, DL, MVT::i8);
  return true;
}
1453 | |||||
// Decompose \p Addr into the operand set of a generic MUBUF access:
// \p Ptr (scalar base pointer), \p VAddr (VGPR address), \p SOffset (scalar
// offset register), \p Offset (immediate offset), and the i1 mode flags
// \p Offen / \p Idxen / \p Addr64. Returns false only when the subtarget
// prefers flat instructions; otherwise always produces a match.
bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr, SDValue &VAddr,
                                     SDValue &SOffset, SDValue &Offset,
                                     SDValue &Offen, SDValue &Idxen,
                                     SDValue &Addr64) const {
  // Subtarget prefers to use flat instruction
  // FIXME: This should be a pattern predicate and not reach here
  if (Subtarget->useFlatForGlobal())
    return false;

  SDLoc DL(Addr);

  // Default everything to the plain-offset form; individual paths below
  // override Addr64 / SOffset as needed.
  Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
  Addr64 = CurDAG->getTargetConstant(0, DL, MVT::i1);
  SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);

  // Peel off a constant offset if present and it fits in 32 bits; C1 stays
  // null when there is no usable constant.
  ConstantSDNode *C1 = nullptr;
  SDValue N0 = Addr;
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    C1 = cast<ConstantSDNode>(Addr.getOperand(1));
    if (isUInt<32>(C1->getZExtValue()))
      N0 = Addr.getOperand(0);
    else
      C1 = nullptr;
  }

  if (N0.getOpcode() == ISD::ADD) {
    // (add N2, N3) -> addr64, or
    // (add (add N2, N3), C1) -> addr64
    SDValue N2 = N0.getOperand(0);
    SDValue N3 = N0.getOperand(1);
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);

    // Divergence decides which operand can be the scalar resource pointer and
    // which must go in the VGPR address.
    if (N2->isDivergent()) {
      if (N3->isDivergent()) {
        // Both N2 and N3 are divergent. Use N0 (the result of the add) as the
        // addr64, and construct the resource from a 0 address.
        Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0);
        VAddr = N0;
      } else {
        // N2 is divergent, N3 is not.
        Ptr = N3;
        VAddr = N2;
      }
    } else {
      // N2 is not divergent.
      Ptr = N2;
      VAddr = N3;
    }
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  } else if (N0->isDivergent()) {
    // N0 is divergent. Use it as the addr64, and construct the resource from a
    // 0 address.
    Ptr = SDValue(buildSMovImm64(DL, 0, MVT::v2i32), 0);
    VAddr = N0;
    Addr64 = CurDAG->getTargetConstant(1, DL, MVT::i1);
  } else {
    // N0 -> offset, or
    // (N0 + C1) -> offset
    VAddr = CurDAG->getTargetConstant(0, DL, MVT::i32);
    Ptr = N0;
  }

  if (!C1) {
    // No offset.
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    return true;
  }

  if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue())) {
    // Legal offset for instruction.
    Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
    return true;
  }

  // Illegal offset, store it in soffset.
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  SOffset =
      SDValue(CurDAG->getMachineNode(
                  AMDGPU::S_MOV_B32, DL, MVT::i32,
                  CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32)),
              0);
  return true;
}
1538 | |||||
1539 | bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, | ||||
1540 | SDValue &VAddr, SDValue &SOffset, | ||||
1541 | SDValue &Offset) const { | ||||
1542 | SDValue Ptr, Offen, Idxen, Addr64; | ||||
1543 | |||||
1544 | // addr64 bit was removed for volcanic islands. | ||||
1545 | // FIXME: This should be a pattern predicate and not reach here | ||||
1546 | if (!Subtarget->hasAddr64()) | ||||
1547 | return false; | ||||
1548 | |||||
1549 | if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64)) | ||||
1550 | return false; | ||||
1551 | |||||
1552 | ConstantSDNode *C = cast<ConstantSDNode>(Addr64); | ||||
1553 | if (C->getSExtValue()) { | ||||
1554 | SDLoc DL(Addr); | ||||
1555 | |||||
1556 | const SITargetLowering& Lowering = | ||||
1557 | *static_cast<const SITargetLowering*>(getTargetLowering()); | ||||
1558 | |||||
1559 | SRsrc = SDValue(Lowering.wrapAddr64Rsrc(*CurDAG, DL, Ptr), 0); | ||||
1560 | return true; | ||||
1561 | } | ||||
1562 | |||||
1563 | return false; | ||||
1564 | } | ||||
1565 | |||||
1566 | std::pair<SDValue, SDValue> AMDGPUDAGToDAGISel::foldFrameIndex(SDValue N) const { | ||||
1567 | SDLoc DL(N); | ||||
1568 | |||||
1569 | auto *FI = dyn_cast<FrameIndexSDNode>(N); | ||||
1570 | SDValue TFI = | ||||
1571 | FI ? CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0)) : N; | ||||
1572 | |||||
1573 | // We rebase the base address into an absolute stack address and hence | ||||
1574 | // use constant 0 for soffset. This value must be retained until | ||||
1575 | // frame elimination and eliminateFrameIndex will choose the appropriate | ||||
1576 | // frame register if need be. | ||||
1577 | return std::make_pair(TFI, CurDAG->getTargetConstant(0, DL, MVT::i32)); | ||||
1578 | } | ||||
1579 | |||||
// Match \p Addr as a MUBUF scratch access using the offen (VGPR offset) form.
// Fills \p Rsrc with the function's scratch resource descriptor, \p VAddr
// with the VGPR address, \p SOffset with the scalar offset and \p ImmOffset
// with the 12-bit immediate. Always succeeds except for the constant
// null-pointer case, which falls through to the generic path at the end.
bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffen(SDNode *Parent,
                                                 SDValue Addr, SDValue &Rsrc,
                                                 SDValue &VAddr, SDValue &SOffset,
                                                 SDValue &ImmOffset) const {

  SDLoc DL(Addr);
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();

  Rsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);

  if (ConstantSDNode *CAddr = dyn_cast<ConstantSDNode>(Addr)) {
    int64_t Imm = CAddr->getSExtValue();
    const int64_t NullPtr =
        AMDGPUTargetMachine::getNullPointerValue(AMDGPUAS::PRIVATE_ADDRESS);
    // Don't fold null pointer.
    if (Imm != NullPtr) {
      // Split the constant: high bits (above the 12-bit immediate field) go
      // into a materialized VGPR, low 12 bits into the immediate offset.
      SDValue HighBits = CurDAG->getTargetConstant(Imm & ~4095, DL, MVT::i32);
      MachineSDNode *MovHighBits = CurDAG->getMachineNode(
        AMDGPU::V_MOV_B32_e32, DL, MVT::i32, HighBits);
      VAddr = SDValue(MovHighBits, 0);

      SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
      ImmOffset = CurDAG->getTargetConstant(Imm & 4095, DL, MVT::i16);
      return true;
    }
  }

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    // (add n0, c1)

    SDValue N0 = Addr.getOperand(0);
    SDValue N1 = Addr.getOperand(1);

    // Offsets in vaddr must be positive if range checking is enabled.
    //
    // The total computation of vaddr + soffset + offset must not overflow. If
    // vaddr is negative, even if offset is 0 the sgpr offset add will end up
    // overflowing.
    //
    // Prior to gfx9, MUBUF instructions with the vaddr offset enabled would
    // always perform a range check. If a negative vaddr base index was used,
    // this would fail the range check. The overall address computation would
    // compute a valid address, but this doesn't happen due to the range
    // check. For out-of-bounds MUBUF loads, a 0 is returned.
    //
    // Therefore it should be safe to fold any VGPR offset on gfx9 into the
    // MUBUF vaddr, but not on older subtargets which can only do this if the
    // sign bit is known 0.
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);
    if (SIInstrInfo::isLegalMUBUFImmOffset(C1->getZExtValue()) &&
        (!Subtarget->privateMemoryResourceIsRangeChecked() ||
         CurDAG->SignBitIsZero(N0))) {
      std::tie(VAddr, SOffset) = foldFrameIndex(N0);
      ImmOffset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i16);
      return true;
    }
  }

  // (node)
  std::tie(VAddr, SOffset) = foldFrameIndex(Addr);
  ImmOffset = CurDAG->getTargetConstant(0, DL, MVT::i16);
  return true;
}
1644 | |||||
1645 | static bool IsCopyFromSGPR(const SIRegisterInfo &TRI, SDValue Val) { | ||||
1646 | if (Val.getOpcode() != ISD::CopyFromReg) | ||||
1647 | return false; | ||||
1648 | auto RC = | ||||
1649 | TRI.getPhysRegClass(cast<RegisterSDNode>(Val.getOperand(1))->getReg()); | ||||
1650 | return RC && TRI.isSGPRClass(RC); | ||||
1651 | } | ||||
1652 | |||||
// Match \p Addr as a MUBUF scratch access using only a scalar offset (no
// VGPR address). Accepted shapes: a CopyFromReg of an SGPR, an add of such a
// copy with a legal immediate, or a bare legal constant. \p SRsrc is always
// the function's scratch resource descriptor.
bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
                                                  SDValue Addr,
                                                  SDValue &SRsrc,
                                                  SDValue &SOffset,
                                                  SDValue &Offset) const {
  const SIRegisterInfo *TRI =
      static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
  MachineFunction &MF = CurDAG->getMachineFunction();
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
  SDLoc DL(Addr);

  // CopyFromReg <sgpr>
  if (IsCopyFromSGPR(*TRI, Addr)) {
    SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);
    SOffset = Addr;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i16);
    return true;
  }

  // CAddr is set by whichever branch below matches; both later read its
  // value for the immediate offset.
  ConstantSDNode *CAddr;
  if (Addr.getOpcode() == ISD::ADD) {
    // Add (CopyFromReg <sgpr>) <constant>
    CAddr = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
    if (!CAddr || !SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue()))
      return false;
    if (!IsCopyFromSGPR(*TRI, Addr.getOperand(0)))
      return false;

    SOffset = Addr.getOperand(0);
  } else if ((CAddr = dyn_cast<ConstantSDNode>(Addr)) &&
             SIInstrInfo::isLegalMUBUFImmOffset(CAddr->getZExtValue())) {
    // <constant>
    SOffset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  } else {
    return false;
  }

  SRsrc = CurDAG->getRegister(Info->getScratchRSrcReg(), MVT::v4i32);

  Offset = CurDAG->getTargetConstant(CAddr->getZExtValue(), DL, MVT::i16);
  return true;
}
1695 | |||||
1696 | bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, | ||||
1697 | SDValue &SOffset, SDValue &Offset | ||||
1698 | ) const { | ||||
1699 | SDValue Ptr, VAddr, Offen, Idxen, Addr64; | ||||
1700 | const SIInstrInfo *TII = | ||||
1701 | static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo()); | ||||
1702 | |||||
1703 | if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64)) | ||||
1704 | return false; | ||||
1705 | |||||
1706 | if (!cast<ConstantSDNode>(Offen)->getSExtValue() && | ||||
1707 | !cast<ConstantSDNode>(Idxen)->getSExtValue() && | ||||
1708 | !cast<ConstantSDNode>(Addr64)->getSExtValue()) { | ||||
1709 | uint64_t Rsrc = TII->getDefaultRsrcDataFormat() | | ||||
1710 | APInt::getAllOnesValue(32).getZExtValue(); // Size | ||||
1711 | SDLoc DL(Addr); | ||||
1712 | |||||
1713 | const SITargetLowering& Lowering = | ||||
1714 | *static_cast<const SITargetLowering*>(getTargetLowering()); | ||||
1715 | |||||
1716 | SRsrc = SDValue(Lowering.buildRSRC(*CurDAG, DL, Ptr, 0, Rsrc), 0); | ||||
1717 | return true; | ||||
1718 | } | ||||
1719 | return false; | ||||
1720 | } | ||||
1721 | |||||
// Find a load or store from corresponding pattern root.
// Roots may be build_vector, bitconvert or their combinations.
// Strips bitcasts from the root; if that does not yield a MemSDNode
// directly, the root must be a build_vector whose operands (after stripping
// bitcasts) contain one.
static MemSDNode* findMemSDNode(SDNode *N) {
  N = AMDGPUTargetLowering::stripBitcast(SDValue(N,0)).getNode();
  if (MemSDNode *MN = dyn_cast<MemSDNode>(N))
    return MN;
  assert(isa<BuildVectorSDNode>(N))((void)0);
  // Scan the build_vector operands for the underlying memory access.
  for (SDValue V : N->op_values())
    if (MemSDNode *MN =
          dyn_cast<MemSDNode>(AMDGPUTargetLowering::stripBitcast(V)))
      return MN;
  llvm_unreachable("cannot find MemSDNode in the pattern!")__builtin_unreachable();
}
1735 | |||||
// Match \p Addr as (vaddr + imm offset) for a flat-family instruction of
// kind \p FlatVariant (FLAT / FlatGlobal / FlatScratch). Always succeeds:
// when no legal constant offset can be folded, the offset is 0 and \p VAddr
// is the full address. Over-large offsets are split, with the remainder
// added back to the address with VALU instructions.
bool AMDGPUDAGToDAGISel::SelectFlatOffsetImpl(SDNode *N, SDValue Addr,
                                              SDValue &VAddr, SDValue &Offset,
                                              uint64_t FlatVariant) const {
  int64_t OffsetVal = 0;

  unsigned AS = findMemSDNode(N)->getAddressSpace();

  // On affected subtargets, plain FLAT accesses to flat/global address
  // spaces cannot use the offset field at all.
  bool CanHaveFlatSegmentOffsetBug =
      Subtarget->hasFlatSegmentOffsetBug() &&
      FlatVariant == SIInstrFlags::FLAT &&
      (AS == AMDGPUAS::FLAT_ADDRESS || AS == AMDGPUAS::GLOBAL_ADDRESS);

  if (Subtarget->hasFlatInstOffsets() && !CanHaveFlatSegmentOffsetBug) {
    SDValue N0, N1;
    if (isBaseWithConstantOffset64(Addr, N0, N1)) {
      int64_t COffsetVal = cast<ConstantSDNode>(N1)->getSExtValue();

      const SIInstrInfo *TII = Subtarget->getInstrInfo();
      if (TII->isLegalFLATOffset(COffsetVal, AS, FlatVariant)) {
        // The whole constant fits in the instruction's offset field.
        Addr = N0;
        OffsetVal = COffsetVal;
      } else {
        // If the offset doesn't fit, put the low bits into the offset field and
        // add the rest.
        //
        // For a FLAT instruction the hardware decides whether to access
        // global/scratch/shared memory based on the high bits of vaddr,
        // ignoring the offset field, so we have to ensure that when we add
        // remainder to vaddr it still points into the same underlying object.
        // The easiest way to do that is to make sure that we split the offset
        // into two pieces that are both >= 0 or both <= 0.

        SDLoc DL(N);
        uint64_t RemainderOffset;

        std::tie(OffsetVal, RemainderOffset) =
            TII->splitFlatOffset(COffsetVal, AS, FlatVariant);

        SDValue AddOffsetLo =
            getMaterializedScalarImm32(Lo_32(RemainderOffset), DL);
        SDValue Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1);

        if (Addr.getValueType().getSizeInBits() == 32) {
          // 32-bit address: a single VALU add covers the remainder.
          SmallVector<SDValue, 3> Opnds;
          Opnds.push_back(N0);
          Opnds.push_back(AddOffsetLo);
          unsigned AddOp = AMDGPU::V_ADD_CO_U32_e32;
          if (Subtarget->hasAddNoCarry()) {
            AddOp = AMDGPU::V_ADD_U32_e64;
            Opnds.push_back(Clamp);
          }
          Addr = SDValue(CurDAG->getMachineNode(AddOp, DL, MVT::i32, Opnds), 0);
        } else {
          // TODO: Should this try to use a scalar add pseudo if the base address
          // is uniform and saddr is usable?
          // 64-bit address: split into lo/hi halves, do add + add-with-carry,
          // then reassemble the 64-bit result with a REG_SEQUENCE.
          SDValue Sub0 = CurDAG->getTargetConstant(AMDGPU::sub0, DL, MVT::i32);
          SDValue Sub1 = CurDAG->getTargetConstant(AMDGPU::sub1, DL, MVT::i32);

          SDNode *N0Lo = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                                DL, MVT::i32, N0, Sub0);
          SDNode *N0Hi = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
                                                DL, MVT::i32, N0, Sub1);

          SDValue AddOffsetHi =
              getMaterializedScalarImm32(Hi_32(RemainderOffset), DL);

          SDVTList VTs = CurDAG->getVTList(MVT::i32, MVT::i1);

          SDNode *Add =
              CurDAG->getMachineNode(AMDGPU::V_ADD_CO_U32_e64, DL, VTs,
                                     {AddOffsetLo, SDValue(N0Lo, 0), Clamp});

          SDNode *Addc = CurDAG->getMachineNode(
              AMDGPU::V_ADDC_U32_e64, DL, VTs,
              {AddOffsetHi, SDValue(N0Hi, 0), SDValue(Add, 1), Clamp});

          SDValue RegSequenceArgs[] = {
              CurDAG->getTargetConstant(AMDGPU::VReg_64RegClassID, DL, MVT::i32),
              SDValue(Add, 0), Sub0, SDValue(Addc, 0), Sub1};

          Addr = SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, DL,
                                                MVT::i64, RegSequenceArgs),
                         0);
        }
      }
    }
  }

  VAddr = Addr;
  Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i16);
  return true;
}
1828 | |||||
// Match (vaddr + offset) for a plain FLAT instruction.
bool AMDGPUDAGToDAGISel::SelectFlatOffset(SDNode *N, SDValue Addr,
                                          SDValue &VAddr,
                                          SDValue &Offset) const {
  return SelectFlatOffsetImpl(N, Addr, VAddr, Offset, SIInstrFlags::FLAT);
}
1834 | |||||
// Match (vaddr + offset) for a global_* flat instruction.
bool AMDGPUDAGToDAGISel::SelectGlobalOffset(SDNode *N, SDValue Addr,
                                            SDValue &VAddr,
                                            SDValue &Offset) const {
  return SelectFlatOffsetImpl(N, Addr, VAddr, Offset, SIInstrFlags::FlatGlobal);
}
1840 | |||||
// Match (vaddr + offset) for a scratch_* flat instruction.
bool AMDGPUDAGToDAGISel::SelectScratchOffset(SDNode *N, SDValue Addr,
                                             SDValue &VAddr,
                                             SDValue &Offset) const {
  return SelectFlatOffsetImpl(N, Addr, VAddr, Offset,
                              SIInstrFlags::FlatScratch);
}
1847 | |||||
1848 | // If this matches zero_extend i32:x, return x | ||||
1849 | static SDValue matchZExtFromI32(SDValue Op) { | ||||
1850 | if (Op.getOpcode() != ISD::ZERO_EXTEND) | ||||
1851 | return SDValue(); | ||||
1852 | |||||
1853 | SDValue ExtSrc = Op.getOperand(0); | ||||
1854 | return (ExtSrc.getValueType() == MVT::i32) ? ExtSrc : SDValue(); | ||||
1855 | } | ||||
1856 | |||||
// Match (64-bit SGPR base) + (zext vgpr offset) + sext(imm offset)
// for the global_* saddr addressing form. On success, \p SAddr is the
// uniform 64-bit base, \p VOffset the 32-bit VGPR offset and \p Offset the
// immediate.
// NOTE(review): the `if (SAddr)` / `if (!SAddr ...)` checks below rely on
// the caller passing SAddr in as a null (default-constructed) SDValue —
// confirm against the tablegen-generated callers.
bool AMDGPUDAGToDAGISel::SelectGlobalSAddr(SDNode *N,
                                           SDValue Addr,
                                           SDValue &SAddr,
                                           SDValue &VOffset,
                                           SDValue &Offset) const {
  int64_t ImmOffset = 0;

  // Match the immediate offset first, which canonically is moved as low as
  // possible.

  SDValue LHS, RHS;
  if (isBaseWithConstantOffset64(Addr, LHS, RHS)) {
    int64_t COffsetVal = cast<ConstantSDNode>(RHS)->getSExtValue();
    const SIInstrInfo *TII = Subtarget->getInstrInfo();

    if (TII->isLegalFLATOffset(COffsetVal, AMDGPUAS::GLOBAL_ADDRESS,
                               SIInstrFlags::FlatGlobal)) {
      // Whole constant fits in the instruction's immediate field.
      Addr = LHS;
      ImmOffset = COffsetVal;
    } else if (!LHS->isDivergent()) {
      if (COffsetVal > 0) {
        SDLoc SL(N);
        // saddr + large_offset -> saddr +
        //                         (voffset = large_offset & ~MaxOffset) +
        //                         (large_offset & MaxOffset);
        int64_t SplitImmOffset, RemainderOffset;
        std::tie(SplitImmOffset, RemainderOffset) = TII->splitFlatOffset(
            COffsetVal, AMDGPUAS::GLOBAL_ADDRESS, SIInstrFlags::FlatGlobal);

        if (isUInt<32>(RemainderOffset)) {
          // Materialize the remainder into the 32-bit VGPR offset.
          SDNode *VMov = CurDAG->getMachineNode(
              AMDGPU::V_MOV_B32_e32, SL, MVT::i32,
              CurDAG->getTargetConstant(RemainderOffset, SDLoc(), MVT::i32));
          VOffset = SDValue(VMov, 0);
          SAddr = LHS;
          Offset = CurDAG->getTargetConstant(SplitImmOffset, SDLoc(), MVT::i16);
          return true;
        }
      }

      // We are adding a 64 bit SGPR and a constant. If constant bus limit
      // is 1 we would need to perform 1 or 2 extra moves for each half of
      // the constant and it is better to do a scalar add and then issue a
      // single VALU instruction to materialize zero. Otherwise it is less
      // instructions to perform VALU adds with immediates or inline literals.
      unsigned NumLiterals =
          !TII->isInlineConstant(APInt(32, COffsetVal & 0xffffffff)) +
          !TII->isInlineConstant(APInt(32, COffsetVal >> 32));
      if (Subtarget->getConstantBusLimit(AMDGPU::V_ADD_U32_e64) > NumLiterals)
        return false;
    }
  }

  // Match the variable offset.
  if (Addr.getOpcode() == ISD::ADD) {
    LHS = Addr.getOperand(0);
    RHS = Addr.getOperand(1);

    if (!LHS->isDivergent()) {
      // add (i64 sgpr), (zero_extend (i32 vgpr))
      if (SDValue ZextRHS = matchZExtFromI32(RHS)) {
        SAddr = LHS;
        VOffset = ZextRHS;
      }
    }

    if (!SAddr && !RHS->isDivergent()) {
      // add (zero_extend (i32 vgpr)), (i64 sgpr)
      if (SDValue ZextLHS = matchZExtFromI32(LHS)) {
        SAddr = RHS;
        VOffset = ZextLHS;
      }
    }

    if (SAddr) {
      Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i16);
      return true;
    }
  }

  // A divergent, undef, or constant base cannot be used as saddr.
  if (Addr->isDivergent() || Addr.getOpcode() == ISD::UNDEF ||
      isa<ConstantSDNode>(Addr))
    return false;

  // It's cheaper to materialize a single 32-bit zero for vaddr than the two
  // moves required to copy a 64-bit SGPR to VGPR.
  SAddr = Addr;
  SDNode *VMov =
      CurDAG->getMachineNode(AMDGPU::V_MOV_B32_e32, SDLoc(Addr), MVT::i32,
                             CurDAG->getTargetConstant(0, SDLoc(), MVT::i32));
  VOffset = SDValue(VMov, 0);
  Offset = CurDAG->getTargetConstant(ImmOffset, SDLoc(), MVT::i16);
  return true;
}
1952 | |||||
1953 | static SDValue SelectSAddrFI(SelectionDAG *CurDAG, SDValue SAddr) { | ||||
1954 | if (auto FI = dyn_cast<FrameIndexSDNode>(SAddr)) { | ||||
1955 | SAddr = CurDAG->getTargetFrameIndex(FI->getIndex(), FI->getValueType(0)); | ||||
1956 | } else if (SAddr.getOpcode() == ISD::ADD && | ||||
1957 | isa<FrameIndexSDNode>(SAddr.getOperand(0))) { | ||||
1958 | // Materialize this into a scalar move for scalar address to avoid | ||||
1959 | // readfirstlane. | ||||
1960 | auto FI = cast<FrameIndexSDNode>(SAddr.getOperand(0)); | ||||
1961 | SDValue TFI = CurDAG->getTargetFrameIndex(FI->getIndex(), | ||||
1962 | FI->getValueType(0)); | ||||
1963 | SAddr = SDValue(CurDAG->getMachineNode(AMDGPU::S_ADD_I32, SDLoc(SAddr), | ||||
1964 | MVT::i32, TFI, SAddr.getOperand(1)), | ||||
1965 | 0); | ||||
1966 | } | ||||
1967 | |||||
1968 | return SAddr; | ||||
1969 | } | ||||
1970 | |||||
// Match (32-bit SGPR base) + sext(imm offset)
// for the scratch_* saddr form. Fails only for divergent addresses; an
// illegal constant offset is split, with the remainder folded into the
// scalar base via S_ADD_I32.
bool AMDGPUDAGToDAGISel::SelectScratchSAddr(SDNode *Parent, SDValue Addr,
                                            SDValue &SAddr,
                                            SDValue &Offset) const {
  if (Addr->isDivergent())
    return false;

  SDLoc DL(Addr);

  int64_t COffsetVal = 0;

  // Peel off a constant offset if present.
  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    COffsetVal = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
    SAddr = Addr.getOperand(0);
  } else {
    SAddr = Addr;
  }

  // Turn a frame-index base into its target form.
  SAddr = SelectSAddrFI(CurDAG, SAddr);

  const SIInstrInfo *TII = Subtarget->getInstrInfo();

  if (!TII->isLegalFLATOffset(COffsetVal, AMDGPUAS::PRIVATE_ADDRESS,
                              SIInstrFlags::FlatScratch)) {
    // Split: keep the legal part as the immediate, add the remainder to the
    // scalar base.
    int64_t SplitImmOffset, RemainderOffset;
    std::tie(SplitImmOffset, RemainderOffset) = TII->splitFlatOffset(
        COffsetVal, AMDGPUAS::PRIVATE_ADDRESS, SIInstrFlags::FlatScratch);

    COffsetVal = SplitImmOffset;

    // A frame-index base needs the remainder materialized in a register;
    // otherwise S_ADD_I32 can take it as an immediate operand directly.
    SDValue AddOffset =
        SAddr.getOpcode() == ISD::TargetFrameIndex
            ? getMaterializedScalarImm32(Lo_32(RemainderOffset), DL)
            : CurDAG->getTargetConstant(RemainderOffset, DL, MVT::i32);
    SAddr = SDValue(CurDAG->getMachineNode(AMDGPU::S_ADD_I32, DL, MVT::i32,
                                           SAddr, AddOffset),
                    0);
  }

  Offset = CurDAG->getTargetConstant(COffsetVal, DL, MVT::i16);

  return true;
}
2014 | |||||
2015 | bool AMDGPUDAGToDAGISel::SelectSMRDOffset(SDValue ByteOffsetNode, | ||||
2016 | SDValue &Offset, bool &Imm) const { | ||||
2017 | ConstantSDNode *C = dyn_cast<ConstantSDNode>(ByteOffsetNode); | ||||
2018 | if (!C) { | ||||
2019 | if (ByteOffsetNode.getValueType().isScalarInteger() && | ||||
2020 | ByteOffsetNode.getValueType().getSizeInBits() == 32) { | ||||
2021 | Offset = ByteOffsetNode; | ||||
2022 | Imm = false; | ||||
2023 | return true; | ||||
2024 | } | ||||
2025 | if (ByteOffsetNode.getOpcode() == ISD::ZERO_EXTEND) { | ||||
2026 | if (ByteOffsetNode.getOperand(0).getValueType().getSizeInBits() == 32) { | ||||
2027 | Offset = ByteOffsetNode.getOperand(0); | ||||
2028 | Imm = false; | ||||
2029 | return true; | ||||
2030 | } | ||||
2031 | } | ||||
2032 | return false; | ||||
2033 | } | ||||
2034 | |||||
2035 | SDLoc SL(ByteOffsetNode); | ||||
2036 | // GFX9 and GFX10 have signed byte immediate offsets. | ||||
2037 | int64_t ByteOffset = C->getSExtValue(); | ||||
2038 | Optional<int64_t> EncodedOffset = | ||||
2039 | AMDGPU::getSMRDEncodedOffset(*Subtarget, ByteOffset, false); | ||||
2040 | if (EncodedOffset) { | ||||
2041 | Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32); | ||||
2042 | Imm = true; | ||||
2043 | return true; | ||||
2044 | } | ||||
2045 | |||||
2046 | // SGPR and literal offsets are unsigned. | ||||
2047 | if (ByteOffset < 0) | ||||
2048 | return false; | ||||
2049 | |||||
2050 | EncodedOffset = AMDGPU::getSMRDEncodedLiteralOffset32(*Subtarget, ByteOffset); | ||||
2051 | if (EncodedOffset) { | ||||
2052 | Offset = CurDAG->getTargetConstant(*EncodedOffset, SL, MVT::i32); | ||||
2053 | return true; | ||||
2054 | } | ||||
2055 | |||||
2056 | if (!isUInt<32>(ByteOffset) && !isInt<32>(ByteOffset)) | ||||
2057 | return false; | ||||
2058 | |||||
2059 | SDValue C32Bit = CurDAG->getTargetConstant(ByteOffset, SL, MVT::i32); | ||||
2060 | Offset = SDValue( | ||||
2061 | CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, C32Bit), 0); | ||||
2062 | |||||
2063 | return true; | ||||
2064 | } | ||||
2065 | |||||
2066 | SDValue AMDGPUDAGToDAGISel::Expand32BitAddress(SDValue Addr) const { | ||||
2067 | if (Addr.getValueType() != MVT::i32) | ||||
2068 | return Addr; | ||||
2069 | |||||
2070 | // Zero-extend a 32-bit address. | ||||
2071 | SDLoc SL(Addr); | ||||
2072 | |||||
2073 | const MachineFunction &MF = CurDAG->getMachineFunction(); | ||||
2074 | const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>(); | ||||
2075 | unsigned AddrHiVal = Info->get32BitAddressHighBits(); | ||||
2076 | SDValue AddrHi = CurDAG->getTargetConstant(AddrHiVal, SL, MVT::i32); | ||||
2077 | |||||
2078 | const SDValue Ops[] = { | ||||
2079 | CurDAG->getTargetConstant(AMDGPU::SReg_64_XEXECRegClassID, SL, MVT::i32), | ||||
2080 | Addr, | ||||
2081 | CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32), | ||||
2082 | SDValue(CurDAG->getMachineNode(AMDGPU::S_MOV_B32, SL, MVT::i32, AddrHi), | ||||
2083 | 0), | ||||
2084 | CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32), | ||||
2085 | }; | ||||
2086 | |||||
2087 | return SDValue(CurDAG->getMachineNode(AMDGPU::REG_SEQUENCE, SL, MVT::i64, | ||||
2088 | Ops), 0); | ||||
2089 | } | ||||
2090 | |||||
// Match \p Addr as (sbase + offset) for an SMRD/S_LOAD instruction. Always
// succeeds; the fallback uses the whole address as the base with a zero
// immediate offset. \p Imm reports whether the offset was encoded as an
// immediate.
bool AMDGPUDAGToDAGISel::SelectSMRD(SDValue Addr, SDValue &SBase,
                                    SDValue &Offset, bool &Imm) const {
  SDLoc SL(Addr);

  // A 32-bit (address + offset) should not cause unsigned 32-bit integer
  // wraparound, because s_load instructions perform the addition in 64 bits.
  if ((Addr.getValueType() != MVT::i32 ||
       Addr->getFlags().hasNoUnsignedWrap())) {
    SDValue N0, N1;
    // Extract the base and offset if possible.
    if (CurDAG->isBaseWithConstantOffset(Addr) ||
        Addr.getOpcode() == ISD::ADD) {
      N0 = Addr.getOperand(0);
      N1 = Addr.getOperand(1);
    } else if (getBaseWithOffsetUsingSplitOR(*CurDAG, Addr, N0, N1)) {
      assert(N0 && N1 && isa<ConstantSDNode>(N1))((void)0);
    }
    // Both pieces found: try to encode the offset and (if the base is i32)
    // widen it to 64 bits.
    if (N0 && N1) {
      if (SelectSMRDOffset(N1, Offset, Imm)) {
        SBase = Expand32BitAddress(N0);
        return true;
      }
    }
  }
  // Fallback: whole address as base, zero immediate offset.
  SBase = Expand32BitAddress(Addr);
  Offset = CurDAG->getTargetConstant(0, SL, MVT::i32);
  Imm = true;
  return true;
}
2120 | |||||
2121 | bool AMDGPUDAGToDAGISel::SelectSMRDImm(SDValue Addr, SDValue &SBase, | ||||
2122 | SDValue &Offset) const { | ||||
2123 | bool Imm = false; | ||||
2124 | return SelectSMRD(Addr, SBase, Offset, Imm) && Imm; | ||||
2125 | } | ||||
2126 | |||||
2127 | bool AMDGPUDAGToDAGISel::SelectSMRDImm32(SDValue Addr, SDValue &SBase, | ||||
2128 | SDValue &Offset) const { | ||||
2129 | |||||
2130 | assert(Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS)((void)0); | ||||
2131 | |||||
2132 | bool Imm = false; | ||||
2133 | if (!SelectSMRD(Addr, SBase, Offset, Imm)) | ||||
2134 | return false; | ||||
2135 | |||||
2136 | return !Imm && isa<ConstantSDNode>(Offset); | ||||
2137 | } | ||||
2138 | |||||
2139 | bool AMDGPUDAGToDAGISel::SelectSMRDSgpr(SDValue Addr, SDValue &SBase, | ||||
2140 | SDValue &Offset) const { | ||||
2141 | bool Imm = false; | ||||
2142 | return SelectSMRD(Addr, SBase, Offset, Imm) && !Imm && | ||||
2143 | !isa<ConstantSDNode>(Offset); | ||||
2144 | } | ||||
2145 | |||||
2146 | bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm(SDValue Addr, | ||||
2147 | SDValue &Offset) const { | ||||
2148 | if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr)) { | ||||
2149 | // The immediate offset for S_BUFFER instructions is unsigned. | ||||
2150 | if (auto Imm = | ||||
2151 | AMDGPU::getSMRDEncodedOffset(*Subtarget, C->getZExtValue(), true)) { | ||||
2152 | Offset = CurDAG->getTargetConstant(*Imm, SDLoc(Addr), MVT::i32); | ||||
2153 | return true; | ||||
2154 | } | ||||
2155 | } | ||||
2156 | |||||
2157 | return false; | ||||
2158 | } | ||||
2159 | |||||
2160 | bool AMDGPUDAGToDAGISel::SelectSMRDBufferImm32(SDValue Addr, | ||||
2161 | SDValue &Offset) const { | ||||
2162 | assert(Subtarget->getGeneration() == AMDGPUSubtarget::SEA_ISLANDS)((void)0); | ||||
2163 | |||||
2164 | if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Addr)) { | ||||
2165 | if (auto Imm = AMDGPU::getSMRDEncodedLiteralOffset32(*Subtarget, | ||||
2166 | C->getZExtValue())) { | ||||
2167 | Offset = CurDAG->getTargetConstant(*Imm, SDLoc(Addr), MVT::i32); | ||||
2168 | return true; | ||||
2169 | } | ||||
2170 | } | ||||
2171 | |||||
2172 | return false; | ||||
2173 | } | ||||
2174 | |||||
/// Split an indirect-indexing operand into a register base plus a constant
/// offset for MOVREL addressing. Fails only when the index is itself a plain
/// constant (no register base to select).
bool AMDGPUDAGToDAGISel::SelectMOVRELOffset(SDValue Index,
                                            SDValue &Base,
                                            SDValue &Offset) const {
  SDLoc DL(Index);

  if (CurDAG->isBaseWithConstantOffset(Index)) {
    SDValue N0 = Index.getOperand(0);
    SDValue N1 = Index.getOperand(1);
    ConstantSDNode *C1 = cast<ConstantSDNode>(N1);

    // (add n0, c0)
    // Don't peel off the offset (c0) if doing so could possibly lead
    // the base (n0) to be negative.
    // (or n0, |c0|) can never change a sign given isBaseWithConstantOffset.
    if (C1->getSExtValue() <= 0 || CurDAG->SignBitIsZero(N0) ||
        (Index->getOpcode() == ISD::OR && C1->getSExtValue() >= 0)) {
      Base = N0;
      Offset = CurDAG->getTargetConstant(C1->getZExtValue(), DL, MVT::i32);
      return true;
    }
  }

  // A bare constant index has no register base; let the caller handle it.
  if (isa<ConstantSDNode>(Index))
    return false;

  // No splittable add/or: use the whole index with a zero offset.
  Base = Index;
  Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  return true;
}
2204 | |||||
2205 | SDNode *AMDGPUDAGToDAGISel::getS_BFE(unsigned Opcode, const SDLoc &DL, | ||||
2206 | SDValue Val, uint32_t Offset, | ||||
2207 | uint32_t Width) { | ||||
2208 | // Transformation function, pack the offset and width of a BFE into | ||||
2209 | // the format expected by the S_BFE_I32 / S_BFE_U32. In the second | ||||
2210 | // source, bits [5:0] contain the offset and bits [22:16] the width. | ||||
2211 | uint32_t PackedVal = Offset | (Width << 16); | ||||
2212 | SDValue PackedConst = CurDAG->getTargetConstant(PackedVal, DL, MVT::i32); | ||||
2213 | |||||
2214 | return CurDAG->getMachineNode(Opcode, DL, MVT::i32, Val, PackedConst); | ||||
2215 | } | ||||
2216 | |||||
2217 | void AMDGPUDAGToDAGISel::SelectS_BFEFromShifts(SDNode *N) { | ||||
2218 | // "(a << b) srl c)" ---> "BFE_U32 a, (c-b), (32-c) | ||||
2219 | // "(a << b) sra c)" ---> "BFE_I32 a, (c-b), (32-c) | ||||
2220 | // Predicate: 0 < b <= c < 32 | ||||
2221 | |||||
2222 | const SDValue &Shl = N->getOperand(0); | ||||
2223 | ConstantSDNode *B = dyn_cast<ConstantSDNode>(Shl->getOperand(1)); | ||||
2224 | ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1)); | ||||
2225 | |||||
2226 | if (B && C) { | ||||
2227 | uint32_t BVal = B->getZExtValue(); | ||||
2228 | uint32_t CVal = C->getZExtValue(); | ||||
2229 | |||||
2230 | if (0 < BVal && BVal <= CVal && CVal < 32) { | ||||
2231 | bool Signed = N->getOpcode() == ISD::SRA; | ||||
2232 | unsigned Opcode = Signed ? AMDGPU::S_BFE_I32 : AMDGPU::S_BFE_U32; | ||||
2233 | |||||
2234 | ReplaceNode(N, getS_BFE(Opcode, SDLoc(N), Shl.getOperand(0), CVal - BVal, | ||||
2235 | 32 - CVal)); | ||||
2236 | return; | ||||
2237 | } | ||||
2238 | } | ||||
2239 | SelectCode(N); | ||||
2240 | } | ||||
2241 | |||||
/// Try to select AND/SRL/SRA/SIGN_EXTEND_INREG patterns as a single scalar
/// bitfield-extract (S_BFE) instruction; any case that doesn't match falls
/// through to the generated matcher via SelectCode.
void AMDGPUDAGToDAGISel::SelectS_BFE(SDNode *N) {
  switch (N->getOpcode()) {
  case ISD::AND:
    if (N->getOperand(0).getOpcode() == ISD::SRL) {
      // "(a srl b) & mask" ---> "BFE_U32 a, b, popcount(mask)"
      // Predicate: isMask(mask)
      const SDValue &Srl = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(Srl.getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(N->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        uint32_t MaskVal = Mask->getZExtValue();

        // A contiguous low mask gives the extract width directly.
        if (isMask_32(MaskVal)) {
          uint32_t WidthVal = countPopulation(MaskVal);

          ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
                                  Srl.getOperand(0), ShiftVal, WidthVal));
          return;
        }
      }
    }
    break;
  case ISD::SRL:
    if (N->getOperand(0).getOpcode() == ISD::AND) {
      // "(a & mask) srl b)" ---> "BFE_U32 a, b, popcount(mask >> b)"
      // Predicate: isMask(mask >> b)
      const SDValue &And = N->getOperand(0);
      ConstantSDNode *Shift = dyn_cast<ConstantSDNode>(N->getOperand(1));
      ConstantSDNode *Mask = dyn_cast<ConstantSDNode>(And->getOperand(1));

      if (Shift && Mask) {
        uint32_t ShiftVal = Shift->getZExtValue();
        // The mask is applied before the shift, so shift it down first.
        uint32_t MaskVal = Mask->getZExtValue() >> ShiftVal;

        if (isMask_32(MaskVal)) {
          uint32_t WidthVal = countPopulation(MaskVal);

          ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_U32, SDLoc(N),
                                  And.getOperand(0), ShiftVal, WidthVal));
          return;
        }
      }
    } else if (N->getOperand(0).getOpcode() == ISD::SHL) {
      // (srl (shl ...)) is handled by the shared shift-pair helper.
      SelectS_BFEFromShifts(N);
      return;
    }
    break;
  case ISD::SRA:
    if (N->getOperand(0).getOpcode() == ISD::SHL) {
      // (sra (shl ...)) selects the signed BFE via the shared helper.
      SelectS_BFEFromShifts(N);
      return;
    }
    break;

  case ISD::SIGN_EXTEND_INREG: {
    // sext_inreg (srl x, 16), i8 -> bfe_i32 x, 16, 8
    SDValue Src = N->getOperand(0);
    if (Src.getOpcode() != ISD::SRL)
      break;

    const ConstantSDNode *Amt = dyn_cast<ConstantSDNode>(Src.getOperand(1));
    if (!Amt)
      break;

    // The sext_inreg VT gives the extract width; the shift gives the offset.
    unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
    ReplaceNode(N, getS_BFE(AMDGPU::S_BFE_I32, SDLoc(N), Src.getOperand(0),
                            Amt->getZExtValue(), Width));
    return;
  }
  }

  // No BFE pattern matched; use the generated matcher.
  SelectCode(N);
}
2317 | |||||
2318 | bool AMDGPUDAGToDAGISel::isCBranchSCC(const SDNode *N) const { | ||||
2319 | assert(N->getOpcode() == ISD::BRCOND)((void)0); | ||||
2320 | if (!N->hasOneUse()) | ||||
2321 | return false; | ||||
2322 | |||||
2323 | SDValue Cond = N->getOperand(1); | ||||
2324 | if (Cond.getOpcode() == ISD::CopyToReg) | ||||
2325 | Cond = Cond.getOperand(2); | ||||
2326 | |||||
2327 | if (Cond.getOpcode() != ISD::SETCC || !Cond.hasOneUse()) | ||||
2328 | return false; | ||||
2329 | |||||
2330 | MVT VT = Cond.getOperand(0).getSimpleValueType(); | ||||
2331 | if (VT == MVT::i32) | ||||
2332 | return true; | ||||
2333 | |||||
2334 | if (VT == MVT::i64) { | ||||
2335 | auto ST = static_cast<const GCNSubtarget *>(Subtarget); | ||||
2336 | |||||
2337 | ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get(); | ||||
2338 | return (CC == ISD::SETEQ || CC == ISD::SETNE) && ST->hasScalarCompareEq64(); | ||||
2339 | } | ||||
2340 | |||||
2341 | return false; | ||||
2342 | } | ||||
2343 | |||||
/// Select a BRCOND node into either S_CBRANCH_SCC1 (uniform scalar condition)
/// or S_CBRANCH_VCCNZ (divergent condition), copying the condition into the
/// appropriate physical condition register first.
void AMDGPUDAGToDAGISel::SelectBRCOND(SDNode *N) {
  SDValue Cond = N->getOperand(1);

  // An undef condition can branch either way; pick a dedicated pseudo.
  if (Cond.isUndef()) {
    CurDAG->SelectNodeTo(N, AMDGPU::SI_BR_UNDEF, MVT::Other,
                         N->getOperand(2), N->getOperand(0));
    return;
  }

  const GCNSubtarget *ST = static_cast<const GCNSubtarget *>(Subtarget);
  const SIRegisterInfo *TRI = ST->getRegisterInfo();

  bool UseSCCBr = isCBranchSCC(N) && isUniformBr(N);
  unsigned BrOp = UseSCCBr ? AMDGPU::S_CBRANCH_SCC1 : AMDGPU::S_CBRANCH_VCCNZ;
  Register CondReg = UseSCCBr ? AMDGPU::SCC : TRI->getVCC();
  SDLoc SL(N);

  if (!UseSCCBr) {
    // This is the case that we are selecting to S_CBRANCH_VCCNZ.  We have not
    // analyzed what generates the vcc value, so we do not know whether vcc
    // bits for disabled lanes are 0.  Thus we need to mask out bits for
    // disabled lanes.
    //
    // For the case that we select S_CBRANCH_SCC1 and it gets
    // changed to S_CBRANCH_VCCNZ in SIFixSGPRCopies, SIFixSGPRCopies calls
    // SIInstrInfo::moveToVALU which inserts the S_AND).
    //
    // We could add an analysis of what generates the vcc value here and omit
    // the S_AND when is unnecessary. But it would be better to add a separate
    // pass after SIFixSGPRCopies to do the unnecessary S_AND removal, so it
    // catches both cases.
    Cond = SDValue(CurDAG->getMachineNode(ST->isWave32() ? AMDGPU::S_AND_B32
                                                         : AMDGPU::S_AND_B64,
                     SL, MVT::i1,
                     CurDAG->getRegister(ST->isWave32() ? AMDGPU::EXEC_LO
                                                        : AMDGPU::EXEC,
                                         MVT::i1),
                    Cond),
                   0);
  }

  // Copy the (possibly masked) condition into SCC/VCC, then branch on it.
  SDValue VCC = CurDAG->getCopyToReg(N->getOperand(0), SL, CondReg, Cond);
  CurDAG->SelectNodeTo(N, BrOp, MVT::Other,
                       N->getOperand(2), // Basic Block
                       VCC.getValue(0));
}
2390 | |||||
/// Select f32 FMAD/FMA into V_MAD_MIX_F32 / V_FMA_MIX_F32 when the subtarget
/// has the matching mix instruction and at least one source uses an f16
/// conversion; otherwise defer to the generated matcher.
void AMDGPUDAGToDAGISel::SelectFMAD_FMA(SDNode *N) {
  MVT VT = N->getSimpleValueType(0);
  bool IsFMA = N->getOpcode() == ISD::FMA;
  // Bail out unless the subtarget has the mix instruction matching this
  // opcode (mad-mix pairs with FMAD, fma-mix pairs with FMA).
  if (VT != MVT::f32 || (!Subtarget->hasMadMixInsts() &&
                         !Subtarget->hasFmaMixInsts()) ||
      ((IsFMA && Subtarget->hasMadMixInsts()) ||
       (!IsFMA && Subtarget->hasFmaMixInsts()))) {
    SelectCode(N);
    return;
  }

  SDValue Src0 = N->getOperand(0);
  SDValue Src1 = N->getOperand(1);
  SDValue Src2 = N->getOperand(2);
  unsigned Src0Mods, Src1Mods, Src2Mods;

  // Avoid using v_mad_mix_f32/v_fma_mix_f32 unless there is actually an operand
  // using the conversion from f16.
  bool Sel0 = SelectVOP3PMadMixModsImpl(Src0, Src0, Src0Mods);
  bool Sel1 = SelectVOP3PMadMixModsImpl(Src1, Src1, Src1Mods);
  bool Sel2 = SelectVOP3PMadMixModsImpl(Src2, Src2, Src2Mods);

  assert((IsFMA || !Mode.allFP32Denormals()) &&((void)0)
         "fmad selected with denormals enabled")((void)0);
  // TODO: We can select this with f32 denormals enabled if all the sources are
  // converted from f16 (in which case fmad isn't legal).

  if (Sel0 || Sel1 || Sel2) {
    // For dummy operands.
    SDValue Zero = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
    // Operand layout: (mods, src) x 3, clamp, then two dummy operands.
    SDValue Ops[] = {
      CurDAG->getTargetConstant(Src0Mods, SDLoc(), MVT::i32), Src0,
      CurDAG->getTargetConstant(Src1Mods, SDLoc(), MVT::i32), Src1,
      CurDAG->getTargetConstant(Src2Mods, SDLoc(), MVT::i32), Src2,
      CurDAG->getTargetConstant(0, SDLoc(), MVT::i1),
      Zero, Zero
    };

    CurDAG->SelectNodeTo(N,
                         IsFMA ? AMDGPU::V_FMA_MIX_F32 : AMDGPU::V_MAD_MIX_F32,
                         MVT::f32, Ops);
  } else {
    // No f16-converted source; the plain instruction is better.
    SelectCode(N);
  }
}
2436 | |||||
// This is here because there isn't a way to use the generated sub0_sub1 as the
// subreg index to EXTRACT_SUBREG in tablegen.
/// Select an atomic compare-and-swap to a MUBUF cmpswap instruction for
/// non-flat address spaces, extracting the old value from the low half of the
/// returned data register pair.
void AMDGPUDAGToDAGISel::SelectATOMIC_CMP_SWAP(SDNode *N) {
  MemSDNode *Mem = cast<MemSDNode>(N);
  unsigned AS = Mem->getAddressSpace();
  // Flat cmpswap has a normal tablegen pattern; nothing special needed.
  if (AS == AMDGPUAS::FLAT_ADDRESS) {
    SelectCode(N);
    return;
  }

  MVT VT = N->getSimpleValueType(0);
  bool Is32 = (VT == MVT::i32);
  SDLoc SL(N);

  MachineSDNode *CmpSwap = nullptr;
  // First try the ADDR64 addressing mode where available.
  if (Subtarget->hasAddr64()) {
    SDValue SRsrc, VAddr, SOffset, Offset;

    if (SelectMUBUFAddr64(Mem->getBasePtr(), SRsrc, VAddr, SOffset, Offset)) {
      unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_ADDR64_RTN :
        AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_ADDR64_RTN;
      SDValue CmpVal = Mem->getOperand(2);
      SDValue CPol = CurDAG->getTargetConstant(AMDGPU::CPol::GLC, SL, MVT::i32);

      // XXX - Do we care about glue operands?

      SDValue Ops[] = {CmpVal, VAddr, SRsrc, SOffset, Offset, CPol,
                       Mem->getChain()};

      CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
    }
  }

  // Fall back to the offset-only addressing mode.
  if (!CmpSwap) {
    SDValue SRsrc, SOffset, Offset;
    if (SelectMUBUFOffset(Mem->getBasePtr(), SRsrc, SOffset, Offset)) {
      unsigned Opcode = Is32 ? AMDGPU::BUFFER_ATOMIC_CMPSWAP_OFFSET_RTN :
        AMDGPU::BUFFER_ATOMIC_CMPSWAP_X2_OFFSET_RTN;

      SDValue CmpVal = Mem->getOperand(2);
      SDValue CPol = CurDAG->getTargetConstant(AMDGPU::CPol::GLC, SL, MVT::i32);
      SDValue Ops[] = {CmpVal, SRsrc, SOffset, Offset, CPol, Mem->getChain()};

      CmpSwap = CurDAG->getMachineNode(Opcode, SL, Mem->getVTList(), Ops);
    }
  }

  // Neither addressing mode matched; let normal selection handle (or error).
  if (!CmpSwap) {
    SelectCode(N);
    return;
  }

  // Transfer the memory operand so the scheduler sees the memory access.
  MachineMemOperand *MMO = Mem->getMemOperand();
  CurDAG->setNodeMemRefs(CmpSwap, {MMO});

  // The instruction returns the data pair; the old value is in the low part.
  unsigned SubReg = Is32 ? AMDGPU::sub0 : AMDGPU::sub0_sub1;
  SDValue Extract
    = CurDAG->getTargetExtractSubreg(SubReg, SL, VT, SDValue(CmpSwap, 0));

  ReplaceUses(SDValue(N, 0), Extract);
  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 1));
  CurDAG->RemoveDeadNode(N);
}
2500 | |||||
/// Select amdgcn.ds.append / amdgcn.ds.consume: the pointer goes into m0
/// (via glueCopyToM0), with any legal constant offset folded into the
/// instruction's offset field.
void AMDGPUDAGToDAGISel::SelectDSAppendConsume(SDNode *N, unsigned IntrID) {
  // The address is assumed to be uniform, so if it ends up in a VGPR, it will
  // be copied to an SGPR with readfirstlane.
  unsigned Opc = IntrID == Intrinsic::amdgcn_ds_append ?
    AMDGPU::DS_APPEND : AMDGPU::DS_CONSUME;

  SDValue Chain = N->getOperand(0);
  SDValue Ptr = N->getOperand(2);
  MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
  MachineMemOperand *MMO = M->getMemOperand();
  bool IsGDS = M->getAddressSpace() == AMDGPUAS::REGION_ADDRESS;

  SDValue Offset;
  if (CurDAG->isBaseWithConstantOffset(Ptr)) {
    SDValue PtrBase = Ptr.getOperand(0);
    SDValue PtrOffset = Ptr.getOperand(1);

    const APInt &OffsetVal = cast<ConstantSDNode>(PtrOffset)->getAPIntValue();
    if (isDSOffsetLegal(PtrBase, OffsetVal.getZExtValue())) {
      // Put the base in m0 and fold the constant into the offset field.
      // Note glueCopyToM0 returns a replacement for N.
      N = glueCopyToM0(N, PtrBase);
      Offset = CurDAG->getTargetConstant(OffsetVal, SDLoc(), MVT::i32);
    }
  }

  // No foldable offset: the whole pointer goes in m0, offset field is 0.
  if (!Offset) {
    N = glueCopyToM0(N, Ptr);
    Offset = CurDAG->getTargetConstant(0, SDLoc(), MVT::i32);
  }

  SDValue Ops[] = {
    Offset,
    CurDAG->getTargetConstant(IsGDS, SDLoc(), MVT::i32),
    Chain,
    N->getOperand(N->getNumOperands() - 1) // New glue
  };

  SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
}
2540 | |||||
2541 | static unsigned gwsIntrinToOpcode(unsigned IntrID) { | ||||
2542 | switch (IntrID) { | ||||
2543 | case Intrinsic::amdgcn_ds_gws_init: | ||||
2544 | return AMDGPU::DS_GWS_INIT; | ||||
2545 | case Intrinsic::amdgcn_ds_gws_barrier: | ||||
2546 | return AMDGPU::DS_GWS_BARRIER; | ||||
2547 | case Intrinsic::amdgcn_ds_gws_sema_v: | ||||
2548 | return AMDGPU::DS_GWS_SEMA_V; | ||||
2549 | case Intrinsic::amdgcn_ds_gws_sema_br: | ||||
2550 | return AMDGPU::DS_GWS_SEMA_BR; | ||||
2551 | case Intrinsic::amdgcn_ds_gws_sema_p: | ||||
2552 | return AMDGPU::DS_GWS_SEMA_P; | ||||
2553 | case Intrinsic::amdgcn_ds_gws_sema_release_all: | ||||
2554 | return AMDGPU::DS_GWS_SEMA_RELEASE_ALL; | ||||
2555 | default: | ||||
2556 | llvm_unreachable("not a gws intrinsic")__builtin_unreachable(); | ||||
2557 | } | ||||
2558 | } | ||||
2559 | |||||
/// Select a GWS (global wave sync) intrinsic: the resource-id offset is split
/// between m0 (set via glueCopyToM0) and the instruction's offset field.
void AMDGPUDAGToDAGISel::SelectDS_GWS(SDNode *N, unsigned IntrID) {
  if (IntrID == Intrinsic::amdgcn_ds_gws_sema_release_all &&
      !Subtarget->hasGWSSemaReleaseAll()) {
    // Let this error.
    SelectCode(N);
    return;
  }

  // Chain, intrinsic ID, vsrc, offset
  const bool HasVSrc = N->getNumOperands() == 4;
  assert(HasVSrc || N->getNumOperands() == 3)((void)0);

  SDLoc SL(N);
  SDValue BaseOffset = N->getOperand(HasVSrc ? 3 : 2);
  int ImmOffset = 0;
  MemIntrinsicSDNode *M = cast<MemIntrinsicSDNode>(N);
  MachineMemOperand *MMO = M->getMemOperand();

  // Don't worry if the offset ends up in a VGPR. Only one lane will have
  // effect, so SIFixSGPRCopies will validly insert readfirstlane.

  // The resource id offset is computed as (<isa opaque base> + M0[21:16] +
  // offset field) % 64. Some versions of the programming guide omit the m0
  // part, or claim it's from offset 0.
  if (ConstantSDNode *ConstOffset = dyn_cast<ConstantSDNode>(BaseOffset)) {
    // If we have a constant offset, try to use the 0 in m0 as the base.
    // TODO: Look into changing the default m0 initialization value. If the
    // default -1 only set the low 16-bits, we could leave it as-is and add 1 to
    // the immediate offset.
    glueCopyToM0(N, CurDAG->getTargetConstant(0, SL, MVT::i32));
    ImmOffset = ConstOffset->getZExtValue();
  } else {
    // Peel a constant addend into the immediate offset field if present.
    if (CurDAG->isBaseWithConstantOffset(BaseOffset)) {
      ImmOffset = BaseOffset.getConstantOperandVal(1);
      BaseOffset = BaseOffset.getOperand(0);
    }

    // Prefer to do the shift in an SGPR since it should be possible to use m0
    // as the result directly. If it's already an SGPR, it will be eliminated
    // later.
    SDNode *SGPROffset
      = CurDAG->getMachineNode(AMDGPU::V_READFIRSTLANE_B32, SL, MVT::i32,
                               BaseOffset);
    // Shift to offset in m0
    SDNode *M0Base
      = CurDAG->getMachineNode(AMDGPU::S_LSHL_B32, SL, MVT::i32,
                               SDValue(SGPROffset, 0),
                               CurDAG->getTargetConstant(16, SL, MVT::i32));
    glueCopyToM0(N, SDValue(M0Base, 0));
  }

  SDValue Chain = N->getOperand(0);
  SDValue OffsetField = CurDAG->getTargetConstant(ImmOffset, SL, MVT::i32);

  const unsigned Opc = gwsIntrinToOpcode(IntrID);
  SmallVector<SDValue, 5> Ops;
  if (HasVSrc)
    Ops.push_back(N->getOperand(2));
  Ops.push_back(OffsetField);
  Ops.push_back(Chain);

  SDNode *Selected = CurDAG->SelectNodeTo(N, Opc, N->getVTList(), Ops);
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Selected), {MMO});
}
2624 | |||||
/// Select amdgcn.interp.p1.f16. On 16-bank-LDS subtargets this must be
/// expanded manually into V_INTERP_MOV_F32 + V_INTERP_P1LV_F16 glued to a
/// copy into m0; otherwise the tablegen pattern handles it.
void AMDGPUDAGToDAGISel::SelectInterpP1F16(SDNode *N) {
  if (Subtarget->getLDSBankCount() != 16) {
    // This is a single instruction with a pattern.
    SelectCode(N);
    return;
  }

  SDLoc DL(N);

  // This requires 2 instructions. It is possible to write a pattern to support
  // this, but the generated isel emitter doesn't correctly deal with multiple
  // output instructions using the same physical register input. The copy to m0
  // is incorrectly placed before the second instruction.
  //
  // TODO: Match source modifiers.
  //
  // def : Pat <
  //   (int_amdgcn_interp_p1_f16
  //    (VOP3Mods f32:$src0, i32:$src0_modifiers),
  //                             (i32 timm:$attrchan), (i32 timm:$attr),
  //                             (i1 timm:$high), M0),
  //   (V_INTERP_P1LV_F16 $src0_modifiers, VGPR_32:$src0, timm:$attr,
  //       timm:$attrchan, 0,
  //       (V_INTERP_MOV_F32 2, timm:$attr, timm:$attrchan), timm:$high)> {
  //   let Predicates = [has16BankLDS];
  // }

  // 16 bank LDS
  // Glue the m0 write in front of both interp instructions.
  SDValue ToM0 = CurDAG->getCopyToReg(CurDAG->getEntryNode(), DL, AMDGPU::M0,
                                      N->getOperand(5), SDValue());

  SDVTList VTs = CurDAG->getVTList(MVT::f32, MVT::Other);

  SDNode *InterpMov =
    CurDAG->getMachineNode(AMDGPU::V_INTERP_MOV_F32, DL, VTs, {
        CurDAG->getTargetConstant(2, DL, MVT::i32), // P0
        N->getOperand(3),  // Attr
        N->getOperand(2),  // Attrchan
        ToM0.getValue(1) // In glue
  });

  SDNode *InterpP1LV =
    CurDAG->getMachineNode(AMDGPU::V_INTERP_P1LV_F16, DL, MVT::f32, {
        CurDAG->getTargetConstant(0, DL, MVT::i32), // $src0_modifiers
        N->getOperand(1), // Src0
        N->getOperand(3), // Attr
        N->getOperand(2), // Attrchan
        CurDAG->getTargetConstant(0, DL, MVT::i32), // $src2_modifiers
        SDValue(InterpMov, 0), // Src2 - holds two f16 values selected by high
        N->getOperand(4), // high
        CurDAG->getTargetConstant(0, DL, MVT::i1), // $clamp
        CurDAG->getTargetConstant(0, DL, MVT::i32), // $omod
        SDValue(InterpMov, 1)
  });

  CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), SDValue(InterpP1LV, 0));
}
2682 | |||||
2683 | void AMDGPUDAGToDAGISel::SelectINTRINSIC_W_CHAIN(SDNode *N) { | ||||
2684 | unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); | ||||
2685 | switch (IntrID) { | ||||
2686 | case Intrinsic::amdgcn_ds_append: | ||||
2687 | case Intrinsic::amdgcn_ds_consume: { | ||||
2688 | if (N->getValueType(0) != MVT::i32) | ||||
2689 | break; | ||||
2690 | SelectDSAppendConsume(N, IntrID); | ||||
2691 | return; | ||||
2692 | } | ||||
2693 | } | ||||
2694 | |||||
2695 | SelectCode(N); | ||||
2696 | } | ||||
2697 | |||||
2698 | void AMDGPUDAGToDAGISel::SelectINTRINSIC_WO_CHAIN(SDNode *N) { | ||||
2699 | unsigned IntrID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); | ||||
2700 | unsigned Opcode; | ||||
2701 | switch (IntrID) { | ||||
2702 | case Intrinsic::amdgcn_wqm: | ||||
2703 | Opcode = AMDGPU::WQM; | ||||
2704 | break; | ||||
2705 | case Intrinsic::amdgcn_softwqm: | ||||
2706 | Opcode = AMDGPU::SOFT_WQM; | ||||
2707 | break; | ||||
2708 | case Intrinsic::amdgcn_wwm: | ||||
2709 | case Intrinsic::amdgcn_strict_wwm: | ||||
2710 | Opcode = AMDGPU::STRICT_WWM; | ||||
2711 | break; | ||||
2712 | case Intrinsic::amdgcn_strict_wqm: | ||||
2713 | Opcode = AMDGPU::STRICT_WQM; | ||||
2714 | break; | ||||
2715 | case Intrinsic::amdgcn_interp_p1_f16: | ||||
2716 | SelectInterpP1F16(N); | ||||
2717 | return; | ||||
2718 | default: | ||||
2719 | SelectCode(N); | ||||
2720 | return; | ||||
2721 | } | ||||
2722 | |||||
2723 | SDValue Src = N->getOperand(1); | ||||
2724 | CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), {Src}); | ||||
2725 | } | ||||
2726 | |||||
2727 | void AMDGPUDAGToDAGISel::SelectINTRINSIC_VOID(SDNode *N) { | ||||
2728 | unsigned IntrID = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); | ||||
2729 | switch (IntrID) { | ||||
2730 | case Intrinsic::amdgcn_ds_gws_init: | ||||
2731 | case Intrinsic::amdgcn_ds_gws_barrier: | ||||
2732 | case Intrinsic::amdgcn_ds_gws_sema_v: | ||||
2733 | case Intrinsic::amdgcn_ds_gws_sema_br: | ||||
2734 | case Intrinsic::amdgcn_ds_gws_sema_p: | ||||
2735 | case Intrinsic::amdgcn_ds_gws_sema_release_all: | ||||
2736 | SelectDS_GWS(N, IntrID); | ||||
2737 | return; | ||||
2738 | default: | ||||
2739 | break; | ||||
2740 | } | ||||
2741 | |||||
2742 | SelectCode(N); | ||||
2743 | } | ||||
2744 | |||||
2745 | bool AMDGPUDAGToDAGISel::SelectVOP3ModsImpl(SDValue In, SDValue &Src, | ||||
2746 | unsigned &Mods, | ||||
2747 | bool AllowAbs) const { | ||||
2748 | Mods = 0; | ||||
2749 | Src = In; | ||||
2750 | |||||
2751 | if (Src.getOpcode() == ISD::FNEG) { | ||||
2752 | Mods |= SISrcMods::NEG; | ||||
2753 | Src = Src.getOperand(0); | ||||
2754 | } | ||||
2755 | |||||
2756 | if (AllowAbs && Src.getOpcode() == ISD::FABS) { | ||||
2757 | Mods |= SISrcMods::ABS; | ||||
2758 | Src = Src.getOperand(0); | ||||
2759 | } | ||||
2760 | |||||
2761 | return true; | ||||
2762 | } | ||||
2763 | |||||
2764 | bool AMDGPUDAGToDAGISel::SelectVOP3Mods(SDValue In, SDValue &Src, | ||||
2765 | SDValue &SrcMods) const { | ||||
2766 | unsigned Mods; | ||||
2767 | if (SelectVOP3ModsImpl(In, Src, Mods)) { | ||||
2768 | SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32); | ||||
2769 | return true; | ||||
2770 | } | ||||
2771 | |||||
2772 | return false; | ||||
2773 | } | ||||
2774 | |||||
2775 | bool AMDGPUDAGToDAGISel::SelectVOP3BMods(SDValue In, SDValue &Src, | ||||
2776 | SDValue &SrcMods) const { | ||||
2777 | unsigned Mods; | ||||
2778 | if (SelectVOP3ModsImpl(In, Src, Mods, /* AllowAbs */ false)) { | ||||
2779 | SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32); | ||||
2780 | return true; | ||||
2781 | } | ||||
2782 | |||||
2783 | return false; | ||||
2784 | } | ||||
2785 | |||||
2786 | bool AMDGPUDAGToDAGISel::SelectVOP3Mods_NNaN(SDValue In, SDValue &Src, | ||||
2787 | SDValue &SrcMods) const { | ||||
2788 | SelectVOP3Mods(In, Src, SrcMods); | ||||
2789 | return isNoNanSrc(Src); | ||||
2790 | } | ||||
2791 | |||||
2792 | bool AMDGPUDAGToDAGISel::SelectVOP3NoMods(SDValue In, SDValue &Src) const { | ||||
2793 | if (In.getOpcode() == ISD::FABS || In.getOpcode() == ISD::FNEG) | ||||
2794 | return false; | ||||
2795 | |||||
2796 | Src = In; | ||||
2797 | return true; | ||||
2798 | } | ||||
2799 | |||||
2800 | bool AMDGPUDAGToDAGISel::SelectVOP3Mods0(SDValue In, SDValue &Src, | ||||
2801 | SDValue &SrcMods, SDValue &Clamp, | ||||
2802 | SDValue &Omod) const { | ||||
2803 | SDLoc DL(In); | ||||
2804 | Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1); | ||||
2805 | Omod = CurDAG->getTargetConstant(0, DL, MVT::i1); | ||||
2806 | |||||
2807 | return SelectVOP3Mods(In, Src, SrcMods); | ||||
2808 | } | ||||
2809 | |||||
2810 | bool AMDGPUDAGToDAGISel::SelectVOP3BMods0(SDValue In, SDValue &Src, | ||||
2811 | SDValue &SrcMods, SDValue &Clamp, | ||||
2812 | SDValue &Omod) const { | ||||
2813 | SDLoc DL(In); | ||||
2814 | Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1); | ||||
2815 | Omod = CurDAG->getTargetConstant(0, DL, MVT::i1); | ||||
2816 | |||||
2817 | return SelectVOP3BMods(In, Src, SrcMods); | ||||
2818 | } | ||||
2819 | |||||
2820 | bool AMDGPUDAGToDAGISel::SelectVOP3OMods(SDValue In, SDValue &Src, | ||||
2821 | SDValue &Clamp, SDValue &Omod) const { | ||||
2822 | Src = In; | ||||
2823 | |||||
2824 | SDLoc DL(In); | ||||
2825 | Clamp = CurDAG->getTargetConstant(0, DL, MVT::i1); | ||||
2826 | Omod = CurDAG->getTargetConstant(0, DL, MVT::i1); | ||||
2827 | |||||
2828 | return true; | ||||
2829 | } | ||||
2830 | |||||
2831 | bool AMDGPUDAGToDAGISel::SelectVOP3PMods(SDValue In, SDValue &Src, | ||||
2832 | SDValue &SrcMods) const { | ||||
2833 | unsigned Mods = 0; | ||||
2834 | Src = In; | ||||
2835 | |||||
2836 | if (Src.getOpcode() == ISD::FNEG) { | ||||
2837 | Mods ^= (SISrcMods::NEG | SISrcMods::NEG_HI); | ||||
2838 | Src = Src.getOperand(0); | ||||
2839 | } | ||||
2840 | |||||
2841 | if (Src.getOpcode() == ISD::BUILD_VECTOR) { | ||||
2842 | unsigned VecMods = Mods; | ||||
2843 | |||||
2844 | SDValue Lo = stripBitcast(Src.getOperand(0)); | ||||
2845 | SDValue Hi = stripBitcast(Src.getOperand(1)); | ||||
2846 | |||||
2847 | if (Lo.getOpcode() == ISD::FNEG) { | ||||
2848 | Lo = stripBitcast(Lo.getOperand(0)); | ||||
2849 | Mods ^= SISrcMods::NEG; | ||||
2850 | } | ||||
2851 | |||||
2852 | if (Hi.getOpcode() == ISD::FNEG) { | ||||
2853 | Hi = stripBitcast(Hi.getOperand(0)); | ||||
2854 | Mods ^= SISrcMods::NEG_HI; | ||||
2855 | } | ||||
2856 | |||||
2857 | if (isExtractHiElt(Lo, Lo)) | ||||
2858 | Mods |= SISrcMods::OP_SEL_0; | ||||
2859 | |||||
2860 | if (isExtractHiElt(Hi, Hi)) | ||||
2861 | Mods |= SISrcMods::OP_SEL_1; | ||||
2862 | |||||
2863 | unsigned VecSize = Src.getValueSizeInBits(); | ||||
2864 | Lo = stripExtractLoElt(Lo); | ||||
2865 | Hi = stripExtractLoElt(Hi); | ||||
2866 | |||||
2867 | if (Lo.getValueSizeInBits() > VecSize) { | ||||
2868 | Lo = CurDAG->getTargetExtractSubreg( | ||||
2869 | (VecSize > 32) ? AMDGPU::sub0_sub1 : AMDGPU::sub0, SDLoc(In), | ||||
2870 | MVT::getIntegerVT(VecSize), Lo); | ||||
2871 | } | ||||
2872 | |||||
2873 | if (Hi.getValueSizeInBits() > VecSize) { | ||||
2874 | Hi = CurDAG->getTargetExtractSubreg( | ||||
2875 | (VecSize > 32) ? AMDGPU::sub0_sub1 : AMDGPU::sub0, SDLoc(In), | ||||
2876 | MVT::getIntegerVT(VecSize), Hi); | ||||
2877 | } | ||||
2878 | |||||
2879 | assert(Lo.getValueSizeInBits() <= VecSize &&((void)0) | ||||
2880 | Hi.getValueSizeInBits() <= VecSize)((void)0); | ||||
2881 | |||||
2882 | if (Lo == Hi && !isInlineImmediate(Lo.getNode())) { | ||||
2883 | // Really a scalar input. Just select from the low half of the register to | ||||
2884 | // avoid packing. | ||||
2885 | |||||
2886 | if (VecSize == 32 || VecSize == Lo.getValueSizeInBits()) { | ||||
2887 | Src = Lo; | ||||
2888 | } else { | ||||
2889 | assert(Lo.getValueSizeInBits() == 32 && VecSize == 64)((void)0); | ||||
2890 | |||||
2891 | SDLoc SL(In); | ||||
2892 | SDValue Undef = SDValue( | ||||
2893 | CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, SL, | ||||
2894 | Lo.getValueType()), 0); | ||||
2895 | auto RC = Lo->isDivergent() ? AMDGPU::VReg_64RegClassID | ||||
2896 | : AMDGPU::SReg_64RegClassID; | ||||
2897 | const SDValue Ops[] = { | ||||
2898 | CurDAG->getTargetConstant(RC, SL, MVT::i32), | ||||
2899 | Lo, CurDAG->getTargetConstant(AMDGPU::sub0, SL, MVT::i32), | ||||
2900 | Undef, CurDAG->getTargetConstant(AMDGPU::sub1, SL, MVT::i32) }; | ||||
2901 | |||||
2902 | Src = SDValue(CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, SL, | ||||
2903 | Src.getValueType(), Ops), 0); | ||||
2904 | } | ||||
2905 | SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32); | ||||
2906 | return true; | ||||
2907 | } | ||||
2908 | |||||
2909 | if (VecSize == 64 && Lo == Hi && isa<ConstantFPSDNode>(Lo)) { | ||||
2910 | uint64_t Lit = cast<ConstantFPSDNode>(Lo)->getValueAPF() | ||||
2911 | .bitcastToAPInt().getZExtValue(); | ||||
2912 | if (AMDGPU::isInlinableLiteral32(Lit, Subtarget->hasInv2PiInlineImm())) { | ||||
2913 | Src = CurDAG->getTargetConstant(Lit, SDLoc(In), MVT::i64);; | ||||
2914 | SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32); | ||||
2915 | return true; | ||||
2916 | } | ||||
2917 | } | ||||
2918 | |||||
2919 | Mods = VecMods; | ||||
2920 | } | ||||
2921 | |||||
2922 | // Packed instructions do not have abs modifiers. | ||||
2923 | Mods |= SISrcMods::OP_SEL_1; | ||||
2924 | |||||
2925 | SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32); | ||||
2926 | return true; | ||||
2927 | } | ||||
2928 | |||||
2929 | bool AMDGPUDAGToDAGISel::SelectVOP3OpSel(SDValue In, SDValue &Src, | ||||
2930 | SDValue &SrcMods) const { | ||||
2931 | Src = In; | ||||
2932 | // FIXME: Handle op_sel | ||||
2933 | SrcMods = CurDAG->getTargetConstant(0, SDLoc(In), MVT::i32); | ||||
2934 | return true; | ||||
2935 | } | ||||
2936 | |||||
2937 | bool AMDGPUDAGToDAGISel::SelectVOP3OpSelMods(SDValue In, SDValue &Src, | ||||
2938 | SDValue &SrcMods) const { | ||||
2939 | // FIXME: Handle op_sel | ||||
2940 | return SelectVOP3Mods(In, Src, SrcMods); | ||||
2941 | } | ||||
2942 | |||||
// The return value is not whether the match is possible (which it always is),
// but whether or not a conversion is really used (i.e. whether the source is
// an f16 value that the mad_mix instruction must extend to f32).
bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixModsImpl(SDValue In, SDValue &Src,
                                                   unsigned &Mods) const {
  Mods = 0;
  // First fold any ordinary fneg/fabs wrappers into the modifier bits.
  SelectVOP3ModsImpl(In, Src, Mods);

  if (Src.getOpcode() == ISD::FP_EXTEND) {
    // Look through the f16 -> f32 extension; the instruction performs the
    // conversion itself when op_sel_hi is set below.
    Src = Src.getOperand(0);
    assert(Src.getValueType() == MVT::f16);
    Src = stripBitcast(Src);

    // Be careful about folding modifiers if we already have an abs. fneg is
    // applied last, so we don't want to apply an earlier fneg.
    if ((Mods & SISrcMods::ABS) == 0) {
      unsigned ModsTmp;
      SelectVOP3ModsImpl(Src, Src, ModsTmp);

      // An inner fneg combines with (cancels or adds to) the outer one.
      if ((ModsTmp & SISrcMods::NEG) != 0)
        Mods ^= SISrcMods::NEG;

      if ((ModsTmp & SISrcMods::ABS) != 0)
        Mods |= SISrcMods::ABS;
    }

    // op_sel/op_sel_hi decide the source type and source.
    // If the source's op_sel_hi is set, it indicates to do a conversion from
    // fp16. If the source's op_sel is set, it picks the high half of the
    // source register.

    Mods |= SISrcMods::OP_SEL_1;
    if (isExtractHiElt(Src, Src)) {
      Mods |= SISrcMods::OP_SEL_0;

      // TODO: Should we try to look for neg/abs here?
    }

    // A conversion is in use.
    return true;
  }

  // Source was already f32 (or not an extension); no conversion used.
  return false;
}
2985 | |||||
2986 | bool AMDGPUDAGToDAGISel::SelectVOP3PMadMixMods(SDValue In, SDValue &Src, | ||||
2987 | SDValue &SrcMods) const { | ||||
2988 | unsigned Mods = 0; | ||||
2989 | SelectVOP3PMadMixModsImpl(In, Src, Mods); | ||||
2990 | SrcMods = CurDAG->getTargetConstant(Mods, SDLoc(In), MVT::i32); | ||||
2991 | return true; | ||||
2992 | } | ||||
2993 | |||||
2994 | SDValue AMDGPUDAGToDAGISel::getHi16Elt(SDValue In) const { | ||||
2995 | if (In.isUndef()) | ||||
2996 | return CurDAG->getUNDEF(MVT::i32); | ||||
2997 | |||||
2998 | if (ConstantSDNode *C
| ||||
2999 | SDLoc SL(In); | ||||
3000 | return CurDAG->getConstant(C->getZExtValue() << 16, SL, MVT::i32); | ||||
3001 | } | ||||
3002 | |||||
3003 | if (ConstantFPSDNode *C
| ||||
3004 | SDLoc SL(In); | ||||
3005 | return CurDAG->getConstant( | ||||
3006 | C->getValueAPF().bitcastToAPInt().getZExtValue() << 16, SL, MVT::i32); | ||||
3007 | } | ||||
3008 | |||||
3009 | SDValue Src; | ||||
3010 | if (isExtractHiElt(In, Src)) | ||||
3011 | return Src; | ||||
3012 | |||||
3013 | return SDValue(); | ||||
3014 | } | ||||
3015 | |||||
3016 | bool AMDGPUDAGToDAGISel::isVGPRImm(const SDNode * N) const { | ||||
3017 | assert(CurDAG->getTarget().getTargetTriple().getArch() == Triple::amdgcn)((void)0); | ||||
3018 | |||||
3019 | const SIRegisterInfo *SIRI = | ||||
3020 | static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo()); | ||||
3021 | const SIInstrInfo * SII = | ||||
3022 | static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo()); | ||||
3023 | |||||
3024 | unsigned Limit = 0; | ||||
3025 | bool AllUsesAcceptSReg = true; | ||||
3026 | for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end(); | ||||
3027 | Limit < 10 && U != E; ++U, ++Limit) { | ||||
3028 | const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo()); | ||||
3029 | |||||
3030 | // If the register class is unknown, it could be an unknown | ||||
3031 | // register class that needs to be an SGPR, e.g. an inline asm | ||||
3032 | // constraint | ||||
3033 | if (!RC || SIRI->isSGPRClass(RC)) | ||||
3034 | return false; | ||||
3035 | |||||
3036 | if (RC != &AMDGPU::VS_32RegClass) { | ||||
3037 | AllUsesAcceptSReg = false; | ||||
3038 | SDNode * User = *U; | ||||
3039 | if (User->isMachineOpcode()) { | ||||
3040 | unsigned Opc = User->getMachineOpcode(); | ||||
3041 | MCInstrDesc Desc = SII->get(Opc); | ||||
3042 | if (Desc.isCommutable()) { | ||||
3043 | unsigned OpIdx = Desc.getNumDefs() + U.getOperandNo(); | ||||
3044 | unsigned CommuteIdx1 = TargetInstrInfo::CommuteAnyOperandIndex; | ||||
3045 | if (SII->findCommutedOpIndices(Desc, OpIdx, CommuteIdx1)) { | ||||
3046 | unsigned CommutedOpNo = CommuteIdx1 - Desc.getNumDefs(); | ||||
3047 | const TargetRegisterClass *CommutedRC = getOperandRegClass(*U, CommutedOpNo); | ||||
3048 | if (CommutedRC == &AMDGPU::VS_32RegClass) | ||||
3049 | AllUsesAcceptSReg = true; | ||||
3050 | } | ||||
3051 | } | ||||
3052 | } | ||||
3053 | // If "AllUsesAcceptSReg == false" so far we haven't suceeded | ||||
3054 | // commuting current user. This means have at least one use | ||||
3055 | // that strictly require VGPR. Thus, we will not attempt to commute | ||||
3056 | // other user instructions. | ||||
3057 | if (!AllUsesAcceptSReg) | ||||
3058 | break; | ||||
3059 | } | ||||
3060 | } | ||||
3061 | return !AllUsesAcceptSReg && (Limit < 10); | ||||
3062 | } | ||||
3063 | |||||
3064 | bool AMDGPUDAGToDAGISel::isUniformLoad(const SDNode * N) const { | ||||
3065 | auto Ld = cast<LoadSDNode>(N); | ||||
3066 | |||||
3067 | return Ld->getAlignment() >= 4 && | ||||
3068 | ( | ||||
3069 | ( | ||||
3070 | ( | ||||
3071 | Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS || | ||||
3072 | Ld->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT | ||||
3073 | ) | ||||
3074 | && | ||||
3075 | !N->isDivergent() | ||||
3076 | ) | ||||
3077 | || | ||||
3078 | ( | ||||
3079 | Subtarget->getScalarizeGlobalBehavior() && | ||||
3080 | Ld->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS && | ||||
3081 | Ld->isSimple() && | ||||
3082 | !N->isDivergent() && | ||||
3083 | static_cast<const SITargetLowering *>( | ||||
3084 | getTargetLowering())->isMemOpHasNoClobberedMemOperand(N) | ||||
3085 | ) | ||||
3086 | ); | ||||
3087 | } | ||||
3088 | |||||
/// Run target post-isel folding over every machine node in the DAG,
/// repeating until a full pass makes no further changes (fixpoint).
void AMDGPUDAGToDAGISel::PostprocessISelDAG() {
  const AMDGPUTargetLowering& Lowering =
    *static_cast<const AMDGPUTargetLowering*>(getTargetLowering());
  bool IsModified = false;
  do {
    IsModified = false;

    // Go over all selected nodes and try to fold them a bit more
    SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_begin();
    while (Position != CurDAG->allnodes_end()) {
      // Advance before folding: PostISelFolding may replace/delete Node.
      SDNode *Node = &*Position++;
      MachineSDNode *MachineNode = dyn_cast<MachineSDNode>(Node);
      if (!MachineNode)
        continue; // Only already-selected machine nodes are folded.

      SDNode *ResNode = Lowering.PostISelFolding(MachineNode, *CurDAG);
      if (ResNode != Node) {
        if (ResNode)
          ReplaceUses(Node, ResNode);
        // Either a replacement happened or the node was deleted; rescan.
        IsModified = true;
      }
    }
    // Drop nodes orphaned by the replacements before the next iteration.
    CurDAG->RemoveDeadNodes();
  } while (IsModified);
}
3114 | |||||
3115 | bool R600DAGToDAGISel::runOnMachineFunction(MachineFunction &MF) { | ||||
3116 | Subtarget = &MF.getSubtarget<R600Subtarget>(); | ||||
3117 | return SelectionDAGISel::runOnMachineFunction(MF); | ||||
3118 | } | ||||
3119 | |||||
3120 | bool R600DAGToDAGISel::isConstantLoad(const MemSDNode *N, int CbId) const { | ||||
3121 | if (!N->readMem()) | ||||
3122 | return false; | ||||
3123 | if (CbId == -1) | ||||
3124 | return N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS || | ||||
3125 | N->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS_32BIT; | ||||
3126 | |||||
3127 | return N->getAddressSpace() == AMDGPUAS::CONSTANT_BUFFER_0 + CbId; | ||||
3128 | } | ||||
3129 | |||||
3130 | bool R600DAGToDAGISel::SelectGlobalValueConstantOffset(SDValue Addr, | ||||
3131 | SDValue& IntPtr) { | ||||
3132 | if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Addr)) { | ||||
3133 | IntPtr = CurDAG->getIntPtrConstant(Cst->getZExtValue() / 4, SDLoc(Addr), | ||||
3134 | true); | ||||
3135 | return true; | ||||
3136 | } | ||||
3137 | return false; | ||||
3138 | } | ||||
3139 | |||||
3140 | bool R600DAGToDAGISel::SelectGlobalValueVariableOffset(SDValue Addr, | ||||
3141 | SDValue& BaseReg, SDValue &Offset) { | ||||
3142 | if (!isa<ConstantSDNode>(Addr)) { | ||||
3143 | BaseReg = Addr; | ||||
3144 | Offset = CurDAG->getIntPtrConstant(0, SDLoc(Addr), true); | ||||
3145 | return true; | ||||
3146 | } | ||||
3147 | return false; | ||||
3148 | } | ||||
3149 | |||||
/// Main R600 instruction-selection hook: handle the vector-build opcodes
/// specially, defer everything else to the generated matcher.
void R600DAGToDAGISel::Select(SDNode *N) {
  unsigned int Opc = N->getOpcode();
  if (N->isMachineOpcode()) {
    N->setNodeId(-1);
    return;   // Already selected.
  }

  switch (Opc) {
  default: break;
  case AMDGPUISD::BUILD_VERTICAL_VECTOR:
  case ISD::SCALAR_TO_VECTOR:
  case ISD::BUILD_VECTOR: {
    EVT VT = N->getValueType(0);
    unsigned NumVectorElts = VT.getVectorNumElements();
    unsigned RegClassID;
    // BUILD_VECTOR was lowered into an IMPLICIT_DEF + 4 INSERT_SUBREG
    // that adds a 128 bits reg copy when going through TwoAddressInstructions
    // pass. We want to avoid 128 bits copies as much as possible because they
    // can't be bundled by our scheduler.
    switch(NumVectorElts) {
    case 2: RegClassID = R600::R600_Reg64RegClassID; break;
    case 4:
      // BUILD_VERTICAL_VECTOR needs the special vertical register class.
      if (Opc == AMDGPUISD::BUILD_VERTICAL_VECTOR)
        RegClassID = R600::R600_Reg128VerticalRegClassID;
      else
        RegClassID = R600::R600_Reg128RegClassID;
      break;
    default: llvm_unreachable("Do not know how to lower this BUILD_VECTOR");
    }
    SelectBuildVector(N, RegClassID);
    return;
  }
  }

  // Fall back to the TableGen-generated matcher.
  SelectCode(N);
}
3186 | |||||
/// Split an indirect address into (Base, Offset); always succeeds, falling
/// back to (Addr, 0). Note: the dyn_cast assignments inside the conditions
/// are deliberate — each branch both tests and captures the constant.
bool R600DAGToDAGISel::SelectADDRIndirect(SDValue Addr, SDValue &Base,
                                          SDValue &Offset) {
  ConstantSDNode *C;
  SDLoc DL(Addr);

  if ((C = dyn_cast<ConstantSDNode>(Addr))) {
    // Pure constant address: base is the indirect base register.
    Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == AMDGPUISD::DWORDADDR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(0)))) {
    // DWORDADDR of a constant: same as above, constant comes from operand 0.
    Base = CurDAG->getRegister(R600::INDIRECT_BASE_ADDR, MVT::i32);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else if ((Addr.getOpcode() == ISD::ADD || Addr.getOpcode() == ISD::OR) &&
             (C = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))) {
    // (add/or base, const): fold the constant into the offset field.
    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(C->getZExtValue(), DL, MVT::i32);
  } else {
    // Default case, no offset.
    Base = Addr;
    Offset = CurDAG->getTargetConstant(0, DL, MVT::i32);
  }

  return true;
}
3210 | |||||
/// Form the (Base, Offset) pair for a VTX_READ; always succeeds. A 16-bit
/// signed immediate may be folded into the offset field.
bool R600DAGToDAGISel::SelectADDRVTX_READ(SDValue Addr, SDValue &Base,
                                          SDValue &Offset) {
  ConstantSDNode *IMMOffset;

  // (add base, imm16): fold the immediate into the offset field.
  if (Addr.getOpcode() == ISD::ADD
      && (IMMOffset = dyn_cast<ConstantSDNode>(Addr.getOperand(1)))
      && isInt<16>(IMMOffset->getZExtValue())) {

    Base = Addr.getOperand(0);
    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
                                       MVT::i32);
    return true;
  // If the pointer address is constant, we can move it to the offset field.
  } else if ((IMMOffset = dyn_cast<ConstantSDNode>(Addr))
             && isInt<16>(IMMOffset->getZExtValue())) {
    // Base becomes the zero register.
    Base = CurDAG->getCopyFromReg(CurDAG->getEntryNode(),
                                  SDLoc(CurDAG->getEntryNode()),
                                  R600::ZERO, MVT::i32);
    Offset = CurDAG->getTargetConstant(IMMOffset->getZExtValue(), SDLoc(Addr),
                                       MVT::i32);
    return true;
  }

  // Default case, no offset
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i32);
  return true;
}
1 | //===- llvm/CodeGen/SelectionDAGNodes.h - SelectionDAG Nodes ----*- C++ -*-===// | |||
2 | // | |||
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |||
4 | // See https://llvm.org/LICENSE.txt for license information. | |||
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |||
6 | // | |||
7 | //===----------------------------------------------------------------------===// | |||
8 | // | |||
9 | // This file declares the SDNode class and derived classes, which are used to | |||
10 | // represent the nodes and operations present in a SelectionDAG. These nodes | |||
11 | // and operations are machine code level operations, with some similarities to | |||
12 | // the GCC RTL representation. | |||
13 | // | |||
14 | // Clients should include the SelectionDAG.h file instead of this file directly. | |||
15 | // | |||
16 | //===----------------------------------------------------------------------===// | |||
17 | ||||
18 | #ifndef LLVM_CODEGEN_SELECTIONDAGNODES_H | |||
19 | #define LLVM_CODEGEN_SELECTIONDAGNODES_H | |||
20 | ||||
21 | #include "llvm/ADT/APFloat.h" | |||
22 | #include "llvm/ADT/ArrayRef.h" | |||
23 | #include "llvm/ADT/BitVector.h" | |||
24 | #include "llvm/ADT/FoldingSet.h" | |||
25 | #include "llvm/ADT/GraphTraits.h" | |||
26 | #include "llvm/ADT/SmallPtrSet.h" | |||
27 | #include "llvm/ADT/SmallVector.h" | |||
28 | #include "llvm/ADT/ilist_node.h" | |||
29 | #include "llvm/ADT/iterator.h" | |||
30 | #include "llvm/ADT/iterator_range.h" | |||
31 | #include "llvm/CodeGen/ISDOpcodes.h" | |||
32 | #include "llvm/CodeGen/MachineMemOperand.h" | |||
33 | #include "llvm/CodeGen/Register.h" | |||
34 | #include "llvm/CodeGen/ValueTypes.h" | |||
35 | #include "llvm/IR/Constants.h" | |||
36 | #include "llvm/IR/DebugLoc.h" | |||
37 | #include "llvm/IR/Instruction.h" | |||
38 | #include "llvm/IR/Instructions.h" | |||
39 | #include "llvm/IR/Metadata.h" | |||
40 | #include "llvm/IR/Operator.h" | |||
41 | #include "llvm/Support/AlignOf.h" | |||
42 | #include "llvm/Support/AtomicOrdering.h" | |||
43 | #include "llvm/Support/Casting.h" | |||
44 | #include "llvm/Support/ErrorHandling.h" | |||
45 | #include "llvm/Support/MachineValueType.h" | |||
46 | #include "llvm/Support/TypeSize.h" | |||
47 | #include <algorithm> | |||
48 | #include <cassert> | |||
49 | #include <climits> | |||
50 | #include <cstddef> | |||
51 | #include <cstdint> | |||
52 | #include <cstring> | |||
53 | #include <iterator> | |||
54 | #include <string> | |||
55 | #include <tuple> | |||
56 | ||||
57 | namespace llvm { | |||
58 | ||||
59 | class APInt; | |||
60 | class Constant; | |||
61 | template <typename T> struct DenseMapInfo; | |||
62 | class GlobalValue; | |||
63 | class MachineBasicBlock; | |||
64 | class MachineConstantPoolValue; | |||
65 | class MCSymbol; | |||
66 | class raw_ostream; | |||
67 | class SDNode; | |||
68 | class SelectionDAG; | |||
69 | class Type; | |||
70 | class Value; | |||
71 | ||||
72 | void checkForCycles(const SDNode *N, const SelectionDAG *DAG = nullptr, | |||
73 | bool force = false); | |||
74 | ||||
75 | /// This represents a list of ValueType's that has been intern'd by | |||
76 | /// a SelectionDAG. Instances of this simple value class are returned by | |||
77 | /// SelectionDAG::getVTList(...). | |||
78 | /// | |||
struct SDVTList {
  const EVT *VTs;      // Interned array of value types; storage owned by the DAG.
  unsigned int NumVTs; // Number of entries in VTs.
};
83 | ||||
84 | namespace ISD { | |||
85 | ||||
86 | /// Node predicates | |||
87 | ||||
88 | /// If N is a BUILD_VECTOR or SPLAT_VECTOR node whose elements are all the | |||
89 | /// same constant or undefined, return true and return the constant value in | |||
90 | /// \p SplatValue. | |||
91 | bool isConstantSplatVector(const SDNode *N, APInt &SplatValue); | |||
92 | ||||
93 | /// Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where | |||
94 | /// all of the elements are ~0 or undef. If \p BuildVectorOnly is set to | |||
95 | /// true, it only checks BUILD_VECTOR. | |||
96 | bool isConstantSplatVectorAllOnes(const SDNode *N, | |||
97 | bool BuildVectorOnly = false); | |||
98 | ||||
99 | /// Return true if the specified node is a BUILD_VECTOR or SPLAT_VECTOR where | |||
100 | /// all of the elements are 0 or undef. If \p BuildVectorOnly is set to true, it | |||
101 | /// only checks BUILD_VECTOR. | |||
102 | bool isConstantSplatVectorAllZeros(const SDNode *N, | |||
103 | bool BuildVectorOnly = false); | |||
104 | ||||
105 | /// Return true if the specified node is a BUILD_VECTOR where all of the | |||
106 | /// elements are ~0 or undef. | |||
107 | bool isBuildVectorAllOnes(const SDNode *N); | |||
108 | ||||
109 | /// Return true if the specified node is a BUILD_VECTOR where all of the | |||
110 | /// elements are 0 or undef. | |||
111 | bool isBuildVectorAllZeros(const SDNode *N); | |||
112 | ||||
113 | /// Return true if the specified node is a BUILD_VECTOR node of all | |||
114 | /// ConstantSDNode or undef. | |||
115 | bool isBuildVectorOfConstantSDNodes(const SDNode *N); | |||
116 | ||||
117 | /// Return true if the specified node is a BUILD_VECTOR node of all | |||
118 | /// ConstantFPSDNode or undef. | |||
119 | bool isBuildVectorOfConstantFPSDNodes(const SDNode *N); | |||
120 | ||||
121 | /// Return true if the node has at least one operand and all operands of the | |||
122 | /// specified node are ISD::UNDEF. | |||
123 | bool allOperandsUndef(const SDNode *N); | |||
124 | ||||
125 | } // end namespace ISD | |||
126 | ||||
127 | //===----------------------------------------------------------------------===// | |||
128 | /// Unlike LLVM values, Selection DAG nodes may return multiple | |||
129 | /// values as the result of a computation. Many nodes return multiple values, | |||
130 | /// from loads (which define a token and a return value) to ADDC (which returns | |||
131 | /// a result and a carry value), to calls (which may return an arbitrary number | |||
132 | /// of values). | |||
133 | /// | |||
134 | /// As such, each use of a SelectionDAG computation must indicate the node that | |||
135 | /// computes it as well as which return value to use from that node. This pair | |||
136 | /// of information is represented with the SDValue value type. | |||
137 | /// | |||
class SDValue {
  friend struct DenseMapInfo<SDValue>;

  SDNode *Node = nullptr;       // The node defining the value we are using.
  unsigned ResNo = 0;           // Which return value of the node we are using.

public:
  SDValue() = default;
  SDValue(SDNode *node, unsigned resno);

  /// get the index which selects a specific result in the SDNode
  unsigned getResNo() const { return ResNo; }

  /// get the SDNode which holds the desired result
  SDNode *getNode() const { return Node; }

  /// set the SDNode
  void setNode(SDNode *N) { Node = N; }

  // NOTE: forwards member access straight to the node; callers must ensure
  // Node is non-null (a default-constructed SDValue would dereference null).
  inline SDNode *operator->() const { return Node; }

  bool operator==(const SDValue &O) const {
    return Node == O.Node && ResNo == O.ResNo;
  }
  bool operator!=(const SDValue &O) const {
    return !operator==(O);
  }
  bool operator<(const SDValue &O) const {
    // Lexicographic order on (Node pointer, result number).
    return std::tie(Node, ResNo) < std::tie(O.Node, O.ResNo);
  }
  explicit operator bool() const {
    return Node != nullptr;
  }

  /// Return another result value of the same node.
  SDValue getValue(unsigned R) const {
    return SDValue(Node, R);
  }

  /// Return true if this node is an operand of N.
  bool isOperandOf(const SDNode *N) const;

  /// Return the ValueType of the referenced return value.
  inline EVT getValueType() const;

  /// Return the simple ValueType of the referenced return value.
  MVT getSimpleValueType() const {
    return getValueType().getSimpleVT();
  }

  /// Returns the size of the value in bits.
  ///
  /// If the value type is a scalable vector type, the scalable property will
  /// be set and the runtime size will be a positive integer multiple of the
  /// base size.
  TypeSize getValueSizeInBits() const {
    return getValueType().getSizeInBits();
  }

  uint64_t getScalarValueSizeInBits() const {
    return getValueType().getScalarType().getFixedSizeInBits();
  }

  // Forwarding methods - These forward to the corresponding methods in SDNode.
  inline unsigned getOpcode() const;
  inline unsigned getNumOperands() const;
  inline const SDValue &getOperand(unsigned i) const;
  inline uint64_t getConstantOperandVal(unsigned i) const;
  inline const APInt &getConstantOperandAPInt(unsigned i) const;
  inline bool isTargetMemoryOpcode() const;
  inline bool isTargetOpcode() const;
  inline bool isMachineOpcode() const;
  inline bool isUndef() const;
  inline unsigned getMachineOpcode() const;
  inline const DebugLoc &getDebugLoc() const;
  inline void dump() const;
  inline void dump(const SelectionDAG *G) const;
  inline void dumpr() const;
  inline void dumpr(const SelectionDAG *G) const;

  /// Return true if this operand (which must be a chain) reaches the
  /// specified operand without crossing any side-effecting instructions.
  /// In practice, this looks through token factors and non-volatile loads.
  /// In order to remain efficient, this only
  /// looks a couple of nodes in, it does not do an exhaustive search.
  bool reachesChainWithoutSideEffects(SDValue Dest,
                                      unsigned Depth = 2) const;

  /// Return true if there are no nodes using value ResNo of Node.
  inline bool use_empty() const;

  /// Return true if there is exactly one node using value ResNo of Node.
  inline bool hasOneUse() const;
};
231 | ||||
// DenseMap traits for SDValue. The empty/tombstone sentinels use out-of-range
// ResNo values (-1U / -2U) with a null node, which requires the friend access
// granted by SDValue.
template<> struct DenseMapInfo<SDValue> {
  static inline SDValue getEmptyKey() {
    SDValue V;
    V.ResNo = -1U; // Sentinel: never a valid result index.
    return V;
  }

  static inline SDValue getTombstoneKey() {
    SDValue V;
    V.ResNo = -2U; // Distinct sentinel for erased slots.
    return V;
  }

  static unsigned getHashValue(const SDValue &Val) {
    // Mix two shifts of the pointer to spread allocation-aligned addresses,
    // then add the result number so sibling results hash differently.
    return ((unsigned)((uintptr_t)Val.getNode() >> 4) ^
            (unsigned)((uintptr_t)Val.getNode() >> 9)) + Val.getResNo();
  }

  static bool isEqual(const SDValue &LHS, const SDValue &RHS) {
    return LHS == RHS;
  }
};
254 | ||||
255 | /// Allow casting operators to work directly on | |||
256 | /// SDValues as if they were SDNode*'s. | |||
257 | template<> struct simplify_type<SDValue> { | |||
258 | using SimpleType = SDNode *; | |||
259 | ||||
260 | static SimpleType getSimplifiedValue(SDValue &Val) { | |||
261 | return Val.getNode(); | |||
262 | } | |||
263 | }; | |||
// Same unwrapping for const SDValue; note the yielded pointer is still
// non-const SDNode* (see the commented-out /*const*/ below).
template<> struct simplify_type<const SDValue> {
  using SimpleType = /*const*/ SDNode *;

  static SimpleType getSimplifiedValue(const SDValue &Val) {
    return Val.getNode();
  }
};
271 | ||||
272 | /// Represents a use of a SDNode. This class holds an SDValue, | |||
273 | /// which records the SDNode being used and the result number, a | |||
274 | /// pointer to the SDNode using the value, and Next and Prev pointers, | |||
275 | /// which link together all the uses of an SDNode. | |||
276 | /// | |||
class SDUse {
  /// Val - The value being used.
  SDValue Val;
  /// User - The user of this value.
  SDNode *User = nullptr;
  /// Prev, Next - Pointers to the uses list of the SDNode referred by
  /// this operand. (Intrusive doubly-linked list: Prev points at the
  /// predecessor's Next slot, or at the list head.)
  SDUse **Prev = nullptr;
  SDUse *Next = nullptr;

public:
  SDUse() = default;
  // Non-copyable: each SDUse is wired into exactly one use list.
  SDUse(const SDUse &U) = delete;
  SDUse &operator=(const SDUse &) = delete;

  /// Normally SDUse will just implicitly convert to an SDValue that it holds.
  operator const SDValue&() const { return Val; }

  /// If implicit conversion to SDValue doesn't work, the get() method returns
  /// the SDValue.
  const SDValue &get() const { return Val; }

  /// This returns the SDNode that contains this Use.
  SDNode *getUser() { return User; }

  /// Get the next SDUse in the use list.
  SDUse *getNext() const { return Next; }

  /// Convenience function for get().getNode().
  SDNode *getNode() const { return Val.getNode(); }
  /// Convenience function for get().getResNo().
  unsigned getResNo() const { return Val.getResNo(); }
  /// Convenience function for get().getValueType().
  EVT getValueType() const { return Val.getValueType(); }

  /// Convenience function for get().operator==
  bool operator==(const SDValue &V) const {
    return Val == V;
  }

  /// Convenience function for get().operator!=
  bool operator!=(const SDValue &V) const {
    return Val != V;
  }

  /// Convenience function for get().operator<
  bool operator<(const SDValue &V) const {
    return Val < V;
  }

private:
  friend class SelectionDAG;
  friend class SDNode;
  // TODO: unfriend HandleSDNode once we fix its operand handling.
  friend class HandleSDNode;

  void setUser(SDNode *p) { User = p; }

  /// Remove this use from its existing use list, assign it the
  /// given value, and add it to the new value's node's use list.
  inline void set(const SDValue &V);
  /// Like set, but only supports initializing a newly-allocated
  /// SDUse with a non-null value.
  inline void setInitial(const SDValue &V);
  /// Like set, but only sets the Node portion of the value,
  /// leaving the ResNo portion unmodified.
  inline void setNode(SDNode *N);

  // Push this use onto the front of the list headed at *List.
  void addToList(SDUse **List) {
    Next = *List;
    if (Next) Next->Prev = &Next;
    Prev = List;
    *List = this;
  }

  // Unlink this use; Prev always points at the slot holding us, so no
  // null check is needed for it.
  void removeFromList() {
    *Prev = Next;
    if (Next) Next->Prev = Prev;
  }
};
357 | ||||
358 | /// simplify_type specializations - Allow casting operators to work directly on | |||
359 | /// SDValues as if they were SDNode*'s. | |||
// Allow isa/cast/dyn_cast to operate directly on an SDUse by unwrapping
// to the used node.
template<> struct simplify_type<SDUse> {
  using SimpleType = SDNode *;

  static SimpleType getSimplifiedValue(SDUse &Val) {
    return Val.getNode();
  }
};
367 | ||||
/// These are IR-level optimization flags that may be propagated to SDNodes.
/// TODO: This data structure should be shared by the IR optimizer and the
/// backend.
struct SDNodeFlags {
private:
  // Integer wrap/division flags.
  bool NoUnsignedWrap : 1;
  bool NoSignedWrap : 1;
  bool Exact : 1;
  // Fast-math flags (mirror llvm::FastMathFlags on the IR level).
  bool NoNaNs : 1;
  bool NoInfs : 1;
  bool NoSignedZeros : 1;
  bool AllowReciprocal : 1;
  bool AllowContract : 1;
  bool ApproximateFuncs : 1;
  bool AllowReassociation : 1;

  // We assume instructions do not raise floating-point exceptions by default,
  // and only those marked explicitly may do so. We could choose to represent
  // this via a positive "FPExcept" flags like on the MI level, but having a
  // negative "NoFPExcept" flag here (that defaults to true) makes the flag
  // intersection logic more straightforward.
  bool NoFPExcept : 1;

public:
  /// Default constructor turns off all optimization flags.
  SDNodeFlags()
      : NoUnsignedWrap(false), NoSignedWrap(false), Exact(false), NoNaNs(false),
        NoInfs(false), NoSignedZeros(false), AllowReciprocal(false),
        AllowContract(false), ApproximateFuncs(false),
        AllowReassociation(false), NoFPExcept(false) {}

  /// Propagate the fast-math-flags from an IR FPMathOperator.
  /// Note: only the FP flags are copied; the integer flags (nuw/nsw/exact)
  /// and NoFPExcept are left untouched.
  void copyFMF(const FPMathOperator &FPMO) {
    setNoNaNs(FPMO.hasNoNaNs());
    setNoInfs(FPMO.hasNoInfs());
    setNoSignedZeros(FPMO.hasNoSignedZeros());
    setAllowReciprocal(FPMO.hasAllowReciprocal());
    setAllowContract(FPMO.hasAllowContract());
    setApproximateFuncs(FPMO.hasApproxFunc());
    setAllowReassociation(FPMO.hasAllowReassoc());
  }

  // These are mutators for each flag.
  void setNoUnsignedWrap(bool b) { NoUnsignedWrap = b; }
  void setNoSignedWrap(bool b) { NoSignedWrap = b; }
  void setExact(bool b) { Exact = b; }
  void setNoNaNs(bool b) { NoNaNs = b; }
  void setNoInfs(bool b) { NoInfs = b; }
  void setNoSignedZeros(bool b) { NoSignedZeros = b; }
  void setAllowReciprocal(bool b) { AllowReciprocal = b; }
  void setAllowContract(bool b) { AllowContract = b; }
  void setApproximateFuncs(bool b) { ApproximateFuncs = b; }
  void setAllowReassociation(bool b) { AllowReassociation = b; }
  void setNoFPExcept(bool b) { NoFPExcept = b; }

  // These are accessors for each flag.
  bool hasNoUnsignedWrap() const { return NoUnsignedWrap; }
  bool hasNoSignedWrap() const { return NoSignedWrap; }
  bool hasExact() const { return Exact; }
  bool hasNoNaNs() const { return NoNaNs; }
  bool hasNoInfs() const { return NoInfs; }
  bool hasNoSignedZeros() const { return NoSignedZeros; }
  bool hasAllowReciprocal() const { return AllowReciprocal; }
  bool hasAllowContract() const { return AllowContract; }
  bool hasApproximateFuncs() const { return ApproximateFuncs; }
  bool hasAllowReassociation() const { return AllowReassociation; }
  bool hasNoFPExcept() const { return NoFPExcept; }

  /// Clear any flags in this flag set that aren't also set in Flags. All
  /// flags will be cleared if Flags are undefined.
  void intersectWith(const SDNodeFlags Flags) {
    NoUnsignedWrap &= Flags.NoUnsignedWrap;
    NoSignedWrap &= Flags.NoSignedWrap;
    Exact &= Flags.Exact;
    NoNaNs &= Flags.NoNaNs;
    NoInfs &= Flags.NoInfs;
    NoSignedZeros &= Flags.NoSignedZeros;
    AllowReciprocal &= Flags.AllowReciprocal;
    AllowContract &= Flags.AllowContract;
    ApproximateFuncs &= Flags.ApproximateFuncs;
    AllowReassociation &= Flags.AllowReassociation;
    NoFPExcept &= Flags.NoFPExcept;
  }
};
452 | ||||
/// Represents one node in the SelectionDAG.
///
class SDNode : public FoldingSetNode, public ilist_node<SDNode> {
private:
  /// The operation that this node performs.
  /// Non-negative values are ISD (or <target>ISD) opcodes; post-isel machine
  /// opcodes are stored as the bitwise complement of the MachineInstr opcode
  /// (hence negative — see isMachineOpcode()/getMachineOpcode()).
  int16_t NodeType;

protected:
  // We define a set of mini-helper classes to help us interpret the bits in our
  // SubclassData. These are designed to fit within a uint16_t so they pack
  // with NodeType.

#if defined(_AIX) && (!defined(__GNUC__) || defined(__clang__))
// Except for GCC; by default, AIX compilers store bit-fields in 4-byte words
// and give the `pack` pragma push semantics.
#define BEGIN_TWO_BYTE_PACK() _Pragma("pack(2)")
#define END_TWO_BYTE_PACK() _Pragma("pack(pop)")
#else
#define BEGIN_TWO_BYTE_PACK()
#define END_TWO_BYTE_PACK()
#endif

BEGIN_TWO_BYTE_PACK()
  class SDNodeBitfields {
    friend class SDNode;
    friend class MemIntrinsicSDNode;
    friend class MemSDNode;
    friend class SelectionDAG;

    uint16_t HasDebugValue : 1;
    uint16_t IsMemIntrinsic : 1;
    uint16_t IsDivergent : 1;
  };
  enum { NumSDNodeBits = 3 };

  // Each subclass bitfield type below starts with an anonymous bitfield that
  // skips the bits used by its base class so the layouts overlay correctly
  // inside the union further down.
  class ConstantSDNodeBitfields {
    friend class ConstantSDNode;

    uint16_t : NumSDNodeBits;

    uint16_t IsOpaque : 1;
  };

  class MemSDNodeBitfields {
    friend class MemSDNode;
    friend class MemIntrinsicSDNode;
    friend class AtomicSDNode;

    uint16_t : NumSDNodeBits;

    uint16_t IsVolatile : 1;
    uint16_t IsNonTemporal : 1;
    uint16_t IsDereferenceable : 1;
    uint16_t IsInvariant : 1;
  };
  enum { NumMemSDNodeBits = NumSDNodeBits + 4 };

  class LSBaseSDNodeBitfields {
    friend class LSBaseSDNode;
    friend class MaskedLoadStoreSDNode;
    friend class MaskedGatherScatterSDNode;

    uint16_t : NumMemSDNodeBits;

    // This storage is shared between disparate class hierarchies to hold an
    // enumeration specific to the class hierarchy in use.
    //   LSBaseSDNode => enum ISD::MemIndexedMode
    //   MaskedLoadStoreBaseSDNode => enum ISD::MemIndexedMode
    //   MaskedGatherScatterSDNode => enum ISD::MemIndexType
    uint16_t AddressingMode : 3;
  };
  enum { NumLSBaseSDNodeBits = NumMemSDNodeBits + 3 };

  class LoadSDNodeBitfields {
    friend class LoadSDNode;
    friend class MaskedLoadSDNode;
    friend class MaskedGatherSDNode;

    uint16_t : NumLSBaseSDNodeBits;

    uint16_t ExtTy : 2; // enum ISD::LoadExtType
    uint16_t IsExpanding : 1;
  };

  class StoreSDNodeBitfields {
    friend class StoreSDNode;
    friend class MaskedStoreSDNode;
    friend class MaskedScatterSDNode;

    uint16_t : NumLSBaseSDNodeBits;

    uint16_t IsTruncating : 1;
    uint16_t IsCompressing : 1;
  };

  // All subclass bit sets alias the same two bytes; RawSDNodeBits gives
  // byte-wise access (e.g. for zero-initialization in the constructor).
  union {
    char RawSDNodeBits[sizeof(uint16_t)];
    SDNodeBitfields SDNodeBits;
    ConstantSDNodeBitfields ConstantSDNodeBits;
    MemSDNodeBitfields MemSDNodeBits;
    LSBaseSDNodeBitfields LSBaseSDNodeBits;
    LoadSDNodeBitfields LoadSDNodeBits;
    StoreSDNodeBitfields StoreSDNodeBits;
  };
END_TWO_BYTE_PACK()
#undef BEGIN_TWO_BYTE_PACK
#undef END_TWO_BYTE_PACK

  // RawSDNodeBits must cover the entirety of the union. This means that all of
  // the union's members must have size <= RawSDNodeBits. We write the RHS as
  // "2" instead of sizeof(RawSDNodeBits) because MSVC can't handle the latter.
  static_assert(sizeof(SDNodeBitfields) <= 2, "field too wide");
  static_assert(sizeof(ConstantSDNodeBitfields) <= 2, "field too wide");
  static_assert(sizeof(MemSDNodeBitfields) <= 2, "field too wide");
  static_assert(sizeof(LSBaseSDNodeBitfields) <= 2, "field too wide");
  static_assert(sizeof(LoadSDNodeBitfields) <= 2, "field too wide");
  static_assert(sizeof(StoreSDNodeBitfields) <= 2, "field too wide");

private:
  friend class SelectionDAG;
  // TODO: unfriend HandleSDNode once we fix its operand handling.
  friend class HandleSDNode;

  /// Unique id per SDNode in the DAG.
  int NodeId = -1;

  /// The values that are used by this operation.
  SDUse *OperandList = nullptr;

  /// The types of the values this node defines. SDNode's may
  /// define multiple values simultaneously.
  const EVT *ValueList;

  /// List of uses for this SDNode.
  SDUse *UseList = nullptr;

  /// The number of entries in the Operand/Value list.
  unsigned short NumOperands = 0;
  unsigned short NumValues;

  // The ordering of the SDNodes. It roughly corresponds to the ordering of the
  // original LLVM instructions.
  // This is used for turning off scheduling, because we'll forgo
  // the normal scheduling algorithms and output the instructions according to
  // this ordering.
  unsigned IROrder;

  /// Source line information.
  DebugLoc debugLoc;

  /// Return a pointer to the specified value type.
  static const EVT *getValueTypeList(EVT VT);

  SDNodeFlags Flags;

public:
  /// Unique and persistent id per SDNode in the DAG.
  /// Used for debug printing.
  uint16_t PersistentId;

  //===--------------------------------------------------------------------===//
  //  Accessors
  //

  /// Return the SelectionDAG opcode value for this node. For
  /// pre-isel nodes (those for which isMachineOpcode returns false), these
  /// are the opcode values in the ISD and <target>ISD namespaces. For
  /// post-isel opcodes, see getMachineOpcode.
  unsigned getOpcode() const { return (unsigned short)NodeType; }

  /// Test if this node has a target-specific opcode (in the
  /// \<target\>ISD namespace).
  bool isTargetOpcode() const { return NodeType >= ISD::BUILTIN_OP_END; }

  /// Test if this node has a target-specific opcode that may raise
  /// FP exceptions (in the \<target\>ISD namespace and greater than
  /// FIRST_TARGET_STRICTFP_OPCODE). Note that all target memory
  /// opcode are currently automatically considered to possibly raise
  /// FP exceptions as well.
  bool isTargetStrictFPOpcode() const {
    return NodeType >= ISD::FIRST_TARGET_STRICTFP_OPCODE;
  }

  /// Test if this node has a target-specific
  /// memory-referencing opcode (in the \<target\>ISD namespace and
  /// greater than FIRST_TARGET_MEMORY_OPCODE).
  bool isTargetMemoryOpcode() const {
    return NodeType >= ISD::FIRST_TARGET_MEMORY_OPCODE;
  }

  /// Return true if this node is an ISD::UNDEF node.
  bool isUndef() const { return NodeType == ISD::UNDEF; }

  /// Test if this node is a memory intrinsic (with valid pointer information).
  /// INTRINSIC_W_CHAIN and INTRINSIC_VOID nodes are sometimes created for
  /// non-memory intrinsics (with chains) that are not really instances of
  /// MemSDNode. For such nodes, we need some extra state to determine the
  /// proper classof relationship.
  bool isMemIntrinsic() const {
    return (NodeType == ISD::INTRINSIC_W_CHAIN ||
            NodeType == ISD::INTRINSIC_VOID) &&
           SDNodeBits.IsMemIntrinsic;
  }

  /// Test if this node is a strict floating point pseudo-op.
  bool isStrictFPOpcode() {
    switch (NodeType) {
      default:
        return false;
      case ISD::STRICT_FP16_TO_FP:
      case ISD::STRICT_FP_TO_FP16:
#define DAG_INSTRUCTION(NAME, NARG, ROUND_MODE, INTRINSIC, DAGN)               \
      case ISD::STRICT_##DAGN:
#include "llvm/IR/ConstrainedOps.def"
        return true;
    }
  }

  /// Test if this node has a post-isel opcode, directly
  /// corresponding to a MachineInstr opcode.
  bool isMachineOpcode() const { return NodeType < 0; }

  /// This may only be called if isMachineOpcode returns
  /// true. It returns the MachineInstr opcode value that the node's opcode
  /// corresponds to.
  unsigned getMachineOpcode() const {
    assert(isMachineOpcode() && "Not a MachineInstr opcode!");
    // Machine opcodes are stored as their bitwise complement; undo it.
    return ~NodeType;
  }

  bool getHasDebugValue() const { return SDNodeBits.HasDebugValue; }
  void setHasDebugValue(bool b) { SDNodeBits.HasDebugValue = b; }

  bool isDivergent() const { return SDNodeBits.IsDivergent; }

  /// Return true if there are no uses of this node.
  bool use_empty() const { return UseList == nullptr; }

  /// Return true if there is exactly one use of this node.
  bool hasOneUse() const { return hasSingleElement(uses()); }

  /// Return the number of uses of this node. This method takes
  /// time proportional to the number of uses.
  size_t use_size() const { return std::distance(use_begin(), use_end()); }

  /// Return the unique node id.
  int getNodeId() const { return NodeId; }

  /// Set unique node id.
  void setNodeId(int Id) { NodeId = Id; }

  /// Return the node ordering.
  unsigned getIROrder() const { return IROrder; }

  /// Set the node ordering.
  void setIROrder(unsigned Order) { IROrder = Order; }

  /// Return the source location info.
  const DebugLoc &getDebugLoc() const { return debugLoc; }

  /// Set source location info. Try to avoid this, putting
  /// it in the constructor is preferable.
  void setDebugLoc(DebugLoc dl) { debugLoc = std::move(dl); }

  /// This class provides iterator support for SDUse
  /// operands that use a specific SDNode.
  class use_iterator {
    friend class SDNode;

    /// Current position in the singly-linked use list; null means end().
    SDUse *Op = nullptr;

    explicit use_iterator(SDUse *op) : Op(op) {}

  public:
    using iterator_category = std::forward_iterator_tag;
    using value_type = SDUse;
    using difference_type = std::ptrdiff_t;
    using pointer = value_type *;
    using reference = value_type &;

    use_iterator() = default;
    use_iterator(const use_iterator &I) : Op(I.Op) {}

    bool operator==(const use_iterator &x) const {
      return Op == x.Op;
    }
    bool operator!=(const use_iterator &x) const {
      return !operator==(x);
    }

    /// Return true if this iterator is at the end of uses list.
    bool atEnd() const { return Op == nullptr; }

    // Iterator traversal: forward iteration only.
    use_iterator &operator++() {          // Preincrement
      assert(Op && "Cannot increment end iterator!");
      Op = Op->getNext();
      return *this;
    }

    use_iterator operator++(int) {        // Postincrement
      use_iterator tmp = *this; ++*this; return tmp;
    }

    /// Retrieve a pointer to the current user node.
    SDNode *operator*() const {
      assert(Op && "Cannot dereference end iterator!");
      return Op->getUser();
    }

    SDNode *operator->() const { return operator*(); }

    SDUse &getUse() const { return *Op; }

    /// Retrieve the operand # of this use in its user.
    unsigned getOperandNo() const {
      assert(Op && "Cannot dereference end iterator!");
      return (unsigned)(Op - Op->getUser()->OperandList);
    }
  };

  /// Provide iteration support to walk over all uses of an SDNode.
  use_iterator use_begin() const {
    return use_iterator(UseList);
  }

  static use_iterator use_end() { return use_iterator(nullptr); }

  inline iterator_range<use_iterator> uses() {
    return make_range(use_begin(), use_end());
  }
  inline iterator_range<use_iterator> uses() const {
    return make_range(use_begin(), use_end());
  }

  /// Return true if there are exactly NUSES uses of the indicated value.
  /// This method ignores uses of other values defined by this operation.
  bool hasNUsesOfValue(unsigned NUses, unsigned Value) const;

  /// Return true if there are any use of the indicated value.
  /// This method ignores uses of other values defined by this operation.
  bool hasAnyUseOfValue(unsigned Value) const;

  /// Return true if this node is the only use of N.
  bool isOnlyUserOf(const SDNode *N) const;

  /// Return true if this node is an operand of N.
  bool isOperandOf(const SDNode *N) const;

  /// Return true if this node is a predecessor of N.
  /// NOTE: Implemented on top of hasPredecessor and every bit as
  /// expensive. Use carefully.
  bool isPredecessorOf(const SDNode *N) const {
    return N->hasPredecessor(this);
  }

  /// Return true if N is a predecessor of this node.
  /// N is either an operand of this node, or can be reached by recursively
  /// traversing up the operands.
  /// NOTE: This is an expensive method. Use it carefully.
  bool hasPredecessor(const SDNode *N) const;

  /// Returns true if N is a predecessor of any node in Worklist. This
  /// helper keeps Visited and Worklist sets externally to allow unions
  /// searches to be performed in parallel, caching of results across
  /// queries and incremental addition to Worklist. Stops early if N is
  /// found but will resume. Remember to clear Visited and Worklists
  /// if DAG changes. MaxSteps gives a maximum number of nodes to visit before
  /// giving up. The TopologicalPrune flag signals that positive NodeIds are
  /// topologically ordered (Operands have strictly smaller node id) and search
  /// can be pruned leveraging this.
  static bool hasPredecessorHelper(const SDNode *N,
                                   SmallPtrSetImpl<const SDNode *> &Visited,
                                   SmallVectorImpl<const SDNode *> &Worklist,
                                   unsigned int MaxSteps = 0,
                                   bool TopologicalPrune = false) {
    SmallVector<const SDNode *, 8> DeferredNodes;
    if (Visited.count(N))
      return true;

    // Node Id's are assigned in three places: As a topological
    // ordering (> 0), during legalization (results in values set to
    // 0), new nodes (set to -1). If N has a topological id then we
    // know that all nodes with ids smaller than it cannot be
    // successors and we need not check them. Filter out all nodes
    // that can't be matches. We add them to the worklist before exit
    // in case of multiple calls. Note that during selection the topological id
    // may be violated if a node's predecessor is selected before it. We mark
    // this at selection negating the id of unselected successors and
    // restricting topological pruning to positive ids.

    int NId = N->getNodeId();
    // If we Invalidated the Id, reconstruct original NId.
    if (NId < -1)
      NId = -(NId + 1);

    bool Found = false;
    while (!Worklist.empty()) {
      const SDNode *M = Worklist.pop_back_val();
      int MId = M->getNodeId();
      if (TopologicalPrune && M->getOpcode() != ISD::TokenFactor && (NId > 0) &&
          (MId > 0) && (MId < NId)) {
        DeferredNodes.push_back(M);
        continue;
      }
      for (const SDValue &OpV : M->op_values()) {
        SDNode *Op = OpV.getNode();
        if (Visited.insert(Op).second)
          Worklist.push_back(Op);
        if (Op == N)
          Found = true;
      }
      if (Found)
        break;
      if (MaxSteps != 0 && Visited.size() >= MaxSteps)
        break;
    }
    // Push deferred nodes back on worklist.
    Worklist.append(DeferredNodes.begin(), DeferredNodes.end());
    // If we bailed early, conservatively return found.
    if (MaxSteps != 0 && Visited.size() >= MaxSteps)
      return true;
    return Found;
  }

  /// Return true if all the users of N are contained in Nodes.
  /// NOTE: Requires at least one match, but doesn't require them all.
  static bool areOnlyUsersOf(ArrayRef<const SDNode *> Nodes, const SDNode *N);

  /// Return the number of values used by this operation.
  unsigned getNumOperands() const { return NumOperands; }

  /// Return the maximum number of operands that a SDNode can hold.
  static constexpr size_t getMaxNumOperands() {
    return std::numeric_limits<decltype(SDNode::NumOperands)>::max();
  }

  /// Helper method returns the integer value of a ConstantSDNode operand.
  inline uint64_t getConstantOperandVal(unsigned Num) const;

  /// Helper method returns the APInt of a ConstantSDNode operand.
  inline const APInt &getConstantOperandAPInt(unsigned Num) const;

  const SDValue &getOperand(unsigned Num) const {
    assert(Num < NumOperands && "Invalid child # of SDNode!");
    return OperandList[Num];
  }

  using op_iterator = SDUse *;

  op_iterator op_begin() const { return OperandList; }
  op_iterator op_end() const { return OperandList+NumOperands; }
  ArrayRef<SDUse> ops() const { return makeArrayRef(op_begin(), op_end()); }

  /// Iterator for directly iterating over the operand SDValue's.
  struct value_op_iterator
      : iterator_adaptor_base<value_op_iterator, op_iterator,
                              std::random_access_iterator_tag, SDValue,
                              ptrdiff_t, value_op_iterator *,
                              value_op_iterator *> {
    explicit value_op_iterator(SDUse *U = nullptr)
      : iterator_adaptor_base(U) {}

    const SDValue &operator*() const { return I->get(); }
  };

  iterator_range<value_op_iterator> op_values() const {
    return make_range(value_op_iterator(op_begin()),
                      value_op_iterator(op_end()));
  }

  SDVTList getVTList() const {
    SDVTList X = { ValueList, NumValues };
    return X;
  }

  /// If this node has a glue operand, return the node
  /// to which the glue operand points. Otherwise return NULL.
  /// (Glue, when present, is always the last operand.)
  SDNode *getGluedNode() const {
    if (getNumOperands() != 0 &&
        getOperand(getNumOperands()-1).getValueType() == MVT::Glue)
      return getOperand(getNumOperands()-1).getNode();
    return nullptr;
  }

  /// If this node has a glue value with a user, return
  /// the user (there is at most one). Otherwise return NULL.
  SDNode *getGluedUser() const {
    for (use_iterator UI = use_begin(), UE = use_end(); UI != UE; ++UI)
      if (UI.getUse().get().getValueType() == MVT::Glue)
        return *UI;
    return nullptr;
  }

  SDNodeFlags getFlags() const { return Flags; }
  void setFlags(SDNodeFlags NewFlags) { Flags = NewFlags; }

  /// Clear any flags in this node that aren't also set in Flags.
  /// If Flags is not in a defined state then this has no effect.
  void intersectFlagsWith(const SDNodeFlags Flags);

  /// Return the number of values defined/returned by this operator.
  unsigned getNumValues() const { return NumValues; }

  /// Return the type of a specified result.
  EVT getValueType(unsigned ResNo) const {
    assert(ResNo < NumValues && "Illegal result number!");
    return ValueList[ResNo];
  }

  /// Return the type of a specified result as a simple type.
  MVT getSimpleValueType(unsigned ResNo) const {
    return getValueType(ResNo).getSimpleVT();
  }

  /// Returns MVT::getSizeInBits(getValueType(ResNo)).
  ///
  /// If the value type is a scalable vector type, the scalable property will
  /// be set and the runtime size will be a positive integer multiple of the
  /// base size.
  TypeSize getValueSizeInBits(unsigned ResNo) const {
    return getValueType(ResNo).getSizeInBits();
  }

  using value_iterator = const EVT *;

  value_iterator value_begin() const { return ValueList; }
  value_iterator value_end() const { return ValueList+NumValues; }
  iterator_range<value_iterator> values() const {
    return llvm::make_range(value_begin(), value_end());
  }

  /// Return the opcode of this operation for printing.
  std::string getOperationName(const SelectionDAG *G = nullptr) const;
  static const char* getIndexedModeName(ISD::MemIndexedMode AM);
  void print_types(raw_ostream &OS, const SelectionDAG *G) const;
  void print_details(raw_ostream &OS, const SelectionDAG *G) const;
  void print(raw_ostream &OS, const SelectionDAG *G = nullptr) const;
  void printr(raw_ostream &OS, const SelectionDAG *G = nullptr) const;

  /// Print a SelectionDAG node and all children down to
  /// the leaves. The given SelectionDAG allows target-specific nodes
  /// to be printed in human-readable form. Unlike printr, this will
  /// print the whole DAG, including children that appear multiple
  /// times.
  ///
  void printrFull(raw_ostream &O, const SelectionDAG *G = nullptr) const;

  /// Print a SelectionDAG node and children up to
  /// depth "depth." The given SelectionDAG allows target-specific
  /// nodes to be printed in human-readable form. Unlike printr, this
  /// will print children that appear multiple times wherever they are
  /// used.
  ///
  void printrWithDepth(raw_ostream &O, const SelectionDAG *G = nullptr,
                       unsigned depth = 100) const;

  /// Dump this node, for debugging.
  void dump() const;

  /// Dump (recursively) this node and its use-def subgraph.
  void dumpr() const;

  /// Dump this node, for debugging.
  /// The given SelectionDAG allows target-specific nodes to be printed
  /// in human-readable form.
  void dump(const SelectionDAG *G) const;

  /// Dump (recursively) this node and its use-def subgraph.
  /// The given SelectionDAG allows target-specific nodes to be printed
  /// in human-readable form.
  void dumpr(const SelectionDAG *G) const;

  /// printrFull to dbgs(). The given SelectionDAG allows
  /// target-specific nodes to be printed in human-readable form.
  /// Unlike dumpr, this will print the whole DAG, including children
  /// that appear multiple times.
  void dumprFull(const SelectionDAG *G = nullptr) const;

  /// printrWithDepth to dbgs(). The given
  /// SelectionDAG allows target-specific nodes to be printed in
  /// human-readable form. Unlike dumpr, this will print children
  /// that appear multiple times wherever they are used.
  ///
  void dumprWithDepth(const SelectionDAG *G = nullptr,
                      unsigned depth = 100) const;

  /// Gather unique data for the node.
  void Profile(FoldingSetNodeID &ID) const;

  /// This method should only be used by the SDUse class.
  void addUse(SDUse &U) { U.addToList(&UseList); }

protected:
  static SDVTList getSDVTList(EVT VT) {
    SDVTList Ret = { getValueTypeList(VT), 1 };
    return Ret;
  }

  /// Create an SDNode.
  ///
  /// SDNodes are created without any operands, and never own the operand
  /// storage. To add operands, see SelectionDAG::createOperands.
  SDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTs)
      : NodeType(Opc), ValueList(VTs.VTs), NumValues(VTs.NumVTs),
        IROrder(Order), debugLoc(std::move(dl)) {
    memset(&RawSDNodeBits, 0, sizeof(RawSDNodeBits));
    assert(debugLoc.hasTrivialDestructor() && "Expected trivial destructor");
    assert(NumValues == VTs.NumVTs &&
           "NumValues wasn't wide enough for its operands!");
  }

  /// Release the operands and set this node to have zero operands.
  void DropOperands();
};
1068 | ||||
1069 | /// Wrapper class for IR location info (IR ordering and DebugLoc) to be passed | |||
1070 | /// into SDNode creation functions. | |||
1071 | /// When an SDNode is created from the DAGBuilder, the DebugLoc is extracted | |||
1072 | /// from the original Instruction, and IROrder is the ordinal position of | |||
1073 | /// the instruction. | |||
1074 | /// When an SDNode is created after the DAG is being built, both DebugLoc and | |||
1075 | /// the IROrder are propagated from the original SDNode. | |||
1076 | /// So SDLoc class provides two constructors besides the default one, one to | |||
1077 | /// be used by the DAGBuilder, the other to be used by others. | |||
1078 | class SDLoc { | |||
1079 | private: | |||
1080 | DebugLoc DL; | |||
1081 | int IROrder = 0; | |||
1082 | ||||
1083 | public: | |||
1084 | SDLoc() = default; | |||
1085 | SDLoc(const SDNode *N) : DL(N->getDebugLoc()), IROrder(N->getIROrder()) {} | |||
1086 | SDLoc(const SDValue V) : SDLoc(V.getNode()) {} | |||
1087 | SDLoc(const Instruction *I, int Order) : IROrder(Order) { | |||
1088 | assert(Order >= 0 && "bad IROrder")((void)0); | |||
1089 | if (I) | |||
1090 | DL = I->getDebugLoc(); | |||
1091 | } | |||
1092 | ||||
1093 | unsigned getIROrder() const { return IROrder; } | |||
1094 | const DebugLoc &getDebugLoc() const { return DL; } | |||
1095 | }; | |||
1096 | ||||
// Define inline functions from the SDValue class.

// Construct an SDValue referring to result #resno of the given node (which
// may be null for an empty/sentinel SDValue).
inline SDValue::SDValue(SDNode *node, unsigned resno)
    : Node(node), ResNo(resno) {
  // Explicitly check for !ResNo to avoid use-after-free, because there are
  // callers that use SDValue(N, 0) with a deleted N to indicate successful
  // combines.
  assert((!Node || !ResNo || ResNo < Node->getNumValues()) &&
         "Invalid result number for the given node!");
  assert(ResNo < -2U && "Cannot use result numbers reserved for DenseMaps.");
}
1108 | ||||
1109 | inline unsigned SDValue::getOpcode() const { | |||
1110 | return Node->getOpcode(); | |||
| ||||
1111 | } | |||
1112 | ||||
// The following SDValue members all forward to the wrapped SDNode; every one
// of them dereferences Node unconditionally, so the caller must ensure the
// SDValue is non-null.

// Type of the specific result (ResNo) this value refers to.
inline EVT SDValue::getValueType() const {
  return Node->getValueType(ResNo);
}

inline unsigned SDValue::getNumOperands() const {
  return Node->getNumOperands();
}

inline const SDValue &SDValue::getOperand(unsigned i) const {
  return Node->getOperand(i);
}

inline uint64_t SDValue::getConstantOperandVal(unsigned i) const {
  return Node->getConstantOperandVal(i);
}

inline const APInt &SDValue::getConstantOperandAPInt(unsigned i) const {
  return Node->getConstantOperandAPInt(i);
}

inline bool SDValue::isTargetOpcode() const {
  return Node->isTargetOpcode();
}

inline bool SDValue::isTargetMemoryOpcode() const {
  return Node->isTargetMemoryOpcode();
}

inline bool SDValue::isMachineOpcode() const {
  return Node->isMachineOpcode();
}

inline unsigned SDValue::getMachineOpcode() const {
  return Node->getMachineOpcode();
}

inline bool SDValue::isUndef() const {
  return Node->isUndef();
}

// Use queries are per-result: they consider only uses of this ResNo,
// not of the node as a whole.
inline bool SDValue::use_empty() const {
  return !Node->hasAnyUseOfValue(ResNo);
}

inline bool SDValue::hasOneUse() const {
  return Node->hasNUsesOfValue(1, ResNo);
}

inline const DebugLoc &SDValue::getDebugLoc() const {
  return Node->getDebugLoc();
}

// Debug-dump helpers; dump the whole node, optionally annotated with
// information from the owning SelectionDAG.
inline void SDValue::dump() const {
  return Node->dump();
}

inline void SDValue::dump(const SelectionDAG *G) const {
  return Node->dump(G);
}

inline void SDValue::dumpr() const {
  return Node->dumpr();
}

inline void SDValue::dumpr(const SelectionDAG *G) const {
  return Node->dumpr(G);
}
1180 | ||||
// Define inline functions from the SDUse class.

// Replace the current value: unlink from the old node's use list (if any),
// then link onto the new node's use list (if non-null).
inline void SDUse::set(const SDValue &V) {
  if (Val.getNode()) removeFromList();
  Val = V;
  if (V.getNode()) V.getNode()->addUse(*this);
}

// First-time initialization only: assumes there is no previous value to
// unlink. NOTE(review): V's node is dereferenced unconditionally here, so
// callers must pass a non-null value.
inline void SDUse::setInitial(const SDValue &V) {
  Val = V;
  V.getNode()->addUse(*this);
}

// Like set(), but changes only the node, keeping the result number.
inline void SDUse::setNode(SDNode *N) {
  if (Val.getNode()) removeFromList();
  Val.setNode(N);
  if (N) N->addUse(*this);
}
1199 | ||||
/// This class is used to form a handle around another node that
/// is persistent and is updated across invocations of replaceAllUsesWith on its
/// operand. This node should be directly created by end-users and not added to
/// the AllNodes list.
class HandleSDNode : public SDNode {
  SDUse Op;  // The single, manually-managed operand being tracked.

public:
  explicit HandleSDNode(SDValue X)
    : SDNode(ISD::HANDLENODE, 0, DebugLoc(), getSDVTList(MVT::Other)) {
    // HandleSDNodes are never inserted into the DAG, so they won't be
    // auto-numbered. Use ID 65535 as a sentinel.
    PersistentId = 0xffff;

    // Manually set up the operand list. This node type is special in that it's
    // always stack allocated and SelectionDAG does not manage its operands.
    // TODO: This should either (a) not be in the SDNode hierarchy, or (b) not
    // be so special.
    Op.setUser(this);
    Op.setInitial(X);
    NumOperands = 1;
    OperandList = &Op;
  }
  ~HandleSDNode();

  // NOTE(review): returns the SDUse as a const SDValue& — relies on SDUse
  // exposing its contained value via conversion (declared elsewhere).
  const SDValue &getValue() const { return Op; }
};
1227 | ||||
1228 | class AddrSpaceCastSDNode : public SDNode { | |||
1229 | private: | |||
1230 | unsigned SrcAddrSpace; | |||
1231 | unsigned DestAddrSpace; | |||
1232 | ||||
1233 | public: | |||
1234 | AddrSpaceCastSDNode(unsigned Order, const DebugLoc &dl, EVT VT, | |||
1235 | unsigned SrcAS, unsigned DestAS); | |||
1236 | ||||
1237 | unsigned getSrcAddressSpace() const { return SrcAddrSpace; } | |||
1238 | unsigned getDestAddressSpace() const { return DestAddrSpace; } | |||
1239 | ||||
1240 | static bool classof(const SDNode *N) { | |||
1241 | return N->getOpcode() == ISD::ADDRSPACECAST; | |||
1242 | } | |||
1243 | }; | |||
1244 | ||||
/// This is an abstract virtual class for memory operations.
class MemSDNode : public SDNode {
private:
  // VT of in-memory value.
  EVT MemoryVT;

protected:
  /// Memory reference information.
  MachineMemOperand *MMO;

public:
  MemSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl, SDVTList VTs,
            EVT memvt, MachineMemOperand *MMO);

  // All of the predicates below forward to the MachineMemOperand, which is
  // assumed non-null for the lifetime of the node.
  bool readMem() const { return MMO->isLoad(); }
  bool writeMem() const { return MMO->isStore(); }

  /// Returns alignment and volatility of the memory access
  Align getOriginalAlign() const { return MMO->getBaseAlign(); }
  Align getAlign() const { return MMO->getAlign(); }
  // FIXME: Remove once transition to getAlign is over.
  unsigned getAlignment() const { return MMO->getAlign().value(); }

  /// Return the SubclassData value, without HasDebugValue. This contains an
  /// encoding of the volatile flag, as well as bits used by subclasses. This
  /// function should only be used to compute a FoldingSetNodeID value.
  /// The HasDebugValue bit is masked out because CSE map needs to match
  /// nodes with debug info with nodes without debug info. Same is about
  /// isDivergent bit.
  unsigned getRawSubclassData() const {
    uint16_t Data;
    // Copy the raw subclass bits through a char-array/bitfield union (two
    // memcpys) rather than aliasing casts, then zero the bits that must not
    // participate in CSE matching.
    union {
      char RawSDNodeBits[sizeof(uint16_t)];
      SDNodeBitfields SDNodeBits;
    };
    memcpy(&RawSDNodeBits, &this->RawSDNodeBits, sizeof(this->RawSDNodeBits));
    SDNodeBits.HasDebugValue = 0;
    SDNodeBits.IsDivergent = false;
    memcpy(&Data, &RawSDNodeBits, sizeof(RawSDNodeBits));
    return Data;
  }

  bool isVolatile() const { return MemSDNodeBits.IsVolatile; }
  bool isNonTemporal() const { return MemSDNodeBits.IsNonTemporal; }
  bool isDereferenceable() const { return MemSDNodeBits.IsDereferenceable; }
  bool isInvariant() const { return MemSDNodeBits.IsInvariant; }

  // Returns the offset from the location of the access.
  int64_t getSrcValueOffset() const { return MMO->getOffset(); }

  /// Returns the AA info that describes the dereference.
  AAMDNodes getAAInfo() const { return MMO->getAAInfo(); }

  /// Returns the Ranges that describes the dereference.
  const MDNode *getRanges() const { return MMO->getRanges(); }

  /// Returns the synchronization scope ID for this memory operation.
  SyncScope::ID getSyncScopeID() const { return MMO->getSyncScopeID(); }

  /// Return the atomic ordering requirements for this memory operation. For
  /// cmpxchg atomic operations, return the atomic ordering requirements when
  /// store occurs.
  AtomicOrdering getSuccessOrdering() const {
    return MMO->getSuccessOrdering();
  }

  /// Return a single atomic ordering that is at least as strong as both the
  /// success and failure orderings for an atomic operation. (For operations
  /// other than cmpxchg, this is equivalent to getSuccessOrdering().)
  AtomicOrdering getMergedOrdering() const { return MMO->getMergedOrdering(); }

  /// Return true if the memory operation ordering is Unordered or higher.
  bool isAtomic() const { return MMO->isAtomic(); }

  /// Returns true if the memory operation doesn't imply any ordering
  /// constraints on surrounding memory operations beyond the normal memory
  /// aliasing rules.
  bool isUnordered() const { return MMO->isUnordered(); }

  /// Returns true if the memory operation is neither atomic or volatile.
  bool isSimple() const { return !isAtomic() && !isVolatile(); }

  /// Return the type of the in-memory value.
  EVT getMemoryVT() const { return MemoryVT; }

  /// Return a MachineMemOperand object describing the memory
  /// reference performed by operation.
  MachineMemOperand *getMemOperand() const { return MMO; }

  const MachinePointerInfo &getPointerInfo() const {
    return MMO->getPointerInfo();
  }

  /// Return the address space for the associated pointer
  unsigned getAddressSpace() const {
    return getPointerInfo().getAddrSpace();
  }

  /// Update this MemSDNode's MachineMemOperand information
  /// to reflect the alignment of NewMMO, if it has a greater alignment.
  /// This must only be used when the new alignment applies to all users of
  /// this MachineMemOperand.
  void refineAlignment(const MachineMemOperand *NewMMO) {
    MMO->refineAlignment(NewMMO);
  }

  const SDValue &getChain() const { return getOperand(0); }

  // The base-pointer operand's index depends on the opcode: stores carry
  // (chain, value, ptr, ...), gathers/scatters (chain, ..., ..., ptr, ...),
  // everything else (chain, ptr, ...).
  const SDValue &getBasePtr() const {
    switch (getOpcode()) {
    case ISD::STORE:
    case ISD::MSTORE:
      return getOperand(2);
    case ISD::MGATHER:
    case ISD::MSCATTER:
      return getOperand(3);
    default:
      return getOperand(1);
    }
  }

  // Methods to support isa and dyn_cast
  static bool classof(const SDNode *N) {
    // For some targets, we lower some target intrinsics to a MemIntrinsicNode
    // with either an intrinsic or a target opcode.
    switch (N->getOpcode()) {
    case ISD::LOAD:
    case ISD::STORE:
    case ISD::PREFETCH:
    case ISD::ATOMIC_CMP_SWAP:
    case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
    case ISD::ATOMIC_SWAP:
    case ISD::ATOMIC_LOAD_ADD:
    case ISD::ATOMIC_LOAD_SUB:
    case ISD::ATOMIC_LOAD_AND:
    case ISD::ATOMIC_LOAD_CLR:
    case ISD::ATOMIC_LOAD_OR:
    case ISD::ATOMIC_LOAD_XOR:
    case ISD::ATOMIC_LOAD_NAND:
    case ISD::ATOMIC_LOAD_MIN:
    case ISD::ATOMIC_LOAD_MAX:
    case ISD::ATOMIC_LOAD_UMIN:
    case ISD::ATOMIC_LOAD_UMAX:
    case ISD::ATOMIC_LOAD_FADD:
    case ISD::ATOMIC_LOAD_FSUB:
    case ISD::ATOMIC_LOAD:
    case ISD::ATOMIC_STORE:
    case ISD::MLOAD:
    case ISD::MSTORE:
    case ISD::MGATHER:
    case ISD::MSCATTER:
      return true;
    default:
      return N->isMemIntrinsic() || N->isTargetMemoryOpcode();
    }
  }
};
1402 | ||||
1403 | /// This is an SDNode representing atomic operations. | |||
1404 | class AtomicSDNode : public MemSDNode { | |||
1405 | public: | |||
1406 | AtomicSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl, SDVTList VTL, | |||
1407 | EVT MemVT, MachineMemOperand *MMO) | |||
1408 | : MemSDNode(Opc, Order, dl, VTL, MemVT, MMO) { | |||
1409 | assert(((Opc != ISD::ATOMIC_LOAD && Opc != ISD::ATOMIC_STORE) ||((void)0) | |||
1410 | MMO->isAtomic()) && "then why are we using an AtomicSDNode?")((void)0); | |||
1411 | } | |||
1412 | ||||
1413 | const SDValue &getBasePtr() const { return getOperand(1); } | |||
1414 | const SDValue &getVal() const { return getOperand(2); } | |||
1415 | ||||
1416 | /// Returns true if this SDNode represents cmpxchg atomic operation, false | |||
1417 | /// otherwise. | |||
1418 | bool isCompareAndSwap() const { | |||
1419 | unsigned Op = getOpcode(); | |||
1420 | return Op == ISD::ATOMIC_CMP_SWAP || | |||
1421 | Op == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS; | |||
1422 | } | |||
1423 | ||||
1424 | /// For cmpxchg atomic operations, return the atomic ordering requirements | |||
1425 | /// when store does not occur. | |||
1426 | AtomicOrdering getFailureOrdering() const { | |||
1427 | assert(isCompareAndSwap() && "Must be cmpxchg operation")((void)0); | |||
1428 | return MMO->getFailureOrdering(); | |||
1429 | } | |||
1430 | ||||
1431 | // Methods to support isa and dyn_cast | |||
1432 | static bool classof(const SDNode *N) { | |||
1433 | return N->getOpcode() == ISD::ATOMIC_CMP_SWAP || | |||
1434 | N->getOpcode() == ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS || | |||
1435 | N->getOpcode() == ISD::ATOMIC_SWAP || | |||
1436 | N->getOpcode() == ISD::ATOMIC_LOAD_ADD || | |||
1437 | N->getOpcode() == ISD::ATOMIC_LOAD_SUB || | |||
1438 | N->getOpcode() == ISD::ATOMIC_LOAD_AND || | |||
1439 | N->getOpcode() == ISD::ATOMIC_LOAD_CLR || | |||
1440 | N->getOpcode() == ISD::ATOMIC_LOAD_OR || | |||
1441 | N->getOpcode() == ISD::ATOMIC_LOAD_XOR || | |||
1442 | N->getOpcode() == ISD::ATOMIC_LOAD_NAND || | |||
1443 | N->getOpcode() == ISD::ATOMIC_LOAD_MIN || | |||
1444 | N->getOpcode() == ISD::ATOMIC_LOAD_MAX || | |||
1445 | N->getOpcode() == ISD::ATOMIC_LOAD_UMIN || | |||
1446 | N->getOpcode() == ISD::ATOMIC_LOAD_UMAX || | |||
1447 | N->getOpcode() == ISD::ATOMIC_LOAD_FADD || | |||
1448 | N->getOpcode() == ISD::ATOMIC_LOAD_FSUB || | |||
1449 | N->getOpcode() == ISD::ATOMIC_LOAD || | |||
1450 | N->getOpcode() == ISD::ATOMIC_STORE; | |||
1451 | } | |||
1452 | }; | |||
1453 | ||||
1454 | /// This SDNode is used for target intrinsics that touch | |||
1455 | /// memory and need an associated MachineMemOperand. Its opcode may be | |||
1456 | /// INTRINSIC_VOID, INTRINSIC_W_CHAIN, PREFETCH, or a target-specific opcode | |||
1457 | /// with a value not less than FIRST_TARGET_MEMORY_OPCODE. | |||
1458 | class MemIntrinsicSDNode : public MemSDNode { | |||
1459 | public: | |||
1460 | MemIntrinsicSDNode(unsigned Opc, unsigned Order, const DebugLoc &dl, | |||
1461 | SDVTList VTs, EVT MemoryVT, MachineMemOperand *MMO) | |||
1462 | : MemSDNode(Opc, Order, dl, VTs, MemoryVT, MMO) { | |||
1463 | SDNodeBits.IsMemIntrinsic = true; | |||
1464 | } | |||
1465 | ||||
1466 | // Methods to support isa and dyn_cast | |||
1467 | static bool classof(const SDNode *N) { | |||
1468 | // We lower some target intrinsics to their target opcode | |||
1469 | // early a node with a target opcode can be of this class | |||
1470 | return N->isMemIntrinsic() || | |||
1471 | N->getOpcode() == ISD::PREFETCH || | |||
1472 | N->isTargetMemoryOpcode(); | |||
1473 | } | |||
1474 | }; | |||
1475 | ||||
/// This SDNode is used to implement the code generator
/// support for the llvm IR shufflevector instruction. It combines elements
/// from two input vectors into a new input vector, with the selection and
/// ordering of elements determined by an array of integers, referred to as
/// the shuffle mask. For input vectors of width N, mask indices of 0..N-1
/// refer to elements from the LHS input, and indices from N to 2N-1 the RHS.
/// An index of -1 is treated as undef, such that the code generator may put
/// any value in the corresponding element of the result.
class ShuffleVectorSDNode : public SDNode {
  // The memory for Mask is owned by the SelectionDAG's OperandAllocator, and
  // is freed when the SelectionDAG object is destroyed.
  const int *Mask;

protected:
  friend class SelectionDAG;

  ShuffleVectorSDNode(EVT VT, unsigned Order, const DebugLoc &dl, const int *M)
    : SDNode(ISD::VECTOR_SHUFFLE, Order, dl, getSDVTList(VT)), Mask(M) {}

public:
  // View of the mask; its length equals the result vector's element count.
  ArrayRef<int> getMask() const {
    EVT VT = getValueType(0);
    return makeArrayRef(Mask, VT.getVectorNumElements());
  }

  int getMaskElt(unsigned Idx) const {
    assert(Idx < getValueType(0).getVectorNumElements() && "Idx out of range!")((void)0);
    return Mask[Idx];
  }

  bool isSplat() const { return isSplatMask(Mask, getValueType(0)); }

  // First non-negative mask element, i.e. the lane being splatted.
  int getSplatIndex() const {
    assert(isSplat() && "Cannot get splat index for non-splat!")((void)0);
    EVT VT = getValueType(0);
    for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i)
      if (Mask[i] >= 0)
        return Mask[i];

    // We can choose any index value here and be correct because all elements
    // are undefined. Return 0 for better potential for callers to simplify.
    return 0;
  }

  static bool isSplatMask(const int *Mask, EVT VT);

  /// Change values in a shuffle permute mask assuming
  /// the two vector operands have swapped position.
  static void commuteMask(MutableArrayRef<int> Mask) {
    unsigned NumElems = Mask.size();
    for (unsigned i = 0; i != NumElems; ++i) {
      int idx = Mask[i];
      if (idx < 0)
        continue;                    // undef lanes stay undef
      else if (idx < (int)NumElems)
        Mask[i] = idx + NumElems;    // LHS lane -> corresponding RHS lane
      else
        Mask[i] = idx - NumElems;    // RHS lane -> corresponding LHS lane
    }
  }

  static bool classof(const SDNode *N) {
    return N->getOpcode() == ISD::VECTOR_SHUFFLE;
  }
};
1541 | ||||
1542 | class ConstantSDNode : public SDNode { | |||
1543 | friend class SelectionDAG; | |||
1544 | ||||
1545 | const ConstantInt *Value; | |||
1546 | ||||
1547 | ConstantSDNode(bool isTarget, bool isOpaque, const ConstantInt *val, EVT VT) | |||
1548 | : SDNode(isTarget ? ISD::TargetConstant : ISD::Constant, 0, DebugLoc(), | |||
1549 | getSDVTList(VT)), | |||
1550 | Value(val) { | |||
1551 | ConstantSDNodeBits.IsOpaque = isOpaque; | |||
1552 | } | |||
1553 | ||||
1554 | public: | |||
1555 | const ConstantInt *getConstantIntValue() const { return Value; } | |||
1556 | const APInt &getAPIntValue() const { return Value->getValue(); } | |||
1557 | uint64_t getZExtValue() const { return Value->getZExtValue(); } | |||
1558 | int64_t getSExtValue() const { return Value->getSExtValue(); } | |||
1559 | uint64_t getLimitedValue(uint64_t Limit = UINT64_MAX0xffffffffffffffffULL) { | |||
1560 | return Value->getLimitedValue(Limit); | |||
1561 | } | |||
1562 | MaybeAlign getMaybeAlignValue() const { return Value->getMaybeAlignValue(); } | |||
1563 | Align getAlignValue() const { return Value->getAlignValue(); } | |||
1564 | ||||
1565 | bool isOne() const { return Value->isOne(); } | |||
1566 | bool isNullValue() const { return Value->isZero(); } | |||
1567 | bool isAllOnesValue() const { return Value->isMinusOne(); } | |||
1568 | bool isMaxSignedValue() const { return Value->isMaxValue(true); } | |||
1569 | bool isMinSignedValue() const { return Value->isMinValue(true); } | |||
1570 | ||||
1571 | bool isOpaque() const { return ConstantSDNodeBits.IsOpaque; } | |||
1572 | ||||
1573 | static bool classof(const SDNode *N) { | |||
1574 | return N->getOpcode() == ISD::Constant || | |||
1575 | N->getOpcode() == ISD::TargetConstant; | |||
1576 | } | |||
1577 | }; | |||
1578 | ||||
// Convenience accessors: operand Num must be a ConstantSDNode (the cast<>
// asserts otherwise in checked builds).
uint64_t SDNode::getConstantOperandVal(unsigned Num) const {
  return cast<ConstantSDNode>(getOperand(Num))->getZExtValue();
}

const APInt &SDNode::getConstantOperandAPInt(unsigned Num) const {
  return cast<ConstantSDNode>(getOperand(Num))->getAPIntValue();
}
1586 | ||||
// Node wrapping an IR ConstantFP, for ISD::ConstantFP / ISD::TargetConstantFP.
class ConstantFPSDNode : public SDNode {
  friend class SelectionDAG;

  // Underlying IR constant; never null — all accessors dereference it.
  const ConstantFP *Value;

  ConstantFPSDNode(bool isTarget, const ConstantFP *val, EVT VT)
      : SDNode(isTarget ? ISD::TargetConstantFP : ISD::ConstantFP, 0,
               DebugLoc(), getSDVTList(VT)),
        Value(val) {}

public:
  const APFloat& getValueAPF() const { return Value->getValueAPF(); }
  const ConstantFP *getConstantFPValue() const { return Value; }

  /// Return true if the value is positive or negative zero.
  bool isZero() const { return Value->isZero(); }

  /// Return true if the value is a NaN.
  bool isNaN() const { return Value->isNaN(); }

  /// Return true if the value is an infinity
  bool isInfinity() const { return Value->isInfinity(); }

  /// Return true if the value is negative.
  bool isNegative() const { return Value->isNegative(); }

  /// We don't rely on operator== working on double values, as
  /// it returns true for things that are clearly not equal, like -0.0 and 0.0.
  /// As such, this method can be used to do an exact bit-for-bit comparison of
  /// two floating point values.

  /// We leave the version with the double argument here because it's just so
  /// convenient to write "2.0" and the like. Without this function we'd
  /// have to duplicate its logic everywhere it's called.
  bool isExactlyValue(double V) const {
    return Value->getValueAPF().isExactlyValue(V);
  }
  bool isExactlyValue(const APFloat& V) const;

  static bool isValueValidForType(EVT VT, const APFloat& Val);

  static bool classof(const SDNode *N) {
    return N->getOpcode() == ISD::ConstantFP ||
           N->getOpcode() == ISD::TargetConstantFP;
  }
};
1633 | ||||
// Free-function predicates and peek-through helpers over SDValues; the
// out-of-line definitions live in SelectionDAG.cpp.

/// Returns true if \p V is a constant integer zero.
bool isNullConstant(SDValue V);

/// Returns true if \p V is an FP constant with a value of positive zero.
bool isNullFPConstant(SDValue V);

/// Returns true if \p V is an integer constant with all bits set.
bool isAllOnesConstant(SDValue V);

/// Returns true if \p V is a constant integer one.
bool isOneConstant(SDValue V);

/// Return the non-bitcasted source operand of \p V if it exists.
/// If \p V is not a bitcasted value, it is returned as-is.
SDValue peekThroughBitcasts(SDValue V);

/// Return the non-bitcasted and one-use source operand of \p V if it exists.
/// If \p V is not a bitcasted one-use value, it is returned as-is.
SDValue peekThroughOneUseBitcasts(SDValue V);

/// Return the non-extracted vector source operand of \p V if it exists.
/// If \p V is not an extracted subvector, it is returned as-is.
SDValue peekThroughExtractSubvectors(SDValue V);

/// Returns true if \p V is a bitwise not operation. Assumes that an all ones
/// constant is canonicalized to be operand 1.
bool isBitwiseNot(SDValue V, bool AllowUndefs = false);

/// Returns the SDNode if it is a constant splat BuildVector or constant int.
ConstantSDNode *isConstOrConstSplat(SDValue N, bool AllowUndefs = false,
                                    bool AllowTruncation = false);

/// Returns the SDNode if it is a demanded constant splat BuildVector or
/// constant int.
ConstantSDNode *isConstOrConstSplat(SDValue N, const APInt &DemandedElts,
                                    bool AllowUndefs = false,
                                    bool AllowTruncation = false);

/// Returns the SDNode if it is a constant splat BuildVector or constant float.
ConstantFPSDNode *isConstOrConstSplatFP(SDValue N, bool AllowUndefs = false);

/// Returns the SDNode if it is a demanded constant splat BuildVector or
/// constant float.
ConstantFPSDNode *isConstOrConstSplatFP(SDValue N, const APInt &DemandedElts,
                                        bool AllowUndefs = false);

/// Return true if the value is a constant 0 integer or a splatted vector of
/// a constant 0 integer (with no undefs by default).
/// Build vector implicit truncation is not an issue for null values.
bool isNullOrNullSplat(SDValue V, bool AllowUndefs = false);

/// Return true if the value is a constant 1 integer or a splatted vector of a
/// constant 1 integer (with no undefs).
/// Does not permit build vector implicit truncation.
bool isOneOrOneSplat(SDValue V, bool AllowUndefs = false);

/// Return true if the value is a constant -1 integer or a splatted vector of a
/// constant -1 integer (with no undefs).
/// Does not permit build vector implicit truncation.
bool isAllOnesOrAllOnesSplat(SDValue V, bool AllowUndefs = false);

/// Return true if \p V is either an integer or FP constant.
inline bool isIntOrFPConstant(SDValue V) {
  return isa<ConstantSDNode>(V) || isa<ConstantFPSDNode>(V);
}
1699 | ||||
1700 | class GlobalAddressSDNode : public SDNode { | |||
1701 | friend class SelectionDAG; | |||
1702 | ||||
1703 | const GlobalValue *TheGlobal; | |||
1704 | int64_t Offset; | |||
1705 | unsigned TargetFlags; | |||
1706 | ||||
1707 | GlobalAddressSDNode(unsigned Opc, unsigned Order, const DebugLoc &DL, | |||
1708 | const GlobalValue *GA, EVT VT, int64_t o, | |||
1709 | unsigned TF); | |||
1710 | ||||
1711 | public: | |||
1712 | const GlobalValue *getGlobal() const { return TheGlobal; } | |||
1713 | int64_t getOffset() const { return Offset; } | |||
1714 | unsigned getTargetFlags() const { return TargetFlags; } | |||
1715 | // Return the address space this GlobalAddress belongs to. | |||
1716 | unsigned getAddressSpace() const; | |||
1717 | ||||
1718 | static bool classof(const SDNode *N) { | |||
1719 | return N->getOpcode() == ISD::GlobalAddress || | |||
1720 | N->getOpcode() == ISD::TargetGlobalAddress || | |||
1721 | N->getOpcode() == ISD::GlobalTLSAddress || | |||
1722 | N->getOpcode() == ISD::TargetGlobalTLSAddress; | |||
1723 | } | |||
1724 | }; | |||
1725 | ||||
1726 | class FrameIndexSDNode : public SDNode { | |||
1727 | friend class SelectionDAG; | |||
1728 | ||||
1729 | int FI; | |||
1730 | ||||
1731 | FrameIndexSDNode(int fi, EVT VT, bool isTarg) | |||
1732 | : SDNode(isTarg ? ISD::TargetFrameIndex : ISD::FrameIndex, | |||
1733 | 0, DebugLoc(), getSDVTList(VT)), FI(fi) { | |||
1734 | } | |||
1735 | ||||
1736 | public: | |||
1737 | int getIndex() const { return FI; } | |||
1738 | ||||
1739 | static bool classof(const SDNode *N) { | |||
1740 | return N->getOpcode() == ISD::FrameIndex || | |||
1741 | N->getOpcode() == ISD::TargetFrameIndex; | |||
1742 | } | |||
1743 | }; | |||
1744 | ||||
/// This SDNode is used for LIFETIME_START/LIFETIME_END values, which indicate
/// the offset and size that are started/ended in the underlying FrameIndex.
class LifetimeSDNode : public SDNode {
  friend class SelectionDAG;
  int64_t Size;
  int64_t Offset; // -1 if offset is unknown.

  LifetimeSDNode(unsigned Opcode, unsigned Order, const DebugLoc &dl,
                 SDVTList VTs, int64_t Size, int64_t Offset)
      : SDNode(Opcode, Order, dl, VTs), Size(Size), Offset(Offset) {}
public:
  // Operand 1 is the frame index whose lifetime is delimited.
  int64_t getFrameIndex() const {
    return cast<FrameIndexSDNode>(getOperand(1))->getIndex();
  }

  bool hasOffset() const { return Offset >= 0; }
  int64_t getOffset() const {
    assert(hasOffset() && "offset is unknown")((void)0);
    return Offset;
  }
  // NOTE(review): Size is also gated on a known offset — the assert below
  // treats Size as meaningless when Offset is unknown.
  int64_t getSize() const {
    assert(hasOffset() && "offset is unknown")((void)0);
    return Size;
  }

  // Methods to support isa and dyn_cast
  static bool classof(const SDNode *N) {
    return N->getOpcode() == ISD::LIFETIME_START ||
           N->getOpcode() == ISD::LIFETIME_END;
  }
};
1776 | ||||
1777 | /// This SDNode is used for PSEUDO_PROBE values, which are the function guid and | |||
1778 | /// the index of the basic block being probed. A pseudo probe serves as a place | |||
1779 | /// holder and will be removed at the end of compilation. It does not have any | |||
1780 | /// operand because we do not want the instruction selection to deal with any. | |||
1781 | class PseudoProbeSDNode : public SDNode { | |||
1782 | friend class SelectionDAG; | |||
1783 | uint64_t Guid; | |||
1784 | uint64_t Index; | |||
1785 | uint32_t Attributes; | |||
1786 | ||||
1787 | PseudoProbeSDNode(unsigned Opcode, unsigned Order, const DebugLoc &Dl, | |||
1788 | SDVTList VTs, uint64_t Guid, uint64_t Index, uint32_t Attr) | |||
1789 | : SDNode(Opcode, Order, Dl, VTs), Guid(Guid), Index(Index), | |||
1790 | Attributes(Attr) {} | |||
1791 | ||||
1792 | public: | |||
1793 | uint64_t getGuid() const { return Guid; } | |||
1794 | uint64_t getIndex() const { return Index; } | |||
1795 | uint32_t getAttributes() const { return Attributes; } | |||
1796 | ||||
1797 | // Methods to support isa and dyn_cast | |||
1798 | static bool classof(const SDNode *N) { | |||
1799 | return N->getOpcode() == ISD::PSEUDO_PROBE; | |||
1800 | } | |||
1801 | }; | |||
1802 | ||||
1803 | class JumpTableSDNode : public SDNode { | |||
1804 | friend class SelectionDAG; | |||
1805 | ||||
1806 | int JTI; | |||
1807 | unsigned TargetFlags; | |||
1808 | ||||
1809 | JumpTableSDNode(int jti, EVT VT, bool isTarg, unsigned TF) | |||
1810 | : SDNode(isTarg ? ISD::TargetJumpTable : ISD::JumpTable, | |||
1811 | 0, DebugLoc(), getSDVTList(VT)), JTI(jti), TargetFlags(TF) { | |||
1812 | } | |||
1813 | ||||
1814 | public: | |||
1815 | int getIndex() const { return JTI; } | |||
1816 | unsigned getTargetFlags() const { return TargetFlags; } | |||
1817 | ||||
1818 | static bool classof(const SDNode *N) { | |||
1819 | return N->getOpcode() == ISD::JumpTable || | |||
1820 | N->getOpcode() == ISD::TargetJumpTable; | |||
1821 | } | |||
1822 | }; | |||
1823 | ||||
1824 | class ConstantPoolSDNode : public SDNode { | |||
1825 | friend class SelectionDAG; | |||
1826 | ||||
1827 | union { | |||
1828 | const Constant *ConstVal; | |||
1829 | MachineConstantPoolValue *MachineCPVal; | |||
1830 | } Val; | |||
1831 | int Offset; // It's a MachineConstantPoolValue if top bit is set. | |||
1832 | Align Alignment; // Minimum alignment requirement of CP. | |||
1833 | unsigned TargetFlags; | |||
1834 | ||||
1835 | ConstantPoolSDNode(bool isTarget, const Constant *c, EVT VT, int o, | |||
1836 | Align Alignment, unsigned TF) | |||
1837 | : SDNode(isTarget ? ISD::TargetConstantPool : ISD::ConstantPool, 0, | |||
1838 | DebugLoc(), getSDVTList(VT)), | |||
1839 | Offset(o), Alignment(Alignment), TargetFlags(TF) { | |||
1840 | assert(Offset >= 0 && "Offset is too large")((void)0); | |||
1841 | Val.ConstVal = c; | |||
1842 | } | |||
1843 | ||||
1844 | ConstantPoolSDNode(bool isTarget, MachineConstantPoolValue *v, EVT VT, int o, | |||
1845 | Align Alignment, unsigned TF) | |||
1846 | : SDNode(isTarget ? ISD::TargetConstantPool : ISD::ConstantPool, 0, | |||
1847 | DebugLoc(), getSDVTList(VT)), | |||
1848 | Offset(o), Alignment(Alignment), TargetFlags(TF) { | |||
1849 | assert(Offset >= 0 && "Offset is too large")((void)0); | |||
1850 | Val.MachineCPVal = v; | |||
1851 | Offset |= 1 << (sizeof(unsigned)*CHAR_BIT8-1); | |||
1852 | } | |||
1853 | ||||
1854 | public: | |||
1855 | bool isMachineConstantPoolEntry() const { | |||
1856 | return Offset < 0; | |||
1857 | } | |||
1858 | ||||
1859 | const Constant *getConstVal() const { | |||
1860 | assert(!isMachineConstantPoolEntry() && "Wrong constantpool type")((void)0); | |||
1861 | return Val.ConstVal; | |||
1862 | } | |||
1863 | ||||
1864 | MachineConstantPoolValue *getMachineCPVal() const { | |||
1865 | assert(isMachineConstantPoolEntry() && "Wrong constantpool type")((void)0); | |||
1866 | return Val.MachineCPVal; | |||
1867 | } | |||
1868 | ||||
1869 | int getOffset() const { | |||
1870 | return Offset & ~(1 << (sizeof(unsigned)*CHAR_BIT8-1)); | |||
1871 | } | |||
1872 | ||||
1873 | // Return the alignment of this constant pool object, which is either 0 (for | |||
1874 | // default alignment) or the desired value. | |||
1875 | Align getAlign() const { return Alignment; } | |||
1876 | unsigned getTargetFlags() const { return TargetFlags; } | |||
1877 | ||||
1878 | Type *getType() const; | |||
1879 | ||||
1880 | static bool classof(const SDNode *N) { | |||
1881 | return N->getOpcode() == ISD::ConstantPool || | |||
1882 | N->getOpcode() == ISD::TargetConstantPool; | |||
1883 | } | |||
1884 | }; | |||
1885 | ||||
/// Completely target-dependent object reference.
class TargetIndexSDNode : public SDNode {
  friend class SelectionDAG;

  unsigned TargetFlags; // Target-specific flags.
  int Index;            // Opaque, target-defined index.
  int64_t Offset;       // Byte offset applied by the target.

public:
  TargetIndexSDNode(int Idx, EVT VT, int64_t Ofs, unsigned TF)
      : SDNode(ISD::TargetIndex, 0, DebugLoc(), getSDVTList(VT)),
        TargetFlags(TF), Index(Idx), Offset(Ofs) {}

  unsigned getTargetFlags() const { return TargetFlags; }
  int getIndex() const { return Index; }
  int64_t getOffset() const { return Offset; }

  static bool classof(const SDNode *N) {
    return N->getOpcode() == ISD::TargetIndex;
  }
};
1907 | ||||
/// Wraps a MachineBasicBlock so it can appear as a DAG operand
/// (e.g. as a branch target).
class BasicBlockSDNode : public SDNode {
  friend class SelectionDAG;

  MachineBasicBlock *MBB;

  /// Debug info is meaningful and potentially useful here, but we create
  /// blocks out of order when they're jumped to, which makes it a bit
  /// harder. Let's see if we need it first.
  explicit BasicBlockSDNode(MachineBasicBlock *mbb)
    : SDNode(ISD::BasicBlock, 0, DebugLoc(), getSDVTList(MVT::Other)), MBB(mbb)
  {}

public:
  MachineBasicBlock *getBasicBlock() const { return MBB; }

  static bool classof(const SDNode *N) {
    return N->getOpcode() == ISD::BasicBlock;
  }
};
1927 | ||||
/// A "pseudo-class" with methods for operating on BUILD_VECTORs.
class BuildVectorSDNode : public SDNode {
public:
  // These are constructed as SDNodes and then cast to BuildVectorSDNodes.
  explicit BuildVectorSDNode() = delete;

  /// Check if this is a constant splat, and if so, find the
  /// smallest element size that splats the vector. If MinSplatBits is
  /// nonzero, the element size must be at least that large. Note that the
  /// splat element may be the entire vector (i.e., a one element vector).
  /// Returns the splat element value in SplatValue. Any undefined bits in
  /// that value are zero, and the corresponding bits in the SplatUndef mask
  /// are set. The SplatBitSize value is set to the splat element size in
  /// bits. HasAnyUndefs is set to true if any bits in the vector are
  /// undefined. isBigEndian describes the endianness of the target.
  bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef,
                       unsigned &SplatBitSize, bool &HasAnyUndefs,
                       unsigned MinSplatBits = 0,
                       bool isBigEndian = false) const;

  /// Returns the demanded splatted value or a null value if this is not a
  /// splat.
  ///
  /// The DemandedElts mask indicates the elements that must be in the splat.
  /// If passed a non-null UndefElements bitvector, it will resize it to match
  /// the vector width and set the bits where elements are undef.
  SDValue getSplatValue(const APInt &DemandedElts,
                        BitVector *UndefElements = nullptr) const;

  /// Returns the splatted value or a null value if this is not a splat.
  ///
  /// If passed a non-null UndefElements bitvector, it will resize it to match
  /// the vector width and set the bits where elements are undef.
  SDValue getSplatValue(BitVector *UndefElements = nullptr) const;

  /// Find the shortest repeating sequence of values in the build vector.
  ///
  /// e.g. { u, X, u, X, u, u, X, u } -> { X }
  ///      { X, Y, u, Y, u, u, X, u } -> { X, Y }
  ///
  /// Currently this must be a power-of-2 build vector.
  /// The DemandedElts mask indicates the elements that must be present,
  /// undemanded elements in Sequence may be null (SDValue()). If passed a
  /// non-null UndefElements bitvector, it will resize it to match the original
  /// vector width and set the bits where elements are undef. If result is
  /// false, Sequence will be empty.
  bool getRepeatedSequence(const APInt &DemandedElts,
                           SmallVectorImpl<SDValue> &Sequence,
                           BitVector *UndefElements = nullptr) const;

  /// Find the shortest repeating sequence of values in the build vector.
  ///
  /// e.g. { u, X, u, X, u, u, X, u } -> { X }
  ///      { X, Y, u, Y, u, u, X, u } -> { X, Y }
  ///
  /// Currently this must be a power-of-2 build vector.
  /// If passed a non-null UndefElements bitvector, it will resize it to match
  /// the original vector width and set the bits where elements are undef.
  /// If result is false, Sequence will be empty.
  bool getRepeatedSequence(SmallVectorImpl<SDValue> &Sequence,
                           BitVector *UndefElements = nullptr) const;

  /// Returns the demanded splatted constant or null if this is not a constant
  /// splat.
  ///
  /// The DemandedElts mask indicates the elements that must be in the splat.
  /// If passed a non-null UndefElements bitvector, it will resize it to match
  /// the vector width and set the bits where elements are undef.
  ConstantSDNode *
  getConstantSplatNode(const APInt &DemandedElts,
                       BitVector *UndefElements = nullptr) const;

  /// Returns the splatted constant or null if this is not a constant
  /// splat.
  ///
  /// If passed a non-null UndefElements bitvector, it will resize it to match
  /// the vector width and set the bits where elements are undef.
  ConstantSDNode *
  getConstantSplatNode(BitVector *UndefElements = nullptr) const;

  /// Returns the demanded splatted constant FP or null if this is not a
  /// constant FP splat.
  ///
  /// The DemandedElts mask indicates the elements that must be in the splat.
  /// If passed a non-null UndefElements bitvector, it will resize it to match
  /// the vector width and set the bits where elements are undef.
  ConstantFPSDNode *
  getConstantFPSplatNode(const APInt &DemandedElts,
                         BitVector *UndefElements = nullptr) const;

  /// Returns the splatted constant FP or null if this is not a constant
  /// FP splat.
  ///
  /// If passed a non-null UndefElements bitvector, it will resize it to match
  /// the vector width and set the bits where elements are undef.
  ConstantFPSDNode *
  getConstantFPSplatNode(BitVector *UndefElements = nullptr) const;

  /// If this is a constant FP splat and the splatted constant FP is an
  /// exact power or 2, return the log base 2 integer value.  Otherwise,
  /// return -1.
  ///
  /// The BitWidth specifies the necessary bit precision.
  int32_t getConstantFPSplatPow2ToLog2Int(BitVector *UndefElements,
                                          uint32_t BitWidth) const;

  /// Returns true if every operand of this BUILD_VECTOR is a constant.
  bool isConstant() const;

  static bool classof(const SDNode *N) {
    return N->getOpcode() == ISD::BUILD_VECTOR;
  }
};
2040 | ||||
/// An SDNode that holds an arbitrary LLVM IR Value. This is
/// used when the SelectionDAG needs to make a simple reference to something
/// in the LLVM IR representation.
///
class SrcValueSDNode : public SDNode {
  friend class SelectionDAG;

  const Value *V; // The referenced IR value (not owned).

  /// Create a SrcValue for a general value.
  explicit SrcValueSDNode(const Value *v)
    : SDNode(ISD::SRCVALUE, 0, DebugLoc(), getSDVTList(MVT::Other)), V(v) {}

public:
  /// Return the contained Value.
  const Value *getValue() const { return V; }

  static bool classof(const SDNode *N) {
    return N->getOpcode() == ISD::SRCVALUE;
  }
};
2062 | ||||
/// Wraps an IR metadata node (MDNode) as a DAG operand.
class MDNodeSDNode : public SDNode {
  friend class SelectionDAG;

  const MDNode *MD; // The wrapped metadata node (not owned).

  explicit MDNodeSDNode(const MDNode *md)
  : SDNode(ISD::MDNODE_SDNODE, 0, DebugLoc(), getSDVTList(MVT::Other)), MD(md)
  {}

public:
  const MDNode *getMD() const { return MD; }

  static bool classof(const SDNode *N) {
    return N->getOpcode() == ISD::MDNODE_SDNODE;
  }
};
2079 | ||||
/// Wraps a physical or virtual register as a DAG operand.
class RegisterSDNode : public SDNode {
  friend class SelectionDAG;

  Register Reg;

  RegisterSDNode(Register reg, EVT VT)
    : SDNode(ISD::Register, 0, DebugLoc(), getSDVTList(VT)), Reg(reg) {}

public:
  Register getReg() const { return Reg; }

  static bool classof(const SDNode *N) {
    return N->getOpcode() == ISD::Register;
  }
};
2095 | ||||
/// Wraps a register mask (e.g. a call's clobber mask) as a DAG operand.
class RegisterMaskSDNode : public SDNode {
  friend class SelectionDAG;

  // The memory for RegMask is not owned by the node.
  const uint32_t *RegMask;

  RegisterMaskSDNode(const uint32_t *mask)
    : SDNode(ISD::RegisterMask, 0, DebugLoc(), getSDVTList(MVT::Untyped)),
      RegMask(mask) {}

public:
  const uint32_t *getRegMask() const { return RegMask; }

  static bool classof(const SDNode *N) {
    return N->getOpcode() == ISD::RegisterMask;
  }
};
2113 | ||||
/// Reference to the address of a basic block (IR blockaddress), with an
/// optional byte offset and target flags.
class BlockAddressSDNode : public SDNode {
  friend class SelectionDAG;

  const BlockAddress *BA; // The referenced blockaddress (not owned).
  int64_t Offset;         // Byte offset from the block address.
  unsigned TargetFlags;   // Target-specific flags.

  BlockAddressSDNode(unsigned NodeTy, EVT VT, const BlockAddress *ba,
                     int64_t o, unsigned Flags)
    : SDNode(NodeTy, 0, DebugLoc(), getSDVTList(VT)),
             BA(ba), Offset(o), TargetFlags(Flags) {}

public:
  const BlockAddress *getBlockAddress() const { return BA; }
  int64_t getOffset() const { return Offset; }
  unsigned getTargetFlags() const { return TargetFlags; }

  static bool classof(const SDNode *N) {
    return N->getOpcode() == ISD::BlockAddress ||
           N->getOpcode() == ISD::TargetBlockAddress;
  }
};
2136 | ||||
2137 | class LabelSDNode : public SDNode { | |||
2138 | friend class SelectionDAG; | |||
2139 | ||||
2140 | MCSymbol *Label; | |||
2141 | ||||
2142 | LabelSDNode(unsigned Opcode, unsigned Order, const DebugLoc &dl, MCSymbol *L) | |||
2143 | : SDNode(Opcode, Order, dl, getSDVTList(MVT::Other)), Label(L) { | |||
2144 | assert(LabelSDNode::classof(this) && "not a label opcode")((void)0); | |||
2145 | } | |||
2146 | ||||
2147 | public: | |||
2148 | MCSymbol *getLabel() const { return Label; } | |||
2149 | ||||
2150 | static bool classof(const SDNode *N) { | |||
2151 | return N->getOpcode() == ISD::EH_LABEL || | |||
2152 | N->getOpcode() == ISD::ANNOTATION_LABEL; | |||
2153 | } | |||
2154 | }; | |||
2155 | ||||
/// Reference to an external symbol by name (e.g. a runtime library call),
/// with target flags.
class ExternalSymbolSDNode : public SDNode {
  friend class SelectionDAG;

  const char *Symbol;   // NUL-terminated symbol name (not owned).
  unsigned TargetFlags; // Target-specific flags.

  ExternalSymbolSDNode(bool isTarget, const char *Sym, unsigned TF, EVT VT)
      : SDNode(isTarget ? ISD::TargetExternalSymbol : ISD::ExternalSymbol, 0,
               DebugLoc(), getSDVTList(VT)),
        Symbol(Sym), TargetFlags(TF) {}

public:
  const char *getSymbol() const { return Symbol; }
  unsigned getTargetFlags() const { return TargetFlags; }

  static bool classof(const SDNode *N) {
    return N->getOpcode() == ISD::ExternalSymbol ||
           N->getOpcode() == ISD::TargetExternalSymbol;
  }
};
2176 | ||||
/// Wraps an MCSymbol as a DAG operand.
class MCSymbolSDNode : public SDNode {
  friend class SelectionDAG;

  MCSymbol *Symbol; // The wrapped symbol (not owned).

  MCSymbolSDNode(MCSymbol *Symbol, EVT VT)
      : SDNode(ISD::MCSymbol, 0, DebugLoc(), getSDVTList(VT)), Symbol(Symbol) {}

public:
  MCSymbol *getMCSymbol() const { return Symbol; }

  static bool classof(const SDNode *N) {
    return N->getOpcode() == ISD::MCSymbol;
  }
};
2192 | ||||
/// Wraps an ISD::CondCode (comparison predicate) as a DAG operand,
/// e.g. the condition of a SETCC node.
class CondCodeSDNode : public SDNode {
  friend class SelectionDAG;

  ISD::CondCode Condition;

  explicit CondCodeSDNode(ISD::CondCode Cond)
    : SDNode(ISD::CONDCODE, 0, DebugLoc(), getSDVTList(MVT::Other)),
      Condition(Cond) {}

public:
  ISD::CondCode get() const { return Condition; }

  static bool classof(const SDNode *N) {
    return N->getOpcode() == ISD::CONDCODE;
  }
};
2209 | ||||
/// This class is used to represent EVT's, which are used
/// to parameterize some operations.
class VTSDNode : public SDNode {
  friend class SelectionDAG;

  EVT ValueType; // The wrapped value type.

  explicit VTSDNode(EVT VT)
    : SDNode(ISD::VALUETYPE, 0, DebugLoc(), getSDVTList(MVT::Other)),
      ValueType(VT) {}

public:
  EVT getVT() const { return ValueType; }

  static bool classof(const SDNode *N) {
    return N->getOpcode() == ISD::VALUETYPE;
  }
};
2228 | ||||
2229 | /// Base class for LoadSDNode and StoreSDNode | |||
2230 | class LSBaseSDNode : public MemSDNode { | |||
2231 | public: | |||
2232 | LSBaseSDNode(ISD::NodeType NodeTy, unsigned Order, const DebugLoc &dl, | |||
2233 | SDVTList VTs, ISD::MemIndexedMode AM, EVT MemVT, | |||
2234 | MachineMemOperand *MMO) | |||
2235 | : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) { | |||
2236 | LSBaseSDNodeBits.AddressingMode = AM; | |||
2237 | assert(getAddressingMode() == AM && "Value truncated")((void)0); | |||
2238 | } | |||
2239 | ||||
2240 | const SDValue &getOffset() const { | |||
2241 | return getOperand(getOpcode() == ISD::LOAD ? 2 : 3); | |||
2242 | } | |||
2243 | ||||
2244 | /// Return the addressing mode for this load or store: | |||
2245 | /// unindexed, pre-inc, pre-dec, post-inc, or post-dec. | |||
2246 | ISD::MemIndexedMode getAddressingMode() const { | |||
2247 | return static_cast<ISD::MemIndexedMode>(LSBaseSDNodeBits.AddressingMode); | |||
2248 | } | |||
2249 | ||||
2250 | /// Return true if this is a pre/post inc/dec load/store. | |||
2251 | bool isIndexed() const { return getAddressingMode() != ISD::UNINDEXED; } | |||
2252 | ||||
2253 | /// Return true if this is NOT a pre/post inc/dec load/store. | |||
2254 | bool isUnindexed() const { return getAddressingMode() == ISD::UNINDEXED; } | |||
2255 | ||||
2256 | static bool classof(const SDNode *N) { | |||
2257 | return N->getOpcode() == ISD::LOAD || | |||
2258 | N->getOpcode() == ISD::STORE; | |||
2259 | } | |||
2260 | }; | |||
2261 | ||||
2262 | /// This class is used to represent ISD::LOAD nodes. | |||
2263 | class LoadSDNode : public LSBaseSDNode { | |||
2264 | friend class SelectionDAG; | |||
2265 | ||||
2266 | LoadSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs, | |||
2267 | ISD::MemIndexedMode AM, ISD::LoadExtType ETy, EVT MemVT, | |||
2268 | MachineMemOperand *MMO) | |||
2269 | : LSBaseSDNode(ISD::LOAD, Order, dl, VTs, AM, MemVT, MMO) { | |||
2270 | LoadSDNodeBits.ExtTy = ETy; | |||
2271 | assert(readMem() && "Load MachineMemOperand is not a load!")((void)0); | |||
2272 | assert(!writeMem() && "Load MachineMemOperand is a store!")((void)0); | |||
2273 | } | |||
2274 | ||||
2275 | public: | |||
2276 | /// Return whether this is a plain node, | |||
2277 | /// or one of the varieties of value-extending loads. | |||
2278 | ISD::LoadExtType getExtensionType() const { | |||
2279 | return static_cast<ISD::LoadExtType>(LoadSDNodeBits.ExtTy); | |||
2280 | } | |||
2281 | ||||
2282 | const SDValue &getBasePtr() const { return getOperand(1); } | |||
2283 | const SDValue &getOffset() const { return getOperand(2); } | |||
2284 | ||||
2285 | static bool classof(const SDNode *N) { | |||
2286 | return N->getOpcode() == ISD::LOAD; | |||
2287 | } | |||
2288 | }; | |||
2289 | ||||
2290 | /// This class is used to represent ISD::STORE nodes. | |||
2291 | class StoreSDNode : public LSBaseSDNode { | |||
2292 | friend class SelectionDAG; | |||
2293 | ||||
2294 | StoreSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs, | |||
2295 | ISD::MemIndexedMode AM, bool isTrunc, EVT MemVT, | |||
2296 | MachineMemOperand *MMO) | |||
2297 | : LSBaseSDNode(ISD::STORE, Order, dl, VTs, AM, MemVT, MMO) { | |||
2298 | StoreSDNodeBits.IsTruncating = isTrunc; | |||
2299 | assert(!readMem() && "Store MachineMemOperand is a load!")((void)0); | |||
2300 | assert(writeMem() && "Store MachineMemOperand is not a store!")((void)0); | |||
2301 | } | |||
2302 | ||||
2303 | public: | |||
2304 | /// Return true if the op does a truncation before store. | |||
2305 | /// For integers this is the same as doing a TRUNCATE and storing the result. | |||
2306 | /// For floats, it is the same as doing an FP_ROUND and storing the result. | |||
2307 | bool isTruncatingStore() const { return StoreSDNodeBits.IsTruncating; } | |||
2308 | void setTruncatingStore(bool Truncating) { | |||
2309 | StoreSDNodeBits.IsTruncating = Truncating; | |||
2310 | } | |||
2311 | ||||
2312 | const SDValue &getValue() const { return getOperand(1); } | |||
2313 | const SDValue &getBasePtr() const { return getOperand(2); } | |||
2314 | const SDValue &getOffset() const { return getOperand(3); } | |||
2315 | ||||
2316 | static bool classof(const SDNode *N) { | |||
2317 | return N->getOpcode() == ISD::STORE; | |||
2318 | } | |||
2319 | }; | |||
2320 | ||||
2321 | /// This base class is used to represent MLOAD and MSTORE nodes | |||
2322 | class MaskedLoadStoreSDNode : public MemSDNode { | |||
2323 | public: | |||
2324 | friend class SelectionDAG; | |||
2325 | ||||
2326 | MaskedLoadStoreSDNode(ISD::NodeType NodeTy, unsigned Order, | |||
2327 | const DebugLoc &dl, SDVTList VTs, | |||
2328 | ISD::MemIndexedMode AM, EVT MemVT, | |||
2329 | MachineMemOperand *MMO) | |||
2330 | : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) { | |||
2331 | LSBaseSDNodeBits.AddressingMode = AM; | |||
2332 | assert(getAddressingMode() == AM && "Value truncated")((void)0); | |||
2333 | } | |||
2334 | ||||
2335 | // MaskedLoadSDNode (Chain, ptr, offset, mask, passthru) | |||
2336 | // MaskedStoreSDNode (Chain, data, ptr, offset, mask) | |||
2337 | // Mask is a vector of i1 elements | |||
2338 | const SDValue &getOffset() const { | |||
2339 | return getOperand(getOpcode() == ISD::MLOAD ? 2 : 3); | |||
2340 | } | |||
2341 | const SDValue &getMask() const { | |||
2342 | return getOperand(getOpcode() == ISD::MLOAD ? 3 : 4); | |||
2343 | } | |||
2344 | ||||
2345 | /// Return the addressing mode for this load or store: | |||
2346 | /// unindexed, pre-inc, pre-dec, post-inc, or post-dec. | |||
2347 | ISD::MemIndexedMode getAddressingMode() const { | |||
2348 | return static_cast<ISD::MemIndexedMode>(LSBaseSDNodeBits.AddressingMode); | |||
2349 | } | |||
2350 | ||||
2351 | /// Return true if this is a pre/post inc/dec load/store. | |||
2352 | bool isIndexed() const { return getAddressingMode() != ISD::UNINDEXED; } | |||
2353 | ||||
2354 | /// Return true if this is NOT a pre/post inc/dec load/store. | |||
2355 | bool isUnindexed() const { return getAddressingMode() == ISD::UNINDEXED; } | |||
2356 | ||||
2357 | static bool classof(const SDNode *N) { | |||
2358 | return N->getOpcode() == ISD::MLOAD || | |||
2359 | N->getOpcode() == ISD::MSTORE; | |||
2360 | } | |||
2361 | }; | |||
2362 | ||||
/// This class is used to represent an MLOAD node
class MaskedLoadSDNode : public MaskedLoadStoreSDNode {
public:
  friend class SelectionDAG;

  MaskedLoadSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
                   ISD::MemIndexedMode AM, ISD::LoadExtType ETy,
                   bool IsExpanding, EVT MemVT, MachineMemOperand *MMO)
      : MaskedLoadStoreSDNode(ISD::MLOAD, Order, dl, VTs, AM, MemVT, MMO) {
    LoadSDNodeBits.ExtTy = ETy;
    LoadSDNodeBits.IsExpanding = IsExpanding;
  }

  /// Return the load extension kind (plain or value-extending).
  ISD::LoadExtType getExtensionType() const {
    return static_cast<ISD::LoadExtType>(LoadSDNodeBits.ExtTy);
  }

  // Operands: (Chain, ptr, offset, mask, passthru).
  const SDValue &getBasePtr() const { return getOperand(1); }
  const SDValue &getOffset() const { return getOperand(2); }
  const SDValue &getMask() const { return getOperand(3); }
  const SDValue &getPassThru() const { return getOperand(4); }

  static bool classof(const SDNode *N) {
    return N->getOpcode() == ISD::MLOAD;
  }

  bool isExpandingLoad() const { return LoadSDNodeBits.IsExpanding; }
};
2391 | ||||
/// This class is used to represent an MSTORE node
class MaskedStoreSDNode : public MaskedLoadStoreSDNode {
public:
  friend class SelectionDAG;

  MaskedStoreSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
                    ISD::MemIndexedMode AM, bool isTrunc, bool isCompressing,
                    EVT MemVT, MachineMemOperand *MMO)
      : MaskedLoadStoreSDNode(ISD::MSTORE, Order, dl, VTs, AM, MemVT, MMO) {
    StoreSDNodeBits.IsTruncating = isTrunc;
    StoreSDNodeBits.IsCompressing = isCompressing;
  }

  /// Return true if the op does a truncation before store.
  /// For integers this is the same as doing a TRUNCATE and storing the result.
  /// For floats, it is the same as doing an FP_ROUND and storing the result.
  bool isTruncatingStore() const { return StoreSDNodeBits.IsTruncating; }

  /// Returns true if the op does a compression to the vector before storing.
  /// The node contiguously stores the active elements (integers or floats)
  /// in src (those with their respective bit set in writemask k) to unaligned
  /// memory at base_addr.
  bool isCompressingStore() const { return StoreSDNodeBits.IsCompressing; }

  // Operands: (Chain, data, ptr, offset, mask).
  const SDValue &getValue() const { return getOperand(1); }
  const SDValue &getBasePtr() const { return getOperand(2); }
  const SDValue &getOffset() const { return getOperand(3); }
  const SDValue &getMask() const { return getOperand(4); }

  static bool classof(const SDNode *N) {
    return N->getOpcode() == ISD::MSTORE;
  }
};
2425 | ||||
2426 | /// This is a base class used to represent | |||
2427 | /// MGATHER and MSCATTER nodes | |||
2428 | /// | |||
2429 | class MaskedGatherScatterSDNode : public MemSDNode { | |||
2430 | public: | |||
2431 | friend class SelectionDAG; | |||
2432 | ||||
2433 | MaskedGatherScatterSDNode(ISD::NodeType NodeTy, unsigned Order, | |||
2434 | const DebugLoc &dl, SDVTList VTs, EVT MemVT, | |||
2435 | MachineMemOperand *MMO, ISD::MemIndexType IndexType) | |||
2436 | : MemSDNode(NodeTy, Order, dl, VTs, MemVT, MMO) { | |||
2437 | LSBaseSDNodeBits.AddressingMode = IndexType; | |||
2438 | assert(getIndexType() == IndexType && "Value truncated")((void)0); | |||
2439 | } | |||
2440 | ||||
2441 | /// How is Index applied to BasePtr when computing addresses. | |||
2442 | ISD::MemIndexType getIndexType() const { | |||
2443 | return static_cast<ISD::MemIndexType>(LSBaseSDNodeBits.AddressingMode); | |||
2444 | } | |||
2445 | void setIndexType(ISD::MemIndexType IndexType) { | |||
2446 | LSBaseSDNodeBits.AddressingMode = IndexType; | |||
2447 | } | |||
2448 | bool isIndexScaled() const { | |||
2449 | return (getIndexType() == ISD::SIGNED_SCALED) || | |||
2450 | (getIndexType() == ISD::UNSIGNED_SCALED); | |||
2451 | } | |||
2452 | bool isIndexSigned() const { | |||
2453 | return (getIndexType() == ISD::SIGNED_SCALED) || | |||
2454 | (getIndexType() == ISD::SIGNED_UNSCALED); | |||
2455 | } | |||
2456 | ||||
2457 | // In the both nodes address is Op1, mask is Op2: | |||
2458 | // MaskedGatherSDNode (Chain, passthru, mask, base, index, scale) | |||
2459 | // MaskedScatterSDNode (Chain, value, mask, base, index, scale) | |||
2460 | // Mask is a vector of i1 elements | |||
2461 | const SDValue &getBasePtr() const { return getOperand(3); } | |||
2462 | const SDValue &getIndex() const { return getOperand(4); } | |||
2463 | const SDValue &getMask() const { return getOperand(2); } | |||
2464 | const SDValue &getScale() const { return getOperand(5); } | |||
2465 | ||||
2466 | static bool classof(const SDNode *N) { | |||
2467 | return N->getOpcode() == ISD::MGATHER || | |||
2468 | N->getOpcode() == ISD::MSCATTER; | |||
2469 | } | |||
2470 | }; | |||
2471 | ||||
/// This class is used to represent an MGATHER node
///
class MaskedGatherSDNode : public MaskedGatherScatterSDNode {
public:
  friend class SelectionDAG;

  MaskedGatherSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
                     EVT MemVT, MachineMemOperand *MMO,
                     ISD::MemIndexType IndexType, ISD::LoadExtType ETy)
      : MaskedGatherScatterSDNode(ISD::MGATHER, Order, dl, VTs, MemVT, MMO,
                                  IndexType) {
    LoadSDNodeBits.ExtTy = ETy;
  }

  /// Value produced for lanes whose mask bit is clear (operand 1).
  const SDValue &getPassThru() const { return getOperand(1); }

  /// Return the load extension kind (plain or value-extending).
  ISD::LoadExtType getExtensionType() const {
    return ISD::LoadExtType(LoadSDNodeBits.ExtTy);
  }

  static bool classof(const SDNode *N) {
    return N->getOpcode() == ISD::MGATHER;
  }
};
2496 | ||||
/// This class is used to represent an MSCATTER node
///
class MaskedScatterSDNode : public MaskedGatherScatterSDNode {
public:
  friend class SelectionDAG;

  MaskedScatterSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs,
                      EVT MemVT, MachineMemOperand *MMO,
                      ISD::MemIndexType IndexType, bool IsTrunc)
      : MaskedGatherScatterSDNode(ISD::MSCATTER, Order, dl, VTs, MemVT, MMO,
                                  IndexType) {
    StoreSDNodeBits.IsTruncating = IsTrunc;
  }

  /// Return true if the op does a truncation before store.
  /// For integers this is the same as doing a TRUNCATE and storing the result.
  /// For floats, it is the same as doing an FP_ROUND and storing the result.
  bool isTruncatingStore() const { return StoreSDNodeBits.IsTruncating; }

  /// The value being scattered (operand 1).
  const SDValue &getValue() const { return getOperand(1); }

  static bool classof(const SDNode *N) {
    return N->getOpcode() == ISD::MSCATTER;
  }
};
2522 | ||||
2523 | /// An SDNode that represents everything that will be needed | |||
2524 | /// to construct a MachineInstr. These nodes are created during the | |||
2525 | /// instruction selection proper phase. | |||
2526 | /// | |||
2527 | /// Note that the only supported way to set the `memoperands` is by calling the | |||
2528 | /// `SelectionDAG::setNodeMemRefs` function as the memory management happens | |||
2529 | /// inside the DAG rather than in the node. | |||
2530 | class MachineSDNode : public SDNode { | |||
2531 | private: | |||
2532 | friend class SelectionDAG; | |||
2533 | ||||
2534 | MachineSDNode(unsigned Opc, unsigned Order, const DebugLoc &DL, SDVTList VTs) | |||
2535 | : SDNode(Opc, Order, DL, VTs) {} | |||
2536 | ||||
2537 | // We use a pointer union between a single `MachineMemOperand` pointer and | |||
2538 | // a pointer to an array of `MachineMemOperand` pointers. This is null when | |||
2539 | // the number of these is zero, the single pointer variant used when the | |||
2540 | // number is one, and the array is used for larger numbers. | |||
2541 | // | |||
2542 | // The array is allocated via the `SelectionDAG`'s allocator and so will | |||
2543 | // always live until the DAG is cleaned up and doesn't require ownership here. | |||
2544 | // | |||
2545 | // We can't use something simpler like `TinyPtrVector` here because `SDNode` | |||
2546 | // subclasses aren't managed in a conforming C++ manner. See the comments on | |||
2547 | // `SelectionDAG::MorphNodeTo` which details what all goes on, but the | |||
2548 | // constraint here is that these don't manage memory with their constructor or | |||
2549 | // destructor and can be initialized to a good state even if they start off | |||
2550 | // uninitialized. | |||
2551 | PointerUnion<MachineMemOperand *, MachineMemOperand **> MemRefs = {}; | |||
2552 | ||||
2553 | // Note that this could be folded into the above `MemRefs` member if doing so | |||
2554 | // is advantageous at some point. We don't need to store this in most cases. | |||
2555 | // However, at the moment this doesn't appear to make the allocation any | |||
2556 | // smaller and makes the code somewhat simpler to read. | |||
2557 | int NumMemRefs = 0; | |||
2558 | ||||
2559 | public: | |||
2560 | using mmo_iterator = ArrayRef<MachineMemOperand *>::const_iterator; | |||
2561 | ||||
2562 | ArrayRef<MachineMemOperand *> memoperands() const { | |||
2563 | // Special case the common cases. | |||
2564 | if (NumMemRefs == 0) | |||
2565 | return {}; | |||
2566 | if (NumMemRefs == 1) | |||
2567 | return makeArrayRef(MemRefs.getAddrOfPtr1(), 1); | |||
2568 | ||||
2569 | // Otherwise we have an actual array. | |||
2570 | return makeArrayRef(MemRefs.get<MachineMemOperand **>(), NumMemRefs); | |||
2571 | } | |||
2572 | mmo_iterator memoperands_begin() const { return memoperands().begin(); } | |||
2573 | mmo_iterator memoperands_end() const { return memoperands().end(); } | |||
2574 | bool memoperands_empty() const { return memoperands().empty(); } | |||
2575 | ||||
2576 | /// Clear out the memory reference descriptor list. | |||
2577 | void clearMemRefs() { | |||
2578 | MemRefs = nullptr; | |||
2579 | NumMemRefs = 0; | |||
2580 | } | |||
2581 | ||||
2582 | static bool classof(const SDNode *N) { | |||
2583 | return N->isMachineOpcode(); | |||
2584 | } | |||
2585 | }; | |||
2586 | ||||
2587 | /// An SDNode that records if a register contains a value that is guaranteed to | |||
2588 | /// be aligned accordingly. | |||
/// An SDNode that records if a register contains a value that is guaranteed to
/// be aligned accordingly.
class AssertAlignSDNode : public SDNode {
  // The alignment the wrapped value is guaranteed to have.
  Align Alignment;

public:
  AssertAlignSDNode(unsigned Order, const DebugLoc &DL, EVT VT, Align A)
      : SDNode(ISD::AssertAlign, Order, DL, getSDVTList(VT)), Alignment(A) {}

  /// Return the alignment recorded for this node's value.
  Align getAlign() const { return Alignment; }

  // Support for LLVM-style RTTI (isa<>/cast<>/dyn_cast<>).
  static bool classof(const SDNode *N) {
    return N->getOpcode() == ISD::AssertAlign;
  }
};
2602 | ||||
2603 | class SDNodeIterator { | |||
2604 | const SDNode *Node; | |||
2605 | unsigned Operand; | |||
2606 | ||||
2607 | SDNodeIterator(const SDNode *N, unsigned Op) : Node(N), Operand(Op) {} | |||
2608 | ||||
2609 | public: | |||
2610 | using iterator_category = std::forward_iterator_tag; | |||
2611 | using value_type = SDNode; | |||
2612 | using difference_type = std::ptrdiff_t; | |||
2613 | using pointer = value_type *; | |||
2614 | using reference = value_type &; | |||
2615 | ||||
2616 | bool operator==(const SDNodeIterator& x) const { | |||
2617 | return Operand == x.Operand; | |||
2618 | } | |||
2619 | bool operator!=(const SDNodeIterator& x) const { return !operator==(x); } | |||
2620 | ||||
2621 | pointer operator*() const { | |||
2622 | return Node->getOperand(Operand).getNode(); | |||
2623 | } | |||
2624 | pointer operator->() const { return operator*(); } | |||
2625 | ||||
2626 | SDNodeIterator& operator++() { // Preincrement | |||
2627 | ++Operand; | |||
2628 | return *this; | |||
2629 | } | |||
2630 | SDNodeIterator operator++(int) { // Postincrement | |||
2631 | SDNodeIterator tmp = *this; ++*this; return tmp; | |||
2632 | } | |||
2633 | size_t operator-(SDNodeIterator Other) const { | |||
2634 | assert(Node == Other.Node &&((void)0) | |||
2635 | "Cannot compare iterators of two different nodes!")((void)0); | |||
2636 | return Operand - Other.Operand; | |||
2637 | } | |||
2638 | ||||
2639 | static SDNodeIterator begin(const SDNode *N) { return SDNodeIterator(N, 0); } | |||
2640 | static SDNodeIterator end (const SDNode *N) { | |||
2641 | return SDNodeIterator(N, N->getNumOperands()); | |||
2642 | } | |||
2643 | ||||
2644 | unsigned getOperand() const { return Operand; } | |||
2645 | const SDNode *getNode() const { return Node; } | |||
2646 | }; | |||
2647 | ||||
/// GraphTraits specialization that lets generic graph algorithms walk an
/// SDNode graph: the children of a node are the nodes it takes as operands
/// (via SDNodeIterator).
template <> struct GraphTraits<SDNode*> {
  using NodeRef = SDNode *;
  using ChildIteratorType = SDNodeIterator;

  // The entry node of a single-node graph is the node itself.
  static NodeRef getEntryNode(SDNode *N) { return N; }

  static ChildIteratorType child_begin(NodeRef N) {
    return SDNodeIterator::begin(N);
  }

  static ChildIteratorType child_end(NodeRef N) {
    return SDNodeIterator::end(N);
  }
};
2662 | ||||
/// A representation of the largest SDNode, for use in sizeof().
///
/// This needs to be a union because the largest node differs on 32 bit systems
/// with 4 and 8 byte pointer alignment, respectively.
// NOTE(review): the candidate list below is presumably maintained by hand —
// if a larger SDNode subclass is added it must be listed here; confirm
// against the allocator that uses sizeof(LargestSDNode).
using LargestSDNode = AlignedCharArrayUnion<AtomicSDNode, TargetIndexSDNode,
                                            BlockAddressSDNode,
                                            GlobalAddressSDNode,
                                            PseudoProbeSDNode>;

/// The SDNode class with the greatest alignment requirement.
using MostAlignedSDNode = GlobalAddressSDNode;
2674 | ||||
2675 | namespace ISD { | |||
2676 | ||||
2677 | /// Returns true if the specified node is a non-extending and unindexed load. | |||
2678 | inline bool isNormalLoad(const SDNode *N) { | |||
2679 | const LoadSDNode *Ld = dyn_cast<LoadSDNode>(N); | |||
2680 | return Ld && Ld->getExtensionType() == ISD::NON_EXTLOAD && | |||
2681 | Ld->getAddressingMode() == ISD::UNINDEXED; | |||
2682 | } | |||
2683 | ||||
2684 | /// Returns true if the specified node is a non-extending load. | |||
2685 | inline bool isNON_EXTLoad(const SDNode *N) { | |||
2686 | return isa<LoadSDNode>(N) && | |||
2687 | cast<LoadSDNode>(N)->getExtensionType() == ISD::NON_EXTLOAD; | |||
2688 | } | |||
2689 | ||||
2690 | /// Returns true if the specified node is a EXTLOAD. | |||
2691 | inline bool isEXTLoad(const SDNode *N) { | |||
2692 | return isa<LoadSDNode>(N) && | |||
2693 | cast<LoadSDNode>(N)->getExtensionType() == ISD::EXTLOAD; | |||
2694 | } | |||
2695 | ||||
2696 | /// Returns true if the specified node is a SEXTLOAD. | |||
2697 | inline bool isSEXTLoad(const SDNode *N) { | |||
2698 | return isa<LoadSDNode>(N) && | |||
2699 | cast<LoadSDNode>(N)->getExtensionType() == ISD::SEXTLOAD; | |||
2700 | } | |||
2701 | ||||
2702 | /// Returns true if the specified node is a ZEXTLOAD. | |||
2703 | inline bool isZEXTLoad(const SDNode *N) { | |||
2704 | return isa<LoadSDNode>(N) && | |||
2705 | cast<LoadSDNode>(N)->getExtensionType() == ISD::ZEXTLOAD; | |||
2706 | } | |||
2707 | ||||
2708 | /// Returns true if the specified node is an unindexed load. | |||
2709 | inline bool isUNINDEXEDLoad(const SDNode *N) { | |||
2710 | return isa<LoadSDNode>(N) && | |||
2711 | cast<LoadSDNode>(N)->getAddressingMode() == ISD::UNINDEXED; | |||
2712 | } | |||
2713 | ||||
2714 | /// Returns true if the specified node is a non-truncating | |||
2715 | /// and unindexed store. | |||
2716 | inline bool isNormalStore(const SDNode *N) { | |||
2717 | const StoreSDNode *St = dyn_cast<StoreSDNode>(N); | |||
2718 | return St && !St->isTruncatingStore() && | |||
2719 | St->getAddressingMode() == ISD::UNINDEXED; | |||
2720 | } | |||
2721 | ||||
2722 | /// Returns true if the specified node is an unindexed store. | |||
2723 | inline bool isUNINDEXEDStore(const SDNode *N) { | |||
2724 | return isa<StoreSDNode>(N) && | |||
2725 | cast<StoreSDNode>(N)->getAddressingMode() == ISD::UNINDEXED; | |||
2726 | } | |||
2727 | ||||
2728 | /// Attempt to match a unary predicate against a scalar/splat constant or | |||
2729 | /// every element of a constant BUILD_VECTOR. | |||
2730 | /// If AllowUndef is true, then UNDEF elements will pass nullptr to Match. | |||
2731 | bool matchUnaryPredicate(SDValue Op, | |||
2732 | std::function<bool(ConstantSDNode *)> Match, | |||
2733 | bool AllowUndefs = false); | |||
2734 | ||||
2735 | /// Attempt to match a binary predicate against a pair of scalar/splat | |||
2736 | /// constants or every element of a pair of constant BUILD_VECTORs. | |||
2737 | /// If AllowUndef is true, then UNDEF elements will pass nullptr to Match. | |||
2738 | /// If AllowTypeMismatch is true then RetType + ArgTypes don't need to match. | |||
2739 | bool matchBinaryPredicate( | |||
2740 | SDValue LHS, SDValue RHS, | |||
2741 | std::function<bool(ConstantSDNode *, ConstantSDNode *)> Match, | |||
2742 | bool AllowUndefs = false, bool AllowTypeMismatch = false); | |||
2743 | ||||
2744 | /// Returns true if the specified value is the overflow result from one | |||
2745 | /// of the overflow intrinsic nodes. | |||
2746 | inline bool isOverflowIntrOpRes(SDValue Op) { | |||
2747 | unsigned Opc = Op.getOpcode(); | |||
2748 | return (Op.getResNo() == 1 && | |||
2749 | (Opc == ISD::SADDO || Opc == ISD::UADDO || Opc == ISD::SSUBO || | |||
2750 | Opc == ISD::USUBO || Opc == ISD::SMULO || Opc == ISD::UMULO)); | |||
2751 | } | |||
2752 | ||||
2753 | } // end namespace ISD | |||
2754 | ||||
2755 | } // end namespace llvm | |||
2756 | ||||
2757 | #endif // LLVM_CODEGEN_SELECTIONDAGNODES_H |