clang -cc1 -cc1 -triple amd64-unknown-openbsd7.0 -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name X86ISelDAGToDAG.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 1 -fhalf-no-semantic-interposition -mframe-pointer=all -relaxed-aliasing -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/usr/src/gnu/usr.bin/clang/libLLVM/obj -resource-dir /usr/local/lib/clang/13.0.0 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I 
/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Analysis -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ASMParser -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/BinaryFormat -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Bitcode -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Bitcode -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Bitstream -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /include/llvm/CodeGen -I /include/llvm/CodeGen/PBQP -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/IR -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/IR -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Coroutines -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ProfileData/Coverage -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/CodeView -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/DWARF -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/MSF -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/PDB -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Demangle -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ExecutionEngine -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ExecutionEngine/JITLink -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ExecutionEngine/Orc -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend -I 
/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend/OpenACC -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend/OpenMP -I /include/llvm/CodeGen/GlobalISel -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/IRReader -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/InstCombine -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/Transforms/InstCombine -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/LTO -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Linker -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/MC -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/MC/MCParser -I /include/llvm/CodeGen/MIRParser -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Object -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Option -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Passes -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ProfileData -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Scalar -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ADT -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Support -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/Symbolize -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Target -I 
/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Utils -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Vectorize -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/IPO -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include -I /usr/src/gnu/usr.bin/clang/libLLVM/../include -I /usr/src/gnu/usr.bin/clang/libLLVM/obj -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include -D NDEBUG -D __STDC_LIMIT_MACROS -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D LLVM_PREFIX="/usr" -D PIC -internal-isystem /usr/include/c++/v1 -internal-isystem /usr/local/lib/clang/13.0.0/include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/usr/src/gnu/usr.bin/clang/libLLVM/obj -ferror-limit 19 -fvisibility-inlines-hidden -fwrapv -D_RET_PROTECTOR -ret-protector -fno-rtti -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -fno-builtin-malloc 
-fno-builtin-calloc -fno-builtin-realloc -fno-builtin-valloc -fno-builtin-free -fno-builtin-strdup -fno-builtin-strndup -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /home/ben/Projects/vmm/scan-build/2022-01-12-194120-40624-1 -x c++ /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
1 | |
2 | |
3 | |
4 | |
5 | |
6 | |
7 | |
8 | |
9 | |
10 | |
11 | |
12 | |
13 | |
14 | #include "X86.h" |
15 | #include "X86MachineFunctionInfo.h" |
16 | #include "X86RegisterInfo.h" |
17 | #include "X86Subtarget.h" |
18 | #include "X86TargetMachine.h" |
19 | #include "llvm/ADT/Statistic.h" |
20 | #include "llvm/CodeGen/MachineModuleInfo.h" |
21 | #include "llvm/CodeGen/SelectionDAGISel.h" |
22 | #include "llvm/Config/llvm-config.h" |
23 | #include "llvm/IR/ConstantRange.h" |
24 | #include "llvm/IR/Function.h" |
25 | #include "llvm/IR/Instructions.h" |
26 | #include "llvm/IR/Intrinsics.h" |
27 | #include "llvm/IR/IntrinsicsX86.h" |
28 | #include "llvm/IR/Type.h" |
29 | #include "llvm/Support/Debug.h" |
30 | #include "llvm/Support/ErrorHandling.h" |
31 | #include "llvm/Support/KnownBits.h" |
32 | #include "llvm/Support/MathExtras.h" |
33 | #include <cstdint> |
34 | |
35 | using namespace llvm; |
36 | |
37 | #define DEBUG_TYPE "x86-isel" |
38 | |
39 | STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor"); |
40 | |
41 | static cl::opt<bool> AndImmShrink("x86-and-imm-shrink", cl::init(true), |
42 | cl::desc("Enable setting constant bits to reduce size of mask immediates"), |
43 | cl::Hidden); |
44 | |
45 | static cl::opt<bool> EnablePromoteAnyextLoad( |
46 | "x86-promote-anyext-load", cl::init(true), |
47 | cl::desc("Enable promoting aligned anyext load to wider load"), cl::Hidden); |
48 | |
49 | extern cl::opt<bool> IndirectBranchTracking; |
50 | |
51 | |
52 | |
53 | |
54 | |
55 | namespace { |
56 | |
57 | |
58 | struct X86ISelAddressMode { |
59 | enum { |
60 | RegBase, |
61 | FrameIndexBase |
62 | } BaseType; |
63 | |
64 | |
65 | SDValue Base_Reg; |
66 | int Base_FrameIndex; |
67 | |
68 | unsigned Scale; |
69 | SDValue IndexReg; |
70 | int32_t Disp; |
71 | SDValue Segment; |
72 | const GlobalValue *GV; |
73 | const Constant *CP; |
74 | const BlockAddress *BlockAddr; |
75 | const char *ES; |
76 | MCSymbol *MCSym; |
77 | int JT; |
78 | Align Alignment; |
79 | unsigned char SymbolFlags; |
80 | bool NegateIndex = false; |
81 | |
82 | X86ISelAddressMode() |
83 | : BaseType(RegBase), Base_FrameIndex(0), Scale(1), IndexReg(), Disp(0), |
84 | Segment(), GV(nullptr), CP(nullptr), BlockAddr(nullptr), ES(nullptr), |
85 | MCSym(nullptr), JT(-1), SymbolFlags(X86II::MO_NO_FLAG) {} |
86 | |
87 | bool hasSymbolicDisplacement() const { |
88 | return GV != nullptr || CP != nullptr || ES != nullptr || |
89 | MCSym != nullptr || JT != -1 || BlockAddr != nullptr; |
90 | } |
91 | |
92 | bool hasBaseOrIndexReg() const { |
93 | return BaseType == FrameIndexBase || |
94 | IndexReg.getNode() != nullptr || Base_Reg.getNode() != nullptr; |
95 | } |
96 | |
97 | |
98 | bool isRIPRelative() const { |
99 | if (BaseType != RegBase) return false; |
100 | if (RegisterSDNode *RegNode = |
101 | dyn_cast_or_null<RegisterSDNode>(Base_Reg.getNode())) |
102 | return RegNode->getReg() == X86::RIP; |
103 | return false; |
104 | } |
105 | |
106 | void setBaseReg(SDValue Reg) { |
107 | BaseType = RegBase; |
108 | Base_Reg = Reg; |
109 | } |
110 | |
111 | #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
112 | void dump(SelectionDAG *DAG = nullptr) { |
113 | dbgs() << "X86ISelAddressMode " << this << '\n'; |
114 | dbgs() << "Base_Reg "; |
115 | if (Base_Reg.getNode()) |
116 | Base_Reg.getNode()->dump(DAG); |
117 | else |
118 | dbgs() << "nul\n"; |
119 | if (BaseType == FrameIndexBase) |
120 | dbgs() << " Base.FrameIndex " << Base_FrameIndex << '\n'; |
121 | dbgs() << " Scale " << Scale << '\n' |
122 | << "IndexReg "; |
123 | if (NegateIndex) |
124 | dbgs() << "negate "; |
125 | if (IndexReg.getNode()) |
126 | IndexReg.getNode()->dump(DAG); |
127 | else |
128 | dbgs() << "nul\n"; |
129 | dbgs() << " Disp " << Disp << '\n' |
130 | << "GV "; |
131 | if (GV) |
132 | GV->dump(); |
133 | else |
134 | dbgs() << "nul"; |
135 | dbgs() << " CP "; |
136 | if (CP) |
137 | CP->dump(); |
138 | else |
139 | dbgs() << "nul"; |
140 | dbgs() << '\n' |
141 | << "ES "; |
142 | if (ES) |
143 | dbgs() << ES; |
144 | else |
145 | dbgs() << "nul"; |
146 | dbgs() << " MCSym "; |
147 | if (MCSym) |
148 | dbgs() << MCSym; |
149 | else |
150 | dbgs() << "nul"; |
151 | dbgs() << " JT" << JT << " Align" << Alignment.value() << '\n'; |
152 | } |
153 | #endif |
154 | }; |
155 | } |
156 | |
157 | namespace { |
158 | |
159 | |
160 | |
161 | |
162 | class X86DAGToDAGISel final : public SelectionDAGISel { |
163 | |
164 | |
165 | const X86Subtarget *Subtarget; |
166 | |
167 | |
168 | bool OptForMinSize; |
169 | |
170 | |
171 | bool IndirectTlsSegRefs; |
172 | |
173 | public: |
174 | explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel) |
175 | : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr), |
176 | OptForMinSize(false), IndirectTlsSegRefs(false) {} |
177 | |
178 | StringRef getPassName() const override { |
179 | return "X86 DAG->DAG Instruction Selection"; |
180 | } |
181 | |
182 | bool runOnMachineFunction(MachineFunction &MF) override { |
183 | |
184 | Subtarget = &MF.getSubtarget<X86Subtarget>(); |
185 | IndirectTlsSegRefs = MF.getFunction().hasFnAttribute( |
186 | "indirect-tls-seg-refs"); |
187 | |
188 | |
189 | OptForMinSize = MF.getFunction().hasMinSize(); |
190 | assert((!OptForMinSize || MF.getFunction().hasOptSize()) && |
191 | "OptForMinSize implies OptForSize"); |
192 | |
193 | SelectionDAGISel::runOnMachineFunction(MF); |
194 | return true; |
195 | } |
196 | |
197 | void emitFunctionEntryCode() override; |
198 | |
199 | bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const override; |
200 | |
201 | void PreprocessISelDAG() override; |
202 | void PostprocessISelDAG() override; |
203 | |
204 | |
205 | #include "X86GenDAGISel.inc" |
206 | |
207 | private: |
208 | void Select(SDNode *N) override; |
209 | |
210 | bool foldOffsetIntoAddress(uint64_t Offset, X86ISelAddressMode &AM); |
211 | bool matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM, |
212 | bool AllowSegmentRegForX32 = false); |
213 | bool matchWrapper(SDValue N, X86ISelAddressMode &AM); |
214 | bool matchAddress(SDValue N, X86ISelAddressMode &AM); |
215 | bool matchVectorAddress(SDValue N, X86ISelAddressMode &AM); |
216 | bool matchAdd(SDValue &N, X86ISelAddressMode &AM, unsigned Depth); |
217 | bool matchAddressRecursively(SDValue N, X86ISelAddressMode &AM, |
218 | unsigned Depth); |
219 | bool matchAddressBase(SDValue N, X86ISelAddressMode &AM); |
220 | bool selectAddr(SDNode *Parent, SDValue N, SDValue &Base, |
221 | SDValue &Scale, SDValue &Index, SDValue &Disp, |
222 | SDValue &Segment); |
223 | bool selectVectorAddr(MemSDNode *Parent, SDValue BasePtr, SDValue IndexOp, |
224 | SDValue ScaleOp, SDValue &Base, SDValue &Scale, |
225 | SDValue &Index, SDValue &Disp, SDValue &Segment); |
226 | bool selectMOV64Imm32(SDValue N, SDValue &Imm); |
227 | bool selectLEAAddr(SDValue N, SDValue &Base, |
228 | SDValue &Scale, SDValue &Index, SDValue &Disp, |
229 | SDValue &Segment); |
230 | bool selectLEA64_32Addr(SDValue N, SDValue &Base, |
231 | SDValue &Scale, SDValue &Index, SDValue &Disp, |
232 | SDValue &Segment); |
233 | bool selectTLSADDRAddr(SDValue N, SDValue &Base, |
234 | SDValue &Scale, SDValue &Index, SDValue &Disp, |
235 | SDValue &Segment); |
236 | bool selectRelocImm(SDValue N, SDValue &Op); |
237 | |
238 | bool tryFoldLoad(SDNode *Root, SDNode *P, SDValue N, |
239 | SDValue &Base, SDValue &Scale, |
240 | SDValue &Index, SDValue &Disp, |
241 | SDValue &Segment); |
242 | |
243 | |
244 | bool tryFoldLoad(SDNode *P, SDValue N, |
245 | SDValue &Base, SDValue &Scale, |
246 | SDValue &Index, SDValue &Disp, |
247 | SDValue &Segment) { |
248 | return tryFoldLoad(P, P, N, Base, Scale, Index, Disp, Segment); |
249 | } |
250 | |
251 | bool tryFoldBroadcast(SDNode *Root, SDNode *P, SDValue N, |
252 | SDValue &Base, SDValue &Scale, |
253 | SDValue &Index, SDValue &Disp, |
254 | SDValue &Segment); |
255 | |
256 | bool isProfitableToFormMaskedOp(SDNode *N) const; |
257 | |
258 | |
259 | bool SelectInlineAsmMemoryOperand(const SDValue &Op, |
260 | unsigned ConstraintID, |
261 | std::vector<SDValue> &OutOps) override; |
262 | |
263 | void emitSpecialCodeForMain(); |
264 | |
265 | inline void getAddressOperands(X86ISelAddressMode &AM, const SDLoc &DL, |
266 | MVT VT, SDValue &Base, SDValue &Scale, |
267 | SDValue &Index, SDValue &Disp, |
268 | SDValue &Segment) { |
269 | if (AM.BaseType == X86ISelAddressMode::FrameIndexBase) |
270 | Base = CurDAG->getTargetFrameIndex( |
271 | AM.Base_FrameIndex, TLI->getPointerTy(CurDAG->getDataLayout())); |
272 | else if (AM.Base_Reg.getNode()) |
273 | Base = AM.Base_Reg; |
274 | else |
275 | Base = CurDAG->getRegister(0, VT); |
276 | |
277 | Scale = getI8Imm(AM.Scale, DL); |
278 | |
279 | |
280 | if (AM.NegateIndex) { |
281 | unsigned NegOpc = VT == MVT::i64 ? X86::NEG64r : X86::NEG32r; |
282 | SDValue Neg = SDValue(CurDAG->getMachineNode(NegOpc, DL, VT, MVT::i32, |
283 | AM.IndexReg), 0); |
284 | AM.IndexReg = Neg; |
285 | } |
286 | |
287 | if (AM.IndexReg.getNode()) |
288 | Index = AM.IndexReg; |
289 | else |
290 | Index = CurDAG->getRegister(0, VT); |
291 | |
292 | |
293 | |
294 | if (AM.GV) |
295 | Disp = CurDAG->getTargetGlobalAddress(AM.GV, SDLoc(), |
296 | MVT::i32, AM.Disp, |
297 | AM.SymbolFlags); |
298 | else if (AM.CP) |
299 | Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i32, AM.Alignment, |
300 | AM.Disp, AM.SymbolFlags); |
301 | else if (AM.ES) { |
302 | assert(!AM.Disp && "Non-zero displacement is ignored with ES."); |
303 | Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i32, AM.SymbolFlags); |
304 | } else if (AM.MCSym) { |
305 | assert(!AM.Disp && "Non-zero displacement is ignored with MCSym."); |
306 | assert(AM.SymbolFlags == 0 && "oo"); |
307 | Disp = CurDAG->getMCSymbol(AM.MCSym, MVT::i32); |
308 | } else if (AM.JT != -1) { |
309 | assert(!AM.Disp && "Non-zero displacement is ignored with JT."); |
310 | Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i32, AM.SymbolFlags); |
311 | } else if (AM.BlockAddr) |
312 | Disp = CurDAG->getTargetBlockAddress(AM.BlockAddr, MVT::i32, AM.Disp, |
313 | AM.SymbolFlags); |
314 | else |
315 | Disp = CurDAG->getTargetConstant(AM.Disp, DL, MVT::i32); |
316 | |
317 | if (AM.Segment.getNode()) |
318 | Segment = AM.Segment; |
319 | else |
320 | Segment = CurDAG->getRegister(0, MVT::i16); |
321 | } |
322 | |
323 | |
324 | |
325 | |
326 | |
327 | |
328 | |
329 | bool shouldAvoidImmediateInstFormsForSize(SDNode *N) const { |
330 | uint32_t UseCount = 0; |
331 | |
332 | |
333 | |
334 | |
335 | if (!CurDAG->shouldOptForSize()) |
336 | return false; |
337 | |
338 | |
339 | for (SDNode::use_iterator UI = N->use_begin(), |
340 | UE = N->use_end(); (UI != UE) && (UseCount < 2); ++UI) { |
341 | |
342 | SDNode *User = *UI; |
343 | |
344 | |
345 | |
346 | if (User->isMachineOpcode()) { |
347 | UseCount++; |
348 | continue; |
349 | } |
350 | |
351 | |
352 | if (User->getOpcode() == ISD::STORE && |
353 | User->getOperand(1).getNode() == N) { |
354 | UseCount++; |
355 | continue; |
356 | } |
357 | |
358 | |
359 | |
360 | |
361 | |
362 | |
363 | |
364 | if (User->getNumOperands() != 2) |
365 | continue; |
366 | |
367 | |
368 | |
369 | auto *C = dyn_cast<ConstantSDNode>(N); |
370 | if (C && isInt<8>(C->getSExtValue())) |
371 | continue; |
372 | |
373 | |
374 | |
375 | |
376 | |
377 | if (User->getOpcode() == X86ISD::ADD || |
378 | User->getOpcode() == ISD::ADD || |
379 | User->getOpcode() == X86ISD::SUB || |
380 | User->getOpcode() == ISD::SUB) { |
381 | |
382 | |
383 | SDValue OtherOp = User->getOperand(0); |
384 | if (OtherOp.getNode() == N) |
385 | OtherOp = User->getOperand(1); |
386 | |
387 | |
388 | RegisterSDNode *RegNode; |
389 | if (OtherOp->getOpcode() == ISD::CopyFromReg && |
390 | (RegNode = dyn_cast_or_null<RegisterSDNode>( |
391 | OtherOp->getOperand(1).getNode()))) |
392 | if ((RegNode->getReg() == X86::ESP) || |
393 | (RegNode->getReg() == X86::RSP)) |
394 | continue; |
395 | } |
396 | |
397 | |
398 | UseCount++; |
399 | } |
400 | |
401 | |
402 | return (UseCount > 1); |
403 | } |
404 | |
405 | |
406 | inline SDValue getI8Imm(unsigned Imm, const SDLoc &DL) { |
407 | return CurDAG->getTargetConstant(Imm, DL, MVT::i8); |
408 | } |
409 | |
410 | |
411 | inline SDValue getI32Imm(unsigned Imm, const SDLoc &DL) { |
412 | return CurDAG->getTargetConstant(Imm, DL, MVT::i32); |
413 | } |
414 | |
415 | |
416 | inline SDValue getI64Imm(uint64_t Imm, const SDLoc &DL) { |
417 | return CurDAG->getTargetConstant(Imm, DL, MVT::i64); |
418 | } |
419 | |
420 | SDValue getExtractVEXTRACTImmediate(SDNode *N, unsigned VecWidth, |
421 | const SDLoc &DL) { |
422 | assert((VecWidth == 128 || VecWidth == 256) && "Unexpected vector width"); |
423 | uint64_t Index = N->getConstantOperandVal(1); |
424 | MVT VecVT = N->getOperand(0).getSimpleValueType(); |
425 | return getI8Imm((Index * VecVT.getScalarSizeInBits()) / VecWidth, DL); |
426 | } |
427 | |
428 | SDValue getInsertVINSERTImmediate(SDNode *N, unsigned VecWidth, |
429 | const SDLoc &DL) { |
430 | assert((VecWidth == 128 || VecWidth == 256) && "Unexpected vector width"); |
431 | uint64_t Index = N->getConstantOperandVal(2); |
432 | MVT VecVT = N->getSimpleValueType(0); |
433 | return getI8Imm((Index * VecVT.getScalarSizeInBits()) / VecWidth, DL); |
434 | } |
435 | |
436 | |
437 | |
438 | bool isUnneededShiftMask(SDNode *N, unsigned Width) const { |
439 | assert(N->getOpcode() == ISD::AND && "Unexpected opcode"); |
440 | const APInt &Val = cast<ConstantSDNode>(N->getOperand(1))->getAPIntValue(); |
441 | |
442 | if (Val.countTrailingOnes() >= Width) |
443 | return true; |
444 | |
445 | APInt Mask = Val | CurDAG->computeKnownBits(N->getOperand(0)).Zero; |
446 | return Mask.countTrailingOnes() >= Width; |
447 | } |
448 | |
449 | |
450 | |
451 | |
452 | SDNode *getGlobalBaseReg(); |
453 | |
454 | |
455 | |
456 | const X86TargetMachine &getTargetMachine() const { |
457 | return static_cast<const X86TargetMachine &>(TM); |
458 | } |
459 | |
460 | |
461 | |
462 | const X86InstrInfo *getInstrInfo() const { |
463 | return Subtarget->getInstrInfo(); |
464 | } |
465 | |
466 | |
467 | |
468 | |
469 | bool ComplexPatternFuncMutatesDAG() const override { |
470 | return true; |
471 | } |
472 | |
473 | bool isSExtAbsoluteSymbolRef(unsigned Width, SDNode *N) const; |
474 | |
475 | |
476 | bool useNonTemporalLoad(LoadSDNode *N) const { |
477 | if (!N->isNonTemporal()) |
478 | return false; |
479 | |
480 | unsigned StoreSize = N->getMemoryVT().getStoreSize(); |
481 | |
482 | if (N->getAlignment() < StoreSize) |
483 | return false; |
484 | |
485 | switch (StoreSize) { |
486 | default: llvm_unreachable("Unsupported store size"); |
487 | case 4: |
488 | case 8: |
489 | return false; |
490 | case 16: |
491 | return Subtarget->hasSSE41(); |
492 | case 32: |
493 | return Subtarget->hasAVX2(); |
494 | case 64: |
495 | return Subtarget->hasAVX512(); |
496 | } |
497 | } |
498 | |
499 | bool foldLoadStoreIntoMemOperand(SDNode *Node); |
500 | MachineSDNode *matchBEXTRFromAndImm(SDNode *Node); |
501 | bool matchBitExtract(SDNode *Node); |
502 | bool shrinkAndImmediate(SDNode *N); |
503 | bool isMaskZeroExtended(SDNode *N) const; |
504 | bool tryShiftAmountMod(SDNode *N); |
505 | bool tryShrinkShlLogicImm(SDNode *N); |
506 | bool tryVPTERNLOG(SDNode *N); |
507 | bool matchVPTERNLOG(SDNode *Root, SDNode *ParentA, SDNode *ParentBC, |
508 | SDValue A, SDValue B, SDValue C, uint8_t Imm); |
509 | bool tryVPTESTM(SDNode *Root, SDValue Setcc, SDValue Mask); |
510 | bool tryMatchBitSelect(SDNode *N); |
511 | |
512 | MachineSDNode *emitPCMPISTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad, |
513 | const SDLoc &dl, MVT VT, SDNode *Node); |
514 | MachineSDNode *emitPCMPESTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad, |
515 | const SDLoc &dl, MVT VT, SDNode *Node, |
516 | SDValue &InFlag); |
517 | |
518 | bool tryOptimizeRem8Extend(SDNode *N); |
519 | |
520 | bool onlyUsesZeroFlag(SDValue Flags) const; |
521 | bool hasNoSignFlagUses(SDValue Flags) const; |
522 | bool hasNoCarryFlagUses(SDValue Flags) const; |
523 | }; |
524 | } |
525 | |
526 | |
527 | |
528 | |
529 | static bool isLegalMaskCompare(SDNode *N, const X86Subtarget *Subtarget) { |
530 | unsigned Opcode = N->getOpcode(); |
531 | if (Opcode == X86ISD::CMPM || Opcode == X86ISD::CMPMM || |
532 | Opcode == X86ISD::STRICT_CMPM || Opcode == ISD::SETCC || |
533 | Opcode == X86ISD::CMPMM_SAE || Opcode == X86ISD::VFPCLASS) { |
534 | |
535 | |
536 | |
537 | EVT OpVT = N->getOperand(0).getValueType(); |
538 | |
539 | |
540 | if (Opcode == X86ISD::STRICT_CMPM) |
541 | OpVT = N->getOperand(1).getValueType(); |
542 | if (OpVT.is256BitVector() || OpVT.is128BitVector()) |
543 | return Subtarget->hasVLX(); |
544 | |
545 | return true; |
546 | } |
547 | |
548 | if (Opcode == X86ISD::VFPCLASSS || Opcode == X86ISD::FSETCCM || |
549 | Opcode == X86ISD::FSETCCM_SAE) |
550 | return true; |
551 | |
552 | return false; |
553 | } |
554 | |
555 | |
556 | |
557 | bool X86DAGToDAGISel::isMaskZeroExtended(SDNode *N) const { |
558 | |
559 | |
560 | |
561 | if (N->getOpcode() == ISD::AND) |
562 | return isLegalMaskCompare(N->getOperand(0).getNode(), Subtarget) || |
563 | isLegalMaskCompare(N->getOperand(1).getNode(), Subtarget); |
564 | |
565 | return isLegalMaskCompare(N, Subtarget); |
566 | } |
567 | |
568 | bool |
569 | X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const { |
570 | if (OptLevel == CodeGenOpt::None) return false; |
571 | |
572 | if (!N.hasOneUse()) |
573 | return false; |
574 | |
575 | if (N.getOpcode() != ISD::LOAD) |
576 | return true; |
577 | |
578 | |
579 | if (useNonTemporalLoad(cast<LoadSDNode>(N))) |
580 | return false; |
581 | |
582 | |
583 | if (U == Root) { |
584 | switch (U->getOpcode()) { |
585 | default: break; |
586 | case X86ISD::ADD: |
587 | case X86ISD::ADC: |
588 | case X86ISD::SUB: |
589 | case X86ISD::SBB: |
590 | case X86ISD::AND: |
591 | case X86ISD::XOR: |
592 | case X86ISD::OR: |
593 | case ISD::ADD: |
594 | case ISD::ADDCARRY: |
595 | case ISD::AND: |
596 | case ISD::OR: |
597 | case ISD::XOR: { |
598 | SDValue Op1 = U->getOperand(1); |
599 | |
600 | |
601 | |
602 | |
603 | |
604 | |
605 | |
606 | |
607 | |
608 | |
609 | |
610 | if (ConstantSDNode *Imm = dyn_cast<ConstantSDNode>(Op1)) { |
611 | if (Imm->getAPIntValue().isSignedIntN(8)) |
612 | return false; |
613 | |
614 | |
615 | |
616 | |
617 | |
618 | |
619 | if (U->getOpcode() == ISD::AND && |
620 | Imm->getAPIntValue().getBitWidth() == 64 && |
621 | Imm->getAPIntValue().isIntN(32)) |
622 | return false; |
623 | |
624 | |
625 | |
626 | |
627 | if (U->getOpcode() == ISD::AND && |
628 | (Imm->getAPIntValue() == UINT8_MAX || |
629 | Imm->getAPIntValue() == UINT16_MAX || |
630 | Imm->getAPIntValue() == UINT32_MAX)) |
631 | return false; |
632 | |
633 | |
634 | |
635 | if ((U->getOpcode() == ISD::ADD || U->getOpcode() == ISD::SUB) && |
636 | (-Imm->getAPIntValue()).isSignedIntN(8)) |
637 | return false; |
638 | |
639 | if ((U->getOpcode() == X86ISD::ADD || U->getOpcode() == X86ISD::SUB) && |
640 | (-Imm->getAPIntValue()).isSignedIntN(8) && |
641 | hasNoCarryFlagUses(SDValue(U, 1))) |
642 | return false; |
643 | } |
644 | |
645 | |
646 | |
647 | |
648 | |
649 | |
650 | |
651 | |
652 | |
653 | |
654 | |
655 | if (Op1.getOpcode() == X86ISD::Wrapper) { |
656 | SDValue Val = Op1.getOperand(0); |
657 | if (Val.getOpcode() == ISD::TargetGlobalTLSAddress) |
658 | return false; |
659 | } |
660 | |
661 | |
662 | |
663 | |
664 | |
665 | if (U->getOpcode() == ISD::OR || U->getOpcode() == ISD::XOR) { |
666 | if (U->getOperand(0).getOpcode() == ISD::SHL && |
667 | isOneConstant(U->getOperand(0).getOperand(0))) |
668 | return false; |
669 | |
670 | if (U->getOperand(1).getOpcode() == ISD::SHL && |
671 | isOneConstant(U->getOperand(1).getOperand(0))) |
672 | return false; |
673 | } |
674 | if (U->getOpcode() == ISD::AND) { |
675 | SDValue U0 = U->getOperand(0); |
676 | SDValue U1 = U->getOperand(1); |
677 | if (U0.getOpcode() == ISD::ROTL) { |
678 | auto *C = dyn_cast<ConstantSDNode>(U0.getOperand(0)); |
679 | if (C && C->getSExtValue() == -2) |
680 | return false; |
681 | } |
682 | |
683 | if (U1.getOpcode() == ISD::ROTL) { |
684 | auto *C = dyn_cast<ConstantSDNode>(U1.getOperand(0)); |
685 | if (C && C->getSExtValue() == -2) |
686 | return false; |
687 | } |
688 | } |
689 | |
690 | break; |
691 | } |
692 | case ISD::SHL: |
693 | case ISD::SRA: |
694 | case ISD::SRL: |
695 | |
696 | |
697 | |
698 | |
699 | if (isa<ConstantSDNode>(U->getOperand(1))) |
700 | return false; |
701 | |
702 | break; |
703 | } |
704 | } |
705 | |
706 | |
707 | |
708 | if (Root->getOpcode() == ISD::INSERT_SUBVECTOR && |
709 | isNullConstant(Root->getOperand(2)) && |
710 | (Root->getOperand(0).isUndef() || |
711 | ISD::isBuildVectorAllZeros(Root->getOperand(0).getNode()))) |
712 | return false; |
713 | |
714 | return true; |
715 | } |
716 | |
717 | |
718 | |
719 | |
720 | bool X86DAGToDAGISel::isProfitableToFormMaskedOp(SDNode *N) const { |
721 | assert( |
722 | (N->getOpcode() == ISD::VSELECT || N->getOpcode() == X86ISD::SELECTS) && |
723 | "Unexpected opcode!"); |
724 | |
725 | |
726 | |
727 | |
728 | return N->getOperand(1).hasOneUse(); |
729 | } |
730 | |
731 | |
732 | |
733 | static void moveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load, |
734 | SDValue Call, SDValue OrigChain) { |
735 | SmallVector<SDValue, 8> Ops; |
736 | SDValue Chain = OrigChain.getOperand(0); |
737 | if (Chain.getNode() == Load.getNode()) |
738 | Ops.push_back(Load.getOperand(0)); |
739 | else { |
740 | assert(Chain.getOpcode() == ISD::TokenFactor && |
741 | "Unexpected chain operand"); |
742 | for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i) |
743 | if (Chain.getOperand(i).getNode() == Load.getNode()) |
744 | Ops.push_back(Load.getOperand(0)); |
745 | else |
746 | Ops.push_back(Chain.getOperand(i)); |
747 | SDValue NewChain = |
748 | CurDAG->getNode(ISD::TokenFactor, SDLoc(Load), MVT::Other, Ops); |
749 | Ops.clear(); |
750 | Ops.push_back(NewChain); |
751 | } |
752 | Ops.append(OrigChain->op_begin() + 1, OrigChain->op_end()); |
753 | CurDAG->UpdateNodeOperands(OrigChain.getNode(), Ops); |
754 | CurDAG->UpdateNodeOperands(Load.getNode(), Call.getOperand(0), |
755 | Load.getOperand(1), Load.getOperand(2)); |
756 | |
757 | Ops.clear(); |
758 | Ops.push_back(SDValue(Load.getNode(), 1)); |
759 | Ops.append(Call->op_begin() + 1, Call->op_end()); |
760 | CurDAG->UpdateNodeOperands(Call.getNode(), Ops); |
761 | } |
762 | |
763 | |
764 | |
765 | |
766 | |
767 | |
768 | static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) { |
769 | |
770 | |
771 | |
772 | |
773 | if (Callee.getNode() == Chain.getNode() || !Callee.hasOneUse()) |
774 | return false; |
775 | LoadSDNode *LD = dyn_cast<LoadSDNode>(Callee.getNode()); |
776 | if (!LD || |
777 | !LD->isSimple() || |
778 | LD->getAddressingMode() != ISD::UNINDEXED || |
779 | LD->getExtensionType() != ISD::NON_EXTLOAD) |
780 | return false; |
781 | |
782 | |
783 | while (HasCallSeq && Chain.getOpcode() != ISD::CALLSEQ_START) { |
784 | if (!Chain.hasOneUse()) |
785 | return false; |
786 | Chain = Chain.getOperand(0); |
787 | } |
788 | |
789 | if (!Chain.getNumOperands()) |
790 | return false; |
791 | |
792 | |
793 | if (isa<MemSDNode>(Chain.getNode()) && |
794 | cast<MemSDNode>(Chain.getNode())->writeMem()) |
795 | return false; |
796 | if (Chain.getOperand(0).getNode() == Callee.getNode()) |
797 | return true; |
798 | if (Chain.getOperand(0).getOpcode() == ISD::TokenFactor && |
799 | Callee.getValue(1).isOperandOf(Chain.getOperand(0).getNode()) && |
800 | Callee.getValue(1).hasOneUse()) |
801 | return true; |
802 | return false; |
803 | } |
804 | |
/// Returns true when \p Imm has 0x0F1EFA in its low 24 bits and, scanning
/// the remaining bytes from bit 24 upwards, reaches an 0xF3 byte after
/// passing only bytes from the optional-prefix set
/// {26, 2E, 36, 3E, 64, 65, 66, 67, F0, F2}. Any other byte, or running out
/// of bytes before 0xF3 is seen, yields false.
static bool isEndbrImm64(uint64_t Imm) {
  if ((Imm & 0x00FFFFFF) != 0x0F1EFA)
    return false;

  for (unsigned Shift = 24; Shift < 64; Shift += 8) {
    switch (static_cast<uint8_t>((Imm >> Shift) & 0xFF)) {
    case 0xF3:
      return true;

    // Optional prefix bytes: keep scanning the next byte.
    case 0x26:
    case 0x2e:
    case 0x36:
    case 0x3e:
    case 0x64:
    case 0x65:
    case 0x66:
    case 0x67:
    case 0xf0:
    case 0xf2:
      continue;

    default:
      return false;
    }
  }

  return false;
}
825 | |
// Walks every node of CurDAG once and replaces selected nodes with equivalent
// nodes; each rewrite is described at its case below. MadeChange records
// whether anything was rewritten, and if so dead nodes are removed before
// returning. Takes no parameters and returns nothing; it mutates CurDAG.
826 | void X86DAGToDAGISel::PreprocessISelDAG() { |
827 | bool MadeChange = false; |
828 | for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), |
829 | E = CurDAG->allnodes_end(); I != E; ) { |
830 | SDNode *N = &*I++; 
831 | // Case: integer constants that match an ENDBR encoding. The constant is
832 | // compared against 0xF30F1EFA (64-bit subtarget) or 0xF30F1EFB
833 | // (otherwise) and additionally tested with isEndbrImm64().
834 | //
835 | // When the module carries a "cf-protection-branch" flag or
836 | // IndirectBranchTracking is set, every use of the constant is replaced
837 | // by NOT(constant ~Imm), which evaluates to the same value.
838 | //
839 | // NOTE(review): presumably the goal is that the literal ENDBR byte
840 | // pattern never shows up as an immediate in the emitted code, and the
841 | // trailing `false, true` arguments to getConstant() are presumably
842 | // isTarget/isOpaque so the NOT is not folded back - confirm.
843 | 
844 | 
845 | 
846 | 
847 | if (N->getOpcode() == ISD::Constant) { |
848 | MVT VT = N->getSimpleValueType(0); |
849 | int64_t Imm = cast<ConstantSDNode>(N)->getSExtValue(); |
850 | int32_t EndbrImm = Subtarget->is64Bit() ? 0xF30F1EFA : 0xF30F1EFB; |
851 | if (Imm == EndbrImm || isEndbrImm64(Imm)) { |
852 | // Gate on the module flag (or the IndirectBranchTracking setting).
853 | Metadata *CFProtectionBranch = |
854 | MF->getMMI().getModule()->getModuleFlag("cf-protection-branch"); |
855 | if (CFProtectionBranch || IndirectBranchTracking) { |
856 | SDLoc dl(N); |
857 | SDValue Complement = CurDAG->getConstant(~Imm, dl, VT, false, true); |
858 | Complement = CurDAG->getNOT(dl, Complement, VT); |
859 | --I; |
860 | CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Complement); |
861 | ++I; |
862 | MadeChange = true; |
863 | continue; |
864 | } |
865 | } |
866 | } |
867 | 
868 | // An X86ISD::AND whose result 1 has no users is replaced by a generic
869 | // ISD::AND on the same two operands.
870 | if (N->getOpcode() == X86ISD::AND && !N->hasAnyUseOfValue(1)) { |
871 | SDValue Res = CurDAG->getNode(ISD::AND, SDLoc(N), N->getValueType(0), |
872 | N->getOperand(0), N->getOperand(1)); |
873 | --I; |
874 | CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res); |
875 | ++I; |
876 | MadeChange = true; |
877 | continue; |
878 | } |
879 | 
880 | // A vector ADD/SUB whose second operand is a constant splat of 1 is
881 | // rewritten with the opposite opcode and an all-ones operand:
882 | //   add X, <1, 1, ...>  -->  sub X, <-1, -1, ...>
883 | //   sub X, <1, 1, ...>  -->  add X, <-1, -1, ...>
884 | 
885 | 
886 | 
887 | if ((N->getOpcode() == ISD::ADD || N->getOpcode() == ISD::SUB) && |
888 | N->getSimpleValueType(0).isVector()) { |
889 | 
890 | APInt SplatVal; |
891 | if (X86::isConstantSplat(N->getOperand(1), SplatVal) && |
892 | SplatVal.isOneValue()) { |
893 | SDLoc DL(N); |
894 | // Build the all-ones value as a vector of i32 and bitcast it to VT.
895 | MVT VT = N->getSimpleValueType(0); |
896 | unsigned NumElts = VT.getSizeInBits() / 32; |
897 | SDValue AllOnes = |
898 | CurDAG->getAllOnesConstant(DL, MVT::getVectorVT(MVT::i32, NumElts)); |
899 | AllOnes = CurDAG->getBitcast(VT, AllOnes); |
900 | |
901 | unsigned NewOpcode = N->getOpcode() == ISD::ADD ? ISD::SUB : ISD::ADD; |
902 | SDValue Res = |
903 | CurDAG->getNode(NewOpcode, DL, VT, N->getOperand(0), AllOnes); |
904 | --I; |
905 | CurDAG->ReplaceAllUsesWith(N, Res.getNode()); |
906 | ++I; |
907 | MadeChange = true; |
908 | continue; |
909 | } |
910 | } |
911 | |
912 | switch (N->getOpcode()) { |
913 | case X86ISD::VBROADCAST: { |
914 | MVT VT = N->getSimpleValueType(0); |
915 | // No BWI: broadcast at half width and insert it at element 0 and Index.
916 | if (!Subtarget->hasBWI() && (VT == MVT::v32i16 || VT == MVT::v64i8)) { |
917 | MVT NarrowVT = VT == MVT::v32i16 ? MVT::v16i16 : MVT::v32i8; |
918 | SDLoc dl(N); |
919 | SDValue NarrowBCast = |
920 | CurDAG->getNode(X86ISD::VBROADCAST, dl, NarrowVT, N->getOperand(0)); |
921 | SDValue Res = |
922 | CurDAG->getNode(ISD::INSERT_SUBVECTOR, dl, VT, CurDAG->getUNDEF(VT), |
923 | NarrowBCast, CurDAG->getIntPtrConstant(0, dl)); |
924 | unsigned Index = VT == MVT::v32i16 ? 16 : 32; |
925 | Res = CurDAG->getNode(ISD::INSERT_SUBVECTOR, dl, VT, Res, NarrowBCast, |
926 | CurDAG->getIntPtrConstant(Index, dl)); |
927 | 
928 | --I; |
929 | CurDAG->ReplaceAllUsesWith(N, Res.getNode()); |
930 | ++I; |
931 | MadeChange = true; |
932 | continue; |
933 | } |
934 | 
935 | break; |
936 | } |
937 | case X86ISD::VBROADCAST_LOAD: { |
938 | MVT VT = N->getSimpleValueType(0); |
939 | // No BWI: same half-width scheme; the new load's chain replaces N's.
940 | if (!Subtarget->hasBWI() && (VT == MVT::v32i16 || VT == MVT::v64i8)) { |
941 | MVT NarrowVT = VT == MVT::v32i16 ? MVT::v16i16 : MVT::v32i8; |
942 | auto *MemNode = cast<MemSDNode>(N); |
943 | SDLoc dl(N); |
944 | SDVTList VTs = CurDAG->getVTList(NarrowVT, MVT::Other); |
945 | SDValue Ops[] = {MemNode->getChain(), MemNode->getBasePtr()}; |
946 | SDValue NarrowBCast = CurDAG->getMemIntrinsicNode( |
947 | X86ISD::VBROADCAST_LOAD, dl, VTs, Ops, MemNode->getMemoryVT(), |
948 | MemNode->getMemOperand()); |
949 | SDValue Res = |
950 | CurDAG->getNode(ISD::INSERT_SUBVECTOR, dl, VT, CurDAG->getUNDEF(VT), |
951 | NarrowBCast, CurDAG->getIntPtrConstant(0, dl)); |
952 | unsigned Index = VT == MVT::v32i16 ? 16 : 32; |
953 | Res = CurDAG->getNode(ISD::INSERT_SUBVECTOR, dl, VT, Res, NarrowBCast, |
954 | CurDAG->getIntPtrConstant(Index, dl)); |
955 | 
956 | --I; |
957 | SDValue To[] = {Res, NarrowBCast.getValue(1)}; |
958 | CurDAG->ReplaceAllUsesWith(N, To); |
959 | ++I; |
960 | MadeChange = true; |
961 | continue; |
962 | } |
963 | 
964 | break; |
965 | } |
966 | case ISD::VSELECT: { |
967 | // Conditions with i1 elements are left alone; others become BLENDV.
968 | if (N->getOperand(0).getValueType().getVectorElementType() == MVT::i1) |
969 | break; |
970 | 
971 | assert(Subtarget->hasSSE41() && "Expected SSE4.1 support!"); |
972 | SDValue Blendv = |
973 | CurDAG->getNode(X86ISD::BLENDV, SDLoc(N), N->getValueType(0), |
974 | N->getOperand(0), N->getOperand(1), N->getOperand(2)); |
975 | --I; |
976 | CurDAG->ReplaceAllUsesWith(N, Blendv.getNode()); |
977 | ++I; |
978 | MadeChange = true; |
979 | continue; |
980 | } |
981 | case ISD::FP_ROUND: |
982 | case ISD::STRICT_FP_ROUND: |
983 | case ISD::FP_TO_SINT: |
984 | case ISD::FP_TO_UINT: |
985 | case ISD::STRICT_FP_TO_SINT: |
986 | case ISD::STRICT_FP_TO_UINT: { |
987 | // Vector-typed conversions are replaced by the matching X86ISD node;
988 | // scalar results are left untouched here.
989 | if (!N->getSimpleValueType(0).isVector()) |
990 | break; |
991 | |
992 | unsigned NewOpc; |
993 | switch (N->getOpcode()) { |
994 | default: llvm_unreachable("Unexpected opcode!"); |
995 | case ISD::FP_ROUND: NewOpc = X86ISD::VFPROUND; break; |
996 | case ISD::STRICT_FP_ROUND: NewOpc = X86ISD::STRICT_VFPROUND; break; |
997 | case ISD::STRICT_FP_TO_SINT: NewOpc = X86ISD::STRICT_CVTTP2SI; break; |
998 | case ISD::FP_TO_SINT: NewOpc = X86ISD::CVTTP2SI; break; |
999 | case ISD::STRICT_FP_TO_UINT: NewOpc = X86ISD::STRICT_CVTTP2UI; break; |
1000 | case ISD::FP_TO_UINT: NewOpc = X86ISD::CVTTP2UI; break; |
1001 | } |
1002 | SDValue Res; |
1003 | if (N->isStrictFPOpcode()) |
1004 | Res = |
1005 | CurDAG->getNode(NewOpc, SDLoc(N), {N->getValueType(0), MVT::Other}, |
1006 | {N->getOperand(0), N->getOperand(1)}); |
1007 | else |
1008 | Res = |
1009 | CurDAG->getNode(NewOpc, SDLoc(N), N->getValueType(0), |
1010 | N->getOperand(0)); |
1011 | --I; |
1012 | CurDAG->ReplaceAllUsesWith(N, Res.getNode()); |
1013 | ++I; |
1014 | MadeChange = true; |
1015 | continue; |
1016 | } |
1017 | case ISD::SHL: |
1018 | case ISD::SRA: |
1019 | case ISD::SRL: { |
1020 | // Vector shifts become X86ISD::VSHLV/VSRAV/VSRLV; scalar shifts are
1021 | // left untouched.
1022 | if (!N->getValueType(0).isVector()) |
1023 | break; |
1024 | |
1025 | unsigned NewOpc; |
1026 | switch (N->getOpcode()) { |
1027 | default: llvm_unreachable("Unexpected opcode!"); |
1028 | case ISD::SHL: NewOpc = X86ISD::VSHLV; break; |
1029 | case ISD::SRA: NewOpc = X86ISD::VSRAV; break; |
1030 | case ISD::SRL: NewOpc = X86ISD::VSRLV; break; |
1031 | } |
1032 | SDValue Res = CurDAG->getNode(NewOpc, SDLoc(N), N->getValueType(0), |
1033 | N->getOperand(0), N->getOperand(1)); |
1034 | --I; |
1035 | CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res); |
1036 | ++I; |
1037 | MadeChange = true; |
1038 | continue; |
1039 | } |
1040 | case ISD::ANY_EXTEND: |
1041 | case ISD::ANY_EXTEND_VECTOR_INREG: { |
1042 | // Vector any-extends become zero-extends, except that a source with
1043 | // 1-bit elements is sign-extended instead.
1044 | if (!N->getValueType(0).isVector()) |
1045 | break; |
1046 | |
1047 | unsigned NewOpc; |
1048 | if (N->getOperand(0).getScalarValueSizeInBits() == 1) { |
1049 | assert(N->getOpcode() == ISD::ANY_EXTEND && |
1050 | "Unexpected opcode for mask vector!"); |
1051 | NewOpc = ISD::SIGN_EXTEND; |
1052 | } else { |
1053 | NewOpc = N->getOpcode() == ISD::ANY_EXTEND |
1054 | ? ISD::ZERO_EXTEND |
1055 | : ISD::ZERO_EXTEND_VECTOR_INREG; |
1056 | } |
1057 | |
1058 | SDValue Res = CurDAG->getNode(NewOpc, SDLoc(N), N->getValueType(0), |
1059 | N->getOperand(0)); |
1060 | --I; |
1061 | CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res); |
1062 | ++I; |
1063 | MadeChange = true; |
1064 | continue; |
1065 | } |
1066 | case ISD::FCEIL: |
1067 | case ISD::STRICT_FCEIL: |
1068 | case ISD::FFLOOR: |
1069 | case ISD::STRICT_FFLOOR: |
1070 | case ISD::FTRUNC: |
1071 | case ISD::STRICT_FTRUNC: |
1072 | case ISD::FROUNDEVEN: |
1073 | case ISD::STRICT_FROUNDEVEN: |
1074 | case ISD::FNEARBYINT: |
1075 | case ISD::STRICT_FNEARBYINT: |
1076 | case ISD::FRINT: |
1077 | case ISD::STRICT_FRINT: { |
1078 | // Rounding nodes become (STRICT_)VRNDSCALE with an i32 immediate that
1079 | // is chosen per opcode in the switch below.
1080 | unsigned Imm; |
1081 | switch (N->getOpcode()) { |
1082 | default: llvm_unreachable("Unexpected opcode!"); |
1083 | case ISD::STRICT_FCEIL: |
1084 | case ISD::FCEIL: Imm = 0xA; break; |
1085 | case ISD::STRICT_FFLOOR: |
1086 | case ISD::FFLOOR: Imm = 0x9; break; |
1087 | case ISD::STRICT_FTRUNC: |
1088 | case ISD::FTRUNC: Imm = 0xB; break; |
1089 | case ISD::STRICT_FROUNDEVEN: |
1090 | case ISD::FROUNDEVEN: Imm = 0x8; break; |
1091 | case ISD::STRICT_FNEARBYINT: |
1092 | case ISD::FNEARBYINT: Imm = 0xC; break; |
1093 | case ISD::STRICT_FRINT: |
1094 | case ISD::FRINT: Imm = 0x4; break; |
1095 | } |
1096 | SDLoc dl(N); |
1097 | bool IsStrict = N->isStrictFPOpcode(); |
1098 | SDValue Res; |
1099 | if (IsStrict) |
1100 | Res = CurDAG->getNode(X86ISD::STRICT_VRNDSCALE, dl, |
1101 | {N->getValueType(0), MVT::Other}, |
1102 | {N->getOperand(0), N->getOperand(1), |
1103 | CurDAG->getTargetConstant(Imm, dl, MVT::i32)}); |
1104 | else |
1105 | Res = CurDAG->getNode(X86ISD::VRNDSCALE, dl, N->getValueType(0), |
1106 | N->getOperand(0), |
1107 | CurDAG->getTargetConstant(Imm, dl, MVT::i32)); |
1108 | --I; |
1109 | CurDAG->ReplaceAllUsesWith(N, Res.getNode()); |
1110 | ++I; |
1111 | MadeChange = true; |
1112 | continue; |
1113 | } |
1114 | case X86ISD::FANDN: |
1115 | case X86ISD::FAND: |
1116 | case X86ISD::FOR: |
1117 | case X86ISD::FXOR: { |
1118 | // Scalar (non-f128) FP logic ops are performed on a vector: operands
1119 | // go to v2f64/v4f32, are combined, and element 0 is extracted.
1120 | MVT VT = N->getSimpleValueType(0); |
1121 | if (VT.isVector() || VT == MVT::f128) |
1122 | break; |
1123 | |
1124 | MVT VecVT = VT == MVT::f64 ? MVT::v2f64 : MVT::v4f32; |
1125 | SDLoc dl(N); |
1126 | SDValue Op0 = CurDAG->getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, |
1127 | N->getOperand(0)); |
1128 | SDValue Op1 = CurDAG->getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, |
1129 | N->getOperand(1)); |
1130 | |
1131 | SDValue Res; |
1132 | if (Subtarget->hasSSE2()) { |
1133 | EVT IntVT = EVT(VecVT).changeVectorElementTypeToInteger(); |
1134 | Op0 = CurDAG->getNode(ISD::BITCAST, dl, IntVT, Op0); |
1135 | Op1 = CurDAG->getNode(ISD::BITCAST, dl, IntVT, Op1); |
1136 | unsigned Opc; |
1137 | switch (N->getOpcode()) { |
1138 | default: llvm_unreachable("Unexpected opcode!"); |
1139 | case X86ISD::FANDN: Opc = X86ISD::ANDNP; break; |
1140 | case X86ISD::FAND: Opc = ISD::AND; break; |
1141 | case X86ISD::FOR: Opc = ISD::OR; break; |
1142 | case X86ISD::FXOR: Opc = ISD::XOR; break; |
1143 | } |
1144 | Res = CurDAG->getNode(Opc, dl, IntVT, Op0, Op1); |
1145 | Res = CurDAG->getNode(ISD::BITCAST, dl, VecVT, Res); |
1146 | } else { |
1147 | Res = CurDAG->getNode(N->getOpcode(), dl, VecVT, Op0, Op1); |
1148 | } |
1149 | Res = CurDAG->getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Res, |
1150 | CurDAG->getIntPtrConstant(0, dl)); |
1151 | --I; |
1152 | CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Res); |
1153 | ++I; |
1154 | MadeChange = true; |
1155 | continue; |
1156 | } |
1157 | } |
1158 | |
1159 | if (OptLevel != CodeGenOpt::None && |
1160 | // Skipped at -O0 and when indirect thunk calls are in use.
1161 | 
1162 | !Subtarget->useIndirectThunkCalls() && |
1163 | ((N->getOpcode() == X86ISD::CALL && !Subtarget->slowTwoMemOps()) || |
1164 | (N->getOpcode() == X86ISD::TC_RETURN && |
1165 | (Subtarget->is64Bit() || |
1166 | !getTargetMachine().isPositionIndependent())))) { |
1167 | // For a CALL (without slowTwoMemOps) or a TC_RETURN (64-bit, or not
1168 | // position independent) whose callee (operand 1) is a load accepted
1169 | // by isCalleeLoad(), moveBelowOrigChain() re-chains the load so that
1170 | // the call's chain input becomes the load's chain result.
1171 | // HasCallSeq is true only for CALL, i.e. only then does
1172 | // isCalleeLoad() walk the chain up to a CALLSEQ_START.
1173 | // NOTE(review): presumably this lets the load be folded into the
1174 | // call/jmp as a memory operand - confirm.
1175 | 
1176 | 
1177 | 
1178 | 
1179 | 
1180 | 
1181 | 
1182 | 
1183 | 
1184 | 
1185 | 
1186 | bool HasCallSeq = N->getOpcode() == X86ISD::CALL; |
1187 | SDValue Chain = N->getOperand(0); |
1188 | SDValue Load = N->getOperand(1); |
1189 | if (!isCalleeLoad(Load, Chain, HasCallSeq)) |
1190 | continue; |
1191 | moveBelowOrigChain(CurDAG, Load, SDValue(N, 0), Chain); |
1192 | ++NumLoadMoved; |
1193 | MadeChange = true; |
1194 | continue; |
1195 | } |
1196 | // Remaining case: scalar FP_ROUND/FP_EXTEND (and their STRICT_ forms)
1197 | // where source and destination are not both SSE-register types. These
1198 | // are replaced by a store to a stack temporary followed by a load.
1199 | // Every other opcode takes `continue` and does not count as a change.
1200 | 
1201 | 
1202 | 
1203 | 
1204 | 
1205 | switch (N->getOpcode()) { |
1206 | default: continue; |
1207 | case ISD::FP_ROUND: |
1208 | case ISD::FP_EXTEND: |
1209 | { |
1210 | MVT SrcVT = N->getOperand(0).getSimpleValueType(); |
1211 | MVT DstVT = N->getSimpleValueType(0); |
1212 | 
1213 | // Vector conversions are not handled here.
1214 | if (SrcVT.isVector() || DstVT.isVector()) |
1215 | continue; |
1216 | 
1217 | 
1218 | // Nothing to do when both types are scalar-FP-in-SSE-register types.
1219 | const X86TargetLowering *X86Lowering = |
1220 | static_cast<const X86TargetLowering *>(TLI); |
1221 | bool SrcIsSSE = X86Lowering->isScalarFPTypeInSSEReg(SrcVT); |
1222 | bool DstIsSSE = X86Lowering->isScalarFPTypeInSSEReg(DstVT); |
1223 | if (SrcIsSSE && DstIsSSE) |
1224 | continue; |
1225 | |
1226 | if (!SrcIsSSE && !DstIsSSE) { |
1227 | // Neither type is SSE: extends are left alone.
1228 | if (N->getOpcode() == ISD::FP_EXTEND) |
1229 | continue; |
1230 | // So are rounds whose operand 1 is a non-zero constant.
1231 | if (N->getConstantOperandVal(1)) |
1232 | continue; |
1233 | } |
1234 | 
1235 | 
1236 | // The stack slot has the destination type for a round and the source
1237 | // type for an extend.
1238 | MVT MemVT = (N->getOpcode() == ISD::FP_ROUND) ? DstVT : SrcVT; |
1239 | SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT); |
1240 | int SPFI = cast<FrameIndexSDNode>(MemTmp)->getIndex(); |
1241 | MachinePointerInfo MPI = |
1242 | MachinePointerInfo::getFixedStack(CurDAG->getMachineFunction(), SPFI); |
1243 | SDLoc dl(N); |
1244 | 
1245 | 
1246 | // Truncating store to the slot, then an extending load from it.
1247 | SDValue Store = CurDAG->getTruncStore( |
1248 | CurDAG->getEntryNode(), dl, N->getOperand(0), MemTmp, MPI, MemVT); |
1249 | SDValue Result = CurDAG->getExtLoad(ISD::EXTLOAD, dl, DstVT, Store, |
1250 | MemTmp, MPI, MemVT); |
1251 | 
1252 | 
1253 | // Step I back to N while N's uses are replaced; it is advanced again
1254 | // after the switch. NOTE(review): presumably this keeps I valid if
1255 | // the replacement removes nodes - confirm.
1256 | --I; |
1257 | CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), Result); |
1258 | break; |
1259 | } |
1260 | 
1261 | 
1262 | // Strict variants: operand 0 is used as the chain, operand 1 as source.
1263 | case ISD::STRICT_FP_ROUND: |
1264 | case ISD::STRICT_FP_EXTEND: |
1265 | { |
1266 | MVT SrcVT = N->getOperand(1).getSimpleValueType(); |
1267 | MVT DstVT = N->getSimpleValueType(0); |
1268 | 
1269 | // Vector conversions are not handled here.
1270 | if (SrcVT.isVector() || DstVT.isVector()) |
1271 | continue; |
1272 | 
1273 | 
1274 | // Nothing to do when both types are scalar-FP-in-SSE-register types.
1275 | const X86TargetLowering *X86Lowering = |
1276 | static_cast<const X86TargetLowering *>(TLI); |
1277 | bool SrcIsSSE = X86Lowering->isScalarFPTypeInSSEReg(SrcVT); |
1278 | bool DstIsSSE = X86Lowering->isScalarFPTypeInSSEReg(DstVT); |
1279 | if (SrcIsSSE && DstIsSSE) |
1280 | continue; |
1281 | |
1282 | if (!SrcIsSSE && !DstIsSSE) { |
1283 | // Neither type is SSE: extends are left alone.
1284 | if (N->getOpcode() == ISD::STRICT_FP_EXTEND) |
1285 | continue; |
1286 | // So are rounds whose operand 2 is a non-zero constant.
1287 | if (N->getConstantOperandVal(2)) |
1288 | continue; |
1289 | } |
1290 | 
1291 | 
1292 | // The stack slot has the destination type for a round and the source
1293 | // type for an extend.
1294 | MVT MemVT = (N->getOpcode() == ISD::STRICT_FP_ROUND) ? DstVT : SrcVT; |
1295 | SDValue MemTmp = CurDAG->CreateStackTemporary(MemVT); |
1296 | int SPFI = cast<FrameIndexSDNode>(MemTmp)->getIndex(); |
1297 | MachinePointerInfo MPI = |
1298 | MachinePointerInfo::getFixedStack(CurDAG->getMachineFunction(), SPFI); |
1299 | SDLoc dl(N); |
1300 | 
1301 | 
1302 | // A non-SSE source is stored with X86ISD::FST and a non-SSE destination
1303 | // is loaded with X86ISD::FLD; otherwise a plain store/load is used.
1304 | SDValue Store, Result; |
1305 | if (!SrcIsSSE) { |
1306 | SDVTList VTs = CurDAG->getVTList(MVT::Other); |
1307 | SDValue Ops[] = {N->getOperand(0), N->getOperand(1), MemTmp}; |
1308 | Store = CurDAG->getMemIntrinsicNode(X86ISD::FST, dl, VTs, Ops, MemVT, |
1309 | MPI, None, |
1310 | MachineMemOperand::MOStore); |
1311 | if (N->getFlags().hasNoFPExcept()) { |
1312 | SDNodeFlags Flags = Store->getFlags(); |
1313 | Flags.setNoFPExcept(true); |
1314 | Store->setFlags(Flags); |
1315 | } |
1316 | } else { |
1317 | assert(SrcVT == MemVT && "Unexpected VT!"); |
1318 | Store = CurDAG->getStore(N->getOperand(0), dl, N->getOperand(1), MemTmp, |
1319 | MPI); |
1320 | } |
1321 | 
1322 | if (!DstIsSSE) { |
1323 | SDVTList VTs = CurDAG->getVTList(DstVT, MVT::Other); |
1324 | SDValue Ops[] = {Store, MemTmp}; |
1325 | Result = CurDAG->getMemIntrinsicNode( |
1326 | X86ISD::FLD, dl, VTs, Ops, MemVT, MPI, |
1327 | None, MachineMemOperand::MOLoad); |
1328 | if (N->getFlags().hasNoFPExcept()) { |
1329 | SDNodeFlags Flags = Result->getFlags(); |
1330 | Flags.setNoFPExcept(true); |
1331 | Result->setFlags(Flags); |
1332 | } |
1333 | } else { |
1334 | assert(DstVT == MemVT && "Unexpected VT!"); |
1335 | Result = CurDAG->getLoad(DstVT, dl, Store, MemTmp, MPI); |
1336 | } |
1337 | 
1338 | 
1339 | // Same iterator handling as the non-strict case; here all of N's
1340 | // results are replaced by those of the new load node.
1341 | 
1342 | --I; |
1343 | CurDAG->ReplaceAllUsesWith(N, Result.getNode()); |
1344 | break; |
1345 | } |
1346 | } |
1347 | 
1348 | 
1349 | // Reached only from the FP conversion cases above (via break): undo
1350 | // their --I and record the change.
1351 | ++I; |
1352 | MadeChange = true; |
1353 | } |
1354 | 
1355 | // Remove nodes left dead by the replacements.
1356 | if (MadeChange) |
1357 | CurDAG->RemoveDeadNodes(); |
1358 | } |
1359 | |
1360 | |
1361 | bool X86DAGToDAGISel::tryOptimizeRem8Extend(SDNode *N) { |
1362 | unsigned Opc = N->getMachineOpcode(); |
1363 | if (Opc != X86::MOVZX32rr8 && Opc != X86::MOVSX32rr8 && |
1364 | Opc != X86::MOVSX64rr8) |
1365 | return false; |
1366 | |
1367 | SDValue N0 = N->getOperand(0); |
1368 | |
1369 | |
1370 | if (!N0.isMachineOpcode() || |
1371 | N0.getMachineOpcode() != TargetOpcode::EXTRACT_SUBREG || |
1372 | N0.getConstantOperandVal(1) != X86::sub_8bit) |
1373 | return false; |
1374 | |
1375 | |
1376 | unsigned ExpectedOpc = Opc == X86::MOVZX32rr8 ? X86::MOVZX32rr8_NOREX |
1377 | : X86::MOVSX32rr8_NOREX; |
1378 | SDValue N00 = N0.getOperand(0); |
1379 | if (!N00.isMachineOpcode() || N00.getMachineOpcode() != ExpectedOpc) |
1380 | return false; |
1381 | |
1382 | if (Opc == X86::MOVSX64rr8) { |
1383 | |
1384 | |
1385 | MachineSDNode *Extend = CurDAG->getMachineNode(X86::MOVSX64rr32, SDLoc(N), |
1386 | MVT::i64, N00); |
1387 | ReplaceUses(N, Extend); |
1388 | } else { |
1389 | |
1390 | ReplaceUses(N, N00.getNode()); |
1391 | } |
1392 | |
1393 | return true; |
1394 | } |
1395 | |
// Peephole pass over CurDAG that walks allnodes from the end towards the
// beginning and only looks at machine nodes that still have uses. It applies
// tryOptimizeRem8Extend(), rewrites TEST/KORTEST of a single-use AND/KAND
// result, and bypasses certain vector register moves feeding SUBREG_TO_REG.
// If anything changed, dead nodes are removed before returning.
1396 | void X86DAGToDAGISel::PostprocessISelDAG() { |
1397 | // Nothing is done when the optimization level is None.
1398 | if (TM.getOptLevel() == CodeGenOpt::None) |
1399 | return; |
1400 | |
1401 | SelectionDAG::allnodes_iterator Position = CurDAG->allnodes_end(); |
1402 | |
1403 | bool MadeChange = false; |
1404 | while (Position != CurDAG->allnodes_begin()) { |
1405 | SDNode *N = &*--Position; |
1406 | // Skip nodes without users and nodes that are not machine nodes.
1407 | if (N->use_empty() || !N->isMachineOpcode()) |
1408 | continue; |
1409 | |
1410 | if (tryOptimizeRem8Extend(N)) { |
1411 | MadeChange = true; |
1412 | continue; |
1413 | } |
1414 | 
1415 | // TESTrr with both operands equal to a machine AND that has no other
1416 | // user: test the AND's own operands instead, so the AND becomes dead.
1417 | unsigned Opc = N->getMachineOpcode(); |
1418 | if ((Opc == X86::TEST8rr || Opc == X86::TEST16rr || |
1419 | Opc == X86::TEST32rr || Opc == X86::TEST64rr) && |
1420 | N->getOperand(0) == N->getOperand(1) && |
1421 | N->isOnlyUserOf(N->getOperand(0).getNode()) && |
1422 | N->getOperand(0).isMachineOpcode()) { |
1423 | SDValue And = N->getOperand(0); |
1424 | unsigned N0Opc = And.getMachineOpcode(); |
1425 | if (N0Opc == X86::AND8rr || N0Opc == X86::AND16rr || |
1426 | N0Opc == X86::AND32rr || N0Opc == X86::AND64rr) { |
1427 | MachineSDNode *Test = CurDAG->getMachineNode(Opc, SDLoc(N), |
1428 | MVT::i32, |
1429 | And.getOperand(0), |
1430 | And.getOperand(1)); |
1431 | ReplaceUses(N, Test); |
1432 | MadeChange = true; |
1433 | continue; |
1434 | } |
1435 | if (N0Opc == X86::AND8rm || N0Opc == X86::AND16rm || |
1436 | N0Opc == X86::AND32rm || N0Opc == X86::AND64rm) { |
1437 | unsigned NewOpc; |
1438 | switch (N0Opc) { |
1439 | case X86::AND8rm: NewOpc = X86::TEST8mr; break; |
1440 | case X86::AND16rm: NewOpc = X86::TEST16mr; break; |
1441 | case X86::AND32rm: NewOpc = X86::TEST32mr; break; |
1442 | case X86::AND64rm: NewOpc = X86::TEST64mr; break; |
1443 | } |
1444 | // TESTmr takes the AND's operands 1-5 first, then its operand 0,
1445 | // then operand 6 (presumably address, register, chain - confirm).
1446 | SDValue Ops[] = { And.getOperand(1), |
1447 | And.getOperand(2), |
1448 | And.getOperand(3), |
1449 | And.getOperand(4), |
1450 | And.getOperand(5), |
1451 | And.getOperand(0), |
1452 | And.getOperand(6) }; |
1453 | MachineSDNode *Test = CurDAG->getMachineNode(NewOpc, SDLoc(N), |
1454 | MVT::i32, MVT::Other, Ops); |
1455 | CurDAG->setNodeMemRefs( |
1456 | Test, cast<MachineSDNode>(And.getNode())->memoperands()); |
1457 | ReplaceUses(N, Test); |
1458 | MadeChange = true; |
1459 | continue; |
1460 | } |
1461 | } |
1462 | 
1463 | // KORTESTrr with both operands equal to a machine KAND that has no
1464 | // other user, and whose result is only used for the zero flag
1465 | // (onlyUsesZeroFlag): replace it by KTEST on the KAND's operands.
1466 | 
1467 | if ((Opc == X86::KORTESTBrr || Opc == X86::KORTESTWrr || |
1468 | Opc == X86::KORTESTDrr || Opc == X86::KORTESTQrr) && |
1469 | N->getOperand(0) == N->getOperand(1) && |
1470 | N->isOnlyUserOf(N->getOperand(0).getNode()) && |
1471 | N->getOperand(0).isMachineOpcode() && |
1472 | onlyUsesZeroFlag(SDValue(N, 0))) { |
1473 | SDValue And = N->getOperand(0); |
1474 | unsigned N0Opc = And.getMachineOpcode(); |
1475 | // KANDWrr is only accepted when the subtarget has DQI; the other
1476 | // KAND widths are accepted unconditionally.
1477 | if (N0Opc == X86::KANDBrr || |
1478 | (N0Opc == X86::KANDWrr && Subtarget->hasDQI()) || |
1479 | N0Opc == X86::KANDDrr || N0Opc == X86::KANDQrr) { |
1480 | unsigned NewOpc; |
1481 | switch (Opc) { |
1482 | default: llvm_unreachable("Unexpected opcode!"); |
1483 | case X86::KORTESTBrr: NewOpc = X86::KTESTBrr; break; |
1484 | case X86::KORTESTWrr: NewOpc = X86::KTESTWrr; break; |
1485 | case X86::KORTESTDrr: NewOpc = X86::KTESTDrr; break; |
1486 | case X86::KORTESTQrr: NewOpc = X86::KTESTQrr; break; |
1487 | } |
1488 | MachineSDNode *KTest = CurDAG->getMachineNode(NewOpc, SDLoc(N), |
1489 | MVT::i32, |
1490 | And.getOperand(0), |
1491 | And.getOperand(1)); |
1492 | ReplaceUses(N, KTest); |
1493 | MadeChange = true; |
1494 | continue; |
1495 | } |
1496 | } |
1497 | 
1498 | // From here on only SUBREG_TO_REG into sub_xmm/sub_ymm is considered.
1499 | if (Opc != TargetOpcode::SUBREG_TO_REG) |
1500 | continue; |
1501 | |
1502 | unsigned SubRegIdx = N->getConstantOperandVal(2); |
1503 | if (SubRegIdx != X86::sub_xmm && SubRegIdx != X86::sub_ymm) |
1504 | continue; |
1505 | |
1506 | SDValue Move = N->getOperand(1); |
1507 | if (!Move.isMachineOpcode()) |
1508 | continue; |
1509 | 
1510 | // Operand 1 must be one of the register-to-register vector moves below.
1511 | switch (Move.getMachineOpcode()) { |
1512 | default: |
1513 | continue; |
1514 | case X86::VMOVAPDrr: case X86::VMOVUPDrr: |
1515 | case X86::VMOVAPSrr: case X86::VMOVUPSrr: |
1516 | case X86::VMOVDQArr: case X86::VMOVDQUrr: |
1517 | case X86::VMOVAPDYrr: case X86::VMOVUPDYrr: |
1518 | case X86::VMOVAPSYrr: case X86::VMOVUPSYrr: |
1519 | case X86::VMOVDQAYrr: case X86::VMOVDQUYrr: |
1520 | case X86::VMOVAPDZ128rr: case X86::VMOVUPDZ128rr: |
1521 | case X86::VMOVAPSZ128rr: case X86::VMOVUPSZ128rr: |
1522 | case X86::VMOVDQA32Z128rr: case X86::VMOVDQU32Z128rr: |
1523 | case X86::VMOVDQA64Z128rr: case X86::VMOVDQU64Z128rr: |
1524 | case X86::VMOVAPDZ256rr: case X86::VMOVUPDZ256rr: |
1525 | case X86::VMOVAPSZ256rr: case X86::VMOVUPSZ256rr: |
1526 | case X86::VMOVDQA32Z256rr: case X86::VMOVDQU32Z256rr: |
1527 | case X86::VMOVDQA64Z256rr: case X86::VMOVDQU64Z256rr: |
1528 | break; |
1529 | } |
1530 | |
1531 | SDValue In = Move.getOperand(0); |
1532 | if (!In.isMachineOpcode() || |
1533 | In.getMachineOpcode() <= TargetOpcode::GENERIC_OP_END) |
1534 | continue; |
1535 | 
1536 | // The move's input must come from a target instruction whose encoding
1537 | // (per TSFlags) is VEX, EVEX or XOP.
1538 | uint64_t TSFlags = getInstrInfo()->get(In.getMachineOpcode()).TSFlags; |
1539 | if ((TSFlags & X86II::EncodingMask) != X86II::VEX && |
1540 | (TSFlags & X86II::EncodingMask) != X86II::EVEX && |
1541 | (TSFlags & X86II::EncodingMask) != X86II::XOP) |
1542 | continue; |
1543 | 
1544 | // Feed the move's input straight into the SUBREG_TO_REG, bypassing the
1545 | // move.
1546 | CurDAG->UpdateNodeOperands(N, N->getOperand(0), In, N->getOperand(2)); |
1547 | MadeChange = true; |
1548 | } |
1549 | |
1550 | if (MadeChange) |
1551 | CurDAG->RemoveDeadNodes(); |
1552 | } |
1553 | |
1554 | |
1555 | |
1556 | void X86DAGToDAGISel::emitSpecialCodeForMain() { |
1557 | if (Subtarget->isTargetCygMing()) { |
1558 | TargetLowering::ArgListTy Args; |
1559 | auto &DL = CurDAG->getDataLayout(); |
1560 | |
1561 | TargetLowering::CallLoweringInfo CLI(*CurDAG); |
1562 | CLI.setChain(CurDAG->getRoot()) |
1563 | .setCallee(CallingConv::C, Type::getVoidTy(*CurDAG->getContext()), |
1564 | CurDAG->getExternalSymbol("__main", TLI->getPointerTy(DL)), |
1565 | std::move(Args)); |
1566 | const TargetLowering &TLI = CurDAG->getTargetLoweringInfo(); |
1567 | std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI); |
1568 | CurDAG->setRoot(Result.second); |
1569 | } |
1570 | } |
1571 | |
1572 | void X86DAGToDAGISel::emitFunctionEntryCode() { |
1573 | |
1574 | const Function &F = MF->getFunction(); |
1575 | if (F.hasExternalLinkage() && F.getName() == "main") |
1576 | emitSpecialCodeForMain(); |
1577 | } |
1578 | |
/// Report whether \p Val fits in a signed 31-bit integer, i.e. lies in
/// [-2^30, 2^30). foldOffsetIntoAddress() uses this on 64-bit subtargets to
/// reject displacements for frame-index based addresses.
///
/// NOTE(review): presumably the 31-bit limit leaves headroom for the frame
/// index's own offset in a 32-bit displacement field - confirm.
///
/// \param Val The candidate displacement.
/// \returns true if \p Val is within the signed 31-bit range.
static bool isDispSafeForFrameIndex(int64_t Val) {
  // Half-open signed 31-bit range: -2^30 <= Val < 2^30.
  constexpr int64_t Bound = int64_t(1) << 30;
  return -Bound <= Val && Val < Bound;
}
1588 | |
1589 | bool X86DAGToDAGISel::foldOffsetIntoAddress(uint64_t Offset, |
1590 | X86ISelAddressMode &AM) { |
1591 | |
1592 | |
1593 | |
1594 | |
1595 | int64_t Val = AM.Disp + Offset; |
1596 | |
1597 | |
1598 | if (Val != 0 && (AM.ES || AM.MCSym)) |
1599 | return true; |
1600 | |
1601 | CodeModel::Model M = TM.getCodeModel(); |
1602 | if (Subtarget->is64Bit()) { |
1603 | if (Val != 0 && |
1604 | !X86::isOffsetSuitableForCodeModel(Val, M, |
1605 | AM.hasSymbolicDisplacement())) |
1606 | return true; |
1607 | |
1608 | |
1609 | if (AM.BaseType == X86ISelAddressMode::FrameIndexBase && |
1610 | !isDispSafeForFrameIndex(Val)) |
1611 | return true; |
1612 | } |
1613 | AM.Disp = Val; |
1614 | return false; |
1615 | |
1616 | } |
1617 | |
1618 | bool X86DAGToDAGISel::matchLoadInAddress(LoadSDNode *N, X86ISelAddressMode &AM, |
1619 | bool AllowSegmentRegForX32) { |
1620 | SDValue Address = N->getOperand(1); |
1621 | |
1622 | |
1623 | |
1624 | |
1625 | |
1626 | |
1627 | |
1628 | |
1629 | |
1630 | |
1631 | if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Address)) { |
1632 | if (C->getSExtValue() == 0 && AM.Segment.getNode() == nullptr && |
1633 | !IndirectTlsSegRefs && |
1634 | (Subtarget->isTargetGlibc() || Subtarget->isTargetAndroid() || |
1635 | Subtarget->isTargetFuchsia())) { |
1636 | if (Subtarget->isTarget64BitILP32() && !AllowSegmentRegForX32) |
1637 | return true; |
1638 | switch (N->getPointerInfo().getAddrSpace()) { |
1639 | case X86AS::GS: |
1640 | AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16); |
1641 | return false; |
1642 | case X86AS::FS: |
1643 | AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16); |
1644 | return false; |
1645 | |
1646 | |
1647 | } |
1648 | } |
1649 | } |
1650 | |
1651 | return true; |
1652 | } |
1653 | |
1654 | |
1655 | |
1656 | |
1657 | bool X86DAGToDAGISel::matchWrapper(SDValue N, X86ISelAddressMode &AM) { |
1658 | |
1659 | |
1660 | if (AM.hasSymbolicDisplacement()) |
1661 | return true; |
1662 | |
1663 | bool IsRIPRelTLS = false; |
1664 | bool IsRIPRel = N.getOpcode() == X86ISD::WrapperRIP; |
1665 | if (IsRIPRel) { |
1666 | SDValue Val = N.getOperand(0); |
1667 | if (Val.getOpcode() == ISD::TargetGlobalTLSAddress) |
1668 | IsRIPRelTLS = true; |
1669 | } |
1670 | |
1671 | |
1672 | |
1673 | |
1674 | |
1675 | |
1676 | CodeModel::Model M = TM.getCodeModel(); |
1677 | if (Subtarget->is64Bit() && |
1678 | ((M == CodeModel::Large && !IsRIPRelTLS) || |
1679 | (M == CodeModel::Medium && !IsRIPRel))) |
1680 | return true; |
1681 | |
1682 | |
1683 | if (IsRIPRel && AM.hasBaseOrIndexReg()) |
1684 | return true; |
1685 | |
1686 | |
1687 | X86ISelAddressMode Backup = AM; |
1688 | |
1689 | int64_t Offset = 0; |
1690 | SDValue N0 = N.getOperand(0); |
1691 | if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) { |
1692 | AM.GV = G->getGlobal(); |
1693 | AM.SymbolFlags = G->getTargetFlags(); |
1694 | Offset = G->getOffset(); |
1695 | } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) { |
1696 | AM.CP = CP->getConstVal(); |
1697 | AM.Alignment = CP->getAlign(); |
1698 | AM.SymbolFlags = CP->getTargetFlags(); |
1699 | Offset = CP->getOffset(); |
1700 | } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) { |
1701 | AM.ES = S->getSymbol(); |
1702 | AM.SymbolFlags = S->getTargetFlags(); |
1703 | } else if (auto *S = dyn_cast<MCSymbolSDNode>(N0)) { |
1704 | AM.MCSym = S->getMCSymbol(); |
1705 | } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) { |
1706 | AM.JT = J->getIndex(); |
1707 | AM.SymbolFlags = J->getTargetFlags(); |
1708 | } else if (BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(N0)) { |
1709 | AM.BlockAddr = BA->getBlockAddress(); |
1710 | AM.SymbolFlags = BA->getTargetFlags(); |
1711 | Offset = BA->getOffset(); |
1712 | } else |
1713 | llvm_unreachable("Unhandled symbol reference node."); |
1714 | |
1715 | if (foldOffsetIntoAddress(Offset, AM)) { |
1716 | AM = Backup; |
1717 | return true; |
1718 | } |
1719 | |
1720 | if (IsRIPRel) |
1721 | AM.setBaseReg(CurDAG->getRegister(X86::RIP, MVT::i64)); |
1722 | |
1723 | |
1724 | return false; |
1725 | } |
1726 | |
1727 | |
1728 | |
1729 | bool X86DAGToDAGISel::matchAddress(SDValue N, X86ISelAddressMode &AM) { |
1730 | if (matchAddressRecursively(N, AM, 0)) |
1731 | return true; |
1732 | |
1733 | |
1734 | |
1735 | |
1736 | |
1737 | if (Subtarget->isTarget64BitILP32() && |
1738 | AM.BaseType == X86ISelAddressMode::RegBase && |
1739 | AM.Base_Reg.getNode() != nullptr && AM.IndexReg.getNode() == nullptr) { |
1740 | SDValue Save_Base_Reg = AM.Base_Reg; |
1741 | if (auto *LoadN = dyn_cast<LoadSDNode>(Save_Base_Reg)) { |
1742 | AM.Base_Reg = SDValue(); |
1743 | if (matchLoadInAddress(LoadN, AM, true)) |
1744 | AM.Base_Reg = Save_Base_Reg; |
1745 | } |
1746 | } |
1747 | |
1748 | |
1749 | |
1750 | if (AM.Scale == 2 && |
1751 | AM.BaseType == X86ISelAddressMode::RegBase && |
1752 | AM.Base_Reg.getNode() == nullptr) { |
1753 | AM.Base_Reg = AM.IndexReg; |
1754 | AM.Scale = 1; |
1755 | } |
1756 | |
1757 | |
1758 | |
1759 | |
1760 | switch (TM.getCodeModel()) { |
1761 | default: break; |
1762 | case CodeModel::Small: |
1763 | case CodeModel::Kernel: |
1764 | if (Subtarget->is64Bit() && |
1765 | AM.Scale == 1 && |
1766 | AM.BaseType == X86ISelAddressMode::RegBase && |
1767 | AM.Base_Reg.getNode() == nullptr && |
1768 | AM.IndexReg.getNode() == nullptr && |
1769 | AM.SymbolFlags == X86II::MO_NO_FLAG && |
1770 | AM.hasSymbolicDisplacement()) |
1771 | AM.Base_Reg = CurDAG->getRegister(X86::RIP, MVT::i64); |
1772 | break; |
1773 | } |
1774 | |
1775 | return false; |
1776 | } |
1777 | |
1778 | bool X86DAGToDAGISel::matchAdd(SDValue &N, X86ISelAddressMode &AM, |
1779 | unsigned Depth) { |
1780 | |
1781 | |
1782 | HandleSDNode Handle(N); |
1783 | |
1784 | X86ISelAddressMode Backup = AM; |
1785 | if (!matchAddressRecursively(N.getOperand(0), AM, Depth+1) && |
1786 | !matchAddressRecursively(Handle.getValue().getOperand(1), AM, Depth+1)) |
1787 | return false; |
1788 | AM = Backup; |
1789 | |
1790 | |
1791 | if (!matchAddressRecursively(Handle.getValue().getOperand(1), AM, |
1792 | Depth + 1) && |
1793 | !matchAddressRecursively(Handle.getValue().getOperand(0), AM, Depth + 1)) |
1794 | return false; |
1795 | AM = Backup; |
1796 | |
1797 | |
1798 | |
1799 | |
1800 | if (AM.BaseType == X86ISelAddressMode::RegBase && |
1801 | !AM.Base_Reg.getNode() && |
1802 | !AM.IndexReg.getNode()) { |
1803 | N = Handle.getValue(); |
1804 | AM.Base_Reg = N.getOperand(0); |
1805 | AM.IndexReg = N.getOperand(1); |
1806 | AM.Scale = 1; |
1807 | return false; |
1808 | } |
1809 | N = Handle.getValue(); |
1810 | return true; |
1811 | } |
1812 | |
1813 | |
1814 | |
1815 | |
1816 | |
1817 | |
1818 | static void insertDAGNode(SelectionDAG &DAG, SDValue Pos, SDValue N) { |
1819 | if (N->getNodeId() == -1 || |
1820 | (SelectionDAGISel::getUninvalidatedNodeId(N.getNode()) > |
1821 | SelectionDAGISel::getUninvalidatedNodeId(Pos.getNode()))) { |
1822 | DAG.RepositionNode(Pos->getIterator(), N.getNode()); |
1823 | |
1824 | |
1825 | |
1826 | |
1827 | N->setNodeId(Pos->getNodeId()); |
1828 | SelectionDAGISel::InvalidateNodeId(N.getNode()); |
1829 | } |
1830 | } |
1831 | |
1832 | |
1833 | |
1834 | |
1835 | |
// Rewrites N, which the caller passes as (and Shift, Mask) with
// Shift = (srl X, C), into (shl (and (srl X, 8), 0xff), ScaleLog) where
// ScaleLog = 8 - C. The inner AND becomes AM.IndexReg and 1 << ScaleLog
// becomes AM.Scale. Returns true (no change) when the pattern does not
// apply, false after the rewrite.
1836 | static bool foldMaskAndShiftToExtract(SelectionDAG &DAG, SDValue N, |
1837 | uint64_t Mask, |
1838 | SDValue Shift, SDValue X, |
1839 | X86ISelAddressMode &AM) { |
1840 | if (Shift.getOpcode() != ISD::SRL || |
1841 | !isa<ConstantSDNode>(Shift.getOperand(1)) || |
1842 | !Shift.hasOneUse()) |
1843 | return true; |
1844 | // ScaleLog must be 1, 2 or 3 and Mask must be exactly 0xff << ScaleLog. |
1845 | int ScaleLog = 8 - Shift.getConstantOperandVal(1); |
1846 | if (ScaleLog <= 0 || ScaleLog >= 4 || |
1847 | Mask != (0xffu << ScaleLog)) |
1848 | return true; |
1849 | 
1850 | MVT VT = N.getSimpleValueType(); |
1851 | SDLoc DL(N); |
1852 | SDValue Eight = DAG.getConstant(8, DL, MVT::i8); |
1853 | SDValue NewMask = DAG.getConstant(0xff, DL, VT); |
1854 | SDValue Srl = DAG.getNode(ISD::SRL, DL, VT, X, Eight); |
1855 | SDValue And = DAG.getNode(ISD::AND, DL, VT, Srl, NewMask); |
1856 | SDValue ShlCount = DAG.getConstant(ScaleLog, DL, MVT::i8); |
1857 | SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, And, ShlCount); |
1858 | 
1859 | // Register every new node relative to N through insertDAGNode, each |
1860 | // operand before the node that uses it, then redirect all uses of N to |
1861 | // the new SHL and delete N. |
1862 | 
1863 | 
1864 | insertDAGNode(DAG, N, Eight); |
1865 | insertDAGNode(DAG, N, Srl); |
1866 | insertDAGNode(DAG, N, NewMask); |
1867 | insertDAGNode(DAG, N, And); |
1868 | insertDAGNode(DAG, N, ShlCount); |
1869 | insertDAGNode(DAG, N, Shl); |
1870 | DAG.ReplaceAllUsesWith(N, Shl); |
1871 | DAG.RemoveDeadNode(N.getNode()); |
1872 | AM.IndexReg = And; |
1873 | AM.Scale = (1 << ScaleLog); |
1874 | return false; |
1875 | } |
1876 | |
1877 | |
1878 | |
1879 | |
// Rewrites N = (and (shl X, C), Mask), optionally with a single-use
// any_extend from i32 between the AND and the SHL, into
// (shl (and X, Mask >> C), C) for C in {1, 2, 3}. The new AND becomes
// AM.IndexReg and 1 << C becomes AM.Scale. Returns true (no change) when the
// pattern does not apply, false after the rewrite.
1880 | static bool foldMaskedShiftToScaledMask(SelectionDAG &DAG, SDValue N, |
1881 | X86ISelAddressMode &AM) { |
1882 | SDValue Shift = N.getOperand(0); |
1883 | 
1884 | // cast<> requires operand 1 of N to be a constant; the caller in |
1885 | // matchAddressRecursively checks that before calling. The mask is read |
1886 | // sign-extended. |
1887 | int64_t Mask = cast<ConstantSDNode>(N->getOperand(1))->getSExtValue(); |
1888 | 
1889 | // Look through a single-use any_extend of an i32 value when the mask |
1890 | // fits in 32 unsigned bits; the extend is re-created on X further down. |
1891 | 
1892 | bool FoundAnyExtend = false; |
1893 | if (Shift.getOpcode() == ISD::ANY_EXTEND && Shift.hasOneUse() && |
1894 | Shift.getOperand(0).getSimpleValueType() == MVT::i32 && |
1895 | isUInt<32>(Mask)) { |
1896 | FoundAnyExtend = true; |
1897 | Shift = Shift.getOperand(0); |
1898 | } |
1899 | 
1900 | if (Shift.getOpcode() != ISD::SHL || |
1901 | !isa<ConstantSDNode>(Shift.getOperand(1))) |
1902 | return true; |
1903 | 
1904 | SDValue X = Shift.getOperand(0); |
1905 | 
1906 | // Only rewrite when both N and the shift have exactly one use. |
1907 | 
1908 | 
1909 | if (!N.hasOneUse() || !Shift.hasOneUse()) |
1910 | return true; |
1911 | 
1912 | // Only shift amounts 1, 2 and 3 are accepted (AM.Scale 2, 4 or 8). |
1913 | unsigned ShiftAmt = Shift.getConstantOperandVal(1); |
1914 | if (ShiftAmt != 1 && ShiftAmt != 2 && ShiftAmt != 3) |
1915 | return true; |
1916 | 
1917 | MVT VT = N.getSimpleValueType(); |
1918 | SDLoc DL(N); |
1919 | if (FoundAnyExtend) { |
1920 | SDValue NewX = DAG.getNode(ISD::ANY_EXTEND, DL, VT, X); |
1921 | insertDAGNode(DAG, N, NewX); |
1922 | X = NewX; |
1923 | } |
1924 | 
1925 | SDValue NewMask = DAG.getConstant(Mask >> ShiftAmt, DL, VT); |
1926 | SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, X, NewMask); |
1927 | SDValue NewShift = DAG.getNode(ISD::SHL, DL, VT, NewAnd, Shift.getOperand(1)); |
1928 | 
1929 | // Register the new nodes relative to N through insertDAGNode, operands |
1930 | // first, then redirect all uses of N to the new SHL and delete N. |
1931 | 
1932 | 
1933 | 
1934 | insertDAGNode(DAG, N, NewMask); |
1935 | insertDAGNode(DAG, N, NewAnd); |
1936 | insertDAGNode(DAG, N, NewShift); |
1937 | DAG.ReplaceAllUsesWith(N, NewShift); |
1938 | DAG.RemoveDeadNode(N.getNode()); |
1939 | 
1940 | AM.Scale = 1 << ShiftAmt; |
1941 | AM.IndexReg = NewAnd; |
1942 | return false; |
1943 | } |
1944 | |
1945 | |
1946 | |
1947 | |
1948 | |
1949 | |
1950 | |
1951 | |
1952 | |
1953 | |
1954 | |
1955 | |
1956 | |
1957 | |
1958 | |
1959 | |
1960 | |
1961 | |
1962 | |
1963 | |
1964 | |
1965 | |
1966 | |
1967 | |
1968 | |
1969 | |
1970 | |
1971 | |
// Rewrites a masked right shift into (shl (srl X, ShiftAmt + TZ), TZ), where
// TZ is the number of trailing zero bits of Mask (1..3). The new SRL becomes
// AM.IndexReg and 1 << TZ becomes AM.Scale. The rewrite is only done when
// Mask is one contiguous run of ones and the known-zero bits of X match the
// high bits the mask would have cleared. Returns true (no change) when the
// pattern does not apply, false after the rewrite.
1972 | static bool foldMaskAndShiftToScale(SelectionDAG &DAG, SDValue N, |
1973 | uint64_t Mask, |
1974 | SDValue Shift, SDValue X, |
1975 | X86ISelAddressMode &AM) { |
1976 | if (Shift.getOpcode() != ISD::SRL || !Shift.hasOneUse() || |
1977 | !isa<ConstantSDNode>(Shift.getOperand(1))) |
1978 | return true; |
1979 | 
1980 | unsigned ShiftAmt = Shift.getConstantOperandVal(1); |
1981 | unsigned MaskLZ = countLeadingZeros(Mask); |
1982 | unsigned MaskTZ = countTrailingZeros(Mask); |
1983 | 
1984 | // The trailing zero count of the mask is the left shift that gets |
1985 | // expressed through AM.Scale. |
1986 | unsigned AMShiftAmt = MaskTZ; |
1987 | 
1988 | // Only 1, 2 or 3 trailing zeros are usable (AM.Scale 2, 4 or 8). |
1989 | 
1990 | if (AMShiftAmt == 0 || AMShiftAmt > 3) return true; |
1991 | 
1992 | // The set bits must form one run: zeros + ones + zeros cover all 64 bits. |
1993 | if (countTrailingOnes(Mask >> MaskTZ) + MaskTZ + MaskLZ != 64) return true; |
1994 | 
1995 | // Discount the leading zeros that lie above X's width in the 64-bit mask |
1996 | // and those already produced by the right shift. |
1997 | unsigned ScaleDown = (64 - X.getSimpleValueType().getSizeInBits()) + ShiftAmt; |
1998 | if (MaskLZ < ScaleDown) |
1999 | return true; |
2000 | MaskLZ -= ScaleDown; |
2001 | 
2002 | // MaskLZ now counts the high bits of X that only the mask would clear. |
2003 | // If X is an any_extend, look through it: the bits added by the extend |
2004 | // are dropped from the count, and the extend is replaced by a |
2005 | // zero_extend below. |
2006 | 
2007 | 
2008 | bool ReplacingAnyExtend = false; |
2009 | if (X.getOpcode() == ISD::ANY_EXTEND) { |
2010 | unsigned ExtendBits = X.getSimpleValueType().getSizeInBits() - |
2011 | X.getOperand(0).getSimpleValueType().getSizeInBits(); |
2012 | 
2013 | // Clamp at zero so the subtraction cannot wrap. |
2014 | X = X.getOperand(0); |
2015 | MaskLZ = ExtendBits > MaskLZ ? 0 : MaskLZ - ExtendBits; |
2016 | ReplacingAnyExtend = true; |
2017 | } |
2018 | APInt MaskedHighBits = |
2019 | APInt::getHighBitsSet(X.getSimpleValueType().getSizeInBits(), MaskLZ); |
2020 | KnownBits Known = DAG.computeKnownBits(X); |
2021 | if (MaskedHighBits != Known.Zero) return true; |
2022 | 
2023 | // The known-zero bits of X are exactly the high MaskLZ bits, so the AND |
2024 | // is dropped from the rewritten expression. |
2025 | MVT VT = N.getSimpleValueType(); |
2026 | if (ReplacingAnyExtend) { |
2027 | assert(X.getValueType() != VT); |
2028 | 
2029 | SDValue NewX = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(X), VT, X); |
2030 | insertDAGNode(DAG, N, NewX); |
2031 | X = NewX; |
2032 | } |
2033 | SDLoc DL(N); |
2034 | SDValue NewSRLAmt = DAG.getConstant(ShiftAmt + AMShiftAmt, DL, MVT::i8); |
2035 | SDValue NewSRL = DAG.getNode(ISD::SRL, DL, VT, X, NewSRLAmt); |
2036 | SDValue NewSHLAmt = DAG.getConstant(AMShiftAmt, DL, MVT::i8); |
2037 | SDValue NewSHL = DAG.getNode(ISD::SHL, DL, VT, NewSRL, NewSHLAmt); |
2038 | 
2039 | // Register the new nodes relative to N through insertDAGNode, operands |
2040 | // first, then redirect all uses of N to the new SHL and delete N. |
2041 | 
2042 | 
2043 | 
2044 | insertDAGNode(DAG, N, NewSRLAmt); |
2045 | insertDAGNode(DAG, N, NewSRL); |
2046 | insertDAGNode(DAG, N, NewSHLAmt); |
2047 | insertDAGNode(DAG, N, NewSHL); |
2048 | DAG.ReplaceAllUsesWith(N, NewSHL); |
2049 | DAG.RemoveDeadNode(N.getNode()); |
2050 | 
2051 | AM.Scale = 1 << AMShiftAmt; |
2052 | AM.IndexReg = NewSRL; |
2053 | return false; |
2054 | } |
2055 | |
2056 | |
2057 | |
2058 | |
// Rewrites N, which the caller passes as (and Shift, Mask) with
// Shift = (srl X, C), into (shl (and (srl X, C + TZ), Mask >> TZ), TZ), where
// TZ is the number of trailing zero bits of Mask (1..3). The new AND becomes
// AM.IndexReg and 1 << TZ becomes AM.Scale. Only done on subtargets with TBM,
// or with BMI plus fast BEXTR. Returns true (no change) when the pattern
// does not apply, false after the rewrite.
// NOTE(review): this function only builds generic SRL/AND/SHL nodes;
// presumably the and-of-srl is selected as BEXTR later - confirm.
2059 | static bool foldMaskedShiftToBEXTR(SelectionDAG &DAG, SDValue N, |
2060 | uint64_t Mask, |
2061 | SDValue Shift, SDValue X, |
2062 | X86ISelAddressMode &AM, |
2063 | const X86Subtarget &Subtarget) { |
2064 | if (Shift.getOpcode() != ISD::SRL || |
2065 | !isa<ConstantSDNode>(Shift.getOperand(1)) || |
2066 | !Shift.hasOneUse() || !N.hasOneUse()) |
2067 | return true; |
2068 | 
2069 | // Subtarget gate: TBM, or BMI together with fast BEXTR. |
2070 | if (!Subtarget.hasTBM() && |
2071 | !(Subtarget.hasBMI() && Subtarget.hasFastBEXTR())) |
2072 | return true; |
2073 | 
2074 | // The mask must pass isShiftedMask_64. |
2075 | if (!isShiftedMask_64(Mask)) return true; |
2076 | 
2077 | unsigned ShiftAmt = Shift.getConstantOperandVal(1); |
2078 | 
2079 | // The trailing zero count of the mask is the left shift that gets |
2080 | // expressed through AM.Scale. |
2081 | unsigned AMShiftAmt = countTrailingZeros(Mask); |
2082 | 
2083 | // Only 1, 2 or 3 trailing zeros are usable (AM.Scale 2, 4 or 8). |
2084 | 
2085 | if (AMShiftAmt == 0 || AMShiftAmt > 3) return true; |
2086 | 
2087 | MVT VT = N.getSimpleValueType(); |
2088 | SDLoc DL(N); |
2089 | SDValue NewSRLAmt = DAG.getConstant(ShiftAmt + AMShiftAmt, DL, MVT::i8); |
2090 | SDValue NewSRL = DAG.getNode(ISD::SRL, DL, VT, X, NewSRLAmt); |
2091 | SDValue NewMask = DAG.getConstant(Mask >> AMShiftAmt, DL, VT); |
2092 | SDValue NewAnd = DAG.getNode(ISD::AND, DL, VT, NewSRL, NewMask); |
2093 | SDValue NewSHLAmt = DAG.getConstant(AMShiftAmt, DL, MVT::i8); |
2094 | SDValue NewSHL = DAG.getNode(ISD::SHL, DL, VT, NewAnd, NewSHLAmt); |
2095 | 
2096 | // Register the new nodes relative to N through insertDAGNode, operands |
2097 | // first, then redirect all uses of N to the new SHL and delete N. |
2098 | 
2099 | 
2100 | 
2101 | insertDAGNode(DAG, N, NewSRLAmt); |
2102 | insertDAGNode(DAG, N, NewSRL); |
2103 | insertDAGNode(DAG, N, NewMask); |
2104 | insertDAGNode(DAG, N, NewAnd); |
2105 | insertDAGNode(DAG, N, NewSHLAmt); |
2106 | insertDAGNode(DAG, N, NewSHL); |
2107 | DAG.ReplaceAllUsesWith(N, NewSHL); |
2108 | DAG.RemoveDeadNode(N.getNode()); |
2109 | 
2110 | AM.Scale = 1 << AMShiftAmt; |
2111 | AM.IndexReg = NewAnd; |
2112 | return false; |
2113 | } |
2114 | |
// Core address matcher: dispatches on N's opcode and tries to fold N into AM
// (displacement, symbol, base, index, scale). A case that succeeds returns
// false directly; a case that cannot fold breaks out of the switch and N is
// handed to matchAddressBase as a plain register. Returns true only when N
// cannot be represented in AM at all. Depth is the current recursion depth.
2115 | bool X86DAGToDAGISel::matchAddressRecursively(SDValue N, X86ISelAddressMode &AM, |
2116 | unsigned Depth) { |
2117 | SDLoc dl(N); |
2118 | LLVM_DEBUG({ |
2119 | dbgs() << "MatchAddress: "; |
2120 | AM.dump(CurDAG); |
2121 | }); |
2122 | // Past the depth limit, stop decomposing and treat N as a register. |
2123 | if (Depth > 5) |
2124 | return matchAddressBase(N, AM); |
2125 | 
2126 | // Once AM is RIP-relative, the only thing that can still be folded is a |
2127 | // constant offset; everything else fails. |
2128 | 
2129 | if (AM.isRIPRelative()) { |
2130 | 
2131 | // Give up outright when AM refers to a jump table (JT != -1) and has |
2132 | // neither ES nor MCSym set. |
2133 | if (!(AM.ES || AM.MCSym) && AM.JT != -1) |
2134 | return true; |
2135 | 
2136 | if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N)) |
2137 | if (!foldOffsetIntoAddress(Cst->getSExtValue(), AM)) |
2138 | return false; |
2139 | return true; |
2140 | } |
2141 | 
2142 | switch (N.getOpcode()) { |
2143 | default: break; |
2144 | case ISD::LOCAL_RECOVER: { |
2145 | if (!AM.hasSymbolicDisplacement() && AM.Disp == 0) |
2146 | if (const auto *ESNode = dyn_cast<MCSymbolSDNode>(N.getOperand(0))) { |
2147 | // Use the MCSymbol operand as AM's symbol. |
2148 | AM.MCSym = ESNode->getMCSymbol(); |
2149 | return false; |
2150 | } |
2151 | break; |
2152 | } |
2153 | case ISD::Constant: { |
2154 | uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue(); |
2155 | if (!foldOffsetIntoAddress(Val, AM)) |
2156 | return false; |
2157 | break; |
2158 | } |
2159 | 
2160 | case X86ISD::Wrapper: |
2161 | case X86ISD::WrapperRIP: |
2162 | if (!matchWrapper(N, AM)) |
2163 | return false; |
2164 | break; |
2165 | 
2166 | case ISD::LOAD: |
2167 | if (!matchLoadInAddress(cast<LoadSDNode>(N), AM)) |
2168 | return false; |
2169 | break; |
2170 | 
2171 | case ISD::FrameIndex: |
2172 | if (AM.BaseType == X86ISelAddressMode::RegBase && |
2173 | AM.Base_Reg.getNode() == nullptr && |
2174 | (!Subtarget->is64Bit() || isDispSafeForFrameIndex(AM.Disp))) { |
2175 | AM.BaseType = X86ISelAddressMode::FrameIndexBase; |
2176 | AM.Base_FrameIndex = cast<FrameIndexSDNode>(N)->getIndex(); |
2177 | return false; |
2178 | } |
2179 | break; |
2180 | 
2181 | case ISD::SHL: |
2182 | if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) |
2183 | break; |
2184 | 
2185 | if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) { |
2186 | unsigned Val = CN->getZExtValue(); |
2187 | 
2188 | // A shift by 1, 2 or 3 is folded as scale 2, 4 or 8. Note that AM.Scale |
2189 | // is written before the displacement fold below is attempted. |
2190 | 
2191 | if (Val == 1 || Val == 2 || Val == 3) { |
2192 | AM.Scale = 1 << Val; |
2193 | SDValue ShVal = N.getOperand(0); |
2194 | 
2195 | // If the shifted value is base + constant, try to use the base as the |
2196 | // index and fold (constant << Val) into the displacement. When that |
2197 | // fold fails, the whole shifted value is used as the index instead. |
2198 | if (CurDAG->isBaseWithConstantOffset(ShVal)) { |
2199 | AM.IndexReg = ShVal.getOperand(0); |
2200 | ConstantSDNode *AddVal = cast<ConstantSDNode>(ShVal.getOperand(1)); |
2201 | uint64_t Disp = (uint64_t)AddVal->getSExtValue() << Val; |
2202 | if (!foldOffsetIntoAddress(Disp, AM)) |
2203 | return false; |
2204 | } |
2205 | 
2206 | AM.IndexReg = ShVal; |
2207 | return false; |
2208 | } |
2209 | } |
2210 | break; |
2211 | 
2212 | case ISD::SRL: { |
2213 | // The fold below sets index and scale, so both must still be unused. |
2214 | if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) break; |
2215 | 
2216 | // The mask arithmetic below is done on 64-bit integers. |
2217 | 
2218 | assert(N.getSimpleValueType().getSizeInBits() <= 64 && |
2219 | "Unexpected value size!"); |
2220 | 
2221 | SDValue And = N.getOperand(0); |
2222 | if (And.getOpcode() != ISD::AND) break; |
2223 | SDValue X = And.getOperand(0); |
2224 | 
2225 | // N is (srl (and X, C1), C2). Both C1 and C2 must be constants; the mask |
2226 | // handed to the fold is C1 >> C2. |
2227 | 
2228 | if (!isa<ConstantSDNode>(N.getOperand(1)) || |
2229 | !isa<ConstantSDNode>(And.getOperand(1))) |
2230 | break; |
2231 | uint64_t Mask = And.getConstantOperandVal(1) >> N.getConstantOperandVal(1); |
2232 | 
2233 | // N is passed both as the node to replace and as the shift. |
2234 | 
2235 | if (!foldMaskAndShiftToScale(*CurDAG, N, Mask, N, X, AM)) |
2236 | return false; |
2237 | break; |
2238 | } |
2239 | 
2240 | case ISD::SMUL_LOHI: |
2241 | case ISD::UMUL_LOHI: |
2242 | // Only result number 0 of the LOHI multiplies is handled. |
2243 | if (N.getResNo() != 0) break; |
2244 | LLVM_FALLTHROUGH; |
2245 | case ISD::MUL: |
2246 | case X86ISD::MUL_IMM: |
2247 | // x * 3, x * 5 and x * 9 become base = index = x with scale 2, 4, 8. |
2248 | if (AM.BaseType == X86ISelAddressMode::RegBase && |
2249 | AM.Base_Reg.getNode() == nullptr && |
2250 | AM.IndexReg.getNode() == nullptr) { |
2251 | if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) |
2252 | if (CN->getZExtValue() == 3 || CN->getZExtValue() == 5 || |
2253 | CN->getZExtValue() == 9) { |
2254 | AM.Scale = unsigned(CN->getZExtValue())-1; |
2255 | 
2256 | SDValue MulVal = N.getOperand(0); |
2257 | SDValue Reg; |
2258 | 
2259 | // If the multiplicand is a single-use (x + C), use x as the register |
2260 | // and fold C * multiplier into the displacement; if that fold fails, |
2261 | // fall back to the whole multiplicand as the register. |
2262 | if (MulVal.getNode()->getOpcode() == ISD::ADD && MulVal.hasOneUse() && |
2263 | isa<ConstantSDNode>(MulVal.getOperand(1))) { |
2264 | Reg = MulVal.getOperand(0); |
2265 | ConstantSDNode *AddVal = |
2266 | cast<ConstantSDNode>(MulVal.getOperand(1)); |
2267 | uint64_t Disp = AddVal->getSExtValue() * CN->getZExtValue(); |
2268 | if (foldOffsetIntoAddress(Disp, AM)) |
2269 | Reg = N.getOperand(0); |
2270 | } else { |
2271 | Reg = N.getOperand(0); |
2272 | } |
2273 | 
2274 | AM.IndexReg = AM.Base_Reg = Reg; |
2275 | return false; |
2276 | } |
2277 | } |
2278 | break; |
2279 | 
2280 | case ISD::SUB: { |
2281 | // Strategy: match the LHS recursively, then use the RHS as a negated |
2282 | // index register (AM.NegateIndex) if the cost estimate below comes out |
2283 | // negative. Every bail-out restores AM from Backup. |
2284 | 
2285 | 
2286 | 
2287 | 
2288 | 
2289 | // Track N through a handle; it is re-read after the recursive call. |
2290 | HandleSDNode Handle(N); |
2291 | 
2292 | // Try to fold the LHS of the subtraction. |
2293 | X86ISelAddressMode Backup = AM; |
2294 | if (matchAddressRecursively(N.getOperand(0), AM, Depth+1)) { |
2295 | N = Handle.getValue(); |
2296 | AM = Backup; |
2297 | break; |
2298 | } |
2299 | N = Handle.getValue(); |
2300 | // The index slot must still be free, and AM must not be RIP-relative. |
2301 | if (AM.IndexReg.getNode() || AM.isRIPRelative()) { |
2302 | AM = Backup; |
2303 | break; |
2304 | } |
2305 | 
2306 | int Cost = 0; |
2307 | SDValue RHS = N.getOperand(1); |
2308 | 
2309 | // Count against folding when the RHS has other uses or is one of the |
2310 | // listed opcodes (CopyFromReg, TRUNCATE, ANY_EXTEND, zext from i32). |
2311 | if (!RHS.getNode()->hasOneUse() || |
2312 | RHS.getNode()->getOpcode() == ISD::CopyFromReg || |
2313 | RHS.getNode()->getOpcode() == ISD::TRUNCATE || |
2314 | RHS.getNode()->getOpcode() == ISD::ANY_EXTEND || |
2315 | (RHS.getNode()->getOpcode() == ISD::ZERO_EXTEND && |
2316 | RHS.getOperand(0).getValueType() == MVT::i32)) |
2317 | ++Cost; |
2318 | 
2319 | // Count in favour when the base is a multi-use register or a frame index. |
2320 | if ((AM.BaseType == X86ISelAddressMode::RegBase && AM.Base_Reg.getNode() && |
2321 | !AM.Base_Reg.getNode()->hasOneUse()) || |
2322 | AM.BaseType == X86ISelAddressMode::FrameIndexBase) |
2323 | --Cost; |
2324 | 
2325 | // Count in favour when matching the LHS newly set at least two of: a |
2326 | // symbolic displacement, a non-zero displacement, a segment. |
2327 | if ((AM.hasSymbolicDisplacement() && !Backup.hasSymbolicDisplacement()) + |
2328 | ((AM.Disp != 0) && (Backup.Disp == 0)) + |
2329 | (AM.Segment.getNode() && !Backup.Segment.getNode()) >= 2) |
2330 | --Cost; |
2331 | // Fold only when the total is strictly negative. |
2332 | if (Cost >= 0) { |
2333 | AM = Backup; |
2334 | break; |
2335 | } |
2336 | 
2337 | // Commit: the RHS becomes the index with scale 1, flagged as negated. |
2338 | 
2339 | 
2340 | AM.IndexReg = RHS; |
2341 | AM.NegateIndex = true; |
2342 | AM.Scale = 1; |
2343 | return false; |
2344 | } |
2345 | 
2346 | case ISD::ADD: |
2347 | if (!matchAdd(N, AM, Depth)) |
2348 | return false; |
2349 | break; |
2350 | 
2351 | case ISD::OR: |
2352 | // An OR is matched like an ADD, but only when haveNoCommonBitsSet |
2353 | // reports that the two operands share no set bits. |
2354 | 
2355 | 
2356 | 
2357 | 
2358 | if (CurDAG->haveNoCommonBitsSet(N.getOperand(0), N.getOperand(1)) && |
2359 | !matchAdd(N, AM, Depth)) |
2360 | return false; |
2361 | break; |
2362 | 
2363 | case ISD::AND: { |
2364 | // Each fold tried below sets both index and scale, so bail out if |
2365 | // either one is already in use. |
2366 | 
2367 | 
2368 | if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) break; |
2369 | 
2370 | // The mask arithmetic in the folds is done on 64-bit integers. |
2371 | 
2372 | assert(N.getSimpleValueType().getSizeInBits() <= 64 && |
2373 | "Unexpected value size!"); |
2374 | 
2375 | if (!isa<ConstantSDNode>(N.getOperand(1))) |
2376 | break; |
2377 | 
2378 | if (N.getOperand(0).getOpcode() == ISD::SRL) { |
2379 | SDValue Shift = N.getOperand(0); |
2380 | SDValue X = Shift.getOperand(0); |
2381 | 
2382 | uint64_t Mask = N.getConstantOperandVal(1); |
2383 | 
2384 | // The three SRL-based folds are tried in this order; the first one |
2385 | // that succeeds (returns false) wins. |
2386 | if (!foldMaskAndShiftToExtract(*CurDAG, N, Mask, Shift, X, AM)) |
2387 | return false; |
2388 | 
2389 | 
2390 | if (!foldMaskAndShiftToScale(*CurDAG, N, Mask, Shift, X, AM)) |
2391 | return false; |
2392 | 
2393 | 
2394 | if (!foldMaskedShiftToBEXTR(*CurDAG, N, Mask, Shift, X, AM, *Subtarget)) |
2395 | return false; |
2396 | } |
2397 | 
2398 | // Last, try the SHL-based fold; it inspects operand 0 of N itself. |
2399 | 
2400 | if (!foldMaskedShiftToScaledMask(*CurDAG, N, AM)) |
2401 | return false; |
2402 | 
2403 | break; |
2404 | } |
2405 | case ISD::ZERO_EXTEND: { |
2406 | // Rewrites (zext (shl X, C)) to (shl (zext X), C) so the shift can be |
2407 | // folded as scale; index and scale must still be unused. |
2408 | if (AM.IndexReg.getNode() != nullptr || AM.Scale != 1) |
2409 | break; |
2410 | if (N.getOperand(0).getOpcode() != ISD::SHL || !N.getOperand(0).hasOneUse()) |
2411 | break; |
2412 | 
2413 | // The shift amount must be a constant no larger than 3. |
2414 | SDValue Shl = N.getOperand(0); |
2415 | auto *ShAmtC = dyn_cast<ConstantSDNode>(Shl.getOperand(1)); |
2416 | if (!ShAmtC || ShAmtC->getZExtValue() > 3) |
2417 | break; |
2418 | 
2419 | // The top C bits of the shifted value must be known zero; otherwise the |
2420 | // narrow shift discards bits that the widened shift would keep. |
2421 | 
2422 | APInt HighZeros = APInt::getHighBitsSet(Shl.getValueSizeInBits(), |
2423 | ShAmtC->getZExtValue()); |
2424 | if (!CurDAG->MaskedValueIsZero(Shl.getOperand(0), HighZeros)) |
2425 | break; |
2426 | 
2427 | // Build zext(X) and the widened shift. |
2428 | MVT VT = N.getSimpleValueType(); |
2429 | SDLoc DL(N); |
2430 | SDValue Zext = CurDAG->getNode(ISD::ZERO_EXTEND, DL, VT, Shl.getOperand(0)); |
2431 | SDValue NewShl = CurDAG->getNode(ISD::SHL, DL, VT, Zext, Shl.getOperand(1)); |
2432 | 
2433 | // The zext is the index; the shift amount is expressed as scale. |
2434 | AM.Scale = 1 << ShAmtC->getZExtValue(); |
2435 | AM.IndexReg = Zext; |
2436 | 
2437 | insertDAGNode(*CurDAG, N, Zext); |
2438 | insertDAGNode(*CurDAG, N, NewShl); |
2439 | CurDAG->ReplaceAllUsesWith(N, NewShl); |
2440 | CurDAG->RemoveDeadNode(N.getNode()); |
2441 | return false; |
2442 | } |
2443 | } |
2444 | 
2445 | return matchAddressBase(N, AM); |
2446 | } |
2444 | |
2445 | |
2446 | |
// Fallback that stores N in AM as a whole register: as the base register
// when AM has a register base type with no base yet, otherwise as the index
// register (scale 1) when no index is set. Returns false when N was stored,
// true when neither slot is free.
2447 | bool X86DAGToDAGISel::matchAddressBase(SDValue N, X86ISelAddressMode &AM) { |
2448 | // Base slot unavailable: not RegBase, or a base register is already set. |
2449 | if (AM.BaseType != X86ISelAddressMode::RegBase || AM.Base_Reg.getNode()) { |
2450 | // Fall back to the index slot with scale 1. |
2451 | if (!AM.IndexReg.getNode()) { |
2452 | AM.IndexReg = N; |
2453 | AM.Scale = 1; |
2454 | return false; |
2455 | } |
2456 | 
2457 | // Both slots are in use; N cannot be represented. |
2458 | return true; |
2459 | } |
2460 | 
2461 | // Default: N becomes the base register. |
2462 | AM.BaseType = X86ISelAddressMode::RegBase; |
2463 | AM.Base_Reg = N; |
2464 | return false; |
2465 | } |
2466 | |
2467 | |
2468 | |
2469 | |
// Non-recursive matcher used by selectVectorAddr. Only a constant or an
// X86ISD::Wrapper is folded into AM; anything else, and any constant or
// wrapper that fails to fold, is handed to matchAddressBase. Returns false
// on success and true on failure.
2470 | bool X86DAGToDAGISel::matchVectorAddress(SDValue N, X86ISelAddressMode &AM) { |
2471 | // Note that WrapperRIP is not handled in this switch. |
2472 | switch (N.getOpcode()) { |
2473 | case ISD::Constant: { |
2474 | uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue(); |
2475 | if (!foldOffsetIntoAddress(Val, AM)) |
2476 | return false; |
2477 | break; |
2478 | } |
2479 | case X86ISD::Wrapper: |
2480 | if (!matchWrapper(N, AM)) |
2481 | return false; |
2482 | break; |
2483 | } |
2484 | 
2485 | return matchAddressBase(N, AM); |
2486 | } |
2487 | |
// Produces the five address operands (Base, Scale, Index, Disp, Segment) for
// a memory node whose index and scale are supplied explicitly by the caller.
// IndexOp and ScaleOp are copied into AM up front (ScaleOp must be a
// constant, since it goes through cast<>); only BasePtr is pattern-matched.
// Returns true on success, false when BasePtr cannot be matched.
2488 | bool X86DAGToDAGISel::selectVectorAddr(MemSDNode *Parent, SDValue BasePtr, |
2489 | SDValue IndexOp, SDValue ScaleOp, |
2490 | SDValue &Base, SDValue &Scale, |
2491 | SDValue &Index, SDValue &Disp, |
2492 | SDValue &Segment) { |
2493 | X86ISelAddressMode AM; |
2494 | AM.IndexReg = IndexOp; |
2495 | AM.Scale = cast<ConstantSDNode>(ScaleOp)->getZExtValue(); |
2496 | // Map the GS/FS/SS address spaces of the access to a segment register. |
2497 | unsigned AddrSpace = Parent->getPointerInfo().getAddrSpace(); |
2498 | if (AddrSpace == X86AS::GS) |
2499 | AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16); |
2500 | if (AddrSpace == X86AS::FS) |
2501 | AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16); |
2502 | if (AddrSpace == X86AS::SS) |
2503 | AM.Segment = CurDAG->getRegister(X86::SS, MVT::i16); |
2504 | 
2505 | SDLoc DL(BasePtr); |
2506 | MVT VT = BasePtr.getSimpleValueType(); |
2507 | 
2508 | // matchVectorAddress returns true on failure. |
2509 | if (matchVectorAddress(BasePtr, AM)) |
2510 | return false; |
2511 | 
2512 | getAddressOperands(AM, DL, VT, Base, Scale, Index, Disp, Segment); |
2513 | return true; |
2514 | } |
2515 | |
2516 | |
2517 | |
2518 | |
2519 | |
2520 | |
2521 | |
2522 | |
// Matches N as a memory address and produces the five address operands
// (Base, Scale, Index, Disp, Segment). When Parent is given and is not one of
// the excluded opcodes, its address space selects a GS/FS/SS segment
// register. Returns true on success, false when matchAddress fails.
2523 | bool X86DAGToDAGISel::selectAddr(SDNode *Parent, SDValue N, SDValue &Base, |
2524 | SDValue &Scale, SDValue &Index, |
2525 | SDValue &Disp, SDValue &Segment) { |
2526 | X86ISelAddressMode AM; |
2527 | 
2528 | if (Parent && |
2529 | // Parents with these opcodes skip the cast<MemSDNode> below. |
2530 | // NOTE(review): presumably they are not MemSDNodes - confirm. |
2531 | Parent->getOpcode() != ISD::INTRINSIC_W_CHAIN && |
2532 | Parent->getOpcode() != ISD::INTRINSIC_VOID && |
2533 | Parent->getOpcode() != X86ISD::TLSCALL && |
2534 | Parent->getOpcode() != X86ISD::ENQCMD && |
2535 | Parent->getOpcode() != X86ISD::ENQCMDS && |
2536 | Parent->getOpcode() != X86ISD::EH_SJLJ_SETJMP && |
2537 | Parent->getOpcode() != X86ISD::EH_SJLJ_LONGJMP) { |
2538 | unsigned AddrSpace = |
2539 | cast<MemSDNode>(Parent)->getPointerInfo().getAddrSpace(); |
2540 | if (AddrSpace == X86AS::GS) |
2541 | AM.Segment = CurDAG->getRegister(X86::GS, MVT::i16); |
2542 | if (AddrSpace == X86AS::FS) |
2543 | AM.Segment = CurDAG->getRegister(X86::FS, MVT::i16); |
2544 | if (AddrSpace == X86AS::SS) |
2545 | AM.Segment = CurDAG->getRegister(X86::SS, MVT::i16); |
2546 | } |
2547 | 
2548 | // Capture the location and type of N before matching. |
2549 | SDLoc DL(N); |
2550 | MVT VT = N.getSimpleValueType(); |
2551 | 
2552 | if (matchAddress(N, AM)) |
2553 | return false; |
2554 | 
2555 | getAddressOperands(AM, DL, VT, Base, Scale, Index, Disp, Segment); |
2556 | return true; |
2557 | } |
2558 | |
// Decides whether the operand of an X86ISD::Wrapper may be used as Imm.
// Returns false for non-wrappers and for TLS global addresses. For a global
// address with an absolute symbol range, the result is whether the range's
// unsigned maximum is below 2^32. In every other case the result is whether
// the code model is Small. Imm is written whenever N is a wrapper around
// something other than a TLS global address, even if the result is false.
2559 | bool X86DAGToDAGISel::selectMOV64Imm32(SDValue N, SDValue &Imm) { |
2560 | 
2561 | // Only X86ISD::Wrapper nodes are considered. |
2562 | if (N->getOpcode() != X86ISD::Wrapper) |
2563 | return false; |
2564 | 
2565 | N = N.getOperand(0); |
2566 | 
2567 | 
2568 | // TLS global addresses are rejected outright. |
2569 | if (N->getOpcode() == ISD::TargetGlobalTLSAddress) |
2570 | return false; |
2571 | 
2572 | Imm = N; |
2573 | if (N->getOpcode() != ISD::TargetGlobalAddress) |
2574 | return TM.getCodeModel() == CodeModel::Small; |
2575 | 
2576 | Optional<ConstantRange> CR = |
2577 | cast<GlobalAddressSDNode>(N)->getGlobal()->getAbsoluteSymbolRange(); |
2578 | if (!CR) |
2579 | return TM.getCodeModel() == CodeModel::Small; |
2580 | 
2581 | return CR->getUnsignedMax().ult(1ull << 32); |
2582 | } |
2583 | |
// Runs selectLEAAddr and then makes Base and Index 64-bit. A register
// operand with register number 0 is replaced by an i64 register 0. A 32-bit
// Base that is not a frame index, and any other Index, is inserted as the
// sub_32bit subregister of an i64 IMPLICIT_DEF. Returns false when
// selectLEAAddr fails, true otherwise.
// NOTE(review): the analyzer path notes interleaved below follow a path where
// Base's node is assumed null at Base.getValueType() after selectLEAAddr
// returned true; getAddressOperands (not shown) presumably always assigns
// Base - confirm before treating this as a real defect.
2584 | bool X86DAGToDAGISel::selectLEA64_32Addr(SDValue N, SDValue &Base, |
2585 | SDValue &Scale, SDValue &Index, |
2586 | SDValue &Disp, SDValue &Segment) { |
2587 | // Capture the location of N before matching. |
2588 | SDLoc DL(N); |
2589 | 
2590 | if (!selectLEAAddr(N, Base, Scale, Index, Disp, Segment)) |
| 1 | Calling 'X86DAGToDAGISel::selectLEAAddr' | |
|
| 18 | | Returning from 'X86DAGToDAGISel::selectLEAAddr' | |
|
| |
2591 | return false; |
2592 | 
2593 | RegisterSDNode *RN = dyn_cast<RegisterSDNode>(Base); |
| 20 | | Calling 'dyn_cast<llvm::RegisterSDNode, llvm::SDValue>' | |
|
| 33 | | Returning from 'dyn_cast<llvm::RegisterSDNode, llvm::SDValue>' | |
|
2594 | if (RN && RN->getReg() == 0) |
| |
| |
2595 | Base = CurDAG->getRegister(0, MVT::i64); |
2596 | else if (Base.getValueType() == MVT::i32 && !isa<FrameIndexSDNode>(Base)) { |
| 36 | | Calling 'SDValue::getValueType' | |
|
2597 | // Widen a 32-bit base by placing it in sub_32bit of an undefined i64. |
2598 | SDValue ImplDef = SDValue(CurDAG->getMachineNode(X86::IMPLICIT_DEF, DL, |
2599 | MVT::i64), 0); |
2600 | Base = CurDAG->getTargetInsertSubreg(X86::sub_32bit, DL, MVT::i64, ImplDef, |
2601 | Base); |
2602 | } |
2603 | 
2604 | RN = dyn_cast<RegisterSDNode>(Index); |
2605 | if (RN && RN->getReg() == 0) |
2606 | Index = CurDAG->getRegister(0, MVT::i64); |
2607 | else { |
2608 | assert(Index.getValueType() == MVT::i32 && |
2609 | "Expect to be extending 32-bit registers for use in LEA"); |
2610 | SDValue ImplDef = SDValue(CurDAG->getMachineNode(X86::IMPLICIT_DEF, DL, |
2611 | MVT::i64), 0); |
2612 | Index = CurDAG->getTargetInsertSubreg(X86::sub_32bit, DL, MVT::i64, ImplDef, |
2613 | Index); |
2614 | } |
2615 | 
2616 | return true; |
2617 | } |
2618 | |
2619 | |
2620 | |
// Matches N as an address and accepts it for LEA only when a complexity
// score computed from the matched addressing mode exceeds 2. Returns false
// when matching fails or the score is 2 or less; otherwise fills Base, Scale,
// Index, Disp and Segment through getAddressOperands and returns true.
2621 | bool X86DAGToDAGISel::selectLEAAddr(SDValue N, |
2622 | SDValue &Base, SDValue &Scale, |
2623 | SDValue &Index, SDValue &Disp, |
2624 | SDValue &Segment) { |
2625 | X86ISelAddressMode AM; |
2626 | 
2627 | // Capture the location and type of N before matching. |
2628 | SDLoc DL(N); |
2629 | MVT VT = N.getSimpleValueType(); |
2630 | 
2631 | // AM.Segment is set to a dummy register 0 for the duration of the match |
2632 | // and restored afterwards; the assert checks the match left it alone. |
2633 | SDValue Copy = AM.Segment; |
2634 | SDValue T = CurDAG->getRegister(0, MVT::i32); |
2635 | AM.Segment = T; |
2636 | if (matchAddress(N, AM)) |
| |
2637 | return false; |
2638 | assert (T == AM.Segment); |
2639 | AM.Segment = Copy; |
2640 | 
2641 | unsigned Complexity = 0; |
2642 | if (AM.BaseType == X86ISelAddressMode::RegBase && AM.Base_Reg.getNode()) |
| 3 | | Assuming field 'BaseType' is not equal to RegBase | |
|
2643 | Complexity = 1; |
2644 | else if (AM.BaseType == X86ISelAddressMode::FrameIndexBase) |
| 4 | | Assuming field 'BaseType' is not equal to FrameIndexBase | |
|
| |
2645 | Complexity = 4; |
2646 | 
2647 | if (AM.IndexReg.getNode()) |
| 6 | | Assuming the condition is false | |
|
| |
2648 | Complexity++; |
2649 | 
2650 | // A scale above 1 adds one point. |
2651 | 
2652 | if (AM.Scale > 1) |
| 8 | | Assuming field 'Scale' is <= 1 | |
|
| |
2653 | Complexity++; |
2654 | 
2655 | // A symbolic displacement resets the score to 4 on 64-bit subtargets, |
2656 | // which clears the threshold regardless of what was counted above; |
2657 | // on other subtargets it adds 2. |
2658 | 
2659 | 
2660 | if (AM.hasSymbolicDisplacement()) { |
| |
2661 | 
2662 | if (Subtarget->is64Bit()) |
| |
2663 | Complexity = 4; |
2664 | else |
2665 | Complexity += 2; |
2666 | } |
2667 | 
2668 | // For a plain ISD::ADD, one more point is added when both operands are |
2669 | // X86 flag-producing arithmetic nodes whose second result has uses. |
2670 | 
2671 | if (N.getOpcode() == ISD::ADD) { |
| 12 | | Assuming the condition is false | |
|
| |
2672 | auto isMathWithFlags = [](SDValue V) { |
2673 | switch (V.getOpcode()) { |
2674 | case X86ISD::ADD: |
2675 | case X86ISD::SUB: |
2676 | case X86ISD::ADC: |
2677 | case X86ISD::SBB: |
2678 | 
2679 | 
2680 | 
2681 | 
2682 | 
2683 | 
2684 | 
2685 | 
2686 | // True when result number 1 of the node has at least one use. |
2687 | return !SDValue(V.getNode(), 1).use_empty(); |
2688 | default: |
2689 | return false; |
2690 | } |
2691 | }; |
2692 | 
2693 | 
2694 | 
2695 | if (isMathWithFlags(N.getOperand(0)) && isMathWithFlags(N.getOperand(1))) |
2696 | Complexity++; |
2697 | } |
2698 | 
2699 | if (AM.Disp) |
| 14 | | Assuming field 'Disp' is not equal to 0 | |
|
| |
2700 | Complexity++; |
2701 | 
2702 | // Reject addresses that score 2 or less. |
2703 | if (Complexity <= 2) |
| |
2704 | return false; |
2705 | 
2706 | getAddressOperands(AM, DL, VT, Base, Scale, Index, Disp, Segment); |
| 17 | | Value assigned to field 'Node' | |
|
2707 | return true; |
2708 | } |
2709 | |
2710 | |
// Builds the address operands for a TargetGlobalTLSAddress node directly,
// without pattern matching: the global, its offset and its target flags are
// copied into AM. On 32-bit subtargets EBX is additionally set as the index
// register with scale 1. Always returns true.
2711 | bool X86DAGToDAGISel::selectTLSADDRAddr(SDValue N, SDValue &Base, |
2712 | SDValue &Scale, SDValue &Index, |
2713 | SDValue &Disp, SDValue &Segment) { |
2714 | assert(N.getOpcode() == ISD::TargetGlobalTLSAddress); |
2715 | const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N); |
2716 | 
2717 | X86ISelAddressMode AM; |
2718 | AM.GV = GA->getGlobal(); |
2719 | AM.Disp += GA->getOffset(); |
2720 | AM.SymbolFlags = GA->getTargetFlags(); |
2721 | 
2722 | if (Subtarget->is32Bit()) { |
2723 | AM.Scale = 1; |
2724 | AM.IndexReg = CurDAG->getRegister(X86::EBX, MVT::i32); |
2725 | } |
2726 | 
2727 | MVT VT = N.getSimpleValueType(); |
2728 | getAddressOperands(AM, SDLoc(N), VT, Base, Scale, Index, Disp, Segment); |
2729 | return true; |
2730 | } |
2731 | |
// Extracts the operand of an X86ISD::Wrapper, optionally looking through one
// TRUNCATE, into Op. Without a truncate, any wrapper operand is accepted.
// With a truncate, only a TargetGlobalAddress whose absolute symbol range has
// an unsigned maximum below 2^(bit width of VT) is accepted, and Op is then a
// new target global address of type VT. Returns true when Op may be used.
2732 | bool X86DAGToDAGISel::selectRelocImm(SDValue N, SDValue &Op) { |
2733 | 
2734 | // VT is taken from N before the truncate is looked through, so it is the |
2735 | // truncated type when a TRUNCATE is present. |
2736 | EVT VT = N.getValueType(); |
2737 | bool WasTruncated = false; |
2738 | if (N.getOpcode() == ISD::TRUNCATE) { |
2739 | WasTruncated = true; |
2740 | N = N.getOperand(0); |
2741 | } |
2742 | 
2743 | if (N.getOpcode() != X86ISD::Wrapper) |
2744 | return false; |
2745 | 
2746 | // Anything that is not a truncated global address is handled here: Op is |
2747 | // set to the wrapper operand either way, but the result is true only |
2748 | // when there was no truncate. |
2749 | unsigned Opc = N.getOperand(0)->getOpcode(); |
2750 | if (Opc != ISD::TargetGlobalAddress || !WasTruncated) { |
2751 | Op = N.getOperand(0); |
2752 | 
2753 | 
2754 | return !WasTruncated; |
2755 | } |
2756 | 
2757 | // Truncated global address: require an absolute symbol range that fits. |
2758 | auto *GA = cast<GlobalAddressSDNode>(N.getOperand(0)); |
2759 | Optional<ConstantRange> CR = GA->getGlobal()->getAbsoluteSymbolRange(); |
2760 | if (!CR || CR->getUnsignedMax().uge(1ull << VT.getSizeInBits())) |
2761 | return false; |
2762 | 
2763 | // Re-create the global address node with the truncated type. |
2764 | Op = CurDAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(N), VT, |
2765 | GA->getOffset(), GA->getTargetFlags()); |
2766 | return true; |
2767 | } |
2768 | |
// Tries to fold the load N into its user. N must be a non-extending load
// (ISD::isNON_EXTLoad) and must pass both IsProfitableToFold and
// IsLegalToFold for parent P and root Root. If so, the load's address
// (operand 1) is matched with selectAddr, using N itself as the parent.
// Returns selectAddr's result, or false when any precondition fails.
2769 | bool X86DAGToDAGISel::tryFoldLoad(SDNode *Root, SDNode *P, SDValue N, |
2770 | SDValue &Base, SDValue &Scale, |
2771 | SDValue &Index, SDValue &Disp, |
2772 | SDValue &Segment) { |
2773 | assert(Root && P && "Unknown root/parent nodes"); |
2774 | if (!ISD::isNON_EXTLoad(N.getNode()) || |
2775 | !IsProfitableToFold(N, P, Root) || |
2776 | !IsLegalToFold(N, P, Root, OptLevel)) |
2777 | return false; |
2778 | 
2779 | return selectAddr(N.getNode(), |
2780 | N.getOperand(1), Base, Scale, Index, Disp, Segment); |
2781 | } |
2782 | |
// Same as tryFoldLoad, but for X86ISD::VBROADCAST_LOAD nodes: N must have
// that opcode and pass IsProfitableToFold and IsLegalToFold. Its address
// (operand 1) is then matched with selectAddr, using N itself as the parent.
// Returns selectAddr's result, or false when any precondition fails.
2783 | bool X86DAGToDAGISel::tryFoldBroadcast(SDNode *Root, SDNode *P, SDValue N, |
2784 | SDValue &Base, SDValue &Scale, |
2785 | SDValue &Index, SDValue &Disp, |
2786 | SDValue &Segment) { |
2787 | assert(Root && P && "Unknown root/parent nodes"); |
2788 | if (N->getOpcode() != X86ISD::VBROADCAST_LOAD || |
2789 | !IsProfitableToFold(N, P, Root) || |
2790 | !IsLegalToFold(N, P, Root, OptLevel)) |
2791 | return false; |
2792 | 
2793 | return selectAddr(N.getNode(), |
2794 | N.getOperand(1), Base, Scale, Index, Disp, Segment); |
2795 | } |
2796 | |
2797 | |
2798 | |
2799 | |
// Returns a register node for the global base register that the instruction
// info reports for the current machine function, typed with the target's
// pointer type for the function's data layout.
2800 | SDNode *X86DAGToDAGISel::getGlobalBaseReg() { |
2801 | unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF); |
2802 | auto &DL = MF->getDataLayout(); |
2803 | return CurDAG->getRegister(GlobalBaseReg, TLI->getPointerTy(DL)).getNode(); |
2804 | } |
2805 | |
// Checks whether N, optionally looked at through one TRUNCATE, is an
// X86ISD::Wrapper around a global address that satisfies a Width-dependent
// range test. When the global has no absolute symbol range, the result is
// true only for Width == 32 under the Small code model. Otherwise the
// range's signed minimum must be >= (-1ull << Width) and its signed maximum
// must be < (1ull << Width).
2806 | bool X86DAGToDAGISel::isSExtAbsoluteSymbolRef(unsigned Width, SDNode *N) const { |
2807 | if (N->getOpcode() == ISD::TRUNCATE) |
2808 | N = N->getOperand(0).getNode(); |
2809 | if (N->getOpcode() != X86ISD::Wrapper) |
2810 | return false; |
2811 | 
2812 | auto *GA = dyn_cast<GlobalAddressSDNode>(N->getOperand(0)); |
2813 | if (!GA) |
2814 | return false; |
2815 | 
2816 | Optional<ConstantRange> CR = GA->getGlobal()->getAbsoluteSymbolRange(); |
2817 | if (!CR) |
2818 | return Width == 32 && TM.getCodeModel() == CodeModel::Small; |
2819 | 
2820 | return CR->getSignedMin().sge(-1ull << Width) && |
2821 | CR->getSignedMax().slt(1ull << Width); |
2822 | } |
2823 | |
// Reads the condition code operand of a machine node. The operand index
// depends on the opcode: 1 for JCC_1, 0 for SETCCr, 5 for SETCCm, 2 for the
// register-register CMOVs and 6 for the register-memory CMOVs. Any other
// opcode yields X86::COND_INVALID. N must be a machine node (asserted).
2824 | static X86::CondCode getCondFromNode(SDNode *N) { |
2825 | assert(N->isMachineOpcode() && "Unexpected node"); |
2826 | X86::CondCode CC = X86::COND_INVALID; |
2827 | unsigned Opc = N->getMachineOpcode(); |
2828 | if (Opc == X86::JCC_1) |
2829 | CC = static_cast<X86::CondCode>(N->getConstantOperandVal(1)); |
2830 | else if (Opc == X86::SETCCr) |
2831 | CC = static_cast<X86::CondCode>(N->getConstantOperandVal(0)); |
2832 | else if (Opc == X86::SETCCm) |
2833 | CC = static_cast<X86::CondCode>(N->getConstantOperandVal(5)); |
2834 | else if (Opc == X86::CMOV16rr || Opc == X86::CMOV32rr || |
2835 | Opc == X86::CMOV64rr) |
2836 | CC = static_cast<X86::CondCode>(N->getConstantOperandVal(2)); |
2837 | else if (Opc == X86::CMOV16rm || Opc == X86::CMOV32rm || |
2838 | Opc == X86::CMOV64rm) |
2839 | CC = static_cast<X86::CondCode>(N->getConstantOperandVal(6)); |
2840 | 
2841 | return CC; |
2842 | } |
2843 | |
2844 | |
2845 | |
// Returns true when every use of the Flags value is a CopyToReg into EFLAGS
// and every user of result number 1 of that copy is a machine node whose
// condition code is COND_E or COND_NE. Anything else makes it return false,
// including users for which getCondFromNode yields COND_INVALID.
2846 | bool X86DAGToDAGISel::onlyUsesZeroFlag(SDValue Flags) const { |
2847 | // Walk all uses of the node that produces Flags. |
2848 | for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end(); |
2849 | UI != UE; ++UI) { |
2850 | // Skip uses of other results of the same node. |
2851 | if (UI.getUse().getResNo() != Flags.getResNo()) |
2852 | continue; |
2853 | // The flags result must only be copied into EFLAGS. |
2854 | if (UI->getOpcode() != ISD::CopyToReg || |
2855 | cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS) |
2856 | return false; |
2857 | // Inspect the users of the copy. |
2858 | for (SDNode::use_iterator FlagUI = UI->use_begin(), |
2859 | FlagUE = UI->use_end(); FlagUI != FlagUE; ++FlagUI) { |
2860 | // Only users of result number 1 of the copy are relevant. |
2861 | if (FlagUI.getUse().getResNo() != 1) continue; |
2862 | // A user that is not a machine node cannot be analysed. |
2863 | if (!FlagUI->isMachineOpcode()) return false; |
2864 | 
2865 | X86::CondCode CC = getCondFromNode(*FlagUI); |
2866 | 
2867 | switch (CC) { |
2868 | // Accepted condition codes. |
2869 | case X86::COND_E: case X86::COND_NE: |
2870 | continue; |
2871 | // Everything else is rejected. |
2872 | default: |
2873 | return false; |
2874 | } |
2875 | } |
2876 | } |
2877 | return true; |
2878 | } |
2879 | |
2880 | |
2881 | |
// Returns true when every use of the Flags value is a CopyToReg into EFLAGS
// and every user of result number 1 of that copy is a machine node whose
// condition code is one of A, AE, B, BE, E, NE, O, NO, P or NP. Anything
// else makes it return false, including users for which getCondFromNode
// yields COND_INVALID.
2882 | bool X86DAGToDAGISel::hasNoSignFlagUses(SDValue Flags) const { |
2883 | // Walk all uses of the node that produces Flags. |
2884 | for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end(); |
2885 | UI != UE; ++UI) { |
2886 | // Skip uses of other results of the same node. |
2887 | if (UI.getUse().getResNo() != Flags.getResNo()) |
2888 | continue; |
2889 | // The flags result must only be copied into EFLAGS. |
2890 | if (UI->getOpcode() != ISD::CopyToReg || |
2891 | cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS) |
2892 | return false; |
2893 | // Inspect the users of the copy. |
2894 | for (SDNode::use_iterator FlagUI = UI->use_begin(), |
2895 | FlagUE = UI->use_end(); FlagUI != FlagUE; ++FlagUI) { |
2896 | // Only users of result number 1 of the copy are relevant. |
2897 | if (FlagUI.getUse().getResNo() != 1) continue; |
2898 | // A user that is not a machine node cannot be analysed. |
2899 | if (!FlagUI->isMachineOpcode()) return false; |
2900 | 
2901 | X86::CondCode CC = getCondFromNode(*FlagUI); |
2902 | 
2903 | switch (CC) { |
2904 | // Accepted condition codes. |
2905 | case X86::COND_A: case X86::COND_AE: |
2906 | case X86::COND_B: case X86::COND_BE: |
2907 | case X86::COND_E: case X86::COND_NE: |
2908 | case X86::COND_O: case X86::COND_NO: |
2909 | case X86::COND_P: case X86::COND_NP: |
2910 | continue; |
2911 | // Everything else is rejected. |
2912 | default: |
2913 | return false; |
2914 | } |
2915 | } |
2916 | } |
2917 | return true; |
2918 | } |
2920 | static bool mayUseCarryFlag(X86::CondCode CC) { |
2921 | switch (CC) { |
2922 | |
2923 | case X86::COND_O: case X86::COND_NO: |
2924 | case X86::COND_E: case X86::COND_NE: |
2925 | case X86::COND_S: case X86::COND_NS: |
2926 | case X86::COND_P: case X86::COND_NP: |
2927 | case X86::COND_L: case X86::COND_GE: |
2928 | case X86::COND_G: case X86::COND_LE: |
2929 | return false; |
2930 | |
2931 | default: |
2932 | return true; |
2933 | } |
2934 | } |
2935 | |
2936 | |
2937 | |
2938 | bool X86DAGToDAGISel::hasNoCarryFlagUses(SDValue Flags) const { |
2939 | |
2940 | for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end(); |
2941 | UI != UE; ++UI) { |
2942 | |
2943 | if (UI.getUse().getResNo() != Flags.getResNo()) |
2944 | continue; |
2945 | |
2946 | unsigned UIOpc = UI->getOpcode(); |
2947 | |
2948 | if (UIOpc == ISD::CopyToReg) { |
2949 | |
2950 | if (cast<RegisterSDNode>(UI->getOperand(1))->getReg() != X86::EFLAGS) |
2951 | return false; |
2952 | |
2953 | for (SDNode::use_iterator FlagUI = UI->use_begin(), FlagUE = UI->use_end(); |
2954 | FlagUI != FlagUE; ++FlagUI) { |
2955 | |
2956 | if (FlagUI.getUse().getResNo() != 1) |
2957 | continue; |
2958 | |
2959 | if (!FlagUI->isMachineOpcode()) |
2960 | return false; |
2961 | |
2962 | X86::CondCode CC = getCondFromNode(*FlagUI); |
2963 | |
2964 | if (mayUseCarryFlag(CC)) |
2965 | return false; |
2966 | } |
2967 | |
2968 | |
2969 | continue; |
2970 | } |
2971 | |
2972 | |
2973 | |
2974 | unsigned CCOpNo; |
2975 | switch (UIOpc) { |
2976 | default: |
2977 | |
2978 | return false; |
2979 | case X86ISD::SETCC: CCOpNo = 0; break; |
2980 | case X86ISD::SETCC_CARRY: CCOpNo = 0; break; |
2981 | case X86ISD::CMOV: CCOpNo = 2; break; |
2982 | case X86ISD::BRCOND: CCOpNo = 2; break; |
2983 | } |
2984 | |
2985 | X86::CondCode CC = (X86::CondCode)UI->getConstantOperandVal(CCOpNo); |
2986 | if (mayUseCarryFlag(CC)) |
2987 | return false; |
2988 | } |
2989 | return true; |
2990 | } |
2991 | |
2992 | |
2993 | |
2994 | static bool isFusableLoadOpStorePattern(StoreSDNode *StoreNode, |
2995 | SDValue StoredVal, SelectionDAG *CurDAG, |
2996 | unsigned LoadOpNo, |
2997 | LoadSDNode *&LoadNode, |
2998 | SDValue &InputChain) { |
2999 | |
3000 | if (StoredVal.getResNo() != 0) return false; |
3001 | |
3002 | |
3003 | if (!StoredVal.getNode()->hasNUsesOfValue(1, 0)) return false; |
3004 | |
3005 | |
3006 | if (!ISD::isNormalStore(StoreNode) || StoreNode->isNonTemporal()) |
3007 | return false; |
3008 | |
3009 | SDValue Load = StoredVal->getOperand(LoadOpNo); |
3010 | |
3011 | if (!ISD::isNormalLoad(Load.getNode())) return false; |
3012 | |
3013 | |
3014 | LoadNode = cast<LoadSDNode>(Load); |
3015 | |
3016 | |
3017 | if (!Load.hasOneUse()) |
3018 | return false; |
3019 | |
3020 | |
3021 | if (LoadNode->getBasePtr() != StoreNode->getBasePtr() || |
3022 | LoadNode->getOffset() != StoreNode->getOffset()) |
3023 | return false; |
3024 | |
3025 | bool FoundLoad = false; |
3026 | SmallVector<SDValue, 4> ChainOps; |
3027 | SmallVector<const SDNode *, 4> LoopWorklist; |
3028 | SmallPtrSet<const SDNode *, 16> Visited; |
3029 | const unsigned int Max = 1024; |
3030 | |
3031 | |
3032 | |
3033 | |
3034 | |
3035 | |
3036 | |
3037 | |
3038 | |
3039 | |
3040 | |
3041 | |
3042 | |
3043 | |
3044 | |
3045 | |
3046 | |
3047 | |
3048 | |
3049 | |
3050 | |
3051 | |
3052 | |
3053 | |
3054 | |
3055 | |
3056 | |
3057 | |
3058 | |
3059 | |
3060 | |
3061 | |
3062 | |
3063 | |
3064 | |
3065 | |
3066 | |
3067 | |
3068 | SDValue Chain = StoreNode->getChain(); |
3069 | |
3070 | |
3071 | if (Chain == Load.getValue(1)) { |
3072 | FoundLoad = true; |
3073 | ChainOps.push_back(Load.getOperand(0)); |
3074 | } else if (Chain.getOpcode() == ISD::TokenFactor) { |
3075 | for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i) { |
3076 | SDValue Op = Chain.getOperand(i); |
3077 | if (Op == Load.getValue(1)) { |
3078 | FoundLoad = true; |
3079 | |
3080 | ChainOps.push_back(Load.getOperand(0)); |
3081 | continue; |
3082 | } |
3083 | LoopWorklist.push_back(Op.getNode()); |
3084 | ChainOps.push_back(Op); |
3085 | } |
3086 | } |
3087 | |
3088 | if (!FoundLoad) |
3089 | return false; |
3090 | |
3091 | |
3092 | for (SDValue Op : StoredVal->ops()) |
3093 | if (Op.getNode() != LoadNode) |
3094 | LoopWorklist.push_back(Op.getNode()); |
3095 | |
3096 | |
3097 | if (SDNode::hasPredecessorHelper(Load.getNode(), Visited, LoopWorklist, Max, |
3098 | true)) |
3099 | return false; |
3100 | |
3101 | InputChain = |
3102 | CurDAG->getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, ChainOps); |
3103 | return true; |
3104 | } |
3105 | |
3106 | |
3107 | |
3108 | |
3109 | |
3110 | |
3111 | |
3112 | |
3113 | |
3114 | |
3115 | |
3116 | |
3117 | |
3118 | |
3119 | |
3120 | |
3121 | |
3122 | /* Try to fold a load + arithmetic/logic op + store rooted at the store Node into one machine node that operates on memory. */ |
3123 | /* Handles X86ISD::ADD, ADC, SUB, SBB, AND, OR and XOR whose stored memory type is i8, i16, i32 or i64. */ |
3124 | /* Chooses between the NEG, INC/DEC, immediate and register memory forms of the instruction. */ |
3125 | /* Returns true after replacing and deleting Node; returns false when the pattern does not match. */ |
3126 | bool X86DAGToDAGISel::foldLoadStoreIntoMemOperand(SDNode *Node) { |
3127 | StoreSDNode *StoreNode = cast<StoreSDNode>(Node); |
3128 | SDValue StoredVal = StoreNode->getOperand(1); |
3129 | unsigned Opc = StoredVal->getOpcode(); |
3130 | |
3131 | |
3132 | |
3133 | /* Only the four scalar integer memory types below are handled. */ |
3134 | EVT MemVT = StoreNode->getMemoryVT(); |
3135 | if (MemVT != MVT::i64 && MemVT != MVT::i32 && MemVT != MVT::i16 && |
3136 | MemVT != MVT::i8) |
3137 | return false; |
3138 | /* Classify the operation: commutable ops may have the load on either side; SUB with a zero first operand is a negate. */ |
3139 | bool IsCommutable = false; |
3140 | bool IsNegate = false; |
3141 | switch (Opc) { |
3142 | default: |
3143 | return false; |
3144 | case X86ISD::SUB: |
3145 | IsNegate = isNullConstant(StoredVal.getOperand(0)); |
3146 | break; |
3147 | case X86ISD::SBB: |
3148 | break; |
3149 | case X86ISD::ADD: |
3150 | case X86ISD::ADC: |
3151 | case X86ISD::AND: |
3152 | case X86ISD::OR: |
3153 | case X86ISD::XOR: |
3154 | IsCommutable = true; |
3155 | break; |
3156 | } |
3157 | /* For a negate the load is looked for in operand 1, otherwise in operand 0 first. */ |
3158 | unsigned LoadOpNo = IsNegate ? 1 : 0; |
3159 | LoadSDNode *LoadNode = nullptr; |
3160 | SDValue InputChain; |
3161 | if (!isFusableLoadOpStorePattern(StoreNode, StoredVal, CurDAG, LoadOpNo, |
3162 | LoadNode, InputChain)) { |
3163 | if (!IsCommutable) |
3164 | return false; |
3165 | |
3166 | /* Commutable operation: retry with the load as operand 1. */ |
3167 | LoadOpNo = 1; |
3168 | if (!isFusableLoadOpStorePattern(StoreNode, StoredVal, CurDAG, LoadOpNo, |
3169 | LoadNode, InputChain)) |
3170 | return false; |
3171 | } |
3172 | /* Compute the five address operands from the load's base pointer. */ |
3173 | SDValue Base, Scale, Index, Disp, Segment; |
3174 | if (!selectAddr(LoadNode, LoadNode->getBasePtr(), Base, Scale, Index, Disp, |
3175 | Segment)) |
3176 | return false; |
3177 | /* Helper: pick one of four opcodes according to MemVT (i64, i32, i16, i8). */ |
3178 | auto SelectOpcode = [&](unsigned Opc64, unsigned Opc32, unsigned Opc16, |
3179 | unsigned Opc8) { |
3180 | switch (MemVT.getSimpleVT().SimpleTy) { |
3181 | case MVT::i64: |
3182 | return Opc64; |
3183 | case MVT::i32: |
3184 | return Opc32; |
3185 | case MVT::i16: |
3186 | return Opc16; |
3187 | case MVT::i8: |
3188 | return Opc8; |
3189 | default: |
3190 | llvm_unreachable("Invalid size!"); |
3191 | } |
3192 | }; |
3193 | /* The cases below intentionally fall through from the most specific form (NEG, INC/DEC) to the generic one. */ |
3194 | MachineSDNode *Result; |
3195 | switch (Opc) { |
3196 | case X86ISD::SUB: |
3197 | /* A subtraction from zero becomes NEG on memory. */ |
3198 | if (IsNegate) { |
3199 | unsigned NewOpc = SelectOpcode(X86::NEG64m, X86::NEG32m, X86::NEG16m, |
3200 | X86::NEG8m); |
3201 | const SDValue Ops[] = {Base, Scale, Index, Disp, Segment, InputChain}; |
3202 | Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, |
3203 | MVT::Other, Ops); |
3204 | break; |
3205 | } |
3206 | LLVM_FALLTHROUGH; |
3207 | case X86ISD::ADD: |
3208 | /* INC/DEC are considered unless the subtarget reports them as slow; when optimizing for size they are considered regardless. */ |
3209 | if (!Subtarget->slowIncDec() || CurDAG->shouldOptForSize()) { |
3210 | bool IsOne = isOneConstant(StoredVal.getOperand(1)); |
3211 | bool IsNegOne = isAllOnesConstant(StoredVal.getOperand(1)); |
3212 | /* Requires operand 1 to be +1 or -1 and hasNoCarryFlagUses() to hold for result #1. ADD of +1 and SUB of -1 give INC, the other two combinations DEC. */ |
3213 | if ((IsOne || IsNegOne) && hasNoCarryFlagUses(StoredVal.getValue(1))) { |
3214 | unsigned NewOpc = |
3215 | ((Opc == X86ISD::ADD) == IsOne) |
3216 | ? SelectOpcode(X86::INC64m, X86::INC32m, X86::INC16m, X86::INC8m) |
3217 | : SelectOpcode(X86::DEC64m, X86::DEC32m, X86::DEC16m, X86::DEC8m); |
3218 | const SDValue Ops[] = {Base, Scale, Index, Disp, Segment, InputChain}; |
3219 | Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, |
3220 | MVT::Other, Ops); |
3221 | break; |
3222 | } |
3223 | } |
3224 | LLVM_FALLTHROUGH; |
3225 | case X86ISD::ADC: |
3226 | case X86ISD::SBB: |
3227 | case X86ISD::AND: |
3228 | case X86ISD::OR: |
3229 | case X86ISD::XOR: { |
3230 | auto SelectRegOpcode = [SelectOpcode](unsigned Opc) { |
3231 | switch (Opc) { |
3232 | case X86ISD::ADD: |
3233 | return SelectOpcode(X86::ADD64mr, X86::ADD32mr, X86::ADD16mr, |
3234 | X86::ADD8mr); |
3235 | case X86ISD::ADC: |
3236 | return SelectOpcode(X86::ADC64mr, X86::ADC32mr, X86::ADC16mr, |
3237 | X86::ADC8mr); |
3238 | case X86ISD::SUB: |
3239 | return SelectOpcode(X86::SUB64mr, X86::SUB32mr, X86::SUB16mr, |
3240 | X86::SUB8mr); |
3241 | case X86ISD::SBB: |
3242 | return SelectOpcode(X86::SBB64mr, X86::SBB32mr, X86::SBB16mr, |
3243 | X86::SBB8mr); |
3244 | case X86ISD::AND: |
3245 | return SelectOpcode(X86::AND64mr, X86::AND32mr, X86::AND16mr, |
3246 | X86::AND8mr); |
3247 | case X86ISD::OR: |
3248 | return SelectOpcode(X86::OR64mr, X86::OR32mr, X86::OR16mr, X86::OR8mr); |
3249 | case X86ISD::XOR: |
3250 | return SelectOpcode(X86::XOR64mr, X86::XOR32mr, X86::XOR16mr, |
3251 | X86::XOR8mr); |
3252 | default: |
3253 | llvm_unreachable("Invalid opcode!"); |
3254 | } |
3255 | }; |
3256 | auto SelectImm8Opcode = [SelectOpcode](unsigned Opc) { |
3257 | switch (Opc) { |
3258 | case X86ISD::ADD: |
3259 | return SelectOpcode(X86::ADD64mi8, X86::ADD32mi8, X86::ADD16mi8, 0); |
3260 | case X86ISD::ADC: |
3261 | return SelectOpcode(X86::ADC64mi8, X86::ADC32mi8, X86::ADC16mi8, 0); |
3262 | case X86ISD::SUB: |
3263 | return SelectOpcode(X86::SUB64mi8, X86::SUB32mi8, X86::SUB16mi8, 0); |
3264 | case X86ISD::SBB: |
3265 | return SelectOpcode(X86::SBB64mi8, X86::SBB32mi8, X86::SBB16mi8, 0); |
3266 | case X86ISD::AND: |
3267 | return SelectOpcode(X86::AND64mi8, X86::AND32mi8, X86::AND16mi8, 0); |
3268 | case X86ISD::OR: |
3269 | return SelectOpcode(X86::OR64mi8, X86::OR32mi8, X86::OR16mi8, 0); |
3270 | case X86ISD::XOR: |
3271 | return SelectOpcode(X86::XOR64mi8, X86::XOR32mi8, X86::XOR16mi8, 0); |
3272 | default: |
3273 | llvm_unreachable("Invalid opcode!"); |
3274 | } |
3275 | }; |
3276 | auto SelectImmOpcode = [SelectOpcode](unsigned Opc) { |
3277 | switch (Opc) { |
3278 | case X86ISD::ADD: |
3279 | return SelectOpcode(X86::ADD64mi32, X86::ADD32mi, X86::ADD16mi, |
3280 | X86::ADD8mi); |
3281 | case X86ISD::ADC: |
3282 | return SelectOpcode(X86::ADC64mi32, X86::ADC32mi, X86::ADC16mi, |
3283 | X86::ADC8mi); |
3284 | case X86ISD::SUB: |
3285 | return SelectOpcode(X86::SUB64mi32, X86::SUB32mi, X86::SUB16mi, |
3286 | X86::SUB8mi); |
3287 | case X86ISD::SBB: |
3288 | return SelectOpcode(X86::SBB64mi32, X86::SBB32mi, X86::SBB16mi, |
3289 | X86::SBB8mi); |
3290 | case X86ISD::AND: |
3291 | return SelectOpcode(X86::AND64mi32, X86::AND32mi, X86::AND16mi, |
3292 | X86::AND8mi); |
3293 | case X86ISD::OR: |
3294 | return SelectOpcode(X86::OR64mi32, X86::OR32mi, X86::OR16mi, |
3295 | X86::OR8mi); |
3296 | case X86ISD::XOR: |
3297 | return SelectOpcode(X86::XOR64mi32, X86::XOR32mi, X86::XOR16mi, |
3298 | X86::XOR8mi); |
3299 | default: |
3300 | llvm_unreachable("Invalid opcode!"); |
3301 | } |
3302 | }; |
3303 | /* Start from the register form; the non-load operand is the other one of operands 0 and 1. */ |
3304 | unsigned NewOpc = SelectRegOpcode(Opc); |
3305 | SDValue Operand = StoredVal->getOperand(1-LoadOpNo); |
3306 | |
3307 | |
3308 | /* A constant operand may allow an immediate form instead. */ |
3309 | if (auto *OperandC = dyn_cast<ConstantSDNode>(Operand)) { |
3310 | int64_t OperandV = OperandC->getSExtValue(); |
3311 | |
3312 | |
3313 | /* For ADD/SUB: if only the negated constant fits in 8 bits (non-i8 types) or in 32 bits (i64), negate it and swap ADD with SUB. */ |
3314 | /* This is done only when hasNoCarryFlagUses() holds for result #1. */ |
3315 | if ((Opc == X86ISD::ADD || Opc == X86ISD::SUB) && |
3316 | ((MemVT != MVT::i8 && !isInt<8>(OperandV) && isInt<8>(-OperandV)) || |
3317 | (MemVT == MVT::i64 && !isInt<32>(OperandV) && |
3318 | isInt<32>(-OperandV))) && |
3319 | hasNoCarryFlagUses(StoredVal.getValue(1))) { |
3320 | OperandV = -OperandV; |
3321 | Opc = Opc == X86ISD::ADD ? X86ISD::SUB : X86ISD::ADD; |
3322 | } |
3323 | |
3324 | /* Prefer the imm8 form for non-i8 types; otherwise use the full immediate form unless this is i64 and the value does not fit in 32 bits. */ |
3325 | /* If neither applies, the register form chosen above is kept. */ |
3326 | if (MemVT != MVT::i8 && isInt<8>(OperandV)) { |
3327 | Operand = CurDAG->getTargetConstant(OperandV, SDLoc(Node), MemVT); |
3328 | NewOpc = SelectImm8Opcode(Opc); |
3329 | } else if (MemVT != MVT::i64 || isInt<32>(OperandV)) { |
3330 | Operand = CurDAG->getTargetConstant(OperandV, SDLoc(Node), MemVT); |
3331 | NewOpc = SelectImmOpcode(Opc); |
3332 | } |
3333 | } |
3334 | /* ADC/SBB: first copy operand 2 of the operation into EFLAGS, then pass both results of that copy as the trailing operands. */ |
3335 | if (Opc == X86ISD::ADC || Opc == X86ISD::SBB) { |
3336 | SDValue CopyTo = |
3337 | CurDAG->getCopyToReg(InputChain, SDLoc(Node), X86::EFLAGS, |
3338 | StoredVal.getOperand(2), SDValue()); |
3339 | |
3340 | const SDValue Ops[] = {Base, Scale, Index, Disp, |
3341 | Segment, Operand, CopyTo, CopyTo.getValue(1)}; |
3342 | Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, MVT::Other, |
3343 | Ops); |
3344 | } else { |
3345 | const SDValue Ops[] = {Base, Scale, Index, Disp, |
3346 | Segment, Operand, InputChain}; |
3347 | Result = CurDAG->getMachineNode(NewOpc, SDLoc(Node), MVT::i32, MVT::Other, |
3348 | Ops); |
3349 | } |
3350 | break; |
3351 | } |
3352 | default: |
3353 | llvm_unreachable("Invalid opcode!"); |
3354 | } |
3355 | /* Attach both the store's and the load's memory operands to the new node. */ |
3356 | MachineMemOperand *MemOps[] = {StoreNode->getMemOperand(), |
3357 | LoadNode->getMemOperand()}; |
3358 | CurDAG->setNodeMemRefs(Result, MemOps); |
3359 | |
3360 | /* Uses of the load's result #1 and the store's result #0 move to the new node's MVT::Other result; uses of the operation's result #1 move to its i32 result. */ |
3361 | ReplaceUses(SDValue(LoadNode, 1), SDValue(Result, 1)); |
3362 | ReplaceUses(SDValue(StoreNode, 0), SDValue(Result, 1)); |
3363 | ReplaceUses(SDValue(StoredVal.getNode(), 1), SDValue(Result, 0)); |
3364 | CurDAG->RemoveDeadNode(Node); |
3365 | return true; |
3366 | } |
3367 | |
3368 | |
3369 | |
3370 | |
3371 | /* Match a "keep the low NBits bits of X" pattern rooted at an AND or SRL node and rewrite it as X86ISD::BZHI or X86ISD::BEXTR. */ |
3372 | /* BZHI is used when the subtarget has BMI2, otherwise BEXTR; only i32 and i64 results are handled. */ |
3373 | /* Returns true if Node was replaced (the replacement is passed to SelectCode), false if nothing matched. */ |
3374 | bool X86DAGToDAGISel::matchBitExtract(SDNode *Node) { |
3375 | assert( |
3376 | (Node->getOpcode() == ISD::AND || Node->getOpcode() == ISD::SRL) && |
3377 | "Should be either an and-mask, or right-shift after clearing high bits."); |
3378 | |
3379 | /* At least one of BMI and BMI2 must be available. */ |
3380 | if (!Subtarget->hasBMI() && !Subtarget->hasBMI2()) |
3381 | return false; |
3382 | |
3383 | MVT NVT = Node->getSimpleValueType(0); |
3384 | |
3385 | /* Only 32-bit and 64-bit results are handled. */ |
3386 | if (NVT != MVT::i32 && NVT != MVT::i64) |
3387 | return false; |
3388 | |
3389 | SDValue NBits; |
3390 | |
3391 | /* Use-count helpers. With BMI2 every use-count check below passes unconditionally. */ |
3392 | /* Without BMI2 the value must have exactly NUses uses of the given result. */ |
3393 | const bool CanHaveExtraUses = Subtarget->hasBMI2(); |
3394 | auto checkUses = [CanHaveExtraUses](SDValue Op, unsigned NUses) { |
3395 | return CanHaveExtraUses || |
3396 | Op.getNode()->hasNUsesOfValue(NUses, Op.getResNo()); |
3397 | }; |
3398 | auto checkOneUse = [checkUses](SDValue Op) { return checkUses(Op, 1); }; |
3399 | auto checkTwoUse = [checkUses](SDValue Op) { return checkUses(Op, 2); }; |
3400 | /* Step through a TRUNCATE that passes checkOneUse; the assert expects it to be an i64 -> i32 truncation. */ |
3401 | auto peekThroughOneUseTruncation = [checkOneUse](SDValue V) { |
3402 | if (V->getOpcode() == ISD::TRUNCATE && checkOneUse(V)) { |
3403 | assert(V.getSimpleValueType() == MVT::i32 && |
3404 | V.getOperand(0).getSimpleValueType() == MVT::i64 && |
3405 | "Expected i64 -> i32 truncation"); |
3406 | V = V.getOperand(0); |
3407 | } |
3408 | return V; |
3409 | }; |
3410 | |
3411 | /* Pattern A: Mask = (1 << NBits) + (-1). */ |
3412 | auto matchPatternA = [checkOneUse, peekThroughOneUseTruncation, |
3413 | &NBits](SDValue Mask) -> bool { |
3414 | /* The mask itself must be an ADD. */ |
3415 | if (Mask->getOpcode() != ISD::ADD || !checkOneUse(Mask)) |
3416 | return false; |
3417 | /* ... whose second operand is all-ones. */ |
3418 | if (!isAllOnesConstant(Mask->getOperand(1))) |
3419 | return false; |
3420 | /* ... and whose first operand (possibly truncated) is a left shift of the constant 1. */ |
3421 | SDValue M0 = peekThroughOneUseTruncation(Mask->getOperand(0)); |
3422 | if (M0->getOpcode() != ISD::SHL || !checkOneUse(M0)) |
3423 | return false; |
3424 | if (!isOneConstant(M0->getOperand(0))) |
3425 | return false; |
3426 | NBits = M0->getOperand(1); |
3427 | return true; |
3428 | }; |
3429 | /* True if, after stepping through a truncation, the low NVT-width bits of V are all ones according to MaskedValueIsAllOnes. */ |
3430 | auto isAllOnes = [this, peekThroughOneUseTruncation, NVT](SDValue V) { |
3431 | V = peekThroughOneUseTruncation(V); |
3432 | return CurDAG->MaskedValueIsAllOnes( |
3433 | V, APInt::getLowBitsSet(V.getSimpleValueType().getSizeInBits(), |
3434 | NVT.getSizeInBits())); |
3435 | }; |
3436 | |
3437 | /* Pattern B: Mask = (all-ones << NBits) XOR all-ones. */ |
3438 | auto matchPatternB = [checkOneUse, isAllOnes, peekThroughOneUseTruncation, |
3439 | &NBits](SDValue Mask) -> bool { |
3440 | /* The mask itself must be an XOR. */ |
3441 | if (Mask.getOpcode() != ISD::XOR || !checkOneUse(Mask)) |
3442 | return false; |
3443 | /* ... with an all-ones second operand. */ |
3444 | if (!isAllOnes(Mask->getOperand(1))) |
3445 | return false; |
3446 | /* ... and a first operand (possibly truncated) that is a left shift. */ |
3447 | SDValue M0 = peekThroughOneUseTruncation(Mask->getOperand(0)); |
3448 | if (M0->getOpcode() != ISD::SHL || !checkOneUse(M0)) |
3449 | return false; |
3450 | /* The shifted value must be all-ones as well. */ |
3451 | if (!isAllOnes(M0->getOperand(0))) |
3452 | return false; |
3453 | NBits = M0->getOperand(1); |
3454 | return true; |
3455 | }; |
3456 | |
3457 | /* Matches ShiftAmt == (Bitwidth - NBits), optionally behind a truncation, and records NBits. */ |
3458 | auto matchShiftAmt = [checkOneUse, &NBits](SDValue ShiftAmt, |
3459 | unsigned Bitwidth) { |
3460 | /* Step through a truncation of the shift amount. */ |
3461 | if (ShiftAmt.getOpcode() == ISD::TRUNCATE) { |
3462 | ShiftAmt = ShiftAmt.getOperand(0); |
3463 | /* The value under the truncation must pass the one-use check. */ |
3464 | if (!checkOneUse(ShiftAmt)) |
3465 | return false; |
3466 | } |
3467 | /* Must be a subtraction whose first operand is the constant Bitwidth. */ |
3468 | if (ShiftAmt.getOpcode() != ISD::SUB) |
3469 | return false; |
3470 | auto *V0 = dyn_cast<ConstantSDNode>(ShiftAmt.getOperand(0)); |
3471 | if (!V0 || V0->getZExtValue() != Bitwidth) |
3472 | return false; |
3473 | NBits = ShiftAmt.getOperand(1); |
3474 | return true; |
3475 | }; |
3476 | |
3477 | /* Pattern C: Mask = all-ones >> (Bitwidth - NBits), logical shift. */ |
3478 | auto matchPatternC = [checkOneUse, peekThroughOneUseTruncation, |
3479 | matchShiftAmt](SDValue Mask) -> bool { |
3480 | /* The mask may be truncated; Bitwidth is taken from the untruncated value. */ |
3481 | Mask = peekThroughOneUseTruncation(Mask); |
3482 | unsigned Bitwidth = Mask.getSimpleValueType().getSizeInBits(); |
3483 | /* The mask itself must be a logical right shift. */ |
3484 | if (Mask.getOpcode() != ISD::SRL || !checkOneUse(Mask)) |
3485 | return false; |
3486 | /* ... of the all-ones constant. */ |
3487 | if (!isAllOnesConstant(Mask.getOperand(0))) |
3488 | return false; |
3489 | SDValue M1 = Mask.getOperand(1); |
3490 | /* The shift amount must pass the one-use check and have the (Bitwidth - NBits) form. */ |
3491 | if (!checkOneUse(M1)) |
3492 | return false; |
3493 | return matchShiftAmt(M1, Bitwidth); |
3494 | }; |
3495 | |
3496 | SDValue X; |
3497 | |
3498 | /* Pattern D: Node = (X << (Bitwidth - NBits)) >> (Bitwidth - NBits), logical right shift. */ |
3499 | auto matchPatternD = [checkOneUse, checkTwoUse, matchShiftAmt, |
3500 | &X](SDNode *Node) -> bool { |
3501 | if (Node->getOpcode() != ISD::SRL) |
3502 | return false; |
3503 | SDValue N0 = Node->getOperand(0); |
3504 | if (N0->getOpcode() != ISD::SHL || !checkOneUse(N0)) |
3505 | return false; |
3506 | unsigned Bitwidth = N0.getSimpleValueType().getSizeInBits(); |
3507 | SDValue N1 = Node->getOperand(1); |
3508 | SDValue N01 = N0->getOperand(1); |
3509 | |
3510 | /* Both shifts must use the very same amount value, which is therefore checked for two uses. */ |
3511 | if (N1 != N01 || !checkTwoUse(N1)) |
3512 | return false; |
3513 | if (!matchShiftAmt(N1, Bitwidth)) |
3514 | return false; |
3515 | X = N0->getOperand(0); |
3516 | return true; |
3517 | }; |
3518 | /* A low-bit mask is anything accepted by pattern A, B or C, tried in that order. */ |
3519 | auto matchLowBitMask = [matchPatternA, matchPatternB, |
3520 | matchPatternC](SDValue Mask) -> bool { |
3521 | return matchPatternA(Mask) || matchPatternB(Mask) || matchPatternC(Mask); |
3522 | }; |
3523 | /* For an AND, try operand 1 as the mask first, then the operands swapped. Otherwise the node must match pattern D. */ |
3524 | if (Node->getOpcode() == ISD::AND) { |
3525 | X = Node->getOperand(0); |
3526 | SDValue Mask = Node->getOperand(1); |
3527 | |
3528 | if (matchLowBitMask(Mask)) { |
3529 | /* Matched with the mask as operand 1; nothing more to do. */ |
3530 | } else { |
3531 | std::swap(X, Mask); |
3532 | if (!matchLowBitMask(Mask)) |
3533 | return false; |
3534 | } |
3535 | } else if (!matchPatternD(Node)) |
3536 | return false; |
3537 | |
3538 | SDLoc DL(Node); |
3539 | |
3540 | /* Narrow NBits to i8. */ |
3541 | NBits = CurDAG->getNode(ISD::TRUNCATE, DL, MVT::i8, NBits); |
3542 | insertDAGNode(*CurDAG, SDValue(Node, 0), NBits); |
3543 | |
3544 | /* Then widen it to i32 by inserting it as the sub_8bit subregister of an IMPLICIT_DEF value. */ |
3545 | |
3546 | SDValue ImplDef = SDValue( |
3547 | CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i32), 0); |
3548 | insertDAGNode(*CurDAG, SDValue(Node, 0), ImplDef); |
3549 | |
3550 | SDValue SRIdxVal = CurDAG->getTargetConstant(X86::sub_8bit, DL, MVT::i32); |
3551 | insertDAGNode(*CurDAG, SDValue(Node, 0), SRIdxVal); |
3552 | NBits = SDValue( |
3553 | CurDAG->getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::i32, ImplDef, |
3554 | NBits, SRIdxVal), 0); |
3555 | insertDAGNode(*CurDAG, SDValue(Node, 0), NBits); |
3556 | /* BMI2 path: emit BZHI(X, NBits). */ |
3557 | if (Subtarget->hasBMI2()) { |
3558 | |
3559 | if (NVT != MVT::i32) { |
3560 | /* Extend the bit count to the result type. */ |
3561 | NBits = CurDAG->getNode(ISD::ANY_EXTEND, DL, NVT, NBits); |
3562 | insertDAGNode(*CurDAG, SDValue(Node, 0), NBits); |
3563 | } |
3564 | |
3565 | SDValue Extract = CurDAG->getNode(X86ISD::BZHI, DL, NVT, X, NBits); |
3566 | ReplaceNode(Node, Extract.getNode()); |
3567 | SelectCode(Extract.getNode()); |
3568 | return true; |
3569 | } |
3570 | |
3571 | /* BMI-only path: emit BEXTR. */ |
3572 | |
3573 | |
3574 | /* If X is a truncation of a logical right shift, continue with the untruncated shift instead. */ |
3575 | { |
3576 | SDValue RealX = peekThroughOneUseTruncation(X); |
3577 | |
3578 | if (RealX != X && RealX.getOpcode() == ISD::SRL) |
3579 | X = RealX; |
3580 | } |
3581 | |
3582 | MVT XVT = X.getSimpleValueType(); |
3583 | |
3584 | |
3585 | |
3586 | |
3587 | |
3588 | |
3589 | |
3590 | |
3591 | /* Build the BEXTR control value: start with NBits shifted left by 8. */ |
3592 | SDValue C8 = CurDAG->getConstant(8, DL, MVT::i8); |
3593 | insertDAGNode(*CurDAG, SDValue(Node, 0), C8); |
3594 | SDValue Control = CurDAG->getNode(ISD::SHL, DL, MVT::i32, NBits, C8); |
3595 | insertDAGNode(*CurDAG, SDValue(Node, 0), Control); |
3596 | |
3597 | |
3598 | /* If X is a logical right shift, extract from its input and OR its zero-extended shift amount into the control value. */ |
3599 | if (X.getOpcode() == ISD::SRL) { |
3600 | SDValue ShiftAmt = X.getOperand(1); |
3601 | X = X.getOperand(0); |
3602 | |
3603 | assert(ShiftAmt.getValueType() == MVT::i8 && |
3604 | "Expected shift amount to be i8"); |
3605 | |
3606 | |
3607 | /* The extended amount is positioned relative to the original amount, not relative to Node. */ |
3608 | SDValue OrigShiftAmt = ShiftAmt; |
3609 | ShiftAmt = CurDAG->getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShiftAmt); |
3610 | insertDAGNode(*CurDAG, OrigShiftAmt, ShiftAmt); |
3611 | |
3612 | |
3613 | Control = CurDAG->getNode(ISD::OR, DL, MVT::i32, Control, ShiftAmt); |
3614 | insertDAGNode(*CurDAG, SDValue(Node, 0), Control); |
3615 | } |
3616 | |
3617 | /* Extend the control value to the type of X if that is not i32. */ |
3618 | if (XVT != MVT::i32) { |
3619 | Control = CurDAG->getNode(ISD::ANY_EXTEND, DL, XVT, Control); |
3620 | insertDAGNode(*CurDAG, SDValue(Node, 0), Control); |
3621 | } |
3622 | |
3623 | /* Perform the extraction in the type of X. */ |
3624 | SDValue Extract = CurDAG->getNode(X86ISD::BEXTR, DL, XVT, X, Control); |
3625 | |
3626 | /* Truncate back to the result type when X is of a different type. */ |
3627 | if (XVT != NVT) { |
3628 | insertDAGNode(*CurDAG, SDValue(Node, 0), Extract); |
3629 | Extract = CurDAG->getNode(ISD::TRUNCATE, DL, NVT, Extract); |
3630 | } |
3631 | |
3632 | ReplaceNode(Node, Extract.getNode()); |
3633 | SelectCode(Extract.getNode()); |
3634 | |
3635 | return true; |
3636 | } |
3637 | /* Match (and (srl/sra X, ShiftImm), MaskImm) and emit a BEXTR machine node, or BZHI followed by SHR when BEXTR is not preferred. */ |
3638 | /* Returns the new machine node, or nullptr when the pattern or the subtarget does not qualify. */ |
3639 | MachineSDNode *X86DAGToDAGISel::matchBEXTRFromAndImm(SDNode *Node) { |
3640 | MVT NVT = Node->getSimpleValueType(0); |
3641 | SDLoc dl(Node); |
3642 | |
3643 | SDValue N0 = Node->getOperand(0); |
3644 | SDValue N1 = Node->getOperand(1); |
3645 | |
3646 | |
3647 | |
3648 | |
3649 | |
3650 | /* BEXTR is preferred with TBM, or with BMI when the subtarget reports fast BEXTR. */ |
3651 | /* Without that preference BMI2 is required, because the fallback uses BZHI. */ |
3652 | bool PreferBEXTR = |
3653 | Subtarget->hasTBM() || (Subtarget->hasBMI() && Subtarget->hasFastBEXTR()); |
3654 | if (!PreferBEXTR && !Subtarget->hasBMI2()) |
3655 | return nullptr; |
3656 | |
3657 | /* Operand 0 must be a right shift (logical or arithmetic). */ |
3658 | if (N0->getOpcode() != ISD::SRL && N0->getOpcode() != ISD::SRA) |
3659 | return nullptr; |
3660 | |
3661 | /* ... that has a single use. */ |
3662 | if (!N0->hasOneUse()) |
3663 | return nullptr; |
3664 | |
3665 | /* Only 32-bit and 64-bit results are handled. */ |
3666 | if (NVT != MVT::i32 && NVT != MVT::i64) |
3667 | return nullptr; |
3668 | |
3669 | /* Both the mask and the shift amount must be constants. */ |
3670 | ConstantSDNode *MaskCst = dyn_cast<ConstantSDNode>(N1); |
3671 | ConstantSDNode *ShiftCst = dyn_cast<ConstantSDNode>(N0->getOperand(1)); |
3672 | if (!MaskCst || !ShiftCst) |
3673 | return nullptr; |
3674 | |
3675 | /* The mask value must be accepted by isMask_64. */ |
3676 | uint64_t Mask = MaskCst->getZExtValue(); |
3677 | if (!isMask_64(Mask)) |
3678 | return nullptr; |
3679 | /* MaskSize is the number of set bits in the mask. */ |
3680 | uint64_t Shift = ShiftCst->getZExtValue(); |
3681 | uint64_t MaskSize = countPopulation(Mask); |
3682 | |
3683 | |
3684 | /* A shift by 8 with an 8-bit mask is deliberately left alone; the reason is not visible here (presumably handled better elsewhere - confirm). */ |
3685 | if (Shift == 8 && MaskSize == 8) |
3686 | return nullptr; |
3687 | |
3688 | |
3689 | /* The extracted field must lie entirely inside the value. */ |
3690 | if (Shift + MaskSize > NVT.getSizeInBits()) |
3691 | return nullptr; |
3692 | |
3693 | |
3694 | |
3695 | /* On the BZHI fallback, masks of 32 bits or fewer are not transformed. */ |
3696 | if (!PreferBEXTR && MaskSize <= 32) |
3697 | return nullptr; |
3698 | |
3699 | SDValue Control; |
3700 | unsigned ROpc, MOpc; |
3701 | /* ROpc / MOpc are the register and memory-operand forms of the chosen instruction. */ |
3702 | if (!PreferBEXTR) { |
3703 | assert(Subtarget->hasBMI2() && "We must have BMI2's BZHI then."); |
3704 | |
3705 | |
3706 | /* BZHI control value is Shift + MaskSize, materialized through a MOV32ri64 / MOV32ri node. */ |
3707 | Control = CurDAG->getTargetConstant(Shift + MaskSize, dl, NVT); |
3708 | ROpc = NVT == MVT::i64 ? X86::BZHI64rr : X86::BZHI32rr; |
3709 | MOpc = NVT == MVT::i64 ? X86::BZHI64rm : X86::BZHI32rm; |
3710 | unsigned NewOpc = NVT == MVT::i64 ? X86::MOV32ri64 : X86::MOV32ri; |
3711 | Control = SDValue(CurDAG->getMachineNode(NewOpc, dl, NVT, Control), 0); |
3712 | } else { |
3713 | |
3714 | |
3715 | |
3716 | /* BEXTR control value: Shift in the low bits, MaskSize shifted left by 8. */ |
3717 | Control = CurDAG->getTargetConstant(Shift | (MaskSize << 8), dl, NVT); |
3718 | if (Subtarget->hasTBM()) { |
3719 | ROpc = NVT == MVT::i64 ? X86::BEXTRI64ri : X86::BEXTRI32ri; |
3720 | MOpc = NVT == MVT::i64 ? X86::BEXTRI64mi : X86::BEXTRI32mi; |
3721 | } else { |
3722 | assert(Subtarget->hasBMI() && "We must have BMI1's BEXTR then."); |
3723 | /* The BMI form takes the control value from a MOV32ri64 / MOV32ri node rather than as a target constant. */ |
3724 | ROpc = NVT == MVT::i64 ? X86::BEXTR64rr : X86::BEXTR32rr; |
3725 | MOpc = NVT == MVT::i64 ? X86::BEXTR64rm : X86::BEXTR32rm; |
3726 | unsigned NewOpc = NVT == MVT::i64 ? X86::MOV32ri64 : X86::MOV32ri; |
3727 | Control = SDValue(CurDAG->getMachineNode(NewOpc, dl, NVT, Control), 0); |
3728 | } |
3729 | } |
3730 | /* Use the memory form if the shifted value can be folded as a load, otherwise the register form. */ |
3731 | MachineSDNode *NewNode; |
3732 | SDValue Input = N0->getOperand(0); |
3733 | SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; |
3734 | if (tryFoldLoad(Node, N0.getNode(), Input, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) { |
3735 | SDValue Ops[] = { |
3736 | Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Control, Input.getOperand(0)}; |
3737 | SDVTList VTs = CurDAG->getVTList(NVT, MVT::i32, MVT::Other); |
3738 | NewNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops); |
3739 | /* Users of the load's result #1 move to result #2 (MVT::Other) of the new node. */ |
3740 | ReplaceUses(Input.getValue(1), SDValue(NewNode, 2)); |
3741 | /* Carry the load's memory operand over to the new node. */ |
3742 | CurDAG->setNodeMemRefs(NewNode, {cast<LoadSDNode>(Input)->getMemOperand()}); |
3743 | } else { |
3744 | NewNode = CurDAG->getMachineNode(ROpc, dl, NVT, MVT::i32, Input, Control); |
3745 | } |
3746 | |
3747 | if (!PreferBEXTR) { |
3748 | /* BZHI path: shift the BZHI result right by Shift. */ |
3749 | SDValue ShAmt = CurDAG->getTargetConstant(Shift, dl, NVT); |
3750 | unsigned NewOpc = NVT == MVT::i64 ? X86::SHR64ri : X86::SHR32ri; |
3751 | NewNode = |
3752 | CurDAG->getMachineNode(NewOpc, dl, NVT, SDValue(NewNode, 0), ShAmt); |
3753 | } |
3754 | |
3755 | return NewNode; |
3756 | } |
3757 | |
3758 | |
3759 | MachineSDNode *X86DAGToDAGISel::emitPCMPISTR(unsigned ROpc, unsigned MOpc, |
3760 | bool MayFoldLoad, const SDLoc &dl, |
3761 | MVT VT, SDNode *Node) { |
3762 | SDValue N0 = Node->getOperand(0); |
3763 | SDValue N1 = Node->getOperand(1); |
3764 | SDValue Imm = Node->getOperand(2); |
3765 | const ConstantInt *Val = cast<ConstantSDNode>(Imm)->getConstantIntValue(); |
3766 | Imm = CurDAG->getTargetConstant(*Val, SDLoc(Node), Imm.getValueType()); |
3767 | |
3768 | |
3769 | SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; |
3770 | if (MayFoldLoad && tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) { |
3771 | SDValue Ops[] = { N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm, |
3772 | N1.getOperand(0) }; |
3773 | SDVTList VTs = CurDAG->getVTList(VT, MVT::i32, MVT::Other); |
3774 | MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops); |
3775 | |
3776 | ReplaceUses(N1.getValue(1), SDValue(CNode, 2)); |
3777 | |
3778 | CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()}); |
3779 | return CNode; |
3780 | } |
3781 | |
3782 | SDValue Ops[] = { N0, N1, Imm }; |
3783 | SDVTList VTs = CurDAG->getVTList(VT, MVT::i32); |
3784 | MachineSDNode *CNode = CurDAG->getMachineNode(ROpc, dl, VTs, Ops); |
3785 | return CNode; |
3786 | } |
3787 | |
3788 | |
3789 | |
3790 | |
3791 | MachineSDNode *X86DAGToDAGISel::emitPCMPESTR(unsigned ROpc, unsigned MOpc, |
3792 | bool MayFoldLoad, const SDLoc &dl, |
3793 | MVT VT, SDNode *Node, |
3794 | SDValue &InFlag) { |
3795 | SDValue N0 = Node->getOperand(0); |
3796 | SDValue N2 = Node->getOperand(2); |
3797 | SDValue Imm = Node->getOperand(4); |
3798 | const ConstantInt *Val = cast<ConstantSDNode>(Imm)->getConstantIntValue(); |
3799 | Imm = CurDAG->getTargetConstant(*Val, SDLoc(Node), Imm.getValueType()); |
3800 | |
3801 | |
3802 | SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; |
3803 | if (MayFoldLoad && tryFoldLoad(Node, N2, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) { |
3804 | SDValue Ops[] = { N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm, |
3805 | N2.getOperand(0), InFlag }; |
3806 | SDVTList VTs = CurDAG->getVTList(VT, MVT::i32, MVT::Other, MVT::Glue); |
3807 | MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops); |
3808 | InFlag = SDValue(CNode, 3); |
3809 | |
3810 | ReplaceUses(N2.getValue(1), SDValue(CNode, 2)); |
3811 | |
3812 | CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N2)->getMemOperand()}); |
3813 | return CNode; |
3814 | } |
3815 | |
3816 | SDValue Ops[] = { N0, N2, Imm, InFlag }; |
3817 | SDVTList VTs = CurDAG->getVTList(VT, MVT::i32, MVT::Glue); |
3818 | MachineSDNode *CNode = CurDAG->getMachineNode(ROpc, dl, VTs, Ops); |
3819 | InFlag = SDValue(CNode, 2); |
3820 | return CNode; |
3821 | } |
3822 | /* Simplify the amount operand of scalar shift node N when it is an ADD/SUB involving a constant that is a multiple of the shift width (or of 32 for i64); returns true if N was rewritten. */ |
3823 | bool X86DAGToDAGISel::tryShiftAmountMod(SDNode *N) { |
3824 | EVT VT = N->getValueType(0); |
3825 | |
3826 | /* Vector shifts are not handled. */ |
3827 | if (VT.isVector()) |
3828 | return false; |
3829 | |
3830 | /* Width used for the modulo reasoning: 64 for i64, 32 for every other scalar type. */ |
3831 | unsigned Size = VT == MVT::i64 ? 64 : 32; |
3832 | |
3833 | SDValue OrigShiftAmt = N->getOperand(1); |
3834 | SDValue ShiftAmt = OrigShiftAmt; |
3835 | SDLoc DL(N); |
3836 | |
3837 | /* Look through a truncation of the shift amount. */ |
3838 | if (ShiftAmt->getOpcode() == ISD::TRUNCATE) |
3839 | ShiftAmt = ShiftAmt->getOperand(0); |
3840 | |
3841 | |
3842 | |
3843 | /* Only ADD and SUB shift amounts are candidates; anything else returns false. */ |
3844 | SDValue NewShiftAmt; |
3845 | if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) { |
3846 | SDValue Add0 = ShiftAmt->getOperand(0); |
3847 | SDValue Add1 = ShiftAmt->getOperand(1); |
3848 | auto *Add0C = dyn_cast<ConstantSDNode>(Add0); |
3849 | auto *Add1C = dyn_cast<ConstantSDNode>(Add1); |
3850 | |
3851 | /* (X + C) or (X - C) with C a multiple of Size: use X directly. */ |
3852 | if (Add1C && Add1C->getAPIntValue().urem(Size) == 0) { |
3853 | NewShiftAmt = Add0; |
3854 | |
3855 | /* (C - X) with a nonzero constant C: rewritten below as a negation. */ |
3856 | } else if (ShiftAmt->getOpcode() == ISD::SUB && Add0C && |
3857 | Add0C->getZExtValue() != 0) { |
3858 | EVT SubVT = ShiftAmt.getValueType(); |
3859 | SDValue X; |
3860 | if (Add0C->getZExtValue() % Size == 0) |
3861 | X = Add1; |
3862 | else if (ShiftAmt.hasOneUse() && Size == 64 && |
3863 | Add0C->getZExtValue() % 32 == 0) { |
3864 | /* 64-bit shift, C a multiple of 32: C - Y is replaced by 0 - (Y + C); the two agree modulo 64 because 2*C is a multiple of 64. */ |
3865 | /* A truncation of Y is looked through, in which case the arithmetic is done in the wider type. */ |
3866 | if (Add1.getOpcode() == ISD::TRUNCATE) { |
3867 | Add1 = Add1.getOperand(0); |
3868 | SubVT = Add1.getValueType(); |
3869 | } |
3870 | if (Add0.getValueType() != SubVT) { |
3871 | Add0 = CurDAG->getZExtOrTrunc(Add0, DL, SubVT); |
3872 | insertDAGNode(*CurDAG, OrigShiftAmt, Add0); |
3873 | } |
3874 | |
3875 | X = CurDAG->getNode(ISD::ADD, DL, SubVT, Add1, Add0); |
3876 | insertDAGNode(*CurDAG, OrigShiftAmt, X); |
3877 | } else |
3878 | return false; |
3879 | |
3880 | |
3881 | /* The new amount is 0 - X. */ |
3882 | SDValue Zero = CurDAG->getConstant(0, DL, SubVT); |
3883 | SDValue Neg = CurDAG->getNode(ISD::SUB, DL, SubVT, Zero, X); |
3884 | NewShiftAmt = Neg; |
3885 | |
3886 | |
3887 | /* Register the new nodes with insertDAGNode, positioned relative to the original shift amount. */ |
3888 | insertDAGNode(*CurDAG, OrigShiftAmt, Zero); |
3889 | insertDAGNode(*CurDAG, OrigShiftAmt, Neg); |
3890 | } else |
3891 | return false; |
3892 | } else |
3893 | return false; |
3894 | |
3895 | if (NewShiftAmt.getValueType() != MVT::i8) { |
3896 | /* The amount operand is made i8 by truncation. */ |
3897 | NewShiftAmt = CurDAG->getNode(ISD::TRUNCATE, DL, MVT::i8, NewShiftAmt); |
3898 | |
3899 | insertDAGNode(*CurDAG, OrigShiftAmt, NewShiftAmt); |
3900 | } |
3901 | |
3902 | |
3903 | /* Mask the new amount explicitly with Size - 1. */ |
3904 | NewShiftAmt = CurDAG->getNode(ISD::AND, DL, MVT::i8, NewShiftAmt, |
3905 | CurDAG->getConstant(Size - 1, DL, MVT::i8)); |
3906 | |
3907 | insertDAGNode(*CurDAG, OrigShiftAmt, NewShiftAmt); |
3908 | /* Swap in the new amount, keeping operand 0 unchanged. */ |
3909 | SDNode *UpdatedNode = CurDAG->UpdateNodeOperands(N, N->getOperand(0), |
3910 | NewShiftAmt); |
3911 | if (UpdatedNode != N) { |
3912 | |
3913 | /* UpdateNodeOperands returned a different node: replace N with it and stop. */ |
3914 | ReplaceNode(N, UpdatedNode); |
3915 | return true; |
3916 | } |
3917 | |
3918 | |
3919 | /* N was updated in place: delete the old amount if nothing uses it any more. */ |
3920 | if (OrigShiftAmt.getNode()->use_empty()) |
3921 | CurDAG->RemoveDeadNode(OrigShiftAmt.getNode()); |
3922 | |
3923 | |
3924 | /* Run SelectCode on the updated node. */ |
3925 | SelectCode(N); |
3926 | return true; |
3927 | } |
3928 | /* Rewrites (X << C1) op C2 as (X op (C2 >> C1)) << C1 when the shifted constant passes CanShrinkImmediate; returns true if N was replaced and SelectCode was run on the new shift, false otherwise. */ |
3929 | bool X86DAGToDAGISel::tryShrinkShlLogicImm(SDNode *N) { |
3930 | MVT NVT = N->getSimpleValueType(0); |
3931 | unsigned Opcode = N->getOpcode(); |
3932 | SDLoc dl(N); |
3933 | |
3934 | /* Operand 0 is the candidate shift (possibly behind an ANY_EXTEND); */ |
3935 | /* operand 1 must be a constant. */ |
3936 | SDValue Shift = N->getOperand(0); |
3937 | SDValue N1 = N->getOperand(1); |
3938 | |
3939 | ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1); |
3940 | if (!Cst) |
3941 | return false; |
3942 | |
3943 | int64_t Val = Cst->getSExtValue(); |
3944 | |
3945 | /* Look through a single-use ANY_EXTEND whose source is i32, */ |
3946 | /* but only when the constant fits in 32 unsigned bits. */ |
3947 | /* FoundAnyExtend records that the extend must be re-created later. */ |
3948 | bool FoundAnyExtend = false; |
3949 | if (Shift.getOpcode() == ISD::ANY_EXTEND && Shift.hasOneUse() && |
3950 | Shift.getOperand(0).getSimpleValueType() == MVT::i32 && |
3951 | isUInt<32>(Val)) { |
3952 | FoundAnyExtend = true; |
3953 | Shift = Shift.getOperand(0); |
3954 | } |
3955 | |
3956 | if (Shift.getOpcode() != ISD::SHL || !Shift.hasOneUse()) |
3957 | return false; |
3958 | |
3959 | /* Only i32 and i64 result types are handled. */ |
3960 | if (NVT != MVT::i32 && NVT != MVT::i64) |
3961 | return false; |
3962 | |
3963 | ConstantSDNode *ShlCst = dyn_cast<ConstantSDNode>(Shift.getOperand(1)); |
3964 | if (!ShlCst) |
3965 | return false; |
3966 | |
3967 | uint64_t ShAmt = ShlCst->getZExtValue(); |
3968 | |
3969 | /* For opcodes other than AND the constant must have no bits set below */ |
3970 | /* the shift amount, because the right shift below discards those bits. */ |
3971 | uint64_t RemovedBitsMask = (1ULL << ShAmt) - 1; |
3972 | if (Opcode != ISD::AND && (Val & RemovedBitsMask) != 0) |
3973 | return false; |
3974 | |
3975 | /* Decides whether the constant shifted right by ShAmt is accepted; */ |
3976 | /* the candidate value is written to ShiftedVal. */ |
3977 | auto CanShrinkImmediate = [&](int64_t &ShiftedVal) { |
3978 | if (Opcode == ISD::AND) { |
3979 | /* AND: try the logical (unsigned) right shift first and accept it */ |
3980 | /* if an i64 constant newly fits in 32 unsigned bits. */ |
3981 | ShiftedVal = (uint64_t)Val >> ShAmt; |
3982 | if (NVT == MVT::i64 && !isUInt<32>(Val) && isUInt<32>(ShiftedVal)) |
3983 | return true; |
3984 | /* Also accept a shifted mask equal to UINT8_MAX or UINT16_MAX. */ |
3985 | if (ShiftedVal == UINT8_MAX || ShiftedVal == UINT16_MAX) |
3986 | return true; |
3987 | } |
3988 | ShiftedVal = Val >> ShAmt; |
3989 | if ((!isInt<8>(Val) && isInt<8>(ShiftedVal)) || |
3990 | (!isInt<32>(Val) && isInt<32>(ShiftedVal))) |
3991 | return true; |
3992 | if (Opcode != ISD::AND) { |
3993 | /* Non-AND: fall back to the logical shift for i64 constants. */ |
3994 | ShiftedVal = (uint64_t)Val >> ShAmt; |
3995 | if (NVT == MVT::i64 && !isUInt<32>(Val) && isUInt<32>(ShiftedVal)) |
3996 | return true; |
3997 | } |
3998 | return false; |
3999 | }; |
4000 | |
4001 | int64_t ShiftedVal; |
4002 | if (!CanShrinkImmediate(ShiftedVal)) |
4003 | return false; |
4004 | |
4005 | /* For AND, compute a low-bits mask whose width is the constant's */ |
4006 | /* active bit count rounded up to a power of two (minimum 8). */ |
4007 | /* If every bit inside that width which the constant clears is already */ |
4008 | /* known zero in operand 0, the transform is not performed. */ |
4009 | |
4010 | if (Opcode == ISD::AND) { |
4011 | /* Active bits of the constant, at least 8, rounded up to a power of 2. */ |
4012 | unsigned ZExtWidth = Cst->getAPIntValue().getActiveBits(); |
4013 | ZExtWidth = PowerOf2Ceil(std::max(ZExtWidth, 8U)); |
4014 | |
4015 | /* Bits within ZExtWidth that the constant does not keep. */ |
4016 | APInt NeededMask = APInt::getLowBitsSet(NVT.getSizeInBits(), |
4017 | ZExtWidth); |
4018 | NeededMask &= ~Cst->getAPIntValue(); |
4019 | |
4020 | if (CurDAG->MaskedValueIsZero(N->getOperand(0), NeededMask)) |
4021 | return false; |
4022 | } |
4023 | /* X is the value being shifted; re-create the ANY_EXTEND to NVT if one was skipped above. */ |
4024 | SDValue X = Shift.getOperand(0); |
4025 | if (FoundAnyExtend) { |
4026 | SDValue NewX = CurDAG->getNode(ISD::ANY_EXTEND, dl, NVT, X); |
4027 | insertDAGNode(*CurDAG, SDValue(N, 0), NewX); |
4028 | X = NewX; |
4029 | } |
4030 | /* Build (X op ShiftedVal) << ShAmt, replace N with it and run SelectCode on the new shift. */ |
4031 | SDValue NewCst = CurDAG->getConstant(ShiftedVal, dl, NVT); |
4032 | insertDAGNode(*CurDAG, SDValue(N, 0), NewCst); |
4033 | SDValue NewBinOp = CurDAG->getNode(Opcode, dl, NVT, X, NewCst); |
4034 | insertDAGNode(*CurDAG, SDValue(N, 0), NewBinOp); |
4035 | SDValue NewSHL = CurDAG->getNode(ISD::SHL, dl, NVT, NewBinOp, |
4036 | Shift.getOperand(1)); |
4037 | ReplaceNode(N, NewSHL.getNode()); |
4038 | SelectCode(NewSHL.getNode()); |
4039 | return true; |
4040 | } |
4041 | /* Emits a VPTERNLOG machine node for Root from operands A, B, C and immediate Imm, folding one operand as a load or broadcast load when possible. A must be an operand of ParentA, B and C operands of ParentBC. Always returns true. */ |
4042 | bool X86DAGToDAGISel::matchVPTERNLOG(SDNode *Root, SDNode *ParentA, |
4043 | SDNode *ParentBC, SDValue A, SDValue B, |
4044 | SDValue C, uint8_t Imm) { |
4045 | assert(A.isOperandOf(ParentA)); |
4046 | assert(B.isOperandOf(ParentBC)); |
4047 | assert(C.isOperandOf(ParentBC)); |
4048 | /* Helper: try tryFoldLoad first, then a 32- or 64-bit X86ISD::VBROADCAST_LOAD via tryFoldBroadcast. */ |
4049 | auto tryFoldLoadOrBCast = |
4050 | [this](SDNode *Root, SDNode *P, SDValue &L, SDValue &Base, SDValue &Scale, |
4051 | SDValue &Index, SDValue &Disp, SDValue &Segment) { |
4052 | if (tryFoldLoad(Root, P, L, Base, Scale, Index, Disp, Segment)) |
4053 | return true; |
4054 | |
4055 | /* Look through a single-use bitcast; L is updated for the caller. */ |
4056 | if (L.getOpcode() == ISD::BITCAST && L.hasOneUse()) { |
4057 | P = L.getNode(); |
4058 | L = L.getOperand(0); |
4059 | } |
4060 | |
4061 | if (L.getOpcode() != X86ISD::VBROADCAST_LOAD) |
4062 | return false; |
4063 | |
4064 | /* Only broadcast loads with a 32- or 64-bit memory type are accepted. */ |
4065 | auto *MemIntr = cast<MemIntrinsicSDNode>(L); |
4066 | unsigned Size = MemIntr->getMemoryVT().getSizeInBits(); |
4067 | if (Size != 32 && Size != 64) |
4068 | return false; |
4069 | |
4070 | return tryFoldBroadcast(Root, P, L, Base, Scale, Index, Disp, Segment); |
4071 | }; |
4072 | /* The folded operand always ends up as C: try C, then A, then B, swapping the chosen one with C and adjusting Imm. */ |
4073 | bool FoldedLoad = false; |
4074 | SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; |
4075 | if (tryFoldLoadOrBCast(Root, ParentBC, C, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) { |
4076 | FoldedLoad = true; |
4077 | } else if (tryFoldLoadOrBCast(Root, ParentA, A, Tmp0, Tmp1, Tmp2, Tmp3, |
4078 | Tmp4)) { |
4079 | FoldedLoad = true; |
4080 | std::swap(A, C); |
4081 | /* A and C swapped: keep bits 0xa5, exchange bit 0x02 with 0x10 and bit 0x08 with 0x40. */ |
4082 | uint8_t OldImm = Imm; |
4083 | Imm = OldImm & 0xa5; |
4084 | if (OldImm & 0x02) Imm |= 0x10; |
4085 | if (OldImm & 0x10) Imm |= 0x02; |
4086 | if (OldImm & 0x08) Imm |= 0x40; |
4087 | if (OldImm & 0x40) Imm |= 0x08; |
4088 | } else if (tryFoldLoadOrBCast(Root, ParentBC, B, Tmp0, Tmp1, Tmp2, Tmp3, |
4089 | Tmp4)) { |
4090 | FoldedLoad = true; |
4091 | std::swap(B, C); |
4092 | /* B and C swapped: keep bits 0x99, exchange bit 0x02 with 0x04 and bit 0x20 with 0x40. */ |
4093 | uint8_t OldImm = Imm; |
4094 | Imm = OldImm & 0x99; |
4095 | if (OldImm & 0x02) Imm |= 0x04; |
4096 | if (OldImm & 0x04) Imm |= 0x02; |
4097 | if (OldImm & 0x20) Imm |= 0x40; |
4098 | if (OldImm & 0x40) Imm |= 0x20; |
4099 | } |
4100 | |
4101 | SDLoc DL(Root); |
4102 | |
4103 | SDValue TImm = CurDAG->getTargetConstant(Imm, DL, MVT::i8); |
4104 | |
4105 | MVT NVT = Root->getSimpleValueType(0); |
4106 | |
4107 | MachineSDNode *MNode; |
4108 | if (FoldedLoad) { |
4109 | SDVTList VTs = CurDAG->getVTList(NVT, MVT::Other); |
4110 | /* Broadcast fold: pick the rmbi opcode from the broadcast element size and the vector width; otherwise the rmi opcode. */ |
4111 | unsigned Opc; |
4112 | if (C.getOpcode() == X86ISD::VBROADCAST_LOAD) { |
4113 | auto *MemIntr = cast<MemIntrinsicSDNode>(C); |
4114 | unsigned EltSize = MemIntr->getMemoryVT().getSizeInBits(); |
4115 | assert((EltSize == 32 || EltSize == 64) && "Unexpected broadcast size!"); |
4116 | |
4117 | bool UseD = EltSize == 32; |
4118 | if (NVT.is128BitVector()) |
4119 | Opc = UseD ? X86::VPTERNLOGDZ128rmbi : X86::VPTERNLOGQZ128rmbi; |
4120 | else if (NVT.is256BitVector()) |
4121 | Opc = UseD ? X86::VPTERNLOGDZ256rmbi : X86::VPTERNLOGQZ256rmbi; |
4122 | else if (NVT.is512BitVector()) |
4123 | Opc = UseD ? X86::VPTERNLOGDZrmbi : X86::VPTERNLOGQZrmbi; |
4124 | else |
4125 | llvm_unreachable("Unexpected vector size!"); |
4126 | } else { |
4127 | bool UseD = NVT.getVectorElementType() == MVT::i32; |
4128 | if (NVT.is128BitVector()) |
4129 | Opc = UseD ? X86::VPTERNLOGDZ128rmi : X86::VPTERNLOGQZ128rmi; |
4130 | else if (NVT.is256BitVector()) |
4131 | Opc = UseD ? X86::VPTERNLOGDZ256rmi : X86::VPTERNLOGQZ256rmi; |
4132 | else if (NVT.is512BitVector()) |
4133 | Opc = UseD ? X86::VPTERNLOGDZrmi : X86::VPTERNLOGQZrmi; |
4134 | else |
4135 | llvm_unreachable("Unexpected vector size!"); |
4136 | } |
4137 | /* Operands: A, B, the five address values from the fold, the immediate, then operand 0 of C (presumably its chain). */ |
4138 | SDValue Ops[] = {A, B, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, TImm, C.getOperand(0)}; |
4139 | MNode = CurDAG->getMachineNode(Opc, DL, VTs, Ops); |
4140 | |
4141 | /* Redirect users of C's second result to the new node's second result. */ |
4142 | ReplaceUses(C.getValue(1), SDValue(MNode, 1)); |
4143 | /* Attach C's memory operand to the new machine node. */ |
4144 | CurDAG->setNodeMemRefs(MNode, {cast<MemSDNode>(C)->getMemOperand()}); |
4145 | } else { |
4146 | bool UseD = NVT.getVectorElementType() == MVT::i32; |
4147 | unsigned Opc; |
4148 | if (NVT.is128BitVector()) |
4149 | Opc = UseD ? X86::VPTERNLOGDZ128rri : X86::VPTERNLOGQZ128rri; |
4150 | else if (NVT.is256BitVector()) |
4151 | Opc = UseD ? X86::VPTERNLOGDZ256rri : X86::VPTERNLOGQZ256rri; |
4152 | else if (NVT.is512BitVector()) |
4153 | Opc = UseD ? X86::VPTERNLOGDZrri : X86::VPTERNLOGQZrri; |
4154 | else |
4155 | llvm_unreachable("Unexpected vector size!"); |
4156 | |
4157 | MNode = CurDAG->getMachineNode(Opc, DL, NVT, {A, B, C, TImm}); |
4158 | } |
4159 | /* Replace Root's value with the new node and delete Root. */ |
4160 | ReplaceUses(SDValue(Root, 0), SDValue(MNode, 0)); |
4161 | CurDAG->RemoveDeadNode(Root); |
4162 | return true; |
4163 | } |
4164 | |
4165 | /* Tries to merge logic node N with one of its operands that is itself a */ |
4166 | /* single-use AND, OR, XOR or X86ISD::ANDNP into a single VPTERNLOG. */ |
4167 | /* Returns false if the type, subtarget or operands do not qualify. */ |
4168 | bool X86DAGToDAGISel::tryVPTERNLOG(SDNode *N) { |
4169 | MVT NVT = N->getSimpleValueType(0); |
4170 | |
4171 | /* Requires AVX512 and a vector type whose elements are not i1. */ |
4172 | if (!NVT.isVector() || !Subtarget->hasAVX512() || |
4173 | NVT.getVectorElementType() == MVT::i1) |
4174 | return false; |
4175 | |
4176 | /* Without VLX only 512-bit vectors are accepted. */ |
4177 | if (!(Subtarget->hasVLX() || NVT.is512BitVector())) |
4178 | return false; |
4179 | |
4180 | SDValue N0 = N->getOperand(0); |
4181 | SDValue N1 = N->getOperand(1); |
4182 | /* Returns Op (after looking through a single-use bitcast) if it is a single-use logic op, else an empty SDValue. */ |
4183 | auto getFoldableLogicOp = [](SDValue Op) { |
4184 | /* Look through a single-use bitcast. */ |
4185 | if (Op.getOpcode() == ISD::BITCAST && Op.hasOneUse()) |
4186 | Op = Op.getOperand(0); |
4187 | |
4188 | if (!Op.hasOneUse()) |
4189 | return SDValue(); |
4190 | |
4191 | unsigned Opc = Op.getOpcode(); |
4192 | if (Opc == ISD::AND || Opc == ISD::OR || Opc == ISD::XOR || |
4193 | Opc == X86ISD::ANDNP) |
4194 | return Op; |
4195 | |
4196 | return SDValue(); |
4197 | }; |
4198 | /* Operand 1 is tried first, then operand 0; A is the other operand of N. */ |
4199 | SDValue A, FoldableOp; |
4200 | if ((FoldableOp = getFoldableLogicOp(N1))) { |
4201 | A = N0; |
4202 | } else if ((FoldableOp = getFoldableLogicOp(N0))) { |
4203 | A = N1; |
4204 | } else |
4205 | return false; |
4206 | |
4207 | SDValue B = FoldableOp.getOperand(0); |
4208 | SDValue C = FoldableOp.getOperand(1); |
4209 | |
4210 | /* One bit pattern per operand; the immediate is obtained by applying */ |
4211 | /* the same logic operations to these constants. */ |
4212 | const uint8_t TernlogMagicA = 0xf0; |
4213 | const uint8_t TernlogMagicB = 0xcc; |
4214 | const uint8_t TernlogMagicC = 0xaa; |
4215 | /* First the inner op on B and C; for ANDNP the pattern of operand 0 (B) is inverted. */ |
4216 | uint8_t Imm; |
4217 | switch (FoldableOp.getOpcode()) { |
4218 | default: llvm_unreachable("Unexpected opcode!"); |
4219 | case ISD::AND: Imm = TernlogMagicB & TernlogMagicC; break; |
4220 | case ISD::OR: Imm = TernlogMagicB | TernlogMagicC; break; |
4221 | case ISD::XOR: Imm = TernlogMagicB ^ TernlogMagicC; break; |
4222 | case X86ISD::ANDNP: Imm = ~(TernlogMagicB) & TernlogMagicC; break; |
4223 | } |
4224 | /* Then N's own op with A; for ANDNP the inverted side depends on whether A is operand 0 of N. */ |
4225 | switch (N->getOpcode()) { |
4226 | default: llvm_unreachable("Unexpected opcode!"); |
4227 | case X86ISD::ANDNP: |
4228 | if (A == N0) |
4229 | Imm &= ~TernlogMagicA; |
4230 | else |
4231 | Imm = ~(Imm) & TernlogMagicA; |
4232 | break; |
4233 | case ISD::AND: Imm &= TernlogMagicA; break; |
4234 | case ISD::OR: Imm |= TernlogMagicA; break; |
4235 | case ISD::XOR: Imm ^= TernlogMagicA; break; |
4236 | } |
4237 | |
4238 | return matchVPTERNLOG(N, N, FoldableOp.getNode(), A, B, C, Imm); |
4239 | } |
4240 | |
4241 | /* Tries to replace the constant mask of an i32 or i64 AND with a mask */ |
4242 | /* whose leading zero bits are turned into ones (NegMaskVal below). */ |
4243 | /* This is done only when operand 0 is known to be zero in those bit */ |
4244 | /* positions and the new mask needs fewer signed bits (8, or 32 when */ |
4245 | /* the original mask needs more than 32). */ |
4246 | /* If the new mask is all ones, the AND is replaced by its operand 0. */ |
4247 | /* Returns true if And was replaced, false otherwise. */ |
4248 | bool X86DAGToDAGISel::shrinkAndImmediate(SDNode *And) { |
4249 | |
4250 | /* Only i32 and i64 are handled. */ |
4251 | MVT VT = And->getSimpleValueType(0); |
4252 | if (VT != MVT::i32 && VT != MVT::i64) |
4253 | return false; |
4254 | |
4255 | auto *And1C = dyn_cast<ConstantSDNode>(And->getOperand(1)); |
4256 | if (!And1C) |
4257 | return false; |
4258 | |
4259 | |
4260 | |
4261 | /* Give up if the mask has no leading zeros, or if it is an i64 mask */ |
4262 | /* with exactly 32 leading zeros. */ |
4263 | |
4264 | APInt MaskVal = And1C->getAPIntValue(); |
4265 | unsigned MaskLZ = MaskVal.countLeadingZeros(); |
4266 | if (!MaskLZ || (VT == MVT::i64 && MaskLZ == 32)) |
4267 | return false; |
4268 | |
4269 | /* i64 mask with 32 or more leading zeros: continue with the low 32 bits only. */ |
4270 | if (VT == MVT::i64 && MaskLZ >= 32) { |
4271 | MaskLZ -= 32; |
4272 | MaskVal = MaskVal.trunc(32); |
4273 | } |
4274 | |
4275 | SDValue And0 = And->getOperand(0); |
4276 | APInt HighZeros = APInt::getHighBitsSet(MaskVal.getBitWidth(), MaskLZ); |
4277 | APInt NegMaskVal = MaskVal | HighZeros; |
4278 | |
4279 | /* Continue only if the new mask fits in 8 signed bits, or fits in 32 */ |
4280 | /* signed bits while the original mask needs more than 32. */ |
4281 | unsigned MinWidth = NegMaskVal.getMinSignedBits(); |
4282 | if (MinWidth > 32 || (MinWidth > 8 && MaskVal.getMinSignedBits() <= 32)) |
4283 | return false; |
4284 | |
4285 | /* Zero-extend both values back to 64 bits if the mask was truncated above. */ |
4286 | if (VT == MVT::i64 && MaskVal.getBitWidth() < 64) { |
4287 | NegMaskVal = NegMaskVal.zext(64); |
4288 | HighZeros = HighZeros.zext(64); |
4289 | } |
4290 | |
4291 | |
4292 | /* The bits added to the mask must be known zero in operand 0. */ |
4293 | if (!CurDAG->MaskedValueIsZero(And0, HighZeros)) |
4294 | return false; |
4295 | |
4296 | |
4297 | /* New mask is all ones: replace the AND with operand 0 directly. */ |
4298 | if (NegMaskVal.isAllOnesValue()) { |
4299 | ReplaceNode(And, And0.getNode()); |
4300 | return true; |
4301 | } |
4302 | |
4303 | /* Otherwise build the AND with the new mask, replace And and run SelectCode on it. */ |
4304 | SDValue NewMask = CurDAG->getConstant(NegMaskVal, SDLoc(And), VT); |
4305 | insertDAGNode(*CurDAG, SDValue(And, 0), NewMask); |
4306 | SDValue NewAnd = CurDAG->getNode(ISD::AND, SDLoc(And), VT, And0, NewMask); |
4307 | ReplaceNode(And, NewAnd.getNode()); |
4308 | SelectCode(NewAnd.getNode()); |
4309 | return true; |
4310 | } |
4311 | /* Maps TestVT to a VPTESTM opcode (VPTESTNM when IsTestN) in the broadcast (rmb), load (rm) or register (rr) form, using the k-suffixed opcode when Masked. */ |
4312 | static unsigned getVPTESTMOpc(MVT TestVT, bool IsTestN, bool FoldedLoad, |
4313 | bool FoldedBCast, bool Masked) { |
4314 | #define VPTESTM_CASE(VT, SUFFIX) \ |
4315 | case MVT::VT: \ |
4316 | if (Masked) \ |
4317 | return IsTestN ? X86::VPTESTNM##SUFFIX##k: X86::VPTESTM##SUFFIX##k; \ |
4318 | return IsTestN ? X86::VPTESTNM##SUFFIX : X86::VPTESTM##SUFFIX; |
4319 | |
4320 | /* Cases for the vector types with 32- and 64-bit elements, plus an unreachable default. */ |
4321 | #define VPTESTM_BROADCAST_CASES(SUFFIX) \ |
4322 | default: llvm_unreachable("Unexpected VT!"); \ |
4323 | VPTESTM_CASE(v4i32, DZ128##SUFFIX) \ |
4324 | VPTESTM_CASE(v2i64, QZ128##SUFFIX) \ |
4325 | VPTESTM_CASE(v8i32, DZ256##SUFFIX) \ |
4326 | VPTESTM_CASE(v4i64, QZ256##SUFFIX) \ |
4327 | VPTESTM_CASE(v16i32, DZ##SUFFIX) \ |
4328 | VPTESTM_CASE(v8i64, QZ##SUFFIX) |
4329 | /* The broadcast cases plus the vector types with 8- and 16-bit elements. */ |
4330 | #define VPTESTM_FULL_CASES(SUFFIX) \ |
4331 | VPTESTM_BROADCAST_CASES(SUFFIX) \ |
4332 | VPTESTM_CASE(v16i8, BZ128##SUFFIX) \ |
4333 | VPTESTM_CASE(v8i16, WZ128##SUFFIX) \ |
4334 | VPTESTM_CASE(v32i8, BZ256##SUFFIX) \ |
4335 | VPTESTM_CASE(v16i16, WZ256##SUFFIX) \ |
4336 | VPTESTM_CASE(v64i8, BZ##SUFFIX) \ |
4337 | VPTESTM_CASE(v32i16, WZ##SUFFIX) |
4338 | /* Every case in the switches below returns and the default is llvm_unreachable, so a later form is reached only when the earlier flag is false. */ |
4339 | if (FoldedBCast) { |
4340 | switch (TestVT.SimpleTy) { |
4341 | VPTESTM_BROADCAST_CASES(rmb) |
4342 | } |
4343 | } |
4344 | |
4345 | if (FoldedLoad) { |
4346 | switch (TestVT.SimpleTy) { |
4347 | VPTESTM_FULL_CASES(rm) |
4348 | } |
4349 | } |
4350 | |
4351 | switch (TestVT.SimpleTy) { |
4352 | VPTESTM_FULL_CASES(rr) |
4353 | } |
4354 | |
4355 | #undef VPTESTM_FULL_CASES |
4356 | #undef VPTESTM_BROADCAST_CASES |
4357 | #undef VPTESTM_CASE |
4358 | } |
4359 | |
4360 | /* Tries to select Setcc, an equality compare against an all-zeros vector, as VPTESTM (SETNE) or VPTESTNM (SETEQ), masked by InMask when it is non-null. */ |
4361 | /* On success Root is replaced by the new node and true is returned; false means nothing was changed by this function's own code paths. */ |
4362 | bool X86DAGToDAGISel::tryVPTESTM(SDNode *Root, SDValue Setcc, |
4363 | SDValue InMask) { |
4364 | assert(Subtarget->hasAVX512() && "Expected AVX512!"); |
4365 | assert(Setcc.getSimpleValueType().getVectorElementType() == MVT::i1 && |
4366 | "Unexpected VT!"); |
4367 | |
4368 | /* Only SETEQ and SETNE are handled. */ |
4369 | ISD::CondCode CC = cast<CondCodeSDNode>(Setcc.getOperand(2))->get(); |
4370 | if (CC != ISD::SETEQ && CC != ISD::SETNE) |
4371 | return false; |
4372 | |
4373 | SDValue SetccOp0 = Setcc.getOperand(0); |
4374 | SDValue SetccOp1 = Setcc.getOperand(1); |
4375 | |
4376 | /* If the left-hand side is the all-zeros vector, swap the operands. */ |
4377 | if (ISD::isBuildVectorAllZeros(SetccOp0.getNode())) |
4378 | std::swap(SetccOp0, SetccOp1); |
4379 | |
4380 | /* The right-hand side must now be an all-zeros vector. */ |
4381 | if (!ISD::isBuildVectorAllZeros(SetccOp1.getNode())) |
4382 | return false; |
4383 | |
4384 | SDValue N0 = SetccOp0; |
4385 | |
4386 | MVT CmpVT = N0.getSimpleValueType(); |
4387 | MVT CmpSVT = CmpVT.getVectorElementType(); |
4388 | |
4389 | /* By default the value is tested against itself. */ |
4390 | SDValue Src0 = N0; |
4391 | SDValue Src1 = N0; |
4392 | |
4393 | { |
4394 | /* Look through a single-use bitcast. */ |
4395 | SDValue N0Temp = N0; |
4396 | if (N0Temp.getOpcode() == ISD::BITCAST && N0Temp.hasOneUse()) |
4397 | N0Temp = N0.getOperand(0); |
4398 | |
4399 | /* If the value is a single-use AND, test its two operands instead. */ |
4400 | if (N0Temp.getOpcode() == ISD::AND && N0Temp.hasOneUse()) { |
4401 | Src0 = N0Temp.getOperand(0); |
4402 | Src1 = N0Temp.getOperand(1); |
4403 | } |
4404 | } |
4405 | |
4406 | /* Without VLX, compares that are not 512-bit are widened (see the Widen block below). */ |
4407 | bool Widen = !Subtarget->hasVLX() && !CmpVT.is512BitVector(); |
4408 | /* Helper: try tryFoldLoad (only when not widening), then a VBROADCAST_LOAD whose memory size equals the compare element size. */ |
4409 | auto tryFoldLoadOrBCast = [&](SDNode *Root, SDNode *P, SDValue &L, |
4410 | SDValue &Base, SDValue &Scale, SDValue &Index, |
4411 | SDValue &Disp, SDValue &Segment) { |
4412 | /* Full loads are only folded when not widening. */ |
4413 | if (!Widen) |
4414 | if (tryFoldLoad(Root, P, L, Base, Scale, Index, Disp, Segment)) |
4415 | return true; |
4416 | |
4417 | |
4418 | /* Broadcast folding is limited to i32 and i64 elements. */ |
4419 | if (CmpSVT != MVT::i32 && CmpSVT != MVT::i64) |
4420 | return false; |
4421 | |
4422 | /* Look through a single-use bitcast; L is updated for the caller. */ |
4423 | if (L.getOpcode() == ISD::BITCAST && L.hasOneUse()) { |
4424 | P = L.getNode(); |
4425 | L = L.getOperand(0); |
4426 | } |
4427 | |
4428 | if (L.getOpcode() != X86ISD::VBROADCAST_LOAD) |
4429 | return false; |
4430 | |
4431 | auto *MemIntr = cast<MemIntrinsicSDNode>(L); |
4432 | if (MemIntr->getMemoryVT().getSizeInBits() != CmpSVT.getSizeInBits()) |
4433 | return false; |
4434 | |
4435 | return tryFoldBroadcast(Root, P, L, Base, Scale, Index, Disp, Segment); |
4436 | }; |
4437 | |
4438 | /* Load folding is only attempted when the two sources differ. */ |
4439 | bool CanFoldLoads = Src0 != Src1; |
4440 | |
4441 | bool FoldedLoad = false; |
4442 | SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; |
4443 | if (CanFoldLoads) { |
4444 | FoldedLoad = tryFoldLoadOrBCast(Root, N0.getNode(), Src1, Tmp0, Tmp1, Tmp2, |
4445 | Tmp3, Tmp4); |
4446 | if (!FoldedLoad) { |
4447 | /* Otherwise try Src0 and swap so that the folded operand is Src1. */ |
4448 | FoldedLoad = tryFoldLoadOrBCast(Root, N0.getNode(), Src0, Tmp0, Tmp1, |
4449 | Tmp2, Tmp3, Tmp4); |
4450 | if (FoldedLoad) |
4451 | std::swap(Src0, Src1); |
4452 | } |
4453 | } |
4454 | |
4455 | bool FoldedBCast = FoldedLoad && Src1.getOpcode() == X86ISD::VBROADCAST_LOAD; |
4456 | |
4457 | bool IsMasked = InMask.getNode() != nullptr; |
4458 | |
4459 | SDLoc dl(Root); |
4460 | |
4461 | MVT ResVT = Setcc.getSimpleValueType(); |
4462 | MVT MaskVT = ResVT; |
4463 | if (Widen) { |
4464 | /* Multiply the element count by 4 for 128-bit vectors and by 2 otherwise, then insert the sources into an IMPLICIT_DEF of the wider type. */ |
4465 | unsigned Scale = CmpVT.is128BitVector() ? 4 : 2; |
4466 | unsigned SubReg = CmpVT.is128BitVector() ? X86::sub_xmm : X86::sub_ymm; |
4467 | unsigned NumElts = CmpVT.getVectorNumElements() * Scale; |
4468 | CmpVT = MVT::getVectorVT(CmpSVT, NumElts); |
4469 | MaskVT = MVT::getVectorVT(MVT::i1, NumElts); |
4470 | SDValue ImplDef = SDValue(CurDAG->getMachineNode(X86::IMPLICIT_DEF, dl, |
4471 | CmpVT), 0); |
4472 | Src0 = CurDAG->getTargetInsertSubreg(SubReg, dl, CmpVT, ImplDef, Src0); |
4473 | /* A folded broadcast stays a memory operand and is not inserted into a register. */ |
4474 | if (!FoldedBCast) |
4475 | Src1 = CurDAG->getTargetInsertSubreg(SubReg, dl, CmpVT, ImplDef, Src1); |
4476 | |
4477 | if (IsMasked) { |
4478 | /* Copy the incoming mask into the register class used for the widened mask type. */ |
4479 | unsigned RegClass = TLI->getRegClassFor(MaskVT)->getID(); |
4480 | SDValue RC = CurDAG->getTargetConstant(RegClass, dl, MVT::i32); |
4481 | InMask = SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, |
4482 | dl, MaskVT, InMask, RC), 0); |
4483 | } |
4484 | } |
4485 | /* SETEQ selects the VPTESTNM form, SETNE the VPTESTM form. */ |
4486 | bool IsTestN = CC == ISD::SETEQ; |
4487 | unsigned Opc = getVPTESTMOpc(CmpVT, IsTestN, FoldedLoad, FoldedBCast, |
4488 | IsMasked); |
4489 | |
4490 | MachineSDNode *CNode; |
4491 | if (FoldedLoad) { |
4492 | SDVTList VTs = CurDAG->getVTList(MaskVT, MVT::Other); |
4493 | /* Operands: optional mask, Src0, the five address values, then operand 0 of Src1 (presumably its chain). */ |
4494 | if (IsMasked) { |
4495 | SDValue Ops[] = { InMask, Src0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, |
4496 | Src1.getOperand(0) }; |
4497 | CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops); |
4498 | } else { |
4499 | SDValue Ops[] = { Src0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, |
4500 | Src1.getOperand(0) }; |
4501 | CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops); |
4502 | } |
4503 | |
4504 | /* Redirect users of Src1's second result to the new node's second result. */ |
4505 | ReplaceUses(Src1.getValue(1), SDValue(CNode, 1)); |
4506 | /* Attach Src1's memory operand to the new machine node. */ |
4507 | CurDAG->setNodeMemRefs(CNode, {cast<MemSDNode>(Src1)->getMemOperand()}); |
4508 | } else { |
4509 | if (IsMasked) |
4510 | CNode = CurDAG->getMachineNode(Opc, dl, MaskVT, InMask, Src0, Src1); |
4511 | else |
4512 | CNode = CurDAG->getMachineNode(Opc, dl, MaskVT, Src0, Src1); |
4513 | } |
4514 | |
4515 | /* When widened, copy the result into the register class of the original result type. */ |
4516 | if (Widen) { |
4517 | unsigned RegClass = TLI->getRegClassFor(ResVT)->getID(); |
4518 | SDValue RC = CurDAG->getTargetConstant(RegClass, dl, MVT::i32); |
4519 | CNode = CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, |
4520 | dl, ResVT, SDValue(CNode, 0), RC); |
4521 | } |
4522 | /* Replace Root's value with the new node and delete Root. */ |
4523 | ReplaceUses(SDValue(Root, 0), SDValue(CNode, 0)); |
4524 | CurDAG->RemoveDeadNode(Root); |
4525 | return true; |
4526 | } |
4527 | |
4528 | /* Matches an OR of a single-use AND and a single-use X86ISD::ANDNP that share the ANDNP's first operand A, */ |
4529 | /* and replaces it with VPTERNLOG(A, B, C) using immediate 0xCA; returns false if the pattern does not match. */ |
4530 | bool X86DAGToDAGISel::tryMatchBitSelect(SDNode *N) { |
4531 | assert(N->getOpcode() == ISD::OR && "Unexpected opcode!"); |
4532 | |
4533 | MVT NVT = N->getSimpleValueType(0); |
4534 | |
4535 | /* Requires a vector type and AVX512. */ |
4536 | if (!NVT.isVector() || !Subtarget->hasAVX512()) |
4537 | return false; |
4538 | |
4539 | /* Without VLX only 512-bit vectors are accepted. */ |
4540 | if (!(Subtarget->hasVLX() || NVT.is512BitVector())) |
4541 | return false; |
4542 | |
4543 | SDValue N0 = N->getOperand(0); |
4544 | SDValue N1 = N->getOperand(1); |
4545 | |
4546 | /* Put the AND, if it is operand 1, into N0. */ |
4547 | if (N1.getOpcode() == ISD::AND) |
4548 | std::swap(N0, N1); |
4549 | |
4550 | if (N0.getOpcode() != ISD::AND || |
4551 | N1.getOpcode() != X86ISD::ANDNP || |
4552 | !N0.hasOneUse() || !N1.hasOneUse()) |
4553 | return false; |
4554 | |
4555 | /* A and C are taken from the ANDNP's operands 0 and 1. */ |
4556 | SDValue A = N1.getOperand(0); |
4557 | SDValue C = N1.getOperand(1); |
4558 | |
4559 | /* B is the AND operand that is not A; */ |
4560 | /* fail if neither AND operand equals A. */ |
4561 | SDValue B; |
4562 | if (N0.getOperand(0) == A) |
4563 | B = N0.getOperand(1); |
4564 | else if (N0.getOperand(1) == A) |
4565 | B = N0.getOperand(0); |
4566 | else |
4567 | return false; |
4568 | |
4569 | SDLoc dl(N); |
4570 | SDValue Imm = CurDAG->getTargetConstant(0xCA, dl, MVT::i8); |
4571 | SDValue Ternlog = CurDAG->getNode(X86ISD::VPTERNLOG, dl, NVT, A, B, C, Imm); |
4572 | ReplaceNode(N, Ternlog.getNode()); |
4573 | /* Hand the new VPTERNLOG node to matchVPTERNLOG, acting as its own root and parent. */ |
4574 | return matchVPTERNLOG(Ternlog.getNode(), Ternlog.getNode(), Ternlog.getNode(), |
4575 | A, B, C, 0xCA); |
4576 | } |
4577 | |
4578 | void X86DAGToDAGISel::Select(SDNode *Node) { |
4579 | MVT NVT = Node->getSimpleValueType(0); |
4580 | unsigned Opcode = Node->getOpcode(); |
4581 | SDLoc dl(Node); |
4582 | |
4583 | if (Node->isMachineOpcode()) { |
4584 | LLVM_DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << '\n'); |
4585 | Node->setNodeId(-1); |
4586 | return; |
4587 | } |
4588 | |
4589 | switch (Opcode) { |
4590 | default: break; |
4591 | case ISD::INTRINSIC_W_CHAIN: { |
4592 | unsigned IntNo = Node->getConstantOperandVal(1); |
4593 | switch (IntNo) { |
4594 | default: break; |
4595 | case Intrinsic::x86_encodekey128: |
4596 | case Intrinsic::x86_encodekey256: { |
4597 | if (!Subtarget->hasKL()) |
4598 | break; |
4599 | |
4600 | unsigned Opcode; |
4601 | switch (IntNo) { |
4602 | default: llvm_unreachable("Impossible intrinsic"); |
4603 | case Intrinsic::x86_encodekey128: Opcode = X86::ENCODEKEY128; break; |
4604 | case Intrinsic::x86_encodekey256: Opcode = X86::ENCODEKEY256; break; |
4605 | } |
4606 | |
4607 | SDValue Chain = Node->getOperand(0); |
4608 | Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM0, Node->getOperand(3), |
4609 | SDValue()); |
4610 | if (Opcode == X86::ENCODEKEY256) |
4611 | Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM1, Node->getOperand(4), |
4612 | Chain.getValue(1)); |
4613 | |
4614 | MachineSDNode *Res = CurDAG->getMachineNode( |
4615 | Opcode, dl, Node->getVTList(), |
4616 | {Node->getOperand(2), Chain, Chain.getValue(1)}); |
4617 | ReplaceNode(Node, Res); |
4618 | return; |
4619 | } |
4620 | case Intrinsic::x86_tileloadd64_internal: |
4621 | case Intrinsic::x86_tileloaddt164_internal: { |
4622 | if (!Subtarget->hasAMXTILE()) |
4623 | break; |
4624 | unsigned Opc = IntNo == Intrinsic::x86_tileloadd64_internal |
4625 | ? X86::PTILELOADDV |
4626 | : X86::PTILELOADDT1V; |
4627 | |
4628 | SDValue Base = Node->getOperand(4); |
4629 | SDValue Scale = getI8Imm(1, dl); |
4630 | SDValue Index = Node->getOperand(5); |
4631 | SDValue Disp = CurDAG->getTargetConstant(0, dl, MVT::i32); |
4632 | SDValue Segment = CurDAG->getRegister(0, MVT::i16); |
4633 | SDValue Chain = Node->getOperand(0); |
4634 | MachineSDNode *CNode; |
4635 | SDValue Ops[] = {Node->getOperand(2), |
4636 | Node->getOperand(3), |
4637 | Base, |
4638 | Scale, |
4639 | Index, |
4640 | Disp, |
4641 | Segment, |
4642 | Chain}; |
4643 | CNode = CurDAG->getMachineNode(Opc, dl, {MVT::x86amx, MVT::Other}, Ops); |
4644 | ReplaceNode(Node, CNode); |
4645 | return; |
4646 | } |
4647 | } |
4648 | break; |
4649 | } |
4650 | case ISD::INTRINSIC_VOID: { |
4651 | unsigned IntNo = Node->getConstantOperandVal(1); |
4652 | switch (IntNo) { |
4653 | default: break; |
4654 | case Intrinsic::x86_sse3_monitor: |
4655 | case Intrinsic::x86_monitorx: |
4656 | case Intrinsic::x86_clzero: { |
4657 | bool Use64BitPtr = Node->getOperand(2).getValueType() == MVT::i64; |
4658 | |
4659 | unsigned Opc = 0; |
4660 | switch (IntNo) { |
4661 | default: llvm_unreachable("Unexpected intrinsic!"); |
4662 | case Intrinsic::x86_sse3_monitor: |
4663 | if (!Subtarget->hasSSE3()) |
4664 | break; |
4665 | Opc = Use64BitPtr ? X86::MONITOR64rrr : X86::MONITOR32rrr; |
4666 | break; |
4667 | case Intrinsic::x86_monitorx: |
4668 | if (!Subtarget->hasMWAITX()) |
4669 | break; |
4670 | Opc = Use64BitPtr ? X86::MONITORX64rrr : X86::MONITORX32rrr; |
4671 | break; |
4672 | case Intrinsic::x86_clzero: |
4673 | if (!Subtarget->hasCLZERO()) |
4674 | break; |
4675 | Opc = Use64BitPtr ? X86::CLZERO64r : X86::CLZERO32r; |
4676 | break; |
4677 | } |
4678 | |
4679 | if (Opc) { |
4680 | unsigned PtrReg = Use64BitPtr ? X86::RAX : X86::EAX; |
4681 | SDValue Chain = CurDAG->getCopyToReg(Node->getOperand(0), dl, PtrReg, |
4682 | Node->getOperand(2), SDValue()); |
4683 | SDValue InFlag = Chain.getValue(1); |
4684 | |
4685 | if (IntNo == Intrinsic::x86_sse3_monitor || |
4686 | IntNo == Intrinsic::x86_monitorx) { |
4687 | |
4688 | Chain = CurDAG->getCopyToReg(Chain, dl, X86::ECX, Node->getOperand(3), |
4689 | InFlag); |
4690 | InFlag = Chain.getValue(1); |
4691 | Chain = CurDAG->getCopyToReg(Chain, dl, X86::EDX, Node->getOperand(4), |
4692 | InFlag); |
4693 | InFlag = Chain.getValue(1); |
4694 | } |
4695 | |
4696 | MachineSDNode *CNode = CurDAG->getMachineNode(Opc, dl, MVT::Other, |
4697 | { Chain, InFlag}); |
4698 | ReplaceNode(Node, CNode); |
4699 | return; |
4700 | } |
4701 | |
4702 | break; |
4703 | } |
4704 | case Intrinsic::x86_tilestored64_internal: { |
4705 | unsigned Opc = X86::PTILESTOREDV; |
4706 | |
4707 | SDValue Base = Node->getOperand(4); |
4708 | SDValue Scale = getI8Imm(1, dl); |
4709 | SDValue Index = Node->getOperand(5); |
4710 | SDValue Disp = CurDAG->getTargetConstant(0, dl, MVT::i32); |
4711 | SDValue Segment = CurDAG->getRegister(0, MVT::i16); |
4712 | SDValue Chain = Node->getOperand(0); |
4713 | MachineSDNode *CNode; |
4714 | SDValue Ops[] = {Node->getOperand(2), |
4715 | Node->getOperand(3), |
4716 | Base, |
4717 | Scale, |
4718 | Index, |
4719 | Disp, |
4720 | Segment, |
4721 | Node->getOperand(6), |
4722 | Chain}; |
4723 | CNode = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops); |
4724 | ReplaceNode(Node, CNode); |
4725 | return; |
4726 | } |
4727 | case Intrinsic::x86_tileloadd64: |
4728 | case Intrinsic::x86_tileloaddt164: |
4729 | case Intrinsic::x86_tilestored64: { |
4730 | if (!Subtarget->hasAMXTILE()) |
4731 | break; |
4732 | unsigned Opc; |
4733 | switch (IntNo) { |
4734 | default: llvm_unreachable("Unexpected intrinsic!"); |
4735 | case Intrinsic::x86_tileloadd64: Opc = X86::PTILELOADD; break; |
4736 | case Intrinsic::x86_tileloaddt164: Opc = X86::PTILELOADDT1; break; |
4737 | case Intrinsic::x86_tilestored64: Opc = X86::PTILESTORED; break; |
4738 | } |
4739 | |
4740 | unsigned TIndex = Node->getConstantOperandVal(2); |
4741 | SDValue TReg = getI8Imm(TIndex, dl); |
4742 | SDValue Base = Node->getOperand(3); |
4743 | SDValue Scale = getI8Imm(1, dl); |
4744 | SDValue Index = Node->getOperand(4); |
4745 | SDValue Disp = CurDAG->getTargetConstant(0, dl, MVT::i32); |
4746 | SDValue Segment = CurDAG->getRegister(0, MVT::i16); |
4747 | SDValue Chain = Node->getOperand(0); |
4748 | MachineSDNode *CNode; |
4749 | if (Opc == X86::PTILESTORED) { |
4750 | SDValue Ops[] = { Base, Scale, Index, Disp, Segment, TReg, Chain }; |
4751 | CNode = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops); |
4752 | } else { |
4753 | SDValue Ops[] = { TReg, Base, Scale, Index, Disp, Segment, Chain }; |
4754 | CNode = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops); |
4755 | } |
4756 | ReplaceNode(Node, CNode); |
4757 | return; |
4758 | } |
4759 | } |
4760 | break; |
4761 | } |
4762 | case ISD::BRIND: |
4763 | case X86ISD::NT_BRIND: { |
4764 | if (Subtarget->isTargetNaCl()) |
4765 | |
4766 | |
4767 | break; |
4768 | if (Subtarget->isTarget64BitILP32()) { |
4769 | |
4770 | |
4771 | |
4772 | SDValue Target = Node->getOperand(1); |
4773 | assert(Target.getValueType() == MVT::i32 && "Unexpected VT!"); |
4774 | SDValue ZextTarget = CurDAG->getZExtOrTrunc(Target, dl, MVT::i64); |
4775 | SDValue Brind = CurDAG->getNode(Opcode, dl, MVT::Other, |
4776 | Node->getOperand(0), ZextTarget); |
4777 | ReplaceNode(Node, Brind.getNode()); |
4778 | SelectCode(ZextTarget.getNode()); |
4779 | SelectCode(Brind.getNode()); |
4780 | return; |
4781 | } |
4782 | break; |
4783 | } |
4784 | case X86ISD::GlobalBaseReg: |
4785 | ReplaceNode(Node, getGlobalBaseReg()); |
4786 | return; |
4787 | |
4788 | case ISD::BITCAST: |
4789 | |
4790 | if (NVT.is512BitVector() || NVT.is256BitVector() || NVT.is128BitVector() || |
4791 | NVT == MVT::f128) { |
4792 | ReplaceUses(SDValue(Node, 0), Node->getOperand(0)); |
4793 | CurDAG->RemoveDeadNode(Node); |
4794 | return; |
4795 | } |
4796 | break; |
4797 | |
4798 | case ISD::SRL: |
4799 | if (matchBitExtract(Node)) |
4800 | return; |
4801 | LLVM_FALLTHROUGH; |
4802 | case ISD::SRA: |
4803 | case ISD::SHL: |
4804 | if (tryShiftAmountMod(Node)) |
4805 | return; |
4806 | break; |
4807 | |
4808 | case X86ISD::VPTERNLOG: { |
4809 | uint8_t Imm = cast<ConstantSDNode>(Node->getOperand(3))->getZExtValue(); |
4810 | if (matchVPTERNLOG(Node, Node, Node, Node->getOperand(0), |
4811 | Node->getOperand(1), Node->getOperand(2), Imm)) |
4812 | return; |
4813 | break; |
4814 | } |
4815 | |
4816 | case X86ISD::ANDNP: |
4817 | if (tryVPTERNLOG(Node)) |
4818 | return; |
4819 | break; |
4820 | |
4821 | case ISD::AND: |
4822 | if (NVT.isVector() && NVT.getVectorElementType() == MVT::i1) { |
4823 | |
4824 | SDValue N0 = Node->getOperand(0); |
4825 | SDValue N1 = Node->getOperand(1); |
4826 | if (N0.getOpcode() == ISD::SETCC && N0.hasOneUse() && |
4827 | tryVPTESTM(Node, N0, N1)) |
4828 | return; |
4829 | if (N1.getOpcode() == ISD::SETCC && N1.hasOneUse() && |
4830 | tryVPTESTM(Node, N1, N0)) |
4831 | return; |
4832 | } |
4833 | |
4834 | if (MachineSDNode *NewNode = matchBEXTRFromAndImm(Node)) { |
4835 | ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 0)); |
4836 | CurDAG->RemoveDeadNode(Node); |
4837 | return; |
4838 | } |
4839 | if (matchBitExtract(Node)) |
4840 | return; |
4841 | if (AndImmShrink && shrinkAndImmediate(Node)) |
4842 | return; |
4843 | |
4844 | LLVM_FALLTHROUGH; |
4845 | case ISD::OR: |
4846 | case ISD::XOR: |
4847 | if (tryShrinkShlLogicImm(Node)) |
4848 | return; |
4849 | if (Opcode == ISD::OR && tryMatchBitSelect(Node)) |
4850 | return; |
4851 | if (tryVPTERNLOG(Node)) |
4852 | return; |
4853 | |
4854 | LLVM_FALLTHROUGH; |
4855 | case ISD::ADD: |
4856 | case ISD::SUB: { |
4857 | |
4858 | |
4859 | |
4860 | |
4861 | |
4862 | |
4863 | if (!CurDAG->shouldOptForSize()) |
4864 | break; |
4865 | |
4866 | |
4867 | if (NVT != MVT::i8 && NVT != MVT::i16 && NVT != MVT::i32 && NVT != MVT::i64) |
4868 | break; |
4869 | |
4870 | SDValue N0 = Node->getOperand(0); |
4871 | SDValue N1 = Node->getOperand(1); |
4872 | |
4873 | ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(N1); |
4874 | if (!Cst) |
4875 | break; |
4876 | |
4877 | int64_t Val = Cst->getSExtValue(); |
4878 | |
4879 | |
4880 | |
4881 | if (!isInt<8>(Val) && !isInt<32>(Val)) |
4882 | break; |
4883 | |
4884 | |
4885 | if (Opcode == ISD::ADD && (Val == 1 || Val == -1)) |
4886 | break; |
4887 | |
4888 | |
4889 | if (!shouldAvoidImmediateInstFormsForSize(N1.getNode())) |
4890 | break; |
4891 | |
4892 | |
4893 | unsigned ROpc, MOpc; |
4894 | switch (NVT.SimpleTy) { |
4895 | default: llvm_unreachable("Unexpected VT!"); |
4896 | case MVT::i8: |
4897 | switch (Opcode) { |
4898 | default: llvm_unreachable("Unexpected opcode!"); |
4899 | case ISD::ADD: ROpc = X86::ADD8rr; MOpc = X86::ADD8rm; break; |
4900 | case ISD::SUB: ROpc = X86::SUB8rr; MOpc = X86::SUB8rm; break; |
4901 | case ISD::AND: ROpc = X86::AND8rr; MOpc = X86::AND8rm; break; |
4902 | case ISD::OR: ROpc = X86::OR8rr; MOpc = X86::OR8rm; break; |
4903 | case ISD::XOR: ROpc = X86::XOR8rr; MOpc = X86::XOR8rm; break; |
4904 | } |
4905 | break; |
4906 | case MVT::i16: |
4907 | switch (Opcode) { |
4908 | default: llvm_unreachable("Unexpected opcode!"); |
4909 | case ISD::ADD: ROpc = X86::ADD16rr; MOpc = X86::ADD16rm; break; |
4910 | case ISD::SUB: ROpc = X86::SUB16rr; MOpc = X86::SUB16rm; break; |
4911 | case ISD::AND: ROpc = X86::AND16rr; MOpc = X86::AND16rm; break; |
4912 | case ISD::OR: ROpc = X86::OR16rr; MOpc = X86::OR16rm; break; |
4913 | case ISD::XOR: ROpc = X86::XOR16rr; MOpc = X86::XOR16rm; break; |
4914 | } |
4915 | break; |
4916 | case MVT::i32: |
4917 | switch (Opcode) { |
4918 | default: llvm_unreachable("Unexpected opcode!"); |
4919 | case ISD::ADD: ROpc = X86::ADD32rr; MOpc = X86::ADD32rm; break; |
4920 | case ISD::SUB: ROpc = X86::SUB32rr; MOpc = X86::SUB32rm; break; |
4921 | case ISD::AND: ROpc = X86::AND32rr; MOpc = X86::AND32rm; break; |
4922 | case ISD::OR: ROpc = X86::OR32rr; MOpc = X86::OR32rm; break; |
4923 | case ISD::XOR: ROpc = X86::XOR32rr; MOpc = X86::XOR32rm; break; |
4924 | } |
4925 | break; |
4926 | case MVT::i64: |
4927 | switch (Opcode) { |
4928 | default: llvm_unreachable("Unexpected opcode!"); |
4929 | case ISD::ADD: ROpc = X86::ADD64rr; MOpc = X86::ADD64rm; break; |
4930 | case ISD::SUB: ROpc = X86::SUB64rr; MOpc = X86::SUB64rm; break; |
4931 | case ISD::AND: ROpc = X86::AND64rr; MOpc = X86::AND64rm; break; |
4932 | case ISD::OR: ROpc = X86::OR64rr; MOpc = X86::OR64rm; break; |
4933 | case ISD::XOR: ROpc = X86::XOR64rr; MOpc = X86::XOR64rm; break; |
4934 | } |
4935 | break; |
4936 | } |
4937 | |
4938 | |
4939 | |
4940 | |
4941 | if (Opcode != ISD::SUB) { |
4942 | SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; |
4943 | if (tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) { |
4944 | SDValue Ops[] = { N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) }; |
4945 | SDVTList VTs = CurDAG->getVTList(NVT, MVT::i32, MVT::Other); |
4946 | MachineSDNode *CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops); |
4947 | |
4948 | ReplaceUses(N0.getValue(1), SDValue(CNode, 2)); |
4949 | |
4950 | CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N0)->getMemOperand()}); |
4951 | ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0)); |
4952 | CurDAG->RemoveDeadNode(Node); |
4953 | return; |
4954 | } |
4955 | } |
4956 | |
4957 | CurDAG->SelectNodeTo(Node, ROpc, NVT, MVT::i32, N0, N1); |
4958 | return; |
4959 | } |
4960 | |
4961 | case X86ISD::SMUL: |
4962 | |
4963 | if (NVT != MVT::i8) |
4964 | break; |
4965 | LLVM_FALLTHROUGH; |
4966 | case X86ISD::UMUL: { |
4967 | SDValue N0 = Node->getOperand(0); |
4968 | SDValue N1 = Node->getOperand(1); |
4969 | |
4970 | unsigned LoReg, ROpc, MOpc; |
4971 | switch (NVT.SimpleTy) { |
4972 | default: llvm_unreachable("Unsupported VT!"); |
4973 | case MVT::i8: |
4974 | LoReg = X86::AL; |
4975 | ROpc = Opcode == X86ISD::SMUL ? X86::IMUL8r : X86::MUL8r; |
4976 | MOpc = Opcode == X86ISD::SMUL ? X86::IMUL8m : X86::MUL8m; |
4977 | break; |
4978 | case MVT::i16: |
4979 | LoReg = X86::AX; |
4980 | ROpc = X86::MUL16r; |
4981 | MOpc = X86::MUL16m; |
4982 | break; |
4983 | case MVT::i32: |
4984 | LoReg = X86::EAX; |
4985 | ROpc = X86::MUL32r; |
4986 | MOpc = X86::MUL32m; |
4987 | break; |
4988 | case MVT::i64: |
4989 | LoReg = X86::RAX; |
4990 | ROpc = X86::MUL64r; |
4991 | MOpc = X86::MUL64m; |
4992 | break; |
4993 | } |
4994 | |
4995 | SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; |
4996 | bool FoldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); |
4997 | |
4998 | if (!FoldedLoad) { |
4999 | FoldedLoad = tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); |
5000 | if (FoldedLoad) |
5001 | std::swap(N0, N1); |
5002 | } |
5003 | |
5004 | SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg, |
5005 | N0, SDValue()).getValue(1); |
5006 | |
5007 | MachineSDNode *CNode; |
5008 | if (FoldedLoad) { |
5009 | |
5010 | |
5011 | SDVTList VTs; |
5012 | if (NVT == MVT::i8) |
5013 | VTs = CurDAG->getVTList(NVT, MVT::i32, MVT::Other); |
5014 | else |
5015 | VTs = CurDAG->getVTList(NVT, NVT, MVT::i32, MVT::Other); |
5016 | |
5017 | SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0), |
5018 | InFlag }; |
5019 | CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops); |
5020 | |
5021 | |
5022 | ReplaceUses(N1.getValue(1), SDValue(CNode, NVT == MVT::i8 ? 2 : 3)); |
5023 | |
5024 | CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()}); |
5025 | } else { |
5026 | |
5027 | |
5028 | SDVTList VTs; |
5029 | if (NVT == MVT::i8) |
5030 | VTs = CurDAG->getVTList(NVT, MVT::i32); |
5031 | else |
5032 | VTs = CurDAG->getVTList(NVT, NVT, MVT::i32); |
5033 | |
5034 | CNode = CurDAG->getMachineNode(ROpc, dl, VTs, {N1, InFlag}); |
5035 | } |
5036 | |
5037 | ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0)); |
5038 | ReplaceUses(SDValue(Node, 1), SDValue(CNode, NVT == MVT::i8 ? 1 : 2)); |
5039 | CurDAG->RemoveDeadNode(Node); |
5040 | return; |
5041 | } |
5042 | |
5043 | case ISD::SMUL_LOHI: |
5044 | case ISD::UMUL_LOHI: { |
5045 | SDValue N0 = Node->getOperand(0); |
5046 | SDValue N1 = Node->getOperand(1); |
5047 | |
5048 | unsigned Opc, MOpc; |
5049 | unsigned LoReg, HiReg; |
5050 | bool IsSigned = Opcode == ISD::SMUL_LOHI; |
5051 | bool UseMULX = !IsSigned && Subtarget->hasBMI2(); |
5052 | bool UseMULXHi = UseMULX && SDValue(Node, 0).use_empty(); |
5053 | switch (NVT.SimpleTy) { |
5054 | default: llvm_unreachable("Unsupported VT!"); |
5055 | case MVT::i32: |
5056 | Opc = UseMULXHi ? X86::MULX32Hrr : |
5057 | UseMULX ? X86::MULX32rr : |
5058 | IsSigned ? X86::IMUL32r : X86::MUL32r; |
5059 | MOpc = UseMULXHi ? X86::MULX32Hrm : |
5060 | UseMULX ? X86::MULX32rm : |
5061 | IsSigned ? X86::IMUL32m : X86::MUL32m; |
5062 | LoReg = UseMULX ? X86::EDX : X86::EAX; |
5063 | HiReg = X86::EDX; |
5064 | break; |
5065 | case MVT::i64: |
5066 | Opc = UseMULXHi ? X86::MULX64Hrr : |
5067 | UseMULX ? X86::MULX64rr : |
5068 | IsSigned ? X86::IMUL64r : X86::MUL64r; |
5069 | MOpc = UseMULXHi ? X86::MULX64Hrm : |
5070 | UseMULX ? X86::MULX64rm : |
5071 | IsSigned ? X86::IMUL64m : X86::MUL64m; |
5072 | LoReg = UseMULX ? X86::RDX : X86::RAX; |
5073 | HiReg = X86::RDX; |
5074 | break; |
5075 | } |
5076 | |
5077 | SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; |
5078 | bool foldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); |
5079 | |
5080 | if (!foldedLoad) { |
5081 | foldedLoad = tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); |
5082 | if (foldedLoad) |
5083 | std::swap(N0, N1); |
5084 | } |
5085 | |
5086 | SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, LoReg, |
5087 | N0, SDValue()).getValue(1); |
5088 | SDValue ResHi, ResLo; |
5089 | if (foldedLoad) { |
5090 | SDValue Chain; |
5091 | MachineSDNode *CNode = nullptr; |
5092 | SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0), |
5093 | InFlag }; |
5094 | if (UseMULXHi) { |
5095 | SDVTList VTs = CurDAG->getVTList(NVT, MVT::Other); |
5096 | CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops); |
5097 | ResHi = SDValue(CNode, 0); |
5098 | Chain = SDValue(CNode, 1); |
5099 | } else if (UseMULX) { |
5100 | SDVTList VTs = CurDAG->getVTList(NVT, NVT, MVT::Other); |
5101 | CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops); |
5102 | ResHi = SDValue(CNode, 0); |
5103 | ResLo = SDValue(CNode, 1); |
5104 | Chain = SDValue(CNode, 2); |
5105 | } else { |
5106 | SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue); |
5107 | CNode = CurDAG->getMachineNode(MOpc, dl, VTs, Ops); |
5108 | Chain = SDValue(CNode, 0); |
5109 | InFlag = SDValue(CNode, 1); |
5110 | } |
5111 | |
5112 | |
5113 | ReplaceUses(N1.getValue(1), Chain); |
5114 | |
5115 | CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()}); |
5116 | } else { |
5117 | SDValue Ops[] = { N1, InFlag }; |
5118 | if (UseMULXHi) { |
5119 | SDVTList VTs = CurDAG->getVTList(NVT); |
5120 | SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops); |
5121 | ResHi = SDValue(CNode, 0); |
5122 | } else if (UseMULX) { |
5123 | SDVTList VTs = CurDAG->getVTList(NVT, NVT); |
5124 | SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops); |
5125 | ResHi = SDValue(CNode, 0); |
5126 | ResLo = SDValue(CNode, 1); |
5127 | } else { |
5128 | SDVTList VTs = CurDAG->getVTList(MVT::Glue); |
5129 | SDNode *CNode = CurDAG->getMachineNode(Opc, dl, VTs, Ops); |
5130 | InFlag = SDValue(CNode, 0); |
5131 | } |
5132 | } |
5133 | |
5134 | |
5135 | if (!SDValue(Node, 0).use_empty()) { |
5136 | if (!ResLo) { |
5137 | assert(LoReg && "Register for low half is not defined!"); |
5138 | ResLo = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, LoReg, |
5139 | NVT, InFlag); |
5140 | InFlag = ResLo.getValue(2); |
5141 | } |
5142 | ReplaceUses(SDValue(Node, 0), ResLo); |
5143 | LLVM_DEBUG(dbgs() << "=> "; ResLo.getNode()->dump(CurDAG); |
5144 | dbgs() << '\n'); |
5145 | } |
5146 | |
5147 | if (!SDValue(Node, 1).use_empty()) { |
5148 | if (!ResHi) { |
5149 | assert(HiReg && "Register for high half is not defined!"); |
5150 | ResHi = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, HiReg, |
5151 | NVT, InFlag); |
5152 | InFlag = ResHi.getValue(2); |
5153 | } |
5154 | ReplaceUses(SDValue(Node, 1), ResHi); |
5155 | LLVM_DEBUG(dbgs() << "=> "; ResHi.getNode()->dump(CurDAG); |
5156 | dbgs() << '\n'); |
5157 | } |
5158 | |
5159 | CurDAG->RemoveDeadNode(Node); |
5160 | return; |
5161 | } |
5162 | |
5163 | case ISD::SDIVREM: |
5164 | case ISD::UDIVREM: { |
5165 | SDValue N0 = Node->getOperand(0); |
5166 | SDValue N1 = Node->getOperand(1); |
5167 | |
5168 | unsigned ROpc, MOpc; |
5169 | bool isSigned = Opcode == ISD::SDIVREM; |
5170 | if (!isSigned) { |
5171 | switch (NVT.SimpleTy) { |
5172 | default: llvm_unreachable("Unsupported VT!"); |
5173 | case MVT::i8: ROpc = X86::DIV8r; MOpc = X86::DIV8m; break; |
5174 | case MVT::i16: ROpc = X86::DIV16r; MOpc = X86::DIV16m; break; |
5175 | case MVT::i32: ROpc = X86::DIV32r; MOpc = X86::DIV32m; break; |
5176 | case MVT::i64: ROpc = X86::DIV64r; MOpc = X86::DIV64m; break; |
5177 | } |
5178 | } else { |
5179 | switch (NVT.SimpleTy) { |
5180 | default: llvm_unreachable("Unsupported VT!"); |
5181 | case MVT::i8: ROpc = X86::IDIV8r; MOpc = X86::IDIV8m; break; |
5182 | case MVT::i16: ROpc = X86::IDIV16r; MOpc = X86::IDIV16m; break; |
5183 | case MVT::i32: ROpc = X86::IDIV32r; MOpc = X86::IDIV32m; break; |
5184 | case MVT::i64: ROpc = X86::IDIV64r; MOpc = X86::IDIV64m; break; |
5185 | } |
5186 | } |
5187 | |
5188 | unsigned LoReg, HiReg, ClrReg; |
5189 | unsigned SExtOpcode; |
5190 | switch (NVT.SimpleTy) { |
5191 | default: llvm_unreachable("Unsupported VT!"); |
5192 | case MVT::i8: |
5193 | LoReg = X86::AL; ClrReg = HiReg = X86::AH; |
5194 | SExtOpcode = 0; |
5195 | break; |
5196 | case MVT::i16: |
5197 | LoReg = X86::AX; HiReg = X86::DX; |
5198 | ClrReg = X86::DX; |
5199 | SExtOpcode = X86::CWD; |
5200 | break; |
5201 | case MVT::i32: |
5202 | LoReg = X86::EAX; ClrReg = HiReg = X86::EDX; |
5203 | SExtOpcode = X86::CDQ; |
5204 | break; |
5205 | case MVT::i64: |
5206 | LoReg = X86::RAX; ClrReg = HiReg = X86::RDX; |
5207 | SExtOpcode = X86::CQO; |
5208 | break; |
5209 | } |
5210 | |
5211 | SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; |
5212 | bool foldedLoad = tryFoldLoad(Node, N1, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4); |
5213 | bool signBitIsZero = CurDAG->SignBitIsZero(N0); |
5214 | |
5215 | SDValue InFlag; |
5216 | if (NVT == MVT::i8) { |
5217 | |
5218 | |
5219 | SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Chain; |
5220 | MachineSDNode *Move; |
5221 | if (tryFoldLoad(Node, N0, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) { |
5222 | SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N0.getOperand(0) }; |
5223 | unsigned Opc = (isSigned && !signBitIsZero) ? X86::MOVSX16rm8 |
5224 | : X86::MOVZX16rm8; |
5225 | Move = CurDAG->getMachineNode(Opc, dl, MVT::i16, MVT::Other, Ops); |
5226 | Chain = SDValue(Move, 1); |
5227 | ReplaceUses(N0.getValue(1), Chain); |
5228 | |
5229 | CurDAG->setNodeMemRefs(Move, {cast<LoadSDNode>(N0)->getMemOperand()}); |
5230 | } else { |
5231 | unsigned Opc = (isSigned && !signBitIsZero) ? X86::MOVSX16rr8 |
5232 | : X86::MOVZX16rr8; |
5233 | Move = CurDAG->getMachineNode(Opc, dl, MVT::i16, N0); |
5234 | Chain = CurDAG->getEntryNode(); |
5235 | } |
5236 | Chain = CurDAG->getCopyToReg(Chain, dl, X86::AX, SDValue(Move, 0), |
5237 | SDValue()); |
5238 | InFlag = Chain.getValue(1); |
5239 | } else { |
5240 | InFlag = |
5241 | CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, |
5242 | LoReg, N0, SDValue()).getValue(1); |
5243 | if (isSigned && !signBitIsZero) { |
5244 | |
5245 | InFlag = |
5246 | SDValue(CurDAG->getMachineNode(SExtOpcode, dl, MVT::Glue, InFlag),0); |
5247 | } else { |
5248 | |
5249 | SDVTList VTs = CurDAG->getVTList(MVT::i32, MVT::i32); |
5250 | SDValue ClrNode = |
5251 | SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, VTs, None), 0); |
5252 | switch (NVT.SimpleTy) { |
5253 | case MVT::i16: |
5254 | ClrNode = |
5255 | SDValue(CurDAG->getMachineNode( |
5256 | TargetOpcode::EXTRACT_SUBREG, dl, MVT::i16, ClrNode, |
5257 | CurDAG->getTargetConstant(X86::sub_16bit, dl, |
5258 | MVT::i32)), |
5259 | 0); |
5260 | break; |
5261 | case MVT::i32: |
5262 | break; |
5263 | case MVT::i64: |
5264 | ClrNode = |
5265 | SDValue(CurDAG->getMachineNode( |
5266 | TargetOpcode::SUBREG_TO_REG, dl, MVT::i64, |
5267 | CurDAG->getTargetConstant(0, dl, MVT::i64), ClrNode, |
5268 | CurDAG->getTargetConstant(X86::sub_32bit, dl, |
5269 | MVT::i32)), |
5270 | 0); |
5271 | break; |
5272 | default: |
5273 | llvm_unreachable("Unexpected division source"); |
5274 | } |
5275 | |
5276 | InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, ClrReg, |
5277 | ClrNode, InFlag).getValue(1); |
5278 | } |
5279 | } |
5280 | |
5281 | if (foldedLoad) { |
5282 | SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, N1.getOperand(0), |
5283 | InFlag }; |
5284 | MachineSDNode *CNode = |
5285 | CurDAG->getMachineNode(MOpc, dl, MVT::Other, MVT::Glue, Ops); |
5286 | InFlag = SDValue(CNode, 1); |
5287 | |
5288 | ReplaceUses(N1.getValue(1), SDValue(CNode, 0)); |
5289 | |
5290 | CurDAG->setNodeMemRefs(CNode, {cast<LoadSDNode>(N1)->getMemOperand()}); |
5291 | } else { |
5292 | InFlag = |
5293 | SDValue(CurDAG->getMachineNode(ROpc, dl, MVT::Glue, N1, InFlag), 0); |
5294 | } |
5295 | |
5296 | |
5297 | |
5298 | |
5299 | |
5300 | |
5301 | |
5302 | |
5303 | if (HiReg == X86::AH && !SDValue(Node, 1).use_empty()) { |
5304 | SDValue AHCopy = CurDAG->getRegister(X86::AH, MVT::i8); |
5305 | unsigned AHExtOpcode = |
5306 | isSigned ? X86::MOVSX32rr8_NOREX : X86::MOVZX32rr8_NOREX; |
5307 | |
5308 | SDNode *RNode = CurDAG->getMachineNode(AHExtOpcode, dl, MVT::i32, |
5309 | MVT::Glue, AHCopy, InFlag); |
5310 | SDValue Result(RNode, 0); |
5311 | InFlag = SDValue(RNode, 1); |
5312 | |
5313 | Result = |
5314 | CurDAG->getTargetExtractSubreg(X86::sub_8bit, dl, MVT::i8, Result); |
5315 | |
5316 | ReplaceUses(SDValue(Node, 1), Result); |
5317 | LLVM_DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); |
5318 | dbgs() << '\n'); |
5319 | } |
5320 | |
5321 | if (!SDValue(Node, 0).use_empty()) { |
5322 | SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, |
5323 | LoReg, NVT, InFlag); |
5324 | InFlag = Result.getValue(2); |
5325 | ReplaceUses(SDValue(Node, 0), Result); |
5326 | LLVM_DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); |
5327 | dbgs() << '\n'); |
5328 | } |
5329 | |
5330 | if (!SDValue(Node, 1).use_empty()) { |
5331 | SDValue Result = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), dl, |
5332 | HiReg, NVT, InFlag); |
5333 | InFlag = Result.getValue(2); |
5334 | ReplaceUses(SDValue(Node, 1), Result); |
5335 | LLVM_DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); |
5336 | dbgs() << '\n'); |
5337 | } |
5338 | CurDAG->RemoveDeadNode(Node); |
5339 | return; |
5340 | } |
5341 | |
5342 | case X86ISD::FCMP: |
5343 | case X86ISD::STRICT_FCMP: |
5344 | case X86ISD::STRICT_FCMPS: { |
5345 | bool IsStrictCmp = Node->getOpcode() == X86ISD::STRICT_FCMP || |
5346 | Node->getOpcode() == X86ISD::STRICT_FCMPS; |
5347 | SDValue N0 = Node->getOperand(IsStrictCmp ? 1 : 0); |
5348 | SDValue N1 = Node->getOperand(IsStrictCmp ? 2 : 1); |
5349 | |
5350 | |
5351 | MVT CmpVT = N0.getSimpleValueType(); |
5352 | |
5353 | |
5354 | if (Subtarget->hasCMov()) |
5355 | break; |
5356 | |
5357 | bool IsSignaling = Node->getOpcode() == X86ISD::STRICT_FCMPS; |
5358 | |
5359 | unsigned Opc; |
5360 | switch (CmpVT.SimpleTy) { |
5361 | default: llvm_unreachable("Unexpected type!"); |
5362 | case MVT::f32: |
5363 | Opc = IsSignaling ? X86::COM_Fpr32 : X86::UCOM_Fpr32; |
5364 | break; |
5365 | case MVT::f64: |
5366 | Opc = IsSignaling ? X86::COM_Fpr64 : X86::UCOM_Fpr64; |
5367 | break; |
5368 | case MVT::f80: |
5369 | Opc = IsSignaling ? X86::COM_Fpr80 : X86::UCOM_Fpr80; |
5370 | break; |
5371 | } |
5372 | |
5373 | SDValue Chain = |
5374 | IsStrictCmp ? Node->getOperand(0) : CurDAG->getEntryNode(); |
5375 | SDValue Glue; |
5376 | if (IsStrictCmp) { |
5377 | SDVTList VTs = CurDAG->getVTList(MVT::Other, MVT::Glue); |
5378 | Chain = SDValue(CurDAG->getMachineNode(Opc, dl, VTs, {N0, N1, Chain}), 0); |
5379 | Glue = Chain.getValue(1); |
5380 | } else { |
5381 | Glue = SDValue(CurDAG->getMachineNode(Opc, dl, MVT::Glue, N0, N1), 0); |
5382 | } |
5383 | |
5384 | |
5385 | SDValue FNSTSW = |
5386 | SDValue(CurDAG->getMachineNode(X86::FNSTSW16r, dl, MVT::i16, Glue), 0); |
5387 | |
5388 | |
5389 | SDValue Extract = |
5390 | CurDAG->getTargetExtractSubreg(X86::sub_8bit_hi, dl, MVT::i8, FNSTSW); |
5391 | |
5392 | |
5393 | |
5394 | assert(Subtarget->hasLAHFSAHF() && |
5395 | "Target doesn't support SAHF or FCOMI?"); |
5396 | SDValue AH = CurDAG->getCopyToReg(Chain, dl, X86::AH, Extract, SDValue()); |
5397 | Chain = AH; |
5398 | SDValue SAHF = SDValue( |
5399 | CurDAG->getMachineNode(X86::SAHF, dl, MVT::i32, AH.getValue(1)), 0); |
5400 | |
5401 | if (IsStrictCmp) |
5402 | ReplaceUses(SDValue(Node, 1), Chain); |
5403 | |
5404 | ReplaceUses(SDValue(Node, 0), SAHF); |
5405 | CurDAG->RemoveDeadNode(Node); |
5406 | return; |
5407 | } |
5408 | |
5409 | case X86ISD::CMP: { |
5410 | SDValue N0 = Node->getOperand(0); |
5411 | SDValue N1 = Node->getOperand(1); |
5412 | |
5413 | |
5414 | if (!isNullConstant(N1)) |
5415 | break; |
5416 | |
5417 | |
5418 | MVT CmpVT = N0.getSimpleValueType(); |
5419 | |
5420 | |
5421 | |
5422 | |
5423 | |
5424 | if (N0.getOpcode() == ISD::AND && N0.hasOneUse()) { |
5425 | if (MachineSDNode *NewNode = matchBEXTRFromAndImm(N0.getNode())) { |
5426 | unsigned TestOpc = CmpVT == MVT::i64 ? X86::TEST64rr |
5427 | : X86::TEST32rr; |
5428 | SDValue BEXTR = SDValue(NewNode, 0); |
5429 | NewNode = CurDAG->getMachineNode(TestOpc, dl, MVT::i32, BEXTR, BEXTR); |
5430 | ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 0)); |
5431 | CurDAG->RemoveDeadNode(Node); |
5432 | return; |
5433 | } |
5434 | } |
5435 | |
5436 | |
5437 | if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse()) |
5438 | N0 = N0.getOperand(0); |
5439 | |
5440 | |
5441 | |
5442 | |
5443 | if (N0.getOpcode() == ISD::AND && |
5444 | N0.getNode()->hasOneUse() && |
5445 | N0.getValueType() != MVT::i8) { |
5446 | ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); |
5447 | if (!C) break; |
5448 | uint64_t Mask = C->getZExtValue(); |
5449 | |
5450 | |
5451 | Mask &= maskTrailingOnes<uint64_t>(CmpVT.getScalarSizeInBits()); |
5452 | |
5453 | |
5454 | |
5455 | |
5456 | if (CmpVT == MVT::i64 && !isInt<32>(Mask) && |
5457 | onlyUsesZeroFlag(SDValue(Node, 0))) { |
5458 | if (isMask_64(~Mask)) { |
5459 | unsigned TrailingZeros = countTrailingZeros(Mask); |
5460 | SDValue Imm = CurDAG->getTargetConstant(TrailingZeros, dl, MVT::i64); |
5461 | SDValue Shift = |
5462 | SDValue(CurDAG->getMachineNode(X86::SHR64ri, dl, MVT::i64, MVT::i32, |
5463 | N0.getOperand(0), Imm), 0); |
5464 | MachineSDNode *Test = CurDAG->getMachineNode(X86::TEST64rr, dl, |
5465 | MVT::i32, Shift, Shift); |
5466 | ReplaceNode(Node, Test); |
5467 | return; |
5468 | } |
5469 | if (isMask_64(Mask)) { |
5470 | unsigned LeadingZeros = countLeadingZeros(Mask); |
5471 | SDValue Imm = CurDAG->getTargetConstant(LeadingZeros, dl, MVT::i64); |
5472 | SDValue Shift = |
5473 | SDValue(CurDAG->getMachineNode(X86::SHL64ri, dl, MVT::i64, MVT::i32, |
5474 | N0.getOperand(0), Imm), 0); |
5475 | MachineSDNode *Test = CurDAG->getMachineNode(X86::TEST64rr, dl, |
5476 | MVT::i32, Shift, Shift); |
5477 | ReplaceNode(Node, Test); |
5478 | return; |
5479 | } |
5480 | } |
5481 | |
5482 | MVT VT; |
5483 | int SubRegOp; |
5484 | unsigned ROpc, MOpc; |
5485 | |
5486 | |
5487 | |
5488 | |
5489 | |
5490 | |
5491 | if (isUInt<8>(Mask) && |
5492 | (!(Mask & 0x80) || CmpVT == MVT::i8 || |
5493 | hasNoSignFlagUses(SDValue(Node, 0)))) { |
5494 | |
5495 | VT = MVT::i8; |
5496 | SubRegOp = X86::sub_8bit; |
5497 | ROpc = X86::TEST8ri; |
5498 | MOpc = X86::TEST8mi; |
5499 | } else if (OptForMinSize && isUInt<16>(Mask) && |
5500 | (!(Mask & 0x8000) || CmpVT == MVT::i16 || |
5501 | hasNoSignFlagUses(SDValue(Node, 0)))) { |
5502 | |
5503 | |
5504 | |
5505 | |
5506 | VT = MVT::i16; |
5507 | SubRegOp = X86::sub_16bit; |
5508 | ROpc = X86::TEST16ri; |
5509 | MOpc = X86::TEST16mi; |
5510 | } else if (isUInt<32>(Mask) && N0.getValueType() != MVT::i16 && |
5511 | ((!(Mask & 0x80000000) && |
5512 | |
5513 | |
5514 | (CmpVT != MVT::i16 || !(Mask & 0x8000))) || |
5515 | CmpVT == MVT::i32 || |
5516 | hasNoSignFlagUses(SDValue(Node, 0)))) { |
5517 | |
5518 | |
5519 | |
5520 | |
5521 | |
5522 | VT = MVT::i32; |
5523 | SubRegOp = X86::sub_32bit; |
5524 | ROpc = X86::TEST32ri; |
5525 | MOpc = X86::TEST32mi; |
5526 | } else { |
5527 | |
5528 | break; |
5529 | } |
5530 | |
5531 | SDValue Imm = CurDAG->getTargetConstant(Mask, dl, VT); |
5532 | SDValue Reg = N0.getOperand(0); |
5533 | |
5534 | |
5535 | MachineSDNode *NewNode; |
5536 | SDValue Tmp0, Tmp1, Tmp2, Tmp3, Tmp4; |
5537 | if (tryFoldLoad(Node, N0.getNode(), Reg, Tmp0, Tmp1, Tmp2, Tmp3, Tmp4)) { |
5538 | if (auto *LoadN = dyn_cast<LoadSDNode>(N0.getOperand(0).getNode())) { |
5539 | if (!LoadN->isSimple()) { |
5540 | unsigned NumVolBits = LoadN->getValueType(0).getSizeInBits(); |
5541 | if ((MOpc == X86::TEST8mi && NumVolBits != 8) || |
5542 | (MOpc == X86::TEST16mi && NumVolBits != 16) || |
5543 | (MOpc == X86::TEST32mi && NumVolBits != 32)) |
5544 | break; |
5545 | } |
5546 | } |
5547 | SDValue Ops[] = { Tmp0, Tmp1, Tmp2, Tmp3, Tmp4, Imm, |
5548 | Reg.getOperand(0) }; |
5549 | NewNode = CurDAG->getMachineNode(MOpc, dl, MVT::i32, MVT::Other, Ops); |
5550 | |
5551 | ReplaceUses(Reg.getValue(1), SDValue(NewNode, 1)); |
5552 | |
5553 | CurDAG->setNodeMemRefs(NewNode, |
5554 | {cast<LoadSDNode>(Reg)->getMemOperand()}); |
5555 | } else { |
5556 | |
5557 | if (N0.getValueType() != VT) |
5558 | Reg = CurDAG->getTargetExtractSubreg(SubRegOp, dl, VT, Reg); |
5559 | |
5560 | NewNode = CurDAG->getMachineNode(ROpc, dl, MVT::i32, Reg, Imm); |
5561 | } |
5562 | |
5563 | ReplaceNode(Node, NewNode); |
5564 | return; |
5565 | } |
5566 | break; |
5567 | } |
5568 | case X86ISD::PCMPISTR: { |
5569 | if (!Subtarget->hasSSE42()) |
5570 | break; |
5571 | |
5572 | bool NeedIndex = !SDValue(Node, 0).use_empty(); |
5573 | bool NeedMask = !SDValue(Node, 1).use_empty(); |
5574 | |
5575 | bool MayFoldLoad = !NeedIndex || !NeedMask; |
5576 | |
5577 | MachineSDNode *CNode; |
5578 | if (NeedMask) { |
5579 | unsigned ROpc = Subtarget->hasAVX() ? X86::VPCMPISTRMrr : X86::PCMPISTRMrr; |
5580 | unsigned MOpc = Subtarget->hasAVX() ? X86::VPCMPISTRMrm : X86::PCMPISTRMrm; |
5581 | CNode = emitPCMPISTR(ROpc, MOpc, MayFoldLoad, dl, MVT::v16i8, Node); |
5582 | ReplaceUses(SDValue(Node, 1), SDValue(CNode, 0)); |
5583 | } |
5584 | if (NeedIndex || !NeedMask) { |
5585 | unsigned ROpc = Subtarget->hasAVX() ? X86::VPCMPISTRIrr : X86::PCMPISTRIrr; |
5586 | unsigned MOpc = Subtarget->hasAVX() ? X86::VPCMPISTRIrm : X86::PCMPISTRIrm; |
5587 | CNode = emitPCMPISTR(ROpc, MOpc, MayFoldLoad, dl, MVT::i32, Node); |
5588 | ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0)); |
5589 | } |
5590 | |
5591 | |
5592 | ReplaceUses(SDValue(Node, 2), SDValue(CNode, 1)); |
5593 | CurDAG->RemoveDeadNode(Node); |
5594 | return; |
5595 | } |
5596 | case X86ISD::PCMPESTR: { |
5597 | if (!Subtarget->hasSSE42()) |
5598 | break; |
5599 | |
5600 | |
5601 | SDValue InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EAX, |
5602 | Node->getOperand(1), |
5603 | SDValue()).getValue(1); |
5604 | InFlag = CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EDX, |
5605 | Node->getOperand(3), InFlag).getValue(1); |
5606 | |
5607 | bool NeedIndex = !SDValue(Node, 0).use_empty(); |
5608 | bool NeedMask = !SDValue(Node, 1).use_empty(); |
5609 | |
5610 | bool MayFoldLoad = !NeedIndex || !NeedMask; |
5611 | |
5612 | MachineSDNode *CNode; |
5613 | if (NeedMask) { |
5614 | unsigned ROpc = Subtarget->hasAVX() ? X86::VPCMPESTRMrr : X86::PCMPESTRMrr; |
5615 | unsigned MOpc = Subtarget->hasAVX() ? X86::VPCMPESTRMrm : X86::PCMPESTRMrm; |
5616 | CNode = emitPCMPESTR(ROpc, MOpc, MayFoldLoad, dl, MVT::v16i8, Node, |
5617 | InFlag); |
5618 | ReplaceUses(SDValue(Node, 1), SDValue(CNode, 0)); |
5619 | } |
5620 | if (NeedIndex || !NeedMask) { |
5621 | unsigned ROpc = Subtarget->hasAVX() ? X86::VPCMPESTRIrr : X86::PCMPESTRIrr; |
5622 | unsigned MOpc = Subtarget->hasAVX() ? X86::VPCMPESTRIrm : X86::PCMPESTRIrm; |
5623 | CNode = emitPCMPESTR(ROpc, MOpc, MayFoldLoad, dl, MVT::i32, Node, InFlag); |
5624 | ReplaceUses(SDValue(Node, 0), SDValue(CNode, 0)); |
5625 | } |
5626 | |
5627 | ReplaceUses(SDValue(Node, 2), SDValue(CNode, 1)); |
5628 | CurDAG->RemoveDeadNode(Node); |
5629 | return; |
5630 | } |
5631 | |
5632 | case ISD::SETCC: { |
5633 | if (NVT.isVector() && tryVPTESTM(Node, SDValue(Node, 0), SDValue())) |
5634 | return; |
5635 | |
5636 | break; |
5637 | } |
5638 | |
5639 | case ISD::STORE: |
5640 | if (foldLoadStoreIntoMemOperand(Node)) |
5641 | return; |
5642 | break; |
5643 | |
5644 | case X86ISD::SETCC_CARRY: { |
5645 | |
5646 | |
5647 | MVT VT = Node->getSimpleValueType(0); |
5648 | |
5649 | |
5650 | SDValue EFLAGS = |
5651 | CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EFLAGS, |
5652 | Node->getOperand(1), SDValue()); |
5653 | |
5654 | |
5655 | |
5656 | unsigned Opc = VT == MVT::i64 ? X86::SETB_C64r : X86::SETB_C32r; |
5657 | MVT SetVT = VT == MVT::i64 ? MVT::i64 : MVT::i32; |
5658 | SDValue Result = SDValue( |
5659 | CurDAG->getMachineNode(Opc, dl, SetVT, EFLAGS, EFLAGS.getValue(1)), 0); |
5660 | |
5661 | |
5662 | if (VT == MVT::i8 || VT == MVT::i16) { |
5663 | int SubIndex = VT == MVT::i16 ? X86::sub_16bit : X86::sub_8bit; |
5664 | Result = CurDAG->getTargetExtractSubreg(SubIndex, dl, VT, Result); |
5665 | } |
5666 | |
5667 | ReplaceUses(SDValue(Node, 0), Result); |
5668 | CurDAG->RemoveDeadNode(Node); |
5669 | return; |
5670 | } |
5671 | case X86ISD::SBB: { |
5672 | if (isNullConstant(Node->getOperand(0)) && |
5673 | isNullConstant(Node->getOperand(1))) { |
5674 | MVT VT = Node->getSimpleValueType(0); |
5675 | |
5676 | |
5677 | SDVTList VTs = CurDAG->getVTList(MVT::i32, MVT::i32); |
5678 | SDValue Zero = |
5679 | SDValue(CurDAG->getMachineNode(X86::MOV32r0, dl, VTs, None), 0); |
5680 | if (VT == MVT::i64) { |
5681 | Zero = SDValue( |
5682 | CurDAG->getMachineNode( |
5683 | TargetOpcode::SUBREG_TO_REG, dl, MVT::i64, |
5684 | CurDAG->getTargetConstant(0, dl, MVT::i64), Zero, |
5685 | CurDAG->getTargetConstant(X86::sub_32bit, dl, MVT::i32)), |
5686 | 0); |
5687 | } |
5688 | |
5689 | |
5690 | SDValue EFLAGS = |
5691 | CurDAG->getCopyToReg(CurDAG->getEntryNode(), dl, X86::EFLAGS, |
5692 | Node->getOperand(2), SDValue()); |
5693 | |
5694 | |
5695 | |
5696 | unsigned Opc = VT == MVT::i64 ? X86::SBB64rr : X86::SBB32rr; |
5697 | MVT SBBVT = VT == MVT::i64 ? MVT::i64 : MVT::i32; |
5698 | VTs = CurDAG->getVTList(SBBVT, MVT::i32); |
5699 | SDValue Result = |
5700 | SDValue(CurDAG->getMachineNode(Opc, dl, VTs, {Zero, Zero, EFLAGS, |
5701 | EFLAGS.getValue(1)}), |
5702 | 0); |
5703 | |
5704 | |
5705 | ReplaceUses(SDValue(Node, 1), Result.getValue(1)); |
5706 | |
5707 | |
5708 | if (!SDValue(Node, 0).use_empty()) { |
5709 | |
5710 | if (VT == MVT::i8 || VT == MVT::i16) { |
5711 | int SubIndex = VT == MVT::i16 ? X86::sub_16bit : X86::sub_8bit; |
5712 | Result = CurDAG->getTargetExtractSubreg(SubIndex, dl, VT, Result); |
5713 | } |
5714 | ReplaceUses(SDValue(Node, 0), Result); |
5715 | } |
5716 | |
5717 | CurDAG->RemoveDeadNode(Node); |
5718 | return; |
5719 | } |
5720 | break; |
5721 | } |
5722 | case X86ISD::MGATHER: { |
5723 | auto *Mgt = cast<X86MaskedGatherSDNode>(Node); |
5724 | SDValue IndexOp = Mgt->getIndex(); |
5725 | SDValue Mask = Mgt->getMask(); |
5726 | MVT IndexVT = IndexOp.getSimpleValueType(); |
5727 | MVT ValueVT = Node->getSimpleValueType(0); |
5728 | MVT MaskVT = Mask.getSimpleValueType(); |
5729 | |
5730 | |
5731 | |
5732 | |
5733 | if (!ValueVT.isVector() || !MaskVT.isVector()) |
5734 | break; |
5735 | |
5736 | unsigned NumElts = ValueVT.getVectorNumElements(); |
5737 | MVT ValueSVT = ValueVT.getVectorElementType(); |
5738 | |
5739 | bool IsFP = ValueSVT.isFloatingPoint(); |
5740 | unsigned EltSize = ValueSVT.getSizeInBits(); |
5741 | |
5742 | unsigned Opc = 0; |
5743 | bool AVX512Gather = MaskVT.getVectorElementType() == MVT::i1; |
5744 | if (AVX512Gather) { |
5745 | if (IndexVT == MVT::v4i32 && NumElts == 4 && EltSize == 32) |
5746 | Opc = IsFP ? X86::VGATHERDPSZ128rm : X86::VPGATHERDDZ128rm; |
5747 | else if (IndexVT == MVT::v8i32 && NumElts == 8 && EltSize == 32) |
5748 | Opc = IsFP ? X86::VGATHERDPSZ256rm : X86::VPGATHERDDZ256rm; |
5749 | else if (IndexVT == MVT::v16i32 && NumElts == 16 && EltSize == 32) |
5750 | Opc = IsFP ? X86::VGATHERDPSZrm : X86::VPGATHERDDZrm; |
5751 | else if (IndexVT == MVT::v4i32 && NumElts == 2 && EltSize == 64) |
5752 | Opc = IsFP ? X86::VGATHERDPDZ128rm : X86::VPGATHERDQZ128rm; |
5753 | else if (IndexVT == MVT::v4i32 && NumElts == 4 && EltSize == 64) |
5754 | Opc = IsFP ? X86::VGATHERDPDZ256rm : X86::VPGATHERDQZ256rm; |
5755 | else if (IndexVT == MVT::v8i32 && NumElts == 8 && EltSize == 64) |
5756 | Opc = IsFP ? X86::VGATHERDPDZrm : X86::VPGATHERDQZrm; |
5757 | else if (IndexVT == MVT::v2i64 && NumElts == 4 && EltSize == 32) |
5758 | Opc = IsFP ? X86::VGATHERQPSZ128rm : X86::VPGATHERQDZ128rm; |
5759 | else if (IndexVT == MVT::v4i64 && NumElts == 4 && EltSize == 32) |
5760 | Opc = IsFP ? X86::VGATHERQPSZ256rm : X86::VPGATHERQDZ256rm; |
5761 | else if (IndexVT == MVT::v8i64 && NumElts == 8 && EltSize == 32) |
5762 | Opc = IsFP ? X86::VGATHERQPSZrm : X86::VPGATHERQDZrm; |
5763 | else if (IndexVT == MVT::v2i64 && NumElts == 2 && EltSize == 64) |
5764 | Opc = IsFP ? X86::VGATHERQPDZ128rm : X86::VPGATHERQQZ128rm; |
5765 | else if (IndexVT == MVT::v4i64 && NumElts == 4 && EltSize == 64) |
5766 | Opc = IsFP ? X86::VGATHERQPDZ256rm : X86::VPGATHERQQZ256rm; |
5767 | else if (IndexVT == MVT::v8i64 && NumElts == 8 && EltSize == 64) |
5768 | Opc = IsFP ? X86::VGATHERQPDZrm : X86::VPGATHERQQZrm; |
5769 | } else { |
5770 | assert(EVT(MaskVT) == EVT(ValueVT).changeVectorElementTypeToInteger() && |
5771 | "Unexpected mask VT!"); |
5772 | if (IndexVT == MVT::v4i32 && NumElts == 4 && EltSize == 32) |
5773 | Opc = IsFP ? X86::VGATHERDPSrm : X86::VPGATHERDDrm; |
5774 | else if (IndexVT == MVT::v8i32 && NumElts == 8 && EltSize == 32) |
5775 | Opc = IsFP ? X86::VGATHERDPSYrm : X86::VPGATHERDDYrm; |
5776 | else if (IndexVT == MVT::v4i32 && NumElts == 2 && EltSize == 64) |
5777 | Opc = IsFP ? X86::VGATHERDPDrm : X86::VPGATHERDQrm; |
5778 | else if (IndexVT == MVT::v4i32 && NumElts == 4 && EltSize == 64) |
5779 | Opc = IsFP ? X86::VGATHERDPDYrm : X86::VPGATHERDQYrm; |
5780 | else if (IndexVT == MVT::v2i64 && NumElts == 4 && EltSize == 32) |
5781 | Opc = IsFP ? X86::VGATHERQPSrm : X86::VPGATHERQDrm; |
5782 | else if (IndexVT == MVT::v4i64 && NumElts == 4 && EltSize == 32) |
5783 | Opc = IsFP ? X86::VGATHERQPSYrm : X86::VPGATHERQDYrm; |
5784 | else if (IndexVT == MVT::v2i64 && NumElts == 2 && EltSize == 64) |
5785 | Opc = IsFP ? X86::VGATHERQPDrm : X86::VPGATHERQQrm; |
5786 | else if (IndexVT == MVT::v4i64 && NumElts == 4 && EltSize == 64) |
5787 | Opc = IsFP ? X86::VGATHERQPDYrm : X86::VPGATHERQQYrm; |
5788 | } |
5789 | |
5790 | if (!Opc) |
5791 | break; |
5792 | |
5793 | SDValue Base, Scale, Index, Disp, Segment; |
5794 | if (!selectVectorAddr(Mgt, Mgt->getBasePtr(), IndexOp, Mgt->getScale(), |
5795 | Base, Scale, Index, Disp, Segment)) |
5796 | break; |
5797 | |
5798 | SDValue PassThru = Mgt->getPassThru(); |
5799 | SDValue Chain = Mgt->getChain(); |
5800 | |
5801 | SDVTList VTs = CurDAG->getVTList(ValueVT, MaskVT, MVT::Other); |
5802 | |
5803 | MachineSDNode *NewNode; |
5804 | if (AVX512Gather) { |
5805 | SDValue Ops[] = {PassThru, Mask, Base, Scale, |
5806 | Index, Disp, Segment, Chain}; |
5807 | NewNode = CurDAG->getMachineNode(Opc, SDLoc(dl), VTs, Ops); |
5808 | } else { |
5809 | SDValue Ops[] = {PassThru, Base, Scale, Index, |
5810 | Disp, Segment, Mask, Chain}; |
5811 | NewNode = CurDAG->getMachineNode(Opc, SDLoc(dl), VTs, Ops); |
5812 | } |
5813 | CurDAG->setNodeMemRefs(NewNode, {Mgt->getMemOperand()}); |
5814 | ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 0)); |
5815 | ReplaceUses(SDValue(Node, 1), SDValue(NewNode, 2)); |
5816 | CurDAG->RemoveDeadNode(Node); |
5817 | return; |
5818 | } |
5819 | case X86ISD::MSCATTER: { |
5820 | auto *Sc = cast<X86MaskedScatterSDNode>(Node); |
5821 | SDValue Value = Sc->getValue(); |
5822 | SDValue IndexOp = Sc->getIndex(); |
5823 | MVT IndexVT = IndexOp.getSimpleValueType(); |
5824 | MVT ValueVT = Value.getSimpleValueType(); |
5825 | |
5826 | |
5827 | |
5828 | |
5829 | if (!ValueVT.isVector()) |
5830 | break; |
5831 | |
5832 | unsigned NumElts = ValueVT.getVectorNumElements(); |
5833 | MVT ValueSVT = ValueVT.getVectorElementType(); |
5834 | |
5835 | bool IsFP = ValueSVT.isFloatingPoint(); |
5836 | unsigned EltSize = ValueSVT.getSizeInBits(); |
5837 | |
5838 | unsigned Opc; |
5839 | if (IndexVT == MVT::v4i32 && NumElts == 4 && EltSize == 32) |
5840 | Opc = IsFP ? X86::VSCATTERDPSZ128mr : X86::VPSCATTERDDZ128mr; |
5841 | else if (IndexVT == MVT::v8i32 && NumElts == 8 && EltSize == 32) |
5842 | Opc = IsFP ? X86::VSCATTERDPSZ256mr : X86::VPSCATTERDDZ256mr; |
5843 | else if (IndexVT == MVT::v16i32 && NumElts == 16 && EltSize == 32) |
5844 | Opc = IsFP ? X86::VSCATTERDPSZmr : X86::VPSCATTERDDZmr; |
5845 | else if (IndexVT == MVT::v4i32 && NumElts == 2 && EltSize == 64) |
5846 | Opc = IsFP ? X86::VSCATTERDPDZ128mr : X86::VPSCATTERDQZ128mr; |
5847 | else if (IndexVT == MVT::v4i32 && NumElts == 4 && EltSize == 64) |
5848 | Opc = IsFP ? X86::VSCATTERDPDZ256mr : X86::VPSCATTERDQZ256mr; |
5849 | else if (IndexVT == MVT::v8i32 && NumElts == 8 && EltSize == 64) |
5850 | Opc = IsFP ? X86::VSCATTERDPDZmr : X86::VPSCATTERDQZmr; |
5851 | else if (IndexVT == MVT::v2i64 && NumElts == 4 && EltSize == 32) |
5852 | Opc = IsFP ? X86::VSCATTERQPSZ128mr : X86::VPSCATTERQDZ128mr; |
5853 | else if (IndexVT == MVT::v4i64 && NumElts == 4 && EltSize == 32) |
5854 | Opc = IsFP ? X86::VSCATTERQPSZ256mr : X86::VPSCATTERQDZ256mr; |
5855 | else if (IndexVT == MVT::v8i64 && NumElts == 8 && EltSize == 32) |
5856 | Opc = IsFP ? X86::VSCATTERQPSZmr : X86::VPSCATTERQDZmr; |
5857 | else if (IndexVT == MVT::v2i64 && NumElts == 2 && EltSize == 64) |
5858 | Opc = IsFP ? X86::VSCATTERQPDZ128mr : X86::VPSCATTERQQZ128mr; |
5859 | else if (IndexVT == MVT::v4i64 && NumElts == 4 && EltSize == 64) |
5860 | Opc = IsFP ? X86::VSCATTERQPDZ256mr : X86::VPSCATTERQQZ256mr; |
5861 | else if (IndexVT == MVT::v8i64 && NumElts == 8 && EltSize == 64) |
5862 | Opc = IsFP ? X86::VSCATTERQPDZmr : X86::VPSCATTERQQZmr; |
5863 | else |
5864 | break; |
5865 | |
5866 | SDValue Base, Scale, Index, Disp, Segment; |
5867 | if (!selectVectorAddr(Sc, Sc->getBasePtr(), IndexOp, Sc->getScale(), |
5868 | Base, Scale, Index, Disp, Segment)) |
5869 | break; |
5870 | |
5871 | SDValue Mask = Sc->getMask(); |
5872 | SDValue Chain = Sc->getChain(); |
5873 | |
5874 | SDVTList VTs = CurDAG->getVTList(Mask.getValueType(), MVT::Other); |
5875 | SDValue Ops[] = {Base, Scale, Index, Disp, Segment, Mask, Value, Chain}; |
5876 | |
5877 | MachineSDNode *NewNode = CurDAG->getMachineNode(Opc, SDLoc(dl), VTs, Ops); |
5878 | CurDAG->setNodeMemRefs(NewNode, {Sc->getMemOperand()}); |
5879 | ReplaceUses(SDValue(Node, 0), SDValue(NewNode, 1)); |
5880 | CurDAG->RemoveDeadNode(Node); |
5881 | return; |
5882 | } |
5883 | case ISD::PREALLOCATED_SETUP: { |
5884 | auto *MFI = CurDAG->getMachineFunction().getInfo<X86MachineFunctionInfo>(); |
5885 | auto CallId = MFI->getPreallocatedIdForCallSite( |
5886 | cast<SrcValueSDNode>(Node->getOperand(1))->getValue()); |
5887 | SDValue Chain = Node->getOperand(0); |
5888 | SDValue CallIdValue = CurDAG->getTargetConstant(CallId, dl, MVT::i32); |
5889 | MachineSDNode *New = CurDAG->getMachineNode( |
5890 | TargetOpcode::PREALLOCATED_SETUP, dl, MVT::Other, CallIdValue, Chain); |
5891 | ReplaceUses(SDValue(Node, 0), SDValue(New, 0)); |
5892 | CurDAG->RemoveDeadNode(Node); |
5893 | return; |
5894 | } |
5895 | case ISD::PREALLOCATED_ARG: { |
5896 | auto *MFI = CurDAG->getMachineFunction().getInfo<X86MachineFunctionInfo>(); |
5897 | auto CallId = MFI->getPreallocatedIdForCallSite( |
5898 | cast<SrcValueSDNode>(Node->getOperand(1))->getValue()); |
5899 | SDValue Chain = Node->getOperand(0); |
5900 | SDValue CallIdValue = CurDAG->getTargetConstant(CallId, dl, MVT::i32); |
5901 | SDValue ArgIndex = Node->getOperand(2); |
5902 | SDValue Ops[3]; |
5903 | Ops[0] = CallIdValue; |
5904 | Ops[1] = ArgIndex; |
5905 | Ops[2] = Chain; |
5906 | MachineSDNode *New = CurDAG->getMachineNode( |
5907 | TargetOpcode::PREALLOCATED_ARG, dl, |
5908 | CurDAG->getVTList(TLI->getPointerTy(CurDAG->getDataLayout()), |
5909 | MVT::Other), |
5910 | Ops); |
5911 | ReplaceUses(SDValue(Node, 0), SDValue(New, 0)); |
5912 | ReplaceUses(SDValue(Node, 1), SDValue(New, 1)); |
5913 | CurDAG->RemoveDeadNode(Node); |
5914 | return; |
5915 | } |
5916 | case X86ISD::AESENCWIDE128KL: |
5917 | case X86ISD::AESDECWIDE128KL: |
5918 | case X86ISD::AESENCWIDE256KL: |
5919 | case X86ISD::AESDECWIDE256KL: { |
5920 | if (!Subtarget->hasWIDEKL()) |
5921 | break; |
5922 | |
5923 | unsigned Opcode; |
5924 | switch (Node->getOpcode()) { |
5925 | default: |
5926 | llvm_unreachable("Unexpected opcode!"); |
5927 | case X86ISD::AESENCWIDE128KL: |
5928 | Opcode = X86::AESENCWIDE128KL; |
5929 | break; |
5930 | case X86ISD::AESDECWIDE128KL: |
5931 | Opcode = X86::AESDECWIDE128KL; |
5932 | break; |
5933 | case X86ISD::AESENCWIDE256KL: |
5934 | Opcode = X86::AESENCWIDE256KL; |
5935 | break; |
5936 | case X86ISD::AESDECWIDE256KL: |
5937 | Opcode = X86::AESDECWIDE256KL; |
5938 | break; |
5939 | } |
5940 | |
5941 | SDValue Chain = Node->getOperand(0); |
5942 | SDValue Addr = Node->getOperand(1); |
5943 | |
5944 | SDValue Base, Scale, Index, Disp, Segment; |
5945 | if (!selectAddr(Node, Addr, Base, Scale, Index, Disp, Segment)) |
5946 | break; |
5947 | |
5948 | Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM0, Node->getOperand(2), |
5949 | SDValue()); |
5950 | Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM1, Node->getOperand(3), |
5951 | Chain.getValue(1)); |
5952 | Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM2, Node->getOperand(4), |
5953 | Chain.getValue(1)); |
5954 | Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM3, Node->getOperand(5), |
5955 | Chain.getValue(1)); |
5956 | Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM4, Node->getOperand(6), |
5957 | Chain.getValue(1)); |
5958 | Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM5, Node->getOperand(7), |
5959 | Chain.getValue(1)); |
5960 | Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM6, Node->getOperand(8), |
5961 | Chain.getValue(1)); |
5962 | Chain = CurDAG->getCopyToReg(Chain, dl, X86::XMM7, Node->getOperand(9), |
5963 | Chain.getValue(1)); |
5964 | |
5965 | MachineSDNode *Res = CurDAG->getMachineNode( |
5966 | Opcode, dl, Node->getVTList(), |
5967 | {Base, Scale, Index, Disp, Segment, Chain, Chain.getValue(1)}); |
5968 | CurDAG->setNodeMemRefs(Res, cast<MemSDNode>(Node)->getMemOperand()); |
5969 | ReplaceNode(Node, Res); |
5970 | return; |
5971 | } |
5972 | } |
5973 | |
5974 | SelectCode(Node); |
5975 | } |
5976 | |
5977 | bool X86DAGToDAGISel:: |
5978 | SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, |
5979 | std::vector<SDValue> &OutOps) { |
5980 | SDValue Op0, Op1, Op2, Op3, Op4; |
5981 | switch (ConstraintID) { |
5982 | default: |
5983 | llvm_unreachable("Unexpected asm memory constraint"); |
5984 | case InlineAsm::Constraint_o: |
5985 | case InlineAsm::Constraint_v: |
5986 | case InlineAsm::Constraint_m: |
5987 | case InlineAsm::Constraint_X: |
5988 | if (!selectAddr(nullptr, Op, Op0, Op1, Op2, Op3, Op4)) |
5989 | return true; |
5990 | break; |
5991 | } |
5992 | |
5993 | OutOps.push_back(Op0); |
5994 | OutOps.push_back(Op1); |
5995 | OutOps.push_back(Op2); |
5996 | OutOps.push_back(Op3); |
5997 | OutOps.push_back(Op4); |
5998 | return false; |
5999 | } |
6000 | |
6001 | |
6002 | |
6003 | FunctionPass *llvm::createX86ISelDag(X86TargetMachine &TM, |
6004 | CodeGenOpt::Level OptLevel) { |
6005 | return new X86DAGToDAGISel(TM, OptLevel); |
6006 | } |