clang -cc1 -cc1 -triple amd64-unknown-openbsd7.0 -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name X86ISelLowering.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model static -mframe-pointer=all -relaxed-aliasing -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/usr/src/gnu/usr.bin/clang/libLLVM/obj -resource-dir /usr/local/lib/clang/13.0.0 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Analysis -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ASMParser -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/BinaryFormat -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Bitcode -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Bitcode -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Bitstream -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /include/llvm/CodeGen -I /include/llvm/CodeGen/PBQP -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/IR -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/IR -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Coroutines -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ProfileData/Coverage -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/CodeView -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/DWARF -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/MSF -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/PDB -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Demangle -I 
/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ExecutionEngine -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ExecutionEngine/JITLink -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ExecutionEngine/Orc -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend/OpenACC -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend/OpenMP -I /include/llvm/CodeGen/GlobalISel -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/IRReader -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/InstCombine -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/Transforms/InstCombine -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/LTO -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Linker -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/MC -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/MC/MCParser -I /include/llvm/CodeGen/MIRParser -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Object -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Option -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Passes -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ProfileData -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Scalar -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ADT -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Support -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/Symbolize -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Target -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Utils -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Vectorize -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/IPO -I 
/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include -I /usr/src/gnu/usr.bin/clang/libLLVM/../include -I /usr/src/gnu/usr.bin/clang/libLLVM/obj -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include -D NDEBUG -D __STDC_LIMIT_MACROS -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D LLVM_PREFIX="/usr" -internal-isystem /usr/include/c++/v1 -internal-isystem /usr/local/lib/clang/13.0.0/include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/usr/src/gnu/usr.bin/clang/libLLVM/obj -ferror-limit 19 -fvisibility-inlines-hidden -fwrapv -stack-protector 2 -fno-rtti -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-valloc -fno-builtin-free -fno-builtin-strdup -fno-builtin-strndup -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /home/ben/Projects/vmm/scan-build/2022-01-12-194120-40624-1 -x c++ /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86/X86ISelLowering.cpp
1 | //===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation ------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-Exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file defines the interfaces that X86 uses to lower LLVM code into a |
10 | // selection DAG. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #include "X86ISelLowering.h" |
15 | #include "MCTargetDesc/X86ShuffleDecode.h" |
16 | #include "X86.h" |
17 | #include "X86CallingConv.h" |
18 | #include "X86FrameLowering.h" |
19 | #include "X86InstrBuilder.h" |
20 | #include "X86IntrinsicsInfo.h" |
21 | #include "X86MachineFunctionInfo.h" |
22 | #include "X86TargetMachine.h" |
23 | #include "X86TargetObjectFile.h" |
24 | #include "llvm/ADT/SmallBitVector.h" |
25 | #include "llvm/ADT/SmallSet.h" |
26 | #include "llvm/ADT/Statistic.h" |
27 | #include "llvm/ADT/StringExtras.h" |
28 | #include "llvm/ADT/StringSwitch.h" |
29 | #include "llvm/Analysis/BlockFrequencyInfo.h" |
30 | #include "llvm/Analysis/EHPersonalities.h" |
31 | #include "llvm/Analysis/ObjCARCUtil.h" |
32 | #include "llvm/Analysis/ProfileSummaryInfo.h" |
33 | #include "llvm/Analysis/VectorUtils.h" |
34 | #include "llvm/CodeGen/IntrinsicLowering.h" |
35 | #include "llvm/CodeGen/MachineFrameInfo.h" |
36 | #include "llvm/CodeGen/MachineFunction.h" |
37 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
38 | #include "llvm/CodeGen/MachineJumpTableInfo.h" |
39 | #include "llvm/CodeGen/MachineLoopInfo.h" |
40 | #include "llvm/CodeGen/MachineModuleInfo.h" |
41 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
42 | #include "llvm/CodeGen/TargetLowering.h" |
43 | #include "llvm/CodeGen/WinEHFuncInfo.h" |
44 | #include "llvm/IR/CallingConv.h" |
45 | #include "llvm/IR/Constants.h" |
46 | #include "llvm/IR/DerivedTypes.h" |
47 | #include "llvm/IR/DiagnosticInfo.h" |
48 | #include "llvm/IR/Function.h" |
49 | #include "llvm/IR/GlobalAlias.h" |
50 | #include "llvm/IR/GlobalVariable.h" |
51 | #include "llvm/IR/Instructions.h" |
52 | #include "llvm/IR/Intrinsics.h" |
53 | #include "llvm/IR/IRBuilder.h" |
54 | #include "llvm/MC/MCAsmInfo.h" |
55 | #include "llvm/MC/MCContext.h" |
56 | #include "llvm/MC/MCExpr.h" |
57 | #include "llvm/MC/MCSymbol.h" |
58 | #include "llvm/Support/CommandLine.h" |
59 | #include "llvm/Support/Debug.h" |
60 | #include "llvm/Support/ErrorHandling.h" |
61 | #include "llvm/Support/KnownBits.h" |
62 | #include "llvm/Support/MathExtras.h" |
63 | #include "llvm/Target/TargetOptions.h" |
64 | #include <algorithm> |
65 | #include <bitset> |
66 | #include <cctype> |
67 | #include <numeric> |
68 | using namespace llvm; |
69 | |
70 | #define DEBUG_TYPE "x86-isel" |
71 | |
72 | STATISTIC(NumTailCalls, "Number of tail calls"); |
73 | |
74 | static cl::opt<int> ExperimentalPrefLoopAlignment( |
75 | "x86-experimental-pref-loop-alignment", cl::init(4), |
76 | cl::desc( |
77 | "Sets the preferable loop alignment for experiments (as log2 bytes)" |
78 | "(the last x86-experimental-pref-loop-alignment bits" |
79 | " of the loop header PC will be 0)."), |
80 | cl::Hidden); |
81 | |
82 | static cl::opt<int> ExperimentalPrefInnermostLoopAlignment( |
83 | "x86-experimental-pref-innermost-loop-alignment", cl::init(4), |
84 | cl::desc( |
85 | "Sets the preferable loop alignment for experiments (as log2 bytes) " |
86 | "for innermost loops only. If specified, this option overrides " |
87 | "alignment set by x86-experimental-pref-loop-alignment."), |
88 | cl::Hidden); |
89 | |
90 | static cl::opt<bool> MulConstantOptimization( |
91 | "mul-constant-optimization", cl::init(true), |
92 | cl::desc("Replace 'mul x, Const' with more effective instructions like " |
93 | "SHIFT, LEA, etc."), |
94 | cl::Hidden); |
95 | |
96 | static cl::opt<bool> ExperimentalUnorderedISEL( |
97 | "x86-experimental-unordered-atomic-isel", cl::init(false), |
98 | cl::desc("Use LoadSDNode and StoreSDNode instead of " |
99 | "AtomicSDNode for unordered atomic loads and " |
100 | "stores respectively."), |
101 | cl::Hidden); |
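// The cl::opt definitions above are LLVM's mechanism for hidden, internal
// command-line knobs. As a (hypothetical) usage example, the loop alignment
// could be tuned at llc's command line without rebuilding:
//   llc -x86-experimental-pref-loop-alignment=6 foo.ll
// which requests 2^6 = 64-byte alignment for loop headers.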
102 | |
103 | /// Call this when the user attempts to do something unsupported, like |
104 | /// returning a double without SSE2 enabled on x86_64. This is not fatal, unlike |
105 | /// report_fatal_error, so calling code should attempt to recover without |
106 | /// crashing. |
107 | static void errorUnsupported(SelectionDAG &DAG, const SDLoc &dl, |
108 | const char *Msg) { |
109 | MachineFunction &MF = DAG.getMachineFunction(); |
110 | DAG.getContext()->diagnose( |
111 | DiagnosticInfoUnsupported(MF.getFunction(), Msg, dl.getDebugLoc())); |
112 | } |
113 | |
114 | X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, |
115 | const X86Subtarget &STI) |
116 | : TargetLowering(TM), Subtarget(STI) { |
117 | bool UseX87 = !Subtarget.useSoftFloat() && Subtarget.hasX87(); |
118 | X86ScalarSSEf64 = Subtarget.hasSSE2(); |
119 | X86ScalarSSEf32 = Subtarget.hasSSE1(); |
120 | MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0)); |
121 | |
122 | // Set up the TargetLowering object. |
123 | |
124 | // X86 is weird. It always uses i8 for shift amounts and setcc results. |
125 | setBooleanContents(ZeroOrOneBooleanContent); |
126 | // X86-SSE is even stranger. It uses -1 or 0 for vector masks. |
127 | setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); |
128 | |
129 | // For 64-bit, since we have so many registers, use the ILP scheduler. |
130 | // For 32-bit, use the register pressure specific scheduling. |
131 | // For Atom, always use ILP scheduling. |
132 | if (Subtarget.isAtom()) |
133 | setSchedulingPreference(Sched::ILP); |
134 | else if (Subtarget.is64Bit()) |
135 | setSchedulingPreference(Sched::ILP); |
136 | else |
137 | setSchedulingPreference(Sched::RegPressure); |
138 | const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); |
139 | setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister()); |
140 | |
141 | // Bypass expensive divides and use cheaper ones. |
142 | if (TM.getOptLevel() >= CodeGenOpt::Default) { |
143 | if (Subtarget.hasSlowDivide32()) |
144 | addBypassSlowDiv(32, 8); |
145 | if (Subtarget.hasSlowDivide64() && Subtarget.is64Bit()) |
146 | addBypassSlowDiv(64, 32); |
147 | } |
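// addBypassSlowDiv(64, 32) asks CodeGenPrepare to wrap 64-bit divides in a
// runtime test and use the much faster 32-bit DIV whenever both operands
// actually fit in 32 bits; the (32, 8) variant does the same for 32-bit
// divides on subtargets where 32-bit division is slow.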
148 | |
149 | // Setup Windows compiler runtime calls. |
150 | if (Subtarget.isTargetWindowsMSVC() || Subtarget.isTargetWindowsItanium()) { |
151 | static const struct { |
152 | const RTLIB::Libcall Op; |
153 | const char * const Name; |
154 | const CallingConv::ID CC; |
155 | } LibraryCalls[] = { |
156 | { RTLIB::SDIV_I64, "_alldiv", CallingConv::X86_StdCall }, |
157 | { RTLIB::UDIV_I64, "_aulldiv", CallingConv::X86_StdCall }, |
158 | { RTLIB::SREM_I64, "_allrem", CallingConv::X86_StdCall }, |
159 | { RTLIB::UREM_I64, "_aullrem", CallingConv::X86_StdCall }, |
160 | { RTLIB::MUL_I64, "_allmul", CallingConv::X86_StdCall }, |
161 | }; |
162 | |
163 | for (const auto &LC : LibraryCalls) { |
164 | setLibcallName(LC.Op, LC.Name); |
165 | setLibcallCallingConv(LC.Op, LC.CC); |
166 | } |
167 | } |
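// Table-driven libcall setup: on MSVC-style targets the libgcc-style 64-bit
// integer helpers don't exist, so the RTLIB entries are redirected to the
// MSVCRT helpers (_alldiv, _allmul, ...) with their stdcall convention.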
168 | |
169 | if (Subtarget.getTargetTriple().isOSMSVCRT()) { |
170 | // MSVCRT doesn't have powi; goes through pow. |
171 | setLibcallName(RTLIB::POWI_F32, nullptr); |
172 | setLibcallName(RTLIB::POWI_F64, nullptr); |
173 | } |
174 | |
175 | // If we don't have cmpxchg8b (meaning this is a 386/486), limit atomic size to |
176 | // 32 bits so the AtomicExpandPass will expand it so we don't need cmpxchg8b. |
177 | // FIXME: Should we be limiting the atomic size on other configs? Default is |
178 | // 1024. |
179 | if (!Subtarget.hasCmpxchg8b()) |
180 | setMaxAtomicSizeInBitsSupported(32); |
181 | |
182 | // Set up the register classes. |
183 | addRegisterClass(MVT::i8, &X86::GR8RegClass); |
184 | addRegisterClass(MVT::i16, &X86::GR16RegClass); |
185 | addRegisterClass(MVT::i32, &X86::GR32RegClass); |
186 | if (Subtarget.is64Bit()) |
187 | addRegisterClass(MVT::i64, &X86::GR64RegClass); |
188 | |
189 | for (MVT VT : MVT::integer_valuetypes()) |
190 | setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); |
191 | |
192 | // We don't accept any truncstore of integer registers. |
193 | setTruncStoreAction(MVT::i64, MVT::i32, Expand); |
194 | setTruncStoreAction(MVT::i64, MVT::i16, Expand); |
195 | setTruncStoreAction(MVT::i64, MVT::i8 , Expand); |
196 | setTruncStoreAction(MVT::i32, MVT::i16, Expand); |
197 | setTruncStoreAction(MVT::i32, MVT::i8 , Expand); |
198 | setTruncStoreAction(MVT::i16, MVT::i8, Expand); |
199 | |
200 | setTruncStoreAction(MVT::f64, MVT::f32, Expand); |
201 | |
202 | // SETOEQ and SETUNE require checking two conditions. |
203 | for (auto VT : {MVT::f32, MVT::f64, MVT::f80}) { |
204 | setCondCodeAction(ISD::SETOEQ, VT, Expand); |
205 | setCondCodeAction(ISD::SETUNE, VT, Expand); |
206 | } |
207 | |
208 | // Integer absolute. |
209 | if (Subtarget.hasCMov()) { |
210 | setOperationAction(ISD::ABS , MVT::i16 , Custom); |
211 | setOperationAction(ISD::ABS , MVT::i32 , Custom); |
212 | if (Subtarget.is64Bit()) |
213 | setOperationAction(ISD::ABS , MVT::i64 , Custom); |
214 | } |
215 | |
216 | // Funnel shifts. |
217 | for (auto ShiftOp : {ISD::FSHL, ISD::FSHR}) { |
218 | // For slow shld targets we only lower for code size. |
219 | LegalizeAction ShiftDoubleAction = Subtarget.isSHLDSlow() ? Custom : Legal; |
220 | |
221 | setOperationAction(ShiftOp , MVT::i8 , Custom); |
222 | setOperationAction(ShiftOp , MVT::i16 , Custom); |
223 | setOperationAction(ShiftOp , MVT::i32 , ShiftDoubleAction); |
224 | if (Subtarget.is64Bit()) |
225 | setOperationAction(ShiftOp , MVT::i64 , ShiftDoubleAction); |
226 | } |
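// Net effect of the loop above: i8/i16 funnel shifts are always custom
// lowered, while i32 (and i64 on 64-bit targets) map straight onto SHLD/SHRD
// unless the subtarget reports those instructions as slow.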
227 | |
228 | if (!Subtarget.useSoftFloat()) { |
229 | // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this |
230 | // operation. |
231 | setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote); |
232 | setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i8, Promote); |
233 | setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote); |
234 | setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i16, Promote); |
235 | // We have an algorithm for SSE2, and we turn this into a 64-bit |
236 | // FILD or VCVTUSI2SS/SD for other targets. |
237 | setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); |
238 | setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom); |
239 | |
240 | // i64 UINT_TO_FP has a custom algorithm: SSE2 tricks for f64, otherwise a 64-bit FILD plus a conditional FADD fixup. |
241 | setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom); |
242 | setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom); |
243 | |
244 | // Promote i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have |
245 | // this operation. |
246 | setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote); |
247 | setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i8, Promote); |
248 | |
249 | |
250 | setOperationAction(ISD::SINT_TO_FP, MVT::i16, Custom); |
251 | setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i16, Custom); |
252 | |
253 | setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); |
254 | setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom); |
255 | |
256 | |
257 | setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); |
258 | setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom); |
259 | |
260 | // Promote i8 FP_TO_SINT to larger FP_TO_SINT's, as X86 doesn't have |
261 | // this operation. |
262 | setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote); |
263 | |
264 | setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i8, Promote); |
265 | setOperationAction(ISD::FP_TO_SINT, MVT::i16, Custom); |
266 | setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i16, Custom); |
267 | setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); |
268 | setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom); |
269 | |
270 | |
271 | setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); |
272 | setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom); |
273 | |
274 | |
275 | |
276 | setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote); |
277 | |
278 | setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i8, Promote); |
279 | setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote); |
280 | |
281 | setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i16, Promote); |
282 | setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); |
283 | setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom); |
284 | setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom); |
285 | setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom); |
286 | |
287 | setOperationAction(ISD::LRINT, MVT::f32, Custom); |
288 | setOperationAction(ISD::LRINT, MVT::f64, Custom); |
289 | setOperationAction(ISD::LLRINT, MVT::f32, Custom); |
290 | setOperationAction(ISD::LLRINT, MVT::f64, Custom); |
291 | |
292 | if (!Subtarget.is64Bit()) { |
293 | setOperationAction(ISD::LRINT, MVT::i64, Custom); |
294 | setOperationAction(ISD::LLRINT, MVT::i64, Custom); |
295 | } |
296 | } |
297 | |
298 | if (Subtarget.hasSSE2()) { |
299 | // Custom lowering for saturating float to int conversions. |
300 | // We handle promotion to larger result types manually. |
301 | for (MVT VT : { MVT::i8, MVT::i16, MVT::i32 }) { |
302 | setOperationAction(ISD::FP_TO_UINT_SAT, VT, Custom); |
303 | setOperationAction(ISD::FP_TO_SINT_SAT, VT, Custom); |
304 | } |
305 | if (Subtarget.is64Bit()) { |
306 | setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i64, Custom); |
307 | setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i64, Custom); |
308 | } |
309 | } |
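// FP_TO_SINT_SAT/FP_TO_UINT_SAT back the llvm.fpto{s,u}i.sat intrinsics:
// out-of-range inputs clamp to the destination range instead of producing
// poison, which needs explicit fixup code around the SSE2 CVTT* instructions.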
310 | |
311 | // Handle address space casts between mixed sized pointers. |
312 | setOperationAction(ISD::ADDRSPACECAST, MVT::i32, Custom); |
313 | setOperationAction(ISD::ADDRSPACECAST, MVT::i64, Custom); |
314 | |
315 | // TODO: when we have SSE, these could be more efficient, by using movd/movq. |
316 | if (!X86ScalarSSEf64) { |
317 | setOperationAction(ISD::BITCAST , MVT::f32 , Expand); |
318 | setOperationAction(ISD::BITCAST , MVT::i32 , Expand); |
319 | if (Subtarget.is64Bit()) { |
320 | setOperationAction(ISD::BITCAST , MVT::f64 , Expand); |
321 | |
322 | setOperationAction(ISD::BITCAST , MVT::i64 , Expand); |
323 | } |
324 | } else if (!Subtarget.is64Bit()) |
325 | setOperationAction(ISD::BITCAST , MVT::i64 , Custom); |
326 | |
327 | // Scalar integer divide and remainder are lowered to use operations that |
328 | // produce two results, to match the available instructions. This exposes |
329 | // the two-result form to trivial CSE, which is able to combine x/y and x%y |
330 | // into a single instruction. |
331 | // |
332 | // Scalar integer multiply-high is also lowered to use two-result |
333 | // operations, to match the available instructions. However, plain multiply |
334 | // (low) operations are left as Legal, as there are single-result |
335 | // instructions for this in x86. Using the two-result multiply instructions |
336 | // when both high and low results are needed must be arranged by dagcombine. |
337 | for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) { |
338 | setOperationAction(ISD::MULHS, VT, Expand); |
339 | setOperationAction(ISD::MULHU, VT, Expand); |
340 | setOperationAction(ISD::SDIV, VT, Expand); |
341 | setOperationAction(ISD::UDIV, VT, Expand); |
342 | setOperationAction(ISD::SREM, VT, Expand); |
343 | setOperationAction(ISD::UREM, VT, Expand); |
344 | } |
345 | |
346 | setOperationAction(ISD::BR_JT , MVT::Other, Expand); |
347 | setOperationAction(ISD::BRCOND , MVT::Other, Custom); |
348 | for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128, |
349 | MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) { |
350 | setOperationAction(ISD::BR_CC, VT, Expand); |
351 | setOperationAction(ISD::SELECT_CC, VT, Expand); |
352 | } |
353 | if (Subtarget.is64Bit()) |
354 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal); |
355 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal); |
356 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal); |
357 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand); |
358 | |
359 | setOperationAction(ISD::FREM , MVT::f32 , Expand); |
360 | setOperationAction(ISD::FREM , MVT::f64 , Expand); |
361 | setOperationAction(ISD::FREM , MVT::f80 , Expand); |
362 | setOperationAction(ISD::FREM , MVT::f128 , Expand); |
363 | |
364 | if (!Subtarget.useSoftFloat() && Subtarget.hasX87()) { |
365 | setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom); |
366 | setOperationAction(ISD::SET_ROUNDING , MVT::Other, Custom); |
367 | } |
368 | |
369 | // Promote the i8 variants and force them on up to i32 which has a shorter |
370 | // encoding. |
371 | setOperationPromotedToType(ISD::CTTZ , MVT::i8 , MVT::i32); |
372 | setOperationPromotedToType(ISD::CTTZ_ZERO_UNDEF, MVT::i8 , MVT::i32); |
373 | |
374 | if (Subtarget.hasBMI()) { |
375 | // Promote the i16 zero undef variant and force it on up to i32 when tzcnt |
376 | // is enabled. |
377 | setOperationPromotedToType(ISD::CTTZ_ZERO_UNDEF, MVT::i16, MVT::i32); |
378 | } else { |
379 | setOperationAction(ISD::CTTZ, MVT::i16, Custom); |
380 | setOperationAction(ISD::CTTZ , MVT::i32 , Custom); |
381 | setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16 , Legal); |
382 | setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32 , Legal); |
383 | if (Subtarget.is64Bit()) { |
384 | setOperationAction(ISD::CTTZ , MVT::i64 , Custom); |
385 | setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Legal); |
386 | } |
387 | } |
388 | |
389 | if (Subtarget.hasLZCNT()) { |
390 | // When promoting the i8 variants, force them to i32 for a shorter |
391 | // encoding. |
392 | setOperationPromotedToType(ISD::CTLZ , MVT::i8 , MVT::i32); |
393 | setOperationPromotedToType(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , MVT::i32); |
394 | } else { |
395 | for (auto VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64}) { |
396 | if (VT == MVT::i64 && !Subtarget.is64Bit()) |
397 | continue; |
398 | setOperationAction(ISD::CTLZ , VT, Custom); |
399 | setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Custom); |
400 | } |
401 | } |
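// Without LZCNT, CTLZ is custom lowered on top of BSR: BSR returns the index
// of the highest set bit (undefined for zero input), which gets XORed with
// the bit width minus one; plain CTLZ additionally needs a zero-input guard.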
402 | |
403 | for (auto Op : {ISD::FP16_TO_FP, ISD::STRICT_FP16_TO_FP, ISD::FP_TO_FP16, |
404 | ISD::STRICT_FP_TO_FP16}) { |
405 | // Special handling for half-precision floating point conversions. |
406 | // If we don't have F16C support, then lower half float conversions |
407 | // into library calls. |
408 | setOperationAction( |
409 | Op, MVT::f32, |
410 | (!Subtarget.useSoftFloat() && Subtarget.hasF16C()) ? Custom : Expand); |
411 | // There's never any support for operations beyond MVT::f32. |
412 | setOperationAction(Op, MVT::f64, Expand); |
413 | setOperationAction(Op, MVT::f80, Expand); |
414 | setOperationAction(Op, MVT::f128, Expand); |
415 | } |
416 | |
417 | setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand); |
418 | setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand); |
419 | setLoadExtAction(ISD::EXTLOAD, MVT::f80, MVT::f16, Expand); |
420 | setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f16, Expand); |
421 | setTruncStoreAction(MVT::f32, MVT::f16, Expand); |
422 | setTruncStoreAction(MVT::f64, MVT::f16, Expand); |
423 | setTruncStoreAction(MVT::f80, MVT::f16, Expand); |
424 | setTruncStoreAction(MVT::f128, MVT::f16, Expand); |
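// No pre-F16C hardware can load or convert f16 directly, so all extending
// loads from f16 and truncating stores to f16 are expanded and ultimately
// go through soft-float conversion helper calls.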
425 | |
426 | setOperationAction(ISD::PARITY, MVT::i8, Custom); |
427 | if (Subtarget.hasPOPCNT()) { |
428 | setOperationPromotedToType(ISD::CTPOP, MVT::i8, MVT::i32); |
429 | } else { |
430 | setOperationAction(ISD::CTPOP , MVT::i8 , Expand); |
431 | setOperationAction(ISD::CTPOP , MVT::i16 , Expand); |
432 | setOperationAction(ISD::CTPOP , MVT::i32 , Expand); |
433 | if (Subtarget.is64Bit()) |
434 | setOperationAction(ISD::CTPOP , MVT::i64 , Expand); |
435 | else |
436 | setOperationAction(ISD::CTPOP , MVT::i64 , Custom); |
437 | |
438 | setOperationAction(ISD::PARITY, MVT::i16, Custom); |
439 | setOperationAction(ISD::PARITY, MVT::i32, Custom); |
440 | if (Subtarget.is64Bit()) |
441 | setOperationAction(ISD::PARITY, MVT::i64, Custom); |
442 | } |
443 | |
444 | setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom); |
445 | |
446 | if (!Subtarget.hasMOVBE()) |
447 | setOperationAction(ISD::BSWAP , MVT::i16 , Expand); |
448 | |
449 | // X86 wants to expand cmov itself. |
450 | for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128 }) { |
451 | setOperationAction(ISD::SELECT, VT, Custom); |
452 | setOperationAction(ISD::SETCC, VT, Custom); |
453 | setOperationAction(ISD::STRICT_FSETCC, VT, Custom); |
454 | setOperationAction(ISD::STRICT_FSETCCS, VT, Custom); |
455 | } |
456 | for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) { |
457 | if (VT == MVT::i64 && !Subtarget.is64Bit()) |
458 | continue; |
459 | setOperationAction(ISD::SELECT, VT, Custom); |
460 | setOperationAction(ISD::SETCC, VT, Custom); |
461 | } |
462 | |
463 | // Custom action for SELECT MMX and expand action for SELECT_CC MMX |
464 | setOperationAction(ISD::SELECT, MVT::x86mmx, Custom); |
465 | setOperationAction(ISD::SELECT_CC, MVT::x86mmx, Expand); |
466 | |
467 | setOperationAction(ISD::EH_RETURN , MVT::Other, Custom); |
468 | // NOTE: EH_SJLJ_SETJMP/_LONGJMP are not recommended, since |
469 | // LLVM/Clang supports zero-cost DWARF and SEH exception handling. |
470 | setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom); |
471 | setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom); |
472 | setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom); |
473 | if (TM.Options.ExceptionModel == ExceptionHandling::SjLj) |
474 | setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume"); |
475 | |
476 | // Darwin ABI issue. |
477 | for (auto VT : { MVT::i32, MVT::i64 }) { |
478 | if (VT == MVT::i64 && !Subtarget.is64Bit()) |
479 | continue; |
480 | setOperationAction(ISD::ConstantPool , VT, Custom); |
481 | setOperationAction(ISD::JumpTable , VT, Custom); |
482 | setOperationAction(ISD::GlobalAddress , VT, Custom); |
483 | setOperationAction(ISD::GlobalTLSAddress, VT, Custom); |
484 | setOperationAction(ISD::ExternalSymbol , VT, Custom); |
485 | setOperationAction(ISD::BlockAddress , VT, Custom); |
486 | } |
487 | |
488 | // Multi-part shifts (64-bit shifts on 32-bit targets, 128-bit on 64-bit) are custom lowered. |
489 | for (auto VT : { MVT::i32, MVT::i64 }) { |
490 | if (VT == MVT::i64 && !Subtarget.is64Bit()) |
491 | continue; |
492 | setOperationAction(ISD::SHL_PARTS, VT, Custom); |
493 | setOperationAction(ISD::SRA_PARTS, VT, Custom); |
494 | setOperationAction(ISD::SRL_PARTS, VT, Custom); |
495 | } |
496 | |
497 | if (Subtarget.hasSSEPrefetch() || Subtarget.has3DNow()) |
498 | setOperationAction(ISD::PREFETCH , MVT::Other, Legal); |
499 | |
500 | setOperationAction(ISD::ATOMIC_FENCE , MVT::Other, Custom); |
501 | |
502 | // Expand certain atomics |
503 | for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) { |
504 | setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom); |
505 | setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom); |
506 | setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Custom); |
507 | setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Custom); |
508 | setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Custom); |
509 | setOperationAction(ISD::ATOMIC_LOAD_AND, VT, Custom); |
510 | setOperationAction(ISD::ATOMIC_STORE, VT, Custom); |
511 | } |
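// The RMW atomics stay Custom even though LOCK-prefixed instructions exist:
// custom lowering can emit a plain LOCK ADD/SUB/OR/... when the result of
// the operation is unused, instead of an XADD or a CMPXCHG loop.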
512 | |
513 | if (!Subtarget.is64Bit()) |
514 | setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Custom); |
515 | |
516 | if (Subtarget.hasCmpxchg16b()) { |
517 | setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom); |
518 | } |
519 | |
520 | // FIXME - use subtarget debug flags |
521 | if (!Subtarget.isTargetDarwin() && !Subtarget.isTargetELF() && |
522 | !Subtarget.isTargetCygMing() && !Subtarget.isTargetWin64() && |
523 | TM.Options.ExceptionModel != ExceptionHandling::SjLj) { |
524 | setOperationAction(ISD::EH_LABEL, MVT::Other, Expand); |
525 | } |
526 | |
527 | setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom); |
528 | setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i64, Custom); |
529 | |
530 | setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom); |
531 | setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom); |
532 | |
533 | setOperationAction(ISD::TRAP, MVT::Other, Legal); |
534 | setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal); |
535 | setOperationAction(ISD::UBSANTRAP, MVT::Other, Legal); |
536 | |
537 | // VASTART needs to be custom lowered to use the VarArgsFrameIndex |
538 | setOperationAction(ISD::VASTART , MVT::Other, Custom); |
539 | setOperationAction(ISD::VAEND , MVT::Other, Expand); |
540 | bool Is64Bit = Subtarget.is64Bit(); |
541 | setOperationAction(ISD::VAARG, MVT::Other, Is64Bit ? Custom : Expand); |
542 | setOperationAction(ISD::VACOPY, MVT::Other, Is64Bit ? Custom : Expand); |
543 | |
544 | setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); |
545 | setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); |
546 | |
547 | setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom); |
548 | |
549 | // GC_TRANSITION_START and GC_TRANSITION_END need custom lowering. |
550 | setOperationAction(ISD::GC_TRANSITION_START, MVT::Other, Custom); |
551 | setOperationAction(ISD::GC_TRANSITION_END, MVT::Other, Custom); |
552 | |
553 | if (!Subtarget.useSoftFloat() && X86ScalarSSEf64) { |
554 | // f32 and f64 use SSE. |
555 | // Set up the FP register classes. |
556 | addRegisterClass(MVT::f32, Subtarget.hasAVX512() ? &X86::FR32XRegClass |
557 | : &X86::FR32RegClass); |
558 | addRegisterClass(MVT::f64, Subtarget.hasAVX512() ? &X86::FR64XRegClass |
559 | : &X86::FR64RegClass); |
560 | |
561 | // Disable f32->f64 extload as we can only generate this in one instruction |
562 | // under optsize. So its easier to pattern match (fpext (load)) for that |
563 | // case instead of needing to emit 2 instructions for extload in the |
564 | // non-optsize case. |
565 | setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); |
566 | |
567 | for (auto VT : { MVT::f32, MVT::f64 }) { |
568 | // Use ANDPD to simulate FABS. |
569 | setOperationAction(ISD::FABS, VT, Custom); |
570 | |
571 | // Use XORP to simulate FNEG. |
572 | setOperationAction(ISD::FNEG, VT, Custom); |
573 | |
574 | // Use ANDPD and ORPD to simulate FCOPYSIGN. |
575 | setOperationAction(ISD::FCOPYSIGN, VT, Custom); |
576 | |
577 | // These might be better off as horizontal vector ops. |
578 | setOperationAction(ISD::FADD, VT, Custom); |
579 | setOperationAction(ISD::FSUB, VT, Custom); |
580 | |
581 | // We don't support sin/cos/fmod |
582 | setOperationAction(ISD::FSIN , VT, Expand); |
583 | setOperationAction(ISD::FCOS , VT, Expand); |
584 | setOperationAction(ISD::FSINCOS, VT, Expand); |
585 | } |
586 | |
587 | // Lower this to MOVMSK plus an AND. |
588 | setOperationAction(ISD::FGETSIGN, MVT::i64, Custom); |
589 | setOperationAction(ISD::FGETSIGN, MVT::i32, Custom); |
590 | |
591 | } else if (!Subtarget.useSoftFloat() && X86ScalarSSEf32 && |
592 | (UseX87 || Is64Bit)) { |
593 | // Use SSE for f32, x87 for f64. |
594 | // Set up the FP register classes. |
595 | addRegisterClass(MVT::f32, &X86::FR32RegClass); |
596 | if (UseX87) |
597 | addRegisterClass(MVT::f64, &X86::RFP64RegClass); |
598 | |
599 | // Use ANDPS to simulate FABS. |
600 | setOperationAction(ISD::FABS , MVT::f32, Custom); |
601 | |
602 | // Use XORP to simulate FNEG. |
603 | setOperationAction(ISD::FNEG , MVT::f32, Custom); |
604 | |
605 | if (UseX87) |
606 | setOperationAction(ISD::UNDEF, MVT::f64, Expand); |
607 | |
608 | // Use ANDPS and ORPS to simulate FCOPYSIGN. |
609 | if (UseX87) |
610 | setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); |
611 | setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); |
612 | |
613 | // We don't support sin/cos/fmod |
614 | setOperationAction(ISD::FSIN , MVT::f32, Expand); |
615 | setOperationAction(ISD::FCOS , MVT::f32, Expand); |
616 | setOperationAction(ISD::FSINCOS, MVT::f32, Expand); |
617 | |
618 | if (UseX87) { |
619 | // Always expand sin/cos functions even though x87 has an instruction. |
620 | setOperationAction(ISD::FSIN, MVT::f64, Expand); |
621 | setOperationAction(ISD::FCOS, MVT::f64, Expand); |
622 | setOperationAction(ISD::FSINCOS, MVT::f64, Expand); |
623 | } |
624 | } else if (UseX87) { |
625 | // f32 and f64 in x87. |
626 | // Set up the FP register classes. |
627 | addRegisterClass(MVT::f64, &X86::RFP64RegClass); |
628 | addRegisterClass(MVT::f32, &X86::RFP32RegClass); |
629 | |
630 | for (auto VT : { MVT::f32, MVT::f64 }) { |
631 | setOperationAction(ISD::UNDEF, VT, Expand); |
632 | setOperationAction(ISD::FCOPYSIGN, VT, Expand); |
633 | |
634 | // Always expand sin/cos functions even though x87 has an instruction. |
635 | setOperationAction(ISD::FSIN , VT, Expand); |
636 | setOperationAction(ISD::FCOS , VT, Expand); |
637 | setOperationAction(ISD::FSINCOS, VT, Expand); |
638 | } |
639 | } |
640 | |
641 | // Expand FP32 immediates into loads from the stack, save special cases. |
642 | if (isTypeLegal(MVT::f32)) { |
643 | if (UseX87 && (getRegClassFor(MVT::f32) == &X86::RFP32RegClass)) { |
644 | addLegalFPImmediate(APFloat(+0.0f)); |
645 | addLegalFPImmediate(APFloat(+1.0f)); |
646 | addLegalFPImmediate(APFloat(-0.0f)); |
647 | addLegalFPImmediate(APFloat(-1.0f)); |
648 | } else |
649 | addLegalFPImmediate(APFloat(+0.0f)); |
650 | } |
651 | // Expand FP64 immediates into loads from the stack, save special cases. |
652 | if (isTypeLegal(MVT::f64)) { |
653 | if (UseX87 && getRegClassFor(MVT::f64) == &X86::RFP64RegClass) { |
654 | addLegalFPImmediate(APFloat(+0.0)); |
655 | addLegalFPImmediate(APFloat(+1.0)); |
656 | addLegalFPImmediate(APFloat(-0.0)); |
657 | addLegalFPImmediate(APFloat(-1.0)); |
658 | } else |
659 | addLegalFPImmediate(APFloat(+0.0)); |
660 | } |
661 | // Handle constrained floating-point operations of scalar. |
662 | setOperationAction(ISD::STRICT_FADD, MVT::f32, Legal); |
663 | setOperationAction(ISD::STRICT_FADD, MVT::f64, Legal); |
664 | setOperationAction(ISD::STRICT_FSUB, MVT::f32, Legal); |
665 | setOperationAction(ISD::STRICT_FSUB, MVT::f64, Legal); |
666 | setOperationAction(ISD::STRICT_FMUL, MVT::f32, Legal); |
667 | setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal); |
668 | setOperationAction(ISD::STRICT_FDIV, MVT::f32, Legal); |
669 | setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal); |
670 | setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal); |
671 | setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal); |
672 | setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Legal); |
673 | setOperationAction(ISD::STRICT_FSQRT, MVT::f32, Legal); |
674 | setOperationAction(ISD::STRICT_FSQRT, MVT::f64, Legal); |
675 | |
676 | // We don't support FMA. |
677 | setOperationAction(ISD::FMA, MVT::f64, Expand); |
678 | setOperationAction(ISD::FMA, MVT::f32, Expand); |
679 | |
680 | // Long double (f80) always uses the x87 register stack. |
681 | if (UseX87) { |
682 | addRegisterClass(MVT::f80, &X86::RFP80RegClass); |
683 | setOperationAction(ISD::UNDEF, MVT::f80, Expand); |
684 | setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand); |
685 | { |
686 | APFloat TmpFlt = APFloat::getZero(APFloat::x87DoubleExtended()); |
687 | addLegalFPImmediate(TmpFlt); |
688 | TmpFlt.changeSign(); |
689 | addLegalFPImmediate(TmpFlt); |
690 | |
691 | bool ignored; |
692 | APFloat TmpFlt2(+1.0); |
693 | TmpFlt2.convert(APFloat::x87DoubleExtended(), APFloat::rmNearestTiesToEven, |
694 | &ignored); |
695 | addLegalFPImmediate(TmpFlt2); |
696 | TmpFlt2.changeSign(); |
697 | addLegalFPImmediate(TmpFlt2); |
698 | } |
699 | |
700 | // Always expand sin/cos functions even though x87 has an instruction. |
701 | setOperationAction(ISD::FSIN , MVT::f80, Expand); |
702 | setOperationAction(ISD::FCOS , MVT::f80, Expand); |
703 | setOperationAction(ISD::FSINCOS, MVT::f80, Expand); |
704 | |
705 | setOperationAction(ISD::FFLOOR, MVT::f80, Expand); |
706 | setOperationAction(ISD::FCEIL, MVT::f80, Expand); |
707 | setOperationAction(ISD::FTRUNC, MVT::f80, Expand); |
708 | setOperationAction(ISD::FRINT, MVT::f80, Expand); |
709 | setOperationAction(ISD::FNEARBYINT, MVT::f80, Expand); |
710 | setOperationAction(ISD::FMA, MVT::f80, Expand); |
711 | setOperationAction(ISD::LROUND, MVT::f80, Expand); |
712 | setOperationAction(ISD::LLROUND, MVT::f80, Expand); |
713 | setOperationAction(ISD::LRINT, MVT::f80, Custom); |
714 | setOperationAction(ISD::LLRINT, MVT::f80, Custom); |
715 | |
716 | // Handle constrained floating-point operations of scalar. |
717 | setOperationAction(ISD::STRICT_FADD , MVT::f80, Legal); |
718 | setOperationAction(ISD::STRICT_FSUB , MVT::f80, Legal); |
719 | setOperationAction(ISD::STRICT_FMUL , MVT::f80, Legal); |
720 | setOperationAction(ISD::STRICT_FDIV , MVT::f80, Legal); |
721 | setOperationAction(ISD::STRICT_FSQRT , MVT::f80, Legal); |
722 | setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f80, Legal); |
723 | // FIXME: When the target is 64-bit, STRICT_FP_ROUND will be overwritten |
724 | // as Custom. |
725 | setOperationAction(ISD::STRICT_FP_ROUND, MVT::f80, Legal); |
726 | } |
727 | |
728 | // f128 uses xmm registers, but most operations require libcalls. |
729 | if (!Subtarget.useSoftFloat() && Subtarget.is64Bit() && Subtarget.hasSSE1()) { |
730 | addRegisterClass(MVT::f128, Subtarget.hasVLX() ? &X86::VR128XRegClass |
731 | : &X86::VR128RegClass); |
732 | |
733 | addLegalFPImmediate(APFloat::getZero(APFloat::IEEEquad())); |
734 | |
735 | setOperationAction(ISD::FADD, MVT::f128, LibCall); |
736 | setOperationAction(ISD::STRICT_FADD, MVT::f128, LibCall); |
737 | setOperationAction(ISD::FSUB, MVT::f128, LibCall); |
738 | setOperationAction(ISD::STRICT_FSUB, MVT::f128, LibCall); |
739 | setOperationAction(ISD::FDIV, MVT::f128, LibCall); |
740 | setOperationAction(ISD::STRICT_FDIV, MVT::f128, LibCall); |
741 | setOperationAction(ISD::FMUL, MVT::f128, LibCall); |
742 | setOperationAction(ISD::STRICT_FMUL, MVT::f128, LibCall); |
743 | setOperationAction(ISD::FMA, MVT::f128, LibCall); |
744 | setOperationAction(ISD::STRICT_FMA, MVT::f128, LibCall); |
745 | |
746 | setOperationAction(ISD::FABS, MVT::f128, Custom); |
747 | setOperationAction(ISD::FNEG, MVT::f128, Custom); |
748 | setOperationAction(ISD::FCOPYSIGN, MVT::f128, Custom); |
749 | |
750 | setOperationAction(ISD::FSIN, MVT::f128, LibCall); |
751 | setOperationAction(ISD::STRICT_FSIN, MVT::f128, LibCall); |
752 | setOperationAction(ISD::FCOS, MVT::f128, LibCall); |
753 | setOperationAction(ISD::STRICT_FCOS, MVT::f128, LibCall); |
754 | setOperationAction(ISD::FSINCOS, MVT::f128, LibCall); |
755 | |
756 | setOperationAction(ISD::FSQRT, MVT::f128, LibCall); |
757 | setOperationAction(ISD::STRICT_FSQRT, MVT::f128, LibCall); |
758 | |
759 | setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom); |
760 | setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f128, Custom); |
761 | // We need to custom handle any FP_ROUND with an f128 input, but |
762 | // LegalizeDAG uses the result type to know when to run a custom handler. |
763 | // So we have to list all legal floating point result types here. |
764 | if (isTypeLegal(MVT::f32)) { |
765 | setOperationAction(ISD::FP_ROUND, MVT::f32, Custom); |
766 | setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Custom); |
767 | } |
768 | if (isTypeLegal(MVT::f64)) { |
769 | setOperationAction(ISD::FP_ROUND, MVT::f64, Custom); |
770 | setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Custom); |
771 | } |
772 | if (isTypeLegal(MVT::f80)) { |
773 | setOperationAction(ISD::FP_ROUND, MVT::f80, Custom); |
774 | setOperationAction(ISD::STRICT_FP_ROUND, MVT::f80, Custom); |
775 | } |
776 | |
777 | setOperationAction(ISD::SETCC, MVT::f128, Custom); |
778 | |
779 | setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand); |
780 | setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand); |
781 | setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f80, Expand); |
782 | setTruncStoreAction(MVT::f128, MVT::f32, Expand); |
783 | setTruncStoreAction(MVT::f128, MVT::f64, Expand); |
784 | setTruncStoreAction(MVT::f128, MVT::f80, Expand); |
785 | } |
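// Summary of the f128 block: quad-precision values travel in XMM registers,
// so moves, FABS/FNEG bit-twiddling and compares can be done inline, while
// the actual arithmetic is routed to the soft-fp libcalls (__addtf3 etc.).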
786 | |
787 | // Always use a library call for pow. |
788 | setOperationAction(ISD::FPOW , MVT::f32 , Expand); |
789 | setOperationAction(ISD::FPOW , MVT::f64 , Expand); |
790 | setOperationAction(ISD::FPOW , MVT::f80 , Expand); |
791 | setOperationAction(ISD::FPOW , MVT::f128 , Expand); |
792 | |
793 | setOperationAction(ISD::FLOG, MVT::f80, Expand); |
794 | setOperationAction(ISD::FLOG2, MVT::f80, Expand); |
795 | setOperationAction(ISD::FLOG10, MVT::f80, Expand); |
796 | setOperationAction(ISD::FEXP, MVT::f80, Expand); |
797 | setOperationAction(ISD::FEXP2, MVT::f80, Expand); |
798 | setOperationAction(ISD::FMINNUM, MVT::f80, Expand); |
799 | setOperationAction(ISD::FMAXNUM, MVT::f80, Expand); |
800 | |
801 | // Some FP actions are always expanded for vector types. |
802 | for (auto VT : { MVT::v4f32, MVT::v8f32, MVT::v16f32, |
803 | MVT::v2f64, MVT::v4f64, MVT::v8f64 }) { |
804 | setOperationAction(ISD::FSIN, VT, Expand); |
805 | setOperationAction(ISD::FSINCOS, VT, Expand); |
806 | setOperationAction(ISD::FCOS, VT, Expand); |
807 | setOperationAction(ISD::FREM, VT, Expand); |
808 | setOperationAction(ISD::FCOPYSIGN, VT, Expand); |
809 | setOperationAction(ISD::FPOW, VT, Expand); |
810 | setOperationAction(ISD::FLOG, VT, Expand); |
811 | setOperationAction(ISD::FLOG2, VT, Expand); |
812 | setOperationAction(ISD::FLOG10, VT, Expand); |
813 | setOperationAction(ISD::FEXP, VT, Expand); |
814 | setOperationAction(ISD::FEXP2, VT, Expand); |
815 | } |
816 | |
817 | // First set operation action for all vector types to either promote |
818 | // (for widening) or expand (for scalarization). Then we will selectively |
819 | // turn on ones that can be effectively codegen'd. |
820 | for (MVT VT : MVT::fixedlen_vector_valuetypes()) { |
821 | setOperationAction(ISD::SDIV, VT, Expand); |
822 | setOperationAction(ISD::UDIV, VT, Expand); |
823 | setOperationAction(ISD::SREM, VT, Expand); |
824 | setOperationAction(ISD::UREM, VT, Expand); |
825 | setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT,Expand); |
826 | setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand); |
827 | setOperationAction(ISD::EXTRACT_SUBVECTOR, VT,Expand); |
828 | setOperationAction(ISD::INSERT_SUBVECTOR, VT,Expand); |
829 | setOperationAction(ISD::FMA, VT, Expand); |
830 | setOperationAction(ISD::FFLOOR, VT, Expand); |
831 | setOperationAction(ISD::FCEIL, VT, Expand); |
832 | setOperationAction(ISD::FTRUNC, VT, Expand); |
833 | setOperationAction(ISD::FRINT, VT, Expand); |
834 | setOperationAction(ISD::FNEARBYINT, VT, Expand); |
835 | setOperationAction(ISD::SMUL_LOHI, VT, Expand); |
836 | setOperationAction(ISD::MULHS, VT, Expand); |
837 | setOperationAction(ISD::UMUL_LOHI, VT, Expand); |
838 | setOperationAction(ISD::MULHU, VT, Expand); |
839 | setOperationAction(ISD::SDIVREM, VT, Expand); |
840 | setOperationAction(ISD::UDIVREM, VT, Expand); |
841 | setOperationAction(ISD::CTPOP, VT, Expand); |
842 | setOperationAction(ISD::CTTZ, VT, Expand); |
843 | setOperationAction(ISD::CTLZ, VT, Expand); |
844 | setOperationAction(ISD::ROTL, VT, Expand); |
845 | setOperationAction(ISD::ROTR, VT, Expand); |
846 | setOperationAction(ISD::BSWAP, VT, Expand); |
847 | setOperationAction(ISD::SETCC, VT, Expand); |
848 | setOperationAction(ISD::FP_TO_UINT, VT, Expand); |
849 | setOperationAction(ISD::FP_TO_SINT, VT, Expand); |
850 | setOperationAction(ISD::UINT_TO_FP, VT, Expand); |
851 | setOperationAction(ISD::SINT_TO_FP, VT, Expand); |
852 | setOperationAction(ISD::SIGN_EXTEND_INREG, VT,Expand); |
853 | setOperationAction(ISD::TRUNCATE, VT, Expand); |
854 | setOperationAction(ISD::SIGN_EXTEND, VT, Expand); |
855 | setOperationAction(ISD::ZERO_EXTEND, VT, Expand); |
856 | setOperationAction(ISD::ANY_EXTEND, VT, Expand); |
857 | setOperationAction(ISD::SELECT_CC, VT, Expand); |
858 | for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) { |
859 | setTruncStoreAction(InnerVT, VT, Expand); |
860 | |
861 | setLoadExtAction(ISD::SEXTLOAD, InnerVT, VT, Expand); |
862 | setLoadExtAction(ISD::ZEXTLOAD, InnerVT, VT, Expand); |
863 | |
864 | // N.b. ISD::EXTLOAD legality is basically ignored except for i1-like |
865 | // types, we have to deal with them whether we ask for Expansion or not. |
866 | // Setting Expand causes its own optimisation problems though, so leave |
867 | // them legal. |
868 | if (VT.getVectorElementType() == MVT::i1) |
869 | setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand); |
870 | |
871 | // EXTLOAD for MVT::f16 vectors is not legal because f16 vectors are |
872 | // split/scalarized right now. |
873 | if (VT.getVectorElementType() == MVT::f16) |
874 | setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand); |
875 | } |
876 | } |
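// Everything above set conservative defaults for all fixed-length vector
// types; the feature-gated blocks that follow (MMX, SSE1, SSE2, SSSE3,
// SSE4.1, XOP, AVX, ...) selectively re-enable what each ISA level can do.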
877 | |
878 | // FIXME: In order to prevent SSE instructions being expanded to MMX ones |
879 | // with -msoft-float, disable use of MMX as well. |
880 | if (!Subtarget.useSoftFloat() && Subtarget.hasMMX()) { |
881 | addRegisterClass(MVT::x86mmx, &X86::VR64RegClass); |
882 | // No operations on x86mmx supported, everything uses intrinsics. |
883 | } |
884 | |
885 | if (!Subtarget.useSoftFloat() && Subtarget.hasSSE1()) { |
886 | addRegisterClass(MVT::v4f32, Subtarget.hasVLX() ? &X86::VR128XRegClass |
887 | : &X86::VR128RegClass); |
888 | |
889 | setOperationAction(ISD::FNEG, MVT::v4f32, Custom); |
890 | setOperationAction(ISD::FABS, MVT::v4f32, Custom); |
891 | setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Custom); |
892 | setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); |
893 | setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom); |
894 | setOperationAction(ISD::VSELECT, MVT::v4f32, Custom); |
895 | setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom); |
896 | setOperationAction(ISD::SELECT, MVT::v4f32, Custom); |
897 | |
898 | setOperationAction(ISD::LOAD, MVT::v2f32, Custom); |
899 | setOperationAction(ISD::STORE, MVT::v2f32, Custom); |
900 | |
901 | setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal); |
902 | setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal); |
903 | setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal); |
904 | setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal); |
905 | setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal); |
906 | } |
907 | |
908 | if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) { |
909 | addRegisterClass(MVT::v2f64, Subtarget.hasVLX() ? &X86::VR128XRegClass |
910 | : &X86::VR128RegClass); |
911 | |
912 | // FIXME: Unfortunately, -soft-float and -no-implicit-float mean XMM |
913 | // registers cannot be used even for integer operations. |
914 | addRegisterClass(MVT::v16i8, Subtarget.hasVLX() ? &X86::VR128XRegClass |
915 | : &X86::VR128RegClass); |
916 | addRegisterClass(MVT::v8i16, Subtarget.hasVLX() ? &X86::VR128XRegClass |
917 | : &X86::VR128RegClass); |
918 | addRegisterClass(MVT::v4i32, Subtarget.hasVLX() ? &X86::VR128XRegClass |
919 | : &X86::VR128RegClass); |
920 | addRegisterClass(MVT::v2i64, Subtarget.hasVLX() ? &X86::VR128XRegClass |
921 | : &X86::VR128RegClass); |
922 | |
923 | for (auto VT : { MVT::v2i8, MVT::v4i8, MVT::v8i8, |
924 | MVT::v2i16, MVT::v4i16, MVT::v2i32 }) { |
925 | setOperationAction(ISD::SDIV, VT, Custom); |
926 | setOperationAction(ISD::SREM, VT, Custom); |
927 | setOperationAction(ISD::UDIV, VT, Custom); |
928 | setOperationAction(ISD::UREM, VT, Custom); |
929 | } |
930 | |
931 | setOperationAction(ISD::MUL, MVT::v2i8, Custom); |
932 | setOperationAction(ISD::MUL, MVT::v4i8, Custom); |
933 | setOperationAction(ISD::MUL, MVT::v8i8, Custom); |
934 | |
935 | setOperationAction(ISD::MUL, MVT::v16i8, Custom); |
936 | setOperationAction(ISD::MUL, MVT::v4i32, Custom); |
937 | setOperationAction(ISD::MUL, MVT::v2i64, Custom); |
938 | setOperationAction(ISD::MULHU, MVT::v4i32, Custom); |
939 | setOperationAction(ISD::MULHS, MVT::v4i32, Custom); |
940 | setOperationAction(ISD::MULHU, MVT::v16i8, Custom); |
941 | setOperationAction(ISD::MULHS, MVT::v16i8, Custom); |
942 | setOperationAction(ISD::MULHU, MVT::v8i16, Legal); |
943 | setOperationAction(ISD::MULHS, MVT::v8i16, Legal); |
944 | setOperationAction(ISD::MUL, MVT::v8i16, Legal); |
945 | |
946 | setOperationAction(ISD::SMULO, MVT::v16i8, Custom); |
947 | setOperationAction(ISD::UMULO, MVT::v16i8, Custom); |
948 | |
949 | setOperationAction(ISD::FNEG, MVT::v2f64, Custom); |
950 | setOperationAction(ISD::FABS, MVT::v2f64, Custom); |
951 | setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Custom); |
952 | |
953 | for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) { |
954 | setOperationAction(ISD::SMAX, VT, VT == MVT::v8i16 ? Legal : Custom); |
955 | setOperationAction(ISD::SMIN, VT, VT == MVT::v8i16 ? Legal : Custom); |
956 | setOperationAction(ISD::UMAX, VT, VT == MVT::v16i8 ? Legal : Custom); |
957 | setOperationAction(ISD::UMIN, VT, VT == MVT::v16i8 ? Legal : Custom); |
958 | } |
959 | |
960 | setOperationAction(ISD::UADDSAT, MVT::v16i8, Legal); |
961 | setOperationAction(ISD::SADDSAT, MVT::v16i8, Legal); |
962 | setOperationAction(ISD::USUBSAT, MVT::v16i8, Legal); |
963 | setOperationAction(ISD::SSUBSAT, MVT::v16i8, Legal); |
964 | setOperationAction(ISD::UADDSAT, MVT::v8i16, Legal); |
965 | setOperationAction(ISD::SADDSAT, MVT::v8i16, Legal); |
966 | setOperationAction(ISD::USUBSAT, MVT::v8i16, Legal); |
967 | setOperationAction(ISD::SSUBSAT, MVT::v8i16, Legal); |
968 | setOperationAction(ISD::USUBSAT, MVT::v4i32, Custom); |
969 | setOperationAction(ISD::USUBSAT, MVT::v2i64, Custom); |
970 | |
971 | setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom); |
972 | setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom); |
973 | setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom); |
974 | |
975 | for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) { |
976 | setOperationAction(ISD::SETCC, VT, Custom); |
977 | setOperationAction(ISD::STRICT_FSETCC, VT, Custom); |
978 | setOperationAction(ISD::STRICT_FSETCCS, VT, Custom); |
979 | setOperationAction(ISD::CTPOP, VT, Custom); |
980 | setOperationAction(ISD::ABS, VT, Custom); |
981 | |
982 | // The condition codes aren't legal in SSE/AVX and under AVX512 we use |
983 | // setcc all the way to isel and prefer SETGT in some isel patterns. |
984 | setCondCodeAction(ISD::SETLT, VT, Custom); |
985 | setCondCodeAction(ISD::SETLE, VT, Custom); |
986 | } |
987 | |
988 | for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) { |
989 | setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); |
990 | setOperationAction(ISD::BUILD_VECTOR, VT, Custom); |
991 | setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); |
992 | setOperationAction(ISD::VSELECT, VT, Custom); |
993 | setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); |
994 | } |
995 | |
996 | for (auto VT : { MVT::v2f64, MVT::v2i64 }) { |
997 | setOperationAction(ISD::BUILD_VECTOR, VT, Custom); |
998 | setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); |
999 | setOperationAction(ISD::VSELECT, VT, Custom); |
1000 | |
1001 | if (VT == MVT::v2i64 && !Subtarget.is64Bit()) |
1002 | continue; |
1003 | |
1004 | setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); |
1005 | setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); |
1006 | } |
1007 | |
1008 | |
1009 | setOperationAction(ISD::SELECT, MVT::v2f64, Custom); |
1010 | setOperationAction(ISD::SELECT, MVT::v2i64, Custom); |
1011 | setOperationAction(ISD::SELECT, MVT::v4i32, Custom); |
1012 | setOperationAction(ISD::SELECT, MVT::v8i16, Custom); |
1013 | setOperationAction(ISD::SELECT, MVT::v16i8, Custom); |
1014 | |
1015 | setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal); |
1016 | setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Custom); |
1017 | setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom); |
1018 | setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Custom); |
1019 | setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Legal); |
1020 | setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i32, Custom); |
1021 | |
1022 | // Custom legalize these to avoid over promotion or custom promotion. |
1023 | for (auto VT : {MVT::v2i8, MVT::v4i8, MVT::v8i8, MVT::v2i16, MVT::v4i16}) { |
1024 | setOperationAction(ISD::FP_TO_SINT, VT, Custom); |
1025 | setOperationAction(ISD::FP_TO_UINT, VT, Custom); |
1026 | setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Custom); |
1027 | setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Custom); |
1028 | } |
1029 | |
1030 | setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal); |
1031 | setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Legal); |
1032 | setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom); |
1033 | setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i32, Custom); |
1034 | |
1035 | setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom); |
1036 | setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i32, Custom); |
1037 | |
1038 | setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom); |
1039 | setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, Custom); |
1040 | |
1041 | |
1042 | setOperationAction(ISD::SINT_TO_FP, MVT::v2f32, Custom); |
1043 | setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2f32, Custom); |
1044 | setOperationAction(ISD::UINT_TO_FP, MVT::v2f32, Custom); |
1045 | setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2f32, Custom); |
1046 | |
1047 | setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom); |
1048 | setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v2f32, Custom); |
1049 | setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom); |
1050 | setOperationAction(ISD::STRICT_FP_ROUND, MVT::v2f32, Custom); |
1051 | |
1052 | // We want to legalize this to an f64 load rather than an i64 load on |
1053 | // 64-bit targets and two 32-bit loads on a 32-bit target. Similar for |
1054 | // store. |
1055 | setOperationAction(ISD::LOAD, MVT::v2i32, Custom); |
1056 | setOperationAction(ISD::LOAD, MVT::v4i16, Custom); |
1057 | setOperationAction(ISD::LOAD, MVT::v8i8, Custom); |
1058 | setOperationAction(ISD::STORE, MVT::v2i32, Custom); |
1059 | setOperationAction(ISD::STORE, MVT::v4i16, Custom); |
1060 | setOperationAction(ISD::STORE, MVT::v8i8, Custom); |
1061 | |
1062 | setOperationAction(ISD::BITCAST, MVT::v2i32, Custom); |
1063 | setOperationAction(ISD::BITCAST, MVT::v4i16, Custom); |
1064 | setOperationAction(ISD::BITCAST, MVT::v8i8, Custom); |
1065 | if (!Subtarget.hasAVX512()) |
1066 | setOperationAction(ISD::BITCAST, MVT::v16i1, Custom); |
1067 | |
1068 | setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v2i64, Custom); |
1069 | setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i32, Custom); |
1070 | setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i16, Custom); |
1071 | |
1072 | setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom); |
1073 | |
1074 | setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom); |
1075 | setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom); |
1076 | setOperationAction(ISD::TRUNCATE, MVT::v2i32, Custom); |
1077 | setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom); |
1078 | setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom); |
1079 | setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom); |
1080 | |
1081 | |
1082 | |
1083 | for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) { |
1084 | setOperationAction(ISD::SRL, VT, Custom); |
1085 | setOperationAction(ISD::SHL, VT, Custom); |
1086 | setOperationAction(ISD::SRA, VT, Custom); |
1087 | } |
1088 | |
1089 | setOperationAction(ISD::ROTL, MVT::v4i32, Custom); |
1090 | setOperationAction(ISD::ROTL, MVT::v8i16, Custom); |
1091 | |
1092 | // With 512-bit registers or AVX512VL+BW, expanding (and promoting the |
1093 | // shifts) is better. |
1094 | if (!Subtarget.useAVX512Regs() && |
1095 | !(Subtarget.hasBWI() && Subtarget.hasVLX())) |
1096 | setOperationAction(ISD::ROTL, MVT::v16i8, Custom); |
1097 | |
1098 | setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal); |
1099 | setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal); |
1100 | setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal); |
1101 | setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal); |
1102 | setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal); |
1103 | } |
1104 | |
1105 | if (!Subtarget.useSoftFloat() && Subtarget.hasSSSE3()) { |
1106 | setOperationAction(ISD::ABS, MVT::v16i8, Legal); |
1107 | setOperationAction(ISD::ABS, MVT::v8i16, Legal); |
1108 | setOperationAction(ISD::ABS, MVT::v4i32, Legal); |
1109 | setOperationAction(ISD::BITREVERSE, MVT::v16i8, Custom); |
1110 | setOperationAction(ISD::CTLZ, MVT::v16i8, Custom); |
1111 | setOperationAction(ISD::CTLZ, MVT::v8i16, Custom); |
1112 | setOperationAction(ISD::CTLZ, MVT::v4i32, Custom); |
1113 | setOperationAction(ISD::CTLZ, MVT::v2i64, Custom); |
1114 | |
1115 | // These might be better off as horizontal vector ops. |
1116 | setOperationAction(ISD::ADD, MVT::i16, Custom); |
1117 | setOperationAction(ISD::ADD, MVT::i32, Custom); |
1118 | setOperationAction(ISD::SUB, MVT::i16, Custom); |
1119 | setOperationAction(ISD::SUB, MVT::i32, Custom); |
1120 | } |
1121 | |
1122 | if (!Subtarget.useSoftFloat() && Subtarget.hasSSE41()) { |
1123 | for (MVT RoundedTy : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) { |
1124 | setOperationAction(ISD::FFLOOR, RoundedTy, Legal); |
1125 | setOperationAction(ISD::STRICT_FFLOOR, RoundedTy, Legal); |
1126 | setOperationAction(ISD::FCEIL, RoundedTy, Legal); |
1127 | setOperationAction(ISD::STRICT_FCEIL, RoundedTy, Legal); |
1128 | setOperationAction(ISD::FTRUNC, RoundedTy, Legal); |
1129 | setOperationAction(ISD::STRICT_FTRUNC, RoundedTy, Legal); |
1130 | setOperationAction(ISD::FRINT, RoundedTy, Legal); |
1131 | setOperationAction(ISD::STRICT_FRINT, RoundedTy, Legal); |
1132 | setOperationAction(ISD::FNEARBYINT, RoundedTy, Legal); |
1133 | setOperationAction(ISD::STRICT_FNEARBYINT, RoundedTy, Legal); |
1134 | setOperationAction(ISD::FROUNDEVEN, RoundedTy, Legal); |
1135 | setOperationAction(ISD::STRICT_FROUNDEVEN, RoundedTy, Legal); |
1136 | |
1137 | setOperationAction(ISD::FROUND, RoundedTy, Custom); |
1138 | } |
1139 | |
1140 | setOperationAction(ISD::SMAX, MVT::v16i8, Legal); |
1141 | setOperationAction(ISD::SMAX, MVT::v4i32, Legal); |
1142 | setOperationAction(ISD::UMAX, MVT::v8i16, Legal); |
1143 | setOperationAction(ISD::UMAX, MVT::v4i32, Legal); |
1144 | setOperationAction(ISD::SMIN, MVT::v16i8, Legal); |
1145 | setOperationAction(ISD::SMIN, MVT::v4i32, Legal); |
1146 | setOperationAction(ISD::UMIN, MVT::v8i16, Legal); |
1147 | setOperationAction(ISD::UMIN, MVT::v4i32, Legal); |
1148 | |
1149 | setOperationAction(ISD::UADDSAT, MVT::v4i32, Custom); |
1150 | |
1151 | // FIXME: Do we need to handle scalar-to-vector here? |
1152 | setOperationAction(ISD::MUL, MVT::v4i32, Legal); |
1153 | |
1154 | // We directly match byte blends in the backend as they match the VSELECT |
1155 | // condition form. |
1156 | setOperationAction(ISD::VSELECT, MVT::v16i8, Legal); |
1157 | |
1158 | // SSE41 brings specific instructions for doing vector sign extend even in |
1159 | // cases where we don't have SRA. |
1160 | for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64 }) { |
1161 | setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Legal); |
1162 | setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Legal); |
1163 | } |
1164 | |
1165 | // SSE41 also has vector sign/zero extending loads, PMOV[SZ]X |
1166 | for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) { |
1167 | setLoadExtAction(LoadExtOp, MVT::v8i16, MVT::v8i8, Legal); |
1168 | setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i8, Legal); |
1169 | setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i8, Legal); |
1170 | setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i16, Legal); |
1171 | setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i16, Legal); |
1172 | setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i32, Legal); |
1173 | } |
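// PMOVSX/PMOVZX can fold the extension into the load itself, which is why
// these sext/zext load actions only become Legal from SSE4.1 onwards.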
1174 | |
1175 | // i8 vectors are custom because the source register and source |
1176 | // memory operand types are not the same width. |
1177 | setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom); |
1178 | |
1179 | if (Subtarget.is64Bit() && !Subtarget.hasAVX512()) { |
1180 | // We need to scalarize v4i64->v4i32 uint_to_fp using cvtsi2ss, but we can |
1181 | // do the pre and post work in the vector domain. |
1182 | setOperationAction(ISD::UINT_TO_FP, MVT::v4i64, Custom); |
1183 | setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i64, Custom); |
1184 | // We need to mark SINT_TO_FP as Custom even though we want to expand it |
1185 | // so that DAG combine doesn't reduce the width of the mul. |
1186 | setOperationAction(ISD::SINT_TO_FP, MVT::v4i64, Custom); |
1187 | setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i64, Custom); |
1188 | } |
1189 | } |
1190 | |
1191 | if (!Subtarget.useSoftFloat() && Subtarget.hasSSE42()) { |
1192 | setOperationAction(ISD::UADDSAT, MVT::v2i64, Custom); |
1193 | } |
1194 | |
1195 | if (!Subtarget.useSoftFloat() && Subtarget.hasXOP()) { |
1196 | for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, |
1197 | MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) |
1198 | setOperationAction(ISD::ROTL, VT, Custom); |
1199 | |
1200 | |
1201 | for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) |
1202 | setOperationAction(ISD::BITREVERSE, VT, Custom); |
1203 | |
1204 | for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, |
1205 | MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) |
1206 | setOperationAction(ISD::BITREVERSE, VT, Custom); |
1207 | } |
1208 | |
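     | // AVX introduces the 256-bit vector types; their register classes and |
     | // operation actions are configured below. |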
1209 | if (!Subtarget.useSoftFloat() && Subtarget.hasAVX()) { |
1210 | bool HasInt256 = Subtarget.hasInt256(); |
1211 | |
1212 | addRegisterClass(MVT::v32i8, Subtarget.hasVLX() ? &X86::VR256XRegClass |
1213 | : &X86::VR256RegClass); |
1214 | addRegisterClass(MVT::v16i16, Subtarget.hasVLX() ? &X86::VR256XRegClass |
1215 | : &X86::VR256RegClass); |
1216 | addRegisterClass(MVT::v8i32, Subtarget.hasVLX() ? &X86::VR256XRegClass |
1217 | : &X86::VR256RegClass); |
1218 | addRegisterClass(MVT::v8f32, Subtarget.hasVLX() ? &X86::VR256XRegClass |
1219 | : &X86::VR256RegClass); |
1220 | addRegisterClass(MVT::v4i64, Subtarget.hasVLX() ? &X86::VR256XRegClass |
1221 | : &X86::VR256RegClass); |
1222 | addRegisterClass(MVT::v4f64, Subtarget.hasVLX() ? &X86::VR256XRegClass |
1223 | : &X86::VR256RegClass); |
1224 | |
1225 | for (auto VT : { MVT::v8f32, MVT::v4f64 }) { |
1226 | setOperationAction(ISD::FFLOOR, VT, Legal); |
1227 | setOperationAction(ISD::STRICT_FFLOOR, VT, Legal); |
1228 | setOperationAction(ISD::FCEIL, VT, Legal); |
1229 | setOperationAction(ISD::STRICT_FCEIL, VT, Legal); |
1230 | setOperationAction(ISD::FTRUNC, VT, Legal); |
1231 | setOperationAction(ISD::STRICT_FTRUNC, VT, Legal); |
1232 | setOperationAction(ISD::FRINT, VT, Legal); |
1233 | setOperationAction(ISD::STRICT_FRINT, VT, Legal); |
1234 | setOperationAction(ISD::FNEARBYINT, VT, Legal); |
1235 | setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal); |
1236 | setOperationAction(ISD::FROUNDEVEN, VT, Legal); |
1237 | setOperationAction(ISD::STRICT_FROUNDEVEN, VT, Legal); |
1238 | |
1239 | setOperationAction(ISD::FROUND, VT, Custom); |
1240 | |
1241 | setOperationAction(ISD::FNEG, VT, Custom); |
1242 | setOperationAction(ISD::FABS, VT, Custom); |
1243 | setOperationAction(ISD::FCOPYSIGN, VT, Custom); |
1244 | } |
1245 | |
1246 | |
1247 | |
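     | // fp-to-int producing v8i16 must be promoted to v8i32 even though |
     | // v8i16 is itself a legal type. |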
1248 | setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i16, MVT::v8i32); |
1249 | setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i16, MVT::v8i32); |
1250 | setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v8i16, MVT::v8i32); |
1251 | setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v8i16, MVT::v8i32); |
1252 | setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal); |
1253 | setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Custom); |
1254 | setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v8i32, Legal); |
1255 | |
1256 | setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal); |
1257 | setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v8i32, Legal); |
1258 | |
1259 | setOperationAction(ISD::STRICT_FP_ROUND, MVT::v4f32, Legal); |
1260 | setOperationAction(ISD::STRICT_FADD, MVT::v8f32, Legal); |
1261 | setOperationAction(ISD::STRICT_FADD, MVT::v4f64, Legal); |
1262 | setOperationAction(ISD::STRICT_FSUB, MVT::v8f32, Legal); |
1263 | setOperationAction(ISD::STRICT_FSUB, MVT::v4f64, Legal); |
1264 | setOperationAction(ISD::STRICT_FMUL, MVT::v8f32, Legal); |
1265 | setOperationAction(ISD::STRICT_FMUL, MVT::v4f64, Legal); |
1266 | setOperationAction(ISD::STRICT_FDIV, MVT::v8f32, Legal); |
1267 | setOperationAction(ISD::STRICT_FDIV, MVT::v4f64, Legal); |
1268 | setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v4f64, Legal); |
1269 | setOperationAction(ISD::STRICT_FSQRT, MVT::v8f32, Legal); |
1270 | setOperationAction(ISD::STRICT_FSQRT, MVT::v4f64, Legal); |
1271 | |
1272 | if (!Subtarget.hasAVX512()) |
1273 | setOperationAction(ISD::BITCAST, MVT::v32i1, Custom); |
1274 | |
1275 | |
1276 | |
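     | // 256-bit integer shifts are custom lowered (split into 128-bit halves |
     | // when AVX2 is unavailable). |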
1277 | for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) { |
1278 | setOperationAction(ISD::SRL, VT, Custom); |
1279 | setOperationAction(ISD::SHL, VT, Custom); |
1280 | setOperationAction(ISD::SRA, VT, Custom); |
1281 | } |
1282 | |
1283 | |
1284 | setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom); |
1285 | setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom); |
1286 | setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom); |
1287 | setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom); |
1288 | |
1289 | setOperationAction(ISD::ROTL, MVT::v8i32, Custom); |
1290 | setOperationAction(ISD::ROTL, MVT::v16i16, Custom); |
1291 | |
1292 | |
1293 | if (!Subtarget.useBWIRegs()) |
1294 | setOperationAction(ISD::ROTL, MVT::v32i8, Custom); |
1295 | |
1296 | setOperationAction(ISD::SELECT, MVT::v4f64, Custom); |
1297 | setOperationAction(ISD::SELECT, MVT::v4i64, Custom); |
1298 | setOperationAction(ISD::SELECT, MVT::v8i32, Custom); |
1299 | setOperationAction(ISD::SELECT, MVT::v16i16, Custom); |
1300 | setOperationAction(ISD::SELECT, MVT::v32i8, Custom); |
1301 | setOperationAction(ISD::SELECT, MVT::v8f32, Custom); |
1302 | |
1303 | for (auto VT : { MVT::v16i16, MVT::v8i32, MVT::v4i64 }) { |
1304 | setOperationAction(ISD::SIGN_EXTEND, VT, Custom); |
1305 | setOperationAction(ISD::ZERO_EXTEND, VT, Custom); |
1306 | setOperationAction(ISD::ANY_EXTEND, VT, Custom); |
1307 | } |
1308 | |
1309 | setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom); |
1310 | setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom); |
1311 | setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom); |
1312 | setOperationAction(ISD::BITREVERSE, MVT::v32i8, Custom); |
1313 | |
1314 | for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) { |
1315 | setOperationAction(ISD::SETCC, VT, Custom); |
1316 | setOperationAction(ISD::STRICT_FSETCC, VT, Custom); |
1317 | setOperationAction(ISD::STRICT_FSETCCS, VT, Custom); |
1318 | setOperationAction(ISD::CTPOP, VT, Custom); |
1319 | setOperationAction(ISD::CTLZ, VT, Custom); |
1320 | |
1321 | |
1322 | |
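     | // SETLT/SETLE aren't directly supported by the vector compares; custom |
     | // lowering rewrites them into the GT/GE forms. |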
1323 | setCondCodeAction(ISD::SETLT, VT, Custom); |
1324 | setCondCodeAction(ISD::SETLE, VT, Custom); |
1325 | } |
1326 | |
1327 | if (Subtarget.hasAnyFMA()) { |
1328 | for (auto VT : { MVT::f32, MVT::f64, MVT::v4f32, MVT::v8f32, |
1329 | MVT::v2f64, MVT::v4f64 }) { |
1330 | setOperationAction(ISD::FMA, VT, Legal); |
1331 | setOperationAction(ISD::STRICT_FMA, VT, Legal); |
1332 | } |
1333 | } |
1334 | |
1335 | for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) { |
1336 | setOperationAction(ISD::ADD, VT, HasInt256 ? Legal : Custom); |
1337 | setOperationAction(ISD::SUB, VT, HasInt256 ? Legal : Custom); |
1338 | } |
1339 | |
1340 | setOperationAction(ISD::MUL, MVT::v4i64, Custom); |
1341 | setOperationAction(ISD::MUL, MVT::v8i32, HasInt256 ? Legal : Custom); |
1342 | setOperationAction(ISD::MUL, MVT::v16i16, HasInt256 ? Legal : Custom); |
1343 | setOperationAction(ISD::MUL, MVT::v32i8, Custom); |
1344 | |
1345 | setOperationAction(ISD::MULHU, MVT::v8i32, Custom); |
1346 | setOperationAction(ISD::MULHS, MVT::v8i32, Custom); |
1347 | setOperationAction(ISD::MULHU, MVT::v16i16, HasInt256 ? Legal : Custom); |
1348 | setOperationAction(ISD::MULHS, MVT::v16i16, HasInt256 ? Legal : Custom); |
1349 | setOperationAction(ISD::MULHU, MVT::v32i8, Custom); |
1350 | setOperationAction(ISD::MULHS, MVT::v32i8, Custom); |
1351 | |
1352 | setOperationAction(ISD::SMULO, MVT::v32i8, Custom); |
1353 | setOperationAction(ISD::UMULO, MVT::v32i8, Custom); |
1354 | |
1355 | setOperationAction(ISD::ABS, MVT::v4i64, Custom); |
1356 | setOperationAction(ISD::SMAX, MVT::v4i64, Custom); |
1357 | setOperationAction(ISD::UMAX, MVT::v4i64, Custom); |
1358 | setOperationAction(ISD::SMIN, MVT::v4i64, Custom); |
1359 | setOperationAction(ISD::UMIN, MVT::v4i64, Custom); |
1360 | |
1361 | setOperationAction(ISD::UADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom); |
1362 | setOperationAction(ISD::SADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom); |
1363 | setOperationAction(ISD::USUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom); |
1364 | setOperationAction(ISD::SSUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom); |
1365 | setOperationAction(ISD::UADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom); |
1366 | setOperationAction(ISD::SADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom); |
1367 | setOperationAction(ISD::USUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom); |
1368 | setOperationAction(ISD::SSUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom); |
1369 | setOperationAction(ISD::UADDSAT, MVT::v8i32, Custom); |
1370 | setOperationAction(ISD::USUBSAT, MVT::v8i32, Custom); |
1371 | setOperationAction(ISD::UADDSAT, MVT::v4i64, Custom); |
1372 | setOperationAction(ISD::USUBSAT, MVT::v4i64, Custom); |
1373 | |
1374 | for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) { |
1375 | setOperationAction(ISD::ABS, VT, HasInt256 ? Legal : Custom); |
1376 | setOperationAction(ISD::SMAX, VT, HasInt256 ? Legal : Custom); |
1377 | setOperationAction(ISD::UMAX, VT, HasInt256 ? Legal : Custom); |
1378 | setOperationAction(ISD::SMIN, VT, HasInt256 ? Legal : Custom); |
1379 | setOperationAction(ISD::UMIN, VT, HasInt256 ? Legal : Custom); |
1380 | } |
1381 | |
1382 | for (auto VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64}) { |
1383 | setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom); |
1384 | setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom); |
1385 | } |
1386 | |
1387 | if (HasInt256) { |
1388 | |
1389 | |
1390 | setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Custom); |
1391 | setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v8i32, Custom); |
1392 | |
1393 | |
1394 | for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) { |
1395 | setLoadExtAction(LoadExtOp, MVT::v16i16, MVT::v16i8, Legal); |
1396 | setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i8, Legal); |
1397 | setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i8, Legal); |
1398 | setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i16, Legal); |
1399 | setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i16, Legal); |
1400 | setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i32, Legal); |
1401 | } |
1402 | } |
1403 | |
1404 | for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64, |
1405 | MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) { |
1406 | setOperationAction(ISD::MLOAD, VT, Subtarget.hasVLX() ? Legal : Custom); |
1407 | setOperationAction(ISD::MSTORE, VT, Legal); |
1408 | } |
1409 | |
1410 | |
1411 | |
1412 | for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, |
1413 | MVT::v4f32, MVT::v2f64 }) { |
1414 | setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal); |
1415 | } |
1416 | |
1417 | |
1418 | for (MVT VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, |
1419 | MVT::v8f32, MVT::v4f64 }) { |
1420 | setOperationAction(ISD::BUILD_VECTOR, VT, Custom); |
1421 | setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); |
1422 | setOperationAction(ISD::VSELECT, VT, Custom); |
1423 | setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); |
1424 | setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); |
1425 | setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); |
1426 | setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal); |
1427 | setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); |
1428 | setOperationAction(ISD::STORE, VT, Custom); |
1429 | } |
1430 | |
1431 | if (HasInt256) { |
1432 | setOperationAction(ISD::VSELECT, MVT::v32i8, Legal); |
1433 | |
1434 | |
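     | // Custom legalize the 2 x 32-bit gathers to get slightly better code. |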
1435 | setOperationAction(ISD::MGATHER, MVT::v2f32, Custom); |
1436 | setOperationAction(ISD::MGATHER, MVT::v2i32, Custom); |
1437 | |
1438 | for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64, |
1439 | MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) |
1440 | setOperationAction(ISD::MGATHER, VT, Custom); |
1441 | } |
1442 | } |
1443 | |
1444 | |
1445 | |
1446 | |
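     | // This block controls legalization of the mask vector (k-register) |
     | // types available with AVX512; the 512-bit vector operations are gated |
     | // separately by useAVX512Regs below. |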
1447 | if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) { |
1448 | addRegisterClass(MVT::v1i1, &X86::VK1RegClass); |
1449 | addRegisterClass(MVT::v2i1, &X86::VK2RegClass); |
1450 | addRegisterClass(MVT::v4i1, &X86::VK4RegClass); |
1451 | addRegisterClass(MVT::v8i1, &X86::VK8RegClass); |
1452 | addRegisterClass(MVT::v16i1, &X86::VK16RegClass); |
1453 | |
1454 | setOperationAction(ISD::SELECT, MVT::v1i1, Custom); |
1455 | setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v1i1, Custom); |
1456 | setOperationAction(ISD::BUILD_VECTOR, MVT::v1i1, Custom); |
1457 | |
1458 | setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i1, MVT::v8i32); |
1459 | setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i1, MVT::v8i32); |
1460 | setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v4i1, MVT::v4i32); |
1461 | setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v4i1, MVT::v4i32); |
1462 | setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v8i1, MVT::v8i32); |
1463 | setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v8i1, MVT::v8i32); |
1464 | setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v4i1, MVT::v4i32); |
1465 | setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v4i1, MVT::v4i32); |
1466 | setOperationAction(ISD::FP_TO_SINT, MVT::v2i1, Custom); |
1467 | setOperationAction(ISD::FP_TO_UINT, MVT::v2i1, Custom); |
1468 | setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i1, Custom); |
1469 | setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i1, Custom); |
1470 | |
1471 | |
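     | // Without AVX512DQ there is no byte-sized k-register move (KMOVB), so |
     | // narrow mask loads and stores need custom handling. |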
1472 | if (!Subtarget.hasDQI()) { |
1473 | setOperationAction(ISD::LOAD, MVT::v1i1, Custom); |
1474 | setOperationAction(ISD::LOAD, MVT::v2i1, Custom); |
1475 | setOperationAction(ISD::LOAD, MVT::v4i1, Custom); |
1476 | setOperationAction(ISD::LOAD, MVT::v8i1, Custom); |
1477 | |
1478 | setOperationAction(ISD::STORE, MVT::v1i1, Custom); |
1479 | setOperationAction(ISD::STORE, MVT::v2i1, Custom); |
1480 | setOperationAction(ISD::STORE, MVT::v4i1, Custom); |
1481 | setOperationAction(ISD::STORE, MVT::v8i1, Custom); |
1482 | } |
1483 | |
1484 | |
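     | // Extends of vXi1 masks to 128-bit vector types are custom lowered. |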
1485 | for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) { |
1486 | setOperationAction(ISD::SIGN_EXTEND, VT, Custom); |
1487 | setOperationAction(ISD::ZERO_EXTEND, VT, Custom); |
1488 | setOperationAction(ISD::ANY_EXTEND, VT, Custom); |
1489 | } |
1490 | |
1491 | for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 }) |
1492 | setOperationAction(ISD::VSELECT, VT, Expand); |
1493 | |
1494 | for (auto VT : { MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 }) { |
1495 | setOperationAction(ISD::SETCC, VT, Custom); |
1496 | setOperationAction(ISD::STRICT_FSETCC, VT, Custom); |
1497 | setOperationAction(ISD::STRICT_FSETCCS, VT, Custom); |
1498 | setOperationAction(ISD::SELECT, VT, Custom); |
1499 | setOperationAction(ISD::TRUNCATE, VT, Custom); |
1500 | |
1501 | setOperationAction(ISD::BUILD_VECTOR, VT, Custom); |
1502 | setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); |
1503 | setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); |
1504 | setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); |
1505 | setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); |
1506 | setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); |
1507 | } |
1508 | |
1509 | for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1 }) |
1510 | setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); |
1511 | } |
1512 | |
1513 | |
1514 | |
1515 | |
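     | // This block controls legalization of 512-bit operations with 32/64-bit |
     | // elements; 512-bit registers may be disabled via prefer-vector-width. |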
1516 | if (!Subtarget.useSoftFloat() && Subtarget.useAVX512Regs()) { |
1517 | bool HasBWI = Subtarget.hasBWI(); |
1518 | |
1519 | addRegisterClass(MVT::v16i32, &X86::VR512RegClass); |
1520 | addRegisterClass(MVT::v16f32, &X86::VR512RegClass); |
1521 | addRegisterClass(MVT::v8i64, &X86::VR512RegClass); |
1522 | addRegisterClass(MVT::v8f64, &X86::VR512RegClass); |
1523 | addRegisterClass(MVT::v32i16, &X86::VR512RegClass); |
1524 | addRegisterClass(MVT::v64i8, &X86::VR512RegClass); |
1525 | |
1526 | for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD}) { |
1527 | setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i8, Legal); |
1528 | setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i16, Legal); |
1529 | setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i8, Legal); |
1530 | setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i16, Legal); |
1531 | setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i32, Legal); |
1532 | if (HasBWI) |
1533 | setLoadExtAction(ExtType, MVT::v32i16, MVT::v32i8, Legal); |
1534 | } |
1535 | |
1536 | for (MVT VT : { MVT::v16f32, MVT::v8f64 }) { |
1537 | setOperationAction(ISD::FNEG, VT, Custom); |
1538 | setOperationAction(ISD::FABS, VT, Custom); |
1539 | setOperationAction(ISD::FMA, VT, Legal); |
1540 | setOperationAction(ISD::STRICT_FMA, VT, Legal); |
1541 | setOperationAction(ISD::FCOPYSIGN, VT, Custom); |
1542 | } |
1543 | |
1544 | for (MVT VT : { MVT::v16i1, MVT::v16i8, MVT::v16i16 }) { |
1545 | setOperationPromotedToType(ISD::FP_TO_SINT, VT, MVT::v16i32); |
1546 | setOperationPromotedToType(ISD::FP_TO_UINT, VT, MVT::v16i32); |
1547 | setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, VT, MVT::v16i32); |
1548 | setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, VT, MVT::v16i32); |
1549 | } |
1550 | setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal); |
1551 | setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal); |
1552 | setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v16i32, Legal); |
1553 | setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v16i32, Legal); |
1554 | setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal); |
1555 | setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Legal); |
1556 | setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v16i32, Legal); |
1557 | setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v16i32, Legal); |
1558 | |
1559 | setOperationAction(ISD::STRICT_FADD, MVT::v16f32, Legal); |
1560 | setOperationAction(ISD::STRICT_FADD, MVT::v8f64, Legal); |
1561 | setOperationAction(ISD::STRICT_FSUB, MVT::v16f32, Legal); |
1562 | setOperationAction(ISD::STRICT_FSUB, MVT::v8f64, Legal); |
1563 | setOperationAction(ISD::STRICT_FMUL, MVT::v16f32, Legal); |
1564 | setOperationAction(ISD::STRICT_FMUL, MVT::v8f64, Legal); |
1565 | setOperationAction(ISD::STRICT_FDIV, MVT::v16f32, Legal); |
1566 | setOperationAction(ISD::STRICT_FDIV, MVT::v8f64, Legal); |
1567 | setOperationAction(ISD::STRICT_FSQRT, MVT::v16f32, Legal); |
1568 | setOperationAction(ISD::STRICT_FSQRT, MVT::v8f64, Legal); |
1569 | setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v8f64, Legal); |
1570 | setOperationAction(ISD::STRICT_FP_ROUND, MVT::v8f32, Legal); |
1571 | |
1572 | setTruncStoreAction(MVT::v8i64, MVT::v8i8, Legal); |
1573 | setTruncStoreAction(MVT::v8i64, MVT::v8i16, Legal); |
1574 | setTruncStoreAction(MVT::v8i64, MVT::v8i32, Legal); |
1575 | setTruncStoreAction(MVT::v16i32, MVT::v16i8, Legal); |
1576 | setTruncStoreAction(MVT::v16i32, MVT::v16i16, Legal); |
1577 | if (HasBWI) |
1578 | setTruncStoreAction(MVT::v32i16, MVT::v32i8, Legal); |
1579 | |
1580 | |
1581 | |
1582 | |
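     | // Without VLX, widen 128/256-bit masked loads and stores to the 512-bit |
     | // forms instead of splitting them. |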
1583 | if (!Subtarget.hasVLX()) { |
1584 | for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64, |
1585 | MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64}) { |
1586 | setOperationAction(ISD::MLOAD, VT, Custom); |
1587 | setOperationAction(ISD::MSTORE, VT, Custom); |
1588 | } |
1589 | } |
1590 | |
1591 | setOperationAction(ISD::TRUNCATE, MVT::v8i32, Legal); |
1592 | setOperationAction(ISD::TRUNCATE, MVT::v16i16, Legal); |
1593 | setOperationAction(ISD::TRUNCATE, MVT::v32i8, HasBWI ? Legal : Custom); |
1594 | setOperationAction(ISD::TRUNCATE, MVT::v16i64, Custom); |
1595 | setOperationAction(ISD::ZERO_EXTEND, MVT::v32i16, Custom); |
1596 | setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom); |
1597 | setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom); |
1598 | setOperationAction(ISD::ANY_EXTEND, MVT::v32i16, Custom); |
1599 | setOperationAction(ISD::ANY_EXTEND, MVT::v16i32, Custom); |
1600 | setOperationAction(ISD::ANY_EXTEND, MVT::v8i64, Custom); |
1601 | setOperationAction(ISD::SIGN_EXTEND, MVT::v32i16, Custom); |
1602 | setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom); |
1603 | setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom); |
1604 | |
1605 | if (HasBWI) { |
1606 | |
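     | // Extends from v64i1 masks to 512-bit vectors. |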
1607 | setOperationAction(ISD::SIGN_EXTEND, MVT::v64i8, Custom); |
1608 | setOperationAction(ISD::ZERO_EXTEND, MVT::v64i8, Custom); |
1609 | setOperationAction(ISD::ANY_EXTEND, MVT::v64i8, Custom); |
1610 | } |
1611 | |
1612 | for (auto VT : { MVT::v16f32, MVT::v8f64 }) { |
1613 | setOperationAction(ISD::FFLOOR, VT, Legal); |
1614 | setOperationAction(ISD::STRICT_FFLOOR, VT, Legal); |
1615 | setOperationAction(ISD::FCEIL, VT, Legal); |
1616 | setOperationAction(ISD::STRICT_FCEIL, VT, Legal); |
1617 | setOperationAction(ISD::FTRUNC, VT, Legal); |
1618 | setOperationAction(ISD::STRICT_FTRUNC, VT, Legal); |
1619 | setOperationAction(ISD::FRINT, VT, Legal); |
1620 | setOperationAction(ISD::STRICT_FRINT, VT, Legal); |
1621 | setOperationAction(ISD::FNEARBYINT, VT, Legal); |
1622 | setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal); |
1623 | setOperationAction(ISD::FROUNDEVEN, VT, Legal); |
1624 | setOperationAction(ISD::STRICT_FROUNDEVEN, VT, Legal); |
1625 | |
1626 | setOperationAction(ISD::FROUND, VT, Custom); |
1627 | } |
1628 | |
1629 | for (auto VT : {MVT::v32i16, MVT::v16i32, MVT::v8i64}) { |
1630 | setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom); |
1631 | setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom); |
1632 | } |
1633 | |
1634 | setOperationAction(ISD::ADD, MVT::v32i16, HasBWI ? Legal : Custom); |
1635 | setOperationAction(ISD::SUB, MVT::v32i16, HasBWI ? Legal : Custom); |
1636 | setOperationAction(ISD::ADD, MVT::v64i8, HasBWI ? Legal : Custom); |
1637 | setOperationAction(ISD::SUB, MVT::v64i8, HasBWI ? Legal : Custom); |
1638 | |
1639 | setOperationAction(ISD::MUL, MVT::v8i64, Custom); |
1640 | setOperationAction(ISD::MUL, MVT::v16i32, Legal); |
1641 | setOperationAction(ISD::MUL, MVT::v32i16, HasBWI ? Legal : Custom); |
1642 | setOperationAction(ISD::MUL, MVT::v64i8, Custom); |
1643 | |
1644 | setOperationAction(ISD::MULHU, MVT::v16i32, Custom); |
1645 | setOperationAction(ISD::MULHS, MVT::v16i32, Custom); |
1646 | setOperationAction(ISD::MULHS, MVT::v32i16, HasBWI ? Legal : Custom); |
1647 | setOperationAction(ISD::MULHU, MVT::v32i16, HasBWI ? Legal : Custom); |
1648 | setOperationAction(ISD::MULHS, MVT::v64i8, Custom); |
1649 | setOperationAction(ISD::MULHU, MVT::v64i8, Custom); |
1650 | |
1651 | setOperationAction(ISD::SMULO, MVT::v64i8, Custom); |
1652 | setOperationAction(ISD::UMULO, MVT::v64i8, Custom); |
1653 | |
1654 | setOperationAction(ISD::BITREVERSE, MVT::v64i8, Custom); |
1655 | |
1656 | for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32, MVT::v8i64 }) { |
1657 | setOperationAction(ISD::SRL, VT, Custom); |
1658 | setOperationAction(ISD::SHL, VT, Custom); |
1659 | setOperationAction(ISD::SRA, VT, Custom); |
1660 | setOperationAction(ISD::SETCC, VT, Custom); |
1661 | |
1662 | |
1663 | |
1664 | setCondCodeAction(ISD::SETLT, VT, Custom); |
1665 | setCondCodeAction(ISD::SETLE, VT, Custom); |
1666 | } |
1667 | for (auto VT : { MVT::v16i32, MVT::v8i64 }) { |
1668 | setOperationAction(ISD::SMAX, VT, Legal); |
1669 | setOperationAction(ISD::UMAX, VT, Legal); |
1670 | setOperationAction(ISD::SMIN, VT, Legal); |
1671 | setOperationAction(ISD::UMIN, VT, Legal); |
1672 | setOperationAction(ISD::ABS, VT, Legal); |
1673 | setOperationAction(ISD::CTPOP, VT, Custom); |
1674 | setOperationAction(ISD::ROTL, VT, Custom); |
1675 | setOperationAction(ISD::ROTR, VT, Custom); |
1676 | setOperationAction(ISD::STRICT_FSETCC, VT, Custom); |
1677 | setOperationAction(ISD::STRICT_FSETCCS, VT, Custom); |
1678 | } |
1679 | |
1680 | for (auto VT : { MVT::v64i8, MVT::v32i16 }) { |
1681 | setOperationAction(ISD::ABS, VT, HasBWI ? Legal : Custom); |
1682 | setOperationAction(ISD::CTPOP, VT, Subtarget.hasBITALG() ? Legal : Custom); |
1683 | setOperationAction(ISD::CTLZ, VT, Custom); |
1684 | setOperationAction(ISD::SMAX, VT, HasBWI ? Legal : Custom); |
1685 | setOperationAction(ISD::UMAX, VT, HasBWI ? Legal : Custom); |
1686 | setOperationAction(ISD::SMIN, VT, HasBWI ? Legal : Custom); |
1687 | setOperationAction(ISD::UMIN, VT, HasBWI ? Legal : Custom); |
1688 | setOperationAction(ISD::UADDSAT, VT, HasBWI ? Legal : Custom); |
1689 | setOperationAction(ISD::SADDSAT, VT, HasBWI ? Legal : Custom); |
1690 | setOperationAction(ISD::USUBSAT, VT, HasBWI ? Legal : Custom); |
1691 | setOperationAction(ISD::SSUBSAT, VT, HasBWI ? Legal : Custom); |
1692 | } |
1693 | |
1694 | if (Subtarget.hasDQI()) { |
1695 | setOperationAction(ISD::SINT_TO_FP, MVT::v8i64, Legal); |
1696 | setOperationAction(ISD::UINT_TO_FP, MVT::v8i64, Legal); |
1697 | setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v8i64, Legal); |
1698 | setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v8i64, Legal); |
1699 | setOperationAction(ISD::FP_TO_SINT, MVT::v8i64, Legal); |
1700 | setOperationAction(ISD::FP_TO_UINT, MVT::v8i64, Legal); |
1701 | setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v8i64, Legal); |
1702 | setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i64, Legal); |
1703 | |
1704 | setOperationAction(ISD::MUL, MVT::v8i64, Legal); |
1705 | } |
1706 | |
1707 | if (Subtarget.hasCDI()) { |
1708 | |
1709 | for (auto VT : { MVT::v16i32, MVT::v8i64 }) { |
1710 | setOperationAction(ISD::CTLZ, VT, Legal); |
1711 | } |
1712 | } |
1713 | |
1714 | if (Subtarget.hasVPOPCNTDQ()) { |
1715 | for (auto VT : { MVT::v16i32, MVT::v8i64 }) |
1716 | setOperationAction(ISD::CTPOP, VT, Legal); |
1717 | } |
1718 | |
1719 | |
1720 | |
1721 | |
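     | // Extracting a 256-bit subvector from a 512-bit register is directly |
     | // supported (VEXTRACTF64x4 and friends). |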
1722 | for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, |
1723 | MVT::v8f32, MVT::v4f64 }) |
1724 | setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal); |
1725 | |
1726 | for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32, MVT::v8i64, |
1727 | MVT::v16f32, MVT::v8f64 }) { |
1728 | setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); |
1729 | setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal); |
1730 | setOperationAction(ISD::SELECT, VT, Custom); |
1731 | setOperationAction(ISD::VSELECT, VT, Custom); |
1732 | setOperationAction(ISD::BUILD_VECTOR, VT, Custom); |
1733 | setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); |
1734 | setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); |
1735 | setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); |
1736 | setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); |
1737 | } |
1738 | |
1739 | for (auto VT : { MVT::v16i32, MVT::v8i64, MVT::v16f32, MVT::v8f64 }) { |
1740 | setOperationAction(ISD::MLOAD, VT, Legal); |
1741 | setOperationAction(ISD::MSTORE, VT, Legal); |
1742 | setOperationAction(ISD::MGATHER, VT, Custom); |
1743 | setOperationAction(ISD::MSCATTER, VT, Custom); |
1744 | } |
1745 | if (HasBWI) { |
1746 | for (auto VT : { MVT::v64i8, MVT::v32i16 }) { |
1747 | setOperationAction(ISD::MLOAD, VT, Legal); |
1748 | setOperationAction(ISD::MSTORE, VT, Legal); |
1749 | } |
1750 | } else { |
1751 | setOperationAction(ISD::STORE, MVT::v32i16, Custom); |
1752 | setOperationAction(ISD::STORE, MVT::v64i8, Custom); |
1753 | } |
1754 | |
1755 | if (Subtarget.hasVBMI2()) { |
1756 | for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64, |
1757 | MVT::v16i16, MVT::v8i32, MVT::v4i64, |
1758 | MVT::v32i16, MVT::v16i32, MVT::v8i64 }) { |
1759 | setOperationAction(ISD::FSHL, VT, Custom); |
1760 | setOperationAction(ISD::FSHR, VT, Custom); |
1761 | } |
1762 | |
1763 | setOperationAction(ISD::ROTL, MVT::v32i16, Custom); |
1764 | setOperationAction(ISD::ROTR, MVT::v8i16, Custom); |
1765 | setOperationAction(ISD::ROTR, MVT::v16i16, Custom); |
1766 | setOperationAction(ISD::ROTR, MVT::v32i16, Custom); |
1767 | } |
1768 | } |
1769 | |
1770 | |
1771 | |
1772 | |
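     | // This block handles AVX512 operations on 128/256-bit types; without |
     | // VLX many of them are implemented by widening to 512 bits. |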
1773 | if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) { |
1774 | |
1775 | |
1776 | |
1777 | setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, |
1778 | Subtarget.hasVLX() ? Legal : Custom); |
1779 | setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, |
1780 | Subtarget.hasVLX() ? Legal : Custom); |
1781 | setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i32, |
1782 | Subtarget.hasVLX() ? Legal : Custom); |
1783 | setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, |
1784 | Subtarget.hasVLX() ? Legal : Custom); |
1785 | setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i32, Custom); |
1786 | setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, |
1787 | Subtarget.hasVLX() ? Legal : Custom); |
1788 | setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, |
1789 | Subtarget.hasVLX() ? Legal : Custom); |
1790 | setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v8i32, |
1791 | Subtarget.hasVLX() ? Legal : Custom); |
1792 | setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, |
1793 | Subtarget.hasVLX() ? Legal : Custom); |
1794 | |
1795 | if (Subtarget.hasDQI()) { |
1796 | |
1797 | |
1798 | assert(isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) && |
1799 | isOperationCustom(ISD::STRICT_UINT_TO_FP, MVT::v2f32) && |
1800 | "Unexpected operation action!"); |
1801 | |
1802 | setOperationAction(ISD::FP_TO_SINT, MVT::v2f32, Custom); |
1803 | setOperationAction(ISD::FP_TO_UINT, MVT::v2f32, Custom); |
1804 | setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2f32, Custom); |
1805 | setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2f32, Custom); |
1806 | } |
1807 | |
1808 | for (auto VT : { MVT::v2i64, MVT::v4i64 }) { |
1809 | setOperationAction(ISD::SMAX, VT, Legal); |
1810 | setOperationAction(ISD::UMAX, VT, Legal); |
1811 | setOperationAction(ISD::SMIN, VT, Legal); |
1812 | setOperationAction(ISD::UMIN, VT, Legal); |
1813 | setOperationAction(ISD::ABS, VT, Legal); |
1814 | } |
1815 | |
1816 | for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) { |
1817 | setOperationAction(ISD::ROTL, VT, Custom); |
1818 | setOperationAction(ISD::ROTR, VT, Custom); |
1819 | } |
1820 | |
1821 | |
1822 | setOperationAction(ISD::MSCATTER, MVT::v2f32, Custom); |
1823 | setOperationAction(ISD::MSCATTER, MVT::v2i32, Custom); |
1824 | |
1825 | for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64, |
1826 | MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) |
1827 | setOperationAction(ISD::MSCATTER, VT, Custom); |
1828 | |
1829 | if (Subtarget.hasDQI()) { |
1830 | for (auto VT : { MVT::v2i64, MVT::v4i64 }) { |
1831 | setOperationAction(ISD::SINT_TO_FP, VT, |
1832 | Subtarget.hasVLX() ? Legal : Custom); |
1833 | setOperationAction(ISD::UINT_TO_FP, VT, |
1834 | Subtarget.hasVLX() ? Legal : Custom); |
1835 | setOperationAction(ISD::STRICT_SINT_TO_FP, VT, |
1836 | Subtarget.hasVLX() ? Legal : Custom); |
1837 | setOperationAction(ISD::STRICT_UINT_TO_FP, VT, |
1838 | Subtarget.hasVLX() ? Legal : Custom); |
1839 | setOperationAction(ISD::FP_TO_SINT, VT, |
1840 | Subtarget.hasVLX() ? Legal : Custom); |
1841 | setOperationAction(ISD::FP_TO_UINT, VT, |
1842 | Subtarget.hasVLX() ? Legal : Custom); |
1843 | setOperationAction(ISD::STRICT_FP_TO_SINT, VT, |
1844 | Subtarget.hasVLX() ? Legal : Custom); |
1845 | setOperationAction(ISD::STRICT_FP_TO_UINT, VT, |
1846 | Subtarget.hasVLX() ? Legal : Custom); |
1847 | setOperationAction(ISD::MUL, VT, Legal); |
1848 | } |
1849 | } |
1850 | |
1851 | if (Subtarget.hasCDI()) { |
1852 | for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) { |
1853 | setOperationAction(ISD::CTLZ, VT, Legal); |
1854 | } |
1855 | } |
1856 | |
1857 | if (Subtarget.hasVPOPCNTDQ()) { |
1858 | for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) |
1859 | setOperationAction(ISD::CTPOP, VT, Legal); |
1860 | } |
1861 | } |
1862 | |
1863 | |
1864 | |
1865 | |
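     | // This block controls the v32i1/v64i1 mask types added by AVX512BW; |
     | // the 512-bit v32i16/v64i8 vector operations are gated by useBWIRegs |
     | // above. |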
1866 | if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) { |
1867 | addRegisterClass(MVT::v32i1, &X86::VK32RegClass); |
1868 | addRegisterClass(MVT::v64i1, &X86::VK64RegClass); |
1869 | |
1870 | for (auto VT : { MVT::v32i1, MVT::v64i1 }) { |
1871 | setOperationAction(ISD::VSELECT, VT, Expand); |
1872 | setOperationAction(ISD::TRUNCATE, VT, Custom); |
1873 | setOperationAction(ISD::SETCC, VT, Custom); |
1874 | setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); |
1875 | setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); |
1876 | setOperationAction(ISD::SELECT, VT, Custom); |
1877 | setOperationAction(ISD::BUILD_VECTOR, VT, Custom); |
1878 | setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); |
1879 | setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); |
1880 | setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); |
1881 | } |
1882 | |
1883 | for (auto VT : { MVT::v16i1, MVT::v32i1 }) |
1884 | setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); |
1885 | |
1886 | |
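     | // Extends from v32i1 masks to 256-bit vectors. |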
1887 | setOperationAction(ISD::SIGN_EXTEND, MVT::v32i8, Custom); |
1888 | setOperationAction(ISD::ZERO_EXTEND, MVT::v32i8, Custom); |
1889 | setOperationAction(ISD::ANY_EXTEND, MVT::v32i8, Custom); |
1890 | |
1891 | for (auto VT : { MVT::v32i8, MVT::v16i8, MVT::v16i16, MVT::v8i16 }) { |
1892 | setOperationAction(ISD::MLOAD, VT, Subtarget.hasVLX() ? Legal : Custom); |
1893 | setOperationAction(ISD::MSTORE, VT, Subtarget.hasVLX() ? Legal : Custom); |
1894 | } |
1895 | |
1896 | |
1897 | |
1898 | |
1899 | |
1900 | if (Subtarget.hasBITALG()) { |
1901 | for (auto VT : { MVT::v16i8, MVT::v32i8, MVT::v8i16, MVT::v16i16 }) |
1902 | setOperationAction(ISD::CTPOP, VT, Legal); |
1903 | } |
1904 | } |
1905 | |
1906 | if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) { |
1907 | setTruncStoreAction(MVT::v4i64, MVT::v4i8, Legal); |
1908 | setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal); |
1909 | setTruncStoreAction(MVT::v4i64, MVT::v4i32, Legal); |
1910 | setTruncStoreAction(MVT::v8i32, MVT::v8i8, Legal); |
1911 | setTruncStoreAction(MVT::v8i32, MVT::v8i16, Legal); |
1912 | |
1913 | setTruncStoreAction(MVT::v2i64, MVT::v2i8, Legal); |
1914 | setTruncStoreAction(MVT::v2i64, MVT::v2i16, Legal); |
1915 | setTruncStoreAction(MVT::v2i64, MVT::v2i32, Legal); |
1916 | setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal); |
1917 | setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal); |
1918 | |
1919 | if (Subtarget.hasBWI()) { |
1920 | setTruncStoreAction(MVT::v16i16, MVT::v16i8, Legal); |
1921 | setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal); |
1922 | } |
1923 | |
1924 | setOperationAction(ISD::TRUNCATE, MVT::v16i32, Custom); |
1925 | setOperationAction(ISD::TRUNCATE, MVT::v8i64, Custom); |
1926 | setOperationAction(ISD::TRUNCATE, MVT::v16i64, Custom); |
1927 | } |
1928 | |
1929 | if (Subtarget.hasAMXTILE()) { |
1930 | addRegisterClass(MVT::x86amx, &X86::TILERegClass); |
1931 | } |
1932 | |
1933 | |
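     | // We want to custom lower some of our intrinsics. |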
1934 | setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); |
1935 | setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); |
1936 | setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); |
1937 | if (!Subtarget.is64Bit()) { |
1938 | setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom); |
1939 | } |
1940 | |
1941 | |
1942 | |
1943 | |
1944 | |
1945 | |
1946 | |
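     | // Overflow-aware arithmetic is custom lowered so it can reuse the |
     | // EFLAGS result; the i64 variants are only handled directly on 64-bit |
     | // targets. |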
1947 | for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) { |
1948 | if (VT == MVT::i64 && !Subtarget.is64Bit()) |
1949 | continue; |
1950 | |
1951 | setOperationAction(ISD::SADDO, VT, Custom); |
1952 | setOperationAction(ISD::UADDO, VT, Custom); |
1953 | setOperationAction(ISD::SSUBO, VT, Custom); |
1954 | setOperationAction(ISD::USUBO, VT, Custom); |
1955 | setOperationAction(ISD::SMULO, VT, Custom); |
1956 | setOperationAction(ISD::UMULO, VT, Custom); |
1957 | |
1958 | |
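     | // Carry-using variants take the carry as a real value rather than glue. |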
1959 | setOperationAction(ISD::ADDCARRY, VT, Custom); |
1960 | setOperationAction(ISD::SUBCARRY, VT, Custom); |
1961 | setOperationAction(ISD::SETCCCARRY, VT, Custom); |
1962 | setOperationAction(ISD::SADDO_CARRY, VT, Custom); |
1963 | setOperationAction(ISD::SSUBO_CARRY, VT, Custom); |
1964 | } |
1965 | |
1966 | if (!Subtarget.is64Bit()) { |
1967 | |
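     | // These i128 libcalls are not available in 32-bit mode. |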
1968 | setLibcallName(RTLIB::SHL_I128, nullptr); |
1969 | setLibcallName(RTLIB::SRL_I128, nullptr); |
1970 | setLibcallName(RTLIB::SRA_I128, nullptr); |
1971 | setLibcallName(RTLIB::MUL_I128, nullptr); |
1972 | } |
1973 | |
1974 | |
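     | // Combine sin/cos into _sincos_stret when the runtime provides it. |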
1975 | if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr && |
1976 | getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) { |
1977 | setOperationAction(ISD::FSINCOS, MVT::f64, Custom); |
1978 | setOperationAction(ISD::FSINCOS, MVT::f32, Custom); |
1979 | } |
1980 | |
1981 | if (Subtarget.isTargetWin64()) { |
1982 | setOperationAction(ISD::SDIV, MVT::i128, Custom); |
1983 | setOperationAction(ISD::UDIV, MVT::i128, Custom); |
1984 | setOperationAction(ISD::SREM, MVT::i128, Custom); |
1985 | setOperationAction(ISD::UREM, MVT::i128, Custom); |
1986 | } |
1987 | |
1988 | |
1989 | |
1990 | |
1991 | |
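     | // 32-bit MSVC CRTs lack the f32 libm entry points (e.g. fmodf), so |
     | // promote these f32 operations to f64, as the CRT headers themselves do. |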
1992 | if (Subtarget.is32Bit() && |
1993 | (Subtarget.isTargetWindowsMSVC() || Subtarget.isTargetWindowsItanium())) |
1994 | for (ISD::NodeType Op : |
1995 | {ISD::FCEIL, ISD::STRICT_FCEIL, |
1996 | ISD::FCOS, ISD::STRICT_FCOS, |
1997 | ISD::FEXP, ISD::STRICT_FEXP, |
1998 | ISD::FFLOOR, ISD::STRICT_FFLOOR, |
1999 | ISD::FREM, ISD::STRICT_FREM, |
2000 | ISD::FLOG, ISD::STRICT_FLOG, |
2001 | ISD::FLOG10, ISD::STRICT_FLOG10, |
2002 | ISD::FPOW, ISD::STRICT_FPOW, |
2003 | ISD::FSIN, ISD::STRICT_FSIN}) |
2004 | if (isOperationExpand(Op, MVT::f32)) |
2005 | setOperationAction(Op, MVT::f32, Promote); |
2006 | |
2007 | |
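     | // We have target-specific DAG combine patterns for the following nodes: |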
2008 | setTargetDAGCombine(ISD::VECTOR_SHUFFLE); |
2009 | setTargetDAGCombine(ISD::SCALAR_TO_VECTOR); |
2010 | setTargetDAGCombine(ISD::INSERT_VECTOR_ELT); |
2011 | setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT); |
2012 | setTargetDAGCombine(ISD::CONCAT_VECTORS); |
2013 | setTargetDAGCombine(ISD::INSERT_SUBVECTOR); |
2014 | setTargetDAGCombine(ISD::EXTRACT_SUBVECTOR); |
2015 | setTargetDAGCombine(ISD::BITCAST); |
2016 | setTargetDAGCombine(ISD::VSELECT); |
2017 | setTargetDAGCombine(ISD::SELECT); |
2018 | setTargetDAGCombine(ISD::SHL); |
2019 | setTargetDAGCombine(ISD::SRA); |
2020 | setTargetDAGCombine(ISD::SRL); |
2021 | setTargetDAGCombine(ISD::OR); |
2022 | setTargetDAGCombine(ISD::AND); |
2023 | setTargetDAGCombine(ISD::ADD); |
2024 | setTargetDAGCombine(ISD::FADD); |
2025 | setTargetDAGCombine(ISD::FSUB); |
2026 | setTargetDAGCombine(ISD::FNEG); |
2027 | setTargetDAGCombine(ISD::FMA); |
2028 | setTargetDAGCombine(ISD::STRICT_FMA); |
2029 | setTargetDAGCombine(ISD::FMINNUM); |
2030 | setTargetDAGCombine(ISD::FMAXNUM); |
2031 | setTargetDAGCombine(ISD::SUB); |
2032 | setTargetDAGCombine(ISD::LOAD); |
2033 | setTargetDAGCombine(ISD::MLOAD); |
2034 | setTargetDAGCombine(ISD::STORE); |
2035 | setTargetDAGCombine(ISD::MSTORE); |
2036 | setTargetDAGCombine(ISD::TRUNCATE); |
2037 | setTargetDAGCombine(ISD::ZERO_EXTEND); |
2038 | setTargetDAGCombine(ISD::ANY_EXTEND); |
2039 | setTargetDAGCombine(ISD::SIGN_EXTEND); |
2040 | setTargetDAGCombine(ISD::SIGN_EXTEND_INREG); |
2041 | setTargetDAGCombine(ISD::ANY_EXTEND_VECTOR_INREG); |
2042 | setTargetDAGCombine(ISD::SIGN_EXTEND_VECTOR_INREG); |
2043 | setTargetDAGCombine(ISD::ZERO_EXTEND_VECTOR_INREG); |
2044 | setTargetDAGCombine(ISD::SINT_TO_FP); |
2045 | setTargetDAGCombine(ISD::UINT_TO_FP); |
2046 | setTargetDAGCombine(ISD::STRICT_SINT_TO_FP); |
2047 | setTargetDAGCombine(ISD::STRICT_UINT_TO_FP); |
2048 | setTargetDAGCombine(ISD::SETCC); |
2049 | setTargetDAGCombine(ISD::MUL); |
2050 | setTargetDAGCombine(ISD::XOR); |
2051 | setTargetDAGCombine(ISD::MSCATTER); |
2052 | setTargetDAGCombine(ISD::MGATHER); |
2053 | setTargetDAGCombine(ISD::FP16_TO_FP); |
2054 | setTargetDAGCombine(ISD::FP_EXTEND); |
2055 | setTargetDAGCombine(ISD::STRICT_FP_EXTEND); |
2056 | setTargetDAGCombine(ISD::FP_ROUND); |
2057 | |
2058 | computeRegisterProperties(Subtarget.getRegisterInfo()); |
2059 | |
2060 | MaxStoresPerMemset = 16; |
2061 | MaxStoresPerMemsetOptSize = 8; |
2062 | MaxStoresPerMemcpy = 8; |
2063 | MaxStoresPerMemcpyOptSize = 4; |
2064 | MaxStoresPerMemmove = 8; |
2065 | MaxStoresPerMemmoveOptSize = 4; |
2066 | |
2067 | |
2068 | |
2069 | |
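     | // These limits control how aggressively memcmp is expanded into loads. |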
2070 | MaxLoadsPerMemcmp = 2; |
2071 | MaxLoadsPerMemcmpOptSize = 2; |
2072 | |
2073 | |
2074 | setPrefLoopAlignment(Align(1ULL << ExperimentalPrefLoopAlignment)); |
2075 | |
2076 | |
2077 | |
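     | // On an out-of-order CPU a predictable branch beats a CMOV, so treat |
     | // selects with predictable conditions as expensive. |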
2078 | PredictableSelectIsExpensive = Subtarget.getSchedModel().isOutOfOrder(); |
2079 | EnableExtLdPromotion = true; |
2080 | setPrefFunctionAlignment(Align(16)); |
2081 | |
2082 | verifyIntrinsicTables(); |
2083 | |
2084 | |
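     | // STRICT_* FP nodes are supported through instruction selection rather |
     | // than being mutated into their non-strict forms. |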
2085 | IsStrictFPEnabled = true; |
2086 | } |
2087 | |
2088 | |
2089 | bool X86TargetLowering::useLoadStackGuardNode() const { |
2090 | return Subtarget.isTargetMachO() && Subtarget.is64Bit(); |
2091 | } |
2092 | |
2093 | bool X86TargetLowering::useStackGuardXorFP() const { |
2094 | |
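     | // Only MSVC CRTs XOR the frame pointer into the stack guard value. |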
2095 | return Subtarget.getTargetTriple().isOSMSVCRT() && !Subtarget.isTargetMachO(); |
2096 | } |
2097 | |
2098 | SDValue X86TargetLowering::emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val, |
2099 | const SDLoc &DL) const { |
2100 | EVT PtrTy = getPointerTy(DAG.getDataLayout()); |
2101 | unsigned XorOp = Subtarget.is64Bit() ? X86::XOR64_FP : X86::XOR32_FP; |
2102 | MachineSDNode *Node = DAG.getMachineNode(XorOp, DL, PtrTy, Val); |
2103 | return SDValue(Node, 0); |
2104 | } |
2105 | |
2106 | TargetLoweringBase::LegalizeTypeAction |
2107 | X86TargetLowering::getPreferredVectorAction(MVT VT) const { |
2108 | if ((VT == MVT::v32i1 || VT == MVT::v64i1) && Subtarget.hasAVX512() && |
2109 | !Subtarget.hasBWI()) |
2110 | return TypeSplitVector; |
2111 | |
2112 | if (!VT.isScalableVector() && VT.getVectorNumElements() != 1 && |
2113 | VT.getVectorElementType() != MVT::i1) |
2114 | return TypeWidenVector; |
2115 | |
2116 | return TargetLoweringBase::getPreferredVectorAction(VT); |
2117 | } |
2118 | |
2119 | static std::pair<MVT, unsigned> |
2120 | handleMaskRegisterForCallingConv(unsigned NumElts, CallingConv::ID CC, |
2121 | const X86Subtarget &Subtarget) { |
2122 | |
2123 | |
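     | // Small vXi1 masks are passed in full XMM registers unless the calling |
     | // convention (RegCall / Intel OCL BI) keeps them in k-registers. |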
2124 | if (NumElts == 2) |
2125 | return {MVT::v2i64, 1}; |
2126 | if (NumElts == 4) |
2127 | return {MVT::v4i32, 1}; |
2128 | if (NumElts == 8 && CC != CallingConv::X86_RegCall && |
2129 | CC != CallingConv::Intel_OCL_BI) |
2130 | return {MVT::v8i16, 1}; |
2131 | if (NumElts == 16 && CC != CallingConv::X86_RegCall && |
2132 | CC != CallingConv::Intel_OCL_BI) |
2133 | return {MVT::v16i8, 1}; |
2134 | |
2135 | |
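     | // v32i1 passes in a YMM register unless BWI plus RegCall allow a |
     | // k-register. |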
2136 | if (NumElts == 32 && (!Subtarget.hasBWI() || CC != CallingConv::X86_RegCall)) |
2137 | return {MVT::v32i8, 1}; |
2138 | |
2139 | if (NumElts == 64 && Subtarget.hasBWI() && CC != CallingConv::X86_RegCall) { |
2140 | if (Subtarget.useAVX512Regs()) |
2141 | return {MVT::v64i8, 1}; |
2142 | return {MVT::v32i8, 2}; |
2143 | } |
2144 | |
2145 | |
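     | // Wide or non-power-of-2 vXi1 vectors are broken into scalar i8 pieces. |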
2146 | if (!isPowerOf2_32(NumElts) || (NumElts == 64 && !Subtarget.hasBWI()) || |
2147 | NumElts > 64) |
2148 | return {MVT::i8, NumElts}; |
2149 | |
2150 | return {MVT::INVALID_SIMPLE_VALUE_TYPE, 0}; |
2151 | } |
2152 | |
2153 | MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context, |
2154 | CallingConv::ID CC, |
2155 | EVT VT) const { |
2156 | if (VT.isVector() && VT.getVectorElementType() == MVT::i1 && |
2157 | Subtarget.hasAVX512()) { |
2158 | unsigned NumElts = VT.getVectorNumElements(); |
2159 | |
2160 | MVT RegisterVT; |
2161 | unsigned NumRegisters; |
2162 | std::tie(RegisterVT, NumRegisters) = |
2163 | handleMaskRegisterForCallingConv(NumElts, CC, Subtarget); |
2164 | if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE) |
2165 | return RegisterVT; |
2166 | } |
2167 | |
2168 | return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT); |
2169 | } |
2170 | |
2171 | unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context, |
2172 | CallingConv::ID CC, |
2173 | EVT VT) const { |
2174 | if (VT.isVector() && VT.getVectorElementType() == MVT::i1 && |
2175 | Subtarget.hasAVX512()) { |
2176 | unsigned NumElts = VT.getVectorNumElements(); |
2177 | |
2178 | MVT RegisterVT; |
2179 | unsigned NumRegisters; |
2180 | std::tie(RegisterVT, NumRegisters) = |
2181 | handleMaskRegisterForCallingConv(NumElts, CC, Subtarget); |
2182 | if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE) |
2183 | return NumRegisters; |
2184 | } |
2185 | |
2186 | return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT); |
2187 | } |
2188 | |
2189 | unsigned X86TargetLowering::getVectorTypeBreakdownForCallingConv( |
2190 | LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, |
2191 | unsigned &NumIntermediates, MVT &RegisterVT) const { |
2192 | |
2193 | if (VT.isVector() && VT.getVectorElementType() == MVT::i1 && |
2194 | Subtarget.hasAVX512() && |
2195 | (!isPowerOf2_32(VT.getVectorNumElements()) || |
2196 | (VT.getVectorNumElements() == 64 && !Subtarget.hasBWI()) || |
2197 | VT.getVectorNumElements() > 64)) { |
2198 | RegisterVT = MVT::i8; |
2199 | IntermediateVT = MVT::i1; |
2200 | NumIntermediates = VT.getVectorNumElements(); |
2201 | return NumIntermediates; |
2202 | } |
2203 | |
2204 | |
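     | // Split v64i1 into two v32i1 halves (passed in v32i8 registers) when |
     | // 512-bit registers are not in use. |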
2205 | if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() && |
2206 | CC != CallingConv::X86_RegCall) { |
2207 | RegisterVT = MVT::v32i8; |
2208 | IntermediateVT = MVT::v32i1; |
2209 | NumIntermediates = 2; |
2210 | return 2; |
2211 | } |
2212 | |
2213 | return TargetLowering::getVectorTypeBreakdownForCallingConv( |
2214 | Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT); |
2215 | } |
2216 | |
2217 | EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL, |
2218 | LLVMContext& Context, |
2219 | EVT VT) const { |
2220 | if (!VT.isVector()) |
2221 | return MVT::i8; |
2222 | |
2223 | if (Subtarget.hasAVX512()) { |
2224 | |
2225 | EVT LegalVT = VT; |
2226 | while (getTypeAction(Context, LegalVT) != TypeLegal) |
2227 | LegalVT = getTypeToTransformTo(Context, LegalVT); |
2228 | |
2229 | |
2230 | if (LegalVT.getSimpleVT().is512BitVector()) |
2231 | return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount()); |
2232 | |
2233 | if (LegalVT.getSimpleVT().isVector() && Subtarget.hasVLX()) { |
2234 | |
2235 | |
2236 | |
2237 | MVT EltVT = LegalVT.getSimpleVT().getVectorElementType(); |
2238 | if (Subtarget.hasBWI() || EltVT.getSizeInBits() >= 32) |
2239 | return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount()); |
2240 | } |
2241 | } |
2242 | |
2243 | return VT.changeVectorElementTypeToInteger(); |
2244 | } |
2245 | |
2246 | |
2247 | |
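     | // Helper for getByValTypeAlignment: find the largest alignment implied |
     | // by a vector member of the aggregate, capped at 16 bytes. |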
2248 | static void getMaxByValAlign(Type *Ty, Align &MaxAlign) { |
2249 | if (MaxAlign == 16) |
2250 | return; |
2251 | if (VectorType *VTy = dyn_cast<VectorType>(Ty)) { |
2252 | if (VTy->getPrimitiveSizeInBits().getFixedSize() == 128) |
2253 | MaxAlign = Align(16); |
2254 | } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { |
2255 | Align EltAlign; |
2256 | getMaxByValAlign(ATy->getElementType(), EltAlign); |
2257 | if (EltAlign > MaxAlign) |
2258 | MaxAlign = EltAlign; |
2259 | } else if (StructType *STy = dyn_cast<StructType>(Ty)) { |
2260 | for (auto *EltTy : STy->elements()) { |
2261 | Align EltAlign; |
2262 | getMaxByValAlign(EltTy, EltAlign); |
2263 | if (EltAlign > MaxAlign) |
2264 | MaxAlign = EltAlign; |
2265 | if (MaxAlign == 16) |
2266 | break; |
2267 | } |
2268 | } |
2269 | } |
2270 | |
2271 | |
2272 | |
2273 | |
2274 | |
2275 | unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty, |
2276 | const DataLayout &DL) const { |
2277 | if (Subtarget.is64Bit()) { |
2278 | |
2279 | Align TyAlign = DL.getABITypeAlign(Ty); |
2280 | if (TyAlign > 8) |
2281 | return TyAlign.value(); |
2282 | return 8; |
2283 | } |
2284 | |
2285 | Align Alignment(4); |
2286 | if (Subtarget.hasSSE1()) |
2287 | getMaxByValAlign(Ty, Alignment); |
2288 | return Alignment.value(); |
2289 | } |
2290 | |
2291 | |
2292 | |
2293 | |
2294 | |
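     | // Pick the widest profitable type for memset/memcpy/memmove expansion, |
     | // based on operation size, alignment, and available vector width. |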
2295 | EVT X86TargetLowering::getOptimalMemOpType( |
2296 | const MemOp &Op, const AttributeList &FuncAttributes) const { |
2297 | if (!FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat)) { |
2298 | if (Op.size() >= 16 && |
2299 | (!Subtarget.isUnalignedMem16Slow() || Op.isAligned(Align(16)))) { |
2300 | |
2301 | if (Op.size() >= 64 && Subtarget.hasAVX512() && |
2302 | (Subtarget.getPreferVectorWidth() >= 512)) { |
2303 | return Subtarget.hasBWI() ? MVT::v64i8 : MVT::v16i32; |
2304 | } |
2305 | |
2306 | if (Op.size() >= 32 && Subtarget.hasAVX() && |
2307 | (Subtarget.getPreferVectorWidth() >= 256)) { |
2308 | |
2309 | |
2310 | |
2311 | |
2312 | |
2313 | return MVT::v32i8; |
2314 | } |
2315 | if (Subtarget.hasSSE2() && (Subtarget.getPreferVectorWidth() >= 128)) |
2316 | return MVT::v16i8; |
2317 | |
2318 | |
2319 | if (Subtarget.hasSSE1() && (Subtarget.is64Bit() || Subtarget.hasX87()) && |
2320 | (Subtarget.getPreferVectorWidth() >= 128)) |
2321 | return MVT::v4f32; |
2322 | } else if (((Op.isMemcpy() && !Op.isMemcpyStrSrc()) || Op.isZeroMemset()) && |
2323 | Op.size() >= 8 && !Subtarget.is64Bit() && Subtarget.hasSSE2()) { |
2324 | |
2325 | |
2326 | |
2327 | |
2328 | |
2329 | |
2330 | return MVT::f64; |
2331 | } |
2332 | } |
2333 | |
2334 | |
2335 | |
2336 | if (Subtarget.is64Bit() && Op.size() >= 8) |
2337 | return MVT::i64; |
2338 | return MVT::i32; |
2339 | } |
2340 | |
2341 | bool X86TargetLowering::isSafeMemOpType(MVT VT) const { |
2342 | if (VT == MVT::f32) |
2343 | return X86ScalarSSEf32; |
2344 | if (VT == MVT::f64) |
2345 | return X86ScalarSSEf64; |
2346 | return true; |
2347 | } |
2348 | |
2349 | bool X86TargetLowering::allowsMisalignedMemoryAccesses( |
2350 | EVT VT, unsigned, Align Alignment, MachineMemOperand::Flags Flags, |
2351 | bool *Fast) const { |
2352 | if (Fast) { |
2353 | switch (VT.getSizeInBits()) { |
2354 | default: |
2355 | |
2356 | *Fast = true; |
2357 | break; |
2358 | case 128: |
2359 | *Fast = !Subtarget.isUnalignedMem16Slow(); |
2360 | break; |
2361 | case 256: |
2362 | *Fast = !Subtarget.isUnalignedMem32Slow(); |
2363 | break; |
2364 | |
2365 | } |
2366 | } |
2367 | |
2368 | if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) { |
2369 | |
2370 | |
2371 | |
2372 | |
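     | // Non-temporal vector ops must be aligned: a misaligned NT load is only |
     | // "allowed" when MOVNTDQA (SSE4.1) couldn't be used anyway, and NT |
     | // stores are never allowed to be misaligned. |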
2373 | if (!!(Flags & MachineMemOperand::MOLoad)) |
2374 | return (Alignment < 16 || !Subtarget.hasSSE41()); |
2375 | return false; |
2376 | } |
2377 | |
2378 | return true; |
2379 | } |
2380 | |
2381 | |
2382 | |
2383 | |
2384 | unsigned X86TargetLowering::getJumpTableEncoding() const { |
2385 | |
2386 | |
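     | // In GOT PIC mode, each jump table entry is emitted as a @GOTOFF symbol. |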
2387 | if (isPositionIndependent() && Subtarget.isPICStyleGOT()) |
2388 | return MachineJumpTableInfo::EK_Custom32; |
2389 | |
2390 | |
2391 | return TargetLowering::getJumpTableEncoding(); |
2392 | } |
2393 | |
2394 | bool X86TargetLowering::useSoftFloat() const { |
2395 | return Subtarget.useSoftFloat(); |
2396 | } |
2397 | |
2398 | void X86TargetLowering::markLibCallAttributes(MachineFunction *MF, unsigned CC, |
2399 | ArgListTy &Args) const { |
2400 | |
2401 | |
2402 | if (Subtarget.is64Bit()) |
2403 | return; |
2404 | if (CC != CallingConv::C && CC != CallingConv::X86_StdCall) |
2405 | return; |
2406 | unsigned ParamRegs = 0; |
2407 | if (auto *M = MF->getFunction().getParent()) |
2408 | ParamRegs = M->getNumberRegisterParameters(); |
2409 | |
2410 | |
2411 | for (auto &Arg : Args) { |
2412 | Type *T = Arg.Ty; |
2413 | if (T->isIntOrPtrTy()) |
2414 | if (MF->getDataLayout().getTypeAllocSize(T) <= 8) { |
2415 | unsigned numRegs = 1; |
2416 | if (MF->getDataLayout().getTypeAllocSize(T) > 4) |
2417 | numRegs = 2; |
2418 | if (ParamRegs < numRegs) |
2419 | return; |
2420 | ParamRegs -= numRegs; |
2421 | Arg.IsInReg = true; |
2422 | } |
2423 | } |
2424 | } |
2425 | |
2426 | const MCExpr * |
2427 | X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, |
2428 | const MachineBasicBlock *MBB, |
2429 | unsigned uid, MCContext &Ctx) const { |
2430 | assert(isPositionIndependent() && Subtarget.isPICStyleGOT()); |
2431 | |
2432 | |
2433 | return MCSymbolRefExpr::create(MBB->getSymbol(), |
2434 | MCSymbolRefExpr::VK_GOTOFF, Ctx); |
2435 | } |
2436 | |
2437 | |
2438 | SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table, |
2439 | SelectionDAG &DAG) const { |
2440 | if (!Subtarget.is64Bit()) |
2441 | |
2442 | |
2443 | return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), |
2444 | getPointerTy(DAG.getDataLayout())); |
2445 | return Table; |
2446 | } |
2447 | |
2448 | |
2449 | |
2450 | const MCExpr *X86TargetLowering:: |
2451 | getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, |
2452 | MCContext &Ctx) const { |
2453 | |
2454 | if (Subtarget.isPICStyleRIPRel()) |
2455 | return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx); |
2456 | |
2457 | |
2458 | return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx); |
2459 | } |
2460 | |
2461 | std::pair<const TargetRegisterClass *, uint8_t> |
2462 | X86TargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI, |
2463 | MVT VT) const { |
2464 | const TargetRegisterClass *RRC = nullptr; |
2465 | uint8_t Cost = 1; |
2466 | switch (VT.SimpleTy) { |
2467 | default: |
2468 | return TargetLowering::findRepresentativeClass(TRI, VT); |
2469 | case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64: |
2470 | RRC = Subtarget.is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass; |
2471 | break; |
2472 | case MVT::x86mmx: |
2473 | RRC = &X86::VR64RegClass; |
2474 | break; |
2475 | case MVT::f32: case MVT::f64: |
2476 | case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64: |
2477 | case MVT::v4f32: case MVT::v2f64: |
2478 | case MVT::v32i8: case MVT::v16i16: case MVT::v8i32: case MVT::v4i64: |
2479 | case MVT::v8f32: case MVT::v4f64: |
2480 | case MVT::v64i8: case MVT::v32i16: case MVT::v16i32: case MVT::v8i64: |
2481 | case MVT::v16f32: case MVT::v8f64: |
2482 | RRC = &X86::VR128XRegClass; |
2483 | break; |
2484 | } |
2485 | return std::make_pair(RRC, Cost); |
2486 | } |
2487 | |
2488 | unsigned X86TargetLowering::getAddressSpace() const { |
2489 | if (Subtarget.is64Bit()) |
2490 | return (getTargetMachine().getCodeModel() == CodeModel::Kernel) ? 256 : 257; |
2491 | return 256; |
2492 | } |
2493 | |
2494 | static bool hasStackGuardSlotTLS(const Triple &TargetTriple) { |
2495 | return TargetTriple.isOSGlibc() || TargetTriple.isOSFuchsia() || |
2496 | (TargetTriple.isAndroid() && !TargetTriple.isAndroidVersionLT(17)); |
2497 | } |
2498 | |
2499 | static Constant *SegmentOffset(IRBuilderBase &IRB, |
2500 | int Offset, unsigned AddressSpace) { |
2501 | return ConstantExpr::getIntToPtr( |
2502 | ConstantInt::get(Type::getInt32Ty(IRB.getContext()), Offset), |
2503 | Type::getInt8PtrTy(IRB.getContext())->getPointerTo(AddressSpace)); |
2504 | } |
2505 | |
2506 | Value *X86TargetLowering::getIRStackGuard(IRBuilderBase &IRB) const { |
2507 | |
2508 | |
2509 | |
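     | // glibc, Bionic (Android 17+), and Fuchsia keep the stack guard in a |
     | // fixed TLS slot rather than in the usual __stack_chk_guard global. |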
2510 | if (hasStackGuardSlotTLS(Subtarget.getTargetTriple())) { |
2511 | if (Subtarget.isTargetFuchsia()) { |
2512 | |
2513 | return SegmentOffset(IRB, 0x10, getAddressSpace()); |
2514 | } else { |
2515 | unsigned AddressSpace = getAddressSpace(); |
2516 | Module *M = IRB.GetInsertBlock()->getParent()->getParent(); |
2517 | |
2518 | int Offset = M->getStackProtectorGuardOffset(); |
2519 | |
2520 | |
2521 | |
2522 | if (Offset == INT_MAX) |
2523 | Offset = (Subtarget.is64Bit()) ? 0x28 : 0x14; |
2524 | |
2525 | StringRef GuardReg = M->getStackProtectorGuardReg(); |
2526 | if (GuardReg == "fs") |
2527 | AddressSpace = X86AS::FS; |
2528 | else if (GuardReg == "gs") |
2529 | AddressSpace = X86AS::GS; |
2530 | return SegmentOffset(IRB, Offset, AddressSpace); |
2531 | } |
2532 | } |
2533 | return TargetLowering::getIRStackGuard(IRB); |
2534 | } |
2535 | |
2536 | void X86TargetLowering::insertSSPDeclarations(Module &M) const { |
2537 | |
2538 | if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() || |
2539 | Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) { |
2540 | |
2541 | M.getOrInsertGlobal("__security_cookie", |
2542 | Type::getInt8PtrTy(M.getContext())); |
2543 | |
2544 | |
2545 | FunctionCallee SecurityCheckCookie = M.getOrInsertFunction( |
2546 | "__security_check_cookie", Type::getVoidTy(M.getContext()), |
2547 | Type::getInt8PtrTy(M.getContext())); |
2548 | if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee())) { |
2549 | F->setCallingConv(CallingConv::X86_FastCall); |
2550 | F->addAttribute(1, Attribute::AttrKind::InReg); |
2551 | } |
2552 | return; |
2553 | } |
2554 | |
2555 | StringRef GuardMode = M.getStackProtectorGuard(); |
2556 | |
2557 | |
2558 | if ((GuardMode == "tls" || GuardMode.empty()) && |
2559 | hasStackGuardSlotTLS(Subtarget.getTargetTriple())) |
2560 | return; |
2561 | TargetLowering::insertSSPDeclarations(M); |
2562 | } |
2563 | |
2564 | Value *X86TargetLowering::getSDagStackGuard(const Module &M) const { |
2565 | |
2566 | if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() || |
2567 | Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) { |
2568 | return M.getGlobalVariable("__security_cookie"); |
2569 | } |
2570 | return TargetLowering::getSDagStackGuard(M); |
2571 | } |
2572 | |
2573 | Function *X86TargetLowering::getSSPStackGuardCheck(const Module &M) const { |
2574 | |
2575 | if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() || |
2576 | Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) { |
2577 | return M.getFunction("__security_check_cookie"); |
2578 | } |
2579 | return TargetLowering::getSSPStackGuardCheck(M); |
2580 | } |
2581 | |
2582 | Value * |
2583 | X86TargetLowering::getSafeStackPointerLocation(IRBuilderBase &IRB) const { |
2584 | if (Subtarget.getTargetTriple().isOSContiki()) |
2585 | return getDefaultSafeStackPointerLocation(IRB, false); |
2586 | |
2587 | // Android provides a fixed TLS slot for the SafeStack pointer; see the |
2588 | // definition of TLS_SLOT_SAFESTACK in bionic's private header |
2589 | // bionic_tls.h. |
2590 | if (Subtarget.isTargetAndroid()) { |
2591 | // The slot is %fs:0x48 on x86-64 and %gs:0x24 on i386. |
2592 | |
2593 | int Offset = (Subtarget.is64Bit()) ? 0x48 : 0x24; |
2594 | return SegmentOffset(IRB, Offset, getAddressSpace()); |
2595 | } |
2596 | |
2597 | // Fuchsia is similar. |
2598 | if (Subtarget.isTargetFuchsia()) { |
2599 | // <zircon/tls.h> defines ZX_TLS_UNSAFE_SP_OFFSET with this value. |
2600 | return SegmentOffset(IRB, 0x18, getAddressSpace()); |
2601 | } |
2602 | |
2603 | return TargetLowering::getSafeStackPointerLocation(IRB); |
2604 | } |
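
// Annotation (editor's note): summary of the fixed TLS slots used above,
// all reached through the thread's segment register (see getAddressSpace):
//   Android x86-64: %fs:0x48 (TLS_SLOT_SAFESTACK); Android i386: %gs:0x24
//   Fuchsia:        %fs:0x18 (ZX_TLS_UNSAFE_SP_OFFSET)
// Other targets fall back to the generic unsafe-stack-pointer location
// chosen by TargetLowering.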
2605 | |
2606 | //===----------------------------------------------------------------------===// |
2607 | //               Return Value Calling Convention Implementation |
2608 | //===----------------------------------------------------------------------===// |
2609 | |
2610 | bool X86TargetLowering::CanLowerReturn( |
2611 | CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, |
2612 | const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const { |
2613 | SmallVector<CCValAssign, 16> RVLocs; |
2614 | CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context); |
2615 | return CCInfo.CheckReturn(Outs, RetCC_X86); |
2616 | } |
2617 | |
2618 | const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const { |
2619 | static const MCPhysReg ScratchRegs[] = { X86::R11, 0 }; |
2620 | return ScratchRegs; |
2621 | } |
2622 | |
2623 | /// Lowers mask values (v*i1) to the register type expected at the call |
2624 | /// boundary and returns the resulting DAG node. |
2625 | static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc, |
2626 | const SDLoc &Dl, SelectionDAG &DAG) { |
2627 | EVT ValVT = ValArg.getValueType(); |
2628 | |
2629 | if (ValVT == MVT::v1i1) |
2630 | return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, Dl, ValLoc, ValArg, |
2631 | DAG.getIntPtrConstant(0, Dl)); |
2632 | |
2633 | if ((ValVT == MVT::v8i1 && (ValLoc == MVT::i8 || ValLoc == MVT::i32)) || |
2634 | (ValVT == MVT::v16i1 && (ValLoc == MVT::i16 || ValLoc == MVT::i32))) { |
2635 | // Two-stage lowering might be required: |
2636 | // bitcast:   v8i1 -> i8  / v16i1 -> i16 |
2637 | // anyextend: i8   -> i32 / i16  -> i32 |
2638 | EVT TempValLoc = ValVT == MVT::v8i1 ? MVT::i8 : MVT::i16; |
2639 | SDValue ValToCopy = DAG.getBitcast(TempValLoc, ValArg); |
2640 | if (ValLoc == MVT::i32) |
2641 | ValToCopy = DAG.getNode(ISD::ANY_EXTEND, Dl, ValLoc, ValToCopy); |
2642 | return ValToCopy; |
2643 | } |
2644 | |
2645 | if ((ValVT == MVT::v32i1 && ValLoc == MVT::i32) || |
2646 | (ValVT == MVT::v64i1 && ValLoc == MVT::i64)) { |
2647 | // One-stage lowering is required: |
2648 | // bitcast: v32i1 -> i32 / v64i1 -> i64 |
2649 | return DAG.getBitcast(ValLoc, ValArg); |
2650 | } |
2651 | |
2652 | return DAG.getNode(ISD::ANY_EXTEND, Dl, ValLoc, ValArg); |
2653 | } |
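
// Annotation (editor's sketch): for example, an AVX-512 v8i1 mask that
// must travel in a 32-bit GPR goes through both stages above:
//   v8i1 --bitcast--> i8 --any_extend--> i32
// A hedged illustration using the same DAG API ("exampleLowerV8i1ToI32"
// is a hypothetical helper):
static SDValue exampleLowerV8i1ToI32(SDValue Mask, const SDLoc &Dl,
                                     SelectionDAG &DAG) {
  assert(Mask.getValueType() == MVT::v8i1 && "illustration expects v8i1");
  return lowerMasksToReg(Mask, MVT::i32, Dl, DAG);
}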
2654 | |
2655 | /// Breaks a v64i1 value into two GR32 halves and appends them to RegsToPass. |
2656 | static void Passv64i1ArgInRegs( |
2657 | const SDLoc &Dl, SelectionDAG &DAG, SDValue &Arg, |
2658 | SmallVectorImpl<std::pair<Register, SDValue>> &RegsToPass, CCValAssign &VA, |
2659 | CCValAssign &NextVA, const X86Subtarget &Subtarget) { |
2660 | assert(Subtarget.hasBWI() && "Expected AVX512BW target!"); |
2661 | assert(Subtarget.is32Bit() && "Expecting 32 bit target"); |
2662 | assert(Arg.getValueType() == MVT::i64 && "Expecting 64 bit value"); |
2663 | assert(VA.isRegLoc() && NextVA.isRegLoc() && |
2664 | "The value should reside in two registers"); |
2665 | |
2666 | // Before splitting, reinterpret the value as a plain i64. |
2667 | Arg = DAG.getBitcast(MVT::i64, Arg); |
2668 | |
2669 | // Split the value into two i32 halves. |
2670 | SDValue Lo, Hi; |
2671 | Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32, Arg, |
2672 | DAG.getConstant(0, Dl, MVT::i32)); |
2673 | Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32, Arg, |
2674 | DAG.getConstant(1, Dl, MVT::i32)); |
2675 | |
2676 | // Attach the two halves to their assigned registers. |
2677 | RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo)); |
2678 | RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Hi)); |
2679 | } |
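
// Annotation (editor's note): on a 32-bit AVX512BW target a v64i1 mask is
// too wide for any single GPR, so the bitcast/EXTRACT_ELEMENT sequence
// above yields
//   Lo = bits 0..31  -> first register (VA)
//   Hi = bits 32..63 -> second register (NextVA)
// getv64i1Argument further below performs the inverse when reading such a
// value back.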
2680 | |
2681 | SDValue |
2682 | X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, |
2683 | bool isVarArg, |
2684 | const SmallVectorImpl<ISD::OutputArg> &Outs, |
2685 | const SmallVectorImpl<SDValue> &OutVals, |
2686 | const SDLoc &dl, SelectionDAG &DAG) const { |
2687 | MachineFunction &MF = DAG.getMachineFunction(); |
2688 | X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); |
2689 | |
2690 | // In some cases we need to disable registers from the default CSR list, |
2691 | // e.g. when they are used for argument passing (regcall / NCSR). |
2692 | bool ShouldDisableCalleeSavedRegister = |
2693 | CallConv == CallingConv::X86_RegCall || |
2694 | MF.getFunction().hasFnAttribute("no_caller_saved_registers"); |
2695 | |
2696 | if (CallConv == CallingConv::X86_INTR && !Outs.empty()) |
2697 | report_fatal_error("X86 interrupts may not return any value"); |
2698 | |
2699 | SmallVector<CCValAssign, 16> RVLocs; |
2700 | CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext()); |
2701 | CCInfo.AnalyzeReturn(Outs, RetCC_X86); |
2702 | |
2703 | SmallVector<std::pair<Register, SDValue>, 4> RetVals; |
2704 | for (unsigned I = 0, OutsIndex = 0, E = RVLocs.size(); I != E; |
2705 | ++I, ++OutsIndex) { |
2706 | CCValAssign &VA = RVLocs[I]; |
2707 | assert(VA.isRegLoc() && "Can only return in registers!"); |
2708 | |
2709 | // Add the register to the CalleeSaveDisableRegs list. |
2710 | if (ShouldDisableCalleeSavedRegister) |
2711 | MF.getRegInfo().disableCalleeSavedRegister(VA.getLocReg()); |
2712 | |
2713 | SDValue ValToCopy = OutVals[OutsIndex]; |
2714 | EVT ValVT = ValToCopy.getValueType(); |
2715 | |
2716 | // Promote values to the appropriate types. |
2717 | if (VA.getLocInfo() == CCValAssign::SExt) |
2718 | ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy); |
2719 | else if (VA.getLocInfo() == CCValAssign::ZExt) |
2720 | ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy); |
2721 | else if (VA.getLocInfo() == CCValAssign::AExt) { |
2722 | if (ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1) |
2723 | ValToCopy = lowerMasksToReg(ValToCopy, VA.getLocVT(), dl, DAG); |
2724 | else |
2725 | ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy); |
2726 | } |
2727 | else if (VA.getLocInfo() == CCValAssign::BCvt) |
2728 | ValToCopy = DAG.getBitcast(VA.getLocVT(), ValToCopy); |
2729 | |
2730 | assert(VA.getLocInfo() != CCValAssign::FPExt && |
2731 | "Unexpected FP-extend for return value."); |
2732 | |
2733 | // Report an error if we have attempted to return FP values via XMM |
2734 | // registers while SSE is disabled; fall back to FP0 to keep lowering going. |
2735 | if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) { |
2736 | errorUnsupported(DAG, dl, "SSE register return with SSE disabled"); |
2737 | VA.convertToReg(X86::FP0); |
2738 | } else if (!Subtarget.hasSSE2() && |
2739 | X86::FR64XRegClass.contains(VA.getLocReg()) && |
2740 | ValVT == MVT::f64) { |
2741 | // Likewise, f64 cannot be returned in an XMM register without SSE2; |
2742 | // fall back to FP0 after reporting the error. |
2743 | errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled"); |
2744 | VA.convertToReg(X86::FP0); |
2745 | } |
2746 | |
2747 | // Returns in ST0/ST1 are handled specially: these are pushed as operands |
2748 | // to the RET instruction and handled by the FP Stackifier. |
2749 | if (VA.getLocReg() == X86::FP0 || |
2750 | VA.getLocReg() == X86::FP1) { |
2751 | // If this is a copy from an XMM register to ST(0), use an FP_EXTEND to |
2752 | // change the value to the FP stack register class. |
2753 | if (isScalarFPTypeInSSEReg(VA.getValVT())) |
2754 | ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy); |
2755 | RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy)); |
2756 | |
2757 | continue; |
2758 | } |
2759 | |
2760 | // 64-bit vector (MMX) values are returned in XMM0/XMM1 except for v1i64, |
2761 | // which is returned in RAX/RDX. |
2762 | if (Subtarget.is64Bit()) { |
2763 | if (ValVT == MVT::x86mmx) { |
2764 | if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) { |
2765 | ValToCopy = DAG.getBitcast(MVT::i64, ValToCopy); |
2766 | ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, |
2767 | ValToCopy); |
2768 | // If we don't have SSE2 available, convert to v4f32 so the generated |
2769 | // register is legal. |
2770 | if (!Subtarget.hasSSE2()) |
2771 | ValToCopy = DAG.getBitcast(MVT::v4f32, ValToCopy); |
2772 | } |
2773 | } |
2774 | } |
2775 | |
2776 | if (VA.needsCustom()) { |
2777 | assert(VA.getValVT() == MVT::v64i1 && |
2778 | "Currently the only custom case is when we split v64i1 to 2 regs"); |
2779 | |
2780 | Passv64i1ArgInRegs(dl, DAG, ValToCopy, RetVals, VA, RVLocs[++I], |
2781 | Subtarget); |
2782 | |
2783 | // Add the second register to the CalleeSaveDisableRegs list. |
2784 | if (ShouldDisableCalleeSavedRegister) |
2785 | MF.getRegInfo().disableCalleeSavedRegister(RVLocs[I].getLocReg()); |
2786 | } else { |
2787 | RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy)); |
2788 | } |
2789 | } |
2790 | |
2791 | SDValue Flag; |
2792 | SmallVector<SDValue, 6> RetOps; |
2793 | RetOps.push_back(Chain); |
2794 | // Operand #1 = Bytes To Pop |
2795 | RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), dl, |
2796 | MVT::i32)); |
2797 | |
2798 | // Copy the result values into the output registers. |
2799 | for (auto &RetVal : RetVals) { |
2800 | if (RetVal.first == X86::FP0 || RetVal.first == X86::FP1) { |
2801 | RetOps.push_back(RetVal.second); |
2802 | continue; |
2803 | } |
2804 | |
2805 | Chain = DAG.getCopyToReg(Chain, dl, RetVal.first, RetVal.second, Flag); |
2806 | Flag = Chain.getValue(1); |
2807 | RetOps.push_back( |
2808 | DAG.getRegister(RetVal.first, RetVal.second.getValueType())); |
2809 | } |
2810 | |
2811 | // All x86 ABIs require that for returning structs by value we copy the |
2812 | // sret argument into %rax/%eax (depending on ABI) for the return; the |
2813 | // argument was saved into a virtual register in the entry block, so it |
2814 | // is copied out here and into the return register. |
2815 | // |
2816 | // Checking Function.hasStructRetAttr() here would be insufficient: the |
2817 | // IR may not carry an sret argument at all, since one can be inserted |
2818 | // implicitly during selection when CanLowerReturn is false. In either |
2819 | // case the register recorded by LowerFormalArguments via |
2820 | // FuncInfo->setSRetReturnReg() is the authoritative source. |
2821 | |
2822 | |
2823 | if (Register SRetReg = FuncInfo->getSRetReturnReg()) { |
2824 | // When we have both an sret and another return value, we must read the |
2825 | // sret argument through the *original* Chain stored in RetOps[0], not |
2826 | // the Chain updated by the loop above. With only an sret, RetOps[0] |
2827 | // equals Chain anyway. |
2828 | // |
2829 | // For the case of sret plus another return value, we would otherwise |
2830 | // have: |
2831 | //   Chain_0 at the function entry |
2832 | //   Chain_1 = getCopyToReg(Chain_0) from the loop above |
2833 | // and using Chain_1 here would produce |
2834 | //   Val     = getCopyFromReg(Chain_1) |
2835 | //   Chain_2 = getCopyToReg(Chain_1, Val) below |
2836 | // The glued copy-to-reg nodes then end up in one scheduling unit while |
2837 | // the copy-from-reg sits in another that both feeds and consumes it, |
2838 | // i.e. a cyclic dependency between the two units. |
2839 | // |
2840 | // Using RetOps[0] (Chain_0) for the getCopyFromReg avoids the cycle. |
2841 | |
2842 | |
2843 | SDValue Val = DAG.getCopyFromReg(RetOps[0], dl, SRetReg, |
2844 | getPointerTy(MF.getDataLayout())); |
2845 | |
2846 | Register RetValReg |
2847 | = (Subtarget.is64Bit() && !Subtarget.isTarget64BitILP32()) ? |
2848 | X86::RAX : X86::EAX; |
2849 | Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Flag); |
2850 | Flag = Chain.getValue(1); |
2851 | |
2852 | // RAX/EAX now acts like a return value. |
2853 | RetOps.push_back( |
2854 | DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout()))); |
2855 | |
2856 | // Add the returned register to the CalleeSaveDisableRegs list. |
2857 | if (ShouldDisableCalleeSavedRegister) |
2858 | MF.getRegInfo().disableCalleeSavedRegister(RetValReg); |
2859 | } |
2860 | |
2861 | const X86RegisterInfo *TRI = Subtarget.getRegisterInfo(); |
2862 | const MCPhysReg *I = |
2863 | TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction()); |
2864 | if (I) { |
2865 | for (; *I; ++I) { |
2866 | if (X86::GR64RegClass.contains(*I)) |
2867 | RetOps.push_back(DAG.getRegister(*I, MVT::i64)); |
2868 | else |
2869 | llvm_unreachable("Unexpected register class in CSRsViaCopy!"); |
2870 | } |
2871 | } |
2872 | |
2873 | RetOps[0] = Chain; |
2874 | |
2875 | // Add the flag if we have it. |
2876 | if (Flag.getNode()) |
2877 | RetOps.push_back(Flag); |
2878 | |
2879 | X86ISD::NodeType opcode = X86ISD::RET_FLAG; |
2880 | if (CallConv == CallingConv::X86_INTR) |
2881 | opcode = X86ISD::IRET; |
2882 | return DAG.getNode(opcode, dl, MVT::Other, RetOps); |
2883 | } |
2884 | |
2885 | bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { |
2886 | if (N->getNumValues() != 1 || !N->hasNUsesOfValue(1, 0)) |
2887 | return false; |
2888 | |
2889 | SDValue TCChain = Chain; |
2890 | SDNode *Copy = *N->use_begin(); |
2891 | if (Copy->getOpcode() == ISD::CopyToReg) { |
2892 | // If the copy has a glue operand, we conservatively assume it isn't safe |
2893 | // to perform a tail call. |
2894 | if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue) |
2895 | return false; |
2896 | TCChain = Copy->getOperand(0); |
2897 | } else if (Copy->getOpcode() != ISD::FP_EXTEND) |
2898 | return false; |
2899 | |
2900 | bool HasRet = false; |
2901 | for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end(); |
2902 | UI != UE; ++UI) { |
2903 | if (UI->getOpcode() != X86ISD::RET_FLAG) |
2904 | return false; |
2905 | // If we are returning more than one value, we can definitely not make |
2906 | // a tail call (see PR19530). |
2907 | if (UI->getNumOperands() > 4) |
2908 | return false; |
2909 | if (UI->getNumOperands() == 4 && |
2910 | UI->getOperand(UI->getNumOperands()-1).getValueType() != MVT::Glue) |
2911 | return false; |
2912 | HasRet = true; |
2913 | } |
2914 | |
2915 | if (!HasRet) |
2916 | return false; |
2917 | |
2918 | Chain = TCChain; |
2919 | return true; |
2920 | } |
2921 | |
2922 | EVT X86TargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT, |
2923 | ISD::NodeType ExtendKind) const { |
2924 | MVT ReturnMVT = MVT::i32; |
2925 | |
2926 | bool Darwin = Subtarget.getTargetTriple().isOSDarwin(); |
2927 | if (VT == MVT::i1 || (!Darwin && (VT == MVT::i8 || VT == MVT::i16))) { |
2928 | // The ABI does not require i1, i8, or i16 return values to be extended. |
2929 | // |
2930 | // On Darwin, there is code in the wild relying on Clang's old behaviour |
2931 | // of always extending i8/i16 return values to i32, so keep doing that |
2932 | // there; everywhere else return them as i8. |
2933 | ReturnMVT = MVT::i8; |
2934 | } |
2935 | |
2936 | EVT MinVT = getRegisterType(Context, ReturnMVT); |
2937 | return VT.bitsLT(MinVT) ? MinVT : VT; |
2938 | } |
2939 | |
2940 | /// Reads two 32-bit registers and creates a 64-bit mask value. |
2941 | /// |
2942 | /// \param VA The current 32-bit value that needs to be assigned. |
2943 | /// \param NextVA The next 32-bit value that needs to be assigned. |
2944 | /// \param Root The parent DAG node. |
2945 | /// \param [in,out] InFlag Glue value from the parent DAG node; when the |
2946 | ///                        DAG already uses physical registers, the newly |
2947 | ///                        created copies are glued to it. |
2948 | /// \return a new 64-bit (v64i1) SDValue. |
2949 | static SDValue getv64i1Argument(CCValAssign &VA, CCValAssign &NextVA, |
2950 | SDValue &Root, SelectionDAG &DAG, |
2951 | const SDLoc &Dl, const X86Subtarget &Subtarget, |
2952 | SDValue *InFlag = nullptr) { |
2953 | assert((Subtarget.hasBWI()) && "Expected AVX512BW target!"); |
2954 | assert(Subtarget.is32Bit() && "Expecting 32 bit target"); |
2955 | assert(VA.getValVT() == MVT::v64i1 && |
2956 | "Expecting first location of 64 bit width type"); |
2957 | assert(NextVA.getValVT() == VA.getValVT() && |
2958 | "The locations should have the same type"); |
2959 | assert(VA.isRegLoc() && NextVA.isRegLoc() && |
2960 | "The values should reside in two registers"); |
2961 | |
2962 | SDValue Lo, Hi; |
2963 | SDValue ArgValueLo, ArgValueHi; |
2964 | |
2965 | MachineFunction &MF = DAG.getMachineFunction(); |
2966 | const TargetRegisterClass *RC = &X86::GR32RegClass; |
2967 | |
2968 | |
2969 | if (nullptr == InFlag) { |
2970 | // When no physical register is present, |
2971 | // create an intermediate virtual register. |
2972 | Register Reg = MF.addLiveIn(VA.getLocReg(), RC); |
2973 | ArgValueLo = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32); |
2974 | Reg = MF.addLiveIn(NextVA.getLocReg(), RC); |
2975 | ArgValueHi = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32); |
2976 | } else { |
2977 | // When a physical register is available, read the value from it and |
2978 | // glue the reads together. |
2979 | ArgValueLo = |
2980 | DAG.getCopyFromReg(Root, Dl, VA.getLocReg(), MVT::i32, *InFlag); |
2981 | *InFlag = ArgValueLo.getValue(2); |
2982 | ArgValueHi = |
2983 | DAG.getCopyFromReg(Root, Dl, NextVA.getLocReg(), MVT::i32, *InFlag); |
2984 | *InFlag = ArgValueHi.getValue(2); |
2985 | } |
2986 | |
2987 | // Convert the i32 copied from the low register into v32i1. |
2988 | Lo = DAG.getBitcast(MVT::v32i1, ArgValueLo); |
2989 | |
2990 | // Convert the i32 copied from the high register into v32i1. |
2991 | Hi = DAG.getBitcast(MVT::v32i1, ArgValueHi); |
2992 | |
2993 | // Concatenate the two halves into a v64i1 mask. |
2994 | return DAG.getNode(ISD::CONCAT_VECTORS, Dl, MVT::v64i1, Lo, Hi); |
2995 | } |
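
// Annotation (editor's note): this is the receiving side of
// Passv64i1ArgInRegs above: two i32 copies (optionally glued together when
// physical registers are live) are bitcast to v32i1 halves and reassembled
// with CONCAT_VECTORS into the original v64i1 mask.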
2996 | |
2997 | /// Lowers a scalar register of size 8/16/32/64 back to a mask value of |
2998 | /// the expected size (v8i1/v16i1/v32i1/v64i1). |
2999 | /// \returns a DAG node containing the operation after lowering to mask type. |
3000 | static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT, |
3001 | const EVT &ValLoc, const SDLoc &Dl, |
3002 | SelectionDAG &DAG) { |
3003 | SDValue ValReturned = ValArg; |
3004 | |
3005 | if (ValVT == MVT::v1i1) |
3006 | return DAG.getNode(ISD::SCALAR_TO_VECTOR, Dl, MVT::v1i1, ValReturned); |
3007 | |
3008 | if (ValVT == MVT::v64i1) { |
3009 | // On 32-bit machines this case is handled by getv64i1Argument. |
3010 | assert(ValLoc == MVT::i64 && "Expecting only i64 locations"); |
3011 | // On 64-bit machines no truncation is needed; a bitcast suffices. |
3012 | } else { |
3013 | MVT maskLen; |
3014 | switch (ValVT.getSimpleVT().SimpleTy) { |
3015 | case MVT::v8i1: |
3016 | maskLen = MVT::i8; |
3017 | break; |
3018 | case MVT::v16i1: |
3019 | maskLen = MVT::i16; |
3020 | break; |
3021 | case MVT::v32i1: |
3022 | maskLen = MVT::i32; |
3023 | break; |
3024 | default: |
3025 | llvm_unreachable("Expecting a vector of i1 types"); |
3026 | } |
3027 | // Truncate the scalar down to the mask-sized integer first. |
3028 | ValReturned = DAG.getNode(ISD::TRUNCATE, Dl, maskLen, ValReturned); |
3029 | } |
3030 | return DAG.getBitcast(ValVT, ValReturned); |
3031 | } |
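
// Annotation (editor's sketch): the truncate-then-bitcast shape means an
// i32 holding a v16i1 mask becomes
//   i32 --truncate--> i16 --bitcast--> v16i1
// e.g. ("exampleRegToV16i1" is a hypothetical helper, same API as above):
static SDValue exampleRegToV16i1(SDValue Reg, const SDLoc &Dl,
                                 SelectionDAG &DAG) {
  return lowerRegToMasks(Reg, MVT::v16i1, Reg.getValueType(), Dl, DAG);
}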
3032 | |
3033 | /// Lower the result values of a call into the appropriate copies out of |
3034 | /// the appropriate physical registers. |
3035 | |
3036 | SDValue X86TargetLowering::LowerCallResult( |
3037 | SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, |
3038 | const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, |
3039 | SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals, |
3040 | uint32_t *RegMask) const { |
3041 | |
3042 | const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); |
3043 | |
3044 | SmallVector<CCValAssign, 16> RVLocs; |
3045 | CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs, |
3046 | *DAG.getContext()); |
3047 | CCInfo.AnalyzeCallResult(Ins, RetCC_X86); |
3048 | |
3049 | // Copy all of the result registers out of their specified physregs. |
3050 | for (unsigned I = 0, InsIndex = 0, E = RVLocs.size(); I != E; |
3051 | ++I, ++InsIndex) { |
3052 | CCValAssign &VA = RVLocs[I]; |
3053 | EVT CopyVT = VA.getLocVT(); |
3054 | |
3055 | // In some calling conventions we need to remove the used registers |
3056 | // from the register mask. |
3057 | if (RegMask) { |
3058 | for (MCSubRegIterator SubRegs(VA.getLocReg(), TRI, true); |
3059 | SubRegs.isValid(); ++SubRegs) |
3060 | RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32)); |
3061 | } |
3062 | |
3063 | // Report an error if there was an attempt to return FP values via XMM |
3064 | // registers while SSE is disabled. |
3065 | if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) { |
3066 | errorUnsupported(DAG, dl, "SSE register return with SSE disabled"); |
3067 | if (VA.getLocReg() == X86::XMM1) |
3068 | VA.convertToReg(X86::FP1); |
3069 | else |
3070 | VA.convertToReg(X86::FP0); |
3071 | } else if (!Subtarget.hasSSE2() && |
3072 | X86::FR64XRegClass.contains(VA.getLocReg()) && |
3073 | CopyVT == MVT::f64) { |
3074 | errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled"); |
3075 | if (VA.getLocReg() == X86::XMM1) |
3076 | VA.convertToReg(X86::FP1); |
3077 | else |
3078 | VA.convertToReg(X86::FP0); |
3079 | } |
3080 | |
3081 | // If we prefer to use the value in XMM registers, copy it out as f80 |
3082 | // and use a truncate to move it from the FP stack reg to the XMM reg. |
3083 | bool RoundAfterCopy = false; |
3084 | if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) && |
3085 | isScalarFPTypeInSSEReg(VA.getValVT())) { |
3086 | if (!Subtarget.hasX87()) |
3087 | report_fatal_error("X87 register return with X87 disabled"); |
3088 | CopyVT = MVT::f80; |
3089 | RoundAfterCopy = (CopyVT != VA.getLocVT()); |
3090 | } |
3091 | |
3092 | SDValue Val; |
3093 | if (VA.needsCustom()) { |
3094 | assert(VA.getValVT() == MVT::v64i1 && |
3095 | "Currently the only custom case is when we split v64i1 to 2 regs"); |
3096 | Val = |
3097 | getv64i1Argument(VA, RVLocs[++I], Chain, DAG, dl, Subtarget, &InFlag); |
3098 | } else { |
3099 | Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), CopyVT, InFlag) |
3100 | .getValue(1); |
3101 | Val = Chain.getValue(0); |
3102 | InFlag = Chain.getValue(2); |
3103 | } |
3104 | |
3105 | if (RoundAfterCopy) |
3106 | Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val, |
3107 | // This truncation won't change the value. |
3108 | DAG.getIntPtrConstant(1, dl)); |
3109 | |
3110 | if (VA.isExtInLoc()) { |
3111 | if (VA.getValVT().isVector() && |
3112 | VA.getValVT().getScalarType() == MVT::i1 && |
3113 | ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) || |
3114 | (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) { |
3115 | // Demote the value from the scalar register back to a mask type (v*i1). |
3116 | Val = lowerRegToMasks(Val, VA.getValVT(), VA.getLocVT(), dl, DAG); |
3117 | } else |
3118 | Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val); |
3119 | } |
3120 | |
3121 | if (VA.getLocInfo() == CCValAssign::BCvt) |
3122 | Val = DAG.getBitcast(VA.getValVT(), Val); |
3123 | |
3124 | InVals.push_back(Val); |
3125 | } |
3126 | |
3127 | return Chain; |
3128 | } |
3129 | |
3130 | //===----------------------------------------------------------------------===// |
3131 | //          C & StdCall & Fast Calling Convention implementation |
3132 | //===----------------------------------------------------------------------===// |
3133 | //  StdCall is the standard convention of many Windows API routines; it |
3134 | //  differs from the C convention mainly in that the callee, not the |
3135 | //  caller, cleans up the stack, and it does not support vector arguments. |
3136 | //  For the fast convention, see the tail-call lowering below. |
3137 | |
3138 | |
3139 | /// StructReturnType classifies how a call returns a struct by value: not |
3140 | /// at all, via a register-passed hidden pointer, or via a stack-passed |
3141 | /// hidden pointer. |
3142 | enum StructReturnType { |
3143 | NotStructReturn, |
3144 | RegStructReturn, |
3145 | StackStructReturn |
3146 | }; |
3147 | static StructReturnType |
3148 | callIsStructReturn(ArrayRef<ISD::OutputArg> Outs, bool IsMCU) { |
3149 | if (Outs.empty()) |
3150 | return NotStructReturn; |
3151 | |
3152 | const ISD::ArgFlagsTy &Flags = Outs[0].Flags; |
3153 | if (!Flags.isSRet()) |
3154 | return NotStructReturn; |
3155 | if (Flags.isInReg() || IsMCU) |
3156 | return RegStructReturn; |
3157 | return StackStructReturn; |
3158 | } |
3159 | |
3160 | /// Determines whether a function uses struct-return semantics. |
3161 | static StructReturnType |
3162 | argsAreStructReturn(ArrayRef<ISD::InputArg> Ins, bool IsMCU) { |
3163 | if (Ins.empty()) |
3164 | return NotStructReturn; |
3165 | |
3166 | const ISD::ArgFlagsTy &Flags = Ins[0].Flags; |
3167 | if (!Flags.isSRet()) |
3168 | return NotStructReturn; |
3169 | if (Flags.isInReg() || IsMCU) |
3170 | return RegStructReturn; |
3171 | return StackStructReturn; |
3172 | } |
3173 | |
3174 | /// Make a copy of an aggregate at the address specified by "Src" to the |
3175 | /// address "Dst" with the size and alignment given by the byval parameter |
3176 | /// attribute. The copy will be passed as a byval function parameter. |
3177 | static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst, |
3178 | SDValue Chain, ISD::ArgFlagsTy Flags, |
3179 | SelectionDAG &DAG, const SDLoc &dl) { |
3180 | SDValue SizeNode = DAG.getIntPtrConstant(Flags.getByValSize(), dl); |
3181 | |
3182 | return DAG.getMemcpy( |
3183 | Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(), |
3184 | /*isVolatile=*/false, /*AlwaysInline=*/true, |
3185 | /*isTailCall=*/false, MachinePointerInfo(), MachinePointerInfo()); |
3186 | } |
3187 | |
3188 | /// Return true if the calling convention is one that we can guarantee TCO for. |
3189 | static bool canGuaranteeTCO(CallingConv::ID CC) { |
3190 | return (CC == CallingConv::Fast || CC == CallingConv::GHC || |
3191 | CC == CallingConv::X86_RegCall || CC == CallingConv::HiPE || |
3192 | CC == CallingConv::HHVM || CC == CallingConv::Tail || |
3193 | CC == CallingConv::SwiftTail); |
3194 | } |
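
// Annotation (editor's note): these conventions can guarantee TCO because
// caller and callee agree that the callee may reuse (and pop) the caller's
// incoming argument area. C-family conventions listed in mayTailCallThisCC
// below only get tail calls when the eligibility check later proves no
// stack shuffling is needed (sibcalls).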
3195 | |
3196 | /// Return true if we might ever do TCO for calls with this calling convention. |
3197 | static bool mayTailCallThisCC(CallingConv::ID CC) { |
3198 | switch (CC) { |
3199 | // C calling conventions: |
3200 | case CallingConv::C: |
3201 | case CallingConv::Win64: |
3202 | case CallingConv::X86_64_SysV: |
3203 | // Callee pop conventions: |
3204 | case CallingConv::X86_ThisCall: |
3205 | case CallingConv::X86_StdCall: |
3206 | case CallingConv::X86_VectorCall: |
3207 | case CallingConv::X86_FastCall: |
3208 | // Swift: |
3209 | case CallingConv::Swift: |
3210 | return true; |
3211 | default: |
3212 | return canGuaranteeTCO(CC); |
3213 | } |
3214 | } |
3215 | |
3216 | /// Return true if the function is being made into a tailcall target by |
3217 | /// changing its ABI. |
3218 | static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) { |
3219 | return (GuaranteedTailCallOpt && canGuaranteeTCO(CC)) || |
3220 | CC == CallingConv::Tail || CC == CallingConv::SwiftTail; |
3221 | } |
3222 | |
3223 | bool X86TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { |
3224 | if (!CI->isTailCall()) |
3225 | return false; |
3226 | |
3227 | CallingConv::ID CalleeCC = CI->getCallingConv(); |
3228 | if (!mayTailCallThisCC(CalleeCC)) |
3229 | return false; |
3230 | |
3231 | return true; |
3232 | } |
3233 | |
3234 | SDValue |
3235 | X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv, |
3236 | const SmallVectorImpl<ISD::InputArg> &Ins, |
3237 | const SDLoc &dl, SelectionDAG &DAG, |
3238 | const CCValAssign &VA, |
3239 | MachineFrameInfo &MFI, unsigned i) const { |
3240 | |
3241 | ISD::ArgFlagsTy Flags = Ins[i].Flags; |
3242 | bool AlwaysUseMutable = shouldGuaranteeTCO( |
3243 | CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt); |
3244 | bool isImmutable = !AlwaysUseMutable && !Flags.isByVal(); |
3245 | EVT ValVT; |
3246 | MVT PtrVT = getPointerTy(DAG.getDataLayout()); |
3247 | |
3248 | // If the value is passed by pointer, the address arrives instead of the |
3249 | // value itself; no extension is needed when the mask value and the |
3250 | // location already share the same bit width. |
3251 | bool ExtendedInMem = |
3252 | VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1 && |
3253 | VA.getValVT().getSizeInBits() != VA.getLocVT().getSizeInBits(); |
3254 | |
3255 | if (VA.getLocInfo() == CCValAssign::Indirect || ExtendedInMem) |
3256 | ValVT = VA.getLocVT(); |
3257 | else |
3258 | ValVT = VA.getValVT(); |
3259 | |
3260 | // FIXME: For now, all byval parameter objects are marked mutable; this |
3261 | // could be changed with more analysis. Under tail call optimization all |
3262 | // arguments are treated as mutable, since they may be overwritten while |
3263 | // lowering the arguments of the tail call itself. |
3264 | if (Flags.isByVal()) { |
3265 | unsigned Bytes = Flags.getByValSize(); |
3266 | if (Bytes == 0) Bytes = 1; |
3267 | // FIXME: For now, all byval parameter objects are marked as aliasing; |
3268 | // this can be improved with deeper analysis. |
3269 | |
3270 | int FI = MFI.CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable, |
3271 | true); |
3272 | return DAG.getFrameIndex(FI, PtrVT); |
3273 | } |
3274 | |
3275 | EVT ArgVT = Ins[i].ArgVT; |
3276 | |
3277 | // If the argument is passed directly in memory without any extension, |
3278 | // copy elision may be possible. Large vector types, for example, may |
3279 | // instead be passed indirectly by pointer, which rules elision out. |
3280 | |
3281 | bool ScalarizedAndExtendedVector = |
3282 | ArgVT.isVector() && !VA.getLocVT().isVector() && |
3283 | VA.getLocVT().getSizeInBits() != ArgVT.getScalarSizeInBits(); |
3284 | |
3285 | |
3286 | |
3287 | |
3288 | |
3289 | if (Flags.isCopyElisionCandidate() && |
3290 | VA.getLocInfo() != CCValAssign::Indirect && !ExtendedInMem && |
3291 | !ScalarizedAndExtendedVector) { |
3292 | SDValue PartAddr; |
3293 | if (Ins[i].PartOffset == 0) { |
3294 | // If this is a one-part value or the first part of a multi-part value, |
3295 | // create a fixed stack object for this piece of the argument and return |
3296 | // a load directly from it. |
3297 | |
3298 | int FI = MFI.CreateFixedObject(ArgVT.getStoreSize(), VA.getLocMemOffset(), |
3299 | false); |
3300 | PartAddr = DAG.getFrameIndex(FI, PtrVT); |
3301 | return DAG.getLoad( |
3302 | ValVT, dl, Chain, PartAddr, |
3303 | MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)); |
3304 | } else { |
3305 | // This is not the first piece of a split argument. See whether a fixed |
3306 | // stack object already covers this offset (created by the PartOffset == |
3307 | // 0 branch above) and, if so, load from the right offset into it. |
3308 | |
3309 | int64_t PartBegin = VA.getLocMemOffset(); |
3310 | int64_t PartEnd = PartBegin + ValVT.getSizeInBits() / 8; |
3311 | int FI = MFI.getObjectIndexBegin(); |
3312 | for (; MFI.isFixedObjectIndex(FI); ++FI) { |
3313 | int64_t ObjBegin = MFI.getObjectOffset(FI); |
3314 | int64_t ObjEnd = ObjBegin + MFI.getObjectSize(FI); |
3315 | if (ObjBegin <= PartBegin && PartEnd <= ObjEnd) |
3316 | break; |
3317 | } |
3318 | if (MFI.isFixedObjectIndex(FI)) { |
3319 | SDValue Addr = |
3320 | DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getFrameIndex(FI, PtrVT), |
3321 | DAG.getIntPtrConstant(Ins[i].PartOffset, dl)); |
3322 | return DAG.getLoad( |
3323 | ValVT, dl, Chain, Addr, |
3324 | MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI, |
3325 | Ins[i].PartOffset)); |
3326 | } |
3327 | } |
3328 | } |
3329 | |
3330 | int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8, |
3331 | VA.getLocMemOffset(), isImmutable); |
3332 | |
3333 | // Record the SExt/ZExt promotion on the stack object. |
3334 | if (VA.getLocInfo() == CCValAssign::ZExt) { |
3335 | MFI.setObjectZExt(FI, true); |
3336 | } else if (VA.getLocInfo() == CCValAssign::SExt) { |
3337 | MFI.setObjectSExt(FI, true); |
3338 | } |
3339 | |
3340 | SDValue FIN = DAG.getFrameIndex(FI, PtrVT); |
3341 | SDValue Val = DAG.getLoad( |
3342 | ValVT, dl, Chain, FIN, |
3343 | MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)); |
3344 | return ExtendedInMem |
3345 | ? (VA.getValVT().isVector() |
3346 | ? DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VA.getValVT(), Val) |
3347 | : DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val)) |
3348 | : Val; |
3349 | } |
3350 | |
3351 | // FIXME: Get this from tablegen. |
3352 | static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv, |
3353 | const X86Subtarget &Subtarget) { |
3354 | assert(Subtarget.is64Bit()); |
3355 | |
3356 | if (Subtarget.isCallingConvWin64(CallConv)) { |
3357 | static const MCPhysReg GPR64ArgRegsWin64[] = { |
3358 | X86::RCX, X86::RDX, X86::R8, X86::R9 |
3359 | }; |
3360 | return makeArrayRef(std::begin(GPR64ArgRegsWin64), std::end(GPR64ArgRegsWin64)); |
3361 | } |
3362 | |
3363 | static const MCPhysReg GPR64ArgRegs64Bit[] = { |
3364 | X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9 |
3365 | }; |
3366 | return makeArrayRef(std::begin(GPR64ArgRegs64Bit), std::end(GPR64ArgRegs64Bit)); |
3367 | } |
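
// Annotation (editor's note): the two register files above mirror the ABI
// argument orders -- Win64 passes the first four integer arguments in
// RCX, RDX, R8, R9 (with stack shadow space), while the SysV AMD64 ABI
// uses RDI, RSI, RDX, RCX, R8, R9. The vararg spill code below saves
// whichever of these were not consumed by named parameters.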
3368 | |
3369 | // FIXME: Get this from tablegen. |
3370 | static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF, |
3371 | CallingConv::ID CallConv, |
3372 | const X86Subtarget &Subtarget) { |
3373 | assert(Subtarget.is64Bit()); |
3374 | if (Subtarget.isCallingConvWin64(CallConv)) { |
3375 | // The XMM registers which might contain var arg parameters are shadowed |
3376 | // in their paired GPR, so only the GPRs need to be saved to their home |
3377 | // slots. |
3378 | // TODO: __vectorcall will change this. |
3379 | return None; |
3380 | } |
3381 | |
3382 | bool isSoftFloat = Subtarget.useSoftFloat(); |
3383 | if (isSoftFloat || !Subtarget.hasSSE1()) |
3384 | // Kernel mode asks for SSE to be disabled, so there are no XMM |
3385 | // argument registers. |
3386 | return None; |
3387 | |
3388 | static const MCPhysReg XMMArgRegs64Bit[] = { |
3389 | X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, |
3390 | X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7 |
3391 | }; |
3392 | return makeArrayRef(std::begin(XMMArgRegs64Bit), std::end(XMMArgRegs64Bit)); |
3393 | } |
3394 | |
3395 | #ifndef NDEBUG |
3396 | static bool isSortedByValueNo(ArrayRef<CCValAssign> ArgLocs) { |
3397 | return llvm::is_sorted( |
3398 | ArgLocs, [](const CCValAssign &A, const CCValAssign &B) -> bool { |
3399 | return A.getValNo() < B.getValNo(); |
3400 | }); |
3401 | } |
3402 | #endif |
3403 | |
3404 | namespace { |
3405 | |
3406 | class VarArgsLoweringHelper { |
3407 | public: |
3408 | VarArgsLoweringHelper(X86MachineFunctionInfo *FuncInfo, const SDLoc &Loc, |
3409 | SelectionDAG &DAG, const X86Subtarget &Subtarget, |
3410 | CallingConv::ID CallConv, CCState &CCInfo) |
3411 | : FuncInfo(FuncInfo), DL(Loc), DAG(DAG), Subtarget(Subtarget), |
3412 | TheMachineFunction(DAG.getMachineFunction()), |
3413 | TheFunction(TheMachineFunction.getFunction()), |
3414 | FrameInfo(TheMachineFunction.getFrameInfo()), |
3415 | FrameLowering(*Subtarget.getFrameLowering()), |
3416 | TargLowering(DAG.getTargetLoweringInfo()), CallConv(CallConv), |
3417 | CCInfo(CCInfo) {} |
3418 | |
3419 | // Lower variable-argument parameters. |
3420 | void lowerVarArgsParameters(SDValue &Chain, unsigned StackSize); |
3421 | |
3422 | private: |
3423 | void createVarArgAreaAndStoreRegisters(SDValue &Chain, unsigned StackSize); |
3424 | |
3425 | void forwardMustTailParameters(SDValue &Chain); |
3426 | |
3427 | bool is64Bit() const { return Subtarget.is64Bit(); } |
3428 | bool isWin64() const { return Subtarget.isCallingConvWin64(CallConv); } |
3429 | |
3430 | X86MachineFunctionInfo *FuncInfo; |
3431 | const SDLoc &DL; |
3432 | SelectionDAG &DAG; |
3433 | const X86Subtarget &Subtarget; |
3434 | MachineFunction &TheMachineFunction; |
3435 | const Function &TheFunction; |
3436 | MachineFrameInfo &FrameInfo; |
3437 | const TargetFrameLowering &FrameLowering; |
3438 | const TargetLowering &TargLowering; |
3439 | CallingConv::ID CallConv; |
3440 | CCState &CCInfo; |
3441 | }; |
3442 | } |
3443 | |
3444 | void VarArgsLoweringHelper::createVarArgAreaAndStoreRegisters( |
3445 | SDValue &Chain, unsigned StackSize) { |
3446 | // Make a frame index for the start of the first vararg value; it is |
3447 | // used when expanding llvm.va_start. |
3448 | |
3449 | if (is64Bit() || (CallConv != CallingConv::X86_FastCall && |
3450 | CallConv != CallingConv::X86_ThisCall)) { |
3451 | FuncInfo->setVarArgsFrameIndex( |
3452 | FrameInfo.CreateFixedObject(1, StackSize, true)); |
3453 | } |
3454 | |
3455 | // 64-bit calling conventions support varargs, so the unnamed argument |
3456 | // registers must be spilled to the register save area in the prologue. |
3457 | if (is64Bit()) { |
3458 | |
3459 | ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget); |
3460 | ArrayRef<MCPhysReg> ArgXMMs = |
3461 | get64BitArgumentXMMs(TheMachineFunction, CallConv, Subtarget); |
3462 | unsigned NumIntRegs = CCInfo.getFirstUnallocated(ArgGPRs); |
3463 | unsigned NumXMMRegs = CCInfo.getFirstUnallocated(ArgXMMs); |
3464 | |
3465 | assert(!(NumXMMRegs && !Subtarget.hasSSE1()) && |
3466 | "SSE register cannot be used when SSE is disabled!"); |
3467 | |
3468 | if (isWin64()) { |
3469 | // Get to the caller-allocated home save location. Add 8 to account |
3470 | // for the return address. |
3471 | int HomeOffset = FrameLowering.getOffsetOfLocalArea() + 8; |
3472 | FuncInfo->setRegSaveFrameIndex( |
3473 | FrameInfo.CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false)); |
3474 | // Fixup to set the vararg frame index on the shadow area (4 x i64). |
3475 | if (NumIntRegs < 4) |
3476 | FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex()); |
3477 | } else { |
3478 | // For X86-64, if there are vararg parameters that are passed via |
3479 | // registers, then we must store them to their spots on the stack so |
3480 | // they may be loaded by dereferencing the result of va_next. |
3481 | FuncInfo->setVarArgsGPOffset(NumIntRegs * 8); |
3482 | FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16); |
3483 | FuncInfo->setRegSaveFrameIndex(FrameInfo.CreateStackObject( |
3484 | ArgGPRs.size() * 8 + ArgXMMs.size() * 16, Align(16), false)); |
3485 | } |
3486 | |
3487 | // SDValues for the GPR and XMM registers that keep live input values. |
3488 | SmallVector<SDValue, 6> LiveGPRs; |
3489 | SmallVector<SDValue, 8> LiveXMMRegs; |
3490 | |
3491 | SDValue ALVal; |
3492 | |
3493 | // Gather the live-in GPR argument registers that remain unallocated. |
3494 | for (MCPhysReg Reg : ArgGPRs.slice(NumIntRegs)) { |
3495 | Register GPR = TheMachineFunction.addLiveIn(Reg, &X86::GR64RegClass); |
3496 | LiveGPRs.push_back(DAG.getCopyFromReg(Chain, DL, GPR, MVT::i64)); |
3497 | } |
3498 | const auto &AvailableXmms = ArgXMMs.slice(NumXMMRegs); |
3499 | if (!AvailableXmms.empty()) { |
3500 | Register AL = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass); |
3501 | ALVal = DAG.getCopyFromReg(Chain, DL, AL, MVT::i8); |
3502 | for (MCPhysReg Reg : AvailableXmms) { |
3503 | // FastRegisterAllocator spills virtual registers at basic block |
3504 | // boundaries, which would lead to uses of XMM registers outside the |
3505 | // check for %al. Pass physical registers to VASTART_SAVE_XMM_REGS to |
3506 | // avoid unnecessary spilling. |
3507 | TheMachineFunction.getRegInfo().addLiveIn(Reg); |
3508 | LiveXMMRegs.push_back(DAG.getRegister(Reg, MVT::v4f32)); |
3509 | } |
3510 | } |
3511 | |
3512 | // Store the integer parameter registers. |
3513 | SmallVector<SDValue, 8> MemOps; |
3514 | SDValue RSFIN = |
3515 | DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), |
3516 | TargLowering.getPointerTy(DAG.getDataLayout())); |
3517 | unsigned Offset = FuncInfo->getVarArgsGPOffset(); |
3518 | for (SDValue Val : LiveGPRs) { |
3519 | SDValue FIN = DAG.getNode(ISD::ADD, DL, |
3520 | TargLowering.getPointerTy(DAG.getDataLayout()), |
3521 | RSFIN, DAG.getIntPtrConstant(Offset, DL)); |
3522 | SDValue Store = |
3523 | DAG.getStore(Val.getValue(1), DL, Val, FIN, |
3524 | MachinePointerInfo::getFixedStack( |
3525 | DAG.getMachineFunction(), |
3526 | FuncInfo->getRegSaveFrameIndex(), Offset)); |
3527 | MemOps.push_back(Store); |
3528 | Offset += 8; |
3529 | } |
3530 | |
3531 | // Now store the XMM (fp + vector) parameter registers. |
3532 | if (!LiveXMMRegs.empty()) { |
3533 | SmallVector<SDValue, 12> SaveXMMOps; |
3534 | SaveXMMOps.push_back(Chain); |
3535 | SaveXMMOps.push_back(ALVal); |
3536 | SaveXMMOps.push_back( |
3537 | DAG.getTargetConstant(FuncInfo->getRegSaveFrameIndex(), DL, MVT::i32)); |
3538 | SaveXMMOps.push_back( |
3539 | DAG.getTargetConstant(FuncInfo->getVarArgsFPOffset(), DL, MVT::i32)); |
3540 | llvm::append_range(SaveXMMOps, LiveXMMRegs); |
3541 | MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, DL, |
3542 | MVT::Other, SaveXMMOps)); |
3543 | } |
3544 | |
3545 | if (!MemOps.empty()) |
3546 | Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps); |
3547 | } |
3548 | } |
3549 | |
3550 | void VarArgsLoweringHelper::forwardMustTailParameters(SDValue &Chain) { |
3551 | // Find the largest legal vector type. |
3552 | MVT VecVT = MVT::Other; |
3553 | |
3554 | if (Subtarget.useAVX512Regs() && |
3555 | (is64Bit() || (CallConv == CallingConv::X86_VectorCall || |
3556 | CallConv == CallingConv::Intel_OCL_BI))) |
3557 | VecVT = MVT::v16f32; |
3558 | else if (Subtarget.hasAVX()) |
3559 | VecVT = MVT::v8f32; |
3560 | else if (Subtarget.hasSSE2()) |
3561 | VecVT = MVT::v4f32; |
3562 | |
3563 | // We forward some GPRs and some vector types. |
3564 | SmallVector<MVT, 2> RegParmTypes; |
3565 | MVT IntVT = is64Bit() ? MVT::i64 : MVT::i32; |
3566 | RegParmTypes.push_back(IntVT); |
3567 | if (VecVT != MVT::Other) |
3568 | RegParmTypes.push_back(VecVT); |
3569 | |
3570 | // Compute the set of forwarded registers. The rest are scratch. |
3571 | SmallVectorImpl<ForwardedRegister> &Forwards = |
3572 | FuncInfo->getForwardedMustTailRegParms(); |
3573 | CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, CC_X86); |
3574 | |
3575 | // Forward AL on SysV x86-64 targets, since it is used for varargs. |
3576 | if (is64Bit() && !isWin64() && !CCInfo.isAllocated(X86::AL)) { |
3577 | Register ALVReg = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass); |
3578 | Forwards.push_back(ForwardedRegister(ALVReg, X86::AL, MVT::i8)); |
3579 | } |
3580 | |
3581 | // Copy all forwards from physical to virtual registers. |
3582 | for (ForwardedRegister &FR : Forwards) { |
3583 | |
3584 | SDValue RegVal = DAG.getCopyFromReg(Chain, DL, FR.VReg, FR.VT); |
3585 | FR.VReg = TheMachineFunction.getRegInfo().createVirtualRegister( |
3586 | TargLowering.getRegClassFor(FR.VT)); |
3587 | Chain = DAG.getCopyToReg(Chain, DL, FR.VReg, RegVal); |
3588 | } |
3589 | } |
3590 | |
3591 | void VarArgsLoweringHelper::lowerVarArgsParameters(SDValue &Chain, |
3592 | unsigned StackSize) { |
3593 | // Set FrameIndex to the 0xAAAAAAA sentinel to mark the unset state; it |
3594 | // is given its real value later if needed. |
3595 | FuncInfo->setVarArgsFrameIndex(0xAAAAAAA); |
3596 | FuncInfo->setRegSaveFrameIndex(0xAAAAAAA); |
3597 | |
3598 | if (FrameInfo.hasVAStart()) |
3599 | createVarArgAreaAndStoreRegisters(Chain, StackSize); |
3600 | |
3601 | if (FrameInfo.hasMustTailInVarArgFunc()) |
3602 | forwardMustTailParameters(Chain); |
3603 | } |
3604 | |
3605 | SDValue X86TargetLowering::LowerFormalArguments( |
3606 | SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, |
3607 | const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, |
3608 | SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { |
3609 | MachineFunction &MF = DAG.getMachineFunction(); |
3610 | X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); |
3611 | |
3612 | const Function &F = MF.getFunction(); |
3613 | if (F.hasExternalLinkage() && Subtarget.isTargetCygMing() && |
3614 | F.getName() == "main") |
3615 | FuncInfo->setForceFramePointer(true); |
3616 | |
3617 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
3618 | bool Is64Bit = Subtarget.is64Bit(); |
3619 | bool IsWin64 = Subtarget.isCallingConvWin64(CallConv); |
3620 | |
3621 | assert( |
3622 | !(IsVarArg && canGuaranteeTCO(CallConv)) && |
3623 | "Var args not supported with calling conv' regcall, fastcc, ghc or hipe"); |
3624 | |
3625 | // Assign locations to all of the incoming arguments. |
3626 | SmallVector<CCValAssign, 16> ArgLocs; |
3627 | CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); |
3628 | |
3629 | // Allocate shadow area for Win64. |
3630 | if (IsWin64) |
3631 | CCInfo.AllocateStack(32, Align(8)); |
3632 | |
3633 | CCInfo.AnalyzeArguments(Ins, CC_X86); |
3634 | |
3635 | // In the vectorcall calling convention a second pass is required for |
3636 | // the HVA types. |
3637 | if (CallingConv::X86_VectorCall == CallConv) { |
3638 | CCInfo.AnalyzeArgumentsSecondPass(Ins, CC_X86); |
3639 | } |
3640 | |
3641 | // The next loop assumes that the locations are in the same order as |
3642 | // the input arguments. |
3643 | assert(isSortedByValueNo(ArgLocs) && |
3644 | "Argument Location list must be sorted before lowering"); |
3645 | |
3646 | SDValue ArgValue; |
3647 | for (unsigned I = 0, InsIndex = 0, E = ArgLocs.size(); I != E; |
3648 | ++I, ++InsIndex) { |
3649 | assert(InsIndex < Ins.size() && "Invalid Ins index"); |
3650 | CCValAssign &VA = ArgLocs[I]; |
3651 | |
3652 | if (VA.isRegLoc()) { |
3653 | EVT RegVT = VA.getLocVT(); |
3654 | if (VA.needsCustom()) { |
3655 | assert( |
3656 | VA.getValVT() == MVT::v64i1 && |
3657 | "Currently the only custom case is when we split v64i1 to 2 regs"); |
3658 | // v64i1 values, under the regcall convention on 32-bit targets, are |
3659 | // split across two 32-bit registers. |
3660 | |
3661 | ArgValue = |
3662 | getv64i1Argument(VA, ArgLocs[++I], Chain, DAG, dl, Subtarget); |
3663 | } else { |
3664 | const TargetRegisterClass *RC; |
3665 | if (RegVT == MVT::i8) |
3666 | RC = &X86::GR8RegClass; |
3667 | else if (RegVT == MVT::i16) |
3668 | RC = &X86::GR16RegClass; |
3669 | else if (RegVT == MVT::i32) |
3670 | RC = &X86::GR32RegClass; |
3671 | else if (Is64Bit && RegVT == MVT::i64) |
3672 | RC = &X86::GR64RegClass; |
3673 | else if (RegVT == MVT::f32) |
3674 | RC = Subtarget.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass; |
3675 | else if (RegVT == MVT::f64) |
3676 | RC = Subtarget.hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass; |
3677 | else if (RegVT == MVT::f80) |
3678 | RC = &X86::RFP80RegClass; |
3679 | else if (RegVT == MVT::f128) |
3680 | RC = &X86::VR128RegClass; |
3681 | else if (RegVT.is512BitVector()) |
3682 | RC = &X86::VR512RegClass; |
3683 | else if (RegVT.is256BitVector()) |
3684 | RC = Subtarget.hasVLX() ? &X86::VR256XRegClass : &X86::VR256RegClass; |
3685 | else if (RegVT.is128BitVector()) |
3686 | RC = Subtarget.hasVLX() ? &X86::VR128XRegClass : &X86::VR128RegClass; |
3687 | else if (RegVT == MVT::x86mmx) |
3688 | RC = &X86::VR64RegClass; |
3689 | else if (RegVT == MVT::v1i1) |
3690 | RC = &X86::VK1RegClass; |
3691 | else if (RegVT == MVT::v8i1) |
3692 | RC = &X86::VK8RegClass; |
3693 | else if (RegVT == MVT::v16i1) |
3694 | RC = &X86::VK16RegClass; |
3695 | else if (RegVT == MVT::v32i1) |
3696 | RC = &X86::VK32RegClass; |
3697 | else if (RegVT == MVT::v64i1) |
3698 | RC = &X86::VK64RegClass; |
3699 | else |
3700 | llvm_unreachable("Unknown argument type!"); |
3701 | |
3702 | Register Reg = MF.addLiveIn(VA.getLocReg(), RC); |
3703 | ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT); |
3704 | } |
3705 | |
3706 | // If this is an 8 or 16-bit value, it is really passed promoted to 32 |
3707 | // bits. Insert an assert[sz]ext to capture this, then truncate to the |
3708 | // right size. |
3709 | if (VA.getLocInfo() == CCValAssign::SExt) |
3710 | ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue, |
3711 | DAG.getValueType(VA.getValVT())); |
3712 | else if (VA.getLocInfo() == CCValAssign::ZExt) |
3713 | ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue, |
3714 | DAG.getValueType(VA.getValVT())); |
3715 | else if (VA.getLocInfo() == CCValAssign::BCvt) |
3716 | ArgValue = DAG.getBitcast(VA.getValVT(), ArgValue); |
3717 | |
3718 | if (VA.isExtInLoc()) { |
3719 | // Handle MMX values passed in XMM regs. |
3720 | if (RegVT.isVector() && VA.getValVT().getScalarType() != MVT::i1) |
3721 | ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue); |
3722 | else if (VA.getValVT().isVector() && |
3723 | VA.getValVT().getScalarType() == MVT::i1 && |
3724 | ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) || |
3725 | (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) { |
3726 | // Demote the value back from the scalar register to a mask type (v*i1). |
3727 | ArgValue = lowerRegToMasks(ArgValue, VA.getValVT(), RegVT, dl, DAG); |
3728 | } else |
3729 | ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); |
3730 | } |
3731 | } else { |
3732 | assert(VA.isMemLoc()); |
3733 | ArgValue = |
3734 | LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, InsIndex); |
3735 | } |
3736 | |
3737 | // If the value is passed via a pointer, do a load. |
3738 | if (VA.getLocInfo() == CCValAssign::Indirect && !Ins[I].Flags.isByVal()) |
3739 | ArgValue = |
3740 | DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, MachinePointerInfo()); |
3741 | |
3742 | InVals.push_back(ArgValue); |
3743 | } |
3744 | |
3745 | for (unsigned I = 0, E = Ins.size(); I != E; ++I) { |
3746 | if (Ins[I].Flags.isSwiftAsync()) { |
3747 | auto X86FI = MF.getInfo<X86MachineFunctionInfo>(); |
3748 | if (Subtarget.is64Bit()) |
3749 | X86FI->setHasSwiftAsyncContext(true); |
3750 | else { |
3751 | int FI = MF.getFrameInfo().CreateStackObject(4, Align(4), false); |
3752 | X86FI->setSwiftAsyncContextFrameIdx(FI); |
3753 | SDValue St = DAG.getStore(DAG.getEntryNode(), dl, InVals[I], |
3754 | DAG.getFrameIndex(FI, MVT::i32), |
3755 | MachinePointerInfo::getFixedStack(MF, FI)); |
3756 | Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, St, Chain); |
3757 | } |
3758 | } |
3759 | |
3760 | // The Swift calling conventions do not require us to copy the sret |
3761 | // argument into %rax/%eax for the return; SRetReturnReg is not set. |
3762 | if (CallConv == CallingConv::Swift || CallConv == CallingConv::SwiftTail) |
3763 | continue; |
3764 | |
3765 | // All x86 ABIs require that for returning structs by value we copy the |
3766 | // sret argument into %rax/%eax (depending on ABI) for the return. Save |
3767 | // the argument into a virtual register here so it can be accessed from |
3768 | // the return points. |
3769 | if (Ins[I].Flags.isSRet()) { |
3770 | Register Reg = FuncInfo->getSRetReturnReg(); |
3771 | if (!Reg) { |
3772 | MVT PtrTy = getPointerTy(DAG.getDataLayout()); |
3773 | Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy)); |
3774 | FuncInfo->setSRetReturnReg(Reg); |
3775 | } |
3776 | SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[I]); |
3777 | Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain); |
3778 | break; |
3779 | } |
3780 | } |
3781 | |
3782 | unsigned StackSize = CCInfo.getNextStackOffset(); |
3783 | // Align the stack specially for tail calls. |
3784 | if (shouldGuaranteeTCO(CallConv, |
3785 | MF.getTarget().Options.GuaranteedTailCallOpt)) |
3786 | StackSize = GetAlignedArgumentStackSize(StackSize, DAG); |
3787 | |
3788 | if (IsVarArg) |
3789 | VarArgsLoweringHelper(FuncInfo, dl, DAG, Subtarget, CallConv, CCInfo) |
3790 | .lowerVarArgsParameters(Chain, StackSize); |
3791 | |
3792 | // Some CCs need callee pop. |
3793 | if (X86::isCalleePop(CallConv, Is64Bit, IsVarArg, |
3794 | MF.getTarget().Options.GuaranteedTailCallOpt)) { |
3795 | FuncInfo->setBytesToPopOnReturn(StackSize); |
3796 | } else if (CallConv == CallingConv::X86_INTR && Ins.size() == 2) { |
3797 | // X86 interrupts must pop the error code (and the alignment padding) |
3798 | // if present. |
3799 | FuncInfo->setBytesToPopOnReturn(Is64Bit ? 16 : 4); |
3800 | } else { |
3801 | FuncInfo->setBytesToPopOnReturn(0); |
3802 | // If this is an sret function, the return should pop the hidden pointer. |
3803 | if (!Is64Bit && !canGuaranteeTCO(CallConv) && |
3804 | !Subtarget.getTargetTriple().isOSMSVCRT() && |
3805 | argsAreStructReturn(Ins, Subtarget.isTargetMCU()) == StackStructReturn) |
3806 | FuncInfo->setBytesToPopOnReturn(4); |
3807 | } |
3808 | |
3809 | if (!Is64Bit) { |
3810 | // RegSaveFrameIndex is X86-64 only. |
3811 | FuncInfo->setRegSaveFrameIndex(0xAAAAAAA); |
3812 | } |
3813 | |
3814 | FuncInfo->setArgumentStackSize(StackSize); |
3815 | |
3816 | if (WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo()) { |
3817 | EHPersonality Personality = classifyEHPersonality(F.getPersonalityFn()); |
3818 | if (Personality == EHPersonality::CoreCLR) { |
3819 | assert(Is64Bit); |
3820 | // CoreCLR's personality routine locates the parent frame through the |
3821 | // PSPSym slot, so reserve an 8-byte stack object here and record its |
3822 | // frame index for WinEH lowering. |
3823 | |
3824 | |
3825 | |
3826 | |
3827 | |
3828 | int PSPSymFI = MFI.CreateStackObject(8, Align(8), false); |
3829 | EHInfo->PSPSymFrameIdx = PSPSymFI; |
3830 | } |
3831 | } |
3832 | |
3833 | if (CallConv == CallingConv::X86_RegCall || |
3834 | F.hasFnAttribute("no_caller_saved_registers")) { |
3835 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
3836 | for (std::pair<Register, Register> Pair : MRI.liveins()) |
3837 | MRI.disableCalleeSavedRegister(Pair.first); |
3838 | } |
3839 | |
3840 | return Chain; |
3841 | } |
3842 | |
3843 | SDValue X86TargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, |
3844 | SDValue Arg, const SDLoc &dl, |
3845 | SelectionDAG &DAG, |
3846 | const CCValAssign &VA, |
3847 | ISD::ArgFlagsTy Flags, |
3848 | bool isByVal) const { |
3849 | unsigned LocMemOffset = VA.getLocMemOffset(); |
3850 | SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl); |
3851 | PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()), |
3852 | StackPtr, PtrOff); |
3853 | if (isByVal) |
3854 | return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl); |
3855 | |
3856 | return DAG.getStore( |
3857 | Chain, dl, Arg, PtrOff, |
3858 | MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset)); |
3859 | } |
3860 | |
3861 | /// Emit a load of the (old) return address so it can be relocated when |
3862 | /// tail call optimization changes the stack layout. |
3863 | SDValue X86TargetLowering::EmitTailCallLoadRetAddr( |
3864 | SelectionDAG &DAG, SDValue &OutRetAddr, SDValue Chain, bool IsTailCall, |
3865 | bool Is64Bit, int FPDiff, const SDLoc &dl) const { |
3866 | // Find the frame slot currently holding the return address. |
3867 | EVT VT = getPointerTy(DAG.getDataLayout()); |
3868 | OutRetAddr = getReturnAddressFrameIndex(DAG); |
3869 | |
3870 | // Load the "old" return address. |
3871 | OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo()); |
3872 | return SDValue(OutRetAddr.getNode(), 1); |
3873 | } |
3874 | |
3875 | /// Emit a store of the return address into its new stack slot if tail |
3876 | /// call optimization relocated it (FPDiff != 0). |
3877 | static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF, |
3878 | SDValue Chain, SDValue RetAddrFrIdx, |
3879 | EVT PtrVT, unsigned SlotSize, |
3880 | int FPDiff, const SDLoc &dl) { |
3881 | // Store the return address to the appropriate stack slot. |
3882 | if (!FPDiff) return Chain; |
3883 | // Calculate the new stack slot for the return address. |
3884 | int NewReturnAddrFI = |
3885 | MF.getFrameInfo().CreateFixedObject(SlotSize, (int64_t)FPDiff - SlotSize, |
3886 | false); |
3887 | SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT); |
3888 | Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx, |
3889 | MachinePointerInfo::getFixedStack( |
3890 | DAG.getMachineFunction(), NewReturnAddrFI)); |
3891 | return Chain; |
3892 | } |
3893 | |
3894 | /// Returns a vector_shuffle implementing the MOVL-style "move low" |
3895 | /// pattern: element 0 is taken from V2 and the rest from V1. |
3896 | static SDValue getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT, SDValue V1, |
3897 | SDValue V2) { |
3898 | unsigned NumElems = VT.getVectorNumElements(); |
3899 | SmallVector<int, 8> Mask; |
3900 | Mask.push_back(NumElems); |
3901 | for (unsigned i = 1; i != NumElems; ++i) |
3902 | Mask.push_back(i); |
3903 | return DAG.getVectorShuffle(VT, dl, V1, V2, Mask); |
3904 | } |
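
// Annotation (editor's note): the mask built above is {NumElems, 1, 2,
// ..., NumElems-1}, i.e. element 0 comes from V2 and the remaining
// elements from V1 -- the MOVS/MOVL "move low element" shape. For v4i32
// the shuffle mask is {4, 1, 2, 3}. LowerCall below uses it when an MMX
// value is widened into the low half of an XMM register.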
3905 | |
3906 | SDValue |
3907 | X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, |
3908 | SmallVectorImpl<SDValue> &InVals) const { |
3909 | SelectionDAG &DAG = CLI.DAG; |
3910 | SDLoc &dl = CLI.DL; |
3911 | SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs; |
3912 | SmallVectorImpl<SDValue> &OutVals = CLI.OutVals; |
3913 | SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins; |
3914 | SDValue Chain = CLI.Chain; |
3915 | SDValue Callee = CLI.Callee; |
3916 | CallingConv::ID CallConv = CLI.CallConv; |
3917 | bool &isTailCall = CLI.IsTailCall; |
3918 | bool isVarArg = CLI.IsVarArg; |
3919 | const auto *CB = CLI.CB; |
3920 | |
3921 | MachineFunction &MF = DAG.getMachineFunction(); |
3922 | bool Is64Bit = Subtarget.is64Bit(); |
3923 | bool IsWin64 = Subtarget.isCallingConvWin64(CallConv); |
3924 | StructReturnType SR = callIsStructReturn(Outs, Subtarget.isTargetMCU()); |
3925 | bool IsSibcall = false; |
3926 | bool IsGuaranteeTCO = MF.getTarget().Options.GuaranteedTailCallOpt || |
3927 | CallConv == CallingConv::Tail || CallConv == CallingConv::SwiftTail; |
3928 | X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>(); |
3929 | bool HasNCSR = (CB && isa<CallInst>(CB) && |
3930 | CB->hasFnAttr("no_caller_saved_registers")); |
3931 | bool HasNoCfCheck = (CB && CB->doesNoCfCheck()); |
3932 | bool IsIndirectCall = (CB && isa<CallInst>(CB) && CB->isIndirectCall()); |
3933 | const Module *M = MF.getMMI().getModule(); |
3934 | Metadata *IsCFProtectionSupported = M->getModuleFlag("cf-protection-branch"); |
3935 | |
3936 | MachineFunction::CallSiteInfo CSInfo; |
3937 | if (CallConv == CallingConv::X86_INTR) |
3938 | report_fatal_error("X86 interrupts may not be called directly"); |
3939 | |
3940 | bool IsMustTail = CLI.CB && CLI.CB->isMustTailCall(); |
3941 | if (Subtarget.isPICStyleGOT() && !IsGuaranteeTCO && !IsMustTail) { |
3942 | |
3943 | |
3944 | |
3945 | |
3946 | |
3947 | GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee); |
3948 | if (!G || (!G->getGlobal()->hasLocalLinkage() && |
3949 | G->getGlobal()->hasDefaultVisibility())) |
3950 | isTailCall = false; |
3951 | } |
3952 | |
3953 | |
3954 | if (isTailCall && !IsMustTail) { |
3955 | // Check if it's really possible to do a tail call. |
3956 | isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, |
3957 | isVarArg, SR != NotStructReturn, |
3958 | MF.getFunction().hasStructRetAttr(), CLI.RetTy, |
3959 | Outs, OutVals, Ins, DAG); |
3960 | |
3961 | // Sibcalls are automatically detected tail calls which do not require |
3962 | // ABI changes. |
3963 | if (!IsGuaranteeTCO && isTailCall) |
3964 | IsSibcall = true; |
3965 | |
3966 | if (isTailCall) |
3967 | ++NumTailCalls; |
3968 | } |
3969 | |
3970 | if (IsMustTail && !isTailCall) |
3971 | report_fatal_error("failed to perform tail call elimination on a call " |
3972 | "site marked musttail"); |
3973 | |
3974 | assert(!(isVarArg && canGuaranteeTCO(CallConv)) && |
3975 | "Var args not supported with calling convention fastcc, ghc or hipe"); |
3976 | |
3977 | // Analyze operands of the call, assigning locations to each operand. |
3978 | SmallVector<CCValAssign, 16> ArgLocs; |
3979 | CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext()); |
3980 | |
3981 | // Allocate shadow area for Win64. |
3982 | if (IsWin64) |
3983 | CCInfo.AllocateStack(32, Align(8)); |
3984 | |
3985 | CCInfo.AnalyzeArguments(Outs, CC_X86); |
3986 | |
3987 | // In the vectorcall calling convention a second pass is required for |
3988 | // the HVA types. |
3989 | if (CallingConv::X86_VectorCall == CallConv) { |
3990 | CCInfo.AnalyzeArgumentsSecondPass(Outs, CC_X86); |
3991 | } |
3992 | |
3993 | // Get a count of how many bytes are to be pushed on the stack. |
3994 | unsigned NumBytes = CCInfo.getAlignedCallFrameSize(); |
3995 | if (IsSibcall) |
3996 | // This is a sibcall: the memory operands are available in the caller's |
3997 | // own caller's stack. |
3998 | NumBytes = 0; |
3999 | else if (IsGuaranteeTCO && canGuaranteeTCO(CallConv)) |
4000 | NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG); |
4001 | |
4002 | int FPDiff = 0; |
4003 | if (isTailCall && |
4004 | shouldGuaranteeTCO(CallConv, |
4005 | MF.getTarget().Options.GuaranteedTailCallOpt)) { |
4006 | // Lower arguments at fp - stackoffset + fpdiff. |
4007 | unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn(); |
4008 | |
4009 | FPDiff = NumBytesCallerPushed - NumBytes; |
4010 | |
4011 | // Set the delta of movement of the return address stack slot, but only |
4012 | // if the delta is greater than the previous one. |
4013 | if (FPDiff < X86Info->getTCReturnAddrDelta()) |
4014 | X86Info->setTCReturnAddrDelta(FPDiff); |
4015 | } |
4016 | |
4017 | unsigned NumBytesToPush = NumBytes; |
4018 | unsigned NumBytesToPop = NumBytes; |
4019 | |
4020 | // If we have an inalloca argument, all stack space has already been |
4021 | // allocated for us and we need not push anything else on the stack. |
4022 | |
4023 | if (!Outs.empty() && Outs.back().Flags.isInAlloca()) { |
4024 | NumBytesToPush = 0; |
4025 | if (!ArgLocs.back().isMemLoc()) |
4026 | report_fatal_error("cannot use inalloca attribute on a register " |
4027 | "parameter"); |
4028 | if (ArgLocs.back().getLocMemOffset() != 0) |
4029 | report_fatal_error("any parameter with the inalloca attribute must be " |
4030 | "the only memory argument"); |
4031 | } else if (CLI.IsPreallocated) { |
4032 | assert(ArgLocs.back().isMemLoc() && |
4033 | "cannot use preallocated attribute on a register " |
4034 | "parameter"); |
4035 | SmallVector<size_t, 4> PreallocatedOffsets; |
4036 | for (size_t i = 0; i < CLI.OutVals.size(); ++i) { |
4037 | if (CLI.CB->paramHasAttr(i, Attribute::Preallocated)) { |
4038 | PreallocatedOffsets.push_back(ArgLocs[i].getLocMemOffset()); |
4039 | } |
4040 | } |
4041 | auto *MFI = DAG.getMachineFunction().getInfo<X86MachineFunctionInfo>(); |
4042 | size_t PreallocatedId = MFI->getPreallocatedIdForCallSite(CLI.CB); |
4043 | MFI->setPreallocatedStackSize(PreallocatedId, NumBytes); |
4044 | MFI->setPreallocatedArgOffsets(PreallocatedId, PreallocatedOffsets); |
4045 | NumBytesToPush = 0; |
4046 | } |
4047 | |
4048 | if (!IsSibcall && !IsMustTail) |
4049 | Chain = DAG.getCALLSEQ_START(Chain, NumBytesToPush, |
4050 | NumBytes - NumBytesToPush, dl); |
4051 | |
4052 | SDValue RetAddrFrIdx; |
4053 | // Load the return address for tail calls. |
4054 | if (isTailCall && FPDiff) |
4055 | Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall, |
4056 | Is64Bit, FPDiff, dl); |
4057 | |
4058 | SmallVector<std::pair<Register, SDValue>, 8> RegsToPass; |
4059 | SmallVector<SDValue, 8> MemOpChains; |
4060 | SDValue StackPtr; |
4061 | |
4062 | // The next loop assumes that the locations are in the same order as |
4063 | // the input arguments. |
4064 | assert(isSortedByValueNo(ArgLocs) && |
4065 | "Argument Location list must be sorted before lowering"); |
4066 | |
4067 | // Walk the register/memloc assignments, inserting copies and loads; in |
4068 | // the case of tail call optimization, arguments are handled later. |
4069 | const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); |
4070 | for (unsigned I = 0, OutIndex = 0, E = ArgLocs.size(); I != E; |
4071 | ++I, ++OutIndex) { |
4072 | assert(OutIndex < Outs.size() && "Invalid Out index"); |
4073 | |
4074 | ISD::ArgFlagsTy Flags = Outs[OutIndex].Flags; |
4075 | if (Flags.isInAlloca() || Flags.isPreallocated()) |
4076 | continue; |
4077 | |
4078 | CCValAssign &VA = ArgLocs[I]; |
4079 | EVT RegVT = VA.getLocVT(); |
4080 | SDValue Arg = OutVals[OutIndex]; |
4081 | bool isByVal = Flags.isByVal(); |
4082 | |
4083 | // Promote the value if needed. |
4084 | switch (VA.getLocInfo()) { |
4085 | default: llvm_unreachable("Unknown loc info!"); |
4086 | case CCValAssign::Full: break; |
4087 | case CCValAssign::SExt: |
4088 | Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg); |
4089 | break; |
4090 | case CCValAssign::ZExt: |
4091 | Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg); |
4092 | break; |
4093 | case CCValAssign::AExt: |
4094 | if (Arg.getValueType().isVector() && |
4095 | Arg.getValueType().getVectorElementType() == MVT::i1) |
4096 | Arg = lowerMasksToReg(Arg, RegVT, dl, DAG); |
4097 | else if (RegVT.is128BitVector()) { |
4098 | // Special case: passing MMX values in XMM registers. |
4099 | Arg = DAG.getBitcast(MVT::i64, Arg); |
4100 | Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg); |
4101 | Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg); |
4102 | } else |
4103 | Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg); |
4104 | break; |
4105 | case CCValAssign::BCvt: |
4106 | Arg = DAG.getBitcast(RegVT, Arg); |
4107 | break; |
4108 | case CCValAssign::Indirect: { |
4109 | if (isByVal) { |
4110 | // Copy the byval argument into a temporary stack slot and pass the |
4111 | // slot's address instead, so the callee cannot observe later changes. |
4112 | |
4113 | int FrameIdx = MF.getFrameInfo().CreateStackObject( |
4114 | Flags.getByValSize(), |
4115 | std::max(Align(16), Flags.getNonZeroByValAlign()), false); |
4116 | SDValue StackSlot = |
4117 | DAG.getFrameIndex(FrameIdx, getPointerTy(DAG.getDataLayout())); |
4118 | Chain = |
4119 | CreateCopyOfByValArgument(Arg, StackSlot, Chain, Flags, DAG, dl); |
4120 | |
4121 | Arg = StackSlot; |
4122 | isByVal = false; |
4123 | } else { |
4124 | // Store the argument in a spill slot and pass its address. |
4125 | SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT()); |
4126 | int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex(); |
4127 | Chain = DAG.getStore( |
4128 | Chain, dl, Arg, SpillSlot, |
4129 | MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)); |
4130 | Arg = SpillSlot; |
4131 | } |
4132 | break; |
4133 | } |
4134 | } |
4135 | |
4136 | if (VA.needsCustom()) { |
4137 | assert(VA.getValVT() == MVT::v64i1 && |
4138 | "Currently the only custom case is when we split v64i1 to 2 regs"); |
4139 | |
4140 | Passv64i1ArgInRegs(dl, DAG, Arg, RegsToPass, VA, ArgLocs[++I], Subtarget); |
4141 | } else if (VA.isRegLoc()) { |
4142 | RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg)); |
4143 | const TargetOptions &Options = DAG.getTarget().Options; |
4144 | if (Options.EmitCallSiteInfo) |
4145 | CSInfo.emplace_back(VA.getLocReg(), I); |
4146 | if (isVarArg && IsWin64) { |
4147 | // The Win64 ABI requires argument XMM registers to be copied to their |
4148 | // corresponding shadow GPRs when the callee is a varargs function. |
4149 | Register ShadowReg; |
4150 | switch (VA.getLocReg()) { |
4151 | case X86::XMM0: ShadowReg = X86::RCX; break; |
4152 | case X86::XMM1: ShadowReg = X86::RDX; break; |
4153 | case X86::XMM2: ShadowReg = X86::R8; break; |
4154 | case X86::XMM3: ShadowReg = X86::R9; break; |
4155 | } |
4156 | if (ShadowReg) |
4157 | RegsToPass.push_back(std::make_pair(ShadowReg, Arg)); |
4158 | } |
4159 | } else if (!IsSibcall && (!isTailCall || isByVal)) { |
4160 | assert(VA.isMemLoc()); |
4161 | if (!StackPtr.getNode()) |
4162 | StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(), |
4163 | getPointerTy(DAG.getDataLayout())); |
4164 | MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg, |
4165 | dl, DAG, VA, Flags, isByVal)); |
4166 | } |
4167 | } |
4168 | |
4169 | if (!MemOpChains.empty()) |
4170 | Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains); |
4171 | |
4172 | if (Subtarget.isPICStyleGOT()) { |
4173 | // ELF / PIC requires the GOT address in the EBX register before calls |
4174 | // via the PLT. |
4175 | if (!isTailCall) { |
4176 | // Claim EBX for the GOT base, except under regcall, which may need |
4177 | // every available GPR for argument passing. |
4178 | |
4179 | if (CallConv != CallingConv::X86_RegCall) |
4180 | RegsToPass.push_back(std::make_pair( |
4181 | Register(X86::EBX), DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), |
4182 | getPointerTy(DAG.getDataLayout())))); |
4183 | } else { |
4184 | // For tail calls EBX cannot be set up this way. If the callee is a |
4185 | // global with default visibility and non-local linkage, or an external |
4186 | // symbol, lower its address now: the resulting addressing form does |
4187 | // not rely on the GOT base register staying live across the tail call. |
4188 | |
4189 | |
4190 | |
4191 | |
4192 | |
4193 | GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee); |
4194 | if (G && !G->getGlobal()->hasLocalLinkage() && |
4195 | G->getGlobal()->hasDefaultVisibility()) |
4196 | Callee = LowerGlobalAddress(Callee, DAG); |
4197 | else if (isa<ExternalSymbolSDNode>(Callee)) |
4198 | Callee = LowerExternalSymbol(Callee, DAG); |
4199 | } |
4200 | } |
4201 | |
4202 | if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail) { |
4203 | // From the AMD64 ABI document: for calls that may call functions that |
4204 | // use varargs or stdargs (prototype-less calls or calls to functions |
4205 | // containing ellipsis (...) in the declaration), %al is used as a |
4206 | // hidden argument to specify the number of vector registers used. The |
4207 | // contents of %al do not need to match exactly the number of registers |
4208 | // used, but must be an upper bound on the number of SSE registers |
4209 | // used and is in the range 0 - 8 inclusive. |
4210 | |
4211 | |
4212 | static const MCPhysReg XMMArgRegs[] = { |
4213 | X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, |
4214 | X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7 |
4215 | }; |
4216 | unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs); |
4217 | assert((Subtarget.hasSSE1() || !NumXMMRegs) |
4218 | && "SSE registers cannot be used when SSE is disabled"); |
4219 | RegsToPass.push_back(std::make_pair(Register(X86::AL), |
4220 | DAG.getConstant(NumXMMRegs, dl, |
4221 | MVT::i8))); |
4222 | } |
4223 | |
4224 | if (isVarArg && IsMustTail) { |
4225 | const auto &Forwards = X86Info->getForwardedMustTailRegParms(); |
4226 | for (const auto &F : Forwards) { |
4227 | SDValue Val = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT); |
4228 | RegsToPass.push_back(std::make_pair(F.PReg, Val)); |
4229 | } |
4230 | } |
4231 | |
4232 | // For tail calls lower the arguments to the 'real' stack slots. |
4235 | if (!IsSibcall && isTailCall) { |
4236 | // Force all incoming stack arguments to be loaded from the stack |
4237 | // before any new outgoing arguments are stored to the stack, because |
4238 | // the outgoing stack slots may alias the incoming argument stack slots. |
4242 | SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain); |
4243 | |
4244 | SmallVector<SDValue, 8> MemOpChains2; |
4245 | SDValue FIN; |
4246 | int FI = 0; |
4247 | for (unsigned I = 0, OutsIndex = 0, E = ArgLocs.size(); I != E; |
4248 | ++I, ++OutsIndex) { |
4249 | CCValAssign &VA = ArgLocs[I]; |
4250 | |
4251 | if (VA.isRegLoc()) { |
4252 | if (VA.needsCustom()) { |
4253 | assert((CallConv == CallingConv::X86_RegCall) && |
4254 | "Expecting custom case only in regcall calling convention"); |
4255 | // A v64i1 argument was passed through two register locations, so |
4256 | // skip the next location. |
4257 | ++I; |
4258 | } |
4259 | |
4260 | continue; |
4261 | } |
4262 | |
4263 | assert(VA.isMemLoc()); |
4264 | SDValue Arg = OutVals[OutsIndex]; |
4265 | ISD::ArgFlagsTy Flags = Outs[OutsIndex].Flags; |
4266 | |
4267 | if (Flags.isInAlloca() || Flags.isPreallocated()) |
4268 | continue; |
4269 | |
4270 | int32_t Offset = VA.getLocMemOffset()+FPDiff; |
4271 | uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8; |
4272 | FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true); |
4273 | FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout())); |
4274 | |
4275 | if (Flags.isByVal()) { |
4276 | // Copy the byval argument from the caller's frame (relative to the stack pointer). |
4277 | SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset(), dl); |
4278 | if (!StackPtr.getNode()) |
4279 | StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(), |
4280 | getPointerTy(DAG.getDataLayout())); |
4281 | Source = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()), |
4282 | StackPtr, Source); |
4283 | |
4284 | MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN, |
4285 | ArgChain, |
4286 | Flags, DAG, dl)); |
4287 | } else { |
4288 | // Store the argument to its fixed stack slot. |
4289 | MemOpChains2.push_back(DAG.getStore( |
4290 | ArgChain, dl, Arg, FIN, |
4291 | MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI))); |
4292 | } |
4293 | } |
4294 | |
4295 | if (!MemOpChains2.empty()) |
4296 | Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2); |
4297 | // Store the return address to the appropriate stack slot. |
4299 | Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx, |
4300 | getPointerTy(DAG.getDataLayout()), |
4301 | RegInfo->getSlotSize(), FPDiff, dl); |
4302 | } |
4303 | |
4304 | // Build a sequence of copy-to-reg nodes chained together with token |
4305 | // chain and glue operands which copy the outgoing args into registers. |
4306 | SDValue InFlag; |
4307 | for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) { |
4308 | Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first, |
4309 | RegsToPass[i].second, InFlag); |
4310 | InFlag = Chain.getValue(1); |
4311 | } |
4312 | |
4313 | if (DAG.getTarget().getCodeModel() == CodeModel::Large) { |
4314 | assert(Is64Bit && "Large code model is only legal in 64-bit mode."); |
4315 | // In the 64-bit large code model, we have to make all calls through a |
4316 | // register, since the call instruction's 32-bit pc-relative offset may |
4317 | // not be large enough to hold the whole address. |
4319 | } else if (Callee->getOpcode() == ISD::GlobalAddress || |
4320 | Callee->getOpcode() == ISD::ExternalSymbol) { |
4321 | // Lower direct calls to global addresses and external symbols. |
4322 | // Passing ForCall=true removes WrapperRIP when possible, allowing the |
4323 | // call to be selected without first materializing the address into a |
4324 | // register. |
4325 | Callee = LowerGlobalOrExternal(Callee, DAG, true); |
4326 | } else if (Subtarget.isTarget64BitILP32() && |
4327 | Callee->getValueType(0) == MVT::i32) { |
4328 | // Zero-extend the 32-bit callee address to 64 bits per the x32 ABI. |
4329 | Callee = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Callee); |
4330 | } |
4331 | |
4332 | // Returns a chain & a glue value for the return value copy to use. |
4333 | SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); |
4334 | SmallVector<SDValue, 8> Ops; |
4335 | |
4336 | if (!IsSibcall && isTailCall && !IsMustTail) { |
4337 | Chain = DAG.getCALLSEQ_END(Chain, |
4338 | DAG.getIntPtrConstant(NumBytesToPop, dl, true), |
4339 | DAG.getIntPtrConstant(0, dl, true), InFlag, dl); |
4340 | InFlag = Chain.getValue(1); |
4341 | } |
4342 | |
4343 | Ops.push_back(Chain); |
4344 | Ops.push_back(Callee); |
4345 | |
4346 | if (isTailCall) |
4347 | Ops.push_back(DAG.getTargetConstant(FPDiff, dl, MVT::i32)); |
4348 | |
4349 | // Add argument registers to the end of the list so that they are |
4350 | // known live into the call. |
4351 | for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) |
4352 | Ops.push_back(DAG.getRegister(RegsToPass[i].first, |
4353 | RegsToPass[i].second.getValueType())); |
4354 | |
4355 | // Add a register mask operand representing the call-preserved registers. |
4356 | const uint32_t *Mask = [&]() { |
4357 | auto AdaptedCC = CallConv; |
4358 | // If HasNCSR (the no_caller_saved_registers attribute) is set, use the |
4359 | // X86_INTR calling convention, which preserves the same register set. |
4361 | if (HasNCSR) |
4362 | AdaptedCC = (CallingConv::ID)CallingConv::X86_INTR; |
4363 | |
4364 | // If no_callee_saved_registers is requested, use GHC, which has no callee-saved registers. |
4365 | if (CB && CB->hasFnAttr("no_callee_saved_registers")) |
4366 | AdaptedCC = (CallingConv::ID)CallingConv::GHC; |
4367 | return RegInfo->getCallPreservedMask(MF, AdaptedCC); |
4368 | }(); |
4369 | assert(Mask && "Missing call preserved mask for calling convention"); |
4370 | |
4371 | // If this is an invoke in a 32-bit function using a funclet-based |
4372 | // personality, assume the function clobbers all registers; if an |
4373 | // exception is thrown, the runtime will not restore CSRs. |
4374 | // FIXME: Model this more precisely so we can register allocate across |
4375 | // the normal edge and spill/fill across the exceptional edge. |
4376 | if (!Is64Bit && CLI.CB && isa<InvokeInst>(CLI.CB)) { |
4377 | const Function &CallerFn = MF.getFunction(); |
4378 | EHPersonality Pers = |
4379 | CallerFn.hasPersonalityFn() |
4380 | ? classifyEHPersonality(CallerFn.getPersonalityFn()) |
4381 | : EHPersonality::Unknown; |
4382 | if (isFuncletEHPersonality(Pers)) |
4383 | Mask = RegInfo->getNoPreservedMask(); |
4384 | } |
4385 | |
4386 | // Define a new register mask from the existing mask. |
4387 | uint32_t *RegMask = nullptr; |
4388 | |
4389 | // In some calling conventions we need to remove the used physical |
4390 | // registers from the reg mask. |
4391 | if (CallConv == CallingConv::X86_RegCall || HasNCSR) { |
4392 | const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); |
4393 | // Allocate a new register mask and copy the existing mask into it. |
4395 | RegMask = MF.allocateRegMask(); |
4396 | unsigned RegMaskSize = MachineOperand::getRegMaskSize(TRI->getNumRegs()); |
4397 | memcpy(RegMask, Mask, sizeof(RegMask[0]) * RegMaskSize); |
4398 | |
4399 | // Make sure all sub-registers of the argument registers are reset in |
4400 | // the RegMask. |
4401 | for (auto const &RegPair : RegsToPass) |
4402 | for (MCSubRegIterator SubRegs(RegPair.first, TRI, true); |
4403 | SubRegs.isValid(); ++SubRegs) |
4404 | RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32)); |
4405 | |
4406 | // Create the RegMask operand according to our updated mask. |
4407 | Ops.push_back(DAG.getRegisterMask(RegMask)); |
4408 | } else { |
4409 | // Create the RegMask operand according to our original mask. |
4410 | Ops.push_back(DAG.getRegisterMask(Mask)); |
4411 | } |
4412 | |
4413 | if (InFlag.getNode()) |
4414 | Ops.push_back(InFlag); |
4415 | |
4416 | if (isTailCall) { |
4417 | // Liveouts should be computed from returns, not from tail calls: |
4418 | // consider a void function making a tail call to a function returning |
4419 | // int. |
4423 | MF.getFrameInfo().setHasTailCall(); |
4424 | SDValue Ret = DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, Ops); |
4425 | DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo)); |
4426 | return Ret; |
4427 | } |
4428 | |
4429 | if (HasNoCfCheck && IsCFProtectionSupported && IsIndirectCall) { |
4430 | Chain = DAG.getNode(X86ISD::NT_CALL, dl, NodeTys, Ops); |
4431 | } else if (CLI.CB && objcarc::hasAttachedCallOpBundle(CLI.CB)) { |
4432 | // Calls with a "clang.arc.attachedcall" bundle are special: they are |
4433 | // expanded to the call, directly followed by a special marker sequence |
4434 | // and a call to an ObjC runtime function (X86ISD::CALL_RVMARKER). |
4435 | assert(!isTailCall && |
4436 | "tail calls cannot be marked with clang.arc.attachedcall"); |
4437 | assert(Is64Bit && "clang.arc.attachedcall is only supported in 64bit mode"); |
4438 | |
4439 | // Select which ObjC runtime function the marker expands to: |
4440 | // 0 selects objc_retainAutoreleasedReturnValue, |
4441 | // 1 selects objc_unsafeClaimAutoreleasedReturnValue. |
4443 | unsigned RuntimeCallType = |
4444 | objcarc::hasAttachedCallOpBundle(CLI.CB, true) ? 0 : 1; |
4445 | Ops.insert(Ops.begin() + 1, |
4446 | DAG.getTargetConstant(RuntimeCallType, dl, MVT::i32)); |
4447 | Chain = DAG.getNode(X86ISD::CALL_RVMARKER, dl, NodeTys, Ops); |
4448 | } else { |
4449 | Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops); |
4450 | } |
4451 | |
4452 | InFlag = Chain.getValue(1); |
4453 | DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge); |
4454 | DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo)); |
4455 | |
4456 | // Save heapallocsite metadata. |
4457 | if (CLI.CB) |
4458 | if (MDNode *HeapAlloc = CLI.CB->getMetadata("heapallocsite")) |
4459 | DAG.addHeapAllocSite(Chain.getNode(), HeapAlloc); |
4460 | |
4461 | // Compute how many bytes the callee pops on return. |
4462 | unsigned NumBytesForCalleeToPop; |
4463 | if (X86::isCalleePop(CallConv, Is64Bit, isVarArg, |
4464 | DAG.getTarget().Options.GuaranteedTailCallOpt)) |
4465 | NumBytesForCalleeToPop = NumBytes; |
4466 | else if (!Is64Bit && !canGuaranteeTCO(CallConv) && |
4467 | !Subtarget.getTargetTriple().isOSMSVCRT() && |
4468 | SR == StackStructReturn) |
4469 | // If this is a call to a struct-return function, the callee pops the |
4470 | // hidden struct pointer, so we have to push it back. This is common |
4471 | // for Darwin/X86, Linux and MinGW32 targets. For MSVC Win32 targets, |
4472 | // the caller pops the hidden struct pointer. |
4473 | NumBytesForCalleeToPop = 4; |
4474 | else |
4475 | NumBytesForCalleeToPop = 0; |
4476 | |
4477 | // Returns a glue value for the return value copy to use. |
4478 | if (!IsSibcall) { |
4479 | Chain = DAG.getCALLSEQ_END(Chain, |
4480 | DAG.getIntPtrConstant(NumBytesToPop, dl, true), |
4481 | DAG.getIntPtrConstant(NumBytesForCalleeToPop, dl, |
4482 | true), |
4483 | InFlag, dl); |
4484 | InFlag = Chain.getValue(1); |
4485 | } |
4486 | |
4487 | // Handle result values, copying them out of physregs into vregs that |
4488 | // we return. |
4489 | return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG, |
4490 | InVals, RegMask); |
4491 | } |
4492 | |
4493 | //===----------------------------------------------------------------------===// |
4494 | //                Fast Calling Convention (tail call) implementation |
4495 | //===----------------------------------------------------------------------===// |
4496 | |
4497 | // Like std call, callee-cleanup conventions reserve ECX for the tail-called |
4498 | // function address, so only two registers remain free for (inreg) argument |
4499 | // passing. Tail call optimization is performed provided tailcallopt is |
4500 | // enabled and the caller/callee use fastcc. |
4501 | // To keep the stack aligned according to the platform ABI, |
4502 | // GetAlignedArgumentStackSize below ensures that the argument delta is |
4503 | // always a multiple of the stack alignment. If the tail-called function |
4504 | // needs more stack argument space than the caller provides, the caller |
4505 | // reserves an area the size of the argument delta right after the original |
4506 | // return address so the return address can be moved there before the jump. |
4507 | |
4508 | /// Round StackSize up to a multiple of the stack alignment while keeping |
4509 | /// room for the slot-sized return address. |
4526 | unsigned |
4527 | X86TargetLowering::GetAlignedArgumentStackSize(const unsigned StackSize, |
4528 | SelectionDAG &DAG) const { |
4529 | const Align StackAlignment = Subtarget.getFrameLowering()->getStackAlign(); |
4530 | const uint64_t SlotSize = Subtarget.getRegisterInfo()->getSlotSize(); |
4531 | assert(StackSize % SlotSize == 0 && |
4532 | "StackSize must be a multiple of SlotSize"); |
4533 | return alignTo(StackSize + SlotSize, StackAlignment) - SlotSize; |
4534 | } |
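     | // Worked example (hypothetical numbers): with StackSize = 16,         |
     | // SlotSize = 8 and a 16-byte stack alignment, the result is           |
     | // alignTo(16 + 8, 16) - 8 = 32 - 8 = 24, padding the argument area so |
     | // that the slot holding the return address stays correctly aligned.   |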
4535 | |
4536 | /// Return true if the given stack call argument is already available in |
4537 | /// the same position (relatively) of the caller's incoming argument stack. |
4538 | static |
4539 | bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, |
4540 | MachineFrameInfo &MFI, const MachineRegisterInfo *MRI, |
4541 | const X86InstrInfo *TII, const CCValAssign &VA) { |
4542 | unsigned Bytes = Arg.getValueSizeInBits() / 8; |
4543 | |
4544 | for (;;) { |
4545 | // Look through nodes that don't alter the bits of the incoming value. |
4546 | unsigned Op = Arg.getOpcode(); |
4547 | if (Op == ISD::ZERO_EXTEND || Op == ISD::ANY_EXTEND || Op == ISD::BITCAST) { |
4548 | Arg = Arg.getOperand(0); |
4549 | continue; |
4550 | } |
4551 | if (Op == ISD::TRUNCATE) { |
4552 | const SDValue &TruncInput = Arg.getOperand(0); |
4553 | if (TruncInput.getOpcode() == ISD::AssertZext && |
4554 | cast<VTSDNode>(TruncInput.getOperand(1))->getVT() == |
4555 | Arg.getValueType()) { |
4556 | Arg = TruncInput.getOperand(0); |
4557 | continue; |
4558 | } |
4559 | } |
4560 | break; |
4561 | } |
4562 | |
4563 | int FI = INT_MAX; |
4564 | if (Arg.getOpcode() == ISD::CopyFromReg) { |
4565 | Register VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg(); |
4566 | if (!VR.isVirtual()) |
4567 | return false; |
4568 | MachineInstr *Def = MRI->getVRegDef(VR); |
4569 | if (!Def) |
4570 | return false; |
4571 | if (!Flags.isByVal()) { |
4572 | if (!TII->isLoadFromStackSlot(*Def, FI)) |
4573 | return false; |
4574 | } else { |
4575 | unsigned Opcode = Def->getOpcode(); |
4576 | if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r || |
4577 | Opcode == X86::LEA64_32r) && |
4578 | Def->getOperand(1).isFI()) { |
4579 | FI = Def->getOperand(1).getIndex(); |
4580 | Bytes = Flags.getByValSize(); |
4581 | } else |
4582 | return false; |
4583 | } |
4584 | } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) { |
4585 | if (Flags.isByVal()) |
4586 | // A byval argument is passed in as a pointer that is now being |
4587 | // dereferenced, e.g.: |
4588 | //   define @foo(%struct.X* %A) { |
4589 | //     tail call @bar(%struct.X* byval %A) |
4590 | //   } |
4591 | return false; |
4592 | SDValue Ptr = Ld->getBasePtr(); |
4593 | FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr); |
4594 | if (!FINode) |
4595 | return false; |
4596 | FI = FINode->getIndex(); |
4597 | } else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) { |
4598 | FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Arg); |
4599 | FI = FINode->getIndex(); |
4600 | Bytes = Flags.getByValSize(); |
4601 | } else |
4602 | return false; |
4603 | |
4604 | assert(FI != INT_MAX); |
4605 | if (!MFI.isFixedObjectIndex(FI)) |
4606 | return false; |
4607 | |
4608 | if (Offset != MFI.getObjectOffset(FI)) |
4609 | return false; |
4610 | |
4611 | // If this is not byval, check that the argument stack object is |
4612 | // immutable. inalloca and argument copy elision can create mutable |
4613 | // argument stack objects. Byval objects can be mutated, but a byval |
4614 | // call intends to pass the mutated memory. |
4615 | if (!Flags.isByVal() && !MFI.isImmutableObjectIndex(FI)) |
4616 | return false; |
4617 | |
4618 | if (VA.getLocVT().getFixedSizeInBits() > |
4619 | Arg.getValueSizeInBits().getFixedSize()) { |
4620 | // If the argument location is wider than the argument type, check that |
4621 | // any extension flags match. |
4622 | if (Flags.isZExt() != MFI.isObjectZExt(FI) || |
4623 | Flags.isSExt() != MFI.isObjectSExt(FI)) { |
4624 | return false; |
4625 | } |
4626 | } |
4627 | |
4628 | return Bytes == MFI.getObjectSize(FI); |
4629 | } |
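     | // Sketch of the pattern this recognizes (illustrative, not from the  |
     | // source): in "int f(int a, int b) { return g(a, b); }" the argument |
     | // b arrives and leaves at the same fixed stack offset, so the store  |
     | // can be elided and the call lowered as a sibcall.                   |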
4630 | |
4631 | /// Check whether the call is eligible for tail call optimization. |
4632 | /// Targets that want to do tail call optimization should implement this. |
4633 | bool X86TargetLowering::IsEligibleForTailCallOptimization( |
4634 | SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg, |
4635 | bool isCalleeStructRet, bool isCallerStructRet, Type *RetTy, |
4636 | const SmallVectorImpl<ISD::OutputArg> &Outs, |
4637 | const SmallVectorImpl<SDValue> &OutVals, |
4638 | const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const { |
4639 | if (!mayTailCallThisCC(CalleeCC)) |
4640 | return false; |
4641 | |
4642 | // If -tailcallopt is specified, make fastcc functions tail-callable. |
4643 | MachineFunction &MF = DAG.getMachineFunction(); |
4644 | const Function &CallerF = MF.getFunction(); |
4645 | |
4646 | // If the function return type is x86_fp80 and the callee return type |
4647 | // is not, then the FP_EXTEND of the call result is not a nop, so it is |
4648 | // not safe to perform a tail call here. |
4649 | if (CallerF.getReturnType()->isX86_FP80Ty() && !RetTy->isX86_FP80Ty()) |
4650 | return false; |
4651 | |
4652 | CallingConv::ID CallerCC = CallerF.getCallingConv(); |
4653 | bool CCMatch = CallerCC == CalleeCC; |
4654 | bool IsCalleeWin64 = Subtarget.isCallingConvWin64(CalleeCC); |
4655 | bool IsCallerWin64 = Subtarget.isCallingConvWin64(CallerCC); |
4656 | bool IsGuaranteeTCO = DAG.getTarget().Options.GuaranteedTailCallOpt || |
4657 | CalleeCC == CallingConv::Tail || CalleeCC == CallingConv::SwiftTail; |
4658 | |
4659 | // Win64 functions have extra shadow space for argument homing, so |
4660 | // don't allow tail calls between Win64 and non-Win64 conventions. |
4662 | if (IsCalleeWin64 != IsCallerWin64) |
4663 | return false; |
4664 | |
4665 | if (IsGuaranteeTCO) { |
4666 | if (canGuaranteeTCO(CalleeCC) && CCMatch) |
4667 | return true; |
4668 | return false; |
4669 | } |
4670 | |
4671 | // Can't do sibcall if the stack needs to be dynamically re-aligned; |
4672 | // PEI needs to emit a special epilogue. |
4676 | const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); |
4677 | if (RegInfo->hasStackRealignment(MF)) |
4678 | return false; |
4679 | |
4680 | // Also avoid sibcall optimization if either caller or callee uses |
4681 | // struct return semantics. |
4682 | if (isCalleeStructRet || isCallerStructRet) |
4683 | return false; |
4684 | |
4685 | // Do not sibcall optimize vararg calls unless all arguments are |
4686 | // passed via registers. |
4687 | LLVMContext &C = *DAG.getContext(); |
4688 | if (isVarArg && !Outs.empty()) { |
4689 | // Optimizing for varargs on Win64 is unlikely to be safe without |
4690 | // additional testing. |
4691 | if (IsCalleeWin64 || IsCallerWin64) |
4692 | return false; |
4693 | |
4694 | SmallVector<CCValAssign, 16> ArgLocs; |
4695 | CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C); |
4696 | |
4697 | CCInfo.AnalyzeCallOperands(Outs, CC_X86); |
4698 | for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) |
4699 | if (!ArgLocs[i].isRegLoc()) |
4700 | return false; |
4701 | } |
4702 | |
4703 | // If the call result is in ST0/ST1, it needs to be popped off the x87 |
4704 | // stack. If it's not used by the call, it is not safe to optimize this |
4705 | // into a sibcall. |
4706 | bool Unused = false; |
4707 | for (unsigned i = 0, e = Ins.size(); i != e; ++i) { |
4708 | if (!Ins[i].Used) { |
4709 | Unused = true; |
4710 | break; |
4711 | } |
4712 | } |
4713 | if (Unused) { |
4714 | SmallVector<CCValAssign, 16> RVLocs; |
4715 | CCState CCInfo(CalleeCC, false, MF, RVLocs, C); |
4716 | CCInfo.AnalyzeCallResult(Ins, RetCC_X86); |
4717 | for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) { |
4718 | CCValAssign &VA = RVLocs[i]; |
4719 | if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) |
4720 | return false; |
4721 | } |
4722 | } |
4723 | |
4724 | // Check that the call results are passed in the same way. |
4725 | if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins, |
4726 | RetCC_X86, RetCC_X86)) |
4727 | return false; |
4728 | |
4729 | const X86RegisterInfo *TRI = Subtarget.getRegisterInfo(); |
4730 | const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC); |
4731 | if (!CCMatch) { |
4732 | const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC); |
4733 | if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved)) |
4734 | return false; |
4735 | } |
4736 | |
4737 | unsigned StackArgsSize = 0; |
4738 | |
4739 | // If the callee takes no arguments then go on to check the results of |
4740 | // the call. |
4741 | if (!Outs.empty()) { |
4742 | // Check if stack adjustment is needed. For now, do not do this if any |
4743 | // argument is passed on the stack. |
4744 | SmallVector<CCValAssign, 16> ArgLocs; |
4745 | CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C); |
4746 | |
4747 | // Allocate shadow area for Win64. |
4748 | if (IsCalleeWin64) |
4749 | CCInfo.AllocateStack(32, Align(8)); |
4750 | |
4751 | CCInfo.AnalyzeCallOperands(Outs, CC_X86); |
4752 | StackArgsSize = CCInfo.getNextStackOffset(); |
4753 | |
4754 | if (CCInfo.getNextStackOffset()) { |
4755 | // Check if the arguments are already laid out in the right way as the |
4756 | // caller's fixed stack objects. |
4757 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
4758 | const MachineRegisterInfo *MRI = &MF.getRegInfo(); |
4759 | const X86InstrInfo *TII = Subtarget.getInstrInfo(); |
4760 | for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { |
4761 | CCValAssign &VA = ArgLocs[i]; |
4762 | SDValue Arg = OutVals[i]; |
4763 | ISD::ArgFlagsTy Flags = Outs[i].Flags; |
4764 | if (VA.getLocInfo() == CCValAssign::Indirect) |
4765 | return false; |
4766 | if (!VA.isRegLoc()) { |
4767 | if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags, |
4768 | MFI, MRI, TII, VA)) |
4769 | return false; |
4770 | } |
4771 | } |
4772 | } |
4773 | |
4774 | bool PositionIndependent = isPositionIndependent(); |
4775 | |
4776 | // If the tailcall address may be in a register, make sure it's |
4777 | // possible to register allocate for it. In 32-bit, the call address |
4778 | // can only target EAX, EDX, or ECX since the tail call must be |
4779 | // scheduled after callee-saved registers are restored. |
4780 | if (!Subtarget.is64Bit() && ((!isa<GlobalAddressSDNode>(Callee) && |
4781 | !isa<ExternalSymbolSDNode>(Callee)) || |
4782 | PositionIndependent)) { |
4783 | unsigned NumInRegs = 0; |
4784 | // In PIC we need an extra register to formulate the address |
4785 | // computation for the callee. |
4786 | unsigned MaxInRegs = PositionIndependent ? 2 : 3; |
4787 | |
4788 | for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) { |
4789 | CCValAssign &VA = ArgLocs[i]; |
4790 | if (!VA.isRegLoc()) |
4791 | continue; |
4792 | Register Reg = VA.getLocReg(); |
4793 | switch (Reg) { |
4794 | default: break; |
4795 | case X86::EAX: case X86::EDX: case X86::ECX: |
4796 | if (++NumInRegs == MaxInRegs) |
4797 | return false; |
4798 | break; |
4799 | } |
4800 | } |
4801 | } |
4802 | |
4803 | const MachineRegisterInfo &MRI = MF.getRegInfo(); |
4804 | if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals)) |
4805 | return false; |
4806 | } |
4807 | |
4808 | bool CalleeWillPop = |
4809 | X86::isCalleePop(CalleeCC, Subtarget.is64Bit(), isVarArg, |
4810 | MF.getTarget().Options.GuaranteedTailCallOpt); |
4811 | |
4812 | if (unsigned BytesToPop = |
4813 | MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn()) { |
4814 | // If we have bytes to pop, the callee must pop them. |
4815 | bool CalleePopMatches = CalleeWillPop && BytesToPop == StackArgsSize; |
4816 | if (!CalleePopMatches) |
4817 | return false; |
4818 | } else if (CalleeWillPop && StackArgsSize > 0) { |
4819 | // If we don't have bytes to pop, make sure the callee doesn't pop any. |
4820 | return false; |
4821 | } |
4822 | |
4823 | return true; |
4824 | } |
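     | // Illustrative consequence of the callee-pop checks above: a stdcall |
     | // callee popping 8 bytes is only tail-callable from a caller that    |
     | // itself pops exactly 8 bytes of stack arguments on return.          |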
4825 | |
4826 | FastISel * |
4827 | X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo, |
4828 | const TargetLibraryInfo *libInfo) const { |
4829 | return X86::createFastISel(funcInfo, libInfo); |
4830 | } |
4831 | |
4832 | //===----------------------------------------------------------------------===// |
4833 | //                           Other Lowering Hooks |
4834 | //===----------------------------------------------------------------------===// |
4836 | static bool MayFoldLoad(SDValue Op) { |
4837 | return Op.hasOneUse() && ISD::isNormalLoad(Op.getNode()); |
4838 | } |
4839 | |
4840 | static bool MayFoldIntoStore(SDValue Op) { |
4841 | return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->use_begin()); |
4842 | } |
4843 | |
4844 | static bool MayFoldIntoZeroExtend(SDValue Op) { |
4845 | if (Op.hasOneUse()) { |
4846 | unsigned Opcode = Op.getNode()->use_begin()->getOpcode(); |
4847 | return (ISD::ZERO_EXTEND == Opcode); |
4848 | } |
4849 | return false; |
4850 | } |
4851 | |
4852 | static bool isTargetShuffle(unsigned Opcode) { |
4853 | switch(Opcode) { |
4854 | default: return false; |
4855 | case X86ISD::BLENDI: |
4856 | case X86ISD::PSHUFB: |
4857 | case X86ISD::PSHUFD: |
4858 | case X86ISD::PSHUFHW: |
4859 | case X86ISD::PSHUFLW: |
4860 | case X86ISD::SHUFP: |
4861 | case X86ISD::INSERTPS: |
4862 | case X86ISD::EXTRQI: |
4863 | case X86ISD::INSERTQI: |
4864 | case X86ISD::VALIGN: |
4865 | case X86ISD::PALIGNR: |
4866 | case X86ISD::VSHLDQ: |
4867 | case X86ISD::VSRLDQ: |
4868 | case X86ISD::MOVLHPS: |
4869 | case X86ISD::MOVHLPS: |
4870 | case X86ISD::MOVSHDUP: |
4871 | case X86ISD::MOVSLDUP: |
4872 | case X86ISD::MOVDDUP: |
4873 | case X86ISD::MOVSS: |
4874 | case X86ISD::MOVSD: |
4875 | case X86ISD::UNPCKL: |
4876 | case X86ISD::UNPCKH: |
4877 | case X86ISD::VBROADCAST: |
4878 | case X86ISD::VPERMILPI: |
4879 | case X86ISD::VPERMILPV: |
4880 | case X86ISD::VPERM2X128: |
4881 | case X86ISD::SHUF128: |
4882 | case X86ISD::VPERMIL2: |
4883 | case X86ISD::VPERMI: |
4884 | case X86ISD::VPPERM: |
4885 | case X86ISD::VPERMV: |
4886 | case X86ISD::VPERMV3: |
4887 | case X86ISD::VZEXT_MOVL: |
4888 | return true; |
4889 | } |
4890 | } |
4891 | |
4892 | static bool isTargetShuffleVariableMask(unsigned Opcode) { |
4893 | switch (Opcode) { |
4894 | default: return false; |
4895 | // Target Shuffles. |
4896 | case X86ISD::PSHUFB: |
4897 | case X86ISD::VPERMILPV: |
4898 | case X86ISD::VPERMIL2: |
4899 | case X86ISD::VPPERM: |
4900 | case X86ISD::VPERMV: |
4901 | case X86ISD::VPERMV3: |
4902 | return true; |
4903 | // 'Faux' Target Shuffles. |
4904 | case ISD::OR: |
4905 | case ISD::AND: |
4906 | case X86ISD::ANDNP: |
4907 | return true; |
4908 | } |
4909 | } |
4910 | |
4911 | static bool isTargetShuffleSplat(SDValue Op) { |
4912 | unsigned Opcode = Op.getOpcode(); |
4913 | if (Opcode == ISD::EXTRACT_SUBVECTOR) |
4914 | return isTargetShuffleSplat(Op.getOperand(0)); |
4915 | return Opcode == X86ISD::VBROADCAST || Opcode == X86ISD::VBROADCAST_LOAD; |
4916 | } |
4917 | |
4918 | SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const { |
4919 | MachineFunction &MF = DAG.getMachineFunction(); |
4920 | const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); |
4921 | X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); |
4922 | int ReturnAddrIndex = FuncInfo->getRAIndex(); |
4923 | |
4924 | if (ReturnAddrIndex == 0) { |
4925 | // Set up a frame object for the return address. |
4926 | unsigned SlotSize = RegInfo->getSlotSize(); |
4927 | ReturnAddrIndex = MF.getFrameInfo().CreateFixedObject(SlotSize, |
4928 | -(int64_t)SlotSize, |
4929 | false); |
4930 | FuncInfo->setRAIndex(ReturnAddrIndex); |
4931 | } |
4932 | |
4933 | return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy(DAG.getDataLayout())); |
4934 | } |
4935 | |
4936 | bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M, |
4937 | bool hasSymbolicDisplacement) { |
4938 | // Offset should fit into a 32-bit signed immediate field. |
4939 | if (!isInt<32>(Offset)) |
4940 | return false; |
4941 | |
4942 | // If we don't have a symbolic displacement, there are no extra |
4943 | // restrictions. |
4944 | if (!hasSymbolicDisplacement) |
4945 | return true; |
4946 | |
4947 | // FIXME: Some tweaks might be needed for the medium code model. |
4948 | if (M != CodeModel::Small && M != CodeModel::Kernel) |
4949 | return false; |
4950 | |
4951 | // For the small code model, assume the latest object is 16MB before |
4952 | // the end of the 31-bit boundary; pretty large negative constants are |
4953 | // also acceptable since all objects are in the positive half. |
4954 | if (M == CodeModel::Small && Offset < 16*1024*1024) |
4955 | return true; |
4956 | |
4957 | // For the kernel code model, all objects reside in the negative half |
4958 | // of the address space, so negative offsets are not acceptable. |
4960 | if (M == CodeModel::Kernel && Offset >= 0) |
4961 | return true; |
4962 | |
4963 | return false; |
4964 | } |
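     | // Example (illustrative): with a symbolic displacement in the small  |
     | // code model, an offset of 1 MiB (< 16 MiB) is accepted, while an    |
     | // offset of 64 MiB is rejected by the checks above.                  |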
4965 | |
4966 | /// Determines whether the callee is required to pop its own arguments. |
4967 | /// Callee pop is necessary to support tail calls. |
4968 | bool X86::isCalleePop(CallingConv::ID CallingConv, |
4969 | bool is64Bit, bool IsVarArg, bool GuaranteeTCO) { |
4970 | // If GuaranteeTCO is true, we force some calls to be callee pop so |
4971 | // that we can guarantee TCO. |
4972 | if (!IsVarArg && shouldGuaranteeTCO(CallingConv, GuaranteeTCO)) |
4973 | return true; |
4974 | |
4975 | switch (CallingConv) { |
4976 | default: |
4977 | return false; |
4978 | case CallingConv::X86_StdCall: |
4979 | case CallingConv::X86_FastCall: |
4980 | case CallingConv::X86_ThisCall: |
4981 | case CallingConv::X86_VectorCall: |
4982 | return !is64Bit; |
4983 | } |
4984 | } |
4985 | |
4986 | /// Return true if the condition is a signed integer comparison. |
4987 | static bool isX86CCSigned(unsigned X86CC) { |
4988 | switch (X86CC) { |
4989 | default: |
4990 | llvm_unreachable("Invalid integer condition!"); |
4991 | case X86::COND_E: |
4992 | case X86::COND_NE: |
4993 | case X86::COND_B: |
4994 | case X86::COND_A: |
4995 | case X86::COND_BE: |
4996 | case X86::COND_AE: |
4997 | return false; |
4998 | case X86::COND_G: |
4999 | case X86::COND_GE: |
5000 | case X86::COND_L: |
5001 | case X86::COND_LE: |
5002 | return true; |
5003 | } |
5004 | } |
5005 | /// Translate an integer ISD::CondCode to an X86 condition code. |
5006 | static X86::CondCode TranslateIntegerX86CC(ISD::CondCode SetCCOpcode) { |
5007 | switch (SetCCOpcode) { |
5008 | default: llvm_unreachable("Invalid integer condition!"); |
5009 | case ISD::SETEQ: return X86::COND_E; |
5010 | case ISD::SETGT: return X86::COND_G; |
5011 | case ISD::SETGE: return X86::COND_GE; |
5012 | case ISD::SETLT: return X86::COND_L; |
5013 | case ISD::SETLE: return X86::COND_LE; |
5014 | case ISD::SETNE: return X86::COND_NE; |
5015 | case ISD::SETULT: return X86::COND_B; |
5016 | case ISD::SETUGT: return X86::COND_A; |
5017 | case ISD::SETULE: return X86::COND_BE; |
5018 | case ISD::SETUGE: return X86::COND_AE; |
5019 | } |
5020 | } |
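     | // For instance, ISD::SETULT maps to COND_B, so an unsigned "a < b"  |
     | // becomes a CMP followed by a JB/SETB/CMOVB form during selection.  |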
5021 | |
5022 | /// Do a one-to-one translation of an ISD::CondCode to an X86-specific |
5023 | /// condition code, returning the condition code and possibly swapping |
5024 | /// the LHS/RHS of the comparison to make. |
5025 | static X86::CondCode TranslateX86CC(ISD::CondCode SetCCOpcode, const SDLoc &DL, |
5026 | bool isFP, SDValue &LHS, SDValue &RHS, |
5027 | SelectionDAG &DAG) { |
5028 | if (!isFP) { |
5029 | if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) { |
5030 | if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) { |
5031 | // X > -1   -> X == 0, jump !sign. |
5032 | RHS = DAG.getConstant(0, DL, RHS.getValueType()); |
5033 | return X86::COND_NS; |
5034 | } |
5035 | if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) { |
5036 | // X < 0   -> X == 0, jump on sign. |
5037 | return X86::COND_S; |
5038 | } |
5039 | if (SetCCOpcode == ISD::SETGE && RHSC->isNullValue()) { |
5040 | // X >= 0   -> X == 0, jump on !sign. |
5041 | return X86::COND_NS; |
5042 | } |
5043 | if (SetCCOpcode == ISD::SETLT && RHSC->isOne()) { |
5044 | // X < 1   -> X <= 0 |
5045 | RHS = DAG.getConstant(0, DL, RHS.getValueType()); |
5046 | return X86::COND_LE; |
5047 | } |
5048 | } |
5049 | |
5050 | return TranslateIntegerX86CC(SetCCOpcode); |
5051 | } |
5052 | |
5053 | // If LHS is a foldable load, but RHS is not, flip the condition. |
5056 | if (ISD::isNON_EXTLoad(LHS.getNode()) && |
5057 | !ISD::isNON_EXTLoad(RHS.getNode())) { |
5058 | SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode); |
5059 | std::swap(LHS, RHS); |
5060 | } |
5061 | |
5062 | switch (SetCCOpcode) { |
5063 | default: break; |
5064 | case ISD::SETOLT: |
5065 | case ISD::SETOLE: |
5066 | case ISD::SETUGT: |
5067 | case ISD::SETUGE: |
5068 | std::swap(LHS, RHS); |
5069 | break; |
5070 | } |
5071 | |
5072 | // On a floating point condition, the flags are set as follows: |
5073 | //  ZF  PF  CF   op |
5074 | //   0 | 0 | 0 | X > Y |
5075 | //   0 | 0 | 1 | X < Y |
5076 | //   1 | 0 | 0 | X == Y |
5077 | //   1 | 1 | 1 | unordered |
5078 | switch (SetCCOpcode) { |
5079 | default: llvm_unreachable("Condcode should be pre-legalized away"); |
5080 | case ISD::SETUEQ: |
5081 | case ISD::SETEQ: return X86::COND_E; |
5082 | case ISD::SETOLT: |
5083 | case ISD::SETOGT: |
5084 | case ISD::SETGT: return X86::COND_A; |
5085 | case ISD::SETOLE: |
5086 | case ISD::SETOGE: |
5087 | case ISD::SETGE: return X86::COND_AE; |
5088 | case ISD::SETUGT: |
5089 | case ISD::SETULT: |
5090 | case ISD::SETLT: return X86::COND_B; |
5091 | case ISD::SETUGE: |
5092 | case ISD::SETULE: |
5093 | case ISD::SETLE: return X86::COND_BE; |
5094 | case ISD::SETONE: |
5095 | case ISD::SETNE: return X86::COND_NE; |
5096 | case ISD::SETUO: return X86::COND_P; |
5097 | case ISD::SETO: return X86::COND_NP; |
5098 | case ISD::SETOEQ: |
5099 | case ISD::SETUNE: return X86::COND_INVALID; |
5100 | } |
5101 | } |
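     | // Example (illustrative): ISD::SETOLT first swaps LHS/RHS above and |
     | // then maps to COND_A, i.e. "x < y" is tested as "y > x" using the  |
     | // CF/ZF flags produced by UCOMISS/UCOMISD.                          |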
5102 | |
5103 | /// Is there a floating point cmov for the specific X86 condition code? |
5104 | /// The current x86 ISA includes the following FP cmov instructions: |
5105 | /// fcmovb, fcmovbe, fcmove, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu. |
5106 | static bool hasFPCMov(unsigned X86CC) { |
5107 | switch (X86CC) { |
5108 | default: |
5109 | return false; |
5110 | case X86::COND_B: |
5111 | case X86::COND_BE: |
5112 | case X86::COND_E: |
5113 | case X86::COND_P: |
5114 | case X86::COND_A: |
5115 | case X86::COND_AE: |
5116 | case X86::COND_NE: |
5117 | case X86::COND_NP: |
5118 | return true; |
5119 | } |
5120 | } |
5121 | |
5122 | |
5123 | bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, |
5124 | const CallInst &I, |
5125 | MachineFunction &MF, |
5126 | unsigned Intrinsic) const { |
5127 | Info.flags = MachineMemOperand::MONone; |
5128 | Info.offset = 0; |
5129 | |
5130 | const IntrinsicData* IntrData = getIntrinsicWithChain(Intrinsic); |
5131 | if (!IntrData) { |
5132 | switch (Intrinsic) { |
5133 | case Intrinsic::x86_aesenc128kl: |
5134 | case Intrinsic::x86_aesdec128kl: |
5135 | Info.opc = ISD::INTRINSIC_W_CHAIN; |
5136 | Info.ptrVal = I.getArgOperand(1); |
5137 | Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 48); |
5138 | Info.align = Align(1); |
5139 | Info.flags |= MachineMemOperand::MOLoad; |
5140 | return true; |
5141 | case Intrinsic::x86_aesenc256kl: |
5142 | case Intrinsic::x86_aesdec256kl: |
5143 | Info.opc = ISD::INTRINSIC_W_CHAIN; |
5144 | Info.ptrVal = I.getArgOperand(1); |
5145 | Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 64); |
5146 | Info.align = Align(1); |
5147 | Info.flags |= MachineMemOperand::MOLoad; |
5148 | return true; |
5149 | case Intrinsic::x86_aesencwide128kl: |
5150 | case Intrinsic::x86_aesdecwide128kl: |
5151 | Info.opc = ISD::INTRINSIC_W_CHAIN; |
5152 | Info.ptrVal = I.getArgOperand(0); |
5153 | Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 48); |
5154 | Info.align = Align(1); |
5155 | Info.flags |= MachineMemOperand::MOLoad; |
5156 | return true; |
5157 | case Intrinsic::x86_aesencwide256kl: |
5158 | case Intrinsic::x86_aesdecwide256kl: |
5159 | Info.opc = ISD::INTRINSIC_W_CHAIN; |
5160 | Info.ptrVal = I.getArgOperand(0); |
5161 | Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 64); |
5162 | Info.align = Align(1); |
5163 | Info.flags |= MachineMemOperand::MOLoad; |
5164 | return true; |
5165 | } |
5166 | return false; |
5167 | } |
5168 | |
5169 | switch (IntrData->Type) { |
5170 | case TRUNCATE_TO_MEM_VI8: |
5171 | case TRUNCATE_TO_MEM_VI16: |
5172 | case TRUNCATE_TO_MEM_VI32: { |
5173 | Info.opc = ISD::INTRINSIC_VOID; |
5174 | Info.ptrVal = I.getArgOperand(0); |
5175 | MVT VT = MVT::getVT(I.getArgOperand(1)->getType()); |
5176 | MVT ScalarVT = MVT::INVALID_SIMPLE_VALUE_TYPE; |
5177 | if (IntrData->Type == TRUNCATE_TO_MEM_VI8) |
5178 | ScalarVT = MVT::i8; |
5179 | else if (IntrData->Type == TRUNCATE_TO_MEM_VI16) |
5180 | ScalarVT = MVT::i16; |
5181 | else if (IntrData->Type == TRUNCATE_TO_MEM_VI32) |
5182 | ScalarVT = MVT::i32; |
5183 | |
5184 | Info.memVT = MVT::getVectorVT(ScalarVT, VT.getVectorNumElements()); |
5185 | Info.align = Align(1); |
5186 | Info.flags |= MachineMemOperand::MOStore; |
5187 | break; |
5188 | } |
5189 | case GATHER: |
5190 | case GATHER_AVX2: { |
5191 | Info.opc = ISD::INTRINSIC_W_CHAIN; |
5192 | Info.ptrVal = nullptr; |
5193 | MVT DataVT = MVT::getVT(I.getType()); |
5194 | MVT IndexVT = MVT::getVT(I.getArgOperand(2)->getType()); |
5195 | unsigned NumElts = std::min(DataVT.getVectorNumElements(), |
5196 | IndexVT.getVectorNumElements()); |
5197 | Info.memVT = MVT::getVectorVT(DataVT.getVectorElementType(), NumElts); |
5198 | Info.align = Align(1); |
5199 | Info.flags |= MachineMemOperand::MOLoad; |
5200 | break; |
5201 | } |
5202 | case SCATTER: { |
5203 | Info.opc = ISD::INTRINSIC_VOID; |
5204 | Info.ptrVal = nullptr; |
5205 | MVT DataVT = MVT::getVT(I.getArgOperand(3)->getType()); |
5206 | MVT IndexVT = MVT::getVT(I.getArgOperand(2)->getType()); |
5207 | unsigned NumElts = std::min(DataVT.getVectorNumElements(), |
5208 | IndexVT.getVectorNumElements()); |
5209 | Info.memVT = MVT::getVectorVT(DataVT.getVectorElementType(), NumElts); |
5210 | Info.align = Align(1); |
5211 | Info.flags |= MachineMemOperand::MOStore; |
5212 | break; |
5213 | } |
5214 | default: |
5215 | return false; |
5216 | } |
5217 | |
5218 | return true; |
5219 | } |
5220 | |
5221 | /// Returns true if the target can instruction select the specified FP |
5222 | /// immediate natively. If false, the legalizer will materialize the FP |
5223 | /// immediate as a load from a constant pool. |
5224 | bool X86TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, |
5225 | bool ForCodeSize) const { |
5226 | for (unsigned i = 0, e = LegalFPImmediates.size(); i != e; ++i) { |
5227 | if (Imm.bitwiseIsEqual(LegalFPImmediates[i])) |
5228 | return true; |
5229 | } |
5230 | return false; |
5231 | } |
5232 | |
5233 | bool X86TargetLowering::shouldReduceLoadWidth(SDNode *Load, |
5234 | ISD::LoadExtType ExtTy, |
5235 | EVT NewVT) const { |
5236 | assert(cast<LoadSDNode>(Load)->isSimple() && "illegal to narrow"); |
5237 | |
5238 | // "ELF Handling for Thread-Local Storage" specifies that a |
5239 | // R_X86_64_GOTTPOFF relocation must target a movq or addq instruction. |
5240 | SDValue BasePtr = cast<LoadSDNode>(Load)->getBasePtr(); |
5241 | if (BasePtr.getOpcode() == X86ISD::WrapperRIP) |
5242 | if (const auto *GA = dyn_cast<GlobalAddressSDNode>(BasePtr.getOperand(0))) |
5243 | return GA->getTargetFlags() != X86II::MO_GOTTPOFF; |
5244 | |
5245 | // If this is an (1) AVX vector load with (2) multiple uses and (3) all |
5246 | // of those uses are extracted directly into a store, then the extract + |
5247 | // store can be store-folded, so don't split the load. |
5248 | EVT VT = Load->getValueType(0); |
5249 | if ((VT.is256BitVector() || VT.is512BitVector()) && !Load->hasOneUse()) { |
5250 | for (auto UI = Load->use_begin(), UE = Load->use_end(); UI != UE; ++UI) { |
5251 | // Skip uses of the chain value. Result 0 of the node is the load value. |
5252 | if (UI.getUse().getResNo() != 0) |
5253 | continue; |
5254 | // If this use is not an extract + store, it's probably worth splitting. |
5256 | if (UI->getOpcode() != ISD::EXTRACT_SUBVECTOR || !UI->hasOneUse() || |
5257 | UI->use_begin()->getOpcode() != ISD::STORE) |
5258 | return true; |
5259 | } |
5260 | |
5261 | return false; |
5262 | } |
5263 | |
5264 | return true; |
5265 | } |
5266 | |
5267 | /// Returns true if it is beneficial to convert a load of a constant to |
5268 | /// just the constant itself. |
5269 | bool X86TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, |
5270 | Type *Ty) const { |
5271 | assert(Ty->isIntegerTy()); |
5272 | |
5273 | unsigned BitSize = Ty->getPrimitiveSizeInBits(); |
5274 | if (BitSize == 0 || BitSize > 64) |
5275 | return false; |
5276 | return true; |
5277 | } |
5278 | |
5279 | bool X86TargetLowering::reduceSelectOfFPConstantLoads(EVT CmpOpVT) const { |
5280 | // If the condition of the select is a floating-point compare and we |
5281 | // have XMM registers plus AVX, it is cheaper to keep the select than |
5282 | // to create a cross-register move and a load depending on the compare |
5283 | // result. |
5284 | bool IsFPSetCC = CmpOpVT.isFloatingPoint() && CmpOpVT != MVT::f128; |
5285 | return !IsFPSetCC || !Subtarget.isTarget64BitLP64() || !Subtarget.hasAVX(); |
5286 | } |
5287 | |
5288 | bool X86TargetLowering::convertSelectOfConstantsToMath(EVT VT) const { |
5289 | // With AVX512 mask registers, vector selects of constants are cheap, |
5290 | // so keep the select. |
5291 | if (VT.isVector() && Subtarget.hasAVX512()) |
5292 | return false; |
5293 | |
5294 | return true; |
5295 | } |
5296 | |
5297 | bool X86TargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT, |
5298 | SDValue C) const { |
5299 | // TODO: We handle scalars using generic code, but should allow vectors |
5300 | // here. |
5301 | APInt MulC; |
5302 | if (!ISD::isConstantSplatVector(C.getNode(), MulC)) |
5303 | return false; |
5304 | |
5305 | // Find the type this will be legalized too. Otherwise we might |
5306 | // prematurely convert this to shl+add/sub and then still have to type |
5307 | // legalize those ops. Another choice would be to defer the decision |
5308 | // for illegal types until after type legalization. But constant splat |
5309 | // vectors of i64 can't make it through type legalization on 32-bit |
5310 | // targets so we would need to special case vXi64. |
5311 | while (getTypeAction(Context, VT) != TypeLegal) |
5312 | VT = getTypeToTransformTo(Context, VT); |
5313 | |
5314 | // If vector multiply is legal, assume that's faster than shl+add/sub. |
5315 | // TODO: Multiply is a complex op with higher latency and lower |
5316 | //       throughput in most implementations, so this check could be |
5317 | //       loosened based on type and/or a CPU attribute. |
5318 | if (isOperationLegal(ISD::MUL, VT)) |
5319 | return false; |
5320 | |
5321 | // shl+add, shl+sub, shl+add+neg |
5322 | return (MulC + 1).isPowerOf2() || (MulC - 1).isPowerOf2() || |
5323 | (1 - MulC).isPowerOf2() || (-(MulC + 1)).isPowerOf2(); |
5324 | } |
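     | // Example (illustrative): a splat multiply by 5 is decomposed here |
     | // because (5 - 1) is a power of two, enabling x * 5 -> (x << 2) + x. |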
5325 | |
5326 | bool X86TargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, |
5327 | unsigned Index) const { |
5328 | if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT)) |
5329 | return false; |
5330 | |
5331 | // Mask vectors support all subregister combinations and operations |
5332 | // that extract half of a vector. |
5333 | if (ResVT.getVectorElementType() == MVT::i1) |
5334 | return Index == 0 || ((ResVT.getSizeInBits() == SrcVT.getSizeInBits()*2) && |
5335 | (Index == ResVT.getVectorNumElements())); |
5336 | |
5337 | return (Index % ResVT.getVectorNumElements()) == 0; |
5338 | } |
5339 | |
5340 | bool X86TargetLowering::shouldScalarizeBinop(SDValue VecOp) const { |
5341 | unsigned Opc = VecOp.getOpcode(); |
5342 | |
5343 | // Assume target opcodes can't be scalarized. |
5344 | // TODO - do we have any exceptions? |
5345 | if (Opc >= ISD::BUILTIN_OP_END) |
5346 | return false; |
5347 | |
5348 | // If the vector op is not supported, try to convert to scalar. |
5349 | EVT VecVT = VecOp.getValueType(); |
5350 | if (!isOperationLegalOrCustomOrPromote(Opc, VecVT)) |
5351 | return true; |
5352 | |
5353 | // If the vector op is supported, but the scalar op is not, the |
5354 | // transform may not be worthwhile. |
5355 | EVT ScalarVT = VecVT.getScalarType(); |
5356 | return isOperationLegalOrCustomOrPromote(Opc, ScalarVT); |
5357 | } |
5358 | |
5359 | bool X86TargetLowering::shouldFormOverflowOp(unsigned Opcode, EVT VT, |
5360 | bool) const { |
5361 | // TODO: Allow vectors? |
5362 | if (VT.isVector()) |
5363 | return false; |
5364 | return VT.isSimple() || !isOperationExpand(Opcode, VT); |
5365 | } |
5366 | |
5367 | bool X86TargetLowering::isCheapToSpeculateCttz() const { |
5368 | // Speculate cttz only if we can directly use TZCNT. |
5369 | return Subtarget.hasBMI(); |
5370 | } |
5371 | |
5372 | bool X86TargetLowering::isCheapToSpeculateCtlz() const { |
5373 | // Speculate ctlz only if we can directly use LZCNT. |
5374 | return Subtarget.hasLZCNT(); |
5375 | } |
5376 | |
5377 | bool X86TargetLowering::isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT, |
5378 | const SelectionDAG &DAG, |
5379 | const MachineMemOperand &MMO) const { |
5380 | if (!Subtarget.hasAVX512() && !LoadVT.isVector() && BitcastVT.isVector() && |
5381 | BitcastVT.getVectorElementType() == MVT::i1) |
5382 | return false; |
5383 | |
5384 | if (!Subtarget.hasDQI() && BitcastVT == MVT::v8i1 && LoadVT == MVT::i8) |
5385 | return false; |
5386 | |
5387 | // If both types are legal vectors, it's always ok to convert them. |
5388 | if (LoadVT.isVector() && BitcastVT.isVector() && |
5389 | isTypeLegal(LoadVT) && isTypeLegal(BitcastVT)) |
5390 | return true; |
5391 | |
5392 | return TargetLowering::isLoadBitCastBeneficial(LoadVT, BitcastVT, DAG, MMO); |
5393 | } |
5394 | |
5395 | bool X86TargetLowering::canMergeStoresTo(unsigned AddressSpace, EVT MemVT, |
5396 | const SelectionDAG &DAG) const { |
5397 | // Do not merge to a size wider than the largest legal integer if the |
5398 | // noimplicitfloat attribute is set. |
5399 | bool NoFloat = DAG.getMachineFunction().getFunction().hasFnAttribute( |
5400 | Attribute::NoImplicitFloat); |
5401 | |
5402 | if (NoFloat) { |
5403 | unsigned MaxIntSize = Subtarget.is64Bit() ? 64 : 32; |
5404 | return (MemVT.getSizeInBits() <= MaxIntSize); |
5405 | } |
5406 | |
5407 | // Make sure we don't merge greater than our preferred vector width. |
5408 | if (MemVT.getSizeInBits() > Subtarget.getPreferVectorWidth()) |
5409 | return false; |
5410 | |
5411 | return true; |
5412 | } |
5413 | |
5414 | bool X86TargetLowering::isCtlzFast() const { |
5415 | return Subtarget.hasFastLZCNT(); |
5416 | } |
5417 | |
5418 | bool X86TargetLowering::isMaskAndCmp0FoldingBeneficial( |
5419 | const Instruction &AndI) const { |
5420 | return true; |
5421 | } |
5422 | |
5423 | bool X86TargetLowering::hasAndNotCompare(SDValue Y) const { |
5424 | EVT VT = Y.getValueType(); |
5425 | |
5426 | if (VT.isVector()) |
5427 | return false; |
5428 | |
5429 | if (!Subtarget.hasBMI()) |
5430 | return false; |
5431 | |
5432 | // There are only 32-bit and 64-bit forms for 'andn'. |
5433 | if (VT != MVT::i32 && VT != MVT::i64) |
5434 | return false; |
5435 | |
5436 | return !isa<ConstantSDNode>(Y); |
5437 | } |
5438 | |
5439 | bool X86TargetLowering::hasAndNot(SDValue Y) const { |
5440 | EVT VT = Y.getValueType(); |
5441 | |
5442 | if (!VT.isVector()) |
5443 | return hasAndNotCompare(Y); |
5444 | |
5445 | // Vector AND-NOT (PANDN/ANDNPS) needs at least a 128-bit SSE type. |
5447 | if (!Subtarget.hasSSE1() || VT.getSizeInBits() < 128) |
5448 | return false; |
5449 | |
5450 | if (VT == MVT::v4i32) |
5451 | return true; |
5452 | |
5453 | return Subtarget.hasSSE2(); |
5454 | } |
5455 | |
5456 | bool X86TargetLowering::hasBitTest(SDValue X, SDValue Y) const { |
5457 | return X.getValueType().isScalarInteger(); |
5458 | } |
5459 | |
5460 | bool X86TargetLowering:: |
5461 | shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd( |
5462 | SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, |
5463 | unsigned OldShiftOpcode, unsigned NewShiftOpcode, |
5464 | SelectionDAG &DAG) const { |
5465 | // Does the baseline recommend not to perform the fold by default? |
5466 | if (!TargetLowering::shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd( |
5467 | X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG)) |
5468 | return false; |
5469 | // For scalars this transform is always beneficial. |
5470 | if (X.getValueType().isScalarInteger()) |
5471 | return true; |
5472 | // If all the shift amounts are identical, the transform is beneficial |
5473 | // even with rudimentary SSE2 shifts. |
5474 | if (DAG.isSplatValue(Y, true)) |
5475 | return true; |
5476 | // If we have AVX2 with its powerful shift operations, it's also good. |
5477 | if (Subtarget.hasAVX2()) |
5478 | return true; |
5479 | // Pre-AVX2 vector codegen for this pattern is best with 'shl'. |
5480 | return NewShiftOpcode == ISD::SHL; |
5481 | } |
5482 | |
5483 | bool X86TargetLowering::shouldFoldConstantShiftPairToMask( |
5484 | const SDNode *N, CombineLevel Level) const { |
5485 | assert(((N->getOpcode() == ISD::SHL && |
5486 | N->getOperand(0).getOpcode() == ISD::SRL) || |
5487 | (N->getOpcode() == ISD::SRL && |
5488 | N->getOperand(0).getOpcode() == ISD::SHL)) && |
5489 | "Expected shift-shift mask"); |
5490 | EVT VT = N->getValueType(0); |
5491 | if ((Subtarget.hasFastVectorShiftMasks() && VT.isVector()) || |
5492 | (Subtarget.hasFastScalarShiftMasks() && !VT.isVector())) { |
5493 | // Only fold if the shift values are equal - so it folds to AND. |
5496 | return N->getOperand(1) == N->getOperand(0).getOperand(1); |
5497 | } |
5498 | return TargetLoweringBase::shouldFoldConstantShiftPairToMask(N, Level); |
5499 | } |
5500 | |
5501 | bool X86TargetLowering::shouldFoldMaskToVariableShiftPair(SDValue Y) const { |
5502 | EVT VT = Y.getValueType(); |
5503 | |
5504 | // For vectors, we don't have a preference, but we probably want a mask. |
5505 | if (VT.isVector()) |
5506 | return false; |
5507 | |
5508 | // 64-bit shifts on 32-bit targets produce really bad bloated code. |
5509 | if (VT == MVT::i64 && !Subtarget.is64Bit()) |
5510 | return false; |
5511 | |
5512 | return true; |
5513 | } |
5514 | |
5515 | bool X86TargetLowering::shouldExpandShift(SelectionDAG &DAG, |
5516 | SDNode *N) const { |
5517 | if (DAG.getMachineFunction().getFunction().hasMinSize() && |
5518 | !Subtarget.isOSWindows()) |
5519 | return false; |
5520 | return true; |
5521 | } |
5522 | |
5523 | bool X86TargetLowering::shouldSplatInsEltVarIndex(EVT VT) const { |
5524 | // Any legal vector type can be splatted more efficiently than being |
5525 | // loaded/spilled from memory. |
5526 | return isTypeLegal(VT); |
5527 | } |
5528 | |
5529 | MVT X86TargetLowering::hasFastEqualityCompare(unsigned NumBits) const { |
5530 | MVT VT = MVT::getIntegerVT(NumBits); |
5531 | if (isTypeLegal(VT)) |
5532 | return VT; |
5533 | |
5534 | // PMOVMSKB can handle this. |
5535 | if (NumBits == 128 && isTypeLegal(MVT::v16i8)) |
5536 | return MVT::v16i8; |
5537 | |
5538 | // VPMOVMSKB can handle this. |
5539 | if (NumBits == 256 && isTypeLegal(MVT::v32i8)) |
5540 | return MVT::v32i8; |
5541 | |
5542 | // TODO: Allow 64-bit type for 32-bit target. |
5543 | // TODO: 512-bit types should be allowed, but make sure that those |
5544 | // cases are handled in combineVectorSizedSetCCEquality(). |
5546 | return MVT::INVALID_SIMPLE_VALUE_TYPE; |
5547 | } |
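     | // E.g. a 256-bit equality compare can be done as a v32i8 vector    |
     | // compare plus VPMOVMSKB instead of four scalar 64-bit compares    |
     | // (illustrative; the actual expansion is decided by the callers).  |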
5548 | |
5549 | /// Val is the undef sentinel value or equal to the specified value. |
5550 | static bool isUndefOrEqual(int Val, int CmpVal) { |
5551 | return ((Val == SM_SentinelUndef) || (Val == CmpVal)); |
5552 | } |
5553 | |
5554 | /// Return true if every element in Mask is the undef sentinel value or |
5555 | /// equal to the specified value. |
5556 | static bool isUndefOrEqual(ArrayRef<int> Mask, int CmpVal) { |
5557 | return llvm::all_of(Mask, [CmpVal](int M) { |
5558 | return (M == SM_SentinelUndef) || (M == CmpVal); |
5559 | }); |
5560 | } |
5561 | |
5562 | /// Val is either the undef or zero sentinel value. |
5563 | static bool isUndefOrZero(int Val) { |
5564 | return ((Val == SM_SentinelUndef) || (Val == SM_SentinelZero)); |
5565 | } |
5566 | |
5567 | /// Return true if every element in Mask, beginning at position Pos and |
5568 | /// ending in Pos+Size, is the undef sentinel value. |
5569 | static bool isUndefInRange(ArrayRef<int> Mask, unsigned Pos, unsigned Size) { |
5570 | return llvm::all_of(Mask.slice(Pos, Size), |
5571 | [](int M) { return M == SM_SentinelUndef; }); |
5572 | } |
5573 | |
5574 | /// Return true if the mask creates a vector whose lower half is undefined. |
5575 | static bool isUndefLowerHalf(ArrayRef<int> Mask) { |
5576 | unsigned NumElts = Mask.size(); |
5577 | return isUndefInRange(Mask, 0, NumElts / 2); |
5578 | } |
5579 | |
5580 | /// Return true if the mask creates a vector whose upper half is undefined. |
5581 | static bool isUndefUpperHalf(ArrayRef<int> Mask) { |
5582 | unsigned NumElts = Mask.size(); |
5583 | return isUndefInRange(Mask, NumElts / 2, NumElts / 2); |
5584 | } |
5585 | |
5586 | /// Return true if Val falls within the specified range [Low, Hi). |
5587 | static bool isInRange(int Val, int Low, int Hi) { |
5588 | return (Val >= Low && Val < Hi); |
5589 | } |
5590 | |
5591 | /// Return true if the value of any element in Mask falls within the |
5592 | /// specified range [Low, Hi). |
5593 | static bool isAnyInRange(ArrayRef<int> Mask, int Low, int Hi) { |
5594 | return llvm::any_of(Mask, [Low, Hi](int M) { return isInRange(M, Low, Hi); }); |
5595 | } |
5596 | |
5597 | /// Return true if any element in Mask is the zero sentinel value. |
5598 | static bool isAnyZero(ArrayRef<int> Mask) { |
5599 | return llvm::any_of(Mask, [](int M) { return M == SM_SentinelZero; }); |
5600 | } |
5601 | |
5602 | /// Return true if any element in Mask is the zero or undef sentinel |
5603 | /// value. |
5604 | static bool isAnyZeroOrUndef(ArrayRef<int> Mask) { |
5605 | return llvm::any_of(Mask, [](int M) { |
5606 | return M == SM_SentinelZero || M == SM_SentinelUndef; |
5607 | }); |
5608 | } |
5609 | |
5610 | /// Return true if Val is undef or if its value falls within the |
5611 | /// specified range [Low, Hi). |
5612 | static bool isUndefOrInRange(int Val, int Low, int Hi) { |
5613 | return (Val == SM_SentinelUndef) || isInRange(Val, Low, Hi); |
5614 | } |
5615 | |
5616 | /// Return true if every element in Mask is undef or if its value falls |
5617 | /// within the specified range [Low, Hi). |
5618 | static bool isUndefOrInRange(ArrayRef<int> Mask, int Low, int Hi) { |
5619 | return llvm::all_of( |
5620 | Mask, [Low, Hi](int M) { return isUndefOrInRange(M, Low, Hi); }); |
5621 | } |
5622 | |
5623 | /// Return true if Val is undef, zero or if its value falls within the |
5624 | /// specified range [Low, Hi). |
5625 | static bool isUndefOrZeroOrInRange(int Val, int Low, int Hi) { |
5626 | return isUndefOrZero(Val) || isInRange(Val, Low, Hi); |
5627 | } |
5628 | |
5629 | /// Return true if every element in Mask is undef, zero or if its value |
5630 | /// falls within the specified range [Low, Hi). |
5631 | static bool isUndefOrZeroOrInRange(ArrayRef<int> Mask, int Low, int Hi) { |
5632 | return llvm::all_of( |
5633 | Mask, [Low, Hi](int M) { return isUndefOrZeroOrInRange(M, Low, Hi); }); |
5634 | } |
5635 | |
5636 | /// Return true if every element in Mask, beginning at position Pos and |
5637 | /// ending in Pos+Size, falls within the specified sequential range |
5638 | /// (Low, Low+Step*Size] or is undef. |
5639 | static bool isSequentialOrUndefInRange(ArrayRef<int> Mask, unsigned Pos, |
5640 | unsigned Size, int Low, int Step = 1) { |
5641 | for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step) |
5642 | if (!isUndefOrEqual(Mask[i], Low)) |
5643 | return false; |
5644 | return true; |
5645 | } |
5646 | |
5647 | /// Return true if every element in Mask, beginning at position Pos and |
5648 | /// ending in Pos+Size, falls within the specified sequential range |
5649 | /// (Low, Low+Step*Size], or is undef or zero. |
5650 | static bool isSequentialOrUndefOrZeroInRange(ArrayRef<int> Mask, unsigned Pos, |
5651 | unsigned Size, int Low, |
5652 | int Step = 1) { |
5653 | for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step) |
5654 | if (!isUndefOrZero(Mask[i]) && Mask[i] != Low) |
5655 | return false; |
5656 | return true; |
5657 | } |
5658 | |
5659 | /// Return true if every element in Mask, beginning at position Pos and |
5660 | /// ending in Pos+Size, is undef or zero. |
5661 | static bool isUndefOrZeroInRange(ArrayRef<int> Mask, unsigned Pos, |
5662 | unsigned Size) { |
5663 | return llvm::all_of(Mask.slice(Pos, Size), |
5664 | [](int M) { return isUndefOrZero(M); }); |
5665 | } |
5666 | |
5667 | /// Helper function to test whether a shuffle mask could be simplified |
5668 | /// by widening the elements being shuffled. |
5669 | /// |
5670 | /// Appends the mask for wider elements in WidenedMask if valid. |
5671 | /// Otherwise leaves it in an unspecified state. |
5672 | /// |
5673 | /// NOTE: This must handle normal vector shuffle masks and target vector |
5674 | /// shuffle masks. The latter have the special property of a '-2' |
5675 | /// representing a zero-ed lane of a vector. |
5676 | static bool canWidenShuffleElements(ArrayRef<int> Mask, |
5677 | SmallVectorImpl<int> &WidenedMask) { |
5678 | WidenedMask.assign(Mask.size() / 2, 0); |
5679 | for (int i = 0, Size = Mask.size(); i < Size; i += 2) { |
5680 | int M0 = Mask[i]; |
5681 | int M1 = Mask[i + 1]; |
5682 | // If both elements are undef, it's trivially undef. |
5684 | if (M0 == SM_SentinelUndef && M1 == SM_SentinelUndef) { |
5685 | WidenedMask[i / 2] = SM_SentinelUndef; |
5686 | continue; |
5687 | } |
5688 | |
5689 | // Check for an undef mask and a mask value properly aligned to fit |
5690 | // with a pair of values. If found, use the non-undef mask's value. |
5691 | if (M0 == SM_SentinelUndef && M1 >= 0 && (M1 % 2) == 1) { |
5692 | WidenedMask[i / 2] = M1 / 2; |
5693 | continue; |
5694 | } |
5695 | if (M1 == SM_SentinelUndef && M0 >= 0 && (M0 % 2) == 0) { |
5696 | WidenedMask[i / 2] = M0 / 2; |
5697 | continue; |
5698 | } |
5699 | |
5700 | // When zeroing, we need to spread the zeroing across both lanes to widen. |
5701 | if (M0 == SM_SentinelZero || M1 == SM_SentinelZero) { |
5702 | if ((M0 == SM_SentinelZero || M0 == SM_SentinelUndef) && |
5703 | (M1 == SM_SentinelZero || M1 == SM_SentinelUndef)) { |
5704 | WidenedMask[i / 2] = SM_SentinelZero; |
5705 | continue; |
5706 | } |
5707 | return false; |
5708 | } |
5709 | |
5710 | // Finally check if the two mask values are adjacent and aligned with |
5711 | // a pair. |
5712 | if (M0 != SM_SentinelUndef && (M0 % 2) == 0 && (M0 + 1) == M1) { |
5713 | WidenedMask[i / 2] = M0 / 2; |
5714 | continue; |
5715 | } |
5716 | |
5717 | // Otherwise we can't safely widen the elements used in this shuffle. |
5718 | return false; |
5719 | } |
5720 | assert(WidenedMask.size() == Mask.size() / 2 && |
5721 | "Incorrect size of mask after widening the elements!"); |
5722 | |
5723 | return true; |
5724 | } |
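     | // Example (illustrative): the 8-element mask <0,1,6,7,-1,-1,2,3>     |
     | // widens to the 4-element mask <0,3,-1,1>, since each aligned pair   |
     | // (2k, 2k+1) collapses to the single wide element k.                 |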
5725 | |
5726 | static bool canWidenShuffleElements(ArrayRef<int> Mask, |
5727 | const APInt &Zeroable, |
5728 | bool V2IsZero, |
5729 | SmallVectorImpl<int> &WidenedMask) { |
5730 | // Create an alternative mask with info about zeroable elements. |
5732 | SmallVector<int, 64> ZeroableMask(Mask.begin(), Mask.end()); |
5733 | if (V2IsZero) { |
5734 | assert(!Zeroable.isNullValue() && "V2's non-undef elements are used?!"); |
5735 | for (int i = 0, Size = Mask.size(); i != Size; ++i) |
5736 | if (Mask[i] != SM_SentinelUndef && Zeroable[i]) |
5737 | ZeroableMask[i] = SM_SentinelZero; |
5738 | } |
5739 | return canWidenShuffleElements(ZeroableMask, WidenedMask); |
5740 | } |
5741 | |
5742 | static bool canWidenShuffleElements(ArrayRef<int> Mask) { |
5743 | SmallVector<int, 32> WidenedMask; |
5744 | return canWidenShuffleElements(Mask, WidenedMask); |
5745 | } |
5746 | |
5747 | // Attempt to scale a shuffle mask to a different number of (wider or |
5748 | // narrower) destination elements. |
5749 | static bool scaleShuffleElements(ArrayRef<int> Mask, unsigned NumDstElts, |
5750 | SmallVectorImpl<int> &ScaledMask) { |
5751 | unsigned NumSrcElts = Mask.size(); |
5752 | assert(((NumSrcElts % NumDstElts) == 0 || (NumDstElts % NumSrcElts) == 0) && |
5753 | "Illegal shuffle scale factor"); |
5754 | |
5755 | // Narrowing is guaranteed to work. |
5756 | if (NumDstElts >= NumSrcElts) { |
5757 | int Scale = NumDstElts / NumSrcElts; |
5758 | llvm::narrowShuffleMaskElts(Scale, Mask, ScaledMask); |
5759 | return true; |
5760 | } |
5761 | |
5762 | // We have to repeat the widening until we reach the target size, but |
5763 | // we can split out the first widening as it sets up ScaledMask for us. |
5764 | if (canWidenShuffleElements(Mask, ScaledMask)) { |
5765 | while (ScaledMask.size() > NumDstElts) { |
5766 | SmallVector<int, 16> WidenedMask; |
5767 | if (!canWidenShuffleElements(ScaledMask, WidenedMask)) |
5768 | return false; |
5769 | ScaledMask = std::move(WidenedMask); |
5770 | } |
5771 | return true; |
5772 | } |
5773 | |
5774 | return false; |
5775 | } |
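     | // Example (illustrative): scaling the 2-element mask <1,0> up to 4   |
     | // elements yields <2,3,0,1>; scaling down only succeeds when each    |
     | // adjacent pair of mask elements is itself widenable.                |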
5776 | |
5777 | /// Returns true if Elt is a constant zero or floating point constant +0.0. |
5778 | bool X86::isZeroNode(SDValue Elt) { |
5779 | return isNullConstant(Elt) || isNullFPConstant(Elt); |
5780 | } |
5781 | |
5782 | // Build a vector of constants. Use an UNDEF node if the element is |
5783 | // negative and IsMask is set. Split 64-bit constants in 32-bit mode. |
5785 | static SDValue getConstVector(ArrayRef<int> Values, MVT VT, SelectionDAG &DAG, |
5786 | const SDLoc &dl, bool IsMask = false) { |
5787 | |
5788 | SmallVector<SDValue, 32> Ops; |
5789 | bool Split = false; |
5790 | |
5791 | MVT ConstVecVT = VT; |
5792 | unsigned NumElts = VT.getVectorNumElements(); |
5793 | bool In64BitMode = DAG.getTargetLoweringInfo().isTypeLegal(MVT::i64); |
5794 | if (!In64BitMode && VT.getVectorElementType() == MVT::i64) { |
5795 | ConstVecVT = MVT::getVectorVT(MVT::i32, NumElts * 2); |
5796 | Split = true; |
5797 | } |
5798 | |
5799 | MVT EltVT = ConstVecVT.getVectorElementType(); |
5800 | for (unsigned i = 0; i < NumElts; ++i) { |
5801 | bool IsUndef = Values[i] < 0 && IsMask; |
5802 | SDValue OpNode = IsUndef ? DAG.getUNDEF(EltVT) : |
5803 | DAG.getConstant(Values[i], dl, EltVT); |
5804 | Ops.push_back(OpNode); |
5805 | if (Split) |
5806 | Ops.push_back(IsUndef ? DAG.getUNDEF(EltVT) : |
5807 | DAG.getConstant(0, dl, EltVT)); |
5808 | } |
5809 | SDValue ConstsNode = DAG.getBuildVector(ConstVecVT, dl, Ops); |
5810 | if (Split) |
5811 | ConstsNode = DAG.getBitcast(VT, ConstsNode); |
5812 | return ConstsNode; |
5813 | } |
5814 | |
5815 | static SDValue getConstVector(ArrayRef<APInt> Bits, APInt &Undefs, |
5816 | MVT VT, SelectionDAG &DAG, const SDLoc &dl) { |
5817 | assert(Bits.size() == Undefs.getBitWidth() && |
5818 | "Unequal constant and undef arrays"); |
5819 | SmallVector<SDValue, 32> Ops; |
5820 | bool Split = false; |
5821 | |
5822 | MVT ConstVecVT = VT; |
5823 | unsigned NumElts = VT.getVectorNumElements(); |
5824 | bool In64BitMode = DAG.getTargetLoweringInfo().isTypeLegal(MVT::i64); |
5825 | if (!In64BitMode && VT.getVectorElementType() == MVT::i64) { |
5826 | ConstVecVT = MVT::getVectorVT(MVT::i32, NumElts * 2); |
5827 | Split = true; |
5828 | } |
5829 | |
5830 | MVT EltVT = ConstVecVT.getVectorElementType(); |
5831 | for (unsigned i = 0, e = Bits.size(); i != e; ++i) { |
5832 | if (Undefs[i]) { |
5833 | Ops.append(Split ? 2 : 1, DAG.getUNDEF(EltVT)); |
5834 | continue; |
5835 | } |
5836 | const APInt &V = Bits[i]; |
5837 | assert(V.getBitWidth() == VT.getScalarSizeInBits() && "Unexpected sizes"); |
5838 | if (Split) { |
5839 | Ops.push_back(DAG.getConstant(V.trunc(32), dl, EltVT)); |
5840 | Ops.push_back(DAG.getConstant(V.lshr(32).trunc(32), dl, EltVT)); |
5841 | } else if (EltVT == MVT::f32) { |
5842 | APFloat FV(APFloat::IEEEsingle(), V); |
5843 | Ops.push_back(DAG.getConstantFP(FV, dl, EltVT)); |
5844 | } else if (EltVT == MVT::f64) { |
5845 | APFloat FV(APFloat::IEEEdouble(), V); |
5846 | Ops.push_back(DAG.getConstantFP(FV, dl, EltVT)); |
5847 | } else { |
5848 | Ops.push_back(DAG.getConstant(V, dl, EltVT)); |
5849 | } |
5850 | } |
5851 | |
5852 | SDValue ConstsNode = DAG.getBuildVector(ConstVecVT, dl, Ops); |
5853 | return DAG.getBitcast(VT, ConstsNode); |
5854 | } |
5855 | |
5856 | /// Returns a vector of the specified type with all zero elements. |
5857 | static SDValue getZeroVector(MVT VT, const X86Subtarget &Subtarget, |
5858 | SelectionDAG &DAG, const SDLoc &dl) { |
5859 | assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector() || |
5860 | VT.getVectorElementType() == MVT::i1) && |
5861 | "Unexpected vector type"); |
5862 | |
5863 | // Try to build SSE zero vectors as <4 x i32> bitcasted to their dest |
5864 | // type. This ensures they get CSE'd. But if the integer type is not |
5865 | // available, use a floating-point +0.0 instead. |
5866 | SDValue Vec; |
5867 | if (!Subtarget.hasSSE2() && VT.is128BitVector()) { |
5868 | Vec = DAG.getConstantFP(+0.0, dl, MVT::v4f32); |
5869 | } else if (VT.isFloatingPoint()) { |
5870 | Vec = DAG.getConstantFP(+0.0, dl, VT); |
5871 | } else if (VT.getVectorElementType() == MVT::i1) { |
5872 | assert((Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) && |
5873 | "Unexpected vector type"); |
5874 | Vec = DAG.getConstant(0, dl, VT); |
5875 | } else { |
5876 | unsigned Num32BitElts = VT.getSizeInBits() / 32; |
5877 | Vec = DAG.getConstant(0, dl, MVT::getVectorVT(MVT::i32, Num32BitElts)); |
5878 | } |
5879 | return DAG.getBitcast(VT, Vec); |
5880 | } |
5881 | |
5882 | static SDValue extractSubVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG, |
5883 | const SDLoc &dl, unsigned vectorWidth) { |
5884 | EVT VT = Vec.getValueType(); |
5885 | EVT ElVT = VT.getVectorElementType(); |
5886 | unsigned Factor = VT.getSizeInBits() / vectorWidth; |
5887 | EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT, |
5888 | VT.getVectorNumElements() / Factor); |
5889 | // Extract the relevant vectorWidth bits. Generate an EXTRACT_SUBVECTOR. |
5891 | unsigned ElemsPerChunk = vectorWidth / ElVT.getSizeInBits(); |
5892 | assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2"); |
5893 | |
5894 | // This is the index of the first element of the vectorWidth-bit chunk |
5895 | // we want. Since ElemsPerChunk is a power of 2, just clear the bits. |
5896 | IdxVal &= ~(ElemsPerChunk - 1); |
5897 | |
5898 | // If the input is a buildvector, just emit a smaller one. |
5899 | if (Vec.getOpcode() == ISD::BUILD_VECTOR) |
5900 | return DAG.getBuildVector(ResultVT, dl, |
5901 | Vec->ops().slice(IdxVal, ElemsPerChunk)); |
5902 | |
5903 | SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl); |
5904 | return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec, VecIdx); |
5905 | } |
5906 | |
5907 | /// Generate a DAG to grab 128 bits from a vector > 128 bits. This sets |
5908 | /// things up to match an AVX VEXTRACTF128/VEXTRACTI128 or AVX-512 |
5909 | /// VEXTRACTF32x4/VEXTRACTI32x4 instruction or a simple subregister |
5910 | /// reference. IdxVal is an index into the 128 bits we want. It need not |
5911 | /// be aligned to a 128-bit boundary, which makes lowering |
5912 | /// EXTRACT_VECTOR_ELT operations easier. |
5913 | static SDValue extract128BitVector(SDValue Vec, unsigned IdxVal, |
5914 | SelectionDAG &DAG, const SDLoc &dl) { |
5915 | assert((Vec.getValueType().is256BitVector() || |
5916 | Vec.getValueType().is512BitVector()) && "Unexpected vector size!"); |
5917 | return extractSubVector(Vec, IdxVal, DAG, dl, 128); |
5918 | } |
5919 | |
5920 | /// Generate a DAG to grab 256 bits from a 512-bit vector. |
5921 | static SDValue extract256BitVector(SDValue Vec, unsigned IdxVal, |
5922 | SelectionDAG &DAG, const SDLoc &dl) { |
5923 | assert(Vec.getValueType().is512BitVector() && "Unexpected vector size!"); |
5924 | return extractSubVector(Vec, IdxVal, DAG, dl, 256); |
5925 | } |
5926 | |
5927 | static SDValue insertSubVector(SDValue Result, SDValue Vec, unsigned IdxVal, |
5928 | SelectionDAG &DAG, const SDLoc &dl, |
5929 | unsigned vectorWidth) { |
5930 | assert((vectorWidth == 128 || vectorWidth == 256) && |
5931 | "Unsupported vector width"); |
5932 | |
5933 | if (Vec.isUndef()) |
5934 | return Result; |
5935 | EVT VT = Vec.getValueType(); |
5936 | EVT ElVT = VT.getVectorElementType(); |
5937 | EVT ResultVT = Result.getValueType(); |
5938 | |
5939 | // Insert the relevant vectorWidth bits. |
5940 | unsigned ElemsPerChunk = vectorWidth/ElVT.getSizeInBits(); |
5941 | assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2"); |
5942 | |
5943 | // This is the index of the first element of the vectorWidth-bit chunk |
5944 | // we want. Since ElemsPerChunk is a power of 2, just clear the bits. |
5945 | IdxVal &= ~(ElemsPerChunk - 1); |
5946 | |
5947 | SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl); |
5948 | return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec, VecIdx); |
5949 | } |
5950 | |
5951 | /// Generate a DAG to put 128 bits into a vector > 128 bits. This sets |
5952 | /// things up to match an AVX VINSERTF128/VINSERTI128 or AVX-512 |
5953 | /// VINSERTF32x4/VINSERTI32x4 instruction or a simple superregister |
5954 | /// reference. IdxVal is an index into the 128 bits we want. It need not |
5955 | /// be aligned to a 128-bit boundary, which makes lowering |
5956 | /// INSERT_VECTOR_ELT operations easier. |
5957 | static SDValue insert128BitVector(SDValue Result, SDValue Vec, unsigned IdxVal, |
5958 | SelectionDAG &DAG, const SDLoc &dl) { |
5959 | assert(Vec.getValueType().is128BitVector() && "Unexpected vector size!"); |
5960 | return insertSubVector(Result, Vec, IdxVal, DAG, dl, 128); |
5961 | } |
5962 | |
5963 | /// Widen a vector to a larger size with the same scalar type, with the |
5964 | /// new elements either zero or undef. |
5965 | static SDValue widenSubVector(MVT VT, SDValue Vec, bool ZeroNewElements, |
5966 | const X86Subtarget &Subtarget, SelectionDAG &DAG, |
5967 | const SDLoc &dl) { |
5968 | assert(Vec.getValueSizeInBits().getFixedSize() < VT.getFixedSizeInBits() && |
5969 | Vec.getValueType().getScalarType() == VT.getScalarType() && |
5970 | "Unsupported vector widening type"); |
5971 | SDValue Res = ZeroNewElements ? getZeroVector(VT, Subtarget, DAG, dl) |
5972 | : DAG.getUNDEF(VT); |
5973 | return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT, Res, Vec, |
5974 | DAG.getIntPtrConstant(0, dl)); |
5975 | } |
5976 | |
5977 | /// Widen a vector to the given size in bits with the same scalar type, |
5978 | /// with the new elements either zero or undef. |
5979 | static SDValue widenSubVector(SDValue Vec, bool ZeroNewElements, |
5980 | const X86Subtarget &Subtarget, SelectionDAG &DAG, |
5981 | const SDLoc &dl, unsigned WideSizeInBits) { |
5982 | assert(Vec.getValueSizeInBits() < WideSizeInBits && |
5983 | (WideSizeInBits % Vec.getScalarValueSizeInBits()) == 0 && |
5984 | "Unsupported vector widening type"); |
5985 | unsigned WideNumElts = WideSizeInBits / Vec.getScalarValueSizeInBits(); |
5986 | MVT SVT = Vec.getSimpleValueType().getScalarType(); |
5987 | MVT VT = MVT::getVectorVT(SVT, WideNumElts); |
5988 | return widenSubVector(VT, Vec, ZeroNewElements, Subtarget, DAG, dl); |
5989 | } |
5990 | |
5991 | // Helper function to collect subvector ops that are concatenated |
5992 | // together, either by ISD::CONCAT_VECTORS or an ISD::INSERT_SUBVECTOR |
5993 | // series. The subvectors in Ops are guaranteed to be the same type. |
5994 | static bool collectConcatOps(SDNode *N, SmallVectorImpl<SDValue> &Ops) { |
5995 | assert(Ops.empty() && "Expected an empty ops vector"); |
5996 | |
5997 | if (N->getOpcode() == ISD::CONCAT_VECTORS) { |
5998 | Ops.append(N->op_begin(), N->op_end()); |
5999 | return true; |
6000 | } |
6001 | |
6002 | if (N->getOpcode() == ISD::INSERT_SUBVECTOR) { |
6003 | SDValue Src = N->getOperand(0); |
6004 | SDValue Sub = N->getOperand(1); |
6005 | const APInt &Idx = N->getConstantOperandAPInt(2); |
6006 | EVT VT = Src.getValueType(); |
6007 | EVT SubVT = Sub.getValueType(); |
6008 | |
6009 | // TODO - Handle more general insert_subvector chains. |
6011 | Idx == (VT.getVectorNumElements() / 2)) { |
6012 | // insert_subvector(insert_subvector(undef, x, lo), y, hi) |
6013 | if (Src.getOpcode() == ISD::INSERT_SUBVECTOR && |
6014 | Src.getOperand(1).getValueType() == SubVT && |
6015 | isNullConstant(Src.getOperand(2))) { |
6016 | Ops.push_back(Src.getOperand(1)); |
6017 | Ops.push_back(Sub); |
6018 | return true; |
6019 | } |
6020 | // insert_subvector(x, extract_subvector(x, lo), hi) |
6021 | if (Sub.getOpcode() == ISD::EXTRACT_SUBVECTOR && |
6022 | Sub.getOperand(0) == Src && isNullConstant(Sub.getOperand(1))) { |
6023 | Ops.append(2, Sub); |
6024 | return true; |
6025 | } |
6026 | } |
6027 | } |
6028 | |
6029 | return false; |
6030 | } |
6031 | /// Split a vector into two equal-sized halves. |
6032 | static std::pair<SDValue, SDValue> splitVector(SDValue Op, SelectionDAG &DAG, |
6033 | const SDLoc &dl) { |
6034 | EVT VT = Op.getValueType(); |
6035 | unsigned NumElems = VT.getVectorNumElements(); |
6036 | unsigned SizeInBits = VT.getSizeInBits(); |
6037 | assert((NumElems % 2) == 0 && (SizeInBits % 2) == 0 && |
6038 | "Can't split odd sized vector"); |
6039 | |
6040 | SDValue Lo = extractSubVector(Op, 0, DAG, dl, SizeInBits / 2); |
6041 | SDValue Hi = extractSubVector(Op, NumElems / 2, DAG, dl, SizeInBits / 2); |
6042 | return std::make_pair(Lo, Hi); |
6043 | } |
6044 | |
6045 | /// Break an operation into 2 half sized ops and then concatenate the results. |
6046 | static SDValue splitVectorIntUnary(SDValue Op, SelectionDAG &DAG) { |
6047 | EVT VT = Op.getValueType(); |
6048 | |
6049 | // Make sure we only try to split 256/512-bit types to avoid creating |
6050 | // narrow vectors. |
6051 | assert((Op.getOperand(0).getValueType().is256BitVector() || |
6052 | Op.getOperand(0).getValueType().is512BitVector()) && |
6053 | (VT.is256BitVector() || VT.is512BitVector()) && "Unsupported VT!"); |
6054 | assert(Op.getOperand(0).getValueType().getVectorNumElements() == |
6055 | VT.getVectorNumElements() && |
6056 | "Unexpected VTs!"); |
6057 | |
6058 | SDLoc dl(Op); |
6059 | |
6060 | // Extract the Lo/Hi vectors |
6061 | SDValue Lo, Hi; |
6062 | std::tie(Lo, Hi) = splitVector(Op.getOperand(0), DAG, dl); |
6063 | |
6064 | EVT LoVT, HiVT; |
6065 | std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT); |
6066 | return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, |
6067 | DAG.getNode(Op.getOpcode(), dl, LoVT, Lo), |
6068 | DAG.getNode(Op.getOpcode(), dl, HiVT, Hi)); |
6069 | } |
6070 | |
6071 | /// Break a binary integer operation into 2 half sized ops and then |
6072 | /// concatenate the result back. |
6073 | static SDValue splitVectorIntBinary(SDValue Op, SelectionDAG &DAG) { |
6074 | EVT VT = Op.getValueType(); |
6075 | |
6076 | // Sanity check that all the types match. |
6077 | assert(Op.getOperand(0).getValueType() == VT && |
6078 | Op.getOperand(1).getValueType() == VT && "Unexpected VTs!"); |
6079 | assert((VT.is256BitVector() || VT.is512BitVector()) && "Unsupported VT!"); |
6080 | |
6081 | SDLoc dl(Op); |
6082 | |
6083 | // Extract the LHS Lo/Hi vectors |
6084 | SDValue LHS1, LHS2; |
6085 | std::tie(LHS1, LHS2) = splitVector(Op.getOperand(0), DAG, dl); |
6086 | |
6087 | // Extract the RHS Lo/Hi vectors |
6088 | SDValue RHS1, RHS2; |
6089 | std::tie(RHS1, RHS2) = splitVector(Op.getOperand(1), DAG, dl); |
6090 | |
6091 | EVT LoVT, HiVT; |
6092 | std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT); |
6093 | return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, |
6094 | DAG.getNode(Op.getOpcode(), dl, LoVT, LHS1, RHS1), |
6095 | DAG.getNode(Op.getOpcode(), dl, HiVT, LHS2, RHS2)); |
6096 | } |
6097 | |
6098 | // Helper for splitting operands of an operation to legal target size and |
6099 | // apply a function on each part. |
6100 | // Useful for operations that are available on SSE2 in 128-bit, on AVX2 in |
6101 | // 256-bit and on AVX512BW in 512-bit. The argument VT is the type used for |
6102 | // deciding if/how to split Ops. Ops elements do *not* have to be of type VT. |
6103 | // The argument Builder is a function that will be applied on each split part: |
6104 | // SDValue Builder(SelectionDAG &DAG, const SDLoc &DL, ArrayRef<SDValue> Ops) |
6105 | template <typename F> |
6106 | SDValue SplitOpsAndApply(SelectionDAG &DAG, const X86Subtarget &Subtarget, |
6107 | const SDLoc &DL, EVT VT, ArrayRef<SDValue> Ops, |
6108 | F Builder, bool CheckBWI = true) { |
6109 | assert(Subtarget.hasSSE2() && "Target assumed to support at least SSE2"); |
6110 | unsigned NumSubs = 1; |
6111 | if ((CheckBWI && Subtarget.useBWIRegs()) || |
6112 | (!CheckBWI && Subtarget.useAVX512Regs())) { |
6113 | if (VT.getSizeInBits() > 512) { |
6114 | NumSubs = VT.getSizeInBits() / 512; |
6115 | assert((VT.getSizeInBits() % 512) == 0 && "Illegal vector size"); |
6116 | } |
6117 | } else if (Subtarget.hasAVX2()) { |
6118 | if (VT.getSizeInBits() > 256) { |
6119 | NumSubs = VT.getSizeInBits() / 256; |
6120 | assert((VT.getSizeInBits() % 256) == 0 && "Illegal vector size"); |
6121 | } |
6122 | } else { |
6123 | if (VT.getSizeInBits() > 128) { |
6124 | NumSubs = VT.getSizeInBits() / 128; |
6125 | assert((VT.getSizeInBits() % 128) == 0 && "Illegal vector size"); |
6126 | } |
6127 | } |
6128 | |
6129 | if (NumSubs == 1) |
6130 | return Builder(DAG, DL, Ops); |
6131 | |
6132 | SmallVector<SDValue, 4> Subs; |
6133 | for (unsigned i = 0; i != NumSubs; ++i) { |
6134 | SmallVector<SDValue, 2> SubOps; |
6135 | for (SDValue Op : Ops) { |
6136 | EVT OpVT = Op.getValueType(); |
6137 | unsigned NumSubElts = OpVT.getVectorNumElements() / NumSubs; |
6138 | unsigned SizeSub = OpVT.getSizeInBits() / NumSubs; |
6139 | SubOps.push_back(extractSubVector(Op, i * NumSubElts, DAG, DL, SizeSub)); |
6140 | } |
6141 | Subs.push_back(Builder(DAG, DL, SubOps)); |
6142 | } |
6143 | return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Subs); |
6144 | } |
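// Illustrative standalone sketch (not LLVM code): the subvector-count logic
// of SplitOpsAndApply above, restated over plain integers. WidestLegalBits
// models the widest vector the subtarget can use for this operation (128 for
// SSE2, 256 for AVX2, 512 for AVX512BW/AVX512F depending on CheckBWI).
#include <cassert>
#include <cstdio>

static unsigned numSubVectors(unsigned VTBits, unsigned WidestLegalBits) {
  if (VTBits <= WidestLegalBits)
    return 1; // No splitting needed; Builder is applied to the whole ops.
  assert(VTBits % WidestLegalBits == 0 && "Illegal vector size");
  return VTBits / WidestLegalBits;
}

int main() {
  std::printf("%u\n", numSubVectors(512, 256)); // AVX2 only: 2 x 256-bit
  std::printf("%u\n", numSubVectors(512, 512)); // AVX512BW:  1 x 512-bit
  std::printf("%u\n", numSubVectors(256, 128)); // SSE2:      2 x 128-bit
  return 0;
}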
6145 | |
6146 | // Lower an INSERT_SUBVECTOR of an i1 mask subvector into an i1 mask vector. |
6147 | static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG, |
6148 | const X86Subtarget &Subtarget) { |
6149 | |
6150 | SDLoc dl(Op); |
6151 | SDValue Vec = Op.getOperand(0); |
6152 | SDValue SubVec = Op.getOperand(1); |
6153 | SDValue Idx = Op.getOperand(2); |
6154 | unsigned IdxVal = Op.getConstantOperandVal(2); |
6155 | |
6156 | // Inserting undef is a nop. We can just return the original vector. |
6157 | if (SubVec.isUndef()) |
6158 | return Vec; |
6159 | |
6160 | if (IdxVal == 0 && Vec.isUndef()) |
6161 | return Op; |
6162 | |
6163 | MVT OpVT = Op.getSimpleValueType(); |
6164 | unsigned NumElems = OpVT.getVectorNumElements(); |
6165 | SDValue ZeroIdx = DAG.getIntPtrConstant(0, dl); |
6166 | |
6167 | // Extend to natively supported kshift. |
6168 | MVT WideOpVT = OpVT; |
6169 | if ((!Subtarget.hasDQI() && NumElems == 8) || NumElems < 8) |
6170 | WideOpVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1; |
6171 | |
6172 | // Inserting into the lsbs of a zero vector is legal. ISel will insert |
6173 | // shifts if necessary. |
6174 | if (IdxVal == 0 && ISD::isBuildVectorAllZeros(Vec.getNode())) { |
6175 | // May need to promote to a legal type. |
6176 | Op = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, |
6177 | DAG.getConstant(0, dl, WideOpVT), |
6178 | SubVec, Idx); |
6179 | return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx); |
6180 | } |
6181 | |
6182 | MVT SubVecVT = SubVec.getSimpleValueType(); |
6183 | unsigned SubVecNumElems = SubVecVT.getVectorNumElements(); |
6184 | assert(IdxVal + SubVecNumElems <= NumElems && |
6185 | IdxVal % SubVecVT.getSizeInBits() == 0 && |
6186 | "Unexpected index value in INSERT_SUBVECTOR"); |
6187 | |
6188 | SDValue Undef = DAG.getUNDEF(WideOpVT); |
6189 | |
6190 | if (IdxVal == 0) { |
6191 | // Zero lower bits of the Vec |
6192 | SDValue ShiftBits = DAG.getTargetConstant(SubVecNumElems, dl, MVT::i8); |
6193 | Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec, |
6194 | ZeroIdx); |
6195 | Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, ShiftBits); |
6196 | Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, ShiftBits); |
6197 | // Merge them together, SubVec should be zero extended. |
6198 | SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, |
6199 | DAG.getConstant(0, dl, WideOpVT), |
6200 | SubVec, ZeroIdx); |
6201 | Op = DAG.getNode(ISD::OR, dl, WideOpVT, Vec, SubVec); |
6202 | return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx); |
6203 | } |
6204 | |
6205 | SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, |
6206 | Undef, SubVec, ZeroIdx); |
6207 | |
6208 | if (Vec.isUndef()) { |
6209 | assert(IdxVal != 0 && "Unexpected index"); |
6210 | SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec, |
6211 | DAG.getTargetConstant(IdxVal, dl, MVT::i8)); |
6212 | return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, SubVec, ZeroIdx); |
6213 | } |
6214 | |
6215 | if (ISD::isBuildVectorAllZeros(Vec.getNode())) { |
6216 | assert(IdxVal != 0 && "Unexpected index"); |
6217 | NumElems = WideOpVT.getVectorNumElements(); |
6218 | unsigned ShiftLeft = NumElems - SubVecNumElems; |
6219 | unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal; |
6220 | SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec, |
6221 | DAG.getTargetConstant(ShiftLeft, dl, MVT::i8)); |
6222 | if (ShiftRight != 0) |
6223 | SubVec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, SubVec, |
6224 | DAG.getTargetConstant(ShiftRight, dl, MVT::i8)); |
6225 | return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, SubVec, ZeroIdx); |
6226 | } |
6227 | |
6228 | // Simple case when we put subvector in the upper part |
6229 | if (IdxVal + SubVecNumElems == NumElems) { |
6230 | SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec, |
6231 | DAG.getTargetConstant(IdxVal, dl, MVT::i8)); |
6232 | if (SubVecNumElems * 2 == NumElems) { |
6233 | // Special case, use legal zero extending insert_subvector. This allows |
6234 | // isel to optimize when bits are known zero. |
6235 | Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVecVT, Vec, ZeroIdx); |
6236 | Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, |
6237 | DAG.getConstant(0, dl, WideOpVT), |
6238 | Vec, ZeroIdx); |
6239 | } else { |
6240 | // Otherwise use explicit shifts to zero the bits. |
6241 | Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, |
6242 | Undef, Vec, ZeroIdx); |
6243 | NumElems = WideOpVT.getVectorNumElements(); |
6244 | SDValue ShiftBits = DAG.getTargetConstant(NumElems - IdxVal, dl, MVT::i8); |
6245 | Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, ShiftBits); |
6246 | Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, ShiftBits); |
6247 | } |
6248 | Op = DAG.getNode(ISD::OR, dl, WideOpVT, Vec, SubVec); |
6249 | return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx); |
6250 | } |
6251 | |
6252 | // Inserting into the middle is more complicated. |
6253 | |
6254 | NumElems = WideOpVT.getVectorNumElements(); |
6255 | |
6256 | // Widen the vector if needed. |
6257 | Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec, ZeroIdx); |
6258 | |
6259 | unsigned ShiftLeft = NumElems - SubVecNumElems; |
6260 | unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal; |
6261 | |
6262 | // Do an optimization for the most frequently used case. |
6263 | if (WideOpVT != MVT::v64i1 || Subtarget.is64Bit()) { |
6264 | APInt Mask0 = APInt::getBitsSet(NumElems, IdxVal, IdxVal + SubVecNumElems); |
6265 | Mask0.flipAllBits(); |
6266 | SDValue CMask0 = DAG.getConstant(Mask0, dl, MVT::getIntegerVT(NumElems)); |
6267 | SDValue VMask0 = DAG.getNode(ISD::BITCAST, dl, WideOpVT, CMask0); |
6268 | Vec = DAG.getNode(ISD::AND, dl, WideOpVT, Vec, VMask0); |
6269 | SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec, |
6270 | DAG.getTargetConstant(ShiftLeft, dl, MVT::i8)); |
6271 | SubVec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, SubVec, |
6272 | DAG.getTargetConstant(ShiftRight, dl, MVT::i8)); |
6273 | Op = DAG.getNode(ISD::OR, dl, WideOpVT, Vec, SubVec); |
6274 | |
6275 | // Reduce to original width if needed. |
6276 | return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx); |
6277 | } |
6278 | |
6279 | // Clear the upper bits of the subvector and move it to its insert position. |
6280 | SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec, |
6281 | DAG.getTargetConstant(ShiftLeft, dl, MVT::i8)); |
6282 | SubVec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, SubVec, |
6283 | DAG.getTargetConstant(ShiftRight, dl, MVT::i8)); |
6284 | |
6285 | // Isolate the bits below the insertion point. |
6286 | unsigned LowShift = NumElems - IdxVal; |
6287 | SDValue Low = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, |
6288 | DAG.getTargetConstant(LowShift, dl, MVT::i8)); |
6289 | Low = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Low, |
6290 | DAG.getTargetConstant(LowShift, dl, MVT::i8)); |
6291 | |
6292 | // Isolate the bits after the last inserted bit. |
6293 | unsigned HighShift = IdxVal + SubVecNumElems; |
6294 | SDValue High = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, |
6295 | DAG.getTargetConstant(HighShift, dl, MVT::i8)); |
6296 | High = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, High, |
6297 | DAG.getTargetConstant(HighShift, dl, MVT::i8)); |
6298 | |
6299 | // Now OR all 3 pieces together. |
6300 | Vec = DAG.getNode(ISD::OR, dl, WideOpVT, Low, High); |
6301 | SubVec = DAG.getNode(ISD::OR, dl, WideOpVT, SubVec, Vec); |
6302 | |
6303 | // Reduce to original width if needed. |
6304 | return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, SubVec, ZeroIdx); |
6305 | } |
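// Illustrative standalone sketch (not LLVM code): the general middle-insertion
// path of insert1BitVector above, modeled on a 16-bit mask register with plain
// shifts standing in for KSHIFTL/KSHIFTR. Assumes the strictly-interior case,
// i.e. IdxVal != 0 and IdxVal + SubNumElts != 16 (the earlier fast paths).
#include <cstdint>
#include <cstdio>

static uint16_t insertMaskBits(uint16_t Vec, uint16_t Sub, unsigned SubNumElts,
                               unsigned IdxVal) {
  const unsigned NumElts = 16;
  // Clear the upper bits of the subvector and move it to its insert position.
  unsigned ShiftLeft = NumElts - SubNumElts;
  unsigned ShiftRight = NumElts - SubNumElts - IdxVal;
  uint32_t S = (((uint32_t)Sub << ShiftLeft) & 0xFFFFu) >> ShiftRight;
  // Isolate the bits below the insertion point.
  unsigned LowShift = NumElts - IdxVal;
  uint32_t Low = (((uint32_t)Vec << LowShift) & 0xFFFFu) >> LowShift;
  // Isolate the bits at and above the end of the inserted range.
  unsigned HighShift = IdxVal + SubNumElts;
  uint32_t High = ((uint32_t)Vec >> HighShift) << HighShift;
  // OR the three pieces together.
  return (uint16_t)(Low | High | S);
}

int main() {
  // Insert the 4-bit mask 0b1011 at element 4 of an all-ones v16i1.
  std::printf("0x%04X\n", insertMaskBits(0xFFFF, 0xB, 4, 4)); // 0xFFBF
  return 0;
}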
6306 | |
6307 | static SDValue concatSubVectors(SDValue V1, SDValue V2, SelectionDAG &DAG, |
6308 | const SDLoc &dl) { |
6309 | assert(V1.getValueType() == V2.getValueType() && "subvector type mismatch"); |
6310 | EVT SubVT = V1.getValueType(); |
6311 | EVT SubSVT = SubVT.getScalarType(); |
6312 | unsigned SubNumElts = SubVT.getVectorNumElements(); |
6313 | unsigned SubVectorWidth = SubVT.getSizeInBits(); |
6314 | EVT VT = EVT::getVectorVT(*DAG.getContext(), SubSVT, 2 * SubNumElts); |
6315 | SDValue V = insertSubVector(DAG.getUNDEF(VT), V1, 0, DAG, dl, SubVectorWidth); |
6316 | return insertSubVector(V, V2, SubNumElts, DAG, dl, SubVectorWidth); |
6317 | } |
6318 | |
6319 | |
6320 | /// Returns a vector of specified type with all bits set. Always build ones |
6321 | /// vectors as <4/8/16 x i32>, bitcast to the original type so they get CSE'd. |
6322 | static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl) { |
6323 | assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) && |
6324 | "Expected a 128/256/512-bit vector type"); |
6325 | |
6326 | APInt Ones = APInt::getAllOnesValue(32); |
6327 | unsigned NumElts = VT.getSizeInBits() / 32; |
6328 | SDValue Vec = DAG.getConstant(Ones, dl, MVT::getVectorVT(MVT::i32, NumElts)); |
6329 | return DAG.getBitcast(VT, Vec); |
6330 | } |
6331 | |
6332 | // Convert *_EXTEND_VECTOR_INREG to *_EXTEND opcode. |
6333 | static unsigned getOpcode_EXTEND(unsigned Opcode) { |
6334 | switch (Opcode) { |
6335 | case ISD::ANY_EXTEND: |
6336 | case ISD::ANY_EXTEND_VECTOR_INREG: |
6337 | return ISD::ANY_EXTEND; |
6338 | case ISD::ZERO_EXTEND: |
6339 | case ISD::ZERO_EXTEND_VECTOR_INREG: |
6340 | return ISD::ZERO_EXTEND; |
6341 | case ISD::SIGN_EXTEND: |
6342 | case ISD::SIGN_EXTEND_VECTOR_INREG: |
6343 | return ISD::SIGN_EXTEND; |
6344 | } |
6345 | llvm_unreachable("Unknown opcode"); |
6346 | } |
6347 | |
6348 | // Convert *_EXTEND to *_EXTEND_VECTOR_INREG opcode. |
6349 | static unsigned getOpcode_EXTEND_VECTOR_INREG(unsigned Opcode) { |
6350 | switch (Opcode) { |
6351 | case ISD::ANY_EXTEND: |
6352 | case ISD::ANY_EXTEND_VECTOR_INREG: |
6353 | return ISD::ANY_EXTEND_VECTOR_INREG; |
6354 | case ISD::ZERO_EXTEND: |
6355 | case ISD::ZERO_EXTEND_VECTOR_INREG: |
6356 | return ISD::ZERO_EXTEND_VECTOR_INREG; |
6357 | case ISD::SIGN_EXTEND: |
6358 | case ISD::SIGN_EXTEND_VECTOR_INREG: |
6359 | return ISD::SIGN_EXTEND_VECTOR_INREG; |
6360 | } |
6361 | llvm_unreachable("Unknown opcode"); |
6362 | } |
6363 | |
6364 | static SDValue getEXTEND_VECTOR_INREG(unsigned Opcode, const SDLoc &DL, EVT VT, |
6365 | SDValue In, SelectionDAG &DAG) { |
6366 | EVT InVT = In.getValueType(); |
6367 | assert(VT.isVector() && InVT.isVector() && "Expected vector VTs."); |
6368 | assert((ISD::ANY_EXTEND == Opcode || ISD::SIGN_EXTEND == Opcode || |
6369 | ISD::ZERO_EXTEND == Opcode) && |
6370 | "Unknown extension opcode"); |
6371 | |
6372 | // For 256-bit vectors, we only need the lower (128-bit) input half. |
6373 | // For 512-bit vectors, we only need the lower input half or quarter. |
6374 | if (InVT.getSizeInBits() > 128) { |
6375 | assert(VT.getSizeInBits() == InVT.getSizeInBits() && |
6376 | "Expected VTs to be the same size!"); |
6377 | unsigned Scale = VT.getScalarSizeInBits() / InVT.getScalarSizeInBits(); |
6378 | In = extractSubVector(In, 0, DAG, DL, |
6379 | std::max(128U, (unsigned)VT.getSizeInBits() / Scale)); |
6380 | InVT = In.getValueType(); |
6381 | } |
6382 | |
6383 | if (VT.getVectorNumElements() != InVT.getVectorNumElements()) |
6384 | Opcode = getOpcode_EXTEND_VECTOR_INREG(Opcode); |
6385 | |
6386 | return DAG.getNode(Opcode, DL, VT, In); |
6387 | } |
6388 | |
6389 | // Match (xor X, -1) -> X. |
6390 | // Match extract_subvector(xor X, -1) -> extract_subvector(X). |
6391 | // Match concat_vectors(xor X, -1, xor Y, -1) -> concat_vectors(X, Y). |
6392 | static SDValue IsNOT(SDValue V, SelectionDAG &DAG) { |
6393 | V = peekThroughBitcasts(V); |
6394 | if (V.getOpcode() == ISD::XOR && |
6395 | ISD::isBuildVectorAllOnes(V.getOperand(1).getNode())) |
6396 | return V.getOperand(0); |
6397 | if (V.getOpcode() == ISD::EXTRACT_SUBVECTOR && |
6398 | (isNullConstant(V.getOperand(1)) || V.getOperand(0).hasOneUse())) { |
6399 | if (SDValue Not = IsNOT(V.getOperand(0), DAG)) { |
6400 | Not = DAG.getBitcast(V.getOperand(0).getValueType(), Not); |
6401 | return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(Not), V.getValueType(), |
6402 | Not, V.getOperand(1)); |
6403 | } |
6404 | } |
6405 | SmallVector<SDValue, 2> CatOps; |
6406 | if (collectConcatOps(V.getNode(), CatOps)) { |
6407 | for (SDValue &CatOp : CatOps) { |
6408 | SDValue NotCat = IsNOT(CatOp, DAG); |
6409 | if (!NotCat) return SDValue(); |
6410 | CatOp = DAG.getBitcast(CatOp.getValueType(), NotCat); |
6411 | } |
6412 | return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(V), V.getValueType(), CatOps); |
6413 | } |
6414 | return SDValue(); |
6415 | } |
6416 | |
6417 | void llvm::createUnpackShuffleMask(EVT VT, SmallVectorImpl<int> &Mask, |
6418 | bool Lo, bool Unary) { |
6419 | assert(VT.getScalarType().isSimple() && (VT.getSizeInBits() % 128) == 0 && |
6420 | "Illegal vector type to unpack"); |
6421 | assert(Mask.empty() && "Expected an empty shuffle mask vector"); |
6422 | int NumElts = VT.getVectorNumElements(); |
6423 | int NumEltsInLane = 128 / VT.getScalarSizeInBits(); |
6424 | for (int i = 0; i < NumElts; ++i) { |
6425 | unsigned LaneStart = (i / NumEltsInLane) * NumEltsInLane; |
6426 | int Pos = (i % NumEltsInLane) / 2 + LaneStart; |
6427 | Pos += (Unary ? 0 : NumElts * (i % 2)); |
6428 | Pos += (Lo ? 0 : NumEltsInLane / 2); |
6429 | Mask.push_back(Pos); |
6430 | } |
6431 | } |
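// Illustrative standalone sketch (not LLVM code): the index arithmetic of
// createUnpackShuffleMask above, for a v8i16 (one 128-bit lane, 8 elements).
// The binary lo-unpack interleaves the low halves of both sources, matching
// PUNPCKLWD: <0, 8, 1, 9, 2, 10, 3, 11> in two-input index space.
#include <cstdio>
#include <vector>

static std::vector<int> unpackMask(int NumElts, int NumEltsInLane, bool Lo,
                                   bool Unary) {
  std::vector<int> Mask;
  for (int i = 0; i < NumElts; ++i) {
    int LaneStart = (i / NumEltsInLane) * NumEltsInLane;
    int Pos = (i % NumEltsInLane) / 2 + LaneStart;
    Pos += (Unary ? 0 : NumElts * (i % 2));
    Pos += (Lo ? 0 : NumEltsInLane / 2);
    Mask.push_back(Pos);
  }
  return Mask;
}

int main() {
  for (int M : unpackMask(8, 8, /*Lo=*/true, /*Unary=*/false))
    std::printf("%d ", M); // 0 8 1 9 2 10 3 11
  std::printf("\n");
  return 0;
}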
6432 | |
6433 | /// Similar to unpacklo/unpackhi, but without the 128-bit lane limitation |
6434 | /// imposed by AVX and specific to the unary pattern. Example: |
6435 | /// v8iX Lo --> <0, 0, 1, 1, 2, 2, 3, 3> |
6436 | /// v8iX Hi --> <4, 4, 5, 5, 6, 6, 7, 7> |
6437 | void llvm::createSplat2ShuffleMask(MVT VT, SmallVectorImpl<int> &Mask, |
6438 | bool Lo) { |
6439 | assert(Mask.empty() && "Expected an empty shuffle mask vector"); |
6440 | int NumElts = VT.getVectorNumElements(); |
6441 | for (int i = 0; i < NumElts; ++i) { |
6442 | int Pos = i / 2; |
6443 | Pos += (Lo ? 0 : NumElts / 2); |
6444 | Mask.push_back(Pos); |
6445 | } |
6446 | } |
6447 | |
6448 | /// Returns a vector_shuffle node for an unpackl operation. |
6449 | static SDValue getUnpackl(SelectionDAG &DAG, const SDLoc &dl, EVT VT, |
6450 | SDValue V1, SDValue V2) { |
6451 | SmallVector<int, 8> Mask; |
6452 | createUnpackShuffleMask(VT, Mask, true, false); |
6453 | return DAG.getVectorShuffle(VT, dl, V1, V2, Mask); |
6454 | } |
6455 | |
6456 | /// Returns a vector_shuffle node for an unpackh operation. |
6457 | static SDValue getUnpackh(SelectionDAG &DAG, const SDLoc &dl, EVT VT, |
6458 | SDValue V1, SDValue V2) { |
6459 | SmallVector<int, 8> Mask; |
6460 | createUnpackShuffleMask(VT, Mask, false, false); |
6461 | return DAG.getVectorShuffle(VT, dl, V1, V2, Mask); |
6462 | } |
6463 | |
6464 | /// Return a vector_shuffle with the specified vector of zero or undef vector. |
6465 | /// This produces a shuffle where the low element of V2 is swizzled into the |
6466 | /// zero/undef vector, landing at element Idx. |
6467 | /// This produces a shuffle mask like 4,1,2,3 (idx=0) or 0,1,2,4 (idx=3). |
6468 | static SDValue getShuffleVectorZeroOrUndef(SDValue V2, int Idx, |
6469 | bool IsZero, |
6470 | const X86Subtarget &Subtarget, |
6471 | SelectionDAG &DAG) { |
6472 | MVT VT = V2.getSimpleValueType(); |
6473 | SDValue V1 = IsZero |
6474 | ? getZeroVector(VT, Subtarget, DAG, SDLoc(V2)) : DAG.getUNDEF(VT); |
6475 | int NumElems = VT.getVectorNumElements(); |
6476 | SmallVector<int, 16> MaskVec(NumElems); |
6477 | for (int i = 0; i != NumElems; ++i) |
6478 | // If this is the insertion idx, put the low elt of V2 here. |
6479 | MaskVec[i] = (i == Idx) ? NumElems : i; |
6480 | return DAG.getVectorShuffle(VT, SDLoc(V2), V1, V2, MaskVec); |
6481 | } |
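// Illustrative standalone sketch (not LLVM code): the mask produced by
// getShuffleVectorZeroOrUndef above for a 4-element vector. Element Idx takes
// the low element of V2 (two-input index NumElems); every other lane reads
// from the zero/undef vector V1, giving masks like <4,1,2,3> or <0,1,2,4>.
#include <cstdio>

int main() {
  const int NumElems = 4;
  for (int Idx : {0, 3}) {
    for (int i = 0; i != NumElems; ++i)
      std::printf("%d ", i == Idx ? NumElems : i);
    std::printf("\n"); // "4 1 2 3" then "0 1 2 4"
  }
  return 0;
}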
6482 | |
6483 | static const Constant *getTargetConstantFromBasePtr(SDValue Ptr) { |
6484 | if (Ptr.getOpcode() == X86ISD::Wrapper || |
6485 | Ptr.getOpcode() == X86ISD::WrapperRIP) |
6486 | Ptr = Ptr.getOperand(0); |
6487 | |
6488 | auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr); |
6489 | if (!CNode || CNode->isMachineConstantPoolEntry() || CNode->getOffset() != 0) |
6490 | return nullptr; |
6491 | |
6492 | return CNode->getConstVal(); |
6493 | } |
6494 | |
6495 | static const Constant *getTargetConstantFromNode(LoadSDNode *Load) { |
6496 | if (!Load || !ISD::isNormalLoad(Load)) |
6497 | return nullptr; |
6498 | return getTargetConstantFromBasePtr(Load->getBasePtr()); |
6499 | } |
6500 | |
6501 | static const Constant *getTargetConstantFromNode(SDValue Op) { |
6502 | Op = peekThroughBitcasts(Op); |
6503 | return getTargetConstantFromNode(dyn_cast<LoadSDNode>(Op)); |
6504 | } |
6505 | |
6506 | const Constant * |
6507 | X86TargetLowering::getTargetConstantFromLoad(LoadSDNode *LD) const { |
6508 | assert(LD && "Unexpected null LoadSDNode"); |
6509 | return getTargetConstantFromNode(LD); |
6510 | } |
6511 | |
6512 | // Attempt to extract the raw constant bits (and undef elements) from a node. |
6513 | static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits, |
6514 | APInt &UndefElts, |
6515 | SmallVectorImpl<APInt> &EltBits, |
6516 | bool AllowWholeUndefs = true, |
6517 | bool AllowPartialUndefs = true) { |
6518 | assert(EltBits.empty() && "Expected an empty EltBits vector"); |
6519 | |
6520 | Op = peekThroughBitcasts(Op); |
6521 | |
6522 | EVT VT = Op.getValueType(); |
6523 | unsigned SizeInBits = VT.getSizeInBits(); |
6524 | assert((SizeInBits % EltSizeInBits) == 0 && "Can't split constant!"); |
6525 | unsigned NumElts = SizeInBits / EltSizeInBits; |
6526 | |
6527 | // Bitcast a source array of element bits to the target size. |
6528 | auto CastBitData = [&](APInt &UndefSrcElts, ArrayRef<APInt> SrcEltBits) { |
6529 | unsigned NumSrcElts = UndefSrcElts.getBitWidth(); |
6530 | unsigned SrcEltSizeInBits = SrcEltBits[0].getBitWidth(); |
6531 | assert((NumSrcElts * SrcEltSizeInBits) == SizeInBits && |
6532 | "Constant bit sizes don't match"); |
6533 | |
6534 | // Don't split if we don't allow undef bits. |
6535 | bool AllowUndefs = AllowWholeUndefs || AllowPartialUndefs; |
6536 | if (UndefSrcElts.getBoolValue() && !AllowUndefs) |
6537 | return false; |
6538 | |
6539 | // If we're already the right size, don't bother bitcasting. |
6540 | if (NumSrcElts == NumElts) { |
6541 | UndefElts = UndefSrcElts; |
6542 | EltBits.assign(SrcEltBits.begin(), SrcEltBits.end()); |
6543 | return true; |
6544 | } |
6545 | |
6546 | // Extract all the undef/constant element data and pack into single bitsets. |
6547 | APInt UndefBits(SizeInBits, 0); |
6548 | APInt MaskBits(SizeInBits, 0); |
6549 | |
6550 | for (unsigned i = 0; i != NumSrcElts; ++i) { |
6551 | unsigned BitOffset = i * SrcEltSizeInBits; |
6552 | if (UndefSrcElts[i]) |
6553 | UndefBits.setBits(BitOffset, BitOffset + SrcEltSizeInBits); |
6554 | MaskBits.insertBits(SrcEltBits[i], BitOffset); |
6555 | } |
6556 | |
6557 | // Split the undef/constant single bitset data into the target elements. |
6558 | UndefElts = APInt(NumElts, 0); |
6559 | EltBits.resize(NumElts, APInt(EltSizeInBits, 0)); |
6560 | |
6561 | for (unsigned i = 0; i != NumElts; ++i) { |
6562 | unsigned BitOffset = i * EltSizeInBits; |
6563 | APInt UndefEltBits = UndefBits.extractBits(EltSizeInBits, BitOffset); |
6564 | |
6565 | // Only treat an element as UNDEF if all bits are UNDEF. |
6566 | if (UndefEltBits.isAllOnesValue()) { |
6567 | if (!AllowWholeUndefs) |
6568 | return false; |
6569 | UndefElts.setBit(i); |
6570 | continue; |
6571 | } |
6572 | |
6573 | // If only some bits are UNDEF then treat them as zero (or bail if not |
6574 | // supported). |
6575 | if (UndefEltBits.getBoolValue() && !AllowPartialUndefs) |
6576 | return false; |
6577 | |
6578 | EltBits[i] = MaskBits.extractBits(EltSizeInBits, BitOffset); |
6579 | } |
6580 | return true; |
6581 | }; |
6582 | |
6583 | // Collect constant bits and insert into mask/undef bit masks. |
6584 | auto CollectConstantBits = [](const Constant *Cst, APInt &Mask, APInt &Undefs, |
6585 | unsigned UndefBitIndex) { |
6586 | if (!Cst) |
6587 | return false; |
6588 | if (isa<UndefValue>(Cst)) { |
6589 | Undefs.setBit(UndefBitIndex); |
6590 | return true; |
6591 | } |
6592 | if (auto *CInt = dyn_cast<ConstantInt>(Cst)) { |
6593 | Mask = CInt->getValue(); |
6594 | return true; |
6595 | } |
6596 | if (auto *CFP = dyn_cast<ConstantFP>(Cst)) { |
6597 | Mask = CFP->getValueAPF().bitcastToAPInt(); |
6598 | return true; |
6599 | } |
6600 | return false; |
6601 | }; |
6602 | |
6603 | // Handle UNDEFs. |
6604 | if (Op.isUndef()) { |
6605 | APInt UndefSrcElts = APInt::getAllOnesValue(NumElts); |
6606 | SmallVector<APInt, 64> SrcEltBits(NumElts, APInt(EltSizeInBits, 0)); |
6607 | return CastBitData(UndefSrcElts, SrcEltBits); |
6608 | } |
6609 | |
6610 | // Extract scalar constant bits. |
6611 | if (auto *Cst = dyn_cast<ConstantSDNode>(Op)) { |
6612 | APInt UndefSrcElts = APInt::getNullValue(1); |
6613 | SmallVector<APInt, 64> SrcEltBits(1, Cst->getAPIntValue()); |
6614 | return CastBitData(UndefSrcElts, SrcEltBits); |
6615 | } |
6616 | if (auto *Cst = dyn_cast<ConstantFPSDNode>(Op)) { |
6617 | APInt UndefSrcElts = APInt::getNullValue(1); |
6618 | APInt RawBits = Cst->getValueAPF().bitcastToAPInt(); |
6619 | SmallVector<APInt, 64> SrcEltBits(1, RawBits); |
6620 | return CastBitData(UndefSrcElts, SrcEltBits); |
6621 | } |
6622 | |
6623 | // Extract constant bits from build vector. |
6624 | if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) { |
6625 | unsigned SrcEltSizeInBits = VT.getScalarSizeInBits(); |
6626 | unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits; |
6627 | |
6628 | APInt UndefSrcElts(NumSrcElts, 0); |
6629 | SmallVector<APInt, 64> SrcEltBits(NumSrcElts, APInt(SrcEltSizeInBits, 0)); |
6630 | for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) { |
6631 | const SDValue &Src = Op.getOperand(i); |
6632 | if (Src.isUndef()) { |
6633 | UndefSrcElts.setBit(i); |
6634 | continue; |
6635 | } |
6636 | auto *Cst = cast<ConstantSDNode>(Src); |
6637 | SrcEltBits[i] = Cst->getAPIntValue().zextOrTrunc(SrcEltSizeInBits); |
6638 | } |
6639 | return CastBitData(UndefSrcElts, SrcEltBits); |
6640 | } |
6641 | if (ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode())) { |
6642 | unsigned SrcEltSizeInBits = VT.getScalarSizeInBits(); |
6643 | unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits; |
6644 | |
6645 | APInt UndefSrcElts(NumSrcElts, 0); |
6646 | SmallVector<APInt, 64> SrcEltBits(NumSrcElts, APInt(SrcEltSizeInBits, 0)); |
6647 | for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) { |
6648 | const SDValue &Src = Op.getOperand(i); |
6649 | if (Src.isUndef()) { |
6650 | UndefSrcElts.setBit(i); |
6651 | continue; |
6652 | } |
6653 | auto *Cst = cast<ConstantFPSDNode>(Src); |
6654 | APInt RawBits = Cst->getValueAPF().bitcastToAPInt(); |
6655 | SrcEltBits[i] = RawBits.zextOrTrunc(SrcEltSizeInBits); |
6656 | } |
6657 | return CastBitData(UndefSrcElts, SrcEltBits); |
6658 | } |
6659 | |
6660 | // Extract constant bits from constant pool vector. |
6661 | if (auto *Cst = getTargetConstantFromNode(Op)) { |
6662 | Type *CstTy = Cst->getType(); |
6663 | unsigned CstSizeInBits = CstTy->getPrimitiveSizeInBits(); |
6664 | if (!CstTy->isVectorTy() || (CstSizeInBits % SizeInBits) != 0) |
6665 | return false; |
6666 | |
6667 | unsigned SrcEltSizeInBits = CstTy->getScalarSizeInBits(); |
6668 | unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits; |
6669 | |
6670 | APInt UndefSrcElts(NumSrcElts, 0); |
6671 | SmallVector<APInt, 64> SrcEltBits(NumSrcElts, APInt(SrcEltSizeInBits, 0)); |
6672 | for (unsigned i = 0; i != NumSrcElts; ++i) |
6673 | if (!CollectConstantBits(Cst->getAggregateElement(i), SrcEltBits[i], |
6674 | UndefSrcElts, i)) |
6675 | return false; |
6676 | |
6677 | return CastBitData(UndefSrcElts, SrcEltBits); |
6678 | } |
6679 | |
6680 | // Extract constant bits from a broadcasted constant pool scalar. |
6681 | if (Op.getOpcode() == X86ISD::VBROADCAST_LOAD && |
6682 | EltSizeInBits <= VT.getScalarSizeInBits()) { |
6683 | auto *MemIntr = cast<MemIntrinsicSDNode>(Op); |
6684 | if (MemIntr->getMemoryVT().getScalarSizeInBits() != VT.getScalarSizeInBits()) |
6685 | return false; |
6686 | |
6687 | SDValue Ptr = MemIntr->getBasePtr(); |
6688 | if (const Constant *C = getTargetConstantFromBasePtr(Ptr)) { |
6689 | unsigned SrcEltSizeInBits = C->getType()->getScalarSizeInBits(); |
6690 | unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits; |
6691 | |
6692 | APInt UndefSrcElts(NumSrcElts, 0); |
6693 | SmallVector<APInt, 64> SrcEltBits(1, APInt(SrcEltSizeInBits, 0)); |
6694 | if (CollectConstantBits(C, SrcEltBits[0], UndefSrcElts, 0)) { |
6695 | if (UndefSrcElts[0]) |
6696 | UndefSrcElts.setBits(0, NumSrcElts); |
6697 | SrcEltBits.append(NumSrcElts - 1, SrcEltBits[0]); |
6698 | return CastBitData(UndefSrcElts, SrcEltBits); |
6699 | } |
6700 | } |
6701 | } |
6702 | |
6703 | // Extract constant bits from a subvector broadcast. |
6704 | if (Op.getOpcode() == X86ISD::SUBV_BROADCAST_LOAD) { |
6705 | auto *MemIntr = cast<MemIntrinsicSDNode>(Op); |
6706 | SDValue Ptr = MemIntr->getBasePtr(); |
6707 | |
6708 | |
6709 | if (const Constant *Cst = getTargetConstantFromBasePtr(Ptr)) { |
6710 | Type *CstTy = Cst->getType(); |
6711 | unsigned CstSizeInBits = CstTy->getPrimitiveSizeInBits(); |
6712 | unsigned SubVecSizeInBits = MemIntr->getMemoryVT().getStoreSizeInBits(); |
6713 | if (!CstTy->isVectorTy() || (CstSizeInBits % SubVecSizeInBits) != 0 || |
6714 | (SizeInBits % SubVecSizeInBits) != 0) |
6715 | return false; |
6716 | unsigned CstEltSizeInBits = CstTy->getScalarSizeInBits(); |
6717 | unsigned NumSubElts = SubVecSizeInBits / CstEltSizeInBits; |
6718 | unsigned NumSubVecs = SizeInBits / SubVecSizeInBits; |
6719 | APInt UndefSubElts(NumSubElts, 0); |
6720 | SmallVector<APInt, 64> SubEltBits(NumSubElts * NumSubVecs, |
6721 | APInt(CstEltSizeInBits, 0)); |
6722 | for (unsigned i = 0; i != NumSubElts; ++i) { |
6723 | if (!CollectConstantBits(Cst->getAggregateElement(i), SubEltBits[i], |
6724 | UndefSubElts, i)) |
6725 | return false; |
6726 | for (unsigned j = 1; j != NumSubVecs; ++j) |
6727 | SubEltBits[i + (j * NumSubElts)] = SubEltBits[i]; |
6728 | } |
6729 | UndefSubElts = APInt::getSplat(NumSubVecs * UndefSubElts.getBitWidth(), |
6730 | UndefSubElts); |
6731 | return CastBitData(UndefSubElts, SubEltBits); |
6732 | } |
6733 | } |
6734 | |
6735 | // Extract a rematerialized scalar constant insertion. |
6736 | if (Op.getOpcode() == X86ISD::VZEXT_MOVL && |
6737 | Op.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR && |
6738 | isa<ConstantSDNode>(Op.getOperand(0).getOperand(0))) { |
6739 | unsigned SrcEltSizeInBits = VT.getScalarSizeInBits(); |
6740 | unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits; |
6741 | |
6742 | APInt UndefSrcElts(NumSrcElts, 0); |
6743 | SmallVector<APInt, 64> SrcEltBits; |
6744 | auto *CN = cast<ConstantSDNode>(Op.getOperand(0).getOperand(0)); |
6745 | SrcEltBits.push_back(CN->getAPIntValue().zextOrTrunc(SrcEltSizeInBits)); |
6746 | SrcEltBits.append(NumSrcElts - 1, APInt(SrcEltSizeInBits, 0)); |
6747 | return CastBitData(UndefSrcElts, SrcEltBits); |
6748 | } |
6749 | |
6750 | // Insert constant bits from a base and sub vector sources. |
6751 | if (Op.getOpcode() == ISD::INSERT_SUBVECTOR) { |
6752 | // If bitcasts to larger elements we might lose track of undefs - don't |
6753 | // allow any to be safe. |
6754 | unsigned SrcEltSizeInBits = VT.getScalarSizeInBits(); |
6755 | bool AllowUndefs = EltSizeInBits >= SrcEltSizeInBits; |
6756 | |
6757 | APInt UndefSrcElts, UndefSubElts; |
6758 | SmallVector<APInt, 32> EltSrcBits, EltSubBits; |
6759 | if (getTargetConstantBitsFromNode(Op.getOperand(1), SrcEltSizeInBits, |
6760 | UndefSubElts, EltSubBits, |
6761 | AllowWholeUndefs && AllowUndefs, |
6762 | AllowPartialUndefs && AllowUndefs) && |
6763 | getTargetConstantBitsFromNode(Op.getOperand(0), SrcEltSizeInBits, |
6764 | UndefSrcElts, EltSrcBits, |
6765 | AllowWholeUndefs && AllowUndefs, |
6766 | AllowPartialUndefs && AllowUndefs)) { |
6767 | unsigned BaseIdx = Op.getConstantOperandVal(2); |
6768 | UndefSrcElts.insertBits(UndefSubElts, BaseIdx); |
6769 | for (unsigned i = 0, e = EltSubBits.size(); i != e; ++i) |
6770 | EltSrcBits[BaseIdx + i] = EltSubBits[i]; |
6771 | return CastBitData(UndefSrcElts, EltSrcBits); |
6772 | } |
6773 | } |
6774 | |
6775 | // Extract constant bits from a subvector's source. |
6776 | if (Op.getOpcode() == ISD::EXTRACT_SUBVECTOR) { |
6777 | // TODO - support extract_subvector through bitcasts. |
6778 | if (EltSizeInBits != VT.getScalarSizeInBits()) |
6779 | return false; |
6780 | |
6781 | if (getTargetConstantBitsFromNode(Op.getOperand(0), EltSizeInBits, |
6782 | UndefElts, EltBits, AllowWholeUndefs, |
6783 | AllowPartialUndefs)) { |
6784 | EVT SrcVT = Op.getOperand(0).getValueType(); |
6785 | unsigned NumSrcElts = SrcVT.getVectorNumElements(); |
6786 | unsigned NumSubElts = VT.getVectorNumElements(); |
6787 | unsigned BaseIdx = Op.getConstantOperandVal(1); |
6788 | UndefElts = UndefElts.extractBits(NumSubElts, BaseIdx); |
6789 | if ((BaseIdx + NumSubElts) != NumSrcElts) |
6790 | EltBits.erase(EltBits.begin() + BaseIdx + NumSubElts, EltBits.end()); |
6791 | if (BaseIdx != 0) |
6792 | EltBits.erase(EltBits.begin(), EltBits.begin() + BaseIdx); |
6793 | return true; |
6794 | } |
6795 | } |
6796 | |
6797 | // Extract constant bits from shuffle node sources. |
6798 | if (auto *SVN = dyn_cast<ShuffleVectorSDNode>(Op)) { |
6799 | // TODO - support shuffle through bitcasts. |
6800 | if (EltSizeInBits != VT.getScalarSizeInBits()) |
6801 | return false; |
6802 | |
6803 | ArrayRef<int> Mask = SVN->getMask(); |
6804 | if ((!AllowWholeUndefs || !AllowPartialUndefs) && |
6805 | llvm::any_of(Mask, [](int M) { return M < 0; })) |
6806 | return false; |
6807 | |
6808 | APInt UndefElts0, UndefElts1; |
6809 | SmallVector<APInt, 32> EltBits0, EltBits1; |
6810 | if (isAnyInRange(Mask, 0, NumElts) && |
6811 | !getTargetConstantBitsFromNode(Op.getOperand(0), EltSizeInBits, |
6812 | UndefElts0, EltBits0, AllowWholeUndefs, |
6813 | AllowPartialUndefs)) |
6814 | return false; |
6815 | if (isAnyInRange(Mask, NumElts, 2 * NumElts) && |
6816 | !getTargetConstantBitsFromNode(Op.getOperand(1), EltSizeInBits, |
6817 | UndefElts1, EltBits1, AllowWholeUndefs, |
6818 | AllowPartialUndefs)) |
6819 | return false; |
6820 | |
6821 | UndefElts = APInt::getNullValue(NumElts); |
6822 | for (int i = 0; i != (int)NumElts; ++i) { |
6823 | int M = Mask[i]; |
6824 | if (M < 0) { |
6825 | UndefElts.setBit(i); |
6826 | EltBits.push_back(APInt::getNullValue(EltSizeInBits)); |
6827 | } else if (M < (int)NumElts) { |
6828 | if (UndefElts0[M]) |
6829 | UndefElts.setBit(i); |
6830 | EltBits.push_back(EltBits0[M]); |
6831 | } else { |
6832 | if (UndefElts1[M - NumElts]) |
6833 | UndefElts.setBit(i); |
6834 | EltBits.push_back(EltBits1[M - NumElts]); |
6835 | } |
6836 | } |
6837 | return true; |
6838 | } |
6839 | |
6840 | return false; |
6841 | } |
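// Illustrative standalone sketch (not LLVM code): the repacking step at the
// heart of CastBitData in getTargetConstantBitsFromNode above, on a plain
// uint64_t blob instead of APInt. Source elements are packed at offset
// i * SrcEltSizeInBits, then re-extracted at the requested element size.
#include <cstdint>
#include <cstdio>
#include <vector>

static std::vector<uint8_t> repackTo8Bit(const std::vector<uint32_t> &Src) {
  uint64_t Blob = 0; // plays the role of MaskBits
  for (size_t i = 0; i != Src.size(); ++i)
    Blob |= (uint64_t)Src[i] << (i * 32); // MaskBits.insertBits(...)
  std::vector<uint8_t> Out;
  for (size_t i = 0; i != Src.size() * 4; ++i)
    Out.push_back((uint8_t)(Blob >> (i * 8))); // MaskBits.extractBits(8, ...)
  return Out;
}

int main() {
  for (uint8_t B : repackTo8Bit({0x04030201u, 0x08070605u}))
    std::printf("%02X ", B); // 01 02 03 04 05 06 07 08
  std::printf("\n");
  return 0;
}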
6842 | |
6843 | namespace llvm { |
6844 | namespace X86 { |
6845 | bool isConstantSplat(SDValue Op, APInt &SplatVal, bool AllowPartialUndefs) { |
6846 | APInt UndefElts; |
6847 | SmallVector<APInt, 16> EltBits; |
6848 | if (getTargetConstantBitsFromNode(Op, Op.getScalarValueSizeInBits(), |
6849 | UndefElts, EltBits, true, |
6850 | AllowPartialUndefs)) { |
6851 | int SplatIndex = -1; |
6852 | for (int i = 0, e = EltBits.size(); i != e; ++i) { |
6853 | if (UndefElts[i]) |
6854 | continue; |
6855 | if (0 <= SplatIndex && EltBits[i] != EltBits[SplatIndex]) { |
6856 | SplatIndex = -1; |
6857 | break; |
6858 | } |
6859 | SplatIndex = i; |
6860 | } |
6861 | if (0 <= SplatIndex) { |
6862 | SplatVal = EltBits[SplatIndex]; |
6863 | return true; |
6864 | } |
6865 | } |
6866 | |
6867 | return false; |
6868 | } |
6869 | } // namespace X86 |
6870 | } // namespace llvm |
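// Illustrative standalone sketch (not LLVM code): the splat scan used by
// X86::isConstantSplat above, with -1 standing in for an undef element.
// Undef elements never constrain the splat, but a fully-undef vector has no
// splat value at all.
#include <cstdio>
#include <vector>

static bool isConstantSplat(const std::vector<int> &Elts, int &SplatVal) {
  int SplatIndex = -1;
  for (int i = 0, e = (int)Elts.size(); i != e; ++i) {
    if (Elts[i] == -1) // treat -1 as an undef element
      continue;
    if (0 <= SplatIndex && Elts[i] != Elts[SplatIndex])
      return false; // two defined elements disagree
    SplatIndex = i;
  }
  if (SplatIndex < 0)
    return false; // all elements undef
  SplatVal = Elts[SplatIndex];
  return true;
}

int main() {
  int V = 0;
  std::printf("%d\n", isConstantSplat({7, -1, 7, 7}, V) ? V : -99); // 7
  std::printf("%d\n", isConstantSplat({7, 5, 7, 7}, V) ? V : -99);  // -99
  return 0;
}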
6871 | |
6872 | static bool getTargetShuffleMaskIndices(SDValue MaskNode, |
6873 | unsigned MaskEltSizeInBits, |
6874 | SmallVectorImpl<uint64_t> &RawMask, |
6875 | APInt &UndefElts) { |
6876 | // Extract the raw target constant bits. |
6877 | SmallVector<APInt, 64> EltBits; |
6878 | if (!getTargetConstantBitsFromNode(MaskNode, MaskEltSizeInBits, UndefElts, |
6879 | EltBits, /*AllowWholeUndefs*/ true, |
6880 | /*AllowPartialUndefs*/ false)) |
6881 | return false; |
6882 | |
6883 | // Insert the extracted elements into the mask. |
6884 | for (const APInt &Elt : EltBits) |
6885 | RawMask.push_back(Elt.getZExtValue()); |
6886 | |
6887 | return true; |
6888 | } |
6889 | |
6890 | /// Create a shuffle mask that matches the PACKSS/PACKUS truncation. |
6891 | /// A multi-stage pack shuffle mask is created by specifying NumStages > 1. |
6892 | /// Note: This ignores saturation, so inputs must be checked first. |
6893 | static void createPackShuffleMask(MVT VT, SmallVectorImpl<int> &Mask, |
6894 | bool Unary, unsigned NumStages = 1) { |
6895 | assert(Mask.empty() && "Expected an empty shuffle mask vector"); |
6896 | unsigned NumElts = VT.getVectorNumElements(); |
6897 | unsigned NumLanes = VT.getSizeInBits() / 128; |
6898 | unsigned NumEltsPerLane = 128 / VT.getScalarSizeInBits(); |
6899 | unsigned Offset = Unary ? 0 : NumElts; |
6900 | unsigned Repetitions = 1u << (NumStages - 1); |
6901 | unsigned Increment = 1u << NumStages; |
6902 | assert((NumEltsPerLane >> NumStages) > 0 && "Illegal packing compaction"); |
6903 | |
6904 | for (unsigned Lane = 0; Lane != NumLanes; ++Lane) { |
6905 | for (unsigned Stage = 0; Stage != Repetitions; ++Stage) { |
6906 | for (unsigned Elt = 0; Elt != NumEltsPerLane; Elt += Increment) |
6907 | Mask.push_back(Elt + (Lane * NumEltsPerLane)); |
6908 | for (unsigned Elt = 0; Elt != NumEltsPerLane; Elt += Increment) |
6909 | Mask.push_back(Elt + (Lane * NumEltsPerLane) + Offset); |
6910 | } |
6911 | } |
6912 | } |
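// Illustrative standalone sketch (not LLVM code): createPackShuffleMask above
// for a v16i8 result packed from two v8i16 sources (one 128-bit lane, one
// stage). The mask picks the even byte positions of each input, i.e. the low
// byte of every i16: <0,2,...,14, 16,18,...,30> in two-input index space.
#include <cstdio>
#include <vector>

static std::vector<int> packMask(unsigned NumElts, unsigned NumEltsPerLane,
                                 bool Unary, unsigned NumStages = 1) {
  std::vector<int> Mask;
  unsigned NumLanes = NumElts / NumEltsPerLane;
  unsigned Offset = Unary ? 0 : NumElts;
  unsigned Repetitions = 1u << (NumStages - 1);
  unsigned Increment = 1u << NumStages;
  for (unsigned Lane = 0; Lane != NumLanes; ++Lane)
    for (unsigned Stage = 0; Stage != Repetitions; ++Stage) {
      for (unsigned Elt = 0; Elt != NumEltsPerLane; Elt += Increment)
        Mask.push_back(Elt + Lane * NumEltsPerLane);
      for (unsigned Elt = 0; Elt != NumEltsPerLane; Elt += Increment)
        Mask.push_back(Elt + Lane * NumEltsPerLane + Offset);
    }
  return Mask;
}

int main() {
  for (int M : packMask(16, 16, /*Unary=*/false))
    std::printf("%d ", M); // 0 2 4 ... 14 16 18 ... 30
  std::printf("\n");
  return 0;
}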
6913 | |
6914 | // Split the demanded elts of a PACKSS/PACKUS node between its operands. |
6915 | static void getPackDemandedElts(EVT VT, const APInt &DemandedElts, |
6916 | APInt &DemandedLHS, APInt &DemandedRHS) { |
6917 | int NumLanes = VT.getSizeInBits() / 128; |
6918 | int NumElts = DemandedElts.getBitWidth(); |
6919 | int NumInnerElts = NumElts / 2; |
6920 | int NumEltsPerLane = NumElts / NumLanes; |
6921 | int NumInnerEltsPerLane = NumInnerElts / NumLanes; |
6922 | |
6923 | DemandedLHS = APInt::getNullValue(NumInnerElts); |
6924 | DemandedRHS = APInt::getNullValue(NumInnerElts); |
6925 | |
6926 | // Map DemandedElts to the packed operands. |
6927 | for (int Lane = 0; Lane != NumLanes; ++Lane) { |
6928 | for (int Elt = 0; Elt != NumInnerEltsPerLane; ++Elt) { |
6929 | int OuterIdx = (Lane * NumEltsPerLane) + Elt; |
6930 | int InnerIdx = (Lane * NumInnerEltsPerLane) + Elt; |
6931 | if (DemandedElts[OuterIdx]) |
6932 | DemandedLHS.setBit(InnerIdx); |
6933 | if (DemandedElts[OuterIdx + NumInnerEltsPerLane]) |
6934 | DemandedRHS.setBit(InnerIdx); |
6935 | } |
6936 | } |
6937 | } |
6938 | |
6939 | // Split the demanded elts of a HADD/HSUB node between its operands. |
6940 | static void getHorizDemandedElts(EVT VT, const APInt &DemandedElts, |
6941 | APInt &DemandedLHS, APInt &DemandedRHS) { |
6942 | int NumLanes = VT.getSizeInBits() / 128; |
6943 | int NumElts = DemandedElts.getBitWidth(); |
6944 | int NumEltsPerLane = NumElts / NumLanes; |
6945 | int HalfEltsPerLane = NumEltsPerLane / 2; |
6946 | |
6947 | DemandedLHS = APInt::getNullValue(NumElts); |
6948 | DemandedRHS = APInt::getNullValue(NumElts); |
6949 | |
6950 | // Map DemandedElts to the horizontal operands. |
6951 | for (int Idx = 0; Idx != NumElts; ++Idx) { |
6952 | if (!DemandedElts[Idx]) |
6953 | continue; |
6954 | int LaneIdx = (Idx / NumEltsPerLane) * NumEltsPerLane; |
6955 | int LocalIdx = Idx % NumEltsPerLane; |
6956 | if (LocalIdx < HalfEltsPerLane) { |
6957 | DemandedLHS.setBit(LaneIdx + 2 * LocalIdx + 0); |
6958 | DemandedLHS.setBit(LaneIdx + 2 * LocalIdx + 1); |
6959 | } else { |
6960 | LocalIdx -= HalfEltsPerLane; |
6961 | DemandedRHS.setBit(LaneIdx + 2 * LocalIdx + 0); |
6962 | DemandedRHS.setBit(LaneIdx + 2 * LocalIdx + 1); |
6963 | } |
6964 | } |
6965 | } |
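// Illustrative standalone sketch (not LLVM code): getHorizDemandedElts above
// for a v4i32 HADD (one 128-bit lane). Result element i in the low half of a
// lane reads LHS elements {2i, 2i+1}; the high half reads the RHS the same
// way, so demanding result elements 0 and 2 demands LHS {0,1} and RHS {0,1}.
#include <cstdio>

int main() {
  const int NumElts = 4, NumEltsPerLane = 4, HalfEltsPerLane = 2;
  unsigned DemandedElts = 0x5; // want result elements 0 and 2
  unsigned DemandedLHS = 0, DemandedRHS = 0;
  for (int Idx = 0; Idx != NumElts; ++Idx) {
    if (!(DemandedElts & (1u << Idx)))
      continue;
    int LaneIdx = (Idx / NumEltsPerLane) * NumEltsPerLane;
    int LocalIdx = Idx % NumEltsPerLane;
    if (LocalIdx < HalfEltsPerLane) {
      DemandedLHS |= 3u << (LaneIdx + 2 * LocalIdx);
    } else {
      LocalIdx -= HalfEltsPerLane;
      DemandedRHS |= 3u << (LaneIdx + 2 * LocalIdx);
    }
  }
  std::printf("LHS=0x%X RHS=0x%X\n", DemandedLHS, DemandedRHS); // 0x3 / 0x3
  return 0;
}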
6966 | |
6967 | /// Calculates the shuffle mask corresponding to the target-specific opcode. |
6968 | /// If the mask could be calculated, returns it in \p Mask, returns the |
6969 | /// shuffle operands in \p Ops, and returns true. |
6970 | /// Sets \p IsUnary to true if only one source is used. Note that this will |
6971 | /// set IsUnary for shuffles which use a single input multiple times, and in |
6972 | /// those cases it will adjust the mask to only have indices within that |
6973 | /// single input. |
6974 | static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero, |
6975 | SmallVectorImpl<SDValue> &Ops, |
6976 | SmallVectorImpl<int> &Mask, bool &IsUnary) { |
6977 | unsigned NumElems = VT.getVectorNumElements(); |
6978 | unsigned MaskEltSize = VT.getScalarSizeInBits(); |
6979 | SmallVector<uint64_t, 32> RawMask; |
6980 | APInt RawUndefs; |
6981 | uint64_t ImmN; |
6982 | |
6983 | assert(Mask.empty() && "getTargetShuffleMask expects an empty Mask vector"); |
6984 | assert(Ops.empty() && "getTargetShuffleMask expects an empty Ops vector"); |
6985 | |
6986 | IsUnary = false; |
6987 | bool IsFakeUnary = false; |
6988 | switch (N->getOpcode()) { |
6989 | case X86ISD::BLENDI: |
6990 | assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); |
6991 | assert(N->getOperand(1).getValueType() == VT && "Unexpected value type"); |
6992 | ImmN = N->getConstantOperandVal(N->getNumOperands() - 1); |
6993 | DecodeBLENDMask(NumElems, ImmN, Mask); |
6994 | IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); |
6995 | break; |
6996 | case X86ISD::SHUFP: |
6997 | assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); |
6998 | assert(N->getOperand(1).getValueType() == VT && "Unexpected value type"); |
6999 | ImmN = N->getConstantOperandVal(N->getNumOperands() - 1); |
7000 | DecodeSHUFPMask(NumElems, MaskEltSize, ImmN, Mask); |
7001 | IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); |
7002 | break; |
7003 | case X86ISD::INSERTPS: |
7004 | assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); |
7005 | assert(N->getOperand(1).getValueType() == VT && "Unexpected value type"); |
7006 | ImmN = N->getConstantOperandVal(N->getNumOperands() - 1); |
7007 | DecodeINSERTPSMask(ImmN, Mask); |
7008 | IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); |
7009 | break; |
7010 | case X86ISD::EXTRQI: |
7011 | assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); |
7012 | if (isa<ConstantSDNode>(N->getOperand(1)) && |
7013 | isa<ConstantSDNode>(N->getOperand(2))) { |
7014 | int BitLen = N->getConstantOperandVal(1); |
7015 | int BitIdx = N->getConstantOperandVal(2); |
7016 | DecodeEXTRQIMask(NumElems, MaskEltSize, BitLen, BitIdx, Mask); |
7017 | IsUnary = true; |
7018 | } |
7019 | break; |
7020 | case X86ISD::INSERTQI: |
7021 | assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); |
7022 | assert(N->getOperand(1).getValueType() == VT && "Unexpected value type"); |
7023 | if (isa<ConstantSDNode>(N->getOperand(2)) && |
7024 | isa<ConstantSDNode>(N->getOperand(3))) { |
7025 | int BitLen = N->getConstantOperandVal(2); |
7026 | int BitIdx = N->getConstantOperandVal(3); |
7027 | DecodeINSERTQIMask(NumElems, MaskEltSize, BitLen, BitIdx, Mask); |
7028 | IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); |
7029 | } |
7030 | break; |
7031 | case X86ISD::UNPCKH: |
7032 | assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); |
7033 | assert(N->getOperand(1).getValueType() == VT && "Unexpected value type"); |
7034 | DecodeUNPCKHMask(NumElems, MaskEltSize, Mask); |
7035 | IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); |
7036 | break; |
7037 | case X86ISD::UNPCKL: |
7038 | assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); |
7039 | assert(N->getOperand(1).getValueType() == VT && "Unexpected value type"); |
7040 | DecodeUNPCKLMask(NumElems, MaskEltSize, Mask); |
7041 | IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); |
7042 | break; |
7043 | case X86ISD::MOVHLPS: |
7044 | assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); |
7045 | assert(N->getOperand(1).getValueType() == VT && "Unexpected value type"); |
7046 | DecodeMOVHLPSMask(NumElems, Mask); |
7047 | IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); |
7048 | break; |
7049 | case X86ISD::MOVLHPS: |
7050 | assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); |
7051 | assert(N->getOperand(1).getValueType() == VT && "Unexpected value type"); |
7052 | DecodeMOVLHPSMask(NumElems, Mask); |
7053 | IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); |
7054 | break; |
7055 | case X86ISD::VALIGN: |
7056 | assert((VT.getScalarType() == MVT::i32 || VT.getScalarType() == MVT::i64) && |
7057 | "Only 32-bit and 64-bit elements are supported!"); |
7058 | assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); |
7059 | assert(N->getOperand(1).getValueType() == VT && "Unexpected value type"); |
7060 | ImmN = N->getConstantOperandVal(N->getNumOperands() - 1); |
7061 | DecodeVALIGNMask(NumElems, ImmN, Mask); |
7062 | IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); |
7063 | Ops.push_back(N->getOperand(1)); |
7064 | Ops.push_back(N->getOperand(0)); |
7065 | break; |
7066 | case X86ISD::PALIGNR: |
7067 | assert(VT.getScalarType() == MVT::i8 && "Byte vector expected"); |
7068 | assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); |
7069 | assert(N->getOperand(1).getValueType() == VT && "Unexpected value type"); |
7070 | ImmN = N->getConstantOperandVal(N->getNumOperands() - 1); |
7071 | DecodePALIGNRMask(NumElems, ImmN, Mask); |
7072 | IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); |
7073 | Ops.push_back(N->getOperand(1)); |
7074 | Ops.push_back(N->getOperand(0)); |
7075 | break; |
7076 | case X86ISD::VSHLDQ: |
7077 | assert(VT.getScalarType() == MVT::i8 && "Byte vector expected"); |
7078 | assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); |
7079 | ImmN = N->getConstantOperandVal(N->getNumOperands() - 1); |
7080 | DecodePSLLDQMask(NumElems, ImmN, Mask); |
7081 | IsUnary = true; |
7082 | break; |
7083 | case X86ISD::VSRLDQ: |
7084 | assert(VT.getScalarType() == MVT::i8 && "Byte vector expected"); |
7085 | assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); |
7086 | ImmN = N->getConstantOperandVal(N->getNumOperands() - 1); |
7087 | DecodePSRLDQMask(NumElems, ImmN, Mask); |
7088 | IsUnary = true; |
7089 | break; |
7090 | case X86ISD::PSHUFD: |
7091 | case X86ISD::VPERMILPI: |
7092 | assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); |
7093 | ImmN = N->getConstantOperandVal(N->getNumOperands() - 1); |
7094 | DecodePSHUFMask(NumElems, MaskEltSize, ImmN, Mask); |
7095 | IsUnary = true; |
7096 | break; |
7097 | case X86ISD::PSHUFHW: |
7098 | assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); |
7099 | ImmN = N->getConstantOperandVal(N->getNumOperands() - 1); |
7100 | DecodePSHUFHWMask(NumElems, ImmN, Mask); |
7101 | IsUnary = true; |
7102 | break; |
7103 | case X86ISD::PSHUFLW: |
7104 | assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); |
7105 | ImmN = N->getConstantOperandVal(N->getNumOperands() - 1); |
7106 | DecodePSHUFLWMask(NumElems, ImmN, Mask); |
7107 | IsUnary = true; |
7108 | break; |
7109 | case X86ISD::VZEXT_MOVL: |
7110 | assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); |
7111 | DecodeZeroMoveLowMask(NumElems, Mask); |
7112 | IsUnary = true; |
7113 | break; |
7114 | case X86ISD::VBROADCAST: |
7115 | // We only decode broadcasts of same-sized vectors, peeking through to |
7116 | // extracted subvectors is likely to cause hasOneUse issues with |
7117 | // SimplifyDemandedBits etc. |
7118 | if (N->getOperand(0).getValueType() == VT) { |
7119 | DecodeVectorBroadcast(NumElems, Mask); |
7120 | IsUnary = true; |
7121 | break; |
7122 | } |
7123 | return false; |
7124 | case X86ISD::VPERMILPV: { |
7125 | assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); |
7126 | IsUnary = true; |
7127 | SDValue MaskNode = N->getOperand(1); |
7128 | if (getTargetShuffleMaskIndices(MaskNode, MaskEltSize, RawMask, |
7129 | RawUndefs)) { |
7130 | DecodeVPERMILPMask(NumElems, MaskEltSize, RawMask, RawUndefs, Mask); |
7131 | break; |
7132 | } |
7133 | return false; |
7134 | } |
7135 | case X86ISD::PSHUFB: { |
7136 | assert(VT.getScalarType() == MVT::i8 && "Byte vector expected"); |
7137 | assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); |
7138 | assert(N->getOperand(1).getValueType() == VT && "Unexpected value type"); |
7139 | IsUnary = true; |
7140 | SDValue MaskNode = N->getOperand(1); |
7141 | if (getTargetShuffleMaskIndices(MaskNode, 8, RawMask, RawUndefs)) { |
7142 | DecodePSHUFBMask(RawMask, RawUndefs, Mask); |
7143 | break; |
7144 | } |
7145 | return false; |
7146 | } |
7147 | case X86ISD::VPERMI: |
7148 | assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); |
7149 | ImmN = N->getConstantOperandVal(N->getNumOperands() - 1); |
7150 | DecodeVPERMMask(NumElems, ImmN, Mask); |
7151 | IsUnary = true; |
7152 | break; |
7153 | case X86ISD::MOVSS: |
7154 | case X86ISD::MOVSD: |
7155 | assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); |
7156 | assert(N->getOperand(1).getValueType() == VT && "Unexpected value type"); |
7157 | DecodeScalarMoveMask(NumElems, false, Mask); |
7158 | break; |
7159 | case X86ISD::VPERM2X128: |
7160 | assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); |
7161 | assert(N->getOperand(1).getValueType() == VT && "Unexpected value type"); |
7162 | ImmN = N->getConstantOperandVal(N->getNumOperands() - 1); |
7163 | DecodeVPERM2X128Mask(NumElems, ImmN, Mask); |
7164 | IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); |
7165 | break; |
7166 | case X86ISD::SHUF128: |
7167 | assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); |
7168 | assert(N->getOperand(1).getValueType() == VT && "Unexpected value type"); |
7169 | ImmN = N->getConstantOperandVal(N->getNumOperands() - 1); |
7170 | decodeVSHUF64x2FamilyMask(NumElems, MaskEltSize, ImmN, Mask); |
7171 | IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); |
7172 | break; |
7173 | case X86ISD::MOVSLDUP: |
7174 | assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); |
7175 | DecodeMOVSLDUPMask(NumElems, Mask); |
7176 | IsUnary = true; |
7177 | break; |
7178 | case X86ISD::MOVSHDUP: |
7179 | assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); |
7180 | DecodeMOVSHDUPMask(NumElems, Mask); |
7181 | IsUnary = true; |
7182 | break; |
7183 | case X86ISD::MOVDDUP: |
7184 | assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); |
7185 | DecodeMOVDDUPMask(NumElems, Mask); |
7186 | IsUnary = true; |
7187 | break; |
7188 | case X86ISD::VPERMIL2: { |
7189 | assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); |
7190 | assert(N->getOperand(1).getValueType() == VT && "Unexpected value type"); |
7191 | IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); |
7192 | SDValue MaskNode = N->getOperand(2); |
7193 | SDValue CtrlNode = N->getOperand(3); |
7194 | if (ConstantSDNode *CtrlOp = dyn_cast<ConstantSDNode>(CtrlNode)) { |
7195 | unsigned CtrlImm = CtrlOp->getZExtValue(); |
7196 | if (getTargetShuffleMaskIndices(MaskNode, MaskEltSize, RawMask, |
7197 | RawUndefs)) { |
7198 | DecodeVPERMIL2PMask(NumElems, MaskEltSize, CtrlImm, RawMask, RawUndefs, |
7199 | Mask); |
7200 | break; |
7201 | } |
7202 | } |
7203 | return false; |
7204 | } |
7205 | case X86ISD::VPPERM: { |
7206 | assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); |
7207 | assert(N->getOperand(1).getValueType() == VT && "Unexpected value type"); |
7208 | IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); |
7209 | SDValue MaskNode = N->getOperand(2); |
7210 | if (getTargetShuffleMaskIndices(MaskNode, 8, RawMask, RawUndefs)) { |
7211 | DecodeVPPERMMask(RawMask, RawUndefs, Mask); |
7212 | break; |
7213 | } |
7214 | return false; |
7215 | } |
7216 | case X86ISD::VPERMV: { |
7217 | assert(N->getOperand(1).getValueType() == VT && "Unexpected value type"); |
7218 | IsUnary = true; |
7219 | // Unlike most shuffle nodes, VPERMV's mask operand is operand 0. |
7220 | Ops.push_back(N->getOperand(1)); |
7221 | SDValue MaskNode = N->getOperand(0); |
7222 | if (getTargetShuffleMaskIndices(MaskNode, MaskEltSize, RawMask, |
7223 | RawUndefs)) { |
7224 | DecodeVPERMVMask(RawMask, RawUndefs, Mask); |
7225 | break; |
7226 | } |
7227 | return false; |
7228 | } |
7229 | case X86ISD::VPERMV3: { |
7230 | assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); |
7231 | assert(N->getOperand(2).getValueType() == VT && "Unexpected value type"); |
7232 | IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(2); |
7233 | // Unlike most shuffle nodes, VPERMV3's mask operand is the middle one. |
7234 | Ops.push_back(N->getOperand(0)); |
7235 | Ops.push_back(N->getOperand(2)); |
7236 | SDValue MaskNode = N->getOperand(1); |
7237 | if (getTargetShuffleMaskIndices(MaskNode, MaskEltSize, RawMask, |
7238 | RawUndefs)) { |
7239 | DecodeVPERMV3Mask(RawMask, RawUndefs, Mask); |
7240 | break; |
7241 | } |
7242 | return false; |
7243 | } |
7244 | default: llvm_unreachable("unknown target shuffle node"); |
7245 | } |
7246 | |
7247 | // Empty mask indicates the decode failed. |
7248 | if (Mask.empty()) |
7249 | return false; |
7250 | |
7251 | // Check if we're getting a shuffle mask with zero'd elements. |
7252 | if (!AllowSentinelZero && isAnyZero(Mask)) |
7253 | return false; |
7254 | |
7255 | // If we have a fake unary shuffle, the shuffle mask is spread across two |
7256 | // inputs that are actually the same node. Re-map the mask to always point |
7257 | // into the first input. |
7258 | if (IsFakeUnary) |
7259 | for (int &M : Mask) |
7260 | if (M >= (int)Mask.size()) |
7261 | M -= Mask.size(); |
7262 | |
7263 | // If we didn't already add operands in the opcode-specific code, default to |
7264 | // adding 1 or 2 operands starting at 0. |
7265 | if (Ops.empty()) { |
7266 | Ops.push_back(N->getOperand(0)); |
7267 | if (!IsUnary || IsFakeUnary) |
7268 | Ops.push_back(N->getOperand(1)); |
7269 | } |
7270 | |
7271 | return true; |
7272 | } |
7273 | |
7274 | // Wrapper for getTargetShuffleMask that discards the IsUnary result. |
7275 | static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero, |
7276 | SmallVectorImpl<SDValue> &Ops, |
7277 | SmallVectorImpl<int> &Mask) { |
7278 | bool IsUnary; |
7279 | return getTargetShuffleMask(N, VT, AllowSentinelZero, Ops, Mask, IsUnary); |
7280 | } |
7281 | |
7282 | |
7283 | /// Compute whether each element of a shuffle is zeroable. |
7284 | /// |
7285 | /// A "zeroable" vector shuffle element is one which can be lowered to zero. |
7286 | /// Either it is an undef element in the shuffle mask, the element of the |
7287 | /// input referenced is undef, or the element of the input referenced is |
7288 | /// known to be zero. Many x86 shuffles can zero lanes cheaply and we often |
7289 | /// want to handle as many lanes with this technique as possible. |
7290 | static void computeZeroableShuffleElements(ArrayRef<int> Mask, |
7291 | SDValue V1, SDValue V2, |
7292 | APInt &KnownUndef, APInt &KnownZero) { |
7293 | int Size = Mask.size(); |
7294 | KnownUndef = KnownZero = APInt::getNullValue(Size); |
7295 | |
7296 | V1 = peekThroughBitcasts(V1); |
7297 | V2 = peekThroughBitcasts(V2); |
7298 | |
7299 | bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode()); |
7300 | bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode()); |
7301 | |
7302 | int VectorSizeInBits = V1.getValueSizeInBits(); |
7303 | int ScalarSizeInBits = VectorSizeInBits / Size; |
7304 | assert(!(VectorSizeInBits % ScalarSizeInBits) && "Illegal shuffle mask size"); |
7305 | |
7306 | for (int i = 0; i < Size; ++i) { |
7307 | int M = Mask[i]; |
7308 | // Handle the easy cases. |
7309 | if (M < 0) { |
7310 | KnownUndef.setBit(i); |
7311 | continue; |
7312 | } |
7313 | if ((M >= 0 && M < Size && V1IsZero) || (M >= Size && V2IsZero)) { |
7314 | KnownZero.setBit(i); |
7315 | continue; |
7316 | } |
7317 | |
7318 | // Determine shuffle input and normalize the mask. |
7319 | SDValue V = M < Size ? V1 : V2; |
7320 | M %= Size; |
7321 | |
7322 | // Currently we can only search BUILD_VECTOR for UNDEF/ZERO elements. |
7323 | if (V.getOpcode() != ISD::BUILD_VECTOR) |
7324 | continue; |
7325 | |
7326 | // If the BUILD_VECTOR has fewer elements then the bitcasted portion of |
7327 | // the mask for this element spans several of its operands. |
7328 | if ((Size % V.getNumOperands()) == 0) { |
7329 | int Scale = Size / V->getNumOperands(); |
7330 | SDValue Op = V.getOperand(M / Scale); |
7331 | if (Op.isUndef()) |
7332 | KnownUndef.setBit(i); |
7333 | if (X86::isZeroNode(Op)) |
7334 | KnownZero.setBit(i); |
7335 | else if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) { |
7336 | APInt Val = Cst->getAPIntValue(); |
7337 | Val = Val.extractBits(ScalarSizeInBits, (M % Scale) * ScalarSizeInBits); |
7338 | if (Val == 0) |
7339 | KnownZero.setBit(i); |
7340 | } else if (ConstantFPSDNode *Cst = dyn_cast<ConstantFPSDNode>(Op)) { |
7341 | APInt Val = Cst->getValueAPF().bitcastToAPInt(); |
7342 | Val = Val.extractBits(ScalarSizeInBits, (M % Scale) * ScalarSizeInBits); |
7343 | if (Val == 0) |
7344 | KnownZero.setBit(i); |
7345 | } |
7346 | continue; |
7347 | } |
7348 | |
7349 | // If the BUILD_VECTOR has more elements then all the (smaller) source |
7350 | // elements must be UNDEF/ZERO. |
7351 | if ((V.getNumOperands() % Size) == 0) { |
7352 | int Scale = V->getNumOperands() / Size; |
7353 | bool AllUndef = true; |
7354 | bool AllZero = true; |
7355 | for (int j = 0; j < Scale; ++j) { |
7356 | SDValue Op = V.getOperand((M * Scale) + j); |
7357 | AllUndef &= Op.isUndef(); |
7358 | AllZero &= X86::isZeroNode(Op); |
7359 | } |
7360 | if (AllUndef) |
7361 | KnownUndef.setBit(i); |
7362 | if (AllZero) |
7363 | KnownZero.setBit(i); |
7364 | continue; |
7365 | } |
7366 | } |
7367 | } |
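// Illustrative standalone sketch (not LLVM code): the core rule of
// computeZeroableShuffleElements above for the simple same-width case. A mask
// element is undef if negative, and zeroable if it selects a known-zero
// constant from either input.
#include <cstdio>
#include <vector>

int main() {
  std::vector<int> V1 = {0, 7, 0, 9}; // constant elements of input 1
  std::vector<int> V2 = {5, 0, 6, 0}; // constant elements of input 2
  std::vector<int> Mask = {-1, 0, 5, 3};
  int Size = (int)Mask.size();
  for (int i = 0; i < Size; ++i) {
    int M = Mask[i];
    if (M < 0) {
      std::printf("elt%d: undef\n", i);
      continue;
    }
    int Val = M < Size ? V1[M] : V2[M - Size];
    std::printf("elt%d: %s\n", i, Val == 0 ? "zeroable" : "non-zero");
  }
  // elt0: undef, elt1: zeroable, elt2: zeroable, elt3: non-zero
  return 0;
}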
7368 | |
7369 | /// Decode a target shuffle mask and inputs and see if any values are |
7370 | /// known to be undef or zero from their inputs. |
7371 | /// Returns true if the target shuffle mask was decoded. |
7372 | /// FIXME: Merge this with computeZeroableShuffleElements? |
7373 | static bool getTargetShuffleAndZeroables(SDValue N, SmallVectorImpl<int> &Mask, |
7374 | SmallVectorImpl<SDValue> &Ops, |
7375 | APInt &KnownUndef, APInt &KnownZero) { |
7376 | bool IsUnary; |
7377 | if (!isTargetShuffle(N.getOpcode())) |
7378 | return false; |
7379 | |
7380 | MVT VT = N.getSimpleValueType(); |
7381 | if (!getTargetShuffleMask(N.getNode(), VT, true, Ops, Mask, IsUnary)) |
7382 | return false; |
7383 | |
7384 | int Size = Mask.size(); |
7385 | SDValue V1 = Ops[0]; |
7386 | SDValue V2 = IsUnary ? V1 : Ops[1]; |
7387 | KnownUndef = KnownZero = APInt::getNullValue(Size); |
7388 | |
7389 | V1 = peekThroughBitcasts(V1); |
7390 | V2 = peekThroughBitcasts(V2); |
7391 | |
7392 | assert((VT.getSizeInBits() % Size) == 0 && |
7393 | "Illegal split of shuffle value type"); |
7394 | unsigned EltSizeInBits = VT.getSizeInBits() / Size; |
7395 | |
7396 | // Extract known constant input data. |
7397 | APInt UndefSrcElts[2]; |
7398 | SmallVector<APInt, 32> SrcEltBits[2]; |
7399 | bool IsSrcConstant[2] = { |
7400 | getTargetConstantBitsFromNode(V1, EltSizeInBits, UndefSrcElts[0], |
7401 | SrcEltBits[0], true, false), |
7402 | getTargetConstantBitsFromNode(V2, EltSizeInBits, UndefSrcElts[1], |
7403 | SrcEltBits[1], true, false)}; |
7404 | |
7405 | for (int i = 0; i < Size; ++i) { |
7406 | int M = Mask[i]; |
7407 | |
7408 | // Already decoded as SM_SentinelZero / SM_SentinelUndef. |
7409 | if (M < 0) { |
7410 | assert(isUndefOrZero(M) && "Unknown shuffle sentinel value!"); |
7411 | if (SM_SentinelUndef == M) |
7412 | KnownUndef.setBit(i); |
7413 | if (SM_SentinelZero == M) |
7414 | KnownZero.setBit(i); |
7415 | continue; |
7416 | } |
7417 | |
7418 | // Determine shuffle input and normalize the mask. |
7419 | unsigned SrcIdx = M / Size; |
7420 | SDValue V = M < Size ? V1 : V2; |
7421 | M %= Size; |
7422 | |
7423 | // We are referencing an UNDEF input. |
7424 | if (V.isUndef()) { |
7425 | KnownUndef.setBit(i); |
7426 | continue; |
7427 | } |
7428 | |
7429 | // SCALAR_TO_VECTOR - only the first element is defined, and the rest UNDEF. |
7430 | // TODO: We currently only set UNDEF for integer types - floats use the same |
7431 | // registers as vectors and many of the scalar folded loads rely on the |
7432 | // SCALAR_TO_VECTOR pattern. |
7433 | if (V.getOpcode() == ISD::SCALAR_TO_VECTOR && |
7434 | (Size % V.getValueType().getVectorNumElements()) == 0) { |
7435 | int Scale = Size / V.getValueType().getVectorNumElements(); |
7436 | int Idx = M / Scale; |
7437 | if (Idx != 0 && !VT.isFloatingPoint()) |
7438 | KnownUndef.setBit(i); |
7439 | else if (Idx == 0 && X86::isZeroNode(V.getOperand(0))) |
7440 | KnownZero.setBit(i); |
7441 | continue; |
7442 | } |
7443 | |
7444 | // INSERT_SUBVECTOR - to widen vectors we often insert them into UNDEF |
7445 | // base vectors. |
7446 | if (V.getOpcode() == ISD::INSERT_SUBVECTOR) { |
7447 | SDValue Vec = V.getOperand(0); |
7448 | int NumVecElts = Vec.getValueType().getVectorNumElements(); |
7449 | if (Vec.isUndef() && Size == NumVecElts) { |
7450 | int Idx = V.getConstantOperandVal(2); |
7451 | int NumSubElts = V.getOperand(1).getValueType().getVectorNumElements(); |
7452 | if (M < Idx || (Idx + NumSubElts) <= M) |
7453 | KnownUndef.setBit(i); |
7454 | } |
7455 | continue; |
7456 | } |
7457 | |
7458 | // Attempt to extract from the source's constant bits. |
7459 | if (IsSrcConstant[SrcIdx]) { |
7460 | if (UndefSrcElts[SrcIdx][M]) |
7461 | KnownUndef.setBit(i); |
7462 | else if (SrcEltBits[SrcIdx][M] == 0) |
7463 | KnownZero.setBit(i); |
7464 | } |
7465 | } |
7466 | |
7467 | assert(VT.getVectorNumElements() == (unsigned)Size && |
7468 | "Different mask size from vector size!"); |
7469 | return true; |
7470 | } |
7471 | |
7472 | // Replace target shuffle mask elements with known undef/zero sentinels. |
7473 | static void resolveTargetShuffleFromZeroables(SmallVectorImpl<int> &Mask, |
7474 | const APInt &KnownUndef, |
7475 | const APInt &KnownZero, |
7476 | bool ResolveKnownZeros = true) { |
7477 | unsigned NumElts = Mask.size(); |
7478 | assert(KnownUndef.getBitWidth() == NumElts && |
7479 | KnownZero.getBitWidth() == NumElts && "Shuffle mask size mismatch"); |
7480 | |
7481 | for (unsigned i = 0; i != NumElts; ++i) { |
7482 | if (KnownUndef[i]) |
7483 | Mask[i] = SM_SentinelUndef; |
7484 | else if (ResolveKnownZeros && KnownZero[i]) |
7485 | Mask[i] = SM_SentinelZero; |
7486 | } |
7487 | } |
7488 | |
7489 | // Extract target shuffle mask sentinel elements to known undef/zero bitmask. |
7490 | static void resolveZeroablesFromTargetShuffle(const SmallVectorImpl<int> &Mask, |
7491 | APInt &KnownUndef, |
7492 | APInt &KnownZero) { |
7493 | unsigned NumElts = Mask.size(); |
7494 | KnownUndef = KnownZero = APInt::getNullValue(NumElts); |
7495 | |
7496 | for (unsigned i = 0; i != NumElts; ++i) { |
7497 | int M = Mask[i]; |
7498 | if (SM_SentinelUndef == M) |
7499 | KnownUndef.setBit(i); |
7500 | if (SM_SentinelZero == M) |
7501 | KnownZero.setBit(i); |
7502 | } |
7503 | } |
7504 | |
7505 | // Forward declaration (needed by the recursive getFauxShuffleMask below). |
7506 | |
7507 | static bool getTargetShuffleInputs(SDValue Op, SmallVectorImpl<SDValue> &Inputs, |
7508 | SmallVectorImpl<int> &Mask, |
7509 | const SelectionDAG &DAG, unsigned Depth, |
7510 | bool ResolveKnownElts); |
7511 | |
7512 | // Attempt to decode ops that could be represented as a shuffle mask. |
7513 | // The decoded shuffle mask may contain a different number of elements to the |
7514 | // destination value type. |
7515 | static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts, |
7516 | SmallVectorImpl<int> &Mask, |
7517 | SmallVectorImpl<SDValue> &Ops, |
7518 | const SelectionDAG &DAG, unsigned Depth, |
7519 | bool ResolveKnownElts) { |
7520 | Mask.clear(); |
7521 | Ops.clear(); |
7522 | |
7523 | MVT VT = N.getSimpleValueType(); |
7524 | unsigned NumElts = VT.getVectorNumElements(); |
7525 | unsigned NumSizeInBits = VT.getSizeInBits(); |
7526 | unsigned NumBitsPerElt = VT.getScalarSizeInBits(); |
7527 | if ((NumBitsPerElt % 8) != 0 || (NumSizeInBits % 8) != 0) |
7528 | return false; |
7529 | assert(NumElts == DemandedElts.getBitWidth() && "Unexpected vector size"); |
7530 | unsigned NumSizeInBytes = NumSizeInBits / 8; |
7531 | unsigned NumBytesPerElt = NumBitsPerElt / 8; |
7532 | |
7533 | unsigned Opcode = N.getOpcode(); |
7534 | switch (Opcode) { |
7535 | case ISD::VECTOR_SHUFFLE: { |
7536 | // Don't treat ISD::VECTOR_SHUFFLE as a target shuffle, so decode it here. |
7537 | ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(N)->getMask(); |
7538 | if (isUndefOrInRange(ShuffleMask, 0, 2 * NumElts)) { |
7539 | Mask.append(ShuffleMask.begin(), ShuffleMask.end()); |
7540 | Ops.push_back(N.getOperand(0)); |
7541 | Ops.push_back(N.getOperand(1)); |
7542 | return true; |
7543 | } |
7544 | return false; |
7545 | } |
7546 | case ISD::AND: |
7547 | case X86ISD::ANDNP: { |
7548 | // Attempt to decode as a per-byte mask. |
7549 | APInt UndefElts; |
7550 | SmallVector<APInt, 32> EltBits; |
7551 | SDValue N0 = N.getOperand(0); |
7552 | SDValue N1 = N.getOperand(1); |
7553 | bool IsAndN = (X86ISD::ANDNP == Opcode); |
7554 | uint64_t ZeroMask = IsAndN ? 255 : 0; |
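     | // A constant byte of 0x00 forces the result byte to zero and 0xFF passes |
     | // the other operand's byte through, so a constant made only of 0x00/0xFF |
     | // bytes acts as a byte-level blend-with-zero. ANDNP inverts its constant |
     | // operand first, which is why the zeroing byte flips to 0xFF there. |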
7555 | if (!getTargetConstantBitsFromNode(IsAndN ? N0 : N1, 8, UndefElts, EltBits)) |
7556 | return false; |
7557 | for (int i = 0, e = (int)EltBits.size(); i != e; ++i) { |
7558 | if (UndefElts[i]) { |
7559 | Mask.push_back(SM_SentinelUndef); |
7560 | continue; |
7561 | } |
7562 | const APInt &ByteBits = EltBits[i]; |
7563 | if (ByteBits != 0 && ByteBits != 255) |
7564 | return false; |
7565 | Mask.push_back(ByteBits == ZeroMask ? SM_SentinelZero : i); |
7566 | } |
7567 | Ops.push_back(IsAndN ? N1 : N0); |
7568 | return true; |
7569 | } |
7570 | case ISD::OR: { |
7571 | // Handle OR(SHUFFLE,SHUFFLE) case where one source is zero and the other |
7572 | // is a valid shuffle index. |
7573 | SDValue N0 = peekThroughBitcasts(N.getOperand(0)); |
7574 | SDValue N1 = peekThroughBitcasts(N.getOperand(1)); |
7575 | if (!N0.getValueType().isVector() || !N1.getValueType().isVector()) |
7576 | return false; |
7577 | SmallVector<int, 64> SrcMask0, SrcMask1; |
7578 | SmallVector<SDValue, 2> SrcInputs0, SrcInputs1; |
7579 | if (!getTargetShuffleInputs(N0, SrcInputs0, SrcMask0, DAG, Depth + 1, |
7580 | true) || |
7581 | !getTargetShuffleInputs(N1, SrcInputs1, SrcMask1, DAG, Depth + 1, |
7582 | true)) |
7583 | return false; |
7584 | |
7585 | size_t MaskSize = std::max(SrcMask0.size(), SrcMask1.size()); |
7586 | SmallVector<int, 64> Mask0, Mask1; |
7587 | narrowShuffleMaskElts(MaskSize / SrcMask0.size(), SrcMask0, Mask0); |
7588 | narrowShuffleMaskElts(MaskSize / SrcMask1.size(), SrcMask1, Mask1); |
7589 | for (int i = 0; i != (int)MaskSize; ++i) { |
7590 | // Merge the two masks element-by-element: zero OR zero stays zero, and |
7591 | // zero OR x takes the element from whichever input is non-zero. Any |
7592 | // other combination (including undef) cannot be expressed as a blend of |
7593 | // the two inputs, so bail out. |
7594 | if (Mask0[i] == SM_SentinelZero && Mask1[i] == SM_SentinelZero) |
7595 | Mask.push_back(SM_SentinelZero); |
7596 | else if (Mask1[i] == SM_SentinelZero) |
7597 | Mask.push_back(i); |
7598 | else if (Mask0[i] == SM_SentinelZero) |
7599 | Mask.push_back(i + MaskSize); |
7600 | else |
7601 | return false; |
7602 | } |
7603 | Ops.push_back(N0); |
7604 | Ops.push_back(N1); |
7605 | return true; |
7606 | } |
7607 | case ISD::INSERT_SUBVECTOR: { |
7608 | SDValue Src = N.getOperand(0); |
7609 | SDValue Sub = N.getOperand(1); |
7610 | EVT SubVT = Sub.getValueType(); |
7611 | unsigned NumSubElts = SubVT.getVectorNumElements(); |
7612 | if (!N->isOnlyUserOf(Sub.getNode())) |
7613 | return false; |
7614 | uint64_t InsertIdx = N.getConstantOperandVal(2); |
7615 | // Handle INSERT_SUBVECTOR(SRC0, EXTRACT_SUBVECTOR(SRC1)) as a shuffle. |
7616 | if (Sub.getOpcode() == ISD::EXTRACT_SUBVECTOR && |
7617 | Sub.getOperand(0).getValueType() == VT) { |
7618 | uint64_t ExtractIdx = Sub.getConstantOperandVal(1); |
7619 | for (int i = 0; i != (int)NumElts; ++i) |
7620 | Mask.push_back(i); |
7621 | for (int i = 0; i != (int)NumSubElts; ++i) |
7622 | Mask[InsertIdx + i] = NumElts + ExtractIdx + i; |
7623 | Ops.push_back(Src); |
7624 | Ops.push_back(Sub.getOperand(0)); |
7625 | return true; |
7626 | } |
7627 | // Handle INSERT_SUBVECTOR(SRC0, SHUFFLE(SRC1)). |
7628 | SmallVector<int, 64> SubMask; |
7629 | SmallVector<SDValue, 2> SubInputs; |
7630 | if (!getTargetShuffleInputs(peekThroughOneUseBitcasts(Sub), SubInputs, |
7631 | SubMask, DAG, Depth + 1, ResolveKnownElts)) |
7632 | return false; |
7633 | |
7634 | // Subvector shuffle inputs must not be larger than the subvector. |
7635 | if (llvm::any_of(SubInputs, [SubVT](SDValue SubInput) { |
7636 | return SubVT.getFixedSizeInBits() < |
7637 | SubInput.getValueSizeInBits().getFixedSize(); |
7638 | })) |
7639 | return false; |
7640 | |
7641 | if (SubMask.size() != NumSubElts) { |
7642 | assert(((SubMask.size() % NumSubElts) == 0 || |
7643 | (NumSubElts % SubMask.size()) == 0) && "Illegal submask scale"); |
7644 | if ((NumSubElts % SubMask.size()) == 0) { |
7645 | int Scale = NumSubElts / SubMask.size(); |
7646 | SmallVector<int, 64> ScaledSubMask; |
7647 | narrowShuffleMaskElts(Scale, SubMask, ScaledSubMask); |
7648 | SubMask = ScaledSubMask; |
7649 | } else { |
7650 | int Scale = SubMask.size() / NumSubElts; |
7651 | NumSubElts = SubMask.size(); |
7652 | NumElts *= Scale; |
7653 | InsertIdx *= Scale; |
7654 | } |
7655 | } |
7656 | Ops.push_back(Src); |
7657 | Ops.append(SubInputs.begin(), SubInputs.end()); |
7658 | if (ISD::isBuildVectorAllZeros(Src.getNode())) |
7659 | Mask.append(NumElts, SM_SentinelZero); |
7660 | else |
7661 | for (int i = 0; i != (int)NumElts; ++i) |
7662 | Mask.push_back(i); |
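     | // Remap the subvector mask into the concatenated Ops index space: input |
     | // InputIdx of the sub-shuffle lives at Ops slot 1 + InputIdx, so its |
     | // elements start at NumElts * (1 + InputIdx). |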
7663 | for (int i = 0; i != (int)NumSubElts; ++i) { |
7664 | int M = SubMask[i]; |
7665 | if (0 <= M) { |
7666 | int InputIdx = M / NumSubElts; |
7667 | M = (NumElts * (1 + InputIdx)) + (M % NumSubElts); |
7668 | } |
7669 | Mask[i + InsertIdx] = M; |
7670 | } |
7671 | return true; |
7672 | } |
7673 | case X86ISD::PINSRB: |
7674 | case X86ISD::PINSRW: |
7675 | case ISD::SCALAR_TO_VECTOR: |
7676 | case ISD::INSERT_VECTOR_ELT: { |
7677 | // Match against an insert_vector_elt/scalar_to_vector of an extract from a |
7678 | // vector, for matching src/dst vector types. |
7679 | SDValue Scl = N.getOperand(Opcode == ISD::SCALAR_TO_VECTOR ? 0 : 1); |
7680 | |
7681 | unsigned DstIdx = 0; |
7682 | if (Opcode != ISD::SCALAR_TO_VECTOR) { |
7683 | // Check we have an in-range constant insertion index. |
7684 | if (!isa<ConstantSDNode>(N.getOperand(2)) || |
7685 | N.getConstantOperandAPInt(2).uge(NumElts)) |
7686 | return false; |
7687 | DstIdx = N.getConstantOperandVal(2); |
7688 | |
7689 | // Attempt to recognise the insertion of zero as a blend-with-zero shuffle. |
7690 | if (X86::isZeroNode(Scl)) { |
7691 | Ops.push_back(N.getOperand(0)); |
7692 | for (unsigned i = 0; i != NumElts; ++i) |
7693 | Mask.push_back(i == DstIdx ? SM_SentinelZero : (int)i); |
7694 | return true; |
7695 | } |
7696 | } |
7697 | |
7698 | // Peek through trunc/aext/zext to find the source scalar width. |
7699 | // TODO: aext shouldn't require SM_SentinelZero padding. |
7700 | // TODO: handle shift of scalars. |
7701 | unsigned MinBitsPerElt = Scl.getScalarValueSizeInBits(); |
7702 | while (Scl.getOpcode() == ISD::TRUNCATE || |
7703 | Scl.getOpcode() == ISD::ANY_EXTEND || |
7704 | Scl.getOpcode() == ISD::ZERO_EXTEND) { |
7705 | Scl = Scl.getOperand(0); |
7706 | MinBitsPerElt = |
7707 | std::min<unsigned>(MinBitsPerElt, Scl.getScalarValueSizeInBits()); |
7708 | } |
7709 | if ((MinBitsPerElt % 8) != 0) |
7710 | return false; |
7711 | |
7712 | // Attempt to find the source vector the scalar was extracted from. |
7713 | SDValue SrcExtract; |
7714 | if ((Scl.getOpcode() == ISD::EXTRACT_VECTOR_ELT || |
7715 | Scl.getOpcode() == X86ISD::PEXTRW || |
7716 | Scl.getOpcode() == X86ISD::PEXTRB) && |
7717 | Scl.getOperand(0).getValueSizeInBits() == NumSizeInBits) { |
7718 | SrcExtract = Scl; |
7719 | } |
7720 | if (!SrcExtract || !isa<ConstantSDNode>(SrcExtract.getOperand(1))) |
7721 | return false; |
7722 | |
7723 | SDValue SrcVec = SrcExtract.getOperand(0); |
7724 | EVT SrcVT = SrcVec.getValueType(); |
7725 | if (!SrcVT.getScalarType().isByteSized()) |
7726 | return false; |
7727 | unsigned SrcIdx = SrcExtract.getConstantOperandVal(1); |
7728 | unsigned SrcByte = SrcIdx * (SrcVT.getScalarSizeInBits() / 8); |
7729 | unsigned DstByte = DstIdx * NumBytesPerElt; |
7730 | MinBitsPerElt = |
7731 | std::min<unsigned>(MinBitsPerElt, SrcVT.getScalarSizeInBits()); |
7732 | |
7733 | // Create an 'identity' byte level shuffle mask and then add inserted bytes. |
7734 | if (Opcode == ISD::SCALAR_TO_VECTOR) { |
7735 | Ops.push_back(SrcVec); |
7736 | Mask.append(NumSizeInBytes, SM_SentinelUndef); |
7737 | } else { |
7738 | Ops.push_back(SrcVec); |
7739 | Ops.push_back(N.getOperand(0)); |
7740 | for (int i = 0; i != (int)NumSizeInBytes; ++i) |
7741 | Mask.push_back(NumSizeInBytes + i); |
7742 | } |
7743 | |
7744 | unsigned MinBytesPerElts = MinBitsPerElt / 8; |
7745 | MinBytesPerElts = std::min(MinBytesPerElts, NumBytesPerElt); |
7746 | for (unsigned i = 0; i != MinBytesPerElts; ++i) |
7747 | Mask[DstByte + i] = SrcByte + i; |
7748 | for (unsigned i = MinBytesPerElts; i < NumBytesPerElt; ++i) |
7749 | Mask[DstByte + i] = SM_SentinelZero; |
7750 | return true; |
7751 | } |
7752 | case X86ISD::PACKSS: |
7753 | case X86ISD::PACKUS: { |
7754 | SDValue N0 = N.getOperand(0); |
7755 | SDValue N1 = N.getOperand(1); |
7756 | assert(N0.getValueType().getVectorNumElements() == (NumElts / 2) && |
7757 | N1.getValueType().getVectorNumElements() == (NumElts / 2) && |
7758 | "Unexpected input value type"); |
7759 | |
7760 | APInt EltsLHS, EltsRHS; |
7761 | getPackDemandedElts(VT, DemandedElts, EltsLHS, EltsRHS); |
7762 | |
7763 | // If we know input saturation won't happen (or we don't care for particular |
7764 | // lanes), we can treat this as a truncation shuffle. |
7765 | bool Offset0 = false, Offset1 = false; |
7766 | if (Opcode == X86ISD::PACKSS) { |
7767 | if ((!(N0.isUndef() || EltsLHS.isNullValue()) && |
7768 | DAG.ComputeNumSignBits(N0, EltsLHS, Depth + 1) <= NumBitsPerElt) || |
7769 | (!(N1.isUndef() || EltsRHS.isNullValue()) && |
7770 | DAG.ComputeNumSignBits(N1, EltsRHS, Depth + 1) <= NumBitsPerElt)) |
7771 | return false; |
7772 | |
7773 | // PACKSS(VSRAI(X, BW)) duplicates the high half into the low half, so it |
7774 | // can be treated as taking the upper half of each source element instead. |
7775 | if (N0.getOpcode() == X86ISD::VSRAI && N->isOnlyUserOf(N0.getNode()) && |
7776 | N0.getConstantOperandAPInt(1) == NumBitsPerElt) { |
7777 | Offset0 = true; |
7778 | N0 = N0.getOperand(0); |
7779 | } |
7780 | if (N1.getOpcode() == X86ISD::VSRAI && N->isOnlyUserOf(N1.getNode()) && |
7781 | N1.getConstantOperandAPInt(1) == NumBitsPerElt) { |
7782 | Offset1 = true; |
7783 | N1 = N1.getOperand(0); |
7784 | } |
7785 | } else { |
7786 | APInt ZeroMask = APInt::getHighBitsSet(2 * NumBitsPerElt, NumBitsPerElt); |
7787 | if ((!(N0.isUndef() || EltsLHS.isNullValue()) && |
7788 | !DAG.MaskedValueIsZero(N0, ZeroMask, EltsLHS, Depth + 1)) || |
7789 | (!(N1.isUndef() || EltsRHS.isNullValue()) && |
7790 | !DAG.MaskedValueIsZero(N1, ZeroMask, EltsRHS, Depth + 1))) |
7791 | return false; |
7792 | } |
7793 | |
7794 | bool IsUnary = (N0 == N1); |
7795 | |
7796 | Ops.push_back(N0); |
7797 | if (!IsUnary) |
7798 | Ops.push_back(N1); |
7799 | |
7800 | createPackShuffleMask(VT, Mask, IsUnary); |
7801 | |
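     | // createPackShuffleMask selects the low (even) half of each wide source |
     | // element; when the source was shifted right by the narrow element width, |
     | // the useful bits sit in the high half, so bump those mask entries by one |
     | // to select the odd halves instead. |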
7802 | if (Offset0 || Offset1) { |
7803 | for (int &M : Mask) |
7804 | if ((Offset0 && isInRange(M, 0, NumElts)) || |
7805 | (Offset1 && isInRange(M, NumElts, 2 * NumElts))) |
7806 | ++M; |
7807 | } |
7808 | return true; |
7809 | } |
7810 | case X86ISD::VTRUNC: { |
7811 | SDValue Src = N.getOperand(0); |
7812 | EVT SrcVT = Src.getValueType(); |
7813 | // Truncated source must be a simple vector. |
7814 | if (!SrcVT.isSimple() || (SrcVT.getSizeInBits() % 128) != 0 || |
7815 | (SrcVT.getScalarSizeInBits() % 8) != 0) |
7816 | return false; |
7817 | unsigned NumSrcElts = SrcVT.getVectorNumElements(); |
7818 | unsigned NumBitsPerSrcElt = SrcVT.getScalarSizeInBits(); |
7819 | unsigned Scale = NumBitsPerSrcElt / NumBitsPerElt; |
7820 | assert((NumBitsPerSrcElt % NumBitsPerElt) == 0 && "Illegal truncation"); |
7821 | for (unsigned i = 0; i != NumSrcElts; ++i) |
7822 | Mask.push_back(i * Scale); |
7823 | Mask.append(NumElts - NumSrcElts, SM_SentinelZero); |
7824 | Ops.push_back(Src); |
7825 | return true; |
7826 | } |
7827 | case X86ISD::VSHLI: |
7828 | case X86ISD::VSRLI: { |
7829 | uint64_t ShiftVal = N.getConstantOperandVal(1); |
7830 | // Out of range bit shifts are guaranteed to be zero. |
7831 | if (NumBitsPerElt <= ShiftVal) { |
7832 | Mask.append(NumElts, SM_SentinelZero); |
7833 | return true; |
7834 | } |
7835 | |
7836 | // We can only decode 'whole byte' bit shifts as shuffles. |
7837 | if ((ShiftVal % 8) != 0) |
7838 | break; |
7839 | |
7840 | uint64_t ByteShift = ShiftVal / 8; |
7841 | Ops.push_back(N.getOperand(0)); |
7842 | |
7843 | // Clear mask to all zeros and insert the shifted byte indices. |
7844 | Mask.append(NumSizeInBytes, SM_SentinelZero); |
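     | // For a left shift, byte j (j >= ByteShift) of each element comes from |
     | // byte j - ByteShift of the same element while the low bytes stay zero; |
     | // the right shift case mirrors this towards the low end. |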
7845 | |
7846 | if (X86ISD::VSHLI == Opcode) { |
7847 | for (unsigned i = 0; i != NumSizeInBytes; i += NumBytesPerElt) |
7848 | for (unsigned j = ByteShift; j != NumBytesPerElt; ++j) |
7849 | Mask[i + j] = i + j - ByteShift; |
7850 | } else { |
7851 | for (unsigned i = 0; i != NumSizeInBytes; i += NumBytesPerElt) |
7852 | for (unsigned j = ByteShift; j != NumBytesPerElt; ++j) |
7853 | Mask[i + j - ByteShift] = i + j; |
7854 | } |
7855 | return true; |
7856 | } |
7857 | case X86ISD::VROTLI: |
7858 | case X86ISD::VROTRI: { |
7859 | // We can only decode 'whole byte' bit rotations as shuffles. |
7860 | uint64_t RotateVal = N.getConstantOperandAPInt(1).urem(NumBitsPerElt); |
7861 | if ((RotateVal % 8) != 0) |
7862 | return false; |
7863 | Ops.push_back(N.getOperand(0)); |
7864 | int Offset = RotateVal / 8; |
7865 | Offset = (X86ISD::VROTLI == Opcode ? NumBytesPerElt - Offset : Offset); |
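     | // Rotating left by k bytes permutes bytes the same way as rotating right |
     | // by (NumBytesPerElt - k), so normalize both opcodes to a single |
     | // right-rotate offset and index each byte modulo the element width. |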
7866 | for (int i = 0; i != (int)NumElts; ++i) { |
7867 | int BaseIdx = i * NumBytesPerElt; |
7868 | for (int j = 0; j != (int)NumBytesPerElt; ++j) { |
7869 | Mask.push_back(BaseIdx + ((Offset + j) % NumBytesPerElt)); |
7870 | } |
7871 | } |
7872 | return true; |
7873 | } |
7874 | case X86ISD::VBROADCAST: { |
7875 | SDValue Src = N.getOperand(0); |
7876 | if (!Src.getSimpleValueType().isVector()) { |
7877 | if (Src.getOpcode() != ISD::EXTRACT_VECTOR_ELT || |
7878 | !isNullConstant(Src.getOperand(1)) || |
7879 | Src.getOperand(0).getValueType().getScalarType() != |
7880 | VT.getScalarType()) |
7881 | return false; |
7882 | Src = Src.getOperand(0); |
7883 | } |
7884 | Ops.push_back(Src); |
7885 | Mask.append(NumElts, 0); |
7886 | return true; |
7887 | } |
7888 | case ISD::ZERO_EXTEND: |
7889 | case ISD::ANY_EXTEND: |
7890 | case ISD::ZERO_EXTEND_VECTOR_INREG: |
7891 | case ISD::ANY_EXTEND_VECTOR_INREG: { |
7892 | SDValue Src = N.getOperand(0); |
7893 | EVT SrcVT = Src.getValueType(); |
7894 | |
7895 | // Extended source must be a simple vector. |
7896 | if (!SrcVT.isSimple() || (SrcVT.getSizeInBits() % 128) != 0 || |
7897 | (SrcVT.getScalarSizeInBits() % 8) != 0) |
7898 | return false; |
7899 | |
7900 | bool IsAnyExtend = |
7901 | (ISD::ANY_EXTEND == Opcode || ISD::ANY_EXTEND_VECTOR_INREG == Opcode); |
7902 | DecodeZeroExtendMask(SrcVT.getScalarSizeInBits(), NumBitsPerElt, NumElts, |
7903 | IsAnyExtend, Mask); |
7904 | Ops.push_back(Src); |
7905 | return true; |
7906 | } |
7907 | } |
7908 | |
7909 | return false; |
7910 | } |
7911 | |
7912 | // Removes unused/repeated shuffle source inputs and adjusts the shuffle mask. |
7913 | static void resolveTargetShuffleInputsAndMask(SmallVectorImpl<SDValue> &Inputs, |
7914 | SmallVectorImpl<int> &Mask) { |
7915 | int MaskWidth = Mask.size(); |
7916 | SmallVector<SDValue, 16> UsedInputs; |
7917 | for (int i = 0, e = Inputs.size(); i < e; ++i) { |
7918 | int lo = UsedInputs.size() * MaskWidth; |
7919 | int hi = lo + MaskWidth; |
7920 | |
7921 | // Strip UNDEF input usage. |
7922 | if (Inputs[i].isUndef()) |
7923 | for (int &M : Mask) |
7924 | if ((lo <= M) && (M < hi)) |
7925 | M = SM_SentinelUndef; |
7926 | |
7927 | // Check for unused inputs. |
7928 | if (none_of(Mask, [lo, hi](int i) { return (lo <= i) && (i < hi); })) { |
7929 | for (int &M : Mask) |
7930 | if (lo <= M) |
7931 | M -= MaskWidth; |
7932 | continue; |
7933 | } |
7934 | |
7935 | // Check for repeated inputs. |
7936 | bool IsRepeat = false; |
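     | // If this input repeats an earlier one, redirect its mask range onto the |
     | // earlier copy and slide every higher reference down one input width. |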
7937 | for (int j = 0, ue = UsedInputs.size(); j != ue; ++j) { |
7938 | if (UsedInputs[j] != Inputs[i]) |
7939 | continue; |
7940 | for (int &M : Mask) |
7941 | if (lo <= M) |
7942 | M = (M < hi) ? ((M - lo) + (j * MaskWidth)) : (M - MaskWidth); |
7943 | IsRepeat = true; |
7944 | break; |
7945 | } |
7946 | if (IsRepeat) |
7947 | continue; |
7948 | |
7949 | UsedInputs.push_back(Inputs[i]); |
7950 | } |
7951 | Inputs = UsedInputs; |
7952 | } |
7953 | |
7954 | // Calls getTargetShuffleAndZeroables to resolve a target shuffle mask's inputs |
7955 | // and then sets the SM_SentinelUndef and SM_SentinelZero values. |
7956 | // Returns true if the target shuffle mask was decoded. |
7957 | static bool getTargetShuffleInputs(SDValue Op, const APInt &DemandedElts, |
7958 | SmallVectorImpl<SDValue> &Inputs, |
7959 | SmallVectorImpl<int> &Mask, |
7960 | APInt &KnownUndef, APInt &KnownZero, |
7961 | const SelectionDAG &DAG, unsigned Depth, |
7962 | bool ResolveKnownElts) { |
7963 | EVT VT = Op.getValueType(); |
7964 | if (!VT.isSimple() || !VT.isVector()) |
7965 | return false; |
7966 | |
7967 | if (getTargetShuffleAndZeroables(Op, Mask, Inputs, KnownUndef, KnownZero)) { |
7968 | if (ResolveKnownElts) |
7969 | resolveTargetShuffleFromZeroables(Mask, KnownUndef, KnownZero); |
7970 | return true; |
7971 | } |
7972 | if (getFauxShuffleMask(Op, DemandedElts, Mask, Inputs, DAG, Depth, |
7973 | ResolveKnownElts)) { |
7974 | resolveZeroablesFromTargetShuffle(Mask, KnownUndef, KnownZero); |
7975 | return true; |
7976 | } |
7977 | return false; |
7978 | } |
7979 | |
7980 | static bool getTargetShuffleInputs(SDValue Op, SmallVectorImpl<SDValue> &Inputs, |
7981 | SmallVectorImpl<int> &Mask, |
7982 | const SelectionDAG &DAG, unsigned Depth = 0, |
7983 | bool ResolveKnownElts = true) { |
7984 | EVT VT = Op.getValueType(); |
7985 | if (!VT.isSimple() || !VT.isVector()) |
7986 | return false; |
7987 | |
7988 | APInt KnownUndef, KnownZero; |
7989 | unsigned NumElts = Op.getValueType().getVectorNumElements(); |
7990 | APInt DemandedElts = APInt::getAllOnesValue(NumElts); |
7991 | return getTargetShuffleInputs(Op, DemandedElts, Inputs, Mask, KnownUndef, |
7992 | KnownZero, DAG, Depth, ResolveKnownElts); |
7993 | } |
7994 | |
7995 | // Attempt to create a scalar/subvector broadcast load from the base MemSDNode. |
7996 | static SDValue getBROADCAST_LOAD(unsigned Opcode, const SDLoc &DL, EVT VT, |
7997 | EVT MemVT, MemSDNode *Mem, unsigned Offset, |
7998 | SelectionDAG &DAG) { |
7999 | assert((Opcode == X86ISD::VBROADCAST_LOAD || |
8000 | Opcode == X86ISD::SUBV_BROADCAST_LOAD) && |
8001 | "Unknown broadcast load type"); |
8002 | |
8003 | // Ensure this is a simple (non-atomic, non-volatile), temporal read memop. |
8004 | if (!Mem || !Mem->readMem() || !Mem->isSimple() || Mem->isNonTemporal()) |
8005 | return SDValue(); |
8006 | |
8007 | SDValue Ptr = |
8008 | DAG.getMemBasePlusOffset(Mem->getBasePtr(), TypeSize::Fixed(Offset), DL); |
8009 | SDVTList Tys = DAG.getVTList(VT, MVT::Other); |
8010 | SDValue Ops[] = {Mem->getChain(), Ptr}; |
8011 | SDValue BcstLd = DAG.getMemIntrinsicNode( |
8012 | Opcode, DL, Tys, Ops, MemVT, |
8013 | DAG.getMachineFunction().getMachineMemOperand( |
8014 | Mem->getMemOperand(), Offset, MemVT.getStoreSize())); |
8015 | DAG.makeEquivalentMemoryOrdering(SDValue(Mem, 1), BcstLd.getValue(1)); |
8016 | return BcstLd; |
8017 | } |
8018 | |
8019 | // Returns the scalar element that will make up the Index'th element of the |
8020 | // result of the vector shuffle. |
8021 | static SDValue getShuffleScalarElt(SDValue Op, unsigned Index, |
8022 | SelectionDAG &DAG, unsigned Depth) { |
8023 | if (Depth >= SelectionDAG::MaxRecursionDepth) |
8024 | return SDValue(); |
8025 | |
8026 | EVT VT = Op.getValueType(); |
8027 | unsigned Opcode = Op.getOpcode(); |
8028 | unsigned NumElems = VT.getVectorNumElements(); |
8029 | |
8030 | // Recurse into ISD::VECTOR_SHUFFLE node to find scalars. |
8031 | if (auto *SV = dyn_cast<ShuffleVectorSDNode>(Op)) { |
8032 | int Elt = SV->getMaskElt(Index); |
8033 | |
8034 | if (Elt < 0) |
8035 | return DAG.getUNDEF(VT.getVectorElementType()); |
8036 | |
8037 | SDValue Src = (Elt < (int)NumElems) ? SV->getOperand(0) : SV->getOperand(1); |
8038 | return getShuffleScalarElt(Src, Elt % NumElems, DAG, Depth + 1); |
8039 | } |
8040 | |
8041 | // Recurse into target specific vector shuffles to find scalars. |
8042 | if (isTargetShuffle(Opcode)) { |
8043 | MVT ShufVT = VT.getSimpleVT(); |
8044 | MVT ShufSVT = ShufVT.getVectorElementType(); |
8045 | int NumElems = (int)ShufVT.getVectorNumElements(); |
8046 | SmallVector<int, 16> ShuffleMask; |
8047 | SmallVector<SDValue, 16> ShuffleOps; |
8048 | if (!getTargetShuffleMask(Op.getNode(), ShufVT, true, ShuffleOps, |
8049 | ShuffleMask)) |
8050 | return SDValue(); |
8051 | |
8052 | int Elt = ShuffleMask[Index]; |
8053 | if (Elt == SM_SentinelZero) |
8054 | return ShufSVT.isInteger() ? DAG.getConstant(0, SDLoc(Op), ShufSVT) |
8055 | : DAG.getConstantFP(+0.0, SDLoc(Op), ShufSVT); |
8056 | if (Elt == SM_SentinelUndef) |
8057 | return DAG.getUNDEF(ShufSVT); |
8058 | |
8059 | assert(0 <= Elt && Elt < (2 * NumElems) && "Shuffle index out of range"); |
8060 | SDValue Src = (Elt < NumElems) ? ShuffleOps[0] : ShuffleOps[1]; |
8061 | return getShuffleScalarElt(Src, Elt % NumElems, DAG, Depth + 1); |
8062 | } |
8063 | |
8064 | // Recurse into insert_subvector base/sub vector to find scalars. |
8065 | if (Opcode == ISD::INSERT_SUBVECTOR) { |
8066 | SDValue Vec = Op.getOperand(0); |
8067 | SDValue Sub = Op.getOperand(1); |
8068 | uint64_t SubIdx = Op.getConstantOperandVal(2); |
8069 | unsigned NumSubElts = Sub.getValueType().getVectorNumElements(); |
8070 | |
8071 | if (SubIdx <= Index && Index < (SubIdx + NumSubElts)) |
8072 | return getShuffleScalarElt(Sub, Index - SubIdx, DAG, Depth + 1); |
8073 | return getShuffleScalarElt(Vec, Index, DAG, Depth + 1); |
8074 | } |
8075 | |
8076 | // Recurse into concat_vectors sub vector to find scalars. |
8077 | if (Opcode == ISD::CONCAT_VECTORS) { |
8078 | EVT SubVT = Op.getOperand(0).getValueType(); |
8079 | unsigned NumSubElts = SubVT.getVectorNumElements(); |
8080 | uint64_t SubIdx = Index / NumSubElts; |
8081 | uint64_t SubElt = Index % NumSubElts; |
8082 | return getShuffleScalarElt(Op.getOperand(SubIdx), SubElt, DAG, Depth + 1); |
8083 | } |
8084 | |
8085 | // Recurse into extract_subvector src vector to find scalars. |
8086 | if (Opcode == ISD::EXTRACT_SUBVECTOR) { |
8087 | SDValue Src = Op.getOperand(0); |
8088 | uint64_t SrcIdx = Op.getConstantOperandVal(1); |
8089 | return getShuffleScalarElt(Src, Index + SrcIdx, DAG, Depth + 1); |
8090 | } |
8091 | |
8092 | // We only peek through bitcasts of same-sized vectors. |
8093 | if (Opcode == ISD::BITCAST) { |
8094 | SDValue Src = Op.getOperand(0); |
8095 | EVT SrcVT = Src.getValueType(); |
8096 | if (SrcVT.isVector() && SrcVT.getVectorNumElements() == NumElems) |
8097 | return getShuffleScalarElt(Src, Index, DAG, Depth + 1); |
8098 | return SDValue(); |
8099 | } |
8100 | |
8101 | |
8102 | |
8103 | // If this insert_vector_elt inserts at the requested index, return the |
8104 | // inserted scalar, otherwise keep searching the base vector. |
8105 | if (Opcode == ISD::INSERT_VECTOR_ELT && |
8106 | isa<ConstantSDNode>(Op.getOperand(2))) { |
8107 | if (Op.getConstantOperandAPInt(2) == Index) |
8108 | return Op.getOperand(1); |
8109 | return getShuffleScalarElt(Op.getOperand(0), Index, DAG, Depth + 1); |
8110 | } |
8111 | |
8112 | if (Opcode == ISD::SCALAR_TO_VECTOR) |
8113 | return (Index == 0) ? Op.getOperand(0) |
8114 | : DAG.getUNDEF(VT.getVectorElementType()); |
8115 | |
8116 | if (Opcode == ISD::BUILD_VECTOR) |
8117 | return Op.getOperand(Index); |
8118 | |
8119 | return SDValue(); |
8120 | } |
8121 | |
8122 | // Use PINSRB/PINSRW/PINSRD to create a build vector. |
8123 | static SDValue LowerBuildVectorAsInsert(SDValue Op, const APInt &NonZeroMask, |
8124 | unsigned NumNonZero, unsigned NumZero, |
8125 | SelectionDAG &DAG, |
8126 | const X86Subtarget &Subtarget) { |
8127 | MVT VT = Op.getSimpleValueType(); |
8128 | unsigned NumElts = VT.getVectorNumElements(); |
8129 | assert(((VT == MVT::v8i16 && Subtarget.hasSSE2()) || |
8130 | ((VT == MVT::v16i8 || VT == MVT::v4i32) && Subtarget.hasSSE41())) && |
8131 | "Illegal vector insertion"); |
8132 | |
8133 | SDLoc dl(Op); |
8134 | SDValue V; |
8135 | bool First = true; |
8136 | |
8137 | for (unsigned i = 0; i < NumElts; ++i) { |
8138 | bool IsNonZero = NonZeroMask[i]; |
8139 | if (!IsNonZero) |
8140 | continue; |
8141 | |
8142 | // If the build vector contains zeros or our first insertion is not the |
8143 | // first index, then insert into a zero vector to break any register |
8144 | // dependency, else use SCALAR_TO_VECTOR. |
8145 | if (First) { |
8146 | First = false; |
8147 | if (NumZero || 0 != i) |
8148 | V = getZeroVector(VT, Subtarget, DAG, dl); |
8149 | else { |
8150 | assert(0 == i && "Expected insertion into zero-index"); |
8151 | V = DAG.getAnyExtOrTrunc(Op.getOperand(i), dl, MVT::i32); |
8152 | V = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, V); |
8153 | V = DAG.getBitcast(VT, V); |
8154 | continue; |
8155 | } |
8156 | } |
8157 | V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, V, Op.getOperand(i), |
8158 | DAG.getIntPtrConstant(i, dl)); |
8159 | } |
8160 | |
8161 | return V; |
8162 | } |
8163 | |
8164 | // Custom lower build_vector of v16i8. |
8165 | static SDValue LowerBuildVectorv16i8(SDValue Op, const APInt &NonZeroMask, |
8166 | unsigned NumNonZero, unsigned NumZero, |
8167 | SelectionDAG &DAG, |
8168 | const X86Subtarget &Subtarget) { |
8169 | if (NumNonZero > 8 && !Subtarget.hasSSE41()) |
8170 | return SDValue(); |
8171 | |
8172 | // SSE4.1 - use PINSRB to insert each byte directly. |
8173 | if (Subtarget.hasSSE41()) |
8174 | return LowerBuildVectorAsInsert(Op, NonZeroMask, NumNonZero, NumZero, DAG, |
8175 | Subtarget); |
8176 | |
8177 | SDLoc dl(Op); |
8178 | SDValue V; |
8179 | |
8180 | // Pre-SSE4.1 - merge byte pairs and insert each i16 element with PINSRW. |
8181 | for (unsigned i = 0; i < 16; i += 2) { |
8182 | bool ThisIsNonZero = NonZeroMask[i]; |
8183 | bool NextIsNonZero = NonZeroMask[i + 1]; |
8184 | if (!ThisIsNonZero && !NextIsNonZero) |
8185 | continue; |
8186 | |
8187 | // Pack the two bytes into one i32: this byte in bits 7:0, the next in 15:8. |
8188 | SDValue Elt; |
8189 | if (ThisIsNonZero) { |
8190 | if (NumZero || NextIsNonZero) |
8191 | Elt = DAG.getZExtOrTrunc(Op.getOperand(i), dl, MVT::i32); |
8192 | else |
8193 | Elt = DAG.getAnyExtOrTrunc(Op.getOperand(i), dl, MVT::i32); |
8194 | } |
8195 | |
8196 | if (NextIsNonZero) { |
8197 | SDValue NextElt = Op.getOperand(i + 1); |
8198 | if (i == 0 && NumZero) |
8199 | NextElt = DAG.getZExtOrTrunc(NextElt, dl, MVT::i32); |
8200 | else |
8201 | NextElt = DAG.getAnyExtOrTrunc(NextElt, dl, MVT::i32); |
8202 | NextElt = DAG.getNode(ISD::SHL, dl, MVT::i32, NextElt, |
8203 | DAG.getConstant(8, dl, MVT::i8)); |
8204 | if (ThisIsNonZero) |
8205 | Elt = DAG.getNode(ISD::OR, dl, MVT::i32, NextElt, Elt); |
8206 | else |
8207 | Elt = NextElt; |
8208 | } |
8209 | |
8210 | // If our first insertion is not the first index or zeros are needed, then |
8211 | // insert into a zero vector. Otherwise, use SCALAR_TO_VECTOR (leaves high |
8212 | // elements undefined). |
8213 | if (!V) { |
8214 | if (i != 0 || NumZero) |
8215 | V = getZeroVector(MVT::v8i16, Subtarget, DAG, dl); |
8216 | else { |
8217 | V = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Elt); |
8218 | V = DAG.getBitcast(MVT::v8i16, V); |
8219 | continue; |
8220 | } |
8221 | } |
8222 | Elt = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, Elt); |
8223 | V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, V, Elt, |
8224 | DAG.getIntPtrConstant(i / 2, dl)); |
8225 | } |
8226 | |
8227 | return DAG.getBitcast(MVT::v16i8, V); |
8228 | } |
8229 | |
8230 | // Custom lower build_vector of v8i16. |
8231 | static SDValue LowerBuildVectorv8i16(SDValue Op, const APInt &NonZeroMask, |
8232 | unsigned NumNonZero, unsigned NumZero, |
8233 | SelectionDAG &DAG, |
8234 | const X86Subtarget &Subtarget) { |
8235 | if (NumNonZero > 4 && !Subtarget.hasSSE41()) |
8236 | return SDValue(); |
8237 | |
8238 | // Use PINSRW to insert each element directly. |
8239 | return LowerBuildVectorAsInsert(Op, NonZeroMask, NumNonZero, NumZero, DAG, |
8240 | Subtarget); |
8241 | } |
8242 | |
8243 | // Custom lower build_vector of v4i32 or v4f32. |
8244 | static SDValue LowerBuildVectorv4x32(SDValue Op, SelectionDAG &DAG, |
8245 | const X86Subtarget &Subtarget) { |
8246 | // If this is a splat of a pair of elements, use MOVDDUP (unless the target |
8247 | // has XOP; in that case defer lowering to potentially use VPERMIL2PS). |
8248 | // Because we're creating a less complicated build vector here, we may enable |
8249 | // further folding of the MOVDDUP via shuffle transforms. |
8250 | if (Subtarget.hasSSE3() && !Subtarget.hasXOP() && |
8251 | Op.getOperand(0) == Op.getOperand(2) && |
8252 | Op.getOperand(1) == Op.getOperand(3) && |
8253 | Op.getOperand(0) != Op.getOperand(1)) { |
8254 | SDLoc DL(Op); |
8255 | MVT VT = Op.getSimpleValueType(); |
8256 | MVT EltVT = VT.getVectorElementType(); |
8257 | |
8258 | // Build {Op0, Op1, undef, undef}, bitcast to v2f64 and duplicate the low half. |
8259 | SDValue Ops[4] = { Op.getOperand(0), Op.getOperand(1), |
8260 | DAG.getUNDEF(EltVT), DAG.getUNDEF(EltVT) }; |
8261 | SDValue NewBV = DAG.getBitcast(MVT::v2f64, DAG.getBuildVector(VT, DL, Ops)); |
8262 | SDValue Dup = DAG.getNode(X86ISD::MOVDDUP, DL, MVT::v2f64, NewBV); |
8263 | return DAG.getBitcast(VT, Dup); |
8264 | } |
8265 | |
8266 | // Find all zeroable elements. |
8267 | std::bitset<4> Zeroable, Undefs; |
8268 | for (int i = 0; i < 4; ++i) { |
8269 | SDValue Elt = Op.getOperand(i); |
8270 | Undefs[i] = Elt.isUndef(); |
8271 | Zeroable[i] = (Elt.isUndef() || X86::isZeroNode(Elt)); |
8272 | } |
8273 | assert(Zeroable.size() - Zeroable.count() > 1 && |
8274 | "We expect at least two non-zero elements!"); |
8275 | |
8276 | // We only know how to deal with build_vector nodes where elements are either |
8277 | // zeroable or extract_vector_elt with a constant index. |
8278 | SDValue FirstNonZero; |
8279 | unsigned FirstNonZeroIdx; |
8280 | for (unsigned i = 0; i < 4; ++i) { |
8281 | if (Zeroable[i]) |
8282 | continue; |
8283 | SDValue Elt = Op.getOperand(i); |
8284 | if (Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT || |
8285 | !isa<ConstantSDNode>(Elt.getOperand(1))) |
8286 | return SDValue(); |
8287 | |
8288 | MVT VT = Elt.getOperand(0).getSimpleValueType(); |
8289 | if (!VT.is128BitVector()) |
8290 | return SDValue(); |
8291 | if (!FirstNonZero.getNode()) { |
8292 | FirstNonZero = Elt; |
8293 | FirstNonZeroIdx = i; |
8294 | } |
8295 | } |
8296 | |
8297 | assert(FirstNonZero.getNode() && "Unexpected build vector of all zeros!"); |
8298 | SDValue V1 = FirstNonZero.getOperand(0); |
8299 | MVT VT = V1.getSimpleValueType(); |
8300 | |
8301 | // See if this build_vector can be lowered as a blend with zero. |
8302 | SDValue Elt; |
8303 | unsigned EltMaskIdx, EltIdx; |
8304 | int Mask[4]; |
8305 | for (EltIdx = 0; EltIdx < 4; ++EltIdx) { |
8306 | if (Zeroable[EltIdx]) { |
8307 | // The zero vector will be on the right side of the blend. |
8308 | Mask[EltIdx] = EltIdx + 4; |
8309 | continue; |
8310 | } |
8311 | |
8312 | Elt = Op->getOperand(EltIdx); |
8313 | |
8314 | EltMaskIdx = Elt.getConstantOperandVal(1); |
8315 | if (Elt.getOperand(0) != V1 || EltMaskIdx != EltIdx) |
8316 | break; |
8317 | Mask[EltIdx] = EltIdx; |
8318 | } |
8319 | |
8320 | if (EltIdx == 4) { |
8321 | // Let the shuffle legalizer deal with blend operations. |
8322 | SDValue VZeroOrUndef = (Zeroable == Undefs) |
8323 | ? DAG.getUNDEF(VT) |
8324 | : getZeroVector(VT, Subtarget, DAG, SDLoc(Op)); |
8325 | if (V1.getSimpleValueType() != VT) |
8326 | V1 = DAG.getBitcast(VT, V1); |
8327 | return DAG.getVectorShuffle(VT, SDLoc(V1), V1, VZeroOrUndef, Mask); |
8328 | } |
8329 | |
8330 | // See if we can lower this build_vector to an INSERTPS. |
8331 | if (!Subtarget.hasSSE41()) |
8332 | return SDValue(); |
8333 | |
8334 | SDValue V2 = Elt.getOperand(0); |
8335 | if (Elt == FirstNonZero && EltIdx == FirstNonZeroIdx) |
8336 | V1 = SDValue(); |
8337 | |
8338 | bool CanFold = true; |
8339 | for (unsigned i = EltIdx + 1; i < 4 && CanFold; ++i) { |
8340 | if (Zeroable[i]) |
8341 | continue; |
8342 | |
8343 | SDValue Current = Op->getOperand(i); |
8344 | SDValue SrcVector = Current->getOperand(0); |
8345 | if (!V1.getNode()) |
8346 | V1 = SrcVector; |
8347 | CanFold = (SrcVector == V1) && (Current.getConstantOperandAPInt(1) == i); |
8348 | } |
8349 | |
8350 | if (!CanFold) |
8351 | return SDValue(); |
8352 | |
8353 | assert(V1.getNode() && "Expected at least two non-zero elements!"); |
8354 | if (V1.getSimpleValueType() != MVT::v4f32) |
8355 | V1 = DAG.getBitcast(MVT::v4f32, V1); |
8356 | if (V2.getSimpleValueType() != MVT::v4f32) |
8357 | V2 = DAG.getBitcast(MVT::v4f32, V2); |
8358 | |
8359 | // Ok, we can emit an INSERTPS instruction. |
8360 | unsigned ZMask = Zeroable.to_ulong(); |
8361 | |
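     | // INSERTPS immediate layout: bits[7:6] = source element (CountS), |
     | // bits[5:4] = destination element (CountD), bits[3:0] = zero mask. |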
8362 | unsigned InsertPSMask = EltMaskIdx << 6 | EltIdx << 4 | ZMask; |
8363 | assert((InsertPSMask & ~0xFFu) == 0 && "Invalid mask!"); |
8364 | SDLoc DL(Op); |
8365 | SDValue Result = DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32, V1, V2, |
8366 | DAG.getIntPtrConstant(InsertPSMask, DL, true)); |
8367 | return DAG.getBitcast(VT, Result); |
8368 | } |
8369 | |
8370 | // Return a vector logical shift node. |
8371 | static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp, unsigned NumBits, |
8372 | SelectionDAG &DAG, const TargetLowering &TLI, |
8373 | const SDLoc &dl) { |
8374 | assert(VT.is128BitVector() && "Unknown type for VShift"); |
8375 | MVT ShVT = MVT::v16i8; |
8376 | unsigned Opc = isLeft ? X86ISD::VSHLDQ : X86ISD::VSRLDQ; |
8377 | SrcOp = DAG.getBitcast(ShVT, SrcOp); |
8378 | assert(NumBits % 8 == 0 && "Only support byte sized shifts"); |
8379 | SDValue ShiftVal = DAG.getTargetConstant(NumBits / 8, dl, MVT::i8); |
8380 | return DAG.getBitcast(VT, DAG.getNode(Opc, dl, ShVT, SrcOp, ShiftVal)); |
8381 | } |
8382 | |
8383 | static SDValue LowerAsSplatVectorLoad(SDValue SrcOp, MVT VT, const SDLoc &dl, |
8384 | SelectionDAG &DAG) { |
8385 | |
8386 | // Check if the scalar load can be widened into a vector load. And if |
8387 | // the address is "base + cst", see if the cst can be "absorbed" into |
8388 | // the shuffle mask. |
8389 | if (LoadSDNode *LD = dyn_cast<LoadSDNode>(SrcOp)) { |
8390 | SDValue Ptr = LD->getBasePtr(); |
8391 | if (!ISD::isNormalLoad(LD) || !LD->isSimple()) |
8392 | return SDValue(); |
8393 | EVT PVT = LD->getValueType(0); |
8394 | if (PVT != MVT::i32 && PVT != MVT::f32) |
8395 | return SDValue(); |
8396 | |
8397 | int FI = -1; |
8398 | int64_t Offset = 0; |
8399 | if (FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr)) { |
8400 | FI = FINode->getIndex(); |
8401 | Offset = 0; |
8402 | } else if (DAG.isBaseWithConstantOffset(Ptr) && |
8403 | isa<FrameIndexSDNode>(Ptr.getOperand(0))) { |
8404 | FI = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex(); |
8405 | Offset = Ptr.getConstantOperandVal(1); |
8406 | Ptr = Ptr.getOperand(0); |
8407 | } else { |
8408 | return SDValue(); |
8409 | } |
8410 | |
8411 | // FIXME: 256-bit vector instructions don't require a strict alignment, |
8412 | // improve this code to support it better. |
8413 | Align RequiredAlign(VT.getSizeInBits() / 8); |
8414 | SDValue Chain = LD->getChain(); |
8415 | |
8416 | MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); |
8417 | MaybeAlign InferredAlign = DAG.InferPtrAlign(Ptr); |
8418 | if (!InferredAlign || *InferredAlign < RequiredAlign) { |
8419 | if (MFI.isFixedObjectIndex(FI)) { |
8420 | // Can't change the alignment. FIXME: It's possible to compute |
8421 | // the exact stack offset and reference FI + adjust offset instead. |
8422 | // If someone *really* cares about this, that's the way to implement it. |
8423 | return SDValue(); |
8424 | } else { |
8425 | MFI.setObjectAlignment(FI, RequiredAlign); |
8426 | } |
8427 | } |
8428 | |
8429 | // The offset into the required alignment window must be a multiple of 4 |
8430 | // so the remainder can be absorbed into the splat element index. |
8431 | if (Offset < 0) |
8432 | return SDValue(); |
8433 | if ((Offset % RequiredAlign.value()) & 3) |
8434 | return SDValue(); |
8435 | int64_t StartOffset = Offset & ~int64_t(RequiredAlign.value() - 1); |
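     | // StartOffset is the constant offset rounded down to the required |
     | // alignment; the residual (Offset - StartOffset) picks which 32-bit |
     | // element of the widened load to splat. |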
8436 | if (StartOffset) { |
8437 | SDLoc DL(Ptr); |
8438 | Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, |
8439 | DAG.getConstant(StartOffset, DL, Ptr.getValueType())); |
8440 | } |
8441 | |
8442 | int EltNo = (Offset - StartOffset) >> 2; |
8443 | unsigned NumElems = VT.getVectorNumElements(); |
8444 | |
8445 | EVT NVT = EVT::getVectorVT(*DAG.getContext(), PVT, NumElems); |
8446 | SDValue V1 = DAG.getLoad(NVT, dl, Chain, Ptr, |
8447 | LD->getPointerInfo().getWithOffset(StartOffset)); |
8448 | |
8449 | SmallVector<int, 8> Mask(NumElems, EltNo); |
8450 | |
8451 | return DAG.getVectorShuffle(NVT, dl, V1, DAG.getUNDEF(NVT), Mask); |
8452 | } |
8453 | |
8454 | return SDValue(); |
8455 | } |
8456 | |
8457 | // Recurse to find a LoadSDNode source and the accumulated byte offset. |
8458 | static bool findEltLoadSrc(SDValue Elt, LoadSDNode *&Ld, int64_t &ByteOffset) { |
8459 | if (ISD::isNON_EXTLoad(Elt.getNode())) { |
8460 | auto *BaseLd = cast<LoadSDNode>(Elt); |
8461 | if (!BaseLd->isSimple()) |
8462 | return false; |
8463 | Ld = BaseLd; |
8464 | ByteOffset = 0; |
8465 | return true; |
8466 | } |
8467 | |
8468 | switch (Elt.getOpcode()) { |
8469 | case ISD::BITCAST: |
8470 | case ISD::TRUNCATE: |
8471 | case ISD::SCALAR_TO_VECTOR: |
8472 | return findEltLoadSrc(Elt.getOperand(0), Ld, ByteOffset); |
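     | // A right shift by a byte multiple just selects later bytes of the same |
     | // loaded value, so fold the shift amount into the byte offset. |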
8473 | case ISD::SRL: |
8474 | if (auto *IdxC = dyn_cast<ConstantSDNode>(Elt.getOperand(1))) { |
8475 | uint64_t Idx = IdxC->getZExtValue(); |
8476 | if ((Idx % 8) == 0 && findEltLoadSrc(Elt.getOperand(0), Ld, ByteOffset)) { |
8477 | ByteOffset += Idx / 8; |
8478 | return true; |
8479 | } |
8480 | } |
8481 | break; |
8482 | case ISD::EXTRACT_VECTOR_ELT: |
8483 | if (auto *IdxC = dyn_cast<ConstantSDNode>(Elt.getOperand(1))) { |
8484 | SDValue Src = Elt.getOperand(0); |
8485 | unsigned SrcSizeInBits = Src.getScalarValueSizeInBits(); |
8486 | unsigned DstSizeInBits = Elt.getScalarValueSizeInBits(); |
8487 | if (DstSizeInBits == SrcSizeInBits && (SrcSizeInBits % 8) == 0 && |
8488 | findEltLoadSrc(Src, Ld, ByteOffset)) { |
8489 | uint64_t Idx = IdxC->getZExtValue(); |
8490 | ByteOffset += Idx * (SrcSizeInBits / 8); |
8491 | return true; |
8492 | } |
8493 | } |
8494 | break; |
8495 | } |
8496 | |
8497 | return false; |
8498 | } |
8499 | |
8500 | // Given the initializing elements 'Elts' of a build vector of type 'VT', see |
8501 | // if the elements can be replaced by a single large load which has the same |
8502 | // value as a build_vector whose loaded operands are 'Elts'. |
8503 | // |
8504 | // Example: <load i32 *a, load i32 *a+4, zero, undef> -> zextload a |
8505 | static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts, |
8506 | const SDLoc &DL, SelectionDAG &DAG, |
8507 | const X86Subtarget &Subtarget, |
8508 | bool IsAfterLegalize) { |
8509 | if ((VT.getScalarSizeInBits() % 8) != 0) |
8510 | return SDValue(); |
8511 | |
8512 | unsigned NumElems = Elts.size(); |
8513 | |
8514 | int LastLoadedElt = -1; |
8515 | APInt LoadMask = APInt::getNullValue(NumElems); |
8516 | APInt ZeroMask = APInt::getNullValue(NumElems); |
8517 | APInt UndefMask = APInt::getNullValue(NumElems); |
8518 | |
8519 | SmallVector<LoadSDNode*, 8> Loads(NumElems, nullptr); |
8520 | SmallVector<int64_t, 8> ByteOffsets(NumElems, 0); |
8521 | |
8522 | // For each element in the initializer, see if we've found a load, zero or |
8523 | // an undef. |
8524 | for (unsigned i = 0; i < NumElems; ++i) { |
8525 | SDValue Elt = peekThroughBitcasts(Elts[i]); |
8526 | if (!Elt.getNode()) |
8527 | return SDValue(); |
8528 | if (Elt.isUndef()) { |
8529 | UndefMask.setBit(i); |
8530 | continue; |
8531 | } |
8532 | if (X86::isZeroNode(Elt) || ISD::isBuildVectorAllZeros(Elt.getNode())) { |
8533 | ZeroMask.setBit(i); |
8534 | continue; |
8535 | } |
8536 | |
8537 | // Each loaded element must be the correct fractional portion of the |
8538 | // requested vector load. |
8539 | unsigned EltSizeInBits = Elt.getValueSizeInBits(); |
8540 | if ((NumElems * EltSizeInBits) != VT.getSizeInBits()) |
8541 | return SDValue(); |
8542 | |
8543 | if (!findEltLoadSrc(Elt, Loads[i], ByteOffsets[i]) || ByteOffsets[i] < 0) |
8544 | return SDValue(); |
8545 | unsigned LoadSizeInBits = Loads[i]->getValueSizeInBits(0); |
8546 | if (((ByteOffsets[i] * 8) + EltSizeInBits) > LoadSizeInBits) |
8547 | return SDValue(); |
8548 | |
8549 | LoadMask.setBit(i); |
8550 | LastLoadedElt = i; |
8551 | } |
8552 | assert((ZeroMask.countPopulation() + UndefMask.countPopulation() + |
8553 | LoadMask.countPopulation()) == NumElems && |
8554 | "Incomplete element masks"); |
8555 | |
8556 | // Handle special cases - all undef or undef/zero. |
8557 | if (UndefMask.countPopulation() == NumElems) |
8558 | return DAG.getUNDEF(VT); |
8559 | if ((ZeroMask.countPopulation() + UndefMask.countPopulation()) == NumElems) |
8560 | return VT.isInteger() ? DAG.getConstant(0, DL, VT) |
8561 | : DAG.getConstantFP(0.0, DL, VT); |
8562 | |
8563 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
8564 | int FirstLoadedElt = LoadMask.countTrailingZeros(); |
8565 | SDValue EltBase = peekThroughBitcasts(Elts[FirstLoadedElt]); |
8566 | EVT EltBaseVT = EltBase.getValueType(); |
8567 | assert(EltBaseVT.getSizeInBits() == EltBaseVT.getStoreSizeInBits() && |
8568 | "Register/Memory size mismatch"); |
8569 | LoadSDNode *LDBase = Loads[FirstLoadedElt]; |
8570 | assert(LDBase && "Did not find base load for merging consecutive loads"); |
8571 | unsigned BaseSizeInBits = EltBaseVT.getStoreSizeInBits(); |
8572 | unsigned BaseSizeInBytes = BaseSizeInBits / 8; |
8573 | int NumLoadedElts = (1 + LastLoadedElt - FirstLoadedElt); |
8574 | int LoadSizeInBits = NumLoadedElts * BaseSizeInBits; |
8575 | assert((BaseSizeInBits % 8) == 0 && "Sub-byte element loads detected"); |
8576 | |
8577 | // TODO: Support offsetting the base load. |
8578 | if (ByteOffsets[FirstLoadedElt] != 0) |
8579 | return SDValue(); |
8580 | |
8581 | // Check to see if the element's load is consecutive to the base load |
8582 | // or offset from a previous (already checked) load. |
8583 | auto CheckConsecutiveLoad = [&](LoadSDNode *Base, int EltIdx) { |
8584 | LoadSDNode *Ld = Loads[EltIdx]; |
8585 | int64_t ByteOffset = ByteOffsets[EltIdx]; |
8586 | if (ByteOffset && (ByteOffset % BaseSizeInBytes) == 0) { |
8587 | int64_t BaseIdx = EltIdx - (ByteOffset / BaseSizeInBytes); |
8588 | return (0 <= BaseIdx && BaseIdx < (int)NumElems && LoadMask[BaseIdx] && |
8589 | Loads[BaseIdx] == Ld && ByteOffsets[BaseIdx] == 0); |
8590 | } |
8591 | return DAG.areNonVolatileConsecutiveLoads(Ld, Base, BaseSizeInBytes, |
8592 | EltIdx - FirstLoadedElt); |
8593 | }; |
8594 | |
8595 | // Consecutive loads can contain UNDEFs but not ZERO elements. |
8596 | // Consecutive loads with UNDEF and ZERO elements require an |
8597 | // additional shuffle stage to clear the ZERO elements. |
8598 | bool IsConsecutiveLoad = true; |
8599 | bool IsConsecutiveLoadWithZeros = true; |
8600 | for (int i = FirstLoadedElt + 1; i <= LastLoadedElt; ++i) { |
8601 | if (LoadMask[i]) { |
8602 | if (!CheckConsecutiveLoad(LDBase, i)) { |
8603 | IsConsecutiveLoad = false; |
8604 | IsConsecutiveLoadWithZeros = false; |
8605 | break; |
8606 | } |
8607 | } else if (ZeroMask[i]) { |
8608 | IsConsecutiveLoad = false; |
8609 | } |
8610 | } |
8611 | |
8612 | auto CreateLoad = [&DAG, &DL, &Loads](EVT VT, LoadSDNode *LDBase) { |
8613 | auto MMOFlags = LDBase->getMemOperand()->getFlags(); |
8614 | assert(LDBase->isSimple() && |
8615 | "Cannot merge volatile or atomic loads."); |
8616 | SDValue NewLd = |
8617 | DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(), |
8618 | LDBase->getPointerInfo(), LDBase->getOriginalAlign(), |
8619 | MMOFlags); |
8620 | for (auto *LD : Loads) |
8621 | if (LD) |
8622 | DAG.makeEquivalentMemoryOrdering(LD, NewLd); |
8623 | return NewLd; |
8624 | }; |
8625 | |
8626 | // Check if the base load is entirely dereferenceable. |
8627 | bool IsDereferenceable = LDBase->getPointerInfo().isDereferenceable( |
8628 | VT.getSizeInBits() / 8, *DAG.getContext(), DAG.getDataLayout()); |
8629 | |
8630 | // LOAD - all consecutive load/undefs (must start/end with a load or be |
8631 | // entirely dereferenceable). If we have found an entire vector of loads and |
8632 | // undefs, then find a larger load to load everything in one go. |
8633 | // IsConsecutiveLoadWithZeros - we need to create a shuffle of the loaded |
8634 | // vector and a zero vector to clear out the zero elements. |
8635 | if (FirstLoadedElt == 0 && |
8636 | (NumLoadedElts == (int)NumElems || IsDereferenceable) && |
8637 | (IsConsecutiveLoad || IsConsecutiveLoadWithZeros)) { |
8638 | if (IsAfterLegalize && !TLI.isOperationLegal(ISD::LOAD, VT)) |
8639 | return SDValue(); |
8640 | |
8641 | // Don't create 256-bit non-temporal aligned loads without AVX2 as these |
8642 | // will lower to regular temporal loads and use the cache. |
8643 | if (LDBase->isNonTemporal() && LDBase->getAlignment() >= 32 && |
8644 | VT.is256BitVector() && !Subtarget.hasInt256()) |
8645 | return SDValue(); |
8646 | |
8647 | if (NumElems == 1) |
8648 | return DAG.getBitcast(VT, Elts[FirstLoadedElt]); |
8649 | |
8650 | if (!ZeroMask) |
8651 | return CreateLoad(VT, LDBase); |
8652 | |
8653 | // IsConsecutiveLoadWithZeros - shuffle the loaded vector with a zero |
8654 | // vector to clear out the zero elements. |
8655 | if (!IsAfterLegalize && VT.isVector()) { |
8656 | unsigned NumMaskElts = VT.getVectorNumElements(); |
8657 | if ((NumMaskElts % NumElems) == 0) { |
8658 | unsigned Scale = NumMaskElts / NumElems; |
8659 | SmallVector<int, 4> ClearMask(NumMaskElts, -1); |
8660 | for (unsigned i = 0; i < NumElems; ++i) { |
8661 | if (UndefMask[i]) |
8662 | continue; |
8663 | int Offset = ZeroMask[i] ? NumMaskElts : 0; |
8664 | for (unsigned j = 0; j != Scale; ++j) |
8665 | ClearMask[(i * Scale) + j] = (i * Scale) + j + Offset; |
8666 | } |
8667 | SDValue V = CreateLoad(VT, LDBase); |
8668 | SDValue Z = VT.isInteger() ? DAG.getConstant(0, DL, VT) |
8669 | : DAG.getConstantFP(0.0, DL, VT); |
8670 | return DAG.getVectorShuffle(VT, DL, V, Z, ClearMask); |
8671 | } |
8672 | } |
8673 | } |
8674 | |
8675 | // If the upper half of a ymm/zmm load is undef then just load the lower half. |
8676 | if (VT.is256BitVector() || VT.is512BitVector()) { |
8677 | unsigned HalfNumElems = NumElems / 2; |
8678 | if (UndefMask.extractBits(HalfNumElems, HalfNumElems).isAllOnesValue()) { |
8679 | EVT HalfVT = |
8680 | EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(), HalfNumElems); |
8681 | SDValue HalfLD = |
8682 | EltsFromConsecutiveLoads(HalfVT, Elts.drop_back(HalfNumElems), DL, |
8683 | DAG, Subtarget, IsAfterLegalize); |
8684 | if (HalfLD) |
8685 | return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), |
8686 | HalfLD, DAG.getIntPtrConstant(0, DL)); |
8687 | } |
8688 | } |
8689 | |
8690 | // VZEXT_LOAD - consecutive 32/64-bit load/undefs followed by zeros/undefs. |
8691 | if (IsConsecutiveLoad && FirstLoadedElt == 0 && |
8692 | (LoadSizeInBits == 32 || LoadSizeInBits == 64) && |
8693 | ((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()))) { |
8694 | MVT VecSVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(LoadSizeInBits) |
8695 | : MVT::getIntegerVT(LoadSizeInBits); |
8696 | MVT VecVT = MVT::getVectorVT(VecSVT, VT.getSizeInBits() / LoadSizeInBits); |
8697 | |
8698 | // Allow v4f32 on SSE1 only targets. |
8699 | if (!Subtarget.hasSSE2() && VT == MVT::v4f32) |
8700 | VecVT = MVT::v4f32; |
8701 | if (TLI.isTypeLegal(VecVT)) { |
8702 | SDVTList Tys = DAG.getVTList(VecVT, MVT::Other); |
8703 | SDValue Ops[] = { LDBase->getChain(), LDBase->getBasePtr() }; |
8704 | SDValue ResNode = DAG.getMemIntrinsicNode( |
8705 | X86ISD::VZEXT_LOAD, DL, Tys, Ops, VecSVT, LDBase->getPointerInfo(), |
8706 | LDBase->getOriginalAlign(), MachineMemOperand::MOLoad); |
8707 | for (auto *LD : Loads) |
8708 | if (LD) |
8709 | DAG.makeEquivalentMemoryOrdering(LD, ResNode); |
8710 | return DAG.getBitcast(VT, ResNode); |
8711 | } |
8712 | } |
8713 | |
8714 | // BROADCAST - match the smallest possible repetition pattern, load that |
8715 | // scalar/subvector element and then broadcast to the entire vector. |
8716 | if (ZeroMask.isNullValue() && isPowerOf2_32(NumElems) && Subtarget.hasAVX() && |
8717 | (VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector())) { |
8718 | for (unsigned SubElems = 1; SubElems < NumElems; SubElems *= 2) { |
8719 | unsigned RepeatSize = SubElems * BaseSizeInBits; |
8720 | unsigned ScalarSize = std::min(RepeatSize, 64u); |
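     | // The repeating unit covers SubElems base elements (RepeatSize bits). |
     | // Scalar broadcasts handle at most 64 bits, so wider repetition patterns |
     | // are treated as subvector broadcasts instead. |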
8721 | if (!Subtarget.hasAVX2() && ScalarSize < 32) |
8722 | continue; |
8723 | |
8724 | // Don't attempt a 1:N subvector broadcast - it should be caught by |
8725 | // combineConcatVectorOps, else it will cause infinite loops. |
8726 | if (RepeatSize > ScalarSize && SubElems == 1) |
8727 | continue; |
8728 | |
8729 | bool Match = true; |
8730 | SmallVector<SDValue, 8> RepeatedLoads(SubElems, DAG.getUNDEF(EltBaseVT)); |
8731 | for (unsigned i = 0; i != NumElems && Match; ++i) { |
8732 | if (!LoadMask[i]) |
8733 | continue; |
8734 | SDValue Elt = peekThroughBitcasts(Elts[i]); |
8735 | if (RepeatedLoads[i % SubElems].isUndef()) |
8736 | RepeatedLoads[i % SubElems] = Elt; |
8737 | else |
8738 | Match &= (RepeatedLoads[i % SubElems] == Elt); |
8739 | } |
8740 | |
8741 | // We must have loads at both ends of the repetition. |
8742 | Match &= !RepeatedLoads.front().isUndef(); |
8743 | Match &= !RepeatedLoads.back().isUndef(); |
8744 | if (!Match) |
8745 | continue; |
8746 | |
8747 | EVT RepeatVT = |
8748 | VT.isInteger() && (RepeatSize != 64 || TLI.isTypeLegal(MVT::i64)) |
8749 | ? EVT::getIntegerVT(*DAG.getContext(), ScalarSize) |
8750 | : EVT::getFloatingPointVT(ScalarSize); |
8751 | if (RepeatSize > ScalarSize) |
8752 | RepeatVT = EVT::getVectorVT(*DAG.getContext(), RepeatVT, |
8753 | RepeatSize / ScalarSize); |
8754 | EVT BroadcastVT = |
8755 | EVT::getVectorVT(*DAG.getContext(), RepeatVT.getScalarType(), |
8756 | VT.getSizeInBits() / ScalarSize); |
8757 | if (TLI.isTypeLegal(BroadcastVT)) { |
8758 | if (SDValue RepeatLoad = EltsFromConsecutiveLoads( |
8759 | RepeatVT, RepeatedLoads, DL, DAG, Subtarget, IsAfterLegalize)) { |
8760 | SDValue Broadcast = RepeatLoad; |
8761 | if (RepeatSize > ScalarSize) { |
8762 | while (Broadcast.getValueSizeInBits() < VT.getSizeInBits()) |
8763 | Broadcast = concatSubVectors(Broadcast, Broadcast, DAG, DL); |
8764 | } else { |
8765 | Broadcast = |
8766 | DAG.getNode(X86ISD::VBROADCAST, DL, BroadcastVT, RepeatLoad); |
8767 | } |
8768 | return DAG.getBitcast(VT, Broadcast); |
8769 | } |
8770 | } |
8771 | } |
8772 | } |
8773 | |
8774 | return SDValue(); |
8775 | } |
8776 | |
8777 | // Combine vector ops (shuffles etc.) that are equal to build_vector load1, |
8778 | // load2, load3, load4, <0, 1, 2, 3> into a vector load if the load addresses |
8779 | // are consecutive, non-overlapping, and in the right order. |
8780 | static SDValue combineToConsecutiveLoads(EVT VT, SDValue Op, const SDLoc &DL, |
8781 | SelectionDAG &DAG, |
8782 | const X86Subtarget &Subtarget, |
8783 | bool IsAfterLegalize) { |
8784 | SmallVector<SDValue, 64> Elts; |
8785 | for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) { |
8786 | if (SDValue Elt = getShuffleScalarElt(Op, i, DAG, 0)) { |
8787 | Elts.push_back(Elt); |
8788 | continue; |
8789 | } |
8790 | return SDValue(); |
8791 | } |
8792 | assert(Elts.size() == VT.getVectorNumElements()); |
8793 | return EltsFromConsecutiveLoads(VT, Elts, DL, DAG, Subtarget, |
8794 | IsAfterLegalize); |
8795 | } |
8796 | |
8797 | static Constant *getConstantVector(MVT VT, const APInt &SplatValue, |
8798 | unsigned SplatBitSize, LLVMContext &C) { |
8799 | unsigned ScalarSize = VT.getScalarSizeInBits(); |
8800 | unsigned NumElm = SplatBitSize / ScalarSize; |
8801 | |
8802 | SmallVector<Constant *, 32> ConstantVec; |
8803 | for (unsigned i = 0; i < NumElm; i++) { |
8804 | APInt Val = SplatValue.extractBits(ScalarSize, ScalarSize * i); |
8805 | Constant *Const; |
8806 | if (VT.isFloatingPoint()) { |
8807 | if (ScalarSize == 32) { |
8808 | Const = ConstantFP::get(C, APFloat(APFloat::IEEEsingle(), Val)); |
8809 | } else { |
8810 | assert(ScalarSize == 64 && "Unsupported floating point scalar size"); |
8811 | Const = ConstantFP::get(C, APFloat(APFloat::IEEEdouble(), Val)); |
8812 | } |
8813 | } else |
8814 | Const = Constant::getIntegerValue(Type::getIntNTy(C, ScalarSize), Val); |
8815 | ConstantVec.push_back(Const); |
8816 | } |
8817 | return ConstantVector::get(ArrayRef<Constant *>(ConstantVec)); |
8818 | } |
8819 | |
8820 | static bool isFoldableUseOfShuffle(SDNode *N) { |
8821 | for (auto *U : N->uses()) { |
8822 | unsigned Opc = U->getOpcode(); |
8823 | // VPERMV/VPERMV3 shuffles can never fold their index operands. |
8824 | if (Opc == X86ISD::VPERMV && U->getOperand(0).getNode() == N) |
8825 | return false; |
8826 | if (Opc == X86ISD::VPERMV3 && U->getOperand(1).getNode() == N) |
8827 | return false; |
8828 | if (isTargetShuffle(Opc)) |
8829 | return true; |
8830 | if (Opc == ISD::BITCAST) |
8831 | return isFoldableUseOfShuffle(U); |
8832 | if (N->hasOneUse()) |
8833 | return true; |
8834 | } |
8835 | return false; |
8836 | } |
8837 | |
8838 | // Attempt to use the vbroadcast instruction to generate a splat value |
8839 | // from a splat BUILD_VECTOR which uses: |
8840 | //  a. A single scalar load, or a constant. |
8841 | //  b. A repeated pattern of constants (e.g. <0,1,0,1> or <0,1,2,3,0,1,2,3>). |
8842 | // |
8843 | // The VBROADCAST node is returned when a pattern is found, |
8844 | // or SDValue() otherwise. |
8845 | static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp, |
8846 | const X86Subtarget &Subtarget, |
8847 | SelectionDAG &DAG) { |
8848 | // VBROADCAST requires AVX. |
8849 | // TODO: Splats could be generated for non-AVX CPUs using SSE |
8850 | // instructions, but there's less potential gain for only 128-bit vectors. |
8851 | if (!Subtarget.hasAVX()) |
8852 | return SDValue(); |
8853 | |
8854 | MVT VT = BVOp->getSimpleValueType(0); |
8855 | unsigned NumElts = VT.getVectorNumElements(); |
8856 | SDLoc dl(BVOp); |
8857 | |
8858 | assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) && |
8859 | "Unsupported vector type for broadcast."); |
8860 | |
8861 | |
8862 | SDValue Ld; |
8863 | BitVector UndefElements; |
8864 | SmallVector<SDValue, 16> Sequence; |
8865 | if (BVOp->getRepeatedSequence(Sequence, &UndefElements)) { |
8866 | assert((NumElts % Sequence.size()) == 0 && "Sequence doesn't fit."); |
8867 | if (Sequence.size() == 1) |
8868 | Ld = Sequence[0]; |
8869 | } |
8870 | |
8871 | // Attempt to use VBROADCASTM. |
8872 | // From this pattern: |
8873 | //  a. t0 = (zext_i64 (bitcast_i8 v2i1 X)) |
8874 | //  b. t1 = (build_vector t0 t0) |
8875 | // |
8876 | // Create (VBROADCASTM v2i1 X) |
8877 | if (!Sequence.empty() && Subtarget.hasCDI()) { |
8878 | |
8879 | unsigned SeqLen = Sequence.size(); |
8880 | bool UpperZeroOrUndef = |
8881 | SeqLen == 1 || |
8882 | llvm::all_of(makeArrayRef(Sequence).drop_front(), [](SDValue V) { |
8883 | return !V || V.isUndef() || isNullConstant(V); |
8884 | }); |
8885 | SDValue Op0 = Sequence[0]; |
8886 | if (UpperZeroOrUndef && ((Op0.getOpcode() == ISD::BITCAST) || |
8887 | (Op0.getOpcode() == ISD::ZERO_EXTEND && |
8888 | Op0.getOperand(0).getOpcode() == ISD::BITCAST))) { |
8889 | SDValue BOperand = Op0.getOpcode() == ISD::BITCAST |
8890 | ? Op0.getOperand(0) |
8891 | : Op0.getOperand(0).getOperand(0); |
8892 | MVT MaskVT = BOperand.getSimpleValueType(); |
8893 | MVT EltType = MVT::getIntegerVT(VT.getScalarSizeInBits() * SeqLen); |
8894 | if ((EltType == MVT::i64 && MaskVT == MVT::v8i1) || |
8895 | (EltType == MVT::i32 && MaskVT == MVT::v16i1)) { |
8896 | MVT BcstVT = MVT::getVectorVT(EltType, NumElts / SeqLen); |
8897 | if (!VT.is512BitVector() && !Subtarget.hasVLX()) { |
8898 | unsigned Scale = 512 / VT.getSizeInBits(); |
8899 | BcstVT = MVT::getVectorVT(EltType, Scale * (NumElts / SeqLen)); |
8900 | } |
8901 | SDValue Bcst = DAG.getNode(X86ISD::VBROADCASTM, dl, BcstVT, BOperand); |
8902 | if (BcstVT.getSizeInBits() != VT.getSizeInBits()) |
8903 | Bcst = extractSubVector(Bcst, 0, DAG, dl, VT.getSizeInBits()); |
8904 | return DAG.getBitcast(VT, Bcst); |
8905 | } |
8906 | } |
8907 | } |
8908 | |
8909 | unsigned NumUndefElts = UndefElements.count(); |
8910 | if (!Ld || (NumElts - NumUndefElts) <= 1) { |
8911 | APInt SplatValue, Undef; |
8912 | unsigned SplatBitSize; |
8913 | bool HasUndef; |
8914 | |
8915 | if (BVOp->isConstantSplat(SplatValue, Undef, SplatBitSize, HasUndef) && |
8916 | SplatBitSize > VT.getScalarSizeInBits() && |
8917 | SplatBitSize < VT.getSizeInBits()) { |
8918 | // Avoid replacing with a broadcast when it's a use of a shuffle |
8919 | // instruction, to preserve the present custom lowering of shuffles. |
8920 | if (isFoldableUseOfShuffle(BVOp)) |
8921 | return SDValue(); |
8922 | |
8923 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
8924 | LLVMContext *Ctx = DAG.getContext(); |
8925 | MVT PVT = TLI.getPointerTy(DAG.getDataLayout()); |
8926 | if (Subtarget.hasAVX()) { |
8927 | if (SplatBitSize == 32 || SplatBitSize == 64 || |
8928 | (SplatBitSize < 32 && Subtarget.hasAVX2())) { |
8929 | |
8930 | // Load the constant scalar/subvector and broadcast it. |
8931 | MVT CVT = MVT::getIntegerVT(SplatBitSize); |
8932 | Type *ScalarTy = Type::getIntNTy(*Ctx, SplatBitSize); |
8933 | Constant *C = Constant::getIntegerValue(ScalarTy, SplatValue); |
8934 | SDValue CP = DAG.getConstantPool(C, PVT); |
8935 | unsigned Repeat = VT.getSizeInBits() / SplatBitSize; |
8936 | |
8937 | Align Alignment = cast<ConstantPoolSDNode>(CP)->getAlign(); |
8938 | SDVTList Tys = |
8939 | DAG.getVTList(MVT::getVectorVT(CVT, Repeat), MVT::Other); |
8940 | SDValue Ops[] = {DAG.getEntryNode(), CP}; |
8941 | MachinePointerInfo MPI = |
8942 | MachinePointerInfo::getConstantPool(DAG.getMachineFunction()); |
8943 | SDValue Brdcst = DAG.getMemIntrinsicNode( |
8944 | X86ISD::VBROADCAST_LOAD, dl, Tys, Ops, CVT, MPI, Alignment, |
8945 | MachineMemOperand::MOLoad); |
8946 | return DAG.getBitcast(VT, Brdcst); |
8947 | } |
8948 | if (SplatBitSize > 64) { |
8949 | // Load the vector of constants and broadcast it. |
8950 | Constant *VecC = getConstantVector(VT, SplatValue, SplatBitSize, |
8951 | *Ctx); |
8952 | SDValue VCP = DAG.getConstantPool(VecC, PVT); |
8953 | unsigned NumElm = SplatBitSize / VT.getScalarSizeInBits(); |
8954 | MVT VVT = MVT::getVectorVT(VT.getScalarType(), NumElm); |
8955 | Align Alignment = cast<ConstantPoolSDNode>(VCP)->getAlign(); |
8956 | SDVTList Tys = DAG.getVTList(VT, MVT::Other); |
8957 | SDValue Ops[] = {DAG.getEntryNode(), VCP}; |
8958 | MachinePointerInfo MPI = |
8959 | MachinePointerInfo::getConstantPool(DAG.getMachineFunction()); |
8960 | return DAG.getMemIntrinsicNode( |
8961 | X86ISD::SUBV_BROADCAST_LOAD, dl, Tys, Ops, VVT, MPI, Alignment, |
8962 | MachineMemOperand::MOLoad); |
8963 | } |
8964 | } |
8965 | } |
8966 | |
8967 | // If we are moving a scalar into a vector (Ld must be set and all elements |
8968 | // but 1 are undef) and that operation is not obviously supported by |
8969 | // vmovd/vmovq/vmovss/vmovsd, then keep trying to form a broadcast. |
8970 | // That's better than general shuffling and may eliminate a load to GPR and |
8971 | // a move from scalar to vector register. |
8972 | if (!Ld || NumElts - NumUndefElts != 1) |
8973 | return SDValue(); |
8974 | unsigned ScalarSize = Ld.getValueSizeInBits(); |
8975 | if (!(UndefElements[0] || (ScalarSize != 32 && ScalarSize != 64))) |
8976 | return SDValue(); |
8977 | } |
8978 | |
8979 | bool ConstSplatVal = |
8980 | (Ld.getOpcode() == ISD::Constant || Ld.getOpcode() == ISD::ConstantFP); |
8981 | bool IsLoad = ISD::isNormalLoad(Ld.getNode()); |
8982 | |
8983 | |
8984 | |
8985 | // Make sure that all of the users of a non-constant load are from the |
8986 | // BUILD_VECTOR node. |
8987 | // FIXME: Is the use count needed for non-constant, non-load cases? |
8988 | if (!ConstSplatVal && !IsLoad && !BVOp->isOnlyUserOf(Ld.getNode())) |
8989 | return SDValue(); |
8990 | |
8991 | unsigned ScalarSize = Ld.getValueSizeInBits(); |
8992 | bool IsGE256 = (VT.getSizeInBits() >= 256); |
8993 | |
8994 | // When optimizing for size, generate up to 5 extra bytes for a broadcast |
8995 | // instruction to save 8 or more bytes of constant pool data. |
8996 | // TODO: If multiple splats are generated to load the same constant, |
8997 | // it may be detrimental to overall size. There needs to be a way to detect |
8998 | // that condition to know if this is truly a size win. |
8999 | bool OptForSize = DAG.shouldOptForSize(); |
9000 | |
9001 | // Handle broadcasting a single constant scalar from the constant pool |
9002 | // into a vector. |
9003 | // On Sandy Bridge (no AVX2), it is still better to load a constant vector |
9004 | // from the constant pool and not to broadcast it from a scalar. |
9005 | // But override that restriction when optimizing for size. |
9006 | // TODO: Check if splatting is recommended for other AVX-capable CPUs. |
9007 | if (ConstSplatVal && (Subtarget.hasAVX2() || OptForSize)) { |
9008 | EVT CVT = Ld.getValueType(); |
9009 | assert(!CVT.isVector() && "Must not broadcast a vector type"); |
9010 | |
9011 | // Splat f32, i32, v4f64, v4i64 in all cases with AVX2. |
9012 | // For size optimization, also splat v2f64 and v2i64, and for size opt |
9013 | // with AVX2, also splat i8 and i16. |
9014 | // With pattern matching, the VBROADCAST node may become a VMOVDDUP. |
9015 | if (ScalarSize == 32 || (IsGE256 && ScalarSize == 64) || |
9016 | (OptForSize && (ScalarSize == 64 || Subtarget.hasAVX2()))) { |
9017 | const Constant *C = nullptr; |
9018 | if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Ld)) |
9019 | C = CI->getConstantIntValue(); |
9020 | else if (ConstantFPSDNode *CF = dyn_cast<ConstantFPSDNode>(Ld)) |
9021 | C = CF->getConstantFPValue(); |
9022 | |
9023 | assert(C && "Invalid constant type"); |
9024 | |
9025 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
9026 | SDValue CP = |
9027 | DAG.getConstantPool(C, TLI.getPointerTy(DAG.getDataLayout())); |
9028 | Align Alignment = cast<ConstantPoolSDNode>(CP)->getAlign(); |
9029 | |
9030 | SDVTList Tys = DAG.getVTList(VT, MVT::Other); |
9031 | SDValue Ops[] = {DAG.getEntryNode(), CP}; |
9032 | MachinePointerInfo MPI = |
9033 | MachinePointerInfo::getConstantPool(DAG.getMachineFunction()); |
9034 | return DAG.getMemIntrinsicNode(X86ISD::VBROADCAST_LOAD, dl, Tys, Ops, CVT, |
9035 | MPI, Alignment, MachineMemOperand::MOLoad); |
9036 | } |
9037 | } |
9038 | |
9039 | // Handle AVX2 in-register broadcasts. |
9040 | if (!IsLoad && Subtarget.hasInt256() && |
9041 | (ScalarSize == 32 || (IsGE256 && ScalarSize == 64))) |
9042 | return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld); |
9043 | |
9044 | // The scalar source must be a normal load. |
9045 | if (!IsLoad) |
9046 | return SDValue(); |
9047 | |
9048 | // Make sure the non-chain result is only used by this build vector. |
9049 | if (!Ld->hasNUsesOfValue(NumElts - NumUndefElts, 0)) |
9050 | return SDValue(); |
9051 | |
9052 | if (ScalarSize == 32 || (IsGE256 && ScalarSize == 64) || |
9053 | (Subtarget.hasVLX() && ScalarSize == 64)) { |
9054 | auto *LN = cast<LoadSDNode>(Ld); |
9055 | SDVTList Tys = DAG.getVTList(VT, MVT::Other); |
9056 | SDValue Ops[] = {LN->getChain(), LN->getBasePtr()}; |
9057 | SDValue BCast = |
9058 | DAG.getMemIntrinsicNode(X86ISD::VBROADCAST_LOAD, dl, Tys, Ops, |
9059 | LN->getMemoryVT(), LN->getMemOperand()); |
9060 | DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BCast.getValue(1)); |
9061 | return BCast; |
9062 | } |
9063 | |
9064 | // The integer check is needed for the 64-bit into 128-bit case, so it |
9065 | // doesn't match f64 - there is no vbroadcastsd xmm without AVX2. |
9066 | if (Subtarget.hasInt256() && Ld.getValueType().isInteger() && |
9067 | (ScalarSize == 8 || ScalarSize == 16 || ScalarSize == 64)) { |
9068 | auto *LN = cast<LoadSDNode>(Ld); |
9069 | SDVTList Tys = DAG.getVTList(VT, MVT::Other); |
9070 | SDValue Ops[] = {LN->getChain(), LN->getBasePtr()}; |
9071 | SDValue BCast = |
9072 | DAG.getMemIntrinsicNode(X86ISD::VBROADCAST_LOAD, dl, Tys, Ops, |
9073 | LN->getMemoryVT(), LN->getMemOperand()); |
9074 | DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BCast.getValue(1)); |
9075 | return BCast; |
9076 | } |
9077 | |
9078 | // Unsupported broadcast. |
9079 | return SDValue(); |
9080 | } |
9081 | |
9082 | // For an EXTRACT_VECTOR_ELT with a constant index return the real |
9083 | // underlying vector and index. |
9084 | // |
9085 | // Modifies \p ExtractedFromVec to the real vector and returns the real |
9086 | // index. |
9087 | static int getUnderlyingExtractedFromVec(SDValue &ExtractedFromVec, |
9088 | SDValue ExtIdx) { |
9089 | int Idx = cast<ConstantSDNode>(ExtIdx)->getZExtValue(); |
9090 | if (!isa<ShuffleVectorSDNode>(ExtractedFromVec)) |
9091 | return Idx; |
9092 | |
9093 | // For 256-bit vectors, LowerEXTRACT_VECTOR_ELT_SSE4 may have already |
9094 | // lowered this: |
9095 | //   (extract_vector_elt (v8f32 %1), Constant<6>) |
9096 | // to: |
9097 | //   (extract_vector_elt (vector_shuffle<2,u,u,u> |
9098 | //                           (extract_subvector (v8f32 %0), Constant<4>), |
9099 | //                           undef) |
9100 | //                       Constant<0>) |
9101 | // In this case the vector is the extract_subvector expression and the index |
9102 | // is 2, as specified by the shuffle. |
9103 | ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(ExtractedFromVec); |
9104 | SDValue ShuffleVec = SVOp->getOperand(0); |
9105 | MVT ShuffleVecVT = ShuffleVec.getSimpleValueType(); |
9106 | assert(ShuffleVecVT.getVectorElementType() == |
9107 | ExtractedFromVec.getSimpleValueType().getVectorElementType()); |
9108 | |
9109 | int ShuffleIdx = SVOp->getMaskElt(Idx); |
9110 | if (isUndefOrInRange(ShuffleIdx, 0, ShuffleVecVT.getVectorNumElements())) { |
9111 | ExtractedFromVec = ShuffleVec; |
9112 | return ShuffleIdx; |
9113 | } |
9114 | return Idx; |
9115 | } |
9116 | |
9117 | static SDValue buildFromShuffleMostly(SDValue Op, SelectionDAG &DAG) { |
9118 | MVT VT = Op.getSimpleValueType(); |
9119 | |
9120 | // Skip if insert_vector_elt is not supported. |
9121 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
9122 | if (!TLI.isOperationLegalOrCustom(ISD::INSERT_VECTOR_ELT, VT)) |
9123 | return SDValue(); |
9124 | |
9125 | SDLoc DL(Op); |
9126 | unsigned NumElems = Op.getNumOperands(); |
9127 | |
9128 | SDValue VecIn1; |
9129 | SDValue VecIn2; |
9130 | SmallVector<unsigned, 4> InsertIndices; |
9131 | SmallVector<int, 8> Mask(NumElems, -1); |
9132 | |
9133 | for (unsigned i = 0; i != NumElems; ++i) { |
9134 | unsigned Opc = Op.getOperand(i).getOpcode(); |
9135 | |
9136 | if (Opc == ISD::UNDEF) |
9137 | continue; |
9138 | |
9139 | if (Opc != ISD::EXTRACT_VECTOR_ELT) { |
9140 | // Quit if more than one element needs inserting. |
9141 | if (InsertIndices.size() > 1) |
9142 | return SDValue(); |
9143 | |
9144 | InsertIndices.push_back(i); |
9145 | continue; |
9146 | } |
9147 | |
9148 | SDValue ExtractedFromVec = Op.getOperand(i).getOperand(0); |
9149 | SDValue ExtIdx = Op.getOperand(i).getOperand(1); |
9150 | |
9151 | // Quit if the extract index is not a constant. |
9152 | if (!isa<ConstantSDNode>(ExtIdx)) |
9153 | return SDValue(); |
9154 | int Idx = getUnderlyingExtractedFromVec(ExtractedFromVec, ExtIdx); |
9155 | |
9156 | // Quit if extracted from a vector of a different type. |
9157 | if (ExtractedFromVec.getValueType() != VT) |
9158 | return SDValue(); |
9159 | |
9160 | if (!VecIn1.getNode()) |
9161 | VecIn1 = ExtractedFromVec; |
9162 | else if (VecIn1 != ExtractedFromVec) { |
9163 | if (!VecIn2.getNode()) |
9164 | VecIn2 = ExtractedFromVec; |
9165 | else if (VecIn2 != ExtractedFromVec) |
9166 | // Quit if more than two source vectors would be needed. |
9167 | return SDValue(); |
9168 | } |
9169 | |
9170 | if (ExtractedFromVec == VecIn1) |
9171 | Mask[i] = Idx; |
9172 | else if (ExtractedFromVec == VecIn2) |
9173 | Mask[i] = Idx + NumElems; |
9174 | } |
9175 | |
9176 | if (!VecIn1.getNode()) |
9177 | return SDValue(); |
9178 | |
9179 | VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT); |
9180 | SDValue NV = DAG.getVectorShuffle(VT, DL, VecIn1, VecIn2, Mask); |
9181 | |
9182 | for (unsigned Idx : InsertIndices) |
9183 | NV = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, NV, Op.getOperand(Idx), |
9184 | DAG.getIntPtrConstant(Idx, DL)); |
9185 | |
9186 | return NV; |
9187 | } |
9188 | |
9189 | // Lower BUILD_VECTOR of vXi1 (AVX-512 mask) types. |
9190 | static SDValue LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG, |
9191 | const X86Subtarget &Subtarget) { |
9192 | |
9193 | MVT VT = Op.getSimpleValueType(); |
9194 | assert((VT.getVectorElementType() == MVT::i1) && |
9195 | "Unexpected type in LowerBUILD_VECTORvXi1!"); |
9196 | |
9197 | SDLoc dl(Op); |
9198 | if (ISD::isBuildVectorAllZeros(Op.getNode()) || |
9199 | ISD::isBuildVectorAllOnes(Op.getNode())) |
9200 | return Op; |
9201 | |
9202 | uint64_t Immediate = 0; |
9203 | SmallVector<unsigned, 16> NonConstIdx; |
9204 | bool IsSplat = true; |
9205 | bool HasConstElts = false; |
9206 | int SplatIdx = -1; |
9207 | for (unsigned idx = 0, e = Op.getNumOperands(); idx < e; ++idx) { |
9208 | SDValue In = Op.getOperand(idx); |
9209 | if (In.isUndef()) |
9210 | continue; |
9211 | if (auto *InC = dyn_cast<ConstantSDNode>(In)) { |
9212 | Immediate |= (InC->getZExtValue() & 0x1) << idx; |
9213 | HasConstElts = true; |
9214 | } else { |
9215 | NonConstIdx.push_back(idx); |
9216 | } |
9217 | if (SplatIdx < 0) |
9218 | SplatIdx = idx; |
9219 | else if (In != Op.getOperand(SplatIdx)) |
9220 | IsSplat = false; |
9221 | } |
9222 | |
9223 | // All (non-undef) elements are the same value; lower as a splat. |
9224 | if (IsSplat) { |
9225 | // The build_vector is a splat of one, possibly non-constant, i8 |
9226 | // value. Materialize it as a select between all-ones and all-zeros; |
9227 | // if the condition is not already a setcc, mask it down to its low |
9228 | // bit so the select tests a proper boolean. |
9229 | SDValue Cond = Op.getOperand(SplatIdx); |
9230 | assert(Cond.getValueType() == MVT::i8 && "Unexpected VT!"); |
9231 | if (Cond.getOpcode() != ISD::SETCC) |
9232 | Cond = DAG.getNode(ISD::AND, dl, MVT::i8, Cond, |
9233 | DAG.getConstant(1, dl, MVT::i8)); |
9234 | |
9235 | // On 32-bit targets, split v64i1 into two bitcast v32i1 halves. |
9236 | if (VT == MVT::v64i1 && !Subtarget.is64Bit()) { |
9237 | SDValue Select = DAG.getSelect(dl, MVT::i32, Cond, |
9238 | DAG.getAllOnesConstant(dl, MVT::i32), |
9239 | DAG.getConstant(0, dl, MVT::i32)); |
9240 | Select = DAG.getBitcast(MVT::v32i1, Select); |
9241 | return DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v64i1, Select, Select); |
9242 | } else { |
9243 | MVT ImmVT = MVT::getIntegerVT(std::max((unsigned)VT.getSizeInBits(), 8U)); |
9244 | SDValue Select = DAG.getSelect(dl, ImmVT, Cond, |
9245 | DAG.getAllOnesConstant(dl, ImmVT), |
9246 | DAG.getConstant(0, dl, ImmVT)); |
9247 | MVT VecVT = VT.getSizeInBits() >= 8 ? VT : MVT::v8i1; |
9248 | Select = DAG.getBitcast(VecVT, Select); |
9249 | return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Select, |
9250 | DAG.getIntPtrConstant(0, dl)); |
9251 | } |
9252 | } |
9253 | // Pack the constant elements into an immediate bitmask, bitcast it |
9254 | // to the mask type, then insert the non-constant elements afterwards. |
9255 | SDValue DstVec; |
9256 | if (HasConstElts) { |
9257 | if (VT == MVT::v64i1 && !Subtarget.is64Bit()) { |
9258 | SDValue ImmL = DAG.getConstant(Lo_32(Immediate), dl, MVT::i32); |
9259 | SDValue ImmH = DAG.getConstant(Hi_32(Immediate), dl, MVT::i32); |
9260 | ImmL = DAG.getBitcast(MVT::v32i1, ImmL); |
9261 | ImmH = DAG.getBitcast(MVT::v32i1, ImmH); |
9262 | DstVec = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v64i1, ImmL, ImmH); |
9263 | } else { |
9264 | MVT ImmVT = MVT::getIntegerVT(std::max((unsigned)VT.getSizeInBits(), 8U)); |
9265 | SDValue Imm = DAG.getConstant(Immediate, dl, ImmVT); |
9266 | MVT VecVT = VT.getSizeInBits() >= 8 ? VT : MVT::v8i1; |
9267 | DstVec = DAG.getBitcast(VecVT, Imm); |
9268 | DstVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, DstVec, |
9269 | DAG.getIntPtrConstant(0, dl)); |
9270 | } |
9271 | } else |
9272 | DstVec = DAG.getUNDEF(VT); |
9273 | |
9274 | for (unsigned i = 0, e = NonConstIdx.size(); i != e; ++i) { |
9275 | unsigned InsertIdx = NonConstIdx[i]; |
9276 | DstVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DstVec, |
9277 | Op.getOperand(InsertIdx), |
9278 | DAG.getIntPtrConstant(InsertIdx, dl)); |
9279 | } |
9280 | return DstVec; |
9281 | } |
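The immediate built in the constant path above packs one bit per mask element. A standalone model of that packing (ours):

    #include <cstdint>

    // Bit i of the immediate is the low bit of element i, matching
    // Immediate |= (InC->getZExtValue() & 0x1) << idx in the loop above.
    std::uint64_t packMaskImmediate(const bool *Elts, unsigned NumElts) {
      std::uint64_t Imm = 0;
      for (unsigned I = 0; I != NumElts; ++I)
        Imm |= static_cast<std::uint64_t>(Elts[I]) << I;
      return Imm;
    }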
9282 | |
9283 | LLVM_ATTRIBUTE_UNUSED static bool isHorizOp(unsigned Opcode) { |
9284 | switch (Opcode) { |
9285 | case X86ISD::PACKSS: |
9286 | case X86ISD::PACKUS: |
9287 | case X86ISD::FHADD: |
9288 | case X86ISD::FHSUB: |
9289 | case X86ISD::HADD: |
9290 | case X86ISD::HSUB: |
9291 | return true; |
9292 | } |
9293 | return false; |
9294 | } |
9295 | |
9296 | |
9297 | /// Helper for LowerToHorizontalOp(): return true if the build_vector |
9298 | /// N implements a partial horizontal Opcode (e.g. ISD::ADD, ISD::FADD) |
9299 | /// over the element range [BaseIdx, LastIdx). Each element must be |
9300 | /// Opcode applied to extracts of two consecutive lanes of one source; |
9301 | /// commutable opcodes may have the operands swapped. On success V0 and |
9302 | /// V1 receive the two source vectors (either may stay undef when only |
9303 | /// half of the range is populated). |
9304 | |
9305 | |
9306 | |
9307 | |
9308 | |
9309 | |
9310 | |
9311 | |
9312 | |
9313 | |
9314 | |
9315 | |
9316 | static bool isHorizontalBinOpPart(const BuildVectorSDNode *N, unsigned Opcode, |
9317 | SelectionDAG &DAG, |
9318 | unsigned BaseIdx, unsigned LastIdx, |
9319 | SDValue &V0, SDValue &V1) { |
9320 | EVT VT = N->getValueType(0); |
9321 | assert(VT.is256BitVector() && "Only use for matching partial 256-bit h-ops"); |
9322 | assert(BaseIdx * 2 <= LastIdx && "Invalid Indices in input!"); |
9323 | assert(VT.isVector() && VT.getVectorNumElements() >= LastIdx && |
9324 | "Invalid Vector in input!"); |
9325 | |
9326 | bool IsCommutable = (Opcode == ISD::ADD || Opcode == ISD::FADD); |
9327 | bool CanFold = true; |
9328 | unsigned ExpectedVExtractIdx = BaseIdx; |
9329 | unsigned NumElts = LastIdx - BaseIdx; |
9330 | V0 = DAG.getUNDEF(VT); |
9331 | V1 = DAG.getUNDEF(VT); |
9332 | |
9333 | |
9334 | for (unsigned i = 0, e = NumElts; i != e && CanFold; ++i) { |
9335 | SDValue Op = N->getOperand(i + BaseIdx); |
9336 | |
9337 | |
9338 | if (Op->isUndef()) { |
9339 | |
9340 | if (i * 2 == NumElts) |
9341 | ExpectedVExtractIdx = BaseIdx; |
9342 | ExpectedVExtractIdx += 2; |
9343 | continue; |
9344 | } |
9345 | |
9346 | CanFold = Op->getOpcode() == Opcode && Op->hasOneUse(); |
9347 | |
9348 | if (!CanFold) |
9349 | break; |
9350 | |
9351 | SDValue Op0 = Op.getOperand(0); |
9352 | SDValue Op1 = Op.getOperand(1); |
9353 | |
9354 | |
9355 | |
9356 | CanFold = (Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && |
9357 | Op1.getOpcode() == ISD::EXTRACT_VECTOR_ELT && |
9358 | Op0.getOperand(0) == Op1.getOperand(0) && |
9359 | isa<ConstantSDNode>(Op0.getOperand(1)) && |
9360 | isa<ConstantSDNode>(Op1.getOperand(1))); |
9361 | if (!CanFold) |
9362 | break; |
9363 | |
9364 | unsigned I0 = Op0.getConstantOperandVal(1); |
9365 | unsigned I1 = Op1.getConstantOperandVal(1); |
9366 | |
9367 | if (i * 2 < NumElts) { |
9368 | if (V0.isUndef()) { |
9369 | V0 = Op0.getOperand(0); |
9370 | if (V0.getValueType() != VT) |
9371 | return false; |
9372 | } |
9373 | } else { |
9374 | if (V1.isUndef()) { |
9375 | V1 = Op0.getOperand(0); |
9376 | if (V1.getValueType() != VT) |
9377 | return false; |
9378 | } |
9379 | if (i * 2 == NumElts) |
9380 | ExpectedVExtractIdx = BaseIdx; |
9381 | } |
9382 | |
9383 | SDValue Expected = (i * 2 < NumElts) ? V0 : V1; |
9384 | if (I0 == ExpectedVExtractIdx) |
9385 | CanFold = I1 == I0 + 1 && Op0.getOperand(0) == Expected; |
9386 | else if (IsCommutable && I1 == ExpectedVExtractIdx) { |
9387 | |
9388 | |
9389 | CanFold = I0 == I1 + 1 && Op1.getOperand(0) == Expected; |
9390 | } else |
9391 | CanFold = false; |
9392 | |
9393 | ExpectedVExtractIdx += 2; |
9394 | } |
9395 | |
9396 | return CanFold; |
9397 | } |
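The layout this matcher checks for corresponds to the following scalar semantics (our reference model for the 128-bit case; 256-bit instructions repeat it per 128-bit lane):

    #include <array>
    #include <cstddef>

    template <typename T, std::size_t N>
    std::array<T, N> haddRef(const std::array<T, N> &A,
                             const std::array<T, N> &B) {
      std::array<T, N> R{};
      for (std::size_t I = 0; I != N / 2; ++I) {
        R[I] = A[2 * I] + A[2 * I + 1];         // low half: adjacent A lanes
        R[I + N / 2] = B[2 * I] + B[2 * I + 1]; // high half: adjacent B lanes
      }
      return R;
    }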
9398 | |
9399 | |
9400 | /// Emit two 128-bit horizontal ops glued by a concat_vectors, for |
9401 | /// cases where a full 256-bit h-op cannot be formed. When Mode is set |
9402 | /// the inputs are combined "horizontally": LO = op(V0_LO, V0_HI) and |
9403 | /// HI = op(V1_LO, V1_HI); otherwise halves are combined pairwise: |
9404 | /// LO = op(V0_LO, V1_LO) and HI = op(V0_HI, V1_HI). isUndefLO and |
9405 | /// isUndefHI leave the corresponding result half undef so dead inputs |
9406 | /// are not computed. |
9407 | |
9408 | |
9409 | |
9410 | |
9411 | |
9412 | |
9413 | |
9414 | |
9415 | |
9416 | |
9417 | |
9418 | |
9419 | |
9420 | |
9421 | |
9422 | |
9423 | |
9424 | |
9425 | |
9426 | |
9427 | |
9428 | |
9429 | |
9430 | static SDValue ExpandHorizontalBinOp(const SDValue &V0, const SDValue &V1, |
9431 | const SDLoc &DL, SelectionDAG &DAG, |
9432 | unsigned X86Opcode, bool Mode, |
9433 | bool isUndefLO, bool isUndefHI) { |
9434 | MVT VT = V0.getSimpleValueType(); |
9435 | assert(VT.is256BitVector() && VT == V1.getSimpleValueType() && |
9436 | "Invalid nodes in input!"); |
9437 | |
9438 | unsigned NumElts = VT.getVectorNumElements(); |
9439 | SDValue V0_LO = extract128BitVector(V0, 0, DAG, DL); |
9440 | SDValue V0_HI = extract128BitVector(V0, NumElts/2, DAG, DL); |
9441 | SDValue V1_LO = extract128BitVector(V1, 0, DAG, DL); |
9442 | SDValue V1_HI = extract128BitVector(V1, NumElts/2, DAG, DL); |
9443 | MVT NewVT = V0_LO.getSimpleValueType(); |
9444 | |
9445 | SDValue LO = DAG.getUNDEF(NewVT); |
9446 | SDValue HI = DAG.getUNDEF(NewVT); |
9447 | |
9448 | if (Mode) { |
9449 | |
9450 | if (!isUndefLO && !V0->isUndef()) |
9451 | LO = DAG.getNode(X86Opcode, DL, NewVT, V0_LO, V0_HI); |
9452 | if (!isUndefHI && !V1->isUndef()) |
9453 | HI = DAG.getNode(X86Opcode, DL, NewVT, V1_LO, V1_HI); |
9454 | } else { |
9455 | |
9456 | if (!isUndefLO && (!V0_LO->isUndef() || !V1_LO->isUndef())) |
9457 | LO = DAG.getNode(X86Opcode, DL, NewVT, V0_LO, V1_LO); |
9458 | |
9459 | if (!isUndefHI && (!V0_HI->isUndef() || !V1_HI->isUndef())) |
9460 | HI = DAG.getNode(X86Opcode, DL, NewVT, V0_HI, V1_HI); |
9461 | } |
9462 | |
9463 | return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LO, HI); |
9464 | } |
9465 | |
9466 | |
9467 | /// Return true if BV computes an ADDSUB/SUBADD: even lanes use one of |
9468 | /// FADD/FSUB and odd lanes use the other, with both operands extracted |
9469 | /// lane-by-lane from the same two input vectors. NumExtracts returns |
9470 | /// the number of extract pairs matched; IsSubAdd is set for SUBADD. |
9471 | static bool isAddSubOrSubAdd(const BuildVectorSDNode *BV, |
9472 | const X86Subtarget &Subtarget, SelectionDAG &DAG, |
9473 | SDValue &Opnd0, SDValue &Opnd1, |
9474 | unsigned &NumExtracts, |
9475 | bool &IsSubAdd) { |
9476 | |
9477 | MVT VT = BV->getSimpleValueType(0); |
9478 | if (!Subtarget.hasSSE3() || !VT.isFloatingPoint()) |
9479 | return false; |
9480 | |
9481 | unsigned NumElts = VT.getVectorNumElements(); |
9482 | SDValue InVec0 = DAG.getUNDEF(VT); |
9483 | SDValue InVec1 = DAG.getUNDEF(VT); |
9484 | |
9485 | NumExtracts = 0; |
9486 | |
9487 | |
9488 | |
9489 | |
9490 | |
9491 | unsigned Opc[2] = {0, 0}; |
9492 | for (unsigned i = 0, e = NumElts; i != e; ++i) { |
9493 | SDValue Op = BV->getOperand(i); |
9494 | |
9495 | |
9496 | unsigned Opcode = Op.getOpcode(); |
9497 | if (Opcode == ISD::UNDEF) |
9498 | continue; |
9499 | |
9500 | |
9501 | if (Opcode != ISD::FADD && Opcode != ISD::FSUB) |
9502 | return false; |
9503 | |
9504 | SDValue Op0 = Op.getOperand(0); |
9505 | SDValue Op1 = Op.getOperand(1); |
9506 | |
9507 | |
9508 | |
9509 | |
9510 | if (Op0.getOpcode() != ISD::EXTRACT_VECTOR_ELT || |
9511 | Op1.getOpcode() != ISD::EXTRACT_VECTOR_ELT || |
9512 | !isa<ConstantSDNode>(Op0.getOperand(1)) || |
9513 | Op0.getOperand(1) != Op1.getOperand(1)) |
9514 | return false; |
9515 | |
9516 | unsigned I0 = Op0.getConstantOperandVal(1); |
9517 | if (I0 != i) |
9518 | return false; |
9519 | |
9520 | |
9521 | |
9522 | if (Opc[i % 2] != 0 && Opc[i % 2] != Opcode) |
9523 | return false; |
9524 | Opc[i % 2] = Opcode; |
9525 | |
9526 | |
9527 | if (InVec0.isUndef()) { |
9528 | InVec0 = Op0.getOperand(0); |
9529 | if (InVec0.getSimpleValueType() != VT) |
9530 | return false; |
9531 | } |
9532 | if (InVec1.isUndef()) { |
9533 | InVec1 = Op1.getOperand(0); |
9534 | if (InVec1.getSimpleValueType() != VT) |
9535 | return false; |
9536 | } |
9537 | |
9538 | |
9539 | |
9540 | if (InVec0 != Op0.getOperand(0)) { |
9541 | if (Opcode == ISD::FSUB) |
9542 | return false; |
9543 | |
9544 | |
9545 | |
9546 | std::swap(Op0, Op1); |
9547 | if (InVec0 != Op0.getOperand(0)) |
9548 | return false; |
9549 | } |
9550 | |
9551 | if (InVec1 != Op1.getOperand(0)) |
9552 | return false; |
9553 | |
9554 | |
9555 | ++NumExtracts; |
9556 | } |
9557 | |
9558 | |
9559 | |
9560 | |
9561 | if (!Opc[0] || !Opc[1] || Opc[0] == Opc[1] || |
9562 | InVec0.isUndef() || InVec1.isUndef()) |
9563 | return false; |
9564 | |
9565 | IsSubAdd = Opc[0] == ISD::FADD; |
9566 | |
9567 | Opnd0 = InVec0; |
9568 | Opnd1 = InVec1; |
9569 | return true; |
9570 | } |
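For reference, the lane pattern being recognized is (our scalar model; IsSubAdd flips the two cases):

    #include <array>
    #include <cstddef>

    // X86ISD::ADDSUB semantics: even lanes subtract, odd lanes add.
    template <typename T, std::size_t N>
    std::array<T, N> addsubRef(const std::array<T, N> &A,
                               const std::array<T, N> &B) {
      std::array<T, N> R{};
      for (std::size_t I = 0; I != N; ++I)
        R[I] = (I % 2 == 0) ? A[I] - B[I] : A[I] + B[I];
      return R;
    }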
9571 | |
9572 | |
9573 | |
9574 | |
9575 | |
9576 | |
9577 | |
9578 | |
9579 | |
9580 | |
9581 | |
9582 | |
9583 | |
9584 | |
9585 | |
9586 | |
9587 | |
9588 | |
9589 | |
9590 | |
9591 | /// Return true when the matched ADDSUB/SUBADD can be fused into an |
9592 | /// FMADDSUB/FMSUBADD: Opnd0 must be an FMUL whose uses are exactly the |
9593 | /// matched extracts, FMA must be available and fp-op fusion allowed. |
9594 | /// On success the operands are rewritten so the node computes |
9595 | /// (Opnd0 * Opnd1) +/- Opnd2. |
9596 | |
9597 | |
9598 | static bool isFMAddSubOrFMSubAdd(const X86Subtarget &Subtarget, |
9599 | SelectionDAG &DAG, |
9600 | SDValue &Opnd0, SDValue &Opnd1, SDValue &Opnd2, |
9601 | unsigned ExpectedUses) { |
9602 | if (Opnd0.getOpcode() != ISD::FMUL || |
9603 | !Opnd0->hasNUsesOfValue(ExpectedUses, 0) || !Subtarget.hasAnyFMA()) |
9604 | return false; |
9605 | |
9606 | |
9607 | |
9608 | |
9609 | |
9610 | const TargetOptions &Options = DAG.getTarget().Options; |
9611 | bool AllowFusion = |
9612 | (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath); |
9613 | if (!AllowFusion) |
9614 | return false; |
9615 | |
9616 | Opnd2 = Opnd1; |
9617 | Opnd1 = Opnd0.getOperand(1); |
9618 | Opnd0 = Opnd0.getOperand(0); |
9619 | |
9620 | return true; |
9621 | } |
9622 | |
9623 | |
9624 | /// Try to fold a build_vector implementing an ADDSUB or FMADDSUB/ |
9625 | /// FMSUBADD idiom into a single X86ISD node. |
9626 | static SDValue lowerToAddSubOrFMAddSub(const BuildVectorSDNode *BV, |
9627 | const X86Subtarget &Subtarget, |
9628 | SelectionDAG &DAG) { |
9629 | SDValue Opnd0, Opnd1; |
9630 | unsigned NumExtracts; |
9631 | bool IsSubAdd; |
9632 | if (!isAddSubOrSubAdd(BV, Subtarget, DAG, Opnd0, Opnd1, NumExtracts, |
9633 | IsSubAdd)) |
9634 | return SDValue(); |
9635 | |
9636 | MVT VT = BV->getSimpleValueType(0); |
9637 | SDLoc DL(BV); |
9638 | |
9639 | // Try to generate an X86ISD::FMADDSUB/FMSUBADD node first. |
9640 | SDValue Opnd2; |
9641 | if (isFMAddSubOrFMSubAdd(Subtarget, DAG, Opnd0, Opnd1, Opnd2, NumExtracts)) { |
9642 | unsigned Opc = IsSubAdd ? X86ISD::FMSUBADD : X86ISD::FMADDSUB; |
9643 | return DAG.getNode(Opc, DL, VT, Opnd0, Opnd1, Opnd2); |
9644 | } |
9645 | |
9646 | // Plain SUBADD has no matching instruction; only FMSUBADD exists. |
9647 | if (IsSubAdd) |
9648 | return SDValue(); |
9649 | |
9650 | |
9651 | // Do not generate X86ISD::ADDSUB for 512-bit types: no known target |
9652 | // has 512-bit ADDSUB instructions. Recognizing the 512-bit idiom is |
9653 | // still worthwhile above as part of the FMADDSUB/FMSUBADD match. |
9654 | |
9655 | if (VT.is512BitVector()) |
9656 | return SDValue(); |
9657 | |
9658 | return DAG.getNode(X86ISD::ADDSUB, DL, VT, Opnd0, Opnd1); |
9659 | } |
9660 | // Match a build_vector computing a single horizontal op on V0/V1. |
9661 | static bool isHopBuildVector(const BuildVectorSDNode *BV, SelectionDAG &DAG, |
9662 | unsigned &HOpcode, SDValue &V0, SDValue &V1) { |
9663 | |
9664 | MVT VT = BV->getSimpleValueType(0); |
9665 | HOpcode = ISD::DELETED_NODE; |
9666 | V0 = DAG.getUNDEF(VT); |
9667 | V1 = DAG.getUNDEF(VT); |
9668 | |
9669 | // x86 horizontal ops work on 128-bit chunks: within each chunk the |
9670 | // low 64 bits of the result come from V0 and the high 64 bits from |
9671 | // V1, with each output lane combining two adjacent input lanes. |
9672 | unsigned NumElts = VT.getVectorNumElements(); |
9673 | unsigned GenericOpcode = ISD::DELETED_NODE; |
9674 | unsigned Num128BitChunks = VT.is256BitVector() ? 2 : 1; |
9675 | unsigned NumEltsIn128Bits = NumElts / Num128BitChunks; |
9676 | unsigned NumEltsIn64Bits = NumEltsIn128Bits / 2; |
9677 | for (unsigned i = 0; i != Num128BitChunks; ++i) { |
9678 | for (unsigned j = 0; j != NumEltsIn128Bits; ++j) { |
9679 | |
9680 | SDValue Op = BV->getOperand(i * NumEltsIn128Bits + j); |
9681 | if (Op.isUndef()) |
9682 | continue; |
9683 | |
9684 | |
9685 | if (HOpcode != ISD::DELETED_NODE && Op.getOpcode() != GenericOpcode) |
9686 | return false; |
9687 | |
9688 | |
9689 | if (HOpcode == ISD::DELETED_NODE) { |
9690 | GenericOpcode = Op.getOpcode(); |
9691 | switch (GenericOpcode) { |
9692 | case ISD::ADD: HOpcode = X86ISD::HADD; break; |
9693 | case ISD::SUB: HOpcode = X86ISD::HSUB; break; |
9694 | case ISD::FADD: HOpcode = X86ISD::FHADD; break; |
9695 | case ISD::FSUB: HOpcode = X86ISD::FHSUB; break; |
9696 | default: return false; |
9697 | } |
9698 | } |
9699 | |
9700 | SDValue Op0 = Op.getOperand(0); |
9701 | SDValue Op1 = Op.getOperand(1); |
9702 | if (Op0.getOpcode() != ISD::EXTRACT_VECTOR_ELT || |
9703 | Op1.getOpcode() != ISD::EXTRACT_VECTOR_ELT || |
9704 | Op0.getOperand(0) != Op1.getOperand(0) || |
9705 | !isa<ConstantSDNode>(Op0.getOperand(1)) || |
9706 | !isa<ConstantSDNode>(Op1.getOperand(1)) || !Op.hasOneUse()) |
9707 | return false; |
9708 | |
9709 | |
9710 | |
9711 | if (j < NumEltsIn64Bits) { |
9712 | if (V0.isUndef()) |
9713 | V0 = Op0.getOperand(0); |
9714 | } else { |
9715 | if (V1.isUndef()) |
9716 | V1 = Op0.getOperand(0); |
9717 | } |
9718 | |
9719 | SDValue SourceVec = (j < NumEltsIn64Bits) ? V0 : V1; |
9720 | if (SourceVec != Op0.getOperand(0)) |
9721 | return false; |
9722 | |
9723 | |
9724 | unsigned ExtIndex0 = Op0.getConstantOperandVal(1); |
9725 | unsigned ExtIndex1 = Op1.getConstantOperandVal(1); |
9726 | unsigned ExpectedIndex = i * NumEltsIn128Bits + |
9727 | (j % NumEltsIn64Bits) * 2; |
9728 | if (ExpectedIndex == ExtIndex0 && ExtIndex1 == ExtIndex0 + 1) |
9729 | continue; |
9730 | |
9731 | |
9732 | if (GenericOpcode != ISD::ADD && GenericOpcode != ISD::FADD) |
9733 | return false; |
9734 | |
9735 | |
9736 | |
9737 | if (ExpectedIndex == ExtIndex1 && ExtIndex0 == ExtIndex1 + 1) |
9738 | continue; |
9739 | |
9740 | |
9741 | return false; |
9742 | } |
9743 | } |
9744 | |
9745 | return true; |
9746 | } |
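To make the index check concrete: for VT = v8i32 there are two 128-bit chunks, NumEltsIn128Bits = 4 and NumEltsIn64Bits = 2, so element j of chunk i must be built from consecutive source lanes

    ExpectedIndex = i * 4 + (j % 2) * 2    (and ExpectedIndex + 1)

e.g. chunk i = 1, element j = 3 selects V1 (since j >= 2) and requires extracts of lanes 6 and 7.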
9747 | |
9748 | static SDValue getHopForBuildVector(const BuildVectorSDNode *BV, |
9749 | SelectionDAG &DAG, unsigned HOpcode, |
9750 | SDValue V0, SDValue V1) { |
9751 | // If either input vector is not the same size as the build_vector, |
9752 | // extract or insert the low bits to the correct width; these |
9753 | // subregister operations are free (e.g. zmm -> xmm, xmm -> ymm). |
9754 | MVT VT = BV->getSimpleValueType(0); |
9755 | unsigned Width = VT.getSizeInBits(); |
9756 | if (V0.getValueSizeInBits() > Width) |
9757 | V0 = extractSubVector(V0, 0, DAG, SDLoc(BV), Width); |
9758 | else if (V0.getValueSizeInBits() < Width) |
9759 | V0 = insertSubVector(DAG.getUNDEF(VT), V0, 0, DAG, SDLoc(BV), Width); |
9760 | |
9761 | if (V1.getValueSizeInBits() > Width) |
9762 | V1 = extractSubVector(V1, 0, DAG, SDLoc(BV), Width); |
9763 | else if (V1.getValueSizeInBits() < Width) |
9764 | V1 = insertSubVector(DAG.getUNDEF(VT), V1, 0, DAG, SDLoc(BV), Width); |
9765 | |
9766 | unsigned NumElts = VT.getVectorNumElements(); |
9767 | APInt DemandedElts = APInt::getAllOnesValue(NumElts); |
9768 | for (unsigned i = 0; i != NumElts; ++i) |
9769 | if (BV->getOperand(i).isUndef()) |
9770 | DemandedElts.clearBit(i); |
9771 | |
9772 | // If only the lower half is demanded, do the op at half width. |
9773 | unsigned HalfNumElts = NumElts / 2; |
9774 | if (VT.is256BitVector() && DemandedElts.lshr(HalfNumElts) == 0) { |
9775 | MVT HalfVT = VT.getHalfNumVectorElementsVT(); |
9776 | V0 = extractSubVector(V0, 0, DAG, SDLoc(BV), 128); |
9777 | V1 = extractSubVector(V1, 0, DAG, SDLoc(BV), 128); |
9778 | SDValue Half = DAG.getNode(HOpcode, SDLoc(BV), HalfVT, V0, V1); |
9779 | return insertSubVector(DAG.getUNDEF(VT), Half, 0, DAG, SDLoc(BV), 256); |
9780 | } |
9781 | |
9782 | return DAG.getNode(HOpcode, SDLoc(BV), VT, V0, V1); |
9783 | } |
9784 | |
9785 | // Lower BUILD_VECTOR to a horizontal add/sub operation if possible. |
9786 | static SDValue LowerToHorizontalOp(const BuildVectorSDNode *BV, |
9787 | const X86Subtarget &Subtarget, |
9788 | SelectionDAG &DAG) { |
9789 | // We need at least 2 non-undef elements to make this worthwhile. |
9790 | unsigned NumNonUndefs = |
9791 | count_if(BV->op_values(), [](SDValue V) { return !V.isUndef(); }); |
9792 | if (NumNonUndefs < 2) |
9793 | return SDValue(); |
9794 | |
9795 | |
9796 | // Match the fast h-op cases first; 128-bit ops need SSE3/SSSE3 and |
9797 | // 256-bit ops need AVX (float) or AVX2 (integer). |
9798 | MVT VT = BV->getSimpleValueType(0); |
9799 | if (((VT == MVT::v4f32 || VT == MVT::v2f64) && Subtarget.hasSSE3()) || |
9800 | ((VT == MVT::v8i16 || VT == MVT::v4i32) && Subtarget.hasSSSE3()) || |
9801 | ((VT == MVT::v8f32 || VT == MVT::v4f64) && Subtarget.hasAVX()) || |
9802 | ((VT == MVT::v16i16 || VT == MVT::v8i32) && Subtarget.hasAVX2())) { |
9803 | unsigned HOpcode; |
9804 | SDValue V0, V1; |
9805 | if (isHopBuildVector(BV, DAG, HOpcode, V0, V1)) |
9806 | return getHopForBuildVector(BV, DAG, HOpcode, V0, V1); |
9807 | } |
9808 | |
9809 | // Try harder to match 256-bit ops by using extract/insert. |
9810 | if (!Subtarget.hasAVX() || !VT.is256BitVector()) |
9811 | return SDValue(); |
9812 | |
9813 | // Count the number of undef operands in each half of the input. |
9814 | unsigned NumElts = VT.getVectorNumElements(); |
9815 | unsigned Half = NumElts / 2; |
9816 | unsigned NumUndefsLO = 0; |
9817 | unsigned NumUndefsHI = 0; |
9818 | for (unsigned i = 0, e = Half; i != e; ++i) |
9819 | if (BV->getOperand(i)->isUndef()) |
9820 | NumUndefsLO++; |
9821 | |
9822 | for (unsigned i = Half, e = NumElts; i != e; ++i) |
9823 | if (BV->getOperand(i)->isUndef()) |
9824 | NumUndefsHI++; |
9825 | |
9826 | SDLoc DL(BV); |
9827 | SDValue InVec0, InVec1; |
9828 | if (VT == MVT::v8i32 || VT == MVT::v16i16) { |
9829 | SDValue InVec2, InVec3; |
9830 | unsigned X86Opcode; |
9831 | bool CanFold = true; |
9832 | |
9833 | if (isHorizontalBinOpPart(BV, ISD::ADD, DAG, 0, Half, InVec0, InVec1) && |
9834 | isHorizontalBinOpPart(BV, ISD::ADD, DAG, Half, NumElts, InVec2, |
9835 | InVec3) && |
9836 | ((InVec0.isUndef() || InVec2.isUndef()) || InVec0 == InVec2) && |
9837 | ((InVec1.isUndef() || InVec3.isUndef()) || InVec1 == InVec3)) |
9838 | X86Opcode = X86ISD::HADD; |
9839 | else if (isHorizontalBinOpPart(BV, ISD::SUB, DAG, 0, Half, InVec0, |
9840 | InVec1) && |
9841 | isHorizontalBinOpPart(BV, ISD::SUB, DAG, Half, NumElts, InVec2, |
9842 | InVec3) && |
9843 | ((InVec0.isUndef() || InVec2.isUndef()) || InVec0 == InVec2) && |
9844 | ((InVec1.isUndef() || InVec3.isUndef()) || InVec1 == InVec3)) |
9845 | X86Opcode = X86ISD::HSUB; |
9846 | else |
9847 | CanFold = false; |
9848 | |
9849 | if (CanFold) { |
9850 | |
9851 | |
9852 | if (NumUndefsLO + 1 == Half || NumUndefsHI + 1 == Half) |
9853 | return SDValue(); |
9854 | |
9855 | |
9856 | |
9857 | |
9858 | SDValue V0 = InVec0.isUndef() ? InVec2 : InVec0; |
9859 | SDValue V1 = InVec1.isUndef() ? InVec3 : InVec1; |
9860 | assert((!V0.isUndef() || !V1.isUndef()) && "Horizontal-op of undefs?"); |
9861 | bool isUndefLO = NumUndefsLO == Half; |
9862 | bool isUndefHI = NumUndefsHI == Half; |
9863 | return ExpandHorizontalBinOp(V0, V1, DL, DAG, X86Opcode, false, isUndefLO, |
9864 | isUndefHI); |
9865 | } |
9866 | } |
9867 | |
9868 | if (VT == MVT::v8f32 || VT == MVT::v4f64 || VT == MVT::v8i32 || |
9869 | VT == MVT::v16i16) { |
9870 | unsigned X86Opcode; |
9871 | if (isHorizontalBinOpPart(BV, ISD::ADD, DAG, 0, NumElts, InVec0, InVec1)) |
9872 | X86Opcode = X86ISD::HADD; |
9873 | else if (isHorizontalBinOpPart(BV, ISD::SUB, DAG, 0, NumElts, InVec0, |
9874 | InVec1)) |
9875 | X86Opcode = X86ISD::HSUB; |
9876 | else if (isHorizontalBinOpPart(BV, ISD::FADD, DAG, 0, NumElts, InVec0, |
9877 | InVec1)) |
9878 | X86Opcode = X86ISD::FHADD; |
9879 | else if (isHorizontalBinOpPart(BV, ISD::FSUB, DAG, 0, NumElts, InVec0, |
9880 | InVec1)) |
9881 | X86Opcode = X86ISD::FHSUB; |
9882 | else |
9883 | return SDValue(); |
9884 | |
9885 | |
9886 | |
9887 | if (NumUndefsLO + 1 == Half || NumUndefsHI + 1 == Half) |
9888 | return SDValue(); |
9889 | |
9890 | |
9891 | |
9892 | bool isUndefLO = NumUndefsLO == Half; |
9893 | bool isUndefHI = NumUndefsHI == Half; |
9894 | return ExpandHorizontalBinOp(InVec0, InVec1, DL, DAG, X86Opcode, true, |
9895 | isUndefLO, isUndefHI); |
9896 | } |
9897 | |
9898 | return SDValue(); |
9899 | } |
9900 | |
9901 | static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget, |
9902 | SelectionDAG &DAG); |
9903 | |
9904 | |
9905 | /// If a BUILD_VECTOR's source elements all apply the same bit |
9906 | /// operation and one of their operands is constant, lower to a pair |
9907 | /// of build_vectors and apply the bit op once at vector width; scalar |
9908 | /// bit operations that were custom lowered tend to reappear here, so |
9909 | /// this cleans up what scalarization left behind. |
9910 | static SDValue lowerBuildVectorToBitOp(BuildVectorSDNode *Op, |
9911 | const X86Subtarget &Subtarget, |
9912 | SelectionDAG &DAG) { |
9913 | SDLoc DL(Op); |
9914 | MVT VT = Op->getSimpleValueType(0); |
9915 | unsigned NumElems = VT.getVectorNumElements(); |
9916 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
9917 | |
9918 | |
9919 | // All build_vector operands must share the same opcode. |
9920 | unsigned Opcode = Op->getOperand(0).getOpcode(); |
9921 | for (unsigned i = 1; i < NumElems; ++i) |
9922 | if (Opcode != Op->getOperand(i).getOpcode()) |
9923 | return SDValue(); |
9924 | |
9925 | // Only bitwise logic ops and shifts are handled. |
9926 | bool IsShift = false; |
9927 | switch (Opcode) { |
9928 | default: |
9929 | return SDValue(); |
9930 | case ISD::SHL: |
9931 | case ISD::SRL: |
9932 | case ISD::SRA: |
9933 | IsShift = true; |
9934 | break; |
9935 | case ISD::AND: |
9936 | case ISD::XOR: |
9937 | case ISD::OR: |
9938 | // Don't do this if the build_vector is a splat; we would just be |
9939 | // duplicating the logic op at a wider type. |
9940 | if (Op->getSplatValue()) |
9941 | return SDValue(); |
9942 | if (!TLI.isOperationLegalOrPromote(Opcode, VT)) |
9943 | return SDValue(); |
9944 | break; |
9945 | } |
9946 | |
9947 | SmallVector<SDValue, 4> LHSElts, RHSElts; |
9948 | for (SDValue Elt : Op->ops()) { |
9949 | SDValue LHS = Elt.getOperand(0); |
9950 | SDValue RHS = Elt.getOperand(1); |
9951 | |
9952 | // The RHS of every element must be a constant. |
9953 | if (!isa<ConstantSDNode>(RHS)) |
9954 | return SDValue(); |
9955 | |
9956 | // Shift amounts may be narrower than the element type; normalize. |
9957 | if (RHS.getValueSizeInBits() != VT.getScalarSizeInBits()) { |
9958 | if (!IsShift) |
9959 | return SDValue(); |
9960 | RHS = DAG.getZExtOrTrunc(RHS, DL, VT.getScalarType()); |
9961 | } |
9962 | |
9963 | LHSElts.push_back(LHS); |
9964 | RHSElts.push_back(RHS); |
9965 | } |
9966 | |
9967 | |
9968 | |
9969 | // The shift lowering below requires a uniform (splat) shift amount. |
9970 | if (IsShift && any_of(RHSElts, [&](SDValue V) { return RHSElts[0] != V; })) |
9971 | return SDValue(); |
9972 | |
9973 | SDValue LHS = DAG.getBuildVector(VT, DL, LHSElts); |
9974 | SDValue RHS = DAG.getBuildVector(VT, DL, RHSElts); |
9975 | SDValue Res = DAG.getNode(Opcode, DL, VT, LHS, RHS); |
9976 | |
9977 | if (!IsShift) |
9978 | return Res; |
9979 | |
9980 | |
9981 | |
9982 | return LowerShift(Res, Subtarget, DAG); |
9983 | } |
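An example of the rewrite this performs (illustrative operands):

    (build_vector (srl %a, 4), (srl %b, 4), (srl %c, 4), (srl %d, 4))
      -->
    (srl (build_vector %a, %b, %c, %d), (build_vector 4, 4, 4, 4))

so LowerShift can then select a single immediate vector shift; the uniform-amount check above is what makes the shift case legal.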
9984 | |
9985 | |
9986 | // Materialize all-zeros / all-ones build_vectors with the idiomatic |
9987 | // zeroing / PCMPEQ patterns instead of constant-pool loads. |
9988 | static SDValue materializeVectorConstant(SDValue Op, SelectionDAG &DAG, |
9989 | const X86Subtarget &Subtarget) { |
9990 | SDLoc DL(Op); |
9991 | MVT VT = Op.getSimpleValueType(); |
9992 | |
9993 | // Vectors containing all zeros can be matched by pxor and xorps. |
9994 | if (ISD::isBuildVectorAllZeros(Op.getNode())) |
9995 | return Op; |
9996 | |
9997 | // Vectors containing all ones can be matched by pcmpeqd on 128-bit |
9998 | // vectors, or broken into v4i32 operations on 256-bit vectors; AVX2 |
9999 | // can use vpcmpeqd directly on 256-bit vectors. |
10000 | if (Subtarget.hasSSE2() && ISD::isBuildVectorAllOnes(Op.getNode())) { |
10001 | if (VT == MVT::v4i32 || VT == MVT::v8i32 || VT == MVT::v16i32) |
10002 | return Op; |
10003 | |
10004 | return getOnesVector(VT, DAG, DL); |
10005 | } |
10006 | |
10007 | return SDValue(); |
10008 | } |
10009 | |
10010 | |
10011 | // Build a vector permute of SrcVec by the indices in IndicesVec, |
10012 | // selecting the cheapest variable-shuffle instruction for VT. |
10013 | static SDValue createVariablePermute(MVT VT, SDValue SrcVec, SDValue IndicesVec, |
10014 | SDLoc &DL, SelectionDAG &DAG, |
10015 | const X86Subtarget &Subtarget) { |
10016 | MVT ShuffleVT = VT; |
10017 | EVT IndicesVT = EVT(VT).changeVectorElementTypeToInteger(); |
10018 | unsigned NumElts = VT.getVectorNumElements(); |
10019 | unsigned SizeInBits = VT.getSizeInBits(); |
10020 | |
10021 | |
10022 | assert(IndicesVec.getValueType().getVectorNumElements() >= NumElts && |
10023 | "Illegal variable permute mask size"); |
10024 | if (IndicesVec.getValueType().getVectorNumElements() > NumElts) { |
| 5 | Calling 'SDValue::getValueType' |
10025 | |
10026 | if (IndicesVec.getValueSizeInBits() > SizeInBits) |
10027 | IndicesVec = extractSubVector(IndicesVec, 0, DAG, SDLoc(IndicesVec), |
10028 | NumElts * VT.getScalarSizeInBits()); |
10029 | else if (IndicesVec.getValueSizeInBits() < SizeInBits) |
10030 | IndicesVec = widenSubVector(IndicesVec, false, Subtarget, DAG, |
10031 | SDLoc(IndicesVec), SizeInBits); |
10032 | |
10033 | if (IndicesVec.getValueType().getVectorNumElements() > NumElts) |
10034 | IndicesVec = DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, SDLoc(IndicesVec), |
10035 | IndicesVT, IndicesVec); |
10036 | } |
10037 | IndicesVec = DAG.getZExtOrTrunc(IndicesVec, SDLoc(IndicesVec), IndicesVT); |
10038 | |
10039 | |
10040 | if (SrcVec.getValueSizeInBits() != SizeInBits) { |
10041 | if ((SrcVec.getValueSizeInBits() % SizeInBits) == 0) { |
10042 | |
10043 | unsigned Scale = SrcVec.getValueSizeInBits() / SizeInBits; |
10044 | VT = MVT::getVectorVT(VT.getScalarType(), Scale * NumElts); |
10045 | IndicesVT = EVT(VT).changeVectorElementTypeToInteger(); |
10046 | IndicesVec = widenSubVector(IndicesVT.getSimpleVT(), IndicesVec, false, |
10047 | Subtarget, DAG, SDLoc(IndicesVec)); |
10048 | SDValue NewSrcVec = |
10049 | createVariablePermute(VT, SrcVec, IndicesVec, DL, DAG, Subtarget); |
10050 | if (NewSrcVec) |
10051 | return extractSubVector(NewSrcVec, 0, DAG, DL, SizeInBits); |
10052 | return SDValue(); |
10053 | } else if (SrcVec.getValueSizeInBits() < SizeInBits) { |
10054 | |
10055 | SrcVec = widenSubVector(VT, SrcVec, false, Subtarget, DAG, SDLoc(SrcVec)); |
10056 | } else |
10057 | return SDValue(); |
10058 | } |
10059 | |
10060 | auto ScaleIndices = [&DAG](SDValue Idx, uint64_t Scale) { |
10061 | assert(isPowerOf2_64(Scale) && "Illegal variable permute shuffle scale"); |
10062 | EVT SrcVT = Idx.getValueType(); |
10063 | unsigned NumDstBits = SrcVT.getScalarSizeInBits() / Scale; |
10064 | uint64_t IndexScale = 0; |
10065 | uint64_t IndexOffset = 0; |
10066 | |
10067 | |
10068 | |
10069 | |
10070 | |
10071 | |
10072 | for (uint64_t i = 0; i != Scale; ++i) { |
10073 | IndexScale |= Scale << (i * NumDstBits); |
10074 | IndexOffset |= i << (i * NumDstBits); |
10075 | } |
10076 | |
10077 | Idx = DAG.getNode(ISD::MUL, SDLoc(Idx), SrcVT, Idx, |
10078 | DAG.getConstant(IndexScale, SDLoc(Idx), SrcVT)); |
10079 | Idx = DAG.getNode(ISD::ADD, SDLoc(Idx), SrcVT, Idx, |
10080 | DAG.getConstant(IndexOffset, SDLoc(Idx), SrcVT)); |
10081 | return Idx; |
10082 | }; |
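// Worked instance of the scaling above (our numbers): widening v8i16
// word indices to v16i8 byte indices has Scale = 2 and NumDstBits = 8,
// giving IndexScale = 0x0202 and IndexOffset = 0x0100 per 16-bit lane,
// so a word index k becomes the byte pair (2k, 2k+1):
//   k * 0x0202 + 0x0100 == ((2k + 1) << 8) | (2k)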
10083 | |
10084 | unsigned Opcode = 0; |
10085 | switch (VT.SimpleTy) { |
10086 | default: |
10087 | break; |
10088 | case MVT::v16i8: |
10089 | if (Subtarget.hasSSSE3()) |
10090 | Opcode = X86ISD::PSHUFB; |
10091 | break; |
10092 | case MVT::v8i16: |
10093 | if (Subtarget.hasVLX() && Subtarget.hasBWI()) |
10094 | Opcode = X86ISD::VPERMV; |
10095 | else if (Subtarget.hasSSSE3()) { |
10096 | Opcode = X86ISD::PSHUFB; |
10097 | ShuffleVT = MVT::v16i8; |
10098 | } |
10099 | break; |
10100 | case MVT::v4f32: |
10101 | case MVT::v4i32: |
10102 | if (Subtarget.hasAVX()) { |
10103 | Opcode = X86ISD::VPERMILPV; |
10104 | ShuffleVT = MVT::v4f32; |
10105 | } else if (Subtarget.hasSSSE3()) { |
10106 | Opcode = X86ISD::PSHUFB; |
10107 | ShuffleVT = MVT::v16i8; |
10108 | } |
10109 | break; |
10110 | case MVT::v2f64: |
10111 | case MVT::v2i64: |
10112 | if (Subtarget.hasAVX()) { |
10113 | |
10114 | IndicesVec = DAG.getNode(ISD::ADD, DL, IndicesVT, IndicesVec, IndicesVec); |
10115 | Opcode = X86ISD::VPERMILPV; |
10116 | ShuffleVT = MVT::v2f64; |
10117 | } else if (Subtarget.hasSSE41()) { |
10118 | |
10119 | return DAG.getSelectCC( |
10120 | DL, IndicesVec, |
10121 | getZeroVector(IndicesVT.getSimpleVT(), Subtarget, DAG, DL), |
10122 | DAG.getVectorShuffle(VT, DL, SrcVec, SrcVec, {0, 0}), |
10123 | DAG.getVectorShuffle(VT, DL, SrcVec, SrcVec, {1, 1}), |
10124 | ISD::CondCode::SETEQ); |
10125 | } |
10126 | break; |
10127 | case MVT::v32i8: |
10128 | if (Subtarget.hasVLX() && Subtarget.hasVBMI()) |
10129 | Opcode = X86ISD::VPERMV; |
10130 | else if (Subtarget.hasXOP()) { |
10131 | SDValue LoSrc = extract128BitVector(SrcVec, 0, DAG, DL); |
10132 | SDValue HiSrc = extract128BitVector(SrcVec, 16, DAG, DL); |
10133 | SDValue LoIdx = extract128BitVector(IndicesVec, 0, DAG, DL); |
10134 | SDValue HiIdx = extract128BitVector(IndicesVec, 16, DAG, DL); |
10135 | return DAG.getNode( |
10136 | ISD::CONCAT_VECTORS, DL, VT, |
10137 | DAG.getNode(X86ISD::VPPERM, DL, MVT::v16i8, LoSrc, HiSrc, LoIdx), |
10138 | DAG.getNode(X86ISD::VPPERM, DL, MVT::v16i8, LoSrc, HiSrc, HiIdx)); |
10139 | } else if (Subtarget.hasAVX()) { |
10140 | SDValue Lo = extract128BitVector(SrcVec, 0, DAG, DL); |
10141 | SDValue Hi = extract128BitVector(SrcVec, 16, DAG, DL); |
10142 | SDValue LoLo = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Lo); |
10143 | SDValue HiHi = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Hi, Hi); |
10144 | auto PSHUFBBuilder = [](SelectionDAG &DAG, const SDLoc &DL, |
10145 | ArrayRef<SDValue> Ops) { |
10146 | |
10147 | |
10148 | |
10149 | SDValue Idx = Ops[2]; |
10150 | EVT VT = Idx.getValueType(); |
10151 | return DAG.getSelectCC(DL, Idx, DAG.getConstant(15, DL, VT), |
10152 | DAG.getNode(X86ISD::PSHUFB, DL, VT, Ops[1], Idx), |
10153 | DAG.getNode(X86ISD::PSHUFB, DL, VT, Ops[0], Idx), |
10154 | ISD::CondCode::SETGT); |
10155 | }; |
10156 | SDValue Ops[] = {LoLo, HiHi, IndicesVec}; |
10157 | return SplitOpsAndApply(DAG, Subtarget, DL, MVT::v32i8, Ops, |
10158 | PSHUFBBuilder); |
10159 | } |
10160 | break; |
10161 | case MVT::v16i16: |
10162 | if (Subtarget.hasVLX() && Subtarget.hasBWI()) |
10163 | Opcode = X86ISD::VPERMV; |
10164 | else if (Subtarget.hasAVX()) { |
10165 | |
10166 | IndicesVec = ScaleIndices(IndicesVec, 2); |
10167 | return DAG.getBitcast( |
10168 | VT, createVariablePermute( |
10169 | MVT::v32i8, DAG.getBitcast(MVT::v32i8, SrcVec), |
10170 | DAG.getBitcast(MVT::v32i8, IndicesVec), DL, DAG, Subtarget)); |
10171 | } |
10172 | break; |
10173 | case MVT::v8f32: |
10174 | case MVT::v8i32: |
10175 | if (Subtarget.hasAVX2()) |
10176 | Opcode = X86ISD::VPERMV; |
10177 | else if (Subtarget.hasAVX()) { |
10178 | SrcVec = DAG.getBitcast(MVT::v8f32, SrcVec); |
10179 | SDValue LoLo = DAG.getVectorShuffle(MVT::v8f32, DL, SrcVec, SrcVec, |
10180 | {0, 1, 2, 3, 0, 1, 2, 3}); |
10181 | SDValue HiHi = DAG.getVectorShuffle(MVT::v8f32, DL, SrcVec, SrcVec, |
10182 | {4, 5, 6, 7, 4, 5, 6, 7}); |
10183 | if (Subtarget.hasXOP()) |
10184 | return DAG.getBitcast( |
10185 | VT, DAG.getNode(X86ISD::VPERMIL2, DL, MVT::v8f32, LoLo, HiHi, |
10186 | IndicesVec, DAG.getTargetConstant(0, DL, MVT::i8))); |
10187 | |
10188 | |
10189 | SDValue Res = DAG.getSelectCC( |
10190 | DL, IndicesVec, DAG.getConstant(3, DL, MVT::v8i32), |
10191 | DAG.getNode(X86ISD::VPERMILPV, DL, MVT::v8f32, HiHi, IndicesVec), |
10192 | DAG.getNode(X86ISD::VPERMILPV, DL, MVT::v8f32, LoLo, IndicesVec), |
10193 | ISD::CondCode::SETGT); |
10194 | return DAG.getBitcast(VT, Res); |
10195 | } |
10196 | break; |
10197 | case MVT::v4i64: |
10198 | case MVT::v4f64: |
10199 | if (Subtarget.hasAVX512()) { |
10200 | if (!Subtarget.hasVLX()) { |
10201 | MVT WidenSrcVT = MVT::getVectorVT(VT.getScalarType(), 8); |
10202 | SrcVec = widenSubVector(WidenSrcVT, SrcVec, false, Subtarget, DAG, |
10203 | SDLoc(SrcVec)); |
10204 | IndicesVec = widenSubVector(MVT::v8i64, IndicesVec, false, Subtarget, |
10205 | DAG, SDLoc(IndicesVec)); |
10206 | SDValue Res = createVariablePermute(WidenSrcVT, SrcVec, IndicesVec, DL, |
10207 | DAG, Subtarget); |
10208 | return extract256BitVector(Res, 0, DAG, DL); |
10209 | } |
10210 | Opcode = X86ISD::VPERMV; |
10211 | } else if (Subtarget.hasAVX()) { |
10212 | SrcVec = DAG.getBitcast(MVT::v4f64, SrcVec); |
10213 | SDValue LoLo = |
10214 | DAG.getVectorShuffle(MVT::v4f64, DL, SrcVec, SrcVec, {0, 1, 0, 1}); |
10215 | SDValue HiHi = |
10216 | DAG.getVectorShuffle(MVT::v4f64, DL, SrcVec, SrcVec, {2, 3, 2, 3}); |
10217 | |
10218 | IndicesVec = DAG.getNode(ISD::ADD, DL, IndicesVT, IndicesVec, IndicesVec); |
10219 | if (Subtarget.hasXOP()) |
10220 | return DAG.getBitcast( |
10221 | VT, DAG.getNode(X86ISD::VPERMIL2, DL, MVT::v4f64, LoLo, HiHi, |
10222 | IndicesVec, DAG.getTargetConstant(0, DL, MVT::i8))); |
10223 | |
10224 | |
10225 | SDValue Res = DAG.getSelectCC( |
10226 | DL, IndicesVec, DAG.getConstant(2, DL, MVT::v4i64), |
10227 | DAG.getNode(X86ISD::VPERMILPV, DL, MVT::v4f64, HiHi, IndicesVec), |
10228 | DAG.getNode(X86ISD::VPERMILPV, DL, MVT::v4f64, LoLo, IndicesVec), |
10229 | ISD::CondCode::SETGT); |
10230 | return DAG.getBitcast(VT, Res); |
10231 | } |
10232 | break; |
10233 | case MVT::v64i8: |
10234 | if (Subtarget.hasVBMI()) |
10235 | Opcode = X86ISD::VPERMV; |
10236 | break; |
10237 | case MVT::v32i16: |
10238 | if (Subtarget.hasBWI()) |
10239 | Opcode = X86ISD::VPERMV; |
10240 | break; |
10241 | case MVT::v16f32: |
10242 | case MVT::v16i32: |
10243 | case MVT::v8f64: |
10244 | case MVT::v8i64: |
10245 | if (Subtarget.hasAVX512()) |
10246 | Opcode = X86ISD::VPERMV; |
10247 | break; |
10248 | } |
10249 | if (!Opcode) |
10250 | return SDValue(); |
10251 | |
10252 | assert((VT.getSizeInBits() == ShuffleVT.getSizeInBits()) && |
10253 | (VT.getScalarSizeInBits() % ShuffleVT.getScalarSizeInBits()) == 0 && |
10254 | "Illegal variable permute shuffle type"); |
10255 | |
10256 | uint64_t Scale = VT.getScalarSizeInBits() / ShuffleVT.getScalarSizeInBits(); |
10257 | if (Scale > 1) |
10258 | IndicesVec = ScaleIndices(IndicesVec, Scale); |
10259 | |
10260 | EVT ShuffleIdxVT = EVT(ShuffleVT).changeVectorElementTypeToInteger(); |
10261 | IndicesVec = DAG.getBitcast(ShuffleIdxVT, IndicesVec); |
10262 | |
10263 | SrcVec = DAG.getBitcast(ShuffleVT, SrcVec); |
10264 | SDValue Res = Opcode == X86ISD::VPERMV |
10265 | ? DAG.getNode(Opcode, DL, ShuffleVT, IndicesVec, SrcVec) |
10266 | : DAG.getNode(Opcode, DL, ShuffleVT, SrcVec, IndicesVec); |
10267 | return DAG.getBitcast(VT, Res); |
10268 | } |
10269 | |
10270 | |
10271 | |
10272 | |
10273 | |
10274 | |
10275 | // Attempt to lower a BUILD_VECTOR of scalar extracts whose extract |
10276 | // indices are themselves loaded from an index vector, i.e. |
10277 | //   (build_vector (extract_elt V, (extract_elt I, 0)), ...) |
10278 | // as a variable permute (VPERMV/VPERMILPV/PSHUFB/...) of V by I. |
10279 | |
10280 | |
10281 | static SDValue |
10282 | LowerBUILD_VECTORAsVariablePermute(SDValue V, SelectionDAG &DAG, |
10283 | const X86Subtarget &Subtarget) { |
10284 | SDValue SrcVec, IndicesVec; |
10285 | |
10286 | |
10287 | |
10288 | for (unsigned Idx = 0, E = V.getNumOperands(); Idx != E; ++Idx) { |
| 1 | Assuming 'Idx' is equal to 'E' |
| 2 | Loop condition is false. Execution continues on line 10321 |
10289 | SDValue Op = V.getOperand(Idx); |
10290 | if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT) |
10291 | return SDValue(); |
10292 | |
10293 | |
10294 | |
10295 | |
10296 | if (!SrcVec) |
10297 | SrcVec = Op.getOperand(0); |
10298 | else if (SrcVec != Op.getOperand(0)) |
10299 | return SDValue(); |
10300 | SDValue ExtractedIndex = Op->getOperand(1); |
10301 | |
10302 | if (ExtractedIndex.getOpcode() == ISD::ZERO_EXTEND || |
10303 | ExtractedIndex.getOpcode() == ISD::SIGN_EXTEND) |
10304 | ExtractedIndex = ExtractedIndex.getOperand(0); |
10305 | if (ExtractedIndex.getOpcode() != ISD::EXTRACT_VECTOR_ELT) |
10306 | return SDValue(); |
10307 | |
10308 | |
10309 | |
10310 | |
10311 | if (!IndicesVec) |
10312 | IndicesVec = ExtractedIndex.getOperand(0); |
10313 | else if (IndicesVec != ExtractedIndex.getOperand(0)) |
10314 | return SDValue(); |
10315 | |
10316 | auto *PermIdx = dyn_cast<ConstantSDNode>(ExtractedIndex.getOperand(1)); |
10317 | if (!PermIdx || PermIdx->getAPIntValue() != Idx) |
10318 | return SDValue(); |
10319 | } |
10320 | |
10321 | SDLoc DL(V); |
10322 | MVT VT = V.getSimpleValueType(); |
10323 | return createVariablePermute(VT, SrcVec, IndicesVec, DL, DAG, Subtarget); |
| 3 | Null pointer value stored to 'IndicesVec.Node' |
| 4 | Calling 'createVariablePermute' |
10324 | } |
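Reading analyzer events 1-5 together: if V had no operands, the loop above would never execute, SrcVec and IndicesVec would keep their default-constructed null Node pointers, and createVariablePermute would immediately call getValueType() on the null IndicesVec. A BUILD_VECTOR always has operands in practice, so this is a may-path, but a guard of this shape would satisfy the checker (a sketch, not the upstream code):

    if (!SrcVec || !IndicesVec)
      return SDValue();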
10325 | |
10326 | SDValue |
10327 | X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { |
10328 | SDLoc dl(Op); |
10329 | |
10330 | MVT VT = Op.getSimpleValueType(); |
10331 | MVT EltVT = VT.getVectorElementType(); |
10332 | unsigned NumElems = Op.getNumOperands(); |
10333 | |
10334 | |
10335 | if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512()) |
10336 | return LowerBUILD_VECTORvXi1(Op, DAG, Subtarget); |
10337 | |
10338 | if (SDValue VectorConstant = materializeVectorConstant(Op, DAG, Subtarget)) |
10339 | return VectorConstant; |
10340 | |
10341 | unsigned EVTBits = EltVT.getSizeInBits(); |
10342 | APInt UndefMask = APInt::getNullValue(NumElems); |
10343 | APInt ZeroMask = APInt::getNullValue(NumElems); |
10344 | APInt NonZeroMask = APInt::getNullValue(NumElems); |
10345 | bool IsAllConstants = true; |
10346 | SmallSet<SDValue, 8> Values; |
10347 | unsigned NumConstants = NumElems; |
10348 | for (unsigned i = 0; i < NumElems; ++i) { |
10349 | SDValue Elt = Op.getOperand(i); |
10350 | if (Elt.isUndef()) { |
10351 | UndefMask.setBit(i); |
10352 | continue; |
10353 | } |
10354 | Values.insert(Elt); |
10355 | if (!isa<ConstantSDNode>(Elt) && !isa<ConstantFPSDNode>(Elt)) { |
10356 | IsAllConstants = false; |
10357 | NumConstants--; |
10358 | } |
10359 | if (X86::isZeroNode(Elt)) { |
10360 | ZeroMask.setBit(i); |
10361 | } else { |
10362 | NonZeroMask.setBit(i); |
10363 | } |
10364 | } |
10365 | |
10366 | |
10367 | if (NonZeroMask == 0) { |
10368 | assert(UndefMask.isAllOnesValue() && "Fully undef mask expected"); |
10369 | return DAG.getUNDEF(VT); |
10370 | } |
10371 | |
10372 | BuildVectorSDNode *BV = cast<BuildVectorSDNode>(Op.getNode()); |
10373 | |
10374 | |
10375 | |
10376 | if ((VT.is256BitVector() || VT.is512BitVector()) && |
10377 | !isFoldableUseOfShuffle(BV)) { |
10378 | unsigned UpperElems = NumElems / 2; |
10379 | APInt UndefOrZeroMask = UndefMask | ZeroMask; |
10380 | unsigned NumUpperUndefsOrZeros = UndefOrZeroMask.countLeadingOnes(); |
10381 | if (NumUpperUndefsOrZeros >= UpperElems) { |
10382 | if (VT.is512BitVector() && |
10383 | NumUpperUndefsOrZeros >= (NumElems - (NumElems / 4))) |
10384 | UpperElems = NumElems - (NumElems / 4); |
10385 | bool UndefUpper = UndefMask.countLeadingOnes() >= UpperElems; |
10386 | MVT LowerVT = MVT::getVectorVT(EltVT, NumElems - UpperElems); |
10387 | SDValue NewBV = |
10388 | DAG.getBuildVector(LowerVT, dl, Op->ops().drop_back(UpperElems)); |
10389 | return widenSubVector(VT, NewBV, !UndefUpper, Subtarget, DAG, dl); |
10390 | } |
10391 | } |
10392 | |
10393 | if (SDValue AddSub = lowerToAddSubOrFMAddSub(BV, Subtarget, DAG)) |
10394 | return AddSub; |
10395 | if (SDValue HorizontalOp = LowerToHorizontalOp(BV, Subtarget, DAG)) |
10396 | return HorizontalOp; |
10397 | if (SDValue Broadcast = lowerBuildVectorAsBroadcast(BV, Subtarget, DAG)) |
10398 | return Broadcast; |
10399 | if (SDValue BitOp = lowerBuildVectorToBitOp(BV, Subtarget, DAG)) |
10400 | return BitOp; |
10401 | |
10402 | unsigned NumZero = ZeroMask.countPopulation(); |
10403 | unsigned NumNonZero = NonZeroMask.countPopulation(); |
10404 | |
10405 | |
10406 | // If we are inserting one variable element into a vector of non-zero |
10407 | // constants, avoid loading each constant as a scalar: load all the |
10408 | // constants as one vector and insert the variable element into it. |
10409 | // If insertion is not legal, fall back to a shuffle that blends the |
10410 | // scalar with the constant vector. |
10411 | if (NumConstants == NumElems - 1 && NumNonZero != 1 && |
10412 | (isOperationLegalOrCustom(ISD::INSERT_VECTOR_ELT, VT) || |
10413 | isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, VT))) { |
10414 | |
10415 | |
10416 | |
10417 | LLVMContext &Context = *DAG.getContext(); |
10418 | Type *EltType = Op.getValueType().getScalarType().getTypeForEVT(Context); |
10419 | SmallVector<Constant *, 16> ConstVecOps(NumElems, UndefValue::get(EltType)); |
10420 | SDValue VarElt; |
10421 | SDValue InsIndex; |
10422 | for (unsigned i = 0; i != NumElems; ++i) { |
10423 | SDValue Elt = Op.getOperand(i); |
10424 | if (auto *C = dyn_cast<ConstantSDNode>(Elt)) |
10425 | ConstVecOps[i] = ConstantInt::get(Context, C->getAPIntValue()); |
10426 | else if (auto *C = dyn_cast<ConstantFPSDNode>(Elt)) |
10427 | ConstVecOps[i] = ConstantFP::get(Context, C->getValueAPF()); |
10428 | else if (!Elt.isUndef()) { |
10429 | assert(!VarElt.getNode() && !InsIndex.getNode() && |
10430 | "Expected one variable element in this vector"); |
10431 | VarElt = Elt; |
10432 | InsIndex = DAG.getVectorIdxConstant(i, dl); |
10433 | } |
10434 | } |
10435 | Constant *CV = ConstantVector::get(ConstVecOps); |
10436 | SDValue DAGConstVec = DAG.getConstantPool(CV, VT); |
10437 | |
10438 | |
10439 | |
10440 | |
10441 | |
10442 | |
10443 | |
10444 | SDValue LegalDAGConstVec = LowerConstantPool(DAGConstVec, DAG); |
10445 | MachineFunction &MF = DAG.getMachineFunction(); |
10446 | MachinePointerInfo MPI = MachinePointerInfo::getConstantPool(MF); |
10447 | SDValue Ld = DAG.getLoad(VT, dl, DAG.getEntryNode(), LegalDAGConstVec, MPI); |
10448 | unsigned InsertC = cast<ConstantSDNode>(InsIndex)->getZExtValue(); |
10449 | unsigned NumEltsInLow128Bits = 128 / VT.getScalarSizeInBits(); |
10450 | if (InsertC < NumEltsInLow128Bits) |
10451 | return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Ld, VarElt, InsIndex); |
10452 | |
10453 | |
10454 | |
10455 | assert(VT.getSizeInBits() > 128 && "Invalid insertion index?"); |
10456 | assert(Subtarget.hasAVX() && "Must have AVX with >16-byte vector"); |
10457 | SmallVector<int, 8> ShuffleMask; |
10458 | unsigned NumElts = VT.getVectorNumElements(); |
10459 | for (unsigned i = 0; i != NumElts; ++i) |
10460 | ShuffleMask.push_back(i == InsertC ? NumElts : i); |
10461 | SDValue S2V = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, VarElt); |
10462 | return DAG.getVectorShuffle(VT, dl, Ld, S2V, ShuffleMask); |
10463 | } |
10464 | |
10465 | |
10466 | if (NumNonZero == 1) { |
10467 | unsigned Idx = NonZeroMask.countTrailingZeros(); |
10468 | SDValue Item = Op.getOperand(Idx); |
10469 | |
10470 | // If we are inserting into the low element of a vector, use |
10471 | // SCALAR_TO_VECTOR plus a shuffle of zeros into the remaining |
10472 | // elements; this matches movd/movq/movss/movsd depending on the |
10473 | // source type. |
10474 | if (Idx == 0) { |
10475 | if (NumZero == 0) |
10476 | return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item); |
10477 | |
10478 | if (EltVT == MVT::i32 || EltVT == MVT::f32 || EltVT == MVT::f64 || |
10479 | (EltVT == MVT::i64 && Subtarget.is64Bit())) { |
10480 | assert((VT.is128BitVector() || VT.is256BitVector() || |
10481 | VT.is512BitVector()) && |
10482 | "Expected an SSE value type!"); |
10483 | Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item); |
10484 | |
10485 | return getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG); |
10486 | } |
10487 | |
10488 | // i8/i16 cannot be moved into a vector register directly; zero- |
10489 | // extend to i32, build the i32 vector, then bitcast back. |
10490 | if (EltVT == MVT::i16 || EltVT == MVT::i8) { |
10491 | Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item); |
10492 | MVT ShufVT = MVT::getVectorVT(MVT::i32, VT.getSizeInBits() / 32); |
10493 | Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, ShufVT, Item); |
10494 | Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG); |
10495 | return DAG.getBitcast(VT, Item); |
10496 | } |
10497 | } |
10498 | |
10499 | // Is it a vector logical left shift? |
10500 | if (NumElems == 2 && Idx == 1 && |
10501 | X86::isZeroNode(Op.getOperand(0)) && |
10502 | !X86::isZeroNode(Op.getOperand(1))) { |
10503 | unsigned NumBits = VT.getSizeInBits(); |
10504 | return getVShift(true, VT, |
10505 | DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, |
10506 | VT, Op.getOperand(1)), |
10507 | NumBits/2, DAG, *this, dl); |
10508 | } |
10509 | |
10510 | if (IsAllConstants) |
10511 | return SDValue(); |
10512 | |
10513 | |
10514 | // For vectors of 32-bit elements, a non-constant scalar inserted |
10515 | // into an element other than the low one cannot use a constant-pool |
10516 | // load. Use SCALAR_TO_VECTOR (movd/movss) to place it in the low |
10517 | // element, then shuffle it into position. |
10518 | if (EVTBits == 32) { |
10519 | Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item); |
10520 | return getShuffleVectorZeroOrUndef(Item, Idx, NumZero > 0, Subtarget, DAG); |
10521 | } |
10522 | } |
10523 | |
10524 | // Splat is obviously ok. Let legalizer expand it to a shuffle. |
10525 | if (Values.size() == 1) { |
10526 | if (EVTBits == 32) { |
10527 | |
10528 | |
10529 | |
10530 | |
10531 | unsigned Idx = NonZeroMask.countTrailingZeros(); |
10532 | SDValue Item = Op.getOperand(Idx); |
10533 | if (Op.getNode()->isOnlyUserOf(Item.getNode())) |
10534 | return LowerAsSplatVectorLoad(Item, VT, dl, DAG); |
10535 | } |
10536 | return SDValue(); |
10537 | } |
10538 | |
10539 | // A vector full of immediates; various special cases are already |
10540 | // handled, so this is best done with a single constant-pool load. |
10541 | if (IsAllConstants) |
10542 | return SDValue(); |
10543 | |
10544 | if (SDValue V = LowerBUILD_VECTORAsVariablePermute(Op, DAG, Subtarget)) |
10545 | return V; |
10546 | |
10547 | // See if we can use a vector load to get all of the elements. |
10548 | { |
10549 | SmallVector<SDValue, 64> Ops(Op->op_begin(), Op->op_begin() + NumElems); |
10550 | if (SDValue LD = |
10551 | EltsFromConsecutiveLoads(VT, Ops, dl, DAG, Subtarget, false)) |
10552 | return LD; |
10553 | } |
10554 | |
10555 | |
10556 | // If this is a splat of pairs of 32-bit elements, we can do this |
10557 | // with a narrower build_vector followed by a 64-bit broadcast. |
10558 | if (Subtarget.hasAVX2() && EVTBits == 32 && Values.size() == 2) { |
10559 | SDValue Ops[4] = { Op.getOperand(0), Op.getOperand(1), |
10560 | DAG.getUNDEF(EltVT), DAG.getUNDEF(EltVT) }; |
10561 | auto CanSplat = [](SDValue Op, unsigned NumElems, ArrayRef<SDValue> Ops) { |
10562 | |
10563 | for (unsigned i = 2; i != NumElems; ++i) |
10564 | if (Ops[i % 2] != Op.getOperand(i)) |
10565 | return false; |
10566 | return true; |
10567 | }; |
10568 | if (CanSplat(Op, NumElems, Ops)) { |
10569 | MVT WideEltVT = VT.isFloatingPoint() ? MVT::f64 : MVT::i64; |
10570 | MVT NarrowVT = MVT::getVectorVT(EltVT, 4); |
10571 | |
10572 | SDValue NewBV = DAG.getBitcast(MVT::getVectorVT(WideEltVT, 2), |
10573 | DAG.getBuildVector(NarrowVT, dl, Ops)); |
10574 | |
10575 | MVT BcastVT = MVT::getVectorVT(WideEltVT, NumElems / 2); |
10576 | return DAG.getBitcast(VT, DAG.getNode(X86ISD::VBROADCAST, dl, BcastVT, |
10577 | NewBV)); |
10578 | } |
10579 | } |
10580 | |
10581 | // For AVX-length vectors, build the individual 128-bit pieces and |
10582 | // use shuffles to put them in place. |
10583 | if (VT.getSizeInBits() > 128) { |
10584 | MVT HVT = MVT::getVectorVT(EltVT, NumElems / 2); |
10585 | |
10586 | |
10587 | SDValue Lower = |
10588 | DAG.getBuildVector(HVT, dl, Op->ops().slice(0, NumElems / 2)); |
10589 | SDValue Upper = DAG.getBuildVector( |
10590 | HVT, dl, Op->ops().slice(NumElems / 2, NumElems / 2)); |
10591 | |
10592 | |
10593 | return concatSubVectors(Lower, Upper, DAG, dl); |
10594 | } |
10595 | |
10596 | // 64-bit element vectors: only the single non-zero case is handled. |
10597 | if (EVTBits == 64) { |
10598 | if (NumNonZero == 1) { |
10599 | |
10600 | unsigned Idx = NonZeroMask.countTrailingZeros(); |
10601 | SDValue V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, |
10602 | Op.getOperand(Idx)); |
10603 | return getShuffleVectorZeroOrUndef(V2, Idx, true, Subtarget, DAG); |
10604 | } |
10605 | return SDValue(); |
10606 | } |
10607 | |
10608 | // If element VT is < 32 bits, convert it to inserts into a zero vector. |
10609 | if (EVTBits == 8 && NumElems == 16) |
10610 | if (SDValue V = LowerBuildVectorv16i8(Op, NonZeroMask, NumNonZero, NumZero, |
10611 | DAG, Subtarget)) |
10612 | return V; |
10613 | |
10614 | if (EVTBits == 16 && NumElems == 8) |
10615 | if (SDValue V = LowerBuildVectorv8i16(Op, NonZeroMask, NumNonZero, NumZero, |
10616 | DAG, Subtarget)) |
10617 | return V; |
10618 | |
10619 | // If element VT is == 32 bits, turn it into a number of shuffles. |
10620 | if (EVTBits == 32 && NumElems == 4) |
10621 | if (SDValue V = LowerBuildVectorv4x32(Op, DAG, Subtarget)) |
10622 | return V; |
10623 | |
10624 | // Four elements with zeros present: combine pairs with MOVL/unpack, |
10625 | if (NumElems == 4 && NumZero > 0) { |
10626 | SmallVector<SDValue, 8> Ops(NumElems); |
10627 | for (unsigned i = 0; i < 4; ++i) { |
10628 | bool isZero = !NonZeroMask[i]; |
10629 | if (isZero) |
10630 | Ops[i] = getZeroVector(VT, Subtarget, DAG, dl); |
10631 | else |
10632 | Ops[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(i)); |
10633 | } |
10634 | |
10635 | for (unsigned i = 0; i < 2; ++i) { |
10636 | switch (NonZeroMask.extractBitsAsZExtValue(2, i * 2)) { |
10637 | default: llvm_unreachable("Unexpected NonZero count"); |
10638 | case 0: |
10639 | Ops[i] = Ops[i*2]; |
10640 | break; |
10641 | case 1: |
10642 | Ops[i] = getMOVL(DAG, dl, VT, Ops[i*2+1], Ops[i*2]); |
10643 | break; |
10644 | case 2: |
10645 | Ops[i] = getMOVL(DAG, dl, VT, Ops[i*2], Ops[i*2+1]); |
10646 | break; |
10647 | case 3: |
10648 | Ops[i] = getUnpackl(DAG, dl, VT, Ops[i*2], Ops[i*2+1]); |
10649 | break; |
10650 | } |
10651 | } |
10652 | |
10653 | bool Reverse1 = NonZeroMask.extractBitsAsZExtValue(2, 0) == 2; |
10654 | bool Reverse2 = NonZeroMask.extractBitsAsZExtValue(2, 2) == 2; |
10655 | int MaskVec[] = { |
10656 | Reverse1 ? 1 : 0, |
10657 | Reverse1 ? 0 : 1, |
10658 | static_cast<int>(Reverse2 ? NumElems+1 : NumElems), |
10659 | static_cast<int>(Reverse2 ? NumElems : NumElems+1) |
10660 | }; |
10661 | return DAG.getVectorShuffle(VT, dl, Ops[0], Ops[1], MaskVec); |
10662 | } |
10663 | |
10664 | assert(Values.size() > 1 && "Expected non-undef and non-splat vector"); |
10665 | |
10666 | // Check for a build vector from mostly shuffle plus few inserting. |
10667 | if (SDValue Sh = buildFromShuffleMostly(Op, DAG)) |
10668 | return Sh; |
10669 | |
10670 | // With SSE4.1, expand into a sequence of insert_vector_elt nodes. |
10671 | if (Subtarget.hasSSE41()) { |
10672 | SDValue Result; |
10673 | if (!Op.getOperand(0).isUndef()) |
10674 | Result = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(0)); |
10675 | else |
10676 | Result = DAG.getUNDEF(VT); |
10677 | |
10678 | for (unsigned i = 1; i < NumElems; ++i) { |
10679 | if (Op.getOperand(i).isUndef()) continue; |
10680 | Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Result, |
10681 | Op.getOperand(i), DAG.getIntPtrConstant(i, dl)); |
10682 | } |
10683 | return Result; |
10684 | } |
10685 | |
10686 | // Otherwise, expand into a tree of unpack shuffles: move each |
10687 | // non-undef scalar into lane 0 of its own vector, then repeatedly |
10688 | // interleave pairs, doubling the populated width each round. |
10689 | SmallVector<SDValue, 8> Ops(NumElems); |
10690 | for (unsigned i = 0; i < NumElems; ++i) { |
10691 | if (!Op.getOperand(i).isUndef()) |
10692 | Ops[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(i)); |
10693 | else |
10694 | Ops[i] = DAG.getUNDEF(VT); |
10695 | } |
10696 | |
10697 | |
10698 | |
10699 | |
10700 | |
10701 | for (unsigned Scale = 1; Scale < NumElems; Scale *= 2) { |
10702 | |
10703 | SmallVector<int, 16> Mask; |
10704 | for(unsigned i = 0; i != Scale; ++i) |
10705 | Mask.push_back(i); |
10706 | for (unsigned i = 0; i != Scale; ++i) |
10707 | Mask.push_back(NumElems+i); |
10708 | Mask.append(NumElems - Mask.size(), SM_SentinelUndef); |
10709 | |
10710 | for (unsigned i = 0, e = NumElems / (2 * Scale); i != e; ++i) |
10711 | Ops[i] = DAG.getVectorShuffle(VT, dl, Ops[2*i], Ops[(2*i)+1], Mask); |
10712 | } |
10713 | return Ops[0]; |
10714 | } |
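The final loop above merges the scalars as a binary tree of shuffles. For NumElems = 4 (illustrative trace):

    Scale = 1: Ops[0] = shuffle(Ops[0], Ops[1], <0, 4, u, u>)
               Ops[1] = shuffle(Ops[2], Ops[3], <0, 4, u, u>)
    Scale = 2: Ops[0] = shuffle(Ops[0], Ops[1], <0, 1, 4, 5>)

after which Ops[0] holds all four elements in order.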
10715 | |
10716 | |
10717 | |
10718 | |
10719 | static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG, |
10720 | const X86Subtarget &Subtarget) { |
10721 | SDLoc dl(Op); |
10722 | MVT ResVT = Op.getSimpleValueType(); |
10723 | |
10724 | assert((ResVT.is256BitVector() || |
10725 | ResVT.is512BitVector()) && "Value type must be 256-/512-bit wide"); |
10726 | |
10727 | unsigned NumOperands = Op.getNumOperands(); |
10728 | unsigned NumZero = 0; |
10729 | unsigned NumNonZero = 0; |
10730 | unsigned NonZeros = 0; |
10731 | for (unsigned i = 0; i != NumOperands; ++i) { |
10732 | SDValue SubVec = Op.getOperand(i); |
10733 | if (SubVec.isUndef()) |
10734 | continue; |
10735 | if (ISD::isBuildVectorAllZeros(SubVec.getNode())) |
10736 | ++NumZero; |
10737 | else { |
10738 | assert(i < sizeof(NonZeros) * CHAR_BIT); |
10739 | NonZeros |= 1 << i; |
10740 | ++NumNonZero; |
10741 | } |
10742 | } |
10743 | |
10744 | // With more than two non-zero sub-vectors, build each half separately. |
10745 | if (NumNonZero > 2) { |
10746 | MVT HalfVT = ResVT.getHalfNumVectorElementsVT(); |
10747 | ArrayRef<SDUse> Ops = Op->ops(); |
10748 | SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfVT, |
10749 | Ops.slice(0, NumOperands/2)); |
10750 | SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfVT, |
10751 | Ops.slice(NumOperands/2)); |
10752 | return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi); |
10753 | } |
10754 | |
10755 | // Otherwise, start from a zero or undef vector and insert sub-vectors. |
10756 | SDValue Vec = NumZero ? getZeroVector(ResVT, Subtarget, DAG, dl) |
10757 | : DAG.getUNDEF(ResVT); |
10758 | |
10759 | MVT SubVT = Op.getOperand(0).getSimpleValueType(); |
10760 | unsigned NumSubElems = SubVT.getVectorNumElements(); |
10761 | for (unsigned i = 0; i != NumOperands; ++i) { |
10762 | if ((NonZeros & (1 << i)) == 0) |
10763 | continue; |
10764 | |
10765 | Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Vec, |
10766 | Op.getOperand(i), |
10767 | DAG.getIntPtrConstant(i * NumSubElems, dl)); |
10768 | } |
10769 | |
10770 | return Vec; |
10771 | } |
10772 | |
10773 | |
10774 | // Lower a CONCAT_VECTORS of v*i1 (AVX-512 mask) operands via mask |
10775 | // immediates, KSHIFTL, or insert_subvector nodes. |
10776 | |
10777 | static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op, |
10778 | const X86Subtarget &Subtarget, |
10779 | SelectionDAG & DAG) { |
10780 | SDLoc dl(Op); |
10781 | MVT ResVT = Op.getSimpleValueType(); |
10782 | unsigned NumOperands = Op.getNumOperands(); |
10783 | |
10784 | assert(NumOperands > 1 && isPowerOf2_32(NumOperands) && |
10785 | "Unexpected number of operands in CONCAT_VECTORS"); |
10786 | |
10787 | uint64_t Zeros = 0; |
10788 | uint64_t NonZeros = 0; |
10789 | for (unsigned i = 0; i != NumOperands; ++i) { |
10790 | SDValue SubVec = Op.getOperand(i); |
10791 | if (SubVec.isUndef()) |
10792 | continue; |
10793 | assert(i < sizeof(NonZeros) * CHAR_BIT); |
10794 | if (ISD::isBuildVectorAllZeros(SubVec.getNode())) |
10795 | Zeros |= (uint64_t)1 << i; |
10796 | else |
10797 | NonZeros |= (uint64_t)1 << i; |
10798 | } |
10799 | |
10800 | unsigned NumElems = ResVT.getVectorNumElements(); |
10801 | |
10802 | // If a single non-zero sub-vector sits above at least one zero |
10803 | // sub-vector (and not in the top slot), insert it at bit 0 and use |
10804 | // KSHIFTL: the zeros shifted in materialize the zero sub-vectors. |
10805 | if (isPowerOf2_64(NonZeros) && Zeros != 0 && NonZeros > Zeros && |
10806 | Log2_64(NonZeros) != NumOperands - 1) { |
10807 | MVT ShiftVT = ResVT; |
10808 | if ((!Subtarget.hasDQI() && NumElems == 8) || NumElems < 8) |
10809 | ShiftVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1; |
10810 | unsigned Idx = Log2_64(NonZeros); |
10811 | SDValue SubVec = Op.getOperand(Idx); |
10812 | unsigned SubVecNumElts = SubVec.getSimpleValueType().getVectorNumElements(); |
10813 | SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ShiftVT, |
10814 | DAG.getUNDEF(ShiftVT), SubVec, |
10815 | DAG.getIntPtrConstant(0, dl)); |
10816 | Op = DAG.getNode(X86ISD::KSHIFTL, dl, ShiftVT, SubVec, |
10817 | DAG.getTargetConstant(Idx * SubVecNumElts, dl, MVT::i8)); |
10818 | return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResVT, Op, |
10819 | DAG.getIntPtrConstant(0, dl)); |
10820 | } |
10821 | |
10822 | // At most one non-zero sub-vector: one insert_subvector suffices. |
10823 | if (NonZeros == 0 || isPowerOf2_64(NonZeros)) { |
10824 | SDValue Vec = Zeros ? DAG.getConstant(0, dl, ResVT) : DAG.getUNDEF(ResVT); |
10825 | if (!NonZeros) |
10826 | return Vec; |
10827 | unsigned Idx = Log2_64(NonZeros); |
10828 | SDValue SubVec = Op.getOperand(Idx); |
10829 | unsigned SubVecNumElts = SubVec.getSimpleValueType().getVectorNumElements(); |
10830 | return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Vec, SubVec, |
10831 | DAG.getIntPtrConstant(Idx * SubVecNumElts, dl)); |
10832 | } |
10833 | |
10834 | if (NumOperands > 2) { |
10835 | MVT HalfVT = ResVT.getHalfNumVectorElementsVT(); |
10836 | ArrayRef<SDUse> Ops = Op->ops(); |
10837 | SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfVT, |
10838 | Ops.slice(0, NumOperands/2)); |
10839 | SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfVT, |
10840 | Ops.slice(NumOperands/2)); |
10841 | return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi); |
10842 | } |
10843 | |
10844 | assert(countPopulation(NonZeros) == 2 && "Simple cases not handled?"); |
10845 | |
10846 | if (ResVT.getVectorNumElements() >= 16) |
10847 | return Op; |
10848 | |
10849 | SDValue Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, |
10850 | DAG.getUNDEF(ResVT), Op.getOperand(0), |
10851 | DAG.getIntPtrConstant(0, dl)); |
10852 | return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Vec, Op.getOperand(1), |
10853 | DAG.getIntPtrConstant(NumElems/2, dl)); |
10854 | } |
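// --- Illustrative sketch (added; not part of X86ISelLowering.cpp) ----------
// A vXi1 vector lives in a k-register, so the KSHIFTL path above is plain
// integer bit math. A minimal standalone model, assuming a 64-bit mask and a
// hypothetical helper name:
#include <cstdint>
static uint64_t demoConcatOneNonZero(uint64_t NonZeroSub, unsigned Idx,
                                     unsigned SubBits) {
  // Insert at bit 0, then shift into place: the INSERT_SUBVECTOR + KSHIFTL
  // sequence collapses to a single left shift on the underlying mask.
  return NonZeroSub << (Idx * SubBits);
}
// ----------------------------------------------------------------------------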
10855 | |
10856 | static SDValue LowerCONCAT_VECTORS(SDValue Op, |
10857 | const X86Subtarget &Subtarget, |
10858 | SelectionDAG &DAG) { |
10859 | MVT VT = Op.getSimpleValueType(); |
10860 | if (VT.getVectorElementType() == MVT::i1) |
10861 | return LowerCONCAT_VECTORSvXi1(Op, Subtarget, DAG); |
10862 | |
10863 | assert((VT.is256BitVector() && Op.getNumOperands() == 2) || |
10864 | (VT.is512BitVector() && (Op.getNumOperands() == 2 || |
10865 | Op.getNumOperands() == 4))); |
10866 | |
10867 |  |
10868 |   // AVX can use the vinsertf128 instruction to create 256-bit vectors |
10869 |   // from two other 128-bit ones. |
10870 |   // A 512-bit vector may contain 2 256-bit vectors or 4 128-bit vectors. |
10871 | return LowerAVXCONCAT_VECTORS(Op, DAG, Subtarget); |
10872 | } |
10873 | |
10874 |  |
10875 | //===----------------------------------------------------------------------===// |
10876 | // Vector shuffle lowering |
10877 | // |
10878 | // This is an experimental code path for lowering vector shuffles on x86. It |
10879 | // is designed to handle arbitrary vector shuffles and blends, gracefully |
10880 | // degrading performance as necessary. It works hard to recognize idiomatic |
10881 | // shuffles and lower them to optimal instruction patterns without leaving |
10882 | // a framework that allows reasonably efficient handling of all vector |
10883 | // shuffle patterns. |
10884 | //===----------------------------------------------------------------------===// |
10885 |  |
10886 |  |
10887 |  |
10888 |  |
10889 |  |
10890 | /// Checks whether the shuffle mask is an identity mask: every non-undef |
10891 | /// element selects the element already in that position of the first input. |
10892 | static bool isNoopShuffleMask(ArrayRef<int> Mask) { |
10893 | for (int i = 0, Size = Mask.size(); i < Size; ++i) { |
10894 | assert(Mask[i] >= -1 && "Out of bound mask element!"); |
10895 | if (Mask[i] >= 0 && Mask[i] != i) |
10896 | return false; |
10897 | } |
10898 | return true; |
10899 | } |
10900 |  |
10901 | /// Test whether there are elements crossing LaneSizeInBits lanes in this |
10902 | /// shuffle mask. |
10903 | /// |
10904 | /// X86 divides up its shuffles into in-lane and cross-lane shuffle operations |
10905 | /// and we routinely test for these. |
10906 | static bool isLaneCrossingShuffleMask(unsigned LaneSizeInBits, |
10907 | unsigned ScalarSizeInBits, |
10908 | ArrayRef<int> Mask) { |
10909 | assert(LaneSizeInBits && ScalarSizeInBits && |
10910 | (LaneSizeInBits % ScalarSizeInBits) == 0 && |
10911 | "Illegal shuffle lane size"); |
10912 | int LaneSize = LaneSizeInBits / ScalarSizeInBits; |
10913 | int Size = Mask.size(); |
10914 | for (int i = 0; i < Size; ++i) |
10915 | if (Mask[i] >= 0 && (Mask[i] % Size) / LaneSize != i / LaneSize) |
10916 | return true; |
10917 | return false; |
10918 | } |
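// --- Illustrative sketch (added; not part of X86ISelLowering.cpp) ----------
// The same lane-crossing test over plain ints. For a v8i32 shuffle with
// 128-bit lanes LaneSize is 4, so mask element 5 feeding result slot 0 would
// cross from lane 1 into lane 0. demoCrossesLanes is a hypothetical name.
#include <vector>
static bool demoCrossesLanes(const std::vector<int> &Mask, int LaneSize) {
  int Size = (int)Mask.size();
  for (int i = 0; i < Size; ++i)
    if (Mask[i] >= 0 && (Mask[i] % Size) / LaneSize != i / LaneSize)
      return true; // sourced from a different lane
  return false;    // e.g. {1,0,3,2, 5,4,7,6} with LaneSize 4 stays in-lane
}
// ----------------------------------------------------------------------------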
10919 |  |
10920 | /// Test whether there are elements crossing 128-bit lanes in this |
10921 | /// shuffle mask. |
10922 | static bool is128BitLaneCrossingShuffleMask(MVT VT, ArrayRef<int> Mask) { |
10923 | return isLaneCrossingShuffleMask(128, VT.getScalarSizeInBits(), Mask); |
10924 | } |
10925 | |
10926 | /// Test whether elements in each LaneSizeInBits lane in this shuffle mask |
10927 | /// come from multiple lanes - this is different to isLaneCrossingShuffleMask |
10928 | /// to better support 'repeated mask + lane permute' style shuffles. |
10929 | static bool isMultiLaneShuffleMask(unsigned LaneSizeInBits, |
10930 | unsigned ScalarSizeInBits, |
10931 | ArrayRef<int> Mask) { |
10932 | assert(LaneSizeInBits && ScalarSizeInBits && |
10933 | (LaneSizeInBits % ScalarSizeInBits) == 0 && |
10934 | "Illegal shuffle lane size"); |
10935 | int NumElts = Mask.size(); |
10936 | int NumEltsPerLane = LaneSizeInBits / ScalarSizeInBits; |
10937 | int NumLanes = NumElts / NumEltsPerLane; |
10938 | if (NumLanes > 1) { |
10939 | for (int i = 0; i != NumLanes; ++i) { |
10940 | int SrcLane = -1; |
10941 | for (int j = 0; j != NumEltsPerLane; ++j) { |
10942 | int M = Mask[(i * NumEltsPerLane) + j]; |
10943 | if (M < 0) |
10944 | continue; |
10945 | int Lane = (M % NumElts) / NumEltsPerLane; |
10946 | if (SrcLane >= 0 && SrcLane != Lane) |
10947 | return true; |
10948 | SrcLane = Lane; |
10949 | } |
10950 | } |
10951 | } |
10952 | return false; |
10953 | } |
10954 |  |
10955 | /// Test whether a shuffle mask is equivalent within each sub-lane. |
10956 | /// |
10957 | /// This checks a shuffle mask to see if it is performing the same |
10958 | /// lane-relative shuffle in each sub-lane. This trivially implies |
10959 | /// that it is also not lane-crossing. It may however involve a blend from the |
10960 | /// same lane of a second vector. |
10961 | /// |
10962 | /// The specific repeated shuffle mask is populated in \p RepeatedMask, as it |
10963 | /// is non-trivial to compute in the face of undef lanes. The representation |
10964 | /// is suitable for use with existing 128-bit shuffles as entries from the |
10965 | /// second vector have been remapped to [LaneSize, 2*LaneSize). |
10966 | static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT, |
10967 | ArrayRef<int> Mask, |
10968 | SmallVectorImpl<int> &RepeatedMask) { |
10969 | auto LaneSize = LaneSizeInBits / VT.getScalarSizeInBits(); |
10970 | RepeatedMask.assign(LaneSize, -1); |
10971 | int Size = Mask.size(); |
10972 | for (int i = 0; i < Size; ++i) { |
10973 | assert(Mask[i] == SM_SentinelUndef || Mask[i] >= 0); |
10974 | if (Mask[i] < 0) |
10975 | continue; |
10976 | if ((Mask[i] % Size) / LaneSize != i / LaneSize) |
10977 |       // This entry crosses lanes, so there is no way to model this shuffle. |
10978 | return false; |
10979 |  |
10980 |     // Ok, handle the in-lane shuffles by detecting if and when they repeat. |
10981 |     // Adjust second vector indices to start at LaneSize instead of Size. |
10982 | int LocalM = Mask[i] < Size ? Mask[i] % LaneSize |
10983 | : Mask[i] % LaneSize + LaneSize; |
10984 | if (RepeatedMask[i % LaneSize] < 0) |
10985 |       // This is the first non-undef entry in this slot of a 128-bit lane. |
10986 | RepeatedMask[i % LaneSize] = LocalM; |
10987 | else if (RepeatedMask[i % LaneSize] != LocalM) |
10988 |       // Found a mismatch with the repeated mask. |
10989 | return false; |
10990 | } |
10991 | return true; |
10992 | } |
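// --- Illustrative sketch (added; not part of X86ISelLowering.cpp) ----------
// For a v8f32 mask {1,0,3,2, 5,4,7,6} each 128-bit lane repeats the local
// pattern {1,0,3,2}, which is what RepeatedMask receives above. A standalone
// model with a hypothetical name, single-input only for brevity:
#include <vector>
static bool demoRepeatedMask(const std::vector<int> &Mask, int LaneSize,
                             std::vector<int> &Repeated) {
  int Size = (int)Mask.size();
  Repeated.assign(LaneSize, -1);
  for (int i = 0; i < Size; ++i) {
    if (Mask[i] < 0)
      continue; // undef matches anything
    if ((Mask[i] % Size) / LaneSize != i / LaneSize)
      return false; // crosses lanes, cannot be a repeated mask
    int LocalM = Mask[i] % LaneSize; // lane-relative index
    if (Repeated[i % LaneSize] < 0)
      Repeated[i % LaneSize] = LocalM;
    else if (Repeated[i % LaneSize] != LocalM)
      return false; // lanes disagree on this slot
  }
  return true;
}
// ----------------------------------------------------------------------------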
10993 |  |
10994 | /// Test whether a shuffle mask is equivalent within each 128-bit lane. |
10995 | static bool |
10996 | is128BitLaneRepeatedShuffleMask(MVT VT, ArrayRef<int> Mask, |
10997 | SmallVectorImpl<int> &RepeatedMask) { |
10998 | return isRepeatedShuffleMask(128, VT, Mask, RepeatedMask); |
10999 | } |
11000 | |
11001 | static bool |
11002 | is128BitLaneRepeatedShuffleMask(MVT VT, ArrayRef<int> Mask) { |
11003 | SmallVector<int, 32> RepeatedMask; |
11004 | return isRepeatedShuffleMask(128, VT, Mask, RepeatedMask); |
11005 | } |
11006 |  |
11007 | /// Test whether a shuffle mask is equivalent within each 256-bit lane. |
11008 | static bool |
11009 | is256BitLaneRepeatedShuffleMask(MVT VT, ArrayRef<int> Mask, |
11010 | SmallVectorImpl<int> &RepeatedMask) { |
11011 | return isRepeatedShuffleMask(256, VT, Mask, RepeatedMask); |
11012 | } |
11013 |  |
11014 | /// Test whether a target shuffle mask is equivalent within each sub-lane. |
11015 | /// Unlike isRepeatedShuffleMask we must respect SM_SentinelZero. |
11016 | static bool isRepeatedTargetShuffleMask(unsigned LaneSizeInBits, |
11017 | unsigned EltSizeInBits, |
11018 | ArrayRef<int> Mask, |
11019 | SmallVectorImpl<int> &RepeatedMask) { |
11020 | int LaneSize = LaneSizeInBits / EltSizeInBits; |
11021 | RepeatedMask.assign(LaneSize, SM_SentinelUndef); |
11022 | int Size = Mask.size(); |
11023 | for (int i = 0; i < Size; ++i) { |
11024 | assert(isUndefOrZero(Mask[i]) || (Mask[i] >= 0)); |
11025 | if (Mask[i] == SM_SentinelUndef) |
11026 | continue; |
11027 | if (Mask[i] == SM_SentinelZero) { |
11028 | if (!isUndefOrZero(RepeatedMask[i % LaneSize])) |
11029 | return false; |
11030 | RepeatedMask[i % LaneSize] = SM_SentinelZero; |
11031 | continue; |
11032 | } |
11033 | if ((Mask[i] % Size) / LaneSize != i / LaneSize) |
11034 |       // This entry crosses lanes, so there is no way to model this shuffle. |
11035 | return false; |
11036 |  |
11037 |     // Ok, handle the in-lane shuffles by detecting if and when they repeat. |
11038 |     // Adjust second vector indices to start at LaneSize instead of Size. |
11039 | int LaneM = Mask[i] / Size; |
11040 | int LocalM = (Mask[i] % LaneSize) + (LaneM * LaneSize); |
11041 | if (RepeatedMask[i % LaneSize] == SM_SentinelUndef) |
11042 |       // This is the first non-undef entry in this slot of a 128-bit lane. |
11043 | RepeatedMask[i % LaneSize] = LocalM; |
11044 | else if (RepeatedMask[i % LaneSize] != LocalM) |
11045 |       // Found a mismatch with the repeated mask. |
11046 | return false; |
11047 | } |
11048 | return true; |
11049 | } |
11050 | |
11051 |  |
11052 | /// As above, but taking the element size from \p VT's scalar type. |
11053 | static bool isRepeatedTargetShuffleMask(unsigned LaneSizeInBits, MVT VT, |
11054 | ArrayRef<int> Mask, |
11055 | SmallVectorImpl<int> &RepeatedMask) { |
11056 | return isRepeatedTargetShuffleMask(LaneSizeInBits, VT.getScalarSizeInBits(), |
11057 | Mask, RepeatedMask); |
11058 | } |
11059 | |
11060 | /// Checks whether the vector elements referenced by two shuffle masks are |
11061 | /// equivalent. |
11062 | static bool IsElementEquivalent(int MaskSize, SDValue Op, SDValue ExpectedOp, |
11063 | int Idx, int ExpectedIdx) { |
11064 | assert(0 <= Idx && Idx < MaskSize && 0 <= ExpectedIdx && |
11065 | ExpectedIdx < MaskSize && "Out of range element index"); |
11066 | if (!Op || !ExpectedOp || Op.getOpcode() != ExpectedOp.getOpcode()) |
11067 | return false; |
11068 | |
11069 | switch (Op.getOpcode()) { |
11070 | case ISD::BUILD_VECTOR: |
11071 |     // If the values are build vectors, we can look through them to find |
11072 |     // equivalent inputs that make the shuffles equivalent. |
11073 |     // TODO: Handle MaskSize != Op.getNumOperands()? |
11074 | if (MaskSize == (int)Op.getNumOperands() && |
11075 | MaskSize == (int)ExpectedOp.getNumOperands()) |
11076 | return Op.getOperand(Idx) == ExpectedOp.getOperand(ExpectedIdx); |
11077 | break; |
11078 | case X86ISD::VBROADCAST: |
11079 | case X86ISD::VBROADCAST_LOAD: |
11080 |     // TODO: Handle MaskSize != Op.getValueType().getVectorNumElements()? |
11081 | return (Op == ExpectedOp && |
11082 | (int)Op.getValueType().getVectorNumElements() == MaskSize); |
11083 | case X86ISD::HADD: |
11084 | case X86ISD::HSUB: |
11085 | case X86ISD::FHADD: |
11086 | case X86ISD::FHSUB: |
11087 | case X86ISD::PACKSS: |
11088 | case X86ISD::PACKUS: |
11089 |     // HOP(X,X) can refer to the elt from the lower/upper half of a lane. |
11090 |     // TODO: Handle MaskSize != NumElts? |
11091 |     // TODO: Handle HOP(X,Y) vs HOP(Y,X) equivalence cases. |
11092 | if (Op == ExpectedOp && Op.getOperand(0) == Op.getOperand(1)) { |
11093 | MVT VT = Op.getSimpleValueType(); |
11094 | int NumElts = VT.getVectorNumElements(); |
11095 | if (MaskSize == NumElts) { |
11096 | int NumLanes = VT.getSizeInBits() / 128; |
11097 | int NumEltsPerLane = NumElts / NumLanes; |
11098 | int NumHalfEltsPerLane = NumEltsPerLane / 2; |
11099 | bool SameLane = |
11100 | (Idx / NumEltsPerLane) == (ExpectedIdx / NumEltsPerLane); |
11101 | bool SameElt = |
11102 | (Idx % NumHalfEltsPerLane) == (ExpectedIdx % NumHalfEltsPerLane); |
11103 | return SameLane && SameElt; |
11104 | } |
11105 | } |
11106 | break; |
11107 | } |
11108 | |
11109 | return false; |
11110 | } |
11111 | |
11112 |  |
11113 | /// Checks whether a shuffle mask is equivalent to an explicit list of |
11114 | /// arguments. |
11115 | /// |
11116 | /// This is a fast way to test a shuffle mask against a fixed pattern: |
11117 | /// |
11118 | ///   if (isShuffleEquivalent(Mask, {3, 2, 1, 0})) { ... } |
11119 | /// |
11120 | /// The masks must be exactly the same width, and each element of the tested |
11121 | /// mask is allowed to be undef (-1). |
11122 | static bool isShuffleEquivalent(ArrayRef<int> Mask, ArrayRef<int> ExpectedMask, |
11123 | SDValue V1 = SDValue(), |
11124 | SDValue V2 = SDValue()) { |
11125 | int Size = Mask.size(); |
11126 | if (Size != (int)ExpectedMask.size()) |
11127 | return false; |
11128 | |
11129 | for (int i = 0; i < Size; ++i) { |
11130 | assert(Mask[i] >= -1 && "Out of bound mask element!"); |
11131 | int MaskIdx = Mask[i]; |
11132 | int ExpectedIdx = ExpectedMask[i]; |
11133 | if (0 <= MaskIdx && MaskIdx != ExpectedIdx) { |
11134 | SDValue MaskV = MaskIdx < Size ? V1 : V2; |
11135 | SDValue ExpectedV = ExpectedIdx < Size ? V1 : V2; |
11136 | MaskIdx = MaskIdx < Size ? MaskIdx : (MaskIdx - Size); |
11137 | ExpectedIdx = ExpectedIdx < Size ? ExpectedIdx : (ExpectedIdx - Size); |
11138 | if (!IsElementEquivalent(Size, MaskV, ExpectedV, MaskIdx, ExpectedIdx)) |
11139 | return false; |
11140 | } |
11141 | } |
11142 | return true; |
11143 | } |
11144 |  |
11145 | /// Checks whether a target shuffle mask is equivalent to an explicit |
11146 | /// pattern. |
11147 | /// |
11148 | /// The masks must be exactly the same width. |
11149 | /// |
11150 | /// If an element in Mask matches SM_SentinelUndef (-1) then the corresponding |
11151 | /// value in ExpectedMask is always accepted. Otherwise the indices must |
11152 | /// match, or the referenced elements must be proven equivalent through the |
11153 | /// optional V1/V2 operands. |
11154 | static bool isTargetShuffleEquivalent(MVT VT, ArrayRef<int> Mask, |
11155 | ArrayRef<int> ExpectedMask, |
11156 | SDValue V1 = SDValue(), |
11157 | SDValue V2 = SDValue()) { |
11158 | int Size = Mask.size(); |
11159 | if (Size != (int)ExpectedMask.size()) |
11160 | return false; |
11161 | assert(isUndefOrZeroOrInRange(ExpectedMask, 0, 2 * Size) && |
11162 | "Illegal target shuffle mask"); |
11163 |  |
11164 |   // Check for out-of-range target shuffle mask indices. |
11165 | if (!isUndefOrZeroOrInRange(Mask, 0, 2 * Size)) |
11166 | return false; |
11167 |  |
11168 |   // Don't use V1/V2 if they're not the same size as the shuffle mask type. |
11169 | if (V1 && V1.getValueSizeInBits() != VT.getSizeInBits()) |
11170 | V1 = SDValue(); |
11171 | if (V2 && V2.getValueSizeInBits() != VT.getSizeInBits()) |
11172 | V2 = SDValue(); |
11173 | |
11174 | for (int i = 0; i < Size; ++i) { |
11175 | int MaskIdx = Mask[i]; |
11176 | int ExpectedIdx = ExpectedMask[i]; |
11177 | if (MaskIdx == SM_SentinelUndef || MaskIdx == ExpectedIdx) |
11178 | continue; |
11179 | if (0 <= MaskIdx && 0 <= ExpectedIdx) { |
11180 | SDValue MaskV = MaskIdx < Size ? V1 : V2; |
11181 | SDValue ExpectedV = ExpectedIdx < Size ? V1 : V2; |
11182 | MaskIdx = MaskIdx < Size ? MaskIdx : (MaskIdx - Size); |
11183 | ExpectedIdx = ExpectedIdx < Size ? ExpectedIdx : (ExpectedIdx - Size); |
11184 | if (IsElementEquivalent(Size, MaskV, ExpectedV, MaskIdx, ExpectedIdx)) |
11185 | continue; |
11186 | } |
11187 |     // TODO - handle SM_SentinelZero equivalences. |
11188 | return false; |
11189 | } |
11190 | return true; |
11191 | } |
11192 | |
11193 | // Attempt to create a shuffle mask from a VSELECT condition mask. |
11194 | static bool createShuffleMaskFromVSELECT(SmallVectorImpl<int> &Mask, |
11195 | SDValue Cond) { |
11196 | EVT CondVT = Cond.getValueType(); |
11197 | unsigned EltSizeInBits = CondVT.getScalarSizeInBits(); |
11198 | unsigned NumElts = CondVT.getVectorNumElements(); |
11199 | |
11200 | APInt UndefElts; |
11201 | SmallVector<APInt, 32> EltBits; |
11202 | if (!getTargetConstantBitsFromNode(Cond, EltSizeInBits, UndefElts, EltBits, |
11203 | true, false)) |
11204 | return false; |
11205 | |
11206 | Mask.resize(NumElts, SM_SentinelUndef); |
11207 | |
11208 | for (int i = 0; i != (int)NumElts; ++i) { |
11209 | Mask[i] = i; |
11210 |     // Arbitrarily choose from the 2nd operand if the select condition element |
11211 |     // is undef. |
11212 |     // TODO: Can we do better by matching patterns such as even/odd? |
11213 | if (UndefElts[i] || EltBits[i].isNullValue()) |
11214 | Mask[i] += NumElts; |
11215 | } |
11216 | |
11217 | return true; |
11218 | } |
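// --- Illustrative sketch (added; not part of X86ISelLowering.cpp) ----------
// A constant VSELECT condition of {true,false,true,false} over 4 elements
// becomes the shuffle mask {0,5,2,7}: true keeps slot i from V1, false
// redirects it to i + NumElts in V2. demoVSelectToShuffle is a hypothetical
// name.
#include <vector>
static std::vector<int> demoVSelectToShuffle(const std::vector<bool> &CondOn) {
  int NumElts = (int)CondOn.size();
  std::vector<int> Mask(NumElts);
  for (int i = 0; i < NumElts; ++i)
    Mask[i] = CondOn[i] ? i : i + NumElts; // false selects from V2
  return Mask;
}
// ----------------------------------------------------------------------------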
11219 | |
11220 |  |
11221 | // Check if the mask matches a v8i16 unpcklwd/unpckhwd pattern on v8i32/v8f32. |
11222 | static bool isUnpackWdShuffleMask(ArrayRef<int> Mask, MVT VT) { |
11223 | if (VT != MVT::v8i32 && VT != MVT::v8f32) |
11224 | return false; |
11225 | |
11226 | SmallVector<int, 8> Unpcklwd; |
11227 | createUnpackShuffleMask(MVT::v8i16, Unpcklwd, true, |
11228 | false); |
11229 | SmallVector<int, 8> Unpckhwd; |
11230 | createUnpackShuffleMask(MVT::v8i16, Unpckhwd, false, |
11231 | false); |
11232 | bool IsUnpackwdMask = (isTargetShuffleEquivalent(VT, Mask, Unpcklwd) || |
11233 | isTargetShuffleEquivalent(VT, Mask, Unpckhwd)); |
11234 | return IsUnpackwdMask; |
11235 | } |
11236 | |
11237 | static bool is128BitUnpackShuffleMask(ArrayRef<int> Mask) { |
11238 |   // Create 128-bit vector type based on mask size. |
11239 | MVT EltVT = MVT::getIntegerVT(128 / Mask.size()); |
11240 | MVT VT = MVT::getVectorVT(EltVT, Mask.size()); |
11241 |  |
11242 |   // We can't assume a canonical shuffle mask, so try the commuted version too. |
11243 | SmallVector<int, 4> CommutedMask(Mask.begin(), Mask.end()); |
11244 | ShuffleVectorSDNode::commuteMask(CommutedMask); |
11245 |  |
11246 |   // Match any of unary/binary or low/high. |
11247 | for (unsigned i = 0; i != 4; ++i) { |
11248 | SmallVector<int, 16> UnpackMask; |
11249 | createUnpackShuffleMask(VT, UnpackMask, (i >> 1) % 2, i % 2); |
11250 | if (isTargetShuffleEquivalent(VT, Mask, UnpackMask) || |
11251 | isTargetShuffleEquivalent(VT, CommutedMask, UnpackMask)) |
11252 | return true; |
11253 | } |
11254 | return false; |
11255 | } |
11256 | |
11257 |  |
11258 | /// Return true if a shuffle mask chooses elements identically in its top and |
11259 | /// bottom halves. For example, any splat mask has the same top and bottom |
11260 | /// halves. Undef in only one half makes the halves differ. |
11261 | static bool hasIdenticalHalvesShuffleMask(ArrayRef<int> Mask) { |
11262 | assert(Mask.size() % 2 == 0 && "Expecting even number of elements in mask"); |
11263 | unsigned HalfSize = Mask.size() / 2; |
11264 | for (unsigned i = 0; i != HalfSize; ++i) { |
11265 | if (Mask[i] != Mask[i + HalfSize]) |
11266 | return false; |
11267 | } |
11268 | return true; |
11269 | } |
11270 | |
11271 |  |
11272 | /// Get a 4-lane 8-bit shuffle immediate for a mask. |
11273 | /// |
11274 | /// This helper function produces an 8-bit shuffle immediate corresponding to |
11275 | /// the ubiquitous shuffle encoding scheme used in x86 instructions for |
11276 | /// shuffling 4 lanes. It can be used with most of the PSHUF instructions |
11277 | /// built around just 4 lanes. |
11278 | /// NB: We rely heavily on "undef" masks preserving the input lane. |
11279 | static unsigned getV4X86ShuffleImm(ArrayRef<int> Mask) { |
11280 | assert(Mask.size() == 4 && "Only 4-lane shuffle masks"); |
11281 | assert(Mask[0] >= -1 && Mask[0] < 4 && "Out of bound mask element!"); |
11282 | assert(Mask[1] >= -1 && Mask[1] < 4 && "Out of bound mask element!"); |
11283 | assert(Mask[2] >= -1 && Mask[2] < 4 && "Out of bound mask element!"); |
11284 | assert(Mask[3] >= -1 && Mask[3] < 4 && "Out of bound mask element!"); |
11285 | |
11286 |  |
11287 |   // If the mask only uses one non-undef element, we can always splat it. |
11288 | int FirstIndex = find_if(Mask, [](int M) { return M >= 0; }) - Mask.begin(); |
11289 | assert(0 <= FirstIndex && FirstIndex < 4 && "All undef shuffle mask"); |
11290 | |
11291 | int FirstElt = Mask[FirstIndex]; |
11292 | if (all_of(Mask, [FirstElt](int M) { return M < 0 || M == FirstElt; })) |
11293 | return (FirstElt << 6) | (FirstElt << 4) | (FirstElt << 2) | FirstElt; |
11294 | |
11295 | unsigned Imm = 0; |
11296 | Imm |= (Mask[0] < 0 ? 0 : Mask[0]) << 0; |
11297 | Imm |= (Mask[1] < 0 ? 1 : Mask[1]) << 2; |
11298 | Imm |= (Mask[2] < 0 ? 2 : Mask[2]) << 4; |
11299 | Imm |= (Mask[3] < 0 ? 3 : Mask[3]) << 6; |
11300 | return Imm; |
11301 | } |
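// --- Illustrative sketch (added; not part of X86ISelLowering.cpp) ----------
// The classic 2-bits-per-lane PSHUFD-style immediate built above, in
// standalone form (demoV4ShuffleImm is a hypothetical name). For the reverse
// mask {3,2,1,0} this yields 0b00011011 == 0x1B.
#include <cassert>
static unsigned demoV4ShuffleImm(const int M[4]) {
  unsigned Imm = 0;
  for (int i = 0; i < 4; ++i) {
    int Lane = M[i] < 0 ? i : M[i]; // undef keeps its own lane
    assert(0 <= Lane && Lane < 4 && "out of range lane");
    Imm |= (unsigned)Lane << (2 * i); // 2 bits per result slot
  }
  return Imm;
}
// ----------------------------------------------------------------------------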
11302 | |
11303 | static SDValue getV4X86ShuffleImm8ForMask(ArrayRef<int> Mask, const SDLoc &DL, |
11304 | SelectionDAG &DAG) { |
11305 | return DAG.getTargetConstant(getV4X86ShuffleImm(Mask), DL, MVT::i8); |
11306 | } |
11307 |  |
11308 |  |
11309 |  |
11310 |  |
11311 | // Checks whether the non-zeroable elements of the mask select consecutive |
11312 | // source elements in increasing order (the VEXPAND pattern), skipping over |
11313 | // zeroable slots, and reports via IsZeroSideLeft whether the zeroed region |
11314 | // sits below or above the expanded run. |
11315 | static bool isNonZeroElementsInOrder(const APInt &Zeroable, |
11316 | ArrayRef<int> Mask, const EVT &VectorType, |
11317 | bool &IsZeroSideLeft) { |
11318 | int NextElement = -1; |
11319 | |
11320 | for (int i = 0, e = Mask.size(); i < e; i++) { |
11321 | |
11322 | assert(Mask[i] >= -1 && "Out of bound mask element!"); |
11323 | if (Mask[i] < 0) |
11324 | return false; |
11325 | if (Zeroable[i]) |
11326 | continue; |
11327 | |
11328 | if (NextElement < 0) { |
11329 | NextElement = Mask[i] != 0 ? VectorType.getVectorNumElements() : 0; |
11330 | IsZeroSideLeft = NextElement != 0; |
11331 | } |
11332 | |
11333 | if (NextElement != Mask[i]) |
11334 | return false; |
11335 | NextElement++; |
11336 | } |
11337 | return true; |
11338 | } |
11339 |  |
11340 | /// Try to lower a shuffle with a single PSHUFB of V1 or V2. |
11341 | static SDValue lowerShuffleWithPSHUFB(const SDLoc &DL, MVT VT, |
11342 | ArrayRef<int> Mask, SDValue V1, |
11343 | SDValue V2, const APInt &Zeroable, |
11344 | const X86Subtarget &Subtarget, |
11345 | SelectionDAG &DAG) { |
11346 | int Size = Mask.size(); |
11347 | int LaneSize = 128 / VT.getScalarSizeInBits(); |
11348 | const int NumBytes = VT.getSizeInBits() / 8; |
11349 | const int NumEltBytes = VT.getScalarSizeInBits() / 8; |
11350 | |
11351 | assert((Subtarget.hasSSSE3() && VT.is128BitVector()) || |
11352 | (Subtarget.hasAVX2() && VT.is256BitVector()) || |
11353 | (Subtarget.hasBWI() && VT.is512BitVector())); |
11354 | |
11355 | SmallVector<SDValue, 64> PSHUFBMask(NumBytes); |
11356 |   // Sign bit set in i8 mask means zero element. |
11357 | SDValue ZeroMask = DAG.getConstant(0x80, DL, MVT::i8); |
11358 | |
11359 | SDValue V; |
11360 | for (int i = 0; i < NumBytes; ++i) { |
11361 | int M = Mask[i / NumEltBytes]; |
11362 | if (M < 0) { |
11363 | PSHUFBMask[i] = DAG.getUNDEF(MVT::i8); |
11364 | continue; |
11365 | } |
11366 | if (Zeroable[i / NumEltBytes]) { |
11367 | PSHUFBMask[i] = ZeroMask; |
11368 | continue; |
11369 | } |
11370 |  |
11371 |     // All mask bytes must come from a single source vector (V1 or V2). |
11372 | SDValue SrcV = (M >= Size ? V2 : V1); |
11373 | if (V && V != SrcV) |
11374 | return SDValue(); |
11375 | V = SrcV; |
11376 | M %= Size; |
11377 |  |
11378 |     // PSHUFB can't cross lanes, ensure this doesn't happen. |
11379 | if ((M / LaneSize) != ((i / NumEltBytes) / LaneSize)) |
11380 | return SDValue(); |
11381 | |
11382 | M = M % LaneSize; |
11383 | M = M * NumEltBytes + (i % NumEltBytes); |
11384 | PSHUFBMask[i] = DAG.getConstant(M, DL, MVT::i8); |
11385 | } |
11386 | assert(V && "Failed to find a source input"); |
11387 | |
11388 | MVT I8VT = MVT::getVectorVT(MVT::i8, NumBytes); |
11389 | return DAG.getBitcast( |
11390 | VT, DAG.getNode(X86ISD::PSHUFB, DL, I8VT, DAG.getBitcast(I8VT, V), |
11391 | DAG.getBuildVector(I8VT, DL, PSHUFBMask))); |
11392 | } |
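// --- Illustrative sketch (added; not part of X86ISelLowering.cpp) ----------
// Expanding a v4i32 element mask into the 16 PSHUFB byte selectors built
// above, single 128-bit input assumed and -1 standing in for an undef byte
// (the real code reserves 0x80 for forced zeros). demoPshufbBytes is a
// hypothetical name. Element mask {2,-1,0,0} becomes
// {8,9,10,11, -1,-1,-1,-1, 0,1,2,3, 0,1,2,3}.
#include <array>
static std::array<int, 16> demoPshufbBytes(const std::array<int, 4> &Mask) {
  std::array<int, 16> Bytes{};
  for (int i = 0; i < 16; ++i) {
    int M = Mask[i / 4];                    // element feeding this byte
    Bytes[i] = M < 0 ? -1 : M * 4 + i % 4;  // byte offset within that element
  }
  return Bytes;
}
// ----------------------------------------------------------------------------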
11393 | |
11394 | static SDValue getMaskNode(SDValue Mask, MVT MaskVT, |
11395 | const X86Subtarget &Subtarget, SelectionDAG &DAG, |
11396 | const SDLoc &dl); |
11397 |  |
11398 | // X86 has dedicated shuffle that can be lowered to VEXPAND. |
11399 | static SDValue lowerShuffleToEXPAND(const SDLoc &DL, MVT VT, |
11400 | const APInt &Zeroable, |
11401 | ArrayRef<int> Mask, SDValue &V1, |
11402 | SDValue &V2, SelectionDAG &DAG, |
11403 | const X86Subtarget &Subtarget) { |
11404 | bool IsLeftZeroSide = true; |
11405 | if (!isNonZeroElementsInOrder(Zeroable, Mask, V1.getValueType(), |
11406 | IsLeftZeroSide)) |
11407 | return SDValue(); |
11408 | unsigned VEXPANDMask = (~Zeroable).getZExtValue(); |
11409 | MVT IntegerType = |
11410 | MVT::getIntegerVT(std::max((int)VT.getVectorNumElements(), 8)); |
11411 | SDValue MaskNode = DAG.getConstant(VEXPANDMask, DL, IntegerType); |
11412 | unsigned NumElts = VT.getVectorNumElements(); |
11413 | assert((NumElts == 4 || NumElts == 8 || NumElts == 16) && |
11414 | "Unexpected number of vector elements"); |
11415 | SDValue VMask = getMaskNode(MaskNode, MVT::getVectorVT(MVT::i1, NumElts), |
11416 | Subtarget, DAG, DL); |
11417 | SDValue ZeroVector = getZeroVector(VT, Subtarget, DAG, DL); |
11418 | SDValue ExpandedVector = IsLeftZeroSide ? V2 : V1; |
11419 | return DAG.getNode(X86ISD::EXPAND, DL, VT, ExpandedVector, ZeroVector, VMask); |
11420 | } |
11421 | |
11422 | static bool matchShuffleWithUNPCK(MVT VT, SDValue &V1, SDValue &V2, |
11423 | unsigned &UnpackOpcode, bool IsUnary, |
11424 | ArrayRef<int> TargetMask, const SDLoc &DL, |
11425 | SelectionDAG &DAG, |
11426 | const X86Subtarget &Subtarget) { |
11427 | int NumElts = VT.getVectorNumElements(); |
11428 | |
11429 | bool Undef1 = true, Undef2 = true, Zero1 = true, Zero2 = true; |
11430 | for (int i = 0; i != NumElts; i += 2) { |
11431 | int M1 = TargetMask[i + 0]; |
11432 | int M2 = TargetMask[i + 1]; |
11433 | Undef1 &= (SM_SentinelUndef == M1); |
11434 | Undef2 &= (SM_SentinelUndef == M2); |
11435 | Zero1 &= isUndefOrZero(M1); |
11436 | Zero2 &= isUndefOrZero(M2); |
11437 | } |
11438 | assert(!((Undef1 || Zero1) && (Undef2 || Zero2)) && |
11439 | "Zeroable shuffle detected"); |
11440 |  |
11441 |   // Attempt to match the target mask against the unpack lo/hi mask patterns. |
11442 | SmallVector<int, 64> Unpckl, Unpckh; |
11443 | createUnpackShuffleMask(VT, Unpckl, true, IsUnary); |
11444 | if (isTargetShuffleEquivalent(VT, TargetMask, Unpckl, V1, |
11445 | (IsUnary ? V1 : V2))) { |
11446 | UnpackOpcode = X86ISD::UNPCKL; |
11447 | V2 = (Undef2 ? DAG.getUNDEF(VT) : (IsUnary ? V1 : V2)); |
11448 | V1 = (Undef1 ? DAG.getUNDEF(VT) : V1); |
11449 | return true; |
11450 | } |
11451 | |
11452 | createUnpackShuffleMask(VT, Unpckh, false, IsUnary); |
11453 | if (isTargetShuffleEquivalent(VT, TargetMask, Unpckh, V1, |
11454 | (IsUnary ? V1 : V2))) { |
11455 | UnpackOpcode = X86ISD::UNPCKH; |
11456 | V2 = (Undef2 ? DAG.getUNDEF(VT) : (IsUnary ? V1 : V2)); |
11457 | V1 = (Undef1 ? DAG.getUNDEF(VT) : V1); |
11458 | return true; |
11459 | } |
11460 |  |
11461 |   // If an unary shuffle, attempt to match as an unpack lo/hi with zero. |
11462 | if (IsUnary && (Zero1 || Zero2)) { |
11463 |     // Don't bother if we can blend instead. |
11464 | if ((Subtarget.hasSSE41() || VT == MVT::v2i64 || VT == MVT::v2f64) && |
11465 | isSequentialOrUndefOrZeroInRange(TargetMask, 0, NumElts, 0)) |
11466 | return false; |
11467 | |
11468 | bool MatchLo = true, MatchHi = true; |
11469 | for (int i = 0; (i != NumElts) && (MatchLo || MatchHi); ++i) { |
11470 | int M = TargetMask[i]; |
11471 |  |
11472 |       // Ignore if the input is known to be zero or the index is undef. |
11473 | if ((((i & 1) == 0) && Zero1) || (((i & 1) == 1) && Zero2) || |
11474 | (M == SM_SentinelUndef)) |
11475 | continue; |
11476 | |
11477 | MatchLo &= (M == Unpckl[i]); |
11478 | MatchHi &= (M == Unpckh[i]); |
11479 | } |
11480 | |
11481 | if (MatchLo || MatchHi) { |
11482 | UnpackOpcode = MatchLo ? X86ISD::UNPCKL : X86ISD::UNPCKH; |
11483 | V2 = Zero2 ? getZeroVector(VT, Subtarget, DAG, DL) : V1; |
11484 | V1 = Zero1 ? getZeroVector(VT, Subtarget, DAG, DL) : V1; |
11485 | return true; |
11486 | } |
11487 | } |
11488 | |
11489 |   // If a binary shuffle, commute and try again. |
11490 | if (!IsUnary) { |
11491 | ShuffleVectorSDNode::commuteMask(Unpckl); |
11492 | if (isTargetShuffleEquivalent(VT, TargetMask, Unpckl)) { |
11493 | UnpackOpcode = X86ISD::UNPCKL; |
11494 | std::swap(V1, V2); |
11495 | return true; |
11496 | } |
11497 | |
11498 | ShuffleVectorSDNode::commuteMask(Unpckh); |
11499 | if (isTargetShuffleEquivalent(VT, TargetMask, Unpckh)) { |
11500 | UnpackOpcode = X86ISD::UNPCKH; |
11501 | std::swap(V1, V2); |
11502 | return true; |
11503 | } |
11504 | } |
11505 | |
11506 | return false; |
11507 | } |
11508 |  |
11509 | // X86 has dedicated unpack instructions that can handle specific blend |
11510 | // operations: UNPCKH and UNPCKL. |
11511 | static SDValue lowerShuffleWithUNPCK(const SDLoc &DL, MVT VT, |
11512 | ArrayRef<int> Mask, SDValue V1, SDValue V2, |
11513 | SelectionDAG &DAG) { |
11514 | SmallVector<int, 8> Unpckl; |
11515 | createUnpackShuffleMask(VT, Unpckl, true, false); |
11516 | if (isShuffleEquivalent(Mask, Unpckl, V1, V2)) |
11517 | return DAG.getNode(X86ISD::UNPCKL, DL, VT, V1, V2); |
11518 | |
11519 | SmallVector<int, 8> Unpckh; |
11520 | createUnpackShuffleMask(VT, Unpckh, false, false); |
11521 | if (isShuffleEquivalent(Mask, Unpckh, V1, V2)) |
11522 | return DAG.getNode(X86ISD::UNPCKH, DL, VT, V1, V2); |
11523 | |
11524 |   // Commute and try again. |
11525 | ShuffleVectorSDNode::commuteMask(Unpckl); |
11526 | if (isShuffleEquivalent(Mask, Unpckl, V1, V2)) |
11527 | return DAG.getNode(X86ISD::UNPCKL, DL, VT, V2, V1); |
11528 | |
11529 | ShuffleVectorSDNode::commuteMask(Unpckh); |
11530 | if (isShuffleEquivalent(Mask, Unpckh, V1, V2)) |
11531 | return DAG.getNode(X86ISD::UNPCKH, DL, VT, V2, V1); |
11532 | |
11533 | return SDValue(); |
11534 | } |
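// --- Illustrative sketch (added; not part of X86ISelLowering.cpp) ----------
// What createUnpackShuffleMask produces for a 4-element 128-bit type: UNPCKL
// interleaves the low halves of both inputs, UNPCKH the high halves. Single
// lane assumed; demoUnpackMask is a hypothetical name.
#include <vector>
static std::vector<int> demoUnpackMask(int NumElts, bool Lo) {
  std::vector<int> Mask;
  int Base = Lo ? 0 : NumElts / 2;
  for (int i = 0; i < NumElts / 2; ++i) {
    Mask.push_back(Base + i);           // element from V1
    Mask.push_back(Base + i + NumElts); // matching element from V2
  }
  return Mask; // NumElts=4: Lo -> {0,4,1,5}, Hi -> {2,6,3,7}
}
// ----------------------------------------------------------------------------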
11535 | |
11536 | /// Check if the mask can be mapped to a preliminary shuffle (vperm 64-bit) |
11537 | /// followed by unpack 256-bit. |
11538 | static SDValue lowerShuffleWithUNPCK256(const SDLoc &DL, MVT VT, |
11539 | ArrayRef<int> Mask, SDValue V1, |
11540 | SDValue V2, SelectionDAG &DAG) { |
11541 | SmallVector<int, 32> Unpckl, Unpckh; |
11542 | createSplat2ShuffleMask(VT, Unpckl, true); |
11543 | createSplat2ShuffleMask(VT, Unpckh, false); |
11544 | |
11545 | unsigned UnpackOpcode; |
11546 | if (isShuffleEquivalent(Mask, Unpckl, V1, V2)) |
11547 | UnpackOpcode = X86ISD::UNPCKL; |
11548 | else if (isShuffleEquivalent(Mask, Unpckh, V1, V2)) |
11549 | UnpackOpcode = X86ISD::UNPCKH; |
11550 | else |
11551 | return SDValue(); |
11552 | |
11553 |  |
11554 |   // Permute the 64-bit lanes into place first (vpermq/vpermpd), so that a |
11555 |   // single whole-lane unpack of the reordered input produces the pattern. |
11556 | V1 = DAG.getVectorShuffle(MVT::v4f64, DL, DAG.getBitcast(MVT::v4f64, V1), |
11557 | DAG.getUNDEF(MVT::v4f64), {0, 2, 1, 3}); |
11558 | V1 = DAG.getBitcast(VT, V1); |
11559 | return DAG.getNode(UnpackOpcode, DL, VT, V1, V1); |
11560 | } |
11561 | |
11562 |  |
11563 | // Check whether the shuffle mask can be lowered as a truncation (VTRUNC). |
11564 | static bool matchShuffleAsVTRUNC(MVT &SrcVT, MVT &DstVT, MVT VT, |
11565 | ArrayRef<int> Mask, const APInt &Zeroable, |
11566 | const X86Subtarget &Subtarget) { |
11567 | if (!VT.is512BitVector() && !Subtarget.hasVLX()) |
11568 | return false; |
11569 | |
11570 | unsigned NumElts = Mask.size(); |
11571 | unsigned EltSizeInBits = VT.getScalarSizeInBits(); |
11572 | unsigned MaxScale = 64 / EltSizeInBits; |
11573 | |
11574 | for (unsigned Scale = 2; Scale <= MaxScale; Scale += Scale) { |
11575 | unsigned SrcEltBits = EltSizeInBits * Scale; |
11576 | if (SrcEltBits < 32 && !Subtarget.hasBWI()) |
11577 | continue; |
11578 | unsigned NumSrcElts = NumElts / Scale; |
11579 | if (!isSequentialOrUndefInRange(Mask, 0, NumSrcElts, 0, Scale)) |
11580 | continue; |
11581 | unsigned UpperElts = NumElts - NumSrcElts; |
11582 | if (!Zeroable.extractBits(UpperElts, NumSrcElts).isAllOnesValue()) |
11583 | continue; |
11584 | SrcVT = MVT::getIntegerVT(EltSizeInBits * Scale); |
11585 | SrcVT = MVT::getVectorVT(SrcVT, NumSrcElts); |
11586 | DstVT = MVT::getIntegerVT(EltSizeInBits); |
11587 | if ((NumSrcElts * EltSizeInBits) >= 128) { |
11588 |       // Legal truncation. |
11589 | DstVT = MVT::getVectorVT(DstVT, NumSrcElts); |
11590 | } else { |
11591 |       // Non-legal truncation - widen the result to a 128-bit vector. |
11592 | DstVT = MVT::getVectorVT(DstVT, 128 / EltSizeInBits); |
11593 | } |
11594 | return true; |
11595 | } |
11596 | |
11597 | return false; |
11598 | } |
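// --- Illustrative sketch (added; not part of X86ISelLowering.cpp) ----------
// The pattern matched above is "every Scale-th element, upper tail zeroable":
// truncating v8i16 data to 4 elements with Scale 2 wants a mask beginning
// {0,2,4,6}. demoIsTruncMask is a hypothetical name; the caller must still
// verify that the tail elements are zeroable.
#include <vector>
static bool demoIsTruncMask(const std::vector<int> &Mask, int Scale,
                            int NumSrcElts) {
  for (int i = 0; i < NumSrcElts; ++i)
    if (Mask[i] >= 0 && Mask[i] != i * Scale)
      return false; // not a stride-Scale selection of the low elements
  return true;
}
// ----------------------------------------------------------------------------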
11599 |  |
11600 | // Helper to create TRUNCATE/VTRUNC nodes, optionally with zero/undef upper |
11601 | // element padding to the requested destination type. |
11602 | static SDValue getAVX512TruncNode(const SDLoc &DL, MVT DstVT, SDValue Src, |
11603 | const X86Subtarget &Subtarget, |
11604 | SelectionDAG &DAG, bool ZeroUppers) { |
11605 | MVT SrcVT = Src.getSimpleValueType(); |
11606 | MVT DstSVT = DstVT.getScalarType(); |
11607 | unsigned NumDstElts = DstVT.getVectorNumElements(); |
11608 | unsigned NumSrcElts = SrcVT.getVectorNumElements(); |
11609 | unsigned DstEltSizeInBits = DstVT.getScalarSizeInBits(); |
11610 | |
11611 | if (!DAG.getTargetLoweringInfo().isTypeLegal(SrcVT)) |
11612 | return SDValue(); |
11613 |  |
11614 |   // Perform a direct ISD::TRUNCATE if possible. |
11615 | if (NumSrcElts == NumDstElts) |
11616 | return DAG.getNode(ISD::TRUNCATE, DL, DstVT, Src); |
11617 | |
11618 | if (NumSrcElts > NumDstElts) { |
11619 | MVT TruncVT = MVT::getVectorVT(DstSVT, NumSrcElts); |
11620 | SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, Src); |
11621 | return extractSubVector(Trunc, 0, DAG, DL, DstVT.getSizeInBits()); |
11622 | } |
11623 | |
11624 | if ((NumSrcElts * DstEltSizeInBits) >= 128) { |
11625 | MVT TruncVT = MVT::getVectorVT(DstSVT, NumSrcElts); |
11626 | SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, Src); |
11627 | return widenSubVector(Trunc, ZeroUppers, Subtarget, DAG, DL, |
11628 | DstVT.getSizeInBits()); |
11629 | } |
11630 | |
11631 |   // Non-VLX targets must truncate from a 512-bit type, so we need to widen, |
11632 |   // truncate and then possibly extract the original subvector. |
11633 | if (!Subtarget.hasVLX() && !SrcVT.is512BitVector()) { |
11634 | SDValue NewSrc = widenSubVector(Src, ZeroUppers, Subtarget, DAG, DL, 512); |
11635 | return getAVX512TruncNode(DL, DstVT, NewSrc, Subtarget, DAG, ZeroUppers); |
11636 | } |
11637 |  |
11638 |   // Fallback to a X86ISD::VTRUNC, padding if necessary. |
11639 | MVT TruncVT = MVT::getVectorVT(DstSVT, 128 / DstEltSizeInBits); |
11640 | SDValue Trunc = DAG.getNode(X86ISD::VTRUNC, DL, TruncVT, Src); |
11641 | if (DstVT != TruncVT) |
11642 | Trunc = widenSubVector(Trunc, ZeroUppers, Subtarget, DAG, DL, |
11643 | DstVT.getSizeInBits()); |
11644 | return Trunc; |
11645 | } |
11646 |  |
11647 | // Try to lower trunc+vector_shuffle to a vpmovdb or a vpmovdw instruction. |
11648 | // |
11649 | // An example is the following: |
11650 | // |
11651 | // t0: ch = EntryToken |
11652 | //   t2: v4i64,ch = CopyFromReg t0, Register:v4i64 %0 |
11653 | //     t25: v4i32 = truncate t2 |
11654 | //   t41: v8i16 = bitcast t25 |
11655 | //   t21: v8i16 = BUILD_VECTOR undef, undef, undef, undef, 0, 0, 0, 0 |
11656 | //   t51: v8i16 = vector_shuffle<0,2,4,6,12,13,14,15> t41, t21 |
11657 | // t18: v2i64 = bitcast t51 |
11658 | // |
11659 | // One can just use a "vpmovdw" instruction extended to 128-bit, e.g. the |
11660 | // VLX version of the "vpmovdw" instruction. |
11661 |  |
11662 |  |
11663 | static SDValue lowerShuffleWithVPMOV(const SDLoc &DL, MVT VT, SDValue V1, |
11664 | SDValue V2, ArrayRef<int> Mask, |
11665 | const APInt &Zeroable, |
11666 | const X86Subtarget &Subtarget, |
11667 | SelectionDAG &DAG) { |
11668 | assert((VT == MVT::v16i8 || VT == MVT::v8i16) && "Unexpected VTRUNC type"); |
11669 | if (!Subtarget.hasAVX512()) |
11670 | return SDValue(); |
11671 | |
11672 | unsigned NumElts = VT.getVectorNumElements(); |
11673 | unsigned EltSizeInBits = VT.getScalarSizeInBits(); |
11674 | unsigned MaxScale = 64 / EltSizeInBits; |
11675 | for (unsigned Scale = 2; Scale <= MaxScale; Scale += Scale) { |
11676 | unsigned NumSrcElts = NumElts / Scale; |
11677 | unsigned UpperElts = NumElts - NumSrcElts; |
11678 | if (!isSequentialOrUndefInRange(Mask, 0, NumSrcElts, 0, Scale) || |
11679 | !Zeroable.extractBits(UpperElts, NumSrcElts).isAllOnesValue()) |
11680 | continue; |
11681 | |
11682 | SDValue Src = V1; |
11683 | if (!Src.hasOneUse()) |
11684 | return SDValue(); |
11685 | |
11686 | Src = peekThroughOneUseBitcasts(Src); |
11687 | if (Src.getOpcode() != ISD::TRUNCATE || |
11688 | Src.getScalarValueSizeInBits() != (EltSizeInBits * Scale)) |
11689 | return SDValue(); |
11690 | Src = Src.getOperand(0); |
11691 |  |
11692 |     // VPMOVWB is only available with avx512bw. |
11693 | MVT SrcVT = Src.getSimpleValueType(); |
11694 | if (SrcVT.getVectorElementType() == MVT::i16 && VT == MVT::v16i8 && |
11695 | !Subtarget.hasBWI()) |
11696 | return SDValue(); |
11697 | |
11698 | bool UndefUppers = isUndefInRange(Mask, NumSrcElts, UpperElts); |
11699 | return getAVX512TruncNode(DL, VT, Src, Subtarget, DAG, !UndefUppers); |
11700 | } |
11701 | |
11702 | return SDValue(); |
11703 | } |
11704 |  |
11705 | // Attempt to match binary shuffle patterns as a truncate. |
11706 | static SDValue lowerShuffleAsVTRUNC(const SDLoc &DL, MVT VT, SDValue V1, |
11707 | SDValue V2, ArrayRef<int> Mask, |
11708 | const APInt &Zeroable, |
11709 | const X86Subtarget &Subtarget, |
11710 | SelectionDAG &DAG) { |
11711 | assert((VT.is128BitVector() || VT.is256BitVector()) && |
11712 | "Unexpected VTRUNC type"); |
11713 | if (!Subtarget.hasAVX512()) |
11714 | return SDValue(); |
11715 | |
11716 | unsigned NumElts = VT.getVectorNumElements(); |
11717 | unsigned EltSizeInBits = VT.getScalarSizeInBits(); |
11718 | unsigned MaxScale = 64 / EltSizeInBits; |
11719 | for (unsigned Scale = 2; Scale <= MaxScale; Scale += Scale) { |
11720 |     // TODO: Support non-BWI VPMOVWB truncations? |
11721 | unsigned SrcEltBits = EltSizeInBits * Scale; |
11722 | if (SrcEltBits < 32 && !Subtarget.hasBWI()) |
11723 | continue; |
11724 | |
11725 |     // Match shuffle <0,Scale,2*Scale,..,undef_or_zero,undef_or_zero,...> |
11726 |     // Bail if the V2 elements are undef. |
11727 | unsigned NumHalfSrcElts = NumElts / Scale; |
11728 | unsigned NumSrcElts = 2 * NumHalfSrcElts; |
11729 | if (!isSequentialOrUndefInRange(Mask, 0, NumSrcElts, 0, Scale) || |
11730 | isUndefInRange(Mask, NumHalfSrcElts, NumHalfSrcElts)) |
11731 | continue; |
11732 | |
11733 |     // The elements beyond the truncation must be undef/zero. |
11734 | unsigned UpperElts = NumElts - NumSrcElts; |
11735 | if (UpperElts > 0 && |
11736 | !Zeroable.extractBits(UpperElts, NumSrcElts).isAllOnesValue()) |
11737 | continue; |
11738 | bool UndefUppers = |
11739 | UpperElts > 0 && isUndefInRange(Mask, NumSrcElts, UpperElts); |
11740 |  |
11741 |     // As we're using both sources then we need to concat them together |
11742 |     // and truncate from the double-sized src. |
11743 | MVT ConcatVT = MVT::getVectorVT(VT.getScalarType(), NumElts * 2); |
11744 | SDValue Src = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT, V1, V2); |
11745 | |
11746 | MVT SrcSVT = MVT::getIntegerVT(SrcEltBits); |
11747 | MVT SrcVT = MVT::getVectorVT(SrcSVT, NumSrcElts); |
11748 | Src = DAG.getBitcast(SrcVT, Src); |
11749 | return getAVX512TruncNode(DL, VT, Src, Subtarget, DAG, !UndefUppers); |
11750 | } |
11751 | |
11752 | return SDValue(); |
11753 | } |
11754 | |
11755 |  |
11756 | /// Check whether a compaction lowering can be done by dropping even |
11757 | /// elements and compute how many times even elements must be dropped. |
11758 | /// |
11759 | /// This handles shuffles which take every Nth element where N is a power of |
11760 | /// two. Example shuffle masks: |
11761 | /// |
11762 | ///  N = 1:  0,  2,  4,  6,  8, 10, 12, 14,  0,  2,  4,  6,  8, 10, 12, 14 |
11763 | ///  N = 1:  0,  2,  4,  6,  8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 |
11764 | ///  N = 2:  0,  4,  8, 12,  0,  4,  8, 12,  0,  4,  8, 12,  0,  4,  8, 12 |
11765 | ///  N = 2:  0,  4,  8, 12, 16, 20, 24, 28,  0,  4,  8, 12, 16, 20, 24, 28 |
11766 | ///  N = 3:  0,  8,  0,  8,  0,  8,  0,  8,  0,  8,  0,  8,  0,  8,  0,  8 |
11767 | ///  N = 3:  0,  8, 16, 24,  0,  8, 16, 24,  0,  8, 16, 24,  0,  8, 16, 24 |
11768 | /// |
11769 | /// Any of these lanes can of course be undef. |
11770 | /// |
11771 | /// This routine only supports N <= 3. |
11772 | /// FIXME: Evaluate whether either AVX or AVX-512 have any opportunities here |
11773 | /// for larger N. |
11774 | /// |
11775 | /// \returns N above, or the number of times even elements must be dropped. |
11776 | static int canLowerByDroppingEvenElements(ArrayRef<int> Mask, |
11777 | bool IsSingleInput) { |
11778 |   // The modulus for the shuffle vector entries is based on whether this is |
11779 |   // a single input or not. |
11780 | int ShuffleModulus = Mask.size() * (IsSingleInput ? 1 : 2); |
11781 | assert(isPowerOf2_32((uint32_t)ShuffleModulus) && |
11782 | "We should only be called with masks with a power-of-2 size!"); |
11783 | |
11784 | uint64_t ModMask = (uint64_t)ShuffleModulus - 1; |
11785 |  |
11786 |  |
11787 |   // We track whether the input is viable for all power-of-2 strides 2^1, |
11788 |   // 2^2, and 2^3, ruling each out as soon as a mismatching element is seen. |
11789 | bool ViableForN[3] = {true, true, true}; |
11790 | |
11791 | for (int i = 0, e = Mask.size(); i < e; ++i) { |
11792 |     // Ignore undef lanes, we'll optimistically collapse them to the pattern |
11793 |     // we want. |
11794 | if (Mask[i] < 0) |
11795 | continue; |
11796 | |
11797 | bool IsAnyViable = false; |
11798 | for (unsigned j = 0; j != array_lengthof(ViableForN); ++j) |
11799 | if (ViableForN[j]) { |
11800 | uint64_t N = j + 1; |
11801 |  |
11802 |         // The shuffle mask must be equal to (i * 2^N) % M. |
11803 | if ((uint64_t)Mask[i] == (((uint64_t)i << N) & ModMask)) |
11804 | IsAnyViable = true; |
11805 | else |
11806 | ViableForN[j] = false; |
11807 | } |
11808 | |
11809 | if (!IsAnyViable) |
11810 | break; |
11811 | } |
11812 | |
11813 | for (unsigned j = 0; j != array_lengthof(ViableForN); ++j) |
11814 | if (ViableForN[j]) |
11815 | return j + 1; |
11816 |  |
11817 |   // Return 0 as there is no viable power of two. |
11818 | return 0; |
11819 | } |
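// --- Illustrative sketch (added; not part of X86ISelLowering.cpp) ----------
// One viability probe from the loop above in standalone form, assuming a
// power-of-2 mask size (demoViableForN is a hypothetical name). Dropping even
// elements once (N=1) keeps indices 0,2,4,..., so the single-input v8 mask
// {0,2,4,6,0,2,4,6} is viable for N=1: (i << 1) & 7 reproduces it exactly.
#include <cstdint>
#include <vector>
static bool demoViableForN(const std::vector<int> &Mask, unsigned N,
                           bool SingleInput) {
  uint64_t ModMask = Mask.size() * (SingleInput ? 1 : 2) - 1;
  for (size_t i = 0; i < Mask.size(); ++i)
    if (Mask[i] >= 0 && (uint64_t)Mask[i] != (((uint64_t)i << N) & ModMask))
      return false; // element breaks the (i * 2^N) % M pattern
  return true;
}
// ----------------------------------------------------------------------------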
11820 | |
11821 |  |
11822 | // X86 has dedicated pack instructions that can handle specific truncation |
11823 | // operations: PACKSS and PACKUS. |
11824 | // Checks for compaction shuffle masks if MaxStages > 1. |
11825 | static bool matchShuffleWithPACK(MVT VT, MVT &SrcVT, SDValue &V1, SDValue &V2, |
11826 | unsigned &PackOpcode, ArrayRef<int> TargetMask, |
11827 | const SelectionDAG &DAG, |
11828 | const X86Subtarget &Subtarget, |
11829 | unsigned MaxStages = 1) { |
11830 | unsigned NumElts = VT.getVectorNumElements(); |
11831 | unsigned BitSize = VT.getScalarSizeInBits(); |
11832 | assert(0 < MaxStages && MaxStages <= 3 && (BitSize << MaxStages) <= 64 && |
11833 | "Illegal maximum compaction"); |
11834 | |
11835 | auto MatchPACK = [&](SDValue N1, SDValue N2, MVT PackVT) { |
11836 | unsigned NumSrcBits = PackVT.getScalarSizeInBits(); |
11837 | unsigned NumPackedBits = NumSrcBits - BitSize; |
11838 | N1 = peekThroughBitcasts(N1); |
11839 | N2 = peekThroughBitcasts(N2); |
11840 | unsigned NumBits1 = N1.getScalarValueSizeInBits(); |
11841 | unsigned NumBits2 = N2.getScalarValueSizeInBits(); |
11842 | bool IsZero1 = llvm::isNullOrNullSplat(N1, false); |
11843 | bool IsZero2 = llvm::isNullOrNullSplat(N2, false); |
11844 | if ((!N1.isUndef() && !IsZero1 && NumBits1 != NumSrcBits) || |
11845 | (!N2.isUndef() && !IsZero2 && NumBits2 != NumSrcBits)) |
11846 | return false; |
11847 | if (Subtarget.hasSSE41() || BitSize == 8) { |
11848 | APInt ZeroMask = APInt::getHighBitsSet(NumSrcBits, NumPackedBits); |
11849 | if ((N1.isUndef() || IsZero1 || DAG.MaskedValueIsZero(N1, ZeroMask)) && |
11850 | (N2.isUndef() || IsZero2 || DAG.MaskedValueIsZero(N2, ZeroMask))) { |
11851 | V1 = N1; |
11852 | V2 = N2; |
11853 | SrcVT = PackVT; |
11854 | PackOpcode = X86ISD::PACKUS; |
11855 | return true; |
11856 | } |
11857 | } |
11858 | bool IsAllOnes1 = llvm::isAllOnesOrAllOnesSplat(N1, false); |
11859 | bool IsAllOnes2 = llvm::isAllOnesOrAllOnesSplat(N2, false); |
11860 | if ((N1.isUndef() || IsZero1 || IsAllOnes1 || |
11861 | DAG.ComputeNumSignBits(N1) > NumPackedBits) && |
11862 | (N2.isUndef() || IsZero2 || IsAllOnes2 || |
11863 | DAG.ComputeNumSignBits(N2) > NumPackedBits)) { |
11864 | V1 = N1; |
11865 | V2 = N2; |
11866 | SrcVT = PackVT; |
11867 | PackOpcode = X86ISD::PACKSS; |
11868 | return true; |
11869 | } |
11870 | return false; |
11871 | }; |
11872 | |
11873 |   // Attempt to match against wider and wider compaction patterns. |
11874 | for (unsigned NumStages = 1; NumStages <= MaxStages; ++NumStages) { |
11875 | MVT PackSVT = MVT::getIntegerVT(BitSize << NumStages); |
11876 | MVT PackVT = MVT::getVectorVT(PackSVT, NumElts >> NumStages); |
11877 | |
11878 |     // Try binary shuffle. |
11879 | SmallVector<int, 32> BinaryMask; |
11880 | createPackShuffleMask(VT, BinaryMask, false, NumStages); |
11881 | if (isTargetShuffleEquivalent(VT, TargetMask, BinaryMask, V1, V2)) |
11882 | if (MatchPACK(V1, V2, PackVT)) |
11883 | return true; |
11884 | |
11885 |     // Try unary shuffle. |
11886 | SmallVector<int, 32> UnaryMask; |
11887 | createPackShuffleMask(VT, UnaryMask, true, NumStages); |
11888 | if (isTargetShuffleEquivalent(VT, TargetMask, UnaryMask, V1)) |
11889 | if (MatchPACK(V1, V1, PackVT)) |
11890 | return true; |
11891 | } |
11892 | |
11893 | return false; |
11894 | } |
11895 | |
11896 | static SDValue lowerShuffleWithPACK(const SDLoc &DL, MVT VT, ArrayRef<int> Mask, |
11897 | SDValue V1, SDValue V2, SelectionDAG &DAG, |
11898 | const X86Subtarget &Subtarget) { |
11899 | MVT PackVT; |
11900 | unsigned PackOpcode; |
11901 | unsigned SizeBits = VT.getSizeInBits(); |
11902 | unsigned EltBits = VT.getScalarSizeInBits(); |
11903 | unsigned MaxStages = Log2_32(64 / EltBits); |
11904 | if (!matchShuffleWithPACK(VT, PackVT, V1, V2, PackOpcode, Mask, DAG, |
11905 | Subtarget, MaxStages)) |
11906 | return SDValue(); |
11907 | |
11908 | unsigned CurrentEltBits = PackVT.getScalarSizeInBits(); |
11909 | unsigned NumStages = Log2_32(CurrentEltBits / EltBits); |
11910 |  |
11911 |   // Don't lower multi-stage packs on AVX512, truncation is better. |
11912 | if (NumStages != 1 && SizeBits == 128 && Subtarget.hasVLX()) |
11913 | return SDValue(); |
11914 |  |
11915 |   // Pack to the largest type possible: |
11916 |   // vXi64/vXi32 -> PACK*SDW and vXi16 -> PACK*SWB. |
11917 | unsigned MaxPackBits = 16; |
11918 | if (CurrentEltBits > 16 && |
11919 | (PackOpcode == X86ISD::PACKSS || Subtarget.hasSSE41())) |
11920 | MaxPackBits = 32; |
11921 | |
11922 |   // Repeatedly pack down to the target size. |
11923 | SDValue Res; |
11924 | for (unsigned i = 0; i != NumStages; ++i) { |
11925 | unsigned SrcEltBits = std::min(MaxPackBits, CurrentEltBits); |
11926 | unsigned NumSrcElts = SizeBits / SrcEltBits; |
11927 | MVT SrcSVT = MVT::getIntegerVT(SrcEltBits); |
11928 | MVT DstSVT = MVT::getIntegerVT(SrcEltBits / 2); |
11929 | MVT SrcVT = MVT::getVectorVT(SrcSVT, NumSrcElts); |
11930 | MVT DstVT = MVT::getVectorVT(DstSVT, NumSrcElts * 2); |
11931 | Res = DAG.getNode(PackOpcode, DL, DstVT, DAG.getBitcast(SrcVT, V1), |
11932 | DAG.getBitcast(SrcVT, V2)); |
11933 | V1 = V2 = Res; |
11934 | CurrentEltBits /= 2; |
11935 | } |
11936 | assert(Res && Res.getValueType() == VT && |
11937 | "Failed to lower compaction shuffle"); |
11938 | return Res; |
11939 | } |
11940 |  |
11941 | /// Try to emit a bitmask instruction for a shuffle. |
11942 | /// |
11943 | /// This handles cases where we can model a blend exactly as a bitmask due to |
11944 | /// one of the inputs being zeroable. |
11945 | static SDValue lowerShuffleAsBitMask(const SDLoc &DL, MVT VT, SDValue V1, |
11946 | SDValue V2, ArrayRef<int> Mask, |
11947 | const APInt &Zeroable, |
11948 | const X86Subtarget &Subtarget, |
11949 | SelectionDAG &DAG) { |
11950 | MVT MaskVT = VT; |
11951 | MVT EltVT = VT.getVectorElementType(); |
11952 | SDValue Zero, AllOnes; |
11953 | |
11954 | if (EltVT == MVT::i64 && !Subtarget.is64Bit()) { |
11955 | EltVT = MVT::f64; |
11956 | MaskVT = MVT::getVectorVT(EltVT, Mask.size()); |
11957 | } |
11958 | |
11959 | MVT LogicVT = VT; |
11960 | if (EltVT == MVT::f32 || EltVT == MVT::f64) { |
11961 | Zero = DAG.getConstantFP(0.0, DL, EltVT); |
11962 | APFloat AllOnesValue = APFloat::getAllOnesValue( |
11963 | SelectionDAG::EVTToAPFloatSemantics(EltVT), EltVT.getSizeInBits()); |
11964 | AllOnes = DAG.getConstantFP(AllOnesValue, DL, EltVT); |
11965 | LogicVT = |
11966 | MVT::getVectorVT(EltVT == MVT::f64 ? MVT::i64 : MVT::i32, Mask.size()); |
11967 | } else { |
11968 | Zero = DAG.getConstant(0, DL, EltVT); |
11969 | AllOnes = DAG.getAllOnesConstant(DL, EltVT); |
11970 | } |
11971 | |
11972 | SmallVector<SDValue, 16> VMaskOps(Mask.size(), Zero); |
11973 | SDValue V; |
11974 | for (int i = 0, Size = Mask.size(); i < Size; ++i) { |
11975 | if (Zeroable[i]) |
11976 | continue; |
11977 | if (Mask[i] % Size != i) |
11978 | return SDValue(); |
11979 | if (!V) |
11980 | V = Mask[i] < Size ? V1 : V2; |
11981 | else if (V != (Mask[i] < Size ? V1 : V2)) |
11982 | return SDValue(); |
11983 | |
11984 | VMaskOps[i] = AllOnes; |
11985 | } |
11986 | if (!V) |
11987 | return SDValue(); |
11988 | |
11989 | SDValue VMask = DAG.getBuildVector(MaskVT, DL, VMaskOps); |
11990 | VMask = DAG.getBitcast(LogicVT, VMask); |
11991 | V = DAG.getBitcast(LogicVT, V); |
11992 | SDValue And = DAG.getNode(ISD::AND, DL, LogicVT, V, VMask); |
11993 | return DAG.getBitcast(VT, And); |
11994 | } |
11995 | |
11996 |  |
11997 | /// Try to emit a blend instruction for a shuffle using bitwise math. |
11998 | /// |
11999 | /// This is used as a fallback plan when simple immediate-controlled blend |
12000 | /// instructions are not available. |
12001 | static SDValue lowerShuffleAsBitBlend(const SDLoc &DL, MVT VT, SDValue V1, |
12002 | SDValue V2, ArrayRef<int> Mask, |
12003 | SelectionDAG &DAG) { |
12004 | assert(VT.isInteger() && "Only supports integer vector types!"); |
12005 | MVT EltVT = VT.getVectorElementType(); |
12006 | SDValue Zero = DAG.getConstant(0, DL, EltVT); |
12007 | SDValue AllOnes = DAG.getAllOnesConstant(DL, EltVT); |
12008 | SmallVector<SDValue, 16> MaskOps; |
12009 | for (int i = 0, Size = Mask.size(); i < Size; ++i) { |
12010 | if (Mask[i] >= 0 && Mask[i] != i && Mask[i] != i + Size) |
12011 | return SDValue(); |
12012 | MaskOps.push_back(Mask[i] < Size ? AllOnes : Zero); |
12013 | } |
12014 | |
12015 | SDValue V1Mask = DAG.getBuildVector(VT, DL, MaskOps); |
12016 | V1 = DAG.getNode(ISD::AND, DL, VT, V1, V1Mask); |
12017 | V2 = DAG.getNode(X86ISD::ANDNP, DL, VT, V1Mask, V2); |
12018 | return DAG.getNode(ISD::OR, DL, VT, V1, V2); |
12019 | } |
12020 | |
12021 | static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask, |
12022 | SDValue PreservedSrc, |
12023 | const X86Subtarget &Subtarget, |
12024 | SelectionDAG &DAG); |
12025 | |
12026 | static bool matchShuffleAsBlend(SDValue V1, SDValue V2, |
12027 | MutableArrayRef<int> Mask, |
12028 | const APInt &Zeroable, bool &ForceV1Zero, |
12029 | bool &ForceV2Zero, uint64_t &BlendMask) { |
12030 | bool V1IsZeroOrUndef = |
12031 | V1.isUndef() || ISD::isBuildVectorAllZeros(V1.getNode()); |
12032 | bool V2IsZeroOrUndef = |
12033 | V2.isUndef() || ISD::isBuildVectorAllZeros(V2.getNode()); |
12034 | |
12035 | BlendMask = 0; |
12036 | ForceV1Zero = false, ForceV2Zero = false; |
12037 | assert(Mask.size() <= 64 && "Shuffle mask too big for blend mask"); |
12038 | |
12039 |  |
12040 |   // Attempt to blend with zero. |
12041 | for (int i = 0, Size = Mask.size(); i < Size; ++i) { |
12042 | int M = Mask[i]; |
12043 | if (M == SM_SentinelUndef) |
12044 | continue; |
12045 | if (M == i) |
12046 | continue; |
12047 | if (M == i + Size) { |
12048 | BlendMask |= 1ull << i; |
12049 | continue; |
12050 | } |
12051 | if (Zeroable[i]) { |
12052 | if (V1IsZeroOrUndef) { |
12053 | ForceV1Zero = true; |
12054 | Mask[i] = i; |
12055 | continue; |
12056 | } |
12057 | if (V2IsZeroOrUndef) { |
12058 | ForceV2Zero = true; |
12059 | BlendMask |= 1ull << i; |
12060 | Mask[i] = i + Size; |
12061 | continue; |
12062 | } |
12063 | } |
12064 | return false; |
12065 | } |
12066 | return true; |
12067 | } |
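// --- Illustrative sketch (added; not part of X86ISelLowering.cpp) ----------
// The core of the blend match above, minus the zeroable special cases: every
// element must stay in place and only switch inputs. For mask {0,5,2,7} on
// 4-element vectors, elements 1 and 3 come from V2, giving immediate 0b1010.
// demoMatchBlend is a hypothetical name.
#include <cstdint>
#include <vector>
static bool demoMatchBlend(const std::vector<int> &Mask, uint64_t &BlendImm) {
  int Size = (int)Mask.size();
  BlendImm = 0;
  for (int i = 0; i < Size; ++i) {
    if (Mask[i] < 0 || Mask[i] == i)
      continue;            // undef, or element i taken from V1
    if (Mask[i] != i + Size)
      return false;        // element changes position: not a blend
    BlendImm |= 1ull << i; // element i taken from V2
  }
  return true;
}
// ----------------------------------------------------------------------------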
12068 | |
12069 | static uint64_t scaleVectorShuffleBlendMask(uint64_t BlendMask, int Size, |
12070 | int Scale) { |
12071 | uint64_t ScaledMask = 0; |
12072 | for (int i = 0; i != Size; ++i) |
12073 | if (BlendMask & (1ull << i)) |
12074 | ScaledMask |= ((1ull << Scale) - 1) << (i * Scale); |
12075 | return ScaledMask; |
12076 | } |
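// --- Illustrative sketch (added; not part of X86ISelLowering.cpp) ----------
// Worked example for the scaling helper above: selecting element 1 of a
// v4i32 from V2 (BlendMask 0b0010) becomes bytes 4..7 in a v16i8 blend,
// i.e. scaleVectorShuffleBlendMask(0b0010, 4, 4) == 0b0000000011110000.
// demoScaleBlendCheck is a hypothetical name.
#include <cassert>
#include <cstdint>
static void demoScaleBlendCheck() {
  uint64_t Scaled = 0;
  for (int i = 0; i != 4; ++i)
    if (0b0010u & (1ull << i))
      Scaled |= ((1ull << 4) - 1) << (i * 4); // 4 set bits per element
  assert(Scaled == 0x00F0 && "element 1 expands to byte lanes 4..7");
}
// ----------------------------------------------------------------------------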
12077 | |
12078 |  |
12079 | /// Try to emit a blend instruction for a shuffle. |
12080 | /// |
12081 | /// We will try a number of different approaches to blending: immediate |
12082 | /// blends, masked moves and variable byte blends, selecting the cheapest |
12083 | /// form that matches the mask. |
12084 | static SDValue lowerShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1, |
12085 | SDValue V2, ArrayRef<int> Original, |
12086 | const APInt &Zeroable, |
12087 | const X86Subtarget &Subtarget, |
12088 | SelectionDAG &DAG) { |
12089 | uint64_t BlendMask = 0; |
12090 | bool ForceV1Zero = false, ForceV2Zero = false; |
12091 | SmallVector<int, 64> Mask(Original.begin(), Original.end()); |
12092 | if (!matchShuffleAsBlend(V1, V2, Mask, Zeroable, ForceV1Zero, ForceV2Zero, |
12093 | BlendMask)) |
12094 | return SDValue(); |
12095 |  |
12096 |   // Create a REAL zero vector - ISD::isBuildVectorAllZeros allows UNDEFs. |
12097 | if (ForceV1Zero) |
12098 | V1 = getZeroVector(VT, Subtarget, DAG, DL); |
12099 | if (ForceV2Zero) |
12100 | V2 = getZeroVector(VT, Subtarget, DAG, DL); |
12101 | |
12102 | switch (VT.SimpleTy) { |
12103 | case MVT::v4i64: |
12104 | case MVT::v8i32: |
12105 | assert(Subtarget.hasAVX2() && "256-bit integer blends require AVX2!"); |
12106 | LLVM_FALLTHROUGH; |
12107 | case MVT::v4f64: |
12108 | case MVT::v8f32: |
12109 | assert(Subtarget.hasAVX() && "256-bit float blends require AVX!"); |
12110 | LLVM_FALLTHROUGH; |
12111 | case MVT::v2f64: |
12112 | case MVT::v2i64: |
12113 | case MVT::v4f32: |
12114 | case MVT::v4i32: |
12115 | case MVT::v8i16: |
12116 | assert(Subtarget.hasSSE41() && "128-bit blends require SSE41!"); |
12117 | return DAG.getNode(X86ISD::BLENDI, DL, VT, V1, V2, |
12118 | DAG.getTargetConstant(BlendMask, DL, MVT::i8)); |
12119 | case MVT::v16i16: { |
12120 | assert(Subtarget.hasAVX2() && "v16i16 blends require AVX2!"); |
12121 | SmallVector<int, 8> RepeatedMask; |
12122 | if (is128BitLaneRepeatedShuffleMask(MVT::v16i16, Mask, RepeatedMask)) { |
12123 |       // We can lower these with PBLENDW which is mirrored across 128-bit lanes. |
12124 | assert(RepeatedMask.size() == 8 && "Repeated mask size doesn't match!"); |
12125 | BlendMask = 0; |
12126 | for (int i = 0; i < 8; ++i) |
12127 | if (RepeatedMask[i] >= 8) |
12128 | BlendMask |= 1ull << i; |
12129 | return DAG.getNode(X86ISD::BLENDI, DL, MVT::v16i16, V1, V2, |
12130 | DAG.getTargetConstant(BlendMask, DL, MVT::i8)); |
12131 | } |
12132 |  |
12133 |     // If one half is all-V1 or all-V2, blend each 128-bit half with PBLENDW |
12134 |     // and merge the halves with a fixed shuffle. |
12135 | uint64_t LoMask = BlendMask & 0xFF; |
12136 | uint64_t HiMask = (BlendMask >> 8) & 0xFF; |
12137 | if (LoMask == 0 || LoMask == 255 || HiMask == 0 || HiMask == 255) { |
12138 | SDValue Lo = DAG.getNode(X86ISD::BLENDI, DL, MVT::v16i16, V1, V2, |
12139 | DAG.getTargetConstant(LoMask, DL, MVT::i8)); |
12140 | SDValue Hi = DAG.getNode(X86ISD::BLENDI, DL, MVT::v16i16, V1, V2, |
12141 | DAG.getTargetConstant(HiMask, DL, MVT::i8)); |
12142 | return DAG.getVectorShuffle( |
12143 | MVT::v16i16, DL, Lo, Hi, |
12144 | {0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31}); |
12145 | } |
12146 | LLVM_FALLTHROUGH; |
12147 | } |
12148 | case MVT::v32i8: |
12149 | assert(Subtarget.hasAVX2() && "256-bit byte-blends require AVX2!"); |
12150 | LLVM_FALLTHROUGH; |
12151 | case MVT::v16i8: { |
12152 | assert(Subtarget.hasSSE41() && "128-bit byte-blends require SSE41!"); |
12153 |  |
12154 |     // Attempt to lower to a bitmask if we can. VPAND is faster than VPBLENDVB. |
12155 | if (SDValue Masked = lowerShuffleAsBitMask(DL, VT, V1, V2, Mask, Zeroable, |
12156 | Subtarget, DAG)) |
12157 | return Masked; |
12158 | |
12159 | if (Subtarget.hasBWI() && Subtarget.hasVLX()) { |
12160 | MVT IntegerType = |
12161 | MVT::getIntegerVT(std::max((int)VT.getVectorNumElements(), 8)); |
12162 | SDValue MaskNode = DAG.getConstant(BlendMask, DL, IntegerType); |
12163 | return getVectorMaskingNode(V2, MaskNode, V1, Subtarget, DAG); |
12164 | } |
12165 | |
12166 | |
12167 | if (Subtarget.hasVLX()) |
12168 | if (SDValue BitBlend = |
12169 | lowerShuffleAsBitBlend(DL, VT, V1, V2, Mask, DAG)) |
12170 | return BitBlend; |
12171 |  |
12172 |     // Scale the blend by the number of bytes per element. |
12173 | int Scale = VT.getScalarSizeInBits() / 8; |
12174 | |
12175 |     // This form of blend is always done on bytes. Compute the byte vector |
12176 |     // type. |
12177 | MVT BlendVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8); |
12178 | |
12179 |  |
12180 |  |
12181 |     // x86 allows load folding with blendvb only from the first vector |
12182 |     // operand, so commute when only V2 is a normal load. |
12183 | if (!ISD::isNormalLoad(V1.getNode()) && ISD::isNormalLoad(V2.getNode())) { |
12184 | ShuffleVectorSDNode::commuteMask(Mask); |
12185 | std::swap(V1, V2); |
12186 | } |
12187 |  |
12188 |  |
12189 |  |
12190 |  |
12191 |  |
12192 |     // Compute the VSELECT mask. Note that VSELECT is really confusing in the |
12193 |     // mix of x86 and LLVM semantics: a VSELECT mask element of all-ones (-1) |
12194 |     // selects the corresponding byte from the first operand, and zero selects |
12195 |     // it from the second. Each shuffle mask element is widened to Scale |
12196 |     // consecutive i8 mask elements so the blend operates on bytes, and undef |
12197 |     // shuffle elements become undef i8 mask elements, leaving later combines |
12198 |     // free to pick either side. |
12199 | SmallVector<SDValue, 32> VSELECTMask; |
12200 | for (int i = 0, Size = Mask.size(); i < Size; ++i) |
12201 | for (int j = 0; j < Scale; ++j) |
12202 | VSELECTMask.push_back( |
12203 | Mask[i] < 0 ? DAG.getUNDEF(MVT::i8) |
12204 | : DAG.getConstant(Mask[i] < Size ? -1 : 0, DL, |
12205 | MVT::i8)); |
12206 | |
12207 | V1 = DAG.getBitcast(BlendVT, V1); |
12208 | V2 = DAG.getBitcast(BlendVT, V2); |
12209 | return DAG.getBitcast( |
12210 | VT, |
12211 | DAG.getSelect(DL, BlendVT, DAG.getBuildVector(BlendVT, DL, VSELECTMask), |
12212 | V1, V2)); |
12213 | } |
12214 | case MVT::v16f32: |
12215 | case MVT::v8f64: |
12216 | case MVT::v8i64: |
12217 | case MVT::v16i32: |
12218 | case MVT::v32i16: |
12219 | case MVT::v64i8: { |
12220 |     // Attempt to lower to a bitmask if we can. Only if not optimizing for size. |
12221 | bool OptForSize = DAG.shouldOptForSize(); |
12222 | if (!OptForSize) { |
12223 | if (SDValue Masked = lowerShuffleAsBitMask(DL, VT, V1, V2, Mask, Zeroable, |
12224 | Subtarget, DAG)) |
12225 | return Masked; |
12226 | } |
12227 |  |
12228 |     // Otherwise load an immediate into a GPR, cast to k-register, and use a |
12229 |     // masked move. |
12230 | MVT IntegerType = |
12231 | MVT::getIntegerVT(std::max((int)VT.getVectorNumElements(), 8)); |
12232 | SDValue MaskNode = DAG.getConstant(BlendMask, DL, IntegerType); |
12233 | return getVectorMaskingNode(V2, MaskNode, V1, Subtarget, DAG); |
12234 | } |
12235 | default: |
12236 | llvm_unreachable("Not a supported integer vector type!"); |
12237 | } |
12238 | } |
12239 | |
12240 |  |
12241 | /// Try to lower as a blend of elements from two inputs followed by |
12242 | /// a single-input permutation. |
12243 | /// This matches the pattern where we can blend elements from two inputs and |
12244 | /// then reduce the shuffle to a single-input permutation. |
12245 | static SDValue lowerShuffleAsBlendAndPermute(const SDLoc &DL, MVT VT, |
12246 | SDValue V1, SDValue V2, |
12247 | ArrayRef<int> Mask, |
12248 | SelectionDAG &DAG, |
12249 | bool ImmBlends = false) { |
12250 |   // We build up the blend mask while checking whether each element of the |
12251 |   // final mask can be sourced from a position-preserving blend of the inputs. |
12252 | SmallVector<int, 32> BlendMask(Mask.size(), -1); |
12253 | SmallVector<int, 32> PermuteMask(Mask.size(), -1); |
12254 | |
12255 | for (int i = 0, Size = Mask.size(); i < Size; ++i) { |
12256 | if (Mask[i] < 0) |
12257 | continue; |
12258 | |
12259 | assert(Mask[i] < Size * 2 && "Shuffle input is out of bounds."); |
12260 | |
12261 | if (BlendMask[Mask[i] % Size] < 0) |
12262 | BlendMask[Mask[i] % Size] = Mask[i]; |
12263 | else if (BlendMask[Mask[i] % Size] != Mask[i]) |
12264 | return SDValue(); |
12265 | |
12266 | PermuteMask[i] = Mask[i] % Size; |
12267 | } |
12268 | |
12269 |  |
12270 |   // If only immediate blends, then bail if the blend mask can't be widened to i16. |
12271 | unsigned EltSize = VT.getScalarSizeInBits(); |
12272 | if (ImmBlends && EltSize == 8 && !canWidenShuffleElements(BlendMask)) |
12273 | return SDValue(); |
12274 | |
12275 | SDValue V = DAG.getVectorShuffle(VT, DL, V1, V2, BlendMask); |
12276 | return DAG.getVectorShuffle(VT, DL, V, DAG.getUNDEF(VT), PermuteMask); |
12277 | } |
12278 | |
12279 |  |
12280 | /// Try to lower as an unpack of elements from two inputs followed by |
12281 | /// a single-input permutation. |
12282 | /// This matches the pattern where we can unpack elements from two inputs |
12283 | /// and then reduce the shuffle to a single-input permutation. |
12284 | static SDValue lowerShuffleAsUNPCKAndPermute(const SDLoc &DL, MVT VT, |
12285 | SDValue V1, SDValue V2, |
12286 | ArrayRef<int> Mask, |
12287 | SelectionDAG &DAG) { |
12288 | int NumElts = Mask.size(); |
12289 | int NumLanes = VT.getSizeInBits() / 128; |
12290 | int NumLaneElts = NumElts / NumLanes; |
12291 | int NumHalfLaneElts = NumLaneElts / 2; |
12292 | |
12293 | bool MatchLo = true, MatchHi = true; |
12294 | SDValue Ops[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT)}; |
12295 | |
12296 |   // Determine UNPCKL/UNPCKH type and operand order. |
12297 | for (int Lane = 0; Lane != NumElts; Lane += NumLaneElts) { |
12298 | for (int Elt = 0; Elt != NumLaneElts; ++Elt) { |
12299 | int M = Mask[Lane + Elt]; |
12300 | if (M < 0) |
12301 | continue; |
12302 | |
12303 | SDValue &Op = Ops[Elt & 1]; |
12304 | if (M < NumElts && (Op.isUndef() || Op == V1)) |
12305 | Op = V1; |
12306 | else if (NumElts <= M && (Op.isUndef() || Op == V2)) |
12307 | Op = V2; |
12308 | else |
12309 | return SDValue(); |
12310 | |
12311 | int Lo = Lane, Mid = Lane + NumHalfLaneElts, Hi = Lane + NumLaneElts; |
12312 | MatchLo &= isUndefOrInRange(M, Lo, Mid) || |
12313 | isUndefOrInRange(M, NumElts + Lo, NumElts + Mid); |
12314 | MatchHi &= isUndefOrInRange(M, Mid, Hi) || |
12315 | isUndefOrInRange(M, NumElts + Mid, NumElts + Hi); |
12316 | if (!MatchLo && !MatchHi) |
12317 | return SDValue(); |
12318 | } |
12319 | } |
12320 | assert((MatchLo ^ MatchHi) && "Failed to match UNPCKLO/UNPCKHI"); |
12321 | |
12322 |  |
12323 |   // Now check that each pair of elts come from the same unpack pair |
12324 |   // and set the permute mask based on each pair. |
12325 | SmallVector<int, 32> PermuteMask(NumElts, -1); |
12326 | for (int Lane = 0; Lane != NumElts; Lane += NumLaneElts) { |
12327 | for (int Elt = 0; Elt != NumLaneElts; Elt += 2) { |
12328 | int M0 = Mask[Lane + Elt + 0]; |
12329 | int M1 = Mask[Lane + Elt + 1]; |
12330 | if (0 <= M0 && 0 <= M1 && |
12331 | (M0 % NumHalfLaneElts) != (M1 % NumHalfLaneElts)) |
12332 | return SDValue(); |
12333 | if (0 <= M0) |
12334 | PermuteMask[Lane + Elt + 0] = Lane + (2 * (M0 % NumHalfLaneElts)); |
12335 | if (0 <= M1) |
12336 | PermuteMask[Lane + Elt + 1] = Lane + (2 * (M1 % NumHalfLaneElts)) + 1; |
12337 | } |
12338 | } |
12339 | |
12340 | unsigned UnpckOp = MatchLo ? X86ISD::UNPCKL : X86ISD::UNPCKH; |
12341 | SDValue Unpck = DAG.getNode(UnpckOp, DL, VT, Ops); |
12342 | return DAG.getVectorShuffle(VT, DL, Unpck, DAG.getUNDEF(VT), PermuteMask); |
12343 | } |
12344 | |
12345 | /// Helper to form a PALIGNR-based rotate+permute, merging 2 inputs and then |
12346 | /// permuting the elements of the result in place. |
12347 | static SDValue lowerShuffleAsByteRotateAndPermute( |
12348 | const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask, |
12349 | const X86Subtarget &Subtarget, SelectionDAG &DAG) { |
12350 | if ((VT.is128BitVector() && !Subtarget.hasSSSE3()) || |
12351 | (VT.is256BitVector() && !Subtarget.hasAVX2()) || |
12352 | (VT.is512BitVector() && !Subtarget.hasBWI())) |
12353 | return SDValue(); |
12354 | |
12355 | |
12356 | if (is128BitLaneCrossingShuffleMask(VT, Mask)) |
12357 | return SDValue(); |
12358 | |
12359 | int Scale = VT.getScalarSizeInBits() / 8; |
12360 | int NumLanes = VT.getSizeInBits() / 128; |
12361 | int NumElts = VT.getVectorNumElements(); |
12362 | int NumEltsPerLane = NumElts / NumLanes; |
12363 |  |
12364 |   // Determine range of mask elts. |
12365 | bool Blend1 = true; |
12366 | bool Blend2 = true; |
12367 | std::pair<int, int> Range1 = std::make_pair(INT_MAX, INT_MIN); |
12368 | std::pair<int, int> Range2 = std::make_pair(INT_MAX, INT_MIN); |
12369 | for (int Lane = 0; Lane != NumElts; Lane += NumEltsPerLane) { |
12370 | for (int Elt = 0; Elt != NumEltsPerLane; ++Elt) { |
12371 | int M = Mask[Lane + Elt]; |
12372 | if (M < 0) |
12373 | continue; |
12374 | if (M < NumElts) { |
12375 | Blend1 &= (M == (Lane + Elt)); |
12376 | assert(Lane <= M && M < (Lane + NumEltsPerLane) && "Out of range mask"); |
12377 | M = M % NumEltsPerLane; |
12378 | Range1.first = std::min(Range1.first, M); |
12379 | Range1.second = std::max(Range1.second, M); |
12380 | } else { |
12381 | M -= NumElts; |
12382 | Blend2 &= (M == (Lane + Elt)); |
12383 | assert(Lane <= M && M < (Lane + NumEltsPerLane) && "Out of range mask"); |
12384 | M = M % NumEltsPerLane; |
12385 | Range2.first = std::min(Range2.first, M); |
12386 | Range2.second = std::max(Range2.second, M); |
12387 | } |
12388 | } |
12389 | } |
12390 |  |
12391 |  |
12392 |   // Bail if either input's used elements fall outside a lane-relative |
12393 |   // window that a single in-lane rotate can reach. |
12394 | if (!(0 <= Range1.first && Range1.second < NumEltsPerLane) || |
12395 | !(0 <= Range2.first && Range2.second < NumEltsPerLane)) |
12396 | return SDValue(); |
12397 | |
12398 | if (VT.getSizeInBits() > 128 && (Blend1 || Blend2)) |
12399 | return SDValue(); |
12400 |  |
12401 |   // Rotate the 2 ops so we can access both ranges, then permute the result. |
12402 | auto RotateAndPermute = [&](SDValue Lo, SDValue Hi, int RotAmt, int Ofs) { |
12403 | MVT ByteVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8); |
12404 | SDValue Rotate = DAG.getBitcast( |
12405 | VT, DAG.getNode(X86ISD::PALIGNR, DL, ByteVT, DAG.getBitcast(ByteVT, Hi), |
12406 | DAG.getBitcast(ByteVT, Lo), |
12407 | DAG.getTargetConstant(Scale * RotAmt, DL, MVT::i8))); |
12408 | SmallVector<int, 64> PermMask(NumElts, SM_SentinelUndef); |
12409 | for (int Lane = 0; Lane != NumElts; Lane += NumEltsPerLane) { |
12410 | for (int Elt = 0; Elt != NumEltsPerLane; ++Elt) { |
12411 | int M = Mask[Lane + Elt]; |
12412 | if (M < 0) |
12413 | continue; |
12414 | if (M < NumElts) |
12415 | PermMask[Lane + Elt] = Lane + ((M + Ofs - RotAmt) % NumEltsPerLane); |
12416 | else |
12417 | PermMask[Lane + Elt] = Lane + ((M - Ofs - RotAmt) % NumEltsPerLane); |
12418 | } |
12419 | } |
12420 | return DAG.getVectorShuffle(VT, DL, Rotate, DAG.getUNDEF(VT), PermMask); |
12421 | }; |
12422 |  |
12423 |   // Check if the ranges are small enough to rotate from either direction. |
12424 | if (Range2.second < Range1.first) |
12425 | return RotateAndPermute(V1, V2, Range1.first, 0); |
12426 | if (Range1.second < Range2.first) |
12427 | return RotateAndPermute(V2, V1, Range2.first, NumElts); |
12428 | return SDValue(); |
12429 | } |
12430 |  |
12431 |  |
12432 | /// Generic routine to decompose a shuffle and blend into independent |
12433 | /// blends and permutes. |
12434 | /// |
12435 | /// This matches the extremely common pattern for handling combined |
12436 | /// shuffle+blend operations generically where we can split up the blend |
12437 | /// into a separate blend and a separate (per-input) permute. |
12438 | static SDValue lowerShuffleAsDecomposedShuffleMerge( |
12439 | const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask, |
12440 | const X86Subtarget &Subtarget, SelectionDAG &DAG) { |
12441 | int NumElts = Mask.size(); |
12442 | int NumLanes = VT.getSizeInBits() / 128; |
12443 | int NumEltsPerLane = NumElts / NumLanes; |
12444 |  |
12445 |   // Shuffle the input elements into the desired positions in V1 and V2 and |
12446 |   // blend them together from there. |
12447 | bool IsAlternating = true; |
12448 | SmallVector<int, 32> V1Mask(NumElts, -1); |
12449 | SmallVector<int, 32> V2Mask(NumElts, -1); |
12450 | SmallVector<int, 32> FinalMask(NumElts, -1); |
12451 | for (int i = 0; i < NumElts; ++i) { |
12452 | int M = Mask[i]; |
12453 | if (M >= 0 && M < NumElts) { |
12454 | V1Mask[i] = M; |
12455 | FinalMask[i] = i; |
12456 | IsAlternating &= (i & 1) == 0; |
12457 | } else if (M >= NumElts) { |
12458 | V2Mask[i] = M - NumElts; |
12459 | FinalMask[i] = i + NumElts; |
12460 | IsAlternating &= (i & 1) == 1; |
12461 | } |
12462 | } |
12463 |  |
12464 |  |
12465 |  |
12466 |   // Try the simpler blend/unpack/rotate+permute strategies first, but only |
12467 |   // when neither input shuffle is a no-op (if one is, the blend below is |
12468 |   // effectively free and we should not bother). |
12469 | if (!isNoopShuffleMask(V1Mask) && !isNoopShuffleMask(V2Mask)) { |
12470 | |
12471 | if (SDValue BlendPerm = lowerShuffleAsBlendAndPermute(DL, VT, V1, V2, Mask, |
12472 | DAG, true)) |
12473 | return BlendPerm; |
12474 | if (SDValue UnpackPerm = lowerShuffleAsUNPCKAndPermute(DL, VT, V1, V2, Mask, |
12475 | DAG)) |
12476 | return UnpackPerm; |
12477 | if (SDValue RotatePerm = lowerShuffleAsByteRotateAndPermute( |
12478 | DL, VT, V1, V2, Mask, Subtarget, DAG)) |
12479 | return RotatePerm; |
12480 | // Unpack/rotate failed - try again with variable blends. |
12481 | if (SDValue BlendPerm = lowerShuffleAsBlendAndPermute(DL, VT, V1, V2, Mask, |
12482 | DAG)) |
12483 | return BlendPerm; |
12484 | } |
12485 | |
12486 | |
12487 | // If the mask strictly alternates between V1 (even slots) and V2 (odd |
12488 | // slots) and the elements are narrower than 32 bits, compress each source |
12489 | // into the low half of its lane so the final merge becomes a single unpack. |
12490 | if (IsAlternating && VT.getScalarSizeInBits() < 32) { |
12491 | V1Mask.assign(NumElts, -1); |
12492 | V2Mask.assign(NumElts, -1); |
12493 | FinalMask.assign(NumElts, -1); |
12494 | for (int i = 0; i != NumElts; i += NumEltsPerLane) |
12495 | for (int j = 0; j != NumEltsPerLane; ++j) { |
12496 | int M = Mask[i + j]; |
12497 | if (M >= 0 && M < NumElts) { |
12498 | V1Mask[i + (j / 2)] = M; |
12499 | FinalMask[i + j] = i + (j / 2); |
12500 | } else if (M >= NumElts) { |
12501 | V2Mask[i + (j / 2)] = M - NumElts; |
12502 | FinalMask[i + j] = i + (j / 2) + NumElts; |
12503 | } |
12504 | } |
12505 | } |
12506 | |
12507 | V1 = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), V1Mask); |
12508 | V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), V2Mask); |
12509 | return DAG.getVectorShuffle(VT, DL, V1, V2, FinalMask); |
12510 | } |
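      | // Illustrative worked example (added commentary, not from the original |
      | // source): a v8i16 Mask = <0,8,1,9,2,10,3,11> alternates V1 (even slots) |
      | // and V2 (odd slots), so the IsAlternating path rewrites V1Mask and |
      | // V2Mask to <0,1,2,3,-1,-1,-1,-1> (no-ops here) and the resulting |
      | // FinalMask <0,8,1,9,2,10,3,11> is exactly the PUNPCKLWD interleave. |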
12511 | |
12512 | |
12513 | /// Try to match a shuffle mask as a bit rotation of NumSubElts-sized |
12514 | /// sub-groups of elements. Returns the per-group element rotation amount, |
12515 | /// or -1 if the mask is not a uniform rotation. |
12516 | static int matchShuffleAsBitRotate(ArrayRef<int> Mask, int NumSubElts) { |
12517 | int NumElts = Mask.size(); |
12518 | assert((NumElts % NumSubElts) == 0 && "Illegal shuffle mask"); |
12519 | |
12520 | int RotateAmt = -1; |
12521 | for (int i = 0; i != NumElts; i += NumSubElts) { |
12522 | for (int j = 0; j != NumSubElts; ++j) { |
12523 | int M = Mask[i + j]; |
12524 | if (M < 0) |
12525 | continue; |
12526 | if (!isInRange(M, i, i + NumSubElts)) |
12527 | return -1; |
12528 | int Offset = (NumSubElts - (M - (i + j))) % NumSubElts; |
12529 | if (0 <= RotateAmt && Offset != RotateAmt) |
12530 | return -1; |
12531 | RotateAmt = Offset; |
12532 | } |
12533 | } |
12534 | return RotateAmt; |
12535 | } |
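      | // Illustrative worked example (added commentary, not from the original |
      | // source): for a v16i8 mask whose every 4-element group is <1,2,3,0> |
      | // (NumSubElts == 4), each element yields Offset == 3, so the routine |
      | // returns 3 - in little-endian element order that is a 32-bit rotate |
      | // left by 3 * 8 == 24 bits. |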
12536 | |
12537 | static int matchShuffleAsBitRotate(MVT &RotateVT, int EltSizeInBits, |
12538 | const X86Subtarget &Subtarget, |
12539 | ArrayRef<int> Mask) { |
12540 | assert(!isNoopShuffleMask(Mask) && "We shouldn't lower no-op shuffles!"); |
12541 | assert(EltSizeInBits < 64 && "Can't rotate 64-bit integers"); |
12542 | |
12543 | |
12544 | int MinSubElts = Subtarget.hasAVX512() ? std::max(32 / EltSizeInBits, 2) : 2; |
12545 | int MaxSubElts = 64 / EltSizeInBits; |
12546 | for (int NumSubElts = MinSubElts; NumSubElts <= MaxSubElts; NumSubElts *= 2) { |
12547 | int RotateAmt = matchShuffleAsBitRotate(Mask, NumSubElts); |
12548 | if (RotateAmt < 0) |
12549 | continue; |
12550 | |
12551 | int NumElts = Mask.size(); |
12552 | MVT RotateSVT = MVT::getIntegerVT(EltSizeInBits * NumSubElts); |
12553 | RotateVT = MVT::getVectorVT(RotateSVT, NumElts / NumSubElts); |
12554 | return RotateAmt * EltSizeInBits; |
12555 | } |
12556 | |
12557 | return -1; |
12558 | } |
12559 | |
12560 | /// Lower shuffle using X86ISD::VROTLI rotations. |
12561 | static SDValue lowerShuffleAsBitRotate(const SDLoc &DL, MVT VT, SDValue V1, |
12562 | ArrayRef<int> Mask, |
12563 | const X86Subtarget &Subtarget, |
12564 | SelectionDAG &DAG) { |
12565 | // Only XOP + AVX512 targets have bit rotation instructions. |
12566 | // If we at least have SSSE3 (PSHUFB) then we shouldn't attempt to use this. |
12567 | bool IsLegal = |
12568 | (VT.is128BitVector() && Subtarget.hasXOP()) || Subtarget.hasAVX512(); |
12569 | if (!IsLegal && Subtarget.hasSSE3()) |
12570 | return SDValue(); |
12571 | |
12572 | MVT RotateVT; |
12573 | int RotateAmt = matchShuffleAsBitRotate(RotateVT, VT.getScalarSizeInBits(), |
12574 | Subtarget, Mask); |
12575 | if (RotateAmt < 0) |
12576 | return SDValue(); |
12577 | |
12578 | |
12579 | // Emulate the rotation with a pair of immediate shifts and an OR, but skip |
12580 | // whole-word (multiple of 16 bits) amounts - other lowerings handle those. |
12581 | if (!IsLegal) { |
12582 | if ((RotateAmt % 16) == 0) |
12583 | return SDValue(); |
12584 | |
12585 | unsigned ShlAmt = RotateAmt; |
12586 | unsigned SrlAmt = RotateVT.getScalarSizeInBits() - RotateAmt; |
12587 | V1 = DAG.getBitcast(RotateVT, V1); |
12588 | SDValue SHL = DAG.getNode(X86ISD::VSHLI, DL, RotateVT, V1, |
12589 | DAG.getTargetConstant(ShlAmt, DL, MVT::i8)); |
12590 | SDValue SRL = DAG.getNode(X86ISD::VSRLI, DL, RotateVT, V1, |
12591 | DAG.getTargetConstant(SrlAmt, DL, MVT::i8)); |
12592 | SDValue Rot = DAG.getNode(ISD::OR, DL, RotateVT, SHL, SRL); |
12593 | return DAG.getBitcast(VT, Rot); |
12594 | } |
12595 | |
12596 | SDValue Rot = |
12597 | DAG.getNode(X86ISD::VROTLI, DL, RotateVT, DAG.getBitcast(RotateVT, V1), |
12598 | DAG.getTargetConstant(RotateAmt, DL, MVT::i8)); |
12599 | return DAG.getBitcast(VT, Rot); |
12600 | } |
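      | // Illustrative worked example (added commentary, not from the original |
      | // source): rotating v4i32 elements left by 24 bits without XOP/AVX512 |
      | // is emitted above as OR(VSHLI(V, 24), VSRLI(V, 8)), while amounts that |
      | // are multiples of 16 bits are rejected and left to word shuffles. |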
12601 | |
12602 | |
12603 | /// Try to match a vector shuffle as an element rotation of its concatenated |
12604 | /// inputs. This is used for PALIGNR (SSSE3) and VALIGND/Q (AVX512). |
12605 | static int matchShuffleAsElementRotate(SDValue &V1, SDValue &V2, |
12606 | ArrayRef<int> Mask) { |
12607 | int NumElts = Mask.size(); |
12608 | |
12609 | // We need to detect various ways of spelling a rotation: |
12610 | //   [11, 12, 13, 14, 15,  0,  1,  2] |
12611 | //   [-1, 12, 13, 14, -1, -1,  1, -1] |
12612 | //   [-1, -1, -1, -1, -1, -1,  1,  2] |
12613 | //   [ 3,  4,  5,  6,  7,  8,  9, 10] |
12614 | //   [-1,  4,  5,  6, -1, -1,  9, -1] |
12615 | //   [-1,  4,  5,  6, -1, -1, -1, -1] |
12616 | int Rotation = 0; |
12617 | SDValue Lo, Hi; |
12618 | for (int i = 0; i < NumElts; ++i) { |
12619 | int M = Mask[i]; |
12620 | assert((M == SM_SentinelUndef || (0 <= M && M < (2*NumElts))) && |
12621 | "Unexpected mask index."); |
12622 | if (M < 0) |
12623 | continue; |
12624 | |
12625 | |
12626 | int StartIdx = i - (M % NumElts); |
12627 | if (StartIdx == 0) |
12628 | |
12629 | return -1; |
12630 | |
12631 | // If we found the tail of a vector the rotation must be the missing |
12632 | // front. If we found the head of a vector, it must be how much of the |
12633 | // head. |
12634 | int CandidateRotation = StartIdx < 0 ? -StartIdx : NumElts - StartIdx; |
12635 | |
12636 | if (Rotation == 0) |
12637 | Rotation = CandidateRotation; |
12638 | else if (Rotation != CandidateRotation) |
12639 | |
12640 | return -1; |
12641 | |
12642 | |
12643 | SDValue MaskV = M < NumElts ? V1 : V2; |
12644 | |
12645 | |
12646 | |
12647 | |
12648 | SDValue &TargetV = StartIdx < 0 ? Hi : Lo; |
12649 | |
12650 | |
12651 | |
12652 | if (!TargetV) |
12653 | TargetV = MaskV; |
12654 | else if (TargetV != MaskV) |
12655 | |
12656 | |
12657 | return -1; |
12658 | } |
12659 | |
12660 | |
12661 | assert(Rotation != 0 && "Failed to locate a viable rotation!"); |
12662 | assert((Lo || Hi) && "Failed to find a rotated input vector!"); |
12663 | if (!Lo) |
12664 | Lo = Hi; |
12665 | else if (!Hi) |
12666 | Hi = Lo; |
12667 | |
12668 | V1 = Lo; |
12669 | V2 = Hi; |
12670 | |
12671 | return Rotation; |
12672 | } |
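      | // Illustrative worked example (added commentary, not from the original |
      | // source): for v4i32 Mask = <3,4,5,6>, element 0 gives StartIdx = -3 |
      | // (assigning V1 to Hi) and elements 1..3 give StartIdx = 1 (assigning V2 |
      | // to Lo); both agree on Rotation = 3, i.e. the result is elements 3..6 |
      | // of the 8-element concatenation <V1[0..3], V2[0..3]>. |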
12673 | |
12674 | |
12675 | /// Try to match a vector shuffle as a byte rotation. |
12676 | /// |
12677 | /// SSSE3 has a generic PALIGNR instruction in x86 that will do an arbitrary |
12678 | /// byte-rotation of the concatenation of two vectors; this routine matches |
12679 | /// a shuffle mask against that pattern, one 128-bit lane at a time. |
12680 | /// |
12681 | /// The rotation must be identical in every lane, and masks with zeroable |
12682 | /// elements are rejected because PALIGNR cannot materialize zeros. The |
12683 | /// returned amount is in bytes; -1 means no match. |
12684 | |
12685 | |
12686 | |
12687 | |
12688 | |
12689 | |
12690 | static int matchShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2, |
12691 | ArrayRef<int> Mask) { |
12692 | |
12693 | if (isAnyZero(Mask)) |
12694 | return -1; |
12695 | |
12696 | |
12697 | SmallVector<int, 16> RepeatedMask; |
12698 | if (!is128BitLaneRepeatedShuffleMask(VT, Mask, RepeatedMask)) |
12699 | return -1; |
12700 | |
12701 | int Rotation = matchShuffleAsElementRotate(V1, V2, RepeatedMask); |
12702 | if (Rotation <= 0) |
12703 | return -1; |
12704 | |
12705 | |
12706 | |
12707 | int NumElts = RepeatedMask.size(); |
12708 | int Scale = 16 / NumElts; |
12709 | return Rotation * Scale; |
12710 | } |
12711 | |
12712 | static SDValue lowerShuffleAsByteRotate(const SDLoc &DL, MVT VT, SDValue V1, |
12713 | SDValue V2, ArrayRef<int> Mask, |
12714 | const X86Subtarget &Subtarget, |
12715 | SelectionDAG &DAG) { |
12716 | assert(!isNoopShuffleMask(Mask) && "We shouldn't lower no-op shuffles!"); |
12717 | |
12718 | SDValue Lo = V1, Hi = V2; |
12719 | int ByteRotation = matchShuffleAsByteRotate(VT, Lo, Hi, Mask); |
12720 | if (ByteRotation <= 0) |
12721 | return SDValue(); |
12722 | |
12723 | |
12724 | |
12725 | MVT ByteVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8); |
12726 | Lo = DAG.getBitcast(ByteVT, Lo); |
12727 | Hi = DAG.getBitcast(ByteVT, Hi); |
12728 | |
12729 | |
12730 | if (Subtarget.hasSSSE3()) { |
12731 | assert((!VT.is512BitVector() || Subtarget.hasBWI()) && |
12732 | "512-bit PALIGNR requires BWI instructions"); |
12733 | return DAG.getBitcast( |
12734 | VT, DAG.getNode(X86ISD::PALIGNR, DL, ByteVT, Lo, Hi, |
12735 | DAG.getTargetConstant(ByteRotation, DL, MVT::i8))); |
12736 | } |
12737 | |
12738 | assert(VT.is128BitVector() && |
12739 | "Rotate-based lowering only supports 128-bit lowering!"); |
12740 | assert(Mask.size() <= 16 && |
12741 | "Can shuffle at most 16 bytes in a 128-bit vector!"); |
12742 | assert(ByteVT == MVT::v16i8 && |
12743 | "SSE2 rotate lowering only needed for v16i8!"); |
12744 | |
12745 | |
12746 | int LoByteShift = 16 - ByteRotation; |
12747 | int HiByteShift = ByteRotation; |
12748 | |
12749 | SDValue LoShift = |
12750 | DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, Lo, |
12751 | DAG.getTargetConstant(LoByteShift, DL, MVT::i8)); |
12752 | SDValue HiShift = |
12753 | DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v16i8, Hi, |
12754 | DAG.getTargetConstant(HiByteShift, DL, MVT::i8)); |
12755 | return DAG.getBitcast(VT, |
12756 | DAG.getNode(ISD::OR, DL, MVT::v16i8, LoShift, HiShift)); |
12757 | } |
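      | // Illustrative worked example (added commentary, not from the original |
      | // source): the v4i32 rotation above (Rotation = 3, Scale = 16 / 4) |
      | // becomes a PALIGNR by 12 bytes with SSSE3; on plain SSE2 the same |
      | // result is synthesized as OR(VSHLDQ(Lo, 4), VSRLDQ(Hi, 12)). |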
12758 | |
12759 | |
12760 | |
12761 | /// Try to lower a vector shuffle as a dword/qword rotation. |
12762 | /// |
12763 | /// AVX512 has VALIGND/VALIGNQ instructions that rotate the concatenation |
12764 | /// of two vectors by a whole number of 32/64-bit elements; this matches the |
12765 | /// shuffle mask against that rotation pattern and emits the instruction. |
12766 | |
12767 | |
12768 | |
12769 | static SDValue lowerShuffleAsVALIGN(const SDLoc &DL, MVT VT, SDValue V1, |
12770 | SDValue V2, ArrayRef<int> Mask, |
12771 | const X86Subtarget &Subtarget, |
12772 | SelectionDAG &DAG) { |
12773 | assert((VT.getScalarType() == MVT::i32 || VT.getScalarType() == MVT::i64) && |
12774 | "Only 32-bit and 64-bit elements are supported!"); |
12775 | |
12776 | |
12777 | assert((Subtarget.hasVLX() || (!VT.is128BitVector() && !VT.is256BitVector())) |
12778 | && "VLX required for 128/256-bit vectors"); |
12779 | |
12780 | SDValue Lo = V1, Hi = V2; |
12781 | int Rotation = matchShuffleAsElementRotate(Lo, Hi, Mask); |
12782 | if (Rotation <= 0) |
12783 | return SDValue(); |
12784 | |
12785 | return DAG.getNode(X86ISD::VALIGN, DL, VT, Lo, Hi, |
12786 | DAG.getTargetConstant(Rotation, DL, MVT::i8)); |
12787 | } |
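      | // Illustrative usage note (added commentary, not from the original |
      | // source): with AVX512VL the v4i32 example Mask = <3,4,5,6> lowers |
      | // directly to VALIGND with immediate 3 on (Lo = V2, Hi = V1), avoiding |
      | // any byte-level bitcasts. |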
12788 | |
12789 | /// Try to lower a vector shuffle as a byte shift sequence. |
12790 | static SDValue lowerShuffleAsByteShiftMask(const SDLoc &DL, MVT VT, SDValue V1, |
12791 | SDValue V2, ArrayRef<int> Mask, |
12792 | const APInt &Zeroable, |
12793 | const X86Subtarget &Subtarget, |
12794 | SelectionDAG &DAG) { |
12795 | assert(!isNoopShuffleMask(Mask) && "We shouldn't lower no-op shuffles!"); |
12796 | assert(VT.is128BitVector() && "Only 128-bit vectors supported"); |
12797 | |
12798 | // We need a shuffle that has zeros at one/both ends and a sequential |
12799 | // shuffle from one source within. |
12800 | unsigned ZeroLo = Zeroable.countTrailingOnes(); |
12801 | unsigned ZeroHi = Zeroable.countLeadingOnes(); |
12802 | if (!ZeroLo && !ZeroHi) |
12803 | return SDValue(); |
12804 | |
12805 | unsigned NumElts = Mask.size(); |
12806 | unsigned Len = NumElts - (ZeroLo + ZeroHi); |
12807 | if (!isSequentialOrUndefInRange(Mask, ZeroLo, Len, Mask[ZeroLo])) |
12808 | return SDValue(); |
12809 | |
12810 | unsigned Scale = VT.getScalarSizeInBits() / 8; |
12811 | ArrayRef<int> StubMask = Mask.slice(ZeroLo, Len); |
12812 | if (!isUndefOrInRange(StubMask, 0, NumElts) && |
12813 | !isUndefOrInRange(StubMask, NumElts, 2 * NumElts)) |
12814 | return SDValue(); |
12815 | |
12816 | SDValue Res = Mask[ZeroLo] < (int)NumElts ? V1 : V2; |
12817 | Res = DAG.getBitcast(MVT::v16i8, Res); |
12818 | |
12819 | // Materialize the run of in-range elements with byte shifts that zero |
12820 | // everything else: |
12821 | //  - ZeroLo == 0: shift left to clear the tail, then right into place. |
12822 | //  - ZeroHi == 0: shift right to clear the head, then left into place. |
12823 | //  - otherwise (pre-SSSE3 only): three shifts to clear both ends. |
12824 | if (ZeroLo == 0) { |
12825 | unsigned Shift = (NumElts - 1) - (Mask[ZeroLo + Len - 1] % NumElts); |
12826 | Res = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, Res, |
12827 | DAG.getTargetConstant(Scale * Shift, DL, MVT::i8)); |
12828 | Res = DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v16i8, Res, |
12829 | DAG.getTargetConstant(Scale * ZeroHi, DL, MVT::i8)); |
12830 | } else if (ZeroHi == 0) { |
12831 | unsigned Shift = Mask[ZeroLo] % NumElts; |
12832 | Res = DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v16i8, Res, |
12833 | DAG.getTargetConstant(Scale * Shift, DL, MVT::i8)); |
12834 | Res = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, Res, |
12835 | DAG.getTargetConstant(Scale * ZeroLo, DL, MVT::i8)); |
12836 | } else if (!Subtarget.hasSSSE3()) { |
12837 | |
12838 | |
12839 | |
12840 | unsigned Shift = (NumElts - 1) - (Mask[ZeroLo + Len - 1] % NumElts); |
12841 | Res = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, Res, |
12842 | DAG.getTargetConstant(Scale * Shift, DL, MVT::i8)); |
12843 | Shift += Mask[ZeroLo] % NumElts; |
12844 | Res = DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v16i8, Res, |
12845 | DAG.getTargetConstant(Scale * Shift, DL, MVT::i8)); |
12846 | Res = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, Res, |
12847 | DAG.getTargetConstant(Scale * ZeroLo, DL, MVT::i8)); |
12848 | } else |
12849 | return SDValue(); |
12850 | |
12851 | return DAG.getBitcast(VT, Res); |
12852 | } |
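      | // Illustrative worked example (added commentary, not from the original |
      | // source): for v8i16 Mask = <Z,Z,2,3,4,5,Z,Z> (ZeroLo = 2, ZeroHi = 2, |
      | // Len = 4), neither fast path applies, so on pre-SSSE3 targets the run |
      | // is produced with three byte shifts: left by 4 bytes (clearing the |
      | // tail), right by 8 bytes (clearing the head), then left by 4 bytes |
      | // into its final position. |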
12853 | |
12854 | |
12855 | |
12856 | /// Try to lower a vector shuffle as a bit shift (shifts in zeros). |
12857 | /// |
12858 | /// Attempts to match a shuffle mask against the PSLL(W/D/Q/DQ) and |
12859 | /// PSRL(W/D/Q/DQ) SSE2 and AVX2 logical bit-shift instructions. The |
12860 | /// function matches elements from one of the input vectors shuffled to the |
12861 | /// left or right with zeroable elements 'shifted in'. It handles both the |
12862 | /// strictly bit-wise element shifts and the byte shift across an entire |
12863 | /// 128-bit double quad word lane. |
12864 | /// |
12865 | /// PSLL : (little-endian) left element shift, e.g. for v4i32: |
12866 | /// [ zz,  0,  1,  2 ] |
12867 | /// [ zz,  0, -1,  2 ] |
12868 | /// PSRL : (little-endian) right element shift: |
12869 | /// [  1,  2,  3, zz ] |
12870 | /// [  1, -1,  3, zz ] |
12871 | /// PSLLDQ : (little-endian) left byte shift, e.g. for v8i16: |
12872 | /// [ zz, zz,  0,  1,  2,  3,  4,  5 ] |
12873 | /// [ zz, zz, -1,  1,  2, -1,  4,  5 ] |
12874 | /// PSRLDQ : (little-endian) right byte shift: |
12875 | /// [  2,  3,  4,  5,  6,  7, zz, zz ] |
12876 | /// [  2, -1,  4,  5, -1,  7, zz, zz ] |
12877 | static int matchShuffleAsShift(MVT &ShiftVT, unsigned &Opcode, |
12878 | unsigned ScalarSizeInBits, ArrayRef<int> Mask, |
12879 | int MaskOffset, const APInt &Zeroable, |
12880 | const X86Subtarget &Subtarget) { |
12881 | int Size = Mask.size(); |
12882 | unsigned SizeInBits = Size * ScalarSizeInBits; |
12883 | |
12884 | auto CheckZeros = [&](int Shift, int Scale, bool Left) { |
12885 | for (int i = 0; i < Size; i += Scale) |
12886 | for (int j = 0; j < Shift; ++j) |
12887 | if (!Zeroable[i + j + (Left ? 0 : (Scale - Shift))]) |
12888 | return false; |
12889 | |
12890 | return true; |
12891 | }; |
12892 | |
12893 | auto MatchShift = [&](int Shift, int Scale, bool Left) { |
12894 | for (int i = 0; i != Size; i += Scale) { |
12895 | unsigned Pos = Left ? i + Shift : i; |
12896 | unsigned Low = Left ? i : i + Shift; |
12897 | unsigned Len = Scale - Shift; |
12898 | if (!isSequentialOrUndefInRange(Mask, Pos, Len, Low + MaskOffset)) |
12899 | return -1; |
12900 | } |
12901 | |
12902 | int ShiftEltBits = ScalarSizeInBits * Scale; |
12903 | bool ByteShift = ShiftEltBits > 64; |
12904 | Opcode = Left ? (ByteShift ? X86ISD::VSHLDQ : X86ISD::VSHLI) |
12905 | : (ByteShift ? X86ISD::VSRLDQ : X86ISD::VSRLI); |
12906 | int ShiftAmt = Shift * ScalarSizeInBits / (ByteShift ? 8 : 1); |
12907 | |
12908 | |
12909 | |
12910 | Scale = ByteShift ? Scale / 2 : Scale; |
12911 | |
12912 | |
12913 | MVT ShiftSVT = MVT::getIntegerVT(ScalarSizeInBits * Scale); |
12914 | ShiftVT = ByteShift ? MVT::getVectorVT(MVT::i8, SizeInBits / 8) |
12915 | : MVT::getVectorVT(ShiftSVT, Size / Scale); |
12916 | return (int)ShiftAmt; |
12917 | }; |
12918 | |
12919 | |
12920 | |
12921 | // SSE/AVX supports logical shifts up to 64-bit integers - so we can just |
12922 | // keep doubling the element size up to that limit, trying every legal |
12923 | // (scale, shift, direction) combination. 512-bit vectors without BWI are |
12924 | // capped at 64-bit granularity. |
12925 | unsigned MaxWidth = ((SizeInBits == 512) && !Subtarget.hasBWI() ? 64 : 128); |
12926 | for (int Scale = 2; Scale * ScalarSizeInBits <= MaxWidth; Scale *= 2) |
12927 | for (int Shift = 1; Shift != Scale; ++Shift) |
12928 | for (bool Left : {true, false}) |
12929 | if (CheckZeros(Shift, Scale, Left)) { |
12930 | int ShiftAmt = MatchShift(Shift, Scale, Left); |
12931 | if (0 < ShiftAmt) |
12932 | return ShiftAmt; |
12933 | } |
12934 | |
12935 | |
12936 | return -1; |
12937 | } |
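      | // Illustrative worked example (added commentary, not from the original |
      | // source): v4i32 Mask = <1,2,3,Z> with element 3 zeroable matches at |
      | // Scale = 4, Shift = 1, Left = false; ShiftEltBits = 128 makes it a |
      | // byte shift, so the result is VSRLDQ (PSRLDQ) by 1 * 32 / 8 = 4 bytes. |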
12938 | |
12939 | static SDValue lowerShuffleAsShift(const SDLoc &DL, MVT VT, SDValue V1, |
12940 | SDValue V2, ArrayRef<int> Mask, |
12941 | const APInt &Zeroable, |
12942 | const X86Subtarget &Subtarget, |
12943 | SelectionDAG &DAG) { |
12944 | int Size = Mask.size(); |
12945 | assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size"); |
12946 | |
12947 | MVT ShiftVT; |
12948 | SDValue V = V1; |
12949 | unsigned Opcode; |
12950 | |
12951 | // Try to match shuffle against V1 shift. |
12952 | int ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(), |
12953 | Mask, 0, Zeroable, Subtarget); |
12954 | |
12955 | // Failed to match against V1 - try against V2 shift. |
12956 | if (ShiftAmt < 0) { |
12957 | ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(), |
12958 | Mask, Size, Zeroable, Subtarget); |
12959 | V = V2; |
12960 | } |
12961 | |
12962 | if (ShiftAmt < 0) |
12963 | return SDValue(); |
12964 | |
12965 | assert(DAG.getTargetLoweringInfo().isTypeLegal(ShiftVT) && |
12966 | "Illegal integer vector type"); |
12967 | V = DAG.getBitcast(ShiftVT, V); |
12968 | V = DAG.getNode(Opcode, DL, ShiftVT, V, |
12969 | DAG.getTargetConstant(ShiftAmt, DL, MVT::i8)); |
12970 | return DAG.getBitcast(VT, V); |
12971 | } |
12972 | |
12973 | // EXTRQ: Extract Len elements from lower half of source, starting at Idx. |
12974 | // Remainder of lower half result is zero and remainder of upper half is undef. |
12975 | static bool matchShuffleAsEXTRQ(MVT VT, SDValue &V1, SDValue &V2, |
12976 | ArrayRef<int> Mask, uint64_t &BitLen, |
12977 | uint64_t &BitIdx, const APInt &Zeroable) { |
12978 | int Size = Mask.size(); |
12979 | int HalfSize = Size / 2; |
12980 | assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size"); |
12981 | assert(!Zeroable.isAllOnesValue() && "Fully zeroable shuffle mask"); |
12982 | |
12983 | |
12984 | if (!isUndefUpperHalf(Mask)) |
12985 | return false; |
12986 | |
12987 | |
12988 | |
12989 | int Len = HalfSize; |
12990 | for (; Len > 0; --Len) |
12991 | if (!Zeroable[Len - 1]) |
12992 | break; |
12993 | assert(Len > 0 && "Zeroable shuffle mask"); |
12994 | |
12995 | |
12996 | SDValue Src; |
12997 | int Idx = -1; |
12998 | for (int i = 0; i != Len; ++i) { |
12999 | int M = Mask[i]; |
13000 | if (M == SM_SentinelUndef) |
13001 | continue; |
13002 | SDValue &V = (M < Size ? V1 : V2); |
13003 | M = M % Size; |
13004 | |
13005 | // The extracted elements must start at a valid index and all mask |
13006 | // elements must be in the lower half. |
13007 | if (i > M || M >= HalfSize) |
13008 | return false; |
13009 | |
13010 | if (Idx < 0 || (Src == V && Idx == (M - i))) { |
13011 | Src = V; |
13012 | Idx = M - i; |
13013 | continue; |
13014 | } |
13015 | return false; |
13016 | } |
13017 | |
13018 | if (!Src || Idx < 0) |
13019 | return false; |
13020 | |
13021 | assert((Idx + Len) <= HalfSize && "Illegal extraction mask"); |
13022 | BitLen = (Len * VT.getScalarSizeInBits()) & 0x3f; |
13023 | BitIdx = (Idx * VT.getScalarSizeInBits()) & 0x3f; |
13024 | V1 = Src; |
13025 | return true; |
13026 | } |
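      | // Illustrative worked example (added commentary, not from the original |
      | // source): v8i16 Mask = <2,3,Z,Z,u,u,u,u> (upper half undef, elements |
      | // 2..3 moved to the bottom, remainder zero) gives Len = 2 and Idx = 2, |
      | // i.e. an EXTRQI with BitLen = 32 and BitIdx = 32. |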
13027 | |
13028 | // INSERTQ: Extract lowest Len elements from lower half of second source and |
13029 | // insert over first source, starting at Idx. |
13030 | // { A[0], .., A[Idx-1], B[0], .., B[Len-1], A[Idx+Len], .., UNDEF, ... } |
13031 | static bool matchShuffleAsINSERTQ(MVT VT, SDValue &V1, SDValue &V2, |
13032 | ArrayRef<int> Mask, uint64_t &BitLen, |
13033 | uint64_t &BitIdx) { |
13034 | int Size = Mask.size(); |
13035 | int HalfSize = Size / 2; |
13036 | assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size"); |
13037 | |
13038 | |
13039 | if (!isUndefUpperHalf(Mask)) |
13040 | return false; |
13041 | |
13042 | for (int Idx = 0; Idx != HalfSize; ++Idx) { |
13043 | SDValue Base; |
13044 | |
13045 | |
13046 | if (isUndefInRange(Mask, 0, Idx)) { |
13047 | |
13048 | } else if (isSequentialOrUndefInRange(Mask, 0, Idx, 0)) { |
13049 | Base = V1; |
13050 | } else if (isSequentialOrUndefInRange(Mask, 0, Idx, Size)) { |
13051 | Base = V2; |
13052 | } else { |
13053 | continue; |
13054 | } |
13055 | |
13056 | |
13057 | |
13058 | for (int Hi = Idx + 1; Hi <= HalfSize; ++Hi) { |
13059 | SDValue Insert; |
13060 | int Len = Hi - Idx; |
13061 | |
13062 | |
13063 | if (isSequentialOrUndefInRange(Mask, Idx, Len, 0)) { |
13064 | Insert = V1; |
13065 | } else if (isSequentialOrUndefInRange(Mask, Idx, Len, Size)) { |
13066 | Insert = V2; |
13067 | } else { |
13068 | continue; |
13069 | } |
13070 | |
13071 | |
13072 | if (isUndefInRange(Mask, Hi, HalfSize - Hi)) { |
13073 | |
13074 | } else if ((!Base || (Base == V1)) && |
13075 | isSequentialOrUndefInRange(Mask, Hi, HalfSize - Hi, Hi)) { |
13076 | Base = V1; |
13077 | } else if ((!Base || (Base == V2)) && |
13078 | isSequentialOrUndefInRange(Mask, Hi, HalfSize - Hi, |
13079 | Size + Hi)) { |
13080 | Base = V2; |
13081 | } else { |
13082 | continue; |
13083 | } |
13084 | |
13085 | BitLen = (Len * VT.getScalarSizeInBits()) & 0x3f; |
13086 | BitIdx = (Idx * VT.getScalarSizeInBits()) & 0x3f; |
13087 | V1 = Base; |
13088 | V2 = Insert; |
13089 | return true; |
13090 | } |
13091 | } |
13092 | |
13093 | return false; |
13094 | } |
13095 | |
13096 | /// Try to lower a vector shuffle using SSE4a EXTRQ/INSERTQ. |
13097 | static SDValue lowerShuffleWithSSE4A(const SDLoc &DL, MVT VT, SDValue V1, |
13098 | SDValue V2, ArrayRef<int> Mask, |
13099 | const APInt &Zeroable, SelectionDAG &DAG) { |
13100 | uint64_t BitLen, BitIdx; |
13101 | if (matchShuffleAsEXTRQ(VT, V1, V2, Mask, BitLen, BitIdx, Zeroable)) |
13102 | return DAG.getNode(X86ISD::EXTRQI, DL, VT, V1, |
13103 | DAG.getTargetConstant(BitLen, DL, MVT::i8), |
13104 | DAG.getTargetConstant(BitIdx, DL, MVT::i8)); |
13105 | |
13106 | if (matchShuffleAsINSERTQ(VT, V1, V2, Mask, BitLen, BitIdx)) |
13107 | return DAG.getNode(X86ISD::INSERTQI, DL, VT, V1 ? V1 : DAG.getUNDEF(VT), |
13108 | V2 ? V2 : DAG.getUNDEF(VT), |
13109 | DAG.getTargetConstant(BitLen, DL, MVT::i8), |
13110 | DAG.getTargetConstant(BitIdx, DL, MVT::i8)); |
13111 | |
13112 | return SDValue(); |
13113 | } |
13114 | |
13115 | |
13116 | /// Lower a vector shuffle as a zero or any extension. |
13117 | /// |
13118 | /// Given a specific number of elements, element bit width, and extension |
13119 | /// stride, produce either a zero or any extension based on the available |
13120 | /// features of the subtarget. The extended elements are consecutive and |
13121 | /// begin at the given offset, which must either lie within the bottom |
13122 | /// 128-bit lane or start an upper lane; all extended elements must come |
13123 | /// from a single input lane. |
13124 | static SDValue lowerShuffleAsSpecificZeroOrAnyExtend( |
13125 | const SDLoc &DL, MVT VT, int Scale, int Offset, bool AnyExt, SDValue InputV, |
13126 | ArrayRef<int> Mask, const X86Subtarget &Subtarget, SelectionDAG &DAG) { |
13127 | assert(Scale > 1 && "Need a scale to extend."); |
13128 | int EltBits = VT.getScalarSizeInBits(); |
13129 | int NumElements = VT.getVectorNumElements(); |
13130 | int NumEltsPerLane = 128 / EltBits; |
13131 | int OffsetLane = Offset / NumEltsPerLane; |
13132 | assert((EltBits == 8 || EltBits == 16 || EltBits == 32) && |
13133 | "Only 8, 16, and 32 bit elements can be extended."); |
13134 | assert(Scale * EltBits <= 64 && "Cannot zero extend past 64 bits."); |
13135 | assert(0 <= Offset && "Extension offset must be positive."); |
13136 | assert((Offset < NumEltsPerLane || Offset % NumEltsPerLane == 0) && |
13137 | "Extension offset must be in the first lane or start an upper lane."); |
13138 | |
13139 | |
13140 | auto SafeOffset = [&](int Idx) { |
13141 | return OffsetLane == (Idx / NumEltsPerLane); |
13142 | }; |
13143 | |
13144 | |
13145 | auto ShuffleOffset = [&](SDValue V) { |
13146 | if (!Offset) |
13147 | return V; |
13148 | |
13149 | SmallVector<int, 8> ShMask((unsigned)NumElements, -1); |
13150 | for (int i = 0; i * Scale < NumElements; ++i) { |
13151 | int SrcIdx = i + Offset; |
13152 | ShMask[i] = SafeOffset(SrcIdx) ? SrcIdx : -1; |
13153 | } |
13154 | return DAG.getVectorShuffle(VT, DL, V, DAG.getUNDEF(VT), ShMask); |
13155 | }; |
13156 | |
13157 | |
13158 | |
13159 | if (Subtarget.hasSSE41()) { |
13160 | |
13161 | |
13162 | if (Offset && Scale == 2 && VT.is128BitVector()) |
13163 | return SDValue(); |
13164 | MVT ExtVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Scale), |
13165 | NumElements / Scale); |
13166 | InputV = ShuffleOffset(InputV); |
13167 | InputV = getEXTEND_VECTOR_INREG(AnyExt ? ISD::ANY_EXTEND : ISD::ZERO_EXTEND, |
13168 | DL, ExtVT, InputV, DAG); |
13169 | return DAG.getBitcast(VT, InputV); |
13170 | } |
13171 | |
13172 | assert(VT.is128BitVector() && "Only 128-bit vectors can be extended."); |
13173 | |
13174 | |
13175 | |
13176 | if (AnyExt && EltBits == 32) { |
13177 | int PSHUFDMask[4] = {Offset, -1, SafeOffset(Offset + 1) ? Offset + 1 : -1, |
13178 | -1}; |
13179 | return DAG.getBitcast( |
13180 | VT, DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, |
13181 | DAG.getBitcast(MVT::v4i32, InputV), |
13182 | getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG))); |
13183 | } |
13184 | if (AnyExt && EltBits == 16 && Scale > 2) { |
13185 | int PSHUFDMask[4] = {Offset / 2, -1, |
13186 | SafeOffset(Offset + 1) ? (Offset + 1) / 2 : -1, -1}; |
13187 | InputV = DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, |
13188 | DAG.getBitcast(MVT::v4i32, InputV), |
13189 | getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG)); |
13190 | int PSHUFWMask[4] = {1, -1, -1, -1}; |
13191 | unsigned OddEvenOp = (Offset & 1) ? X86ISD::PSHUFLW : X86ISD::PSHUFHW; |
13192 | return DAG.getBitcast( |
13193 | VT, DAG.getNode(OddEvenOp, DL, MVT::v8i16, |
13194 | DAG.getBitcast(MVT::v8i16, InputV), |
13195 | getV4X86ShuffleImm8ForMask(PSHUFWMask, DL, DAG))); |
13196 | } |
13197 | |
13198 | |
13199 | |
13200 | if ((Scale * EltBits) == 64 && EltBits < 32 && Subtarget.hasSSE4A()) { |
13201 | assert(NumElements == (int)Mask.size() && "Unexpected shuffle mask size!"); |
13202 | assert(VT.is128BitVector() && "Unexpected vector width!"); |
13203 | |
13204 | int LoIdx = Offset * EltBits; |
13205 | SDValue Lo = DAG.getBitcast( |
13206 | MVT::v2i64, DAG.getNode(X86ISD::EXTRQI, DL, VT, InputV, |
13207 | DAG.getTargetConstant(EltBits, DL, MVT::i8), |
13208 | DAG.getTargetConstant(LoIdx, DL, MVT::i8))); |
13209 | |
13210 | if (isUndefUpperHalf(Mask) || !SafeOffset(Offset + 1)) |
13211 | return DAG.getBitcast(VT, Lo); |
13212 | |
13213 | int HiIdx = (Offset + 1) * EltBits; |
13214 | SDValue Hi = DAG.getBitcast( |
13215 | MVT::v2i64, DAG.getNode(X86ISD::EXTRQI, DL, VT, InputV, |
13216 | DAG.getTargetConstant(EltBits, DL, MVT::i8), |
13217 | DAG.getTargetConstant(HiIdx, DL, MVT::i8))); |
13218 | return DAG.getBitcast(VT, |
13219 | DAG.getNode(X86ISD::UNPCKL, DL, MVT::v2i64, Lo, Hi)); |
13220 | } |
13221 | |
13222 | // If this would require more than 2 unpack instructions to expand, use |
13223 | // pshufb when available. We can only use more than 2 unpack instructions |
13224 | // when zero extending i8 elements which also makes it easier to use pshufb. |
13225 | if (Scale > 4 && EltBits == 8 && Subtarget.hasSSSE3()) { |
13226 | assert(NumElements == 16 && "Unexpected byte vector width!"); |
13227 | SDValue PSHUFBMask[16]; |
13228 | for (int i = 0; i < 16; ++i) { |
13229 | int Idx = Offset + (i / Scale); |
13230 | if ((i % Scale == 0 && SafeOffset(Idx))) { |
13231 | PSHUFBMask[i] = DAG.getConstant(Idx, DL, MVT::i8); |
13232 | continue; |
13233 | } |
13234 | PSHUFBMask[i] = |
13235 | AnyExt ? DAG.getUNDEF(MVT::i8) : DAG.getConstant(0x80, DL, MVT::i8); |
13236 | } |
13237 | InputV = DAG.getBitcast(MVT::v16i8, InputV); |
13238 | return DAG.getBitcast( |
13239 | VT, DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8, InputV, |
13240 | DAG.getBuildVector(MVT::v16i8, DL, PSHUFBMask))); |
13241 | } |
13242 | |
13243 | |
13244 | |
13245 | int AlignToUnpack = Offset % (NumElements / Scale); |
13246 | if (AlignToUnpack) { |
13247 | SmallVector<int, 8> ShMask((unsigned)NumElements, -1); |
13248 | for (int i = AlignToUnpack; i < NumElements; ++i) |
13249 | ShMask[i - AlignToUnpack] = i; |
13250 | InputV = DAG.getVectorShuffle(VT, DL, InputV, DAG.getUNDEF(VT), ShMask); |
13251 | Offset -= AlignToUnpack; |
13252 | } |
13253 | |
13254 | |
13255 | do { |
13256 | unsigned UnpackLoHi = X86ISD::UNPCKL; |
13257 | if (Offset >= (NumElements / 2)) { |
13258 | UnpackLoHi = X86ISD::UNPCKH; |
13259 | Offset -= (NumElements / 2); |
13260 | } |
13261 | |
13262 | MVT InputVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits), NumElements); |
13263 | SDValue Ext = AnyExt ? DAG.getUNDEF(InputVT) |
13264 | : getZeroVector(InputVT, Subtarget, DAG, DL); |
13265 | InputV = DAG.getBitcast(InputVT, InputV); |
13266 | InputV = DAG.getNode(UnpackLoHi, DL, InputVT, InputV, Ext); |
13267 | Scale /= 2; |
13268 | EltBits *= 2; |
13269 | NumElements /= 2; |
13270 | } while (Scale > 1); |
13271 | return DAG.getBitcast(VT, InputV); |
13272 | } |
13273 | |
13274 | |
13275 | |
13276 | /// Try to lower a vector shuffle as a zero extension on any microarch. |
13277 | /// |
13278 | /// This routine will try to do everything in its power to cleverly lower |
13279 | /// a shuffle which happens to match the pattern of a zero extend. It |
13280 | /// doesn't check for the profitability of this lowering, it tries to |
13281 | /// aggressively match this pattern. It handles both blends with all-zero |
13282 | /// inputs to explicitly zero-extend and undef-everywhere-else inputs to |
13283 | /// implicitly any-extend. |
13284 | |
13285 | |
13286 | static SDValue lowerShuffleAsZeroOrAnyExtend( |
13287 | const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask, |
13288 | const APInt &Zeroable, const X86Subtarget &Subtarget, |
13289 | SelectionDAG &DAG) { |
13290 | int Bits = VT.getSizeInBits(); |
13291 | int NumLanes = Bits / 128; |
13292 | int NumElements = VT.getVectorNumElements(); |
13293 | int NumEltsPerLane = NumElements / NumLanes; |
13294 | assert(VT.getScalarSizeInBits() <= 32 && |
13295 | "Exceeds 32-bit integer zero extension limit"); |
13296 | assert((int)Mask.size() == NumElements && "Unexpected shuffle mask size"); |
13297 | |
13298 | |
13299 | |
13300 | auto Lower = [&](int Scale) -> SDValue { |
13301 | SDValue InputV; |
13302 | bool AnyExt = true; |
13303 | int Offset = 0; |
13304 | int Matches = 0; |
13305 | for (int i = 0; i < NumElements; ++i) { |
13306 | int M = Mask[i]; |
13307 | if (M < 0) |
13308 | continue; |
13309 | if (i % Scale != 0) { |
13310 | |
13311 | if (!Zeroable[i]) |
13312 | return SDValue(); |
13313 | |
13314 | |
13315 | AnyExt = false; |
13316 | continue; |
13317 | } |
13318 | |
13319 | |
13320 | |
13321 | SDValue V = M < NumElements ? V1 : V2; |
13322 | M = M % NumElements; |
13323 | if (!InputV) { |
13324 | InputV = V; |
13325 | Offset = M - (i / Scale); |
13326 | } else if (InputV != V) |
13327 | return SDValue(); |
13328 | |
13329 | |
13330 | |
13331 | |
13332 | if (!((0 <= Offset && Offset < NumEltsPerLane) || |
13333 | (Offset % NumEltsPerLane) == 0)) |
13334 | return SDValue(); |
13335 | |
13336 | |
13337 | |
13338 | if (Offset && (Offset / NumEltsPerLane) != (M / NumEltsPerLane)) |
13339 | return SDValue(); |
13340 | |
13341 | if ((M % NumElements) != (Offset + (i / Scale))) |
13342 | return SDValue(); |
13343 | Matches++; |
13344 | } |
13345 | |
13346 | |
13347 | |
13348 | |
13349 | if (!InputV) |
13350 | return SDValue(); |
13351 | |
13352 | |
13353 | |
13354 | if (Offset != 0 && Matches < 2) |
13355 | return SDValue(); |
13356 | |
13357 | return lowerShuffleAsSpecificZeroOrAnyExtend(DL, VT, Scale, Offset, AnyExt, |
13358 | InputV, Mask, Subtarget, DAG); |
13359 | }; |
13360 | |
13361 | |
13362 | assert(Bits % 64 == 0 && |
13363 | "The number of bits in a vector must be divisible by 64 on x86!"); |
13364 | int NumExtElements = Bits / 64; |
13365 | |
13366 | |
13367 | |
13368 | for (; NumExtElements < NumElements; NumExtElements *= 2) { |
13369 | assert(NumElements % NumExtElements == 0 && |
13370 | "The input vector size must be divisible by the extended size."); |
13371 | if (SDValue V = Lower(NumElements / NumExtElements)) |
13372 | return V; |
13373 | } |
13374 | |
13375 | |
13376 | if (Bits != 128) |
13377 | return SDValue(); |
13378 | |
13379 | |
13380 | |
13381 | auto CanZExtLowHalf = [&]() { |
13382 | for (int i = NumElements / 2; i != NumElements; ++i) |
13383 | if (!Zeroable[i]) |
13384 | return SDValue(); |
13385 | if (isSequentialOrUndefInRange(Mask, 0, NumElements / 2, 0)) |
13386 | return V1; |
13387 | if (isSequentialOrUndefInRange(Mask, 0, NumElements / 2, NumElements)) |
13388 | return V2; |
13389 | return SDValue(); |
13390 | }; |
13391 | |
13392 | if (SDValue V = CanZExtLowHalf()) { |
13393 | V = DAG.getBitcast(MVT::v2i64, V); |
13394 | V = DAG.getNode(X86ISD::VZEXT_MOVL, DL, MVT::v2i64, V); |
13395 | return DAG.getBitcast(VT, V); |
13396 | } |
13397 | |
13398 | |
13399 | return SDValue(); |
13400 | } |
13401 | |
13402 | |
13403 | /// Try to get a scalar value for a specific element of a vector, looking |
13404 | /// through BUILD_VECTOR and SCALAR_TO_VECTOR nodes (and their bitcasts). |
13405 | static SDValue getScalarValueForVectorElement(SDValue V, int Idx, |
13406 | SelectionDAG &DAG) { |
13407 | MVT VT = V.getSimpleValueType(); |
13408 | MVT EltVT = VT.getVectorElementType(); |
13409 | V = peekThroughBitcasts(V); |
13410 | |
13411 | |
13412 | |
13413 | MVT NewVT = V.getSimpleValueType(); |
13414 | if (!NewVT.isVector() || NewVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) |
13415 | return SDValue(); |
13416 | |
13417 | if (V.getOpcode() == ISD::BUILD_VECTOR || |
13418 | (Idx == 0 && V.getOpcode() == ISD::SCALAR_TO_VECTOR)) { |
13419 | |
13420 | |
13421 | SDValue S = V.getOperand(Idx); |
13422 | if (EltVT.getSizeInBits() == S.getSimpleValueType().getSizeInBits()) |
13423 | return DAG.getBitcast(EltVT, S); |
13424 | } |
13425 | |
13426 | return SDValue(); |
13427 | } |
13428 | |
13429 | |
13430 | /// Helper to test for a load that can be folded with x86 shuffles. |
13431 | /// This matters because the set of legal shuffle lowerings differs |
13432 | /// significantly based on whether the operand is a load or not. |
13433 | static bool isShuffleFoldableLoad(SDValue V) { |
13434 | V = peekThroughBitcasts(V); |
13435 | return ISD::isNON_EXTLoad(V.getNode()); |
13436 | } |
13437 | |
13438 | |
13439 | /// Try to lower insertion of a single element into a zero vector. |
13440 | /// This is a common pattern for which we have especially efficient |
13441 | /// lowerings across all subtarget feature sets. |
13442 | static SDValue lowerShuffleAsElementInsertion( |
13443 | const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask, |
13444 | const APInt &Zeroable, const X86Subtarget &Subtarget, |
13445 | SelectionDAG &DAG) { |
13446 | MVT ExtVT = VT; |
13447 | MVT EltVT = VT.getVectorElementType(); |
13448 | |
13449 | int V2Index = |
13450 | find_if(Mask, [&Mask](int M) { return M >= (int)Mask.size(); }) - |
13451 | Mask.begin(); |
13452 | bool IsV1Zeroable = true; |
13453 | for (int i = 0, Size = Mask.size(); i < Size; ++i) |
13454 | if (i != V2Index && !Zeroable[i]) { |
13455 | IsV1Zeroable = false; |
13456 | break; |
13457 | } |
13458 | |
13459 | |
13460 | |
13461 | |
13462 | |
13463 | |
13464 | SDValue V2S = getScalarValueForVectorElement(V2, Mask[V2Index] - Mask.size(), |
13465 | DAG); |
13466 | if (V2S && DAG.getTargetLoweringInfo().isTypeLegal(V2S.getValueType())) { |
13467 | |
13468 | V2S = DAG.getBitcast(EltVT, V2S); |
13469 | if (EltVT == MVT::i8 || EltVT == MVT::i16) { |
13470 | |
13471 | |
13472 | if (!IsV1Zeroable) |
13473 | return SDValue(); |
13474 | |
13475 | |
13476 | ExtVT = MVT::getVectorVT(MVT::i32, ExtVT.getSizeInBits() / 32); |
13477 | V2S = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, V2S); |
13478 | } |
13479 | V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, ExtVT, V2S); |
13480 | } else if (Mask[V2Index] != (int)Mask.size() || EltVT == MVT::i8 || |
13481 | EltVT == MVT::i16) { |
13482 | |
13483 | |
13484 | return SDValue(); |
13485 | } |
13486 | |
13487 | if (!IsV1Zeroable) { |
13488 | // If V1 can't be treated as a zero vector we have fewer options to lower |
13489 | // this. We can't support integer vectors or non-zero targets cheaply, and |
13490 | // the V1 elements can't be permuted in any way. |
13491 | assert(VT == ExtVT && "Cannot change extended type when non-zeroable!"); |
13492 | if (!VT.isFloatingPoint() || V2Index != 0) |
13493 | return SDValue(); |
13494 | SmallVector<int, 8> V1Mask(Mask.begin(), Mask.end()); |
13495 | V1Mask[V2Index] = -1; |
13496 | if (!isNoopShuffleMask(V1Mask)) |
13497 | return SDValue(); |
13498 | if (!VT.is128BitVector()) |
13499 | return SDValue(); |
13500 | |
13501 | |
13502 | assert((EltVT == MVT::f32 || EltVT == MVT::f64) && |
13503 | "Only two types of floating point element types to handle!"); |
13504 | return DAG.getNode(EltVT == MVT::f32 ? X86ISD::MOVSS : X86ISD::MOVSD, DL, |
13505 | ExtVT, V1, V2); |
13506 | } |
13507 | |
13508 | |
13509 | if (VT.isFloatingPoint() && V2Index != 0) |
13510 | return SDValue(); |
13511 | |
13512 | V2 = DAG.getNode(X86ISD::VZEXT_MOVL, DL, ExtVT, V2); |
13513 | if (ExtVT != VT) |
13514 | V2 = DAG.getBitcast(VT, V2); |
13515 | |
13516 | if (V2Index != 0) { |
13517 | // If we have 4 or fewer lanes we can cheaply shuffle the element into |
13518 | // the desired position. Otherwise it is more efficient to do a vector |
13519 | // shift left. We know that we can do a vector shift left because all |
13520 | // the inputs are zero. |
13521 | if (VT.isFloatingPoint() || VT.getVectorNumElements() <= 4) { |
13522 | SmallVector<int, 4> V2Shuffle(Mask.size(), 1); |
13523 | V2Shuffle[V2Index] = 0; |
13524 | V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), V2Shuffle); |
13525 | } else { |
13526 | V2 = DAG.getBitcast(MVT::v16i8, V2); |
13527 | V2 = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, V2, |
13528 | DAG.getTargetConstant( |
13529 | V2Index * EltVT.getSizeInBits() / 8, DL, MVT::i8)); |
13530 | V2 = DAG.getBitcast(VT, V2); |
13531 | } |
13532 | } |
13533 | return V2; |
13534 | } |
13535 | |
13536 | |
13537 | /// Try to lower a broadcast of a single - truncated - integer element |
13538 | /// coming from a scalar_to_vector/build_vector node \p V0 with larger |
13539 | /// elements. This assumes we have AVX2. |
13540 | static SDValue lowerShuffleAsTruncBroadcast(const SDLoc &DL, MVT VT, SDValue V0, |
13541 | int BroadcastIdx, |
13542 | const X86Subtarget &Subtarget, |
13543 | SelectionDAG &DAG) { |
13544 | assert(Subtarget.hasAVX2() && |
13545 | "We can only lower integer broadcasts with AVX2!"); |
13546 | |
13547 | MVT EltVT = VT.getVectorElementType(); |
13548 | MVT V0VT = V0.getSimpleValueType(); |
13549 | |
13550 | assert(VT.isInteger() && "Unexpected non-integer trunc broadcast!"); |
13551 | assert(V0VT.isVector() && "Unexpected non-vector vector-sized value!"); |
13552 | |
13553 | MVT V0EltVT = V0VT.getVectorElementType(); |
13554 | if (!V0EltVT.isInteger()) |
13555 | return SDValue(); |
13556 | |
13557 | const unsigned EltSize = EltVT.getSizeInBits(); |
13558 | const unsigned V0EltSize = V0EltVT.getSizeInBits(); |
13559 | |
13560 | |
13561 | if (V0EltSize <= EltSize) |
13562 | return SDValue(); |
13563 | |
13564 | assert(((V0EltSize % EltSize) == 0) && |
13565 | "Scalar type sizes must all be powers of 2 on x86!"); |
13566 | |
13567 | const unsigned V0Opc = V0.getOpcode(); |
13568 | const unsigned Scale = V0EltSize / EltSize; |
13569 | const unsigned V0BroadcastIdx = BroadcastIdx / Scale; |
13570 | |
13571 | if ((V0Opc != ISD::SCALAR_TO_VECTOR || V0BroadcastIdx != 0) && |
13572 | V0Opc != ISD::BUILD_VECTOR) |
13573 | return SDValue(); |
13574 | |
13575 | SDValue Scalar = V0.getOperand(V0BroadcastIdx); |
13576 | |
13577 | |
13578 | |
13579 | |
13580 | |
13581 | if (const int OffsetIdx = BroadcastIdx % Scale) |
13582 | Scalar = DAG.getNode(ISD::SRL, DL, Scalar.getValueType(), Scalar, |
13583 | DAG.getConstant(OffsetIdx * EltSize, DL, MVT::i8)); |
13584 | |
13585 | return DAG.getNode(X86ISD::VBROADCAST, DL, VT, |
13586 | DAG.getNode(ISD::TRUNCATE, DL, EltVT, Scalar)); |
13587 | } |
13588 | |
13589 | |
13590 | /// Test whether this can be lowered with a single SHUFPS instruction. |
13591 | /// This is used to disable more specialized lowerings when the shufps |
13592 | /// lowering will happen to be efficient. |
13593 | static bool isSingleSHUFPSMask(ArrayRef<int> Mask) { |
13594 | |
13595 | assert(Mask.size() == 4 && "Unsupported mask size!"); |
13596 | assert(Mask[0] >= -1 && Mask[0] < 8 && "Out of bound mask element!"); |
13597 | assert(Mask[1] >= -1 && Mask[1] < 8 && "Out of bound mask element!"); |
13598 | assert(Mask[2] >= -1 && Mask[2] < 8 && "Out of bound mask element!"); |
13599 | assert(Mask[3] >= -1 && Mask[3] < 8 && "Out of bound mask element!"); |
13600 | |
13601 | // To lower with a single SHUFPS we need to have the low half and high half |
13602 | // each requiring a single input. |
13603 | if (Mask[0] >= 0 && Mask[1] >= 0 && (Mask[0] < 4) != (Mask[1] < 4)) |
13604 | return false; |
13605 | if (Mask[2] >= 0 && Mask[3] >= 0 && (Mask[2] < 4) != (Mask[3] < 4)) |
13606 | return false; |
13607 | |
13608 | return true; |
13609 | } |
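      | // Illustrative examples (added commentary, not from the original |
      | // source): <0,1,4,5> passes since each half reads a single source, |
      | // exactly what SHUFPS encodes, while <0,4,2,6> fails because the low |
      | // half would need one element from each input. |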
13610 | |
13611 | /// If we are extracting two 128-bit halves of a vector and shuffling the |
13612 | /// result, match that to a 256-bit AVX2 vperm* instruction to avoid a |
13613 | /// multi-shuffle lowering. |
13614 | static SDValue lowerShuffleOfExtractsAsVperm(const SDLoc &DL, SDValue N0, |
13615 | SDValue N1, ArrayRef<int> Mask, |
13616 | SelectionDAG &DAG) { |
13617 | MVT VT = N0.getSimpleValueType(); |
13618 | assert((VT.is128BitVector() && |
13619 | (VT.getScalarSizeInBits() == 32 || VT.getScalarSizeInBits() == 64)) && |
13620 | "VPERM* family of shuffles requires 32-bit or 64-bit elements"); |
13621 | |
13622 | |
13623 | if (!N0.hasOneUse() || !N1.hasOneUse() || |
13624 | N0.getOpcode() != ISD::EXTRACT_SUBVECTOR || |
13625 | N1.getOpcode() != ISD::EXTRACT_SUBVECTOR || |
13626 | N0.getOperand(0) != N1.getOperand(0)) |
13627 | return SDValue(); |
13628 | |
13629 | SDValue WideVec = N0.getOperand(0); |
13630 | MVT WideVT = WideVec.getSimpleValueType(); |
13631 | if (!WideVT.is256BitVector()) |
13632 | return SDValue(); |
13633 | |
13634 | |
13635 | |
13636 | unsigned NumElts = VT.getVectorNumElements(); |
13637 | SmallVector<int, 4> NewMask(Mask.begin(), Mask.end()); |
13638 | const APInt &ExtIndex0 = N0.getConstantOperandAPInt(1); |
13639 | const APInt &ExtIndex1 = N1.getConstantOperandAPInt(1); |
13640 | if (ExtIndex1 == 0 && ExtIndex0 == NumElts) |
13641 | ShuffleVectorSDNode::commuteMask(NewMask); |
13642 | else if (ExtIndex0 != 0 || ExtIndex1 != NumElts) |
13643 | return SDValue(); |
13644 | |
13645 | |
13646 | |
13647 | |
13648 | if (NumElts == 4 && |
13649 | (isSingleSHUFPSMask(NewMask) || is128BitUnpackShuffleMask(NewMask))) |
13650 | return SDValue(); |
13651 | |
13652 | |
13653 | NewMask.append(NumElts, -1); |
13654 | |
13655 | |
13656 | SDValue Shuf = DAG.getVectorShuffle(WideVT, DL, WideVec, DAG.getUNDEF(WideVT), |
13657 | NewMask); |
13658 | |
13659 | return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuf, |
13660 | DAG.getIntPtrConstant(0, DL)); |
13661 | } |
13662 | |
13663 | |
13664 | /// Try to lower broadcast of a single element. |
13665 | /// |
13666 | /// For convenience, this code also bundles all of the subtarget feature |
13667 | /// set filtering. |
13668 | static SDValue lowerShuffleAsBroadcast(const SDLoc &DL, MVT VT, SDValue V1, |
13669 | SDValue V2, ArrayRef<int> Mask, |
13670 | const X86Subtarget &Subtarget, |
13671 | SelectionDAG &DAG) { |
13672 | if (!((Subtarget.hasSSE3() && VT == MVT::v2f64) || |
13673 | (Subtarget.hasAVX() && VT.isFloatingPoint()) || |
13674 | (Subtarget.hasAVX2() && VT.isInteger()))) |
13675 | return SDValue(); |
13676 | |
13677 | // With MOVDDUP (v2f64) or AVX2 we can broadcast from a register; on AVX1 |
13678 | // a floating point broadcast must instead come from a load. |
13679 | unsigned NumEltBits = VT.getScalarSizeInBits(); |
13680 | unsigned Opcode = (VT == MVT::v2f64 && !Subtarget.hasAVX2()) |
13681 | ? X86ISD::MOVDDUP |
13682 | : X86ISD::VBROADCAST; |
13683 | bool BroadcastFromReg = (Opcode == X86ISD::MOVDDUP) || Subtarget.hasAVX2(); |
13684 | |
13685 | |
13686 | int BroadcastIdx = getSplatIndex(Mask); |
13687 | if (BroadcastIdx < 0) |
13688 | return SDValue(); |
13689 | assert(BroadcastIdx < (int)Mask.size() && "We only expect to be called with " |
13690 | "a sorted mask where the broadcast " |
13691 | "comes from V1."); |
13692 | |
13693 | |
13694 | |
13695 | |
13696 | |
13697 | int BitOffset = BroadcastIdx * NumEltBits; |
13698 | SDValue V = V1; |
13699 | for (;;) { |
13700 | switch (V.getOpcode()) { |
13701 | case ISD::BITCAST: { |
13702 | V = V.getOperand(0); |
13703 | continue; |
13704 | } |
13705 | case ISD::CONCAT_VECTORS: { |
13706 | int OpBitWidth = V.getOperand(0).getValueSizeInBits(); |
13707 | int OpIdx = BitOffset / OpBitWidth; |
13708 | V = V.getOperand(OpIdx); |
13709 | BitOffset %= OpBitWidth; |
13710 | continue; |
13711 | } |
13712 | case ISD::EXTRACT_SUBVECTOR: { |
13713 | |
13714 | unsigned EltBitWidth = V.getScalarValueSizeInBits(); |
13715 | unsigned Idx = V.getConstantOperandVal(1); |
13716 | unsigned BeginOffset = Idx * EltBitWidth; |
13717 | BitOffset += BeginOffset; |
13718 | V = V.getOperand(0); |
13719 | continue; |
13720 | } |
13721 | case ISD::INSERT_SUBVECTOR: { |
13722 | SDValue VOuter = V.getOperand(0), VInner = V.getOperand(1); |
13723 | int EltBitWidth = VOuter.getScalarValueSizeInBits(); |
13724 | int Idx = (int)V.getConstantOperandVal(2); |
13725 | int NumSubElts = (int)VInner.getSimpleValueType().getVectorNumElements(); |
13726 | int BeginOffset = Idx * EltBitWidth; |
13727 | int EndOffset = BeginOffset + NumSubElts * EltBitWidth; |
13728 | if (BeginOffset <= BitOffset && BitOffset < EndOffset) { |
13729 | BitOffset -= BeginOffset; |
13730 | V = VInner; |
13731 | } else { |
13732 | V = VOuter; |
13733 | } |
13734 | continue; |
13735 | } |
13736 | } |
13737 | break; |
13738 | } |
13739 | assert((BitOffset % NumEltBits) == 0 && "Illegal bit-offset"); |
13740 | BroadcastIdx = BitOffset / NumEltBits; |
13741 | |
13742 | |
13743 | bool BitCastSrc = V.getScalarValueSizeInBits() != NumEltBits; |
13744 | |
13745 | |
13746 | |
13747 | |
13748 | |
13749 | |
13750 | if (BitCastSrc && VT.isInteger()) |
13751 | if (SDValue TruncBroadcast = lowerShuffleAsTruncBroadcast( |
13752 | DL, VT, V, BroadcastIdx, Subtarget, DAG)) |
13753 | return TruncBroadcast; |
13754 | |
13755 | |
13756 | if (!BitCastSrc && |
13757 | ((V.getOpcode() == ISD::BUILD_VECTOR && V.hasOneUse()) || |
13758 | (V.getOpcode() == ISD::SCALAR_TO_VECTOR && BroadcastIdx == 0))) { |
13759 | V = V.getOperand(BroadcastIdx); |
13760 | |
13761 | |
13762 | if (!BroadcastFromReg && !isShuffleFoldableLoad(V)) |
13763 | return SDValue(); |
13764 | } else if (ISD::isNormalLoad(V.getNode()) && |
13765 | cast<LoadSDNode>(V)->isSimple()) { |
13766 | // We do not check for one-use of the vector load because a broadcast load |
13767 | // is expected to be a win for code size, register pressure, and possibly |
13768 | // uops even if the original vector load is not eliminated. |
13769 | |
13770 | // Reduce the vector load and shuffle to a broadcasted scalar load. |
13771 | LoadSDNode *Ld = cast<LoadSDNode>(V); |
13772 | SDValue BaseAddr = Ld->getOperand(1); |
13773 | MVT SVT = VT.getScalarType(); |
13774 | unsigned Offset = BroadcastIdx * SVT.getStoreSize(); |
13775 | assert((int)(Offset * 8) == BitOffset && "Unexpected bit-offset"); |
13776 | SDValue NewAddr = |
13777 | DAG.getMemBasePlusOffset(BaseAddr, TypeSize::Fixed(Offset), DL); |
13778 | |
13779 | |
13780 | |
13781 | |
13782 | if (Opcode == X86ISD::VBROADCAST) { |
13783 | SDVTList Tys = DAG.getVTList(VT, MVT::Other); |
13784 | SDValue Ops[] = {Ld->getChain(), NewAddr}; |
13785 | V = DAG.getMemIntrinsicNode( |
13786 | X86ISD::VBROADCAST_LOAD, DL, Tys, Ops, SVT, |
13787 | DAG.getMachineFunction().getMachineMemOperand( |
13788 | Ld->getMemOperand(), Offset, SVT.getStoreSize())); |
13789 | DAG.makeEquivalentMemoryOrdering(Ld, V); |
13790 | return DAG.getBitcast(VT, V); |
13791 | } |
13792 | assert(SVT == MVT::f64 && "Unexpected VT!"); |
13793 | V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr, |
13794 | DAG.getMachineFunction().getMachineMemOperand( |
13795 | Ld->getMemOperand(), Offset, SVT.getStoreSize())); |
13796 | DAG.makeEquivalentMemoryOrdering(Ld, V); |
13797 | } else if (!BroadcastFromReg) { |
13798 | |
13799 | return SDValue(); |
13800 | } else if (BitOffset != 0) { |
13801 | // We can only broadcast from the zero-element of a vector register, |
13802 | // but it can be advantageous to broadcast from the zero-element of a |
13803 | // subvector. |
13804 | if (!VT.is256BitVector() && !VT.is512BitVector()) |
13805 | return SDValue(); |
13806 | |
13807 | |
13808 | if (VT == MVT::v4f64 || VT == MVT::v4i64) |
13809 | return SDValue(); |
13810 | |
13811 | |
13812 | if ((BitOffset % 128) != 0) |
13813 | return SDValue(); |
13814 | |
13815 | assert((BitOffset % V.getScalarValueSizeInBits()) == 0 && |
13816 | "Unexpected bit-offset"); |
13817 | assert((V.getValueSizeInBits() == 256 || V.getValueSizeInBits() == 512) && |
13818 | "Unexpected vector size"); |
13819 | unsigned ExtractIdx = BitOffset / V.getScalarValueSizeInBits(); |
13820 | V = extract128BitVector(V, ExtractIdx, DAG, DL); |
13821 | } |
13822 | |
13823 | |
13824 | if (Opcode == X86ISD::MOVDDUP && !V.getValueType().isVector()) { |
13825 | V = DAG.getBitcast(MVT::f64, V); |
13826 | if (Subtarget.hasAVX()) { |
13827 | V = DAG.getNode(X86ISD::VBROADCAST, DL, MVT::v2f64, V); |
13828 | return DAG.getBitcast(VT, V); |
13829 | } |
13830 | V = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f64, V); |
13831 | } |
13832 | |
13833 | |
13834 | if (!V.getValueType().isVector()) { |
13835 | assert(V.getScalarValueSizeInBits() == NumEltBits && |
13836 | "Unexpected scalar size"); |
13837 | MVT BroadcastVT = MVT::getVectorVT(V.getSimpleValueType(), |
13838 | VT.getVectorNumElements()); |
13839 | return DAG.getBitcast(VT, DAG.getNode(Opcode, DL, BroadcastVT, V)); |
13840 | } |
13841 | |
13842 | // We only support broadcasting from 128-bit vectors to minimize the |
13843 | // number of patterns we need to deal with in isel. So extract down to |
13844 | // 128-bits, removing as many bitcasts as possible. |
13845 | if (V.getValueSizeInBits() > 128) |
13846 | V = extract128BitVector(peekThroughBitcasts(V), 0, DAG, DL); |
13847 | |
13848 | |
13849 | |
13850 | unsigned NumSrcElts = V.getValueSizeInBits() / NumEltBits; |
13851 | MVT CastVT = MVT::getVectorVT(VT.getVectorElementType(), NumSrcElts); |
13852 | return DAG.getNode(Opcode, DL, VT, DAG.getBitcast(CastVT, V)); |
13853 | } |
13854 | |
13855 | |
13856 | |
13857 | |
13858 | |
13859 | // Try to match an INSERTPS: one element from either source inserted into |
13860 | // a lane of the other, with any remaining lanes optionally zeroed. |
13861 | static bool matchShuffleAsInsertPS(SDValue &V1, SDValue &V2, |
13862 | unsigned &InsertPSMask, |
13863 | const APInt &Zeroable, |
13864 | ArrayRef<int> Mask, SelectionDAG &DAG) { |
13865 | assert(V1.getSimpleValueType().is128BitVector() && "Bad operand type!"); |
13866 | assert(V2.getSimpleValueType().is128BitVector() && "Bad operand type!"); |
13867 | assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!"); |
13868 | |
13869 | |
13870 | |
13871 | |
13872 | auto matchAsInsertPS = [&](SDValue VA, SDValue VB, |
13873 | ArrayRef<int> CandidateMask) { |
13874 | unsigned ZMask = 0; |
13875 | int VADstIndex = -1; |
13876 | int VBDstIndex = -1; |
13877 | bool VAUsedInPlace = false; |
13878 | |
13879 | for (int i = 0; i < 4; ++i) { |
13880 | |
13881 | if (Zeroable[i]) { |
13882 | ZMask |= 1 << i; |
13883 | continue; |
13884 | } |
13885 | |
13886 | |
13887 | if (i == CandidateMask[i]) { |
13888 | VAUsedInPlace = true; |
13889 | continue; |
13890 | } |
13891 | |
13892 | |
13893 | if (VADstIndex >= 0 || VBDstIndex >= 0) |
13894 | return false; |
13895 | |
13896 | if (CandidateMask[i] < 4) { |
13897 | |
13898 | VADstIndex = i; |
13899 | } else { |
13900 | |
13901 | VBDstIndex = i; |
13902 | } |
13903 | } |
13904 | |
13905 | |
13906 | if (VADstIndex < 0 && VBDstIndex < 0) |
13907 | return false; |
13908 | |
13909 | |
13910 | |
13911 | unsigned VBSrcIndex = 0; |
13912 | if (VADstIndex >= 0) { |
13913 | |
13914 | |
13915 | VBSrcIndex = CandidateMask[VADstIndex]; |
13916 | VBDstIndex = VADstIndex; |
13917 | VB = VA; |
13918 | } else { |
13919 | VBSrcIndex = CandidateMask[VBDstIndex] - 4; |
13920 | } |
13921 | |
13922 | |
13923 | |
13924 | if (!VAUsedInPlace) |
13925 | VA = DAG.getUNDEF(MVT::v4f32); |
13926 | |
13927 | |
13928 | V1 = VA; |
13929 | V2 = VB; |
13930 | |
13931 | |
13932 | InsertPSMask = VBSrcIndex << 6 | VBDstIndex << 4 | ZMask; |
13933 | assert((InsertPSMask & ~0xFFu) == 0 && "Invalid mask!"); |
13934 | return true; |
13935 | }; |
13936 | |
13937 | if (matchAsInsertPS(V1, V2, Mask)) |
13938 | return true; |
13939 | |
13940 | |
13941 | SmallVector<int, 4> CommutedMask(Mask.begin(), Mask.end()); |
13942 | ShuffleVectorSDNode::commuteMask(CommutedMask); |
13943 | if (matchAsInsertPS(V2, V1, CommutedMask)) |
13944 | return true; |
13945 | |
13946 | return false; |
13947 | } |
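      | // Illustrative worked example (added commentary, not from the original |
      | // source): v4f32 Mask = <0,5,2,3> keeps V1 elements 0, 2 and 3 in place |
      | // and inserts V2[1] into slot 1, giving |
      | // InsertPSMask = (1 << 6) | (1 << 4) | 0 = 0x50. |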
13948 | |
13949 | static SDValue lowerShuffleAsInsertPS(const SDLoc &DL, SDValue V1, SDValue V2, |
13950 | ArrayRef<int> Mask, const APInt &Zeroable, |
13951 | SelectionDAG &DAG) { |
13952 | assert(V1.getSimpleValueType() == MVT::v4f32 && "Bad operand type!"); |
13953 | assert(V2.getSimpleValueType() == MVT::v4f32 && "Bad operand type!"); |
13954 | |
13955 | |
13956 | unsigned InsertPSMask = 0; |
13957 | if (!matchShuffleAsInsertPS(V1, V2, InsertPSMask, Zeroable, Mask, DAG)) |
13958 | return SDValue(); |
13959 | |
13960 | |
13961 | return DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32, V1, V2, |
13962 | DAG.getTargetConstant(InsertPSMask, DL, MVT::i8)); |
13963 | } |
13964 | |
13965 | |
13966 | /// Try to lower a shuffle as a permute of the inputs followed by an |
13967 | /// UNPCK instruction. |
13968 | /// |
13969 | /// This specifically targets cases where we end up alternating between the |
13970 | /// two inputs, and so can permute them into something that feeds a single |
13971 | /// UNPCK instruction. It only targets integer vectors, because floating |
13972 | /// point vectors go through a generalized SHUFPS lowering that already |
13973 | /// handles everything that doesn't *exactly* match an unpack. |
13974 | static SDValue lowerShuffleAsPermuteAndUnpack( |
13975 | const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask, |
13976 | const X86Subtarget &Subtarget, SelectionDAG &DAG) { |
13977 | assert(!VT.isFloatingPoint() && |
13978 | "This routine only supports integer vectors."); |
13979 | assert(VT.is128BitVector() && |
13980 | "This routine only works on 128-bit vectors."); |
13981 | assert(!V2.isUndef() && |
13982 | "This routine should only be used when blending two inputs."); |
13983 | assert(Mask.size() >= 2 && "Single element masks are invalid."); |
13984 | |
13985 | int Size = Mask.size(); |
13986 | |
13987 | int NumLoInputs = |
13988 | count_if(Mask, [Size](int M) { return M >= 0 && M % Size < Size / 2; }); |
13989 | int NumHiInputs = |
13990 | count_if(Mask, [Size](int M) { return M % Size >= Size / 2; }); |
13991 | |
13992 | bool UnpackLo = NumLoInputs >= NumHiInputs; |
13993 | |
13994 | auto TryUnpack = [&](int ScalarSize, int Scale) { |
13995 | SmallVector<int, 16> V1Mask((unsigned)Size, -1); |
13996 | SmallVector<int, 16> V2Mask((unsigned)Size, -1); |
13997 | |
13998 | for (int i = 0; i < Size; ++i) { |
13999 | if (Mask[i] < 0) |
14000 | continue; |
14001 | |
14002 | |
14003 | int UnpackIdx = i / Scale; |
14004 | |
14005 | // We only handle the case where V1 feeds even mask slots, and V2 feeds |
14006 | // odd mask slots. |
14007 | if ((UnpackIdx % 2 == 0) != (Mask[i] < Size)) |
14008 | return SDValue(); |
14009 | |
14010 | |
14011 | |
14012 | SmallVectorImpl<int> &VMask = (UnpackIdx % 2 == 0) ? V1Mask : V2Mask; |
14013 | VMask[(UnpackIdx / 2) * Scale + i % Scale + (UnpackLo ? 0 : Size / 2)] = |
14014 | Mask[i] % Size; |
14015 | } |
14016 | |
14017 | |
14018 | |
14019 | if ((NumLoInputs == 0 || NumHiInputs == 0) && !isNoopShuffleMask(V1Mask) && |
14020 | !isNoopShuffleMask(V2Mask)) |
14021 | return SDValue(); |
14022 | |
14023 | |
14024 | V1 = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), V1Mask); |
14025 | V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), V2Mask); |
14026 | |
14027 | |
14028 | MVT UnpackVT = MVT::getVectorVT(MVT::getIntegerVT(ScalarSize), Size / Scale); |
14029 | V1 = DAG.getBitcast(UnpackVT, V1); |
14030 | V2 = DAG.getBitcast(UnpackVT, V2); |
14031 | |
14032 | |
14033 | return DAG.getBitcast( |
14034 | VT, DAG.getNode(UnpackLo ? X86ISD::UNPCKL : X86ISD::UNPCKH, DL, |
14035 | UnpackVT, V1, V2)); |
14036 | }; |
14037 | |
14038 | |
14039 | |
14040 | int OrigScalarSize = VT.getScalarSizeInBits(); |
14041 | for (int ScalarSize = 64; ScalarSize >= OrigScalarSize; ScalarSize /= 2) |
14042 | if (SDValue Unpack = TryUnpack(ScalarSize, ScalarSize / OrigScalarSize)) |
14043 | return Unpack; |
14044 | |
14045 | |
14046 | |
14047 | if (ISD::isBuildVectorAllZeros(V1.getNode()) || |
14048 | ISD::isBuildVectorAllZeros(V2.getNode())) |
14049 | return SDValue(); |
14050 | |
14051 | |
14052 | |
14053 | if (NumLoInputs == 0 || NumHiInputs == 0) { |
14054 | assert((NumLoInputs > 0 || NumHiInputs > 0) && |
14055 | "We have to have *some* inputs!"); |
14056 | int HalfOffset = NumLoInputs == 0 ? Size / 2 : 0; |
14057 | |
14058 | // All the mask inputs come from one half, so we can unpack the two |
14059 | // sources into interleaved order first and then drop every element into |
14060 | // its final slot with a single permute of the unpacked result. |
14061 | |
14062 | |
14063 | SmallVector<int, 32> PermMask((unsigned)Size, -1); |
14064 | for (int i = 0; i < Size; ++i) { |
14065 | if (Mask[i] < 0) |
14066 | continue; |
14067 | |
14068 | assert(Mask[i] % Size >= HalfOffset && "Found input from wrong half!"); |
14069 | |
14070 | PermMask[i] = |
14071 | 2 * ((Mask[i] % Size) - HalfOffset) + (Mask[i] < Size ? 0 : 1); |
14072 | } |
14073 | return DAG.getVectorShuffle( |
14074 | VT, DL, DAG.getNode(NumLoInputs == 0 ? X86ISD::UNPCKH : X86ISD::UNPCKL, |
14075 | DL, VT, V1, V2), |
14076 | DAG.getUNDEF(VT), PermMask); |
14077 | } |
14078 | |
14079 | return SDValue(); |
14080 | } |
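      | // Illustrative worked example (added commentary, not from the original |
      | // source): v8i16 Mask = <0,8,2,10,4,12,6,14> alternates sources, so each |
      | // input is first permuted to pack its even elements into its low half |
      | // (<0,2,4,6,-1,-1,-1,-1>) and a single PUNPCKLWD then interleaves the |
      | // two packed halves into the requested order. |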
14081 | |
14082 | |
14083 | /// Handle lowering of 2-lane 64-bit floating point shuffles. |
14084 | /// |
14085 | /// This is the basis function for the 2-lane 64-bit shuffles as we have |
14086 | /// full support for floating point shuffles but not integer shuffles. |
14087 | /// These instructions incur a domain crossing penalty on some chips, so it |
14088 | /// is better to avoid lowering integer vectors through them when possible. |
14089 | static SDValue lowerV2F64Shuffle(const SDLoc &DL, ArrayRef<int> Mask, |
14090 | const APInt &Zeroable, SDValue V1, SDValue V2, |
14091 | const X86Subtarget &Subtarget, |
14092 | SelectionDAG &DAG) { |
14093 | assert(V1.getSimpleValueType() == MVT::v2f64 && "Bad operand type!"); |
14094 | assert(V2.getSimpleValueType() == MVT::v2f64 && "Bad operand type!"); |
14095 | assert(Mask.size() == 2 && "Unexpected mask size for v2 shuffle!"); |
14096 | |
14097 | if (V2.isUndef()) { |
14098 | |
14099 | if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v2f64, V1, V2, |
14100 | Mask, Subtarget, DAG)) |
14101 | return Broadcast; |
14102 | |
14103 | // Straight shuffle of a single input vector. Simulate this by using the |
14104 | // single input as both of the "inputs" to this instruction. |
14105 | unsigned SHUFPDMask = (Mask[0] == 1) | ((Mask[1] == 1) << 1); |
14106 | |
14107 | if (Subtarget.hasAVX()) { |
14108 | // If we have AVX, we can use VPERMILPS which will allow folding a load |
14109 | // into the shuffle. |
14110 | return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v2f64, V1, |
14111 | DAG.getTargetConstant(SHUFPDMask, DL, MVT::i8)); |
14112 | } |
14113 | |
14114 | return DAG.getNode( |
14115 | X86ISD::SHUFP, DL, MVT::v2f64, |
14116 | Mask[0] == SM_SentinelUndef ? DAG.getUNDEF(MVT::v2f64) : V1, |
14117 | Mask[1] == SM_SentinelUndef ? DAG.getUNDEF(MVT::v2f64) : V1, |
14118 | DAG.getTargetConstant(SHUFPDMask, DL, MVT::i8)); |
14119 | } |
14120 | assert(Mask[0] >= 0 && "No undef lanes in multi-input v2 shuffles!"); |
14121 | assert(Mask[1] >= 0 && "No undef lanes in multi-input v2 shuffles!"); |
14122 | assert(Mask[0] < 2 && "We sort V1 to be the first input."); |
14123 | assert(Mask[1] >= 2 && "We sort V2 to be the second input."); |
14124 | |
14125 | if (Subtarget.hasAVX2()) |
14126 | if (SDValue Extract = lowerShuffleOfExtractsAsVperm(DL, V1, V2, Mask, DAG)) |
14127 | return Extract; |
14128 | |
14129 | |
14130 | |
14131 | if (SDValue Insertion = lowerShuffleAsElementInsertion( |
14132 | DL, MVT::v2f64, V1, V2, Mask, Zeroable, Subtarget, DAG)) |
14133 | return Insertion; |
14134 | |
14135 | |
14136 | int InverseMask[2] = {Mask[0] < 0 ? -1 : (Mask[0] ^ 2), |
14137 | Mask[1] < 0 ? -1 : (Mask[1] ^ 2)}; |
14138 | if (SDValue Insertion = lowerShuffleAsElementInsertion( |
14139 | DL, MVT::v2f64, V2, V1, InverseMask, Zeroable, Subtarget, DAG)) |
14140 | return Insertion; |
14141 | |
14142 | // If the low element is really a rematerializable scalar from V1 and the |
14143 | // high element stays V2[1], lower this as a MOVSD over V2. |
14144 | if (isShuffleEquivalent(Mask, {0, 3}, V1, V2) || |
14145 | isShuffleEquivalent(Mask, {1, 3}, V1, V2)) |
14146 | if (SDValue V1S = getScalarValueForVectorElement(V1, Mask[0], DAG)) |
14147 | |
14148 | |
14149 | return DAG.getNode( |
14150 | X86ISD::MOVSD, DL, MVT::v2f64, V2, |
14151 | DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f64, V1S)); |
14152 | |
14153 | if (Subtarget.hasSSE41()) |
14154 | if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v2f64, V1, V2, Mask, |
14155 | Zeroable, Subtarget, DAG)) |
14156 | return Blend; |
14157 | |
14158 | |
14159 | if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v2f64, Mask, V1, V2, DAG)) |
14160 | return V; |
14161 | |
14162 | unsigned SHUFPDMask = (Mask[0] == 1) | (((Mask[1] - 2) == 1) << 1); |
14163 | return DAG.getNode(X86ISD::SHUFP, DL, MVT::v2f64, V1, V2, |
14164 | DAG.getTargetConstant(SHUFPDMask, DL, MVT::i8)); |
14165 | } |
14166 | |
14167 | |
14168 | /// Handle lowering of 2-lane 64-bit integer shuffles. |
14169 | /// |
14170 | /// Tries to lower a 2-lane 64-bit shuffle using shuffle operations provided |
14171 | /// by the integer unit to minimize domain crossing penalties; for blends it |
14172 | /// falls back to the floating point operations with appropriate bitcasts. |
14173 | static SDValue lowerV2I64Shuffle(const SDLoc &DL, ArrayRef<int> Mask, |
14174 | const APInt &Zeroable, SDValue V1, SDValue V2, |
14175 | const X86Subtarget &Subtarget, |
14176 | SelectionDAG &DAG) { |
14177 | assert(V1.getSimpleValueType() == MVT::v2i64 && "Bad operand type!"); |
14178 | assert(V2.getSimpleValueType() == MVT::v2i64 && "Bad operand type!"); |
14179 | assert(Mask.size() == 2 && "Unexpected mask size for v2 shuffle!"); |
14180 | |
14181 | if (V2.isUndef()) { |
14182 | |
14183 | if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v2i64, V1, V2, |
14184 | Mask, Subtarget, DAG)) |
14185 | return Broadcast; |
14186 | |
14187 | // Straight shuffle of a single input vector. For everything from SSE2 |
14188 | // onward this has a single fast instruction with no scary immediates. |
14189 | // We have to map the mask as it is actually a v4i32 shuffle. |
14190 | V1 = DAG.getBitcast(MVT::v4i32, V1); |
14191 | int WidenedMask[4] = {Mask[0] < 0 ? -1 : (Mask[0] * 2), |
14192 | Mask[0] < 0 ? -1 : ((Mask[0] * 2) + 1), |
14193 | Mask[1] < 0 ? -1 : (Mask[1] * 2), |
14194 | Mask[1] < 0 ? -1 : ((Mask[1] * 2) + 1)}; |
14195 | return DAG.getBitcast( |
14196 | MVT::v2i64, |
14197 | DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, V1, |
14198 | getV4X86ShuffleImm8ForMask(WidenedMask, DL, DAG))); |
14199 | } |
14200 | assert(Mask[0] != -1 && "No undef lanes in multi-input v2 shuffles!"); |
14201 | assert(Mask[1] != -1 && "No undef lanes in multi-input v2 shuffles!"); |
14202 | assert(Mask[0] < 2 && "We sort V1 to be the first input."); |
14203 | assert(Mask[1] >= 2 && "We sort V2 to be the second input."); |
14204 | |
14205 | if (Subtarget.hasAVX2()) |
14206 | if (SDValue Extract = lowerShuffleOfExtractsAsVperm(DL, V1, V2, Mask, DAG)) |
14207 | return Extract; |
14208 | |
14209 | // Try to use shift instructions. |
14210 | if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v2i64, V1, V2, Mask, |
14211 | Zeroable, Subtarget, DAG)) |
14212 | return Shift; |
14213 | |
14214 | // If we have a single input from V2, insert that into V1 if we can do so |
14215 | // cheaply. |
14216 | if (SDValue Insertion = lowerShuffleAsElementInsertion( |
14217 | DL, MVT::v2i64, V1, V2, Mask, Zeroable, Subtarget, DAG)) |
14218 | return Insertion; |
14219 | |
14220 | // Try inverting the insertion: insert an element of V1 into V2 instead. |
14221 | int InverseMask[2] = {Mask[0] ^ 2, Mask[1] ^ 2}; |
14222 | if (SDValue Insertion = lowerShuffleAsElementInsertion( |
14223 | DL, MVT::v2i64, V2, V1, InverseMask, Zeroable, Subtarget, DAG)) |
14224 | return Insertion; |
14225 | |
14226 | // We have different paths for blend lowering, but they all must use the |
14227 | // *exact* same predicate. |
14228 | bool IsBlendSupported = Subtarget.hasSSE41(); |
14229 | if (IsBlendSupported) |
14230 | if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v2i64, V1, V2, Mask, |
14231 | Zeroable, Subtarget, DAG)) |
14232 | return Blend; |
14233 | |
14234 | // Use dedicated unpack instructions for masks that match their pattern. |
14235 | if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v2i64, Mask, V1, V2, DAG)) |
14236 | return V; |
14237 | |
14238 | // Try to use byte rotation instructions. |
14239 | // It's more profitable for pre-SSSE3 chips to use shuffles/unpacks. |
14240 | if (Subtarget.hasSSSE3()) { |
14241 | if (Subtarget.hasVLX()) |
14242 | if (SDValue Rotate = lowerShuffleAsVALIGN(DL, MVT::v2i64, V1, V2, Mask, |
14243 | Subtarget, DAG)) |
14244 | return Rotate; |
14245 | |
14246 | if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v2i64, V1, V2, Mask, |
14247 | Subtarget, DAG)) |
14248 | return Rotate; |
14249 | } |
14250 | |
14251 | // If we have direct support for blends, we should lower by decomposing |
14252 | // into a permute; that will be faster than the domain cross below. |
14253 | if (IsBlendSupported) |
14254 | return lowerShuffleAsDecomposedShuffleMerge(DL, MVT::v2i64, V1, V2, Mask, |
14255 | Subtarget, DAG); |
14256 | |
14257 | // We implement this with SHUFPD, which is pretty lame because it will |
14258 | // likely incur a domain-crossing stall for integer vectors on older |
14259 | // chips. However, all the alternatives cost even more cycles, and newer |
14260 | // chips don't have this problem. |
14261 | V1 = DAG.getBitcast(MVT::v2f64, V1); |
14262 | V2 = DAG.getBitcast(MVT::v2f64, V2); |
14263 | return DAG.getBitcast(MVT::v2i64, |
14264 | DAG.getVectorShuffle(MVT::v2f64, DL, V1, V2, Mask)); |
14265 | } |
14266 | |
14267 | /// Lower a vector shuffle using the SHUFPS instruction. |
14268 | /// |
14269 | /// This is a helper routine dedicated to lowering vector shuffles using |
14270 | /// SHUFPS. It makes no assumptions about whether this is the *best* |
14271 | /// lowering, it simply uses it. |
14272 | static SDValue lowerShuffleWithSHUFPS(const SDLoc &DL, MVT VT, |
14273 | ArrayRef<int> Mask, SDValue V1, |
14274 | SDValue V2, SelectionDAG &DAG) { |
14275 | SDValue LowV = V1, HighV = V2; |
14276 | SmallVector<int, 4> NewMask(Mask.begin(), Mask.end()); |
14277 | int NumV2Elements = count_if(Mask, [](int M) { return M >= 4; }); |
14278 | |
14279 | if (NumV2Elements == 1) { |
14280 | int V2Index = find_if(Mask, [](int M) { return M >= 4; }) - Mask.begin(); |
14281 | |
14282 | // Compute the index adjacent to V2Index and in the same half by toggling |
14283 | // the low bit. |
14284 | int V2AdjIndex = V2Index ^ 1; |
14285 | |
14286 | if (Mask[V2AdjIndex] < 0) { |
14287 | // The adjacent element is undef, so the lone V2 element can be placed |
14288 | // there directly. If it must land in the low half of the result, swap |
14289 | // the operands so the final SHUFP reads it from its first input. |
14290 | if (V2Index < 2) |
14291 | std::swap(LowV, HighV); |
14292 | NewMask[V2Index] -= 4; |
14293 | } else { |
14294 | // The V2 element sits next to a live V1 element. Blend the two relevant |
14295 | // elements into one register first, then shuffle that into place. |
14296 | int V1Index = V2AdjIndex; |
14297 | int BlendMask[4] = {Mask[V2Index] - 4, 0, Mask[V1Index], 0}; |
14298 | V2 = DAG.getNode(X86ISD::SHUFP, DL, VT, V2, V1, |
14299 | getV4X86ShuffleImm8ForMask(BlendMask, DL, DAG)); |
14300 | |
14301 | // Now proceed to reconstruct the final shuffle; the pre-blended register |
14302 | // supplies the half that contains the V2 element. |
14303 | if (V2Index < 2) { |
14304 | LowV = V2; |
14305 | HighV = V1; |
14306 | } else { |
14307 | HighV = V2; |
14308 | } |
14309 | NewMask[V1Index] = 2; |
14310 | NewMask[V2Index] = 0; |
14311 | } |
14312 | } else if (NumV2Elements == 2) { |
14313 | if (Mask[0] < 4 && Mask[1] < 4) { |
14314 | // V1 already supplies the low half and V2 the high half; just rebase the |
14315 | // V2 indices into the second operand's range. |
14316 | NewMask[2] -= 4; |
14317 | NewMask[3] -= 4; |
14318 | } else if (Mask[2] < 4 && Mask[3] < 4) { |
14319 | // The mirrored case: V2 supplies the low half of the result and V1 the |
14320 | // high half, so swap the operands and rebase the indices that point into |
14321 | // V2. |
14322 | NewMask[0] -= 4; |
14323 | NewMask[1] -= 4; |
14324 | HighV = V1; |
14325 | LowV = V2; |
14326 | } else { |
14327 | // Each half of the result mixes V1 and V2 elements. Pre-blend with one |
14328 | // SHUFP that gathers, per final half, the needed element of each input: |
14329 | //   NewV1 = [V1's low-half elt, V1's high-half elt, |
14330 | //            V2's low-half elt, V2's high-half elt] |
14331 | // and then permute that single register into the final order with a |
14332 | // second SHUFP. |
14333 | int BlendMask[4] = {Mask[0] < 4 ? Mask[0] : Mask[1], |
14334 | Mask[2] < 4 ? Mask[2] : Mask[3], |
14335 | (Mask[0] >= 4 ? Mask[0] : Mask[1]) - 4, |
14336 | (Mask[2] >= 4 ? Mask[2] : Mask[3]) - 4}; |
14337 | V1 = DAG.getNode(X86ISD::SHUFP, DL, VT, V1, V2, |
14338 | getV4X86ShuffleImm8ForMask(BlendMask, DL, DAG)); |
14339 | |
14340 | // Both operands of the final shuffle are now the pre-blended register; |
14341 | // remap the mask to its element layout. |
14342 | LowV = HighV = V1; |
14343 | NewMask[0] = Mask[0] < 4 ? 0 : 2; |
14344 | NewMask[1] = Mask[0] < 4 ? 2 : 0; |
14345 | NewMask[2] = Mask[2] < 4 ? 1 : 3; |
14346 | NewMask[3] = Mask[2] < 4 ? 3 : 1; |
14347 | } |
14348 | } else if (NumV2Elements == 3) { |
14349 | |
14350 | // Three of the four elements come from V2: commute the mask and operands |
14351 | // and recurse so that the V1-heavy logic above applies. |
14352 | ShuffleVectorSDNode::commuteMask(NewMask); |
14353 | return lowerShuffleWithSHUFPS(DL, VT, NewMask, V2, V1, DAG); |
14354 | } |
14355 | return DAG.getNode(X86ISD::SHUFP, DL, VT, LowV, HighV, |
14356 | getV4X86ShuffleImm8ForMask(NewMask, DL, DAG)); |
14357 | } |
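      | // Worked example (illustrative) of the NumV2Elements == 2 mixed case: for |
      | // Mask = <0, 4, 2, 6> the pre-blend mask is {0, 2, 0, 2}, producing |
      | //   NewV1 = [V1[0], V1[2], V2[0], V2[2]], |
      | // and the final SHUFP uses NewMask = {0, 2, 1, 3}, yielding |
      | //   [V1[0], V2[0], V1[2], V2[2]]  ==  elements <0, 4, 2, 6>. |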
14358 | |
14359 | /// Lower 4-lane 32-bit floating point shuffles. |
14360 | /// |
14361 | /// Uses instructions exclusively from the floating point unit to minimize |
14362 | /// domain crossing penalties, as these are sufficient to implement all |
14363 | /// v4f32 shuffles. |
14364 | static SDValue lowerV4F32Shuffle(const SDLoc &DL, ArrayRef<int> Mask, |
14365 | const APInt &Zeroable, SDValue V1, SDValue V2, |
14366 | const X86Subtarget &Subtarget, |
14367 | SelectionDAG &DAG) { |
14368 | assert(V1.getSimpleValueType() == MVT::v4f32 && "Bad operand type!"); |
14369 | assert(V2.getSimpleValueType() == MVT::v4f32 && "Bad operand type!"); |
14370 | assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!"); |
14371 | |
14372 | int NumV2Elements = count_if(Mask, [](int M) { return M >= 4; }); |
14373 | |
14374 | if (NumV2Elements == 0) { |
14375 | // Check for being able to broadcast a single element. |
14376 | if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v4f32, V1, V2, |
14377 | Mask, Subtarget, DAG)) |
14378 | return Broadcast; |
14379 | |
14380 | // Use even/odd duplicate instructions for masks that match their pattern. |
14381 | if (Subtarget.hasSSE3()) { |
14382 | if (isShuffleEquivalent(Mask, {0, 0, 2, 2}, V1, V2)) |
14383 | return DAG.getNode(X86ISD::MOVSLDUP, DL, MVT::v4f32, V1); |
14384 | if (isShuffleEquivalent(Mask, {1, 1, 3, 3}, V1, V2)) |
14385 | return DAG.getNode(X86ISD::MOVSHDUP, DL, MVT::v4f32, V1); |
14386 | } |
14387 | |
14388 | if (Subtarget.hasAVX()) { |
14389 | // If we have AVX, we can use VPERMILPS, which will allow folding a load |
14390 | // into the shuffle. |
14391 | return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v4f32, V1, |
14392 | getV4X86ShuffleImm8ForMask(Mask, DL, DAG)); |
14393 | } |
14394 | |
14395 | // Use MOVLHPS/MOVHLPS to simulate unary shuffles. These are only valid |
14396 | // in SSE1 because otherwise they are widened to v2f64 and never get here. |
14397 | if (!Subtarget.hasSSE2()) { |
14398 | if (isShuffleEquivalent(Mask, {0, 1, 0, 1}, V1, V2)) |
14399 | return DAG.getNode(X86ISD::MOVLHPS, DL, MVT::v4f32, V1, V1); |
14400 | if (isShuffleEquivalent(Mask, {2, 3, 2, 3}, V1, V2)) |
14401 | return DAG.getNode(X86ISD::MOVHLPS, DL, MVT::v4f32, V1, V1); |
14402 | } |
14403 | |
14404 | // Otherwise, use a straight shuffle of a single input vector. We pass the |
14405 | // input vector to both operands to simulate this with a SHUFPS. |
14406 | return DAG.getNode(X86ISD::SHUFP, DL, MVT::v4f32, V1, V1, |
14407 | getV4X86ShuffleImm8ForMask(Mask, DL, DAG)); |
14408 | } |
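      | // Worked example (illustrative): the single-input reversal Mask = |
      | // <3, 2, 1, 0> encodes as 3 | 2<<2 | 1<<4 | 0<<6 = 0x1B, i.e. |
      | // "shufps $0x1B, %xmm0, %xmm0". |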
14409 | |
14410 | if (Subtarget.hasAVX2()) |
14411 | if (SDValue Extract = lowerShuffleOfExtractsAsVperm(DL, V1, V2, Mask, DAG)) |
14412 | return Extract; |
14413 | |
14414 | // There are special ways we can lower some single-element blends. |
14415 | // However, we have custom ways to lower more complex single-element |
14416 | // blends below that we defer to if both this and BLENDPS fail to match, |
14417 | // so restrict this to when the V2 input is targeting element 0 of the |
14418 | // mask -- that is the fast case here. |
14419 | if (NumV2Elements == 1 && Mask[0] >= 4) |
14420 | if (SDValue V = lowerShuffleAsElementInsertion( |
14421 | DL, MVT::v4f32, V1, V2, Mask, Zeroable, Subtarget, DAG)) |
14422 | return V; |
14423 | |
14424 | if (Subtarget.hasSSE41()) { |
14425 | if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v4f32, V1, V2, Mask, |
14426 | Zeroable, Subtarget, DAG)) |
14427 | return Blend; |
14428 | |
14429 | // Use INSERTPS if we can complete the shuffle efficiently. |
14430 | if (SDValue V = lowerShuffleAsInsertPS(DL, V1, V2, Mask, Zeroable, DAG)) |
14431 | return V; |
14432 | |
14433 | if (!isSingleSHUFPSMask(Mask)) |
14434 | if (SDValue BlendPerm = lowerShuffleAsBlendAndPermute(DL, MVT::v4f32, V1, |
14435 | V2, Mask, DAG)) |
14436 | return BlendPerm; |
14437 | } |
14438 | |
14439 | // Use low/high mov instructions. These are only valid in SSE1 because |
14440 | // otherwise they are widened to v2f64 and never get here. |
14441 | if (!Subtarget.hasSSE2()) { |
14442 | if (isShuffleEquivalent(Mask, {0, 1, 4, 5}, V1, V2)) |
14443 | return DAG.getNode(X86ISD::MOVLHPS, DL, MVT::v4f32, V1, V2); |
14444 | if (isShuffleEquivalent(Mask, {2, 3, 6, 7}, V1, V2)) |
14445 | return DAG.getNode(X86ISD::MOVHLPS, DL, MVT::v4f32, V2, V1); |
14446 | } |
14447 | |
14448 | // Use dedicated unpack instructions for masks that match their pattern. |
14449 | if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v4f32, Mask, V1, V2, DAG)) |
14450 | return V; |
14451 | |
14452 | // Otherwise fall back to a SHUFPS lowering strategy. |
14453 | return lowerShuffleWithSHUFPS(DL, MVT::v4f32, Mask, V1, V2, DAG); |
14454 | } |
14455 | |
14456 | /// Lower 4-lane i32 vector shuffles. |
14457 | /// |
14458 | /// We try to handle these with integer-domain shuffles where we can, but |
14459 | /// for blends we use the floating point domain blend instructions. |
14460 | static SDValue lowerV4I32Shuffle(const SDLoc &DL, ArrayRef<int> Mask, |
14461 | const APInt &Zeroable, SDValue V1, SDValue V2, |
14462 | const X86Subtarget &Subtarget, |
14463 | SelectionDAG &DAG) { |
14464 | assert(V1.getSimpleValueType() == MVT::v4i32 && "Bad operand type!"); |
14465 | assert(V2.getSimpleValueType() == MVT::v4i32 && "Bad operand type!"); |
14466 | assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!"); |
14467 | |
14468 | // Whenever we can lower this as a zext, that instruction is strictly |
14469 | // faster than any alternative. It also allows us to fold memory operands |
14470 | // into the shuffle in many cases. |
14471 | if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend(DL, MVT::v4i32, V1, V2, Mask, |
14472 | Zeroable, Subtarget, DAG)) |
14473 | return ZExt; |
14474 | |
14475 | int NumV2Elements = count_if(Mask, [](int M) { return M >= 4; }); |
14476 | |
14477 | if (NumV2Elements == 0) { |
14478 | // Only try a broadcast when more than one element of the mask is live. |
14479 | if (count_if(Mask, [](int M) { return M >= 0 && M < 4; }) > 1) { |
14480 | if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v4i32, V1, V2, |
14481 | Mask, Subtarget, DAG)) |
14482 | return Broadcast; |
14483 | } |
14484 | |
14485 | // Straight shuffle of a single input vector. For everything from SSE2 |
14486 | // onward this has a single fast instruction with no scary immediates. |
14487 | // We coerce the shuffle pattern to be compatible with UNPCK instructions |
14488 | // but we aren't actually going to use the UNPCK instruction because doing |
14489 | // so prevents folding a load into this instruction or making a copy. |
14490 | const int UnpackLoMask[] = {0, 0, 1, 1}; |
14491 | const int UnpackHiMask[] = {2, 2, 3, 3}; |
14492 | if (isShuffleEquivalent(Mask, {0, 0, 1, 1}, V1, V2)) |
14493 | Mask = UnpackLoMask; |
14494 | else if (isShuffleEquivalent(Mask, {2, 2, 3, 3}, V1, V2)) |
14495 | Mask = UnpackHiMask; |
14496 | |
14497 | return DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, V1, |
14498 | getV4X86ShuffleImm8ForMask(Mask, DL, DAG)); |
14499 | } |
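      | // Worked example (illustrative): Mask = {0, 0, 1, 1} encodes as |
      | // 0 | 0<<2 | 1<<4 | 1<<6 = 0x50, so this emits "pshufd $0x50", matching |
      | // UNPCKLDQ's interleave while still allowing a load to fold. |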
14500 | |
14501 | if (Subtarget.hasAVX2()) |
14502 | if (SDValue Extract = lowerShuffleOfExtractsAsVperm(DL, V1, V2, Mask, DAG)) |
14503 | return Extract; |
14504 | |
14505 | // Try to use shift instructions. |
14506 | if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v4i32, V1, V2, Mask, |
14507 | Zeroable, Subtarget, DAG)) |
14508 | return Shift; |
14509 | |
14510 | // There are special ways we can lower some single-element blends. |
14511 | if (NumV2Elements == 1) |
14512 | if (SDValue V = lowerShuffleAsElementInsertion( |
14513 | DL, MVT::v4i32, V1, V2, Mask, Zeroable, Subtarget, DAG)) |
14514 | return V; |
14515 | |
14516 | // We have different paths for blend lowering, but they all must use the |
14517 | // *exact* same predicate. |
14518 | bool IsBlendSupported = Subtarget.hasSSE41(); |
14519 | if (IsBlendSupported) |
14520 | if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v4i32, V1, V2, Mask, |
14521 | Zeroable, Subtarget, DAG)) |
14522 | return Blend; |
14523 | |
14524 | if (SDValue Masked = lowerShuffleAsBitMask(DL, MVT::v4i32, V1, V2, Mask, |
14525 | Zeroable, Subtarget, DAG)) |
14526 | return Masked; |
14527 | |
14528 | // Use dedicated unpack instructions for masks that match their pattern. |
14529 | if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v4i32, Mask, V1, V2, DAG)) |
14530 | return V; |
14531 | |
14532 | // Try to use byte rotation instructions. |
14533 | // It's more profitable for pre-SSSE3 chips to use shuffles/unpacks. |
14534 | if (Subtarget.hasSSSE3()) { |
14535 | if (Subtarget.hasVLX()) |
14536 | if (SDValue Rotate = lowerShuffleAsVALIGN(DL, MVT::v4i32, V1, V2, Mask, |
14537 | Subtarget, DAG)) |
14538 | return Rotate; |
14539 | |
14540 | if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v4i32, V1, V2, Mask, |
14541 | Subtarget, DAG)) |
14542 | return Rotate; |
14543 | } |
14544 | |
14545 | // Assume that a single SHUFPS is faster than an alternative sequence of |
14546 | // multiple instructions (even if the CPU has a domain penalty). If some |
14547 | // CPU is harmed by the domain switch, we can fix it in a later pass. |
14548 | if (!isSingleSHUFPSMask(Mask)) { |
14549 | // If we have direct support for blends, we should lower by decomposing |
14550 | // into a permute. That will be faster than the domain cross. |
14551 | if (IsBlendSupported) |
14552 | return lowerShuffleAsDecomposedShuffleMerge(DL, MVT::v4i32, V1, V2, Mask, |
14553 | Subtarget, DAG); |
14554 | |
14555 | // Try to lower by permuting the inputs into an unpack instruction. |
14556 | if (SDValue Unpack = lowerShuffleAsPermuteAndUnpack(DL, MVT::v4i32, V1, V2, |
14557 | Mask, Subtarget, DAG)) |
14558 | return Unpack; |
14559 | } |
14560 | |
14561 | // We implement this with SHUFPS because it can blend from two vectors. |
14562 | // Because we're going to eventually use SHUFPS, we use SHUFPS even to |
14563 | // build up the inputs, bypassing domain shift penalties that we would |
14564 | // otherwise incur if we used PSHUFD, as the bitcasts keep everything in |
14565 | // the floating-point domain. |
14566 | SDValue CastV1 = DAG.getBitcast(MVT::v4f32, V1); |
14567 | SDValue CastV2 = DAG.getBitcast(MVT::v4f32, V2); |
14568 | SDValue ShufPS = DAG.getVectorShuffle(MVT::v4f32, DL, CastV1, CastV2, Mask); |
14569 | return DAG.getBitcast(MVT::v4i32, ShufPS); |
14570 | } |
14571 | |
14572 | /// Lowering of single-input v8i16 shuffles is the cornerstone of SSE2 |
14573 | /// shuffle lowering, and the most complex part. |
14574 | /// |
14575 | /// The lowering strategy is to try to form pairs of input lanes which are |
14576 | /// targeted at the same half of the final vector, and then use a dword |
14577 | /// shuffle to place them onto the right half, and finally unpack the paired |
14578 | /// lanes into their final position. |
14579 | /// |
14580 | /// The exact breakdown of how to form these dword pairs and align them on |
14581 | /// the correct sides is really tricky. See the comments within the function |
14582 | /// for more of the details. |
14583 | /// |
14584 | /// This code also handles repeated 128-bit lanes of v8i16 shuffles, which |
14585 | /// is why it takes a generic i16 vector type and a mutable mask that it may |
14586 | /// canonicalize in place. |
14588 | static SDValue lowerV8I16GeneralSingleInputShuffle( |
14589 | const SDLoc &DL, MVT VT, SDValue V, MutableArrayRef<int> Mask, |
14590 | const X86Subtarget &Subtarget, SelectionDAG &DAG) { |
14591 | assert(VT.getVectorElementType() == MVT::i16 && "Bad input type!"); |
14592 | MVT PSHUFDVT = MVT::getVectorVT(MVT::i32, VT.getVectorNumElements() / 2); |
14593 | |
14594 | assert(Mask.size() == 8 && "Shuffle mask length doesn't match!"); |
14595 | MutableArrayRef<int> LoMask = Mask.slice(0, 4); |
14596 | MutableArrayRef<int> HiMask = Mask.slice(4, 4); |
14597 | |
14598 | // Attempt to directly match PSHUFLW or PSHUFHW. |
14599 | if (isUndefOrInRange(LoMask, 0, 4) && |
14600 | isSequentialOrUndefInRange(HiMask, 0, 4, 4)) { |
14601 | return DAG.getNode(X86ISD::PSHUFLW, DL, VT, V, |
14602 | getV4X86ShuffleImm8ForMask(LoMask, DL, DAG)); |
14603 | } |
14604 | if (isUndefOrInRange(HiMask, 4, 8) && |
14605 | isSequentialOrUndefInRange(LoMask, 0, 4, 0)) { |
14606 | for (int i = 0; i != 4; ++i) |
14607 | HiMask[i] = (HiMask[i] < 0 ? HiMask[i] : (HiMask[i] - 4)); |
14608 | return DAG.getNode(X86ISD::PSHUFHW, DL, VT, V, |
14609 | getV4X86ShuffleImm8ForMask(HiMask, DL, DAG)); |
14610 | } |
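      | // Worked example (illustrative): Mask = <0, 1, 2, 3, 7, 6, 5, 4> keeps |
      | // the low half in place, so only PSHUFHW is needed; HiMask becomes |
      | // {3, 2, 1, 0} after the -4 rebase, i.e. "pshufhw $0x1B". |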
14611 | |
14612 | SmallVector<int, 4> LoInputs; |
14613 | copy_if(LoMask, std::back_inserter(LoInputs), [](int M) { return M >= 0; }); |
14614 | array_pod_sort(LoInputs.begin(), LoInputs.end()); |
14615 | LoInputs.erase(std::unique(LoInputs.begin(), LoInputs.end()), LoInputs.end()); |
14616 | SmallVector<int, 4> HiInputs; |
14617 | copy_if(HiMask, std::back_inserter(HiInputs), [](int M) { return M >= 0; }); |
14618 | array_pod_sort(HiInputs.begin(), HiInputs.end()); |
14619 | HiInputs.erase(std::unique(HiInputs.begin(), HiInputs.end()), HiInputs.end()); |
14620 | int NumLToL = llvm::lower_bound(LoInputs, 4) - LoInputs.begin(); |
14621 | int NumHToL = LoInputs.size() - NumLToL; |
14622 | int NumLToH = llvm::lower_bound(HiInputs, 4) - HiInputs.begin(); |
14623 | int NumHToH = HiInputs.size() - NumLToH; |
14624 | MutableArrayRef<int> LToLInputs(LoInputs.data(), NumLToL); |
14625 | MutableArrayRef<int> LToHInputs(HiInputs.data(), NumLToH); |
14626 | MutableArrayRef<int> HToLInputs(LoInputs.data() + NumLToL, NumHToL); |
14627 | MutableArrayRef<int> HToHInputs(HiInputs.data() + NumLToH, NumHToH); |
14628 | |
14629 | // If we are shuffling values from one half, check how many different DWORD |
14630 | // pairs we need to create. If only 1 or 2 then we can perform this as a |
14631 | // PSHUFLW/PSHUFHW + PSHUFD instead of the PSHUFLW+PSHUFHW+PSHUFD chain below. |
14632 | auto ShuffleDWordPairs = [&](ArrayRef<int> PSHUFHalfMask, |
14633 | ArrayRef<int> PSHUFDMask, unsigned ShufWOp) { |
14634 | V = DAG.getNode(ShufWOp, DL, VT, V, |
14635 | getV4X86ShuffleImm8ForMask(PSHUFHalfMask, DL, DAG)); |
14636 | V = DAG.getBitcast(PSHUFDVT, V); |
14637 | V = DAG.getNode(X86ISD::PSHUFD, DL, PSHUFDVT, V, |
14638 | getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG)); |
14639 | return DAG.getBitcast(VT, V); |
14640 | }; |
14641 | |
14642 | if ((NumHToL + NumHToH) == 0 || (NumLToL + NumLToH) == 0) { |
14643 | int PSHUFDMask[4] = { -1, -1, -1, -1 }; |
14644 | SmallVector<std::pair<int, int>, 4> DWordPairs; |
14645 | int DOffset = ((NumHToL + NumHToH) == 0 ? 0 : 2); |
14646 | |
14647 | |
14648 | for (int DWord = 0; DWord != 4; ++DWord) { |
14649 | int M0 = Mask[2 * DWord + 0]; |
14650 | int M1 = Mask[2 * DWord + 1]; |
14651 | M0 = (M0 >= 0 ? M0 % 4 : M0); |
14652 | M1 = (M1 >= 0 ? M1 % 4 : M1); |
14653 | if (M0 < 0 && M1 < 0) |
14654 | continue; |
14655 | |
14656 | bool Match = false; |
14657 | for (int j = 0, e = DWordPairs.size(); j < e; ++j) { |
14658 | auto &DWordPair = DWordPairs[j]; |
14659 | if ((M0 < 0 || isUndefOrEqual(DWordPair.first, M0)) && |
14660 | (M1 < 0 || isUndefOrEqual(DWordPair.second, M1))) { |
14661 | DWordPair.first = (M0 >= 0 ? M0 : DWordPair.first); |
14662 | DWordPair.second = (M1 >= 0 ? M1 : DWordPair.second); |
14663 | PSHUFDMask[DWord] = DOffset + j; |
14664 | Match = true; |
14665 | break; |
14666 | } |
14667 | } |
14668 | if (!Match) { |
14669 | PSHUFDMask[DWord] = DOffset + DWordPairs.size(); |
14670 | DWordPairs.push_back(std::make_pair(M0, M1)); |
14671 | } |
14672 | } |
14673 | |
14674 | if (DWordPairs.size() <= 2) { |
14675 | DWordPairs.resize(2, std::make_pair(-1, -1)); |
14676 | int PSHUFHalfMask[4] = {DWordPairs[0].first, DWordPairs[0].second, |
14677 | DWordPairs[1].first, DWordPairs[1].second}; |
14678 | if ((NumHToL + NumHToH) == 0) |
14679 | return ShuffleDWordPairs(PSHUFHalfMask, PSHUFDMask, X86ISD::PSHUFLW); |
14680 | if ((NumLToL + NumLToH) == 0) |
14681 | return ShuffleDWordPairs(PSHUFHalfMask, PSHUFDMask, X86ISD::PSHUFHW); |
14682 | } |
14683 | } |
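      | // Worked example (illustrative): Mask = <0, 1, 0, 1, 2, 3, 2, 3> forms |
      | // only two dword pairs, (0,1) and (2,3), which already exist in the |
      | // input, so PSHUFHalfMask is the identity and a single PSHUFD with mask |
      | // {0, 0, 1, 1} (immediate 0x50) finishes the shuffle. |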
14684 | |
14685 | // Simplify the 1-into-3 and 3-into-1 cases with a single PSHUFD. For all |
14686 | // such inputs we can swap two of the dwords across the half mark and end |
14687 | // up with <=2 inputs to each half in each half. Once there, we can fall |
14688 | // through to the generic code below. For example: |
14689 | // |
14690 | // Input: [a, b, c, d, e, f, g, h] -PSHUFD[0,2,1,3]-> [a, b, e, f, c, d, g, h] |
14691 | // Mask:  [0, 1, 2, 7, 4, 5, 6, 3] -----------------> [0, 1, 4, 7, 2, 3, 6, 5] |
14692 | // |
14693 | // However, in some very rare cases we have a 1-into-3 or 3-into-1 on one |
14694 | // half and an existing 2-into-2 on the other half. After the dword swap, |
14695 | // that 2-into-2 half could itself turn into a 3-into-1 or 1-into-3 |
14696 | // pattern, and naively fixing each side in turn could cycle forever. To |
14697 | // avoid this, the lambda below pre-shuffles the 2-into-2 half with a |
14698 | // PSHUFLW/PSHUFHW whenever the swap would unbalance it, so a single dword |
14699 | // swap always leaves at most two inputs targeting each half. The index |
14700 | // arithmetic relies on the fact that the four inputs feeding one half sum |
14701 | // to a known value when three come from the same side, which pins down |
14702 | // the dword holding the "triple" and the dword holding the single input. |
14715 | auto balanceSides = [&](ArrayRef<int> AToAInputs, ArrayRef<int> BToAInputs, |
14716 | ArrayRef<int> BToBInputs, ArrayRef<int> AToBInputs, |
14717 | int AOffset, int BOffset) { |
14718 | assert((AToAInputs.size() == 3 || AToAInputs.size() == 1) && |
14719 | "Must call this with A having 3 or 1 inputs from the A half."); |
14720 | assert((BToAInputs.size() == 1 || BToAInputs.size() == 3) && |
14721 | "Must call this with B having 1 or 3 inputs from the B half."); |
14722 | assert(AToAInputs.size() + BToAInputs.size() == 4 && |
14723 | "Must call this with either 3:1 or 1:3 inputs (summing to 4)."); |
14724 | |
14725 | bool ThreeAInputs = AToAInputs.size() == 3; |
14726 | |
14727 | // Work out which dword on the triple's side holds the word that is NOT |
14728 | // one of its three inputs, and which dword on the single input's side to |
14729 | // send across; these are the two dwords the PSHUFD below exchanges. The |
14730 | // triple's dword falls out of the sum trick described above. |
14731 | int ADWord = 0, BDWord = 0; |
14732 | int &TripleDWord = ThreeAInputs ? ADWord : BDWord; |
14733 | int &OneInputDWord = ThreeAInputs ? BDWord : ADWord; |
14734 | int TripleInputOffset = ThreeAInputs ? AOffset : BOffset; |
14735 | ArrayRef<int> TripleInputs = ThreeAInputs ? AToAInputs : BToAInputs; |
14736 | int OneInput = ThreeAInputs ? BToAInputs[0] : AToAInputs[0]; |
14737 | int TripleInputSum = 0 + 1 + 2 + 3 + (4 * TripleInputOffset); |
14738 | int TripleNonInputIdx = |
14739 | TripleInputSum - std::accumulate(TripleInputs.begin(), TripleInputs.end(), 0); |
14740 | TripleDWord = TripleNonInputIdx / 2; |
14741 | // Swap out the dword of the single input's half that does *not* contain |
14742 | // it: the single input stays put, while the dword crossing over from the |
14743 | // triple's side carries exactly one input, leaving a balanced 2+2 split. |
14744 | OneInputDWord = (OneInput / 2) ^ 1; |
14745 | |
14746 | // However, if the other half also has a 2-into-2 pattern of inputs |
14747 | // targeting it, the dword swap above may split one of its pairs across |
14748 | // the moved dwords. Detect that case and pre-shuffle the 2-into-2 half |
14749 | // first so the swap leaves it intact (see the overview comment above the |
14750 | // lambda). |
14751 | if (BToBInputs.size() == 2 && AToBInputs.size() == 2) { |
14752 | // Count how many of each side's inputs land in the dwords that the |
14753 | // upcoming swap will move. Exactly one flipped input on one side, with an |
14754 | // even count on the other, means the swap would split a pair and we must |
14755 | // fix things up first. |
14756 | int NumFlippedAToBInputs = |
14757 | std::count(AToBInputs.begin(), AToBInputs.end(), 2 * ADWord) + |
14758 | std::count(AToBInputs.begin(), AToBInputs.end(), 2 * ADWord + 1); |
14759 | int NumFlippedBToBInputs = |
14760 | std::count(BToBInputs.begin(), BToBInputs.end(), 2 * BDWord) + |
14761 | std::count(BToBInputs.begin(), BToBInputs.end(), 2 * BDWord + 1); |
14762 | if ((NumFlippedAToBInputs == 1 && |
14763 | (NumFlippedBToBInputs == 0 || NumFlippedBToBInputs == 2)) || |
14764 | (NumFlippedBToBInputs == 1 && |
14765 | (NumFlippedAToBInputs == 0 || NumFlippedAToBInputs == 2))) { |
14766 | // Swap one of the flipped inputs with a non-input word in the same half |
14767 | // (via PSHUFLW/PSHUFHW) so that the dword swap no longer splits a pair. |
14768 | // The pinned index is the word whose position the dword swap depends on |
14769 | // and which therefore must not move. |
14770 | auto FixFlippedInputs = [&V, &DL, &Mask, &DAG](int PinnedIdx, int DWord, |
14771 | ArrayRef<int> Inputs) { |
14772 | int FixIdx = PinnedIdx ^ 1; |
14773 | bool IsFixIdxInput = is_contained(Inputs, PinnedIdx ^ 1); |
14774 | |
14775 | |
14776 | |
14777 | int FixFreeIdx = 2 * (DWord ^ (PinnedIdx / 2 == DWord)); |
14778 | bool IsFixFreeIdxInput = is_contained(Inputs, FixFreeIdx); |
14779 | if (IsFixIdxInput == IsFixFreeIdxInput) |
14780 | FixFreeIdx += 1; |
14781 | IsFixFreeIdxInput = is_contained(Inputs, FixFreeIdx); |
14782 | assert(IsFixIdxInput != IsFixFreeIdxInput && |
14783 | "We need to be changing the number of flipped inputs!"); |
14784 | int PSHUFHalfMask[] = {0, 1, 2, 3}; |
14785 | std::swap(PSHUFHalfMask[FixFreeIdx % 4], PSHUFHalfMask[FixIdx % 4]); |
14786 | V = DAG.getNode( |
14787 | FixIdx < 4 ? X86ISD::PSHUFLW : X86ISD::PSHUFHW, DL, |
14788 | MVT::getVectorVT(MVT::i16, V.getValueSizeInBits() / 16), V, |
14789 | getV4X86ShuffleImm8ForMask(PSHUFHalfMask, DL, DAG)); |
14790 | |
14791 | for (int &M : Mask) |
14792 | if (M >= 0 && M == FixIdx) |
14793 | M = FixFreeIdx; |
14794 | else if (M >= 0 && M == FixFreeIdx) |
14795 | M = FixIdx; |
14796 | }; |
14797 | if (NumFlippedBToBInputs != 0) { |
14798 | int BPinnedIdx = |
14799 | BToAInputs.size() == 3 ? TripleNonInputIdx : OneInput; |
14800 | FixFlippedInputs(BPinnedIdx, BDWord, BToBInputs); |
14801 | } else { |
14802 | assert(NumFlippedAToBInputs != 0 && "Impossible given predicates!"); |
14803 | int APinnedIdx = ThreeAInputs ? TripleNonInputIdx : OneInput; |
14804 | FixFlippedInputs(APinnedIdx, ADWord, AToBInputs); |
14805 | } |
14806 | } |
14807 | } |
14808 | |
14809 | int PSHUFDMask[] = {0, 1, 2, 3}; |
14810 | PSHUFDMask[ADWord] = BDWord; |
14811 | PSHUFDMask[BDWord] = ADWord; |
14812 | V = DAG.getBitcast( |
14813 | VT, |
14814 | DAG.getNode(X86ISD::PSHUFD, DL, PSHUFDVT, DAG.getBitcast(PSHUFDVT, V), |
14815 | getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG))); |
14816 | |
14817 | |
14818 | for (int &M : Mask) |
14819 | if (M >= 0 && M/2 == ADWord) |
14820 | M = 2 * BDWord + M % 2; |
14821 | else if (M >= 0 && M/2 == BDWord) |
14822 | M = 2 * ADWord + M % 2; |
14823 | |
14824 | // Recurse: the halves are now balanced (at most two inputs each), so the |
14825 | // generic code below can finish the lowering. |
14826 | return lowerV8I16GeneralSingleInputShuffle(DL, VT, V, Mask, Subtarget, DAG); |
14827 | }; |
14828 | if ((NumLToL == 3 && NumHToL == 1) || (NumLToL == 1 && NumHToL == 3)) |
14829 | return balanceSides(LToLInputs, HToLInputs, HToHInputs, LToHInputs, 0, 4); |
14830 | if ((NumHToH == 3 && NumLToH == 1) || (NumHToH == 1 && NumLToH == 3)) |
14831 | return balanceSides(HToHInputs, LToHInputs, LToLInputs, HToLInputs, 4, 0); |
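      | // Worked instance of balanceSides (illustrative): for the mask |
      | // [0, 1, 2, 7, 4, 5, 6, 3], NumLToL = 3 and NumHToL = 1, so the first |
      | // call fires with TripleNonInputIdx = 6 - (0+1+2) = 3, ADWord = 1, and |
      | // BDWord = (7/2)^1 = 2. The PSHUFD mask {0, 2, 1, 3} swaps dwords 1 and |
      | // 2, and the remapped mask becomes [0, 1, 4, 7, 2, 3, 6, 5], which has |
      | // two inputs per half and recurses into the generic path. |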
14832 | |
14833 | // At this point there are at most two inputs to the low and high halves |
14834 | // from each half. That means the inputs can always be grouped into dwords |
14835 | // and those dwords can then be moved to the correct half with a dword |
14836 | // shuffle. We use at most one low and one high word shuffle to collect |
14837 | // these paired inputs into dwords, and finally a dword shuffle to place them. |
14838 | int PSHUFLMask[4] = {-1, -1, -1, -1}; |
14839 | int PSHUFHMask[4] = {-1, -1, -1, -1}; |
14840 | int PSHUFDMask[4] = {-1, -1, -1, -1}; |
14841 | |
14842 | // First fix the masks for all the inputs that are staying in their |
14843 | // original halves. This way the set of remaining inputs to each half is |
14844 | // guaranteed to come from the opposite half. |
14845 | auto fixInPlaceInputs = |
14846 | [&PSHUFDMask](ArrayRef<int> InPlaceInputs, ArrayRef<int> IncomingInputs, |
14847 | MutableArrayRef<int> SourceHalfMask, |
14848 | MutableArrayRef<int> HalfMask, int HalfOffset) { |
14849 | if (InPlaceInputs.empty()) |
14850 | return; |
14851 | if (InPlaceInputs.size() == 1) { |
14852 | SourceHalfMask[InPlaceInputs[0] - HalfOffset] = |
14853 | InPlaceInputs[0] - HalfOffset; |
14854 | PSHUFDMask[InPlaceInputs[0] / 2] = InPlaceInputs[0] / 2; |
14855 | return; |
14856 | } |
14857 | if (IncomingInputs.empty()) { |
14858 | |
14859 | for (int Input : InPlaceInputs) { |
14860 | SourceHalfMask[Input - HalfOffset] = Input - HalfOffset; |
14861 | PSHUFDMask[Input / 2] = Input / 2; |
14862 | } |
14863 | return; |
14864 | } |
14865 | |
14866 | assert(InPlaceInputs.size() == 2 && "Cannot handle 3 or 4 inputs!"); |
14867 | SourceHalfMask[InPlaceInputs[0] - HalfOffset] = |
14868 | InPlaceInputs[0] - HalfOffset; |
14869 | // With two in-place inputs, pack the second into the word adjacent to the |
14870 | // first so that together they occupy a single dword. |
14871 | int AdjIndex = InPlaceInputs[0] ^ 1; |
14872 | SourceHalfMask[AdjIndex - HalfOffset] = InPlaceInputs[1] - HalfOffset; |
14873 | std::replace(HalfMask.begin(), HalfMask.end(), InPlaceInputs[1], AdjIndex); |
14874 | PSHUFDMask[AdjIndex / 2] = AdjIndex / 2; |
14875 | }; |
14876 | fixInPlaceInputs(LToLInputs, HToLInputs, PSHUFLMask, LoMask, 0); |
14877 | fixInPlaceInputs(HToHInputs, LToHInputs, PSHUFHMask, HiMask, 4); |
14878 | |
14879 | // Now gather the cross-half inputs: build a word shuffle within the |
14880 | // source half that collects them into pairable dwords, record the dword |
14881 | // move in PSHUFDMask, and rewrite HalfMask to track the inputs as they |
14882 | // migrate to the destination half. |
14883 | auto moveInputsToRightHalf = [&PSHUFDMask]( |
14884 | MutableArrayRef<int> IncomingInputs, ArrayRef<int> ExistingInputs, |
14885 | MutableArrayRef<int> SourceHalfMask, MutableArrayRef<int> HalfMask, |
14886 | MutableArrayRef<int> FinalSourceHalfMask, int SourceOffset, |
14887 | int DestOffset) { |
14888 | auto isWordClobbered = [](ArrayRef<int> SourceHalfMask, int Word) { |
14889 | return SourceHalfMask[Word] >= 0 && SourceHalfMask[Word] != Word; |
14890 | }; |
14891 | auto isDWordClobbered = [&isWordClobbered](ArrayRef<int> SourceHalfMask, |
14892 | int Word) { |
14893 | int LowWord = Word & ~1; |
14894 | int HighWord = Word | 1; |
14895 | return isWordClobbered(SourceHalfMask, LowWord) || |
14896 | isWordClobbered(SourceHalfMask, HighWord); |
14897 | }; |
14898 | |
14899 | if (IncomingInputs.empty()) |
14900 | return; |
14901 | |
14902 | if (ExistingInputs.empty()) { |
14903 | |
14904 | for (int Input : IncomingInputs) { |
14905 | |
14906 | |
14907 | if (isWordClobbered(SourceHalfMask, Input - SourceOffset)) { |
14908 | if (SourceHalfMask[SourceHalfMask[Input - SourceOffset]] < 0) { |
14909 | SourceHalfMask[SourceHalfMask[Input - SourceOffset]] = |
14910 | Input - SourceOffset; |
14911 | |
14912 | for (int &M : HalfMask) |
14913 | if (M == SourceHalfMask[Input - SourceOffset] + SourceOffset) |
14914 | M = Input; |
14915 | else if (M == Input) |
14916 | M = SourceHalfMask[Input - SourceOffset] + SourceOffset; |
14917 | } else { |
14918 | assert(SourceHalfMask[SourceHalfMask[Input - SourceOffset]] == |
14919 | Input - SourceOffset && |
14920 | "Previous placement doesn't match!"); |
14921 | } |
14922 | |
14923 | |
14924 | |
14925 | Input = SourceHalfMask[Input - SourceOffset] + SourceOffset; |
14926 | } |
14927 | |
14928 | |
14929 | if (PSHUFDMask[(Input - SourceOffset + DestOffset) / 2] < 0) |
14930 | PSHUFDMask[(Input - SourceOffset + DestOffset) / 2] = Input / 2; |
14931 | else |
14932 | assert(PSHUFDMask[(Input - SourceOffset + DestOffset) / 2] == |
14933 | Input / 2 && |
14934 | "Previous placement doesn't match!"); |
14935 | } |
14936 | |
14937 | // Finally, rewrite the half mask to reference the destination half: every |
14938 | // incoming input now has a dword scheduled to carry it across, so the |
14939 | // final PSHUFD will present it at the destination offset. |
14940 | for (int &M : HalfMask) |
14941 | if (M >= SourceOffset && M < SourceOffset + 4) { |
14942 | M = M - SourceOffset + DestOffset; |
14943 | assert(M >= 0 && "This should never wrap below zero!"); |
14944 | } |
14945 | return; |
14946 | } |
14947 | |
14948 | // Otherwise the target half already has live inputs, so the incoming |
14949 | // words must be placed without disturbing them: a lone incoming input |
14950 | // just needs a clobber-free word; two must end up sharing one dword. |
14951 | if (IncomingInputs.size() == 1) { |
14952 | if (isWordClobbered(SourceHalfMask, IncomingInputs[0] - SourceOffset)) { |
14953 | int InputFixed = find(SourceHalfMask, -1) - std::begin(SourceHalfMask) + |
14954 | SourceOffset; |
14955 | SourceHalfMask[InputFixed - SourceOffset] = |
14956 | IncomingInputs[0] - SourceOffset; |
14957 | std::replace(HalfMask.begin(), HalfMask.end(), IncomingInputs[0], |
14958 | InputFixed); |
14959 | IncomingInputs[0] = InputFixed; |
14960 | } |
14961 | } else if (IncomingInputs.size() == 2) { |
14962 | if (IncomingInputs[0] / 2 != IncomingInputs[1] / 2 || |
14963 | isDWordClobbered(SourceHalfMask, IncomingInputs[0] - SourceOffset)) { |
14964 | |
14965 | // We have two non-adjacent (or clobbered) incoming inputs. Rewrite them |
14966 | // within the source half so they form a single, unclobbered dword pair. |
14967 | int InputsFixed[2] = {IncomingInputs[0] - SourceOffset, |
14968 | IncomingInputs[1] - SourceOffset}; |
14969 | |
14970 | // If either input sits in an unclobbered word whose partner word is |
14971 | // free, pull the other input alongside it; failing that, look for a |
14972 | // completely free dword that can host both. |
14973 | if (!isWordClobbered(SourceHalfMask, InputsFixed[0]) && |
14974 | SourceHalfMask[InputsFixed[0] ^ 1] < 0) { |
14975 | SourceHalfMask[InputsFixed[0]] = InputsFixed[0]; |
14976 | SourceHalfMask[InputsFixed[0] ^ 1] = InputsFixed[1]; |
14977 | InputsFixed[1] = InputsFixed[0] ^ 1; |
14978 | } else if (!isWordClobbered(SourceHalfMask, InputsFixed[1]) && |
14979 | SourceHalfMask[InputsFixed[1] ^ 1] < 0) { |
14980 | SourceHalfMask[InputsFixed[1]] = InputsFixed[1]; |
14981 | SourceHalfMask[InputsFixed[1] ^ 1] = InputsFixed[0]; |
14982 | InputsFixed[0] = InputsFixed[1] ^ 1; |
14983 | } else if (SourceHalfMask[2 * ((InputsFixed[0] / 2) ^ 1)] < 0 && |
14984 | SourceHalfMask[2 * ((InputsFixed[0] / 2) ^ 1) + 1] < 0) { |
14985 | // Neither input has a usable partner slot, but another dword is |
14986 | // completely free: move both inputs into it so that they form an |
14987 | // adjacent pair. |
14988 | SourceHalfMask[2 * ((InputsFixed[0] / 2) ^ 1)] = InputsFixed[0]; |
14989 | SourceHalfMask[2 * ((InputsFixed[0] / 2) ^ 1) + 1] = InputsFixed[1]; |
14990 | InputsFixed[0] = 2 * ((InputsFixed[0] / 2) ^ 1); |
14991 | InputsFixed[1] = 2 * ((InputsFixed[0] / 2) ^ 1) + 1; |
14992 | } else { |
14993 | // The only way we hit this point is when nothing else in the source half |
14994 | // mask is clobbered (the asserts below check this), so we can directly |
14995 | // swap the partner word of the first input with the second input without |
14996 | // splitting any existing pair. |
14997 | for (int i = 0; i < 4; ++i) |
14998 | assert((SourceHalfMask[i] < 0 || SourceHalfMask[i] == i) && |
14999 | "We can't handle any clobbers here!"); |
15000 | assert(InputsFixed[1] != (InputsFixed[0] ^ 1) && |
15001 | "Cannot have adjacent inputs here!"); |
15002 | |
15003 | SourceHalfMask[InputsFixed[0] ^ 1] = InputsFixed[1]; |
15004 | SourceHalfMask[InputsFixed[1]] = InputsFixed[0] ^ 1; |
15005 | |
15006 | // The swap may have moved a word that the *final* half mask still |
15007 | // references; keep that mask in sync. |
15008 | for (int &M : FinalSourceHalfMask) |
15009 | if (M == (InputsFixed[0] ^ 1) + SourceOffset) |
15010 | M = InputsFixed[1] + SourceOffset; |
15011 | else if (M == InputsFixed[1] + SourceOffset) |
15012 | M = (InputsFixed[0] ^ 1) + SourceOffset; |
15013 | |
15014 | InputsFixed[1] = InputsFixed[0] ^ 1; |
15015 | } |
15016 | |
15017 | |
15018 | for (int &M : HalfMask) |
15019 | if (M == IncomingInputs[0]) |
15020 | M = InputsFixed[0] + SourceOffset; |
15021 | else if (M == IncomingInputs[1]) |
15022 | M = InputsFixed[1] + SourceOffset; |
15023 | |
15024 | IncomingInputs[0] = InputsFixed[0] + SourceOffset; |
15025 | IncomingInputs[1] = InputsFixed[1] + SourceOffset; |
15026 | } |
15027 | } else { |
15028 | llvm_unreachable("Unhandled input size!"); |
15029 | } |
15030 | |
15031 | |
15032 | int FreeDWord = (PSHUFDMask[DestOffset / 2] < 0 ? 0 : 1) + DestOffset / 2; |
15033 | assert(PSHUFDMask[FreeDWord] < 0 && "DWord not free"); |
15034 | PSHUFDMask[FreeDWord] = IncomingInputs[0] / 2; |
15035 | for (int &M : HalfMask) |
15036 | for (int Input : IncomingInputs) |
15037 | if (M == Input) |
15038 | M = FreeDWord * 2 + Input % 2; |
15039 | }; |
15040 | moveInputsToRightHalf(HToLInputs, LToLInputs, PSHUFHMask, LoMask, HiMask, |
15041 | 4, 0); |
15042 | moveInputsToRightHalf(LToHInputs, HToHInputs, PSHUFLMask, HiMask, LoMask, |
15043 | 0, 4); |
15044 | |
15045 | // Now enact all the shuffles we've computed to move the inputs into their |
15046 | // target halves: low word shuffle, high word shuffle, then dword shuffle. |
15047 | if (!isNoopShuffleMask(PSHUFLMask)) |
15048 | V = DAG.getNode(X86ISD::PSHUFLW, DL, VT, V, |
15049 | getV4X86ShuffleImm8ForMask(PSHUFLMask, DL, DAG)); |
15050 | if (!isNoopShuffleMask(PSHUFHMask)) |
15051 | V = DAG.getNode(X86ISD::PSHUFHW, DL, VT, V, |
15052 | getV4X86ShuffleImm8ForMask(PSHUFHMask, DL, DAG)); |
15053 | if (!isNoopShuffleMask(PSHUFDMask)) |
15054 | V = DAG.getBitcast( |
15055 | VT, |
15056 | DAG.getNode(X86ISD::PSHUFD, DL, PSHUFDVT, DAG.getBitcast(PSHUFDVT, V), |
15057 | getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG))); |
15058 | |
15059 | // At this point each half should only reference inputs that already live |
15060 | // in that half; the asserts below verify this invariant. |
15061 | assert(count_if(LoMask, [](int M) { return M >= 4; }) == 0 && |
15062 | "Failed to lift all the high half inputs to the low mask!"); |
15063 | assert(count_if(HiMask, [](int M) { return M >= 0 && M < 4; }) == 0 && |
15064 | "Failed to lift all the low half inputs to the high mask!"); |
15065 | |
15066 | // Do a half shuffle for the low mask. |
15067 | if (!isNoopShuffleMask(LoMask)) |
15068 | V = DAG.getNode(X86ISD::PSHUFLW, DL, VT, V, |
15069 | getV4X86ShuffleImm8ForMask(LoMask, DL, DAG)); |
15070 | |
15071 | // Do a half shuffle with the high mask after shifting its values down. |
15072 | for (int &M : HiMask) |
15073 | if (M >= 0) |
15074 | M -= 4; |
15075 | if (!isNoopShuffleMask(HiMask)) |
15076 | V = DAG.getNode(X86ISD::PSHUFHW, DL, VT, V, |
15077 | getV4X86ShuffleImm8ForMask(HiMask, DL, DAG)); |
15078 | |
15079 | return V; |
15080 | } |
15081 | |
15082 | /// Helper to form a PSHUFB-based shuffle+blend, opportunistically avoiding |
15083 | /// the blend if only one input is used. |
15084 | static SDValue lowerShuffleAsBlendOfPSHUFBs( |
15085 | const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask, |
15086 | const APInt &Zeroable, SelectionDAG &DAG, bool &V1InUse, bool &V2InUse) { |
15087 | assert(!is128BitLaneCrossingShuffleMask(VT, Mask) && |
15088 | "Lane crossing shuffle masks not supported"); |
15089 | |
15090 | int NumBytes = VT.getSizeInBits() / 8; |
15091 | int Size = Mask.size(); |
15092 | int Scale = NumBytes / Size; |
15093 | |
15094 | SmallVector<SDValue, 64> V1Mask(NumBytes, DAG.getUNDEF(MVT::i8)); |
15095 | SmallVector<SDValue, 64> V2Mask(NumBytes, DAG.getUNDEF(MVT::i8)); |
15096 | V1InUse = false; |
15097 | V2InUse = false; |
15098 | |
15099 | for (int i = 0; i < NumBytes; ++i) { |
15100 | int M = Mask[i / Scale]; |
15101 | if (M < 0) |
15102 | continue; |
15103 | |
15104 | const int ZeroMask = 0x80; |
15105 | int V1Idx = M < Size ? M * Scale + i % Scale : ZeroMask; |
15106 | int V2Idx = M < Size ? ZeroMask : (M - Size) * Scale + i % Scale; |
15107 | if (Zeroable[i / Scale]) |
15108 | V1Idx = V2Idx = ZeroMask; |
15109 | |
15110 | V1Mask[i] = DAG.getConstant(V1Idx, DL, MVT::i8); |
15111 | V2Mask[i] = DAG.getConstant(V2Idx, DL, MVT::i8); |
15112 | V1InUse |= (ZeroMask != V1Idx); |
15113 | V2InUse |= (ZeroMask != V2Idx); |
15114 | } |
15115 | |
15116 | MVT ShufVT = MVT::getVectorVT(MVT::i8, NumBytes); |
15117 | if (V1InUse) |
15118 | V1 = DAG.getNode(X86ISD::PSHUFB, DL, ShufVT, DAG.getBitcast(ShufVT, V1), |
15119 | DAG.getBuildVector(ShufVT, DL, V1Mask)); |
15120 | if (V2InUse) |
15121 | V2 = DAG.getNode(X86ISD::PSHUFB, DL, ShufVT, DAG.getBitcast(ShufVT, V2), |
15122 | DAG.getBuildVector(ShufVT, DL, V2Mask)); |
15123 | |
15124 | // If we need shuffled inputs from both, blend the two. |
15125 | SDValue V; |
15126 | if (V1InUse && V2InUse) |
15127 | V = DAG.getNode(ISD::OR, DL, ShufVT, V1, V2); |
15128 | else |
15129 | V = V1InUse ? V1 : V2; |
15130 | |
15131 | // Cast the result back to the requested type. |
15132 | return DAG.getBitcast(VT, V); |
15133 | } |
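      | // Worked example (illustrative): for a v8i16 shuffle, Scale = 2, so mask |
      | // element M = 9 (V2's word 1) expands to V2 byte indices {2, 3} and V1 |
      | // byte indices {0x80, 0x80}; the 0x80 bit makes PSHUFB write zeros, so |
      | // the final OR sees a clean blend of the two shuffled inputs. |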
15134 | |
15135 | /// Generic lowering of 8-lane i16 shuffles. |
15136 | /// |
15137 | /// This handles both single-input shuffles and combined shuffle/blends |
15138 | /// with two inputs. The single inputs are immediately delegated to a |
15139 | /// dedicated lowering routine. |
15140 | /// |
15141 | /// For blends it tries the cheaper strategies first (shifts, SSE4A |
15142 | /// extraction/insertion, element insertion, blends, bit masks, unpacks, |
15143 | /// packs, rotates), then PACKUS compaction and permute+unpack, then a |
15144 | /// PSHUFB pair, and finally a decomposition into two permutes and a merge. |
15147 | static SDValue lowerV8I16Shuffle(const SDLoc &DL, ArrayRef<int> Mask, |
15148 | const APInt &Zeroable, SDValue V1, SDValue V2, |
15149 | const X86Subtarget &Subtarget, |
15150 | SelectionDAG &DAG) { |
15151 | assert(V1.getSimpleValueType() == MVT::v8i16 && "Bad operand type!"); |
15152 | assert(V2.getSimpleValueType() == MVT::v8i16 && "Bad operand type!"); |
15153 | assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!"); |
15154 | |
15155 | // Whenever we can lower this as a zext, that instruction is strictly |
15156 | // faster than any alternative. |
15157 | if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend(DL, MVT::v8i16, V1, V2, Mask, |
15158 | Zeroable, Subtarget, DAG)) |
15159 | return ZExt; |
15160 | |
15161 | // Try to lower via an AVX512 truncation (VPMOV*) if available. |
15162 | if (SDValue V = lowerShuffleWithVPMOV(DL, MVT::v8i16, V1, V2, Mask, Zeroable, |
15163 | Subtarget, DAG)) |
15164 | return V; |
15165 | |
15166 | int NumV2Inputs = count_if(Mask, [](int M) { return M >= 8; }); |
15167 | |
15168 | if (NumV2Inputs == 0) { |
15169 | |
15170 | if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v8i16, V1, V1, Mask, |
15171 | Zeroable, Subtarget, DAG)) |
15172 | return Shift; |
15173 | |
15174 | // Check for being able to broadcast a single element. |
15175 | if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v8i16, V1, V2, |
15176 | Mask, Subtarget, DAG)) |
15177 | return Broadcast; |
15178 | |
15179 | // Try to use bit rotation instructions. |
15180 | if (SDValue Rotate = lowerShuffleAsBitRotate(DL, MVT::v8i16, V1, Mask, |
15181 | Subtarget, DAG)) |
15182 | return Rotate; |
15183 | |
15184 | // Use dedicated unpack instructions for masks that match their pattern. |
15185 | if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v8i16, Mask, V1, V2, DAG)) |
15186 | return V; |
15187 | |
15188 | // Use dedicated pack instructions for masks that match their pattern. |
15189 | if (SDValue V = lowerShuffleWithPACK(DL, MVT::v8i16, Mask, V1, V2, DAG, |
15190 | Subtarget)) |
15191 | return V; |
15192 | |
15193 | // Try to use byte rotation instructions. |
15194 | if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v8i16, V1, V1, Mask, |
15195 | Subtarget, DAG)) |
15196 | return Rotate; |
15197 | |
15198 | // Make a copy of the mask so it can be modified in place. |
15199 | SmallVector<int, 8> MutableMask(Mask.begin(), Mask.end()); |
15200 | return lowerV8I16GeneralSingleInputShuffle(DL, MVT::v8i16, V1, MutableMask, |
15201 | Subtarget, DAG); |
15202 | } |
15203 | |
15204 | assert(llvm::any_of(Mask, [](int M) { return M >= 0 && M < 8; }) && |
15205 | "All single-input shuffles should be canonicalized to be V1-input " |
15206 | "shuffles."); |
15207 | |
15208 | // Try to use shift instructions. |
15209 | if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v8i16, V1, V2, Mask, |
15210 | Zeroable, Subtarget, DAG)) |
15211 | return Shift; |
15212 | |
15213 | // See if we can use SSE4A extraction / insertion. |
15214 | if (Subtarget.hasSSE4A()) |
15215 | if (SDValue V = lowerShuffleWithSSE4A(DL, MVT::v8i16, V1, V2, Mask, |
15216 | Zeroable, DAG)) |
15217 | return V; |
15218 | |
15219 | // There are special ways we can lower some single-element blends. |
15220 | if (NumV2Inputs == 1) |
15221 | if (SDValue V = lowerShuffleAsElementInsertion( |
15222 | DL, MVT::v8i16, V1, V2, Mask, Zeroable, Subtarget, DAG)) |
15223 | return V; |
15224 | |
15225 | // We have different paths for blend lowering, but they all must use the |
15226 | // *exact* same predicate. |
15227 | bool IsBlendSupported = Subtarget.hasSSE41(); |
15228 | if (IsBlendSupported) |
15229 | if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v8i16, V1, V2, Mask, |
15230 | Zeroable, Subtarget, DAG)) |
15231 | return Blend; |
15232 | |
15233 | if (SDValue Masked = lowerShuffleAsBitMask(DL, MVT::v8i16, V1, V2, Mask, |
15234 | Zeroable, Subtarget, DAG)) |
15235 | return Masked; |
15236 | |
15237 | // Use dedicated unpack instructions for masks that match their pattern. |
15238 | if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v8i16, Mask, V1, V2, DAG)) |
15239 | return V; |
15240 | |
15241 | // Use dedicated pack instructions for masks that match their pattern. |
15242 | if (SDValue V = lowerShuffleWithPACK(DL, MVT::v8i16, Mask, V1, V2, DAG, |
15243 | Subtarget)) |
15244 | return V; |
15245 | |
15246 | // Try to lower the shuffle via a truncation (VTRUNC). |
15247 | if (SDValue V = lowerShuffleAsVTRUNC(DL, MVT::v8i16, V1, V2, Mask, Zeroable, |
15248 | Subtarget, DAG)) |
15249 | return V; |
15250 | |
15251 | // Try to use byte rotation instructions. |
15252 | if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v8i16, V1, V2, Mask, |
15253 | Subtarget, DAG)) |
15254 | return Rotate; |
15255 | |
15256 | if (SDValue BitBlend = |
15257 | lowerShuffleAsBitBlend(DL, MVT::v8i16, V1, V2, Mask, DAG)) |
15258 | return BitBlend; |
15259 | |
15260 | // Try to use byte shift instructions to mask off elements. |
15261 | if (SDValue V = lowerShuffleAsByteShiftMask(DL, MVT::v8i16, V1, V2, Mask, |
15262 | Zeroable, Subtarget, DAG)) |
15263 | return V; |
15264 | |
15265 | // If the shuffle keeps only the even-index words (possibly repeatedly), |
15266 | // we can clear the odd words with an AND and use PACKUS to compact the |
15267 | // vector, dropping the cleared elements. |
15268 | int NumEvenDrops = canLowerByDroppingEvenElements(Mask, false); |
15269 | if ((NumEvenDrops == 1 || NumEvenDrops == 2) && Subtarget.hasSSE41() && |
15270 | !Subtarget.hasVLX()) { |
15271 | SmallVector<SDValue, 8> DWordClearOps(4, DAG.getConstant(0, DL, MVT::i32)); |
15272 | for (unsigned i = 0; i != 4; i += 1 << (NumEvenDrops - 1)) |
15273 | DWordClearOps[i] = DAG.getConstant(0xFFFF, DL, MVT::i32); |
15274 | SDValue DWordClearMask = DAG.getBuildVector(MVT::v4i32, DL, DWordClearOps); |
15275 | V1 = DAG.getNode(ISD::AND, DL, MVT::v4i32, DAG.getBitcast(MVT::v4i32, V1), |
15276 | DWordClearMask); |
15277 | V2 = DAG.getNode(ISD::AND, DL, MVT::v4i32, DAG.getBitcast(MVT::v4i32, V2), |
15278 | DWordClearMask); |
15279 | |
15280 | SDValue Result = DAG.getNode(X86ISD::PACKUS, DL, MVT::v8i16, V1, V2); |
15281 | if (NumEvenDrops == 2) { |
15282 | Result = DAG.getBitcast(MVT::v4i32, Result); |
15283 | Result = DAG.getNode(X86ISD::PACKUS, DL, MVT::v8i16, Result, Result); |
15284 | } |
15285 | return Result; |
15286 | } |
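      | // Worked example (illustrative): the compaction Mask = |
      | // <0, 2, 4, 6, 8, 10, 12, 14> has NumEvenDrops == 1. Every dword of the |
      | // clear mask is 0xFFFF, the ANDs zero words 1, 3, 5 and 7 of each input, |
      | // and PACKUS then packs the dwords back to words; since each dword value |
      | // now fits in 16 bits, no saturation occurs and the even words survive. |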
15287 | |
15288 | // Try to lower by permuting the inputs into an unpack instruction. |
15289 | if (SDValue Unpack = lowerShuffleAsPermuteAndUnpack(DL, MVT::v8i16, V1, V2, |
15290 | Mask, Subtarget, DAG)) |
15291 | return Unpack; |
15292 | |
15293 | // Without SSE4.1 blends, a pair of SSSE3 PSHUFBs ORed together is the |
15294 | // best remaining strategy for an arbitrary two-input word shuffle. |
15295 | if (!IsBlendSupported && Subtarget.hasSSSE3()) { |
15296 | bool V1InUse, V2InUse; |
15297 | return lowerShuffleAsBlendOfPSHUFBs(DL, MVT::v8i16, V1, V2, Mask, |
15298 | Zeroable, DAG, V1InUse, V2InUse); |
15299 | } |
15300 | |
15301 | // As a last resort, decompose the shuffle into a blend of two permuted |
15302 | // inputs. |
15303 | return lowerShuffleAsDecomposedShuffleMerge(DL, MVT::v8i16, V1, V2, |
15304 | Mask, Subtarget, DAG); |
15305 | } |
15306 | |
15307 | /// Lower a vector shuffle using VPERMV/VPERMV3 variable permutes, widening |
15308 | /// the operands to 512 bits when only AVX512F (without VLX) is available, |
15309 | /// and extracting the original width afterwards. |
15310 | static SDValue lowerShuffleWithPERMV(const SDLoc &DL, MVT VT, |
15311 | ArrayRef<int> Mask, SDValue V1, SDValue V2, |
15312 | const X86Subtarget &Subtarget, |
15313 | SelectionDAG &DAG) { |
15314 | MVT MaskVT = VT.changeTypeToInteger(); |
15315 | SDValue MaskNode; |
15316 | MVT ShuffleVT = VT; |
15317 | if (!VT.is512BitVector() && !Subtarget.hasVLX()) { |
15318 | V1 = widenSubVector(V1, false, Subtarget, DAG, DL, 512); |
15319 | V2 = widenSubVector(V2, false, Subtarget, DAG, DL, 512); |
15320 | ShuffleVT = V1.getSimpleValueType(); |
15321 | |
15322 | // Adjust mask elements that pick from V2 to account for the widening. |
15323 | int NumElts = VT.getVectorNumElements(); |
15324 | unsigned Scale = 512 / VT.getSizeInBits(); |
15325 | SmallVector<int, 32> AdjustedMask(Mask.begin(), Mask.end()); |
15326 | for (int &M : AdjustedMask) |
15327 | if (NumElts <= M) |
15328 | M += (Scale - 1) * NumElts; |
15329 | MaskNode = getConstVector(AdjustedMask, MaskVT, DAG, DL, true); |
15330 | MaskNode = widenSubVector(MaskNode, false, Subtarget, DAG, DL, 512); |
15331 | } else { |
15332 | MaskNode = getConstVector(Mask, MaskVT, DAG, DL, true); |
15333 | } |
15334 | |
15335 | SDValue Result; |
15336 | if (V2.isUndef()) |
15337 | Result = DAG.getNode(X86ISD::VPERMV, DL, ShuffleVT, MaskNode, V1); |
15338 | else |
15339 | Result = DAG.getNode(X86ISD::VPERMV3, DL, ShuffleVT, V1, MaskNode, V2); |
15340 | |
15341 | if (VT != ShuffleVT) |
15342 | Result = extractSubVector(Result, 0, DAG, DL, VT.getSizeInBits()); |
15343 | |
15344 | return Result; |
15345 | } |
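      | // Worked example of the index adjustment above (illustrative): lowering a |
      | // v8i32 shuffle with AVX512F but no VLX widens both inputs to v16i32, so |
      | // Scale = 2 and a mask element M in [8, 16) becomes M + 8 in [16, 24), |
      | // matching VPERMV3's numbering where the second operand's elements are |
      | // indices 16..31. |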
15346 | |
15347 | /// Generic lowering of v16i8 shuffles. |
15348 | /// |
15349 | /// This is a hybrid strategy to lower v16i8 vectors. It first attempts to |
15350 | /// detect any complexity-reducing interleavings. If that doesn't help, it |
15351 | /// uses UNPCK to spread the i8 elements across two i16-element vectors, |
15352 | /// uses the existing lowering for v8i16 blends on each half, and finally |
15353 | /// PACKs them back together. |
15354 | static SDValue lowerV16I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask, |
15355 | const APInt &Zeroable, SDValue V1, SDValue V2, |
15356 | const X86Subtarget &Subtarget, |
15357 | SelectionDAG &DAG) { |
15358 | assert(V1.getSimpleValueType() == MVT::v16i8 && "Bad operand type!"); |
15359 | assert(V2.getSimpleValueType() == MVT::v16i8 && "Bad operand type!"); |
15360 | assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!"); |
15361 | |
15362 | // Try to use shift instructions. |
15363 | if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v16i8, V1, V2, Mask, |
15364 | Zeroable, Subtarget, DAG)) |
15365 | return Shift; |
15366 | |
15367 | // Try to use byte rotation instructions. |
15368 | if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v16i8, V1, V2, Mask, |
15369 | Subtarget, DAG)) |
15370 | return Rotate; |
15371 | |
15372 | // Use dedicated pack instructions for masks that match their pattern. |
15373 | if (SDValue V = lowerShuffleWithPACK(DL, MVT::v16i8, Mask, V1, V2, DAG, |
15374 | Subtarget)) |
15375 | return V; |
15376 | |
15377 | // Try to use a zext lowering. |
15378 | if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend(DL, MVT::v16i8, V1, V2, Mask, |
15379 | Zeroable, Subtarget, DAG)) |
15380 | return ZExt; |
15381 | |
15382 | // Try to lower via an AVX512 truncation (VPMOV*) if available. |
15383 | if (SDValue V = lowerShuffleWithVPMOV(DL, MVT::v16i8, V1, V2, Mask, Zeroable, |
15384 | Subtarget, DAG)) |
15385 | return V; |
15386 | |
15387 | if (SDValue V = lowerShuffleAsVTRUNC(DL, MVT::v16i8, V1, V2, Mask, Zeroable, |
15388 | Subtarget, DAG)) |
15389 | return V; |
15390 | |
15391 | // See if we can use SSE4A extraction / insertion. |
15392 | if (Subtarget.hasSSE4A()) |
15393 | if (SDValue V = lowerShuffleWithSSE4A(DL, MVT::v16i8, V1, V2, Mask, |
15394 | Zeroable, DAG)) |
15395 | return V; |
15396 | |
15397 | int NumV2Elements = count_if(Mask, [](int M) { return M >= 16; }); |
15398 | |
15399 | // For single-input shuffles, there are some nicer lowering tricks we can use. |
15400 | if (NumV2Elements == 0) { |
15401 | // Check for being able to broadcast a single element. |
15402 | if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v16i8, V1, V2, |
15403 | Mask, Subtarget, DAG)) |
15404 | return Broadcast; |
15405 | |
15406 | // Try to use bit rotation instructions. |
15407 | if (SDValue Rotate = lowerShuffleAsBitRotate(DL, MVT::v16i8, V1, Mask, |
15408 | Subtarget, DAG)) |
15409 | return Rotate; |
15410 | |
15411 | if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v16i8, Mask, V1, V2, DAG)) |
15412 | return V; |
15413 | |
15414 | // Check whether we can widen this to an i16 shuffle by duplicating bytes. |
15415 | // Notably, this handles splat and partial-splat shuffles more efficiently. |
15416 | // However, it only makes sense if the pre-duplication shuffle simplifies |
15417 | // things significantly. Currently, this means we need to be able to |
15418 | // express the pre-duplication shuffle as an i16 shuffle. |
15419 | // |
15420 | // FIXME: We should check for other patterns which can be widened into an |
15421 | // i16 shuffle as well. |
15422 | auto canWidenViaDuplication = [](ArrayRef<int> Mask) { |
15423 | for (int i = 0; i < 16; i += 2) |
15424 | if (Mask[i] >= 0 && Mask[i + 1] >= 0 && Mask[i] != Mask[i + 1]) |
15425 | return false; |
15426 | |
15427 | return true; |
15428 | }; |
15429 | auto tryToWidenViaDuplication = [&]() -> SDValue { |
15430 | if (!canWidenViaDuplication(Mask)) |
15431 | return SDValue(); |
15432 | SmallVector<int, 4> LoInputs; |
15433 | copy_if(Mask, std::back_inserter(LoInputs), |
15434 | [](int M) { return M >= 0 && M < 8; }); |
15435 | array_pod_sort(LoInputs.begin(), LoInputs.end()); |
15436 | LoInputs.erase(std::unique(LoInputs.begin(), LoInputs.end()), |
15437 | LoInputs.end()); |
15438 | SmallVector<int, 4> HiInputs; |
15439 | copy_if(Mask, std::back_inserter(HiInputs), [](int M) { return M >= 8; }); |
15440 | array_pod_sort(HiInputs.begin(), HiInputs.end()); |
15441 | HiInputs.erase(std::unique(HiInputs.begin(), HiInputs.end()), |
15442 | HiInputs.end()); |
15443 | |
15444 | bool TargetLo = LoInputs.size() >= HiInputs.size(); |
15445 | ArrayRef<int> InPlaceInputs = TargetLo ? LoInputs : HiInputs; |
15446 | ArrayRef<int> MovingInputs = TargetLo ? HiInputs : LoInputs; |
15447 | |
15448 | int PreDupI16Shuffle[] = {-1, -1, -1, -1, -1, -1, -1, -1}; |
15449 | SmallDenseMap<int, int, 8> LaneMap; |
15450 | for (int I : InPlaceInputs) { |
15451 | PreDupI16Shuffle[I/2] = I/2; |
15452 | LaneMap[I] = I; |
15453 | } |
15454 | int j = TargetLo ? 0 : 4, je = j + 4; |
15455 | for (int i = 0, ie = MovingInputs.size(); i < ie; ++i) { |
15456 | |
15457 | |
15458 | if (PreDupI16Shuffle[j] != MovingInputs[i] / 2) { |
15459 | |
15460 | |
15461 | while (j < je && PreDupI16Shuffle[j] >= 0) |
15462 | ++j; |
15463 | |
15464 | if (j == je) |
15465 | |
15466 | return SDValue(); |
15467 | |
15468 | |
15469 | PreDupI16Shuffle[j] = MovingInputs[i] / 2; |
15470 | } |
15471 | |
15472 | |
15473 | LaneMap[MovingInputs[i]] = 2 * j + MovingInputs[i] % 2; |
15474 | } |
15475 | V1 = DAG.getBitcast( |
15476 | MVT::v16i8, |
15477 | DAG.getVectorShuffle(MVT::v8i16, DL, DAG.getBitcast(MVT::v8i16, V1), |
15478 | DAG.getUNDEF(MVT::v8i16), PreDupI16Shuffle)); |
15479 | // Unpack the bytes to form the duplicated i16s, feeding UNDEF for the |
15480 | // even or odd byte stream if that stream is entirely unused. |
15481 | bool EvenInUse = false, OddInUse = false; |
15482 | for (int i = 0; i < 16; i += 2) { |
15483 | EvenInUse |= (Mask[i + 0] >= 0); |
15484 | OddInUse |= (Mask[i + 1] >= 0); |
15485 | if (EvenInUse && OddInUse) |
15486 | break; |
15487 | } |
15488 | V1 = DAG.getNode(TargetLo ? X86ISD::UNPCKL : X86ISD::UNPCKH, DL, |
15489 | MVT::v16i8, EvenInUse ? V1 : DAG.getUNDEF(MVT::v16i8), |
15490 | OddInUse ? V1 : DAG.getUNDEF(MVT::v16i8)); |
15491 | |
15492 | int PostDupI16Shuffle[8] = {-1, -1, -1, -1, -1, -1, -1, -1}; |
15493 | for (int i = 0; i < 16; ++i) |
15494 | if (Mask[i] >= 0) { |
15495 | int MappedMask = LaneMap[Mask[i]] - (TargetLo ? 0 : 8); |
15496 | assert(MappedMask < 8 && "Invalid v8 shuffle mask!"); |
15497 | if (PostDupI16Shuffle[i / 2] < 0) |
15498 | PostDupI16Shuffle[i / 2] = MappedMask; |
15499 | else |
15500 | assert(PostDupI16Shuffle[i / 2] == MappedMask && |
15501 | "Conflicting entries in the original shuffle!"); |
15502 | } |
15503 | return DAG.getBitcast( |
15504 | MVT::v16i8, |
15505 | DAG.getVectorShuffle(MVT::v8i16, DL, DAG.getBitcast(MVT::v8i16, V1), |
15506 | DAG.getUNDEF(MVT::v8i16), PostDupI16Shuffle)); |
15507 | }; |
15508 | if (SDValue V = tryToWidenViaDuplication()) |
15509 | return V; |
15510 | } |
15511 | |
15512 | if (SDValue Masked = lowerShuffleAsBitMask(DL, MVT::v16i8, V1, V2, Mask, |
15513 | Zeroable, Subtarget, DAG)) |
15514 | return Masked; |
15515 | |
15516 | // Use dedicated unpack instructions for masks that match their pattern. |
15517 | if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v16i8, Mask, V1, V2, DAG)) |
15518 | return V; |
15519 | |
15520 | // Try to use byte shift instructions to mask off elements. |
15521 | if (SDValue V = lowerShuffleAsByteShiftMask(DL, MVT::v16i8, V1, V2, Mask, |
15522 | Zeroable, Subtarget, DAG)) |
15523 | return V; |
15524 | |
15525 | // Check whether an even-element compaction (see the PACKUS lowering below) applies. |
15526 | bool IsSingleInput = V2.isUndef(); |
15527 | int NumEvenDrops = canLowerByDroppingEvenElements(Mask, IsSingleInput); |
15528 | |
15529 | // With SSSE3 we can use one PSHUFB per input (plus an OR to blend them) |
15530 | // to handle any in-lane byte shuffle. This is usually the best option |
15531 | // for a genuinely arbitrary mask. The exception is a two-input mask that |
15532 | // is really an even-element compaction: there, the AND+PACKUS sequence |
15533 | // further below is cheaper than two PSHUFBs plus an OR, so PSHUFB is |
15534 | // skipped when NumEvenDrops == 1 and both inputs are live. Even when a |
15535 | // PSHUFB pair is formed, a few targeted patterns (direct blends, |
15536 | // permute+unpack, VBMI/XOP permutes, rotate+permute) are checked first, |
15537 | // since each of them beats the generic PSHUFB+OR blend when it applies. |
15545 | if (Subtarget.hasSSSE3() && (IsSingleInput || NumEvenDrops != 1)) { |
15546 | bool V1InUse = false; |
15547 | bool V2InUse = false; |
15548 | |
15549 | SDValue PSHUFB = lowerShuffleAsBlendOfPSHUFBs( |
15550 | DL, MVT::v16i8, V1, V2, Mask, Zeroable, DAG, V1InUse, V2InUse); |
15551 | |
15552 | // If both V1 and V2 are in use, first look for patterns that beat the |
15553 | // PSHUFB pair (a direct blend, an unpack, or a single variable permute); |
15554 | // the PSHUFB result is kept as the fallback. |
15555 | if (V1InUse && V2InUse) { |
15556 | if (Subtarget.hasSSE41()) |
15557 | if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v16i8, V1, V2, Mask, |
15558 | Zeroable, Subtarget, DAG)) |
15559 | return Blend; |
15560 | |
15561 | // An unpack can often replace the OR-based blend: even though the OR may |
15562 | // be marginally cheaper, a clean interleave combines the two inputs with |
15563 | // a single unpack, which tends to shorten the critical path. Try the |
15564 | // permute-and-unpack lowering before settling on the PSHUFB pair. |
15569 | if (SDValue Unpack = lowerShuffleAsPermuteAndUnpack( |
15570 | DL, MVT::v16i8, V1, V2, Mask, Subtarget, DAG)) |
15571 | return Unpack; |
15572 | |
15573 | // If we have VBMI, a single VPERMB can replace the whole PSHUFB+OR blend. |
15574 | if (Subtarget.hasVBMI()) |
15575 | return lowerShuffleWithPERMV(DL, MVT::v16i8, Mask, V1, V2, Subtarget, |
15576 | DAG); |
15577 | |
15578 | // With XOP, VPPERM pulls bytes from both inputs in one instruction. |
15579 | if (Subtarget.hasXOP()) { |
15580 | SDValue MaskNode = getConstVector(Mask, MVT::v16i8, DAG, DL, true); |
15581 | return DAG.getNode(X86ISD::VPPERM, DL, MVT::v16i8, V1, V2, MaskNode); |
15582 | } |
15583 | |
15584 | // A byte rotate followed by a permute can also beat the PSHUFB pair; try |
15585 | // it before settling. |
15586 | if (SDValue V = lowerShuffleAsByteRotateAndPermute( |
15587 | DL, MVT::v16i8, V1, V2, Mask, Subtarget, DAG)) |
15588 | return V; |
15589 | } |
15590 | |
15591 | return PSHUFB; |
15592 | } |
15593 | |
15594 | // There are special ways we can lower some single-element blends. |
15595 | if (NumV2Elements == 1) |
15596 | if (SDValue V = lowerShuffleAsElementInsertion( |
15597 | DL, MVT::v16i8, V1, V2, Mask, Zeroable, Subtarget, DAG)) |
15598 | return V; |
15599 | |
15600 | if (SDValue Blend = lowerShuffleAsBitBlend(DL, MVT::v16i8, V1, V2, Mask, DAG)) |
15601 | return Blend; |
15602 | |
15603 | // Check whether a compaction lowering can be done. This handles shuffles |
15604 | // which take every Nth element for some even N. See the helper function |
15605 | // for details. |
15606 | // |
15607 | // We special case these as they can be particularly efficiently handled |
15608 | // with the PACKUS instruction on x86 and they show up in common patterns |
15609 | // of rearranging bytes to truncate wide elements. |
15610 | if (NumEvenDrops) { |
15611 | |
15612 | // NumEvenDrops is the number of times the element count can be halved: |
15613 | // each PACKUS round drops the odd elements, so we first clear the bytes |
15614 | // that the packs would otherwise saturate, then pack once per drop level. |
15616 | assert(NumEvenDrops <= 3 && |
15617 | "No support for dropping even elements more than 3 times."); |
15618 | SmallVector<SDValue, 8> WordClearOps(8, DAG.getConstant(0, DL, MVT::i16)); |
15619 | for (unsigned i = 0; i != 8; i += 1 << (NumEvenDrops - 1)) |
15620 | WordClearOps[i] = DAG.getConstant(0xFF, DL, MVT::i16); |
15621 | SDValue WordClearMask = DAG.getBuildVector(MVT::v8i16, DL, WordClearOps); |
15622 | V1 = DAG.getNode(ISD::AND, DL, MVT::v8i16, DAG.getBitcast(MVT::v8i16, V1), |
15623 | WordClearMask); |
15624 | if (!IsSingleInput) |
15625 | V2 = DAG.getNode(ISD::AND, DL, MVT::v8i16, DAG.getBitcast(MVT::v8i16, V2), |
15626 | WordClearMask); |
15627 | |
15628 | // Now pack things back together. |
15629 | SDValue Result = DAG.getNode(X86ISD::PACKUS, DL, MVT::v16i8, V1, |
15630 | IsSingleInput ? V1 : V2); |
15631 | for (int i = 1; i < NumEvenDrops; ++i) { |
15632 | Result = DAG.getBitcast(MVT::v8i16, Result); |
15633 | Result = DAG.getNode(X86ISD::PACKUS, DL, MVT::v16i8, Result, Result); |
15634 | } |
15635 | return Result; |
15636 | } |
15637 | |
15638 | // Handle multi-input cases by blending/permuting single-input shuffles. |
15639 | if (NumV2Elements > 0) |
15640 | return lowerShuffleAsDecomposedShuffleMerge(DL, MVT::v16i8, V1, V2, Mask, |
15641 | Subtarget, DAG); |
15642 | |
15643 | // The fallback path for single-input shuffles widens this into two v8i16 |
15644 | // vectors with unpacks, shuffles those, and then pulls them back together |
15645 | // with a pack. |
15646 | SDValue V = V1; |
15647 | |
15648 | std::array<int, 8> LoBlendMask = {{-1, -1, -1, -1, -1, -1, -1, -1}}; |
15649 | std::array<int, 8> HiBlendMask = {{-1, -1, -1, -1, -1, -1, -1, -1}}; |
15650 | for (int i = 0; i < 16; ++i) |
15651 | if (Mask[i] >= 0) |
15652 | (i < 8 ? LoBlendMask[i] : HiBlendMask[i % 8]) = Mask[i]; |
15653 | |
15654 | SDValue VLoHalf, VHiHalf; |
15655 | |
15656 | // If only even-numbered source bytes are referenced, zero the odd bytes |
15657 | // with an AND instead of unpacking with zero; the high half then vanishes. |
15658 | if (none_of(LoBlendMask, [](int M) { return M >= 0 && M % 2 == 1; }) && |
15659 | none_of(HiBlendMask, [](int M) { return M >= 0 && M % 2 == 1; })) { |
15660 | |
15661 | VLoHalf = DAG.getBitcast(MVT::v8i16, V); |
15662 | VLoHalf = DAG.getNode(ISD::AND, DL, MVT::v8i16, VLoHalf, |
15663 | DAG.getConstant(0x00FF, DL, MVT::v8i16)); |
15664 | |
15665 | // The high half is irrelevant: every lane comes from the masked low bytes. |
15666 | VHiHalf = DAG.getUNDEF(MVT::v8i16); |
15667 | |
15668 | // Rescale the byte indices in the blend masks to word indices. |
15669 | for (int &M : LoBlendMask) |
15670 | if (M >= 0) |
15671 | M /= 2; |
15672 | for (int &M : HiBlendMask) |
15673 | if (M >= 0) |
15674 | M /= 2; |
15675 | } else { |
15676 | |
15677 | // Otherwise, widen the bytes into words by interleaving with zero. |
15678 | SDValue Zero = getZeroVector(MVT::v16i8, Subtarget, DAG, DL); |
15679 | |
15680 | VLoHalf = DAG.getBitcast( |
15681 | MVT::v8i16, DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16i8, V, Zero)); |
15682 | VHiHalf = DAG.getBitcast( |
15683 | MVT::v8i16, DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16i8, V, Zero)); |
15684 | } |
15685 | |
15686 | SDValue LoV = DAG.getVectorShuffle(MVT::v8i16, DL, VLoHalf, VHiHalf, LoBlendMask); |
15687 | SDValue HiV = DAG.getVectorShuffle(MVT::v8i16, DL, VLoHalf, VHiHalf, HiBlendMask); |
15688 | |
15689 | return DAG.getNode(X86ISD::PACKUS, DL, MVT::v16i8, LoV, HiV); |
15690 | } |
15691 | |
15692 | /// Dispatching routine to lower various 128-bit x86 vector shuffles. |
15693 | /// |
15694 | /// This routine breaks down the specific type of 128-bit shuffle and |
15695 | /// dispatches to the lowering routines accordingly. |
15696 | static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, |
15697 | MVT VT, SDValue V1, SDValue V2, |
15698 | const APInt &Zeroable, |
15699 | const X86Subtarget &Subtarget, |
15700 | SelectionDAG &DAG) { |
15701 | switch (VT.SimpleTy) { |
15702 | case MVT::v2i64: |
15703 | return lowerV2I64Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); |
15704 | case MVT::v2f64: |
15705 | return lowerV2F64Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); |
15706 | case MVT::v4i32: |
15707 | return lowerV4I32Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); |
15708 | case MVT::v4f32: |
15709 | return lowerV4F32Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); |
15710 | case MVT::v8i16: |
15711 | return lowerV8I16Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); |
15712 | case MVT::v16i8: |
15713 | return lowerV16I8Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); |
15714 | |
15715 | default: |
15716 | llvm_unreachable("Unimplemented!"); |
15717 | } |
15718 | } |
15719 | |
15720 | /// Generic routine to split a vector shuffle into half-sized shuffles. |
15721 | /// |
15722 | /// This routine just extracts two subvectors, shuffles them independently |
15723 | /// with the half-width shuffle lowering, and then concatenates them back |
15724 | /// together. This should work effectively for all 256-bit or wider types. |
15725 | static SDValue splitAndLowerShuffle(const SDLoc &DL, MVT VT, SDValue V1, |
15726 | SDValue V2, ArrayRef<int> Mask, |
15727 | SelectionDAG &DAG) { |
15728 | assert(VT.getSizeInBits() >= 256 && |
15729 | "Only for 256-bit or wider vector shuffles!"); |
15730 | assert(V1.getSimpleValueType() == VT && "Bad operand type!"); |
15731 | assert(V2.getSimpleValueType() == VT && "Bad operand type!"); |
15732 | |
15733 | ArrayRef<int> LoMask = Mask.slice(0, Mask.size() / 2); |
15734 | ArrayRef<int> HiMask = Mask.slice(Mask.size() / 2); |
15735 | |
15736 | int NumElements = VT.getVectorNumElements(); |
15737 | int SplitNumElements = NumElements / 2; |
15738 | MVT ScalarVT = VT.getVectorElementType(); |
15739 | MVT SplitVT = MVT::getVectorVT(ScalarVT, SplitNumElements); |
15740 | |
15741 | |
15742 | // Split each input in half, bitcasting the halves to the split type. |
15743 | auto SplitVector = [&](SDValue V) { |
15744 | SDValue LoV, HiV; |
15745 | std::tie(LoV, HiV) = splitVector(peekThroughBitcasts(V), DAG, DL); |
15746 | return std::make_pair(DAG.getBitcast(SplitVT, LoV), |
15747 | DAG.getBitcast(SplitVT, HiV)); |
15748 | }; |
15749 | |
15750 | SDValue LoV1, HiV1, LoV2, HiV2; |
15751 | std::tie(LoV1, HiV1) = SplitVector(V1); |
15752 | std::tie(LoV2, HiV2) = SplitVector(V2); |
15753 | |
15754 | // Lower each half-mask independently, blending from the four quarters. |
15755 | auto HalfBlend = [&](ArrayRef<int> HalfMask) { |
15756 | bool UseLoV1 = false, UseHiV1 = false, UseLoV2 = false, UseHiV2 = false; |
15757 | SmallVector<int, 32> V1BlendMask((unsigned)SplitNumElements, -1); |
15758 | SmallVector<int, 32> V2BlendMask((unsigned)SplitNumElements, -1); |
15759 | SmallVector<int, 32> BlendMask((unsigned)SplitNumElements, -1); |
15760 | for (int i = 0; i < SplitNumElements; ++i) { |
15761 | int M = HalfMask[i]; |
15762 | if (M >= NumElements) { |
15763 | if (M >= NumElements + SplitNumElements) |
15764 | UseHiV2 = true; |
15765 | else |
15766 | UseLoV2 = true; |
15767 | V2BlendMask[i] = M - NumElements; |
15768 | BlendMask[i] = SplitNumElements + i; |
15769 | } else if (M >= 0) { |
15770 | if (M >= SplitNumElements) |
15771 | UseHiV1 = true; |
15772 | else |
15773 | UseLoV1 = true; |
15774 | V1BlendMask[i] = M; |
15775 | BlendMask[i] = i; |
15776 | } |
15777 | } |
15778 | |
15779 | // Because the lowering happens after all combining takes place, we need |
15780 | // to manually combine these blend masks as much as possible so that we |
15781 | // create a minimal number of high-level vector shuffle nodes. |
15782 | |
15783 | // First try just blending the halves of V1 or V2. |
15784 | if (!UseLoV1 && !UseHiV1 && !UseLoV2 && !UseHiV2) |
15785 | return DAG.getUNDEF(SplitVT); |
15786 | if (!UseLoV2 && !UseHiV2) |
15787 | return DAG.getVectorShuffle(SplitVT, DL, LoV1, HiV1, V1BlendMask); |
15788 | if (!UseLoV1 && !UseHiV1) |
15789 | return DAG.getVectorShuffle(SplitVT, DL, LoV2, HiV2, V2BlendMask); |
15790 | |
15791 | SDValue V1Blend, V2Blend; |
15792 | if (UseLoV1 && UseHiV1) { |
15793 | V1Blend = |
15794 | DAG.getVectorShuffle(SplitVT, DL, LoV1, HiV1, V1BlendMask); |
15795 | } else { |
15796 | |
15797 | V1Blend = UseLoV1 ? LoV1 : HiV1; |
15798 | for (int i = 0; i < SplitNumElements; ++i) |
15799 | if (BlendMask[i] >= 0 && BlendMask[i] < SplitNumElements) |
15800 | BlendMask[i] = V1BlendMask[i] - (UseLoV1 ? 0 : SplitNumElements); |
15801 | } |
15802 | if (UseLoV2 && UseHiV2) { |
15803 | V2Blend = |
15804 | DAG.getVectorShuffle(SplitVT, DL, LoV2, HiV2, V2BlendMask); |
15805 | } else { |
15806 | |
15807 | V2Blend = UseLoV2 ? LoV2 : HiV2; |
15808 | for (int i = 0; i < SplitNumElements; ++i) |
15809 | if (BlendMask[i] >= SplitNumElements) |
15810 | BlendMask[i] = V2BlendMask[i] + (UseLoV2 ? SplitNumElements : 0); |
15811 | } |
15812 | return DAG.getVectorShuffle(SplitVT, DL, V1Blend, V2Blend, BlendMask); |
15813 | }; |
15814 | SDValue Lo = HalfBlend(LoMask); |
15815 | SDValue Hi = HalfBlend(HiMask); |
15816 | return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi); |
15817 | } |
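      | // Worked example (illustrative): a v8f32 shuffle with mask |
      | // <0, 1, 2, 3, 12, 13, 14, 15> splits into LoMask = {0, 1, 2, 3}, which |
      | // only touches the low half of V1, and HiMask = {12, 13, 14, 15}, which |
      | // only touches the high half of V2, so each half lowers to a plain copy |
      | // and the result is a single CONCAT_VECTORS. |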
15818 | |
15819 | /// Either split a vector in halves or decompose the shuffles and the |
15820 | /// blend/unpack. |
15821 | /// |
15822 | /// This is provided as a good fallback for many lowerings of non-single- |
15823 | /// input shuffles with more than one 128-bit lane. In those cases, we want |
15824 | /// to select between splitting the shuffle into 128-bit components and |
15825 | /// stitching those back together vs. extracting the single-input shuffles |
15826 | /// and blending those results. |
15827 | static SDValue lowerShuffleAsSplitOrBlend(const SDLoc &DL, MVT VT, SDValue V1, |
15828 | SDValue V2, ArrayRef<int> Mask, |
15829 | const X86Subtarget &Subtarget, |
15830 | SelectionDAG &DAG) { |
15831 | assert(!V2.isUndef() && "This routine must not be used to lower single-input " |
15832 | "shuffles as it could then recurse on itself."); |
15833 | int Size = Mask.size(); |
15834 | |
15835 | // If this can be modeled as a broadcast of two elements followed by a |
15836 | // blend, prefer that lowering. This is especially important because |
15837 | // broadcasts can often fold with memory operands. |
15838 | auto DoBothBroadcast = [&] { |
15839 | int V1BroadcastIdx = -1, V2BroadcastIdx = -1; |
15840 | for (int M : Mask) |
15841 | if (M >= Size) { |
15842 | if (V2BroadcastIdx < 0) |
15843 | V2BroadcastIdx = M - Size; |
15844 | else if (M - Size != V2BroadcastIdx) |
15845 | return false; |
15846 | } else if (M >= 0) { |
15847 | if (V1BroadcastIdx < 0) |
15848 | V1BroadcastIdx = M; |
15849 | else if (M != V1BroadcastIdx) |
15850 | return false; |
15851 | } |
15852 | return true; |
15853 | }; |
15854 | if (DoBothBroadcast()) |
15855 | return lowerShuffleAsDecomposedShuffleMerge(DL, VT, V1, V2, Mask, Subtarget, |
15856 | DAG); |
15857 | |
15858 | // If the inputs all stem from a single 128-bit lane of each input, we can |
15859 | // split them rather than blending because the split will decompose to |
15860 | // unusually few instructions. |
15861 | int LaneCount = VT.getSizeInBits() / 128; |
15862 | int LaneSize = Size / LaneCount; |
15863 | SmallBitVector LaneInputs[2]; |
15864 | LaneInputs[0].resize(LaneCount, false); |
15865 | LaneInputs[1].resize(LaneCount, false); |
15866 | for (int i = 0; i < Size; ++i) |
15867 | if (Mask[i] >= 0) |
15868 | LaneInputs[Mask[i] / Size][(Mask[i] % Size) / LaneSize] = true; |
15869 | if (LaneInputs[0].count() <= 1 && LaneInputs[1].count() <= 1) |
15870 | return splitAndLowerShuffle(DL, VT, V1, V2, Mask, DAG); |
15871 | |
15872 | // Otherwise, just fall back to decomposed shuffles and a blend/unpack. This |
15873 | // requires that the decomposed single-input shuffles don't end up here. |
15874 | return lowerShuffleAsDecomposedShuffleMerge(DL, VT, V1, V2, Mask, Subtarget, |
15875 | DAG); |
15876 | } |
15877 | |
15878 | // Lower as SHUFPD(VPERM2F128(V1, V2), VPERM2F128(V1, V2)). |
15879 | // TODO: Extend to support v8f32 (+ 512-bit VSHUFPS). |
15880 | static SDValue lowerShuffleAsLanePermuteAndSHUFP(const SDLoc &DL, MVT VT, |
15881 | SDValue V1, SDValue V2, |
15882 | ArrayRef<int> Mask, |
15883 | SelectionDAG &DAG) { |
15884 | assert(VT == MVT::v4f64 && "Only for v4f64 shuffles"); |
15885 | |
15886 | int LHSMask[4] = {-1, -1, -1, -1}; |
15887 | int RHSMask[4] = {-1, -1, -1, -1}; |
15888 | unsigned SHUFPMask = 0; |
15889 | |
15890 | // Even result slots are built in LHSMask, odd slots in RHSMask; bit i of |
15891 | // the SHUFP immediate records which element of each 64-bit pair is taken. |
15892 | for (int i = 0; i != 4; ++i) { |
15893 | int M = Mask[i]; |
15894 | if (M < 0) |
15895 | continue; |
15896 | int LaneBase = i & ~1; |
15897 | auto &LaneMask = (i & 1) ? RHSMask : LHSMask; |
15898 | LaneMask[LaneBase + (M & 1)] = M; |
15899 | SHUFPMask |= (M & 1) << i; |
15900 | } |
15901 | |
15902 | SDValue LHS = DAG.getVectorShuffle(VT, DL, V1, V2, LHSMask); |
15903 | SDValue RHS = DAG.getVectorShuffle(VT, DL, V1, V2, RHSMask); |
15904 | return DAG.getNode(X86ISD::SHUFP, DL, VT, LHS, RHS, |
15905 | DAG.getTargetConstant(SHUFPMask, DL, MVT::i8)); |
15906 | } |
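// --- Editor's illustration (not from the original file) ---
// Models the SHUFPMask accumulation above: bit i of the immediate records
// which element of the 64-bit pair result slot i takes (hypothetical names).
namespace shufp_imm_example {
constexpr unsigned shufpImm(const int (&Mask)[4]) {
  unsigned Imm = 0;
  for (int i = 0; i != 4; ++i)
    if (Mask[i] >= 0)
      Imm |= (Mask[i] & 1u) << i;
  return Imm;
}
constexpr int Mask[4] = {0, 5, 2, 7};
static_assert(shufpImm(Mask) == 0xA, "odd result slots take the high element");
} // namespace shufp_imm_example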
15907 | |
15908 | /// Lower a vector shuffle crossing multiple 128-bit lanes as |
15909 | /// a lane permutation followed by a per-lane permutation. |
15910 | /// |
15911 | /// This is mainly for cases where we can have non-repeating permutes |
15912 | /// in each lane. |
15913 | /// |
15914 | /// TODO: This is very similar to lowerShuffleAsLanePermuteAndRepeatedMask, |
15915 | /// we should investigate merging them. |
15916 | static SDValue lowerShuffleAsLanePermuteAndPermute( |
15917 | const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask, |
15918 | SelectionDAG &DAG, const X86Subtarget &Subtarget) { |
15919 | int NumElts = VT.getVectorNumElements(); |
15920 | int NumLanes = VT.getSizeInBits() / 128; |
15921 | int NumEltsPerLane = NumElts / NumLanes; |
15922 | bool CanUseSublanes = Subtarget.hasAVX2() && V2.isUndef(); |
15923 | |
15924 | /// Attempts to find a sublane permute with the given size |
15925 | /// that gets all elements into their target lanes. |
15926 | /// |
15927 | /// On success this returns a cross-lane shuffle followed by an in-(sub)lane |
15928 | /// shuffle; otherwise it returns SDValue(). |
15929 | auto getSublanePermute = [&](int NumSublanes) -> SDValue { |
15930 | int NumSublanesPerLane = NumSublanes / NumLanes; |
15931 | int NumEltsPerSublane = NumElts / NumSublanes; |
15932 | |
15933 | SmallVector<int, 16> CrossLaneMask; |
15934 | SmallVector<int, 16> InLaneMask(NumElts, SM_SentinelUndef); |
15935 | // CrossLaneMask but one entry == one sublane. |
15936 | SmallVector<int, 16> CrossLaneMaskLarge(NumSublanes, SM_SentinelUndef); |
15937 | |
15938 | for (int i = 0; i != NumElts; ++i) { |
15939 | int M = Mask[i]; |
15940 | if (M < 0) |
15941 | continue; |
15942 | |
15943 | int SrcSublane = M / NumEltsPerSublane; |
15944 | int DstLane = i / NumEltsPerLane; |
15945 | |
15946 | // We only need to get the elements into the right lane, not sublane. |
15947 | // So search all sublanes that make up the destination lane. |
15948 | bool Found = false; |
15949 | int DstSubStart = DstLane * NumSublanesPerLane; |
15950 | int DstSubEnd = DstSubStart + NumSublanesPerLane; |
15951 | for (int DstSublane = DstSubStart; DstSublane < DstSubEnd; ++DstSublane) { |
15952 | if (!isUndefOrEqual(CrossLaneMaskLarge[DstSublane], SrcSublane)) |
15953 | continue; |
15954 | |
15955 | Found = true; |
15956 | CrossLaneMaskLarge[DstSublane] = SrcSublane; |
15957 | int DstSublaneOffset = DstSublane * NumEltsPerSublane; |
15958 | InLaneMask[i] = DstSublaneOffset + M % NumEltsPerSublane; |
15959 | break; |
15960 | } |
15961 | if (!Found) |
15962 | return SDValue(); |
15963 | } |
15964 | |
15965 | // Fill CrossLaneMask using CrossLaneMaskLarge. |
15966 | narrowShuffleMaskElts(NumEltsPerSublane, CrossLaneMaskLarge, CrossLaneMask); |
15967 | |
15968 | if (!CanUseSublanes) { |
15969 | // If we're only shuffling a single lowest lane and the rest are identity |
15970 | // then don't bother. |
15971 | // TODO - isShuffleMaskInputInPlace could be extended to something like |
15972 | // this. |
15973 | int NumIdentityLanes = 0; |
15974 | bool OnlyShuffleLowestLane = true; |
15975 | for (int i = 0; i != NumLanes; ++i) { |
15976 | int LaneOffset = i * NumEltsPerLane; |
15977 | if (isSequentialOrUndefInRange(InLaneMask, LaneOffset, NumEltsPerLane, |
15978 | i * NumEltsPerLane)) |
15979 | NumIdentityLanes++; |
15980 | else if (CrossLaneMask[LaneOffset] != 0) |
15981 | OnlyShuffleLowestLane = false; |
15982 | } |
15983 | if (OnlyShuffleLowestLane && NumIdentityLanes == (NumLanes - 1)) |
15984 | return SDValue(); |
15985 | } |
15986 | |
15987 | SDValue CrossLane = DAG.getVectorShuffle(VT, DL, V1, V2, CrossLaneMask); |
15988 | return DAG.getVectorShuffle(VT, DL, CrossLane, DAG.getUNDEF(VT), |
15989 | InLaneMask); |
15990 | }; |
15991 | |
15992 | // First attempt a solution with full lanes. |
15993 | if (SDValue V = getSublanePermute(NumLanes)) |
15994 | return V; |
15995 | |
15996 | // The rest of the solutions use sublanes. |
15997 | if (!CanUseSublanes) |
15998 | return SDValue(); |
15999 | |
16000 | // Then attempt a solution with 64-bit sublanes (vpermq). |
16001 | if (SDValue V = getSublanePermute(NumLanes * 2)) |
16002 | return V; |
16003 | |
16004 | // If that doesn't work and we have fast variable cross-lane shuffle, |
16005 | // attempt 32-bit sublanes (vpermd). |
16006 | if (!Subtarget.hasFastVariableCrossLaneShuffle()) |
16007 | return SDValue(); |
16008 | |
16009 | return getSublanePermute(NumLanes * 4); |
16010 | } |
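// --- Editor's illustration (not from the original file) ---
// The two-step decomposition above in miniature, for whole-lane sublanes on
// v4f64 (NumEltsPerSublane = 2): the cross-lane step only needs the right
// source sublane per destination; the in-lane step then fixes positions.
namespace sublane_example {
constexpr int NumEltsPerSublane = 2;
constexpr int srcSublane(int M) { return M / NumEltsPerSublane; }
constexpr int inLaneIndex(int DstSublane, int M) {
  return DstSublane * NumEltsPerSublane + M % NumEltsPerSublane;
}
// Element M = 3 destined for sublane 0: bring over sublane 1, then pick its
// element 1 in-lane.
static_assert(srcSublane(3) == 1 && inLaneIndex(0, 3) == 1, "two-step move");
} // namespace sublane_example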
16011 | |
16012 | /// Lower a vector shuffle crossing multiple 128-bit lanes by shuffling one |
16013 | /// source with a lane permutation. |
16014 | /// |
16015 | /// This lowering strategy results in four instructions in the worst case for a |
16016 | /// single-input cross lane shuffle which is lower than any other fully general |
16017 | /// cross-lane shuffle strategy. Special cases for each particular shuffle |
16018 | /// pattern should be handled prior to trying this lowering. |
16019 | static SDValue lowerShuffleAsLanePermuteAndShuffle( |
16020 | const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask, |
16021 | SelectionDAG &DAG, const X86Subtarget &Subtarget) { |
16022 | // FIXME: This should probably be generalized for 512-bit vectors as well. |
16023 | assert(VT.is256BitVector() && "Only for 256-bit vector shuffles!"); |
16024 | int Size = Mask.size(); |
16025 | int LaneSize = Size / 2; |
16026 | |
16027 | // SHUFPD(VPERM2F128(V1, V2), VPERM2F128(V1, V2)). |
16028 | // Only do this if the elements aren't all from the lower lane, |
16029 | // otherwise we're (probably) better off doing a split. |
16030 | if (VT == MVT::v4f64 && |
16031 | !all_of(Mask, [LaneSize](int M) { return M < LaneSize; })) |
16032 | if (SDValue V = |
16033 | lowerShuffleAsLanePermuteAndSHUFP(DL, VT, V1, V2, Mask, DAG)) |
16034 | return V; |
16035 | |
16036 | // If there are only inputs from one 128-bit lane, splitting will in fact be |
16037 | // less expensive. The flags track whether the given lane contains an element |
16038 | // that crosses to another lane. |
16039 | if (!Subtarget.hasAVX2()) { |
16040 | bool LaneCrossing[2] = {false, false}; |
16041 | for (int i = 0; i < Size; ++i) |
16042 | if (Mask[i] >= 0 && ((Mask[i] % Size) / LaneSize) != (i / LaneSize)) |
16043 | LaneCrossing[(Mask[i] % Size) / LaneSize] = true; |
16044 | if (!LaneCrossing[0] || !LaneCrossing[1]) |
16045 | return splitAndLowerShuffle(DL, VT, V1, V2, Mask, DAG); |
16046 | } else { |
16047 | bool LaneUsed[2] = {false, false}; |
16048 | for (int i = 0; i < Size; ++i) |
16049 | if (Mask[i] >= 0) |
16050 | LaneUsed[(Mask[i] % Size) / LaneSize] = true; |
16051 | if (!LaneUsed[0] || !LaneUsed[1]) |
16052 | return splitAndLowerShuffle(DL, VT, V1, V2, Mask, DAG); |
16053 | } |
16054 | |
16055 | // TODO - we could support shuffling V2 in the Flipped input. |
16056 | assert(V2.isUndef() && |
16057 | "This last part of this routine only works on single input shuffles"); |
16058 | |
16059 | SmallVector<int, 32> InLaneMask(Mask.begin(), Mask.end()); |
16060 | for (int i = 0; i < Size; ++i) { |
16061 | int &M = InLaneMask[i]; |
16062 | if (M < 0) |
16063 | continue; |
16064 | if (((M % Size) / LaneSize) != (i / LaneSize)) |
16065 | M = (M % LaneSize) + ((i / LaneSize) * LaneSize) + Size; |
16066 | } |
16067 | assert(!is128BitLaneCrossingShuffleMask(VT, InLaneMask) && |
16068 | "In-lane shuffle mask expected"); |
16069 | |
16070 | // Flip the lanes, and shuffle the results which should now be in-lane. |
16071 | MVT PVT = VT.isFloatingPoint() ? MVT::v4f64 : MVT::v4i64; |
16072 | SDValue Flipped = DAG.getBitcast(PVT, V1); |
16073 | Flipped = |
16074 | DAG.getVectorShuffle(PVT, DL, Flipped, DAG.getUNDEF(PVT), {2, 3, 0, 1}); |
16075 | Flipped = DAG.getBitcast(VT, Flipped); |
16076 | return DAG.getVectorShuffle(VT, DL, V1, Flipped, InLaneMask); |
16077 | } |
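// --- Editor's illustration (not from the original file) ---
// The InLaneMask remapping above for v4f64 (Size = 4, LaneSize = 2): a
// lane-crossing reference is redirected into the lane-flipped copy, which a
// two-input shuffle addresses at indices [Size, 2 * Size).
namespace inlane_remap_example {
constexpr int Size = 4;
constexpr int LaneSize = 2;
constexpr int remap(int M, int i) {
  return (M / LaneSize) != (i / LaneSize)
             ? (M % LaneSize) + (i / LaneSize) * LaneSize + Size
             : M;
}
static_assert(remap(2, 0) == 4, "lane-crossing ref now reads Flipped");
static_assert(remap(1, 1) == 1, "in-lane ref is unchanged");
} // namespace inlane_remap_example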
16078 | |
16079 | /// Handle lowering 2-lane 128-bit shuffles. |
16080 | static SDValue lowerV2X128Shuffle(const SDLoc &DL, MVT VT, SDValue V1, |
16081 | SDValue V2, ArrayRef<int> Mask, |
16082 | const APInt &Zeroable, |
16083 | const X86Subtarget &Subtarget, |
16084 | SelectionDAG &DAG) { |
16085 | if (V2.isUndef()) { |
16086 | // Attempt to match VBROADCAST*128 subvector broadcast load. |
16087 | bool SplatLo = isShuffleEquivalent(Mask, {0, 1, 0, 1}, V1); |
16088 | bool SplatHi = isShuffleEquivalent(Mask, {2, 3, 2, 3}, V1); |
16089 | if ((SplatLo || SplatHi) && !Subtarget.hasAVX512() && V1.hasOneUse() && |
16090 | MayFoldLoad(peekThroughOneUseBitcasts(V1))) { |
16091 | auto *Ld = cast<LoadSDNode>(peekThroughOneUseBitcasts(V1)); |
16092 | if (!Ld->isNonTemporal()) { |
16093 | MVT MemVT = VT.getHalfNumVectorElementsVT(); |
16094 | unsigned Ofs = SplatLo ? 0 : MemVT.getStoreSize(); |
16095 | SDVTList Tys = DAG.getVTList(VT, MVT::Other); |
16096 | SDValue Ptr = DAG.getMemBasePlusOffset(Ld->getBasePtr(), |
16097 | TypeSize::Fixed(Ofs), DL); |
16098 | SDValue Ops[] = {Ld->getChain(), Ptr}; |
16099 | SDValue BcastLd = DAG.getMemIntrinsicNode( |
16100 | X86ISD::SUBV_BROADCAST_LOAD, DL, Tys, Ops, MemVT, |
16101 | DAG.getMachineFunction().getMachineMemOperand( |
16102 | Ld->getMemOperand(), Ofs, MemVT.getStoreSize())); |
16103 | DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), BcastLd.getValue(1)); |
16104 | return BcastLd; |
16105 | } |
16106 | } |
16107 | |
16108 | // With AVX2, use VPERMQ/VPERMPD for unary shuffles to allow memory folding. |
16109 | if (Subtarget.hasAVX2()) |
16110 | return SDValue(); |
16111 | } |
16112 | |
16113 | bool V2IsZero = !V2.isUndef() && ISD::isBuildVectorAllZeros(V2.getNode()); |
16114 | |
16115 | SmallVector<int, 4> WidenedMask; |
16116 | if (!canWidenShuffleElements(Mask, Zeroable, V2IsZero, WidenedMask)) |
16117 | return SDValue(); |
16118 | |
16119 | bool IsLowZero = (Zeroable & 0x3) == 0x3; |
16120 | bool IsHighZero = (Zeroable & 0xc) == 0xc; |
16121 | |
16122 | // Try to use an insert into a zero vector. |
16123 | if (WidenedMask[0] == 0 && IsHighZero) { |
16124 | MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(), 2); |
16125 | SDValue LoV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V1, |
16126 | DAG.getIntPtrConstant(0, DL)); |
16127 | return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, |
16128 | getZeroVector(VT, Subtarget, DAG, DL), LoV, |
16129 | DAG.getIntPtrConstant(0, DL)); |
16130 | } |
16131 | |
16132 | // TODO: If minimizing size and one of the inputs is a zero vector and the |
16133 | // zero vector has only one use, we could use a VPERM2X128 to save the |
16134 | // instruction bytes needed to explicitly generate the zero vector. |
16135 | |
16136 | // Blends are faster and handle all the non-lane-crossing cases. |
16137 | if (SDValue Blend = lowerShuffleAsBlend(DL, VT, V1, V2, Mask, Zeroable, |
16138 | Subtarget, DAG)) |
16139 | return Blend; |
16140 | |
16141 | // If either input operand is a zero vector, use VPERM2X128 because its mask |
16142 | // allows us to replace the zero input with an implicit zero. |
16143 | if (!IsLowZero && !IsHighZero) { |
16144 | // Check for patterns which can be matched with a single insert of a 128-bit |
16145 | // subvector. |
16146 | bool OnlyUsesV1 = isShuffleEquivalent(Mask, {0, 1, 0, 1}, V1, V2); |
16147 | if (OnlyUsesV1 || isShuffleEquivalent(Mask, {0, 1, 4, 5}, V1, V2)) { |
16148 | |
16149 | // With AVX1, use vperm2f128 (below) to allow load folding. Otherwise, |
16150 | // this will likely become vinsertf128 which can't fold a 256-bit memop. |
16151 | if (!isa<LoadSDNode>(peekThroughBitcasts(V1))) { |
16152 | MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(), 2); |
16153 | SDValue SubVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, |
16154 | OnlyUsesV1 ? V1 : V2, |
16155 | DAG.getIntPtrConstant(0, DL)); |
16156 | return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, V1, SubVec, |
16157 | DAG.getIntPtrConstant(2, DL)); |
16158 | } |
16159 | } |
16160 | |
16161 | // Try to use SHUF128 if possible. |
16162 | if (Subtarget.hasVLX()) { |
16163 | if (WidenedMask[0] < 2 && WidenedMask[1] >= 2) { |
16164 | unsigned PermMask = ((WidenedMask[0] % 2) << 0) | |
16165 | ((WidenedMask[1] % 2) << 1); |
16166 | return DAG.getNode(X86ISD::SHUF128, DL, VT, V1, V2, |
16167 | DAG.getTargetConstant(PermMask, DL, MVT::i8)); |
16168 | } |
16169 | } |
16170 | } |
16171 | |
16172 | |
16173 | |
16174 | |
16175 | |
16176 | |
16177 | |
16178 | // Otherwise form a 128-bit permutation. After accounting for undefs, |
16179 | // convert the 64-bit shuffle mask selection values into 128-bit selection |
16180 | // bits defined by the vperm2x128 immediate: bits [1:0] select the source |
16181 | // of the low result half, bits [5:4] the source of the high half, and |
16182 | // setting bit 3 or bit 7 zeroes the corresponding half instead - which is |
16183 | // how the IsLowZero/IsHighZero cases avoid an explicit zero operand. |
16184 | |
16185 | assert((WidenedMask[0] >= 0 || IsLowZero) && |
16186 | (WidenedMask[1] >= 0 || IsHighZero) && "Undef half?"); |
16187 | |
16188 | unsigned PermMask = 0; |
16189 | PermMask |= IsLowZero ? 0x08 : (WidenedMask[0] << 0); |
16190 | PermMask |= IsHighZero ? 0x80 : (WidenedMask[1] << 4); |
16191 | |
16192 | // Check the immediate mask and replace unused sources with undef. |
16193 | if ((PermMask & 0x0a) != 0x00 && (PermMask & 0xa0) != 0x00) |
16194 | V1 = DAG.getUNDEF(VT); |
16195 | if ((PermMask & 0x0a) != 0x02 && (PermMask & 0xa0) != 0x20) |
16196 | V2 = DAG.getUNDEF(VT); |
16197 | |
16198 | return DAG.getNode(X86ISD::VPERM2X128, DL, VT, V1, V2, |
16199 | DAG.getTargetConstant(PermMask, DL, MVT::i8)); |
16200 | } |
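// --- Editor's illustration (not from the original file) ---
// The VPERM2X128 immediate assembled above: bits [1:0] pick the 128-bit
// source of the low result half, bits [5:4] the high half, and bits 3 / 7
// request a zeroed half instead (hypothetical helper below).
namespace vperm2x128_imm_example {
constexpr unsigned permImm(unsigned Lo, unsigned Hi, bool LoZero, bool HiZero) {
  return (LoZero ? 0x08u : Lo) | (HiZero ? 0x80u : Hi << 4);
}
static_assert(permImm(1, 2, false, false) == 0x21, "<V1.hi, V2.lo>");
static_assert(permImm(0, 0, true, false) == 0x08, "low half zeroed");
} // namespace vperm2x128_imm_example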
16201 | |
16202 | /// Lower a vector shuffle by first fixing the 128-bit lanes and then |
16203 | /// shuffling each lane. |
16204 | /// |
16205 | /// This attempts to create a repeated lane shuffle where each lane uses one |
16206 | /// or two of the lanes of the inputs. The lanes of the input vectors are |
16207 | /// shuffled in one or two independent shuffles to get the lanes into the |
16208 | /// position needed by the final shuffle. |
16209 | static SDValue lowerShuffleAsLanePermuteAndRepeatedMask( |
16210 | const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask, |
16211 | const X86Subtarget &Subtarget, SelectionDAG &DAG) { |
16212 | assert(!V2.isUndef() && "This is only useful with multiple inputs."); |
16213 | |
16214 | if (is128BitLaneRepeatedShuffleMask(VT, Mask)) |
16215 | return SDValue(); |
16216 | |
16217 | int NumElts = Mask.size(); |
16218 | int NumLanes = VT.getSizeInBits() / 128; |
16219 | int NumLaneElts = 128 / VT.getScalarSizeInBits(); |
16220 | SmallVector<int, 16> RepeatMask(NumLaneElts, -1); |
16221 | SmallVector<std::array<int, 2>, 2> LaneSrcs(NumLanes, {{-1, -1}}); |
16222 | |
16223 | // First pass will try to fill in the RepeatMask from lanes that need two |
16224 | // sources. |
16225 | for (int Lane = 0; Lane != NumLanes; ++Lane) { |
16226 | int Srcs[2] = {-1, -1}; |
16227 | SmallVector<int, 16> InLaneMask(NumLaneElts, -1); |
16228 | for (int i = 0; i != NumLaneElts; ++i) { |
16229 | int M = Mask[(Lane * NumLaneElts) + i]; |
16230 | if (M < 0) |
16231 | continue; |
16232 | |
16233 | // Determine which input lane this element comes from. Each 128-bit result |
16234 | // lane may draw from at most two source lanes (Srcs[0]/Srcs[1]); give up |
16235 | // if a third source lane would be needed. |
16236 | int LaneSrc = M / NumLaneElts; |
16237 | int Src; |
16238 | if (Srcs[0] < 0 || Srcs[0] == LaneSrc) |
16239 | Src = 0; |
16240 | else if (Srcs[1] < 0 || Srcs[1] == LaneSrc) |
16241 | Src = 1; |
16242 | else |
16243 | return SDValue(); |
16244 | |
16245 | Srcs[Src] = LaneSrc; |
16246 | InLaneMask[i] = (M % NumLaneElts) + Src * NumElts; |
16247 | } |
16248 | |
16249 | // Lanes that use a single source are handled by the second pass below. |
16250 | if (Srcs[1] < 0) |
16251 | continue; |
16252 | |
16253 | LaneSrcs[Lane][0] = Srcs[0]; |
16254 | LaneSrcs[Lane][1] = Srcs[1]; |
16255 | |
16256 | auto MatchMasks = [](ArrayRef<int> M1, ArrayRef<int> M2) { |
16257 | assert(M1.size() == M2.size() && "Unexpected mask size"); |
16258 | for (int i = 0, e = M1.size(); i != e; ++i) |
16259 | if (M1[i] >= 0 && M2[i] >= 0 && M1[i] != M2[i]) |
16260 | return false; |
16261 | return true; |
16262 | }; |
16263 | |
16264 | auto MergeMasks = [](ArrayRef<int> Mask, MutableArrayRef<int> MergedMask) { |
16265 | assert(Mask.size() == MergedMask.size() && "Unexpected mask size"); |
16266 | for (int i = 0, e = MergedMask.size(); i != e; ++i) { |
16267 | int M = Mask[i]; |
16268 | if (M < 0) |
16269 | continue; |
16270 | assert((MergedMask[i] < 0 || MergedMask[i] == M) && |
16271 | "Unexpected mask element"); |
16272 | MergedMask[i] = M; |
16273 | } |
16274 | }; |
16275 | |
16276 | if (MatchMasks(InLaneMask, RepeatMask)) { |
16277 | // Merge this lane mask into the final repeat mask. |
16278 | MergeMasks(InLaneMask, RepeatMask); |
16279 | continue; |
16280 | } |
16281 | |
16282 | // Didn't find a match. Swap the operands and try again. |
16283 | std::swap(LaneSrcs[Lane][0], LaneSrcs[Lane][1]); |
16284 | ShuffleVectorSDNode::commuteMask(InLaneMask); |
16285 | |
16286 | if (MatchMasks(InLaneMask, RepeatMask)) { |
16287 | // Merge this lane mask into the final repeat mask. |
16288 | MergeMasks(InLaneMask, RepeatMask); |
16289 | continue; |
16290 | } |
16291 | |
16292 | // Couldn't find a match with the operands in either order. |
16293 | return SDValue(); |
16294 | } |
16295 | |
16296 | // Now handle any lanes with only one source. |
16297 | for (int Lane = 0; Lane != NumLanes; ++Lane) { |
16298 | // If we already have sources for this lane, we're done. |
16299 | if (LaneSrcs[Lane][0] >= 0) |
16300 | continue; |
16301 | |
16302 | for (int i = 0; i != NumLaneElts; ++i) { |
16303 | int M = Mask[(Lane * NumLaneElts) + i]; |
16304 | if (M < 0) |
16305 | continue; |
16306 | |
16307 | // If RepeatMask isn't defined yet, we can fill it in with this element. |
16308 | if (RepeatMask[i] < 0) |
16309 | RepeatMask[i] = M % NumLaneElts; |
16310 | |
16311 | if (RepeatMask[i] < NumElts) { |
16312 | if (RepeatMask[i] != M % NumLaneElts) |
16313 | return SDValue(); |
16314 | LaneSrcs[Lane][0] = M / NumLaneElts; |
16315 | } else { |
16316 | if (RepeatMask[i] != ((M % NumLaneElts) + NumElts)) |
16317 | return SDValue(); |
16318 | LaneSrcs[Lane][1] = M / NumLaneElts; |
16319 | } |
16320 | } |
16321 | |
16322 | if (LaneSrcs[Lane][0] < 0 && LaneSrcs[Lane][1] < 0) |
16323 | return SDValue(); |
16324 | } |
16325 | |
16326 | SmallVector<int, 16> NewMask(NumElts, -1); |
16327 | for (int Lane = 0; Lane != NumLanes; ++Lane) { |
16328 | int Src = LaneSrcs[Lane][0]; |
16329 | for (int i = 0; i != NumLaneElts; ++i) { |
16330 | int M = -1; |
16331 | if (Src >= 0) |
16332 | M = Src * NumLaneElts + i; |
16333 | NewMask[Lane * NumLaneElts + i] = M; |
16334 | } |
16335 | } |
16336 | SDValue NewV1 = DAG.getVectorShuffle(VT, DL, V1, V2, NewMask); |
16337 | |
16338 | // Ensure we didn't get back the shuffle we started with. FIXME: This is a |
16339 | // hack to make up for some splat handling code in getVectorShuffle. |
16340 | if (isa<ShuffleVectorSDNode>(NewV1) && |
16341 | cast<ShuffleVectorSDNode>(NewV1)->getMask() == Mask) |
16342 | return SDValue(); |
16343 | |
16344 | for (int Lane = 0; Lane != NumLanes; ++Lane) { |
16345 | int Src = LaneSrcs[Lane][1]; |
16346 | for (int i = 0; i != NumLaneElts; ++i) { |
16347 | int M = -1; |
16348 | if (Src >= 0) |
16349 | M = Src * NumLaneElts + i; |
16350 | NewMask[Lane * NumLaneElts + i] = M; |
16351 | } |
16352 | } |
16353 | SDValue NewV2 = DAG.getVectorShuffle(VT, DL, V1, V2, NewMask); |
16354 | |
16355 | // Ensure we didn't get back the shuffle we started with. FIXME: This is a |
16356 | // hack to make up for some splat handling code in getVectorShuffle. |
16357 | if (isa<ShuffleVectorSDNode>(NewV2) && |
16358 | cast<ShuffleVectorSDNode>(NewV2)->getMask() == Mask) |
16359 | return SDValue(); |
16360 | |
16361 | for (int i = 0; i != NumElts; ++i) { |
16362 | NewMask[i] = RepeatMask[i % NumLaneElts]; |
16363 | if (NewMask[i] < 0) |
16364 | continue; |
16365 | |
16366 | NewMask[i] += (i / NumLaneElts) * NumLaneElts; |
16367 | } |
16368 | return DAG.getVectorShuffle(VT, DL, NewV1, NewV2, NewMask); |
16369 | } |
16370 | |
16371 | // If the input shuffle mask results in a vector that is undefined in all |
16372 | // upper or lower half elements and that mask accesses only 2 halves of the |
16373 | // shuffle's operands, return true. A mask of half the width with mask |
16374 | // indexes adjusted to access the extracted halves of the original shuffle |
16375 | // operands is stored in HalfMask. HalfIdx1 and HalfIdx2 record which halves |
16376 | // of the inputs those extracted operands correspond to. |
16377 | static bool |
16378 | getHalfShuffleMask(ArrayRef<int> Mask, MutableArrayRef<int> HalfMask, |
16379 | int &HalfIdx1, int &HalfIdx2) { |
16380 | assert((Mask.size() == HalfMask.size() * 2) && |
16381 | "Expected input mask to be twice as long as output"); |
16382 | |
16383 | // Exactly one half of the result must be undef to allow narrowing. |
16384 | bool UndefLower = isUndefLowerHalf(Mask); |
16385 | bool UndefUpper = isUndefUpperHalf(Mask); |
16386 | if (UndefLower == UndefUpper) |
16387 | return false; |
16388 | |
16389 | unsigned HalfNumElts = HalfMask.size(); |
16390 | unsigned MaskIndexOffset = UndefLower ? HalfNumElts : 0; |
16391 | HalfIdx1 = -1; |
16392 | HalfIdx2 = -1; |
16393 | for (unsigned i = 0; i != HalfNumElts; ++i) { |
16394 | int M = Mask[i + MaskIndexOffset]; |
16395 | if (M < 0) { |
16396 | HalfMask[i] = M; |
16397 | continue; |
16398 | } |
16399 | |
16400 | // Determine which of the 4 half vectors this element is from. |
16401 | // i.e. 0 = Lower V1, 1 = Upper V1, 2 = Lower V2, 3 = Upper V2. |
16402 | int HalfIdx = M / HalfNumElts; |
16403 | |
16404 | // Determine the element index into its half vector source. |
16405 | int HalfElt = M % HalfNumElts; |
16406 | |
16407 | // We can shuffle with up to 2 half vectors, set the new 'half' |
16408 | // shuffle mask accordingly. |
16409 | if (HalfIdx1 < 0 || HalfIdx1 == HalfIdx) { |
16410 | HalfMask[i] = HalfElt; |
16411 | HalfIdx1 = HalfIdx; |
16412 | continue; |
16413 | } |
16414 | if (HalfIdx2 < 0 || HalfIdx2 == HalfIdx) { |
16415 | HalfMask[i] = HalfElt + HalfNumElts; |
16416 | HalfIdx2 = HalfIdx; |
16417 | continue; |
16418 | } |
16419 | |
16420 | // Too many half vectors referenced. |
16421 | return false; |
16422 | } |
16423 | |
16424 | return true; |
16425 | } |
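// --- Editor's illustration (not from the original file) ---
// The half-index arithmetic used by getHalfShuffleMask above, for an 8-wide
// mask (HalfNumElts = 4): a full-width index M names half vector M / 4
// (0 = V1.lo, 1 = V1.hi, 2 = V2.lo, 3 = V2.hi) and element M % 4 within it.
namespace half_mask_example {
constexpr int HalfNumElts = 4;
static_assert(6 / HalfNumElts == 1 && 6 % HalfNumElts == 2,
              "index 6 = element 2 of V1's upper half");
static_assert(11 / HalfNumElts == 2 && 11 % HalfNumElts == 3,
              "index 11 = element 3 of V2's lower half");
} // namespace half_mask_example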
16426 | |
16427 | // Given the output values from getHalfShuffleMask(), create a half width |
16428 | // shuffle of extracted vectors followed by an insert back to full width. |
16429 | static SDValue getShuffleHalfVectors(const SDLoc &DL, SDValue V1, SDValue V2, |
16430 | ArrayRef<int> HalfMask, int HalfIdx1, |
16431 | int HalfIdx2, bool UndefLower, |
16432 | SelectionDAG &DAG, bool UseConcat = false) { |
16433 | assert(V1.getValueType() == V2.getValueType() && "Different sized vectors?"); |
16434 | assert(V1.getValueType().isSimple() && "Expecting only simple types"); |
16435 | |
16436 | MVT VT = V1.getSimpleValueType(); |
16437 | MVT HalfVT = VT.getHalfNumVectorElementsVT(); |
16438 | unsigned HalfNumElts = HalfVT.getVectorNumElements(); |
16439 | |
16440 | auto getHalfVector = [&](int HalfIdx) { |
16441 | if (HalfIdx < 0) |
16442 | return DAG.getUNDEF(HalfVT); |
16443 | SDValue V = (HalfIdx < 2 ? V1 : V2); |
16444 | HalfIdx = (HalfIdx % 2) * HalfNumElts; |
16445 | return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V, |
16446 | DAG.getIntPtrConstant(HalfIdx, DL)); |
16447 | }; |
16448 | |
16449 | // ins undef, (shuf (ext V1, HalfIdx1), (ext V2, HalfIdx2), HalfMask), Offset |
16450 | SDValue Half1 = getHalfVector(HalfIdx1); |
16451 | SDValue Half2 = getHalfVector(HalfIdx2); |
16452 | SDValue V = DAG.getVectorShuffle(HalfVT, DL, Half1, Half2, HalfMask); |
16453 | if (UseConcat) { |
16454 | SDValue Op0 = V; |
16455 | SDValue Op1 = DAG.getUNDEF(HalfVT); |
16456 | if (UndefLower) |
16457 | std::swap(Op0, Op1); |
16458 | return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Op0, Op1); |
16459 | } |
16460 | |
16461 | unsigned Offset = UndefLower ? HalfNumElts : 0; |
16462 | return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, |
16463 | DAG.getIntPtrConstant(Offset, DL)); |
16464 | } |
16465 | |
16466 | /// Lower shuffles where an entire half of a 256 or 512-bit vector is UNDEF. |
16467 | /// This allows for fast cases such as subvector extraction/insertion |
16468 | /// or shuffling smaller vector types which can lower more efficiently. |
16469 | static SDValue lowerShuffleWithUndefHalf(const SDLoc &DL, MVT VT, SDValue V1, |
16470 | SDValue V2, ArrayRef<int> Mask, |
16471 | const X86Subtarget &Subtarget, |
16472 | SelectionDAG &DAG) { |
16473 | assert((VT.is256BitVector() || VT.is512BitVector()) && |
16474 | "Expected 256-bit or 512-bit vector"); |
16475 | |
16476 | bool UndefLower = isUndefLowerHalf(Mask); |
16477 | if (!UndefLower && !isUndefUpperHalf(Mask)) |
16478 | return SDValue(); |
16479 | |
16480 | assert((!UndefLower || !isUndefUpperHalf(Mask)) && |
16481 | "Completely undef shuffle mask should have been simplified already"); |
16482 | |
16483 | // Upper half is undef and lower half is whole upper subvector. |
16484 | // e.g. vector_shuffle <4, 5, 6, 7, u, u, u, u> or <2, 3, u, u> |
16485 | MVT HalfVT = VT.getHalfNumVectorElementsVT(); |
16486 | unsigned HalfNumElts = HalfVT.getVectorNumElements(); |
16487 | if (!UndefLower && |
16488 | isSequentialOrUndefInRange(Mask, 0, HalfNumElts, HalfNumElts)) { |
16489 | SDValue Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V1, |
16490 | DAG.getIntPtrConstant(HalfNumElts, DL)); |
16491 | return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), Hi, |
16492 | DAG.getIntPtrConstant(0, DL)); |
16493 | } |
16494 | |
16495 | // Lower half is undef and upper half is whole lower subvector. |
16496 | // e.g. vector_shuffle <u, u, u, u, 0, 1, 2, 3> or <u, u, 0, 1> |
16497 | if (UndefLower && |
16498 | isSequentialOrUndefInRange(Mask, HalfNumElts, HalfNumElts, 0)) { |
16499 | SDValue Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V1, |
16500 | DAG.getIntPtrConstant(0, DL)); |
16501 | return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), Hi, |
16502 | DAG.getIntPtrConstant(HalfNumElts, DL)); |
16503 | } |
16504 | |
16505 | int HalfIdx1, HalfIdx2; |
16506 | SmallVector<int, 8> HalfMask(HalfNumElts); |
16507 | if (!getHalfShuffleMask(Mask, HalfMask, HalfIdx1, HalfIdx2)) |
16508 | return SDValue(); |
16509 | |
16510 | assert(HalfMask.size() == HalfNumElts && "Unexpected shuffle mask length"); |
16511 | |
16512 | // Only shuffle the halves of the inputs when useful. |
16513 | unsigned NumLowerHalves = |
16514 | (HalfIdx1 == 0 || HalfIdx1 == 2) + (HalfIdx2 == 0 || HalfIdx2 == 2); |
16515 | unsigned NumUpperHalves = |
16516 | (HalfIdx1 == 1 || HalfIdx1 == 3) + (HalfIdx2 == 1 || HalfIdx2 == 3); |
16517 | assert(NumLowerHalves + NumUpperHalves <= 2 && "Only 1 or 2 halves allowed"); |
16518 | |
16519 | // Determine the larger pattern of undef/halves, then decide if it's worth |
16520 | // splitting the shuffle based on subtarget capabilities and types. |
16521 | unsigned EltWidth = VT.getVectorElementType().getSizeInBits(); |
16522 | if (!UndefLower) { |
16523 | // XXXXuuuu: no insert is needed. |
16524 | // Always extract lowers when setting lower - these are all free subreg ops. |
16525 | if (NumUpperHalves == 0) |
16526 | return getShuffleHalfVectors(DL, V1, V2, HalfMask, HalfIdx1, HalfIdx2, |
16527 | UndefLower, DAG); |
16528 | |
16529 | if (NumUpperHalves == 1) { |
16530 | // AVX2 has efficient 32/64-bit element cross-lane shuffles. |
16531 | if (Subtarget.hasAVX2()) { |
16532 | // extract128 + vunpckhps/vshufps, is better than vblend + vpermps. |
16533 | if (EltWidth == 32 && NumLowerHalves && HalfVT.is128BitVector() && |
16534 | !is128BitUnpackShuffleMask(HalfMask) && |
16535 | (!isSingleSHUFPSMask(HalfMask) || |
16536 | Subtarget.hasFastVariableCrossLaneShuffle())) |
16537 | return SDValue(); |
16538 | // If this is a unary shuffle (assume that the 2nd operand is |
16539 | // canonicalized to undef), then we can use vpermpd. Otherwise, we |
16540 | // are better off extracting the upper half of 1 operand and using a |
16541 | // narrow shuffle. |
16542 | if (EltWidth == 64 && V2.isUndef()) |
16543 | return SDValue(); |
16544 | } |
16545 | // AVX512 has efficient cross-lane shuffles for all legal 512-bit types. |
16546 | if (Subtarget.hasAVX512() && VT.is512BitVector()) |
16547 | return SDValue(); |
16548 | // Extract + narrow shuffle is better than the wide alternative. |
16549 | return getShuffleHalfVectors(DL, V1, V2, HalfMask, HalfIdx1, HalfIdx2, |
16550 | UndefLower, DAG); |
16551 | } |
16552 | |
16553 | // Both referenced halves are upper halves; extracting both costs more than |
16554 | assert(NumUpperHalves == 2 && "Half vector count went wrong"); |
16555 | return SDValue(); |
16556 | } |
16557 | |
16558 | // UndefLower - uuuuXXXX: an insert to high half is required if we split. |
16559 | if (NumUpperHalves == 0) { |
16560 | // AVX2 has efficient 64-bit element cross-lane shuffles. |
16561 | // TODO: Refine to account for unary shuffle, splat, and other masks? |
16562 | if (Subtarget.hasAVX2() && EltWidth == 64) |
16563 | return SDValue(); |
16564 | // AVX512 has efficient cross-lane shuffles for all legal 512-bit types. |
16565 | if (Subtarget.hasAVX512() && VT.is512BitVector()) |
16566 | return SDValue(); |
16567 | // Extract + narrow shuffle is better than the wide alternative. |
16568 | return getShuffleHalfVectors(DL, V1, V2, HalfMask, HalfIdx1, HalfIdx2, |
16569 | UndefLower, DAG); |
16570 | } |
16571 | |
16572 | // An upper-half source plus the required high-half insert isn't worth it. |
16573 | return SDValue(); |
16574 | } |
16575 | |
16576 | |
16577 | /// Test whether the specified input (0 or 1) is in-place blended by the |
16578 | /// given mask. |
16579 | /// This returns true if the elements from a particular input are already in |
16580 | /// the slot required by the given mask and require no permutation. |
16581 | static bool isShuffleMaskInputInPlace(int Input, ArrayRef<int> Mask) { |
16582 | assert((Input == 0 || Input == 1) && "Only two inputs to shuffles."); |
16583 | int Size = Mask.size(); |
16584 | for (int i = 0; i < Size; ++i) |
16585 | if (Mask[i] >= 0 && Mask[i] / Size == Input && Mask[i] % Size != i) |
16586 | return false; |
16587 | |
16588 | return true; |
16589 | } |
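// --- Editor's illustration (not from the original file) ---
// isShuffleMaskInputInPlace on a v4 two-input mask, re-stated as a
// hypothetical constexpr model: {0, 5, 2, 7} keeps both inputs in place,
// since each referenced element already sits in its own result slot.
namespace in_place_example {
constexpr bool inPlace(int Input, const int (&Mask)[4]) {
  for (int i = 0; i != 4; ++i)
    if (Mask[i] >= 0 && Mask[i] / 4 == Input && Mask[i] % 4 != i)
      return false;
  return true;
}
constexpr int M[4] = {0, 5, 2, 7};
static_assert(inPlace(0, M) && inPlace(1, M), "a pure blend mask");
} // namespace in_place_example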
16590 | |
16591 | /// Handle case where shuffle sources are coming from the same 128-bit lane |
16592 | /// and every lane can be represented as the same repeating mask - allowing |
16593 | /// us to shuffle the sources with the repeating shuffle and then permute the |
16594 | /// result to the destination lanes. |
16595 | static SDValue lowerShuffleAsRepeatedMaskAndLanePermute( |
16596 | const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask, |
16597 | const X86Subtarget &Subtarget, SelectionDAG &DAG) { |
16598 | int NumElts = VT.getVectorNumElements(); |
16599 | int NumLanes = VT.getSizeInBits() / 128; |
16600 | int NumLaneElts = NumElts / NumLanes; |
16601 | |
16602 | // On AVX2 we may be able to just shuffle the lowest elements and then |
16603 | // broadcast the result. |
16604 | if (Subtarget.hasAVX2()) { |
16605 | for (unsigned BroadcastSize : {16, 32, 64}) { |
16606 | if (BroadcastSize <= VT.getScalarSizeInBits()) |
16607 | continue; |
16608 | int NumBroadcastElts = BroadcastSize / VT.getScalarSizeInBits(); |
16609 | |
16610 | // Attempt to match a repeating pattern every NumBroadcastElts, |
16611 | // accounting for UNDEFs but only referencing the lowest 128-bit |
16612 | // lane of the inputs. |
16613 | auto FindRepeatingBroadcastMask = [&](SmallVectorImpl<int> &RepeatMask) { |
16614 | for (int i = 0; i != NumElts; i += NumBroadcastElts) |
16615 | for (int j = 0; j != NumBroadcastElts; ++j) { |
16616 | int M = Mask[i + j]; |
16617 | if (M < 0) |
16618 | continue; |
16619 | int &R = RepeatMask[j]; |
16620 | if (0 != ((M % NumElts) / NumLaneElts)) |
16621 | return false; |
16622 | if (0 <= R && R != M) |
16623 | return false; |
16624 | R = M; |
16625 | } |
16626 | return true; |
16627 | }; |
16628 | |
16629 | SmallVector<int, 8> RepeatMask((unsigned)NumElts, -1); |
16630 | if (!FindRepeatingBroadcastMask(RepeatMask)) |
16631 | continue; |
16632 | |
16633 | // Shuffle the (lowest) repeated elements in place for broadcast. |
16634 | SDValue RepeatShuf = DAG.getVectorShuffle(VT, DL, V1, V2, RepeatMask); |
16635 | |
16636 | // Shuffle the actual broadcast. |
16637 | SmallVector<int, 8> BroadcastMask((unsigned)NumElts, -1); |
16638 | for (int i = 0; i != NumElts; i += NumBroadcastElts) |
16639 | for (int j = 0; j != NumBroadcastElts; ++j) |
16640 | BroadcastMask[i + j] = j; |
16641 | return DAG.getVectorShuffle(VT, DL, RepeatShuf, DAG.getUNDEF(VT), |
16642 | BroadcastMask); |
16643 | } |
16644 | } |
16645 | |
16646 | // Bail if the shuffle mask doesn't cross 128-bit lanes. |
16647 | if (!is128BitLaneCrossingShuffleMask(VT, Mask)) |
16648 | return SDValue(); |
16649 | |
16650 | // Bail if we already have a repeated lane shuffle mask. |
16651 | SmallVector<int, 8> RepeatedShuffleMask; |
16652 | if (is128BitLaneRepeatedShuffleMask(VT, Mask, RepeatedShuffleMask)) |
16653 | return SDValue(); |
16654 | |
16655 | // On AVX2 targets we can permute 256-bit vectors as 64-bit sub-lanes |
16656 | // (with PERMQ/PERMPD), otherwise we can only permute whole 128-bit lanes. |
16657 | int SubLaneScale = Subtarget.hasAVX2() && VT.is256BitVector() ? 2 : 1; |
16658 | int NumSubLanes = NumLanes * SubLaneScale; |
16659 | int NumSubLaneElts = NumLaneElts / SubLaneScale; |
16660 | |
16661 | // Check that all the sources are coming from the same lane and see if we |
16662 | // can form a repeating shuffle mask (local to each sub-lane). At the same |
16663 | // time, determine the source sub-lane for each destination sub-lane. |
16664 | int TopSrcSubLane = -1; |
16665 | SmallVector<int, 8> Dst2SrcSubLanes((unsigned)NumSubLanes, -1); |
16666 | SmallVector<int, 8> RepeatedSubLaneMasks[2] = { |
16667 | SmallVector<int, 8>((unsigned)NumSubLaneElts, SM_SentinelUndef), |
16668 | SmallVector<int, 8>((unsigned)NumSubLaneElts, SM_SentinelUndef)}; |
16669 | |
16670 | for (int DstSubLane = 0; DstSubLane != NumSubLanes; ++DstSubLane) { |
16671 | // Extract the sub-lane mask, check that it all comes from the same lane |
16672 | // and normalize the mask entries to come from the first lane. |
16673 | int SrcLane = -1; |
16674 | SmallVector<int, 8> SubLaneMask((unsigned)NumSubLaneElts, -1); |
16675 | for (int Elt = 0; Elt != NumSubLaneElts; ++Elt) { |
16676 | int M = Mask[(DstSubLane * NumSubLaneElts) + Elt]; |
16677 | if (M < 0) |
16678 | continue; |
16679 | int Lane = (M % NumElts) / NumLaneElts; |
16680 | if ((0 <= SrcLane) && (SrcLane != Lane)) |
16681 | return SDValue(); |
16682 | SrcLane = Lane; |
16683 | int LocalM = (M % NumLaneElts) + (M < NumElts ? 0 : NumElts); |
16684 | SubLaneMask[Elt] = LocalM; |
16685 | } |
16686 | |
16687 | // Whole sub-lane is UNDEF. |
16688 | if (SrcLane < 0) |
16689 | continue; |
16690 | |
16691 | // Attempt to match against the candidate repeated sub-lane masks. |
16692 | for (int SubLane = 0; SubLane != SubLaneScale; ++SubLane) { |
16693 | auto MatchMasks = [NumSubLaneElts](ArrayRef<int> M1, ArrayRef<int> M2) { |
16694 | for (int i = 0; i != NumSubLaneElts; ++i) { |
16695 | if (M1[i] < 0 || M2[i] < 0) |
16696 | continue; |
16697 | if (M1[i] != M2[i]) |
16698 | return false; |
16699 | } |
16700 | return true; |
16701 | }; |
16702 | |
16703 | auto &RepeatedSubLaneMask = RepeatedSubLaneMasks[SubLane]; |
16704 | if (!MatchMasks(SubLaneMask, RepeatedSubLaneMask)) |
16705 | continue; |
16706 | |
16707 | // Merge the sub-lane mask into the matching repeated sub-lane mask. |
16708 | for (int i = 0; i != NumSubLaneElts; ++i) { |
16709 | int M = SubLaneMask[i]; |
16710 | if (M < 0) |
16711 | continue; |
16712 | assert((RepeatedSubLaneMask[i] < 0 || RepeatedSubLaneMask[i] == M) && |
16713 | "Unexpected mask element"); |
16714 | RepeatedSubLaneMask[i] = M; |
16715 | } |
16716 | |
16717 | // Track the top most source sub-lane - by setting the remaining to UNDEF |
16718 | // we can greatly simplify shuffle matching. |
16719 | int SrcSubLane = (SrcLane * SubLaneScale) + SubLane; |
16720 | TopSrcSubLane = std::max(TopSrcSubLane, SrcSubLane); |
16721 | Dst2SrcSubLanes[DstSubLane] = SrcSubLane; |
16722 | break; |
16723 | } |
16724 | |
16725 | // Bail if we failed to find a matching repeated sub-lane mask. |
16726 | if (Dst2SrcSubLanes[DstSubLane] < 0) |
16727 | return SDValue(); |
16728 | } |
16729 | assert(0 <= TopSrcSubLane && TopSrcSubLane < NumSubLanes && |
16730 | "Unexpected source lane"); |
16731 | |
16732 | // Create a repeating shuffle mask for the entire vector. |
16733 | SmallVector<int, 8> RepeatedMask((unsigned)NumElts, -1); |
16734 | for (int SubLane = 0; SubLane <= TopSrcSubLane; ++SubLane) { |
16735 | int Lane = SubLane / SubLaneScale; |
16736 | auto &RepeatedSubLaneMask = RepeatedSubLaneMasks[SubLane % SubLaneScale]; |
16737 | for (int Elt = 0; Elt != NumSubLaneElts; ++Elt) { |
16738 | int M = RepeatedSubLaneMask[Elt]; |
16739 | if (M < 0) |
16740 | continue; |
16741 | int Idx = (SubLane * NumSubLaneElts) + Elt; |
16742 | RepeatedMask[Idx] = M + (Lane * NumLaneElts); |
16743 | } |
16744 | } |
16745 | SDValue RepeatedShuffle = DAG.getVectorShuffle(VT, DL, V1, V2, RepeatedMask); |
16746 | |
16747 | // Shuffle each source sub-lane to its destination. |
16748 | SmallVector<int, 8> SubLaneMask((unsigned)NumElts, -1); |
16749 | for (int i = 0; i != NumElts; i += NumSubLaneElts) { |
16750 | int SrcSubLane = Dst2SrcSubLanes[i / NumSubLaneElts]; |
16751 | if (SrcSubLane < 0) |
16752 | continue; |
16753 | for (int j = 0; j != NumSubLaneElts; ++j) |
16754 | SubLaneMask[i + j] = j + (SrcSubLane * NumSubLaneElts); |
16755 | } |
16756 | |
16757 | return DAG.getVectorShuffle(VT, DL, RepeatedShuffle, DAG.getUNDEF(VT), |
16758 | SubLaneMask); |
16759 | } |
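// --- Editor's illustration (not from the original file) ---
// The final fix-up above replays the repeated sub-lane pattern in every
// lane: with NumLaneElts = 4, RepeatMask {1, 0, 3, 2} expands to
// {1, 0, 3, 2, 5, 4, 7, 6} once each lane's base offset is added.
namespace repeated_mask_example {
constexpr int NumLaneElts = 4;
constexpr int expand(const int (&Repeat)[4], int i) {
  return Repeat[i % NumLaneElts] + (i / NumLaneElts) * NumLaneElts;
}
constexpr int Repeat[4] = {1, 0, 3, 2};
static_assert(expand(Repeat, 5) == 4 && expand(Repeat, 7) == 6,
              "lane 1 repeats lane 0's pattern at offset 4");
} // namespace repeated_mask_example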
16760 | |
16761 | static bool matchShuffleWithSHUFPD(MVT VT, SDValue &V1, SDValue &V2, |
16762 | bool &ForceV1Zero, bool &ForceV2Zero, |
16763 | unsigned &ShuffleImm, ArrayRef<int> Mask, |
16764 | const APInt &Zeroable) { |
16765 | int NumElts = VT.getVectorNumElements(); |
16766 | assert(VT.getScalarSizeInBits() == 64 && |
16767 | (NumElts == 2 || NumElts == 4 || NumElts == 8) && |
16768 | "Unexpected data type for VSHUFPD"); |
16769 | assert(isUndefOrZeroOrInRange(Mask, 0, 2 * NumElts) && |
16770 | "Illegal shuffle mask"); |
16771 | |
16772 | bool ZeroLane[2] = { true, true }; |
16773 | for (int i = 0; i < NumElts; ++i) |
16774 | ZeroLane[i & 1] &= Zeroable[i]; |
16775 | |
16776 | // Mask for V8F64: 0/1, 8/9, 2/3, 10/11, 4/5, 12/13, 6/7, 14/15 |
16777 | // Mask for V4F64: 0/1, 4/5, 2/3, 6/7 |
16778 | ShuffleImm = 0; |
16779 | bool ShufpdMask = true; |
16780 | bool CommutableMask = true; |
16781 | for (int i = 0; i < NumElts; ++i) { |
16782 | if (Mask[i] == SM_SentinelUndef || ZeroLane[i & 1]) |
16783 | continue; |
16784 | if (Mask[i] < 0) |
16785 | return false; |
16786 | int Val = (i & 6) + NumElts * (i & 1); |
16787 | int CommutVal = (i & 0xe) + NumElts * ((i & 1) ^ 1); |
16788 | if (Mask[i] < Val || Mask[i] > Val + 1) |
16789 | ShufpdMask = false; |
16790 | if (Mask[i] < CommutVal || Mask[i] > CommutVal + 1) |
16791 | CommutableMask = false; |
16792 | ShuffleImm |= (Mask[i] % 2) << i; |
16793 | } |
16794 | |
16795 | if (!ShufpdMask && !CommutableMask) |
16796 | return false; |
16797 | |
16798 | if (!ShufpdMask && CommutableMask) |
16799 | std::swap(V1, V2); |
16800 | |
16801 | ForceV1Zero = ZeroLane[0]; |
16802 | ForceV2Zero = ZeroLane[1]; |
16803 | return true; |
16804 | } |
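// --- Editor's illustration (not from the original file) ---
// The per-slot constraint checked above for v4f64 (NumElts = 4): result slot
// i may only take Val or Val + 1 where Val = (i & 6) + NumElts * (i & 1), so
// even slots read V1's lane pair and odd slots read V2's.
namespace shufpd_match_example {
constexpr int NumElts = 4;
constexpr int val(int i) { return (i & 6) + NumElts * (i & 1); }
static_assert(val(0) == 0 && val(1) == 4 && val(2) == 2 && val(3) == 6,
              "mask {0, 5, 2, 7} is SHUFPD-able with immediate 0b1010");
} // namespace shufpd_match_example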
16805 | |
16806 | static SDValue lowerShuffleWithSHUFPD(const SDLoc &DL, MVT VT, SDValue V1, |
16807 | SDValue V2, ArrayRef<int> Mask, |
16808 | const APInt &Zeroable, |
16809 | const X86Subtarget &Subtarget, |
16810 | SelectionDAG &DAG) { |
16811 | assert((VT == MVT::v2f64 || VT == MVT::v4f64 || VT == MVT::v8f64) && |
16812 | "Unexpected data type for VSHUFPD"); |
16813 | |
16814 | unsigned Immediate = 0; |
16815 | bool ForceV1Zero = false, ForceV2Zero = false; |
16816 | if (!matchShuffleWithSHUFPD(VT, V1, V2, ForceV1Zero, ForceV2Zero, Immediate, |
16817 | Mask, Zeroable)) |
16818 | return SDValue(); |
16819 | |
16820 | // Force the zeroable operands to materialized zero vectors. |
16821 | if (ForceV1Zero) |
16822 | V1 = getZeroVector(VT, Subtarget, DAG, DL); |
16823 | if (ForceV2Zero) |
16824 | V2 = getZeroVector(VT, Subtarget, DAG, DL); |
16825 | |
16826 | return DAG.getNode(X86ISD::SHUFP, DL, VT, V1, V2, |
16827 | DAG.getTargetConstant(Immediate, DL, MVT::i8)); |
16828 | } |
16829 | |
16830 | // Look for {0, 8, 16, 24, 32, 40, 48, 56} in the first 8 elements, followed |
16831 | // by zeroable elements in the remaining 24 elements. Turn this into two |
16832 | // vmovqb instructions shuffled together. |
16833 | static SDValue lowerShuffleAsVTRUNCAndUnpack(const SDLoc &DL, MVT VT, |
16834 | SDValue V1, SDValue V2, |
16835 | ArrayRef<int> Mask, |
16836 | const APInt &Zeroable, |
16837 | SelectionDAG &DAG) { |
16838 | assert(VT == MVT::v32i8 && "Unexpected type!"); |
16839 | |
16840 | // The first 8 indices should be every 8th element. |
16841 | if (!isSequentialOrUndefInRange(Mask, 0, 8, 0, 8)) |
16842 | return SDValue(); |
16843 | |
16844 | // Remaining elements need to be zeroable. |
16845 | if (Zeroable.countLeadingOnes() < (Mask.size() - 8)) |
16846 | return SDValue(); |
16847 | |
16848 | V1 = DAG.getBitcast(MVT::v4i64, V1); |
16849 | V2 = DAG.getBitcast(MVT::v4i64, V2); |
16850 | |
16851 | V1 = DAG.getNode(X86ISD::VTRUNC, DL, MVT::v16i8, V1); |
16852 | V2 = DAG.getNode(X86ISD::VTRUNC, DL, MVT::v16i8, V2); |
16853 | |
16854 | // The VTRUNCs will put 0s in the upper 12 bytes. Use them to put zeroes in |
16855 | // the upper bits of the result using an unpckldq. |
16856 | SDValue Unpack = DAG.getVectorShuffle(MVT::v16i8, DL, V1, V2, |
16857 | { 0, 1, 2, 3, 16, 17, 18, 19, |
16858 | 4, 5, 6, 7, 20, 21, 22, 23 }); |
16859 | // Insert the unpckldq into a zero vector to widen to v32i8. |
16860 | return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, MVT::v32i8, |
16861 | DAG.getConstant(0, DL, MVT::v32i8), Unpack, |
16862 | DAG.getIntPtrConstant(0, DL)); |
16863 | } |
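// --- Editor's illustration (not from the original file) ---
// The interleave mask above is a byte-level vpunpckldq: 4-byte group g of
// the result comes from operand g & 1, dword g / 2 of that operand.
namespace vtrunc_unpack_example {
constexpr int unpackLoByte(int i) {
  return ((i / 4) % 2 == 0 ? 0 : 16) + (i / 8) * 4 + (i % 4);
}
static_assert(unpackLoByte(4) == 16 && unpackLoByte(8) == 4,
              "bytes 4..7 take V2's dword 0; bytes 8..11 take V1's dword 1");
} // namespace vtrunc_unpack_example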
16864 | |
16865 | |
16866 | /// Handle lowering of 4-lane 64-bit floating point shuffles. |
16867 | /// |
16868 | /// Also ends up handling lowering of 4-lane 64-bit integer shuffles when AVX2 |
16869 | /// isn't available. |
16870 | static SDValue lowerV4F64Shuffle(const SDLoc &DL, ArrayRef<int> Mask, |
16871 | const APInt &Zeroable, SDValue V1, SDValue V2, |
16872 | const X86Subtarget &Subtarget, |
16873 | SelectionDAG &DAG) { |
16874 | assert(V1.getSimpleValueType() == MVT::v4f64 && "Bad operand type!"); |
16875 | assert(V2.getSimpleValueType() == MVT::v4f64 && "Bad operand type!"); |
16876 | assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!"); |
16877 | |
16878 | if (SDValue V = lowerV2X128Shuffle(DL, MVT::v4f64, V1, V2, Mask, Zeroable, |
16879 | Subtarget, DAG)) |
16880 | return V; |
16881 | |
16882 | if (V2.isUndef()) { |
16883 | // Check for being able to broadcast a single element. |
16884 | if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v4f64, V1, V2, |
16885 | Mask, Subtarget, DAG)) |
16886 | return Broadcast; |
16887 | |
16888 | // Use low duplicate instructions for masks that match their pattern. |
16889 | if (isShuffleEquivalent(Mask, {0, 0, 2, 2}, V1, V2)) |
16890 | return DAG.getNode(X86ISD::MOVDDUP, DL, MVT::v4f64, V1); |
16891 | |
16892 | if (!is128BitLaneCrossingShuffleMask(MVT::v4f64, Mask)) { |
16893 | // Non-half-crossing single input shuffles can be lowered with an |
16894 | // interleaved permutation. |
16895 | unsigned VPERMILPMask = (Mask[0] == 1) | ((Mask[1] == 1) << 1) | |
16896 | ((Mask[2] == 3) << 2) | ((Mask[3] == 3) << 3); |
16897 | return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v4f64, V1, |
16898 | DAG.getTargetConstant(VPERMILPMask, DL, MVT::i8)); |
16899 | } |
16900 | |
16901 | // With AVX2 we have direct support for this permutation. |
16902 | if (Subtarget.hasAVX2()) |
16903 | return DAG.getNode(X86ISD::VPERMI, DL, MVT::v4f64, V1, |
16904 | getV4X86ShuffleImm8ForMask(Mask, DL, DAG)); |
16905 | |
16906 | // Try to create an in-lane repeating shuffle mask and then shuffle the |
16907 | // results into the target lanes. |
16908 | if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute( |
16909 | DL, MVT::v4f64, V1, V2, Mask, Subtarget, DAG)) |
16910 | return V; |
16911 | |
16912 | // Try to permute the lanes and then use a per-lane permute. |
16913 | if (SDValue V = lowerShuffleAsLanePermuteAndPermute(DL, MVT::v4f64, V1, V2, |
16914 | Mask, DAG, Subtarget)) |
16915 | return V; |
16916 | |
16917 | // Otherwise, fall back. |
16918 | return lowerShuffleAsLanePermuteAndShuffle(DL, MVT::v4f64, V1, V2, Mask, |
16919 | DAG, Subtarget); |
16920 | } |
16921 | |
16922 | // Use dedicated unpack instructions for masks that match their pattern. |
16923 | if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v4f64, Mask, V1, V2, DAG)) |
16924 | return V; |
16925 | |
16926 | if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v4f64, V1, V2, Mask, |
16927 | Zeroable, Subtarget, DAG)) |
16928 | return Blend; |
16929 | |
16930 | // Check if the blend happens to exactly fit that of SHUFPD. |
16931 | if (SDValue Op = lowerShuffleWithSHUFPD(DL, MVT::v4f64, V1, V2, Mask, |
16932 | Zeroable, Subtarget, DAG)) |
16933 | return Op; |
16934 | |
16935 | |
16936 | // If we have lane crossing shuffles AND they don't all come from the lower |
16937 | // lane elements, lower to SHUFPD(VPERM2F128(V1, V2), VPERM2F128(V1, V2)). |
16938 | // TODO: Handle BUILD_VECTOR sources, which getVectorShuffle canonicalizes. |
16939 | if (is128BitLaneCrossingShuffleMask(MVT::v4f64, Mask) && |
16940 | !all_of(Mask, [](int M) { return M < 2 || (4 <= M && M < 6); }) && |
16941 | (V1.getOpcode() != ISD::BUILD_VECTOR) && |
16942 | (V2.getOpcode() != ISD::BUILD_VECTOR)) |
16943 | if (SDValue Op = lowerShuffleAsLanePermuteAndSHUFP(DL, MVT::v4f64, V1, V2, |
16944 | Mask, DAG)) |
16945 | return Op; |
16946 | |
16947 | // If we have one input in place, then we can permute the other input and |
16948 | // blend the result. |
16949 | if (isShuffleMaskInputInPlace(0, Mask) || isShuffleMaskInputInPlace(1, Mask)) |
16950 | return lowerShuffleAsDecomposedShuffleMerge(DL, MVT::v4f64, V1, V2, Mask, |
16951 | Subtarget, DAG); |
16952 | |
16953 | // Try to create an in-lane repeating shuffle mask and then shuffle the |
16954 | // results into the target lanes. |
16955 | if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute( |
16956 | DL, MVT::v4f64, V1, V2, Mask, Subtarget, DAG)) |
16957 | return V; |
16958 | |
16959 | // Try to simplify this by merging 128-bit lanes to enable a lane-based |
16960 | // shuffle. However, if we have AVX2 and either input is already in place, |
16961 | // we can shuffle the other input even across lanes in a single |
16962 | // instruction, so skip this pattern. |
16963 | if (!(Subtarget.hasAVX2() && (isShuffleMaskInputInPlace(0, Mask) || |
16964 | isShuffleMaskInputInPlace(1, Mask)))) |
16965 | if (SDValue V = lowerShuffleAsLanePermuteAndRepeatedMask( |
16966 | DL, MVT::v4f64, V1, V2, Mask, Subtarget, DAG)) |
16967 | return V; |
16968 | |
16969 | // If we have VLX support, we can use VEXPAND. |
16970 | if (Subtarget.hasVLX()) |
16971 | if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v4f64, Zeroable, Mask, V1, V2, |
16972 | DAG, Subtarget)) |
16973 | return V; |
16974 | |
16975 | // If we have AVX2 then we always want to lower with a blend, because an |
16976 | // AVX2 target can fully permute the v4f64 elements. |
16977 | if (Subtarget.hasAVX2()) |
16978 | return lowerShuffleAsDecomposedShuffleMerge(DL, MVT::v4f64, V1, V2, Mask, |
16979 | Subtarget, DAG); |
16980 | |
16981 | // Otherwise fall back on generic lowering. |
16982 | return lowerShuffleAsSplitOrBlend(DL, MVT::v4f64, V1, V2, Mask, |
16983 | Subtarget, DAG); |
16984 | } |
16985 | |
16986 | /// Handle lowering of 4-lane 64-bit integer shuffles. |
16987 | /// |
16988 | /// This routine is only called when we have AVX2 and thus a reasonable |
16989 | /// instruction set for v4i64 shuffling. |
16990 | static SDValue lowerV4I64Shuffle(const SDLoc &DL, ArrayRef<int> Mask, |
16991 | const APInt &Zeroable, SDValue V1, SDValue V2, |
16992 | const X86Subtarget &Subtarget, |
16993 | SelectionDAG &DAG) { |
16994 | assert(V1.getSimpleValueType() == MVT::v4i64 && "Bad operand type!"); |
16995 | assert(V2.getSimpleValueType() == MVT::v4i64 && "Bad operand type!"); |
16996 | assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!"); |
16997 | assert(Subtarget.hasAVX2() && "We can only lower v4i64 with AVX2!"); |
16998 | |
16999 | if (SDValue V = lowerV2X128Shuffle(DL, MVT::v4i64, V1, V2, Mask, Zeroable, |
17000 | Subtarget, DAG)) |
17001 | return V; |
17002 | |
17003 | if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v4i64, V1, V2, Mask, |
17004 | Zeroable, Subtarget, DAG)) |
17005 | return Blend; |
17006 | |
17007 | // Check for being able to broadcast a single element. |
17008 | if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v4i64, V1, V2, Mask, |
17009 | Subtarget, DAG)) |
17010 | return Broadcast; |
17011 | |
17012 | if (V2.isUndef()) { |
17013 | // When the shuffle is mirrored between the 128-bit lanes of the unit, we |
17014 | // can use lower latency instructions that will operate on both lanes. |
17015 | SmallVector<int, 2> RepeatedMask; |
17016 | if (is128BitLaneRepeatedShuffleMask(MVT::v4i64, Mask, RepeatedMask)) { |
17017 | SmallVector<int, 4> PSHUFDMask; |
17018 | narrowShuffleMaskElts(2, RepeatedMask, PSHUFDMask); |
17019 | return DAG.getBitcast( |
17020 | MVT::v4i64, |
17021 | DAG.getNode(X86ISD::PSHUFD, DL, MVT::v8i32, |
17022 | DAG.getBitcast(MVT::v8i32, V1), |
17023 | getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG))); |
17024 | } |
17025 | |
17026 | // AVX2 provides a direct instruction for permuting a single input across |
17027 | // lanes. |
17028 | return DAG.getNode(X86ISD::VPERMI, DL, MVT::v4i64, V1, |
17029 | getV4X86ShuffleImm8ForMask(Mask, DL, DAG)); |
17030 | } |
17031 | |
17032 | // Try to use shift instructions. |
17033 | if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v4i64, V1, V2, Mask, |
17034 | Zeroable, Subtarget, DAG)) |
17035 | return Shift; |
17036 | |
17037 | // If we have VLX support, we can use VALIGN or VEXPAND. |
17038 | if (Subtarget.hasVLX()) { |
17039 | if (SDValue Rotate = lowerShuffleAsVALIGN(DL, MVT::v4i64, V1, V2, Mask, |
17040 | Subtarget, DAG)) |
17041 | return Rotate; |
17042 | |
17043 | if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v4i64, Zeroable, Mask, V1, V2, |
17044 | DAG, Subtarget)) |
17045 | return V; |
17046 | } |
17047 | |
17048 | // Try to use PALIGNR. |
17049 | if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v4i64, V1, V2, Mask, |
17050 | Subtarget, DAG)) |
17051 | return Rotate; |
17052 | |
17053 | // Use dedicated unpack instructions for masks that match their pattern. |
17054 | if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v4i64, Mask, V1, V2, DAG)) |
17055 | return V; |
17056 | |
17057 | // If we have one input in place, then we can permute the other input and |
17058 | // blend the result. |
17059 | if (isShuffleMaskInputInPlace(0, Mask) || isShuffleMaskInputInPlace(1, Mask)) |
17060 | return lowerShuffleAsDecomposedShuffleMerge(DL, MVT::v4i64, V1, V2, Mask, |
17061 | Subtarget, DAG); |
17062 | |
17063 | // Try to create an in-lane repeating shuffle mask and then shuffle the |
17064 | // results into the target lanes. |
17065 | if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute( |
17066 | DL, MVT::v4i64, V1, V2, Mask, Subtarget, DAG)) |
17067 | return V; |
17068 | |
17069 | // Try to simplify this by merging 128-bit lanes to enable a lane-based |
17070 | // shuffle. However, if we have AVX2 and either input is already in place, |
17071 | // we can shuffle the other input even across lanes in a single |
17072 | // instruction, so skip this pattern. |
17073 | if (!isShuffleMaskInputInPlace(0, Mask) && |
17074 | !isShuffleMaskInputInPlace(1, Mask)) |
17075 | if (SDValue Result = lowerShuffleAsLanePermuteAndRepeatedMask( |
17076 | DL, MVT::v4i64, V1, V2, Mask, Subtarget, DAG)) |
17077 | return Result; |
17078 | |
17079 | // Otherwise fall back on generic blend lowering. |
17080 | return lowerShuffleAsDecomposedShuffleMerge(DL, MVT::v4i64, V1, V2, Mask, |
17081 | Subtarget, DAG); |
17082 | } |
17083 | |
17084 | /// Handle lowering of 8-lane 32-bit floating point shuffles. |
17085 | /// |
17086 | /// Also ends up handling lowering of 8-lane 32-bit integer shuffles when AVX2 |
17087 | /// isn't available. |
17088 | static SDValue lowerV8F32Shuffle(const SDLoc &DL, ArrayRef<int> Mask, |
17089 | const APInt &Zeroable, SDValue V1, SDValue V2, |
17090 | const X86Subtarget &Subtarget, |
17091 | SelectionDAG &DAG) { |
17092 | assert(V1.getSimpleValueType() == MVT::v8f32 && "Bad operand type!"); |
17093 | assert(V2.getSimpleValueType() == MVT::v8f32 && "Bad operand type!"); |
17094 | assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!"); |
17095 | |
17096 | if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v8f32, V1, V2, Mask, |
17097 | Zeroable, Subtarget, DAG)) |
17098 | return Blend; |
17099 | |
17100 | // Check for being able to broadcast a single element. |
17101 | if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v8f32, V1, V2, Mask, |
17102 | Subtarget, DAG)) |
17103 | return Broadcast; |
17104 | |
17105 | // If the shuffle mask is repeated in each 128-bit lane, we have many more |
17106 | // options to efficiently lower the shuffle. |
17107 | SmallVector<int, 4> RepeatedMask; |
17108 | if (is128BitLaneRepeatedShuffleMask(MVT::v8f32, Mask, RepeatedMask)) { |
17109 | assert(RepeatedMask.size() == 4 && |
17110 | "Repeated masks must be half the mask width!"); |
17111 | |
17112 | // Use even/odd duplicate instructions for masks that match their pattern. |
17113 | if (isShuffleEquivalent(RepeatedMask, {0, 0, 2, 2}, V1, V2)) |
17114 | return DAG.getNode(X86ISD::MOVSLDUP, DL, MVT::v8f32, V1); |
17115 | if (isShuffleEquivalent(RepeatedMask, {1, 1, 3, 3}, V1, V2)) |
17116 | return DAG.getNode(X86ISD::MOVSHDUP, DL, MVT::v8f32, V1); |
17117 | |
17118 | if (V2.isUndef()) |
17119 | return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v8f32, V1, |
17120 | getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG)); |
17121 | |
17122 | // Use dedicated unpack instructions for masks that match their pattern. |
17123 | if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v8f32, Mask, V1, V2, DAG)) |
17124 | return V; |
17125 | |
17126 | // Otherwise, fall back to a SHUFPS sequence. Here it is important that we |
17127 | // have already handled any direct blends. |
17128 | return lowerShuffleWithSHUFPS(DL, MVT::v8f32, RepeatedMask, V1, V2, DAG); |
17129 | } |
17130 | |
17131 | // Try to create an in-lane repeating shuffle mask and then shuffle the |
17132 | // results into the target lanes. |
17133 | if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute( |
17134 | DL, MVT::v8f32, V1, V2, Mask, Subtarget, DAG)) |
17135 | return V; |
17136 | |
17137 | // If we have a single input shuffle with different shuffle patterns in the |
17138 | // two 128-bit lanes, use the variable mask form VPERMILPS. |
17139 | if (V2.isUndef()) { |
17140 | if (!is128BitLaneCrossingShuffleMask(MVT::v8f32, Mask)) { |
17141 | SDValue VPermMask = getConstVector(Mask, MVT::v8i32, DAG, DL, true); |
17142 | return DAG.getNode(X86ISD::VPERMILPV, DL, MVT::v8f32, V1, VPermMask); |
17143 | } |
17144 | if (Subtarget.hasAVX2()) { |
17145 | SDValue VPermMask = getConstVector(Mask, MVT::v8i32, DAG, DL, true); |
17146 | return DAG.getNode(X86ISD::VPERMV, DL, MVT::v8f32, VPermMask, V1); |
17147 | } |
17148 | // Otherwise, fall back. |
17149 | return lowerShuffleAsLanePermuteAndShuffle(DL, MVT::v8f32, V1, V2, Mask, |
17150 | DAG, Subtarget); |
17151 | } |
17152 | |
17153 | // Try to simplify this by merging 128-bit lanes to enable a lane-based |
17154 | // shuffle. |
17155 | if (SDValue Result = lowerShuffleAsLanePermuteAndRepeatedMask( |
17156 | DL, MVT::v8f32, V1, V2, Mask, Subtarget, DAG)) |
17157 | return Result; |
17158 | |
17159 | // If we have VLX support, we can use VEXPAND. |
17160 | if (Subtarget.hasVLX()) |
17161 | if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v8f32, Zeroable, Mask, V1, V2, |
17162 | DAG, Subtarget)) |
17163 | return V; |
17164 | |
17165 | // For non-AVX512, if the mask is of 16-bit elements in lane then try to |
17166 | // split, since after the split we get more efficient code using vpunpcklwd |
17167 | // and vpunpckhwd than with vblend. |
17168 | if (!Subtarget.hasAVX512() && isUnpackWdShuffleMask(Mask, MVT::v8f32)) |
17169 | return lowerShuffleAsSplitOrBlend(DL, MVT::v8f32, V1, V2, Mask, Subtarget, |
17170 | DAG); |
17171 | |
17172 | // If we have AVX2 then we always want to lower with a blend, because at v8 |
17173 | // we can fully permute the elements. |
17174 | if (Subtarget.hasAVX2()) |
17175 | return lowerShuffleAsDecomposedShuffleMerge(DL, MVT::v8f32, V1, V2, Mask, |
17176 | Subtarget, DAG); |
17177 | |
17178 | // Otherwise fall back on generic lowering. |
17179 | return lowerShuffleAsSplitOrBlend(DL, MVT::v8f32, V1, V2, Mask, |
17180 | Subtarget, DAG); |
17181 | } |
17182 | |
17183 | /// Handle lowering of 8-lane 32-bit integer shuffles. |
17184 | /// |
17185 | /// This routine is only called when we have AVX2 and thus a reasonable |
17186 | /// instruction set for v8i32 shuffling. |
17187 | static SDValue lowerV8I32Shuffle(const SDLoc &DL, ArrayRef<int> Mask, |
17188 | const APInt &Zeroable, SDValue V1, SDValue V2, |
17189 | const X86Subtarget &Subtarget, |
17190 | SelectionDAG &DAG) { |
17191 | assert(V1.getSimpleValueType() == MVT::v8i32 && "Bad operand type!"); |
17192 | assert(V2.getSimpleValueType() == MVT::v8i32 && "Bad operand type!"); |
17193 | assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!"); |
17194 | assert(Subtarget.hasAVX2() && "We can only lower v8i32 with AVX2!"); |
17195 | |
17196 | |
17197 | // Whenever we can lower this as a zext, that instruction is strictly faster |
17198 | // than any alternative and also allows memory-operand folding in many cases. |
17199 | if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend(DL, MVT::v8i32, V1, V2, Mask, |
17200 | Zeroable, Subtarget, DAG)) |
17201 | return ZExt; |
17202 | |
17203 | |
17204 | // For non-AVX512, if the mask is of 16-bit elements in lane then try to |
17205 | // split, since vpunpcklwd/vpunpckhwd beat vblend after the split. |
17206 | if (isUnpackWdShuffleMask(Mask, MVT::v8i32) && !V2.isUndef() && |
17207 | !Subtarget.hasAVX512()) |
17208 | return lowerShuffleAsSplitOrBlend(DL, MVT::v8i32, V1, V2, Mask, Subtarget, |
17209 | DAG); |
17210 | |
17211 | if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v8i32, V1, V2, Mask, |
17212 | Zeroable, Subtarget, DAG)) |
17213 | return Blend; |
17214 | |
17215 | // Check for being able to broadcast a single element. |
17216 | if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v8i32, V1, V2, Mask, |
17217 | Subtarget, DAG)) |
17218 | return Broadcast; |
17219 | |
17220 | // If the shuffle mask is repeated in each 128-bit lane we can use more |
17221 | // efficient instructions that mirror the shuffles across the two 128-bit |
17222 | // lanes. |
17223 | SmallVector<int, 4> RepeatedMask; |
17224 | bool Is128BitLaneRepeatedShuffle = |
17225 | is128BitLaneRepeatedShuffleMask(MVT::v8i32, Mask, RepeatedMask); |
17226 | if (Is128BitLaneRepeatedShuffle) { |
17227 | assert(RepeatedMask.size() == 4 && "Unexpected repeated mask size!"); |
17228 | if (V2.isUndef()) |
17229 | return DAG.getNode(X86ISD::PSHUFD, DL, MVT::v8i32, V1, |
17230 | getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG)); |
17231 | |
17232 | // Use dedicated unpack instructions for masks that match their pattern. |
17233 | if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v8i32, Mask, V1, V2, DAG)) |
17234 | return V; |
17235 | } |
17236 | |
17237 | // Try to use shift instructions. |
17238 | if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v8i32, V1, V2, Mask, |
17239 | Zeroable, Subtarget, DAG)) |
17240 | return Shift; |
17241 | |
17242 | // If we have VLX support, we can use VALIGN or EXPAND. |
17243 | if (Subtarget.hasVLX()) { |
17244 | if (SDValue Rotate = lowerShuffleAsVALIGN(DL, MVT::v8i32, V1, V2, Mask, |
17245 | Subtarget, DAG)) |
17246 | return Rotate; |
17247 | |
17248 | if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v8i32, Zeroable, Mask, V1, V2, |
17249 | DAG, Subtarget)) |
17250 | return V; |
17251 | } |
17252 | |
17253 | // Try to use byte rotation instructions. |
17254 | if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v8i32, V1, V2, Mask, |
17255 | Subtarget, DAG)) |
17256 | return Rotate; |
17257 | |
17258 | // Try to create an in-lane repeating shuffle mask and then shuffle the |
17259 | // results into the target lanes. |
17260 | if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute( |
17261 | DL, MVT::v8i32, V1, V2, Mask, Subtarget, DAG)) |
17262 | return V; |
17263 | |
17264 | if (V2.isUndef()) { |
17265 | // Try to produce a fixed cross-128-bit lane permute followed by unpack, |
17266 | // because that should be faster than the variable permute alternatives. |
17267 | if (SDValue V = lowerShuffleWithUNPCK256(DL, MVT::v8i32, Mask, V1, V2, DAG)) |
17268 | return V; |
17269 | |
17270 | // If the shuffle patterns aren't repeated but it's a single input, directly |
17271 | // generate a cross-lane VPERMD instruction. |
17272 | SDValue VPermMask = getConstVector(Mask, MVT::v8i32, DAG, DL, true); |
17273 | return DAG.getNode(X86ISD::VPERMV, DL, MVT::v8i32, VPermMask, V1); |
17274 | } |
17275 | |
17276 | |
17277 | // Assume that a single SHUFPS is faster than an alternative sequence of |
17278 | // multiple instructions (even if the CPU has a domain penalty). |
17279 | if (Is128BitLaneRepeatedShuffle && isSingleSHUFPSMask(RepeatedMask)) { |
17280 | SDValue CastV1 = DAG.getBitcast(MVT::v8f32, V1); |
17281 | SDValue CastV2 = DAG.getBitcast(MVT::v8f32, V2); |
17282 | SDValue ShufPS = lowerShuffleWithSHUFPS(DL, MVT::v8f32, RepeatedMask, |
17283 | CastV1, CastV2, DAG); |
17284 | return DAG.getBitcast(MVT::v8i32, ShufPS); |
17285 | } |
17286 | |
17287 | // Try to simplify this by merging 128-bit lanes to enable a lane-based |
17288 | // shuffle. |
17289 | if (SDValue Result = lowerShuffleAsLanePermuteAndRepeatedMask( |
17290 | DL, MVT::v8i32, V1, V2, Mask, Subtarget, DAG)) |
17291 | return Result; |
17292 | |
17293 | // Otherwise fall back on generic blend lowering. |
17294 | return lowerShuffleAsDecomposedShuffleMerge(DL, MVT::v8i32, V1, V2, Mask, |
17295 | Subtarget, DAG); |
17296 | } |
17297 | |
17298 | /// Handle lowering of 16-lane 16-bit integer shuffles. |
17299 | /// |
17300 | /// This routine is only called when we have AVX2 and thus a reasonable |
17301 | /// instruction set for v16i16 shuffling. |
17302 | static SDValue lowerV16I16Shuffle(const SDLoc &DL, ArrayRef<int> Mask, |
17303 | const APInt &Zeroable, SDValue V1, SDValue V2, |
17304 | const X86Subtarget &Subtarget, |
17305 | SelectionDAG &DAG) { |
17306 | assert(V1.getSimpleValueType() == MVT::v16i16 && "Bad operand type!"); |
17307 | assert(V2.getSimpleValueType() == MVT::v16i16 && "Bad operand type!"); |
17308 | assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!"); |
17309 | assert(Subtarget.hasAVX2() && "We can only lower v16i16 with AVX2!"); |
17310 | |
17311 | // Whenever we can lower this as a zext, that instruction is strictly faster |
17312 | // than any alternative. It also allows us to fold memory operands into the |
17313 | // shuffle in many cases. |
17314 | if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend( |
17315 | DL, MVT::v16i16, V1, V2, Mask, Zeroable, Subtarget, DAG)) |
17316 | return ZExt; |
17317 | |
17318 | // Check for being able to broadcast a single element. |
17319 | if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v16i16, V1, V2, Mask, |
17320 | Subtarget, DAG)) |
17321 | return Broadcast; |
17322 | |
17323 | if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v16i16, V1, V2, Mask, |
17324 | Zeroable, Subtarget, DAG)) |
17325 | return Blend; |
17326 | |
17327 | // Use dedicated unpack instructions for masks that match their pattern. |
17328 | if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v16i16, Mask, V1, V2, DAG)) |
17329 | return V; |
17330 | |
17331 | // Use dedicated pack instructions for masks that match their pattern. |
17332 | if (SDValue V = lowerShuffleWithPACK(DL, MVT::v16i16, Mask, V1, V2, DAG, |
17333 | Subtarget)) |
17334 | return V; |
17335 | |
17336 | // Try to lower using a truncation. |
17337 | if (SDValue V = lowerShuffleAsVTRUNC(DL, MVT::v16i16, V1, V2, Mask, Zeroable, |
17338 | Subtarget, DAG)) |
17339 | return V; |
17340 | |
17341 | // Try to use shift instructions. |
17342 | if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v16i16, V1, V2, Mask, |
17343 | Zeroable, Subtarget, DAG)) |
17344 | return Shift; |
17345 | |
17346 | // Try to use byte rotation instructions. |
17347 | if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v16i16, V1, V2, Mask, |
17348 | Subtarget, DAG)) |
17349 | return Rotate; |
17350 | |
17351 | // Try to create an in-lane repeating shuffle mask and then shuffle the |
17352 | // results into the target lanes. |
17353 | if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute( |
17354 | DL, MVT::v16i16, V1, V2, Mask, Subtarget, DAG)) |
17355 | return V; |
17356 | |
17357 | if (V2.isUndef()) { |
17358 | // Try to use bit rotation instructions. |
17359 | if (SDValue Rotate = |
17360 | lowerShuffleAsBitRotate(DL, MVT::v16i16, V1, Mask, Subtarget, DAG)) |
17361 | return Rotate; |
17362 | |
17363 | // Try to produce a fixed cross-128-bit lane permute followed by unpack, |
17364 | // because that should be faster than the variable permute alternatives. |
17365 | if (SDValue V = lowerShuffleWithUNPCK256(DL, MVT::v16i16, Mask, V1, V2, DAG)) |
17366 | return V; |
17367 | |
17368 | // There are no generalized cross-lane shuffle operations available on i16 |
17369 | // element types. |
17370 | if (is128BitLaneCrossingShuffleMask(MVT::v16i16, Mask)) { |
17371 | if (SDValue V = lowerShuffleAsLanePermuteAndPermute( |
17372 | DL, MVT::v16i16, V1, V2, Mask, DAG, Subtarget)) |
17373 | return V; |
17374 | |
17375 | return lowerShuffleAsLanePermuteAndShuffle(DL, MVT::v16i16, V1, V2, Mask, |
17376 | DAG, Subtarget); |
17377 | } |
17378 | |
17379 | SmallVector<int, 8> RepeatedMask; |
17380 | if (is128BitLaneRepeatedShuffleMask(MVT::v16i16, Mask, RepeatedMask)) { |
17381 | // As this is a single-input shuffle, the repeated mask should be a |
17382 | // strictly valid v8i16 mask that we can pass through to the v8i16 lowering |
17383 | // to handle even the v16 case. |
17384 | return lowerV8I16GeneralSingleInputShuffle( |
17385 | DL, MVT::v16i16, V1, RepeatedMask, Subtarget, DAG); |
17386 | } |
17387 | } |
17388 | |
17389 | if (SDValue PSHUFB = lowerShuffleWithPSHUFB(DL, MVT::v16i16, Mask, V1, V2, |
17390 | Zeroable, Subtarget, DAG)) |
17391 | return PSHUFB; |
17392 | |
17393 | // AVX512BW can lower to VPERMW (non-VLX will pad to v32i16). |
17394 | if (Subtarget.hasBWI()) |
17395 | return lowerShuffleWithPERMV(DL, MVT::v16i16, Mask, V1, V2, Subtarget, DAG); |
17396 | |
17397 | // Try to simplify this by merging 128-bit lanes to enable a lane-based |
17398 | // shuffle. |
17399 | if (SDValue Result = lowerShuffleAsLanePermuteAndRepeatedMask( |
17400 | DL, MVT::v16i16, V1, V2, Mask, Subtarget, DAG)) |
17401 | return Result; |
17402 | |
17403 | // Try to permute the lanes and then use a per-lane permute. |
17404 | if (SDValue V = lowerShuffleAsLanePermuteAndPermute( |
17405 | DL, MVT::v16i16, V1, V2, Mask, DAG, Subtarget)) |
17406 | return V; |
17407 | |
17408 | // Otherwise fall back on generic lowering. |
17409 | return lowerShuffleAsSplitOrBlend(DL, MVT::v16i16, V1, V2, Mask, |
17410 | Subtarget, DAG); |
17411 | } |
17412 | |
17413 | |
17414 | /// Handle lowering of 32-lane 8-bit integer shuffles. |
17415 | /// This routine is only called when we have AVX2 and thus a reasonable |
17416 | /// instruction set for v32i8 shuffling. |
17417 | static SDValue lowerV32I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask, |
17418 | const APInt &Zeroable, SDValue V1, SDValue V2, |
17419 | const X86Subtarget &Subtarget, |
17420 | SelectionDAG &DAG) { |
17421 | assert(V1.getSimpleValueType() == MVT::v32i8 && "Bad operand type!"); |
17422 | assert(V2.getSimpleValueType() == MVT::v32i8 && "Bad operand type!"); |
17423 | assert(Mask.size() == 32 && "Unexpected mask size for v32 shuffle!"); |
17424 | assert(Subtarget.hasAVX2() && "We can only lower v32i8 with AVX2!"); |
17425 | |
17426 | // Whenever we can lower this as a zext, that instruction is strictly faster |
17427 | // than any alternative. It also allows us to fold memory operands into the |
17428 | // shuffle in many cases. |
17429 | if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend(DL, MVT::v32i8, V1, V2, Mask, |
17430 | Zeroable, Subtarget, DAG)) |
17431 | return ZExt; |
17432 | |
17433 | // Check for being able to broadcast a single element. |
17434 | if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v32i8, V1, V2, Mask, |
17435 | Subtarget, DAG)) |
17436 | return Broadcast; |
17437 | |
17438 | if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v32i8, V1, V2, Mask, |
17439 | Zeroable, Subtarget, DAG)) |
17440 | return Blend; |
17441 | |
17442 | // Use dedicated unpack instructions for masks that match their pattern. |
17443 | if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v32i8, Mask, V1, V2, DAG)) |
17444 | return V; |
17445 | |
17446 | // Use dedicated pack instructions for masks that match their pattern. |
17447 | if (SDValue V = lowerShuffleWithPACK(DL, MVT::v32i8, Mask, V1, V2, DAG, |
17448 | Subtarget)) |
17449 | return V; |
17450 | |
17451 | // Try to lower using a truncation. |
17452 | if (SDValue V = lowerShuffleAsVTRUNC(DL, MVT::v32i8, V1, V2, Mask, Zeroable, |
17453 | Subtarget, DAG)) |
17454 | return V; |
17455 | |
17456 | // Try to use shift instructions. |
17457 | if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v32i8, V1, V2, Mask, |
17458 | Zeroable, Subtarget, DAG)) |
17459 | return Shift; |
17460 | |
17461 | // Try to use byte rotation instructions. |
17462 | if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v32i8, V1, V2, Mask, |
17463 | Subtarget, DAG)) |
17464 | return Rotate; |
17465 | |
17466 | // Try to use bit rotation instructions. |
17467 | if (V2.isUndef()) |
17468 | if (SDValue Rotate = |
17469 | lowerShuffleAsBitRotate(DL, MVT::v32i8, V1, Mask, Subtarget, DAG)) |
17470 | return Rotate; |
17471 | |
17472 | // Try to create an in-lane repeating shuffle mask and then shuffle the |
17473 | // results into the target lanes. |
17474 | if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute( |
17475 | DL, MVT::v32i8, V1, V2, Mask, Subtarget, DAG)) |
17476 | return V; |
17477 | |
17478 | // There are no generalized cross-lane shuffle operations available on i8 |
17479 | // element types. |
17480 | if (V2.isUndef() && is128BitLaneCrossingShuffleMask(MVT::v32i8, Mask)) { |
17481 | // Try to produce a fixed cross-128-bit lane permute followed by unpack, |
17482 | // because that should be faster than the variable permute alternatives. |
17483 | if (SDValue V = lowerShuffleWithUNPCK256(DL, MVT::v32i8, Mask, V1, V2, DAG)) |
17484 | return V; |
17485 | |
17486 | if (SDValue V = lowerShuffleAsLanePermuteAndPermute( |
17487 | DL, MVT::v32i8, V1, V2, Mask, DAG, Subtarget)) |
17488 | return V; |
17489 | |
17490 | return lowerShuffleAsLanePermuteAndShuffle(DL, MVT::v32i8, V1, V2, Mask, |
17491 | DAG, Subtarget); |
17492 | } |
17493 | |
17494 | if (SDValue PSHUFB = lowerShuffleWithPSHUFB(DL, MVT::v32i8, Mask, V1, V2, |
17495 | Zeroable, Subtarget, DAG)) |
17496 | return PSHUFB; |
17497 | |
17498 | // AVX512VBMI can lower to VPERMB (non-VLX will pad to v64i8). |
17499 | if (Subtarget.hasVBMI()) |
17500 | return lowerShuffleWithPERMV(DL, MVT::v32i8, Mask, V1, V2, Subtarget, DAG); |
17501 | |
17502 | // Try to simplify this by merging 128-bit lanes to enable a lane-based |
17503 | // shuffle. |
17504 | if (SDValue Result = lowerShuffleAsLanePermuteAndRepeatedMask( |
17505 | DL, MVT::v32i8, V1, V2, Mask, Subtarget, DAG)) |
17506 | return Result; |
17507 | |
17508 | // Try to permute the lanes and then use a per-lane permute. |
17509 | if (SDValue V = lowerShuffleAsLanePermuteAndPermute( |
17510 | DL, MVT::v32i8, V1, V2, Mask, DAG, Subtarget)) |
17511 | return V; |
17512 | |
17513 | |
17514 | // With VLX, try the two-vmovqb-plus-unpack pattern handled by |
17515 | // lowerShuffleAsVTRUNCAndUnpack (see its header comment above). |
17516 | if (Subtarget.hasVLX()) |
17517 | if (SDValue V = lowerShuffleAsVTRUNCAndUnpack(DL, MVT::v32i8, V1, V2, |
17518 | Mask, Zeroable, DAG)) |
17519 | return V; |
17520 | |
17521 | // Otherwise fall back on generic lowering. |
17522 | return lowerShuffleAsSplitOrBlend(DL, MVT::v32i8, V1, V2, Mask, |
17523 | Subtarget, DAG); |
17524 | } |
17525 | |
17526 | /// High-level routine to lower various 256-bit x86 vector shuffles. |
17527 | /// |
17528 | /// This routine either breaks down the specific type of a 256-bit x86 vector |
17529 | /// shuffle or splits it into two 128-bit shuffles and fuses the results back |
17530 | /// together based on the available instructions. |
17531 | static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT, |
17532 | SDValue V1, SDValue V2, const APInt &Zeroable, |
17533 | const X86Subtarget &Subtarget, |
17534 | SelectionDAG &DAG) { |
17535 | // If we have a single input to the zero element, insert that into V1 if we |
17536 | // can do so cheaply. |
17537 | int NumElts = VT.getVectorNumElements(); |
17538 | int NumV2Elements = count_if(Mask, [NumElts](int M) { return M >= NumElts; }); |
17539 | |
17540 | if (NumV2Elements == 1 && Mask[0] >= NumElts) |
17541 | if (SDValue Insertion = lowerShuffleAsElementInsertion( |
17542 | DL, VT, V1, V2, Mask, Zeroable, Subtarget, DAG)) |
17543 | return Insertion; |
17544 | |
17545 | // Handle special cases where the lower or upper half is UNDEF.
17546 | if (SDValue V = |
17547 | lowerShuffleWithUndefHalf(DL, VT, V1, V2, Mask, Subtarget, DAG)) |
17548 | return V; |
17549 | |
17550 | // There is a really nice hard cut-over between AVX1 and AVX2 that means we
17551 | // can check for those subtargets here and avoid much of the subtarget
17552 | // querying in the per-vector-type lowering routines. With AVX1 we have
17553 | // essentially *zero* ability to manipulate a 256-bit vector with integer
17554 | // types. Since we'll use floating point types there eventually, just
17555 | // immediately cast everything to a float and operate entirely in that domain.
17556 | if (VT.isInteger() && !Subtarget.hasAVX2()) { |
17557 | int ElementBits = VT.getScalarSizeInBits(); |
17558 | if (ElementBits < 32) { |
17559 | // No floating point type available; if we can't use the bit operations
17560 | // for masking/blending then decompose into 128-bit vectors.
17561 | if (SDValue V = lowerShuffleAsBitMask(DL, VT, V1, V2, Mask, Zeroable, |
17562 | Subtarget, DAG)) |
17563 | return V; |
17564 | if (SDValue V = lowerShuffleAsBitBlend(DL, VT, V1, V2, Mask, DAG)) |
17565 | return V; |
17566 | return splitAndLowerShuffle(DL, VT, V1, V2, Mask, DAG); |
17567 | } |
17568 | |
17569 | MVT FpVT = MVT::getVectorVT(MVT::getFloatingPointVT(ElementBits), |
17570 | VT.getVectorNumElements()); |
17571 | V1 = DAG.getBitcast(FpVT, V1); |
17572 | V2 = DAG.getBitcast(FpVT, V2); |
17573 | return DAG.getBitcast(VT, DAG.getVectorShuffle(FpVT, DL, V1, V2, Mask)); |
17574 | } |
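      | // NOTE (illustrative sketch, not from the original source): the cast
      | // above turns, e.g., a v8i32 shuffle on AVX1 into
      | //   %f1 = bitcast <8 x i32> %v1 to <8 x float>
      | //   %f2 = bitcast <8 x i32> %v2 to <8 x float>
      | //   %s  = shufflevector <8 x float> %f1, %f2, <mask>
      | //   %r  = bitcast <8 x float> %s to <8 x i32>
      | // so the whole shuffle executes in the floating-point domain.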
17575 | |
17576 | switch (VT.SimpleTy) { |
17577 | case MVT::v4f64: |
17578 | return lowerV4F64Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); |
17579 | case MVT::v4i64: |
17580 | return lowerV4I64Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); |
17581 | case MVT::v8f32: |
17582 | return lowerV8F32Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); |
17583 | case MVT::v8i32: |
17584 | return lowerV8I32Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); |
17585 | case MVT::v16i16: |
17586 | return lowerV16I16Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); |
17587 | case MVT::v32i8: |
17588 | return lowerV32I8Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); |
17589 | |
17590 | default: |
17591 | llvm_unreachable("Not a valid 256-bit x86 vector type!"); |
17592 | } |
17593 | } |
17594 | |
17595 | /// Try to lower a vector shuffle as a sequence of 128-bit subvector shuffles.
17596 | static SDValue lowerV4X128Shuffle(const SDLoc &DL, MVT VT, ArrayRef<int> Mask, |
17597 | const APInt &Zeroable, SDValue V1, SDValue V2, |
17598 | const X86Subtarget &Subtarget, |
17599 | SelectionDAG &DAG) { |
17600 | assert(VT.getScalarSizeInBits() == 64 && |
17601 | "Unexpected element type size for 128bit shuffle."); |
17602 | |
17603 | // Handling 256-bit vectors requires VLX and is better served by
17604 | // lowerV2X128Shuffle; this routine only handles 512-bit vectors.
17605 | assert(VT.is512BitVector() && "Unexpected vector size for 512bit shuffle."); |
17606 | |
17607 | |
17608 | SmallVector<int, 4> Widened128Mask; |
17609 | if (!canWidenShuffleElements(Mask, Widened128Mask)) |
17610 | return SDValue(); |
17611 | assert(Widened128Mask.size() == 4 && "Shuffle widening mismatch"); |
17612 | |
17613 | // Try to use an insert into a zero vector.
17614 | if (Widened128Mask[0] == 0 && (Zeroable & 0xf0) == 0xf0 && |
17615 | (Widened128Mask[1] == 1 || (Zeroable & 0x0c) == 0x0c)) { |
17616 | unsigned NumElts = ((Zeroable & 0x0c) == 0x0c) ? 2 : 4; |
17617 | MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(), NumElts); |
17618 | SDValue LoV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V1, |
17619 | DAG.getIntPtrConstant(0, DL)); |
17620 | return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, |
17621 | getZeroVector(VT, Subtarget, DAG, DL), LoV, |
17622 | DAG.getIntPtrConstant(0, DL)); |
17623 | } |
17624 | |
17625 | // Check for patterns which can be matched with a single insert of a 256-bit
17626 | // subvector.
17627 | bool OnlyUsesV1 = isShuffleEquivalent(Mask, {0, 1, 2, 3, 0, 1, 2, 3}, V1, V2); |
17628 | if (OnlyUsesV1 || |
17629 | isShuffleEquivalent(Mask, {0, 1, 2, 3, 8, 9, 10, 11}, V1, V2)) { |
17630 | MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(), 4); |
17631 | SDValue SubVec = |
17632 | DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, OnlyUsesV1 ? V1 : V2, |
17633 | DAG.getIntPtrConstant(0, DL)); |
17634 | return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, V1, SubVec, |
17635 | DAG.getIntPtrConstant(4, DL)); |
17636 | } |
17637 | |
17638 | // See if this is an insertion of the low 128 bits of V2 into V1.
17639 | bool IsInsert = true; |
17640 | int V2Index = -1; |
17641 | for (int i = 0; i < 4; ++i) { |
17642 | assert(Widened128Mask[i] >= -1 && "Illegal shuffle sentinel value"); |
17643 | if (Widened128Mask[i] < 0) |
17644 | continue; |
17645 | |
17646 | // Make sure all V1 subvectors are in place.
17647 | if (Widened128Mask[i] < 4) { |
17648 | if (Widened128Mask[i] != i) { |
17649 | IsInsert = false; |
17650 | break; |
17651 | } |
17652 | } else { |
17653 | // Make sure we only have a single V2 index and it is the lowest 128 bits.
17654 | if (V2Index >= 0 || Widened128Mask[i] != 4) { |
17655 | IsInsert = false; |
17656 | break; |
17657 | } |
17658 | V2Index = i; |
17659 | } |
17660 | } |
17661 | if (IsInsert && V2Index >= 0) { |
17662 | MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(), 2); |
17663 | SDValue Subvec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V2, |
17664 | DAG.getIntPtrConstant(0, DL)); |
17665 | return insert128BitVector(V1, Subvec, V2Index * 2, DAG, DL); |
17666 | } |
17667 | |
17668 | |
17669 | |
17670 | // Widen to 256-bit chunks where possible and re-narrow afterwards; this
17671 | // fills undef chunk indices so whole 256-bit lanes move together.
17672 | SmallVector<int, 2> Widened256Mask; |
17673 | if (canWidenShuffleElements(Widened128Mask, Widened256Mask)) { |
17674 | Widened128Mask.clear(); |
17675 | narrowShuffleMaskElts(2, Widened256Mask, Widened128Mask); |
17676 | } |
17677 | |
17678 | // Try to lower to vshuf64x2/vshuf32x4.
17679 | SDValue Ops[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT)}; |
17680 | unsigned PermMask = 0; |
17681 | |
17682 | for (int i = 0; i < 4; ++i) { |
17683 | assert(Widened128Mask[i] >= -1 && "Illegal shuffle sentinel value"); |
17684 | if (Widened128Mask[i] < 0) |
17685 | continue; |
17686 | |
17687 | SDValue Op = Widened128Mask[i] >= 4 ? V2 : V1; |
17688 | unsigned OpIndex = i / 2; |
17689 | if (Ops[OpIndex].isUndef()) |
17690 | Ops[OpIndex] = Op; |
17691 | else if (Ops[OpIndex] != Op) |
17692 | return SDValue(); |
17693 | |
17694 | // Convert the 128-bit shuffle mask selection values into 128-bit selection
17695 | // bits defined by a vshuf64x2 instruction's immediate control byte.
17696 | PermMask |= (Widened128Mask[i] % 4) << (i * 2); |
17697 | } |
17698 | |
17699 | return DAG.getNode(X86ISD::SHUF128, DL, VT, Ops[0], Ops[1], |
17700 | DAG.getTargetConstant(PermMask, DL, MVT::i8)); |
17701 | } |
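      | // NOTE (illustrative walk-through, not from the original source): for a
      | // v8i64 shuffle with mask {0,1,2,3,12,13,14,15}, the widened 128-bit
      | // chunk mask is {0,1,6,7}; chunks 0-1 come from V1 and chunks 6-7 from
      | // V2, so Ops = {V1, V2} and the immediate packs (0,1,6%4,7%4) =
      | // (0,1,2,3) into two-bit fields: 0b11100100 = 0xE4.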
17702 | |
17703 | /// Handle lowering of 8-lane 64-bit floating point shuffles.
17704 | static SDValue lowerV8F64Shuffle(const SDLoc &DL, ArrayRef<int> Mask, |
17705 | const APInt &Zeroable, SDValue V1, SDValue V2, |
17706 | const X86Subtarget &Subtarget, |
17707 | SelectionDAG &DAG) { |
17708 | assert(V1.getSimpleValueType() == MVT::v8f64 && "Bad operand type!"); |
17709 | assert(V2.getSimpleValueType() == MVT::v8f64 && "Bad operand type!"); |
17710 | assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!"); |
17711 | |
17712 | if (V2.isUndef()) { |
17713 | // Use low duplicate instructions for masks that match their pattern.
17714 | if (isShuffleEquivalent(Mask, {0, 0, 2, 2, 4, 4, 6, 6}, V1, V2)) |
17715 | return DAG.getNode(X86ISD::MOVDDUP, DL, MVT::v8f64, V1); |
17716 | |
17717 | if (!is128BitLaneCrossingShuffleMask(MVT::v8f64, Mask)) { |
17718 | // Non-half-crossing single input shuffles can be lowered with an
17719 | // interleaved permutation.
17720 | unsigned VPERMILPMask = (Mask[0] == 1) | ((Mask[1] == 1) << 1) | |
17721 | ((Mask[2] == 3) << 2) | ((Mask[3] == 3) << 3) | |
17722 | ((Mask[4] == 5) << 4) | ((Mask[5] == 5) << 5) | |
17723 | ((Mask[6] == 7) << 6) | ((Mask[7] == 7) << 7); |
17724 | return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v8f64, V1, |
17725 | DAG.getTargetConstant(VPERMILPMask, DL, MVT::i8)); |
17726 | } |
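      | // NOTE (illustrative, not from the original source): for the in-lane
      | // swap mask {1,0,3,2,5,4,7,6} the expression above sets bits 0,2,4,6
      | // and clears bits 1,3,5,7, giving 0b01010101 = 0x55; each bit selects
      | // the high or low double within its own 128-bit lane.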
17727 | |
17728 | SmallVector<int, 4> RepeatedMask; |
17729 | if (is256BitLaneRepeatedShuffleMask(MVT::v8f64, Mask, RepeatedMask)) |
17730 | return DAG.getNode(X86ISD::VPERMI, DL, MVT::v8f64, V1, |
17731 | getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG)); |
17732 | } |
17733 | |
17734 | if (SDValue Shuf128 = lowerV4X128Shuffle(DL, MVT::v8f64, Mask, Zeroable, V1, |
17735 | V2, Subtarget, DAG)) |
17736 | return Shuf128; |
17737 | |
17738 | if (SDValue Unpck = lowerShuffleWithUNPCK(DL, MVT::v8f64, Mask, V1, V2, DAG)) |
17739 | return Unpck; |
17740 | |
17741 | // Check if the blend happens to exactly fit that of SHUFPD.
17742 | if (SDValue Op = lowerShuffleWithSHUFPD(DL, MVT::v8f64, V1, V2, Mask, |
17743 | Zeroable, Subtarget, DAG)) |
17744 | return Op; |
17745 | |
17746 | if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v8f64, Zeroable, Mask, V1, V2, |
17747 | DAG, Subtarget)) |
17748 | return V; |
17749 | |
17750 | if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v8f64, V1, V2, Mask, |
17751 | Zeroable, Subtarget, DAG)) |
17752 | return Blend; |
17753 | |
17754 | return lowerShuffleWithPERMV(DL, MVT::v8f64, Mask, V1, V2, Subtarget, DAG); |
17755 | } |
17756 | |
17757 | /// Handle lowering of 16-lane 32-bit floating point shuffles.
17758 | static SDValue lowerV16F32Shuffle(const SDLoc &DL, ArrayRef<int> Mask, |
17759 | const APInt &Zeroable, SDValue V1, SDValue V2, |
17760 | const X86Subtarget &Subtarget, |
17761 | SelectionDAG &DAG) { |
17762 | assert(V1.getSimpleValueType() == MVT::v16f32 && "Bad operand type!"); |
17763 | assert(V2.getSimpleValueType() == MVT::v16f32 && "Bad operand type!"); |
17764 | assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!"); |
17765 | |
17766 | // If the shuffle mask is repeated in each 128-bit lane, we have many more
17767 | // options to efficiently lower the shuffle.
17768 | SmallVector<int, 4> RepeatedMask; |
17769 | if (is128BitLaneRepeatedShuffleMask(MVT::v16f32, Mask, RepeatedMask)) { |
17770 | assert(RepeatedMask.size() == 4 && "Unexpected repeated mask size!"); |
17771 | |
17772 | // Use even/odd duplicate instructions for masks that match their pattern.
17773 | if (isShuffleEquivalent(RepeatedMask, {0, 0, 2, 2}, V1, V2)) |
17774 | return DAG.getNode(X86ISD::MOVSLDUP, DL, MVT::v16f32, V1); |
17775 | if (isShuffleEquivalent(RepeatedMask, {1, 1, 3, 3}, V1, V2)) |
17776 | return DAG.getNode(X86ISD::MOVSHDUP, DL, MVT::v16f32, V1); |
17777 | |
17778 | if (V2.isUndef()) |
17779 | return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v16f32, V1, |
17780 | getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG)); |
17781 | |
17782 | |
17783 | if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v16f32, Mask, V1, V2, DAG)) |
17784 | return V; |
17785 | |
17786 | if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v16f32, V1, V2, Mask, |
17787 | Zeroable, Subtarget, DAG)) |
17788 | return Blend; |
17789 | |
17790 | // Otherwise, fall back to a SHUFPS sequence.
17791 | return lowerShuffleWithSHUFPS(DL, MVT::v16f32, RepeatedMask, V1, V2, DAG); |
17792 | } |
17793 | |
17794 | |
17795 | |
17796 | if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute( |
17797 | DL, MVT::v16f32, V1, V2, Mask, Subtarget, DAG)) |
17798 | return V; |
17799 | |
17800 | // If we have a single input shuffle with different shuffle patterns in the
17801 | // 128-bit lanes and it doesn't lane cross, use a variable mask VPERMILPS.
17802 | if (V2.isUndef() && |
17803 | !is128BitLaneCrossingShuffleMask(MVT::v16f32, Mask)) { |
17804 | SDValue VPermMask = getConstVector(Mask, MVT::v16i32, DAG, DL, true); |
17805 | return DAG.getNode(X86ISD::VPERMILPV, DL, MVT::v16f32, V1, VPermMask); |
17806 | } |
17807 | |
17808 | // If we have AVX512F support, we can use VEXPAND.
17809 | if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v16f32, Zeroable, Mask, |
17810 | V1, V2, DAG, Subtarget)) |
17811 | return V; |
17812 | |
17813 | return lowerShuffleWithPERMV(DL, MVT::v16f32, Mask, V1, V2, Subtarget, DAG); |
17814 | } |
17815 | |
17816 | /// Handle lowering of 8-lane 64-bit integer shuffles.
17817 | static SDValue lowerV8I64Shuffle(const SDLoc &DL, ArrayRef<int> Mask, |
17818 | const APInt &Zeroable, SDValue V1, SDValue V2, |
17819 | const X86Subtarget &Subtarget, |
17820 | SelectionDAG &DAG) { |
17821 | assert(V1.getSimpleValueType() == MVT::v8i64 && "Bad operand type!"); |
17822 | assert(V2.getSimpleValueType() == MVT::v8i64 && "Bad operand type!"); |
17823 | assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!"); |
17824 | |
17825 | if (V2.isUndef()) { |
17826 | // When the shuffle is mirrored between the 128-bit lanes of the unit, we
17827 | // can use lower-latency instructions that will operate on all four
17828 | // 128-bit lanes.
17829 | SmallVector<int, 2> Repeated128Mask; |
17830 | if (is128BitLaneRepeatedShuffleMask(MVT::v8i64, Mask, Repeated128Mask)) { |
17831 | SmallVector<int, 4> PSHUFDMask; |
17832 | narrowShuffleMaskElts(2, Repeated128Mask, PSHUFDMask); |
17833 | return DAG.getBitcast( |
17834 | MVT::v8i64, |
17835 | DAG.getNode(X86ISD::PSHUFD, DL, MVT::v16i32, |
17836 | DAG.getBitcast(MVT::v16i32, V1), |
17837 | getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG))); |
17838 | } |
17839 | |
17840 | SmallVector<int, 4> Repeated256Mask; |
17841 | if (is256BitLaneRepeatedShuffleMask(MVT::v8i64, Mask, Repeated256Mask)) |
17842 | return DAG.getNode(X86ISD::VPERMI, DL, MVT::v8i64, V1, |
17843 | getV4X86ShuffleImm8ForMask(Repeated256Mask, DL, DAG)); |
17844 | } |
17845 | |
17846 | if (SDValue Shuf128 = lowerV4X128Shuffle(DL, MVT::v8i64, Mask, Zeroable, V1, |
17847 | V2, Subtarget, DAG)) |
17848 | return Shuf128; |
17849 | |
17850 | |
17851 | if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v8i64, V1, V2, Mask, |
17852 | Zeroable, Subtarget, DAG)) |
17853 | return Shift; |
17854 | |
17855 | // Try to use VALIGN.
17856 | if (SDValue Rotate = lowerShuffleAsVALIGN(DL, MVT::v8i64, V1, V2, Mask, |
17857 | Subtarget, DAG)) |
17858 | return Rotate; |
17859 | |
17860 | // Try to use PALIGNR.
17861 | if (Subtarget.hasBWI()) |
17862 | if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v8i64, V1, V2, Mask, |
17863 | Subtarget, DAG)) |
17864 | return Rotate; |
17865 | |
17866 | if (SDValue Unpck = lowerShuffleWithUNPCK(DL, MVT::v8i64, Mask, V1, V2, DAG)) |
17867 | return Unpck; |
17868 | |
17869 | |
17870 | if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v8i64, Zeroable, Mask, V1, V2, |
17871 | DAG, Subtarget)) |
17872 | return V; |
17873 | |
17874 | if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v8i64, V1, V2, Mask, |
17875 | Zeroable, Subtarget, DAG)) |
17876 | return Blend; |
17877 | |
17878 | return lowerShuffleWithPERMV(DL, MVT::v8i64, Mask, V1, V2, Subtarget, DAG); |
17879 | } |
17880 | |
17881 | /// Handle lowering of 16-lane 32-bit integer shuffles.
17882 | static SDValue lowerV16I32Shuffle(const SDLoc &DL, ArrayRef<int> Mask, |
17883 | const APInt &Zeroable, SDValue V1, SDValue V2, |
17884 | const X86Subtarget &Subtarget, |
17885 | SelectionDAG &DAG) { |
17886 | assert(V1.getSimpleValueType() == MVT::v16i32 && "Bad operand type!"); |
17887 | assert(V2.getSimpleValueType() == MVT::v16i32 && "Bad operand type!"); |
17888 | assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!"); |
17889 | |
17890 | // Whenever we can lower this as a zext, that instruction is strictly faster
17891 | // than any alternative. It also allows us to fold memory operands into the
17892 | // shuffle in many cases.
17893 | if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend( |
17894 | DL, MVT::v16i32, V1, V2, Mask, Zeroable, Subtarget, DAG)) |
17895 | return ZExt; |
17896 | |
17897 | // If the shuffle mask is repeated in each 128-bit lane we can use more
17898 | // efficient instructions that mirror the shuffles across the four 128-bit
17899 | // lanes.
17900 | SmallVector<int, 4> RepeatedMask; |
17901 | bool Is128BitLaneRepeatedShuffle = |
17902 | is128BitLaneRepeatedShuffleMask(MVT::v16i32, Mask, RepeatedMask); |
17903 | if (Is128BitLaneRepeatedShuffle) { |
17904 | assert(RepeatedMask.size() == 4 && "Unexpected repeated mask size!"); |
17905 | if (V2.isUndef()) |
17906 | return DAG.getNode(X86ISD::PSHUFD, DL, MVT::v16i32, V1, |
17907 | getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG)); |
17908 | |
17909 | |
17910 | if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v16i32, Mask, V1, V2, DAG)) |
17911 | return V; |
17912 | } |
17913 | |
17914 | |
17915 | if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v16i32, V1, V2, Mask, |
17916 | Zeroable, Subtarget, DAG)) |
17917 | return Shift; |
17918 | |
17919 | |
17920 | if (SDValue Rotate = lowerShuffleAsVALIGN(DL, MVT::v16i32, V1, V2, Mask, |
17921 | Subtarget, DAG)) |
17922 | return Rotate; |
17923 | |
17924 | |
17925 | if (Subtarget.hasBWI()) |
17926 | if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v16i32, V1, V2, Mask, |
17927 | Subtarget, DAG)) |
17928 | return Rotate; |
17929 | |
17930 | // Assume that a single SHUFPS is faster than using a permv shuffle.
17931 | // If some CPU is harmed by the domain switch, we can fix it in a later pass.
17932 | if (Is128BitLaneRepeatedShuffle && isSingleSHUFPSMask(RepeatedMask)) { |
17933 | SDValue CastV1 = DAG.getBitcast(MVT::v16f32, V1); |
17934 | SDValue CastV2 = DAG.getBitcast(MVT::v16f32, V2); |
17935 | SDValue ShufPS = lowerShuffleWithSHUFPS(DL, MVT::v16f32, RepeatedMask, |
17936 | CastV1, CastV2, DAG); |
17937 | return DAG.getBitcast(MVT::v16i32, ShufPS); |
17938 | } |
17939 | |
17940 | |
17941 | |
17942 | if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute( |
17943 | DL, MVT::v16i32, V1, V2, Mask, Subtarget, DAG)) |
17944 | return V; |
17945 | |
17946 | |
17947 | if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v16i32, Zeroable, Mask, V1, V2, |
17948 | DAG, Subtarget)) |
17949 | return V; |
17950 | |
17951 | if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v16i32, V1, V2, Mask, |
17952 | Zeroable, Subtarget, DAG)) |
17953 | return Blend; |
17954 | |
17955 | return lowerShuffleWithPERMV(DL, MVT::v16i32, Mask, V1, V2, Subtarget, DAG); |
17956 | } |
17957 | |
17958 | /// Handle lowering of 32-lane 16-bit integer shuffles.
17959 | static SDValue lowerV32I16Shuffle(const SDLoc &DL, ArrayRef<int> Mask, |
17960 | const APInt &Zeroable, SDValue V1, SDValue V2, |
17961 | const X86Subtarget &Subtarget, |
17962 | SelectionDAG &DAG) { |
17963 | assert(V1.getSimpleValueType() == MVT::v32i16 && "Bad operand type!"); |
17964 | assert(V2.getSimpleValueType() == MVT::v32i16 && "Bad operand type!"); |
17965 | assert(Mask.size() == 32 && "Unexpected mask size for v32 shuffle!"); |
17966 | assert(Subtarget.hasBWI() && "We can only lower v32i16 with AVX-512-BWI!"); |
17967 | |
17968 | |
17969 | |
17970 | |
17971 | if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend( |
17972 | DL, MVT::v32i16, V1, V2, Mask, Zeroable, Subtarget, DAG)) |
17973 | return ZExt; |
17974 | |
17975 | |
17976 | if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v32i16, Mask, V1, V2, DAG)) |
17977 | return V; |
17978 | |
17979 | |
17980 | if (SDValue V = |
17981 | lowerShuffleWithPACK(DL, MVT::v32i16, Mask, V1, V2, DAG, Subtarget)) |
17982 | return V; |
17983 | |
17984 | |
17985 | if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v32i16, V1, V2, Mask, |
17986 | Zeroable, Subtarget, DAG)) |
17987 | return Shift; |
17988 | |
17989 | |
17990 | if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v32i16, V1, V2, Mask, |
17991 | Subtarget, DAG)) |
17992 | return Rotate; |
17993 | |
17994 | if (V2.isUndef()) { |
17995 | |
17996 | if (SDValue Rotate = |
17997 | lowerShuffleAsBitRotate(DL, MVT::v32i16, V1, Mask, Subtarget, DAG)) |
17998 | return Rotate; |
17999 | |
18000 | SmallVector<int, 8> RepeatedMask; |
18001 | if (is128BitLaneRepeatedShuffleMask(MVT::v32i16, Mask, RepeatedMask)) { |
18002 | // As this is a single-input shuffle, the repeated mask should be a
18003 | // strictly valid v8i16 mask that we can pass through to the v8i16
18004 | // lowering to handle even the v32 case.
18005 | return lowerV8I16GeneralSingleInputShuffle(DL, MVT::v32i16, V1, |
18006 | RepeatedMask, Subtarget, DAG); |
18007 | } |
18008 | } |
18009 | |
18010 | if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v32i16, V1, V2, Mask, |
18011 | Zeroable, Subtarget, DAG)) |
18012 | return Blend; |
18013 | |
18014 | if (SDValue PSHUFB = lowerShuffleWithPSHUFB(DL, MVT::v32i16, Mask, V1, V2, |
18015 | Zeroable, Subtarget, DAG)) |
18016 | return PSHUFB; |
18017 | |
18018 | return lowerShuffleWithPERMV(DL, MVT::v32i16, Mask, V1, V2, Subtarget, DAG); |
18019 | } |
18020 | |
18021 | /// Handle lowering of 64-lane 8-bit integer shuffles.
18022 | static SDValue lowerV64I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask, |
18023 | const APInt &Zeroable, SDValue V1, SDValue V2, |
18024 | const X86Subtarget &Subtarget, |
18025 | SelectionDAG &DAG) { |
18026 | assert(V1.getSimpleValueType() == MVT::v64i8 && "Bad operand type!"); |
18027 | assert(V2.getSimpleValueType() == MVT::v64i8 && "Bad operand type!"); |
18028 | assert(Mask.size() == 64 && "Unexpected mask size for v64 shuffle!"); |
18029 | assert(Subtarget.hasBWI() && "We can only lower v64i8 with AVX-512-BWI!"); |
18030 | |
18031 | |
18032 | |
18033 | |
18034 | if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend( |
18035 | DL, MVT::v64i8, V1, V2, Mask, Zeroable, Subtarget, DAG)) |
18036 | return ZExt; |
18037 | |
18038 | |
18039 | if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v64i8, Mask, V1, V2, DAG)) |
18040 | return V; |
18041 | |
18042 | |
18043 | if (SDValue V = lowerShuffleWithPACK(DL, MVT::v64i8, Mask, V1, V2, DAG, |
18044 | Subtarget)) |
18045 | return V; |
18046 | |
18047 | |
18048 | if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v64i8, V1, V2, Mask, |
18049 | Zeroable, Subtarget, DAG)) |
18050 | return Shift; |
18051 | |
18052 | |
18053 | if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v64i8, V1, V2, Mask, |
18054 | Subtarget, DAG)) |
18055 | return Rotate; |
18056 | |
18057 | |
18058 | if (V2.isUndef()) |
18059 | if (SDValue Rotate = |
18060 | lowerShuffleAsBitRotate(DL, MVT::v64i8, V1, Mask, Subtarget, DAG)) |
18061 | return Rotate; |
18062 | |
18063 | // Try to lower the shuffle as a bit mask (AND against a constant).
18064 | if (SDValue Masked = lowerShuffleAsBitMask(DL, MVT::v64i8, V1, V2, Mask, |
18065 | Zeroable, Subtarget, DAG)) |
18066 | return Masked; |
18067 | |
18068 | if (SDValue PSHUFB = lowerShuffleWithPSHUFB(DL, MVT::v64i8, Mask, V1, V2, |
18069 | Zeroable, Subtarget, DAG)) |
18070 | return PSHUFB; |
18071 | |
18072 | // VBMI can use VPERMV/VPERMV3 byte shuffles.
18073 | if (Subtarget.hasVBMI()) |
18074 | return lowerShuffleWithPERMV(DL, MVT::v64i8, Mask, V1, V2, Subtarget, DAG); |
18075 | |
18076 | |
18077 | |
18078 | if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute( |
18079 | DL, MVT::v64i8, V1, V2, Mask, Subtarget, DAG)) |
18080 | return V; |
18081 | |
18082 | if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v64i8, V1, V2, Mask, |
18083 | Zeroable, Subtarget, DAG)) |
18084 | return Blend; |
18085 | |
18086 | // Try to simplify this by merging 128-bit lanes to enable a lane-based
18087 | // shuffle.
18088 | if (!V2.isUndef()) |
18089 | if (SDValue Result = lowerShuffleAsLanePermuteAndRepeatedMask( |
18090 | DL, MVT::v64i8, V1, V2, Mask, Subtarget, DAG)) |
18091 | return Result; |
18092 | |
18093 | // FIXME: Implement direct support for this type!
18094 | return splitAndLowerShuffle(DL, MVT::v64i8, V1, V2, Mask, DAG); |
18095 | } |
18096 | |
18097 | /// High-level routine to lower various 512-bit x86 vector shuffles.
18098 | ///
18099 | /// This routine either breaks down the specific type of a 512-bit x86 vector
18100 | /// shuffle or splits it into two 256-bit shuffles and fuses the results back
18101 | /// together based on the available instructions.
18102 | static SDValue lower512BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, |
18103 | MVT VT, SDValue V1, SDValue V2, |
18104 | const APInt &Zeroable, |
18105 | const X86Subtarget &Subtarget, |
18106 | SelectionDAG &DAG) { |
18107 | assert(Subtarget.hasAVX512() && |
18108 | "Cannot lower 512-bit vectors w/ basic ISA!"); |
18109 | |
18110 | // If we have a single input to the zero element, insert that into V1 if we
18111 | // can do so cheaply.
18112 | int NumElts = Mask.size(); |
18113 | int NumV2Elements = count_if(Mask, [NumElts](int M) { return M >= NumElts; }); |
18114 | |
18115 | if (NumV2Elements == 1 && Mask[0] >= NumElts) |
18116 | if (SDValue Insertion = lowerShuffleAsElementInsertion( |
18117 | DL, VT, V1, V2, Mask, Zeroable, Subtarget, DAG)) |
18118 | return Insertion; |
18119 | |
18120 | |
18121 | if (SDValue V = |
18122 | lowerShuffleWithUndefHalf(DL, VT, V1, V2, Mask, Subtarget, DAG)) |
18123 | return V; |
18124 | |
18125 | |
18126 | if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, VT, V1, V2, Mask, |
18127 | Subtarget, DAG)) |
18128 | return Broadcast; |
18129 | |
18130 | if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !Subtarget.hasBWI()) { |
18131 | // Without BWI we only have the bit operations available for masking and
18132 | // blending; try those before splitting into 256-bit vectors.
18133 | if (SDValue V = lowerShuffleAsBitMask(DL, VT, V1, V2, Mask, Zeroable, |
18134 | Subtarget, DAG)) |
18135 | return V; |
18136 | if (SDValue V = lowerShuffleAsBitBlend(DL, VT, V1, V2, Mask, DAG)) |
18137 | return V; |
18138 | |
18139 | return splitAndLowerShuffle(DL, VT, V1, V2, Mask, DAG); |
18140 | } |
18141 | |
18142 | // Dispatch to each element type for lowering. If we don't have support for
18143 | // specific element type shuffles at 512 bits, immediately split them and
18144 | // lower them. Each lowering routine of a given type is allowed to assume that
18145 | // the requisite ISA extensions for that element type are available.
18146 | switch (VT.SimpleTy) { |
18147 | case MVT::v8f64: |
18148 | return lowerV8F64Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); |
18149 | case MVT::v16f32: |
18150 | return lowerV16F32Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); |
18151 | case MVT::v8i64: |
18152 | return lowerV8I64Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); |
18153 | case MVT::v16i32: |
18154 | return lowerV16I32Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); |
18155 | case MVT::v32i16: |
18156 | return lowerV32I16Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); |
18157 | case MVT::v64i8: |
18158 | return lowerV64I8Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); |
18159 | |
18160 | default: |
18161 | llvm_unreachable("Not a valid 512-bit x86 vector type!"); |
18162 | } |
18163 | } |
18164 | // Try to lower a vXi1 shuffle as a single mask-register right shift (KSHIFTR).
18165 | static SDValue lower1BitShuffleAsKSHIFTR(const SDLoc &DL, ArrayRef<int> Mask, |
18166 | MVT VT, SDValue V1, SDValue V2, |
18167 | const X86Subtarget &Subtarget, |
18168 | SelectionDAG &DAG) { |
18169 | // Shuffle should be unary.
18170 | if (!V2.isUndef()) |
18171 | return SDValue(); |
18172 | |
18173 | int ShiftAmt = -1; |
18174 | int NumElts = Mask.size(); |
18175 | for (int i = 0; i != NumElts; ++i) { |
18176 | int M = Mask[i]; |
18177 | assert((M == SM_SentinelUndef || (0 <= M && M < NumElts)) && |
18178 | "Unexpected mask index."); |
18179 | if (M < 0) |
18180 | continue; |
18181 | |
18182 | // The first non-undef element determines our shift amount.
18183 | if (ShiftAmt < 0) { |
18184 | ShiftAmt = M - i; |
18185 | // Need to be shifting right.
18186 | if (ShiftAmt <= 0) |
18187 | return SDValue(); |
18188 | } |
18189 | // All non-undef elements must shift by the same amount.
18190 | if (ShiftAmt != M - i) |
18191 | return SDValue(); |
18192 | } |
18193 | assert(ShiftAmt >= 0 && "All undef?"); |
18194 | |
18195 | // Great, we found a shift right.
18196 | MVT WideVT = VT; |
18197 | if ((!Subtarget.hasDQI() && NumElts == 8) || NumElts < 8) |
18198 | WideVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1; |
18199 | SDValue Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideVT, |
18200 | DAG.getUNDEF(WideVT), V1, |
18201 | DAG.getIntPtrConstant(0, DL)); |
18202 | Res = DAG.getNode(X86ISD::KSHIFTR, DL, WideVT, Res, |
18203 | DAG.getTargetConstant(ShiftAmt, DL, MVT::i8)); |
18204 | return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res, |
18205 | DAG.getIntPtrConstant(0, DL)); |
18206 | } |
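      | // NOTE (illustrative, not from the original source): for a v8i1 mask
      | // {2,3,4,5,6,7,-1,-1} every defined element satisfies M - i == 2, so
      | // the whole shuffle becomes a single KSHIFTR by 2; without DQI the
      | // v8i1 value is first widened to v16i1 because KSHIFT of an 8-bit
      | // mask register requires AVX512DQ.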
18207 | |
18208 | // Determine if this shuffle can be implemented with a KSHIFT instruction.
18209 | // Returns the shift amount if possible or -1 if not. This is a simplified
18210 | // version of matchShuffleAsShift.
18211 | static int match1BitShuffleAsKSHIFT(unsigned &Opcode, ArrayRef<int> Mask, |
18212 | int MaskOffset, const APInt &Zeroable) { |
18213 | int Size = Mask.size(); |
18214 | |
18215 | auto CheckZeros = [&](int Shift, bool Left) { |
18216 | for (int j = 0; j < Shift; ++j) |
18217 | if (!Zeroable[j + (Left ? 0 : (Size - Shift))]) |
18218 | return false; |
18219 | |
18220 | return true; |
18221 | }; |
18222 | |
18223 | auto MatchShift = [&](int Shift, bool Left) { |
18224 | unsigned Pos = Left ? Shift : 0; |
18225 | unsigned Low = Left ? 0 : Shift; |
18226 | unsigned Len = Size - Shift; |
18227 | return isSequentialOrUndefInRange(Mask, Pos, Len, Low + MaskOffset); |
18228 | }; |
18229 | |
18230 | for (int Shift = 1; Shift != Size; ++Shift) |
18231 | for (bool Left : {true, false}) |
18232 | if (CheckZeros(Shift, Left) && MatchShift(Shift, Left)) { |
18233 | Opcode = Left ? X86ISD::KSHIFTL : X86ISD::KSHIFTR; |
18234 | return Shift; |
18235 | } |
18236 | |
18237 | return -1; |
18238 | } |
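      | // NOTE (illustrative, not from the original source): with Size == 8,
      | // MaskOffset == 0, and mask {Z,Z,0,1,2,3,4,5} where the two leading
      | // lanes are zeroable, CheckZeros(2, /*Left=*/true) and
      | // MatchShift(2, true) both succeed, so this returns 2 with Opcode set
      | // to X86ISD::KSHIFTL.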
18239 | |
18240 | // Lower vXi1 vector shuffles.
18241 | // There is no dedicated instruction on AVX-512 that shuffles the masks.
18242 | // The only way to shuffle them is to sign-extend the masks to a wider
18243 | // integer vector type, shuffle that, and then convert the result back
18244 | // into a vXi1 mask.
18245 | static SDValue lower1BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, |
18246 | MVT VT, SDValue V1, SDValue V2, |
18247 | const APInt &Zeroable, |
18248 | const X86Subtarget &Subtarget, |
18249 | SelectionDAG &DAG) { |
18250 | assert(Subtarget.hasAVX512() && |
18251 | "Cannot lower 512-bit vectors w/o basic ISA!"); |
18252 | |
18253 | int NumElts = Mask.size(); |
18254 | |
18255 | // Try to recognize shuffles that are just padding a subvector with zeros.
18256 | int SubvecElts = 0; |
18257 | int Src = -1; |
18258 | for (int i = 0; i != NumElts; ++i) { |
18259 | if (Mask[i] >= 0) { |
18260 | // Grab the source from the first valid mask. All subsequent elements need
18261 | // to use this same source.
18262 | if (Src < 0) |
18263 | Src = Mask[i] / NumElts; |
18264 | if (Src != (Mask[i] / NumElts) || (Mask[i] % NumElts) != i) |
18265 | break; |
18266 | } |
18267 | |
18268 | ++SubvecElts; |
18269 | } |
18270 | assert(SubvecElts != NumElts && "Identity shuffle?"); |
18271 | |
18272 | // Clip to a power of 2.
18273 | SubvecElts = PowerOf2Floor(SubvecElts); |
18274 | |
18275 | // Make sure the number of zeroable bits in the top at least covers the
18276 | // bits not covered by the subvector.
18277 | if ((int)Zeroable.countLeadingOnes() >= (NumElts - SubvecElts)) { |
18278 | assert(Src >= 0 && "Expected a source!"); |
18279 | MVT ExtractVT = MVT::getVectorVT(MVT::i1, SubvecElts); |
18280 | SDValue Extract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtractVT, |
18281 | Src == 0 ? V1 : V2, |
18282 | DAG.getIntPtrConstant(0, DL)); |
18283 | return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, |
18284 | DAG.getConstant(0, DL, VT), |
18285 | Extract, DAG.getIntPtrConstant(0, DL)); |
18286 | } |
18287 | |
18288 | // Try a simple shift right with undef elements. Later we'll try with zeros.
18289 | if (SDValue Shift = lower1BitShuffleAsKSHIFTR(DL, Mask, VT, V1, V2, Subtarget, |
18290 | DAG)) |
18291 | return Shift; |
18292 | |
18293 | // Try to match KSHIFTs.
18294 | unsigned Offset = 0; |
18295 | for (SDValue V : { V1, V2 }) { |
18296 | unsigned Opcode; |
18297 | int ShiftAmt = match1BitShuffleAsKSHIFT(Opcode, Mask, Offset, Zeroable); |
18298 | if (ShiftAmt >= 0) { |
18299 | MVT WideVT = VT; |
18300 | if ((!Subtarget.hasDQI() && NumElts == 8) || NumElts < 8) |
18301 | WideVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1; |
18302 | SDValue Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideVT, |
18303 | DAG.getUNDEF(WideVT), V, |
18304 | DAG.getIntPtrConstant(0, DL)); |
18305 | // Widened right shifts need two shifts to ensure we shift in zeroes.
18306 | if (Opcode == X86ISD::KSHIFTR && WideVT != VT) { |
18307 | int WideElts = WideVT.getVectorNumElements(); |
18308 | // Shift left to put the original vector in the MSBs of the new size.
18309 | Res = DAG.getNode(X86ISD::KSHIFTL, DL, WideVT, Res, |
18310 | DAG.getTargetConstant(WideElts - NumElts, DL, MVT::i8)); |
18311 | // Increase the shift amount to account for the left shift.
18312 | ShiftAmt += WideElts - NumElts; |
18313 | } |
18314 | |
18315 | Res = DAG.getNode(Opcode, DL, WideVT, Res, |
18316 | DAG.getTargetConstant(ShiftAmt, DL, MVT::i8)); |
18317 | return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res, |
18318 | DAG.getIntPtrConstant(0, DL)); |
18319 | } |
18320 | Offset += NumElts; |
18321 | } |
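      | // NOTE (illustrative, not from the original source): a v8i1 KSHIFTR by
      | // 3 without DQI is done in v16i1 as a KSHIFTL by 16 - 8 = 8 (placing
      | // the vector in the MSBs) followed by a KSHIFTR by 3 + 8 = 11, so
      | // genuine zeros rather than stale widened bits get shifted into the
      | // top elements.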
18322 | |
18323 | // No KSHIFT match: pick an integer vector type to sign-extend the mask
18324 | // into so the shuffle can be done in ordinary vector registers.
18325 | MVT ExtVT; |
18326 | switch (VT.SimpleTy) { |
18327 | default: |
18328 | llvm_unreachable("Expected a vector of i1 elements"); |
18329 | case MVT::v2i1: |
18330 | ExtVT = MVT::v2i64; |
18331 | break; |
18332 | case MVT::v4i1: |
18333 | ExtVT = MVT::v4i32; |
18334 | break; |
18335 | case MVT::v8i1: |
18336 | // Without VLX take a 512-bit type (v8i64); with VLX a 256-bit v8i32
18337 | // shuffle suffices.
18338 | ExtVT = Subtarget.hasVLX() ? MVT::v8i32 : MVT::v8i64; |
18339 | break; |
18340 | case MVT::v16i1: |
18341 | // Take a 512-bit type unless we are avoiding 512-bit types and have the
18342 | // 256-bit operation available.
18343 | ExtVT = Subtarget.canExtendTo512DQ() ? MVT::v16i32 : MVT::v16i16; |
18344 | break; |
18345 | case MVT::v32i1: |
18346 | // Take a 512-bit type unless we are avoiding 512-bit types and have the
18347 | // 256-bit operation available.
18348 | assert(Subtarget.hasBWI() && "Expected AVX512BW support"); |
18349 | ExtVT = Subtarget.canExtendTo512BW() ? MVT::v32i16 : MVT::v32i8; |
18350 | break; |
18351 | case MVT::v64i1: |
18352 | // Extending to v64i8 needs BWI registers; otherwise give up and let
18353 | // generic legalization expand the shuffle.
18354 | if (!Subtarget.useBWIRegs()) |
18355 | return SDValue(); |
18356 | ExtVT = MVT::v64i8; |
18357 | break; |
18358 | } |
18359 | |
18360 | V1 = DAG.getNode(ISD::SIGN_EXTEND, DL, ExtVT, V1); |
18361 | V2 = DAG.getNode(ISD::SIGN_EXTEND, DL, ExtVT, V2); |
18362 | |
18363 | SDValue Shuffle = DAG.getVectorShuffle(ExtVT, DL, V1, V2, Mask); |
18364 | // The extended elements are sign bits, so "0 > x" (SETGT) recovers the mask.
18365 | int NumElems = VT.getVectorNumElements(); |
18366 | if ((Subtarget.hasBWI() && (NumElems >= 32)) || |
18367 | (Subtarget.hasDQI() && (NumElems < 32))) |
18368 | return DAG.getSetCC(DL, VT, DAG.getConstant(0, DL, ExtVT), |
18369 | Shuffle, ISD::SETGT); |
18370 | |
18371 | return DAG.getNode(ISD::TRUNCATE, DL, VT, Shuffle); |
18372 | } |
18373 | |
18374 | /// Helper function that returns true if the shuffle mask should be commuted
18375 | /// to improve canonicalization.
18376 | static bool canonicalizeShuffleMaskWithCommute(ArrayRef<int> Mask) { |
18377 | int NumElements = Mask.size(); |
18378 | |
18379 | int NumV1Elements = 0, NumV2Elements = 0; |
18380 | for (int M : Mask) |
18381 | if (M < 0) |
18382 | continue; |
18383 | else if (M < NumElements) |
18384 | ++NumV1Elements; |
18385 | else |
18386 | ++NumV2Elements; |
18387 | |
18388 | // Commute the shuffle as needed such that more elements come from V1 than
18389 | // V2. This allows us to match the shuffle pattern strictly on how many
18390 | // elements come from each vector.
18391 | if (NumV2Elements > NumV1Elements) |
18392 | return true; |
18393 | |
18394 | assert(NumV1Elements > 0 && "No V1 indices"); |
18395 | |
18396 | if (NumV2Elements == 0) |
18397 | return false; |
18398 | |
18399 | // When the numbers of V1 and V2 elements are the same, try to minimize the
18400 | // number of uses of V2 in the low half of the vector. When that is tied,
18401 | // ensure that the sum of indices for V1 is equal to or lower than the sum
18402 | // of indices for V2. When those are equal, prefer the mask with fewer odd
18403 | // indices coming from V2.
18404 | if (NumV1Elements == NumV2Elements) { |
18405 | int LowV1Elements = 0, LowV2Elements = 0; |
18406 | for (int M : Mask.slice(0, NumElements / 2)) |
18407 | if (M >= NumElements) |
18408 | ++LowV2Elements; |
18409 | else if (M >= 0) |
18410 | ++LowV1Elements; |
18411 | if (LowV2Elements > LowV1Elements) |
18412 | return true; |
18413 | if (LowV2Elements == LowV1Elements) { |
18414 | int SumV1Indices = 0, SumV2Indices = 0; |
18415 | for (int i = 0, Size = Mask.size(); i < Size; ++i) |
18416 | if (Mask[i] >= NumElements) |
18417 | SumV2Indices += i; |
18418 | else if (Mask[i] >= 0) |
18419 | SumV1Indices += i; |
18420 | if (SumV2Indices < SumV1Indices) |
18421 | return true; |
18422 | if (SumV2Indices == SumV1Indices) { |
18423 | int NumV1OddIndices = 0, NumV2OddIndices = 0; |
18424 | for (int i = 0, Size = Mask.size(); i < Size; ++i) |
18425 | if (Mask[i] >= NumElements) |
18426 | NumV2OddIndices += i % 2; |
18427 | else if (Mask[i] >= 0) |
18428 | NumV1OddIndices += i % 2; |
18429 | if (NumV2OddIndices < NumV1OddIndices) |
18430 | return true; |
18431 | } |
18432 | } |
18433 | } |
18434 | |
18435 | return false; |
18436 | } |
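      | // NOTE (illustrative, not from the original source): for a v4i32 mask
      | // {4,5,6,3} three elements come from V2 and one from V1, so this
      | // returns true; the caller then commutes to mask {0,1,2,7} with the
      | // operands swapped, letting the matchers assume V1 supplies most
      | // elements.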
18437 | |
18438 | /// Top-level lowering for x86 vector shuffles.
18439 | ///
18440 | /// This handles decomposition, canonicalization, and lowering of all x86
18441 | /// vector shuffles. Most of the specific lowering strategies are encapsulated
18442 | /// above in helper routines. The canonicalization attempts to widen shuffles
18443 | /// to involve fewer lanes of wider elements and to consolidate symmetric
18444 | /// patterns so that only one vector is shuffled when possible.
18445 | static SDValue lowerVECTOR_SHUFFLE(SDValue Op, const X86Subtarget &Subtarget, |
18446 | SelectionDAG &DAG) { |
18447 | ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op); |
18448 | ArrayRef<int> OrigMask = SVOp->getMask(); |
18449 | SDValue V1 = Op.getOperand(0); |
18450 | SDValue V2 = Op.getOperand(1); |
18451 | MVT VT = Op.getSimpleValueType(); |
18452 | int NumElements = VT.getVectorNumElements(); |
18453 | SDLoc DL(Op); |
18454 | bool Is1BitVector = (VT.getVectorElementType() == MVT::i1); |
18455 | |
18456 | assert((VT.getSizeInBits() != 64 || Is1BitVector) && |
18457 | "Can't lower MMX shuffles"); |
18458 | |
18459 | bool V1IsUndef = V1.isUndef(); |
18460 | bool V2IsUndef = V2.isUndef(); |
18461 | if (V1IsUndef && V2IsUndef) |
18462 | return DAG.getUNDEF(VT); |
18463 | |
18464 | // When we create a shuffle node we put the UNDEF node to second operand,
18465 | // but in some cases the first operand may be transformed to UNDEF.
18466 | // In this case we should just commute the node.
18467 | if (V1IsUndef) |
18468 | return DAG.getCommutedVectorShuffle(*SVOp); |
18469 | |
18470 | // Check for non-undef masks pointing at an undef vector and make the masks
18471 | // undef as well. This makes it easier to match the shuffle based solely on
18472 | // the mask.
18473 | if (V2IsUndef && |
18474 | any_of(OrigMask, [NumElements](int M) { return M >= NumElements; })) { |
18475 | SmallVector<int, 8> NewMask(OrigMask.begin(), OrigMask.end()); |
18476 | for (int &M : NewMask) |
18477 | if (M >= NumElements) |
18478 | M = -1; |
18479 | return DAG.getVectorShuffle(VT, DL, V1, V2, NewMask); |
18480 | } |
18481 | |
18482 | // Check for illegal shuffle mask element index values.
18483 | int MaskUpperLimit = OrigMask.size() * (V2IsUndef ? 1 : 2); |
18484 | (void)MaskUpperLimit; |
18485 | assert(llvm::all_of(OrigMask, |
18486 | [&](int M) { return -1 <= M && M < MaskUpperLimit; }) && |
18487 | "Out of bounds shuffle index"); |
18488 | |
18489 | // We actually see shuffles that are entirely re-arrangements of a set of
18490 | // zero inputs. This mostly happens while decomposing complex shuffles into
18491 | // simple ones. Directly lower these as a buildvector of zeros.
18492 | APInt KnownUndef, KnownZero; |
18493 | computeZeroableShuffleElements(OrigMask, V1, V2, KnownUndef, KnownZero); |
18494 | |
18495 | APInt Zeroable = KnownUndef | KnownZero; |
18496 | if (Zeroable.isAllOnesValue()) |
18497 | return getZeroVector(VT, Subtarget, DAG, DL); |
18498 | |
18499 | bool V2IsZero = !V2IsUndef && ISD::isBuildVectorAllZeros(V2.getNode()); |
18500 | |
18501 | // Try to collapse shuffles into using a vector type with fewer elements but
18502 | // wider element types. We cap this to not form integers or floating point
18503 | // elements wider than 64 bits, but it might be interesting to form i128
18504 | // integers to handle flipping the low and high halves of AVX 256-bit vectors.
18505 | SmallVector<int, 16> WidenedMask; |
18506 | if (VT.getScalarSizeInBits() < 64 && !Is1BitVector && |
18507 | canWidenShuffleElements(OrigMask, Zeroable, V2IsZero, WidenedMask)) { |
18508 | // Shuffle mask widening should not interfere with a broadcast opportunity
18509 | // by obfuscating the operands with bitcasts.
18510 | // TODO: Avoid lowering directly from this top-level function: make this
18511 | // a query (canLowerAsBroadcast) and defer lowering to the type-based calls.
18512 | if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, VT, V1, V2, OrigMask, |
18513 | Subtarget, DAG)) |
18514 | return Broadcast; |
18515 | |
18516 | MVT NewEltVT = VT.isFloatingPoint() |
18517 | ? MVT::getFloatingPointVT(VT.getScalarSizeInBits() * 2) |
18518 | : MVT::getIntegerVT(VT.getScalarSizeInBits() * 2); |
18519 | int NewNumElts = NumElements / 2; |
18520 | MVT NewVT = MVT::getVectorVT(NewEltVT, NewNumElts); |
18521 | |
18522 | |
18523 | if (DAG.getTargetLoweringInfo().isTypeLegal(NewVT)) { |
18524 | if (V2IsZero) { |
18525 | // Modify the new mask so the zeroable elements are taken from the all-zero
18526 | // vector that will be substituted for V2, choosing blend-friendly indices.
18527 | bool UsedZeroVector = false; |
18528 | assert(is_contained(WidenedMask, SM_SentinelZero) && |
18529 | "V2's non-undef elements are used?!"); |
18530 | for (int i = 0; i != NewNumElts; ++i) |
18531 | if (WidenedMask[i] == SM_SentinelZero) { |
18532 | WidenedMask[i] = i + NewNumElts; |
18533 | UsedZeroVector = true; |
18534 | } |
18535 | |
18536 | |
18537 | if (UsedZeroVector) |
18538 | V2 = getZeroVector(NewVT, Subtarget, DAG, DL); |
18539 | } |
18540 | V1 = DAG.getBitcast(NewVT, V1); |
18541 | V2 = DAG.getBitcast(NewVT, V2); |
18542 | return DAG.getBitcast( |
18543 | VT, DAG.getVectorShuffle(NewVT, DL, V1, V2, WidenedMask)); |
18544 | } |
18545 | } |
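      | // NOTE (illustrative, not from the original source): a v4i32 shuffle
      | // with mask {0,1,4,5} widens to the v2i64 mask {0,2} here (each pair
      | // of adjacent 32-bit elements moves as one 64-bit element), and the
      | // result is bitcast back to v4i32.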
18546 | |
18547 | // Commute the shuffle if it will improve canonicalization.
18548 | SmallVector<int, 64> Mask(OrigMask.begin(), OrigMask.end()); |
18549 | if (canonicalizeShuffleMaskWithCommute(Mask)) { |
18550 | ShuffleVectorSDNode::commuteMask(Mask); |
18551 | std::swap(V1, V2); |
18552 | } |
18553 | |
18554 | // Dispatch to the width-specific lowering routines.
18555 | if (VT.is128BitVector()) |
18556 | return lower128BitShuffle(DL, Mask, VT, V1, V2, Zeroable, Subtarget, DAG); |
18557 | |
18558 | if (VT.is256BitVector()) |
18559 | return lower256BitShuffle(DL, Mask, VT, V1, V2, Zeroable, Subtarget, DAG); |
18560 | |
18561 | if (VT.is512BitVector()) |
18562 | return lower512BitShuffle(DL, Mask, VT, V1, V2, Zeroable, Subtarget, DAG); |
18563 | |
18564 | if (Is1BitVector) |
18565 | return lower1BitShuffle(DL, Mask, VT, V1, V2, Zeroable, Subtarget, DAG); |
18566 | |
18567 | llvm_unreachable("Unimplemented!"); |
18568 | } |
18569 | |
18570 | /// Try to lower a VSELECT instruction to a vector shuffle.
18571 | static SDValue lowerVSELECTtoVectorShuffle(SDValue Op, |
18572 | const X86Subtarget &Subtarget, |
18573 | SelectionDAG &DAG) { |
18574 | SDValue Cond = Op.getOperand(0); |
18575 | SDValue LHS = Op.getOperand(1); |
18576 | SDValue RHS = Op.getOperand(2); |
18577 | MVT VT = Op.getSimpleValueType(); |
18578 | |
18579 | // Only non-legal VSELECTs reach this lowering. Convert those into generic
18580 | // shuffles and re-use the shuffle lowering path for blends.
18581 | if (ISD::isBuildVectorOfConstantSDNodes(Cond.getNode())) { |
18582 | SmallVector<int, 32> Mask; |
18583 | if (createShuffleMaskFromVSELECT(Mask, Cond)) |
18584 | return DAG.getVectorShuffle(VT, SDLoc(Op), LHS, RHS, Mask); |
18585 | } |
18586 | |
18587 | return SDValue(); |
18588 | } |
18589 | |
18590 | SDValue X86TargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const { |
18591 | SDValue Cond = Op.getOperand(0); |
18592 | SDValue LHS = Op.getOperand(1); |
18593 | SDValue RHS = Op.getOperand(2); |
18594 | |
18595 | // A vselect where all conditions and data are constants can be optimized
18596 | // into a single vector load by SelectionDAGLegalize::ExpandBUILD_VECTOR().
18597 | if (ISD::isBuildVectorOfConstantSDNodes(Cond.getNode()) && |
18598 | ISD::isBuildVectorOfConstantSDNodes(LHS.getNode()) && |
18599 | ISD::isBuildVectorOfConstantSDNodes(RHS.getNode())) |
18600 | return SDValue(); |
18601 | |
18602 | // Try to lower this to a blend-style vector shuffle. This can handle all
18603 | // constant condition cases.
18604 | if (SDValue BlendOp = lowerVSELECTtoVectorShuffle(Op, Subtarget, DAG)) |
18605 | return BlendOp; |
18606 | |
18607 | // If this VSELECT has a vector of i1 as a mask, it will be directly matched
18608 | // with patterns on the mask registers on AVX-512.
18609 | MVT CondVT = Cond.getSimpleValueType(); |
18610 | unsigned CondEltSize = Cond.getScalarValueSizeInBits(); |
18611 | if (CondEltSize == 1) |
18612 | return Op; |
18613 | |
18614 | // Variable blends are only legal from SSE4.1 onward.
18615 | if (!Subtarget.hasSSE41()) |
18616 | return SDValue(); |
18617 | |
18618 | SDLoc dl(Op); |
18619 | MVT VT = Op.getSimpleValueType(); |
18620 | unsigned EltSize = VT.getScalarSizeInBits(); |
18621 | unsigned NumElts = VT.getVectorNumElements(); |
18622 | |
18623 | // Expand v32i16/v64i8 without BWI.
18624 | if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !Subtarget.hasBWI()) |
18625 | return SDValue(); |
18626 | |
18627 | // If the VSELECT is on a 512-bit type, we have to convert a non-i1 condition
18628 | // into an i1 condition so that we can use the mask-based 512-bit blend
18629 | // instructions.
18630 | if (VT.getSizeInBits() == 512) { |
18631 | |
18632 | MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts); |
18633 | SDValue Mask = DAG.getSetCC(dl, MaskVT, Cond, |
18634 | DAG.getConstant(0, dl, CondVT), |
18635 | ISD::SETNE); |
18636 | |
18637 | return DAG.getSelect(dl, VT, Mask, LHS, RHS); |
18638 | } |
18639 | |
18640 | // SEXT/TRUNC cases where the mask doesn't match the destination size.
18641 | if (CondEltSize != EltSize) { |
18642 | // The condition must consist entirely of sign bits so it can be resized safely.
18643 | if (CondEltSize != DAG.ComputeNumSignBits(Cond)) |
18644 | return SDValue(); |
18645 | |
18646 | MVT NewCondSVT = MVT::getIntegerVT(EltSize); |
18647 | MVT NewCondVT = MVT::getVectorVT(NewCondSVT, NumElts); |
18648 | Cond = DAG.getSExtOrTrunc(Cond, dl, NewCondVT); |
18649 | return DAG.getNode(ISD::VSELECT, dl, VT, Cond, LHS, RHS); |
18650 | } |
18651 | |
18652 | // Only some types will be legal on some subtargets. If we can emit a legal
18653 | // VSELECT-matching blend, return Op, but if we need to expand, return a
18654 | // null value.
18655 | switch (VT.SimpleTy) { |
18656 | default: |
18657 | // Most of the vector types have blends past SSE4.1.
18658 | return Op; |
18659 | |
18660 | case MVT::v32i8: |
18661 | // The byte blends for AVX vectors were implemented in AVX2.
18662 | if (Subtarget.hasAVX2()) |
18663 | return Op; |
18664 | |
18665 | return SDValue(); |
18666 | |
18667 | case MVT::v8i16: |
18668 | case MVT::v16i16: { |
18669 | // Bitcast everything to the vXi8 type and use a vXi8 select.
18670 | MVT CastVT = MVT::getVectorVT(MVT::i8, NumElts * 2); |
18671 | Cond = DAG.getBitcast(CastVT, Cond); |
18672 | LHS = DAG.getBitcast(CastVT, LHS); |
18673 | RHS = DAG.getBitcast(CastVT, RHS); |
18674 | SDValue Select = DAG.getNode(ISD::VSELECT, dl, CastVT, Cond, LHS, RHS); |
18675 | return DAG.getBitcast(VT, Select); |
18676 | } |
18677 | } |
18678 | } |
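      | // NOTE (explanatory, not from the original source): the vXi16 case
      | // above is sound because a fully-set or fully-clear i16 condition lane
      | // bitcasts to two identical i8 lanes, so the vXi8 VSELECT (a
      | // PBLENDVB-style blend) produces exactly the result of the original
      | // vXi16 select.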
18679 | |
18680 | static SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) { |
18681 | MVT VT = Op.getSimpleValueType(); |
18682 | SDValue Vec = Op.getOperand(0); |
18683 | SDValue Idx = Op.getOperand(1); |
18684 | assert(isa<ConstantSDNode>(Idx) && "Constant index expected"); |
18685 | SDLoc dl(Op); |
18686 | |
18687 | if (!Vec.getSimpleValueType().is128BitVector()) |
18688 | return SDValue(); |
18689 | |
18690 | if (VT.getSizeInBits() == 8) { |
18691 | // If IdxVal is 0, it's cheaper to do a move instead of a pextrb, unless
18692 | // we're going to zero extend the register or fold the store.
18693 | if (llvm::isNullConstant(Idx) && !MayFoldIntoZeroExtend(Op) && |
18694 | !MayFoldIntoStore(Op)) |
18695 | return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, |
18696 | DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, |
18697 | DAG.getBitcast(MVT::v4i32, Vec), Idx)); |
18698 | |
18699 | unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue(); |
18700 | SDValue Extract = DAG.getNode(X86ISD::PEXTRB, dl, MVT::i32, Vec, |
18701 | DAG.getTargetConstant(IdxVal, dl, MVT::i8)); |
18702 | return DAG.getNode(ISD::TRUNCATE, dl, VT, Extract); |
18703 | } |
18704 | |
18705 | if (VT == MVT::f32) { |
18706 | // EXTRACTPS outputs to a GPR32 register, which would need an extra move to
18707 | // reach the FP register file, so only lower f32 extracts here when the sole
18708 | // use is a store of a non-zero lane or a bitcast to i32.
18709 | |
18710 | |
18711 | if (!Op.hasOneUse()) |
18712 | return SDValue(); |
18713 | SDNode *User = *Op.getNode()->use_begin(); |
18714 | if ((User->getOpcode() != ISD::STORE || isNullConstant(Idx)) && |
18715 | (User->getOpcode() != ISD::BITCAST || |
18716 | User->getValueType(0) != MVT::i32)) |
18717 | return SDValue(); |
18718 | SDValue Extract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, |
18719 | DAG.getBitcast(MVT::v4i32, Vec), Idx); |
18720 | return DAG.getBitcast(MVT::f32, Extract); |
18721 | } |
18722 | |
18723 | if (VT == MVT::i32 || VT == MVT::i64) |
18724 | return Op; |
18725 | |
18726 | return SDValue(); |
18727 | } |
18728 | |
18729 | /// Extract one bit from a mask vector, like v16i1 or v8i1.
18730 | /// AVX-512 feature.
18731 | static SDValue ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG, |
18732 | const X86Subtarget &Subtarget) { |
18733 | SDValue Vec = Op.getOperand(0); |
18734 | SDLoc dl(Vec); |
18735 | MVT VecVT = Vec.getSimpleValueType(); |
18736 | SDValue Idx = Op.getOperand(1); |
18737 | auto* IdxC = dyn_cast<ConstantSDNode>(Idx); |
18738 | MVT EltVT = Op.getSimpleValueType(); |
18739 | |
18740 | assert((VecVT.getVectorNumElements() <= 16 || Subtarget.hasBWI()) && |
18741 | "Unexpected vector type in ExtractBitFromMaskVector"); |
18742 | |
18743 | // A variable index can't be handled in mask registers; sign-extend the
18744 | // vector, extract from the wide type, and truncate back to i1.
18745 | if (!IdxC) { |
18746 | unsigned NumElts = VecVT.getVectorNumElements(); |
18747 | |
18748 | |
18749 | MVT ExtEltVT = (NumElts <= 8) ? MVT::getIntegerVT(128 / NumElts) : MVT::i8; |
18750 | MVT ExtVecVT = MVT::getVectorVT(ExtEltVT, NumElts); |
18751 | SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND, dl, ExtVecVT, Vec); |
18752 | SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ExtEltVT, Ext, Idx); |
18753 | return DAG.getNode(ISD::TRUNCATE, dl, EltVT, Elt); |
18754 | } |
18755 | |
18756 | unsigned IdxVal = IdxC->getZExtValue(); |
18757 | if (IdxVal == 0) |
18758 | return Op; |
18759 | |
18760 | // Extend to a type with a natively supported kshift.
18761 | unsigned NumElems = VecVT.getVectorNumElements(); |
18762 | MVT WideVecVT = VecVT; |
18763 | if ((!Subtarget.hasDQI() && NumElems == 8) || NumElems < 8) { |
18764 | WideVecVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1; |
18765 | Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideVecVT, |
18766 | DAG.getUNDEF(WideVecVT), Vec, |
18767 | DAG.getIntPtrConstant(0, dl)); |
18768 | } |
18769 | |
18770 | // Use a kshiftr instruction to move the wanted bit to the lowest element.
18771 | Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideVecVT, Vec, |
18772 | DAG.getTargetConstant(IdxVal, dl, MVT::i8)); |
18773 | |
18774 | return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op.getValueType(), Vec, |
18775 | DAG.getIntPtrConstant(0, dl)); |
18776 | } |
18777 | |
18778 | SDValue |
18779 | X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op, |
18780 | SelectionDAG &DAG) const { |
18781 | SDLoc dl(Op); |
18782 | SDValue Vec = Op.getOperand(0); |
18783 | MVT VecVT = Vec.getSimpleValueType(); |
18784 | SDValue Idx = Op.getOperand(1); |
18785 | auto* IdxC = dyn_cast<ConstantSDNode>(Idx); |
18786 | |
18787 | if (VecVT.getVectorElementType() == MVT::i1) |
18788 | return ExtractBitFromMaskVector(Op, DAG, Subtarget); |
18789 | |
18790 | if (!IdxC) { |
18791 | // For a variable index it is more profitable to go through memory
18792 | // (about 1 cycle throughput) than to use a VMOVD + VPERMV/PSHUFB
18793 | // sequence (about 2-3 cycles throughput), based on IACA estimates
18794 | // (https://software.intel.com/en-us/articles/intel-architecture-code-analyzer).
18795 | //
18796 | // For example, extractelement <16 x i8> %a, i32 %i in registers needs
18797 | // vmovd + vpshufb + vpextrb, bottlenecked on port 5, while the
18798 | // stack-based expansion achieves roughly one extract per cycle.
18799 | |
18800 | |
18801 | |
18802 | |
18803 | |
18804 | |
18805 | |
18806 | |
18807 | |
18808 | |
18809 | |
18810 | |
18811 | |
18812 | |
18813 | |
18814 | |
18815 | |
18816 | |
18817 | |
18818 | |
18819 | |
18820 | |
18821 | return SDValue(); |
18822 | } |
18823 | |
18824 | unsigned IdxVal = IdxC->getZExtValue(); |
18825 | |
18826 | // If this is a 256-bit or 512-bit vector result, first extract the 128-bit
18827 | // subvector and then extract the element from that.
18828 | if (VecVT.is256BitVector() || VecVT.is512BitVector()) { |
18829 | // Get the 128-bit vector containing the element.
18830 | Vec = extract128BitVector(Vec, IdxVal, DAG, dl); |
18831 | MVT EltVT = VecVT.getVectorElementType(); |
18832 | |
18833 | unsigned ElemsPerChunk = 128 / EltVT.getSizeInBits(); |
18834 | assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2"); |
18835 | |
18836 | // Find IdxVal modulo ElemsPerChunk. Since ElemsPerChunk is a power of 2
18837 | // this can be done with a mask.
18838 | IdxVal &= ElemsPerChunk - 1; |
18839 | return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op.getValueType(), Vec, |
18840 | DAG.getIntPtrConstant(IdxVal, dl)); |
18841 | } |
18842 | |
18843 | assert(VecVT.is128BitVector() && "Unexpected vector length"); |
18844 | |
18845 | MVT VT = Op.getSimpleValueType(); |
18846 | |
18847 | if (VT.getSizeInBits() == 16) { |
18848 | // If IdxVal is 0, it's cheaper to do a move instead of a pextrw, unless
18849 | // we're going to zero extend the register or fold the store.
18850 | if (IdxVal == 0 && !MayFoldIntoZeroExtend(Op) && |
18851 | !(Subtarget.hasSSE41() && MayFoldIntoStore(Op))) |
18852 | return DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, |
18853 | DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, |
18854 | DAG.getBitcast(MVT::v4i32, Vec), Idx)); |
18855 | |
18856 | SDValue Extract = DAG.getNode(X86ISD::PEXTRW, dl, MVT::i32, Vec, |
18857 | DAG.getTargetConstant(IdxVal, dl, MVT::i8)); |
18858 | return DAG.getNode(ISD::TRUNCATE, dl, VT, Extract); |
18859 | } |
18860 | |
18861 | if (Subtarget.hasSSE41()) |
18862 | if (SDValue Res = LowerEXTRACT_VECTOR_ELT_SSE4(Op, DAG)) |
18863 | return Res; |
18864 | |
18865 | // Without PEXTRB (pre-SSE4.1), a v16i8 extract whose vector has no other
18866 | // uses can still avoid a stack round trip: extract the containing dword or
18867 | // word and shift the byte into place.
18868 | if (VT.getSizeInBits() == 8 && Op->isOnlyUserOf(Vec.getNode())) { |
18869 | // Extract either the lowest i32 or any i16, and extract the sub-byte.
18870 | int DWordIdx = IdxVal / 4; |
18871 | if (DWordIdx == 0) { |
18872 | SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, |
18873 | DAG.getBitcast(MVT::v4i32, Vec), |
18874 | DAG.getIntPtrConstant(DWordIdx, dl)); |
18875 | int ShiftVal = (IdxVal % 4) * 8; |
18876 | if (ShiftVal != 0) |
18877 | Res = DAG.getNode(ISD::SRL, dl, MVT::i32, Res, |
18878 | DAG.getConstant(ShiftVal, dl, MVT::i8)); |
18879 | return DAG.getNode(ISD::TRUNCATE, dl, VT, Res); |
18880 | } |
18881 | |
18882 | int WordIdx = IdxVal / 2; |
18883 | SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, |
18884 | DAG.getBitcast(MVT::v8i16, Vec), |
18885 | DAG.getIntPtrConstant(WordIdx, dl)); |
18886 | int ShiftVal = (IdxVal % 2) * 8; |
18887 | if (ShiftVal != 0) |
18888 | Res = DAG.getNode(ISD::SRL, dl, MVT::i16, Res, |
18889 | DAG.getConstant(ShiftVal, dl, MVT::i8)); |
18890 | return DAG.getNode(ISD::TRUNCATE, dl, VT, Res); |
18891 | } |
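      | // NOTE (illustrative, not from the original source): extracting byte 5
      | // falls in dword 1, so the code above extracts word 2 (bytes 4-5),
      | // shifts right by (5 % 2) * 8 = 8, and truncates to i8; bytes 0-3 use
      | // the cheaper dword-0 extract plus shift instead.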
18892 | |
18893 | if (VT.getSizeInBits() == 32) { |
18894 | if (IdxVal == 0) |
18895 | return Op; |
18896 | |
18897 | // SHUFPS the element to the lowest double word, then movss.
18898 | int Mask[4] = { static_cast<int>(IdxVal), -1, -1, -1 }; |
18899 | Vec = DAG.getVectorShuffle(VecVT, dl, Vec, DAG.getUNDEF(VecVT), Mask); |
18900 | return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec, |
18901 | DAG.getIntPtrConstant(0, dl)); |
18902 | } |
18903 | |
18904 | if (VT.getSizeInBits() == 64) { |
18905 | // FIXME: .td only matches this for <2 x f64>, not <2 x i64> on 32b.
18906 | // FIXME: seems like this should be unnecessary if mov{h,l}pd were taught
18907 | // to match extract_elt for f64.
18908 | if (IdxVal == 0) |
18909 | return Op; |
18910 | |
18911 | |
18912 | |
18913 | // UNPCKHPD the element to the lowest double word, then movsd.
18914 | int Mask[2] = { 1, -1 }; |
18915 | Vec = DAG.getVectorShuffle(VecVT, dl, Vec, DAG.getUNDEF(VecVT), Mask); |
18916 | return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec, |
18917 | DAG.getIntPtrConstant(0, dl)); |
18918 | } |
18919 | |
18920 | return SDValue(); |
18921 | } |
18922 | |
18923 | /// Insert one bit into a mask vector, like v16i1 or v8i1.
18924 | /// AVX-512 feature.
18925 | static SDValue InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG, |
18926 | const X86Subtarget &Subtarget) { |
18927 | SDLoc dl(Op); |
18928 | SDValue Vec = Op.getOperand(0); |
18929 | SDValue Elt = Op.getOperand(1); |
18930 | SDValue Idx = Op.getOperand(2); |
18931 | MVT VecVT = Vec.getSimpleValueType(); |
18932 | |
18933 | if (!isa<ConstantSDNode>(Idx)) { |
18934 | // Non-constant index: extend source and destination, insert the element,
18935 | // and then truncate the result back down to a mask.
18936 | unsigned NumElts = VecVT.getVectorNumElements(); |
18937 | MVT ExtEltVT = (NumElts <= 8) ? MVT::getIntegerVT(128 / NumElts) : MVT::i8; |
18938 | MVT ExtVecVT = MVT::getVectorVT(ExtEltVT, NumElts); |
18939 | SDValue ExtOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ExtVecVT, |
18940 | DAG.getNode(ISD::SIGN_EXTEND, dl, ExtVecVT, Vec), |
18941 | DAG.getNode(ISD::SIGN_EXTEND, dl, ExtEltVT, Elt), Idx); |
18942 | return DAG.getNode(ISD::TRUNCATE, dl, VecVT, ExtOp); |
18943 | } |
18944 | |
18945 | // Copy into a k-register, extend to vXi1, and insert_subvector.
18946 | SDValue EltInVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v1i1, Elt); |
18947 | return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VecVT, Vec, EltInVec, Idx); |
18948 | } |
18949 | |
18950 | SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, |
18951 | SelectionDAG &DAG) const { |
18952 | MVT VT = Op.getSimpleValueType(); |
18953 | MVT EltVT = VT.getVectorElementType(); |
18954 | unsigned NumElts = VT.getVectorNumElements(); |
18955 | unsigned EltSizeInBits = EltVT.getScalarSizeInBits(); |
18956 | |
18957 | if (EltVT == MVT::i1) |
18958 | return InsertBitToMaskVector(Op, DAG, Subtarget); |
18959 | |
18960 | SDLoc dl(Op); |
18961 | SDValue N0 = Op.getOperand(0); |
18962 | SDValue N1 = Op.getOperand(1); |
18963 | SDValue N2 = Op.getOperand(2); |
18964 | auto *N2C = dyn_cast<ConstantSDNode>(N2); |
18965 | |
18966 | if (!N2C) { |
18967 | // With a variable insertion index we lower to a compare+select blend below,
18968 | // but only when the required vector compare/select is available for this
18969 | // element size; otherwise fall back to the default stack expansion.
18970 | if (!(Subtarget.hasBWI() || |
18971 | (Subtarget.hasAVX512() && EltSizeInBits >= 32) || |
18972 | (Subtarget.hasSSE41() && VT.isFloatingPoint()))) |
18973 | return SDValue(); |
18974 | |
18975 | MVT IdxSVT = MVT::getIntegerVT(EltSizeInBits); |
18976 | MVT IdxVT = MVT::getVectorVT(IdxSVT, NumElts); |
18977 | if (!isTypeLegal(IdxSVT) || !isTypeLegal(IdxVT)) |
18978 | return SDValue(); |
18979 | |
18980 | SDValue IdxExt = DAG.getZExtOrTrunc(N2, dl, IdxSVT); |
18981 | SDValue IdxSplat = DAG.getSplatBuildVector(IdxVT, dl, IdxExt); |
18982 | SDValue EltSplat = DAG.getSplatBuildVector(VT, dl, N1); |
18983 | |
18984 | SmallVector<SDValue, 16> RawIndices; |
18985 | for (unsigned I = 0; I != NumElts; ++I) |
18986 | RawIndices.push_back(DAG.getConstant(I, dl, IdxSVT)); |
18987 | SDValue Indices = DAG.getBuildVector(IdxVT, dl, RawIndices); |
18988 | |
18989 | |
18990 | return DAG.getSelectCC(dl, IdxSplat, Indices, EltSplat, N0, |
18991 | ISD::CondCode::SETEQ); |
18992 | } |
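      | // NOTE (illustrative trace, not from the original source): for v4i32
      | // with N2 == 2, IdxSplat = {2,2,2,2} and Indices = {0,1,2,3}, so the
      | // SETEQ select takes the splatted new element in lane 2 and keeps N0
      | // in lanes 0, 1, and 3.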
18993 | |
18994 | if (N2C->getAPIntValue().uge(NumElts)) |
18995 | return SDValue(); |
18996 | uint64_t IdxVal = N2C->getZExtValue(); |
18997 | |
18998 | bool IsZeroElt = X86::isZeroNode(N1); |
18999 | bool IsAllOnesElt = VT.isInteger() && llvm::isAllOnesConstant(N1); |
19000 | |
19001 | // If we are inserting an element, see if we can do this more efficiently
19002 | // with a blend shuffle against a rematerializable vector than with a
19003 | // costly integer insertion.
19004 | if ((IsZeroElt || IsAllOnesElt) && Subtarget.hasSSE41() && |
19005 | (16 <= EltSizeInBits || (IsZeroElt && !VT.is128BitVector()))) { |
19006 | SmallVector<int, 8> BlendMask; |
19007 | for (unsigned i = 0; i != NumElts; ++i) |
19008 | BlendMask.push_back(i == IdxVal ? i + NumElts : i); |
19009 | SDValue CstVector = IsZeroElt ? getZeroVector(VT, Subtarget, DAG, dl) |
19010 | : getOnesVector(VT, DAG, dl); |
19011 | return DAG.getVectorShuffle(VT, dl, N0, CstVector, BlendMask); |
19012 | } |
19013 | |
19014 | // For 256-bit and 512-bit vectors, extract the 128-bit subvector, insert
19015 | // into that, and then put the subvector back into the result.
19016 | if (VT.is256BitVector() || VT.is512BitVector()) { |
19017 | |
19018 | |
19019 | if (VT.is256BitVector() && IdxVal == 0) { |
19020 | // With AVX (for f32/f64) or AVX2 (for i32) we can insert into the zero
19021 | // element efficiently with a BLENDI of the scalar-to-vector against the
19022 | // original vector.
19023 | if ((Subtarget.hasAVX() && (EltVT == MVT::f64 || EltVT == MVT::f32)) || |
19024 | (Subtarget.hasAVX2() && EltVT == MVT::i32)) { |
19025 | SDValue N1Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, N1); |
19026 | return DAG.getNode(X86ISD::BLENDI, dl, VT, N0, N1Vec, |
19027 | DAG.getTargetConstant(1, dl, MVT::i8)); |
19028 | } |
19029 | } |
19030 | |
19031 | |
19032 | SDValue V = extract128BitVector(N0, IdxVal, DAG, dl); |
19033 | |
19034 | |
19035 | unsigned NumEltsIn128 = 128 / EltSizeInBits; |
19036 | assert(isPowerOf2_32(NumEltsIn128)); |
19037 | |
19038 | unsigned IdxIn128 = IdxVal & (NumEltsIn128 - 1); |
19039 | |
19040 | V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, V.getValueType(), V, N1, |
19041 | DAG.getIntPtrConstant(IdxIn128, dl)); |
19042 | |
19043 | |
19044 | return insert128BitVector(N0, V, IdxVal, DAG, dl); |
19045 | } |
19046 | assert(VT.is128BitVector() && "Only 128-bit vector types should be left!"); |
19047 | |
19048 | |
19049 | if (IdxVal == 0 && ISD::isBuildVectorAllZeros(N0.getNode())) { |
19050 | if (EltVT == MVT::i32 || EltVT == MVT::f32 || EltVT == MVT::f64 || |
19051 | EltVT == MVT::i64) { |
19052 | N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, N1); |
19053 | return getShuffleVectorZeroOrUndef(N1, 0, true, Subtarget, DAG); |
19054 | } |
19055 | |
19056 | // We can't directly insert an i8 or i16 into a vector, so zero extend
19057 | // it to i32 first.
19058 | if (EltVT == MVT::i16 || EltVT == MVT::i8) { |
19059 | N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, N1); |
19060 | MVT ShufVT = MVT::getVectorVT(MVT::i32, VT.getSizeInBits() / 32); |
19061 | N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, ShufVT, N1); |
19062 | N1 = getShuffleVectorZeroOrUndef(N1, 0, true, Subtarget, DAG); |
19063 | return DAG.getBitcast(VT, N1); |
19064 | } |
19065 | } |
19066 | |
19067 | |
19068 | // PINSR* works with constant index.
19069 | if (VT == MVT::v8i16 || (VT == MVT::v16i8 && Subtarget.hasSSE41())) { |
19070 | unsigned Opc; |
19071 | if (VT == MVT::v8i16) { |
19072 | assert(Subtarget.hasSSE2() && "SSE2 required for PINSRW"); |
19073 | Opc = X86ISD::PINSRW; |
19074 | } else { |
19075 | assert(VT == MVT::v16i8 && "PINSRB requires v16i8 vector"); |
19076 | assert(Subtarget.hasSSE41() && "SSE41 required for PINSRB"); |
19077 | Opc = X86ISD::PINSRB; |
19078 | } |
19079 | |
19080 | assert(N1.getValueType() != MVT::i32 && "Unexpected VT"); |
19081 | N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, N1); |
19082 | N2 = DAG.getTargetConstant(IdxVal, dl, MVT::i8); |
19083 | return DAG.getNode(Opc, dl, VT, N0, N1, N2); |
19084 | } |
19085 | |
19086 | if (Subtarget.hasSSE41()) { |
19087 | if (EltVT == MVT::f32) { |
19088 | |
19089 | // Bits [7:6] of the constant are the source select. This will always be
19090 | //   zero here. The DAG Combiner may combine an extract_elt index into
19091 | //   these bits. For example (insert (extract, 3), 2) could be matched by
19092 | //   putting the '3' into bits [7:6] of X86ISD::INSERTPS.
19093 | // Bits [5:4] of the constant are the destination select. This is the
19094 | //   value of the incoming immediate.
19095 | // Bits [3:0] of the constant are the zero mask. The DAG Combiner may
19096 | //   combine either bitwise AND or insert of float 0.0 to set these bits.
19097 | bool MinSize = DAG.getMachineFunction().getFunction().hasMinSize(); |
19098 | if (IdxVal == 0 && (!MinSize || !MayFoldLoad(N1))) { |
19099 | |
19100 | // If this is an insertion of 32-bits into the low 32-bits of
19101 | // a vector, we prefer to generate a blend with immediate rather
19102 | // than an insertps. Blends are simpler operations in hardware and so
19103 | // will always have equal or better performance than insertps.
19104 | // But if optimizing for size and there's a load folding opportunity,
19105 | // generate insertps for blendps.
19106 | N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4f32, N1); |
19107 | return DAG.getNode(X86ISD::BLENDI, dl, VT, N0, N1, |
19108 | DAG.getTargetConstant(1, dl, MVT::i8)); |
19109 | } |
19110 | |
19111 | N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4f32, N1); |
19112 | return DAG.getNode(X86ISD::INSERTPS, dl, VT, N0, N1, |
19113 | DAG.getTargetConstant(IdxVal << 4, dl, MVT::i8)); |
19114 | } |
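      | // NOTE (illustrative, not from the original source): inserting into
      | // lane 2 produces the INSERTPS immediate (2 << 4) = 0x20: destination
      | // select (bits 5:4) is 2 while source select (bits 7:6) and the zero
      | // mask (bits 3:0) stay 0.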
19115 | |
19116 | |
19117 | if (EltVT == MVT::i32 || EltVT == MVT::i64) |
19118 | return Op; |
19119 | } |
19120 | |
19121 | return SDValue(); |
19122 | } |
19123 | |
19124 | static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, const X86Subtarget &Subtarget, |
19125 | SelectionDAG &DAG) { |
19126 | SDLoc dl(Op); |
19127 | MVT OpVT = Op.getSimpleValueType(); |
19128 | |
19129 | // Lowering a zero scalar to the all-zeros vector directly is always
19130 | // cheaper than inserting the zero into an undef vector.
19131 | if (X86::isZeroNode(Op.getOperand(0))) |
19132 | return getZeroVector(OpVT, Subtarget, DAG, dl); |
19133 | |
19134 | // For types wider than 128 bits, build the 128-bit SCALAR_TO_VECTOR and
19135 | // insert it into the low lanes of an undef wide vector.
19136 | if (!OpVT.is128BitVector()) { |
19137 | |
19138 | unsigned SizeFactor = OpVT.getSizeInBits() / 128; |
19139 | MVT VT128 = MVT::getVectorVT(OpVT.getVectorElementType(), |
19140 | OpVT.getVectorNumElements() / SizeFactor); |
19141 | |
19142 | Op = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT128, Op.getOperand(0)); |
19143 | |
19144 | |
19145 | return insert128BitVector(DAG.getUNDEF(OpVT), Op, 0, DAG, dl); |
19146 | } |
19147 | assert(OpVT.is128BitVector() && OpVT.isInteger() && OpVT != MVT::v2i64 && |
19148 | "Expected an SSE type!"); |
19149 | |
19150 | |
19151 | if (OpVT == MVT::v4i32) |
19152 | return Op; |
19153 | |
19154 | SDValue AnyExt = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Op.getOperand(0)); |
19155 | return DAG.getBitcast( |
19156 | OpVT, DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, AnyExt)); |
19157 | } |
19158 | |
19159 | |
19160 | |
19161 | |
19162 | static SDValue LowerINSERT_SUBVECTOR(SDValue Op, const X86Subtarget &Subtarget, |
19163 | SelectionDAG &DAG) { |
19164 | assert(Op.getSimpleValueType().getVectorElementType() == MVT::i1); |
19165 | |
19166 | return insert1BitVector(Op, DAG, Subtarget); |
19167 | } |
19168 | |
19169 | static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, const X86Subtarget &Subtarget, |
19170 | SelectionDAG &DAG) { |
19171 | assert(Op.getSimpleValueType().getVectorElementType() == MVT::i1 && |
19172 | "Only vXi1 extract_subvectors need custom lowering"); |
19173 | |
19174 | SDLoc dl(Op); |
19175 | SDValue Vec = Op.getOperand(0); |
19176 | uint64_t IdxVal = Op.getConstantOperandVal(1); |
19177 | |
19178 | if (IdxVal == 0) |
19179 | return Op; |
19180 | |
19181 | MVT VecVT = Vec.getSimpleValueType(); |
19182 | unsigned NumElems = VecVT.getVectorNumElements(); |
19183 | |
19184 | |
19185 | MVT WideVecVT = VecVT; |
19186 | if ((!Subtarget.hasDQI() && NumElems == 8) || NumElems < 8) { |
19187 | WideVecVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1; |
19188 | Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideVecVT, |
19189 | DAG.getUNDEF(WideVecVT), Vec, |
19190 | DAG.getIntPtrConstant(0, dl)); |
19191 | } |
19192 | |
19193 | |
19194 | Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideVecVT, Vec, |
19195 | DAG.getTargetConstant(IdxVal, dl, MVT::i8)); |
19196 | |
19197 | return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, Op.getValueType(), Vec, |
19198 | DAG.getIntPtrConstant(0, dl)); |
19199 | } |
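// Editorial sketch (not part of the original source): a scalar model of the
// KSHIFTR-based extraction above, assuming <cstdint>. Extracting a v2i1
// subvector at index Idx from a v8i1 mask is a right shift of the 8-bit mask
// register; the function name is illustrative.
static uint8_t extractMaskSubvecModel(uint8_t Mask8, unsigned Idx) {
  return (Mask8 >> Idx) & 0x3; // low two bits model the v2i1 result
}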
19200 | |
19201 | // Returns the appropriate wrapper opcode for a global reference. |
19202 | unsigned X86TargetLowering::getGlobalWrapperKind( |
19203 | const GlobalValue *GV, const unsigned char OpFlags) const { |
19204 | // References to absolute symbols are never PC-relative. |
19205 | if (GV && GV->isAbsoluteSymbolRef()) |
19206 | return X86ISD::Wrapper; |
19207 | |
19208 | CodeModel::Model M = getTargetMachine().getCodeModel(); |
19209 | if (Subtarget.isPICStyleRIPRel() && |
19210 | (M == CodeModel::Small || M == CodeModel::Kernel)) |
19211 | return X86ISD::WrapperRIP; |
19212 | |
19213 | // GOTPCREL references must always be RIP-relative. |
19214 | if (OpFlags == X86II::MO_GOTPCREL) |
19215 | return X86ISD::WrapperRIP; |
19216 | |
19217 | return X86ISD::Wrapper; |
19218 | } |
19219 | |
19220 | // ConstantPool, JumpTable, GlobalAddress, and ExternalSymbol are lowered |
19221 | // as their target counterparts wrapped in an X86ISD::Wrapper node, because |
19222 | // otherwise Select(N) would simply return N. The raw TargetGlobalAddress |
19223 | // etc. nodes may only be used to form addressing modes; the wrapped nodes |
19224 | // are selected into a plain address materialization (e.g. MOV32ri or a |
19225 | // RIP-relative LEA). |
19226 | SDValue |
19227 | X86TargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const { |
19228 | ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); |
19229 | |
19230 | // In 32-bit PIC mode (unless RIP-relative) the constant-pool address is |
19231 | // formed relative to the global base register, added back below. |
19232 | unsigned char OpFlag = Subtarget.classifyLocalReference(nullptr); |
19233 | |
19234 | auto PtrVT = getPointerTy(DAG.getDataLayout()); |
19235 | SDValue Result = DAG.getTargetConstantPool( |
19236 | CP->getConstVal(), PtrVT, CP->getAlign(), CP->getOffset(), OpFlag); |
19237 | SDLoc DL(CP); |
19238 | Result = DAG.getNode(getGlobalWrapperKind(), DL, PtrVT, Result); |
19239 | |
19240 | if (OpFlag) { |
19241 | Result = |
19242 | DAG.getNode(ISD::ADD, DL, PtrVT, |
19243 | DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT), Result); |
19244 | } |
19245 | |
19246 | return Result; |
19247 | } |
19248 | |
19249 | SDValue X86TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { |
19250 | JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); |
19251 | |
19252 | |
19253 | |
19254 | unsigned char OpFlag = Subtarget.classifyLocalReference(nullptr); |
19255 | |
19256 | auto PtrVT = getPointerTy(DAG.getDataLayout()); |
19257 | SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, OpFlag); |
19258 | SDLoc DL(JT); |
19259 | Result = DAG.getNode(getGlobalWrapperKind(), DL, PtrVT, Result); |
19260 | |
19261 | |
19262 | if (OpFlag) |
19263 | Result = |
19264 | DAG.getNode(ISD::ADD, DL, PtrVT, |
19265 | DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT), Result); |
19266 | |
19267 | return Result; |
19268 | } |
19269 | |
19270 | SDValue X86TargetLowering::LowerExternalSymbol(SDValue Op, |
19271 | SelectionDAG &DAG) const { |
19272 | return LowerGlobalOrExternal(Op, DAG, false); |
19273 | } |
19274 | |
19275 | SDValue |
19276 | X86TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { |
19277 | |
19278 | unsigned char OpFlags = |
19279 | Subtarget.classifyBlockAddressReference(); |
19280 | const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress(); |
19281 | int64_t Offset = cast<BlockAddressSDNode>(Op)->getOffset(); |
19282 | SDLoc dl(Op); |
19283 | auto PtrVT = getPointerTy(DAG.getDataLayout()); |
19284 | SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset, OpFlags); |
19285 | Result = DAG.getNode(getGlobalWrapperKind(), dl, PtrVT, Result); |
19286 | |
19287 | |
19288 | if (isGlobalRelativeToPICBase(OpFlags)) { |
19289 | Result = DAG.getNode(ISD::ADD, dl, PtrVT, |
19290 | DAG.getNode(X86ISD::GlobalBaseReg, dl, PtrVT), Result); |
19291 | } |
19292 | |
19293 | return Result; |
19294 | } |
19295 | |
19296 | /// Creates target global address or external symbol nodes for calls or |
19297 | /// other uses. |
19298 | SDValue X86TargetLowering::LowerGlobalOrExternal(SDValue Op, SelectionDAG &DAG, |
19299 | bool ForCall) const { |
19300 | |
19301 | const SDLoc &dl = SDLoc(Op); |
19302 | const GlobalValue *GV = nullptr; |
19303 | int64_t Offset = 0; |
19304 | const char *ExternalSym = nullptr; |
19305 | if (const auto *G = dyn_cast<GlobalAddressSDNode>(Op)) { |
19306 | GV = G->getGlobal(); |
19307 | Offset = G->getOffset(); |
19308 | } else { |
19309 | const auto *ES = cast<ExternalSymbolSDNode>(Op); |
19310 | ExternalSym = ES->getSymbol(); |
19311 | } |
19312 | |
19313 | |
19314 | const Module &Mod = *DAG.getMachineFunction().getFunction().getParent(); |
19315 | unsigned char OpFlags; |
19316 | if (ForCall) |
19317 | OpFlags = Subtarget.classifyGlobalFunctionReference(GV, Mod); |
19318 | else |
19319 | OpFlags = Subtarget.classifyGlobalReference(GV, Mod); |
19320 | bool HasPICReg = isGlobalRelativeToPICBase(OpFlags); |
19321 | bool NeedsLoad = isGlobalStubReference(OpFlags); |
19322 | |
19323 | CodeModel::Model M = DAG.getTarget().getCodeModel(); |
19324 | auto PtrVT = getPointerTy(DAG.getDataLayout()); |
19325 | SDValue Result; |
19326 | |
19327 | if (GV) { |
19328 | |
19329 | |
19330 | |
19331 | |
19332 | |
19333 | int64_t GlobalOffset = 0; |
19334 | if (OpFlags == X86II::MO_NO_FLAG && Offset >= 0 && |
19335 | X86::isOffsetSuitableForCodeModel(Offset, M, true)) { |
19336 | std::swap(GlobalOffset, Offset); |
19337 | } |
19338 | Result = DAG.getTargetGlobalAddress(GV, dl, PtrVT, GlobalOffset, OpFlags); |
19339 | } else { |
19340 | |
19341 | Result = DAG.getTargetExternalSymbol(ExternalSym, PtrVT, OpFlags); |
19342 | } |
19343 | |
19344 | |
19345 | |
19346 | if (ForCall && !NeedsLoad && !HasPICReg && Offset == 0) |
19347 | return Result; |
19348 | |
19349 | Result = DAG.getNode(getGlobalWrapperKind(GV, OpFlags), dl, PtrVT, Result); |
19350 | |
19351 | |
19352 | if (HasPICReg) { |
19353 | Result = DAG.getNode(ISD::ADD, dl, PtrVT, |
19354 | DAG.getNode(X86ISD::GlobalBaseReg, dl, PtrVT), Result); |
19355 | } |
19356 | |
19357 | |
19358 | |
19359 | if (NeedsLoad) |
19360 | Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result, |
19361 | MachinePointerInfo::getGOT(DAG.getMachineFunction())); |
19362 | |
19363 | |
19364 | |
19365 | if (Offset != 0) |
19366 | Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result, |
19367 | DAG.getConstant(Offset, dl, PtrVT)); |
19368 | |
19369 | return Result; |
19370 | } |
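// Editorial example (not part of the original source): how the flags
// computed above typically play out for x86-64 ELF with -fPIC. The names
// ExtGlobal/LocalBuf are illustrative.
extern int ExtGlobal;   // stub reference: movq ExtGlobal@GOTPCREL(%rip), %rax
static int LocalBuf[4]; // direct: leaq LocalBuf+8(%rip), %rax (offset folded)
static int *localElem() { return &LocalBuf[2]; }
static int readExt() { return ExtGlobal; }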
19371 | |
19372 | SDValue |
19373 | X86TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const { |
19374 | return LowerGlobalOrExternal(Op, DAG, false); |
19375 | } |
19376 | |
19377 | static SDValue |
19378 | GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA, |
19379 | SDValue *InFlag, const EVT PtrVT, unsigned ReturnReg, |
19380 | unsigned char OperandFlags, bool LocalDynamic = false) { |
19381 | MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); |
19382 | SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); |
19383 | SDLoc dl(GA); |
19384 | SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, |
19385 | GA->getValueType(0), |
19386 | GA->getOffset(), |
19387 | OperandFlags); |
19388 | |
19389 | X86ISD::NodeType CallType = LocalDynamic ? X86ISD::TLSBASEADDR |
19390 | : X86ISD::TLSADDR; |
19391 | |
19392 | if (InFlag) { |
19393 | SDValue Ops[] = { Chain, TGA, *InFlag }; |
19394 | Chain = DAG.getNode(CallType, dl, NodeTys, Ops); |
19395 | } else { |
19396 | SDValue Ops[] = { Chain, TGA }; |
19397 | Chain = DAG.getNode(CallType, dl, NodeTys, Ops); |
19398 | } |
19399 | |
19400 | // TLSADDR will be codegen'ed as a call; inform MFI that this function has calls. |
19401 | MFI.setAdjustsStack(true); |
19402 | MFI.setHasCalls(true); |
19403 | |
19404 | SDValue Flag = Chain.getValue(1); |
19405 | return DAG.getCopyFromReg(Chain, dl, ReturnReg, PtrVT, Flag); |
19406 | } |
19407 | |
19408 | |
19409 | static SDValue |
19410 | LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA, SelectionDAG &DAG, |
19411 | const EVT PtrVT) { |
19412 | SDValue InFlag; |
19413 | SDLoc dl(GA); |
19414 | SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, X86::EBX, |
19415 | DAG.getNode(X86ISD::GlobalBaseReg, |
19416 | SDLoc(), PtrVT), InFlag); |
19417 | InFlag = Chain.getValue(1); |
19418 | |
19419 | return GetTLSADDR(DAG, Chain, GA, &InFlag, PtrVT, X86::EAX, X86II::MO_TLSGD); |
19420 | } |
19421 | |
19422 | |
19423 | static SDValue |
19424 | LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA, SelectionDAG &DAG, |
19425 | const EVT PtrVT) { |
19426 | return GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT, |
19427 | X86::RAX, X86II::MO_TLSGD); |
19428 | } |
19429 | |
19430 | |
19431 | static SDValue |
19432 | LowerToTLSGeneralDynamicModelX32(GlobalAddressSDNode *GA, SelectionDAG &DAG, |
19433 | const EVT PtrVT) { |
19434 | return GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT, |
19435 | X86::EAX, X86II::MO_TLSGD); |
19436 | } |
19437 | |
19438 | static SDValue LowerToTLSLocalDynamicModel(GlobalAddressSDNode *GA, |
19439 | SelectionDAG &DAG, const EVT PtrVT, |
19440 | bool Is64Bit, bool Is64BitLP64) { |
19441 | SDLoc dl(GA); |
19442 | |
19443 | |
19444 | X86MachineFunctionInfo *MFI = DAG.getMachineFunction() |
19445 | .getInfo<X86MachineFunctionInfo>(); |
19446 | MFI->incNumLocalDynamicTLSAccesses(); |
19447 | |
19448 | SDValue Base; |
19449 | if (Is64Bit) { |
19450 | unsigned ReturnReg = Is64BitLP64 ? X86::RAX : X86::EAX; |
19451 | Base = GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT, ReturnReg, |
19452 | X86II::MO_TLSLD, true); |
19453 | } else { |
19454 | SDValue InFlag; |
19455 | SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, X86::EBX, |
19456 | DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT), InFlag); |
19457 | InFlag = Chain.getValue(1); |
19458 | Base = GetTLSADDR(DAG, Chain, GA, &InFlag, PtrVT, X86::EAX, |
19459 | X86II::MO_TLSLDM, true); |
19460 | } |
19461 | |
19462 | |
19463 | |
19464 | |
19465 | // Build x@dtpoff, the variable's offset within this module's TLS block. |
19466 | unsigned char OperandFlags = X86II::MO_DTPOFF; |
19467 | unsigned WrapperKind = X86ISD::Wrapper; |
19468 | SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, |
19469 | GA->getValueType(0), |
19470 | GA->getOffset(), OperandFlags); |
19471 | SDValue Offset = DAG.getNode(WrapperKind, dl, PtrVT, TGA); |
19472 | |
19473 | // The final address is the TLS block base plus x@dtpoff. |
19474 | return DAG.getNode(ISD::ADD, dl, PtrVT, Offset, Base); |
19475 | } |
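// Editorial example (not part of the original source): with -fPIC, two
// internal thread-locals in one function normally take the local-dynamic
// path above -- one TLSBASEADDR (__tls_get_addr) call whose result is
// reused with two different @DTPOFF offsets. Names are illustrative.
static thread_local int TlsA;
static thread_local int TlsB;
static int tlsSum() { return TlsA + TlsB; } // one call, two DTPOFF adds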
19476 | |
19477 | |
19478 | static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG, |
19479 | const EVT PtrVT, TLSModel::Model model, |
19480 | bool is64Bit, bool isPIC) { |
19481 | SDLoc dl(GA); |
19482 | |
19483 | // Get the thread pointer: %gs:0 (32-bit, address space 256) or %fs:0 (64-bit, address space 257). |
19484 | Value *Ptr = Constant::getNullValue(Type::getInt8PtrTy(*DAG.getContext(), |
19485 | is64Bit ? 257 : 256)); |
19486 | |
19487 | SDValue ThreadPointer = |
19488 | DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), DAG.getIntPtrConstant(0, dl), |
19489 | MachinePointerInfo(Ptr)); |
19490 | |
19491 | unsigned char OperandFlags = 0; |
19492 | // Most TLS accesses are not RIP-relative, even on x86-64; the one |
19493 | // exception is 64-bit initial-exec, handled below. |
19494 | unsigned WrapperKind = X86ISD::Wrapper; |
19495 | if (model == TLSModel::LocalExec) { |
19496 | OperandFlags = is64Bit ? X86II::MO_TPOFF : X86II::MO_NTPOFF; |
19497 | } else if (model == TLSModel::InitialExec) { |
19498 | if (is64Bit) { |
19499 | OperandFlags = X86II::MO_GOTTPOFF; |
19500 | WrapperKind = X86ISD::WrapperRIP; |
19501 | } else { |
19502 | OperandFlags = isPIC ? X86II::MO_GOTNTPOFF : X86II::MO_INDNTPOFF; |
19503 | } |
19504 | } else { |
19505 | llvm_unreachable("Unexpected model"); |
19506 | } |
19507 | |
19508 | // Emit "addl x@ntpoff,%eax" (local exec), "addl x@indntpoff,%eax" |
19509 | // (initial exec) or "addl x@gotntpoff(%ebx),%eax" (initial exec, |
19510 | // 32-bit PIC). |
19511 | SDValue TGA = |
19512 | DAG.getTargetGlobalAddress(GA->getGlobal(), dl, GA->getValueType(0), |
19513 | GA->getOffset(), OperandFlags); |
19514 | SDValue Offset = DAG.getNode(WrapperKind, dl, PtrVT, TGA); |
19515 | |
19516 | if (model == TLSModel::InitialExec) { |
19517 | if (isPIC && !is64Bit) { |
19518 | Offset = DAG.getNode(ISD::ADD, dl, PtrVT, |
19519 | DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT), |
19520 | Offset); |
19521 | } |
19522 | |
19523 | Offset = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Offset, |
19524 | MachinePointerInfo::getGOT(DAG.getMachineFunction())); |
19525 | } |
19526 | |
19527 | // The address of the thread-local variable is the thread pointer |
19528 | // plus the variable's offset. |
19529 | return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset); |
19530 | } |
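// Editorial example (not part of the original source): typical x86-64 ELF
// code for the two exec models handled above, as a rough sketch:
//   local-exec:   movl %fs:Var@TPOFF, %eax
//   initial-exec: movq Var@GOTTPOFF(%rip), %rax ; movl %fs:(%rax), %eax
// A C++ input that normally reaches the initial-exec path with -fPIC
// (ExecVar is an illustrative name):
extern thread_local int ExecVar;
static int readExecVar() { return ExecVar; }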
19531 | |
19532 | SDValue |
19533 | X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { |
19534 | |
19535 | GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); |
19536 | |
19537 | if (DAG.getTarget().useEmulatedTLS()) |
19538 | return LowerToTLSEmulatedModel(GA, DAG); |
19539 | |
19540 | const GlobalValue *GV = GA->getGlobal(); |
19541 | auto PtrVT = getPointerTy(DAG.getDataLayout()); |
19542 | bool PositionIndependent = isPositionIndependent(); |
19543 | |
19544 | if (Subtarget.isTargetELF()) { |
19545 | TLSModel::Model model = DAG.getTarget().getTLSModel(GV); |
19546 | switch (model) { |
19547 | case TLSModel::GeneralDynamic: |
19548 | if (Subtarget.is64Bit()) { |
19549 | if (Subtarget.isTarget64BitLP64()) |
19550 | return LowerToTLSGeneralDynamicModel64(GA, DAG, PtrVT); |
19551 | return LowerToTLSGeneralDynamicModelX32(GA, DAG, PtrVT); |
19552 | } |
19553 | return LowerToTLSGeneralDynamicModel32(GA, DAG, PtrVT); |
19554 | case TLSModel::LocalDynamic: |
19555 | return LowerToTLSLocalDynamicModel(GA, DAG, PtrVT, Subtarget.is64Bit(), |
19556 | Subtarget.isTarget64BitLP64()); |
19557 | case TLSModel::InitialExec: |
19558 | case TLSModel::LocalExec: |
19559 | return LowerToTLSExecModel(GA, DAG, PtrVT, model, Subtarget.is64Bit(), |
19560 | PositionIndependent); |
19561 | } |
19562 | llvm_unreachable("Unknown TLS model."); |
19563 | } |
19564 | |
19565 | if (Subtarget.isTargetDarwin()) { |
19566 | // Darwin has only one TLS model (TLV); lower to it. |
19567 | unsigned char OpFlag = 0; |
19568 | unsigned WrapperKind = Subtarget.isPICStyleRIPRel() ? |
19569 | X86ISD::WrapperRIP : X86ISD::Wrapper; |
19570 | |
19571 | // In PIC mode (unless RIP-relative) the TLVP address is based off |
19572 | // the global base register, added back below. |
19573 | bool PIC32 = PositionIndependent && !Subtarget.is64Bit(); |
19574 | if (PIC32) |
19575 | OpFlag = X86II::MO_TLVP_PIC_BASE; |
19576 | else |
19577 | OpFlag = X86II::MO_TLVP; |
19578 | SDLoc DL(Op); |
19579 | SDValue Result = DAG.getTargetGlobalAddress(GA->getGlobal(), DL, |
19580 | GA->getValueType(0), |
19581 | GA->getOffset(), OpFlag); |
19582 | SDValue Offset = DAG.getNode(WrapperKind, DL, PtrVT, Result); |
19583 | |
19584 | |
19585 | if (PIC32) |
19586 | Offset = DAG.getNode(ISD::ADD, DL, PtrVT, |
19587 | DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT), |
19588 | Offset); |
19589 | |
19590 | |
19591 | |
19592 | SDValue Chain = DAG.getEntryNode(); |
19593 | SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); |
19594 | Chain = DAG.getCALLSEQ_START(Chain, 0, 0, DL); |
19595 | SDValue Args[] = { Chain, Offset }; |
19596 | Chain = DAG.getNode(X86ISD::TLSCALL, DL, NodeTys, Args); |
19597 | Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, DL, true), |
19598 | DAG.getIntPtrConstant(0, DL, true), |
19599 | Chain.getValue(1), DL); |
19600 | |
19601 | // TLSCALL will be codegen'ed as a call; inform MFI that this function has calls. |
19602 | MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); |
19603 | MFI.setAdjustsStack(true); |
19604 | |
19605 | // The TLS address is returned in the standard call return register |
19606 | // (EAX/RAX). |
19607 | unsigned Reg = Subtarget.is64Bit() ? X86::RAX : X86::EAX; |
19608 | return DAG.getCopyFromReg(Chain, DL, Reg, PtrVT, Chain.getValue(1)); |
19609 | } |
19610 | |
19611 | if (Subtarget.isOSWindows()) { |
19612 | // Windows "implicit TLS": the address is computed from structures the |
19613 | // loader maintains, roughly: |
19614 | // mov rdx, qword [gs:58h] ; ThreadLocalStoragePointer from the TEB |
19615 | // mov ecx, dword [rel _tls_index] ; this module's index (from the CRT) |
19616 | // mov rcx, qword [rdx+rcx*8] ; this module's TLS block |
19617 | // mov eax, x@SECREL32 ; offset of the variable in the block |
19618 | // lea rax, [rcx+rax] ; the final address |
19619 | // 32-bit targets use fs:2Ch (or the CRT's _tls_array) instead of gs:58h. |
19620 | |
19621 | |
19622 | |
19623 | SDLoc dl(GA); |
19624 | SDValue Chain = DAG.getEntryNode(); |
19625 | |
19626 | // Get the thread pointer: %gs:0x58 on 64-bit, %fs:0x2C (or the CRT |
19627 | // symbol _tls_array) on 32-bit. MinGW has no _tls_array symbol, so |
19628 | // its literal offset is used directly. |
19629 | Value *Ptr = Constant::getNullValue(Subtarget.is64Bit() |
19630 | ? Type::getInt8PtrTy(*DAG.getContext(), |
19631 | 256) |
19632 | : Type::getInt32PtrTy(*DAG.getContext(), |
19633 | 257)); |
19634 | |
19635 | SDValue TlsArray = Subtarget.is64Bit() |
19636 | ? DAG.getIntPtrConstant(0x58, dl) |
19637 | : (Subtarget.isTargetWindowsGNU() |
19638 | ? DAG.getIntPtrConstant(0x2C, dl) |
19639 | : DAG.getExternalSymbol("_tls_array", PtrVT)); |
19640 | |
19641 | SDValue ThreadPointer = |
19642 | DAG.getLoad(PtrVT, dl, Chain, TlsArray, MachinePointerInfo(Ptr)); |
19643 | |
19644 | SDValue res; |
19645 | if (GV->getThreadLocalMode() == GlobalVariable::LocalExecTLSModel) { |
19646 | res = ThreadPointer; |
19647 | } else { |
19648 | |
19649 | SDValue IDX = DAG.getExternalSymbol("_tls_index", PtrVT); |
19650 | if (Subtarget.is64Bit()) |
19651 | IDX = DAG.getExtLoad(ISD::ZEXTLOAD, dl, PtrVT, Chain, IDX, |
19652 | MachinePointerInfo(), MVT::i32); |
19653 | else |
19654 | IDX = DAG.getLoad(PtrVT, dl, Chain, IDX, MachinePointerInfo()); |
19655 | |
19656 | const DataLayout &DL = DAG.getDataLayout(); |
19657 | SDValue Scale = |
19658 | DAG.getConstant(Log2_64_Ceil(DL.getPointerSize()), dl, MVT::i8); |
19659 | IDX = DAG.getNode(ISD::SHL, dl, PtrVT, IDX, Scale); |
19660 | |
19661 | res = DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, IDX); |
19662 | } |
19663 | |
19664 | res = DAG.getLoad(PtrVT, dl, Chain, res, MachinePointerInfo()); |
19665 | |
19666 | |
19667 | SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, |
19668 | GA->getValueType(0), |
19669 | GA->getOffset(), X86II::MO_SECREL); |
19670 | SDValue Offset = DAG.getNode(X86ISD::Wrapper, dl, PtrVT, TGA); |
19671 | |
19672 | |
19673 | |
19674 | return DAG.getNode(ISD::ADD, dl, PtrVT, res, Offset); |
19675 | } |
19676 | |
19677 | llvm_unreachable("TLS not implemented for this target."); |
19678 | } |
19679 | |
19680 | |
19681 | |
19682 | |
19683 | static SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) { |
19684 | SDValue Lo, Hi; |
19685 | DAG.getTargetLoweringInfo().expandShiftParts(Op.getNode(), Lo, Hi, DAG); |
19686 | return DAG.getMergeValues({Lo, Hi}, SDLoc(Op)); |
19687 | } |
19688 | |
19689 | static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget, |
19690 | SelectionDAG &DAG) { |
19691 | MVT VT = Op.getSimpleValueType(); |
19692 | assert((Op.getOpcode() == ISD::FSHL || Op.getOpcode() == ISD::FSHR) && |
19693 | "Unexpected funnel shift opcode!"); |
19694 | |
19695 | SDLoc DL(Op); |
19696 | SDValue Op0 = Op.getOperand(0); |
19697 | SDValue Op1 = Op.getOperand(1); |
19698 | SDValue Amt = Op.getOperand(2); |
19699 | |
19700 | bool IsFSHR = Op.getOpcode() == ISD::FSHR; |
19701 | |
19702 | if (VT.isVector()) { |
19703 | assert(Subtarget.hasVBMI2() && "Expected VBMI2"); |
19704 | |
19705 | if (IsFSHR) |
19706 | std::swap(Op0, Op1); |
19707 | |
19708 | |
19709 | if (!Subtarget.hasVLX() && !VT.is512BitVector()) { |
19710 | Op0 = widenSubVector(Op0, false, Subtarget, DAG, DL, 512); |
19711 | Op1 = widenSubVector(Op1, false, Subtarget, DAG, DL, 512); |
19712 | } |
19713 | |
19714 | SDValue Funnel; |
19715 | APInt APIntShiftAmt; |
19716 | MVT ResultVT = Op0.getSimpleValueType(); |
19717 | if (X86::isConstantSplat(Amt, APIntShiftAmt)) { |
19718 | uint64_t ShiftAmt = APIntShiftAmt.urem(VT.getScalarSizeInBits()); |
19719 | Funnel = |
19720 | DAG.getNode(IsFSHR ? X86ISD::VSHRD : X86ISD::VSHLD, DL, ResultVT, Op0, |
19721 | Op1, DAG.getTargetConstant(ShiftAmt, DL, MVT::i8)); |
19722 | } else { |
19723 | if (!Subtarget.hasVLX() && !VT.is512BitVector()) |
19724 | Amt = widenSubVector(Amt, false, Subtarget, DAG, DL, 512); |
19725 | Funnel = DAG.getNode(IsFSHR ? X86ISD::VSHRDV : X86ISD::VSHLDV, DL, |
19726 | ResultVT, Op0, Op1, Amt); |
19727 | } |
19728 | if (!Subtarget.hasVLX() && !VT.is512BitVector()) |
19729 | Funnel = extractSubVector(Funnel, 0, DAG, DL, VT.getSizeInBits()); |
19730 | return Funnel; |
19731 | } |
19732 | assert( |
19733 | (VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) && |
19734 | "Unexpected funnel shift type!"); |
19735 | |
19736 | |
19737 | bool OptForSize = DAG.shouldOptForSize(); |
19738 | bool ExpandFunnel = !OptForSize && Subtarget.isSHLDSlow(); |
19739 | |
19740 | |
19741 | |
19742 | if ((VT == MVT::i8 || (ExpandFunnel && VT == MVT::i16)) && |
19743 | !isa<ConstantSDNode>(Amt)) { |
19744 | unsigned EltSizeInBits = VT.getScalarSizeInBits(); |
19745 | SDValue Mask = DAG.getConstant(EltSizeInBits - 1, DL, Amt.getValueType()); |
19746 | SDValue HiShift = DAG.getConstant(EltSizeInBits, DL, Amt.getValueType()); |
19747 | Op0 = DAG.getAnyExtOrTrunc(Op0, DL, MVT::i32); |
19748 | Op1 = DAG.getZExtOrTrunc(Op1, DL, MVT::i32); |
19749 | Amt = DAG.getNode(ISD::AND, DL, Amt.getValueType(), Amt, Mask); |
19750 | SDValue Res = DAG.getNode(ISD::SHL, DL, MVT::i32, Op0, HiShift); |
19751 | Res = DAG.getNode(ISD::OR, DL, MVT::i32, Res, Op1); |
19752 | if (IsFSHR) { |
19753 | Res = DAG.getNode(ISD::SRL, DL, MVT::i32, Res, Amt); |
19754 | } else { |
19755 | Res = DAG.getNode(ISD::SHL, DL, MVT::i32, Res, Amt); |
19756 | Res = DAG.getNode(ISD::SRL, DL, MVT::i32, Res, HiShift); |
19757 | } |
19758 | return DAG.getZExtOrTrunc(Res, DL, VT); |
19759 | } |
19760 | |
19761 | if (VT == MVT::i8 || ExpandFunnel) |
19762 | return SDValue(); |
19763 | |
19764 | |
19765 | if (VT == MVT::i16) { |
19766 | Amt = DAG.getNode(ISD::AND, DL, Amt.getValueType(), Amt, |
19767 | DAG.getConstant(15, DL, Amt.getValueType())); |
19768 | unsigned FSHOp = (IsFSHR ? X86ISD::FSHR : X86ISD::FSHL); |
19769 | return DAG.getNode(FSHOp, DL, VT, Op0, Op1, Amt); |
19770 | } |
19771 | |
19772 | return Op; |
19773 | } |
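// Editorial sketch (not part of the original source): a scalar model of the
// 32-bit concat trick above for narrow variable-amount funnel shifts,
// assuming <cstdint>. The name is illustrative.
static uint8_t fshl8Model(uint8_t Hi, uint8_t Lo, unsigned Amt) {
  Amt &= 7;                                   // the ISD::AND with Mask
  uint32_t Concat = (uint32_t(Hi) << 8) | Lo; // SHL by HiShift, then OR
  return uint8_t((Concat << Amt) >> 8);       // FSHL: SHL, then SRL by HiShift
}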
19774 | |
19775 | |
19776 | |
19777 | static SDValue LowerI64IntToFP_AVX512DQ(SDValue Op, SelectionDAG &DAG, |
19778 | const X86Subtarget &Subtarget) { |
19779 | assert((Op.getOpcode() == ISD::SINT_TO_FP || |
19780 | Op.getOpcode() == ISD::STRICT_SINT_TO_FP || |
19781 | Op.getOpcode() == ISD::STRICT_UINT_TO_FP || |
19782 | Op.getOpcode() == ISD::UINT_TO_FP) && |
19783 | "Unexpected opcode!"); |
19784 | bool IsStrict = Op->isStrictFPOpcode(); |
19785 | unsigned OpNo = IsStrict ? 1 : 0; |
19786 | SDValue Src = Op.getOperand(OpNo); |
19787 | MVT SrcVT = Src.getSimpleValueType(); |
19788 | MVT VT = Op.getSimpleValueType(); |
19789 | |
19790 | if (!Subtarget.hasDQI() || SrcVT != MVT::i64 || Subtarget.is64Bit() || |
19791 | (VT != MVT::f32 && VT != MVT::f64)) |
19792 | return SDValue(); |
19793 | |
19794 | |
19795 | |
19796 | |
19797 | unsigned NumElts = Subtarget.hasVLX() ? 4 : 8; |
19798 | MVT VecInVT = MVT::getVectorVT(MVT::i64, NumElts); |
19799 | MVT VecVT = MVT::getVectorVT(VT, NumElts); |
19800 | |
19801 | SDLoc dl(Op); |
19802 | SDValue InVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecInVT, Src); |
19803 | if (IsStrict) { |
19804 | SDValue CvtVec = DAG.getNode(Op.getOpcode(), dl, {VecVT, MVT::Other}, |
19805 | {Op.getOperand(0), InVec}); |
19806 | SDValue Chain = CvtVec.getValue(1); |
19807 | SDValue Value = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, CvtVec, |
19808 | DAG.getIntPtrConstant(0, dl)); |
19809 | return DAG.getMergeValues({Value, Chain}, dl); |
19810 | } |
19811 | |
19812 | SDValue CvtVec = DAG.getNode(Op.getOpcode(), dl, VecVT, InVec); |
19813 | |
19814 | return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, CvtVec, |
19815 | DAG.getIntPtrConstant(0, dl)); |
19816 | } |
19817 | |
19818 | static bool useVectorCast(unsigned Opcode, MVT FromVT, MVT ToVT, |
19819 | const X86Subtarget &Subtarget) { |
19820 | switch (Opcode) { |
19821 | case ISD::SINT_TO_FP: |
19822 | |
19823 | if (!Subtarget.hasSSE2() || FromVT != MVT::v4i32) |
19824 | return false; |
19825 | |
19826 | return ToVT == MVT::v4f32 || (Subtarget.hasAVX() && ToVT == MVT::v4f64); |
19827 | |
19828 | case ISD::UINT_TO_FP: |
19829 | |
19830 | if (!Subtarget.hasAVX512() || FromVT != MVT::v4i32) |
19831 | return false; |
19832 | |
19833 | return ToVT == MVT::v4f32 || ToVT == MVT::v4f64; |
19834 | |
19835 | default: |
19836 | return false; |
19837 | } |
19838 | } |
19839 | |
19840 | |
19841 | |
19842 | |
19843 | static SDValue vectorizeExtractedCast(SDValue Cast, SelectionDAG &DAG, |
19844 | const X86Subtarget &Subtarget) { |
19845 | |
19846 | |
19847 | SDValue Extract = Cast.getOperand(0); |
19848 | MVT DestVT = Cast.getSimpleValueType(); |
19849 | if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT || |
19850 | !isa<ConstantSDNode>(Extract.getOperand(1))) |
19851 | return SDValue(); |
19852 | |
19853 | |
19854 | SDValue VecOp = Extract.getOperand(0); |
19855 | MVT FromVT = VecOp.getSimpleValueType(); |
19856 | unsigned NumEltsInXMM = 128 / FromVT.getScalarSizeInBits(); |
19857 | MVT Vec128VT = MVT::getVectorVT(FromVT.getScalarType(), NumEltsInXMM); |
19858 | MVT ToVT = MVT::getVectorVT(DestVT, NumEltsInXMM); |
19859 | if (!useVectorCast(Cast.getOpcode(), Vec128VT, ToVT, Subtarget)) |
19860 | return SDValue(); |
19861 | |
19862 | |
19863 | |
19864 | SDLoc DL(Cast); |
19865 | if (!isNullConstant(Extract.getOperand(1))) { |
19866 | SmallVector<int, 16> Mask(FromVT.getVectorNumElements(), -1); |
19867 | Mask[0] = Extract.getConstantOperandVal(1); |
19868 | VecOp = DAG.getVectorShuffle(FromVT, DL, VecOp, DAG.getUNDEF(FromVT), Mask); |
19869 | } |
19870 | |
19871 | |
19872 | if (FromVT != Vec128VT) |
19873 | VecOp = extract128BitVector(VecOp, 0, DAG, DL); |
19874 | |
19875 | |
19876 | |
19877 | SDValue VCast = DAG.getNode(Cast.getOpcode(), DL, ToVT, VecOp); |
19878 | return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, DestVT, VCast, |
19879 | DAG.getIntPtrConstant(0, DL)); |
19880 | } |
19881 | |
19882 | |
19883 | |
19884 | |
19885 | static SDValue lowerFPToIntToFP(SDValue CastToFP, SelectionDAG &DAG, |
19886 | const X86Subtarget &Subtarget) { |
19887 | |
19888 | SDValue CastToInt = CastToFP.getOperand(0); |
19889 | MVT VT = CastToFP.getSimpleValueType(); |
19890 | if (CastToInt.getOpcode() != ISD::FP_TO_SINT || VT.isVector()) |
19891 | return SDValue(); |
19892 | |
19893 | MVT IntVT = CastToInt.getSimpleValueType(); |
19894 | SDValue X = CastToInt.getOperand(0); |
19895 | MVT SrcVT = X.getSimpleValueType(); |
19896 | if (SrcVT != MVT::f32 && SrcVT != MVT::f64) |
19897 | return SDValue(); |
19898 | |
19899 | |
19900 | |
19901 | if (!Subtarget.hasSSE2() || (VT != MVT::f32 && VT != MVT::f64) || |
19902 | IntVT != MVT::i32) |
19903 | return SDValue(); |
19904 | |
19905 | unsigned SrcSize = SrcVT.getSizeInBits(); |
19906 | unsigned IntSize = IntVT.getSizeInBits(); |
19907 | unsigned VTSize = VT.getSizeInBits(); |
19908 | MVT VecSrcVT = MVT::getVectorVT(SrcVT, 128 / SrcSize); |
19909 | MVT VecIntVT = MVT::getVectorVT(IntVT, 128 / IntSize); |
19910 | MVT VecVT = MVT::getVectorVT(VT, 128 / VTSize); |
19911 | |
19912 | |
19913 | unsigned ToIntOpcode = |
19914 | SrcSize != IntSize ? X86ISD::CVTTP2SI : (unsigned)ISD::FP_TO_SINT; |
19915 | unsigned ToFPOpcode = |
19916 | IntSize != VTSize ? X86ISD::CVTSI2P : (unsigned)ISD::SINT_TO_FP; |
19917 | |
19918 | |
19919 | |
19920 | |
19921 | |
19922 | |
19923 | |
19924 | SDLoc DL(CastToFP); |
19925 | SDValue ZeroIdx = DAG.getIntPtrConstant(0, DL); |
19926 | SDValue VecX = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecSrcVT, X); |
19927 | SDValue VCastToInt = DAG.getNode(ToIntOpcode, DL, VecIntVT, VecX); |
19928 | SDValue VCastToFP = DAG.getNode(ToFPOpcode, DL, VecVT, VCastToInt); |
19929 | return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, VCastToFP, ZeroIdx); |
19930 | } |
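// Editorial sketch (not part of the original source): the scalar pattern
// this transform targets. Lowering it as cvttpd2dq + cvtdq2pd keeps the
// value in XMM registers instead of bouncing through a GPR. The function
// name is illustrative.
static double truncToIntAndBack(double X) {
  return static_cast<double>(static_cast<int>(X));
}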
19931 | |
19932 | static SDValue lowerINT_TO_FP_vXi64(SDValue Op, SelectionDAG &DAG, |
19933 | const X86Subtarget &Subtarget) { |
19934 | SDLoc DL(Op); |
19935 | bool IsStrict = Op->isStrictFPOpcode(); |
19936 | MVT VT = Op->getSimpleValueType(0); |
19937 | SDValue Src = Op->getOperand(IsStrict ? 1 : 0); |
19938 | |
19939 | if (Subtarget.hasDQI()) { |
19940 | assert(!Subtarget.hasVLX() && "Unexpected features"); |
19941 | |
19942 | assert((Src.getSimpleValueType() == MVT::v2i64 || |
19943 | Src.getSimpleValueType() == MVT::v4i64) && |
19944 | "Unsupported custom type"); |
19945 | |
19946 | |
19947 | assert((VT == MVT::v4f32 || VT == MVT::v2f64 || VT == MVT::v4f64) && |
19948 | "Unexpected VT!"); |
19949 | MVT WideVT = VT == MVT::v4f32 ? MVT::v8f32 : MVT::v8f64; |
19950 | |
19951 | |
19952 | |
19953 | SDValue Tmp = IsStrict ? DAG.getConstant(0, DL, MVT::v8i64) |
19954 | : DAG.getUNDEF(MVT::v8i64); |
19955 | Src = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, MVT::v8i64, Tmp, Src, |
19956 | DAG.getIntPtrConstant(0, DL)); |
19957 | SDValue Res, Chain; |
19958 | if (IsStrict) { |
19959 | Res = DAG.getNode(Op.getOpcode(), DL, {WideVT, MVT::Other}, |
19960 | {Op->getOperand(0), Src}); |
19961 | Chain = Res.getValue(1); |
19962 | } else { |
19963 | Res = DAG.getNode(Op.getOpcode(), DL, WideVT, Src); |
19964 | } |
19965 | |
19966 | Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res, |
19967 | DAG.getIntPtrConstant(0, DL)); |
19968 | |
19969 | if (IsStrict) |
19970 | return DAG.getMergeValues({Res, Chain}, DL); |
19971 | return Res; |
19972 | } |
19973 | |
19974 | bool IsSigned = Op->getOpcode() == ISD::SINT_TO_FP || |
19975 | Op->getOpcode() == ISD::STRICT_SINT_TO_FP; |
19976 | if (VT != MVT::v4f32 || IsSigned) |
19977 | return SDValue(); |
19978 | |
19979 | SDValue Zero = DAG.getConstant(0, DL, MVT::v4i64); |
19980 | SDValue One = DAG.getConstant(1, DL, MVT::v4i64); |
19981 | SDValue Sign = DAG.getNode(ISD::OR, DL, MVT::v4i64, |
19982 | DAG.getNode(ISD::SRL, DL, MVT::v4i64, Src, One), |
19983 | DAG.getNode(ISD::AND, DL, MVT::v4i64, Src, One)); |
19984 | SDValue IsNeg = DAG.getSetCC(DL, MVT::v4i64, Src, Zero, ISD::SETLT); |
19985 | SDValue SignSrc = DAG.getSelect(DL, MVT::v4i64, IsNeg, Sign, Src); |
19986 | SmallVector<SDValue, 4> SignCvts(4); |
19987 | SmallVector<SDValue, 4> Chains(4); |
19988 | for (int i = 0; i != 4; ++i) { |
19989 | SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, SignSrc, |
19990 | DAG.getIntPtrConstant(i, DL)); |
19991 | if (IsStrict) { |
19992 | SignCvts[i] = |
19993 | DAG.getNode(ISD::STRICT_SINT_TO_FP, DL, {MVT::f32, MVT::Other}, |
19994 | {Op.getOperand(0), Elt}); |
19995 | Chains[i] = SignCvts[i].getValue(1); |
19996 | } else { |
19997 | SignCvts[i] = DAG.getNode(ISD::SINT_TO_FP, DL, MVT::f32, Elt); |
19998 | } |
19999 | } |
20000 | SDValue SignCvt = DAG.getBuildVector(VT, DL, SignCvts); |
20001 | |
20002 | SDValue Slow, Chain; |
20003 | if (IsStrict) { |
20004 | Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); |
20005 | Slow = DAG.getNode(ISD::STRICT_FADD, DL, {MVT::v4f32, MVT::Other}, |
20006 | {Chain, SignCvt, SignCvt}); |
20007 | Chain = Slow.getValue(1); |
20008 | } else { |
20009 | Slow = DAG.getNode(ISD::FADD, DL, MVT::v4f32, SignCvt, SignCvt); |
20010 | } |
20011 | |
20012 | IsNeg = DAG.getNode(ISD::TRUNCATE, DL, MVT::v4i32, IsNeg); |
20013 | SDValue Cvt = DAG.getSelect(DL, MVT::v4f32, IsNeg, Slow, SignCvt); |
20014 | |
20015 | if (IsStrict) |
20016 | return DAG.getMergeValues({Cvt, Chain}, DL); |
20017 | |
20018 | return Cvt; |
20019 | } |
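// Editorial sketch (not part of the original source): scalar model of the
// unsigned v4i64 -> v4f32 path above, assuming <cstdint>. Out-of-range
// inputs are halved with the shifted-out bit OR'ed back in (round-to-odd,
// so the final doubling cannot double-round), converted as signed, then
// doubled. The name is illustrative.
static float uint64ToFloatModel(uint64_t X) {
  if (static_cast<int64_t>(X) >= 0)
    return static_cast<float>(static_cast<int64_t>(X));
  uint64_t Halved = (X >> 1) | (X & 1); // keep the sticky bit
  float F = static_cast<float>(static_cast<int64_t>(Halved));
  return F + F; // matches the FADD(SignCvt, SignCvt) above
}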
20020 | |
20021 | SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, |
20022 | SelectionDAG &DAG) const { |
20023 | bool IsStrict = Op->isStrictFPOpcode(); |
20024 | unsigned OpNo = IsStrict ? 1 : 0; |
20025 | SDValue Src = Op.getOperand(OpNo); |
20026 | SDValue Chain = IsStrict ? Op->getOperand(0) : DAG.getEntryNode(); |
20027 | MVT SrcVT = Src.getSimpleValueType(); |
20028 | MVT VT = Op.getSimpleValueType(); |
20029 | SDLoc dl(Op); |
20030 | |
20031 | if (SDValue Extract = vectorizeExtractedCast(Op, DAG, Subtarget)) |
20032 | return Extract; |
20033 | |
20034 | if (SDValue R = lowerFPToIntToFP(Op, DAG, Subtarget)) |
20035 | return R; |
20036 | |
20037 | if (SrcVT.isVector()) { |
20038 | if (SrcVT == MVT::v2i32 && VT == MVT::v2f64) { |
20039 | |
20040 | |
20041 | if (IsStrict) |
20042 | return DAG.getNode( |
20043 | X86ISD::STRICT_CVTSI2P, dl, {VT, MVT::Other}, |
20044 | {Chain, DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32, Src, |
20045 | DAG.getUNDEF(SrcVT))}); |
20046 | return DAG.getNode(X86ISD::CVTSI2P, dl, VT, |
20047 | DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32, Src, |
20048 | DAG.getUNDEF(SrcVT))); |
20049 | } |
20050 | if (SrcVT == MVT::v2i64 || SrcVT == MVT::v4i64) |
20051 | return lowerINT_TO_FP_vXi64(Op, DAG, Subtarget); |
20052 | |
20053 | return SDValue(); |
20054 | } |
20055 | |
20056 | assert(SrcVT <= MVT::i64 && SrcVT >= MVT::i16 && |
20057 | "Unknown SINT_TO_FP to lower!"); |
20058 | |
20059 | bool UseSSEReg = isScalarFPTypeInSSEReg(VT); |
20060 | |
20061 | |
20062 | |
20063 | if (SrcVT == MVT::i32 && UseSSEReg) |
20064 | return Op; |
20065 | if (SrcVT == MVT::i64 && UseSSEReg && Subtarget.is64Bit()) |
20066 | return Op; |
20067 | |
20068 | if (SDValue V = LowerI64IntToFP_AVX512DQ(Op, DAG, Subtarget)) |
20069 | return V; |
20070 | |
20071 | |
20072 | if (SrcVT == MVT::i16 && (UseSSEReg || VT == MVT::f128)) { |
20073 | SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, Src); |
20074 | if (IsStrict) |
20075 | return DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, {VT, MVT::Other}, |
20076 | {Chain, Ext}); |
20077 | |
20078 | return DAG.getNode(ISD::SINT_TO_FP, dl, VT, Ext); |
20079 | } |
20080 | |
20081 | if (VT == MVT::f128) |
20082 | return SDValue(); |
20083 | |
20084 | SDValue ValueToStore = Src; |
20085 | if (SrcVT == MVT::i64 && Subtarget.hasSSE2() && !Subtarget.is64Bit()) |
20086 | // Bitcasting to f64 allows a single 64-bit store from an SSE |
20087 | // register, avoiding the store-forwarding penalty that two 32-bit |
20088 | // stores would incur. |
20089 | ValueToStore = DAG.getBitcast(MVT::f64, ValueToStore); |
20090 | |
20091 | unsigned Size = SrcVT.getStoreSize(); |
20092 | Align Alignment(Size); |
20093 | MachineFunction &MF = DAG.getMachineFunction(); |
20094 | auto PtrVT = getPointerTy(MF.getDataLayout()); |
20095 | int SSFI = MF.getFrameInfo().CreateStackObject(Size, Alignment, false); |
20096 | MachinePointerInfo MPI = |
20097 | MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI); |
20098 | SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT); |
20099 | Chain = DAG.getStore(Chain, dl, ValueToStore, StackSlot, MPI, Alignment); |
20100 | std::pair<SDValue, SDValue> Tmp = |
20101 | BuildFILD(VT, SrcVT, dl, Chain, StackSlot, MPI, Alignment, DAG); |
20102 | |
20103 | if (IsStrict) |
20104 | return DAG.getMergeValues({Tmp.first, Tmp.second}, dl); |
20105 | |
20106 | return Tmp.first; |
20107 | } |
20108 | |
20109 | std::pair<SDValue, SDValue> X86TargetLowering::BuildFILD( |
20110 | EVT DstVT, EVT SrcVT, const SDLoc &DL, SDValue Chain, SDValue Pointer, |
20111 | MachinePointerInfo PtrInfo, Align Alignment, SelectionDAG &DAG) const { |
20112 | |
20113 | SDVTList Tys; |
20114 | bool useSSE = isScalarFPTypeInSSEReg(DstVT); |
20115 | if (useSSE) |
20116 | Tys = DAG.getVTList(MVT::f80, MVT::Other); |
20117 | else |
20118 | Tys = DAG.getVTList(DstVT, MVT::Other); |
20119 | |
20120 | SDValue FILDOps[] = {Chain, Pointer}; |
20121 | SDValue Result = |
20122 | DAG.getMemIntrinsicNode(X86ISD::FILD, DL, Tys, FILDOps, SrcVT, PtrInfo, |
20123 | Alignment, MachineMemOperand::MOLoad); |
20124 | Chain = Result.getValue(1); |
20125 | |
20126 | if (useSSE) { |
20127 | MachineFunction &MF = DAG.getMachineFunction(); |
20128 | unsigned SSFISize = DstVT.getStoreSize(); |
20129 | int SSFI = |
20130 | MF.getFrameInfo().CreateStackObject(SSFISize, Align(SSFISize), false); |
20131 | auto PtrVT = getPointerTy(MF.getDataLayout()); |
20132 | SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT); |
20133 | Tys = DAG.getVTList(MVT::Other); |
20134 | SDValue FSTOps[] = {Chain, Result, StackSlot}; |
20135 | MachineMemOperand *StoreMMO = DAG.getMachineFunction().getMachineMemOperand( |
20136 | MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI), |
20137 | MachineMemOperand::MOStore, SSFISize, Align(SSFISize)); |
20138 | |
20139 | Chain = |
20140 | DAG.getMemIntrinsicNode(X86ISD::FST, DL, Tys, FSTOps, DstVT, StoreMMO); |
20141 | Result = DAG.getLoad( |
20142 | DstVT, DL, Chain, StackSlot, |
20143 | MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI)); |
20144 | Chain = Result.getValue(1); |
20145 | } |
20146 | |
20147 | return { Result, Chain }; |
20148 | } |
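// Editorial note (not part of the original source): for SSE destinations
// the sequence built above is, roughly,
//   fild  qword ptr [Pointer]  ; X86ISD::FILD, integer load into x87
//   fstp  qword ptr [SSFI]     ; X86ISD::FST, store rounded to DstVT
//   movsd xmm0, [SSFI]         ; reload into the SSE register file
// The f80 intermediate keeps the i64 load exact until the single rounding
// performed by the store.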
20149 | |
20150 | |
20151 | |
20152 | |
20153 | static bool shouldUseHorizontalOp(bool IsSingleSource, SelectionDAG &DAG, |
20154 | const X86Subtarget &Subtarget) { |
20155 | bool IsOptimizingSize = DAG.shouldOptForSize(); |
20156 | bool HasFastHOps = Subtarget.hasFastHorizontalOps(); |
20157 | return !IsSingleSource || IsOptimizingSize || HasFastHOps; |
20158 | } |
20159 | |
20160 | |
20161 | static SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG, |
20162 | const X86Subtarget &Subtarget) { |
20163 | |
20164 | |
20165 | |
20166 | assert(!Op->isStrictFPOpcode() && "Expected non-strict uint_to_fp!"); |
20167 | |
20168 | // The algorithm here is not obvious. The sequence being built is: |
20169 | // movq %rax, %xmm0 |
20170 | // punpckldq (c0), %xmm0 ; c0: (uint4){ 0x43300000U, 0x45300000U, 0, 0 } |
20171 | // subpd (c1), %xmm0 ; c1: (double2){ 0x1.0p52, 0x1.0p52 * 0x1.0p32 } |
20172 | // #ifdef __SSE3__ |
20173 | // haddpd %xmm0, %xmm0 |
20174 | // #else |
20175 | // pshufd $0x4e, %xmm0, %xmm1 |
20176 | // addpd %xmm1, %xmm0 |
20177 | // #endif |
20178 | // The two lanes hold the low and high halves biased by 2^52 and 2^84; |
20179 | // subtracting the biases and summing the lanes reconstructs the value. |
20180 | SDLoc dl(Op); |
20181 | LLVMContext *Context = DAG.getContext(); |
20182 | |
20183 | |
20184 | static const uint32_t CV0[] = { 0x43300000, 0x45300000, 0, 0 }; |
20185 | Constant *C0 = ConstantDataVector::get(*Context, CV0); |
20186 | auto PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); |
20187 | SDValue CPIdx0 = DAG.getConstantPool(C0, PtrVT, Align(16)); |
20188 | |
20189 | SmallVector<Constant*,2> CV1; |
20190 | CV1.push_back( |
20191 | ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble(), |
20192 | APInt(64, 0x4330000000000000ULL)))); |
20193 | CV1.push_back( |
20194 | ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble(), |
20195 | APInt(64, 0x4530000000000000ULL)))); |
20196 | Constant *C1 = ConstantVector::get(CV1); |
20197 | SDValue CPIdx1 = DAG.getConstantPool(C1, PtrVT, Align(16)); |
20198 | |
20199 | |
20200 | SDValue XR1 = |
20201 | DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Op.getOperand(0)); |
20202 | SDValue CLod0 = DAG.getLoad( |
20203 | MVT::v4i32, dl, DAG.getEntryNode(), CPIdx0, |
20204 | MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), Align(16)); |
20205 | SDValue Unpck1 = |
20206 | getUnpackl(DAG, dl, MVT::v4i32, DAG.getBitcast(MVT::v4i32, XR1), CLod0); |
20207 | |
20208 | SDValue CLod1 = DAG.getLoad( |
20209 | MVT::v2f64, dl, CLod0.getValue(1), CPIdx1, |
20210 | MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), Align(16)); |
20211 | SDValue XR2F = DAG.getBitcast(MVT::v2f64, Unpck1); |
20212 | |
20213 | SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, XR2F, CLod1); |
20214 | SDValue Result; |
20215 | |
20216 | if (Subtarget.hasSSE3() && |
20217 | shouldUseHorizontalOp(true, DAG, Subtarget)) { |
20218 | Result = DAG.getNode(X86ISD::FHADD, dl, MVT::v2f64, Sub, Sub); |
20219 | } else { |
20220 | SDValue Shuffle = DAG.getVectorShuffle(MVT::v2f64, dl, Sub, Sub, {1,-1}); |
20221 | Result = DAG.getNode(ISD::FADD, dl, MVT::v2f64, Shuffle, Sub); |
20222 | } |
20223 | Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Result, |
20224 | DAG.getIntPtrConstant(0, dl)); |
20225 | return Result; |
20226 | } |
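// Editorial sketch (not part of the original source): a scalar model of the
// punpckldq/subpd trick built above, assuming <cstdint> and <cstring>.
// 0x4330000000000000 is 2^52 and 0x4530000000000000 is 2^84; OR'ing the
// halves of X into their mantissas and subtracting the same constants
// recovers lo and hi*2^32 exactly. The name is illustrative.
static double uint64ToDoubleModel(uint64_t X) {
  uint64_t LoBits = (X & 0xffffffffULL) | 0x4330000000000000ULL;
  uint64_t HiBits = (X >> 32) | 0x4530000000000000ULL;
  double Lo, Hi;
  std::memcpy(&Lo, &LoBits, sizeof(Lo));
  std::memcpy(&Hi, &HiBits, sizeof(Hi));
  return (Hi - 0x1.0p84) + (Lo - 0x1.0p52); // single rounding in the add
}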
20227 | |
20228 | |
20229 | static SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG, |
20230 | const X86Subtarget &Subtarget) { |
20231 | unsigned OpNo = Op.getNode()->isStrictFPOpcode() ? 1 : 0; |
20232 | SDLoc dl(Op); |
20233 | |
20234 | SDValue Bias = DAG.getConstantFP(BitsToDouble(0x4330000000000000ULL), dl, |
20235 | MVT::f64); |
20236 | |
20237 | |
20238 | SDValue Load = |
20239 | DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Op.getOperand(OpNo)); |
20240 | |
20241 | |
20242 | Load = getShuffleVectorZeroOrUndef(Load, 0, true, Subtarget, DAG); |
20243 | |
20244 | |
20245 | SDValue Or = DAG.getNode( |
20246 | ISD::OR, dl, MVT::v2i64, |
20247 | DAG.getBitcast(MVT::v2i64, Load), |
20248 | DAG.getBitcast(MVT::v2i64, |
20249 | DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f64, Bias))); |
20250 | Or = |
20251 | DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, |
20252 | DAG.getBitcast(MVT::v2f64, Or), DAG.getIntPtrConstant(0, dl)); |
20253 | |
20254 | if (Op.getNode()->isStrictFPOpcode()) { |
20255 | |
20256 | |
20257 | SDValue Chain = Op.getOperand(0); |
20258 | SDValue Sub = DAG.getNode(ISD::STRICT_FSUB, dl, {MVT::f64, MVT::Other}, |
20259 | {Chain, Or, Bias}); |
20260 | |
20261 | if (Op.getValueType() == Sub.getValueType()) |
20262 | return Sub; |
20263 | |
20264 | |
20265 | std::pair<SDValue, SDValue> ResultPair = DAG.getStrictFPExtendOrRound( |
20266 | Sub, Sub.getValue(1), dl, Op.getSimpleValueType()); |
20267 | |
20268 | return DAG.getMergeValues({ResultPair.first, ResultPair.second}, dl); |
20269 | } |
20270 | |
20271 | |
20272 | |
20273 | SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Or, Bias); |
20274 | |
20275 | |
20276 | return DAG.getFPExtendOrRound(Sub, dl, Op.getSimpleValueType()); |
20277 | } |
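// Editorial sketch (not part of the original source): scalar model of the
// OR/FSUB pair above, assuming <cstdint> and <cstring>. OR'ing the u32 into
// the mantissa of 2^52 produces the double 2^52 + X exactly; subtracting
// the bias yields (double)X with no rounding at all. Name is illustrative.
static double uint32ToDoubleModel(uint32_t X) {
  uint64_t Bits = 0x4330000000000000ULL | X; // double with value 2^52 + X
  double D;
  std::memcpy(&D, &Bits, sizeof(D));
  return D - 0x1.0p52; // exact
}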
20278 | |
20279 | static SDValue lowerUINT_TO_FP_v2i32(SDValue Op, SelectionDAG &DAG, |
20280 | const X86Subtarget &Subtarget, |
20281 | const SDLoc &DL) { |
20282 | if (Op.getSimpleValueType() != MVT::v2f64) |
20283 | return SDValue(); |
20284 | |
20285 | bool IsStrict = Op->isStrictFPOpcode(); |
20286 | |
20287 | SDValue N0 = Op.getOperand(IsStrict ? 1 : 0); |
20288 | assert(N0.getSimpleValueType() == MVT::v2i32 && "Unexpected input type"); |
20289 | |
20290 | if (Subtarget.hasAVX512()) { |
20291 | if (!Subtarget.hasVLX()) { |
20292 | |
20293 | if (!IsStrict) |
20294 | return SDValue(); |
20295 | |
20296 | N0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4i32, N0, |
20297 | DAG.getConstant(0, DL, MVT::v2i32)); |
20298 | SDValue Res = DAG.getNode(Op->getOpcode(), DL, {MVT::v4f64, MVT::Other}, |
20299 | {Op.getOperand(0), N0}); |
20300 | SDValue Chain = Res.getValue(1); |
20301 | Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2f64, Res, |
20302 | DAG.getIntPtrConstant(0, DL)); |
20303 | return DAG.getMergeValues({Res, Chain}, DL); |
20304 | } |
20305 | |
20306 | |
20307 | N0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4i32, N0, |
20308 | DAG.getUNDEF(MVT::v2i32)); |
20309 | if (IsStrict) |
20310 | return DAG.getNode(X86ISD::STRICT_CVTUI2P, DL, {MVT::v2f64, MVT::Other}, |
20311 | {Op.getOperand(0), N0}); |
20312 | return DAG.getNode(X86ISD::CVTUI2P, DL, MVT::v2f64, N0); |
20313 | } |
20314 | |
20315 | |
20316 | |
20317 | |
20318 | |
20319 | SDValue ZExtIn = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v2i64, N0); |
20320 | SDValue VBias = |
20321 | DAG.getConstantFP(BitsToDouble(0x4330000000000000ULL), DL, MVT::v2f64); |
20322 | SDValue Or = DAG.getNode(ISD::OR, DL, MVT::v2i64, ZExtIn, |
20323 | DAG.getBitcast(MVT::v2i64, VBias)); |
20324 | Or = DAG.getBitcast(MVT::v2f64, Or); |
20325 | |
20326 | if (IsStrict) |
20327 | return DAG.getNode(ISD::STRICT_FSUB, DL, {MVT::v2f64, MVT::Other}, |
20328 | {Op.getOperand(0), Or, VBias}); |
20329 | return DAG.getNode(ISD::FSUB, DL, MVT::v2f64, Or, VBias); |
20330 | } |
20331 | |
20332 | static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG, |
20333 | const X86Subtarget &Subtarget) { |
20334 | SDLoc DL(Op); |
20335 | bool IsStrict = Op->isStrictFPOpcode(); |
20336 | SDValue V = Op->getOperand(IsStrict ? 1 : 0); |
20337 | MVT VecIntVT = V.getSimpleValueType(); |
20338 | assert((VecIntVT == MVT::v4i32 || VecIntVT == MVT::v8i32) && |
20339 | "Unsupported custom type"); |
20340 | |
20341 | if (Subtarget.hasAVX512()) { |
20342 | |
20343 | assert(!Subtarget.hasVLX() && "Unexpected features"); |
20344 | MVT VT = Op->getSimpleValueType(0); |
20345 | |
20346 | |
20347 | if (VT == MVT::v8f64) |
20348 | return Op; |
20349 | |
20350 | assert((VT == MVT::v4f32 || VT == MVT::v8f32 || VT == MVT::v4f64) && |
20351 | "Unexpected VT!"); |
20352 | MVT WideVT = VT == MVT::v4f64 ? MVT::v8f64 : MVT::v16f32; |
20353 | MVT WideIntVT = VT == MVT::v4f64 ? MVT::v8i32 : MVT::v16i32; |
20354 | |
20355 | |
20356 | SDValue Tmp = |
20357 | IsStrict ? DAG.getConstant(0, DL, WideIntVT) : DAG.getUNDEF(WideIntVT); |
20358 | V = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideIntVT, Tmp, V, |
20359 | DAG.getIntPtrConstant(0, DL)); |
20360 | SDValue Res, Chain; |
20361 | if (IsStrict) { |
20362 | Res = DAG.getNode(ISD::STRICT_UINT_TO_FP, DL, {WideVT, MVT::Other}, |
20363 | {Op->getOperand(0), V}); |
20364 | Chain = Res.getValue(1); |
20365 | } else { |
20366 | Res = DAG.getNode(ISD::UINT_TO_FP, DL, WideVT, V); |
20367 | } |
20368 | |
20369 | Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res, |
20370 | DAG.getIntPtrConstant(0, DL)); |
20371 | |
20372 | if (IsStrict) |
20373 | return DAG.getMergeValues({Res, Chain}, DL); |
20374 | return Res; |
20375 | } |
20376 | |
20377 | if (Subtarget.hasAVX() && VecIntVT == MVT::v4i32 && |
20378 | Op->getSimpleValueType(0) == MVT::v4f64) { |
20379 | SDValue ZExtIn = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i64, V); |
20380 | Constant *Bias = ConstantFP::get( |
20381 | *DAG.getContext(), |
20382 | APFloat(APFloat::IEEEdouble(), APInt(64, 0x4330000000000000ULL))); |
20383 | auto PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); |
20384 | SDValue CPIdx = DAG.getConstantPool(Bias, PtrVT, Align(8)); |
20385 | SDVTList Tys = DAG.getVTList(MVT::v4f64, MVT::Other); |
20386 | SDValue Ops[] = {DAG.getEntryNode(), CPIdx}; |
20387 | SDValue VBias = DAG.getMemIntrinsicNode( |
20388 | X86ISD::VBROADCAST_LOAD, DL, Tys, Ops, MVT::f64, |
20389 | MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), Align(8), |
20390 | MachineMemOperand::MOLoad); |
20391 | |
20392 | SDValue Or = DAG.getNode(ISD::OR, DL, MVT::v4i64, ZExtIn, |
20393 | DAG.getBitcast(MVT::v4i64, VBias)); |
20394 | Or = DAG.getBitcast(MVT::v4f64, Or); |
20395 | |
20396 | if (IsStrict) |
20397 | return DAG.getNode(ISD::STRICT_FSUB, DL, {MVT::v4f64, MVT::Other}, |
20398 | {Op.getOperand(0), Or, VBias}); |
20399 | return DAG.getNode(ISD::FSUB, DL, MVT::v4f64, Or, VBias); |
20400 | } |
20401 | |
20402 | // The algorithm is the following: |
20403 | // #ifdef __SSE4_1__ |
20404 | // uint4 lo = _mm_blend_epi16( v, (uint4) 0x4b000000, 0xaa); |
20405 | // uint4 hi = _mm_blend_epi16( _mm_srli_epi32(v,16), |
20406 | // (uint4) 0x53000000, 0xaa); |
20407 | // #else |
20408 | // uint4 lo = (v & (uint4) 0xffff) | (uint4) 0x4b000000; |
20409 | // uint4 hi = (v >> 16) | (uint4) 0x53000000; |
20410 | // #endif |
20411 | // float4 fhi = (float4) hi - (float4) 0x1.0p39; |
20412 | // float4 fres = (float4) lo + fhi; |
20413 | |
20414 | bool Is128 = VecIntVT == MVT::v4i32; |
20415 | MVT VecFloatVT = Is128 ? MVT::v4f32 : MVT::v8f32; |
20416 | |
20417 | |
20418 | if (VecFloatVT != Op->getSimpleValueType(0)) |
20419 | return SDValue(); |
20420 | |
20421 | |
20422 | |
20423 | |
20424 | |
20425 | |
20426 | |
20427 | // Both variants share the 0x4b000000 and 0x53000000 splat constants |
20428 | // and the 16-bit logical right shift of v, which are built next. |
20429 | SDValue VecCstLow = DAG.getConstant(0x4b000000, DL, VecIntVT); |
20430 | |
20431 | SDValue VecCstHigh = DAG.getConstant(0x53000000, DL, VecIntVT); |
20432 | |
20433 | |
20434 | SDValue VecCstShift = DAG.getConstant(16, DL, VecIntVT); |
20435 | SDValue HighShift = DAG.getNode(ISD::SRL, DL, VecIntVT, V, VecCstShift); |
20436 | |
20437 | SDValue Low, High; |
20438 | if (Subtarget.hasSSE41()) { |
20439 | MVT VecI16VT = Is128 ? MVT::v8i16 : MVT::v16i16; |
20440 | |
20441 | SDValue VecCstLowBitcast = DAG.getBitcast(VecI16VT, VecCstLow); |
20442 | SDValue VecBitcast = DAG.getBitcast(VecI16VT, V); |
20443 | |
20444 | |
20445 | Low = DAG.getNode(X86ISD::BLENDI, DL, VecI16VT, VecBitcast, |
20446 | VecCstLowBitcast, DAG.getTargetConstant(0xaa, DL, MVT::i8)); |
20447 | |
20448 | |
20449 | SDValue VecCstHighBitcast = DAG.getBitcast(VecI16VT, VecCstHigh); |
20450 | SDValue VecShiftBitcast = DAG.getBitcast(VecI16VT, HighShift); |
20451 | |
20452 | |
20453 | High = DAG.getNode(X86ISD::BLENDI, DL, VecI16VT, VecShiftBitcast, |
20454 | VecCstHighBitcast, DAG.getTargetConstant(0xaa, DL, MVT::i8)); |
20455 | } else { |
20456 | SDValue VecCstMask = DAG.getConstant(0xffff, DL, VecIntVT); |
20457 | |
20458 | SDValue LowAnd = DAG.getNode(ISD::AND, DL, VecIntVT, V, VecCstMask); |
20459 | Low = DAG.getNode(ISD::OR, DL, VecIntVT, LowAnd, VecCstLow); |
20460 | |
20461 | |
20462 | High = DAG.getNode(ISD::OR, DL, VecIntVT, HighShift, VecCstHigh); |
20463 | } |
20464 | |
20465 | |
20466 | SDValue VecCstFSub = DAG.getConstantFP( |
20467 | APFloat(APFloat::IEEEsingle(), APInt(32, 0x53000080)), DL, VecFloatVT); |
20468 | |
20469 | |
20470 | |
20471 | |
20472 | |
20473 | SDValue HighBitcast = DAG.getBitcast(VecFloatVT, High); |
20474 | |
20475 | |
20476 | SDValue LowBitcast = DAG.getBitcast(VecFloatVT, Low); |
20477 | |
20478 | if (IsStrict) { |
20479 | SDValue FHigh = DAG.getNode(ISD::STRICT_FSUB, DL, {VecFloatVT, MVT::Other}, |
20480 | {Op.getOperand(0), HighBitcast, VecCstFSub}); |
20481 | return DAG.getNode(ISD::STRICT_FADD, DL, {VecFloatVT, MVT::Other}, |
20482 | {FHigh.getValue(1), LowBitcast, FHigh}); |
20483 | } |
20484 | |
20485 | SDValue FHigh = |
20486 | DAG.getNode(ISD::FSUB, DL, VecFloatVT, HighBitcast, VecCstFSub); |
20487 | return DAG.getNode(ISD::FADD, DL, VecFloatVT, LowBitcast, FHigh); |
20488 | } |
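// Editorial sketch (not part of the original source): scalar model of the
// blend/shift algorithm above, assuming <cstdint> and <cstring>.
// 0x4b000000 is 2^23 and 0x53000000 is 2^39; 0x53000080 is 2^39 + 2^23, so
// FHi comes out as HiHalf*2^16 - 2^23 exactly and the final add performs
// the single rounding step. The name is illustrative.
static float uint32ToFloatModel(uint32_t X) {
  uint32_t LoBits = (X & 0xffffu) | 0x4b000000u; // float 2^23 + lo16
  uint32_t HiBits = (X >> 16) | 0x53000000u;     // float 2^39 + hi16 * 2^16
  float Lo, Hi;
  std::memcpy(&Lo, &LoBits, sizeof(Lo));
  std::memcpy(&Hi, &HiBits, sizeof(Hi));
  float FHi = Hi - (0x1.0p39f + 0x1.0p23f); // exact
  return Lo + FHi;                          // one correctly rounded result
}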
20489 | |
20490 | static SDValue lowerUINT_TO_FP_vec(SDValue Op, SelectionDAG &DAG, |
20491 | const X86Subtarget &Subtarget) { |
20492 | unsigned OpNo = Op.getNode()->isStrictFPOpcode() ? 1 : 0; |
20493 | SDValue N0 = Op.getOperand(OpNo); |
20494 | MVT SrcVT = N0.getSimpleValueType(); |
20495 | SDLoc dl(Op); |
20496 | |
20497 | switch (SrcVT.SimpleTy) { |
20498 | default: |
20499 | llvm_unreachable("Custom UINT_TO_FP is not supported!"); |
20500 | case MVT::v2i32: |
20501 | return lowerUINT_TO_FP_v2i32(Op, DAG, Subtarget, dl); |
20502 | case MVT::v4i32: |
20503 | case MVT::v8i32: |
20504 | return lowerUINT_TO_FP_vXi32(Op, DAG, Subtarget); |
20505 | case MVT::v2i64: |
20506 | case MVT::v4i64: |
20507 | return lowerINT_TO_FP_vXi64(Op, DAG, Subtarget); |
20508 | } |
20509 | } |
20510 | |
20511 | SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op, |
20512 | SelectionDAG &DAG) const { |
20513 | bool IsStrict = Op->isStrictFPOpcode(); |
20514 | unsigned OpNo = IsStrict ? 1 : 0; |
20515 | SDValue Src = Op.getOperand(OpNo); |
20516 | SDLoc dl(Op); |
20517 | auto PtrVT = getPointerTy(DAG.getDataLayout()); |
20518 | MVT SrcVT = Src.getSimpleValueType(); |
20519 | MVT DstVT = Op->getSimpleValueType(0); |
20520 | SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode(); |
20521 | |
20522 | if (DstVT == MVT::f128) |
20523 | return SDValue(); |
20524 | |
20525 | if (DstVT.isVector()) |
20526 | return lowerUINT_TO_FP_vec(Op, DAG, Subtarget); |
20527 | |
20528 | if (SDValue Extract = vectorizeExtractedCast(Op, DAG, Subtarget)) |
20529 | return Extract; |
20530 | |
20531 | if (Subtarget.hasAVX512() && isScalarFPTypeInSSEReg(DstVT) && |
20532 | (SrcVT == MVT::i32 || (SrcVT == MVT::i64 && Subtarget.is64Bit()))) { |
20533 | |
20534 | |
20535 | return Op; |
20536 | } |
20537 | |
20538 | |
20539 | if (SrcVT == MVT::i32 && Subtarget.is64Bit()) { |
20540 | Src = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Src); |
20541 | if (IsStrict) |
20542 | return DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, {DstVT, MVT::Other}, |
20543 | {Chain, Src}); |
20544 | return DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Src); |
20545 | } |
20546 | |
20547 | if (SDValue V = LowerI64IntToFP_AVX512DQ(Op, DAG, Subtarget)) |
20548 | return V; |
20549 | |
20550 | |
20551 | |
20552 | if (SrcVT == MVT::i64 && DstVT == MVT::f64 && X86ScalarSSEf64 && !IsStrict) |
20553 | return LowerUINT_TO_FP_i64(Op, DAG, Subtarget); |
20554 | if (SrcVT == MVT::i32 && X86ScalarSSEf64 && DstVT != MVT::f80) |
20555 | return LowerUINT_TO_FP_i32(Op, DAG, Subtarget); |
20556 | if (Subtarget.is64Bit() && SrcVT == MVT::i64 && |
20557 | (DstVT == MVT::f32 || DstVT == MVT::f64)) |
20558 | return SDValue(); |
20559 | |
20560 | |
20561 | SDValue StackSlot = DAG.CreateStackTemporary(MVT::i64, 8); |
20562 | int SSFI = cast<FrameIndexSDNode>(StackSlot)->getIndex(); |
20563 | Align SlotAlign(8); |
20564 | MachinePointerInfo MPI = |
20565 | MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI); |
20566 | if (SrcVT == MVT::i32) { |
20567 | SDValue OffsetSlot = |
20568 | DAG.getMemBasePlusOffset(StackSlot, TypeSize::Fixed(4), dl); |
20569 | SDValue Store1 = DAG.getStore(Chain, dl, Src, StackSlot, MPI, SlotAlign); |
20570 | SDValue Store2 = DAG.getStore(Store1, dl, DAG.getConstant(0, dl, MVT::i32), |
20571 | OffsetSlot, MPI.getWithOffset(4), SlotAlign); |
20572 | std::pair<SDValue, SDValue> Tmp = |
20573 | BuildFILD(DstVT, MVT::i64, dl, Store2, StackSlot, MPI, SlotAlign, DAG); |
20574 | if (IsStrict) |
20575 | return DAG.getMergeValues({Tmp.first, Tmp.second}, dl); |
20576 | |
20577 | return Tmp.first; |
20578 | } |
20579 | |
20580 | assert(SrcVT == MVT::i64 && "Unexpected type in UINT_TO_FP"); |
20581 | SDValue ValueToStore = Src; |
20582 | if (isScalarFPTypeInSSEReg(Op.getValueType()) && !Subtarget.is64Bit()) { |
20583 | // Bitcasting to f64 allows a single 64-bit store from an SSE register, |
20584 | // avoiding the store-forwarding penalty that two 32-bit stores would |
20585 | // incur. |
20586 | ValueToStore = DAG.getBitcast(MVT::f64, ValueToStore); |
20587 | } |
20588 | SDValue Store = |
20589 | DAG.getStore(Chain, dl, ValueToStore, StackSlot, MPI, SlotAlign); |
20590 | |
20591 | |
20592 | |
20593 | SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other); |
20594 | SDValue Ops[] = { Store, StackSlot }; |
20595 | SDValue Fild = |
20596 | DAG.getMemIntrinsicNode(X86ISD::FILD, dl, Tys, Ops, MVT::i64, MPI, |
20597 | SlotAlign, MachineMemOperand::MOLoad); |
20598 | Chain = Fild.getValue(1); |
20599 | |
20600 | |
20601 | |
20602 | SDValue SignSet = DAG.getSetCC( |
20603 | dl, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i64), |
20604 | Op.getOperand(OpNo), DAG.getConstant(0, dl, MVT::i64), ISD::SETLT); |
20605 | |
20606 | |
20607 | APInt FF(64, 0x5F80000000000000ULL); |
20608 | SDValue FudgePtr = DAG.getConstantPool( |
20609 | ConstantInt::get(*DAG.getContext(), FF), PtrVT); |
20610 | Align CPAlignment = cast<ConstantPoolSDNode>(FudgePtr)->getAlign(); |
20611 | |
20612 | |
20613 | SDValue Zero = DAG.getIntPtrConstant(0, dl); |
20614 | SDValue Four = DAG.getIntPtrConstant(4, dl); |
20615 | SDValue Offset = DAG.getSelect(dl, Zero.getValueType(), SignSet, Four, Zero); |
20616 | FudgePtr = DAG.getNode(ISD::ADD, dl, PtrVT, FudgePtr, Offset); |
20617 | |
20618 | |
20619 | SDValue Fudge = DAG.getExtLoad( |
20620 | ISD::EXTLOAD, dl, MVT::f80, Chain, FudgePtr, |
20621 | MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), MVT::f32, |
20622 | CPAlignment); |
20623 | Chain = Fudge.getValue(1); |
20624 | |
20625 | |
20626 | if (IsStrict) { |
20627 | SDValue Add = DAG.getNode(ISD::STRICT_FADD, dl, {MVT::f80, MVT::Other}, |
20628 | {Chain, Fild, Fudge}); |
20629 | |
20630 | if (DstVT == MVT::f80) |
20631 | return Add; |
20632 | return DAG.getNode(ISD::STRICT_FP_ROUND, dl, {DstVT, MVT::Other}, |
20633 | {Add.getValue(1), Add, DAG.getIntPtrConstant(0, dl)}); |
20634 | } |
20635 | SDValue Add = DAG.getNode(ISD::FADD, dl, MVT::f80, Fild, Fudge); |
20636 | return DAG.getNode(ISD::FP_ROUND, dl, DstVT, Add, |
20637 | DAG.getIntPtrConstant(0, dl)); |
20638 | } |
20639 | |
20640 | |
20641 | |
20642 | |
20643 | |
20644 | |
20645 | |
20646 | SDValue |
20647 | X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, |
20648 | bool IsSigned, SDValue &Chain) const { |
20649 | bool IsStrict = Op->isStrictFPOpcode(); |
20650 | SDLoc DL(Op); |
20651 | |
20652 | EVT DstTy = Op.getValueType(); |
20653 | SDValue Value = Op.getOperand(IsStrict ? 1 : 0); |
20654 | EVT TheVT = Value.getValueType(); |
20655 | auto PtrVT = getPointerTy(DAG.getDataLayout()); |
20656 | |
20657 | if (TheVT != MVT::f32 && TheVT != MVT::f64 && TheVT != MVT::f80) { |
20658 | |
20659 | |
20660 | return SDValue(); |
20661 | } |
20662 | |
20663 | |
20664 | |
20665 | |
20666 | bool UnsignedFixup = !IsSigned && DstTy == MVT::i64; |
20667 | |
20668 | |
20669 | |
20670 | if (!IsSigned && DstTy != MVT::i64) { |
20671 | |
20672 | |
20673 | assert(DstTy == MVT::i32 && "Unexpected FP_TO_UINT"); |
20674 | DstTy = MVT::i64; |
20675 | } |
20676 | |
20677 | assert(DstTy.getSimpleVT() <= MVT::i64 && |
20678 | DstTy.getSimpleVT() >= MVT::i16 && |
20679 | "Unknown FP_TO_INT to lower!"); |
20680 | |
20681 | |
20682 | |
20683 | MachineFunction &MF = DAG.getMachineFunction(); |
20684 | unsigned MemSize = DstTy.getStoreSize(); |
20685 | int SSFI = |
20686 | MF.getFrameInfo().CreateStackObject(MemSize, Align(MemSize), false); |
20687 | SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT); |
20688 | |
20689 | Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode(); |
20690 | |
20691 | SDValue Adjust; |
20692 | |
20693 | if (UnsignedFixup) { |
20694 | |
20695 | |
20696 | |
20697 | |
20698 | // Conversion to unsigned i64 is implemented with a select, depending |
20699 | // on whether the source value fits in the range of a signed i64. Let |
20700 | // Thresh be the FP equivalent of 0x8000000000000000ULL: |
20701 | // |
20702 | // Adjust = (Value >= Thresh) ? 0x8000000000000000 : 0 |
20703 | // FltOfs = (Value >= Thresh) ? Thresh : 0.0 |
20704 | // FistSrc = Value - FltOfs |
20705 | // Fist-to-mem64 FistSrc |
20706 | // Add 0 or 0x800...0ULL to the 64-bit result, which is equivalent to |
20707 | // XOR'ing the high bit with Adjust. |
20708 | // |
20709 | // Being a power of two, Thresh is exactly representable in all FP |
20710 | // formats used below. |
20711 | APFloat Thresh(APFloat::IEEEsingle(), APInt(32, 0x5f000000)); |
20712 | LLVM_ATTRIBUTE_UNUSED APFloat::opStatus Status = APFloat::opOK; |
20713 | bool LosesInfo = false; |
20714 | if (TheVT == MVT::f64) |
20715 | |
20716 | // The rounding mode is irrelevant as the conversion should be exact. |
20717 | &LosesInfo); |
20718 | else if (TheVT == MVT::f80) |
20719 | Status = Thresh.convert(APFloat::x87DoubleExtended(), |
20720 | APFloat::rmNearestTiesToEven, &LosesInfo); |
20721 | |
20722 | assert(Status == APFloat::opOK && !LosesInfo && |
20723 | "FP conversion should have been exact"); |
20724 | |
20725 | SDValue ThreshVal = DAG.getConstantFP(Thresh, DL, TheVT); |
20726 | |
20727 | EVT ResVT = getSetCCResultType(DAG.getDataLayout(), |
20728 | *DAG.getContext(), TheVT); |
20729 | SDValue Cmp; |
20730 | if (IsStrict) { |
20731 | Cmp = DAG.getSetCC(DL, ResVT, Value, ThreshVal, ISD::SETGE, Chain, |
20732 | true); |
20733 | Chain = Cmp.getValue(1); |
20734 | } else { |
20735 | Cmp = DAG.getSetCC(DL, ResVT, Value, ThreshVal, ISD::SETGE); |
20736 | } |
20737 | |
20738 | |
20739 | |
20740 | |
20741 | |
20742 | |
20743 | |
20744 | |
20745 | |
20746 | |
20747 | |
20748 | |
20749 | SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Cmp); |
20750 | SDValue Const63 = DAG.getConstant(63, DL, MVT::i8); |
20751 | Adjust = DAG.getNode(ISD::SHL, DL, MVT::i64, Zext, Const63); |
20752 | |
20753 | SDValue FltOfs = DAG.getSelect(DL, TheVT, Cmp, ThreshVal, |
20754 | DAG.getConstantFP(0.0, DL, TheVT)); |
20755 | |
20756 | if (IsStrict) { |
20757 | Value = DAG.getNode(ISD::STRICT_FSUB, DL, { TheVT, MVT::Other}, |
20758 | { Chain, Value, FltOfs }); |
20759 | Chain = Value.getValue(1); |
20760 | } else |
20761 | Value = DAG.getNode(ISD::FSUB, DL, TheVT, Value, FltOfs); |
20762 | } |
20763 | |
20764 | MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, SSFI); |
20765 | |
20766 | |
20767 | |
20768 | if (isScalarFPTypeInSSEReg(TheVT)) { |
20769 | assert(DstTy == MVT::i64 && "Invalid FP_TO_SINT to lower!"); |
20770 | Chain = DAG.getStore(Chain, DL, Value, StackSlot, MPI); |
20771 | SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other); |
20772 | SDValue Ops[] = { Chain, StackSlot }; |
20773 | |
20774 | unsigned FLDSize = TheVT.getStoreSize(); |
20775 | assert(FLDSize <= MemSize && "Stack slot not big enough"); |
20776 | MachineMemOperand *MMO = MF.getMachineMemOperand( |
20777 | MPI, MachineMemOperand::MOLoad, FLDSize, Align(FLDSize)); |
20778 | Value = DAG.getMemIntrinsicNode(X86ISD::FLD, DL, Tys, Ops, TheVT, MMO); |
20779 | Chain = Value.getValue(1); |
20780 | } |
20781 | |
20782 | // Build the FP_TO_INT*_IN_MEM node that performs the actual FIST. |
20783 | MachineMemOperand *MMO = MF.getMachineMemOperand( |
20784 | MPI, MachineMemOperand::MOStore, MemSize, Align(MemSize)); |
20785 | SDValue Ops[] = { Chain, Value, StackSlot }; |
20786 | SDValue FIST = DAG.getMemIntrinsicNode(X86ISD::FP_TO_INT_IN_MEM, DL, |
20787 | DAG.getVTList(MVT::Other), |
20788 | Ops, DstTy, MMO); |
20789 | |
20790 | SDValue Res = DAG.getLoad(Op.getValueType(), SDLoc(Op), FIST, StackSlot, MPI); |
20791 | Chain = Res.getValue(1); |
20792 | |
20793 | // If we needed the unsigned fixup, XOR the result with Adjust. |
20794 | if (UnsignedFixup) |
20795 | Res = DAG.getNode(ISD::XOR, DL, MVT::i64, Res, Adjust); |
20796 | |
20797 | return Res; |
20798 | } |
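// Illustrative sketch (not from this file; names and plain-C semantics are
// assumptions): the unsigned-fixup idea above, modeled on scalars.
//
//   #include <cstdint>
//   uint64_t f64_to_u64_via_signed(double Value) {
//     const double Thresh = 9223372036854775808.0; // 2^63
//     uint64_t Adjust = Value >= Thresh ? 0x8000000000000000ULL : 0;
//     double FltOfs = Value >= Thresh ? Thresh : 0.0;
//     int64_t Signed = (int64_t)(Value - FltOfs); // stands in for the FIST
//     return (uint64_t)Signed ^ Adjust;           // XOR restores the high bit
//   }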
20799 | |
20800 | static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG, |
20801 | const X86Subtarget &Subtarget) { |
20802 | MVT VT = Op.getSimpleValueType(); |
20803 | SDValue In = Op.getOperand(0); |
20804 | MVT InVT = In.getSimpleValueType(); |
20805 | SDLoc dl(Op); |
20806 | unsigned Opc = Op.getOpcode(); |
20807 | |
20808 | assert(VT.isVector() && InVT.isVector() && "Expected vector type"); |
20809 | assert((Opc == ISD::ANY_EXTEND || Opc == ISD::ZERO_EXTEND) && |
20810 | "Unexpected extension opcode"); |
20811 | assert(VT.getVectorNumElements() == InVT.getVectorNumElements() && |
20812 | "Expected same number of elements"); |
20813 | assert((VT.getVectorElementType() == MVT::i16 || |
20814 | VT.getVectorElementType() == MVT::i32 || |
20815 | VT.getVectorElementType() == MVT::i64) && |
20816 | "Unexpected element type"); |
20817 | assert((InVT.getVectorElementType() == MVT::i8 || |
20818 | InVT.getVectorElementType() == MVT::i16 || |
20819 | InVT.getVectorElementType() == MVT::i32) && |
20820 | "Unexpected element type"); |
20821 | |
20822 | unsigned ExtendInVecOpc = getOpcode_EXTEND_VECTOR_INREG(Opc); |
20823 | |
20824 | if (VT == MVT::v32i16 && !Subtarget.hasBWI()) { |
20825 | assert(InVT == MVT::v32i8 && "Unexpected VT!"); |
20826 | return splitVectorIntUnary(Op, DAG); |
20827 | } |
20828 | |
20829 | if (Subtarget.hasInt256()) |
20830 | return Op; |
20831 | |
20832 | // Optimize vectors in AVX mode: |
20833 | // |
20834 | //   v8i16 -> v8i32 |
20835 | //   Use vpmovzxwd for the 4 lower elements  v8i16 -> v4i32. |
20836 | //   Use vpunpckhwd for the 4 upper elements v8i16 -> v4i32. |
20837 | //   Concat upper and lower parts. |
20838 | // |
20839 | //   v4i32 -> v4i64 |
20840 | //   Use vpmovzxdq for the 2 lower elements  v4i32 -> v2i64. |
20841 | //   Use vpunpckhdq for the 2 upper elements v4i32 -> v2i64. |
20842 | //   Concat upper and lower parts. |
20843 | // |
20844 | MVT HalfVT = VT.getHalfNumVectorElementsVT(); |
20845 | SDValue OpLo = DAG.getNode(ExtendInVecOpc, dl, HalfVT, In); |
20846 | |
20847 | // Short-circuit if we can determine that each 128-bit half is the |
20848 | // same value; otherwise this is difficult to match and optimize. |
20849 | if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(In)) |
20850 | if (hasIdenticalHalvesShuffleMask(Shuf->getMask())) |
20851 | return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpLo); |
20852 | |
20853 | SDValue ZeroVec = DAG.getConstant(0, dl, InVT); |
20854 | SDValue Undef = DAG.getUNDEF(InVT); |
20855 | bool NeedZero = Opc == ISD::ZERO_EXTEND; |
20856 | SDValue OpHi = getUnpackh(DAG, dl, InVT, In, NeedZero ? ZeroVec : Undef); |
20857 | OpHi = DAG.getBitcast(HalfVT, OpHi); |
20858 | |
20859 | return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi); |
20860 | } |
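// Illustrative sketch (not from this file): the split extend above computes
// the low half with one EXTEND_VECTOR_INREG (e.g. vpmovzxwd) and the high
// half by interleaving with zero (vpunpckhwd), then concatenates. A scalar
// model of the zero-extending unpack-high for v8i16 -> v4i32:
//
//   void zext_hi(const uint16_t In[8], uint32_t Out[4]) {
//     for (int i = 0; i < 4; ++i)
//       Out[i] = In[4 + i]; // interleaving with zeros == zero-extending
//   }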
20861 | |
20862 | // Helper to split and extend a v16i1 mask to v16i8 or v16i16. |
20863 | static SDValue SplitAndExtendv16i1(unsigned ExtOpc, MVT VT, SDValue In, |
20864 | const SDLoc &dl, SelectionDAG &DAG) { |
20865 | assert((VT == MVT::v16i8 || VT == MVT::v16i16) && "Unexpected VT."); |
20866 | SDValue Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v8i1, In, |
20867 | DAG.getIntPtrConstant(0, dl)); |
20868 | SDValue Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v8i1, In, |
20869 | DAG.getIntPtrConstant(8, dl)); |
20870 | Lo = DAG.getNode(ExtOpc, dl, MVT::v8i16, Lo); |
20871 | Hi = DAG.getNode(ExtOpc, dl, MVT::v8i16, Hi); |
20872 | SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v16i16, Lo, Hi); |
20873 | return DAG.getNode(ISD::TRUNCATE, dl, VT, Res); |
20874 | } |
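// Illustrative note (not from this file): without 512-bit DQ support a
// v16i1 mask cannot be extended in one operation, so the helper above
// extends each v8i1 half to v8i16 (a legal mask extend), concatenates to
// v16i16, and truncates to the requested element type.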
20875 | |
20876 | static SDValue LowerZERO_EXTEND_Mask(SDValue Op, |
20877 | const X86Subtarget &Subtarget, |
20878 | SelectionDAG &DAG) { |
20879 | MVT VT = Op->getSimpleValueType(0); |
20880 | SDValue In = Op->getOperand(0); |
20881 | MVT InVT = In.getSimpleValueType(); |
20882 | assert(InVT.getVectorElementType() == MVT::i1 && "Unexpected input type!"); |
20883 | SDLoc DL(Op); |
20884 | unsigned NumElts = VT.getVectorNumElements(); |
20885 | |
20886 | // For all vectors but vXi8 we can just emit a sign_extend and a |
20887 | // shift; this avoids a constant pool load. |
20888 | if (VT.getVectorElementType() != MVT::i8) { |
20889 | SDValue Extend = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, In); |
20890 | return DAG.getNode(ISD::SRL, DL, VT, Extend, |
20891 | DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT)); |
20892 | } |
20893 | |
20894 | // Extend to a wider element type if BWI is not supported. |
20895 | MVT ExtVT = VT; |
20896 | if (!Subtarget.hasBWI()) { |
20897 | // If v16i32 is to be avoided, split, extend and concatenate instead. |
20898 | if (NumElts == 16 && !Subtarget.canExtendTo512DQ()) |
20899 | return SplitAndExtendv16i1(ISD::ZERO_EXTEND, VT, In, DL, DAG); |
20900 | |
20901 | ExtVT = MVT::getVectorVT(MVT::i32, NumElts); |
20902 | } |
20903 | |
20904 | // Widen to 512 bits if VLX is not supported. |
20905 | MVT WideVT = ExtVT; |
20906 | if (!ExtVT.is512BitVector() && !Subtarget.hasVLX()) { |
20907 | NumElts *= 512 / ExtVT.getSizeInBits(); |
20908 | InVT = MVT::getVectorVT(MVT::i1, NumElts); |
20909 | In = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT, DAG.getUNDEF(InVT), |
20910 | In, DAG.getIntPtrConstant(0, DL)); |
20911 | WideVT = MVT::getVectorVT(ExtVT.getVectorElementType(), |
20912 | NumElts); |
20913 | } |
20914 | |
20915 | SDValue One = DAG.getConstant(1, DL, WideVT); |
20916 | SDValue Zero = DAG.getConstant(0, DL, WideVT); |
20917 | |
20918 | SDValue SelectedVal = DAG.getSelect(DL, WideVT, In, One, Zero); |
20919 | |
20920 | // Truncate if we had to extend i16/i8 above. |
20921 | if (VT != ExtVT) { |
20922 | WideVT = MVT::getVectorVT(MVT::i8, NumElts); |
20923 | SelectedVal = DAG.getNode(ISD::TRUNCATE, DL, WideVT, SelectedVal); |
20924 | } |
20925 | |
20926 | // Extract back to 128/256 bits if we widened. |
20927 | if (WideVT != VT) |
20928 | SelectedVal = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, SelectedVal, |
20929 | DAG.getIntPtrConstant(0, DL)); |
20930 | |
20931 | return SelectedVal; |
20932 | } |
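// Illustrative note (not from this file): the lowering above is a per-lane
// select, Out[i] = Mask[i] ? 1 : 0, widened so the subtarget has a legal
// VSELECT, then truncated/extracted back to the requested type.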
20933 | |
20934 | static SDValue LowerZERO_EXTEND(SDValue Op, const X86Subtarget &Subtarget, |
20935 | SelectionDAG &DAG) { |
20936 | SDValue In = Op.getOperand(0); |
20937 | MVT SVT = In.getSimpleValueType(); |
20938 | |
20939 | if (SVT.getVectorElementType() == MVT::i1) |
20940 | return LowerZERO_EXTEND_Mask(Op, Subtarget, DAG); |
20941 | |
20942 | assert(Subtarget.hasAVX() && "Expected AVX support"); |
20943 | return LowerAVXExtend(Op, DAG, Subtarget); |
20944 | } |
20945 | |
20946 | // Helper to recursively truncate vector elements in half with |
20947 | // PACKSS/PACKUS. It exploits the fact that vectors with enough |
20948 | // leading sign/zero bits prevent PACKSS/PACKUS from saturating the |
20949 | // results. AVX2 (Int256) subtargets require extra shuffling as the |
20950 | // PACK*S instructions operate within each 128-bit lane. |
20951 | static SDValue truncateVectorWithPACK(unsigned Opcode, EVT DstVT, SDValue In, |
20952 | const SDLoc &DL, SelectionDAG &DAG, |
20953 | const X86Subtarget &Subtarget) { |
20954 | assert((Opcode == X86ISD::PACKSS || Opcode == X86ISD::PACKUS) && |
20955 | "Unexpected PACK opcode"); |
20956 | assert(DstVT.isVector() && "VT not a vector?"); |
20957 | |
20958 | // Requires SSE2 for the PACK instructions. |
20959 | if (!Subtarget.hasSSE2()) |
20960 | return SDValue(); |
20961 | |
20962 | EVT SrcVT = In.getValueType(); |
20963 | |
20964 | // No truncation required; we can get here from recursive calls. |
20965 | if (SrcVT == DstVT) |
20966 | return In; |
20967 | |
20968 | // We only support truncation to 64 bits or greater from a source of |
20969 | // 128 bits or greater. |
20970 | unsigned DstSizeInBits = DstVT.getSizeInBits(); |
20971 | unsigned SrcSizeInBits = SrcVT.getSizeInBits(); |
20972 | if ((DstSizeInBits % 64) != 0 || (SrcSizeInBits % 128) != 0) |
20973 | return SDValue(); |
20974 | |
20975 | unsigned NumElems = SrcVT.getVectorNumElements(); |
20976 | if (!isPowerOf2_32(NumElems)) |
20977 | return SDValue(); |
20978 | |
20979 | LLVMContext &Ctx = *DAG.getContext(); |
20980 | assert(DstVT.getVectorNumElements() == NumElems && "Illegal truncation"); |
20981 | assert(SrcSizeInBits > DstSizeInBits && "Illegal truncation"); |
20982 | |
20983 | EVT PackedSVT = EVT::getIntegerVT(Ctx, SrcVT.getScalarSizeInBits() / 2); |
20984 | |
20985 | // Pack to the largest type possible: |
20986 | // vXi64/vXi32 -> PACK*SDW and vXi16 -> PACK*SWB. |
20987 | EVT InVT = MVT::i16, OutVT = MVT::i8; |
20988 | if (SrcVT.getScalarSizeInBits() > 16 && |
20989 | (Opcode == X86ISD::PACKSS || Subtarget.hasSSE41())) { |
20990 | InVT = MVT::i32; |
20991 | OutVT = MVT::i16; |
20992 | } |
20993 | |
20994 | // 128-bit -> 64-bit truncate - PACK the 128-bit src into the lower subvector. |
20995 | if (SrcVT.is128BitVector()) { |
20996 | InVT = EVT::getVectorVT(Ctx, InVT, 128 / InVT.getSizeInBits()); |
20997 | OutVT = EVT::getVectorVT(Ctx, OutVT, 128 / OutVT.getSizeInBits()); |
20998 | In = DAG.getBitcast(InVT, In); |
20999 | SDValue Res = DAG.getNode(Opcode, DL, OutVT, In, DAG.getUNDEF(InVT)); |
21000 | Res = extractSubVector(Res, 0, DAG, DL, 64); |
21001 | return DAG.getBitcast(DstVT, Res); |
21002 | } |
21003 | |
21004 | // Split into lower/upper subvectors. |
21005 | SDValue Lo, Hi; |
21006 | std::tie(Lo, Hi) = splitVector(In, DAG, DL); |
21007 | |
21008 | unsigned SubSizeInBits = SrcSizeInBits / 2; |
21009 | InVT = EVT::getVectorVT(Ctx, InVT, SubSizeInBits / InVT.getSizeInBits()); |
21010 | OutVT = EVT::getVectorVT(Ctx, OutVT, SubSizeInBits / OutVT.getSizeInBits()); |
21011 | |
21012 | // 256-bit -> 128-bit truncate - PACK lower/upper 128-bit subvectors. |
21013 | if (SrcVT.is256BitVector() && DstVT.is128BitVector()) { |
21014 | Lo = DAG.getBitcast(InVT, Lo); |
21015 | Hi = DAG.getBitcast(InVT, Hi); |
21016 | SDValue Res = DAG.getNode(Opcode, DL, OutVT, Lo, Hi); |
21017 | return DAG.getBitcast(DstVT, Res); |
21018 | } |
21019 | |
21020 | // AVX2: 512-bit -> 256-bit truncate - PACK lower/upper 256-bit subvectors. |
21021 | // AVX2: 512-bit -> 128-bit truncate - PACK(PACK, PACK). |
21022 | if (SrcVT.is512BitVector() && Subtarget.hasInt256()) { |
21023 | Lo = DAG.getBitcast(InVT, Lo); |
21024 | Hi = DAG.getBitcast(InVT, Hi); |
21025 | SDValue Res = DAG.getNode(Opcode, DL, OutVT, Lo, Hi); |
21026 | |
21027 | // 256-bit PACK(LO, HI) leaves us with ((LO0,HI0),(LO1,HI1)), so we |
21028 | // need a shuffle to get ((LO0,LO1),(HI0,HI1)). Scale the shuffle |
21029 | // mask to avoid bitcasts and help later combines. |
21030 | SmallVector<int, 64> Mask; |
21031 | int Scale = 64 / OutVT.getScalarSizeInBits(); |
21032 | narrowShuffleMaskElts(Scale, { 0, 2, 1, 3 }, Mask); |
21033 | Res = DAG.getVectorShuffle(OutVT, DL, Res, Res, Mask); |
21034 | |
21035 | if (DstVT.is256BitVector()) |
21036 | return DAG.getBitcast(DstVT, Res); |
21037 | |
21038 | // If 512-bit -> 128-bit, truncate another stage. |
21039 | EVT PackedVT = EVT::getVectorVT(Ctx, PackedSVT, NumElems); |
21040 | Res = DAG.getBitcast(PackedVT, Res); |
21041 | return truncateVectorWithPACK(Opcode, DstVT, Res, DL, DAG, Subtarget); |
21042 | } |
21043 | |
21044 | // Recursively pack lower/upper subvectors, concat result and pack again. |
21045 | assert(SrcSizeInBits >= 256 && "Expected 256-bit vector or greater"); |
21046 | EVT PackedVT = EVT::getVectorVT(Ctx, PackedSVT, NumElems / 2); |
21047 | Lo = truncateVectorWithPACK(Opcode, PackedVT, Lo, DL, DAG, Subtarget); |
21048 | Hi = truncateVectorWithPACK(Opcode, PackedVT, Hi, DL, DAG, Subtarget); |
21049 | |
21050 | PackedVT = EVT::getVectorVT(Ctx, PackedSVT, NumElems); |
21051 | SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, PackedVT, Lo, Hi); |
21052 | return truncateVectorWithPACK(Opcode, DstVT, Res, DL, DAG, Subtarget); |
21053 | } |
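// Illustrative sketch (not from this file): PACK instructions saturate
// rather than truncate, which is why callers must prove leading zero/sign
// bits first. A scalar model of one PACKUSWB lane:
//
//   uint8_t packus16(int16_t v) {          // unsigned saturation
//     return v < 0 ? 0 : v > 255 ? 255 : (uint8_t)v;
//   }
//
// packus16(0x0042) == 0x42 (exact when the top byte is zero), while
// packus16(0x1234) == 0xff, so a PACK is only a truncate when the dropped
// bits are already zero (PACKUS) or copies of the sign bit (PACKSS).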
21054 | |
21055 | static SDValue LowerTruncateVecI1(SDValue Op, SelectionDAG &DAG, |
21056 | const X86Subtarget &Subtarget) { |
21057 | |
21058 | SDLoc DL(Op); |
21059 | MVT VT = Op.getSimpleValueType(); |
21060 | SDValue In = Op.getOperand(0); |
21061 | MVT InVT = In.getSimpleValueType(); |
21062 | |
21063 | assert(VT.getVectorElementType() == MVT::i1 && "Unexpected vector type."); |
21064 | |
21065 | // Shift the LSB into the sign bit and use VPMOVB2M/VPMOVW2M. |
21066 | unsigned ShiftInx = InVT.getScalarSizeInBits() - 1; |
21067 | if (InVT.getScalarSizeInBits() <= 16) { |
21068 | if (Subtarget.hasBWI()) { |
21069 | |
21070 | if (DAG.ComputeNumSignBits(In) < InVT.getScalarSizeInBits()) { |
21071 | // We need to shift the LSB into the sign position. Shifting packed |
21072 | // bytes is not supported natively, so bitcast to words first. |
21073 | MVT ExtVT = MVT::getVectorVT(MVT::i16, InVT.getSizeInBits()/16); |
21074 | In = DAG.getNode(ISD::SHL, DL, ExtVT, |
21075 | DAG.getBitcast(ExtVT, In), |
21076 | DAG.getConstant(ShiftInx, DL, ExtVT)); |
21077 | In = DAG.getBitcast(InVT, In); |
21078 | } |
21079 | return DAG.getSetCC(DL, VT, DAG.getConstant(0, DL, InVT), |
21080 | In, ISD::SETGT); |
21081 | } |
21082 | |
21083 | assert((InVT.is256BitVector() || InVT.is128BitVector()) && |
21084 | "Unexpected vector type."); |
21085 | unsigned NumElts = InVT.getVectorNumElements(); |
21086 | assert((NumElts == 8 || NumElts == 16) && "Unexpected number of elements"); |
21087 | // We need a wider element type to truncate from. With 16 elements |
21088 | // and no 512-bit support we must split into two halves, extend each |
21089 | // to v8i32, truncate each to v8i1 and concatenate the results. |
21090 | // |
21091 | // A v16i8 source cannot simply be split in registers, so its upper |
21092 | // eight lanes are first moved down with a shuffle and sign-extended |
21093 | // in-register to v8i32; a v16i16 source can be split directly by |
21094 | // extracting its two 128-bit halves. |
21095 | // |
21096 | if (NumElts == 16 && !Subtarget.canExtendTo512DQ()) { |
21097 | SDValue Lo, Hi; |
21098 | if (InVT == MVT::v16i8) { |
21099 | Lo = DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, DL, MVT::v8i32, In); |
21100 | Hi = DAG.getVectorShuffle( |
21101 | InVT, DL, In, In, |
21102 | {8, 9, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1}); |
21103 | Hi = DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, DL, MVT::v8i32, Hi); |
21104 | } else { |
21105 | assert(InVT == MVT::v16i16 && "Unexpected VT!"); |
21106 | Lo = extract128BitVector(In, 0, DAG, DL); |
21107 | Hi = extract128BitVector(In, 8, DAG, DL); |
21108 | } |
21109 | |
21110 | |
21111 | Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::v8i1, Lo); |
21112 | Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::v8i1, Hi); |
21113 | return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi); |
21114 | } |
21115 | |
21116 | // Extend to an element type we can truncate from: i32 with VLX, |
21117 | // otherwise whatever element width fills a 512-bit vector. |
21118 | MVT EltVT = Subtarget.hasVLX() ? MVT::i32 : MVT::getIntegerVT(512/NumElts); |
21119 | MVT ExtVT = MVT::getVectorVT(EltVT, NumElts); |
21120 | In = DAG.getNode(ISD::SIGN_EXTEND, DL, ExtVT, In); |
21121 | InVT = ExtVT; |
21122 | ShiftInx = InVT.getScalarSizeInBits() - 1; |
21123 | } |
21124 | |
21125 | if (DAG.ComputeNumSignBits(In) < InVT.getScalarSizeInBits()) { |
21126 | // Shift the LSB into the sign bit position. |
21127 | In = DAG.getNode(ISD::SHL, DL, InVT, In, |
21128 | DAG.getConstant(ShiftInx, DL, InVT)); |
21129 | } |
21130 | |
21131 | if (Subtarget.hasDQI()) |
21132 | return DAG.getSetCC(DL, VT, DAG.getConstant(0, DL, InVT), In, ISD::SETGT); |
21133 | return DAG.getSetCC(DL, VT, In, DAG.getConstant(0, DL, InVT), ISD::SETNE); |
21134 | } |
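// Illustrative note (not from this file): truncation to vXi1 only needs
// the low bit of each lane. The shifts above move that bit into the sign
// position so a single signed compare recovers it, i.e. per lane:
//
//   bool bit0 = (int16_t)(x << 15) < 0;  // lsb moved to the sign bit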
21135 | |
21136 | SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const { |
21137 | SDLoc DL(Op); |
21138 | MVT VT = Op.getSimpleValueType(); |
21139 | SDValue In = Op.getOperand(0); |
21140 | MVT InVT = In.getSimpleValueType(); |
21141 | unsigned InNumEltBits = InVT.getScalarSizeInBits(); |
21142 | |
21143 | assert(VT.getVectorNumElements() == InVT.getVectorNumElements() && |
21144 | "Invalid TRUNCATE operation"); |
21145 | |
21146 | // If called by the type legalizer, handle a few cases directly. |
21147 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
21148 | if (!TLI.isTypeLegal(InVT)) { |
21149 | if ((InVT == MVT::v8i64 || InVT == MVT::v16i32 || InVT == MVT::v16i64) && |
21150 | VT.is128BitVector()) { |
21151 | assert((InVT == MVT::v16i64 || Subtarget.hasVLX()) && |
21152 | "Unexpected subtarget!"); |
21153 | // The default behavior is to truncate one step, concatenate, and |
21154 | // then truncate the remainder. We'd rather produce two 64-bit |
21155 | // results and concatenate those. |
21156 | SDValue Lo, Hi; |
21157 | std::tie(Lo, Hi) = DAG.SplitVector(In, DL); |
21158 | |
21159 | EVT LoVT, HiVT; |
21160 | std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT); |
21161 | |
21162 | Lo = DAG.getNode(ISD::TRUNCATE, DL, LoVT, Lo); |
21163 | Hi = DAG.getNode(ISD::TRUNCATE, DL, HiVT, Hi); |
21164 | return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi); |
21165 | } |
21166 | |
21167 | // Otherwise let default legalization handle it. |
21168 | return SDValue(); |
21169 | } |
21170 | |
21171 | if (VT.getVectorElementType() == MVT::i1) |
21172 | return LowerTruncateVecI1(Op, DAG, Subtarget); |
21173 | |
21174 | // vpmovqb/w/d, vpmovdb/w, vpmovwb |
21175 | if (Subtarget.hasAVX512()) { |
21176 | if (InVT == MVT::v32i16 && !Subtarget.hasBWI()) { |
21177 | assert(VT == MVT::v32i8 && "Unexpected VT!"); |
21178 | return splitVectorIntUnary(Op, DAG); |
21179 | } |
21180 | |
21181 | // With AVX512 there is a v16i16->v16i8 truncate only under BWI; |
21182 | // otherwise we must promote to v16i32 and truncate that, but we |
21183 | // should only do so if we have not been asked to avoid 512-bit |
21184 | // vectors. The promotion itself is handled by isel patterns. |
21185 | if (InVT != MVT::v16i16 || Subtarget.hasBWI() || |
21186 | Subtarget.canExtendTo512DQ()) |
21187 | return Op; |
21188 | } |
21189 | |
21190 | unsigned NumPackedSignBits = std::min<unsigned>(VT.getScalarSizeInBits(), 16); |
21191 | unsigned NumPackedZeroBits = Subtarget.hasSSE41() ? NumPackedSignBits : 8; |
21192 | |
21193 | // Truncate with PACKUS if we are truncating a vector with leading |
21194 | // zero bits that extend all the way to the packed/truncated value. |
21195 | // Pre-SSE41 we can only use PACKUSWB. |
21196 | KnownBits Known = DAG.computeKnownBits(In); |
21197 | if ((InNumEltBits - NumPackedZeroBits) <= Known.countMinLeadingZeros()) |
21198 | if (SDValue V = |
21199 | truncateVectorWithPACK(X86ISD::PACKUS, VT, In, DL, DAG, Subtarget)) |
21200 | return V; |
21201 | |
21202 | // Truncate with PACKSS if we are truncating a vector with sign bits |
21203 | // that extend all the way to the packed/truncated value. |
21204 | if ((InNumEltBits - NumPackedSignBits) < DAG.ComputeNumSignBits(In)) |
21205 | if (SDValue V = |
21206 | truncateVectorWithPACK(X86ISD::PACKSS, VT, In, DL, DAG, Subtarget)) |
21207 | return V; |
21208 | |
21209 | // Handle 256-bit -> 128-bit truncation using shuffles. |
21210 | assert(VT.is128BitVector() && InVT.is256BitVector() && "Unexpected types!"); |
21211 | |
21212 | if ((VT == MVT::v4i32) && (InVT == MVT::v4i64)) { |
21213 | In = DAG.getBitcast(MVT::v8i32, In); |
21214 | |
21215 | // On AVX2, v4i64 -> v4i32 becomes VPERMD. |
21216 | if (Subtarget.hasInt256()) { |
21217 | static const int ShufMask[] = {0, 2, 4, 6, -1, -1, -1, -1}; |
21218 | In = DAG.getVectorShuffle(MVT::v8i32, DL, In, In, ShufMask); |
21219 | return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, In, |
21220 | DAG.getIntPtrConstant(0, DL)); |
21221 | } |
21222 | |
21223 | SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i32, In, |
21224 | DAG.getIntPtrConstant(0, DL)); |
21225 | SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i32, In, |
21226 | DAG.getIntPtrConstant(4, DL)); |
21227 | static const int ShufMask[] = {0, 2, 4, 6}; |
21228 | return DAG.getVectorShuffle(VT, DL, OpLo, OpHi, ShufMask); |
21229 | } |
21230 | |
21231 | if ((VT == MVT::v8i16) && (InVT == MVT::v8i32)) { |
21232 | In = DAG.getBitcast(MVT::v32i8, In); |
21233 | |
21234 | // On AVX2, v8i32 -> v8i16 becomes PSHUFB. |
21235 | if (Subtarget.hasInt256()) { |
21236 | |
21237 | static const int ShufMask1[] = { 0, 1, 4, 5, 8, 9, 12, 13, |
21238 | -1, -1, -1, -1, -1, -1, -1, -1, |
21239 | 16, 17, 20, 21, 24, 25, 28, 29, |
21240 | -1, -1, -1, -1, -1, -1, -1, -1 }; |
21241 | In = DAG.getVectorShuffle(MVT::v32i8, DL, In, In, ShufMask1); |
21242 | In = DAG.getBitcast(MVT::v4i64, In); |
21243 | |
21244 | static const int ShufMask2[] = {0, 2, -1, -1}; |
21245 | In = DAG.getVectorShuffle(MVT::v4i64, DL, In, In, ShufMask2); |
21246 | return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i16, |
21247 | DAG.getBitcast(MVT::v16i16, In), |
21248 | DAG.getIntPtrConstant(0, DL)); |
21249 | } |
21250 | |
21251 | SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v16i8, In, |
21252 | DAG.getIntPtrConstant(0, DL)); |
21253 | SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v16i8, In, |
21254 | DAG.getIntPtrConstant(16, DL)); |
21255 | |
21256 | |
21257 | static const int ShufMask1[] = {0, 1, 4, 5, 8, 9, 12, 13, |
21258 | -1, -1, -1, -1, -1, -1, -1, -1}; |
21259 | |
21260 | OpLo = DAG.getVectorShuffle(MVT::v16i8, DL, OpLo, OpLo, ShufMask1); |
21261 | OpHi = DAG.getVectorShuffle(MVT::v16i8, DL, OpHi, OpHi, ShufMask1); |
21262 | |
21263 | OpLo = DAG.getBitcast(MVT::v4i32, OpLo); |
21264 | OpHi = DAG.getBitcast(MVT::v4i32, OpHi); |
21265 | |
21266 | |
21267 | static const int ShufMask2[] = {0, 1, 4, 5}; |
21268 | SDValue res = DAG.getVectorShuffle(MVT::v4i32, DL, OpLo, OpHi, ShufMask2); |
21269 | return DAG.getBitcast(MVT::v8i16, res); |
21270 | } |
21271 | |
21272 | if (VT == MVT::v16i8 && InVT == MVT::v16i16) { |
21273 | // Zero the upper byte of each word with an AND, then PACKUS. |
21274 | In = DAG.getNode(ISD::AND, DL, InVT, In, DAG.getConstant(255, DL, InVT)); |
21275 | |
21276 | SDValue InLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i16, In, |
21277 | DAG.getIntPtrConstant(0, DL)); |
21278 | SDValue InHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i16, In, |
21279 | DAG.getIntPtrConstant(8, DL)); |
21280 | return DAG.getNode(X86ISD::PACKUS, DL, VT, InLo, InHi); |
21281 | } |
21282 | |
21283 | llvm_unreachable("All 256->128 cases should have been handled above!"); |
21284 | } |
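// Illustrative summary (not from this file) of the strategy order in
// LowerTRUNCATE above: split illegal types, use mask-register truncation
// for vXi1, use native VPMOV* truncates on AVX512, try PACKUS/PACKSS when
// known zero/sign bits allow, and finally fall back to the fixed
// 256->128-bit shuffle sequences.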
21285 | |
21286 | // Lower vXf32/vXf64 FP_TO_UINT to vXi32 using signed conversions, |
21287 | // since pre-AVX512 hardware has no unsigned vector converts. |
21288 | static SDValue expandFP_TO_UINT_SSE(MVT VT, SDValue Src, const SDLoc &dl, |
21289 | SelectionDAG &DAG, |
21290 | const X86Subtarget &Subtarget) { |
21291 | MVT SrcVT = Src.getSimpleValueType(); |
21292 | unsigned DstBits = VT.getScalarSizeInBits(); |
21293 | assert(DstBits == 32 && "expandFP_TO_UINT_SSE - only vXi32 supported"); |
21294 | |
21295 | // Convert the result for inputs in [0, 2^31) ("Small") and for |
21296 | // inputs in [2^31, 2^32) ("Big", biased down by 2^31) separately. |
21297 | SDValue Small = DAG.getNode(X86ISD::CVTTP2SI, dl, VT, Src); |
21298 | SDValue Big = |
21299 | DAG.getNode(X86ISD::CVTTP2SI, dl, VT, |
21300 | DAG.getNode(ISD::FSUB, dl, SrcVT, Src, |
21301 | DAG.getConstantFP(2147483648.0f, dl, SrcVT))); |
21302 | |
21303 | // CVTTP2SI conveniently sets the sign bit of its result if and only |
21304 | // if the input was out of signed range, so that sign bit tells us to |
21305 | // use "Big" instead of "Small". |
21306 | // |
21307 | // Use "Small" if "IsOverflown" has all bits cleared and |
21308 | // "0x80000000 | Big" if all bits in "IsOverflown" are set. |
21309 | // |
21310 | // AVX1 can't use the sign-splat masking for 256-bit vectors, so fall |
21311 | // back to the slightly slower BLENDV select below. |
21312 | if (VT == MVT::v8i32 && !Subtarget.hasAVX2()) { |
21313 | SDValue Overflow = DAG.getNode(ISD::OR, dl, VT, Small, Big); |
21314 | return DAG.getNode(X86ISD::BLENDV, dl, VT, Small, Overflow, Small); |
21315 | } |
21316 | |
21317 | SDValue IsOverflown = |
21318 | DAG.getNode(X86ISD::VSRAI, dl, VT, Small, |
21319 | DAG.getTargetConstant(DstBits - 1, dl, MVT::i8)); |
21320 | return DAG.getNode(ISD::OR, dl, VT, Small, |
21321 | DAG.getNode(ISD::AND, dl, VT, Big, IsOverflown)); |
21322 | } |
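// Illustrative sketch (not from this file): a scalar model of
// expandFP_TO_UINT_SSE. cvtt() models CVTTPS2DQ's 0x80000000 overflow
// result, which a plain C cast does not guarantee; the >> models VSRAI.
//
//   #include <cstdint>
//   int32_t cvtt(float f) {                       // CVTTSS2SI behavior
//     if (f != f || f >= 2147483648.0f || f < -2147483648.0f)
//       return INT32_MIN;                         // "integer indefinite"
//     return (int32_t)f;
//   }
//   uint32_t f32_to_u32(float Src) {
//     int32_t Small = cvtt(Src);
//     int32_t Big = cvtt(Src - 2147483648.0f);    // biased by 2^31
//     int32_t IsOverflown = Small >> 31;          // all ones iff Small < 0
//     return (uint32_t)(Small | (Big & IsOverflown));
//   }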
21323 | |
21324 | SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const { |
21325 | bool IsStrict = Op->isStrictFPOpcode(); |
21326 | bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT || |
21327 | Op.getOpcode() == ISD::STRICT_FP_TO_SINT; |
21328 | MVT VT = Op->getSimpleValueType(0); |
21329 | SDValue Src = Op.getOperand(IsStrict ? 1 : 0); |
21330 | MVT SrcVT = Src.getSimpleValueType(); |
21331 | SDLoc dl(Op); |
21332 | |
21333 | if (VT.isVector()) { |
21334 | if (VT == MVT::v2i1 && SrcVT == MVT::v2f64) { |
21335 | MVT ResVT = MVT::v4i32; |
21336 | MVT TruncVT = MVT::v4i1; |
21337 | unsigned Opc; |
21338 | if (IsStrict) |
21339 | Opc = IsSigned ? X86ISD::STRICT_CVTTP2SI : X86ISD::STRICT_CVTTP2UI; |
21340 | else |
21341 | Opc = IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI; |
21342 | |
21343 | if (!IsSigned && !Subtarget.hasVLX()) { |
21344 | assert(Subtarget.useAVX512Regs() && "Unexpected features!"); |
21345 | // Widen to 512 bits. |
21346 | ResVT = MVT::v8i32; |
21347 | TruncVT = MVT::v8i1; |
21348 | Opc = Op.getOpcode(); |
21349 | // Concat with a zero vector for strict FP to avoid spurious |
21350 | // exceptions from the undefined upper elements. |
21351 | // TODO: Should we just do this for non-strict as well? |
21352 | SDValue Tmp = IsStrict ? DAG.getConstantFP(0.0, dl, MVT::v8f64) |
21353 | : DAG.getUNDEF(MVT::v8f64); |
21354 | Src = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v8f64, Tmp, Src, |
21355 | DAG.getIntPtrConstant(0, dl)); |
21356 | } |
21357 | SDValue Res, Chain; |
21358 | if (IsStrict) { |
21359 | Res = |
21360 | DAG.getNode(Opc, dl, {ResVT, MVT::Other}, {Op->getOperand(0), Src}); |
21361 | Chain = Res.getValue(1); |
21362 | } else { |
21363 | Res = DAG.getNode(Opc, dl, ResVT, Src); |
21364 | } |
21365 | |
21366 | Res = DAG.getNode(ISD::TRUNCATE, dl, TruncVT, Res); |
21367 | Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i1, Res, |
21368 | DAG.getIntPtrConstant(0, dl)); |
21369 | if (IsStrict) |
21370 | return DAG.getMergeValues({Res, Chain}, dl); |
21371 | return Res; |
21372 | } |
21373 | |
21374 | |
21375 | if (VT == MVT::v8i32 && SrcVT == MVT::v8f64) { |
21376 | assert(!IsSigned && "Expected unsigned conversion!"); |
21377 | assert(Subtarget.useAVX512Regs() && "Requires avx512f"); |
21378 | return Op; |
21379 | } |
21380 | |
21381 | // Widen vXi32 fp_to_uint to a 512-bit source with plain AVX512F. |
21382 | if ((VT == MVT::v4i32 || VT == MVT::v8i32) && |
21383 | (SrcVT == MVT::v4f64 || SrcVT == MVT::v4f32 || SrcVT == MVT::v8f32) && |
21384 | Subtarget.useAVX512Regs()) { |
21385 | assert(!IsSigned && "Expected unsigned conversion!"); |
21386 | assert(!Subtarget.hasVLX() && "Unexpected features!"); |
21387 | MVT WideVT = SrcVT == MVT::v4f64 ? MVT::v8f64 : MVT::v16f32; |
21388 | MVT ResVT = SrcVT == MVT::v4f64 ? MVT::v8i32 : MVT::v16i32; |
21389 | |
21390 | // Concat with a zero vector for strict FP to avoid spurious |
21391 | // exceptions from the undefined upper elements. |
21392 | SDValue Tmp = |
21393 | IsStrict ? DAG.getConstantFP(0.0, dl, WideVT) : DAG.getUNDEF(WideVT); |
21394 | Src = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideVT, Tmp, Src, |
21395 | DAG.getIntPtrConstant(0, dl)); |
21396 | |
21397 | SDValue Res, Chain; |
21398 | if (IsStrict) { |
21399 | Res = DAG.getNode(ISD::STRICT_FP_TO_UINT, dl, {ResVT, MVT::Other}, |
21400 | {Op->getOperand(0), Src}); |
21401 | Chain = Res.getValue(1); |
21402 | } else { |
21403 | Res = DAG.getNode(ISD::FP_TO_UINT, dl, ResVT, Src); |
21404 | } |
21405 | |
21406 | Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Res, |
21407 | DAG.getIntPtrConstant(0, dl)); |
21408 | |
21409 | if (IsStrict) |
21410 | return DAG.getMergeValues({Res, Chain}, dl); |
21411 | return Res; |
21412 | } |
21413 | |
21414 | // Widen vXi64 fp_to_int to a 512-bit source with AVX512DQ. |
21415 | if ((VT == MVT::v2i64 || VT == MVT::v4i64) && |
21416 | (SrcVT == MVT::v2f64 || SrcVT == MVT::v4f64 || SrcVT == MVT::v4f32) && |
21417 | Subtarget.useAVX512Regs() && Subtarget.hasDQI()) { |
21418 | assert(!Subtarget.hasVLX() && "Unexpected features!"); |
21419 | MVT WideVT = SrcVT == MVT::v4f32 ? MVT::v8f32 : MVT::v8f64; |
21420 | |
21421 | // Concat with a zero vector for strict FP to avoid spurious |
21422 | // exceptions from the undefined upper elements. |
21423 | SDValue Tmp = |
21424 | IsStrict ? DAG.getConstantFP(0.0, dl, WideVT) : DAG.getUNDEF(WideVT); |
21425 | Src = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideVT, Tmp, Src, |
21426 | DAG.getIntPtrConstant(0, dl)); |
21427 | |
21428 | SDValue Res, Chain; |
21429 | if (IsStrict) { |
21430 | Res = DAG.getNode(Op.getOpcode(), dl, {MVT::v8i64, MVT::Other}, |
21431 | {Op->getOperand(0), Src}); |
21432 | Chain = Res.getValue(1); |
21433 | } else { |
21434 | Res = DAG.getNode(Op.getOpcode(), dl, MVT::v8i64, Src); |
21435 | } |
21436 | |
21437 | Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Res, |
21438 | DAG.getIntPtrConstant(0, dl)); |
21439 | |
21440 | if (IsStrict) |
21441 | return DAG.getMergeValues({Res, Chain}, dl); |
21442 | return Res; |
21443 | } |
21444 | |
21445 | if (VT == MVT::v2i64 && SrcVT == MVT::v2f32) { |
21446 | if (!Subtarget.hasVLX()) { |
21447 | // Non-strict nodes without VLX can be widened to v4f32->v4i64 by |
21448 | // the type legalizer instead. |
21449 | if (!IsStrict) |
21450 | return SDValue(); |
21451 | |
21452 | SDValue Zero = DAG.getConstantFP(0.0, dl, MVT::v2f32); |
21453 | SDValue Tmp = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8f32, |
21454 | {Src, Zero, Zero, Zero}); |
21455 | Tmp = DAG.getNode(Op.getOpcode(), dl, {MVT::v8i64, MVT::Other}, |
21456 | {Op->getOperand(0), Tmp}); |
21457 | SDValue Chain = Tmp.getValue(1); |
21458 | Tmp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i64, Tmp, |
21459 | DAG.getIntPtrConstant(0, dl)); |
21460 | return DAG.getMergeValues({Tmp, Chain}, dl); |
21461 | } |
21462 | |
21463 | assert(Subtarget.hasDQI() && Subtarget.hasVLX() && "Requires AVX512DQVL"); |
21464 | SDValue Tmp = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src, |
21465 | DAG.getUNDEF(MVT::v2f32)); |
21466 | if (IsStrict) { |
21467 | unsigned Opc = IsSigned ? X86ISD::STRICT_CVTTP2SI |
21468 | : X86ISD::STRICT_CVTTP2UI; |
21469 | return DAG.getNode(Opc, dl, {VT, MVT::Other}, {Op->getOperand(0), Tmp}); |
21470 | } |
21471 | unsigned Opc = IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI; |
21472 | return DAG.getNode(Opc, dl, VT, Tmp); |
21473 | } |
21474 | |
21475 | |
21476 | // Expand unsigned vXi32 conversions without AVX512 via signed ones. |
21477 | if ((VT == MVT::v4i32 && SrcVT == MVT::v4f32) || |
21478 | (VT == MVT::v4i32 && SrcVT == MVT::v4f64) || |
21479 | (VT == MVT::v8i32 && SrcVT == MVT::v8f32)) { |
21480 | assert(!IsSigned && "Expected unsigned conversion!"); |
21481 | return expandFP_TO_UINT_SSE(VT, Src, dl, DAG, Subtarget); |
21482 | } |
21483 | |
21484 | return SDValue(); |
21485 | } |
21486 | |
21487 | assert(!VT.isVector()); |
21488 | |
21489 | bool UseSSEReg = isScalarFPTypeInSSEReg(SrcVT); |
21490 | |
21491 | if (!IsSigned && UseSSEReg) { |
21492 | // Conversions from f32/f64 should be legal with AVX512. |
21493 | if (Subtarget.hasAVX512()) |
21494 | return Op; |
21495 | |
21496 | // Use the specific way CVTTSS2SI/CVTTSD2SI behave on out-of-range |
21497 | // inputs (they produce 0x80000000) to generate better code. |
21498 | if (!IsStrict && ((VT == MVT::i32 && !Subtarget.is64Bit()) || |
21499 | (VT == MVT::i64 && Subtarget.is64Bit()))) { |
21500 | unsigned DstBits = VT.getScalarSizeInBits(); |
21501 | APInt UIntLimit = APInt::getSignMask(DstBits); |
21502 | SDValue FloatOffset = DAG.getNode(ISD::UINT_TO_FP, dl, SrcVT, |
21503 | DAG.getConstant(UIntLimit, dl, VT)); |
21504 | MVT SrcVecVT = MVT::getVectorVT(SrcVT, 128 / SrcVT.getScalarSizeInBits()); |
21505 | |
21506 | // Calculate the converted result for values in the range: |
21507 | // (i32) 0 to 2^31-1 ("Small") and from 2^31 to 2^32-1 ("Big"). |
21508 | // (i64) 0 to 2^63-1 ("Small") and from 2^63 to 2^64-1 ("Big"). |
21509 | SDValue Small = |
21510 | DAG.getNode(X86ISD::CVTTS2SI, dl, VT, |
21511 | DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, SrcVecVT, Src)); |
21512 | SDValue Big = DAG.getNode( |
21513 | X86ISD::CVTTS2SI, dl, VT, |
21514 | DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, SrcVecVT, |
21515 | DAG.getNode(ISD::FSUB, dl, SrcVT, Src, FloatOffset))); |
21516 | |
21517 | // CVTTS2SI conveniently sets the sign bit if and only if the value |
21518 | // was out of range, so use that as the indicator that we should use |
21519 | // "Big" instead of "Small". |
21520 | // |
21521 | // Use "Small" if "IsOverflown" has all bits cleared and |
21522 | // "0x80000000 | Big" if all bits in "IsOverflown" are set. |
21523 | SDValue IsOverflown = DAG.getNode( |
21524 | ISD::SRA, dl, VT, Small, DAG.getConstant(DstBits - 1, dl, MVT::i8)); |
21525 | return DAG.getNode(ISD::OR, dl, VT, Small, |
21526 | DAG.getNode(ISD::AND, dl, VT, Big, IsOverflown)); |
21527 | } |
21528 | |
21529 | |
21530 | if (VT == MVT::i64) |
21531 | return SDValue(); |
21532 | |
21533 | assert(VT == MVT::i32 && "Unexpected VT!"); |
21534 | |
21535 | |
21536 | // Promote i32 to i64 and use a signed conversion on 64-bit |
21537 | // targets. |
21538 | if (Subtarget.is64Bit()) { |
21539 | SDValue Res, Chain; |
21540 | if (IsStrict) { |
21541 | Res = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { MVT::i64, MVT::Other}, |
21542 | { Op.getOperand(0), Src }); |
21543 | Chain = Res.getValue(1); |
21544 | } else |
21545 | Res = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i64, Src); |
21546 | |
21547 | Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res); |
21548 | if (IsStrict) |
21549 | return DAG.getMergeValues({ Res, Chain }, dl); |
21550 | return Res; |
21551 | } |
21552 | |
21553 | // Use default expansion on SSE1/2 targets without SSE3; with SSE3 |
21554 | // we can use FISTTP, which is handled later. |
21555 | if (!Subtarget.hasSSE3()) |
21556 | return SDValue(); |
21557 | } |
21558 | |
21559 | // Promote i16 to i32 if we can use an SSE operation or the type is |
21560 | // f128. FIXME: this does not raise an invalid exception if the |
21561 | // input does not fit in i16. |
21562 | if (VT == MVT::i16 && (UseSSEReg || SrcVT == MVT::f128)) { |
21563 | assert(IsSigned && "Expected i16 FP_TO_UINT to have been promoted!"); |
21564 | SDValue Res, Chain; |
21565 | if (IsStrict) { |
21566 | Res = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { MVT::i32, MVT::Other}, |
21567 | { Op.getOperand(0), Src }); |
21568 | Chain = Res.getValue(1); |
21569 | } else |
21570 | Res = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src); |
21571 | |
21572 | Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res); |
21573 | if (IsStrict) |
21574 | return DAG.getMergeValues({ Res, Chain }, dl); |
21575 | return Res; |
21576 | } |
21577 | |
21578 | // If this is a signed conversion in an SSE register, we are done. |
21579 | if (UseSSEReg && IsSigned) |
21580 | return Op; |
21581 | |
21582 | // f128 conversions need a libcall. |
21583 | if (SrcVT == MVT::f128) { |
21584 | RTLIB::Libcall LC; |
21585 | if (IsSigned) |
21586 | LC = RTLIB::getFPTOSINT(SrcVT, VT); |
21587 | else |
21588 | LC = RTLIB::getFPTOUINT(SrcVT, VT); |
21589 | |
21590 | SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue(); |
21591 | MakeLibCallOptions CallOptions; |
21592 | std::pair<SDValue, SDValue> Tmp = makeLibCall(DAG, LC, VT, Src, CallOptions, |
21593 | SDLoc(Op), Chain); |
21594 | |
21595 | if (IsStrict) |
21596 | return DAG.getMergeValues({ Tmp.first, Tmp.second }, dl); |
21597 | |
21598 | return Tmp.first; |
21599 | } |
21600 | |
21601 | // Fall back to the X87 path. |
21602 | SDValue Chain; |
21603 | if (SDValue V = FP_TO_INTHelper(Op, DAG, IsSigned, Chain)) { |
21604 | if (IsStrict) |
21605 | return DAG.getMergeValues({V, Chain}, dl); |
21606 | return V; |
21607 | } |
21608 | |
21609 | llvm_unreachable("Expected FP_TO_INTHelper to handle all remaining cases."); |
21610 | } |
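// Illustrative summary (not from this file): for scalars, the routine
// above effectively dispatches as follows. Signed SSE converts are already
// legal; unsigned i32/i64 are built from signed converts (AVX512 aside);
// i16 is promoted to i32; f128 goes to a libcall; everything left falls
// through to the X87 FP_TO_INTHelper.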
21611 | |
21612 | SDValue X86TargetLowering::LowerLRINT_LLRINT(SDValue Op, |
21613 | SelectionDAG &DAG) const { |
21614 | SDValue Src = Op.getOperand(0); |
21615 | MVT SrcVT = Src.getSimpleValueType(); |
21616 | |
21617 | |
21618 | // If the source is in an SSE register, the node is Legal. |
21619 | return Op; |
21620 | |
21621 | return LRINT_LLRINTHelper(Op.getNode(), DAG); |
21622 | } |
21623 | |
21624 | SDValue X86TargetLowering::LRINT_LLRINTHelper(SDNode *N, |
21625 | SelectionDAG &DAG) const { |
21626 | EVT DstVT = N->getValueType(0); |
21627 | SDValue Src = N->getOperand(0); |
21628 | EVT SrcVT = Src.getValueType(); |
21629 | |
21630 | if (SrcVT != MVT::f32 && SrcVT != MVT::f64 && SrcVT != MVT::f80) { |
21631 | // f16 must be promoted before using the lowering in this routine; |
21632 | // fp128 does not use this lowering. |
21633 | return SDValue(); |
21634 | } |
21635 | |
21636 | SDLoc DL(N); |
21637 | SDValue Chain = DAG.getEntryNode(); |
21638 | |
21639 | bool UseSSE = isScalarFPTypeInSSEReg(SrcVT); |
21640 | |
21641 | // If converting from SSE, the stack slot needs to hold both types; |
21642 | // otherwise it only needs to hold the DstVT. |
21643 | EVT OtherVT = UseSSE ? SrcVT : DstVT; |
21644 | SDValue StackPtr = DAG.CreateStackTemporary(DstVT, OtherVT); |
21645 | int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); |
21646 | MachinePointerInfo MPI = |
21647 | MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI); |
21648 | |
21649 | if (UseSSE) { |
21650 | assert(DstVT == MVT::i64 && "Invalid LRINT/LLRINT to lower!"); |
21651 | Chain = DAG.getStore(Chain, DL, Src, StackPtr, MPI); |
21652 | SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other); |
21653 | SDValue Ops[] = { Chain, StackPtr }; |
21654 | |
21655 | Src = DAG.getMemIntrinsicNode(X86ISD::FLD, DL, Tys, Ops, SrcVT, MPI, |
21656 | None, MachineMemOperand::MOLoad); |
21657 | Chain = Src.getValue(1); |
21658 | } |
21659 | |
21660 | SDValue StoreOps[] = { Chain, Src, StackPtr }; |
21661 | Chain = DAG.getMemIntrinsicNode(X86ISD::FIST, DL, DAG.getVTList(MVT::Other), |
21662 | StoreOps, DstVT, MPI, None, |
21663 | MachineMemOperand::MOStore); |
21664 | |
21665 | return DAG.getLoad(DstVT, DL, Chain, StackPtr, MPI); |
21666 | } |
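// Illustrative note (not from this file): the X87 FIST rounds using the
// current rounding mode, which is exactly lrint/llrint semantics, so the
// helper above just moves the value through a stack slot: store, FLD (if
// coming from SSE), FIST back to memory, then an integer load.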
21667 | |
21668 | SDValue |
21669 | X86TargetLowering::LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const { |
21670 | // Based on TargetLowering::expandFP_TO_INT_SAT, but using X86 |
21671 | // specifics to produce better instruction sequences. |
21672 | SDNode *Node = Op.getNode(); |
21673 | bool IsSigned = Node->getOpcode() == ISD::FP_TO_SINT_SAT; |
21674 | unsigned FpToIntOpcode = IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT; |
21675 | SDLoc dl(SDValue(Node, 0)); |
21676 | SDValue Src = Node->getOperand(0); |
21677 | |
21678 | // There are three types involved here: SrcVT is the source floating |
21679 | // point type, DstVT is the type of the result, and TmpVT is the |
21680 | // result of the intermediate FP_TO_*INT operation we'll use (which |
21681 | // may be a promotion of DstVT). |
21682 | EVT SrcVT = Src.getValueType(); |
21683 | EVT DstVT = Node->getValueType(0); |
21684 | EVT TmpVT = DstVT; |
21685 | |
21686 | // This code is only for floats and doubles; fall back to the |
21687 | // generic code for anything else. |
21688 | if (!isScalarFPTypeInSSEReg(SrcVT)) |
21689 | return SDValue(); |
21690 | |
21691 | EVT SatVT = cast<VTSDNode>(Node->getOperand(1))->getVT(); |
21692 | unsigned SatWidth = SatVT.getScalarSizeInBits(); |
21693 | unsigned DstWidth = DstVT.getScalarSizeInBits(); |
21694 | unsigned TmpWidth = TmpVT.getScalarSizeInBits(); |
21695 | assert(SatWidth <= DstWidth && SatWidth <= TmpWidth && |
21696 | "Expected saturation width smaller than result width"); |
21697 | |
21698 | // Promote the result of FP_TO_*INT to at least 32 bits. |
21699 | if (TmpWidth < 32) { |
21700 | TmpVT = MVT::i32; |
21701 | TmpWidth = 32; |
21702 | } |
21703 | |
21704 | // Promote conversions to unsigned 32-bit to 64-bit, because it will |
21705 | // allow us to use a native signed conversion instead. |
21706 | if (SatWidth == 32 && !IsSigned && Subtarget.is64Bit()) { |
21707 | TmpVT = MVT::i64; |
21708 | TmpWidth = 64; |
21709 | } |
21710 | |
21711 | // If the saturation width is smaller than the size of the temporary |
21712 | // result, we can always use signed conversion, which is native. |
21713 | if (SatWidth < TmpWidth) |
21714 | FpToIntOpcode = ISD::FP_TO_SINT; |
21715 | |
21716 | // Determine minimum and maximum integer values and their |
21717 | // corresponding floating-point values. |
21718 | APInt MinInt, MaxInt; |
21719 | if (IsSigned) { |
21720 | MinInt = APInt::getSignedMinValue(SatWidth).sextOrSelf(DstWidth); |
21721 | MaxInt = APInt::getSignedMaxValue(SatWidth).sextOrSelf(DstWidth); |
21722 | } else { |
21723 | MinInt = APInt::getMinValue(SatWidth).zextOrSelf(DstWidth); |
21724 | MaxInt = APInt::getMaxValue(SatWidth).zextOrSelf(DstWidth); |
21725 | } |
21726 | |
21727 | APFloat MinFloat(DAG.EVTToAPFloatSemantics(SrcVT)); |
21728 | APFloat MaxFloat(DAG.EVTToAPFloatSemantics(SrcVT)); |
21729 | |
21730 | APFloat::opStatus MinStatus = MinFloat.convertFromAPInt( |
21731 | MinInt, IsSigned, APFloat::rmTowardZero); |
21732 | APFloat::opStatus MaxStatus = MaxFloat.convertFromAPInt( |
21733 | MaxInt, IsSigned, APFloat::rmTowardZero); |
21734 | bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact) |
21735 | && !(MaxStatus & APFloat::opStatus::opInexact); |
21736 | |
21737 | SDValue MinFloatNode = DAG.getConstantFP(MinFloat, dl, SrcVT); |
21738 | SDValue MaxFloatNode = DAG.getConstantFP(MaxFloat, dl, SrcVT); |
21739 | |
21740 | // If the integer bounds are exactly representable as floats, emit a |
21741 | // min+max+fptoi sequence; otherwise use comparisons and selects. |
21742 | if (AreExactFloatBounds) { |
21743 | if (DstVT != TmpVT) { |
21744 | // Clamp by MinFloat from below. If Src is NaN the result is MinFloat. |
21745 | SDValue MinClamped = DAG.getNode( |
21746 | X86ISD::FMAX, dl, SrcVT, MinFloatNode, Src); |
21747 | // Clamp by MaxFloat from above. If Src is NaN the result is MaxFloat. |
21748 | SDValue BothClamped = DAG.getNode( |
21749 | X86ISD::FMIN, dl, SrcVT, MaxFloatNode, MinClamped); |
21750 | |
21751 | SDValue FpToInt = DAG.getNode(FpToIntOpcode, dl, TmpVT, BothClamped); |
21752 | |
21753 | |
21754 | |
21755 | return DAG.getNode(ISD::TRUNCATE, dl, DstVT, FpToInt); |
21756 | } |
21757 | |
21758 | |
21759 | SDValue MinClamped = DAG.getNode( |
21760 | X86ISD::FMAX, dl, SrcVT, Src, MinFloatNode); |
21761 | |
21762 | SDValue BothClamped = DAG.getNode( |
21763 | X86ISD::FMINC, dl, SrcVT, MinClamped, MaxFloatNode); |
21764 | |
21765 | SDValue FpToInt = DAG.getNode(FpToIntOpcode, dl, DstVT, BothClamped); |
21766 | |
21767 | if (!IsSigned) { |
21768 | |
21769 | |
21770 | return FpToInt; |
21771 | } |
21772 | |
21773 | |
21774 | SDValue ZeroInt = DAG.getConstant(0, dl, DstVT); |
21775 | return DAG.getSelectCC( |
21776 | dl, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO); |
21777 | } |
21778 | |
21779 | SDValue MinIntNode = DAG.getConstant(MinInt, dl, DstVT); |
21780 | SDValue MaxIntNode = DAG.getConstant(MaxInt, dl, DstVT); |
21781 | |
21782 | |
21783 | SDValue FpToInt = DAG.getNode(FpToIntOpcode, dl, TmpVT, Src); |
21784 | |
21785 | if (DstVT != TmpVT) { |
21786 | |
21787 | |
21788 | FpToInt = DAG.getNode(ISD::TRUNCATE, dl, DstVT, FpToInt); |
21789 | } |
21790 | |
21791 | SDValue Select = FpToInt; |
21792 | |
21793 | |
21794 | |
21795 | if (!IsSigned || SatWidth != TmpVT.getScalarSizeInBits()) { |
21796 | |
21797 | |
21798 | Select = DAG.getSelectCC( |
21799 | dl, Src, MinFloatNode, MinIntNode, Select, ISD::CondCode::SETULT); |
21800 | } |
21801 | |
21802 | |
21803 | Select = DAG.getSelectCC( |
21804 | dl, Src, MaxFloatNode, MaxIntNode, Select, ISD::CondCode::SETOGT); |
21805 | |
21806 | |
21807 | |
21808 | if (!IsSigned || DstVT != TmpVT) { |
21809 | return Select; |
21810 | } |
21811 | |
21812 | |
21813 | SDValue ZeroInt = DAG.getConstant(0, dl, DstVT); |
21814 | return DAG.getSelectCC( |
21815 | dl, Src, Src, ZeroInt, Select, ISD::CondCode::SETUO); |
21816 | } |
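// Illustrative sketch (not from this file): a scalar model of the
// exact-bounds path above for fptosi.sat.i32 on f64, where both integer
// bounds are exactly representable in the source type: clamp, convert,
// then patch NaN (which must produce 0).
//
//   #include <cmath>
//   #include <cstdint>
//   int32_t fptosi_sat_i32(double Src) {
//     double Clamped = fmin(fmax(Src, -2147483648.0), 2147483647.0);
//     int32_t R = (int32_t)Clamped;   // in range, so the cast is defined
//     return Src != Src ? 0 : R;      // NaN -> 0
//   }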
21817 | |
21818 | SDValue X86TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const { |
21819 | bool IsStrict = Op->isStrictFPOpcode(); |
21820 | |
21821 | SDLoc DL(Op); |
21822 | MVT VT = Op.getSimpleValueType(); |
21823 | SDValue In = Op.getOperand(IsStrict ? 1 : 0); |
21824 | MVT SVT = In.getSimpleValueType(); |
21825 | |
21826 | if (VT == MVT::f128) |
21827 | return SDValue(); |
21828 | |
21829 | assert(SVT == MVT::v2f32 && "Only customize MVT::v2f32 type legalization!"); |
21830 | |
21831 | SDValue Res = |
21832 | DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4f32, In, DAG.getUNDEF(SVT)); |
21833 | if (IsStrict) |
21834 | return DAG.getNode(X86ISD::STRICT_VFPEXT, DL, {VT, MVT::Other}, |
21835 | {Op->getOperand(0), Res}); |
21836 | return DAG.getNode(X86ISD::VFPEXT, DL, VT, Res); |
21837 | } |
21838 | |
21839 | SDValue X86TargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const { |
21840 | bool IsStrict = Op->isStrictFPOpcode(); |
21841 | SDValue In = Op.getOperand(IsStrict ? 1 : 0); |
21842 | |
21843 | if (In.getSimpleValueType() != MVT::f128) |
21844 | return Op; |
21845 | |
21846 | return SDValue(); |
21847 | } |
21848 | |
21849 | static SDValue LowerFP16_TO_FP(SDValue Op, SelectionDAG &DAG) { |
21850 | bool IsStrict = Op->isStrictFPOpcode(); |
21851 | SDValue Src = Op.getOperand(IsStrict ? 1 : 0); |
21852 | assert(Src.getValueType() == MVT::i16 && Op.getValueType() == MVT::f32 && |
21853 | "Unexpected VT!"); |
21854 | |
21855 | SDLoc dl(Op); |
21856 | SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, |
21857 | DAG.getConstant(0, dl, MVT::v8i16), Src, |
21858 | DAG.getIntPtrConstant(0, dl)); |
21859 | |
21860 | SDValue Chain; |
21861 | if (IsStrict) { |
21862 | Res = DAG.getNode(X86ISD::STRICT_CVTPH2PS, dl, {MVT::v4f32, MVT::Other}, |
21863 | {Op.getOperand(0), Res}); |
21864 | Chain = Res.getValue(1); |
21865 | } else { |
21866 | Res = DAG.getNode(X86ISD::CVTPH2PS, dl, MVT::v4f32, Res); |
21867 | } |
21868 | |
21869 | Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res, |
21870 | DAG.getIntPtrConstant(0, dl)); |
21871 | |
21872 | if (IsStrict) |
21873 | return DAG.getMergeValues({Res, Chain}, dl); |
21874 | |
21875 | return Res; |
21876 | } |
21877 | |
21878 | static SDValue LowerFP_TO_FP16(SDValue Op, SelectionDAG &DAG) { |
21879 | bool IsStrict = Op->isStrictFPOpcode(); |
21880 | SDValue Src = Op.getOperand(IsStrict ? 1 : 0); |
21881 | assert(Src.getValueType() == MVT::f32 && Op.getValueType() == MVT::i16 && |
21882 | "Unexpected VT!"); |
21883 | |
21884 | SDLoc dl(Op); |
21885 | SDValue Res, Chain; |
21886 | if (IsStrict) { |
21887 | Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v4f32, |
21888 | DAG.getConstantFP(0, dl, MVT::v4f32), Src, |
21889 | DAG.getIntPtrConstant(0, dl)); |
21890 | Res = DAG.getNode( |
21891 | X86ISD::STRICT_CVTPS2PH, dl, {MVT::v8i16, MVT::Other}, |
21892 | {Op.getOperand(0), Res, DAG.getTargetConstant(4, dl, MVT::i32)}); |
21893 | Chain = Res.getValue(1); |
21894 | } else { |
21895 | |
21896 | Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4f32, Src); |
21897 | Res = DAG.getNode(X86ISD::CVTPS2PH, dl, MVT::v8i16, Res, |
21898 | DAG.getTargetConstant(4, dl, MVT::i32)); |
21899 | } |
21900 | |
21901 | Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, Res, |
21902 | DAG.getIntPtrConstant(0, dl)); |
21903 | |
21904 | if (IsStrict) |
21905 | return DAG.getMergeValues({Res, Chain}, dl); |
21906 | |
21907 | return Res; |
21908 | } |
21909 | |
21910 | // Try to lower a scalar FADD/FSUB/ADD/SUB of extracted adjacent |
21911 | // vector elements to an extract from a single horizontal op. |
21912 | static SDValue lowerAddSubToHorizontalOp(SDValue Op, SelectionDAG &DAG, |
21913 | const X86Subtarget &Subtarget) { |
21914 | |
21915 | SDValue LHS = Op.getOperand(0); |
21916 | SDValue RHS = Op.getOperand(1); |
21917 | if (!LHS.hasOneUse() && !RHS.hasOneUse()) |
21918 | return Op; |
21919 | |
21920 | |
21921 | bool IsFP = Op.getSimpleValueType().isFloatingPoint(); |
21922 | if (IsFP && !Subtarget.hasSSE3()) |
21923 | return Op; |
21924 | if (!IsFP && !Subtarget.hasSSSE3()) |
21925 | return Op; |
21926 | |
21927 | |
21928 | if (LHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT || |
21929 | RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT || |
21930 | LHS.getOperand(0) != RHS.getOperand(0) || |
21931 | !isa<ConstantSDNode>(LHS.getOperand(1)) || |
21932 | !isa<ConstantSDNode>(RHS.getOperand(1)) || |
21933 | !shouldUseHorizontalOp(true, DAG, Subtarget)) |
21934 | return Op; |
21935 | |
21936 | |
21937 | |
21938 | unsigned HOpcode; |
21939 | switch (Op.getOpcode()) { |
21940 | case ISD::ADD: HOpcode = X86ISD::HADD; break; |
21941 | case ISD::SUB: HOpcode = X86ISD::HSUB; break; |
21942 | case ISD::FADD: HOpcode = X86ISD::FHADD; break; |
21943 | case ISD::FSUB: HOpcode = X86ISD::FHSUB; break; |
21944 | default: |
21945 | llvm_unreachable("Trying to lower unsupported opcode to horizontal op"); |
21946 | } |
21947 | unsigned LExtIndex = LHS.getConstantOperandVal(1); |
21948 | unsigned RExtIndex = RHS.getConstantOperandVal(1); |
21949 | if ((LExtIndex & 1) == 1 && (RExtIndex & 1) == 0 && |
21950 | (HOpcode == X86ISD::HADD || HOpcode == X86ISD::FHADD)) |
21951 | std::swap(LExtIndex, RExtIndex); |
21952 | |
21953 | if ((LExtIndex & 1) != 0 || RExtIndex != (LExtIndex + 1)) |
21954 | return Op; |
21955 | |
21956 | SDValue X = LHS.getOperand(0); |
21957 | EVT VecVT = X.getValueType(); |
21958 | unsigned BitWidth = VecVT.getSizeInBits(); |
21959 | unsigned NumLanes = BitWidth / 128; |
21960 | unsigned NumEltsPerLane = VecVT.getVectorNumElements() / NumLanes; |
21961 | assert((BitWidth == 128 || BitWidth == 256 || BitWidth == 512) && |
21962 | "Not expecting illegal vector widths here"); |
21963 | |
21964 | |
21965 | |
21966 | SDLoc DL(Op); |
21967 | if (BitWidth == 256 || BitWidth == 512) { |
21968 | unsigned LaneIdx = LExtIndex / NumEltsPerLane; |
21969 | X = extract128BitVector(X, LaneIdx * NumEltsPerLane, DAG, DL); |
21970 | LExtIndex %= NumEltsPerLane; |
21971 | } |
21972 | |
21973 | |
21974 | |
21975 | |
21976 | |
21977 | SDValue HOp = DAG.getNode(HOpcode, DL, X.getValueType(), X, X); |
21978 | return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getSimpleValueType(), HOp, |
21979 | DAG.getIntPtrConstant(LExtIndex / 2, DL)); |
21980 | } |
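// Illustrative note (not from this file): the transform above turns, e.g.,
//   extractelement(X, 0) fadd extractelement(X, 1)
// into extractelement(HADDPS(X, X), 0). HADD adds adjacent pairs, so lane
// 0 of HADD(X, X) is X[0] + X[1] and lane 1 is X[2] + X[3], hence the
// final extract index of LExtIndex / 2.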
21981 | |
21982 | // Depending on uarch and/or optimizing for size, we might prefer to |
21983 | // use a vector operation in place of the typical scalar operation. |
21984 | SDValue X86TargetLowering::lowerFaddFsub(SDValue Op, SelectionDAG &DAG) const { |
21985 | assert((Op.getValueType() == MVT::f32 || Op.getValueType() == MVT::f64) && |
21986 | "Only expecting float/double"); |
21987 | return lowerAddSubToHorizontalOp(Op, DAG, Subtarget); |
21988 | } |
21989 | |
21990 | // ISD::FROUND rounds to nearest with ties rounding away from zero. |
21991 | // This mode is not supported directly in hardware on X86, so build |
21992 | // it from FCOPYSIGN, FADD and FTRUNC: add just-under-0.5 with the |
21993 | // sign of the input, then truncate. |
21994 | static SDValue LowerFROUND(SDValue Op, SelectionDAG &DAG) { |
21995 | SDValue N0 = Op.getOperand(0); |
21996 | SDLoc dl(Op); |
21997 | MVT VT = Op.getSimpleValueType(); |
21998 | |
21999 | // N0 += copysign(nextafter(0.5, 0.0), N0) |
22000 | const fltSemantics &Sem = SelectionDAG::EVTToAPFloatSemantics(VT); |
22001 | bool Ignored; |
22002 | APFloat Point5Pred = APFloat(0.5f); |
22003 | Point5Pred.convert(Sem, APFloat::rmNearestTiesToEven, &Ignored); |
22004 | Point5Pred.next(true); |
22005 | |
22006 | SDValue Adder = DAG.getNode(ISD::FCOPYSIGN, dl, VT, |
22007 | DAG.getConstantFP(Point5Pred, dl, VT), N0); |
22008 | N0 = DAG.getNode(ISD::FADD, dl, VT, N0, Adder); |
22009 | |
22010 | // Truncate the result to remove the fractional part. |
22011 | return DAG.getNode(ISD::FTRUNC, dl, VT, N0); |
22012 | } |
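// Illustrative note (not from this file): why the predecessor of 0.5 is
// used above. With x just below 0.5 (x = 0.5 - 2^-25 as f32), x + 0.5
// rounds up to 1.0 and would truncate to 1, but x + pred(0.5) = 1 - 2^-24
// is exact and truncates to 0. For x = 0.5 itself, 0.5 + pred(0.5) rounds
// to 1.0, giving the required round-half-away-from-zero result of 1. In
// C this is trunc(x + copysign(nextafterf(0.5f, 0.0f), x)).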
22013 | |
22014 | |
22015 | |
22016 | static SDValue LowerFABSorFNEG(SDValue Op, SelectionDAG &DAG) { |
22017 | assert((Op.getOpcode() == ISD::FABS || Op.getOpcode() == ISD::FNEG) && |
22018 | "Wrong opcode for lowering FABS or FNEG."); |
22019 | |
22020 | bool IsFABS = (Op.getOpcode() == ISD::FABS); |
22021 | |
22022 | // If this is a FABS and it has an FNEG user, bail out to fold the |
22023 | // combination into an FNABS; the FNABS is recreated from that pair. |
22024 | if (IsFABS) |
22025 | for (SDNode *User : Op->uses()) |
22026 | if (User->getOpcode() == ISD::FNEG) |
22027 | return Op; |
22028 | |
22029 | SDLoc dl(Op); |
22030 | MVT VT = Op.getSimpleValueType(); |
22031 | |
22032 | bool IsF128 = (VT == MVT::f128); |
22033 | assert((VT == MVT::f64 || VT == MVT::f32 || VT == MVT::f128 || |
22034 | VT == MVT::v2f64 || VT == MVT::v4f64 || VT == MVT::v4f32 || |
22035 | VT == MVT::v8f32 || VT == MVT::v8f64 || VT == MVT::v16f32) && |
22036 | "Unexpected type in LowerFABSorFNEG"); |
22037 | |
22038 | // FIXME: use "OptimizeForSize" and/or CodeGenOpt::Level to decide |
22039 | // whether to emit a 16-byte constant mask when only 4 or 8 bytes are |
22040 | // needed for the scalar case. |
22041 | // |
22042 | // There are no scalar logic instructions in SSE/AVX, so widen the |
22043 | // scalar into a "fake" vector, perform the logic op on the vector, |
22044 | // and extract element 0 as the scalar result. |
22045 | // |
22046 | bool IsFakeVector = !VT.isVector() && !IsF128; |
22047 | MVT LogicVT = VT; |
22048 | if (IsFakeVector) |
22049 | LogicVT = (VT == MVT::f64) ? MVT::v2f64 : MVT::v4f32; |
22050 | |
22051 | unsigned EltBits = VT.getScalarSizeInBits(); |
22052 | |
22053 | APInt MaskElt = IsFABS ? APInt::getSignedMaxValue(EltBits) : |
22054 | APInt::getSignMask(EltBits); |
22055 | const fltSemantics &Sem = SelectionDAG::EVTToAPFloatSemantics(VT); |
22056 | SDValue Mask = DAG.getConstantFP(APFloat(Sem, MaskElt), dl, LogicVT); |
22057 | |
22058 | SDValue Op0 = Op.getOperand(0); |
22059 | bool IsFNABS = !IsFABS && (Op0.getOpcode() == ISD::FABS); |
22060 | unsigned LogicOp = IsFABS ? X86ISD::FAND : |
22061 | IsFNABS ? X86ISD::FOR : |
22062 | X86ISD::FXOR; |
22063 | SDValue Operand = IsFNABS ? Op0.getOperand(0) : Op0; |
22064 | |
22065 | if (VT.isVector() || IsF128) |
22066 | return DAG.getNode(LogicOp, dl, LogicVT, Operand, Mask); |
22067 | |
22068 | |
22069 | |
22070 | Operand = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LogicVT, Operand); |
22071 | SDValue LogicNode = DAG.getNode(LogicOp, dl, LogicVT, Operand, Mask); |
22072 | return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, LogicNode, |
22073 | DAG.getIntPtrConstant(0, dl)); |
22074 | } |
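// Illustrative sketch (not from this file): a bit-level model of the
// lowering above, on scalar bits.
//
//   #include <cstdint>
//   #include <cstring>
//   float fabs_bits(float x) {               // FAND with the signed-max mask
//     uint32_t u; std::memcpy(&u, &x, 4);
//     u &= 0x7fffffffu;
//     std::memcpy(&x, &u, 4); return x;
//   }
//   float fneg_bits(float x) {               // FXOR with the sign mask
//     uint32_t u; std::memcpy(&u, &x, 4);
//     u ^= 0x80000000u;
//     std::memcpy(&x, &u, 4); return x;
//   }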
22075 | |
22076 | static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) { |
22077 | SDValue Mag = Op.getOperand(0); |
22078 | SDValue Sign = Op.getOperand(1); |
22079 | SDLoc dl(Op); |
22080 | |
22081 | // If the sign operand is smaller, extend it first. |
22082 | MVT VT = Op.getSimpleValueType(); |
22083 | if (Sign.getSimpleValueType().bitsLT(VT)) |
22084 | Sign = DAG.getNode(ISD::FP_EXTEND, dl, VT, Sign); |
22085 | |
22086 | // And if it is bigger, shrink it first. |
22087 | if (Sign.getSimpleValueType().bitsGT(VT)) |
22088 | Sign = |
22089 | DAG.getNode(ISD::FP_ROUND, dl, VT, Sign, DAG.getIntPtrConstant(0, dl)); |
22090 | |
22091 | // At this point the operands and the result should have the same |
22092 | // type, and that won't be f80 since that is not custom lowered. |
22093 | bool IsF128 = (VT == MVT::f128); |
22094 | assert((VT == MVT::f64 || VT == MVT::f32 || VT == MVT::f128 || |
22095 | VT == MVT::v2f64 || VT == MVT::v4f64 || VT == MVT::v4f32 || |
22096 | VT == MVT::v8f32 || VT == MVT::v8f64 || VT == MVT::v16f32) && |
22097 | "Unexpected type in LowerFCOPYSIGN"); |
22098 | |
22099 | const fltSemantics &Sem = SelectionDAG::EVTToAPFloatSemantics(VT); |
22100 | |
22101 | // Perform all scalar logic operations as 16-byte vectors because |
22102 | // there are no scalar FP logic instructions in SSE. |
22103 | // TODO: this isn't necessary; scalar types might avoid some splats |
22104 | // but could miss load-folding opportunities. Should this decision be |
22105 | // based on OptimizeForSize? |
22106 | bool IsFakeVector = !VT.isVector() && !IsF128; |
22107 | MVT LogicVT = VT; |
22108 | if (IsFakeVector) |
22109 | LogicVT = (VT == MVT::f64) ? MVT::v2f64 : MVT::v4f32; |
22110 | |
22111 | // The mask constants are automatically splatted for vector types. |
22112 | unsigned EltSizeInBits = VT.getScalarSizeInBits(); |
22113 | SDValue SignMask = DAG.getConstantFP( |
22114 | APFloat(Sem, APInt::getSignMask(EltSizeInBits)), dl, LogicVT); |
22115 | SDValue MagMask = DAG.getConstantFP( |
22116 | APFloat(Sem, APInt::getSignedMaxValue(EltSizeInBits)), dl, LogicVT); |
22117 | |
22118 | // First, clear all bits but the sign bit from the second operand. |
22119 | if (IsFakeVector) |
22120 | Sign = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LogicVT, Sign); |
22121 | SDValue SignBit = DAG.getNode(X86ISD::FAND, dl, LogicVT, Sign, SignMask); |
22122 | |
22123 | // Next, clear the sign bit from the first operand (magnitude). |
22124 | // TODO: with general constant folding for FP logic ops this check |
22125 | // would not be necessary. |
22126 | SDValue MagBits; |
22127 | if (ConstantFPSDNode *Op0CN = isConstOrConstSplatFP(Mag)) { |
22128 | APFloat APF = Op0CN->getValueAPF(); |
22129 | APF.clearSign(); |
22130 | MagBits = DAG.getConstantFP(APF, dl, LogicVT); |
22131 | } else { |
22132 | |
22133 | if (IsFakeVector) |
22134 | Mag = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LogicVT, Mag); |
22135 | MagBits = DAG.getNode(X86ISD::FAND, dl, LogicVT, Mag, MagMask); |
22136 | } |
22137 | |
22138 | // OR the magnitude value with the sign bit. |
22139 | SDValue Or = DAG.getNode(X86ISD::FOR, dl, LogicVT, MagBits, SignBit); |
22140 | return !IsFakeVector ? Or : DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Or, |
22141 | DAG.getIntPtrConstant(0, dl)); |
22142 | } |
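// Illustrative sketch (not from this file): bitwise copysign as built
// above, on scalar bits.
//
//   uint32_t copysign_bits(uint32_t mag, uint32_t sign) {
//     return (mag & 0x7fffffffu) | (sign & 0x80000000u);
//   }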
22143 | |
22144 | static SDValue LowerFGETSIGN(SDValue Op, SelectionDAG &DAG) { |
22145 | SDValue N0 = Op.getOperand(0); |
22146 | SDLoc dl(Op); |
22147 | MVT VT = Op.getSimpleValueType(); |
22148 | |
22149 | MVT OpVT = N0.getSimpleValueType(); |
22150 | assert((OpVT == MVT::f32 || OpVT == MVT::f64) && |
22151 | "Unexpected type for FGETSIGN"); |
22152 | |
22153 | // Lower ISD::FGETSIGN to (AND (X86ISD::MOVMSK ...) 1). |
22154 | MVT VecVT = (OpVT == MVT::f32 ? MVT::v4f32 : MVT::v2f64); |
22155 | SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, N0); |
22156 | Res = DAG.getNode(X86ISD::MOVMSK, dl, MVT::i32, Res); |
22157 | Res = DAG.getZExtOrTrunc(Res, dl, VT); |
22158 | Res = DAG.getNode(ISD::AND, dl, VT, Res, DAG.getConstant(1, dl, VT)); |
22159 | return Res; |
22160 | } |
22161 | |
22162 | // Helper for creating an X86ISD::SETCC node. |
22163 | static SDValue getSETCC(X86::CondCode Cond, SDValue EFLAGS, const SDLoc &dl, |
22164 | SelectionDAG &DAG) { |
22165 | return DAG.getNode(X86ISD::SETCC, dl, MVT::i8, |
22166 | DAG.getTargetConstant(Cond, dl, MVT::i8), EFLAGS); |
22167 | } |
22168 | |
22169 | // Helper for matching OR(EXTRACTELT(X,0),OR(EXTRACTELT(X,1),...)) |
22170 | // style scalarized (associative) reduction patterns. Partial |
22171 | // reductions are supported when the pointer SrcMask is non-null. |
22172 | // TODO: move this to SelectionDAG? |
22173 | static bool matchScalarReduction(SDValue Op, ISD::NodeType BinOp, |
22174 | SmallVectorImpl<SDValue> &SrcOps, |
22175 | SmallVectorImpl<APInt> *SrcMask = nullptr) { |
22176 | SmallVector<SDValue, 8> Opnds; |
22177 | DenseMap<SDValue, APInt> SrcOpMap; |
22178 | EVT VT = MVT::Other; |
22179 | |
22180 | // Recognize a special case where a vector is cast into a wide |
22181 | // integer to test for all zeros. |
22182 | assert(Op.getOpcode() == unsigned(BinOp) && |
22183 | "Unexpected bit reduction opcode"); |
22184 | Opnds.push_back(Op.getOperand(0)); |
22185 | Opnds.push_back(Op.getOperand(1)); |
22186 | |
22187 | for (unsigned Slot = 0, e = Opnds.size(); Slot < e; ++Slot) { |
22188 | SmallVectorImpl<SDValue>::const_iterator I = Opnds.begin() + Slot; |
22189 | |
22190 | if (I->getOpcode() == unsigned(BinOp)) { |
22191 | Opnds.push_back(I->getOperand(0)); |
22192 | Opnds.push_back(I->getOperand(1)); |
22193 | // Re-evaluate the number of nodes to be traversed (2 more pushed). |
22194 | e += 2; |
22195 | continue; |
22196 | } |
22197 | |
22198 | // Quit if this is not an EXTRACT_VECTOR_ELT. |
22199 | if (I->getOpcode() != ISD::EXTRACT_VECTOR_ELT) |
22200 | return false; |
22201 | |
22202 | |
22203 | auto *Idx = dyn_cast<ConstantSDNode>(I->getOperand(1)); |
22204 | if (!Idx) |
22205 | return false; |
22206 | |
22207 | SDValue Src = I->getOperand(0); |
22208 | DenseMap<SDValue, APInt>::iterator M = SrcOpMap.find(Src); |
22209 | if (M == SrcOpMap.end()) { |
22210 | VT = Src.getValueType(); |
22211 | |
22212 | if (!SrcOpMap.empty() && VT != SrcOpMap.begin()->first.getValueType()) |
22213 | return false; |
22214 | unsigned NumElts = VT.getVectorNumElements(); |
22215 | APInt EltCount = APInt::getNullValue(NumElts); |
22216 | M = SrcOpMap.insert(std::make_pair(Src, EltCount)).first; |
22217 | SrcOps.push_back(Src); |
22218 | } |
22219 | |
22220 | |
22221 | unsigned CIdx = Idx->getZExtValue(); |
22222 | if (M->second[CIdx]) |
22223 | return false; |
22224 | M->second.setBit(CIdx); |
22225 | } |
22226 | |
22227 | if (SrcMask) { |
22228 | |
22229 | for (SDValue &SrcOp : SrcOps) |
22230 | SrcMask->push_back(SrcOpMap[SrcOp]); |
22231 | } else { |
22232 | |
22233 | for (const auto &I : SrcOpMap) |
22234 | if (!I.second.isAllOnesValue()) |
22235 | return false; |
22236 | } |
22237 | |
22238 | return true; |
22239 | } |
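// Illustrative note (not from this file): matchScalarReduction flattens
// the (associative) binop tree with a worklist and records one APInt
// bitmap per source vector; e.g. or(or(e0, e1), or(e2, e3)) over elements
// 0..3 of X yields SrcOps = {X} with mask 0b1111, which callers without a
// SrcMask treat as a full reduction.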
22240 | |
22241 | // Helper function for comparing all bits of a vector against zero. |
22242 | static SDValue LowerVectorAllZero(const SDLoc &DL, SDValue V, ISD::CondCode CC, |
22243 | const APInt &Mask, |
22244 | const X86Subtarget &Subtarget, |
22245 | SelectionDAG &DAG, X86::CondCode &X86CC) { |
22246 | EVT VT = V.getValueType(); |
22247 | unsigned ScalarSize = VT.getScalarSizeInBits(); |
22248 | if (Mask.getBitWidth() != ScalarSize) { |
22249 | assert(ScalarSize == 1 && "Element Mask vs Vector bitwidth mismatch"); |
22250 | return SDValue(); |
22251 | } |
22252 | |
22253 | assert((CC == ISD::SETEQ || CC == ISD::SETNE) && "Unsupported ISD::CondCode"); |
22254 | X86CC = (CC == ISD::SETEQ ? X86::COND_E : X86::COND_NE); |
22255 | |
22256 | auto MaskBits = [&](SDValue Src) { |
22257 | if (Mask.isAllOnesValue()) |
22258 | return Src; |
22259 | EVT SrcVT = Src.getValueType(); |
22260 | SDValue MaskValue = DAG.getConstant(Mask, DL, SrcVT); |
22261 | return DAG.getNode(ISD::AND, DL, SrcVT, Src, MaskValue); |
22262 | }; |
22263 | |
22264 | // For sub-128-bit vectors, cast to a legal integer and compare with zero. |
22265 | if (VT.getSizeInBits() < 128) { |
22266 | EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits()); |
22267 | if (!DAG.getTargetLoweringInfo().isTypeLegal(IntVT)) |
22268 | return SDValue(); |
22269 | return DAG.getNode(X86ISD::CMP, DL, MVT::i32, |
22270 | DAG.getBitcast(IntVT, MaskBits(V)), |
22271 | DAG.getConstant(0, DL, IntVT)); |
22272 | } |
22273 | |
22274 | // Quit if not splittable to 128/256-bit vectors. |
22275 | if (!isPowerOf2_32(VT.getSizeInBits())) |
22276 | return SDValue(); |
22277 | |
22278 | // Split down to a 128/256-bit vector. |
22279 | unsigned TestSize = Subtarget.hasAVX() ? 256 : 128; |
22280 | while (VT.getSizeInBits() > TestSize) { |
22281 | auto Split = DAG.SplitVector(V, DL); |
22282 | VT = Split.first.getValueType(); |
22283 | V = DAG.getNode(ISD::OR, DL, VT, Split.first, Split.second); |
22284 | } |
22285 | |
22286 | bool UsePTEST = Subtarget.hasSSE41(); |
22287 | if (UsePTEST) { |
22288 | MVT TestVT = VT.is128BitVector() ? MVT::v2i64 : MVT::v4i64; |
22289 | V = DAG.getBitcast(TestVT, MaskBits(V)); |
22290 | return DAG.getNode(X86ISD::PTEST, DL, MVT::i32, V, V); |
22291 | } |
22292 | |
22293 | // Without PTEST, a masked v2i64 OR-reduction is not faster than |
22294 | // scalarization. |
22295 | if (!Mask.isAllOnesValue() && VT.getScalarSizeInBits() > 32) |
22296 | return SDValue(); |
22297 | |
22298 | V = DAG.getBitcast(MVT::v16i8, MaskBits(V)); |
22299 | V = DAG.getNode(X86ISD::PCMPEQ, DL, MVT::v16i8, V, |
22300 | getZeroVector(MVT::v16i8, Subtarget, DAG, DL)); |
22301 | V = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, V); |
22302 | return DAG.getNode(X86ISD::CMP, DL, MVT::i32, V, |
22303 | DAG.getConstant(0xFFFF, DL, MVT::i32)); |
22304 | } |
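// Illustrative note (not from this file): the two compare strategies
// above are, with SSE41+, PTEST V,V (ZF is set iff V is all zero), and
// otherwise PCMPEQB(V, 0) + PMOVMSKB + CMP 0xFFFF, where the compare is
// equal iff every byte of V compared equal to zero.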
22305 | |
22306 | // Check whether an OR'd reduction tree is PTEST-able, or if we can |
22307 | // fall back to CMP(MOVMSK(PCMPEQB(X,0))). |
22308 | static SDValue MatchVectorAllZeroTest(SDValue Op, ISD::CondCode CC, |
22309 | const SDLoc &DL, |
22310 | const X86Subtarget &Subtarget, |
22311 | SelectionDAG &DAG, SDValue &X86CC) { |
22312 | assert((CC == ISD::SETEQ || CC == ISD::SETNE) && "Unsupported ISD::CondCode"); |
22313 | |
22314 | if (!Subtarget.hasSSE2() || !Op->hasOneUse()) |
22315 | return SDValue(); |
22316 | |
22317 | // Check whether we're masking/truncating an OR-reduction result, in |
22318 | // which case track the masked bits. |
22319 | APInt Mask = APInt::getAllOnesValue(Op.getScalarValueSizeInBits()); |
22320 | switch (Op.getOpcode()) { |
22321 | case ISD::TRUNCATE: { |
22322 | SDValue Src = Op.getOperand(0); |
22323 | Mask = APInt::getLowBitsSet(Src.getScalarValueSizeInBits(), |
22324 | Op.getScalarValueSizeInBits()); |
22325 | Op = Src; |
22326 | break; |
22327 | } |
22328 | case ISD::AND: { |
22329 | if (auto *Cst = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { |
22330 | Mask = Cst->getAPIntValue(); |
22331 | Op = Op.getOperand(0); |
22332 | } |
22333 | break; |
22334 | } |
22335 | } |
22336 | |
22337 | SmallVector<SDValue, 8> VecIns; |
22338 | if (Op.getOpcode() == ISD::OR && matchScalarReduction(Op, ISD::OR, VecIns)) { |
22339 | EVT VT = VecIns[0].getValueType(); |
22340 | assert(llvm::all_of(VecIns, |
22341 | [VT](SDValue V) { return VT == V.getValueType(); }) && |
22342 | "Reduction source vector mismatch"); |
22343 | |
22344 | // Quit if less than 128 bits or not splittable to 128/256-bit vectors. |
22345 | if (VT.getSizeInBits() < 128 || !isPowerOf2_32(VT.getSizeInBits())) |
22346 | return SDValue(); |
22347 | |
22348 | // If more than one full vector is evaluated, OR them first before PTEST. |
22349 | for (unsigned Slot = 0, e = VecIns.size(); e - Slot > 1; |
22350 | Slot += 2, e += 1) { |
22351 | |
22352 | |
22353 | SDValue LHS = VecIns[Slot]; |
22354 | SDValue RHS = VecIns[Slot + 1]; |
22355 | VecIns.push_back(DAG.getNode(ISD::OR, DL, VT, LHS, RHS)); |
22356 | } |
22357 | |
22358 | X86::CondCode CCode; |
22359 | if (SDValue V = LowerVectorAllZero(DL, VecIns.back(), CC, Mask, Subtarget, |
22360 | DAG, CCode)) { |
22361 | X86CC = DAG.getTargetConstant(CCode, DL, MVT::i8); |
22362 | return V; |
22363 | } |
22364 | } |
22365 | |
22366 | if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) { |
22367 | ISD::NodeType BinOp; |
22368 | if (SDValue Match = |
22369 | DAG.matchBinOpReduction(Op.getNode(), BinOp, {ISD::OR})) { |
22370 | X86::CondCode CCode; |
22371 | if (SDValue V = |
22372 | LowerVectorAllZero(DL, Match, CC, Mask, Subtarget, DAG, CCode)) { |
22373 | X86CC = DAG.getTargetConstant(CCode, DL, MVT::i8); |
22374 | return V; |
22375 | } |
22376 | } |
22377 | } |
22378 | |
22379 | return SDValue(); |
22380 | } |
22381 | |
22382 | |
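      | // Return true if Op has any use other than as the condition of a BRCOND, |
      | // SETCC or SELECT (looking through a single-use truncate). |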
22383 | static bool hasNonFlagsUse(SDValue Op) { |
22384 | for (SDNode::use_iterator UI = Op->use_begin(), UE = Op->use_end(); UI != UE; |
22385 | ++UI) { |
22386 | SDNode *User = *UI; |
22387 | unsigned UOpNo = UI.getOperandNo(); |
22388 | if (User->getOpcode() == ISD::TRUNCATE && User->hasOneUse()) { |
22389 | |
22390 | UOpNo = User->use_begin().getOperandNo(); |
22391 | User = *User->use_begin(); |
22392 | } |
22393 | |
22394 | if (User->getOpcode() != ISD::BRCOND && User->getOpcode() != ISD::SETCC && |
22395 | !(User->getOpcode() == ISD::SELECT && UOpNo == 0)) |
22396 | return true; |
22397 | } |
22398 | return false; |
22399 | } |
22400 | |
22401 | |
22402 | |
22403 | |
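      | // Rewriting Op into a flag-producing node is only worthwhile when every use |
      | // is a CopyToReg, SETCC or STORE. |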
22404 | static bool isProfitableToUseFlagOp(SDValue Op) { |
22405 | for (SDNode *U : Op->uses()) |
22406 | if (U->getOpcode() != ISD::CopyToReg && |
22407 | U->getOpcode() != ISD::SETCC && |
22408 | U->getOpcode() != ISD::STORE) |
22409 | return false; |
22410 | |
22411 | return true; |
22412 | } |
22413 | |
22414 | |
22415 | |
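      | // Emit nodes that will be selected as "test Op0,Op0", or something |
      | // equivalent, reusing an existing arithmetic result where possible. |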
22416 | static SDValue EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl, |
22417 | SelectionDAG &DAG, const X86Subtarget &Subtarget) { |
22418 | |
22419 | |
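      | // CF and OF aren't always set the way we want for arithmetic ops; work out |
      | // which of them the condition actually reads. |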
22420 | bool NeedCF = false; |
22421 | bool NeedOF = false; |
22422 | switch (X86CC) { |
22423 | default: break; |
22424 | case X86::COND_A: case X86::COND_AE: |
22425 | case X86::COND_B: case X86::COND_BE: |
22426 | NeedCF = true; |
22427 | break; |
22428 | case X86::COND_G: case X86::COND_GE: |
22429 | case X86::COND_L: case X86::COND_LE: |
22430 | case X86::COND_O: case X86::COND_NO: { |
22431 | |
22432 | |
22433 | |
22434 | switch (Op->getOpcode()) { |
22435 | case ISD::ADD: |
22436 | case ISD::SUB: |
22437 | case ISD::MUL: |
22438 | case ISD::SHL: |
22439 | if (Op.getNode()->getFlags().hasNoSignedWrap()) |
22440 | break; |
22441 | LLVM_FALLTHROUGH; |
22442 | default: |
22443 | NeedOF = true; |
22444 | break; |
22445 | } |
22446 | break; |
22447 | } |
22448 | } |
22449 | |
22450 | |
22451 | |
22452 | if (Op.getResNo() != 0 || NeedOF || NeedCF) { |
22453 | |
22454 | return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op, |
22455 | DAG.getConstant(0, dl, Op.getValueType())); |
22456 | } |
22457 | unsigned Opcode = 0; |
22458 | unsigned NumOperands = 0; |
22459 | |
22460 | SDValue ArithOp = Op; |
22461 | |
22462 | |
22463 | |
22464 | |
22465 | switch (ArithOp.getOpcode()) { |
22466 | case ISD::AND: |
22467 | |
22468 | |
22469 | if (!hasNonFlagsUse(Op)) |
22470 | break; |
22471 | |
22472 | LLVM_FALLTHROUGH; |
22473 | case ISD::ADD: |
22474 | case ISD::SUB: |
22475 | case ISD::OR: |
22476 | case ISD::XOR: |
22477 | if (!isProfitableToUseFlagOp(Op)) |
22478 | break; |
22479 | |
22480 | |
22481 | switch (ArithOp.getOpcode()) { |
22482 | default: llvm_unreachable("unexpected operator!"); |
22483 | case ISD::ADD: Opcode = X86ISD::ADD; break; |
22484 | case ISD::SUB: Opcode = X86ISD::SUB; break; |
22485 | case ISD::XOR: Opcode = X86ISD::XOR; break; |
22486 | case ISD::AND: Opcode = X86ISD::AND; break; |
22487 | case ISD::OR: Opcode = X86ISD::OR; break; |
22488 | } |
22489 | |
22490 | NumOperands = 2; |
22491 | break; |
22492 | case X86ISD::ADD: |
22493 | case X86ISD::SUB: |
22494 | case X86ISD::OR: |
22495 | case X86ISD::XOR: |
22496 | case X86ISD::AND: |
22497 | return SDValue(Op.getNode(), 1); |
22498 | case ISD::SSUBO: |
22499 | case ISD::USUBO: { |
22500 | |
22501 | SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32); |
22502 | return DAG.getNode(X86ISD::SUB, dl, VTs, Op->getOperand(0), |
22503 | Op->getOperand(1)).getValue(1); |
22504 | } |
22505 | default: |
22506 | break; |
22507 | } |
22508 | |
22509 | if (Opcode == 0) { |
22510 | |
22511 | return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op, |
22512 | DAG.getConstant(0, dl, Op.getValueType())); |
22513 | } |
22514 | SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32); |
22515 | SmallVector<SDValue, 4> Ops(Op->op_begin(), Op->op_begin() + NumOperands); |
22516 | |
22517 | SDValue New = DAG.getNode(Opcode, dl, VTs, Ops); |
22518 | DAG.ReplaceAllUsesOfValueWith(SDValue(Op.getNode(), 0), New); |
22519 | return SDValue(New.getNode(), 1); |
22520 | } |
22521 | |
22522 | |
22523 | |
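      | // Emit nodes that will be selected as "cmp Op0,Op1", or something equivalent. |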
22524 | static SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, |
22525 | const SDLoc &dl, SelectionDAG &DAG, |
22526 | const X86Subtarget &Subtarget) { |
22527 | if (isNullConstant(Op1)) |
22528 | return EmitTest(Op0, X86CC, dl, DAG, Subtarget); |
22529 | |
22530 | EVT CmpVT = Op0.getValueType(); |
22531 | |
22532 | assert((CmpVT == MVT::i8 || CmpVT == MVT::i16 || |
22533 | CmpVT == MVT::i32 || CmpVT == MVT::i64) && "Unexpected VT!"); |
22534 | |
22535 | |
22536 | |
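      | // Widen i16 compares whose immediate doesn't fit in 8 bits: a 16-bit |
      | // immediate needs a length-changing prefix that stalls decode. Skipped at |
      | // minsize and on Atom, where the prefix is not a problem. |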
22537 | if (CmpVT == MVT::i16 && !Subtarget.isAtom() && |
22538 | !DAG.getMachineFunction().getFunction().hasMinSize()) { |
22539 | ConstantSDNode *COp0 = dyn_cast<ConstantSDNode>(Op0); |
22540 | ConstantSDNode *COp1 = dyn_cast<ConstantSDNode>(Op1); |
22541 | |
22542 | if ((COp0 && !COp0->getAPIntValue().isSignedIntN(8)) || |
22543 | (COp1 && !COp1->getAPIntValue().isSignedIntN(8))) { |
22544 | unsigned ExtendOp = |
22545 | isX86CCSigned(X86CC) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; |
22546 | if (X86CC == X86::COND_E || X86CC == X86::COND_NE) { |
22547 | |
22548 | |
22549 | if (Op0.getOpcode() == ISD::TRUNCATE) { |
22550 | SDValue In = Op0.getOperand(0); |
22551 | unsigned EffBits = |
22552 | In.getScalarValueSizeInBits() - DAG.ComputeNumSignBits(In) + 1; |
22553 | if (EffBits <= 16) |
22554 | ExtendOp = ISD::SIGN_EXTEND; |
22555 | } else if (Op1.getOpcode() == ISD::TRUNCATE) { |
22556 | SDValue In = Op1.getOperand(0); |
22557 | unsigned EffBits = |
22558 | In.getScalarValueSizeInBits() - DAG.ComputeNumSignBits(In) + 1; |
22559 | if (EffBits <= 16) |
22560 | ExtendOp = ISD::SIGN_EXTEND; |
22561 | } |
22562 | } |
22563 | |
22564 | CmpVT = MVT::i32; |
22565 | Op0 = DAG.getNode(ExtendOp, dl, CmpVT, Op0); |
22566 | Op1 = DAG.getNode(ExtendOp, dl, CmpVT, Op1); |
22567 | } |
22568 | } |
22569 | |
22570 | |
22571 | |
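      | // Shrink an unsigned i64 compare to i32 when the constant fits in 32 bits |
      | // and the upper bits of Op0 are known zero. |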
22572 | if (CmpVT == MVT::i64 && isa<ConstantSDNode>(Op1) && !isX86CCSigned(X86CC) && |
22573 | Op0.hasOneUse() && |
22574 | cast<ConstantSDNode>(Op1)->getAPIntValue().getActiveBits() <= 32 && |
22575 | DAG.MaskedValueIsZero(Op0, APInt::getHighBitsSet(64, 32))) { |
22576 | CmpVT = MVT::i32; |
22577 | Op0 = DAG.getNode(ISD::TRUNCATE, dl, CmpVT, Op0); |
22578 | Op1 = DAG.getNode(ISD::TRUNCATE, dl, CmpVT, Op1); |
22579 | } |
22580 | |
22581 | |
22582 | |
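      | // For EQ/NE, (0 - X) cmp Y can use ADD instead, removing the negation; the |
      | // symmetric case follows below. |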
22583 | if (Op0.getOpcode() == ISD::SUB && isNullConstant(Op0.getOperand(0)) && |
22584 | Op0.hasOneUse() && (X86CC == X86::COND_E || X86CC == X86::COND_NE)) { |
22585 | SDVTList VTs = DAG.getVTList(CmpVT, MVT::i32); |
22586 | SDValue Add = DAG.getNode(X86ISD::ADD, dl, VTs, Op0.getOperand(1), Op1); |
22587 | return Add.getValue(1); |
22588 | } |
22589 | |
22590 | |
22591 | |
22592 | if (Op1.getOpcode() == ISD::SUB && isNullConstant(Op1.getOperand(0)) && |
22593 | Op1.hasOneUse() && (X86CC == X86::COND_E || X86CC == X86::COND_NE)) { |
22594 | SDVTList VTs = DAG.getVTList(CmpVT, MVT::i32); |
22595 | SDValue Add = DAG.getNode(X86ISD::ADD, dl, VTs, Op0, Op1.getOperand(1)); |
22596 | return Add.getValue(1); |
22597 | } |
22598 | |
22599 | |
22600 | SDVTList VTs = DAG.getVTList(CmpVT, MVT::i32); |
22601 | SDValue Sub = DAG.getNode(X86ISD::SUB, dl, VTs, Op0, Op1); |
22602 | return Sub.getValue(1); |
22603 | } |
22604 | |
22605 | |
22606 | bool X86TargetLowering::isFsqrtCheap(SDValue Op, SelectionDAG &DAG) const { |
22607 | EVT VT = Op.getValueType(); |
22608 | |
22609 | |
22610 | if (DAG.getNodeIfExists(X86ISD::FRSQRT, DAG.getVTList(VT), Op)) |
22611 | return false; |
22612 | |
22613 | if (VT.isVector()) |
22614 | return Subtarget.hasFastVectorFSQRT(); |
22615 | return Subtarget.hasFastScalarFSQRT(); |
22616 | } |
22617 | |
22618 | |
22619 | |
22620 | SDValue X86TargetLowering::getSqrtEstimate(SDValue Op, |
22621 | SelectionDAG &DAG, int Enabled, |
22622 | int &RefinementSteps, |
22623 | bool &UseOneConstNR, |
22624 | bool Reciprocal) const { |
22625 | EVT VT = Op.getValueType(); |
22626 | |
22627 | |
22628 | |
22629 | |
22630 | |
22631 | |
22632 | |
22633 | |
22634 | |
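      | // SSE1 provides rsqrtss/rsqrtps, AVX a 256-bit form, AVX-512 RSQRT14; one |
      | // Newton-Raphson step refines the estimate by default. f64 is not handled: |
      | // a refined double-precision estimate is rarely profitable. |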
22635 | if ((VT == MVT::f32 && Subtarget.hasSSE1()) || |
22636 | (VT == MVT::v4f32 && Subtarget.hasSSE1() && Reciprocal) || |
22637 | (VT == MVT::v4f32 && Subtarget.hasSSE2() && !Reciprocal) || |
22638 | (VT == MVT::v8f32 && Subtarget.hasAVX()) || |
22639 | (VT == MVT::v16f32 && Subtarget.useAVX512Regs())) { |
22640 | if (RefinementSteps == ReciprocalEstimate::Unspecified) |
22641 | RefinementSteps = 1; |
22642 | |
22643 | UseOneConstNR = false; |
22644 | |
22645 | unsigned Opcode = VT == MVT::v16f32 ? X86ISD::RSQRT14 : X86ISD::FRSQRT; |
22646 | return DAG.getNode(Opcode, SDLoc(Op), VT, Op); |
22647 | } |
22648 | return SDValue(); |
22649 | } |
22650 | |
22651 | |
22652 | |
22653 | SDValue X86TargetLowering::getRecipEstimate(SDValue Op, SelectionDAG &DAG, |
22654 | int Enabled, |
22655 | int &RefinementSteps) const { |
22656 | EVT VT = Op.getValueType(); |
22657 | |
22658 | |
22659 | |
22660 | |
22661 | |
22662 | |
22663 | |
22664 | |
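      | // SSE1 provides rcpss/rcpps, AVX a 256-bit form, AVX-512 RCP14. Scalar f32 |
      | // estimates stay off unless explicitly enabled; vector estimates default to |
      | // one refinement step. |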
22665 | if ((VT == MVT::f32 && Subtarget.hasSSE1()) || |
22666 | (VT == MVT::v4f32 && Subtarget.hasSSE1()) || |
22667 | (VT == MVT::v8f32 && Subtarget.hasAVX()) || |
22668 | (VT == MVT::v16f32 && Subtarget.useAVX512Regs())) { |
22669 | |
22670 | |
22671 | |
22672 | if (VT == MVT::f32 && Enabled == ReciprocalEstimate::Unspecified) |
22673 | return SDValue(); |
22674 | |
22675 | if (RefinementSteps == ReciprocalEstimate::Unspecified) |
22676 | RefinementSteps = 1; |
22677 | |
22678 | |
22679 | unsigned Opcode = VT == MVT::v16f32 ? X86ISD::RCP14 : X86ISD::FRCP; |
22680 | return DAG.getNode(Opcode, SDLoc(Op), VT, Op); |
22681 | } |
22682 | return SDValue(); |
22683 | } |
22684 | |
22685 | |
22686 | |
22687 | |
22688 | |
22689 | |
22690 | |
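      | // Minimum number of divisions by the same divisor before they are converted |
      | // into multiplications by its reciprocal. |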
22691 | unsigned X86TargetLowering::combineRepeatedFPDivisors() const { |
22692 | return 2; |
22693 | } |
22694 | |
22695 | SDValue |
22696 | X86TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor, |
22697 | SelectionDAG &DAG, |
22698 | SmallVectorImpl<SDNode *> &Created) const { |
22699 | AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes(); |
22700 | if (isIntDivCheap(N->getValueType(0), Attr)) |
22701 | return SDValue(N, 0); |
22702 | |
22703 | assert((Divisor.isPowerOf2() || (-Divisor).isPowerOf2()) && |
22704 | "Unexpected divisor!"); |
22705 | |
22706 | |
22707 | |
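      | // Only do this when CMOV is available; otherwise the select below becomes a branch. |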
22708 | if (!Subtarget.hasCMov()) |
22709 | return SDValue(); |
22710 | |
22711 | |
22712 | EVT VT = N->getValueType(0); |
22713 | |
22714 | if (VT != MVT::i16 && VT != MVT::i32 && |
22715 | !(Subtarget.is64Bit() && VT == MVT::i64)) |
22716 | return SDValue(); |
22717 | |
22718 | unsigned Lg2 = Divisor.countTrailingZeros(); |
22719 | |
22720 | |
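      | // Dividing by 2 or -2 is handled better by the generic expansion. |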
22721 | if (Lg2 == 1) |
22722 | return SDValue(); |
22723 | |
22724 | SDLoc DL(N); |
22725 | SDValue N0 = N->getOperand(0); |
22726 | SDValue Zero = DAG.getConstant(0, DL, VT); |
22727 | APInt Lg2Mask = APInt::getLowBitsSet(VT.getSizeInBits(), Lg2); |
22728 | SDValue Pow2MinusOne = DAG.getConstant(Lg2Mask, DL, VT); |
22729 | |
22730 | |
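      | // If N0 is negative, add 2^Lg2 - 1 first so the arithmetic shift rounds toward zero. |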
22731 | SDValue Cmp = DAG.getSetCC(DL, MVT::i8, N0, Zero, ISD::SETLT); |
22732 | SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne); |
22733 | SDValue CMov = DAG.getNode(ISD::SELECT, DL, VT, Cmp, Add, N0); |
22734 | |
22735 | Created.push_back(Cmp.getNode()); |
22736 | Created.push_back(Add.getNode()); |
22737 | Created.push_back(CMov.getNode()); |
22738 | |
22739 | |
22740 | SDValue SRA = |
22741 | DAG.getNode(ISD::SRA, DL, VT, CMov, DAG.getConstant(Lg2, DL, MVT::i8)); |
22742 | |
22743 | |
22744 | |
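      | // Dividing by a positive value: done. Otherwise negate the result. |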
22745 | if (Divisor.isNonNegative()) |
22746 | return SRA; |
22747 | |
22748 | Created.push_back(SRA.getNode()); |
22749 | return DAG.getNode(ISD::SUB, DL, VT, Zero, SRA); |
22750 | } |
22751 | |
22752 | |
22753 | |
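      | // Turn an AND that is only compared against zero into a BT (bit test), |
      | // taking the bit index from a shifted 1 or a power-of-2 mask. |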
22754 | static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC, |
22755 | const SDLoc &dl, SelectionDAG &DAG, |
22756 | SDValue &X86CC) { |
22757 | assert(And.getOpcode() == ISD::AND && "Expected AND node!"); |
22758 | SDValue Op0 = And.getOperand(0); |
22759 | SDValue Op1 = And.getOperand(1); |
22760 | if (Op0.getOpcode() == ISD::TRUNCATE) |
22761 | Op0 = Op0.getOperand(0); |
22762 | if (Op1.getOpcode() == ISD::TRUNCATE) |
22763 | Op1 = Op1.getOperand(0); |
22764 | |
22765 | SDValue Src, BitNo; |
22766 | if (Op1.getOpcode() == ISD::SHL) |
22767 | std::swap(Op0, Op1); |
22768 | if (Op0.getOpcode() == ISD::SHL) { |
22769 | if (isOneConstant(Op0.getOperand(0))) { |
22770 | |
22771 | |
22772 | unsigned BitWidth = Op0.getValueSizeInBits(); |
22773 | unsigned AndBitWidth = And.getValueSizeInBits(); |
22774 | if (BitWidth > AndBitWidth) { |
22775 | KnownBits Known = DAG.computeKnownBits(Op0); |
22776 | if (Known.countMinLeadingZeros() < BitWidth - AndBitWidth) |
22777 | return SDValue(); |
22778 | } |
22779 | Src = Op1; |
22780 | BitNo = Op0.getOperand(1); |
22781 | } |
22782 | } else if (Op1.getOpcode() == ISD::Constant) { |
22783 | ConstantSDNode *AndRHS = cast<ConstantSDNode>(Op1); |
22784 | uint64_t AndRHSVal = AndRHS->getZExtValue(); |
22785 | SDValue AndLHS = Op0; |
22786 | |
22787 | if (AndRHSVal == 1 && AndLHS.getOpcode() == ISD::SRL) { |
22788 | Src = AndLHS.getOperand(0); |
22789 | BitNo = AndLHS.getOperand(1); |
22790 | } else { |
22791 | |
22792 | |
22793 | bool OptForSize = DAG.shouldOptForSize(); |
22794 | if ((!isUInt<32>(AndRHSVal) || (OptForSize && !isUInt<8>(AndRHSVal))) && |
22795 | isPowerOf2_64(AndRHSVal)) { |
22796 | Src = AndLHS; |
22797 | BitNo = DAG.getConstant(Log2_64_Ceil(AndRHSVal), dl, |
22798 | Src.getValueType()); |
22799 | } |
22800 | } |
22801 | } |
22802 | |
22803 | |
22804 | if (!Src.getNode()) |
22805 | return SDValue(); |
22806 | |
22807 | |
22808 | |
22809 | |
22810 | |
22811 | |
22812 | if (Src.getValueType() == MVT::i8 || Src.getValueType() == MVT::i16) |
22813 | Src = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Src); |
22814 | |
22815 | |
22816 | |
22817 | |
22818 | |
22819 | if (Src.getValueType() == MVT::i64 && |
22820 | DAG.MaskedValueIsZero(BitNo, APInt(BitNo.getValueSizeInBits(), 32))) |
22821 | Src = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src); |
22822 | |
22823 | |
22824 | |
22825 | if (Src.getValueType() != BitNo.getValueType()) |
22826 | BitNo = DAG.getNode(ISD::ANY_EXTEND, dl, Src.getValueType(), BitNo); |
22827 | |
22828 | X86CC = DAG.getTargetConstant(CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B, |
22829 | dl, MVT::i8); |
22830 | return DAG.getNode(X86ISD::BT, dl, MVT::i32, Src, BitNo); |
22831 | } |
22832 | |
22833 | |
22834 | |
22835 | static unsigned translateX86FSETCC(ISD::CondCode SetCCOpcode, SDValue &Op0, |
22836 | SDValue &Op1, bool &IsAlwaysSignaling) { |
22837 | unsigned SSECC; |
22838 | bool Swap = false; |
22839 | |
22840 | |
22841 | |
22842 | |
22843 | |
22844 | |
22845 | |
22846 | |
22847 | |
22848 | |
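      | // SSE/AVX compare-predicate immediates used below: |
      | //   0 - EQ,  1 - LT,  2 - LE,  3 - UNORD, |
      | //   4 - NEQ, 5 - NLT, 6 - NLE, 7 - ORD, |
      | //   8 - EQ_UQ and 12 - NEQ_OQ (AVX-only; emulated with two compares before AVX). |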
22849 | switch (SetCCOpcode) { |
22850 | default: llvm_unreachable("Unexpected SETCC condition"); |
22851 | case ISD::SETOEQ: |
22852 | case ISD::SETEQ: SSECC = 0; break; |
22853 | case ISD::SETOGT: |
22854 | case ISD::SETGT: Swap = true; LLVM_FALLTHROUGH; |
22855 | case ISD::SETLT: |
22856 | case ISD::SETOLT: SSECC = 1; break; |
22857 | case ISD::SETOGE: |
22858 | case ISD::SETGE: Swap = true; LLVM_FALLTHROUGH; |
22859 | case ISD::SETLE: |
22860 | case ISD::SETOLE: SSECC = 2; break; |
22861 | case ISD::SETUO: SSECC = 3; break; |
22862 | case ISD::SETUNE: |
22863 | case ISD::SETNE: SSECC = 4; break; |
22864 | case ISD::SETULE: Swap = true; LLVM_FALLTHROUGH; |
22865 | case ISD::SETUGE: SSECC = 5; break; |
22866 | case ISD::SETULT: Swap = true; LLVM_FALLTHROUGH; |
22867 | case ISD::SETUGT: SSECC = 6; break; |
22868 | case ISD::SETO: SSECC = 7; break; |
22869 | case ISD::SETUEQ: SSECC = 8; break; |
22870 | case ISD::SETONE: SSECC = 12; break; |
22871 | } |
22872 | if (Swap) |
22873 | std::swap(Op0, Op1); |
22874 | |
22875 | switch (SetCCOpcode) { |
22876 | default: |
22877 | IsAlwaysSignaling = true; |
22878 | break; |
22879 | case ISD::SETEQ: |
22880 | case ISD::SETOEQ: |
22881 | case ISD::SETUEQ: |
22882 | case ISD::SETNE: |
22883 | case ISD::SETONE: |
22884 | case ISD::SETUNE: |
22885 | case ISD::SETO: |
22886 | case ISD::SETUO: |
22887 | IsAlwaysSignaling = false; |
22888 | break; |
22889 | } |
22890 | |
22891 | return SSECC; |
22892 | } |
22893 | |
22894 | |
22895 | |
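      | // Break a wide integer VSETCC into two half-width SETCCs and concatenate the results. |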
22896 | static SDValue splitIntVSETCC(EVT VT, SDValue LHS, SDValue RHS, |
22897 | ISD::CondCode Cond, SelectionDAG &DAG, |
22898 | const SDLoc &dl) { |
22899 | assert(VT.isInteger() && VT == LHS.getValueType() && |
22900 | VT == RHS.getValueType() && "Unsupported VTs!"); |
22901 | |
22902 | SDValue CC = DAG.getCondCode(Cond); |
22903 | |
22904 | |
22905 | SDValue LHS1, LHS2; |
22906 | std::tie(LHS1, LHS2) = splitVector(LHS, DAG, dl); |
22907 | |
22908 | |
22909 | SDValue RHS1, RHS2; |
22910 | std::tie(RHS1, RHS2) = splitVector(RHS, DAG, dl); |
22911 | |
22912 | |
22913 | EVT LoVT, HiVT; |
22914 | std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT); |
22915 | return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, |
22916 | DAG.getNode(ISD::SETCC, dl, LoVT, LHS1, RHS1, CC), |
22917 | DAG.getNode(ISD::SETCC, dl, HiVT, LHS2, RHS2, CC)); |
22918 | } |
22919 | |
22920 | static SDValue LowerIntVSETCC_AVX512(SDValue Op, SelectionDAG &DAG) { |
22921 | |
22922 | SDValue Op0 = Op.getOperand(0); |
22923 | SDValue Op1 = Op.getOperand(1); |
22924 | SDValue CC = Op.getOperand(2); |
22925 | MVT VT = Op.getSimpleValueType(); |
22926 | SDLoc dl(Op); |
22927 | |
22928 | assert(VT.getVectorElementType() == MVT::i1 && |
22929 | "Cannot set masked compare for this operation"); |
22930 | |
22931 | ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get(); |
22932 | |
22933 | |
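      | // Prefer SETGT over SETLT: swap both the operands and the predicate. |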
22934 | if (SetCCOpcode == ISD::SETLT) { |
22935 | SetCCOpcode = ISD::getSetCCSwappedOperands(SetCCOpcode); |
22936 | std::swap(Op0, Op1); |
22937 | } |
22938 | |
22939 | return DAG.getSetCC(dl, VT, Op0, Op1, SetCCOpcode); |
22940 | } |
22941 | |
22942 | |
22943 | |
22944 | |
22945 | |
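      | // Return V with every constant element incremented (IsInc) or decremented by |
      | // one, or nothing if an element isn't a plain constant or would wrap. |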
22946 | static SDValue incDecVectorConstant(SDValue V, SelectionDAG &DAG, bool IsInc) { |
22947 | auto *BV = dyn_cast<BuildVectorSDNode>(V.getNode()); |
22948 | if (!BV) |
22949 | return SDValue(); |
22950 | |
22951 | MVT VT = V.getSimpleValueType(); |
22952 | MVT EltVT = VT.getVectorElementType(); |
22953 | unsigned NumElts = VT.getVectorNumElements(); |
22954 | SmallVector<SDValue, 8> NewVecC; |
22955 | SDLoc DL(V); |
22956 | for (unsigned i = 0; i < NumElts; ++i) { |
22957 | auto *Elt = dyn_cast<ConstantSDNode>(BV->getOperand(i)); |
22958 | if (!Elt || Elt->isOpaque() || Elt->getSimpleValueType(0) != EltVT) |
22959 | return SDValue(); |
22960 | |
22961 | |
22962 | const APInt &EltC = Elt->getAPIntValue(); |
22963 | if ((IsInc && EltC.isMaxValue()) || (!IsInc && EltC.isNullValue())) |
22964 | return SDValue(); |
22965 | |
22966 | NewVecC.push_back(DAG.getConstant(EltC + (IsInc ? 1 : -1), DL, EltVT)); |
22967 | } |
22968 | |
22969 | return DAG.getBuildVector(VT, DL, NewVecC); |
22970 | } |
22971 | |
22972 | |
22973 | |
22974 | |
22975 | |
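      | // Lower unsigned compares via saturating subtraction: x <= y iff |
      | // usubsat(x, y) == 0. Strict predicates are first adjusted by +/-1. |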
22976 | static SDValue LowerVSETCCWithSUBUS(SDValue Op0, SDValue Op1, MVT VT, |
22977 | ISD::CondCode Cond, const SDLoc &dl, |
22978 | const X86Subtarget &Subtarget, |
22979 | SelectionDAG &DAG) { |
22980 | if (!Subtarget.hasSSE2()) |
22981 | return SDValue(); |
22982 | |
22983 | MVT VET = VT.getVectorElementType(); |
22984 | if (VET != MVT::i8 && VET != MVT::i16) |
22985 | return SDValue(); |
22986 | |
22987 | switch (Cond) { |
22988 | default: |
22989 | return SDValue(); |
22990 | case ISD::SETULT: { |
22991 | |
22992 | |
22993 | |
22994 | |
22995 | |
22996 | if (Subtarget.hasAVX()) |
22997 | return SDValue(); |
22998 | SDValue ULEOp1 = incDecVectorConstant(Op1, DAG, false); |
22999 | if (!ULEOp1) |
23000 | return SDValue(); |
23001 | Op1 = ULEOp1; |
23002 | break; |
23003 | } |
23004 | case ISD::SETUGT: { |
23005 | |
23006 | |
23007 | |
23008 | |
23009 | SDValue UGEOp1 = incDecVectorConstant(Op1, DAG, true); |
23010 | if (!UGEOp1) |
23011 | return SDValue(); |
23012 | Op1 = Op0; |
23013 | Op0 = UGEOp1; |
23014 | break; |
23015 | } |
23016 | |
23017 | case ISD::SETUGE: |
23018 | std::swap(Op0, Op1); |
23019 | break; |
23020 | case ISD::SETULE: |
23021 | break; |
23022 | } |
23023 | |
23024 | SDValue Result = DAG.getNode(ISD::USUBSAT, dl, VT, Op0, Op1); |
23025 | return DAG.getNode(X86ISD::PCMPEQ, dl, VT, Result, |
23026 | DAG.getConstant(0, dl, VT)); |
23027 | } |
23028 | |
23029 | static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget, |
23030 | SelectionDAG &DAG) { |
23031 | bool IsStrict = Op.getOpcode() == ISD::STRICT_FSETCC || |
23032 | Op.getOpcode() == ISD::STRICT_FSETCCS; |
23033 | SDValue Op0 = Op.getOperand(IsStrict ? 1 : 0); |
23034 | SDValue Op1 = Op.getOperand(IsStrict ? 2 : 1); |
23035 | SDValue CC = Op.getOperand(IsStrict ? 3 : 2); |
23036 | MVT VT = Op->getSimpleValueType(0); |
23037 | ISD::CondCode Cond = cast<CondCodeSDNode>(CC)->get(); |
23038 | bool isFP = Op1.getSimpleValueType().isFloatingPoint(); |
23039 | SDLoc dl(Op); |
23040 | |
23041 | if (isFP) { |
23042 | #ifndef NDEBUG |
23043 | MVT EltVT = Op0.getSimpleValueType().getVectorElementType(); |
23044 | assert(EltVT == MVT::f32 || EltVT == MVT::f64); |
23045 | #endif |
23046 | |
23047 | bool IsSignaling = Op.getOpcode() == ISD::STRICT_FSETCCS; |
23048 | SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue(); |
23049 | |
23050 | |
23051 | |
23052 | |
23053 | |
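      | // With AVX-512 and a legal i1 result, compare straight into a mask register |
      | // (CMPM); otherwise compare in the vector domain (CMPP) and convert after. |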
23054 | unsigned Opc; |
23055 | if (Subtarget.hasAVX512() && VT.getVectorElementType() == MVT::i1 && |
23056 | (!IsStrict || Subtarget.hasVLX() || |
23057 | Op0.getSimpleValueType().is512BitVector())) { |
23058 | assert(VT.getVectorNumElements() <= 16); |
23059 | Opc = IsStrict ? X86ISD::STRICT_CMPM : X86ISD::CMPM; |
23060 | } else { |
23061 | Opc = IsStrict ? X86ISD::STRICT_CMPP : X86ISD::CMPP; |
23062 | |
23063 | |
23064 | |
23065 | VT = Op0.getSimpleValueType(); |
23066 | } |
23067 | |
23068 | SDValue Cmp; |
23069 | bool IsAlwaysSignaling; |
23070 | unsigned SSECC = translateX86FSETCC(Cond, Op0, Op1, IsAlwaysSignaling); |
23071 | if (!Subtarget.hasAVX()) { |
23072 | |
23073 | |
23074 | |
23075 | |
23076 | |
23077 | |
23078 | |
23079 | if (IsStrict && IsAlwaysSignaling && !IsSignaling) |
23080 | return SDValue(); |
23081 | |
23082 | |
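      | // Emulate a signaling compare by also emitting an LT compare (always |
      | // signaling), purely for its exception side effect. |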
23083 | if (IsStrict && !IsAlwaysSignaling && IsSignaling) { |
23084 | SDValue SignalCmp = DAG.getNode( |
23085 | Opc, dl, {VT, MVT::Other}, |
23086 | {Chain, Op0, Op1, DAG.getTargetConstant(1, dl, MVT::i8)}); |
23087 | |
23088 | |
23089 | |
23090 | |
23091 | SignalCmp->setFlags(Op->getFlags()); |
23092 | Chain = SignalCmp.getValue(1); |
23093 | } |
23094 | |
23095 | |
23096 | |
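      | // Predicates 8 (EQ_UQ) and 12 (NEQ_OQ) don't exist before AVX; build them as |
      | // UNORD|EQ and ORD&NEQ from two compares. |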
23097 | if (SSECC >= 8) { |
23098 | |
23099 | unsigned CC0, CC1; |
23100 | unsigned CombineOpc; |
23101 | if (Cond == ISD::SETUEQ) { |
23102 | CC0 = 3; |
23103 | CC1 = 0; |
23104 | CombineOpc = X86ISD::FOR; |
23105 | } else { |
23106 | assert(Cond == ISD::SETONE); |
23107 | CC0 = 7; |
23108 | CC1 = 4; |
23109 | CombineOpc = X86ISD::FAND; |
23110 | } |
23111 | |
23112 | SDValue Cmp0, Cmp1; |
23113 | if (IsStrict) { |
23114 | Cmp0 = DAG.getNode( |
23115 | Opc, dl, {VT, MVT::Other}, |
23116 | {Chain, Op0, Op1, DAG.getTargetConstant(CC0, dl, MVT::i8)}); |
23117 | Cmp1 = DAG.getNode( |
23118 | Opc, dl, {VT, MVT::Other}, |
23119 | {Chain, Op0, Op1, DAG.getTargetConstant(CC1, dl, MVT::i8)}); |
23120 | Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Cmp0.getValue(1), |
23121 | Cmp1.getValue(1)); |
23122 | } else { |
23123 | Cmp0 = DAG.getNode( |
23124 | Opc, dl, VT, Op0, Op1, DAG.getTargetConstant(CC0, dl, MVT::i8)); |
23125 | Cmp1 = DAG.getNode( |
23126 | Opc, dl, VT, Op0, Op1, DAG.getTargetConstant(CC1, dl, MVT::i8)); |
23127 | } |
23128 | Cmp = DAG.getNode(CombineOpc, dl, VT, Cmp0, Cmp1); |
23129 | } else { |
23130 | if (IsStrict) { |
23131 | Cmp = DAG.getNode( |
23132 | Opc, dl, {VT, MVT::Other}, |
23133 | {Chain, Op0, Op1, DAG.getTargetConstant(SSECC, dl, MVT::i8)}); |
23134 | Chain = Cmp.getValue(1); |
23135 | } else |
23136 | Cmp = DAG.getNode( |
23137 | Opc, dl, VT, Op0, Op1, DAG.getTargetConstant(SSECC, dl, MVT::i8)); |
23138 | } |
23139 | } else { |
23140 | |
23141 | if (IsStrict) { |
23142 | |
23143 | SSECC |= (IsAlwaysSignaling ^ IsSignaling) << 4; |
23144 | Cmp = DAG.getNode( |
23145 | Opc, dl, {VT, MVT::Other}, |
23146 | {Chain, Op0, Op1, DAG.getTargetConstant(SSECC, dl, MVT::i8)}); |
23147 | Chain = Cmp.getValue(1); |
23148 | } else |
23149 | Cmp = DAG.getNode( |
23150 | Opc, dl, VT, Op0, Op1, DAG.getTargetConstant(SSECC, dl, MVT::i8)); |
23151 | } |
23152 | |
23153 | if (VT.getFixedSizeInBits() > |
23154 | Op.getSimpleValueType().getFixedSizeInBits()) { |
23155 | |
23156 | |
23157 | EVT CastVT = EVT(VT).changeVectorElementTypeToInteger(); |
23158 | Cmp = DAG.getBitcast(CastVT, Cmp); |
23159 | Cmp = DAG.getSetCC(dl, Op.getSimpleValueType(), Cmp, |
23160 | DAG.getConstant(0, dl, CastVT), ISD::SETNE); |
23161 | } else { |
23162 | |
23163 | |
23164 | |
23165 | Cmp = DAG.getBitcast(Op.getSimpleValueType(), Cmp); |
23166 | } |
23167 | |
23168 | if (IsStrict) |
23169 | return DAG.getMergeValues({Cmp, Chain}, dl); |
23170 | |
23171 | return Cmp; |
23172 | } |
23173 | |
23174 | assert(!IsStrict && "Strict SETCC only handles FP operands."); |
23175 | |
23176 | MVT VTOp0 = Op0.getSimpleValueType(); |
23177 | (void)VTOp0; |
23178 | assert(VTOp0 == Op1.getSimpleValueType() && |
23179 | "Expected operands with same type!"); |
23180 | assert(VT.getVectorNumElements() == VTOp0.getVectorNumElements() && |
23181 | "Invalid number of packed elements for source and destination!"); |
23182 | |
23183 | |
23184 | |
23185 | assert((Subtarget.hasAVX512() || (VT == VTOp0)) && |
23186 | "Value types for source and destination must be the same!"); |
23187 | |
23188 | |
23189 | if (VT.getVectorElementType() == MVT::i1) { |
23190 | |
23191 | |
23192 | assert((VTOp0.getScalarSizeInBits() >= 32 || Subtarget.hasBWI()) && |
23193 | "Unexpected operand type"); |
23194 | return LowerIntVSETCC_AVX512(Op, DAG); |
23195 | } |
23196 | |
23197 | |
23198 | if (VT.is128BitVector() && Subtarget.hasXOP()) { |
23199 | |
23200 | unsigned CmpMode = 0; |
23201 | switch (Cond) { |
23202 | default: llvm_unreachable("Unexpected SETCC condition"); |
23203 | case ISD::SETULT: |
23204 | case ISD::SETLT: CmpMode = 0x00; break; |
23205 | case ISD::SETULE: |
23206 | case ISD::SETLE: CmpMode = 0x01; break; |
23207 | case ISD::SETUGT: |
23208 | case ISD::SETGT: CmpMode = 0x02; break; |
23209 | case ISD::SETUGE: |
23210 | case ISD::SETGE: CmpMode = 0x03; break; |
23211 | case ISD::SETEQ: CmpMode = 0x04; break; |
23212 | case ISD::SETNE: CmpMode = 0x05; break; |
23213 | } |
23214 | |
23215 | |
23216 | unsigned Opc = |
23217 | ISD::isUnsignedIntSetCC(Cond) ? X86ISD::VPCOMU : X86ISD::VPCOM; |
23218 | |
23219 | return DAG.getNode(Opc, dl, VT, Op0, Op1, |
23220 | DAG.getTargetConstant(CmpMode, dl, MVT::i8)); |
23221 | } |
23222 | |
23223 | |
23224 | |
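      | // (X & C) != 0 with every element of C a power of two becomes (X & C) == C, |
      | // which fits the PCMPEQ-based lowering below. |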
23225 | if (Cond == ISD::SETNE && ISD::isBuildVectorAllZeros(Op1.getNode())) { |
23226 | SDValue BC0 = peekThroughBitcasts(Op0); |
23227 | if (BC0.getOpcode() == ISD::AND) { |
23228 | APInt UndefElts; |
23229 | SmallVector<APInt, 64> EltBits; |
23230 | if (getTargetConstantBitsFromNode(BC0.getOperand(1), |
23231 | VT.getScalarSizeInBits(), UndefElts, |
23232 | EltBits, false, false)) { |
23233 | if (llvm::all_of(EltBits, [](APInt &V) { return V.isPowerOf2(); })) { |
23234 | Cond = ISD::SETEQ; |
23235 | Op1 = DAG.getBitcast(VT, BC0.getOperand(1)); |
23236 | } |
23237 | } |
23238 | } |
23239 | } |
23240 | |
23241 | |
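      | // (X & pow2) == pow2: shift the tested bit into the sign position, then |
      | // arithmetically shift it across the whole element. |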
23242 | if (Cond == ISD::SETEQ && Op0.getOpcode() == ISD::AND && |
23243 | Op0.getOperand(1) == Op1 && Op0.hasOneUse()) { |
23244 | ConstantSDNode *C1 = isConstOrConstSplat(Op1); |
23245 | if (C1 && C1->getAPIntValue().isPowerOf2()) { |
23246 | unsigned BitWidth = VT.getScalarSizeInBits(); |
23247 | unsigned ShiftAmt = BitWidth - C1->getAPIntValue().logBase2() - 1; |
23248 | |
23249 | SDValue Result = Op0.getOperand(0); |
23250 | Result = DAG.getNode(ISD::SHL, dl, VT, Result, |
23251 | DAG.getConstant(ShiftAmt, dl, VT)); |
23252 | Result = DAG.getNode(ISD::SRA, dl, VT, Result, |
23253 | DAG.getConstant(BitWidth - 1, dl, VT)); |
23254 | return Result; |
23255 | } |
23256 | } |
23257 | |
23258 | |
23259 | if (VT.is256BitVector() && !Subtarget.hasInt256()) |
23260 | return splitIntVSETCC(VT, Op0, Op1, Cond, DAG, dl); |
23261 | |
23262 | if (VT == MVT::v32i16 || VT == MVT::v64i8) { |
23263 | assert(!Subtarget.hasBWI() && "Unexpected VT with AVX512BW!"); |
23264 | return splitIntVSETCC(VT, Op0, Op1, Cond, DAG, dl); |
23265 | } |
23266 | |
23267 | |
23268 | |
23269 | |
23270 | |
23271 | |
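      | // NE against the min/max signed value (or against zero with a known-clear |
      | // sign bit) becomes a cheaper GT/LT. |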
23272 | APInt ConstValue; |
23273 | if (Cond == ISD::SETNE && |
23274 | ISD::isConstantSplatVector(Op1.getNode(), ConstValue)) { |
23275 | if (ConstValue.isMinSignedValue()) |
23276 | Cond = ISD::SETGT; |
23277 | else if (ConstValue.isMaxSignedValue()) |
23278 | Cond = ISD::SETLT; |
23279 | else if (ConstValue.isNullValue() && DAG.SignBitIsZero(Op0)) |
23280 | Cond = ISD::SETGT; |
23281 | } |
23282 | |
23283 | |
23284 | |
23285 | |
23286 | |
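      | // SSE only has signed compares, so unsigned predicates need the sign bits |
      | // flipped unless both operands are known non-negative. |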
23287 | bool FlipSigns = ISD::isUnsignedIntSetCC(Cond) && |
23288 | !(DAG.SignBitIsZero(Op0) && DAG.SignBitIsZero(Op1)); |
23289 | |
23290 | |
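      | // With legal UMIN/UMAX, unsigned LE/GE become x == umin(x,y) / x == umax(x,y), |
      | // i.e. a single PCMPEQ. |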
23291 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
23292 | if (ISD::isUnsignedIntSetCC(Cond) && |
23293 | (FlipSigns || ISD::isTrueWhenEqual(Cond)) && |
23294 | TLI.isOperationLegal(ISD::UMIN, VT)) { |
23295 | |
23296 | |
23297 | if (Cond == ISD::SETUGT) { |
23298 | |
23299 | if (SDValue UGTOp1 = incDecVectorConstant(Op1, DAG, true)) { |
23300 | Op1 = UGTOp1; |
23301 | Cond = ISD::SETUGE; |
23302 | } |
23303 | } |
23304 | if (Cond == ISD::SETULT) { |
23305 | |
23306 | if (SDValue ULTOp1 = incDecVectorConstant(Op1, DAG, false)) { |
23307 | Op1 = ULTOp1; |
23308 | Cond = ISD::SETULE; |
23309 | } |
23310 | } |
23311 | bool Invert = false; |
23312 | unsigned Opc; |
23313 | switch (Cond) { |
23314 | default: llvm_unreachable("Unexpected condition code"); |
23315 | case ISD::SETUGT: Invert = true; LLVM_FALLTHROUGH; |
23316 | case ISD::SETULE: Opc = ISD::UMIN; break; |
23317 | case ISD::SETULT: Invert = true; LLVM_FALLTHROUGH; |
23318 | case ISD::SETUGE: Opc = ISD::UMAX; break; |
23319 | } |
23320 | |
23321 | SDValue Result = DAG.getNode(Opc, dl, VT, Op0, Op1); |
23322 | Result = DAG.getNode(X86ISD::PCMPEQ, dl, VT, Op0, Result); |
23323 | |
23324 | |
23325 | if (Invert) |
23326 | Result = DAG.getNOT(dl, Result, VT); |
23327 | |
23328 | return Result; |
23329 | } |
23330 | |
23331 | |
23332 | if (FlipSigns) |
23333 | if (SDValue V = |
23334 | LowerVSETCCWithSUBUS(Op0, Op1, VT, Cond, dl, Subtarget, DAG)) |
23335 | return V; |
23336 | |
23337 | |
23338 | |
23339 | |
23340 | unsigned Opc = (Cond == ISD::SETEQ || Cond == ISD::SETNE) ? X86ISD::PCMPEQ |
23341 | : X86ISD::PCMPGT; |
23342 | bool Swap = Cond == ISD::SETLT || Cond == ISD::SETULT || |
23343 | Cond == ISD::SETGE || Cond == ISD::SETUGE; |
23344 | bool Invert = Cond == ISD::SETNE || |
23345 | (Cond != ISD::SETEQ && ISD::isTrueWhenEqual(Cond)); |
23346 | |
23347 | if (Swap) |
23348 | std::swap(Op0, Op1); |
23349 | |
23350 | |
23351 | |
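      | // SSE2 has no 64-bit PCMPGT/PCMPEQ; emulate v2i64 compares with 32-bit ops |
      | // and shuffles. |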
23352 | if (VT == MVT::v2i64) { |
23353 | if (Opc == X86ISD::PCMPGT && !Subtarget.hasSSE42()) { |
23354 | assert(Subtarget.hasSSE2() && "Don't know how to lower!"); |
23355 | |
23356 | |
23357 | |
23358 | if (!FlipSigns && !Invert && ISD::isBuildVectorAllZeros(Op0.getNode())) { |
23359 | Op0 = DAG.getConstant(0, dl, MVT::v4i32); |
23360 | Op1 = DAG.getBitcast(MVT::v4i32, Op1); |
23361 | |
23362 | SDValue GT = DAG.getNode(X86ISD::PCMPGT, dl, MVT::v4i32, Op0, Op1); |
23363 | static const int MaskHi[] = { 1, 1, 3, 3 }; |
23364 | SDValue Result = DAG.getVectorShuffle(MVT::v4i32, dl, GT, GT, MaskHi); |
23365 | |
23366 | return DAG.getBitcast(VT, Result); |
23367 | } |
23368 | |
23369 | if (!FlipSigns && !Invert && ISD::isBuildVectorAllOnes(Op1.getNode())) { |
23370 | Op0 = DAG.getBitcast(MVT::v4i32, Op0); |
23371 | Op1 = DAG.getConstant(-1, dl, MVT::v4i32); |
23372 | |
23373 | SDValue GT = DAG.getNode(X86ISD::PCMPGT, dl, MVT::v4i32, Op0, Op1); |
23374 | static const int MaskHi[] = { 1, 1, 3, 3 }; |
23375 | SDValue Result = DAG.getVectorShuffle(MVT::v4i32, dl, GT, GT, MaskHi); |
23376 | |
23377 | return DAG.getBitcast(VT, Result); |
23378 | } |
23379 | |
23380 | |
23381 | |
23382 | |
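      | // Always flip the low halves' sign bits (they must compare unsigned); for |
      | // unsigned predicates flip the high halves' too. Then GT = GThi | (EQhi & GTlo). |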
23383 | SDValue SB; |
23384 | if (FlipSigns) { |
23385 | SB = DAG.getConstant(0x8000000080000000ULL, dl, MVT::v2i64); |
23386 | } else { |
23387 | SB = DAG.getConstant(0x0000000080000000ULL, dl, MVT::v2i64); |
23388 | } |
23389 | Op0 = DAG.getNode(ISD::XOR, dl, MVT::v2i64, Op0, SB); |
23390 | Op1 = DAG.getNode(ISD::XOR, dl, MVT::v2i64, Op1, SB); |
23391 | |
23392 | |
23393 | Op0 = DAG.getBitcast(MVT::v4i32, Op0); |
23394 | Op1 = DAG.getBitcast(MVT::v4i32, Op1); |
23395 | |
23396 | |
23397 | SDValue GT = DAG.getNode(X86ISD::PCMPGT, dl, MVT::v4i32, Op0, Op1); |
23398 | SDValue EQ = DAG.getNode(X86ISD::PCMPEQ, dl, MVT::v4i32, Op0, Op1); |
23399 | |
23400 | |
23401 | static const int MaskHi[] = { 1, 1, 3, 3 }; |
23402 | static const int MaskLo[] = { 0, 0, 2, 2 }; |
23403 | SDValue EQHi = DAG.getVectorShuffle(MVT::v4i32, dl, EQ, EQ, MaskHi); |
23404 | SDValue GTLo = DAG.getVectorShuffle(MVT::v4i32, dl, GT, GT, MaskLo); |
23405 | SDValue GTHi = DAG.getVectorShuffle(MVT::v4i32, dl, GT, GT, MaskHi); |
23406 | |
23407 | SDValue Result = DAG.getNode(ISD::AND, dl, MVT::v4i32, EQHi, GTLo); |
23408 | Result = DAG.getNode(ISD::OR, dl, MVT::v4i32, Result, GTHi); |
23409 | |
23410 | if (Invert) |
23411 | Result = DAG.getNOT(dl, Result, MVT::v4i32); |
23412 | |
23413 | return DAG.getBitcast(VT, Result); |
23414 | } |
23415 | |
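      | // Without SSE4.1's PCMPEQQ, compare as v4i32 and AND each 32-bit result with |
      | // its swapped neighbour. |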
23416 | if (Opc == X86ISD::PCMPEQ && !Subtarget.hasSSE41()) { |
23417 | |
23418 | |
23419 | assert(Subtarget.hasSSE2() && !FlipSigns && "Don't know how to lower!"); |
23420 | |
23421 | |
23422 | Op0 = DAG.getBitcast(MVT::v4i32, Op0); |
23423 | Op1 = DAG.getBitcast(MVT::v4i32, Op1); |
23424 | |
23425 | |
23426 | SDValue Result = DAG.getNode(Opc, dl, MVT::v4i32, Op0, Op1); |
23427 | |
23428 | |
23429 | static const int Mask[] = { 1, 0, 3, 2 }; |
23430 | SDValue Shuf = DAG.getVectorShuffle(MVT::v4i32, dl, Result, Result, Mask); |
23431 | Result = DAG.getNode(ISD::AND, dl, MVT::v4i32, Result, Shuf); |
23432 | |
23433 | if (Invert) |
23434 | Result = DAG.getNOT(dl, Result, MVT::v4i32); |
23435 | |
23436 | return DAG.getBitcast(VT, Result); |
23437 | } |
23438 | } |
23439 | |
23440 | |
23441 | |
23442 | if (FlipSigns) { |
23443 | MVT EltVT = VT.getVectorElementType(); |
23444 | SDValue SM = DAG.getConstant(APInt::getSignMask(EltVT.getSizeInBits()), dl, |
23445 | VT); |
23446 | Op0 = DAG.getNode(ISD::XOR, dl, VT, Op0, SM); |
23447 | Op1 = DAG.getNode(ISD::XOR, dl, VT, Op1, SM); |
23448 | } |
23449 | |
23450 | SDValue Result = DAG.getNode(Opc, dl, VT, Op0, Op1); |
23451 | |
23452 | |
23453 | if (Invert) |
23454 | Result = DAG.getNOT(dl, Result, VT); |
23455 | |
23456 | return Result; |
23457 | } |
23458 | |
23459 | |
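      | // Try to lower a setcc of a bitcast i1 mask vector against 0 or all-ones to |
      | // a KORTEST/KTEST of mask registers. |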
23460 | static SDValue EmitAVX512Test(SDValue Op0, SDValue Op1, ISD::CondCode CC, |
23461 | const SDLoc &dl, SelectionDAG &DAG, |
23462 | const X86Subtarget &Subtarget, |
23463 | SDValue &X86CC) { |
23464 | |
23465 | if (CC != ISD::SETEQ && CC != ISD::SETNE) |
23466 | return SDValue(); |
23467 | |
23468 | |
23469 | if (Op0.getOpcode() != ISD::BITCAST) |
23470 | return SDValue(); |
23471 | |
23472 | Op0 = Op0.getOperand(0); |
23473 | MVT VT = Op0.getSimpleValueType(); |
23474 | if (!(Subtarget.hasAVX512() && VT == MVT::v16i1) && |
23475 | !(Subtarget.hasDQI() && VT == MVT::v8i1) && |
23476 | !(Subtarget.hasBWI() && (VT == MVT::v32i1 || VT == MVT::v64i1))) |
23477 | return SDValue(); |
23478 | |
23479 | X86::CondCode X86Cond; |
23480 | if (isNullConstant(Op1)) { |
23481 | X86Cond = CC == ISD::SETEQ ? X86::COND_E : X86::COND_NE; |
23482 | } else if (isAllOnesConstant(Op1)) { |
23483 | |
23484 | X86Cond = CC == ISD::SETEQ ? X86::COND_B : X86::COND_AE; |
23485 | } else |
23486 | return SDValue(); |
23487 | |
23488 | |
23489 | bool KTestable = false; |
23490 | if (Subtarget.hasDQI() && (VT == MVT::v8i1 || VT == MVT::v16i1)) |
23491 | KTestable = true; |
23492 | if (Subtarget.hasBWI() && (VT == MVT::v32i1 || VT == MVT::v64i1)) |
23493 | KTestable = true; |
23494 | if (!isNullConstant(Op1)) |
23495 | KTestable = false; |
23496 | if (KTestable && Op0.getOpcode() == ISD::AND && Op0.hasOneUse()) { |
23497 | SDValue LHS = Op0.getOperand(0); |
23498 | SDValue RHS = Op0.getOperand(1); |
23499 | X86CC = DAG.getTargetConstant(X86Cond, dl, MVT::i8); |
23500 | return DAG.getNode(X86ISD::KTEST, dl, MVT::i32, LHS, RHS); |
23501 | } |
23502 | |
23503 | |
23504 | SDValue LHS = Op0; |
23505 | SDValue RHS = Op0; |
23506 | if (Op0.getOpcode() == ISD::OR && Op0.hasOneUse()) { |
23507 | LHS = Op0.getOperand(0); |
23508 | RHS = Op0.getOperand(1); |
23509 | } |
23510 | |
23511 | X86CC = DAG.getTargetConstant(X86Cond, dl, MVT::i8); |
23512 | return DAG.getNode(X86ISD::KORTEST, dl, MVT::i32, LHS, RHS); |
23513 | } |
23514 | |
23515 | |
23516 | |
23517 | SDValue X86TargetLowering::emitFlagsForSetcc(SDValue Op0, SDValue Op1, |
23518 | ISD::CondCode CC, const SDLoc &dl, |
23519 | SelectionDAG &DAG, |
23520 | SDValue &X86CC) const { |
23521 | |
23522 | |
23523 | |
23524 | |
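      | // Lower (X & (1 << N)) ==/!= 0 to BT(X, N) when the AND has no other use. |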
23525 | if (Op0.getOpcode() == ISD::AND && Op0.hasOneUse() && isNullConstant(Op1) && |
23526 | (CC == ISD::SETEQ || CC == ISD::SETNE)) { |
23527 | if (SDValue BT = LowerAndToBT(Op0, CC, dl, DAG, X86CC)) |
23528 | return BT; |
23529 | } |
23530 | |
23531 | |
23532 | |
23533 | if (isNullConstant(Op1) && (CC == ISD::SETEQ || CC == ISD::SETNE)) |
23534 | if (SDValue CmpZ = |
23535 | MatchVectorAllZeroTest(Op0, CC, dl, Subtarget, DAG, X86CC)) |
23536 | return CmpZ; |
23537 | |
23538 | |
23539 | if (SDValue Test = EmitAVX512Test(Op0, Op1, CC, dl, DAG, Subtarget, X86CC)) |
23540 | return Test; |
23541 | |
23542 | |
23543 | |
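      | // A setcc of a SETCC result against 0/1 just reuses the inner flags, |
      | // inverting the condition when needed. |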
23544 | if ((isOneConstant(Op1) || isNullConstant(Op1)) && |
23545 | (CC == ISD::SETEQ || CC == ISD::SETNE)) { |
23546 | |
23547 | |
23548 | if (Op0.getOpcode() == X86ISD::SETCC) { |
23549 | bool Invert = (CC == ISD::SETNE) ^ isNullConstant(Op1); |
23550 | |
23551 | X86CC = Op0.getOperand(0); |
23552 | if (Invert) { |
23553 | X86::CondCode CCode = (X86::CondCode)Op0.getConstantOperandVal(0); |
23554 | CCode = X86::GetOppositeBranchCondition(CCode); |
23555 | X86CC = DAG.getTargetConstant(CCode, dl, MVT::i8); |
23556 | } |
23557 | |
23558 | return Op0.getOperand(1); |
23559 | } |
23560 | } |
23561 | |
23562 | |
23563 | |
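      | // (X + -1) ==/!= -1 tests whether X is zero through the carry flag: |
      | // X + -1 wraps exactly when X != 0. |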
23564 | if (isAllOnesConstant(Op1) && Op0.getOpcode() == ISD::ADD && |
23565 | Op0.getOperand(1) == Op1 && (CC == ISD::SETEQ || CC == ISD::SETNE)) { |
23566 | if (isProfitableToUseFlagOp(Op0)) { |
23567 | SDVTList VTs = DAG.getVTList(Op0.getValueType(), MVT::i32); |
23568 | |
23569 | SDValue New = DAG.getNode(X86ISD::ADD, dl, VTs, Op0.getOperand(0), |
23570 | Op0.getOperand(1)); |
23571 | DAG.ReplaceAllUsesOfValueWith(SDValue(Op0.getNode(), 0), New); |
23572 | X86::CondCode CCode = CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B; |
23573 | X86CC = DAG.getTargetConstant(CCode, dl, MVT::i8); |
23574 | return SDValue(New.getNode(), 1); |
23575 | } |
23576 | } |
23577 | |
23578 | X86::CondCode CondCode = |
23579 | TranslateX86CC(CC, dl, false, Op0, Op1, DAG); |
23580 | assert(CondCode != X86::COND_INVALID && "Unexpected condition code!"); |
23581 | |
23582 | SDValue EFLAGS = EmitCmp(Op0, Op1, CondCode, dl, DAG, Subtarget); |
23583 | X86CC = DAG.getTargetConstant(CondCode, dl, MVT::i8); |
23584 | return EFLAGS; |
23585 | } |
23586 | |
23587 | SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { |
23588 | |
23589 | bool IsStrict = Op.getOpcode() == ISD::STRICT_FSETCC || |
23590 | Op.getOpcode() == ISD::STRICT_FSETCCS; |
23591 | MVT VT = Op->getSimpleValueType(0); |
23592 | |
23593 | if (VT.isVector()) return LowerVSETCC(Op, Subtarget, DAG); |
23594 | |
23595 | assert(VT == MVT::i8 && "SetCC type must be 8-bit integer"); |
23596 | SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue(); |
23597 | SDValue Op0 = Op.getOperand(IsStrict ? 1 : 0); |
23598 | SDValue Op1 = Op.getOperand(IsStrict ? 2 : 1); |
23599 | SDLoc dl(Op); |
23600 | ISD::CondCode CC = |
23601 | cast<CondCodeSDNode>(Op.getOperand(IsStrict ? 3 : 2))->get(); |
23602 | |
23603 | |
23604 | |
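      | // f128 compares become libcalls; soften the operands first. |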
23605 | if (Op0.getValueType() == MVT::f128) { |
23606 | softenSetCCOperands(DAG, MVT::f128, Op0, Op1, CC, dl, Op0, Op1, Chain, |
23607 | Op.getOpcode() == ISD::STRICT_FSETCCS); |
23608 | |
23609 | |
23610 | if (!Op1.getNode()) { |
23611 | assert(Op0.getValueType() == Op.getValueType() && |
23612 | "Unexpected setcc expansion!"); |
23613 | if (IsStrict) |
23614 | return DAG.getMergeValues({Op0, Chain}, dl); |
23615 | return Op0; |
23616 | } |
23617 | } |
23618 | |
23619 | if (Op0.getSimpleValueType().isInteger()) { |
23620 | |
23621 | |
23622 | |
23623 | |
23624 | |
23625 | |
23626 | |
23627 | |
23628 | |
23629 | |
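      | // Turn X > C into X >= C+1 when C+1 still fits the 32-bit immediate field |
      | // and an 8-bit immediate is not lost in the process. |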
23630 | if (auto *Op1C = dyn_cast<ConstantSDNode>(Op1)) { |
23631 | const APInt &Op1Val = Op1C->getAPIntValue(); |
23632 | if (!Op1Val.isNullValue()) { |
23633 | |
23634 | if ((CC == ISD::CondCode::SETGT && !Op1Val.isMaxSignedValue()) || |
23635 | (CC == ISD::CondCode::SETUGT && !Op1Val.isMaxValue())) { |
23636 | APInt Op1ValPlusOne = Op1Val + 1; |
23637 | if (Op1ValPlusOne.isSignedIntN(32) && |
23638 | (!Op1Val.isSignedIntN(8) || Op1ValPlusOne.isSignedIntN(8))) { |
23639 | Op1 = DAG.getConstant(Op1ValPlusOne, dl, Op0.getValueType()); |
23640 | CC = CC == ISD::CondCode::SETGT ? ISD::CondCode::SETGE |
23641 | : ISD::CondCode::SETUGE; |
23642 | } |
23643 | } |
23644 | } |
23645 | } |
23646 | |
23647 | SDValue X86CC; |
23648 | SDValue EFLAGS = emitFlagsForSetcc(Op0, Op1, CC, dl, DAG, X86CC); |
23649 | SDValue Res = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, X86CC, EFLAGS); |
23650 | return IsStrict ? DAG.getMergeValues({Res, Chain}, dl) : Res; |
23651 | } |
23652 | |
23653 | |
23654 | X86::CondCode CondCode = TranslateX86CC(CC, dl, true, Op0, Op1, DAG); |
23655 | if (CondCode == X86::COND_INVALID) |
23656 | return SDValue(); |
23657 | |
23658 | SDValue EFLAGS; |
23659 | if (IsStrict) { |
23660 | bool IsSignaling = Op.getOpcode() == ISD::STRICT_FSETCCS; |
23661 | EFLAGS = |
23662 | DAG.getNode(IsSignaling ? X86ISD::STRICT_FCMPS : X86ISD::STRICT_FCMP, |
23663 | dl, {MVT::i32, MVT::Other}, {Chain, Op0, Op1}); |
23664 | Chain = EFLAGS.getValue(1); |
23665 | } else { |
23666 | EFLAGS = DAG.getNode(X86ISD::FCMP, dl, MVT::i32, Op0, Op1); |
23667 | } |
23668 | |
23669 | SDValue X86CC = DAG.getTargetConstant(CondCode, dl, MVT::i8); |
23670 | SDValue Res = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, X86CC, EFLAGS); |
23671 | return IsStrict ? DAG.getMergeValues({Res, Chain}, dl) : Res; |
23672 | } |
23673 | |
23674 | SDValue X86TargetLowering::LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const { |
23675 | SDValue LHS = Op.getOperand(0); |
23676 | SDValue RHS = Op.getOperand(1); |
23677 | SDValue Carry = Op.getOperand(2); |
23678 | SDValue Cond = Op.getOperand(3); |
23679 | SDLoc DL(Op); |
23680 | |
23681 | assert(LHS.getSimpleValueType().isInteger() && "SETCCCARRY is integer only."); |
23682 | X86::CondCode CC = TranslateIntegerX86CC(cast<CondCodeSDNode>(Cond)->get()); |
23683 | |
23684 | |
23685 | EVT CarryVT = Carry.getValueType(); |
23686 | Carry = DAG.getNode(X86ISD::ADD, DL, DAG.getVTList(CarryVT, MVT::i32), |
23687 | Carry, DAG.getAllOnesConstant(DL, CarryVT)); |
23688 | |
23689 | SDVTList VTs = DAG.getVTList(LHS.getValueType(), MVT::i32); |
23690 | SDValue Cmp = DAG.getNode(X86ISD::SBB, DL, VTs, LHS, RHS, Carry.getValue(1)); |
23691 | return getSETCC(CC, Cmp.getValue(1), DL, DAG); |
23692 | } |
23693 | |
23694 | |
23695 | |
23696 | |
23697 | |
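      | // Map an ISD overflow opcode to the flag-producing X86 node plus the |
      | // condition code that reads the overflow bit. |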
23698 | static std::pair<SDValue, SDValue> |
23699 | getX86XALUOOp(X86::CondCode &Cond, SDValue Op, SelectionDAG &DAG) { |
23700 | assert(Op.getResNo() == 0 && "Unexpected result number!"); |
23701 | SDValue Value, Overflow; |
23702 | SDValue LHS = Op.getOperand(0); |
23703 | SDValue RHS = Op.getOperand(1); |
23704 | unsigned BaseOp = 0; |
23705 | SDLoc DL(Op); |
23706 | switch (Op.getOpcode()) { |
23707 | default: llvm_unreachable("Unknown ovf instruction!"); |
23708 | case ISD::SADDO: |
23709 | BaseOp = X86ISD::ADD; |
23710 | Cond = X86::COND_O; |
23711 | break; |
23712 | case ISD::UADDO: |
23713 | BaseOp = X86ISD::ADD; |
23714 | Cond = isOneConstant(RHS) ? X86::COND_E : X86::COND_B; |
23715 | break; |
23716 | case ISD::SSUBO: |
23717 | BaseOp = X86ISD::SUB; |
23718 | Cond = X86::COND_O; |
23719 | break; |
23720 | case ISD::USUBO: |
23721 | BaseOp = X86ISD::SUB; |
23722 | Cond = X86::COND_B; |
23723 | break; |
23724 | case ISD::SMULO: |
23725 | BaseOp = X86ISD::SMUL; |
23726 | Cond = X86::COND_O; |
23727 | break; |
23728 | case ISD::UMULO: |
23729 | BaseOp = X86ISD::UMUL; |
23730 | Cond = X86::COND_O; |
23731 | break; |
23732 | } |
23733 | |
23734 | if (BaseOp) { |
23735 | |
23736 | SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32); |
23737 | Value = DAG.getNode(BaseOp, DL, VTs, LHS, RHS); |
23738 | Overflow = Value.getValue(1); |
23739 | } |
23740 | |
23741 | return std::make_pair(Value, Overflow); |
23742 | } |
23743 | |
23744 | static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) { |
23745 | |
23746 | |
23747 | |
23748 | |
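      | // Lower the arithmetic to a node with an i32 flags result, then materialize |
      | // the overflow bit with SETCC. |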
23749 | SDLoc DL(Op); |
23750 | X86::CondCode Cond; |
23751 | SDValue Value, Overflow; |
23752 | std::tie(Value, Overflow) = getX86XALUOOp(Cond, Op, DAG); |
23753 | |
23754 | SDValue SetCC = getSETCC(Cond, Overflow, DL, DAG); |
23755 | assert(Op->getValueType(1) == MVT::i8 && "Unexpected VT!"); |
23756 | return DAG.getNode(ISD::MERGE_VALUES, DL, Op->getVTList(), Value, SetCC); |
23757 | } |
23758 | |
23759 | |
23760 | static bool isX86LogicalCmp(SDValue Op) { |
23761 | unsigned Opc = Op.getOpcode(); |
23762 | if (Opc == X86ISD::CMP || Opc == X86ISD::COMI || Opc == X86ISD::UCOMI || |
23763 | Opc == X86ISD::FCMP) |
23764 | return true; |
23765 | if (Op.getResNo() == 1 && |
23766 | (Opc == X86ISD::ADD || Opc == X86ISD::SUB || Opc == X86ISD::ADC || |
23767 | Opc == X86ISD::SBB || Opc == X86ISD::SMUL || Opc == X86ISD::UMUL || |
23768 | Opc == X86ISD::OR || Opc == X86ISD::XOR || Opc == X86ISD::AND)) |
23769 | return true; |
23770 | |
23771 | return false; |
23772 | } |
23773 | |
23774 | static bool isTruncWithZeroHighBitsInput(SDValue V, SelectionDAG &DAG) { |
23775 | if (V.getOpcode() != ISD::TRUNCATE) |
23776 | return false; |
23777 | |
23778 | SDValue VOp0 = V.getOperand(0); |
23779 | unsigned InBits = VOp0.getValueSizeInBits(); |
23780 | unsigned Bits = V.getValueSizeInBits(); |
23781 | return DAG.MaskedValueIsZero(VOp0, APInt::getHighBitsSet(InBits, InBits - Bits)); |
23782 | } |
23783 | |
23784 | SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { |
23785 | bool AddTest = true; |
23786 | SDValue Cond = Op.getOperand(0); |
23787 | SDValue Op1 = Op.getOperand(1); |
23788 | SDValue Op2 = Op.getOperand(2); |
23789 | SDLoc DL(Op); |
23790 | MVT VT = Op1.getSimpleValueType(); |
23791 | SDValue CC; |
23792 | |
23793 | |
23794 | |
23795 | |
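      | // A scalar FP select whose condition is an FP compare of the same type can |
      | // be done branch-free in SSE registers. |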
23796 | if (Cond.getOpcode() == ISD::SETCC && isScalarFPTypeInSSEReg(VT) && |
23797 | VT == Cond.getOperand(0).getSimpleValueType() && Cond->hasOneUse()) { |
23798 | SDValue CondOp0 = Cond.getOperand(0), CondOp1 = Cond.getOperand(1); |
23799 | bool IsAlwaysSignaling; |
23800 | unsigned SSECC = |
23801 | translateX86FSETCC(cast<CondCodeSDNode>(Cond.getOperand(2))->get(), |
23802 | CondOp0, CondOp1, IsAlwaysSignaling); |
23803 | |
23804 | if (Subtarget.hasAVX512()) { |
23805 | SDValue Cmp = |
23806 | DAG.getNode(X86ISD::FSETCCM, DL, MVT::v1i1, CondOp0, CondOp1, |
23807 | DAG.getTargetConstant(SSECC, DL, MVT::i8)); |
23808 | assert(!VT.isVector() && "Not a scalar type?"); |
23809 | return DAG.getNode(X86ISD::SELECTS, DL, VT, Cmp, Op1, Op2); |
23810 | } |
23811 | |
23812 | if (SSECC < 8 || Subtarget.hasAVX()) { |
23813 | SDValue Cmp = DAG.getNode(X86ISD::FSETCC, DL, VT, CondOp0, CondOp1, |
23814 | DAG.getTargetConstant(SSECC, DL, MVT::i8)); |
23815 | |
23816 | |
23817 | |
23818 | |
23819 | |
23820 | |
23821 | |
23822 | |
23823 | |
23824 | |
23825 | |
23826 | |
23827 | |
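      | // With AVX, a variable blend replaces the three-instruction |
      | // and/andn/or sequence, but it has no scalar form: widen the operands |
      | // to a 128-bit vector, blend, and extract element 0. Operands of +0.0 |
      | // are skipped since the logic sequence then simplifies anyway. |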
23828 | if (Subtarget.hasAVX() && !isNullFPConstant(Op1) && |
23829 | !isNullFPConstant(Op2)) { |
23830 | |
23831 | |
23832 | MVT VecVT = VT == MVT::f32 ? MVT::v4f32 : MVT::v2f64; |
23833 | SDValue VOp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, Op1); |
23834 | SDValue VOp2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, Op2); |
23835 | SDValue VCmp = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, Cmp); |
23836 | |
23837 | MVT VCmpVT = VT == MVT::f32 ? MVT::v4i32 : MVT::v2i64; |
23838 | VCmp = DAG.getBitcast(VCmpVT, VCmp); |
23839 | |
23840 | SDValue VSel = DAG.getSelect(DL, VecVT, VCmp, VOp1, VOp2); |
23841 | |
23842 | return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, |
23843 | VSel, DAG.getIntPtrConstant(0, DL)); |
23844 | } |
23845 | SDValue AndN = DAG.getNode(X86ISD::FANDN, DL, VT, Cmp, Op2); |
23846 | SDValue And = DAG.getNode(X86ISD::FAND, DL, VT, Cmp, Op1); |
23847 | return DAG.getNode(X86ISD::FOR, DL, VT, AndN, And); |
23848 | } |
23849 | } |
23850 | |
23851 | |
23852 | if (isScalarFPTypeInSSEReg(VT) && Subtarget.hasAVX512()) { |
23853 | SDValue Cmp = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v1i1, Cond); |
23854 | return DAG.getNode(X86ISD::SELECTS, DL, VT, Cmp, Op1, Op2); |
23855 | } |
23856 | |
23857 | if (Cond.getOpcode() == ISD::SETCC) { |
23858 | if (SDValue NewCond = LowerSETCC(Cond, DAG)) { |
23859 | Cond = NewCond; |
23860 | |
23861 | |
23862 | |
23863 | Op1 = Op.getOperand(1); |
23864 | Op2 = Op.getOperand(2); |
23865 | } |
23866 | } |
23867 | |
23868 | |
23869 | |
23870 | |
23871 | |
23872 | |
23873 | |
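      | // Fold selects of 0/-1 fed by a compare against zero into SBB sequences: |
      | //   (select (x == 0), -1, y) -> (sign_bit (x - 1)) | y |
      | //   (select (x == 0), y, -1) -> ~(sign_bit (x - 1)) | y |
      | //   (select (x != 0), y, -1) -> (sign_bit (x - 1)) | y |
      | //   (select (x != 0), -1, y) -> ~(sign_bit (x - 1)) | y |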
23874 | if (Cond.getOpcode() == X86ISD::SETCC && |
23875 | Cond.getOperand(1).getOpcode() == X86ISD::CMP && |
23876 | isNullConstant(Cond.getOperand(1).getOperand(1))) { |
23877 | SDValue Cmp = Cond.getOperand(1); |
23878 | SDValue CmpOp0 = Cmp.getOperand(0); |
23879 | unsigned CondCode = Cond.getConstantOperandVal(0); |
23880 | |
23881 | |
23882 | |
23883 | |
23884 | |
23885 | |
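      | // Leave the ffs idiom, (select (x == 0), -1, cttz_zero_undef(x)), to the |
      | // plain CMOV lowering below. |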
23886 | auto MatchFFSMinus1 = [&](SDValue Op1, SDValue Op2) { |
23887 | return (Op1.getOpcode() == ISD::CTTZ_ZERO_UNDEF && Op1.hasOneUse() && |
23888 | Op1.getOperand(0) == CmpOp0 && isAllOnesConstant(Op2)); |
23889 | }; |
23890 | if (Subtarget.hasCMov() && (VT == MVT::i32 || VT == MVT::i64) && |
23891 | ((CondCode == X86::COND_NE && MatchFFSMinus1(Op1, Op2)) || |
23892 | (CondCode == X86::COND_E && MatchFFSMinus1(Op2, Op1)))) { |
23893 | |
23894 | } else if ((isAllOnesConstant(Op1) || isAllOnesConstant(Op2)) && |
23895 | (CondCode == X86::COND_E || CondCode == X86::COND_NE)) { |
23896 | SDValue Y = isAllOnesConstant(Op2) ? Op1 : Op2; |
23897 | |
23898 | SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32); |
23899 | SDVTList CmpVTs = DAG.getVTList(CmpOp0.getValueType(), MVT::i32); |
23900 | |
23901 | |
23902 | |
23903 | |
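      | // '0 - X' sets CF exactly when X != 0, so an SBB afterwards yields the |
      | // all-ones/zero mask directly. |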
23904 | if (isNullConstant(Y) && |
23905 | (isAllOnesConstant(Op1) == (CondCode == X86::COND_NE))) { |
23906 | SDValue Zero = DAG.getConstant(0, DL, CmpOp0.getValueType()); |
23907 | SDValue Neg = DAG.getNode(X86ISD::SUB, DL, CmpVTs, Zero, CmpOp0); |
23908 | Zero = DAG.getConstant(0, DL, Op.getValueType()); |
23909 | return DAG.getNode(X86ISD::SBB, DL, VTs, Zero, Zero, Neg.getValue(1)); |
23910 | } |
23911 | |
23912 | Cmp = DAG.getNode(X86ISD::SUB, DL, CmpVTs, |
23913 | CmpOp0, DAG.getConstant(1, DL, CmpOp0.getValueType())); |
23914 | |
23915 | SDValue Zero = DAG.getConstant(0, DL, Op.getValueType()); |
23916 | SDValue Res = |
23917 | DAG.getNode(X86ISD::SBB, DL, VTs, Zero, Zero, Cmp.getValue(1)); |
23918 | |
23919 | if (isAllOnesConstant(Op1) != (CondCode == X86::COND_E)) |
23920 | Res = DAG.getNOT(DL, Res, Res.getValueType()); |
23921 | |
23922 | return DAG.getNode(ISD::OR, DL, Res.getValueType(), Res, Y); |
23923 | } else if (!Subtarget.hasCMov() && CondCode == X86::COND_E && |
23924 | Cmp.getOperand(0).getOpcode() == ISD::AND && |
23925 | isOneConstant(Cmp.getOperand(0).getOperand(1))) { |
23926 | SDValue Src1, Src2; |
23927 | |
23928 | |
23929 | |
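      | // Without CMOV: (select ((x & 1) == 0), y, (or/xor z, y)) is done |
      | // branch-free as ((-(x & 1)) & z) or/xor y. |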
23930 | auto isOrXorPattern = [&]() { |
23931 | if ((Op2.getOpcode() == ISD::XOR || Op2.getOpcode() == ISD::OR) && |
23932 | (Op2.getOperand(0) == Op1 || Op2.getOperand(1) == Op1)) { |
23933 | Src1 = |
23934 | Op2.getOperand(0) == Op1 ? Op2.getOperand(1) : Op2.getOperand(0); |
23935 | Src2 = Op1; |
23936 | return true; |
23937 | } |
23938 | return false; |
23939 | }; |
23940 | |
23941 | if (isOrXorPattern()) { |
23942 | SDValue Neg; |
23943 | unsigned int CmpSz = CmpOp0.getSimpleValueType().getSizeInBits(); |
23944 | |
23945 | |
23946 | if (CmpSz > VT.getSizeInBits()) |
23947 | Neg = DAG.getNode(ISD::TRUNCATE, DL, VT, CmpOp0); |
23948 | else if (CmpSz < VT.getSizeInBits()) |
23949 | Neg = DAG.getNode(ISD::AND, DL, VT, |
23950 | DAG.getNode(ISD::ANY_EXTEND, DL, VT, CmpOp0.getOperand(0)), |
23951 | DAG.getConstant(1, DL, VT)); |
23952 | else |
23953 | Neg = CmpOp0; |
23954 | SDValue Mask = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), |
23955 | Neg); |
23956 | SDValue And = DAG.getNode(ISD::AND, DL, VT, Mask, Src1); |
23957 | return DAG.getNode(Op2.getOpcode(), DL, VT, And, Src2); |
23958 | } |
23959 | } |
23960 | } |
23961 | |
23962 | |
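      | // Look through (and (setcc_carry), 1), the canonical zext of SETCC_CARRY. |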
23963 | if (Cond.getOpcode() == ISD::AND && |
23964 | Cond.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY && |
23965 | isOneConstant(Cond.getOperand(1))) |
23966 | Cond = Cond.getOperand(0); |
23967 | |
23968 | |
23969 | |
23970 | unsigned CondOpcode = Cond.getOpcode(); |
23971 | if (CondOpcode == X86ISD::SETCC || |
23972 | CondOpcode == X86ISD::SETCC_CARRY) { |
23973 | CC = Cond.getOperand(0); |
23974 | |
23975 | SDValue Cmp = Cond.getOperand(1); |
23976 | bool IllegalFPCMov = false; |
23977 | if (VT.isFloatingPoint() && !VT.isVector() && |
23978 | !isScalarFPTypeInSSEReg(VT) && Subtarget.hasCMov()) |
23979 | IllegalFPCMov = !hasFPCMov(cast<ConstantSDNode>(CC)->getSExtValue()); |
23980 | |
23981 | if ((isX86LogicalCmp(Cmp) && !IllegalFPCMov) || |
23982 | Cmp.getOpcode() == X86ISD::BT) { |
23983 | Cond = Cmp; |
23984 | AddTest = false; |
23985 | } |
23986 | } else if (CondOpcode == ISD::USUBO || CondOpcode == ISD::SSUBO || |
23987 | CondOpcode == ISD::UADDO || CondOpcode == ISD::SADDO || |
23988 | CondOpcode == ISD::UMULO || CondOpcode == ISD::SMULO) { |
23989 | SDValue Value; |
23990 | X86::CondCode X86Cond; |
23991 | std::tie(Value, Cond) = getX86XALUOOp(X86Cond, Cond.getValue(0), DAG); |
23992 | |
23993 | CC = DAG.getTargetConstant(X86Cond, DL, MVT::i8); |
23994 | AddTest = false; |
23995 | } |
23996 | |
23997 | if (AddTest) { |
23998 | |
23999 | if (isTruncWithZeroHighBitsInput(Cond, DAG)) |
24000 | Cond = Cond.getOperand(0); |
24001 | |
24002 | |
24003 | |
24004 | if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) { |
24005 | SDValue BTCC; |
24006 | if (SDValue BT = LowerAndToBT(Cond, ISD::SETNE, DL, DAG, BTCC)) { |
24007 | CC = BTCC; |
24008 | Cond = BT; |
24009 | AddTest = false; |
24010 | } |
24011 | } |
24012 | } |
24013 | |
24014 | if (AddTest) { |
24015 | CC = DAG.getTargetConstant(X86::COND_NE, DL, MVT::i8); |
24016 | Cond = EmitTest(Cond, X86::COND_NE, DL, DAG, Subtarget); |
24017 | } |
24018 | |
24019 | |
24020 | |
24021 | |
24022 | |
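      | // A carry-based select of -1/0 fed by a SUB becomes SETCC_CARRY (an SBB of |
      | // a register with itself), NOTed when the condition and arms don't line up. |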
  if (Cond.getOpcode() == X86ISD::SUB) {
    unsigned CondCode = cast<ConstantSDNode>(CC)->getZExtValue();

    if ((CondCode == X86::COND_AE || CondCode == X86::COND_B) &&
        (isAllOnesConstant(Op1) || isAllOnesConstant(Op2)) &&
        (isNullConstant(Op1) || isNullConstant(Op2))) {
      SDValue Res =
          DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(),
                      DAG.getTargetConstant(X86::COND_B, DL, MVT::i8), Cond);
      if (isAllOnesConstant(Op1) != (CondCode == X86::COND_B))
        return DAG.getNOT(DL, Res, Res.getValueType());
      return Res;
    }
  }

  // X86 doesn't have an i8 cmov. If both operands are the result of a
  // truncate, widen the cmov and push the truncate through.
  if (Op.getValueType() == MVT::i8 &&
      Op1.getOpcode() == ISD::TRUNCATE && Op2.getOpcode() == ISD::TRUNCATE) {
    SDValue T1 = Op1.getOperand(0), T2 = Op2.getOperand(0);
    if (T1.getValueType() == T2.getValueType() &&
        T1.getOpcode() != ISD::CopyFromReg &&
        T2.getOpcode() != ISD::CopyFromReg) {
      SDValue Cmov = DAG.getNode(X86ISD::CMOV, DL, T1.getValueType(), T2, T1,
                                 CC, Cond);
      return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Cmov);
    }
  }

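  // There is no i8 CMOV, so widen to i32 when CMOV is available. Also widen
  // i16 selects to i32 CMOV unless that would block folding a load into the
  // 16-bit form.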
  if ((Op.getValueType() == MVT::i8 && Subtarget.hasCMov()) ||
      (Op.getValueType() == MVT::i16 && !MayFoldLoad(Op1) &&
       !MayFoldLoad(Op2))) {
    Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op1);
    Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op2);
    SDValue Ops[] = { Op2, Op1, CC, Cond };
    SDValue Cmov = DAG.getNode(X86ISD::CMOV, DL, MVT::i32, Ops);
    return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Cmov);
  }

  // X86ISD::CMOV means set the result (which is operand 1) to the RHS if
  // condition is true.
  SDValue Ops[] = { Op2, Op1, CC, Cond };
  return DAG.getNode(X86ISD::CMOV, DL, Op.getValueType(), Ops);
}

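// Lower sign/zero/any-extension of a vXi1 mask: perform the extension in a
// (possibly wider) legal vector type, then truncate/extract the result back
// down to the requested type.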
static SDValue LowerSIGN_EXTEND_Mask(SDValue Op,
                                     const X86Subtarget &Subtarget,
                                     SelectionDAG &DAG) {
  MVT VT = Op->getSimpleValueType(0);
  SDValue In = Op->getOperand(0);
  MVT InVT = In.getSimpleValueType();
  assert(InVT.getVectorElementType() == MVT::i1 && "Unexpected input type!");
  MVT VTElt = VT.getVectorElementType();
  SDLoc dl(Op);

  unsigned NumElts = VT.getVectorNumElements();

  // Extend VT if the scalar type is i8/i16 and BWI is not supported.
  MVT ExtVT = VT;
  if (!Subtarget.hasBWI() && VTElt.getSizeInBits() <= 16) {
    // If v16i32 is to be avoided, we'll need to split and concatenate.
    if (NumElts == 16 && !Subtarget.canExtendTo512DQ())
      return SplitAndExtendv16i1(Op.getOpcode(), VT, In, dl, DAG);

    ExtVT = MVT::getVectorVT(MVT::i32, NumElts);
  }

  // Widen to 512-bits if VLX is not supported.
  MVT WideVT = ExtVT;
  if (!ExtVT.is512BitVector() && !Subtarget.hasVLX()) {
    NumElts *= 512 / ExtVT.getSizeInBits();
    InVT = MVT::getVectorVT(MVT::i1, NumElts);
    In = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, InVT, DAG.getUNDEF(InVT), In,
                     DAG.getIntPtrConstant(0, dl));
    WideVT = MVT::getVectorVT(ExtVT.getVectorElementType(), NumElts);
  }

  SDValue V;
  MVT WideEltVT = WideVT.getVectorElementType();
  if ((Subtarget.hasDQI() && WideEltVT.getSizeInBits() >= 32) ||
      (Subtarget.hasBWI() && WideEltVT.getSizeInBits() <= 16)) {
    V = DAG.getNode(Op.getOpcode(), dl, WideVT, In);
  } else {
    SDValue NegOne = DAG.getConstant(-1, dl, WideVT);
    SDValue Zero = DAG.getConstant(0, dl, WideVT);
    V = DAG.getSelect(dl, WideVT, In, NegOne, Zero);
  }

  // Truncate if we had to extend i16/i8 above.
  if (VT != ExtVT) {
    WideVT = MVT::getVectorVT(VTElt, NumElts);
    V = DAG.getNode(ISD::TRUNCATE, dl, WideVT, V);
  }

  // Extract back to 128/256-bit if we widened.
  if (WideVT != VT)
    V = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, V,
                    DAG.getIntPtrConstant(0, dl));

  return V;
}

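// Lower ANY_EXTEND: i1 mask vectors reuse the mask extension lowering above;
// all other vectors go through the generic AVX extend helper.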
static SDValue LowerANY_EXTEND(SDValue Op, const X86Subtarget &Subtarget,
                               SelectionDAG &DAG) {
  SDValue In = Op->getOperand(0);
  MVT InVT = In.getSimpleValueType();

  if (InVT.getVectorElementType() == MVT::i1)
    return LowerSIGN_EXTEND_Mask(Op, Subtarget, DAG);

  assert(Subtarget.hasAVX() && "Expected AVX support");
  return LowerAVXExtend(Op, DAG, Subtarget);
}

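// Lower SIGN/ZERO_EXTEND_VECTOR_INREG. On AVX2+ this maps onto the native
// extend instructions; on SSE-only targets it is emulated with shuffles and,
// for the sign-extending case, arithmetic shifts.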
static SDValue LowerEXTEND_VECTOR_INREG(SDValue Op,
                                        const X86Subtarget &Subtarget,
                                        SelectionDAG &DAG) {
  SDValue In = Op->getOperand(0);
  MVT VT = Op->getSimpleValueType(0);
  MVT InVT = In.getSimpleValueType();

  MVT SVT = VT.getVectorElementType();
  MVT InSVT = InVT.getVectorElementType();
  assert(SVT.getFixedSizeInBits() > InSVT.getFixedSizeInBits());

  if (SVT != MVT::i64 && SVT != MVT::i32 && SVT != MVT::i16)
    return SDValue();
  if (InSVT != MVT::i32 && InSVT != MVT::i16 && InSVT != MVT::i8)
    return SDValue();
  if (!(VT.is128BitVector() && Subtarget.hasSSE2()) &&
      !(VT.is256BitVector() && Subtarget.hasAVX()) &&
      !(VT.is512BitVector() && Subtarget.hasAVX512()))
    return SDValue();

  SDLoc dl(Op);
  unsigned Opc = Op.getOpcode();
  unsigned NumElts = VT.getVectorNumElements();

  // For 256-bit vectors, we only need the lower (128-bit) half of the input.
  // For 512-bit vectors, we only need the lower input half/quarter.
  if (InVT.getSizeInBits() > 128) {
    // The input needs at least as many elements as the output, and at least
    // 128 bits.
    int InSize = InSVT.getSizeInBits() * NumElts;
    In = extractSubVector(In, 0, DAG, dl, std::max(InSize, 128));
    InVT = In.getSimpleValueType();
  }

  // AVX2 and AVX-512 can extend 256/512-bit results directly; 128-bit results
  // are already legal with SSE4.1 and shouldn't occur here.
  if (Subtarget.hasInt256()) {
    assert(VT.getSizeInBits() > 128 && "Unexpected 128-bit vector extension");

    if (InVT.getVectorNumElements() != NumElts)
      return DAG.getNode(Op.getOpcode(), dl, VT, In);

    // FIXME: Apparently we create inreg operations that could be regular
    // extends.
    unsigned ExtOpc =
        Opc == ISD::SIGN_EXTEND_VECTOR_INREG ? ISD::SIGN_EXTEND
                                             : ISD::ZERO_EXTEND;
    return DAG.getNode(ExtOpc, dl, VT, In);
  }

  // pre-AVX2 256-bit extensions need to be split into 128-bit instructions.
  if (Subtarget.hasAVX()) {
    assert(VT.is256BitVector() && "256-bit vector expected");
    MVT HalfVT = VT.getHalfNumVectorElementsVT();
    int HalfNumElts = HalfVT.getVectorNumElements();

    unsigned NumSrcElts = InVT.getVectorNumElements();
    SmallVector<int, 16> HiMask(NumSrcElts, SM_SentinelUndef);
    for (int i = 0; i != HalfNumElts; ++i)
      HiMask[i] = HalfNumElts + i;

    SDValue Lo = DAG.getNode(Opc, dl, HalfVT, In);
    SDValue Hi = DAG.getVectorShuffle(InVT, dl, In, DAG.getUNDEF(InVT), HiMask);
    Hi = DAG.getNode(Opc, dl, HalfVT, Hi);
    return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi);
  }

  // We should only get here for sign extend.
  assert(Opc == ISD::SIGN_EXTEND_VECTOR_INREG && "Unexpected opcode!");
  assert(VT.is128BitVector() && InVT.is128BitVector() && "Unexpected VTs");

  SDValue Curr = In;
  SDValue SignExt = Curr;

  // Emulate the sign extension: shuffle each input element into the high bits
  // of a wider element, then arithmetic-shift the sign bits back down. i64
  // elements (no 64-bit SRAI) are handled separately below.
  if (InVT != MVT::v4i32) {
    MVT DestVT = VT == MVT::v2i64 ? MVT::v4i32 : VT;

    unsigned DestWidth = DestVT.getScalarSizeInBits();
    unsigned Scale = DestWidth / InSVT.getSizeInBits();

    unsigned InNumElts = InVT.getVectorNumElements();
    unsigned DestElts = DestVT.getVectorNumElements();

    // Build a shuffle mask that takes each input element and places it in the
    // MSBs of the new element size.
    SmallVector<int, 16> Mask(InNumElts, SM_SentinelUndef);
    for (unsigned i = 0; i != DestElts; ++i)
      Mask[i * Scale + (Scale - 1)] = i;

    Curr = DAG.getVectorShuffle(InVT, dl, In, In, Mask);
    Curr = DAG.getBitcast(DestVT, Curr);

    unsigned SignExtShift = DestWidth - InSVT.getSizeInBits();
    SignExt = DAG.getNode(X86ISD::VSRAI, dl, DestVT, Curr,
                          DAG.getTargetConstant(SignExtShift, dl, MVT::i8));
  }

  if (VT == MVT::v2i64) {
    assert(Curr.getValueType() == MVT::v4i32 && "Unexpected input VT");
    SDValue Zero = DAG.getConstant(0, dl, MVT::v4i32);
    SDValue Sign = DAG.getSetCC(dl, MVT::v4i32, Zero, Curr, ISD::SETGT);
    SignExt = DAG.getVectorShuffle(MVT::v4i32, dl, SignExt, Sign, {0, 4, 1, 5});
    SignExt = DAG.getBitcast(VT, SignExt);
  }

  return SignExt;
}

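// Lower vector SIGN_EXTEND. i1 masks use the AVX-512 mask lowering; v32i16
// without BWI is split; AVX2+ extends are legal as-is; otherwise the extend
// is expanded into two SIGN_EXTEND_VECTOR_INREG halves.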
static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget &Subtarget,
                                SelectionDAG &DAG) {
  MVT VT = Op->getSimpleValueType(0);
  SDValue In = Op->getOperand(0);
  MVT InVT = In.getSimpleValueType();
  SDLoc dl(Op);

  if (InVT.getVectorElementType() == MVT::i1)
    return LowerSIGN_EXTEND_Mask(Op, Subtarget, DAG);

  assert(VT.isVector() && InVT.isVector() && "Expected vector type");
  assert(VT.getVectorNumElements() == InVT.getVectorNumElements() &&
         "Expected same number of elements");
  assert((VT.getVectorElementType() == MVT::i16 ||
          VT.getVectorElementType() == MVT::i32 ||
          VT.getVectorElementType() == MVT::i64) &&
         "Unexpected element type");
  assert((InVT.getVectorElementType() == MVT::i8 ||
          InVT.getVectorElementType() == MVT::i16 ||
          InVT.getVectorElementType() == MVT::i32) &&
         "Unexpected element type");

  if (VT == MVT::v32i16 && !Subtarget.hasBWI()) {
    assert(InVT == MVT::v32i8 && "Unexpected VT!");
    return splitVectorIntUnary(Op, DAG);
  }

  if (Subtarget.hasInt256())
    return Op;

  // Sign-extend the lower half directly, shuffle the upper half down into the
  // low lanes and sign-extend it as well, then concatenate the two halves
  // back to the original type.
  MVT HalfVT = VT.getHalfNumVectorElementsVT();
  SDValue OpLo = DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, dl, HalfVT, In);

  unsigned NumElems = InVT.getVectorNumElements();
  SmallVector<int, 8> ShufMask(NumElems, -1);
  for (unsigned i = 0; i != NumElems / 2; ++i)
    ShufMask[i] = i + NumElems / 2;

  SDValue OpHi = DAG.getVectorShuffle(InVT, dl, In, In, ShufMask);
  OpHi = DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, dl, HalfVT, OpHi);

  return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi);
}

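// Split a 256/512-bit store into two half-width stores at consecutive
// addresses and chain them with a TokenFactor.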
static SDValue splitVectorStore(StoreSDNode *Store, SelectionDAG &DAG) {
  SDValue StoredVal = Store->getValue();
  assert((StoredVal.getValueType().is256BitVector() ||
          StoredVal.getValueType().is512BitVector()) &&
         "Expecting 256/512-bit op");

  // Splitting volatile memory ops is not allowed unless the operation was not
  // legal to begin with. Assume the input store is legal (this transform is
  // only used for targets with AVX).
  if (!Store->isSimple())
    return SDValue();

  SDLoc DL(Store);
  SDValue Value0, Value1;
  std::tie(Value0, Value1) = splitVector(StoredVal, DAG, DL);
  unsigned HalfOffset = Value0.getValueType().getStoreSize();
  SDValue Ptr0 = Store->getBasePtr();
  SDValue Ptr1 =
      DAG.getMemBasePlusOffset(Ptr0, TypeSize::Fixed(HalfOffset), DL);
  SDValue Ch0 =
      DAG.getStore(Store->getChain(), DL, Value0, Ptr0,
                   Store->getPointerInfo(), Store->getOriginalAlign(),
                   Store->getMemOperand()->getFlags());
  SDValue Ch1 = DAG.getStore(Store->getChain(), DL, Value1, Ptr1,
                             Store->getPointerInfo().getWithOffset(HalfOffset),
                             Store->getOriginalAlign(),
                             Store->getMemOperand()->getFlags());
  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Ch0, Ch1);
}

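// Scalarize a 128-bit vector store: bitcast to StoreVT, then emit one scalar
// store per element at the appropriate offsets.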
static SDValue scalarizeVectorStore(StoreSDNode *Store, MVT StoreVT,
                                    SelectionDAG &DAG) {
  SDValue StoredVal = Store->getValue();
  assert(StoreVT.is128BitVector() &&
         StoredVal.getValueType().is128BitVector() && "Expecting 128-bit op");
  StoredVal = DAG.getBitcast(StoreVT, StoredVal);

  // Splitting volatile memory ops is not allowed unless the operation was not
  // legal to begin with.
  if (!Store->isSimple())
    return SDValue();

  MVT StoreSVT = StoreVT.getScalarType();
  unsigned NumElems = StoreVT.getVectorNumElements();
  unsigned ScalarSize = StoreSVT.getStoreSize();

  SDLoc DL(Store);
  SmallVector<SDValue, 4> Stores;
  for (unsigned i = 0; i != NumElems; ++i) {
    unsigned Offset = i * ScalarSize;
    SDValue Ptr = DAG.getMemBasePlusOffset(Store->getBasePtr(),
                                           TypeSize::Fixed(Offset), DL);
    SDValue Scl = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, StoreSVT, StoredVal,
                              DAG.getIntPtrConstant(i, DL));
    SDValue Ch = DAG.getStore(Store->getChain(), DL, Scl, Ptr,
                              Store->getPointerInfo().getWithOffset(Offset),
                              Store->getOriginalAlign(),
                              Store->getMemOperand()->getFlags());
    Stores.push_back(Ch);
  }
  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
}

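// Custom lowering for stores: small vXi1 mask stores are widened and stored
// as an i8 scalar, oversized concatenated vectors are split, and widened
// 64-bit vectors are stored via a single 64-bit element extract.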
static SDValue LowerStore(SDValue Op, const X86Subtarget &Subtarget,
                          SelectionDAG &DAG) {
  StoreSDNode *St = cast<StoreSDNode>(Op.getNode());
  SDLoc dl(St);
  SDValue StoredVal = St->getValue();

  // Without AVX512DQ, we need to use a scalar type for v2i1/v4i1/v8i1 stores.
  if (StoredVal.getValueType().isVector() &&
      StoredVal.getValueType().getVectorElementType() == MVT::i1) {
    unsigned NumElts = StoredVal.getValueType().getVectorNumElements();
    assert(NumElts <= 8 && "Unexpected VT");
    assert(!St->isTruncatingStore() && "Expected non-truncating store");
    assert(Subtarget.hasAVX512() && !Subtarget.hasDQI() &&
           "Expected AVX512F without AVX512DQI");

    StoredVal = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v16i1,
                            DAG.getUNDEF(MVT::v16i1), StoredVal,
                            DAG.getIntPtrConstant(0, dl));
    StoredVal = DAG.getBitcast(MVT::i16, StoredVal);
    StoredVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, StoredVal);

    // Make sure we store zeros in the extra bits.
    if (NumElts < 8)
      StoredVal = DAG.getZeroExtendInReg(
          StoredVal, dl, EVT::getIntegerVT(*DAG.getContext(), NumElts));

    return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),
                        St->getPointerInfo(), St->getOriginalAlign(),
                        St->getMemOperand()->getFlags());
  }

  if (St->isTruncatingStore())
    return SDValue();

  // If this is a 256-bit (or wider, BWI-less) store of concatenated ops, we
  // are better off splitting it into narrower stores: the concat is then a
  // pure extra op, and each half can execute independently.
  MVT StoreVT = StoredVal.getSimpleValueType();
  if (StoreVT.is256BitVector() ||
      ((StoreVT == MVT::v32i16 || StoreVT == MVT::v64i8) &&
       !Subtarget.hasBWI())) {
    SmallVector<SDValue, 4> CatOps;
    if (StoredVal.hasOneUse() && collectConcatOps(StoredVal.getNode(), CatOps))
      return splitVectorStore(St, DAG);
    return SDValue();
  }

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  assert(StoreVT.isVector() && StoreVT.getSizeInBits() == 64 &&
         "Unexpected VT");
  assert(TLI.getTypeAction(*DAG.getContext(), StoreVT) ==
             TargetLowering::TypeWidenVector &&
         "Unexpected type action!");

  EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), StoreVT);
  StoredVal = DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, StoredVal,
                          DAG.getUNDEF(StoreVT));

  if (Subtarget.hasSSE2()) {
    // Widen the vector, cast to a v2x64 type, extract the single 64-bit
    // element and store it.
    MVT StVT = Subtarget.is64Bit() && StoreVT.isInteger() ? MVT::i64 : MVT::f64;
    MVT CastVT = MVT::getVectorVT(StVT, 2);
    StoredVal = DAG.getBitcast(CastVT, StoredVal);
    StoredVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, StVT, StoredVal,
                            DAG.getIntPtrConstant(0, dl));

    return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),
                        St->getPointerInfo(), St->getOriginalAlign(),
                        St->getMemOperand()->getFlags());
  }
  assert(Subtarget.hasSSE1() && "Expected SSE");
  SDVTList Tys = DAG.getVTList(MVT::Other);
  SDValue Ops[] = {St->getChain(), StoredVal, St->getBasePtr()};
  return DAG.getMemIntrinsicNode(X86ISD::VEXTRACT_STORE, dl, Tys, Ops, MVT::i64,
                                 St->getMemOperand());
}

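// Custom-lower vector loads. Currently this only needs to handle small vXi1
// mask loads without AVX512DQ: load the mask as an i8 scalar and extract the
// requested subvector from it.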
static SDValue LowerLoad(SDValue Op, const X86Subtarget &Subtarget,
                         SelectionDAG &DAG) {
  MVT RegVT = Op.getSimpleValueType();
  assert(RegVT.isVector() && "We only custom lower vector loads.");
  assert(RegVT.isInteger() && "We only custom lower integer vector loads.");

  LoadSDNode *Ld = cast<LoadSDNode>(Op.getNode());
  SDLoc dl(Ld);

  // Without AVX512DQ, we need to use a scalar type for v2i1/v4i1/v8i1 loads.
  if (RegVT.getVectorElementType() == MVT::i1) {
    assert(EVT(RegVT) == Ld->getMemoryVT() && "Expected non-extending load");
    assert(RegVT.getVectorNumElements() <= 8 && "Unexpected VT");
    assert(Subtarget.hasAVX512() && !Subtarget.hasDQI() &&
           "Expected AVX512F without AVX512DQI");

    SDValue NewLd = DAG.getLoad(MVT::i8, dl, Ld->getChain(), Ld->getBasePtr(),
                                Ld->getPointerInfo(), Ld->getOriginalAlign(),
                                Ld->getMemOperand()->getFlags());

    // Replace chain users with the new chain.
    assert(NewLd->getNumValues() == 2 && "Loads must carry a chain!");

    SDValue Val = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, NewLd);
    Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, RegVT,
                      DAG.getBitcast(MVT::v16i1, Val),
                      DAG.getIntPtrConstant(0, dl));
    return DAG.getMergeValues({Val, NewLd.getValue(1)}, dl);
  }

  return SDValue();
}

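// Return true if Op is an AND or OR of two single-use X86ISD::SETCC nodes;
// Opc receives the AND/OR opcode.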
static bool isAndOrOfSetCCs(SDValue Op, unsigned &Opc) {
  Opc = Op.getOpcode();
  if (Opc != ISD::OR && Opc != ISD::AND)
    return false;
  return (Op.getOperand(0).getOpcode() == X86ISD::SETCC &&
          Op.getOperand(0).hasOneUse() &&
          Op.getOperand(1).getOpcode() == X86ISD::SETCC &&
          Op.getOperand(1).hasOneUse());
}

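// Lower BRCOND: translate the boolean condition into an EFLAGS-producing node
// and branch on the corresponding X86 condition code.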
SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  SDValue Cond = Op.getOperand(1);
  SDValue Dest = Op.getOperand(2);
  SDLoc dl(Op);

  if (Cond.getOpcode() == ISD::SETCC &&
      Cond.getOperand(0).getValueType() != MVT::f128) {
    SDValue LHS = Cond.getOperand(0);
    SDValue RHS = Cond.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();

    // Special case for
    // setcc([su]{add,sub,mul}o == 0)
    // setcc([su]{add,sub,mul}o != 1)
    if (ISD::isOverflowIntrOpRes(LHS) &&
        (CC == ISD::SETEQ || CC == ISD::SETNE) &&
        (isNullConstant(RHS) || isOneConstant(RHS))) {
      SDValue Value, Overflow;
      X86::CondCode X86Cond;
      std::tie(Value, Overflow) = getX86XALUOOp(X86Cond, LHS.getValue(0), DAG);

      if ((CC == ISD::SETEQ) == isNullConstant(RHS))
        X86Cond = X86::GetOppositeBranchCondition(X86Cond);

      SDValue CCVal = DAG.getTargetConstant(X86Cond, dl, MVT::i8);
      return DAG.getNode(X86ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
                         Overflow);
    }

    if (LHS.getSimpleValueType().isInteger()) {
      SDValue CCVal;
      SDValue EFLAGS = emitFlagsForSetcc(LHS, RHS, CC, SDLoc(Cond), DAG, CCVal);
      return DAG.getNode(X86ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
                         EFLAGS);
    }

    if (CC == ISD::SETOEQ) {
      // For FCMP_OEQ, we can emit two branches instead of an explicit AND
      // instruction with a separate test. However, we only do this if this
      // block doesn't have a fall-through edge, because this requires an
      // explicit jmp when the condition is false.
      if (Op.getNode()->hasOneUse()) {
        SDNode *User = *Op.getNode()->use_begin();
        // Look for an unconditional branch following this conditional branch.
        // We need this because we need to reverse the successors in order to
        // implement FCMP_OEQ.
        if (User->getOpcode() == ISD::BR) {
          SDValue FalseBB = User->getOperand(1);
          SDNode *NewBR =
              DAG.UpdateNodeOperands(User, User->getOperand(0), Dest);
          assert(NewBR == User);
          (void)NewBR;
          Dest = FalseBB;

          SDValue Cmp =
              DAG.getNode(X86ISD::FCMP, SDLoc(Cond), MVT::i32, LHS, RHS);
          SDValue CCVal = DAG.getTargetConstant(X86::COND_NE, dl, MVT::i8);
          Chain = DAG.getNode(X86ISD::BRCOND, dl, MVT::Other, Chain, Dest,
                              CCVal, Cmp);
          CCVal = DAG.getTargetConstant(X86::COND_P, dl, MVT::i8);
          return DAG.getNode(X86ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
                             Cmp);
        }
      }
    } else if (CC == ISD::SETUNE) {
      // For FCMP_UNE, we can emit two branches instead of an explicit OR
      // instruction with a separate test.
      SDValue Cmp = DAG.getNode(X86ISD::FCMP, SDLoc(Cond), MVT::i32, LHS, RHS);
      SDValue CCVal = DAG.getTargetConstant(X86::COND_NE, dl, MVT::i8);
      Chain =
          DAG.getNode(X86ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal, Cmp);
      CCVal = DAG.getTargetConstant(X86::COND_P, dl, MVT::i8);
      return DAG.getNode(X86ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
                         Cmp);
    } else {
      X86::CondCode X86Cond =
          TranslateX86CC(CC, dl, /*IsFP*/ true, LHS, RHS, DAG);
      SDValue Cmp = DAG.getNode(X86ISD::FCMP, SDLoc(Cond), MVT::i32, LHS, RHS);
      SDValue CCVal = DAG.getTargetConstant(X86Cond, dl, MVT::i8);
      return DAG.getNode(X86ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
                         Cmp);
    }
  }

  if (ISD::isOverflowIntrOpRes(Cond)) {
    SDValue Value, Overflow;
    X86::CondCode X86Cond;
    std::tie(Value, Overflow) = getX86XALUOOp(X86Cond, Cond.getValue(0), DAG);

    SDValue CCVal = DAG.getTargetConstant(X86Cond, dl, MVT::i8);
    return DAG.getNode(X86ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
                       Overflow);
  }

  // Look past the truncate if the high bits are known zero.
  if (isTruncWithZeroHighBitsInput(Cond, DAG))
    Cond = Cond.getOperand(0);

  EVT CondVT = Cond.getValueType();

  // Add an AND with 1 if we don't already have one.
  if (!(Cond.getOpcode() == ISD::AND && isOneConstant(Cond.getOperand(1))))
    Cond =
        DAG.getNode(ISD::AND, dl, CondVT, Cond, DAG.getConstant(1, dl, CondVT));

  SDValue LHS = Cond;
  SDValue RHS = DAG.getConstant(0, dl, CondVT);

  SDValue CCVal;
  SDValue EFLAGS = emitFlagsForSetcc(LHS, RHS, ISD::SETNE, dl, DAG, CCVal);
  return DAG.getNode(X86ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
                     EFLAGS);
}

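// Lower dynamic stack allocation. On Windows, calls to _alloca (or an inline
// probe) are needed to touch the stack at 4K increments so the guard pages
// used by the OS virtual memory manager are committed in the correct order;
// segmented stacks instead use the SEG_ALLOCA pseudo.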
SDValue
X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
                                           SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  bool SplitStack = MF.shouldSplitStack();
  bool EmitStackProbeCall = hasStackProbeSymbol(MF);
  bool Lower = (Subtarget.isOSWindows() && !Subtarget.isTargetMachO()) ||
               SplitStack || EmitStackProbeCall;
  SDLoc dl(Op);

  // Get the inputs.
  SDNode *Node = Op.getNode();
  SDValue Chain = Op.getOperand(0);
  SDValue Size = Op.getOperand(1);
  MaybeAlign Alignment(Op.getConstantOperandVal(2));
  EVT VT = Node->getValueType(0);

  // Chain the dynamic stack allocation so that it doesn't modify the stack
  // pointer when other instructions are using the stack.
  Chain = DAG.getCALLSEQ_START(Chain, 0, 0, dl);

  bool Is64Bit = Subtarget.is64Bit();
  MVT SPTy = getPointerTy(DAG.getDataLayout());

  SDValue Result;
  if (!Lower) {
    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
    Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
    assert(SPReg && "Target cannot require DYNAMIC_STACKALLOC expansion and"
                    " not tell us which reg is the stack pointer!");

    const TargetFrameLowering &TFI = *Subtarget.getFrameLowering();
    const Align StackAlign = TFI.getStackAlign();
    if (hasInlineStackProbe(MF)) {
      MachineRegisterInfo &MRI = MF.getRegInfo();

      const TargetRegisterClass *AddrRegClass = getRegClassFor(SPTy);
      Register Vreg = MRI.createVirtualRegister(AddrRegClass);
      Chain = DAG.getCopyToReg(Chain, dl, Vreg, Size);
      Result = DAG.getNode(X86ISD::PROBED_ALLOCA, dl, SPTy, Chain,
                           DAG.getRegister(Vreg, SPTy));
    } else {
      SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
      Chain = SP.getValue(1);
      Result = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value
    }
    if (Alignment && *Alignment > StackAlign)
      Result =
          DAG.getNode(ISD::AND, dl, VT, Result,
                      DAG.getConstant(~(Alignment->value() - 1ULL), dl, VT));
    Chain = DAG.getCopyToReg(Chain, dl, SPReg, Result); // Output chain
  } else if (SplitStack) {
    MachineRegisterInfo &MRI = MF.getRegInfo();

    if (Is64Bit) {
      // The 64-bit implementation of segmented stacks needs to clobber both
      // r10 and r11. This makes it impossible to use it along with nested
      // parameters.
      const Function &F = MF.getFunction();
      for (const auto &A : F.args()) {
        if (A.hasNestAttr())
          report_fatal_error("Cannot use segmented stacks with functions that "
                             "have nested arguments.");
      }
    }

    const TargetRegisterClass *AddrRegClass = getRegClassFor(SPTy);
    Register Vreg = MRI.createVirtualRegister(AddrRegClass);
    Chain = DAG.getCopyToReg(Chain, dl, Vreg, Size);
    Result = DAG.getNode(X86ISD::SEG_ALLOCA, dl, SPTy, Chain,
                         DAG.getRegister(Vreg, SPTy));
  } else {
    SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
    Chain = DAG.getNode(X86ISD::WIN_ALLOCA, dl, NodeTys, Chain, Size);
    MF.getInfo<X86MachineFunctionInfo>()->setHasWinAlloca(true);

    const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
    Register SPReg = RegInfo->getStackRegister();
    SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, SPTy);
    Chain = SP.getValue(1);

    if (Alignment) {
      SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
                       DAG.getConstant(~(Alignment->value() - 1ULL), dl, VT));
      Chain = DAG.getCopyToReg(Chain, dl, SPReg, SP);
    }

    Result = SP;
  }

  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, dl, true),
                             DAG.getIntPtrConstant(0, dl, true), SDValue(), dl);

  SDValue Ops[2] = {Result, Chain};
  return DAG.getMergeValues(Ops, dl);
}

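// Lower va_start: a single pointer store for 32-bit and Win64 targets;
// otherwise initialize the four-field SysV x86-64 va_list struct.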
SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  auto PtrVT = getPointerTy(MF.getDataLayout());
  X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();

  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  SDLoc DL(Op);

  if (!Subtarget.is64Bit() ||
      Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv())) {
    // vastart just stores the address of the VarArgsFrameIndex slot into the
    // memory location argument.
    SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
    return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
                        MachinePointerInfo(SV));
  }

  // __va_list_tag:
  //   gp_offset         (0 - 6 * 8)
  //   fp_offset         (48 - 48 + 8 * 16)
  //   overflow_arg_area (point to parameters coming in memory).
  //   reg_save_area
  SmallVector<SDValue, 8> MemOps;
  SDValue FIN = Op.getOperand(1);
  // Store gp_offset
  SDValue Store = DAG.getStore(
      Op.getOperand(0), DL,
      DAG.getConstant(FuncInfo->getVarArgsGPOffset(), DL, MVT::i32), FIN,
      MachinePointerInfo(SV));
  MemOps.push_back(Store);

  // Store fp_offset
  FIN = DAG.getMemBasePlusOffset(FIN, TypeSize::Fixed(4), DL);
  Store = DAG.getStore(
      Op.getOperand(0), DL,
      DAG.getConstant(FuncInfo->getVarArgsFPOffset(), DL, MVT::i32), FIN,
      MachinePointerInfo(SV, 4));
  MemOps.push_back(Store);

  // Store ptr to overflow_arg_area
  FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getIntPtrConstant(4, DL));
  SDValue OVFIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
  Store =
      DAG.getStore(Op.getOperand(0), DL, OVFIN, FIN, MachinePointerInfo(SV, 8));
  MemOps.push_back(Store);

  // Store ptr to reg_save_area.
  FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN,
                    DAG.getIntPtrConstant(Subtarget.isTarget64BitLP64() ? 8 : 4,
                                          DL));
  SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT);
  Store = DAG.getStore(
      Op.getOperand(0), DL, RSFIN, FIN,
      MachinePointerInfo(SV, Subtarget.isTarget64BitLP64() ? 16 : 12));
  MemOps.push_back(Store);
  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
}

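// Lower va_arg for the SysV x86-64 ABI: emit the VAARG pseudo (which computes
// the address of the next argument) and load the value through it.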
SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
  assert(Subtarget.is64Bit() && "LowerVAARG only handles 64-bit va_arg!");
  assert(Op.getNumOperands() == 4);

  MachineFunction &MF = DAG.getMachineFunction();
  if (Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv()))
    // The Win64 ABI uses char* instead of a structure.
    return DAG.expandVAArg(Op.getNode());

  SDValue Chain = Op.getOperand(0);
  SDValue SrcPtr = Op.getOperand(1);
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  unsigned Align = Op.getConstantOperandVal(3);
  SDLoc dl(Op);

  EVT ArgVT = Op.getNode()->getValueType(0);
  Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
  uint32_t ArgSize = DAG.getDataLayout().getTypeAllocSize(ArgTy);
  uint8_t ArgMode;

  // Decide which area this value should be read from.
  // TODO: Implement the AMD64 ABI in its entirety. This simple
  // selection mechanism works only for the basic types.
  assert(ArgVT != MVT::f80 && "va_arg for f80 not yet implemented");
  if (ArgVT.isFloatingPoint() && ArgSize <= 16 /*bytes*/) {
    ArgMode = 2;  // Argument passed in XMM register. Use fp_offset.
  } else {
    assert(ArgVT.isInteger() && ArgSize <= 32 /*bytes*/ &&
           "Unhandled argument type in LowerVAARG");
    ArgMode = 1;  // Argument passed in GPR64 register(s). Use gp_offset.
  }

  if (ArgMode == 2) {
    // Make sure using fp_offset makes sense.
    assert(!Subtarget.useSoftFloat() &&
           !(MF.getFunction().hasFnAttribute(Attribute::NoImplicitFloat)) &&
           Subtarget.hasSSE1());
  }

  // Insert VAARG node into the DAG. VAARG returns the address of the next
  // argument and takes the chain as input.
  SDValue InstOps[] = {Chain, SrcPtr,
                       DAG.getTargetConstant(ArgSize, dl, MVT::i32),
                       DAG.getTargetConstant(ArgMode, dl, MVT::i8),
                       DAG.getTargetConstant(Align, dl, MVT::i32)};
  SDVTList VTs = DAG.getVTList(getPointerTy(DAG.getDataLayout()), MVT::Other);
  SDValue VAARG = DAG.getMemIntrinsicNode(
      Subtarget.isTarget64BitLP64() ? X86ISD::VAARG_64 : X86ISD::VAARG_X32, dl,
      VTs, InstOps, MVT::i64, MachinePointerInfo(SV),
      /*Alignment=*/None,
      MachineMemOperand::MOLoad | MachineMemOperand::MOStore);
  Chain = VAARG.getValue(1);

  // Load the next argument and return it.
  return DAG.getLoad(ArgVT, dl, Chain, VAARG, MachinePointerInfo());
}

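// Lower va_copy for 64-bit targets: the SysV va_list struct is copied with a
// memcpy, while Win64's plain pointer uses the generic expansion.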
static SDValue LowerVACOPY(SDValue Op, const X86Subtarget &Subtarget,
                           SelectionDAG &DAG) {
  // X86-64 va_list is a struct { i32, i32, i8*, i8* }, except on Windows,
  // where a va_list is still an i8*.
  assert(Subtarget.is64Bit() && "This code only handles 64-bit va_copy!");
  if (Subtarget.isCallingConvWin64(
          DAG.getMachineFunction().getFunction().getCallingConv()))
    // Probably a Win64 va_copy.
    return DAG.expandVACopy(Op.getNode());

  SDValue Chain = Op.getOperand(0);
  SDValue DstPtr = Op.getOperand(1);
  SDValue SrcPtr = Op.getOperand(2);
  const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
  const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
  SDLoc DL(Op);

  return DAG.getMemcpy(
      Chain, DL, DstPtr, SrcPtr,
      DAG.getIntPtrConstant(Subtarget.isTarget64BitLP64() ? 24 : 16, DL),
      Align(Subtarget.isTarget64BitLP64() ? 8 : 4), /*isVolatile*/ false, false,
      false, MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV));
}

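// Helper to get the immediate/variable SSE shift opcode from a generic or
// X86-specific shift opcode.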
static unsigned getTargetVShiftUniformOpcode(unsigned Opc, bool IsVariable) {
  switch (Opc) {
  case ISD::SHL:
  case X86ISD::VSHL:
  case X86ISD::VSHLI:
    return IsVariable ? X86ISD::VSHL : X86ISD::VSHLI;
  case ISD::SRL:
  case X86ISD::VSRL:
  case X86ISD::VSRLI:
    return IsVariable ? X86ISD::VSRL : X86ISD::VSRLI;
  case ISD::SRA:
  case X86ISD::VSRA:
  case X86ISD::VSRAI:
    return IsVariable ? X86ISD::VSRA : X86ISD::VSRAI;
  }
  llvm_unreachable("Unknown target vector shift node");
}

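// Handle vector element shifts where the shift amount is a constant.
// Takes the immediate version of the shift opcode as input.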
static SDValue getTargetVShiftByConstNode(unsigned Opc, const SDLoc &dl, MVT VT,
                                          SDValue SrcOp, uint64_t ShiftAmt,
                                          SelectionDAG &DAG) {
  MVT ElementType = VT.getVectorElementType();

  // Bitcast the source vector to the output type if necessary.
  if (VT != SrcOp.getSimpleValueType())
    SrcOp = DAG.getBitcast(VT, SrcOp);

  // Fold this packed shift into its first operand if ShiftAmt is 0.
  if (ShiftAmt == 0)
    return SrcOp;

  // Check for ShiftAmt >= element width.
  if (ShiftAmt >= ElementType.getSizeInBits()) {
    if (Opc == X86ISD::VSRAI)
      ShiftAmt = ElementType.getSizeInBits() - 1;
    else
      return DAG.getConstant(0, dl, VT);
  }

  assert((Opc == X86ISD::VSHLI || Opc == X86ISD::VSRLI || Opc == X86ISD::VSRAI)
         && "Unknown target vector shift-by-constant node");

  // Fold this packed vector shift into a build vector if SrcOp is a
  // vector of Constants or UNDEFs.
  if (ISD::isBuildVectorOfConstantSDNodes(SrcOp.getNode())) {
    SmallVector<SDValue, 8> Elts;
    unsigned NumElts = SrcOp->getNumOperands();

    switch (Opc) {
    default: llvm_unreachable("Unknown opcode!");
    case X86ISD::VSHLI:
      for (unsigned i = 0; i != NumElts; ++i) {
        SDValue CurrentOp = SrcOp->getOperand(i);
        if (CurrentOp->isUndef()) {
          // Must produce 0s in the correct bits.
          Elts.push_back(DAG.getConstant(0, dl, ElementType));
          continue;
        }
        auto *ND = cast<ConstantSDNode>(CurrentOp);
        const APInt &C = ND->getAPIntValue();
        Elts.push_back(DAG.getConstant(C.shl(ShiftAmt), dl, ElementType));
      }
      break;
    case X86ISD::VSRLI:
      for (unsigned i = 0; i != NumElts; ++i) {
        SDValue CurrentOp = SrcOp->getOperand(i);
        if (CurrentOp->isUndef()) {
          // Must produce 0s in the correct bits.
          Elts.push_back(DAG.getConstant(0, dl, ElementType));
          continue;
        }
        auto *ND = cast<ConstantSDNode>(CurrentOp);
        const APInt &C = ND->getAPIntValue();
        Elts.push_back(DAG.getConstant(C.lshr(ShiftAmt), dl, ElementType));
      }
      break;
    case X86ISD::VSRAI:
      for (unsigned i = 0; i != NumElts; ++i) {
        SDValue CurrentOp = SrcOp->getOperand(i);
        if (CurrentOp->isUndef()) {
          // All shifted-in bits must be the same, so use 0.
          Elts.push_back(DAG.getConstant(0, dl, ElementType));
          continue;
        }
        auto *ND = cast<ConstantSDNode>(CurrentOp);
        const APInt &C = ND->getAPIntValue();
        Elts.push_back(DAG.getConstant(C.ashr(ShiftAmt), dl, ElementType));
      }
      break;
    }

    return DAG.getBuildVector(VT, dl, Elts);
  }

  return DAG.getNode(Opc, dl, VT, SrcOp,
                     DAG.getTargetConstant(ShiftAmt, dl, MVT::i8));
}

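// Handle vector element shifts where the shift amount may or may not be a
// constant. Takes the immediate version of the shift opcode as input.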
static SDValue getTargetVShiftNode(unsigned Opc, const SDLoc &dl, MVT VT,
                                   SDValue SrcOp, SDValue ShAmt,
                                   const X86Subtarget &Subtarget,
                                   SelectionDAG &DAG) {
  MVT SVT = ShAmt.getSimpleValueType();
  assert((SVT == MVT::i32 || SVT == MVT::i64) && "Unexpected value type!");

  // Catch shift-by-constant.
  if (ConstantSDNode *CShAmt = dyn_cast<ConstantSDNode>(ShAmt))
    return getTargetVShiftByConstNode(Opc, dl, VT, SrcOp,
                                      CShAmt->getZExtValue(), DAG);

  // Change opcode to the non-immediate version.
  Opc = getTargetVShiftUniformOpcode(Opc, true);

  // Need to build a vector containing the shift amount.
  // SSE/AVX packed shifts only use the lower 64-bit of the shift count.
  // +====================+============+=======================================+
  // | ShAmt is           | HasSSE4.1? | Construct ShAmt vector as             |
  // +====================+============+=======================================+
  // | i64                | Yes, No    | Use ShAmt as lowest elt               |
  // | i32                | Yes        | zero-extend in-reg                    |
  // | (i32 zext(i16/i8)) | Yes        | zero-extend in-reg                    |
  // | (i32 zext(i16/i8)) | No         | byte-shift-in-reg                     |
  // | i16/i32            | No         | v4i32 build_vector(ShAmt, 0, ud, ud)) |
  // +====================+============+=======================================+
  if (SVT == MVT::i64)
    ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), MVT::v2i64, ShAmt);
  else if (ShAmt.getOpcode() == ISD::ZERO_EXTEND &&
           ShAmt.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
           (ShAmt.getOperand(0).getSimpleValueType() == MVT::i16 ||
            ShAmt.getOperand(0).getSimpleValueType() == MVT::i8)) {
    ShAmt = ShAmt.getOperand(0);
    MVT AmtTy = ShAmt.getSimpleValueType() == MVT::i8 ? MVT::v16i8 : MVT::v8i16;
    ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), AmtTy, ShAmt);
    if (Subtarget.hasSSE41())
      ShAmt = DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, SDLoc(ShAmt),
                          MVT::v2i64, ShAmt);
    else {
      SDValue ByteShift = DAG.getTargetConstant(
          (128 - AmtTy.getScalarSizeInBits()) / 8, SDLoc(ShAmt), MVT::i8);
      ShAmt = DAG.getBitcast(MVT::v16i8, ShAmt);
      ShAmt = DAG.getNode(X86ISD::VSHLDQ, SDLoc(ShAmt), MVT::v16i8, ShAmt,
                          ByteShift);
      ShAmt = DAG.getNode(X86ISD::VSRLDQ, SDLoc(ShAmt), MVT::v16i8, ShAmt,
                          ByteShift);
    }
  } else if (Subtarget.hasSSE41() &&
             ShAmt.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
    ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), MVT::v4i32, ShAmt);
    ShAmt = DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, SDLoc(ShAmt),
                        MVT::v2i64, ShAmt);
  } else {
    SDValue ShOps[4] = {ShAmt, DAG.getConstant(0, dl, SVT), DAG.getUNDEF(SVT),
                        DAG.getUNDEF(SVT)};
    ShAmt = DAG.getBuildVector(MVT::v4i32, dl, ShOps);
  }

  // The return type has to be a 128-bit type with the same element
  // type as the input type.
  MVT EltVT = VT.getVectorElementType();
  MVT ShVT = MVT::getVectorVT(EltVT, 128 / EltVT.getSizeInBits());

  ShAmt = DAG.getBitcast(ShVT, ShAmt);
  return DAG.getNode(Opc, dl, VT, SrcOp, ShAmt);
}

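// Return Mask with the necessary casting or extending
// for Mask according to MaskVT when lowering masking intrinsics.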
static SDValue getMaskNode(SDValue Mask, MVT MaskVT,
                           const X86Subtarget &Subtarget, SelectionDAG &DAG,
                           const SDLoc &dl) {
  if (isAllOnesConstant(Mask))
    return DAG.getConstant(1, dl, MaskVT);
  if (X86::isZeroNode(Mask))
    return DAG.getConstant(0, dl, MaskVT);

  assert(MaskVT.bitsLE(Mask.getSimpleValueType()) && "Unexpected mask size!");

  if (Mask.getSimpleValueType() == MVT::i64 && Subtarget.is32Bit()) {
    assert(MaskVT == MVT::v64i1 && "Expected v64i1 mask!");
    assert(Subtarget.hasBWI() && "Expected AVX512BW target!");
    // In 32-bit mode, a bitcast of i64 is illegal; split the mask instead.
    SDValue Lo, Hi;
    Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Mask,
                     DAG.getConstant(0, dl, MVT::i32));
    Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Mask,
                     DAG.getConstant(1, dl, MVT::i32));

    Lo = DAG.getBitcast(MVT::v32i1, Lo);
    Hi = DAG.getBitcast(MVT::v32i1, Hi);

    return DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v64i1, Lo, Hi);
  } else {
    MVT BitcastVT = MVT::getVectorVT(MVT::i1,
                                     Mask.getSimpleValueType().getSizeInBits());
    // In case when MaskVT equals v2i1 or v4i1, low 2 or 4 elements
    // are extracted by EXTRACT_SUBVECTOR.
    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT,
                       DAG.getBitcast(BitcastVT, Mask),
                       DAG.getIntPtrConstant(0, dl));
  }
}

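// Return (vselect Mask, Op, PreservedSrc), with the necessary casting or
// extending of Mask, for lowering masked intrinsics. An all-ones mask simply
// returns Op; an undef PreservedSrc is replaced by a zero vector.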
static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask,
                                    SDValue PreservedSrc,
                                    const X86Subtarget &Subtarget,
                                    SelectionDAG &DAG) {
  MVT VT = Op.getSimpleValueType();
  MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements());
  unsigned OpcodeSelect = ISD::VSELECT;
  SDLoc dl(Op);

  if (isAllOnesConstant(Mask))
    return Op;

  SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);

  if (PreservedSrc.isUndef())
    PreservedSrc = getZeroVector(VT, Subtarget, DAG, dl);
  return DAG.getNode(OpcodeSelect, dl, VT, VMask, Op, PreservedSrc);
}

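// Creates an SDNode for a predicated scalar operation:
//   (X86selects Mask[0], Op, PreservedSrc).
// The mask arrives as an MVT::i8 and is narrowed to MVT::v1i1. For FSETCCM
// and VFPCLASSS the mask is ANDed with the result instead.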
static SDValue getScalarMaskingNode(SDValue Op, SDValue Mask,
                                    SDValue PreservedSrc,
                                    const X86Subtarget &Subtarget,
                                    SelectionDAG &DAG) {
  if (auto *MaskConst = dyn_cast<ConstantSDNode>(Mask))
    if (MaskConst->getZExtValue() & 0x1)
      return Op;

  MVT VT = Op.getSimpleValueType();
  SDLoc dl(Op);

  assert(Mask.getValueType() == MVT::i8 && "Unexpected type");
  SDValue IMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v1i1,
                              DAG.getBitcast(MVT::v8i1, Mask),
                              DAG.getIntPtrConstant(0, dl));
  if (Op.getOpcode() == X86ISD::FSETCCM ||
      Op.getOpcode() == X86ISD::FSETCCM_SAE ||
      Op.getOpcode() == X86ISD::VFPCLASSS)
    return DAG.getNode(ISD::AND, dl, VT, Op, IMask);

  if (PreservedSrc.isUndef())
    PreservedSrc = getZeroVector(VT, Subtarget, DAG, dl);
  return DAG.getNode(X86ISD::SELECTS, dl, VT, IMask, Op, PreservedSrc);
}

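// Returns the size in bytes of the on-stack SEH registration node for the
// given 32-bit MSVC EH personality.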
static int getSEHRegistrationNodeSize(const Function *Fn) {
  if (!Fn->hasPersonalityFn())
    report_fatal_error(
        "querying registration node size for function without personality");
  // The RegNodeSize is 6 32-bit words for SEH and 4 for C++ EH. See
  // WinEHStatePass for the size of the EH registration node.
  switch (classifyEHPersonality(Fn->getPersonalityFn())) {
  case EHPersonality::MSVC_X86SEH: return 24;
  case EHPersonality::MSVC_CXX: return 16;
  default: break;
  }
  report_fatal_error(
      "can only recover FP for 32-bit MSVC EH personality functions");
}

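// When the MSVC runtime transfers control to us, either to an outlined
// function or when returning to a parent frame after catching an exception,
// we recover the parent frame pointer by doing arithmetic on the incoming
// EBP:
//   RegNodeBase = EntryEBP - RegNodeSize
//   ParentFP    = RegNodeBase - ParentFrameOffset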
static SDValue recoverFramePointer(SelectionDAG &DAG, const Function *Fn,
                                   SDValue EntryEBP) {
  MachineFunction &MF = DAG.getMachineFunction();
  SDLoc dl;

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());

  // It's possible that the parent function no longer has a personality
  // function if the exceptional code was optimized away, in which case we
  // just return the incoming EBP.
  if (!Fn->hasPersonalityFn())
    return EntryEBP;

  // Get an MCSymbol that will ultimately resolve to the frame offset of the
  // EH registration, or the parent frame allocation, from EBP.
  MCSymbol *OffsetSym =
      MF.getMMI().getContext().getOrCreateParentFrameOffsetSymbol(
          GlobalValue::dropLLVMManglingEscape(Fn->getName()));
  SDValue OffsetSymVal = DAG.getMCSymbol(OffsetSym, PtrVT);
  SDValue ParentFrameOffset =
      DAG.getNode(ISD::LOCAL_RECOVER, dl, PtrVT, OffsetSymVal);

  // Return EntryEBP + ParentFrameOffset for x64. This adjusts from RSP after
  // prologue to RBP in the parent function.
  const X86Subtarget &Subtarget =
      static_cast<const X86Subtarget &>(DAG.getSubtarget());
  if (Subtarget.is64Bit())
    return DAG.getNode(ISD::ADD, dl, PtrVT, EntryEBP, ParentFrameOffset);

  int RegNodeSize = getSEHRegistrationNodeSize(Fn);
  // RegNodeBase = EntryEBP - RegNodeSize
  // ParentFP = RegNodeBase - ParentFrameOffset
  SDValue RegNodeBase = DAG.getNode(ISD::SUB, dl, PtrVT, EntryEBP,
                                    DAG.getConstant(RegNodeSize, dl, PtrVT));
  return DAG.getNode(ISD::SUB, dl, PtrVT, RegNodeBase, ParentFrameOffset);
}

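// Lower intrinsics without a chain. The lambdas below classify the static
// rounding-mode operand: current direction, SAE (suppress all exceptions),
// or SAE combined with an explicit rounding mode.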
25195 | SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, |
25196 | SelectionDAG &DAG) const { |
25197 | |
25198 | auto isRoundModeCurDirection = [](SDValue Rnd) { |
25199 | if (auto *C = dyn_cast<ConstantSDNode>(Rnd)) |
25200 | return C->getAPIntValue() == X86::STATIC_ROUNDING::CUR_DIRECTION; |
25201 | |
25202 | return false; |
25203 | }; |
25204 | auto isRoundModeSAE = [](SDValue Rnd) { |
25205 | if (auto *C = dyn_cast<ConstantSDNode>(Rnd)) { |
25206 | unsigned RC = C->getZExtValue(); |
25207 | if (RC & X86::STATIC_ROUNDING::NO_EXC) { |
25208 | |
25209 | RC ^= X86::STATIC_ROUNDING::NO_EXC; |
25210 | |
25211 | |
25212 | return RC == 0 || RC == X86::STATIC_ROUNDING::CUR_DIRECTION; |
25213 | } |
25214 | } |
25215 | |
25216 | return false; |
25217 | }; |
25218 | auto isRoundModeSAEToX = [](SDValue Rnd, unsigned &RC) { |
25219 | if (auto *C = dyn_cast<ConstantSDNode>(Rnd)) { |
25220 | RC = C->getZExtValue(); |
25221 | if (RC & X86::STATIC_ROUNDING::NO_EXC) { |
25222 | |
25223 | RC ^= X86::STATIC_ROUNDING::NO_EXC; |
25224 | return RC == X86::STATIC_ROUNDING::TO_NEAREST_INT || |
25225 | RC == X86::STATIC_ROUNDING::TO_NEG_INF || |
25226 | RC == X86::STATIC_ROUNDING::TO_POS_INF || |
25227 | RC == X86::STATIC_ROUNDING::TO_ZERO; |
25228 | } |
25229 | } |
25230 | |
25231 | return false; |
25232 | }; |
25233 | |
25234 | SDLoc dl(Op); |
25235 | unsigned IntNo = Op.getConstantOperandVal(0); |
25236 | MVT VT = Op.getSimpleValueType(); |
25237 | const IntrinsicData* IntrData = getIntrinsicWithoutChain(IntNo); |
25238 | |
25239 | |
25240 | SelectionDAG::FlagInserter FlagsInserter(DAG, Op->getFlags()); |
25241 | |
25242 | if (IntrData) { |
25243 | switch(IntrData->Type) { |
25244 | case INTR_TYPE_1OP: { |
25245 | |
25246 | |
25247 | |
25248 | unsigned IntrWithRoundingModeOpcode = IntrData->Opc1; |
25249 | if (IntrWithRoundingModeOpcode != 0) { |
25250 | SDValue Rnd = Op.getOperand(2); |
25251 | unsigned RC = 0; |
25252 | if (isRoundModeSAEToX(Rnd, RC)) |
25253 | return DAG.getNode(IntrWithRoundingModeOpcode, dl, Op.getValueType(), |
25254 | Op.getOperand(1), |
25255 | DAG.getTargetConstant(RC, dl, MVT::i32)); |
25256 | if (!isRoundModeCurDirection(Rnd)) |
25257 | return SDValue(); |
25258 | } |
25259 | return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), |
25260 | Op.getOperand(1)); |
25261 | } |
25262 | case INTR_TYPE_1OP_SAE: { |
25263 | SDValue Sae = Op.getOperand(2); |
25264 | |
25265 | unsigned Opc; |
25266 | if (isRoundModeCurDirection(Sae)) |
25267 | Opc = IntrData->Opc0; |
25268 | else if (isRoundModeSAE(Sae)) |
25269 | Opc = IntrData->Opc1; |
25270 | else |
25271 | return SDValue(); |
25272 | |
25273 | return DAG.getNode(Opc, dl, Op.getValueType(), Op.getOperand(1)); |
25274 | } |
25275 | case INTR_TYPE_2OP: { |
25276 | SDValue Src2 = Op.getOperand(2); |
25277 | |
25278 | |
25279 | |
25280 | |
25281 | unsigned IntrWithRoundingModeOpcode = IntrData->Opc1; |
25282 | if (IntrWithRoundingModeOpcode != 0) { |
25283 | SDValue Rnd = Op.getOperand(3); |
25284 | unsigned RC = 0; |
25285 | if (isRoundModeSAEToX(Rnd, RC)) |
25286 | return DAG.getNode(IntrWithRoundingModeOpcode, dl, Op.getValueType(), |
25287 | Op.getOperand(1), Src2, |
25288 | DAG.getTargetConstant(RC, dl, MVT::i32)); |
25289 | if (!isRoundModeCurDirection(Rnd)) |
25290 | return SDValue(); |
25291 | } |
25292 | |
25293 | return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), |
25294 | Op.getOperand(1), Src2); |
25295 | } |
25296 | case INTR_TYPE_2OP_SAE: { |
25297 | SDValue Sae = Op.getOperand(3); |
25298 | |
25299 | unsigned Opc; |
25300 | if (isRoundModeCurDirection(Sae)) |
25301 | Opc = IntrData->Opc0; |
25302 | else if (isRoundModeSAE(Sae)) |
25303 | Opc = IntrData->Opc1; |
25304 | else |
25305 | return SDValue(); |
25306 | |
25307 | return DAG.getNode(Opc, dl, Op.getValueType(), Op.getOperand(1), |
25308 | Op.getOperand(2)); |
25309 | } |
25310 | case INTR_TYPE_3OP: |
25311 | case INTR_TYPE_3OP_IMM8: { |
25312 | SDValue Src1 = Op.getOperand(1); |
25313 | SDValue Src2 = Op.getOperand(2); |
25314 | SDValue Src3 = Op.getOperand(3); |
25315 | |
25316 | if (IntrData->Type == INTR_TYPE_3OP_IMM8 && |
25317 | Src3.getValueType() != MVT::i8) { |
25318 | Src3 = DAG.getTargetConstant( |
25319 | cast<ConstantSDNode>(Src3)->getZExtValue() & 0xff, dl, MVT::i8); |
25320 | } |
25321 | |
25322 | |
25323 | |
25324 | |
25325 | unsigned IntrWithRoundingModeOpcode = IntrData->Opc1; |
25326 | if (IntrWithRoundingModeOpcode != 0) { |
25327 | SDValue Rnd = Op.getOperand(4); |
25328 | unsigned RC = 0; |
25329 | if (isRoundModeSAEToX(Rnd, RC)) |
25330 | return DAG.getNode(IntrWithRoundingModeOpcode, dl, Op.getValueType(), |
25331 | Src1, Src2, Src3, |
25332 | DAG.getTargetConstant(RC, dl, MVT::i32)); |
25333 | if (!isRoundModeCurDirection(Rnd)) |
25334 | return SDValue(); |
25335 | } |
25336 | |
25337 | return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), |
25338 | {Src1, Src2, Src3}); |
25339 | } |
25340 | case INTR_TYPE_4OP_IMM8: { |
25341 | assert(Op.getOperand(4)->getOpcode() == ISD::TargetConstant); |
25342 | SDValue Src4 = Op.getOperand(4); |
25343 | if (Src4.getValueType() != MVT::i8) { |
25344 | Src4 = DAG.getTargetConstant( |
25345 | cast<ConstantSDNode>(Src4)->getZExtValue() & 0xff, dl, MVT::i8); |
25346 | } |
25347 | |
25348 | return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), |
25349 | Op.getOperand(1), Op.getOperand(2), Op.getOperand(3), |
25350 | Src4); |
25351 | } |
25352 | case INTR_TYPE_1OP_MASK: { |
25353 | SDValue Src = Op.getOperand(1); |
25354 | SDValue PassThru = Op.getOperand(2); |
25355 | SDValue Mask = Op.getOperand(3); |
25356 | |
25357 | |
25358 | |
25359 | unsigned IntrWithRoundingModeOpcode = IntrData->Opc1; |
25360 | if (IntrWithRoundingModeOpcode != 0) { |
25361 | SDValue Rnd = Op.getOperand(4); |
25362 | unsigned RC = 0; |
25363 | if (isRoundModeSAEToX(Rnd, RC)) |
25364 | return getVectorMaskingNode( |
25365 | DAG.getNode(IntrWithRoundingModeOpcode, dl, Op.getValueType(), |
25366 | Src, DAG.getTargetConstant(RC, dl, MVT::i32)), |
25367 | Mask, PassThru, Subtarget, DAG); |
25368 | if (!isRoundModeCurDirection(Rnd)) |
25369 | return SDValue(); |
25370 | } |
25371 | return getVectorMaskingNode( |
25372 | DAG.getNode(IntrData->Opc0, dl, VT, Src), Mask, PassThru, |
25373 | Subtarget, DAG); |
25374 | } |
25375 | case INTR_TYPE_1OP_MASK_SAE: { |
25376 | SDValue Src = Op.getOperand(1); |
25377 | SDValue PassThru = Op.getOperand(2); |
25378 | SDValue Mask = Op.getOperand(3); |
25379 | SDValue Rnd = Op.getOperand(4); |
25380 | |
25381 | unsigned Opc; |
25382 | if (isRoundModeCurDirection(Rnd)) |
25383 | Opc = IntrData->Opc0; |
25384 | else if (isRoundModeSAE(Rnd)) |
25385 | Opc = IntrData->Opc1; |
25386 | else |
25387 | return SDValue(); |
25388 | |
25389 | return getVectorMaskingNode(DAG.getNode(Opc, dl, VT, Src), Mask, PassThru, |
25390 | Subtarget, DAG); |
25391 | } |
25392 | case INTR_TYPE_SCALAR_MASK: { |
25393 | SDValue Src1 = Op.getOperand(1); |
25394 | SDValue Src2 = Op.getOperand(2); |
25395 | SDValue passThru = Op.getOperand(3); |
25396 | SDValue Mask = Op.getOperand(4); |
25397 | unsigned IntrWithRoundingModeOpcode = IntrData->Opc1; |
25398 | |
25399 | |
25400 | |
25401 | bool HasRounding = IntrWithRoundingModeOpcode != 0; |
25402 | if (Op.getNumOperands() == (5U + HasRounding)) { |
25403 | if (HasRounding) { |
25404 | SDValue Rnd = Op.getOperand(5); |
25405 | unsigned RC = 0; |
25406 | if (isRoundModeSAEToX(Rnd, RC)) |
25407 | return getScalarMaskingNode( |
25408 | DAG.getNode(IntrWithRoundingModeOpcode, dl, VT, Src1, Src2, |
25409 | DAG.getTargetConstant(RC, dl, MVT::i32)), |
25410 | Mask, passThru, Subtarget, DAG); |
25411 | if (!isRoundModeCurDirection(Rnd)) |
25412 | return SDValue(); |
25413 | } |
25414 | return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1, |
25415 | Src2), |
25416 | Mask, passThru, Subtarget, DAG); |
25417 | } |
25418 | |
25419 | assert(Op.getNumOperands() == (6U + HasRounding) && |
25420 | "Unexpected intrinsic form"); |
25421 | SDValue RoundingMode = Op.getOperand(5); |
25422 | unsigned Opc = IntrData->Opc0; |
25423 | if (HasRounding) { |
25424 | SDValue Sae = Op.getOperand(6); |
25425 | if (isRoundModeSAE(Sae)) |
25426 | Opc = IntrWithRoundingModeOpcode; |
25427 | else if (!isRoundModeCurDirection(Sae)) |
25428 | return SDValue(); |
25429 | } |
25430 | return getScalarMaskingNode(DAG.getNode(Opc, dl, VT, Src1, |
25431 | Src2, RoundingMode), |
25432 | Mask, passThru, Subtarget, DAG); |
25433 | } |
25434 | case INTR_TYPE_SCALAR_MASK_RND: { |
25435 | SDValue Src1 = Op.getOperand(1); |
25436 | SDValue Src2 = Op.getOperand(2); |
25437 | SDValue passThru = Op.getOperand(3); |
25438 | SDValue Mask = Op.getOperand(4); |
25439 | SDValue Rnd = Op.getOperand(5); |
25440 | |
25441 | SDValue NewOp; |
25442 | unsigned RC = 0; |
25443 | if (isRoundModeCurDirection(Rnd)) |
25444 | NewOp = DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2); |
25445 | else if (isRoundModeSAEToX(Rnd, RC)) |
25446 | NewOp = DAG.getNode(IntrData->Opc1, dl, VT, Src1, Src2, |
25447 | DAG.getTargetConstant(RC, dl, MVT::i32)); |
25448 | else |
25449 | return SDValue(); |
25450 | |
25451 | return getScalarMaskingNode(NewOp, Mask, passThru, Subtarget, DAG); |
25452 | } |
25453 | case INTR_TYPE_SCALAR_MASK_SAE: { |
25454 | SDValue Src1 = Op.getOperand(1); |
25455 | SDValue Src2 = Op.getOperand(2); |
25456 | SDValue passThru = Op.getOperand(3); |
25457 | SDValue Mask = Op.getOperand(4); |
25458 | SDValue Sae = Op.getOperand(5); |
25459 | unsigned Opc; |
25460 | if (isRoundModeCurDirection(Sae)) |
25461 | Opc = IntrData->Opc0; |
25462 | else if (isRoundModeSAE(Sae)) |
25463 | Opc = IntrData->Opc1; |
25464 | else |
25465 | return SDValue(); |
25466 | |
25467 | return getScalarMaskingNode(DAG.getNode(Opc, dl, VT, Src1, Src2), |
25468 | Mask, passThru, Subtarget, DAG); |
25469 | } |
25470 | case INTR_TYPE_2OP_MASK: { |
25471 | SDValue Src1 = Op.getOperand(1); |
25472 | SDValue Src2 = Op.getOperand(2); |
25473 | SDValue PassThru = Op.getOperand(3); |
25474 | SDValue Mask = Op.getOperand(4); |
25475 | SDValue NewOp; |
25476 | if (IntrData->Opc1 != 0) { |
25477 | SDValue Rnd = Op.getOperand(5); |
25478 | unsigned RC = 0; |
25479 | if (isRoundModeSAEToX(Rnd, RC)) |
25480 | NewOp = DAG.getNode(IntrData->Opc1, dl, VT, Src1, Src2, |
25481 | DAG.getTargetConstant(RC, dl, MVT::i32)); |
25482 | else if (!isRoundModeCurDirection(Rnd)) |
25483 | return SDValue(); |
25484 | } |
25485 | if (!NewOp) |
25486 | NewOp = DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2); |
25487 | return getVectorMaskingNode(NewOp, Mask, PassThru, Subtarget, DAG); |
25488 | } |
25489 | case INTR_TYPE_2OP_MASK_SAE: { |
25490 | SDValue Src1 = Op.getOperand(1); |
25491 | SDValue Src2 = Op.getOperand(2); |
25492 | SDValue PassThru = Op.getOperand(3); |
25493 | SDValue Mask = Op.getOperand(4); |
25494 | |
25495 | unsigned Opc = IntrData->Opc0; |
25496 | if (IntrData->Opc1 != 0) { |
25497 | SDValue Sae = Op.getOperand(5); |
25498 | if (isRoundModeSAE(Sae)) |
25499 | Opc = IntrData->Opc1; |
25500 | else if (!isRoundModeCurDirection(Sae)) |
25501 | return SDValue(); |
25502 | } |
25503 | |
25504 | return getVectorMaskingNode(DAG.getNode(Opc, dl, VT, Src1, Src2), |
25505 | Mask, PassThru, Subtarget, DAG); |
25506 | } |
25507 | case INTR_TYPE_3OP_SCALAR_MASK_SAE: { |
25508 | SDValue Src1 = Op.getOperand(1); |
25509 | SDValue Src2 = Op.getOperand(2); |
25510 | SDValue Src3 = Op.getOperand(3); |
25511 | SDValue PassThru = Op.getOperand(4); |
25512 | SDValue Mask = Op.getOperand(5); |
25513 | SDValue Sae = Op.getOperand(6); |
25514 | unsigned Opc; |
25515 | if (isRoundModeCurDirection(Sae)) |
25516 | Opc = IntrData->Opc0; |
25517 | else if (isRoundModeSAE(Sae)) |
25518 | Opc = IntrData->Opc1; |
25519 | else |
25520 | return SDValue(); |
25521 | |
25522 | return getScalarMaskingNode(DAG.getNode(Opc, dl, VT, Src1, Src2, Src3), |
25523 | Mask, PassThru, Subtarget, DAG); |
25524 | } |
25525 | case INTR_TYPE_3OP_MASK_SAE: { |
25526 | SDValue Src1 = Op.getOperand(1); |
25527 | SDValue Src2 = Op.getOperand(2); |
25528 | SDValue Src3 = Op.getOperand(3); |
25529 | SDValue PassThru = Op.getOperand(4); |
25530 | SDValue Mask = Op.getOperand(5); |
25531 | |
25532 | unsigned Opc = IntrData->Opc0; |
25533 | if (IntrData->Opc1 != 0) { |
25534 | SDValue Sae = Op.getOperand(6); |
25535 | if (isRoundModeSAE(Sae)) |
25536 | Opc = IntrData->Opc1; |
25537 | else if (!isRoundModeCurDirection(Sae)) |
25538 | return SDValue(); |
25539 | } |
25540 | return getVectorMaskingNode(DAG.getNode(Opc, dl, VT, Src1, Src2, Src3), |
25541 | Mask, PassThru, Subtarget, DAG); |
25542 | } |
25543 | case BLENDV: { |
25544 | SDValue Src1 = Op.getOperand(1); |
25545 | SDValue Src2 = Op.getOperand(2); |
25546 | SDValue Src3 = Op.getOperand(3); |
25547 | |
25548 | EVT MaskVT = Src3.getValueType().changeVectorElementTypeToInteger(); |
25549 | Src3 = DAG.getBitcast(MaskVT, Src3); |
25550 | |
25551 | |
25552 | return DAG.getNode(IntrData->Opc0, dl, VT, Src3, Src2, Src1); |
25553 | } |
25554 | case VPERM_2OP : { |
25555 | SDValue Src1 = Op.getOperand(1); |
25556 | SDValue Src2 = Op.getOperand(2); |
25557 | |
25558 | // Swap Src1 and Src2 in the node creation. |
25559 | return DAG.getNode(IntrData->Opc0, dl, VT, Src2, Src1); |
25560 | } |
25561 | case IFMA_OP: |
25562 | // NOTE: We need to swizzle the operands to pass the multiply |
25563 | // operands first. |
25564 | return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), |
25565 | Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); |
25566 | case FPCLASSS: { |
25567 | SDValue Src1 = Op.getOperand(1); |
25568 | SDValue Imm = Op.getOperand(2); |
25569 | SDValue Mask = Op.getOperand(3); |
25570 | SDValue FPclass = DAG.getNode(IntrData->Opc0, dl, MVT::v1i1, Src1, Imm); |
25571 | SDValue FPclassMask = getScalarMaskingNode(FPclass, Mask, SDValue(), |
25572 | Subtarget, DAG); |
25573 | // Need to fill with zeros to ensure the bitcast will produce zeroes |
25574 | // for the upper bits. An EXTRACT_ELEMENT here wouldn't guarantee that. |
25575 | SDValue Ins = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v8i1, |
25576 | DAG.getConstant(0, dl, MVT::v8i1), |
25577 | FPclassMask, DAG.getIntPtrConstant(0, dl)); |
25578 | return DAG.getBitcast(MVT::i8, Ins); |
25579 | } |
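      | // Worked example (added, illustrative): FPCLASSS yields a single v1i1 |
      | // bit. Inserting it into a zeroed v8i1 at index 0 and bitcasting gives |
      | // an i8 whose bit 0 is the class-test result and whose bits 7..1 are |
      | // provably zero; a bare EXTRACT_VECTOR_ELT would leave them undefined. |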
25580 | |
25581 | case CMP_MASK_CC: { |
25582 | MVT MaskVT = Op.getSimpleValueType(); |
25583 | SDValue CC = Op.getOperand(3); |
25584 | SDValue Mask = Op.getOperand(4); |
25585 | // We specify 2 possible opcodes for intrinsics with rounding modes. |
25586 | // First, we check if the intrinsic may have non-default rounding mode |
25587 | // (IntrData->Opc1 != 0), then we check the rounding mode operand. |
25588 | if (IntrData->Opc1 != 0) { |
25589 | SDValue Sae = Op.getOperand(5); |
25590 | if (isRoundModeSAE(Sae)) |
25591 | return DAG.getNode(IntrData->Opc1, dl, MaskVT, Op.getOperand(1), |
25592 | Op.getOperand(2), CC, Mask, Sae); |
25593 | if (!isRoundModeCurDirection(Sae)) |
25594 | return SDValue(); |
25595 | } |
25596 | |
25597 | return DAG.getNode(IntrData->Opc0, dl, MaskVT, |
25598 | {Op.getOperand(1), Op.getOperand(2), CC, Mask}); |
25599 | } |
25600 | case CMP_MASK_SCALAR_CC: { |
25601 | SDValue Src1 = Op.getOperand(1); |
25602 | SDValue Src2 = Op.getOperand(2); |
25603 | SDValue CC = Op.getOperand(3); |
25604 | SDValue Mask = Op.getOperand(4); |
25605 | |
25606 | SDValue Cmp; |
25607 | if (IntrData->Opc1 != 0) { |
25608 | SDValue Sae = Op.getOperand(5); |
25609 | if (isRoundModeSAE(Sae)) |
25610 | Cmp = DAG.getNode(IntrData->Opc1, dl, MVT::v1i1, Src1, Src2, CC, Sae); |
25611 | else if (!isRoundModeCurDirection(Sae)) |
25612 | return SDValue(); |
25613 | } |
25614 | |
25615 | if (!Cmp.getNode()) |
25616 | Cmp = DAG.getNode(IntrData->Opc0, dl, MVT::v1i1, Src1, Src2, CC); |
25617 | |
25618 | SDValue CmpMask = getScalarMaskingNode(Cmp, Mask, SDValue(), |
25619 | Subtarget, DAG); |
25620 | // Need to fill with zeros to ensure the bitcast will produce zeroes |
25621 | // for the upper bits. An EXTRACT_ELEMENT here wouldn't guarantee that. |
25622 | SDValue Ins = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v8i1, |
25623 | DAG.getConstant(0, dl, MVT::v8i1), |
25624 | CmpMask, DAG.getIntPtrConstant(0, dl)); |
25625 | return DAG.getBitcast(MVT::i8, Ins); |
25626 | } |
25627 | case COMI: { |
25628 | ISD::CondCode CC = (ISD::CondCode)IntrData->Opc1; |
25629 | SDValue LHS = Op.getOperand(1); |
25630 | SDValue RHS = Op.getOperand(2); |
25631 | |
25632 | if (CC == ISD::SETLT || CC == ISD::SETLE) |
25633 | std::swap(LHS, RHS); |
25634 | |
25635 | SDValue Comi = DAG.getNode(IntrData->Opc0, dl, MVT::i32, LHS, RHS); |
25636 | SDValue SetCC; |
25637 | switch (CC) { |
25638 | case ISD::SETEQ: { |
25639 | SetCC = getSETCC(X86::COND_E, Comi, dl, DAG); |
25640 | SDValue SetNP = getSETCC(X86::COND_NP, Comi, dl, DAG); |
25641 | SetCC = DAG.getNode(ISD::AND, dl, MVT::i8, SetCC, SetNP); |
25642 | break; |
25643 | } |
25644 | case ISD::SETNE: { |
25645 | SetCC = getSETCC(X86::COND_NE, Comi, dl, DAG); |
25646 | SDValue SetP = getSETCC(X86::COND_P, Comi, dl, DAG); |
25647 | SetCC = DAG.getNode(ISD::OR, dl, MVT::i8, SetCC, SetP); |
25648 | break; |
25649 | } |
25650 | case ISD::SETGT: |
25651 | case ISD::SETLT: { |
25652 | SetCC = getSETCC(X86::COND_A, Comi, dl, DAG); |
25653 | break; |
25654 | } |
25655 | case ISD::SETGE: |
25656 | case ISD::SETLE: |
25657 | SetCC = getSETCC(X86::COND_AE, Comi, dl, DAG); |
25658 | break; |
25659 | default: |
25660 | llvm_unreachable("Unexpected illegal condition!"); |
25661 | } |
25662 | return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC); |
25663 | } |
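      | // Flag mapping (added for reference): COMIS*/UCOMIS* set ZF/PF/CF from |
      | // the compare, and an unordered result sets all three. Equality is |
      | // therefore ZF=1 and PF=0 (E && NP), inequality its complement |
      | // (NE || P), and GT/GE use the unsigned conditions A/AE, with LT/LE |
      | // handled by the operand swap above. |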
25664 | case COMI_RM: { |
25665 | SDValue LHS = Op.getOperand(1); |
25666 | SDValue RHS = Op.getOperand(2); |
25667 | unsigned CondVal = Op.getConstantOperandVal(3); |
25668 | SDValue Sae = Op.getOperand(4); |
25669 | |
25670 | SDValue FCmp; |
25671 | if (isRoundModeCurDirection(Sae)) |
25672 | FCmp = DAG.getNode(X86ISD::FSETCCM, dl, MVT::v1i1, LHS, RHS, |
25673 | DAG.getTargetConstant(CondVal, dl, MVT::i8)); |
25674 | else if (isRoundModeSAE(Sae)) |
25675 | FCmp = DAG.getNode(X86ISD::FSETCCM_SAE, dl, MVT::v1i1, LHS, RHS, |
25676 | DAG.getTargetConstant(CondVal, dl, MVT::i8), Sae); |
25677 | else |
25678 | return SDValue(); |
25679 | // Need to fill with zeros to ensure the bitcast will produce zeroes |
25680 | // for the upper bits. An EXTRACT_ELEMENT here wouldn't guarantee that. |
25681 | SDValue Ins = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v16i1, |
25682 | DAG.getConstant(0, dl, MVT::v16i1), |
25683 | FCmp, DAG.getIntPtrConstant(0, dl)); |
25684 | return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, |
25685 | DAG.getBitcast(MVT::i16, Ins)); |
25686 | } |
25687 | case VSHIFT: |
25688 | return getTargetVShiftNode(IntrData->Opc0, dl, Op.getSimpleValueType(), |
25689 | Op.getOperand(1), Op.getOperand(2), Subtarget, |
25690 | DAG); |
25691 | case COMPRESS_EXPAND_IN_REG: { |
25692 | SDValue Mask = Op.getOperand(3); |
25693 | SDValue DataToCompress = Op.getOperand(1); |
25694 | SDValue PassThru = Op.getOperand(2); |
25695 | if (ISD::isBuildVectorAllOnes(Mask.getNode())) |
25696 | return Op.getOperand(1); |
25697 | |
25698 | // Avoid false dependency. |
25699 | if (PassThru.isUndef()) |
25700 | PassThru = DAG.getConstant(0, dl, VT); |
25701 | |
25702 | return DAG.getNode(IntrData->Opc0, dl, VT, DataToCompress, PassThru, |
25703 | Mask); |
25704 | } |
25705 | case FIXUPIMM: |
25706 | case FIXUPIMM_MASKZ: { |
25707 | SDValue Src1 = Op.getOperand(1); |
25708 | SDValue Src2 = Op.getOperand(2); |
25709 | SDValue Src3 = Op.getOperand(3); |
25710 | SDValue Imm = Op.getOperand(4); |
25711 | SDValue Mask = Op.getOperand(5); |
25712 | SDValue Passthru = (IntrData->Type == FIXUPIMM) |
25713 | ? Src1 |
25714 | : getZeroVector(VT, Subtarget, DAG, dl); |
25715 | |
25716 | unsigned Opc = IntrData->Opc0; |
25717 | if (IntrData->Opc1 != 0) { |
25718 | SDValue Sae = Op.getOperand(6); |
25719 | if (isRoundModeSAE(Sae)) |
25720 | Opc = IntrData->Opc1; |
25721 | else if (!isRoundModeCurDirection(Sae)) |
25722 | return SDValue(); |
25723 | } |
25724 | |
25725 | SDValue FixupImm = DAG.getNode(Opc, dl, VT, Src1, Src2, Src3, Imm); |
25726 | |
25727 | if (Opc == X86ISD::VFIXUPIMM || Opc == X86ISD::VFIXUPIMM_SAE) |
25728 | return getVectorMaskingNode(FixupImm, Mask, Passthru, Subtarget, DAG); |
25729 | |
25730 | return getScalarMaskingNode(FixupImm, Mask, Passthru, Subtarget, DAG); |
25731 | } |
25732 | case ROUNDP: { |
25733 | assert(IntrData->Opc0 == X86ISD::VRNDSCALE && "Unexpected opcode"); |
25734 | // Clear the upper bits of the rounding immediate so that the legacy |
25735 | // intrinsic can't trigger the scaling behavior of VRNDSCALE. |
25736 | auto Round = cast<ConstantSDNode>(Op.getOperand(2)); |
25737 | SDValue RoundingMode = |
25738 | DAG.getTargetConstant(Round->getZExtValue() & 0xf, dl, MVT::i32); |
25739 | return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), |
25740 | Op.getOperand(1), RoundingMode); |
25741 | } |
25742 | case ROUNDS: { |
25743 | assert(IntrData->Opc0 == X86ISD::VRNDSCALES && "Unexpected opcode"); |
25744 | // Clear the upper bits of the rounding immediate so that the legacy |
25745 | // intrinsic can't trigger the scaling behavior of VRNDSCALES. |
25746 | auto Round = cast<ConstantSDNode>(Op.getOperand(3)); |
25747 | SDValue RoundingMode = |
25748 | DAG.getTargetConstant(Round->getZExtValue() & 0xf, dl, MVT::i32); |
25749 | return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), |
25750 | Op.getOperand(1), Op.getOperand(2), RoundingMode); |
25751 | } |
25752 | case BEXTRI: { |
25753 | assert(IntrData->Opc0 == X86ISD::BEXTRI && "Unexpected opcode"); |
25754 | |
25755 | uint64_t Imm = Op.getConstantOperandVal(2); |
25756 | SDValue Control = DAG.getTargetConstant(Imm & 0xffff, dl, |
25757 | Op.getValueType()); |
25758 | return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), |
25759 | Op.getOperand(1), Control); |
25760 | } |
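      | // Control layout (added for reference): BEXTR's control word packs the |
      | // start bit in bits 7:0 and the field length in bits 15:8, so e.g. a |
      | // control of 0x0404 extracts 4 bits beginning at bit 4; bits above 15 |
      | // are masked off here. |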
25761 | |
25762 | case ADX: { |
25763 | SDVTList CFVTs = DAG.getVTList(Op->getValueType(0), MVT::i32); |
25764 | SDVTList VTs = DAG.getVTList(Op.getOperand(2).getValueType(), MVT::i32); |
25765 | |
25766 | SDValue Res; |
25767 | // If the carry in is zero, then we should just use ADD/SUB instead of |
25768 | // ADC/SBB. |
25769 | if (isNullConstant(Op.getOperand(1))) { |
25770 | Res = DAG.getNode(IntrData->Opc1, dl, VTs, Op.getOperand(2), |
25771 | Op.getOperand(3)); |
25772 | } else { |
25773 | SDValue GenCF = DAG.getNode(X86ISD::ADD, dl, CFVTs, Op.getOperand(1), |
25774 | DAG.getConstant(-1, dl, MVT::i8)); |
25775 | Res = DAG.getNode(IntrData->Opc0, dl, VTs, Op.getOperand(2), |
25776 | Op.getOperand(3), GenCF.getValue(1)); |
25777 | } |
25778 | SDValue SetCC = getSETCC(X86::COND_B, Res.getValue(1), dl, DAG); |
25779 | SDValue Results[] = { SetCC, Res }; |
25780 | return DAG.getMergeValues(Results, dl); |
25781 | } |
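      | // Carry trick (added for reference): when the carry-in byte is not a |
      | // known zero, "ADD c, -1" sets CF exactly when c != 0, which |
      | // re-materializes the carry flag for the following ADC/SBB; a constant |
      | // zero carry-in degrades to a plain ADD/SUB. |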
25782 | case CVTPD2PS_MASK: |
25783 | case CVTPD2DQ_MASK: |
25784 | case CVTQQ2PS_MASK: |
25785 | case TRUNCATE_TO_REG: { |
25786 | SDValue Src = Op.getOperand(1); |
25787 | SDValue PassThru = Op.getOperand(2); |
25788 | SDValue Mask = Op.getOperand(3); |
25789 | |
25790 | if (isAllOnesConstant(Mask)) |
25791 | return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Src); |
25792 | |
25793 | MVT SrcVT = Src.getSimpleValueType(); |
25794 | MVT MaskVT = MVT::getVectorVT(MVT::i1, SrcVT.getVectorNumElements()); |
25795 | Mask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl); |
25796 | return DAG.getNode(IntrData->Opc1, dl, Op.getValueType(), |
25797 | {Src, PassThru, Mask}); |
25798 | } |
25799 | case CVTPS2PH_MASK: { |
25800 | SDValue Src = Op.getOperand(1); |
25801 | SDValue Rnd = Op.getOperand(2); |
25802 | SDValue PassThru = Op.getOperand(3); |
25803 | SDValue Mask = Op.getOperand(4); |
25804 | |
25805 | if (isAllOnesConstant(Mask)) |
25806 | return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Src, Rnd); |
25807 | |
25808 | MVT SrcVT = Src.getSimpleValueType(); |
25809 | MVT MaskVT = MVT::getVectorVT(MVT::i1, SrcVT.getVectorNumElements()); |
25810 | Mask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl); |
25811 | return DAG.getNode(IntrData->Opc1, dl, Op.getValueType(), Src, Rnd, |
25812 | PassThru, Mask); |
25813 | |
25814 | } |
25815 | case CVTNEPS2BF16_MASK: { |
25816 | SDValue Src = Op.getOperand(1); |
25817 | SDValue PassThru = Op.getOperand(2); |
25818 | SDValue Mask = Op.getOperand(3); |
25819 | |
25820 | if (ISD::isBuildVectorAllOnes(Mask.getNode())) |
25821 | return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Src); |
25822 | |
25823 | // Break false dependency. |
25824 | if (PassThru.isUndef()) |
25825 | PassThru = DAG.getConstant(0, dl, PassThru.getValueType()); |
25826 | |
25827 | return DAG.getNode(IntrData->Opc1, dl, Op.getValueType(), Src, PassThru, |
25828 | Mask); |
25829 | } |
25830 | default: |
25831 | break; |
25832 | } |
25833 | } |
25834 | |
25835 | switch (IntNo) { |
25836 | default: return SDValue(); // Don't custom lower most intrinsics. |
25837 | |
25838 | // ptest and testp intrinsics. The intrinsics these come from are |
25839 | // designed to return an integer value, so lower them to a ptest/testp |
25840 | // instruction plus a setcc on the resulting flags. |
25841 | case Intrinsic::x86_avx512_ktestc_b: |
25842 | case Intrinsic::x86_avx512_ktestc_w: |
25843 | case Intrinsic::x86_avx512_ktestc_d: |
25844 | case Intrinsic::x86_avx512_ktestc_q: |
25845 | case Intrinsic::x86_avx512_ktestz_b: |
25846 | case Intrinsic::x86_avx512_ktestz_w: |
25847 | case Intrinsic::x86_avx512_ktestz_d: |
25848 | case Intrinsic::x86_avx512_ktestz_q: |
25849 | case Intrinsic::x86_sse41_ptestz: |
25850 | case Intrinsic::x86_sse41_ptestc: |
25851 | case Intrinsic::x86_sse41_ptestnzc: |
25852 | case Intrinsic::x86_avx_ptestz_256: |
25853 | case Intrinsic::x86_avx_ptestc_256: |
25854 | case Intrinsic::x86_avx_ptestnzc_256: |
25855 | case Intrinsic::x86_avx_vtestz_ps: |
25856 | case Intrinsic::x86_avx_vtestc_ps: |
25857 | case Intrinsic::x86_avx_vtestnzc_ps: |
25858 | case Intrinsic::x86_avx_vtestz_pd: |
25859 | case Intrinsic::x86_avx_vtestc_pd: |
25860 | case Intrinsic::x86_avx_vtestnzc_pd: |
25861 | case Intrinsic::x86_avx_vtestz_ps_256: |
25862 | case Intrinsic::x86_avx_vtestc_ps_256: |
25863 | case Intrinsic::x86_avx_vtestnzc_ps_256: |
25864 | case Intrinsic::x86_avx_vtestz_pd_256: |
25865 | case Intrinsic::x86_avx_vtestc_pd_256: |
25866 | case Intrinsic::x86_avx_vtestnzc_pd_256: { |
25867 | unsigned TestOpc = X86ISD::PTEST; |
25868 | X86::CondCode X86CC; |
25869 | switch (IntNo) { |
25870 | default: llvm_unreachable("Bad fallthrough in Intrinsic lowering."); |
25871 | case Intrinsic::x86_avx512_ktestc_b: |
25872 | case Intrinsic::x86_avx512_ktestc_w: |
25873 | case Intrinsic::x86_avx512_ktestc_d: |
25874 | case Intrinsic::x86_avx512_ktestc_q: |
25875 | // CF = 1 |
25876 | TestOpc = X86ISD::KTEST; |
25877 | X86CC = X86::COND_B; |
25878 | break; |
25879 | case Intrinsic::x86_avx512_ktestz_b: |
25880 | case Intrinsic::x86_avx512_ktestz_w: |
25881 | case Intrinsic::x86_avx512_ktestz_d: |
25882 | case Intrinsic::x86_avx512_ktestz_q: |
25883 | TestOpc = X86ISD::KTEST; |
25884 | X86CC = X86::COND_E; |
25885 | break; |
25886 | case Intrinsic::x86_avx_vtestz_ps: |
25887 | case Intrinsic::x86_avx_vtestz_pd: |
25888 | case Intrinsic::x86_avx_vtestz_ps_256: |
25889 | case Intrinsic::x86_avx_vtestz_pd_256: |
25890 | TestOpc = X86ISD::TESTP; |
25891 | LLVM_FALLTHROUGH; |
25892 | case Intrinsic::x86_sse41_ptestz: |
25893 | case Intrinsic::x86_avx_ptestz_256: |
25894 | // ZF = 1 |
25895 | X86CC = X86::COND_E; |
25896 | break; |
25897 | case Intrinsic::x86_avx_vtestc_ps: |
25898 | case Intrinsic::x86_avx_vtestc_pd: |
25899 | case Intrinsic::x86_avx_vtestc_ps_256: |
25900 | case Intrinsic::x86_avx_vtestc_pd_256: |
25901 | TestOpc = X86ISD::TESTP; |
25902 | LLVM_FALLTHROUGH; |
25903 | case Intrinsic::x86_sse41_ptestc: |
25904 | case Intrinsic::x86_avx_ptestc_256: |
25905 | // CF = 1 |
25906 | X86CC = X86::COND_B; |
25907 | break; |
25908 | case Intrinsic::x86_avx_vtestnzc_ps: |
25909 | case Intrinsic::x86_avx_vtestnzc_pd: |
25910 | case Intrinsic::x86_avx_vtestnzc_ps_256: |
25911 | case Intrinsic::x86_avx_vtestnzc_pd_256: |
25912 | TestOpc = X86ISD::TESTP; |
25913 | LLVM_FALLTHROUGH; |
25914 | case Intrinsic::x86_sse41_ptestnzc: |
25915 | case Intrinsic::x86_avx_ptestnzc_256: |
25916 | // ZF and CF = 0 |
25917 | X86CC = X86::COND_A; |
25918 | break; |
25919 | } |
25920 | |
25921 | SDValue LHS = Op.getOperand(1); |
25922 | SDValue RHS = Op.getOperand(2); |
25923 | SDValue Test = DAG.getNode(TestOpc, dl, MVT::i32, LHS, RHS); |
25924 | SDValue SetCC = getSETCC(X86CC, Test, dl, DAG); |
25925 | return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC); |
25926 | } |
25927 | |
25928 | case Intrinsic::x86_sse42_pcmpistria128: |
25929 | case Intrinsic::x86_sse42_pcmpestria128: |
25930 | case Intrinsic::x86_sse42_pcmpistric128: |
25931 | case Intrinsic::x86_sse42_pcmpestric128: |
25932 | case Intrinsic::x86_sse42_pcmpistrio128: |
25933 | case Intrinsic::x86_sse42_pcmpestrio128: |
25934 | case Intrinsic::x86_sse42_pcmpistris128: |
25935 | case Intrinsic::x86_sse42_pcmpestris128: |
25936 | case Intrinsic::x86_sse42_pcmpistriz128: |
25937 | case Intrinsic::x86_sse42_pcmpestriz128: { |
25938 | unsigned Opcode; |
25939 | X86::CondCode X86CC; |
25940 | switch (IntNo) { |
25941 | default: llvm_unreachable("Impossible intrinsic"); |
25942 | case Intrinsic::x86_sse42_pcmpistria128: |
25943 | Opcode = X86ISD::PCMPISTR; |
25944 | X86CC = X86::COND_A; |
25945 | break; |
25946 | case Intrinsic::x86_sse42_pcmpestria128: |
25947 | Opcode = X86ISD::PCMPESTR; |
25948 | X86CC = X86::COND_A; |
25949 | break; |
25950 | case Intrinsic::x86_sse42_pcmpistric128: |
25951 | Opcode = X86ISD::PCMPISTR; |
25952 | X86CC = X86::COND_B; |
25953 | break; |
25954 | case Intrinsic::x86_sse42_pcmpestric128: |
25955 | Opcode = X86ISD::PCMPESTR; |
25956 | X86CC = X86::COND_B; |
25957 | break; |
25958 | case Intrinsic::x86_sse42_pcmpistrio128: |
25959 | Opcode = X86ISD::PCMPISTR; |
25960 | X86CC = X86::COND_O; |
25961 | break; |
25962 | case Intrinsic::x86_sse42_pcmpestrio128: |
25963 | Opcode = X86ISD::PCMPESTR; |
25964 | X86CC = X86::COND_O; |
25965 | break; |
25966 | case Intrinsic::x86_sse42_pcmpistris128: |
25967 | Opcode = X86ISD::PCMPISTR; |
25968 | X86CC = X86::COND_S; |
25969 | break; |
25970 | case Intrinsic::x86_sse42_pcmpestris128: |
25971 | Opcode = X86ISD::PCMPESTR; |
25972 | X86CC = X86::COND_S; |
25973 | break; |
25974 | case Intrinsic::x86_sse42_pcmpistriz128: |
25975 | Opcode = X86ISD::PCMPISTR; |
25976 | X86CC = X86::COND_E; |
25977 | break; |
25978 | case Intrinsic::x86_sse42_pcmpestriz128: |
25979 | Opcode = X86ISD::PCMPESTR; |
25980 | X86CC = X86::COND_E; |
25981 | break; |
25982 | } |
25983 | SmallVector<SDValue, 5> NewOps(Op->op_begin()+1, Op->op_end()); |
25984 | SDVTList VTs = DAG.getVTList(MVT::i32, MVT::v16i8, MVT::i32); |
25985 | SDValue PCMP = DAG.getNode(Opcode, dl, VTs, NewOps).getValue(2); |
25986 | SDValue SetCC = getSETCC(X86CC, PCMP, dl, DAG); |
25987 | return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC); |
25988 | } |
25989 | |
25990 | case Intrinsic::x86_sse42_pcmpistri128: |
25991 | case Intrinsic::x86_sse42_pcmpestri128: { |
25992 | unsigned Opcode; |
25993 | if (IntNo == Intrinsic::x86_sse42_pcmpistri128) |
25994 | Opcode = X86ISD::PCMPISTR; |
25995 | else |
25996 | Opcode = X86ISD::PCMPESTR; |
25997 | |
25998 | SmallVector<SDValue, 5> NewOps(Op->op_begin()+1, Op->op_end()); |
25999 | SDVTList VTs = DAG.getVTList(MVT::i32, MVT::v16i8, MVT::i32); |
26000 | return DAG.getNode(Opcode, dl, VTs, NewOps); |
26001 | } |
26002 | |
26003 | case Intrinsic::x86_sse42_pcmpistrm128: |
26004 | case Intrinsic::x86_sse42_pcmpestrm128: { |
26005 | unsigned Opcode; |
26006 | if (IntNo == Intrinsic::x86_sse42_pcmpistrm128) |
26007 | Opcode = X86ISD::PCMPISTR; |
26008 | else |
26009 | Opcode = X86ISD::PCMPESTR; |
26010 | |
26011 | SmallVector<SDValue, 5> NewOps(Op->op_begin()+1, Op->op_end()); |
26012 | SDVTList VTs = DAG.getVTList(MVT::i32, MVT::v16i8, MVT::i32); |
26013 | return DAG.getNode(Opcode, dl, VTs, NewOps).getValue(1); |
26014 | } |
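      | // Result slots (added for reference): the PCMPISTR/PCMPESTR node is |
      | // built with three results: value 0 is the i32 index (ECX), value 1 the |
      | // v16i8 match mask (XMM0), and value 2 the i32 flags. The *i variants |
      | // return value 0, the *m variants value 1, and the flag-testing |
      | // variants above feed value 2 through getSETCC. |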
26015 | |
26016 | case Intrinsic::eh_sjlj_lsda: { |
26017 | MachineFunction &MF = DAG.getMachineFunction(); |
26018 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
26019 | MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout()); |
26020 | auto &Context = MF.getMMI().getContext(); |
26021 | MCSymbol *S = Context.getOrCreateSymbol(Twine("GCC_except_table") + |
26022 | Twine(MF.getFunctionNumber())); |
26023 | return DAG.getNode(getGlobalWrapperKind(), dl, VT, |
26024 | DAG.getMCSymbol(S, PtrVT)); |
26025 | } |
26026 | |
26027 | case Intrinsic::x86_seh_lsda: { |
26028 | // Compute the symbol for the LSDA. We know it'll get emitted later. |
26029 | MachineFunction &MF = DAG.getMachineFunction(); |
26030 | SDValue Op1 = Op.getOperand(1); |
26031 | auto *Fn = cast<Function>(cast<GlobalAddressSDNode>(Op1)->getGlobal()); |
26032 | MCSymbol *LSDASym = MF.getMMI().getContext().getOrCreateLSDASymbol( |
26033 | GlobalValue::dropLLVMManglingEscape(Fn->getName())); |
26034 | |
26035 | // Generate a simple absolute symbol reference. This intrinsic is only |
26036 | // supported on 32-bit Windows, which isn't PIC. |
26037 | SDValue Result = DAG.getMCSymbol(LSDASym, VT); |
26038 | return DAG.getNode(X86ISD::Wrapper, dl, VT, Result); |
26039 | } |
26040 | |
26041 | case Intrinsic::eh_recoverfp: { |
26042 | SDValue FnOp = Op.getOperand(1); |
26043 | SDValue IncomingFPOp = Op.getOperand(2); |
26044 | GlobalAddressSDNode *GSD = dyn_cast<GlobalAddressSDNode>(FnOp); |
26045 | auto *Fn = dyn_cast_or_null<Function>(GSD ? GSD->getGlobal() : nullptr); |
26046 | if (!Fn) |
26047 | report_fatal_error( |
26048 | "llvm.eh.recoverfp must take a function as the first argument"); |
26049 | return recoverFramePointer(DAG, Fn, IncomingFPOp); |
26050 | } |
26051 | |
26052 | case Intrinsic::localaddress: { |
26053 | // Returns one of the stack, base, or frame pointer registers, depending |
26054 | // on which is used to reference local variables. |
26055 | MachineFunction &MF = DAG.getMachineFunction(); |
26056 | const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); |
26057 | unsigned Reg; |
26058 | if (RegInfo->hasBasePointer(MF)) |
26059 | Reg = RegInfo->getBaseRegister(); |
26060 | else { |
26061 | bool CantUseFP = RegInfo->hasStackRealignment(MF); |
26062 | if (CantUseFP) |
26063 | Reg = RegInfo->getPtrSizedStackRegister(MF); |
26064 | else |
26065 | Reg = RegInfo->getPtrSizedFrameRegister(MF); |
26066 | } |
26067 | return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT); |
26068 | } |
26069 | case Intrinsic::swift_async_context_addr: { |
26070 | auto &MF = DAG.getMachineFunction(); |
26071 | auto X86FI = MF.getInfo<X86MachineFunctionInfo>(); |
26072 | if (Subtarget.is64Bit()) { |
26073 | MF.getFrameInfo().setFrameAddressIsTaken(true); |
26074 | X86FI->setHasSwiftAsyncContext(true); |
26075 | return SDValue( |
26076 | DAG.getMachineNode( |
26077 | X86::SUB64ri8, dl, MVT::i64, |
26078 | DAG.getCopyFromReg(DAG.getEntryNode(), dl, X86::RBP, MVT::i64), |
26079 | DAG.getTargetConstant(8, dl, MVT::i32)), |
26080 | 0); |
26081 | } else { |
26082 | // 32-bit: no extended frame is used; create or reuse a stack slot |
26083 | // for the async context. |
26084 | if (!X86FI->getSwiftAsyncContextFrameIdx()) |
26085 | X86FI->setSwiftAsyncContextFrameIdx( |
26086 | MF.getFrameInfo().CreateStackObject(4, Align(4), false)); |
26087 | return DAG.getFrameIndex(*X86FI->getSwiftAsyncContextFrameIdx(), MVT::i32); |
26088 | } |
26089 | } |
26090 | case Intrinsic::x86_avx512_vp2intersect_q_512: |
26091 | case Intrinsic::x86_avx512_vp2intersect_q_256: |
26092 | case Intrinsic::x86_avx512_vp2intersect_q_128: |
26093 | case Intrinsic::x86_avx512_vp2intersect_d_512: |
26094 | case Intrinsic::x86_avx512_vp2intersect_d_256: |
26095 | case Intrinsic::x86_avx512_vp2intersect_d_128: { |
26096 | MVT MaskVT = Op.getSimpleValueType(); |
26097 | |
26098 | SDVTList VTs = DAG.getVTList(MVT::Untyped, MVT::Other); |
26099 | SDLoc DL(Op); |
26100 | |
26101 | SDValue Operation = |
26102 | DAG.getNode(X86ISD::VP2INTERSECT, DL, VTs, |
26103 | Op->getOperand(1), Op->getOperand(2)); |
26104 | |
26105 | SDValue Result0 = DAG.getTargetExtractSubreg(X86::sub_mask_0, DL, |
26106 | MaskVT, Operation); |
26107 | SDValue Result1 = DAG.getTargetExtractSubreg(X86::sub_mask_1, DL, |
26108 | MaskVT, Operation); |
26109 | return DAG.getMergeValues({Result0, Result1}, DL); |
26110 | } |
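      | // Note (added for reference): VP2INTERSECT defines a pair of mask |
      | // registers, modeled as a single MVT::Untyped value; the two k-results |
      | // are peeled off with the sub_mask_0/sub_mask_1 subregister indices and |
      | // returned as a merged value pair. |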
26111 | case Intrinsic::x86_mmx_pslli_w: |
26112 | case Intrinsic::x86_mmx_pslli_d: |
26113 | case Intrinsic::x86_mmx_pslli_q: |
26114 | case Intrinsic::x86_mmx_psrli_w: |
26115 | case Intrinsic::x86_mmx_psrli_d: |
26116 | case Intrinsic::x86_mmx_psrli_q: |
26117 | case Intrinsic::x86_mmx_psrai_w: |
26118 | case Intrinsic::x86_mmx_psrai_d: { |
26119 | SDLoc DL(Op); |
26120 | SDValue ShAmt = Op.getOperand(2); |
26121 | |
26122 | if (auto *C = dyn_cast<ConstantSDNode>(ShAmt)) { |
26123 | // Clamp out of bounds shift amounts since they will otherwise be |
26124 | // masked to 8-bits which may make it no longer out of bounds. |
26125 | unsigned ShiftAmount = C->getAPIntValue().getLimitedValue(255); |
26126 | if (ShiftAmount == 0) |
26127 | return Op.getOperand(1); |
26128 | |
26129 | return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Op.getValueType(), |
26130 | Op.getOperand(0), Op.getOperand(1), |
26131 | DAG.getTargetConstant(ShiftAmount, DL, MVT::i32)); |
26132 | } |
26133 | |
26134 | unsigned NewIntrinsic; |
26135 | switch (IntNo) { |
26136 | default: llvm_unreachable("Impossible intrinsic"); |
26137 | case Intrinsic::x86_mmx_pslli_w: |
26138 | NewIntrinsic = Intrinsic::x86_mmx_psll_w; |
26139 | break; |
26140 | case Intrinsic::x86_mmx_pslli_d: |
26141 | NewIntrinsic = Intrinsic::x86_mmx_psll_d; |
26142 | break; |
26143 | case Intrinsic::x86_mmx_pslli_q: |
26144 | NewIntrinsic = Intrinsic::x86_mmx_psll_q; |
26145 | break; |
26146 | case Intrinsic::x86_mmx_psrli_w: |
26147 | NewIntrinsic = Intrinsic::x86_mmx_psrl_w; |
26148 | break; |
26149 | case Intrinsic::x86_mmx_psrli_d: |
26150 | NewIntrinsic = Intrinsic::x86_mmx_psrl_d; |
26151 | break; |
26152 | case Intrinsic::x86_mmx_psrli_q: |
26153 | NewIntrinsic = Intrinsic::x86_mmx_psrl_q; |
26154 | break; |
26155 | case Intrinsic::x86_mmx_psrai_w: |
26156 | NewIntrinsic = Intrinsic::x86_mmx_psra_w; |
26157 | break; |
26158 | case Intrinsic::x86_mmx_psrai_d: |
26159 | NewIntrinsic = Intrinsic::x86_mmx_psra_d; |
26160 | break; |
26161 | } |
26162 | |
26163 | // The vector shift intrinsics with scalars use 32-bit shift amounts, |
26164 | // but the mmx/sse2 shift instructions read 64 bits. Copy the 32-bit |
26165 | // amount into an MMX register first. |
26166 | ShAmt = DAG.getNode(X86ISD::MMX_MOVW2D, DL, MVT::x86mmx, ShAmt); |
26167 | return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Op.getValueType(), |
26168 | DAG.getTargetConstant(NewIntrinsic, DL, |
26169 | getPointerTy(DAG.getDataLayout())), |
26170 | Op.getOperand(1), ShAmt); |
26171 | } |
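      | // Summary (added for reference): a constant shift amount stays in the |
      | // immediate-form intrinsic (clamped above), while a variable amount is |
      | // rewritten to the register-form intrinsic with the 32-bit amount |
      | // copied into an MMX register via MMX_MOVW2D. |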
26172 | } |
26173 | } |
26174 | |
26175 | static SDValue getAVX2GatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, |
26176 | SDValue Src, SDValue Mask, SDValue Base, |
26177 | SDValue Index, SDValue ScaleOp, SDValue Chain, |
26178 | const X86Subtarget &Subtarget) { |
26179 | SDLoc dl(Op); |
26180 | auto *C = dyn_cast<ConstantSDNode>(ScaleOp); |
26181 | // Scale must be constant. |
26182 | if (!C) |
26183 | return SDValue(); |
26184 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
26185 | SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, |
26186 | TLI.getPointerTy(DAG.getDataLayout())); |
26187 | EVT MaskVT = Mask.getValueType().changeVectorElementTypeToInteger(); |
26188 | SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Other); |
26189 | |
26190 | // If source is undef or we know it won't be used, use a zero vector |
26191 | // to break register dependency. |
26192 | if (Src.isUndef() || ISD::isBuildVectorAllOnes(Mask.getNode())) |
26193 | Src = getZeroVector(Op.getSimpleValueType(), Subtarget, DAG, dl); |
26194 | |
26195 | // Cast mask to an integer type. |
26196 | Mask = DAG.getBitcast(MaskVT, Mask); |
26197 | |
26198 | MemIntrinsicSDNode *MemIntr = cast<MemIntrinsicSDNode>(Op); |
26199 | |
26200 | SDValue Ops[] = {Chain, Src, Mask, Base, Index, Scale }; |
26201 | SDValue Res = |
26202 | DAG.getMemIntrinsicNode(X86ISD::MGATHER, dl, VTs, Ops, |
26203 | MemIntr->getMemoryVT(), MemIntr->getMemOperand()); |
26204 | return DAG.getMergeValues({Res, Res.getValue(1)}, dl); |
26205 | } |
26206 | |
26207 | static SDValue getGatherNode(SDValue Op, SelectionDAG &DAG, |
26208 | SDValue Src, SDValue Mask, SDValue Base, |
26209 | SDValue Index, SDValue ScaleOp, SDValue Chain, |
26210 | const X86Subtarget &Subtarget) { |
26211 | MVT VT = Op.getSimpleValueType(); |
26212 | SDLoc dl(Op); |
26213 | auto *C = dyn_cast<ConstantSDNode>(ScaleOp); |
26214 | // Scale must be constant. |
26215 | if (!C) |
26216 | return SDValue(); |
26217 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
26218 | SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, |
26219 | TLI.getPointerTy(DAG.getDataLayout())); |
26220 | unsigned MinElts = std::min(Index.getSimpleValueType().getVectorNumElements(), |
26221 | VT.getVectorNumElements()); |
26222 | MVT MaskVT = MVT::getVectorVT(MVT::i1, MinElts); |
26223 | |
26224 | // We support two versions of the gather intrinsics. One with scalar |
26225 | // mask and one with vXi1 mask. Convert scalar to vXi1 if necessary. |
26226 | if (Mask.getValueType() != MaskVT) |
26227 | Mask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl); |
26228 | |
26229 | SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Other); |
26230 | |
26231 | // If source is undef or we know it won't be used, use a zero vector |
26232 | // to break register dependency. |
26233 | if (Src.isUndef() || ISD::isBuildVectorAllOnes(Mask.getNode())) |
26234 | Src = getZeroVector(Op.getSimpleValueType(), Subtarget, DAG, dl); |
26235 | |
26236 | MemIntrinsicSDNode *MemIntr = cast<MemIntrinsicSDNode>(Op); |
26237 | |
26238 | SDValue Ops[] = {Chain, Src, Mask, Base, Index, Scale }; |
26239 | SDValue Res = |
26240 | DAG.getMemIntrinsicNode(X86ISD::MGATHER, dl, VTs, Ops, |
26241 | MemIntr->getMemoryVT(), MemIntr->getMemOperand()); |
26242 | return DAG.getMergeValues({Res, Res.getValue(1)}, dl); |
26243 | } |
26244 | |
26245 | static SDValue getScatterNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, |
26246 | SDValue Src, SDValue Mask, SDValue Base, |
26247 | SDValue Index, SDValue ScaleOp, SDValue Chain, |
26248 | const X86Subtarget &Subtarget) { |
26249 | SDLoc dl(Op); |
26250 | auto *C = dyn_cast<ConstantSDNode>(ScaleOp); |
26251 | // Scale must be constant. |
26252 | if (!C) |
26253 | return SDValue(); |
26254 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
26255 | SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, |
26256 | TLI.getPointerTy(DAG.getDataLayout())); |
26257 | unsigned MinElts = std::min(Index.getSimpleValueType().getVectorNumElements(), |
26258 | Src.getSimpleValueType().getVectorNumElements()); |
26259 | MVT MaskVT = MVT::getVectorVT(MVT::i1, MinElts); |
26260 | |
26261 | // We support two versions of the scatter intrinsics. One with scalar |
26262 | // mask and one with vXi1 mask. Convert scalar to vXi1 if necessary. |
26263 | if (Mask.getValueType() != MaskVT) |
26264 | Mask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl); |
26265 | |
26266 | MemIntrinsicSDNode *MemIntr = cast<MemIntrinsicSDNode>(Op); |
26267 | |
26268 | SDVTList VTs = DAG.getVTList(MVT::Other); |
26269 | SDValue Ops[] = {Chain, Src, Mask, Base, Index, Scale}; |
26270 | SDValue Res = |
26271 | DAG.getMemIntrinsicNode(X86ISD::MSCATTER, dl, VTs, Ops, |
26272 | MemIntr->getMemoryVT(), MemIntr->getMemOperand()); |
26273 | return Res; |
26274 | } |
26275 | |
26276 | static SDValue getPrefetchNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, |
26277 | SDValue Mask, SDValue Base, SDValue Index, |
26278 | SDValue ScaleOp, SDValue Chain, |
26279 | const X86Subtarget &Subtarget) { |
26280 | SDLoc dl(Op); |
26281 | auto *C = dyn_cast<ConstantSDNode>(ScaleOp); |
26282 | // Scale must be constant. |
26283 | if (!C) |
26284 | return SDValue(); |
26285 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
26286 | SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, |
26287 | TLI.getPointerTy(DAG.getDataLayout())); |
26288 | SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32); |
26289 | SDValue Segment = DAG.getRegister(0, MVT::i32); |
26290 | MVT MaskVT = |
26291 | MVT::getVectorVT(MVT::i1, Index.getSimpleValueType().getVectorNumElements()); |
26292 | SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl); |
26293 | SDValue Ops[] = {VMask, Base, Scale, Index, Disp, Segment, Chain}; |
26294 | SDNode *Res = DAG.getMachineNode(Opc, dl, MVT::Other, Ops); |
26295 | return SDValue(Res, 0); |
26296 | } |
26297 | |
26298 | // Handles the lowering of builtin intrinsics with chain that return |
26299 | // their value into registers EDX:EAX. |
26300 | // If operand SrcReg is a valid register identifier, then operand 2 of N |
26301 | // is copied to SrcReg. The assumption is that SrcReg is implicitly |
26302 | // defined by TargetOpcode. |
26303 | // Returns a Glue value which can be used to add an extra copy-from-reg |
26304 | // if the expanded intrinsic implicitly defines extra registers (i.e. |
26305 | // not just EDX:EAX). |
26306 | static SDValue expandIntrinsicWChainHelper(SDNode *N, const SDLoc &DL, |
26307 | SelectionDAG &DAG, |
26308 | unsigned TargetOpcode, |
26309 | unsigned SrcReg, |
26310 | const X86Subtarget &Subtarget, |
26311 | SmallVectorImpl<SDValue> &Results) { |
26312 | SDValue Chain = N->getOperand(0); |
26313 | SDValue Glue; |
26314 | |
26315 | if (SrcReg) { |
26316 | assert(N->getNumOperands() == 3 && "Unexpected number of operands!"); |
26317 | Chain = DAG.getCopyToReg(Chain, DL, SrcReg, N->getOperand(2), Glue); |
26318 | Glue = Chain.getValue(1); |
26319 | } |
26320 | |
26321 | SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue); |
26322 | SDValue N1Ops[] = {Chain, Glue}; |
26323 | SDNode *N1 = DAG.getMachineNode( |
26324 | TargetOpcode, DL, Tys, ArrayRef<SDValue>(N1Ops, Glue.getNode() ? 2 : 1)); |
26325 | Chain = SDValue(N1, 0); |
26326 | |
26327 | // Read the result out of EDX:EAX (RDX:RAX in 64-bit mode). |
26328 | SDValue LO, HI; |
26329 | if (Subtarget.is64Bit()) { |
26330 | LO = DAG.getCopyFromReg(Chain, DL, X86::RAX, MVT::i64, SDValue(N1, 1)); |
26331 | HI = DAG.getCopyFromReg(LO.getValue(1), DL, X86::RDX, MVT::i64, |
26332 | LO.getValue(2)); |
26333 | } else { |
26334 | LO = DAG.getCopyFromReg(Chain, DL, X86::EAX, MVT::i32, SDValue(N1, 1)); |
26335 | HI = DAG.getCopyFromReg(LO.getValue(1), DL, X86::EDX, MVT::i32, |
26336 | LO.getValue(2)); |
26337 | } |
26338 | Chain = HI.getValue(1); |
26339 | Glue = HI.getValue(2); |
26340 | |
26341 | if (Subtarget.is64Bit()) { |
26342 | // Merge the two 32-bit values into a 64-bit one. |
26343 | SDValue Tmp = DAG.getNode(ISD::SHL, DL, MVT::i64, HI, |
26344 | DAG.getConstant(32, DL, MVT::i8)); |
26345 | Results.push_back(DAG.getNode(ISD::OR, DL, MVT::i64, LO, Tmp)); |
26346 | Results.push_back(Chain); |
26347 | return Glue; |
26348 | } |
26349 | |
26350 | // Use a buildpair to merge the two 32-bit values into a 64-bit one. |
26351 | SDValue Ops[] = { LO, HI }; |
26352 | SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Ops); |
26353 | Results.push_back(Pair); |
26354 | Results.push_back(Chain); |
26355 | return Glue; |
26356 | } |
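      | // Worked example (added, illustrative): for RDTSC with a TSC value of |
      | // 0x1122334455667788, EAX holds 0x55667788 and EDX 0x11223344. The |
      | // 64-bit path rebuilds the value as (HI << 32) | LO; the 32-bit path |
      | // returns BUILD_PAIR(LO, HI) as one i64. |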
26357 | |
26358 | |
26359 | // Handles the lowering of builtin intrinsics that read the time stamp |
26360 | // counter (x86_rdtsc and x86_rdtscp). |
26361 | static void getReadTimeStampCounter(SDNode *N, const SDLoc &DL, unsigned Opcode, |
26362 | SelectionDAG &DAG, |
26363 | const X86Subtarget &Subtarget, |
26364 | SmallVectorImpl<SDValue> &Results) { |
26365 | // The processor's time-stamp counter (a 64-bit MSR) is stored into the |
26366 | // EDX:EAX registers. EDX is loaded with the high-order 32 bits of the |
26367 | // MSR and the EAX register is loaded with the low-order 32 bits. |
26368 | SDValue Glue = expandIntrinsicWChainHelper(N, DL, DAG, Opcode, |
26369 | 0, Subtarget, |
26370 | Results); |
26371 | if (Opcode != X86::RDTSCP) |
26372 | return; |
26373 | |
26374 | SDValue Chain = Results[1]; |
26375 | // The RDTSCP instruction also loads the IA32_TSC_AUX MSR into the ECX |
26376 | // register. Add 'ecx' explicitly to the chain. |
26377 | SDValue ecx = DAG.getCopyFromReg(Chain, DL, X86::ECX, MVT::i32, Glue); |
26378 | Results[1] = ecx; |
26379 | Results.push_back(ecx.getValue(1)); |
26380 | } |
26381 | |
26382 | static SDValue LowerREADCYCLECOUNTER(SDValue Op, const X86Subtarget &Subtarget, |
26383 | SelectionDAG &DAG) { |
26384 | SmallVector<SDValue, 3> Results; |
26385 | SDLoc DL(Op); |
26386 | getReadTimeStampCounter(Op.getNode(), DL, X86::RDTSC, DAG, Subtarget, |
26387 | Results); |
26388 | return DAG.getMergeValues(Results, DL); |
26389 | } |
26390 | |
26391 | static SDValue MarkEHRegistrationNode(SDValue Op, SelectionDAG &DAG) { |
26392 | MachineFunction &MF = DAG.getMachineFunction(); |
26393 | SDValue Chain = Op.getOperand(0); |
26394 | SDValue RegNode = Op.getOperand(2); |
26395 | WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo(); |
26396 | if (!EHInfo) |
26397 | report_fatal_error("EH registrations only live in functions using WinEH"); |
26398 | |
26399 | // Cast the operand to an alloca, and remember the frame index. |
26400 | auto *FINode = dyn_cast<FrameIndexSDNode>(RegNode); |
26401 | if (!FINode) |
26402 | report_fatal_error("llvm.x86.seh.ehregnode expects a static alloca"); |
26403 | EHInfo->EHRegNodeFrameIndex = FINode->getIndex(); |
26404 | |
26405 | // Return the chain operand without making any DAG nodes. |
26406 | return Chain; |
26407 | } |
26408 | |
26409 | static SDValue MarkEHGuard(SDValue Op, SelectionDAG &DAG) { |
26410 | MachineFunction &MF = DAG.getMachineFunction(); |
26411 | SDValue Chain = Op.getOperand(0); |
26412 | SDValue EHGuard = Op.getOperand(2); |
26413 | WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo(); |
26414 | if (!EHInfo) |
26415 | report_fatal_error("EHGuard only live in functions using WinEH"); |
26416 | |
26417 | // Cast the operand to an alloca, and remember the frame index. |
26418 | auto *FINode = dyn_cast<FrameIndexSDNode>(EHGuard); |
26419 | if (!FINode) |
26420 | report_fatal_error("llvm.x86.seh.ehguard expects a static alloca"); |
26421 | EHInfo->EHGuardFrameIndex = FINode->getIndex(); |
26422 | |
26423 | // Return the chain operand without making any DAG nodes. |
26424 | return Chain; |
26425 | } |
26426 | |
26427 | // Emit a truncating store with signed or unsigned saturation. |
26428 | static SDValue |
26429 | EmitTruncSStore(bool SignedSat, SDValue Chain, const SDLoc &Dl, SDValue Val, |
26430 | SDValue Ptr, EVT MemVT, MachineMemOperand *MMO, |
26431 | SelectionDAG &DAG) { |
26432 | SDVTList VTs = DAG.getVTList(MVT::Other); |
26433 | SDValue Undef = DAG.getUNDEF(Ptr.getValueType()); |
26434 | SDValue Ops[] = { Chain, Val, Ptr, Undef }; |
26435 | unsigned Opc = SignedSat ? X86ISD::VTRUNCSTORES : X86ISD::VTRUNCSTOREUS; |
26436 | return DAG.getMemIntrinsicNode(Opc, Dl, VTs, Ops, MemVT, MMO); |
26437 | } |
26438 | |
26439 | // Emit a masked truncating store with signed or unsigned saturation. |
26440 | static SDValue |
26441 | EmitMaskedTruncSStore(bool SignedSat, SDValue Chain, const SDLoc &Dl, |
26442 | SDValue Val, SDValue Ptr, SDValue Mask, EVT MemVT, |
26443 | MachineMemOperand *MMO, SelectionDAG &DAG) { |
26444 | SDVTList VTs = DAG.getVTList(MVT::Other); |
26445 | SDValue Ops[] = { Chain, Val, Ptr, Mask }; |
26446 | unsigned Opc = SignedSat ? X86ISD::VMTRUNCSTORES : X86ISD::VMTRUNCSTOREUS; |
26447 | return DAG.getMemIntrinsicNode(Opc, Dl, VTs, Ops, MemVT, MMO); |
26448 | } |
26449 | |
26450 | static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget, |
26451 | SelectionDAG &DAG) { |
26452 | unsigned IntNo = Op.getConstantOperandVal(1); |
26453 | const IntrinsicData *IntrData = getIntrinsicWithChain(IntNo); |
26454 | if (!IntrData) { |
26455 | switch (IntNo) { |
26456 | case llvm::Intrinsic::x86_seh_ehregnode: |
26457 | return MarkEHRegistrationNode(Op, DAG); |
26458 | case llvm::Intrinsic::x86_seh_ehguard: |
26459 | return MarkEHGuard(Op, DAG); |
26460 | case llvm::Intrinsic::x86_rdpkru: { |
26461 | SDLoc dl(Op); |
26462 | SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other); |
26463 | // Create a RDPKRU node and pass 0 to the ECX parameter. |
26464 | return DAG.getNode(X86ISD::RDPKRU, dl, VTs, Op.getOperand(0), |
26465 | DAG.getConstant(0, dl, MVT::i32)); |
26466 | } |
26467 | case llvm::Intrinsic::x86_wrpkru: { |
26468 | SDLoc dl(Op); |
26469 | // Create a WRPKRU node, pass the input to the EAX parameter, and pass |
26470 | // 0 to the EDX and ECX parameters. |
26471 | return DAG.getNode(X86ISD::WRPKRU, dl, MVT::Other, |
26472 | Op.getOperand(0), Op.getOperand(2), |
26473 | DAG.getConstant(0, dl, MVT::i32), |
26474 | DAG.getConstant(0, dl, MVT::i32)); |
26475 | } |
26476 | case llvm::Intrinsic::x86_flags_read_u32: |
26477 | case llvm::Intrinsic::x86_flags_read_u64: |
26478 | case llvm::Intrinsic::x86_flags_write_u32: |
26479 | case llvm::Intrinsic::x86_flags_write_u64: { |
26480 | // We need a frame pointer because this will get lowered to a PUSH/POP |
26481 | // sequence. |
26482 | MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); |
26483 | MFI.setHasCopyImplyingStackAdjustment(true); |
26484 | // Don't do anything here; these intrinsics are expanded out later |
26485 | // during instruction selection finalization. |
26486 | return Op; |
26487 | } |
26488 | case Intrinsic::x86_lwpins32: |
26489 | case Intrinsic::x86_lwpins64: |
26490 | case Intrinsic::x86_umwait: |
26491 | case Intrinsic::x86_tpause: { |
26492 | SDLoc dl(Op); |
26493 | SDValue Chain = Op->getOperand(0); |
26494 | SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other); |
26495 | unsigned Opcode; |
26496 | |
26497 | switch (IntNo) { |
26498 | default: llvm_unreachable("Impossible intrinsic"); |
26499 | case Intrinsic::x86_umwait: |
26500 | Opcode = X86ISD::UMWAIT; |
26501 | break; |
26502 | case Intrinsic::x86_tpause: |
26503 | Opcode = X86ISD::TPAUSE; |
26504 | break; |
26505 | case Intrinsic::x86_lwpins32: |
26506 | case Intrinsic::x86_lwpins64: |
26507 | Opcode = X86ISD::LWPINS; |
26508 | break; |
26509 | } |
26510 | |
26511 | SDValue Operation = |
26512 | DAG.getNode(Opcode, dl, VTs, Chain, Op->getOperand(2), |
26513 | Op->getOperand(3), Op->getOperand(4)); |
26514 | SDValue SetCC = getSETCC(X86::COND_B, Operation.getValue(0), dl, DAG); |
26515 | return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), SetCC, |
26516 | Operation.getValue(1)); |
26517 | } |
26518 | case Intrinsic::x86_enqcmd: |
26519 | case Intrinsic::x86_enqcmds: { |
26520 | SDLoc dl(Op); |
26521 | SDValue Chain = Op.getOperand(0); |
26522 | SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other); |
26523 | unsigned Opcode; |
26524 | switch (IntNo) { |
26525 | default: llvm_unreachable("Impossible intrinsic!"); |
26526 | case Intrinsic::x86_enqcmd: |
26527 | Opcode = X86ISD::ENQCMD; |
26528 | break; |
26529 | case Intrinsic::x86_enqcmds: |
26530 | Opcode = X86ISD::ENQCMDS; |
26531 | break; |
26532 | } |
26533 | SDValue Operation = DAG.getNode(Opcode, dl, VTs, Chain, Op.getOperand(2), |
26534 | Op.getOperand(3)); |
26535 | SDValue SetCC = getSETCC(X86::COND_E, Operation.getValue(0), dl, DAG); |
26536 | return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), SetCC, |
26537 | Operation.getValue(1)); |
26538 | } |
26539 | case Intrinsic::x86_aesenc128kl: |
26540 | case Intrinsic::x86_aesdec128kl: |
26541 | case Intrinsic::x86_aesenc256kl: |
26542 | case Intrinsic::x86_aesdec256kl: { |
26543 | SDLoc DL(Op); |
26544 | SDVTList VTs = DAG.getVTList(MVT::v2i64, MVT::i32, MVT::Other); |
26545 | SDValue Chain = Op.getOperand(0); |
26546 | unsigned Opcode; |
26547 | |
26548 | switch (IntNo) { |
26549 | default: llvm_unreachable("Impossible intrinsic"); |
26550 | case Intrinsic::x86_aesenc128kl: |
26551 | Opcode = X86ISD::AESENC128KL; |
26552 | break; |
26553 | case Intrinsic::x86_aesdec128kl: |
26554 | Opcode = X86ISD::AESDEC128KL; |
26555 | break; |
26556 | case Intrinsic::x86_aesenc256kl: |
26557 | Opcode = X86ISD::AESENC256KL; |
26558 | break; |
26559 | case Intrinsic::x86_aesdec256kl: |
26560 | Opcode = X86ISD::AESDEC256KL; |
26561 | break; |
26562 | } |
26563 | |
26564 | MemIntrinsicSDNode *MemIntr = cast<MemIntrinsicSDNode>(Op); |
26565 | MachineMemOperand *MMO = MemIntr->getMemOperand(); |
26566 | EVT MemVT = MemIntr->getMemoryVT(); |
26567 | SDValue Operation = DAG.getMemIntrinsicNode( |
26568 | Opcode, DL, VTs, {Chain, Op.getOperand(2), Op.getOperand(3)}, MemVT, |
26569 | MMO); |
26570 | SDValue ZF = getSETCC(X86::COND_E, Operation.getValue(1), DL, DAG); |
26571 | |
26572 | return DAG.getNode(ISD::MERGE_VALUES, DL, Op->getVTList(), |
26573 | {ZF, Operation.getValue(0), Operation.getValue(2)}); |
26574 | } |
26575 | case Intrinsic::x86_aesencwide128kl: |
26576 | case Intrinsic::x86_aesdecwide128kl: |
26577 | case Intrinsic::x86_aesencwide256kl: |
26578 | case Intrinsic::x86_aesdecwide256kl: { |
26579 | SDLoc DL(Op); |
26580 | SDVTList VTs = DAG.getVTList( |
26581 | {MVT::i32, MVT::v2i64, MVT::v2i64, MVT::v2i64, MVT::v2i64, MVT::v2i64, |
26582 | MVT::v2i64, MVT::v2i64, MVT::v2i64, MVT::Other}); |
26583 | SDValue Chain = Op.getOperand(0); |
26584 | unsigned Opcode; |
26585 | |
26586 | switch (IntNo) { |
26587 | default: llvm_unreachable("Impossible intrinsic"); |
26588 | case Intrinsic::x86_aesencwide128kl: |
26589 | Opcode = X86ISD::AESENCWIDE128KL; |
26590 | break; |
26591 | case Intrinsic::x86_aesdecwide128kl: |
26592 | Opcode = X86ISD::AESDECWIDE128KL; |
26593 | break; |
26594 | case Intrinsic::x86_aesencwide256kl: |
26595 | Opcode = X86ISD::AESENCWIDE256KL; |
26596 | break; |
26597 | case Intrinsic::x86_aesdecwide256kl: |
26598 | Opcode = X86ISD::AESDECWIDE256KL; |
26599 | break; |
26600 | } |
26601 | |
26602 | MemIntrinsicSDNode *MemIntr = cast<MemIntrinsicSDNode>(Op); |
26603 | MachineMemOperand *MMO = MemIntr->getMemOperand(); |
26604 | EVT MemVT = MemIntr->getMemoryVT(); |
26605 | SDValue Operation = DAG.getMemIntrinsicNode( |
26606 | Opcode, DL, VTs, |
26607 | {Chain, Op.getOperand(2), Op.getOperand(3), Op.getOperand(4), |
26608 | Op.getOperand(5), Op.getOperand(6), Op.getOperand(7), |
26609 | Op.getOperand(8), Op.getOperand(9), Op.getOperand(10)}, |
26610 | MemVT, MMO); |
26611 | SDValue ZF = getSETCC(X86::COND_E, Operation.getValue(0), DL, DAG); |
26612 | |
26613 | return DAG.getNode(ISD::MERGE_VALUES, DL, Op->getVTList(), |
26614 | {ZF, Operation.getValue(1), Operation.getValue(2), |
26615 | Operation.getValue(3), Operation.getValue(4), |
26616 | Operation.getValue(5), Operation.getValue(6), |
26617 | Operation.getValue(7), Operation.getValue(8), |
26618 | Operation.getValue(9)}); |
26619 | } |
26620 | case Intrinsic::x86_testui: { |
26621 | SDLoc dl(Op); |
26622 | SDValue Chain = Op.getOperand(0); |
26623 | SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other); |
26624 | SDValue Operation = DAG.getNode(X86ISD::TESTUI, dl, VTs, Chain); |
26625 | SDValue SetCC = getSETCC(X86::COND_B, Operation.getValue(0), dl, DAG); |
26626 | return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), SetCC, |
26627 | Operation.getValue(1)); |
26628 | } |
26629 | } |
26630 | return SDValue(); |
26631 | } |
26632 | |
26633 | SDLoc dl(Op); |
26634 | switch(IntrData->Type) { |
26635 | default: llvm_unreachable("Unknown Intrinsic Type"); |
26636 | case RDSEED: |
26637 | case RDRAND: { |
26638 | // Emit the node with the right value type. |
26639 | SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::i32, MVT::Other); |
26640 | SDValue Result = DAG.getNode(IntrData->Opc0, dl, VTs, Op.getOperand(0)); |
26641 | |
26642 | // If the value returned by RDRAND/RDSEED was valid (CF=1), return 1. |
26643 | // Otherwise return the value from Rand, which is always 0, casted to i32. |
26644 | SDValue Ops[] = {DAG.getZExtOrTrunc(Result, dl, Op->getValueType(1)), |
26645 | DAG.getConstant(1, dl, Op->getValueType(1)), |
26646 | DAG.getTargetConstant(X86::COND_B, dl, MVT::i8), |
26647 | SDValue(Result.getNode(), 1)}; |
26648 | SDValue isValid = DAG.getNode(X86ISD::CMOV, dl, Op->getValueType(1), Ops); |
26649 | |
26650 | // Return { result, isValid, chain }. |
26651 | return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), Result, isValid, |
26652 | SDValue(Result.getNode(), 2)); |
26653 | } |
26654 | case GATHER_AVX2: { |
26655 | SDValue Chain = Op.getOperand(0); |
26656 | SDValue Src = Op.getOperand(2); |
26657 | SDValue Base = Op.getOperand(3); |
26658 | SDValue Index = Op.getOperand(4); |
26659 | SDValue Mask = Op.getOperand(5); |
26660 | SDValue Scale = Op.getOperand(6); |
26661 | return getAVX2GatherNode(IntrData->Opc0, Op, DAG, Src, Mask, Base, Index, |
26662 | Scale, Chain, Subtarget); |
26663 | } |
26664 | case GATHER: { |
26665 | // gather(v1, mask, index, base, scale); |
26666 | SDValue Chain = Op.getOperand(0); |
26667 | SDValue Src = Op.getOperand(2); |
26668 | SDValue Base = Op.getOperand(3); |
26669 | SDValue Index = Op.getOperand(4); |
26670 | SDValue Mask = Op.getOperand(5); |
26671 | SDValue Scale = Op.getOperand(6); |
26672 | return getGatherNode(Op, DAG, Src, Mask, Base, Index, Scale, |
26673 | Chain, Subtarget); |
26674 | } |
26675 | case SCATTER: { |
26676 | // scatter(base, mask, index, v1, scale); |
26677 | SDValue Chain = Op.getOperand(0); |
26678 | SDValue Base = Op.getOperand(2); |
26679 | SDValue Mask = Op.getOperand(3); |
26680 | SDValue Index = Op.getOperand(4); |
26681 | SDValue Src = Op.getOperand(5); |
26682 | SDValue Scale = Op.getOperand(6); |
26683 | return getScatterNode(IntrData->Opc0, Op, DAG, Src, Mask, Base, Index, |
26684 | Scale, Chain, Subtarget); |
26685 | } |
26686 | case PREFETCH: { |
26687 | const APInt &HintVal = Op.getConstantOperandAPInt(6); |
26688 | assert((HintVal == 2 || HintVal == 3) && |
26689 | "Wrong prefetch hint in intrinsic: should be 2 or 3"); |
26690 | unsigned Opcode = (HintVal == 2 ? IntrData->Opc1 : IntrData->Opc0); |
26691 | SDValue Chain = Op.getOperand(0); |
26692 | SDValue Mask = Op.getOperand(2); |
26693 | SDValue Index = Op.getOperand(3); |
26694 | SDValue Base = Op.getOperand(4); |
26695 | SDValue Scale = Op.getOperand(5); |
26696 | return getPrefetchNode(Opcode, Op, DAG, Mask, Base, Index, Scale, Chain, |
26697 | Subtarget); |
26698 | } |
26699 | |
26700 | case RDTSC: { |
26701 | SmallVector<SDValue, 2> Results; |
26702 | getReadTimeStampCounter(Op.getNode(), dl, IntrData->Opc0, DAG, Subtarget, |
26703 | Results); |
26704 | return DAG.getMergeValues(Results, dl); |
26705 | } |
26706 | |
26707 | case RDPMC: |
26708 | |
26709 | case XGETBV: { |
26710 | SmallVector<SDValue, 2> Results; |
26711 | |
26712 | // RDPMC uses ECX to select the index of the performance counter to |
26713 | // read. XGETBV uses ECX to select the index of the XCR register to |
26714 | // return. The result is stored into registers EDX:EAX. |
26715 | expandIntrinsicWChainHelper(Op.getNode(), dl, DAG, IntrData->Opc0, X86::ECX, |
26716 | Subtarget, Results); |
26717 | return DAG.getMergeValues(Results, dl); |
26718 | } |
26719 | |
26720 | case XTEST: { |
26721 | SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::Other); |
26722 | SDValue InTrans = DAG.getNode(IntrData->Opc0, dl, VTs, Op.getOperand(0)); |
26723 | |
26724 | SDValue SetCC = getSETCC(X86::COND_NE, InTrans, dl, DAG); |
26725 | SDValue Ret = DAG.getNode(ISD::ZERO_EXTEND, dl, Op->getValueType(0), SetCC); |
26726 | return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), |
26727 | Ret, SDValue(InTrans.getNode(), 1)); |
26728 | } |
26729 | case TRUNCATE_TO_MEM_VI8: |
26730 | case TRUNCATE_TO_MEM_VI16: |
26731 | case TRUNCATE_TO_MEM_VI32: { |
26732 | SDValue Mask = Op.getOperand(4); |
26733 | SDValue DataToTruncate = Op.getOperand(3); |
26734 | SDValue Addr = Op.getOperand(2); |
26735 | SDValue Chain = Op.getOperand(0); |
26736 | |
26737 | MemIntrinsicSDNode *MemIntr = dyn_cast<MemIntrinsicSDNode>(Op); |
26738 | assert(MemIntr && "Expected MemIntrinsicSDNode!"); |
26739 | |
26740 | EVT MemVT = MemIntr->getMemoryVT(); |
26741 | |
26742 | uint16_t TruncationOp = IntrData->Opc0; |
26743 | switch (TruncationOp) { |
26744 | case X86ISD::VTRUNC: { |
26745 | if (isAllOnesConstant(Mask)) |
26746 | return DAG.getTruncStore(Chain, dl, DataToTruncate, Addr, MemVT, |
26747 | MemIntr->getMemOperand()); |
26748 | |
26749 | MVT MaskVT = MVT::getVectorVT(MVT::i1, MemVT.getVectorNumElements()); |
26750 | SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl); |
26751 | SDValue Offset = DAG.getUNDEF(VMask.getValueType()); |
26752 | |
26753 | return DAG.getMaskedStore(Chain, dl, DataToTruncate, Addr, Offset, VMask, |
26754 | MemVT, MemIntr->getMemOperand(), ISD::UNINDEXED, |
26755 | true /* truncating */); |
26756 | } |
26757 | case X86ISD::VTRUNCUS: |
26758 | case X86ISD::VTRUNCS: { |
26759 | bool IsSigned = (TruncationOp == X86ISD::VTRUNCS); |
26760 | if (isAllOnesConstant(Mask)) |
26761 | return EmitTruncSStore(IsSigned, Chain, dl, DataToTruncate, Addr, MemVT, |
26762 | MemIntr->getMemOperand(), DAG); |
26763 | |
26764 | MVT MaskVT = MVT::getVectorVT(MVT::i1, MemVT.getVectorNumElements()); |
26765 | SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl); |
26766 | |
26767 | return EmitMaskedTruncSStore(IsSigned, Chain, dl, DataToTruncate, Addr, |
26768 | VMask, MemVT, MemIntr->getMemOperand(), DAG); |
26769 | } |
26770 | default: |
26771 | llvm_unreachable("Unsupported truncstore intrinsic"); |
26772 | } |
26773 | } |
26774 | } |
26775 | } |
26776 | |
26777 | SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op, |
26778 | SelectionDAG &DAG) const { |
26779 | MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); |
26780 | MFI.setReturnAddressIsTaken(true); |
26781 | |
26782 | if (verifyReturnAddressArgumentIsConstant(Op, DAG)) |
26783 | return SDValue(); |
26784 | |
26785 | unsigned Depth = Op.getConstantOperandVal(0); |
26786 | SDLoc dl(Op); |
26787 | EVT PtrVT = getPointerTy(DAG.getDataLayout()); |
26788 | |
26789 | if (Depth > 0) { |
26790 | SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); |
26791 | const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); |
26792 | SDValue Offset = DAG.getConstant(RegInfo->getSlotSize(), dl, PtrVT); |
26793 | return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), |
26794 | DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset), |
26795 | MachinePointerInfo()); |
26796 | } |
26797 | |
26798 | // Just load the return address. |
26799 | SDValue RetAddrFI = getReturnAddressFrameIndex(DAG); |
26800 | return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI, |
26801 | MachinePointerInfo()); |
26802 | } |
26803 | |
26804 | SDValue X86TargetLowering::LowerADDROFRETURNADDR(SDValue Op, |
26805 | SelectionDAG &DAG) const { |
26806 | DAG.getMachineFunction().getFrameInfo().setReturnAddressIsTaken(true); |
26807 | return getReturnAddressFrameIndex(DAG); |
26808 | } |
26809 | |
26810 | SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { |
26811 | MachineFunction &MF = DAG.getMachineFunction(); |
26812 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
26813 | X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); |
26814 | const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); |
26815 | EVT VT = Op.getValueType(); |
26816 | |
26817 | MFI.setFrameAddressIsTaken(true); |
26818 | |
26819 | if (MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) { |
26820 | // Depth > 0 makes no sense on targets which use Windows unwind codes. |
26821 | // It is not possible to crawl up the stack without looking at the |
26822 | // unwind codes simultaneously. |
26823 | int FrameAddrIndex = FuncInfo->getFAIndex(); |
26824 | if (!FrameAddrIndex) { |
26825 | // Set up a frame object for the return address. |
26826 | unsigned SlotSize = RegInfo->getSlotSize(); |
26827 | FrameAddrIndex = MF.getFrameInfo().CreateFixedObject( |
26828 | SlotSize, 0, false); |
26829 | FuncInfo->setFAIndex(FrameAddrIndex); |
26830 | } |
26831 | return DAG.getFrameIndex(FrameAddrIndex, VT); |
26832 | } |
26833 | |
26834 | unsigned FrameReg = |
26835 | RegInfo->getPtrSizedFrameRegister(DAG.getMachineFunction()); |
26836 | SDLoc dl(Op); |
26837 | unsigned Depth = Op.getConstantOperandVal(0); |
26838 | assert(((FrameReg == X86::RBP && VT == MVT::i64) || |
26839 | (FrameReg == X86::EBP && VT == MVT::i32)) && |
26840 | "Invalid Frame Register!"); |
26841 | SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT); |
26842 | while (Depth--) |
26843 | FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, |
26844 | MachinePointerInfo()); |
26845 | return FrameAddr; |
26846 | } |
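      | // Worked example (added, illustrative): llvm.frameaddress(2) becomes |
      | // two chained loads through the saved-frame-pointer slot, i.e. |
      | // load(load(RBP)), starting from the CopyFromReg of RBP/EBP above. |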
26847 | |
26848 | // FIXME? Maybe this could be a TableGen attribute on some registers and |
26849 | // this table could be generated automatically from RegInfo. |
26850 | Register X86TargetLowering::getRegisterByName(const char* RegName, LLT VT, |
26851 | const MachineFunction &MF) const { |
26852 | const TargetFrameLowering &TFI = *Subtarget.getFrameLowering(); |
26853 | |
26854 | Register Reg = StringSwitch<unsigned>(RegName) |
26855 | .Case("esp", X86::ESP) |
26856 | .Case("rsp", X86::RSP) |
26857 | .Case("ebp", X86::EBP) |
26858 | .Case("rbp", X86::RBP) |
26859 | .Default(0); |
26860 | |
26861 | if (Reg == X86::EBP || Reg == X86::RBP) { |
26862 | if (!TFI.hasFP(MF)) |
26863 | report_fatal_error("register " + StringRef(RegName) + |
26864 | " is allocatable: function has no frame pointer"); |
26865 | #ifndef NDEBUG |
26866 | else { |
26867 | const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); |
26868 | Register FrameReg = RegInfo->getPtrSizedFrameRegister(MF); |
26869 | assert((FrameReg == X86::EBP || FrameReg == X86::RBP) && |
26870 | "Invalid Frame Register!"); |
26871 | } |
26872 | #endif |
26873 | } |
26874 | |
26875 | if (Reg) |
26876 | return Reg; |
26877 | |
26878 | report_fatal_error("Invalid register name global variable"); |
26879 | } |
26880 | |
26881 | SDValue X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDValue Op, |
26882 | SelectionDAG &DAG) const { |
26883 | const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); |
26884 | return DAG.getIntPtrConstant(2 * RegInfo->getSlotSize(), SDLoc(Op)); |
26885 | } |
26886 | |
26887 | Register X86TargetLowering::getExceptionPointerRegister( |
26888 | const Constant *PersonalityFn) const { |
26889 | if (classifyEHPersonality(PersonalityFn) == EHPersonality::CoreCLR) |
26890 | return Subtarget.isTarget64BitLP64() ? X86::RDX : X86::EDX; |
26891 | |
26892 | return Subtarget.isTarget64BitLP64() ? X86::RAX : X86::EAX; |
26893 | } |
26894 | |
26895 | Register X86TargetLowering::getExceptionSelectorRegister( |
26896 | const Constant *PersonalityFn) const { |
26897 | // Funclet personalities don't use selectors (the runtime does the selection). |
26898 | if (isFuncletEHPersonality(classifyEHPersonality(PersonalityFn))) |
26899 | return X86::NoRegister; |
26900 | return Subtarget.isTarget64BitLP64() ? X86::RDX : X86::EDX; |
26901 | } |
26902 | |
26903 | bool X86TargetLowering::needsFixedCatchObjects() const { |
26904 | return Subtarget.isTargetWin64(); |
26905 | } |
26906 | |
26907 | SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const { |
26908 | SDValue Chain = Op.getOperand(0); |
26909 | SDValue Offset = Op.getOperand(1); |
26910 | SDValue Handler = Op.getOperand(2); |
26911 | SDLoc dl (Op); |
26912 | |
26913 | EVT PtrVT = getPointerTy(DAG.getDataLayout()); |
26914 | const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); |
26915 | Register FrameReg = RegInfo->getFrameRegister(DAG.getMachineFunction()); |
26916 | assert(((FrameReg == X86::RBP && PtrVT == MVT::i64) || |
26917 | (FrameReg == X86::EBP && PtrVT == MVT::i32)) && |
26918 | "Invalid Frame Register!"); |
26919 | SDValue Frame = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, PtrVT); |
26920 | Register StoreAddrReg = (PtrVT == MVT::i64) ? X86::RCX : X86::ECX; |
26921 | |
26922 | SDValue StoreAddr = DAG.getNode(ISD::ADD, dl, PtrVT, Frame, |
26923 | DAG.getIntPtrConstant(RegInfo->getSlotSize(), |
26924 | dl)); |
26925 | StoreAddr = DAG.getNode(ISD::ADD, dl, PtrVT, StoreAddr, Offset); |
26926 | Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, MachinePointerInfo()); |
26927 | Chain = DAG.getCopyToReg(Chain, dl, StoreAddrReg, StoreAddr); |
26928 | |
26929 | return DAG.getNode(X86ISD::EH_RETURN, dl, MVT::Other, Chain, |
26930 | DAG.getRegister(StoreAddrReg, PtrVT)); |
26931 | } |
26932 | |
26933 | SDValue X86TargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op, |
26934 | SelectionDAG &DAG) const { |
26935 | SDLoc DL(Op); |
26936 | // If the subtarget is not 64bit, we may need the global base reg |
26937 | // after isel expand pseudo, i.e., after CGBR pass ran. |
26938 | // Therefore, ask for the GlobalBaseReg now, so that the pass |
26939 | // inserts the code for us in case we need it. |
26940 | // Otherwise, we will end up in a situation where we will |
26941 | // reference a virtual register that is not defined! |
26942 | if (!Subtarget.is64Bit()) { |
26943 | const X86InstrInfo *TII = Subtarget.getInstrInfo(); |
26944 | (void)TII->getGlobalBaseReg(&DAG.getMachineFunction()); |
26945 | } |
26946 | return DAG.getNode(X86ISD::EH_SJLJ_SETJMP, DL, |
26947 | DAG.getVTList(MVT::i32, MVT::Other), |
26948 | Op.getOperand(0), Op.getOperand(1)); |
26949 | } |
26950 | |
26951 | SDValue X86TargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op, |
26952 | SelectionDAG &DAG) const { |
26953 | SDLoc DL(Op); |
26954 | return DAG.getNode(X86ISD::EH_SJLJ_LONGJMP, DL, MVT::Other, |
26955 | Op.getOperand(0), Op.getOperand(1)); |
26956 | } |
26957 | |
26958 | SDValue X86TargetLowering::lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, |
26959 | SelectionDAG &DAG) const { |
26960 | SDLoc DL(Op); |
26961 | return DAG.getNode(X86ISD::EH_SJLJ_SETUP_DISPATCH, DL, MVT::Other, |
26962 | Op.getOperand(0)); |
26963 | } |
26964 | |
26965 | static SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) { |
26966 | return Op.getOperand(0); |
26967 | } |
26968 | |
26969 | SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op, |
26970 | SelectionDAG &DAG) const { |
26971 | SDValue Root = Op.getOperand(0); |
26972 | SDValue Trmp = Op.getOperand(1); |
26973 | SDValue FPtr = Op.getOperand(2); |
26974 | SDValue Nest = Op.getOperand(3); |
26975 | SDLoc dl (Op); |
26976 | |
26977 | const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue(); |
26978 | const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); |
26979 | |
26980 | if (Subtarget.is64Bit()) { |
26981 | SDValue OutChains[6]; |
26982 | |
26983 | // Large code-model. |
26984 | const unsigned char JMP64r = 0xFF; // 64-bit jmp through register opcode. |
26985 | const unsigned char MOV64ri = 0xB8; // X86::MOV64ri opcode. |
26986 | |
26987 | const unsigned char N86R10 = TRI->getEncodingValue(X86::R10) & 0x7; |
26988 | const unsigned char N86R11 = TRI->getEncodingValue(X86::R11) & 0x7; |
26989 | |
26990 | const unsigned char REX_WB = 0x40 | 0x08 | 0x01; // REX prefix |
26991 | |
26992 | // Load the pointer to the nested function into R11. |
26993 | unsigned OpCode = ((MOV64ri | N86R11) << 8) | REX_WB; // movabsq r11 |
26994 | SDValue Addr = Trmp; |
26995 | OutChains[0] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, dl, MVT::i16), |
26996 | Addr, MachinePointerInfo(TrmpAddr)); |
26997 | |
26998 | Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp, |
26999 | DAG.getConstant(2, dl, MVT::i64)); |
27000 | OutChains[1] = DAG.getStore(Root, dl, FPtr, Addr, |
27001 | MachinePointerInfo(TrmpAddr, 2), Align(2)); |
27002 | |
27003 | // Load the 'nest' parameter value into R10. |
27004 | // R10 is specified in X86CallingConv.td. |
27005 | OpCode = ((MOV64ri | N86R10) << 8) | REX_WB; |
27006 | Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp, |
27007 | DAG.getConstant(10, dl, MVT::i64)); |
27008 | OutChains[2] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, dl, MVT::i16), |
27009 | Addr, MachinePointerInfo(TrmpAddr, 10)); |
27010 | |
27011 | Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp, |
27012 | DAG.getConstant(12, dl, MVT::i64)); |
27013 | OutChains[3] = DAG.getStore(Root, dl, Nest, Addr, |
27014 | MachinePointerInfo(TrmpAddr, 12), Align(2)); |
27015 | |
27016 | // Jump to the nested function. |
27017 | OpCode = (JMP64r << 8) | REX_WB; // jmpq *r11 |
27018 | Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp, |
27019 | DAG.getConstant(20, dl, MVT::i64)); |
27020 | OutChains[4] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, dl, MVT::i16), |
27021 | Addr, MachinePointerInfo(TrmpAddr, 20)); |
27022 | |
27023 | unsigned char ModRM = N86R11 | (4 << 3) | (3 << 6); // ModRM byte: mod=11b, reg=/4 (jmp), r/m=r11. |
27024 | Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp, |
27025 | DAG.getConstant(22, dl, MVT::i64)); |
27026 | OutChains[5] = DAG.getStore(Root, dl, DAG.getConstant(ModRM, dl, MVT::i8), |
27027 | Addr, MachinePointerInfo(TrmpAddr, 22)); |
27028 | |
27029 | return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains); |
27030 | } else { |
27031 | const Function *Func = |
27032 | cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue()); |
27033 | CallingConv::ID CC = Func->getCallingConv(); |
27034 | unsigned NestReg; |
27035 | |
27036 | switch (CC) { |
27037 | default: |
27038 | llvm_unreachable("Unsupported calling convention"); |
27039 | case CallingConv::C: |
27040 | case CallingConv::X86_StdCall: { |
27041 | // Pass 'nest' parameter in ECX. |
27042 | // Must be kept in sync with X86CallingConv.td |
27043 | NestReg = X86::ECX; |
27044 | |
27045 | // Check that ECX wasn't needed by an 'inreg' parameter. |
27046 | FunctionType *FTy = Func->getFunctionType(); |
27047 | const AttributeList &Attrs = Func->getAttributes(); |
27048 | |
27049 | if (!Attrs.isEmpty() && !Func->isVarArg()) { |
27050 | unsigned InRegCount = 0; |
27051 | unsigned Idx = 1; |
27052 | |
27053 | for (FunctionType::param_iterator I = FTy->param_begin(), |
27054 | E = FTy->param_end(); I != E; ++I, ++Idx) |
27055 | if (Attrs.hasAttribute(Idx, Attribute::InReg)) { |
27056 | const DataLayout &DL = DAG.getDataLayout(); |
27057 | |
27058 | InRegCount += (DL.getTypeSizeInBits(*I) + 31) / 32; |
27059 | } |
27060 | |
27061 | if (InRegCount > 2) { |
27062 | report_fatal_error("Nest register in use - reduce number of inreg" |
27063 | " parameters!"); |
27064 | } |
27065 | } |
27066 | break; |
27067 | } |
27068 | case CallingConv::X86_FastCall: |
27069 | case CallingConv::X86_ThisCall: |
27070 | case CallingConv::Fast: |
27071 | case CallingConv::Tail: |
27072 | case CallingConv::SwiftTail: |
27073 | // Pass 'nest' parameter in EAX. |
27074 | // Must be kept in sync with X86CallingConv.td |
27075 | NestReg = X86::EAX; |
27076 | break; |
27077 | } |
27078 | |
27079 | SDValue OutChains[4]; |
27080 | SDValue Addr, Disp; |
27081 | |
27082 | Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp, |
27083 | DAG.getConstant(10, dl, MVT::i32)); |
27084 | Disp = DAG.getNode(ISD::SUB, dl, MVT::i32, FPtr, Addr); |
27085 | |
27086 | // This is storing the opcode for MOV32ri. |
27087 | const unsigned char MOV32ri = 0xB8; // X86::MOV32ri's opcode byte. |
27088 | const unsigned char N86Reg = TRI->getEncodingValue(NestReg) & 0x7; |
27089 | OutChains[0] = |
27090 | DAG.getStore(Root, dl, DAG.getConstant(MOV32ri | N86Reg, dl, MVT::i8), |
27091 | Trmp, MachinePointerInfo(TrmpAddr)); |
27092 | |
27093 | Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp, |
27094 | DAG.getConstant(1, dl, MVT::i32)); |
27095 | OutChains[1] = DAG.getStore(Root, dl, Nest, Addr, |
27096 | MachinePointerInfo(TrmpAddr, 1), Align(1)); |
27097 | |
27098 | const unsigned char JMP = 0xE9; // jmp <32bit dst> opcode. |
27099 | Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp, |
27100 | DAG.getConstant(5, dl, MVT::i32)); |
27101 | OutChains[2] = |
27102 | DAG.getStore(Root, dl, DAG.getConstant(JMP, dl, MVT::i8), Addr, |
27103 | MachinePointerInfo(TrmpAddr, 5), Align(1)); |
27104 | |
27105 | Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp, |
27106 | DAG.getConstant(6, dl, MVT::i32)); |
27107 | OutChains[3] = DAG.getStore(Root, dl, Disp, Addr, |
27108 | MachinePointerInfo(TrmpAddr, 6), Align(1)); |
27109 | |
27110 | return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains); |
27111 | } |
27112 | } |
27113 | |
27114 | SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op, |
27115 | SelectionDAG &DAG) const { |
27116 | /* |
27117 | The x87 rounding mode is in bits 11:10 of the FP control word, with the |
27118 | following settings: |
27119 | 00 Round to nearest |
27120 | 01 Round to -inf |
27121 | 10 Round to +inf |
27122 | 11 Round to 0 |
27123 | |
27124 | FLT_ROUNDS, on the other hand, expects the following: |
27125 | -1 Undefined |
27126 | 0 Round to 0 |
27127 | 1 Round to nearest |
27128 | 2 Round to +inf |
27129 | 3 Round to -inf |
27130 | |
27131 | To perform the conversion, we use a packed lookup table of the four 2-bit |
27132 | values that we can index by FPCW[11:10]: |
27133 | 0x2d --> (0b00,10,11,01) --> (0,2,3,1) >> FPCW[11:10] |
27134 | |
27135 | i.e. (0x2d >> ((FPCW & 0xc00) >> 9)) & 3 |
27136 | */ |
27137 | |
27138 | MachineFunction &MF = DAG.getMachineFunction(); |
27139 | MVT VT = Op.getSimpleValueType(); |
27140 | SDLoc DL(Op); |
27141 | |
27142 | // Save FP Control Word to stack slot. |
27143 | int SSFI = MF.getFrameInfo().CreateStackObject(2, Align(2), false); |
27144 | SDValue StackSlot = |
27145 | DAG.getFrameIndex(SSFI, getPointerTy(DAG.getDataLayout())); |
27146 | |
27147 | MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, SSFI); |
27148 | |
27149 | SDValue Chain = Op.getOperand(0); |
27150 | SDValue Ops[] = {Chain, StackSlot}; |
27151 | Chain = DAG.getMemIntrinsicNode(X86ISD::FNSTCW16m, DL, |
27152 | DAG.getVTList(MVT::Other), Ops, MVT::i16, MPI, |
27153 | Align(2), MachineMemOperand::MOStore); |
27154 | |
27155 | // Load FP Control Word from stack slot. |
27156 | SDValue CWD = DAG.getLoad(MVT::i16, DL, Chain, StackSlot, MPI, Align(2)); |
27157 | Chain = CWD.getValue(1); |
27158 | |
27159 | // Mask and turn the control bits into a shift for the lookup table. |
27160 | SDValue Shift = |
27161 | DAG.getNode(ISD::SRL, DL, MVT::i16, |
27162 | DAG.getNode(ISD::AND, DL, MVT::i16, |
27163 | CWD, DAG.getConstant(0xc00, DL, MVT::i16)), |
27164 | DAG.getConstant(9, DL, MVT::i8)); |
27165 | Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, Shift); |
27166 | |
27167 | SDValue LUT = DAG.getConstant(0x2d, DL, MVT::i32); |
27168 | SDValue RetVal = |
27169 | DAG.getNode(ISD::AND, DL, MVT::i32, |
27170 | DAG.getNode(ISD::SRL, DL, MVT::i32, LUT, Shift), |
27171 | DAG.getConstant(3, DL, MVT::i32)); |
27172 | |
27173 | RetVal = DAG.getZExtOrTrunc(RetVal, DL, VT); |
27174 | |
27175 | return DAG.getMergeValues({RetVal, Chain}, DL); |
27176 | } |
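      | // Worked example of the lookup above: with the x87 RC field set to 01 |
      | // (round toward -inf), CWD & 0xc00 = 0x400 and Shift = 0x400 >> 9 = 2, |
      | // so RetVal = (0x2d >> 2) & 3 = 3, the FLT_ROUNDS value for -inf. |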
27177 | |
27178 | SDValue X86TargetLowering::LowerSET_ROUNDING(SDValue Op, |
27179 | SelectionDAG &DAG) const { |
27180 | MachineFunction &MF = DAG.getMachineFunction(); |
27181 | SDLoc DL(Op); |
27182 | SDValue Chain = Op.getNode()->getOperand(0); |
27183 | |
27184 | |
27185 | // The x87 control word is only accessible through memory; use a stack slot. |
27186 | int OldCWFrameIdx = MF.getFrameInfo().CreateStackObject(4, Align(4), false); |
27187 | SDValue StackSlot = |
27188 | DAG.getFrameIndex(OldCWFrameIdx, getPointerTy(DAG.getDataLayout())); |
27189 | MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, OldCWFrameIdx); |
27190 | MachineMemOperand *MMO = |
27191 | MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 2, Align(2)); |
27192 | |
27193 | // Store the FP control word into the stack slot with FNSTCW. |
27194 | SDValue Ops[] = {Chain, StackSlot}; |
27195 | Chain = DAG.getMemIntrinsicNode( |
27196 | X86ISD::FNSTCW16m, DL, DAG.getVTList(MVT::Other), Ops, MVT::i16, MMO); |
27197 | |
27198 | // Load FP Control Word from the slot and clear the RM field (bits 11:10). |
27199 | SDValue CWD = DAG.getLoad(MVT::i16, DL, Chain, StackSlot, MPI); |
27200 | Chain = CWD.getValue(1); |
27201 | CWD = DAG.getNode(ISD::AND, DL, MVT::i16, CWD.getValue(0), |
27202 | DAG.getConstant(0xf3ff, DL, MVT::i16)); |
27203 | |
27204 | // Calculate the new rounding mode bits. |
27205 | SDValue NewRM = Op.getNode()->getOperand(1); |
27206 | SDValue RMBits; |
27207 | if (auto *CVal = dyn_cast<ConstantSDNode>(NewRM)) { |
27208 | uint64_t RM = CVal->getZExtValue(); |
27209 | int FieldVal; |
27210 | switch (static_cast<RoundingMode>(RM)) { |
27211 | case RoundingMode::NearestTiesToEven: FieldVal = X86::rmToNearest; break; |
27212 | case RoundingMode::TowardNegative: FieldVal = X86::rmDownward; break; |
27213 | case RoundingMode::TowardPositive: FieldVal = X86::rmUpward; break; |
27214 | case RoundingMode::TowardZero: FieldVal = X86::rmTowardZero; break; |
27215 | default: |
27216 | llvm_unreachable("rounding mode is not supported by X86 hardware"); |
27217 | } |
27218 | RMBits = DAG.getConstant(FieldVal, DL, MVT::i16); |
27219 | } else { |
27220 | // Need to convert the operand (FLT_ROUNDS encoding) into the bits of |
27221 | // the control word RM field: |
27222 | //    0 Round to 0       -> 11 |
27223 | //    1 Round to nearest -> 00 |
27224 | //    2 Round to +inf    -> 10 |
27225 | //    3 Round to -inf    -> 01 |
27226 | // All four mappings are produced at once by shifting the bit pattern |
27227 | // 0xc9 (0b11001001) left by (2 * RM + 4) and keeping only bits 11:10: |
27228 | // |
27229 | //   RMBits = (0xc9 << ((RM << 1) + 4)) & 0xc00 |
27230 | // |
27231 | |
27232 | SDValue ShiftValue = |
27233 | DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, |
27234 | DAG.getNode(ISD::ADD, DL, MVT::i32, |
27235 | DAG.getNode(ISD::SHL, DL, MVT::i32, NewRM, |
27236 | DAG.getConstant(1, DL, MVT::i8)), |
27237 | DAG.getConstant(4, DL, MVT::i32))); |
27238 | SDValue Shifted = |
27239 | DAG.getNode(ISD::SHL, DL, MVT::i16, DAG.getConstant(0xc9, DL, MVT::i16), |
27240 | ShiftValue); |
27241 | RMBits = DAG.getNode(ISD::AND, DL, MVT::i16, Shifted, |
27242 | DAG.getConstant(0xc00, DL, MVT::i16)); |
27243 | } |
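      | // Spot-checking the shift trick above: RM = 1 (to nearest) gives |
      | // (0xc9 << 6) & 0xc00 = 0x3240 & 0xc00 = 0x000, the x87 'nearest' |
      | // encoding, while RM = 0 (toward zero) gives (0xc9 << 4) & 0xc00 = 0xc00. |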
27244 | |
27245 | // Update rounding mode bits and store the new FP Control Word into stack. |
27246 | CWD = DAG.getNode(ISD::OR, DL, MVT::i16, CWD, RMBits); |
27247 | Chain = DAG.getStore(Chain, DL, CWD, StackSlot, MPI, 2); |
27248 | |
27249 | // Reload the FP control word from the slot with FLDCW. |
27250 | SDValue OpsLD[] = {Chain, StackSlot}; |
27251 | MachineMemOperand *MMOL = |
27252 | MF.getMachineMemOperand(MPI, MachineMemOperand::MOLoad, 2, Align(2)); |
27253 | Chain = DAG.getMemIntrinsicNode( |
27254 | X86ISD::FLDCW16m, DL, DAG.getVTList(MVT::Other), OpsLD, MVT::i16, MMOL); |
27255 | |
27256 | // If the target supports SSE, update the rounding mode in MXCSR as well; |
27257 | // there the RM field lives in bits 14:13. |
27258 | if (Subtarget.hasSSE1()) { |
27259 | // Store MXCSR into the stack slot. |
27260 | Chain = DAG.getNode( |
27261 | ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Chain, |
27262 | DAG.getTargetConstant(Intrinsic::x86_sse_stmxcsr, DL, MVT::i32), |
27263 | StackSlot); |
27264 | |
27265 | // Load MXCSR from the slot and clear the RM field (bits 14:13). |
27266 | SDValue CWD = DAG.getLoad(MVT::i32, DL, Chain, StackSlot, MPI); |
27267 | Chain = CWD.getValue(1); |
27268 | CWD = DAG.getNode(ISD::AND, DL, MVT::i32, CWD.getValue(0), |
27269 | DAG.getConstant(0xffff9fff, DL, MVT::i32)); |
27270 | |
27271 | // Shift the x87 RM bits from position 11:10 to 14:13. |
27272 | RMBits = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, RMBits); |
27273 | RMBits = DAG.getNode(ISD::SHL, DL, MVT::i32, RMBits, |
27274 | DAG.getConstant(3, DL, MVT::i8)); |
27275 | |
27276 | // Update rounding mode bits and store the new MXCSR into the slot. |
27277 | CWD = DAG.getNode(ISD::OR, DL, MVT::i32, CWD, RMBits); |
27278 | Chain = DAG.getStore(Chain, DL, CWD, StackSlot, MPI, 4); |
27279 | |
27280 | // Reload MXCSR from the slot. |
27281 | Chain = DAG.getNode( |
27282 | ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Chain, |
27283 | DAG.getTargetConstant(Intrinsic::x86_sse_ldmxcsr, DL, MVT::i32), |
27284 | StackSlot); |
27285 | } |
27286 | |
27287 | return Chain; |
27288 | } |
27289 | |
27290 | // Lower a vector CTLZ using native supported vector CTLZ instruction. |
27291 | // |
27292 | // i8/i16 vectors are implemented using the dword LZCNT vector instruction |
27293 | // ( sub(trunc(lzcnt(zext32(x)))) ). In case zext32(x) is illegal, split |
27294 | // the vector, perform the operation on its Lo and Hi parts and |
27295 | // concatenate the results. |
27296 | static SDValue LowerVectorCTLZ_AVX512CDI(SDValue Op, SelectionDAG &DAG, |
27297 | const X86Subtarget &Subtarget) { |
27298 | assert(Op.getOpcode() == ISD::CTLZ); |
27299 | SDLoc dl(Op); |
27300 | MVT VT = Op.getSimpleValueType(); |
27301 | MVT EltVT = VT.getVectorElementType(); |
27302 | unsigned NumElems = VT.getVectorNumElements(); |
27303 | |
27304 | assert((EltVT == MVT::i8 || EltVT == MVT::i16) && |
27305 | "Unsupported element type"); |
27306 | |
27307 | // Split vector; its Lo and Hi parts will be handled in the next iteration. |
27308 | if (NumElems > 16 || |
27309 | (NumElems == 16 && !Subtarget.canExtendTo512DQ())) |
27310 | return splitVectorIntUnary(Op, DAG); |
27311 | |
27312 | MVT NewVT = MVT::getVectorVT(MVT::i32, NumElems); |
27313 | assert((NewVT.is256BitVector() || NewVT.is512BitVector()) && |
27314 | "Unsupported value type for operation"); |
27315 | |
27316 | // Use the natively supported vplzcntd on the zero-extended input. |
27317 | Op = DAG.getNode(ISD::ZERO_EXTEND, dl, NewVT, Op.getOperand(0)); |
27318 | SDValue CtlzNode = DAG.getNode(ISD::CTLZ, dl, NewVT, Op); |
27319 | SDValue TruncNode = DAG.getNode(ISD::TRUNCATE, dl, VT, CtlzNode); |
27320 | SDValue Delta = DAG.getConstant(32 - EltVT.getSizeInBits(), dl, VT); |
27321 | |
27322 | return DAG.getNode(ISD::SUB, dl, VT, TruncNode, Delta); |
27323 | } |
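      | // Example: for a v16i16 input, element 0x0001 zero-extends to |
      | // 0x00000001, vplzcntd yields 31, and subtracting the delta |
      | // (32 - 16 = 16) gives the correct i16 result of 15. |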
27324 | |
27325 | // Lower CTLZ using a PSHUFB lookup table implementation. |
27326 | static SDValue LowerVectorCTLZInRegLUT(SDValue Op, const SDLoc &DL, |
27327 | const X86Subtarget &Subtarget, |
27328 | SelectionDAG &DAG) { |
27329 | MVT VT = Op.getSimpleValueType(); |
27330 | int NumElts = VT.getVectorNumElements(); |
27331 | int NumBytes = NumElts * (VT.getScalarSizeInBits() / 8); |
27332 | MVT CurrVT = MVT::getVectorVT(MVT::i8, NumBytes); |
27333 | |
27334 | // Per-nibble leading zero PSHUFB lookup table. |
27335 | const int LUT[16] = { 4, 3, 2, 2, |
27336 | 1, 1, 1, 1, |
27337 | 0, 0, 0, 0, |
27338 | 0, 0, 0, 0}; |
27339 | |
27340 | SmallVector<SDValue, 64> LUTVec; |
27341 | for (int i = 0; i < NumBytes; ++i) |
27342 | LUTVec.push_back(DAG.getConstant(LUT[i % 16], DL, MVT::i8)); |
27343 | SDValue InRegLUT = DAG.getBuildVector(CurrVT, DL, LUTVec); |
27344 | |
27345 | // Begin by bitcasting the input to a byte vector, then split those bytes |
27346 | // into lo/hi nibbles and use the PSHUFB LUT to perform CTLZ on each of |
27347 | // them. If the hi input nibble is zero then we add both results together, |
27348 | // otherwise we just take the hi result (by masking the lo result to zero |
27349 | // before the add). |
27350 | SDValue Op0 = DAG.getBitcast(CurrVT, Op.getOperand(0)); |
27351 | SDValue Zero = DAG.getConstant(0, DL, CurrVT); |
27352 | |
27353 | SDValue NibbleShift = DAG.getConstant(0x4, DL, CurrVT); |
27354 | SDValue Lo = Op0; |
27355 | SDValue Hi = DAG.getNode(ISD::SRL, DL, CurrVT, Op0, NibbleShift); |
27356 | SDValue HiZ; |
27357 | if (CurrVT.is512BitVector()) { |
27358 | MVT MaskVT = MVT::getVectorVT(MVT::i1, CurrVT.getVectorNumElements()); |
27359 | HiZ = DAG.getSetCC(DL, MaskVT, Hi, Zero, ISD::SETEQ); |
27360 | HiZ = DAG.getNode(ISD::SIGN_EXTEND, DL, CurrVT, HiZ); |
27361 | } else { |
27362 | HiZ = DAG.getSetCC(DL, CurrVT, Hi, Zero, ISD::SETEQ); |
27363 | } |
27364 | |
27365 | Lo = DAG.getNode(X86ISD::PSHUFB, DL, CurrVT, InRegLUT, Lo); |
27366 | Hi = DAG.getNode(X86ISD::PSHUFB, DL, CurrVT, InRegLUT, Hi); |
27367 | Lo = DAG.getNode(ISD::AND, DL, CurrVT, Lo, HiZ); |
27368 | SDValue Res = DAG.getNode(ISD::ADD, DL, CurrVT, Lo, Hi); |
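      | // Example: byte 0x0A has a zero hi nibble, so HiZ is all-ones; |
      | // Hi = LUT[0] = 4, Lo = LUT[0xA] = 0 survives the mask, Res = 4 + 0 = 4. |
      | // Byte 0x50 has hi nibble 5, so HiZ is zero; Hi = LUT[5] = 1, the lo |
      | // result is masked away, and Res = 1. |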
27369 | |
27370 | // We now have the leading-zero count per byte. Merge the result upwards |
27371 | // to the requested element width: at each step, an element's count is the |
27372 | // upper-half count plus the lower-half count if the upper half of the |
27373 | // source element was zero, and just the upper-half count otherwise, so |
27374 | // mask the lower-half contribution with an 'upper half is zero' test. |
27375 | while (CurrVT != VT) { |
27376 | int CurrScalarSizeInBits = CurrVT.getScalarSizeInBits(); |
27377 | int CurrNumElts = CurrVT.getVectorNumElements(); |
27378 | MVT NextSVT = MVT::getIntegerVT(CurrScalarSizeInBits * 2); |
27379 | MVT NextVT = MVT::getVectorVT(NextSVT, CurrNumElts / 2); |
27380 | SDValue Shift = DAG.getConstant(CurrScalarSizeInBits, DL, NextVT); |
27381 | |
27382 | // Check if the upper half of the input element is zero. |
27383 | if (CurrVT.is512BitVector()) { |
27384 | MVT MaskVT = MVT::getVectorVT(MVT::i1, CurrVT.getVectorNumElements()); |
27385 | HiZ = DAG.getSetCC(DL, MaskVT, DAG.getBitcast(CurrVT, Op0), |
27386 | DAG.getBitcast(CurrVT, Zero), ISD::SETEQ); |
27387 | HiZ = DAG.getNode(ISD::SIGN_EXTEND, DL, CurrVT, HiZ); |
27388 | } else { |
27389 | HiZ = DAG.getSetCC(DL, CurrVT, DAG.getBitcast(CurrVT, Op0), |
27390 | DAG.getBitcast(CurrVT, Zero), ISD::SETEQ); |
27391 | } |
27392 | HiZ = DAG.getBitcast(NextVT, HiZ); |
27393 | |
27394 | // Move the upper-half count down into the lower half and add in the |
27395 | // lower-half count, masked to the lanes whose upper half of the source |
27396 | // was zero. |
27397 | SDValue ResNext = Res = DAG.getBitcast(NextVT, Res); |
27398 | SDValue R0 = DAG.getNode(ISD::SRL, DL, NextVT, ResNext, Shift); |
27399 | SDValue R1 = DAG.getNode(ISD::SRL, DL, NextVT, HiZ, Shift); |
27400 | R1 = DAG.getNode(ISD::AND, DL, NextVT, ResNext, R1); |
27401 | Res = DAG.getNode(ISD::ADD, DL, NextVT, R0, R1); |
27402 | CurrVT = NextVT; |
27403 | } |
27404 | |
27405 | return Res; |
27406 | } |
27407 | |
27408 | static SDValue LowerVectorCTLZ(SDValue Op, const SDLoc &DL, |
27409 | const X86Subtarget &Subtarget, |
27410 | SelectionDAG &DAG) { |
27411 | MVT VT = Op.getSimpleValueType(); |
27412 | |
27413 | if (Subtarget.hasCDI() && |
27414 | // vXi8 vectors need to be promoted to 512-bits for vXi32. |
27415 | (Subtarget.canExtendTo512DQ() || VT.getVectorElementType() != MVT::i8)) |
27416 | return LowerVectorCTLZ_AVX512CDI(Op, DAG, Subtarget); |
27417 | |
27418 | // Decompose 256-bit ops into smaller 128-bit ops. |
27419 | if (VT.is256BitVector() && !Subtarget.hasInt256()) |
27420 | return splitVectorIntUnary(Op, DAG); |
27421 | |
27422 | // Decompose 512-bit ops into smaller 256-bit ops. |
27423 | if (VT.is512BitVector() && !Subtarget.hasBWI()) |
27424 | return splitVectorIntUnary(Op, DAG); |
27425 | |
27426 | assert(Subtarget.hasSSSE3() && "Expected SSSE3 support for PSHUFB"); |
27427 | return LowerVectorCTLZInRegLUT(Op, DL, Subtarget, DAG); |
27428 | } |
27429 | |
27430 | static SDValue LowerCTLZ(SDValue Op, const X86Subtarget &Subtarget, |
27431 | SelectionDAG &DAG) { |
27432 | MVT VT = Op.getSimpleValueType(); |
27433 | MVT OpVT = VT; |
27434 | unsigned NumBits = VT.getSizeInBits(); |
27435 | SDLoc dl(Op); |
27436 | unsigned Opc = Op.getOpcode(); |
27437 | |
27438 | if (VT.isVector()) |
27439 | return LowerVectorCTLZ(Op, dl, Subtarget, DAG); |
27440 | |
27441 | Op = Op.getOperand(0); |
27442 | if (VT == MVT::i8) { |
27443 | // Zero extend to i32 since there is not an i8 bsr. |
27444 | OpVT = MVT::i32; |
27445 | Op = DAG.getNode(ISD::ZERO_EXTEND, dl, OpVT, Op); |
27446 | } |
27447 | |
27448 | // Issue a bsr (scan bits in reverse) which also sets EFLAGS. |
27449 | SDVTList VTs = DAG.getVTList(OpVT, MVT::i32); |
27450 | Op = DAG.getNode(X86ISD::BSR, dl, VTs, Op); |
27451 | |
27452 | if (Opc == ISD::CTLZ) { |
27453 | // If src is zero (i.e. bsr sets ZF), returns NumBits. |
27454 | SDValue Ops[] = {Op, DAG.getConstant(NumBits + NumBits - 1, dl, OpVT), |
27455 | DAG.getTargetConstant(X86::COND_E, dl, MVT::i8), |
27456 | Op.getValue(1)}; |
27457 | Op = DAG.getNode(X86ISD::CMOV, dl, OpVT, Ops); |
27458 | } |
27459 | |
27460 | // Finally xor with NumBits-1. |
27461 | Op = DAG.getNode(ISD::XOR, dl, OpVT, Op, |
27462 | DAG.getConstant(NumBits - 1, dl, OpVT)); |
27463 | |
27464 | if (VT == MVT::i8) |
27465 | Op = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Op); |
27466 | return Op; |
27467 | } |
27468 | |
27469 | static SDValue LowerCTTZ(SDValue Op, const X86Subtarget &Subtarget, |
27470 | SelectionDAG &DAG) { |
27471 | MVT VT = Op.getSimpleValueType(); |
27472 | unsigned NumBits = VT.getScalarSizeInBits(); |
27473 | SDValue N0 = Op.getOperand(0); |
27474 | SDLoc dl(Op); |
27475 | |
27476 | assert(!VT.isVector() && Op.getOpcode() == ISD::CTTZ && |
27477 | "Only scalar CTTZ requires custom lowering"); |
27478 | |
27479 | // Issue a bsf (scan bits forward) which also sets EFLAGS. |
27480 | SDVTList VTs = DAG.getVTList(VT, MVT::i32); |
27481 | Op = DAG.getNode(X86ISD::BSF, dl, VTs, N0); |
27482 | |
27483 | // If src is zero (i.e. bsf sets ZF), returns NumBits. |
27484 | SDValue Ops[] = {Op, DAG.getConstant(NumBits, dl, VT), |
27485 | DAG.getTargetConstant(X86::COND_E, dl, MVT::i8), |
27486 | Op.getValue(1)}; |
27487 | return DAG.getNode(X86ISD::CMOV, dl, VT, Ops); |
27488 | } |
27489 | |
27490 | static SDValue lowerAddSub(SDValue Op, SelectionDAG &DAG, |
27491 | const X86Subtarget &Subtarget) { |
27492 | MVT VT = Op.getSimpleValueType(); |
27493 | if (VT == MVT::i16 || VT == MVT::i32) |
27494 | return lowerAddSubToHorizontalOp(Op, DAG, Subtarget); |
27495 | |
27496 | if (VT == MVT::v32i16 || VT == MVT::v64i8) |
27497 | return splitVectorIntBinary(Op, DAG); |
27498 | |
27499 | assert(Op.getSimpleValueType().is256BitVector() && |
27500 | Op.getSimpleValueType().isInteger() && |
27501 | "Only handle AVX 256-bit vector integer operation"); |
27502 | return splitVectorIntBinary(Op, DAG); |
27503 | } |
27504 | |
27505 | static SDValue LowerADDSAT_SUBSAT(SDValue Op, SelectionDAG &DAG, |
27506 | const X86Subtarget &Subtarget) { |
27507 | MVT VT = Op.getSimpleValueType(); |
27508 | SDValue X = Op.getOperand(0), Y = Op.getOperand(1); |
27509 | unsigned Opcode = Op.getOpcode(); |
27510 | SDLoc DL(Op); |
27511 | |
27512 | if (VT == MVT::v32i16 || VT == MVT::v64i8 || |
27513 | (VT.is256BitVector() && !Subtarget.hasInt256())) { |
27514 | assert(Op.getSimpleValueType().isInteger() && |
27515 | "Only handle AVX vector integer operation"); |
27516 | return splitVectorIntBinary(Op, DAG); |
27517 | } |
27518 | |
27519 | |
27520 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
27521 | EVT SetCCResultType = |
27522 | TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); |
27523 | |
27524 | if (Opcode == ISD::USUBSAT && !TLI.isOperationLegal(ISD::UMAX, VT)) { |
27525 | // usubsat X, Y --> (X >u Y) ? X - Y : 0 |
27526 | SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, X, Y); |
27527 | SDValue Cmp = DAG.getSetCC(DL, SetCCResultType, X, Y, ISD::SETUGT); |
27528 | // TODO: Move this to DAGCombiner? |
27529 | if (SetCCResultType == VT && |
27530 | DAG.ComputeNumSignBits(Cmp) == VT.getScalarSizeInBits()) |
27531 | return DAG.getNode(ISD::AND, DL, VT, Cmp, Sub); |
27532 | return DAG.getSelect(DL, VT, Cmp, Sub, DAG.getConstant(0, DL, VT)); |
27533 | } |
27534 | |
27535 | // Use default expansion. |
27536 | return SDValue(); |
27537 | } |
27538 | |
27539 | static SDValue LowerABS(SDValue Op, const X86Subtarget &Subtarget, |
27540 | SelectionDAG &DAG) { |
27541 | MVT VT = Op.getSimpleValueType(); |
27542 | if (VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) { |
27543 | // Since X86 does not have CMOV for 8-bit integer, we don't convert |
27544 | // 8-bit integer abs to NEG and CMOV. |
27545 | SDLoc DL(Op); |
27546 | SDValue N0 = Op.getOperand(0); |
27547 | SDValue Neg = DAG.getNode(X86ISD::SUB, DL, DAG.getVTList(VT, MVT::i32), |
27548 | DAG.getConstant(0, DL, VT), N0); |
27549 | SDValue Ops[] = {N0, Neg, DAG.getTargetConstant(X86::COND_GE, DL, MVT::i8), |
27550 | SDValue(Neg.getNode(), 1)}; |
27551 | return DAG.getNode(X86ISD::CMOV, DL, VT, Ops); |
27552 | } |
27553 | |
27554 | // ABS(vXi64 X) --> VPBLENDVPD(X, 0-X, X) |
27555 | if ((VT == MVT::v2i64 || VT == MVT::v4i64) && Subtarget.hasSSE41()) { |
27556 | SDLoc DL(Op); |
27557 | SDValue Src = Op.getOperand(0); |
27558 | SDValue Sub = |
27559 | DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Src); |
27560 | return DAG.getNode(X86ISD::BLENDV, DL, VT, Src, Sub, Src); |
27561 | } |
27562 | |
27563 | if (VT.is256BitVector() && !Subtarget.hasInt256()) { |
27564 | assert(VT.isInteger() && |
27565 | "Only handle AVX 256-bit vector integer operation"); |
27566 | return splitVectorIntUnary(Op, DAG); |
27567 | } |
27568 | |
27569 | if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !Subtarget.hasBWI()) |
27570 | return splitVectorIntUnary(Op, DAG); |
27571 | |
27572 | // Default to expand. |
27573 | return SDValue(); |
27574 | } |
27575 | |
27576 | static SDValue LowerMINMAX(SDValue Op, SelectionDAG &DAG) { |
27577 | MVT VT = Op.getSimpleValueType(); |
27578 | |
27579 | // For AVX1 cases, split to use legal ops (everything but v4i64). |
27580 | if (VT.getScalarType() != MVT::i64 && VT.is256BitVector()) |
27581 | return splitVectorIntBinary(Op, DAG); |
27582 | |
27583 | if (VT == MVT::v32i16 || VT == MVT::v64i8) |
27584 | return splitVectorIntBinary(Op, DAG); |
27585 | |
27586 | // Default to expand. |
27587 | return SDValue(); |
27588 | } |
27589 | |
27590 | static SDValue LowerMUL(SDValue Op, const X86Subtarget &Subtarget, |
27591 | SelectionDAG &DAG) { |
27592 | SDLoc dl(Op); |
27593 | MVT VT = Op.getSimpleValueType(); |
27594 | |
27595 | // Decompose 256-bit ops into 128-bit ops. |
27596 | if (VT.is256BitVector() && !Subtarget.hasInt256()) |
27597 | return splitVectorIntBinary(Op, DAG); |
27598 | // Decompose 512-bit ops into 256-bit ops when BWI is unavailable. |
27599 | if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !Subtarget.hasBWI()) |
27600 | return splitVectorIntBinary(Op, DAG); |
27601 | |
27602 | SDValue A = Op.getOperand(0); |
27603 | SDValue B = Op.getOperand(1); |
27604 | |
27605 | // Lower v16i8/v32i8/v64i8 mul as sign-extension to v8i16/v16i16/v32i16 |
27606 | // vector pairs, multiply and truncate. |
27607 | if (VT == MVT::v16i8 || VT == MVT::v32i8 || VT == MVT::v64i8) { |
27608 | unsigned NumElts = VT.getVectorNumElements(); |
27609 | |
27610 | if ((VT == MVT::v16i8 && Subtarget.hasInt256()) || |
27611 | (VT == MVT::v32i8 && Subtarget.canExtendTo512BW())) { |
27612 | MVT ExVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements()); |
27613 | return DAG.getNode( |
27614 | ISD::TRUNCATE, dl, VT, |
27615 | DAG.getNode(ISD::MUL, dl, ExVT, |
27616 | DAG.getNode(ISD::ANY_EXTEND, dl, ExVT, A), |
27617 | DAG.getNode(ISD::ANY_EXTEND, dl, ExVT, B))); |
27618 | } |
27619 | |
27620 | MVT ExVT = MVT::getVectorVT(MVT::i16, NumElts / 2); |
27621 | |
27622 | // Extract the lo/hi parts to any extend to i16. |
27623 | // We're going to mask off the low byte of each result element of the |
27624 | // pmullw, so it doesn't matter what's in the high byte of each 16-bit |
27625 | // element. |
27626 | SDValue Undef = DAG.getUNDEF(VT); |
27627 | SDValue ALo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, A, Undef)); |
27628 | SDValue AHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, A, Undef)); |
27629 | |
27630 | SDValue BLo, BHi; |
27631 | if (ISD::isBuildVectorOfConstantSDNodes(B.getNode())) { |
27632 | // If the RHS is a constant, manually unpackl/unpackh. |
27633 | SmallVector<SDValue, 16> LoOps, HiOps; |
27634 | for (unsigned i = 0; i != NumElts; i += 16) { |
27635 | for (unsigned j = 0; j != 8; ++j) { |
27636 | LoOps.push_back(DAG.getAnyExtOrTrunc(B.getOperand(i + j), dl, |
27637 | MVT::i16)); |
27638 | HiOps.push_back(DAG.getAnyExtOrTrunc(B.getOperand(i + j + 8), dl, |
27639 | MVT::i16)); |
27640 | } |
27641 | } |
27642 | |
27643 | BLo = DAG.getBuildVector(ExVT, dl, LoOps); |
27644 | BHi = DAG.getBuildVector(ExVT, dl, HiOps); |
27645 | } else { |
27646 | BLo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, B, Undef)); |
27647 | BHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, B, Undef)); |
27648 | } |
27649 | |
27650 | // Multiply, mask the lower 8bits of the lo/hi results and pack. |
27651 | SDValue RLo = DAG.getNode(ISD::MUL, dl, ExVT, ALo, BLo); |
27652 | SDValue RHi = DAG.getNode(ISD::MUL, dl, ExVT, AHi, BHi); |
27653 | RLo = DAG.getNode(ISD::AND, dl, ExVT, RLo, DAG.getConstant(255, dl, ExVT)); |
27654 | RHi = DAG.getNode(ISD::AND, dl, ExVT, RHi, DAG.getConstant(255, dl, ExVT)); |
27655 | return DAG.getNode(X86ISD::PACKUS, dl, VT, RLo, RHi); |
27656 | } |
27657 | |
27658 | // Lower v4i32 mul as 2x shuffle, 2x pmuludq, 2x shuffle. |
27659 | if (VT == MVT::v4i32) { |
27660 | assert(Subtarget.hasSSE2() && !Subtarget.hasSSE41() && |
27661 | "Should not custom lower when pmulld is available!"); |
27662 | |
27663 | // Extract the odd parts. |
27664 | static const int UnpackMask[] = { 1, -1, 3, -1 }; |
27665 | SDValue Aodds = DAG.getVectorShuffle(VT, dl, A, A, UnpackMask); |
27666 | SDValue Bodds = DAG.getVectorShuffle(VT, dl, B, B, UnpackMask); |
27667 | |
27668 | // Multiply the even parts. |
27669 | SDValue Evens = DAG.getNode(X86ISD::PMULUDQ, dl, MVT::v2i64, |
27670 | DAG.getBitcast(MVT::v2i64, A), |
27671 | DAG.getBitcast(MVT::v2i64, B)); |
27672 | // Now multiply the odd parts. |
27673 | SDValue Odds = DAG.getNode(X86ISD::PMULUDQ, dl, MVT::v2i64, |
27674 | DAG.getBitcast(MVT::v2i64, Aodds), |
27675 | DAG.getBitcast(MVT::v2i64, Bodds)); |
27676 | |
27677 | Evens = DAG.getBitcast(VT, Evens); |
27678 | Odds = DAG.getBitcast(VT, Odds); |
27679 | |
27680 | // Merge the two vectors back together with a shuffle. This expands into 2 |
27681 | // shuffles. |
27682 | static const int ShufMask[] = { 0, 4, 2, 6 }; |
27683 | return DAG.getVectorShuffle(VT, dl, Evens, Odds, ShufMask); |
27684 | } |
27685 | |
27686 | assert((VT == MVT::v2i64 || VT == MVT::v4i64 || VT == MVT::v8i64) && |
27687 | "Only know how to lower V2I64/V4I64/V8I64 multiply"); |
27688 | assert(!Subtarget.hasDQI() && "DQI should use MULLQ"); |
27689 | |
27690 | //  Ahi = psrlqi(a, 32); |
27691 | //  Bhi = psrlqi(b, 32); |
27692 | // |
27693 | //  AloBlo = pmuludq(a, b); |
27694 | //  AloBhi = pmuludq(a, Bhi); |
27695 | //  AhiBlo = pmuludq(Ahi, b); |
27696 | // |
27697 | //  Hi = psllqi(AloBhi + AhiBlo, 32); |
27698 | //  return AloBlo + Hi; |
27699 | KnownBits AKnown = DAG.computeKnownBits(A); |
27700 | KnownBits BKnown = DAG.computeKnownBits(B); |
27701 | |
27702 | APInt LowerBitsMask = APInt::getLowBitsSet(64, 32); |
27703 | bool ALoIsZero = LowerBitsMask.isSubsetOf(AKnown.Zero); |
27704 | bool BLoIsZero = LowerBitsMask.isSubsetOf(BKnown.Zero); |
27705 | |
27706 | APInt UpperBitsMask = APInt::getHighBitsSet(64, 32); |
27707 | bool AHiIsZero = UpperBitsMask.isSubsetOf(AKnown.Zero); |
27708 | bool BHiIsZero = UpperBitsMask.isSubsetOf(BKnown.Zero); |
27709 | |
27710 | SDValue Zero = DAG.getConstant(0, dl, VT); |
27711 | |
27712 | // Only multiply lo/hi halves that aren't known to be zero. |
27713 | SDValue AloBlo = Zero; |
27714 | if (!ALoIsZero && !BLoIsZero) |
27715 | AloBlo = DAG.getNode(X86ISD::PMULUDQ, dl, VT, A, B); |
27716 | |
27717 | SDValue AloBhi = Zero; |
27718 | if (!ALoIsZero && !BHiIsZero) { |
27719 | SDValue Bhi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, VT, B, 32, DAG); |
27720 | AloBhi = DAG.getNode(X86ISD::PMULUDQ, dl, VT, A, Bhi); |
27721 | } |
27722 | |
27723 | SDValue AhiBlo = Zero; |
27724 | if (!AHiIsZero && !BLoIsZero) { |
27725 | SDValue Ahi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, VT, A, 32, DAG); |
27726 | AhiBlo = DAG.getNode(X86ISD::PMULUDQ, dl, VT, Ahi, B); |
27727 | } |
27728 | |
27729 | SDValue Hi = DAG.getNode(ISD::ADD, dl, VT, AloBhi, AhiBlo); |
27730 | Hi = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, Hi, 32, DAG); |
27731 | |
27732 | return DAG.getNode(ISD::ADD, dl, VT, AloBlo, Hi); |
27733 | } |
27734 | |
27735 | static SDValue LowervXi8MulWithUNPCK(SDValue A, SDValue B, const SDLoc &dl, |
27736 | MVT VT, bool IsSigned, |
27737 | const X86Subtarget &Subtarget, |
27738 | SelectionDAG &DAG, |
27739 | SDValue *Low = nullptr) { |
27740 | unsigned NumElts = VT.getVectorNumElements(); |
27741 | |
27742 | // For vXi8 we unpack the low and high half of each 128-bit lane to widen |
27743 | // to a vXi16 type, do the multiplies, shift the results and pack the |
27744 | // half-byte results back together. |
27745 | // |
27746 | // We take different approaches for signed and unsigned: |
27747 | // For unsigned we use punpcklbw/punpckhbw to zero extend the bytes to |
27748 | // words and use pmullw to calculate the full 16-bit product. |
27749 | // For signed we use punpcklbw/punpckhbw to extend the bytes to words and |
27750 | // shift them left into the upper byte of each word. This allows us to use |
27751 | // pmulhw to calculate the full 16-bit product. This trick means we don't |
27752 | // need to sign extend the bytes to use pmullw. |
27753 | |
27754 | MVT ExVT = MVT::getVectorVT(MVT::i16, NumElts / 2); |
27755 | SDValue Zero = DAG.getConstant(0, dl, VT); |
27756 | |
27757 | SDValue ALo, AHi; |
27758 | if (IsSigned) { |
27759 | ALo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, Zero, A)); |
27760 | AHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, Zero, A)); |
27761 | } else { |
27762 | ALo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, A, Zero)); |
27763 | AHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, A, Zero)); |
27764 | } |
27765 | |
27766 | SDValue BLo, BHi; |
27767 | if (ISD::isBuildVectorOfConstantSDNodes(B.getNode())) { |
27768 | // If the RHS is a constant, manually unpackl/unpackh and extend. |
27769 | SmallVector<SDValue, 16> LoOps, HiOps; |
27770 | for (unsigned i = 0; i != NumElts; i += 16) { |
27771 | for (unsigned j = 0; j != 8; ++j) { |
27772 | SDValue LoOp = B.getOperand(i + j); |
27773 | SDValue HiOp = B.getOperand(i + j + 8); |
27774 | |
27775 | if (IsSigned) { |
27776 | LoOp = DAG.getAnyExtOrTrunc(LoOp, dl, MVT::i16); |
27777 | HiOp = DAG.getAnyExtOrTrunc(HiOp, dl, MVT::i16); |
27778 | LoOp = DAG.getNode(ISD::SHL, dl, MVT::i16, LoOp, |
27779 | DAG.getConstant(8, dl, MVT::i16)); |
27780 | HiOp = DAG.getNode(ISD::SHL, dl, MVT::i16, HiOp, |
27781 | DAG.getConstant(8, dl, MVT::i16)); |
27782 | } else { |
27783 | LoOp = DAG.getZExtOrTrunc(LoOp, dl, MVT::i16); |
27784 | HiOp = DAG.getZExtOrTrunc(HiOp, dl, MVT::i16); |
27785 | } |
27786 | |
27787 | LoOps.push_back(LoOp); |
27788 | HiOps.push_back(HiOp); |
27789 | } |
27790 | } |
27791 | |
27792 | BLo = DAG.getBuildVector(ExVT, dl, LoOps); |
27793 | BHi = DAG.getBuildVector(ExVT, dl, HiOps); |
27794 | } else if (IsSigned) { |
27795 | BLo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, Zero, B)); |
27796 | BHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, Zero, B)); |
27797 | } else { |
27798 | BLo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, B, Zero)); |
27799 | BHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, B, Zero)); |
27800 | } |
27801 | |
27802 | // Multiply: for the signed case pmulhw on the shifted words yields the |
27803 | // full 16-bit product; for unsigned, pmullw on the extended words does. |
27804 | unsigned MulOpc = IsSigned ? ISD::MULHS : ISD::MUL; |
27805 | SDValue RLo = DAG.getNode(MulOpc, dl, ExVT, ALo, BLo); |
27806 | SDValue RHi = DAG.getNode(MulOpc, dl, ExVT, AHi, BHi); |
27807 | |
27808 | if (Low) { |
27809 | // Mask the lower 8 bits and pack to recover the low half of the product. |
27810 | SDValue Mask = DAG.getConstant(255, dl, ExVT); |
27811 | SDValue LLo = DAG.getNode(ISD::AND, dl, ExVT, RLo, Mask); |
27812 | SDValue LHi = DAG.getNode(ISD::AND, dl, ExVT, RHi, Mask); |
27813 | *Low = DAG.getNode(X86ISD::PACKUS, dl, VT, LLo, LHi); |
27814 | } |
27815 | |
27816 | RLo = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExVT, RLo, 8, DAG); |
27817 | RHi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExVT, RHi, 8, DAG); |
27818 | |
27819 | // Pack the high bytes of the products back together into a vXi8 result. |
27820 | return DAG.getNode(X86ISD::PACKUS, dl, VT, RLo, RHi); |
27821 | } |
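      | // Note on the signed path above: unpacking with zeros below each byte |
      | // places it in the high half of a word, i.e. computes a << 8 with the |
      | // sign bit already in position. Then mulhs(a << 8, b << 8) = |
      | // ((a * b) << 16) >> 16 = a * b, the exact 16-bit signed product |
      | // (|a * b| <= 128 * 128 fits in 16 bits), with no explicit sign |
      | // extension needed. |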
27822 | |
27823 | static SDValue LowerMULH(SDValue Op, const X86Subtarget &Subtarget, |
27824 | SelectionDAG &DAG) { |
27825 | SDLoc dl(Op); |
27826 | MVT VT = Op.getSimpleValueType(); |
27827 | bool IsSigned = Op->getOpcode() == ISD::MULHS; |
27828 | unsigned NumElts = VT.getVectorNumElements(); |
27829 | SDValue A = Op.getOperand(0); |
27830 | SDValue B = Op.getOperand(1); |
27831 | |
27832 | // Decompose 256-bit ops into 128-bit ops. |
27833 | if (VT.is256BitVector() && !Subtarget.hasInt256()) |
27834 | return splitVectorIntBinary(Op, DAG); |
27835 | |
27836 | if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !Subtarget.hasBWI()) |
27837 | return splitVectorIntBinary(Op, DAG); |
27838 | |
27839 | if (VT == MVT::v4i32 || VT == MVT::v8i32 || VT == MVT::v16i32) { |
27840 | assert((VT == MVT::v4i32 && Subtarget.hasSSE2()) || |
27841 | (VT == MVT::v8i32 && Subtarget.hasInt256()) || |
27842 | (VT == MVT::v16i32 && Subtarget.hasAVX512())); |
27843 | // PMULxD operations multiply each even value (starting at index 0) of |
27844 | // two input vectors and produce a 64-bit result, e.g. PMULUDQ: |
27845 | // |
27846 | //   <4 x i32> <a|b|c|d>, <4 x i32> <e|f|g|h> => <2 x i64> <a*e|c*g> |
27847 | // |
27848 | // The high half of each 64-bit product is exactly the MULH result for |
27849 | // the even lanes. To get the odd lanes, shuffle the odd elements of |
27850 | // both inputs into the even positions and multiply again: |
27851 | // |
27852 | //   <4 x i32> <b|?|d|?>, <4 x i32> <f|?|h|?> => <2 x i64> <b*f|d*h> |
27853 | // |
27854 | // Finally shuffle the high halves of the two products back into lane |
27855 | // order. |
27856 | const int Mask[] = {1, -1, 3, -1, 5, -1, 7, -1, |
27857 | 9, -1, 11, -1, 13, -1, 15, -1}; |
27858 | // <a|b|c|d> => <b|undef|d|undef> |
27859 | SDValue Odd0 = DAG.getVectorShuffle(VT, dl, A, A, |
27860 | makeArrayRef(&Mask[0], NumElts)); |
27861 | // <e|f|g|h> => <f|undef|h|undef> |
27862 | SDValue Odd1 = DAG.getVectorShuffle(VT, dl, B, B, |
27863 | makeArrayRef(&Mask[0], NumElts)); |
27864 | |
27865 | // Emit two i64 multiplies, one for the even elements and one for the |
27866 | // odd elements that the shuffles above moved into even positions. |
27867 | MVT MulVT = MVT::getVectorVT(MVT::i64, NumElts / 2); |
27868 | unsigned Opcode = |
27869 | (IsSigned && Subtarget.hasSSE41()) ? X86ISD::PMULDQ : X86ISD::PMULUDQ; |
27870 | |
27871 | // Multiply the even parts. |
27872 | SDValue Mul1 = DAG.getBitcast(VT, DAG.getNode(Opcode, dl, MulVT, |
27873 | DAG.getBitcast(MulVT, A), |
27874 | DAG.getBitcast(MulVT, B))); |
27875 | |
27876 | // Multiply the odd parts. |
27877 | SDValue Mul2 = DAG.getBitcast(VT, DAG.getNode(Opcode, dl, MulVT, |
27878 | DAG.getBitcast(MulVT, Odd0), |
27879 | DAG.getBitcast(MulVT, Odd1))); |
27880 | |
27881 | // Shuffle it back into the right order. |
27882 | SmallVector<int, 16> ShufMask(NumElts); |
27883 | for (int i = 0; i != (int)NumElts; ++i) |
27884 | ShufMask[i] = (i / 2) * 2 + ((i % 2) * NumElts) + 1; |
27885 | |
27886 | SDValue Res = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, ShufMask); |
27887 | |
27888 | // If we have a signed multiply but no PMULDQ, fix up the result of an |
27889 | // unsigned multiply. |
27890 | if (IsSigned && !Subtarget.hasSSE41()) { |
27891 | SDValue Zero = DAG.getConstant(0, dl, VT); |
27892 | SDValue T1 = DAG.getNode(ISD::AND, dl, VT, |
27893 | DAG.getSetCC(dl, VT, Zero, A, ISD::SETGT), B); |
27894 | SDValue T2 = DAG.getNode(ISD::AND, dl, VT, |
27895 | DAG.getSetCC(dl, VT, Zero, B, ISD::SETGT), A); |
27896 | |
27897 | SDValue Fixup = DAG.getNode(ISD::ADD, dl, VT, T1, T2); |
27898 | Res = DAG.getNode(ISD::SUB, dl, VT, Res, Fixup); |
27899 | } |
27900 | |
27901 | return Res; |
27902 | } |
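      | // The fixup above follows from the identity: viewing signed a, b as |
      | // the unsigned values a', b', |
      | //   mulhs(a, b) = mulhu(a', b') - (a < 0 ? b : 0) - (b < 0 ? a : 0), |
      | // which is why B is subtracted where A is negative and vice versa. |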
27903 | |
27904 | // Only i8 vectors should need custom lowering after this. |
27905 | assert((VT == MVT::v16i8 || (VT == MVT::v32i8 && Subtarget.hasInt256()) || |
27906 | (VT == MVT::v64i8 && Subtarget.hasBWI())) && |
27907 | "Unsupported vector type"); |
27908 | |
27909 | // Lower v16i8/v32i8/v64i8 as extension to v8i16/v16i16/v32i16 vector |
27910 | // pairs, multiply, logical shift the upper half down and pack back to i8. |
27911 | // |
27912 | // With AVX2/BWI we can widen in a single step; otherwise fall back to |
27913 | // the unpack-based lowering below, which computes the full 16-bit |
27914 | // products without explicit extensions. |
27915 | if ((VT == MVT::v16i8 && Subtarget.hasInt256()) || |
27916 | (VT == MVT::v32i8 && Subtarget.canExtendTo512BW())) { |
27917 | MVT ExVT = MVT::getVectorVT(MVT::i16, NumElts); |
27918 | unsigned ExAVX = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; |
27919 | SDValue ExA = DAG.getNode(ExAVX, dl, ExVT, A); |
27920 | SDValue ExB = DAG.getNode(ExAVX, dl, ExVT, B); |
27921 | SDValue Mul = DAG.getNode(ISD::MUL, dl, ExVT, ExA, ExB); |
27922 | Mul = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExVT, Mul, 8, DAG); |
27923 | return DAG.getNode(ISD::TRUNCATE, dl, VT, Mul); |
27924 | } |
27925 | |
27926 | return LowervXi8MulWithUNPCK(A, B, dl, VT, IsSigned, Subtarget, DAG); |
27927 | } |
27928 | |
27929 | // Custom lowering for SMULO/UMULO. |
27930 | static SDValue LowerMULO(SDValue Op, const X86Subtarget &Subtarget, |
27931 | SelectionDAG &DAG) { |
27932 | MVT VT = Op.getSimpleValueType(); |
27933 | |
27934 | // Scalar multiply-with-overflow is handled by LowerXALUO. |
27935 | if (!VT.isVector()) |
27936 | return LowerXALUO(Op, DAG); |
27937 | |
27938 | SDLoc dl(Op); |
27939 | bool IsSigned = Op->getOpcode() == ISD::SMULO; |
27940 | SDValue A = Op.getOperand(0); |
27941 | SDValue B = Op.getOperand(1); |
27942 | EVT OvfVT = Op->getValueType(1); |
27943 | |
27944 | if ((VT == MVT::v32i8 && !Subtarget.hasInt256()) || |
27945 | (VT == MVT::v64i8 && !Subtarget.hasBWI())) { |
27946 | // Extract the LHS Lo/Hi vectors. |
27947 | SDValue LHSLo, LHSHi; |
27948 | std::tie(LHSLo, LHSHi) = splitVector(A, DAG, dl); |
27949 | |
27950 | // Extract the RHS Lo/Hi vectors. |
27951 | SDValue RHSLo, RHSHi; |
27952 | std::tie(RHSLo, RHSHi) = splitVector(B, DAG, dl); |
27953 | |
27954 | EVT LoOvfVT, HiOvfVT; |
27955 | std::tie(LoOvfVT, HiOvfVT) = DAG.GetSplitDestVTs(OvfVT); |
27956 | SDVTList LoVTs = DAG.getVTList(LHSLo.getValueType(), LoOvfVT); |
27957 | SDVTList HiVTs = DAG.getVTList(LHSHi.getValueType(), HiOvfVT); |
27958 | |
27959 | // Issue the split operations. |
27960 | SDValue Lo = DAG.getNode(Op.getOpcode(), dl, LoVTs, LHSLo, RHSLo); |
27961 | SDValue Hi = DAG.getNode(Op.getOpcode(), dl, HiVTs, LHSHi, RHSHi); |
27962 | |
27963 | // Join the separate data results and the overflow results. |
27964 | SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi); |
27965 | SDValue Ovf = DAG.getNode(ISD::CONCAT_VECTORS, dl, OvfVT, Lo.getValue(1), |
27966 | Hi.getValue(1)); |
27967 | |
27968 | return DAG.getMergeValues({Res, Ovf}, dl); |
27969 | } |
27970 | |
27971 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
27972 | EVT SetccVT = |
27973 | TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); |
27974 | |
27975 | if ((VT == MVT::v16i8 && Subtarget.hasInt256()) || |
27976 | (VT == MVT::v32i8 && Subtarget.canExtendTo512BW())) { |
27977 | unsigned NumElts = VT.getVectorNumElements(); |
27978 | MVT ExVT = MVT::getVectorVT(MVT::i16, NumElts); |
27979 | unsigned ExAVX = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; |
27980 | SDValue ExA = DAG.getNode(ExAVX, dl, ExVT, A); |
27981 | SDValue ExB = DAG.getNode(ExAVX, dl, ExVT, B); |
27982 | SDValue Mul = DAG.getNode(ISD::MUL, dl, ExVT, ExA, ExB); |
27983 | |
27984 | SDValue Low = DAG.getNode(ISD::TRUNCATE, dl, VT, Mul); |
27985 | |
27986 | SDValue Ovf; |
27987 | if (IsSigned) { |
27988 | SDValue High, LowSign; |
27989 | if (OvfVT.getVectorElementType() == MVT::i1 && |
27990 | (Subtarget.hasBWI() || Subtarget.canExtendTo512DQ())) { |
27991 | |
27992 | |
27993 | High = getTargetVShiftByConstNode(X86ISD::VSRAI, dl, ExVT, Mul, 8, DAG); |
27994 | |
27995 | LowSign = |
27996 | getTargetVShiftByConstNode(X86ISD::VSHLI, dl, ExVT, Mul, 8, DAG); |
27997 | LowSign = getTargetVShiftByConstNode(X86ISD::VSRAI, dl, ExVT, LowSign, |
27998 | 15, DAG); |
27999 | SetccVT = OvfVT; |
28000 | if (!Subtarget.hasBWI()) { |
28001 | |
28002 | High = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v16i32, High); |
28003 | LowSign = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v16i32, LowSign); |
28004 | } |
28005 | } else { |
28006 | |
28007 | High = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExVT, Mul, 8, DAG); |
28008 | High = DAG.getNode(ISD::TRUNCATE, dl, VT, High); |
28009 | LowSign = |
28010 | DAG.getNode(ISD::SRA, dl, VT, Low, DAG.getConstant(7, dl, VT)); |
28011 | } |
28012 | |
28013 | Ovf = DAG.getSetCC(dl, SetccVT, LowSign, High, ISD::SETNE); |
28014 | } else { |
28015 | SDValue High = |
28016 | getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExVT, Mul, 8, DAG); |
28017 | if (OvfVT.getVectorElementType() == MVT::i1 && |
28018 | (Subtarget.hasBWI() || Subtarget.canExtendTo512DQ())) { |
28019 | |
28020 | SetccVT = OvfVT; |
28021 | if (!Subtarget.hasBWI()) { |
28022 | |
28023 | High = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v16i32, High); |
28024 | } |
28025 | } else { |
28026 | |
28027 | High = DAG.getNode(ISD::TRUNCATE, dl, VT, High); |
28028 | } |
28029 | |
28030 | Ovf = |
28031 | DAG.getSetCC(dl, SetccVT, High, |
28032 | DAG.getConstant(0, dl, High.getValueType()), ISD::SETNE); |
28033 | } |
28034 | |
28035 | Ovf = DAG.getSExtOrTrunc(Ovf, dl, OvfVT); |
28036 | |
28037 | return DAG.getMergeValues({Low, Ovf}, dl); |
28038 | } |
28039 | |
28040 | SDValue Low; |
28041 | SDValue High = |
28042 | LowervXi8MulWithUNPCK(A, B, dl, VT, IsSigned, Subtarget, DAG, &Low); |
28043 | |
28044 | SDValue Ovf; |
28045 | if (IsSigned) { |
28046 | // SMULO overflows if the high byte is not the splat of the low byte's sign. |
28047 | SDValue LowSign = |
28048 | DAG.getNode(ISD::SRA, dl, VT, Low, DAG.getConstant(7, dl, VT)); |
28049 | Ovf = DAG.getSetCC(dl, SetccVT, LowSign, High, ISD::SETNE); |
28050 | } else { |
28051 | // UMULO overflows if the high byte of the product is non-zero. |
28052 | Ovf = |
28053 | DAG.getSetCC(dl, SetccVT, High, DAG.getConstant(0, dl, VT), ISD::SETNE); |
28054 | } |
28055 | |
28056 | Ovf = DAG.getSExtOrTrunc(Ovf, dl, OvfVT); |
28057 | |
28058 | return DAG.getMergeValues({Low, Ovf}, dl); |
28059 | } |
28060 | |
28061 | SDValue X86TargetLowering::LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const { |
28062 | assert(Subtarget.isTargetWin64() && "Unexpected target"); |
28063 | EVT VT = Op.getValueType(); |
28064 | assert(VT.isInteger() && VT.getSizeInBits() == 128 && |
28065 | "Unexpected return type for lowering"); |
28066 | |
28067 | RTLIB::Libcall LC; |
28068 | bool isSigned; |
28069 | switch (Op->getOpcode()) { |
28070 | default: llvm_unreachable("Unexpected request for libcall!"); |
28071 | case ISD::SDIV: isSigned = true; LC = RTLIB::SDIV_I128; break; |
28072 | case ISD::UDIV: isSigned = false; LC = RTLIB::UDIV_I128; break; |
28073 | case ISD::SREM: isSigned = true; LC = RTLIB::SREM_I128; break; |
28074 | case ISD::UREM: isSigned = false; LC = RTLIB::UREM_I128; break; |
28075 | } |
28076 | |
28077 | SDLoc dl(Op); |
28078 | SDValue InChain = DAG.getEntryNode(); |
28079 | |
28080 | TargetLowering::ArgListTy Args; |
28081 | TargetLowering::ArgListEntry Entry; |
28082 | for (unsigned i = 0, e = Op->getNumOperands(); i != e; ++i) { |
28083 | EVT ArgVT = Op->getOperand(i).getValueType(); |
28084 | assert(ArgVT.isInteger() && ArgVT.getSizeInBits() == 128 && |
28085 | "Unexpected argument type for lowering"); |
28086 | SDValue StackPtr = DAG.CreateStackTemporary(ArgVT, 16); |
28087 | int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); |
28088 | MachinePointerInfo MPI = |
28089 | MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI); |
28090 | Entry.Node = StackPtr; |
28091 | InChain = |
28092 | DAG.getStore(InChain, dl, Op->getOperand(i), StackPtr, MPI, Align(16)); |
28093 | Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); |
28094 | Entry.Ty = PointerType::get(ArgTy,0); |
28095 | Entry.IsSExt = false; |
28096 | Entry.IsZExt = false; |
28097 | Args.push_back(Entry); |
28098 | } |
28099 | |
28100 | SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC), |
28101 | getPointerTy(DAG.getDataLayout())); |
28102 | |
28103 | TargetLowering::CallLoweringInfo CLI(DAG); |
28104 | CLI.setDebugLoc(dl) |
28105 | .setChain(InChain) |
28106 | .setLibCallee( |
28107 | getLibcallCallingConv(LC), |
28108 | static_cast<EVT>(MVT::v2i64).getTypeForEVT(*DAG.getContext()), Callee, |
28109 | std::move(Args)) |
28110 | .setInRegister() |
28111 | .setSExtResult(isSigned) |
28112 | .setZExtResult(!isSigned); |
28113 | |
28114 | std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI); |
28115 | return DAG.getBitcast(VT, CallInfo.first); |
28116 | } |
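      | // Note: Win64 has no native 128-bit integer arguments, so each i128 |
      | // operand is spilled to a 16-byte-aligned stack slot and passed to the |
      | // libcall by pointer; the callee is declared to return v2i64 (an XMM |
      | // register class under this ABI), which is then bitcast back to i128. |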
28117 | |
28118 | // Return true if the required (according to Opcode) shift-imm form |
28119 | // is natively supported by the Subtarget. |
28120 | static bool SupportedVectorShiftWithImm(MVT VT, const X86Subtarget &Subtarget, |
28121 | unsigned Opcode) { |
28122 | if (VT.getScalarSizeInBits() < 16) |
28123 | return false; |
28124 | |
28125 | if (VT.is512BitVector() && Subtarget.hasAVX512() && |
28126 | (VT.getScalarSizeInBits() > 16 || Subtarget.hasBWI())) |
28127 | return true; |
28128 | |
28129 | bool LShift = (VT.is128BitVector() && Subtarget.hasSSE2()) || |
28130 | (VT.is256BitVector() && Subtarget.hasInt256()); |
28131 | |
28132 | bool AShift = LShift && (Subtarget.hasAVX512() || |
28133 | (VT != MVT::v2i64 && VT != MVT::v4i64)); |
28134 | return (Opcode == ISD::SRA) ? AShift : LShift; |
28135 | } |
28136 | |
28137 | // The shift amount is a variable, but it is the same for all vector lanes. |
28138 | // These instructions are defined together with shift-immediate. |
28139 | static |
28140 | bool SupportedVectorShiftWithBaseAmnt(MVT VT, const X86Subtarget &Subtarget, |
28141 | unsigned Opcode) { |
28142 | return SupportedVectorShiftWithImm(VT, Subtarget, Opcode); |
28143 | } |
28144 | |
28145 | // Return true if the required (according to Opcode) variable-shift form |
28146 | // is natively supported by the Subtarget. |
28147 | static bool SupportedVectorVarShift(MVT VT, const X86Subtarget &Subtarget, |
28148 | unsigned Opcode) { |
28149 | |
28150 | if (!Subtarget.hasInt256() || VT.getScalarSizeInBits() < 16) |
28151 | return false; |
28152 | |
28153 | // vXi16 supported only on AVX-512, BWI. |
28154 | if (VT.getScalarSizeInBits() == 16 && !Subtarget.hasBWI()) |
28155 | return false; |
28156 | |
28157 | if (Subtarget.hasAVX512()) |
28158 | return true; |
28159 | |
28160 | bool LShift = VT.is128BitVector() || VT.is256BitVector(); |
28161 | bool AShift = LShift && VT != MVT::v2i64 && VT != MVT::v4i64; |
28162 | return (Opcode == ISD::SRA) ? AShift : LShift; |
28163 | } |
28164 | |
28165 | static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG, |
28166 | const X86Subtarget &Subtarget) { |
28167 | MVT VT = Op.getSimpleValueType(); |
28168 | SDLoc dl(Op); |
28169 | SDValue R = Op.getOperand(0); |
28170 | SDValue Amt = Op.getOperand(1); |
28171 | unsigned X86Opc = getTargetVShiftUniformOpcode(Op.getOpcode(), false); |
28172 | |
28173 | auto ArithmeticShiftRight64 = [&](uint64_t ShiftAmt) { |
28174 | assert((VT == MVT::v2i64 || VT == MVT::v4i64) && "Unexpected SRA type"); |
28175 | MVT ExVT = MVT::getVectorVT(MVT::i32, VT.getVectorNumElements() * 2); |
28176 | SDValue Ex = DAG.getBitcast(ExVT, R); |
28177 | |
28178 | // ashr(R, 63) === cmp_slt(R, 0) |
28179 | if (ShiftAmt == 63 && Subtarget.hasSSE42()) { |
28180 | assert((VT != MVT::v4i64 || Subtarget.hasInt256()) && |
28181 | "Unsupported PCMPGT op"); |
28182 | return DAG.getNode(X86ISD::PCMPGT, dl, VT, DAG.getConstant(0, dl, VT), R); |
28183 | } |
28184 | |
28185 | if (ShiftAmt >= 32) { |
28186 | // Splat sign to upper i32 dst, and SRA upper i32 src to lower i32. |
28187 | SDValue Upper = |
28188 | getTargetVShiftByConstNode(X86ISD::VSRAI, dl, ExVT, Ex, 31, DAG); |
28189 | SDValue Lower = getTargetVShiftByConstNode(X86ISD::VSRAI, dl, ExVT, Ex, |
28190 | ShiftAmt - 32, DAG); |
28191 | if (VT == MVT::v2i64) |
28192 | Ex = DAG.getVectorShuffle(ExVT, dl, Upper, Lower, {5, 1, 7, 3}); |
28193 | if (VT == MVT::v4i64) |
28194 | Ex = DAG.getVectorShuffle(ExVT, dl, Upper, Lower, |
28195 | {9, 1, 11, 3, 13, 5, 15, 7}); |
28196 | } else { |
28197 | // SRA upper i32, SRL whole i64 and select lower i32. |
28198 | SDValue Upper = getTargetVShiftByConstNode(X86ISD::VSRAI, dl, ExVT, Ex, |
28199 | ShiftAmt, DAG); |
28200 | SDValue Lower = |
28201 | getTargetVShiftByConstNode(X86ISD::VSRLI, dl, VT, R, ShiftAmt, DAG); |
28202 | Lower = DAG.getBitcast(ExVT, Lower); |
28203 | if (VT == MVT::v2i64) |
28204 | Ex = DAG.getVectorShuffle(ExVT, dl, Upper, Lower, {4, 1, 6, 3}); |
28205 | if (VT == MVT::v4i64) |
28206 | Ex = DAG.getVectorShuffle(ExVT, dl, Upper, Lower, |
28207 | {8, 1, 10, 3, 12, 5, 14, 7}); |
28208 | } |
28209 | return DAG.getBitcast(VT, Ex); |
28210 | }; |
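      | // In short: with only 32-bit arithmetic shifts available, a 64-bit ashr |
      | // by c >= 32 is assembled as { lo = hi >>s (c - 32), hi = hi >>s 31 }, |
      | // and by c < 32 as { lo = low half of the logical i64 shift, |
      | // hi = hi >>s c }, with the shuffles selecting the matching halves. |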
28211 | |
28212 | // Optimize shl/srl/sra with constant shift amount. |
28213 | APInt APIntShiftAmt; |
28214 | if (!X86::isConstantSplat(Amt, APIntShiftAmt)) |
28215 | return SDValue(); |
28216 | |
28217 | // If the shift amount is out of range, return undef. |
28218 | if (APIntShiftAmt.uge(VT.getScalarSizeInBits())) |
28219 | return DAG.getUNDEF(VT); |
28220 | |
28221 | uint64_t ShiftAmt = APIntShiftAmt.getZExtValue(); |
28222 | |
28223 | if (SupportedVectorShiftWithImm(VT, Subtarget, Op.getOpcode())) |
28224 | return getTargetVShiftByConstNode(X86Opc, dl, VT, R, ShiftAmt, DAG); |
28225 | |
28226 | // i64 SRA needs to be performed as partial shifts. |
28227 | if (((!Subtarget.hasXOP() && VT == MVT::v2i64) || |
28228 | (Subtarget.hasInt256() && VT == MVT::v4i64)) && |
28229 | Op.getOpcode() == ISD::SRA) |
28230 | return ArithmeticShiftRight64(ShiftAmt); |
28231 | |
28232 | if (VT == MVT::v16i8 || (Subtarget.hasInt256() && VT == MVT::v32i8) || |
28233 | (Subtarget.hasBWI() && VT == MVT::v64i8)) { |
28234 | unsigned NumElts = VT.getVectorNumElements(); |
28235 | MVT ShiftVT = MVT::getVectorVT(MVT::i16, NumElts / 2); |
28236 | |
28237 | // Simple i8 add case. |
28238 | if (Op.getOpcode() == ISD::SHL && ShiftAmt == 1) |
28239 | return DAG.getNode(ISD::ADD, dl, VT, R, R); |
28240 | |
28241 | // ashr(R, 7) === cmp_slt(R, 0) |
28242 | if (Op.getOpcode() == ISD::SRA && ShiftAmt == 7) { |
28243 | SDValue Zeros = DAG.getConstant(0, dl, VT); |
28244 | if (VT.is512BitVector()) { |
28245 | assert(VT == MVT::v64i8 && "Unexpected element type!"); |
28246 | SDValue CMP = DAG.getSetCC(dl, MVT::v64i1, Zeros, R, ISD::SETGT); |
28247 | return DAG.getNode(ISD::SIGN_EXTEND, dl, VT, CMP); |
28248 | } |
28249 | return DAG.getNode(X86ISD::PCMPGT, dl, VT, Zeros, R); |
28250 | } |
28251 | |
28252 | // XOP can shift v16i8 directly instead of as shift v8i16 + mask. |
28253 | if (VT == MVT::v16i8 && Subtarget.hasXOP()) |
28254 | return SDValue(); |
28255 | |
28256 | if (Op.getOpcode() == ISD::SHL) { |
28257 | // Make a large shift. |
28258 | SDValue SHL = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, ShiftVT, R, |
28259 | ShiftAmt, DAG); |
28260 | SHL = DAG.getBitcast(VT, SHL); |
28261 | // Zero out the rightmost bits. |
28262 | APInt Mask = APInt::getHighBitsSet(8, 8 - ShiftAmt); |
28263 | return DAG.getNode(ISD::AND, dl, VT, SHL, DAG.getConstant(Mask, dl, VT)); |
28264 | } |
28265 | if (Op.getOpcode() == ISD::SRL) { |
28266 | // Make a large shift. |
28267 | SDValue SRL = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ShiftVT, R, |
28268 | ShiftAmt, DAG); |
28269 | SRL = DAG.getBitcast(VT, SRL); |
28270 | // Zero out the leftmost bits. |
28271 | APInt Mask = APInt::getLowBitsSet(8, 8 - ShiftAmt); |
28272 | return DAG.getNode(ISD::AND, dl, VT, SRL, DAG.getConstant(Mask, dl, VT)); |
28273 | } |
28274 | if (Op.getOpcode() == ISD::SRA) { |
28275 | // ashr(R, Amt) === sub(xor(lshr(R, Amt), Mask), Mask) |
28276 | SDValue Res = DAG.getNode(ISD::SRL, dl, VT, R, Amt); |
28277 | |
28278 | SDValue Mask = DAG.getConstant(128 >> ShiftAmt, dl, VT); |
28279 | Res = DAG.getNode(ISD::XOR, dl, VT, Res, Mask); |
28280 | Res = DAG.getNode(ISD::SUB, dl, VT, Res, Mask); |
28281 | return Res; |
28282 | } |
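      | // Worked example of the SRA mask trick above: R = 0x80 (-128) and |
      | // ShiftAmt = 1 give lshr = 0x40 and Mask = 128 >> 1 = 0x40; then |
      | // (0x40 ^ 0x40) - 0x40 = 0xC0 = -64, which is indeed -128 >>s 1. |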
28283 | llvm_unreachable("Unknown shift opcode."); |
28284 | } |
28285 | |
28286 | return SDValue(); |
28287 | } |
28288 | |
28289 | static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG, |
28290 | const X86Subtarget &Subtarget) { |
28291 | MVT VT = Op.getSimpleValueType(); |
28292 | SDLoc dl(Op); |
28293 | SDValue R = Op.getOperand(0); |
28294 | SDValue Amt = Op.getOperand(1); |
28295 | unsigned Opcode = Op.getOpcode(); |
28296 | unsigned X86OpcI = getTargetVShiftUniformOpcode(Opcode, false); |
28297 | unsigned X86OpcV = getTargetVShiftUniformOpcode(Opcode, true); |
28298 | |
28299 | if (SDValue BaseShAmt = DAG.getSplatValue(Amt)) { |
28300 | if (SupportedVectorShiftWithBaseAmnt(VT, Subtarget, Opcode)) { |
28301 | MVT EltVT = VT.getVectorElementType(); |
28302 | assert(EltVT.bitsLE(MVT::i64) && "Unexpected element type!"); |
28303 | if (EltVT != MVT::i64 && EltVT.bitsGT(MVT::i32)) |
28304 | BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, BaseShAmt); |
28305 | else if (EltVT.bitsLT(MVT::i32)) |
28306 | BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, BaseShAmt); |
28307 | |
28308 | return getTargetVShiftNode(X86OpcI, dl, VT, R, BaseShAmt, Subtarget, DAG); |
28309 | } |
28310 | |
28311 | // vXi8 shifts - shift as v8i16 + mask result. |
28312 | if (((VT == MVT::v16i8 && !Subtarget.canExtendTo512DQ()) || |
28313 | (VT == MVT::v32i8 && !Subtarget.canExtendTo512BW()) || |
28314 | VT == MVT::v64i8) && |
28315 | !Subtarget.hasXOP()) { |
28316 | unsigned NumElts = VT.getVectorNumElements(); |
28317 | MVT ExtVT = MVT::getVectorVT(MVT::i16, NumElts / 2); |
28318 | if (SupportedVectorShiftWithBaseAmnt(ExtVT, Subtarget, Opcode)) { |
28319 | unsigned LogicalOp = (Opcode == ISD::SHL ? ISD::SHL : ISD::SRL); |
28320 | unsigned LogicalX86Op = getTargetVShiftUniformOpcode(LogicalOp, false); |
28321 | BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, BaseShAmt); |
28322 | |
28323 | // Create the mask using vXi16 shifts. For shift-rights we need to move |
28324 | // the upper byte shifted bits back to the lower byte. |
28325 | SDValue BitMask = DAG.getConstant(-1, dl, ExtVT); |
28326 | BitMask = getTargetVShiftNode(LogicalX86Op, dl, ExtVT, BitMask, |
28327 | BaseShAmt, Subtarget, DAG); |
28328 | if (Opcode != ISD::SHL) |
28329 | BitMask = getTargetVShiftByConstNode(LogicalX86Op, dl, ExtVT, BitMask, |
28330 | 8, DAG); |
28331 | BitMask = DAG.getBitcast(VT, BitMask); |
28332 | BitMask = DAG.getVectorShuffle(VT, dl, BitMask, BitMask, |
28333 | SmallVector<int, 64>(NumElts, 0)); |
28334 | |
28335 | SDValue Res = getTargetVShiftNode(LogicalX86Op, dl, ExtVT, |
28336 | DAG.getBitcast(ExtVT, R), BaseShAmt, |
28337 | Subtarget, DAG); |
28338 | Res = DAG.getBitcast(VT, Res); |
28339 | Res = DAG.getNode(ISD::AND, dl, VT, Res, BitMask); |
28340 | |
28341 | if (Opcode == ISD::SRA) { |
28342 | // ashr(R, Amt) === sub(xor(lshr(R, Amt), SignMask), SignMask) |
28343 | // SignMask = lshr(SignBit, Amt) - safe to do this with PSRLW. |
28344 | SDValue SignMask = DAG.getConstant(0x8080, dl, ExtVT); |
28345 | SignMask = getTargetVShiftNode(LogicalX86Op, dl, ExtVT, SignMask, |
28346 | BaseShAmt, Subtarget, DAG); |
28347 | SignMask = DAG.getBitcast(VT, SignMask); |
28348 | Res = DAG.getNode(ISD::XOR, dl, VT, Res, SignMask); |
28349 | Res = DAG.getNode(ISD::SUB, dl, VT, Res, SignMask); |
28350 | } |
28351 | return Res; |
28352 | } |
28353 | } |
28354 | } |
28355 | |
28356 | // Check cases (mainly 32-bit) where i64 is expanded into high and low parts. |
28357 | if (VT == MVT::v2i64 && Amt.getOpcode() == ISD::BITCAST && |
28358 | Amt.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) { |
28359 | Amt = Amt.getOperand(0); |
28360 | unsigned Ratio = 64 / Amt.getScalarValueSizeInBits(); |
28361 | std::vector<SDValue> Vals(Ratio); |
28362 | for (unsigned i = 0; i != Ratio; ++i) |
28363 | Vals[i] = Amt.getOperand(i); |
28364 | for (unsigned i = Ratio, e = Amt.getNumOperands(); i != e; i += Ratio) { |
28365 | for (unsigned j = 0; j != Ratio; ++j) |
28366 | if (Vals[j] != Amt.getOperand(i + j)) |
28367 | return SDValue(); |
28368 | } |
28369 | |
28370 | if (SupportedVectorShiftWithBaseAmnt(VT, Subtarget, Op.getOpcode())) |
28371 | return DAG.getNode(X86OpcV, dl, VT, R, Op.getOperand(1)); |
28372 | } |
28373 | return SDValue(); |
28374 | } |
28375 | |
28376 | // Convert a shift-left amount into an equivalent power-of-2 multiplier. |
28377 | static SDValue convertShiftLeftToScale(SDValue Amt, const SDLoc &dl, |
28378 | const X86Subtarget &Subtarget, |
28379 | SelectionDAG &DAG) { |
28380 | MVT VT = Amt.getSimpleValueType(); |
28381 | if (!(VT == MVT::v8i16 || VT == MVT::v4i32 || |
28382 | (Subtarget.hasInt256() && VT == MVT::v16i16) || |
28383 | (Subtarget.hasVBMI2() && VT == MVT::v32i16) || |
28384 | (!Subtarget.hasAVX512() && VT == MVT::v16i8))) |
28385 | return SDValue(); |
28386 | |
28387 | if (ISD::isBuildVectorOfConstantSDNodes(Amt.getNode())) { |
28388 | SmallVector<SDValue, 8> Elts; |
28389 | MVT SVT = VT.getVectorElementType(); |
28390 | unsigned SVTBits = SVT.getSizeInBits(); |
28391 | APInt One(SVTBits, 1); |
28392 | unsigned NumElems = VT.getVectorNumElements(); |
28393 | |
28394 | for (unsigned i = 0; i != NumElems; ++i) { |
28395 | SDValue Op = Amt->getOperand(i); |
28396 | if (Op->isUndef()) { |
28397 | Elts.push_back(Op); |
28398 | continue; |
28399 | } |
28400 | |
28401 | ConstantSDNode *ND = cast<ConstantSDNode>(Op); |
28402 | APInt C(SVTBits, ND->getZExtValue()); |
28403 | uint64_t ShAmt = C.getZExtValue(); |
28404 | if (ShAmt >= SVTBits) { |
28405 | Elts.push_back(DAG.getUNDEF(SVT)); |
28406 | continue; |
28407 | } |
28408 | Elts.push_back(DAG.getConstant(One.shl(ShAmt), dl, SVT)); |
28409 | } |
28410 | return DAG.getBuildVector(VT, dl, Elts); |
28411 | } |
28412 | |
28413 | // If the target doesn't support variable shifts, use either FP conversion |
28414 | // or integer multiplication to avoid shifting each element individually. |
28415 | if (VT == MVT::v4i32) { |
28416 | Amt = DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(23, dl, VT)); |
28417 | Amt = DAG.getNode(ISD::ADD, dl, VT, Amt, |
28418 | DAG.getConstant(0x3f800000U, dl, VT)); |
28419 | Amt = DAG.getBitcast(MVT::v4f32, Amt); |
28420 | return DAG.getNode(ISD::FP_TO_SINT, dl, VT, Amt); |
28421 | } |
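      | // The v4i32 path above builds the float 2^Amt directly in its bit |
      | // pattern: for Amt = 5, (5 << 23) + 0x3f800000 = 0x42000000, the bits |
      | // of 32.0f, and FP_TO_SINT turns that into the integer scale 32. |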
28422 | |
28423 | // AVX2 can more effectively perform this as a zext/trunc to/from v8i32. |
28424 | if (VT == MVT::v8i16 && !Subtarget.hasAVX2()) { |
28425 | SDValue Z = DAG.getConstant(0, dl, VT); |
28426 | SDValue Lo = DAG.getBitcast(MVT::v4i32, getUnpackl(DAG, dl, VT, Amt, Z)); |
28427 | SDValue Hi = DAG.getBitcast(MVT::v4i32, getUnpackh(DAG, dl, VT, Amt, Z)); |
28428 | Lo = convertShiftLeftToScale(Lo, dl, Subtarget, DAG); |
28429 | Hi = convertShiftLeftToScale(Hi, dl, Subtarget, DAG); |
28430 | if (Subtarget.hasSSE41()) |
28431 | return DAG.getNode(X86ISD::PACKUS, dl, VT, Lo, Hi); |
28432 | |
28433 | return DAG.getVectorShuffle(VT, dl, DAG.getBitcast(VT, Lo), |
28434 | DAG.getBitcast(VT, Hi), |
28435 | {0, 2, 4, 6, 8, 10, 12, 14}); |
28436 | } |
28437 | |
28438 | return SDValue(); |
28439 | } |
28440 | |
28441 | static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget, |
28442 | SelectionDAG &DAG) { |
28443 | MVT VT = Op.getSimpleValueType(); |
28444 | SDLoc dl(Op); |
28445 | SDValue R = Op.getOperand(0); |
28446 | SDValue Amt = Op.getOperand(1); |
28447 | unsigned EltSizeInBits = VT.getScalarSizeInBits(); |
28448 | bool ConstantAmt = ISD::isBuildVectorOfConstantSDNodes(Amt.getNode()); |
28449 | |
28450 | unsigned Opc = Op.getOpcode(); |
28451 | unsigned X86OpcV = getTargetVShiftUniformOpcode(Opc, true); |
28452 | unsigned X86OpcI = getTargetVShiftUniformOpcode(Opc, false); |
28453 | |
28454 | assert(VT.isVector() && "Custom lowering only for vector shifts!"); |
28455 | assert(Subtarget.hasSSE2() && "Only custom lower when we have SSE2!"); |
28456 | |
28457 | if (SDValue V = LowerScalarImmediateShift(Op, DAG, Subtarget)) |
28458 | return V; |
28459 | |
28460 | if (SDValue V = LowerScalarVariableShift(Op, DAG, Subtarget)) |
28461 | return V; |
28462 | |
28463 | if (SupportedVectorVarShift(VT, Subtarget, Opc)) |
28464 | return Op; |
28465 | |
28466 | // XOP has 128-bit variable logical/arithmetic shifts. |
28467 | // +ve/-ve Amt = shift left/right. |
28468 | if (Subtarget.hasXOP() && (VT == MVT::v2i64 || VT == MVT::v4i32 || |
28469 | VT == MVT::v8i16 || VT == MVT::v16i8)) { |
28470 | if (Opc == ISD::SRL || Opc == ISD::SRA) { |
28471 | SDValue Zero = DAG.getConstant(0, dl, VT); |
28472 | Amt = DAG.getNode(ISD::SUB, dl, VT, Zero, Amt); |
28473 | } |
28474 | if (Opc == ISD::SHL || Opc == ISD::SRL) |
28475 | return DAG.getNode(X86ISD::VPSHL, dl, VT, R, Amt); |
28476 | if (Opc == ISD::SRA) |
28477 | return DAG.getNode(X86ISD::VPSHA, dl, VT, R, Amt); |
28478 | } |
28479 | |
28480 | // 2i64 vector logical shifts can efficiently avoid scalarization - do the |
28481 | // shifts per-lane and then shuffle the partial results back together. |
28482 | if (VT == MVT::v2i64 && Opc != ISD::SRA) { |
28483 | |
28484 | SDValue Amt0 = DAG.getVectorShuffle(VT, dl, Amt, Amt, {0, 0}); |
28485 | SDValue Amt1 = DAG.getVectorShuffle(VT, dl, Amt, Amt, {1, 1}); |
28486 | SDValue R0 = DAG.getNode(Opc, dl, VT, R, Amt0); |
28487 | SDValue R1 = DAG.getNode(Opc, dl, VT, R, Amt1); |
28488 | return DAG.getVectorShuffle(VT, dl, R0, R1, {0, 3}); |
28489 | } |
28490 | |
28491 | // i64 vector arithmetic shift can be emulated with the transform: |
28492 | //   M = lshr(SIGN_MASK, Amt) |
28493 | //   ashr(R, Amt) === sub(xor(lshr(R, Amt), M), M) |
28494 | if ((VT == MVT::v2i64 || (VT == MVT::v4i64 && Subtarget.hasInt256())) && |
28495 | Opc == ISD::SRA) { |
28496 | SDValue S = DAG.getConstant(APInt::getSignMask(64), dl, VT); |
28497 | SDValue M = DAG.getNode(ISD::SRL, dl, VT, S, Amt); |
28498 | R = DAG.getNode(ISD::SRL, dl, VT, R, Amt); |
28499 | R = DAG.getNode(ISD::XOR, dl, VT, R, M); |
28500 | R = DAG.getNode(ISD::SUB, dl, VT, R, M); |
28501 | return R; |
28502 | } |
28503 | |
28504 | // If possible, lower this shift as a sequence of two shifts by |
28505 | // constant plus a BLENDing shuffle instead of scalarizing it. |
28506 | // Example: |
28507 | //   (v4i32 (srl A, (build_vector <X, Y, Y, Y>))) |
28508 | // |
28509 | // Could be rewritten as: |
28510 | //   (v4i32 (MOVSS (srl A, <Y,Y,Y,Y>), (srl A, <X,X,X,X>))) |
28511 | // |
28512 | // The advantage is that the two shifts from the example would be |
28513 | // lowered as X86ISD::VSRLI nodes in parallel before blending. |
28514 | if (ConstantAmt && (VT == MVT::v8i16 || VT == MVT::v4i32 || |
28515 | (VT == MVT::v16i16 && Subtarget.hasInt256()))) { |
28516 | SDValue Amt1, Amt2; |
28517 | unsigned NumElts = VT.getVectorNumElements(); |
28518 | SmallVector<int, 8> ShuffleMask; |
28519 | for (unsigned i = 0; i != NumElts; ++i) { |
28520 | SDValue A = Amt->getOperand(i); |
28521 | if (A.isUndef()) { |
28522 | ShuffleMask.push_back(SM_SentinelUndef); |
28523 | continue; |
28524 | } |
28525 | if (!Amt1 || Amt1 == A) { |
28526 | ShuffleMask.push_back(i); |
28527 | Amt1 = A; |
28528 | continue; |
28529 | } |
28530 | if (!Amt2 || Amt2 == A) { |
28531 | ShuffleMask.push_back(i + NumElts); |
28532 | Amt2 = A; |
28533 | continue; |
28534 | } |
28535 | break; |
28536 | } |
28537 | |
28538 | // Only perform this blend if we can perform it without loading a mask. |
28539 | if (ShuffleMask.size() == NumElts && Amt1 && Amt2 && |
28540 | (VT != MVT::v16i16 || |
28541 | is128BitLaneRepeatedShuffleMask(VT, ShuffleMask)) && |
28542 | (VT == MVT::v4i32 || Subtarget.hasSSE41() || Opc != ISD::SHL || |
28543 | canWidenShuffleElements(ShuffleMask))) { |
28544 | auto *Cst1 = dyn_cast<ConstantSDNode>(Amt1); |
28545 | auto *Cst2 = dyn_cast<ConstantSDNode>(Amt2); |
28546 | if (Cst1 && Cst2 && Cst1->getAPIntValue().ult(EltSizeInBits) && |
28547 | Cst2->getAPIntValue().ult(EltSizeInBits)) { |
28548 | SDValue Shift1 = getTargetVShiftByConstNode(X86OpcI, dl, VT, R, |
28549 | Cst1->getZExtValue(), DAG); |
28550 | SDValue Shift2 = getTargetVShiftByConstNode(X86OpcI, dl, VT, R, |
28551 | Cst2->getZExtValue(), DAG); |
28552 | return DAG.getVectorShuffle(VT, dl, Shift1, Shift2, ShuffleMask); |
28553 | } |
28554 | } |
28555 | } |
28556 | |
28557 | // If possible, lower this packed shift into a vector multiply instead of |
28558 | // expanding it into a sequence of scalar shifts. |
28559 | if (Opc == ISD::SHL) |
28560 | if (SDValue Scale = convertShiftLeftToScale(Amt, dl, Subtarget, DAG)) |
28561 | return DAG.getNode(ISD::MUL, dl, VT, R, Scale); |
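      | // Illustrative note (added): convertShiftLeftToScale builds the per-lane |
      | // constant <2^s0, 2^s1, ...>, so e.g. shl <1,2,3,4>, <0,1,2,3> becomes |
      | // mul <1,2,3,4>, <1,2,4,8> = <1,4,12,32>. |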
28562 | |
28563 | // Constant ISD::SRL can be performed efficiently on vXi16 vectors as we |
28564 | // can replace with ISD::MULHU, creating scale factor from (NumEltBits - Amt). |
28565 | if (Opc == ISD::SRL && ConstantAmt && |
28566 | (VT == MVT::v8i16 || (VT == MVT::v16i16 && Subtarget.hasInt256()))) { |
28567 | SDValue EltBits = DAG.getConstant(EltSizeInBits, dl, VT); |
28568 | SDValue RAmt = DAG.getNode(ISD::SUB, dl, VT, EltBits, Amt); |
28569 | if (SDValue Scale = convertShiftLeftToScale(RAmt, dl, Subtarget, DAG)) { |
28570 | SDValue Zero = DAG.getConstant(0, dl, VT); |
28571 | SDValue ZAmt = DAG.getSetCC(dl, VT, Amt, Zero, ISD::SETEQ); |
28572 | SDValue Res = DAG.getNode(ISD::MULHU, dl, VT, R, Scale); |
28573 | return DAG.getSelect(dl, VT, ZAmt, R, Res); |
28574 | } |
28575 | } |
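      | // Illustrative note (added): for vXi16, lshr(x, s) == mulhu(x, 2^(16-s)). |
      | // E.g. s = 3: mulhu(0xFFFF, 0x2000) = (0xFFFF * 8192) >> 16 = 8191 = |
      | // 0xFFFF >> 3. s = 0 would need a scale of 2^16, which doesn't fit in an |
      | // i16 - hence the explicit SETEQ-zero select above. |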
28576 | |
28577 | // Constant ISD::SRA can be performed efficiently on vXi16 vectors as we |
28578 | // can replace with ISD::MULHS, creating scale factor from (NumEltBits - Amt). |
28579 | // TODO: Special case handling for shift by 0/1, really we can afford either |
28580 | // of these cases in pre-SSE41/XOP/AVX512 but not both. |
28581 | if (Opc == ISD::SRA && ConstantAmt && |
28582 | (VT == MVT::v8i16 || (VT == MVT::v16i16 && Subtarget.hasInt256())) && |
28583 | ((Subtarget.hasSSE41() && !Subtarget.hasXOP() && |
28584 | !Subtarget.hasAVX512()) || |
28585 | DAG.isKnownNeverZero(Amt))) { |
28586 | SDValue EltBits = DAG.getConstant(EltSizeInBits, dl, VT); |
28587 | SDValue RAmt = DAG.getNode(ISD::SUB, dl, VT, EltBits, Amt); |
28588 | if (SDValue Scale = convertShiftLeftToScale(RAmt, dl, Subtarget, DAG)) { |
28589 | SDValue Amt0 = |
28590 | DAG.getSetCC(dl, VT, Amt, DAG.getConstant(0, dl, VT), ISD::SETEQ); |
28591 | SDValue Amt1 = |
28592 | DAG.getSetCC(dl, VT, Amt, DAG.getConstant(1, dl, VT), ISD::SETEQ); |
28593 | SDValue Sra1 = |
28594 | getTargetVShiftByConstNode(X86ISD::VSRAI, dl, VT, R, 1, DAG); |
28595 | SDValue Res = DAG.getNode(ISD::MULHS, dl, VT, R, Scale); |
28596 | Res = DAG.getSelect(dl, VT, Amt0, R, Res); |
28597 | return DAG.getSelect(dl, VT, Amt1, Sra1, Res); |
28598 | } |
28599 | } |
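      | // Illustrative note (added): the two selects above exist because the MULHS |
      | // trick ashr(x, s) == mulhs(x, 2^(16-s)) breaks down at the edges: s = 0 |
      | // needs scale 2^16 (unrepresentable) and s = 1 needs scale 2^15, which is |
      | // -32768 as a signed i16 and flips the sign of the product. Those lanes |
      | // are patched with R itself and an explicit VSRAI-by-1 instead. |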
28600 | |
28601 | // v4i32 Non Uniform Shifts. |
28602 | // If the shift amount is constant we can shift each lane using the SSE2 |
28603 | // immediate shifts, else we need to zero-extend each lane to the lower i64 |
28604 | // and shift using the SSE2 variable shifts. |
28605 | // The separate results can then be blended together. |
28606 | if (VT == MVT::v4i32) { |
28607 | SDValue Amt0, Amt1, Amt2, Amt3; |
28608 | if (ConstantAmt) { |
28609 | Amt0 = DAG.getVectorShuffle(VT, dl, Amt, DAG.getUNDEF(VT), {0, 0, 0, 0}); |
28610 | Amt1 = DAG.getVectorShuffle(VT, dl, Amt, DAG.getUNDEF(VT), {1, 1, 1, 1}); |
28611 | Amt2 = DAG.getVectorShuffle(VT, dl, Amt, DAG.getUNDEF(VT), {2, 2, 2, 2}); |
28612 | Amt3 = DAG.getVectorShuffle(VT, dl, Amt, DAG.getUNDEF(VT), {3, 3, 3, 3}); |
28613 | } else { |
28614 | // The SSE2 shifts use the lower i64 as the same shift amount for |
28615 | // all lanes and the upper i64 is ignored. On AVX we're better off |
28616 | // just zero-extending, but for SSE just duplicating the top 16-bits is |
28617 | // cheaper and has the same effect for out of range values. |
28618 | if (Subtarget.hasAVX()) { |
28619 | SDValue Z = DAG.getConstant(0, dl, VT); |
28620 | Amt0 = DAG.getVectorShuffle(VT, dl, Amt, Z, {0, 4, -1, -1}); |
28621 | Amt1 = DAG.getVectorShuffle(VT, dl, Amt, Z, {1, 5, -1, -1}); |
28622 | Amt2 = DAG.getVectorShuffle(VT, dl, Amt, Z, {2, 6, -1, -1}); |
28623 | Amt3 = DAG.getVectorShuffle(VT, dl, Amt, Z, {3, 7, -1, -1}); |
28624 | } else { |
28625 | SDValue Amt01 = DAG.getBitcast(MVT::v8i16, Amt); |
28626 | SDValue Amt23 = DAG.getVectorShuffle(MVT::v8i16, dl, Amt01, Amt01, |
28627 | {4, 5, 6, 7, -1, -1, -1, -1}); |
28628 | Amt0 = DAG.getVectorShuffle(MVT::v8i16, dl, Amt01, Amt01, |
28629 | {0, 1, 1, 1, -1, -1, -1, -1}); |
28630 | Amt1 = DAG.getVectorShuffle(MVT::v8i16, dl, Amt01, Amt01, |
28631 | {2, 3, 3, 3, -1, -1, -1, -1}); |
28632 | Amt2 = DAG.getVectorShuffle(MVT::v8i16, dl, Amt23, Amt23, |
28633 | {0, 1, 1, 1, -1, -1, -1, -1}); |
28634 | Amt3 = DAG.getVectorShuffle(MVT::v8i16, dl, Amt23, Amt23, |
28635 | {2, 3, 3, 3, -1, -1, -1, -1}); |
28636 | } |
28637 | } |
28638 | |
28639 | unsigned ShOpc = ConstantAmt ? Opc : X86OpcV; |
28640 | SDValue R0 = DAG.getNode(ShOpc, dl, VT, R, DAG.getBitcast(VT, Amt0)); |
28641 | SDValue R1 = DAG.getNode(ShOpc, dl, VT, R, DAG.getBitcast(VT, Amt1)); |
28642 | SDValue R2 = DAG.getNode(ShOpc, dl, VT, R, DAG.getBitcast(VT, Amt2)); |
28643 | SDValue R3 = DAG.getNode(ShOpc, dl, VT, R, DAG.getBitcast(VT, Amt3)); |
28644 | |
28645 | // Merge the shifted lane results optimally with/without PBLENDW. |
28646 | // TODO - ideally shuffle combining would handle this. |
28647 | if (Subtarget.hasSSE41()) { |
28648 | SDValue R02 = DAG.getVectorShuffle(VT, dl, R0, R2, {0, -1, 6, -1}); |
28649 | SDValue R13 = DAG.getVectorShuffle(VT, dl, R1, R3, {-1, 1, -1, 7}); |
28650 | return DAG.getVectorShuffle(VT, dl, R02, R13, {0, 5, 2, 7}); |
28651 | } |
28652 | SDValue R01 = DAG.getVectorShuffle(VT, dl, R0, R1, {0, -1, -1, 5}); |
28653 | SDValue R23 = DAG.getVectorShuffle(VT, dl, R2, R3, {2, -1, -1, 7}); |
28654 | return DAG.getVectorShuffle(VT, dl, R01, R23, {0, 3, 4, 7}); |
28655 | } |
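      | // Illustrative note (added): each Ri computed above is only correct in lane |
      | // i, so the final shuffles just gather lane 0 from R0, lane 1 from R1, etc. |
      | // E.g. {0, 5, 2, 7} picks lanes 0/2 of R02 and lanes 1/3 of R13. |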
28656 | |
28657 | |
28658 | // It's worth extending once and using the vXi16/vXi32 shifts for smaller |
28659 | // types, but without AVX512 the extra overheads to get from vXi8 to vXi32 |
28660 | // make the existing SSE solution better. |
28661 | if ((Subtarget.hasInt256() && VT == MVT::v8i16) || |
28662 | (Subtarget.canExtendTo512DQ() && VT == MVT::v16i16) || |
28663 | (Subtarget.canExtendTo512DQ() && VT == MVT::v16i8) || |
28664 | (Subtarget.canExtendTo512BW() && VT == MVT::v32i8) || |
28665 | (Subtarget.hasBWI() && Subtarget.hasVLX() && VT == MVT::v16i8)) { |
28666 | assert((!Subtarget.hasBWI() || VT == MVT::v32i8 || VT == MVT::v16i8) && |
28667 | "Unexpected vector type"); |
28668 | MVT EvtSVT = Subtarget.hasBWI() ? MVT::i16 : MVT::i32; |
28669 | MVT ExtVT = MVT::getVectorVT(EvtSVT, VT.getVectorNumElements()); |
28670 | unsigned ExtOpc = Opc == ISD::SRA ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; |
28671 | R = DAG.getNode(ExtOpc, dl, ExtVT, R); |
28672 | Amt = DAG.getNode(ISD::ZERO_EXTEND, dl, ExtVT, Amt); |
28673 | return DAG.getNode(ISD::TRUNCATE, dl, VT, |
28674 | DAG.getNode(Opc, dl, ExtVT, R, Amt)); |
28675 | } |
28676 | |
28677 | // Constant ISD::SRA/SRL can be performed efficiently on vXi8 vectors as we |
28678 | // extend to vXi16 to perform a MUL scale effectively as a MUL_LOHI. |
28679 | if (ConstantAmt && (Opc == ISD::SRA || Opc == ISD::SRL) && |
28680 | (VT == MVT::v16i8 || (VT == MVT::v32i8 && Subtarget.hasInt256()) || |
28681 | (VT == MVT::v64i8 && Subtarget.hasBWI())) && |
28682 | !Subtarget.hasXOP()) { |
28683 | int NumElts = VT.getVectorNumElements(); |
28684 | SDValue Cst8 = DAG.getTargetConstant(8, dl, MVT::i8); |
28685 | |
28686 | // Extend constant shift amount to vXi16 (it doesn't matter if the type |
28687 | // isn't legal). |
28688 | MVT ExVT = MVT::getVectorVT(MVT::i16, NumElts); |
28689 | Amt = DAG.getZExtOrTrunc(Amt, dl, ExVT); |
28690 | Amt = DAG.getNode(ISD::SUB, dl, ExVT, DAG.getConstant(8, dl, ExVT), Amt); |
28691 | Amt = DAG.getNode(ISD::SHL, dl, ExVT, DAG.getConstant(1, dl, ExVT), Amt); |
28692 | assert(ISD::isBuildVectorOfConstantSDNodes(Amt.getNode()) && |
28693 | "Constant build vector expected"); |
28694 | |
28695 | if (VT == MVT::v16i8 && Subtarget.hasInt256()) { |
28696 | R = Opc == ISD::SRA ? DAG.getSExtOrTrunc(R, dl, ExVT) |
28697 | : DAG.getZExtOrTrunc(R, dl, ExVT); |
28698 | R = DAG.getNode(ISD::MUL, dl, ExVT, R, Amt); |
28699 | R = DAG.getNode(X86ISD::VSRLI, dl, ExVT, R, Cst8); |
28700 | return DAG.getZExtOrTrunc(R, dl, VT); |
28701 | } |
28702 | |
28703 | SmallVector<SDValue, 16> LoAmt, HiAmt; |
28704 | for (int i = 0; i != NumElts; i += 16) { |
28705 | for (int j = 0; j != 8; ++j) { |
28706 | LoAmt.push_back(Amt.getOperand(i + j)); |
28707 | HiAmt.push_back(Amt.getOperand(i + j + 8)); |
28708 | } |
28709 | } |
28710 | |
28711 | MVT VT16 = MVT::getVectorVT(MVT::i16, NumElts / 2); |
28712 | SDValue LoA = DAG.getBuildVector(VT16, dl, LoAmt); |
28713 | SDValue HiA = DAG.getBuildVector(VT16, dl, HiAmt); |
28714 | |
28715 | SDValue LoR = DAG.getBitcast(VT16, getUnpackl(DAG, dl, VT, R, R)); |
28716 | SDValue HiR = DAG.getBitcast(VT16, getUnpackh(DAG, dl, VT, R, R)); |
28717 | LoR = DAG.getNode(X86OpcI, dl, VT16, LoR, Cst8); |
28718 | HiR = DAG.getNode(X86OpcI, dl, VT16, HiR, Cst8); |
28719 | LoR = DAG.getNode(ISD::MUL, dl, VT16, LoR, LoA); |
28720 | HiR = DAG.getNode(ISD::MUL, dl, VT16, HiR, HiA); |
28721 | LoR = DAG.getNode(X86ISD::VSRLI, dl, VT16, LoR, Cst8); |
28722 | HiR = DAG.getNode(X86ISD::VSRLI, dl, VT16, HiR, Cst8); |
28723 | return DAG.getNode(X86ISD::PACKUS, dl, VT, LoR, HiR); |
28724 | } |
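      | // Illustrative note (added): the MUL+VSRLI sequence computes a right shift |
      | // because (zext16(x) * 2^(8-s)) >> 8 == x >> s. E.g. x = 200, s = 2: |
      | // 200 * 64 = 12800, and 12800 >> 8 = 50 = 200 >> 2. The SRA form works the |
      | // same way after sign-extending x to i16. |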
28725 | |
28726 | if (VT == MVT::v16i8 || |
28727 | (VT == MVT::v32i8 && Subtarget.hasInt256() && !Subtarget.hasXOP()) || |
28728 | (VT == MVT::v64i8 && Subtarget.hasBWI())) { |
28729 | MVT ExtVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements() / 2); |
28730 | |
28731 | auto SignBitSelect = [&](MVT SelVT, SDValue Sel, SDValue V0, SDValue V1) { |
28732 | if (VT.is512BitVector()) { |
28733 | // On AVX512BW targets we make use of the fact that VSELECT lowers |
28734 | // to a masked blend: the sign bit of each i8 element is compared |
28735 | // against zero to build the vXi1 mask. |
28736 | MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements()); |
28737 | V0 = DAG.getBitcast(VT, V0); |
28738 | V1 = DAG.getBitcast(VT, V1); |
28739 | Sel = DAG.getBitcast(VT, Sel); |
28740 | Sel = DAG.getSetCC(dl, MaskVT, DAG.getConstant(0, dl, VT), Sel, |
28741 | ISD::SETGT); |
28742 | return DAG.getBitcast(SelVT, DAG.getSelect(dl, VT, Sel, V0, V1)); |
28743 | } else if (Subtarget.hasSSE41()) { |
28744 | // On SSE41 targets we can use PBLENDVB which selects bytes based just |
28745 | // on the sign bit. |
28746 | V0 = DAG.getBitcast(VT, V0); |
28747 | V1 = DAG.getBitcast(VT, V1); |
28748 | Sel = DAG.getBitcast(VT, Sel); |
28749 | return DAG.getBitcast(SelVT, |
28750 | DAG.getNode(X86ISD::BLENDV, dl, VT, Sel, V0, V1)); |
28751 | } |
28752 | // On pre-SSE41 targets we test for the sign bit by comparing to |
28753 | // zero - a negative value will set all bits of the lanes to true |
28754 | // and VSELECT uses that in its OR(AND(V0,C),AND(V1,~C)) lowering. |
28755 | SDValue Z = DAG.getConstant(0, dl, SelVT); |
28756 | SDValue C = DAG.getNode(X86ISD::PCMPGT, dl, SelVT, Z, Sel); |
28757 | return DAG.getSelect(dl, SelVT, C, V0, V1); |
28758 | }; |
28759 | |
28760 | // Turn 'a' into a mask suitable for VSELECT: a = a << 5; |
28761 | // We can safely do this using i16 shifts as we're only interested in |
28762 | // the 3 lower bits of each byte. |
28763 | Amt = DAG.getBitcast(ExtVT, Amt); |
28764 | Amt = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, ExtVT, Amt, 5, DAG); |
28765 | Amt = DAG.getBitcast(VT, Amt); |
28766 | |
28767 | if (Opc == ISD::SHL || Opc == ISD::SRL) { |
28768 | // r = VSELECT(r, shift(r, 4), a); |
28769 | SDValue M = DAG.getNode(Opc, dl, VT, R, DAG.getConstant(4, dl, VT)); |
28770 | R = SignBitSelect(VT, Amt, M, R); |
28771 | |
28772 | // a += a |
28773 | Amt = DAG.getNode(ISD::ADD, dl, VT, Amt, Amt); |
28774 | |
28775 | // r = VSELECT(r, shift(r, 2), a); |
28776 | M = DAG.getNode(Opc, dl, VT, R, DAG.getConstant(2, dl, VT)); |
28777 | R = SignBitSelect(VT, Amt, M, R); |
28778 | |
28779 | // a += a |
28780 | Amt = DAG.getNode(ISD::ADD, dl, VT, Amt, Amt); |
28781 | |
28782 | // return VSELECT(r, shift(r, 1), a); |
28783 | M = DAG.getNode(Opc, dl, VT, R, DAG.getConstant(1, dl, VT)); |
28784 | R = SignBitSelect(VT, Amt, M, R); |
28785 | return R; |
28786 | } |
28787 | |
28788 | if (Opc == ISD::SRA) { |
28789 | // For SRA we need to unpack each byte to the higher byte of an i16 so |
28790 | // that the i16 arithmetic shift replicates the byte's sign bit; the |
28791 | // results are repacked with PACKUS once all shift steps are done. |
28792 | SDValue ALo = getUnpackl(DAG, dl, VT, DAG.getUNDEF(VT), Amt); |
28793 | SDValue AHi = getUnpackh(DAG, dl, VT, DAG.getUNDEF(VT), Amt); |
28794 | SDValue RLo = getUnpackl(DAG, dl, VT, DAG.getUNDEF(VT), R); |
28795 | SDValue RHi = getUnpackh(DAG, dl, VT, DAG.getUNDEF(VT), R); |
28796 | ALo = DAG.getBitcast(ExtVT, ALo); |
28797 | AHi = DAG.getBitcast(ExtVT, AHi); |
28798 | RLo = DAG.getBitcast(ExtVT, RLo); |
28799 | RHi = DAG.getBitcast(ExtVT, RHi); |
28800 | |
28801 | |
28802 | SDValue MLo = getTargetVShiftByConstNode(X86OpcI, dl, ExtVT, RLo, 4, DAG); |
28803 | SDValue MHi = getTargetVShiftByConstNode(X86OpcI, dl, ExtVT, RHi, 4, DAG); |
28804 | RLo = SignBitSelect(ExtVT, ALo, MLo, RLo); |
28805 | RHi = SignBitSelect(ExtVT, AHi, MHi, RHi); |
28806 | |
28807 | |
28808 | ALo = DAG.getNode(ISD::ADD, dl, ExtVT, ALo, ALo); |
28809 | AHi = DAG.getNode(ISD::ADD, dl, ExtVT, AHi, AHi); |
28810 | |
28811 | |
28812 | MLo = getTargetVShiftByConstNode(X86OpcI, dl, ExtVT, RLo, 2, DAG); |
28813 | MHi = getTargetVShiftByConstNode(X86OpcI, dl, ExtVT, RHi, 2, DAG); |
28814 | RLo = SignBitSelect(ExtVT, ALo, MLo, RLo); |
28815 | RHi = SignBitSelect(ExtVT, AHi, MHi, RHi); |
28816 | |
28817 | |
28818 | ALo = DAG.getNode(ISD::ADD, dl, ExtVT, ALo, ALo); |
28819 | AHi = DAG.getNode(ISD::ADD, dl, ExtVT, AHi, AHi); |
28820 | |
28821 | |
28822 | MLo = getTargetVShiftByConstNode(X86OpcI, dl, ExtVT, RLo, 1, DAG); |
28823 | MHi = getTargetVShiftByConstNode(X86OpcI, dl, ExtVT, RHi, 1, DAG); |
28824 | RLo = SignBitSelect(ExtVT, ALo, MLo, RLo); |
28825 | RHi = SignBitSelect(ExtVT, AHi, MHi, RHi); |
28826 | |
28827 | |
28828 | |
28829 | RLo = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExtVT, RLo, 8, DAG); |
28830 | RHi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExtVT, RHi, 8, DAG); |
28831 | return DAG.getNode(X86ISD::PACKUS, dl, VT, RLo, RHi); |
28832 | } |
28833 | } |
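      | // Illustrative note (added): the shift ladders above process the 3 amount |
      | // bits MSB-first. After the initial shl-by-5 the amount's bit 2 sits in |
      | // each byte's sign bit, which is what PBLENDVB/PCMPGT test; doubling the |
      | // mask exposes the next bit. E.g. amt = 5 (0b101) applies the shift-by-4 |
      | // and shift-by-1 steps and skips the shift-by-2 step. |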
28834 | |
28835 | if (Subtarget.hasInt256() && !Subtarget.hasXOP() && VT == MVT::v16i16) { |
28836 | MVT ExtVT = MVT::v8i32; |
28837 | SDValue Z = DAG.getConstant(0, dl, VT); |
28838 | SDValue ALo = getUnpackl(DAG, dl, VT, Amt, Z); |
28839 | SDValue AHi = getUnpackh(DAG, dl, VT, Amt, Z); |
28840 | SDValue RLo = getUnpackl(DAG, dl, VT, Z, R); |
28841 | SDValue RHi = getUnpackh(DAG, dl, VT, Z, R); |
28842 | ALo = DAG.getBitcast(ExtVT, ALo); |
28843 | AHi = DAG.getBitcast(ExtVT, AHi); |
28844 | RLo = DAG.getBitcast(ExtVT, RLo); |
28845 | RHi = DAG.getBitcast(ExtVT, RHi); |
28846 | SDValue Lo = DAG.getNode(Opc, dl, ExtVT, RLo, ALo); |
28847 | SDValue Hi = DAG.getNode(Opc, dl, ExtVT, RHi, AHi); |
28848 | Lo = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExtVT, Lo, 16, DAG); |
28849 | Hi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExtVT, Hi, 16, DAG); |
28850 | return DAG.getNode(X86ISD::PACKUS, dl, VT, Lo, Hi); |
28851 | } |
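      | // Illustrative note (added): unpacking with zeros places each i16 of R in |
      | // the high half of an i32 (getUnpackl(.., Z, R) interleaves Z below R), so |
      | // a variable i32 shift followed by VSRLI-16 yields the i16 result; for SRA |
      | // the i32 shift sees R's sign bit in bit 31, giving correct sign fill. |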
28852 | |
28853 | if (VT == MVT::v8i16) { |
28854 | // If we have a constant shift amount, the non-SSE41 path is best as |
28855 | // avoiding the PSHUFB. |
28856 | bool UseSSE41 = Subtarget.hasSSE41() && |
28857 | !ISD::isBuildVectorOfConstantSDNodes(Amt.getNode()); |
28858 | |
28859 | auto SignBitSelect = [&](SDValue Sel, SDValue V0, SDValue V1) { |
28860 | // On SSE41 targets we can use PBLENDVB which selects bytes based just |
28861 | // on the sign bit. |
28862 | if (UseSSE41) { |
28863 | MVT ExtVT = MVT::getVectorVT(MVT::i8, VT.getVectorNumElements() * 2); |
28864 | V0 = DAG.getBitcast(ExtVT, V0); |
28865 | V1 = DAG.getBitcast(ExtVT, V1); |
28866 | Sel = DAG.getBitcast(ExtVT, Sel); |
28867 | return DAG.getBitcast( |
28868 | VT, DAG.getNode(X86ISD::BLENDV, dl, ExtVT, Sel, V0, V1)); |
28869 | } |
28870 | // On pre-SSE41 targets we splat the sign bit - a negative value will |
28871 | // set all bits of the lanes to true and VSELECT uses that in |
28872 | // its OR(AND(V0,C),AND(V1,~C)) lowering. |
28873 | SDValue C = |
28874 | getTargetVShiftByConstNode(X86ISD::VSRAI, dl, VT, Sel, 15, DAG); |
28875 | return DAG.getSelect(dl, VT, C, V0, V1); |
28876 | }; |
28877 | |
28878 | // Turn 'a' into a mask suitable for VSELECT: a = a << 12; |
28879 | if (UseSSE41) { |
28880 | // On SSE41 targets we need to replicate the shift mask in both |
28881 | // bytes for PBLENDVB. |
28882 | Amt = DAG.getNode( |
28883 | ISD::OR, dl, VT, |
28884 | getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, Amt, 4, DAG), |
28885 | getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, Amt, 12, DAG)); |
28886 | } else { |
28887 | Amt = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, Amt, 12, DAG); |
28888 | } |
28889 | |
28890 | // r = VSELECT(r, shift(r, 8), a); |
28891 | SDValue M = getTargetVShiftByConstNode(X86OpcI, dl, VT, R, 8, DAG); |
28892 | R = SignBitSelect(Amt, M, R); |
28893 | |
28894 | // a += a |
28895 | Amt = DAG.getNode(ISD::ADD, dl, VT, Amt, Amt); |
28896 | |
28897 | // r = VSELECT(r, shift(r, 4), a); |
28898 | M = getTargetVShiftByConstNode(X86OpcI, dl, VT, R, 4, DAG); |
28899 | R = SignBitSelect(Amt, M, R); |
28900 | |
28901 | // a += a |
28902 | Amt = DAG.getNode(ISD::ADD, dl, VT, Amt, Amt); |
28903 | |
28904 | // r = VSELECT(r, shift(r, 2), a); |
28905 | M = getTargetVShiftByConstNode(X86OpcI, dl, VT, R, 2, DAG); |
28906 | R = SignBitSelect(Amt, M, R); |
28907 | |
28908 | // a += a |
28909 | Amt = DAG.getNode(ISD::ADD, dl, VT, Amt, Amt); |
28910 | |
28911 | // return VSELECT(r, shift(r, 1), a); |
28912 | M = getTargetVShiftByConstNode(X86OpcI, dl, VT, R, 1, DAG); |
28913 | R = SignBitSelect(Amt, M, R); |
28914 | return R; |
28915 | } |
28916 | |
28917 | // Decompose 256-bit shifts into 128-bit shifts. |
28918 | if (VT.is256BitVector()) |
28919 | return splitVectorIntBinary(Op, DAG); |
28920 | |
28921 | if (VT == MVT::v32i16 || VT == MVT::v64i8) |
28922 | return splitVectorIntBinary(Op, DAG); |
28923 | |
28924 | return SDValue(); |
28925 | } |
28926 | |
28927 | static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget, |
28928 | SelectionDAG &DAG) { |
28929 | MVT VT = Op.getSimpleValueType(); |
28930 | assert(VT.isVector() && "Custom lowering only for vector rotates!"); |
28931 | |
28932 | SDLoc DL(Op); |
28933 | SDValue R = Op.getOperand(0); |
28934 | SDValue Amt = Op.getOperand(1); |
28935 | unsigned Opcode = Op.getOpcode(); |
28936 | unsigned EltSizeInBits = VT.getScalarSizeInBits(); |
28937 | int NumElts = VT.getVectorNumElements(); |
28938 | |
28939 | // Check for constant splat rotation amount. |
28940 | APInt CstSplatValue; |
28941 | bool IsCstSplat = X86::isConstantSplat(Amt, CstSplatValue); |
28942 | |
28943 | // Check for splat rotate by zero. |
28944 | if (IsCstSplat && CstSplatValue.urem(EltSizeInBits) == 0) |
28945 | return R; |
28946 | |
28947 | // AVX512 implicitly uses modulo rotation amounts. |
28948 | if (Subtarget.hasAVX512() && 32 <= EltSizeInBits) { |
28949 | // Attempt to rotate by immediate. |
28950 | if (IsCstSplat) { |
28951 | unsigned RotOpc = (Opcode == ISD::ROTL ? X86ISD::VROTLI : X86ISD::VROTRI); |
28952 | uint64_t RotAmt = CstSplatValue.urem(EltSizeInBits); |
28953 | return DAG.getNode(RotOpc, DL, VT, R, |
28954 | DAG.getTargetConstant(RotAmt, DL, MVT::i8)); |
28955 | } |
28956 | |
28957 | // Else, fall-back on VPROLV/VPRORV. |
28958 | return Op; |
28959 | } |
28960 | |
28961 | // AVX512 VBMI2 vXi16 - lower to funnel shifts. |
28962 | if (Subtarget.hasVBMI2() && 16 == EltSizeInBits) { |
28963 | unsigned FunnelOpc = (Opcode == ISD::ROTL ? ISD::FSHL : ISD::FSHR); |
28964 | return DAG.getNode(FunnelOpc, DL, VT, R, R, Amt); |
28965 | } |
28966 | |
28967 | assert((Opcode == ISD::ROTL) && "Only ROTL supported"); |
28968 | |
28969 | // XOP has 128-bit vector variable + immediate rotates. |
28970 | // +ve/-ve Amt = rotate left/right - just need to handle ISD::ROTL. |
28971 | // XOP implicitly uses modulo rotation amounts. |
28972 | if (Subtarget.hasXOP()) { |
28973 | if (VT.is256BitVector()) |
28974 | return splitVectorIntBinary(Op, DAG); |
28975 | assert(VT.is128BitVector() && "Only rotate 128-bit vectors!"); |
28976 | |
28977 | // Attempt to rotate by immediate. |
28978 | if (IsCstSplat) { |
28979 | uint64_t RotAmt = CstSplatValue.urem(EltSizeInBits); |
28980 | return DAG.getNode(X86ISD::VROTLI, DL, VT, R, |
28981 | DAG.getTargetConstant(RotAmt, DL, MVT::i8)); |
28982 | } |
28983 | |
28984 | // Use general rotate by variable (per-element). |
28985 | return Op; |
28986 | } |
28987 | |
28988 | // Split 256-bit integers on pre-AVX2 targets. |
28989 | if (VT.is256BitVector() && !Subtarget.hasAVX2()) |
28990 | return splitVectorIntBinary(Op, DAG); |
28991 | |
28992 | assert((VT == MVT::v4i32 || VT == MVT::v8i16 || VT == MVT::v16i8 || |
28993 | ((VT == MVT::v8i32 || VT == MVT::v16i16 || VT == MVT::v32i8 || |
28994 | VT == MVT::v32i16) && |
28995 | Subtarget.hasAVX2())) && |
28996 | "Only vXi32/vXi16/vXi8 vector rotates supported"); |
28997 | |
28998 | // Rotate by a uniform constant - expand back to shifts. |
28999 | if (IsCstSplat) |
29000 | return SDValue(); |
29001 | |
29002 | bool IsSplatAmt = DAG.isSplatValue(Amt); |
29003 | |
29004 | // v16/32i8: Split rotation into rot4/rot2/rot1 stages and select by the |
29005 | // amount bit. |
29006 | if (EltSizeInBits == 8 && !IsSplatAmt) { |
29007 | if (ISD::isBuildVectorOfConstantSDNodes(Amt.getNode())) |
29008 | return SDValue(); |
29009 | |
29010 | // We don't need ModuloAmt here as we just peek at individual bits. |
29011 | MVT ExtVT = MVT::getVectorVT(MVT::i16, NumElts / 2); |
29012 | |
29013 | auto SignBitSelect = [&](MVT SelVT, SDValue Sel, SDValue V0, SDValue V1) { |
29014 | if (Subtarget.hasSSE41()) { |
29015 | // On SSE41 targets we can use PBLENDVB which selects bytes based just |
29016 | // on the sign bit. |
29017 | V0 = DAG.getBitcast(VT, V0); |
29018 | V1 = DAG.getBitcast(VT, V1); |
29019 | Sel = DAG.getBitcast(VT, Sel); |
29020 | return DAG.getBitcast(SelVT, |
29021 | DAG.getNode(X86ISD::BLENDV, DL, VT, Sel, V0, V1)); |
29022 | } |
29023 | |
29024 | // On pre-SSE41 targets we test for the sign bit by comparing to |
29025 | // zero - a negative value will set all bits of the lanes to true. |
29026 | SDValue Z = DAG.getConstant(0, DL, SelVT); |
29027 | SDValue C = DAG.getNode(X86ISD::PCMPGT, DL, SelVT, Z, Sel); |
29028 | return DAG.getSelect(DL, SelVT, C, V0, V1); |
29029 | }; |
29030 | |
29031 | // Turn 'a' into a mask suitable for VSELECT: a = a << 5; |
29032 | // We can safely do this using i16 shifts as we're only interested in |
29033 | // the 3 lower bits of each byte. |
29034 | Amt = DAG.getBitcast(ExtVT, Amt); |
29035 | Amt = DAG.getNode(ISD::SHL, DL, ExtVT, Amt, DAG.getConstant(5, DL, ExtVT)); |
29036 | Amt = DAG.getBitcast(VT, Amt); |
29037 | |
29038 | // r = VSELECT(r, rot(r, 4), a); |
29039 | SDValue M; |
29040 | M = DAG.getNode( |
29041 | ISD::OR, DL, VT, |
29042 | DAG.getNode(ISD::SHL, DL, VT, R, DAG.getConstant(4, DL, VT)), |
29043 | DAG.getNode(ISD::SRL, DL, VT, R, DAG.getConstant(4, DL, VT))); |
29044 | R = SignBitSelect(VT, Amt, M, R); |
29045 | |
29046 | // a += a |
29047 | Amt = DAG.getNode(ISD::ADD, DL, VT, Amt, Amt); |
29048 | |
29049 | // r = VSELECT(r, rot(r, 2), a); |
29050 | M = DAG.getNode( |
29051 | ISD::OR, DL, VT, |
29052 | DAG.getNode(ISD::SHL, DL, VT, R, DAG.getConstant(2, DL, VT)), |
29053 | DAG.getNode(ISD::SRL, DL, VT, R, DAG.getConstant(6, DL, VT))); |
29054 | R = SignBitSelect(VT, Amt, M, R); |
29055 | |
29056 | // a += a |
29057 | Amt = DAG.getNode(ISD::ADD, DL, VT, Amt, Amt); |
29058 | |
29059 | // return VSELECT(r, rot(r, 1), a); |
29060 | M = DAG.getNode( |
29061 | ISD::OR, DL, VT, |
29062 | DAG.getNode(ISD::SHL, DL, VT, R, DAG.getConstant(1, DL, VT)), |
29063 | DAG.getNode(ISD::SRL, DL, VT, R, DAG.getConstant(7, DL, VT))); |
29064 | return SignBitSelect(VT, Amt, M, R); |
29065 | } |
29066 | |
29067 | // ISD::ROT* uses modulo rotate amounts. |
29068 | if (SDValue BaseRotAmt = DAG.getSplatValue(Amt)) { |
29069 | // If the amount is a splat, perform the modulo BEFORE the splat - |
29070 | // this helps LowerScalarVariableShift to remove the splat later. |
29071 | Amt = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, BaseRotAmt); |
29072 | Amt = DAG.getNode(ISD::AND, DL, VT, Amt, |
29073 | DAG.getConstant(EltSizeInBits - 1, DL, VT)); |
29074 | Amt = DAG.getVectorShuffle(VT, DL, Amt, DAG.getUNDEF(VT), |
29075 | SmallVector<int>(NumElts, 0)); |
29076 | } else { |
29077 | Amt = DAG.getNode(ISD::AND, DL, VT, Amt, |
29078 | DAG.getConstant(EltSizeInBits - 1, DL, VT)); |
29079 | } |
29080 | |
29081 | bool ConstantAmt = ISD::isBuildVectorOfConstantSDNodes(Amt.getNode()); |
29082 | bool LegalVarShifts = SupportedVectorVarShift(VT, Subtarget, ISD::SHL) && |
29083 | SupportedVectorVarShift(VT, Subtarget, ISD::SRL); |
29084 | |
29085 | // Fallback for splats + all supported variable shifts. |
29086 | // Fallback for non-constants AVX2 vXi16 as well. |
29087 | if (IsSplatAmt || LegalVarShifts || (Subtarget.hasAVX2() && !ConstantAmt)) { |
29088 | SDValue AmtR = DAG.getConstant(EltSizeInBits, DL, VT); |
29089 | AmtR = DAG.getNode(ISD::SUB, DL, VT, AmtR, Amt); |
29090 | SDValue SHL = DAG.getNode(ISD::SHL, DL, VT, R, Amt); |
29091 | SDValue SRL = DAG.getNode(ISD::SRL, DL, VT, R, AmtR); |
29092 | return DAG.getNode(ISD::OR, DL, VT, SHL, SRL); |
29093 | } |
29094 | |
29095 | // As with shifts, convert the rotation amount to a multiplication factor. |
29096 | SDValue Scale = convertShiftLeftToScale(Amt, DL, Subtarget, DAG); |
29097 | assert(Scale && "Failed to convert ROTL amount to scale"); |
29098 | |
29099 | // v8i16/v16i16: perform unsigned multiply hi/lo and OR the results. |
29100 | if (EltSizeInBits == 16) { |
29101 | SDValue Lo = DAG.getNode(ISD::MUL, DL, VT, R, Scale); |
29102 | SDValue Hi = DAG.getNode(ISD::MULHU, DL, VT, R, Scale); |
29103 | return DAG.getNode(ISD::OR, DL, VT, Lo, Hi); |
29104 | } |
29105 | |
29106 | // v4i32: make use of the PMULUDQ instruction to multiply 2 lanes of v4i32 |
29107 | // to v2i64 results at a time. The upper 32-bits contain the wrapped bits |
29108 | // that can then be OR'd with the lower 32-bits. |
29109 | assert(VT == MVT::v4i32 && "Only v4i32 vector rotate expected"); |
29110 | static const int OddMask[] = {1, -1, 3, -1}; |
29111 | SDValue R13 = DAG.getVectorShuffle(VT, DL, R, R, OddMask); |
29112 | SDValue Scale13 = DAG.getVectorShuffle(VT, DL, Scale, Scale, OddMask); |
29113 | |
29114 | SDValue Res02 = DAG.getNode(X86ISD::PMULUDQ, DL, MVT::v2i64, |
29115 | DAG.getBitcast(MVT::v2i64, R), |
29116 | DAG.getBitcast(MVT::v2i64, Scale)); |
29117 | SDValue Res13 = DAG.getNode(X86ISD::PMULUDQ, DL, MVT::v2i64, |
29118 | DAG.getBitcast(MVT::v2i64, R13), |
29119 | DAG.getBitcast(MVT::v2i64, Scale13)); |
29120 | Res02 = DAG.getBitcast(VT, Res02); |
29121 | Res13 = DAG.getBitcast(VT, Res13); |
29122 | |
29123 | return DAG.getNode(ISD::OR, DL, VT, |
29124 | DAG.getVectorShuffle(VT, DL, Res02, Res13, {0, 4, 2, 6}), |
29125 | DAG.getVectorShuffle(VT, DL, Res02, Res13, {1, 5, 3, 7})); |
29126 | } |
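      | // Illustrative note (added): PMULUDQ forms the full 64-bit product |
      | // zext64(x) * 2^s, whose low 32 bits are x << s and whose high 32 bits are |
      | // x >> (32 - s); OR'ing the two halves back together is exactly |
      | // rotl(x, s). The vXi16 MUL/MULHU pair a few lines up is the same trick. |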
29127 | |
29128 | |
29129 | |
29130 | |
29131 | |
29132 | bool X86TargetLowering::needsCmpXchgNb(Type *MemType) const { |
29133 | unsigned OpWidth = MemType->getPrimitiveSizeInBits(); |
29134 | |
29135 | if (OpWidth == 64) |
29136 | return Subtarget.hasCmpxchg8b() && !Subtarget.is64Bit(); |
29137 | if (OpWidth == 128) |
29138 | return Subtarget.hasCmpxchg16b(); |
29139 | |
29140 | return false; |
29141 | } |
29142 | |
29143 | bool X86TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const { |
29144 | Type *MemType = SI->getValueOperand()->getType(); |
29145 | |
29146 | bool NoImplicitFloatOps = |
29147 | SI->getFunction()->hasFnAttribute(Attribute::NoImplicitFloat); |
29148 | if (MemType->getPrimitiveSizeInBits() == 64 && !Subtarget.is64Bit() && |
29149 | !Subtarget.useSoftFloat() && !NoImplicitFloatOps && |
29150 | (Subtarget.hasSSE1() || Subtarget.hasX87())) |
29151 | return false; |
29152 | |
29153 | return needsCmpXchgNb(MemType); |
29154 | } |
29155 | |
29156 | // Note: this turns large loads into lock cmpxchg8b/16b. |
29157 | // TODO: In 32-bit mode, use MOVLPS when SSE1 is available? |
29158 | TargetLowering::AtomicExpansionKind |
29159 | X86TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const { |
29160 | Type *MemType = LI->getType(); |
29161 | |
29162 | // If this a 64 bit atomic load on a 32-bit target and SSE2 is enabled, we |
29163 | // can use movq to do the load. If we have X87 we can load into an 80-bit |
29164 | // X87 register and store it to a stack temporary. |
29165 | bool NoImplicitFloatOps = |
29166 | LI->getFunction()->hasFnAttribute(Attribute::NoImplicitFloat); |
29167 | if (MemType->getPrimitiveSizeInBits() == 64 && !Subtarget.is64Bit() && |
29168 | !Subtarget.useSoftFloat() && !NoImplicitFloatOps && |
29169 | (Subtarget.hasSSE1() || Subtarget.hasX87())) |
29170 | return AtomicExpansionKind::None; |
29171 | |
29172 | return needsCmpXchgNb(MemType) ? AtomicExpansionKind::CmpXChg |
29173 | : AtomicExpansionKind::None; |
29174 | } |
29175 | |
29176 | TargetLowering::AtomicExpansionKind |
29177 | X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const { |
29178 | unsigned NativeWidth = Subtarget.is64Bit() ? 64 : 32; |
29179 | Type *MemType = AI->getType(); |
29180 | |
29181 | // If the operand is too big, we must see if cmpxchg8/16b is available |
29182 | // and default to library calls otherwise. |
29183 | if (MemType->getPrimitiveSizeInBits() > NativeWidth) { |
29184 | return needsCmpXchgNb(MemType) ? AtomicExpansionKind::CmpXChg |
29185 | : AtomicExpansionKind::None; |
29186 | } |
29187 | |
29188 | AtomicRMWInst::BinOp Op = AI->getOperation(); |
29189 | switch (Op) { |
29190 | default: |
29191 | llvm_unreachable("Unknown atomic operation"); |
29192 | case AtomicRMWInst::Xchg: |
29193 | case AtomicRMWInst::Add: |
29194 | case AtomicRMWInst::Sub: |
29195 | // It's better to use xadd, xsub or xchg for these in all cases. |
29196 | return AtomicExpansionKind::None; |
29197 | case AtomicRMWInst::Or: |
29198 | case AtomicRMWInst::And: |
29199 | case AtomicRMWInst::Xor: |
29200 | // If the atomicrmw's result isn't actually used, we can just add a "lock" |
29201 | // prefix to a regular instruction for these operations. |
29202 | return !AI->use_empty() ? AtomicExpansionKind::CmpXChg |
29203 | : AtomicExpansionKind::None; |
29204 | case AtomicRMWInst::Nand: |
29205 | case AtomicRMWInst::Max: |
29206 | case AtomicRMWInst::Min: |
29207 | case AtomicRMWInst::UMax: |
29208 | case AtomicRMWInst::UMin: |
29209 | case AtomicRMWInst::FAdd: |
29210 | case AtomicRMWInst::FSub: |
29211 | // These always require a non-trivial set of data operations on x86. We must |
29212 | // use a cmpxchg loop. |
29213 | return AtomicExpansionKind::CmpXChg; |
29214 | } |
29215 | } |
29216 | |
29217 | LoadInst * |
29218 | X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const { |
29219 | unsigned NativeWidth = Subtarget.is64Bit() ? 64 : 32; |
29220 | Type *MemType = AI->getType(); |
29221 | |
29222 | // Accesses larger than the native width are turned into cmpxchg/libcalls, |
29223 | // so they don't qualify for this optimization. |
29224 | if (MemType->getPrimitiveSizeInBits() > NativeWidth) |
29225 | return nullptr; |
29226 | |
29227 | // If this is a canonical idempotent atomicrmw w/no uses, we have a better |
29228 | // lowering available in lowerAtomicArith. |
29229 | // TODO: push more cases through this path. |
29230 | if (auto *C = dyn_cast<ConstantInt>(AI->getValOperand())) |
29231 | if (AI->getOperation() == AtomicRMWInst::Or && C->isZero() && |
29232 | AI->use_empty()) |
29233 | return nullptr; |
29234 | |
29235 | IRBuilder<> Builder(AI); |
29236 | Module *M = Builder.GetInsertBlock()->getParent()->getParent(); |
29237 | auto SSID = AI->getSyncScopeID(); |
29238 | // We must restrict the ordering to avoid generating loads with Release or |
29239 | // ReleaseAcquire orderings. |
29240 | auto Order = AtomicCmpXchgInst::getStrongestFailureOrdering(AI->getOrdering()); |
29241 | |
29242 | // Before the load we need a fence. Here is an example lifted from |
29243 | // http://www.hpl.hp.com/techreports/2012/HPL-2012-68.pdf showing why a fence |
29244 | // is required. The idempotent RMW below is used as a seq_cst operation: |
29245 | // |
29246 | //   Thread 0:            Thread 1: |
29247 | //   x.store(1);          y.store(1); |
29248 | //   r1 = y.rmw(or, 0);   r2 = x.rmw(or, 0); |
29249 | // |
29250 | // Under seq_cst, r1 == 0 && r2 == 0 is forbidden. If the RMWs were lowered |
29251 | // to plain loads with no fence, the hardware could reorder each load ahead |
29252 | // of the other thread's store and both threads could observe 0. Emitting a |
29253 | // full fence (MFENCE) before the load rules this out. |
29254 | // |
29255 | // We only bother with this optimization when a real fence instruction is |
29256 | // available; otherwise we return nullptr below and keep the ordinary |
29257 | // atomicrmw lowering. |
29258 | if (SSID == SyncScope::SingleThread) |
29259 | // FIXME: we could just insert an X86ISD::MEMBARRIER here, except we are at |
29260 | // the IR level, so we must wrap it in an intrinsic. |
29261 | return nullptr; |
29262 | |
29263 | if (!Subtarget.hasMFence()) |
29264 | // FIXME: it might make sense to use a locked operation here, but on |
29265 | // mfence-less (pre-SSE2) targets this path is rare enough that simply |
29266 | // declining the optimization and keeping the plain atomicrmw lowering |
29267 | // is acceptable. |
29268 | return nullptr; |
29269 | |
29270 | Function *MFence = |
29271 | llvm::Intrinsic::getDeclaration(M, Intrinsic::x86_sse2_mfence); |
29272 | Builder.CreateCall(MFence, {}); |
29273 | |
29274 | // Finally we can emit the atomic load. |
29275 | LoadInst *Loaded = Builder.CreateAlignedLoad( |
29276 | AI->getType(), AI->getPointerOperand(), AI->getAlign()); |
29277 | Loaded->setAtomic(Order, SSID); |
29278 | AI->replaceAllUsesWith(Loaded); |
29279 | AI->eraseFromParent(); |
29280 | return Loaded; |
29281 | } |
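      | // Illustrative sketch (added, IR level): an idempotent RMW such as |
      | //   %old = atomicrmw or i32* %p, i32 0 seq_cst |
      | // is rewritten here (when MFENCE is available) roughly as: |
      | //   call void @llvm.x86.sse2.mfence() |
      | //   %old = load atomic i32, i32* %p seq_cst, align 4 |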
29282 | |
29283 | bool X86TargetLowering::lowerAtomicStoreAsStoreSDNode(const StoreInst &SI) const { |
29284 | if (!SI.isUnordered()) |
29285 | return false; |
29286 | return ExperimentalUnorderedISEL; |
29287 | } |
29288 | bool X86TargetLowering::lowerAtomicLoadAsLoadSDNode(const LoadInst &LI) const { |
29289 | if (!LI.isUnordered()) |
29290 | return false; |
29291 | return ExperimentalUnorderedISEL; |
29292 | } |
29293 | |
29294 | |
29295 | /// Emit a locked operation on a stack location which does not change any |
29296 | /// memory location, but does involve a lock prefix.  Location is chosen to be |
29297 | /// a) very likely accessed only by a single thread to minimize cache traffic, |
29298 | /// and b) definitely dereferenceable.  Returns the new Chain result. |
29299 | static SDValue emitLockedStackOp(SelectionDAG &DAG, |
29300 | const X86Subtarget &Subtarget, SDValue Chain, |
29301 | const SDLoc &DL) { |
29302 | |
29303 | // The lowering here is unusually involved for such a simple operation. |
29304 | // Implementation notes: |
29305 | // |
29306 | //  1) A LOCK-prefixed read-modify-write acts as a full two-way memory |
29307 | //     barrier on x86, which is the only effect we actually want here. |
29308 | // |
29309 | //  2) OR'ing an immediate 0 into memory is idempotent: it changes neither |
29310 | //     the memory location nor any value the program can observe (the |
29311 | //     EFLAGS def of the instruction is dead). |
29312 | // |
29313 | //  3) The slot at the stack pointer is essentially always dereferenceable |
29314 | //     and is very likely accessed only by the current thread, which keeps |
29315 | //     the locked operation off shared cache lines. |
29316 | // |
29317 | //  4) When the function has a 128-byte red zone we use a -64 displacement |
29318 | //     so the locked OR lands inside memory this frame already owns rather |
29319 | //     than on the slot at [RSP] itself. |
29320 | |
29321 | |
29322 | |
29323 | |
29324 | |
29325 | |
29326 | auto &MF = DAG.getMachineFunction(); |
29327 | auto &TFL = *Subtarget.getFrameLowering(); |
29328 | const unsigned SPOffset = TFL.has128ByteRedZone(MF) ? -64 : 0; |
29329 | |
29330 | if (Subtarget.is64Bit()) { |
29331 | SDValue Zero = DAG.getTargetConstant(0, DL, MVT::i32); |
29332 | SDValue Ops[] = { |
29333 | DAG.getRegister(X86::RSP, MVT::i64), |
29334 | DAG.getTargetConstant(1, DL, MVT::i8), |
29335 | DAG.getRegister(0, MVT::i64), |
29336 | DAG.getTargetConstant(SPOffset, DL, MVT::i32), |
29337 | DAG.getRegister(0, MVT::i16), |
29338 | Zero, |
29339 | Chain}; |
29340 | SDNode *Res = DAG.getMachineNode(X86::OR32mi8Locked, DL, MVT::i32, |
29341 | MVT::Other, Ops); |
29342 | return SDValue(Res, 1); |
29343 | } |
29344 | |
29345 | SDValue Zero = DAG.getTargetConstant(0, DL, MVT::i32); |
29346 | SDValue Ops[] = { |
29347 | DAG.getRegister(X86::ESP, MVT::i32), |
29348 | DAG.getTargetConstant(1, DL, MVT::i8), |
29349 | DAG.getRegister(0, MVT::i32), |
29350 | DAG.getTargetConstant(SPOffset, DL, MVT::i32), |
29351 | DAG.getRegister(0, MVT::i16), |
29352 | Zero, |
29353 | Chain |
29354 | }; |
29355 | SDNode *Res = DAG.getMachineNode(X86::OR32mi8Locked, DL, MVT::i32, |
29356 | MVT::Other, Ops); |
29357 | return SDValue(Res, 1); |
29358 | } |
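      | // Illustrative note (added): the operand list above is the standard X86 |
      | // memory 5-tuple (base, scale, index, displacement, segment) plus the |
      | // immediate and the chain, so the node materializes as e.g. |
      | // "lock orl $0, -64(%rsp)" on x86-64 with a red zone, or |
      | // "lock orl $0, (%esp)" on 32-bit targets. |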
29359 | |
29360 | static SDValue LowerATOMIC_FENCE(SDValue Op, const X86Subtarget &Subtarget, |
29361 | SelectionDAG &DAG) { |
29362 | SDLoc dl(Op); |
29363 | AtomicOrdering FenceOrdering = |
29364 | static_cast<AtomicOrdering>(Op.getConstantOperandVal(1)); |
29365 | SyncScope::ID FenceSSID = |
29366 | static_cast<SyncScope::ID>(Op.getConstantOperandVal(2)); |
29367 | |
29368 | // The only fence that needs an instruction is a sequentially-consistent |
29369 | // cross-thread fence. |
29370 | if (FenceOrdering == AtomicOrdering::SequentiallyConsistent && |
29371 | FenceSSID == SyncScope::System) { |
29372 | if (Subtarget.hasMFence()) |
29373 | return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0)); |
29374 | |
29375 | SDValue Chain = Op.getOperand(0); |
29376 | return emitLockedStackOp(DAG, Subtarget, Chain, dl); |
29377 | } |
29378 | |
29379 | // MEMBARRIER is a compiler barrier; it codegens to a no-op. |
29380 | return DAG.getNode(X86ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0)); |
29381 | } |
29382 | |
29383 | static SDValue LowerCMP_SWAP(SDValue Op, const X86Subtarget &Subtarget, |
29384 | SelectionDAG &DAG) { |
29385 | MVT T = Op.getSimpleValueType(); |
29386 | SDLoc DL(Op); |
29387 | unsigned Reg = 0; |
29388 | unsigned size = 0; |
29389 | switch(T.SimpleTy) { |
29390 | default: llvm_unreachable("Invalid value type!"); |
29391 | case MVT::i8: Reg = X86::AL; size = 1; break; |
29392 | case MVT::i16: Reg = X86::AX; size = 2; break; |
29393 | case MVT::i32: Reg = X86::EAX; size = 4; break; |
29394 | case MVT::i64: |
29395 | assert(Subtarget.is64Bit() && "Node not type legal!"); |
29396 | Reg = X86::RAX; size = 8; |
29397 | break; |
29398 | } |
29399 | SDValue cpIn = DAG.getCopyToReg(Op.getOperand(0), DL, Reg, |
29400 | Op.getOperand(2), SDValue()); |
29401 | SDValue Ops[] = { cpIn.getValue(0), |
29402 | Op.getOperand(1), |
29403 | Op.getOperand(3), |
29404 | DAG.getTargetConstant(size, DL, MVT::i8), |
29405 | cpIn.getValue(1) }; |
29406 | SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue); |
29407 | MachineMemOperand *MMO = cast<AtomicSDNode>(Op)->getMemOperand(); |
29408 | SDValue Result = DAG.getMemIntrinsicNode(X86ISD::LCMPXCHG_DAG, DL, Tys, |
29409 | Ops, T, MMO); |
29410 | |
29411 | SDValue cpOut = |
29412 | DAG.getCopyFromReg(Result.getValue(0), DL, Reg, T, Result.getValue(1)); |
29413 | SDValue EFLAGS = DAG.getCopyFromReg(cpOut.getValue(1), DL, X86::EFLAGS, |
29414 | MVT::i32, cpOut.getValue(2)); |
29415 | SDValue Success = getSETCC(X86::COND_E, EFLAGS, DL, DAG); |
29416 | |
29417 | return DAG.getNode(ISD::MERGE_VALUES, DL, Op->getVTList(), |
29418 | cpOut, Success, EFLAGS.getValue(1)); |
29419 | } |
29420 | |
29421 | |
29422 | static SDValue getPMOVMSKB(const SDLoc &DL, SDValue V, SelectionDAG &DAG, |
29423 | const X86Subtarget &Subtarget) { |
29424 | MVT InVT = V.getSimpleValueType(); |
29425 | |
29426 | if (InVT == MVT::v64i8) { |
29427 | SDValue Lo, Hi; |
29428 | std::tie(Lo, Hi) = DAG.SplitVector(V, DL); |
29429 | Lo = getPMOVMSKB(DL, Lo, DAG, Subtarget); |
29430 | Hi = getPMOVMSKB(DL, Hi, DAG, Subtarget); |
29431 | Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Lo); |
29432 | Hi = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Hi); |
29433 | Hi = DAG.getNode(ISD::SHL, DL, MVT::i64, Hi, |
29434 | DAG.getConstant(32, DL, MVT::i8)); |
29435 | return DAG.getNode(ISD::OR, DL, MVT::i64, Lo, Hi); |
29436 | } |
29437 | if (InVT == MVT::v32i8 && !Subtarget.hasInt256()) { |
29438 | SDValue Lo, Hi; |
29439 | std::tie(Lo, Hi) = DAG.SplitVector(V, DL); |
29440 | Lo = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Lo); |
29441 | Hi = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Hi); |
29442 | Hi = DAG.getNode(ISD::SHL, DL, MVT::i32, Hi, |
29443 | DAG.getConstant(16, DL, MVT::i8)); |
29444 | return DAG.getNode(ISD::OR, DL, MVT::i32, Lo, Hi); |
29445 | } |
29446 | |
29447 | return DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, V); |
29448 | } |
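      | // Illustrative note (added): on AVX1 a v32i8 mask is split into two |
      | // PMOVMSKBs whose i32 results are recombined as Lo | (Hi << 16); the |
      | // v64i8 case does the same once more at i64 width. |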
29449 | |
29450 | static SDValue LowerBITCAST(SDValue Op, const X86Subtarget &Subtarget, |
29451 | SelectionDAG &DAG) { |
29452 | SDValue Src = Op.getOperand(0); |
29453 | MVT SrcVT = Src.getSimpleValueType(); |
29454 | MVT DstVT = Op.getSimpleValueType(); |
29455 | |
29456 | // Lower bitcasts from i64 to v64i1 on 32-bit targets by splitting into |
29457 | // two i32 -> v32i1 halves and concatenating. |
29458 | if (SrcVT == MVT::i64 && DstVT == MVT::v64i1) { |
29459 | assert(!Subtarget.is64Bit() && "Expected 32-bit mode"); |
29460 | assert(Subtarget.hasBWI() && "Expected BWI target"); |
29461 | SDLoc dl(Op); |
29462 | SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Src, |
29463 | DAG.getIntPtrConstant(0, dl)); |
29464 | Lo = DAG.getBitcast(MVT::v32i1, Lo); |
29465 | SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Src, |
29466 | DAG.getIntPtrConstant(1, dl)); |
29467 | Hi = DAG.getBitcast(MVT::v32i1, Hi); |
29468 | return DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v64i1, Lo, Hi); |
29469 | } |
29470 | |
29471 | // Use MOVMSK for vector to scalar conversion to prevent scalarization. |
29472 | if ((SrcVT == MVT::v16i1 || SrcVT == MVT::v32i1) && DstVT.isScalarInteger()) { |
29473 | assert(!Subtarget.hasAVX512() && "Should use K-registers with AVX512"); |
29474 | MVT SExtVT = SrcVT == MVT::v16i1 ? MVT::v16i8 : MVT::v32i8; |
29475 | SDLoc DL(Op); |
29476 | SDValue V = DAG.getSExtOrTrunc(Src, DL, SExtVT); |
29477 | V = getPMOVMSKB(DL, V, DAG, Subtarget); |
29478 | return DAG.getZExtOrTrunc(V, DL, DstVT); |
29479 | } |
29480 | |
29481 | assert((SrcVT == MVT::v2i32 || SrcVT == MVT::v4i16 || SrcVT == MVT::v8i8 || |
29482 | SrcVT == MVT::i64) && "Unexpected VT!"); |
29483 | |
29484 | assert(Subtarget.hasSSE2() && "Requires at least SSE2!"); |
29485 | if (!(DstVT == MVT::f64 && SrcVT == MVT::i64) && |
29486 | !(DstVT == MVT::x86mmx && SrcVT.isVector())) |
29487 | // This conversion needs to be expanded. |
29488 | return SDValue(); |
29489 | |
29490 | SDLoc dl(Op); |
29491 | if (SrcVT.isVector()) { |
29492 | // Widen the vector in input in the case of MVT::v2i32. |
29493 | // Example: from MVT::v2i32 to MVT::v4i32. |
29494 | MVT NewVT = MVT::getVectorVT(SrcVT.getVectorElementType(), |
29495 | SrcVT.getVectorNumElements() * 2); |
29496 | Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewVT, Src, |
29497 | DAG.getUNDEF(SrcVT)); |
29498 | } else { |
29499 | assert(SrcVT == MVT::i64 && !Subtarget.is64Bit() && |
29500 | "Unexpected source type in LowerBITCAST"); |
29501 | Src = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Src); |
29502 | } |
29503 | |
29504 | MVT V2X64VT = DstVT == MVT::f64 ? MVT::v2f64 : MVT::v2i64; |
29505 | Src = DAG.getNode(ISD::BITCAST, dl, V2X64VT, Src); |
29506 | |
29507 | if (DstVT == MVT::x86mmx) |
29508 | return DAG.getNode(X86ISD::MOVDQ2Q, dl, DstVT, Src); |
29509 | |
29510 | return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, DstVT, Src, |
29511 | DAG.getIntPtrConstant(0, dl)); |
29512 | } |
29513 | |
29514 | |
29515 | |
29516 | |
29517 | |
29518 | |
29519 | |
29520 | static SDValue LowerHorizontalByteSum(SDValue V, MVT VT, |
29521 | const X86Subtarget &Subtarget, |
29522 | SelectionDAG &DAG) { |
29523 | SDLoc DL(V); |
29524 | MVT ByteVecVT = V.getSimpleValueType(); |
29525 | MVT EltVT = VT.getVectorElementType(); |
29526 | assert(ByteVecVT.getVectorElementType() == MVT::i8 && |
29527 | "Expected value to have byte element type."); |
29528 | assert(EltVT != MVT::i8 && |
29529 | "Horizontal byte sum only makes sense for wider elements!"); |
29530 | unsigned VecSize = VT.getSizeInBits(); |
29531 | assert(ByteVecVT.getSizeInBits() == VecSize && "Cannot change vector size!"); |
29532 | |
29533 | // PSADBW instruction horizontally add all bytes and leave the result in i64 |
29534 | // chunks, thus directly computes the pop count for v2i64 and v4i64. |
29535 | if (EltVT == MVT::i64) { |
29536 | SDValue Zeros = DAG.getConstant(0, DL, ByteVecVT); |
29537 | MVT SadVecVT = MVT::getVectorVT(MVT::i64, VecSize / 64); |
29538 | V = DAG.getNode(X86ISD::PSADBW, DL, SadVecVT, V, Zeros); |
29539 | return DAG.getBitcast(VT, V); |
29540 | } |
29541 | |
29542 | if (EltVT == MVT::i32) { |
29543 | // We unpack the low half and high half into i32s interleaved with zeros so |
29544 | // that we can use PSADBW to horizontally sum them. The most useful part of |
29545 | // this is that it lines up the results of two PSADBW instructions to be |
29546 | // two v2i64 vectors which concatenated are the 4 population counts. We can |
29547 | // then use PACKUSWB to shrink and concatenate them into a v4i32 again. |
29548 | SDValue Zeros = DAG.getConstant(0, DL, VT); |
29549 | SDValue V32 = DAG.getBitcast(VT, V); |
29550 | SDValue Low = getUnpackl(DAG, DL, VT, V32, Zeros); |
29551 | SDValue High = getUnpackh(DAG, DL, VT, V32, Zeros); |
29552 | |
29553 | // Do the horizontal sums into two v2i64s. |
29554 | Zeros = DAG.getConstant(0, DL, ByteVecVT); |
29555 | MVT SadVecVT = MVT::getVectorVT(MVT::i64, VecSize / 64); |
29556 | Low = DAG.getNode(X86ISD::PSADBW, DL, SadVecVT, |
29557 | DAG.getBitcast(ByteVecVT, Low), Zeros); |
29558 | High = DAG.getNode(X86ISD::PSADBW, DL, SadVecVT, |
29559 | DAG.getBitcast(ByteVecVT, High), Zeros); |
29560 | |
29561 | // Merge them together. |
29562 | MVT ShortVecVT = MVT::getVectorVT(MVT::i16, VecSize / 16); |
29563 | V = DAG.getNode(X86ISD::PACKUS, DL, ByteVecVT, |
29564 | DAG.getBitcast(ShortVecVT, Low), |
29565 | DAG.getBitcast(ShortVecVT, High)); |
29566 | |
29567 | return DAG.getBitcast(VT, V); |
29568 | } |
29569 | |
29570 | |
29571 | assert(EltVT == MVT::i16 && "Unknown how to handle type"); |
29572 | |
29573 | // To obtain pop count for each i16 element starting from the pop count for |
29574 | // i8 elements, shift the i16s left by 8, sum as i8s, and then shift as i16s |
29575 | // right by 8. It is important to shift as i16s as i8 vector shift isn't |
29576 | // supported natively. |
29577 | SDValue ShifterV = DAG.getConstant(8, DL, VT); |
29578 | SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, DAG.getBitcast(VT, V), ShifterV); |
29579 | V = DAG.getNode(ISD::ADD, DL, ByteVecVT, DAG.getBitcast(ByteVecVT, Shl), |
29580 | DAG.getBitcast(ByteVecVT, V)); |
29581 | return DAG.getNode(ISD::SRL, DL, VT, DAG.getBitcast(VT, V), ShifterV); |
29582 | } |
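      | // Illustrative note (added): within each i16 lane holding bytes [lo, hi], |
      | // the shl-by-8 moves lo into the high byte, the byte-wise add forms |
      | // lo + hi there, and the final i16 srl-by-8 leaves that sum zero-extended |
      | // in the lane. |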
29583 | |
29584 | static SDValue LowerVectorCTPOPInRegLUT(SDValue Op, const SDLoc &DL, |
29585 | const X86Subtarget &Subtarget, |
29586 | SelectionDAG &DAG) { |
29587 | MVT VT = Op.getSimpleValueType(); |
29588 | MVT EltVT = VT.getVectorElementType(); |
29589 | int NumElts = VT.getVectorNumElements(); |
29590 | (void)EltVT; |
29591 | assert(EltVT == MVT::i8 && "Only vXi8 vector CTPOP lowering supported."); |
29592 | |
29593 | // This is the vectorized version of the "best" algorithm from |
29594 | // http://wm.ite.pl/articles/sse-popcount.html |
29595 | // |
29596 | // The algorithm is essentially: |
29597 | //  1) Split each byte into a low and a high nibble. |
29598 | //  2) Use PSHUFB against an in-register 16-entry table holding |
29599 | //     popcount(0..15) to look up the bit count of each nibble. |
29600 | //  3) Add the per-nibble counts to produce the per-byte population count. |
29601 | // Wider element counts are handled afterwards by horizontal byte sums |
29602 | // (see LowerHorizontalByteSum). |
29603 | const int LUT[16] = { 0, 1, 1, 2, |
29604 | 1, 2, 2, 3, |
29605 | 1, 2, 2, 3, |
29606 | 2, 3, 3, 4}; |
29607 | |
29608 | SmallVector<SDValue, 64> LUTVec; |
29609 | for (int i = 0; i < NumElts; ++i) |
29610 | LUTVec.push_back(DAG.getConstant(LUT[i % 16], DL, MVT::i8)); |
29611 | SDValue InRegLUT = DAG.getBuildVector(VT, DL, LUTVec); |
29612 | SDValue M0F = DAG.getConstant(0x0F, DL, VT); |
29613 | |
29614 | // High nibbles |
29615 | SDValue FourV = DAG.getConstant(4, DL, VT); |
29616 | SDValue HiNibbles = DAG.getNode(ISD::SRL, DL, VT, Op, FourV); |
29617 | |
29618 | // Low nibbles |
29619 | SDValue LoNibbles = DAG.getNode(ISD::AND, DL, VT, Op, M0F); |
29620 | |
29621 | // The input vector is used as the shuffle mask that index elements into the |
29622 | // LUT. After counting low and high nibbles, add the vector to obtain the |
29623 | // final pop count per i8 element. |
29624 | SDValue HiPopCnt = DAG.getNode(X86ISD::PSHUFB, DL, VT, InRegLUT, HiNibbles); |
29625 | SDValue LoPopCnt = DAG.getNode(X86ISD::PSHUFB, DL, VT, InRegLUT, LoNibbles); |
29626 | return DAG.getNode(ISD::ADD, DL, VT, HiPopCnt, LoPopCnt); |
29627 | } |
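      | // Illustrative note (added): e.g. for the byte 0xB7 the lookups give |
      | // LUT[0x7] = 3 and LUT[0xB] = 3, and 3 + 3 = 6 = popcount(0xB7). |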
29628 | |
29629 | // Please ensure that any codegen change from LowerVectorCTPOP is reflected |
29630 | // in updated cost models in X86TTIImpl::getIntrinsicInstrCost. |
29631 | static SDValue LowerVectorCTPOP(SDValue Op, const X86Subtarget &Subtarget, |
29632 | SelectionDAG &DAG) { |
29633 | MVT VT = Op.getSimpleValueType(); |
29634 | assert((VT.is512BitVector() || VT.is256BitVector() || VT.is128BitVector()) && |
29635 | "Unknown CTPOP type to handle"); |
29636 | SDLoc DL(Op.getNode()); |
29637 | SDValue Op0 = Op.getOperand(0); |
29638 | |
29639 | // TRUNC(CTPOP(ZEXT(X))) to handle vXi16/vXi8. |
29640 | if (Subtarget.hasVPOPCNTDQ()) { |
29641 | unsigned NumElems = VT.getVectorNumElements(); |
29642 | assert((VT.getVectorElementType() == MVT::i8 || |
29643 | VT.getVectorElementType() == MVT::i16) && "Unexpected type"); |
29644 | if (NumElems < 16 || (NumElems == 16 && Subtarget.canExtendTo512DQ())) { |
29645 | MVT NewVT = MVT::getVectorVT(MVT::i32, NumElems); |
29646 | Op = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, Op0); |
29647 | Op = DAG.getNode(ISD::CTPOP, DL, NewVT, Op); |
29648 | return DAG.getNode(ISD::TRUNCATE, DL, VT, Op); |
29649 | } |
29650 | } |
29651 | |
29652 | // Decompose 256-bit ops into smaller 128-bit ops. |
29653 | if (VT.is256BitVector() && !Subtarget.hasInt256()) |
29654 | return splitVectorIntUnary(Op, DAG); |
29655 | |
29656 | // Decompose 512-bit ops into smaller 256-bit ops. |
29657 | if (VT.is512BitVector() && !Subtarget.hasBWI()) |
29658 | return splitVectorIntUnary(Op, DAG); |
29659 | |
29660 | // For element types greater than i8, do vXi8 pop counts and a bytesum. |
29661 | if (VT.getScalarType() != MVT::i8) { |
29662 | MVT ByteVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8); |
29663 | SDValue ByteOp = DAG.getBitcast(ByteVT, Op0); |
29664 | SDValue PopCnt8 = DAG.getNode(ISD::CTPOP, DL, ByteVT, ByteOp); |
29665 | return LowerHorizontalByteSum(PopCnt8, VT, Subtarget, DAG); |
29666 | } |
29667 | |
29668 | // We can't use the fast LUT approach, so fall back on LegalizeDAG. |
29669 | if (!Subtarget.hasSSSE3()) |
29670 | return SDValue(); |
29671 | |
29672 | return LowerVectorCTPOPInRegLUT(Op0, DL, Subtarget, DAG); |
29673 | } |
29674 | |
29675 | static SDValue LowerCTPOP(SDValue Op, const X86Subtarget &Subtarget, |
29676 | SelectionDAG &DAG) { |
29677 | assert(Op.getSimpleValueType().isVector() && |
29678 | "We only do custom lowering for vector population count."); |
29679 | return LowerVectorCTPOP(Op, Subtarget, DAG); |
29680 | } |
29681 | |
29682 | static SDValue LowerBITREVERSE_XOP(SDValue Op, SelectionDAG &DAG) { |
29683 | MVT VT = Op.getSimpleValueType(); |
29684 | SDValue In = Op.getOperand(0); |
29685 | SDLoc DL(Op); |
29686 | |
29687 | // Scalar BITREVERSE: bitcast to a vector, reverse with VPPERM, then |
29688 | // extract the reversed value back out. |
29689 | if (!VT.isVector()) { |
29690 | MVT VecVT = MVT::getVectorVT(VT, 128 / VT.getSizeInBits()); |
29691 | SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, In); |
29692 | Res = DAG.getNode(ISD::BITREVERSE, DL, VecVT, Res); |
29693 | return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Res, |
29694 | DAG.getIntPtrConstant(0, DL)); |
29695 | } |
29696 | |
29697 | int NumElts = VT.getVectorNumElements(); |
29698 | int ScalarSizeInBytes = VT.getScalarSizeInBits() / 8; |
29699 | |
29700 | // Decompose 256-bit ops into smaller 128-bit ops. |
29701 | if (VT.is256BitVector()) |
29702 | return splitVectorIntUnary(Op, DAG); |
29703 | |
29704 | assert(VT.is128BitVector() && |
29705 | "Only 128-bit vector bitreverse lowering supported."); |
29706 | |
29707 | // Perform BITREVERSE using VPPERM. Within each element the bytes are |
29708 | // selected in reverse order (a per-element byte swap), and each selector |
29709 | // byte is OR'd with (2 << 5), the VPPERM operation field that asks for a |
29710 | // bit-reversed copy of the selected source byte. |
29711 | SmallVector<SDValue, 16> MaskElts; |
29712 | for (int i = 0; i != NumElts; ++i) { |
29713 | for (int j = ScalarSizeInBytes - 1; j >= 0; --j) { |
29714 | int SourceByte = 16 + (i * ScalarSizeInBytes) + j; |
29715 | int PermuteByte = SourceByte | (2 << 5); |
29716 | MaskElts.push_back(DAG.getConstant(PermuteByte, DL, MVT::i8)); |
29717 | } |
29718 | } |
29719 | |
29720 | SDValue Mask = DAG.getBuildVector(MVT::v16i8, DL, MaskElts); |
29721 | SDValue Res = DAG.getBitcast(MVT::v16i8, In); |
29722 | Res = DAG.getNode(X86ISD::VPPERM, DL, MVT::v16i8, DAG.getUNDEF(MVT::v16i8), |
29723 | Res, Mask); |
29724 | return DAG.getBitcast(VT, Res); |
29725 | } |
29726 | |
29727 | static SDValue LowerBITREVERSE(SDValue Op, const X86Subtarget &Subtarget, |
29728 | SelectionDAG &DAG) { |
29729 | MVT VT = Op.getSimpleValueType(); |
29730 | |
29731 | if (Subtarget.hasXOP() && !VT.is512BitVector()) |
29732 | return LowerBITREVERSE_XOP(Op, DAG); |
29733 | |
29734 | assert(Subtarget.hasSSSE3() && "SSSE3 required for BITREVERSE"); |
29735 | |
29736 | SDValue In = Op.getOperand(0); |
29737 | SDLoc DL(Op); |
29738 | |
29739 | assert(VT.getScalarType() == MVT::i8 && |
29740 | "Only byte vector BITREVERSE supported"); |
29741 | |
29742 | // Split v64i8 without BWI so we can still use the PSHUFB lowering. |
29743 | if (VT == MVT::v64i8 && !Subtarget.hasBWI()) |
29744 | return splitVectorIntUnary(Op, DAG); |
29745 | |
29746 | // Decompose 256-bit ops into smaller 128-bit ops on pre-AVX2. |
29747 | if (VT == MVT::v32i8 && !Subtarget.hasInt256()) |
29748 | return splitVectorIntUnary(Op, DAG); |
29749 | |
29750 | unsigned NumElts = VT.getVectorNumElements(); |
29751 | |
29752 | // If we have GFNI, we can use GF2P8AFFINEQB to reverse the bits. |
29753 | if (Subtarget.hasGFNI()) { |
29754 | MVT MatrixVT = MVT::getVectorVT(MVT::i64, NumElts / 8); |
29755 | SDValue Matrix = DAG.getConstant(0x8040201008040201ULL, DL, MatrixVT); |
29756 | Matrix = DAG.getBitcast(VT, Matrix); |
29757 | return DAG.getNode(X86ISD::GF2P8AFFINEQB, DL, VT, In, Matrix, |
29758 | DAG.getTargetConstant(0, DL, MVT::i8)); |
29759 | } |
29760 | |
29761 | // Perform BITREVERSE using PSHUFB lookups. Each byte is split into |
29762 | // two nibbles and a PSHUFB lookup to find the bitreverse of each |
29763 | // 0-15 value (moved to the other nibble). |
29764 | SDValue NibbleMask = DAG.getConstant(0xF, DL, VT); |
29765 | SDValue Lo = DAG.getNode(ISD::AND, DL, VT, In, NibbleMask); |
29766 | SDValue Hi = DAG.getNode(ISD::SRL, DL, VT, In, DAG.getConstant(4, DL, VT)); |
29767 | |
29768 | const int LoLUT[16] = { |
29769 | 0x00, 0x80, 0x40, 0xC0, |
29770 | 0x20, 0xA0, 0x60, 0xE0, |
29771 | 0x10, 0x90, 0x50, 0xD0, |
29772 | 0x30, 0xB0, 0x70, 0xF0}; |
29773 | const int HiLUT[16] = { |
29774 | 0x00, 0x08, 0x04, 0x0C, |
29775 | 0x02, 0x0A, 0x06, 0x0E, |
29776 | 0x01, 0x09, 0x05, 0x0D, |
29777 | 0x03, 0x0B, 0x07, 0x0F}; |
29778 | |
29779 | SmallVector<SDValue, 16> LoMaskElts, HiMaskElts; |
29780 | for (unsigned i = 0; i < NumElts; ++i) { |
29781 | LoMaskElts.push_back(DAG.getConstant(LoLUT[i % 16], DL, MVT::i8)); |
29782 | HiMaskElts.push_back(DAG.getConstant(HiLUT[i % 16], DL, MVT::i8)); |
29783 | } |
29784 | |
29785 | SDValue LoMask = DAG.getBuildVector(VT, DL, LoMaskElts); |
29786 | SDValue HiMask = DAG.getBuildVector(VT, DL, HiMaskElts); |
29787 | Lo = DAG.getNode(X86ISD::PSHUFB, DL, VT, LoMask, Lo); |
29788 | Hi = DAG.getNode(X86ISD::PSHUFB, DL, VT, HiMask, Hi); |
29789 | return DAG.getNode(ISD::OR, DL, VT, Lo, Hi); |
29790 | } |
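      | // Illustrative note (added): e.g. for the byte 0x12, LoLUT[0x2] = 0x40 |
      | // (0010 reversed into the high nibble) and HiLUT[0x1] = 0x08 (0001 |
      | // reversed into the low nibble), so 0x40 | 0x08 = 0x48 = bitreverse(0x12). |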
29791 | |
29792 | static SDValue LowerPARITY(SDValue Op, const X86Subtarget &Subtarget, |
29793 | SelectionDAG &DAG) { |
29794 | SDLoc DL(Op); |
29795 | SDValue X = Op.getOperand(0); |
29796 | MVT VT = Op.getSimpleValueType(); |
29797 | |
29798 | // Special case: if the input fits in 8-bits we can use a single 8-bit TEST. |
29799 | if (VT == MVT::i8 || |
29800 | DAG.MaskedValueIsZero(X, APInt::getBitsSetFrom(VT.getSizeInBits(), 8))) { |
29801 | X = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, X); |
29802 | SDValue Flags = DAG.getNode(X86ISD::CMP, DL, MVT::i32, X, |
29803 | DAG.getConstant(0, DL, MVT::i8)); |
29804 | // Copy the inverse of the parity flag into a register with setcc. |
29805 | SDValue Setnp = getSETCC(X86::COND_NP, Flags, DL, DAG); |
29806 | // Extend to the original type. |
29807 | return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Setnp); |
29808 | } |
29809 | |
29810 | if (VT == MVT::i64) { |
29811 | // Fold the 64-bit value down to 32 bits: XOR-ing the halves preserves parity. |
29812 | SDValue Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, |
29813 | DAG.getNode(ISD::SRL, DL, MVT::i64, X, |
29814 | DAG.getConstant(32, DL, MVT::i8))); |
29815 | SDValue Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, X); |
29816 | X = DAG.getNode(ISD::XOR, DL, MVT::i32, Lo, Hi); |
29817 | } |
29818 | |
29819 | if (VT != MVT::i16) { |
29820 | // Xor the high and low 16-bits together using a 32-bit operation. |
29821 | SDValue Hi16 = DAG.getNode(ISD::SRL, DL, MVT::i32, X, |
29822 | DAG.getConstant(16, DL, MVT::i8)); |
29823 | X = DAG.getNode(ISD::XOR, DL, MVT::i32, X, Hi16); |
29824 | } else { |
29825 | // If the input is 16-bits, we need to extend to use an i32 shift below. |
29826 | X = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, X); |
29827 | } |
29828 | |
29829 | // Finally xor the two 8-bit halves together - only the flags result is |
29830 | // needed; the parity flag (PF) reflects the low 8 bits of the XOR. |
29831 | SDValue Hi = DAG.getNode( |
29832 | ISD::TRUNCATE, DL, MVT::i8, |
29833 | DAG.getNode(ISD::SRL, DL, MVT::i32, X, DAG.getConstant(8, DL, MVT::i8))); |
29834 | SDValue Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, X); |
29835 | SDVTList VTs = DAG.getVTList(MVT::i8, MVT::i32); |
29836 | SDValue Flags = DAG.getNode(X86ISD::XOR, DL, VTs, Lo, Hi).getValue(1); |
29837 | |
29838 | // Copy the inverse of the parity flag into a register with setcc. |
29839 | SDValue Setnp = getSETCC(X86::COND_NP, Flags, DL, DAG); |
29840 | // Extend to the original type. |
29841 | return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Setnp); |
29842 | } |
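      | // Illustrative note (added): XOR-folding preserves parity. For 0x12345678, |
      | // folding 16 bits leaves 0x444C in the low half, folding 8 bits gives |
      | // 0x44 ^ 0x4C = 0x08, which has one set bit - so the value's 13 set bits |
      | // are correctly reported as odd parity (PF clear, SETNP produces 1). |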
29843 | |
29844 | static SDValue lowerAtomicArithWithLOCK(SDValue N, SelectionDAG &DAG, |
29845 | const X86Subtarget &Subtarget) { |
29846 | unsigned NewOpc = 0; |
29847 | switch (N->getOpcode()) { |
29848 | case ISD::ATOMIC_LOAD_ADD: |
29849 | NewOpc = X86ISD::LADD; |
29850 | break; |
29851 | case ISD::ATOMIC_LOAD_SUB: |
29852 | NewOpc = X86ISD::LSUB; |
29853 | break; |
29854 | case ISD::ATOMIC_LOAD_OR: |
29855 | NewOpc = X86ISD::LOR; |
29856 | break; |
29857 | case ISD::ATOMIC_LOAD_XOR: |
29858 | NewOpc = X86ISD::LXOR; |
29859 | break; |
29860 | case ISD::ATOMIC_LOAD_AND: |
29861 | NewOpc = X86ISD::LAND; |
29862 | break; |
29863 | default: |
29864 | llvm_unreachable("Unknown ATOMIC_LOAD_ opcode"); |
29865 | } |
29866 | |
29867 | MachineMemOperand *MMO = cast<MemSDNode>(N)->getMemOperand(); |
29868 | |
29869 | return DAG.getMemIntrinsicNode( |
29870 | NewOpc, SDLoc(N), DAG.getVTList(MVT::i32, MVT::Other), |
29871 | {N->getOperand(0), N->getOperand(1), N->getOperand(2)}, |
29872 | N->getSimpleValueType(0), MMO); |
29873 | } |
29874 | |
29875 | /// Lower atomic_load_ops into LOCK-prefixed operations. |
29876 | static SDValue lowerAtomicArith(SDValue N, SelectionDAG &DAG, |
29877 | const X86Subtarget &Subtarget) { |
29878 | AtomicSDNode *AN = cast<AtomicSDNode>(N.getNode()); |
29879 | SDValue Chain = N->getOperand(0); |
29880 | SDValue LHS = N->getOperand(1); |
29881 | SDValue RHS = N->getOperand(2); |
29882 | unsigned Opc = N->getOpcode(); |
29883 | MVT VT = N->getSimpleValueType(0); |
29884 | SDLoc DL(N); |
29885 | |
29886 | // We can lower atomic_load_add into LXADD, but any other atomicrmw op |
29887 | // can only be lowered when the result is unused.  They should have already |
29888 | // been transformed into a cmpxchg loop in AtomicExpand. |
29889 | if (N->hasAnyUseOfValue(0)) { |
29890 | // Handle (atomic_load_sub p, v) as (atomic_load_add p, -v), to be able to |
29891 | // select LXADD if needed. |
29892 | if (Opc == ISD::ATOMIC_LOAD_SUB) { |
29893 | RHS = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), RHS); |
29894 | return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, VT, Chain, LHS, |
29895 | RHS, AN->getMemOperand()); |
29896 | } |
29897 | assert(Opc == ISD::ATOMIC_LOAD_ADD && |
29898 | "Used AtomicRMW ops other than Add should have been expanded!"); |
29899 | return N; |
29900 | } |
29901 | |
29902 | // Specialized lowering for the canonical form of an idempotent atomicrmw. |
29903 | // The core idea here is that since the memory location isn't actually |
29904 | // changing, all we need is a lowering for the *ordering* impacts of the |
29905 | // atomicrmw.  As such, we can choose a different operation and memory |
29906 | // location to minimize impact on other code. |
29907 | if (Opc == ISD::ATOMIC_LOAD_OR && isNullConstant(RHS)) { |
29908 | // On X86, the only ordering which actually requires an instruction is |
29909 | // seq_cst which isn't SingleThread, everything just needs to be preserved |
29910 | // during codegen and then dropped. Note that we expect (but don't assume), |
29911 | // that orderings other than seq_cst and acq_rel have been canonicalized to |
29912 | // a store or load. |
29913 | if (AN->getSuccessOrdering() == AtomicOrdering::SequentiallyConsistent && |
29914 | AN->getSyncScopeID() == SyncScope::System) { |
29915 | |
29916 | // Prefer a locked operation against a stack location to minimize cache |
29917 | // traffic. |
29918 | SDValue NewChain = emitLockedStackOp(DAG, Subtarget, Chain, DL); |
29919 | assert(!N->hasAnyUseOfValue(0)); |
29920 | |
29921 | return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), |
29922 | DAG.getUNDEF(VT), NewChain); |
29923 | } |
29924 | |
29925 | SDValue NewChain = DAG.getNode(X86ISD::MEMBARRIER, DL, MVT::Other, Chain); |
29926 | assert(!N->hasAnyUseOfValue(0)); |
29927 | |
29928 | return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), |
29929 | DAG.getUNDEF(VT), NewChain); |
29930 | } |
29931 | |
29932 | SDValue LockOp = lowerAtomicArithWithLOCK(N, DAG, Subtarget); |
29933 | |
29934 | assert(!N->hasAnyUseOfValue(0)); |
29935 | |
29936 | return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), |
29937 | DAG.getUNDEF(VT), LockOp.getValue(1)); |
29938 | } |
29939 | |
29940 | static SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG, |
29941 | const X86Subtarget &Subtarget) { |
29942 | auto *Node = cast<AtomicSDNode>(Op.getNode()); |
29943 | SDLoc dl(Node); |
29944 | EVT VT = Node->getMemoryVT(); |
29945 | |
29946 | bool IsSeqCst = |
29947 | Node->getSuccessOrdering() == AtomicOrdering::SequentiallyConsistent; |
29948 | bool IsTypeLegal = DAG.getTargetLoweringInfo().isTypeLegal(VT); |
29949 | |
29950 | // If this store is not sequentially consistent and the type is legal |
29951 | // we can just keep it. |
29952 | if (!IsSeqCst && IsTypeLegal) |
29953 | return Op; |
29954 | |
29955 | if (VT == MVT::i64 && !IsTypeLegal) { |
29956 | // On 32-bit targets an i64 atomic store can be done with a single SSE |
29957 | // store (MOVQ) or an X87 FILD/FIST pair, avoiding a cmpxchg8b loop. |
29958 | bool NoImplicitFloatOps = |
29959 | DAG.getMachineFunction().getFunction().hasFnAttribute( |
29960 | Attribute::NoImplicitFloat); |
29961 | if (!Subtarget.useSoftFloat() && !NoImplicitFloatOps) { |
29962 | SDValue Chain; |
29963 | if (Subtarget.hasSSE1()) { |
29964 | SDValue SclToVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, |
29965 | Node->getOperand(2)); |
29966 | MVT StVT = Subtarget.hasSSE2() ? MVT::v2i64 : MVT::v4f32; |
29967 | SclToVec = DAG.getBitcast(StVT, SclToVec); |
29968 | SDVTList Tys = DAG.getVTList(MVT::Other); |
29969 | SDValue Ops[] = {Node->getChain(), SclToVec, Node->getBasePtr()}; |
29970 | Chain = DAG.getMemIntrinsicNode(X86ISD::VEXTRACT_STORE, dl, Tys, Ops, |
29971 | MVT::i64, Node->getMemOperand()); |
29972 | } else if (Subtarget.hasX87()) { |
29973 | // First load this into an 80-bit X87 register using a stack temporary. |
29974 | // This will put the whole integer into the significand. |
29975 | SDValue StackPtr = DAG.CreateStackTemporary(MVT::i64); |
29976 | int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); |
29977 | MachinePointerInfo MPI = |
29978 | MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI); |
29979 | Chain = |
29980 | DAG.getStore(Node->getChain(), dl, Node->getOperand(2), StackPtr, |
29981 | MPI, MaybeAlign(), MachineMemOperand::MOStore); |
29982 | SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other); |
29983 | SDValue LdOps[] = {Chain, StackPtr}; |
29984 | SDValue Value = |
29985 | DAG.getMemIntrinsicNode(X86ISD::FILD, dl, Tys, LdOps, MVT::i64, MPI, |
29986 | None, MachineMemOperand::MOLoad); |
29987 | Chain = Value.getValue(1); |
29988 | |
29989 | // Now use an FIST to do the atomic store. |
29990 | SDValue StoreOps[] = {Chain, Value, Node->getBasePtr()}; |
29991 | Chain = |
29992 | DAG.getMemIntrinsicNode(X86ISD::FIST, dl, DAG.getVTList(MVT::Other), |
29993 | StoreOps, MVT::i64, Node->getMemOperand()); |
29994 | } |
29995 | |
29996 | if (Chain) { |
29997 | // If this is a sequentially consistent store, also emit an appropriate |
29998 | // fence. |
29999 | if (IsSeqCst) |
30000 | Chain = emitLockedStackOp(DAG, Subtarget, Chain, dl); |
30001 | |
30002 | return Chain; |
30003 | } |
30004 | } |
30005 | } |
30006 | |
30007 | |
30008 | // Convert seq_cst store -> xchg |
30009 | // Convert wide store -> swap (-> cmpxchg8b/16b) |
30010 | SDValue Swap = DAG.getAtomic(ISD::ATOMIC_SWAP, dl, |
30011 | Node->getMemoryVT(), |
30012 | Node->getOperand(0), |
30013 | Node->getOperand(1), Node->getOperand(2), |
30014 | Node->getMemOperand()); |
30015 | return Swap.getValue(1); |
30016 | } |
30017 | |
30018 | static SDValue LowerADDSUBCARRY(SDValue Op, SelectionDAG &DAG) { |
30019 | SDNode *N = Op.getNode(); |
30020 | MVT VT = N->getSimpleValueType(0); |
30021 | unsigned Opc = Op.getOpcode(); |
30022 | |
30023 | // Let legalize expand this if it isn't a legal type yet. |
30024 | if (!DAG.getTargetLoweringInfo().isTypeLegal(VT)) |
30025 | return SDValue(); |
30026 | |
30027 | SDVTList VTs = DAG.getVTList(VT, MVT::i32); |
30028 | SDLoc DL(N); |
30029 | |
30030 | // Set the carry flag. |
30031 | SDValue Carry = Op.getOperand(2); |
30032 | EVT CarryVT = Carry.getValueType(); |
30033 | Carry = DAG.getNode(X86ISD::ADD, DL, DAG.getVTList(CarryVT, MVT::i32), |
30034 | Carry, DAG.getAllOnesConstant(DL, CarryVT)); |
30035 | |
30036 | bool IsAdd = Opc == ISD::ADDCARRY || Opc == ISD::SADDO_CARRY; |
30037 | SDValue Sum = DAG.getNode(IsAdd ? X86ISD::ADC : X86ISD::SBB, DL, VTs, |
30038 | Op.getOperand(0), Op.getOperand(1), |
30039 | Carry.getValue(1)); |
30040 | |
30041 | bool IsSigned = Opc == ISD::SADDO_CARRY || Opc == ISD::SSUBO_CARRY; |
30042 | SDValue SetCC = getSETCC(IsSigned ? X86::COND_O : X86::COND_B, |
30043 | Sum.getValue(1), DL, DAG); |
30044 | if (N->getValueType(1) == MVT::i1) |
30045 | SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC); |
30046 | |
30047 | return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, SetCC); |
30048 | } |
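      | // Illustrative note (added): the X86ISD::ADD of the carry with all-ones |
      | // regenerates CF: 1 + 0xFFFFFFFF wraps (CF = 1) while 0 + 0xFFFFFFFF does |
      | // not (CF = 0), so the ADC/SBB above consume the incoming carry directly. |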
30049 | |
30050 | static SDValue LowerFSINCOS(SDValue Op, const X86Subtarget &Subtarget, |
30051 | SelectionDAG &DAG) { |
30052 | assert(Subtarget.isTargetDarwin() && Subtarget.is64Bit()); |
30053 | |
30054 | // For MacOSX, we want to call an alternative entry point: __sincos_stret, |
30055 | // which returns the values as { float, float } (in XMM0) or |
30056 | // { double, double } (which is returned in XMM0/XMM1). |
30057 | SDLoc dl(Op); |
30058 | SDValue Arg = Op.getOperand(0); |
30059 | EVT ArgVT = Arg.getValueType(); |
30060 | Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); |
30061 | |
30062 | TargetLowering::ArgListTy Args; |
30063 | TargetLowering::ArgListEntry Entry; |
30064 | |
30065 | Entry.Node = Arg; |
30066 | Entry.Ty = ArgTy; |
30067 | Entry.IsSExt = false; |
30068 | Entry.IsZExt = false; |
30069 | Args.push_back(Entry); |
30070 | |
30071 | bool isF64 = ArgVT == MVT::f64; |
30072 | |
30073 | // Only optimize x86_64 for now. i386 is a bit messy: for f32 the small |
30074 | // struct {f32, f32} is returned in (eax, edx); f64 results use SRet. |
30075 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
30076 | RTLIB::Libcall LC = isF64 ? RTLIB::SINCOS_STRET_F64 : RTLIB::SINCOS_STRET_F32; |
30077 | const char *LibcallName = TLI.getLibcallName(LC); |
30078 | SDValue Callee = |
30079 | DAG.getExternalSymbol(LibcallName, TLI.getPointerTy(DAG.getDataLayout())); |
30080 | |
30081 | Type *RetTy = isF64 ? (Type *)StructType::get(ArgTy, ArgTy) |
30082 | : (Type *)FixedVectorType::get(ArgTy, 4); |
30083 | |
30084 | TargetLowering::CallLoweringInfo CLI(DAG); |
30085 | CLI.setDebugLoc(dl) |
30086 | .setChain(DAG.getEntryNode()) |
30087 | .setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args)); |
30088 | |
30089 | std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI); |
30090 | |
30091 | if (isF64) |
30092 | |
30093 | return CallResult.first; |
30094 | |
30095 | |
30096 | SDValue SinVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ArgVT, |
30097 | CallResult.first, DAG.getIntPtrConstant(0, dl)); |
30098 | SDValue CosVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ArgVT, |
30099 | CallResult.first, DAG.getIntPtrConstant(1, dl)); |
30100 | SDVTList Tys = DAG.getVTList(ArgVT, ArgVT); |
30101 | return DAG.getNode(ISD::MERGE_VALUES, dl, Tys, SinVal, CosVal); |
30102 | } |
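// Editor's note (not in the original source): LowerFSINCOS above targets
// Darwin's __sincos_stret/__sincosf_stret runtime calls, which return both
// results from one call: a {double, double} struct for f64, and a 4 x float
// vector with sin in lane 0 and cos in lane 1 for f32 -- which is why the
// f32 path extracts elements 0 and 1. A hedged C-level sketch of the f64
// shape; the exact prototype is an assumption, not quoted from a header:
//
//   extern "C" struct SinCos { double Sin, Cos; } __sincos_stret(double);
//   // double s = __sincos_stret(x).Sin;  // one call yields both values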
30103 | |
30104 | |
30105 | |
30106 | static SDValue ExtendToType(SDValue InOp, MVT NVT, SelectionDAG &DAG, |
30107 | bool FillWithZeroes = false) { |
30108 | |
30109 | MVT InVT = InOp.getSimpleValueType(); |
30110 | if (InVT == NVT) |
30111 | return InOp; |
30112 | |
30113 | if (InOp.isUndef()) |
30114 | return DAG.getUNDEF(NVT); |
30115 | |
30116 | assert(InVT.getVectorElementType() == NVT.getVectorElementType() && |
30117 | "input and widen element type must match"); |
30118 | |
30119 | unsigned InNumElts = InVT.getVectorNumElements(); |
30120 | unsigned WidenNumElts = NVT.getVectorNumElements(); |
30121 | assert(WidenNumElts > InNumElts && WidenNumElts % InNumElts == 0 && |
30122 | "Unexpected request for vector widening"); |
30123 | |
30124 | SDLoc dl(InOp); |
30125 | if (InOp.getOpcode() == ISD::CONCAT_VECTORS && |
30126 | InOp.getNumOperands() == 2) { |
30127 | SDValue N1 = InOp.getOperand(1); |
30128 | if ((ISD::isBuildVectorAllZeros(N1.getNode()) && FillWithZeroes) || |
30129 | N1.isUndef()) { |
30130 | InOp = InOp.getOperand(0); |
30131 | InVT = InOp.getSimpleValueType(); |
30132 | InNumElts = InVT.getVectorNumElements(); |
30133 | } |
30134 | } |
30135 | if (ISD::isBuildVectorOfConstantSDNodes(InOp.getNode()) || |
30136 | ISD::isBuildVectorOfConstantFPSDNodes(InOp.getNode())) { |
30137 | SmallVector<SDValue, 16> Ops; |
30138 | for (unsigned i = 0; i < InNumElts; ++i) |
30139 | Ops.push_back(InOp.getOperand(i)); |
30140 | |
30141 | EVT EltVT = InOp.getOperand(0).getValueType(); |
30142 | |
30143 | SDValue FillVal = FillWithZeroes ? DAG.getConstant(0, dl, EltVT) : |
30144 | DAG.getUNDEF(EltVT); |
30145 | for (unsigned i = 0; i < WidenNumElts - InNumElts; ++i) |
30146 | Ops.push_back(FillVal); |
30147 | return DAG.getBuildVector(NVT, dl, Ops); |
30148 | } |
30149 | SDValue FillVal = FillWithZeroes ? DAG.getConstant(0, dl, NVT) : |
30150 | DAG.getUNDEF(NVT); |
30151 | return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, NVT, FillVal, |
30152 | InOp, DAG.getIntPtrConstant(0, dl)); |
30153 | } |
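// Editor's note (not in the original source): ExtendToType widens a vector
// to NVT either by rebuilding a constant BUILD_VECTOR with padding lanes or
// by inserting the value as subvector 0 of a wider undef/zero vector.
// Worked example with illustrative types:
//
//   // v2i32 {a, b} widened to v8i32, FillWithZeroes = false:
//   //   INSERT_SUBVECTOR(undef v8i32, {a, b}, 0) -> {a, b, u, u, u, u, u, u}
//   // For an i1 mask, FillWithZeroes = true zeroes lanes 2..7, so the
//   // padded lanes of a widened masked op are provably inactive.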
30154 | |
30155 | static SDValue LowerMSCATTER(SDValue Op, const X86Subtarget &Subtarget, |
30156 | SelectionDAG &DAG) { |
30157 | assert(Subtarget.hasAVX512() && |
30158 | "MGATHER/MSCATTER are supported on AVX-512 arch only"); |
30159 | |
30160 | MaskedScatterSDNode *N = cast<MaskedScatterSDNode>(Op.getNode()); |
30161 | SDValue Src = N->getValue(); |
30162 | MVT VT = Src.getSimpleValueType(); |
30163 | assert(VT.getScalarSizeInBits() >= 32 && "Unsupported scatter op"); |
30164 | SDLoc dl(Op); |
30165 | |
30166 | SDValue Scale = N->getScale(); |
30167 | SDValue Index = N->getIndex(); |
30168 | SDValue Mask = N->getMask(); |
30169 | SDValue Chain = N->getChain(); |
30170 | SDValue BasePtr = N->getBasePtr(); |
30171 | |
30172 | if (VT == MVT::v2f32 || VT == MVT::v2i32) { |
30173 | assert(Mask.getValueType() == MVT::v2i1 && "Unexpected mask type"); |
30174 | |
30175 | if (Index.getValueType() == MVT::v2i64 && Subtarget.hasVLX()) { |
30176 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
30177 | EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); |
30178 | Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Src, DAG.getUNDEF(VT)); |
30179 | SDVTList VTs = DAG.getVTList(MVT::Other); |
30180 | SDValue Ops[] = {Chain, Src, Mask, BasePtr, Index, Scale}; |
30181 | return DAG.getMemIntrinsicNode(X86ISD::MSCATTER, dl, VTs, Ops, |
30182 | N->getMemoryVT(), N->getMemOperand()); |
30183 | } |
30184 | return SDValue(); |
30185 | } |
30186 | |
30187 | MVT IndexVT = Index.getSimpleValueType(); |
30188 | |
30189 | |
30190 | |
30191 | if (IndexVT == MVT::v2i32) |
30192 | return SDValue(); |
30193 | |
30194 | |
30195 | |
30196 | if (!Subtarget.hasVLX() && !VT.is512BitVector() && |
30197 | !Index.getSimpleValueType().is512BitVector()) { |
30198 | |
30199 | unsigned Factor = std::min(512/VT.getSizeInBits(), |
30200 | 512/IndexVT.getSizeInBits()); |
30201 | unsigned NumElts = VT.getVectorNumElements() * Factor; |
30202 | |
30203 | VT = MVT::getVectorVT(VT.getVectorElementType(), NumElts); |
30204 | IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(), NumElts); |
30205 | MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts); |
30206 | |
30207 | Src = ExtendToType(Src, VT, DAG); |
30208 | Index = ExtendToType(Index, IndexVT, DAG); |
30209 | Mask = ExtendToType(Mask, MaskVT, DAG, true); |
30210 | } |
30211 | |
30212 | SDVTList VTs = DAG.getVTList(MVT::Other); |
30213 | SDValue Ops[] = {Chain, Src, Mask, BasePtr, Index, Scale}; |
30214 | return DAG.getMemIntrinsicNode(X86ISD::MSCATTER, dl, VTs, Ops, |
30215 | N->getMemoryVT(), N->getMemOperand()); |
30216 | } |
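// Editor's note (not in the original source): on AVX-512 without VLX only
// 512-bit scatters are legal, so the code above widens by
// Factor = min(512 / data bits, 512 / index bits). Worked example with
// illustrative types: Src = v4i32 (128 bits), Index = v4i64 (256 bits)
// gives Factor = min(4, 2) = 2, so the op is retyped as v8i32 data,
// v8i64 index, and v8i1 mask, with the mask zero-padded so the extra
// lanes never store.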
30217 | |
30218 | static SDValue LowerMLOAD(SDValue Op, const X86Subtarget &Subtarget, |
30219 | SelectionDAG &DAG) { |
30220 | |
30221 | MaskedLoadSDNode *N = cast<MaskedLoadSDNode>(Op.getNode()); |
30222 | MVT VT = Op.getSimpleValueType(); |
30223 | MVT ScalarVT = VT.getScalarType(); |
30224 | SDValue Mask = N->getMask(); |
30225 | MVT MaskVT = Mask.getSimpleValueType(); |
30226 | SDValue PassThru = N->getPassThru(); |
30227 | SDLoc dl(Op); |
30228 | |
30229 | |
30230 | if (MaskVT.getVectorElementType() != MVT::i1) { |
30231 | |
30232 | if (PassThru.isUndef() || ISD::isBuildVectorAllZeros(PassThru.getNode())) |
30233 | return Op; |
30234 | |
30235 | SDValue NewLoad = DAG.getMaskedLoad( |
30236 | VT, dl, N->getChain(), N->getBasePtr(), N->getOffset(), Mask, |
30237 | getZeroVector(VT, Subtarget, DAG, dl), N->getMemoryVT(), |
30238 | N->getMemOperand(), N->getAddressingMode(), N->getExtensionType(), |
30239 | N->isExpandingLoad()); |
30240 | |
30241 | SDValue Select = DAG.getNode(ISD::VSELECT, dl, VT, Mask, NewLoad, PassThru); |
30242 | return DAG.getMergeValues({ Select, NewLoad.getValue(1) }, dl); |
30243 | } |
30244 | |
30245 | assert((!N->isExpandingLoad() || Subtarget.hasAVX512()) && |
30246 | "Expanding masked load is supported on AVX-512 target only!"); |
30247 | |
30248 | assert((!N->isExpandingLoad() || ScalarVT.getSizeInBits() >= 32) && |
30249 | "Expanding masked load is supported for 32 and 64-bit types only!"); |
30250 | |
30251 | assert(Subtarget.hasAVX512() && !Subtarget.hasVLX() && !VT.is512BitVector() && |
30252 | "Cannot lower masked load op."); |
30253 | |
30254 | assert((ScalarVT.getSizeInBits() >= 32 || |
30255 | (Subtarget.hasBWI() && |
30256 | (ScalarVT == MVT::i8 || ScalarVT == MVT::i16))) && |
30257 | "Unsupported masked load op."); |
30258 | |
30259 | |
30260 | |
30261 | unsigned NumEltsInWideVec = 512 / VT.getScalarSizeInBits(); |
30262 | MVT WideDataVT = MVT::getVectorVT(ScalarVT, NumEltsInWideVec); |
30263 | PassThru = ExtendToType(PassThru, WideDataVT, DAG); |
30264 | |
30265 | |
30266 | assert(Mask.getSimpleValueType().getScalarType() == MVT::i1 && |
30267 | "Unexpected mask type"); |
30268 | |
30269 | MVT WideMaskVT = MVT::getVectorVT(MVT::i1, NumEltsInWideVec); |
30270 | |
30271 | Mask = ExtendToType(Mask, WideMaskVT, DAG, true); |
30272 | SDValue NewLoad = DAG.getMaskedLoad( |
30273 | WideDataVT, dl, N->getChain(), N->getBasePtr(), N->getOffset(), Mask, |
30274 | PassThru, N->getMemoryVT(), N->getMemOperand(), N->getAddressingMode(), |
30275 | N->getExtensionType(), N->isExpandingLoad()); |
30276 | |
30277 | SDValue Extract = |
30278 | DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, NewLoad.getValue(0), |
30279 | DAG.getIntPtrConstant(0, dl)); |
30280 | SDValue RetOps[] = {Extract, NewLoad.getValue(1)}; |
30281 | return DAG.getMergeValues(RetOps, dl); |
30282 | } |
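// Editor's note (not in the original source): the widening above follows
// the same recipe as the scatter path -- extend the pass-through with
// undef, zero-pad the i1 mask so the padded lanes load nothing, perform a
// 512-bit masked load, then EXTRACT_SUBVECTOR at index 0 to recover the
// original narrow type alongside the load's chain.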
30283 | |
30284 | static SDValue LowerMSTORE(SDValue Op, const X86Subtarget &Subtarget, |
30285 | SelectionDAG &DAG) { |
30286 | MaskedStoreSDNode *N = cast<MaskedStoreSDNode>(Op.getNode()); |
30287 | SDValue DataToStore = N->getValue(); |
30288 | MVT VT = DataToStore.getSimpleValueType(); |
30289 | MVT ScalarVT = VT.getScalarType(); |
30290 | SDValue Mask = N->getMask(); |
30291 | SDLoc dl(Op); |
30292 | |
30293 | assert((!N->isCompressingStore() || Subtarget.hasAVX512()) && |
30294 | "Expanding masked load is supported on AVX-512 target only!"); |
30295 | |
30296 | assert((!N->isCompressingStore() || ScalarVT.getSizeInBits() >= 32) && |
30297 | "Expanding masked load is supported for 32 and 64-bit types only!"); |
30298 | |
30299 | assert(Subtarget.hasAVX512() && !Subtarget.hasVLX() && !VT.is512BitVector() && |
30300 | "Cannot lower masked store op."); |
30301 | |
30302 | assert((ScalarVT.getSizeInBits() >= 32 || |
30303 | (Subtarget.hasBWI() && |
30304 | (ScalarVT == MVT::i8 || ScalarVT == MVT::i16))) && |
30305 | "Unsupported masked store op."); |
30306 | |
30307 | |
30308 | |
30309 | unsigned NumEltsInWideVec = 512/VT.getScalarSizeInBits(); |
30310 | MVT WideDataVT = MVT::getVectorVT(ScalarVT, NumEltsInWideVec); |
30311 | |
30312 | |
30313 | assert(Mask.getSimpleValueType().getScalarType() == MVT::i1 && |
30314 | "Unexpected mask type"); |
30315 | |
30316 | MVT WideMaskVT = MVT::getVectorVT(MVT::i1, NumEltsInWideVec); |
30317 | |
30318 | DataToStore = ExtendToType(DataToStore, WideDataVT, DAG); |
30319 | Mask = ExtendToType(Mask, WideMaskVT, DAG, true); |
30320 | return DAG.getMaskedStore(N->getChain(), dl, DataToStore, N->getBasePtr(), |
30321 | N->getOffset(), Mask, N->getMemoryVT(), |
30322 | N->getMemOperand(), N->getAddressingMode(), |
30323 | N->isTruncatingStore(), N->isCompressingStore()); |
30324 | } |
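// Editor's note (not in the original source): stores only need the data
// and mask widened (zero mask lanes never write), so unlike the load path
// there is no pass-through to extend and no extract afterwards.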
30325 | |
30326 | static SDValue LowerMGATHER(SDValue Op, const X86Subtarget &Subtarget, |
30327 | SelectionDAG &DAG) { |
30328 | assert(Subtarget.hasAVX2() && |
30329 | "MGATHER/MSCATTER are supported on AVX-512/AVX-2 arch only"); |
30330 | |
30331 | MaskedGatherSDNode *N = cast<MaskedGatherSDNode>(Op.getNode()); |
30332 | SDLoc dl(Op); |
30333 | MVT VT = Op.getSimpleValueType(); |
30334 | SDValue Index = N->getIndex(); |
30335 | SDValue Mask = N->getMask(); |
30336 | SDValue PassThru = N->getPassThru(); |
30337 | MVT IndexVT = Index.getSimpleValueType(); |
30338 | |
30339 | assert(VT.getScalarSizeInBits() >= 32 && "Unsupported gather op"); |
30340 | |
30341 | |
30342 | if (IndexVT == MVT::v2i32) |
30343 | return SDValue(); |
30344 | |
30345 | |
30346 | |
30347 | MVT OrigVT = VT; |
30348 | if (Subtarget.hasAVX512() && !Subtarget.hasVLX() && !VT.is512BitVector() && |
30349 | !IndexVT.is512BitVector()) { |
30350 | |
30351 | unsigned Factor = std::min(512/VT.getSizeInBits(), |
30352 | 512/IndexVT.getSizeInBits()); |
30353 | |
30354 | unsigned NumElts = VT.getVectorNumElements() * Factor; |
30355 | |
30356 | VT = MVT::getVectorVT(VT.getVectorElementType(), NumElts); |
30357 | IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(), NumElts); |
30358 | MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts); |
30359 | |
30360 | PassThru = ExtendToType(PassThru, VT, DAG); |
30361 | Index = ExtendToType(Index, IndexVT, DAG); |
30362 | Mask = ExtendToType(Mask, MaskVT, DAG, true); |
30363 | } |
30364 | |
30365 | SDValue Ops[] = { N->getChain(), PassThru, Mask, N->getBasePtr(), Index, |
30366 | N->getScale() }; |
30367 | SDValue NewGather = DAG.getMemIntrinsicNode( |
30368 | X86ISD::MGATHER, dl, DAG.getVTList(VT, MVT::Other), Ops, N->getMemoryVT(), |
30369 | N->getMemOperand()); |
30370 | SDValue Extract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OrigVT, |
30371 | NewGather, DAG.getIntPtrConstant(0, dl)); |
30372 | return DAG.getMergeValues({Extract, NewGather.getValue(1)}, dl); |
30373 | } |
30374 | |
30375 | static SDValue LowerADDRSPACECAST(SDValue Op, SelectionDAG &DAG) { |
30376 | SDLoc dl(Op); |
30377 | SDValue Src = Op.getOperand(0); |
30378 | MVT DstVT = Op.getSimpleValueType(); |
30379 | |
30380 | AddrSpaceCastSDNode *N = cast<AddrSpaceCastSDNode>(Op.getNode()); |
30381 | unsigned SrcAS = N->getSrcAddressSpace(); |
30382 | |
30383 | assert(SrcAS != N->getDestAddressSpace() && |
30384 | "addrspacecast must be between different address spaces"); |
30385 | |
30386 | if (SrcAS == X86AS::PTR32_UPTR && DstVT == MVT::i64) { |
30387 | Op = DAG.getNode(ISD::ZERO_EXTEND, dl, DstVT, Src); |
30388 | } else if (DstVT == MVT::i64) { |
30389 | Op = DAG.getNode(ISD::SIGN_EXTEND, dl, DstVT, Src); |
30390 | } else if (DstVT == MVT::i32) { |
30391 | Op = DAG.getNode(ISD::TRUNCATE, dl, DstVT, Src); |
30392 | } else { |
30393 | report_fatal_error("Bad address space in addrspacecast"); |
30394 | } |
30395 | return Op; |
30396 | } |
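// Editor's note (not in the original source): the cast above implements
// x86 mixed-pointer-size semantics: 32-bit unsigned pointers (PTR32_UPTR)
// zero-extend to 64 bits, other 32-to-64 casts sign-extend, and 64-to-32
// casts truncate. A hedged scalar sketch of the rule; names illustrative:
//
//   #include <cstdint>
//   inline uint64_t castPtr32To64(uint32_t P, bool IsUnsignedPtr) {
//     return IsUnsignedPtr ? uint64_t(P)                    // ZERO_EXTEND
//                          : uint64_t(int64_t(int32_t(P))); // SIGN_EXTEND
//   }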
30397 | |
30398 | SDValue X86TargetLowering::LowerGC_TRANSITION(SDValue Op, |
30399 | SelectionDAG &DAG) const { |
30400 | |
30401 | |
30402 | |
30403 | |
30404 | |
30405 | |
30406 | SmallVector<SDValue, 2> Ops; |
30407 | |
30408 | Ops.push_back(Op.getOperand(0)); |
30409 | if (Op->getGluedNode()) |
30410 | Ops.push_back(Op->getOperand(Op->getNumOperands() - 1)); |
30411 | |
30412 | SDLoc OpDL(Op); |
30413 | SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); |
30414 | SDValue NOOP(DAG.getMachineNode(X86::NOOP, SDLoc(Op), VTs, Ops), 0); |
30415 | |
30416 | return NOOP; |
30417 | } |
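// Editor's note (not in the original source): GC transitions currently
// require no code on x86, so the lowering above just threads the chain
// (and any incoming glue) through a NOOP machine node to keep the DAG
// well-formed.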
30418 | |
30419 | |
30420 | static SDValue LowerCVTPS2PH(SDValue Op, SelectionDAG &DAG) { |
30421 | SDLoc dl(Op); |
30422 | EVT VT = Op.getValueType(); |
30423 | SDValue Lo, Hi; |
30424 | std::tie(Lo, Hi) = DAG.SplitVectorOperand(Op.getNode(), 0); |
30425 | EVT LoVT, HiVT; |
30426 | std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT); |
30427 | SDValue RC = Op.getOperand(1); |
30428 | Lo = DAG.getNode(X86ISD::CVTPS2PH, dl, LoVT, Lo, RC); |
30429 | Hi = DAG.getNode(X86ISD::CVTPS2PH, dl, HiVT, Hi, RC); |
30430 | return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi); |
30431 | } |
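// Editor's note (not in the original source): LowerCVTPS2PH splits an
// illegal-width CVTPS2PH into two legal half-width conversions and
// concatenates the results, passing the rounding-control operand (RC)
// unchanged to both halves.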
30432 | |
30433 | |
30434 | SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { |
30435 | switch (Op.getOpcode()) { |
30436 | default: llvm_unreachable("Should not custom lower this!"); |
30437 | case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, Subtarget, DAG); |
30438 | case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: |
30439 | return LowerCMP_SWAP(Op, Subtarget, DAG); |
30440 | case ISD::CTPOP: return LowerCTPOP(Op, Subtarget, DAG); |
30441 | case ISD::ATOMIC_LOAD_ADD: |
30442 | case ISD::ATOMIC_LOAD_SUB: |
30443 | case ISD::ATOMIC_LOAD_OR: |
30444 | case ISD::ATOMIC_LOAD_XOR: |
30445 | case ISD::ATOMIC_LOAD_AND: return lowerAtomicArith(Op, DAG, Subtarget); |
30446 | case ISD::ATOMIC_STORE: return LowerATOMIC_STORE(Op, DAG, Subtarget); |
30447 | case ISD::BITREVERSE: return LowerBITREVERSE(Op, Subtarget, DAG); |
30448 | case ISD::PARITY: return LowerPARITY(Op, Subtarget, DAG); |
30449 | case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG); |
30450 | case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, Subtarget, DAG); |
30451 | case ISD::VECTOR_SHUFFLE: return lowerVECTOR_SHUFFLE(Op, Subtarget, DAG); |
30452 | case ISD::VSELECT: return LowerVSELECT(Op, DAG); |
30453 | case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG); |
30454 | case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG); |
30455 | case ISD::INSERT_SUBVECTOR: return LowerINSERT_SUBVECTOR(Op, Subtarget,DAG); |
30456 | case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op,Subtarget,DAG); |
30457 | case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, Subtarget,DAG); |
30458 | case ISD::ConstantPool: return LowerConstantPool(Op, DAG); |
30459 | case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); |
30460 | case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); |
30461 | case ISD::ExternalSymbol: return LowerExternalSymbol(Op, DAG); |
30462 | case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); |
30463 | case ISD::SHL_PARTS: |
30464 | case ISD::SRA_PARTS: |
30465 | case ISD::SRL_PARTS: return LowerShiftParts(Op, DAG); |
30466 | case ISD::FSHL: |
30467 | case ISD::FSHR: return LowerFunnelShift(Op, Subtarget, DAG); |
30468 | case ISD::STRICT_SINT_TO_FP: |
30469 | case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG); |
30470 | case ISD::STRICT_UINT_TO_FP: |
30471 | case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG); |
30472 | case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG); |
30473 | case ISD::ZERO_EXTEND: return LowerZERO_EXTEND(Op, Subtarget, DAG); |
30474 | case ISD::SIGN_EXTEND: return LowerSIGN_EXTEND(Op, Subtarget, DAG); |
30475 | case ISD::ANY_EXTEND: return LowerANY_EXTEND(Op, Subtarget, DAG); |
30476 | case ISD::ZERO_EXTEND_VECTOR_INREG: |
30477 | case ISD::SIGN_EXTEND_VECTOR_INREG: |
30478 | return LowerEXTEND_VECTOR_INREG(Op, Subtarget, DAG); |
30479 | case ISD::FP_TO_SINT: |
30480 | case ISD::STRICT_FP_TO_SINT: |
30481 | case ISD::FP_TO_UINT: |
30482 | case ISD::STRICT_FP_TO_UINT: return LowerFP_TO_INT(Op, DAG); |
30483 | case ISD::FP_TO_SINT_SAT: |
30484 | case ISD::FP_TO_UINT_SAT: return LowerFP_TO_INT_SAT(Op, DAG); |
30485 | case ISD::FP_EXTEND: |
30486 | case ISD::STRICT_FP_EXTEND: return LowerFP_EXTEND(Op, DAG); |
30487 | case ISD::FP_ROUND: |
30488 | case ISD::STRICT_FP_ROUND: return LowerFP_ROUND(Op, DAG); |
30489 | case ISD::FP16_TO_FP: |
30490 | case ISD::STRICT_FP16_TO_FP: return LowerFP16_TO_FP(Op, DAG); |
30491 | case ISD::FP_TO_FP16: |
30492 | case ISD::STRICT_FP_TO_FP16: return LowerFP_TO_FP16(Op, DAG); |
30493 | case ISD::LOAD: return LowerLoad(Op, Subtarget, DAG); |
30494 | case ISD::STORE: return LowerStore(Op, Subtarget, DAG); |
30495 | case ISD::FADD: |
30496 | case ISD::FSUB: return lowerFaddFsub(Op, DAG); |
30497 | case ISD::FROUND: return LowerFROUND(Op, DAG); |
30498 | case ISD::FABS: |
30499 | case ISD::FNEG: return LowerFABSorFNEG(Op, DAG); |
30500 | case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG); |
30501 | case ISD::FGETSIGN: return LowerFGETSIGN(Op, DAG); |
30502 | case ISD::LRINT: |
30503 | case ISD::LLRINT: return LowerLRINT_LLRINT(Op, DAG); |
30504 | case ISD::SETCC: |
30505 | case ISD::STRICT_FSETCC: |
30506 | case ISD::STRICT_FSETCCS: return LowerSETCC(Op, DAG); |
30507 | case ISD::SETCCCARRY: return LowerSETCCCARRY(Op, DAG); |
30508 | case ISD::SELECT: return LowerSELECT(Op, DAG); |
30509 | case ISD::BRCOND: return LowerBRCOND(Op, DAG); |
30510 | case ISD::JumpTable: return LowerJumpTable(Op, DAG); |
30511 | case ISD::VASTART: return LowerVASTART(Op, DAG); |
30512 | case ISD::VAARG: return LowerVAARG(Op, DAG); |
30513 | case ISD::VACOPY: return LowerVACOPY(Op, Subtarget, DAG); |
30514 | case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG); |
30515 | case ISD::INTRINSIC_VOID: |
30516 | case ISD::INTRINSIC_W_CHAIN: return LowerINTRINSIC_W_CHAIN(Op, Subtarget, DAG); |
30517 | case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); |
30518 | case ISD::ADDROFRETURNADDR: return LowerADDROFRETURNADDR(Op, DAG); |
30519 | case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); |
30520 | case ISD::FRAME_TO_ARGS_OFFSET: |
30521 | return LowerFRAME_TO_ARGS_OFFSET(Op, DAG); |
30522 | case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); |
30523 | case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG); |
30524 | case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG); |
30525 | case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG); |
30526 | case ISD::EH_SJLJ_SETUP_DISPATCH: |
30527 | return lowerEH_SJLJ_SETUP_DISPATCH(Op, DAG); |
30528 | case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG); |
30529 | case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG); |
30530 | case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG); |
30531 | case ISD::SET_ROUNDING: return LowerSET_ROUNDING(Op, DAG); |
30532 | case ISD::CTLZ: |
30533 | case ISD::CTLZ_ZERO_UNDEF: return LowerCTLZ(Op, Subtarget, DAG); |
30534 | case ISD::CTTZ: |
30535 | case ISD::CTTZ_ZERO_UNDEF: return LowerCTTZ(Op, Subtarget, DAG); |
30536 | case ISD::MUL: return LowerMUL(Op, Subtarget, DAG); |
30537 | case ISD::MULHS: |
30538 | case ISD::MULHU: return LowerMULH(Op, Subtarget, DAG); |
30539 | case ISD::ROTL: |
30540 | case ISD::ROTR: return LowerRotate(Op, Subtarget, DAG); |
30541 | case ISD::SRA: |
30542 | case ISD::SRL: |
30543 | case ISD::SHL: return LowerShift(Op, Subtarget, DAG); |
30544 | case ISD::SADDO: |
30545 | case ISD::UADDO: |
30546 | case ISD::SSUBO: |
30547 | case ISD::USUBO: return LowerXALUO(Op, DAG); |
30548 | case ISD::SMULO: |
30549 | case ISD::UMULO: return LowerMULO(Op, Subtarget, DAG); |
30550 | case ISD::READCYCLECOUNTER: return LowerREADCYCLECOUNTER(Op, Subtarget,DAG); |
30551 | case ISD::BITCAST: return LowerBITCAST(Op, Subtarget, DAG); |
30552 | case ISD::SADDO_CARRY: |
30553 | case ISD::SSUBO_CARRY: |
30554 | case ISD::ADDCARRY: |
30555 | case ISD::SUBCARRY: return LowerADDSUBCARRY(Op, DAG); |
30556 | case ISD::ADD: |
30557 | case ISD::SUB: return lowerAddSub(Op, DAG, Subtarget); |
30558 | case ISD::UADDSAT: |
30559 | case ISD::SADDSAT: |
30560 | case ISD::USUBSAT: |
30561 | case ISD::SSUBSAT: return LowerADDSAT_SUBSAT(Op, DAG, Subtarget); |
30562 | case ISD::SMAX: |
30563 | case ISD::SMIN: |
30564 | case ISD::UMAX: |
30565 | case ISD::UMIN: return LowerMINMAX(Op, DAG); |
30566 | case ISD::ABS: return LowerABS(Op, Subtarget, DAG); |
30567 | case ISD::FSINCOS: return LowerFSINCOS(Op, Subtarget, DAG); |
30568 | case ISD::MLOAD: return LowerMLOAD(Op, Subtarget, DAG); |
30569 | case ISD::MSTORE: return LowerMSTORE(Op, Subtarget, DAG); |
30570 | case ISD::MGATHER: return LowerMGATHER(Op, Subtarget, DAG); |
30571 | case ISD::MSCATTER: return LowerMSCATTER(Op, Subtarget, DAG); |
30572 | case ISD::GC_TRANSITION_START: |
30573 | case ISD::GC_TRANSITION_END: return LowerGC_TRANSITION(Op, DAG); |
30574 | case ISD::ADDRSPACECAST: return LowerADDRSPACECAST(Op, DAG); |
30575 | case X86ISD::CVTPS2PH: return LowerCVTPS2PH(Op, DAG); |
30576 | } |
30577 | } |
30578 | |
30579 | |
30580 | |
30581 | void X86TargetLowering::ReplaceNodeResults(SDNode *N, |
30582 | SmallVectorImpl<SDValue>&Results, |
30583 | SelectionDAG &DAG) const { |
30584 | SDLoc dl(N); |
30585 | switch (N->getOpcode()) { |
30586 | default: |
30587 | #ifndef NDEBUG |
30588 | dbgs() << "ReplaceNodeResults: "; |
30589 | N->dump(&DAG); |
30590 | #endif |
30591 | llvm_unreachable("Do not know how to custom type legalize this operation!"); |
30592 | case X86ISD::CVTPH2PS: { |
30593 | EVT VT = N->getValueType(0); |
30594 | SDValue Lo, Hi; |
30595 | std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0); |
30596 | EVT LoVT, HiVT; |
30597 | std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT); |
30598 | Lo = DAG.getNode(X86ISD::CVTPH2PS, dl, LoVT, Lo); |
30599 | Hi = DAG.getNode(X86ISD::CVTPH2PS, dl, HiVT, Hi); |
30600 | SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi); |
30601 | Results.push_back(Res); |
30602 | return; |
30603 | } |
30604 | case X86ISD::STRICT_CVTPH2PS: { |
30605 | EVT VT = N->getValueType(0); |
30606 | SDValue Lo, Hi; |
30607 | std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 1); |
30608 | EVT LoVT, HiVT; |
30609 | std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT); |
30610 | Lo = DAG.getNode(X86ISD::STRICT_CVTPH2PS, dl, {LoVT, MVT::Other}, |
30611 | {N->getOperand(0), Lo}); |
30612 | Hi = DAG.getNode(X86ISD::STRICT_CVTPH2PS, dl, {HiVT, MVT::Other}, |
30613 | {N->getOperand(0), Hi}); |
30614 | SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, |
30615 | Lo.getValue(1), Hi.getValue(1)); |
30616 | SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi); |
30617 | Results.push_back(Res); |
30618 | Results.push_back(Chain); |
30619 | return; |
30620 | } |
30621 | case X86ISD::CVTPS2PH: |
30622 | Results.push_back(LowerCVTPS2PH(SDValue(N, 0), DAG)); |
30623 | return; |
30624 | case ISD::CTPOP: { |
30625 | assert(N->getValueType(0) == MVT::i64 && "Unexpected VT!"); |
30626 | |
30627 | bool NoImplicitFloatOps = |
30628 | DAG.getMachineFunction().getFunction().hasFnAttribute( |
30629 | Attribute::NoImplicitFloat); |
30630 | if (isTypeLegal(MVT::v2i64) && !NoImplicitFloatOps) { |
30631 | SDValue Wide = |
30632 | DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, N->getOperand(0)); |
30633 | Wide = DAG.getNode(ISD::CTPOP, dl, MVT::v2i64, Wide); |
30634 | |
30635 | |
30636 | Wide = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Wide); |
30637 | Wide = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, Wide, |
30638 | DAG.getIntPtrConstant(0, dl)); |
30639 | Wide = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Wide); |
30640 | Results.push_back(Wide); |
30641 | } |
30642 | return; |
30643 | } |
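// Editor's note (not in the original source): the i64 CTPOP case above
// avoids scalar popcount on 32-bit targets by bouncing through the vector
// unit: SCALAR_TO_VECTOR into v2i64, vector CTPOP, then -- since a 64-bit
// popcount is at most 64 and fits in 32 bits -- the result is read back
// from lane 0 of a v4i32 bitcast and zero-extended to i64.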
30644 | case ISD::MUL: { |
30645 | EVT VT = N->getValueType(0); |
30646 | assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector && |
30647 | VT.getVectorElementType() == MVT::i8 && "Unexpected VT!"); |
30648 | |
30649 | |
30650 | MVT MulVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements()); |
30651 | SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, dl, MulVT, N->getOperand(0)); |
30652 | SDValue Op1 = DAG.getNode(ISD::ANY_EXTEND, dl, MulVT, N->getOperand(1)); |
30653 | SDValue Res = DAG.getNode(ISD::MUL, dl, MulVT, Op0, Op1); |
30654 | Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res); |
30655 | unsigned NumConcats = 16 / VT.getVectorNumElements(); |
30656 | SmallVector<SDValue, 8> ConcatOps(NumConcats, DAG.getUNDEF(VT)); |
30657 | ConcatOps[0] = Res; |
30658 | Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v16i8, ConcatOps); |
30659 | Results.push_back(Res); |
30660 | return; |
30661 | } |
30662 | case X86ISD::VPMADDWD: |
30663 | case X86ISD::AVG: { |
30664 | |
30665 | assert(Subtarget.hasSSE2() && "Requires at least SSE2!"); |
30666 | |
30667 | EVT VT = N->getValueType(0); |
30668 | EVT InVT = N->getOperand(0).getValueType(); |
30669 | assert(VT.getSizeInBits() < 128 && 128 % VT.getSizeInBits() == 0 && |
30670 | "Expected a VT that divides into 128 bits."); |
30671 | assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector && |
30672 | "Unexpected type action!"); |
30673 | unsigned NumConcat = 128 / InVT.getSizeInBits(); |
30674 | |
30675 | EVT InWideVT = EVT::getVectorVT(*DAG.getContext(), |
30676 | InVT.getVectorElementType(), |
30677 | NumConcat * InVT.getVectorNumElements()); |
30678 | EVT WideVT = EVT::getVectorVT(*DAG.getContext(), |
30679 | VT.getVectorElementType(), |
30680 | NumConcat * VT.getVectorNumElements()); |
30681 | |
30682 | SmallVector<SDValue, 16> Ops(NumConcat, DAG.getUNDEF(InVT)); |
30683 | Ops[0] = N->getOperand(0); |
30684 | SDValue InVec0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, InWideVT, Ops); |
30685 | Ops[0] = N->getOperand(1); |
30686 | SDValue InVec1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, InWideVT, Ops); |
30687 | |
30688 | SDValue Res = DAG.getNode(N->getOpcode(), dl, WideVT, InVec0, InVec1); |
30689 | Results.push_back(Res); |
30690 | return; |
30691 | } |
30692 | |
30693 | case X86ISD::FMINC: |
30694 | case X86ISD::FMIN: |
30695 | case X86ISD::FMAXC: |
30696 | case X86ISD::FMAX: { |
30697 | EVT VT = N->getValueType(0); |
30698 | assert(VT == MVT::v2f32 && "Unexpected type (!= v2f32) on FMIN/FMAX."); |
30699 | SDValue UNDEF = DAG.getUNDEF(VT); |
30700 | SDValue LHS = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, |
30701 | N->getOperand(0), UNDEF); |
30702 | SDValue RHS = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, |
30703 | N->getOperand(1), UNDEF); |
30704 | Results.push_back(DAG.getNode(N->getOpcode(), dl, MVT::v4f32, LHS, RHS)); |
30705 | return; |
30706 | } |
30707 | case ISD::SDIV: |
30708 | case ISD::UDIV: |
30709 | case ISD::SREM: |
30710 | case ISD::UREM: { |
30711 | EVT VT = N->getValueType(0); |
30712 | if (VT.isVector()) { |
30713 | assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector && |
30714 | "Unexpected type action!"); |
30715 | |
30716 | |
30717 | |
30718 | APInt SplatVal; |
30719 | if (ISD::isConstantSplatVector(N->getOperand(1).getNode(), SplatVal)) { |
30720 | unsigned NumConcats = 128 / VT.getSizeInBits(); |
30721 | SmallVector<SDValue, 8> Ops0(NumConcats, DAG.getUNDEF(VT)); |
30722 | Ops0[0] = N->getOperand(0); |
30723 | EVT ResVT = getTypeToTransformTo(*DAG.getContext(), VT); |
30724 | SDValue N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Ops0); |
30725 | SDValue N1 = DAG.getConstant(SplatVal, dl, ResVT); |
30726 | SDValue Res = DAG.getNode(N->getOpcode(), dl, ResVT, N0, N1); |
30727 | Results.push_back(Res); |
30728 | } |
30729 | return; |
30730 | } |
30731 | |
30732 | SDValue V = LowerWin64_i128OP(SDValue(N,0), DAG); |
30733 | Results.push_back(V); |
30734 | return; |
30735 | } |
30736 | case ISD::TRUNCATE: { |
30737 | MVT VT = N->getSimpleValueType(0); |
30738 | if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector) |
30739 | return; |
30740 | |
30741 | |
30742 | |
30743 | |
30744 | MVT WidenVT = getTypeToTransformTo(*DAG.getContext(), VT).getSimpleVT(); |
30745 | SDValue In = N->getOperand(0); |
30746 | EVT InVT = In.getValueType(); |
30747 | |
30748 | unsigned InBits = InVT.getSizeInBits(); |
30749 | if (128 % InBits == 0) { |
30750 | |
30751 | |
30752 | |
30753 | MVT InEltVT = InVT.getSimpleVT().getVectorElementType(); |
30754 | EVT EltVT = VT.getVectorElementType(); |
30755 | unsigned WidenNumElts = WidenVT.getVectorNumElements(); |
30756 | SmallVector<SDValue, 16> Ops(WidenNumElts, DAG.getUNDEF(EltVT)); |
30757 | |
30758 | |
30759 | unsigned MinElts = VT.getVectorNumElements(); |
30760 | for (unsigned i=0; i < MinElts; ++i) { |
30761 | SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, In, |
30762 | DAG.getIntPtrConstant(i, dl)); |
30763 | Ops[i] = DAG.getNode(ISD::TRUNCATE, dl, EltVT, Val); |
30764 | } |
30765 | Results.push_back(DAG.getBuildVector(WidenVT, dl, Ops)); |
30766 | return; |
30767 | } |
30768 | |
30769 | |
30770 | |
30771 | if (Subtarget.hasAVX512() && isTypeLegal(InVT)) { |
30772 | |
30773 | if ((InBits == 256 && Subtarget.hasVLX()) || InBits == 512) { |
30774 | Results.push_back(DAG.getNode(X86ISD::VTRUNC, dl, WidenVT, In)); |
30775 | return; |
30776 | } |
30777 | |
30778 | if (InVT == MVT::v4i64 && VT == MVT::v4i8 && isTypeLegal(MVT::v8i64)) { |
30779 | In = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i64, In, |
30780 | DAG.getUNDEF(MVT::v4i64)); |
30781 | Results.push_back(DAG.getNode(X86ISD::VTRUNC, dl, WidenVT, In)); |
30782 | return; |
30783 | } |
30784 | } |
30785 | if (Subtarget.hasVLX() && InVT == MVT::v8i64 && VT == MVT::v8i8 && |
30786 | getTypeAction(*DAG.getContext(), InVT) == TypeSplitVector && |
30787 | isTypeLegal(MVT::v4i64)) { |
30788 | |
30789 | |
30790 | SDValue Lo, Hi; |
30791 | std::tie(Lo, Hi) = DAG.SplitVector(In, dl); |
30792 | |
30793 | Lo = DAG.getNode(X86ISD::VTRUNC, dl, MVT::v16i8, Lo); |
30794 | Hi = DAG.getNode(X86ISD::VTRUNC, dl, MVT::v16i8, Hi); |
30795 | SDValue Res = DAG.getVectorShuffle(MVT::v16i8, dl, Lo, Hi, |
30796 | { 0, 1, 2, 3, 16, 17, 18, 19, |
30797 | -1, -1, -1, -1, -1, -1, -1, -1 }); |
30798 | Results.push_back(Res); |
30799 | return; |
30800 | } |
30801 | |
30802 | return; |
30803 | } |
30804 | case ISD::ANY_EXTEND: |
30805 | |
30806 | |
30807 | assert(N->getValueType(0) == MVT::v8i8 && |
30808 | "Do not know how to legalize this Node"); |
30809 | return; |
30810 | case ISD::SIGN_EXTEND: |
30811 | case ISD::ZERO_EXTEND: { |
30812 | EVT VT = N->getValueType(0); |
30813 | SDValue In = N->getOperand(0); |
30814 | EVT InVT = In.getValueType(); |
30815 | if (!Subtarget.hasSSE41() && VT == MVT::v4i64 && |
30816 | (InVT == MVT::v4i16 || InVT == MVT::v4i8)){ |
30817 | assert(getTypeAction(*DAG.getContext(), InVT) == TypeWidenVector && |
30818 | "Unexpected type action!"); |
30819 | assert(N->getOpcode() == ISD::SIGN_EXTEND && "Unexpected opcode"); |
30820 | |
30821 | |
30822 | |
30823 | |
30824 | In = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, In); |
30825 | |
30826 | |
30827 | SDValue Zero = DAG.getConstant(0, dl, MVT::v4i32); |
30828 | SDValue SignBits = DAG.getSetCC(dl, MVT::v4i32, Zero, In, ISD::SETGT); |
30829 | |
30830 | |
30831 | |
30832 | SDValue Lo = DAG.getVectorShuffle(MVT::v4i32, dl, In, SignBits, |
30833 | {0, 4, 1, 5}); |
30834 | Lo = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Lo); |
30835 | SDValue Hi = DAG.getVectorShuffle(MVT::v4i32, dl, In, SignBits, |
30836 | {2, 6, 3, 7}); |
30837 | Hi = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Hi); |
30838 | |
30839 | SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi); |
30840 | Results.push_back(Res); |
30841 | return; |
30842 | } |
30843 | |
30844 | if (VT == MVT::v16i32 || VT == MVT::v8i64) { |
30845 | if (!InVT.is128BitVector()) { |
30846 | |
30847 | |
30848 | if (getTypeAction(*DAG.getContext(), InVT) != TypePromoteInteger) |
30849 | return; |
30850 | InVT = getTypeToTransformTo(*DAG.getContext(), InVT); |
30851 | if (!InVT.is128BitVector()) |
30852 | return; |
30853 | |
30854 | |
30855 | |
30856 | In = DAG.getNode(N->getOpcode(), dl, InVT, In); |
30857 | } |
30858 | |
30859 | |
30860 | |
30861 | EVT LoVT, HiVT; |
30862 | std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); |
30863 | assert(isTypeLegal(LoVT) && "Split VT not legal?"); |
30864 | |
30865 | SDValue Lo = getEXTEND_VECTOR_INREG(N->getOpcode(), dl, LoVT, In, DAG); |
30866 | |
30867 | |
30868 | unsigned NumElts = InVT.getVectorNumElements(); |
30869 | unsigned HalfNumElts = NumElts / 2; |
30870 | SmallVector<int, 16> ShufMask(NumElts, SM_SentinelUndef); |
30871 | for (unsigned i = 0; i != HalfNumElts; ++i) |
30872 | ShufMask[i] = i + HalfNumElts; |
30873 | |
30874 | SDValue Hi = DAG.getVectorShuffle(InVT, dl, In, In, ShufMask); |
30875 | Hi = getEXTEND_VECTOR_INREG(N->getOpcode(), dl, HiVT, Hi, DAG); |
30876 | |
30877 | SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi); |
30878 | Results.push_back(Res); |
30879 | } |
30880 | return; |
30881 | } |
30882 | case ISD::FP_TO_SINT: |
30883 | case ISD::STRICT_FP_TO_SINT: |
30884 | case ISD::FP_TO_UINT: |
30885 | case ISD::STRICT_FP_TO_UINT: { |
30886 | bool IsStrict = N->isStrictFPOpcode(); |
30887 | bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT || |
30888 | N->getOpcode() == ISD::STRICT_FP_TO_SINT; |
30889 | EVT VT = N->getValueType(0); |
30890 | SDValue Src = N->getOperand(IsStrict ? 1 : 0); |
30891 | EVT SrcVT = Src.getValueType(); |
30892 | |
30893 | if (VT.isVector() && VT.getScalarSizeInBits() < 32) { |
30894 | assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector && |
30895 | "Unexpected type action!"); |
30896 | |
30897 | |
30898 | unsigned NewEltWidth = std::min(128 / VT.getVectorNumElements(), 32U); |
30899 | MVT PromoteVT = MVT::getVectorVT(MVT::getIntegerVT(NewEltWidth), |
30900 | VT.getVectorNumElements()); |
30901 | SDValue Res; |
30902 | SDValue Chain; |
30903 | if (IsStrict) { |
30904 | Res = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, {PromoteVT, MVT::Other}, |
30905 | {N->getOperand(0), Src}); |
30906 | Chain = Res.getValue(1); |
30907 | } else |
30908 | Res = DAG.getNode(ISD::FP_TO_SINT, dl, PromoteVT, Src); |
30909 | |
30910 | |
30911 | |
30912 | if (PromoteVT == MVT::v2i32) |
30913 | Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32, Res, |
30914 | DAG.getUNDEF(MVT::v2i32)); |
30915 | |
30916 | Res = DAG.getNode(!IsSigned ? ISD::AssertZext : ISD::AssertSext, dl, |
30917 | Res.getValueType(), Res, |
30918 | DAG.getValueType(VT.getVectorElementType())); |
30919 | |
30920 | if (PromoteVT == MVT::v2i32) |
30921 | Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res, |
30922 | DAG.getIntPtrConstant(0, dl)); |
30923 | |
30924 | |
30925 | Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res); |
30926 | |
30927 | |
30928 | unsigned NumConcats = 128 / VT.getSizeInBits(); |
30929 | MVT ConcatVT = MVT::getVectorVT(VT.getSimpleVT().getVectorElementType(), |
30930 | VT.getVectorNumElements() * NumConcats); |
30931 | SmallVector<SDValue, 8> ConcatOps(NumConcats, DAG.getUNDEF(VT)); |
30932 | ConcatOps[0] = Res; |
30933 | Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, ConcatVT, ConcatOps); |
30934 | Results.push_back(Res); |
30935 | if (IsStrict) |
30936 | Results.push_back(Chain); |
30937 | return; |
30938 | } |
30939 | |
30940 | |
30941 | if (VT == MVT::v2i32) { |
30942 | assert((!IsStrict || IsSigned || Subtarget.hasAVX512()) && |
30943 | "Strict unsigned conversion requires AVX512"); |
30944 | assert(Subtarget.hasSSE2() && "Requires at least SSE2!"); |
30945 | assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector && |
30946 | "Unexpected type action!"); |
30947 | if (Src.getValueType() == MVT::v2f64) { |
30948 | if (!IsSigned && !Subtarget.hasAVX512()) { |
30949 | SDValue Res = |
30950 | expandFP_TO_UINT_SSE(MVT::v4i32, Src, dl, DAG, Subtarget); |
30951 | Results.push_back(Res); |
30952 | return; |
30953 | } |
30954 | |
30955 | unsigned Opc; |
30956 | if (IsStrict) |
30957 | Opc = IsSigned ? X86ISD::STRICT_CVTTP2SI : X86ISD::STRICT_CVTTP2UI; |
30958 | else |
30959 | Opc = IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI; |
30960 | |
30961 | |
30962 | if (!IsSigned && !Subtarget.hasVLX()) { |
30963 | |
30964 | |
30965 | |
30966 | |
30967 | |
30968 | if (!IsStrict) |
30969 | return; |
30970 | Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f64, Src, |
30971 | DAG.getConstantFP(0.0, dl, MVT::v2f64)); |
30972 | Opc = N->getOpcode(); |
30973 | } |
30974 | SDValue Res; |
30975 | SDValue Chain; |
30976 | if (IsStrict) { |
30977 | Res = DAG.getNode(Opc, dl, {MVT::v4i32, MVT::Other}, |
30978 | {N->getOperand(0), Src}); |
30979 | Chain = Res.getValue(1); |
30980 | } else { |
30981 | Res = DAG.getNode(Opc, dl, MVT::v4i32, Src); |
30982 | } |
30983 | Results.push_back(Res); |
30984 | if (IsStrict) |
30985 | Results.push_back(Chain); |
30986 | return; |
30987 | } |
30988 | |
30989 | |
30990 | |
30991 | if (Src.getValueType() == MVT::v2f32 && IsStrict) { |
30992 | Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src, |
30993 | DAG.getConstantFP(0.0, dl, MVT::v2f32)); |
30994 | SDValue Res = DAG.getNode(N->getOpcode(), dl, {MVT::v4i32, MVT::Other}, |
30995 | {N->getOperand(0), Src}); |
30996 | Results.push_back(Res); |
30997 | Results.push_back(Res.getValue(1)); |
30998 | return; |
30999 | } |
31000 | |
31001 | |
31002 | |
31003 | return; |
31004 | } |
31005 | |
31006 | assert(!VT.isVector() && "Vectors should have been handled above!"); |
31007 | |
31008 | if (Subtarget.hasDQI() && VT == MVT::i64 && |
31009 | (SrcVT == MVT::f32 || SrcVT == MVT::f64)) { |
31010 | assert(!Subtarget.is64Bit() && "i64 should be legal"); |
31011 | unsigned NumElts = Subtarget.hasVLX() ? 2 : 8; |
31012 | |
31013 | unsigned SrcElts = |
31014 | std::max(NumElts, 128U / (unsigned)SrcVT.getSizeInBits()); |
31015 | MVT VecVT = MVT::getVectorVT(MVT::i64, NumElts); |
31016 | MVT VecInVT = MVT::getVectorVT(SrcVT.getSimpleVT(), SrcElts); |
31017 | unsigned Opc = N->getOpcode(); |
31018 | if (NumElts != SrcElts) { |
31019 | if (IsStrict) |
31020 | Opc = IsSigned ? X86ISD::STRICT_CVTTP2SI : X86ISD::STRICT_CVTTP2UI; |
31021 | else |
31022 | Opc = IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI; |
31023 | } |
31024 | |
31025 | SDValue ZeroIdx = DAG.getIntPtrConstant(0, dl); |
31026 | SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecInVT, |
31027 | DAG.getConstantFP(0.0, dl, VecInVT), Src, |
31028 | ZeroIdx); |
31029 | SDValue Chain; |
31030 | if (IsStrict) { |
31031 | SDVTList Tys = DAG.getVTList(VecVT, MVT::Other); |
31032 | Res = DAG.getNode(Opc, SDLoc(N), Tys, N->getOperand(0), Res); |
31033 | Chain = Res.getValue(1); |
31034 | } else |
31035 | Res = DAG.getNode(Opc, SDLoc(N), VecVT, Res); |
31036 | Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Res, ZeroIdx); |
31037 | Results.push_back(Res); |
31038 | if (IsStrict) |
31039 | Results.push_back(Chain); |
31040 | return; |
31041 | } |
31042 | |
31043 | SDValue Chain; |
31044 | if (SDValue V = FP_TO_INTHelper(SDValue(N, 0), DAG, IsSigned, Chain)) { |
31045 | Results.push_back(V); |
31046 | if (IsStrict) |
31047 | Results.push_back(Chain); |
31048 | } |
31049 | return; |
31050 | } |
31051 | case ISD::LRINT: |
31052 | case ISD::LLRINT: { |
31053 | if (SDValue V = LRINT_LLRINTHelper(N, DAG)) |
31054 | Results.push_back(V); |
31055 | return; |
31056 | } |
31057 | |
31058 | case ISD::SINT_TO_FP: |
31059 | case ISD::STRICT_SINT_TO_FP: |
31060 | case ISD::UINT_TO_FP: |
31061 | case ISD::STRICT_UINT_TO_FP: { |
31062 | bool IsStrict = N->isStrictFPOpcode(); |
31063 | bool IsSigned = N->getOpcode() == ISD::SINT_TO_FP || |
31064 | N->getOpcode() == ISD::STRICT_SINT_TO_FP; |
31065 | EVT VT = N->getValueType(0); |
31066 | if (VT != MVT::v2f32) |
31067 | return; |
31068 | SDValue Src = N->getOperand(IsStrict ? 1 : 0); |
31069 | EVT SrcVT = Src.getValueType(); |
31070 | if (Subtarget.hasDQI() && Subtarget.hasVLX() && SrcVT == MVT::v2i64) { |
31071 | if (IsStrict) { |
31072 | unsigned Opc = IsSigned ? X86ISD::STRICT_CVTSI2P |
31073 | : X86ISD::STRICT_CVTUI2P; |
31074 | SDValue Res = DAG.getNode(Opc, dl, {MVT::v4f32, MVT::Other}, |
31075 | {N->getOperand(0), Src}); |
31076 | Results.push_back(Res); |
31077 | Results.push_back(Res.getValue(1)); |
31078 | } else { |
31079 | unsigned Opc = IsSigned ? X86ISD::CVTSI2P : X86ISD::CVTUI2P; |
31080 | Results.push_back(DAG.getNode(Opc, dl, MVT::v4f32, Src)); |
31081 | } |
31082 | return; |
31083 | } |
31084 | if (SrcVT == MVT::v2i64 && !IsSigned && Subtarget.is64Bit() && |
31085 | Subtarget.hasSSE41() && !Subtarget.hasAVX512()) { |
31086 | SDValue Zero = DAG.getConstant(0, dl, SrcVT); |
31087 | SDValue One = DAG.getConstant(1, dl, SrcVT); |
31088 | SDValue Sign = DAG.getNode(ISD::OR, dl, SrcVT, |
31089 | DAG.getNode(ISD::SRL, dl, SrcVT, Src, One), |
31090 | DAG.getNode(ISD::AND, dl, SrcVT, Src, One)); |
31091 | SDValue IsNeg = DAG.getSetCC(dl, MVT::v2i64, Src, Zero, ISD::SETLT); |
31092 | SDValue SignSrc = DAG.getSelect(dl, SrcVT, IsNeg, Sign, Src); |
31093 | SmallVector<SDValue, 4> SignCvts(4, DAG.getConstantFP(0.0, dl, MVT::f32)); |
31094 | for (int i = 0; i != 2; ++i) { |
31095 | SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, |
31096 | SignSrc, DAG.getIntPtrConstant(i, dl)); |
31097 | if (IsStrict) |
31098 | SignCvts[i] = |
31099 | DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, {MVT::f32, MVT::Other}, |
31100 | {N->getOperand(0), Elt}); |
31101 | else |
31102 | SignCvts[i] = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, Elt); |
31103 | } |
31104 | SDValue SignCvt = DAG.getBuildVector(MVT::v4f32, dl, SignCvts); |
31105 | SDValue Slow, Chain; |
31106 | if (IsStrict) { |
31107 | Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, |
31108 | SignCvts[0].getValue(1), SignCvts[1].getValue(1)); |
31109 | Slow = DAG.getNode(ISD::STRICT_FADD, dl, {MVT::v4f32, MVT::Other}, |
31110 | {Chain, SignCvt, SignCvt}); |
31111 | Chain = Slow.getValue(1); |
31112 | } else { |
31113 | Slow = DAG.getNode(ISD::FADD, dl, MVT::v4f32, SignCvt, SignCvt); |
31114 | } |
31115 | IsNeg = DAG.getBitcast(MVT::v4i32, IsNeg); |
31116 | IsNeg = |
31117 | DAG.getVectorShuffle(MVT::v4i32, dl, IsNeg, IsNeg, {1, 3, -1, -1}); |
31118 | SDValue Cvt = DAG.getSelect(dl, MVT::v4f32, IsNeg, Slow, SignCvt); |
31119 | Results.push_back(Cvt); |
31120 | if (IsStrict) |
31121 | Results.push_back(Chain); |
31122 | return; |
31123 | } |
31124 | |
31125 | if (SrcVT != MVT::v2i32) |
31126 | return; |
31127 | |
31128 | if (IsSigned || Subtarget.hasAVX512()) { |
31129 | if (!IsStrict) |
31130 | return; |
31131 | |
31132 | |
31133 | |
31134 | Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32, Src, |
31135 | DAG.getConstant(0, dl, MVT::v2i32)); |
31136 | SDValue Res = DAG.getNode(N->getOpcode(), dl, {MVT::v4f32, MVT::Other}, |
31137 | {N->getOperand(0), Src}); |
31138 | Results.push_back(Res); |
31139 | Results.push_back(Res.getValue(1)); |
31140 | return; |
31141 | } |
31142 | |
31143 | assert(Subtarget.hasSSE2() && "Requires at least SSE2!"); |
31144 | SDValue ZExtIn = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v2i64, Src); |
31145 | SDValue VBias = |
31146 | DAG.getConstantFP(BitsToDouble(0x4330000000000000ULL), dl, MVT::v2f64); |
31147 | SDValue Or = DAG.getNode(ISD::OR, dl, MVT::v2i64, ZExtIn, |
31148 | DAG.getBitcast(MVT::v2i64, VBias)); |
31149 | Or = DAG.getBitcast(MVT::v2f64, Or); |
31150 | if (IsStrict) { |
31151 | SDValue Sub = DAG.getNode(ISD::STRICT_FSUB, dl, {MVT::v2f64, MVT::Other}, |
31152 | {N->getOperand(0), Or, VBias}); |
31153 | SDValue Res = DAG.getNode(X86ISD::STRICT_VFPROUND, dl, |
31154 | {MVT::v4f32, MVT::Other}, |
31155 | {Sub.getValue(1), Sub}); |
31156 | Results.push_back(Res); |
31157 | Results.push_back(Res.getValue(1)); |
31158 | } else { |
31159 | |
31160 | SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, Or, VBias); |
31161 | Results.push_back(DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, Sub)); |
31162 | } |
31163 | return; |
31164 | } |
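// Editor's note (not in the original source): the v2i32 unsigned path
// above uses the classic 2^52 bias trick: 0x4330000000000000 is the f64
// bit pattern of 2^52, so OR-ing a zero-extended 32-bit lane into its
// mantissa yields exactly 2^52 + x, and subtracting VBias recovers x with
// no rounding (x < 2^32 << 2^52). A hedged scalar sketch; names
// illustrative:
//
//   #include <cstdint>
//   #include <cstring>
//   inline double u32ToDouble(uint32_t X) {
//     uint64_t Bits = 0x4330000000000000ULL | X; // mantissa now holds X
//     double D;
//     std::memcpy(&D, &Bits, sizeof(D));         // bitcast to f64
//     return D - 4503599627370496.0;             // subtract 2^52 exactly
//   }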
31165 | case ISD::STRICT_FP_ROUND: |
31166 | case ISD::FP_ROUND: { |
31167 | bool IsStrict = N->isStrictFPOpcode(); |
31168 | SDValue Src = N->getOperand(IsStrict ? 1 : 0); |
31169 | if (!isTypeLegal(Src.getValueType())) |
31170 | return; |
31171 | SDValue V; |
31172 | if (IsStrict) |
31173 | V = DAG.getNode(X86ISD::STRICT_VFPROUND, dl, {MVT::v4f32, MVT::Other}, |
31174 | {N->getOperand(0), N->getOperand(1)}); |
31175 | else |
31176 | V = DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, N->getOperand(0)); |
31177 | Results.push_back(V); |
31178 | if (IsStrict) |
31179 | Results.push_back(V.getValue(1)); |
31180 | return; |
31181 | } |
31182 | case ISD::FP_EXTEND: |
31183 | case ISD::STRICT_FP_EXTEND: { |
31184 | |
31185 | |
31186 | assert(N->getValueType(0) == MVT::v2f32 && |
31187 | "Do not know how to legalize this Node"); |
31188 | return; |
31189 | } |
31190 | case ISD::INTRINSIC_W_CHAIN: { |
31191 | unsigned IntNo = N->getConstantOperandVal(1); |
31192 | switch (IntNo) { |
31193 | default : llvm_unreachable("Do not know how to custom type " |
31194 | "legalize this intrinsic operation!"); |
31195 | case Intrinsic::x86_rdtsc: |
31196 | return getReadTimeStampCounter(N, dl, X86::RDTSC, DAG, Subtarget, |
31197 | Results); |
31198 | case Intrinsic::x86_rdtscp: |
31199 | return getReadTimeStampCounter(N, dl, X86::RDTSCP, DAG, Subtarget, |
31200 | Results); |
31201 | case Intrinsic::x86_rdpmc: |
31202 | expandIntrinsicWChainHelper(N, dl, DAG, X86::RDPMC, X86::ECX, Subtarget, |
31203 | Results); |
31204 | return; |
31205 | case Intrinsic::x86_xgetbv: |
31206 | expandIntrinsicWChainHelper(N, dl, DAG, X86::XGETBV, X86::ECX, Subtarget, |
31207 | Results); |
31208 | return; |
31209 | } |
31210 | } |
31211 | case ISD::READCYCLECOUNTER: { |
31212 | return getReadTimeStampCounter(N, dl, X86::RDTSC, DAG, Subtarget, Results); |
31213 | } |
31214 | case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: { |
31215 | EVT T = N->getValueType(0); |
31216 | assert((T == MVT::i64 || T == MVT::i128) && "can only expand cmpxchg pair"); |
31217 | bool Regs64bit = T == MVT::i128; |
31218 | assert((!Regs64bit || Subtarget.hasCmpxchg16b()) && |
31219 | "64-bit ATOMIC_CMP_SWAP_WITH_SUCCESS requires CMPXCHG16B"); |
31220 | MVT HalfT = Regs64bit ? MVT::i64 : MVT::i32; |
31221 | SDValue cpInL, cpInH; |
31222 | cpInL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(2), |
31223 | DAG.getConstant(0, dl, HalfT)); |
31224 | cpInH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(2), |
31225 | DAG.getConstant(1, dl, HalfT)); |
31226 | cpInL = DAG.getCopyToReg(N->getOperand(0), dl, |
31227 | Regs64bit ? X86::RAX : X86::EAX, |
31228 | cpInL, SDValue()); |
31229 | cpInH = DAG.getCopyToReg(cpInL.getValue(0), dl, |
31230 | Regs64bit ? X86::RDX : X86::EDX, |
31231 | cpInH, cpInL.getValue(1)); |
31232 | SDValue swapInL, swapInH; |
31233 | swapInL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(3), |
31234 | DAG.getConstant(0, dl, HalfT)); |
31235 | swapInH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(3), |
31236 | DAG.getConstant(1, dl, HalfT)); |
31237 | swapInH = |
31238 | DAG.getCopyToReg(cpInH.getValue(0), dl, Regs64bit ? X86::RCX : X86::ECX, |
31239 | swapInH, cpInH.getValue(1)); |
31240 | |
31241 | |
31242 | |
31243 | |
31244 | |
31245 | |
31246 | |
31247 | SDValue Result; |
31248 | SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue); |
31249 | MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand(); |
31250 | if (Regs64bit) { |
31251 | SDValue Ops[] = {swapInH.getValue(0), N->getOperand(1), swapInL, |
31252 | swapInH.getValue(1)}; |
31253 | Result = |
31254 | DAG.getMemIntrinsicNode(X86ISD::LCMPXCHG16_DAG, dl, Tys, Ops, T, MMO); |
31255 | } else { |
31256 | swapInL = DAG.getCopyToReg(swapInH.getValue(0), dl, X86::EBX, swapInL, |
31257 | swapInH.getValue(1)); |
31258 | SDValue Ops[] = {swapInL.getValue(0), N->getOperand(1), |
31259 | swapInL.getValue(1)}; |
31260 | Result = |
31261 | DAG.getMemIntrinsicNode(X86ISD::LCMPXCHG8_DAG, dl, Tys, Ops, T, MMO); |
31262 | } |
31263 | |
31264 | SDValue cpOutL = DAG.getCopyFromReg(Result.getValue(0), dl, |
31265 | Regs64bit ? X86::RAX : X86::EAX, |
31266 | HalfT, Result.getValue(1)); |
31267 | SDValue cpOutH = DAG.getCopyFromReg(cpOutL.getValue(1), dl, |
31268 | Regs64bit ? X86::RDX : X86::EDX, |
31269 | HalfT, cpOutL.getValue(2)); |
31270 | SDValue OpsF[] = { cpOutL.getValue(0), cpOutH.getValue(0)}; |
31271 | |
31272 | SDValue EFLAGS = DAG.getCopyFromReg(cpOutH.getValue(1), dl, X86::EFLAGS, |
31273 | MVT::i32, cpOutH.getValue(2)); |
31274 | SDValue Success = getSETCC(X86::COND_E, EFLAGS, dl, DAG); |
31275 | Success = DAG.getZExtOrTrunc(Success, dl, N->getValueType(1)); |
31276 | |
31277 | Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, T, OpsF)); |
31278 | Results.push_back(Success); |
31279 | Results.push_back(EFLAGS.getValue(1)); |
31280 | return; |
31281 | } |
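// Editor's note (not in the original source): the expansion above is the
// standard CMPXCHG8B/CMPXCHG16B protocol: expected value in EDX:EAX (or
// RDX:RAX), replacement in ECX:EBX (or RCX:RBX), success reported via
// EFLAGS.ZF -- hence the COND_E SETCC -- and the previous memory value
// left in EDX:EAX / RDX:RAX for the BUILD_PAIR result.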
31282 | case ISD::ATOMIC_LOAD: { |
31283 | assert(N->getValueType(0) == MVT::i64 && "Unexpected VT!"); |
31284 | bool NoImplicitFloatOps = |
31285 | DAG.getMachineFunction().getFunction().hasFnAttribute( |
31286 | Attribute::NoImplicitFloat); |
31287 | if (!Subtarget.useSoftFloat() && !NoImplicitFloatOps) { |
31288 | auto *Node = cast<AtomicSDNode>(N); |
31289 | if (Subtarget.hasSSE1()) { |
31290 | |
31291 | |
31292 | MVT LdVT = Subtarget.hasSSE2() ? MVT::v2i64 : MVT::v4f32; |
31293 | SDVTList Tys = DAG.getVTList(LdVT, MVT::Other); |
31294 | SDValue Ops[] = { Node->getChain(), Node->getBasePtr() }; |
31295 | SDValue Ld = DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, |
31296 | MVT::i64, Node->getMemOperand()); |
31297 | if (Subtarget.hasSSE2()) { |
31298 | SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Ld, |
31299 | DAG.getIntPtrConstant(0, dl)); |
31300 | Results.push_back(Res); |
31301 | Results.push_back(Ld.getValue(1)); |
31302 | return; |
31303 | } |
31304 | |
31305 | |
31306 | |
31307 | SDValue Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2f32, Ld, |
31308 | DAG.getIntPtrConstant(0, dl)); |
31309 | Res = DAG.getBitcast(MVT::i64, Res); |
31310 | Results.push_back(Res); |
31311 | Results.push_back(Ld.getValue(1)); |
31312 | return; |
31313 | } |
31314 | if (Subtarget.hasX87()) { |
31315 | |
31316 | |
31317 | SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other); |
31318 | SDValue Ops[] = { Node->getChain(), Node->getBasePtr() }; |
31319 | SDValue Result = DAG.getMemIntrinsicNode(X86ISD::FILD, |
31320 | dl, Tys, Ops, MVT::i64, |
31321 | Node->getMemOperand()); |
31322 | SDValue Chain = Result.getValue(1); |
31323 | |
31324 | |
31325 | |
31326 | |
31327 | |
31328 | SDValue StackPtr = DAG.CreateStackTemporary(MVT::i64); |
31329 | int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); |
31330 | MachinePointerInfo MPI = |
31331 | MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI); |
31332 | SDValue StoreOps[] = { Chain, Result, StackPtr }; |
31333 | Chain = DAG.getMemIntrinsicNode( |
31334 | X86ISD::FIST, dl, DAG.getVTList(MVT::Other), StoreOps, MVT::i64, |
31335 | MPI, None, MachineMemOperand::MOStore); |
31336 | |
31337 | |
31338 | |
31339 | |
31340 | Result = DAG.getLoad(MVT::i64, dl, Chain, StackPtr, MPI); |
31341 | Results.push_back(Result); |
31342 | Results.push_back(Result.getValue(1)); |
31343 | return; |
31344 | } |
31345 | } |
31346 | |
31347 | |
31348 | |
31349 | break; |
31350 | } |
31351 | case ISD::ATOMIC_SWAP: |
31352 | case ISD::ATOMIC_LOAD_ADD: |
31353 | case ISD::ATOMIC_LOAD_SUB: |
31354 | case ISD::ATOMIC_LOAD_AND: |
31355 | case ISD::ATOMIC_LOAD_OR: |
31356 | case ISD::ATOMIC_LOAD_XOR: |
31357 | case ISD::ATOMIC_LOAD_NAND: |
31358 | case ISD::ATOMIC_LOAD_MIN: |
31359 | case ISD::ATOMIC_LOAD_MAX: |
31360 | case ISD::ATOMIC_LOAD_UMIN: |
31361 | case ISD::ATOMIC_LOAD_UMAX: |
31362 | |
31363 | |
31364 | break; |
31365 | |
31366 | case ISD::BITCAST: { |
31367 | assert(Subtarget.hasSSE2() && "Requires at least SSE2!"); |
31368 | EVT DstVT = N->getValueType(0); |
31369 | EVT SrcVT = N->getOperand(0).getValueType(); |
31370 | |
31371 | |
31372 | |
31373 | if (SrcVT == MVT::v64i1 && DstVT == MVT::i64 && Subtarget.hasBWI()) { |
31374 | assert(!Subtarget.is64Bit() && "Expected 32-bit mode"); |
31375 | SDValue Lo, Hi; |
31376 | std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0); |
31377 | Lo = DAG.getBitcast(MVT::i32, Lo); |
31378 | Hi = DAG.getBitcast(MVT::i32, Hi); |
31379 | SDValue Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi); |
31380 | Results.push_back(Res); |
31381 | return; |
31382 | } |
31383 | |
31384 | if (DstVT.isVector() && SrcVT == MVT::x86mmx) { |
31385 | |
31386 | assert(Subtarget.hasSSE2() && "Requires SSE2"); |
31387 | assert(getTypeAction(*DAG.getContext(), DstVT) == TypeWidenVector && |
31388 | "Unexpected type action!"); |
31389 | EVT WideVT = getTypeToTransformTo(*DAG.getContext(), DstVT); |
31390 | SDValue Res = DAG.getNode(X86ISD::MOVQ2DQ, dl, MVT::v2i64, |
31391 | N->getOperand(0)); |
31392 | Res = DAG.getBitcast(WideVT, Res); |
31393 | Results.push_back(Res); |
31394 | return; |
31395 | } |
31396 | |
31397 | return; |
31398 | } |
31399 | case ISD::MGATHER: { |
31400 | EVT VT = N->getValueType(0); |
31401 | if ((VT == MVT::v2f32 || VT == MVT::v2i32) && |
31402 | (Subtarget.hasVLX() || !Subtarget.hasAVX512())) { |
31403 | auto *Gather = cast<MaskedGatherSDNode>(N); |
31404 | SDValue Index = Gather->getIndex(); |
31405 | if (Index.getValueType() != MVT::v2i64) |
31406 | return; |
31407 | assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector && |
31408 | "Unexpected type action!"); |
31409 | EVT WideVT = getTypeToTransformTo(*DAG.getContext(), VT); |
31410 | SDValue Mask = Gather->getMask(); |
31411 | assert(Mask.getValueType() == MVT::v2i1 && "Unexpected mask type"); |
31412 | SDValue PassThru = DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, |
31413 | Gather->getPassThru(), |
31414 | DAG.getUNDEF(VT)); |
31415 | if (!Subtarget.hasVLX()) { |
31416 | |
31417 | |
31418 | Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i1, Mask, |
31419 | DAG.getUNDEF(MVT::v2i1)); |
31420 | Mask = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Mask); |
31421 | } |
31422 | SDValue Ops[] = { Gather->getChain(), PassThru, Mask, |
31423 | Gather->getBasePtr(), Index, Gather->getScale() }; |
31424 | SDValue Res = DAG.getMemIntrinsicNode( |
31425 | X86ISD::MGATHER, dl, DAG.getVTList(WideVT, MVT::Other), Ops, |
31426 | Gather->getMemoryVT(), Gather->getMemOperand()); |
31427 | Results.push_back(Res); |
31428 | Results.push_back(Res.getValue(1)); |
31429 | return; |
31430 | } |
31431 | return; |
31432 | } |
31433 | case ISD::LOAD: { |
31434 | |
31435 | |
31436 | |
31437 | MVT VT = N->getSimpleValueType(0); |
31438 | assert(VT.isVector() && VT.getSizeInBits() == 64 && "Unexpected VT"); |
31439 | assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector && |
31440 | "Unexpected type action!"); |
31441 | if (!ISD::isNON_EXTLoad(N)) |
31442 | return; |
31443 | auto *Ld = cast<LoadSDNode>(N); |
31444 | if (Subtarget.hasSSE2()) { |
31445 | MVT LdVT = Subtarget.is64Bit() && VT.isInteger() ? MVT::i64 : MVT::f64; |
31446 | SDValue Res = DAG.getLoad(LdVT, dl, Ld->getChain(), Ld->getBasePtr(), |
31447 | Ld->getPointerInfo(), Ld->getOriginalAlign(), |
31448 | Ld->getMemOperand()->getFlags()); |
31449 | SDValue Chain = Res.getValue(1); |
31450 | MVT VecVT = MVT::getVectorVT(LdVT, 2); |
31451 | Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, Res); |
31452 | EVT WideVT = getTypeToTransformTo(*DAG.getContext(), VT); |
31453 | Res = DAG.getBitcast(WideVT, Res); |
31454 | Results.push_back(Res); |
31455 | Results.push_back(Chain); |
31456 | return; |
31457 | } |
31458 | assert(Subtarget.hasSSE1() && "Expected SSE"); |
31459 | SDVTList Tys = DAG.getVTList(MVT::v4f32, MVT::Other); |
31460 | SDValue Ops[] = {Ld->getChain(), Ld->getBasePtr()}; |
31461 | SDValue Res = DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, |
31462 | MVT::i64, Ld->getMemOperand()); |
31463 | Results.push_back(Res); |
31464 | Results.push_back(Res.getValue(1)); |
31465 | return; |
31466 | } |
31467 | case ISD::ADDRSPACECAST: { |
31468 | SDValue V = LowerADDRSPACECAST(SDValue(N,0), DAG); |
31469 | Results.push_back(V); |
31470 | return; |
31471 | } |
31472 | case ISD::BITREVERSE: |
31473 | assert(N->getValueType(0) == MVT::i64 && "Unexpected VT!"); |
31474 | assert(Subtarget.hasXOP() && "Expected XOP"); |
31475 | // We can use VPPERM by copying to a vector register and back. We'll need |
31476 | // to move the scalar in two i32 pieces. |
31477 | Results.push_back(LowerBITREVERSE(SDValue(N, 0), Subtarget, DAG)); |
31478 | return; |
31479 | } |
31480 | } |
31481 | |
31482 | const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const { |
31483 | switch ((X86ISD::NodeType)Opcode) { |
31484 | case X86ISD::FIRST_NUMBER: break; |
31485 | #define NODE_NAME_CASE(NODE) case X86ISD::NODE: return "X86ISD::" #NODE; |
31486 | NODE_NAME_CASE(BSF) |
31487 | NODE_NAME_CASE(BSR) |
31488 | NODE_NAME_CASE(FSHL) |
31489 | NODE_NAME_CASE(FSHR) |
31490 | NODE_NAME_CASE(FAND) |
31491 | NODE_NAME_CASE(FANDN) |
31492 | NODE_NAME_CASE(FOR) |
31493 | NODE_NAME_CASE(FXOR) |
31494 | NODE_NAME_CASE(FILD) |
31495 | NODE_NAME_CASE(FIST) |
31496 | NODE_NAME_CASE(FP_TO_INT_IN_MEM) |
31497 | NODE_NAME_CASE(FLD) |
31498 | NODE_NAME_CASE(FST) |
31499 | NODE_NAME_CASE(CALL) |
31500 | NODE_NAME_CASE(CALL_RVMARKER) |
31501 | NODE_NAME_CASE(BT) |
31502 | NODE_NAME_CASE(CMP) |
31503 | NODE_NAME_CASE(FCMP) |
31504 | NODE_NAME_CASE(STRICT_FCMP) |
31505 | NODE_NAME_CASE(STRICT_FCMPS) |
31506 | NODE_NAME_CASE(COMI) |
31507 | NODE_NAME_CASE(UCOMI) |
31508 | NODE_NAME_CASE(CMPM) |
31509 | NODE_NAME_CASE(CMPMM) |
31510 | NODE_NAME_CASE(STRICT_CMPM) |
31511 | NODE_NAME_CASE(CMPMM_SAE) |
31512 | NODE_NAME_CASE(SETCC) |
31513 | NODE_NAME_CASE(SETCC_CARRY) |
31514 | NODE_NAME_CASE(FSETCC) |
31515 | NODE_NAME_CASE(FSETCCM) |
31516 | NODE_NAME_CASE(FSETCCM_SAE) |
31517 | NODE_NAME_CASE(CMOV) |
31518 | NODE_NAME_CASE(BRCOND) |
31519 | NODE_NAME_CASE(RET_FLAG) |
31520 | NODE_NAME_CASE(IRET) |
31521 | NODE_NAME_CASE(REP_STOS) |
31522 | NODE_NAME_CASE(REP_MOVS) |
31523 | NODE_NAME_CASE(GlobalBaseReg) |
31524 | NODE_NAME_CASE(Wrapper) |
31525 | NODE_NAME_CASE(WrapperRIP) |
31526 | NODE_NAME_CASE(MOVQ2DQ) |
31527 | NODE_NAME_CASE(MOVDQ2Q) |
31528 | NODE_NAME_CASE(MMX_MOVD2W) |
31529 | NODE_NAME_CASE(MMX_MOVW2D) |
31530 | NODE_NAME_CASE(PEXTRB) |
31531 | NODE_NAME_CASE(PEXTRW) |
31532 | NODE_NAME_CASE(INSERTPS) |
31533 | NODE_NAME_CASE(PINSRB) |
31534 | NODE_NAME_CASE(PINSRW) |
31535 | NODE_NAME_CASE(PSHUFB) |
31536 | NODE_NAME_CASE(ANDNP) |
31537 | NODE_NAME_CASE(BLENDI) |
31538 | NODE_NAME_CASE(BLENDV) |
31539 | NODE_NAME_CASE(HADD) |
31540 | NODE_NAME_CASE(HSUB) |
31541 | NODE_NAME_CASE(FHADD) |
31542 | NODE_NAME_CASE(FHSUB) |
31543 | NODE_NAME_CASE(CONFLICT) |
31544 | NODE_NAME_CASE(FMAX) |
31545 | NODE_NAME_CASE(FMAXS) |
31546 | NODE_NAME_CASE(FMAX_SAE) |
31547 | NODE_NAME_CASE(FMAXS_SAE) |
31548 | NODE_NAME_CASE(FMIN) |
31549 | NODE_NAME_CASE(FMINS) |
31550 | NODE_NAME_CASE(FMIN_SAE) |
31551 | NODE_NAME_CASE(FMINS_SAE) |
31552 | NODE_NAME_CASE(FMAXC) |
31553 | NODE_NAME_CASE(FMINC) |
31554 | NODE_NAME_CASE(FRSQRT) |
31555 | NODE_NAME_CASE(FRCP) |
31556 | NODE_NAME_CASE(EXTRQI) |
31557 | NODE_NAME_CASE(INSERTQI) |
31558 | NODE_NAME_CASE(TLSADDR) |
31559 | NODE_NAME_CASE(TLSBASEADDR) |
31560 | NODE_NAME_CASE(TLSCALL) |
31561 | NODE_NAME_CASE(EH_SJLJ_SETJMP) |
31562 | NODE_NAME_CASE(EH_SJLJ_LONGJMP) |
31563 | NODE_NAME_CASE(EH_SJLJ_SETUP_DISPATCH) |
31564 | NODE_NAME_CASE(EH_RETURN) |
31565 | NODE_NAME_CASE(TC_RETURN) |
31566 | NODE_NAME_CASE(FNSTCW16m) |
31567 | NODE_NAME_CASE(FLDCW16m) |
31568 | NODE_NAME_CASE(LCMPXCHG_DAG) |
31569 | NODE_NAME_CASE(LCMPXCHG8_DAG) |
31570 | NODE_NAME_CASE(LCMPXCHG16_DAG) |
31571 | NODE_NAME_CASE(LCMPXCHG16_SAVE_RBX_DAG) |
31572 | NODE_NAME_CASE(LADD) |
31573 | NODE_NAME_CASE(LSUB) |
31574 | NODE_NAME_CASE(LOR) |
31575 | NODE_NAME_CASE(LXOR) |
31576 | NODE_NAME_CASE(LAND) |
31577 | NODE_NAME_CASE(VZEXT_MOVL) |
31578 | NODE_NAME_CASE(VZEXT_LOAD) |
31579 | NODE_NAME_CASE(VEXTRACT_STORE) |
31580 | NODE_NAME_CASE(VTRUNC) |
31581 | NODE_NAME_CASE(VTRUNCS) |
31582 | NODE_NAME_CASE(VTRUNCUS) |
31583 | NODE_NAME_CASE(VMTRUNC) |
31584 | NODE_NAME_CASE(VMTRUNCS) |
31585 | NODE_NAME_CASE(VMTRUNCUS) |
31586 | NODE_NAME_CASE(VTRUNCSTORES) |
31587 | NODE_NAME_CASE(VTRUNCSTOREUS) |
31588 | NODE_NAME_CASE(VMTRUNCSTORES) |
31589 | NODE_NAME_CASE(VMTRUNCSTOREUS) |
31590 | NODE_NAME_CASE(VFPEXT) |
31591 | NODE_NAME_CASE(STRICT_VFPEXT) |
31592 | NODE_NAME_CASE(VFPEXT_SAE) |
31593 | NODE_NAME_CASE(VFPEXTS) |
31594 | NODE_NAME_CASE(VFPEXTS_SAE) |
31595 | NODE_NAME_CASE(VFPROUND) |
31596 | NODE_NAME_CASE(STRICT_VFPROUND) |
31597 | NODE_NAME_CASE(VMFPROUND) |
31598 | NODE_NAME_CASE(VFPROUND_RND) |
31599 | NODE_NAME_CASE(VFPROUNDS) |
31600 | NODE_NAME_CASE(VFPROUNDS_RND) |
31601 | NODE_NAME_CASE(VSHLDQ) |
31602 | NODE_NAME_CASE(VSRLDQ) |
31603 | NODE_NAME_CASE(VSHL) |
31604 | NODE_NAME_CASE(VSRL) |
31605 | NODE_NAME_CASE(VSRA) |
31606 | NODE_NAME_CASE(VSHLI) |
31607 | NODE_NAME_CASE(VSRLI) |
31608 | NODE_NAME_CASE(VSRAI) |
31609 | NODE_NAME_CASE(VSHLV) |
31610 | NODE_NAME_CASE(VSRLV) |
31611 | NODE_NAME_CASE(VSRAV) |
31612 | NODE_NAME_CASE(VROTLI) |
31613 | NODE_NAME_CASE(VROTRI) |
31614 | NODE_NAME_CASE(VPPERM) |
31615 | NODE_NAME_CASE(CMPP) |
31616 | NODE_NAME_CASE(STRICT_CMPP) |
31617 | NODE_NAME_CASE(PCMPEQ) |
31618 | NODE_NAME_CASE(PCMPGT) |
31619 | NODE_NAME_CASE(PHMINPOS) |
31620 | NODE_NAME_CASE(ADD) |
31621 | NODE_NAME_CASE(SUB) |
31622 | NODE_NAME_CASE(ADC) |
31623 | NODE_NAME_CASE(SBB) |
31624 | NODE_NAME_CASE(SMUL) |
31625 | NODE_NAME_CASE(UMUL) |
31626 | NODE_NAME_CASE(OR) |
31627 | NODE_NAME_CASE(XOR) |
31628 | NODE_NAME_CASE(AND) |
31629 | NODE_NAME_CASE(BEXTR) |
31630 | NODE_NAME_CASE(BEXTRI) |
31631 | NODE_NAME_CASE(BZHI) |
31632 | NODE_NAME_CASE(PDEP) |
31633 | NODE_NAME_CASE(PEXT) |
31634 | NODE_NAME_CASE(MUL_IMM) |
31635 | NODE_NAME_CASE(MOVMSK) |
31636 | NODE_NAME_CASE(PTEST) |
31637 | NODE_NAME_CASE(TESTP) |
31638 | NODE_NAME_CASE(KORTEST) |
31639 | NODE_NAME_CASE(KTEST) |
31640 | NODE_NAME_CASE(KADD) |
31641 | NODE_NAME_CASE(KSHIFTL) |
31642 | NODE_NAME_CASE(KSHIFTR) |
31643 | NODE_NAME_CASE(PACKSS) |
31644 | NODE_NAME_CASE(PACKUS) |
31645 | NODE_NAME_CASE(PALIGNR) |
31646 | NODE_NAME_CASE(VALIGN) |
31647 | NODE_NAME_CASE(VSHLD) |
31648 | NODE_NAME_CASE(VSHRD) |
31649 | NODE_NAME_CASE(VSHLDV) |
31650 | NODE_NAME_CASE(VSHRDV) |
31651 | NODE_NAME_CASE(PSHUFD) |
31652 | NODE_NAME_CASE(PSHUFHW) |
31653 | NODE_NAME_CASE(PSHUFLW) |
31654 | NODE_NAME_CASE(SHUFP) |
31655 | NODE_NAME_CASE(SHUF128) |
31656 | NODE_NAME_CASE(MOVLHPS) |
31657 | NODE_NAME_CASE(MOVHLPS) |
31658 | NODE_NAME_CASE(MOVDDUP) |
31659 | NODE_NAME_CASE(MOVSHDUP) |
31660 | NODE_NAME_CASE(MOVSLDUP) |
31661 | NODE_NAME_CASE(MOVSD) |
31662 | NODE_NAME_CASE(MOVSS) |
31663 | NODE_NAME_CASE(UNPCKL) |
31664 | NODE_NAME_CASE(UNPCKH) |
31665 | NODE_NAME_CASE(VBROADCAST) |
31666 | NODE_NAME_CASE(VBROADCAST_LOAD) |
31667 | NODE_NAME_CASE(VBROADCASTM) |
31668 | NODE_NAME_CASE(SUBV_BROADCAST_LOAD) |
31669 | NODE_NAME_CASE(VPERMILPV) |
31670 | NODE_NAME_CASE(VPERMILPI) |
31671 | NODE_NAME_CASE(VPERM2X128) |
31672 | NODE_NAME_CASE(VPERMV) |
31673 | NODE_NAME_CASE(VPERMV3) |
31674 | NODE_NAME_CASE(VPERMI) |
31675 | NODE_NAME_CASE(VPTERNLOG) |
31676 | NODE_NAME_CASE(VFIXUPIMM) |
31677 | NODE_NAME_CASE(VFIXUPIMM_SAE) |
31678 | NODE_NAME_CASE(VFIXUPIMMS) |
31679 | NODE_NAME_CASE(VFIXUPIMMS_SAE) |
31680 | NODE_NAME_CASE(VRANGE) |
31681 | NODE_NAME_CASE(VRANGE_SAE) |
31682 | NODE_NAME_CASE(VRANGES) |
31683 | NODE_NAME_CASE(VRANGES_SAE) |
31684 | NODE_NAME_CASE(PMULUDQ) |
31685 | NODE_NAME_CASE(PMULDQ) |
31686 | NODE_NAME_CASE(PSADBW) |
31687 | NODE_NAME_CASE(DBPSADBW) |
31688 | NODE_NAME_CASE(VASTART_SAVE_XMM_REGS) |
31689 | NODE_NAME_CASE(VAARG_64) |
31690 | NODE_NAME_CASE(VAARG_X32) |
31691 | NODE_NAME_CASE(WIN_ALLOCA) |
31692 | NODE_NAME_CASE(MEMBARRIER) |
31693 | NODE_NAME_CASE(MFENCE) |
31694 | NODE_NAME_CASE(SEG_ALLOCA) |
31695 | NODE_NAME_CASE(PROBED_ALLOCA) |
31696 | NODE_NAME_CASE(RDRAND) |
31697 | NODE_NAME_CASE(RDSEED) |
31698 | NODE_NAME_CASE(RDPKRU) |
31699 | NODE_NAME_CASE(WRPKRU) |
31700 | NODE_NAME_CASE(VPMADDUBSW) |
31701 | NODE_NAME_CASE(VPMADDWD) |
31702 | NODE_NAME_CASE(VPSHA) |
31703 | NODE_NAME_CASE(VPSHL) |
31704 | NODE_NAME_CASE(VPCOM) |
31705 | NODE_NAME_CASE(VPCOMU) |
31706 | NODE_NAME_CASE(VPERMIL2) |
31707 | NODE_NAME_CASE(FMSUB) |
31708 | NODE_NAME_CASE(STRICT_FMSUB) |
31709 | NODE_NAME_CASE(FNMADD) |
31710 | NODE_NAME_CASE(STRICT_FNMADD) |
31711 | NODE_NAME_CASE(FNMSUB) |
31712 | NODE_NAME_CASE(STRICT_FNMSUB) |
31713 | NODE_NAME_CASE(FMADDSUB) |
31714 | NODE_NAME_CASE(FMSUBADD) |
31715 | NODE_NAME_CASE(FMADD_RND) |
31716 | NODE_NAME_CASE(FNMADD_RND) |
31717 | NODE_NAME_CASE(FMSUB_RND) |
31718 | NODE_NAME_CASE(FNMSUB_RND) |
31719 | NODE_NAME_CASE(FMADDSUB_RND) |
31720 | NODE_NAME_CASE(FMSUBADD_RND) |
31721 | NODE_NAME_CASE(VPMADD52H) |
31722 | NODE_NAME_CASE(VPMADD52L) |
31723 | NODE_NAME_CASE(VRNDSCALE) |
31724 | NODE_NAME_CASE(STRICT_VRNDSCALE) |
31725 | NODE_NAME_CASE(VRNDSCALE_SAE) |
31726 | NODE_NAME_CASE(VRNDSCALES) |
31727 | NODE_NAME_CASE(VRNDSCALES_SAE) |
31728 | NODE_NAME_CASE(VREDUCE) |
31729 | NODE_NAME_CASE(VREDUCE_SAE) |
31730 | NODE_NAME_CASE(VREDUCES) |
31731 | NODE_NAME_CASE(VREDUCES_SAE) |
31732 | NODE_NAME_CASE(VGETMANT) |
31733 | NODE_NAME_CASE(VGETMANT_SAE) |
31734 | NODE_NAME_CASE(VGETMANTS) |
31735 | NODE_NAME_CASE(VGETMANTS_SAE) |
31736 | NODE_NAME_CASE(PCMPESTR) |
31737 | NODE_NAME_CASE(PCMPISTR) |
31738 | NODE_NAME_CASE(XTEST) |
31739 | NODE_NAME_CASE(COMPRESS) |
31740 | NODE_NAME_CASE(EXPAND) |
31741 | NODE_NAME_CASE(SELECTS) |
31742 | NODE_NAME_CASE(ADDSUB) |
31743 | NODE_NAME_CASE(RCP14) |
31744 | NODE_NAME_CASE(RCP14S) |
31745 | NODE_NAME_CASE(RCP28) |
31746 | NODE_NAME_CASE(RCP28_SAE) |
31747 | NODE_NAME_CASE(RCP28S) |
31748 | NODE_NAME_CASE(RCP28S_SAE) |
31749 | NODE_NAME_CASE(EXP2) |
31750 | NODE_NAME_CASE(EXP2_SAE) |
31751 | NODE_NAME_CASE(RSQRT14) |
31752 | NODE_NAME_CASE(RSQRT14S) |
31753 | NODE_NAME_CASE(RSQRT28) |
31754 | NODE_NAME_CASE(RSQRT28_SAE) |
31755 | NODE_NAME_CASE(RSQRT28S) |
31756 | NODE_NAME_CASE(RSQRT28S_SAE) |
31757 | NODE_NAME_CASE(FADD_RND) |
31758 | NODE_NAME_CASE(FADDS) |
31759 | NODE_NAME_CASE(FADDS_RND) |
31760 | NODE_NAME_CASE(FSUB_RND) |
31761 | NODE_NAME_CASE(FSUBS) |
31762 | NODE_NAME_CASE(FSUBS_RND) |
31763 | NODE_NAME_CASE(FMUL_RND) |
31764 | NODE_NAME_CASE(FMULS) |
31765 | NODE_NAME_CASE(FMULS_RND) |
31766 | NODE_NAME_CASE(FDIV_RND) |
31767 | NODE_NAME_CASE(FDIVS) |
31768 | NODE_NAME_CASE(FDIVS_RND) |
31769 | NODE_NAME_CASE(FSQRT_RND) |
31770 | NODE_NAME_CASE(FSQRTS) |
31771 | NODE_NAME_CASE(FSQRTS_RND) |
31772 | NODE_NAME_CASE(FGETEXP) |
31773 | NODE_NAME_CASE(FGETEXP_SAE) |
31774 | NODE_NAME_CASE(FGETEXPS) |
31775 | NODE_NAME_CASE(FGETEXPS_SAE) |
31776 | NODE_NAME_CASE(SCALEF) |
31777 | NODE_NAME_CASE(SCALEF_RND) |
31778 | NODE_NAME_CASE(SCALEFS) |
31779 | NODE_NAME_CASE(SCALEFS_RND) |
31780 | NODE_NAME_CASE(AVG) |
31781 | NODE_NAME_CASE(MULHRS) |
31782 | NODE_NAME_CASE(SINT_TO_FP_RND) |
31783 | NODE_NAME_CASE(UINT_TO_FP_RND) |
31784 | NODE_NAME_CASE(CVTTP2SI) |
31785 | NODE_NAME_CASE(CVTTP2UI) |
31786 | NODE_NAME_CASE(STRICT_CVTTP2SI) |
31787 | NODE_NAME_CASE(STRICT_CVTTP2UI) |
31788 | NODE_NAME_CASE(MCVTTP2SI) |
31789 | NODE_NAME_CASE(MCVTTP2UI) |
31790 | NODE_NAME_CASE(CVTTP2SI_SAE) |
31791 | NODE_NAME_CASE(CVTTP2UI_SAE) |
31792 | NODE_NAME_CASE(CVTTS2SI) |
31793 | NODE_NAME_CASE(CVTTS2UI) |
31794 | NODE_NAME_CASE(CVTTS2SI_SAE) |
31795 | NODE_NAME_CASE(CVTTS2UI_SAE) |
31796 | NODE_NAME_CASE(CVTSI2P) |
31797 | NODE_NAME_CASE(CVTUI2P) |
31798 | NODE_NAME_CASE(STRICT_CVTSI2P) |
31799 | NODE_NAME_CASE(STRICT_CVTUI2P) |
31800 | NODE_NAME_CASE(MCVTSI2P) |
31801 | NODE_NAME_CASE(MCVTUI2P) |
31802 | NODE_NAME_CASE(VFPCLASS) |
31803 | NODE_NAME_CASE(VFPCLASSS) |
31804 | NODE_NAME_CASE(MULTISHIFT) |
31805 | NODE_NAME_CASE(SCALAR_SINT_TO_FP) |
31806 | NODE_NAME_CASE(SCALAR_SINT_TO_FP_RND) |
31807 | NODE_NAME_CASE(SCALAR_UINT_TO_FP) |
31808 | NODE_NAME_CASE(SCALAR_UINT_TO_FP_RND) |
31809 | NODE_NAME_CASE(CVTPS2PH) |
31810 | NODE_NAME_CASE(STRICT_CVTPS2PH) |
31811 | NODE_NAME_CASE(MCVTPS2PH) |
31812 | NODE_NAME_CASE(CVTPH2PS) |
31813 | NODE_NAME_CASE(STRICT_CVTPH2PS) |
31814 | NODE_NAME_CASE(CVTPH2PS_SAE) |
31815 | NODE_NAME_CASE(CVTP2SI) |
31816 | NODE_NAME_CASE(CVTP2UI) |
31817 | NODE_NAME_CASE(MCVTP2SI) |
31818 | NODE_NAME_CASE(MCVTP2UI) |
31819 | NODE_NAME_CASE(CVTP2SI_RND) |
31820 | NODE_NAME_CASE(CVTP2UI_RND) |
31821 | NODE_NAME_CASE(CVTS2SI) |
31822 | NODE_NAME_CASE(CVTS2UI) |
31823 | NODE_NAME_CASE(CVTS2SI_RND) |
31824 | NODE_NAME_CASE(CVTS2UI_RND) |
31825 | NODE_NAME_CASE(CVTNE2PS2BF16) |
31826 | NODE_NAME_CASE(CVTNEPS2BF16) |
31827 | NODE_NAME_CASE(MCVTNEPS2BF16) |
31828 | NODE_NAME_CASE(DPBF16PS) |
31829 | NODE_NAME_CASE(LWPINS) |
31830 | NODE_NAME_CASE(MGATHER) |
31831 | NODE_NAME_CASE(MSCATTER) |
31832 | NODE_NAME_CASE(VPDPBUSD) |
31833 | NODE_NAME_CASE(VPDPBUSDS) |
31834 | NODE_NAME_CASE(VPDPWSSD) |
31835 | NODE_NAME_CASE(VPDPWSSDS) |
31836 | NODE_NAME_CASE(VPSHUFBITQMB) |
31837 | NODE_NAME_CASE(GF2P8MULB) |
31838 | NODE_NAME_CASE(GF2P8AFFINEQB) |
31839 | NODE_NAME_CASE(GF2P8AFFINEINVQB) |
31840 | NODE_NAME_CASE(NT_CALL) |
31841 | NODE_NAME_CASE(NT_BRIND) |
31842 | NODE_NAME_CASE(UMWAIT) |
31843 | NODE_NAME_CASE(TPAUSE) |
31844 | NODE_NAME_CASE(ENQCMD) |
31845 | NODE_NAME_CASE(ENQCMDS) |
31846 | NODE_NAME_CASE(VP2INTERSECT) |
31847 | NODE_NAME_CASE(AESENC128KL) |
31848 | NODE_NAME_CASE(AESDEC128KL) |
31849 | NODE_NAME_CASE(AESENC256KL) |
31850 | NODE_NAME_CASE(AESDEC256KL) |
31851 | NODE_NAME_CASE(AESENCWIDE128KL) |
31852 | NODE_NAME_CASE(AESDECWIDE128KL) |
31853 | NODE_NAME_CASE(AESENCWIDE256KL) |
31854 | NODE_NAME_CASE(AESDECWIDE256KL) |
31855 | NODE_NAME_CASE(TESTUI) |
31856 | } |
31857 | return nullptr; |
31858 | #undef NODE_NAME_CASE |
31859 | } |
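      | // Illustrative note (not in the original source): NODE_NAME_CASE expands |
      | // each entry above into a case that stringizes the enumerator, e.g. |
      | //   case X86ISD::PSHUFB: return "X86ISD::PSHUFB"; |
      | // so getTargetNodeName(X86ISD::PSHUFB) yields "X86ISD::PSHUFB", and any |
      | // opcode outside the X86ISD enum falls through and returns nullptr. |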
31860 | |
31861 | |
31862 | |
31863 | bool X86TargetLowering::isLegalAddressingMode(const DataLayout &DL, |
31864 | const AddrMode &AM, Type *Ty, |
31865 | unsigned AS, |
31866 | Instruction *I) const { |
31867 | // X86 supports extremely general addressing modes. |
31868 | CodeModel::Model M = getTargetMachine().getCodeModel(); |
31869 | |
31870 | // X86 allows a sign-extended 32-bit immediate field as a displacement. |
31871 | if (!X86::isOffsetSuitableForCodeModel(AM.BaseOffs, M, AM.BaseGV != nullptr)) |
31872 | return false; |
31873 | |
31874 | if (AM.BaseGV) { |
31875 | unsigned GVFlags = Subtarget.classifyGlobalReference(AM.BaseGV); |
31876 | |
31877 | // If a reference to this global requires an extra load, we can't fold it. |
31878 | if (isGlobalStubReference(GVFlags)) |
31879 | return false; |
31880 | |
31881 | // If BaseGV requires a register for the PIC base, we cannot also have a |
31882 | // BaseReg specified. |
31883 | if (AM.HasBaseReg && isGlobalRelativeToPICBase(GVFlags)) |
31884 | return false; |
31885 | |
31886 | // If lower 4G is not available, then we must use rip-relative addressing. |
31887 | if ((M != CodeModel::Small || isPositionIndependent()) && |
31888 | Subtarget.is64Bit() && (AM.BaseOffs || AM.Scale > 1)) |
31889 | return false; |
31890 | } |
31891 | |
31892 | switch (AM.Scale) { |
31893 | case 0: |
31894 | case 1: |
31895 | case 2: |
31896 | case 4: |
31897 | case 8: |
31898 | // These scales always work. |
31899 | break; |
31900 | case 3: |
31901 | case 5: |
31902 | case 9: |
31903 | // These scales are formed with basereg+basereg.  Only accept if there is |
31904 | // no basereg yet. |
31905 | if (AM.HasBaseReg) |
31906 | return false; |
31907 | break; |
31908 | default: |
31909 | return false; |
31910 | } |
31911 | |
31912 | return true; |
31913 | } |
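      | // A minimal sketch of what the rules above accept (hypothetical values |
      | // for the TargetLowering::AddrMode fields used by this hook): |
      | //   AddrMode AM;            // models base + scale*index + disp |
      | //   AM.HasBaseReg = true; |
      | //   AM.Scale = 4;           // ok: {0,1,2,4,8} always encode (SIB scale) |
      | //   AM.Scale = 3;           // ok only without a base register, since |
      | //                           // 3*index is formed as index + 2*index and |
      | //                           // consumes the base slot of the LEA |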
31914 | |
31915 | bool X86TargetLowering::isVectorShiftByScalarCheap(Type *Ty) const { |
31916 | unsigned Bits = Ty->getScalarSizeInBits(); |
31917 | |
31918 | // 8-bit shifts are always expensive on x86 - there is no native |
31919 | // support, so a scalar shift amount buys nothing. |
31920 | if (Bits == 8) |
31921 | return false; |
31922 | |
31923 | // XOP has v16i8/v8i16/v4i32/v2i64 variable vector shifts, so a shift by |
31924 | // a non-uniform amount is already as cheap as a shift by scalar. |
31925 | if (Subtarget.hasXOP() && |
31926 | (Bits == 8 || Bits == 16 || Bits == 32 || Bits == 64)) |
31927 | return false; |
31928 | |
31929 | // AVX2 has vpsllvd/vpsllvq (and the matching right shifts), making |
31930 | // variable 32/64-bit vector shifts as cheap as shifts by scalar. |
31931 | if (Subtarget.hasAVX2() && (Bits == 32 || Bits == 64)) |
31932 | return false; |
31933 | |
31934 | // AVX512BW adds variable 16-bit shifts (vpsllvw and friends). |
31935 | if (Subtarget.hasBWI() && Bits == 16) |
31936 | return false; |
31937 | |
31938 | // Otherwise, it's significantly cheaper to shift by a scalar amount |
31939 | // than by a fully general variable vector amount. |
31940 | return true; |
31941 | } |
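      | // Effect in practice (illustrative IR, not from this file): for |
      | //   %r = shl <4 x i32> %v, %splat_amount |
      | // an AVX2 target returns false above (vpsllvd makes the general form |
      | // cheap anyway), while a plain SSE2 target returns true, steering the |
      | // lowering toward a single shift by the scalar amount (pslld). |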
31942 | |
31943 | bool X86TargetLowering::isBinOp(unsigned Opcode) const { |
31944 | switch (Opcode) { |
31945 | // These are non-commutative binops. |
31946 | // TODO: Add more X86ISD opcodes once we have test coverage. |
31947 | case X86ISD::ANDNP: |
31948 | case X86ISD::PCMPGT: |
31949 | case X86ISD::FMAX: |
31950 | case X86ISD::FMIN: |
31951 | case X86ISD::FANDN: |
31952 | return true; |
31953 | } |
31954 | |
31955 | return TargetLoweringBase::isBinOp(Opcode); |
31956 | } |
31957 | |
31958 | bool X86TargetLowering::isCommutativeBinOp(unsigned Opcode) const { |
31959 | switch (Opcode) { |
31960 | // TODO: Add more X86ISD opcodes once we have test coverage. |
31961 | case X86ISD::PCMPEQ: |
31962 | case X86ISD::PMULDQ: |
31963 | case X86ISD::PMULUDQ: |
31964 | case X86ISD::FMAXC: |
31965 | case X86ISD::FMINC: |
31966 | case X86ISD::FAND: |
31967 | case X86ISD::FOR: |
31968 | case X86ISD::FXOR: |
31969 | return true; |
31970 | } |
31971 | |
31972 | return TargetLoweringBase::isCommutativeBinOp(Opcode); |
31973 | } |
31974 | |
31975 | bool X86TargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const { |
31976 | if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy()) |
31977 | return false; |
31978 | unsigned NumBits1 = Ty1->getPrimitiveSizeInBits(); |
31979 | unsigned NumBits2 = Ty2->getPrimitiveSizeInBits(); |
31980 | return NumBits1 > NumBits2; |
31981 | } |
31982 | |
31983 | bool X86TargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const { |
31984 | if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy()) |
31985 | return false; |
31986 | |
31987 | if (!isTypeLegal(EVT::getEVT(Ty1))) |
31988 | return false; |
31989 | |
31990 | assert(Ty1->getPrimitiveSizeInBits() <= 64 && "i128 is probably not a noop"); |
31991 | |
31992 | // Assuming the caller doesn't have a zeroext or signext return parameter, |
31993 | // truncation all the way down to i1 is valid. |
31994 | return true; |
31995 | } |
31996 | |
31997 | bool X86TargetLowering::isLegalICmpImmediate(int64_t Imm) const { |
31998 | return isInt<32>(Imm); |
31999 | } |
32000 | |
32001 | bool X86TargetLowering::isLegalAddImmediate(int64_t Imm) const { |
32002 | // Can also use sub to handle negated immediates. |
32003 | return isInt<32>(Imm); |
32004 | } |
32005 | |
32006 | bool X86TargetLowering::isLegalStoreImmediate(int64_t Imm) const { |
32007 | return isInt<32>(Imm); |
32008 | } |
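      | // All three immediate hooks above reflect the same encoding limit: x86 |
      | // cmp/add/store-immediate forms carry at most a sign-extended 32-bit |
      | // field. For example (illustrative), Imm = 0x7fffffff is legal, while |
      | // Imm = 0x100000000 must first be materialized into a register. |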
32009 | |
32010 | bool X86TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { |
32011 | if (!VT1.isScalarInteger() || !VT2.isScalarInteger()) |
32012 | return false; |
32013 | unsigned NumBits1 = VT1.getSizeInBits(); |
32014 | unsigned NumBits2 = VT2.getSizeInBits(); |
32015 | return NumBits1 > NumBits2; |
32016 | } |
32017 | |
32018 | bool X86TargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const { |
32019 | // x86-64 implicitly zero-extends 32-bit results in 64-bit registers. |
32020 | return Ty1->isIntegerTy(32) && Ty2->isIntegerTy(64) && Subtarget.is64Bit(); |
32021 | } |
32022 | |
32023 | bool X86TargetLowering::isZExtFree(EVT VT1, EVT VT2) const { |
32024 | // x86-64 implicitly zero-extends 32-bit results in 64-bit registers. |
32025 | return VT1 == MVT::i32 && VT2 == MVT::i64 && Subtarget.is64Bit(); |
32026 | } |
32027 | |
32028 | bool X86TargetLowering::isZExtFree(SDValue Val, EVT VT2) const { |
32029 | EVT VT1 = Val.getValueType(); |
32030 | if (isZExtFree(VT1, VT2)) |
32031 | return true; |
32032 | |
32033 | if (Val.getOpcode() != ISD::LOAD) |
32034 | return false; |
32035 | |
32036 | if (!VT1.isSimple() || !VT1.isInteger() || |
32037 | !VT2.isSimple() || !VT2.isInteger()) |
32038 | return false; |
32039 | |
32040 | switch (VT1.getSimpleVT().SimpleTy) { |
32041 | default: break; |
32042 | case MVT::i8: |
32043 | case MVT::i16: |
32044 | case MVT::i32: |
32045 | // X86 has 8, 16, and 32-bit zero-extending loads. |
32046 | return true; |
32047 | } |
32048 | |
32049 | return false; |
32050 | } |
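      | // Rationale for the load case above (illustrative): x86 loads of |
      | // i8/i16/i32 (movzbl, movzwl, movl) already zero the upper bits of the |
      | // destination register, so zero-extending a freshly loaded value to a |
      | // wider legal type costs no extra instruction. |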
32051 | |
32052 | bool X86TargetLowering::shouldSinkOperands(Instruction *I, |
32053 | SmallVectorImpl<Use *> &Ops) const { |
32054 | // A uniform shift amount in a vector shift or funnel shift may be much |
32055 | // cheaper than a generic variable vector shift, so make that pattern |
32056 | // visible to SDAG by sinking the shuffle instruction next to the shift. |
32057 | int ShiftAmountOpNum = -1; |
32058 | if (I->isShift()) |
32059 | ShiftAmountOpNum = 1; |
32060 | else if (auto *II = dyn_cast<IntrinsicInst>(I)) { |
32061 | if (II->getIntrinsicID() == Intrinsic::fshl || |
32062 | II->getIntrinsicID() == Intrinsic::fshr) |
32063 | ShiftAmountOpNum = 2; |
32064 | } |
32065 | |
32066 | if (ShiftAmountOpNum == -1) |
32067 | return false; |
32068 | |
32069 | auto *Shuf = dyn_cast<ShuffleVectorInst>(I->getOperand(ShiftAmountOpNum)); |
32070 | if (Shuf && getSplatIndex(Shuf->getShuffleMask()) >= 0 && |
32071 | isVectorShiftByScalarCheap(I->getType())) { |
32072 | Ops.push_back(&I->getOperandUse(ShiftAmountOpNum)); |
32073 | return true; |
32074 | } |
32075 | |
32076 | return false; |
32077 | } |
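      | // Sketch of the pattern this sinks (hypothetical IR): given |
      | //   %s = shufflevector <4 x i32> %a, <4 x i32> poison, zeroinitializer |
      | //   %r = shl <4 x i32> %v, %s |
      | // the splat %s is moved next to the shift so instruction selection can |
      | // see it and use the cheaper shift-by-scalar form validated by |
      | // isVectorShiftByScalarCheap above. |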
32078 | |
32079 | bool X86TargetLowering::shouldConvertPhiType(Type *From, Type *To) const { |
32080 | if (!Subtarget.is64Bit()) |
32081 | return false; |
32082 | return TargetLowering::shouldConvertPhiType(From, To); |
32083 | } |
32084 | |
32085 | bool X86TargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const { |
32086 | if (isa<MaskedLoadSDNode>(ExtVal.getOperand(0))) |
32087 | return false; |
32088 | |
32089 | EVT SrcVT = ExtVal.getOperand(0).getValueType(); |
32090 | |
32091 | // There is no extending load for vXi1. |
32092 | if (SrcVT.getScalarType() == MVT::i1) |
32093 | return false; |
32094 | |
32095 | return true; |
32096 | } |
32097 | |
32098 | bool X86TargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, |
32099 | EVT VT) const { |
32100 | if (!Subtarget.hasAnyFMA()) |
32101 | return false; |
32102 | |
32103 | VT = VT.getScalarType(); |
32104 | |
32105 | if (!VT.isSimple()) |
32106 | return false; |
32107 | |
32108 | switch (VT.getSimpleVT().SimpleTy) { |
32109 | case MVT::f32: |
32110 | case MVT::f64: |
32111 | return true; |
32112 | default: |
32113 | break; |
32114 | } |
32115 | |
32116 | return false; |
32117 | } |
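      | // Consequence (illustrative): with any FMA feature present, a pattern |
      | // such as fadd(fmul(a, b), c) on f32/f64 scalars or vectors can be |
      | // contracted into one fused multiply-add; x86_fp80 falls out of the |
      | // switch above and keeps the separate multiply and add. |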
32118 | |
32119 | bool X86TargetLowering::isNarrowingProfitable(EVT VT1, EVT VT2) const { |
32120 | // i16 instructions are longer (0x66 prefix) and potentially slower. |
32121 | return !(VT1 == MVT::i32 && VT2 == MVT::i16); |
32122 | } |
32123 | |
32124 | /// Targets can use this to indicate that they only support *some* |
32125 | /// VECTOR_SHUFFLE operations, those with specific masks. |
32126 | /// By default, if a target supports the VECTOR_SHUFFLE node, all mask |
32127 | /// values are assumed to be legal. |
32128 | bool X86TargetLowering::isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const { |
32129 | if (!VT.isSimple()) |
32130 | return false; |
32131 | |
32132 | // Not for i1 vectors. |
32133 | if (VT.getSimpleVT().getScalarType() == MVT::i1) |
32134 | return false; |
32135 | |
32136 | // Very little shuffling can be done for 64-bit vectors right now. |
32137 | if (VT.getSimpleVT().getSizeInBits() == 64) |
32138 | return false; |
32139 | |
32140 | // We only care that the types being shuffled are legal. The lowering can |
32141 | // handle any possible shuffle mask that results. |
32142 | return isTypeLegal(VT.getSimpleVT()); |
32143 | } |
32144 | |
32145 | bool X86TargetLowering::isVectorClearMaskLegal(ArrayRef<int> Mask, |
32146 | EVT VT) const { |
32147 | // Don't convert an 'and' into a shuffle that we don't directly support. |
32148 | // vpblendw and vpshufb for 256-bit vectors are not available on AVX1. |
32149 | if (!Subtarget.hasAVX2()) |
32150 | if (VT == MVT::v32i8 || VT == MVT::v16i16) |
32151 | return false; |
32152 | |
32153 | // Just delegate to the generic legality, clear masks aren't special. |
32154 | return isShuffleMaskLegal(Mask, VT); |
32155 | } |
32156 | |
32157 | bool X86TargetLowering::areJTsAllowed(const Function *Fn) const { |
32158 | // If the subtarget is using thunks, we need to not generate jump tables. |
32159 | if (Subtarget.useIndirectThunkBranches()) |
32160 | return false; |
32161 | |
32162 | // Otherwise, fall back on the generic logic. |
32163 | return TargetLowering::areJTsAllowed(Fn); |
32164 | } |
32165 | |
32166 | //===----------------------------------------------------------------------===// |
32167 | //                           X86 Scheduler Hooks |
32168 | //===----------------------------------------------------------------------===// |
32169 | |
32170 | // Returns true if EFLAGS is consumed after this iterator in the rest of the |
32171 | // basic block or any successors of the basic block. |
32172 | static bool isEFLAGSLiveAfter(MachineBasicBlock::iterator Itr, |
32173 | MachineBasicBlock *BB) { |
32174 | // Scan forward through BB for a use/def of EFLAGS. |
32175 | for (MachineBasicBlock::iterator miI = std::next(Itr), miE = BB->end(); |
32176 | miI != miE; ++miI) { |
32177 | const MachineInstr& mi = *miI; |
32178 | if (mi.readsRegister(X86::EFLAGS)) |
32179 | return true; |
32180 | |
32181 | if (mi.definesRegister(X86::EFLAGS)) |
32182 | return false; |
32183 | } |
32184 | |
32185 | // If we hit the end of the block, check whether EFLAGS is live into a |
32186 | // successor. |
32187 | for (MachineBasicBlock::succ_iterator sItr = BB->succ_begin(), |
32188 | sEnd = BB->succ_end(); |
32189 | sItr != sEnd; ++sItr) { |
32190 | MachineBasicBlock* succ = *sItr; |
32191 | if (succ->isLiveIn(X86::EFLAGS)) |
32192 | return true; |
32193 | } |
32194 | |
32195 | return false; |
32196 | } |
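      | // Usage sketch (mirrors the callers below): newly split blocks must |
      | // declare EFLAGS live-in when a later instruction still reads it, e.g. |
      | //   if (isEFLAGSLiveAfter(MI, MBB)) |
      | //     NewMBB->addLiveIn(X86::EFLAGS); |
      | // Note the conservative tail: with no reader or clobber left in this |
      | // block, liveness is taken from the successors' live-in sets. |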
32197 | |
32198 | // Utility function to emit xbegin specifying the start of an RTM region. |
32199 | static MachineBasicBlock *emitXBegin(MachineInstr &MI, MachineBasicBlock *MBB, |
32200 | const TargetInstrInfo *TII) { |
32201 | const DebugLoc &DL = MI.getDebugLoc(); |
32202 | |
32203 | const BasicBlock *BB = MBB->getBasicBlock(); |
32204 | MachineFunction::iterator I = ++MBB->getIterator(); |
32205 | |
32206 | // For the v = xbegin(), we generate: |
32207 | // |
32208 | // thisMBB: |
32209 | //  xbegin fallMBB   (transaction starts; fall through to mainMBB, |
32210 | //                    an abort transfers control to fallMBB) |
32211 | // |
32212 | // mainMBB: |
32213 | //  s0 = -1 |
32214 | // |
32215 | // fallMBB: |
32216 | //  eax = # XABORT_DEF |
32217 | //  s1 = eax |
32218 | // |
32219 | // sinkMBB: |
32220 | //  v = phi(s0/mainMBB, s1/fallMBB) |
32221 | MachineBasicBlock *thisMBB = MBB; |
32222 | MachineFunction *MF = MBB->getParent(); |
32223 | MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB); |
32224 | MachineBasicBlock *fallMBB = MF->CreateMachineBasicBlock(BB); |
32225 | MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB); |
32226 | MF->insert(I, mainMBB); |
32227 | MF->insert(I, fallMBB); |
32228 | MF->insert(I, sinkMBB); |
32229 | |
32230 | if (isEFLAGSLiveAfter(MI, MBB)) { |
32231 | mainMBB->addLiveIn(X86::EFLAGS); |
32232 | fallMBB->addLiveIn(X86::EFLAGS); |
32233 | sinkMBB->addLiveIn(X86::EFLAGS); |
32234 | } |
32235 | |
32236 | // Transfer the remainder of BB and its successor edges to sinkMBB. |
32237 | sinkMBB->splice(sinkMBB->begin(), MBB, |
32238 | std::next(MachineBasicBlock::iterator(MI)), MBB->end()); |
32239 | sinkMBB->transferSuccessorsAndUpdatePHIs(MBB); |
32240 | |
32241 | MachineRegisterInfo &MRI = MF->getRegInfo(); |
32242 | Register DstReg = MI.getOperand(0).getReg(); |
32243 | const TargetRegisterClass *RC = MRI.getRegClass(DstReg); |
32244 | Register mainDstReg = MRI.createVirtualRegister(RC); |
32245 | Register fallDstReg = MRI.createVirtualRegister(RC); |
32246 | |
32247 | |
32248 | |
32249 | |
32250 | |
32251 | BuildMI(thisMBB, DL, TII->get(X86::XBEGIN_4)).addMBB(fallMBB); |
32252 | thisMBB->addSuccessor(mainMBB); |
32253 | thisMBB->addSuccessor(fallMBB); |
32254 | |
32255 | |
32256 | |
32257 | BuildMI(mainMBB, DL, TII->get(X86::MOV32ri), mainDstReg).addImm(-1); |
32258 | BuildMI(mainMBB, DL, TII->get(X86::JMP_1)).addMBB(sinkMBB); |
32259 | mainMBB->addSuccessor(sinkMBB); |
32260 | |
32261 | |
32262 | |
32263 | |
32264 | |
32265 | BuildMI(fallMBB, DL, TII->get(X86::XABORT_DEF)); |
32266 | BuildMI(fallMBB, DL, TII->get(TargetOpcode::COPY), fallDstReg) |
32267 | .addReg(X86::EAX); |
32268 | fallMBB->addSuccessor(sinkMBB); |
32269 | |
32270 | |
32271 | |
32272 | BuildMI(*sinkMBB, sinkMBB->begin(), DL, TII->get(X86::PHI), DstReg) |
32273 | .addReg(mainDstReg).addMBB(mainMBB) |
32274 | .addReg(fallDstReg).addMBB(fallMBB); |
32275 | |
32276 | MI.eraseFromParent(); |
32277 | return sinkMBB; |
32278 | } |
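      | // RTM semantics recap (illustrative): xbegin falls through to mainMBB |
      | // when the transaction starts, where the result is set to -1 |
      | // (XBEGIN_STARTED); on abort the CPU resumes at fallMBB with the abort |
      | // status in EAX, which the XABORT_DEF pseudo models as a def so the |
      | // following COPY has a defined register to read. |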
32279 | |
32280 | MachineBasicBlock * |
32281 | X86TargetLowering::EmitVAARGWithCustomInserter(MachineInstr &MI, |
32282 | MachineBasicBlock *MBB) const { |
32283 | // Emit va_arg instruction on X86-64. |
32284 | // |
32285 | // Operands to this pseudo-instruction: |
32286 | // 0  ) Output        : destination address (reg) |
32287 | // 1-5) Input         : va_list address (addr, i64mem) |
32288 | // 6  ) ArgSize       : Size (in bytes) of vararg type |
32289 | // 7  ) ArgMode       : 0=overflow only, 1=use gp_offset, 2=use fp_offset |
32290 | // 8  ) Align         : Alignment of type |
32291 | // 9  ) EFLAGS (implicit-def) |
32292 | |
32293 | assert(MI.getNumOperands() == 10 && "VAARG should have 10 operands!"); |
32294 | static_assert(X86::AddrNumOperands == 5, "VAARG assumes 5 address operands"); |
32295 | |
32296 | Register DestReg = MI.getOperand(0).getReg(); |
32297 | MachineOperand &Base = MI.getOperand(1); |
32298 | MachineOperand &Scale = MI.getOperand(2); |
32299 | MachineOperand &Index = MI.getOperand(3); |
32300 | MachineOperand &Disp = MI.getOperand(4); |
32301 | MachineOperand &Segment = MI.getOperand(5); |
32302 | unsigned ArgSize = MI.getOperand(6).getImm(); |
32303 | unsigned ArgMode = MI.getOperand(7).getImm(); |
32304 | Align Alignment = Align(MI.getOperand(8).getImm()); |
32305 | |
32306 | MachineFunction *MF = MBB->getParent(); |
32307 | |
32308 | |
32309 | assert(MI.hasOneMemOperand() && "Expected VAARG to have one memoperand"); |
32310 | |
32311 | MachineMemOperand *OldMMO = MI.memoperands().front(); |
32312 | |
32313 | |
32314 | MachineMemOperand *LoadOnlyMMO = MF->getMachineMemOperand( |
32315 | OldMMO, OldMMO->getFlags() & ~MachineMemOperand::MOStore); |
32316 | MachineMemOperand *StoreOnlyMMO = MF->getMachineMemOperand( |
32317 | OldMMO, OldMMO->getFlags() & ~MachineMemOperand::MOLoad); |
32318 | |
32319 | |
32320 | const TargetInstrInfo *TII = Subtarget.getInstrInfo(); |
32321 | MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); |
32322 | const TargetRegisterClass *AddrRegClass = |
32323 | getRegClassFor(getPointerTy(MBB->getParent()->getDataLayout())); |
32324 | const TargetRegisterClass *OffsetRegClass = getRegClassFor(MVT::i32); |
32325 | const DebugLoc &DL = MI.getDebugLoc(); |
32326 | |
32327 | // struct va_list { |
32328 | //   i32   gp_offset |
32329 | //   i32   fp_offset |
32330 | //   i64   overflow_area (address) |
32331 | //   i64   reg_save_area (address) |
32332 | // } |
32333 | // sizeof(va_list) = 24 |
32334 | // alignment(va_list) = 8 |
32335 | |
32336 | unsigned TotalNumIntRegs = 6; |
32337 | unsigned TotalNumXMMRegs = 8; |
32338 | bool UseGPOffset = (ArgMode == 1); |
32339 | bool UseFPOffset = (ArgMode == 2); |
32340 | unsigned MaxOffset = TotalNumIntRegs * 8 + |
32341 | (UseFPOffset ? TotalNumXMMRegs * 16 : 0); |
32342 | |
32343 | /* Align ArgSize to a multiple of 8 */ |
32344 | unsigned ArgSizeA8 = (ArgSize + 7) & ~7; |
32345 | bool NeedsAlign = (Alignment > 8); |
32346 | |
32347 | MachineBasicBlock *thisMBB = MBB; |
32348 | MachineBasicBlock *overflowMBB; |
32349 | MachineBasicBlock *offsetMBB; |
32350 | MachineBasicBlock *endMBB; |
32351 | |
32352 | unsigned OffsetDestReg = 0; |
32353 | unsigned OverflowDestReg = 0; |
32354 | unsigned OffsetReg = 0; |
32355 | |
32356 | if (!UseGPOffset && !UseFPOffset) { |
32357 | // If we only pull from the overflow region, we don't create a branch. |
32358 | // We don't need to alter control flow. |
32359 | OffsetDestReg = 0; |
32360 | OverflowDestReg = DestReg; |
32361 | |
32362 | offsetMBB = nullptr; |
32363 | overflowMBB = thisMBB; |
32364 | endMBB = thisMBB; |
32365 | } else { |
32366 | // First emit code to check if gp_offset (or fp_offset) is below the |
32367 | // bound.  If so, pull the argument from the register save area; if not, |
32368 | // pull it from the overflow area, and join both paths in endMBB: |
32369 | // |
32370 | //    thisMBB |
32371 | //     /    \ |
32372 | // offsetMBB overflowMBB |
32373 | //     \    / |
32374 | //     endMBB |
32379 | OffsetDestReg = MRI.createVirtualRegister(AddrRegClass); |
32380 | OverflowDestReg = MRI.createVirtualRegister(AddrRegClass); |
32381 | |
32382 | const BasicBlock *LLVM_BB = MBB->getBasicBlock(); |
32383 | overflowMBB = MF->CreateMachineBasicBlock(LLVM_BB); |
32384 | offsetMBB = MF->CreateMachineBasicBlock(LLVM_BB); |
32385 | endMBB = MF->CreateMachineBasicBlock(LLVM_BB); |
32386 | |
32387 | MachineFunction::iterator MBBIter = ++MBB->getIterator(); |
32388 | |
32389 | |
32390 | MF->insert(MBBIter, offsetMBB); |
32391 | MF->insert(MBBIter, overflowMBB); |
32392 | MF->insert(MBBIter, endMBB); |
32393 | |
32394 | |
32395 | endMBB->splice(endMBB->begin(), thisMBB, |
32396 | std::next(MachineBasicBlock::iterator(MI)), thisMBB->end()); |
32397 | endMBB->transferSuccessorsAndUpdatePHIs(thisMBB); |
32398 | |
32399 | |
32400 | thisMBB->addSuccessor(offsetMBB); |
32401 | thisMBB->addSuccessor(overflowMBB); |
32402 | |
32403 | |
32404 | offsetMBB->addSuccessor(endMBB); |
32405 | overflowMBB->addSuccessor(endMBB); |
32406 | |
32407 | |
32408 | OffsetReg = MRI.createVirtualRegister(OffsetRegClass); |
32409 | BuildMI(thisMBB, DL, TII->get(X86::MOV32rm), OffsetReg) |
32410 | .add(Base) |
32411 | .add(Scale) |
32412 | .add(Index) |
32413 | .addDisp(Disp, UseFPOffset ? 4 : 0) |
32414 | .add(Segment) |
32415 | .setMemRefs(LoadOnlyMMO); |
32416 | |
32417 | // Check if there is enough room left to pull this argument. |
32418 | BuildMI(thisMBB, DL, TII->get(X86::CMP32ri)) |
32419 | .addReg(OffsetReg) |
32420 | .addImm(MaxOffset + 8 - ArgSizeA8); |
32421 | |
32422 | // Branch to "overflowMBB" if offset >= max. |
32423 | // Fall through to "offsetMBB" otherwise. |
32424 | BuildMI(thisMBB, DL, TII->get(X86::JCC_1)) |
32425 | .addMBB(overflowMBB).addImm(X86::COND_AE); |
32426 | } |
32427 | |
32428 | // In offsetMBB, emit code to use the reg_save_area. |
32429 | if (offsetMBB) { |
32430 | assert(OffsetReg != 0); |
32431 | |
32432 | // Read the reg_save_area address into a register. |
32433 | Register RegSaveReg = MRI.createVirtualRegister(AddrRegClass); |
32434 | BuildMI( |
32435 | offsetMBB, DL, |
32436 | TII->get(Subtarget.isTarget64BitLP64() ? X86::MOV64rm : X86::MOV32rm), |
32437 | RegSaveReg) |
32438 | .add(Base) |
32439 | .add(Scale) |
32440 | .add(Index) |
32441 | .addDisp(Disp, Subtarget.isTarget64BitLP64() ? 16 : 12) |
32442 | .add(Segment) |
32443 | .setMemRefs(LoadOnlyMMO); |
32444 | |
32445 | if (Subtarget.isTarget64BitLP64()) { |
32446 | // Zero-extend the offset. |
32447 | Register OffsetReg64 = MRI.createVirtualRegister(AddrRegClass); |
32448 | BuildMI(offsetMBB, DL, TII->get(X86::SUBREG_TO_REG), OffsetReg64) |
32449 | .addImm(0) |
32450 | .addReg(OffsetReg) |
32451 | .addImm(X86::sub_32bit); |
32452 | |
32453 | // Add the offset to the reg_save_area to get the final address. |
32454 | BuildMI(offsetMBB, DL, TII->get(X86::ADD64rr), OffsetDestReg) |
32455 | .addReg(OffsetReg64) |
32456 | .addReg(RegSaveReg); |
32457 | } else { |
32458 | |
32459 | BuildMI(offsetMBB, DL, TII->get(X86::ADD32rr), OffsetDestReg) |
32460 | .addReg(OffsetReg) |
32461 | .addReg(RegSaveReg); |
32462 | } |
32463 | |
32464 | // Compute the offset for the next argument. |
32465 | Register NextOffsetReg = MRI.createVirtualRegister(OffsetRegClass); |
32466 | BuildMI(offsetMBB, DL, TII->get(X86::ADD32ri), NextOffsetReg) |
32467 | .addReg(OffsetReg) |
32468 | .addImm(UseFPOffset ? 16 : 8); |
32469 | |
32470 | // Store it back into the va_list. |
32471 | BuildMI(offsetMBB, DL, TII->get(X86::MOV32mr)) |
32472 | .add(Base) |
32473 | .add(Scale) |
32474 | .add(Index) |
32475 | .addDisp(Disp, UseFPOffset ? 4 : 0) |
32476 | .add(Segment) |
32477 | .addReg(NextOffsetReg) |
32478 | .setMemRefs(StoreOnlyMMO); |
32479 | |
32480 | // Jump to endMBB. |
32481 | BuildMI(offsetMBB, DL, TII->get(X86::JMP_1)) |
32482 | .addMBB(endMBB); |
32483 | } |
32484 | |
32485 | // |
32486 | // Emit code to use the overflow area. |
32487 | // |
32488 | |
32489 | // Load the overflow_area address into a register. |
32490 | Register OverflowAddrReg = MRI.createVirtualRegister(AddrRegClass); |
32491 | BuildMI(overflowMBB, DL, |
32492 | TII->get(Subtarget.isTarget64BitLP64() ? X86::MOV64rm : X86::MOV32rm), |
32493 | OverflowAddrReg) |
32494 | .add(Base) |
32495 | .add(Scale) |
32496 | .add(Index) |
32497 | .addDisp(Disp, 8) |
32498 | .add(Segment) |
32499 | .setMemRefs(LoadOnlyMMO); |
32500 | |
32501 | // If we need to align it, do so. Otherwise, just copy the address |
32502 | // to OverflowDestReg. |
32503 | if (NeedsAlign) { |
32504 | // Align the overflow address. |
32505 | Register TmpReg = MRI.createVirtualRegister(AddrRegClass); |
32506 | |
32507 | // aligned_addr = (addr + (align-1)) & ~(align-1) |
32508 | BuildMI( |
32509 | overflowMBB, DL, |
32510 | TII->get(Subtarget.isTarget64BitLP64() ? X86::ADD64ri32 : X86::ADD32ri), |
32511 | TmpReg) |
32512 | .addReg(OverflowAddrReg) |
32513 | .addImm(Alignment.value() - 1); |
32514 | |
32515 | BuildMI( |
32516 | overflowMBB, DL, |
32517 | TII->get(Subtarget.isTarget64BitLP64() ? X86::AND64ri32 : X86::AND32ri), |
32518 | OverflowDestReg) |
32519 | .addReg(TmpReg) |
32520 | .addImm(~(uint64_t)(Alignment.value() - 1)); |
32521 | } else { |
32522 | BuildMI(overflowMBB, DL, TII->get(TargetOpcode::COPY), OverflowDestReg) |
32523 | .addReg(OverflowAddrReg); |
32524 | } |
32525 | |
32526 | |
32527 | // Compute the next overflow address after this argument. |
32528 | Register NextAddrReg = MRI.createVirtualRegister(AddrRegClass); |
32529 | BuildMI( |
32530 | overflowMBB, DL, |
32531 | TII->get(Subtarget.isTarget64BitLP64() ? X86::ADD64ri32 : X86::ADD32ri), |
32532 | NextAddrReg) |
32533 | .addReg(OverflowDestReg) |
32534 | .addImm(ArgSizeA8); |
32535 | |
32536 | // Store the new overflow address. |
32537 | BuildMI(overflowMBB, DL, |
32538 | TII->get(Subtarget.isTarget64BitLP64() ? X86::MOV64mr : X86::MOV32mr)) |
32539 | .add(Base) |
32540 | .add(Scale) |
32541 | .add(Index) |
32542 | .addDisp(Disp, 8) |
32543 | .add(Segment) |
32544 | .addReg(NextAddrReg) |
32545 | .setMemRefs(StoreOnlyMMO); |
32546 | |
32547 | // If we branched, emit the PHI to the front of endMBB. |
32548 | if (offsetMBB) { |
32549 | BuildMI(*endMBB, endMBB->begin(), DL, |
32550 | TII->get(X86::PHI), DestReg) |
32551 | .addReg(OffsetDestReg).addMBB(offsetMBB) |
32552 | .addReg(OverflowDestReg).addMBB(overflowMBB); |
32553 | } |
32554 | |
32555 | // Erase the pseudo instruction. |
32556 | MI.eraseFromParent(); |
32557 | |
32558 | return endMBB; |
32559 | } |
32560 | |
32561 | // The EFLAGS operand of SelectItr might be missing a kill marker |
32562 | // because there were multiple uses of EFLAGS, and ISel didn't know |
32563 | // which to mark.  Figure out whether SelectItr should have had a |
32564 | // kill marker, and set it if it should.  Returns the correct kill |
32565 | // marker value. |
32566 | static bool checkAndUpdateEFLAGSKill(MachineBasicBlock::iterator SelectItr, |
32567 | MachineBasicBlock* BB, |
32568 | const TargetRegisterInfo* TRI) { |
32569 | if (isEFLAGSLiveAfter(SelectItr, BB)) |
32570 | return false; |
32571 | |
32572 | // EFLAGS is not read again after SelectItr, so SelectItr holds the last |
32573 | // use; mark it with a kill flag. |
32574 | SelectItr->addRegisterKilled(X86::EFLAGS, TRI); |
32575 | return true; |
32576 | } |
32577 | |
32578 | // Return true if it is OK for this CMOV pseudo-opcode to be cascaded |
32579 | // together with other CMOV pseudo-opcodes into one basic block with a |
32580 | // conditional jump around it. |
32581 | static bool isCMOVPseudo(MachineInstr &MI) { |
32582 | switch (MI.getOpcode()) { |
32583 | case X86::CMOV_FR32: |
32584 | case X86::CMOV_FR32X: |
32585 | case X86::CMOV_FR64: |
32586 | case X86::CMOV_FR64X: |
32587 | case X86::CMOV_GR8: |
32588 | case X86::CMOV_GR16: |
32589 | case X86::CMOV_GR32: |
32590 | case X86::CMOV_RFP32: |
32591 | case X86::CMOV_RFP64: |
32592 | case X86::CMOV_RFP80: |
32593 | case X86::CMOV_VR64: |
32594 | case X86::CMOV_VR128: |
32595 | case X86::CMOV_VR128X: |
32596 | case X86::CMOV_VR256: |
32597 | case X86::CMOV_VR256X: |
32598 | case X86::CMOV_VR512: |
32599 | case X86::CMOV_VK1: |
32600 | case X86::CMOV_VK2: |
32601 | case X86::CMOV_VK4: |
32602 | case X86::CMOV_VK8: |
32603 | case X86::CMOV_VK16: |
32604 | case X86::CMOV_VK32: |
32605 | case X86::CMOV_VK64: |
32606 | return true; |
32607 | |
32608 | default: |
32609 | return false; |
32610 | } |
32611 | } |
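      | // These pseudos exist because real CMOV only operates on GPRs (and may |
      | // be unavailable entirely on older CPUs); for FP, vector, and mask |
      | // registers the select is expanded below into an explicit |
      | // branch-plus-PHI diamond (see EmitLoweredSelect). |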
32612 | |
32613 | // Helper function, which inserts PHI functions into SinkMBB: |
32614 | //   %Result(i) = phi [ %FalseValue(i), FalseMBB ], |
32615 | //                    [ %TrueValue(i), TrueMBB ], |
32616 | // where %FalseValue(i) and %TrueValue(i) are taken from the consecutive |
32617 | // CMOVs in the [MIItBegin, MIItEnd) range. |
32618 | static MachineInstrBuilder createPHIsForCMOVsInSinkBB( |
32619 | MachineBasicBlock::iterator MIItBegin, MachineBasicBlock::iterator MIItEnd, |
32620 | MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB, |
32621 | MachineBasicBlock *SinkMBB) { |
32622 | MachineFunction *MF = TrueMBB->getParent(); |
32623 | const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo(); |
32624 | const DebugLoc &DL = MIItBegin->getDebugLoc(); |
32625 | |
32626 | X86::CondCode CC = X86::CondCode(MIItBegin->getOperand(3).getImm()); |
32627 | X86::CondCode OppCC = X86::GetOppositeBranchCondition(CC); |
32628 | |
32629 | MachineBasicBlock::iterator SinkInsertionPoint = SinkMBB->begin(); |
32630 | |
32631 | // As we are creating the PHIs, we have to be careful if there is more |
32632 | // than one.  Later CMOVs may reference the results of earlier CMOVs, but |
32633 | // later PHIs have to reference the individual true/false inputs from |
32634 | // earlier PHIs.  That also means that PHI construction must work forward |
32635 | // from earlier to later, and that the code must maintain a mapping from |
32636 | // earlier PHIs' destination registers to the registers that went in. |
32637 | DenseMap<unsigned, std::pair<unsigned, unsigned>> RegRewriteTable; |
32638 | MachineInstrBuilder MIB; |
32639 | |
32640 | for (MachineBasicBlock::iterator MIIt = MIItBegin; MIIt != MIItEnd; ++MIIt) { |
32641 | Register DestReg = MIIt->getOperand(0).getReg(); |
32642 | Register Op1Reg = MIIt->getOperand(1).getReg(); |
32643 | Register Op2Reg = MIIt->getOperand(2).getReg(); |
32644 | |
32645 | // If this CMOV we are generating is the opposite condition from the |
32646 | // jump we generated, then we have to swap the operands for the PHI that |
32647 | // is going to be generated. |
32648 | if (MIIt->getOperand(3).getImm() == OppCC) |
32649 | std::swap(Op1Reg, Op2Reg); |
32650 | |
32651 | if (RegRewriteTable.find(Op1Reg) != RegRewriteTable.end()) |
32652 | Op1Reg = RegRewriteTable[Op1Reg].first; |
32653 | |
32654 | if (RegRewriteTable.find(Op2Reg) != RegRewriteTable.end()) |
32655 | Op2Reg = RegRewriteTable[Op2Reg].second; |
32656 | |
32657 | MIB = BuildMI(*SinkMBB, SinkInsertionPoint, DL, TII->get(X86::PHI), DestReg) |
32658 | .addReg(Op1Reg) |
32659 | .addMBB(FalseMBB) |
32660 | .addReg(Op2Reg) |
32661 | .addMBB(TrueMBB); |
32662 | |
32663 | // Add this PHI to the rewrite table. |
32664 | RegRewriteTable[DestReg] = std::make_pair(Op1Reg, Op2Reg); |
32665 | } |
32666 | |
32667 | return MIB; |
32668 | } |
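      | // Worked example of the rewrite table (illustrative): for the chain |
      | //   %d1 = CMOV %a, %b, CC |
      | //   %d2 = CMOV %d1, %b, CC |
      | // the second PHI cannot use %d1 (it is being replaced), so %d1 is |
      | // rewritten to its false-edge input %a, producing |
      | //   %d2 = phi [ %a, FalseMBB ], [ %b, TrueMBB ] |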
32669 | |
32670 | // Lower cascaded selects of the form (SecondCMOV (FirstCMOV F, T, cc1), T, cc2). |
32671 | MachineBasicBlock * |
32672 | X86TargetLowering::EmitLoweredCascadedSelect(MachineInstr &FirstCMOV, |
32673 | MachineInstr &SecondCascadedCMOV, |
32674 | MachineBasicBlock *ThisMBB) const { |
32675 | const TargetInstrInfo *TII = Subtarget.getInstrInfo(); |
32676 | const DebugLoc &DL = FirstCMOV.getDebugLoc(); |
32677 | |
32678 | // We have something like this: |
32679 | // |
32680 | //   %FirstCMOV  = CMOV_X %FalseVal, %TrueVal, cc1 |
32681 | //   %SecondCMOV = CMOV_X %FirstCMOV, %TrueVal, cc2 |
32682 | // |
32683 | // Rather than expanding each CMOV into its own diamond, emit one chain |
32684 | // of branches so the value is %TrueVal whenever cc1 or cc2 holds: |
32685 | // |
32686 | //   ThisMBB:           jcc1 SinkMBB        ; %TrueVal on this edge |
32687 | //   FirstInsertedMBB:  jcc2 SinkMBB        ; %TrueVal on this edge |
32688 | //   SecondInsertedMBB: (fallthrough)       ; %FalseVal on this edge |
32689 | //   SinkMBB:           %v = phi [%FalseVal, SecondInsertedMBB], |
32690 | //                               [%TrueVal, ThisMBB], |
32691 | //                               [%TrueVal, FirstInsertedMBB] |
32692 | // |
32693 | // and then copy the PHI result into %SecondCMOV's destination register. |
32694 | |
32749 | const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock(); |
32750 | MachineFunction *F = ThisMBB->getParent(); |
32751 | MachineBasicBlock *FirstInsertedMBB = F->CreateMachineBasicBlock(LLVM_BB); |
32752 | MachineBasicBlock *SecondInsertedMBB = F->CreateMachineBasicBlock(LLVM_BB); |
32753 | MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(LLVM_BB); |
32754 | |
32755 | MachineFunction::iterator It = ++ThisMBB->getIterator(); |
32756 | F->insert(It, FirstInsertedMBB); |
32757 | F->insert(It, SecondInsertedMBB); |
32758 | F->insert(It, SinkMBB); |
32759 | |
32760 | // FirstInsertedMBB unconditionally reads EFLAGS: its terminating jcc |
32761 | // tests the second condition, so it must list EFLAGS as live-in no |
32762 | // matter what the kill flags say. |
32763 | FirstInsertedMBB->addLiveIn(X86::EFLAGS); |
32764 | |
32765 | // If the EFLAGS register isn't dead in the terminator, then claim that |
32766 | // it's live into the sink and copy blocks. |
32767 | const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); |
32768 | if (!SecondCascadedCMOV.killsRegister(X86::EFLAGS) && |
32769 | !checkAndUpdateEFLAGSKill(SecondCascadedCMOV, ThisMBB, TRI)) { |
32770 | SecondInsertedMBB->addLiveIn(X86::EFLAGS); |
32771 | SinkMBB->addLiveIn(X86::EFLAGS); |
32772 | } |
32773 | |
32774 | // Transfer the remainder of ThisMBB and its successor edges to SinkMBB. |
32775 | SinkMBB->splice(SinkMBB->begin(), ThisMBB, |
32776 | std::next(MachineBasicBlock::iterator(FirstCMOV)), |
32777 | ThisMBB->end()); |
32778 | SinkMBB->transferSuccessorsAndUpdatePHIs(ThisMBB); |
32779 | |
32780 | // Fallthrough block for ThisMBB. |
32781 | ThisMBB->addSuccessor(FirstInsertedMBB); |
32782 | // The true block target of the first branch is always SinkMBB. |
32783 | ThisMBB->addSuccessor(SinkMBB); |
32784 | // Fallthrough block for FirstInsertedMBB. |
32785 | FirstInsertedMBB->addSuccessor(SecondInsertedMBB); |
32786 | // The true block for the branch of FirstInsertedMBB. |
32787 | FirstInsertedMBB->addSuccessor(SinkMBB); |
32788 | // This is fallthrough. |
32789 | SecondInsertedMBB->addSuccessor(SinkMBB); |
32790 | |
32791 | // Create the conditional branch instructions. |
32792 | X86::CondCode FirstCC = X86::CondCode(FirstCMOV.getOperand(3).getImm()); |
32793 | BuildMI(ThisMBB, DL, TII->get(X86::JCC_1)).addMBB(SinkMBB).addImm(FirstCC); |
32794 | |
32795 | X86::CondCode SecondCC = |
32796 | X86::CondCode(SecondCascadedCMOV.getOperand(3).getImm()); |
32797 | BuildMI(FirstInsertedMBB, DL, TII->get(X86::JCC_1)).addMBB(SinkMBB).addImm(SecondCC); |
32798 | |
32799 | //  SinkMBB: |
32800 | //   %Result = phi [ %FalseValue, SecondInsertedMBB ], [ %TrueValue, ThisMBB ] |
32801 | Register DestReg = FirstCMOV.getOperand(0).getReg(); |
32802 | Register Op1Reg = FirstCMOV.getOperand(1).getReg(); |
32803 | Register Op2Reg = FirstCMOV.getOperand(2).getReg(); |
32804 | MachineInstrBuilder MIB = |
32805 | BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII->get(X86::PHI), DestReg) |
32806 | .addReg(Op1Reg) |
32807 | .addMBB(SecondInsertedMBB) |
32808 | .addReg(Op2Reg) |
32809 | .addMBB(ThisMBB); |
32810 | |
32811 | // The second incoming edge (FirstInsertedMBB) carries the same value as |
32812 | // ThisMBB: the True operand of the cascaded CMOV. |
32813 | MIB.addReg(FirstCMOV.getOperand(2).getReg()).addMBB(FirstInsertedMBB); |
32814 | // Copy the PHI result to the register defined by the second CMOV. |
32815 | BuildMI(*SinkMBB, std::next(MachineBasicBlock::iterator(MIB.getInstr())), DL, |
32816 | TII->get(TargetOpcode::COPY), |
32817 | SecondCascadedCMOV.getOperand(0).getReg()) |
32818 | .addReg(FirstCMOV.getOperand(0).getReg()); |
32819 | |
32820 | // Now remove the CMOVs. |
32821 | FirstCMOV.eraseFromParent(); |
32822 | SecondCascadedCMOV.eraseFromParent(); |
32823 | |
32824 | return SinkMBB; |
32825 | } |
32826 | |
32827 | MachineBasicBlock * |
32828 | X86TargetLowering::EmitLoweredSelect(MachineInstr &MI, |
32829 | MachineBasicBlock *ThisMBB) const { |
32830 | const TargetInstrInfo *TII = Subtarget.getInstrInfo(); |
32831 | const DebugLoc &DL = MI.getDebugLoc(); |
32832 | // To "insert" a SELECT_CC instruction, we actually have to insert the |
32833 | // diamond control-flow pattern.  The incoming instruction knows the |
32834 | // destination vreg to set, the condition code register to branch on, the |
32835 | // true/false values to select between and a branch opcode to use: |
32836 | // |
32837 | //   ThisMBB: |
32838 | //     ... |
32839 | //     jCC SinkMBB              ; take %TrueValue on this edge |
32840 | //     fallthrough --> FalseMBB |
32841 | //   FalseMBB: |
32842 | //     fallthrough --> SinkMBB  ; take %FalseValue on this edge |
32843 | //   SinkMBB: |
32844 | //     %Result = phi [ %FalseValue, FalseMBB ], [ %TrueValue, ThisMBB ] |
32845 | // |
32846 | // Two refinements are applied below: a run of consecutive CMOVs using |
32847 | // the same (or the opposite) condition shares a single diamond, with one |
32848 | // PHI per CMOV (case 1), and a pair of cascaded CMOVs is handed off to |
32849 | // the more general EmitLoweredCascadedSelect (case 2). |
32850 | |
32880 | X86::CondCode CC = X86::CondCode(MI.getOperand(3).getImm()); |
32881 | X86::CondCode OppCC = X86::GetOppositeBranchCondition(CC); |
32882 | MachineInstr *LastCMOV = &MI; |
32883 | MachineBasicBlock::iterator NextMIIt = MachineBasicBlock::iterator(MI); |
32884 | |
32885 | // Check for case 1, where there are multiple CMOVs with the same |
32886 | // condition, first.  Of the two cases of multiple CMOV lowerings, case 1 |
32887 | // reduces the number of jumps the most. |
32888 | |
32889 | if (isCMOVPseudo(MI)) { |
32890 | // See if we have a string of CMOVs with the same condition. Skip over |
32891 | // intervening debug insts. |
32892 | while (NextMIIt != ThisMBB->end() && isCMOVPseudo(*NextMIIt) && |
32893 | (NextMIIt->getOperand(3).getImm() == CC || |
32894 | NextMIIt->getOperand(3).getImm() == OppCC)) { |
32895 | LastCMOV = &*NextMIIt; |
32896 | NextMIIt = next_nodbg(NextMIIt, ThisMBB->end()); |
32897 | } |
32898 | } |
32899 | |
32900 | // This checks for case 2: a second CMOV that consumes the result of the |
32901 | // first one, with the same true operand; lower both together. |
32902 | if (LastCMOV == &MI && NextMIIt != ThisMBB->end() && |
32903 | NextMIIt->getOpcode() == MI.getOpcode() && |
32904 | NextMIIt->getOperand(2).getReg() == MI.getOperand(2).getReg() && |
32905 | NextMIIt->getOperand(1).getReg() == MI.getOperand(0).getReg() && |
32906 | NextMIIt->getOperand(1).isKill()) { |
32907 | return EmitLoweredCascadedSelect(MI, *NextMIIt, ThisMBB); |
32908 | } |
32909 | |
32910 | const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock(); |
32911 | MachineFunction *F = ThisMBB->getParent(); |
32912 | MachineBasicBlock *FalseMBB = F->CreateMachineBasicBlock(LLVM_BB); |
32913 | MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(LLVM_BB); |
32914 | |
32915 | MachineFunction::iterator It = ++ThisMBB->getIterator(); |
32916 | F->insert(It, FalseMBB); |
32917 | F->insert(It, SinkMBB); |
32918 | |
32919 | // If the EFLAGS register isn't dead in the terminator, then claim that |
32920 | // it's live into the sink and copy blocks. |
32921 | const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); |
32922 | if (!LastCMOV->killsRegister(X86::EFLAGS) && |
32923 | !checkAndUpdateEFLAGSKill(LastCMOV, ThisMBB, TRI)) { |
32924 | FalseMBB->addLiveIn(X86::EFLAGS); |
32925 | SinkMBB->addLiveIn(X86::EFLAGS); |
32926 | } |
32927 | |
32928 | // Sink any debug instructions found amid the CMOV sequence into SinkMBB. |
32929 | auto DbgEnd = MachineBasicBlock::iterator(LastCMOV); |
32930 | auto DbgIt = MachineBasicBlock::iterator(MI); |
32931 | while (DbgIt != DbgEnd) { |
32932 | auto Next = std::next(DbgIt); |
32933 | if (DbgIt->isDebugInstr()) |
32934 | SinkMBB->push_back(DbgIt->removeFromParent()); |
32935 | DbgIt = Next; |
32936 | } |
32937 | |
32938 | // Transfer the remainder of ThisMBB and its successor edges to SinkMBB. |
32939 | SinkMBB->splice(SinkMBB->end(), ThisMBB, |
32940 | std::next(MachineBasicBlock::iterator(LastCMOV)), |
32941 | ThisMBB->end()); |
32942 | SinkMBB->transferSuccessorsAndUpdatePHIs(ThisMBB); |
32943 | |
32944 | |
32945 | ThisMBB->addSuccessor(FalseMBB); |
32946 | |
32947 | ThisMBB->addSuccessor(SinkMBB); |
32948 | |
32949 | FalseMBB->addSuccessor(SinkMBB); |
32950 | |
32951 | // Create the conditional branch instruction. |
32952 | BuildMI(ThisMBB, DL, TII->get(X86::JCC_1)).addMBB(SinkMBB).addImm(CC); |
32953 | |
32954 | |
32955 | //  SinkMBB: |
32956 | //   %Result = phi [ %FalseValue, FalseMBB ], [ %TrueValue, ThisMBB ] |
32957 | MachineBasicBlock::iterator MIItBegin = MachineBasicBlock::iterator(MI); |
32958 | MachineBasicBlock::iterator MIItEnd = |
32959 | std::next(MachineBasicBlock::iterator(LastCMOV)); |
32960 | createPHIsForCMOVsInSinkBB(MIItBegin, MIItEnd, ThisMBB, FalseMBB, SinkMBB); |
32961 | |
32962 | // Now remove the CMOV(s). |
32963 | ThisMBB->erase(MIItBegin, MIItEnd); |
32964 | |
32965 | return SinkMBB; |
32966 | } |
32967 | |
32968 | static unsigned getSUBriOpcode(bool IsLP64, int64_t Imm) { |
32969 | if (IsLP64) { |
32970 | if (isInt<8>(Imm)) |
32971 | return X86::SUB64ri8; |
32972 | return X86::SUB64ri32; |
32973 | } else { |
32974 | if (isInt<8>(Imm)) |
32975 | return X86::SUB32ri8; |
32976 | return X86::SUB32ri; |
32977 | } |
32978 | } |
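      | // Hedged usage sketch (hypothetical call site, using BuildMI as |
      | // elsewhere in this file): the ri8 form is chosen only when the |
      | // immediate fits in a signed byte. |
      | //   unsigned Opc = getSUBriOpcode(/*IsLP64=*/true, 128); // SUB64ri32 |
      | //   BuildMI(MBB, I, DL, TII->get(Opc), X86::RSP) |
      | //       .addReg(X86::RSP) |
      | //       .addImm(128); |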
32979 | |
32980 | MachineBasicBlock * |
32981 | X86TargetLowering::EmitLoweredProbedAlloca(MachineInstr &MI, |
32982 | MachineBasicBlock *MBB) const { |
32983 | MachineFunction *MF = MBB->getParent(); |
32984 | const TargetInstrInfo *TII = Subtarget.getInstrInfo(); |
32985 | const X86FrameLowering &TFI = *Subtarget.getFrameLowering(); |
32986 | const DebugLoc &DL = MI.getDebugLoc(); |
32987 | const BasicBlock *LLVM_BB = MBB->getBasicBlock(); |
32988 | |
32989 | const unsigned ProbeSize = getStackProbeSize(*MF); |
32990 | |
32991 | MachineRegisterInfo &MRI = MF->getRegInfo(); |
32992 | MachineBasicBlock *testMBB = MF->CreateMachineBasicBlock(LLVM_BB); |
32993 | MachineBasicBlock *tailMBB = MF->CreateMachineBasicBlock(LLVM_BB); |
32994 | MachineBasicBlock *blockMBB = MF->CreateMachineBasicBlock(LLVM_BB); |
32995 | |
32996 | MachineFunction::iterator MBBIter = ++MBB->getIterator(); |
32997 | MF->insert(MBBIter, testMBB); |
32998 | MF->insert(MBBIter, blockMBB); |
32999 | MF->insert(MBBIter, tailMBB); |
33000 | |
33001 | Register sizeVReg = MI.getOperand(1).getReg(); |
33002 | |
33003 | Register physSPReg = TFI.Uses64BitFramePtr ? X86::RSP : X86::ESP; |
33004 | |
33005 | Register TmpStackPtr = MRI.createVirtualRegister( |
33006 | TFI.Uses64BitFramePtr ? &X86::GR64RegClass : &X86::GR32RegClass); |
33007 | Register FinalStackPtr = MRI.createVirtualRegister( |
33008 | TFI.Uses64BitFramePtr ? &X86::GR64RegClass : &X86::GR32RegClass); |
33009 | |
33010 | BuildMI(*MBB, {MI}, DL, TII->get(TargetOpcode::COPY), TmpStackPtr) |
33011 | .addReg(physSPReg); |
33012 | { |
33013 | const unsigned Opc = TFI.Uses64BitFramePtr ? X86::SUB64rr : X86::SUB32rr; |
33014 | BuildMI(*MBB, {MI}, DL, TII->get(Opc), FinalStackPtr) |
33015 | .addReg(TmpStackPtr) |
33016 | .addReg(sizeVReg); |
33017 | } |
33018 | |
33019 | // testMBB: stop probing once the stack pointer has reached (or passed) |
33020 | // its final value. |
33021 | BuildMI(testMBB, DL, |
33022 | TII->get(TFI.Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr)) |
33023 | .addReg(FinalStackPtr) |
33024 | .addReg(physSPReg); |
33025 | |
33026 | BuildMI(testMBB, DL, TII->get(X86::JCC_1)) |
33027 | .addMBB(tailMBB) |
33028 | .addImm(X86::COND_GE); |
33029 | testMBB->addSuccessor(blockMBB); |
33030 | testMBB->addSuccessor(tailMBB); |
33031 | |
33032 | // blockMBB: touch the page at the current stack pointer (the "xor mem, |
33033 | // 0" below writes the page without changing its contents), then drop |
33034 | // the stack pointer by another ProbeSize bytes.  Probing before each |
33035 | // extension guarantees that no gap larger than ProbeSize is left |
33036 | // between two consecutive probes, so a guard page cannot be jumped over. |
33044 | const unsigned XORMIOpc = |
33045 | TFI.Uses64BitFramePtr ? X86::XOR64mi8 : X86::XOR32mi8; |
33046 | addRegOffset(BuildMI(blockMBB, DL, TII->get(XORMIOpc)), physSPReg, false, 0) |
33047 | .addImm(0); |
33048 | |
33049 | BuildMI(blockMBB, DL, |
33050 | TII->get(getSUBriOpcode(TFI.Uses64BitFramePtr, ProbeSize)), physSPReg) |
33051 | .addReg(physSPReg) |
33052 | .addImm(ProbeSize); |
33053 | |
33054 | // Jump back to the loop head to re-test. |
33055 | BuildMI(blockMBB, DL, TII->get(X86::JMP_1)).addMBB(testMBB); |
33056 | blockMBB->addSuccessor(testMBB); |
33057 | |
33058 | // tailMBB: probing done, expose the final stack pointer as the result. |
33059 | BuildMI(tailMBB, DL, TII->get(TargetOpcode::COPY), MI.getOperand(0).getReg()) |
33060 | .addReg(FinalStackPtr); |
33061 | |
33062 | tailMBB->splice(tailMBB->end(), MBB, |
33063 | std::next(MachineBasicBlock::iterator(MI)), MBB->end()); |
33064 | tailMBB->transferSuccessorsAndUpdatePHIs(MBB); |
33065 | MBB->addSuccessor(testMBB); |
33066 | |
33067 | // Delete the original pseudo instruction. |
33068 | MI.eraseFromParent(); |
33069 | |
33070 | |
33071 | return tailMBB; |
33072 | } |
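      | // Shape of the emitted loop (illustrative): |
      | //   entry:  final_sp = sp - size |
      | //   test:   cmp final_sp, sp ; jge tail |
      | //   block:  probe (xor [sp], 0) ; sp -= ProbeSize ; jmp test |
      | //   tail:   result = final_sp |
      | // so every page between the old and new stack pointer gets touched. |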
33073 | |
33074 | MachineBasicBlock * |
33075 | X86TargetLowering::EmitLoweredSegAlloca(MachineInstr &MI, |
33076 | MachineBasicBlock *BB) const { |
33077 | MachineFunction *MF = BB->getParent(); |
33078 | const TargetInstrInfo *TII = Subtarget.getInstrInfo(); |
33079 | const DebugLoc &DL = MI.getDebugLoc(); |
33080 | const BasicBlock *LLVM_BB = BB->getBasicBlock(); |
33081 | |
33082 | assert(MF->shouldSplitStack()); |
33083 | |
33084 | const bool Is64Bit = Subtarget.is64Bit(); |
33085 | const bool IsLP64 = Subtarget.isTarget64BitLP64(); |
33086 | |
33087 | const unsigned TlsReg = Is64Bit ? X86::FS : X86::GS; |
33088 | const unsigned TlsOffset = IsLP64 ? 0x70 : Is64Bit ? 0x40 : 0x30; |
33089 | |
33090 | // BB: |
33091 | //  ... [Till the alloca] |
33092 | // If stacklet is not large enough, jump to mallocMBB. |
33093 | // |
33094 | // bumpMBB: |
33095 | //  Allocate by subtracting from RSP. |
33096 | //  Jump to continueMBB. |
33097 | // |
33098 | // mallocMBB: |
33099 | //  Allocate by call to runtime. |
33100 | // |
33101 | // continueMBB: |
33102 | //  ... |
33103 | //  [rest of original BB] |
33104 | // |
33105 | |
33106 | MachineBasicBlock *mallocMBB = MF->CreateMachineBasicBlock(LLVM_BB); |
33107 | MachineBasicBlock *bumpMBB = MF->CreateMachineBasicBlock(LLVM_BB); |
33108 | MachineBasicBlock *continueMBB = MF->CreateMachineBasicBlock(LLVM_BB); |
33109 | |
33110 | MachineRegisterInfo &MRI = MF->getRegInfo(); |
33111 | const TargetRegisterClass *AddrRegClass = |
33112 | getRegClassFor(getPointerTy(MF->getDataLayout())); |
33113 | |
33114 | Register mallocPtrVReg = MRI.createVirtualRegister(AddrRegClass), |
33115 | bumpSPPtrVReg = MRI.createVirtualRegister(AddrRegClass), |
33116 | tmpSPVReg = MRI.createVirtualRegister(AddrRegClass), |
33117 | SPLimitVReg = MRI.createVirtualRegister(AddrRegClass), |
33118 | sizeVReg = MI.getOperand(1).getReg(), |
33119 | physSPReg = |
33120 | IsLP64 || Subtarget.isTargetNaCl64() ? X86::RSP : X86::ESP; |
33121 | |
33122 | MachineFunction::iterator MBBIter = ++BB->getIterator(); |
33123 | |
33124 | MF->insert(MBBIter, bumpMBB); |
33125 | MF->insert(MBBIter, mallocMBB); |
33126 | MF->insert(MBBIter, continueMBB); |
33127 | |
33128 | continueMBB->splice(continueMBB->begin(), BB, |
33129 | std::next(MachineBasicBlock::iterator(MI)), BB->end()); |
33130 | continueMBB->transferSuccessorsAndUpdatePHIs(BB); |
33131 | |
33132 | // Add code to the main basic block to check if the stack limit has been |
33133 | // hit, and if so, jump to mallocMBB; otherwise fall through to bumpMBB. |
33134 | BuildMI(BB, DL, TII->get(TargetOpcode::COPY), tmpSPVReg).addReg(physSPReg); |
33135 | BuildMI(BB, DL, TII->get(IsLP64 ? X86::SUB64rr:X86::SUB32rr), SPLimitVReg) |
33136 | .addReg(tmpSPVReg).addReg(sizeVReg); |
33137 | BuildMI(BB, DL, TII->get(IsLP64 ? X86::CMP64mr:X86::CMP32mr)) |
33138 | .addReg(0).addImm(1).addReg(0).addImm(TlsOffset).addReg(TlsReg) |
33139 | .addReg(SPLimitVReg); |
33140 | BuildMI(BB, DL, TII->get(X86::JCC_1)).addMBB(mallocMBB).addImm(X86::COND_G); |
33141 | |
33142 | // bumpMBB: there is room in the current stacklet, so just commit the |
33143 | // decremented stack pointer and hand it back as the allocation. |
33144 | BuildMI(bumpMBB, DL, TII->get(TargetOpcode::COPY), physSPReg) |
33145 | .addReg(SPLimitVReg); |
33146 | BuildMI(bumpMBB, DL, TII->get(TargetOpcode::COPY), bumpSPPtrVReg) |
33147 | .addReg(SPLimitVReg); |
33148 | BuildMI(bumpMBB, DL, TII->get(X86::JMP_1)).addMBB(continueMBB); |
33149 | |
33150 | // Call into a routine in libgcc to allocate more space from the heap. |
33151 | const uint32_t *RegMask = |
33152 | Subtarget.getRegisterInfo()->getCallPreservedMask(*MF, CallingConv::C); |
33153 | if (IsLP64) { |
33154 | BuildMI(mallocMBB, DL, TII->get(X86::MOV64rr), X86::RDI) |
33155 | .addReg(sizeVReg); |
33156 | BuildMI(mallocMBB, DL, TII->get(X86::CALL64pcrel32)) |
33157 | .addExternalSymbol("__morestack_allocate_stack_space") |
33158 | .addRegMask(RegMask) |
33159 | .addReg(X86::RDI, RegState::Implicit) |
33160 | .addReg(X86::RAX, RegState::ImplicitDefine); |
33161 | } else if (Is64Bit) { |
33162 | BuildMI(mallocMBB, DL, TII->get(X86::MOV32rr), X86::EDI) |
33163 | .addReg(sizeVReg); |
33164 | BuildMI(mallocMBB, DL, TII->get(X86::CALL64pcrel32)) |
33165 | .addExternalSymbol("__morestack_allocate_stack_space") |
33166 | .addRegMask(RegMask) |
33167 | .addReg(X86::EDI, RegState::Implicit) |
33168 | .addReg(X86::EAX, RegState::ImplicitDefine); |
33169 | } else { |
33170 | BuildMI(mallocMBB, DL, TII->get(X86::SUB32ri), physSPReg).addReg(physSPReg) |
33171 | .addImm(12); |
33172 | BuildMI(mallocMBB, DL, TII->get(X86::PUSH32r)).addReg(sizeVReg); |
33173 | BuildMI(mallocMBB, DL, TII->get(X86::CALLpcrel32)) |
33174 | .addExternalSymbol("__morestack_allocate_stack_space") |
33175 | .addRegMask(RegMask) |
33176 | .addReg(X86::EAX, RegState::ImplicitDefine); |
33177 | } |
33178 | |
33179 | if (!Is64Bit) |
33180 | BuildMI(mallocMBB, DL, TII->get(X86::ADD32ri), physSPReg).addReg(physSPReg) |
33181 | .addImm(16); |
33182 | |
33183 | BuildMI(mallocMBB, DL, TII->get(TargetOpcode::COPY), mallocPtrVReg) |
33184 | .addReg(IsLP64 ? X86::RAX : X86::EAX); |
33185 | BuildMI(mallocMBB, DL, TII->get(X86::JMP_1)).addMBB(continueMBB); |
33186 | |
33187 | |
33188 | // Set up the CFG correctly. |
33189 | BB->addSuccessor(mallocMBB); |
33190 | mallocMBB->addSuccessor(continueMBB); |
33191 | bumpMBB->addSuccessor(continueMBB); |
33192 | |
33193 | // Take care of the PHI nodes. |
33194 | BuildMI(*continueMBB, continueMBB->begin(), DL, TII->get(X86::PHI), |
33195 | MI.getOperand(0).getReg()) |
33196 | .addReg(mallocPtrVReg) |
33197 | .addMBB(mallocMBB) |
33198 | .addReg(bumpSPPtrVReg) |
33199 | .addMBB(bumpMBB); |
33200 | |
33201 | // Delete the original pseudo instruction. |
33202 | MI.eraseFromParent(); |
33203 | |
33204 | // And we're done. |
33205 | return continueMBB; |
33206 | } |
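      | // Summary (illustrative): the stacklet limit lives in TLS at %fs:0x70 |
      | // (LP64), 0x40 (x32), or %gs:0x30 (ia32); if SP - size stays above it, |
      | // bumpMBB commits the new SP directly, otherwise mallocMBB calls |
      | // __morestack_allocate_stack_space, and continueMBB PHIs the two |
      | // resulting pointers together. |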
33207 | |
33208 | MachineBasicBlock * |
33209 | X86TargetLowering::EmitLoweredCatchRet(MachineInstr &MI, |
33210 | MachineBasicBlock *BB) const { |
33211 | MachineFunction *MF = BB->getParent(); |
33212 | const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); |
33213 | MachineBasicBlock *TargetMBB = MI.getOperand(0).getMBB(); |
33214 | const DebugLoc &DL = MI.getDebugLoc(); |
33215 | |
33216 | assert(!isAsynchronousEHPersonality( |
33217 | classifyEHPersonality(MF->getFunction().getPersonalityFn())) && |
33218 | "SEH does not use catchret!"); |
33219 | |
33220 | // Only 32-bit EH needs to worry about manually restoring stack pointers. |
33221 | if (!Subtarget.is32Bit()) |
33222 | return BB; |
33223 | |
33224 | // C++ EH creates a new target block to hold the restore code, and wires |
33225 | // up the new block to the return destination with a normal JMP_4. |
33226 | MachineBasicBlock *RestoreMBB = |
33227 | MF->CreateMachineBasicBlock(BB->getBasicBlock()); |
33228 | assert(BB->succ_size() == 1); |
33229 | MF->insert(std::next(BB->getIterator()), RestoreMBB); |
33230 | RestoreMBB->transferSuccessorsAndUpdatePHIs(BB); |
33231 | BB->addSuccessor(RestoreMBB); |
33232 | MI.getOperand(0).setMBB(RestoreMBB); |
33233 | |
33234 | // Marking this as an EH pad but not a funclet entry block causes PEI to |
33235 | // restore stack pointers in the block. |
33236 | RestoreMBB->setIsEHPad(true); |
33237 | |
33238 | auto RestoreMBBI = RestoreMBB->begin(); |
33239 | BuildMI(*RestoreMBB, RestoreMBBI, DL, TII.get(X86::JMP_4)).addMBB(TargetMBB); |
33240 | return BB; |
33241 | } |
33242 | |
33243 | MachineBasicBlock * |
33244 | X86TargetLowering::EmitLoweredTLSAddr(MachineInstr &MI, |
33245 | MachineBasicBlock *BB) const { |
33246 | // So, here we replace TLSADDR with the sequence: |
33247 | // adjust_stackdown -> TLSADDR -> adjust_stackup. |
33248 | // We need this because TLSADDR is lowered into calls |
33249 | // inside MC, therefore without the two markers shadow-stack |
33250 | // handling on x86-64 breaks. |
33251 | const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); |
33252 | const DebugLoc &DL = MI.getDebugLoc(); |
33253 | MachineFunction &MF = *BB->getParent(); |
33254 | |
33255 | // Emit CALLSEQ_START right before the instruction. |
33256 | unsigned AdjStackDown = TII.getCallFrameSetupOpcode(); |
33257 | MachineInstrBuilder CallseqStart = |
33258 | BuildMI(MF, DL, TII.get(AdjStackDown)).addImm(0).addImm(0).addImm(0); |
33259 | BB->insert(MachineBasicBlock::iterator(MI), CallseqStart); |
33260 | |
33261 | |
33262 | |
33263 | // Emit CALLSEQ_END right after the instruction. |
33264 | unsigned AdjStackUp = TII.getCallFrameDestroyOpcode(); |
33265 | MachineInstrBuilder CallseqEnd = |
33266 | BuildMI(MF, DL, TII.get(AdjStackUp)).addImm(0).addImm(0); |
33267 | BB->insertAfter(MachineBasicBlock::iterator(MI), CallseqEnd); |
33268 | |
33269 | return BB; |
33270 | } |
33271 | |
33272 | MachineBasicBlock * |
33273 | X86TargetLowering::EmitLoweredTLSCall(MachineInstr &MI, |
33274 | MachineBasicBlock *BB) const { |
33275 | // This is pretty easy.  We're taking the value that we received from |
33276 | // our load from the relocation, sticking it in either RDI (x86-64) |
33277 | // or EAX and doing an indirect call.  The return value will then |
33278 | // be in the normal return register. |
33279 | MachineFunction *F = BB->getParent(); |
33280 | const X86InstrInfo *TII = Subtarget.getInstrInfo(); |
33281 | const DebugLoc &DL = MI.getDebugLoc(); |
33282 | |
33283 | assert(Subtarget.isTargetDarwin() && "Darwin only instr emitted?"); |
33284 | assert(MI.getOperand(3).isGlobal() && "This should be a global"); |
33285 | |
33286 | |
33287 | // Get a register mask for the lowered call. |
33288 | // The 32-bit paths reuse the C calling convention's preserved-register mask. |
33289 | const uint32_t *RegMask = |
33290 | Subtarget.is64Bit() ? |
33291 | Subtarget.getRegisterInfo()->getDarwinTLSCallPreservedMask() : |
33292 | Subtarget.getRegisterInfo()->getCallPreservedMask(*F, CallingConv::C); |
33293 | if (Subtarget.is64Bit()) { |
33294 | MachineInstrBuilder MIB = |
33295 | BuildMI(*BB, MI, DL, TII->get(X86::MOV64rm), X86::RDI) |
33296 | .addReg(X86::RIP) |
33297 | .addImm(0) |
33298 | .addReg(0) |
33299 | .addGlobalAddress(MI.getOperand(3).getGlobal(), 0, |
33300 | MI.getOperand(3).getTargetFlags()) |
33301 | .addReg(0); |
33302 | MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL64m)); |
33303 | addDirectMem(MIB, X86::RDI); |
33304 | MIB.addReg(X86::RAX, RegState::ImplicitDefine).addRegMask(RegMask); |
33305 | } else if (!isPositionIndependent()) { |
33306 | MachineInstrBuilder MIB = |
33307 | BuildMI(*BB, MI, DL, TII->get(X86::MOV32rm), X86::EAX) |
33308 | .addReg(0) |
33309 | .addImm(0) |
33310 | .addReg(0) |
33311 | .addGlobalAddress(MI.getOperand(3).getGlobal(), 0, |
33312 | MI.getOperand(3).getTargetFlags()) |
33313 | .addReg(0); |
33314 | MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL32m)); |
33315 | addDirectMem(MIB, X86::EAX); |
33316 | MIB.addReg(X86::EAX, RegState::ImplicitDefine).addRegMask(RegMask); |
33317 | } else { |
33318 | MachineInstrBuilder MIB = |
33319 | BuildMI(*BB, MI, DL, TII->get(X86::MOV32rm), X86::EAX) |
33320 | .addReg(TII->getGlobalBaseReg(F)) |
33321 | .addImm(0) |
33322 | .addReg(0) |
33323 | .addGlobalAddress(MI.getOperand(3).getGlobal(), 0, |
33324 | MI.getOperand(3).getTargetFlags()) |
33325 | .addReg(0); |
33326 | MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL32m)); |
33327 | addDirectMem(MIB, X86::EAX); |
33328 | MIB.addReg(X86::EAX, RegState::ImplicitDefine).addRegMask(RegMask); |
33329 | } |
33330 | |
33331 | MI.eraseFromParent(); |
33332 | return BB; |
33333 | } |
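| // For the 64-bit Darwin path above, the emitted sequence corresponds |
| // roughly to the following assembly (illustrative): |
| // |
| //   movq _var@TLVP(%rip), %rdi  ; load the TLV descriptor's address |
| //   callq *(%rdi)               ; call the descriptor's getter function |
| //   ;; the variable's address is returned in %rax |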
33334 | |
33335 | static unsigned getOpcodeForIndirectThunk(unsigned RPOpc) { |
33336 | switch (RPOpc) { |
33337 | case X86::INDIRECT_THUNK_CALL32: |
33338 | return X86::CALLpcrel32; |
33339 | case X86::INDIRECT_THUNK_CALL64: |
33340 | return X86::CALL64pcrel32; |
33341 | case X86::INDIRECT_THUNK_TCRETURN32: |
33342 | return X86::TCRETURNdi; |
33343 | case X86::INDIRECT_THUNK_TCRETURN64: |
33344 | return X86::TCRETURNdi64; |
33345 | } |
33346 | llvm_unreachable("not indirect thunk opcode"); |
33347 | } |
33348 | |
33349 | static const char *getIndirectThunkSymbol(const X86Subtarget &Subtarget, |
33350 | unsigned Reg) { |
33351 | if (Subtarget.useRetpolineExternalThunk()) { |
33352 | |
33353 | // When an external thunk is in use, no thunk body is emitted here at all; |
33354 | // we only ever call a symbol that the surrounding environment (a runtime |
33355 | // library, or the kernel for its own builds) is expected to provide. |
33356 | // |
33357 | // The __x86_indirect_thunk_<reg> names below follow the convention GCC |
33358 | // established for external indirect thunks, so that objects built by |
33359 | // either compiler can link against one common set of thunk bodies. |
33360 | // |
33361 | // There is one thunk per register because a thunk may only rely on the |
33362 | // register that already holds the callee's address: it stands in for an |
33363 | // arbitrary indirect call and therefore must not clobber anything else. |
33364 | |
33365 | switch (Reg) { |
33366 | case X86::EAX: |
33367 | assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!"); |
33368 | return "__x86_indirect_thunk_eax"; |
33369 | case X86::ECX: |
33370 | assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!"); |
33371 | return "__x86_indirect_thunk_ecx"; |
33372 | case X86::EDX: |
33373 | assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!"); |
33374 | return "__x86_indirect_thunk_edx"; |
33375 | case X86::EDI: |
33376 | assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!"); |
33377 | return "__x86_indirect_thunk_edi"; |
33378 | case X86::R11: |
33379 | assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!"); |
33380 | return "__x86_indirect_thunk_r11"; |
33381 | } |
33382 | llvm_unreachable("unexpected reg for external indirect thunk"); |
33383 | } |
33384 | |
33385 | if (Subtarget.useRetpolineIndirectCalls() || |
33386 | Subtarget.useRetpolineIndirectBranches()) { |
33387 | // When targeting an internal COMDAT thunk use an LLVM-specific name. |
33388 | switch (Reg) { |
33389 | case X86::EAX: |
33390 | assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!"); |
33391 | return "__llvm_retpoline_eax"; |
33392 | case X86::ECX: |
33393 | assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!"); |
33394 | return "__llvm_retpoline_ecx"; |
33395 | case X86::EDX: |
33396 | assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!"); |
33397 | return "__llvm_retpoline_edx"; |
33398 | case X86::EDI: |
33399 | assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!"); |
33400 | return "__llvm_retpoline_edi"; |
33401 | case X86::R11: |
33402 | assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!"); |
33403 | return "__llvm_retpoline_r11"; |
33404 | } |
33405 | llvm_unreachable("unexpected reg for retpoline"); |
33406 | } |
33407 | |
33408 | if (Subtarget.useLVIControlFlowIntegrity()) { |
33409 | assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!"); |
33410 | return "__llvm_lvi_thunk_r11"; |
33411 | } |
33412 | llvm_unreachable("getIndirectThunkSymbol() invoked without thunk feature"); |
33413 | } |
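| // For reference, a minimal thunk body such as __llvm_retpoline_r11 looks |
| // roughly like this (illustrative sketch; the thunks themselves are |
| // emitted elsewhere, or supplied externally): |
| // |
| //   __llvm_retpoline_r11: |
| //     callq .Lset_up_target |
| //   .Lcapture_speculation: |
| //     pause |
| //     lfence |
| //     jmp .Lcapture_speculation |
| //   .Lset_up_target: |
| //     movq %r11, (%rsp)         ; replace the speculated return address |
| //     retq                      ; actually branches to the value in %r11 |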
33414 | |
33415 | MachineBasicBlock * |
33416 | X86TargetLowering::EmitLoweredIndirectThunk(MachineInstr &MI, |
33417 | MachineBasicBlock *BB) const { |
33418 | // Copy the virtual register into the R11 physical register and |
33419 | // call the retpoline thunk. |
33420 | const DebugLoc &DL = MI.getDebugLoc(); |
33421 | const X86InstrInfo *TII = Subtarget.getInstrInfo(); |
33422 | Register CalleeVReg = MI.getOperand(0).getReg(); |
33423 | unsigned Opc = getOpcodeForIndirectThunk(MI.getOpcode()); |
33424 | |
33425 | // Find an available scratch register to hold the callee. On 64-bit, we can |
33426 | // just use R11, but we scan for uses anyway to ensure we don't generate |
33427 | // incorrect code. On 32-bit, we use one of EAX, ECX, or EDX that isn't |
33428 | // already a register use operand to the call to hold the callee. If none |
33429 | // are available, use EDI instead. EDI is chosen because EBX is the PIC base |
33430 | // register and ESI is the base pointer to realigned stack frames with VLAs. |
33431 | SmallVector<unsigned, 3> AvailableRegs; |
33432 | if (Subtarget.is64Bit()) |
33433 | AvailableRegs.push_back(X86::R11); |
33434 | else |
33435 | AvailableRegs.append({X86::EAX, X86::ECX, X86::EDX, X86::EDI}); |
33436 | |
33437 | // Zero out any registers that are already used. |
33438 | for (const auto &MO : MI.operands()) { |
33439 | if (MO.isReg() && MO.isUse()) |
33440 | for (unsigned &Reg : AvailableRegs) |
33441 | if (Reg == MO.getReg()) |
33442 | Reg = 0; |
33443 | } |
33444 | |
33445 | // Choose the first remaining non-zero available register. |
33446 | unsigned AvailableReg = 0; |
33447 | for (unsigned MaybeReg : AvailableRegs) { |
33448 | if (MaybeReg) { |
33449 | AvailableReg = MaybeReg; |
33450 | break; |
33451 | } |
33452 | } |
33453 | if (!AvailableReg) |
33454 | report_fatal_error("calling convention incompatible with retpoline, no " |
33455 | "available registers"); |
33456 | |
33457 | const char *Symbol = getIndirectThunkSymbol(Subtarget, AvailableReg); |
33458 | |
33459 | BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), AvailableReg) |
33460 | .addReg(CalleeVReg); |
33461 | MI.getOperand(0).ChangeToES(Symbol); |
33462 | MI.setDesc(TII->get(Opc)); |
33463 | MachineInstrBuilder(*BB->getParent(), &MI) |
33464 | .addReg(AvailableReg, RegState::Implicit | RegState::Kill); |
33465 | return BB; |
33466 | } |
33467 | |
33468 | /// SetJmp implies future control flow change upon calling the corresponding |
33469 | /// LongJmp. |
33470 | /// Instead of using the 'return' instruction, the long jump fixes the stack and |
33471 | /// performs an indirect branch. To do so it uses the registers that were stored |
33472 | /// in the jump buffer (when calling SetJmp). |
33473 | /// In case the shadow stack is enabled we need to fix it as well, because some |
33474 | /// return addresses will be skipped. |
33475 | /// The function will save the SSP for future fixing in the function |
33476 | /// emitLongJmpShadowStackFix. |
33477 | /// \sa emitLongJmpShadowStackFix |
33478 | /// \param [in] MI The temporary Machine Instruction for the builtin. |
33479 | /// \param [in] MBB The Machine Basic Block that will be modified. |
33480 | void X86TargetLowering::emitSetJmpShadowStackFix(MachineInstr &MI, |
33481 | MachineBasicBlock *MBB) const { |
33482 | const DebugLoc &DL = MI.getDebugLoc(); |
33483 | MachineFunction *MF = MBB->getParent(); |
33484 | const TargetInstrInfo *TII = Subtarget.getInstrInfo(); |
33485 | MachineRegisterInfo &MRI = MF->getRegInfo(); |
33486 | MachineInstrBuilder MIB; |
33487 | |
33488 | // Memory Reference |
33489 | SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands_begin(), |
33490 | MI.memoperands_end()); |
33491 | |
33492 | // Initialize a register with zero. |
33493 | MVT PVT = getPointerTy(MF->getDataLayout()); |
33494 | const TargetRegisterClass *PtrRC = getRegClassFor(PVT); |
33495 | Register ZReg = MRI.createVirtualRegister(PtrRC); |
33496 | unsigned XorRROpc = (PVT == MVT::i64) ? X86::XOR64rr : X86::XOR32rr; |
33497 | BuildMI(*MBB, MI, DL, TII->get(XorRROpc)) |
33498 | .addDef(ZReg) |
33499 | .addReg(ZReg, RegState::Undef) |
33500 | .addReg(ZReg, RegState::Undef); |
33501 | |
33502 | // Read the current SSP Register value to the virtual register. |
33503 | Register SSPCopyReg = MRI.createVirtualRegister(PtrRC); |
33504 | unsigned RdsspOpc = (PVT == MVT::i64) ? X86::RDSSPQ : X86::RDSSPD; |
33505 | BuildMI(*MBB, MI, DL, TII->get(RdsspOpc), SSPCopyReg).addReg(ZReg); |
33506 | |
33507 | // Write the SSP register value to offset 3*PtrSize of the jump buffer. |
33508 | unsigned PtrStoreOpc = (PVT == MVT::i64) ? X86::MOV64mr : X86::MOV32mr; |
33509 | MIB = BuildMI(*MBB, MI, DL, TII->get(PtrStoreOpc)); |
33510 | const int64_t SSPOffset = 3 * PVT.getStoreSize(); |
33511 | const unsigned MemOpndSlot = 1; |
33512 | for (unsigned i = 0; i < X86::AddrNumOperands; ++i) { |
33513 | if (i == X86::AddrDisp) |
33514 | MIB.addDisp(MI.getOperand(MemOpndSlot + i), SSPOffset); |
33515 | else |
33516 | MIB.add(MI.getOperand(MemOpndSlot + i)); |
33517 | } |
33518 | MIB.addReg(SSPCopyReg); |
33519 | MIB.setMemRefs(MMOs); |
33520 | } |
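| // The whole fix therefore amounts to three instructions (illustrative, |
| // 64-bit flavor, with 3 * PtrSize = 24): |
| // |
| //   xorq   %scratch, %scratch |
| //   rdsspq %scratch             ; current shadow stack pointer, or still 0 |
| //   movq   %scratch, 24(buf)    ; saved for emitLongJmpShadowStackFix |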
33521 | |
33522 | MachineBasicBlock * |
33523 | X86TargetLowering::emitEHSjLjSetJmp(MachineInstr &MI, |
33524 | MachineBasicBlock *MBB) const { |
33525 | const DebugLoc &DL = MI.getDebugLoc(); |
33526 | MachineFunction *MF = MBB->getParent(); |
33527 | const TargetInstrInfo *TII = Subtarget.getInstrInfo(); |
33528 | const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); |
33529 | MachineRegisterInfo &MRI = MF->getRegInfo(); |
33530 | |
33531 | const BasicBlock *BB = MBB->getBasicBlock(); |
33532 | MachineFunction::iterator I = ++MBB->getIterator(); |
33533 | |
33534 | // Memory Reference |
33535 | SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands_begin(), |
33536 | MI.memoperands_end()); |
33537 | |
33538 | unsigned DstReg; |
33539 | unsigned MemOpndSlot = 0; |
33540 | |
33541 | unsigned CurOp = 0; |
33542 | |
33543 | DstReg = MI.getOperand(CurOp++).getReg(); |
33544 | const TargetRegisterClass *RC = MRI.getRegClass(DstReg); |
33545 | assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!"); |
33546 | (void)TRI; |
33547 | Register mainDstReg = MRI.createVirtualRegister(RC); |
33548 | Register restoreDstReg = MRI.createVirtualRegister(RC); |
33549 | |
33550 | MemOpndSlot = CurOp; |
33551 | |
33552 | MVT PVT = getPointerTy(MF->getDataLayout()); |
33553 | assert((PVT == MVT::i64 || PVT == MVT::i32) && |
33554 | "Invalid Pointer Size!"); |
33555 | |
33556 | |
33557 | // For v = setjmp(buf), we generate |
33558 | // |
33559 | // thisMBB: |
33560 | //  buf[LabelOffset] = restoreMBB <-- takes address of restoreMBB |
33561 | //  SjLjSetup restoreMBB |
33562 | // |
33563 | // mainMBB: |
33564 | //  v_main = 0 |
33565 | // |
33566 | // sinkMBB: |
33567 | //  v = phi(main, restore) |
33568 | // |
33569 | // restoreMBB: |
33570 | //  if base pointer being used, load it from frame |
33571 | //  v_restore = 1 |
33572 | MachineBasicBlock *thisMBB = MBB; |
33573 | MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB); |
33574 | MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB); |
33575 | MachineBasicBlock *restoreMBB = MF->CreateMachineBasicBlock(BB); |
33576 | MF->insert(I, mainMBB); |
33577 | MF->insert(I, sinkMBB); |
33578 | MF->push_back(restoreMBB); |
33579 | restoreMBB->setHasAddressTaken(); |
33580 | |
33581 | MachineInstrBuilder MIB; |
33582 | |
33583 | // Transfer the remainder of BB and its successor edges to sinkMBB. |
33584 | sinkMBB->splice(sinkMBB->begin(), MBB, |
33585 | std::next(MachineBasicBlock::iterator(MI)), MBB->end()); |
33586 | sinkMBB->transferSuccessorsAndUpdatePHIs(MBB); |
33587 | |
33588 | // thisMBB: |
33589 | unsigned PtrStoreOpc = 0; |
33590 | unsigned LabelReg = 0; |
33591 | const int64_t LabelOffset = 1 * PVT.getStoreSize(); |
33592 | bool UseImmLabel = (MF->getTarget().getCodeModel() == CodeModel::Small) && |
33593 | !isPositionIndependent(); |
33594 | |
33595 | // Prepare IP either in reg or imm. |
33596 | if (!UseImmLabel) { |
33597 | PtrStoreOpc = (PVT == MVT::i64) ? X86::MOV64mr : X86::MOV32mr; |
33598 | const TargetRegisterClass *PtrRC = getRegClassFor(PVT); |
33599 | LabelReg = MRI.createVirtualRegister(PtrRC); |
33600 | if (Subtarget.is64Bit()) { |
33601 | MIB = BuildMI(*thisMBB, MI, DL, TII->get(X86::LEA64r), LabelReg) |
33602 | .addReg(X86::RIP) |
33603 | .addImm(0) |
33604 | .addReg(0) |
33605 | .addMBB(restoreMBB) |
33606 | .addReg(0); |
33607 | } else { |
33608 | const X86InstrInfo *XII = static_cast<const X86InstrInfo*>(TII); |
33609 | MIB = BuildMI(*thisMBB, MI, DL, TII->get(X86::LEA32r), LabelReg) |
33610 | .addReg(XII->getGlobalBaseReg(MF)) |
33611 | .addImm(0) |
33612 | .addReg(0) |
33613 | .addMBB(restoreMBB, Subtarget.classifyBlockAddressReference()) |
33614 | .addReg(0); |
33615 | } |
33616 | } else |
33617 | PtrStoreOpc = (PVT == MVT::i64) ? X86::MOV64mi32 : X86::MOV32mi; |
33618 | // Store IP |
33619 | MIB = BuildMI(*thisMBB, MI, DL, TII->get(PtrStoreOpc)); |
33620 | for (unsigned i = 0; i < X86::AddrNumOperands; ++i) { |
33621 | if (i == X86::AddrDisp) |
33622 | MIB.addDisp(MI.getOperand(MemOpndSlot + i), LabelOffset); |
33623 | else |
33624 | MIB.add(MI.getOperand(MemOpndSlot + i)); |
33625 | } |
33626 | if (!UseImmLabel) |
33627 | MIB.addReg(LabelReg); |
33628 | else |
33629 | MIB.addMBB(restoreMBB); |
33630 | MIB.setMemRefs(MMOs); |
33631 | |
33632 | if (MF->getMMI().getModule()->getModuleFlag("cf-protection-return")) { |
33633 | emitSetJmpShadowStackFix(MI, thisMBB); |
33634 | } |
33635 | |
33636 | // Setup |
33637 | MIB = BuildMI(*thisMBB, MI, DL, TII->get(X86::EH_SjLj_Setup)) |
33638 | .addMBB(restoreMBB); |
33639 | |
33640 | const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); |
33641 | MIB.addRegMask(RegInfo->getNoPreservedMask()); |
33642 | thisMBB->addSuccessor(mainMBB); |
33643 | thisMBB->addSuccessor(restoreMBB); |
33644 | |
33645 | // mainMBB: |
33646 | //  EAX = 0 |
33647 | BuildMI(mainMBB, DL, TII->get(X86::MOV32r0), mainDstReg); |
33648 | mainMBB->addSuccessor(sinkMBB); |
33649 | |
33650 | // sinkMBB: |
33651 | BuildMI(*sinkMBB, sinkMBB->begin(), DL, |
33652 | TII->get(X86::PHI), DstReg) |
33653 | .addReg(mainDstReg).addMBB(mainMBB) |
33654 | .addReg(restoreDstReg).addMBB(restoreMBB); |
33655 | |
33656 | // restoreMBB: |
33657 | if (RegInfo->hasBasePointer(*MF)) { |
33658 | const bool Uses64BitFramePtr = |
33659 | Subtarget.isTarget64BitLP64() || Subtarget.isTargetNaCl64(); |
33660 | X86MachineFunctionInfo *X86FI = MF->getInfo<X86MachineFunctionInfo>(); |
33661 | X86FI->setRestoreBasePointer(MF); |
33662 | Register FramePtr = RegInfo->getFrameRegister(*MF); |
33663 | Register BasePtr = RegInfo->getBaseRegister(); |
33664 | unsigned Opm = Uses64BitFramePtr ? X86::MOV64rm : X86::MOV32rm; |
33665 | addRegOffset(BuildMI(restoreMBB, DL, TII->get(Opm), BasePtr), |
33666 | FramePtr, true, X86FI->getRestoreBasePointerOffset()) |
33667 | .setMIFlag(MachineInstr::FrameSetup); |
33668 | } |
33669 | BuildMI(restoreMBB, DL, TII->get(X86::MOV32ri), restoreDstReg).addImm(1); |
33670 | BuildMI(restoreMBB, DL, TII->get(X86::JMP_1)).addMBB(sinkMBB); |
33671 | restoreMBB->addSuccessor(sinkMBB); |
33672 | |
33673 | MI.eraseFromParent(); |
33674 | return sinkMBB; |
33675 | } |
33676 | |
33677 | /// Fix the shadow stack using the previously saved SSP pointer. |
33678 | /// \sa emitSetJmpShadowStackFix |
33679 | /// \param [in] MI The temporary Machine Instruction for the builtin. |
33680 | /// \param [in] MBB The Machine Basic Block that will be modified. |
33681 | /// \return The sink MBB that will perform the future indirect branch. |
33682 | MachineBasicBlock * |
33683 | X86TargetLowering::emitLongJmpShadowStackFix(MachineInstr &MI, |
33684 | MachineBasicBlock *MBB) const { |
33685 | const DebugLoc &DL = MI.getDebugLoc(); |
33686 | MachineFunction *MF = MBB->getParent(); |
33687 | const TargetInstrInfo *TII = Subtarget.getInstrInfo(); |
33688 | MachineRegisterInfo &MRI = MF->getRegInfo(); |
33689 | |
33690 | // Memory Reference |
33691 | SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands_begin(), |
33692 | MI.memoperands_end()); |
33693 | |
33694 | MVT PVT = getPointerTy(MF->getDataLayout()); |
33695 | const TargetRegisterClass *PtrRC = getRegClassFor(PVT); |
33696 | |
33697 | |
33698 | // checkSspMBB: |
33699 | //         xor vreg1, vreg1 |
33700 | //         rdssp vreg1 |
33701 | //         test vreg1, vreg1 |
33702 | //         je sinkMBB   # Jump if Shadow Stack is not supported |
33703 | // fallMBB: |
33704 | //         mov buf+24/12(%rip), vreg2 |
33705 | //         sub vreg1, vreg2 |
33706 | //         jbe sinkMBB  # No need to fix the Shadow Stack |
33707 | // fixShadowMBB: |
33708 | //         shr 3/2, vreg2 |
33709 | //         incssp vreg2  # fix the SSP according to the lower 8 bits |
33710 | //         shr 8, vreg2 |
33711 | //         je sinkMBB |
33712 | // fixShadowLoopPrepareMBB: |
33713 | //         shl vreg2 |
33714 | //         mov 128, vreg3 |
33715 | // fixShadowLoopMBB: |
33716 | //         incssp vreg3 |
33717 | //         dec vreg2 |
33718 | //         jne fixShadowLoopMBB # Iterate until you finish fixing |
33719 | //                              # the Shadow Stack |
33720 | // sinkMBB: |
33721 | MachineFunction::iterator I = ++MBB->getIterator(); |
33722 | const BasicBlock *BB = MBB->getBasicBlock(); |
33723 | |
33724 | MachineBasicBlock *checkSspMBB = MF->CreateMachineBasicBlock(BB); |
33725 | MachineBasicBlock *fallMBB = MF->CreateMachineBasicBlock(BB); |
33726 | MachineBasicBlock *fixShadowMBB = MF->CreateMachineBasicBlock(BB); |
33727 | MachineBasicBlock *fixShadowLoopPrepareMBB = MF->CreateMachineBasicBlock(BB); |
33728 | MachineBasicBlock *fixShadowLoopMBB = MF->CreateMachineBasicBlock(BB); |
33729 | MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB); |
33730 | MF->insert(I, checkSspMBB); |
33731 | MF->insert(I, fallMBB); |
33732 | MF->insert(I, fixShadowMBB); |
33733 | MF->insert(I, fixShadowLoopPrepareMBB); |
33734 | MF->insert(I, fixShadowLoopMBB); |
33735 | MF->insert(I, sinkMBB); |
33736 | |
33737 | // Transfer the remainder of MBB and its successor edges to sinkMBB. |
33738 | sinkMBB->splice(sinkMBB->begin(), MBB, MachineBasicBlock::iterator(MI), |
33739 | MBB->end()); |
33740 | sinkMBB->transferSuccessorsAndUpdatePHIs(MBB); |
33741 | |
33742 | MBB->addSuccessor(checkSspMBB); |
33743 | |
33744 | // Initialize a register with zero. |
33745 | Register ZReg = MRI.createVirtualRegister(&X86::GR32RegClass); |
33746 | BuildMI(checkSspMBB, DL, TII->get(X86::MOV32r0), ZReg); |
33747 | |
33748 | if (PVT == MVT::i64) { |
33749 | Register TmpZReg = MRI.createVirtualRegister(PtrRC); |
33750 | BuildMI(checkSspMBB, DL, TII->get(X86::SUBREG_TO_REG), TmpZReg) |
33751 | .addImm(0) |
33752 | .addReg(ZReg) |
33753 | .addImm(X86::sub_32bit); |
33754 | ZReg = TmpZReg; |
33755 | } |
33756 | |
33757 | // Read the current SSP Register value to the virtual register. |
33758 | Register SSPCopyReg = MRI.createVirtualRegister(PtrRC); |
33759 | unsigned RdsspOpc = (PVT == MVT::i64) ? X86::RDSSPQ : X86::RDSSPD; |
33760 | BuildMI(checkSspMBB, DL, TII->get(RdsspOpc), SSPCopyReg).addReg(ZReg); |
33761 | |
33762 | // Check whether the result of the SSP register is zero and jump directly |
33763 | // to the sink. |
33764 | unsigned TestRROpc = (PVT == MVT::i64) ? X86::TEST64rr : X86::TEST32rr; |
33765 | BuildMI(checkSspMBB, DL, TII->get(TestRROpc)) |
33766 | .addReg(SSPCopyReg) |
33767 | .addReg(SSPCopyReg); |
33768 | BuildMI(checkSspMBB, DL, TII->get(X86::JCC_1)).addMBB(sinkMBB).addImm(X86::COND_E); |
33769 | checkSspMBB->addSuccessor(sinkMBB); |
33770 | checkSspMBB->addSuccessor(fallMBB); |
33771 | |
33772 | // Reload the previously saved SSP register value. |
33773 | Register PrevSSPReg = MRI.createVirtualRegister(PtrRC); |
33774 | unsigned PtrLoadOpc = (PVT == MVT::i64) ? X86::MOV64rm : X86::MOV32rm; |
33775 | const int64_t SPPOffset = 3 * PVT.getStoreSize(); |
33776 | MachineInstrBuilder MIB = |
33777 | BuildMI(fallMBB, DL, TII->get(PtrLoadOpc), PrevSSPReg); |
33778 | for (unsigned i = 0; i < X86::AddrNumOperands; ++i) { |
33779 | const MachineOperand &MO = MI.getOperand(i); |
33780 | if (i == X86::AddrDisp) |
33781 | MIB.addDisp(MO, SPPOffset); |
33782 | else if (MO.isReg()) |
33783 | // Don't add the whole operand, we don't want to preserve kill flags. |
33784 | MIB.addReg(MO.getReg()); |
33785 | else |
33786 | MIB.add(MO); |
33787 | } |
33788 | MIB.setMemRefs(MMOs); |
33789 | |
33790 | // Subtract the current SSP from the previous SSP. |
33791 | Register SspSubReg = MRI.createVirtualRegister(PtrRC); |
33792 | unsigned SubRROpc = (PVT == MVT::i64) ? X86::SUB64rr : X86::SUB32rr; |
33793 | BuildMI(fallMBB, DL, TII->get(SubRROpc), SspSubReg) |
33794 | .addReg(PrevSSPReg) |
33795 | .addReg(SSPCopyReg); |
33796 | |
33797 | // Jump to sink in case PrevSSPReg <= SSPCopyReg. |
33798 | BuildMI(fallMBB, DL, TII->get(X86::JCC_1)).addMBB(sinkMBB).addImm(X86::COND_BE); |
33799 | fallMBB->addSuccessor(sinkMBB); |
33800 | fallMBB->addSuccessor(fixShadowMBB); |
33801 | |
33802 | // Shift right by 2/3 for 32/64 because incssp multiplies the argument by 4/8. |
33803 | unsigned ShrRIOpc = (PVT == MVT::i64) ? X86::SHR64ri : X86::SHR32ri; |
33804 | unsigned Offset = (PVT == MVT::i64) ? 3 : 2; |
33805 | Register SspFirstShrReg = MRI.createVirtualRegister(PtrRC); |
33806 | BuildMI(fixShadowMBB, DL, TII->get(ShrRIOpc), SspFirstShrReg) |
33807 | .addReg(SspSubReg) |
33808 | .addImm(Offset); |
33809 | |
33810 | // Increase SSP according to the lower 8 bits of the shifted value. |
33811 | unsigned IncsspOpc = (PVT == MVT::i64) ? X86::INCSSPQ : X86::INCSSPD; |
33812 | BuildMI(fixShadowMBB, DL, TII->get(IncsspOpc)).addReg(SspFirstShrReg); |
33813 | |
33814 | // Reset the lower 8 bits. |
33815 | Register SspSecondShrReg = MRI.createVirtualRegister(PtrRC); |
33816 | BuildMI(fixShadowMBB, DL, TII->get(ShrRIOpc), SspSecondShrReg) |
33817 | .addReg(SspFirstShrReg) |
33818 | .addImm(8); |
33819 | |
33820 | // Jump if the result of the shift is zero. |
33821 | BuildMI(fixShadowMBB, DL, TII->get(X86::JCC_1)).addMBB(sinkMBB).addImm(X86::COND_E); |
33822 | fixShadowMBB->addSuccessor(sinkMBB); |
33823 | fixShadowMBB->addSuccessor(fixShadowLoopPrepareMBB); |
33824 | |
33825 | // Do a single shift left. |
33826 | unsigned ShlR1Opc = (PVT == MVT::i64) ? X86::SHL64r1 : X86::SHL32r1; |
33827 | Register SspAfterShlReg = MRI.createVirtualRegister(PtrRC); |
33828 | BuildMI(fixShadowLoopPrepareMBB, DL, TII->get(ShlR1Opc), SspAfterShlReg) |
33829 | .addReg(SspSecondShrReg); |
33830 | |
33831 | // Save the value 128 to a register (will be used next with incssp). |
33832 | Register Value128InReg = MRI.createVirtualRegister(PtrRC); |
33833 | unsigned MovRIOpc = (PVT == MVT::i64) ? X86::MOV64ri32 : X86::MOV32ri; |
33834 | BuildMI(fixShadowLoopPrepareMBB, DL, TII->get(MovRIOpc), Value128InReg) |
33835 | .addImm(128); |
33836 | fixShadowLoopPrepareMBB->addSuccessor(fixShadowLoopMBB); |
33837 | |
33838 | // Since incssp only looks at the lower 8 bits, we might need to do several |
33839 | // iterations of incssp until we finish fixing the shadow stack. |
33840 | Register DecReg = MRI.createVirtualRegister(PtrRC); |
33841 | Register CounterReg = MRI.createVirtualRegister(PtrRC); |
33842 | BuildMI(fixShadowLoopMBB, DL, TII->get(X86::PHI), CounterReg) |
33843 | .addReg(SspAfterShlReg) |
33844 | .addMBB(fixShadowLoopPrepareMBB) |
33845 | .addReg(DecReg) |
33846 | .addMBB(fixShadowLoopMBB); |
33847 | |
33848 | // Every iteration we increase the SSP by 128. |
33849 | BuildMI(fixShadowLoopMBB, DL, TII->get(IncsspOpc)).addReg(Value128InReg); |
33850 | |
33851 | // Every iteration we decrease the counter by 1. |
33852 | unsigned DecROpc = (PVT == MVT::i64) ? X86::DEC64r : X86::DEC32r; |
33853 | BuildMI(fixShadowLoopMBB, DL, TII->get(DecROpc), DecReg).addReg(CounterReg); |
33854 | |
33855 | // Jump if the counter is not zero yet. |
33856 | BuildMI(fixShadowLoopMBB, DL, TII->get(X86::JCC_1)).addMBB(fixShadowLoopMBB).addImm(X86::COND_NE); |
33857 | fixShadowLoopMBB->addSuccessor(sinkMBB); |
33858 | fixShadowLoopMBB->addSuccessor(fixShadowLoopMBB); |
33859 | |
33860 | return sinkMBB; |
33861 | } |
33862 | |
33863 | MachineBasicBlock * |
33864 | X86TargetLowering::emitEHSjLjLongJmp(MachineInstr &MI, |
33865 | MachineBasicBlock *MBB) const { |
33866 | const DebugLoc &DL = MI.getDebugLoc(); |
33867 | MachineFunction *MF = MBB->getParent(); |
33868 | const TargetInstrInfo *TII = Subtarget.getInstrInfo(); |
33869 | MachineRegisterInfo &MRI = MF->getRegInfo(); |
33870 | |
33871 | // Memory Reference |
33872 | SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands_begin(), |
33873 | MI.memoperands_end()); |
33874 | |
33875 | MVT PVT = getPointerTy(MF->getDataLayout()); |
33876 | assert((PVT == MVT::i64 || PVT == MVT::i32) && |
33877 | "Invalid Pointer Size!"); |
33878 | |
33879 | const TargetRegisterClass *RC = |
33880 | (PVT == MVT::i64) ? &X86::GR64RegClass : &X86::GR32RegClass; |
33881 | Register Tmp = MRI.createVirtualRegister(RC); |
33882 | |
33883 | const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); |
33884 | Register FP = (PVT == MVT::i64) ? X86::RBP : X86::EBP; |
33885 | Register SP = RegInfo->getStackRegister(); |
33886 | |
33887 | MachineInstrBuilder MIB; |
33888 | |
33889 | const int64_t LabelOffset = 1 * PVT.getStoreSize(); |
33890 | const int64_t SPOffset = 2 * PVT.getStoreSize(); |
33891 | |
33892 | unsigned PtrLoadOpc = (PVT == MVT::i64) ? X86::MOV64rm : X86::MOV32rm; |
33893 | unsigned IJmpOpc = (PVT == MVT::i64) ? X86::JMP64r : X86::JMP32r; |
33894 | |
33895 | MachineBasicBlock *thisMBB = MBB; |
33896 | |
33897 | // When CET and shadow stack is enabled, we need to fix the Shadow Stack. |
33898 | if (MF->getMMI().getModule()->getModuleFlag("cf-protection-return")) { |
33899 | thisMBB = emitLongJmpShadowStackFix(MI, thisMBB); |
33900 | } |
33901 | |
33902 | // Reload FP |
33903 | MIB = BuildMI(*thisMBB, MI, DL, TII->get(PtrLoadOpc), FP); |
33904 | for (unsigned i = 0; i < X86::AddrNumOperands; ++i) { |
33905 | const MachineOperand &MO = MI.getOperand(i); |
33906 | if (MO.isReg()) |
33907 | // Don't add the whole operand, we don't want to preserve kill flags. |
33908 | MIB.addReg(MO.getReg()); |
33909 | else |
33910 | MIB.add(MO); |
33911 | } |
33912 | MIB.setMemRefs(MMOs); |
33913 | |
33914 | // Reload IP |
33915 | MIB = BuildMI(*thisMBB, MI, DL, TII->get(PtrLoadOpc), Tmp); |
33916 | for (unsigned i = 0; i < X86::AddrNumOperands; ++i) { |
33917 | const MachineOperand &MO = MI.getOperand(i); |
33918 | if (i == X86::AddrDisp) |
33919 | MIB.addDisp(MO, LabelOffset); |
33920 | else if (MO.isReg()) |
33921 | // Don't add the whole operand, we don't want to preserve kill flags. |
33922 | MIB.addReg(MO.getReg()); |
33923 | else |
33924 | MIB.add(MO); |
33925 | } |
33926 | MIB.setMemRefs(MMOs); |
33927 | |
33928 | // Reload SP |
33929 | MIB = BuildMI(*thisMBB, MI, DL, TII->get(PtrLoadOpc), SP); |
33930 | for (unsigned i = 0; i < X86::AddrNumOperands; ++i) { |
33931 | if (i == X86::AddrDisp) |
33932 | MIB.addDisp(MI.getOperand(i), SPOffset); |
33933 | else |
33934 | MIB.add(MI.getOperand(i)); |
33935 | // We can preserve the kill flags here; it's the last instruction of the expansion. |
33936 | } |
33937 | MIB.setMemRefs(MMOs); |
33938 | |
33939 | // Jump |
33940 | BuildMI(*thisMBB, MI, DL, TII->get(IJmpOpc)).addReg(Tmp); |
33941 | |
33942 | MI.eraseFromParent(); |
33943 | return thisMBB; |
33944 | } |
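| // The jump buffer layout assumed above, in PtrSize-sized slots |
| // (illustrative summary of the offsets used in this function): |
| // |
| //   buf + 0*PtrSize : frame pointer  -> reloaded into RBP/EBP |
| //   buf + 1*PtrSize : target IP      -> reloaded into Tmp, then "jmp *Tmp" |
| //   buf + 2*PtrSize : stack pointer  -> reloaded into RSP/ESP |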
33945 | |
33946 | void X86TargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI, |
33947 | MachineBasicBlock *MBB, |
33948 | MachineBasicBlock *DispatchBB, |
33949 | int FI) const { |
33950 | const DebugLoc &DL = MI.getDebugLoc(); |
33951 | MachineFunction *MF = MBB->getParent(); |
33952 | MachineRegisterInfo *MRI = &MF->getRegInfo(); |
33953 | const X86InstrInfo *TII = Subtarget.getInstrInfo(); |
33954 | |
33955 | MVT PVT = getPointerTy(MF->getDataLayout()); |
33956 | assert((PVT == MVT::i64 || PVT == MVT::i32) && "Invalid Pointer Size!"); |
33957 | |
33958 | unsigned Op = 0; |
33959 | unsigned VR = 0; |
33960 | |
33961 | bool UseImmLabel = (MF->getTarget().getCodeModel() == CodeModel::Small) && |
33962 | !isPositionIndependent(); |
33963 | |
33964 | if (UseImmLabel) { |
33965 | Op = (PVT == MVT::i64) ? X86::MOV64mi32 : X86::MOV32mi; |
33966 | } else { |
33967 | const TargetRegisterClass *TRC = |
33968 | (PVT == MVT::i64) ? &X86::GR64RegClass : &X86::GR32RegClass; |
33969 | VR = MRI->createVirtualRegister(TRC); |
33970 | Op = (PVT == MVT::i64) ? X86::MOV64mr : X86::MOV32mr; |
33971 | |
33972 | if (Subtarget.is64Bit()) |
33973 | BuildMI(*MBB, MI, DL, TII->get(X86::LEA64r), VR) |
33974 | .addReg(X86::RIP) |
33975 | .addImm(1) |
33976 | .addReg(0) |
33977 | .addMBB(DispatchBB) |
33978 | .addReg(0); |
33979 | else |
33980 | BuildMI(*MBB, MI, DL, TII->get(X86::LEA32r), VR) |
33981 | .addReg(0) |
33982 | .addImm(1) |
33983 | .addReg(0) |
33984 | .addMBB(DispatchBB, Subtarget.classifyBlockAddressReference()) |
33985 | .addReg(0); |
33986 | } |
33987 | |
33988 | MachineInstrBuilder MIB = BuildMI(*MBB, MI, DL, TII->get(Op)); |
33989 | addFrameReference(MIB, FI, Subtarget.is64Bit() ? 56 : 36); |
33990 | if (UseImmLabel) |
33991 | MIB.addMBB(DispatchBB); |
33992 | else |
33993 | MIB.addReg(VR); |
33994 | } |
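| // In short, the entry block stores the dispatch block's address into a |
| // fixed slot of the SjLj function context (byte offset 56 on 64-bit |
| // targets, 36 on 32-bit ones); that is the slot the SjLj runtime later |
| // branches through to reach the dispatch code. |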
33995 | |
33996 | MachineBasicBlock * |
33997 | X86TargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI, |
33998 | MachineBasicBlock *BB) const { |
33999 | const DebugLoc &DL = MI.getDebugLoc(); |
34000 | MachineFunction *MF = BB->getParent(); |
34001 | MachineRegisterInfo *MRI = &MF->getRegInfo(); |
34002 | const X86InstrInfo *TII = Subtarget.getInstrInfo(); |
34003 | int FI = MF->getFrameInfo().getFunctionContextIndex(); |
34004 | |
34005 | // Get a mapping of the call site numbers to all of the landing pads they're |
34006 | // associated with. |
34007 | DenseMap<unsigned, SmallVector<MachineBasicBlock *, 2>> CallSiteNumToLPad; |
34008 | unsigned MaxCSNum = 0; |
34009 | for (auto &MBB : *MF) { |
34010 | if (!MBB.isEHPad()) |
34011 | continue; |
34012 | |
34013 | MCSymbol *Sym = nullptr; |
34014 | for (const auto &MI : MBB) { |
34015 | if (MI.isDebugInstr()) |
34016 | continue; |
34017 | |
34018 | assert(MI.isEHLabel() && "expected EH_LABEL"); |
34019 | Sym = MI.getOperand(0).getMCSymbol(); |
34020 | break; |
34021 | } |
34022 | |
34023 | if (!MF->hasCallSiteLandingPad(Sym)) |
34024 | continue; |
34025 | |
34026 | for (unsigned CSI : MF->getCallSiteLandingPad(Sym)) { |
34027 | CallSiteNumToLPad[CSI].push_back(&MBB); |
34028 | MaxCSNum = std::max(MaxCSNum, CSI); |
34029 | } |
34030 | } |
34031 | |
34032 | // Get an ordered list of the machine basic blocks for the jump table. |
34033 | std::vector<MachineBasicBlock *> LPadList; |
34034 | SmallPtrSet<MachineBasicBlock *, 32> InvokeBBs; |
34035 | LPadList.reserve(CallSiteNumToLPad.size()); |
34036 | |
34037 | for (unsigned CSI = 1; CSI <= MaxCSNum; ++CSI) { |
34038 | for (auto &LP : CallSiteNumToLPad[CSI]) { |
34039 | LPadList.push_back(LP); |
34040 | InvokeBBs.insert(LP->pred_begin(), LP->pred_end()); |
34041 | } |
34042 | } |
34043 | |
34044 | assert(!LPadList.empty() && |
34045 | "No landing pad destinations for the dispatch jump table!"); |
34046 | |
34047 | // Create the MBBs for the dispatch code. |
34048 | |
34049 | // Shove the dispatch's address into the return slot in the function context. |
34050 | MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock(); |
34051 | DispatchBB->setIsEHPad(true); |
34052 | |
34053 | MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock(); |
34054 | BuildMI(TrapBB, DL, TII->get(X86::TRAP)); |
34055 | DispatchBB->addSuccessor(TrapBB); |
34056 | |
34057 | MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock(); |
34058 | DispatchBB->addSuccessor(DispContBB); |
34059 | |
34060 | // Add the new blocks to the function. |
34061 | MF->push_back(DispatchBB); |
34062 | MF->push_back(DispContBB); |
34063 | MF->push_back(TrapBB); |
34064 | |
34065 | // Insert code into the entry block that creates and registers the function |
34066 | // context. |
34067 | SetupEntryBlockForSjLj(MI, BB, DispatchBB, FI); |
34068 | |
34069 | // Create the jump table and associated information |
34070 | unsigned JTE = getJumpTableEncoding(); |
34071 | MachineJumpTableInfo *JTI = MF->getOrCreateJumpTableInfo(JTE); |
34072 | unsigned MJTI = JTI->createJumpTableIndex(LPadList); |
34073 | |
34074 | const X86RegisterInfo &RI = TII->getRegisterInfo(); |
34075 | // Add a register mask with no preserved registers.  This results in all |
34076 | // registers being marked as clobbered. |
34077 | if (RI.hasBasePointer(*MF)) { |
34078 | const bool FPIs64Bit = |
34079 | Subtarget.isTarget64BitLP64() || Subtarget.isTargetNaCl64(); |
34080 | X86MachineFunctionInfo *MFI = MF->getInfo<X86MachineFunctionInfo>(); |
34081 | MFI->setRestoreBasePointer(MF); |
34082 | |
34083 | Register FP = RI.getFrameRegister(*MF); |
34084 | Register BP = RI.getBaseRegister(); |
34085 | unsigned Op = FPIs64Bit ? X86::MOV64rm : X86::MOV32rm; |
34086 | addRegOffset(BuildMI(DispatchBB, DL, TII->get(Op), BP), FP, true, |
34087 | MFI->getRestoreBasePointerOffset()) |
34088 | .addRegMask(RI.getNoPreservedMask()); |
34089 | } else { |
34090 | BuildMI(DispatchBB, DL, TII->get(X86::NOOP)) |
34091 | .addRegMask(RI.getNoPreservedMask()); |
34092 | } |
34093 | |
34094 | // IReg is used as an index in a memory operand and therefore can't be SP |
34095 | Register IReg = MRI->createVirtualRegister(&X86::GR32_NOSPRegClass); |
34096 | addFrameReference(BuildMI(DispatchBB, DL, TII->get(X86::MOV32rm), IReg), FI, |
34097 | Subtarget.is64Bit() ? 8 : 4); |
34098 | BuildMI(DispatchBB, DL, TII->get(X86::CMP32ri)) |
34099 | .addReg(IReg) |
34100 | .addImm(LPadList.size()); |
34101 | BuildMI(DispatchBB, DL, TII->get(X86::JCC_1)).addMBB(TrapBB).addImm(X86::COND_AE); |
34102 | |
34103 | if (Subtarget.is64Bit()) { |
34104 | Register BReg = MRI->createVirtualRegister(&X86::GR64RegClass); |
34105 | Register IReg64 = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass); |
34106 | |
34107 | // leaq .LJTI0_0(%rip), BReg |
34108 | BuildMI(DispContBB, DL, TII->get(X86::LEA64r), BReg) |
34109 | .addReg(X86::RIP) |
34110 | .addImm(1) |
34111 | .addReg(0) |
34112 | .addJumpTableIndex(MJTI) |
34113 | .addReg(0); |
34114 | |
34115 | BuildMI(DispContBB, DL, TII->get(TargetOpcode::SUBREG_TO_REG), IReg64) |
34116 | .addImm(0) |
34117 | .addReg(IReg) |
34118 | .addImm(X86::sub_32bit); |
34119 | |
34120 | switch (JTE) { |
34121 | case MachineJumpTableInfo::EK_BlockAddress: |
34122 | // jmpq *(BReg,IReg64,8) |
34123 | BuildMI(DispContBB, DL, TII->get(X86::JMP64m)) |
34124 | .addReg(BReg) |
34125 | .addImm(8) |
34126 | .addReg(IReg64) |
34127 | .addImm(0) |
34128 | .addReg(0); |
34129 | break; |
34130 | case MachineJumpTableInfo::EK_LabelDifference32: { |
34131 | Register OReg = MRI->createVirtualRegister(&X86::GR32RegClass); |
34132 | Register OReg64 = MRI->createVirtualRegister(&X86::GR64RegClass); |
34133 | Register TReg = MRI->createVirtualRegister(&X86::GR64RegClass); |
34134 | |
34135 | // movl (BReg,IReg64,4), OReg |
34136 | BuildMI(DispContBB, DL, TII->get(X86::MOV32rm), OReg) |
34137 | .addReg(BReg) |
34138 | .addImm(4) |
34139 | .addReg(IReg64) |
34140 | .addImm(0) |
34141 | .addReg(0); |
34142 | // movsx OReg64, OReg |
34143 | BuildMI(DispContBB, DL, TII->get(X86::MOVSX64rr32), OReg64).addReg(OReg); |
34144 | // addq BReg, OReg64, TReg |
34145 | BuildMI(DispContBB, DL, TII->get(X86::ADD64rr), TReg) |
34146 | .addReg(OReg64) |
34147 | .addReg(BReg); |
34148 | // jmpq *TReg |
34149 | BuildMI(DispContBB, DL, TII->get(X86::JMP64r)).addReg(TReg); |
34150 | break; |
34151 | } |
34152 | default: |
34153 | llvm_unreachable("Unexpected jump table encoding"); |
34154 | } |
34155 | } else { |
34156 | // jmpl *.LJTI0_0(,IReg,4) |
34157 | BuildMI(DispContBB, DL, TII->get(X86::JMP32m)) |
34158 | .addReg(0) |
34159 | .addImm(4) |
34160 | .addReg(IReg) |
34161 | .addJumpTableIndex(MJTI) |
34162 | .addReg(0); |
34163 | } |
34164 | |
34165 | // Add the jump table entries as successors to the MBB. |
34166 | SmallPtrSet<MachineBasicBlock *, 8> SeenMBBs; |
34167 | for (auto &LP : LPadList) |
34168 | if (SeenMBBs.insert(LP).second) |
34169 | DispContBB->addSuccessor(LP); |
34170 | |
34171 | // N.B. the order the invoke BBs are processed in doesn't matter here. |
34172 | SmallVector<MachineBasicBlock *, 64> MBBLPads; |
34173 | const MCPhysReg *SavedRegs = MF->getRegInfo().getCalleeSavedRegs(); |
34174 | for (MachineBasicBlock *MBB : InvokeBBs) { |
34175 | // Remove the landing pad successor from the invoke block and replace it |
34176 | // with the new dispatch block. |
34177 | // Keep a copy of Successors since it's modified inside the loop. |
34178 | SmallVector<MachineBasicBlock *, 8> Successors(MBB->succ_rbegin(), |
34179 | MBB->succ_rend()); |
34180 | |
34181 | for (auto MBBS : Successors) { |
34182 | if (MBBS->isEHPad()) { |
34183 | MBB->removeSuccessor(MBBS); |
34184 | MBBLPads.push_back(MBBS); |
34185 | } |
34186 | } |
34187 | |
34188 | MBB->addSuccessor(DispatchBB); |
34189 | |
34190 | // Find the invoke call and mark all of the callee-saved registers as |
34191 | // 'implicit defined' so that they're spilled.  This prevents code from |
34192 | // moving instructions to before the EH block, where they will never be |
34193 | // executed. |
34194 | for (auto &II : reverse(*MBB)) { |
34195 | if (!II.isCall()) |
34196 | continue; |
34197 | |
34198 | DenseMap<unsigned, bool> DefRegs; |
34199 | for (auto &MOp : II.operands()) |
34200 | if (MOp.isReg()) |
34201 | DefRegs[MOp.getReg()] = true; |
34202 | |
34203 | MachineInstrBuilder MIB(*MF, &II); |
34204 | for (unsigned RegIdx = 0; SavedRegs[RegIdx]; ++RegIdx) { |
34205 | unsigned Reg = SavedRegs[RegIdx]; |
34206 | if (!DefRegs[Reg]) |
34207 | MIB.addReg(Reg, RegState::ImplicitDefine | RegState::Dead); |
34208 | } |
34209 | |
34210 | break; |
34211 | } |
34212 | } |
34213 | |
34214 | // Mark all former landing pads as non-landing pads.  The dispatch is the only |
34215 | // landing pad now. |
34216 | for (auto &LP : MBBLPads) |
34217 | LP->setIsEHPad(false); |
34218 | |
34219 | // The instruction is gone now. |
34220 | MI.eraseFromParent(); |
34221 | return BB; |
34222 | } |
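| // The dispatch block built above behaves roughly like the following |
| // (64-bit, EK_BlockAddress jump table encoding; illustrative only): |
| // |
| //   movl  8(FuncContext), %eax   ; call-site index (offset 4 on 32-bit) |
| //   cmpl  $NumLPads, %eax |
| //   jae   TrapBB                 ; bogus call-site index |
| //   leaq  .LJTI(%rip), %rcx |
| //   jmpq  *(%rcx,%rax,8)         ; enter the selected landing pad |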
34223 | |
34224 | MachineBasicBlock * |
34225 | X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI, |
34226 | MachineBasicBlock *BB) const { |
34227 | MachineFunction *MF = BB->getParent(); |
34228 | const TargetInstrInfo *TII = Subtarget.getInstrInfo(); |
34229 | const DebugLoc &DL = MI.getDebugLoc(); |
34230 | |
34231 | auto TMMImmToTMMReg = [](unsigned Imm) { |
34232 | assert (Imm < 8 && "Illegal tmm index"); |
34233 | return X86::TMM0 + Imm; |
34234 | }; |
34235 | switch (MI.getOpcode()) { |
34236 | default: llvm_unreachable("Unexpected instr type to insert"); |
34237 | case X86::TLS_addr32: |
34238 | case X86::TLS_addr64: |
34239 | case X86::TLS_addrX32: |
34240 | case X86::TLS_base_addr32: |
34241 | case X86::TLS_base_addr64: |
34242 | case X86::TLS_base_addrX32: |
34243 | return EmitLoweredTLSAddr(MI, BB); |
34244 | case X86::INDIRECT_THUNK_CALL32: |
34245 | case X86::INDIRECT_THUNK_CALL64: |
34246 | case X86::INDIRECT_THUNK_TCRETURN32: |
34247 | case X86::INDIRECT_THUNK_TCRETURN64: |
34248 | return EmitLoweredIndirectThunk(MI, BB); |
34249 | case X86::CATCHRET: |
34250 | return EmitLoweredCatchRet(MI, BB); |
34251 | case X86::SEG_ALLOCA_32: |
34252 | case X86::SEG_ALLOCA_64: |
34253 | return EmitLoweredSegAlloca(MI, BB); |
34254 | case X86::PROBED_ALLOCA_32: |
34255 | case X86::PROBED_ALLOCA_64: |
34256 | return EmitLoweredProbedAlloca(MI, BB); |
34257 | case X86::TLSCall_32: |
34258 | case X86::TLSCall_64: |
34259 | return EmitLoweredTLSCall(MI, BB); |
34260 | case X86::CMOV_FR32: |
34261 | case X86::CMOV_FR32X: |
34262 | case X86::CMOV_FR64: |
34263 | case X86::CMOV_FR64X: |
34264 | case X86::CMOV_GR8: |
34265 | case X86::CMOV_GR16: |
34266 | case X86::CMOV_GR32: |
34267 | case X86::CMOV_RFP32: |
34268 | case X86::CMOV_RFP64: |
34269 | case X86::CMOV_RFP80: |
34270 | case X86::CMOV_VR64: |
34271 | case X86::CMOV_VR128: |
34272 | case X86::CMOV_VR128X: |
34273 | case X86::CMOV_VR256: |
34274 | case X86::CMOV_VR256X: |
34275 | case X86::CMOV_VR512: |
34276 | case X86::CMOV_VK1: |
34277 | case X86::CMOV_VK2: |
34278 | case X86::CMOV_VK4: |
34279 | case X86::CMOV_VK8: |
34280 | case X86::CMOV_VK16: |
34281 | case X86::CMOV_VK32: |
34282 | case X86::CMOV_VK64: |
34283 | return EmitLoweredSelect(MI, BB); |
34284 | |
34285 | case X86::RDFLAGS32: |
34286 | case X86::RDFLAGS64: { |
34287 | unsigned PushF = |
34288 | MI.getOpcode() == X86::RDFLAGS32 ? X86::PUSHF32 : X86::PUSHF64; |
34289 | unsigned Pop = MI.getOpcode() == X86::RDFLAGS32 ? X86::POP32r : X86::POP64r; |
34290 | MachineInstr *Push = BuildMI(*BB, MI, DL, TII->get(PushF)); |
34291 | // Permit reads of the EFLAGS and DF registers without them being defined. |
34292 | // This intrinsic exists to read external processor state in flags, such as |
34293 | // the trap flag, interrupt flag, and direction flag, none of which are |
34294 | // modeled by the backend. |
34295 | assert(Push->getOperand(2).getReg() == X86::EFLAGS && |
34296 | "Unexpected register in operand!"); |
34297 | Push->getOperand(2).setIsUndef(); |
34298 | assert(Push->getOperand(3).getReg() == X86::DF && |
34299 | "Unexpected register in operand!"); |
34300 | Push->getOperand(3).setIsUndef(); |
34301 | BuildMI(*BB, MI, DL, TII->get(Pop), MI.getOperand(0).getReg()); |
34302 | |
34303 | MI.eraseFromParent(); |
34304 | return BB; |
34305 | } |
34306 | |
34307 | case X86::WRFLAGS32: |
34308 | case X86::WRFLAGS64: { |
34309 | unsigned Push = |
34310 | MI.getOpcode() == X86::WRFLAGS32 ? X86::PUSH32r : X86::PUSH64r; |
34311 | unsigned PopF = |
34312 | MI.getOpcode() == X86::WRFLAGS32 ? X86::POPF32 : X86::POPF64; |
34313 | BuildMI(*BB, MI, DL, TII->get(Push)).addReg(MI.getOperand(0).getReg()); |
34314 | BuildMI(*BB, MI, DL, TII->get(PopF)); |
34315 | |
34316 | MI.eraseFromParent(); |
34317 | return BB; |
34318 | } |
34319 | |
34320 | case X86::FP32_TO_INT16_IN_MEM: |
34321 | case X86::FP32_TO_INT32_IN_MEM: |
34322 | case X86::FP32_TO_INT64_IN_MEM: |
34323 | case X86::FP64_TO_INT16_IN_MEM: |
34324 | case X86::FP64_TO_INT32_IN_MEM: |
34325 | case X86::FP64_TO_INT64_IN_MEM: |
34326 | case X86::FP80_TO_INT16_IN_MEM: |
34327 | case X86::FP80_TO_INT32_IN_MEM: |
34328 | case X86::FP80_TO_INT64_IN_MEM: { |
34329 | // Change the floating point control register to use "round towards zero" |
34330 | // mode when truncating to an integer value. |
34331 | int OrigCWFrameIdx = |
34332 | MF->getFrameInfo().CreateStackObject(2, Align(2), false); |
34333 | addFrameReference(BuildMI(*BB, MI, DL, |
34334 | TII->get(X86::FNSTCW16m)), OrigCWFrameIdx); |
34335 | |
34336 | // Load the old value of the control word into a virtual register. |
34337 | Register OldCW = MF->getRegInfo().createVirtualRegister(&X86::GR32RegClass); |
34338 | addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOVZX32rm16), OldCW), |
34339 | OrigCWFrameIdx); |
34340 | |
34341 | // Set the rounding-control bits (bits 10 and 11) to round-towards-zero. |
34342 | Register NewCW = MF->getRegInfo().createVirtualRegister(&X86::GR32RegClass); |
34343 | BuildMI(*BB, MI, DL, TII->get(X86::OR32ri), NewCW) |
34344 | .addReg(OldCW, RegState::Kill).addImm(0xC00); |
34345 | |
34346 | // Extract to 16 bits. |
34347 | Register NewCW16 = |
34348 | MF->getRegInfo().createVirtualRegister(&X86::GR16RegClass); |
34349 | BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), NewCW16) |
34350 | .addReg(NewCW, RegState::Kill, X86::sub_16bit); |
34351 | |
34352 | |
34353 | // Prepare memory for FLDCW. |
34354 | MF->getFrameInfo().CreateStackObject(2, Align(2), false); |
34355 | addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOV16mr)), |
34356 | NewCWFrameIdx) |
34357 | .addReg(NewCW16, RegState::Kill); |
34358 | |
34359 | // Reload the modified control word now. |
34360 | addFrameReference(BuildMI(*BB, MI, DL, |
34361 | TII->get(X86::FLDCW16m)), NewCWFrameIdx); |
34362 | |
34363 | // Get the X86 opcode to use. |
34364 | unsigned Opc; |
34365 | switch (MI.getOpcode()) { |
34366 | default: llvm_unreachable("illegal opcode!"); |
34367 | case X86::FP32_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m32; break; |
34368 | case X86::FP32_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m32; break; |
34369 | case X86::FP32_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m32; break; |
34370 | case X86::FP64_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m64; break; |
34371 | case X86::FP64_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m64; break; |
34372 | case X86::FP64_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m64; break; |
34373 | case X86::FP80_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m80; break; |
34374 | case X86::FP80_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m80; break; |
34375 | case X86::FP80_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m80; break; |
34376 | } |
34377 | |
34378 | X86AddressMode AM = getAddressFromInstr(&MI, 0); |
34379 | addFullAddress(BuildMI(*BB, MI, DL, TII->get(Opc)), AM) |
34380 | .addReg(MI.getOperand(X86::AddrNumOperands).getReg()); |
34381 | |
34382 | // Reload the original control word now. |
34383 | addFrameReference(BuildMI(*BB, MI, DL, |
34384 | TII->get(X86::FLDCW16m)), OrigCWFrameIdx); |
34385 | |
34386 | MI.eraseFromParent(); |
34387 | return BB; |
34388 | } |
34389 | |
34390 | |
34391 | case X86::XBEGIN: |
34392 | return emitXBegin(MI, BB, Subtarget.getInstrInfo()); |
34393 | |
34394 | case X86::VAARG_64: |
34395 | case X86::VAARG_X32: |
34396 | return EmitVAARGWithCustomInserter(MI, BB); |
34397 | |
34398 | case X86::EH_SjLj_SetJmp32: |
34399 | case X86::EH_SjLj_SetJmp64: |
34400 | return emitEHSjLjSetJmp(MI, BB); |
34401 | |
34402 | case X86::EH_SjLj_LongJmp32: |
34403 | case X86::EH_SjLj_LongJmp64: |
34404 | return emitEHSjLjLongJmp(MI, BB); |
34405 | |
34406 | case X86::Int_eh_sjlj_setup_dispatch: |
34407 | return EmitSjLjDispatchBlock(MI, BB); |
34408 | |
34409 | case TargetOpcode::STATEPOINT: |
34410 | // As an implementation detail, STATEPOINT shares the STACKMAP format at |
34411 | // this point in the process.  We diverge later. |
34412 | return emitPatchPoint(MI, BB); |
34413 | |
34414 | case TargetOpcode::STACKMAP: |
34415 | case TargetOpcode::PATCHPOINT: |
34416 | return emitPatchPoint(MI, BB); |
34417 | |
34418 | case TargetOpcode::PATCHABLE_EVENT_CALL: |
34419 | case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL: |
34420 | return BB; |
34421 | |
34422 | case X86::LCMPXCHG8B: { |
34423 | const X86RegisterInfo *TRI = Subtarget.getRegisterInfo(); |
34424 | |
34425 | // In addition to the four E[ABCD] registers implied by its encoding, |
34426 | // CMPXCHG8B takes a memory operand. When the current function is i686 |
34427 | // code that also needs a base pointer (ESI), an address of the form |
34428 | // X(%reg, %reg, Y) can leave the register allocator with no unreserved |
34429 | // GR32 registers to work with: EAX, EBX, ECX and EDX are all claimed by |
34430 | // the instruction itself. |
34431 | // |
34432 | // To sidestep that, the address is computed into a single virtual |
34433 | // register by an LEA placed before the E[ABCD] definitions, and the |
34434 | // memory operand of the CMPXCHG8B is rewritten to use it directly. |
34435 | if (!Subtarget.is32Bit() || !TRI->hasBasePointer(*MF)) |
34436 | return BB; |
34437 | |
34438 | |
34439 | // The rewrite below is written against the i686 ABI, where the base |
34440 | // pointer is ESI; with a different base register the register-pressure |
34441 | // argument above would not hold in the same way. |
34442 | assert(TRI->getBaseRegister() == X86::ESI && |
34443 | "LCMPXCHG8B custom insertion for i686 is written with X86::ESI as a " |
34444 | "base pointer in mind"); |
34445 | |
34446 | MachineRegisterInfo &MRI = MF->getRegInfo(); |
34447 | MVT SPTy = getPointerTy(MF->getDataLayout()); |
34448 | const TargetRegisterClass *AddrRegClass = getRegClassFor(SPTy); |
34449 | Register computedAddrVReg = MRI.createVirtualRegister(AddrRegClass); |
34450 | |
34451 | X86AddressMode AM = getAddressFromInstr(&MI, 0); |
34452 | // Regalloc does not need any help when the memory operand of CMPXCHG8B |
34453 | // does not use an index register. |
34454 | if (AM.IndexReg == X86::NoRegister) |
34455 | return BB; |
34456 | |
34457 | // After X86TargetLowering::ReplaceNodeResults CMPXCHG8B is glued to its |
34458 | // four operand definitions that are E[ABCD] registers. We skip them and |
34459 | // then insert the LEA. |
34460 | MachineBasicBlock::reverse_iterator RMBBI(MI.getReverseIterator()); |
34461 | while (RMBBI != BB->rend() && (RMBBI->definesRegister(X86::EAX) || |
34462 | RMBBI->definesRegister(X86::EBX) || |
34463 | RMBBI->definesRegister(X86::ECX) || |
34464 | RMBBI->definesRegister(X86::EDX))) { |
34465 | ++RMBBI; |
34466 | } |
34467 | MachineBasicBlock::iterator MBBI(RMBBI); |
34468 | addFullAddress( |
34469 | BuildMI(*BB, *MBBI, DL, TII->get(X86::LEA32r), computedAddrVReg), AM); |
34470 | |
34471 | setDirectAddressInInstr(&MI, 0, computedAddrVReg); |
34472 | |
34473 | return BB; |
34474 | } |
34475 | case X86::LCMPXCHG16B_NO_RBX: { |
34476 | const X86RegisterInfo *TRI = Subtarget.getRegisterInfo(); |
34477 | Register BasePtr = TRI->getBaseRegister(); |
34478 | if (TRI->hasBasePointer(*MF) && |
34479 | (BasePtr == X86::RBX || BasePtr == X86::EBX)) { |
34480 | if (!BB->isLiveIn(BasePtr)) |
34481 | BB->addLiveIn(BasePtr); |
34482 | |
34483 | Register SaveRBX = |
34484 | MF->getRegInfo().createVirtualRegister(&X86::GR64RegClass); |
34485 | BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), SaveRBX) |
34486 | .addReg(X86::RBX); |
34487 | Register Dst = MF->getRegInfo().createVirtualRegister(&X86::GR64RegClass); |
34488 | MachineInstrBuilder MIB = |
34489 | BuildMI(*BB, MI, DL, TII->get(X86::LCMPXCHG16B_SAVE_RBX), Dst); |
34490 | for (unsigned Idx = 0; Idx < X86::AddrNumOperands; ++Idx) |
34491 | MIB.add(MI.getOperand(Idx)); |
34492 | MIB.add(MI.getOperand(X86::AddrNumOperands)); |
34493 | MIB.addReg(SaveRBX); |
34494 | } else { |
34495 | // Simple case: just copy the incoming value straight into RBX. |
34496 | BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), X86::RBX) |
34497 | .add(MI.getOperand(X86::AddrNumOperands)); |
34498 | MachineInstrBuilder MIB = |
34499 | BuildMI(*BB, MI, DL, TII->get(X86::LCMPXCHG16B)); |
34500 | for (unsigned Idx = 0; Idx < X86::AddrNumOperands; ++Idx) |
34501 | MIB.add(MI.getOperand(Idx)); |
34502 | } |
34503 | MI.eraseFromParent(); |
34504 | return BB; |
34505 | } |
34506 | case X86::MWAITX: { |
34507 | const X86RegisterInfo *TRI = Subtarget.getRegisterInfo(); |
34508 | Register BasePtr = TRI->getBaseRegister(); |
34509 | bool IsRBX = (BasePtr == X86::RBX || BasePtr == X86::EBX); |
34510 | // If there is no need to save the base pointer, we generate MWAITXrrr; |
34511 | // otherwise we generate the pseudo MWAITX_SAVE_RBX. |
34512 | if (!IsRBX || !TRI->hasBasePointer(*MF)) { |
34513 | BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), X86::ECX) |
34514 | .addReg(MI.getOperand(0).getReg()); |
34515 | BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), X86::EAX) |
34516 | .addReg(MI.getOperand(1).getReg()); |
34517 | BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), X86::EBX) |
34518 | .addReg(MI.getOperand(2).getReg()); |
34519 | BuildMI(*BB, MI, DL, TII->get(X86::MWAITXrrr)); |
34520 | MI.eraseFromParent(); |
34521 | } else { |
34522 | if (!BB->isLiveIn(BasePtr)) { |
34523 | BB->addLiveIn(BasePtr); |
34524 | } |
34525 | // Parameters can be copied into ECX and EAX but not EBX yet. |
34526 | BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), X86::ECX) |
34527 | .addReg(MI.getOperand(0).getReg()); |
34528 | BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), X86::EAX) |
34529 | .addReg(MI.getOperand(1).getReg()); |
34530 | assert(Subtarget.is64Bit() && "Expected 64-bit mode!"); |
34531 | // Save RBX into a virtual register. |
34532 | Register SaveRBX = |
34533 | MF->getRegInfo().createVirtualRegister(&X86::GR64RegClass); |
34534 | BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), SaveRBX) |
34535 | .addReg(X86::RBX); |
34536 | // Generate the mwaitx pseudo; it restores RBX from SaveRBX afterwards. |
34537 | Register Dst = MF->getRegInfo().createVirtualRegister(&X86::GR64RegClass); |
34538 | BuildMI(*BB, MI, DL, TII->get(X86::MWAITX_SAVE_RBX)) |
34539 | .addDef(Dst) |
34540 | .addReg(MI.getOperand(2).getReg()) |
34541 | .addUse(SaveRBX); |
34542 | MI.eraseFromParent(); |
34543 | } |
34544 | return BB; |
34545 | } |
34546 | case TargetOpcode::PREALLOCATED_SETUP: { |
34547 | assert(Subtarget.is32Bit() && "preallocated only used in 32-bit"); |
34548 | auto MFI = MF->getInfo<X86MachineFunctionInfo>(); |
34549 | MFI->setHasPreallocatedCall(true); |
34550 | int64_t PreallocatedId = MI.getOperand(0).getImm(); |
34551 | size_t StackAdjustment = MFI->getPreallocatedStackSize(PreallocatedId); |
34552 | assert(StackAdjustment != 0 && "0 stack adjustment"); |
34553 | LLVM_DEBUG(dbgs() << "PREALLOCATED_SETUP stack adjustment " |
34554 | << StackAdjustment << "\n"); |
34555 | BuildMI(*BB, MI, DL, TII->get(X86::SUB32ri), X86::ESP) |
34556 | .addReg(X86::ESP) |
34557 | .addImm(StackAdjustment); |
34558 | MI.eraseFromParent(); |
34559 | return BB; |
34560 | } |
34561 | case TargetOpcode::PREALLOCATED_ARG: { |
34562 | assert(Subtarget.is32Bit() && "preallocated calls only used in 32-bit"); |
34563 | int64_t PreallocatedId = MI.getOperand(1).getImm(); |
34564 | int64_t ArgIdx = MI.getOperand(2).getImm(); |
34565 | auto MFI = MF->getInfo<X86MachineFunctionInfo>(); |
34566 | size_t ArgOffset = MFI->getPreallocatedArgOffsets(PreallocatedId)[ArgIdx]; |
34567 | LLVM_DEBUG(dbgs() << "PREALLOCATED_ARG arg index " << ArgIdx |
34568 | << ", arg offset " << ArgOffset << "\n"); |
34569 | |
34570 | addRegOffset( |
34571 | BuildMI(*BB, MI, DL, TII->get(X86::LEA32r), MI.getOperand(0).getReg()), |
34572 | X86::ESP, false, ArgOffset); |
34573 | MI.eraseFromParent(); |
34574 | return BB; |
34575 | } |
34576 | case X86::PTDPBSSD: |
34577 | case X86::PTDPBSUD: |
34578 | case X86::PTDPBUSD: |
34579 | case X86::PTDPBUUD: |
34580 | case X86::PTDPBF16PS: { |
34581 | unsigned Opc; |
34582 | switch (MI.getOpcode()) { |
34583 | case X86::PTDPBSSD: Opc = X86::TDPBSSD; break; |
34584 | case X86::PTDPBSUD: Opc = X86::TDPBSUD; break; |
34585 | case X86::PTDPBUSD: Opc = X86::TDPBUSD; break; |
34586 | case X86::PTDPBUUD: Opc = X86::TDPBUUD; break; |
34587 | case X86::PTDPBF16PS: Opc = X86::TDPBF16PS; break; |
34588 | } |
34589 | |
34590 | MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, TII->get(Opc)); |
34591 | MIB.addReg(TMMImmToTMMReg(MI.getOperand(0).getImm()), RegState::Define); |
34592 | MIB.addReg(TMMImmToTMMReg(MI.getOperand(0).getImm()), RegState::Undef); |
34593 | MIB.addReg(TMMImmToTMMReg(MI.getOperand(1).getImm()), RegState::Undef); |
34594 | MIB.addReg(TMMImmToTMMReg(MI.getOperand(2).getImm()), RegState::Undef); |
34595 | |
34596 | MI.eraseFromParent(); |
34597 | return BB; |
34598 | } |
34599 | case X86::PTILEZERO: { |
34600 | unsigned Imm = MI.getOperand(0).getImm(); |
34601 | BuildMI(*BB, MI, DL, TII->get(X86::TILEZERO), TMMImmToTMMReg(Imm)); |
34602 | MI.eraseFromParent(); |
34603 | return BB; |
34604 | } |
34605 | case X86::PTILELOADD: |
34606 | case X86::PTILELOADDT1: |
34607 | case X86::PTILESTORED: { |
34608 | unsigned Opc; |
34609 | switch (MI.getOpcode()) { |
34610 | case X86::PTILELOADD: Opc = X86::TILELOADD; break; |
34611 | case X86::PTILELOADDT1: Opc = X86::TILELOADDT1; break; |
34612 | case X86::PTILESTORED: Opc = X86::TILESTORED; break; |
34613 | } |
34614 | |
34615 | MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, TII->get(Opc)); |
34616 | unsigned CurOp = 0; |
34617 | if (Opc != X86::TILESTORED) |
34618 | MIB.addReg(TMMImmToTMMReg(MI.getOperand(CurOp++).getImm()), |
34619 | RegState::Define); |
34620 | |
34621 | MIB.add(MI.getOperand(CurOp++)); |
34622 | MIB.add(MI.getOperand(CurOp++)); |
34623 | MIB.add(MI.getOperand(CurOp++)); |
34624 | MIB.add(MI.getOperand(CurOp++)); |
34625 | MIB.add(MI.getOperand(CurOp++)); |
34626 | |
34627 | if (Opc == X86::TILESTORED) |
34628 | MIB.addReg(TMMImmToTMMReg(MI.getOperand(CurOp++).getImm()), |
34629 | RegState::Undef); |
34630 | |
34631 | MI.eraseFromParent(); |
34632 | return BB; |
34633 | } |
34634 | } |
34635 | } |
34636 | |
34637 | //===----------------------------------------------------------------------===// |
34638 | //                           X86 Optimization Hooks |
34639 | //===----------------------------------------------------------------------===// |
34640 | |
34641 | bool |
34642 | X86TargetLowering::targetShrinkDemandedConstant(SDValue Op, |
34643 | const APInt &DemandedBits, |
34644 | const APInt &DemandedElts, |
34645 | TargetLoweringOpt &TLO) const { |
34646 | EVT VT = Op.getValueType(); |
34647 | unsigned Opcode = Op.getOpcode(); |
34648 | unsigned EltSize = VT.getScalarSizeInBits(); |
34649 | |
34650 | if (VT.isVector()) { |
34651 | // If the constant is only all signbits in the demanded/active bits, then |
34652 | // we should extend it to the entire constant to allow it act as a boolean |
34653 | // constant vector. |
34654 | auto NeedsSignExtension = [&](SDValue V, unsigned ActiveBits) { |
34655 | if (!ISD::isBuildVectorOfConstantSDNodes(V.getNode())) |
34656 | return false; |
34657 | for (unsigned i = 0, e = V.getNumOperands(); i != e; ++i) { |
34658 | if (!DemandedElts[i] || V.getOperand(i).isUndef()) |
34659 | continue; |
34660 | const APInt &Val = V.getConstantOperandAPInt(i); |
34661 | if (Val.getBitWidth() > Val.getNumSignBits() && |
34662 | Val.trunc(ActiveBits).getNumSignBits() == ActiveBits) |
34663 | return true; |
34664 | } |
34665 | return false; |
34666 | }; |
34667 | // For vector OR/XOR against a constant, try to sign-extend the constant |
34668 | // so it can act as an all-ones boolean vector. |
34669 | unsigned ActiveBits = DemandedBits.getActiveBits(); |
34670 | if (EltSize > ActiveBits && EltSize > 1 && isTypeLegal(VT) && |
34671 | (Opcode == ISD::OR || Opcode == ISD::XOR) && |
34672 | NeedsSignExtension(Op.getOperand(1), ActiveBits)) { |
34673 | EVT ExtSVT = EVT::getIntegerVT(*TLO.DAG.getContext(), ActiveBits); |
34674 | EVT ExtVT = EVT::getVectorVT(*TLO.DAG.getContext(), ExtSVT, |
34675 | VT.getVectorNumElements()); |
34676 | SDValue NewC = |
34677 | TLO.DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(Op), VT, |
34678 | Op.getOperand(1), TLO.DAG.getValueType(ExtVT)); |
34679 | SDValue NewOp = |
34680 | TLO.DAG.getNode(Opcode, SDLoc(Op), VT, Op.getOperand(0), NewC); |
34681 | return TLO.CombineTo(Op, NewOp); |
34682 | } |
34683 | return false; |
34684 | } |
34685 | |
34686 | // Only optimize Ands to prevent shrinking a constant that could be |
34687 | // matched by movzx. |
34688 | if (Opcode != ISD::AND) |
34689 | return false; |
34690 | |
34691 | // Make sure the RHS really is a constant. |
34692 | ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1)); |
34693 | if (!C) |
34694 | return false; |
34695 | |
34696 | const APInt &Mask = C->getAPIntValue(); |
34697 | |
34698 | // Clear all non-demanded bits initially. |
34699 | APInt ShrunkMask = Mask & DemandedBits; |
34700 | |
34701 | // Find the width of the shrunk mask. |
34702 | unsigned Width = ShrunkMask.getActiveBits(); |
34703 | |
34704 | // If the mask is all 0s there's nothing to do here. |
34705 | if (Width == 0) |
34706 | return false; |
34707 | |
34708 | // Find the next power of 2 width, rounding up to a byte. |
34709 | Width = PowerOf2Ceil(std::max(Width, 8U)); |
34710 | // Truncate the width to size to handle illegal types. |
34711 | Width = std::min(Width, EltSize); |
34712 | |
34713 | // Calculate a possible zero extend mask for this constant. |
34714 | APInt ZeroExtendMask = APInt::getLowBitsSet(EltSize, Width); |
34715 | |
34716 | // If we aren't changing the mask, just return true to keep it and prevent |
34717 | // the caller from optimizing. |
34718 | if (ZeroExtendMask == Mask) |
34719 | return true; |
34720 | |
34721 | // Make sure the new mask can be represented by a combination of mask bits |
34722 | // and non-demanded bits. |
34723 | if (!ZeroExtendMask.isSubsetOf(Mask | ~DemandedBits)) |
34724 | return false; |
34725 | |
34726 | // Replace the constant with the zero extend mask. |
34727 | SDLoc DL(Op); |
34728 | SDValue NewC = TLO.DAG.getConstant(ZeroExtendMask, DL, VT); |
34729 | SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC); |
34730 | return TLO.CombineTo(Op, NewOp); |
34731 | } |
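| // Worked example for the scalar path above (illustrative): with |
| // EltSize = 32, Mask = 0x1FFF and DemandedBits = 0xFFF, ShrunkMask = 0xFFF |
| // has 12 active bits, which rounds up to Width = 16, so ZeroExtendMask is |
| // 0xFFFF. Its extra bits (0xF000) are each either already set in Mask or |
| // not demanded, so the AND constant becomes 0xFFFF and can later be |
| // matched as a movzwl-style zero extension. |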
34732 | |
34733 | void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op, |
34734 | KnownBits &Known, |
34735 | const APInt &DemandedElts, |
34736 | const SelectionDAG &DAG, |
34737 | unsigned Depth) const { |
34738 | unsigned BitWidth = Known.getBitWidth(); |
34739 | unsigned NumElts = DemandedElts.getBitWidth(); |
34740 | unsigned Opc = Op.getOpcode(); |
34741 | EVT VT = Op.getValueType(); |
34742 | assert((Opc >= ISD::BUILTIN_OP_END || |
34743 | Opc == ISD::INTRINSIC_WO_CHAIN || |
34744 | Opc == ISD::INTRINSIC_W_CHAIN || |
34745 | Opc == ISD::INTRINSIC_VOID) && |
34746 | "Should use MaskedValueIsZero if you don't know whether Op" |
34747 | " is a target node!"); |
34748 | |
34749 | Known.resetAll(); |
34750 | switch (Opc) { |
34751 | default: break; |
34752 | case X86ISD::SETCC: |
34753 | Known.Zero.setBitsFrom(1); |
34754 | break; |
34755 | case X86ISD::MOVMSK: { |
34756 | unsigned NumLoBits = Op.getOperand(0).getValueType().getVectorNumElements(); |
34757 | Known.Zero.setBitsFrom(NumLoBits); |
34758 | break; |
34759 | } |
34760 | case X86ISD::PEXTRB: |
34761 | case X86ISD::PEXTRW: { |
34762 | SDValue Src = Op.getOperand(0); |
34763 | EVT SrcVT = Src.getValueType(); |
34764 | APInt DemandedElt = APInt::getOneBitSet(SrcVT.getVectorNumElements(), |
34765 | Op.getConstantOperandVal(1)); |
34766 | Known = DAG.computeKnownBits(Src, DemandedElt, Depth + 1); |
34767 | Known = Known.anyextOrTrunc(BitWidth); |
34768 | Known.Zero.setBitsFrom(SrcVT.getScalarSizeInBits()); |
34769 | break; |
34770 | } |
34771 | case X86ISD::VSRAI: |
34772 | case X86ISD::VSHLI: |
34773 | case X86ISD::VSRLI: { |
34774 | unsigned ShAmt = Op.getConstantOperandVal(1); |
34775 | if (ShAmt >= VT.getScalarSizeInBits()) { |
34776 | Known.setAllZero(); |
34777 | break; |
34778 | } |
34779 | |
34780 | Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); |
34781 | if (Opc == X86ISD::VSHLI) { |
34782 | Known.Zero <<= ShAmt; |
34783 | Known.One <<= ShAmt; |
34784 | // Low bits are known zero. |
34785 | Known.Zero.setLowBits(ShAmt); |
34786 | } else if (Opc == X86ISD::VSRLI) { |
34787 | Known.Zero.lshrInPlace(ShAmt); |
34788 | Known.One.lshrInPlace(ShAmt); |
34789 | // High bits are known zero. |
34790 | Known.Zero.setHighBits(ShAmt); |
34791 | } else { |
34792 | Known.Zero.ashrInPlace(ShAmt); |
34793 | Known.One.ashrInPlace(ShAmt); |
34794 | } |
34795 | break; |
34796 | } |
34797 | case X86ISD::PACKUS: { |
34798 | // PACKUS is just a truncation if the upper half is zero. |
34799 | APInt DemandedLHS, DemandedRHS; |
34800 | getPackDemandedElts(VT, DemandedElts, DemandedLHS, DemandedRHS); |
34801 | |
34802 | Known.One = APInt::getAllOnesValue(BitWidth * 2); |
34803 | Known.Zero = APInt::getAllOnesValue(BitWidth * 2); |
34804 | |
34805 | KnownBits Known2; |
34806 | if (!!DemandedLHS) { |
34807 | Known2 = DAG.computeKnownBits(Op.getOperand(0), DemandedLHS, Depth + 1); |
34808 | Known = KnownBits::commonBits(Known, Known2); |
34809 | } |
34810 | if (!!DemandedRHS) { |
34811 | Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedRHS, Depth + 1); |
34812 | Known = KnownBits::commonBits(Known, Known2); |
34813 | } |
34814 | |
34815 | if (Known.countMinLeadingZeros() < BitWidth) |
34816 | Known.resetAll(); |
34817 | Known = Known.trunc(BitWidth); |
34818 | break; |
34819 | } |
34820 | case X86ISD::VBROADCAST: { |
34821 | SDValue Src = Op.getOperand(0); |
34822 | if (!Src.getSimpleValueType().isVector()) { |
34823 | Known = DAG.computeKnownBits(Src, Depth + 1); |
34824 | return; |
34825 | } |
34826 | break; |
34827 | } |
34828 | case X86ISD::ANDNP: { |
34829 | KnownBits Known2; |
34830 | Known = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); |
34831 | Known2 = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); |
34832 | |
34833 | // ANDNP = (~X & Y); |
34834 | Known.One &= Known2.Zero; |
34835 | Known.Zero |= Known2.One; |
34836 | break; |
34837 | } |
34838 | case X86ISD::FOR: { |
34839 | KnownBits Known2; |
34840 | Known = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); |
34841 | Known2 = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); |
34842 | |
34843 | Known |= Known2; |
34844 | break; |
34845 | } |
34846 | case X86ISD::PSADBW: { |
34847 | assert(VT.getScalarType() == MVT::i64 && |
34848 | Op.getOperand(0).getValueType().getScalarType() == MVT::i8 && |
34849 | "Unexpected PSADBW types"); |
34850 | |
34851 | // PSADBW fills the low 16 bits and zeros the upper 48 bits of each i64 result. |
34852 | Known.Zero.setBitsFrom(16); |
34853 | break; |
34854 | } |
34855 | case X86ISD::PMULUDQ: { |
34856 | KnownBits Known2; |
34857 | Known = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); |
34858 | Known2 = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); |
34859 | // PMULUDQ multiplies only the low 32 bits of each 64-bit element. |
34860 | Known = Known.trunc(BitWidth / 2).zext(BitWidth); |
34861 | Known2 = Known2.trunc(BitWidth / 2).zext(BitWidth); |
34862 | Known = KnownBits::mul(Known, Known2); |
34863 | break; |
34864 | } |
34865 | case X86ISD::CMOV: { |
34866 | Known = DAG.computeKnownBits(Op.getOperand(1), Depth + 1); |
34867 | // If we don't know any bits, early out. |
34868 | if (Known.isUnknown()) |
34869 | break; |
34870 | KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1); |
34871 | |
34872 | // Only known if known in both the LHS and RHS. |
34873 | Known = KnownBits::commonBits(Known, Known2); |
34874 | break; |
34875 | } |
34876 | case X86ISD::BEXTR: |
34877 | case X86ISD::BEXTRI: { |
34878 | SDValue Op0 = Op.getOperand(0); |
34879 | SDValue Op1 = Op.getOperand(1); |
34880 | |
34881 | if (auto* Cst1 = dyn_cast<ConstantSDNode>(Op1)) { |
34882 | unsigned Shift = Cst1->getAPIntValue().extractBitsAsZExtValue(8, 0); |
34883 | unsigned Length = Cst1->getAPIntValue().extractBitsAsZExtValue(8, 8); |
34884 | |
34885 | // If the length is 0, the result is 0. |
34886 | if (Length == 0) { |
34887 | Known.setAllZero(); |
34888 | break; |
34889 | } |
34890 | |
34891 | if ((Shift + Length) <= BitWidth) { |
34892 | Known = DAG.computeKnownBits(Op0, Depth + 1); |
34893 | Known = Known.extractBits(Length, Shift); |
34894 | Known = Known.zextOrTrunc(BitWidth); |
34895 | } |
34896 | } |
34897 | break; |
34898 | } |
34899 | case X86ISD::PDEP: { |
34900 | KnownBits Known2; |
34901 | Known = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); |
34902 | Known2 = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); |
34903 | // The result is zero wherever the mask (operand 1) is known zero. |
34904 | Known.One.clearAllBits(); |
34905 | |
34906 | // The result has at least as many trailing zeros as the source (operand 0). |
34907 | Known.Zero.setLowBits(Known2.countMinTrailingZeros()); |
34908 | break; |
34909 | } |
34910 | case X86ISD::PEXT: { |
34911 | Known = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); |
34912 | // PEXT packs bits low: known-zero mask bits cap the result's significant width. |
34913 | unsigned Count = Known.Zero.countPopulation(); |
34914 | Known.Zero = APInt::getHighBitsSet(BitWidth, Count); |
34915 | Known.One.clearAllBits(); |
34916 | break; |
34917 | } |
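// Editor's sketch (not from the source): a worked example of the PDEP/PEXT
// known-bits rules above, using hypothetical 8-bit values.
//   PDEP(src=0b00000100, mask=0b10101000) deposits src bits 0..2 into the
//   mask's set positions 3,5,7 -> 0b10000000; the two trailing zeros of the
//   source guarantee at least two trailing zeros in the result.
//   PEXT(src, mask=0b10101000) packs bits 3,5,7 of src into bits 0..2, so at
//   most popcount(mask)=3 low result bits can be set; the rest are zero.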
34918 | case X86ISD::VTRUNC: |
34919 | case X86ISD::VTRUNCS: |
34920 | case X86ISD::VTRUNCUS: |
34921 | case X86ISD::CVTSI2P: |
34922 | case X86ISD::CVTUI2P: |
34923 | case X86ISD::CVTP2SI: |
34924 | case X86ISD::CVTP2UI: |
34925 | case X86ISD::MCVTP2SI: |
34926 | case X86ISD::MCVTP2UI: |
34927 | case X86ISD::CVTTP2SI: |
34928 | case X86ISD::CVTTP2UI: |
34929 | case X86ISD::MCVTTP2SI: |
34930 | case X86ISD::MCVTTP2UI: |
34931 | case X86ISD::MCVTSI2P: |
34932 | case X86ISD::MCVTUI2P: |
34933 | case X86ISD::VFPROUND: |
34934 | case X86ISD::VMFPROUND: |
34935 | case X86ISD::CVTPS2PH: |
34936 | case X86ISD::MCVTPS2PH: { |
34937 | // Truncations/Conversions - upper elements are known zero. |
34938 | EVT SrcVT = Op.getOperand(0).getValueType(); |
34939 | if (SrcVT.isVector()) { |
34940 | unsigned NumSrcElts = SrcVT.getVectorNumElements(); |
34941 | if (NumElts > NumSrcElts && |
34942 | DemandedElts.countTrailingZeros() >= NumSrcElts) |
34943 | Known.setAllZero(); |
34944 | } |
34945 | break; |
34946 | } |
34947 | case X86ISD::STRICT_CVTTP2SI: |
34948 | case X86ISD::STRICT_CVTTP2UI: |
34949 | case X86ISD::STRICT_CVTSI2P: |
34950 | case X86ISD::STRICT_CVTUI2P: |
34951 | case X86ISD::STRICT_VFPROUND: |
34952 | case X86ISD::STRICT_CVTPS2PH: { |
34953 | // Strict Conversions - upper elements are known zero. |
34954 | EVT SrcVT = Op.getOperand(1).getValueType(); |
34955 | if (SrcVT.isVector()) { |
34956 | unsigned NumSrcElts = SrcVT.getVectorNumElements(); |
34957 | if (NumElts > NumSrcElts && |
34958 | DemandedElts.countTrailingZeros() >= NumSrcElts) |
34959 | Known.setAllZero(); |
34960 | } |
34961 | break; |
34962 | } |
34963 | case X86ISD::MOVQ2DQ: { |
34964 | // Move from MMX to XMM. The upper half of the XMM result is zero. |
34965 | if (DemandedElts.countTrailingZeros() >= (NumElts / 2)) |
34966 | Known.setAllZero(); |
34967 | break; |
34968 | } |
34969 | } |
34970 | |
34971 | // Handle target shuffles. |
34972 | // TODO - use resolveTargetShuffleInputs once we can limit recursive depth. |
34973 | if (isTargetShuffle(Opc)) { |
34974 | SmallVector<int, 64> Mask; |
34975 | SmallVector<SDValue, 2> Ops; |
34976 | if (getTargetShuffleMask(Op.getNode(), VT.getSimpleVT(), true, Ops, Mask)) { |
34977 | unsigned NumOps = Ops.size(); |
34978 | unsigned NumElts = VT.getVectorNumElements(); |
34979 | if (Mask.size() == NumElts) { |
34980 | SmallVector<APInt, 2> DemandedOps(NumOps, APInt(NumElts, 0)); |
34981 | Known.Zero.setAllBits(); Known.One.setAllBits(); |
34982 | for (unsigned i = 0; i != NumElts; ++i) { |
34983 | if (!DemandedElts[i]) |
34984 | continue; |
34985 | int M = Mask[i]; |
34986 | if (M == SM_SentinelUndef) { |
34987 | // For UNDEF elements, we don't know anything about the common state of |
34988 | // the shuffle result. |
34989 | Known.resetAll(); |
34990 | break; |
34991 | } |
34992 | if (M == SM_SentinelZero) { |
34993 | Known.One.clearAllBits(); |
34994 | continue; |
34995 | } |
34996 | assert(0 <= M && (unsigned)M < (NumOps * NumElts) && |
34997 | "Shuffle index out of range"); |
34998 | |
34999 | unsigned OpIdx = (unsigned)M / NumElts; |
35000 | unsigned EltIdx = (unsigned)M % NumElts; |
35001 | if (Ops[OpIdx].getValueType() != VT) { |
35002 | // TODO - handle target shuffle ops with different value types. |
35003 | Known.resetAll(); |
35004 | break; |
35005 | } |
35006 | DemandedOps[OpIdx].setBit(EltIdx); |
35007 | } |
35008 | // Known bits are only those shared by every demanded source element. |
35009 | for (unsigned i = 0; i != NumOps && !Known.isUnknown(); ++i) { |
35010 | if (!DemandedOps[i]) |
35011 | continue; |
35012 | KnownBits Known2 = |
35013 | DAG.computeKnownBits(Ops[i], DemandedOps[i], Depth + 1); |
35014 | Known = KnownBits::commonBits(Known, Known2); |
35015 | } |
35016 | } |
35017 | } |
35018 | } |
35019 | } |
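// Editor's note: a minimal standalone sketch, not part of this file, of the
// KnownBits bookkeeping the VSHLI case above performs. It assumes only the
// public llvm::KnownBits/APInt members from llvm/Support/KnownBits.h; the
// helper name is illustrative.
static llvm::KnownBits knownShl(llvm::KnownBits K, unsigned ShAmt) {
  K.Zero <<= ShAmt;          // tracked zero bits move with the value
  K.One <<= ShAmt;           // tracked one bits move with the value
  K.Zero.setLowBits(ShAmt);  // the vacated low bits are definitely zero
  return K;
}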
35020 | |
35021 | unsigned X86TargetLowering::ComputeNumSignBitsForTargetNode( |
35022 | SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG, |
35023 | unsigned Depth) const { |
35024 | EVT VT = Op.getValueType(); |
35025 | unsigned VTBits = VT.getScalarSizeInBits(); |
35026 | unsigned Opcode = Op.getOpcode(); |
35027 | switch (Opcode) { |
35028 | case X86ISD::SETCC_CARRY: |
35029 | // SETCC_CARRY sets the dest to ~0 for true or 0 for false. |
35030 | return VTBits; |
35031 | |
35032 | case X86ISD::VTRUNC: { |
35033 | SDValue Src = Op.getOperand(0); |
35034 | MVT SrcVT = Src.getSimpleValueType(); |
35035 | unsigned NumSrcBits = SrcVT.getScalarSizeInBits(); |
35036 | assert(VTBits < NumSrcBits && "Illegal truncation input type"); |
35037 | APInt DemandedSrc = DemandedElts.zextOrTrunc(SrcVT.getVectorNumElements()); |
35038 | unsigned Tmp = DAG.ComputeNumSignBits(Src, DemandedSrc, Depth + 1); |
35039 | if (Tmp > (NumSrcBits - VTBits)) |
35040 | return Tmp - (NumSrcBits - VTBits); |
35041 | return 1; |
35042 | } |
35043 | |
35044 | case X86ISD::PACKSS: { |
35045 | // PACKSS is just a truncation if the sign bits extend to the packed size. |
35046 | APInt DemandedLHS, DemandedRHS; |
35047 | getPackDemandedElts(Op.getValueType(), DemandedElts, DemandedLHS, |
35048 | DemandedRHS); |
35049 | |
35050 | unsigned SrcBits = Op.getOperand(0).getScalarValueSizeInBits(); |
35051 | unsigned Tmp0 = SrcBits, Tmp1 = SrcBits; |
35052 | if (!!DemandedLHS) |
35053 | Tmp0 = DAG.ComputeNumSignBits(Op.getOperand(0), DemandedLHS, Depth + 1); |
35054 | if (!!DemandedRHS) |
35055 | Tmp1 = DAG.ComputeNumSignBits(Op.getOperand(1), DemandedRHS, Depth + 1); |
35056 | unsigned Tmp = std::min(Tmp0, Tmp1); |
35057 | if (Tmp > (SrcBits - VTBits)) |
35058 | return Tmp - (SrcBits - VTBits); |
35059 | return 1; |
35060 | } |
35061 | |
35062 | case X86ISD::VBROADCAST: { |
35063 | SDValue Src = Op.getOperand(0); |
35064 | if (!Src.getSimpleValueType().isVector()) |
35065 | return DAG.ComputeNumSignBits(Src, Depth + 1); |
35066 | break; |
35067 | } |
35068 | |
35069 | case X86ISD::VSHLI: { |
35070 | SDValue Src = Op.getOperand(0); |
35071 | const APInt &ShiftVal = Op.getConstantOperandAPInt(1); |
35072 | if (ShiftVal.uge(VTBits)) |
35073 | return VTBits; |
35074 | unsigned Tmp = DAG.ComputeNumSignBits(Src, DemandedElts, Depth + 1); |
35075 | if (ShiftVal.uge(Tmp)) |
35076 | return 1; |
35077 | return Tmp - ShiftVal.getZExtValue(); |
35078 | } |
35079 | |
35080 | case X86ISD::VSRAI: { |
35081 | SDValue Src = Op.getOperand(0); |
35082 | APInt ShiftVal = Op.getConstantOperandAPInt(1); |
35083 | if (ShiftVal.uge(VTBits - 1)) |
35084 | return VTBits; |
35085 | unsigned Tmp = DAG.ComputeNumSignBits(Src, DemandedElts, Depth + 1); |
35086 | ShiftVal += Tmp; |
35087 | return ShiftVal.uge(VTBits) ? VTBits : ShiftVal.getZExtValue(); |
35088 | } |
35089 | |
35090 | case X86ISD::FSETCC: |
35091 | // cmpss/cmpsd return zero/all-bits result values in the bottom element. |
35092 | if (VT == MVT::f32 || VT == MVT::f64 || |
35093 | ((VT == MVT::v4f32 || VT == MVT::v2f64) && DemandedElts == 1)) |
35094 | return VTBits; |
35095 | break; |
35096 | |
35097 | case X86ISD::PCMPGT: |
35098 | case X86ISD::PCMPEQ: |
35099 | case X86ISD::CMPP: |
35100 | case X86ISD::VPCOM: |
35101 | case X86ISD::VPCOMU: |
35102 | // Vector compares return zero/all-bits result values. |
35103 | return VTBits; |
35104 | |
35105 | case X86ISD::ANDNP: { |
35106 | unsigned Tmp0 = |
35107 | DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1); |
35108 | if (Tmp0 == 1) return 1; |
35109 | unsigned Tmp1 = |
35110 | DAG.ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1); |
35111 | return std::min(Tmp0, Tmp1); |
35112 | } |
35113 | |
35114 | case X86ISD::CMOV: { |
35115 | unsigned Tmp0 = DAG.ComputeNumSignBits(Op.getOperand(0), Depth+1); |
35116 | if (Tmp0 == 1) return 1; |
35117 | unsigned Tmp1 = DAG.ComputeNumSignBits(Op.getOperand(1), Depth+1); |
35118 | return std::min(Tmp0, Tmp1); |
35119 | } |
35120 | } |
35121 | |
35122 | // Handle target shuffles. |
35123 | // TODO - use resolveTargetShuffleInputs once we can limit recursive depth. |
35124 | if (isTargetShuffle(Opcode)) { |
35125 | SmallVector<int, 64> Mask; |
35126 | SmallVector<SDValue, 2> Ops; |
35127 | if (getTargetShuffleMask(Op.getNode(), VT.getSimpleVT(), true, Ops, Mask)) { |
35128 | unsigned NumOps = Ops.size(); |
35129 | unsigned NumElts = VT.getVectorNumElements(); |
35130 | if (Mask.size() == NumElts) { |
35131 | SmallVector<APInt, 2> DemandedOps(NumOps, APInt(NumElts, 0)); |
35132 | for (unsigned i = 0; i != NumElts; ++i) { |
35133 | if (!DemandedElts[i]) |
35134 | continue; |
35135 | int M = Mask[i]; |
35136 | if (M == SM_SentinelUndef) { |
35137 | // For UNDEF elements, we don't know anything about the common state of |
35138 | // the shuffle result. |
35139 | return 1; |
35140 | } else if (M == SM_SentinelZero) { |
35141 | // Zero = all sign bits. |
35142 | continue; |
35143 | } |
35144 | assert(0 <= M && (unsigned)M < (NumOps * NumElts) && |
35145 | "Shuffle index out of range"); |
35146 | |
35147 | unsigned OpIdx = (unsigned)M / NumElts; |
35148 | unsigned EltIdx = (unsigned)M % NumElts; |
35149 | if (Ops[OpIdx].getValueType() != VT) { |
35150 | // TODO - handle target shuffle ops with different value types. |
35151 | return 1; |
35152 | } |
35153 | DemandedOps[OpIdx].setBit(EltIdx); |
35154 | } |
35155 | unsigned Tmp0 = VTBits; |
35156 | for (unsigned i = 0; i != NumOps && Tmp0 > 1; ++i) { |
35157 | if (!DemandedOps[i]) |
35158 | continue; |
35159 | unsigned Tmp1 = |
35160 | DAG.ComputeNumSignBits(Ops[i], DemandedOps[i], Depth + 1); |
35161 | Tmp0 = std::min(Tmp0, Tmp1); |
35162 | } |
35163 | return Tmp0; |
35164 | } |
35165 | } |
35166 | } |
35167 | |
35168 | // Fallback case. |
35169 | return 1; |
35170 | } |
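// Editor's note: a hedged sketch, not in the source, of the truncation rule
// that the VTRUNC and PACKSS cases above both apply; plain arithmetic with
// illustrative names.
static unsigned signBitsAfterTrunc(unsigned SrcSignBits, unsigned SrcBits,
                                   unsigned DstBits) {
  // Truncation drops the top (SrcBits - DstBits) bits; those are all copies
  // of the sign bit whenever SrcSignBits exceeds the dropped count.
  unsigned Dropped = SrcBits - DstBits;
  return SrcSignBits > Dropped ? SrcSignBits - Dropped : 1;
}
// e.g. a source with 20 of 32 bits known to be sign bits keeps 20 - 16 = 4
// sign bits after truncation to 16 bits.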
35171 | |
35172 | SDValue X86TargetLowering::unwrapAddress(SDValue N) const { |
35173 | if (N->getOpcode() == X86ISD::Wrapper || N->getOpcode() == X86ISD::WrapperRIP) |
35174 | return N->getOperand(0); |
35175 | return N; |
35176 | } |
35177 | |
35178 | |
35179 | // Narrow LN to a VZEXT_LOAD: load MemVT and zero-extend it into VT. |
35180 | static SDValue narrowLoadToVZLoad(LoadSDNode *LN, MVT MemVT, MVT VT, |
35181 | SelectionDAG &DAG) { |
35182 | // Only simple (non-volatile, non-atomic) loads can be narrowed. |
35183 | if (!LN->isSimple()) |
35184 | return SDValue(); |
35185 | |
35186 | SDVTList Tys = DAG.getVTList(VT, MVT::Other); |
35187 | SDValue Ops[] = {LN->getChain(), LN->getBasePtr()}; |
35188 | return DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, SDLoc(LN), Tys, Ops, MemVT, |
35189 | LN->getPointerInfo(), LN->getOriginalAlign(), |
35190 | LN->getMemOperand()->getFlags()); |
35191 | } |
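// Editor's note: a hypothetical call site (illustrative only, not from this
// file) showing how the helper above would typically be used to shrink a
// wide load whose upper bytes are discarded:
//
//   if (auto *LN = dyn_cast<LoadSDNode>(peekThroughOneUseBitcasts(Src)))
//     if (SDValue VZLoad = narrowLoadToVZLoad(LN, MVT::i64, MVT::v2i64, DAG))
//       return VZLoad; // loads 8 bytes and zero-fills the upper lane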
35192 | |
35193 | // Attempt to match a combined shuffle mask against supported unary shuffle |
35194 | // instructions. |
35195 | // TODO: Investigate sharing more of this with shuffle lowering. |
35196 | static bool matchUnaryShuffle(MVT MaskVT, ArrayRef<int> Mask, |
35197 | bool AllowFloatDomain, bool AllowIntDomain, |
35198 | SDValue &V1, const SDLoc &DL, SelectionDAG &DAG, |
35199 | const X86Subtarget &Subtarget, unsigned &Shuffle, |
35200 | MVT &SrcVT, MVT &DstVT) { |
35201 | unsigned NumMaskElts = Mask.size(); |
35202 | unsigned MaskEltSize = MaskVT.getScalarSizeInBits(); |
35203 | |
35204 | // Match against a VZEXT_MOVL instruction that zeroes the upper elements. |
35205 | if (MaskEltSize == 32 && Mask[0] == 0) { |
35206 | if (isUndefOrZero(Mask[1]) && isUndefInRange(Mask, 2, NumMaskElts - 2)) { |
35207 | Shuffle = X86ISD::VZEXT_MOVL; |
35208 | SrcVT = DstVT = !Subtarget.hasSSE2() ? MVT::v4f32 : MaskVT; |
35209 | return true; |
35210 | } |
35211 | if (V1.getOpcode() == ISD::SCALAR_TO_VECTOR && |
35212 | isUndefOrZeroInRange(Mask, 1, NumMaskElts - 1)) { |
35213 | Shuffle = X86ISD::VZEXT_MOVL; |
35214 | SrcVT = DstVT = !Subtarget.hasSSE2() ? MVT::v4f32 : MaskVT; |
35215 | return true; |
35216 | } |
35217 | } |
35218 | |
35219 | // Match against a ANY/ZERO_EXTEND_VECTOR_INREG instruction. |
35220 | // TODO: Add 512-bit vector support (split AVX512F and AVX512BW). |
35221 | if (AllowIntDomain && ((MaskVT.is128BitVector() && Subtarget.hasSSE41()) || |
35222 | (MaskVT.is256BitVector() && Subtarget.hasInt256()))) { |
35223 | unsigned MaxScale = 64 / MaskEltSize; |
35224 | for (unsigned Scale = 2; Scale <= MaxScale; Scale *= 2) { |
35225 | bool MatchAny = true; |
35226 | bool MatchZero = true; |
35227 | unsigned NumDstElts = NumMaskElts / Scale; |
35228 | for (unsigned i = 0; i != NumDstElts && (MatchAny || MatchZero); ++i) { |
35229 | if (!isUndefOrEqual(Mask[i * Scale], (int)i)) { |
35230 | MatchAny = MatchZero = false; |
35231 | break; |
35232 | } |
35233 | MatchAny &= isUndefInRange(Mask, (i * Scale) + 1, Scale - 1); |
35234 | MatchZero &= isUndefOrZeroInRange(Mask, (i * Scale) + 1, Scale - 1); |
35235 | } |
35236 | if (MatchAny || MatchZero) { |
35237 | assert(MatchZero && "Failed to match zext but matched aext?"); |
35238 | unsigned SrcSize = std::max(128u, NumDstElts * MaskEltSize); |
35239 | MVT ScalarTy = MaskVT.isInteger() ? MaskVT.getScalarType() : |
35240 | MVT::getIntegerVT(MaskEltSize); |
35241 | SrcVT = MVT::getVectorVT(ScalarTy, SrcSize / MaskEltSize); |
35242 | |
35243 | if (SrcVT.getSizeInBits() != MaskVT.getSizeInBits()) |
35244 | V1 = extractSubVector(V1, 0, DAG, DL, SrcSize); |
35245 | |
35246 | Shuffle = unsigned(MatchAny ? ISD::ANY_EXTEND : ISD::ZERO_EXTEND); |
35247 | if (SrcVT.getVectorNumElements() != NumDstElts) |
35248 | Shuffle = getOpcode_EXTEND_VECTOR_INREG(Shuffle); |
35249 | |
35250 | DstVT = MVT::getIntegerVT(Scale * MaskEltSize); |
35251 | DstVT = MVT::getVectorVT(DstVT, NumDstElts); |
35252 | return true; |
35253 | } |
35254 | } |
35255 | } |
35256 | |
35257 | // Match against a VZEXT_MOVL instruction, SSE1 only supports 32-bits (MOVSS). |
35258 | if (((MaskEltSize == 32) || (MaskEltSize == 64 && Subtarget.hasSSE2())) && |
35259 | isUndefOrEqual(Mask[0], 0) && |
35260 | isUndefOrZeroInRange(Mask, 1, NumMaskElts - 1)) { |
35261 | Shuffle = X86ISD::VZEXT_MOVL; |
35262 | SrcVT = DstVT = !Subtarget.hasSSE2() ? MVT::v4f32 : MaskVT; |
35263 | return true; |
35264 | } |
35265 | |
35266 | // Check if we have SSE3 which will let us use MOVDDUP etc. The |
35267 | // instructions are no slower than UNPCKLPD but has the option to |
35268 | // fold the input operand into even an unaligned memory load. |
35269 | if (MaskVT.is128BitVector() && Subtarget.hasSSE3() && AllowFloatDomain) { |
35270 | if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0}, V1)) { |
35271 | Shuffle = X86ISD::MOVDDUP; |
35272 | SrcVT = DstVT = MVT::v2f64; |
35273 | return true; |
35274 | } |
35275 | if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0, 2, 2}, V1)) { |
35276 | Shuffle = X86ISD::MOVSLDUP; |
35277 | SrcVT = DstVT = MVT::v4f32; |
35278 | return true; |
35279 | } |
35280 | if (isTargetShuffleEquivalent(MaskVT, Mask, {1, 1, 3, 3}, V1)) { |
35281 | Shuffle = X86ISD::MOVSHDUP; |
35282 | SrcVT = DstVT = MVT::v4f32; |
35283 | return true; |
35284 | } |
35285 | } |
35286 | |
35287 | if (MaskVT.is256BitVector() && AllowFloatDomain) { |
35288 | assert(Subtarget.hasAVX() && "AVX required for 256-bit vector shuffles"); |
35289 | if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0, 2, 2}, V1)) { |
35290 | Shuffle = X86ISD::MOVDDUP; |
35291 | SrcVT = DstVT = MVT::v4f64; |
35292 | return true; |
35293 | } |
35294 | if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0, 2, 2, 4, 4, 6, 6}, V1)) { |
35295 | Shuffle = X86ISD::MOVSLDUP; |
35296 | SrcVT = DstVT = MVT::v8f32; |
35297 | return true; |
35298 | } |
35299 | if (isTargetShuffleEquivalent(MaskVT, Mask, {1, 1, 3, 3, 5, 5, 7, 7}, V1)) { |
35300 | Shuffle = X86ISD::MOVSHDUP; |
35301 | SrcVT = DstVT = MVT::v8f32; |
35302 | return true; |
35303 | } |
35304 | } |
35305 | |
35306 | if (MaskVT.is512BitVector() && AllowFloatDomain) { |
35307 | assert(Subtarget.hasAVX512() && |
35308 | "AVX512 required for 512-bit vector shuffles"); |
35309 | if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0, 2, 2, 4, 4, 6, 6}, V1)) { |
35310 | Shuffle = X86ISD::MOVDDUP; |
35311 | SrcVT = DstVT = MVT::v8f64; |
35312 | return true; |
35313 | } |
35314 | if (isTargetShuffleEquivalent( |
35315 | MaskVT, Mask, |
35316 | {0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14}, V1)) { |
35317 | Shuffle = X86ISD::MOVSLDUP; |
35318 | SrcVT = DstVT = MVT::v16f32; |
35319 | return true; |
35320 | } |
35321 | if (isTargetShuffleEquivalent( |
35322 | MaskVT, Mask, |
35323 | {1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15}, V1)) { |
35324 | Shuffle = X86ISD::MOVSHDUP; |
35325 | SrcVT = DstVT = MVT::v16f32; |
35326 | return true; |
35327 | } |
35328 | } |
35329 | |
35330 | return false; |
35331 | } |
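// Editor's note: a minimal sketch of the mask test behind the MOVDDUP /
// MOVSLDUP / MOVSHDUP matches above - every non-sentinel element must agree
// with the expected pattern. Illustrative only; LLVM's
// isTargetShuffleEquivalent additionally resolves zeroable elements.
static bool masksMatchAllowingUndef(llvm::ArrayRef<int> Mask,
                                    llvm::ArrayRef<int> Expected) {
  if (Mask.size() != Expected.size())
    return false;
  for (size_t i = 0, e = Mask.size(); i != e; ++i)
    if (Mask[i] >= 0 && Mask[i] != Expected[i]) // negative = undef sentinel
      return false;
  return true;
}
// e.g. {0, -1, 2, 2} still matches {0, 0, 2, 2}, so one undef lane does not
// block the MOVSLDUP form.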
35332 | |
35333 | // Attempt to match a combined shuffle mask against supported unary immediate |
35334 | // permute instructions. |
35335 | // TODO: Investigate sharing more of this with shuffle lowering. |
35336 | static bool matchUnaryPermuteShuffle(MVT MaskVT, ArrayRef<int> Mask, |
35337 | const APInt &Zeroable, |
35338 | bool AllowFloatDomain, bool AllowIntDomain, |
35339 | const X86Subtarget &Subtarget, |
35340 | unsigned &Shuffle, MVT &ShuffleVT, |
35341 | unsigned &PermuteImm) { |
35342 | unsigned NumMaskElts = Mask.size(); |
35343 | unsigned InputSizeInBits = MaskVT.getSizeInBits(); |
35344 | unsigned MaskScalarSizeInBits = InputSizeInBits / NumMaskElts; |
35345 | MVT MaskEltVT = MVT::getIntegerVT(MaskScalarSizeInBits); |
35346 | bool ContainsZeros = isAnyZero(Mask); |
35347 | |
35348 | // Handle VPERMI/VPERMILPD vXi64/vXf64 patterns. |
35349 | if (!ContainsZeros && MaskScalarSizeInBits == 64) { |
35350 | // Check for 128-bit lane crossing permutes. |
35351 | if (is128BitLaneCrossingShuffleMask(MaskEltVT, Mask)) { |
35352 | // PERMPD/PERMQ permutes within a 256-bit vector (AVX2). |
35353 | if (Subtarget.hasAVX2() && MaskVT.is256BitVector()) { |
35354 | Shuffle = X86ISD::VPERMI; |
35355 | ShuffleVT = (AllowFloatDomain ? MVT::v4f64 : MVT::v4i64); |
35356 | PermuteImm = getV4X86ShuffleImm(Mask); |
35357 | return true; |
35358 | } |
35359 | if (Subtarget.hasAVX512() && MaskVT.is512BitVector()) { |
35360 | SmallVector<int, 4> RepeatedMask; |
35361 | if (is256BitLaneRepeatedShuffleMask(MVT::v8f64, Mask, RepeatedMask)) { |
35362 | Shuffle = X86ISD::VPERMI; |
35363 | ShuffleVT = (AllowFloatDomain ? MVT::v8f64 : MVT::v8i64); |
35364 | PermuteImm = getV4X86ShuffleImm(RepeatedMask); |
35365 | return true; |
35366 | } |
35367 | } |
35368 | } else if (AllowFloatDomain && Subtarget.hasAVX()) { |
35369 | // VPERMILPD can permute with a non-repeating shuffle. |
35370 | Shuffle = X86ISD::VPERMILPI; |
35371 | ShuffleVT = MVT::getVectorVT(MVT::f64, Mask.size()); |
35372 | PermuteImm = 0; |
35373 | for (int i = 0, e = Mask.size(); i != e; ++i) { |
35374 | int M = Mask[i]; |
35375 | if (M == SM_SentinelUndef) |
35376 | continue; |
35377 | assert(((M / 2) == (i / 2)) && "Out of range shuffle mask index"); |
35378 | PermuteImm |= (M & 1) << i; |
35379 | } |
35380 | return true; |
35381 | } |
35382 | } |
35383 | |
35384 | |
35385 | // Handle PSHUFD/VPERMILPI vXi32/vXf32 repeated patterns. |
35386 | // AVX introduced the VPERMILPD/VPERMILPS float permutes. |
35387 | if ((MaskScalarSizeInBits == 64 || MaskScalarSizeInBits == 32) && |
35388 | !ContainsZeros && (AllowIntDomain || Subtarget.hasAVX())) { |
35389 | SmallVector<int, 4> RepeatedMask; |
35390 | if (is128BitLaneRepeatedShuffleMask(MaskEltVT, Mask, RepeatedMask)) { |
35391 | // Narrow the repeated mask to create 32-bit element permutes. |
35392 | SmallVector<int, 4> WordMask = RepeatedMask; |
35393 | if (MaskScalarSizeInBits == 64) |
35394 | narrowShuffleMaskElts(2, RepeatedMask, WordMask); |
35395 | |
35396 | Shuffle = (AllowIntDomain ? X86ISD::PSHUFD : X86ISD::VPERMILPI); |
35397 | ShuffleVT = (AllowIntDomain ? MVT::i32 : MVT::f32); |
35398 | ShuffleVT = MVT::getVectorVT(ShuffleVT, InputSizeInBits / 32); |
35399 | PermuteImm = getV4X86ShuffleImm(WordMask); |
35400 | return true; |
35401 | } |
35402 | } |
35403 | |
35404 | // Handle PSHUFLW/PSHUFHW vXi16 repeated patterns. |
35405 | if (!ContainsZeros && AllowIntDomain && MaskScalarSizeInBits == 16 && |
35406 | ((MaskVT.is128BitVector() && Subtarget.hasSSE2()) || |
35407 | (MaskVT.is256BitVector() && Subtarget.hasAVX2()) || |
35408 | (MaskVT.is512BitVector() && Subtarget.hasBWI()))) { |
35409 | SmallVector<int, 4> RepeatedMask; |
35410 | if (is128BitLaneRepeatedShuffleMask(MaskEltVT, Mask, RepeatedMask)) { |
35411 | ArrayRef<int> LoMask(RepeatedMask.data() + 0, 4); |
35412 | ArrayRef<int> HiMask(RepeatedMask.data() + 4, 4); |
35413 | |
35414 | // PSHUFLW: permute lower 4 elements only. |
35415 | if (isUndefOrInRange(LoMask, 0, 4) && |
35416 | isSequentialOrUndefInRange(HiMask, 0, 4, 4)) { |
35417 | Shuffle = X86ISD::PSHUFLW; |
35418 | ShuffleVT = MVT::getVectorVT(MVT::i16, InputSizeInBits / 16); |
35419 | PermuteImm = getV4X86ShuffleImm(LoMask); |
35420 | return true; |
35421 | } |
35422 | |
35423 | // PSHUFHW: permute upper 4 elements only. |
35424 | if (isUndefOrInRange(HiMask, 4, 8) && |
35425 | isSequentialOrUndefInRange(LoMask, 0, 4, 0)) { |
35426 | // Offset the HiMask so that we can create the shuffle immediate. |
35427 | int OffsetHiMask[4]; |
35428 | for (int i = 0; i != 4; ++i) |
35429 | OffsetHiMask[i] = (HiMask[i] < 0 ? HiMask[i] : HiMask[i] - 4); |
35430 | |
35431 | Shuffle = X86ISD::PSHUFHW; |
35432 | ShuffleVT = MVT::getVectorVT(MVT::i16, InputSizeInBits / 16); |
35433 | PermuteImm = getV4X86ShuffleImm(OffsetHiMask); |
35434 | return true; |
35435 | } |
35436 | } |
35437 | } |
35438 | |
35439 | // Attempt to match against byte/bit shifts. |
35440 | if (AllowIntDomain && |
35441 | ((MaskVT.is128BitVector() && Subtarget.hasSSE2()) || |
35442 | (MaskVT.is256BitVector() && Subtarget.hasAVX2()) || |
35443 | (MaskVT.is512BitVector() && Subtarget.hasAVX512()))) { |
35444 | int ShiftAmt = matchShuffleAsShift(ShuffleVT, Shuffle, MaskScalarSizeInBits, |
35445 | Mask, 0, Zeroable, Subtarget); |
35446 | if (0 < ShiftAmt && (!ShuffleVT.is512BitVector() || Subtarget.hasBWI() || |
35447 | 32 <= ShuffleVT.getScalarSizeInBits())) { |
35448 | PermuteImm = (unsigned)ShiftAmt; |
35449 | return true; |
35450 | } |
35451 | } |
35452 | |
35453 | // Attempt to match against bit rotates. |
35454 | if (!ContainsZeros && AllowIntDomain && MaskScalarSizeInBits < 64 && |
35455 | ((MaskVT.is128BitVector() && Subtarget.hasXOP()) || |
35456 | Subtarget.hasAVX512())) { |
35457 | int RotateAmt = matchShuffleAsBitRotate(ShuffleVT, MaskScalarSizeInBits, |
35458 | Subtarget, Mask); |
35459 | if (0 < RotateAmt) { |
35460 | Shuffle = X86ISD::VROTLI; |
35461 | PermuteImm = (unsigned)RotateAmt; |
35462 | return true; |
35463 | } |
35464 | } |
35465 | |
35466 | return false; |
35467 | } |
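// Editor's note: a sketch (illustrative name, not the source's helper) of
// the 8-bit immediate that getV4X86ShuffleImm produces for the PSHUFD /
// PSHUFLW / PSHUFHW paths above: two bits per destination element, undef
// lanes defaulting to 0.
static unsigned encodeV4ShuffleImm(llvm::ArrayRef<int> Mask) {
  assert(Mask.size() == 4 && "expected a 4-element repeated mask");
  unsigned Imm = 0;
  for (unsigned i = 0; i != 4; ++i)
    Imm |= (Mask[i] < 0 ? 0u : unsigned(Mask[i] & 3)) << (i * 2);
  return Imm;
}
// e.g. {2, 3, 0, 1} encodes as 0x4E, the classic swap of the two 64-bit
// halves.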
35468 | |
35469 | // Attempt to match a combined unary shuffle mask against supported binary |
35470 | // shuffle instructions. |
35471 | // TODO: Investigate sharing more of this with shuffle lowering. |
35472 | static bool matchBinaryShuffle(MVT MaskVT, ArrayRef<int> Mask, |
35473 | bool AllowFloatDomain, bool AllowIntDomain, |
35474 | SDValue &V1, SDValue &V2, const SDLoc &DL, |
35475 | SelectionDAG &DAG, const X86Subtarget &Subtarget, |
35476 | unsigned &Shuffle, MVT &SrcVT, MVT &DstVT, |
35477 | bool IsUnary) { |
35478 | unsigned NumMaskElts = Mask.size(); |
35479 | unsigned EltSizeInBits = MaskVT.getScalarSizeInBits(); |
35480 | |
35481 | if (MaskVT.is128BitVector()) { |
35482 | if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0}) && AllowFloatDomain) { |
35483 | V2 = V1; |
35484 | V1 = (SM_SentinelUndef == Mask[0] ? DAG.getUNDEF(MVT::v4f32) : V1); |
35485 | Shuffle = Subtarget.hasSSE2() ? X86ISD::UNPCKL : X86ISD::MOVLHPS; |
35486 | SrcVT = DstVT = Subtarget.hasSSE2() ? MVT::v2f64 : MVT::v4f32; |
35487 | return true; |
35488 | } |
35489 | if (isTargetShuffleEquivalent(MaskVT, Mask, {1, 1}) && AllowFloatDomain) { |
35490 | V2 = V1; |
35491 | Shuffle = Subtarget.hasSSE2() ? X86ISD::UNPCKH : X86ISD::MOVHLPS; |
35492 | SrcVT = DstVT = Subtarget.hasSSE2() ? MVT::v2f64 : MVT::v4f32; |
35493 | return true; |
35494 | } |
35495 | if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 3}) && |
35496 | Subtarget.hasSSE2() && (AllowFloatDomain || !Subtarget.hasSSE41())) { |
35497 | std::swap(V1, V2); |
35498 | Shuffle = X86ISD::MOVSD; |
35499 | SrcVT = DstVT = MVT::v2f64; |
35500 | return true; |
35501 | } |
35502 | if (isTargetShuffleEquivalent(MaskVT, Mask, {4, 1, 2, 3}) && |
35503 | (AllowFloatDomain || !Subtarget.hasSSE41())) { |
35504 | Shuffle = X86ISD::MOVSS; |
35505 | SrcVT = DstVT = MVT::v4f32; |
35506 | return true; |
35507 | } |
35508 | } |
35509 | |
35510 | // Attempt to match against either an unary or binary PACKSS/PACKUS shuffle. |
35511 | if (((MaskVT == MVT::v8i16 || MaskVT == MVT::v16i8) && Subtarget.hasSSE2()) || |
35512 | ((MaskVT == MVT::v16i16 || MaskVT == MVT::v32i8) && Subtarget.hasInt256()) || |
35513 | ((MaskVT == MVT::v32i16 || MaskVT == MVT::v64i8) && Subtarget.hasBWI())) { |
35514 | if (matchShuffleWithPACK(MaskVT, SrcVT, V1, V2, Shuffle, Mask, DAG, |
35515 | Subtarget)) { |
35516 | DstVT = MaskVT; |
35517 | return true; |
35518 | } |
35519 | } |
35520 | |
35521 | // Attempt to match against either a unary or binary UNPCKL/UNPCKH shuffle. |
35522 | if ((MaskVT == MVT::v4f32 && Subtarget.hasSSE1()) || |
35523 | (MaskVT.is128BitVector() && Subtarget.hasSSE2()) || |
35524 | (MaskVT.is256BitVector() && 32 <= EltSizeInBits && Subtarget.hasAVX()) || |
35525 | (MaskVT.is256BitVector() && Subtarget.hasAVX2()) || |
35526 | (MaskVT.is512BitVector() && Subtarget.hasAVX512())) { |
35527 | if (matchShuffleWithUNPCK(MaskVT, V1, V2, Shuffle, IsUnary, Mask, DL, DAG, |
35528 | Subtarget)) { |
35529 | SrcVT = DstVT = MaskVT; |
35530 | if (MaskVT.is256BitVector() && !Subtarget.hasAVX2()) |
35531 | SrcVT = DstVT = (32 == EltSizeInBits ? MVT::v8f32 : MVT::v4f64); |
35532 | return true; |
35533 | } |
35534 | } |
35535 | |
35536 | // Attempt to match against a OR if we're performing a blend shuffle and the |
35537 | // non-blended source element is zero in each case. |
35538 | if ((EltSizeInBits % V1.getScalarValueSizeInBits()) == 0 && |
35539 | (EltSizeInBits % V2.getScalarValueSizeInBits()) == 0) { |
35540 | bool IsBlend = true; |
35541 | unsigned NumV1Elts = V1.getValueType().getVectorNumElements(); |
35542 | unsigned NumV2Elts = V2.getValueType().getVectorNumElements(); |
35543 | unsigned Scale1 = NumV1Elts / NumMaskElts; |
35544 | unsigned Scale2 = NumV2Elts / NumMaskElts; |
35545 | APInt DemandedZeroV1 = APInt::getNullValue(NumV1Elts); |
35546 | APInt DemandedZeroV2 = APInt::getNullValue(NumV2Elts); |
35547 | for (unsigned i = 0; i != NumMaskElts; ++i) { |
35548 | int M = Mask[i]; |
35549 | if (M == SM_SentinelUndef) |
35550 | continue; |
35551 | if (M == SM_SentinelZero) { |
35552 | DemandedZeroV1.setBits(i * Scale1, (i + 1) * Scale1); |
35553 | DemandedZeroV2.setBits(i * Scale2, (i + 1) * Scale2); |
35554 | continue; |
35555 | } |
35556 | if (M == (int)i) { |
35557 | DemandedZeroV2.setBits(i * Scale2, (i + 1) * Scale2); |
35558 | continue; |
35559 | } |
35560 | if (M == (int)(i + NumMaskElts)) { |
35561 | DemandedZeroV1.setBits(i * Scale1, (i + 1) * Scale1); |
35562 | continue; |
35563 | } |
35564 | IsBlend = false; |
35565 | break; |
35566 | } |
35567 | if (IsBlend && |
35568 | DAG.computeKnownBits(V1, DemandedZeroV1).isZero() && |
35569 | DAG.computeKnownBits(V2, DemandedZeroV2).isZero()) { |
35570 | Shuffle = ISD::OR; |
35571 | SrcVT = DstVT = MaskVT.changeTypeToInteger(); |
35572 | return true; |
35573 | } |
35574 | } |
35575 | |
35576 | return false; |
35577 | } |
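// Editor's note: a hedged sketch of the per-lane test that the blend-as-OR
// match above relies on - a two-input blend keeps every element in its own
// lane, drawn from either source. Illustrative only; the code above must
// additionally prove the non-selected elements are zero.
static bool isPerLaneBlendMask(llvm::ArrayRef<int> Mask) {
  int NumElts = (int)Mask.size();
  for (int i = 0; i != NumElts; ++i)
    if (Mask[i] >= 0 && Mask[i] != i && Mask[i] != i + NumElts)
      return false; // element crosses lanes: not a blend
  return true;
}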
35578 | |
35579 | static bool matchBinaryPermuteShuffle( |
35580 | MVT MaskVT, ArrayRef<int> Mask, const APInt &Zeroable, |
35581 | bool AllowFloatDomain, bool AllowIntDomain, SDValue &V1, SDValue &V2, |
35582 | const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget, |
35583 | unsigned &Shuffle, MVT &ShuffleVT, unsigned &PermuteImm) { |
35584 | unsigned NumMaskElts = Mask.size(); |
35585 | unsigned EltSizeInBits = MaskVT.getScalarSizeInBits(); |
35586 | |
35587 | // Attempt to match against VALIGND/VALIGNQ rotate. |
35588 | if (AllowIntDomain && (EltSizeInBits == 64 || EltSizeInBits == 32) && |
35589 | ((MaskVT.is128BitVector() && Subtarget.hasVLX()) || |
35590 | (MaskVT.is256BitVector() && Subtarget.hasVLX()) || |
35591 | (MaskVT.is512BitVector() && Subtarget.hasAVX512()))) { |
35592 | if (!isAnyZero(Mask)) { |
35593 | int Rotation = matchShuffleAsElementRotate(V1, V2, Mask); |
35594 | if (0 < Rotation) { |
35595 | Shuffle = X86ISD::VALIGN; |
35596 | if (EltSizeInBits == 64) |
35597 | ShuffleVT = MVT::getVectorVT(MVT::i64, MaskVT.getSizeInBits() / 64); |
35598 | else |
35599 | ShuffleVT = MVT::getVectorVT(MVT::i32, MaskVT.getSizeInBits() / 32); |
35600 | PermuteImm = Rotation; |
35601 | return true; |
35602 | } |
35603 | } |
35604 | } |
35605 | |
35606 | // Attempt to match against PALIGNR byte rotate. |
35607 | if (AllowIntDomain && ((MaskVT.is128BitVector() && Subtarget.hasSSSE3()) || |
35608 | (MaskVT.is256BitVector() && Subtarget.hasAVX2()) || |
35609 | (MaskVT.is512BitVector() && Subtarget.hasBWI()))) { |
35610 | int ByteRotation = matchShuffleAsByteRotate(MaskVT, V1, V2, Mask); |
35611 | if (0 < ByteRotation) { |
35612 | Shuffle = X86ISD::PALIGNR; |
35613 | ShuffleVT = MVT::getVectorVT(MVT::i8, MaskVT.getSizeInBits() / 8); |
35614 | PermuteImm = ByteRotation; |
35615 | return true; |
35616 | } |
35617 | } |
35618 | |
35619 | // Attempt to match against a blend with immediate (BLENDI). |
35620 | if ((NumMaskElts <= 8 && ((Subtarget.hasSSE41() && MaskVT.is128BitVector()) || |
35621 | (Subtarget.hasAVX() && MaskVT.is256BitVector()))) || |
35622 | (MaskVT == MVT::v16i16 && Subtarget.hasAVX2())) { |
35623 | uint64_t BlendMask = 0; |
35624 | bool ForceV1Zero = false, ForceV2Zero = false; |
35625 | SmallVector<int, 8> TargetMask(Mask.begin(), Mask.end()); |
35626 | if (matchShuffleAsBlend(V1, V2, TargetMask, Zeroable, ForceV1Zero, |
35627 | ForceV2Zero, BlendMask)) { |
35628 | if (MaskVT == MVT::v16i16) { |
35629 | // The v16i16 blend immediate repeats per 128-bit lane, so the mask must too. |
35630 | SmallVector<int, 8> RepeatedMask; |
35631 | if (isRepeatedTargetShuffleMask(128, MaskVT, TargetMask, |
35632 | RepeatedMask)) { |
35633 | assert(RepeatedMask.size() == 8 && |
35634 | "Repeated mask size doesn't match!"); |
35635 | PermuteImm = 0; |
35636 | for (int i = 0; i < 8; ++i) |
35637 | if (RepeatedMask[i] >= 8) |
35638 | PermuteImm |= 1 << i; |
35639 | V1 = ForceV1Zero ? getZeroVector(MaskVT, Subtarget, DAG, DL) : V1; |
35640 | V2 = ForceV2Zero ? getZeroVector(MaskVT, Subtarget, DAG, DL) : V2; |
35641 | Shuffle = X86ISD::BLENDI; |
35642 | ShuffleVT = MaskVT; |
35643 | return true; |
35644 | } |
35645 | } else { |
35646 | V1 = ForceV1Zero ? getZeroVector(MaskVT, Subtarget, DAG, DL) : V1; |
35647 | V2 = ForceV2Zero ? getZeroVector(MaskVT, Subtarget, DAG, DL) : V2; |
35648 | PermuteImm = (unsigned)BlendMask; |
35649 | Shuffle = X86ISD::BLENDI; |
35650 | ShuffleVT = MaskVT; |
35651 | return true; |
35652 | } |
35653 | } |
35654 | } |
35655 | |
35656 | // Attempt to combine to INSERTPS, but only if it has elements that need |
35657 | // to be set to zero. |
35658 | if (AllowFloatDomain && EltSizeInBits == 32 && Subtarget.hasSSE41() && |
35659 | MaskVT.is128BitVector() && isAnyZero(Mask) && |
35660 | matchShuffleAsInsertPS(V1, V2, PermuteImm, Zeroable, Mask, DAG)) { |
35661 | Shuffle = X86ISD::INSERTPS; |
35662 | ShuffleVT = MVT::v4f32; |
35663 | return true; |
35664 | } |
35665 | |
35666 | // Attempt to combine to SHUFPD. |
35667 | if (AllowFloatDomain && EltSizeInBits == 64 && |
35668 | ((MaskVT.is128BitVector() && Subtarget.hasSSE2()) || |
35669 | (MaskVT.is256BitVector() && Subtarget.hasAVX()) || |
35670 | (MaskVT.is512BitVector() && Subtarget.hasAVX512()))) { |
35671 | bool ForceV1Zero = false, ForceV2Zero = false; |
35672 | if (matchShuffleWithSHUFPD(MaskVT, V1, V2, ForceV1Zero, ForceV2Zero, |
35673 | PermuteImm, Mask, Zeroable)) { |
35674 | V1 = ForceV1Zero ? getZeroVector(MaskVT, Subtarget, DAG, DL) : V1; |
35675 | V2 = ForceV2Zero ? getZeroVector(MaskVT, Subtarget, DAG, DL) : V2; |
35676 | Shuffle = X86ISD::SHUFP; |
35677 | ShuffleVT = MVT::getVectorVT(MVT::f64, MaskVT.getSizeInBits() / 64); |
35678 | return true; |
35679 | } |
35680 | } |
35681 | |
35682 | // Attempt to combine to SHUFPS. |
35683 | if (AllowFloatDomain && EltSizeInBits == 32 && |
35684 | ((MaskVT.is128BitVector() && Subtarget.hasSSE1()) || |
35685 | (MaskVT.is256BitVector() && Subtarget.hasAVX()) || |
35686 | (MaskVT.is512BitVector() && Subtarget.hasAVX512()))) { |
35687 | SmallVector<int, 4> RepeatedMask; |
35688 | if (isRepeatedTargetShuffleMask(128, MaskVT, Mask, RepeatedMask)) { |
35689 | // Match each half of the repeated mask, to determine if it just |
35690 | // references one of the vectors, is zeroable or entirely undef. |
35691 | auto MatchHalf = [&](unsigned Offset, int &S0, int &S1) { |
35692 | int M0 = RepeatedMask[Offset]; |
35693 | int M1 = RepeatedMask[Offset + 1]; |
35694 | |
35695 | if (isUndefInRange(RepeatedMask, Offset, 2)) { |
35696 | return DAG.getUNDEF(MaskVT); |
35697 | } else if (isUndefOrZeroInRange(RepeatedMask, Offset, 2)) { |
35698 | S0 = (SM_SentinelUndef == M0 ? -1 : 0); |
35699 | S1 = (SM_SentinelUndef == M1 ? -1 : 1); |
35700 | return getZeroVector(MaskVT, Subtarget, DAG, DL); |
35701 | } else if (isUndefOrInRange(M0, 0, 4) && isUndefOrInRange(M1, 0, 4)) { |
35702 | S0 = (SM_SentinelUndef == M0 ? -1 : M0 & 3); |
35703 | S1 = (SM_SentinelUndef == M1 ? -1 : M1 & 3); |
35704 | return V1; |
35705 | } else if (isUndefOrInRange(M0, 4, 8) && isUndefOrInRange(M1, 4, 8)) { |
35706 | S0 = (SM_SentinelUndef == M0 ? -1 : M0 & 3); |
35707 | S1 = (SM_SentinelUndef == M1 ? -1 : M1 & 3); |
35708 | return V2; |
35709 | } |
35710 | |
35711 | return SDValue(); |
35712 | }; |
35713 | |
35714 | int ShufMask[4] = {-1, -1, -1, -1}; |
35715 | SDValue Lo = MatchHalf(0, ShufMask[0], ShufMask[1]); |
35716 | SDValue Hi = MatchHalf(2, ShufMask[2], ShufMask[3]); |
35717 | |
35718 | if (Lo && Hi) { |
35719 | V1 = Lo; |
35720 | V2 = Hi; |
35721 | Shuffle = X86ISD::SHUFP; |
35722 | ShuffleVT = MVT::getVectorVT(MVT::f32, MaskVT.getSizeInBits() / 32); |
35723 | PermuteImm = getV4X86ShuffleImm(ShufMask); |
35724 | return true; |
35725 | } |
35726 | } |
35727 | } |
35728 | |
35729 | // Attempt to combine to INSERTPS more generally if X86ISD::SHUFP failed. |
35730 | if (AllowFloatDomain && EltSizeInBits == 32 && Subtarget.hasSSE41() && |
35731 | MaskVT.is128BitVector() && |
35732 | matchShuffleAsInsertPS(V1, V2, PermuteImm, Zeroable, Mask, DAG)) { |
35733 | Shuffle = X86ISD::INSERTPS; |
35734 | ShuffleVT = MVT::v4f32; |
35735 | return true; |
35736 | } |
35737 | |
35738 | return false; |
35739 | } |
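// Editor's note: a sketch of the BLENDI immediate convention used above -
// bit i set selects element i from V2, clear selects V1. Illustrative only;
// the in-tree matchShuffleAsBlend also folds zeroable lanes via
// ForceV1Zero/ForceV2Zero.
static uint64_t encodeBlendImm(llvm::ArrayRef<int> Mask) {
  uint64_t Imm = 0;
  for (size_t i = 0, e = Mask.size(); i != e; ++i)
    if (Mask[i] >= (int)e) // index refers to the second operand
      Imm |= 1ULL << i;
  return Imm;
}
// e.g. a v4f32 mask {0, 5, 2, 7} yields 0b1010: lanes 1 and 3 come from V2.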
35740 | |
35741 | static SDValue combineX86ShuffleChainWithExtract( |
35742 | ArrayRef<SDValue> Inputs, SDValue Root, ArrayRef<int> BaseMask, int Depth, |
35743 | bool HasVariableMask, bool AllowVariableCrossLaneMask, |
35744 | bool AllowVariablePerLaneMask, SelectionDAG &DAG, |
35745 | const X86Subtarget &Subtarget); |
35746 | |
35747 | |
35748 | /// Combine an arbitrary chain of shuffles into a single instruction if |
35749 | /// possible. |
35750 | /// |
35751 | /// This is the leaf of the recursive combine below. When we have found some |
35752 | /// chain of single-use x86 shuffle instructions and accumulated the combined |
35753 | /// shuffle mask represented by them, this will try to pattern match that mask |
35754 | /// into either a single instruction if there is a special purpose instruction |
35755 | /// for this operation, or into a PSHUFB instruction. |
35756 | static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root, |
35757 | ArrayRef<int> BaseMask, int Depth, |
35758 | bool HasVariableMask, |
35759 | bool AllowVariableCrossLaneMask, |
35760 | bool AllowVariablePerLaneMask, |
35761 | SelectionDAG &DAG, |
35762 | const X86Subtarget &Subtarget) { |
35763 | assert(!BaseMask.empty() && "Cannot combine an empty shuffle mask!"); |
35764 | assert((Inputs.size() == 1 || Inputs.size() == 2) && |
35765 | "Unexpected number of shuffle inputs!"); |
35766 | |
35767 | MVT RootVT = Root.getSimpleValueType(); |
35768 | unsigned RootSizeInBits = RootVT.getSizeInBits(); |
35769 | unsigned NumRootElts = RootVT.getVectorNumElements(); |
35770 | |
35771 | |
35772 | // Canonicalize shuffle input op to the requested type. |
35773 | auto CanonicalizeShuffleInput = [&](MVT VT, SDValue Op) { |
35774 | return DAG.getBitcast(VT, Op); |
35775 | }; |
35776 | |
35777 | // Find the inputs that enter the chain. Note that multiple uses are OK |
35778 | // here, we're not going to remove the operands we find. |
35779 | bool UnaryShuffle = (Inputs.size() == 1); |
35780 | SDValue V1 = peekThroughBitcasts(Inputs[0]); |
35781 | SDValue V2 = (UnaryShuffle ? DAG.getUNDEF(V1.getValueType()) |
35782 | : peekThroughBitcasts(Inputs[1])); |
35783 | |
35784 | MVT VT1 = V1.getSimpleValueType(); |
35785 | MVT VT2 = V2.getSimpleValueType(); |
35786 | assert(VT1.getSizeInBits() == RootSizeInBits && |
35787 | VT2.getSizeInBits() == RootSizeInBits && "Vector size mismatch"); |
35788 | |
35789 | SDLoc DL(Root); |
35790 | SDValue Res; |
35791 | |
35792 | unsigned NumBaseMaskElts = BaseMask.size(); |
35793 | if (NumBaseMaskElts == 1) { |
35794 | assert(BaseMask[0] == 0 && "Invalid shuffle index found!"); |
35795 | return CanonicalizeShuffleInput(RootVT, V1); |
35796 | } |
35797 | |
35798 | bool OptForSize = DAG.shouldOptForSize(); |
35799 | unsigned BaseMaskEltSizeInBits = RootSizeInBits / NumBaseMaskElts; |
35800 | bool FloatDomain = VT1.isFloatingPoint() || VT2.isFloatingPoint() || |
35801 | (RootVT.isFloatingPoint() && Depth >= 1) || |
35802 | (RootVT.is256BitVector() && !Subtarget.hasAVX2()); |
35803 | |
35804 | // Don't combine if we are a AVX512/EVEX target and the mask element size |
35805 | // is different from the root element size - this would prevent writemasks |
35806 | // from being reused. |
35807 | bool IsMaskedShuffle = false; |
35808 | if (RootSizeInBits == 512 || (Subtarget.hasVLX() && RootSizeInBits >= 128)) { |
35809 | if (Root.hasOneUse() && Root->use_begin()->getOpcode() == ISD::VSELECT && |
35810 | Root->use_begin()->getOperand(0).getScalarValueSizeInBits() == 1) { |
35811 | IsMaskedShuffle = true; |
35812 | } |
35813 | } |
35814 | |
35815 | |
35816 | // If we are shuffling a splat (and not introducing zeros) then we can just |
35817 | // use it directly, as the splat already repeats across the mask elements. |
35818 | if (UnaryShuffle && isTargetShuffleSplat(V1) && !isAnyZero(BaseMask) && |
35819 | (BaseMaskEltSizeInBits % V1.getScalarValueSizeInBits()) == 0 && |
35820 | V1.getValueSizeInBits() >= RootSizeInBits) { |
35821 | return CanonicalizeShuffleInput(RootVT, V1); |
35822 | } |
35823 | |
35824 | |
35825 | // See if the shuffle is a hidden identity shuffle of the repeated inputs. |
35826 | if (VT1 == VT2 && VT1.getSizeInBits() == RootSizeInBits && VT1.isVector()) { |
35827 | SmallVector<int> ScaledMask, IdentityMask; |
35828 | unsigned NumElts = VT1.getVectorNumElements(); |
35829 | if (BaseMask.size() <= NumElts && |
35830 | scaleShuffleElements(BaseMask, NumElts, ScaledMask)) { |
35831 | for (unsigned i = 0; i != NumElts; ++i) |
35832 | IdentityMask.push_back(i); |
35833 | if (isTargetShuffleEquivalent(RootVT, ScaledMask, IdentityMask, V1, V2)) |
35834 | return CanonicalizeShuffleInput(RootVT, V1); |
35835 | } |
35836 | } |
35837 | |
35838 | // Handle 128/256-bit lane shuffles of 512-bit vectors. |
35839 | if (RootVT.is512BitVector() && |
35840 | (NumBaseMaskElts == 2 || NumBaseMaskElts == 4)) { |
35841 | // If the upper subvectors are zeroable, then an extract+insert is more |
35842 | // optimal than using X86ISD::SHUF128. The insertion is free, even if it has |
35843 | // to zero the upper subvectors. |
35844 | if (isUndefOrZeroInRange(BaseMask, 1, NumBaseMaskElts - 1)) { |
35845 | if (Depth == 0 && Root.getOpcode() == ISD::INSERT_SUBVECTOR) |
35846 | return SDValue(); |
35847 | assert(isInRange(BaseMask[0], 0, NumBaseMaskElts) && |
35848 | "Unexpected lane shuffle"); |
35849 | Res = CanonicalizeShuffleInput(RootVT, V1); |
35850 | unsigned SubIdx = BaseMask[0] * (NumRootElts / NumBaseMaskElts); |
35851 | bool UseZero = isAnyZero(BaseMask); |
35852 | Res = extractSubVector(Res, SubIdx, DAG, DL, BaseMaskEltSizeInBits); |
35853 | return widenSubVector(Res, UseZero, Subtarget, DAG, DL, RootSizeInBits); |
35854 | } |
35855 | |
35856 | // Narrow shuffle mask to v4x128. |
35857 | SmallVector<int, 4> Mask; |
35858 | assert((BaseMaskEltSizeInBits % 128) == 0 && "Illegal mask size"); |
35859 | narrowShuffleMaskElts(BaseMaskEltSizeInBits / 128, BaseMask, Mask); |
35860 | |
35861 | // Try to lower to vshuf64x2/vshuf32x4. |
35862 | auto MatchSHUF128 = [&](MVT ShuffleVT, const SDLoc &DL, ArrayRef<int> Mask, |
35863 | SDValue V1, SDValue V2, SelectionDAG &DAG) { |
35864 | unsigned PermMask = 0; |
35865 | // Ensure the elements come from the same Op. |
35866 | SDValue Ops[2] = {DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT)}; |
35867 | for (int i = 0; i < 4; ++i) { |
35868 | assert(Mask[i] >= -1 && "Illegal shuffle sentinel value"); |
35869 | if (Mask[i] < 0) |
35870 | continue; |
35871 | |
35872 | SDValue Op = Mask[i] >= 4 ? V2 : V1; |
35873 | unsigned OpIndex = i / 2; |
35874 | if (Ops[OpIndex].isUndef()) |
35875 | Ops[OpIndex] = Op; |
35876 | else if (Ops[OpIndex] != Op) |
35877 | return SDValue(); |
35878 | |
35879 | |
35880 | // Convert the 128-bit shuffle mask selection values into 128-bit selection |
35881 | // bits defined by a vshuf64x2 instruction's immediate control byte. |
35882 | PermMask |= (Mask[i] % 4) << (i * 2); |
35883 | } |
35884 | |
35885 | return DAG.getNode(X86ISD::SHUF128, DL, ShuffleVT, |
35886 | CanonicalizeShuffleInput(ShuffleVT, Ops[0]), |
35887 | CanonicalizeShuffleInput(ShuffleVT, Ops[1]), |
35888 | DAG.getTargetConstant(PermMask, DL, MVT::i8)); |
35889 | }; |
35890 | |
35891 | |
35892 | // Prefer VPERMQ/VPERMPD when each half of the mask only selects 128-bit |
35893 | // lanes from its own half of the source. |
35894 | bool PreferPERMQ = |
35895 | UnaryShuffle && isUndefOrInRange(Mask[0], 0, 2) && |
35896 | isUndefOrInRange(Mask[1], 0, 2) && isUndefOrInRange(Mask[2], 2, 4) && |
35897 | isUndefOrInRange(Mask[3], 2, 4) && |
35898 | (Mask[0] < 0 || Mask[2] < 0 || Mask[0] == (Mask[2] % 2)) && |
35899 | (Mask[1] < 0 || Mask[3] < 0 || Mask[1] == (Mask[3] % 2)); |
35900 | |
35901 | if (!isAnyZero(Mask) && !PreferPERMQ) { |
35902 | if (Depth == 0 && Root.getOpcode() == X86ISD::SHUF128) |
35903 | return SDValue(); |
35904 | MVT ShuffleVT = (FloatDomain ? MVT::v8f64 : MVT::v8i64); |
35905 | if (SDValue V = MatchSHUF128(ShuffleVT, DL, Mask, V1, V2, DAG)) |
35906 | return DAG.getBitcast(RootVT, V); |
35907 | } |
35908 | } |
35909 | |
35910 | // Handle 128-bit lane shuffles of 256-bit vectors. |
35911 | if (RootVT.is256BitVector() && NumBaseMaskElts == 2) { |
35912 | // If the upper half is zeroable, then an extract+insert is more optimal |
35913 | // than using X86ISD::VPERM2X128. The insertion is free, even if it has to |
35914 | // zero the upper half. |
35915 | if (isUndefOrZero(BaseMask[1])) { |
35916 | if (Depth == 0 && Root.getOpcode() == ISD::INSERT_SUBVECTOR) |
35917 | return SDValue(); |
35918 | assert(isInRange(BaseMask[0], 0, 2) && "Unexpected lane shuffle"); |
35919 | Res = CanonicalizeShuffleInput(RootVT, V1); |
35920 | Res = extract128BitVector(Res, BaseMask[0] * (NumRootElts / 2), DAG, DL); |
35921 | return widenSubVector(Res, BaseMask[1] == SM_SentinelZero, Subtarget, DAG, |
35922 | DL, 256); |
35923 | } |
35924 | |
35925 | |
35926 | // Splatting the low 128-bit subvector: extract it once and concatenate it |
35927 | // with itself rather than using VPERM2X128 (pre-AVX2). |
35928 | if (BaseMask[0] == 0 && BaseMask[1] == 0 && !Subtarget.hasAVX2()) { |
35929 | if (Depth == 0 && Root.getOpcode() == ISD::INSERT_SUBVECTOR) |
35930 | return SDValue(); |
35931 | Res = CanonicalizeShuffleInput(RootVT, V1); |
35932 | Res = extractSubVector(Res, 0, DAG, DL, 128); |
35933 | return concatSubVectors(Res, Res, DAG, DL); |
35934 | } |
35935 | |
35936 | if (Depth == 0 && Root.getOpcode() == X86ISD::VPERM2X128) |
35937 | return SDValue(); |
35938 | |
35939 | |
35940 | // Lower a unary 128-bit lane shuffle to VPERM2X128; with AVX2, in-range |
35941 | // masks prefer VPERMQ/VPERMPD, which allow memory folding. |
35942 | if (UnaryShuffle && |
35943 | !(Subtarget.hasAVX2() && isUndefOrInRange(BaseMask, 0, 2)) && |
35944 | (OptForSize || !isSequentialOrUndefOrZeroInRange(BaseMask, 0, 2, 0))) { |
35945 | unsigned PermMask = 0; |
35946 | PermMask |= ((BaseMask[0] < 0 ? 0x8 : (BaseMask[0] & 1)) << 0); |
35947 | PermMask |= ((BaseMask[1] < 0 ? 0x8 : (BaseMask[1] & 1)) << 4); |
35948 | return DAG.getNode( |
35949 | X86ISD::VPERM2X128, DL, RootVT, CanonicalizeShuffleInput(RootVT, V1), |
35950 | DAG.getUNDEF(RootVT), DAG.getTargetConstant(PermMask, DL, MVT::i8)); |
35951 | } |
35952 | |
35953 | if (Depth == 0 && Root.getOpcode() == X86ISD::SHUF128) |
35954 | return SDValue(); |
35955 | |
35956 | // TODO - handle AVX512VL cases with X86ISD::SHUF128. |
35957 | if (!UnaryShuffle && !IsMaskedShuffle) { |
35958 | assert(llvm::all_of(BaseMask, [](int M) { return 0 <= M && M < 4; }) && |
35959 | "Unexpected shuffle sentinel value"); |
35960 | // Prefer blends to X86ISD::VPERM2X128. |
35961 | if (!((BaseMask[0] == 0 && BaseMask[1] == 3) || |
35962 | (BaseMask[0] == 2 && BaseMask[1] == 1))) { |
35963 | unsigned PermMask = 0; |
35964 | PermMask |= ((BaseMask[0] & 3) << 0); |
35965 | PermMask |= ((BaseMask[1] & 3) << 4); |
35966 | SDValue LHS = isInRange(BaseMask[0], 0, 2) ? V1 : V2; |
35967 | SDValue RHS = isInRange(BaseMask[1], 0, 2) ? V1 : V2; |
35968 | return DAG.getNode(X86ISD::VPERM2X128, DL, RootVT, |
35969 | CanonicalizeShuffleInput(RootVT, LHS), |
35970 | CanonicalizeShuffleInput(RootVT, RHS), |
35971 | DAG.getTargetConstant(PermMask, DL, MVT::i8)); |
35972 | } |
35973 | } |
35974 | } |
35975 | |
35976 | // For masks that have been widened to 128-bit elements or more, |
35977 | // narrow back down to 64-bit elements. |
35978 | SmallVector<int, 64> Mask; |
35979 | if (BaseMaskEltSizeInBits > 64) { |
35980 | assert((BaseMaskEltSizeInBits % 64) == 0 && "Illegal mask size"); |
35981 | int MaskScale = BaseMaskEltSizeInBits / 64; |
35982 | narrowShuffleMaskElts(MaskScale, BaseMask, Mask); |
35983 | } else { |
35984 | Mask.assign(BaseMask.begin(), BaseMask.end()); |
35985 | } |
35986 | |
35987 | |
35988 | // For masked shuffles, we're trying to match the root width for better |
35989 | // writemask folding; attempt to scale the mask accordingly. |
35990 | if (IsMaskedShuffle && NumRootElts > Mask.size()) { |
35991 | assert((NumRootElts % Mask.size()) == 0 && "Illegal mask size"); |
35992 | int MaskScale = NumRootElts / Mask.size(); |
35993 | SmallVector<int, 64> ScaledMask; |
35994 | narrowShuffleMaskElts(MaskScale, Mask, ScaledMask); |
35995 | Mask = std::move(ScaledMask); |
35996 | } |
35997 | |
35998 | unsigned NumMaskElts = Mask.size(); |
35999 | unsigned MaskEltSizeInBits = RootSizeInBits / NumMaskElts; |
36000 | |
36001 | // Determine the effective mask value type. |
36002 | FloatDomain &= (32 <= MaskEltSizeInBits); |
36003 | MVT MaskVT = FloatDomain ? MVT::getFloatingPointVT(MaskEltSizeInBits) |
36004 | : MVT::getIntegerVT(MaskEltSizeInBits); |
36005 | MaskVT = MVT::getVectorVT(MaskVT, NumMaskElts); |
36006 | |
36007 | // Only allow legal mask types. |
36008 | if (!DAG.getTargetLoweringInfo().isTypeLegal(MaskVT)) |
36009 | return SDValue(); |
36010 | |
36011 | // Attempt to match the mask against known shuffle patterns. |
36012 | MVT ShuffleSrcVT, ShuffleVT; |
36013 | unsigned Shuffle, PermuteImm; |
36014 | |
36015 | |
36016 | // Which shuffle domains are permitted? |
36017 | // Permit domain crossing at higher combine depths. |
36018 | bool AllowFloatDomain = FloatDomain || (Depth >= 3); |
36019 | bool AllowIntDomain = (!FloatDomain || (Depth >= 3)) && Subtarget.hasSSE2() && |
36020 | (!MaskVT.is256BitVector() || Subtarget.hasAVX2()); |
36021 | |
36022 | // Determine zeroable mask elements. |
36023 | APInt KnownUndef, KnownZero; |
36024 | resolveZeroablesFromTargetShuffle(Mask, KnownUndef, KnownZero); |
36025 | APInt Zeroable = KnownUndef | KnownZero; |
36026 | |
36027 | if (UnaryShuffle) { |
36028 | // Attempt to match against broadcast-from-vector. |
36029 | // Limit AVX1 to cases where we're loading+broadcasting a scalar element. |
36030 | if ((Subtarget.hasAVX2() || |
36031 | (Subtarget.hasAVX() && 32 <= MaskEltSizeInBits)) && |
36032 | (!IsMaskedShuffle || NumRootElts == NumMaskElts)) { |
36033 | if (isUndefOrEqual(Mask, 0)) { |
36034 | if (V1.getValueType() == MaskVT && |
36035 | V1.getOpcode() == ISD::SCALAR_TO_VECTOR && |
36036 | MayFoldLoad(V1.getOperand(0))) { |
36037 | if (Depth == 0 && Root.getOpcode() == X86ISD::VBROADCAST) |
36038 | return SDValue(); |
36039 | Res = V1.getOperand(0); |
36040 | Res = DAG.getNode(X86ISD::VBROADCAST, DL, MaskVT, Res); |
36041 | return DAG.getBitcast(RootVT, Res); |
36042 | } |
36043 | if (Subtarget.hasAVX2()) { |
36044 | if (Depth == 0 && Root.getOpcode() == X86ISD::VBROADCAST) |
36045 | return SDValue(); |
36046 | Res = CanonicalizeShuffleInput(MaskVT, V1); |
36047 | Res = DAG.getNode(X86ISD::VBROADCAST, DL, MaskVT, Res); |
36048 | return DAG.getBitcast(RootVT, Res); |
36049 | } |
36050 | } |
36051 | } |
36052 | |
36053 | SDValue NewV1 = V1; |
36054 | if (matchUnaryShuffle(MaskVT, Mask, AllowFloatDomain, AllowIntDomain, NewV1, |
36055 | DL, DAG, Subtarget, Shuffle, ShuffleSrcVT, |
36056 | ShuffleVT) && |
36057 | (!IsMaskedShuffle || |
36058 | (NumRootElts == ShuffleVT.getVectorNumElements()))) { |
36059 | if (Depth == 0 && Root.getOpcode() == Shuffle) |
36060 | return SDValue(); |
36061 | Res = CanonicalizeShuffleInput(ShuffleSrcVT, NewV1); |
36062 | Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res); |
36063 | return DAG.getBitcast(RootVT, Res); |
36064 | } |
36065 | |
36066 | if (matchUnaryPermuteShuffle(MaskVT, Mask, Zeroable, AllowFloatDomain, |
36067 | AllowIntDomain, Subtarget, Shuffle, ShuffleVT, |
36068 | PermuteImm) && |
36069 | (!IsMaskedShuffle || |
36070 | (NumRootElts == ShuffleVT.getVectorNumElements()))) { |
36071 | if (Depth == 0 && Root.getOpcode() == Shuffle) |
36072 | return SDValue(); |
36073 | Res = CanonicalizeShuffleInput(ShuffleVT, V1); |
36074 | Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res, |
36075 | DAG.getTargetConstant(PermuteImm, DL, MVT::i8)); |
36076 | return DAG.getBitcast(RootVT, Res); |
36077 | } |
36078 | } |
36079 | |
36080 | |
36081 | // Attempt to combine to INSERTPS, but only if the inserted element has come |
36082 | // from a scalar. |
36083 | if (!UnaryShuffle && AllowFloatDomain && RootSizeInBits == 128 && |
36084 | Subtarget.hasSSE41() && |
36085 | !isTargetShuffleEquivalent(MaskVT, Mask, {4, 1, 2, 3})) { |
36086 | if (MaskEltSizeInBits == 32) { |
36087 | SDValue SrcV1 = V1, SrcV2 = V2; |
36088 | if (matchShuffleAsInsertPS(SrcV1, SrcV2, PermuteImm, Zeroable, Mask, |
36089 | DAG) && |
36090 | SrcV2.getOpcode() == ISD::SCALAR_TO_VECTOR) { |
36091 | if (Depth == 0 && Root.getOpcode() == X86ISD::INSERTPS) |
36092 | return SDValue(); |
36093 | Res = DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32, |
36094 | CanonicalizeShuffleInput(MVT::v4f32, SrcV1), |
36095 | CanonicalizeShuffleInput(MVT::v4f32, SrcV2), |
36096 | DAG.getTargetConstant(PermuteImm, DL, MVT::i8)); |
36097 | return DAG.getBitcast(RootVT, Res); |
36098 | } |
36099 | } |
36100 | if (MaskEltSizeInBits == 64 && |
36101 | isTargetShuffleEquivalent(MaskVT, Mask, {0, 2}) && |
36102 | V2.getOpcode() == ISD::SCALAR_TO_VECTOR && |
36103 | V2.getScalarValueSizeInBits() <= 32) { |
36104 | if (Depth == 0 && Root.getOpcode() == X86ISD::INSERTPS) |
36105 | return SDValue(); |
36106 | PermuteImm = (2 << 4) | (0 << 0); |
36107 | Res = DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32, |
36108 | CanonicalizeShuffleInput(MVT::v4f32, V1), |
36109 | CanonicalizeShuffleInput(MVT::v4f32, V2), |
36110 | DAG.getTargetConstant(PermuteImm, DL, MVT::i8)); |
36111 | return DAG.getBitcast(RootVT, Res); |
36112 | } |
36113 | } |
36114 | |
36115 | SDValue NewV1 = V1; |
36116 | SDValue NewV2 = V2; |
36117 | if (matchBinaryShuffle(MaskVT, Mask, AllowFloatDomain, AllowIntDomain, NewV1, |
36118 | NewV2, DL, DAG, Subtarget, Shuffle, ShuffleSrcVT, |
36119 | ShuffleVT, UnaryShuffle) && |
36120 | (!IsMaskedShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) { |
36121 | if (Depth == 0 && Root.getOpcode() == Shuffle) |
36122 | return SDValue(); |
36123 | NewV1 = CanonicalizeShuffleInput(ShuffleSrcVT, NewV1); |
36124 | NewV2 = CanonicalizeShuffleInput(ShuffleSrcVT, NewV2); |
36125 | Res = DAG.getNode(Shuffle, DL, ShuffleVT, NewV1, NewV2); |
36126 | return DAG.getBitcast(RootVT, Res); |
36127 | } |
36128 | |
36129 | NewV1 = V1; |
36130 | NewV2 = V2; |
36131 | if (matchBinaryPermuteShuffle(MaskVT, Mask, Zeroable, AllowFloatDomain, |
36132 | AllowIntDomain, NewV1, NewV2, DL, DAG, |
36133 | Subtarget, Shuffle, ShuffleVT, PermuteImm) && |
36134 | (!IsMaskedShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) { |
36135 | if (Depth == 0 && Root.getOpcode() == Shuffle) |
36136 | return SDValue(); |
36137 | NewV1 = CanonicalizeShuffleInput(ShuffleVT, NewV1); |
36138 | NewV2 = CanonicalizeShuffleInput(ShuffleVT, NewV2); |
36139 | Res = DAG.getNode(Shuffle, DL, ShuffleVT, NewV1, NewV2, |
36140 | DAG.getTargetConstant(PermuteImm, DL, MVT::i8)); |
36141 | return DAG.getBitcast(RootVT, Res); |
36142 | } |
36143 | |
36144 | // Typically from here on, we need an integer version of MaskVT. |
36145 | MVT IntMaskVT = MVT::getIntegerVT(MaskEltSizeInBits); |
36146 | IntMaskVT = MVT::getVectorVT(IntMaskVT, NumMaskElts); |
36147 | |
36148 | // Annoyingly, SSE4A instructions don't map into the above match helpers. |
36149 | if (Subtarget.hasSSE4A() && AllowIntDomain && RootSizeInBits == 128) { |
36150 | uint64_t BitLen, BitIdx; |
36151 | if (matchShuffleAsEXTRQ(IntMaskVT, V1, V2, Mask, BitLen, BitIdx, |
36152 | Zeroable)) { |
36153 | if (Depth == 0 && Root.getOpcode() == X86ISD::EXTRQI) |
36154 | return SDValue(); |
36155 | V1 = CanonicalizeShuffleInput(IntMaskVT, V1); |
36156 | Res = DAG.getNode(X86ISD::EXTRQI, DL, IntMaskVT, V1, |
36157 | DAG.getTargetConstant(BitLen, DL, MVT::i8), |
36158 | DAG.getTargetConstant(BitIdx, DL, MVT::i8)); |
36159 | return DAG.getBitcast(RootVT, Res); |
36160 | } |
36161 | |
36162 | if (matchShuffleAsINSERTQ(IntMaskVT, V1, V2, Mask, BitLen, BitIdx)) { |
36163 | if (Depth == 0 && Root.getOpcode() == X86ISD::INSERTQI) |
36164 | return SDValue(); |
36165 | V1 = CanonicalizeShuffleInput(IntMaskVT, V1); |
36166 | V2 = CanonicalizeShuffleInput(IntMaskVT, V2); |
36167 | Res = DAG.getNode(X86ISD::INSERTQI, DL, IntMaskVT, V1, V2, |
36168 | DAG.getTargetConstant(BitLen, DL, MVT::i8), |
36169 | DAG.getTargetConstant(BitIdx, DL, MVT::i8)); |
36170 | return DAG.getBitcast(RootVT, Res); |
36171 | } |
36172 | } |
36173 | |
36174 | // Match shuffle against TRUNCATE patterns. |
36175 | if (AllowIntDomain && MaskEltSizeInBits < 64 && Subtarget.hasAVX512()) { |
36176 | // Match against a VTRUNC instruction, accounting for src/dst sizes. |
36177 | if (matchShuffleAsVTRUNC(ShuffleSrcVT, ShuffleVT, IntMaskVT, Mask, Zeroable, |
36178 | Subtarget)) { |
36179 | bool IsTRUNCATE = ShuffleVT.getVectorNumElements() == |
36180 | ShuffleSrcVT.getVectorNumElements(); |
36181 | unsigned Opc = |
36182 | IsTRUNCATE ? (unsigned)ISD::TRUNCATE : (unsigned)X86ISD::VTRUNC; |
36183 | if (Depth == 0 && Root.getOpcode() == Opc) |
36184 | return SDValue(); |
36185 | V1 = CanonicalizeShuffleInput(ShuffleSrcVT, V1); |
36186 | Res = DAG.getNode(Opc, DL, ShuffleVT, V1); |
36187 | if (ShuffleVT.getSizeInBits() < RootSizeInBits) |
36188 | Res = widenSubVector(Res, true, Subtarget, DAG, DL, RootSizeInBits); |
36189 | return DAG.getBitcast(RootVT, Res); |
36190 | } |
36191 | |
36192 | // Match a shuffle of the two inputs as a truncation of their concatenation. |
36193 | if (RootSizeInBits < 512 && |
36194 | ((RootVT.is256BitVector() && Subtarget.useAVX512Regs()) || |
36195 | (RootVT.is128BitVector() && Subtarget.hasVLX())) && |
36196 | (MaskEltSizeInBits > 8 || Subtarget.hasBWI()) && |
36197 | isSequentialOrUndefInRange(Mask, 0, NumMaskElts, 0, 2)) { |
36198 | if (Depth == 0 && Root.getOpcode() == ISD::TRUNCATE) |
36199 | return SDValue(); |
36200 | ShuffleSrcVT = MVT::getIntegerVT(MaskEltSizeInBits * 2); |
36201 | ShuffleSrcVT = MVT::getVectorVT(ShuffleSrcVT, NumMaskElts / 2); |
36202 | V1 = CanonicalizeShuffleInput(ShuffleSrcVT, V1); |
36203 | V2 = CanonicalizeShuffleInput(ShuffleSrcVT, V2); |
36204 | ShuffleSrcVT = MVT::getIntegerVT(MaskEltSizeInBits * 2); |
36205 | ShuffleSrcVT = MVT::getVectorVT(ShuffleSrcVT, NumMaskElts); |
36206 | Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShuffleSrcVT, V1, V2); |
36207 | Res = DAG.getNode(ISD::TRUNCATE, DL, IntMaskVT, Res); |
36208 | return DAG.getBitcast(RootVT, Res); |
36209 | } |
36210 | } |
36211 | |
36212 | // Don't try to re-form single instruction chains under any circumstances |
36213 | // now that we've done encoding canonicalization for them. |
36214 | if (Depth < 1) |
36215 | return SDValue(); |
36216 | |
36217 | // Depth threshold above which we can efficiently use variable mask shuffles. |
36218 | int VariableCrossLaneShuffleDepth = |
36219 | Subtarget.hasFastVariableCrossLaneShuffle() ? 1 : 2; |
36220 | int VariablePerLaneShuffleDepth = |
36221 | Subtarget.hasFastVariablePerLaneShuffle() ? 1 : 2; |
36222 | AllowVariableCrossLaneMask &= |
36223 | (Depth >= VariableCrossLaneShuffleDepth) || HasVariableMask; |
36224 | AllowVariablePerLaneMask &= |
36225 | (Depth >= VariablePerLaneShuffleDepth) || HasVariableMask; |
36226 | // VPERMI2W/VPERMI2B are 3 uops on Skylake and Icelake so we require a |
36227 | // higher depth before combining them. |
36228 | bool AllowBWIVPERMV3 = |
36229 | (Depth >= (VariableCrossLaneShuffleDepth + 2) || HasVariableMask); |
36230 | |
36231 | bool MaskContainsZeros = isAnyZero(Mask); |
36232 | |
36233 | if (is128BitLaneCrossingShuffleMask(MaskVT, Mask)) { |
36234 | // If we have a single input lane-crossing shuffle then lower to VPERMV. |
36235 | if (UnaryShuffle && AllowVariableCrossLaneMask && !MaskContainsZeros) { |
36236 | if (Subtarget.hasAVX2() && |
36237 | (MaskVT == MVT::v8f32 || MaskVT == MVT::v8i32)) { |
36238 | SDValue VPermMask = getConstVector(Mask, IntMaskVT, DAG, DL, true); |
36239 | Res = CanonicalizeShuffleInput(MaskVT, V1); |
36240 | Res = DAG.getNode(X86ISD::VPERMV, DL, MaskVT, VPermMask, Res); |
36241 | return DAG.getBitcast(RootVT, Res); |
36242 | } |
36243 | |
36244 | if ((Subtarget.hasAVX512() && |
36245 | (MaskVT == MVT::v8f64 || MaskVT == MVT::v8i64 || |
36246 | MaskVT == MVT::v16f32 || MaskVT == MVT::v16i32)) || |
36247 | (Subtarget.hasBWI() && |
36248 | (MaskVT == MVT::v16i16 || MaskVT == MVT::v32i16)) || |
36249 | (Subtarget.hasVBMI() && |
36250 | (MaskVT == MVT::v32i8 || MaskVT == MVT::v64i8))) { |
36251 | V1 = CanonicalizeShuffleInput(MaskVT, V1); |
36252 | V2 = DAG.getUNDEF(MaskVT); |
36253 | Res = lowerShuffleWithPERMV(DL, MaskVT, Mask, V1, V2, Subtarget, DAG); |
36254 | return DAG.getBitcast(RootVT, Res); |
36255 | } |
36256 | } |
36257 | |
36258 | // Lower a unary+zero lane-crossing shuffle as VPERMV3 with a zero vector |
36259 | // as the second source (non-VLX will pad to 512-bit shuffles). |
36260 | if (UnaryShuffle && AllowVariableCrossLaneMask && |
36261 | ((Subtarget.hasAVX512() && |
36262 | (MaskVT == MVT::v8f64 || MaskVT == MVT::v8i64 || |
36263 | MaskVT == MVT::v4f64 || MaskVT == MVT::v4i64 || |
36264 | MaskVT == MVT::v8f32 || MaskVT == MVT::v8i32 || |
36265 | MaskVT == MVT::v16f32 || MaskVT == MVT::v16i32)) || |
36266 | (Subtarget.hasBWI() && AllowBWIVPERMV3 && |
36267 | (MaskVT == MVT::v16i16 || MaskVT == MVT::v32i16)) || |
36268 | (Subtarget.hasVBMI() && AllowBWIVPERMV3 && |
36269 | (MaskVT == MVT::v32i8 || MaskVT == MVT::v64i8)))) { |
36270 | // Adjust shuffle mask - replace SM_SentinelZero with second source index. |
36271 | for (unsigned i = 0; i != NumMaskElts; ++i) |
36272 | if (Mask[i] == SM_SentinelZero) |
36273 | Mask[i] = NumMaskElts + i; |
36274 | V1 = CanonicalizeShuffleInput(MaskVT, V1); |
36275 | V2 = getZeroVector(MaskVT, Subtarget, DAG, DL); |
36276 | Res = lowerShuffleWithPERMV(DL, MaskVT, Mask, V1, V2, Subtarget, DAG); |
36277 | return DAG.getBitcast(RootVT, Res); |
36278 | } |
36279 | |
36280 | // If that failed and either input is extracted then try to combine as a |
36281 | // shuffle with the larger type. |
36282 | if (SDValue WideShuffle = combineX86ShuffleChainWithExtract( |
36283 | Inputs, Root, BaseMask, Depth, HasVariableMask, |
36284 | AllowVariableCrossLaneMask, AllowVariablePerLaneMask, DAG, |
36285 | Subtarget)) |
36286 | return WideShuffle; |
36287 | |
36288 | // If we have a dual input lane-crossing shuffle then lower to VPERMV3 |
36289 | // (non-VLX will pad to 512-bit shuffles). |
36290 | if (AllowVariableCrossLaneMask && !MaskContainsZeros && |
36291 | ((Subtarget.hasAVX512() && |
36292 | (MaskVT == MVT::v8f64 || MaskVT == MVT::v8i64 || |
36293 | MaskVT == MVT::v4f64 || MaskVT == MVT::v4i64 || |
36294 | MaskVT == MVT::v16f32 || MaskVT == MVT::v16i32 || |
36295 | MaskVT == MVT::v8f32 || MaskVT == MVT::v8i32)) || |
36296 | (Subtarget.hasBWI() && AllowBWIVPERMV3 && |
36297 | (MaskVT == MVT::v16i16 || MaskVT == MVT::v32i16)) || |
36298 | (Subtarget.hasVBMI() && AllowBWIVPERMV3 && |
36299 | (MaskVT == MVT::v32i8 || MaskVT == MVT::v64i8)))) { |
36300 | V1 = CanonicalizeShuffleInput(MaskVT, V1); |
36301 | V2 = CanonicalizeShuffleInput(MaskVT, V2); |
36302 | Res = lowerShuffleWithPERMV(DL, MaskVT, Mask, V1, V2, Subtarget, DAG); |
36303 | return DAG.getBitcast(RootVT, Res); |
36304 | } |
36305 | return SDValue(); |
36306 | } |
36307 | |
36308 | // See if we can combine a single input shuffle with zeros to a bit-mask, |
36309 | // which is much simpler than any shuffle. |
36310 | if (UnaryShuffle && MaskContainsZeros && AllowVariablePerLaneMask && |
36311 | isSequentialOrUndefOrZeroInRange(Mask, 0, NumMaskElts, 0) && |
36312 | DAG.getTargetLoweringInfo().isTypeLegal(MaskVT)) { |
36313 | APInt Zero = APInt::getNullValue(MaskEltSizeInBits); |
36314 | APInt AllOnes = APInt::getAllOnesValue(MaskEltSizeInBits); |
36315 | APInt UndefElts(NumMaskElts, 0); |
36316 | SmallVector<APInt, 64> EltBits(NumMaskElts, Zero); |
36317 | for (unsigned i = 0; i != NumMaskElts; ++i) { |
36318 | int M = Mask[i]; |
36319 | if (M == SM_SentinelUndef) { |
36320 | UndefElts.setBit(i); |
36321 | continue; |
36322 | } |
36323 | if (M == SM_SentinelZero) |
36324 | continue; |
36325 | EltBits[i] = AllOnes; |
36326 | } |
36327 | SDValue BitMask = getConstVector(EltBits, UndefElts, MaskVT, DAG, DL); |
36328 | Res = CanonicalizeShuffleInput(MaskVT, V1); |
36329 | unsigned AndOpcode = |
36330 | MaskVT.isFloatingPoint() ? unsigned(X86ISD::FAND) : unsigned(ISD::AND); |
36331 | Res = DAG.getNode(AndOpcode, DL, MaskVT, Res, BitMask); |
36332 | return DAG.getBitcast(RootVT, Res); |
36333 | } |
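      | // [editor] Worked example (illustrative, not from the source): for |
      | // Mask = {0, SM_SentinelZero, 2, SM_SentinelUndef} the loop above builds |
      | // BitMask = {all-ones, 0, all-ones, undef}, so the whole shuffle chain |
      | // collapses to a single AND of V1 with a constant vector. |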
36334 | |
36335 | |
36336 | // If we have a single input shuffle with different shuffle patterns in the |
36337 | // 128-bit lanes, use the variable mask form (VPERMILPV). |
36338 | if (UnaryShuffle && AllowVariablePerLaneMask && !MaskContainsZeros && |
36339 | ((MaskVT == MVT::v8f32 && Subtarget.hasAVX()) || |
36340 | (MaskVT == MVT::v16f32 && Subtarget.hasAVX512()))) { |
36341 | SmallVector<SDValue, 16> VPermIdx; |
36342 | for (int M : Mask) { |
36343 | SDValue Idx = |
36344 | M < 0 ? DAG.getUNDEF(MVT::i32) : DAG.getConstant(M % 4, DL, MVT::i32); |
36345 | VPermIdx.push_back(Idx); |
36346 | } |
36347 | SDValue VPermMask = DAG.getBuildVector(IntMaskVT, DL, VPermIdx); |
36348 | Res = CanonicalizeShuffleInput(MaskVT, V1); |
36349 | Res = DAG.getNode(X86ISD::VPERMILPV, DL, MaskVT, Res, VPermMask); |
36350 | return DAG.getBitcast(RootVT, Res); |
36351 | } |
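      | // [editor] Worked example (illustrative): the unary v8f32 mask |
      | // {1,0,3,2, 5,4,7,6} is in-lane, so taking each index modulo 4 yields the |
      | // VPERMILPV index vector {1,0,3,2, 1,0,3,2}, applied per 128-bit lane. |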
36352 | |
36353 | // With XOP, binary shuffles of 128/256-bit floating point vectors can |
36354 | // combine to VPERMIL2PD/VPERMIL2PS. |
36355 | if (AllowVariablePerLaneMask && Subtarget.hasXOP() && |
36356 | (MaskVT == MVT::v2f64 || MaskVT == MVT::v4f64 || MaskVT == MVT::v4f32 || |
36357 | MaskVT == MVT::v8f32)) { |
36358 | // VPERMIL2 Operation. |
36359 | // Bits[3] - Match Bit. |
36360 | // Bits[2:1] - (Per Lane) PD Shuffle Mask. |
36361 | // Bits[2:0] - (Per Lane) PS Shuffle Mask. |
36362 | unsigned NumLanes = MaskVT.getSizeInBits() / 128; |
36363 | unsigned NumEltsPerLane = NumMaskElts / NumLanes; |
36364 | SmallVector<int, 8> VPerm2Idx; |
36365 | unsigned M2ZImm = 0; |
36366 | for (int M : Mask) { |
36367 | if (M == SM_SentinelUndef) { |
36368 | VPerm2Idx.push_back(-1); |
36369 | continue; |
36370 | } |
36371 | if (M == SM_SentinelZero) { |
36372 | M2ZImm = 2; |
36373 | VPerm2Idx.push_back(8); |
36374 | continue; |
36375 | } |
36376 | int Index = (M % NumEltsPerLane) + ((M / NumMaskElts) * NumEltsPerLane); |
36377 | Index = (MaskVT.getScalarSizeInBits() == 64 ? Index << 1 : Index); |
36378 | VPerm2Idx.push_back(Index); |
36379 | } |
36380 | V1 = CanonicalizeShuffleInput(MaskVT, V1); |
36381 | V2 = CanonicalizeShuffleInput(MaskVT, V2); |
36382 | SDValue VPerm2MaskOp = getConstVector(VPerm2Idx, IntMaskVT, DAG, DL, true); |
36383 | Res = DAG.getNode(X86ISD::VPERMIL2, DL, MaskVT, V1, V2, VPerm2MaskOp, |
36384 | DAG.getTargetConstant(M2ZImm, DL, MVT::i8)); |
36385 | return DAG.getBitcast(RootVT, Res); |
36386 | } |
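      | // [editor] Worked example (illustrative): for a two-input v4f32 mask |
      | // {0, 5, 2, SM_SentinelZero} the indices become {0, 5, 2, 8} (4..7 pick |
      | // from V2, 8 selects a zeroed element) and M2ZImm is set to 2. |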
36387 | // If we have 3 or more shuffle instructions or a chain involving a |
36388 | // variable mask, we can replace them with a single PSHUFB instruction |
36389 | // profitably. Intel's manuals suggest only using PSHUFB if doing so |
36390 | // replaces 5 instructions, but in practice PSHUFB tends to be *very* |
36391 | // fast so we're more aggressive. |
36392 | |
36393 | if (UnaryShuffle && AllowVariablePerLaneMask && |
36394 | ((RootVT.is128BitVector() && Subtarget.hasSSSE3()) || |
36395 | (RootVT.is256BitVector() && Subtarget.hasAVX2()) || |
36396 | (RootVT.is512BitVector() && Subtarget.hasBWI()))) { |
36397 | SmallVector<SDValue, 16> PSHUFBMask; |
36398 | int NumBytes = RootVT.getSizeInBits() / 8; |
36399 | int Ratio = NumBytes / NumMaskElts; |
36400 | for (int i = 0; i < NumBytes; ++i) { |
36401 | int M = Mask[i / Ratio]; |
36402 | if (M == SM_SentinelUndef) { |
36403 | PSHUFBMask.push_back(DAG.getUNDEF(MVT::i8)); |
36404 | continue; |
36405 | } |
36406 | if (M == SM_SentinelZero) { |
36407 | PSHUFBMask.push_back(DAG.getConstant(0x80, DL, MVT::i8)); |
36408 | continue; |
36409 | } |
36410 | M = Ratio * M + i % Ratio; |
36411 | assert((M / 16) == (i / 16) && "Lane crossing detected"); |
36412 | PSHUFBMask.push_back(DAG.getConstant(M, DL, MVT::i8)); |
36413 | } |
36414 | MVT ByteVT = MVT::getVectorVT(MVT::i8, NumBytes); |
36415 | Res = CanonicalizeShuffleInput(ByteVT, V1); |
36416 | SDValue PSHUFBMaskOp = DAG.getBuildVector(ByteVT, DL, PSHUFBMask); |
36417 | Res = DAG.getNode(X86ISD::PSHUFB, DL, ByteVT, Res, PSHUFBMaskOp); |
36418 | return DAG.getBitcast(RootVT, Res); |
36419 | } |
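      | // [editor] Worked example (illustrative): for a v4i32 root (NumBytes = 16, |
      | // Ratio = 4), Mask[1] = 2 expands to mask bytes {8,9,10,11} in byte |
      | // positions 4..7, while SM_SentinelZero becomes 0x80 (PSHUFB zeroes any |
      | // byte whose mask has the high bit set). |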
36420 | |
36421 | // With XOP, if we have a 128-bit binary input shuffle we can always |
36422 | // combine to VPPERM. We match the depth requirement of PSHUFB - VPPERM |
36423 | // is never slower than PSHUFB on targets that support both. |
36424 | if (AllowVariablePerLaneMask && RootVT.is128BitVector() && |
36425 | Subtarget.hasXOP()) { |
36426 | // VPPERM Mask Operation |
36427 | // Bits[4:0] - Byte Index (0 - 31) |
36428 | // Bits[7:5] - Permute Operation (0 - Source byte, 4 - ZERO) |
36429 | SmallVector<SDValue, 16> VPPERMMask; |
36430 | int NumBytes = 16; |
36431 | int Ratio = NumBytes / NumMaskElts; |
36432 | for (int i = 0; i < NumBytes; ++i) { |
36433 | int M = Mask[i / Ratio]; |
36434 | if (M == SM_SentinelUndef) { |
36435 | VPPERMMask.push_back(DAG.getUNDEF(MVT::i8)); |
36436 | continue; |
36437 | } |
36438 | if (M == SM_SentinelZero) { |
36439 | VPPERMMask.push_back(DAG.getConstant(0x80, DL, MVT::i8)); |
36440 | continue; |
36441 | } |
36442 | M = Ratio * M + i % Ratio; |
36443 | VPPERMMask.push_back(DAG.getConstant(M, DL, MVT::i8)); |
36444 | } |
36445 | MVT ByteVT = MVT::v16i8; |
36446 | V1 = CanonicalizeShuffleInput(ByteVT, V1); |
36447 | V2 = CanonicalizeShuffleInput(ByteVT, V2); |
36448 | SDValue VPPERMMaskOp = DAG.getBuildVector(ByteVT, DL, VPPERMMask); |
36449 | Res = DAG.getNode(X86ISD::VPPERM, DL, ByteVT, V1, V2, VPPERMMaskOp); |
36450 | return DAG.getBitcast(RootVT, Res); |
36451 | } |
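      | // [editor] Worked example (illustrative): a two-input v2i64 mask {3, 0} |
      | // has Ratio = 8, giving VPPERM byte indices {24..31, 0..7} - indices |
      | // 16..31 select bytes from V2, 0..15 select bytes from V1. |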
36452 | |
36453 | // If that failed and either input is extracted then try to combine as a |
36454 | // shuffle with the larger type. |
36455 | if (SDValue WideShuffle = combineX86ShuffleChainWithExtract( |
36456 | Inputs, Root, BaseMask, Depth, HasVariableMask, |
36457 | AllowVariableCrossLaneMask, AllowVariablePerLaneMask, DAG, Subtarget)) |
36458 | return WideShuffle; |
36459 | |
36460 | // If we have a dual input shuffle then lower to VPERMV3 |
36461 | // (non-VLX targets will pad to 512-bit shuffles). |
36462 | if (!UnaryShuffle && AllowVariablePerLaneMask && !MaskContainsZeros && |
36463 | ((Subtarget.hasAVX512() && |
36464 | (MaskVT == MVT::v2f64 || MaskVT == MVT::v4f64 || MaskVT == MVT::v8f64 || |
36465 | MaskVT == MVT::v2i64 || MaskVT == MVT::v4i64 || MaskVT == MVT::v8i64 || |
36466 | MaskVT == MVT::v4f32 || MaskVT == MVT::v4i32 || MaskVT == MVT::v8f32 || |
36467 | MaskVT == MVT::v8i32 || MaskVT == MVT::v16f32 || |
36468 | MaskVT == MVT::v16i32)) || |
36469 | (Subtarget.hasBWI() && AllowBWIVPERMV3 && |
36470 | (MaskVT == MVT::v8i16 || MaskVT == MVT::v16i16 || |
36471 | MaskVT == MVT::v32i16)) || |
36472 | (Subtarget.hasVBMI() && AllowBWIVPERMV3 && |
36473 | (MaskVT == MVT::v16i8 || MaskVT == MVT::v32i8 || |
36474 | MaskVT == MVT::v64i8)))) { |
36475 | V1 = CanonicalizeShuffleInput(MaskVT, V1); |
36476 | V2 = CanonicalizeShuffleInput(MaskVT, V2); |
36477 | Res = lowerShuffleWithPERMV(DL, MaskVT, Mask, V1, V2, Subtarget, DAG); |
36478 | return DAG.getBitcast(RootVT, Res); |
36479 | } |
36480 | |
36481 | // Failed to find any combines. |
36482 | return SDValue(); |
36483 | } |
36484 | |
36485 | |
36486 | |
36487 | // Combine an arbitrary chain of shuffles + extract_subvectors into a single |
36488 | // narrow shuffle + extract_subvector: peek through any subvector extractions |
36489 | // on the inputs, widen the inputs back up to the widest source vector, |
36490 | // rebase the shuffle mask by each input's subvector offset, retry the whole |
36491 | // combine at the wider type, and extract the original-width result. |
36492 | |
36493 | static SDValue combineX86ShuffleChainWithExtract( |
36494 | ArrayRef<SDValue> Inputs, SDValue Root, ArrayRef<int> BaseMask, int Depth, |
36495 | bool HasVariableMask, bool AllowVariableCrossLaneMask, |
36496 | bool AllowVariablePerLaneMask, SelectionDAG &DAG, |
36497 | const X86Subtarget &Subtarget) { |
36498 | unsigned NumMaskElts = BaseMask.size(); |
36499 | unsigned NumInputs = Inputs.size(); |
36500 | if (NumInputs == 0) |
36501 | return SDValue(); |
36502 | |
36503 | EVT RootVT = Root.getValueType(); |
36504 | unsigned RootSizeInBits = RootVT.getSizeInBits(); |
36505 | assert((RootSizeInBits % NumMaskElts) == 0 && "Unexpected root shuffle mask"); |
36506 | |
36507 | SmallVector<SDValue, 4> WideInputs(Inputs.begin(), Inputs.end()); |
36508 | SmallVector<unsigned, 4> Offsets(NumInputs, 0); |
36509 | |
36510 | // Peek through subvector extractions, tracking each input's element offset |
36511 | // into its widest underlying source vector. |
36512 | unsigned WideSizeInBits = RootSizeInBits; |
36513 | for (unsigned i = 0; i != NumInputs; ++i) { |
36514 | SDValue &Src = WideInputs[i]; |
36515 | unsigned &Offset = Offsets[i]; |
36516 | Src = peekThroughBitcasts(Src); |
36517 | EVT BaseVT = Src.getValueType(); |
36518 | while (Src.getOpcode() == ISD::EXTRACT_SUBVECTOR) { |
36519 | Offset += Src.getConstantOperandVal(1); |
36520 | Src = Src.getOperand(0); |
36521 | } |
36522 | WideSizeInBits = std::max(WideSizeInBits, |
36523 | (unsigned)Src.getValueSizeInBits()); |
36524 | assert((Offset % BaseVT.getVectorNumElements()) == 0 && |
36525 | "Unexpected subvector extraction"); |
36526 | Offset /= BaseVT.getVectorNumElements(); |
36527 | Offset *= NumMaskElts; |
36528 | } |
36529 | |
36530 | // Bail if we're always extracting from the lowest subvectors - |
36531 | // combineX86ShuffleChain should match this for the current width. |
36532 | if (llvm::all_of(Offsets, [](unsigned Offset) { return Offset == 0; })) |
36533 | return SDValue(); |
36534 | |
36535 | unsigned Scale = WideSizeInBits / RootSizeInBits; |
36536 | assert((WideSizeInBits % RootSizeInBits) == 0 && |
36537 | "Unexpected subvector extraction"); |
36538 | |
36539 | |
36540 | // All the inputs must share a single legal scalar type, otherwise there is |
36541 | // no consistent way to widen them to the same wide vector type. |
36542 | EVT WideSVT = WideInputs[0].getValueType().getScalarType(); |
36543 | if (llvm::any_of(WideInputs, [&WideSVT, &DAG](SDValue Op) { |
36544 | return !DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()) || |
36545 | Op.getValueType().getScalarType() != WideSVT; |
36546 | })) |
36547 | return SDValue(); |
36548 | |
36549 | for (SDValue &NewInput : WideInputs) { |
36550 | assert((WideSizeInBits % NewInput.getValueSizeInBits()) == 0 && |
36551 | "Shuffle vector size mismatch"); |
36552 | if (WideSizeInBits > NewInput.getValueSizeInBits()) |
36553 | NewInput = widenSubVector(NewInput, false, Subtarget, DAG, |
36554 | SDLoc(NewInput), WideSizeInBits); |
36555 | assert(WideSizeInBits == NewInput.getValueSizeInBits() && |
36556 | "Unexpected subvector extraction"); |
36557 | } |
36558 | |
36559 | // Rebase each input's offset so the adjusted mask below indexes the inputs in sequence. |
36560 | for (unsigned i = 1; i != NumInputs; ++i) |
36561 | Offsets[i] += i * Scale * NumMaskElts; |
36562 | |
36563 | SmallVector<int, 64> WideMask(BaseMask.begin(), BaseMask.end()); |
36564 | for (int &M : WideMask) { |
36565 | if (M < 0) |
36566 | continue; |
36567 | M = (M % NumMaskElts) + Offsets[M / NumMaskElts]; |
36568 | } |
36569 | WideMask.append((Scale - 1) * NumMaskElts, SM_SentinelUndef); |
36570 | |
36571 | // Remove unused/repeated shuffle source ops. |
36572 | resolveTargetShuffleInputsAndMask(WideInputs, WideMask); |
36573 | assert(!WideInputs.empty() && "Shuffle with no inputs detected"); |
36574 | |
36575 | if (WideInputs.size() > 2) |
36576 | return SDValue(); |
36577 | |
36578 | // Increase depth for every upper subvector we've peeked through. |
36579 | Depth += count_if(Offsets, [](unsigned Offset) { return Offset > 0; }); |
36580 | |
36581 | // Attempt to combine the wider chain. |
36582 | // TODO: Can we use a better Root? |
36583 | SDValue WideRoot = WideInputs[0]; |
36584 | if (SDValue WideShuffle = |
36585 | combineX86ShuffleChain(WideInputs, WideRoot, WideMask, Depth, |
36586 | HasVariableMask, AllowVariableCrossLaneMask, |
36587 | AllowVariablePerLaneMask, DAG, Subtarget)) { |
36588 | WideShuffle = |
36589 | extractSubVector(WideShuffle, 0, DAG, SDLoc(Root), RootSizeInBits); |
36590 | return DAG.getBitcast(RootVT, WideShuffle); |
36591 | } |
36592 | return SDValue(); |
36593 | } |
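      | // [editor] Worked example (illustrative): if both inputs of a 128-bit |
      | // root shuffle are extract_subvectors of 512-bit sources, Scale = 4: the |
      | // mask is rebased by each input's offset, padded with 3 * NumMaskElts |
      | // undefs, combined at 512 bits, and the low 128 bits extracted at the end. |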
36594 | |
36595 | // Canonicalize the combined shuffle mask chain with horizontal ops. |
36596 | // NOTE: This may update Ops and Mask. |
36597 | static SDValue canonicalizeShuffleMaskWithHorizOp( |
36598 | MutableArrayRef<SDValue> Ops, MutableArrayRef<int> Mask, |
36599 | unsigned RootSizeInBits, const SDLoc &DL, SelectionDAG &DAG, |
36600 | const X86Subtarget &Subtarget) { |
36601 | if (Mask.empty() || Ops.empty()) |
36602 | return SDValue(); |
36603 | |
36604 | SmallVector<SDValue> BC; |
36605 | for (SDValue Op : Ops) |
36606 | BC.push_back(peekThroughBitcasts(Op)); |
36607 | |
36608 | // All ops must be the same horizontal-add/sub or pack op, on the same types. |
36609 | SDValue BC0 = BC[0]; |
36610 | EVT VT0 = BC0.getValueType(); |
36611 | unsigned Opcode0 = BC0.getOpcode(); |
36612 | if (VT0.getSizeInBits() != RootSizeInBits || llvm::any_of(BC, [&](SDValue V) { |
36613 | return V.getOpcode() != Opcode0 || V.getValueType() != VT0; |
36614 | })) |
36615 | return SDValue(); |
36616 | |
36617 | bool isHoriz = (Opcode0 == X86ISD::FHADD || Opcode0 == X86ISD::HADD || |
36618 | Opcode0 == X86ISD::FHSUB || Opcode0 == X86ISD::HSUB); |
36619 | bool isPack = (Opcode0 == X86ISD::PACKSS || Opcode0 == X86ISD::PACKUS); |
36620 | if (!isHoriz && !isPack) |
36621 | return SDValue(); |
36622 | |
36623 | |
36624 | bool OneUseOps = llvm::all_of(Ops, [](SDValue Op) { |
36625 | return Op.hasOneUse() && |
36626 | peekThroughBitcasts(Op) == peekThroughOneUseBitcasts(Op); |
36627 | }); |
36628 | |
36629 | int NumElts = VT0.getVectorNumElements(); |
36630 | int NumLanes = VT0.getSizeInBits() / 128; |
36631 | int NumEltsPerLane = NumElts / NumLanes; |
36632 | int NumHalfEltsPerLane = NumEltsPerLane / 2; |
36633 | MVT SrcVT = BC0.getOperand(0).getSimpleValueType(); |
36634 | unsigned EltSizeInBits = RootSizeInBits / Mask.size(); |
36635 | |
36636 | if (NumEltsPerLane >= 4 && |
36637 | (isPack || shouldUseHorizontalOp(Ops.size() == 1, DAG, Subtarget))) { |
36638 | SmallVector<int> LaneMask, ScaledMask; |
36639 | if (isRepeatedTargetShuffleMask(128, EltSizeInBits, Mask, LaneMask) && |
36640 | scaleShuffleElements(LaneMask, 4, ScaledMask)) { |
36641 | // See if we can remove the shuffle by resorting the HOP chain so that |
36642 | // the HOP args are pre-shuffled. |
36643 | // TODO: Generalize to any sized/depth chain. |
36644 | // TODO: Add support for PACKSS/PACKUS. |
36645 | if (isHoriz) { |
36646 | |
36647 | auto GetHOpSrc = [&](int M) { |
36648 | if (M == SM_SentinelUndef) |
36649 | return DAG.getUNDEF(VT0); |
36650 | if (M == SM_SentinelZero) |
36651 | return getZeroVector(VT0.getSimpleVT(), Subtarget, DAG, DL); |
36652 | SDValue Src0 = BC[M / 4]; |
36653 | SDValue Src1 = Src0.getOperand((M % 4) >= 2); |
36654 | if (Src1.getOpcode() == Opcode0 && Src0->isOnlyUserOf(Src1.getNode())) |
36655 | return Src1.getOperand(M % 2); |
36656 | return SDValue(); |
36657 | }; |
36658 | SDValue M0 = GetHOpSrc(ScaledMask[0]); |
36659 | SDValue M1 = GetHOpSrc(ScaledMask[1]); |
36660 | SDValue M2 = GetHOpSrc(ScaledMask[2]); |
36661 | SDValue M3 = GetHOpSrc(ScaledMask[3]); |
36662 | if (M0 && M1 && M2 && M3) { |
36663 | SDValue LHS = DAG.getNode(Opcode0, DL, SrcVT, M0, M1); |
36664 | SDValue RHS = DAG.getNode(Opcode0, DL, SrcVT, M2, M3); |
36665 | return DAG.getNode(Opcode0, DL, VT0, LHS, RHS); |
36666 | } |
36667 | } |
36668 | |
36669 | if (Ops.size() >= 2) { |
36670 | SDValue LHS, RHS; |
36671 | auto GetHOpSrc = [&](int M, int &OutM) { |
36672 | |
36673 | if (M < 0) |
36674 | return M == SM_SentinelUndef; |
36675 | SDValue Src = BC[M / 4].getOperand((M % 4) >= 2); |
36676 | if (!LHS || LHS == Src) { |
36677 | LHS = Src; |
36678 | OutM = (M % 2); |
36679 | return true; |
36680 | } |
36681 | if (!RHS || RHS == Src) { |
36682 | RHS = Src; |
36683 | OutM = (M % 2) + 2; |
36684 | return true; |
36685 | } |
36686 | return false; |
36687 | }; |
36688 | int PostMask[4] = {-1, -1, -1, -1}; |
36689 | if (GetHOpSrc(ScaledMask[0], PostMask[0]) && |
36690 | GetHOpSrc(ScaledMask[1], PostMask[1]) && |
36691 | GetHOpSrc(ScaledMask[2], PostMask[2]) && |
36692 | GetHOpSrc(ScaledMask[3], PostMask[3])) { |
36693 | LHS = DAG.getBitcast(SrcVT, LHS); |
36694 | RHS = DAG.getBitcast(SrcVT, RHS ? RHS : LHS); |
36695 | SDValue Res = DAG.getNode(Opcode0, DL, VT0, LHS, RHS); |
36696 | |
36697 | |
36698 | MVT ShuffleVT = MVT::getVectorVT(MVT::f32, RootSizeInBits / 32); |
36699 | Res = DAG.getBitcast(ShuffleVT, Res); |
36700 | return DAG.getNode(X86ISD::SHUFP, DL, ShuffleVT, Res, Res, |
36701 | getV4X86ShuffleImm8ForMask(PostMask, DL, DAG)); |
36702 | } |
36703 | } |
36704 | } |
36705 | } |
36706 | |
36707 | if (2 < Ops.size()) |
36708 | return SDValue(); |
36709 | |
36710 | SDValue BC1 = BC[BC.size() - 1]; |
36711 | if (Mask.size() == VT0.getVectorNumElements()) { |
36712 | |
36713 | |
36714 | |
36715 | if (Ops.size() == 2) { |
36716 | auto ContainsOps = [](SDValue HOp, SDValue Op) { |
36717 | return Op == HOp.getOperand(0) || Op == HOp.getOperand(1); |
36718 | }; |
36719 | |
36720 | if (ContainsOps(BC1, BC0.getOperand(0)) && |
36721 | ContainsOps(BC1, BC0.getOperand(1))) { |
36722 | ShuffleVectorSDNode::commuteMask(Mask); |
36723 | std::swap(Ops[0], Ops[1]); |
36724 | std::swap(BC0, BC1); |
36725 | } |
36726 | |
36727 | |
36728 | if (ContainsOps(BC0, BC1.getOperand(0)) && |
36729 | ContainsOps(BC0, BC1.getOperand(1))) { |
36730 | for (int &M : Mask) { |
36731 | if (M < NumElts) |
36732 | continue; |
36733 | int SubLane = ((M % NumEltsPerLane) >= NumHalfEltsPerLane) ? 1 : 0; |
36734 | M -= NumElts + (SubLane * NumHalfEltsPerLane); |
36735 | if (BC1.getOperand(SubLane) != BC0.getOperand(0)) |
36736 | M += NumHalfEltsPerLane; |
36737 | } |
36738 | } |
36739 | } |
36740 | |
36741 | |
36742 | for (int i = 0; i != NumElts; ++i) { |
36743 | int &M = Mask[i]; |
36744 | if (isUndefOrZero(M)) |
36745 | continue; |
36746 | if (M < NumElts && BC0.getOperand(0) == BC0.getOperand(1) && |
36747 | (M % NumEltsPerLane) >= NumHalfEltsPerLane) |
36748 | M -= NumHalfEltsPerLane; |
36749 | if (NumElts <= M && BC1.getOperand(0) == BC1.getOperand(1) && |
36750 | (M % NumEltsPerLane) >= NumHalfEltsPerLane) |
36751 | M -= NumHalfEltsPerLane; |
36752 | } |
36753 | } |
36754 | |
36755 | |
36756 | |
36757 | |
36758 | SmallVector<int, 16> TargetMask128, WideMask128; |
36759 | if (isRepeatedTargetShuffleMask(128, EltSizeInBits, Mask, TargetMask128) && |
36760 | scaleShuffleElements(TargetMask128, 2, WideMask128)) { |
36761 | assert(isUndefOrZeroOrInRange(WideMask128, 0, 4) && "Illegal shuffle"); |
36762 | bool SingleOp = (Ops.size() == 1); |
36763 | if (isPack || OneUseOps || |
36764 | shouldUseHorizontalOp(SingleOp, DAG, Subtarget)) { |
36765 | SDValue Lo = isInRange(WideMask128[0], 0, 2) ? BC0 : BC1; |
36766 | SDValue Hi = isInRange(WideMask128[1], 0, 2) ? BC0 : BC1; |
36767 | Lo = Lo.getOperand(WideMask128[0] & 1); |
36768 | Hi = Hi.getOperand(WideMask128[1] & 1); |
36769 | if (SingleOp) { |
36770 | SDValue Undef = DAG.getUNDEF(SrcVT); |
36771 | SDValue Zero = getZeroVector(SrcVT, Subtarget, DAG, DL); |
36772 | Lo = (WideMask128[0] == SM_SentinelZero ? Zero : Lo); |
36773 | Hi = (WideMask128[1] == SM_SentinelZero ? Zero : Hi); |
36774 | Lo = (WideMask128[0] == SM_SentinelUndef ? Undef : Lo); |
36775 | Hi = (WideMask128[1] == SM_SentinelUndef ? Undef : Hi); |
36776 | } |
36777 | return DAG.getNode(Opcode0, DL, VT0, Lo, Hi); |
36778 | } |
36779 | } |
36780 | |
36781 | return SDValue(); |
36782 | } |
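      | // [editor] Worked example (illustrative): shuffle(HADD(a,b), HADD(c,d)) |
      | // whose mask scales to WideMask128 = {0, 2} selects the low 128-bit half |
      | // of each hop, so the whole pattern folds to the single node HADD(a, c). |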
36783 | |
36784 | |
36785 | // Attempt to constant fold all of the constant source ops into a new |
36786 | // constant vector. Returns the folded constant if the entire shuffle folds. |
36787 | static SDValue combineX86ShufflesConstants(ArrayRef<SDValue> Ops, |
36788 | ArrayRef<int> Mask, SDValue Root, |
36789 | bool HasVariableMask, |
36790 | SelectionDAG &DAG, |
36791 | const X86Subtarget &Subtarget) { |
36792 | MVT VT = Root.getSimpleValueType(); |
36793 | |
36794 | unsigned SizeInBits = VT.getSizeInBits(); |
36795 | unsigned NumMaskElts = Mask.size(); |
36796 | unsigned MaskSizeInBits = SizeInBits / NumMaskElts; |
36797 | unsigned NumOps = Ops.size(); |
36798 | |
36799 | |
36800 | bool OneUseConstantOp = false; |
36801 | SmallVector<APInt, 16> UndefEltsOps(NumOps); |
36802 | SmallVector<SmallVector<APInt, 16>, 16> RawBitsOps(NumOps); |
36803 | for (unsigned i = 0; i != NumOps; ++i) { |
36804 | SDValue SrcOp = Ops[i]; |
36805 | OneUseConstantOp |= SrcOp.hasOneUse(); |
36806 | if (!getTargetConstantBitsFromNode(SrcOp, MaskSizeInBits, UndefEltsOps[i], |
36807 | RawBitsOps[i])) |
36808 | return SDValue(); |
36809 | } |
36810 | |
36811 | |
36812 | |
36813 | |
36814 | if (!OneUseConstantOp && !HasVariableMask) |
36815 | return SDValue(); |
36816 | |
36817 | |
36818 | SDLoc DL(Root); |
36819 | APInt UndefElts(NumMaskElts, 0); |
36820 | APInt ZeroElts(NumMaskElts, 0); |
36821 | APInt ConstantElts(NumMaskElts, 0); |
36822 | SmallVector<APInt, 8> ConstantBitData(NumMaskElts, |
36823 | APInt::getNullValue(MaskSizeInBits)); |
36824 | for (unsigned i = 0; i != NumMaskElts; ++i) { |
36825 | int M = Mask[i]; |
36826 | if (M == SM_SentinelUndef) { |
36827 | UndefElts.setBit(i); |
36828 | continue; |
36829 | } else if (M == SM_SentinelZero) { |
36830 | ZeroElts.setBit(i); |
36831 | continue; |
36832 | } |
36833 | assert(0 <= M && M < (int)(NumMaskElts * NumOps)); |
36834 | |
36835 | unsigned SrcOpIdx = (unsigned)M / NumMaskElts; |
36836 | unsigned SrcMaskIdx = (unsigned)M % NumMaskElts; |
36837 | |
36838 | auto &SrcUndefElts = UndefEltsOps[SrcOpIdx]; |
36839 | if (SrcUndefElts[SrcMaskIdx]) { |
36840 | UndefElts.setBit(i); |
36841 | continue; |
36842 | } |
36843 | |
36844 | auto &SrcEltBits = RawBitsOps[SrcOpIdx]; |
36845 | APInt &Bits = SrcEltBits[SrcMaskIdx]; |
36846 | if (!Bits) { |
36847 | ZeroElts.setBit(i); |
36848 | continue; |
36849 | } |
36850 | |
36851 | ConstantElts.setBit(i); |
36852 | ConstantBitData[i] = Bits; |
36853 | } |
36854 | assert((UndefElts | ZeroElts | ConstantElts).isAllOnesValue()); |
36855 | |
36856 | |
36857 | if ((UndefElts | ZeroElts).isAllOnesValue()) |
36858 | return getZeroVector(Root.getSimpleValueType(), Subtarget, DAG, DL); |
36859 | |
36860 | |
36861 | MVT MaskSVT; |
36862 | if (VT.isFloatingPoint() && (MaskSizeInBits == 32 || MaskSizeInBits == 64)) |
36863 | MaskSVT = MVT::getFloatingPointVT(MaskSizeInBits); |
36864 | else |
36865 | MaskSVT = MVT::getIntegerVT(MaskSizeInBits); |
36866 | |
36867 | MVT MaskVT = MVT::getVectorVT(MaskSVT, NumMaskElts); |
36868 | if (!DAG.getTargetLoweringInfo().isTypeLegal(MaskVT)) |
36869 | return SDValue(); |
36870 | |
36871 | SDValue CstOp = getConstVector(ConstantBitData, UndefElts, MaskVT, DAG, DL); |
36872 | return DAG.getBitcast(VT, CstOp); |
36873 | } |
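      | // [editor] Worked example (illustrative): shuffling two v4i32 constant |
      | // vectors with Mask = {0, 5, SM_SentinelZero, SM_SentinelUndef} folds to |
      | // the constant {Op0[0], Op1[1], 0, undef} - SrcOpIdx = M / 4 picks the |
      | // source op and SrcMaskIdx = M % 4 picks the element within it. |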
36874 | |
36875 | namespace llvm { |
36876 | namespace X86 { |
36877 | enum { |
36878 | MaxShuffleCombineDepth = 8 |
36879 | }; |
36880 | } |
36881 | } |
36882 | |
36883 | |
36884 | |
36885 | |
36886 | |
36887 | |
36888 | |
36889 | |
36890 | |
36891 | |
36892 | |
36893 | |
36894 | |
36895 | |
36896 | |
36897 | |
36898 | |
36899 | |
36900 | |
36901 | // Fully generic combining of x86 shuffle instructions. |
36902 | // |
36903 | // This should be the last combine run over the x86 shuffle instructions. |
36904 | // Once they have been fully optimized, this will recursively consider all |
36905 | // chains of single-use shuffle instructions, build a generic model of the |
36906 | // cascaded shuffles, and then combine back to their native forms if |
36907 | // possible. Recursion is bounded by MaxShuffleCombineDepth; the walk peeks |
36908 | // through one-use bitcasts and merges each op's shuffle mask into the |
36909 | // accumulated root mask before recursing into the remaining operands. |
36910 | |
36911 | |
36912 | static SDValue combineX86ShufflesRecursively( |
36913 | ArrayRef<SDValue> SrcOps, int SrcOpIndex, SDValue Root, |
36914 | ArrayRef<int> RootMask, ArrayRef<const SDNode *> SrcNodes, unsigned Depth, |
36915 | unsigned MaxDepth, bool HasVariableMask, bool AllowVariableCrossLaneMask, |
36916 | bool AllowVariablePerLaneMask, SelectionDAG &DAG, |
36917 | const X86Subtarget &Subtarget) { |
36918 | assert(RootMask.size() > 0 && |
36919 | (RootMask.size() > 1 || (RootMask[0] == 0 && SrcOpIndex == 0)) && |
36920 | "Illegal shuffle root mask"); |
36921 | assert(Root.getSimpleValueType().isVector() && |
36922 | "Shuffles operate on vector types!"); |
36923 | unsigned RootSizeInBits = Root.getSimpleValueType().getSizeInBits(); |
36924 | |
36925 | // Bound the depth of our recursive combine because this is ultimately |
36926 | // quadratic in nature. |
36927 | if (Depth >= MaxDepth) |
36928 | return SDValue(); |
36929 | |
36930 | // Directly rip through bitcasts to find the underlying operand. |
36931 | SDValue Op = SrcOps[SrcOpIndex]; |
36932 | Op = peekThroughOneUseBitcasts(Op); |
36933 | |
36934 | EVT VT = Op.getValueType(); |
36935 | if (!VT.isVector() || !VT.isSimple()) |
36936 | return SDValue(); |
36937 | |
36938 | assert((RootSizeInBits % VT.getSizeInBits()) == 0 && |
36939 | "Can only combine shuffles upto size of the root op."); |
36940 | |
36941 | |
36942 | // Extract target shuffle mask and resolve sentinels and inputs. |
36943 | SmallVector<int, 64> OpMask; |
36944 | SmallVector<SDValue, 2> OpInputs; |
36945 | APInt OpUndef, OpZero; |
36946 | APInt OpDemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements()); |
36947 | bool IsOpVariableMask = isTargetShuffleVariableMask(Op.getOpcode()); |
36948 | if (!getTargetShuffleInputs(Op, OpDemandedElts, OpInputs, OpMask, OpUndef, |
36949 | OpZero, DAG, Depth, false)) |
36950 | return SDValue(); |
36951 | |
36952 | // Shuffle inputs must not be larger than the shuffle result - the root |
36953 | // mask has no way to address the extra elements. |
36954 | if (llvm::any_of(OpInputs, [VT](SDValue OpInput) { |
36955 | return OpInput.getValueSizeInBits() > VT.getSizeInBits(); |
36956 | })) |
36957 | return SDValue(); |
36958 | |
36959 | // If the shuffle result was smaller than the root, adjust any multi-input |
36960 | // mask indices and pad the mask with undefs up to the root width. |
36961 | if (RootSizeInBits > VT.getSizeInBits()) { |
36962 | unsigned NumSubVecs = RootSizeInBits / VT.getSizeInBits(); |
36963 | unsigned OpMaskSize = OpMask.size(); |
36964 | if (OpInputs.size() > 1) { |
36965 | unsigned PaddedMaskSize = NumSubVecs * OpMaskSize; |
36966 | for (int &M : OpMask) { |
36967 | if (M < 0) |
36968 | continue; |
36969 | int EltIdx = M % OpMaskSize; |
36970 | int OpIdx = M / OpMaskSize; |
36971 | M = (PaddedMaskSize * OpIdx) + EltIdx; |
36972 | } |
36973 | } |
36974 | OpZero = OpZero.zext(NumSubVecs * OpMaskSize); |
36975 | OpUndef = OpUndef.zext(NumSubVecs * OpMaskSize); |
36976 | OpMask.append((NumSubVecs - 1) * OpMaskSize, SM_SentinelUndef); |
36977 | } |
36978 | |
36979 | SmallVector<int, 64> Mask; |
36980 | SmallVector<SDValue, 16> Ops; |
36981 | |
36982 | // We don't need to merge masks if the root is empty (identity mask). |
36983 | bool EmptyRoot = (Depth == 0) && (RootMask.size() == 1); |
36984 | if (EmptyRoot) { |
36985 | // Only resolve zeros if it will remove an input, otherwise we might end |
36986 | // up in an infinite loop. |
36987 | bool ResolveKnownZeros = true; |
36988 | if (!OpZero.isNullValue()) { |
36989 | APInt UsedInputs = APInt::getNullValue(OpInputs.size()); |
36990 | for (int i = 0, e = OpMask.size(); i != e; ++i) { |
36991 | int M = OpMask[i]; |
36992 | if (OpUndef[i] || OpZero[i] || isUndefOrZero(M)) |
36993 | continue; |
36994 | UsedInputs.setBit(M / OpMask.size()); |
36995 | if (UsedInputs.isAllOnesValue()) { |
36996 | ResolveKnownZeros = false; |
36997 | break; |
36998 | } |
36999 | } |
37000 | } |
37001 | resolveTargetShuffleFromZeroables(OpMask, OpUndef, OpZero, |
37002 | ResolveKnownZeros); |
37003 | |
37004 | Mask = OpMask; |
37005 | Ops.append(OpInputs.begin(), OpInputs.end()); |
37006 | } else { |
37007 | resolveTargetShuffleFromZeroables(OpMask, OpUndef, OpZero); |
37008 | |
37009 | |
37010 | Ops.append(SrcOps.begin(), SrcOps.end()); |
37011 | |
37012 | auto AddOp = [&Ops](SDValue Input, int InsertionPoint) -> int { |
37013 | |
37014 | SDValue InputBC = peekThroughBitcasts(Input); |
37015 | for (int i = 0, e = Ops.size(); i < e; ++i) |
37016 | if (InputBC == peekThroughBitcasts(Ops[i])) |
37017 | return i; |
37018 | |
37019 | if (InsertionPoint >= 0) { |
37020 | Ops[InsertionPoint] = Input; |
37021 | return InsertionPoint; |
37022 | } |
37023 | |
37024 | Ops.push_back(Input); |
37025 | return Ops.size() - 1; |
37026 | }; |
37027 | |
37028 | SmallVector<int, 2> OpInputIdx; |
37029 | for (SDValue OpInput : OpInputs) |
37030 | OpInputIdx.push_back( |
37031 | AddOp(OpInput, OpInputIdx.empty() ? SrcOpIndex : -1)); |
37032 | |
37033 | assert(((RootMask.size() > OpMask.size() && |
37034 | RootMask.size() % OpMask.size() == 0) || |
37035 | (OpMask.size() > RootMask.size() && |
37036 | OpMask.size() % RootMask.size() == 0) || |
37037 | OpMask.size() == RootMask.size()) && |
37038 | "The smaller number of elements must divide the larger."); |
37039 | |
37040 | |
37041 | |
37042 | |
37043 | assert(isPowerOf2_32(RootMask.size()) && |
37044 | "Non-power-of-2 shuffle mask sizes"); |
37045 | assert(isPowerOf2_32(OpMask.size()) && "Non-power-of-2 shuffle mask sizes"); |
37046 | unsigned RootMaskSizeLog2 = countTrailingZeros(RootMask.size()); |
37047 | unsigned OpMaskSizeLog2 = countTrailingZeros(OpMask.size()); |
37048 | |
37049 | unsigned MaskWidth = std::max<unsigned>(OpMask.size(), RootMask.size()); |
37050 | unsigned RootRatio = |
37051 | std::max<unsigned>(1, OpMask.size() >> RootMaskSizeLog2); |
37052 | unsigned OpRatio = std::max<unsigned>(1, RootMask.size() >> OpMaskSizeLog2); |
37053 | assert((RootRatio == 1 || OpRatio == 1) && |
37054 | "Must not have a ratio for both incoming and op masks!"); |
37055 | |
37056 | assert(isPowerOf2_32(MaskWidth) && "Non-power-of-2 shuffle mask sizes"); |
37057 | assert(isPowerOf2_32(RootRatio) && "Non-power-of-2 shuffle mask sizes"); |
37058 | assert(isPowerOf2_32(OpRatio) && "Non-power-of-2 shuffle mask sizes"); |
37059 | unsigned RootRatioLog2 = countTrailingZeros(RootRatio); |
37060 | unsigned OpRatioLog2 = countTrailingZeros(OpRatio); |
37061 | |
37062 | Mask.resize(MaskWidth, SM_SentinelUndef); |
37063 | |
37064 | |
37065 | |
37066 | |
37067 | |
37068 | for (unsigned i = 0; i < MaskWidth; ++i) { |
37069 | unsigned RootIdx = i >> RootRatioLog2; |
37070 | if (RootMask[RootIdx] < 0) { |
37071 | |
37072 | Mask[i] = RootMask[RootIdx]; |
37073 | continue; |
37074 | } |
37075 | |
37076 | unsigned RootMaskedIdx = |
37077 | RootRatio == 1 |
37078 | ? RootMask[RootIdx] |
37079 | : (RootMask[RootIdx] << RootRatioLog2) + (i & (RootRatio - 1)); |
37080 | |
37081 | |
37082 | |
37083 | if ((RootMaskedIdx < (SrcOpIndex * MaskWidth)) || |
37084 | (((SrcOpIndex + 1) * MaskWidth) <= RootMaskedIdx)) { |
37085 | Mask[i] = RootMaskedIdx; |
37086 | continue; |
37087 | } |
37088 | |
37089 | RootMaskedIdx = RootMaskedIdx & (MaskWidth - 1); |
37090 | unsigned OpIdx = RootMaskedIdx >> OpRatioLog2; |
37091 | if (OpMask[OpIdx] < 0) { |
37092 | |
37093 | |
37094 | Mask[i] = OpMask[OpIdx]; |
37095 | continue; |
37096 | } |
37097 | |
37098 | |
37099 | unsigned OpMaskedIdx = OpRatio == 1 ? OpMask[OpIdx] |
37100 | : (OpMask[OpIdx] << OpRatioLog2) + |
37101 | (RootMaskedIdx & (OpRatio - 1)); |
37102 | |
37103 | OpMaskedIdx = OpMaskedIdx & (MaskWidth - 1); |
37104 | int InputIdx = OpMask[OpIdx] / (int)OpMask.size(); |
37105 | assert(0 <= OpInputIdx[InputIdx] && "Unknown target shuffle input"); |
37106 | OpMaskedIdx += OpInputIdx[InputIdx] * MaskWidth; |
37107 | |
37108 | Mask[i] = OpMaskedIdx; |
37109 | } |
37110 | } |
37111 | |
37112 | // Remove unused/repeated shuffle source ops. |
37113 | resolveTargetShuffleInputsAndMask(Ops, Mask); |
37114 | |
37115 | // Handle the all undef/zero/ones cases early. |
37116 | if (all_of(Mask, [](int Idx) { return Idx == SM_SentinelUndef; })) |
37117 | return DAG.getUNDEF(Root.getValueType()); |
37118 | if (all_of(Mask, [](int Idx) { return Idx < 0; })) |
37119 | return getZeroVector(Root.getSimpleValueType(), Subtarget, DAG, |
37120 | SDLoc(Root)); |
37121 | if (Ops.size() == 1 && ISD::isBuildVectorAllOnes(Ops[0].getNode()) && |
37122 | none_of(Mask, [](int M) { return M == SM_SentinelZero; })) |
37123 | return getOnesVector(Root.getValueType(), DAG, SDLoc(Root)); |
37124 | |
37125 | assert(!Ops.empty() && "Shuffle with no inputs detected"); |
37126 | HasVariableMask |= IsOpVariableMask; |
37127 | |
37128 | |
37129 | SmallVector<const SDNode *, 16> CombinedNodes(SrcNodes.begin(), |
37130 | SrcNodes.end()); |
37131 | CombinedNodes.push_back(Op.getNode()); |
37132 | |
37133 | // See if we can recurse into each shuffle source op (if it's a target |
37134 | // shuffle). The source op should only be generally combined if it either has |
37135 | // a single use (i.e. current Op) or all its users have already been combined, |
37136 | // if not then we can still combine but should prevent generation of variable |
37137 | // shuffles to avoid constant pool bloat. |
37138 | // Don't recurse if we already have more source ops than we can combine in |
37139 | // the remaining recursion depth. |
37140 | if (Ops.size() < (MaxDepth - Depth)) { |
37141 | for (int i = 0, e = Ops.size(); i < e; ++i) { |
37142 | |
37143 | |
37144 | SmallVector<int, 64> ResolvedMask = Mask; |
37145 | if (EmptyRoot) |
37146 | resolveTargetShuffleFromZeroables(ResolvedMask, OpUndef, OpZero); |
37147 | bool AllowCrossLaneVar = false; |
37148 | bool AllowPerLaneVar = false; |
37149 | if (Ops[i].getNode()->hasOneUse() || |
37150 | SDNode::areOnlyUsersOf(CombinedNodes, Ops[i].getNode())) { |
37151 | AllowCrossLaneVar = AllowVariableCrossLaneMask; |
37152 | AllowPerLaneVar = AllowVariablePerLaneMask; |
37153 | } |
37154 | if (SDValue Res = combineX86ShufflesRecursively( |
37155 | Ops, i, Root, ResolvedMask, CombinedNodes, Depth + 1, MaxDepth, |
37156 | HasVariableMask, AllowCrossLaneVar, AllowPerLaneVar, DAG, |
37157 | Subtarget)) |
37158 | return Res; |
37159 | } |
37160 | } |
37161 | |
37162 | // Attempt to constant fold all of the constant source ops. |
37163 | if (SDValue Cst = combineX86ShufflesConstants( |
37164 | Ops, Mask, Root, HasVariableMask, DAG, Subtarget)) |
37165 | return Cst; |
37166 | |
37167 | // If constant folding failed and we only have constants, then we have |
37168 | // multiple uses by a single non-variable shuffle - just bail. |
37169 | if (Depth == 0 && llvm::all_of(Ops, [&](SDValue Op) { |
37170 | APInt UndefElts; |
37171 | SmallVector<APInt> RawBits; |
37172 | unsigned EltSizeInBits = RootSizeInBits / Mask.size(); |
37173 | return getTargetConstantBitsFromNode(Op, EltSizeInBits, UndefElts, |
37174 | RawBits); |
37175 | })) { |
37176 | return SDValue(); |
37177 | } |
37178 | |
37179 | |
37180 | // Canonicalize the combined shuffle mask chain with horizontal ops. |
37181 | if (SDValue HOp = canonicalizeShuffleMaskWithHorizOp( |
37182 | Ops, Mask, RootSizeInBits, SDLoc(Root), DAG, Subtarget)) |
37183 | return DAG.getBitcast(Root.getValueType(), HOp); |
37184 | |
37185 | // Widen any subvector shuffle inputs we've collected. |
37186 | if (any_of(Ops, [RootSizeInBits](SDValue Op) { |
37187 | return Op.getValueSizeInBits() < RootSizeInBits; |
37188 | })) { |
37189 | for (SDValue &Op : Ops) |
37190 | if (Op.getValueSizeInBits() < RootSizeInBits) |
37191 | Op = widenSubVector(Op, false, Subtarget, DAG, SDLoc(Op), |
37192 | RootSizeInBits); |
37193 | |
37194 | resolveTargetShuffleInputsAndMask(Ops, Mask); |
37195 | } |
37196 | |
37197 | // Attempt to combine into a single shuffle instruction. |
37198 | if (Ops.size() <= 2) { |
37199 | |
37200 | |
37201 | // Widen the shuffle mask elements as far as possible: masks with fewer, |
37202 | // wider elements are cheaper to match against the native shuffle |
37203 | // instructions below. |
37204 | while (Mask.size() > 1) { |
37205 | SmallVector<int, 64> WidenedMask; |
37206 | if (!canWidenShuffleElements(Mask, WidenedMask)) |
37207 | break; |
37208 | Mask = std::move(WidenedMask); |
37209 | } |
37210 | |
37211 | |
37212 | // Canonicalize the combined shuffle mask by commuting inputs where profitable. |
37213 | if (Ops.size() == 2 && canonicalizeShuffleMaskWithCommute(Mask)) { |
37214 | ShuffleVectorSDNode::commuteMask(Mask); |
37215 | std::swap(Ops[0], Ops[1]); |
37216 | } |
37217 | |
37218 | // Finally, try to combine into a single shuffle instruction. |
37219 | return combineX86ShuffleChain(Ops, Root, Mask, Depth, HasVariableMask, |
37220 | AllowVariableCrossLaneMask, |
37221 | AllowVariablePerLaneMask, DAG, Subtarget); |
37222 | } |
37223 | |
37224 | // If that failed and any input is extracted then try to combine as a |
37225 | // shuffle with the larger type. |
37226 | return combineX86ShuffleChainWithExtract( |
37227 | Ops, Root, Mask, Depth, HasVariableMask, AllowVariableCrossLaneMask, |
37228 | AllowVariablePerLaneMask, DAG, Subtarget); |
37229 | } |
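      | // [editor] Worked example (illustrative) of the mask-merge step above: a |
      | // 4-element root mask {0,2,1,3} over an 8-element op mask has |
      | // RootRatio = 2 and OpRatio = 1, so the merged 8-element mask takes |
      | // Mask[i] from OpMask[2 * RootMask[i / 2] + (i & 1)]. |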
37230 | |
37231 | // Helper entry point: start the recursive combine at Op with an identity root mask. |
37232 | static SDValue combineX86ShufflesRecursively(SDValue Op, SelectionDAG &DAG, |
37233 | const X86Subtarget &Subtarget) { |
37234 | return combineX86ShufflesRecursively( |
37235 | {Op}, 0, Op, {0}, {}, 0, X86::MaxShuffleCombineDepth, |
37236 | false, |
37237 | true, true, DAG, |
37238 | Subtarget); |
37239 | } |
37240 | |
37241 | |
37242 | |
37243 | // Get the PSHUF-style mask from a PSHUFD/PSHUFLW/PSHUFHW node, normalized |
37244 | // to a 4-element mask within a single 128-bit lane. |
37245 | static SmallVector<int, 4> getPSHUFShuffleMask(SDValue N) { |
37246 | MVT VT = N.getSimpleValueType(); |
37247 | SmallVector<int, 4> Mask; |
37248 | SmallVector<SDValue, 2> Ops; |
37249 | bool HaveMask = |
37250 | getTargetShuffleMask(N.getNode(), VT, false, Ops, Mask); |
37251 | (void)HaveMask; |
37252 | assert(HaveMask); |
37253 | |
37254 | |
37255 | |
37256 | if (VT.getSizeInBits() > 128) { |
37257 | int LaneElts = 128 / VT.getScalarSizeInBits(); |
37258 | #ifndef NDEBUG |
37259 | for (int i = 1, NumLanes = VT.getSizeInBits() / 128; i < NumLanes; ++i) |
37260 | for (int j = 0; j < LaneElts; ++j) |
37261 | assert(Mask[j] == Mask[i * LaneElts + j] - (LaneElts * i) && |
37262 | "Mask doesn't repeat in high 128-bit lanes!"); |
37263 | #endif |
37264 | Mask.resize(LaneElts); |
37265 | } |
37266 | |
37267 | switch (N.getOpcode()) { |
37268 | case X86ISD::PSHUFD: |
37269 | return Mask; |
37270 | case X86ISD::PSHUFLW: |
37271 | Mask.resize(4); |
37272 | return Mask; |
37273 | case X86ISD::PSHUFHW: |
37274 | Mask.erase(Mask.begin(), Mask.begin() + 4); |
37275 | for (int &M : Mask) |
37276 | M -= 4; |
37277 | return Mask; |
37278 | default: |
37279 | llvm_unreachable("No valid shuffle instruction found!"); |
37280 | } |
37281 | } |
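      | // [editor] Worked example (illustrative): a PSHUFHW with full v8i16 mask |
      | // {0,1,2,3, 7,6,5,4} drops the identity low half and rebases the high |
      | // half, returning the 4-element mask {3,2,1,0}. |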
37282 | |
37283 | // Search for a combinable shuffle across a chain ending in pshufd. |
37284 | // |
37285 | // We walk up the chain and look for a combinable shuffle, skipping over |
37286 | // shuffles that we could hoist this shuffle's transformation past without |
37287 | // altering anything. |
37288 | static SDValue |
37289 | combineRedundantDWordShuffle(SDValue N, MutableArrayRef<int> Mask, |
37290 | SelectionDAG &DAG) { |
37291 | assert(N.getOpcode() == X86ISD::PSHUFD && |
37292 | "Called with something other than an x86 128-bit half shuffle!"); |
37293 | SDLoc DL(N); |
37294 | |
37295 | |
37296 | |
37297 | |
37298 | SmallVector<SDValue, 8> Chain; |
37299 | SDValue V = N.getOperand(0); |
37300 | for (; V.hasOneUse(); V = V.getOperand(0)) { |
37301 | switch (V.getOpcode()) { |
37302 | default: |
37303 | return SDValue(); |
37304 | |
37305 | case ISD::BITCAST: |
37306 | |
37307 | |
37308 | continue; |
37309 | |
37310 | case X86ISD::PSHUFD: |
37311 | |
37312 | break; |
37313 | |
37314 | case X86ISD::PSHUFLW: |
37315 | |
37316 | |
37317 | if (Mask[0] != 0 || Mask[1] != 1 || |
37318 | !(Mask[2] >= 2 && Mask[2] < 4 && Mask[3] >= 2 && Mask[3] < 4)) |
37319 | return SDValue(); |
37320 | |
37321 | Chain.push_back(V); |
37322 | continue; |
37323 | |
37324 | case X86ISD::PSHUFHW: |
37325 | |
37326 | |
37327 | if (Mask[2] != 2 || Mask[3] != 3 || |
37328 | !(Mask[0] >= 0 && Mask[0] < 2 && Mask[1] >= 0 && Mask[1] < 2)) |
37329 | return SDValue(); |
37330 | |
37331 | Chain.push_back(V); |
37332 | continue; |
37333 | |
37334 | case X86ISD::UNPCKL: |
37335 | case X86ISD::UNPCKH: |
37336 | |
37337 | |
37338 | if (V.getSimpleValueType().getVectorElementType() != MVT::i8 && |
37339 | V.getSimpleValueType().getVectorElementType() != MVT::i16) |
37340 | return SDValue(); |
37341 | |
37342 | |
37343 | unsigned CombineOp = |
37344 | V.getOpcode() == X86ISD::UNPCKL ? X86ISD::PSHUFLW : X86ISD::PSHUFHW; |
37345 | if (V.getOperand(0) != V.getOperand(1) || |
37346 | !V->isOnlyUserOf(V.getOperand(0).getNode())) |
37347 | return SDValue(); |
37348 | Chain.push_back(V); |
37349 | V = V.getOperand(0); |
37350 | do { |
37351 | switch (V.getOpcode()) { |
37352 | default: |
37353 | return SDValue(); |
37354 | |
37355 | case X86ISD::PSHUFLW: |
37356 | case X86ISD::PSHUFHW: |
37357 | if (V.getOpcode() == CombineOp) |
37358 | break; |
37359 | |
37360 | Chain.push_back(V); |
37361 | |
37362 | LLVM_FALLTHROUGH; |
37363 | case ISD::BITCAST: |
37364 | V = V.getOperand(0); |
37365 | continue; |
37366 | } |
37367 | break; |
37368 | } while (V.hasOneUse()); |
37369 | break; |
37370 | } |
37371 | |
37372 | break; |
37373 | } |
37374 | |
37375 | if (!V.hasOneUse()) |
37376 | |
37377 | return SDValue(); |
37378 | |
37379 | |
37380 | SmallVector<int, 4> VMask = getPSHUFShuffleMask(V); |
37381 | for (int &M : Mask) |
37382 | M = VMask[M]; |
37383 | V = DAG.getNode(V.getOpcode(), DL, V.getValueType(), V.getOperand(0), |
37384 | getV4X86ShuffleImm8ForMask(Mask, DL, DAG)); |
37385 | |
37386 | |
37387 | while (!Chain.empty()) { |
37388 | SDValue W = Chain.pop_back_val(); |
37389 | |
37390 | if (V.getValueType() != W.getOperand(0).getValueType()) |
37391 | V = DAG.getBitcast(W.getOperand(0).getValueType(), V); |
37392 | |
37393 | switch (W.getOpcode()) { |
37394 | default: |
37395 | llvm_unreachable("Only PSHUF and UNPCK instructions get here!"); |
37396 | |
37397 | case X86ISD::UNPCKL: |
37398 | case X86ISD::UNPCKH: |
37399 | V = DAG.getNode(W.getOpcode(), DL, W.getValueType(), V, V); |
37400 | break; |
37401 | |
37402 | case X86ISD::PSHUFD: |
37403 | case X86ISD::PSHUFLW: |
37404 | case X86ISD::PSHUFHW: |
37405 | V = DAG.getNode(W.getOpcode(), DL, W.getValueType(), V, W.getOperand(1)); |
37406 | break; |
37407 | } |
37408 | } |
37409 | if (V.getValueType() != N.getValueType()) |
37410 | V = DAG.getBitcast(N.getValueType(), V); |
37411 | |
37412 | |
37413 | return V; |
37414 | } |
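      | // [editor] Worked example (illustrative): pshufd(pshufd(x, {2,3,0,1}), |
      | // {1,0,3,2}) composes via M = VMask[M] into the single node |
      | // pshufd(x, {3,2,1,0}), after which any hoisted chain is replayed on top. |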
37415 | |
37416 | // Attempt to commute shufps LHS loads: |
37417 | // permilps(shufp(load(),x)) --> permilps(shufp(x,load())) |
37418 | static SDValue combineCommutableSHUFP(SDValue N, MVT VT, const SDLoc &DL, |
37419 | SelectionDAG &DAG) { |
37420 | |
37421 | if (VT != MVT::v4f32 && VT != MVT::v8f32 && VT != MVT::v16f32) |
37422 | return SDValue(); |
37423 | |
37424 | |
37425 | auto commuteSHUFP = [&VT, &DL, &DAG](SDValue Parent, SDValue V) { |
37426 | if (V.getOpcode() != X86ISD::SHUFP || !Parent->isOnlyUserOf(V.getNode())) |
37427 | return SDValue(); |
37428 | SDValue N0 = V.getOperand(0); |
37429 | SDValue N1 = V.getOperand(1); |
37430 | unsigned Imm = V.getConstantOperandVal(2); |
37431 | if (!MayFoldLoad(peekThroughOneUseBitcasts(N0)) || |
37432 | MayFoldLoad(peekThroughOneUseBitcasts(N1))) |
37433 | return SDValue(); |
37434 | Imm = ((Imm & 0x0F) << 4) | ((Imm & 0xF0) >> 4); |
37435 | return DAG.getNode(X86ISD::SHUFP, DL, VT, N1, N0, |
37436 | DAG.getTargetConstant(Imm, DL, MVT::i8)); |
37437 | }; |
37438 | |
37439 | switch (N.getOpcode()) { |
37440 | case X86ISD::VPERMILPI: |
37441 | if (SDValue NewSHUFP = commuteSHUFP(N, N.getOperand(0))) { |
37442 | unsigned Imm = N.getConstantOperandVal(1); |
37443 | return DAG.getNode(X86ISD::VPERMILPI, DL, VT, NewSHUFP, |
37444 | DAG.getTargetConstant(Imm ^ 0xAA, DL, MVT::i8)); |
37445 | } |
37446 | break; |
37447 | case X86ISD::SHUFP: { |
37448 | SDValue N0 = N.getOperand(0); |
37449 | SDValue N1 = N.getOperand(1); |
37450 | unsigned Imm = N.getConstantOperandVal(2); |
37451 | if (N0 == N1) { |
37452 | if (SDValue NewSHUFP = commuteSHUFP(N, N0)) |
37453 | return DAG.getNode(X86ISD::SHUFP, DL, VT, NewSHUFP, NewSHUFP, |
37454 | DAG.getTargetConstant(Imm ^ 0xAA, DL, MVT::i8)); |
37455 | } else if (SDValue NewSHUFP = commuteSHUFP(N, N0)) { |
37456 | return DAG.getNode(X86ISD::SHUFP, DL, VT, NewSHUFP, N1, |
37457 | DAG.getTargetConstant(Imm ^ 0x0A, DL, MVT::i8)); |
37458 | } else if (SDValue NewSHUFP = commuteSHUFP(N, N1)) { |
37459 | return DAG.getNode(X86ISD::SHUFP, DL, VT, N0, NewSHUFP, |
37460 | DAG.getTargetConstant(Imm ^ 0xA0, DL, MVT::i8)); |
37461 | } |
37462 | break; |
37463 | } |
37464 | } |
37465 | |
37466 | return SDValue(); |
37467 | } |
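      | // [editor] Note (illustrative): commuting swaps the two 4-bit halves of |
      | // the SHUFP immediate (e.g. 0xB1 -> 0x1B), and the outer immediate is |
      | // XORed with 0xAA/0x0A/0xA0 to repoint each affected 2-bit index field |
      | // at the swapped half of the commuted SHUFP result. |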
37468 | |
37469 | // Canonicalize SHUFFLE(BINOP(X,Y)) -> BINOP(SHUFFLE(X),SHUFFLE(Y)). |
37470 | static SDValue canonicalizeShuffleWithBinOps(SDValue N, SelectionDAG &DAG, |
37471 | const SDLoc &DL) { |
37472 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
37473 | EVT ShuffleVT = N.getValueType(); |
37474 | |
37475 | auto IsMergeableWithShuffle = [](SDValue Op) { |
37476 | |
37477 | |
37478 | |
37479 | |
37480 | return ISD::isBuildVectorAllOnes(Op.getNode()) || |
37481 | ISD::isBuildVectorAllZeros(Op.getNode()) || |
37482 | ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) || |
37483 | ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()) || |
37484 | (isTargetShuffle(Op.getOpcode()) && Op->hasOneUse()); |
37485 | }; |
37486 | auto IsSafeToMoveShuffle = [ShuffleVT](SDValue Op, unsigned BinOp) { |
37487 | |
37488 | |
37489 | return BinOp == ISD::AND || BinOp == ISD::OR || BinOp == ISD::XOR || |
37490 | (Op.getScalarValueSizeInBits() <= ShuffleVT.getScalarSizeInBits()); |
37491 | }; |
37492 | |
37493 | unsigned Opc = N.getOpcode(); |
37494 | switch (Opc) { |
37495 | |
37496 | case X86ISD::PSHUFB: { |
37497 | |
37498 | SmallVector<int> Mask; |
37499 | SmallVector<SDValue> Ops; |
37500 | if (!getTargetShuffleMask(N.getNode(), ShuffleVT.getSimpleVT(), false, Ops, |
37501 | Mask)) |
37502 | break; |
37503 | LLVM_FALLTHROUGH; |
37504 | } |
37505 | case X86ISD::VBROADCAST: |
37506 | case X86ISD::MOVDDUP: |
37507 | case X86ISD::PSHUFD: |
37508 | case X86ISD::VPERMI: |
37509 | case X86ISD::VPERMILPI: { |
37510 | if (N.getOperand(0).getValueType() == ShuffleVT && |
37511 | N->isOnlyUserOf(N.getOperand(0).getNode())) { |
37512 | SDValue N0 = peekThroughOneUseBitcasts(N.getOperand(0)); |
37513 | unsigned SrcOpcode = N0.getOpcode(); |
37514 | if (TLI.isBinOp(SrcOpcode) && IsSafeToMoveShuffle(N0, SrcOpcode)) { |
37515 | SDValue Op00 = peekThroughOneUseBitcasts(N0.getOperand(0)); |
37516 | SDValue Op01 = peekThroughOneUseBitcasts(N0.getOperand(1)); |
37517 | if (IsMergeableWithShuffle(Op00) || IsMergeableWithShuffle(Op01)) { |
37518 | SDValue LHS, RHS; |
37519 | Op00 = DAG.getBitcast(ShuffleVT, Op00); |
37520 | Op01 = DAG.getBitcast(ShuffleVT, Op01); |
37521 | if (N.getNumOperands() == 2) { |
37522 | LHS = DAG.getNode(Opc, DL, ShuffleVT, Op00, N.getOperand(1)); |
37523 | RHS = DAG.getNode(Opc, DL, ShuffleVT, Op01, N.getOperand(1)); |
37524 | } else { |
37525 | LHS = DAG.getNode(Opc, DL, ShuffleVT, Op00); |
37526 | RHS = DAG.getNode(Opc, DL, ShuffleVT, Op01); |
37527 | } |
37528 | EVT OpVT = N0.getValueType(); |
37529 | return DAG.getBitcast(ShuffleVT, |
37530 | DAG.getNode(SrcOpcode, DL, OpVT, |
37531 | DAG.getBitcast(OpVT, LHS), |
37532 | DAG.getBitcast(OpVT, RHS))); |
37533 | } |
37534 | } |
37535 | } |
37536 | break; |
37537 | } |
37538 | |
37539 | case X86ISD::INSERTPS: { |
37540 | |
37541 | unsigned InsertPSMask = N.getConstantOperandVal(2); |
37542 | unsigned ZeroMask = InsertPSMask & 0xF; |
37543 | if (ZeroMask != 0) |
37544 | break; |
37545 | LLVM_FALLTHROUGH; |
37546 | } |
37547 | case X86ISD::MOVSD: |
37548 | case X86ISD::MOVSS: |
37549 | case X86ISD::BLENDI: |
37550 | case X86ISD::SHUFP: |
37551 | case X86ISD::UNPCKH: |
37552 | case X86ISD::UNPCKL: { |
37553 | if (N->isOnlyUserOf(N.getOperand(0).getNode()) && |
37554 | N->isOnlyUserOf(N.getOperand(1).getNode())) { |
37555 | SDValue N0 = peekThroughOneUseBitcasts(N.getOperand(0)); |
37556 | SDValue N1 = peekThroughOneUseBitcasts(N.getOperand(1)); |
37557 | unsigned SrcOpcode = N0.getOpcode(); |
37558 | if (TLI.isBinOp(SrcOpcode) && N1.getOpcode() == SrcOpcode && |
37559 | IsSafeToMoveShuffle(N0, SrcOpcode) && |
37560 | IsSafeToMoveShuffle(N1, SrcOpcode)) { |
37561 | SDValue Op00 = peekThroughOneUseBitcasts(N0.getOperand(0)); |
37562 | SDValue Op10 = peekThroughOneUseBitcasts(N1.getOperand(0)); |
37563 | SDValue Op01 = peekThroughOneUseBitcasts(N0.getOperand(1)); |
37564 | SDValue Op11 = peekThroughOneUseBitcasts(N1.getOperand(1)); |
37565 | |
37566 | |
37567 | if (((IsMergeableWithShuffle(Op00) && IsMergeableWithShuffle(Op10)) || |
37568 | (IsMergeableWithShuffle(Op01) && IsMergeableWithShuffle(Op11))) || |
37569 | ((IsMergeableWithShuffle(Op00) || IsMergeableWithShuffle(Op10)) && |
37570 | (IsMergeableWithShuffle(Op01) || IsMergeableWithShuffle(Op11)))) { |
37571 | SDValue LHS, RHS; |
37572 | Op00 = DAG.getBitcast(ShuffleVT, Op00); |
37573 | Op10 = DAG.getBitcast(ShuffleVT, Op10); |
37574 | Op01 = DAG.getBitcast(ShuffleVT, Op01); |
37575 | Op11 = DAG.getBitcast(ShuffleVT, Op11); |
37576 | if (N.getNumOperands() == 3) { |
37577 | LHS = DAG.getNode(Opc, DL, ShuffleVT, Op00, Op10, N.getOperand(2)); |
37578 | RHS = DAG.getNode(Opc, DL, ShuffleVT, Op01, Op11, N.getOperand(2)); |
37579 | } else { |
37580 | LHS = DAG.getNode(Opc, DL, ShuffleVT, Op00, Op10); |
37581 | RHS = DAG.getNode(Opc, DL, ShuffleVT, Op01, Op11); |
37582 | } |
37583 | EVT OpVT = N0.getValueType(); |
37584 | return DAG.getBitcast(ShuffleVT, |
37585 | DAG.getNode(SrcOpcode, DL, OpVT, |
37586 | DAG.getBitcast(OpVT, LHS), |
37587 | DAG.getBitcast(OpVT, RHS))); |
37588 | } |
37589 | } |
37590 | } |
37591 | break; |
37592 | } |
37593 | } |
37594 | return SDValue(); |
37595 | } |
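      | // [editor] Worked example (illustrative): pshufd(and(x, build_vector C)) |
      | // becomes and(pshufd(x), pshufd(C)); the shuffled constant folds away |
      | // later, so only the cheap shuffle of x remains. |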
37596 | |
37597 | // Fold vperm2x128(op(x),op(y)) -> op(vperm2x128(x,y)) for repeated unary ops. |
37598 | static SDValue canonicalizeLaneShuffleWithRepeatedOps(SDValue V, |
37599 | SelectionDAG &DAG, |
37600 | const SDLoc &DL) { |
37601 | assert(V.getOpcode() == X86ISD::VPERM2X128 && "Unknown lane shuffle"); |
37602 | |
37603 | MVT VT = V.getSimpleValueType(); |
37604 | SDValue Src0 = peekThroughBitcasts(V.getOperand(0)); |
37605 | SDValue Src1 = peekThroughBitcasts(V.getOperand(1)); |
37606 | unsigned SrcOpc0 = Src0.getOpcode(); |
37607 | unsigned SrcOpc1 = Src1.getOpcode(); |
37608 | EVT SrcVT0 = Src0.getValueType(); |
37609 | EVT SrcVT1 = Src1.getValueType(); |
37610 | |
37611 | if (!Src1.isUndef() && (SrcVT0 != SrcVT1 || SrcOpc0 != SrcOpc1)) |
37612 | return SDValue(); |
37613 | |
37614 | switch (SrcOpc0) { |
37615 | case X86ISD::MOVDDUP: { |
37616 | SDValue LHS = Src0.getOperand(0); |
37617 | SDValue RHS = Src1.isUndef() ? Src1 : Src1.getOperand(0); |
37618 | SDValue Res = |
37619 | DAG.getNode(X86ISD::VPERM2X128, DL, SrcVT0, LHS, RHS, V.getOperand(2)); |
37620 | Res = DAG.getNode(SrcOpc0, DL, SrcVT0, Res); |
37621 | return DAG.getBitcast(VT, Res); |
37622 | } |
37623 | case X86ISD::VPERMILPI: |
37624 | |
37625 | if (SrcVT0 == MVT::v4f64) { |
37626 | uint64_t Mask = Src0.getConstantOperandVal(1); |
37627 | if ((Mask & 0x3) != ((Mask >> 2) & 0x3)) |
37628 | break; |
37629 | } |
37630 | LLVM_FALLTHROUGH; |
37631 | case X86ISD::VSHLI: |
37632 | case X86ISD::VSRLI: |
37633 | case X86ISD::VSRAI: |
37634 | case X86ISD::PSHUFD: |
37635 | if (Src1.isUndef() || Src0.getOperand(1) == Src1.getOperand(1)) { |
37636 | SDValue LHS = Src0.getOperand(0); |
37637 | SDValue RHS = Src1.isUndef() ? Src1 : Src1.getOperand(0); |
37638 | SDValue Res = DAG.getNode(X86ISD::VPERM2X128, DL, SrcVT0, LHS, RHS, |
37639 | V.getOperand(2)); |
37640 | Res = DAG.getNode(SrcOpc0, DL, SrcVT0, Res, Src0.getOperand(1)); |
37641 | return DAG.getBitcast(VT, Res); |
37642 | } |
37643 | break; |
37644 | } |
37645 | |
37646 | return SDValue(); |
37647 | } |
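      | // [editor] Worked example (illustrative): vperm2x128(movddup(x), |
      | // movddup(y), imm) is rewritten as movddup(vperm2x128(x, y, imm)), |
      | // letting the lane shuffle apply before the repeated unary op. |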
37648 | |
37649 | // Try to combine x86 target-specific shuffle nodes. |
37650 | static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG, |
37651 | TargetLowering::DAGCombinerInfo &DCI, |
37652 | const X86Subtarget &Subtarget) { |
37653 | SDLoc DL(N); |
37654 | MVT VT = N.getSimpleValueType(); |
37655 | SmallVector<int, 4> Mask; |
37656 | unsigned Opcode = N.getOpcode(); |
37657 | |
37658 | if (SDValue R = combineCommutableSHUFP(N, VT, DL, DAG)) |
37659 | return R; |
37660 | |
37661 | if (SDValue R = canonicalizeShuffleWithBinOps(N, DAG, DL)) |
37662 | return R; |
37663 | |
37664 | // Handle specific target shuffles. |
37665 | switch (Opcode) { |
37666 | case X86ISD::MOVDDUP: { |
37667 | SDValue Src = N.getOperand(0); |
37668 | // Narrow a whole-vector load feeding MOVDDUP to a scalar VZEXT_LOAD. |
37669 | if (VT == MVT::v2f64 && Src.hasOneUse() && |
37670 | ISD::isNormalLoad(Src.getNode())) { |
37671 | LoadSDNode *LN = cast<LoadSDNode>(Src); |
37672 | if (SDValue VZLoad = narrowLoadToVZLoad(LN, MVT::f64, MVT::v2f64, DAG)) { |
37673 | SDValue Movddup = DAG.getNode(X86ISD::MOVDDUP, DL, MVT::v2f64, VZLoad); |
37674 | DCI.CombineTo(N.getNode(), Movddup); |
37675 | DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), VZLoad.getValue(1)); |
37676 | DCI.recursivelyDeleteUnusedNodes(LN); |
37677 | return N; |
37678 | } |
37679 | } |
37680 | |
37681 | return SDValue(); |
37682 | } |
37683 | case X86ISD::VBROADCAST: { |
37684 | SDValue Src = N.getOperand(0); |
37685 | SDValue BC = peekThroughBitcasts(Src); |
37686 | EVT SrcVT = Src.getValueType(); |
37687 | EVT BCVT = BC.getValueType(); |
37688 | |
37689 | |
37690 | // A broadcast only uses element 0, so try to simplify a shuffled source. |
37691 | if (isTargetShuffle(BC.getOpcode()) && |
37692 | VT.getScalarSizeInBits() % BCVT.getScalarSizeInBits() == 0) { |
37693 | unsigned Scale = VT.getScalarSizeInBits() / BCVT.getScalarSizeInBits(); |
37694 | SmallVector<int, 16> DemandedMask(BCVT.getVectorNumElements(), |
37695 | SM_SentinelUndef); |
37696 | for (unsigned i = 0; i != Scale; ++i) |
37697 | DemandedMask[i] = i; |
37698 | if (SDValue Res = combineX86ShufflesRecursively( |
37699 | {BC}, 0, BC, DemandedMask, {}, 0, |
37700 | X86::MaxShuffleCombineDepth, |
37701 | false, true, |
37702 | true, DAG, Subtarget)) |
37703 | return DAG.getNode(X86ISD::VBROADCAST, DL, VT, |
37704 | DAG.getBitcast(SrcVT, Res)); |
37705 | } |
37706 | |
37707 | // broadcast(bitcast(src)) -> bitcast(broadcast(src)) where the scalar |
37708 | // sizes match, so the broadcast can be expressed on the inner type. |
37709 | if (Src.getOpcode() == ISD::BITCAST && |
37710 | SrcVT.getScalarSizeInBits() == BCVT.getScalarSizeInBits() && |
37711 | DAG.getTargetLoweringInfo().isTypeLegal(BCVT) && |
37712 | FixedVectorType::isValidElementType( |
37713 | BCVT.getScalarType().getTypeForEVT(*DAG.getContext()))) { |
37714 | EVT NewVT = EVT::getVectorVT(*DAG.getContext(), BCVT.getScalarType(), |
37715 | VT.getVectorNumElements()); |
37716 | return DAG.getBitcast(VT, DAG.getNode(X86ISD::VBROADCAST, DL, NewVT, BC)); |
37717 | } |
37718 | |
37719 | // Reduce broadcast source vector to lowest 128-bits. |
37720 | if (SrcVT.getSizeInBits() > 128) |
37721 | return DAG.getNode(X86ISD::VBROADCAST, DL, VT, |
37722 | extract128BitVector(Src, 0, DAG, DL)); |
37723 | |
37724 | // broadcast(scalar_to_vector(x)) -> broadcast(x). |
37725 | if (Src.getOpcode() == ISD::SCALAR_TO_VECTOR) |
37726 | return DAG.getNode(X86ISD::VBROADCAST, DL, VT, Src.getOperand(0)); |
37727 | |
37728 | // Share broadcast with the longest vector and extract low subvector (free). |
37729 | // Ensure the same SDValue from the SDNode use is being used. |
37730 | for (SDNode *User : Src->uses()) |
37731 | if (User != N.getNode() && User->getOpcode() == X86ISD::VBROADCAST && |
37732 | Src == User->getOperand(0) && |
37733 | User->getValueSizeInBits(0).getFixedSize() > |
37734 | VT.getFixedSizeInBits()) { |
37735 | return extractSubVector(SDValue(User, 0), 0, DAG, DL, |
37736 | VT.getSizeInBits()); |
37737 | } |
37738 | |
37739 | // vbroadcast(scalarload X) -> vbroadcast_load X |
37740 | // For float loads, extract other uses of the scalar from the broadcast. |
37741 | if (!SrcVT.isVector() && (Src.hasOneUse() || VT.isFloatingPoint()) && |
37742 | ISD::isNormalLoad(Src.getNode())) { |
37743 | LoadSDNode *LN = cast<LoadSDNode>(Src); |
37744 | SDVTList Tys = DAG.getVTList(VT, MVT::Other); |
37745 | SDValue Ops[] = { LN->getChain(), LN->getBasePtr() }; |
37746 | SDValue BcastLd = |
37747 | DAG.getMemIntrinsicNode(X86ISD::VBROADCAST_LOAD, DL, Tys, Ops, |
37748 | LN->getMemoryVT(), LN->getMemOperand()); |
37749 | |
37750 | bool NoReplaceExtract = Src.hasOneUse(); |
37751 | DCI.CombineTo(N.getNode(), BcastLd); |
37752 | if (NoReplaceExtract) { |
37753 | DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BcastLd.getValue(1)); |
37754 | DCI.recursivelyDeleteUnusedNodes(LN); |
37755 | } else { |
37756 | SDValue Scl = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, SrcVT, BcastLd, |
37757 | DAG.getIntPtrConstant(0, DL)); |
37758 | DCI.CombineTo(LN, Scl, BcastLd.getValue(1)); |
37759 | } |
37760 | return N; |
37761 | } |
37762 | |
37763 | // If we are broadcasting an i16 truncated from a load, AVX2 can instead |
37764 | // broadcast directly from memory, avoiding the GPR round-trip. |
37765 | if (SrcVT == MVT::i16 && Src.getOpcode() == ISD::TRUNCATE && |
37766 | Src.hasOneUse() && Src.getOperand(0).hasOneUse()) { |
37767 | assert(Subtarget.hasAVX2() && "Expected AVX2"); |
37768 | SDValue TruncIn = Src.getOperand(0); |
37769 | |
37770 | |
37771 | |
37772 | if (ISD::isNormalLoad(TruncIn.getNode())) { |
37773 | LoadSDNode *LN = cast<LoadSDNode>(TruncIn); |
37774 | |
37775 | if (LN->isSimple()) { |
37776 | SDVTList Tys = DAG.getVTList(VT, MVT::Other); |
37777 | SDValue Ops[] = { LN->getChain(), LN->getBasePtr() }; |
37778 | SDValue BcastLd = DAG.getMemIntrinsicNode( |
37779 | X86ISD::VBROADCAST_LOAD, DL, Tys, Ops, MVT::i16, |
37780 | LN->getPointerInfo(), LN->getOriginalAlign(), |
37781 | LN->getMemOperand()->getFlags()); |
37782 | DCI.CombineTo(N.getNode(), BcastLd); |
37783 | DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BcastLd.getValue(1)); |
37784 | DCI.recursivelyDeleteUnusedNodes(Src.getNode()); |
37785 | return N; |
37786 | } |
37787 | } |
37788 | |
37789 | |
37790 | if (ISD::isUNINDEXEDLoad(Src.getOperand(0).getNode()) && |
37791 | ISD::isEXTLoad(Src.getOperand(0).getNode())) { |
37792 | LoadSDNode *LN = cast<LoadSDNode>(Src.getOperand(0)); |
37793 | if (LN->getMemoryVT().getSizeInBits() == 16) { |
37794 | SDVTList Tys = DAG.getVTList(VT, MVT::Other); |
37795 | SDValue Ops[] = { LN->getChain(), LN->getBasePtr() }; |
37796 | SDValue BcastLd = |
37797 | DAG.getMemIntrinsicNode(X86ISD::VBROADCAST_LOAD, DL, Tys, Ops, |
37798 | LN->getMemoryVT(), LN->getMemOperand()); |
37799 | DCI.CombineTo(N.getNode(), BcastLd); |
37800 | DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BcastLd.getValue(1)); |
37801 | DCI.recursivelyDeleteUnusedNodes(Src.getNode()); |
37802 | return N; |
37803 | } |
37804 | } |
37805 | |
37806 | |
37807 | |
37808 | if (TruncIn.getOpcode() == ISD::SRL && |
37809 | TruncIn.getOperand(0).hasOneUse() && |
37810 | isa<ConstantSDNode>(TruncIn.getOperand(1)) && |
37811 | ISD::isNormalLoad(TruncIn.getOperand(0).getNode())) { |
37812 | LoadSDNode *LN = cast<LoadSDNode>(TruncIn.getOperand(0)); |
37813 | unsigned ShiftAmt = TruncIn.getConstantOperandVal(1); |
37814 | |
37815 | |
37816 | if (ShiftAmt % 16 == 0 && TruncIn.getValueSizeInBits() % 16 == 0 && |
37817 | LN->isSimple()) { |
37818 | unsigned Offset = ShiftAmt / 8; |
37819 | SDVTList Tys = DAG.getVTList(VT, MVT::Other); |
37820 | SDValue Ptr = DAG.getMemBasePlusOffset(LN->getBasePtr(), |
37821 | TypeSize::Fixed(Offset), DL); |
37822 | SDValue Ops[] = { LN->getChain(), Ptr }; |
37823 | SDValue BcastLd = DAG.getMemIntrinsicNode( |
37824 | X86ISD::VBROADCAST_LOAD, DL, Tys, Ops, MVT::i16, |
37825 | LN->getPointerInfo().getWithOffset(Offset), |
37826 | LN->getOriginalAlign(), |
37827 | LN->getMemOperand()->getFlags()); |
37828 | DCI.CombineTo(N.getNode(), BcastLd); |
37829 | DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BcastLd.getValue(1)); |
37830 | DCI.recursivelyDeleteUnusedNodes(Src.getNode()); |
37831 | return N; |
37832 | } |
37833 | } |
37834 | } |
37835 | |
37836 | |
37837 | if (Src.getOpcode() == X86ISD::VZEXT_LOAD && Src.hasOneUse()) { |
37838 | MemSDNode *LN = cast<MemIntrinsicSDNode>(Src); |
37839 | if (LN->getMemoryVT().getSizeInBits() == VT.getScalarSizeInBits()) { |
37840 | SDVTList Tys = DAG.getVTList(VT, MVT::Other); |
37841 | SDValue Ops[] = { LN->getChain(), LN->getBasePtr() }; |
37842 | SDValue BcastLd = |
37843 | DAG.getMemIntrinsicNode(X86ISD::VBROADCAST_LOAD, DL, Tys, Ops, |
37844 | LN->getMemoryVT(), LN->getMemOperand()); |
37845 | DCI.CombineTo(N.getNode(), BcastLd); |
37846 | DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BcastLd.getValue(1)); |
37847 | DCI.recursivelyDeleteUnusedNodes(LN); |
37848 | return N; |
37849 | } |
37850 | } |
37851 | |
37852 | |
37853 | if ((SrcVT == MVT::v2f64 || SrcVT == MVT::v4f32 || SrcVT == MVT::v2i64 || |
37854 | SrcVT == MVT::v4i32) && |
37855 | Src.hasOneUse() && ISD::isNormalLoad(Src.getNode())) { |
37856 | LoadSDNode *LN = cast<LoadSDNode>(Src); |
37857 | |
37858 | if (LN->isSimple()) { |
37859 | SDVTList Tys = DAG.getVTList(VT, MVT::Other); |
37860 | SDValue Ops[] = {LN->getChain(), LN->getBasePtr()}; |
37861 | SDValue BcastLd = DAG.getMemIntrinsicNode( |
37862 | X86ISD::VBROADCAST_LOAD, DL, Tys, Ops, SrcVT.getScalarType(), |
37863 | LN->getPointerInfo(), LN->getOriginalAlign(), |
37864 | LN->getMemOperand()->getFlags()); |
37865 | DCI.CombineTo(N.getNode(), BcastLd); |
37866 | DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BcastLd.getValue(1)); |
37867 | DCI.recursivelyDeleteUnusedNodes(LN); |
37868 | return N; |
37869 | } |
37870 | } |
37871 | |
37872 | return SDValue(); |
37873 | } |
37874 | case X86ISD::VZEXT_MOVL: { |
37875 | SDValue N0 = N.getOperand(0); |
37876 | |
37877 | // If this is a vzmovl of a full vector load, replace it with a vzload, |
37878 | // unless the load is volatile. |
37879 | if (N0.hasOneUse() && ISD::isNormalLoad(N0.getNode())) { |
37880 | auto *LN = cast<LoadSDNode>(N0); |
37881 | if (SDValue VZLoad = |
37882 | narrowLoadToVZLoad(LN, VT.getVectorElementType(), VT, DAG)) { |
37883 | DCI.CombineTo(N.getNode(), VZLoad); |
37884 | DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), VZLoad.getValue(1)); |
37885 | DCI.recursivelyDeleteUnusedNodes(LN); |
37886 | return N; |
37887 | } |
37888 | } |
37889 | |
37890 | |
37891 | // If this is a VZEXT_MOVL of a VBROADCAST_LOAD, we don't need the |
37892 | // broadcast and can just use a VZEXT_LOAD. |
37893 | if (N0.hasOneUse() && N0.getOpcode() == X86ISD::VBROADCAST_LOAD) { |
37894 | auto *LN = cast<MemSDNode>(N0); |
37895 | if (VT.getScalarSizeInBits() == LN->getMemoryVT().getSizeInBits()) { |
37896 | SDVTList Tys = DAG.getVTList(VT, MVT::Other); |
37897 | SDValue Ops[] = {LN->getChain(), LN->getBasePtr()}; |
37898 | SDValue VZLoad = |
37899 | DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, DL, Tys, Ops, |
37900 | LN->getMemoryVT(), LN->getMemOperand()); |
37901 | DCI.CombineTo(N.getNode(), VZLoad); |
37902 | DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), VZLoad.getValue(1)); |
37903 | DCI.recursivelyDeleteUnusedNodes(LN); |
37904 | return N; |
37905 | } |
37906 | } |
37907 | |
37908 | // Turn (v2i64 (vzext_movl (scalar_to_vector (i64 X)))) into |
37909 | // (v2i64 (bitcast (v4i32 (vzext_movl (scalar_to_vector (i32 (trunc X)))))) |
37910 | // when the upper 32 bits of X are known to be zero. |
37911 | if (N0.hasOneUse() && N0.getOpcode() == ISD::SCALAR_TO_VECTOR && |
37912 | N0.getOperand(0).hasOneUse() && |
37913 | N0.getOperand(0).getValueType() == MVT::i64) { |
37914 | SDValue In = N0.getOperand(0); |
37915 | APInt Mask = APInt::getHighBitsSet(64, 32); |
37916 | if (DAG.MaskedValueIsZero(In, Mask)) { |
37917 | SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, In); |
37918 | MVT VecVT = MVT::getVectorVT(MVT::i32, VT.getVectorNumElements() * 2); |
37919 | SDValue SclVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, Trunc); |
37920 | SDValue Movl = DAG.getNode(X86ISD::VZEXT_MOVL, DL, VecVT, SclVec); |
37921 | return DAG.getBitcast(VT, Movl); |
37922 | } |
37923 | } |
37924 | |
37925 | |
37926 | // Load a scalar integer constant directly to XMM instead of transferring |
37927 | // an immediate from a GPR: vzext_movl (scalar_to_vector C) --> load [C,0...] |
37928 | if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR) { |
37929 | if (auto *C = dyn_cast<ConstantSDNode>(N0.getOperand(0))) { |
37930 | // Create a vector constant: the scalar constant followed by zeros. |
37931 | EVT ScalarVT = N0.getOperand(0).getValueType(); |
37932 | Type *ScalarTy = ScalarVT.getTypeForEVT(*DAG.getContext()); |
37933 | unsigned NumElts = VT.getVectorNumElements(); |
37934 | Constant *Zero = ConstantInt::getNullValue(ScalarTy); |
37935 | SmallVector<Constant *, 32> ConstantVec(NumElts, Zero); |
37936 | ConstantVec[0] = const_cast<ConstantInt *>(C->getConstantIntValue()); |
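      | // Illustrative: for C == 42 and VT == v4i32 this emits a constant-pool |
      | // load of <42, 0, 0, 0>. |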
37937 | |
37938 | |
37939 | MVT PVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); |
37940 | SDValue CP = DAG.getConstantPool(ConstantVector::get(ConstantVec), PVT); |
37941 | MachinePointerInfo MPI = |
37942 | MachinePointerInfo::getConstantPool(DAG.getMachineFunction()); |
37943 | Align Alignment = cast<ConstantPoolSDNode>(CP)->getAlign(); |
37944 | return DAG.getLoad(VT, DL, DAG.getEntryNode(), CP, MPI, Alignment, |
37945 | MachineMemOperand::MOLoad); |
37946 | } |
37947 | } |
37948 | |
37949 | |
37950 | |
37951 | // Pull subvector inserts into undef through VZEXT_MOVL by making it an |
37952 | // insert into a zero vector; this helps combine with subvector loads. |
37953 | if (!DCI.isBeforeLegalizeOps() && N0.hasOneUse()) { |
37954 | SDValue V = peekThroughOneUseBitcasts(N0); |
37955 | |
37956 | if (V.getOpcode() == ISD::INSERT_SUBVECTOR && V.getOperand(0).isUndef() && |
37957 | isNullConstant(V.getOperand(2))) { |
37958 | SDValue In = V.getOperand(1); |
37959 | MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(), |
37960 | In.getValueSizeInBits() / |
37961 | VT.getScalarSizeInBits()); |
37962 | In = DAG.getBitcast(SubVT, In); |
37963 | SDValue Movl = DAG.getNode(X86ISD::VZEXT_MOVL, DL, SubVT, In); |
37964 | return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, |
37965 | getZeroVector(VT, Subtarget, DAG, DL), Movl, |
37966 | V.getOperand(2)); |
37967 | } |
37968 | } |
37969 | |
37970 | return SDValue(); |
37971 | } |
37972 | case X86ISD::BLENDI: { |
37973 | SDValue N0 = N.getOperand(0); |
37974 | SDValue N1 = N.getOperand(1); |
37975 | |
37976 | |
37977 | // blend(bitcast(x),bitcast(y)) -> bitcast(blend(x,y)) on narrower types. |
37978 | if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST && |
37979 | N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()) { |
37980 | MVT SrcVT = N0.getOperand(0).getSimpleValueType(); |
37981 | if ((VT.getScalarSizeInBits() % SrcVT.getScalarSizeInBits()) == 0 && |
37982 | SrcVT.getScalarSizeInBits() >= 32) { |
37983 | unsigned BlendMask = N.getConstantOperandVal(2); |
37984 | unsigned Size = VT.getVectorNumElements(); |
37985 | unsigned Scale = VT.getScalarSizeInBits() / SrcVT.getScalarSizeInBits(); |
37986 | BlendMask = scaleVectorShuffleBlendMask(BlendMask, Size, Scale); |
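      | // Illustrative: a v4i64 blend mask 0b0101 with Scale == 2 becomes the |
      | // v8i32 mask 0b00110011 - each wide lane covers Scale narrow lanes. |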
37987 | return DAG.getBitcast( |
37988 | VT, DAG.getNode(X86ISD::BLENDI, DL, SrcVT, N0.getOperand(0), |
37989 | N1.getOperand(0), |
37990 | DAG.getTargetConstant(BlendMask, DL, MVT::i8))); |
37991 | } |
37992 | } |
37993 | return SDValue(); |
37994 | } |
37995 | case X86ISD::VPERMI: { |
37996 | |
37997 | // vpermi(bitcast(x)) -> bitcast(vpermi(x)) for same-width scalar types. |
37998 | SDValue N0 = N.getOperand(0); |
37999 | SDValue N1 = N.getOperand(1); |
38000 | unsigned EltSizeInBits = VT.getScalarSizeInBits(); |
38001 | if (N0.getOpcode() == ISD::BITCAST && |
38002 | N0.getOperand(0).getScalarValueSizeInBits() == EltSizeInBits) { |
38003 | SDValue Src = N0.getOperand(0); |
38004 | EVT SrcVT = Src.getValueType(); |
38005 | SDValue Res = DAG.getNode(X86ISD::VPERMI, DL, SrcVT, Src, N1); |
38006 | return DAG.getBitcast(VT, Res); |
38007 | } |
38008 | return SDValue(); |
38009 | } |
38010 | case X86ISD::VPERM2X128: { |
38011 | // Fold vperm2x128(bitcast(x),bitcast(y),c) -> bitcast(vperm2x128(x,y,c)). |
38012 | SDValue LHS = N->getOperand(0); |
38013 | SDValue RHS = N->getOperand(1); |
38014 | if (LHS.getOpcode() == ISD::BITCAST && |
38015 | (RHS.getOpcode() == ISD::BITCAST || RHS.isUndef())) { |
38016 | EVT SrcVT = LHS.getOperand(0).getValueType(); |
38017 | if (RHS.isUndef() || SrcVT == RHS.getOperand(0).getValueType()) { |
38018 | return DAG.getBitcast(VT, DAG.getNode(X86ISD::VPERM2X128, DL, SrcVT, |
38019 | DAG.getBitcast(SrcVT, LHS), |
38020 | DAG.getBitcast(SrcVT, RHS), |
38021 | N->getOperand(2))); |
38022 | } |
38023 | } |
38024 | |
38025 | // Fold vperm2x128(op(x),op(y)) -> op(vperm2x128(x,y)) where profitable. |
38026 | if (SDValue Res = canonicalizeLaneShuffleWithRepeatedOps(N, DAG, DL)) |
38027 | return Res; |
38028 | |
38029 | // Fold vperm2x128 of a concat pattern into a concat of 128-bit halves: |
38030 | // each nibble of the immediate selects one 128-bit subvector. |
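      | // Illustrative: vperm2x128(concat(X,Y),concat(Z,W),0x31) -> concat(Y,W). |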
38031 | auto FindSubVector128 = [&](unsigned Idx) { |
38032 | if (Idx > 3) |
38033 | return SDValue(); |
38034 | SDValue Src = peekThroughBitcasts(N.getOperand(Idx < 2 ? 0 : 1)); |
38035 | SmallVector<SDValue> SubOps; |
38036 | if (collectConcatOps(Src.getNode(), SubOps) && SubOps.size() == 2) |
38037 | return SubOps[Idx & 1]; |
38038 | unsigned NumElts = Src.getValueType().getVectorNumElements(); |
38039 | if ((Idx & 1) == 1 && Src.getOpcode() == ISD::INSERT_SUBVECTOR && |
38040 | Src.getOperand(1).getValueSizeInBits() == 128 && |
38041 | Src.getConstantOperandAPInt(2) == (NumElts / 2)) { |
38042 | return Src.getOperand(1); |
38043 | } |
38044 | return SDValue(); |
38045 | }; |
38046 | unsigned Imm = N.getConstantOperandVal(2); |
38047 | if (SDValue SubLo = FindSubVector128(Imm & 0x0F)) { |
38048 | if (SDValue SubHi = FindSubVector128((Imm & 0xF0) >> 4)) { |
38049 | MVT SubVT = VT.getHalfNumVectorElementsVT(); |
38050 | SubLo = DAG.getBitcast(SubVT, SubLo); |
38051 | SubHi = DAG.getBitcast(SubVT, SubHi); |
38052 | return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, SubLo, SubHi); |
38053 | } |
38054 | } |
38055 | return SDValue(); |
38056 | } |
38057 | case X86ISD::PSHUFD: |
38058 | case X86ISD::PSHUFLW: |
38059 | case X86ISD::PSHUFHW: |
38060 | Mask = getPSHUFShuffleMask(N); |
38061 | assert(Mask.size() == 4); |
38062 | break; |
38063 | case X86ISD::MOVSD: |
38064 | case X86ISD::MOVSS: { |
38065 | SDValue N0 = N.getOperand(0); |
38066 | SDValue N1 = N.getOperand(1); |
38067 | |
38068 | // Canonicalize scalar FPOps: |
38069 | // MOVS*(N0, OP(N0, N1)) --> MOVS*(N0, SCALAR_TO_VECTOR(OP(N0[0], N1[0]))) |
38070 | // If commutable, also allow OP(N1[0], N0[0]). |
38071 | unsigned Opcode1 = N1.getOpcode(); |
38072 | if (Opcode1 == ISD::FADD || Opcode1 == ISD::FMUL || Opcode1 == ISD::FSUB || |
38073 | Opcode1 == ISD::FDIV) { |
38074 | SDValue N10 = N1.getOperand(0); |
38075 | SDValue N11 = N1.getOperand(1); |
38076 | if (N10 == N0 || |
38077 | (N11 == N0 && (Opcode1 == ISD::FADD || Opcode1 == ISD::FMUL))) { |
38078 | if (N10 != N0) |
38079 | std::swap(N10, N11); |
38080 | MVT SVT = VT.getVectorElementType(); |
38081 | SDValue ZeroIdx = DAG.getIntPtrConstant(0, DL); |
38082 | N10 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, SVT, N10, ZeroIdx); |
38083 | N11 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, SVT, N11, ZeroIdx); |
38084 | SDValue Scl = DAG.getNode(Opcode1, DL, SVT, N10, N11); |
38085 | SDValue SclVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Scl); |
38086 | return DAG.getNode(Opcode, DL, VT, N0, SclVec); |
38087 | } |
38088 | } |
38089 | |
38090 | return SDValue(); |
38091 | } |
38092 | case X86ISD::INSERTPS: { |
38093 | assert(VT == MVT::v4f32 && "INSERTPS ValueType must be MVT::v4f32"); |
38094 | SDValue Op0 = N.getOperand(0); |
38095 | SDValue Op1 = N.getOperand(1); |
38096 | unsigned InsertPSMask = N.getConstantOperandVal(2); |
38097 | unsigned SrcIdx = (InsertPSMask >> 6) & 0x3; |
38098 | unsigned DstIdx = (InsertPSMask >> 4) & 0x3; |
38099 | unsigned ZeroMask = InsertPSMask & 0xF; |
38100 | |
38101 | // If we zero out all elements from Op0 then we don't need to reference it. |
38102 | if (((ZeroMask | (1u << DstIdx)) == 0xF) && !Op0.isUndef()) |
38103 | return DAG.getNode(X86ISD::INSERTPS, DL, VT, DAG.getUNDEF(VT), Op1, |
38104 | DAG.getTargetConstant(InsertPSMask, DL, MVT::i8)); |
38105 | |
38106 | // If we zero out the element from Op1 then we don't need to reference it. |
38107 | if ((ZeroMask & (1u << DstIdx)) && !Op1.isUndef()) |
38108 | return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, DAG.getUNDEF(VT), |
38109 | DAG.getTargetConstant(InsertPSMask, DL, MVT::i8)); |
38110 | |
38111 | // Attempt to merge the inserted element with an inner shuffle of Op1. |
38112 | SmallVector<int, 8> TargetMask1; |
38113 | SmallVector<SDValue, 2> Ops1; |
38114 | APInt KnownUndef1, KnownZero1; |
38115 | if (getTargetShuffleAndZeroables(Op1, TargetMask1, Ops1, KnownUndef1, |
38116 | KnownZero1)) { |
38117 | if (KnownUndef1[SrcIdx] || KnownZero1[SrcIdx]) { |
38118 | // The source element is undef/zero: zero the destination lane instead. |
38119 | InsertPSMask |= (1u << DstIdx); |
38120 | return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, DAG.getUNDEF(VT), |
38121 | DAG.getTargetConstant(InsertPSMask, DL, MVT::i8)); |
38122 | } |
38123 | // Update the insertps source index and use the shuffle input directly. |
38124 | int M = TargetMask1[SrcIdx]; |
38125 | assert(0 <= M && M < 8 && "Shuffle index out of range"); |
38126 | InsertPSMask = (InsertPSMask & 0x3f) | ((M & 0x3) << 6); |
38127 | Op1 = Ops1[M < 4 ? 0 : 1]; |
38128 | return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, Op1, |
38129 | DAG.getTargetConstant(InsertPSMask, DL, MVT::i8)); |
38130 | } |
38131 | |
38132 | // Attempt to merge Op0 with an inner target shuffle node. |
38133 | SmallVector<int, 8> TargetMask0; |
38134 | SmallVector<SDValue, 2> Ops0; |
38135 | APInt KnownUndef0, KnownZero0; |
38136 | if (getTargetShuffleAndZeroables(Op0, TargetMask0, Ops0, KnownUndef0, |
38137 | KnownZero0)) { |
38138 | bool Updated = false; |
38139 | bool UseInput00 = false; |
38140 | bool UseInput01 = false; |
38141 | for (int i = 0; i != 4; ++i) { |
38142 | if ((InsertPSMask & (1u << i)) || (i == (int)DstIdx)) { |
38143 | // Lane is already zeroed or is the insertion slot - nothing to check. |
38144 | continue; |
38145 | } else if (KnownUndef0[i] || KnownZero0[i]) { |
38146 | // Referenced element is undef/zero, so zero it via the mask instead. |
38147 | InsertPSMask |= (1u << i); |
38148 | Updated = true; |
38149 | continue; |
38150 | } |
38151 | |
38152 | // Remaining elements must come through unmoved from one of the inputs. |
38153 | int M = TargetMask0[i]; |
38154 | if (M != i && M != (i + 4)) |
38155 | return SDValue(); |
38156 | |
38157 | // Track which of the shuffle's two inputs is referenced. |
38158 | UseInput00 |= (0 <= M && M < 4); |
38159 | UseInput01 |= (4 <= M); |
38160 | } |
38161 | |
38162 | |
38163 | // If only one shuffle input is used, reference it directly as Op0. |
38164 | if (UseInput00 && !UseInput01) { |
38165 | Updated = true; |
38166 | Op0 = Ops0[0]; |
38167 | } else if (!UseInput00 && UseInput01) { |
38168 | Updated = true; |
38169 | Op0 = Ops0[1]; |
38170 | } |
38171 | |
38172 | if (Updated) |
38173 | return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, Op1, |
38174 | DAG.getTargetConstant(InsertPSMask, DL, MVT::i8)); |
38175 | } |
38176 | |
38177 | |
38178 | // If inserting an element from a broadcast-load, fold to a scalar f32 |
38179 | // load and clear the source-lane bits of the INSERTPS immediate. |
38180 | if (Op1.getOpcode() == X86ISD::VBROADCAST_LOAD && Op1.hasOneUse()) { |
38181 | auto *MemIntr = cast<MemIntrinsicSDNode>(Op1); |
38182 | if (MemIntr->getMemoryVT().getScalarSizeInBits() == 32) { |
38183 | SDValue Load = DAG.getLoad(MVT::f32, DL, MemIntr->getChain(), |
38184 | MemIntr->getBasePtr(), |
38185 | MemIntr->getMemOperand()); |
38186 | SDValue Insert = DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, |
38187 | DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, |
38188 | Load), |
38189 | DAG.getTargetConstant(InsertPSMask & 0x3f, DL, MVT::i8)); |
38190 | DAG.ReplaceAllUsesOfValueWith(SDValue(MemIntr, 1), Load.getValue(1)); |
38191 | return Insert; |
38192 | } |
38193 | } |
38194 | |
38195 | return SDValue(); |
38196 | } |
38197 | default: |
38198 | return SDValue(); |
38199 | } |
38200 | |
38201 | // Nuke no-op shuffles that show up after combining. |
38202 | if (isNoopShuffleMask(Mask)) |
38203 | return N.getOperand(0); |
38204 | |
38205 | // Look for simplifications involving one or two shuffle instructions. |
38206 | SDValue V = N.getOperand(0); |
38207 | switch (N.getOpcode()) { |
38208 | default: |
38209 | break; |
38210 | case X86ISD::PSHUFLW: |
38211 | case X86ISD::PSHUFHW: |
38212 | assert(VT.getVectorElementType() == MVT::i16 && "Bad word shuffle type!"); |
38213 | |
38214 | // See if this reduces to a PSHUFD which is no more expensive and can |
38215 | // combine with more operations. Note that it has to at least flip the |
38216 | // dwords as otherwise it would have been removed as a no-op. |
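      | // Illustrative: PSHUFLW mask <2,3,0,1> swaps the two low dwords, which |
      | // a single PSHUFD with dword mask <1,0,2,3> also performs. |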
38217 | if (makeArrayRef(Mask).equals({2, 3, 0, 1})) { |
38218 | int DMask[] = {0, 1, 2, 3}; |
38219 | int DOffset = N.getOpcode() == X86ISD::PSHUFLW ? 0 : 2; |
38220 | DMask[DOffset + 0] = DOffset + 1; |
38221 | DMask[DOffset + 1] = DOffset + 0; |
38222 | MVT DVT = MVT::getVectorVT(MVT::i32, VT.getVectorNumElements() / 2); |
38223 | V = DAG.getBitcast(DVT, V); |
38224 | V = DAG.getNode(X86ISD::PSHUFD, DL, DVT, V, |
38225 | getV4X86ShuffleImm8ForMask(DMask, DL, DAG)); |
38226 | return DAG.getBitcast(VT, V); |
38227 | } |
38228 | |
38229 | |
38230 | // When this word shuffle sits on another word shuffle over a dword |
38231 | // shuffle of one input, try to reduce the whole chain to one unpack. |
38232 | if (Mask[0] == Mask[1] && Mask[2] == Mask[3] && |
38233 | (V.getOpcode() == X86ISD::PSHUFLW || |
38234 | V.getOpcode() == X86ISD::PSHUFHW) && |
38235 | V.getOpcode() != N.getOpcode() && |
38236 | V.hasOneUse() && V.getOperand(0).hasOneUse()) { |
38237 | SDValue D = peekThroughOneUseBitcasts(V.getOperand(0)); |
38238 | if (D.getOpcode() == X86ISD::PSHUFD) { |
38239 | SmallVector<int, 4> VMask = getPSHUFShuffleMask(V); |
38240 | SmallVector<int, 4> DMask = getPSHUFShuffleMask(D); |
38241 | int NOffset = N.getOpcode() == X86ISD::PSHUFLW ? 0 : 4; |
38242 | int VOffset = V.getOpcode() == X86ISD::PSHUFLW ? 0 : 4; |
38243 | int WordMask[8]; |
38244 | for (int i = 0; i < 4; ++i) { |
38245 | WordMask[i + NOffset] = Mask[i] + NOffset; |
38246 | WordMask[i + VOffset] = VMask[i] + VOffset; |
38247 | } |
38248 | // Map the composed word mask through the dword shuffle mask. |
38249 | int MappedMask[8]; |
38250 | for (int i = 0; i < 8; ++i) |
38251 | MappedMask[i] = 2 * DMask[WordMask[i] / 2] + WordMask[i] % 2; |
38252 | if (makeArrayRef(MappedMask).equals({0, 0, 1, 1, 2, 2, 3, 3}) || |
38253 | makeArrayRef(MappedMask).equals({4, 4, 5, 5, 6, 6, 7, 7})) { |
38254 | // We can replace all three shuffles with an unpack. |
38255 | V = DAG.getBitcast(VT, D.getOperand(0)); |
38256 | return DAG.getNode(MappedMask[0] == 0 ? X86ISD::UNPCKL |
38257 | : X86ISD::UNPCKH, |
38258 | DL, VT, V, V); |
38259 | } |
38260 | } |
38261 | } |
38262 | |
38263 | break; |
38264 | |
38265 | case X86ISD::PSHUFD: |
38266 | if (SDValue NewN = combineRedundantDWordShuffle(N, Mask, DAG)) |
38267 | return NewN; |
38268 | |
38269 | break; |
38270 | } |
38271 | |
38272 | return SDValue(); |
38273 | } |
38274 | |
38275 | |
38276 | // Checks whether the shuffle mask takes its elements alternately from |
38277 | // two vector operands (even lanes from one, odd lanes from the other). |
38278 | static bool isAddSubOrSubAddMask(ArrayRef<int> Mask, bool &Op0Even) { |
38279 | // Record which source feeds the even positions and which the odd ones. |
38280 | int ParitySrc[2] = {-1, -1}; |
38281 | unsigned Size = Mask.size(); |
38282 | for (unsigned i = 0; i != Size; ++i) { |
38283 | int M = Mask[i]; |
38284 | if (M < 0) |
38285 | continue; |
38286 | |
38287 | // Make sure we are using the matching element from the input. |
38288 | if ((M % Size) != i) |
38289 | return false; |
38290 | |
38291 | // Make sure we use the same operand for all even and for all odd elements. |
38292 | int Src = M / Size; |
38293 | if (ParitySrc[i % 2] >= 0 && ParitySrc[i % 2] != Src) |
38294 | return false; |
38295 | ParitySrc[i % 2] = Src; |
38296 | } |
38297 | |
38298 | // Make sure each input is used. |
38299 | if (ParitySrc[0] < 0 || ParitySrc[1] < 0 || ParitySrc[0] == ParitySrc[1]) |
38300 | return false; |
38301 | |
38302 | Op0Even = ParitySrc[0] == 0; |
38303 | return true; |
38304 | } |
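      | // Illustrative: for v4f32, mask <0,5,2,7> takes even lanes from operand |
      | // 0 and odd lanes from operand 1, so Op0Even == true; <4,1,6,3> yields |
      | // Op0Even == false. |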
38305 | |
38306 | |
38307 | |
38308 | |
38309 | |
38310 | |
38311 | |
38312 | // Returns true iff shuffle node N can be replaced with an ADDSUB (or |
38313 | // SUBADD) operation; if so, its operands are written to Opnd0 and Opnd1, |
38314 | // and IsSubAdd is set when the even lanes add and the odd lanes subtract. |
38315 | static bool isAddSubOrSubAdd(SDNode *N, const X86Subtarget &Subtarget, |
38316 | SelectionDAG &DAG, SDValue &Opnd0, SDValue &Opnd1, |
38317 | bool &IsSubAdd) { |
38318 | // ADDSUB/SUBADD require SSE3 and a legal floating-point vector type. |
38319 | EVT VT = N->getValueType(0); |
38320 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
38321 | if (!Subtarget.hasSSE3() || !TLI.isTypeLegal(VT) || |
38322 | !VT.getSimpleVT().isFloatingPoint()) |
38323 | return false; |
38324 | |
38325 | |
38326 | |
38327 | // We only handle target-independent shuffles here. |
38328 | if (N->getOpcode() != ISD::VECTOR_SHUFFLE) |
38329 | return false; |
38330 | |
38331 | SDValue V1 = N->getOperand(0); |
38332 | SDValue V2 = N->getOperand(1); |
38333 | |
38334 | // Make sure we have exactly one FADD and one FSUB. |
38335 | if ((V1.getOpcode() != ISD::FADD && V1.getOpcode() != ISD::FSUB) || |
38336 | (V2.getOpcode() != ISD::FADD && V2.getOpcode() != ISD::FSUB) || |
38337 | V1.getOpcode() == V2.getOpcode()) |
38338 | return false; |
38339 | |
38340 | // If there are other uses of these operations we can't fold them. |
38341 | if (!V1->hasOneUse() || !V2->hasOneUse()) |
38342 | return false; |
38343 | |
38344 | |
38345 | // Ensure both operations have the same operands (FADD operands commute). |
38346 | SDValue LHS, RHS; |
38347 | if (V1.getOpcode() == ISD::FSUB) { |
38348 | LHS = V1->getOperand(0); RHS = V1->getOperand(1); |
38349 | if ((V2->getOperand(0) != LHS || V2->getOperand(1) != RHS) && |
38350 | (V2->getOperand(0) != RHS || V2->getOperand(1) != LHS)) |
38351 | return false; |
38352 | } else { |
38353 | assert(V2.getOpcode() == ISD::FSUB && "Unexpected opcode"); |
38354 | LHS = V2->getOperand(0); RHS = V2->getOperand(1); |
38355 | if ((V1->getOperand(0) != LHS || V1->getOperand(1) != RHS) && |
38356 | (V1->getOperand(0) != RHS || V1->getOperand(1) != LHS)) |
38357 | return false; |
38358 | } |
38359 | |
38360 | ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(N)->getMask(); |
38361 | bool Op0Even; |
38362 | if (!isAddSubOrSubAddMask(Mask, Op0Even)) |
38363 | return false; |
38364 | |
38365 | // It's a subadd if the vector in the even parity is the FADD. |
38366 | IsSubAdd = Op0Even ? V1->getOpcode() == ISD::FADD |
38367 | : V2->getOpcode() == ISD::FADD; |
38368 | |
38369 | Opnd0 = LHS; |
38370 | Opnd1 = RHS; |
38371 | return true; |
38372 | } |
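      | // Illustrative: shuffle <0,5,2,7> of (fsub X,Y),(fadd X,Y) subtracts in |
      | // the even lanes and adds in the odd ones - exactly X86ISD::ADDSUB(X,Y). |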
38373 | |
38374 | // Combine shuffle of two fma nodes into FMAddSub or FMSubAdd. |
38375 | static SDValue combineShuffleToFMAddSub(SDNode *N, |
38376 | const X86Subtarget &Subtarget, |
38377 | SelectionDAG &DAG) { |
38378 | |
38379 | |
38380 | // We only handle target-independent shuffles here. |
38381 | if (N->getOpcode() != ISD::VECTOR_SHUFFLE) |
38382 | return SDValue(); |
38383 | |
38384 | MVT VT = N->getSimpleValueType(0); |
38385 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
38386 | if (!Subtarget.hasAnyFMA() || !TLI.isTypeLegal(VT)) |
38387 | return SDValue(); |
38388 | |
38389 | // We're looking for a shuffle of an FMA node with a matching FMSUB node. |
38390 | SDValue Op0 = N->getOperand(0); |
38391 | SDValue Op1 = N->getOperand(1); |
38392 | SDValue FMAdd = Op0, FMSub = Op1; |
38393 | if (FMSub.getOpcode() != X86ISD::FMSUB) |
38394 | std::swap(FMAdd, FMSub); |
38395 | |
38396 | if (FMAdd.getOpcode() != ISD::FMA || FMSub.getOpcode() != X86ISD::FMSUB || |
38397 | FMAdd.getOperand(0) != FMSub.getOperand(0) || !FMAdd.hasOneUse() || |
38398 | FMAdd.getOperand(1) != FMSub.getOperand(1) || !FMSub.hasOneUse() || |
38399 | FMAdd.getOperand(2) != FMSub.getOperand(2)) |
38400 | return SDValue(); |
38401 | |
38402 | |
38403 | // Check for the correct alternating shuffle mask. |
38404 | bool Op0Even; |
38405 | if (!isAddSubOrSubAddMask(Mask, Op0Even)) |
38406 | return SDValue(); |
38407 | |
38408 | // Build the fused node from the shared FMA/FMSUB operands. |
38409 | SDLoc DL(N); |
38410 | bool IsSubAdd = Op0Even ? Op0 == FMAdd : Op1 == FMAdd; |
38411 | unsigned Opcode = IsSubAdd ? X86ISD::FMSUBADD : X86ISD::FMADDSUB; |
38412 | return DAG.getNode(Opcode, DL, VT, FMAdd.getOperand(0), FMAdd.getOperand(1), |
38413 | FMAdd.getOperand(2)); |
38414 | } |
38415 | |
38416 | |
38417 | // Try to combine a shuffle into a target-specific add-sub or mul-add-sub. |
38418 | static SDValue combineShuffleToAddSubOrFMAddSub(SDNode *N, |
38419 | const X86Subtarget &Subtarget, |
38420 | SelectionDAG &DAG) { |
38421 | if (SDValue V = combineShuffleToFMAddSub(N, Subtarget, DAG)) |
38422 | return V; |
38423 | |
38424 | SDValue Opnd0, Opnd1; |
38425 | bool IsSubAdd; |
38426 | if (!isAddSubOrSubAdd(N, Subtarget, DAG, Opnd0, Opnd1, IsSubAdd)) |
38427 | return SDValue(); |
38428 | |
38429 | MVT VT = N->getSimpleValueType(0); |
38430 | SDLoc DL(N); |
38431 | |
38432 | // Try to generate an X86ISD::FMADDSUB node here. |
38433 | SDValue Opnd2; |
38434 | if (isFMAddSubOrFMSubAdd(Subtarget, DAG, Opnd0, Opnd1, Opnd2, 2)) { |
38435 | unsigned Opc = IsSubAdd ? X86ISD::FMSUBADD : X86ISD::FMADDSUB; |
38436 | return DAG.getNode(Opc, DL, VT, Opnd0, Opnd1, Opnd2); |
38437 | } |
38438 | |
38439 | if (IsSubAdd) |
38440 | return SDValue(); |
38441 | |
38442 | // Do not generate X86ISD::ADDSUB for 512-bit types even though the |
38443 | // ADDSUB idiom has been successfully recognized: there are no known |
38444 | // X86 targets with 512-bit ADDSUB instructions! |
38445 | if (VT.is512BitVector()) |
38446 | return SDValue(); |
38447 | |
38448 | return DAG.getNode(X86ISD::ADDSUB, DL, VT, Opnd0, Opnd1); |
38449 | } |
38450 | |
38451 | |
38452 | // Combine shuffle(concat(x,undef),concat(y,undef)) into a shuffle of |
38453 | // concat(x,y), which AVX2 can lower with a single VPERMD/VPERMQ. |
38454 | static SDValue combineShuffleOfConcatUndef(SDNode *N, SelectionDAG &DAG, |
38455 | const X86Subtarget &Subtarget) { |
38456 | if (!Subtarget.hasAVX2() || !isa<ShuffleVectorSDNode>(N)) |
38457 | return SDValue(); |
38458 | |
38459 | EVT VT = N->getValueType(0); |
38460 | |
38461 | // We only care about shuffles of 128/256-bit vectors of 32/64-bit values. |
38462 | if (!VT.is128BitVector() && !VT.is256BitVector()) |
38463 | return SDValue(); |
38464 | |
38465 | if (VT.getVectorElementType() != MVT::i32 && |
38466 | VT.getVectorElementType() != MVT::i64 && |
38467 | VT.getVectorElementType() != MVT::f32 && |
38468 | VT.getVectorElementType() != MVT::f64) |
38469 | return SDValue(); |
38470 | |
38471 | SDValue N0 = N->getOperand(0); |
38472 | SDValue N1 = N->getOperand(1); |
38473 | |
38474 | // Check that both sources are concats with undef upper halves. |
38475 | if (N0.getOpcode() != ISD::CONCAT_VECTORS || |
38476 | N1.getOpcode() != ISD::CONCAT_VECTORS || N0.getNumOperands() != 2 || |
38477 | N1.getNumOperands() != 2 || !N0.getOperand(1).isUndef() || |
38478 | !N1.getOperand(1).isUndef()) |
38479 | return SDValue(); |
38480 | |
38481 | // Construct the new shuffle mask. Elements from the first source keep |
38482 | // their index; second-source elements no longer skip an undef half. |
38483 | SmallVector<int, 8> Mask; |
38484 | int NumElts = VT.getVectorNumElements(); |
38485 | |
38486 | ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); |
38487 | for (int Elt : SVOp->getMask()) |
38488 | Mask.push_back(Elt < NumElts ? Elt : (Elt - NumElts / 2)); |
38489 | |
38490 | SDLoc DL(N); |
38491 | SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, N0.getOperand(0), |
38492 | N1.getOperand(0)); |
38493 | return DAG.getVectorShuffle(VT, DL, Concat, DAG.getUNDEF(VT), Mask); |
38494 | } |
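      | // Illustrative: a v4i64 shuffle <0,1,4,5> of concat(t1,u), concat(t2,u) |
      | // becomes shuffle <0,1,2,3> of concat(t1,t2): lanes 4,5 remap to 2,3. |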
38495 | |
38496 | |
38497 | // If a 256/512-bit shuffle only uses the low half of each source and |
38498 | // leaves the upper half of the result undef, narrow it to half width. |
38499 | static SDValue narrowShuffle(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG) { |
38500 | if (!Shuf->getValueType(0).isSimple()) |
38501 | return SDValue(); |
38502 | MVT VT = Shuf->getSimpleValueType(0); |
38503 | if (!VT.is256BitVector() && !VT.is512BitVector()) |
38504 | return SDValue(); |
38505 | |
38506 | // See if we can ignore all of the high elements of the shuffle. |
38507 | ArrayRef<int> Mask = Shuf->getMask(); |
38508 | if (!isUndefUpperHalf(Mask)) |
38509 | return SDValue(); |
38510 | |
38511 | |
38512 | // Both referenced halves must be the low halves of the source vectors. |
38513 | int HalfIdx1, HalfIdx2; |
38514 | SmallVector<int, 8> HalfMask(Mask.size() / 2); |
38515 | if (!getHalfShuffleMask(Mask, HalfMask, HalfIdx1, HalfIdx2) || |
38516 | (HalfIdx1 % 2 == 1) || (HalfIdx2 % 2 == 1)) |
38517 | return SDValue(); |
38518 | |
38519 | |
38520 | // Create a half-width shuffle to replace the unnecessarily wide shuffle. |
38521 | // The trick is that the inserts/extracts are free subregister |
38522 | // (zmm<->ymm or ymm<->xmm) ops, so we end up with a narrow shuffle that |
38523 | // is always cheaper than the wide one we started with. |
38524 | return getShuffleHalfVectors(SDLoc(Shuf), Shuf->getOperand(0), |
38525 | Shuf->getOperand(1), HalfMask, HalfIdx1, |
38526 | HalfIdx2, /*UndefLower*/ false, DAG, /*UseConcat*/ true); |
38527 | } |
38528 | |
38529 | static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG, |
38530 | TargetLowering::DAGCombinerInfo &DCI, |
38531 | const X86Subtarget &Subtarget) { |
38532 | if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(N)) |
38533 | if (SDValue V = narrowShuffle(Shuf, DAG)) |
38534 | return V; |
38535 | |
38536 | // If we have legalized the vector types, look for blends of FADD and |
38537 | // FSUB nodes that we can fuse into an ADDSUB, FMADDSUB or FMSUBADD node. |
38538 | SDLoc dl(N); |
38539 | EVT VT = N->getValueType(0); |
38540 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
38541 | if (TLI.isTypeLegal(VT)) |
38542 | if (SDValue AddSub = combineShuffleToAddSubOrFMAddSub(N, Subtarget, DAG)) |
38543 | return AddSub; |
38544 | |
38545 | // Attempt to combine into a vector load/broadcast. |
38546 | if (SDValue LD = combineToConsecutiveLoads( |
38547 | VT, SDValue(N, 0), dl, DAG, Subtarget, true)) |
38548 | return LD; |
38549 | |
38550 | // For AVX2, we sometimes want to combine |
38551 | // (vector_shuffle <mask> (concat_vectors t1, undef) |
38552 | // (concat_vectors t2, undef)) |
38553 | // into: |
38554 | // (vector_shuffle <mask> (concat_vectors t1, t2), undef) |
38555 | // since the latter can be efficiently lowered with VPERMD/VPERMQ. |
38556 | if (SDValue ShufConcat = combineShuffleOfConcatUndef(N, DAG, Subtarget)) |
38557 | return ShufConcat; |
38558 | |
38559 | if (isTargetShuffle(N->getOpcode())) { |
38560 | SDValue Op(N, 0); |
38561 | if (SDValue Shuffle = combineTargetShuffle(Op, DAG, DCI, Subtarget)) |
38562 | return Shuffle; |
38563 | |
38564 | |
38565 | // Try recursively combining arbitrary sequences of x86 shuffle |
38566 | // instructions into higher-order shuffles. We do this after combining |
38567 | // specific PSHUF instruction sequences into their minimal form so that |
38568 | // we can evaluate how many specialized shuffles are involved in a chain. |
38569 | if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget)) |
38570 | return Res; |
38571 | |
38572 | |
38573 | // Simplify source operands based on the demanded elements of the shuffle. |
38574 | APInt KnownUndef, KnownZero; |
38575 | APInt DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements()); |
38576 | if (TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero, |
38577 | DCI)) |
38578 | return SDValue(N, 0); |
38579 | } |
38580 | |
38581 | return SDValue(); |
38582 | } |
38583 | |
38584 | |
38585 | // Simplify a variable shuffle mask based on the demanded result elements. |
38586 | bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetShuffle( |
38587 | SDValue Op, const APInt &DemandedElts, unsigned MaskIndex, |
38588 | TargetLowering::TargetLoweringOpt &TLO, unsigned Depth) const { |
38589 | // If we're demanding all elements don't bother simplifying the mask. |
38590 | unsigned NumElts = DemandedElts.getBitWidth(); |
38591 | if (DemandedElts.isAllOnesValue()) |
38592 | return false; |
38593 | |
38594 | SDValue Mask = Op.getOperand(MaskIndex); |
38595 | if (!Mask.hasOneUse()) |
38596 | return false; |
38597 | |
38598 | // Attempt to generically simplify the variable shuffle mask. |
38599 | APInt MaskUndef, MaskZero; |
38600 | if (SimplifyDemandedVectorElts(Mask, DemandedElts, MaskUndef, MaskZero, TLO, |
38601 | Depth + 1)) |
38602 | return true; |
38603 | |
38604 | |
38605 | // Attempt to extract and simplify a constant-pool-loaded shuffle mask. |
38606 | SDValue BC = peekThroughOneUseBitcasts(Mask); |
38607 | EVT BCVT = BC.getValueType(); |
38608 | auto *Load = dyn_cast<LoadSDNode>(BC); |
38609 | if (!Load) |
38610 | return false; |
38611 | |
38612 | const Constant *C = getTargetConstantFromNode(Load); |
38613 | if (!C) |
38614 | return false; |
38615 | |
38616 | Type *CTy = C->getType(); |
38617 | if (!CTy->isVectorTy() || |
38618 | CTy->getPrimitiveSizeInBits() != Mask.getValueSizeInBits()) |
38619 | return false; |
38620 | |
38621 | // Handle scaling for i64 elements on 32-bit targets. |
38622 | unsigned NumCstElts = cast<FixedVectorType>(CTy)->getNumElements(); |
38623 | if (NumCstElts != NumElts && NumCstElts != (NumElts * 2)) |
38624 | return false; |
38625 | unsigned Scale = NumCstElts / NumElts; |
38626 | |
38627 | // Rewrite any undemanded (non-undef) constant elements to undef. |
38628 | bool Simplified = false; |
38629 | SmallVector<Constant *, 32> ConstVecOps; |
38630 | for (unsigned i = 0; i != NumCstElts; ++i) { |
38631 | Constant *Elt = C->getAggregateElement(i); |
38632 | if (!DemandedElts[i / Scale] && !isa<UndefValue>(Elt)) { |
38633 | ConstVecOps.push_back(UndefValue::get(Elt->getType())); |
38634 | Simplified = true; |
38635 | continue; |
38636 | } |
38637 | ConstVecOps.push_back(Elt); |
38638 | } |
38639 | if (!Simplified) |
38640 | return false; |
38641 | |
38642 | // Generate a new constant pool entry and legalize it for the load. |
38643 | SDLoc DL(Op); |
38644 | SDValue CV = TLO.DAG.getConstantPool(ConstantVector::get(ConstVecOps), BCVT); |
38645 | SDValue LegalCV = LowerConstantPool(CV, TLO.DAG); |
38646 | SDValue NewMask = TLO.DAG.getLoad( |
38647 | BCVT, DL, TLO.DAG.getEntryNode(), LegalCV, |
38648 | MachinePointerInfo::getConstantPool(TLO.DAG.getMachineFunction()), |
38649 | Load->getAlign()); |
38650 | return TLO.CombineTo(Mask, TLO.DAG.getBitcast(Mask.getValueType(), NewMask)); |
38651 | } |
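      | // Illustrative: a PSHUFB whose v16i8 constant mask is only demanded in |
      | // the low 8 lanes gets its upper 8 constant-pool bytes set to undef. |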
38652 | |
38653 | bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode( |
38654 | SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero, |
38655 | TargetLoweringOpt &TLO, unsigned Depth) const { |
38656 | int NumElts = DemandedElts.getBitWidth(); |
38657 | unsigned Opc = Op.getOpcode(); |
38658 | EVT VT = Op.getValueType(); |
38659 | |
38660 | // Handle special case opcodes. |
38661 | switch (Opc) { |
38662 | case X86ISD::PMULDQ: |
38663 | case X86ISD::PMULUDQ: { |
38664 | APInt LHSUndef, LHSZero; |
38665 | APInt RHSUndef, RHSZero; |
38666 | SDValue LHS = Op.getOperand(0); |
38667 | SDValue RHS = Op.getOperand(1); |
38668 | if (SimplifyDemandedVectorElts(LHS, DemandedElts, LHSUndef, LHSZero, TLO, |
38669 | Depth + 1)) |
38670 | return true; |
38671 | if (SimplifyDemandedVectorElts(RHS, DemandedElts, RHSUndef, RHSZero, TLO, |
38672 | Depth + 1)) |
38673 | return true; |
38674 | // Multiply by zero. |
38675 | KnownZero = LHSZero | RHSZero; |
38676 | break; |
38677 | } |
38678 | case X86ISD::VSHL: |
38679 | case X86ISD::VSRL: |
38680 | case X86ISD::VSRA: { |
38681 | // Only the bottom 64 bits of the 128-bit shift amount are used. |
38682 | SDValue Amt = Op.getOperand(1); |
38683 | MVT AmtVT = Amt.getSimpleValueType(); |
38684 | assert(AmtVT.is128BitVector() && "Unexpected value type"); |
38685 | |
38686 | // Treat the amount as single-use if every user is another vector shift |
38687 | // that only consumes it as the shift-amount operand. |
38688 | bool AssumeSingleUse = llvm::all_of(Amt->uses(), [&Amt](SDNode *Use) { |
38689 | unsigned UseOpc = Use->getOpcode(); |
38690 | return (UseOpc == X86ISD::VSHL || UseOpc == X86ISD::VSRL || |
38691 | UseOpc == X86ISD::VSRA) && |
38692 | Use->getOperand(0) != Amt; |
38693 | }); |
38694 | |
38695 | APInt AmtUndef, AmtZero; |
38696 | unsigned NumAmtElts = AmtVT.getVectorNumElements(); |
38697 | APInt AmtElts = APInt::getLowBitsSet(NumAmtElts, NumAmtElts / 2); |
38698 | if (SimplifyDemandedVectorElts(Amt, AmtElts, AmtUndef, AmtZero, TLO, |
38699 | Depth + 1, AssumeSingleUse)) |
38700 | return true; |
38701 | LLVM_FALLTHROUGH; |
38702 | } |
38703 | case X86ISD::VSHLI: |
38704 | case X86ISD::VSRLI: |
38705 | case X86ISD::VSRAI: { |
38706 | SDValue Src = Op.getOperand(0); |
38707 | APInt SrcUndef; |
38708 | if (SimplifyDemandedVectorElts(Src, DemandedElts, SrcUndef, KnownZero, TLO, |
38709 | Depth + 1)) |
38710 | return true; |
38711 | |
38712 | // Aggressively peek through ops to get at the demanded elts. |
38713 | if (!DemandedElts.isAllOnesValue()) |
38714 | if (SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts( |
38715 | Src, DemandedElts, TLO.DAG, Depth + 1)) |
38716 | return TLO.CombineTo( |
38717 | Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewSrc, Op.getOperand(1))); |
38718 | break; |
38719 | } |
38720 | case X86ISD::KSHIFTL: { |
38721 | SDValue Src = Op.getOperand(0); |
38722 | auto *Amt = cast<ConstantSDNode>(Op.getOperand(1)); |
38723 | assert(Amt->getAPIntValue().ult(NumElts) && "Out of range shift amount"); |
38724 | unsigned ShiftAmt = Amt->getZExtValue(); |
38725 | |
38726 | if (ShiftAmt == 0) |
38727 | return TLO.CombineTo(Op, Src); |
38728 | |
38729 | // If this is ((X >>u C1) << ShAmt), see if we can simplify it into a |
38730 | // single shift. We can if the bottom lanes (which are shifted out) are |
38731 | // never demanded. |
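      | // Illustrative: kshiftl(kshiftr(X, 4), 6) with the low 6 result lanes |
      | // undemanded becomes kshiftl(X, 2), since Diff = 6 - 4 = 2. |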
38732 | if (Src.getOpcode() == X86ISD::KSHIFTR) { |
38733 | if (!DemandedElts.intersects(APInt::getLowBitsSet(NumElts, ShiftAmt))) { |
38734 | unsigned C1 = Src.getConstantOperandVal(1); |
38735 | unsigned NewOpc = X86ISD::KSHIFTL; |
38736 | int Diff = ShiftAmt - C1; |
38737 | if (Diff < 0) { |
38738 | Diff = -Diff; |
38739 | NewOpc = X86ISD::KSHIFTR; |
38740 | } |
38741 | |
38742 | SDLoc dl(Op); |
38743 | SDValue NewSA = TLO.DAG.getTargetConstant(Diff, dl, MVT::i8); |
38744 | return TLO.CombineTo( |
38745 | Op, TLO.DAG.getNode(NewOpc, dl, VT, Src.getOperand(0), NewSA)); |
38746 | } |
38747 | } |
38748 | |
38749 | APInt DemandedSrc = DemandedElts.lshr(ShiftAmt); |
38750 | if (SimplifyDemandedVectorElts(Src, DemandedSrc, KnownUndef, KnownZero, TLO, |
38751 | Depth + 1)) |
38752 | return true; |
38753 | |
38754 | KnownUndef <<= ShiftAmt; |
38755 | KnownZero <<= ShiftAmt; |
38756 | KnownZero.setLowBits(ShiftAmt); |
38757 | break; |
38758 | } |
38759 | case X86ISD::KSHIFTR: { |
38760 | SDValue Src = Op.getOperand(0); |
38761 | auto *Amt = cast<ConstantSDNode>(Op.getOperand(1)); |
38762 | assert(Amt->getAPIntValue().ult(NumElts) && "Out of range shift amount"); |
38763 | unsigned ShiftAmt = Amt->getZExtValue(); |
38764 | |
38765 | if (ShiftAmt == 0) |
38766 | return TLO.CombineTo(Op, Src); |
38767 | |
38768 | // If this is ((X << C1) >>u ShAmt), see if we can simplify it into a |
38769 | // single shift. We can if the top lanes (which are shifted out) are |
38770 | // never demanded. |
38771 | if (Src.getOpcode() == X86ISD::KSHIFTL) { |
38772 | if (!DemandedElts.intersects(APInt::getHighBitsSet(NumElts, ShiftAmt))) { |
38773 | unsigned C1 = Src.getConstantOperandVal(1); |
38774 | unsigned NewOpc = X86ISD::KSHIFTR; |
38775 | int Diff = ShiftAmt - C1; |
38776 | if (Diff < 0) { |
38777 | Diff = -Diff; |
38778 | NewOpc = X86ISD::KSHIFTL; |
38779 | } |
38780 | |
38781 | SDLoc dl(Op); |
38782 | SDValue NewSA = TLO.DAG.getTargetConstant(Diff, dl, MVT::i8); |
38783 | return TLO.CombineTo( |
38784 | Op, TLO.DAG.getNode(NewOpc, dl, VT, Src.getOperand(0), NewSA)); |
38785 | } |
38786 | } |
38787 | |
38788 | APInt DemandedSrc = DemandedElts.shl(ShiftAmt); |
38789 | if (SimplifyDemandedVectorElts(Src, DemandedSrc, KnownUndef, KnownZero, TLO, |
38790 | Depth + 1)) |
38791 | return true; |
38792 | |
38793 | KnownUndef.lshrInPlace(ShiftAmt); |
38794 | KnownZero.lshrInPlace(ShiftAmt); |
38795 | KnownZero.setHighBits(ShiftAmt); |
38796 | break; |
38797 | } |
38798 | case X86ISD::CVTSI2P: |
38799 | case X86ISD::CVTUI2P: { |
38800 | SDValue Src = Op.getOperand(0); |
38801 | MVT SrcVT = Src.getSimpleValueType(); |
38802 | APInt SrcUndef, SrcZero; |
38803 | APInt SrcElts = DemandedElts.zextOrTrunc(SrcVT.getVectorNumElements()); |
38804 | if (SimplifyDemandedVectorElts(Src, SrcElts, SrcUndef, SrcZero, TLO, |
38805 | Depth + 1)) |
38806 | return true; |
38807 | break; |
38808 | } |
38809 | case X86ISD::PACKSS: |
38810 | case X86ISD::PACKUS: { |
38811 | SDValue N0 = Op.getOperand(0); |
38812 | SDValue N1 = Op.getOperand(1); |
38813 | |
38814 | APInt DemandedLHS, DemandedRHS; |
38815 | getPackDemandedElts(VT, DemandedElts, DemandedLHS, DemandedRHS); |
38816 | |
38817 | APInt LHSUndef, LHSZero; |
38818 | if (SimplifyDemandedVectorElts(N0, DemandedLHS, LHSUndef, LHSZero, TLO, |
38819 | Depth + 1)) |
38820 | return true; |
38821 | APInt RHSUndef, RHSZero; |
38822 | if (SimplifyDemandedVectorElts(N1, DemandedRHS, RHSUndef, RHSZero, TLO, |
38823 | Depth + 1)) |
38824 | return true; |
38825 | |
38826 | |
38827 | |
38828 | // Aggressively peek through ops to get at the demanded elts. |
38829 | // TODO - we should do this for all target/faux shuffle ops. |
38830 | if (!DemandedElts.isAllOnesValue()) { |
38831 | SDValue NewN0 = SimplifyMultipleUseDemandedVectorElts(N0, DemandedLHS, |
38832 | TLO.DAG, Depth + 1); |
38833 | SDValue NewN1 = SimplifyMultipleUseDemandedVectorElts(N1, DemandedRHS, |
38834 | TLO.DAG, Depth + 1); |
38835 | if (NewN0 || NewN1) { |
38836 | NewN0 = NewN0 ? NewN0 : N0; |
38837 | NewN1 = NewN1 ? NewN1 : N1; |
38838 | return TLO.CombineTo(Op, |
38839 | TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewN0, NewN1)); |
38840 | } |
38841 | } |
38842 | break; |
38843 | } |
38844 | case X86ISD::HADD: |
38845 | case X86ISD::HSUB: |
38846 | case X86ISD::FHADD: |
38847 | case X86ISD::FHSUB: { |
38848 | SDValue N0 = Op.getOperand(0); |
38849 | SDValue N1 = Op.getOperand(1); |
38850 | |
38851 | APInt DemandedLHS, DemandedRHS; |
38852 | getHorizDemandedElts(VT, DemandedElts, DemandedLHS, DemandedRHS); |
38853 | |
38854 | APInt LHSUndef, LHSZero; |
38855 | if (SimplifyDemandedVectorElts(N0, DemandedLHS, LHSUndef, LHSZero, TLO, |
38856 | Depth + 1)) |
38857 | return true; |
38858 | APInt RHSUndef, RHSZero; |
38859 | if (SimplifyDemandedVectorElts(N1, DemandedRHS, RHSUndef, RHSZero, TLO, |
38860 | Depth + 1)) |
38861 | return true; |
38862 | |
38863 | |
38864 | |
38865 | // Aggressively peek through ops to get at the demanded elts. |
38866 | // TODO: Handle repeated operands. |
38867 | if (N0 != N1 && !DemandedElts.isAllOnesValue()) { |
38868 | SDValue NewN0 = SimplifyMultipleUseDemandedVectorElts(N0, DemandedLHS, |
38869 | TLO.DAG, Depth + 1); |
38870 | SDValue NewN1 = SimplifyMultipleUseDemandedVectorElts(N1, DemandedRHS, |
38871 | TLO.DAG, Depth + 1); |
38872 | if (NewN0 || NewN1) { |
38873 | NewN0 = NewN0 ? NewN0 : N0; |
38874 | NewN1 = NewN1 ? NewN1 : N1; |
38875 | return TLO.CombineTo(Op, |
38876 | TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewN0, NewN1)); |
38877 | } |
38878 | } |
38879 | break; |
38880 | } |
38881 | case X86ISD::VTRUNC: |
38882 | case X86ISD::VTRUNCS: |
38883 | case X86ISD::VTRUNCUS: { |
38884 | SDValue Src = Op.getOperand(0); |
38885 | MVT SrcVT = Src.getSimpleValueType(); |
38886 | APInt DemandedSrc = DemandedElts.zextOrTrunc(SrcVT.getVectorNumElements()); |
38887 | APInt SrcUndef, SrcZero; |
38888 | if (SimplifyDemandedVectorElts(Src, DemandedSrc, SrcUndef, SrcZero, TLO, |
38889 | Depth + 1)) |
38890 | return true; |
38891 | KnownZero = SrcZero.zextOrTrunc(NumElts); |
38892 | KnownUndef = SrcUndef.zextOrTrunc(NumElts); |
38893 | break; |
38894 | } |
38895 | case X86ISD::BLENDV: { |
38896 | APInt SelUndef, SelZero; |
38897 | if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, SelUndef, |
38898 | SelZero, TLO, Depth + 1)) |
38899 | return true; |
38900 | |
38901 | // Simplify both blend inputs against the demanded lanes. |
38902 | APInt LHSUndef, LHSZero; |
38903 | if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, LHSUndef, |
38904 | LHSZero, TLO, Depth + 1)) |
38905 | return true; |
38906 | |
38907 | APInt RHSUndef, RHSZero; |
38908 | if (SimplifyDemandedVectorElts(Op.getOperand(2), DemandedElts, RHSUndef, |
38909 | RHSZero, TLO, Depth + 1)) |
38910 | return true; |
38911 | |
38912 | KnownZero = LHSZero & RHSZero; |
38913 | KnownUndef = LHSUndef & RHSUndef; |
38914 | break; |
38915 | } |
38916 | case X86ISD::VZEXT_MOVL: { |
38917 | // If the upper demanded elements are already zero, there's nothing to do. |
38918 | SDValue Src = Op.getOperand(0); |
38919 | APInt DemandedUpperElts = DemandedElts; |
38920 | DemandedUpperElts.clearLowBits(1); |
38921 | if (TLO.DAG.computeKnownBits(Src, DemandedUpperElts, Depth + 1).isZero()) |
38922 | return TLO.CombineTo(Op, Src); |
38923 | break; |
38924 | } |
38925 | case X86ISD::VBROADCAST: { |
38926 | SDValue Src = Op.getOperand(0); |
38927 | MVT SrcVT = Src.getSimpleValueType(); |
38928 | if (!SrcVT.isVector()) |
38929 | break; |
38930 | // Don't bother broadcasting if we just need the 0'th element. |
38931 | if (DemandedElts == 1) { |
38932 | if (Src.getValueType() != VT) |
38933 | Src = widenSubVector(VT.getSimpleVT(), Src, false, Subtarget, TLO.DAG, |
38934 | SDLoc(Op)); |
38935 | return TLO.CombineTo(Op, Src); |
38936 | } |
38937 | APInt SrcUndef, SrcZero; |
38938 | APInt SrcElts = APInt::getOneBitSet(SrcVT.getVectorNumElements(), 0); |
38939 | if (SimplifyDemandedVectorElts(Src, SrcElts, SrcUndef, SrcZero, TLO, |
38940 | Depth + 1)) |
38941 | return true; |
38942 | |
38943 | // Aggressively peek through the source to get at the demanded element. |
38944 | if (SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts( |
38945 | Src, SrcElts, TLO.DAG, Depth + 1)) |
38946 | return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewSrc)); |
38947 | break; |
38948 | } |
38949 | case X86ISD::VPERMV: |
38950 | if (SimplifyDemandedVectorEltsForTargetShuffle(Op, DemandedElts, 0, TLO, |
38951 | Depth)) |
38952 | return true; |
38953 | break; |
38954 | case X86ISD::PSHUFB: |
38955 | case X86ISD::VPERMV3: |
38956 | case X86ISD::VPERMILPV: |
38957 | if (SimplifyDemandedVectorEltsForTargetShuffle(Op, DemandedElts, 1, TLO, |
38958 | Depth)) |
38959 | return true; |
38960 | break; |
38961 | case X86ISD::VPPERM: |
38962 | case X86ISD::VPERMIL2: |
38963 | if (SimplifyDemandedVectorEltsForTargetShuffle(Op, DemandedElts, 2, TLO, |
38964 | Depth)) |
38965 | return true; |
38966 | break; |
38967 | } |
38968 | |
38969 | // For 256/512-bit ops that are 128/256-bit ops glued together, if we |
38970 | // don't demand any high elements, narrow the op to 128/256 bits, e.g. |
38971 | // (op ymm0, ymm1) --> insert undef, (op xmm0, xmm1), 0 |
38972 | if ((VT.is256BitVector() || VT.is512BitVector()) && |
38973 | DemandedElts.lshr(NumElts / 2) == 0) { |
38974 | unsigned SizeInBits = VT.getSizeInBits(); |
38975 | unsigned ExtSizeInBits = SizeInBits / 2; |
38976 | |
38977 | // See if 512-bit ops only use the bottom 128 bits. |
38978 | if (VT.is512BitVector() && DemandedElts.lshr(NumElts / 4) == 0) |
38979 | ExtSizeInBits = SizeInBits / 4; |
38980 | |
38981 | switch (Opc) { |
38982 | // Scalar broadcast. |
38983 | case X86ISD::VBROADCAST: { |
38984 | SDLoc DL(Op); |
38985 | SDValue Src = Op.getOperand(0); |
38986 | if (Src.getValueSizeInBits() > ExtSizeInBits) |
38987 | Src = extractSubVector(Src, 0, TLO.DAG, DL, ExtSizeInBits); |
38988 | EVT BcstVT = EVT::getVectorVT(*TLO.DAG.getContext(), VT.getScalarType(), |
38989 | ExtSizeInBits / VT.getScalarSizeInBits()); |
38990 | SDValue Bcst = TLO.DAG.getNode(X86ISD::VBROADCAST, DL, BcstVT, Src); |
38991 | return TLO.CombineTo(Op, insertSubVector(TLO.DAG.getUNDEF(VT), Bcst, 0, |
38992 | TLO.DAG, DL, ExtSizeInBits)); |
38993 | } |
38994 | case X86ISD::VBROADCAST_LOAD: { |
38995 | SDLoc DL(Op); |
38996 | auto *MemIntr = cast<MemIntrinsicSDNode>(Op); |
38997 | EVT BcstVT = EVT::getVectorVT(*TLO.DAG.getContext(), VT.getScalarType(), |
38998 | ExtSizeInBits / VT.getScalarSizeInBits()); |
38999 | SDVTList Tys = TLO.DAG.getVTList(BcstVT, MVT::Other); |
39000 | SDValue Ops[] = {MemIntr->getOperand(0), MemIntr->getOperand(1)}; |
39001 | SDValue Bcst = TLO.DAG.getMemIntrinsicNode( |
39002 | X86ISD::VBROADCAST_LOAD, DL, Tys, Ops, MemIntr->getMemoryVT(), |
39003 | MemIntr->getMemOperand()); |
39004 | TLO.DAG.makeEquivalentMemoryOrdering(SDValue(MemIntr, 1), |
39005 | Bcst.getValue(1)); |
39006 | return TLO.CombineTo(Op, insertSubVector(TLO.DAG.getUNDEF(VT), Bcst, 0, |
39007 | TLO.DAG, DL, ExtSizeInBits)); |
39008 | } |
39009 | // Subvector broadcast. |
39010 | case X86ISD::SUBV_BROADCAST_LOAD: { |
39011 | auto *MemIntr = cast<MemIntrinsicSDNode>(Op); |
39012 | EVT MemVT = MemIntr->getMemoryVT(); |
39013 | if (ExtSizeInBits == MemVT.getStoreSizeInBits()) { |
39014 | SDLoc DL(Op); |
39015 | SDValue Ld = |
39016 | TLO.DAG.getLoad(MemVT, DL, MemIntr->getChain(), |
39017 | MemIntr->getBasePtr(), MemIntr->getMemOperand()); |
39018 | TLO.DAG.makeEquivalentMemoryOrdering(SDValue(MemIntr, 1), |
39019 | Ld.getValue(1)); |
39020 | return TLO.CombineTo(Op, insertSubVector(TLO.DAG.getUNDEF(VT), Ld, 0, |
39021 | TLO.DAG, DL, ExtSizeInBits)); |
39022 | } else if ((ExtSizeInBits % MemVT.getStoreSizeInBits()) == 0) { |
39023 | SDLoc DL(Op); |
39024 | EVT BcstVT = EVT::getVectorVT(*TLO.DAG.getContext(), VT.getScalarType(), |
39025 | ExtSizeInBits / VT.getScalarSizeInBits()); |
39026 | SDVTList Tys = TLO.DAG.getVTList(BcstVT, MVT::Other); |
39027 | SDValue Ops[] = {MemIntr->getOperand(0), MemIntr->getOperand(1)}; |
39028 | SDValue Bcst = |
39029 | TLO.DAG.getMemIntrinsicNode(X86ISD::SUBV_BROADCAST_LOAD, DL, Tys, |
39030 | Ops, MemVT, MemIntr->getMemOperand()); |
39031 | TLO.DAG.makeEquivalentMemoryOrdering(SDValue(MemIntr, 1), |
39032 | Bcst.getValue(1)); |
39033 | return TLO.CombineTo(Op, insertSubVector(TLO.DAG.getUNDEF(VT), Bcst, 0, |
39034 | TLO.DAG, DL, ExtSizeInBits)); |
39035 | } |
39036 | break; |
39037 | } |
39038 | // Byte shifts by immediate. |
39039 | case X86ISD::VSHLDQ: |
39040 | case X86ISD::VSRLDQ: |
39041 | // Shifts by uniform amount. |
39042 | case X86ISD::VSHL: |
39043 | case X86ISD::VSRL: |
39044 | case X86ISD::VSRA: |
39045 | // Shifts by immediate. |
39046 | case X86ISD::VSHLI: |
39047 | case X86ISD::VSRLI: |
39048 | case X86ISD::VSRAI: { |
39049 | SDLoc DL(Op); |
39050 | SDValue Ext0 = |
39051 | extractSubVector(Op.getOperand(0), 0, TLO.DAG, DL, ExtSizeInBits); |
39052 | SDValue ExtOp = |
39053 | TLO.DAG.getNode(Opc, DL, Ext0.getValueType(), Ext0, Op.getOperand(1)); |
39054 | SDValue UndefVec = TLO.DAG.getUNDEF(VT); |
39055 | SDValue Insert = |
39056 | insertSubVector(UndefVec, ExtOp, 0, TLO.DAG, DL, ExtSizeInBits); |
39057 | return TLO.CombineTo(Op, Insert); |
39058 | } |
39059 | case X86ISD::VPERMI: { |
39060 | // Simplify PERMPD/PERMQ to extract_subvector. |
39061 | // TODO: This should be done in shuffle combining. |
39062 | if (VT == MVT::v4f64 || VT == MVT::v4i64) { |
39063 | SmallVector<int, 4> Mask; |
39064 | DecodeVPERMMask(NumElts, Op.getConstantOperandVal(1), Mask); |
39065 | if (isUndefOrEqual(Mask[0], 2) && isUndefOrEqual(Mask[1], 3)) { |
39066 | SDLoc DL(Op); |
39067 | SDValue Ext = extractSubVector(Op.getOperand(0), 2, TLO.DAG, DL, 128); |
39068 | SDValue UndefVec = TLO.DAG.getUNDEF(VT); |
39069 | SDValue Insert = insertSubVector(UndefVec, Ext, 0, TLO.DAG, DL, 128); |
39070 | return TLO.CombineTo(Op, Insert); |
39071 | } |
39072 | } |
39073 | break; |
39074 | } |
39075 | case X86ISD::VPERM2X128: { |
39076 | // Simplify VPERM2F128/VPERM2I128 to extract_subvector. |
39077 | SDLoc DL(Op); |
39078 | unsigned LoMask = Op.getConstantOperandVal(2) & 0xF; |
39079 | if (LoMask & 0x8) |
39080 | return TLO.CombineTo( |
39081 | Op, getZeroVector(VT.getSimpleVT(), Subtarget, TLO.DAG, DL)); |
39082 | unsigned EltIdx = (LoMask & 0x1) * (NumElts / 2); |
39083 | unsigned SrcIdx = (LoMask & 0x2) >> 1; |
39084 | SDValue ExtOp = |
39085 | extractSubVector(Op.getOperand(SrcIdx), EltIdx, TLO.DAG, DL, 128); |
39086 | SDValue UndefVec = TLO.DAG.getUNDEF(VT); |
39087 | SDValue Insert = |
39088 | insertSubVector(UndefVec, ExtOp, 0, TLO.DAG, DL, ExtSizeInBits); |
39089 | return TLO.CombineTo(Op, Insert); |
39090 | } |
39091 | // Zero upper elements. |
39092 | case X86ISD::VZEXT_MOVL: |
39093 | // Unary target shuffles by immediate: |
39094 | case X86ISD::PSHUFD: |
39095 | case X86ISD::PSHUFLW: |
39096 | case X86ISD::PSHUFHW: |
39097 | case X86ISD::VPERMILPI: |
39098 | // (Non-lane-crossing) target shuffles: |
39099 | case X86ISD::VPERMILPV: |
39100 | case X86ISD::VPERMIL2: |
39101 | case X86ISD::PSHUFB: |
39102 | case X86ISD::UNPCKL: |
39103 | case X86ISD::UNPCKH: |
39104 | case X86ISD::BLENDI: |
39105 | // Integer ops: |
39106 | case X86ISD::AVG: |
39107 | case X86ISD::PACKSS: |
39108 | case X86ISD::PACKUS: |
39109 | // Horizontal ops: |
39110 | case X86ISD::HADD: |
39111 | case X86ISD::HSUB: |
39112 | case X86ISD::FHADD: |
39113 | case X86ISD::FHSUB: { |
39114 | SDLoc DL(Op); |
39115 | SmallVector<SDValue, 4> Ops; |
39116 | for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) { |
39117 | SDValue SrcOp = Op.getOperand(i); |
39118 | EVT SrcVT = SrcOp.getValueType(); |
39119 | assert((!SrcVT.isVector() || SrcVT.getSizeInBits() == SizeInBits) && |
39120 | "Unsupported vector size"); |
39121 | Ops.push_back(SrcVT.isVector() ? extractSubVector(SrcOp, 0, TLO.DAG, DL, |
39122 | ExtSizeInBits) |
39123 | : SrcOp); |
39124 | } |
39125 | MVT ExtVT = VT.getSimpleVT(); |
39126 | ExtVT = MVT::getVectorVT(ExtVT.getScalarType(), |
39127 | ExtSizeInBits / ExtVT.getScalarSizeInBits()); |
39128 | SDValue ExtOp = TLO.DAG.getNode(Opc, DL, ExtVT, Ops); |
39129 | SDValue UndefVec = TLO.DAG.getUNDEF(VT); |
39130 | SDValue Insert = |
39131 | insertSubVector(UndefVec, ExtOp, 0, TLO.DAG, DL, ExtSizeInBits); |
39132 | return TLO.CombineTo(Op, Insert); |
39133 | } |
39134 | } |
39135 | } |
39136 | |
39137 | |
39138 | // Get target/faux shuffle mask. |
39139 | SmallVector<int, 64> OpMask; |
39140 | SmallVector<SDValue, 2> OpInputs; |
39141 | if (!getTargetShuffleInputs(Op, DemandedElts, OpInputs, OpMask, OpUndef, |
39142 | OpZero, TLO.DAG, Depth, false)) |
39143 | return false; |
39144 | |
39145 | // Shuffle inputs must be the same size as the result. |
39146 | if (OpMask.size() != (unsigned)NumElts || |
39147 | llvm::any_of(OpInputs, [VT](SDValue V) { |
39148 | return VT.getSizeInBits() != V.getValueSizeInBits() || |
39149 | !V.getValueType().isVector(); |
39150 | })) |
39151 | return false; |
39152 | |
39153 | KnownZero = OpZero; |
39154 | KnownUndef = OpUndef; |
39155 | |
39156 | // Check if the shuffle mask simplifies to undef/zero/identity. |
39157 | int NumSrcs = OpInputs.size(); |
39158 | for (int i = 0; i != NumElts; ++i) |
39159 | if (!DemandedElts[i]) |
39160 | OpMask[i] = SM_SentinelUndef; |
39161 | |
39162 | if (isUndefInRange(OpMask, 0, NumElts)) { |
39163 | KnownUndef.setAllBits(); |
39164 | return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT)); |
39165 | } |
39166 | if (isUndefOrZeroInRange(OpMask, 0, NumElts)) { |
39167 | KnownZero.setAllBits(); |
39168 | return TLO.CombineTo( |
39169 | Op, getZeroVector(VT.getSimpleVT(), Subtarget, TLO.DAG, SDLoc(Op))); |
39170 | } |
39171 | for (int Src = 0; Src != NumSrcs; ++Src) |
39172 | if (isSequentialOrUndefInRange(OpMask, 0, NumElts, Src * NumElts)) |
39173 | return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, OpInputs[Src])); |
39174 | |
39175 | // Attempt to simplify the shuffle inputs against the demanded elements. |
39176 | for (int Src = 0; Src != NumSrcs; ++Src) { |
39177 | // TODO: Support inputs of different types. |
39178 | if (OpInputs[Src].getValueType() != VT) |
39179 | continue; |
39180 | |
39181 | int Lo = Src * NumElts; |
39182 | APInt SrcElts = APInt::getNullValue(NumElts); |
39183 | for (int i = 0; i != NumElts; ++i) |
39184 | if (DemandedElts[i]) { |
39185 | int M = OpMask[i] - Lo; |
39186 | if (0 <= M && M < NumElts) |
39187 | SrcElts.setBit(M); |
39188 | } |
39189 | |
39190 | // Simplify this input against the source elements it must supply. |
39191 | APInt SrcUndef, SrcZero; |
39192 | if (SimplifyDemandedVectorElts(OpInputs[Src], SrcElts, SrcUndef, SrcZero, |
39193 | TLO, Depth + 1)) |
39194 | return true; |
39195 | } |
39196 | |
39197 | |
39198 | |
39199 | // If we don't demand all elements, attempt to combine to a simpler |
39200 | // shuffle. Convert the depth so combineX86ShufflesRecursively can handle |
39201 | // it: pretend Depth == 0 again and reduce the max depth to match. This |
39202 | // prevents combineX86ShuffleChain from returning a combined shuffle |
39203 | // that's identical to the original root, which would loop forever. |
39204 | if (!DemandedElts.isAllOnesValue()) { |
39205 | assert(Depth < X86::MaxShuffleCombineDepth && "Depth out of range"); |
39206 | |
39207 | SmallVector<int, 64> DemandedMask(NumElts, SM_SentinelUndef); |
39208 | for (int i = 0; i != NumElts; ++i) |
39209 | if (DemandedElts[i]) |
39210 | DemandedMask[i] = i; |
39211 | |
39212 | SDValue NewShuffle = combineX86ShufflesRecursively( |
39213 | {Op}, 0, Op, DemandedMask, {}, 0, X86::MaxShuffleCombineDepth - Depth, |
39214 | /*HasVariableMask*/ false, |
39215 | /*AllowCrossLaneVarMask*/ true, /*AllowPerLaneVarMask*/ true, TLO.DAG, |
39216 | Subtarget); |
39217 | if (NewShuffle) |
39218 | return TLO.CombineTo(Op, NewShuffle); |
39219 | } |
39220 | |
39221 | return false; |
39222 | } |
39223 | |
39224 | bool X86TargetLowering::SimplifyDemandedBitsForTargetNode( |
39225 | SDValue Op, const APInt &OriginalDemandedBits, |
39226 | const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, |
39227 | unsigned Depth) const { |
39228 | EVT VT = Op.getValueType(); |
39229 | unsigned BitWidth = OriginalDemandedBits.getBitWidth(); |
39230 | unsigned Opc = Op.getOpcode(); |
39231 | switch(Opc) { |
39232 | case X86ISD::VTRUNC: { |
39233 | KnownBits KnownOp; |
39234 | SDValue Src = Op.getOperand(0); |
39235 | MVT SrcVT = Src.getSimpleValueType(); |
39236 | |
39237 | // Simplify the input, using demanded bit information. |
39238 | APInt TruncMask = OriginalDemandedBits.zext(SrcVT.getScalarSizeInBits()); |
39239 | APInt DemandedElts = OriginalDemandedElts.trunc(SrcVT.getVectorNumElements()); |
39240 | if (SimplifyDemandedBits(Src, TruncMask, DemandedElts, KnownOp, TLO, Depth + 1)) |
39241 | return true; |
39242 | break; |
39243 | } |
39244 | case X86ISD::PMULDQ: |
39245 | case X86ISD::PMULUDQ: { |
39246 | // PMULDQ/PMULUDQ only use the lower 32 bits of each vector element. |
39247 | KnownBits KnownOp; |
39248 | SDValue LHS = Op.getOperand(0); |
39249 | SDValue RHS = Op.getOperand(1); |
39250 | |
39251 | APInt DemandedMask = APInt::getLowBitsSet(64, 32); |
39252 | if (SimplifyDemandedBits(LHS, DemandedMask, OriginalDemandedElts, KnownOp, |
39253 | TLO, Depth + 1)) |
39254 | return true; |
39255 | if (SimplifyDemandedBits(RHS, DemandedMask, OriginalDemandedElts, KnownOp, |
39256 | TLO, Depth + 1)) |
39257 | return true; |
39258 | |
39259 | // Aggressively peek through ops to get at the demanded low bits. |
39260 | SDValue DemandedLHS = SimplifyMultipleUseDemandedBits( |
39261 | LHS, DemandedMask, OriginalDemandedElts, TLO.DAG, Depth + 1); |
39262 | SDValue DemandedRHS = SimplifyMultipleUseDemandedBits( |
39263 | RHS, DemandedMask, OriginalDemandedElts, TLO.DAG, Depth + 1); |
39264 | if (DemandedLHS || DemandedRHS) { |
39265 | DemandedLHS = DemandedLHS ? DemandedLHS : LHS; |
39266 | DemandedRHS = DemandedRHS ? DemandedRHS : RHS; |
39267 | return TLO.CombineTo( |
39268 | Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, DemandedLHS, DemandedRHS)); |
39269 | } |
39270 | break; |
39271 | } |
39272 | case X86ISD::VSHLI: { |
39273 | SDValue Op0 = Op.getOperand(0); |
39274 | |
39275 | unsigned ShAmt = Op.getConstantOperandVal(1); |
39276 | if (ShAmt >= BitWidth) |
39277 | break; |
39278 | |
39279 | APInt DemandedMask = OriginalDemandedBits.lshr(ShAmt); |
39280 | |
39281 | |
39282 | // If this is ((X >>u C1) << ShAmt), see if we can simplify it into a |
39283 | // single shift. We can if the bottom bits (shifted out) aren't demanded. |
39284 | if (Op0.getOpcode() == X86ISD::VSRLI && |
39285 | OriginalDemandedBits.countTrailingZeros() >= ShAmt) { |
39286 | unsigned Shift2Amt = Op0.getConstantOperandVal(1); |
39287 | if (Shift2Amt < BitWidth) { |
39288 | int Diff = ShAmt - Shift2Amt; |
39289 | if (Diff == 0) |
39290 | return TLO.CombineTo(Op, Op0.getOperand(0)); |
39291 | |
39292 | unsigned NewOpc = Diff < 0 ? X86ISD::VSRLI : X86ISD::VSHLI; |
39293 | SDValue NewShift = TLO.DAG.getNode( |
39294 | NewOpc, SDLoc(Op), VT, Op0.getOperand(0), |
39295 | TLO.DAG.getTargetConstant(std::abs(Diff), SDLoc(Op), MVT::i8)); |
39296 | return TLO.CombineTo(Op, NewShift); |
39297 | } |
39298 | } |
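      | // Illustrative: vshli(vsrli(X, 2), 5) with the low 5 demanded bits clear |
      | // becomes vshli(X, 3), since Diff = 5 - 2 = 3. |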
39299 | |
39300 | // If we're only demanding sign bits, we can use the shift source directly. |
39301 | unsigned NumSignBits = |
39302 | TLO.DAG.ComputeNumSignBits(Op0, OriginalDemandedElts, Depth + 1); |
39303 | unsigned UpperDemandedBits = |
39304 | BitWidth - OriginalDemandedBits.countTrailingZeros(); |
39305 | if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= UpperDemandedBits) |
39306 | return TLO.CombineTo(Op, Op0); |
39307 | |
39308 | if (SimplifyDemandedBits(Op0, DemandedMask, OriginalDemandedElts, Known, |
39309 | TLO, Depth + 1)) |
39310 | return true; |
39311 | |
39312 | assert(!Known.hasConflict() && "Bits known to be one AND zero?"); |
39313 | Known.Zero <<= ShAmt; |
39314 | Known.One <<= ShAmt; |
39315 | |
39316 | // Low bits are known zero. |
39317 | Known.Zero.setLowBits(ShAmt); |
39318 | return false; |
39319 | } |
39320 | case X86ISD::VSRLI: { |
39321 | unsigned ShAmt = Op.getConstantOperandVal(1); |
39322 | if (ShAmt >= BitWidth) |
39323 | break; |
39324 | |
39325 | APInt DemandedMask = OriginalDemandedBits << ShAmt; |
39326 | |
39327 | if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask, |
39328 | OriginalDemandedElts, Known, TLO, Depth + 1)) |
39329 | return true; |
39330 | |
39331 | assert(!Known.hasConflict() && "Bits known to be one AND zero?"); |
39332 | Known.Zero.lshrInPlace(ShAmt); |
39333 | Known.One.lshrInPlace(ShAmt); |
39334 | |
39335 | // High bits are known zero. |
39336 | Known.Zero.setHighBits(ShAmt); |
39337 | return false; |
39338 | } |
39339 | case X86ISD::VSRAI: { |
39340 | SDValue Op0 = Op.getOperand(0); |
39341 | SDValue Op1 = Op.getOperand(1); |
39342 | |
39343 | unsigned ShAmt = cast<ConstantSDNode>(Op1)->getZExtValue(); |
39344 | if (ShAmt >= BitWidth) |
39345 | break; |
39346 | |
39347 | APInt DemandedMask = OriginalDemandedBits << ShAmt; |
39348 | |
39349 | // If we just want the sign bit then we don't need to shift it. |
39350 | if (OriginalDemandedBits.isSignMask()) |
39351 | return TLO.CombineTo(Op, Op0); |
39352 | |
39353 | // fold (VSRAI (VSHLI X, C1), C1) --> X iff NumSignBits(X) > C1. |
39354 | if (Op0.getOpcode() == X86ISD::VSHLI && |
39355 | Op.getOperand(1) == Op0.getOperand(1)) { |
39356 | SDValue Op00 = Op0.getOperand(0); |
39357 | unsigned NumSignBits = |
39358 | TLO.DAG.ComputeNumSignBits(Op00, OriginalDemandedElts); |
39359 | if (ShAmt < NumSignBits) |
39360 | return TLO.CombineTo(Op, Op00); |
39361 | } |
39362 | |
39363 | |
39364 | // If any demanded bits come from the sign extension, demand the sign bit. |
39365 | if (OriginalDemandedBits.countLeadingZeros() < ShAmt) |
39366 | DemandedMask.setSignBit(); |
39367 | |
39368 | if (SimplifyDemandedBits(Op0, DemandedMask, OriginalDemandedElts, Known, |
39369 | TLO, Depth + 1)) |
39370 | return true; |
39371 | |
39372 | assert(!Known.hasConflict() && "Bits known to be one AND zero?"); |
39373 | Known.Zero.lshrInPlace(ShAmt); |
39374 | Known.One.lshrInPlace(ShAmt); |
39375 | |
39376 | |
39377 | // If the sign bit is zero or no top bits are demanded, use VSRLI instead. |
39378 | if (Known.Zero[BitWidth - ShAmt - 1] || |
39379 | OriginalDemandedBits.countLeadingZeros() >= ShAmt) |
39380 | return TLO.CombineTo( |
39381 | Op, TLO.DAG.getNode(X86ISD::VSRLI, SDLoc(Op), VT, Op0, Op1)); |
39382 | |
39383 | // If the sign bit is known one, the shifted-in high bits are one. |
39384 | if (Known.One[BitWidth - ShAmt - 1]) |
39385 | Known.One.setHighBits(ShAmt); |
39386 | return false; |
39387 | } |
39388 | case X86ISD::PEXTRB: |
39389 | case X86ISD::PEXTRW: { |
39390 | SDValue Vec = Op.getOperand(0); |
39391 | auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1)); |
39392 | MVT VecVT = Vec.getSimpleValueType(); |
39393 | unsigned NumVecElts = VecVT.getVectorNumElements(); |
39394 | |
39395 | if (CIdx && CIdx->getAPIntValue().ult(NumVecElts)) { |
39396 | unsigned Idx = CIdx->getZExtValue(); |
39397 | unsigned VecBitWidth = VecVT.getScalarSizeInBits(); |
39398 | |
39399 | |
39400 | // If no bits of the extracted element are demanded, simplify to zero. |
39401 | APInt DemandedVecBits = OriginalDemandedBits.trunc(VecBitWidth); |
39402 | if (DemandedVecBits == 0) |
39403 | return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT)); |
39404 | |
39405 | APInt KnownUndef, KnownZero; |
39406 | APInt DemandedVecElts = APInt::getOneBitSet(NumVecElts, Idx); |
39407 | if (SimplifyDemandedVectorElts(Vec, DemandedVecElts, KnownUndef, |
39408 | KnownZero, TLO, Depth + 1)) |
39409 | return true; |
39410 | |
39411 | KnownBits KnownVec; |
39412 | if (SimplifyDemandedBits(Vec, DemandedVecBits, DemandedVecElts, |
39413 | KnownVec, TLO, Depth + 1)) |
39414 | return true; |
39415 | |
39416 | if (SDValue V = SimplifyMultipleUseDemandedBits( |
39417 | Vec, DemandedVecBits, DemandedVecElts, TLO.DAG, Depth + 1)) |
39418 | return TLO.CombineTo( |
39419 | Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, V, Op.getOperand(1))); |
39420 | |
39421 | Known = KnownVec.zext(BitWidth); |
39422 | return false; |
39423 | } |
39424 | break; |
39425 | } |
39426 | case X86ISD::PINSRB: |
39427 | case X86ISD::PINSRW: { |
39428 | SDValue Vec = Op.getOperand(0); |
39429 | SDValue Scl = Op.getOperand(1); |
39430 | auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2)); |
39431 | MVT VecVT = Vec.getSimpleValueType(); |
39432 | |
39433 | if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) { |
39434 | unsigned Idx = CIdx->getZExtValue(); |
39435 | if (!OriginalDemandedElts[Idx]) |
39436 | return TLO.CombineTo(Op, Vec); |
39437 | |
39438 | KnownBits KnownVec; |
39439 | APInt DemandedVecElts(OriginalDemandedElts); |
39440 | DemandedVecElts.clearBit(Idx); |
39441 | if (SimplifyDemandedBits(Vec, OriginalDemandedBits, DemandedVecElts, |
39442 | KnownVec, TLO, Depth + 1)) |
39443 | return true; |
39444 | |
39445 | KnownBits KnownScl; |
39446 | unsigned NumSclBits = Scl.getScalarValueSizeInBits(); |
39447 | APInt DemandedSclBits = OriginalDemandedBits.zext(NumSclBits); |
39448 | if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1)) |
39449 | return true; |
39450 | |
39451 | KnownScl = KnownScl.trunc(VecVT.getScalarSizeInBits()); |
39452 | Known = KnownBits::commonBits(KnownVec, KnownScl); |
39453 | return false; |
39454 | } |
39455 | break; |
39456 | } |
39457 | case X86ISD::PACKSS: |
39458 | // PACKSS saturates to MIN/MAX integer values, so if we just want the |
39459 | // sign bit we can ask for only the source operands' sign bits. |
39460 | // TODO - add known bits handling. |
39461 | if (OriginalDemandedBits.isSignMask()) { |
39462 | APInt DemandedLHS, DemandedRHS; |
39463 | getPackDemandedElts(VT, OriginalDemandedElts, DemandedLHS, DemandedRHS); |
39464 | |
39465 | KnownBits KnownLHS, KnownRHS; |
39466 | APInt SignMask = APInt::getSignMask(BitWidth * 2); |
39467 | if (SimplifyDemandedBits(Op.getOperand(0), SignMask, DemandedLHS, |
39468 | KnownLHS, TLO, Depth + 1)) |
39469 | return true; |
39470 | if (SimplifyDemandedBits(Op.getOperand(1), SignMask, DemandedRHS, |
39471 | KnownRHS, TLO, Depth + 1)) |
39472 | return true; |
39473 | |
39474 | // Attempt to avoid multi-use ops if we don't need anything from them. |
39475 | SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits( |
39476 | Op.getOperand(0), SignMask, DemandedLHS, TLO.DAG, Depth + 1); |
39477 | SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits( |
39478 | Op.getOperand(1), SignMask, DemandedRHS, TLO.DAG, Depth + 1); |
39479 | if (DemandedOp0 || DemandedOp1) { |
39480 | SDValue Op0 = DemandedOp0 ? DemandedOp0 : Op.getOperand(0); |
39481 | SDValue Op1 = DemandedOp1 ? DemandedOp1 : Op.getOperand(1); |
39482 | return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, Op0, Op1)); |
39483 | } |
39484 | } |
39485 | |
39486 | break; |
39487 | case X86ISD::VBROADCAST: { |
39488 | SDValue Src = Op.getOperand(0); |
39489 | MVT SrcVT = Src.getSimpleValueType(); |
39490 | APInt DemandedElts = APInt::getOneBitSet( |
39491 | SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1, 0); |
39492 | if (SimplifyDemandedBits(Src, OriginalDemandedBits, DemandedElts, Known, |
39493 | TLO, Depth + 1)) |
39494 | return true; |
39495 | |
39496 | // If we only demand the lower half of a 64-bit integer broadcast, |
39497 | // truncate the source and broadcast twice as many 32-bit elements. |
39498 | if ((BitWidth == 64) && SrcVT.isScalarInteger() && !Subtarget.hasAVX512() && |
39499 | OriginalDemandedBits.countLeadingZeros() >= (BitWidth / 2)) { |
39500 | MVT NewSrcVT = MVT::getIntegerVT(BitWidth / 2); |
39501 | SDValue NewSrc = |
39502 | TLO.DAG.getNode(ISD::TRUNCATE, SDLoc(Src), NewSrcVT, Src); |
39503 | MVT NewVT = MVT::getVectorVT(NewSrcVT, VT.getVectorNumElements() * 2); |
39504 | SDValue NewBcst = |
39505 | TLO.DAG.getNode(X86ISD::VBROADCAST, SDLoc(Op), NewVT, NewSrc); |
39506 | return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, NewBcst)); |
39507 | } |
39508 | break; |
39509 | } |
39510 | case X86ISD::PCMPGT: |
39511 | // icmp sgt(0, R) == ashr(R, BitWidth-1). |
39512 | // iff we only need the sign bit then we can use R directly. |
39513 | if (OriginalDemandedBits.isSignMask() && |
39514 | ISD::isBuildVectorAllZeros(Op.getOperand(0).getNode())) |
39515 | return TLO.CombineTo(Op, Op.getOperand(1)); |
39516 | break; |
39517 | case X86ISD::MOVMSK: { |
39518 | SDValue Src = Op.getOperand(0); |
39519 | MVT SrcVT = Src.getSimpleValueType(); |
39520 | unsigned SrcBits = SrcVT.getScalarSizeInBits(); |
39521 | unsigned NumElts = SrcVT.getVectorNumElements(); |
39522 | |
39523 | // If we don't need any of the low NumElts mask bits, just return zero. |
39524 | if (OriginalDemandedBits.countTrailingZeros() >= NumElts) |
39525 | return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT)); |
39526 | |
39527 | // Only demand the vector elements of the sign bits we need. |
39528 | APInt KnownUndef, KnownZero; |
39529 | APInt DemandedElts = OriginalDemandedBits.zextOrTrunc(NumElts); |
39530 | if (SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef, KnownZero, |
39531 | TLO, Depth + 1)) |
39532 | return true; |
39533 | |
39534 | Known.Zero = KnownZero.zextOrSelf(BitWidth); |
39535 | Known.Zero.setHighBits(BitWidth - NumElts); |
39536 | |
39537 | // MOVMSK only uses the MSB from each vector element. |
39538 | KnownBits KnownSrc; |
39539 | APInt DemandedSrcBits = APInt::getSignMask(SrcBits); |
39540 | if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, KnownSrc, TLO, |
39541 | Depth + 1)) |
39542 | return true; |
39543 | |
39544 | if (KnownSrc.One[SrcBits - 1]) |
39545 | Known.One.setLowBits(NumElts); |
39546 | else if (KnownSrc.Zero[SrcBits - 1]) |
39547 | Known.Zero.setLowBits(NumElts); |
39548 | |
39549 | // Attempt to avoid multi-use ops if we don't need anything from them. |
39550 | if (SDValue NewSrc = SimplifyMultipleUseDemandedBits( |
39551 | Src, DemandedSrcBits, DemandedElts, TLO.DAG, Depth + 1)) |
39552 | return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewSrc)); |
39553 | return false; |
39554 | } |
39555 | case X86ISD::BEXTR: |
39556 | case X86ISD::BEXTRI: { |
39557 | SDValue Op0 = Op.getOperand(0); |
39558 | SDValue Op1 = Op.getOperand(1); |
39559 | |
39560 | // Only the bottom 16 bits of the control operand are required. |
39561 | if (auto *Cst1 = dyn_cast<ConstantSDNode>(Op1)) { |
39562 | // NOTE: SimplifyDemandedBits won't do this for constants. |
39563 | uint64_t Val1 = Cst1->getZExtValue(); |
39564 | uint64_t MaskedVal1 = Val1 & 0xFFFF; |
39565 | if (Opc == X86ISD::BEXTR && MaskedVal1 != Val1) { |
39566 | SDLoc DL(Op); |
39567 | return TLO.CombineTo( |
39568 | Op, TLO.DAG.getNode(X86ISD::BEXTR, DL, VT, Op0, |
39569 | TLO.DAG.getConstant(MaskedVal1, DL, VT))); |
39570 | } |
39571 | // Extract the 8-bit shift amount and 8-bit length fields. |
39572 | unsigned Shift = Cst1->getAPIntValue().extractBitsAsZExtValue(8, 0); |
39573 | unsigned Length = Cst1->getAPIntValue().extractBitsAsZExtValue(8, 8); |
39574 | |
39575 | // If the length is 0, the result is 0. |
39576 | if (Length == 0) { |
39577 | Known.setAllZero(); |
39578 | return false; |
39579 | } |
39580 | // If the extraction stays within the operand, only demand those bits. |
39581 | if ((Shift + Length) <= BitWidth) { |
39582 | APInt DemandedMask = APInt::getBitsSet(BitWidth, Shift, Shift + Length); |
39583 | if (SimplifyDemandedBits(Op0, DemandedMask, Known, TLO, Depth + 1)) |
39584 | return true; |
39585 | |
39586 | Known = Known.extractBits(Length, Shift); |
39587 | Known = Known.zextOrTrunc(BitWidth); |
39588 | return false; |
39589 | } |
39590 | } else { |
39591 | assert(Opc == X86ISD::BEXTR && "Unexpected opcode!"); |
39592 | KnownBits Known1; |
39593 | APInt DemandedMask(APInt::getLowBitsSet(BitWidth, 16)); |
39594 | if (SimplifyDemandedBits(Op1, DemandedMask, Known1, TLO, Depth + 1)) |
39595 | return true; |
39596 | |
39597 | // If the length field is known to be 0, the result is 0. |
39598 | KnownBits LengthBits = Known1.extractBits(8, 8); |
39599 | if (LengthBits.isZero()) |
39600 | return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT)); |
39601 | } |
39602 | |
39603 | break; |
39604 | } |
39605 | case X86ISD::PDEP: { |
39606 | SDValue Op0 = Op.getOperand(0); |
39607 | SDValue Op1 = Op.getOperand(1); |
39608 | |
39609 | unsigned DemandedBitsLZ = OriginalDemandedBits.countLeadingZeros(); |
39610 | APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ); |
39611 | |
39612 | // If the demanded bits have leading zeroes, we don't demand those from |
39613 | // the mask. |
39614 | if (SimplifyDemandedBits(Op1, LoMask, Known, TLO, Depth + 1)) |
39615 | return true; |
39616 | |
39617 | // The number of possible 1s in the mask determines the number of low |
39618 | // bits of operand 0 that are used. Undemanded bits from the mask don't |
39619 | // matter, so filter them out before counting. |
39620 | KnownBits Known2; |
39621 | uint64_t Count = (~Known.Zero & LoMask).countPopulation(); |
39622 | APInt DemandedMask(APInt::getLowBitsSet(BitWidth, Count)); |
39623 | if (SimplifyDemandedBits(Op0, DemandedMask, Known2, TLO, Depth + 1)) |
39624 | return true; |
39625 | |
39626 | // Zeroes are retained by the mask, but not ones. |
39627 | Known.One.clearAllBits(); |
39628 | // The result has at least as many trailing zeros as the non-mask operand |
39629 | // since bits can only map to the same or a higher position. |
39630 | Known.Zero.setLowBits(Known2.countMinTrailingZeros()); |
39631 | return false; |
39632 | } |
39633 | } |
39634 | |
39635 | return TargetLowering::SimplifyDemandedBitsForTargetNode( |
39636 | Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth); |
39637 | } |
39638 | |
39639 | SDValue X86TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode( |
39640 | SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts, |
39641 | SelectionDAG &DAG, unsigned Depth) const { |
39642 | int NumElts = DemandedElts.getBitWidth(); |
39643 | unsigned Opc = Op.getOpcode(); |
39644 | EVT VT = Op.getValueType(); |
39645 | |
39646 | switch (Opc) { |
39647 | case X86ISD::PINSRB: |
39648 | case X86ISD::PINSRW: { |
39649 | // If we don't demand the inserted element, return the base vector. |
39650 | SDValue Vec = Op.getOperand(0); |
39651 | auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2)); |
39652 | MVT VecVT = Vec.getSimpleValueType(); |
39653 | if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) && |
39654 | !DemandedElts[CIdx->getZExtValue()]) |
39655 | return Vec; |
39656 | break; |
39657 | } |
39658 | case X86ISD::VSHLI: { |
39659 | // If we are only demanding sign bits then we can use the shift source |
39660 | // directly. |
39661 | SDValue Op0 = Op.getOperand(0); |
39662 | unsigned ShAmt = Op.getConstantOperandVal(1); |
39663 | unsigned BitWidth = DemandedBits.getBitWidth(); |
39664 | unsigned NumSignBits = DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1); |
39665 | unsigned UpperDemandedBits = BitWidth - DemandedBits.countTrailingZeros(); |
39666 | if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= UpperDemandedBits) |
39667 | return Op0; |
39668 | break; |
39669 | } |
39670 | case X86ISD::VSRAI: |
39671 | // VSRAI replicates the sign bit, so if only the sign bit is demanded |
39672 | // we can use the source directly. |
39673 | if (DemandedBits.isSignMask()) |
39674 | return Op.getOperand(0); |
39675 | break; |
39676 | case X86ISD::PCMPGT: |
39677 | // icmp sgt(0, R) == ashr(R, BitWidth-1). |
39678 | // iff we only need the sign bit then we can use R directly. |
39679 | if (DemandedBits.isSignMask() && |
39680 | ISD::isBuildVectorAllZeros(Op.getOperand(0).getNode())) |
39681 | return Op.getOperand(1); |
39682 | break; |
39683 | } |
39684 | |
39685 | APInt ShuffleUndef, ShuffleZero; |
39686 | SmallVector<int, 16> ShuffleMask; |
39687 | SmallVector<SDValue, 2> ShuffleOps; |
39688 | if (getTargetShuffleInputs(Op, DemandedElts, ShuffleOps, ShuffleMask, |
39689 | ShuffleUndef, ShuffleZero, DAG, Depth, false)) { |
39690 | // If all the demanded elts are from one operand and are inline, |
39691 | // then we can use the operand directly. |
39692 | int NumOps = ShuffleOps.size(); |
39693 | if (ShuffleMask.size() == (unsigned)NumElts && |
39694 | llvm::all_of(ShuffleOps, [VT](SDValue V) { |
39695 | return VT.getSizeInBits() == V.getValueSizeInBits(); |
39696 | })) { |
39697 | // If every demanded element is undef (or undef/zero), fold immediately. |
39698 | if (DemandedElts.isSubsetOf(ShuffleUndef)) |
39699 | return DAG.getUNDEF(VT); |
39700 | if (DemandedElts.isSubsetOf(ShuffleUndef | ShuffleZero)) |
39701 | return getZeroVector(VT.getSimpleVT(), Subtarget, DAG, SDLoc(Op)); |
39702 | |
39703 | // Bitmask of the ops whose demanded elements are all accessed 'inline'. |
39704 | APInt IdentityOp = APInt::getAllOnesValue(NumOps); |
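      | // Drop an op from the identity set as soon as a demanded element maps |
      | // to a different lane or to more than one source operand. |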
39705 | for (int i = 0; i != NumElts; ++i) { |
39706 | int M = ShuffleMask[i]; |
39707 | if (!DemandedElts[i] || ShuffleUndef[i]) |
39708 | continue; |
39709 | int OpIdx = M / NumElts; |
39710 | int EltIdx = M % NumElts; |
39711 | if (M < 0 || EltIdx != i) { |
39712 | IdentityOp.clearAllBits(); |
39713 | break; |
39714 | } |
39715 | IdentityOp &= APInt::getOneBitSet(NumOps, OpIdx); |
39716 | if (IdentityOp == 0) |
39717 | break; |
39718 | } |
39719 | assert((IdentityOp == 0 || IdentityOp.countPopulation() == 1) && |
39720 | "Multiple identity shuffles detected"); |
39721 | |
39722 | if (IdentityOp != 0) |
39723 | return DAG.getBitcast(VT, ShuffleOps[IdentityOp.countTrailingZeros()]); |
39724 | } |
39725 | } |
39726 | |
39727 | return TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode( |
39728 | Op, DemandedBits, DemandedElts, DAG, Depth); |
39729 | } |
39730 | |
39731 | // Check whether the bitcast source, looking through logic ops and |
39732 | // (optionally) truncates, is a compare over a vector of the given size. |
39733 | static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size, |
39734 | bool AllowTruncate) { |
39735 | switch (Src.getOpcode()) { |
39736 | case ISD::TRUNCATE: |
39737 | if (!AllowTruncate) |
39738 | return false; |
39739 | LLVM_FALLTHROUGH; |
39740 | case ISD::SETCC: |
39741 | return Src.getOperand(0).getValueSizeInBits() == Size; |
39742 | case ISD::AND: |
39743 | case ISD::XOR: |
39744 | case ISD::OR: |
39745 | return checkBitcastSrcVectorSize(Src.getOperand(0), Size, AllowTruncate) && |
39746 | checkBitcastSrcVectorSize(Src.getOperand(1), Size, AllowTruncate); |
39747 | } |
39748 | return false; |
39749 | } |
39750 | |
39751 | // Map an integer bitwise opcode to its x86 floating-point equivalent. |
39752 | static unsigned getAltBitOpcode(unsigned Opcode) { |
39753 | switch(Opcode) { |
39754 | case ISD::AND: return X86ISD::FAND; |
39755 | case ISD::OR: return X86ISD::FOR; |
39756 | case ISD::XOR: return X86ISD::FXOR; |
39757 | case X86ISD::ANDNP: return X86ISD::FANDN; |
39758 | } |
39759 | llvm_unreachable("Unknown bitwise opcode"); |
39760 | } |
39761 | |
39762 | // Rewrite a v4i1 source as a v4f32 sign-bit value for SSE1 MOVMSKPS. |
39763 | static SDValue adjustBitcastSrcVectorSSE1(SelectionDAG &DAG, SDValue Src, |
39764 | const SDLoc &DL) { |
39765 | EVT SrcVT = Src.getValueType(); |
39766 | if (SrcVT != MVT::v4i1) |
39767 | return SDValue(); |
39768 | |
39769 | switch (Src.getOpcode()) { |
39770 | case ISD::SETCC: |
39771 | if (Src.getOperand(0).getValueType() == MVT::v4i32 && |
39772 | ISD::isBuildVectorAllZeros(Src.getOperand(1).getNode()) && |
39773 | cast<CondCodeSDNode>(Src.getOperand(2))->get() == ISD::SETLT) { |
39774 | SDValue Op0 = Src.getOperand(0); |
39775 | if (ISD::isNormalLoad(Op0.getNode())) |
39776 | return DAG.getBitcast(MVT::v4f32, Op0); |
39777 | if (Op0.getOpcode() == ISD::BITCAST && |
39778 | Op0.getOperand(0).getValueType() == MVT::v4f32) |
39779 | return Op0.getOperand(0); |
39780 | } |
39781 | break; |
39782 | case ISD::AND: |
39783 | case ISD::XOR: |
39784 | case ISD::OR: { |
39785 | SDValue Op0 = adjustBitcastSrcVectorSSE1(DAG, Src.getOperand(0), DL); |
39786 | SDValue Op1 = adjustBitcastSrcVectorSSE1(DAG, Src.getOperand(1), DL); |
39787 | if (Op0 && Op1) |
39788 | return DAG.getNode(getAltBitOpcode(Src.getOpcode()), DL, MVT::v4f32, Op0, |
39789 | Op1); |
39790 | break; |
39791 | } |
39792 | } |
39793 | return SDValue(); |
39794 | } |
39795 | |
39796 | // Sign-extend a vXi1 source to SExtVT, pushing the extension through bitops. |
39797 | static SDValue signExtendBitcastSrcVector(SelectionDAG &DAG, EVT SExtVT, |
39798 | SDValue Src, const SDLoc &DL) { |
39799 | switch (Src.getOpcode()) { |
39800 | case ISD::SETCC: |
39801 | case ISD::TRUNCATE: |
39802 | return DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src); |
39803 | case ISD::AND: |
39804 | case ISD::XOR: |
39805 | case ISD::OR: |
39806 | return DAG.getNode( |
39807 | Src.getOpcode(), DL, SExtVT, |
39808 | signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(0), DL), |
39809 | signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(1), DL)); |
39810 | } |
39811 | llvm_unreachable("Unexpected node type for vXi1 sign extension"); |
39812 | } |
39813 | |
39814 | // Try to match patterns such as |
39815 | // (i16 bitcast (v16i1 x)) |
39816 | // -> |
39817 | // (i16 movmsk (16i8 sext (v16i1 x))) |
39818 | // before the illegal vector is scalarized on subtargets that don't |
39819 | // have legal vxi1 types. |
39820 | static SDValue combineBitcastvxi1(SelectionDAG &DAG, EVT VT, SDValue Src, |
39821 | const SDLoc &DL, |
39822 | const X86Subtarget &Subtarget) { |
39823 | EVT SrcVT = Src.getValueType(); |
39824 | if (!SrcVT.isSimple() || SrcVT.getScalarType() != MVT::i1) |
39825 | return SDValue(); |
39826 | |
39827 | // Recognize the movmsk pattern under SSE1 (no SSE2) before type |
39828 | // legalization splits the v4i32 compare apart. |
39829 | if (Subtarget.hasSSE1() && !Subtarget.hasSSE2()) { |
39830 | if (SDValue V = adjustBitcastSrcVectorSSE1(DAG, Src, DL)) { |
39831 | V = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, |
39832 | DAG.getBitcast(MVT::v4f32, V)); |
39833 | return DAG.getZExtOrTrunc(V, DL, VT); |
39834 | } |
39835 | } |
39836 | |
39837 | // If the input is a truncate from v16i8/v32i8/v64i8, go ahead and use |
39838 | // movmskb even with avx512: it is better than truncating to vXi1 and |
39839 | // using a kmov, especially on KNL when the input is a vXi8 truncate |
39840 | // from v16i32/v32i32. |
39841 | bool PreferMovMsk = Src.getOpcode() == ISD::TRUNCATE && Src.hasOneUse() && |
39842 | (Src.getOperand(0).getValueType() == MVT::v16i8 || |
39843 | Src.getOperand(0).getValueType() == MVT::v32i8 || |
39844 | Src.getOperand(0).getValueType() == MVT::v64i8); |
39845 | |
39846 | // Prefer movmsk for AVX512 for (bitcast (setlt X, 0)), which can be |
39847 | // handled directly with vmovmskps/vmovmskpd/vpmovmskb. |
39848 | if (Src.getOpcode() == ISD::SETCC && Src.hasOneUse() && |
39849 | cast<CondCodeSDNode>(Src.getOperand(2))->get() == ISD::SETLT && |
39850 | ISD::isBuildVectorAllZeros(Src.getOperand(1).getNode())) { |
39851 | EVT CmpVT = Src.getOperand(0).getValueType(); |
39852 | EVT EltVT = CmpVT.getVectorElementType(); |
39853 | if (CmpVT.getSizeInBits() <= 256 && |
39854 | (EltVT == MVT::i8 || EltVT == MVT::i32 || EltVT == MVT::i64)) |
39855 | PreferMovMsk = true; |
39856 | } |
39857 | |
39858 | // With AVX512 vXi1 types are legal and we prefer using k-regs. |
39859 | // MOVMSK is supported in SSE2 or later. |
39860 | if (!Subtarget.hasSSE2() || (Subtarget.hasAVX512() && !PreferMovMsk)) |
39861 | return SDValue(); |
39862 | |
39863 | // Pick a sign-extension type with a matching element count for which a |
39864 | // MOVMSK flavor exists: |
39865 | // - v16i8/v32i8/v64i8 use PMOVMSKB, |
39866 | // - v4i32/v2i64 (and their 256-bit forms) lower via MOVMSKPS/MOVMSKPD, |
39867 | // - v8i16 is packed down to v16i8 first, as there is no 16-bit MOVMSK. |
39868 | // The sign-extension puts each boolean into its element's MSB, which |
39869 | // MOVMSK then gathers into a scalar integer. On AVX targets a wider |
39870 | // extension may be preferred for 256/512-bit sources, with |
39871 | // PropagateSExt pushing the extension through any bitops. |
39872 | |
39873 | MVT SExtVT; |
39874 | bool PropagateSExt = false; |
39875 | switch (SrcVT.getSimpleVT().SimpleTy) { |
39876 | default: |
39877 | return SDValue(); |
39878 | case MVT::v2i1: |
39879 | SExtVT = MVT::v2i64; |
39880 | break; |
39881 | case MVT::v4i1: |
39882 | SExtVT = MVT::v4i32; |
39883 | // For cases such as (i4 bitcast (v4i1 setcc v4i64 v1, v2)), |
39884 | // sign-extend to a 256-bit operation to avoid truncation. |
39885 | if (Subtarget.hasAVX() && |
39886 | checkBitcastSrcVectorSize(Src, 256, Subtarget.hasAVX2())) { |
39887 | SExtVT = MVT::v4i64; |
39888 | PropagateSExt = true; |
39889 | } |
39890 | break; |
39891 | case MVT::v8i1: |
39892 | SExtVT = MVT::v8i16; |
39893 | // For cases such as (i8 bitcast (v8i1 setcc v8i32 v1, v2)), |
39894 | // sign-extend to a 256-bit operation to match 8 elements. |
39895 | // A 512-bit compare source is also accepted, so the extension can be |
39896 | // done in one wider op instead of after splitting. |
39897 | |
39898 | if (Subtarget.hasAVX() && (checkBitcastSrcVectorSize(Src, 256, true) || |
39899 | checkBitcastSrcVectorSize(Src, 512, true))) { |
39900 | SExtVT = MVT::v8i32; |
39901 | PropagateSExt = true; |
39902 | } |
39903 | break; |
39904 | case MVT::v16i1: |
39905 | SExtVT = MVT::v16i8; |
39906 | // For the case (i16 bitcast (v16i1 setcc v16i16 v1, v2)), it is not |
39907 | // profitable to sign-extend to 256-bit since that needs an extra |
39908 | // cross-lane shuffle; truncating to v16i8 and using PMOVMSKB is cheaper. |
39909 | |
39910 | break; |
39911 | case MVT::v32i1: |
39912 | SExtVT = MVT::v32i8; |
39913 | break; |
39914 | case MVT::v64i1: |
39915 | // With AVX512BW, v64i1 is a legal mask type, so keep the bitcast as-is; |
39916 | // with plain AVX512F, sign-extend to v64i8 and use PMOVMSKB instead. |
39917 | if (Subtarget.hasAVX512()) { |
39918 | if (Subtarget.hasBWI()) |
39919 | return SDValue(); |
39920 | SExtVT = MVT::v64i8; |
39921 | break; |
39922 | } |
39923 | // Pre-AVX512 we can still handle a full 512-bit compare source via v64i8. |
39924 | if (checkBitcastSrcVectorSize(Src, 512, false)) { |
39925 | SExtVT = MVT::v64i8; |
39926 | break; |
39927 | } |
39928 | return SDValue(); |
39929 | } |
39930 | |
39931 | SDValue V = PropagateSExt ? signExtendBitcastSrcVector(DAG, SExtVT, Src, DL) |
39932 | : DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src); |
39933 | |
39934 | if (SExtVT == MVT::v16i8 || SExtVT == MVT::v32i8 || SExtVT == MVT::v64i8) { |
39935 | V = getPMOVMSKB(DL, V, DAG, Subtarget); |
39936 | } else { |
39937 | if (SExtVT == MVT::v8i16) |
39938 | V = DAG.getNode(X86ISD::PACKSS, DL, MVT::v16i8, V, |
39939 | DAG.getUNDEF(MVT::v8i16)); |
39940 | V = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, V); |
39941 | } |
39942 | |
39943 | EVT IntVT = |
39944 | EVT::getIntegerVT(*DAG.getContext(), SrcVT.getVectorNumElements()); |
39945 | V = DAG.getZExtOrTrunc(V, DL, IntVT); |
39946 | return DAG.getBitcast(VT, V); |
39947 | } |
39948 | |
39949 | // Convert a vXi1 constant build vector to the same-width scalar integer. |
39950 | static SDValue combinevXi1ConstantToInteger(SDValue Op, SelectionDAG &DAG) { |
39951 | EVT SrcVT = Op.getValueType(); |
39952 | assert(SrcVT.getVectorElementType() == MVT::i1 && |
39953 | "Expected a vXi1 vector"); |
39954 | assert(ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) && |
39955 | "Expected a constant build vector"); |
39956 | |
39957 | APInt Imm(SrcVT.getVectorNumElements(), 0); |
39958 | for (unsigned Idx = 0, e = Op.getNumOperands(); Idx < e; ++Idx) { |
39959 | SDValue In = Op.getOperand(Idx); |
39960 | if (!In.isUndef() && (cast<ConstantSDNode>(In)->getZExtValue() & 0x1)) |
39961 | Imm.setBit(Idx); |
39962 | } |
39963 | EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), Imm.getBitWidth()); |
39964 | return DAG.getConstant(Imm, SDLoc(Op), IntVT); |
39965 | } |
39966 | |
39967 | static SDValue combineCastedMaskArithmetic(SDNode *N, SelectionDAG &DAG, |
39968 | TargetLowering::DAGCombinerInfo &DCI, |
39969 | const X86Subtarget &Subtarget) { |
39970 | assert(N->getOpcode() == ISD::BITCAST && "Expected a bitcast"); |
39971 | |
39972 | if (!DCI.isBeforeLegalizeOps()) |
39973 | return SDValue(); |
39974 | |
39975 | // Only do this if we have k-registers. |
39976 | if (!Subtarget.hasAVX512()) |
39977 | return SDValue(); |
39978 | |
39979 | EVT DstVT = N->getValueType(0); |
39980 | SDValue Op = N->getOperand(0); |
39981 | EVT SrcVT = Op.getValueType(); |
39982 | |
39983 | if (!Op.hasOneUse()) |
39984 | return SDValue(); |
39985 | |
39986 | // Look for logic ops. |
39987 | if (Op.getOpcode() != ISD::AND && |
39988 | Op.getOpcode() != ISD::OR && |
39989 | Op.getOpcode() != ISD::XOR) |
39990 | return SDValue(); |
39991 | |
39992 | // Make sure we have a bitcast between mask registers and a scalar type. |
39993 | if (!(SrcVT.isVector() && SrcVT.getVectorElementType() == MVT::i1 && |
39994 | DstVT.isScalarInteger()) && |
39995 | !(DstVT.isVector() && DstVT.getVectorElementType() == MVT::i1 && |
39996 | SrcVT.isScalarInteger())) |
39997 | return SDValue(); |
39998 | |
39999 | SDValue LHS = Op.getOperand(0); |
40000 | SDValue RHS = Op.getOperand(1); |
40001 | // If either operand was bitcast from DstVT, perform the logic in DstVT. |
40002 | if (LHS.hasOneUse() && LHS.getOpcode() == ISD::BITCAST && |
40003 | LHS.getOperand(0).getValueType() == DstVT) |
40004 | return DAG.getNode(Op.getOpcode(), SDLoc(N), DstVT, LHS.getOperand(0), |
40005 | DAG.getBitcast(DstVT, RHS)); |
40006 | |
40007 | if (RHS.hasOneUse() && RHS.getOpcode() == ISD::BITCAST && |
40008 | RHS.getOperand(0).getValueType() == DstVT) |
40009 | return DAG.getNode(Op.getOpcode(), SDLoc(N), DstVT, |
40010 | DAG.getBitcast(DstVT, LHS), RHS.getOperand(0)); |
40011 | |
40012 | // If the RHS is a vXi1 constant build vector, it is free to move into |
40013 | // the scalar domain as an integer constant, so flip the op there too. |
40014 | if (ISD::isBuildVectorOfConstantSDNodes(RHS.getNode())) { |
40015 | RHS = combinevXi1ConstantToInteger(RHS, DAG); |
40016 | return DAG.getNode(Op.getOpcode(), SDLoc(N), DstVT, |
40017 | DAG.getBitcast(DstVT, LHS), RHS); |
40018 | } |
40019 | |
40020 | return SDValue(); |
40021 | } |
40022 | |
40023 | static SDValue createMMXBuildVector(BuildVectorSDNode *BV, SelectionDAG &DAG, |
40024 | const X86Subtarget &Subtarget) { |
40025 | SDLoc DL(BV); |
40026 | unsigned NumElts = BV->getNumOperands(); |
40027 | SDValue Splat = BV->getSplatValue(); |
40028 | |
40029 | // Build an MMX element from an integer GPR or an SSE float value. |
40030 | auto CreateMMXElement = [&](SDValue V) { |
40031 | if (V.isUndef()) |
40032 | return DAG.getUNDEF(MVT::x86mmx); |
40033 | if (V.getValueType().isFloatingPoint()) { |
40034 | if (Subtarget.hasSSE1() && !isa<ConstantFPSDNode>(V)) { |
40035 | V = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v4f32, V); |
40036 | V = DAG.getBitcast(MVT::v2i64, V); |
40037 | return DAG.getNode(X86ISD::MOVDQ2Q, DL, MVT::x86mmx, V); |
40038 | } |
40039 | V = DAG.getBitcast(MVT::i32, V); |
40040 | } else { |
40041 | V = DAG.getAnyExtOrTrunc(V, DL, MVT::i32); |
40042 | } |
40043 | return DAG.getNode(X86ISD::MMX_MOVW2D, DL, MVT::x86mmx, V); |
40044 | }; |
40045 | |
40046 | // Convert build vector ops to MMX data in the bottom elements. |
40047 | SmallVector<SDValue, 8> Ops; |
40048 | |
40049 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
40050 | |
40051 | // Broadcast - use (PUNPCKL+)PSHUFW to broadcast a single element. |
40052 | if (Splat) { |
40053 | if (Splat.isUndef()) |
40054 | return DAG.getUNDEF(MVT::x86mmx); |
40055 | |
40056 | Splat = CreateMMXElement(Splat); |
40057 | |
40058 | if (Subtarget.hasSSE1()) { |
40059 | // Unpack v8i8 to splat the i8 element into the lowest 16 bits. |
40060 | if (NumElts == 8) |
40061 | Splat = DAG.getNode( |
40062 | ISD::INTRINSIC_WO_CHAIN, DL, MVT::x86mmx, |
40063 | DAG.getTargetConstant(Intrinsic::x86_mmx_punpcklbw, DL, |
40064 | TLI.getPointerTy(DAG.getDataLayout())), |
40065 | Splat, Splat); |
40066 | |
40067 | // Use PSHUFW to repeat the 16-bit elements. |
40068 | unsigned ShufMask = (NumElts > 2 ? 0 : 0x44); |
40069 | return DAG.getNode( |
40070 | ISD::INTRINSIC_WO_CHAIN, DL, MVT::x86mmx, |
40071 | DAG.getTargetConstant(Intrinsic::x86_sse_pshuf_w, DL, |
40072 | TLI.getPointerTy(DAG.getDataLayout())), |
40073 | Splat, DAG.getTargetConstant(ShufMask, DL, MVT::i8)); |
40074 | } |
40075 | Ops.append(NumElts, Splat); |
40076 | } else { |
40077 | for (unsigned i = 0; i != NumElts; ++i) |
40078 | Ops.push_back(CreateMMXElement(BV->getOperand(i))); |
40079 | } |
40080 | |
40081 | // Use a tree of PUNPCKLs to build up the general MMX vector. |
40082 | while (Ops.size() > 1) { |
40083 | unsigned NumOps = Ops.size(); |
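      | // Pick the unpack-low intrinsic that pairs elements at the current width. |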
40084 | unsigned IntrinOp = |
40085 | (NumOps == 2 ? Intrinsic::x86_mmx_punpckldq |
40086 | : (NumOps == 4 ? Intrinsic::x86_mmx_punpcklwd |
40087 | : Intrinsic::x86_mmx_punpcklbw)); |
40088 | SDValue Intrin = DAG.getTargetConstant( |
40089 | IntrinOp, DL, TLI.getPointerTy(DAG.getDataLayout())); |
40090 | for (unsigned i = 0; i != NumOps; i += 2) |
40091 | Ops[i / 2] = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::x86mmx, Intrin, |
40092 | Ops[i], Ops[i + 1]); |
40093 | Ops.resize(NumOps / 2); |
40094 | } |
40095 | |
40096 | return Ops[0]; |
40097 | } |
40098 | |
40099 | // Attempt to rebuild a scalar integer as a vXi1 bool vector, looking |
40100 | // through bitcasts, truncates, extensions, ORs and constant left shifts |
40101 | // and mapping each to the equivalent mask-vector operation. |
40102 | |
40103 | static SDValue combineBitcastToBoolVector(EVT VT, SDValue V, const SDLoc &DL, |
40104 | SelectionDAG &DAG, |
40105 | const X86Subtarget &Subtarget) { |
40106 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
40107 | unsigned Opc = V.getOpcode(); |
40108 | switch (Opc) { |
40109 | case ISD::BITCAST: { |
40110 | // Peek through an earlier bitcast from a vector or float source. |
40111 | SDValue Src = V.getOperand(0); |
40112 | EVT SrcVT = Src.getValueType(); |
40113 | if (SrcVT.isVector() || SrcVT.isFloatingPoint()) |
40114 | return DAG.getBitcast(VT, Src); |
40115 | break; |
40116 | } |
40117 | case ISD::TRUNCATE: { |
40118 | // Rebuild the untruncated value as a wider bool vector, then extract. |
40119 | SDValue Src = V.getOperand(0); |
40120 | EVT NewSrcVT = |
40121 | EVT::getVectorVT(*DAG.getContext(), MVT::i1, Src.getValueSizeInBits()); |
40122 | if (TLI.isTypeLegal(NewSrcVT)) |
40123 | if (SDValue N0 = |
40124 | combineBitcastToBoolVector(NewSrcVT, Src, DL, DAG, Subtarget)) |
40125 | return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, N0, |
40126 | DAG.getIntPtrConstant(0, DL)); |
40127 | break; |
40128 | } |
40129 | case ISD::ANY_EXTEND: |
40130 | case ISD::ZERO_EXTEND: { |
40131 | // Rebuild the narrow source as a bool vector and insert it into undef/zero. |
40132 | SDValue Src = V.getOperand(0); |
40133 | EVT NewSrcVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, |
40134 | Src.getScalarValueSizeInBits()); |
40135 | if (TLI.isTypeLegal(NewSrcVT)) |
40136 | if (SDValue N0 = |
40137 | combineBitcastToBoolVector(NewSrcVT, Src, DL, DAG, Subtarget)) |
40138 | return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, |
40139 | Opc == ISD::ANY_EXTEND ? DAG.getUNDEF(VT) |
40140 | : DAG.getConstant(0, DL, VT), |
40141 | N0, DAG.getIntPtrConstant(0, DL)); |
40142 | break; |
40143 | } |
40144 | case ISD::OR: { |
40145 | // OR of the two recursively converted bool vectors. |
40146 | SDValue Src0 = V.getOperand(0); |
40147 | SDValue Src1 = V.getOperand(1); |
40148 | if (SDValue N0 = combineBitcastToBoolVector(VT, Src0, DL, DAG, Subtarget)) |
40149 | if (SDValue N1 = combineBitcastToBoolVector(VT, Src1, DL, DAG, Subtarget)) |
40150 | return DAG.getNode(Opc, DL, VT, N0, N1); |
40151 | break; |
40152 | } |
40153 | case ISD::SHL: { |
40154 | // A constant left shift becomes KSHIFTL when the mask type is legal. |
40155 | SDValue Src0 = V.getOperand(0); |
40156 | if ((VT == MVT::v8i1 && !Subtarget.hasDQI()) || |
40157 | ((VT == MVT::v32i1 || VT == MVT::v64i1) && !Subtarget.hasBWI())) |
40158 | break; |
40159 | |
40160 | if (auto *Amt = dyn_cast<ConstantSDNode>(V.getOperand(1))) |
40161 | if (SDValue N0 = combineBitcastToBoolVector(VT, Src0, DL, DAG, Subtarget)) |
40162 | return DAG.getNode( |
40163 | X86ISD::KSHIFTL, DL, VT, N0, |
40164 | DAG.getTargetConstant(Amt->getZExtValue(), DL, MVT::i8)); |
40165 | break; |
40166 | } |
40167 | } |
40168 | return SDValue(); |
40169 | } |
40170 | |
40171 | static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG, |
40172 | TargetLowering::DAGCombinerInfo &DCI, |
40173 | const X86Subtarget &Subtarget) { |
40174 | SDValue N0 = N->getOperand(0); |
40175 | EVT VT = N->getValueType(0); |
40176 | EVT SrcVT = N0.getValueType(); |
40177 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
40178 | |
40179 | // Try to match patterns such as |
40180 | // (i16 bitcast (v16i1 x)) |
40181 | // -> |
40182 | // (i16 movmsk (16i8 sext (v16i1 x))) |
40183 | // before the setcc result is scalarized on subtargets that don't have |
40184 | // legal vxi1 types. |
40185 | if (DCI.isBeforeLegalize()) { |
40186 | SDLoc dl(N); |
40187 | if (SDValue V = combineBitcastvxi1(DAG, VT, N0, dl, Subtarget)) |
40188 | return V; |
40189 | |
40190 | // If this is a bitcast between a MVT::v4i1/v2i1 and an illegal integer |
40191 | // type, widen both sides to avoid a trip through memory. |
40192 | if ((VT == MVT::v4i1 || VT == MVT::v2i1) && SrcVT.isScalarInteger() && |
40193 | Subtarget.hasAVX512()) { |
40194 | N0 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i8, N0); |
40195 | N0 = DAG.getBitcast(MVT::v8i1, N0); |
40196 | return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, N0, |
40197 | DAG.getIntPtrConstant(0, dl)); |
40198 | } |
40199 | |
40200 | // Likewise for a bitcast from a MVT::v4i1/v2i1 source to an illegal |
40201 | // integer type: widen through v8i1/i8 to avoid a trip through memory. |
40202 | if ((SrcVT == MVT::v4i1 || SrcVT == MVT::v2i1) && VT.isScalarInteger() && |
40203 | Subtarget.hasAVX512()) { |
40204 | // Use zeros for the widening when the trailing concat operand is |
40205 | // already zero: the widened vector keeps those elements known-zero, |
40206 | // which can later let SimplifyDemandedBits remove scalar ANDs that |
40207 | // are downstream of this bitcast. |
40208 | |
40209 | |
40210 | |
40211 | if (N0.getOpcode() == ISD::CONCAT_VECTORS) { |
40212 | SDValue LastOp = N0.getOperand(N0.getNumOperands() - 1); |
40213 | if (ISD::isBuildVectorAllZeros(LastOp.getNode())) { |
40214 | SrcVT = LastOp.getValueType(); |
40215 | unsigned NumConcats = 8 / SrcVT.getVectorNumElements(); |
40216 | SmallVector<SDValue, 4> Ops(N0->op_begin(), N0->op_end()); |
40217 | Ops.resize(NumConcats, DAG.getConstant(0, dl, SrcVT)); |
40218 | N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i1, Ops); |
40219 | N0 = DAG.getBitcast(MVT::i8, N0); |
40220 | return DAG.getNode(ISD::TRUNCATE, dl, VT, N0); |
40221 | } |
40222 | } |
40223 | // Otherwise pad the remaining elements with undef. |
40224 | unsigned NumConcats = 8 / SrcVT.getVectorNumElements(); |
40225 | SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(SrcVT)); |
40226 | Ops[0] = N0; |
40227 | N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i1, Ops); |
40228 | N0 = DAG.getBitcast(MVT::i8, N0); |
40229 | return DAG.getNode(ISD::TRUNCATE, dl, VT, N0); |
40230 | } |
40231 | } else { |
40232 | // After legalization: if this is a bitcast from a scalar integer to a |
40233 | // legal vXi1 mask type, try to rebuild the scalar as a bool vector. |
40234 | if (VT.isVector() && VT.getScalarType() == MVT::i1 && |
40235 | SrcVT.isScalarInteger() && TLI.isTypeLegal(VT)) { |
40236 | if (SDValue V = |
40237 | combineBitcastToBoolVector(VT, N0, SDLoc(N), DAG, Subtarget)) |
40238 | return V; |
40239 | } |
40240 | } |
40241 | |
40242 | // Look for (i8 (bitcast (v8i1 (extract_subvector (v16i1 X), 0)))) and |
40243 | // replace with (i8 (trunc (i16 (bitcast (v16i1 X))))). This can occur |
40244 | // due to insert_subvector legalization on KNL. By promoting the copy |
40245 | // to i16 we can help known-bits propagation from the vXi1 domain to |
40246 | // the scalar domain. |
40247 | if (VT == MVT::i8 && SrcVT == MVT::v8i1 && Subtarget.hasAVX512() && |
40248 | !Subtarget.hasDQI() && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR && |
40249 | N0.getOperand(0).getValueType() == MVT::v16i1 && |
40250 | isNullConstant(N0.getOperand(1))) |
40251 | return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, |
40252 | DAG.getBitcast(MVT::i16, N0.getOperand(0))); |
40253 | |
40254 | // bitcast(vbroadcast_load): re-emit the broadcast load with an element |
40255 | // type from the destination domain so the bitcast folds away. |
40256 | |
40257 | if (N0.getOpcode() == X86ISD::VBROADCAST_LOAD && N0.hasOneUse() && |
40258 | VT.isFloatingPoint() != SrcVT.isFloatingPoint() && VT.isVector()) { |
40259 | auto *BCast = cast<MemIntrinsicSDNode>(N0); |
40260 | unsigned SrcVTSize = SrcVT.getScalarSizeInBits(); |
40261 | unsigned MemSize = BCast->getMemoryVT().getScalarSizeInBits(); |
40262 | // Don't swap i8/i16 elements since there are no fp types of that size. |
40263 | if (MemSize >= 32) { |
40264 | MVT MemVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(MemSize) |
40265 | : MVT::getIntegerVT(MemSize); |
40266 | MVT LoadVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(SrcVTSize) |
40267 | : MVT::getIntegerVT(SrcVTSize); |
40268 | LoadVT = MVT::getVectorVT(LoadVT, SrcVT.getVectorNumElements()); |
40269 | |
40270 | SDVTList Tys = DAG.getVTList(LoadVT, MVT::Other); |
40271 | SDValue Ops[] = { BCast->getChain(), BCast->getBasePtr() }; |
40272 | SDValue ResNode = |
40273 | DAG.getMemIntrinsicNode(X86ISD::VBROADCAST_LOAD, SDLoc(N), Tys, Ops, |
40274 | MemVT, BCast->getMemOperand()); |
40275 | DAG.ReplaceAllUsesOfValueWith(SDValue(BCast, 1), ResNode.getValue(1)); |
40276 | return DAG.getBitcast(VT, ResNode); |
40277 | } |
40278 | } |
40279 | |
40280 | // Since MMX types are special and don't usually play with other vector |
40281 | // types, it's better to handle them early to be sure we emit efficient |
40282 | // code by avoiding store-load conversions. |
40283 | if (VT == MVT::x86mmx) { |
40284 | // Detect MMX constant vectors. |
40285 | APInt UndefElts; |
40286 | SmallVector<APInt, 1> EltBits; |
40287 | if (getTargetConstantBitsFromNode(N0, 64, UndefElts, EltBits)) { |
40288 | SDLoc DL(N0); |
40289 | // If the upper 32 bits are zero, materialize via MMX_MOVW2D of an i32. |
40290 | if (EltBits[0].countLeadingZeros() >= 32) |
40291 | return DAG.getNode(X86ISD::MMX_MOVW2D, DL, VT, |
40292 | DAG.getConstant(EltBits[0].trunc(32), DL, MVT::i32)); |
40293 | |
40294 | // Otherwise materialize the constant through an f64 bitcast. |
40295 | APFloat F64(APFloat::IEEEdouble(), EltBits[0]); |
40296 | return DAG.getBitcast(VT, DAG.getConstantFP(F64, DL, MVT::f64)); |
40297 | } |
40298 | |
40299 | // Detect bitcasts to x86mmx where only the low element is non-undef/zero. |
40300 | if (N0.getOpcode() == ISD::BUILD_VECTOR && |
40301 | (SrcVT == MVT::v2i32 || SrcVT == MVT::v4i16 || SrcVT == MVT::v8i8) && |
40302 | N0.getOperand(0).getValueType() == SrcVT.getScalarType()) { |
40303 | bool LowUndef = true, AllUndefOrZero = true; |
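      | // All elements past the first must be undef or zero; if the remaining |
      | // low-half elements are all undef, an any-extend of element 0 suffices. |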
40304 | for (unsigned i = 1, e = SrcVT.getVectorNumElements(); i != e; ++i) { |
40305 | SDValue Op = N0.getOperand(i); |
40306 | LowUndef &= Op.isUndef() || (i >= e/2); |
40307 | AllUndefOrZero &= (Op.isUndef() || isNullConstant(Op)); |
40308 | } |
40309 | if (AllUndefOrZero) { |
40310 | SDValue N00 = N0.getOperand(0); |
40311 | SDLoc dl(N00); |
40312 | N00 = LowUndef ? DAG.getAnyExtOrTrunc(N00, dl, MVT::i32) |
40313 | : DAG.getZExtOrTrunc(N00, dl, MVT::i32); |
40314 | return DAG.getNode(X86ISD::MMX_MOVW2D, dl, VT, N00); |
40315 | } |
40316 | } |
40317 | |
40318 | // Handle the remaining 64-bit build vectors by constructing the MMX |
40319 | // value element by element. |
40320 | |
40321 | if (N0.getOpcode() == ISD::BUILD_VECTOR && |
40322 | (SrcVT == MVT::v2f32 || SrcVT == MVT::v2i32 || SrcVT == MVT::v4i16 || |
40323 | SrcVT == MVT::v8i8)) |
40324 | return createMMXBuildVector(cast<BuildVectorSDNode>(N0), DAG, Subtarget); |
40325 | |
40326 | // Detect bitcasts from a low 128-bit extraction to x86mmx (MOVDQ2Q). |
40327 | if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT || |
40328 | N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) && |
40329 | isNullConstant(N0.getOperand(1))) { |
40330 | SDValue N00 = N0.getOperand(0); |
40331 | if (N00.getValueType().is128BitVector()) |
40332 | return DAG.getNode(X86ISD::MOVDQ2Q, SDLoc(N00), VT, |
40333 | DAG.getBitcast(MVT::v2i64, N00)); |
40334 | } |
40335 | |
40336 | // Detect bitcasts from FP_TO_SINT to x86mmx. |
40337 | if (SrcVT == MVT::v2i32 && N0.getOpcode() == ISD::FP_TO_SINT) { |
40338 | SDLoc DL(N0); |
40339 | SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4i32, N0, |
40340 | DAG.getUNDEF(MVT::v2i32)); |
40341 | return DAG.getNode(X86ISD::MOVDQ2Q, DL, VT, |
40342 | DAG.getBitcast(MVT::v2i64, Res)); |
40343 | } |
40344 | } |
40345 | |
40346 | // With AVX512, a constant vXi1 build vector bitcast to a scalar integer |
40347 | // can be folded directly to an integer constant. |
40348 | if (Subtarget.hasAVX512() && VT.isScalarInteger() && |
40349 | SrcVT.isVector() && SrcVT.getVectorElementType() == MVT::i1 && |
40350 | ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) { |
40351 | return combinevXi1ConstantToInteger(N0, DAG); |
40352 | } |
40353 | |
40354 | if (Subtarget.hasAVX512() && SrcVT.isScalarInteger() && |
40355 | VT.isVector() && VT.getVectorElementType() == MVT::i1 && |
40356 | isa<ConstantSDNode>(N0)) { |
40357 | auto *C = cast<ConstantSDNode>(N0); |
40358 | if (C->isAllOnesValue()) |
40359 | return DAG.getConstant(1, SDLoc(N0), VT); |
40360 | if (C->isNullValue()) |
40361 | return DAG.getConstant(0, SDLoc(N0), VT); |
40362 | } |
40363 | |
40364 | // Look for MOVMSK that is maybe truncated and then bitcasted to vXi1. |
40365 | // Turn it into a sign-bit compare in the mask domain, avoiding a GPR trip. |
40366 | |
40367 | if (Subtarget.hasAVX512() && SrcVT.isScalarInteger() && |
40368 | VT.isVector() && VT.getVectorElementType() == MVT::i1 && |
40369 | isPowerOf2_32(VT.getVectorNumElements())) { |
40370 | unsigned NumElts = VT.getVectorNumElements(); |
40371 | SDValue Src = N0; |
40372 | |
40373 | // Peek through a single-use truncate of the MOVMSK result. |
40374 | if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse()) |
40375 | Src = N0.getOperand(0); |
40376 | |
40377 | if (Src.getOpcode() == X86ISD::MOVMSK && Src.hasOneUse()) { |
40378 | SDValue MovmskIn = Src.getOperand(0); |
40379 | MVT MovmskVT = MovmskIn.getSimpleValueType(); |
40380 | unsigned MovMskElts = MovmskVT.getVectorNumElements(); |
40381 | |
40382 | // The compare must not need more mask elements than the bitcast result, |
40383 | // and vXi8 compares need AVX512BW to produce a k-register directly. |
40384 | if (MovMskElts <= NumElts && |
40385 | (Subtarget.hasBWI() || MovmskVT.getVectorElementType() != MVT::i8)) { |
40386 | EVT IntVT = EVT(MovmskVT).changeVectorElementTypeToInteger(); |
40387 | MovmskIn = DAG.getBitcast(IntVT, MovmskIn); |
40388 | SDLoc dl(N); |
40389 | MVT CmpVT = MVT::getVectorVT(MVT::i1, MovMskElts); |
40390 | SDValue Cmp = DAG.getSetCC(dl, CmpVT, MovmskIn, |
40391 | DAG.getConstant(0, dl, IntVT), ISD::SETLT); |
40392 | if (EVT(CmpVT) == VT) |
40393 | return Cmp; |
40394 | |
40395 | // If the result type is wider than the compare, pad the upper mask |
40396 | // elements with zeros via concat_vectors. |
40397 | unsigned NumConcats = NumElts / MovMskElts; |
40398 | SmallVector<SDValue, 4> Ops(NumConcats, DAG.getConstant(0, dl, CmpVT)); |
40399 | Ops[0] = Cmp; |
40400 | return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Ops); |
40401 | } |
40402 | } |
40403 | } |
40404 | |
40405 | |
40406 | // Try to remove bitcasts from logic ops on vXi1 masks. |
40407 | if (SDValue V = combineCastedMaskArithmetic(N, DAG, DCI, Subtarget)) |
40408 | return V; |
40409 | |
40410 | |
40411 | // If this is a bitcast of a logic op and one operand is itself a bitcast |
40412 | // from the destination type, perform the logic in the destination domain |
40413 | // instead (using FAND/FOR/FXOR for scalar floats) so both casts fold. |
40414 | |
40415 | unsigned FPOpcode; |
40416 | switch (N0.getOpcode()) { |
40417 | case ISD::AND: FPOpcode = X86ISD::FAND; break; |
40418 | case ISD::OR: FPOpcode = X86ISD::FOR; break; |
40419 | case ISD::XOR: FPOpcode = X86ISD::FXOR; break; |
40420 | default: return SDValue(); |
40421 | } |
40422 | |
40423 | // Verify the destination type is supported by the subtarget. |
40424 | if (!((Subtarget.hasSSE1() && VT == MVT::f32) || |
40425 | (Subtarget.hasSSE2() && VT == MVT::f64) || |
40426 | (Subtarget.hasSSE2() && VT.isInteger() && VT.isVector() && |
40427 | TLI.isTypeLegal(VT)))) |
40428 | return SDValue(); |
40429 | |
40430 | SDValue LogicOp0 = N0.getOperand(0); |
40431 | SDValue LogicOp1 = N0.getOperand(1); |
40432 | SDLoc DL0(N0); |
40433 | |
40434 | // bitcast(logic(bitcast(X), Y)) --> logic'(X, bitcast(Y)) |
40435 | if (N0.hasOneUse() && LogicOp0.getOpcode() == ISD::BITCAST && |
40436 | LogicOp0.hasOneUse() && LogicOp0.getOperand(0).hasOneUse() && |
40437 | LogicOp0.getOperand(0).getValueType() == VT && |
40438 | !isa<ConstantSDNode>(LogicOp0.getOperand(0))) { |
40439 | SDValue CastedOp1 = DAG.getBitcast(VT, LogicOp1); |
40440 | unsigned Opcode = VT.isFloatingPoint() ? FPOpcode : N0.getOpcode(); |
40441 | return DAG.getNode(Opcode, DL0, VT, LogicOp0.getOperand(0), CastedOp1); |
40442 | } |
40443 | |
40444 | if (N0.hasOneUse() && LogicOp1.getOpcode() == ISD::BITCAST && |
40445 | LogicOp1.hasOneUse() && LogicOp1.getOperand(0).hasOneUse() && |
40446 | LogicOp1.getOperand(0).getValueType() == VT && |
40447 | !isa<ConstantSDNode>(LogicOp1.getOperand(0))) { |
40448 | SDValue CastedOp0 = DAG.getBitcast(VT, LogicOp0); |
40449 | unsigned Opcode = VT.isFloatingPoint() ? FPOpcode : N0.getOpcode(); |
40450 | return DAG.getNode(Opcode, DL0, VT, LogicOp1.getOperand(0), CastedOp0); |
40451 | } |
40452 | |
40453 | return SDValue(); |
40454 | } |
40455 | |
40456 | // Given an ABS node, detect the following pattern: |
40457 | // (ABS (SUB (ZERO_EXTEND a), (ZERO_EXTEND b))). |
40458 | // This is useful as it is the input into a SAD pattern. |
40459 | static bool detectZextAbsDiff(const SDValue &Abs, SDValue &Op0, SDValue &Op1) { |
40460 | SDValue AbsOp1 = Abs->getOperand(0); |
40461 | if (AbsOp1.getOpcode() != ISD::SUB) |
40462 | return false; |
40463 | |
40464 | Op0 = AbsOp1.getOperand(0); |
40465 | Op1 = AbsOp1.getOperand(1); |
40466 | |
40467 | // Check if the operands of the sub are zero-extended from vectors of i8. |
40468 | if (Op0.getOpcode() != ISD::ZERO_EXTEND || |
40469 | Op0.getOperand(0).getValueType().getVectorElementType() != MVT::i8 || |
40470 | Op1.getOpcode() != ISD::ZERO_EXTEND || |
40471 | Op1.getOperand(0).getValueType().getVectorElementType() != MVT::i8) |
40472 | return false; |
40473 | |
40474 | return true; |
40475 | } |
40476 | |
40477 | // Given two zexts of <k x i8> to <k x i32>, create a PSADBW of the |
40478 | // input vectors. |
40479 | static SDValue createPSADBW(SelectionDAG &DAG, const SDValue &Zext0, |
40480 | const SDValue &Zext1, const SDLoc &DL, |
40481 | const X86Subtarget &Subtarget) { |
40482 | // Find the appropriate width for the PSADBW. |
40483 | EVT InVT = Zext0.getOperand(0).getValueType(); |
40484 | unsigned RegSize = std::max(128u, (unsigned)InVT.getSizeInBits()); |
40485 | |
40486 | // "Zero-extend" the i8 vectors; this is not a per-element zext, rather |
40487 | // we pad the rest of the vector with zeroes. |
40488 | unsigned NumConcat = RegSize / InVT.getSizeInBits(); |
40489 | SmallVector<SDValue, 16> Ops(NumConcat, DAG.getConstant(0, DL, InVT)); |
40490 | Ops[0] = Zext0.getOperand(0); |
40491 | MVT ExtendedVT = MVT::getVectorVT(MVT::i8, RegSize / 8); |
40492 | SDValue SadOp0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, ExtendedVT, Ops); |
40493 | Ops[0] = Zext1.getOperand(0); |
40494 | SDValue SadOp1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, ExtendedVT, Ops); |
40495 | |
40496 | // Emit the PSADBW, splitting into target-legal vector widths as needed. |
40497 | auto PSADBWBuilder = [](SelectionDAG &DAG, const SDLoc &DL, |
40498 | ArrayRef<SDValue> Ops) { |
40499 | MVT VT = MVT::getVectorVT(MVT::i64, Ops[0].getValueSizeInBits() / 64); |
40500 | return DAG.getNode(X86ISD::PSADBW, DL, VT, Ops); |
40501 | }; |
40502 | MVT SadVT = MVT::getVectorVT(MVT::i64, RegSize / 64); |
40503 | return SplitOpsAndApply(DAG, Subtarget, DL, SadVT, { SadOp0, SadOp1 }, |
40504 | PSADBWBuilder); |
40505 | } |
40506 | |
40507 | // Attempt to replace a min/max v8i16/v16i8 horizontal reduction with |
40508 | // PHMINPOSUW. |
40509 | static SDValue combineMinMaxReduction(SDNode *Extract, SelectionDAG &DAG, |
40510 | const X86Subtarget &Subtarget) { |
40511 | // Bail without SSE41. |
40512 | if (!Subtarget.hasSSE41()) |
40513 | return SDValue(); |
40514 | |
40515 | EVT ExtractVT = Extract->getValueType(0); |
40516 | if (ExtractVT != MVT::i16 && ExtractVT != MVT::i8) |
40517 | return SDValue(); |
40518 | |
40519 | // Check for SMAX/SMIN/UMAX/UMIN horizontal reduction patterns. |
40520 | ISD::NodeType BinOp; |
40521 | SDValue Src = DAG.matchBinOpReduction( |
40522 | Extract, BinOp, {ISD::SMAX, ISD::SMIN, ISD::UMAX, ISD::UMIN}, true); |
40523 | if (!Src) |
40524 | return SDValue(); |
40525 | |
40526 | EVT SrcVT = Src.getValueType(); |
40527 | EVT SrcSVT = SrcVT.getScalarType(); |
40528 | if (SrcSVT != ExtractVT || (SrcVT.getSizeInBits() % 128) != 0) |
40529 | return SDValue(); |
40530 | |
40531 | SDLoc DL(Extract); |
40532 | SDValue MinPos = Src; |
40533 | |
40534 | // First, reduce the source down to 128 bits, applying BinOp to lo/hi halves. |
40535 | while (SrcVT.getSizeInBits() > 128) { |
40536 | SDValue Lo, Hi; |
40537 | std::tie(Lo, Hi) = splitVector(MinPos, DAG, DL); |
40538 | SrcVT = Lo.getValueType(); |
40539 | MinPos = DAG.getNode(BinOp, DL, SrcVT, Lo, Hi); |
40540 | } |
40541 | assert(((SrcVT == MVT::v8i16 && ExtractVT == MVT::i16) || |
40542 | (SrcVT == MVT::v16i8 && ExtractVT == MVT::i8)) && |
40543 | "Unexpected value type"); |
40544 | |
40545 | // PHMINPOSUW applies to UMIN(v8i16); for SMIN/SMAX/UMAX we must apply a |
40546 | // mask to flip the value into unsigned-min form (and back afterwards). |
40547 | SDValue Mask; |
40548 | unsigned MaskEltsBits = ExtractVT.getSizeInBits(); |
40549 | if (BinOp == ISD::SMAX) |
40550 | Mask = DAG.getConstant(APInt::getSignedMaxValue(MaskEltsBits), DL, SrcVT); |
40551 | else if (BinOp == ISD::SMIN) |
40552 | Mask = DAG.getConstant(APInt::getSignedMinValue(MaskEltsBits), DL, SrcVT); |
40553 | else if (BinOp == ISD::UMAX) |
40554 | Mask = DAG.getConstant(APInt::getAllOnesValue(MaskEltsBits), DL, SrcVT); |
40555 | |
40556 | if (Mask) |
40557 | MinPos = DAG.getNode(ISD::XOR, DL, SrcVT, Mask, MinPos); |
40558 | |
40559 | // For v16i8 cases we need to perform UMIN on pairs of byte elements: |
40560 | // shuffle each upper (odd) byte down next to a zero so that, viewed as |
40561 | // v8i16, every element holds min(lo, hi) in its low byte with a zero |
40562 | // high byte, ready for PHMINPOSUW. |
40563 | if (ExtractVT == MVT::i8) { |
40564 | SDValue Upper = DAG.getVectorShuffle( |
40565 | SrcVT, DL, MinPos, DAG.getConstant(0, DL, MVT::v16i8), |
40566 | {1, 16, 3, 16, 5, 16, 7, 16, 9, 16, 11, 16, 13, 16, 15, 16}); |
40567 | MinPos = DAG.getNode(ISD::UMIN, DL, SrcVT, MinPos, Upper); |
40568 | } |
40569 | |
40570 | // Perform the PHMINPOS on a v8i16 vector. |
40571 | MinPos = DAG.getBitcast(MVT::v8i16, MinPos); |
40572 | MinPos = DAG.getNode(X86ISD::PHMINPOS, DL, MVT::v8i16, MinPos); |
40573 | MinPos = DAG.getBitcast(SrcVT, MinPos); |
40574 | |
40575 | if (Mask) |
40576 | MinPos = DAG.getNode(ISD::XOR, DL, SrcVT, Mask, MinPos); |
40577 | |
40578 | return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ExtractVT, MinPos, |
40579 | DAG.getIntPtrConstant(0, DL)); |
40580 | } |
40581 | |
40582 | // Attempt to replace an all_of/any_of horizontal reduction with a MOVMSK. |
40583 | static SDValue combinePredicateReduction(SDNode *Extract, SelectionDAG &DAG, |
40584 | const X86Subtarget &Subtarget) { |
40585 | // Bail without SSE2. |
40586 | if (!Subtarget.hasSSE2()) |
40587 | return SDValue(); |
40588 | |
40589 | EVT ExtractVT = Extract->getValueType(0); |
40590 | unsigned BitWidth = ExtractVT.getSizeInBits(); |
40591 | if (ExtractVT != MVT::i64 && ExtractVT != MVT::i32 && ExtractVT != MVT::i16 && |
40592 | ExtractVT != MVT::i8 && ExtractVT != MVT::i1) |
40593 | return SDValue(); |
40594 | |
40595 | // Check for OR(any_of)/AND(all_of)/XOR(parity) horizontal reduction patterns. |
40596 | ISD::NodeType BinOp; |
40597 | SDValue Match = DAG.matchBinOpReduction(Extract, BinOp, {ISD::OR, ISD::AND}); |
40598 | if (!Match && ExtractVT == MVT::i1) |
40599 | Match = DAG.matchBinOpReduction(Extract, BinOp, {ISD::XOR}); |
40600 | if (!Match) |
40601 | return SDValue(); |
40602 | |
40603 | // EXTRACT_VECTOR_ELT can implicitly extend the vector element; bail if |
40604 | // the reduction element width doesn't match the extract type. |
40605 | if (Match.getScalarValueSizeInBits() != BitWidth) |
40606 | return SDValue(); |
40607 | |
40608 | SDValue Movmsk; |
40609 | SDLoc DL(Extract); |
40610 | EVT MatchVT = Match.getValueType(); |
40611 | unsigned NumElts = MatchVT.getVectorNumElements(); |
40612 | unsigned MaxElts = Subtarget.hasInt256() ? 32 : 16; |
40613 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
40614 | |
40615 | if (ExtractVT == MVT::i1) { |
40616 | // Special case for (pre-legalization) vXi1 reductions. |
40617 | if (NumElts > 64 || !isPowerOf2_32(NumElts)) |
40618 | return SDValue(); |
40619 | if (TLI.isTypeLegal(MatchVT)) { |
40620 | // If this is a legal AVX512 predicate type then we can just bitcast. |
40621 | EVT MovmskVT = EVT::getIntegerVT(*DAG.getContext(), NumElts); |
40622 | Movmsk = DAG.getBitcast(MovmskVT, Match); |
40623 | } else { |
40624 | // Without PCMPEQQ (pre-SSE41), an all_of(setcc(x, 0, eq)) over i64 |
40625 | // elements can instead compare the i32 halves: all halves must be zero. |
40626 | if (BinOp == ISD::AND && !Subtarget.hasSSE41() && |
40627 | Match.getOpcode() == ISD::SETCC && |
40628 | ISD::isBuildVectorAllZeros(Match.getOperand(1).getNode()) && |
40629 | cast<CondCodeSDNode>(Match.getOperand(2))->get() == |
40630 | ISD::CondCode::SETEQ) { |
40631 | SDValue Vec = Match.getOperand(0); |
40632 | if (Vec.getValueType().getScalarType() == MVT::i64 && |
40633 | (2 * NumElts) <= MaxElts) { |
40634 | NumElts *= 2; |
40635 | EVT CmpVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts); |
40636 | MatchVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, NumElts); |
40637 | Match = DAG.getSetCC( |
40638 | DL, MatchVT, DAG.getBitcast(CmpVT, Match.getOperand(0)), |
40639 | DAG.getBitcast(CmpVT, Match.getOperand(1)), ISD::CondCode::SETEQ); |
40640 | } |
40641 | } |
40642 | |
40643 | // Split the vector until it fits the MOVMSK element limit. |
40644 | while (NumElts > MaxElts) { |
40645 | SDValue Lo, Hi; |
40646 | std::tie(Lo, Hi) = DAG.SplitVector(Match, DL); |
40647 | Match = DAG.getNode(BinOp, DL, Lo.getValueType(), Lo, Hi); |
40648 | NumElts /= 2; |
40649 | } |
40650 | EVT MovmskVT = EVT::getIntegerVT(*DAG.getContext(), NumElts); |
40651 | Movmsk = combineBitcastvxi1(DAG, MovmskVT, Match, DL, Subtarget); |
40652 | } |
40653 | if (!Movmsk) |
40654 | return SDValue(); |
40655 | Movmsk = DAG.getZExtOrTrunc(Movmsk, DL, NumElts > 32 ? MVT::i64 : MVT::i32); |
40656 | } else { |
40657 | // Scalar-element reduction: make sure MOVMSK can handle the source. |
40658 | unsigned MatchSizeInBits = Match.getValueSizeInBits(); |
40659 | if (!(MatchSizeInBits == 128 || |
40660 | (MatchSizeInBits == 256 && Subtarget.hasAVX()))) |
40661 | return SDValue(); |
40662 | |
40663 | // Bail on single-element vectors: there is nothing to reduce and the |
40664 | // MOVMSK lowering assumes at least two elements' sign bits are being |
40665 | // combined. |
40666 | |
40667 | if (Match.getValueType().getVectorNumElements() < 2) |
40668 | return SDValue(); |
40669 | |
40670 | // Check that we are extracting a reduction of all-sign-bit values. |
40671 | if (DAG.ComputeNumSignBits(Match) != BitWidth) |
40672 | return SDValue(); |
40673 | |
40674 | if (MatchSizeInBits == 256 && BitWidth < 32 && !Subtarget.hasInt256()) { |
40675 | SDValue Lo, Hi; |
40676 | std::tie(Lo, Hi) = DAG.SplitVector(Match, DL); |
40677 | Match = DAG.getNode(BinOp, DL, Lo.getValueType(), Lo, Hi); |
40678 | MatchSizeInBits = Match.getValueSizeInBits(); |
40679 | } |
40680 | |
40681 | // Use MOVMSKPS/MOVMSKPD for 32/64-bit elements, else PMOVMSKB on bytes. |
40682 | MVT MaskSrcVT; |
40683 | if (64 == BitWidth || 32 == BitWidth) |
40684 | MaskSrcVT = MVT::getVectorVT(MVT::getFloatingPointVT(BitWidth), |
40685 | MatchSizeInBits / BitWidth); |
40686 | else |
40687 | MaskSrcVT = MVT::getVectorVT(MVT::i8, MatchSizeInBits / 8); |
40688 | |
40689 | SDValue BitcastLogicOp = DAG.getBitcast(MaskSrcVT, Match); |
40690 | Movmsk = getPMOVMSKB(DL, BitcastLogicOp, DAG, Subtarget); |
40691 | NumElts = MaskSrcVT.getVectorNumElements(); |
40692 | } |
40693 | assert((NumElts <= 32 || NumElts == 64) && |
40694 | "Not expecting more than 64 elements"); |
40695 | |
40696 | MVT CmpVT = NumElts == 64 ? MVT::i64 : MVT::i32; |
40697 | if (BinOp == ISD::XOR) { |
40698 | // parity -> (PARITY(MOVMSK X)) |
40699 | SDValue Result = DAG.getNode(ISD::PARITY, DL, CmpVT, Movmsk); |
40700 | return DAG.getZExtOrTrunc(Result, DL, ExtractVT); |
40701 | } |
40702 | |
40703 | SDValue CmpC; |
40704 | ISD::CondCode CondCode; |
40705 | if (BinOp == ISD::OR) { |
40706 | // any_of -> MOVMSK != 0 |
40707 | CmpC = DAG.getConstant(0, DL, CmpVT); |
40708 | CondCode = ISD::CondCode::SETNE; |
40709 | } else { |
40710 | // all_of -> MOVMSK == ((1 << NumElts) - 1) |
40711 | CmpC = DAG.getConstant(APInt::getLowBitsSet(CmpVT.getSizeInBits(), NumElts), |
40712 | DL, CmpVT); |
40713 | CondCode = ISD::CondCode::SETEQ; |
40714 | } |
40715 | |
40716 | // The setcc produces an i8 of 0/1, so extend that to the result width |
40717 | // and negate to get the final 0/-1 mask value. |
40718 | EVT SetccVT = |
40719 | TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT); |
40720 | SDValue Setcc = DAG.getSetCC(DL, SetccVT, Movmsk, CmpC, CondCode); |
40721 | SDValue Zext = DAG.getZExtOrTrunc(Setcc, DL, ExtractVT); |
40722 | SDValue Zero = DAG.getConstant(0, DL, ExtractVT); |
40723 | return DAG.getNode(ISD::SUB, DL, ExtractVT, Zero, Zext); |
40724 | } |
40725 | |
40726 | static SDValue combineBasicSADPattern(SDNode *Extract, SelectionDAG &DAG, |
40727 | const X86Subtarget &Subtarget) { |
40728 | // PSADBW is only supported on SSE2 and up. |
40729 | if (!Subtarget.hasSSE2()) |
40730 | return SDValue(); |
40731 | |
40732 | EVT ExtractVT = Extract->getValueType(0); |
40733 | |
40734 | // Only i32/i64 extractions can come out of the PSADBW lowering below. |
40735 | if (ExtractVT != MVT::i32 && ExtractVT != MVT::i64) |
40736 | return SDValue(); |
40737 | |
40738 | EVT VT = Extract->getOperand(0).getValueType(); |
40739 | if (!isPowerOf2_32(VT.getVectorNumElements())) |
40740 | return SDValue(); |
40741 | |
40742 | // Match shuffle + add pyramids. |
40743 | ISD::NodeType BinOp; |
40744 | SDValue Root = DAG.matchBinOpReduction(Extract, BinOp, {ISD::ADD}); |
40745 | |
40746 | // The operand is expected to be zero extended from i8 |
40747 | // (verified in detectZextAbsDiff). |
40748 | // In order to convert to i64 and above, an additional any/zero/sign |
40749 | // extend is expected. |
40750 | // The zero extend from 32 bit has no mathematical effect on the result. |
40751 | // Also the sign extend is basically a zero extend |
40752 | // (it extends the sign bit, which is zero). |
40753 | // So it is correct to skip the sign/zero extend instruction. |
40754 | if (Root && (Root.getOpcode() == ISD::SIGN_EXTEND || |
40755 | Root.getOpcode() == ISD::ZERO_EXTEND || |
40756 | Root.getOpcode() == ISD::ANY_EXTEND)) |
40757 | Root = Root.getOperand(0); |
40758 | |
40759 | // After skipping the extension, the reduction root must be an ABS of a |
40760 | // zero-extended i8 difference. |
40761 | if (!Root || Root.getOpcode() != ISD::ABS) |
40762 | return SDValue(); |
40763 | |
40764 | // Match the operands of the ABS as zero-extended i8 vectors. |
40765 | SDValue Zext0, Zext1; |
40766 | if (!detectZextAbsDiff(Root, Zext0, Zext1)) |
40767 | return SDValue(); |
40768 | |
40769 | // Create the SAD instruction. |
40770 | SDLoc DL(Extract); |
40771 | SDValue SAD = createPSADBW(DAG, Zext0, Zext1, DL, Subtarget); |
40772 | |
40773 | // If the original vector was wider than 8 elements, sum over the results |
40774 | // in the SAD vector using a shuffle+add pyramid. |
40775 | unsigned Stages = Log2_32(VT.getVectorNumElements()); |
40776 | EVT SadVT = SAD.getValueType(); |
40777 | if (Stages > 3) { |
40778 | unsigned SadElems = SadVT.getVectorNumElements(); |
40779 | // Each stage adds the upper half of the active elements onto the lower half. |
40780 | for(unsigned i = Stages - 3; i > 0; --i) { |
40781 | SmallVector<int, 16> Mask(SadElems, -1); |
40782 | for(unsigned j = 0, MaskEnd = 1 << (i - 1); j < MaskEnd; ++j) |
40783 | Mask[j] = MaskEnd + j; |
40784 | |
40785 | SDValue Shuffle = |
40786 | DAG.getVectorShuffle(SadVT, DL, SAD, DAG.getUNDEF(SadVT), Mask); |
40787 | SAD = DAG.getNode(ISD::ADD, DL, SadVT, SAD, Shuffle); |
40788 | } |
40789 | } |
40790 | |
40791 | unsigned ExtractSizeInBits = ExtractVT.getSizeInBits(); |
40792 | // Bitcast so the extract index addresses ExtractVT-sized elements. |
40793 | EVT ResVT = EVT::getVectorVT(*DAG.getContext(), ExtractVT, |
40794 | SadVT.getSizeInBits() / ExtractSizeInBits); |
40795 | SAD = DAG.getBitcast(ResVT, SAD); |
40796 | return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ExtractVT, SAD, |
40797 | Extract->getOperand(1)); |
40798 | } |
40799 | |
40800 | // Attempt to peek through a target shuffle and extract the scalar from |
40801 | // the shuffle source operand instead. |
40802 | static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG, |
40803 | TargetLowering::DAGCombinerInfo &DCI, |
40804 | const X86Subtarget &Subtarget) { |
40805 | if (DCI.isBeforeLegalizeOps()) |
40806 | return SDValue(); |
40807 | |
40808 | SDLoc dl(N); |
40809 | SDValue Src = N->getOperand(0); |
40810 | SDValue Idx = N->getOperand(1); |
40811 | |
40812 | EVT VT = N->getValueType(0); |
40813 | EVT SrcVT = Src.getValueType(); |
40814 | EVT SrcSVT = SrcVT.getVectorElementType(); |
40815 | unsigned SrcEltBits = SrcSVT.getSizeInBits(); |
40816 | unsigned NumSrcElts = SrcVT.getVectorNumElements(); |
40817 | |
40818 | // Don't attempt this for boolean mask vectors or unknown extraction indices. |
40819 | if (SrcSVT == MVT::i1 || !isa<ConstantSDNode>(Idx)) |
40820 | return SDValue(); |
40821 | |
40822 | const APInt &IdxC = N->getConstantOperandAPInt(1); |
40823 | if (IdxC.uge(NumSrcElts)) |
40824 | return SDValue(); |
40825 | |
40826 | SDValue SrcBC = peekThroughBitcasts(Src); |
40827 | |
40828 | // Handle extract(bitcast(broadcast(scalar_value))). |
40829 | if (X86ISD::VBROADCAST == SrcBC.getOpcode()) { |
40830 | SDValue SrcOp = SrcBC.getOperand(0); |
40831 | EVT SrcOpVT = SrcOp.getValueType(); |
40832 | if (SrcOpVT.isScalarInteger() && VT.isInteger() && |
40833 | (SrcOpVT.getSizeInBits() % SrcEltBits) == 0) { |
40834 | unsigned Scale = SrcOpVT.getSizeInBits() / SrcEltBits; |
40835 | unsigned Offset = IdxC.urem(Scale) * SrcEltBits; |
40836 | // TODO: support non-zero offsets. |
40837 | if (Offset == 0) { |
40838 | SrcOp = DAG.getZExtOrTrunc(SrcOp, dl, SrcVT.getScalarType()); |
40839 | SrcOp = DAG.getZExtOrTrunc(SrcOp, dl, VT); |
40840 | return SrcOp; |
40841 | } |
40842 | } |
40843 | } |
40844 | |
40845 | // If we're extracting a single element from a broadcast load and there |
40846 | // are no other users, just use the underlying scalar load. |
40847 | if (SrcBC.getOpcode() == X86ISD::VBROADCAST_LOAD && SrcBC.hasOneUse()) { |
40848 | auto *MemIntr = cast<MemIntrinsicSDNode>(SrcBC); |
40849 | unsigned SrcBCWidth = SrcBC.getScalarValueSizeInBits(); |
40850 | if (MemIntr->getMemoryVT().getSizeInBits() == SrcBCWidth && |
40851 | VT.getSizeInBits() == SrcBCWidth && SrcEltBits == SrcBCWidth) { |
40852 | SDValue Load = DAG.getLoad(VT, dl, MemIntr->getChain(), |
40853 | MemIntr->getBasePtr(), |
40854 | MemIntr->getPointerInfo(), |
40855 | MemIntr->getOriginalAlign(), |
40856 | MemIntr->getMemOperand()->getFlags()); |
40857 | DAG.ReplaceAllUsesOfValueWith(SDValue(MemIntr, 1), Load.getValue(1)); |
40858 | return Load; |
40859 | } |
40860 | } |
40861 | |
40862 | // Handle extract(bitcast(scalar_to_vector(x))) by shifting/truncating |
40863 | // the original scalar directly. |
40864 | if (SrcBC.getOpcode() == ISD::SCALAR_TO_VECTOR && VT.isInteger() && |
40865 | SrcBC.getValueType().isInteger() && |
40866 | (SrcBC.getScalarValueSizeInBits() % SrcEltBits) == 0 && |
40867 | SrcBC.getScalarValueSizeInBits() == |
40868 | SrcBC.getOperand(0).getValueSizeInBits()) { |
40869 | unsigned Scale = SrcBC.getScalarValueSizeInBits() / SrcEltBits; |
40870 | if (IdxC.ult(Scale)) { |
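      | // Shift the scalar right so the requested sub-element lands at bit 0, |
      | // then truncate/extend to the result type. |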
40871 | unsigned Offset = IdxC.getZExtValue() * SrcVT.getScalarSizeInBits(); |
40872 | SDValue Scl = SrcBC.getOperand(0); |
40873 | EVT SclVT = Scl.getValueType(); |
40874 | if (Offset) { |
40875 | Scl = DAG.getNode(ISD::SRL, dl, SclVT, Scl, |
40876 | DAG.getShiftAmountConstant(Offset, SclVT, dl)); |
40877 | } |
40878 | Scl = DAG.getZExtOrTrunc(Scl, dl, SrcVT.getScalarType()); |
40879 | Scl = DAG.getZExtOrTrunc(Scl, dl, VT); |
40880 | return Scl; |
40881 | } |
40882 | } |
40883 | |
40884 | // Handle extract(truncate(x)) for the 0'th index: the truncate can be |
40885 | // performed after extracting from the low 128 bits of the source. |
40886 | |
40887 | if (ISD::TRUNCATE == Src.getOpcode() && IdxC == 0 && |
40888 | (SrcVT.getSizeInBits() % 128) == 0) { |
40889 | Src = extract128BitVector(Src.getOperand(0), 0, DAG, dl); |
40890 | MVT ExtractVT = MVT::getVectorVT(SrcSVT.getSimpleVT(), 128 / SrcEltBits); |
40891 | return DAG.getNode(N->getOpcode(), dl, VT, DAG.getBitcast(ExtractVT, Src), |
40892 | Idx); |
40893 | } |
40894 | |
40895 | // Helper that produces a legal scalar extraction for the subtarget, |
40896 | // first narrowing 256/512-bit sources to the owning 128-bit lane, then |
40897 | // extracting via EXTRACT_VECTOR_ELT or PEXTRW/PEXTRB where available. |
40898 | auto GetLegalExtract = [&Subtarget, &DAG, &dl](SDValue Vec, EVT VecVT, |
40899 | unsigned Idx) { |
40900 | EVT VecSVT = VecVT.getScalarType(); |
40901 | if ((VecVT.is256BitVector() || VecVT.is512BitVector()) && |
40902 | (VecSVT == MVT::i8 || VecSVT == MVT::i16 || VecSVT == MVT::i32 || |
40903 | VecSVT == MVT::i64)) { |
40904 | unsigned EltSizeInBits = VecSVT.getSizeInBits(); |
40905 | unsigned NumEltsPerLane = 128 / EltSizeInBits; |
40906 | unsigned LaneOffset = (Idx & ~(NumEltsPerLane - 1)) * EltSizeInBits; |
40907 | unsigned LaneIdx = LaneOffset / Vec.getScalarValueSizeInBits(); |
40908 | VecVT = EVT::getVectorVT(*DAG.getContext(), VecSVT, NumEltsPerLane); |
40909 | Vec = extract128BitVector(Vec, LaneIdx, DAG, dl); |
40910 | Idx &= (NumEltsPerLane - 1); |
40911 | } |
40912 | if ((VecVT == MVT::v4i32 || VecVT == MVT::v2i64) && |
40913 | ((Idx == 0 && Subtarget.hasSSE2()) || Subtarget.hasSSE41())) { |
40914 | return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VecVT.getScalarType(), |
40915 | DAG.getBitcast(VecVT, Vec), |
40916 | DAG.getIntPtrConstant(Idx, dl)); |
40917 | } |
40918 | if ((VecVT == MVT::v8i16 && Subtarget.hasSSE2()) || |
40919 | (VecVT == MVT::v16i8 && Subtarget.hasSSE41())) { |
40920 | unsigned OpCode = (VecVT == MVT::v8i16 ? X86ISD::PEXTRW : X86ISD::PEXTRB); |
40921 | return DAG.getNode(OpCode, dl, MVT::i32, DAG.getBitcast(VecVT, Vec), |
40922 | DAG.getTargetConstant(Idx, dl, MVT::i8)); |
40923 | } |
40924 | return SDValue(); |
40925 | }; |
40926 | |
40927 | // Resolve the target shuffle inputs and mask. |
40928 | SmallVector<int, 16> Mask; |
40929 | SmallVector<SDValue, 2> Ops; |
40930 | if (!getTargetShuffleInputs(SrcBC, Ops, Mask, DAG)) |
40931 | return SDValue(); |
40932 | |
40933 | // Shuffle inputs must be the same size as the result. |
40934 | if (llvm::any_of(Ops, [SrcVT](SDValue Op) { |
40935 | return SrcVT.getSizeInBits() != Op.getValueSizeInBits(); |
40936 | })) |
40937 | return SDValue(); |
40938 | |
40939 | // Attempt to narrow/widen the shuffle mask to the correct size. |
40940 | if (Mask.size() != NumSrcElts) { |
40941 | if ((NumSrcElts % Mask.size()) == 0) { |
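      | // Mask is coarser than the source: split each mask element into |
      | // Scale sub-elements. |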
40942 | SmallVector<int, 16> ScaledMask; |
40943 | int Scale = NumSrcElts / Mask.size(); |
40944 | narrowShuffleMaskElts(Scale, Mask, ScaledMask); |
40945 | Mask = std::move(ScaledMask); |
40946 | } else if ((Mask.size() % NumSrcElts) == 0) { |
40947 | // Mask is finer: keep only the demanded element's range, then re-widen. |
40948 | int ExtractIdx = (int)IdxC.getZExtValue(); |
40949 | int Scale = Mask.size() / NumSrcElts; |
40950 | int Lo = Scale * ExtractIdx; |
40951 | int Hi = Scale * (ExtractIdx + 1); |
40952 | for (int i = 0, e = (int)Mask.size(); i != e; ++i) |
40953 | if (i < Lo || Hi <= i) |
40954 | Mask[i] = SM_SentinelUndef; |
40955 | |
40956 | SmallVector<int, 16> WidenedMask; |
40957 | while (Mask.size() > NumSrcElts && |
40958 | canWidenShuffleElements(Mask, WidenedMask)) |
40959 | Mask = std::move(WidenedMask); |
40960 | } |
40961 | } |
40962 | |
40963 | // Determine the extraction index and type after any mask rescaling. |
40964 | int ExtractIdx; |
40965 | EVT ExtractVT; |
40966 | if (Mask.size() == NumSrcElts) { |
40967 | ExtractIdx = Mask[IdxC.getZExtValue()]; |
40968 | ExtractVT = SrcVT; |
40969 | } else { |
40970 | unsigned Scale = Mask.size() / NumSrcElts; |
40971 | if ((Mask.size() % NumSrcElts) != 0 || SrcVT.isFloatingPoint()) |
40972 | return SDValue(); |
40973 | unsigned ScaledIdx = Scale * IdxC.getZExtValue(); |
40974 | if (!isUndefOrZeroInRange(Mask, ScaledIdx + 1, Scale - 1)) |
40975 | return SDValue(); |
40976 | ExtractIdx = Mask[ScaledIdx]; |
40977 | EVT ExtractSVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltBits / Scale); |
40978 | ExtractVT = EVT::getVectorVT(*DAG.getContext(), ExtractSVT, Mask.size()); |
40979 | assert(SrcVT.getSizeInBits() == ExtractVT.getSizeInBits() && |
40980 | "Failed to widen vector type"); |
40981 | } |
40982 | |
40983 | // If the shuffle maps the element to undef/zero, fold to a constant. |
40984 | if (ExtractIdx == SM_SentinelUndef) |
40985 | return DAG.getUNDEF(VT); |
40986 | |
40987 | if (ExtractIdx == SM_SentinelZero) |
40988 | return VT.isFloatingPoint() ? DAG.getConstantFP(0.0, dl, VT) |
40989 | : DAG.getConstant(0, dl, VT); |
40990 | |
40991 | SDValue SrcOp = Ops[ExtractIdx / Mask.size()]; |
40992 | ExtractIdx = ExtractIdx % Mask.size(); |
40993 | if (SDValue V = GetLegalExtract(SrcOp, ExtractVT, ExtractIdx)) |
40994 | return DAG.getZExtOrTrunc(V, dl, VT); |
40995 | |
40996 | return SDValue(); |
40997 | } |
40998 | |
40999 | // Attempt to scalarize an extract of element 0 from a single-use FP |
41000 | // vector op: extract(op(V), 0) --> op(extract(V, 0)). |
41001 | static SDValue scalarizeExtEltFP(SDNode *ExtElt, SelectionDAG &DAG) { |
41002 | assert(ExtElt->getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Expected extract"); |
41003 | SDValue Vec = ExtElt->getOperand(0); |
41004 | SDValue Index = ExtElt->getOperand(1); |
41005 | EVT VT = ExtElt->getValueType(0); |
41006 | EVT VecVT = Vec.getValueType(); |
41007 | |
41008 | |
41009 | // Only convert single-use extractions of element 0 with matching types. |
41010 | if (!Vec.hasOneUse() || !isNullConstant(Index) || VecVT.getScalarType() != VT) |
41011 | return SDValue(); |
41012 | |
41013 | |
41014 | // Scalarize a vector FP compare that feeds an i1 extract. |
41015 | if (Vec.getOpcode() == ISD::SETCC && VT == MVT::i1) { |
41016 | EVT OpVT = Vec.getOperand(0).getValueType().getScalarType(); |
41017 | if (OpVT != MVT::f32 && OpVT != MVT::f64) |
41018 | return SDValue(); |
41019 | |
41020 | // extract (setcc X, Y, CC), 0 --> setcc (extract X, 0), (extract Y, 0), CC |
41021 | SDLoc DL(ExtElt); |
41022 | SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, |
41023 | Vec.getOperand(0), Index); |
41024 | SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT, |
41025 | Vec.getOperand(1), Index); |
41026 | return DAG.getNode(Vec.getOpcode(), DL, VT, Ext0, Ext1, Vec.getOperand(2)); |
41027 | } |
41028 | |
41029 | if (VT != MVT::f32 && VT != MVT::f64) |
41030 | return SDValue(); |
41031 | |
41032 | // Vector FP selects don't fit the pattern of FP math ops (because the |
41033 | // condition has a different type and we have to change the opcode), so |
41034 | // deal with those here. |
41035 | // FIXME: This is restricted to pre-type-legalization by ensuring the |
41036 | // setcc has i1 elements; loosening this would require converting a |
41037 | // vector bool to a scalar bool. |
41038 | if (Vec.getOpcode() == ISD::VSELECT && |
41039 | Vec.getOperand(0).getOpcode() == ISD::SETCC && |
41040 | Vec.getOperand(0).getValueType().getScalarType() == MVT::i1 && |
41041 | Vec.getOperand(0).getOperand(0).getValueType() == VecVT) { |
41042 | // Extract the condition and both values, then re-create a scalar select. |
41043 | SDLoc DL(ExtElt); |
41044 | SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, |
41045 | Vec.getOperand(0).getValueType().getScalarType(), |
41046 | Vec.getOperand(0), Index); |
41047 | SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, |
41048 | Vec.getOperand(1), Index); |
41049 | SDValue Ext2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, |
41050 | Vec.getOperand(2), Index); |
41051 | return DAG.getNode(ISD::SELECT, DL, VT, Ext0, Ext1, Ext2); |
41052 | } |
41053 | |
41054 | |
41055 | |
41056 | |
41057 | switch (Vec.getOpcode()) { |
41058 | case ISD::FMA: |
41059 | case ISD::FMAD: |
41060 | case ISD::FADD: |
41061 | case ISD::FSUB: |
41062 | case ISD::FMUL: |
41063 | case ISD::FDIV: |
41064 | case ISD::FREM: |
41065 | case ISD::FCOPYSIGN: |
41066 | case ISD::FMINNUM: |
41067 | case ISD::FMAXNUM: |
41068 | case ISD::FMINNUM_IEEE: |
41069 | case ISD::FMAXNUM_IEEE: |
41070 | case ISD::FMAXIMUM: |
41071 | case ISD::FMINIMUM: |
41072 | case X86ISD::FMAX: |
41073 | case X86ISD::FMIN: |
41074 | case ISD::FABS: |
41075 | case ISD::FSQRT: |
41076 | case ISD::FRINT: |
41077 | case ISD::FCEIL: |
41078 | case ISD::FTRUNC: |
41079 | case ISD::FNEARBYINT: |
41080 | case ISD::FROUND: |
41081 | case ISD::FFLOOR: |
41082 | case X86ISD::FRCP: |
41083 | case X86ISD::FRSQRT: { |
41084 | |
41085 | SDLoc DL(ExtElt); |
41086 | SmallVector<SDValue, 4> ExtOps; |
41087 | for (SDValue Op : Vec->ops()) |
41088 | ExtOps.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op, Index)); |
41089 | return DAG.getNode(Vec.getOpcode(), DL, VT, ExtOps); |
41090 | } |
41091 | default: |
41092 | return SDValue(); |
41093 | } |
41094 | llvm_unreachable("All opcodes should return within switch"); |
41095 | } |
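// A concrete instance of the scalarization above, e.g. for v4f32:
//   t0 = extractelt (fadd V0, V1), 0
//   --> fadd (extractelt V0, 0), (extractelt V1, 0)
// Lane 0 of an XMM register is already a legal scalar FP value, so the
// vector add becomes a single ADDSS and the extract disappears.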

/// Try to convert a vector reduction sequence composed of binops and shuffles
/// into horizontal ops.
static SDValue combineArithReduction(SDNode *ExtElt, SelectionDAG &DAG,
                                     const X86Subtarget &Subtarget) {
  assert(ExtElt->getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Unexpected caller");

  // We need at least SSE2 to do anything here.
  if (!Subtarget.hasSSE2())
    return SDValue();

  ISD::NodeType Opc;
  SDValue Rdx = DAG.matchBinOpReduction(ExtElt, Opc,
                                        {ISD::ADD, ISD::MUL, ISD::FADD}, true);
  if (!Rdx)
    return SDValue();

  SDValue Index = ExtElt->getOperand(1);
  assert(isNullConstant(Index) &&
         "Reduction doesn't end in an extract from index 0");

  EVT VT = ExtElt->getValueType(0);
  EVT VecVT = Rdx.getValueType();
  if (VecVT.getScalarType() != VT)
    return SDValue();

  SDLoc DL(ExtElt);

  // vXi8 mul reduction - promote to vXi16 mul reduction.
  if (Opc == ISD::MUL) {
    unsigned NumElts = VecVT.getVectorNumElements();
    if (VT != MVT::i8 || NumElts < 4 || !isPowerOf2_32(NumElts))
      return SDValue();
    if (VecVT.getSizeInBits() >= 128) {
      EVT WideVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16, NumElts / 2);
      SDValue Lo = getUnpackl(DAG, DL, VecVT, Rdx, DAG.getUNDEF(VecVT));
      SDValue Hi = getUnpackh(DAG, DL, VecVT, Rdx, DAG.getUNDEF(VecVT));
      Lo = DAG.getBitcast(WideVT, Lo);
      Hi = DAG.getBitcast(WideVT, Hi);
      Rdx = DAG.getNode(Opc, DL, WideVT, Lo, Hi);
      while (Rdx.getValueSizeInBits() > 128) {
        std::tie(Lo, Hi) = splitVector(Rdx, DAG, DL);
        Rdx = DAG.getNode(Opc, DL, Lo.getValueType(), Lo, Hi);
      }
    } else {
      if (VecVT == MVT::v4i8)
        Rdx = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i8, Rdx,
                          DAG.getUNDEF(MVT::v4i8));
      Rdx = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, Rdx,
                        DAG.getUNDEF(MVT::v8i8));
      Rdx = getUnpackl(DAG, DL, MVT::v16i8, Rdx, DAG.getUNDEF(MVT::v16i8));
      Rdx = DAG.getBitcast(MVT::v8i16, Rdx);
    }
    if (NumElts >= 8)
      Rdx = DAG.getNode(Opc, DL, MVT::v8i16, Rdx,
                        DAG.getVectorShuffle(MVT::v8i16, DL, Rdx, Rdx,
                                             {4, 5, 6, 7, -1, -1, -1, -1}));
    Rdx = DAG.getNode(Opc, DL, MVT::v8i16, Rdx,
                      DAG.getVectorShuffle(MVT::v8i16, DL, Rdx, Rdx,
                                           {2, 3, -1, -1, -1, -1, -1, -1}));
    Rdx = DAG.getNode(Opc, DL, MVT::v8i16, Rdx,
                      DAG.getVectorShuffle(MVT::v8i16, DL, Rdx, Rdx,
                                           {1, -1, -1, -1, -1, -1, -1, -1}));
    Rdx = DAG.getBitcast(MVT::v16i8, Rdx);
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Rdx, Index);
  }

  // vXi8 add reduction - widen sub-128-bit vectors and use PSADBW.
  if (VecVT == MVT::v4i8 || VecVT == MVT::v8i8) {
    if (VecVT == MVT::v4i8) {
      // Pad with zero.
      if (Subtarget.hasSSE41()) {
        Rdx = DAG.getBitcast(MVT::i32, Rdx);
        Rdx = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i32,
                          DAG.getConstant(0, DL, MVT::v4i32), Rdx,
                          DAG.getIntPtrConstant(0, DL));
        Rdx = DAG.getBitcast(MVT::v16i8, Rdx);
      } else {
        Rdx = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i8, Rdx,
                          DAG.getConstant(0, DL, VecVT));
      }
    }
    if (Rdx.getValueType() == MVT::v8i8) {
      // Pad with undef.
      Rdx = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, Rdx,
                        DAG.getUNDEF(MVT::v8i8));
    }
    Rdx = DAG.getNode(X86ISD::PSADBW, DL, MVT::v2i64, Rdx,
                      DAG.getConstant(0, DL, MVT::v16i8));
    Rdx = DAG.getBitcast(MVT::v16i8, Rdx);
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Rdx, Index);
  }

  // Must be a >=128-bit vector with a pow2 number of elements.
  if ((VecVT.getSizeInBits() % 128) != 0 ||
      !isPowerOf2_32(VecVT.getVectorNumElements()))
    return SDValue();

  // vXi8 add reduction - sum lo/hi halves then use PSADBW.
  if (VT == MVT::i8) {
    while (Rdx.getValueSizeInBits() > 128) {
      SDValue Lo, Hi;
      std::tie(Lo, Hi) = splitVector(Rdx, DAG, DL);
      VecVT = Lo.getValueType();
      Rdx = DAG.getNode(ISD::ADD, DL, VecVT, Lo, Hi);
    }
    assert(VecVT == MVT::v16i8 && "v16i8 reduction expected");

    SDValue Hi = DAG.getVectorShuffle(
        MVT::v16i8, DL, Rdx, Rdx,
        {8, 9, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1});
    Rdx = DAG.getNode(ISD::ADD, DL, MVT::v16i8, Rdx, Hi);
    Rdx = DAG.getNode(X86ISD::PSADBW, DL, MVT::v2i64, Rdx,
                      getZeroVector(MVT::v16i8, Subtarget, DAG, DL));
    Rdx = DAG.getBitcast(MVT::v16i8, Rdx);
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Rdx, Index);
  }

  // Only use (F)HADD opcodes if they aren't microcoded or minimize codesize.
  if (!shouldUseHorizontalOp(true, DAG, Subtarget))
    return SDValue();

  unsigned HorizOpcode = Opc == ISD::ADD ? X86ISD::HADD : X86ISD::FHADD;

  // 256-bit horizontal instructions operate on 128-bit chunks rather than
  // across the whole vector, so we need an extract + hop preliminary stage.
  // This is the only step where the operands of the hop are not the same.
  if (((VecVT == MVT::v16i16 || VecVT == MVT::v8i32) && Subtarget.hasSSSE3()) ||
      ((VecVT == MVT::v8f32 || VecVT == MVT::v4f64) && Subtarget.hasSSE3())) {
    unsigned NumElts = VecVT.getVectorNumElements();
    SDValue Hi = extract128BitVector(Rdx, NumElts / 2, DAG, DL);
    SDValue Lo = extract128BitVector(Rdx, 0, DAG, DL);
    Rdx = DAG.getNode(HorizOpcode, DL, Lo.getValueType(), Hi, Lo);
    VecVT = Rdx.getValueType();
  }
  if (!((VecVT == MVT::v8i16 || VecVT == MVT::v4i32) && Subtarget.hasSSSE3()) &&
      !((VecVT == MVT::v4f32 || VecVT == MVT::v2f64) && Subtarget.hasSSE3()))
    return SDValue();

  // extract (add (shuf X), X), 0 --> extract (hadd X, X), 0
  unsigned ReductionSteps = Log2_32(VecVT.getVectorNumElements());
  for (unsigned i = 0; i != ReductionSteps; ++i)
    Rdx = DAG.getNode(HorizOpcode, DL, VecVT, Rdx, Rdx);

  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Rdx, Index);
}
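// e.g. a v4f32 fadd reduction ending in "extractelt ..., 0" lowers via the
// loop above as two HADDPS steps (Log2_32(4) == 2):
//   t0 = fhadd V, V     ; lanes {0+1, 2+3, 0+1, 2+3}
//   t1 = fhadd t0, t0   ; lane 0 now holds (0+1)+(2+3)
//   result = extractelt t1, 0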

/// Detect vector gather/scatter index generation and convert it from being a
/// bunch of shuffles and extracts into a somewhat faster sequence.
/// For i686, the best sequence is apparently storing the value and loading
/// scalars back, while for x64 we should use 64-bit extracts and shifts.
static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG,
                                       TargetLowering::DAGCombinerInfo &DCI,
                                       const X86Subtarget &Subtarget) {
  if (SDValue NewOp = combineExtractWithShuffle(N, DAG, DCI, Subtarget))
    return NewOp;

  SDValue InputVector = N->getOperand(0);
  SDValue EltIdx = N->getOperand(1);
  auto *CIdx = dyn_cast<ConstantSDNode>(EltIdx);

  EVT SrcVT = InputVector.getValueType();
  EVT VT = N->getValueType(0);
  SDLoc dl(InputVector);
  bool IsPextr = N->getOpcode() != ISD::EXTRACT_VECTOR_ELT;
  unsigned NumSrcElts = SrcVT.getVectorNumElements();

  if (CIdx && CIdx->getAPIntValue().uge(NumSrcElts))
    return IsPextr ? DAG.getConstant(0, dl, VT) : DAG.getUNDEF(VT);

  // Integer constant folding.
  if (CIdx && VT.isInteger()) {
    APInt UndefVecElts;
    SmallVector<APInt, 16> EltBits;
    unsigned VecEltBitWidth = SrcVT.getScalarSizeInBits();
    if (getTargetConstantBitsFromNode(InputVector, VecEltBitWidth, UndefVecElts,
                                      EltBits, true, false)) {
      uint64_t Idx = CIdx->getZExtValue();
      if (UndefVecElts[Idx])
        return IsPextr ? DAG.getConstant(0, dl, VT) : DAG.getUNDEF(VT);
      return DAG.getConstant(EltBits[Idx].zextOrSelf(VT.getScalarSizeInBits()),
                             dl, VT);
    }
  }

  if (IsPextr) {
    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
    if (TLI.SimplifyDemandedBits(
            SDValue(N, 0), APInt::getAllOnesValue(VT.getSizeInBits()), DCI))
      return SDValue(N, 0);

    // PEXTR*(PINSR*(v, s, c), c) -> s (with implicit zext handling).
    if ((InputVector.getOpcode() == X86ISD::PINSRB ||
         InputVector.getOpcode() == X86ISD::PINSRW) &&
        InputVector.getOperand(2) == EltIdx) {
      assert(SrcVT == InputVector.getOperand(0).getValueType() &&
             "Vector type mismatch");
      SDValue Scl = InputVector.getOperand(1);
      Scl = DAG.getNode(ISD::TRUNCATE, dl, SrcVT.getScalarType(), Scl);
      return DAG.getZExtOrTrunc(Scl, dl, VT);
    }

    return SDValue();
  }

  // Detect mmx extraction of all bits as a i64. It works better as a bitcast.
  if (InputVector.getOpcode() == ISD::BITCAST && InputVector.hasOneUse() &&
      VT == MVT::i64 && SrcVT == MVT::v1i64 && isNullConstant(EltIdx)) {
    SDValue MMXSrc = InputVector.getOperand(0);

    // The bitcast source is a direct mmx result.
    if (MMXSrc.getValueType() == MVT::x86mmx)
      return DAG.getBitcast(VT, InputVector);
  }

  // Detect mmx to i32 conversion through a v2i32 elt extract.
  if (InputVector.getOpcode() == ISD::BITCAST && InputVector.hasOneUse() &&
      VT == MVT::i32 && SrcVT == MVT::v2i32 && isNullConstant(EltIdx)) {
    SDValue MMXSrc = InputVector.getOperand(0);

    // The bitcast source is a direct mmx result.
    if (MMXSrc.getValueType() == MVT::x86mmx)
      return DAG.getNode(X86ISD::MMX_MOVD2W, dl, MVT::i32, MMXSrc);
  }

  // Check whether this extract is the root of a sum of absolute differences
  // pattern. This has to be done here because we really want it to happen
  // pre-legalization.
  if (SDValue SAD = combineBasicSADPattern(N, DAG, Subtarget))
    return SAD;

  // Attempt to replace an all_of/any_of horizontal reduction with a MOVMSK.
  if (SDValue Cmp = combinePredicateReduction(N, DAG, Subtarget))
    return Cmp;

  // Attempt to replace min/max v8i16/v16i8 reductions with PHMINPOSUW.
  if (SDValue MinMax = combineMinMaxReduction(N, DAG, Subtarget))
    return MinMax;

  // Attempt to optimize ADD/FADD/MUL reductions with HADD, promotion etc.
  if (SDValue V = combineArithReduction(N, DAG, Subtarget))
    return V;

  if (SDValue V = scalarizeExtEltFP(N, DAG))
    return V;

  // Attempt to extract a i1 element by using MOVMSK to extract the signbits
  // and then testing the relevant element.
  //
  // Note that we only combine extracts on the *same* result number, i.e.
  //   t0 = merge_values a0, a1, a2, a3
  //   i1 = extract_vector_elt t0, Constant:i64<2>
  //   i1 = extract_vector_elt t0, Constant:i64<3>
  // but not
  //   i1 = extract_vector_elt t0:1, Constant:i64<2>
  // since the latter would need its own MOVMSK.
  if (CIdx && SrcVT.getScalarType() == MVT::i1) {
    SmallVector<SDNode *, 16> BoolExtracts;
    unsigned ResNo = InputVector.getResNo();
    auto IsBoolExtract = [&BoolExtracts, &ResNo](SDNode *Use) {
      if (Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
          isa<ConstantSDNode>(Use->getOperand(1)) &&
          Use->getOperand(0).getResNo() == ResNo &&
          Use->getValueType(0) == MVT::i1) {
        BoolExtracts.push_back(Use);
        return true;
      }
      return false;
    };
    if (all_of(InputVector->uses(), IsBoolExtract) &&
        BoolExtracts.size() > 1) {
      EVT BCVT = EVT::getIntegerVT(*DAG.getContext(), NumSrcElts);
      if (SDValue BC =
              combineBitcastvxi1(DAG, BCVT, InputVector, dl, Subtarget)) {
        for (SDNode *Use : BoolExtracts) {
          // The lane is set iff (BC & MaskBit) == MaskBit.
          unsigned MaskIdx = Use->getConstantOperandVal(1);
          APInt MaskBit = APInt::getOneBitSet(NumSrcElts, MaskIdx);
          SDValue Mask = DAG.getConstant(MaskBit, dl, BCVT);
          SDValue Res = DAG.getNode(ISD::AND, dl, BCVT, BC, Mask);
          Res = DAG.getSetCC(dl, MVT::i1, Res, Mask, ISD::SETEQ);
          DCI.CombineTo(Use, Res);
        }
        return SDValue(N, 0);
      }
    }
  }

  return SDValue();
}
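// A sketch of the i1 extract combine above: for a v4i1 value where every
// lane is extracted, combineBitcastvxi1 yields one integer of sign bits, and
// each per-lane extract becomes a mask test on that integer, e.g.
//   lane2 = setcc eq (and BC, 0b0100), 0b0100
// so all the extracts share a single signbit extraction instead of each
// needing its own element move.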

/// If a vector select has an operand that is -1 or 0, try to simplify the
/// select to a bitwise logic operation.
static SDValue
combineVSelectWithAllOnesOrZeros(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const X86Subtarget &Subtarget) {
  SDValue Cond = N->getOperand(0);
  SDValue LHS = N->getOperand(1);
  SDValue RHS = N->getOperand(2);
  EVT VT = LHS.getValueType();
  EVT CondVT = Cond.getValueType();
  SDLoc DL(N);
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  if (N->getOpcode() != ISD::VSELECT)
    return SDValue();

  assert(CondVT.isVector() && "Vector select expects a vector selector!");

  bool TValIsAllZeros = ISD::isBuildVectorAllZeros(LHS.getNode());
  bool FValIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());

  // If both inputs are 0/undef, create a complete zero vector.
  if (TValIsAllZeros && FValIsAllZeros) {
    if (VT.isFloatingPoint())
      return DAG.getConstantFP(0.0, DL, VT);
    return DAG.getConstant(0, DL, VT);
  }

  // To use the condition operand as a bitwise mask, it must have elements
  // that are the same size as the select elements, i.e. the condition must
  // already have been promoted from the IR select condition type <N x i1>.
  // Don't check if the types themselves are equal because that excludes
  // vector floating-point selects.
  if (CondVT.getScalarSizeInBits() != VT.getScalarSizeInBits())
    return SDValue();

  // Try to invert the condition if the true value is not all 1s and the
  // false value is not all 0s. Only do this if the condition has one use.
  bool TValIsAllOnes = ISD::isBuildVectorAllOnes(LHS.getNode());
  if (!TValIsAllOnes && !FValIsAllZeros && Cond.hasOneUse() &&
      // Check if the selector will be produced by CMPP*/PCMP*.
      Cond.getOpcode() == ISD::SETCC &&
      // Check if SETCC has already been promoted.
      TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT) ==
          CondVT) {
    bool FValIsAllOnes = ISD::isBuildVectorAllOnes(RHS.getNode());

    if (TValIsAllZeros || FValIsAllOnes) {
      SDValue CC = Cond.getOperand(2);
      ISD::CondCode NewCC = ISD::getSetCCInverse(
          cast<CondCodeSDNode>(CC)->get(), Cond.getOperand(0).getValueType());
      Cond = DAG.getSetCC(DL, CondVT, Cond.getOperand(0), Cond.getOperand(1),
                          NewCC);
      std::swap(LHS, RHS);
      TValIsAllOnes = FValIsAllOnes;
      FValIsAllZeros = TValIsAllZeros;
    }
  }

  // Cond value must be 'sign splat' to be converted to a logical op.
  if (DAG.ComputeNumSignBits(Cond) != CondVT.getScalarSizeInBits())
    return SDValue();

  // vselect Cond, 111..., 000... -> Cond
  if (TValIsAllOnes && FValIsAllZeros)
    return DAG.getBitcast(VT, Cond);

  if (!TLI.isTypeLegal(CondVT))
    return SDValue();

  // vselect Cond, 111..., X -> or Cond, X
  if (TValIsAllOnes) {
    SDValue CastRHS = DAG.getBitcast(CondVT, RHS);
    SDValue Or = DAG.getNode(ISD::OR, DL, CondVT, Cond, CastRHS);
    return DAG.getBitcast(VT, Or);
  }

  // vselect Cond, X, 000... -> and Cond, X
  if (FValIsAllZeros) {
    SDValue CastLHS = DAG.getBitcast(CondVT, LHS);
    SDValue And = DAG.getNode(ISD::AND, DL, CondVT, Cond, CastLHS);
    return DAG.getBitcast(VT, And);
  }

  // vselect Cond, 000..., X -> andn Cond, X
  if (TValIsAllZeros) {
    SDValue CastRHS = DAG.getBitcast(CondVT, RHS);
    SDValue AndN;
    // The canonical form differs for i1 vectors - x86andnp is not used.
    if (CondVT.getScalarType() == MVT::i1)
      AndN = DAG.getNode(ISD::AND, DL, CondVT, DAG.getNOT(DL, Cond, CondVT),
                         CastRHS);
    else
      AndN = DAG.getNode(X86ISD::ANDNP, DL, CondVT, Cond, CastRHS);
    return DAG.getBitcast(VT, AndN);
  }

  return SDValue();
}
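// Summary of the folds above, with Cond a per-element all-ones/zero mask:
//   vselect Cond, -1,  0  --> bitcast Cond
//   vselect Cond, -1,  X  --> or    Cond, X
//   vselect Cond,  X,  0  --> and   Cond, X
//   vselect Cond,  0,  X  --> andnp Cond, X   (NOT+AND for vXi1 masks)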

/// If both arms of a vector select are concatenated vectors, split the select,
/// and concatenate the result to eliminate a wide (256-bit) vector instruction:
///   vselect Cond, (concat T0, T1), (concat F0, F1) -->
///   concat (vselect (split Cond), T0, F0), (vselect (split Cond), T1, F1)
static SDValue narrowVectorSelect(SDNode *N, SelectionDAG &DAG,
                                  const X86Subtarget &Subtarget) {
  unsigned Opcode = N->getOpcode();
  if (Opcode != X86ISD::BLENDV && Opcode != ISD::VSELECT)
    return SDValue();

  // TODO: Split 512-bit vectors too?
  EVT VT = N->getValueType(0);
  if (!VT.is256BitVector())
    return SDValue();

  // TODO: Split as long as any 2 of the 3 operands are concatenated?
  SDValue Cond = N->getOperand(0);
  SDValue TVal = N->getOperand(1);
  SDValue FVal = N->getOperand(2);
  SmallVector<SDValue, 4> CatOpsT, CatOpsF;
  if (!TVal.hasOneUse() || !FVal.hasOneUse() ||
      !collectConcatOps(TVal.getNode(), CatOpsT) ||
      !collectConcatOps(FVal.getNode(), CatOpsF))
    return SDValue();

  auto makeBlend = [Opcode](SelectionDAG &DAG, const SDLoc &DL,
                            ArrayRef<SDValue> Ops) {
    return DAG.getNode(Opcode, DL, Ops[1].getValueType(), Ops);
  };
  return SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT, { Cond, TVal, FVal },
                          makeBlend, false);
}
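// e.g. on AVX1, where 256-bit integer blends are unavailable, this yields:
//   vselect Cond, (concat T0, T1), (concat F0, F1)
//   --> concat (vselect Cond.lo, T0, F0), (vselect Cond.hi, T1, F1)
// trading one illegal 256-bit op for two legal 128-bit ones.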

/// Fold a select of two constants into simple math on the condition bit:
///   select Cond, TC, FC --> (zext(Cond) * (TC - FC)) + FC
static SDValue combineSelectOfTwoConstants(SDNode *N, SelectionDAG &DAG) {
  SDValue Cond = N->getOperand(0);
  SDValue LHS = N->getOperand(1);
  SDValue RHS = N->getOperand(2);
  SDLoc DL(N);

  auto *TrueC = dyn_cast<ConstantSDNode>(LHS);
  auto *FalseC = dyn_cast<ConstantSDNode>(RHS);
  if (!TrueC || !FalseC)
    return SDValue();

  // Don't do this for crazy integer types.
  EVT VT = N->getValueType(0);
  if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
    return SDValue();

  // We're going to use the condition bit in math or logic ops. We could allow
  // this with a wider condition value (post-legalization it becomes an i8),
  // but if we don't recognize it as i1 here, don't bother.
  if (Cond.getValueType() != MVT::i1)
    return SDValue();

  // A power-of-2 multiply is just a shift. LEA also cheaply handles multiply
  // by 3, 5, or 9 with i32/i64, so those get transformed too.
  const APInt &TrueVal = TrueC->getAPIntValue();
  const APInt &FalseVal = FalseC->getAPIntValue();
  bool OV;
  APInt Diff = TrueVal.ssub_ov(FalseVal, OV);
  if (OV)
    return SDValue();

  APInt AbsDiff = Diff.abs();
  if (AbsDiff.isPowerOf2() ||
      ((VT == MVT::i32 || VT == MVT::i64) &&
       (AbsDiff == 3 || AbsDiff == 5 || AbsDiff == 9))) {

    // We need a positive multiplier constant for shift/LEA codegen, so invert
    // the condition and swap the constants if needed.
    if (TrueVal.slt(FalseVal)) {
      Cond = DAG.getNOT(DL, Cond, MVT::i1);
      std::swap(TrueC, FalseC);
    }

    // select Cond, TC, FC --> (zext(Cond) * (TC - FC)) + FC
    SDValue R = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond);

    // Multiply condition by the difference if non-one.
    if (!AbsDiff.isOneValue())
      R = DAG.getNode(ISD::MUL, DL, VT, R, DAG.getConstant(AbsDiff, DL, VT));

    // Add the base if non-zero.
    if (!FalseC->isNullValue())
      R = DAG.getNode(ISD::ADD, DL, VT, R, SDValue(FalseC, 0));

    return R;
  }

  return SDValue();
}
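// Worked example of the constant-select math above:
//   select Cond, 5, 1   ; AbsDiff = 4 (a power of 2), FalseC = 1
//   --> (zext Cond) * 4 + 1, where the multiply by 4 lowers to a shift.
// For i32/i64, an AbsDiff of 3, 5 or 9 instead maps onto a single LEA.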

/// If this is a *dynamic* select (non-constant condition) and we can match
/// this node with one of the variable blend instructions, restructure the
/// condition so that blends can use the high (sign) bit of each element and
/// use SimplifyDemandedBits to simplify the condition operand.
static SDValue combineVSelectToBLENDV(SDNode *N, SelectionDAG &DAG,
                                      TargetLowering::DAGCombinerInfo &DCI,
                                      const X86Subtarget &Subtarget) {
  SDValue Cond = N->getOperand(0);
  if ((N->getOpcode() != ISD::VSELECT &&
       N->getOpcode() != X86ISD::BLENDV) ||
      ISD::isBuildVectorOfConstantSDNodes(Cond.getNode()))
    return SDValue();

  // The BLENDV instructions only key off the sign bit of 8/16/32/64-bit
  // condition elements.
  unsigned BitWidth = Cond.getScalarValueSizeInBits();
  if (BitWidth < 8 || BitWidth > 64)
    return SDValue();

  // We can only handle the cases where VSELECT is directly legal on the
  // subtarget; constant-condition VSELECT nodes are custom lowered, so also
  // explicitly reject the cases where a *dynamic* blend is unavailable even
  // though a constant-condition blend would be legal.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  EVT VT = N->getValueType(0);
  if (!TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
    return SDValue();

  // There is no native variable blend instruction for vXi16 elements.
  if (VT.getVectorElementType() == MVT::i16)
    return SDValue();

  // 128-bit variable blends (BLENDV*/PBLENDVB) require SSE4.1.
  if (VT.is128BitVector() && !Subtarget.hasSSE41())
    return SDValue();

  // 256-bit byte blends need AVX2's VPBLENDVB.
  if (VT == MVT::v32i8 && !Subtarget.hasAVX2())
    return SDValue();

  // 512-bit selects should use AVX-512 mask registers instead.
  if (VT.is512BitVector())
    return SDValue();

  auto OnlyUsedAsSelectCond = [](SDValue Cond) {
    for (SDNode::use_iterator UI = Cond->use_begin(), UE = Cond->use_end();
         UI != UE; ++UI)
      if ((UI->getOpcode() != ISD::VSELECT &&
           UI->getOpcode() != X86ISD::BLENDV) ||
          UI.getOperandNo() != 0)
        return false;

    return true;
  };

  APInt DemandedBits(APInt::getSignMask(BitWidth));

  if (OnlyUsedAsSelectCond(Cond)) {
    KnownBits Known;
    TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
                                          !DCI.isBeforeLegalizeOps());
    if (!TLI.SimplifyDemandedBits(Cond, DemandedBits, Known, TLO, 0, true))
      return SDValue();

    // If we changed the computation somewhere in the DAG, this change will
    // affect all users of Cond. Update all the nodes so that we do not use
    // the generic VSELECT anymore. Otherwise, we may perform wrong
    // optimizations as we messed with the actual expectation for the vector
    // boolean values.
    for (SDNode *U : Cond->uses()) {
      if (U->getOpcode() == X86ISD::BLENDV)
        continue;

      SDValue SB = DAG.getNode(X86ISD::BLENDV, SDLoc(U), U->getValueType(0),
                               Cond, U->getOperand(1), U->getOperand(2));
      DAG.ReplaceAllUsesOfValueWith(SDValue(U, 0), SB);
      DCI.AddToWorklist(U);
    }
    DCI.CommitTargetLoweringOpt(TLO);
    return SDValue(N, 0);
  }

  // Otherwise we can still at least try to simplify multiple use bits.
  if (SDValue V = TLI.SimplifyMultipleUseDemandedBits(Cond, DemandedBits, DAG))
    return DAG.getNode(X86ISD::BLENDV, SDLoc(N), N->getValueType(0), V,
                       N->getOperand(1), N->getOperand(2));

  return SDValue();
}
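// Since only the sign bit of each condition element is demanded, e.g. a
// sign-splatting shift feeding the select can simplify away entirely:
//   vselect (sra X, 31), T, F  -->  blendv X, T, F
// because the sign bit of (sra X, 31) is just the sign bit of X.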

/// Fold a logic blend of a value and its negation into a conditional negate:
///   select(M, (sub 0, X), X) --> (sub (xor X, M), M)
/// where each lane of the mask M is known to be all-ones or all-zeros. When
/// a lane of M is all-ones the xor+sub compute the two's complement negation
/// of X; when it is zero both operations are no-ops and X passes through.
static SDValue combineLogicBlendIntoConditionalNegate(
    EVT VT, SDValue Mask, SDValue X, SDValue Y, const SDLoc &DL,
    SelectionDAG &DAG, const X86Subtarget &Subtarget) {
  EVT MaskVT = Mask.getValueType();
  assert(MaskVT.isInteger() &&
         DAG.ComputeNumSignBits(Mask) == MaskVT.getScalarSizeInBits() &&
         "Mask must be zero/all-bits");

  if (X.getValueType() != MaskVT || Y.getValueType() != MaskVT)
    return SDValue();
  if (!DAG.getTargetLoweringInfo().isOperationLegal(ISD::SUB, MaskVT))
    return SDValue();

  auto IsNegV = [](SDNode *N, SDValue V) {
    return N->getOpcode() == ISD::SUB && N->getOperand(1) == V &&
           ISD::isBuildVectorAllZeros(N->getOperand(0).getNode());
  };

  SDValue V;
  if (IsNegV(Y.getNode(), X))
    V = X;
  else if (IsNegV(X.getNode(), Y))
    V = Y;
  else
    return SDValue();

  SDValue SubOp1 = DAG.getNode(ISD::XOR, DL, MaskVT, V, Mask);
  SDValue SubOp2 = Mask;

  // If the negate was on the false side of the select, then the operands of
  // the SUB need to be swapped so the SUB can later be matched as a NEG.
  if (V == Y)
    std::swap(SubOp1, SubOp2);

  SDValue Res = DAG.getNode(ISD::SUB, DL, MaskVT, SubOp1, SubOp2);
  return DAG.getBitcast(VT, Res);
}
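// Checking the fold lane-wise, with Y == (sub 0, X):
//   M == all-ones: (X ^ -1) - (-1) == ~X + 1 == -X   (negated)
//   M == 0:        (X ^ 0)  -  0  == X               (unchanged)
// which is exactly a per-lane conditional negate without any blend.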

/// Do target-specific dag combines on SELECT and VSELECT nodes.
static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
                             TargetLowering::DAGCombinerInfo &DCI,
                             const X86Subtarget &Subtarget) {
  SDLoc DL(N);
  SDValue Cond = N->getOperand(0);
  SDValue LHS = N->getOperand(1);
  SDValue RHS = N->getOperand(2);

  // Try simplification again because we use this function to optimize
  // BLENDV nodes that are not handled by the generic combiner.
  if (SDValue V = DAG.simplifySelect(Cond, LHS, RHS))
    return V;

  EVT VT = LHS.getValueType();
  EVT CondVT = Cond.getValueType();
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  bool CondConstantVector = ISD::isBuildVectorOfConstantSDNodes(Cond.getNode());

  // Attempt to combine (select M, (sub 0, X), X) -> SUB(XOR(X, M), M),
  // limited to sign-splat vector conditions.
  if (CondVT.isVector() && CondVT.isInteger() &&
      CondVT.getScalarSizeInBits() == VT.getScalarSizeInBits() &&
      (!CondConstantVector || CondVT.getScalarType() == MVT::i8) &&
      DAG.ComputeNumSignBits(Cond) == CondVT.getScalarSizeInBits())
    if (SDValue V = combineLogicBlendIntoConditionalNegate(VT, Cond, RHS, LHS,
                                                           DL, DAG, Subtarget))
      return V;

  // Convert vselects with constant condition into shuffles.
  if (CondConstantVector && DCI.isBeforeLegalizeOps()) {
    SmallVector<int, 64> Mask;
    if (createShuffleMaskFromVSELECT(Mask, Cond))
      return DAG.getVectorShuffle(VT, DL, LHS, RHS, Mask);
  }

  // fold vselect(cond, pshufb(x), pshufb(y)) -> or (pshufb(x), pshufb(y))
  // by forcing the unselected elements to zero.
  // TODO: Can we handle more shuffles with this?
  if (N->getOpcode() == ISD::VSELECT && CondVT.isVector() &&
      LHS.getOpcode() == X86ISD::PSHUFB && RHS.getOpcode() == X86ISD::PSHUFB &&
      LHS.hasOneUse() && RHS.hasOneUse()) {
    MVT SimpleVT = VT.getSimpleVT();
    SmallVector<SDValue, 1> LHSOps, RHSOps;
    SmallVector<int, 64> LHSMask, RHSMask, CondMask;
    if (createShuffleMaskFromVSELECT(CondMask, Cond) &&
        getTargetShuffleMask(LHS.getNode(), SimpleVT, true, LHSOps, LHSMask) &&
        getTargetShuffleMask(RHS.getNode(), SimpleVT, true, RHSOps, RHSMask)) {
      int NumElts = VT.getVectorNumElements();
      for (int i = 0; i != NumElts; ++i) {
        if (CondMask[i] < NumElts)
          RHSMask[i] = 0x80;
        else
          LHSMask[i] = 0x80;
      }
      LHS = DAG.getNode(X86ISD::PSHUFB, DL, VT, LHS.getOperand(0),
                        getConstVector(LHSMask, SimpleVT, DAG, DL, true));
      RHS = DAG.getNode(X86ISD::PSHUFB, DL, VT, RHS.getOperand(0),
                        getConstVector(RHSMask, SimpleVT, DAG, DL, true));
      return DAG.getNode(ISD::OR, DL, VT, LHS, RHS);
    }
  }

  // If we have SSE[12] support, try to form min/max nodes. SSE min/max
  // instructions match the semantics of the common C idiom x<y?x:y but not
  // x<=y?x:y, because of how they handle negative zero (which can be
  // ignored in unsafe-math mode).
  // We also try to create v2f32 min/max nodes, which we later widen to v4f32.
  if (Cond.getOpcode() == ISD::SETCC && VT.isFloatingPoint() &&
      VT != MVT::f80 && VT != MVT::f128 &&
      (TLI.isTypeLegal(VT) || VT == MVT::v2f32) &&
      (Subtarget.hasSSE2() ||
       (Subtarget.hasSSE1() && VT.getScalarType() == MVT::f32))) {
    ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();

    unsigned Opcode = 0;
    // Check for x CC y ? x : y.
    if (DAG.isEqualTo(LHS, Cond.getOperand(0)) &&
        DAG.isEqualTo(RHS, Cond.getOperand(1))) {
      switch (CC) {
      default: break;
      case ISD::SETULT:
        // Converting this to a min would handle NaNs incorrectly, and
        // swapping the operands would cause it to handle comparisons between
        // positive and negative zero incorrectly.
        if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) {
          if (!DAG.getTarget().Options.NoSignedZerosFPMath &&
              !(DAG.isKnownNeverZeroFloat(LHS) ||
                DAG.isKnownNeverZeroFloat(RHS)))
            break;
          std::swap(LHS, RHS);
        }
        Opcode = X86ISD::FMIN;
        break;
      case ISD::SETOLE:
        // Converting this to a min would handle comparisons between positive
        // and negative zero incorrectly.
        if (!DAG.getTarget().Options.NoSignedZerosFPMath &&
            !DAG.isKnownNeverZeroFloat(LHS) && !DAG.isKnownNeverZeroFloat(RHS))
          break;
        Opcode = X86ISD::FMIN;
        break;
      case ISD::SETULE:
        // Converting this to a min would handle both negative zeros and NaNs
        // incorrectly, but we can swap the operands to fix both.
        std::swap(LHS, RHS);
        LLVM_FALLTHROUGH;
      case ISD::SETOLT:
      case ISD::SETLT:
      case ISD::SETLE:
        Opcode = X86ISD::FMIN;
        break;

      case ISD::SETOGE:
        // Converting this to a max would handle comparisons between positive
        // and negative zero incorrectly.
        if (!DAG.getTarget().Options.NoSignedZerosFPMath &&
            !DAG.isKnownNeverZeroFloat(LHS) && !DAG.isKnownNeverZeroFloat(RHS))
          break;
        Opcode = X86ISD::FMAX;
        break;
      case ISD::SETUGT:
        // Converting this to a max would handle NaNs incorrectly, and
        // swapping the operands would cause it to handle comparisons between
        // positive and negative zero incorrectly.
        if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) {
          if (!DAG.getTarget().Options.NoSignedZerosFPMath &&
              !(DAG.isKnownNeverZeroFloat(LHS) ||
                DAG.isKnownNeverZeroFloat(RHS)))
            break;
          std::swap(LHS, RHS);
        }
        Opcode = X86ISD::FMAX;
        break;
      case ISD::SETUGE:
        // Converting this to a max would handle both negative zeros and NaNs
        // incorrectly, but we can swap the operands to fix both.
        std::swap(LHS, RHS);
        LLVM_FALLTHROUGH;
      case ISD::SETOGT:
      case ISD::SETGT:
      case ISD::SETGE:
        Opcode = X86ISD::FMAX;
        break;
      }
    } else if (DAG.isEqualTo(LHS, Cond.getOperand(1)) &&
               DAG.isEqualTo(RHS, Cond.getOperand(0))) {
      // This is x CC y ? y : x - a min/max with reversed arms.
      switch (CC) {
      default: break;
      case ISD::SETOGE:
        // Converting this to a min would handle comparisons between positive
        // and negative zero incorrectly, and swapping the operands would
        // cause it to handle NaNs incorrectly.
        if (!DAG.getTarget().Options.NoSignedZerosFPMath &&
            !(DAG.isKnownNeverZeroFloat(LHS) ||
              DAG.isKnownNeverZeroFloat(RHS))) {
          if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))
            break;
          std::swap(LHS, RHS);
        }
        Opcode = X86ISD::FMIN;
        break;
      case ISD::SETUGT:
        // Converting this to a min would handle NaNs incorrectly.
        if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))
          break;
        Opcode = X86ISD::FMIN;
        break;
      case ISD::SETUGE:
        // Converting this to a min would handle both negative zeros and NaNs
        // incorrectly, but we can swap the operands to fix both.
        std::swap(LHS, RHS);
        LLVM_FALLTHROUGH;
      case ISD::SETOGT:
      case ISD::SETGT:
      case ISD::SETGE:
        Opcode = X86ISD::FMIN;
        break;

      case ISD::SETULT:
        // Converting this to a max would handle NaNs incorrectly.
        if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))
          break;
        Opcode = X86ISD::FMAX;
        break;
      case ISD::SETOLE:
        // Converting this to a max would handle comparisons between positive
        // and negative zero incorrectly, and swapping the operands would
        // cause it to handle NaNs incorrectly.
        if (!DAG.getTarget().Options.NoSignedZerosFPMath &&
            !DAG.isKnownNeverZeroFloat(LHS) &&
            !DAG.isKnownNeverZeroFloat(RHS)) {
          if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))
            break;
          std::swap(LHS, RHS);
        }
        Opcode = X86ISD::FMAX;
        break;
      case ISD::SETULE:
        // Converting this to a max would handle both negative zeros and NaNs
        // incorrectly, but we can swap the operands to fix both.
        std::swap(LHS, RHS);
        LLVM_FALLTHROUGH;
      case ISD::SETOLT:
      case ISD::SETLT:
      case ISD::SETLE:
        Opcode = X86ISD::FMAX;
        break;
      }
    }

    if (Opcode)
      return DAG.getNode(Opcode, DL, N->getValueType(0), LHS, RHS);
  }

  // Some mask scalar intrinsics rely on checking if only one bit is set and
  // implement it in C code like this: A[0] = (U & 1) ? A[0] : W[0];
  // This creates some redundant instructions that break pattern matching.
  // fold (select (setcc (and (X, 1), 0, seteq), Y, Z))
  //   -> (select (and (X, 1)), Z, Y)
  if (Subtarget.hasAVX512() && N->getOpcode() == ISD::SELECT &&
      Cond.getOpcode() == ISD::SETCC && (VT == MVT::f32 || VT == MVT::f64)) {
    ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
    SDValue AndNode = Cond.getOperand(0);
    if (AndNode.getOpcode() == ISD::AND && CC == ISD::SETEQ &&
        isNullConstant(Cond.getOperand(1)) &&
        isOneConstant(AndNode.getOperand(1))) {
      // LHS and RHS swapped due to
      // setcc outputting 1 when AND resulted in 0 and vice versa.
      AndNode = DAG.getZExtOrTrunc(AndNode, DL, MVT::i8);
      return DAG.getNode(ISD::SELECT, DL, VT, AndNode, RHS, LHS);
    }
  }

  // (select v16i1, v16i8, v16i8) does not have a proper lowering on AVX512
  // targets without BWI - the same for all i8/i16 vectors. Sign-extend the
  // mask to a full-width vector condition instead, and make sure this
  // happens before type legalization gets a chance to split wide vectors.
  if (Subtarget.hasAVX512() && !Subtarget.hasBWI() && CondVT.isVector() &&
      CondVT.getVectorElementType() == MVT::i1 &&
      (VT.getVectorElementType() == MVT::i8 ||
       VT.getVectorElementType() == MVT::i16)) {
    Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond);
    return DAG.getNode(N->getOpcode(), DL, VT, Cond, LHS, RHS);
  }

  // AVX512 - Extend select with zero to merge with target shuffle.
  // select(mask, extract_subvector(shuffle(x)), zero) -->
  // extract_subvector(select(insert_subvector(mask), shuffle(x), zero))
  // TODO - support non target shuffles as well.
  if (Subtarget.hasAVX512() && CondVT.isVector() &&
      CondVT.getVectorElementType() == MVT::i1) {
    auto SelectableOp = [&TLI](SDValue Op) {
      return Op.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
             isTargetShuffle(Op.getOperand(0).getOpcode()) &&
             isNullConstant(Op.getOperand(1)) &&
             TLI.isTypeLegal(Op.getOperand(0).getValueType()) &&
             Op.hasOneUse() && Op.getOperand(0).hasOneUse();
    };

    bool SelectableLHS = SelectableOp(LHS);
    bool SelectableRHS = SelectableOp(RHS);
    bool ZeroLHS = ISD::isBuildVectorAllZeros(LHS.getNode());
    bool ZeroRHS = ISD::isBuildVectorAllZeros(RHS.getNode());

    if ((SelectableLHS && ZeroRHS) || (SelectableRHS && ZeroLHS)) {
      EVT SrcVT = SelectableLHS ? LHS.getOperand(0).getValueType()
                                : RHS.getOperand(0).getValueType();
      EVT SrcCondVT = SrcVT.changeVectorElementType(MVT::i1);
      LHS = insertSubVector(DAG.getUNDEF(SrcVT), LHS, 0, DAG, DL,
                            VT.getSizeInBits());
      RHS = insertSubVector(DAG.getUNDEF(SrcVT), RHS, 0, DAG, DL,
                            VT.getSizeInBits());
      Cond = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, SrcCondVT,
                         DAG.getUNDEF(SrcCondVT), Cond,
                         DAG.getIntPtrConstant(0, DL));
      SDValue Res = DAG.getSelect(DL, SrcVT, Cond, LHS, RHS);
      return extractSubVector(Res, 0, DAG, DL, VT.getSizeInBits());
    }
  }

  if (SDValue V = combineSelectOfTwoConstants(N, DAG))
    return V;

  if (N->getOpcode() == ISD::SELECT && Cond.getOpcode() == ISD::SETCC &&
      Cond.hasOneUse()) {
    EVT CondVT = Cond.getValueType();
    SDValue Cond0 = Cond.getOperand(0);
    SDValue Cond1 = Cond.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();

    // Canonicalize min/max:
    //   (x > y) ? x : y -> (x >= y) ? x : y
    //   (x < y) ? x : y -> (x <= y) ? x : y
    // This allows use of COND_S / COND_NS which eliminates the need for an
    // extra compare against zero, e.g.:
    //   subl %esi, %edi ; testl %edi, %edi ; movl $0, %eax ; cmovgl %edi, %eax
    // becomes
    //   xorl %eax, %eax ; subl %esi, %edi ; cmovsl %eax, %edi
    // We can also canonicalize
    //   (x s> 1) ? x : 1 -> (x s>= 1) ? x : 1 -> (x s> 0) ? x : 1
    //   (x u> 1) ? x : 1 -> (x u>= 1) ? x : 1 -> (x != 0) ? x : 1
    // which allows the use of a test instruction for the compare.
    if (LHS == Cond0 && RHS == Cond1) {
      if ((CC == ISD::SETGT && (isNullConstant(RHS) || isOneConstant(RHS))) ||
          (CC == ISD::SETLT && isAllOnesConstant(RHS))) {
        ISD::CondCode NewCC = CC == ISD::SETGT ? ISD::SETGE : ISD::SETLE;
        Cond = DAG.getSetCC(SDLoc(Cond), CondVT, Cond0, Cond1, NewCC);
        return DAG.getSelect(DL, VT, Cond, LHS, RHS);
      }
      if (CC == ISD::SETUGT && isOneConstant(RHS)) {
        ISD::CondCode NewCC = ISD::SETUGE;
        Cond = DAG.getSetCC(SDLoc(Cond), CondVT, Cond0, Cond1, NewCC);
        return DAG.getSelect(DL, VT, Cond, LHS, RHS);
      }
    }

    // fold eq + gt/lt nested selects into ge/le selects:
    //   select (cmpeq Cond0, Cond1), LHS,
    //          (select (cmpugt Cond0, Cond1), LHS, Y)
    //   --> (select (cmpuge Cond0, Cond1), LHS, Y)
    //   select (cmpslt Cond0, Cond1), LHS,
    //          (select (cmpeq Cond0, Cond1), LHS, Y)
    //   --> (select (cmpsle Cond0, Cond1), LHS, Y)
    // .. etc ..
    if (RHS.getOpcode() == ISD::SELECT && RHS.getOperand(1) == LHS &&
        RHS.getOperand(0).getOpcode() == ISD::SETCC) {
      SDValue InnerSetCC = RHS.getOperand(0);
      ISD::CondCode InnerCC =
          cast<CondCodeSDNode>(InnerSetCC.getOperand(2))->get();
      if ((CC == ISD::SETEQ || InnerCC == ISD::SETEQ) &&
          Cond0 == InnerSetCC.getOperand(0) &&
          Cond1 == InnerSetCC.getOperand(1)) {
        ISD::CondCode NewCC;
        switch (CC == ISD::SETEQ ? InnerCC : CC) {
        case ISD::SETGT:  NewCC = ISD::SETGE; break;
        case ISD::SETLT:  NewCC = ISD::SETLE; break;
        case ISD::SETUGT: NewCC = ISD::SETUGE; break;
        case ISD::SETULT: NewCC = ISD::SETULE; break;
        default: NewCC = ISD::SETCC_INVALID; break;
        }
        if (NewCC != ISD::SETCC_INVALID) {
          Cond = DAG.getSetCC(DL, CondVT, Cond0, Cond1, NewCC);
          return DAG.getSelect(DL, VT, Cond, LHS, RHS.getOperand(2));
        }
      }
    }
  }

  // Check if the first operand is all zeros and Cond type is vXi1.
  // If this an avx512 target we can improve the use of zero masking by
  // swapping the operands and inverting the condition.
  if (N->getOpcode() == ISD::VSELECT && Cond.hasOneUse() &&
      Subtarget.hasAVX512() && CondVT.getVectorElementType() == MVT::i1 &&
      ISD::isBuildVectorAllZeros(LHS.getNode()) &&
      !ISD::isBuildVectorAllZeros(RHS.getNode())) {
    // Invert the condition: vselect cond, op1, op2 = vselect !cond, op2, op1.
    SDValue CondNew = DAG.getNOT(DL, Cond, CondVT);
    return DAG.getSelect(DL, VT, CondNew, RHS, LHS);
  }

  // Early exit check.
  if (!TLI.isTypeLegal(VT))
    return SDValue();

  if (SDValue V = combineVSelectWithAllOnesOrZeros(N, DAG, DCI, Subtarget))
    return V;

  if (SDValue V = combineVSelectToBLENDV(N, DAG, DCI, Subtarget))
    return V;

  if (SDValue V = narrowVectorSelect(N, DAG, Subtarget))
    return V;

  // select(~Cond, X, Y) -> select(Cond, Y, X)
  if (CondVT.getScalarType() != MVT::i1) {
    if (SDValue CondNot = IsNOT(Cond, DAG))
      return DAG.getNode(N->getOpcode(), DL, VT,
                         DAG.getBitcast(CondVT, CondNot), RHS, LHS);

    // pcmpgt(X, -1) -> pcmpgt(0, X) to help select/blendv just use the
    // signbit.
    if (Cond.getOpcode() == X86ISD::PCMPGT && Cond.hasOneUse() &&
        ISD::isBuildVectorAllOnes(Cond.getOperand(1).getNode())) {
      Cond = DAG.getNode(X86ISD::PCMPGT, DL, CondVT,
                         DAG.getConstant(0, DL, CondVT), Cond.getOperand(0));
      return DAG.getNode(N->getOpcode(), DL, VT, Cond, RHS, LHS);
    }
  }

  // Try to optimize vXi1 selects if both operands are either all constants
  // or bitcasts from a scalar integer type. In that case we can convert the
  // operands to integer and use an integer select which will be converted to
  // a CMOV. We need to take a little bit of care to avoid creating an i64
  // type after type legalization.
  if (N->getOpcode() == ISD::SELECT && VT.isVector() &&
      VT.getVectorElementType() == MVT::i1 &&
      (DCI.isBeforeLegalize() || (VT != MVT::v64i1 || Subtarget.is64Bit()))) {
    EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getVectorNumElements());
    bool LHSIsConst = ISD::isBuildVectorOfConstantSDNodes(LHS.getNode());
    bool RHSIsConst = ISD::isBuildVectorOfConstantSDNodes(RHS.getNode());

    if ((LHSIsConst ||
         (LHS.getOpcode() == ISD::BITCAST &&
          LHS.getOperand(0).getValueType() == IntVT)) &&
        (RHSIsConst ||
         (RHS.getOpcode() == ISD::BITCAST &&
          RHS.getOperand(0).getValueType() == IntVT))) {
      if (LHSIsConst)
        LHS = combinevXi1ConstantToInteger(LHS, DAG);
      else
        LHS = LHS.getOperand(0);

      if (RHSIsConst)
        RHS = combinevXi1ConstantToInteger(RHS, DAG);
      else
        RHS = RHS.getOperand(0);

      SDValue Select = DAG.getSelect(DL, IntVT, Cond, LHS, RHS);
      return DAG.getBitcast(VT, Select);
    }
  }

  // If this is "((X & C) == 0) ? Y : Z" and C is a constant mask vector of
  // single bits, then invert the predicate and swap the select operands.
  // This can lower using a vector shift bit-hack rather than mask and compare.
  if (DCI.isBeforeLegalize() && !Subtarget.hasAVX512() &&
      N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC &&
      Cond.hasOneUse() && CondVT.getVectorElementType() == MVT::i1 &&
      Cond.getOperand(0).getOpcode() == ISD::AND &&
      isNullOrNullSplat(Cond.getOperand(1)) &&
      cast<CondCodeSDNode>(Cond.getOperand(2))->get() == ISD::SETEQ &&
      Cond.getOperand(0).getValueType() == VT) {
    // The 'and' mask must be composed of power-of-2 constants.
    SDValue And = Cond.getOperand(0);
    auto *C = isConstOrConstSplat(And.getOperand(1));
    if (C && C->getAPIntValue().isPowerOf2()) {
      // vselect (X & C == 0), LHS, RHS --> vselect (X & C != 0), RHS, LHS
      SDValue NotCond =
          DAG.getSetCC(DL, CondVT, And, Cond.getOperand(1), ISD::SETNE);
      return DAG.getSelect(DL, VT, NotCond, RHS, LHS);
    }

    // If we have a non-splat but still powers-of-2 mask, AVX1 can use pmulld
    // and AVX2 can use vpsllv{d,q}; 8-bit lacks a proper shift/multiply and
    // 16-bit lacks a variable blend, so restrict the element widths.
    unsigned EltBitWidth = VT.getScalarSizeInBits();
    bool CanShiftBlend =
        TLI.isTypeLegal(VT) && ((Subtarget.hasAVX() && EltBitWidth == 32) ||
                                (Subtarget.hasAVX2() && EltBitWidth == 64) ||
                                (Subtarget.hasXOP()));
    if (CanShiftBlend &&
        ISD::matchUnaryPredicate(And.getOperand(1), [](ConstantSDNode *C) {
          return C->getAPIntValue().isPowerOf2();
        })) {
      // Create a left-shift constant to get the mask bits over to the
      // sign-bit.
      SDValue Mask = And.getOperand(1);
      SmallVector<int, 32> ShlVals;
      for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
        auto *MaskVal = cast<ConstantSDNode>(Mask.getOperand(i));
        ShlVals.push_back(EltBitWidth - 1 -
                          MaskVal->getAPIntValue().exactLogBase2());
      }

      // vsel ((X & C) == 0), LHS, RHS --> vsel ((shl X, C') < 0), RHS, LHS
      SDValue ShlAmt = getConstVector(ShlVals, VT.getSimpleVT(), DAG, DL);
      SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, And.getOperand(0), ShlAmt);
      SDValue NewCond =
          DAG.getSetCC(DL, CondVT, Shl, Cond.getOperand(1), ISD::SETLT);
      return DAG.getSelect(DL, VT, NewCond, RHS, LHS);
    }
  }

  return SDValue();
}
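// e.g. the FP min/max matching in combineSelect turns
//   select (setolt X, Y), X, Y  -->  X86ISD::FMIN X, Y
// which maps directly onto MINSS/MINPS: those return the second operand when
// the comparison is false (including on NaN), exactly like the select form.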

/// Combine:
///   (brcond/cmov/setcc .., (cmp (atomic_load_add x, 1), 0), COND_S)
/// to:
///   (brcond/cmov/setcc .., (LADD x, 1), COND_LE)
/// i.e., reusing the EFLAGS produced by the LOCKed instruction.
/// Note that this is only legal for some op/cc combinations.
static SDValue combineSetCCAtomicArith(SDValue Cmp, X86::CondCode &CC,
                                       SelectionDAG &DAG,
                                       const X86Subtarget &Subtarget) {
  // This combine only operates on CMP-like nodes.
  if (!(Cmp.getOpcode() == X86ISD::CMP ||
        (Cmp.getOpcode() == X86ISD::SUB && !Cmp->hasAnyUseOfValue(0))))
    return SDValue();

  // Can't replace the cmp if it has more uses than the one we're looking at.
  // FIXME: We would like to be able to handle this, but would need to make
  // sure all uses were updated.
  if (!Cmp.hasOneUse())
    return SDValue();

  // This only applies to variations of the common case:
  //   (icmp slt x, 0) -> (icmp sle (add x, 1), 0)
  //   (icmp sge x, 0) -> (icmp sgt (add x, 1), 0)
  //   (icmp sle x, 0) -> (icmp slt (sub x, 1), 0)
  //   (icmp sgt x, 0) -> (icmp sge (sub x, 1), 0)
  // Using the proper condcodes (see below), overflow is detected for us.
  SDValue CmpLHS = Cmp.getOperand(0);
  SDValue CmpRHS = Cmp.getOperand(1);
  EVT CmpVT = CmpLHS.getValueType();

  if (!CmpLHS.hasOneUse())
    return SDValue();

  unsigned Opc = CmpLHS.getOpcode();
  if (Opc != ISD::ATOMIC_LOAD_ADD && Opc != ISD::ATOMIC_LOAD_SUB)
    return SDValue();

  SDValue OpRHS = CmpLHS.getOperand(2);
  auto *OpRHSC = dyn_cast<ConstantSDNode>(OpRHS);
  if (!OpRHSC)
    return SDValue();

  APInt Addend = OpRHSC->getAPIntValue();
  if (Opc == ISD::ATOMIC_LOAD_SUB)
    Addend = -Addend;

  auto *CmpRHSC = dyn_cast<ConstantSDNode>(CmpRHS);
  if (!CmpRHSC)
    return SDValue();

  APInt Comparison = CmpRHSC->getAPIntValue();
  APInt NegAddend = -Addend;

  // See if we can adjust the CC to make the comparison match the negated
  // addend.
  if (Comparison != NegAddend) {
    APInt IncComparison = Comparison + 1;
    if (IncComparison == NegAddend) {
      if (CC == X86::COND_A && !Comparison.isMaxValue()) {
        Comparison = IncComparison;
        CC = X86::COND_AE;
      } else if (CC == X86::COND_LE && !Comparison.isMaxSignedValue()) {
        Comparison = IncComparison;
        CC = X86::COND_L;
      }
    }
    APInt DecComparison = Comparison - 1;
    if (DecComparison == NegAddend) {
      if (CC == X86::COND_AE && !Comparison.isMinValue()) {
        Comparison = DecComparison;
        CC = X86::COND_A;
      } else if (CC == X86::COND_L && !Comparison.isMinSignedValue()) {
        Comparison = DecComparison;
        CC = X86::COND_LE;
      }
    }
  }

  // If the addend is the negation of the comparison value, then we can do
  // a full comparison by emitting the atomic arithmetic as a locked sub.
  if (Comparison == NegAddend) {
    // The CC is fine, but we need to rewrite the LHS of the comparison as an
    // atomic sub.
    auto *AN = cast<AtomicSDNode>(CmpLHS.getNode());
    auto AtomicSub = DAG.getAtomic(
        ISD::ATOMIC_LOAD_SUB, SDLoc(CmpLHS), CmpVT,
        CmpLHS.getOperand(0), CmpLHS.getOperand(1),
        DAG.getConstant(NegAddend, SDLoc(CmpRHS), CmpVT),
        AN->getMemOperand());
    auto LockOp = lowerAtomicArithWithLOCK(AtomicSub, DAG, Subtarget);
    DAG.ReplaceAllUsesOfValueWith(CmpLHS.getValue(0), DAG.getUNDEF(CmpVT));
    DAG.ReplaceAllUsesOfValueWith(CmpLHS.getValue(1), LockOp.getValue(1));
    return LockOp;
  }

  // We can handle comparisons with zero in a number of cases by manipulating
  // the CC used.
  if (!Comparison.isNullValue())
    return SDValue();

  if (CC == X86::COND_S && Addend == 1)
    CC = X86::COND_LE;
  else if (CC == X86::COND_NS && Addend == 1)
    CC = X86::COND_G;
  else if (CC == X86::COND_G && Addend == -1)
    CC = X86::COND_GE;
  else if (CC == X86::COND_LE && Addend == -1)
    CC = X86::COND_L;
  else
    return SDValue();

  SDValue LockOp = lowerAtomicArithWithLOCK(CmpLHS, DAG, Subtarget);
  DAG.ReplaceAllUsesOfValueWith(CmpLHS.getValue(0), DAG.getUNDEF(CmpVT));
  DAG.ReplaceAllUsesOfValueWith(CmpLHS.getValue(1), LockOp.getValue(1));
  return LockOp;
}
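// Putting the fold above together, e.g. for an atomic decrement-and-test:
//   t = atomic_load_add x, -1 ; cmp t, 0 ; setcc g
// becomes
//   lock add [x], -1 ; setcc ge
// so the flags of the LOCKed RMW are consumed directly and the loaded value
// never needs to reach a register.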

/// Check whether a boolean test is testing a boolean value generated by
/// X86ISD::SETCC. If so, return the operand of that SETCC and the proper
/// condition code adjusted for the comparison.
///
/// This simplifies e.g.
///   cmp (setcc cc, EFLAGS), 0 ; setcc ne
/// into a direct test of EFLAGS with condition cc, looking through any
/// zext/trunc/and-with-1 that merely re-packages the boolean, and also
/// handles CMOVs of 0/1 (including the rdrand/rdseed idiom).
static SDValue checkBoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) {
  // This combine only operates on CMP-like nodes.
  if (!(Cmp.getOpcode() == X86ISD::CMP ||
        (Cmp.getOpcode() == X86ISD::SUB && !Cmp->hasAnyUseOfValue(0))))
    return SDValue();

  // Quit if not used as a boolean value.
  if (CC != X86::COND_E && CC != X86::COND_NE)
    return SDValue();

  // Check CMP operands. One of them should be 0 or 1 and the other should be
  // a SetCC or extended from it.
  SDValue Op1 = Cmp.getOperand(0);
  SDValue Op2 = Cmp.getOperand(1);

  SDValue SetCC;
  const ConstantSDNode* C = nullptr;
  bool needOppositeCond = (CC == X86::COND_E);
  bool checkAgainstTrue = false; // Is it a comparison against 1?

  if ((C = dyn_cast<ConstantSDNode>(Op1)))
    SetCC = Op2;
  else if ((C = dyn_cast<ConstantSDNode>(Op2)))
    SetCC = Op1;
  else
    return SDValue();

  if (C->getZExtValue() == 1) {
    needOppositeCond = !needOppositeCond;
    checkAgainstTrue = true;
  } else if (C->getZExtValue() != 0)
    // Quit if the constant is neither 0 or 1.
    return SDValue();

  bool truncatedToBoolWithAnd = false;
  // Skip (zext $x), (trunc $x), or (and $x, 1) node.
  while (SetCC.getOpcode() == ISD::ZERO_EXTEND ||
         SetCC.getOpcode() == ISD::TRUNCATE ||
         SetCC.getOpcode() == ISD::AND) {
    if (SetCC.getOpcode() == ISD::AND) {
      int OpIdx = -1;
      if (isOneConstant(SetCC.getOperand(0)))
        OpIdx = 1;
      if (isOneConstant(SetCC.getOperand(1)))
        OpIdx = 0;
      if (OpIdx < 0)
        break;
      SetCC = SetCC.getOperand(OpIdx);
      truncatedToBoolWithAnd = true;
    } else
      SetCC = SetCC.getOperand(0);
  }

  switch (SetCC.getOpcode()) {
  case X86ISD::SETCC_CARRY:
    // Since SETCC_CARRY gives output based on R = CF ? ~0 : 0, it is only
    // safe to check against 'true' (1) if the value was first truncated to
    // bool with an AND; otherwise give up.
    if (checkAgainstTrue && !truncatedToBoolWithAnd)
      break;
    assert(X86::CondCode(SetCC.getConstantOperandVal(0)) == X86::COND_B &&
           "Invalid use of SETCC_CARRY!");
    LLVM_FALLTHROUGH;
  case X86ISD::SETCC:
    // Set the condition code or opposite one if necessary.
    CC = X86::CondCode(SetCC.getConstantOperandVal(0));
    if (needOppositeCond)
      CC = X86::GetOppositeBranchCondition(CC);
    return SetCC.getOperand(1);
  case X86ISD::CMOV: {
    // Check whether false/true value has canonical one, i.e. 0 or 1.
    ConstantSDNode *FVal = dyn_cast<ConstantSDNode>(SetCC.getOperand(0));
    ConstantSDNode *TVal = dyn_cast<ConstantSDNode>(SetCC.getOperand(1));
    // Quit if true value is not a constant.
    if (!TVal)
      return SDValue();
    // Quit if false value is not a constant.
    if (!FVal) {
      SDValue Op = SetCC.getOperand(0);
      // Skip 'zext' or 'trunc' node.
      if (Op.getOpcode() == ISD::ZERO_EXTEND ||
          Op.getOpcode() == ISD::TRUNCATE)
        Op = Op.getOperand(0);
      // A special case for rdrand/rdseed, where 0 is set if the false cond
      // is found.
      if ((Op.getOpcode() != X86ISD::RDRAND &&
           Op.getOpcode() != X86ISD::RDSEED) || Op.getResNo() != 0)
        return SDValue();
    }
    // Quit if false value is not the constant 0 or 1.
    bool FValIsFalse = true;
    if (FVal && FVal->getZExtValue() != 0) {
      if (FVal->getZExtValue() != 1)
        return SDValue();
      // If FVal is 1, the opposite condition is needed.
      needOppositeCond = !needOppositeCond;
      FValIsFalse = false;
    }
    // Quit if TVal is not the constant opposite of FVal.
    if (FValIsFalse && TVal->getZExtValue() != 1)
      return SDValue();
    if (!FValIsFalse && TVal->getZExtValue() != 0)
      return SDValue();
    CC = X86::CondCode(SetCC.getConstantOperandVal(2));
    if (needOppositeCond)
      CC = X86::GetOppositeBranchCondition(CC);
    return SetCC.getOperand(3);
  }
  }

  return SDValue();
}
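// e.g. for "%b = icmp eq i32 %x, %y ; br i1 %b", instead of
//   sete %al ; test %al, %al ; jne
// the combine reports the original EFLAGS plus COND_E so the caller can emit
//   cmp %x, %y ; je
// re-using the flags without materializing the boolean.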

/// Check whether Cond is an AND/OR of SETCCs off of the same EFLAGS.
/// Match:
///   (X86or (X86setcc) (X86setcc))
///   (X86cmp (and (X86setcc) (X86setcc)), 0)
static bool checkBoolTestAndOrSetCCCombine(SDValue Cond, X86::CondCode &CC0,
                                           X86::CondCode &CC1, SDValue &Flags,
                                           bool &isAnd) {
  if (Cond->getOpcode() == X86ISD::CMP) {
    if (!isNullConstant(Cond->getOperand(1)))
      return false;

    Cond = Cond->getOperand(0);
  }

  isAnd = false;

  SDValue SetCC0, SetCC1;
  switch (Cond->getOpcode()) {
  default: return false;
  case ISD::AND:
  case X86ISD::AND:
    isAnd = true;
    LLVM_FALLTHROUGH;
  case ISD::OR:
  case X86ISD::OR:
    SetCC0 = Cond->getOperand(0);
    SetCC1 = Cond->getOperand(1);
    break;
  }

  // Make sure we have SETCC nodes, using the same flags value.
  if (SetCC0.getOpcode() != X86ISD::SETCC ||
      SetCC1.getOpcode() != X86ISD::SETCC ||
      SetCC0->getOperand(1) != SetCC1->getOperand(1))
    return false;

  CC0 = (X86::CondCode)SetCC0->getConstantOperandVal(0);
  CC1 = (X86::CondCode)SetCC1->getConstantOperandVal(0);
  Flags = SetCC0->getOperand(1);
  return true;
}
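// e.g. "(a < b) || (a == b)" compiled as (or (setl EFLAGS) (sete EFLAGS))
// matches here with CC0 = COND_L, CC1 = COND_E and the shared EFLAGS, so a
// caller can fold the OR into a pair of conditional branches/moves on the
// same flags instead of materializing two booleans.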

// When legalizing carry, we create carries via add X, -1.
// If that comes from an actual carry, via setcc, we use the
// carry directly.
static SDValue combineCarryThroughADD(SDValue EFLAGS, SelectionDAG &DAG) {
  if (EFLAGS.getOpcode() == X86ISD::ADD) {
    if (isAllOnesConstant(EFLAGS.getOperand(1))) {
      SDValue Carry = EFLAGS.getOperand(0);
      while (Carry.getOpcode() == ISD::TRUNCATE ||
             Carry.getOpcode() == ISD::ZERO_EXTEND ||
             Carry.getOpcode() == ISD::SIGN_EXTEND ||
             Carry.getOpcode() == ISD::ANY_EXTEND ||
             (Carry.getOpcode() == ISD::AND &&
              isOneConstant(Carry.getOperand(1))))
        Carry = Carry.getOperand(0);
      if (Carry.getOpcode() == X86ISD::SETCC ||
          Carry.getOpcode() == X86ISD::SETCC_CARRY) {
        uint64_t CarryCC = Carry.getConstantOperandVal(0);
        SDValue CarryOp1 = Carry.getOperand(1);
        if (CarryCC == X86::COND_B)
          return CarryOp1;
        if (CarryCC == X86::COND_A) {
          // Try to convert COND_A into COND_B in an attempt to facilitate
          // materializing "setb reg".
          //
          // Do not flip "e > c", where "c" is a constant, because the Cmp
          // instruction cannot take an immediate as its first operand.
          if (CarryOp1.getOpcode() == X86ISD::SUB &&
              CarryOp1.getNode()->hasOneUse() &&
              CarryOp1.getValueType().isInteger() &&
              !isa<ConstantSDNode>(CarryOp1.getOperand(1))) {
            SDValue SubCommute =
                DAG.getNode(X86ISD::SUB, SDLoc(CarryOp1), CarryOp1->getVTList(),
                            CarryOp1.getOperand(1), CarryOp1.getOperand(0));
            return SDValue(SubCommute.getNode(), CarryOp1.getResNo());
          }
        }
        // If this is a check of the z flag of an add with 1, switch to the
        // C flag.
        if (CarryCC == X86::COND_E &&
            CarryOp1.getOpcode() == X86ISD::ADD &&
            isOneConstant(CarryOp1.getOperand(1)))
          return CarryOp1;
      }
    }
  }

  return SDValue();
}
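// e.g. when the carry for an ADC was legalized as (add (setb EFLAGS), -1),
// this returns the original EFLAGS so the ADC consumes CF directly. The
// COND_A case commutes "sub a, b" into "sub b, a" because a >u b is exactly
// the borrow (CF) of b - a, turning seta into the cheaper setb pattern.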

/// If we are inverting an PTEST/TESTP operand, attempt to adjust the CC
/// to avoid the inversion.
static SDValue combinePTESTCC(SDValue EFLAGS, X86::CondCode &CC,
                              SelectionDAG &DAG,
                              const X86Subtarget &Subtarget) {
  if (EFLAGS.getOpcode() != X86ISD::PTEST &&
      EFLAGS.getOpcode() != X86ISD::TESTP)
    return SDValue();

  // PTEST/TESTP sets EFLAGS as:
  //  TESTZ: ZF = (Op0 & Op1) == 0
  //  TESTC: CF = (~Op0 & Op1) == 0
  //  TESTNZC: ZF == 0 && CF == 0
  EVT VT = EFLAGS.getValueType();
  SDValue Op0 = EFLAGS.getOperand(0);
  SDValue Op1 = EFLAGS.getOperand(1);
  EVT OpVT = Op0.getValueType();

  // TEST*(~X,Y) == TEST*(X,Y)
  if (SDValue NotOp0 = IsNOT(Op0, DAG)) {
    X86::CondCode InvCC;
    switch (CC) {
    case X86::COND_B:
      // testc -> testz.
      InvCC = X86::COND_E;
      break;
    case X86::COND_AE:
      // !testc -> !testz.
      InvCC = X86::COND_NE;
      break;
    case X86::COND_E:
      // testz -> testc.
      InvCC = X86::COND_B;
      break;
    case X86::COND_NE:
      // !testz -> !testc.
      InvCC = X86::COND_AE;
      break;
    case X86::COND_A:
    case X86::COND_BE:
      // testnzc -> testnzc (no change).
      InvCC = CC;
      break;
    default:
      InvCC = X86::COND_INVALID;
      break;
    }

    if (InvCC != X86::COND_INVALID) {
      CC = InvCC;
      return DAG.getNode(EFLAGS.getOpcode(), SDLoc(EFLAGS), VT,
                         DAG.getBitcast(OpVT, NotOp0), Op1);
    }
  }

  if (CC == X86::COND_E || CC == X86::COND_NE) {
    // TESTZ(X,~Y) == TESTC(Y,X)
    if (SDValue NotOp1 = IsNOT(Op1, DAG)) {
      CC = (CC == X86::COND_E ? X86::COND_B : X86::COND_AE);
      return DAG.getNode(EFLAGS.getOpcode(), SDLoc(EFLAGS), VT,
                         DAG.getBitcast(OpVT, NotOp1), Op0);
    }

    if (Op0 == Op1) {
      SDValue BC = peekThroughBitcasts(Op0);
      EVT BCVT = BC.getValueType();
      assert(BCVT.isVector() && DAG.getTargetLoweringInfo().isTypeLegal(BCVT) &&
             "Unexpected vector type");

      // TESTZ(AND(X,Y),AND(X,Y)) == TESTZ(X,Y)
      if (BC.getOpcode() == ISD::AND || BC.getOpcode() == X86ISD::FAND) {
        return DAG.getNode(EFLAGS.getOpcode(), SDLoc(EFLAGS), VT,
                           DAG.getBitcast(OpVT, BC.getOperand(0)),
                           DAG.getBitcast(OpVT, BC.getOperand(1)));
      }

      // TESTZ(AND(~X,Y),AND(~X,Y)) == TESTC(X,Y)
      if (BC.getOpcode() == X86ISD::ANDNP || BC.getOpcode() == X86ISD::FANDN) {
        CC = (CC == X86::COND_E ? X86::COND_B : X86::COND_AE);
        return DAG.getNode(EFLAGS.getOpcode(), SDLoc(EFLAGS), VT,
                           DAG.getBitcast(OpVT, BC.getOperand(0)),
                           DAG.getBitcast(OpVT, BC.getOperand(1)));
      }

      // If every element is an all-signbits value, we can replace the
      // TESTZ(X,X) with a MOVMSK(X) == 0 comparison.
      unsigned EltBits = BCVT.getScalarSizeInBits();
      if (DAG.ComputeNumSignBits(BC) == EltBits) {
        assert(VT == MVT::i32 && "Expected i32 EFLAGS comparison result");
        APInt SignMask = APInt::getSignMask(EltBits);
        const TargetLowering &TLI = DAG.getTargetLoweringInfo();
        if (SDValue Res =
                TLI.SimplifyMultipleUseDemandedBits(BC, SignMask, DAG)) {
          // For vXi16 cases we need to use pmovmskb and extract every other
          // sign bit.
          SDLoc DL(EFLAGS);
          if (EltBits == 16) {
            MVT MovmskVT = BCVT.is128BitVector() ? MVT::v16i8 : MVT::v32i8;
            Res = DAG.getBitcast(MovmskVT, Res);
            Res = getPMOVMSKB(DL, Res, DAG, Subtarget);
            Res = DAG.getNode(ISD::AND, DL, MVT::i32, Res,
                              DAG.getConstant(0xAAAAAAAA, DL, MVT::i32));
          } else {
            Res = getPMOVMSKB(DL, Res, DAG, Subtarget);
          }
          return DAG.getNode(X86ISD::CMP, DL, MVT::i32, Res,
                             DAG.getConstant(0, DL, MVT::i32));
        }
      }
    }

    // TESTZ(-1,X) == TESTZ(X,X)
    if (ISD::isBuildVectorAllOnes(Op0.getNode()))
      return DAG.getNode(EFLAGS.getOpcode(), SDLoc(EFLAGS), VT, Op1, Op1);

    // TESTZ(X,-1) == TESTZ(X,X)
    if (ISD::isBuildVectorAllOnes(Op1.getNode()))
      return DAG.getNode(EFLAGS.getOpcode(), SDLoc(EFLAGS), VT, Op0, Op0);
  }

  return SDValue();
}
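// Flag algebra behind the swaps above: with ZF = ((Op0 & Op1) == 0) and
// CF = ((~Op0 & Op1) == 0), substituting Op0 = ~X gives
//   ZF(ptest ~X, Y) = ((~X & Y) == 0) = CF(ptest X, Y)
//   CF(ptest ~X, Y) = ((X & Y) == 0)  = ZF(ptest X, Y)
// so COND_E/COND_B (and COND_NE/COND_AE) simply trade places and the NOT
// never has to be materialized.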
42706 | |
42707 | |
42708 | static SDValue combineSetCCMOVMSK(SDValue EFLAGS, X86::CondCode &CC, |
42709 | SelectionDAG &DAG, |
42710 | const X86Subtarget &Subtarget) { |
42711 | |
42712 | |
42713 | if (!(CC == X86::COND_E || CC == X86::COND_NE)) |
42714 | return SDValue(); |
42715 | if (EFLAGS.getValueType() != MVT::i32) |
42716 | return SDValue(); |
42717 | unsigned CmpOpcode = EFLAGS.getOpcode(); |
42718 | if (CmpOpcode != X86ISD::CMP && CmpOpcode != X86ISD::SUB) |
42719 | return SDValue(); |
42720 | auto *CmpConstant = dyn_cast<ConstantSDNode>(EFLAGS.getOperand(1)); |
42721 | if (!CmpConstant) |
42722 | return SDValue(); |
42723 | const APInt &CmpVal = CmpConstant->getAPIntValue(); |
42724 | |
42725 | SDValue CmpOp = EFLAGS.getOperand(0); |
42726 | unsigned CmpBits = CmpOp.getValueSizeInBits(); |
42727 | assert(CmpBits == CmpVal.getBitWidth() && "Value size mismatch"); |
42728 | |
42729 | |
42730 | if (CmpOp.getOpcode() == ISD::TRUNCATE) |
42731 | CmpOp = CmpOp.getOperand(0); |
42732 | |
42733 | |
42734 | if (CmpOp.getOpcode() != X86ISD::MOVMSK) |
42735 | return SDValue(); |
42736 | |
42737 | SDValue Vec = CmpOp.getOperand(0); |
42738 | MVT VecVT = Vec.getSimpleValueType(); |
42739 | assert((VecVT.is128BitVector() || VecVT.is256BitVector()) && |
42740 | "Unexpected MOVMSK operand"); |
42741 | unsigned NumElts = VecVT.getVectorNumElements(); |
42742 | unsigned NumEltBits = VecVT.getScalarSizeInBits(); |
42743 | |
42744 | bool IsAnyOf = CmpOpcode == X86ISD::CMP && CmpVal.isNullValue(); |
42745 | bool IsAllOf = CmpOpcode == X86ISD::SUB && NumElts <= CmpBits && |
42746 | CmpVal.isMask(NumElts); |
42747 | if (!IsAnyOf && !IsAllOf) |
42748 | return SDValue(); |
42749 | |
42750 | |
42751 | |
42752 | |
42753 | |
42754 | if (Vec.getOpcode() == ISD::BITCAST) { |
42755 | SDValue BC = peekThroughBitcasts(Vec); |
42756 | MVT BCVT = BC.getSimpleValueType(); |
42757 | unsigned BCNumElts = BCVT.getVectorNumElements(); |
42758 | unsigned BCNumEltBits = BCVT.getScalarSizeInBits(); |
42759 | if ((BCNumEltBits == 32 || BCNumEltBits == 64) && |
42760 | BCNumEltBits > NumEltBits && |
42761 | DAG.ComputeNumSignBits(BC) > (BCNumEltBits - NumEltBits)) { |
42762 | SDLoc DL(EFLAGS); |
42763 | unsigned CmpMask = IsAnyOf ? 0 : ((1 << BCNumElts) - 1); |
42764 | return DAG.getNode(X86ISD::CMP, DL, MVT::i32, |
42765 | DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, BC), |
42766 | DAG.getConstant(CmpMask, DL, MVT::i32)); |
42767 | } |
42768 | } |
42769 | |
42770 | // MOVMSK(PCMPEQ(X,0)) == -1 -> PTESTZ(X,X). |
42771 | // MOVMSK(PCMPEQ(X,0)) != -1 -> !PTESTZ(X,X). |
42772 | if (IsAllOf && Subtarget.hasSSE41()) { |
42773 | SDValue BC = peekThroughBitcasts(Vec); |
42774 | if (BC.getOpcode() == X86ISD::PCMPEQ && |
42775 | ISD::isBuildVectorAllZeros(BC.getOperand(1).getNode())) { |
42776 | MVT TestVT = VecVT.is128BitVector() ? MVT::v2i64 : MVT::v4i64; |
42777 | SDValue V = DAG.getBitcast(TestVT, BC.getOperand(0)); |
42778 | return DAG.getNode(X86ISD::PTEST, SDLoc(EFLAGS), MVT::i32, V, V); |
42779 | } |
42780 | } |
42781 | |
42782 | // See if we can avoid a PACKSS by calling MOVMSK on the sources. |
42783 | // For vXi16 cases we can use a v2Xi8 PMOVMSKB. We must mask out |
42784 | // sign bits prior to the comparison with zero unless we know that |
42785 | // the vXi16 splats the sign bit down to the lower i8 half. |
42786 | // TODO: Handle all_of patterns. |
42787 | if (Vec.getOpcode() == X86ISD::PACKSS && VecVT == MVT::v16i8) { |
42788 | SDValue VecOp0 = Vec.getOperand(0); |
42789 | SDValue VecOp1 = Vec.getOperand(1); |
42790 | bool SignExt0 = DAG.ComputeNumSignBits(VecOp0) > 8; |
42791 | bool SignExt1 = DAG.ComputeNumSignBits(VecOp1) > 8; |
42792 | // PMOVMSKB(PACKSSBW(X, undef)) -> PMOVMSKB(BITCAST_v16i8(X)) & 0xAAAA. |
42793 | if (IsAnyOf && CmpBits == 8 && VecOp1.isUndef()) { |
42794 | SDLoc DL(EFLAGS); |
42795 | SDValue Result = DAG.getBitcast(MVT::v16i8, VecOp0); |
42796 | Result = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Result); |
42797 | Result = DAG.getZExtOrTrunc(Result, DL, MVT::i16); |
42798 | if (!SignExt0) { |
42799 | Result = DAG.getNode(ISD::AND, DL, MVT::i16, Result, |
42800 | DAG.getConstant(0xAAAA, DL, MVT::i16)); |
42801 | } |
42802 | return DAG.getNode(X86ISD::CMP, DL, MVT::i32, Result, |
42803 | DAG.getConstant(0, DL, MVT::i16)); |
42804 | } |
42805 | // PMOVMSKB(PACKSSBW(LO(X), HI(X))) |
42806 | // -> PMOVMSKB(BITCAST_v32i8(X)) & 0xAAAAAAAA. |
42807 | if (CmpBits >= 16 && Subtarget.hasInt256() && |
42808 | VecOp0.getOpcode() == ISD::EXTRACT_SUBVECTOR && |
42809 | VecOp1.getOpcode() == ISD::EXTRACT_SUBVECTOR && |
42810 | VecOp0.getOperand(0) == VecOp1.getOperand(0) && |
42811 | VecOp0.getConstantOperandAPInt(1) == 0 && |
42812 | VecOp1.getConstantOperandAPInt(1) == 8 && |
42813 | (IsAnyOf || (SignExt0 && SignExt1))) { |
42814 | SDLoc DL(EFLAGS); |
42815 | SDValue Result = DAG.getBitcast(MVT::v32i8, VecOp0.getOperand(0)); |
42816 | Result = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Result); |
42817 | unsigned CmpMask = IsAnyOf ? 0 : 0xFFFFFFFF; |
42818 | if (!SignExt0 || !SignExt1) { |
42819 | assert(IsAnyOf && "Only perform v16i16 signmasks for any_of patterns"); |
42820 | Result = DAG.getNode(ISD::AND, DL, MVT::i32, Result, |
42821 | DAG.getConstant(0xAAAAAAAA, DL, MVT::i32)); |
42822 | } |
42823 | return DAG.getNode(X86ISD::CMP, DL, MVT::i32, Result, |
42824 | DAG.getConstant(CmpMask, DL, MVT::i32)); |
42825 | } |
42826 | } |
42827 | |
42828 | // MOVMSK(SHUFFLE(X,u)) -> MOVMSK(X) iff every element is referenced. |
42829 | SmallVector<int, 32> ShuffleMask; |
42830 | SmallVector<SDValue, 2> ShuffleInputs; |
42831 | if (NumElts <= CmpBits && |
42832 | getTargetShuffleInputs(peekThroughBitcasts(Vec), ShuffleInputs, |
42833 | ShuffleMask, DAG) && |
42834 | ShuffleInputs.size() == 1 && !isAnyZeroOrUndef(ShuffleMask) && |
42835 | ShuffleInputs[0].getValueSizeInBits() == VecVT.getSizeInBits()) { |
42836 | unsigned NumShuffleElts = ShuffleMask.size(); |
42837 | APInt DemandedElts = APInt::getNullValue(NumShuffleElts); |
42838 | for (int M : ShuffleMask) { |
42839 | assert(0 <= M && M < (int)NumShuffleElts && "Bad unary shuffle index"); |
42840 | DemandedElts.setBit(M); |
42841 | } |
42842 | if (DemandedElts.isAllOnesValue()) { |
42843 | SDLoc DL(EFLAGS); |
42844 | SDValue Result = DAG.getBitcast(VecVT, ShuffleInputs[0]); |
42845 | Result = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Result); |
42846 | Result = |
42847 | DAG.getZExtOrTrunc(Result, DL, EFLAGS.getOperand(0).getValueType()); |
42848 | return DAG.getNode(X86ISD::CMP, DL, MVT::i32, Result, |
42849 | EFLAGS.getOperand(1)); |
42850 | } |
42851 | } |
42852 | |
42853 | return SDValue(); |
42854 | } |
42855 | |
42856 | /// Optimize an EFLAGS definition used according to the condition code \p CC |
42857 | /// into a simpler EFLAGS value, potentially returning a new \p CC and |
42858 | /// replacing uses of chain values. |
42859 | static SDValue combineSetCCEFLAGS(SDValue EFLAGS, X86::CondCode &CC, |
42860 | SelectionDAG &DAG, |
42861 | const X86Subtarget &Subtarget) { |
42862 | if (CC == X86::COND_B) |
42863 | if (SDValue Flags = combineCarryThroughADD(EFLAGS, DAG)) |
42864 | return Flags; |
42865 | |
42866 | if (SDValue R = checkBoolTestSetCCCombine(EFLAGS, CC)) |
42867 | return R; |
42868 | |
42869 | if (SDValue R = combinePTESTCC(EFLAGS, CC, DAG, Subtarget)) |
42870 | return R; |
42871 | |
42872 | if (SDValue R = combineSetCCMOVMSK(EFLAGS, CC, DAG, Subtarget)) |
42873 | return R; |
42874 | |
42875 | return combineSetCCAtomicArith(EFLAGS, CC, DAG, Subtarget); |
42876 | } |
42877 | |
42878 | /// Optimize X86ISD::CMOV [LHS, RHS, CONDCODE (e.g. X86::COND_NE), CONDVAL] |
42879 | static SDValue combineCMov(SDNode *N, SelectionDAG &DAG, |
42880 | TargetLowering::DAGCombinerInfo &DCI, |
42881 | const X86Subtarget &Subtarget) { |
42882 | SDLoc DL(N); |
42883 | |
42884 | SDValue FalseOp = N->getOperand(0); |
42885 | SDValue TrueOp = N->getOperand(1); |
42886 | X86::CondCode CC = (X86::CondCode)N->getConstantOperandVal(2); |
42887 | SDValue Cond = N->getOperand(3); |
42888 | |
42889 | // cmov X, X, ?, ? --> X |
42890 | if (TrueOp == FalseOp) |
42891 | return TrueOp; |
42892 | |
42893 | // Try to simplify the EFLAGS and condition code operands. |
42894 | // We can't always do this as FCMOV only supports a subset of X86 cond. |
42895 | if (SDValue Flags = combineSetCCEFLAGS(Cond, CC, DAG, Subtarget)) { |
42896 | if (!(FalseOp.getValueType() == MVT::f80 || |
42897 | (FalseOp.getValueType() == MVT::f64 && !Subtarget.hasSSE2()) || |
42898 | (FalseOp.getValueType() == MVT::f32 && !Subtarget.hasSSE1())) || |
42899 | !Subtarget.hasCMov() || hasFPCMov(CC)) { |
42900 | SDValue Ops[] = {FalseOp, TrueOp, DAG.getTargetConstant(CC, DL, MVT::i8), |
42901 | Flags}; |
42902 | return DAG.getNode(X86ISD::CMOV, DL, N->getValueType(0), Ops); |
42903 | } |
42904 | } |
42905 | |
42906 | // If this is a select between two integer constants, try to do some |
42907 | // optimizations. Note that the operands are ordered the opposite of |
42908 | // SELECT operands. |
42909 | if (ConstantSDNode *TrueC = dyn_cast<ConstantSDNode>(TrueOp)) { |
42910 | if (ConstantSDNode *FalseC = dyn_cast<ConstantSDNode>(FalseOp)) { |
42911 | // Canonicalize the TrueC/FalseC values so that TrueC (the true value) is |
42912 | // larger than FalseC (the false value). |
42913 | if (TrueC->getAPIntValue().ult(FalseC->getAPIntValue())) { |
42914 | CC = X86::GetOppositeBranchCondition(CC); |
42915 | std::swap(TrueC, FalseC); |
42916 | std::swap(TrueOp, FalseOp); |
42917 | } |
42918 | |
42919 | // Optimize C ? 8 : 0 -> zext(setcc(C)) << 3. Likewise for any pow2/0. |
42920 | // This is efficient for any integer data type (including i8/i16) and |
42921 | // shift amount. |
42922 | if (FalseC->getAPIntValue() == 0 && TrueC->getAPIntValue().isPowerOf2()) { |
42923 | Cond = getSETCC(CC, Cond, DL, DAG); |
42924 | |
42925 | // Zero extend the condition if needed. |
42926 | Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, TrueC->getValueType(0), Cond); |
42927 | |
42928 | unsigned ShAmt = TrueC->getAPIntValue().logBase2(); |
42929 | Cond = DAG.getNode(ISD::SHL, DL, Cond.getValueType(), Cond, |
42930 | DAG.getConstant(ShAmt, DL, MVT::i8)); |
42931 | return Cond; |
42932 | } |
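// ---------------------------------------------------------------------------
// [Editorial sketch, not part of the original source] The pow2/0 select fold
// just above in scalar form: (c ? 8 : 0) is exactly zext(c) << log2(8), so no
// branch or cmov is required. Names are invented for illustration. C++11.
#include <cstdint>
constexpr uint32_t select_pow2(bool c) { return uint32_t(c) << 3; }
static_assert(select_pow2(true) == 8 && select_pow2(false) == 0, "");
// ---------------------------------------------------------------------------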
42933 | |
42934 | // Optimize Cond ? cst+1 : cst -> zext(setcc(C)) + cst. This is efficient |
42935 | // for any integer data type, including i8/i16. |
42936 | if (FalseC->getAPIntValue()+1 == TrueC->getAPIntValue()) { |
42937 | Cond = getSETCC(CC, Cond, DL, DAG); |
42938 | |
42939 | // Zero extend the condition if needed. |
42940 | Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, |
42941 | FalseC->getValueType(0), Cond); |
42942 | Cond = DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond, |
42943 | SDValue(FalseC, 0)); |
42944 | return Cond; |
42945 | } |
42946 | |
42947 | // Optimize cases that will turn into an LEA instruction. This requires |
42948 | // an i32 or i64 and an efficient multiplier (1, 2, 3, 4, 5, 8, 9). |
42949 | if (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i64) { |
42950 | APInt Diff = TrueC->getAPIntValue() - FalseC->getAPIntValue(); |
42951 | assert(Diff.getBitWidth() == N->getValueType(0).getSizeInBits() && |
42952 | "Implicit constant truncation"); |
42953 | |
42954 | bool isFastMultiplier = false; |
42955 | if (Diff.ult(10)) { |
42956 | switch (Diff.getZExtValue()) { |
42957 | default: break; |
42958 | case 1: |
42959 | case 2: |
42960 | case 3: |
42961 | case 4: |
42962 | case 5: |
42963 | case 8: |
42964 | case 9: |
42965 | isFastMultiplier = true; |
42966 | break; |
42967 | } |
42968 | } |
42969 | |
42970 | if (isFastMultiplier) { |
42971 | Cond = getSETCC(CC, Cond, DL, DAG); |
42972 | // Zero extend the condition if needed. |
42973 | Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, FalseC->getValueType(0), |
42974 | Cond); |
42975 | // Scale the condition by the difference. |
42976 | if (Diff != 1) |
42977 | Cond = DAG.getNode(ISD::MUL, DL, Cond.getValueType(), Cond, |
42978 | DAG.getConstant(Diff, DL, Cond.getValueType())); |
42979 | |
42980 | // Add the base if non-zero. |
42981 | if (FalseC->getAPIntValue() != 0) |
42982 | Cond = DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond, |
42983 | SDValue(FalseC, 0)); |
42984 | return Cond; |
42985 | } |
42986 | } |
42987 | } |
42988 | } |
42989 | |
42990 | |
42991 | // Handle these cases: |
42992 | //   (select (x != c), e, c) -> select (x != c), e, x), |
42993 | //   (select (x == c), c, e) -> select (x == c), x, e) |
42994 | // where the c is an integer constant, and the "select" is the combination |
42995 | // of CMOV and CMP. |
42996 | // |
42997 | // The rationale for this change is that the conditional-move from a |
42998 | // constant needs two instructions, however, conditional-move from a |
42999 | // register needs only one instruction. |
43000 | // |
43001 | // After the transform the value being selected is already live in a |
43002 | // register, so the constant never has to be materialized or read from |
43003 | // memory. |
43004 | if (!DCI.isBeforeLegalize() && !DCI.isBeforeLegalizeOps()) { |
43005 | |
43006 | |
43007 | // Look for a CMP/SUB of a non-constant value against a constant. |
43008 | ConstantSDNode *CmpAgainst = nullptr; |
43009 | if ((Cond.getOpcode() == X86ISD::CMP || Cond.getOpcode() == X86ISD::SUB) && |
43010 | (CmpAgainst = dyn_cast<ConstantSDNode>(Cond.getOperand(1))) && |
43011 | !isa<ConstantSDNode>(Cond.getOperand(0))) { |
43012 | // Canonicalize (select (x != c), e, c) to (select (x == c), c, e). |
43013 | if (CC == X86::COND_NE && |
43014 | CmpAgainst == dyn_cast<ConstantSDNode>(FalseOp)) { |
43015 | CC = X86::GetOppositeBranchCondition(CC); |
43016 | std::swap(TrueOp, FalseOp); |
43017 | } |
43018 | // Replace the constant true value with the register it was compared with. |
43019 | if (CC == X86::COND_E && |
43020 | CmpAgainst == dyn_cast<ConstantSDNode>(TrueOp)) { |
43021 | SDValue Ops[] = {FalseOp, Cond.getOperand(0), |
43022 | DAG.getTargetConstant(CC, DL, MVT::i8), Cond}; |
43023 | return DAG.getNode(X86ISD::CMOV, DL, N->getValueType(0), Ops); |
43024 | } |
43025 | } |
43026 | } |
43027 | |
43028 | |
43029 | // Fold and/or of setcc's to double CMOV: |
43030 | //   (CMOV F, T, ((cc1 | cc2) != 0)) -> (CMOV (CMOV F, T, cc1), T, cc2) |
43031 | //   (CMOV F, T, ((cc1 & cc2) != 0)) -> (CMOV (CMOV F, T, !cc1), F, !cc2) |
43032 | // |
43033 | // This combine lets us generate: |
43034 | //   cmovcc1 (jcc1 if we don't have CMOV) |
43035 | //   cmovcc2 (same) |
43036 | // instead of: |
43037 | //   setcc1 |
43038 | //   setcc2 |
43039 | //   and/or |
43040 | //   testb |
43041 | //   jne |
43042 | // |
43043 | // The second sequence is longer and needs the flag result in a GPR, so |
43044 | // the double-CMOV form is preferred when CC is COND_NE. |
43045 | if (CC == X86::COND_NE) { |
43046 | SDValue Flags; |
43047 | X86::CondCode CC0, CC1; |
43048 | bool isAndSetCC; |
43049 | if (checkBoolTestAndOrSetCCCombine(Cond, CC0, CC1, Flags, isAndSetCC)) { |
43050 | if (isAndSetCC) { |
43051 | std::swap(FalseOp, TrueOp); |
43052 | CC0 = X86::GetOppositeBranchCondition(CC0); |
43053 | CC1 = X86::GetOppositeBranchCondition(CC1); |
43054 | } |
43055 | |
43056 | SDValue LOps[] = {FalseOp, TrueOp, |
43057 | DAG.getTargetConstant(CC0, DL, MVT::i8), Flags}; |
43058 | SDValue LCMOV = DAG.getNode(X86ISD::CMOV, DL, N->getValueType(0), LOps); |
43059 | SDValue Ops[] = {LCMOV, TrueOp, DAG.getTargetConstant(CC1, DL, MVT::i8), |
43060 | Flags}; |
43061 | SDValue CMOV = DAG.getNode(X86ISD::CMOV, DL, N->getValueType(0), Ops); |
43062 | return CMOV; |
43063 | } |
43064 | } |
43065 | |
43066 | |
43067 | |
43068 | // Fold (CMOV C1, (ADD (CTTZ X), C2), (X != 0)) -> |
43069 | //      (ADD (CMOV C1-C2, (CTTZ X), (X != 0)), C2) |
43070 | if ((CC == X86::COND_NE || CC == X86::COND_E) && |
43071 | Cond.getOpcode() == X86ISD::CMP && isNullConstant(Cond.getOperand(1))) { |
43072 | SDValue Add = TrueOp; |
43073 | SDValue Const = FalseOp; |
43074 | |
43075 | if (CC == X86::COND_E) |
43076 | std::swap(Add, Const); |
43077 | |
43078 | // We might have replaced the constant in the cmov with the LHS of the |
43079 | // compare. If so change it to the RHS of the compare. |
43080 | if (Const == Cond.getOperand(0)) |
43081 | Const = Cond.getOperand(1); |
43082 | |
43083 | // Check that the ADD is a CTTZ of the value being compared against zero. |
43084 | if (isa<ConstantSDNode>(Const) && Add.getOpcode() == ISD::ADD && |
43085 | Add.hasOneUse() && isa<ConstantSDNode>(Add.getOperand(1)) && |
43086 | (Add.getOperand(0).getOpcode() == ISD::CTTZ_ZERO_UNDEF || |
43087 | Add.getOperand(0).getOpcode() == ISD::CTTZ) && |
43088 | Add.getOperand(0).getOperand(0) == Cond.getOperand(0)) { |
43089 | EVT VT = N->getValueType(0); |
43090 | // This should constant fold. |
43091 | SDValue Diff = DAG.getNode(ISD::SUB, DL, VT, Const, Add.getOperand(1)); |
43092 | SDValue CMov = |
43093 | DAG.getNode(X86ISD::CMOV, DL, VT, Diff, Add.getOperand(0), |
43094 | DAG.getTargetConstant(X86::COND_NE, DL, MVT::i8), Cond); |
43095 | return DAG.getNode(ISD::ADD, DL, VT, CMov, Add.getOperand(1)); |
43096 | } |
43097 | } |
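// ---------------------------------------------------------------------------
// [Editorial sketch, not part of the original source] Scalar check of the
// CTTZ rewrite above: selecting between (cttz(x) + C2) and C1 equals
// selecting between cttz(x) and (C1 - C2) and then adding C2 unconditionally,
// which matches a BSF/TZCNT-friendly CMOV lowering. cttz5 is a tiny stand-in
// for values up to 4 bits; all names are invented. C++11.
#include <cstdint>
constexpr uint32_t cttz5(uint32_t x) {
  return x & 1 ? 0 : x & 2 ? 1 : x & 4 ? 2 : x & 8 ? 3 : 4;
}
constexpr uint32_t before(uint32_t x, uint32_t C1, uint32_t C2) {
  return x != 0 ? cttz5(x) + C2 : C1;
}
constexpr uint32_t after(uint32_t x, uint32_t C1, uint32_t C2) {
  return (x != 0 ? cttz5(x) : C1 - C2) + C2;
}
static_assert(before(8, 100, 7) == after(8, 100, 7), "");
static_assert(before(0, 100, 7) == after(0, 100, 7), "");
// ---------------------------------------------------------------------------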
43098 | |
43099 | return SDValue(); |
43100 | } |
43101 | |
43102 | /// Different mul shrinking modes. |
43103 | enum class ShrinkMode { MULS8, MULU8, MULS16, MULU16 }; |
43104 | |
43105 | static bool canReduceVMulWidth(SDNode *N, SelectionDAG &DAG, ShrinkMode &Mode) { |
43106 | EVT VT = N->getOperand(0).getValueType(); |
43107 | if (VT.getScalarSizeInBits() != 32) |
43108 | return false; |
43109 | |
43110 | assert(N->getNumOperands() == 2 && "NumOperands of Mul are 2"); |
43111 | unsigned SignBits[2] = {1, 1}; |
43112 | bool IsPositive[2] = {false, false}; |
43113 | for (unsigned i = 0; i < 2; i++) { |
43114 | SDValue Opd = N->getOperand(i); |
43115 | |
43116 | SignBits[i] = DAG.ComputeNumSignBits(Opd); |
43117 | IsPositive[i] = DAG.SignBitIsZero(Opd); |
43118 | } |
43119 | |
43120 | bool AllPositive = IsPositive[0] && IsPositive[1]; |
43121 | unsigned MinSignBits = std::min(SignBits[0], SignBits[1]); |
43122 | // When ranges are from -128 ~ 127, use MULS8 mode. |
43123 | if (MinSignBits >= 25) |
43124 | Mode = ShrinkMode::MULS8; |
43125 | // When ranges are from 0 ~ 255, use MULU8 mode. |
43126 | else if (AllPositive && MinSignBits >= 24) |
43127 | Mode = ShrinkMode::MULU8; |
43128 | // When ranges are from -32768 ~ 32767, use MULS16 mode. |
43129 | else if (MinSignBits >= 17) |
43130 | Mode = ShrinkMode::MULS16; |
43131 | // When ranges are from 0 ~ 65535, use MULU16 mode. |
43132 | else if (AllPositive && MinSignBits >= 16) |
43133 | Mode = ShrinkMode::MULU16; |
43134 | else |
43135 | return false; |
43136 | return true; |
43137 | } |
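// ---------------------------------------------------------------------------
// [Editorial sketch, not part of the original source] Why ">= 25 sign bits"
// on an i32 means the value fits in i8: a value with k redundant sign bits
// uses at most 32 - k + 1 significant bits, so k >= 25 leaves at most 8,
// i.e. the range [-128, 127] handled by MULS8. num_sign_bits mirrors
// SelectionDAG::ComputeNumSignBits for a scalar; name invented. C++14.
#include <cstdint>
constexpr unsigned num_sign_bits(int32_t v) {
  unsigned n = 0;
  for (uint32_t x = uint32_t(v); n < 32; ++n)
    if (((x >> (31 - n)) & 1) != uint32_t(v < 0 ? 1 : 0))
      break; // first bit that differs from the sign ends the run
  return n;
}
static_assert(num_sign_bits(127) == 25 && num_sign_bits(-128) == 25, "");
static_assert(num_sign_bits(128) == 24, "");
// ---------------------------------------------------------------------------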
43138 | |
43139 | |
43140 | /// When the operands of a vector mul are extended from smaller size values, |
43141 | /// like i8 and i16, the type of mul may be shrunk to generate more |
43142 | /// efficient code. Two typical patterns are handled: |
43143 | /// 1. |
43144 | ///     %2 = sext/zext <N x i8> %1 to <N x i32> |
43145 | ///     %4 = sext/zext <N x i8> %3 to <N x i32> |
43146 | ///      or %4 = build_vector <N x i32> %C1, ..., %CN (constants) |
43147 | ///     %5 = mul <N x i32> %2, %4 |
43148 | /// |
43149 | /// 2. |
43150 | ///     %2 = zext/sext <N x i16> %1 to <N x i32> |
43151 | ///     %4 = zext/sext <N x i16> %3 to <N x i32> |
43152 | ///      or %4 = build_vector <N x i32> %C1, ..., %CN (constants) |
43153 | ///     %5 = mul <N x i32> %2, %4 |
43154 | /// |
43155 | /// There are four mul shrinking modes: |
43156 | /// 1. MULS8: both operands are in [-128, 127]; the mul is done as a |
43157 | ///    16-bit multiply and only the sign-extended low part is kept. |
43158 | /// 2. MULU8: both operands are in [0, 255]; as above but zero-extended. |
43159 | /// 3. MULS16: both operands are in [-32768, 32767]; pmullw/pmulhw |
43160 | ///    produce the low and high halves, which are interleaved into the |
43161 | ///    i32 results. |
43162 | /// 4. MULU16: both operands are in [0, 65535]; as above with pmulhuw. |
43163 | /// |
43164 | /// The shrunken sequence replaces a pmulld (or a scalarized mul) with |
43165 | /// pmullw, plus pmulhw/pmulhuw and unpack shuffles for the 16-bit modes. |
43166 | |
43167 | static SDValue reduceVMULWidth(SDNode *N, SelectionDAG &DAG, |
43168 | const X86Subtarget &Subtarget) { |
43169 | |
43170 | // pmullw/pmulhw are only available with SSE2. |
43171 | if (!Subtarget.hasSSE2()) |
43172 | return SDValue(); |
43173 | |
43174 | |
43175 | // pmulld is supported since SSE41. It is better to use pmulld |
43176 | // instead of pmullw+pmulhw, except for subtargets where pmulld is slower |
43177 | // than pmullw+pmulhw. It is also preferred when optimizing for size. |
43178 | bool OptForMinSize = DAG.getMachineFunction().getFunction().hasMinSize(); |
43179 | if (Subtarget.hasSSE41() && (OptForMinSize || !Subtarget.isPMULLDSlow())) |
43180 | return SDValue(); |
43181 | |
43182 | ShrinkMode Mode; |
43183 | if (!canReduceVMulWidth(N, DAG, Mode)) |
43184 | return SDValue(); |
43185 | |
43186 | SDLoc DL(N); |
43187 | SDValue N0 = N->getOperand(0); |
43188 | SDValue N1 = N->getOperand(1); |
43189 | EVT VT = N->getOperand(0).getValueType(); |
43190 | unsigned NumElts = VT.getVectorNumElements(); |
43191 | if ((NumElts % 2) != 0) |
43192 | return SDValue(); |
43193 | |
43194 | EVT ReducedVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16, NumElts); |
43195 | |
43196 | // Shrink the operands of mul. |
43197 | SDValue NewN0 = DAG.getNode(ISD::TRUNCATE, DL, ReducedVT, N0); |
43198 | SDValue NewN1 = DAG.getNode(ISD::TRUNCATE, DL, ReducedVT, N1); |
43199 | |
43200 | // Generate the lower part of mul: pmullw. For MULU8/MULS8, only the |
43201 | // lower part is needed. |
43202 | SDValue MulLo = DAG.getNode(ISD::MUL, DL, ReducedVT, NewN0, NewN1); |
43203 | if (Mode == ShrinkMode::MULU8 || Mode == ShrinkMode::MULS8) |
43204 | return DAG.getNode((Mode == ShrinkMode::MULU8) ? ISD::ZERO_EXTEND |
43205 | : ISD::SIGN_EXTEND, |
43206 | DL, VT, MulLo); |
43207 | |
43208 | EVT ResVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts / 2); |
43209 | // Generate the higher part of mul: pmulhw/pmulhuw. For MULU16/MULS16, |
43210 | // the higher part is also needed. |
43211 | SDValue MulHi = |
43212 | DAG.getNode(Mode == ShrinkMode::MULS16 ? ISD::MULHS : ISD::MULHU, DL, |
43213 | ReducedVT, NewN0, NewN1); |
43214 | |
43215 | // Repack the lower part and higher part result of mul into a wider |
43216 | // result. |
43217 | // Generate shuffle functioning as punpcklwd. |
43218 | SmallVector<int, 16> ShuffleMask(NumElts); |
43219 | for (unsigned i = 0, e = NumElts / 2; i < e; i++) { |
43220 | ShuffleMask[2 * i] = i; |
43221 | ShuffleMask[2 * i + 1] = i + NumElts; |
43222 | } |
43223 | SDValue ResLo = |
43224 | DAG.getVectorShuffle(ReducedVT, DL, MulLo, MulHi, ShuffleMask); |
43225 | ResLo = DAG.getBitcast(ResVT, ResLo); |
43226 | // Generate shuffle functioning as punpckhwd. |
43227 | for (unsigned i = 0, e = NumElts / 2; i < e; i++) { |
43228 | ShuffleMask[2 * i] = i + NumElts / 2; |
43229 | ShuffleMask[2 * i + 1] = i + NumElts * 3 / 2; |
43230 | } |
43231 | SDValue ResHi = |
43232 | DAG.getVectorShuffle(ReducedVT, DL, MulLo, MulHi, ShuffleMask); |
43233 | ResHi = DAG.getBitcast(ResVT, ResHi); |
43234 | return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ResLo, ResHi); |
43235 | } |
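// ---------------------------------------------------------------------------
// [Editorial sketch, not part of the original source] The MULU16 path above
// in scalar form: the full 32-bit product of two 16-bit values is the PMULLW
// result in the low half and the PMULHUW result in the high half, which is
// what the punpckl/punpckh-style shuffles reassemble. Names invented; C++14.
#include <cstdint>
constexpr uint32_t widen_mul(uint16_t a, uint16_t b) {
  uint16_t lo = uint16_t(uint32_t(a) * b);         // pmullw lane
  uint16_t hi = uint16_t((uint32_t(a) * b) >> 16); // pmulhuw lane
  return uint32_t(lo) | (uint32_t(hi) << 16);      // interleaved result
}
static_assert(widen_mul(50000, 60000) == 50000u * 60000u, "");
// ---------------------------------------------------------------------------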
43236 | |
43237 | static SDValue combineMulSpecial(uint64_t MulAmt, SDNode *N, SelectionDAG &DAG, |
43238 | EVT VT, const SDLoc &DL) { |
43239 | |
43240 | auto combineMulShlAddOrSub = [&](int Mult, int Shift, bool isAdd) { |
43241 | SDValue Result = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0), |
43242 | DAG.getConstant(Mult, DL, VT)); |
43243 | Result = DAG.getNode(ISD::SHL, DL, VT, Result, |
43244 | DAG.getConstant(Shift, DL, MVT::i8)); |
43245 | Result = DAG.getNode(isAdd ? ISD::ADD : ISD::SUB, DL, VT, Result, |
43246 | N->getOperand(0)); |
43247 | return Result; |
43248 | }; |
43249 | |
43250 | auto combineMulMulAddOrSub = [&](int Mul1, int Mul2, bool isAdd) { |
43251 | SDValue Result = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0), |
43252 | DAG.getConstant(Mul1, DL, VT)); |
43253 | Result = DAG.getNode(X86ISD::MUL_IMM, DL, VT, Result, |
43254 | DAG.getConstant(Mul2, DL, VT)); |
43255 | Result = DAG.getNode(isAdd ? ISD::ADD : ISD::SUB, DL, VT, Result, |
43256 | N->getOperand(0)); |
43257 | return Result; |
43258 | }; |
43259 | |
43260 | switch (MulAmt) { |
43261 | default: |
43262 | break; |
43263 | case 11: |
43264 | // mul x, 11 => add ((shl (mul x, 5), 1), x) |
43265 | return combineMulShlAddOrSub(5, 1, true); |
43266 | case 21: |
43267 | // mul x, 21 => add ((shl (mul x, 5), 2), x) |
43268 | return combineMulShlAddOrSub(5, 2, true); |
43269 | case 41: |
43270 | // mul x, 41 => add ((shl (mul x, 5), 3), x) |
43271 | return combineMulShlAddOrSub(5, 3, true); |
43272 | case 22: |
43273 | // mul x, 22 => add (add ((shl (mul x, 5), 2), x), x) |
43274 | return DAG.getNode(ISD::ADD, DL, VT, N->getOperand(0), |
43275 | combineMulShlAddOrSub(5, 2, true)); |
43276 | case 19: |
43277 | // mul x, 19 => add ((shl (mul x, 9), 1), x) |
43278 | return combineMulShlAddOrSub(9, 1, true); |
43279 | case 37: |
43280 | // mul x, 37 => add ((shl (mul x, 9), 2), x) |
43281 | return combineMulShlAddOrSub(9, 2, true); |
43282 | case 73: |
43283 | // mul x, 73 => add ((shl (mul x, 9), 3), x) |
43284 | return combineMulShlAddOrSub(9, 3, true); |
43285 | case 13: |
43286 | // mul x, 13 => add ((shl (mul x, 3), 2), x) |
43287 | return combineMulShlAddOrSub(3, 2, true); |
43288 | case 23: |
43289 | // mul x, 23 => sub ((shl (mul x, 3), 3), x) |
43290 | return combineMulShlAddOrSub(3, 3, false); |
43291 | case 26: |
43292 | // mul x, 26 => add (mul (mul x, 5), 5), x |
43293 | return combineMulMulAddOrSub(5, 5, true); |
43294 | case 28: |
43295 | // mul x, 28 => add (mul (mul x, 9), 3), x |
43296 | return combineMulMulAddOrSub(9, 3, true); |
43297 | case 29: |
43298 | // mul x, 29 => add (add (mul (mul x, 9), 3), x), x |
43299 | return DAG.getNode(ISD::ADD, DL, VT, N->getOperand(0), |
43300 | combineMulMulAddOrSub(9, 3, true)); |
43301 | } |
43302 | |
43303 | |
43304 | // Another trick: if this is "mul x, (2^N + 2^M)" with M in [1,3], we can |
43305 | // convert it to "add (shl x, N), (shl x, M)"; the second shift can then |
43306 | // be folded into the scale of an LEA. This catches multipliers like 34 |
43307 | // and 68 that are not handled by the switch above. |
43308 | if (isPowerOf2_64(MulAmt & (MulAmt - 1))) { |
43309 | unsigned ScaleShift = countTrailingZeros(MulAmt); |
43310 | if (ScaleShift >= 1 && ScaleShift < 4) { |
43311 | unsigned ShiftAmt = Log2_64((MulAmt & (MulAmt - 1))); |
43312 | SDValue Shift1 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), |
43313 | DAG.getConstant(ShiftAmt, DL, MVT::i8)); |
43314 | SDValue Shift2 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), |
43315 | DAG.getConstant(ScaleShift, DL, MVT::i8)); |
43316 | return DAG.getNode(ISD::ADD, DL, VT, Shift1, Shift2); |
43317 | } |
43318 | } |
43319 | |
43320 | return SDValue(); |
43321 | } |
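// ---------------------------------------------------------------------------
// [Editorial sketch, not part of the original source] The LEA-friendly
// decompositions from the switch above, checked once per entry; each
// right-hand side is two or three LEA/SHL/ADD steps. C++11.
#include <cstdint>
constexpr uint64_t x = 123;
static_assert(((x * 5) << 1) + x == x * 11, "");
static_assert(((x * 5) << 2) + x == x * 21, "");
static_assert(((x * 5) << 3) + x == x * 41, "");
static_assert((((x * 5) << 2) + x) + x == x * 22, "");
static_assert(((x * 9) << 1) + x == x * 19, "");
static_assert(((x * 9) << 2) + x == x * 37, "");
static_assert(((x * 9) << 3) + x == x * 73, "");
static_assert(((x * 3) << 2) + x == x * 13, "");
static_assert(((x * 3) << 3) - x == x * 23, "");
static_assert(x * 5 * 5 + x == x * 26, "");
static_assert(x * 9 * 3 + x == x * 28, "");
static_assert((x * 9 * 3 + x) + x == x * 29, "");
// Two-set-bits fallthrough case, e.g. 34 = (1 << 5) + (1 << 1):
static_assert((x << 5) + (x << 1) == x * 34, "");
// ---------------------------------------------------------------------------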
43322 | |
43323 | // If the upper 17 bits of each element are zero then we can use PMADDWD, |
43324 | // which is always at least as quick as PMULLD, except on KNL. |
43325 | static SDValue combineMulToPMADDWD(SDNode *N, SelectionDAG &DAG, |
43326 | const X86Subtarget &Subtarget) { |
43327 | if (!Subtarget.hasSSE2()) |
43328 | return SDValue(); |
43329 | |
43330 | if (Subtarget.isPMADDWDSlow()) |
43331 | return SDValue(); |
43332 | |
43333 | EVT VT = N->getValueType(0); |
43334 | |
43335 | // Only support vXi32 vectors. |
43336 | if (!VT.isVector() || VT.getVectorElementType() != MVT::i32) |
43337 | return SDValue(); |
43338 | |
43339 | // Make sure the type is legal or will be widened to a legal type. |
43340 | if (VT != MVT::v2i32 && !DAG.getTargetLoweringInfo().isTypeLegal(VT)) |
43341 | return SDValue(); |
43342 | |
43343 | MVT WVT = MVT::getVectorVT(MVT::i16, 2 * VT.getVectorNumElements()); |
43344 | |
43345 | // Without BWI, we would need to split v32i16. |
43346 | if (WVT == MVT::v32i16 && !Subtarget.hasBWI()) |
43347 | return SDValue(); |
43348 | |
43349 | SDValue N0 = N->getOperand(0); |
43350 | SDValue N1 = N->getOperand(1); |
43351 | |
43352 | // If we are zero extending two steps without SSE4.1, its better to reduce |
43353 | // the vmul width instead. |
43354 | if (!Subtarget.hasSSE41() && |
43355 | (N0.getOpcode() == ISD::ZERO_EXTEND && |
43356 | N0.getOperand(0).getScalarValueSizeInBits() <= 8) && |
43357 | (N1.getOpcode() == ISD::ZERO_EXTEND && |
43358 | N1.getOperand(0).getScalarValueSizeInBits() <= 8)) |
43359 | return SDValue(); |
43360 | |
43361 | APInt Mask17 = APInt::getHighBitsSet(32, 17); |
43362 | if (!DAG.MaskedValueIsZero(N1, Mask17) || |
43363 | !DAG.MaskedValueIsZero(N0, Mask17)) |
43364 | return SDValue(); |
43365 | |
43366 | // Use SplitOpsAndApply to handle AVX splitting. |
43367 | auto PMADDWDBuilder = [](SelectionDAG &DAG, const SDLoc &DL, |
43368 | ArrayRef<SDValue> Ops) { |
43369 | MVT OpVT = MVT::getVectorVT(MVT::i32, Ops[0].getValueSizeInBits() / 32); |
43370 | return DAG.getNode(X86ISD::VPMADDWD, DL, OpVT, Ops); |
43371 | }; |
43372 | return SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT, |
43373 | { DAG.getBitcast(WVT, N0), DAG.getBitcast(WVT, N1) }, |
43374 | PMADDWDBuilder); |
43375 | } |
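// ---------------------------------------------------------------------------
// [Editorial sketch, not part of the original source] Why the Mask17 check
// makes PMADDWD exact: with the upper 17 bits of each i32 known zero, the
// value splits into a high i16 of 0 and a non-negative low i16 (< 2^15), so
// the lane dot product lo0*lo1 + hi0*hi1 collapses to the plain 32-bit
// multiply with no signed-i16 wraparound. Names invented; C++11.
#include <cstdint>
constexpr int32_t pmaddwd_pair(int16_t a0, int16_t a1, int16_t b0, int16_t b1) {
  return int32_t(a0) * b0 + int32_t(a1) * b1;
}
constexpr uint32_t A = 0x5FFF, B = 0x7ABC; // both < 2^15
static_assert(pmaddwd_pair(int16_t(A), int16_t(A >> 16),
                           int16_t(B), int16_t(B >> 16)) == int32_t(A * B),
              "");
// ---------------------------------------------------------------------------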
43376 | |
43377 | static SDValue combineMulToPMULDQ(SDNode *N, SelectionDAG &DAG, |
43378 | const X86Subtarget &Subtarget) { |
43379 | if (!Subtarget.hasSSE2()) |
43380 | return SDValue(); |
43381 | |
43382 | EVT VT = N->getValueType(0); |
43383 | |
43384 | // Only support vXi64 vectors. |
43385 | if (!VT.isVector() || VT.getVectorElementType() != MVT::i64 || |
43386 | VT.getVectorNumElements() < 2 || |
43387 | !isPowerOf2_32(VT.getVectorNumElements())) |
43388 | return SDValue(); |
43389 | |
43390 | SDValue N0 = N->getOperand(0); |
43391 | SDValue N1 = N->getOperand(1); |
43392 | |
43393 | // PMULDQ returns the 64-bit result of the signed multiplication of the |
43394 | // lower 32-bits. We can lower with this if the sign bits stretch that far. |
43395 | if (Subtarget.hasSSE41() && DAG.ComputeNumSignBits(N0) > 32 && |
43396 | DAG.ComputeNumSignBits(N1) > 32) { |
43397 | auto PMULDQBuilder = [](SelectionDAG &DAG, const SDLoc &DL, |
43398 | ArrayRef<SDValue> Ops) { |
43399 | return DAG.getNode(X86ISD::PMULDQ, DL, Ops[0].getValueType(), Ops); |
43400 | }; |
43401 | return SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT, { N0, N1 }, |
43402 | PMULDQBuilder, false); |
43403 | } |
43404 | |
43405 | // If the upper bits are zero we can use a single pmuludq. |
43406 | APInt Mask = APInt::getHighBitsSet(64, 32); |
43407 | if (DAG.MaskedValueIsZero(N0, Mask) && DAG.MaskedValueIsZero(N1, Mask)) { |
43408 | auto PMULUDQBuilder = [](SelectionDAG &DAG, const SDLoc &DL, |
43409 | ArrayRef<SDValue> Ops) { |
43410 | return DAG.getNode(X86ISD::PMULUDQ, DL, Ops[0].getValueType(), Ops); |
43411 | }; |
43412 | return SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT, { N0, N1 }, |
43413 | PMULUDQBuilder, false); |
43414 | } |
43415 | |
43416 | return SDValue(); |
43417 | } |
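// ---------------------------------------------------------------------------
// [Editorial sketch, not part of the original source] PMULUDQ multiplies only
// the low 32 bits of each i64 lane, so the fold above is exact precisely when
// MaskedValueIsZero proves the high halves are zero. Name invented; C++11.
#include <cstdint>
constexpr uint64_t pmuludq_lane(uint64_t a, uint64_t b) {
  return (a & 0xffffffffu) * (b & 0xffffffffu); // hardware lane semantics
}
static_assert(pmuludq_lane(0xdeadbeef, 0x12345678) ==
                  0xdeadbeefull * 0x12345678ull,
              "");
// ---------------------------------------------------------------------------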
43418 | |
43419 | /// Optimize a single multiply with constant into two operations in order |
43420 | /// to implement it with two cheaper instructions, e.g. LEA + SHL, LEA + LEA. |
43421 | static SDValue combineMul(SDNode *N, SelectionDAG &DAG, |
43422 | TargetLowering::DAGCombinerInfo &DCI, |
43423 | const X86Subtarget &Subtarget) { |
43424 | EVT VT = N->getValueType(0); |
43425 | |
43426 | if (SDValue V = combineMulToPMADDWD(N, DAG, Subtarget)) |
43427 | return V; |
43428 | |
43429 | if (SDValue V = combineMulToPMULDQ(N, DAG, Subtarget)) |
43430 | return V; |
43431 | |
43432 | if (DCI.isBeforeLegalize() && VT.isVector()) |
43433 | return reduceVMULWidth(N, DAG, Subtarget); |
43434 | |
43435 | if (!MulConstantOptimization) |
43436 | return SDValue(); |
43437 | |
43438 | if (DAG.getMachineFunction().getFunction().hasMinSize()) |
43439 | return SDValue(); |
43440 | |
43441 | if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer()) |
43442 | return SDValue(); |
43443 | |
43444 | if (VT != MVT::i64 && VT != MVT::i32) |
43445 | return SDValue(); |
43446 | |
43447 | ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1)); |
43448 | if (!C) |
43449 | return SDValue(); |
43450 | if (isPowerOf2_64(C->getZExtValue())) |
43451 | return SDValue(); |
43452 | |
43453 | int64_t SignMulAmt = C->getSExtValue(); |
43454 | assert(SignMulAmt != INT64_MIN && "Int min should have been handled!"); |
43455 | uint64_t AbsMulAmt = SignMulAmt < 0 ? -SignMulAmt : SignMulAmt; |
43456 | |
43457 | SDLoc DL(N); |
43458 | if (AbsMulAmt == 3 || AbsMulAmt == 5 || AbsMulAmt == 9) { |
43459 | SDValue NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0), |
43460 | DAG.getConstant(AbsMulAmt, DL, VT)); |
43461 | if (SignMulAmt < 0) |
43462 | NewMul = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), |
43463 | NewMul); |
43464 | |
43465 | return NewMul; |
43466 | } |
43467 | |
43468 | uint64_t MulAmt1 = 0; |
43469 | uint64_t MulAmt2 = 0; |
43470 | if ((AbsMulAmt % 9) == 0) { |
43471 | MulAmt1 = 9; |
43472 | MulAmt2 = AbsMulAmt / 9; |
43473 | } else if ((AbsMulAmt % 5) == 0) { |
43474 | MulAmt1 = 5; |
43475 | MulAmt2 = AbsMulAmt / 5; |
43476 | } else if ((AbsMulAmt % 3) == 0) { |
43477 | MulAmt1 = 3; |
43478 | MulAmt2 = AbsMulAmt / 3; |
43479 | } |
43480 | |
43481 | SDValue NewMul; |
43482 | |
43483 | if (MulAmt2 && |
43484 | (isPowerOf2_64(MulAmt2) || |
43485 | (SignMulAmt >= 0 && (MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9)))) { |
43486 | |
43487 | if (isPowerOf2_64(MulAmt2) && |
43488 | !(SignMulAmt >= 0 && N->hasOneUse() && |
43489 | N->use_begin()->getOpcode() == ISD::ADD)) |
43490 | |
43491 | // If the second multiplier is pow2, issue it first. We want the multiply |
43492 | // by 3, 5, or 9 to be folded into the addressing mode unless the lone |
43493 | // use is an add. |
43494 | std::swap(MulAmt1, MulAmt2); |
43495 | |
43496 | if (isPowerOf2_64(MulAmt1)) |
43497 | NewMul = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), |
43498 | DAG.getConstant(Log2_64(MulAmt1), DL, MVT::i8)); |
43499 | else |
43500 | NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0), |
43501 | DAG.getConstant(MulAmt1, DL, VT)); |
43502 | |
43503 | if (isPowerOf2_64(MulAmt2)) |
43504 | NewMul = DAG.getNode(ISD::SHL, DL, VT, NewMul, |
43505 | DAG.getConstant(Log2_64(MulAmt2), DL, MVT::i8)); |
43506 | else |
43507 | NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, NewMul, |
43508 | DAG.getConstant(MulAmt2, DL, VT)); |
43509 | |
43510 | // Negate the result. |
43511 | if (SignMulAmt < 0) |
43512 | NewMul = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), |
43513 | NewMul); |
43514 | } else if (!Subtarget.slowLEA()) |
43515 | NewMul = combineMulSpecial(C->getZExtValue(), N, DAG, VT, DL); |
43516 | |
43517 | if (!NewMul) { |
43518 | assert(C->getZExtValue() != 0 && |
43519 | C->getZExtValue() != (VT == MVT::i64 ? UINT64_MAX : UINT32_MAX) && |
43520 | "Both cases that could cause potential overflows should have " |
43521 | "already been handled."); |
43522 | if (isPowerOf2_64(AbsMulAmt - 1)) { |
43523 | // (mul x, 2^N + 1) => (add (shl x, N), x) |
43524 | NewMul = DAG.getNode( |
43525 | ISD::ADD, DL, VT, N->getOperand(0), |
43526 | DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), |
43527 | DAG.getConstant(Log2_64(AbsMulAmt - 1), DL, |
43528 | MVT::i8))); |
43529 | |
43530 | // To negate, subtract the number from zero. |
43531 | NewMul = DAG.getNode(ISD::SUB, DL, VT, |
43532 | DAG.getConstant(0, DL, VT), NewMul); |
43533 | } else if (isPowerOf2_64(AbsMulAmt + 1)) { |
43534 | // (mul x, 2^N - 1) => (sub (shl x, N), x) |
43535 | NewMul = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), |
43536 | DAG.getConstant(Log2_64(AbsMulAmt + 1), |
43537 | DL, MVT::i8)); |
43538 | // To negate, reverse the operands of the subtract. |
43539 | if (SignMulAmt < 0) |
43540 | NewMul = DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), NewMul); |
43541 | else |
43542 | NewMul = DAG.getNode(ISD::SUB, DL, VT, NewMul, N->getOperand(0)); |
43543 | } else if (SignMulAmt >= 0 && isPowerOf2_64(AbsMulAmt - 2)) { |
43544 | // (mul x, 2^N + 2) => (add (add (shl x, N), x), x) |
43545 | NewMul = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), |
43546 | DAG.getConstant(Log2_64(AbsMulAmt - 2), |
43547 | DL, MVT::i8)); |
43548 | NewMul = DAG.getNode(ISD::ADD, DL, VT, NewMul, N->getOperand(0)); |
43549 | NewMul = DAG.getNode(ISD::ADD, DL, VT, NewMul, N->getOperand(0)); |
43550 | } else if (SignMulAmt >= 0 && isPowerOf2_64(AbsMulAmt + 2)) { |
43551 | // (mul x, 2^N - 2) => (sub (sub (shl x, N), x), x) |
43552 | NewMul = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0), |
43553 | DAG.getConstant(Log2_64(AbsMulAmt + 2), |
43554 | DL, MVT::i8)); |
43555 | NewMul = DAG.getNode(ISD::SUB, DL, VT, NewMul, N->getOperand(0)); |
43556 | NewMul = DAG.getNode(ISD::SUB, DL, VT, NewMul, N->getOperand(0)); |
43557 | } |
43558 | } |
43559 | |
43560 | return NewMul; |
43561 | } |
43562 | |
43563 | // Try to form a MULHU or MULHS node by looking for |
43564 | // (srl (mul ext, ext), 16) |
43565 | // TODO: This is X86 specific because we want to be able to handle wide |
43566 | // types before type legalization. But we can only do it if the vector |
43567 | // will be legalized via widening/splitting. Type legalization can't |
43568 | // handle promotion of a MULHU/MULHS. There isn't a way to convey this |
43569 | // to the generic DAG combiner. |
43570 | static SDValue combineShiftToPMULH(SDNode *N, SelectionDAG &DAG, |
43571 | const X86Subtarget &Subtarget) { |
43572 | assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) && |
43573 | "SRL or SRA node is required here!"); |
43574 | SDLoc DL(N); |
43575 | |
43576 | // Only do this with SSE4.1. On earlier targets reduceVMULWidth will |
43577 | // expand the multiply. |
43578 | if (!Subtarget.hasSSE41()) |
43579 | return SDValue(); |
43580 | |
43581 | // The operation feeding into the shift must be a multiply. |
43582 | SDValue ShiftOperand = N->getOperand(0); |
43583 | if (ShiftOperand.getOpcode() != ISD::MUL || !ShiftOperand.hasOneUse()) |
43584 | return SDValue(); |
43585 | |
43586 | // Input type should be at least vXi32. |
43587 | EVT VT = N->getValueType(0); |
43588 | if (!VT.isVector() || VT.getVectorElementType().getSizeInBits() < 32) |
43589 | return SDValue(); |
43590 | |
43591 | // Need a shift by 16. |
43592 | APInt ShiftAmt; |
43593 | if (!ISD::isConstantSplatVector(N->getOperand(1).getNode(), ShiftAmt) || |
43594 | ShiftAmt != 16) |
43595 | return SDValue(); |
43596 | |
43597 | SDValue LHS = ShiftOperand.getOperand(0); |
43598 | SDValue RHS = ShiftOperand.getOperand(1); |
43599 | |
43600 | unsigned ExtOpc = LHS.getOpcode(); |
43601 | if ((ExtOpc != ISD::SIGN_EXTEND && ExtOpc != ISD::ZERO_EXTEND) || |
43602 | RHS.getOpcode() != ExtOpc) |
43603 | return SDValue(); |
43604 | |
43605 | // Peek through the extends. |
43606 | LHS = LHS.getOperand(0); |
43607 | RHS = RHS.getOperand(0); |
43608 | |
43609 | // Ensure the input types match. |
43610 | EVT MulVT = LHS.getValueType(); |
43611 | if (MulVT.getVectorElementType() != MVT::i16 || RHS.getValueType() != MulVT) |
43612 | return SDValue(); |
43613 | |
43614 | unsigned Opc = ExtOpc == ISD::SIGN_EXTEND ? ISD::MULHS : ISD::MULHU; |
43615 | SDValue Mulh = DAG.getNode(Opc, DL, MulVT, LHS, RHS); |
43616 | |
43617 | ExtOpc = N->getOpcode() == ISD::SRA ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; |
43618 | return DAG.getNode(ExtOpc, DL, VT, Mulh); |
43619 | } |
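// ---------------------------------------------------------------------------
// [Editorial sketch, not part of the original source] The pattern above in
// scalar form: (sext(a) * sext(b)) >> 16 on i32 is the signed high-half
// multiply PMULHW implements (PMULHUW for the zext variant). Non-negative
// inputs are used so the check is shift-semantics clean pre-C++20.
#include <cstdint>
constexpr int16_t mulhs16(int16_t a, int16_t b) {
  return int16_t((int32_t(a) * int32_t(b)) >> 16);
}
static_assert(mulhs16(0x4000, 0x4000) == 0x1000, ""); // 2^28 >> 16 == 2^12
static_assert(mulhs16(0x7fff, 0x7fff) == 0x3fff, "");
// ---------------------------------------------------------------------------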
43620 | |
43621 | static SDValue combineShiftLeft(SDNode *N, SelectionDAG &DAG) { |
43622 | SDValue N0 = N->getOperand(0); |
43623 | SDValue N1 = N->getOperand(1); |
43624 | ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); |
43625 | EVT VT = N0.getValueType(); |
43626 | |
43627 | // fold (shl (and (setcc_c), c1), c2) -> (and setcc_c, (c1 << c2)) |
43628 | // since the result of setcc_c is all zero's or all ones. |
43629 | if (VT.isInteger() && !VT.isVector() && |
43630 | N1C && N0.getOpcode() == ISD::AND && |
43631 | N0.getOperand(1).getOpcode() == ISD::Constant) { |
43632 | SDValue N00 = N0.getOperand(0); |
43633 | APInt Mask = N0.getConstantOperandAPInt(1); |
43634 | Mask <<= N1C->getAPIntValue(); |
43635 | bool MaskOK = false; |
43636 | |
43637 | // Cases we can accept without further checks: |
43638 | //   (shl (and (setcc_c), c1), c2) |
43639 | // and the widened forms, where the extension preserves the all-ones / |
43640 | // all-zeros property of setcc_c: |
43641 | //   (shl (and (sext (setcc_c)), c1), c2) |
43642 | //   (shl (and (zext (setcc_c)), c1), c2) |
43643 | //   (shl (and (aext (setcc_c)), c1), c2) |
43644 | // For zext/aext the shifted mask must additionally fit inside the |
43645 | // narrower source type, since the zero-extended high bits are not |
43646 | // all-ones. |
43647 | if (N00.getOpcode() == X86ISD::SETCC_CARRY) { |
43648 | MaskOK = true; |
43649 | } else if (N00.getOpcode() == ISD::SIGN_EXTEND && |
43650 | N00.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) { |
43651 | MaskOK = true; |
43652 | } else if ((N00.getOpcode() == ISD::ZERO_EXTEND || |
43653 | N00.getOpcode() == ISD::ANY_EXTEND) && |
43654 | N00.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) { |
43655 | MaskOK = Mask.isIntN(N00.getOperand(0).getValueSizeInBits()); |
43656 | } |
43657 | if (MaskOK && Mask != 0) { |
43658 | SDLoc DL(N); |
43659 | return DAG.getNode(ISD::AND, DL, VT, N00, DAG.getConstant(Mask, DL, VT)); |
43660 | } |
43661 | } |
43662 | |
43663 | // Hardware support for vector shifts is sparse which makes us scalarize |
43664 | // the vector operations in many cases. Also, on sandybridge ADD is faster |
43665 | // than shl. |
43666 | // (shl V, 1) -> add V,V |
43667 | if (auto *N1BV = dyn_cast<BuildVectorSDNode>(N1)) |
43668 | if (auto *N1SplatC = N1BV->getConstantSplatNode()) { |
43669 | assert(N0.getValueType().isVector() && "Invalid vector shift type"); |
43670 | |
43671 | // We shift all of the values by one. In many cases we do not have hardware |
43672 | // support for this operation; it is better expressed as an ADD of two values. |
43673 | if (N1SplatC->isOne()) |
43674 | return DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N0); |
43675 | } |
43676 | |
43677 | return SDValue(); |
43678 | } |
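// ---------------------------------------------------------------------------
// [Editorial sketch, not part of the original source] Scalar form of the two
// folds above. Because SETCC_CARRY is all-zeros or all-ones, the shift can be
// folded into the mask constant alone; and a shift-by-one is a self-add,
// which vector ISAs handle with PADD. Names invented; C++11.
#include <cstdint>
constexpr uint32_t folded(uint32_t x, uint32_t c1, unsigned c2) {
  return x & (c1 << c2); // (shl (and x, c1), c2) with the shl folded away
}
static_assert(folded(0u, 0x0fu, 4) == ((0u & 0x0fu) << 4), "");
static_assert(folded(~0u, 0x0fu, 4) == ((~0u & 0x0fu) << 4), "");
static_assert((7u << 1) == 7u + 7u, ""); // (shl V, 1) -> add V,V
// ---------------------------------------------------------------------------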
43679 | |
43680 | static SDValue combineShiftRightArithmetic(SDNode *N, SelectionDAG &DAG, |
43681 | const X86Subtarget &Subtarget) { |
43682 | SDValue N0 = N->getOperand(0); |
43683 | SDValue N1 = N->getOperand(1); |
43684 | EVT VT = N0.getValueType(); |
43685 | unsigned Size = VT.getSizeInBits(); |
43686 | |
43687 | if (SDValue V = combineShiftToPMULH(N, DAG, Subtarget)) |
43688 | return V; |
43689 | |
43690 | |
43691 | // fold (ashr (shl, a, [56,48,32,24,16]), SarConst) |
43692 | // into (shl, (sext (a), [56,48,32,24,16] - SarConst)) or |
43693 | // into (sra, (sext (a), SarConst - [56,48,32,24,16])) |
43694 | // depending on sign of (SarConst - [56,48,32,24,16]) |
43695 | |
43696 | // sexts in X86 are MOVs. The MOVs have the same code size |
43697 | // as above SHIFTs (only SHIFT on 1 has lower code size). |
43698 | // However the MOVs have 2 advantages to a SHIFT: |
43699 | // 1. MOVs can write to a register that differs from source |
43700 | // 2. MOVs accept memory operands |
43701 | if (VT.isVector() || N1.getOpcode() != ISD::Constant || |
43702 | N0.getOpcode() != ISD::SHL || !N0.hasOneUse() || |
43703 | N0.getOperand(1).getOpcode() != ISD::Constant) |
43704 | return SDValue(); |
43705 | |
43706 | SDValue N00 = N0.getOperand(0); |
43707 | SDValue N01 = N0.getOperand(1); |
43708 | APInt ShlConst = (cast<ConstantSDNode>(N01))->getAPIntValue(); |
43709 | APInt SarConst = (cast<ConstantSDNode>(N1))->getAPIntValue(); |
43710 | EVT CVT = N1.getValueType(); |
43711 | |
43712 | if (SarConst.isNegative()) |
43713 | return SDValue(); |
43714 | |
43715 | for (MVT SVT : { MVT::i8, MVT::i16, MVT::i32 }) { |
43716 | unsigned ShiftSize = SVT.getSizeInBits(); |
43717 | // Skip types without corresponding sext/zext and shl constants that are |
43718 | // not one of [56,48,32,24,16] (i.e. Size - ShiftSize). |
43719 | if (ShiftSize >= Size || ShlConst != Size - ShiftSize) |
43720 | continue; |
43721 | SDLoc DL(N); |
43722 | SDValue NN = |
43723 | DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, N00, DAG.getValueType(SVT)); |
43724 | SarConst = SarConst - (Size - ShiftSize); |
43725 | if (SarConst == 0) |
43726 | return NN; |
43727 | else if (SarConst.isNegative()) |
43728 | return DAG.getNode(ISD::SHL, DL, VT, NN, |
43729 | DAG.getConstant(-SarConst, DL, CVT)); |
43730 | else |
43731 | return DAG.getNode(ISD::SRA, DL, VT, NN, |
43732 | DAG.getConstant(SarConst, DL, CVT)); |
43733 | } |
43734 | return SDValue(); |
43735 | } |
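// ---------------------------------------------------------------------------
// [Editorial sketch, not part of the original source] The SHL+SRA fold above
// for the i8-in-i32 case (ShlConst == 24): shifting left by 24 and then
// arithmetically right by 24 + k equals sign-extending the low byte and
// shifting right by k. Compile as C++20, which guarantees arithmetic >> and
// modular signed conversions. Names invented.
#include <cstdint>
constexpr int32_t sext8(uint32_t v) { return int8_t(v & 0xff); }
constexpr int32_t via_shifts(uint32_t v, int k) {
  return int32_t(v << 24) >> (24 + k); // SHL then SAR, as in the DAG
}
static_assert(via_shifts(0x80u, 2) == sext8(0x80u) >> 2, "");
static_assert(via_shifts(0x7fu, 3) == sext8(0x7fu) >> 3, "");
// ---------------------------------------------------------------------------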
43736 | |
43737 | static SDValue combineShiftRightLogical(SDNode *N, SelectionDAG &DAG, |
43738 | TargetLowering::DAGCombinerInfo &DCI, |
43739 | const X86Subtarget &Subtarget) { |
43740 | SDValue N0 = N->getOperand(0); |
43741 | SDValue N1 = N->getOperand(1); |
43742 | EVT VT = N0.getValueType(); |
43743 | |
43744 | if (SDValue V = combineShiftToPMULH(N, DAG, Subtarget)) |
43745 | return V; |
43746 | |
43747 | // Only do this on the last DAG combine as it can interfere with other |
43748 | // combines. |
43749 | if (!DCI.isAfterLegalizeDAG()) |
43750 | return SDValue(); |
43751 | |
43752 | |
43753 | // Try to improve a sequence of srl (and X, C1), C2 by inverting the order. |
43754 | // This is only worthwhile if the resulting mask constant becomes cheaper |
43755 | // to materialize (see the immediate-size check below). |
43756 | if (N0.getOpcode() != ISD::AND || !N0.hasOneUse()) |
43757 | return SDValue(); |
43758 | |
43759 | auto *ShiftC = dyn_cast<ConstantSDNode>(N1); |
43760 | auto *AndC = dyn_cast<ConstantSDNode>(N0.getOperand(1)); |
43761 | if (!ShiftC || !AndC) |
43762 | return SDValue(); |
43763 | |
43764 | // If we can shrink the constant mask below 8-bits or 32-bits, then this |
43765 | // transform should reduce code size. It may also enable secondary |
43766 | // transforms from improved known-bits analysis or instruction selection. |
43767 | APInt MaskVal = AndC->getAPIntValue(); |
43768 | |
43769 | // If this can be matched by a zero extend, don't optimize. |
43770 | if (MaskVal.isMask()) { |
43771 | unsigned TO = MaskVal.countTrailingOnes(); |
43772 | if (TO >= 8 && isPowerOf2_32(TO)) |
43773 | return SDValue(); |
43774 | } |
43775 | |
43776 | APInt NewMaskVal = MaskVal.lshr(ShiftC->getAPIntValue()); |
43777 | unsigned OldMaskSize = MaskVal.getMinSignedBits(); |
43778 | unsigned NewMaskSize = NewMaskVal.getMinSignedBits(); |
43779 | if ((OldMaskSize > 8 && NewMaskSize <= 8) || |
43780 | (OldMaskSize > 32 && NewMaskSize <= 32)) { |
43781 | // srl (and X, AndC), ShiftC --> and (srl X, ShiftC), (AndC >> ShiftC) |
43782 | SDLoc DL(N); |
43783 | SDValue NewMask = DAG.getConstant(NewMaskVal, DL, VT); |
43784 | SDValue NewShift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), N1); |
43785 | return DAG.getNode(ISD::AND, DL, VT, NewShift, NewMask); |
43786 | } |
43787 | return SDValue(); |
43788 | } |
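// ---------------------------------------------------------------------------
// [Editorial sketch, not part of the original source] The mask/shift
// reordering above relies on the identity below; the win is that the shifted
// mask constant may shrink to an 8- or 32-bit immediate. C++11.
#include <cstdint>
constexpr uint64_t X = 0xdeadbeefcafef00d;
constexpr uint64_t M = 0x00ffffff00000000; // needs a 64-bit immediate
static_assert(((X & M) >> 32) == ((X >> 32) & (M >> 32)), "");
static_assert((M >> 32) == 0x00ffffff, ""); // now a 32-bit immediate
// ---------------------------------------------------------------------------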
43789 | |
43790 | static SDValue combineHorizOpWithShuffle(SDNode *N, SelectionDAG &DAG, |
43791 | const X86Subtarget &Subtarget) { |
43792 | unsigned Opcode = N->getOpcode(); |
43793 | assert(isHorizOp(Opcode) && "Unexpected hadd/hsub/pack opcode"); |
43794 | |
43795 | SDLoc DL(N); |
43796 | EVT VT = N->getValueType(0); |
43797 | SDValue N0 = N->getOperand(0); |
43798 | SDValue N1 = N->getOperand(1); |
43799 | EVT SrcVT = N0.getValueType(); |
43800 | |
43801 | SDValue BC0 = |
43802 | N->isOnlyUserOf(N0.getNode()) ? peekThroughOneUseBitcasts(N0) : N0; |
43803 | SDValue BC1 = |
43804 | N->isOnlyUserOf(N1.getNode()) ? peekThroughOneUseBitcasts(N1) : N1; |
43805 | |
43806 | // Attempt to fold HOP(LOSUBVECTOR(SHUFFLE(X)),HISUBVECTOR(SHUFFLE(X))) |
43807 | // to SHUFFLE(HOP(LOSUBVECTOR(X),HISUBVECTOR(X))), this is mainly for |
43808 | // truncation trees that help us avoid lane crossing shuffles. |
43809 | // TODO: There's a lot more we could do for PACK/HADD style shuffle combines. |
43810 | // TODO: We don't handle vXf64 shuffles yet. |
43811 | if (VT.is128BitVector() && SrcVT.getScalarSizeInBits() <= 32 && |
43812 | BC0.getOpcode() == ISD::EXTRACT_SUBVECTOR && |
43813 | BC1.getOpcode() == ISD::EXTRACT_SUBVECTOR && |
43814 | BC0.getOperand(0) == BC1.getOperand(0) && |
43815 | BC0.getOperand(0).getValueType().is256BitVector() && |
43816 | BC0.getConstantOperandAPInt(1) == 0 && |
43817 | BC1.getConstantOperandAPInt(1) == |
43818 | BC0.getValueType().getVectorNumElements()) { |
43819 | SmallVector<SDValue> ShuffleOps; |
43820 | SmallVector<int> ShuffleMask, ScaledMask; |
43821 | SDValue Vec = peekThroughBitcasts(BC0.getOperand(0)); |
43822 | if (getTargetShuffleInputs(Vec, ShuffleOps, ShuffleMask, DAG)) { |
43823 | resolveTargetShuffleInputsAndMask(ShuffleOps, ShuffleMask); |
43824 | // Only fold if the shuffle is unary, covers the whole 256-bit source and |
43825 | // the mask can be scaled to 4 elements for the post shuffle. |
43826 | if (!isAnyZero(ShuffleMask) && ShuffleOps.size() == 1 && |
43827 | ShuffleOps[0].getValueType().is256BitVector() && |
43828 | scaleShuffleElements(ShuffleMask, 4, ScaledMask)) { |
43829 | SDValue Lo, Hi; |
43830 | MVT ShufVT = VT.isFloatingPoint() ? MVT::v4f32 : MVT::v4i32; |
43831 | std::tie(Lo, Hi) = DAG.SplitVector(ShuffleOps[0], DL); |
43832 | Lo = DAG.getBitcast(SrcVT, Lo); |
43833 | Hi = DAG.getBitcast(SrcVT, Hi); |
43834 | SDValue Res = DAG.getNode(Opcode, DL, VT, Lo, Hi); |
43835 | Res = DAG.getBitcast(ShufVT, Res); |
43836 | Res = DAG.getVectorShuffle(ShufVT, DL, Res, Res, ScaledMask); |
43837 | return DAG.getBitcast(VT, Res); |
43838 | } |
43839 | } |
43840 | } |
43841 | |
43842 | // Attempt to fold HOP(SHUFFLE(X,Y),SHUFFLE(X,Y)) -> SHUFFLE(HOP(X,Y)). |
43843 | if (VT.is128BitVector() && SrcVT.getScalarSizeInBits() <= 32) { |
43844 | // If either/both ops are a shuffle that can scale to v2x64, |
43845 | // then see if we can perform this as a v4x32 post shuffle. |
43846 | SmallVector<SDValue> Ops0, Ops1; |
43847 | SmallVector<int> Mask0, Mask1, ScaledMask0, ScaledMask1; |
43848 | bool IsShuf0 = |
43849 | getTargetShuffleInputs(BC0, Ops0, Mask0, DAG) && !isAnyZero(Mask0) && |
43850 | scaleShuffleElements(Mask0, 2, ScaledMask0) && |
43851 | all_of(Ops0, [](SDValue Op) { return Op.getValueSizeInBits() == 128; }); |
43852 | bool IsShuf1 = |
43853 | getTargetShuffleInputs(BC1, Ops1, Mask1, DAG) && !isAnyZero(Mask1) && |
43854 | scaleShuffleElements(Mask1, 2, ScaledMask1) && |
43855 | all_of(Ops1, [](SDValue Op) { return Op.getValueSizeInBits() == 128; }); |
43856 | if (IsShuf0 || IsShuf1) { |
43857 | if (!IsShuf0) { |
43858 | Ops0.assign({BC0}); |
43859 | ScaledMask0.assign({0, 1}); |
43860 | } |
43861 | if (!IsShuf1) { |
43862 | Ops1.assign({BC1}); |
43863 | ScaledMask1.assign({0, 1}); |
43864 | } |
43865 | |
43866 | SDValue LHS, RHS; |
43867 | int PostShuffle[4] = {-1, -1, -1, -1}; |
43868 | auto FindShuffleOpAndIdx = [&](int M, int &Idx, ArrayRef<SDValue> Ops) { |
43869 | if (M < 0) |
43870 | return true; |
43871 | Idx = M % 2; |
43872 | SDValue Src = Ops[M / 2]; |
43873 | if (!LHS || LHS == Src) { |
43874 | LHS = Src; |
43875 | return true; |
43876 | } |
43877 | if (!RHS || RHS == Src) { |
43878 | Idx += 2; |
43879 | RHS = Src; |
43880 | return true; |
43881 | } |
43882 | return false; |
43883 | }; |
43884 | if (FindShuffleOpAndIdx(ScaledMask0[0], PostShuffle[0], Ops0) && |
43885 | FindShuffleOpAndIdx(ScaledMask0[1], PostShuffle[1], Ops0) && |
43886 | FindShuffleOpAndIdx(ScaledMask1[0], PostShuffle[2], Ops1) && |
43887 | FindShuffleOpAndIdx(ScaledMask1[1], PostShuffle[3], Ops1)) { |
43888 | LHS = DAG.getBitcast(SrcVT, LHS); |
43889 | RHS = DAG.getBitcast(SrcVT, RHS ? RHS : LHS); |
43890 | MVT ShufVT = VT.isFloatingPoint() ? MVT::v4f32 : MVT::v4i32; |
43891 | SDValue Res = DAG.getNode(Opcode, DL, VT, LHS, RHS); |
43892 | Res = DAG.getBitcast(ShufVT, Res); |
43893 | Res = DAG.getVectorShuffle(ShufVT, DL, Res, Res, PostShuffle); |
43894 | return DAG.getBitcast(VT, Res); |
43895 | } |
43896 | } |
43897 | } |
43898 | |
43899 | // Attempt to fold HOP(SHUFFLE(X),SHUFFLE(Y)) -> SHUFFLE(HOP(X,Y)). |
43900 | if (VT.is256BitVector() && Subtarget.hasInt256()) { |
43901 | SmallVector<int> Mask0, Mask1; |
43902 | SmallVector<SDValue> Ops0, Ops1; |
43903 | SmallVector<int, 2> ScaledMask0, ScaledMask1; |
43904 | if (getTargetShuffleInputs(BC0, Ops0, Mask0, DAG) && !isAnyZero(Mask0) && |
43905 | getTargetShuffleInputs(BC1, Ops1, Mask1, DAG) && !isAnyZero(Mask1) && |
43906 | !Ops0.empty() && !Ops1.empty() && |
43907 | all_of(Ops0, |
43908 | [](SDValue Op) { return Op.getValueType().is256BitVector(); }) && |
43909 | all_of(Ops1, |
43910 | [](SDValue Op) { return Op.getValueType().is256BitVector(); }) && |
43911 | scaleShuffleElements(Mask0, 2, ScaledMask0) && |
43912 | scaleShuffleElements(Mask1, 2, ScaledMask1)) { |
43913 | SDValue Op00 = peekThroughBitcasts(Ops0.front()); |
43914 | SDValue Op10 = peekThroughBitcasts(Ops1.front()); |
43915 | SDValue Op01 = peekThroughBitcasts(Ops0.back()); |
43916 | SDValue Op11 = peekThroughBitcasts(Ops1.back()); |
43917 | if ((Op00 == Op11) && (Op01 == Op10)) { |
43918 | std::swap(Op10, Op11); |
43919 | ShuffleVectorSDNode::commuteMask(ScaledMask1); |
43920 | } |
43921 | if ((Op00 == Op10) && (Op01 == Op11)) { |
43922 | const int Map[4] = {0, 2, 1, 3}; |
43923 | SmallVector<int, 4> ShuffleMask( |
43924 | {Map[ScaledMask0[0]], Map[ScaledMask1[0]], Map[ScaledMask0[1]], |
43925 | Map[ScaledMask1[1]]}); |
43926 | MVT ShufVT = VT.isFloatingPoint() ? MVT::v4f64 : MVT::v4i64; |
43927 | SDValue Res = DAG.getNode(Opcode, DL, VT, DAG.getBitcast(SrcVT, Op00), |
43928 | DAG.getBitcast(SrcVT, Op01)); |
43929 | Res = DAG.getBitcast(ShufVT, Res); |
43930 | Res = DAG.getVectorShuffle(ShufVT, DL, Res, Res, ShuffleMask); |
43931 | return DAG.getBitcast(VT, Res); |
43932 | } |
43933 | } |
43934 | } |
43935 | |
43936 | return SDValue(); |
43937 | } |
43938 | |
43939 | static SDValue combineVectorPack(SDNode *N, SelectionDAG &DAG, |
43940 | TargetLowering::DAGCombinerInfo &DCI, |
43941 | const X86Subtarget &Subtarget) { |
43942 | unsigned Opcode = N->getOpcode(); |
43943 | assert((X86ISD::PACKSS == Opcode || X86ISD::PACKUS == Opcode) && |
43944 | "Unexpected pack opcode"); |
43945 | |
43946 | EVT VT = N->getValueType(0); |
43947 | SDValue N0 = N->getOperand(0); |
43948 | SDValue N1 = N->getOperand(1); |
43949 | unsigned NumDstElts = VT.getVectorNumElements(); |
43950 | unsigned DstBitsPerElt = VT.getScalarSizeInBits(); |
43951 | unsigned SrcBitsPerElt = 2 * DstBitsPerElt; |
43952 | assert(N0.getScalarValueSizeInBits() == SrcBitsPerElt && |
43953 | N1.getScalarValueSizeInBits() == SrcBitsPerElt && |
43954 | "Unexpected PACKSS/PACKUS input type"); |
43955 | |
43956 | bool IsSigned = (X86ISD::PACKSS == Opcode); |
43957 | |
43958 | // Constant Folding. |
43959 | APInt UndefElts0, UndefElts1; |
43960 | SmallVector<APInt, 32> EltBits0, EltBits1; |
43961 | if ((N0.isUndef() || N->isOnlyUserOf(N0.getNode())) && |
43962 | (N1.isUndef() || N->isOnlyUserOf(N1.getNode())) && |
43963 | getTargetConstantBitsFromNode(N0, SrcBitsPerElt, UndefElts0, EltBits0) && |
43964 | getTargetConstantBitsFromNode(N1, SrcBitsPerElt, UndefElts1, EltBits1)) { |
43965 | unsigned NumLanes = VT.getSizeInBits() / 128; |
43966 | unsigned NumSrcElts = NumDstElts / 2; |
43967 | unsigned NumDstEltsPerLane = NumDstElts / NumLanes; |
43968 | unsigned NumSrcEltsPerLane = NumSrcElts / NumLanes; |
43969 | |
43970 | APInt Undefs(NumDstElts, 0); |
43971 | SmallVector<APInt, 32> Bits(NumDstElts, APInt::getNullValue(DstBitsPerElt)); |
43972 | for (unsigned Lane = 0; Lane != NumLanes; ++Lane) { |
43973 | for (unsigned Elt = 0; Elt != NumDstEltsPerLane; ++Elt) { |
43974 | unsigned SrcIdx = Lane * NumSrcEltsPerLane + Elt % NumSrcEltsPerLane; |
43975 | auto &UndefElts = (Elt >= NumSrcEltsPerLane ? UndefElts1 : UndefElts0); |
43976 | auto &EltBits = (Elt >= NumSrcEltsPerLane ? EltBits1 : EltBits0); |
43977 | |
43978 | if (UndefElts[SrcIdx]) { |
43979 | Undefs.setBit(Lane * NumDstEltsPerLane + Elt); |
43980 | continue; |
43981 | } |
43982 | |
43983 | APInt &Val = EltBits[SrcIdx]; |
43984 | if (IsSigned) { |
43985 | // PACKSS: Truncate signed value with signed saturation. |
43986 | // Source values less than dst minint are saturated to minint. |
43987 | // Source values greater than dst maxint are saturated to maxint. |
43988 | if (Val.isSignedIntN(DstBitsPerElt)) |
43989 | Val = Val.trunc(DstBitsPerElt); |
43990 | else if (Val.isNegative()) |
43991 | Val = APInt::getSignedMinValue(DstBitsPerElt); |
43992 | else |
43993 | Val = APInt::getSignedMaxValue(DstBitsPerElt); |
43994 | } else { |
43995 | // PACKUS: Truncate signed value with unsigned saturation. |
43996 | // Source values less than zero are saturated to zero. |
43997 | // Source values greater than dst maxuint are saturated to maxuint. |
43998 | if (Val.isIntN(DstBitsPerElt)) |
43999 | Val = Val.trunc(DstBitsPerElt); |
44000 | else if (Val.isNegative()) |
44001 | Val = APInt::getNullValue(DstBitsPerElt); |
44002 | else |
44003 | Val = APInt::getAllOnesValue(DstBitsPerElt); |
44004 | } |
44005 | Bits[Lane * NumDstEltsPerLane + Elt] = Val; |
44006 | } |
44007 | } |
44008 | |
44009 | return getConstVector(Bits, Undefs, VT.getSimpleVT(), DAG, SDLoc(N)); |
44010 | } |
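// ---------------------------------------------------------------------------
// [Editorial sketch, not part of the original source] The saturation rules
// the constant folding above implements, in scalar form for the word-to-byte
// case. Names invented; C++11.
#include <cstdint>
constexpr int8_t packss_w2b(int16_t v) { // signed saturate i16 -> i8
  return v < -128 ? int8_t(-128) : v > 127 ? int8_t(127) : int8_t(v);
}
constexpr uint8_t packus_w2b(int16_t v) { // unsigned saturate i16 -> u8
  return v < 0 ? uint8_t(0) : v > 255 ? uint8_t(255) : uint8_t(v);
}
static_assert(packss_w2b(300) == 127 && packss_w2b(-300) == -128, "");
static_assert(packus_w2b(300) == 255 && packus_w2b(-300) == 0, "");
static_assert(packss_w2b(42) == 42 && packus_w2b(42) == 42, "");
// ---------------------------------------------------------------------------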
44011 | |
44012 | // Try to fold PACK(SHUFFLE(),SHUFFLE()) -> SHUFFLE(PACK()). |
44013 | if (SDValue V = combineHorizOpWithShuffle(N, DAG, Subtarget)) |
44014 | return V; |
44015 | |
44016 | // Try to combine a PACKUSWB/PACKSSWB implemented truncate with a regular |
44017 | // truncate to create a larger truncate. |
44018 | if (Subtarget.hasAVX512() && |
44019 | N0.getOpcode() == ISD::TRUNCATE && N1.isUndef() && VT == MVT::v16i8 && |
44020 | N0.getOperand(0).getValueType() == MVT::v8i32) { |
44021 | if ((IsSigned && DAG.ComputeNumSignBits(N0) > 8) || |
44022 | (!IsSigned && |
44023 | DAG.MaskedValueIsZero(N0, APInt::getHighBitsSet(16, 8)))) { |
44024 | if (Subtarget.hasVLX()) |
44025 | return DAG.getNode(X86ISD::VTRUNC, SDLoc(N), VT, N0.getOperand(0)); |
44026 | |
44027 | // Widen input to v16i32 so we can truncate that. |
44028 | SDLoc dl(N); |
44029 | SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v16i32, |
44030 | N0.getOperand(0), DAG.getUNDEF(MVT::v8i32)); |
44031 | return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Concat); |
44032 | } |
44033 | } |
44034 | |
44035 | // Try to fold PACK(EXT(X),EXT(Y)) -> CONCAT(X,Y) subvectors. |
44036 | if (VT.is128BitVector()) { |
44037 | unsigned ExtOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; |
44038 | SDValue Src0, Src1; |
44039 | if (N0.getOpcode() == ExtOpc && |
44040 | N0.getOperand(0).getValueType().is64BitVector() && |
44041 | N0.getOperand(0).getScalarValueSizeInBits() == DstBitsPerElt) { |
44042 | Src0 = N0.getOperand(0); |
44043 | } |
44044 | if (N1.getOpcode() == ExtOpc && |
44045 | N1.getOperand(0).getValueType().is64BitVector() && |
44046 | N1.getOperand(0).getScalarValueSizeInBits() == DstBitsPerElt) { |
44047 | Src1 = N1.getOperand(0); |
44048 | } |
44049 | if ((Src0 || N0.isUndef()) && (Src1 || N1.isUndef())) { |
44050 | assert((Src0 || Src1) && "Found PACK(UNDEF,UNDEF)"); |
44051 | Src0 = Src0 ? Src0 : DAG.getUNDEF(Src1.getValueType()); |
44052 | Src1 = Src1 ? Src1 : DAG.getUNDEF(Src0.getValueType()); |
44053 | return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Src0, Src1); |
44054 | } |
44055 | } |
44056 | |
44057 | // Attempt to combine as shuffle. |
44058 | SDValue Op(N, 0); |
44059 | if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget)) |
44060 | return Res; |
44061 | |
44062 | return SDValue(); |
44063 | } |
44064 | |
44065 | static SDValue combineVectorHADDSUB(SDNode *N, SelectionDAG &DAG, |
44066 | TargetLowering::DAGCombinerInfo &DCI, |
44067 | const X86Subtarget &Subtarget) { |
44068 | assert((X86ISD::HADD == N->getOpcode() || X86ISD::FHADD == N->getOpcode() || |
44069 | X86ISD::HSUB == N->getOpcode() || X86ISD::FHSUB == N->getOpcode()) && |
44070 | "Unexpected horizontal add/sub opcode"); |
44071 | |
44072 | if (!shouldUseHorizontalOp(true, DAG, Subtarget)) { |
44073 | // When horizontal ops are slow on this target, try to reuse or merge |
44074 | // existing horizontal ops instead of emitting new ones. |
44075 | MVT VT = N->getSimpleValueType(0); |
44076 | SDValue LHS = N->getOperand(0); |
44077 | SDValue RHS = N->getOperand(1); |
44078 | if (VT.is128BitVector() && LHS == RHS) { |
44079 | for (SDNode *User : LHS->uses()) { |
44080 | if (User != N && User->getOpcode() == N->getOpcode()) { |
44081 | MVT ShufVT = VT.isFloatingPoint() ? MVT::v4f32 : MVT::v4i32; |
44082 | if (User->getOperand(0) == LHS && !User->getOperand(1).isUndef()) { |
44083 | return DAG.getBitcast( |
44084 | VT, |
44085 | DAG.getVectorShuffle(ShufVT, SDLoc(N), |
44086 | DAG.getBitcast(ShufVT, SDValue(User, 0)), |
44087 | DAG.getUNDEF(ShufVT), {0, 1, 0, 1})); |
44088 | } |
44089 | if (User->getOperand(1) == LHS && !User->getOperand(0).isUndef()) { |
44090 | return DAG.getBitcast( |
44091 | VT, |
44092 | DAG.getVectorShuffle(ShufVT, SDLoc(N), |
44093 | DAG.getBitcast(ShufVT, SDValue(User, 0)), |
44094 | DAG.getUNDEF(ShufVT), {2, 3, 2, 3})); |
44095 | } |
44096 | } |
44097 | } |
44098 | } |
44099 | |
44100 | // HOP(HOP'(X,X),HOP'(Y,Y)) -> HOP(PERMUTE(HOP'(X,Y)),PERMUTE(HOP'(X,Y))). |
44101 | if (LHS != RHS && LHS.getOpcode() == N->getOpcode() && |
44102 | LHS.getOpcode() == RHS.getOpcode() && |
44103 | LHS.getValueType() == RHS.getValueType()) { |
44104 | SDValue LHS0 = LHS.getOperand(0); |
44105 | SDValue RHS0 = LHS.getOperand(1); |
44106 | SDValue LHS1 = RHS.getOperand(0); |
44107 | SDValue RHS1 = RHS.getOperand(1); |
44108 | if ((LHS0 == RHS0 || LHS0.isUndef() || RHS0.isUndef()) && |
44109 | (LHS1 == RHS1 || LHS1.isUndef() || RHS1.isUndef())) { |
44110 | SDLoc DL(N); |
44111 | SDValue Res = DAG.getNode(LHS.getOpcode(), DL, LHS.getValueType(), |
44112 | LHS0.isUndef() ? RHS0 : LHS0, |
44113 | LHS1.isUndef() ? RHS1 : LHS1); |
44114 | MVT ShufVT = MVT::getVectorVT(MVT::i32, VT.getSizeInBits() / 32); |
44115 | Res = DAG.getBitcast(ShufVT, Res); |
44116 | SDValue NewLHS = |
44117 | DAG.getNode(X86ISD::PSHUFD, DL, ShufVT, Res, |
44118 | getV4X86ShuffleImm8ForMask({0, 1, 0, 1}, DL, DAG)); |
44119 | SDValue NewRHS = |
44120 | DAG.getNode(X86ISD::PSHUFD, DL, ShufVT, Res, |
44121 | getV4X86ShuffleImm8ForMask({2, 3, 2, 3}, DL, DAG)); |
44122 | DAG.ReplaceAllUsesOfValueWith(LHS, DAG.getBitcast(VT, NewLHS)); |
44123 | DAG.ReplaceAllUsesOfValueWith(RHS, DAG.getBitcast(VT, NewRHS)); |
44124 | return SDValue(N, 0); |
44125 | } |
44126 | } |
44127 | } |
44128 | |
44129 | |
44130 | // Try to fold HOP(SHUFFLE(),SHUFFLE()) -> SHUFFLE(HOP()). |
44131 | return V; |
44132 | |
44133 | return SDValue(); |
44134 | } |
44135 | |
44136 | static SDValue combineVectorShiftVar(SDNode *N, SelectionDAG &DAG, |
44137 | TargetLowering::DAGCombinerInfo &DCI, |
44138 | const X86Subtarget &Subtarget) { |
44139 | assert((X86ISD::VSHL == N->getOpcode() || X86ISD::VSRA == N->getOpcode() || |
44140 | X86ISD::VSRL == N->getOpcode()) && |
44141 | "Unexpected shift opcode"); |
44142 | EVT VT = N->getValueType(0); |
44143 | SDValue N0 = N->getOperand(0); |
44144 | SDValue N1 = N->getOperand(1); |
44145 | |
44146 | |
44147 | // Shift zero -> zero. |
44148 | return DAG.getConstant(0, SDLoc(N), VT); |
44149 | |
44150 | |
44151 | // Detect constant shift amounts. |
44152 | SmallVector<APInt, 32> EltBits; |
44153 | if (getTargetConstantBitsFromNode(N1, 64, UndefElts, EltBits, true, false)) { |
44154 | unsigned X86Opc = getTargetVShiftUniformOpcode(N->getOpcode(), false); |
44155 | return getTargetVShiftByConstNode(X86Opc, SDLoc(N), VT.getSimpleVT(), N0, |
44156 | EltBits[0].getZExtValue(), DAG); |
44157 | } |
44158 | |
44159 | APInt KnownUndef, KnownZero; |
44160 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
44161 | APInt DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements()); |
44162 | if (TLI.SimplifyDemandedVectorElts(SDValue(N, 0), DemandedElts, KnownUndef, |
44163 | KnownZero, DCI)) |
44164 | return SDValue(N, 0); |
44165 | |
44166 | return SDValue(); |
44167 | } |
44168 | |
44169 | static SDValue combineVectorShiftImm(SDNode *N, SelectionDAG &DAG, |
44170 | TargetLowering::DAGCombinerInfo &DCI, |
44171 | const X86Subtarget &Subtarget) { |
44172 | unsigned Opcode = N->getOpcode(); |
44173 | assert((X86ISD::VSHLI == Opcode || X86ISD::VSRAI == Opcode || |
44174 | X86ISD::VSRLI == Opcode) && |
44175 | "Unexpected shift opcode"); |
44176 | bool LogicalShift = X86ISD::VSHLI == Opcode || X86ISD::VSRLI == Opcode; |
44177 | EVT VT = N->getValueType(0); |
44178 | SDValue N0 = N->getOperand(0); |
44179 | unsigned NumBitsPerElt = VT.getScalarSizeInBits(); |
44180 | assert(VT == N0.getValueType() && (NumBitsPerElt % 8) == 0 && |
44181 | "Unexpected value type"); |
44182 | assert(N->getOperand(1).getValueType() == MVT::i8 && |
44183 | "Unexpected shift amount type"); |
44184 | |
44185 | |
44186 | // (shift undef, X) -> 0 |
44187 | return DAG.getConstant(0, SDLoc(N), VT); |
44188 | |
44189 | // Out of range logical bit shifts are guaranteed to be zero. |
44190 | // Out of range arithmetic bit shifts splat the sign bit. |
44191 | unsigned ShiftVal = N->getConstantOperandVal(1); |
44192 | if (ShiftVal >= NumBitsPerElt) { |
44193 | if (LogicalShift) |
44194 | return DAG.getConstant(0, SDLoc(N), VT); |
44195 | ShiftVal = NumBitsPerElt - 1; |
44196 | } |
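// ---------------------------------------------------------------------------
// [Editorial sketch, not part of the original source] The x86 vector
// shift-by-immediate semantics used above: logical shifts by >= the element
// width yield 0, arithmetic shifts clamp to width - 1 (a sign splat).
// Compile as C++20 for guaranteed arithmetic >>. Names invented.
#include <cstdint>
constexpr uint32_t vsrl(uint32_t v, unsigned s) { return s >= 32 ? 0 : v >> s; }
constexpr int32_t vsra(int32_t v, unsigned s) { return v >> (s >= 32 ? 31 : s); }
static_assert(vsrl(0xffffffffu, 40) == 0, "");
static_assert(vsra(int32_t(-1), 40) == -1, "");
// ---------------------------------------------------------------------------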
44197 | |
44198 | |
44199 | // (shift X, 0) -> X |
44200 | return N0; |
44201 | |
44202 | // (shift 0, C) -> 0 |
44203 | if (ISD::isBuildVectorAllZeros(N0.getNode())) |
44204 | // N0 is all zeros or undef. We guarantee that the bits shifted into the |
44205 | // result are all zeros, not undef. |
44206 | return DAG.getConstant(0, SDLoc(N), VT); |
44207 | |
44208 | // (VSRAI -1, C) -> -1 |
44209 | if (!LogicalShift && ISD::isBuildVectorAllOnes(N0.getNode())) |
44210 | // N0 is all ones or undef. We guarantee that the bits shifted into the |
44211 | // result are all ones, not undef. |
44212 | return DAG.getConstant(-1, SDLoc(N), VT); |
44213 | |
44214 | // (shift (shift X, C2), C1) -> (shift X, (C1 + C2)) |
44215 | if (Opcode == N0.getOpcode()) { |
44216 | unsigned ShiftVal2 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); |
44217 | unsigned NewShiftVal = ShiftVal + ShiftVal2; |
44218 | if (NewShiftVal >= NumBitsPerElt) { |
44219 | // Out of range logical bit shifts are guaranteed to be zero. |
44220 | // Out of range arithmetic bit shifts splat the sign bit. |
44221 | if (LogicalShift) |
44222 | return DAG.getConstant(0, SDLoc(N), VT); |
44223 | NewShiftVal = NumBitsPerElt - 1; |
44224 | } |
44225 | return DAG.getNode(Opcode, SDLoc(N), VT, N0.getOperand(0), |
44226 | DAG.getTargetConstant(NewShiftVal, SDLoc(N), MVT::i8)); |
44227 | } |
44228 | |
44229 | |
44230 | // We can decode 'whole byte' logical bit shifts as shuffles. |
44231 | SDValue Op(N, 0); |
44232 | if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget)) |
44233 | return Res; |
44234 | } |
44235 | |
44236 | |
44237 | // Constant Folding. |
44238 | SmallVector<APInt, 32> EltBits; |
44239 | if (N->isOnlyUserOf(N0.getNode()) && |
44240 | getTargetConstantBitsFromNode(N0, NumBitsPerElt, UndefElts, EltBits)) { |
44241 | assert(EltBits.size() == VT.getVectorNumElements() && |
44242 | "Unexpected shift value type"); |
44243 | |
44244 | // Undef elements need to fold to 0 so the result stays fully defined |
44245 | // after shifting. |
44246 | for (unsigned i = 0, e = EltBits.size(); i != e; ++i) { |
44247 | APInt &Elt = EltBits[i]; |
44248 | if (UndefElts[i]) |
44249 | Elt = 0; |
44250 | else if (X86ISD::VSHLI == Opcode) |
44251 | Elt <<= ShiftVal; |
44252 | else if (X86ISD::VSRAI == Opcode) |
44253 | Elt.ashrInPlace(ShiftVal); |
44254 | else |
44255 | Elt.lshrInPlace(ShiftVal); |
44256 | } |
44257 | |
44258 | UndefElts = 0; |
44259 | return getConstVector(EltBits, UndefElts, VT.getSimpleVT(), DAG, SDLoc(N)); |
44260 | } |
44261 | |
44262 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
44263 | if (TLI.SimplifyDemandedBits(SDValue(N, 0), |
44264 | APInt::getAllOnesValue(NumBitsPerElt), DCI)) |
44265 | return SDValue(N, 0); |
44266 | |
44267 | return SDValue(); |
44268 | } |
44269 | |
44270 | static SDValue combineVectorInsert(SDNode *N, SelectionDAG &DAG, |
44271 | TargetLowering::DAGCombinerInfo &DCI, |
44272 | const X86Subtarget &Subtarget) { |
44273 | EVT VT = N->getValueType(0); |
44274 | assert(((N->getOpcode() == X86ISD::PINSRB && VT == MVT::v16i8) || |
44275 | (N->getOpcode() == X86ISD::PINSRW && VT == MVT::v8i16) || |
44276 | N->getOpcode() == ISD::INSERT_VECTOR_ELT) && |
44277 | "Unexpected vector insertion"); |
44278 | |
44279 | if (N->getOpcode() == X86ISD::PINSRB || N->getOpcode() == X86ISD::PINSRW) { |
44280 | unsigned NumBitsPerElt = VT.getScalarSizeInBits(); |
44281 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
44282 | if (TLI.SimplifyDemandedBits(SDValue(N, 0), |
44283 | APInt::getAllOnesValue(NumBitsPerElt), DCI)) |
44284 | return SDValue(N, 0); |
44285 | } |
44286 | |
44287 | // Attempt to combine insertion patterns to a shuffle. |
44288 | if (VT.isSimple() && DCI.isAfterLegalizeDAG()) { |
44289 | SDValue Op(N, 0); |
44290 | if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget)) |
44291 | return Res; |
44292 | } |
44293 | |
44294 | return SDValue(); |
44295 | } |
44296 | |
44297 | /// Recognize the distinctive (AND (setcc ...) (setcc ..)) where both setccs |
44298 | /// reference the same FP CMP, and rewrite for CMPEQSS and friends. Likewise |
44299 | /// for OR -> CMPNEQSS. |
44300 | static SDValue combineCompareEqual(SDNode *N, SelectionDAG &DAG, |
44301 | TargetLowering::DAGCombinerInfo &DCI, |
44302 | const X86Subtarget &Subtarget) { |
44303 | unsigned opcode; |
44304 | |
44305 | // SSE1 supports CMP{eq|ne}SS, and SSE2 added CMP{eq|ne}SD, but |
44306 | // we're requiring SSE2 for both. |
44307 | if (Subtarget.hasSSE2() && isAndOrOfSetCCs(SDValue(N, 0U), opcode)) { |
44308 | SDValue N0 = N->getOperand(0); |
44309 | SDValue N1 = N->getOperand(1); |
44310 | SDValue CMP0 = N0.getOperand(1); |
44311 | SDValue CMP1 = N1.getOperand(1); |
44312 | SDLoc DL(N); |
44313 | |
44314 | // The SETCCs should both refer to the same CMP. |
44315 | if (CMP0.getOpcode() != X86ISD::FCMP || CMP0 != CMP1) |
44316 | return SDValue(); |
44317 | |
44318 | SDValue CMP00 = CMP0->getOperand(0); |
44319 | SDValue CMP01 = CMP0->getOperand(1); |
44320 | EVT VT = CMP00.getValueType(); |
44321 | |
44322 | if (VT == MVT::f32 || VT == MVT::f64) { |
44323 | bool ExpectingFlags = false; |
44324 | // Conservatively check that all users just want a zero/one value, not flags. |
44325 | for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); |
44326 | !ExpectingFlags && UI != UE; ++UI) |
44327 | switch (UI->getOpcode()) { |
44328 | default: |
44329 | case ISD::BR_CC: |
44330 | case ISD::BRCOND: |
44331 | case ISD::SELECT: |
44332 | ExpectingFlags = true; |
44333 | break; |
44334 | case ISD::CopyToReg: |
44335 | case ISD::SIGN_EXTEND: |
44336 | case ISD::ZERO_EXTEND: |
44337 | case ISD::ANY_EXTEND: |
44338 | break; |
44339 | } |
44340 | |
44341 | if (!ExpectingFlags) { |
44342 | enum X86::CondCode cc0 = (enum X86::CondCode)N0.getConstantOperandVal(0); |
44343 | enum X86::CondCode cc1 = (enum X86::CondCode)N1.getConstantOperandVal(0); |
44344 | |
44345 | if (cc1 == X86::COND_E || cc1 == X86::COND_NE) { |
44346 | X86::CondCode tmp = cc0; |
44347 | cc0 = cc1; |
44348 | cc1 = tmp; |
44349 | } |
44350 | |
44351 | if ((cc0 == X86::COND_E && cc1 == X86::COND_NP) || |
44352 | (cc0 == X86::COND_NE && cc1 == X86::COND_P)) { |
44353 | // FIXME: need symbolic constants for these magic numbers. |
44354 | // See X86ATTInstPrinter.cpp:printSSECC(). |
44355 | unsigned x86cc = (cc0 == X86::COND_E) ? 0 : 4; |
44356 | if (Subtarget.hasAVX512()) { |
44357 | SDValue FSetCC = |
44358 | DAG.getNode(X86ISD::FSETCCM, DL, MVT::v1i1, CMP00, CMP01, |
44359 | DAG.getTargetConstant(x86cc, DL, MVT::i8)); |
44360 | // Need to fill with zeros to ensure the bitcast will produce zeroes |
44361 | // for the upper bits. An EXTRACT_ELEMENT here wouldn't guarantee that. |
44362 | SDValue Ins = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, MVT::v16i1, |
44363 | DAG.getConstant(0, DL, MVT::v16i1), |
44364 | FSetCC, DAG.getIntPtrConstant(0, DL)); |
44365 | return DAG.getZExtOrTrunc(DAG.getBitcast(MVT::i16, Ins), DL, |
44366 | N->getSimpleValueType(0)); |
44367 | } |
44368 | SDValue OnesOrZeroesF = |
44369 | DAG.getNode(X86ISD::FSETCC, DL, CMP00.getValueType(), CMP00, |
44370 | CMP01, DAG.getTargetConstant(x86cc, DL, MVT::i8)); |
44371 | |
44372 | bool is64BitFP = (CMP00.getValueType() == MVT::f64); |
44373 | MVT IntVT = is64BitFP ? MVT::i64 : MVT::i32; |
44374 | |
44375 | if (is64BitFP && !Subtarget.is64Bit()) { |
44376 | // On 32-bit targets we cannot bitcast the 64-bit floating point result |
44377 | // directly to a 64-bit integer (it would have to go through memory). |
44378 | // Instead, widen it to a vector, bitcast to v4f32, and extract the low |
44379 | // 32-bit element, which holds the bit that is masked with 1 below; |
44380 | // IntVT switches to i32 accordingly. |
44381 | SDValue Vector64 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f64, |
44382 | OnesOrZeroesF); |
44383 | SDValue Vector32 = DAG.getBitcast(MVT::v4f32, Vector64); |
44384 | OnesOrZeroesF = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, |
44385 | Vector32, DAG.getIntPtrConstant(0, DL)); |
44386 | IntVT = MVT::i32; |
44387 | } |
44388 | |
44389 | SDValue OnesOrZeroesI = DAG.getBitcast(IntVT, OnesOrZeroesF); |
44390 | SDValue ANDed = DAG.getNode(ISD::AND, DL, IntVT, OnesOrZeroesI, |
44391 | DAG.getConstant(1, DL, IntVT)); |
44392 | SDValue OneBitOfTruth = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, |
44393 | ANDed); |
44394 | return OneBitOfTruth; |
44395 | } |
44396 | } |
44397 | } |
44398 | } |
44399 | return SDValue(); |
44400 | } |
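      | |
      | // A worked example of the fold above, assuming scalar f32 operands (the |
      | // operand names are illustrative, not from this file): 'a == b' lowers to |
      | // UCOMISS + SETE + SETNP + AND, because FP equality is ZF=1 and PF=0. The |
      | // combine instead emits CMPEQSS (FSETCC imm 0), which yields all-ones or |
      | // all-zeros in an XMM register, then extracts bit 0: |
      | //   (and (setcc E, (fcmp a, b)), (setcc NP, (fcmp a, b))) |
      | //     --> (trunc (and (bitcast i32 (fsetcc a, b, 0)), 1)) |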
44401 | |
44402 | // Try to fold: (and (xor X, -1), Y) -> (andnp X, Y). |
44403 | static SDValue combineANDXORWithAllOnesIntoANDNP(SDNode *N, SelectionDAG &DAG) { |
44404 | assert(N->getOpcode() == ISD::AND); |
44405 | |
44406 | MVT VT = N->getSimpleValueType(0); |
44407 | if (!VT.is128BitVector() && !VT.is256BitVector() && !VT.is512BitVector()) |
44408 | return SDValue(); |
44409 | |
44410 | SDValue X, Y; |
44411 | SDValue N0 = N->getOperand(0); |
44412 | SDValue N1 = N->getOperand(1); |
44413 | |
44414 | auto GetNot = [&VT, &DAG](SDValue V) { |
44415 | |
44416 | if (SDValue Not = IsNOT(V, DAG)) |
44417 | return Not; |
44418 | |
44419 | if (V.getOpcode() == X86ISD::VBROADCAST) { |
44420 | SDValue Src = V.getOperand(0); |
44421 | EVT SrcVT = Src.getValueType(); |
44422 | if (!SrcVT.isVector()) |
44423 | return SDValue(); |
44424 | if (SDValue Not = IsNOT(Src, DAG)) |
44425 | return DAG.getNode(X86ISD::VBROADCAST, SDLoc(V), VT, |
44426 | DAG.getBitcast(SrcVT, Not)); |
44427 | } |
44428 | return SDValue(); |
44429 | }; |
44430 | |
44431 | if (SDValue Not = GetNot(N0)) { |
44432 | X = Not; |
44433 | Y = N1; |
44434 | } else if (SDValue Not = GetNot(N1)) { |
44435 | X = Not; |
44436 | Y = N0; |
44437 | } else |
44438 | return SDValue(); |
44439 | |
44440 | X = DAG.getBitcast(VT, X); |
44441 | Y = DAG.getBitcast(VT, Y); |
44442 | return DAG.getNode(X86ISD::ANDNP, SDLoc(N), VT, X, Y); |
44443 | } |
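      | |
      | // Minimal sketches of the two shapes GetNot above accepts (operand names |
      | // illustrative): |
      | //   (and (xor X, -1), Y)              --> (andnp X, Y) |
      | //   (and (vbroadcast (xor S, -1)), Y) --> (andnp (vbroadcast S), Y) |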
44444 | |
44445 | |
44446 | |
44447 | |
44448 | // On AVX/AVX2 the type v8i1 is legalized to v8i16, which is an XMM sized |
44449 | // register. In most cases we actually compare or select YMM-sized registers |
44450 | // and mixing the two types creates horrible code. This method optimizes |
44451 | // some of the transition sequences. |
44452 | // Even with AVX-512 this is still useful for removing casts around logical |
44453 | // operations on vXi1 mask types. |
44454 | static SDValue PromoteMaskArithmetic(SDNode *N, EVT VT, SelectionDAG &DAG, |
44455 | unsigned Depth) { |
44456 | |
44457 | if (Depth >= SelectionDAG::MaxRecursionDepth) |
44458 | return SDValue(); |
44459 | |
44460 | if (N->getOpcode() != ISD::XOR && N->getOpcode() != ISD::AND && |
44461 | N->getOpcode() != ISD::OR) |
44462 | return SDValue(); |
44463 | |
44464 | SDValue N0 = N->getOperand(0); |
44465 | SDValue N1 = N->getOperand(1); |
44466 | SDLoc DL(N); |
44467 | |
44468 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
44469 | if (!TLI.isOperationLegalOrPromote(N->getOpcode(), VT)) |
44470 | return SDValue(); |
44471 | |
44472 | if (SDValue NN0 = PromoteMaskArithmetic(N0.getNode(), VT, DAG, Depth + 1)) |
44473 | N0 = NN0; |
44474 | else { |
44475 | |
44476 | if (N0.getOpcode() != ISD::TRUNCATE) |
44477 | return SDValue(); |
44478 | |
44479 | |
44480 | if (N0.getOperand(0).getValueType() != VT) |
44481 | return SDValue(); |
44482 | |
44483 | N0 = N0.getOperand(0); |
44484 | } |
44485 | |
44486 | if (SDValue NN1 = PromoteMaskArithmetic(N1.getNode(), VT, DAG, Depth + 1)) |
44487 | N1 = NN1; |
44488 | else { |
44489 | |
44490 | bool RHSTrunc = N1.getOpcode() == ISD::TRUNCATE && |
44491 | N1.getOperand(0).getValueType() == VT; |
44492 | if (!RHSTrunc && !ISD::isBuildVectorOfConstantSDNodes(N1.getNode())) |
44493 | return SDValue(); |
44494 | |
44495 | if (RHSTrunc) |
44496 | N1 = N1.getOperand(0); |
44497 | else |
44498 | N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N1); |
44499 | } |
44500 | |
44501 | return DAG.getNode(N->getOpcode(), DL, VT, N0, N1); |
44502 | } |
44503 | |
44504 | |
44505 | |
44506 | |
44507 | // Wrapper for the recursive PromoteMaskArithmetic above: given an |
44508 | // any/zero/sign extend of a narrow logic chain, redo the logic in the wide |
44509 | // type and reapply the extension semantics (in-reg) to the result. |
44510 | static SDValue PromoteMaskArithmetic(SDNode *N, SelectionDAG &DAG, |
44511 | const X86Subtarget &Subtarget) { |
44512 | EVT VT = N->getValueType(0); |
44513 | assert(VT.isVector() && "Expected vector type"); |
44514 | |
44515 | SDLoc DL(N); |
44516 | assert((N->getOpcode() == ISD::ANY_EXTEND || |
44517 | N->getOpcode() == ISD::ZERO_EXTEND || |
44518 | N->getOpcode() == ISD::SIGN_EXTEND) && "Invalid Node"); |
44519 | |
44520 | SDValue Narrow = N->getOperand(0); |
44521 | EVT NarrowVT = Narrow.getValueType(); |
44522 | |
44523 | |
44524 | SDValue Op = PromoteMaskArithmetic(Narrow.getNode(), VT, DAG, 0); |
44525 | if (!Op) |
44526 | return SDValue(); |
44527 | switch (N->getOpcode()) { |
44528 | default: llvm_unreachable("Unexpected opcode"); |
44529 | case ISD::ANY_EXTEND: |
44530 | return Op; |
44531 | case ISD::ZERO_EXTEND: |
44532 | return DAG.getZeroExtendInReg(Op, DL, NarrowVT); |
44533 | case ISD::SIGN_EXTEND: |
44534 | return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, |
44535 | Op, DAG.getValueType(NarrowVT)); |
44536 | } |
44537 | } |
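      | |
      | // A small example of the promotion, assuming v8i16 masks extended to |
      | // v8i32 (types illustrative): the logic is redone in the wide type so the |
      | // truncate/extend casts disappear: |
      | //   (zext v8i32 (and (trunc v8i16 A), (trunc v8i16 B))) |
      | //     --> (zero_extend_in_reg (and A, B), v8i16) |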
44538 | |
44539 | static unsigned convertIntLogicToFPLogicOpcode(unsigned Opcode) { |
44540 | unsigned FPOpcode; |
44541 | switch (Opcode) { |
44542 | default: llvm_unreachable("Unexpected input node for FP logic conversion"); |
44543 | case ISD::AND: FPOpcode = X86ISD::FAND; break; |
44544 | case ISD::OR: FPOpcode = X86ISD::FOR; break; |
44545 | case ISD::XOR: FPOpcode = X86ISD::FXOR; break; |
44546 | } |
44547 | return FPOpcode; |
44548 | } |
44549 | |
44550 | /// If both input operands of a logic op are being cast from floating point |
44551 | /// types, try to convert this into a floating point logic node to avoid |
44552 | /// unnecessary moves from SSE to integer registers. |
44553 | static SDValue convertIntLogicToFPLogic(SDNode *N, SelectionDAG &DAG, |
44554 | const X86Subtarget &Subtarget) { |
44555 | EVT VT = N->getValueType(0); |
44556 | SDValue N0 = N->getOperand(0); |
44557 | SDValue N1 = N->getOperand(1); |
44558 | SDLoc DL(N); |
44559 | |
44560 | if (N0.getOpcode() != ISD::BITCAST || N1.getOpcode() != ISD::BITCAST) |
44561 | return SDValue(); |
44562 | |
44563 | SDValue N00 = N0.getOperand(0); |
44564 | SDValue N10 = N1.getOperand(0); |
44565 | EVT N00Type = N00.getValueType(); |
44566 | EVT N10Type = N10.getValueType(); |
44567 | |
44568 | |
44569 | if (N00Type != N10Type || |
44570 | !((Subtarget.hasSSE1() && N00Type == MVT::f32) || |
44571 | (Subtarget.hasSSE2() && N00Type == MVT::f64))) |
44572 | return SDValue(); |
44573 | |
44574 | unsigned FPOpcode = convertIntLogicToFPLogicOpcode(N->getOpcode()); |
44575 | SDValue FPLogic = DAG.getNode(FPOpcode, DL, N00Type, N00, N10); |
44576 | return DAG.getBitcast(VT, FPLogic); |
44577 | } |
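      | |
      | // Sketch of the cast-sinking above, assuming f32 inputs (names are |
      | // illustrative); both XMM-to-GPR moves are removed: |
      | //   (xor (bitcast i32 A:f32), (bitcast i32 B:f32)) |
      | //     --> (bitcast i32 (fxor A, B)) |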
44578 | |
44579 | // Attempt to fold BITOP(MOVMSK(X),MOVMSK(Y)) -> MOVMSK(BITOP(X,Y)) |
44580 | // to reduce XMM->GPR traffic. |
44581 | static SDValue combineBitOpWithMOVMSK(SDNode *N, SelectionDAG &DAG) { |
44582 | unsigned Opc = N->getOpcode(); |
44583 | assert((Opc == ISD::OR || Opc == ISD::AND || Opc == ISD::XOR) && |
44584 | "Unexpected bit opcode"); |
44585 | |
44586 | SDValue N0 = N->getOperand(0); |
44587 | SDValue N1 = N->getOperand(1); |
44588 | |
44589 | |
44590 | if (N0.getOpcode() != X86ISD::MOVMSK || !N0.hasOneUse() || |
44591 | N1.getOpcode() != X86ISD::MOVMSK || !N1.hasOneUse()) |
44592 | return SDValue(); |
44593 | |
44594 | SDValue Vec0 = N0.getOperand(0); |
44595 | SDValue Vec1 = N1.getOperand(0); |
44596 | EVT VecVT0 = Vec0.getValueType(); |
44597 | EVT VecVT1 = Vec1.getValueType(); |
44598 | |
44599 | |
44600 | |
44601 | if (VecVT0.getSizeInBits() != VecVT1.getSizeInBits() || |
44602 | VecVT0.getScalarSizeInBits() != VecVT1.getScalarSizeInBits()) |
44603 | return SDValue(); |
44604 | |
44605 | SDLoc DL(N); |
44606 | unsigned VecOpc = |
44607 | VecVT0.isFloatingPoint() ? convertIntLogicToFPLogicOpcode(Opc) : Opc; |
44608 | SDValue Result = |
44609 | DAG.getNode(VecOpc, DL, VecVT0, Vec0, DAG.getBitcast(VecVT0, Vec1)); |
44610 | return DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Result); |
44611 | } |
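      | |
      | // For example (operand names illustrative), two mask extractions plus a |
      | // scalar OR become one vector OR and a single extraction: |
      | //   (or (movmsk X:v4i32), (movmsk Y:v4i32)) --> (movmsk (or X, Y)) |
      | // For FP vectors the inner op takes the FAND/FOR/FXOR form from |
      | // convertIntLogicToFPLogicOpcode above. |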
44612 | |
44613 | // If an all-sign-bits vector (e.g. a compare result) is ANDed with a |
44614 | // low-bits splat mask, replace the AND with a logical shift-right; this |
44615 | // avoids materializing the constant mask. |
44616 | static SDValue combineAndMaskToShift(SDNode *N, SelectionDAG &DAG, |
44617 | const X86Subtarget &Subtarget) { |
44618 | SDValue Op0 = peekThroughBitcasts(N->getOperand(0)); |
44619 | SDValue Op1 = peekThroughBitcasts(N->getOperand(1)); |
44620 | EVT VT0 = Op0.getValueType(); |
44621 | EVT VT1 = Op1.getValueType(); |
44622 | |
44623 | if (VT0 != VT1 || !VT0.isSimple() || !VT0.isInteger()) |
44624 | return SDValue(); |
44625 | |
44626 | APInt SplatVal; |
44627 | if (!ISD::isConstantSplatVector(Op1.getNode(), SplatVal) || |
44628 | !SplatVal.isMask()) |
44629 | return SDValue(); |
44630 | |
44631 | |
44632 | if (isBitwiseNot(Op0)) |
44633 | return SDValue(); |
44634 | |
44635 | if (!SupportedVectorShiftWithImm(VT0.getSimpleVT(), Subtarget, ISD::SRL)) |
44636 | return SDValue(); |
44637 | |
44638 | unsigned EltBitWidth = VT0.getScalarSizeInBits(); |
44639 | if (EltBitWidth != DAG.ComputeNumSignBits(Op0)) |
44640 | return SDValue(); |
44641 | |
44642 | SDLoc DL(N); |
44643 | unsigned ShiftVal = SplatVal.countTrailingOnes(); |
44644 | SDValue ShAmt = DAG.getTargetConstant(EltBitWidth - ShiftVal, DL, MVT::i8); |
44645 | SDValue Shift = DAG.getNode(X86ISD::VSRLI, DL, VT0, Op0, ShAmt); |
44646 | return DAG.getBitcast(N->getValueType(0), Shift); |
44647 | } |
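      | |
      | // Sketch with a v4i32 compare result (illustrative): the compare already |
      | // produces all-ones/all-zeros lanes, so masking with splat(1) is just the |
      | // sign bit moved down: |
      | //   (and (pcmpgt X, Y), (splat 1)) --> (vsrli (pcmpgt X, Y), 31) |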
44648 | |
44649 | // Get the index operand from the lowered DAG of a GEP IR instruction with |
44650 | // one indexing dimension: base + (index << scale). |
44651 | static SDValue getIndexFromUnindexedLoad(LoadSDNode *Ld) { |
44652 | if (Ld->isIndexed()) |
44653 | return SDValue(); |
44654 | |
44655 | SDValue Base = Ld->getBasePtr(); |
44656 | |
44657 | if (Base.getOpcode() != ISD::ADD) |
44658 | return SDValue(); |
44659 | |
44660 | SDValue ShiftedIndex = Base.getOperand(0); |
44661 | |
44662 | if (ShiftedIndex.getOpcode() != ISD::SHL) |
44663 | return SDValue(); |
44664 | |
44665 | return ShiftedIndex.getOperand(0); |
44666 | |
44667 | } |
44668 | |
44669 | static bool hasBZHI(const X86Subtarget &Subtarget, MVT VT) { |
44670 | if (Subtarget.hasBMI2() && VT.isScalarInteger()) { |
44671 | switch (VT.getSizeInBits()) { |
44672 | default: return false; |
44673 | case 64: return Subtarget.is64Bit(); |
44674 | case 32: return true; |
44675 | } |
44676 | } |
44677 | return false; |
44678 | } |
44679 | |
44680 | |
44681 | // This function recognizes cases where the X86 BZHI instruction can replace |
44682 | // an 'and-load' sequence: an integer value loaded from a constant array of |
44683 | // the form |
44684 | // |
44685 | //   int array[SIZE] = {0x0, 0x1, 0x3, 0x7, 0xF ..., 2^(SIZE-1) - 1} |
44686 | // |
44687 | // and then ANDed with another input. This is equivalent to performing BZHI |
44688 | // (zero high bits) on the input with the same bit index, so the table load |
44689 | // can be removed entirely. |
44690 | static SDValue combineAndLoadToBZHI(SDNode *Node, SelectionDAG &DAG, |
44691 | const X86Subtarget &Subtarget) { |
44692 | MVT VT = Node->getSimpleValueType(0); |
44693 | SDLoc dl(Node); |
44694 | |
44695 | |
44696 | if (!hasBZHI(Subtarget, VT)) |
44697 | return SDValue(); |
44698 | |
44699 | |
44700 | for (unsigned i = 0; i < 2; i++) { |
44701 | SDValue N = Node->getOperand(i); |
44702 | LoadSDNode *Ld = dyn_cast<LoadSDNode>(N.getNode()); |
44703 | |
44704 | |
44705 | if (!Ld) |
44706 | return SDValue(); |
44707 | |
44708 | const Value *MemOp = Ld->getMemOperand()->getValue(); |
44709 | |
44710 | if (!MemOp) |
44711 | return SDValue(); |
44712 | |
44713 | if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(MemOp)) { |
44714 | if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0))) { |
44715 | if (GV->isConstant() && GV->hasDefinitiveInitializer()) { |
44716 | |
44717 | Constant *Init = GV->getInitializer(); |
44718 | Type *Ty = Init->getType(); |
44719 | if (!isa<ConstantDataArray>(Init) || |
44720 | !Ty->getArrayElementType()->isIntegerTy() || |
44721 | Ty->getArrayElementType()->getScalarSizeInBits() != |
44722 | VT.getSizeInBits() || |
44723 | Ty->getArrayNumElements() > |
44724 | Ty->getArrayElementType()->getScalarSizeInBits()) |
44725 | continue; |
44726 | |
44727 | |
44728 | uint64_t ArrayElementCount = Init->getType()->getArrayNumElements(); |
44729 | bool ConstantsMatch = true; |
44730 | for (uint64_t j = 0; j < ArrayElementCount; j++) { |
44731 | auto *Elem = cast<ConstantInt>(Init->getAggregateElement(j)); |
44732 | if (Elem->getZExtValue() != (((uint64_t)1 << j) - 1)) { |
44733 | ConstantsMatch = false; |
44734 | break; |
44735 | } |
44736 | } |
44737 | if (!ConstantsMatch) |
44738 | continue; |
44739 | |
44740 | |
44741 | |
44742 | |
44743 | |
44744 | SDValue Inp = (i == 0) ? Node->getOperand(1) : Node->getOperand(0); |
44745 | SDValue SizeC = DAG.getConstant(VT.getSizeInBits(), dl, MVT::i32); |
44746 | |
44747 | |
44748 | SDValue Index = getIndexFromUnindexedLoad(Ld); |
44749 | if (!Index) |
44750 | return SDValue(); |
44751 | Index = DAG.getZExtOrTrunc(Index, dl, MVT::i32); |
44752 | |
44753 | SDValue Sub = DAG.getNode(ISD::SUB, dl, MVT::i32, SizeC, Index); |
44754 | Sub = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Sub); |
44755 | |
44756 | SDValue AllOnes = DAG.getAllOnesConstant(dl, VT); |
44757 | SDValue LShr = DAG.getNode(ISD::SRL, dl, VT, AllOnes, Sub); |
44758 | |
44759 | return DAG.getNode(ISD::AND, dl, VT, Inp, LShr); |
44760 | } |
44761 | } |
44762 | } |
44763 | } |
44764 | return SDValue(); |
44765 | } |
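      | |
      | // At the C level the shape this recognizes looks roughly like (sketch; |
      | // identifiers hypothetical): |
      | // |
      | //   static const uint32_t tbl[32] = {0x0, 0x1, 0x3, /* (1 << i) - 1 */}; |
      | //   uint32_t f(uint32_t x, unsigned i) { return x & tbl[i]; } |
      | // |
      | // The emitted (and x, (srl -1, (32 - i))) is then selected to BZHI. |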
44766 | |
44767 | |
44768 | // Look through (and (bitcast (vXi1 (concat_vectors (vYi1 setcc), undef))), C) |
44769 | // where C is a mask covering exactly the setcc lanes. The setcc already |
44770 | // zeroes the upper bits of the mask register, so the undef lanes can be |
44771 | // replaced with zeros and the AND removed. Mainly helps v2i1/v4i1 setccs. |
44772 | static SDValue combineScalarAndWithMaskSetcc(SDNode *N, SelectionDAG &DAG, |
44773 | const X86Subtarget &Subtarget) { |
44774 | assert(N->getOpcode() == ISD::AND && "Unexpected opcode!"); |
44775 | |
44776 | EVT VT = N->getValueType(0); |
44777 | |
44778 | |
44779 | |
44780 | if (!isa<ConstantSDNode>(N->getOperand(1))) |
44781 | return SDValue(); |
44782 | |
44783 | |
44784 | assert(!VT.isVector() && "Expected scalar VT!"); |
44785 | |
44786 | if (N->getOperand(0).getOpcode() != ISD::BITCAST || |
44787 | !N->getOperand(0).hasOneUse() || |
44788 | !N->getOperand(0).getOperand(0).hasOneUse()) |
44789 | return SDValue(); |
44790 | |
44791 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
44792 | SDValue Src = N->getOperand(0).getOperand(0); |
44793 | EVT SrcVT = Src.getValueType(); |
44794 | if (!SrcVT.isVector() || SrcVT.getVectorElementType() != MVT::i1 || |
44795 | !TLI.isTypeLegal(SrcVT)) |
44796 | return SDValue(); |
44797 | |
44798 | if (Src.getOpcode() != ISD::CONCAT_VECTORS) |
44799 | return SDValue(); |
44800 | |
44801 | |
44802 | |
44803 | SDValue SubVec = Src.getOperand(0); |
44804 | EVT SubVecVT = SubVec.getValueType(); |
44805 | |
44806 | |
44807 | |
44808 | if (SubVec.getOpcode() != ISD::SETCC || !TLI.isTypeLegal(SubVecVT) || |
44809 | !N->getConstantOperandAPInt(1).isMask(SubVecVT.getVectorNumElements())) |
44810 | return SDValue(); |
44811 | |
44812 | EVT SetccVT = SubVec.getOperand(0).getValueType(); |
44813 | if (!TLI.isTypeLegal(SetccVT) || |
44814 | !(Subtarget.hasVLX() || SetccVT.is512BitVector())) |
44815 | return SDValue(); |
44816 | |
44817 | if (!(Subtarget.hasBWI() || SetccVT.getScalarSizeInBits() >= 32)) |
44818 | return SDValue(); |
44819 | |
44820 | |
44821 | |
44822 | SDLoc dl(N); |
44823 | SmallVector<SDValue, 4> Ops(Src.getNumOperands(), |
44824 | DAG.getConstant(0, dl, SubVecVT)); |
44825 | Ops[0] = SubVec; |
44826 | SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, dl, SrcVT, |
44827 | Ops); |
44828 | return DAG.getBitcast(VT, Concat); |
44829 | } |
44830 | |
44831 | static SDValue combineAnd(SDNode *N, SelectionDAG &DAG, |
44832 | TargetLowering::DAGCombinerInfo &DCI, |
44833 | const X86Subtarget &Subtarget) { |
44834 | EVT VT = N->getValueType(0); |
44835 | |
44836 | |
44837 | if (Subtarget.hasSSE1() && !Subtarget.hasSSE2() && VT == MVT::v4i32) { |
44838 | return DAG.getBitcast( |
44839 | MVT::v4i32, DAG.getNode(X86ISD::FAND, SDLoc(N), MVT::v4f32, |
44840 | DAG.getBitcast(MVT::v4f32, N->getOperand(0)), |
44841 | DAG.getBitcast(MVT::v4f32, N->getOperand(1)))); |
44842 | } |
44843 | |
44844 | |
44845 | if (VT == MVT::i64 && Subtarget.is64Bit() && |
44846 | !isa<ConstantSDNode>(N->getOperand(1))) { |
44847 | APInt HiMask = APInt::getHighBitsSet(64, 32); |
44848 | if (DAG.MaskedValueIsZero(N->getOperand(1), HiMask) || |
44849 | DAG.MaskedValueIsZero(N->getOperand(0), HiMask)) { |
44850 | SDLoc dl(N); |
44851 | SDValue LHS = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, N->getOperand(0)); |
44852 | SDValue RHS = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, N->getOperand(1)); |
44853 | return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, |
44854 | DAG.getNode(ISD::AND, dl, MVT::i32, LHS, RHS)); |
44855 | } |
44856 | } |
44857 | |
44858 | |
44859 | |
44860 | if (VT == MVT::i1) { |
44861 | SmallVector<SDValue, 2> SrcOps; |
44862 | SmallVector<APInt, 2> SrcPartials; |
44863 | if (matchScalarReduction(SDValue(N, 0), ISD::AND, SrcOps, &SrcPartials) && |
44864 | SrcOps.size() == 1) { |
44865 | SDLoc dl(N); |
44866 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
44867 | unsigned NumElts = SrcOps[0].getValueType().getVectorNumElements(); |
44868 | EVT MaskVT = EVT::getIntegerVT(*DAG.getContext(), NumElts); |
44869 | SDValue Mask = combineBitcastvxi1(DAG, MaskVT, SrcOps[0], dl, Subtarget); |
44870 | if (!Mask && TLI.isTypeLegal(SrcOps[0].getValueType())) |
44871 | Mask = DAG.getBitcast(MaskVT, SrcOps[0]); |
44872 | if (Mask) { |
44873 | assert(SrcPartials[0].getBitWidth() == NumElts && |
44874 | "Unexpected partial reduction mask"); |
44875 | SDValue PartialBits = DAG.getConstant(SrcPartials[0], dl, MaskVT); |
44876 | Mask = DAG.getNode(ISD::AND, dl, MaskVT, Mask, PartialBits); |
44877 | return DAG.getSetCC(dl, MVT::i1, Mask, PartialBits, ISD::SETEQ); |
44878 | } |
44879 | } |
44880 | } |
44881 | |
44882 | if (SDValue V = combineScalarAndWithMaskSetcc(N, DAG, Subtarget)) |
44883 | return V; |
44884 | |
44885 | if (SDValue R = combineBitOpWithMOVMSK(N, DAG)) |
44886 | return R; |
44887 | |
44888 | if (DCI.isBeforeLegalizeOps()) |
44889 | return SDValue(); |
44890 | |
44891 | if (SDValue R = combineCompareEqual(N, DAG, DCI, Subtarget)) |
44892 | return R; |
44893 | |
44894 | if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, Subtarget)) |
44895 | return FPLogic; |
44896 | |
44897 | if (SDValue R = combineANDXORWithAllOnesIntoANDNP(N, DAG)) |
44898 | return R; |
44899 | |
44900 | if (SDValue ShiftRight = combineAndMaskToShift(N, DAG, Subtarget)) |
44901 | return ShiftRight; |
44902 | |
44903 | if (SDValue R = combineAndLoadToBZHI(N, DAG, Subtarget)) |
44904 | return R; |
44905 | |
44906 | |
44907 | if (VT.isVector() && (VT.getScalarSizeInBits() % 8) == 0) { |
44908 | SDValue Op(N, 0); |
44909 | if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget)) |
44910 | return Res; |
44911 | } |
44912 | |
44913 | |
44914 | if ((VT.getScalarSizeInBits() % 8) == 0 && |
44915 | N->getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT && |
44916 | isa<ConstantSDNode>(N->getOperand(0).getOperand(1))) { |
44917 | SDValue BitMask = N->getOperand(1); |
44918 | SDValue SrcVec = N->getOperand(0).getOperand(0); |
44919 | EVT SrcVecVT = SrcVec.getValueType(); |
44920 | |
44921 | |
44922 | APInt UndefElts; |
44923 | SmallVector<APInt, 64> EltBits; |
44924 | if (VT == SrcVecVT.getScalarType() && |
44925 | N->getOperand(0)->isOnlyUserOf(SrcVec.getNode()) && |
44926 | getTargetConstantBitsFromNode(BitMask, 8, UndefElts, EltBits) && |
44927 | llvm::all_of(EltBits, [](const APInt &M) { |
44928 | return M.isNullValue() || M.isAllOnesValue(); |
44929 | })) { |
44930 | unsigned NumElts = SrcVecVT.getVectorNumElements(); |
44931 | unsigned Scale = SrcVecVT.getScalarSizeInBits() / 8; |
44932 | unsigned Idx = N->getOperand(0).getConstantOperandVal(1); |
44933 | |
44934 | |
44935 | SmallVector<int, 16> ShuffleMask(NumElts * Scale, SM_SentinelUndef); |
44936 | for (unsigned i = 0; i != Scale; ++i) { |
44937 | if (UndefElts[i]) |
44938 | continue; |
44939 | int VecIdx = Scale * Idx + i; |
44940 | ShuffleMask[VecIdx] = |
44941 | EltBits[i].isNullValue() ? SM_SentinelZero : VecIdx; |
44942 | } |
44943 | |
44944 | if (SDValue Shuffle = combineX86ShufflesRecursively( |
44945 | {SrcVec}, 0, SrcVec, ShuffleMask, {}, /*Depth*/ 1, |
44946 | X86::MaxShuffleCombineDepth, |
44947 | /*HasVariableMask*/ false, /*AllowVariableCrossLaneMask*/ true, |
44948 | /*AllowVariablePerLaneMask*/ true, DAG, Subtarget)) |
44949 | return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), VT, Shuffle, |
44950 | N->getOperand(0).getOperand(1)); |
44951 | } |
44952 | } |
44953 | |
44954 | return SDValue(); |
44955 | } |
44956 | |
44957 | // Canonicalize OR(AND(X, C), AND(Y, ~C)) into a bit-select (VPTERNLOG/ANDNP). |
44958 | static SDValue canonicalizeBitSelect(SDNode *N, SelectionDAG &DAG, |
44959 | const X86Subtarget &Subtarget) { |
44960 | assert(N->getOpcode() == ISD::OR && "Unexpected Opcode"); |
44961 | |
44962 | MVT VT = N->getSimpleValueType(0); |
44963 | if (!VT.isVector() || (VT.getScalarSizeInBits() % 8) != 0) |
44964 | return SDValue(); |
44965 | |
44966 | SDValue N0 = peekThroughBitcasts(N->getOperand(0)); |
44967 | SDValue N1 = peekThroughBitcasts(N->getOperand(1)); |
44968 | if (N0.getOpcode() != ISD::AND || N1.getOpcode() != ISD::AND) |
44969 | return SDValue(); |
44970 | |
44971 | |
44972 | |
44973 | bool UseVPTERNLOG = (Subtarget.hasAVX512() && VT.is512BitVector()) || |
44974 | Subtarget.hasVLX(); |
44975 | if (!(Subtarget.hasXOP() || UseVPTERNLOG || |
44976 | !N0.getOperand(1).hasOneUse() || !N1.getOperand(1).hasOneUse())) |
44977 | return SDValue(); |
44978 | |
44979 | |
44980 | APInt UndefElts0, UndefElts1; |
44981 | SmallVector<APInt, 32> EltBits0, EltBits1; |
44982 | if (!getTargetConstantBitsFromNode(N0.getOperand(1), 8, UndefElts0, EltBits0, |
44983 | false, false)) |
44984 | return SDValue(); |
44985 | if (!getTargetConstantBitsFromNode(N1.getOperand(1), 8, UndefElts1, EltBits1, |
44986 | false, false)) |
44987 | return SDValue(); |
44988 | |
44989 | for (unsigned i = 0, e = EltBits0.size(); i != e; ++i) { |
44990 | |
44991 | if (UndefElts0[i] || UndefElts1[i]) |
44992 | return SDValue(); |
44993 | if (EltBits0[i] != ~EltBits1[i]) |
44994 | return SDValue(); |
44995 | } |
44996 | |
44997 | SDLoc DL(N); |
44998 | |
44999 | if (UseVPTERNLOG) { |
45000 | |
45001 | SDValue A = DAG.getBitcast(VT, N0.getOperand(1)); |
45002 | SDValue B = DAG.getBitcast(VT, N0.getOperand(0)); |
45003 | SDValue C = DAG.getBitcast(VT, N1.getOperand(0)); |
45004 | SDValue Imm = DAG.getTargetConstant(0xCA, DL, MVT::i8); |
45005 | return DAG.getNode(X86ISD::VPTERNLOG, DL, VT, A, B, C, Imm); |
45006 | } |
45007 | |
45008 | SDValue X = N->getOperand(0); |
45009 | SDValue Y = |
45010 | DAG.getNode(X86ISD::ANDNP, DL, VT, DAG.getBitcast(VT, N0.getOperand(1)), |
45011 | DAG.getBitcast(VT, N1.getOperand(0))); |
45012 | return DAG.getNode(ISD::OR, DL, VT, X, Y); |
45013 | } |
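      | |
      | // Sketch of both outcomes (mask/operand names illustrative): |
      | //   (or (and X, M), (and Y, ~M)) |
      | //     --> (vpternlog M, X, Y, 0xCA)     ; AVX512, imm 0xCA == M ? X : Y |
      | //     --> (or (and X, M), (andnp M, Y)) ; otherwise |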
45014 | |
45015 | // Try to match OR(AND(M, Y), ANDNP(M, X)), the logic-blend pattern. |
45016 | static bool matchLogicBlend(SDNode *N, SDValue &X, SDValue &Y, SDValue &Mask) { |
45017 | if (N->getOpcode() != ISD::OR) |
45018 | return false; |
45019 | |
45020 | SDValue N0 = N->getOperand(0); |
45021 | SDValue N1 = N->getOperand(1); |
45022 | |
45023 | |
45024 | if (N1.getOpcode() == ISD::AND) |
45025 | std::swap(N0, N1); |
45026 | |
45027 | |
45028 | if (N0.getOpcode() != ISD::AND || N1.getOpcode() != X86ISD::ANDNP) |
45029 | return false; |
45030 | |
45031 | Mask = N1.getOperand(0); |
45032 | X = N1.getOperand(1); |
45033 | |
45034 | |
45035 | if (N0.getOperand(0) == Mask) |
45036 | Y = N0.getOperand(1); |
45037 | else if (N0.getOperand(1) == Mask) |
45038 | Y = N0.getOperand(0); |
45039 | else |
45040 | return false; |
45041 | |
45042 | |
45043 | |
45044 | return true; |
45045 | } |
45046 | |
45047 | // Try to fold: |
45048 | //   (or (and (m, y), (pandn m, x))) |
45049 | //    into: |
45050 | //   (vselect m, x, y) |
45051 | // As a special case, try to fold: |
45052 | //   (or (and (m, (sub 0, x)), (pandn m, x))) |
45053 | //    into: |
45054 | //   (sub (xor X, M), M) |
45055 | static SDValue combineLogicBlendIntoPBLENDV(SDNode *N, SelectionDAG &DAG, |
45056 | const X86Subtarget &Subtarget) { |
45057 | assert(N->getOpcode() == ISD::OR && "Unexpected Opcode"); |
45058 | |
45059 | EVT VT = N->getValueType(0); |
45060 | if (!((VT.is128BitVector() && Subtarget.hasSSE2()) || |
45061 | (VT.is256BitVector() && Subtarget.hasInt256()))) |
45062 | return SDValue(); |
45063 | |
45064 | SDValue X, Y, Mask; |
45065 | if (!matchLogicBlend(N, X, Y, Mask)) |
45066 | return SDValue(); |
45067 | |
45068 | |
45069 | Mask = peekThroughBitcasts(Mask); |
45070 | X = peekThroughBitcasts(X); |
45071 | Y = peekThroughBitcasts(Y); |
45072 | |
45073 | EVT MaskVT = Mask.getValueType(); |
45074 | unsigned EltBits = MaskVT.getScalarSizeInBits(); |
45075 | |
45076 | |
45077 | if (!MaskVT.isInteger() || DAG.ComputeNumSignBits(Mask) != EltBits) |
45078 | return SDValue(); |
45079 | |
45080 | SDLoc DL(N); |
45081 | |
45082 | |
45083 | if (SDValue Res = combineLogicBlendIntoConditionalNegate(VT, Mask, X, Y, DL, |
45084 | DAG, Subtarget)) |
45085 | return Res; |
45086 | |
45087 | |
45088 | if (!Subtarget.hasSSE41()) |
45089 | return SDValue(); |
45090 | |
45091 | |
45092 | if (Subtarget.hasVLX()) |
45093 | return SDValue(); |
45094 | |
45095 | MVT BlendVT = VT.is256BitVector() ? MVT::v32i8 : MVT::v16i8; |
45096 | |
45097 | X = DAG.getBitcast(BlendVT, X); |
45098 | Y = DAG.getBitcast(BlendVT, Y); |
45099 | Mask = DAG.getBitcast(BlendVT, Mask); |
45100 | Mask = DAG.getSelect(DL, BlendVT, Mask, Y, X); |
45101 | return DAG.getBitcast(VT, Mask); |
45102 | } |
45103 | |
45104 | // Helper that transforms: |
45105 | //   seteq(cmp x, 0) |
45106 | // into: |
45107 | //   srl(ctlz x), log2(bitsize(x)) |
45108 | // The input pattern is checked by the caller |
45109 | // (combineOrCmpEqZeroToCtlzSrl below). |
45110 | static SDValue lowerX86CmpEqZeroToCtlzSrl(SDValue Op, EVT ExtTy, |
45111 | SelectionDAG &DAG) { |
45112 | SDValue Cmp = Op.getOperand(1); |
45113 | EVT VT = Cmp.getOperand(0).getValueType(); |
45114 | unsigned Log2b = Log2_32(VT.getSizeInBits()); |
45115 | SDLoc dl(Op); |
45116 | SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Cmp->getOperand(0)); |
45117 | |
45118 | |
45119 | SDValue Trunc = DAG.getZExtOrTrunc(Clz, dl, MVT::i32); |
45120 | SDValue Scc = DAG.getNode(ISD::SRL, dl, MVT::i32, Trunc, |
45121 | DAG.getConstant(Log2b, dl, MVT::i8)); |
45122 | return DAG.getZExtOrTrunc(Scc, dl, ExtTy); |
45123 | } |
45124 | |
45125 | // Try to transform: |
45126 | //   zext(or(setcc(eq, (cmp x, 0)), setcc(eq, (cmp y, 0)))) |
45127 | // into: |
45128 | //   srl(or(ctlz(x), ctlz(y)), log2(bitsize(x))) |
45129 | // Will also attempt to match more generic cases, e.g.: |
45130 | //   zext(or(or(setcc(eq, cmp 0), setcc(eq, cmp 0)), setcc(eq, cmp 0))) |
45131 | // Only applies when the target's CTLZ instruction is fast. |
45132 | static SDValue combineOrCmpEqZeroToCtlzSrl(SDNode *N, SelectionDAG &DAG, |
45133 | TargetLowering::DAGCombinerInfo &DCI, |
45134 | const X86Subtarget &Subtarget) { |
45135 | if (DCI.isBeforeLegalize() || !Subtarget.getTargetLowering()->isCtlzFast()) |
45136 | return SDValue(); |
45137 | |
45138 | auto isORCandidate = [](SDValue N) { |
45139 | return (N->getOpcode() == ISD::OR && N->hasOneUse()); |
45140 | }; |
45141 | |
45142 | |
45143 | |
45144 | |
45145 | if (!N->hasOneUse() || !N->getSimpleValueType(0).bitsGE(MVT::i32) || |
45146 | !isORCandidate(N->getOperand(0))) |
45147 | return SDValue(); |
45148 | |
45149 | |
45150 | auto isSetCCCandidate = [](SDValue N) { |
45151 | return N->getOpcode() == X86ISD::SETCC && N->hasOneUse() && |
45152 | X86::CondCode(N->getConstantOperandVal(0)) == X86::COND_E && |
45153 | N->getOperand(1).getOpcode() == X86ISD::CMP && |
45154 | isNullConstant(N->getOperand(1).getOperand(1)) && |
45155 | N->getOperand(1).getValueType().bitsGE(MVT::i32); |
45156 | }; |
45157 | |
45158 | SDNode *OR = N->getOperand(0).getNode(); |
45159 | SDValue LHS = OR->getOperand(0); |
45160 | SDValue RHS = OR->getOperand(1); |
45161 | |
45162 | |
45163 | SmallVector<SDNode *, 2> ORNodes; |
45164 | while (((isORCandidate(LHS) && isSetCCCandidate(RHS)) || |
45165 | (isORCandidate(RHS) && isSetCCCandidate(LHS)))) { |
45166 | ORNodes.push_back(OR); |
45167 | OR = (LHS->getOpcode() == ISD::OR) ? LHS.getNode() : RHS.getNode(); |
45168 | LHS = OR->getOperand(0); |
45169 | RHS = OR->getOperand(1); |
45170 | } |
45171 | |
45172 | |
45173 | if (!(isSetCCCandidate(LHS) && isSetCCCandidate(RHS)) || |
45174 | !isORCandidate(SDValue(OR, 0))) |
45175 | return SDValue(); |
45176 | |
45177 | |
45178 | |
45179 | |
45180 | |
45181 | |
45182 | EVT VT = OR->getValueType(0); |
45183 | SDValue NewLHS = lowerX86CmpEqZeroToCtlzSrl(LHS, VT, DAG); |
45184 | SDValue Ret, NewRHS; |
45185 | if (NewLHS && (NewRHS = lowerX86CmpEqZeroToCtlzSrl(RHS, VT, DAG))) |
45186 | Ret = DAG.getNode(ISD::OR, SDLoc(OR), VT, NewLHS, NewRHS); |
45187 | |
45188 | if (!Ret) |
45189 | return SDValue(); |
45190 | |
45191 | |
45192 | while (ORNodes.size() > 0) { |
45193 | OR = ORNodes.pop_back_val(); |
45194 | LHS = OR->getOperand(0); |
45195 | RHS = OR->getOperand(1); |
45196 | |
45197 | if (RHS->getOpcode() == ISD::OR) |
45198 | std::swap(LHS, RHS); |
45199 | NewRHS = lowerX86CmpEqZeroToCtlzSrl(RHS, VT, DAG); |
45200 | if (!NewRHS) |
45201 | return SDValue(); |
45202 | Ret = DAG.getNode(ISD::OR, SDLoc(OR), VT, Ret, NewRHS); |
45203 | } |
45204 | |
45205 | if (Ret) |
45206 | Ret = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), Ret); |
45207 | |
45208 | return Ret; |
45209 | } |
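      | |
      | // On a target with fast LZCNT, C source like the following (sketch; names |
      | // hypothetical): |
      | // |
      | //   int any_zero(unsigned x, unsigned y) { return (x == 0) | (y == 0); } |
      | // |
      | // becomes (srl (or (ctlz x), (ctlz y)), 5): ctlz yields 32 only for a zero |
      | // input, and bit 5 of the OR is set iff either count was 32. |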
45210 | |
45211 | static SDValue combineOr(SDNode *N, SelectionDAG &DAG, |
45212 | TargetLowering::DAGCombinerInfo &DCI, |
45213 | const X86Subtarget &Subtarget) { |
45214 | SDValue N0 = N->getOperand(0); |
45215 | SDValue N1 = N->getOperand(1); |
45216 | EVT VT = N->getValueType(0); |
45217 | |
45218 | |
45219 | if (Subtarget.hasSSE1() && !Subtarget.hasSSE2() && VT == MVT::v4i32) { |
45220 | return DAG.getBitcast(MVT::v4i32, |
45221 | DAG.getNode(X86ISD::FOR, SDLoc(N), MVT::v4f32, |
45222 | DAG.getBitcast(MVT::v4f32, N0), |
45223 | DAG.getBitcast(MVT::v4f32, N1))); |
45224 | } |
45225 | |
45226 | |
45227 | |
45228 | if (VT == MVT::i1) { |
45229 | SmallVector<SDValue, 2> SrcOps; |
45230 | SmallVector<APInt, 2> SrcPartials; |
45231 | if (matchScalarReduction(SDValue(N, 0), ISD::OR, SrcOps, &SrcPartials) && |
45232 | SrcOps.size() == 1) { |
45233 | SDLoc dl(N); |
45234 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
45235 | unsigned NumElts = SrcOps[0].getValueType().getVectorNumElements(); |
45236 | EVT MaskVT = EVT::getIntegerVT(*DAG.getContext(), NumElts); |
45237 | SDValue Mask = combineBitcastvxi1(DAG, MaskVT, SrcOps[0], dl, Subtarget); |
45238 | if (!Mask && TLI.isTypeLegal(SrcOps[0].getValueType())) |
45239 | Mask = DAG.getBitcast(MaskVT, SrcOps[0]); |
45240 | if (Mask) { |
45241 | assert(SrcPartials[0].getBitWidth() == NumElts && |
45242 | "Unexpected partial reduction mask"); |
45243 | SDValue ZeroBits = DAG.getConstant(0, dl, MaskVT); |
45244 | SDValue PartialBits = DAG.getConstant(SrcPartials[0], dl, MaskVT); |
45245 | Mask = DAG.getNode(ISD::AND, dl, MaskVT, Mask, PartialBits); |
45246 | return DAG.getSetCC(dl, MVT::i1, Mask, ZeroBits, ISD::SETNE); |
45247 | } |
45248 | } |
45249 | } |
45250 | |
45251 | if (SDValue R = combineBitOpWithMOVMSK(N, DAG)) |
45252 | return R; |
45253 | |
45254 | if (DCI.isBeforeLegalizeOps()) |
45255 | return SDValue(); |
45256 | |
45257 | if (SDValue R = combineCompareEqual(N, DAG, DCI, Subtarget)) |
45258 | return R; |
45259 | |
45260 | if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, Subtarget)) |
45261 | return FPLogic; |
45262 | |
45263 | if (SDValue R = canonicalizeBitSelect(N, DAG, Subtarget)) |
45264 | return R; |
45265 | |
45266 | if (SDValue R = combineLogicBlendIntoPBLENDV(N, DAG, Subtarget)) |
45267 | return R; |
45268 | |
45269 | |
45270 | |
45271 | |
45272 | |
45273 | if (N0.getOpcode() == X86ISD::KSHIFTL || N1.getOpcode() == X86ISD::KSHIFTL) { |
45274 | unsigned NumElts = VT.getVectorNumElements(); |
45275 | unsigned HalfElts = NumElts / 2; |
45276 | APInt UpperElts = APInt::getHighBitsSet(NumElts, HalfElts); |
45277 | if (NumElts >= 16 && N1.getOpcode() == X86ISD::KSHIFTL && |
45278 | N1.getConstantOperandAPInt(1) == HalfElts && |
45279 | DAG.MaskedValueIsZero(N0, APInt(1, 1), UpperElts)) { |
45280 | SDLoc dl(N); |
45281 | return DAG.getNode( |
45282 | ISD::CONCAT_VECTORS, dl, VT, |
45283 | extractSubVector(N0, 0, DAG, dl, HalfElts), |
45284 | extractSubVector(N1.getOperand(0), 0, DAG, dl, HalfElts)); |
45285 | } |
45286 | if (NumElts >= 16 && N0.getOpcode() == X86ISD::KSHIFTL && |
45287 | N0.getConstantOperandAPInt(1) == HalfElts && |
45288 | DAG.MaskedValueIsZero(N1, APInt(1, 1), UpperElts)) { |
45289 | SDLoc dl(N); |
45290 | return DAG.getNode( |
45291 | ISD::CONCAT_VECTORS, dl, VT, |
45292 | extractSubVector(N1, 0, DAG, dl, HalfElts), |
45293 | extractSubVector(N0.getOperand(0), 0, DAG, dl, HalfElts)); |
45294 | } |
45295 | } |
45296 | |
45297 | |
45298 | if (VT.isVector() && (VT.getScalarSizeInBits() % 8) == 0) { |
45299 | SDValue Op(N, 0); |
45300 | if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget)) |
45301 | return Res; |
45302 | } |
45303 | |
45304 | return SDValue(); |
45305 | } |
45306 | |
45307 | // Try to turn tests against the signbit in the form of: |
45308 | //   xor (truncate (srl X, size(X)-1)), 1 |
45309 | // into: |
45310 | //   setgt X, -1 |
45311 | static SDValue foldXorTruncShiftIntoCmp(SDNode *N, SelectionDAG &DAG) { |
45312 | |
45313 | EVT ResultType = N->getValueType(0); |
45314 | if (ResultType != MVT::i8 && ResultType != MVT::i1) |
45315 | return SDValue(); |
45316 | |
45317 | SDValue N0 = N->getOperand(0); |
45318 | SDValue N1 = N->getOperand(1); |
45319 | |
45320 | |
45321 | if (N0.getOpcode() != ISD::TRUNCATE || !N0.hasOneUse()) |
45322 | return SDValue(); |
45323 | |
45324 | |
45325 | if (!isOneConstant(N1)) |
45326 | return SDValue(); |
45327 | |
45328 | |
45329 | SDValue Shift = N0.getOperand(0); |
45330 | if (Shift.getOpcode() != ISD::SRL || !Shift.hasOneUse()) |
45331 | return SDValue(); |
45332 | |
45333 | |
45334 | EVT ShiftTy = Shift.getValueType(); |
45335 | if (ShiftTy != MVT::i16 && ShiftTy != MVT::i32 && ShiftTy != MVT::i64) |
45336 | return SDValue(); |
45337 | |
45338 | |
45339 | if (!isa<ConstantSDNode>(Shift.getOperand(1)) || |
45340 | Shift.getConstantOperandAPInt(1) != (ShiftTy.getSizeInBits() - 1)) |
45341 | return SDValue(); |
45342 | |
45343 | |
45344 | |
45345 | |
45346 | SDLoc DL(N); |
45347 | SDValue ShiftOp = Shift.getOperand(0); |
45348 | EVT ShiftOpTy = ShiftOp.getValueType(); |
45349 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
45350 | EVT SetCCResultType = TLI.getSetCCResultType(DAG.getDataLayout(), |
45351 | *DAG.getContext(), ResultType); |
45352 | SDValue Cond = DAG.getSetCC(DL, SetCCResultType, ShiftOp, |
45353 | DAG.getConstant(-1, DL, ShiftOpTy), ISD::SETGT); |
45354 | if (SetCCResultType != ResultType) |
45355 | Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, ResultType, Cond); |
45356 | return Cond; |
45357 | } |
45358 | |
45359 | |
45360 | /// Turn vector tests of the signbit in the form of: |
45361 | ///   xor (sra X, elt_size(X)-1), -1 |
45362 | /// into: |
45363 | ///   pcmpgt X, -1 |
45364 | /// This should be called before type legalization because the pattern may |
45365 | /// not persist after that. |
45366 | static SDValue foldVectorXorShiftIntoCmp(SDNode *N, SelectionDAG &DAG, |
45367 | const X86Subtarget &Subtarget) { |
45368 | EVT VT = N->getValueType(0); |
45369 | if (!VT.isSimple()) |
45370 | return SDValue(); |
45371 | |
45372 | switch (VT.getSimpleVT().SimpleTy) { |
45373 | default: return SDValue(); |
45374 | case MVT::v16i8: |
45375 | case MVT::v8i16: |
45376 | case MVT::v4i32: |
45377 | case MVT::v2i64: if (!Subtarget.hasSSE2()) return SDValue(); break; |
45378 | case MVT::v32i8: |
45379 | case MVT::v16i16: |
45380 | case MVT::v8i32: |
45381 | case MVT::v4i64: if (!Subtarget.hasAVX2()) return SDValue(); break; |
45382 | } |
45383 | |
45384 | |
45385 | |
45386 | SDValue Shift = N->getOperand(0); |
45387 | SDValue Ones = N->getOperand(1); |
45388 | if (Shift.getOpcode() != ISD::SRA || !Shift.hasOneUse() || |
45389 | !ISD::isBuildVectorAllOnes(Ones.getNode())) |
45390 | return SDValue(); |
45391 | |
45392 | |
45393 | auto *ShiftAmt = |
45394 | isConstOrConstSplat(Shift.getOperand(1), /*AllowUndefs*/ true); |
45395 | if (!ShiftAmt || |
45396 | ShiftAmt->getAPIntValue() != (Shift.getScalarValueSizeInBits() - 1)) |
45397 | return SDValue(); |
45398 | |
45399 | |
45400 | |
45401 | return DAG.getSetCC(SDLoc(N), VT, Shift.getOperand(0), Ones, ISD::SETGT); |
45402 | } |
45403 | |
45404 | |
45405 | /// Detect patterns of truncation with unsigned saturation: |
45406 | /// |
45407 | /// 1. (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type). |
45408 | ///    Return the source value x to be truncated, or SDValue() if the |
45409 | ///    pattern was not matched. |
45410 | /// |
45411 | /// 2. (truncate (smin (smax (x, C1), C2)) to dest_type), |
45412 | ///    where C1 >= 0 and C2 is the unsigned max of the destination type. |
45413 | /// |
45414 | /// 3. (truncate (smax (smin (x, C2), C1)) to dest_type), |
45415 | ///    where C1 >= 0, C2 is the unsigned max of the destination type and |
45416 | ///    C1 <= C2. |
45417 | /// |
45418 | /// Patterns 2 and 3 are equivalent to |
45419 | ///   (truncate (umin (smax (x, C1), unsigned_max)) to dest_type). |
45420 | static SDValue detectUSatPattern(SDValue In, EVT VT, SelectionDAG &DAG, |
45421 | const SDLoc &DL) { |
45422 | EVT InVT = In.getValueType(); |
45423 | |
45424 | |
45425 | assert(InVT.getScalarSizeInBits() > VT.getScalarSizeInBits() && |
45426 | "Unexpected types for truncate operation"); |
45427 | |
45428 | |
45429 | auto MatchMinMax = [](SDValue V, unsigned Opcode, APInt &Limit) -> SDValue { |
45430 | if (V.getOpcode() == Opcode && |
45431 | ISD::isConstantSplatVector(V.getOperand(1).getNode(), Limit)) |
45432 | return V.getOperand(0); |
45433 | return SDValue(); |
45434 | }; |
45435 | |
45436 | APInt C1, C2; |
45437 | if (SDValue UMin = MatchMinMax(In, ISD::UMIN, C2)) |
45438 | |
45439 | |
45440 | if (C2.isMask(VT.getScalarSizeInBits())) |
45441 | return UMin; |
45442 | |
45443 | if (SDValue SMin = MatchMinMax(In, ISD::SMIN, C2)) |
45444 | if (MatchMinMax(SMin, ISD::SMAX, C1)) |
45445 | if (C1.isNonNegative() && C2.isMask(VT.getScalarSizeInBits())) |
45446 | return SMin; |
45447 | |
45448 | if (SDValue SMax = MatchMinMax(In, ISD::SMAX, C1)) |
45449 | if (SDValue SMin = MatchMinMax(SMax, ISD::SMIN, C2)) |
45450 | if (C1.isNonNegative() && C2.isMask(VT.getScalarSizeInBits()) && |
45451 | C2.uge(C1)) { |
45452 | return DAG.getNode(ISD::SMAX, DL, InVT, SMin, In.getOperand(1)); |
45453 | } |
45454 | |
45455 | return SDValue(); |
45456 | } |
45457 | |
45458 | |
45459 | /// Detect patterns of truncation with signed saturation: |
45460 | /// (truncate (smin (smax (x, signed_min_of_dest_type), |
45461 | ///                 signed_max_of_dest_type)) to dest_type) |
45462 | /// or: |
45463 | /// (truncate (smax (smin (x, signed_max_of_dest_type), |
45464 | ///                 signed_min_of_dest_type)) to dest_type). |
45465 | /// With MatchPackUS, the smax/smin range is [0, unsigned_max_of_dest_type]. |
45466 | /// Return the source value to be truncated, or SDValue() if no match. |
45467 | static SDValue detectSSatPattern(SDValue In, EVT VT, bool MatchPackUS = false) { |
45468 | unsigned NumDstBits = VT.getScalarSizeInBits(); |
45469 | unsigned NumSrcBits = In.getScalarValueSizeInBits(); |
45470 | assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation"); |
45471 | |
45472 | auto MatchMinMax = [](SDValue V, unsigned Opcode, |
45473 | const APInt &Limit) -> SDValue { |
45474 | APInt C; |
45475 | if (V.getOpcode() == Opcode && |
45476 | ISD::isConstantSplatVector(V.getOperand(1).getNode(), C) && C == Limit) |
45477 | return V.getOperand(0); |
45478 | return SDValue(); |
45479 | }; |
45480 | |
45481 | APInt SignedMax, SignedMin; |
45482 | if (MatchPackUS) { |
45483 | SignedMax = APInt::getAllOnesValue(NumDstBits).zext(NumSrcBits); |
45484 | SignedMin = APInt(NumSrcBits, 0); |
45485 | } else { |
45486 | SignedMax = APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits); |
45487 | SignedMin = APInt::getSignedMinValue(NumDstBits).sext(NumSrcBits); |
45488 | } |
45489 | |
45490 | if (SDValue SMin = MatchMinMax(In, ISD::SMIN, SignedMax)) |
45491 | if (SDValue SMax = MatchMinMax(SMin, ISD::SMAX, SignedMin)) |
45492 | return SMax; |
45493 | |
45494 | if (SDValue SMax = MatchMinMax(In, ISD::SMAX, SignedMin)) |
45495 | if (SDValue SMin = MatchMinMax(SMax, ISD::SMIN, SignedMax)) |
45496 | return SMin; |
45497 | |
45498 | return SDValue(); |
45499 | } |
45500 | |
45501 | static SDValue combineTruncateWithSat(SDValue In, EVT VT, const SDLoc &DL, |
45502 | SelectionDAG &DAG, |
45503 | const X86Subtarget &Subtarget) { |
45504 | if (!Subtarget.hasSSE2() || !VT.isVector()) |
45505 | return SDValue(); |
45506 | |
45507 | EVT SVT = VT.getVectorElementType(); |
45508 | EVT InVT = In.getValueType(); |
45509 | EVT InSVT = InVT.getVectorElementType(); |
45510 | |
45511 | |
45512 | |
45513 | |
45514 | |
45515 | if (Subtarget.hasBWI() && !Subtarget.useAVX512Regs() && |
45516 | InVT == MVT::v16i32 && VT == MVT::v16i8) { |
45517 | if (auto USatVal = detectSSatPattern(In, VT, true)) { |
45518 | |
45519 | SDValue Mid = truncateVectorWithPACK(X86ISD::PACKUS, MVT::v16i16, USatVal, |
45520 | DL, DAG, Subtarget); |
45521 | assert(Mid && "Failed to pack!"); |
45522 | return DAG.getNode(X86ISD::VTRUNCUS, DL, VT, Mid); |
45523 | } |
45524 | } |
45525 | |
45526 | |
45527 | |
45528 | |
45529 | |
45530 | |
45531 | |
45532 | bool PreferAVX512 = ((Subtarget.hasAVX512() && InSVT == MVT::i32) || |
45533 | (Subtarget.hasBWI() && InSVT == MVT::i16)) && |
45534 | (InVT.getSizeInBits() > 128) && |
45535 | (Subtarget.hasVLX() || InVT.getSizeInBits() > 256) && |
45536 | !(!Subtarget.useAVX512Regs() && VT.getSizeInBits() >= 256); |
45537 | |
45538 | if (isPowerOf2_32(VT.getVectorNumElements()) && !PreferAVX512 && |
45539 | VT.getSizeInBits() >= 64 && |
45540 | (SVT == MVT::i8 || SVT == MVT::i16) && |
45541 | (InSVT == MVT::i16 || InSVT == MVT::i32)) { |
45542 | if (auto USatVal = detectSSatPattern(In, VT, true)) { |
45543 | |
45544 | |
45545 | |
45546 | if (SVT == MVT::i8 && InSVT == MVT::i32) { |
45547 | EVT MidVT = VT.changeVectorElementType(MVT::i16); |
45548 | SDValue Mid = truncateVectorWithPACK(X86ISD::PACKSS, MidVT, USatVal, DL, |
45549 | DAG, Subtarget); |
45550 | assert(Mid && "Failed to pack!"); |
45551 | SDValue V = truncateVectorWithPACK(X86ISD::PACKUS, VT, Mid, DL, DAG, |
45552 | Subtarget); |
45553 | assert(V && "Failed to pack!"); |
45554 | return V; |
45555 | } else if (SVT == MVT::i8 || Subtarget.hasSSE41()) |
45556 | return truncateVectorWithPACK(X86ISD::PACKUS, VT, USatVal, DL, DAG, |
45557 | Subtarget); |
45558 | } |
45559 | if (auto SSatVal = detectSSatPattern(In, VT)) |
45560 | return truncateVectorWithPACK(X86ISD::PACKSS, VT, SSatVal, DL, DAG, |
45561 | Subtarget); |
45562 | } |
45563 | |
45564 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
45565 | if (TLI.isTypeLegal(InVT) && InVT.isVector() && SVT != MVT::i1 && |
45566 | Subtarget.hasAVX512() && (InSVT != MVT::i16 || Subtarget.hasBWI()) && |
45567 | (SVT == MVT::i32 || SVT == MVT::i16 || SVT == MVT::i8)) { |
45568 | unsigned TruncOpc = 0; |
45569 | SDValue SatVal; |
45570 | if (auto SSatVal = detectSSatPattern(In, VT)) { |
45571 | SatVal = SSatVal; |
45572 | TruncOpc = X86ISD::VTRUNCS; |
45573 | } else if (auto USatVal = detectUSatPattern(In, VT, DAG, DL)) { |
45574 | SatVal = USatVal; |
45575 | TruncOpc = X86ISD::VTRUNCUS; |
45576 | } |
45577 | if (SatVal) { |
45578 | unsigned ResElts = VT.getVectorNumElements(); |
45579 | |
45580 | |
45581 | if (!Subtarget.hasVLX() && !InVT.is512BitVector()) { |
45582 | unsigned NumConcats = 512 / InVT.getSizeInBits(); |
45583 | ResElts *= NumConcats; |
45584 | SmallVector<SDValue, 4> ConcatOps(NumConcats, DAG.getUNDEF(InVT)); |
45585 | ConcatOps[0] = SatVal; |
45586 | InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, |
45587 | NumConcats * InVT.getVectorNumElements()); |
45588 | SatVal = DAG.getNode(ISD::CONCAT_VECTORS, DL, InVT, ConcatOps); |
45589 | } |
45590 | |
45591 | if (ResElts * SVT.getSizeInBits() < 128) |
45592 | ResElts = 128 / SVT.getSizeInBits(); |
45593 | EVT TruncVT = EVT::getVectorVT(*DAG.getContext(), SVT, ResElts); |
45594 | SDValue Res = DAG.getNode(TruncOpc, DL, TruncVT, SatVal); |
45595 | return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res, |
45596 | DAG.getIntPtrConstant(0, DL)); |
45597 | } |
45598 | } |
45599 | |
45600 | return SDValue(); |
45601 | } |
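      | |
      | // A typical source-level instance (sketch; types illustrative): clamping |
      | // i16 lanes to [-128, 127] before truncating to i8, |
      | //   (trunc (smin (smax X:v8i16, splat -128), splat 127)) to v8i8 |
      | // is matched by detectSSatPattern and emitted as a single PACKSSWB. |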
45602 | |
45603 | /// Detect the average pattern between vectors of unsigned i8/i16, i.e. |
45604 | /// c = (a + b + 1) / 2, and replace the operation with the more efficient |
45605 | /// X86ISD::AVG (PAVGB/PAVGW) node. |
45606 | static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG, |
45607 | const X86Subtarget &Subtarget, |
45608 | const SDLoc &DL) { |
45609 | if (!VT.isVector()) |
45610 | return SDValue(); |
45611 | EVT InVT = In.getValueType(); |
45612 | unsigned NumElems = VT.getVectorNumElements(); |
45613 | |
45614 | EVT ScalarVT = VT.getVectorElementType(); |
45615 | if (!((ScalarVT == MVT::i8 || ScalarVT == MVT::i16) && NumElems >= 2)) |
45616 | return SDValue(); |
45617 | |
45618 | |
45619 | |
45620 | EVT InScalarVT = InVT.getVectorElementType(); |
45621 | if (InScalarVT.getFixedSizeInBits() <= ScalarVT.getFixedSizeInBits()) |
45622 | return SDValue(); |
45623 | |
45624 | if (!Subtarget.hasSSE2()) |
45625 | return SDValue(); |
45626 | |
45627 | |
45628 | |
45629 | |
45630 | |
45631 | |
45632 | |
45633 | |
45634 | |
45635 | |
45636 | |
45637 | if (In.getOpcode() != ISD::SRL) |
45638 | return SDValue(); |
45639 | |
45640 | |
45641 | |
45642 | auto IsConstVectorInRange = [](SDValue V, unsigned Min, unsigned Max) { |
45643 | return ISD::matchUnaryPredicate(V, [Min, Max](ConstantSDNode *C) { |
45644 | return !(C->getAPIntValue().ult(Min) || C->getAPIntValue().ugt(Max)); |
45645 | }); |
45646 | }; |
45647 | |
45648 | |
45649 | SDValue LHS = In.getOperand(0); |
45650 | SDValue RHS = In.getOperand(1); |
45651 | if (!IsConstVectorInRange(RHS, 1, 1)) |
45652 | return SDValue(); |
45653 | if (LHS.getOpcode() != ISD::ADD) |
45654 | return SDValue(); |
45655 | |
45656 | |
45657 | SDValue Operands[3]; |
45658 | Operands[0] = LHS.getOperand(0); |
45659 | Operands[1] = LHS.getOperand(1); |
45660 | |
45661 | auto AVGBuilder = [](SelectionDAG &DAG, const SDLoc &DL, |
45662 | ArrayRef<SDValue> Ops) { |
45663 | return DAG.getNode(X86ISD::AVG, DL, Ops[0].getValueType(), Ops); |
45664 | }; |
45665 | |
45666 | auto AVGSplitter = [&](SDValue Op0, SDValue Op1) { |
45667 | |
45668 | unsigned NumElemsPow2 = PowerOf2Ceil(NumElems); |
45669 | EVT Pow2VT = EVT::getVectorVT(*DAG.getContext(), ScalarVT, NumElemsPow2); |
45670 | if (NumElemsPow2 != NumElems) { |
45671 | SmallVector<SDValue, 32> Ops0(NumElemsPow2, DAG.getUNDEF(ScalarVT)); |
45672 | SmallVector<SDValue, 32> Ops1(NumElemsPow2, DAG.getUNDEF(ScalarVT)); |
45673 | for (unsigned i = 0; i != NumElems; ++i) { |
45674 | SDValue Idx = DAG.getIntPtrConstant(i, DL); |
45675 | Ops0[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, Op0, Idx); |
45676 | Ops1[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, Op1, Idx); |
45677 | } |
45678 | Op0 = DAG.getBuildVector(Pow2VT, DL, Ops0); |
45679 | Op1 = DAG.getBuildVector(Pow2VT, DL, Ops1); |
45680 | } |
45681 | SDValue Res = |
45682 | SplitOpsAndApply(DAG, Subtarget, DL, Pow2VT, {Op0, Op1}, AVGBuilder); |
45683 | if (NumElemsPow2 == NumElems) |
45684 | return Res; |
45685 | return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res, |
45686 | DAG.getIntPtrConstant(0, DL)); |
45687 | }; |
45688 | |
45689 | |
45690 | |
45691 | if (IsConstVectorInRange(Operands[1], 1, ScalarVT == MVT::i8 ? 256 : 65536) && |
45692 | Operands[0].getOpcode() == ISD::ZERO_EXTEND && |
45693 | Operands[0].getOperand(0).getValueType() == VT) { |
45694 | |
45695 | |
45696 | SDValue VecOnes = DAG.getConstant(1, DL, InVT); |
45697 | Operands[1] = DAG.getNode(ISD::SUB, DL, InVT, Operands[1], VecOnes); |
45698 | Operands[1] = DAG.getNode(ISD::TRUNCATE, DL, VT, Operands[1]); |
45699 | return AVGSplitter(Operands[0].getOperand(0), Operands[1]); |
45700 | } |
45701 | |
45702 | |
45703 | |
45704 | auto FindAddLike = [&](SDValue V, SDValue &Op0, SDValue &Op1) { |
45705 | if (ISD::ADD == V.getOpcode()) { |
45706 | Op0 = V.getOperand(0); |
45707 | Op1 = V.getOperand(1); |
45708 | return true; |
45709 | } |
45710 | if (ISD::ZERO_EXTEND != V.getOpcode()) |
45711 | return false; |
45712 | V = V.getOperand(0); |
45713 | if (V.getValueType() != VT || ISD::OR != V.getOpcode() || |
45714 | !DAG.haveNoCommonBitsSet(V.getOperand(0), V.getOperand(1))) |
45715 | return false; |
45716 | Op0 = V.getOperand(0); |
45717 | Op1 = V.getOperand(1); |
45718 | return true; |
45719 | }; |
45720 | |
45721 | SDValue Op0, Op1; |
45722 | if (FindAddLike(Operands[0], Op0, Op1)) |
45723 | std::swap(Operands[0], Operands[1]); |
45724 | else if (!FindAddLike(Operands[1], Op0, Op1)) |
45725 | return SDValue(); |
45726 | Operands[2] = Op0; |
45727 | Operands[1] = Op1; |
45728 | |
45729 | |
45730 | |
45731 | for (int i = 0; i < 3; ++i) { |
45732 | if (!IsConstVectorInRange(Operands[i], 1, 1)) |
45733 | continue; |
45734 | std::swap(Operands[i], Operands[2]); |
45735 | |
45736 | |
45737 | for (int j = 0; j < 2; ++j) |
45738 | if (Operands[j].getValueType() != VT) { |
45739 | if (Operands[j].getOpcode() != ISD::ZERO_EXTEND || |
45740 | Operands[j].getOperand(0).getValueType() != VT) |
45741 | return SDValue(); |
45742 | Operands[j] = Operands[j].getOperand(0); |
45743 | } |
45744 | |
45745 | |
45746 | return AVGSplitter(Operands[0], Operands[1]); |
45747 | } |
45748 | |
45749 | return SDValue(); |
45750 | } |
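      | |
      | // Source-level shape this detects (sketch; names hypothetical): |
      | // |
      | //   // a, b, r are vectors of u8; the sum is computed in a wider type |
      | //   r[i] = (uint8_t)((a[i] + b[i] + 1) >> 1); |
      | // |
      | // i.e. In = (srl (add (add (zext a), (zext b)), splat 1), splat 1), |
      | // truncated back to vXi8 --> X86ISD::AVG, selected to PAVGB/PAVGW. |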
45751 | |
45752 | static SDValue combineLoad(SDNode *N, SelectionDAG &DAG, |
45753 | TargetLowering::DAGCombinerInfo &DCI, |
45754 | const X86Subtarget &Subtarget) { |
45755 | LoadSDNode *Ld = cast<LoadSDNode>(N); |
45756 | EVT RegVT = Ld->getValueType(0); |
45757 | EVT MemVT = Ld->getMemoryVT(); |
45758 | SDLoc dl(Ld); |
45759 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
45760 | |
45761 | |
45762 | |
45763 | |
45764 | ISD::LoadExtType Ext = Ld->getExtensionType(); |
45765 | bool Fast; |
45766 | if (RegVT.is256BitVector() && !DCI.isBeforeLegalizeOps() && |
45767 | Ext == ISD::NON_EXTLOAD && |
45768 | ((Ld->isNonTemporal() && !Subtarget.hasInt256() && |
45769 | Ld->getAlignment() >= 16) || |
45770 | (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), RegVT, |
45771 | *Ld->getMemOperand(), &Fast) && |
45772 | !Fast))) { |
45773 | unsigned NumElems = RegVT.getVectorNumElements(); |
45774 | if (NumElems < 2) |
45775 | return SDValue(); |
45776 | |
45777 | unsigned HalfOffset = 16; |
45778 | SDValue Ptr1 = Ld->getBasePtr(); |
45779 | SDValue Ptr2 = |
45780 | DAG.getMemBasePlusOffset(Ptr1, TypeSize::Fixed(HalfOffset), dl); |
45781 | EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), |
45782 | NumElems / 2); |
45783 | SDValue Load1 = |
45784 | DAG.getLoad(HalfVT, dl, Ld->getChain(), Ptr1, Ld->getPointerInfo(), |
45785 | Ld->getOriginalAlign(), |
45786 | Ld->getMemOperand()->getFlags()); |
45787 | SDValue Load2 = DAG.getLoad(HalfVT, dl, Ld->getChain(), Ptr2, |
45788 | Ld->getPointerInfo().getWithOffset(HalfOffset), |
45789 | Ld->getOriginalAlign(), |
45790 | Ld->getMemOperand()->getFlags()); |
45791 | SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, |
45792 | Load1.getValue(1), Load2.getValue(1)); |
45793 | |
45794 | SDValue NewVec = DAG.getNode(ISD::CONCAT_VECTORS, dl, RegVT, Load1, Load2); |
45795 | return DCI.CombineTo(N, NewVec, TF, true); |
45796 | } |
45797 | |
45798 | |
45799 | |
45800 | if (Ext == ISD::NON_EXTLOAD && !Subtarget.hasAVX512() && RegVT.isVector() && |
45801 | RegVT.getScalarType() == MVT::i1 && DCI.isBeforeLegalize()) { |
45802 | unsigned NumElts = RegVT.getVectorNumElements(); |
45803 | EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), NumElts); |
45804 | if (TLI.isTypeLegal(IntVT)) { |
45805 | SDValue IntLoad = DAG.getLoad(IntVT, dl, Ld->getChain(), Ld->getBasePtr(), |
45806 | Ld->getPointerInfo(), |
45807 | Ld->getOriginalAlign(), |
45808 | Ld->getMemOperand()->getFlags()); |
45809 | SDValue BoolVec = DAG.getBitcast(RegVT, IntLoad); |
45810 | return DCI.CombineTo(N, BoolVec, IntLoad.getValue(1), true); |
45811 | } |
45812 | } |
45813 | |
45814 | |
45815 | |
45816 | if (Ext == ISD::NON_EXTLOAD && Subtarget.hasAVX() && Ld->isSimple() && |
45817 | (RegVT.is128BitVector() || RegVT.is256BitVector())) { |
45818 | SDValue Ptr = Ld->getBasePtr(); |
45819 | SDValue Chain = Ld->getChain(); |
45820 | for (SDNode *User : Ptr->uses()) { |
45821 | if (User != N && User->getOpcode() == X86ISD::SUBV_BROADCAST_LOAD && |
45822 | cast<MemIntrinsicSDNode>(User)->getBasePtr() == Ptr && |
45823 | cast<MemIntrinsicSDNode>(User)->getChain() == Chain && |
45824 | cast<MemIntrinsicSDNode>(User)->getMemoryVT().getSizeInBits() == |
45825 | MemVT.getSizeInBits() && |
45826 | !User->hasAnyUseOfValue(1) && |
45827 | User->getValueSizeInBits(0).getFixedSize() > |
45828 | RegVT.getFixedSizeInBits()) { |
45829 | SDValue Extract = extractSubVector(SDValue(User, 0), 0, DAG, SDLoc(N), |
45830 | RegVT.getSizeInBits()); |
45831 | Extract = DAG.getBitcast(RegVT, Extract); |
45832 | return DCI.CombineTo(N, Extract, SDValue(User, 1)); |
45833 | } |
45834 | } |
45835 | } |
45836 | |
45837 | |
45838 | unsigned AddrSpace = Ld->getAddressSpace(); |
45839 | if (AddrSpace == X86AS::PTR64 || AddrSpace == X86AS::PTR32_SPTR || |
45840 | AddrSpace == X86AS::PTR32_UPTR) { |
45841 | MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout()); |
45842 | if (PtrVT != Ld->getBasePtr().getSimpleValueType()) { |
45843 | SDValue Cast = |
45844 | DAG.getAddrSpaceCast(dl, PtrVT, Ld->getBasePtr(), AddrSpace, 0); |
45845 | return DAG.getLoad(RegVT, dl, Ld->getChain(), Cast, Ld->getPointerInfo(), |
45846 | Ld->getOriginalAlign(), |
45847 | Ld->getMemOperand()->getFlags()); |
45848 | } |
45849 | } |
45850 | |
45851 | return SDValue(); |
45852 | } |
45853 | |
45854 | /// If V is a build vector of boolean (vXi1) constants with exactly one of |
45855 | /// those constants being true, return the operand index of that true |
45856 | /// element; otherwise, return -1. |
45857 | static int getOneTrueElt(SDValue V) { |
45858 | |
45859 | |
45860 | |
45861 | |
45862 | |
45863 | |
45864 | |
45865 | auto *BV = dyn_cast<BuildVectorSDNode>(V); |
45866 | if (!BV || BV->getValueType(0).getVectorElementType() != MVT::i1) |
45867 | return -1; |
45868 | |
45869 | int TrueIndex = -1; |
45870 | unsigned NumElts = BV->getValueType(0).getVectorNumElements(); |
45871 | for (unsigned i = 0; i < NumElts; ++i) { |
45872 | const SDValue &Op = BV->getOperand(i); |
45873 | if (Op.isUndef()) |
45874 | continue; |
45875 | auto *ConstNode = dyn_cast<ConstantSDNode>(Op); |
45876 | if (!ConstNode) |
45877 | return -1; |
45878 | if (ConstNode->getAPIntValue().countTrailingOnes() >= 1) { |
45879 | |
45880 | if (TrueIndex >= 0) |
45881 | return -1; |
45882 | TrueIndex = i; |
45883 | } |
45884 | } |
45885 | return TrueIndex; |
45886 | } |
45887 | |
45888 | |
45889 | /// Given a masked memory load/store whose mask has exactly one element set, |
45890 | /// return true and compute the scalar address of that element, the vector |
45891 | /// index to insert/extract it at, and the alignment for the scalar access. |
45892 | static bool getParamsForOneTrueMaskedElt(MaskedLoadStoreSDNode *MaskedOp, |
45893 | SelectionDAG &DAG, SDValue &Addr, |
45894 | SDValue &Index, Align &Alignment, |
45895 | unsigned &Offset) { |
45896 | int TrueMaskElt = getOneTrueElt(MaskedOp->getMask()); |
45897 | if (TrueMaskElt < 0) |
45898 | return false; |
45899 | |
45900 | |
45901 | |
45902 | EVT EltVT = MaskedOp->getMemoryVT().getVectorElementType(); |
45903 | Offset = 0; |
45904 | Addr = MaskedOp->getBasePtr(); |
45905 | if (TrueMaskElt != 0) { |
45906 | Offset = TrueMaskElt * EltVT.getStoreSize(); |
45907 | Addr = DAG.getMemBasePlusOffset(Addr, TypeSize::Fixed(Offset), |
45908 | SDLoc(MaskedOp)); |
45909 | } |
45910 | |
45911 | Index = DAG.getIntPtrConstant(TrueMaskElt, SDLoc(MaskedOp)); |
45912 | Alignment = commonAlignment(MaskedOp->getOriginalAlign(), |
45913 | EltVT.getStoreSize()); |
45914 | return true; |
45915 | } |
45916 | |
45917 | |
45918 | /// If exactly one element of the mask is set for a non-extending masked |
45919 | /// load, it is a scalar load and vector insert. The degenerate all-zeros and |
45920 | /// all-ones masks are expected to have been optimized away in IR already. |
45921 | static SDValue |
45922 | reduceMaskedLoadToScalarLoad(MaskedLoadSDNode *ML, SelectionDAG &DAG, |
45923 | TargetLowering::DAGCombinerInfo &DCI, |
45924 | const X86Subtarget &Subtarget) { |
45925 | assert(ML->isUnindexed() && "Unexpected indexed masked load!"); |
45926 | |
45927 | |
45928 | |
45929 | |
45930 | SDValue Addr, VecIndex; |
45931 | Align Alignment; |
45932 | unsigned Offset; |
45933 | if (!getParamsForOneTrueMaskedElt(ML, DAG, Addr, VecIndex, Alignment, Offset)) |
45934 | return SDValue(); |
45935 | |
45936 | |
45937 | |
45938 | SDLoc DL(ML); |
45939 | EVT VT = ML->getValueType(0); |
45940 | EVT EltVT = VT.getVectorElementType(); |
45941 | |
45942 | EVT CastVT = VT; |
45943 | if (EltVT == MVT::i64 && !Subtarget.is64Bit()) { |
45944 | EltVT = MVT::f64; |
45945 | CastVT = VT.changeVectorElementType(EltVT); |
45946 | } |
45947 | |
45948 | SDValue Load = |
45949 | DAG.getLoad(EltVT, DL, ML->getChain(), Addr, |
45950 | ML->getPointerInfo().getWithOffset(Offset), |
45951 | Alignment, ML->getMemOperand()->getFlags()); |
45952 | |
45953 | SDValue PassThru = DAG.getBitcast(CastVT, ML->getPassThru()); |
45954 | |
45955 | |
45956 | SDValue Insert = |
45957 | DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, CastVT, PassThru, Load, VecIndex); |
45958 | Insert = DAG.getBitcast(VT, Insert); |
45959 | return DCI.CombineTo(ML, Insert, Load.getValue(1), true); |
45960 | } |
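      | |
      | // Sketch (v4i32, mask element 2 set; values illustrative): |
      | //   (masked_load ptr, mask <0,0,1,0>, passthru) |
      | //     --> (insert_vector_elt passthru, (load ptr + 8), 2) |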
45961 | |
45962 | static SDValue |
45963 | combineMaskedLoadConstantMask(MaskedLoadSDNode *ML, SelectionDAG &DAG, |
45964 | TargetLowering::DAGCombinerInfo &DCI) { |
45965 | assert(ML->isUnindexed() && "Unexpected indexed masked load!"); |
45966 | if (!ISD::isBuildVectorOfConstantSDNodes(ML->getMask().getNode())) |
45967 | return SDValue(); |
45968 | |
45969 | SDLoc DL(ML); |
45970 | EVT VT = ML->getValueType(0); |
45971 | |
45972 | // If we are loading the first and last elements of a vector, it is safe to |
45973 | // load the whole vector and blend with the pass-through value: every byte |
45974 | // of the full-width load is then known to be dereferenceable. |
45975 | unsigned NumElts = VT.getVectorNumElements(); |
45976 | BuildVectorSDNode *MaskBV = cast<BuildVectorSDNode>(ML->getMask()); |
45977 | bool LoadFirstElt = !isNullConstant(MaskBV->getOperand(0)); |
45978 | bool LoadLastElt = !isNullConstant(MaskBV->getOperand(NumElts - 1)); |
45979 | if (LoadFirstElt && LoadLastElt) { |
45980 | SDValue VecLd = DAG.getLoad(VT, DL, ML->getChain(), ML->getBasePtr(), |
45981 | ML->getMemOperand()); |
45982 | SDValue Blend = DAG.getSelect(DL, VT, ML->getMask(), VecLd, |
45983 | ML->getPassThru()); |
45984 | return DCI.CombineTo(ML, Blend, VecLd.getValue(1), true); |
45985 | } |
45986 | |
45987 | |
45988 | |
45989 | |
45990 | |
45991 | |
45992 | |
45993 | if (ML->getPassThru().isUndef()) |
45994 | return SDValue(); |
45995 | |
45996 | if (ISD::isBuildVectorAllZeros(ML->getPassThru().getNode())) |
45997 | return SDValue(); |
45998 | |
45999 | |
46000 | |
46001 | SDValue NewML = DAG.getMaskedLoad( |
46002 | VT, DL, ML->getChain(), ML->getBasePtr(), ML->getOffset(), ML->getMask(), |
46003 | DAG.getUNDEF(VT), ML->getMemoryVT(), ML->getMemOperand(), |
46004 | ML->getAddressingMode(), ML->getExtensionType()); |
46005 | SDValue Blend = DAG.getSelect(DL, VT, ML->getMask(), NewML, |
46006 | ML->getPassThru()); |
46007 | |
46008 | return DCI.CombineTo(ML, Blend, NewML.getValue(1), true); |
46009 | } |
46010 | |
46011 | static SDValue combineMaskedLoad(SDNode *N, SelectionDAG &DAG, |
46012 | TargetLowering::DAGCombinerInfo &DCI, |
46013 | const X86Subtarget &Subtarget) { |
46014 | auto *Mld = cast<MaskedLoadSDNode>(N); |
46015 | |
46016 | |
46017 | if (Mld->isExpandingLoad()) |
46018 | return SDValue(); |
46019 | |
46020 | if (Mld->getExtensionType() == ISD::NON_EXTLOAD) { |
46021 | if (SDValue ScalarLoad = |
46022 | reduceMaskedLoadToScalarLoad(Mld, DAG, DCI, Subtarget)) |
46023 | return ScalarLoad; |
46024 | |
46025 | // TODO: Do some AVX512 subsets benefit from this transform? |
46026 | if (!Subtarget.hasAVX512()) |
46027 | if (SDValue Blend = combineMaskedLoadConstantMask(Mld, DAG, DCI)) |
46028 | return Blend; |
46029 | } |
46030 | |
46031 | // If the mask value has been legalized to a non-boolean vector, try to |
46032 | // simplify ops leading up to it. We only demand the MSB of each lane. |
46033 | SDValue Mask = Mld->getMask(); |
46034 | if (Mask.getScalarValueSizeInBits() != 1) { |
46035 | EVT VT = Mld->getValueType(0); |
46036 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
46037 | APInt DemandedBits(APInt::getSignMask(VT.getScalarSizeInBits())); |
46038 | if (TLI.SimplifyDemandedBits(Mask, DemandedBits, DCI)) { |
46039 | if (N->getOpcode() != ISD::DELETED_NODE) |
46040 | DCI.AddToWorklist(N); |
46041 | return SDValue(N, 0); |
46042 | } |
46043 | if (SDValue NewMask = |
46044 | TLI.SimplifyMultipleUseDemandedBits(Mask, DemandedBits, DAG)) |
46045 | return DAG.getMaskedLoad( |
46046 | VT, SDLoc(N), Mld->getChain(), Mld->getBasePtr(), Mld->getOffset(), |
46047 | NewMask, Mld->getPassThru(), Mld->getMemoryVT(), Mld->getMemOperand(), |
46048 | Mld->getAddressingMode(), Mld->getExtensionType()); |
46049 | } |
46050 | |
46051 | return SDValue(); |
46052 | } |
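// Note on the mask simplification above: once the mask has been legalized to
// a non-boolean vector, only the sign bit of each lane is consulted, which is
// why DemandedBits is getSignMask() rather than an all-ones mask.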
46053 | |
46054 | /// If exactly one element of the mask is set for a non-truncating masked |
46055 | /// store, it is a vector extract and scalar store. |
46056 | /// Note: It is expected that the degenerate cases of an all-zeros or all-ones |
46057 | /// mask have already been optimized in IR, so we don't bother with those here. |
46058 | static SDValue reduceMaskedStoreToScalarStore(MaskedStoreSDNode *MS, |
46059 | SelectionDAG &DAG, |
46060 | const X86Subtarget &Subtarget) { |
46061 | // TODO: This is not x86-specific, so it could be lifted to DAGCombiner. |
46062 | // However, some target hooks may need to be added to know when the transform |
46063 | // is profitable. Endianness would also have to be considered. |
46064 | |
46065 | SDValue Addr, VecIndex; |
46066 | Align Alignment; |
46067 | unsigned Offset; |
46068 | if (!getParamsForOneTrueMaskedElt(MS, DAG, Addr, VecIndex, Alignment, Offset)) |
46069 | return SDValue(); |
46070 | |
46071 | |
46072 | SDLoc DL(MS); |
46073 | SDValue Value = MS->getValue(); |
46074 | EVT VT = Value.getValueType(); |
46075 | EVT EltVT = VT.getVectorElementType(); |
46076 | if (EltVT == MVT::i64 && !Subtarget.is64Bit()) { |
46077 | EltVT = MVT::f64; |
46078 | EVT CastVT = VT.changeVectorElementType(EltVT); |
46079 | Value = DAG.getBitcast(CastVT, Value); |
46080 | } |
46081 | SDValue Extract = |
46082 | DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Value, VecIndex); |
46083 | |
46084 | |
46085 | return DAG.getStore(MS->getChain(), DL, Extract, Addr, |
46086 | MS->getPointerInfo().getWithOffset(Offset), |
46087 | Alignment, MS->getMemOperand()->getFlags()); |
46088 | } |
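// Mirror of the masked-load case (hypothetical v4f32 store, mask <0,1,0,0>):
//   masked_store v, addr, <0,1,0,0>
// becomes
//   s = extract_vector_elt v, 1
//   store s, addr + 1 * sizeof(f32)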
46089 | |
46090 | static SDValue combineMaskedStore(SDNode *N, SelectionDAG &DAG, |
46091 | TargetLowering::DAGCombinerInfo &DCI, |
46092 | const X86Subtarget &Subtarget) { |
46093 | MaskedStoreSDNode *Mst = cast<MaskedStoreSDNode>(N); |
46094 | if (Mst->isCompressingStore()) |
46095 | return SDValue(); |
46096 | |
46097 | EVT VT = Mst->getValue().getValueType(); |
46098 | SDLoc dl(Mst); |
46099 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
46100 | |
46101 | if (Mst->isTruncatingStore()) |
46102 | return SDValue(); |
46103 | |
46104 | if (SDValue ScalarStore = reduceMaskedStoreToScalarStore(Mst, DAG, Subtarget)) |
46105 | return ScalarStore; |
46106 | |
46107 | // If the mask value has been legalized to a non-boolean vector, try to |
46108 | // simplify ops leading up to it. We only demand the MSB of each lane. |
46109 | SDValue Mask = Mst->getMask(); |
46110 | if (Mask.getScalarValueSizeInBits() != 1) { |
46111 | APInt DemandedBits(APInt::getSignMask(VT.getScalarSizeInBits())); |
46112 | if (TLI.SimplifyDemandedBits(Mask, DemandedBits, DCI)) { |
46113 | if (N->getOpcode() != ISD::DELETED_NODE) |
46114 | DCI.AddToWorklist(N); |
46115 | return SDValue(N, 0); |
46116 | } |
46117 | if (SDValue NewMask = |
46118 | TLI.SimplifyMultipleUseDemandedBits(Mask, DemandedBits, DAG)) |
46119 | return DAG.getMaskedStore(Mst->getChain(), SDLoc(N), Mst->getValue(), |
46120 | Mst->getBasePtr(), Mst->getOffset(), NewMask, |
46121 | Mst->getMemoryVT(), Mst->getMemOperand(), |
46122 | Mst->getAddressingMode()); |
46123 | } |
46124 | |
46125 | SDValue Value = Mst->getValue(); |
46126 | if (Value.getOpcode() == ISD::TRUNCATE && Value.getNode()->hasOneUse() && |
46127 | TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(), |
46128 | Mst->getMemoryVT())) { |
46129 | return DAG.getMaskedStore(Mst->getChain(), SDLoc(N), Value.getOperand(0), |
46130 | Mst->getBasePtr(), Mst->getOffset(), Mask, |
46131 | Mst->getMemoryVT(), Mst->getMemOperand(), |
46132 | Mst->getAddressingMode(), true); |
46133 | } |
46134 | |
46135 | return SDValue(); |
46136 | } |
46137 | |
46138 | static SDValue combineStore(SDNode *N, SelectionDAG &DAG, |
46139 | TargetLowering::DAGCombinerInfo &DCI, |
46140 | const X86Subtarget &Subtarget) { |
46141 | StoreSDNode *St = cast<StoreSDNode>(N); |
46142 | EVT StVT = St->getMemoryVT(); |
46143 | SDLoc dl(St); |
46144 | SDValue StoredVal = St->getValue(); |
46145 | EVT VT = StoredVal.getValueType(); |
46146 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
46147 | |
46148 | // Convert a store of vXi1 into a store of iX and a bitcast. |
46149 | if (!Subtarget.hasAVX512() && VT == StVT && VT.isVector() && |
46150 | VT.getVectorElementType() == MVT::i1) { |
46151 | |
46152 | EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), VT.getVectorNumElements()); |
46153 | StoredVal = DAG.getBitcast(NewVT, StoredVal); |
46154 | |
46155 | return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(), |
46156 | St->getPointerInfo(), St->getOriginalAlign(), |
46157 | St->getMemOperand()->getFlags()); |
46158 | } |
46159 | |
46160 | // If this is a store of a scalar_to_vector to v1i1, just use a scalar store. |
46161 | // This will avoid a copy to k-register. |
46162 | if (VT == MVT::v1i1 && VT == StVT && Subtarget.hasAVX512() && |
46163 | StoredVal.getOpcode() == ISD::SCALAR_TO_VECTOR && |
46164 | StoredVal.getOperand(0).getValueType() == MVT::i8) { |
46165 | SDValue Val = StoredVal.getOperand(0); |
46166 | |
46167 | Val = DAG.getZeroExtendInReg(Val, dl, MVT::i1); |
46168 | return DAG.getStore(St->getChain(), dl, Val, |
46169 | St->getBasePtr(), St->getPointerInfo(), |
46170 | St->getOriginalAlign(), |
46171 | St->getMemOperand()->getFlags()); |
46172 | } |
46173 | |
46174 | |
46175 | if ((VT == MVT::v1i1 || VT == MVT::v2i1 || VT == MVT::v4i1) && VT == StVT && |
46176 | Subtarget.hasAVX512()) { |
46177 | unsigned NumConcats = 8 / VT.getVectorNumElements(); |
46178 | |
46179 | SmallVector<SDValue, 4> Ops(NumConcats, DAG.getConstant(0, dl, VT)); |
46180 | Ops[0] = StoredVal; |
46181 | StoredVal = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i1, Ops); |
46182 | return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(), |
46183 | St->getPointerInfo(), St->getOriginalAlign(), |
46184 | St->getMemOperand()->getFlags()); |
46185 | } |
46186 | |
46187 | |
46188 | if ((VT == MVT::v8i1 || VT == MVT::v16i1 || VT == MVT::v32i1 || |
46189 | VT == MVT::v64i1) && VT == StVT && TLI.isTypeLegal(VT) && |
46190 | ISD::isBuildVectorOfConstantSDNodes(StoredVal.getNode())) { |
46191 | |
46192 | if (!DCI.isBeforeLegalize() && VT == MVT::v64i1 && !Subtarget.is64Bit()) { |
46193 | SDValue Lo = DAG.getBuildVector(MVT::v32i1, dl, |
46194 | StoredVal->ops().slice(0, 32)); |
46195 | Lo = combinevXi1ConstantToInteger(Lo, DAG); |
46196 | SDValue Hi = DAG.getBuildVector(MVT::v32i1, dl, |
46197 | StoredVal->ops().slice(32, 32)); |
46198 | Hi = combinevXi1ConstantToInteger(Hi, DAG); |
46199 | |
46200 | SDValue Ptr0 = St->getBasePtr(); |
46201 | SDValue Ptr1 = DAG.getMemBasePlusOffset(Ptr0, TypeSize::Fixed(4), dl); |
46202 | |
46203 | SDValue Ch0 = |
46204 | DAG.getStore(St->getChain(), dl, Lo, Ptr0, St->getPointerInfo(), |
46205 | St->getOriginalAlign(), |
46206 | St->getMemOperand()->getFlags()); |
46207 | SDValue Ch1 = |
46208 | DAG.getStore(St->getChain(), dl, Hi, Ptr1, |
46209 | St->getPointerInfo().getWithOffset(4), |
46210 | St->getOriginalAlign(), |
46211 | St->getMemOperand()->getFlags()); |
46212 | return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ch0, Ch1); |
46213 | } |
46214 | |
46215 | StoredVal = combinevXi1ConstantToInteger(StoredVal, DAG); |
46216 | return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(), |
46217 | St->getPointerInfo(), St->getOriginalAlign(), |
46218 | St->getMemOperand()->getFlags()); |
46219 | } |
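// For example (hypothetical): storing the v8i1 constant <1,0,1,0,1,0,1,0>
// becomes a plain i8 store of 0x55, since combinevXi1ConstantToInteger maps
// element 0 to bit 0.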
46220 | |
46221 | // If we are saving a 32-byte vector and 32-byte stores are slow, such as on |
46222 | // Sandy Bridge, perform two 16-byte stores. |
46223 | bool Fast; |
46224 | if (VT.is256BitVector() && StVT == VT && |
46225 | TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT, |
46226 | *St->getMemOperand(), &Fast) && |
46227 | !Fast) { |
46228 | unsigned NumElems = VT.getVectorNumElements(); |
46229 | if (NumElems < 2) |
46230 | return SDValue(); |
46231 | |
46232 | return splitVectorStore(St, DAG); |
46233 | } |
46234 | |
46235 | |
46236 | if (St->isNonTemporal() && StVT == VT && |
46237 | St->getAlignment() < VT.getStoreSize()) { |
46238 | |
46239 | |
46240 | if (VT.is256BitVector() || VT.is512BitVector()) { |
46241 | unsigned NumElems = VT.getVectorNumElements(); |
46242 | if (NumElems < 2) |
46243 | return SDValue(); |
46244 | return splitVectorStore(St, DAG); |
46245 | } |
46246 | |
46247 | // XMM nt-stores - scalarize this to f64 nt-stores on SSE4A, else i32/i64 |
46248 | // to use MOVNTI. |
46249 | if (VT.is128BitVector() && Subtarget.hasSSE2()) { |
46250 | MVT NTVT = Subtarget.hasSSE4A() |
46251 | ? MVT::v2f64 |
46252 | : (TLI.isTypeLegal(MVT::i64) ? MVT::v2i64 : MVT::v4i32); |
46253 | return scalarizeVectorStore(St, NTVT, DAG); |
46254 | } |
46255 | } |
46256 | |
46257 | |
46258 | |
46259 | if (!St->isTruncatingStore() && VT == MVT::v16i8 && !Subtarget.hasBWI() && |
46260 | St->getValue().getOpcode() == ISD::TRUNCATE && |
46261 | St->getValue().getOperand(0).getValueType() == MVT::v16i16 && |
46262 | TLI.isTruncStoreLegal(MVT::v16i32, MVT::v16i8) && |
46263 | St->getValue().hasOneUse() && !DCI.isBeforeLegalizeOps()) { |
46264 | SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::v16i32, St->getValue()); |
46265 | return DAG.getTruncStore(St->getChain(), dl, Ext, St->getBasePtr(), |
46266 | MVT::v16i8, St->getMemOperand()); |
46267 | } |
46268 | |
46269 | |
46270 | if (!St->isTruncatingStore() && StoredVal.hasOneUse() && |
46271 | (StoredVal.getOpcode() == X86ISD::VTRUNCUS || |
46272 | StoredVal.getOpcode() == X86ISD::VTRUNCS) && |
46273 | TLI.isTruncStoreLegal(StoredVal.getOperand(0).getValueType(), VT)) { |
46274 | bool IsSigned = StoredVal.getOpcode() == X86ISD::VTRUNCS; |
46275 | return EmitTruncSStore(IsSigned, St->getChain(), |
46276 | dl, StoredVal.getOperand(0), St->getBasePtr(), |
46277 | VT, St->getMemOperand(), DAG); |
46278 | } |
46279 | |
46280 | |
46281 | if (!St->isTruncatingStore() && StoredVal.hasOneUse()) { |
46282 | auto IsExtractedElement = [](SDValue V) { |
46283 | if (V.getOpcode() == ISD::TRUNCATE && V.getOperand(0).hasOneUse()) |
46284 | V = V.getOperand(0); |
46285 | unsigned Opc = V.getOpcode(); |
46286 | if (Opc == ISD::EXTRACT_VECTOR_ELT || Opc == X86ISD::PEXTRW) { |
46287 | if (V.getOperand(0).hasOneUse() && isNullConstant(V.getOperand(1))) |
46288 | return V.getOperand(0); |
46289 | } |
46290 | return SDValue(); |
46291 | }; |
46292 | if (SDValue Extract = IsExtractedElement(StoredVal)) { |
46293 | SDValue Trunc = peekThroughOneUseBitcasts(Extract); |
46294 | if (Trunc.getOpcode() == X86ISD::VTRUNC) { |
46295 | SDValue Src = Trunc.getOperand(0); |
46296 | MVT DstVT = Trunc.getSimpleValueType(); |
46297 | MVT SrcVT = Src.getSimpleValueType(); |
46298 | unsigned NumSrcElts = SrcVT.getVectorNumElements(); |
46299 | unsigned NumTruncBits = DstVT.getScalarSizeInBits() * NumSrcElts; |
46300 | MVT TruncVT = MVT::getVectorVT(DstVT.getScalarType(), NumSrcElts); |
46301 | if (NumTruncBits == VT.getSizeInBits() && |
46302 | TLI.isTruncStoreLegal(SrcVT, TruncVT)) { |
46303 | return DAG.getTruncStore(St->getChain(), dl, Src, St->getBasePtr(), |
46304 | TruncVT, St->getMemOperand()); |
46305 | } |
46306 | } |
46307 | } |
46308 | } |
46309 | |
46310 | // Optimize trunc store (of multiple scalars) to shuffle and store. |
46311 | // First, pack all of the elements in one place. Next, store to memory |
46312 | // in fewer chunks. |
46313 | if (St->isTruncatingStore() && VT.isVector()) { |
46314 | // Check if we can detect an AVG pattern from the truncation. If yes, |
46315 | // replace the trunc store by a normal store with the result of X86ISD::AVG |
46316 | // instruction. |
46317 | if (DCI.isBeforeLegalize() || TLI.isTypeLegal(St->getMemoryVT())) |
46318 | if (SDValue Avg = detectAVGPattern(St->getValue(), St->getMemoryVT(), DAG, |
46319 | Subtarget, dl)) |
46320 | return DAG.getStore(St->getChain(), dl, Avg, St->getBasePtr(), |
46321 | St->getPointerInfo(), St->getOriginalAlign(), |
46322 | St->getMemOperand()->getFlags()); |
46323 | |
46324 | if (TLI.isTruncStoreLegal(VT, StVT)) { |
46325 | if (SDValue Val = detectSSatPattern(St->getValue(), St->getMemoryVT())) |
46326 | return EmitTruncSStore(true /* signed saturation */, St->getChain(), |
46327 | dl, Val, St->getBasePtr(), |
46328 | St->getMemoryVT(), St->getMemOperand(), DAG); |
46329 | if (SDValue Val = detectUSatPattern(St->getValue(), St->getMemoryVT(), |
46330 | DAG, dl)) |
46331 | return EmitTruncSStore(false /* unsigned saturation */, St->getChain(), |
46332 | dl, Val, St->getBasePtr(), |
46333 | St->getMemoryVT(), St->getMemOperand(), DAG); |
46334 | } |
46335 | |
46336 | return SDValue(); |
46337 | } |
46338 | |
46339 | // Cast ptr32 and ptr64 pointers to the default address space before a store. |
46340 | unsigned AddrSpace = St->getAddressSpace(); |
46341 | if (AddrSpace == X86AS::PTR64 || AddrSpace == X86AS::PTR32_SPTR || |
46342 | AddrSpace == X86AS::PTR32_UPTR) { |
46343 | MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout()); |
46344 | if (PtrVT != St->getBasePtr().getSimpleValueType()) { |
46345 | SDValue Cast = |
46346 | DAG.getAddrSpaceCast(dl, PtrVT, St->getBasePtr(), AddrSpace, 0); |
46347 | return DAG.getStore(St->getChain(), dl, StoredVal, Cast, |
46348 | St->getPointerInfo(), St->getOriginalAlign(), |
46349 | St->getMemOperand()->getFlags(), St->getAAInfo()); |
46350 | } |
46351 | } |
46352 | |
46353 | // Turn load->store of MMX types into GPR load/stores. This avoids clobbering |
46354 | // the FP state in cases where an emms may be missing. |
46355 | // A preferable solution to the general problem is to figure out the right |
46356 | // places to insert EMMS. This qualifies as a quick hack. |
46357 | |
46358 | // Similarly, turn load->store of i64 into double load/stores in 32-bit mode. |
46359 | if (VT.getSizeInBits() != 64) |
46360 | return SDValue(); |
46361 | |
46362 | const Function &F = DAG.getMachineFunction().getFunction(); |
46363 | bool NoImplicitFloatOps = F.hasFnAttribute(Attribute::NoImplicitFloat); |
46364 | bool F64IsLegal = |
46365 | !Subtarget.useSoftFloat() && !NoImplicitFloatOps && Subtarget.hasSSE2(); |
46366 | if ((VT == MVT::i64 && F64IsLegal && !Subtarget.is64Bit()) && |
46367 | isa<LoadSDNode>(St->getValue()) && |
46368 | cast<LoadSDNode>(St->getValue())->isSimple() && |
46369 | St->getChain().hasOneUse() && St->isSimple()) { |
46370 | LoadSDNode *Ld = cast<LoadSDNode>(St->getValue().getNode()); |
46371 | |
46372 | if (!ISD::isNormalLoad(Ld)) |
46373 | return SDValue(); |
46374 | |
46375 | |
46376 | if (!Ld->hasNUsesOfValue(1, 0)) |
46377 | return SDValue(); |
46378 | |
46379 | SDLoc LdDL(Ld); |
46380 | SDLoc StDL(N); |
46381 | |
46382 | SDValue NewLd = DAG.getLoad(MVT::f64, LdDL, Ld->getChain(), |
46383 | Ld->getBasePtr(), Ld->getMemOperand()); |
46384 | |
46385 | |
46386 | DAG.makeEquivalentMemoryOrdering(Ld, NewLd); |
46387 | return DAG.getStore(St->getChain(), StDL, NewLd, St->getBasePtr(), |
46388 | St->getMemOperand()); |
46389 | } |
46390 | |
46391 | // This is similar to the above case, but here we handle a scalar 64-bit |
46392 | // integer store that is extracted from a vector on a 32-bit target. |
46393 | // If we have SSE2, then we can treat it like a floating-point double |
46394 | // to get past legalization. The execution dependencies fixup pass will |
46395 | // choose the optimal machine instruction for the store if this really is |
46396 | // an integer or v2f32 chain. |
46397 | if (VT == MVT::i64 && F64IsLegal && !Subtarget.is64Bit() && |
46398 | St->getOperand(1).getOpcode() == ISD::EXTRACT_VECTOR_ELT) { |
46399 | SDValue OldExtract = St->getOperand(1); |
46400 | SDValue ExtOp0 = OldExtract.getOperand(0); |
46401 | unsigned VecSize = ExtOp0.getValueSizeInBits(); |
46402 | EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, VecSize / 64); |
46403 | SDValue BitCast = DAG.getBitcast(VecVT, ExtOp0); |
46404 | SDValue NewExtract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, |
46405 | BitCast, OldExtract.getOperand(1)); |
46406 | return DAG.getStore(St->getChain(), dl, NewExtract, St->getBasePtr(), |
46407 | St->getPointerInfo(), St->getOriginalAlign(), |
46408 | St->getMemOperand()->getFlags()); |
46409 | } |
46410 | |
46411 | return SDValue(); |
46412 | } |
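// Example of the 32-bit i64 load/store fold above (assuming SSE2 and that the
// load's only use is this store):
//   i64 t = load p; store t, q
// becomes
//   f64 t = load p; store t, q
// keeping the value in an XMM register instead of a GPR pair.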
46413 | |
46414 | static SDValue combineVEXTRACT_STORE(SDNode *N, SelectionDAG &DAG, |
46415 | TargetLowering::DAGCombinerInfo &DCI, |
46416 | const X86Subtarget &Subtarget) { |
46417 | auto *St = cast<MemIntrinsicSDNode>(N); |
46418 | |
46419 | SDValue StoredVal = N->getOperand(1); |
46420 | MVT VT = StoredVal.getSimpleValueType(); |
46421 | EVT MemVT = St->getMemoryVT(); |
46422 | |
46423 | |
46424 | unsigned StElts = MemVT.getSizeInBits() / VT.getScalarSizeInBits(); |
46425 | APInt DemandedElts = APInt::getLowBitsSet(VT.getVectorNumElements(), StElts); |
46426 | |
46427 | APInt KnownUndef, KnownZero; |
46428 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
46429 | if (TLI.SimplifyDemandedVectorElts(StoredVal, DemandedElts, KnownUndef, |
46430 | KnownZero, DCI)) { |
46431 | if (N->getOpcode() != ISD::DELETED_NODE) |
46432 | DCI.AddToWorklist(N); |
46433 | return SDValue(N, 0); |
46434 | } |
46435 | |
46436 | return SDValue(); |
46437 | } |
46438 | |
46439 | /// Return 'true' if this vector operation is "horizontal" |
46440 | /// and return the operands for the horizontal operation in LHS and RHS. A |
46441 | /// horizontal operation performs the binary operation on successive elements |
46442 | /// of its first operand, then on successive elements of its second operand, |
46443 | /// returning the resulting values in a vector. For example, if |
46444 | ///   A = < float a0, float a1, float a2, float a3 > |
46445 | /// and |
46446 | ///   B = < float b0, float b1, float b2, float b3 > |
46447 | /// then the result of doing a horizontal operation on A and B is |
46448 | ///   A horizontal-op B = < a0 op a1, a2 op a3, b0 op b1, b2 op b3 >. |
46449 | /// In short, LHS and RHS are inspected to see if LHS op RHS is of the form |
46450 | /// A horizontal-op B, in which case the LHS is modified to contain A and the |
46451 | /// RHS to contain B. |
46452 | static bool isHorizontalBinOp(unsigned HOpcode, SDValue &LHS, SDValue &RHS, |
46453 | SelectionDAG &DAG, const X86Subtarget &Subtarget, |
46454 | bool IsCommutative, |
46455 | SmallVectorImpl<int> &PostShuffleMask) { |
46456 | // If either operand is undef, bail out. The binop should be simplified. |
46457 | if (LHS.isUndef() || RHS.isUndef()) |
46458 | return false; |
46459 | |
46460 | // Look for the following pattern: |
46461 | //   A = < float a0, float a1, float a2, float a3 > |
46462 | //   B = < float b0, float b1, float b2, float b3 > |
46463 | // and |
46464 | //   LHS = VECTOR_SHUFFLE A, B, <0, 2, 4, 6> |
46465 | //   RHS = VECTOR_SHUFFLE A, B, <1, 3, 5, 7> |
46466 | // which is A horizontal-op B. |
46467 | |
46468 | |
46469 | MVT VT = LHS.getSimpleValueType(); |
46470 | assert((VT.is128BitVector() || VT.is256BitVector()) && |
46471 | "Unsupported vector type for horizontal add/sub"); |
46472 | unsigned NumElts = VT.getVectorNumElements(); |
46473 | |
46474 | auto GetShuffle = [&](SDValue Op, SDValue &N0, SDValue &N1, |
46475 | SmallVectorImpl<int> &ShuffleMask) { |
46476 | bool UseSubVector = false; |
46477 | if (Op.getOpcode() == ISD::EXTRACT_SUBVECTOR && |
46478 | Op.getOperand(0).getValueType().is256BitVector() && |
46479 | llvm::isNullConstant(Op.getOperand(1))) { |
46480 | Op = Op.getOperand(0); |
46481 | UseSubVector = true; |
46482 | } |
46483 | SmallVector<SDValue, 2> SrcOps; |
46484 | SmallVector<int, 16> SrcMask, ScaledMask; |
46485 | SDValue BC = peekThroughBitcasts(Op); |
46486 | if (getTargetShuffleInputs(BC, SrcOps, SrcMask, DAG) && |
46487 | !isAnyZero(SrcMask) && all_of(SrcOps, [BC](SDValue Op) { |
46488 | return Op.getValueSizeInBits() == BC.getValueSizeInBits(); |
46489 | })) { |
46490 | resolveTargetShuffleInputsAndMask(SrcOps, SrcMask); |
46491 | if (!UseSubVector && SrcOps.size() <= 2 && |
46492 | scaleShuffleElements(SrcMask, NumElts, ScaledMask)) { |
46493 | N0 = SrcOps.size() > 0 ? SrcOps[0] : SDValue(); |
46494 | N1 = SrcOps.size() > 1 ? SrcOps[1] : SDValue(); |
46495 | ShuffleMask.assign(ScaledMask.begin(), ScaledMask.end()); |
46496 | } |
46497 | if (UseSubVector && SrcOps.size() == 1 && |
46498 | scaleShuffleElements(SrcMask, 2 * NumElts, ScaledMask)) { |
46499 | std::tie(N0, N1) = DAG.SplitVector(SrcOps[0], SDLoc(Op)); |
46500 | ArrayRef<int> Mask = ArrayRef<int>(ScaledMask).slice(0, NumElts); |
46501 | ShuffleMask.assign(Mask.begin(), Mask.end()); |
46502 | } |
46503 | } |
46504 | }; |
46505 | |
46506 | // View LHS in the form |
46507 | //   LHS = VECTOR_SHUFFLE A, B, LMask |
46508 | // If LHS is not a shuffle, then pretend it is the identity shuffle: |
46509 | //   LHS = VECTOR_SHUFFLE LHS, undef, <0, 1, ..., N-1> |
46510 | // NOTE: A default initialized SDValue represents an UNDEF of type VT. |
46511 | SDValue A, B; |
46512 | SmallVector<int, 16> LMask; |
46513 | GetShuffle(LHS, A, B, LMask); |
46514 | |
46515 | // Likewise, view RHS in the form |
46516 | //   RHS = VECTOR_SHUFFLE C, D, RMask |
46517 | SDValue C, D; |
46518 | SmallVector<int, 16> RMask; |
46519 | GetShuffle(RHS, C, D, RMask); |
46520 | |
46521 | |
46522 | unsigned NumShuffles = (LMask.empty() ? 0 : 1) + (RMask.empty() ? 0 : 1); |
46523 | if (NumShuffles == 0) |
46524 | return false; |
46525 | |
46526 | if (LMask.empty()) { |
46527 | A = LHS; |
46528 | for (unsigned i = 0; i != NumElts; ++i) |
46529 | LMask.push_back(i); |
46530 | } |
46531 | |
46532 | if (RMask.empty()) { |
46533 | C = RHS; |
46534 | for (unsigned i = 0; i != NumElts; ++i) |
46535 | RMask.push_back(i); |
46536 | } |
46537 | |
46538 | |
46539 | if (isUndefOrInRange(LMask, 0, NumElts)) |
46540 | B = SDValue(); |
46541 | else if (isUndefOrInRange(LMask, NumElts, NumElts * 2)) |
46542 | A = SDValue(); |
46543 | |
46544 | if (isUndefOrInRange(RMask, 0, NumElts)) |
46545 | D = SDValue(); |
46546 | else if (isUndefOrInRange(RMask, NumElts, NumElts * 2)) |
46547 | C = SDValue(); |
46548 | |
46549 | |
46550 | |
46551 | if (A != C) { |
46552 | std::swap(C, D); |
46553 | ShuffleVectorSDNode::commuteMask(RMask); |
46554 | } |
46555 | |
46556 | if (!(A == C && B == D)) |
46557 | return false; |
46558 | |
46559 | PostShuffleMask.clear(); |
46560 | PostShuffleMask.append(NumElts, SM_SentinelUndef); |
46561 | |
46562 | |
46563 | |
46564 | |
46565 | |
46566 | |
46567 | |
46568 | unsigned Num128BitChunks = VT.getSizeInBits() / 128; |
46569 | unsigned NumEltsPer128BitChunk = NumElts / Num128BitChunks; |
46570 | unsigned NumEltsPer64BitChunk = NumEltsPer128BitChunk / 2; |
46571 | assert((NumEltsPer128BitChunk % 2 == 0) && |
46572 | "Vector type should have an even number of elements in each lane"); |
46573 | for (unsigned j = 0; j != NumElts; j += NumEltsPer128BitChunk) { |
46574 | for (unsigned i = 0; i != NumEltsPer128BitChunk; ++i) { |
46575 | |
46576 | int LIdx = LMask[i + j], RIdx = RMask[i + j]; |
46577 | if (LIdx < 0 || RIdx < 0 || |
46578 | (!A.getNode() && (LIdx < (int)NumElts || RIdx < (int)NumElts)) || |
46579 | (!B.getNode() && (LIdx >= (int)NumElts || RIdx >= (int)NumElts))) |
46580 | continue; |
46581 | |
46582 | |
46583 | |
46584 | if (!((RIdx & 1) == 1 && (LIdx + 1) == RIdx) && |
46585 | !((LIdx & 1) == 1 && (RIdx + 1) == LIdx && IsCommutative)) |
46586 | return false; |
46587 | |
46588 | |
46589 | |
46590 | int Base = LIdx & ~1u; |
46591 | int Index = ((Base % NumEltsPer128BitChunk) / 2) + |
46592 | ((Base % NumElts) & ~(NumEltsPer128BitChunk - 1)); |
46593 | |
46594 | |
46595 | |
46596 | |
46597 | if ((B && Base >= (int)NumElts) || (!B && i >= NumEltsPer64BitChunk)) |
46598 | Index += NumEltsPer64BitChunk; |
46599 | PostShuffleMask[i + j] = Index; |
46600 | } |
46601 | } |
46602 | |
46603 | SDValue NewLHS = A.getNode() ? A : B; |
46604 | SDValue NewRHS = B.getNode() ? B : A; |
46605 | |
46606 | bool IsIdentityPostShuffle = |
46607 | isSequentialOrUndefInRange(PostShuffleMask, 0, NumElts, 0); |
46608 | if (IsIdentityPostShuffle) |
46609 | PostShuffleMask.clear(); |
46610 | |
46611 | |
46612 | if (!IsIdentityPostShuffle && !Subtarget.hasAVX2() && VT.isFloatingPoint() && |
46613 | isMultiLaneShuffleMask(128, VT.getScalarSizeInBits(), PostShuffleMask)) |
46614 | return false; |
46615 | |
46616 | |
46617 | |
46618 | bool FoundHorizLHS = llvm::any_of(NewLHS->uses(), [&](SDNode *User) { |
46619 | return User->getOpcode() == HOpcode && User->getValueType(0) == VT; |
46620 | }); |
46621 | bool FoundHorizRHS = llvm::any_of(NewRHS->uses(), [&](SDNode *User) { |
46622 | return User->getOpcode() == HOpcode && User->getValueType(0) == VT; |
46623 | }); |
46624 | bool ForceHorizOp = FoundHorizLHS && FoundHorizRHS; |
46625 | |
46626 | |
46627 | |
46628 | if (!ForceHorizOp && |
46629 | !shouldUseHorizontalOp(NewLHS == NewRHS && |
46630 | (NumShuffles < 2 || !IsIdentityPostShuffle), |
46631 | DAG, Subtarget)) |
46632 | return false; |
46633 | |
46634 | LHS = DAG.getBitcast(VT, NewLHS); |
46635 | RHS = DAG.getBitcast(VT, NewRHS); |
46636 | return true; |
46637 | } |
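// Worked example (hypothetical v4f32 FADD): with
//   LHS = shuffle A, B, <0,2,4,6>  and  RHS = shuffle A, B, <1,3,5,7>
// the per-lane index checks above all pass, PostShuffleMask stays the
// identity, and the caller can emit HADDPS A, B directly.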
46638 | |
46639 | |
46640 | static SDValue combineToHorizontalAddSub(SDNode *N, SelectionDAG &DAG, |
46641 | const X86Subtarget &Subtarget) { |
46642 | EVT VT = N->getValueType(0); |
46643 | unsigned Opcode = N->getOpcode(); |
46644 | bool IsAdd = (Opcode == ISD::FADD) || (Opcode == ISD::ADD); |
46645 | SmallVector<int, 8> PostShuffleMask; |
46646 | |
46647 | switch (Opcode) { |
46648 | case ISD::FADD: |
46649 | case ISD::FSUB: |
46650 | if ((Subtarget.hasSSE3() && (VT == MVT::v4f32 || VT == MVT::v2f64)) || |
46651 | (Subtarget.hasAVX() && (VT == MVT::v8f32 || VT == MVT::v4f64))) { |
46652 | SDValue LHS = N->getOperand(0); |
46653 | SDValue RHS = N->getOperand(1); |
46654 | auto HorizOpcode = IsAdd ? X86ISD::FHADD : X86ISD::FHSUB; |
46655 | if (isHorizontalBinOp(HorizOpcode, LHS, RHS, DAG, Subtarget, IsAdd, |
46656 | PostShuffleMask)) { |
46657 | SDValue HorizBinOp = DAG.getNode(HorizOpcode, SDLoc(N), VT, LHS, RHS); |
46658 | if (!PostShuffleMask.empty()) |
46659 | HorizBinOp = DAG.getVectorShuffle(VT, SDLoc(HorizBinOp), HorizBinOp, |
46660 | DAG.getUNDEF(VT), PostShuffleMask); |
46661 | return HorizBinOp; |
46662 | } |
46663 | } |
46664 | break; |
46665 | case ISD::ADD: |
46666 | case ISD::SUB: |
46667 | if (Subtarget.hasSSSE3() && (VT == MVT::v8i16 || VT == MVT::v4i32 || |
46668 | VT == MVT::v16i16 || VT == MVT::v8i32)) { |
46669 | SDValue LHS = N->getOperand(0); |
46670 | SDValue RHS = N->getOperand(1); |
46671 | auto HorizOpcode = IsAdd ? X86ISD::HADD : X86ISD::HSUB; |
46672 | if (isHorizontalBinOp(HorizOpcode, LHS, RHS, DAG, Subtarget, IsAdd, |
46673 | PostShuffleMask)) { |
46674 | auto HOpBuilder = [HorizOpcode](SelectionDAG &DAG, const SDLoc &DL, |
46675 | ArrayRef<SDValue> Ops) { |
46676 | return DAG.getNode(HorizOpcode, DL, Ops[0].getValueType(), Ops); |
46677 | }; |
46678 | SDValue HorizBinOp = SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT, |
46679 | {LHS, RHS}, HOpBuilder); |
46680 | if (!PostShuffleMask.empty()) |
46681 | HorizBinOp = DAG.getVectorShuffle(VT, SDLoc(HorizBinOp), HorizBinOp, |
46682 | DAG.getUNDEF(VT), PostShuffleMask); |
46683 | return HorizBinOp; |
46684 | } |
46685 | } |
46686 | break; |
46687 | } |
46688 | |
46689 | return SDValue(); |
46690 | } |
46691 | |
46692 | /// Do target-specific dag combines on floating point adds/subs. |
46693 | static SDValue combineFaddFsub(SDNode *N, SelectionDAG &DAG, |
46694 | const X86Subtarget &Subtarget) { |
46695 | if (SDValue HOp = combineToHorizontalAddSub(N, DAG, Subtarget)) |
46696 | return HOp; |
46697 | return SDValue(); |
46698 | } |
46699 | |
46700 | /// Attempt to pre-truncate inputs to arithmetic ops if it will simplify |
46701 | /// the codegen. |
46702 | /// e.g. TRUNC( BINOP( X, Y ) ) --> BINOP( TRUNC( X ), TRUNC( Y ) ) |
46703 | /// TODO: This overlaps with the generic combiner's visitTRUNCATE. Remove |
46704 | ///       anything that is guaranteed to be transformed by DAGCombiner. |
46705 | static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG, |
46706 | const X86Subtarget &Subtarget, |
46707 | const SDLoc &DL) { |
46708 | assert(N->getOpcode() == ISD::TRUNCATE && "Wrong opcode"); |
46709 | SDValue Src = N->getOperand(0); |
46710 | unsigned SrcOpcode = Src.getOpcode(); |
46711 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
46712 | |
46713 | EVT VT = N->getValueType(0); |
46714 | EVT SrcVT = Src.getValueType(); |
46715 | |
46716 | auto IsFreeTruncation = [VT](SDValue Op) { |
46717 | unsigned TruncSizeInBits = VT.getScalarSizeInBits(); |
46718 | |
46719 | |
46720 | |
46721 | unsigned Opcode = Op.getOpcode(); |
46722 | if ((Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND || |
46723 | Opcode == ISD::ZERO_EXTEND) && |
46724 | Op.getOperand(0).getScalarValueSizeInBits() <= TruncSizeInBits) |
46725 | return true; |
46726 | |
46727 | |
46728 | |
46729 | |
46730 | |
46731 | |
46732 | return ISD::isBuildVectorOfConstantSDNodes(Op.getNode()); |
46733 | }; |
46734 | |
46735 | auto TruncateArithmetic = [&](SDValue N0, SDValue N1) { |
46736 | SDValue Trunc0 = DAG.getNode(ISD::TRUNCATE, DL, VT, N0); |
46737 | SDValue Trunc1 = DAG.getNode(ISD::TRUNCATE, DL, VT, N1); |
46738 | return DAG.getNode(SrcOpcode, DL, VT, Trunc0, Trunc1); |
46739 | }; |
46740 | |
46741 | |
46742 | if (!Src.hasOneUse()) |
46743 | return SDValue(); |
46744 | |
46745 | // Only support vector truncation for now. |
46746 | // TODO: i64 scalar math would benefit as well. |
46747 | if (!VT.isVector()) |
46748 | return SDValue(); |
46749 | |
46750 | // In most cases it's only worth pre-truncating if we're only facing the cost |
46751 | // of one truncation. |
46752 | // i.e. if one of the inputs will constant fold or the input is repeated. |
46753 | switch (SrcOpcode) { |
46754 | case ISD::MUL: |
46755 | |
46756 | |
46757 | if (SrcVT.getScalarType() == MVT::i64 && |
46758 | TLI.isOperationLegal(SrcOpcode, VT) && |
46759 | !TLI.isOperationLegal(SrcOpcode, SrcVT)) |
46760 | return TruncateArithmetic(Src.getOperand(0), Src.getOperand(1)); |
46761 | LLVM_FALLTHROUGH; |
46762 | case ISD::AND: |
46763 | case ISD::XOR: |
46764 | case ISD::OR: |
46765 | case ISD::ADD: |
46766 | case ISD::SUB: { |
46767 | SDValue Op0 = Src.getOperand(0); |
46768 | SDValue Op1 = Src.getOperand(1); |
46769 | if (TLI.isOperationLegal(SrcOpcode, VT) && |
46770 | (Op0 == Op1 || IsFreeTruncation(Op0) || IsFreeTruncation(Op1))) |
46771 | return TruncateArithmetic(Op0, Op1); |
46772 | break; |
46773 | } |
46774 | } |
46775 | |
46776 | return SDValue(); |
46777 | } |
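// Example of the narrowing above (assuming v8i32 is legal for the opcode):
//   trunc (add (zext x to v8i64), (zext y to v8i64)) to v8i32
// becomes  add (trunc ...), (trunc ...); both truncations fold into the
// extends, so the arithmetic happens entirely in the narrow type.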
46778 | |
46779 | /// Truncate using ISD::AND mask and X86ISD::PACKUS. |
46780 | /// e.g. trunc <8 x i32> X to <8 x i16> --> |
46781 | /// MaskX = X & 0xffff (clear high bits to prevent saturation) |
46782 | /// packus (extract_subv MaskX, 0), (extract_subv MaskX, 1) |
46783 | static SDValue combineVectorTruncationWithPACKUS(SDNode *N, const SDLoc &DL, |
46784 | const X86Subtarget &Subtarget, |
46785 | SelectionDAG &DAG) { |
46786 | SDValue In = N->getOperand(0); |
46787 | EVT InVT = In.getValueType(); |
46788 | EVT OutVT = N->getValueType(0); |
46789 | |
46790 | APInt Mask = APInt::getLowBitsSet(InVT.getScalarSizeInBits(), |
46791 | OutVT.getScalarSizeInBits()); |
46792 | In = DAG.getNode(ISD::AND, DL, InVT, In, DAG.getConstant(Mask, DL, InVT)); |
46793 | return truncateVectorWithPACK(X86ISD::PACKUS, OutVT, In, DL, DAG, Subtarget); |
46794 | } |
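// e.g. (hypothetical) v8i32 -> v8i16: AND each lane with 0xFFFF so the pack
// cannot saturate, then truncateVectorWithPACK emits the PACKUS chain.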
46795 | |
46796 | /// Truncate using ISD::SIGN_EXTEND_INREG and X86ISD::PACKSS. |
46797 | static SDValue combineVectorTruncationWithPACKSS(SDNode *N, const SDLoc &DL, |
46798 | const X86Subtarget &Subtarget, |
46799 | SelectionDAG &DAG) { |
46800 | SDValue In = N->getOperand(0); |
46801 | EVT InVT = In.getValueType(); |
46802 | EVT OutVT = N->getValueType(0); |
46803 | In = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, InVT, In, |
46804 | DAG.getValueType(OutVT)); |
46805 | return truncateVectorWithPACK(X86ISD::PACKSS, OutVT, In, DL, DAG, Subtarget); |
46806 | } |
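// e.g. (hypothetical) v8i32 -> v8i16 without SSE4.1's PACKUSDW:
// sign_extend_inreg makes the upper half of every lane a sign-fill, after
// which PACKSSDW produces the same bits as a plain truncation.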
46807 | |
46808 | /// This function transforms truncation from vXi32/vXi64 to vXi8/vXi16 into |
46809 | /// X86ISD::PACKUS/X86ISD::PACKSS operations. We do it here because after type |
46810 | /// legalization the truncation will be translated into a BUILD_VECTOR with each |
46811 | /// element that is extracted from a vector and then truncated, and it is |
46812 | /// difficult to do this optimization based on them. |
46813 | static SDValue combineVectorTruncation(SDNode *N, SelectionDAG &DAG, |
46814 | const X86Subtarget &Subtarget) { |
46815 | EVT OutVT = N->getValueType(0); |
46816 | if (!OutVT.isVector()) |
46817 | return SDValue(); |
46818 | |
46819 | SDValue In = N->getOperand(0); |
46820 | if (!In.getValueType().isSimple()) |
46821 | return SDValue(); |
46822 | |
46823 | EVT InVT = In.getValueType(); |
46824 | unsigned NumElems = OutVT.getVectorNumElements(); |
46825 | |
46826 | |
46827 | if (!Subtarget.hasSSE2() || Subtarget.hasAVX512()) |
46828 | return SDValue(); |
46829 | |
46830 | EVT OutSVT = OutVT.getVectorElementType(); |
46831 | EVT InSVT = InVT.getVectorElementType(); |
46832 | if (!((InSVT == MVT::i16 || InSVT == MVT::i32 || InSVT == MVT::i64) && |
46833 | (OutSVT == MVT::i8 || OutSVT == MVT::i16) && isPowerOf2_32(NumElems) && |
46834 | NumElems >= 8)) |
46835 | return SDValue(); |
46836 | |
46837 | |
46838 | if (Subtarget.hasSSSE3() && NumElems == 8 && InSVT != MVT::i64) |
46839 | return SDValue(); |
46840 | |
46841 | SDLoc DL(N); |
46842 | |
46843 | |
46844 | |
46845 | if (Subtarget.hasSSE41() || OutSVT == MVT::i8) |
46846 | return combineVectorTruncationWithPACKUS(N, DL, Subtarget, DAG); |
46847 | if (InSVT == MVT::i32) |
46848 | return combineVectorTruncationWithPACKSS(N, DL, Subtarget, DAG); |
46849 | |
46850 | return SDValue(); |
46851 | } |
46852 | |
46853 | /// This function transforms vector truncation of 'extended sign-bits' or |
46854 | /// 'extended zero-bits' values. |
46855 | /// vXi16/vXi32/vXi64 to vXi8/vXi16/vXi32 into X86ISD::PACKSS/PACKUS operations. |
46856 | static SDValue combineVectorSignBitsTruncation(SDNode *N, const SDLoc &DL, |
46857 | SelectionDAG &DAG, |
46858 | const X86Subtarget &Subtarget) { |
46859 | |
46860 | if (!Subtarget.hasSSE2()) |
46861 | return SDValue(); |
46862 | |
46863 | if (!N->getValueType(0).isVector() || !N->getValueType(0).isSimple()) |
46864 | return SDValue(); |
46865 | |
46866 | SDValue In = N->getOperand(0); |
46867 | if (!In.getValueType().isSimple()) |
46868 | return SDValue(); |
46869 | |
46870 | MVT VT = N->getValueType(0).getSimpleVT(); |
46871 | MVT SVT = VT.getScalarType(); |
46872 | |
46873 | MVT InVT = In.getValueType().getSimpleVT(); |
46874 | MVT InSVT = InVT.getScalarType(); |
46875 | |
46876 | |
46877 | if (!isPowerOf2_32(VT.getVectorNumElements())) |
46878 | return SDValue(); |
46879 | if (SVT != MVT::i8 && SVT != MVT::i16 && SVT != MVT::i32) |
46880 | return SDValue(); |
46881 | if (InSVT != MVT::i16 && InSVT != MVT::i32 && InSVT != MVT::i64) |
46882 | return SDValue(); |
46883 | |
46884 | |
46885 | if (SVT == MVT::i32 && VT.getSizeInBits() < 128) |
46886 | return SDValue(); |
46887 | |
46888 | |
46889 | |
46890 | if (Subtarget.hasAVX512() && |
46891 | !(!Subtarget.useAVX512Regs() && VT.is256BitVector() && |
46892 | InVT.is512BitVector())) { |
46893 | |
46894 | |
46895 | SmallVector<SDValue> ConcatOps; |
46896 | if (VT.getSizeInBits() > 128 || !collectConcatOps(In.getNode(), ConcatOps)) |
46897 | return SDValue(); |
46898 | } |
46899 | |
46900 | unsigned NumPackedSignBits = std::min<unsigned>(SVT.getSizeInBits(), 16); |
46901 | unsigned NumPackedZeroBits = Subtarget.hasSSE41() ? NumPackedSignBits : 8; |
46902 | |
46903 | // Use PACKUS if the input has zero-bits that extend all the way to the |
46904 | // packed/truncated value. e.g. masks, zext_in_reg, etc. |
46905 | KnownBits Known = DAG.computeKnownBits(In); |
46906 | unsigned NumLeadingZeroBits = Known.countMinLeadingZeros(); |
46907 | if (NumLeadingZeroBits >= (InSVT.getSizeInBits() - NumPackedZeroBits)) |
46908 | return truncateVectorWithPACK(X86ISD::PACKUS, VT, In, DL, DAG, Subtarget); |
46909 | // Use PACKSS if the input has sign-bits that extend all the way to the |
46910 | // packed/truncated value. e.g. Comparison result, sext_in_reg, etc. |
46911 | |
46912 | unsigned NumSignBits = DAG.ComputeNumSignBits(In); |
46913 | |
46914 | |
46915 | |
46916 | |
46917 | if (SVT == MVT::i32 && NumSignBits != InSVT.getSizeInBits()) |
46918 | return SDValue(); |
46919 | |
46920 | unsigned MinSignBits = InSVT.getSizeInBits() - NumPackedSignBits; |
46921 | if (NumSignBits > MinSignBits) |
46922 | return truncateVectorWithPACK(X86ISD::PACKSS, VT, In, DL, DAG, Subtarget); |
46923 | |
46924 | // If we have a srl that only generates signbits that we will discard in |
46925 | // the truncation then we can use PACKSS by converting the srl to a sra. |
46926 | // SimplifyDemandedBits often relaxes sra to srl so we need to reverse it. |
46927 | if (In.getOpcode() == ISD::SRL && N->isOnlyUserOf(In.getNode())) |
46928 | if (const APInt *ShAmt = DAG.getValidShiftAmountConstant( |
46929 | In, APInt::getAllOnesValue(VT.getVectorNumElements()))) { |
46930 | if (*ShAmt == MinSignBits) { |
46931 | SDValue NewIn = DAG.getNode(ISD::SRA, DL, InVT, In->ops()); |
46932 | return truncateVectorWithPACK(X86ISD::PACKSS, VT, NewIn, DL, DAG, |
46933 | Subtarget); |
46934 | } |
46935 | } |
46936 | |
46937 | return SDValue(); |
46938 | } |
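// Summary of the two paths above (illustrative): PACKUS needs the discarded
// high bits to be known zero; PACKSS needs them to be copies of the sign bit
// (e.g. a comparison result). Otherwise the pack would saturate and change
// the value instead of truncating it.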
46939 | |
46940 | // Try to form a MULHU or MULHS node by looking for |
46941 | // (trunc (srl (mul ext, ext), 16)) |
46942 | // TODO: This is X86 specific because we want to be able to handle wide types |
46943 | // before type legalization. But we can only do it if the vector will be |
46944 | // legalized via widening/splitting. Type legalization can't handle promotion |
46945 | // of a MULHU/MULHS. There isn't a way to convey this to the generic DAG |
46946 | // combiner. |
46947 | static SDValue combinePMULH(SDValue Src, EVT VT, const SDLoc &DL, |
46948 | SelectionDAG &DAG, const X86Subtarget &Subtarget) { |
46949 | |
46950 | if (Src.getOpcode() != ISD::SRL || |
46951 | Src.getOperand(0).getOpcode() != ISD::MUL) |
46952 | return SDValue(); |
46953 | |
46954 | if (!Subtarget.hasSSE2()) |
46955 | return SDValue(); |
46956 | |
46957 | |
46958 | |
46959 | if (!VT.isVector() || VT.getVectorElementType() != MVT::i16) |
46960 | return SDValue(); |
46961 | |
46962 | |
46963 | EVT InVT = Src.getValueType(); |
46964 | if (InVT.getVectorElementType().getSizeInBits() < 32) |
46965 | return SDValue(); |
46966 | |
46967 | |
46968 | APInt ShiftAmt; |
46969 | if (!ISD::isConstantSplatVector(Src.getOperand(1).getNode(), ShiftAmt) || |
46970 | ShiftAmt != 16) |
46971 | return SDValue(); |
46972 | |
46973 | SDValue LHS = Src.getOperand(0).getOperand(0); |
46974 | SDValue RHS = Src.getOperand(0).getOperand(1); |
46975 | |
46976 | unsigned ExtOpc = LHS.getOpcode(); |
46977 | if ((ExtOpc != ISD::SIGN_EXTEND && ExtOpc != ISD::ZERO_EXTEND) || |
46978 | RHS.getOpcode() != ExtOpc) |
46979 | return SDValue(); |
46980 | |
46981 | |
46982 | LHS = LHS.getOperand(0); |
46983 | RHS = RHS.getOperand(0); |
46984 | |
46985 | |
46986 | if (LHS.getValueType() != VT || RHS.getValueType() != VT) |
46987 | return SDValue(); |
46988 | |
46989 | unsigned Opc = ExtOpc == ISD::SIGN_EXTEND ? ISD::MULHS : ISD::MULHU; |
46990 | return DAG.getNode(Opc, DL, VT, LHS, RHS); |
46991 | } |
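// The exact shape matched above (hypothetical v8i16 example):
//   trunc (srl (mul (sext x), (sext y)), 16) --> mulhs x, y   (PMULHW)
//   trunc (srl (mul (zext x), (zext y)), 16) --> mulhu x, y   (PMULHUW)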
46992 | |
46993 | // Attempt to match PMADDUBSW, which multiplies corresponding unsigned bytes |
46994 | // from one vector with signed bytes from another vector, adds together |
46995 | // adjacent pairs of 16-bit products, and saturates the results before |
46996 | // truncating to 16-bits. |
46997 | |
46998 | // Which looks something like this: |
46999 | // (i16 (ssat (add (mul (zext (even elts (i8 A))), (sext (even elts (i8 B)))), |
47000 | //                 (mul (zext (odd elts (i8 A)), (sext (odd elts (i8 B)))))))) |
47001 | static SDValue detectPMADDUBSW(SDValue In, EVT VT, SelectionDAG &DAG, |
47002 | const X86Subtarget &Subtarget, |
47003 | const SDLoc &DL) { |
47004 | if (!VT.isVector() || !Subtarget.hasSSSE3()) |
47005 | return SDValue(); |
47006 | |
47007 | unsigned NumElems = VT.getVectorNumElements(); |
47008 | EVT ScalarVT = VT.getVectorElementType(); |
47009 | if (ScalarVT != MVT::i16 || NumElems < 8 || !isPowerOf2_32(NumElems)) |
47010 | return SDValue(); |
47011 | |
47012 | SDValue SSatVal = detectSSatPattern(In, VT); |
47013 | if (!SSatVal || SSatVal.getOpcode() != ISD::ADD) |
47014 | return SDValue(); |
47015 | |
47016 | |
47017 | |
47018 | SDValue N0 = SSatVal.getOperand(0); |
47019 | SDValue N1 = SSatVal.getOperand(1); |
47020 | |
47021 | if (N0.getOpcode() != ISD::MUL || N1.getOpcode() != ISD::MUL) |
47022 | return SDValue(); |
47023 | |
47024 | SDValue N00 = N0.getOperand(0); |
47025 | SDValue N01 = N0.getOperand(1); |
47026 | SDValue N10 = N1.getOperand(0); |
47027 | SDValue N11 = N1.getOperand(1); |
47028 | |
47029 | |
47030 | |
47031 | if (N01.getOpcode() == ISD::ZERO_EXTEND) |
47032 | std::swap(N00, N01); |
47033 | if (N11.getOpcode() == ISD::ZERO_EXTEND) |
47034 | std::swap(N10, N11); |
47035 | |
47036 | |
47037 | if (N00.getOpcode() != ISD::ZERO_EXTEND || |
47038 | N01.getOpcode() != ISD::SIGN_EXTEND || |
47039 | N10.getOpcode() != ISD::ZERO_EXTEND || |
47040 | N11.getOpcode() != ISD::SIGN_EXTEND) |
47041 | return SDValue(); |
47042 | |
47043 | |
47044 | N00 = N00.getOperand(0); |
47045 | N01 = N01.getOperand(0); |
47046 | N10 = N10.getOperand(0); |
47047 | N11 = N11.getOperand(0); |
47048 | |
47049 | |
47050 | if (N00.getValueType().getVectorElementType() != MVT::i8 || |
47051 | N01.getValueType().getVectorElementType() != MVT::i8 || |
47052 | N10.getValueType().getVectorElementType() != MVT::i8 || |
47053 | N11.getValueType().getVectorElementType() != MVT::i8) |
47054 | return SDValue(); |
47055 | |
47056 | |
47057 | if (N00.getOpcode() != ISD::BUILD_VECTOR || |
47058 | N01.getOpcode() != ISD::BUILD_VECTOR || |
47059 | N10.getOpcode() != ISD::BUILD_VECTOR || |
47060 | N11.getOpcode() != ISD::BUILD_VECTOR) |
47061 | return SDValue(); |
47062 | |
47063 | // N00/N10 are zero extended. N01/N11 are sign extended. |
47064 | |
47065 | // For each element, we need to ensure we have an odd element from one vector |
47066 | // multiplied by the odd element of another vector and the even element from |
47067 | // one of the same vectors being multiplied by the even element from the |
47068 | // other vector. So we need to make sure for each element i, this operator |
47069 | // is being performed on: |
47070 | //   A[2 * i] * B[2 * i] + A[2 * i + 1] * B[2 * i + 1] |
47071 | SDValue ZExtIn, SExtIn; |
47072 | for (unsigned i = 0; i != NumElems; ++i) { |
47073 | SDValue N00Elt = N00.getOperand(i); |
47074 | SDValue N01Elt = N01.getOperand(i); |
47075 | SDValue N10Elt = N10.getOperand(i); |
47076 | SDValue N11Elt = N11.getOperand(i); |
47077 | |
47078 | if (N00Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT || |
47079 | N01Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT || |
47080 | N10Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT || |
47081 | N11Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT) |
47082 | return SDValue(); |
47083 | auto *ConstN00Elt = dyn_cast<ConstantSDNode>(N00Elt.getOperand(1)); |
47084 | auto *ConstN01Elt = dyn_cast<ConstantSDNode>(N01Elt.getOperand(1)); |
47085 | auto *ConstN10Elt = dyn_cast<ConstantSDNode>(N10Elt.getOperand(1)); |
47086 | auto *ConstN11Elt = dyn_cast<ConstantSDNode>(N11Elt.getOperand(1)); |
47087 | if (!ConstN00Elt || !ConstN01Elt || !ConstN10Elt || !ConstN11Elt) |
47088 | return SDValue(); |
47089 | unsigned IdxN00 = ConstN00Elt->getZExtValue(); |
47090 | unsigned IdxN01 = ConstN01Elt->getZExtValue(); |
47091 | unsigned IdxN10 = ConstN10Elt->getZExtValue(); |
47092 | unsigned IdxN11 = ConstN11Elt->getZExtValue(); |
47093 | |
47094 | if (IdxN00 > IdxN10) { |
47095 | std::swap(IdxN00, IdxN10); |
47096 | std::swap(IdxN01, IdxN11); |
47097 | } |
47098 | |
47099 | if (IdxN00 != 2 * i || IdxN10 != 2 * i + 1 || |
47100 | IdxN01 != 2 * i || IdxN11 != 2 * i + 1) |
47101 | return SDValue(); |
47102 | SDValue N00In = N00Elt.getOperand(0); |
47103 | SDValue N01In = N01Elt.getOperand(0); |
47104 | SDValue N10In = N10Elt.getOperand(0); |
47105 | SDValue N11In = N11Elt.getOperand(0); |
47106 | |
47107 | if (!ZExtIn) { |
47108 | ZExtIn = N00In; |
47109 | SExtIn = N01In; |
47110 | } |
47111 | if (ZExtIn != N00In || SExtIn != N01In || |
47112 | ZExtIn != N10In || SExtIn != N11In) |
47113 | return SDValue(); |
47114 | } |
47115 | |
47116 | auto PMADDBuilder = [](SelectionDAG &DAG, const SDLoc &DL, |
47117 | ArrayRef<SDValue> Ops) { |
47118 | |
47119 | |
47120 | EVT InVT = Ops[0].getValueType(); |
47121 | assert(InVT.getScalarType() == MVT::i8 && |
47122 | "Unexpected scalar element type"); |
47123 | assert(InVT == Ops[1].getValueType() && "Operands' types mismatch"); |
47124 | EVT ResVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16, |
47125 | InVT.getVectorNumElements() / 2); |
47126 | return DAG.getNode(X86ISD::VPMADDUBSW, DL, ResVT, Ops[0], Ops[1]); |
47127 | }; |
47128 | return SplitOpsAndApply(DAG, Subtarget, DL, VT, { ZExtIn, SExtIn }, |
47129 | PMADDBuilder); |
47130 | } |
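// Per output i16 lane, the pattern accepted above computes (illustrative):
//   ssat( zext(A[2i]) * sext(B[2i]) + zext(A[2i+1]) * sext(B[2i+1]) )
// which is exactly the semantics of VPMADDUBSW.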
47131 | |
47132 | static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG, |
47133 | const X86Subtarget &Subtarget) { |
47134 | EVT VT = N->getValueType(0); |
47135 | SDValue Src = N->getOperand(0); |
47136 | SDLoc DL(N); |
47137 | |
47138 | |
47139 | if (SDValue V = combineTruncatedArithmetic(N, DAG, Subtarget, DL)) |
47140 | return V; |
47141 | |
47142 | |
47143 | if (SDValue Avg = detectAVGPattern(Src, VT, DAG, Subtarget, DL)) |
47144 | return Avg; |
47145 | |
47146 | |
47147 | if (SDValue PMAdd = detectPMADDUBSW(Src, VT, DAG, Subtarget, DL)) |
47148 | return PMAdd; |
47149 | |
47150 | |
47151 | if (SDValue Val = combineTruncateWithSat(Src, VT, DL, DAG, Subtarget)) |
47152 | return Val; |
47153 | |
47154 | |
47155 | if (SDValue V = combinePMULH(Src, VT, DL, DAG, Subtarget)) |
47156 | return V; |
47157 | |
47158 | |
47159 | |
47160 | if (Src.getOpcode() == ISD::BITCAST && VT == MVT::i32) { |
47161 | SDValue BCSrc = Src.getOperand(0); |
47162 | if (BCSrc.getValueType() == MVT::x86mmx) |
47163 | return DAG.getNode(X86ISD::MMX_MOVD2W, DL, MVT::i32, BCSrc); |
47164 | } |
47165 | |
47166 | |
47167 | if (SDValue V = combineVectorSignBitsTruncation(N, DL, DAG, Subtarget)) |
47168 | return V; |
47169 | |
47170 | return combineVectorTruncation(N, DAG, Subtarget); |
47171 | } |
47172 | |
47173 | static SDValue combineVTRUNC(SDNode *N, SelectionDAG &DAG, |
47174 | TargetLowering::DAGCombinerInfo &DCI) { |
47175 | EVT VT = N->getValueType(0); |
47176 | SDValue In = N->getOperand(0); |
47177 | SDLoc DL(N); |
47178 | |
47179 | if (auto SSatVal = detectSSatPattern(In, VT)) |
47180 | return DAG.getNode(X86ISD::VTRUNCS, DL, VT, SSatVal); |
47181 | if (auto USatVal = detectUSatPattern(In, VT, DAG, DL)) |
47182 | return DAG.getNode(X86ISD::VTRUNCUS, DL, VT, USatVal); |
47183 | |
47184 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
47185 | APInt DemandedMask(APInt::getAllOnesValue(VT.getScalarSizeInBits())); |
47186 | if (TLI.SimplifyDemandedBits(SDValue(N, 0), DemandedMask, DCI)) |
47187 | return SDValue(N, 0); |
47188 | |
47189 | return SDValue(); |
47190 | } |
47191 | |
47192 | /// Returns the negated value if the node \p N flips sign of FP value. |
47193 | /// |
47194 | /// FP-negation node may have different forms: FNEG(x), FXOR (x, 0x80000000) |
47195 | /// or FSUB(0, x) |
47196 | /// AVX512F does not have FXOR, so FNEG is lowered as |
47197 | /// (bitcast (xor (bitcast x), (bitcast ConstantFP(0x80000000)))). |
47198 | /// In this case we go through all bitcasts. |
47199 | /// This also recognizes splat of a negated value and returns the splat of that |
47200 | /// value. |
47201 | static SDValue isFNEG(SelectionDAG &DAG, SDNode *N, unsigned Depth = 0) { |
47202 | if (N->getOpcode() == ISD::FNEG) |
47203 | return N->getOperand(0); |
47204 | |
47205 | |
47206 | if (Depth > SelectionDAG::MaxRecursionDepth) |
47207 | return SDValue(); |
47208 | |
47209 | unsigned ScalarSize = N->getValueType(0).getScalarSizeInBits(); |
47210 | |
47211 | SDValue Op = peekThroughBitcasts(SDValue(N, 0)); |
47212 | EVT VT = Op->getValueType(0); |
47213 | |
47214 | |
47215 | if (VT.getScalarSizeInBits() != ScalarSize) |
47216 | return SDValue(); |
47217 | |
47218 | unsigned Opc = Op.getOpcode(); |
47219 | switch (Opc) { |
47220 | case ISD::VECTOR_SHUFFLE: { |
47221 | |
47222 | |
47223 | if (!Op.getOperand(1).isUndef()) |
47224 | return SDValue(); |
47225 | if (SDValue NegOp0 = isFNEG(DAG, Op.getOperand(0).getNode(), Depth + 1)) |
47226 | if (NegOp0.getValueType() == VT) |
47227 | return DAG.getVectorShuffle(VT, SDLoc(Op), NegOp0, DAG.getUNDEF(VT), |
47228 | cast<ShuffleVectorSDNode>(Op)->getMask()); |
47229 | break; |
47230 | } |
47231 | case ISD::INSERT_VECTOR_ELT: { |
47232 | |
47233 | |
47234 | SDValue InsVector = Op.getOperand(0); |
47235 | SDValue InsVal = Op.getOperand(1); |
47236 | if (!InsVector.isUndef()) |
47237 | return SDValue(); |
47238 | if (SDValue NegInsVal = isFNEG(DAG, InsVal.getNode(), Depth + 1)) |
47239 | if (NegInsVal.getValueType() == VT.getVectorElementType()) |
47240 | return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Op), VT, InsVector, |
47241 | NegInsVal, Op.getOperand(2)); |
47242 | break; |
47243 | } |
47244 | case ISD::FSUB: |
47245 | case ISD::XOR: |
47246 | case X86ISD::FXOR: { |
47247 | SDValue Op1 = Op.getOperand(1); |
47248 | SDValue Op0 = Op.getOperand(0); |
47249 | |
47250 | // For XOR and FXOR, we want to check if constant bits of Op1 are sign bit |
47251 | // masks. For FSUB, we have to check if constant bits of Op0 are sign bit |
47252 | // masks and hence we swap the operands. |
47253 | |
47254 | if (Opc == ISD::FSUB) |
47255 | std::swap(Op0, Op1); |
47256 | |
47257 | APInt UndefElts; |
47258 | SmallVector<APInt, 16> EltBits; |
47259 | // Extract constant bits and see if they are all sign bit masks. Ignore the |
47260 | // undef elements. |
47261 | if (getTargetConstantBitsFromNode(Op1, ScalarSize, UndefElts, EltBits, |
47262 | /* AllowWholeUndefs */ true, |
47263 | /* AllowPartialUndefs */ false)) { |
47264 | for (unsigned I = 0, E = EltBits.size(); I < E; I++) |
47265 | if (!UndefElts[I] && !EltBits[I].isSignMask()) |
47266 | return SDValue(); |
47267 | |
47268 | return peekThroughBitcasts(Op0); |
47269 | } |
47270 | } |
47271 | } |
47272 | |
47273 | return SDValue(); |
47274 | } |
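// Forms recognized as negations above include (sketch): (fneg x),
// (fsub -0.0, x), (xor x, sign-mask splat), and shuffles or undef-base
// inserts whose source is itself a recognized negation.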
47275 | |
47276 | static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc, |
47277 | bool NegRes) { |
47278 | if (NegMul) { |
47279 | switch (Opcode) { |
47280 | default: llvm_unreachable("Unexpected opcode"); |
47281 | case ISD::FMA: Opcode = X86ISD::FNMADD; break; |
47282 | case ISD::STRICT_FMA: Opcode = X86ISD::STRICT_FNMADD; break; |
47283 | case X86ISD::FMADD_RND: Opcode = X86ISD::FNMADD_RND; break; |
47284 | case X86ISD::FMSUB: Opcode = X86ISD::FNMSUB; break; |
47285 | case X86ISD::STRICT_FMSUB: Opcode = X86ISD::STRICT_FNMSUB; break; |
47286 | case X86ISD::FMSUB_RND: Opcode = X86ISD::FNMSUB_RND; break; |
47287 | case X86ISD::FNMADD: Opcode = ISD::FMA; break; |
47288 | case X86ISD::STRICT_FNMADD: Opcode = ISD::STRICT_FMA; break; |
47289 | case X86ISD::FNMADD_RND: Opcode = X86ISD::FMADD_RND; break; |
47290 | case X86ISD::FNMSUB: Opcode = X86ISD::FMSUB; break; |
47291 | case X86ISD::STRICT_FNMSUB: Opcode = X86ISD::STRICT_FMSUB; break; |
47292 | case X86ISD::FNMSUB_RND: Opcode = X86ISD::FMSUB_RND; break; |
47293 | } |
47294 | } |
47295 | |
47296 | if (NegAcc) { |
47297 | switch (Opcode) { |
47298 | default: llvm_unreachable("Unexpected opcode"); |
47299 | case ISD::FMA: Opcode = X86ISD::FMSUB; break; |
47300 | case ISD::STRICT_FMA: Opcode = X86ISD::STRICT_FMSUB; break; |
47301 | case X86ISD::FMADD_RND: Opcode = X86ISD::FMSUB_RND; break; |
47302 | case X86ISD::FMSUB: Opcode = ISD::FMA; break; |
47303 | case X86ISD::STRICT_FMSUB: Opcode = ISD::STRICT_FMA; break; |
47304 | case X86ISD::FMSUB_RND: Opcode = X86ISD::FMADD_RND; break; |
47305 | case X86ISD::FNMADD: Opcode = X86ISD::FNMSUB; break; |
47306 | case X86ISD::STRICT_FNMADD: Opcode = X86ISD::STRICT_FNMSUB; break; |
47307 | case X86ISD::FNMADD_RND: Opcode = X86ISD::FNMSUB_RND; break; |
47308 | case X86ISD::FNMSUB: Opcode = X86ISD::FNMADD; break; |
47309 | case X86ISD::STRICT_FNMSUB: Opcode = X86ISD::STRICT_FNMADD; break; |
47310 | case X86ISD::FNMSUB_RND: Opcode = X86ISD::FNMADD_RND; break; |
47311 | case X86ISD::FMADDSUB: Opcode = X86ISD::FMSUBADD; break; |
47312 | case X86ISD::FMADDSUB_RND: Opcode = X86ISD::FMSUBADD_RND; break; |
47313 | case X86ISD::FMSUBADD: Opcode = X86ISD::FMADDSUB; break; |
47314 | case X86ISD::FMSUBADD_RND: Opcode = X86ISD::FMADDSUB_RND; break; |
47315 | } |
47316 | } |
47317 | |
47318 | if (NegRes) { |
47319 | switch (Opcode) { |
47320 | |
47321 | default: llvm_unreachable("Unexpected opcode"); |
47322 | case ISD::FMA: Opcode = X86ISD::FNMSUB; break; |
47323 | case X86ISD::FMADD_RND: Opcode = X86ISD::FNMSUB_RND; break; |
47324 | case X86ISD::FMSUB: Opcode = X86ISD::FNMADD; break; |
47325 | case X86ISD::FMSUB_RND: Opcode = X86ISD::FNMADD_RND; break; |
47326 | case X86ISD::FNMADD: Opcode = X86ISD::FMSUB; break; |
47327 | case X86ISD::FNMADD_RND: Opcode = X86ISD::FMSUB_RND; break; |
47328 | case X86ISD::FNMSUB: Opcode = ISD::FMA; break; |
47329 | case X86ISD::FNMSUB_RND: Opcode = X86ISD::FMADD_RND; break; |
47330 | } |
47331 | } |
47332 | |
47333 | return Opcode; |
47334 | } |
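// e.g. negateFMAOpcode(ISD::FMA, /*NegMul=*/true, /*NegAcc=*/false,
// /*NegRes=*/false) returns X86ISD::FNMADD, since negating one multiplicand
// of (a*b)+c yields -(a*b)+c.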
47335 | |
47336 | /// Do target-specific dag combines on floating point negations. |
47337 | static SDValue combineFneg(SDNode *N, SelectionDAG &DAG, |
47338 | TargetLowering::DAGCombinerInfo &DCI, |
47339 | const X86Subtarget &Subtarget) { |
47340 | EVT OrigVT = N->getValueType(0); |
47341 | SDValue Arg = isFNEG(DAG, N); |
47342 | if (!Arg) |
47343 | return SDValue(); |
47344 | |
47345 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
47346 | EVT VT = Arg.getValueType(); |
47347 | EVT SVT = VT.getScalarType(); |
47348 | SDLoc DL(N); |
47349 | |
47350 | |
47351 | if (!TLI.isTypeLegal(VT)) |
47352 | return SDValue(); |
47353 | |
47354 | |
47355 | |
47356 | |
47357 | if (Arg.getOpcode() == ISD::FMUL && (SVT == MVT::f32 || SVT == MVT::f64) && |
47358 | Arg->getFlags().hasNoSignedZeros() && Subtarget.hasAnyFMA()) { |
47359 | SDValue Zero = DAG.getConstantFP(0.0, DL, VT); |
47360 | SDValue NewNode = DAG.getNode(X86ISD::FNMSUB, DL, VT, Arg.getOperand(0), |
47361 | Arg.getOperand(1), Zero); |
47362 | return DAG.getBitcast(OrigVT, NewNode); |
47363 | } |
47364 | |
47365 | bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize(); |
47366 | bool LegalOperations = !DCI.isBeforeLegalizeOps(); |
47367 | if (SDValue NegArg = |
47368 | TLI.getNegatedExpression(Arg, DAG, LegalOperations, CodeSize)) |
47369 | return DAG.getBitcast(OrigVT, NegArg); |
47370 | |
47371 | return SDValue(); |
47372 | } |
47373 | |
47374 | SDValue X86TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG, |
47375 | bool LegalOperations, |
47376 | bool ForCodeSize, |
47377 | NegatibleCost &Cost, |
47378 | unsigned Depth) const { |
47379 | |
47380 | if (SDValue Arg = isFNEG(DAG, Op.getNode(), Depth)) { |
47381 | Cost = NegatibleCost::Cheaper; |
47382 | return DAG.getBitcast(Op.getValueType(), Arg); |
47383 | } |
47384 | |
47385 | EVT VT = Op.getValueType(); |
47386 | EVT SVT = VT.getScalarType(); |
47387 | unsigned Opc = Op.getOpcode(); |
47388 | SDNodeFlags Flags = Op.getNode()->getFlags(); |
47389 | switch (Opc) { |
47390 | case ISD::FMA: |
47391 | case X86ISD::FMSUB: |
47392 | case X86ISD::FNMADD: |
47393 | case X86ISD::FNMSUB: |
47394 | case X86ISD::FMADD_RND: |
47395 | case X86ISD::FMSUB_RND: |
47396 | case X86ISD::FNMADD_RND: |
47397 | case X86ISD::FNMSUB_RND: { |
47398 | if (!Op.hasOneUse() || !Subtarget.hasAnyFMA() || !isTypeLegal(VT) || |
47399 | !(SVT == MVT::f32 || SVT == MVT::f64) || |
47400 | !isOperationLegal(ISD::FMA, VT)) |
47401 | break; |
47402 | |
47403 | // Don't fold (fneg (fma (fneg x), y, (fneg z))) to (fma x, y, z) |
47404 | // if it may have signed zeros. |
47405 | if (!Flags.hasNoSignedZeros()) |
47406 | break; |
47407 | |
47408 | // This is always negatible for free but we might be able to remove some |
47409 | // extra operand negations as well. |
47410 | SmallVector<SDValue, 4> NewOps(Op.getNumOperands(), SDValue()); |
47411 | for (int i = 0; i != 3; ++i) |
47412 | NewOps[i] = getCheaperNegatedExpression( |
47413 | Op.getOperand(i), DAG, LegalOperations, ForCodeSize, Depth + 1); |
47414 | |
47415 | bool NegA = !!NewOps[0]; |
47416 | bool NegB = !!NewOps[1]; |
47417 | bool NegC = !!NewOps[2]; |
47418 | unsigned NewOpc = negateFMAOpcode(Opc, NegA != NegB, NegC, true); |
47419 | |
47420 | Cost = (NegA || NegB || NegC) ? NegatibleCost::Cheaper |
47421 | : NegatibleCost::Neutral; |
47422 | |
47423 | |
47424 | for (int i = 0, e = Op.getNumOperands(); i != e; ++i) |
47425 | if (!NewOps[i]) |
47426 | NewOps[i] = Op.getOperand(i); |
47427 | return DAG.getNode(NewOpc, SDLoc(Op), VT, NewOps); |
47428 | } |
47429 | case X86ISD::FRCP: |
47430 | if (SDValue NegOp0 = |
47431 | getNegatedExpression(Op.getOperand(0), DAG, LegalOperations, |
47432 | ForCodeSize, Cost, Depth + 1)) |
47433 | return DAG.getNode(Opc, SDLoc(Op), VT, NegOp0); |
47434 | break; |
47435 | } |
47436 | |
47437 | return TargetLowering::getNegatedExpression(Op, DAG, LegalOperations, |
47438 | ForCodeSize, Cost, Depth); |
47439 | } |
47440 | |
47441 | static SDValue lowerX86FPLogicOp(SDNode *N, SelectionDAG &DAG, |
47442 | const X86Subtarget &Subtarget) { |
47443 | MVT VT = N->getSimpleValueType(0); |
47444 | |
47445 | if (!VT.isVector() || !Subtarget.hasSSE2()) |
47446 | return SDValue(); |
47447 | |
47448 | SDLoc dl(N); |
47449 | |
47450 | unsigned IntBits = VT.getScalarSizeInBits(); |
47451 | MVT IntSVT = MVT::getIntegerVT(IntBits); |
47452 | MVT IntVT = MVT::getVectorVT(IntSVT, VT.getSizeInBits() / IntBits); |
47453 | |
47454 | SDValue Op0 = DAG.getBitcast(IntVT, N->getOperand(0)); |
47455 | SDValue Op1 = DAG.getBitcast(IntVT, N->getOperand(1)); |
47456 | unsigned IntOpcode; |
47457 | switch (N->getOpcode()) { |
47458 | default: llvm_unreachable("Unexpected FP logic op"); |
47459 | case X86ISD::FOR: IntOpcode = ISD::OR; break; |
47460 | case X86ISD::FXOR: IntOpcode = ISD::XOR; break; |
47461 | case X86ISD::FAND: IntOpcode = ISD::AND; break; |
47462 | case X86ISD::FANDN: IntOpcode = X86ISD::ANDNP; break; |
47463 | } |
47464 | SDValue IntOp = DAG.getNode(IntOpcode, dl, IntVT, Op0, Op1); |
47465 | return DAG.getBitcast(VT, IntOp); |
47466 | } |
47467 | |
47468 | |
47469 | // Fold a xor(setcc cond, val), 1 --> setcc (inverted(cond), val) |
47470 | static SDValue foldXor1SetCC(SDNode *N, SelectionDAG &DAG) { |
47471 | if (N->getOpcode() != ISD::XOR) |
47472 | return SDValue(); |
47473 | |
47474 | SDValue LHS = N->getOperand(0); |
47475 | if (!isOneConstant(N->getOperand(1)) || LHS->getOpcode() != X86ISD::SETCC) |
47476 | return SDValue(); |
47477 | |
47478 | X86::CondCode NewCC = X86::GetOppositeBranchCondition( |
47479 | X86::CondCode(LHS->getConstantOperandVal(0))); |
47480 | SDLoc DL(N); |
47481 | return getSETCC(NewCC, LHS->getOperand(1), DL, DAG); |
47482 | } |
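// e.g. (xor (setcc E, flags), 1) becomes (setcc NE, flags): flipping the low
// bit of a 0/1 setcc result is the same as inverting its condition code.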
47483 | |
47484 | static SDValue combineXor(SDNode *N, SelectionDAG &DAG, |
47485 | TargetLowering::DAGCombinerInfo &DCI, |
47486 | const X86Subtarget &Subtarget) { |
47487 | SDValue N0 = N->getOperand(0); |
47488 | SDValue N1 = N->getOperand(1); |
47489 | EVT VT = N->getValueType(0); |
47490 | |
47491 | // If this is SSE1 only convert to FXOR to avoid scalarization. |
47492 | if (Subtarget.hasSSE1() && !Subtarget.hasSSE2() && VT == MVT::v4i32) { |
47493 | return DAG.getBitcast(MVT::v4i32, |
47494 | DAG.getNode(X86ISD::FXOR, SDLoc(N), MVT::v4f32, |
47495 | DAG.getBitcast(MVT::v4f32, N0), |
47496 | DAG.getBitcast(MVT::v4f32, N1))); |
47497 | } |
47498 | |
47499 | if (SDValue Cmp = foldVectorXorShiftIntoCmp(N, DAG, Subtarget)) |
47500 | return Cmp; |
47501 | |
47502 | if (SDValue R = combineBitOpWithMOVMSK(N, DAG)) |
47503 | return R; |
47504 | |
47505 | if (DCI.isBeforeLegalizeOps()) |
47506 | return SDValue(); |
47507 | |
47508 | if (SDValue SetCC = foldXor1SetCC(N, DAG)) |
47509 | return SetCC; |
47510 | |
47511 | if (SDValue RV = foldXorTruncShiftIntoCmp(N, DAG)) |
47512 | return RV; |
47513 | |
47514 | |
47515 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
47516 | if (llvm::isAllOnesConstant(N1) && N0.getOpcode() == ISD::BITCAST && |
47517 | N0.getOperand(0).getValueType().isVector() && |
47518 | N0.getOperand(0).getValueType().getVectorElementType() == MVT::i1 && |
47519 | TLI.isTypeLegal(N0.getOperand(0).getValueType()) && N0.hasOneUse()) { |
47520 | return DAG.getBitcast(VT, DAG.getNOT(SDLoc(N), N0.getOperand(0), |
47521 | N0.getOperand(0).getValueType())); |
47522 | } |
47523 | |
47524 | |
47525 | |
47526 | if (ISD::isBuildVectorAllOnes(N1.getNode()) && VT.isVector() && |
47527 | VT.getVectorElementType() == MVT::i1 && |
47528 | N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.getOperand(0).isUndef() && |
47529 | TLI.isTypeLegal(N0.getOperand(1).getValueType())) { |
47530 | return DAG.getNode( |
47531 | ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0), |
47532 | DAG.getNOT(SDLoc(N), N0.getOperand(1), N0.getOperand(1).getValueType()), |
47533 | N0.getOperand(2)); |
47534 | } |
47535 | |
47536 | // Fold xor(zext(xor(x,c1)),c2) -> xor(zext(x),xor(zext(c1),c2)) |
47537 | // Fold xor(truncate(xor(x,c1)),c2) -> xor(truncate(x),xor(truncate(c1),c2)) |
47538 | // TODO: Under what circumstances could this be performed in DAGCombine? |
47539 | if ((N0.getOpcode() == ISD::TRUNCATE || N0.getOpcode() == ISD::ZERO_EXTEND) && |
47540 | N0.getOperand(0).getOpcode() == N->getOpcode()) { |
47541 | SDValue TruncExtSrc = N0.getOperand(0); |
47542 | auto *N1C = dyn_cast<ConstantSDNode>(N1); |
47543 | auto *N001C = dyn_cast<ConstantSDNode>(TruncExtSrc.getOperand(1)); |
47544 | if (N1C && !N1C->isOpaque() && N001C && !N001C->isOpaque()) { |
47545 | SDLoc DL(N); |
47546 | SDValue LHS = DAG.getZExtOrTrunc(TruncExtSrc.getOperand(0), DL, VT); |
47547 | SDValue RHS = DAG.getZExtOrTrunc(TruncExtSrc.getOperand(1), DL, VT); |
47548 | return DAG.getNode(ISD::XOR, DL, VT, LHS, |
47549 | DAG.getNode(ISD::XOR, DL, VT, RHS, N1)); |
47550 | } |
47551 | } |
47552 | |
47553 | if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, Subtarget)) |
47554 | return FPLogic; |
47555 | |
47556 | return combineFneg(N, DAG, DCI, Subtarget); |
47557 | } |
47558 | |
47559 | static SDValue combineBEXTR(SDNode *N, SelectionDAG &DAG, |
47560 | TargetLowering::DAGCombinerInfo &DCI, |
47561 | const X86Subtarget &Subtarget) { |
47562 | EVT VT = N->getValueType(0); |
47563 | unsigned NumBits = VT.getSizeInBits(); |
47564 | |
47565 | // TODO - Constant Folding. |
47566 | |
47567 | // Simplify the inputs. |
47568 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
47569 | APInt DemandedMask(APInt::getAllOnesValue(NumBits)); |
47570 | if (TLI.SimplifyDemandedBits(SDValue(N, 0), DemandedMask, DCI)) |
47571 | return SDValue(N, 0); |
47572 | |
47573 | return SDValue(); |
47574 | } |
47575 | |
47576 | static bool isNullFPScalarOrVectorConst(SDValue V) { |
47577 | return isNullFPConstant(V) || ISD::isBuildVectorAllZeros(V.getNode()); |
47578 | } |
47579 | |
47580 | /// If a value is a scalar FP zero or a vector FP zero (potentially including |
47581 | /// undefined elements), return a zero constant that may be used to fold away |
47582 | /// that value. In the case of a vector, the returned constant will not contain |
47583 | /// undefined elements even if the input parameter does. This makes it suitable |
47584 | /// to be used as a replacement operand with operations (eg, bitwise-and) where |
47585 | /// an undef should not propagate. |
47586 | static SDValue getNullFPConstForNullVal(SDValue V, SelectionDAG &DAG, |
47587 | const X86Subtarget &Subtarget) { |
47588 | if (!isNullFPScalarOrVectorConst(V)) |
47589 | return SDValue(); |
47590 | |
47591 | if (V.getValueType().isVector()) |
47592 | return getZeroVector(V.getSimpleValueType(), Subtarget, DAG, SDLoc(V)); |
47593 | |
47594 | return V; |
47595 | } |
47596 | |
47597 | static SDValue combineFAndFNotToFAndn(SDNode *N, SelectionDAG &DAG, |
47598 | const X86Subtarget &Subtarget) { |
47599 | SDValue N0 = N->getOperand(0); |
47600 | SDValue N1 = N->getOperand(1); |
47601 | EVT VT = N->getValueType(0); |
47602 | SDLoc DL(N); |
47603 | |
47604 | // This fold only handles scalar f32/f64, plus v4f32 on SSE1-only targets. |
47605 | if (!((VT == MVT::f32 && Subtarget.hasSSE1()) || |
47606 | (VT == MVT::f64 && Subtarget.hasSSE2()) || |
47607 | (VT == MVT::v4f32 && Subtarget.hasSSE1() && !Subtarget.hasSSE2()))) |
47608 | return SDValue(); |
47609 | |
47610 | auto isAllOnesConstantFP = [](SDValue V) { |
47611 | if (V.getSimpleValueType().isVector()) |
47612 | return ISD::isBuildVectorAllOnes(V.getNode()); |
47613 | auto *C = dyn_cast<ConstantFPSDNode>(V); |
47614 | return C && C->getConstantFPValue()->isAllOnesValue(); |
47615 | }; |
47616 | |
47617 | // fand (fxor X, -1), Y --> fandn X, Y |
47618 | if (N0.getOpcode() == X86ISD::FXOR && isAllOnesConstantFP(N0.getOperand(1))) |
47619 | return DAG.getNode(X86ISD::FANDN, DL, VT, N0.getOperand(0), N1); |
47620 | |
47621 | // fand X, (fxor Y, -1) --> fandn Y, X |
47622 | if (N1.getOpcode() == X86ISD::FXOR && isAllOnesConstantFP(N1.getOperand(1))) |
47623 | return DAG.getNode(X86ISD::FANDN, DL, VT, N1.getOperand(0), N0); |
47624 | |
47625 | return SDValue(); |
47626 | } |
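// Illustrative note (not from the original source): on SSE the FANDN node
// produced above maps to a single ANDNPS/ANDNPD, e.g.
//   fand (fxor X, all-ones), Y  -->  fandn X, Y
// which replaces an xorps+andps pair with one andnps.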
47627 | |
47628 | /// Do target-specific dag combines on X86ISD::FAND nodes. |
47629 | static SDValue combineFAnd(SDNode *N, SelectionDAG &DAG, |
47630 | const X86Subtarget &Subtarget) { |
47631 | // FAND(0.0, x) -> 0.0 |
47632 | if (SDValue V = getNullFPConstForNullVal(N->getOperand(0), DAG, Subtarget)) |
47633 | return V; |
47634 | |
47635 | // FAND(x, 0.0) -> 0.0 |
47636 | if (SDValue V = getNullFPConstForNullVal(N->getOperand(1), DAG, Subtarget)) |
47637 | return V; |
47638 | |
47639 | if (SDValue V = combineFAndFNotToFAndn(N, DAG, Subtarget)) |
47640 | return V; |
47641 | |
47642 | return lowerX86FPLogicOp(N, DAG, Subtarget); |
47643 | } |
47644 | |
47645 | /// Do target-specific dag combines on X86ISD::FANDN nodes. |
47646 | static SDValue combineFAndn(SDNode *N, SelectionDAG &DAG, |
47647 | const X86Subtarget &Subtarget) { |
47648 | // FANDN(0.0, x) -> x |
47649 | if (isNullFPScalarOrVectorConst(N->getOperand(0))) |
47650 | return N->getOperand(1); |
47651 | |
47652 | // FANDN(x, 0.0) -> 0.0 |
47653 | if (SDValue V = getNullFPConstForNullVal(N->getOperand(1), DAG, Subtarget)) |
47654 | return V; |
47655 | |
47656 | return lowerX86FPLogicOp(N, DAG, Subtarget); |
47657 | } |
47658 | |
47659 | /// Do target-specific dag combines on X86ISD::FOR and X86ISD::FXOR nodes. |
47660 | static SDValue combineFOr(SDNode *N, SelectionDAG &DAG, |
47661 | TargetLowering::DAGCombinerInfo &DCI, |
47662 | const X86Subtarget &Subtarget) { |
47663 | assert(N->getOpcode() == X86ISD::FOR || N->getOpcode() == X86ISD::FXOR); |
47664 | |
47665 | // F[X]OR(0.0, x) -> x |
47666 | if (isNullFPScalarOrVectorConst(N->getOperand(0))) |
47667 | return N->getOperand(1); |
47668 | |
47669 | // F[X]OR(x, 0.0) -> x |
47670 | if (isNullFPScalarOrVectorConst(N->getOperand(1))) |
47671 | return N->getOperand(0); |
47672 | |
47673 | if (SDValue NewVal = combineFneg(N, DAG, DCI, Subtarget)) |
47674 | return NewVal; |
47675 | |
47676 | return lowerX86FPLogicOp(N, DAG, Subtarget); |
47677 | } |
47678 | |
47679 | /// Do target-specific dag combines on X86ISD::FMIN and X86ISD::FMAX nodes. |
47680 | static SDValue combineFMinFMax(SDNode *N, SelectionDAG &DAG) { |
47681 | assert(N->getOpcode() == X86ISD::FMIN || N->getOpcode() == X86ISD::FMAX); |
47682 | |
47683 | // Only transform when NaNs and signed zeros can be ignored. |
47684 | if (!DAG.getTarget().Options.NoNaNsFPMath || |
47685 | !DAG.getTarget().Options.NoSignedZerosFPMath) |
47686 | return SDValue(); |
47687 | |
47688 | // Convert the FMIN and FMAX nodes into FMINC and FMAXC, their commutative |
47689 | // variants; operand order no longer matters under these FP-math options. |
47690 | unsigned NewOp = 0; |
47691 | switch (N->getOpcode()) { |
47692 | default: llvm_unreachable("unknown opcode"); |
47693 | case X86ISD::FMIN: NewOp = X86ISD::FMINC; break; |
47694 | case X86ISD::FMAX: NewOp = X86ISD::FMAXC; break; |
47695 | } |
47696 | |
47697 | return DAG.getNode(NewOp, SDLoc(N), N->getValueType(0), |
47698 | N->getOperand(0), N->getOperand(1)); |
47699 | } |
47700 | |
47701 | static SDValue combineFMinNumFMaxNum(SDNode *N, SelectionDAG &DAG, |
47702 | const X86Subtarget &Subtarget) { |
47703 | if (Subtarget.useSoftFloat()) |
47704 | return SDValue(); |
47705 | |
47706 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
47707 | |
47708 | EVT VT = N->getValueType(0); |
47709 | if (!((Subtarget.hasSSE1() && VT == MVT::f32) || |
47710 | (Subtarget.hasSSE2() && VT == MVT::f64) || |
47711 | (VT.isVector() && TLI.isTypeLegal(VT)))) |
47712 | return SDValue(); |
47713 | |
47714 | SDValue Op0 = N->getOperand(0); |
47715 | SDValue Op1 = N->getOperand(1); |
47716 | SDLoc DL(N); |
47717 | auto MinMaxOp = N->getOpcode() == ISD::FMAXNUM ? X86ISD::FMAX : X86ISD::FMIN; |
47718 | |
47719 | // If we don't have to respect NaN inputs, this is a direct translation to |
47720 | // x86 min/max instructions. |
47721 | if (DAG.getTarget().Options.NoNaNsFPMath || N->getFlags().hasNoNaNs()) |
47722 | return DAG.getNode(MinMaxOp, DL, VT, Op0, Op1, N->getFlags()); |
47723 | |
47724 | // If one of the operands is known non-NaN use the native min/max |
47725 | // instructions with the non-NaN input as second operand. |
47726 | if (DAG.isKnownNeverNaN(Op1)) |
47727 | return DAG.getNode(MinMaxOp, DL, VT, Op0, Op1, N->getFlags()); |
47728 | if (DAG.isKnownNeverNaN(Op0)) |
47729 | return DAG.getNode(MinMaxOp, DL, VT, Op1, Op0, N->getFlags()); |
47730 | |
47731 | // If we have to respect NaN inputs, this takes at least 3 instructions. |
47732 | // Favor a library call when operating on a scalar and minimizing code size. |
47733 | if (!VT.isVector() && DAG.getMachineFunction().getFunction().hasMinSize()) |
47734 | return SDValue(); |
47735 | |
47736 | EVT SetCCType = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), |
47737 | VT); |
47738 | |
47739 | // There are 4 possibilities involving NaN inputs, and these are the required |
47740 | // outputs: |
47741 | //                   Op1 |
47742 | //               Num     NaN |
47743 | //            ---------------- |
47744 | //       Num  |  Max  |  Op0 | |
47745 | // Op0        ---------------- |
47746 | //       NaN  |  Op1  |  NaN | |
47747 | //            ---------------- |
47748 | // |
47749 | // The SSE FP max/min instructions are not designed for this case, but rather |
47750 | // implement: |
47751 | //   Min = Op1 < Op0 ? Op1 : Op0 |
47752 | //   Max = Op0 < Op1 ? Op1 : Op0 |
47753 | // |
47754 | // So they always return Op0 if either input is a NaN. However, we can still |
47755 | // use those instructions for fmaxnum by selecting away a NaN input. |
47756 | // |
47757 | // If either operand is NaN, the 2nd source operand (Op0) is passed through. |
47758 | SDValue MinOrMax = DAG.getNode(MinMaxOp, DL, VT, Op1, Op0); |
47759 | SDValue IsOp0Nan = DAG.getSetCC(DL, SetCCType, Op0, Op0, ISD::SETUO); |
47760 | |
47761 | // If Op0 is a NaN, select Op1. Otherwise, select the min/max. If both |
47762 | // operands are NaN, the NaN value of Op1 is the result. |
47763 | return DAG.getSelect(DL, VT, IsOp0Nan, Op1, MinOrMax); |
47764 | } |
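// A minimal standalone sketch (not part of this file) of the fmaxnum
// semantics that the select sequence above preserves; fmaxnum_ref is a
// hypothetical name used only for illustration.
#include <cmath>
static double fmaxnum_ref(double Op0, double Op1) {
  if (std::isnan(Op0)) return Op1;  // the SETUO/select pair handles this case
  if (std::isnan(Op1)) return Op0;  // native MAX(Op1, Op0) already returns Op0
  return Op0 < Op1 ? Op1 : Op0;     // matches the MAXSD operand convention
}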
47765 | |
47766 | static SDValue combineX86INT_TO_FP(SDNode *N, SelectionDAG &DAG, |
47767 | TargetLowering::DAGCombinerInfo &DCI) { |
47768 | EVT VT = N->getValueType(0); |
47769 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
47770 | |
47771 | APInt KnownUndef, KnownZero; |
47772 | APInt DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements()); |
47773 | if (TLI.SimplifyDemandedVectorElts(SDValue(N, 0), DemandedElts, KnownUndef, |
47774 | KnownZero, DCI)) |
47775 | return SDValue(N, 0); |
47776 | |
47777 | // Convert a full vector load into vzload when not all bits are needed. |
47778 | SDValue In = N->getOperand(0); |
47779 | MVT InVT = In.getSimpleValueType(); |
47780 | if (VT.getVectorNumElements() < InVT.getVectorNumElements() && |
47781 | ISD::isNormalLoad(In.getNode()) && In.hasOneUse()) { |
47782 | assert(InVT.is128BitVector() && "Expected 128-bit input vector"); |
47783 | LoadSDNode *LN = cast<LoadSDNode>(N->getOperand(0)); |
47784 | unsigned NumBits = InVT.getScalarSizeInBits() * VT.getVectorNumElements(); |
47785 | MVT MemVT = MVT::getIntegerVT(NumBits); |
47786 | MVT LoadVT = MVT::getVectorVT(MemVT, 128 / NumBits); |
47787 | if (SDValue VZLoad = narrowLoadToVZLoad(LN, MemVT, LoadVT, DAG)) { |
47788 | SDLoc dl(N); |
47789 | SDValue Convert = DAG.getNode(N->getOpcode(), dl, VT, |
47790 | DAG.getBitcast(InVT, VZLoad)); |
47791 | DCI.CombineTo(N, Convert); |
47792 | DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), VZLoad.getValue(1)); |
47793 | DCI.recursivelyDeleteUnusedNodes(LN); |
47794 | return SDValue(N, 0); |
47795 | } |
47796 | } |
47797 | |
47798 | return SDValue(); |
47799 | } |
47800 | |
47801 | static SDValue combineCVTP2I_CVTTP2I(SDNode *N, SelectionDAG &DAG, |
47802 | TargetLowering::DAGCombinerInfo &DCI) { |
47803 | bool IsStrict = N->isTargetStrictFPOpcode(); |
47804 | EVT VT = N->getValueType(0); |
47805 | |
47806 | // Convert a full vector load into vzload when not all bits are needed. |
47807 | SDValue In = N->getOperand(IsStrict ? 1 : 0); |
47808 | MVT InVT = In.getSimpleValueType(); |
47809 | if (VT.getVectorNumElements() < InVT.getVectorNumElements() && |
47810 | ISD::isNormalLoad(In.getNode()) && In.hasOneUse()) { |
47811 | assert(InVT.is128BitVector() && "Expected 128-bit input vector"); |
47812 | LoadSDNode *LN = cast<LoadSDNode>(In); |
47813 | unsigned NumBits = InVT.getScalarSizeInBits() * VT.getVectorNumElements(); |
47814 | MVT MemVT = MVT::getFloatingPointVT(NumBits); |
47815 | MVT LoadVT = MVT::getVectorVT(MemVT, 128 / NumBits); |
47816 | if (SDValue VZLoad = narrowLoadToVZLoad(LN, MemVT, LoadVT, DAG)) { |
47817 | SDLoc dl(N); |
47818 | if (IsStrict) { |
47819 | SDValue Convert = |
47820 | DAG.getNode(N->getOpcode(), dl, {VT, MVT::Other}, |
47821 | {N->getOperand(0), DAG.getBitcast(InVT, VZLoad)}); |
47822 | DCI.CombineTo(N, Convert, Convert.getValue(1)); |
47823 | } else { |
47824 | SDValue Convert = |
47825 | DAG.getNode(N->getOpcode(), dl, VT, DAG.getBitcast(InVT, VZLoad)); |
47826 | DCI.CombineTo(N, Convert); |
47827 | } |
47828 | DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), VZLoad.getValue(1)); |
47829 | DCI.recursivelyDeleteUnusedNodes(LN); |
47830 | return SDValue(N, 0); |
47831 | } |
47832 | } |
47833 | |
47834 | return SDValue(); |
47835 | } |
47836 | |
47837 | /// Do target-specific dag combines on X86ISD::ANDNP nodes. |
47838 | static SDValue combineAndnp(SDNode *N, SelectionDAG &DAG, |
47839 | TargetLowering::DAGCombinerInfo &DCI, |
47840 | const X86Subtarget &Subtarget) { |
47841 | MVT VT = N->getSimpleValueType(0); |
47842 | |
47843 | // ANDNP(0, x) -> x |
47844 | if (ISD::isBuildVectorAllZeros(N->getOperand(0).getNode())) |
47845 | return N->getOperand(1); |
47846 | |
47847 | // ANDNP(x, 0) -> 0 |
47848 | if (ISD::isBuildVectorAllZeros(N->getOperand(1).getNode())) |
47849 | return DAG.getConstant(0, SDLoc(N), VT); |
47850 | |
47851 | // Turn ANDNP back to AND if input is inverted. |
47852 | if (SDValue Not = IsNOT(N->getOperand(0), DAG)) |
47853 | return DAG.getNode(ISD::AND, SDLoc(N), VT, DAG.getBitcast(VT, Not), |
47854 | N->getOperand(1)); |
47855 | |
47856 | // Attempt to recursively combine a bitmask ANDNP with shuffles. |
47857 | if (VT.isVector() && (VT.getScalarSizeInBits() % 8) == 0) { |
47858 | SDValue Op(N, 0); |
47859 | if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget)) |
47860 | return Res; |
47861 | } |
47862 | |
47863 | return SDValue(); |
47864 | } |
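// Standalone sketch (illustrative only, assuming SSE2): the ANDNP node
// combined above corresponds to the one-instruction PANDN pattern.
#include <emmintrin.h>
static __m128i andnp_ref(__m128i A, __m128i B) {
  return _mm_andnot_si128(A, B);  // computes (~A) & B in a single PANDN
}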
47865 | |
47866 | static SDValue combineBT(SDNode *N, SelectionDAG &DAG, |
47867 | TargetLowering::DAGCombinerInfo &DCI) { |
47868 | SDValue N1 = N->getOperand(1); |
47869 | // BT ignores high bits (like shifts): only the low log2(width) bits of the |
47870 | // bit index are demanded. |
47871 | unsigned BitWidth = N1.getValueSizeInBits(); |
47872 | APInt DemandedMask = APInt::getLowBitsSet(BitWidth, Log2_32(BitWidth)); |
47873 | if (DAG.getTargetLoweringInfo().SimplifyDemandedBits(N1, DemandedMask, DCI)) { |
47874 | if (N->getOpcode() != ISD::DELETED_NODE) |
47875 | DCI.AddToWorklist(N); |
47876 | return SDValue(N, 0); |
47877 | } |
47878 | |
47879 | return SDValue(); |
47880 | } |
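// Reference sketch (not from the original source): hardware BT masks the bit
// index to the operand width, which is why only Log2_32(BitWidth) low bits
// are demanded above. testbit32_ref is a hypothetical helper.
static bool testbit32_ref(unsigned Value, unsigned Index) {
  return (Value >> (Index & 31)) & 1;  // BT r32 reads only Index mod 32
}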
47881 | |
47882 | static SDValue combineCVTPH2PS(SDNode *N, SelectionDAG &DAG, |
47883 | TargetLowering::DAGCombinerInfo &DCI) { |
47884 | bool IsStrict = N->getOpcode() == X86ISD::STRICT_CVTPH2PS; |
47885 | SDValue Src = N->getOperand(IsStrict ? 1 : 0); |
47886 | |
47887 | if (N->getValueType(0) == MVT::v4f32 && Src.getValueType() == MVT::v8i16) { |
47888 | APInt KnownUndef, KnownZero; |
47889 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
47890 | APInt DemandedElts = APInt::getLowBitsSet(8, 4); |
47891 | if (TLI.SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef, KnownZero, |
47892 | DCI)) { |
47893 | if (N->getOpcode() != ISD::DELETED_NODE) |
47894 | DCI.AddToWorklist(N); |
47895 | return SDValue(N, 0); |
47896 | } |
47897 | |
47898 | // Convert a full vector load into vzload when not all bits are needed. |
47899 | if (ISD::isNormalLoad(Src.getNode()) && Src.hasOneUse()) { |
47900 | LoadSDNode *LN = cast<LoadSDNode>(N->getOperand(IsStrict ? 1 : 0)); |
47901 | if (SDValue VZLoad = narrowLoadToVZLoad(LN, MVT::i64, MVT::v2i64, DAG)) { |
47902 | SDLoc dl(N); |
47903 | if (IsStrict) { |
47904 | SDValue Convert = DAG.getNode( |
47905 | N->getOpcode(), dl, {MVT::v4f32, MVT::Other}, |
47906 | {N->getOperand(0), DAG.getBitcast(MVT::v8i16, VZLoad)}); |
47907 | DCI.CombineTo(N, Convert, Convert.getValue(1)); |
47908 | } else { |
47909 | SDValue Convert = DAG.getNode(N->getOpcode(), dl, MVT::v4f32, |
47910 | DAG.getBitcast(MVT::v8i16, VZLoad)); |
47911 | DCI.CombineTo(N, Convert); |
47912 | } |
47913 | |
47914 | DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), VZLoad.getValue(1)); |
47915 | DCI.recursivelyDeleteUnusedNodes(LN); |
47916 | return SDValue(N, 0); |
47917 | } |
47918 | } |
47919 | } |
47920 | |
47921 | return SDValue(); |
47922 | } |
47923 | |
47924 | // Try to combine sext_in_reg of a cmov of constants by extending the constants. |
47925 | static SDValue combineSextInRegCmov(SDNode *N, SelectionDAG &DAG) { |
47926 | assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG); |
47927 | |
47928 | EVT DstVT = N->getValueType(0); |
47929 | |
47930 | SDValue N0 = N->getOperand(0); |
47931 | SDValue N1 = N->getOperand(1); |
47932 | EVT ExtraVT = cast<VTSDNode>(N1)->getVT(); |
47933 | |
47934 | if (ExtraVT != MVT::i8 && ExtraVT != MVT::i16) |
47935 | return SDValue(); |
47936 | |
47937 | // Look through an intermediate any_extend or truncate to find the CMOV. |
47938 | SDValue IntermediateBitwidthOp; |
47939 | if ((N0.getOpcode() == ISD::ANY_EXTEND || N0.getOpcode() == ISD::TRUNCATE) && |
47940 | N0.hasOneUse()) { |
47941 | IntermediateBitwidthOp = N0; |
47942 | N0 = N0.getOperand(0); |
47943 | } |
47944 | |
47945 | // The source must be a single-use CMOV. |
47946 | if (N0.getOpcode() != X86ISD::CMOV || !N0.hasOneUse()) |
47947 | return SDValue(); |
47948 | |
47949 | SDValue CMovOp0 = N0.getOperand(0); |
47950 | SDValue CMovOp1 = N0.getOperand(1); |
47951 | |
47952 | // Only handle CMOVs of constants. |
47953 | if (!isa<ConstantSDNode>(CMovOp0.getNode()) || |
47954 | !isa<ConstantSDNode>(CMovOp1.getNode())) |
47955 | return SDValue(); |
47956 | |
47957 | SDLoc DL(N); |
47958 | |
47959 | // Re-apply the intermediate bitwidth op directly to the constants. |
47960 | if (IntermediateBitwidthOp) { |
47961 | unsigned IntermediateOpc = IntermediateBitwidthOp.getOpcode(); |
47962 | CMovOp0 = DAG.getNode(IntermediateOpc, DL, DstVT, CMovOp0); |
47963 | CMovOp1 = DAG.getNode(IntermediateOpc, DL, DstVT, CMovOp1); |
47964 | } |
47965 | |
47966 | CMovOp0 = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, DstVT, CMovOp0, N1); |
47967 | CMovOp1 = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, DstVT, CMovOp1, N1); |
47968 | |
47969 | EVT CMovVT = DstVT; |
47970 | // We do not want i16 CMOVs; promote to i32 and truncate afterwards. |
47971 | if (DstVT == MVT::i16) { |
47972 | CMovVT = MVT::i32; |
47973 | CMovOp0 = DAG.getNode(ISD::ZERO_EXTEND, DL, CMovVT, CMovOp0); |
47974 | CMovOp1 = DAG.getNode(ISD::ZERO_EXTEND, DL, CMovVT, CMovOp1); |
47975 | } |
47976 | |
47977 | SDValue CMov = DAG.getNode(X86ISD::CMOV, DL, CMovVT, CMovOp0, CMovOp1, |
47978 | N0.getOperand(2), N0.getOperand(3)); |
47979 | |
47980 | if (CMovVT != DstVT) |
47981 | CMov = DAG.getNode(ISD::TRUNCATE, DL, DstVT, CMov); |
47982 | |
47983 | return CMov; |
47984 | } |
47985 | |
47986 | static SDValue combineSignExtendInReg(SDNode *N, SelectionDAG &DAG, |
47987 | const X86Subtarget &Subtarget) { |
47988 | assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG); |
47989 | |
47990 | if (SDValue V = combineSextInRegCmov(N, DAG)) |
47991 | return V; |
47992 | |
47993 | EVT VT = N->getValueType(0); |
47994 | SDValue N0 = N->getOperand(0); |
47995 | SDValue N1 = N->getOperand(1); |
47996 | EVT ExtraVT = cast<VTSDNode>(N1)->getVT(); |
47997 | SDLoc dl(N); |
47998 | |
47999 | // SIGN_EXTEND_INREG to v4i64 is expensive on both SSE and AVX2 since there |
48000 | // is no sign-extended shift right operation on vectors with 64-bit |
48001 | // elements. Instead: |
48002 | //   (sext_in_reg (v4i64 anyext (v4i32 x))) = |
48003 | //     (v4i64 sext (v4i32 sext_in_reg (v4i32 x))) |
48004 | if (VT == MVT::v4i64 && (N0.getOpcode() == ISD::ANY_EXTEND || |
48005 | N0.getOpcode() == ISD::SIGN_EXTEND)) { |
48006 | SDValue N00 = N0.getOperand(0); |
48007 | |
48008 | // EXTLOAD has a better solution on AVX2: it may be replaced with an |
48009 | // X86ISD::VSEXT node later, so only handle normal loads here. |
48010 | if (N00.getOpcode() == ISD::LOAD && Subtarget.hasInt256()) |
48011 | if (!ISD::isNormalLoad(N00.getNode())) |
48012 | return SDValue(); |
48013 | |
48014 | // Attempt to promote any comparison mask ops before moving the |
48015 | // SIGN_EXTEND_INREG inside. |
48016 | if (SDValue Promote = PromoteMaskArithmetic(N0.getNode(), DAG, Subtarget)) |
48017 | return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT, Promote, N1); |
48018 | |
48019 | if (N00.getValueType() == MVT::v4i32 && ExtraVT.getSizeInBits() < 128) { |
48020 | SDValue Tmp = |
48021 | DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v4i32, N00, N1); |
48022 | return DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i64, Tmp); |
48023 | } |
48024 | } |
48025 | return SDValue(); |
48026 | } |
48027 | |
48028 | // sext(add_nsw(x, C)) --> add(sext(x), C_sext) |
48029 | // zext(add_nuw(x, C)) --> add(zext(x), C_zext) |
48030 | // Promoting a sign/zero extension ahead of a no-wrap 'add' exposes |
48031 | // opportunities to combine math ops, use an LEA, or use a complex addressing |
48032 | // mode. This can allow us to convert two instructions into one. |
48033 | static SDValue promoteExtBeforeAdd(SDNode *Ext, SelectionDAG &DAG, |
48034 | const X86Subtarget &Subtarget) { |
48035 | if (Ext->getOpcode() != ISD::SIGN_EXTEND && |
48036 | Ext->getOpcode() != ISD::ZERO_EXTEND) |
48037 | return SDValue(); |
48038 | |
48039 | // Only handle extension to i64. |
48040 | EVT VT = Ext->getValueType(0); |
48041 | if (VT != MVT::i64) |
48042 | return SDValue(); |
48043 | |
48044 | SDValue Add = Ext->getOperand(0); |
48045 | if (Add.getOpcode() != ISD::ADD) |
48046 | return SDValue(); |
48047 | |
48048 | bool Sext = Ext->getOpcode() == ISD::SIGN_EXTEND; |
48049 | bool NSW = Add->getFlags().hasNoSignedWrap(); |
48050 | bool NUW = Add->getFlags().hasNoUnsignedWrap(); |
48051 | |
48052 | // We need an 'add nsw' feeding into the 'sext' or 'add nuw' feeding into |
48053 | // the 'zext'. |
48054 | if ((Sext && !NSW) || (!Sext && !NUW)) |
48055 | return SDValue(); |
48056 | |
48057 | // Having a constant operand to the 'add' ensures that we are not increasing |
48058 | // the instruction count because the constant is extended for free below. |
48059 | // A constant operand can also become the displacement field of an LEA. |
48060 | auto *AddOp1 = dyn_cast<ConstantSDNode>(Add.getOperand(1)); |
48061 | if (!AddOp1) |
48062 | return SDValue(); |
48063 | |
48064 | |
48065 | |
48066 | |
48067 | // Don't make the 'add' bigger if there's no hope of combining it with some |
48068 | // other 'add' or 'shl' instruction (the candidates for forming an LEA). |
48069 | bool HasLEAPotential = false; |
48070 | for (auto *User : Ext->uses()) { |
48071 | if (User->getOpcode() == ISD::ADD || User->getOpcode() == ISD::SHL) { |
48072 | HasLEAPotential = true; |
48073 | break; |
48074 | } |
48075 | } |
48076 | if (!HasLEAPotential) |
48077 | return SDValue(); |
48078 | |
48079 | // Everything looks good, so pull the '{s|z}ext' ahead of the 'add'. |
48080 | int64_t AddConstant = Sext ? AddOp1->getSExtValue() : AddOp1->getZExtValue(); |
48081 | SDValue AddOp0 = Add.getOperand(0); |
48082 | SDValue NewExt = DAG.getNode(Ext->getOpcode(), SDLoc(Ext), VT, AddOp0); |
48083 | SDValue NewConstant = DAG.getConstant(AddConstant, SDLoc(Add), VT); |
48084 | |
48085 | |
48086 | // Carry the no-wrap flags over to the widened add. |
48087 | SDNodeFlags Flags; |
48088 | Flags.setNoSignedWrap(NSW); |
48089 | Flags.setNoUnsignedWrap(NUW); |
48090 | return DAG.getNode(ISD::ADD, SDLoc(Add), VT, NewExt, NewConstant, Flags); |
48091 | } |
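// Worked example of the promotion above (illustrative):
//   t1 = add nsw i32 %x, 20
//   t2 = sext i32 t1 to i64
//   t3 = shl i64 t2, 3
// becomes
//   t1 = sext i32 %x to i64
//   t2 = add nsw i64 t1, 20
//   t3 = shl i64 t2, 3
// where the add and its shl user can now fold into one scaled-index LEA.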
48092 | |
48093 | |
48094 | |
48095 | |
48096 | |
48097 | |
48098 | // If we face {ANY,SIGN,ZERO}_EXTEND that is applied to a CMOV with constant |
48099 | // operands and the result of CMOV is not used anywhere else - promote CMOV |
48100 | // itself instead of promoting its result. This could be beneficial, because: |
48101 | //     1) X86TargetLowering::EmitLoweredSelect later can do merging of two |
48102 | //        (or more) pseudo-CMOVs only when they go in the same basic block |
48103 | //     2) extending a constant is free, so the extended operands fold |
48104 | //        directly into the wider CMOV |
48105 | static SDValue combineToExtendCMOV(SDNode *Extend, SelectionDAG &DAG) { |
48106 | SDValue CMovN = Extend->getOperand(0); |
48107 | if (CMovN.getOpcode() != X86ISD::CMOV || !CMovN.hasOneUse()) |
48108 | return SDValue(); |
48109 | |
48110 | EVT TargetVT = Extend->getValueType(0); |
48111 | unsigned ExtendOpcode = Extend->getOpcode(); |
48112 | SDLoc DL(Extend); |
48113 | |
48114 | EVT VT = CMovN.getValueType(); |
48115 | SDValue CMovOp0 = CMovN.getOperand(0); |
48116 | SDValue CMovOp1 = CMovN.getOperand(1); |
48117 | |
48118 | if (!isa<ConstantSDNode>(CMovOp0.getNode()) || |
48119 | !isa<ConstantSDNode>(CMovOp1.getNode())) |
48120 | return SDValue(); |
48121 | |
48122 | // Only extend to i32 or i64. |
48123 | if (TargetVT != MVT::i32 && TargetVT != MVT::i64) |
48124 | return SDValue(); |
48125 | |
48126 | |
48127 | // Only extend from i16, or sign-extend from i32. |
48128 | if (VT != MVT::i16 && !(ExtendOpcode == ISD::SIGN_EXTEND && VT == MVT::i32)) |
48129 | return SDValue(); |
48130 | |
48131 | // For zero/any extends to i64, perform the CMOV at i32 and extend the |
48132 | // result afterwards; a 32-bit CMOV of the constants is sufficient. |
48133 | EVT ExtendVT = TargetVT; |
48134 | if (TargetVT == MVT::i64 && ExtendOpcode != ISD::SIGN_EXTEND) |
48135 | ExtendVT = MVT::i32; |
48136 | |
48137 | CMovOp0 = DAG.getNode(ExtendOpcode, DL, ExtendVT, CMovOp0); |
48138 | CMovOp1 = DAG.getNode(ExtendOpcode, DL, ExtendVT, CMovOp1); |
48139 | |
48140 | SDValue Res = DAG.getNode(X86ISD::CMOV, DL, ExtendVT, CMovOp0, CMovOp1, |
48141 | CMovN.getOperand(2), CMovN.getOperand(3)); |
48142 | |
48143 | // Widen the result if the CMOV was done at a narrower type. |
48144 | if (ExtendVT != TargetVT) |
48145 | Res = DAG.getNode(ExtendOpcode, DL, TargetVT, Res); |
48146 | |
48147 | return Res; |
48148 | } |
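// Worked example (illustrative): with C0/C1 constants,
//   (zext i16 (cmov C0, C1, cond) to i32)
// becomes
//   (cmov (zext C0 to i32), (zext C1 to i32), cond)
// so the extended constants are materialized directly and no separate MOVZX
// of the CMOV result is needed.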
48149 | |
48150 | |
48151 | // Convert (vXiY *ext(vXi1 bitcast(iX))) to extend_in_reg(broadcast(iX)). |
48152 | static SDValue |
48153 | combineToExtendBoolVectorInReg(SDNode *N, SelectionDAG &DAG, |
48154 | TargetLowering::DAGCombinerInfo &DCI, |
48155 | const X86Subtarget &Subtarget) { |
48156 | unsigned Opcode = N->getOpcode(); |
48157 | if (Opcode != ISD::SIGN_EXTEND && Opcode != ISD::ZERO_EXTEND && |
48158 | Opcode != ISD::ANY_EXTEND) |
48159 | return SDValue(); |
48160 | if (!DCI.isBeforeLegalizeOps()) |
48161 | return SDValue(); |
48162 | if (!Subtarget.hasSSE2() || Subtarget.hasAVX512()) |
48163 | return SDValue(); |
48164 | |
48165 | SDValue N0 = N->getOperand(0); |
48166 | EVT VT = N->getValueType(0); |
48167 | EVT SVT = VT.getScalarType(); |
48168 | EVT InSVT = N0.getValueType().getScalarType(); |
48169 | unsigned EltSizeInBits = SVT.getSizeInBits(); |
48170 | |
48171 | |
48172 | // Match a vXi8/vXi16/vXi32/vXi64 extension of a vXi1 scalar-integer bitcast. |
48173 | if (!VT.isVector()) |
48174 | return SDValue(); |
48175 | if (SVT != MVT::i64 && SVT != MVT::i32 && SVT != MVT::i16 && SVT != MVT::i8) |
48176 | return SDValue(); |
48177 | if (InSVT != MVT::i1 || N0.getOpcode() != ISD::BITCAST) |
48178 | return SDValue(); |
48179 | |
48180 | SDValue N00 = N0.getOperand(0); |
48181 | EVT SclVT = N0.getOperand(0).getValueType(); |
48182 | if (!SclVT.isScalarInteger()) |
48183 | return SDValue(); |
48184 | |
48185 | SDLoc DL(N); |
48186 | SDValue Vec; |
48187 | SmallVector<int, 32> ShuffleMask; |
48188 | unsigned NumElts = VT.getVectorNumElements(); |
48189 | assert(NumElts == SclVT.getSizeInBits() && "Unexpected bool vector size"); |
48190 | |
48191 | // Broadcast the scalar integer to the vector elements. |
48192 | if (NumElts > EltSizeInBits) { |
48193 | // If the scalar integer is greater than the vector element size, then we |
48194 | // must split it down into sub-sections for broadcasting. For example: |
48195 | //   i16 -> v16i8 (i16 -> v8i16 -> v16i8) with 2 sub-sections. |
48196 | //   i32 -> v32i8 (i32 -> v8i32 -> v32i8) with 4 sub-sections. |
48197 | assert((NumElts % EltSizeInBits) == 0 && "Unexpected integer scale"); |
48198 | unsigned Scale = NumElts / EltSizeInBits; |
48199 | EVT BroadcastVT = |
48200 | EVT::getVectorVT(*DAG.getContext(), SclVT, EltSizeInBits); |
48201 | Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, BroadcastVT, N00); |
48202 | Vec = DAG.getBitcast(VT, Vec); |
48203 | |
48204 | for (unsigned i = 0; i != Scale; ++i) |
48205 | ShuffleMask.append(EltSizeInBits, i); |
48206 | Vec = DAG.getVectorShuffle(VT, DL, Vec, Vec, ShuffleMask); |
48207 | } else if (Subtarget.hasAVX2() && NumElts < EltSizeInBits && |
48208 | (SclVT == MVT::i8 || SclVT == MVT::i16 || SclVT == MVT::i32)) { |
48209 | // If we have register broadcast instructions, use the scalar size as the |
48210 | // element type for the shuffle. Then cast to the wider element type. The |
48211 | // widened bits won't be used, and this might allow the use of a broadcast |
48212 | // load. |
48213 | assert((EltSizeInBits % NumElts) == 0 && "Unexpected integer scale"); |
48214 | unsigned Scale = EltSizeInBits / NumElts; |
48215 | EVT BroadcastVT = |
48216 | EVT::getVectorVT(*DAG.getContext(), SclVT, NumElts * Scale); |
48217 | Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, BroadcastVT, N00); |
48218 | ShuffleMask.append(NumElts * Scale, 0); |
48219 | Vec = DAG.getVectorShuffle(BroadcastVT, DL, Vec, Vec, ShuffleMask); |
48220 | Vec = DAG.getBitcast(VT, Vec); |
48221 | } else { |
48222 | // For smaller scalar integers, we can simply any-extend it to the vector |
48223 | // element size (we don't care about the upper bits) and broadcast it to |
48224 | // all elements. |
48225 | SDValue Scl = DAG.getAnyExtOrTrunc(N00, DL, SVT); |
48226 | Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Scl); |
48227 | ShuffleMask.append(NumElts, 0); |
48228 | Vec = DAG.getVectorShuffle(VT, DL, Vec, Vec, ShuffleMask); |
48229 | } |
48230 | |
48231 | // Now, mask the relevant bit in each element. |
48232 | SmallVector<SDValue, 32> Bits; |
48233 | for (unsigned i = 0; i != NumElts; ++i) { |
48234 | int BitIdx = (i % EltSizeInBits); |
48235 | APInt Bit = APInt::getBitsSet(EltSizeInBits, BitIdx, BitIdx + 1); |
48236 | Bits.push_back(DAG.getConstant(Bit, DL, SVT)); |
48237 | } |
48238 | SDValue BitMask = DAG.getBuildVector(VT, DL, Bits); |
48239 | Vec = DAG.getNode(ISD::AND, DL, VT, Vec, BitMask); |
48240 | |
48241 | // Compare against the bitmask and extend the result. |
48242 | EVT CCVT = VT.changeVectorElementType(MVT::i1); |
48243 | Vec = DAG.getSetCC(DL, CCVT, Vec, BitMask, ISD::SETEQ); |
48244 | Vec = DAG.getSExtOrTrunc(Vec, DL, VT); |
48245 | |
48246 | // For SEXT, this is now done; otherwise shift the result down for |
48247 | // zero-extension. |
48248 | if (Opcode == ISD::SIGN_EXTEND) |
48249 | return Vec; |
48250 | return DAG.getNode(ISD::SRL, DL, VT, Vec, |
48251 | DAG.getConstant(EltSizeInBits - 1, DL, VT)); |
48252 | } |
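// Scalar reference (illustrative only; bools_to_bytes_ref is a hypothetical
// name) for what the broadcast+mask+setcc sequence above computes when
// sign-extending a v16i1 bitcast of an i16 into v16i8:
static void bools_to_bytes_ref(unsigned short Mask, unsigned char Out[16]) {
  for (int i = 0; i != 16; ++i)
    Out[i] = ((Mask >> i) & 1) ? 0xFF : 0x00;  // sext i1 -> i8 per element
}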
48253 | |
48254 | |
48255 | // Try to fold a (sext/zext (setcc)) into the setcc by producing the wider result directly. |
48256 | static SDValue combineExtSetcc(SDNode *N, SelectionDAG &DAG, |
48257 | const X86Subtarget &Subtarget) { |
48258 | SDValue N0 = N->getOperand(0); |
48259 | EVT VT = N->getValueType(0); |
48260 | SDLoc dl(N); |
48261 | |
48262 | // Only do this combine with AVX512 for vector extends. |
48263 | if (!Subtarget.hasAVX512() || !VT.isVector() || N0.getOpcode() != ISD::SETCC) |
48264 | return SDValue(); |
48265 | |
48266 | // Only combine legal element types. |
48267 | EVT SVT = VT.getVectorElementType(); |
48268 | if (SVT != MVT::i8 && SVT != MVT::i16 && SVT != MVT::i32 && |
48269 | SVT != MVT::i64 && SVT != MVT::f32 && SVT != MVT::f64) |
48270 | return SDValue(); |
48271 | |
48272 | // We can only do this if the vector size is 256 bits or less. |
48273 | unsigned Size = VT.getSizeInBits(); |
48274 | if (Size > 256 && Subtarget.useAVX512Regs()) |
48275 | return SDValue(); |
48276 | |
48277 | // Don't fold unsigned compares: PCMPEQ/PCMPGT only provide equality and |
48278 | // signed comparisons. |
48279 | ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); |
48280 | if (ISD::isUnsignedIntSetCC(CC)) |
48281 | return SDValue(); |
48282 | |
48283 | // Only do this combine if the extension will be fully consumed by the setcc. |
48284 | EVT N00VT = N0.getOperand(0).getValueType(); |
48285 | EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger(); |
48286 | if (Size != MatchingVecType.getSizeInBits()) |
48287 | return SDValue(); |
48288 | |
48289 | SDValue Res = DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC); |
48290 | |
48291 | if (N->getOpcode() == ISD::ZERO_EXTEND) |
48292 | Res = DAG.getZeroExtendInReg(Res, dl, N0.getValueType()); |
48293 | |
48294 | return Res; |
48295 | } |
48296 | |
48297 | static SDValue combineSext(SDNode *N, SelectionDAG &DAG, |
48298 | TargetLowering::DAGCombinerInfo &DCI, |
48299 | const X86Subtarget &Subtarget) { |
48300 | SDValue N0 = N->getOperand(0); |
48301 | EVT VT = N->getValueType(0); |
48302 | SDLoc DL(N); |
48303 | |
48304 | // (i32 sext (setcc_carry)) -> (i32 setcc_carry) |
48305 | if (!DCI.isBeforeLegalizeOps() && |
48306 | N0.getOpcode() == X86ISD::SETCC_CARRY) { |
48307 | SDValue Setcc = DAG.getNode(X86ISD::SETCC_CARRY, DL, VT, N0->getOperand(0), |
48308 | N0->getOperand(1)); |
48309 | bool ReplaceOtherUses = !N0.hasOneUse(); |
48310 | DCI.CombineTo(N, Setcc); |
48311 | |
48312 | if (ReplaceOtherUses) { |
48313 | SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), |
48314 | N0.getValueType(), Setcc); |
48315 | DCI.CombineTo(N0.getNode(), Trunc); |
48316 | } |
48317 | |
48318 | return SDValue(N, 0); |
48319 | } |
48320 | |
48321 | if (SDValue NewCMov = combineToExtendCMOV(N, DAG)) |
48322 | return NewCMov; |
48323 | |
48324 | if (!DCI.isBeforeLegalizeOps()) |
48325 | return SDValue(); |
48326 | |
48327 | if (SDValue V = combineExtSetcc(N, DAG, Subtarget)) |
48328 | return V; |
48329 | |
48330 | if (SDValue V = combineToExtendBoolVectorInReg(N, DAG, DCI, Subtarget)) |
48331 | return V; |
48332 | |
48333 | if (VT.isVector()) { |
48334 | if (SDValue R = PromoteMaskArithmetic(N, DAG, Subtarget)) |
48335 | return R; |
48336 | |
48337 | if (N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG) |
48338 | return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0)); |
48339 | } |
48340 | |
48341 | if (SDValue NewAdd = promoteExtBeforeAdd(N, DAG, Subtarget)) |
48342 | return NewAdd; |
48343 | |
48344 | return SDValue(); |
48345 | } |
48346 | |
48347 | static SDValue combineFMA(SDNode *N, SelectionDAG &DAG, |
48348 | TargetLowering::DAGCombinerInfo &DCI, |
48349 | const X86Subtarget &Subtarget) { |
48350 | SDLoc dl(N); |
48351 | EVT VT = N->getValueType(0); |
48352 | bool IsStrict = N->isStrictFPOpcode() || N->isTargetStrictFPOpcode(); |
48353 | |
48354 | // Let legalize expand this if it isn't a legal type yet. |
48355 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
48356 | if (!TLI.isTypeLegal(VT)) |
48357 | return SDValue(); |
48358 | |
48359 | SDValue A = N->getOperand(IsStrict ? 1 : 0); |
48360 | SDValue B = N->getOperand(IsStrict ? 2 : 1); |
48361 | SDValue C = N->getOperand(IsStrict ? 3 : 2); |
48362 | |
48363 | // If the operation allows fast-math and the target does not support FMA, |
48364 | // split this into mul+add to avoid libcall(s). |
48365 | SDNodeFlags Flags = N->getFlags(); |
48366 | if (!IsStrict && Flags.hasAllowReassociation() && |
48367 | TLI.isOperationExpand(ISD::FMA, VT)) { |
48368 | SDValue Fmul = DAG.getNode(ISD::FMUL, dl, VT, A, B, Flags); |
48369 | return DAG.getNode(ISD::FADD, dl, VT, Fmul, C, Flags); |
48370 | } |
48371 | |
48372 | EVT ScalarVT = VT.getScalarType(); |
48373 | if ((ScalarVT != MVT::f32 && ScalarVT != MVT::f64) || !Subtarget.hasAnyFMA()) |
48374 | return SDValue(); |
48375 | |
48376 | auto invertIfNegative = [&DAG, &TLI, &DCI](SDValue &V) { |
48377 | bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize(); |
48378 | bool LegalOperations = !DCI.isBeforeLegalizeOps(); |
48379 | if (SDValue NegV = TLI.getCheaperNegatedExpression(V, DAG, LegalOperations, |
48380 | CodeSize)) { |
48381 | V = NegV; |
48382 | return true; |
48383 | } |
48384 | // Look through extract_vector_elts. If it comes from an FNEG, create a |
48385 | // new extract from the FNEG input. |
48386 | if (V.getOpcode() == ISD::EXTRACT_VECTOR_ELT && |
48387 | isNullConstant(V.getOperand(1))) { |
48388 | SDValue Vec = V.getOperand(0); |
48389 | if (SDValue NegV = TLI.getCheaperNegatedExpression( |
48390 | Vec, DAG, LegalOperations, CodeSize)) { |
48391 | V = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(V), V.getValueType(), |
48392 | NegV, V.getOperand(1)); |
48393 | return true; |
48394 | } |
48395 | } |
48396 | |
48397 | return false; |
48398 | }; |
48399 | |
48400 | // Do not convert the passthru input of scalar intrinsics. |
48401 | // FIXME: We could allow negations of the lower element only. |
48402 | bool NegA = invertIfNegative(A); |
48403 | bool NegB = invertIfNegative(B); |
48404 | bool NegC = invertIfNegative(C); |
48405 | |
48406 | if (!NegA && !NegB && !NegC) |
48407 | return SDValue(); |
48408 | |
48409 | unsigned NewOpcode = |
48410 | negateFMAOpcode(N->getOpcode(), NegA != NegB, NegC, false); |
48411 | |
48412 | // Propagate fast-math-flags to the new FMA node. |
48413 | SelectionDAG::FlagInserter FlagsInserter(DAG, Flags); |
48414 | if (IsStrict) { |
48415 | assert(N->getNumOperands() == 4 && "Shouldn't be greater than 4"); |
48416 | return DAG.getNode(NewOpcode, dl, {VT, MVT::Other}, |
48417 | {N->getOperand(0), A, B, C}); |
48418 | } else { |
48419 | if (N->getNumOperands() == 4) |
48420 | return DAG.getNode(NewOpcode, dl, VT, A, B, C, N->getOperand(3)); |
48421 | return DAG.getNode(NewOpcode, dl, VT, A, B, C); |
48422 | } |
48423 | } |
48424 | |
48425 | // Combine FMADDSUB(A, B, FNEG(C)) -> FMSUBADD(A, B, C) |
48426 | // Combine FMSUBADD(A, B, FNEG(C)) -> FMADDSUB(A, B, C) |
48427 | static SDValue combineFMADDSUB(SDNode *N, SelectionDAG &DAG, |
48428 | TargetLowering::DAGCombinerInfo &DCI) { |
48429 | SDLoc dl(N); |
48430 | EVT VT = N->getValueType(0); |
48431 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
48432 | bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize(); |
48433 | bool LegalOperations = !DCI.isBeforeLegalizeOps(); |
48434 | |
48435 | SDValue N2 = N->getOperand(2); |
48436 | |
48437 | SDValue NegN2 = |
48438 | TLI.getCheaperNegatedExpression(N2, DAG, LegalOperations, CodeSize); |
48439 | if (!NegN2) |
48440 | return SDValue(); |
48441 | unsigned NewOpcode = negateFMAOpcode(N->getOpcode(), false, true, false); |
48442 | |
48443 | if (N->getNumOperands() == 4) |
48444 | return DAG.getNode(NewOpcode, dl, VT, N->getOperand(0), N->getOperand(1), |
48445 | NegN2, N->getOperand(3)); |
48446 | return DAG.getNode(NewOpcode, dl, VT, N->getOperand(0), N->getOperand(1), |
48447 | NegN2); |
48448 | } |
48449 | |
48450 | static SDValue combineZext(SDNode *N, SelectionDAG &DAG, |
48451 | TargetLowering::DAGCombinerInfo &DCI, |
48452 | const X86Subtarget &Subtarget) { |
48453 | SDLoc dl(N); |
48454 | SDValue N0 = N->getOperand(0); |
48455 | EVT VT = N->getValueType(0); |
48456 | |
48457 | |
48458 | // (i32 any_ext (setcc_carry)) -> (i32 setcc_carry) |
48459 | if (!DCI.isBeforeLegalizeOps() && N->getOpcode() == ISD::ANY_EXTEND && |
48460 | N0.getOpcode() == X86ISD::SETCC_CARRY) { |
48461 | SDValue Setcc = DAG.getNode(X86ISD::SETCC_CARRY, dl, VT, N0->getOperand(0), |
48462 | N0->getOperand(1)); |
48463 | bool ReplaceOtherUses = !N0.hasOneUse(); |
48464 | DCI.CombineTo(N, Setcc); |
48465 | |
48466 | if (ReplaceOtherUses) { |
48467 | SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), |
48468 | N0.getValueType(), Setcc); |
48469 | DCI.CombineTo(N0.getNode(), Trunc); |
48470 | } |
48471 | |
48472 | return SDValue(N, 0); |
48473 | } |
48474 | |
48475 | if (SDValue NewCMov = combineToExtendCMOV(N, DAG)) |
48476 | return NewCMov; |
48477 | |
48478 | if (DCI.isBeforeLegalizeOps()) |
48479 | if (SDValue V = combineExtSetcc(N, DAG, Subtarget)) |
48480 | return V; |
48481 | |
48482 | if (SDValue V = combineToExtendBoolVectorInReg(N, DAG, DCI, Subtarget)) |
48483 | return V; |
48484 | |
48485 | if (VT.isVector()) |
48486 | if (SDValue R = PromoteMaskArithmetic(N, DAG, Subtarget)) |
48487 | return R; |
48488 | |
48489 | if (SDValue NewAdd = promoteExtBeforeAdd(N, DAG, Subtarget)) |
48490 | return NewAdd; |
48491 | |
48492 | if (SDValue R = combineOrCmpEqZeroToCtlzSrl(N, DAG, DCI, Subtarget)) |
48493 | return R; |
48494 | |
48495 | // A zext of a 128-bit PACKUS whose inputs have zero upper halves is just a concat of the inputs. |
48496 | if (N0.getOpcode() == X86ISD::PACKUS && N0.getValueSizeInBits() == 128 && |
48497 | VT.getScalarSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits()) { |
48498 | SDValue N00 = N0.getOperand(0); |
48499 | SDValue N01 = N0.getOperand(1); |
48500 | unsigned NumSrcEltBits = N00.getScalarValueSizeInBits(); |
48501 | APInt ZeroMask = APInt::getHighBitsSet(NumSrcEltBits, NumSrcEltBits / 2); |
48502 | if ((N00.isUndef() || DAG.MaskedValueIsZero(N00, ZeroMask)) && |
48503 | (N01.isUndef() || DAG.MaskedValueIsZero(N01, ZeroMask))) { |
48504 | return concatSubVectors(N00, N01, DAG, dl); |
48505 | } |
48506 | } |
48507 | |
48508 | return SDValue(); |
48509 | } |
48510 | |
48511 | // Match a tree of ORs whose leaves are all XORs: the equality pattern |
48512 | // produced by memcmp expansion for oversized integer compares. |
48513 | static bool isOrXorXorTree(SDValue X, bool Root = true) { |
48514 | if (X.getOpcode() == ISD::OR) |
48515 | return isOrXorXorTree(X.getOperand(0), false) && |
48516 | isOrXorXorTree(X.getOperand(1), false); |
48517 | if (Root) |
48518 | return false; |
48519 | return X.getOpcode() == ISD::XOR; |
48520 | } |
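// Example of the tree shape matched above, as memcmp expansion emits it for a
// 32-byte equality compare split into two 16-byte halves (illustrative):
//   setcc (or (xor a0, b0), (xor a1, b1)), 0, eq
// Each xor leaf becomes a vector compare and the or nodes combine the results.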
48521 | |
48522 | |
48523 | // Recursively emit the vector compares for an or/xor tree matched above. |
48524 | template<typename F> |
48525 | static SDValue emitOrXorXorTree(SDValue X, SDLoc &DL, SelectionDAG &DAG, |
48526 | EVT VecVT, EVT CmpVT, bool HasPT, F SToV) { |
48527 | SDValue Op0 = X.getOperand(0); |
48528 | SDValue Op1 = X.getOperand(1); |
48529 | if (X.getOpcode() == ISD::OR) { |
48530 | SDValue A = emitOrXorXorTree(Op0, DL, DAG, VecVT, CmpVT, HasPT, SToV); |
48531 | SDValue B = emitOrXorXorTree(Op1, DL, DAG, VecVT, CmpVT, HasPT, SToV); |
48532 | if (VecVT != CmpVT) |
48533 | return DAG.getNode(ISD::OR, DL, CmpVT, A, B); |
48534 | if (HasPT) |
48535 | return DAG.getNode(ISD::OR, DL, VecVT, A, B); |
48536 | return DAG.getNode(ISD::AND, DL, CmpVT, A, B); |
48537 | } else if (X.getOpcode() == ISD::XOR) { |
48538 | SDValue A = SToV(Op0); |
48539 | SDValue B = SToV(Op1); |
48540 | if (VecVT != CmpVT) |
48541 | return DAG.getSetCC(DL, CmpVT, A, B, ISD::SETNE); |
48542 | if (HasPT) |
48543 | return DAG.getNode(ISD::XOR, DL, VecVT, A, B); |
48544 | return DAG.getSetCC(DL, CmpVT, A, B, ISD::SETEQ); |
48545 | } |
48546 | llvm_unreachable("Impossible"); |
48547 | } |
48548 | |
48549 | /// Try to map a 128-bit or larger integer comparison to vector instructions |
48550 | /// before type legalization splits it up into chunks. |
48551 | static SDValue combineVectorSizedSetCCEquality(SDNode *SetCC, SelectionDAG &DAG, |
48552 | const X86Subtarget &Subtarget) { |
48553 | ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get(); |
48554 | assert((CC == ISD::SETNE || CC == ISD::SETEQ) && "Bad comparison predicate"); |
48555 | |
48556 | // We're looking for an oversized integer equality comparison. |
48557 | SDValue X = SetCC->getOperand(0); |
48558 | SDValue Y = SetCC->getOperand(1); |
48559 | EVT OpVT = X.getValueType(); |
48560 | unsigned OpSize = OpVT.getSizeInBits(); |
48561 | if (!OpVT.isScalarInteger() || OpSize < 128) |
48562 | return SDValue(); |
48563 | |
48564 | // Ignore a comparison with zero because that gets special treatment in |
48565 | // EmitTest. But make an exception for the special case of a pair of |
48566 | // logically-combined vector-sized operands compared to zero. This pattern |
48567 | // may be generated by the memcmp expansion pass with oversized integer |
48568 | // compares. |
48569 | bool IsOrXorXorTreeCCZero = isNullConstant(Y) && isOrXorXorTree(X); |
48570 | if (isNullConstant(Y) && !IsOrXorXorTreeCCZero) |
48571 | return SDValue(); |
48572 | |
48573 | // Don't perform this combine if constructing the vector will be expensive. |
48574 | auto IsVectorBitCastCheap = [](SDValue X) { |
48575 | X = peekThroughBitcasts(X); |
48576 | return isa<ConstantSDNode>(X) || X.getValueType().isVector() || |
48577 | X.getOpcode() == ISD::LOAD; |
48578 | }; |
48579 | if ((!IsVectorBitCastCheap(X) || !IsVectorBitCastCheap(Y)) && |
48580 | !IsOrXorXorTreeCCZero) |
48581 | return SDValue(); |
48582 | |
48583 | EVT VT = SetCC->getValueType(0); |
48584 | SDLoc DL(SetCC); |
48585 | |
48586 | // Use XOR (plus OR) and PTEST after SSE4.1 for 128/256-bit operands. |
48587 | // Use PCMPNEQ (plus OR) and KORTEST for 512-bit operands. |
48588 | // Otherwise use PCMPEQ (plus AND) and mask testing. |
48589 | if ((OpSize == 128 && Subtarget.hasSSE2()) || |
48590 | (OpSize == 256 && Subtarget.hasAVX()) || |
48591 | (OpSize == 512 && Subtarget.useAVX512Regs())) { |
48592 | bool HasPT = Subtarget.hasSSE41(); |
48593 | |
48594 | // When mask registers are preferred, compare into a vXi1 type; without VLX |
48595 | // a 128/256-bit operand must first be zero-extended into a 512-bit vector |
48596 | // so the mask compare is legal. |
48597 | bool PreferKOT = Subtarget.preferMaskRegisters(); |
48598 | bool NeedZExt = PreferKOT && !Subtarget.hasVLX() && OpSize != 512; |
48599 | |
48600 | EVT VecVT = MVT::v16i8; |
48601 | EVT CmpVT = PreferKOT ? MVT::v16i1 : VecVT; |
48602 | if (OpSize == 256) { |
48603 | VecVT = MVT::v32i8; |
48604 | CmpVT = PreferKOT ? MVT::v32i1 : VecVT; |
48605 | } |
48606 | EVT CastVT = VecVT; |
48607 | bool NeedsAVX512FCast = false; |
48608 | if (OpSize == 512 || NeedZExt) { |
48609 | if (Subtarget.hasBWI()) { |
48610 | VecVT = MVT::v64i8; |
48611 | CmpVT = MVT::v64i1; |
48612 | if (OpSize == 512) |
48613 | CastVT = VecVT; |
48614 | } else { |
48615 | VecVT = MVT::v16i32; |
48616 | CmpVT = MVT::v16i1; |
48617 | CastVT = OpSize == 512 ? VecVT : |
48618 | OpSize == 256 ? MVT::v8i32 : MVT::v4i32; |
48619 | NeedsAVX512FCast = true; |
48620 | } |
48621 | } |
48622 | |
48623 | auto ScalarToVector = [&](SDValue X) -> SDValue { |
48624 | bool TmpZext = false; |
48625 | EVT TmpCastVT = CastVT; |
48626 | if (X.getOpcode() == ISD::ZERO_EXTEND) { |
48627 | SDValue OrigX = X.getOperand(0); |
48628 | unsigned OrigSize = OrigX.getScalarValueSizeInBits(); |
48629 | if (OrigSize < OpSize) { |
48630 | if (OrigSize == 128) { |
48631 | TmpCastVT = NeedsAVX512FCast ? MVT::v4i32 : MVT::v16i8; |
48632 | X = OrigX; |
48633 | TmpZext = true; |
48634 | } else if (OrigSize == 256) { |
48635 | TmpCastVT = NeedsAVX512FCast ? MVT::v8i32 : MVT::v32i8; |
48636 | X = OrigX; |
48637 | TmpZext = true; |
48638 | } |
48639 | } |
48640 | } |
48641 | X = DAG.getBitcast(TmpCastVT, X); |
48642 | if (!NeedZExt && !TmpZext) |
48643 | return X; |
48644 | return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, |
48645 | DAG.getConstant(0, DL, VecVT), X, |
48646 | DAG.getVectorIdxConstant(0, DL)); |
48647 | }; |
48648 | |
48649 | SDValue Cmp; |
48650 | if (IsOrXorXorTreeCCZero) { |
48651 | // This is a bitwise-combined equality comparison of 2 pairs of vectors: |
48652 | // setcc i128 (or (xor A, B), (xor C, D)), 0, eq|ne |
48653 | // Defer the compares to emitOrXorXorTree, which combines the per-pair |
48654 | // results with OR/AND as appropriate. |
48655 | Cmp = emitOrXorXorTree(X, DL, DAG, VecVT, CmpVT, HasPT, ScalarToVector); |
48656 | } else { |
48657 | SDValue VecX = ScalarToVector(X); |
48658 | SDValue VecY = ScalarToVector(Y); |
48659 | if (VecVT != CmpVT) { |
48660 | Cmp = DAG.getSetCC(DL, CmpVT, VecX, VecY, ISD::SETNE); |
48661 | } else if (HasPT) { |
48662 | Cmp = DAG.getNode(ISD::XOR, DL, VecVT, VecX, VecY); |
48663 | } else { |
48664 | Cmp = DAG.getSetCC(DL, CmpVT, VecX, VecY, ISD::SETEQ); |
48665 | } |
48666 | } |
48667 | |
48668 | if (VecVT != CmpVT) { |
48669 | EVT KRegVT = CmpVT == MVT::v64i1 ? MVT::i64 : |
48670 | CmpVT == MVT::v32i1 ? MVT::i32 : MVT::i16; |
48671 | return DAG.getSetCC(DL, VT, DAG.getBitcast(KRegVT, Cmp), |
48672 | DAG.getConstant(0, DL, KRegVT), CC); |
48673 | } |
48674 | if (HasPT) { |
48675 | SDValue BCCmp = DAG.getBitcast(OpSize == 256 ? MVT::v4i64 : MVT::v2i64, |
48676 | Cmp); |
48677 | SDValue PT = DAG.getNode(X86ISD::PTEST, DL, MVT::i32, BCCmp, BCCmp); |
48678 | X86::CondCode X86CC = CC == ISD::SETEQ ? X86::COND_E : X86::COND_NE; |
48679 | SDValue X86SetCC = getSETCC(X86CC, PT, DL, DAG); |
48680 | return DAG.getNode(ISD::TRUNCATE, DL, VT, X86SetCC.getValue(0)); |
48681 | } |
48682 | |
48683 | // If all bytes match (movmsk mask is 0xFFFF), that's equality: |
48684 | // setcc i128 X, Y, eq --> setcc (pmovmskb (pcmpeqb X, Y)), 0xFFFF, eq |
48685 | assert(Cmp.getValueType() == MVT::v16i8 && |
48686 | "Non 128-bit vector on pre-SSE41 target"); |
48687 | SDValue MovMsk = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Cmp); |
48688 | SDValue FFFFs = DAG.getConstant(0xFFFF, DL, MVT::i32); |
48689 | return DAG.getSetCC(DL, VT, MovMsk, FFFFs, CC); |
48690 | } |
48691 | |
48692 | return SDValue(); |
48693 | } |
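// Standalone sketch of the pre-SSE4.1 MOVMSK tail above (assumes SSE2;
// eq128_ref is a hypothetical name used only for illustration):
#include <emmintrin.h>
static bool eq128_ref(__m128i X, __m128i Y) {
  __m128i Eq = _mm_cmpeq_epi8(X, Y);       // PCMPEQB: 0xFF where bytes match
  return _mm_movemask_epi8(Eq) == 0xFFFF;  // PMOVMSKB: all 16 bytes equal
}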
48694 | |
48695 | static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG, |
48696 | TargetLowering::DAGCombinerInfo &DCI, |
48697 | const X86Subtarget &Subtarget) { |
48698 | const ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get(); |
48699 | const SDValue LHS = N->getOperand(0); |
48700 | const SDValue RHS = N->getOperand(1); |
48701 | EVT VT = N->getValueType(0); |
48702 | EVT OpVT = LHS.getValueType(); |
48703 | SDLoc DL(N); |
48704 | |
48705 | if (CC == ISD::SETNE || CC == ISD::SETEQ) { |
48706 | if (SDValue V = combineVectorSizedSetCCEquality(N, DAG, Subtarget)) |
48707 | return V; |
48708 | |
48709 | if (VT == MVT::i1 && isNullConstant(RHS)) { |
48710 | SDValue X86CC; |
48711 | if (SDValue V = |
48712 | MatchVectorAllZeroTest(LHS, CC, DL, Subtarget, DAG, X86CC)) |
48713 | return DAG.getNode(ISD::TRUNCATE, DL, VT, |
48714 | DAG.getNode(X86ISD::SETCC, DL, MVT::i8, X86CC, V)); |
48715 | } |
48716 | |
48717 | if (OpVT.isScalarInteger()) { |
48718 | // cmpeq(or(X,Y),X) --> cmpeq(and(~X,Y),0) |
48719 | // cmpne(or(X,Y),X) --> cmpne(and(~X,Y),0) |
48720 | auto MatchOrCmpEq = [&](SDValue N0, SDValue N1) { |
48721 | if (N0.getOpcode() == ISD::OR && N0->hasOneUse()) { |
48722 | if (N0.getOperand(0) == N1) |
48723 | return DAG.getNode(ISD::AND, DL, OpVT, DAG.getNOT(DL, N1, OpVT), |
48724 | N0.getOperand(1)); |
48725 | if (N0.getOperand(1) == N1) |
48726 | return DAG.getNode(ISD::AND, DL, OpVT, DAG.getNOT(DL, N1, OpVT), |
48727 | N0.getOperand(0)); |
48728 | } |
48729 | return SDValue(); |
48730 | }; |
48731 | if (SDValue AndN = MatchOrCmpEq(LHS, RHS)) |
48732 | return DAG.getSetCC(DL, VT, AndN, DAG.getConstant(0, DL, OpVT), CC); |
48733 | if (SDValue AndN = MatchOrCmpEq(RHS, LHS)) |
48734 | return DAG.getSetCC(DL, VT, AndN, DAG.getConstant(0, DL, OpVT), CC); |
48735 | |
48736 | // cmpeq(and(X,Y),X) --> cmpeq(and(X,~Y),0) |
48737 | // cmpne(and(X,Y),X) --> cmpne(and(X,~Y),0) |
48738 | auto MatchAndCmpEq = [&](SDValue N0, SDValue N1) { |
48739 | if (N0.getOpcode() == ISD::AND && N0->hasOneUse()) { |
48740 | if (N0.getOperand(0) == N1) |
48741 | return DAG.getNode(ISD::AND, DL, OpVT, N1, |
48742 | DAG.getNOT(DL, N0.getOperand(1), OpVT)); |
48743 | if (N0.getOperand(1) == N1) |
48744 | return DAG.getNode(ISD::AND, DL, OpVT, N1, |
48745 | DAG.getNOT(DL, N0.getOperand(0), OpVT)); |
48746 | } |
48747 | return SDValue(); |
48748 | }; |
48749 | if (SDValue AndN = MatchAndCmpEq(LHS, RHS)) |
48750 | return DAG.getSetCC(DL, VT, AndN, DAG.getConstant(0, DL, OpVT), CC); |
48751 | if (SDValue AndN = MatchAndCmpEq(RHS, LHS)) |
48752 | return DAG.getSetCC(DL, VT, AndN, DAG.getConstant(0, DL, OpVT), CC); |
48753 | |
48754 | // cmpeq(trunc(x),0) --> cmpeq(x,0) |
48755 | // cmpne(trunc(x),0) --> cmpne(x,0) |
48756 | // iff x upper bits are zero. |
48757 | // TODO: Add support for RHS to be truncate as well? |
48758 | if (LHS.getOpcode() == ISD::TRUNCATE && |
48759 | LHS.getOperand(0).getScalarValueSizeInBits() >= 32 && |
48760 | isNullConstant(RHS) && !DCI.isBeforeLegalize()) { |
48761 | EVT SrcVT = LHS.getOperand(0).getValueType(); |
48762 | APInt UpperBits = APInt::getBitsSetFrom(SrcVT.getScalarSizeInBits(), |
48763 | OpVT.getScalarSizeInBits()); |
48764 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
48765 | if (DAG.MaskedValueIsZero(LHS.getOperand(0), UpperBits) && |
48766 | TLI.isTypeLegal(LHS.getOperand(0).getValueType())) |
48767 | return DAG.getSetCC(DL, VT, LHS.getOperand(0), |
48768 | DAG.getConstant(0, DL, SrcVT), CC); |
48769 | } |
48770 | } |
48771 | } |
48772 | |
48773 | if (VT.isVector() && VT.getVectorElementType() == MVT::i1 && |
48774 | (CC == ISD::SETNE || CC == ISD::SETEQ || ISD::isSignedIntSetCC(CC))) { |
48775 | // Using temporaries to avoid messing up operand ordering for later |
48776 | // transformations if this doesn't work. |
48777 | SDValue Op0 = LHS; |
48778 | SDValue Op1 = RHS; |
48779 | ISD::CondCode TmpCC = CC; |
48780 | // Put any build_vector operand on the right. |
48781 | if (Op0.getOpcode() == ISD::BUILD_VECTOR) { |
48782 | std::swap(Op0, Op1); |
48783 | TmpCC = ISD::getSetCCSwappedOperands(TmpCC); |
48784 | } |
48785 | |
48786 | bool IsSEXT0 = |
48787 | (Op0.getOpcode() == ISD::SIGN_EXTEND) && |
48788 | (Op0.getOperand(0).getValueType().getVectorElementType() == MVT::i1); |
48789 | bool IsVZero1 = ISD::isBuildVectorAllZeros(Op1.getNode()); |
48790 | |
48791 | if (IsSEXT0 && IsVZero1) { |
48792 | assert(VT == Op0.getOperand(0).getValueType() && |
48793 | "Unexpected operand type"); |
48794 | if (TmpCC == ISD::SETGT) |
48795 | return DAG.getConstant(0, DL, VT); |
48796 | if (TmpCC == ISD::SETLE) |
48797 | return DAG.getConstant(1, DL, VT); |
48798 | if (TmpCC == ISD::SETEQ || TmpCC == ISD::SETGE) |
48799 | return DAG.getNOT(DL, Op0.getOperand(0), VT); |
48800 | |
48801 | assert((TmpCC == ISD::SETNE || TmpCC == ISD::SETLT) && |
48802 | "Unexpected condition code!"); |
48803 | return Op0.getOperand(0); |
48804 | } |
48805 | } |
48806 | |
48807 | |
48808 | |
48809 | // If we have AVX512, but not BWI and this is a vXi16/vXi8 setcc, just |
48810 | // pre-promote its result type since vXi1 vectors don't get promoted |
48811 | // during type legalization. |
48812 | if (Subtarget.hasAVX512() && !Subtarget.hasBWI() && VT.isVector() && |
48813 | VT.getVectorElementType() == MVT::i1 && |
48814 | (OpVT.getVectorElementType() == MVT::i8 || |
48815 | OpVT.getVectorElementType() == MVT::i16)) { |
48816 | SDValue Setcc = DAG.getSetCC(DL, OpVT, LHS, RHS, CC); |
48817 | return DAG.getNode(ISD::TRUNCATE, DL, VT, Setcc); |
48818 | } |
48819 | |
48820 | // For an SSE1-only target, lower a comparison of v4f32 to X86ISD::CMPP |
48821 | // early to avoid scalarization via legalization because v4i32 is not legal. |
48822 | if (Subtarget.hasSSE1() && !Subtarget.hasSSE2() && VT == MVT::v4i32 && |
48823 | LHS.getValueType() == MVT::v4f32) |
48824 | return LowerVSETCC(SDValue(N, 0), Subtarget, DAG); |
48825 | |
48826 | return SDValue(); |
48827 | } |
48828 | |
48829 | static SDValue combineMOVMSK(SDNode *N, SelectionDAG &DAG, |
48830 | TargetLowering::DAGCombinerInfo &DCI, |
48831 | const X86Subtarget &Subtarget) { |
48832 | SDValue Src = N->getOperand(0); |
48833 | MVT SrcVT = Src.getSimpleValueType(); |
48834 | MVT VT = N->getSimpleValueType(0); |
48835 | unsigned NumBits = VT.getScalarSizeInBits(); |
48836 | unsigned NumElts = SrcVT.getVectorNumElements(); |
48837 | |
48838 | // Perform constant folding. |
48839 | if (ISD::isBuildVectorOfConstantSDNodes(Src.getNode())) { |
48840 | assert(VT == MVT::i32 && "Unexpected result type"); |
48841 | APInt Imm(32, 0); |
48842 | for (unsigned Idx = 0, e = Src.getNumOperands(); Idx < e; ++Idx) { |
48843 | if (!Src.getOperand(Idx).isUndef() && |
48844 | Src.getConstantOperandAPInt(Idx).isNegative()) |
48845 | Imm.setBit(Idx); |
48846 | } |
48847 | return DAG.getConstant(Imm, SDLoc(N), VT); |
48848 | } |
48849 | |
48850 | // Look through int->fp bitcasts that don't change the element width. |
48851 | unsigned EltWidth = SrcVT.getScalarSizeInBits(); |
48852 | if (Subtarget.hasSSE2() && Src.getOpcode() == ISD::BITCAST && |
48853 | Src.getOperand(0).getScalarValueSizeInBits() == EltWidth) |
48854 | return DAG.getNode(X86ISD::MOVMSK, SDLoc(N), VT, Src.getOperand(0)); |
48855 | |
48856 | // Fold movmsk(not(x)) -> not(movmsk(x)) to improve folding of movmsk |
48857 | // results with scalar comparisons. |
48858 | if (SDValue NotSrc = IsNOT(Src, DAG)) { |
48859 | SDLoc DL(N); |
48860 | APInt NotMask = APInt::getLowBitsSet(NumBits, NumElts); |
48861 | NotSrc = DAG.getBitcast(SrcVT, NotSrc); |
48862 | return DAG.getNode(ISD::XOR, DL, VT, |
48863 | DAG.getNode(X86ISD::MOVMSK, DL, VT, NotSrc), |
48864 | DAG.getConstant(NotMask, DL, VT)); |
48865 | } |
48866 | |
48867 | // Fold movmsk(pcmpgt(x,-1)) -> not(movmsk(x)): x > -1 is the inverse of |
48868 | // the sign bit, so invert the mask bits instead. |
48869 | if (Src.getOpcode() == X86ISD::PCMPGT && |
48870 | ISD::isBuildVectorAllOnes(Src.getOperand(1).getNode())) { |
48871 | SDLoc DL(N); |
48872 | APInt NotMask = APInt::getLowBitsSet(NumBits, NumElts); |
48873 | return DAG.getNode(ISD::XOR, DL, VT, |
48874 | DAG.getNode(X86ISD::MOVMSK, DL, VT, Src.getOperand(0)), |
48875 | DAG.getConstant(NotMask, DL, VT)); |
48876 | } |
48877 | |
48878 | // Simplify the inputs. |
48879 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
48880 | APInt DemandedMask(APInt::getAllOnesValue(NumBits)); |
48881 | if (TLI.SimplifyDemandedBits(SDValue(N, 0), DemandedMask, DCI)) |
48882 | return SDValue(N, 0); |
48883 | |
48884 | return SDValue(); |
48885 | } |
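// Scalar sketch of the movmsk(not(x)) fold above (illustrative; assumes
// NumElts < 32). Inverting the vector first equals inverting the low NumElts
// bits of the mask afterwards:
static unsigned movmsk_not_ref(unsigned Msk, unsigned NumElts) {
  return Msk ^ ((1u << NumElts) - 1);  // flip only the bits MOVMSK produced
}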
48886 | |
48887 | static SDValue combineX86GatherScatter(SDNode *N, SelectionDAG &DAG, |
48888 | TargetLowering::DAGCombinerInfo &DCI) { |
48889 | // With vector masks we only demand the upper bit of the mask. |
48890 | SDValue Mask = cast<X86MaskedGatherScatterSDNode>(N)->getMask(); |
48891 | if (Mask.getScalarValueSizeInBits() != 1) { |
48892 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
48893 | APInt DemandedMask(APInt::getSignMask(Mask.getScalarValueSizeInBits())); |
48894 | if (TLI.SimplifyDemandedBits(Mask, DemandedMask, DCI)) { |
48895 | if (N->getOpcode() != ISD::DELETED_NODE) |
48896 | DCI.AddToWorklist(N); |
48897 | return SDValue(N, 0); |
48898 | } |
48899 | } |
48900 | |
48901 | return SDValue(); |
48902 | } |
48903 | |
48904 | static SDValue rebuildGatherScatter(MaskedGatherScatterSDNode *GorS, |
48905 | SDValue Index, SDValue Base, SDValue Scale, |
48906 | SelectionDAG &DAG) { |
48907 | SDLoc DL(GorS); |
48908 | |
48909 | if (auto *Gather = dyn_cast<MaskedGatherSDNode>(GorS)) { |
48910 | SDValue Ops[] = { Gather->getChain(), Gather->getPassThru(), |
48911 | Gather->getMask(), Base, Index, Scale } ; |
48912 | return DAG.getMaskedGather(Gather->getVTList(), |
48913 | Gather->getMemoryVT(), DL, Ops, |
48914 | Gather->getMemOperand(), |
48915 | Gather->getIndexType(), |
48916 | Gather->getExtensionType()); |
48917 | } |
48918 | auto *Scatter = cast<MaskedScatterSDNode>(GorS); |
48919 | SDValue Ops[] = { Scatter->getChain(), Scatter->getValue(), |
48920 | Scatter->getMask(), Base, Index, Scale }; |
48921 | return DAG.getMaskedScatter(Scatter->getVTList(), |
48922 | Scatter->getMemoryVT(), DL, |
48923 | Ops, Scatter->getMemOperand(), |
48924 | Scatter->getIndexType(), |
48925 | Scatter->isTruncatingStore()); |
48926 | } |
48927 | |
48928 | static SDValue combineGatherScatter(SDNode *N, SelectionDAG &DAG, |
48929 | TargetLowering::DAGCombinerInfo &DCI) { |
48930 | SDLoc DL(N); |
48931 | auto *GorS = cast<MaskedGatherScatterSDNode>(N); |
48932 | SDValue Index = GorS->getIndex(); |
48933 | SDValue Base = GorS->getBasePtr(); |
48934 | SDValue Scale = GorS->getScale(); |
48935 | |
48936 | if (DCI.isBeforeLegalize()) { |
48937 | unsigned IndexWidth = Index.getScalarValueSizeInBits(); |
48938 | |
48939 | // Shrink constant indices if they are larger than 32-bits. |
48940 | // Only do this before legalize types since v2i64 could become v2i32. |
48941 | // FIXME: We could check that the type is legal if we're after legalize |
48942 | // types, but then we would need to construct test cases where that happens. |
48943 | // FIXME: We could support more than just constant vectors, but we need to |
48944 | // be careful with costing. A truncate that can be optimized out would be |
48945 | // fine. Otherwise we might only want to create a truncate if it avoids a split. |
48946 | if (auto *BV = dyn_cast<BuildVectorSDNode>(Index)) { |
48947 | if (BV->isConstant() && IndexWidth > 32 && |
48948 | DAG.ComputeNumSignBits(Index) > (IndexWidth - 32)) { |
48949 | EVT NewVT = Index.getValueType().changeVectorElementType(MVT::i32); |
48950 | Index = DAG.getNode(ISD::TRUNCATE, DL, NewVT, Index); |
48951 | return rebuildGatherScatter(GorS, Index, Base, Scale, DAG); |
48952 | } |
48953 | } |
48954 | |
48955 | // Shrink a sign/zero extended index if the extension is from a type of 32 |
48956 | // bits or less and the wide index still has enough sign bits. Only do this |
48957 | // before legalize types to avoid creating illegal types in the truncate. |
48958 | if ((Index.getOpcode() == ISD::SIGN_EXTEND || |
48959 | Index.getOpcode() == ISD::ZERO_EXTEND) && |
48960 | IndexWidth > 32 && |
48961 | Index.getOperand(0).getScalarValueSizeInBits() <= 32 && |
48962 | DAG.ComputeNumSignBits(Index) > (IndexWidth - 32)) { |
48963 | EVT NewVT = Index.getValueType().changeVectorElementType(MVT::i32); |
48964 | Index = DAG.getNode(ISD::TRUNCATE, DL, NewVT, Index); |
48965 | return rebuildGatherScatter(GorS, Index, Base, Scale, DAG); |
48966 | } |
48967 | } |
48968 | |
48969 | if (DCI.isBeforeLegalizeOps()) { |
48970 | unsigned IndexWidth = Index.getScalarValueSizeInBits(); |
48971 | |
48972 | // Make sure the index is either i32 or i64. |
48973 | if (IndexWidth != 32 && IndexWidth != 64) { |
48974 | MVT EltVT = IndexWidth > 32 ? MVT::i64 : MVT::i32; |
48975 | EVT IndexVT = Index.getValueType().changeVectorElementType(EltVT); |
48976 | Index = DAG.getSExtOrTrunc(Index, DL, IndexVT); |
48977 | return rebuildGatherScatter(GorS, Index, Base, Scale, DAG); |
48978 | } |
48979 | } |
48980 | |
48981 | // With vector masks we only demand the upper bit of the mask. |
48982 | SDValue Mask = GorS->getMask(); |
48983 | if (Mask.getScalarValueSizeInBits() != 1) { |
48984 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
48985 | APInt DemandedMask(APInt::getSignMask(Mask.getScalarValueSizeInBits())); |
48986 | if (TLI.SimplifyDemandedBits(Mask, DemandedMask, DCI)) { |
48987 | if (N->getOpcode() != ISD::DELETED_NODE) |
48988 | DCI.AddToWorklist(N); |
48989 | return SDValue(N, 0); |
48990 | } |
48991 | } |
48992 | |
48993 | return SDValue(); |
48994 | } |
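// Worked example of the index shrinking above (illustrative): a gather whose
// v2i64 index is (sext (v2i32 %idx)) keeps more than 32 sign bits, so it is
// rebuilt as a gather taking the truncated v2i32 index directly, letting isel
// pick a cheaper 32-bit-index gather form.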
48995 | |
48996 | // Optimize RES = X86ISD::SETCC CONDCODE, EFLAG_INPUT. |
48997 | static SDValue combineX86SetCC(SDNode *N, SelectionDAG &DAG, |
48998 | const X86Subtarget &Subtarget) { |
48999 | SDLoc DL(N); |
49000 | X86::CondCode CC = X86::CondCode(N->getConstantOperandVal(0)); |
49001 | SDValue EFLAGS = N->getOperand(1); |
49002 | |
49003 | // Try to simplify the EFLAGS and condition code operands. |
49004 | if (SDValue Flags = combineSetCCEFLAGS(EFLAGS, CC, DAG, Subtarget)) |
49005 | return getSETCC(CC, Flags, DL, DAG); |
49006 | |
49007 | return SDValue(); |
49008 | } |
49009 | |
49010 | // Optimize branch condition evaluation. |
49011 | static SDValue combineBrCond(SDNode *N, SelectionDAG &DAG, |
49012 | const X86Subtarget &Subtarget) { |
49013 | SDLoc DL(N); |
49014 | SDValue EFLAGS = N->getOperand(3); |
49015 | X86::CondCode CC = X86::CondCode(N->getConstantOperandVal(2)); |
49016 | |
49017 | // Try to simplify the EFLAGS and condition code operands. |
49018 | // Make sure to not keep references to operands, as combineSetCCEFLAGS |
49019 | // can RAUW them under us. |
49020 | if (SDValue Flags = combineSetCCEFLAGS(EFLAGS, CC, DAG, Subtarget)) { |
49021 | SDValue Cond = DAG.getTargetConstant(CC, DL, MVT::i8); |
49022 | return DAG.getNode(X86ISD::BRCOND, DL, N->getVTList(), N->getOperand(0), |
49023 | N->getOperand(1), Cond, Flags); |
49024 | } |
49025 | |
49026 | return SDValue(); |
49027 | } |
49028 | |
49029 | |
49030 | static SDValue combineVectorCompareAndMaskUnaryOp(SDNode *N, |
49031 | SelectionDAG &DAG) { |
49032 | // Convert a conversion of a compare-masked constant vector into a masked |
49033 | // conversion of that constant: |
49034 | //   cvt (and (setcc ...), C) --> bitcast (and (setcc ...), bitcast (cvt C)) |
49035 | // |
49036 | // A vector setcc produces lanes that are all-ones or all-zeros, so ANDing |
49037 | // with a constant yields either the constant or zero in each lane. Since |
49038 | // converting zero produces an all-zero bit pattern, the constant can be |
49039 | // converted at compile time and the compare mask applied to the converted |
49040 | // bits instead, removing the conversion from the runtime path. |
49041 | |
49042 | |
49043 | EVT VT = N->getValueType(0); |
49044 | bool IsStrict = N->isStrictFPOpcode(); |
49045 | unsigned NumEltBits = VT.getScalarSizeInBits(); |
49046 | SDValue Op0 = N->getOperand(IsStrict ? 1 : 0); |
49047 | if (!VT.isVector() || Op0.getOpcode() != ISD::AND || |
49048 | DAG.ComputeNumSignBits(Op0.getOperand(0)) != NumEltBits || |
49049 | VT.getSizeInBits() != Op0.getValueSizeInBits()) |
49050 | return SDValue(); |
49051 | |
49052 | // Now check that the other operand of the AND is a constant. We could make |
49053 | // the transformation for non-constant splats as well, but it's unclear that |
49054 | // would be a benefit as it would not eliminate any operations, just perform |
49055 | // one more step in scalar code before moving to the vector unit. |
49056 | if (auto *BV = dyn_cast<BuildVectorSDNode>(Op0.getOperand(1))) { |
49057 | // Bail out if the vector isn't a constant. |
49058 | if (!BV->isConstant()) |
49059 | return SDValue(); |
49060 | |
49061 | // Everything checks out. Build up the new and improved node. |
49062 | SDLoc DL(N); |
49063 | EVT IntVT = BV->getValueType(0); |
49064 | |
49065 | // Convert the constant vector up front, preserving the strict chain if any. |
49066 | SDValue SourceConst; |
49067 | if (IsStrict) |
49068 | SourceConst = DAG.getNode(N->getOpcode(), DL, {VT, MVT::Other}, |
49069 | {N->getOperand(0), SDValue(BV, 0)}); |
49070 | else |
49071 | SourceConst = DAG.getNode(N->getOpcode(), DL, VT, SDValue(BV, 0)); |
49072 | |
49073 | SDValue MaskConst = DAG.getBitcast(IntVT, SourceConst); |
49074 | SDValue NewAnd = DAG.getNode(ISD::AND, DL, IntVT, Op0->getOperand(0), |
49075 | MaskConst); |
49076 | SDValue Res = DAG.getBitcast(VT, NewAnd); |
49077 | if (IsStrict) |
49078 | return DAG.getMergeValues({Res, SourceConst.getValue(1)}, DL); |
49079 | return Res; |
49080 | } |
49081 | |
49082 | return SDValue(); |
49083 | } |
49084 | |
49085 | |
49086 | /// If we are converting a value to floating-point, try to replace a scalar |
49087 | /// truncate of an extracted vector element with a bitcast of the vector. |
49088 | static SDValue combineToFPTruncExtElt(SDNode *N, SelectionDAG &DAG) { |
49089 | |
49090 | // Match: (cvt (trunc (extract_vector_elt X, 0))) |
49091 | // The truncate is free if we instead extract a narrower element type. |
49092 | SDValue Trunc = N->getOperand(0); |
49093 | if (!Trunc.hasOneUse() || Trunc.getOpcode() != ISD::TRUNCATE) |
49094 | return SDValue(); |
49095 | |
49096 | SDValue ExtElt = Trunc.getOperand(0); |
49097 | if (!ExtElt.hasOneUse() || ExtElt.getOpcode() != ISD::EXTRACT_VECTOR_ELT || |
49098 | !isNullConstant(ExtElt.getOperand(1))) |
49099 | return SDValue(); |
49100 | |
49101 | EVT TruncVT = Trunc.getValueType(); |
49102 | EVT SrcVT = ExtElt.getValueType(); |
49103 | unsigned DestWidth = TruncVT.getSizeInBits(); |
49104 | unsigned SrcWidth = SrcVT.getSizeInBits(); |
49105 | if (SrcWidth % DestWidth != 0) |
49106 | return SDValue(); |
49107 | |
49108 | // Bitcast to a vector of the truncated type and extract element 0 directly. |
49109 | EVT SrcVecVT = ExtElt.getOperand(0).getValueType(); |
49110 | unsigned VecWidth = SrcVecVT.getSizeInBits(); |
49111 | unsigned NumElts = VecWidth / DestWidth; |
49112 | EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), TruncVT, NumElts); |
49113 | SDValue BitcastVec = DAG.getBitcast(BitcastVT, ExtElt.getOperand(0)); |
49114 | SDLoc DL(N); |
49115 | SDValue NewExtElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TruncVT, |
49116 | BitcastVec, ExtElt.getOperand(1)); |
49117 | return DAG.getNode(N->getOpcode(), DL, N->getValueType(0), NewExtElt); |
49118 | } |
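// Worked example (illustrative):
//   (sint_to_fp (i32 trunc (i64 extractelt (v2i64 X), 0)))
// becomes
//   (sint_to_fp (i32 extractelt (v4i32 bitcast X), 0))
// keeping the value in the vector unit instead of bouncing through a GPR
// truncate.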
49119 | |
49120 | static SDValue combineUIntToFP(SDNode *N, SelectionDAG &DAG, |
49121 | const X86Subtarget &Subtarget) { |
49122 | bool IsStrict = N->isStrictFPOpcode(); |
49123 | SDValue Op0 = N->getOperand(IsStrict ? 1 : 0); |
49124 | EVT VT = N->getValueType(0); |
49125 | EVT InVT = Op0.getValueType(); |
49126 | |
49127 | // UINT_TO_FP(vXi1) -> SINT_TO_FP(ZEXT(vXi1 to vXi32)) |
49128 | // UINT_TO_FP(vXi8) -> SINT_TO_FP(ZEXT(vXi8 to vXi32)) |
49129 | // UINT_TO_FP(vXi16) -> SINT_TO_FP(ZEXT(vXi16 to vXi32)) |
49130 | if (InVT.isVector() && InVT.getScalarSizeInBits() < 32) { |
49131 | SDLoc dl(N); |
49132 | EVT DstVT = InVT.changeVectorElementType(MVT::i32); |
49133 | SDValue P = DAG.getNode(ISD::ZERO_EXTEND, dl, DstVT, Op0); |
49134 | |
49135 | // The zero-extended value is non-negative, so the signed conversion is equivalent. |
49136 | if (IsStrict) |
49137 | return DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, {VT, MVT::Other}, |
49138 | {N->getOperand(0), P}); |
49139 | return DAG.getNode(ISD::SINT_TO_FP, dl, VT, P); |
49140 | } |
49141 | |
49142 | // Since UINT_TO_FP is legal (it's marked custom), dag combiner won't |
49143 | // optimize it to a SINT_TO_FP when the sign bit is known zero. Perform |
49144 | // the optimization here. |
49145 | if (DAG.SignBitIsZero(Op0)) { |
49146 | if (IsStrict) |
49147 | return DAG.getNode(ISD::STRICT_SINT_TO_FP, SDLoc(N), {VT, MVT::Other}, |
49148 | {N->getOperand(0), Op0}); |
49149 | return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, Op0); |
49150 | } |
49151 | |
49152 | return SDValue(); |
49153 | } |
49154 | |
49155 | static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG, |
49156 | TargetLowering::DAGCombinerInfo &DCI, |
49157 | const X86Subtarget &Subtarget) { |
49158 | // First try to optimize away the conversion entirely when it's |
49159 | // conditionally from a constant. Vectors only. |
49160 | bool IsStrict = N->isStrictFPOpcode(); |
49161 | if (SDValue Res = combineVectorCompareAndMaskUnaryOp(N, DAG)) |
49162 | return Res; |
49163 | |
49164 | // Now move on to more general possibilities. |
49165 | SDValue Op0 = N->getOperand(IsStrict ? 1 : 0); |
49166 | EVT VT = N->getValueType(0); |
49167 | EVT InVT = Op0.getValueType(); |
49168 | |
49169 | // SINT_TO_FP(vXi1) -> SINT_TO_FP(SEXT(vXi1 to vXi32)) |
49170 | // SINT_TO_FP(vXi8) -> SINT_TO_FP(SEXT(vXi8 to vXi32)) |
49171 | // SINT_TO_FP(vXi16) -> SINT_TO_FP(SEXT(vXi16 to vXi32)) |
49172 | if (InVT.isVector() && InVT.getScalarSizeInBits() < 32) { |
49173 | SDLoc dl(N); |
49174 | EVT DstVT = InVT.changeVectorElementType(MVT::i32); |
49175 | SDValue P = DAG.getNode(ISD::SIGN_EXTEND, dl, DstVT, Op0); |
49176 | if (IsStrict) |
49177 | return DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, {VT, MVT::Other}, |
49178 | {N->getOperand(0), P}); |
49179 | return DAG.getNode(ISD::SINT_TO_FP, dl, VT, P); |
49180 | } |
49181 | |
49182 | // If the input has enough sign bits to survive truncation to i32, convert |
49183 | // through i32 instead: without AVX512DQ the wider-than-32-bit conversions |
49184 | // are expensive. |
49185 | if (InVT.getScalarSizeInBits() > 32 && !Subtarget.hasDQI()) { |
49186 | unsigned BitWidth = InVT.getScalarSizeInBits(); |
49187 | unsigned NumSignBits = DAG.ComputeNumSignBits(Op0); |
49188 | if (NumSignBits >= (BitWidth - 31)) { |
49189 | EVT TruncVT = MVT::i32; |
49190 | if (InVT.isVector()) |
49191 | TruncVT = InVT.changeVectorElementType(TruncVT); |
49192 | SDLoc dl(N); |
49193 | if (DCI.isBeforeLegalize() || TruncVT != MVT::v2i32) { |
49194 | SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, TruncVT, Op0); |
49195 | if (IsStrict) |
49196 | return DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, {VT, MVT::Other}, |
49197 | {N->getOperand(0), Trunc}); |
49198 | return DAG.getNode(ISD::SINT_TO_FP, dl, VT, Trunc); |
49199 | } |
49200 | |
49201 | |
49202 | assert(InVT == MVT::v2i64 && "Unexpected VT!"); |
49203 | SDValue Cast = DAG.getBitcast(MVT::v4i32, Op0); |
49204 | SDValue Shuf = DAG.getVectorShuffle(MVT::v4i32, dl, Cast, Cast, |
49205 | { 0, 2, -1, -1 }); |
49206 | if (IsStrict) |
49207 | return DAG.getNode(X86ISD::STRICT_CVTSI2P, dl, {VT, MVT::Other}, |
49208 | {N->getOperand(0), Shuf}); |
49209 | return DAG.getNode(X86ISD::CVTSI2P, dl, VT, Shuf); |
49210 | } |
49211 | } |
49212 | |
49213 | |
49214 | |
49215 | if (!Subtarget.useSoftFloat() && Subtarget.hasX87() && |
49216 | Op0.getOpcode() == ISD::LOAD) { |
49217 | LoadSDNode *Ld = cast<LoadSDNode>(Op0.getNode()); |
49218 | |
49219 | |
49220 | if (VT == MVT::f16 || VT == MVT::f128) |
49221 | return SDValue(); |
49222 | |
49223 | |
49224 | |
49225 | if (Subtarget.hasDQI() && VT != MVT::f80) |
49226 | return SDValue(); |
49227 | |
49228 | if (Ld->isSimple() && !VT.isVector() && ISD::isNormalLoad(Op0.getNode()) && |
49229 | Op0.hasOneUse() && !Subtarget.is64Bit() && InVT == MVT::i64) { |
49230 | std::pair<SDValue, SDValue> Tmp = |
49231 | Subtarget.getTargetLowering()->BuildFILD( |
49232 | VT, InVT, SDLoc(N), Ld->getChain(), Ld->getBasePtr(), |
49233 | Ld->getPointerInfo(), Ld->getOriginalAlign(), DAG); |
49234 | DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), Tmp.second); |
49235 | return Tmp.first; |
49236 | } |
49237 | } |
49238 | |
49239 | if (IsStrict) |
49240 | return SDValue(); |
49241 | |
49242 | if (SDValue V = combineToFPTruncExtElt(N, DAG)) |
49243 | return V; |
49244 | |
49245 | return SDValue(); |
49246 | } |
49247 | |
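// The next two helpers classify the users of an EFLAGS value so combines
// know which flag bits must stay valid: a user such as
// (X86ISD::SETCC COND_B, flags) reads CF, while COND_E/COND_NE read only ZF.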
static bool needCarryOrOverflowFlag(SDValue Flags) {
  assert(Flags.getValueType() == MVT::i32 && "Unexpected VT!");

  for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end();
       UI != UE; ++UI) {
    SDNode *User = *UI;

    X86::CondCode CC;
    switch (User->getOpcode()) {
    default:
      // Be conservative about unrecognized flag users.
      return true;
    case X86ISD::SETCC:
    case X86ISD::SETCC_CARRY:
      CC = (X86::CondCode)User->getConstantOperandVal(0);
      break;
    case X86ISD::BRCOND:
      CC = (X86::CondCode)User->getConstantOperandVal(2);
      break;
    case X86ISD::CMOV:
      CC = (X86::CondCode)User->getConstantOperandVal(2);
      break;
    }

    switch (CC) {
    default: break;
    case X86::COND_A: case X86::COND_AE:
    case X86::COND_B: case X86::COND_BE:
    case X86::COND_O: case X86::COND_NO:
    case X86::COND_G: case X86::COND_GE:
    case X86::COND_L: case X86::COND_LE:
      return true;
    }
  }

  return false;
}

static bool onlyZeroFlagUsed(SDValue Flags) {
  assert(Flags.getValueType() == MVT::i32 && "Unexpected VT!");

  for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end();
       UI != UE; ++UI) {
    SDNode *User = *UI;

    unsigned CCOpNo;
    switch (User->getOpcode()) {
    default:
      // Be conservative about unrecognized flag users.
      return false;
    case X86ISD::SETCC:       CCOpNo = 0; break;
    case X86ISD::SETCC_CARRY: CCOpNo = 0; break;
    case X86ISD::BRCOND:      CCOpNo = 2; break;
    case X86ISD::CMOV:        CCOpNo = 2; break;
    }

    X86::CondCode CC = (X86::CondCode)User->getConstantOperandVal(CCOpNo);
    if (CC != X86::COND_E && CC != X86::COND_NE)
      return false;
  }

  return true;
}

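// Example of the shift fold below: "cmp (srl X, 8), 0" only needs ZF, so it
// can instead test X against 0xffffff00 (the mask of bits that survive the
// shift), and the shift itself disappears.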
static SDValue combineCMP(SDNode *N, SelectionDAG &DAG) {
  // Only handle test patterns (CMP x, 0).
  if (!isNullConstant(N->getOperand(1)))
    return SDValue();

  // Try to fold the compare into the flag-producing operation. Look through
  // truncates and narrow binops below; each fold only has to preserve the
  // flag bits that are actually consumed.
  SDLoc dl(N);
  SDValue Op = N->getOperand(0);
  EVT VT = Op.getValueType();

  // If we have a constant logical shift that's only used in a comparison
  // against zero, turn it into an equivalent AND. This allows turning it into
  // a TEST instruction later.
  if ((Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) &&
      Op.hasOneUse() && isa<ConstantSDNode>(Op.getOperand(1)) &&
      onlyZeroFlagUsed(SDValue(N, 0))) {
    unsigned BitWidth = VT.getSizeInBits();
    const APInt &ShAmt = Op.getConstantOperandAPInt(1);
    if (ShAmt.ult(BitWidth)) { // Avoid undefined shifts.
      unsigned MaskBits = BitWidth - ShAmt.getZExtValue();
      APInt Mask = Op.getOpcode() == ISD::SRL
                       ? APInt::getHighBitsSet(BitWidth, MaskBits)
                       : APInt::getLowBitsSet(BitWidth, MaskBits);
      if (Mask.isSignedIntN(32)) { // The mask must encode as an imm32.
        Op = DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0),
                         DAG.getConstant(Mask, dl, VT));
        return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
                           DAG.getConstant(0, dl, VT));
      }
    }
  }

  // Look for a truncate.
  if (Op.getOpcode() != ISD::TRUNCATE)
    return SDValue();

  SDValue Trunc = Op;
  Op = Op.getOperand(0);

  // See if we can compare with zero against the truncation source, which
  // should help using the Z flag from many ops. Only do this for i32 source
  // ops to prevent partial-register compares of promoted ops.
  EVT OpVT = Op.getValueType();
  APInt UpperBits =
      APInt::getBitsSetFrom(OpVT.getSizeInBits(), VT.getSizeInBits());
  if (OpVT == MVT::i32 && DAG.MaskedValueIsZero(Op, UpperBits) &&
      onlyZeroFlagUsed(SDValue(N, 0))) {
    return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
                       DAG.getConstant(0, dl, OpVT));
  }

  // After this, the truncate and the arithmetic op must each be single-use.
  if (!Trunc.hasOneUse() || !Op.hasOneUse())
    return SDValue();

  unsigned NewOpc;
  switch (Op.getOpcode()) {
  default: return SDValue();
  case ISD::AND:
    // Skip and with constant. We have special handling for and with immediate
    // during isel to generate test instructions.
    if (isa<ConstantSDNode>(Op.getOperand(1)))
      return SDValue();
    NewOpc = X86ISD::AND;
    break;
  case ISD::OR:  NewOpc = X86ISD::OR;  break;
  case ISD::XOR: NewOpc = X86ISD::XOR; break;
  case ISD::ADD:
    // If the carry or overflow flag is used, we can't truncate.
    if (needCarryOrOverflowFlag(SDValue(N, 0)))
      return SDValue();
    NewOpc = X86ISD::ADD;
    break;
  case ISD::SUB:
    // If the carry or overflow flag is used, we can't truncate.
    if (needCarryOrOverflowFlag(SDValue(N, 0)))
      return SDValue();
    NewOpc = X86ISD::SUB;
    break;
  }

  // We found an op we can narrow. Truncate its inputs.
  SDValue Op0 = DAG.getNode(ISD::TRUNCATE, dl, VT, Op.getOperand(0));
  SDValue Op1 = DAG.getNode(ISD::TRUNCATE, dl, VT, Op.getOperand(1));

  // Use an X86-specific opcode to avoid generic DAG combines undoing this.
  SDVTList VTs = DAG.getVTList(VT, MVT::i32);
  Op = DAG.getNode(NewOpc, dl, VTs, Op0, Op1);

  // For AND, keep a CMP so that isel can still match the TEST pattern.
  if (NewOpc == X86ISD::AND)
    return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
                       DAG.getConstant(0, dl, VT));

  // Return the flags result.
  return Op.getValue(1);
}

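// X86ISD::ADD/SUB carry an EFLAGS result. When nothing reads the flags, the
// node below is relaxed back to the generic ISD::ADD/SUB opcode so that all
// of the generic DAG combines apply again.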
static SDValue combineX86AddSub(SDNode *N, SelectionDAG &DAG,
                                TargetLowering::DAGCombinerInfo &DCI) {
  assert((X86ISD::ADD == N->getOpcode() || X86ISD::SUB == N->getOpcode()) &&
         "Expected X86ISD::ADD or X86ISD::SUB");

  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  MVT VT = LHS.getSimpleValueType();
  unsigned GenericOpc = X86ISD::ADD == N->getOpcode() ? ISD::ADD : ISD::SUB;

  // If we don't use the flag result, simplify back to a generic ADD/SUB.
  if (!N->hasAnyUseOfValue(1)) {
    SDValue Res = DAG.getNode(GenericOpc, DL, VT, LHS, RHS);
    return DAG.getMergeValues({Res, DAG.getConstant(0, DL, MVT::i32)}, DL);
  }

  // Fold any similar generic ADD/SUB opcodes to reuse this node.
  auto MatchGeneric = [&](SDValue N0, SDValue N1, bool Negate) {
    SDValue Ops[] = {N0, N1};
    SDVTList VTs = DAG.getVTList(N->getValueType(0));
    if (SDNode *GenericAddSub = DAG.getNodeIfExists(GenericOpc, VTs, Ops)) {
      SDValue Op(N, 0);
      if (Negate)
        Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
      DCI.CombineTo(GenericAddSub, Op);
    }
  };
  MatchGeneric(LHS, RHS, false);
  MatchGeneric(RHS, LHS, X86ISD::SUB == N->getOpcode());

  return SDValue();
}

static SDValue combineSBB(SDNode *N, SelectionDAG &DAG) {
  if (SDValue Flags = combineCarryThroughADD(N->getOperand(2), DAG)) {
    MVT VT = N->getSimpleValueType(0);
    SDVTList VTs = DAG.getVTList(VT, MVT::i32);
    return DAG.getNode(X86ISD::SBB, SDLoc(N), VTs,
                       N->getOperand(0), N->getOperand(1),
                       Flags);
  }

  // Fold SBB(SUB(X,Y),0,Carry) -> SBB(X,Y,Carry)
  // iff the flag result is dead.
  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  if (Op0.getOpcode() == ISD::SUB && isNullConstant(Op1) &&
      !N->hasAnyUseOfValue(1))
    return DAG.getNode(X86ISD::SBB, SDLoc(N), N->getVTList(),
                       Op0.getOperand(0), Op0.getOperand(1), N->getOperand(2));

  return SDValue();
}

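// Optimize RES, EFLAGS = X86ISD::ADC LHS, RHS, EFLAGS. In particular, adc of
// two zeros just materializes the incoming carry as 0 or 1, which the
// setcc_carry+and sequence built below computes without the add.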
static SDValue combineADC(SDNode *N, SelectionDAG &DAG,
                          TargetLowering::DAGCombinerInfo &DCI) {
  // If the LHS and RHS of the ADC node are zero, then it can't overflow and
  // the result is either zero or one (depending on the input carry bit).
  // Strength reduce this down to a "set on carry" aka SETCC_CARRY&1.
  if (X86::isZeroNode(N->getOperand(0)) &&
      X86::isZeroNode(N->getOperand(1)) &&
      // We don't have a good way to replace an EFLAGS use, so only do this
      // when the carry result is dead.
      SDValue(N, 1).use_empty()) {
    SDLoc DL(N);
    EVT VT = N->getValueType(0);
    SDValue CarryOut = DAG.getConstant(0, DL, N->getValueType(1));
    SDValue Res1 =
        DAG.getNode(ISD::AND, DL, VT,
                    DAG.getNode(X86ISD::SETCC_CARRY, DL, VT,
                                DAG.getTargetConstant(X86::COND_B, DL, MVT::i8),
                                N->getOperand(2)),
                    DAG.getConstant(1, DL, VT));
    return DCI.CombineTo(N, Res1, CarryOut);
  }

  if (SDValue Flags = combineCarryThroughADD(N->getOperand(2), DAG)) {
    MVT VT = N->getSimpleValueType(0);
    SDVTList VTs = DAG.getVTList(VT, MVT::i32);
    return DAG.getNode(X86ISD::ADC, SDLoc(N), VTs,
                       N->getOperand(0), N->getOperand(1),
                       Flags);
  }

  return SDValue();
}

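// A few concrete instances of the ADC/SBB folds implemented below:
//   add X, (setb)  -> adc X, 0     (X plus the carry flag)
//   sub X, (setb)  -> sbb X, 0     (X minus the carry flag)
//   add X, (setae) -> sbb X, -1    (X + (1 - CF) == X - (-1) - CF)
//   sub X, (setae) -> adc X, -1    (X - (1 - CF) == X + (-1) + CF)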
/// If this is an add or subtract where one operand is produced by a cmp+setcc,
/// then try to convert it to an ADC or SBB. This replaces TEST+SET+{ADD/SUB}
/// with CMP+{ADC, SBB}.
static SDValue combineAddOrSubToADCOrSBB(SDNode *N, SelectionDAG &DAG) {
  bool IsSub = N->getOpcode() == ISD::SUB;
  SDValue X = N->getOperand(0);
  SDValue Y = N->getOperand(1);

  // If this is an add, canonicalize a zext operand to the RHS.
  // TODO: Incomplete? What if both sides are zexts?
  if (!IsSub && X.getOpcode() == ISD::ZERO_EXTEND &&
      Y.getOpcode() != ISD::ZERO_EXTEND)
    std::swap(X, Y);

  // Look through a one-use zext.
  bool PeekedThroughZext = false;
  if (Y.getOpcode() == ISD::ZERO_EXTEND && Y.hasOneUse()) {
    Y = Y.getOperand(0);
    PeekedThroughZext = true;
  }

  // If this is an add, canonicalize a setcc operand to the RHS.
  // TODO: Incomplete? What if both sides are setcc?
  if (!IsSub && !PeekedThroughZext && X.getOpcode() == X86ISD::SETCC &&
      Y.getOpcode() != X86ISD::SETCC)
    std::swap(X, Y);

  if (Y.getOpcode() != X86ISD::SETCC || !Y.hasOneUse())
    return SDValue();

  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  X86::CondCode CC = (X86::CondCode)Y.getConstantOperandVal(0);

  // If X is -1 or 0, then we have an opportunity to avoid constants required
  // in the general case. Just materialize the flags as 0/-1 directly.
  auto *ConstantX = dyn_cast<ConstantSDNode>(X);
  if (ConstantX) {
    if ((!IsSub && CC == X86::COND_AE && ConstantX->isAllOnesValue()) ||
        (IsSub && CC == X86::COND_B && ConstantX->isNullValue())) {
      // Get -1 or 0 straight from the carry flag:
      // -1 + SETAE --> -1 + (!CF) --> CF ? -1 : 0 --> SBB %eax, %eax
      //  0 - SETB  -->  0 -  (CF) --> CF ? -1 : 0 --> SBB %eax, %eax
      return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT,
                         DAG.getTargetConstant(X86::COND_B, DL, MVT::i8),
                         Y.getOperand(1));
    }

    if ((!IsSub && CC == X86::COND_BE && ConstantX->isAllOnesValue()) ||
        (IsSub && CC == X86::COND_A && ConstantX->isNullValue())) {
      SDValue EFLAGS = Y->getOperand(1);
      if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.hasOneUse() &&
          EFLAGS.getValueType().isInteger() &&
          !isa<ConstantSDNode>(EFLAGS.getOperand(1))) {
        // Swap the operands of the SUB, and we have the same pattern as above:
        // -1 + SETBE (SUB A, B) --> -1 + SETAE (SUB B, A) --> SUB + SBB
        //  0 - SETA  (SUB A, B) -->  0 - SETB  (SUB B, A) --> SUB + SBB
        SDValue NewSub = DAG.getNode(
            X86ISD::SUB, SDLoc(EFLAGS), EFLAGS.getNode()->getVTList(),
            EFLAGS.getOperand(1), EFLAGS.getOperand(0));
        SDValue NewEFLAGS = SDValue(NewSub.getNode(), EFLAGS.getResNo());
        return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT,
                           DAG.getTargetConstant(X86::COND_B, DL, MVT::i8),
                           NewEFLAGS);
      }
    }
  }

  if (CC == X86::COND_B) {
    // X + SETB Z --> adc X, 0
    // X - SETB Z --> sbb X, 0
    return DAG.getNode(IsSub ? X86ISD::SBB : X86ISD::ADC, DL,
                       DAG.getVTList(VT, MVT::i32), X,
                       DAG.getConstant(0, DL, VT), Y.getOperand(1));
  }

  if (CC == X86::COND_A) {
    SDValue EFLAGS = Y.getOperand(1);
    // Try to convert COND_A into COND_B in an attempt to facilitate
    // materializing "setb reg".
    //
    // Do not flip "e > c", where "c" is a constant, because the CMP
    // instruction cannot take an immediate as its first operand.
    if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.getNode()->hasOneUse() &&
        EFLAGS.getValueType().isInteger() &&
        !isa<ConstantSDNode>(EFLAGS.getOperand(1))) {
      SDValue NewSub = DAG.getNode(X86ISD::SUB, SDLoc(EFLAGS),
                                   EFLAGS.getNode()->getVTList(),
                                   EFLAGS.getOperand(1), EFLAGS.getOperand(0));
      SDValue NewEFLAGS = NewSub.getValue(EFLAGS.getResNo());
      return DAG.getNode(IsSub ? X86ISD::SBB : X86ISD::ADC, DL,
                         DAG.getVTList(VT, MVT::i32), X,
                         DAG.getConstant(0, DL, VT), NewEFLAGS);
    }
  }

  if (CC == X86::COND_AE) {
    // X + SETAE --> sbb X, -1
    // X - SETAE --> adc X, -1
    return DAG.getNode(IsSub ? X86ISD::ADC : X86ISD::SBB, DL,
                       DAG.getVTList(VT, MVT::i32), X,
                       DAG.getConstant(-1, DL, VT), Y.getOperand(1));
  }

  if (CC == X86::COND_BE) {
    // X + SETBE --> sbb X, -1
    // X - SETBE --> adc X, -1
    SDValue EFLAGS = Y.getOperand(1);
    // Try to convert COND_BE into COND_AE in an attempt to facilitate
    // materializing "setae reg".
    //
    // Do not flip "e <= c", where "c" is a constant, because the CMP
    // instruction cannot take an immediate as its first operand.
    if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.getNode()->hasOneUse() &&
        EFLAGS.getValueType().isInteger() &&
        !isa<ConstantSDNode>(EFLAGS.getOperand(1))) {
      SDValue NewSub = DAG.getNode(
          X86ISD::SUB, SDLoc(EFLAGS), EFLAGS.getNode()->getVTList(),
          EFLAGS.getOperand(1), EFLAGS.getOperand(0));
      SDValue NewEFLAGS = NewSub.getValue(EFLAGS.getResNo());
      return DAG.getNode(IsSub ? X86ISD::ADC : X86ISD::SBB, DL,
                         DAG.getVTList(VT, MVT::i32), X,
                         DAG.getConstant(-1, DL, VT), NewEFLAGS);
    }
  }

  if (CC != X86::COND_E && CC != X86::COND_NE)
    return SDValue();

  SDValue Cmp = Y.getOperand(1);
  if (Cmp.getOpcode() != X86ISD::CMP || !Cmp.hasOneUse() ||
      !X86::isZeroNode(Cmp.getOperand(1)) ||
      !Cmp.getOperand(0).getValueType().isInteger())
    return SDValue();

  SDValue Z = Cmp.getOperand(0);
  EVT ZVT = Z.getValueType();

  // Again, if X is -1 or 0 we can materialize the flags as 0/-1 directly and
  // avoid the constants required in the general case.
  if (ConstantX) {
    // 'neg' sets the carry flag when Z != 0, so create 0 or -1 using 'sbb'
    // with fake operands:
    //  0 - (Z != 0) --> sbb %eax, %eax, (neg Z)
    // -1 + (Z == 0) --> sbb %eax, %eax, (neg Z)
    if ((IsSub && CC == X86::COND_NE && ConstantX->isNullValue()) ||
        (!IsSub && CC == X86::COND_E && ConstantX->isAllOnesValue())) {
      SDValue Zero = DAG.getConstant(0, DL, ZVT);
      SDVTList X86SubVTs = DAG.getVTList(ZVT, MVT::i32);
      SDValue Neg = DAG.getNode(X86ISD::SUB, DL, X86SubVTs, Zero, Z);
      return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT,
                         DAG.getTargetConstant(X86::COND_B, DL, MVT::i8),
                         SDValue(Neg.getNode(), 1));
    }

    // cmp with 1 sets the carry flag when Z == 0, so create 0 or -1 using
    // 'sbb' with fake operands:
    //  0 - (Z == 0) --> sbb %eax, %eax, (cmp Z, 1)
    // -1 + (Z != 0) --> sbb %eax, %eax, (cmp Z, 1)
    if ((IsSub && CC == X86::COND_E && ConstantX->isNullValue()) ||
        (!IsSub && CC == X86::COND_NE && ConstantX->isAllOnesValue())) {
      SDValue One = DAG.getConstant(1, DL, ZVT);
      SDVTList X86SubVTs = DAG.getVTList(ZVT, MVT::i32);
      SDValue Cmp1 = DAG.getNode(X86ISD::SUB, DL, X86SubVTs, Z, One);
      return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT,
                         DAG.getTargetConstant(X86::COND_B, DL, MVT::i8),
                         Cmp1.getValue(1));
    }
  }

  // (cmp Z, 1) sets the carry flag if Z is 0.
  SDValue One = DAG.getConstant(1, DL, ZVT);
  SDVTList X86SubVTs = DAG.getVTList(ZVT, MVT::i32);
  SDValue Cmp1 = DAG.getNode(X86ISD::SUB, DL, X86SubVTs, Z, One);

  // Add the flags type for ADC/SBB nodes.
  SDVTList VTs = DAG.getVTList(VT, MVT::i32);

  // X - (Z != 0) --> sub X, (zext(setne Z, 0)) --> adc X, -1, (cmp Z, 1)
  // X + (Z != 0) --> add X, (zext(setne Z, 0)) --> sbb X, -1, (cmp Z, 1)
  if (CC == X86::COND_NE)
    return DAG.getNode(IsSub ? X86ISD::ADC : X86ISD::SBB, DL, VTs, X,
                       DAG.getConstant(-1ULL, DL, VT), Cmp1.getValue(1));

  // X - (Z == 0) --> sub X, (zext(sete  Z, 0)) --> sbb X, 0, (cmp Z, 1)
  // X + (Z == 0) --> add X, (zext(sete  Z, 0)) --> adc X, 0, (cmp Z, 1)
  return DAG.getNode(IsSub ? X86ISD::SBB : X86ISD::ADC, DL, VTs, X,
                     DAG.getConstant(0, DL, VT), Cmp1.getValue(1));
}

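// The two matchers below recognize i32 vector adds whose operands multiply
// sign-extended i16 values in even/odd pairs, which is exactly what the
// pmaddwd instruction computes:
//   res.i32[k] = a.i16[2k] * b.i16[2k] + a.i16[2k+1] * b.i16[2k+1]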
static SDValue matchPMADDWD(SelectionDAG &DAG, SDValue Op0, SDValue Op1,
                            const SDLoc &DL, EVT VT,
                            const X86Subtarget &Subtarget) {
  // Example of the pattern we try to detect:
  //   t := (v8i32 mul (sext (v8i16 x0), (sext (v8i16 x1))))
  //   (add (build_vector (extract_elt t, 0),
  //                      (extract_elt t, 2),
  //                      (extract_elt t, 4),
  //                      (extract_elt t, 6)),
  //        (build_vector (extract_elt t, 1),
  //                      (extract_elt t, 3),
  //                      (extract_elt t, 5),
  //                      (extract_elt t, 7)))
  if (!Subtarget.hasSSE2())
    return SDValue();

  if (Op0.getOpcode() != ISD::BUILD_VECTOR ||
      Op1.getOpcode() != ISD::BUILD_VECTOR)
    return SDValue();

  if (!VT.isVector() || VT.getVectorElementType() != MVT::i32 ||
      VT.getVectorNumElements() < 4 ||
      !isPowerOf2_32(VT.getVectorNumElements()))
    return SDValue();

  // Check if one of Op0,Op1 is of the form:
  //   (build_vector (extract_elt Mul, 0),
  //                 (extract_elt Mul, 2),
  //                 (extract_elt Mul, 4), ...)
  // and the other is of the form:
  //   (build_vector (extract_elt Mul, 1),
  //                 (extract_elt Mul, 3),
  //                 (extract_elt Mul, 5), ...)
  // where every extract comes from the same MUL node.
  SDValue Mul;
  for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; i += 2) {
    SDValue Op0L = Op0->getOperand(i), Op1L = Op1->getOperand(i),
            Op0H = Op0->getOperand(i + 1), Op1H = Op1->getOperand(i + 1);
    // TODO: Be more tolerant to undefs.
    if (Op0L.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
        Op1L.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
        Op0H.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
        Op1H.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
      return SDValue();
    auto *Const0L = dyn_cast<ConstantSDNode>(Op0L->getOperand(1));
    auto *Const1L = dyn_cast<ConstantSDNode>(Op1L->getOperand(1));
    auto *Const0H = dyn_cast<ConstantSDNode>(Op0H->getOperand(1));
    auto *Const1H = dyn_cast<ConstantSDNode>(Op1H->getOperand(1));
    if (!Const0L || !Const1L || !Const0H || !Const1H)
      return SDValue();
    unsigned Idx0L = Const0L->getZExtValue(), Idx1L = Const1L->getZExtValue(),
             Idx0H = Const0H->getZExtValue(), Idx1H = Const1H->getZExtValue();
    // Commutativity of mul allows factors of a product to reorder.
    if (Idx0L > Idx1L)
      std::swap(Idx0L, Idx1L);
    if (Idx0H > Idx1H)
      std::swap(Idx0H, Idx1H);
    // Commutativity of add allows pairs of factors to reorder.
    if (Idx0L > Idx0H) {
      std::swap(Idx0L, Idx0H);
      std::swap(Idx1L, Idx1H);
    }
    if (Idx0L != 2 * i || Idx1L != 2 * i + 1 || Idx0H != 2 * i + 2 ||
        Idx1H != 2 * i + 3)
      return SDValue();
    if (!Mul) {
      // First time an extract_elt's source vector is visited. It must be a
      // MUL with twice as many vector elements as the BUILD_VECTOR.
      Mul = Op0L->getOperand(0);
      if (Mul->getOpcode() != ISD::MUL ||
          Mul.getValueType().getVectorNumElements() != 2 * e)
        return SDValue();
    }
    // All extracts must come from the previously captured MUL.
    if (Mul != Op0L->getOperand(0) || Mul != Op1L->getOperand(0) ||
        Mul != Op0H->getOperand(0) || Mul != Op1H->getOperand(0))
      return SDValue();
  }

  // Check if the Mul source can be safely shrunk.
  ShrinkMode Mode;
  if (!canReduceVMulWidth(Mul.getNode(), DAG, Mode) ||
      Mode == ShrinkMode::MULU16)
    return SDValue();

  EVT TruncVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16,
                                 VT.getVectorNumElements() * 2);
  SDValue N0 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, Mul.getOperand(0));
  SDValue N1 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, Mul.getOperand(1));

  auto PMADDBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
                         ArrayRef<SDValue> Ops) {
    EVT InVT = Ops[0].getValueType();
    assert(InVT == Ops[1].getValueType() && "Operands' types mismatch");
    EVT ResVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32,
                                 InVT.getVectorNumElements() / 2);
    return DAG.getNode(X86ISD::VPMADDWD, DL, ResVT, Ops[0], Ops[1]);
  };
  return SplitOpsAndApply(DAG, Subtarget, DL, VT, { N0, N1 }, PMADDBuilder);
}

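// matchPMADDWD above requires one wide mul whose results are deinterleaved by
// two build_vectors. The variant below instead matches two narrower muls of
// sign-extended operands that were deinterleaved before the multiply.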
// Attempt to turn this pattern into PMADDWD.
// (add (mul (sext (build_vector)), (sext (build_vector))),
//      (mul (sext (build_vector)), (sext (build_vector))))
static SDValue matchPMADDWD_2(SelectionDAG &DAG, SDValue N0, SDValue N1,
                              const SDLoc &DL, EVT VT,
                              const X86Subtarget &Subtarget) {
  if (!Subtarget.hasSSE2())
    return SDValue();

  if (N0.getOpcode() != ISD::MUL || N1.getOpcode() != ISD::MUL)
    return SDValue();

  if (!VT.isVector() || VT.getVectorElementType() != MVT::i32 ||
      VT.getVectorNumElements() < 4 ||
      !isPowerOf2_32(VT.getVectorNumElements()))
    return SDValue();

  SDValue N00 = N0.getOperand(0);
  SDValue N01 = N0.getOperand(1);
  SDValue N10 = N1.getOperand(0);
  SDValue N11 = N1.getOperand(1);

  // All inputs need to be sign extends.
  // TODO: Support ZERO_EXTEND from known positive?
  if (N00.getOpcode() != ISD::SIGN_EXTEND ||
      N01.getOpcode() != ISD::SIGN_EXTEND ||
      N10.getOpcode() != ISD::SIGN_EXTEND ||
      N11.getOpcode() != ISD::SIGN_EXTEND)
    return SDValue();

  // Peek through the extends.
  N00 = N00.getOperand(0);
  N01 = N01.getOperand(0);
  N10 = N10.getOperand(0);
  N11 = N11.getOperand(0);

  // Must be extending from vXi16.
  EVT InVT = N00.getValueType();
  if (InVT.getVectorElementType() != MVT::i16 || N01.getValueType() != InVT ||
      N10.getValueType() != InVT || N11.getValueType() != InVT)
    return SDValue();

  // All inputs should be build_vectors.
  if (N00.getOpcode() != ISD::BUILD_VECTOR ||
      N01.getOpcode() != ISD::BUILD_VECTOR ||
      N10.getOpcode() != ISD::BUILD_VECTOR ||
      N11.getOpcode() != ISD::BUILD_VECTOR)
    return SDValue();

  // For each element, the even element of one pair of inputs must be
  // multiplied with the even element of the other pair, and likewise for the
  // odd elements. That is, for each output element i we expect:
  //   A[2 * i] * B[2 * i] + A[2 * i + 1] * B[2 * i + 1]
  SDValue In0, In1;
  for (unsigned i = 0; i != N00.getNumOperands(); ++i) {
    SDValue N00Elt = N00.getOperand(i);
    SDValue N01Elt = N01.getOperand(i);
    SDValue N10Elt = N10.getOperand(i);
    SDValue N11Elt = N11.getOperand(i);
    // TODO: Be more tolerant to undefs.
    if (N00Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
        N01Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
        N10Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
        N11Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
      return SDValue();
    auto *ConstN00Elt = dyn_cast<ConstantSDNode>(N00Elt.getOperand(1));
    auto *ConstN01Elt = dyn_cast<ConstantSDNode>(N01Elt.getOperand(1));
    auto *ConstN10Elt = dyn_cast<ConstantSDNode>(N10Elt.getOperand(1));
    auto *ConstN11Elt = dyn_cast<ConstantSDNode>(N11Elt.getOperand(1));
    if (!ConstN00Elt || !ConstN01Elt || !ConstN10Elt || !ConstN11Elt)
      return SDValue();
    unsigned IdxN00 = ConstN00Elt->getZExtValue();
    unsigned IdxN01 = ConstN01Elt->getZExtValue();
    unsigned IdxN10 = ConstN10Elt->getZExtValue();
    unsigned IdxN11 = ConstN11Elt->getZExtValue();
    // Add is commutative so indices can be reordered.
    if (IdxN00 > IdxN10) {
      std::swap(IdxN00, IdxN10);
      std::swap(IdxN01, IdxN11);
    }
    // N0 indices must be the even element; N1 indices the next odd element.
    if (IdxN00 != 2 * i || IdxN10 != 2 * i + 1 ||
        IdxN01 != 2 * i || IdxN11 != 2 * i + 1)
      return SDValue();
    SDValue N00In = N00Elt.getOperand(0);
    SDValue N01In = N01Elt.getOperand(0);
    SDValue N10In = N10Elt.getOperand(0);
    SDValue N11In = N11Elt.getOperand(0);

    // First time we find an input, capture it.
    if (!In0) {
      In0 = N00In;
      In1 = N01In;

      // The input vectors must be at least as wide as the output. If they are
      // larger than the output, we extract the low subvectors below.
      if (In0.getValueSizeInBits() < VT.getSizeInBits() ||
          In1.getValueSizeInBits() < VT.getSizeInBits())
        return SDValue();
    }

    // Mul is commutative so the input vectors can be in any order.
    // Canonicalize to make the compares easier.
    if (In0 != N00In)
      std::swap(N00In, N01In);
    if (In0 != N10In)
      std::swap(N10In, N11In);
    if (In0 != N00In || In1 != N01In || In0 != N10In || In1 != N11In)
      return SDValue();
  }

  auto PMADDBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
                         ArrayRef<SDValue> Ops) {
    EVT OpVT = Ops[0].getValueType();
    assert(OpVT.getScalarType() == MVT::i16 &&
           "Unexpected scalar element type");
    assert(OpVT == Ops[1].getValueType() && "Operands' types mismatch");
    EVT ResVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32,
                                 OpVT.getVectorNumElements() / 2);
    return DAG.getNode(X86ISD::VPMADDWD, DL, ResVT, Ops[0], Ops[1]);
  };

  // If the output is narrower than an input, extract the low part of the
  // input vector.
  EVT OutVT16 = EVT::getVectorVT(*DAG.getContext(), MVT::i16,
                                 VT.getVectorNumElements() * 2);
  if (OutVT16.bitsLT(In0.getValueType())) {
    In0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, OutVT16, In0,
                      DAG.getIntPtrConstant(0, DL));
  }
  if (OutVT16.bitsLT(In1.getValueType())) {
    In1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, OutVT16, In1,
                      DAG.getIntPtrConstant(0, DL));
  }
  return SplitOpsAndApply(DAG, Subtarget, DL, VT, { In0, In1 },
                          PMADDBuilder);
}

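// For example, with v8i16 inputs A and B and VT = v4i32, the matcher above
// turns
//   (add (mul (sext even-elts(A)), (sext even-elts(B))),
//        (mul (sext odd-elts(A)),  (sext odd-elts(B))))
// into a single (X86ISD::VPMADDWD A, B) node.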
/// CMOV of constants requires materializing the constants in registers. Try
/// to fold an add of constants into the CMOV arms instead, trading the
/// separate add for immediate operands of the CMOV.
static SDValue pushAddIntoCmovOfConsts(SDNode *N, SelectionDAG &DAG) {
  // Only fold a one-use CMOV whose arms are constants that either include a
  // zero (the add then folds away entirely on that arm) or both fit in a
  // 32-bit immediate.
  auto isSuitableCmov = [](SDValue V) {
    if (V.getOpcode() != X86ISD::CMOV || !V.hasOneUse())
      return false;
    if (!isa<ConstantSDNode>(V.getOperand(0)) ||
        !isa<ConstantSDNode>(V.getOperand(1)))
      return false;
    return isNullConstant(V.getOperand(0)) || isNullConstant(V.getOperand(1)) ||
           (V.getConstantOperandAPInt(0).isSignedIntN(32) &&
            V.getConstantOperandAPInt(1).isSignedIntN(32));
  };

  // Match "add (cmov C1, C2), OtherOp" in either operand order.
  SDValue Cmov = N->getOperand(0);
  SDValue OtherOp = N->getOperand(1);
  if (!isSuitableCmov(Cmov))
    std::swap(Cmov, OtherOp);
  if (!isSuitableCmov(Cmov))
    return SDValue();

  // add (cmov C1, C2), OtherOp --> cmov (add OtherOp, C1), (add OtherOp, C2)
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  SDValue FalseOp = Cmov.getOperand(0);
  SDValue TrueOp = Cmov.getOperand(1);
  FalseOp = DAG.getNode(ISD::ADD, DL, VT, OtherOp, FalseOp);
  TrueOp = DAG.getNode(ISD::ADD, DL, VT, OtherOp, TrueOp);
  return DAG.getNode(X86ISD::CMOV, DL, VT, FalseOp, TrueOp, Cmov.getOperand(2),
                     Cmov.getOperand(3));
}

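// Example of the cmov fold above: "add (cmov 0, 42), X" becomes
// "cmov X, (X + 42)"; the zero arm folds away and no constant has to be
// materialized in a register first.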
static SDValue combineAdd(SDNode *N, SelectionDAG &DAG,
                          TargetLowering::DAGCombinerInfo &DCI,
                          const X86Subtarget &Subtarget) {
  EVT VT = N->getValueType(0);
  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);

  if (SDValue Select = pushAddIntoCmovOfConsts(N, DAG))
    return Select;

  if (SDValue MAdd = matchPMADDWD(DAG, Op0, Op1, SDLoc(N), VT, Subtarget))
    return MAdd;
  if (SDValue MAdd = matchPMADDWD_2(DAG, Op0, Op1, SDLoc(N), VT, Subtarget))
    return MAdd;

  // Try to synthesize horizontal adds from adds of shuffles.
  if (SDValue V = combineToHorizontalAddSub(N, DAG, Subtarget))
    return V;

  // If vectors of i1 are legal, turn (add (zext (vXi1 X)), Y) into
  // (sub Y, (sext (vXi1 X))): a zext of a boolean is 0/+1 where the sext is
  // 0/-1, so the add becomes a sub.
  if (VT.isVector()) {
    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
    if (Op0.getOpcode() == ISD::ZERO_EXTEND &&
        Op0.getOperand(0).getValueType().getVectorElementType() == MVT::i1 &&
        TLI.isTypeLegal(Op0.getOperand(0).getValueType())) {
      SDLoc DL(N);
      SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op0.getOperand(0));
      return DAG.getNode(ISD::SUB, DL, VT, Op1, SExt);
    }

    if (Op1.getOpcode() == ISD::ZERO_EXTEND &&
        Op1.getOperand(0).getValueType().getVectorElementType() == MVT::i1 &&
        TLI.isTypeLegal(Op1.getOperand(0).getValueType())) {
      SDLoc DL(N);
      SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op1.getOperand(0));
      return DAG.getNode(ISD::SUB, DL, VT, Op0, SExt);
    }
  }

  return combineAddOrSubToADCOrSBB(N, DAG);
}

static SDValue combineSub(SDNode *N, SelectionDAG &DAG,
                          TargetLowering::DAGCombinerInfo &DCI,
                          const X86Subtarget &Subtarget) {
  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);

  // TODO: Add NoOpaque handling to isConstantIntBuildVectorOrConstantInt.
  auto IsNonOpaqueConstant = [&](SDValue Op) {
    if (SDNode *C = DAG.isConstantIntBuildVectorOrConstantInt(Op)) {
      if (auto *Cst = dyn_cast<ConstantSDNode>(C))
        return !Cst->isOpaque();
      return true;
    }
    return false;
  };

  // X86 can't encode an immediate LHS of a sub. See if we can push the
  // negation into a preceding instruction. If the RHS of the sub is a XOR
  // with one use and a constant, invert the immediate, saving one register:
  // sub(C1, xor(X, C2)) -> add(xor(X, ~C2), C1+1)
  if (Op1.getOpcode() == ISD::XOR && IsNonOpaqueConstant(Op0) &&
      IsNonOpaqueConstant(Op1.getOperand(1)) && Op1->hasOneUse()) {
    SDLoc DL(N);
    EVT VT = Op0.getValueType();
    SDValue NewXor = DAG.getNode(ISD::XOR, SDLoc(Op1), VT, Op1.getOperand(0),
                                 DAG.getNOT(SDLoc(Op1), Op1.getOperand(1), VT));
    SDValue NewAdd =
        DAG.getNode(ISD::ADD, DL, VT, Op0, DAG.getConstant(1, DL, VT));
    return DAG.getNode(ISD::ADD, DL, VT, NewXor, NewAdd);
  }

  // Try to synthesize horizontal subs from subs of shuffles.
  if (SDValue V = combineToHorizontalAddSub(N, DAG, Subtarget))
    return V;

  return combineAddOrSubToADCOrSBB(N, DAG);
}

static SDValue combineVectorCompare(SDNode *N, SelectionDAG &DAG,
                                    const X86Subtarget &Subtarget) {
  MVT VT = N->getSimpleValueType(0);
  SDLoc DL(N);

  // Comparing a vector with itself: PCMPEQ(x,x) is all-ones and PCMPGT(x,x)
  // is all-zeros.
  if (N->getOperand(0) == N->getOperand(1)) {
    if (N->getOpcode() == X86ISD::PCMPEQ)
      return DAG.getConstant(-1, DL, VT);
    if (N->getOpcode() == X86ISD::PCMPGT)
      return DAG.getConstant(0, DL, VT);
  }

  return SDValue();
}

/// Helper that combines an array of subvector ops as if they were the
/// operands of an ISD::CONCAT_VECTORS node, folding the concatenation into a
/// single wide operation where profitable.
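// For example, concat(pshufd(a, imm), pshufd(b, imm)) can become
// pshufd(concat(a, b), imm) when the 256-bit form is available, replacing two
// shuffles plus a subvector insertion with one wide shuffle.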
static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
                                      ArrayRef<SDValue> Ops, SelectionDAG &DAG,
                                      TargetLowering::DAGCombinerInfo &DCI,
                                      const X86Subtarget &Subtarget) {
  assert(Subtarget.hasAVX() && "AVX assumed for concat_vectors");
  unsigned EltSizeInBits = VT.getScalarSizeInBits();

  if (llvm::all_of(Ops, [](SDValue Op) { return Op.isUndef(); }))
    return DAG.getUNDEF(VT);

  if (llvm::all_of(Ops, [](SDValue Op) {
        return ISD::isBuildVectorAllZeros(Op.getNode());
      }))
    return getZeroVector(VT, Subtarget, DAG, DL);

  SDValue Op0 = Ops[0];
  bool IsSplat = llvm::all_of(Ops, [&Op0](SDValue Op) { return Op == Op0; });

  // Repeated subvectors.
  if (IsSplat &&
      (VT.is256BitVector() || (VT.is512BitVector() && Subtarget.hasAVX512()))) {
    // If this broadcast is inserted into both halves, use a larger broadcast.
    if (Op0.getOpcode() == X86ISD::VBROADCAST)
      return DAG.getNode(Op0.getOpcode(), DL, VT, Op0.getOperand(0));

    // If this scalar/subvector broadcast_load is inserted into both halves,
    // use a larger broadcast_load. Update other uses to use an extracted
    // subvector.
    if (Op0.getOpcode() == X86ISD::VBROADCAST_LOAD ||
        Op0.getOpcode() == X86ISD::SUBV_BROADCAST_LOAD) {
      auto *MemIntr = cast<MemIntrinsicSDNode>(Op0);
      SDVTList Tys = DAG.getVTList(VT, MVT::Other);
      SDValue Ops[] = {MemIntr->getChain(), MemIntr->getBasePtr()};
      SDValue BcastLd = DAG.getMemIntrinsicNode(Op0.getOpcode(), DL, Tys, Ops,
                                                MemIntr->getMemoryVT(),
                                                MemIntr->getMemOperand());
      DAG.ReplaceAllUsesOfValueWith(
          Op0, extractSubVector(BcastLd, 0, DAG, DL, Op0.getValueSizeInBits()));
      DAG.ReplaceAllUsesOfValueWith(SDValue(MemIntr, 1), BcastLd.getValue(1));
      return BcastLd;
    }

    // If this is a simple subvector load repeated across multiple lanes, then
    // broadcast the load. Update other uses to use an extracted subvector.
    if (auto *Ld = dyn_cast<LoadSDNode>(Op0)) {
      if (Ld->isSimple() && !Ld->isNonTemporal() &&
          Ld->getExtensionType() == ISD::NON_EXTLOAD) {
        SDVTList Tys = DAG.getVTList(VT, MVT::Other);
        SDValue Ops[] = {Ld->getChain(), Ld->getBasePtr()};
        SDValue BcastLd =
            DAG.getMemIntrinsicNode(X86ISD::SUBV_BROADCAST_LOAD, DL, Tys, Ops,
                                    Ld->getMemoryVT(), Ld->getMemOperand());
        DAG.ReplaceAllUsesOfValueWith(
            Op0,
            extractSubVector(BcastLd, 0, DAG, DL, Op0.getValueSizeInBits()));
        DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), BcastLd.getValue(1));
        return BcastLd;
      }
    }

    // concat_vectors(movddup(x),movddup(x)) -> broadcast(x)
    if (Op0.getOpcode() == X86ISD::MOVDDUP && VT == MVT::v4f64 &&
        (Subtarget.hasAVX2() || MayFoldLoad(Op0.getOperand(0))))
      return DAG.getNode(X86ISD::VBROADCAST, DL, VT,
                         DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f64,
                                     Op0.getOperand(0),
                                     DAG.getIntPtrConstant(0, DL)));

    // concat_vectors(scalar_to_vector(x),scalar_to_vector(x)) -> broadcast(x)
    if (Op0.getOpcode() == ISD::SCALAR_TO_VECTOR &&
        (Subtarget.hasAVX2() ||
         (EltSizeInBits >= 32 && MayFoldLoad(Op0.getOperand(0)))) &&
        Op0.getOperand(0).getValueType() == VT.getScalarType())
      return DAG.getNode(X86ISD::VBROADCAST, DL, VT, Op0.getOperand(0));

    // concat_vectors(extract_subvector(broadcast(x)),
    //                extract_subvector(broadcast(x))) -> broadcast(x)
    if (Op0.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
        Op0.getOperand(0).getValueType() == VT) {
      if (Op0.getOperand(0).getOpcode() == X86ISD::VBROADCAST ||
          Op0.getOperand(0).getOpcode() == X86ISD::VBROADCAST_LOAD)
        return Op0.getOperand(0);
    }
  }

  // concat(extract_subvector(v0,c0), extract_subvector(v1,c1)) -> vperm2x128.
  // Only concat of subvector high halves, which vperm2x128 is best at.
  // TODO: This should go in combineX86ShufflesRecursively eventually.
  if (VT.is256BitVector() && Ops.size() == 2) {
    SDValue Src0 = peekThroughBitcasts(Ops[0]);
    SDValue Src1 = peekThroughBitcasts(Ops[1]);
    if (Src0.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
        Src1.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
      EVT SrcVT0 = Src0.getOperand(0).getValueType();
      EVT SrcVT1 = Src1.getOperand(0).getValueType();
      unsigned NumSrcElts0 = SrcVT0.getVectorNumElements();
      unsigned NumSrcElts1 = SrcVT1.getVectorNumElements();
      if (SrcVT0.is256BitVector() && SrcVT1.is256BitVector() &&
          Src0.getConstantOperandAPInt(1) == (NumSrcElts0 / 2) &&
          Src1.getConstantOperandAPInt(1) == (NumSrcElts1 / 2)) {
        return DAG.getNode(X86ISD::VPERM2X128, DL, VT,
                           DAG.getBitcast(VT, Src0.getOperand(0)),
                           DAG.getBitcast(VT, Src1.getOperand(0)),
                           DAG.getTargetConstant(0x31, DL, MVT::i8));
      }
    }
  }

  // Repeated opcode: concatenate the operands and perform the operation on
  // the wider type instead.
  if (llvm::all_of(Ops, [Op0](SDValue Op) {
        return Op.getOpcode() == Op0.getOpcode();
      })) {
    auto ConcatSubOperand = [&](MVT VT, ArrayRef<SDValue> SubOps, unsigned I) {
      SmallVector<SDValue> Subs;
      for (SDValue SubOp : SubOps)
        Subs.push_back(SubOp.getOperand(I));
      return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Subs);
    };

    unsigned NumOps = Ops.size();
    switch (Op0.getOpcode()) {
    case X86ISD::SHUFP: {
      // Add SHUFPD support if/when necessary.
      if (!IsSplat && VT.getScalarType() == MVT::f32 &&
          llvm::all_of(Ops, [Op0](SDValue Op) {
            return Op.getOperand(2) == Op0.getOperand(2);
          })) {
        return DAG.getNode(Op0.getOpcode(), DL, VT,
                           ConcatSubOperand(VT, Ops, 0),
                           ConcatSubOperand(VT, Ops, 1), Op0.getOperand(2));
      }
      break;
    }
    case X86ISD::PSHUFHW:
    case X86ISD::PSHUFLW:
    case X86ISD::PSHUFD:
      if (!IsSplat && NumOps == 2 && VT.is256BitVector() &&
          Subtarget.hasInt256() && Op0.getOperand(1) == Ops[1].getOperand(1)) {
        return DAG.getNode(Op0.getOpcode(), DL, VT,
                           ConcatSubOperand(VT, Ops, 0), Op0.getOperand(1));
      }
      LLVM_FALLTHROUGH;
    case X86ISD::VPERMILPI:
      // TODO: Add support for vXf64/vXi64 shuffles.
      if (!IsSplat && NumOps == 2 && (VT == MVT::v8f32 || VT == MVT::v8i32) &&
          Subtarget.hasAVX() && Op0.getOperand(1) == Ops[1].getOperand(1)) {
        SDValue Res = DAG.getBitcast(MVT::v8f32, ConcatSubOperand(VT, Ops, 0));
        Res = DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v8f32, Res,
                          Op0.getOperand(1));
        return DAG.getBitcast(VT, Res);
      }
      break;
    case X86ISD::VPERMV3:
      if (!IsSplat && NumOps == 2 && VT.is512BitVector()) {
        MVT OpVT = Op0.getSimpleValueType();
        int NumSrcElts = OpVT.getVectorNumElements();
        SmallVector<int, 64> ConcatMask;
        for (unsigned i = 0; i != NumOps; ++i) {
          SmallVector<int, 64> SubMask;
          SmallVector<SDValue, 2> SubOps;
          if (!getTargetShuffleMask(Ops[i].getNode(), OpVT, false, SubOps,
                                    SubMask))
            break;
          for (int M : SubMask) {
            if (0 <= M) {
              M += M < NumSrcElts ? 0 : NumSrcElts;
              M += i * NumSrcElts;
            }
            ConcatMask.push_back(M);
          }
        }
        if (ConcatMask.size() == (NumOps * NumSrcElts)) {
          SDValue Src0 = concatSubVectors(Ops[0].getOperand(0),
                                          Ops[1].getOperand(0), DAG, DL);
          SDValue Src1 = concatSubVectors(Ops[0].getOperand(2),
                                          Ops[1].getOperand(2), DAG, DL);
          MVT IntMaskSVT = MVT::getIntegerVT(VT.getScalarSizeInBits());
          MVT IntMaskVT = MVT::getVectorVT(IntMaskSVT, NumOps * NumSrcElts);
          SDValue Mask = getConstVector(ConcatMask, IntMaskVT, DAG, DL, true);
          return DAG.getNode(X86ISD::VPERMV3, DL, VT, Src0, Mask, Src1);
        }
      }
      break;
    case X86ISD::VSHLI:
    case X86ISD::VSRLI:
      // Special case: SHL/SRL AVX1 v4i64 by 32-bits can lower as a shuffle.
      // TODO: Move this to LowerScalarImmediateShift?
      if (VT == MVT::v4i64 && !Subtarget.hasInt256() &&
          llvm::all_of(Ops, [](SDValue Op) {
            return Op.getConstantOperandAPInt(1) == 32;
          })) {
        SDValue Res = DAG.getBitcast(MVT::v8i32, ConcatSubOperand(VT, Ops, 0));
        SDValue Zero = getZeroVector(MVT::v8i32, Subtarget, DAG, DL);
        if (Op0.getOpcode() == X86ISD::VSHLI) {
          Res = DAG.getVectorShuffle(MVT::v8i32, DL, Res, Zero,
                                     {8, 0, 8, 2, 8, 4, 8, 6});
        } else {
          Res = DAG.getVectorShuffle(MVT::v8i32, DL, Res, Zero,
                                     {1, 8, 3, 8, 5, 8, 7, 8});
        }
        return DAG.getBitcast(VT, Res);
      }
      LLVM_FALLTHROUGH;
    case X86ISD::VSRAI:
      if (((VT.is256BitVector() && Subtarget.hasInt256()) ||
           (VT.is512BitVector() && Subtarget.useAVX512Regs() &&
            (EltSizeInBits >= 32 || Subtarget.useBWIRegs()))) &&
          llvm::all_of(Ops, [Op0](SDValue Op) {
            return Op0.getOperand(1) == Op.getOperand(1);
          })) {
        return DAG.getNode(Op0.getOpcode(), DL, VT,
                           ConcatSubOperand(VT, Ops, 0), Op0.getOperand(1));
      }
      break;
    case X86ISD::VPERMI:
    case X86ISD::VROTLI:
    case X86ISD::VROTRI:
      if (VT.is512BitVector() && Subtarget.useAVX512Regs() &&
          llvm::all_of(Ops, [Op0](SDValue Op) {
            return Op0.getOperand(1) == Op.getOperand(1);
          })) {
        return DAG.getNode(Op0.getOpcode(), DL, VT,
                           ConcatSubOperand(VT, Ops, 0), Op0.getOperand(1));
      }
      break;
    case ISD::AND:
    case ISD::OR:
    case ISD::XOR:
    case X86ISD::ANDNP:
      // TODO: Add 256-bit support.
      if (!IsSplat && VT.is512BitVector()) {
        MVT SrcVT = Op0.getOperand(0).getSimpleValueType();
        SrcVT = MVT::getVectorVT(SrcVT.getScalarType(),
                                 NumOps * SrcVT.getVectorNumElements());
        return DAG.getNode(Op0.getOpcode(), DL, VT,
                           ConcatSubOperand(SrcVT, Ops, 0),
                           ConcatSubOperand(SrcVT, Ops, 1));
      }
      break;
    case X86ISD::HADD:
    case X86ISD::HSUB:
    case X86ISD::FHADD:
    case X86ISD::FHSUB:
    case X86ISD::PACKSS:
    case X86ISD::PACKUS:
      if (!IsSplat && VT.is256BitVector() &&
          (VT.isFloatingPoint() || Subtarget.hasInt256())) {
        MVT SrcVT = Op0.getOperand(0).getSimpleValueType();
        SrcVT = MVT::getVectorVT(SrcVT.getScalarType(),
                                 NumOps * SrcVT.getVectorNumElements());
        return DAG.getNode(Op0.getOpcode(), DL, VT,
                           ConcatSubOperand(SrcVT, Ops, 0),
                           ConcatSubOperand(SrcVT, Ops, 1));
      }
      break;
    case X86ISD::PALIGNR:
      if (!IsSplat &&
          ((VT.is256BitVector() && Subtarget.hasInt256()) ||
           (VT.is512BitVector() && Subtarget.useBWIRegs())) &&
          llvm::all_of(Ops, [Op0](SDValue Op) {
            return Op0.getOperand(2) == Op.getOperand(2);
          })) {
        return DAG.getNode(Op0.getOpcode(), DL, VT,
                           ConcatSubOperand(VT, Ops, 0),
                           ConcatSubOperand(VT, Ops, 1), Op0.getOperand(2));
      }
      break;
    }
  }

  // Fold subvector loads into one.
  // If needed, look through bitcasts to get to the load.
  if (auto *FirstLd = dyn_cast<LoadSDNode>(peekThroughBitcasts(Op0))) {
    bool Fast;
    const X86TargetLowering *TLI = Subtarget.getTargetLowering();
    if (TLI->allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
                                *FirstLd->getMemOperand(), &Fast) &&
        Fast) {
      if (SDValue Ld =
              EltsFromConsecutiveLoads(VT, Ops, DL, DAG, Subtarget, false))
        return Ld;
    }
  }

  return SDValue();
}

static SDValue combineConcatVectors(SDNode *N, SelectionDAG &DAG,
                                    TargetLowering::DAGCombinerInfo &DCI,
                                    const X86Subtarget &Subtarget) {
  EVT VT = N->getValueType(0);
  EVT SrcVT = N->getOperand(0).getValueType();
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  // Don't do anything for i1 vectors.
  if (VT.getVectorElementType() == MVT::i1)
    return SDValue();

  if (Subtarget.hasAVX() && TLI.isTypeLegal(VT) && TLI.isTypeLegal(SrcVT)) {
    SmallVector<SDValue, 4> Ops(N->op_begin(), N->op_end());
    if (SDValue R = combineConcatVectorOps(SDLoc(N), VT.getSimpleVT(), Ops, DAG,
                                           DCI, Subtarget))
      return R;
  }

  return SDValue();
}

static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG,
                                      TargetLowering::DAGCombinerInfo &DCI,
                                      const X86Subtarget &Subtarget) {
  if (DCI.isBeforeLegalizeOps())
    return SDValue();

  MVT OpVT = N->getSimpleValueType(0);

  bool IsI1Vector = OpVT.getVectorElementType() == MVT::i1;

  SDLoc dl(N);
  SDValue Vec = N->getOperand(0);
  SDValue SubVec = N->getOperand(1);

  uint64_t IdxVal = N->getConstantOperandVal(2);
  MVT SubVecVT = SubVec.getSimpleValueType();

  if (Vec.isUndef() && SubVec.isUndef())
    return DAG.getUNDEF(OpVT);

  // Inserting undefs/zeros into zeros/undefs is a zero vector.
  if ((Vec.isUndef() || ISD::isBuildVectorAllZeros(Vec.getNode())) &&
      (SubVec.isUndef() || ISD::isBuildVectorAllZeros(SubVec.getNode())))
    return getZeroVector(OpVT, Subtarget, DAG, dl);

  if (ISD::isBuildVectorAllZeros(Vec.getNode())) {
    // If we're inserting into a zero vector and then into a larger zero
    // vector, just insert into the larger zero vector directly.
    if (SubVec.getOpcode() == ISD::INSERT_SUBVECTOR &&
        ISD::isBuildVectorAllZeros(SubVec.getOperand(0).getNode())) {
      uint64_t Idx2Val = SubVec.getConstantOperandVal(2);
      return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT,
                         getZeroVector(OpVT, Subtarget, DAG, dl),
                         SubVec.getOperand(1),
                         DAG.getIntPtrConstant(IdxVal + Idx2Val, dl));
    }

    // If we're inserting the low part of another insert-into-zeros, and the
    // inner inserted subvector still fits, skip the extract and insert the
    // inner subvector into zeros directly.
    if (SubVec.getOpcode() == ISD::EXTRACT_SUBVECTOR && IdxVal == 0 &&
        isNullConstant(SubVec.getOperand(1)) &&
        SubVec.getOperand(0).getOpcode() == ISD::INSERT_SUBVECTOR) {
      SDValue Ins = SubVec.getOperand(0);
      if (isNullConstant(Ins.getOperand(2)) &&
          ISD::isBuildVectorAllZeros(Ins.getOperand(0).getNode()) &&
          Ins.getOperand(1).getValueSizeInBits().getFixedSize() <=
              SubVecVT.getFixedSizeInBits())
        return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT,
                           getZeroVector(OpVT, Subtarget, DAG, dl),
                           Ins.getOperand(1), N->getOperand(2));
    }
  }

  // The remaining folds don't apply to vXi1 vectors.
  if (IsI1Vector)
    return SDValue();

  // If this is an insert of an extract, combine to a shuffle. Don't do this
  // if the insert or extract can be represented with a subregister operation.
  if (SubVec.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
      SubVec.getOperand(0).getSimpleValueType() == OpVT &&
      (IdxVal != 0 ||
       !(Vec.isUndef() || ISD::isBuildVectorAllZeros(Vec.getNode())))) {
    int ExtIdxVal = SubVec.getConstantOperandVal(1);
    if (ExtIdxVal != 0) {
      int VecNumElts = OpVT.getVectorNumElements();
      int SubVecNumElts = SubVecVT.getVectorNumElements();
      SmallVector<int, 64> Mask(VecNumElts);
      // First create an identity shuffle mask.
      for (int i = 0; i != VecNumElts; ++i)
        Mask[i] = i;
      // Now insert the extracted portion.
      for (int i = 0; i != SubVecNumElts; ++i)
        Mask[i + IdxVal] = i + ExtIdxVal + VecNumElts;

      return DAG.getVectorShuffle(OpVT, dl, Vec, SubVec.getOperand(0), Mask);
    }
  }

  // Match concat_vector style patterns.
  SmallVector<SDValue, 2> SubVectorOps;
  if (collectConcatOps(N, SubVectorOps)) {
    if (SDValue Fold =
            combineConcatVectorOps(dl, OpVT, SubVectorOps, DAG, DCI, Subtarget))
      return Fold;

    // If we're inserting all zeros into the upper half, change this to
    // a concat with zero. We will match this to a move with implicit
    // upper bit zeroing during isel. We do this here because we don't want
    // combineConcatVectorOps to create INSERT_SUBVECTOR from CONCAT_VECTORS.
    if (SubVectorOps.size() == 2 &&
        ISD::isBuildVectorAllZeros(SubVectorOps[1].getNode()))
      return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT,
                         getZeroVector(OpVT, Subtarget, DAG, dl),
                         SubVectorOps[0], DAG.getIntPtrConstant(0, dl));
  }

  // If this is a broadcast insert into an upper undef, use a larger broadcast.
  if (Vec.isUndef() && IdxVal != 0 && SubVec.getOpcode() == X86ISD::VBROADCAST)
    return DAG.getNode(X86ISD::VBROADCAST, dl, OpVT, SubVec.getOperand(0));

  // If this is a broadcast load inserted into an upper undef, use a larger
  // broadcast load.
  if (Vec.isUndef() && IdxVal != 0 && SubVec.hasOneUse() &&
      SubVec.getOpcode() == X86ISD::VBROADCAST_LOAD) {
    auto *MemIntr = cast<MemIntrinsicSDNode>(SubVec);
    SDVTList Tys = DAG.getVTList(OpVT, MVT::Other);
    SDValue Ops[] = { MemIntr->getChain(), MemIntr->getBasePtr() };
    SDValue BcastLd =
        DAG.getMemIntrinsicNode(X86ISD::VBROADCAST_LOAD, dl, Tys, Ops,
                                MemIntr->getMemoryVT(),
                                MemIntr->getMemOperand());
    DAG.ReplaceAllUsesOfValueWith(SDValue(MemIntr, 1), BcastLd.getValue(1));
    return BcastLd;
  }

  // If we're splatting the lower half subvector of a full vector load into the
  // upper half, attempt to create a subvector broadcast.
  if (IdxVal == (OpVT.getVectorNumElements() / 2) && SubVec.hasOneUse() &&
      Vec.getValueSizeInBits() == (2 * SubVec.getValueSizeInBits())) {
    auto *VecLd = dyn_cast<LoadSDNode>(Vec);
    auto *SubLd = dyn_cast<LoadSDNode>(SubVec);
    if (VecLd && SubLd &&
        DAG.areNonVolatileConsecutiveLoads(SubLd, VecLd,
                                           SubVec.getValueSizeInBits() / 8, 0))
      return getBROADCAST_LOAD(X86ISD::SUBV_BROADCAST_LOAD, dl, OpVT, SubVecVT,
                               SubLd, 0, DAG);
  }

  return SDValue();
}

/// If we are extracting a subvector of a vector select and the select
/// condition is composed of concatenated vectors, try to narrow the select
/// width. This is a common pattern for AVX1 integer code: 256-bit selects may
/// be legal, but there is almost no integer math/logic available for 256-bit.
/// This function should only be called with legal types (otherwise, the calls
/// to get simple value types will assert).
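// For example, with v8i32 operands on AVX1,
//   extract_subvector (vselect (concat C0, C1), T, F), 4
// becomes a v4i32 vselect of C1 and the high halves of T and F, so the
// integer select runs on 128-bit ops instead of emulated 256-bit ones.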
static SDValue narrowExtractedVectorSelect(SDNode *Ext, SelectionDAG &DAG) {
  SDValue Sel = peekThroughBitcasts(Ext->getOperand(0));
  SmallVector<SDValue, 4> CatOps;
  if (Sel.getOpcode() != ISD::VSELECT ||
      !collectConcatOps(Sel.getOperand(0).getNode(), CatOps))
    return SDValue();

  // Note: We assume simple value types because this should only be called
  // with legal operations/types.
  // TODO: This can be extended to handle extraction to 256-bits.
  MVT VT = Ext->getSimpleValueType(0);
  if (!VT.is128BitVector())
    return SDValue();

  MVT SelCondVT = Sel.getOperand(0).getSimpleValueType();
  if (!SelCondVT.is256BitVector() && !SelCondVT.is512BitVector())
    return SDValue();

  MVT WideVT = Ext->getOperand(0).getSimpleValueType();
  MVT SelVT = Sel.getSimpleValueType();
  assert((SelVT.is256BitVector() || SelVT.is512BitVector()) &&
         "Unexpected vector type with legal operations");

  unsigned SelElts = SelVT.getVectorNumElements();
  unsigned CastedElts = WideVT.getVectorNumElements();
  unsigned ExtIdx = Ext->getConstantOperandVal(1);
  if (SelElts % CastedElts == 0) {
    // The select has the same or more (narrower) elements than the extract
    // operand. The extraction index gets scaled by that ratio.
    ExtIdx *= (SelElts / CastedElts);
  } else if (CastedElts % SelElts == 0) {
    // The select has fewer (wider) elements than the extract operand. Make
    // sure that the extraction index can be divided evenly.
    unsigned IndexDivisor = CastedElts / SelElts;
    if (ExtIdx % IndexDivisor != 0)
      return SDValue();
    ExtIdx /= IndexDivisor;
  } else {
    llvm_unreachable("Element count of simple vector types are not divisible?");
  }

  unsigned NarrowingFactor = WideVT.getSizeInBits() / VT.getSizeInBits();
  unsigned NarrowElts = SelElts / NarrowingFactor;
  MVT NarrowSelVT = MVT::getVectorVT(SelVT.getVectorElementType(), NarrowElts);
  SDLoc DL(Ext);
  SDValue ExtCond = extract128BitVector(Sel.getOperand(0), ExtIdx, DAG, DL);
  SDValue ExtT = extract128BitVector(Sel.getOperand(1), ExtIdx, DAG, DL);
  SDValue ExtF = extract128BitVector(Sel.getOperand(2), ExtIdx, DAG, DL);
  SDValue NarrowSel = DAG.getSelect(DL, NarrowSelVT, ExtCond, ExtT, ExtF);
  return DAG.getBitcast(VT, NarrowSel);
}

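// Narrow extract_subvector through the producing node so the upper half of a
// wide value is never computed. For instance, extracting the low 128 bits of
// (zero_extend v16i8 to v16i16) becomes ZERO_EXTEND_VECTOR_INREG of the
// source, converting only the eight bytes that are actually demanded.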
50583 | static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG, |
50584 | TargetLowering::DAGCombinerInfo &DCI, |
50585 | const X86Subtarget &Subtarget) { |
50586 | |
50587 | |
50588 | |
50589 | |
50590 | |
50591 | |
50592 | |
50593 | |
50594 | |
50595 | |
50596 | if (!N->getValueType(0).isSimple()) |
50597 | return SDValue(); |
50598 | |
50599 | MVT VT = N->getSimpleValueType(0); |
50600 | SDValue InVec = N->getOperand(0); |
50601 | unsigned IdxVal = N->getConstantOperandVal(1); |
50602 | SDValue InVecBC = peekThroughBitcasts(InVec); |
50603 | EVT InVecVT = InVec.getValueType(); |
50604 | unsigned SizeInBits = VT.getSizeInBits(); |
50605 | unsigned InSizeInBits = InVecVT.getSizeInBits(); |
50606 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
50607 | |
50608 | if (Subtarget.hasAVX() && !Subtarget.hasAVX2() && |
50609 | TLI.isTypeLegal(InVecVT) && |
50610 | InSizeInBits == 256 && InVecBC.getOpcode() == ISD::AND) { |
50611 | auto isConcatenatedNot = [](SDValue V) { |
50612 | V = peekThroughBitcasts(V); |
50613 | if (!isBitwiseNot(V)) |
50614 | return false; |
50615 | SDValue NotOp = V->getOperand(0); |
50616 | return peekThroughBitcasts(NotOp).getOpcode() == ISD::CONCAT_VECTORS; |
50617 | }; |
50618 | if (isConcatenatedNot(InVecBC.getOperand(0)) || |
50619 | isConcatenatedNot(InVecBC.getOperand(1))) { |
50620 | |
50621 | SDValue Concat = splitVectorIntBinary(InVecBC, DAG); |
50622 | return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), VT, |
50623 | DAG.getBitcast(InVecVT, Concat), N->getOperand(1)); |
50624 | } |
50625 | } |
50626 | |
50627 | if (DCI.isBeforeLegalizeOps()) |
50628 | return SDValue(); |
50629 | |
50630 | if (SDValue V = narrowExtractedVectorSelect(N, DAG)) |
50631 | return V; |
50632 | |
50633 | if (ISD::isBuildVectorAllZeros(InVec.getNode())) |
50634 | return getZeroVector(VT, Subtarget, DAG, SDLoc(N)); |
50635 | |
50636 | if (ISD::isBuildVectorAllOnes(InVec.getNode())) { |
50637 | if (VT.getScalarType() == MVT::i1) |
50638 | return DAG.getConstant(1, SDLoc(N), VT); |
50639 | return getOnesVector(VT, DAG, SDLoc(N)); |
50640 | } |
50641 | |
50642 | if (InVec.getOpcode() == ISD::BUILD_VECTOR) |
50643 | return DAG.getBuildVector( |
50644 | VT, SDLoc(N), |
50645 | InVec.getNode()->ops().slice(IdxVal, VT.getVectorNumElements())); |
50646 | |
50647 | |
50648 | |
50649 | |
50650 | if (VT.getVectorElementType() != MVT::i1 && |
50651 | InVec.getOpcode() == ISD::INSERT_SUBVECTOR && IdxVal == 0 && |
50652 | InVec.hasOneUse() && isNullConstant(InVec.getOperand(2)) && |
50653 | ISD::isBuildVectorAllZeros(InVec.getOperand(0).getNode()) && |
50654 | InVec.getOperand(1).getValueSizeInBits() <= SizeInBits) { |
50655 | SDLoc DL(N); |
50656 | return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, |
50657 | getZeroVector(VT, Subtarget, DAG, DL), |
50658 | InVec.getOperand(1), InVec.getOperand(2)); |
50659 | } |
50660 | |
50661 | |
50662 | |
50663 | |
50664 | if (IdxVal != 0 && (InVec.getOpcode() == X86ISD::VBROADCAST || |
50665 | InVec.getOpcode() == X86ISD::VBROADCAST_LOAD || |
50666 | DAG.isSplatValue(InVec, false))) |
50667 | return extractSubVector(InVec, 0, DAG, SDLoc(N), SizeInBits); |
50668 | |
50669 | |
50670 | if (IdxVal != 0 && InVec.getOpcode() == X86ISD::SUBV_BROADCAST_LOAD && |
50671 | cast<MemIntrinsicSDNode>(InVec)->getMemoryVT() == VT) |
50672 | return extractSubVector(InVec, 0, DAG, SDLoc(N), SizeInBits); |
50673 | |
50674 | |
50675 | if ((InSizeInBits % SizeInBits) == 0 && |
50676 | (IdxVal % VT.getVectorNumElements()) == 0) { |
50677 | SmallVector<int, 32> ShuffleMask; |
50678 | SmallVector<int, 32> ScaledMask; |
50679 | SmallVector<SDValue, 2> ShuffleInputs; |
50680 | unsigned NumSubVecs = InSizeInBits / SizeInBits; |
50681 | |
50682 | if (getTargetShuffleInputs(InVecBC, ShuffleInputs, ShuffleMask, DAG) && |
50683 | scaleShuffleElements(ShuffleMask, NumSubVecs, ScaledMask)) { |
50684 | unsigned SubVecIdx = IdxVal / VT.getVectorNumElements(); |
50685 | if (ScaledMask[SubVecIdx] == SM_SentinelUndef) |
50686 | return DAG.getUNDEF(VT); |
50687 | if (ScaledMask[SubVecIdx] == SM_SentinelZero) |
50688 | return getZeroVector(VT, Subtarget, DAG, SDLoc(N)); |
50689 | SDValue Src = ShuffleInputs[ScaledMask[SubVecIdx] / NumSubVecs]; |
50690 | if (Src.getValueSizeInBits() == InSizeInBits) { |
50691 | unsigned SrcSubVecIdx = ScaledMask[SubVecIdx] % NumSubVecs; |
50692 | unsigned SrcEltIdx = SrcSubVecIdx * VT.getVectorNumElements(); |
50693 | return extractSubVector(DAG.getBitcast(InVecVT, Src), SrcEltIdx, DAG, |
50694 | SDLoc(N), SizeInBits); |
50695 | } |
50696 | } |
50697 | } |
50698 | |
50699 | |
50700 | |
50701 | unsigned InOpcode = InVec.getOpcode(); |
50702 | if (IdxVal == 0 && InVec.hasOneUse()) { |
50703 | if (VT == MVT::v2f64 && InVecVT == MVT::v4f64) { |
50704 | |
50705 | if (InOpcode == ISD::SINT_TO_FP && |
50706 | InVec.getOperand(0).getValueType() == MVT::v4i32) { |
50707 | return DAG.getNode(X86ISD::CVTSI2P, SDLoc(N), VT, InVec.getOperand(0)); |
50708 | } |
50709 | |
50710 | if (InOpcode == ISD::UINT_TO_FP && Subtarget.hasVLX() && |
50711 | InVec.getOperand(0).getValueType() == MVT::v4i32) { |
50712 | return DAG.getNode(X86ISD::CVTUI2P, SDLoc(N), VT, InVec.getOperand(0)); |
50713 | } |
50714 | |
50715 | if (InOpcode == ISD::FP_EXTEND && |
50716 | InVec.getOperand(0).getValueType() == MVT::v4f32) { |
50717 | return DAG.getNode(X86ISD::VFPEXT, SDLoc(N), VT, InVec.getOperand(0)); |
50718 | } |
50719 | } |
50720 | if ((InOpcode == ISD::ANY_EXTEND || |
50721 | InOpcode == ISD::ANY_EXTEND_VECTOR_INREG || |
50722 | InOpcode == ISD::ZERO_EXTEND || |
50723 | InOpcode == ISD::ZERO_EXTEND_VECTOR_INREG || |
50724 | InOpcode == ISD::SIGN_EXTEND || |
50725 | InOpcode == ISD::SIGN_EXTEND_VECTOR_INREG) && |
50726 | (SizeInBits == 128 || SizeInBits == 256) && |
50727 | InVec.getOperand(0).getValueSizeInBits() >= SizeInBits) { |
50728 | SDLoc DL(N); |
50729 | SDValue Ext = InVec.getOperand(0); |
50730 | if (Ext.getValueSizeInBits() > SizeInBits) |
50731 | Ext = extractSubVector(Ext, 0, DAG, DL, SizeInBits); |
50732 | unsigned ExtOp = getOpcode_EXTEND_VECTOR_INREG(InOpcode); |
50733 | return DAG.getNode(ExtOp, DL, VT, Ext); |
50734 | } |
50735 | if (InOpcode == ISD::VSELECT && |
50736 | InVec.getOperand(0).getValueType().is256BitVector() && |
50737 | InVec.getOperand(1).getValueType().is256BitVector() && |
50738 | InVec.getOperand(2).getValueType().is256BitVector()) { |
50739 | SDLoc DL(N); |
50740 | SDValue Ext0 = extractSubVector(InVec.getOperand(0), 0, DAG, DL, 128); |
50741 | SDValue Ext1 = extractSubVector(InVec.getOperand(1), 0, DAG, DL, 128); |
50742 | SDValue Ext2 = extractSubVector(InVec.getOperand(2), 0, DAG, DL, 128); |
50743 | return DAG.getNode(InOpcode, DL, VT, Ext0, Ext1, Ext2); |
50744 | } |
50745 | if (InOpcode == ISD::TRUNCATE && Subtarget.hasVLX() && |
50746 | (VT.is128BitVector() || VT.is256BitVector())) { |
50747 | SDLoc DL(N); |
50748 | SDValue InVecSrc = InVec.getOperand(0); |
50749 | unsigned Scale = InVecSrc.getValueSizeInBits() / InSizeInBits; |
50750 | SDValue Ext = extractSubVector(InVecSrc, 0, DAG, DL, Scale * SizeInBits); |
50751 | return DAG.getNode(InOpcode, DL, VT, Ext); |
50752 | } |
50753 | } |
50754 | |
50755 | |
50756 | |
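      | // VSHLI/VSRLI on 64-bit lanes act independently per lane, so the shift |
      | // commutes with subvector extraction: extract(shift(x, 32)) can become |
      | // shift(extract(x), 32) on the narrower type. |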
50757 | if ((InOpcode == X86ISD::VSHLI || InOpcode == X86ISD::VSRLI) && |
50758 | InVecVT.getScalarSizeInBits() == 64 && |
50759 | InVec.getConstantOperandAPInt(1) == 32) { |
50760 | SDLoc DL(N); |
50761 | SDValue Ext = |
50762 | extractSubVector(InVec.getOperand(0), IdxVal, DAG, DL, SizeInBits); |
50763 | return DAG.getNode(InOpcode, DL, VT, Ext, InVec.getOperand(1)); |
50764 | } |
50765 | |
50766 | return SDValue(); |
50767 | } |
50768 | |
50769 | static SDValue combineScalarToVector(SDNode *N, SelectionDAG &DAG) { |
50770 | EVT VT = N->getValueType(0); |
50771 | SDValue Src = N->getOperand(0); |
50772 | SDLoc DL(N); |
50773 | |
50774 | |
50775 | |
50776 | |
50777 | |
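      | // Only bit 0 of a v1i1 element is observable, so a mask by 1 on the |
      | // scalar is redundant: |
      | //   (v1i1 scalar_to_vector (and X, 1)) -> (v1i1 scalar_to_vector X) |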
50778 | if (VT == MVT::v1i1 && Src.getOpcode() == ISD::AND && Src.hasOneUse()) |
50779 | if (auto *C = dyn_cast<ConstantSDNode>(Src.getOperand(1))) |
50780 | if (C->getAPIntValue().isOneValue()) |
50781 | return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v1i1, |
50782 | Src.getOperand(0)); |
50783 | |
50784 | |
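      | // Re-vectorizing element 0 of a vXi1 vector is just a low-subvector |
      | // extraction; the index operand (a constant 0) doubles as the |
      | // extract_subvector offset. |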
50785 | if (VT == MVT::v1i1 && Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT && |
50786 | Src.hasOneUse() && Src.getOperand(0).getValueType().isVector() && |
50787 | Src.getOperand(0).getValueType().getVectorElementType() == MVT::i1) |
50788 | if (auto *C = dyn_cast<ConstantSDNode>(Src.getOperand(1))) |
50789 | if (C->isNullValue()) |
50790 | return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Src.getOperand(0), |
50791 | Src.getOperand(1)); |
50792 | |
50793 | |
50794 | |
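      | // Only the low 32 bits of an any-extended (or extloaded) i64 are |
      | // defined, so build a v4i32 from the 32-bit value and bitcast, avoiding |
      | // a 64-bit GPR-to-XMM transfer: |
      | //   (v2i64 scalar_to_vector (i64 anyext i32:$x)) |
      | //     -> (bitcast (v4i32 scalar_to_vector $x)) |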
50795 | if (VT == MVT::v2i64 || VT == MVT::v2f64) { |
50796 | auto IsAnyExt64 = [](SDValue Op) { |
50797 | if (Op.getValueType() != MVT::i64 || !Op.hasOneUse()) |
50798 | return SDValue(); |
50799 | if (Op.getOpcode() == ISD::ANY_EXTEND && |
50800 | Op.getOperand(0).getScalarValueSizeInBits() <= 32) |
50801 | return Op.getOperand(0); |
50802 | if (auto *Ld = dyn_cast<LoadSDNode>(Op)) |
50803 | if (Ld->getExtensionType() == ISD::EXTLOAD && |
50804 | Ld->getMemoryVT().getScalarSizeInBits() <= 32) |
50805 | return Op; |
50806 | return SDValue(); |
50807 | }; |
50808 | if (SDValue ExtSrc = IsAnyExt64(peekThroughOneUseBitcasts(Src))) |
50809 | return DAG.getBitcast( |
50810 | VT, DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v4i32, |
50811 | DAG.getAnyExtOrTrunc(ExtSrc, DL, MVT::i32))); |
50812 | } |
50813 | |
50814 | |
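      | // scalar_to_vector of an i64 bitcast from MMX is exactly MOVQ2DQ. |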
50815 | if (VT == MVT::v2i64 && Src.getOpcode() == ISD::BITCAST && |
50816 | Src.getOperand(0).getValueType() == MVT::x86mmx) |
50817 | return DAG.getNode(X86ISD::MOVQ2DQ, DL, VT, Src.getOperand(0)); |
50818 | |
50819 | |
50820 | |
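      | // If some user already broadcasts this scalar at least as wide as VT, |
      | // reuse that broadcast (extracting its low subvector if it is wider) |
      | // rather than building a second vector from the same value. |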
50821 | if (VT.getScalarType() == Src.getValueType()) |
50822 | for (SDNode *User : Src->uses()) |
50823 | if (User->getOpcode() == X86ISD::VBROADCAST && |
50824 | Src == User->getOperand(0)) { |
50825 | unsigned SizeInBits = VT.getFixedSizeInBits(); |
50826 | unsigned BroadcastSizeInBits = |
50827 | User->getValueSizeInBits(0).getFixedSize(); |
50828 | if (BroadcastSizeInBits == SizeInBits) |
50829 | return SDValue(User, 0); |
50830 | if (BroadcastSizeInBits > SizeInBits) |
50831 | return extractSubVector(SDValue(User, 0), 0, DAG, DL, SizeInBits); |
50832 | |
50833 | |
50834 | } |
50835 | |
50836 | return SDValue(); |
50837 | } |
50838 | |
50839 | |
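      | // PMULDQ/PMULUDQ produce vXi64 from the even i32 lanes of each operand; |
      | // the folds below all build on that fact. |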
50840 | static SDValue combinePMULDQ(SDNode *N, SelectionDAG &DAG, |
50841 | TargetLowering::DAGCombinerInfo &DCI, |
50842 | const X86Subtarget &Subtarget) { |
50843 | SDValue LHS = N->getOperand(0); |
50844 | SDValue RHS = N->getOperand(1); |
50845 | |
50846 | |
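      | // Canonicalize constant multiplicands to the RHS so the folds below |
      | // only need to inspect one side. |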
50847 | if (DAG.isConstantIntBuildVectorOrConstantInt(LHS) && |
50848 | !DAG.isConstantIntBuildVectorOrConstantInt(RHS)) |
50849 | return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), RHS, LHS); |
50850 | |
50851 | |
50852 | |
50853 | if (ISD::isBuildVectorAllZeros(RHS.getNode())) |
50854 | return DAG.getConstant(0, SDLoc(N), N->getValueType(0)); |
50855 | |
50856 | |
50857 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
50858 | if (TLI.SimplifyDemandedBits(SDValue(N, 0), APInt::getAllOnesValue(64), DCI)) |
50859 | return SDValue(N, 0); |
50860 | |
50861 | |
50862 | |
50863 | |
50864 | |
50865 | |
50866 | |
50867 | |
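      | // PMULDQ/PMULUDQ read only the low 32 bits of each 64-bit lane, so a |
      | // full *_EXTEND_VECTOR_INREG of a v4i32 source is overkill; a shuffle |
      | // that parks elements 0 and 1 in the even lanes is sufficient, e.g. |
      | //   pmuludq (zext_inreg v4i32:$x), $y |
      | //     -> pmuludq (bitcast (shuffle $x, $x, {0,-1,1,-1})), $y |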
50868 | if (N->getValueType(0) == MVT::v2i64 && LHS.hasOneUse() && |
50869 | (LHS.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG || |
50870 | LHS.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG) && |
50871 | LHS.getOperand(0).getValueType() == MVT::v4i32) { |
50872 | SDLoc dl(N); |
50873 | LHS = DAG.getVectorShuffle(MVT::v4i32, dl, LHS.getOperand(0), |
50874 | LHS.getOperand(0), { 0, -1, 1, -1 }); |
50875 | LHS = DAG.getBitcast(MVT::v2i64, LHS); |
50876 | return DAG.getNode(N->getOpcode(), dl, MVT::v2i64, LHS, RHS); |
50877 | } |
50878 | if (N->getValueType(0) == MVT::v2i64 && RHS.hasOneUse() && |
50879 | (RHS.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG || |
50880 | RHS.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG) && |
50881 | RHS.getOperand(0).getValueType() == MVT::v4i32) { |
50882 | SDLoc dl(N); |
50883 | RHS = DAG.getVectorShuffle(MVT::v4i32, dl, RHS.getOperand(0), |
50884 | RHS.getOperand(0), { 0, -1, 1, -1 }); |
50885 | RHS = DAG.getBitcast(MVT::v2i64, RHS); |
50886 | return DAG.getNode(N->getOpcode(), dl, MVT::v2i64, LHS, RHS); |
50887 | } |
50888 | |
50889 | return SDValue(); |
50890 | } |
50891 | |
50892 | static SDValue combineEXTEND_VECTOR_INREG(SDNode *N, SelectionDAG &DAG, |
50893 | TargetLowering::DAGCombinerInfo &DCI, |
50894 | const X86Subtarget &Subtarget) { |
50895 | EVT VT = N->getValueType(0); |
50896 | SDValue In = N->getOperand(0); |
50897 | unsigned Opcode = N->getOpcode(); |
50898 | unsigned InOpcode = In.getOpcode(); |
50899 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
50900 | |
50901 | |
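      | // Fold extend_vector_inreg (load) into a single extending load when the |
      | // target supports it (any-extend counts as zero-extend here); the old |
      | // load's chain users are rewired to the new extending load. |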
50902 | if (!DCI.isBeforeLegalizeOps() && ISD::isNormalLoad(In.getNode()) && |
50903 | In.hasOneUse()) { |
50904 | auto *Ld = cast<LoadSDNode>(In); |
50905 | if (Ld->isSimple()) { |
50906 | MVT SVT = In.getSimpleValueType().getVectorElementType(); |
50907 | ISD::LoadExtType Ext = Opcode == ISD::SIGN_EXTEND_VECTOR_INREG |
50908 | ? ISD::SEXTLOAD |
50909 | : ISD::ZEXTLOAD; |
50910 | EVT MemVT = VT.changeVectorElementType(SVT); |
50911 | if (TLI.isLoadExtLegal(Ext, VT, MemVT)) { |
50912 | SDValue Load = |
50913 | DAG.getExtLoad(Ext, SDLoc(N), VT, Ld->getChain(), Ld->getBasePtr(), |
50914 | Ld->getPointerInfo(), MemVT, Ld->getOriginalAlign(), |
50915 | Ld->getMemOperand()->getFlags()); |
50916 | DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Load.getValue(1)); |
50917 | return Load; |
50918 | } |
50919 | } |
50920 | } |
50921 | |
50922 | |
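      | // Two stacked in-register extensions of the same kind compose into one |
      | // extension straight from the inner source. |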
50923 | if (Opcode == InOpcode) |
50924 | return DAG.getNode(Opcode, SDLoc(N), VT, In.getOperand(0)); |
50925 | |
50926 | |
50927 | |
50928 | |
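      | // Look through a zero-offset extract_subvector of a matching full |
      | // extend: when the original (pre-extend) source is exactly as wide as |
      | // the extracted value, the in-reg extend can consume it directly. |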
50929 | if (InOpcode == ISD::EXTRACT_SUBVECTOR && In.getConstantOperandVal(1) == 0 && |
50930 | In.getOperand(0).getOpcode() == getOpcode_EXTEND(Opcode) && |
50931 | In.getOperand(0).getOperand(0).getValueSizeInBits() == |
50932 | In.getValueSizeInBits()) |
50933 | return DAG.getNode(Opcode, SDLoc(N), VT, In.getOperand(0).getOperand(0)); |
50934 | |
50935 | |
50936 | |
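      | // Any/zero extend-in-reg is representable as a shuffle (pmovzx needs |
      | // SSE4.1), so let the recursive shuffle combiner have a look too. |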
50937 | if (Opcode == ISD::ANY_EXTEND_VECTOR_INREG || |
50938 | (Opcode == ISD::ZERO_EXTEND_VECTOR_INREG && Subtarget.hasSSE41())) { |
50939 | SDValue Op(N, 0); |
50940 | if (TLI.isTypeLegal(VT) && TLI.isTypeLegal(In.getValueType())) |
50941 | if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget)) |
50942 | return Res; |
50943 | } |
50944 | |
50945 | return SDValue(); |
50946 | } |
50947 | |
50948 | static SDValue combineKSHIFT(SDNode *N, SelectionDAG &DAG, |
50949 | TargetLowering::DAGCombinerInfo &DCI) { |
50950 | EVT VT = N->getValueType(0); |
50951 | |
50952 | if (ISD::isBuildVectorAllZeros(N->getOperand(0).getNode())) |
50953 | return DAG.getConstant(0, SDLoc(N), VT); |
50954 | |
50955 | APInt KnownUndef, KnownZero; |
50956 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
50957 | APInt DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements()); |
50958 | if (TLI.SimplifyDemandedVectorElts(SDValue(N, 0), DemandedElts, KnownUndef, |
50959 | KnownZero, DCI)) |
50960 | return SDValue(N, 0); |
50961 | |
50962 | return SDValue(); |
50963 | } |
50964 | |
50965 | |
50966 | |
50967 | |
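      | // With F16C, fold fp16_to_fp(fp_to_fp16(x)) into a CVTPS2PH/CVTPH2PS |
      | // round trip on a widened v4f32; the PS->PH immediate 4 selects the |
      | // current MXCSR rounding mode. |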
50968 | static SDValue combineFP16_TO_FP(SDNode *N, SelectionDAG &DAG, |
50969 | const X86Subtarget &Subtarget) { |
50970 | if (Subtarget.useSoftFloat() || !Subtarget.hasF16C()) |
50971 | return SDValue(); |
50972 | |
50973 | if (N->getOperand(0).getOpcode() != ISD::FP_TO_FP16) |
50974 | return SDValue(); |
50975 | |
50976 | if (N->getValueType(0) != MVT::f32 || |
50977 | N->getOperand(0).getOperand(0).getValueType() != MVT::f32) |
50978 | return SDValue(); |
50979 | |
50980 | SDLoc dl(N); |
50981 | SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4f32, |
50982 | N->getOperand(0).getOperand(0)); |
50983 | Res = DAG.getNode(X86ISD::CVTPS2PH, dl, MVT::v8i16, Res, |
50984 | DAG.getTargetConstant(4, dl, MVT::i32)); |
50985 | Res = DAG.getNode(X86ISD::CVTPH2PS, dl, MVT::v4f32, Res); |
50986 | return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res, |
50987 | DAG.getIntPtrConstant(0, dl)); |
50988 | } |
50989 | |
50990 | static SDValue combineFP_EXTEND(SDNode *N, SelectionDAG &DAG, |
50991 | const X86Subtarget &Subtarget) { |
50992 | if (!Subtarget.hasF16C() || Subtarget.useSoftFloat()) |
50993 | return SDValue(); |
50994 | |
50995 | bool IsStrict = N->isStrictFPOpcode(); |
50996 | EVT VT = N->getValueType(0); |
50997 | SDValue Src = N->getOperand(IsStrict ? 1 : 0); |
50998 | EVT SrcVT = Src.getValueType(); |
50999 | |
51000 | if (!SrcVT.isVector() || SrcVT.getVectorElementType() != MVT::f16) |
51001 | return SDValue(); |
51002 | |
51003 | if (VT.getVectorElementType() != MVT::f32 && |
51004 | VT.getVectorElementType() != MVT::f64) |
51005 | return SDValue(); |
51006 | |
51007 | unsigned NumElts = VT.getVectorNumElements(); |
51008 | if (NumElts == 1 || !isPowerOf2_32(NumElts)) |
51009 | return SDValue(); |
51010 | |
51011 | SDLoc dl(N); |
51012 | |
51013 | |
51014 | EVT IntVT = SrcVT.changeVectorElementTypeToInteger(); |
51015 | Src = DAG.getBitcast(IntVT, Src); |
51016 | |
51017 | |
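      | // CVTPH2PS consumes a full v8i16, so pad narrower sources out to eight |
      | // elements (undef fill for a v4 source, zero fill below that). |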
51018 | if (NumElts < 8) { |
51019 | unsigned NumConcats = 8 / NumElts; |
51020 | SDValue Fill = NumElts == 4 ? DAG.getUNDEF(IntVT) |
51021 | : DAG.getConstant(0, dl, IntVT); |
51022 | SmallVector<SDValue, 4> Ops(NumConcats, Fill); |
51023 | Ops[0] = Src; |
51024 | Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, Ops); |
51025 | } |
51026 | |
51027 | |
51028 | EVT CvtVT = EVT::getVectorVT(*DAG.getContext(), MVT::f32, |
51029 | std::max(4U, NumElts)); |
51030 | SDValue Cvt, Chain; |
51031 | if (IsStrict) { |
51032 | Cvt = DAG.getNode(X86ISD::STRICT_CVTPH2PS, dl, {CvtVT, MVT::Other}, |
51033 | {N->getOperand(0), Src}); |
51034 | Chain = Cvt.getValue(1); |
51035 | } else { |
51036 | Cvt = DAG.getNode(X86ISD::CVTPH2PS, dl, CvtVT, Src); |
51037 | } |
51038 | |
51039 | if (NumElts < 4) { |
51040 | assert(NumElts == 2 && "Unexpected size"); |
51041 | Cvt = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2f32, Cvt, |
51042 | DAG.getIntPtrConstant(0, dl)); |
51043 | } |
51044 | |
51045 | if (IsStrict) { |
51046 | |
51047 | if (Cvt.getValueType() != VT) { |
51048 | Cvt = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {VT, MVT::Other}, |
51049 | {Chain, Cvt}); |
51050 | Chain = Cvt.getValue(1); |
51051 | } |
51052 | return DAG.getMergeValues({Cvt, Chain}, dl); |
51053 | } |
51054 | |
51055 | |
51056 | return DAG.getNode(ISD::FP_EXTEND, dl, VT, Cvt); |
51057 | } |
51058 | |
51059 | |
51060 | |
51061 | |
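      | // Deduplicate broadcast loads: if a wider broadcast load of the same |
      | // pointer, chain and memory width already exists, extract its low bits |
      | // instead of issuing a second load; CombineTo also forwards this node's |
      | // chain output to the surviving load. |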
51062 | static SDValue combineBROADCAST_LOAD(SDNode *N, SelectionDAG &DAG, |
51063 | TargetLowering::DAGCombinerInfo &DCI) { |
51064 | assert((N->getOpcode() == X86ISD::VBROADCAST_LOAD || |
51065 | N->getOpcode() == X86ISD::SUBV_BROADCAST_LOAD) && |
51066 | "Unknown broadcast load type"); |
51067 | |
51068 | |
51069 | if (N->hasAnyUseOfValue(1)) |
51070 | return SDValue(); |
51071 | |
51072 | auto *MemIntrin = cast<MemIntrinsicSDNode>(N); |
51073 | |
51074 | SDValue Ptr = MemIntrin->getBasePtr(); |
51075 | SDValue Chain = MemIntrin->getChain(); |
51076 | EVT VT = N->getSimpleValueType(0); |
51077 | EVT MemVT = MemIntrin->getMemoryVT(); |
51078 | |
51079 | |
51080 | |
51081 | for (SDNode *User : Ptr->uses()) |
51082 | if (User != N && User->getOpcode() == N->getOpcode() && |
51083 | cast<MemIntrinsicSDNode>(User)->getBasePtr() == Ptr && |
51084 | cast<MemIntrinsicSDNode>(User)->getChain() == Chain && |
51085 | cast<MemIntrinsicSDNode>(User)->getMemoryVT().getSizeInBits() == |
51086 | MemVT.getSizeInBits() && |
51087 | !User->hasAnyUseOfValue(1) && |
51088 | User->getValueSizeInBits(0).getFixedSize() > VT.getFixedSizeInBits()) { |
51089 | SDValue Extract = extractSubVector(SDValue(User, 0), 0, DAG, SDLoc(N), |
51090 | VT.getSizeInBits()); |
51091 | Extract = DAG.getBitcast(VT, Extract); |
51092 | return DCI.CombineTo(N, Extract, SDValue(User, 1)); |
51093 | } |
51094 | |
51095 | return SDValue(); |
51096 | } |
51097 | |
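      | // With F16C, lower a vector f32 -> f16 fp_round through CVTPS2PH |
      | // (immediate 4 = current MXCSR rounding mode), padding and truncating |
      | // around the instruction's fixed operand widths. |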
51098 | static SDValue combineFP_ROUND(SDNode *N, SelectionDAG &DAG, |
51099 | const X86Subtarget &Subtarget) { |
51100 | if (!Subtarget.hasF16C() || Subtarget.useSoftFloat()) |
51101 | return SDValue(); |
51102 | |
51103 | EVT VT = N->getValueType(0); |
51104 | SDValue Src = N->getOperand(0); |
51105 | EVT SrcVT = Src.getValueType(); |
51106 | |
51107 | if (!VT.isVector() || VT.getVectorElementType() != MVT::f16 || |
51108 | SrcVT.getVectorElementType() != MVT::f32) |
51109 | return SDValue(); |
51110 | |
51111 | unsigned NumElts = VT.getVectorNumElements(); |
51112 | if (NumElts == 1 || !isPowerOf2_32(NumElts)) |
51113 | return SDValue(); |
51114 | |
51115 | SDLoc dl(N); |
51116 | |
51117 | |
51118 | if (NumElts < 4) |
51119 | Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src, |
51120 | DAG.getConstantFP(0.0, dl, SrcVT)); |
51121 | |
51122 | |
51123 | EVT CvtVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16, |
51124 | std::max(8U, NumElts)); |
51125 | SDValue Cvt = DAG.getNode(X86ISD::CVTPS2PH, dl, CvtVT, Src, |
51126 | DAG.getTargetConstant(4, dl, MVT::i32)); |
51127 | |
51128 | |
51129 | if (NumElts < 8) { |
51130 | EVT IntVT = VT.changeVectorElementTypeToInteger(); |
51131 | Cvt = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, IntVT, Cvt, |
51132 | DAG.getIntPtrConstant(0, dl)); |
51133 | } |
51134 | |
51135 | return DAG.getBitcast(VT, Cvt); |
51136 | } |
51137 | |
51138 | static SDValue combineMOVDQ2Q(SDNode *N, SelectionDAG &DAG) { |
51139 | SDValue Src = N->getOperand(0); |
51140 | |
51141 | |
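      | // MOVDQ2Q keeps only the low 64 bits, so a one-use simple load feeding |
      | // it can be shrunk to a 64-bit MMX load from the same address. |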
51142 | if (ISD::isNormalLoad(Src.getNode()) && Src.hasOneUse()) { |
51143 | LoadSDNode *LN = cast<LoadSDNode>(Src.getNode()); |
51144 | |
51145 | if (LN->isSimple()) { |
51146 | SDValue NewLd = DAG.getLoad(MVT::x86mmx, SDLoc(N), LN->getChain(), |
51147 | LN->getBasePtr(), |
51148 | LN->getPointerInfo(), |
51149 | LN->getOriginalAlign(), |
51150 | LN->getMemOperand()->getFlags()); |
51151 | DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), NewLd.getValue(1)); |
51152 | return NewLd; |
51153 | } |
51154 | } |
51155 | |
51156 | return SDValue(); |
51157 | } |
51158 | |
51159 | static SDValue combinePDEP(SDNode *N, SelectionDAG &DAG, |
51160 | TargetLowering::DAGCombinerInfo &DCI) { |
51161 | unsigned NumBits = N->getSimpleValueType(0).getSizeInBits(); |
51162 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
51163 | if (TLI.SimplifyDemandedBits(SDValue(N, 0), |
51164 | APInt::getAllOnesValue(NumBits), DCI)) |
51165 | return SDValue(N, 0); |
51166 | |
51167 | return SDValue(); |
51168 | } |
51169 | |
51170 | SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, |
51171 | DAGCombinerInfo &DCI) const { |
51172 | SelectionDAG &DAG = DCI.DAG; |
51173 | switch (N->getOpcode()) { |
51174 | default: break; |
51175 | case ISD::SCALAR_TO_VECTOR: |
51176 | return combineScalarToVector(N, DAG); |
51177 | case ISD::EXTRACT_VECTOR_ELT: |
51178 | case X86ISD::PEXTRW: |
51179 | case X86ISD::PEXTRB: |
51180 | return combineExtractVectorElt(N, DAG, DCI, Subtarget); |
51181 | case ISD::CONCAT_VECTORS: |
51182 | return combineConcatVectors(N, DAG, DCI, Subtarget); |
51183 | case ISD::INSERT_SUBVECTOR: |
51184 | return combineInsertSubvector(N, DAG, DCI, Subtarget); |
51185 | case ISD::EXTRACT_SUBVECTOR: |
51186 | return combineExtractSubvector(N, DAG, DCI, Subtarget); |
51187 | case ISD::VSELECT: |
51188 | case ISD::SELECT: |
51189 | case X86ISD::BLENDV: return combineSelect(N, DAG, DCI, Subtarget); |
51190 | case ISD::BITCAST: return combineBitcast(N, DAG, DCI, Subtarget); |
51191 | case X86ISD::CMOV: return combineCMov(N, DAG, DCI, Subtarget); |
51192 | case X86ISD::CMP: return combineCMP(N, DAG); |
51193 | case ISD::ADD: return combineAdd(N, DAG, DCI, Subtarget); |
51194 | case ISD::SUB: return combineSub(N, DAG, DCI, Subtarget); |
51195 | case X86ISD::ADD: |
51196 | case X86ISD::SUB: return combineX86AddSub(N, DAG, DCI); |
51197 | case X86ISD::SBB: return combineSBB(N, DAG); |
51198 | case X86ISD::ADC: return combineADC(N, DAG, DCI); |
51199 | case ISD::MUL: return combineMul(N, DAG, DCI, Subtarget); |
51200 | case ISD::SHL: return combineShiftLeft(N, DAG); |
51201 | case ISD::SRA: return combineShiftRightArithmetic(N, DAG, Subtarget); |
51202 | case ISD::SRL: return combineShiftRightLogical(N, DAG, DCI, Subtarget); |
51203 | case ISD::AND: return combineAnd(N, DAG, DCI, Subtarget); |
51204 | case ISD::OR: return combineOr(N, DAG, DCI, Subtarget); |
51205 | case ISD::XOR: return combineXor(N, DAG, DCI, Subtarget); |
51206 | case X86ISD::BEXTR: |
51207 | case X86ISD::BEXTRI: return combineBEXTR(N, DAG, DCI, Subtarget); |
51208 | case ISD::LOAD: return combineLoad(N, DAG, DCI, Subtarget); |
51209 | case ISD::MLOAD: return combineMaskedLoad(N, DAG, DCI, Subtarget); |
51210 | case ISD::STORE: return combineStore(N, DAG, DCI, Subtarget); |
51211 | case ISD::MSTORE: return combineMaskedStore(N, DAG, DCI, Subtarget); |
51212 | case X86ISD::VEXTRACT_STORE: |
51213 | return combineVEXTRACT_STORE(N, DAG, DCI, Subtarget); |
51214 | case ISD::SINT_TO_FP: |
51215 | case ISD::STRICT_SINT_TO_FP: |
51216 | return combineSIntToFP(N, DAG, DCI, Subtarget); |
51217 | case ISD::UINT_TO_FP: |
51218 | case ISD::STRICT_UINT_TO_FP: |
51219 | return combineUIntToFP(N, DAG, Subtarget); |
51220 | case ISD::FADD: |
51221 | case ISD::FSUB: return combineFaddFsub(N, DAG, Subtarget); |
51222 | case ISD::FNEG: return combineFneg(N, DAG, DCI, Subtarget); |
51223 | case ISD::TRUNCATE: return combineTruncate(N, DAG, Subtarget); |
51224 | case X86ISD::VTRUNC: return combineVTRUNC(N, DAG, DCI); |
51225 | case X86ISD::ANDNP: return combineAndnp(N, DAG, DCI, Subtarget); |
51226 | case X86ISD::FAND: return combineFAnd(N, DAG, Subtarget); |
51227 | case X86ISD::FANDN: return combineFAndn(N, DAG, Subtarget); |
51228 | case X86ISD::FXOR: |
51229 | case X86ISD::FOR: return combineFOr(N, DAG, DCI, Subtarget); |
51230 | case X86ISD::FMIN: |
51231 | case X86ISD::FMAX: return combineFMinFMax(N, DAG); |
51232 | case ISD::FMINNUM: |
51233 | case ISD::FMAXNUM: return combineFMinNumFMaxNum(N, DAG, Subtarget); |
51234 | case X86ISD::CVTSI2P: |
51235 | case X86ISD::CVTUI2P: return combineX86INT_TO_FP(N, DAG, DCI); |
51236 | case X86ISD::CVTP2SI: |
51237 | case X86ISD::CVTP2UI: |
51238 | case X86ISD::STRICT_CVTTP2SI: |
51239 | case X86ISD::CVTTP2SI: |
51240 | case X86ISD::STRICT_CVTTP2UI: |
51241 | case X86ISD::CVTTP2UI: |
51242 | return combineCVTP2I_CVTTP2I(N, DAG, DCI); |
51243 | case X86ISD::STRICT_CVTPH2PS: |
51244 | case X86ISD::CVTPH2PS: return combineCVTPH2PS(N, DAG, DCI); |
51245 | case X86ISD::BT: return combineBT(N, DAG, DCI); |
51246 | case ISD::ANY_EXTEND: |
51247 | case ISD::ZERO_EXTEND: return combineZext(N, DAG, DCI, Subtarget); |
51248 | case ISD::SIGN_EXTEND: return combineSext(N, DAG, DCI, Subtarget); |
51249 | case ISD::SIGN_EXTEND_INREG: return combineSignExtendInReg(N, DAG, Subtarget); |
51250 | case ISD::ANY_EXTEND_VECTOR_INREG: |
51251 | case ISD::SIGN_EXTEND_VECTOR_INREG: |
51252 | case ISD::ZERO_EXTEND_VECTOR_INREG: |
51253 | return combineEXTEND_VECTOR_INREG(N, DAG, DCI, Subtarget); |
51254 | case ISD::SETCC: return combineSetCC(N, DAG, DCI, Subtarget); |
51255 | case X86ISD::SETCC: return combineX86SetCC(N, DAG, Subtarget); |
51256 | case X86ISD::BRCOND: return combineBrCond(N, DAG, Subtarget); |
51257 | case X86ISD::PACKSS: |
51258 | case X86ISD::PACKUS: return combineVectorPack(N, DAG, DCI, Subtarget); |
51259 | case X86ISD::HADD: |
51260 | case X86ISD::HSUB: |
51261 | case X86ISD::FHADD: |
51262 | case X86ISD::FHSUB: return combineVectorHADDSUB(N, DAG, DCI, Subtarget); |
51263 | case X86ISD::VSHL: |
51264 | case X86ISD::VSRA: |
51265 | case X86ISD::VSRL: |
51266 | return combineVectorShiftVar(N, DAG, DCI, Subtarget); |
51267 | case X86ISD::VSHLI: |
51268 | case X86ISD::VSRAI: |
51269 | case X86ISD::VSRLI: |
51270 | return combineVectorShiftImm(N, DAG, DCI, Subtarget); |
51271 | case ISD::INSERT_VECTOR_ELT: |
51272 | case X86ISD::PINSRB: |
51273 | case X86ISD::PINSRW: return combineVectorInsert(N, DAG, DCI, Subtarget); |
51274 | case X86ISD::SHUFP: |
51275 | case X86ISD::INSERTPS: |
51276 | case X86ISD::EXTRQI: |
51277 | case X86ISD::INSERTQI: |
51278 | case X86ISD::VALIGN: |
51279 | case X86ISD::PALIGNR: |
51280 | case X86ISD::VSHLDQ: |
51281 | case X86ISD::VSRLDQ: |
51282 | case X86ISD::BLENDI: |
51283 | case X86ISD::UNPCKH: |
51284 | case X86ISD::UNPCKL: |
51285 | case X86ISD::MOVHLPS: |
51286 | case X86ISD::MOVLHPS: |
51287 | case X86ISD::PSHUFB: |
51288 | case X86ISD::PSHUFD: |
51289 | case X86ISD::PSHUFHW: |
51290 | case X86ISD::PSHUFLW: |
51291 | case X86ISD::MOVSHDUP: |
51292 | case X86ISD::MOVSLDUP: |
51293 | case X86ISD::MOVDDUP: |
51294 | case X86ISD::MOVSS: |
51295 | case X86ISD::MOVSD: |
51296 | case X86ISD::VBROADCAST: |
51297 | case X86ISD::VPPERM: |
51298 | case X86ISD::VPERMI: |
51299 | case X86ISD::VPERMV: |
51300 | case X86ISD::VPERMV3: |
51301 | case X86ISD::VPERMIL2: |
51302 | case X86ISD::VPERMILPI: |
51303 | case X86ISD::VPERMILPV: |
51304 | case X86ISD::VPERM2X128: |
51305 | case X86ISD::SHUF128: |
51306 | case X86ISD::VZEXT_MOVL: |
51307 | case ISD::VECTOR_SHUFFLE: return combineShuffle(N, DAG, DCI, Subtarget); |
51308 | case X86ISD::FMADD_RND: |
51309 | case X86ISD::FMSUB: |
51310 | case X86ISD::STRICT_FMSUB: |
51311 | case X86ISD::FMSUB_RND: |
51312 | case X86ISD::FNMADD: |
51313 | case X86ISD::STRICT_FNMADD: |
51314 | case X86ISD::FNMADD_RND: |
51315 | case X86ISD::FNMSUB: |
51316 | case X86ISD::STRICT_FNMSUB: |
51317 | case X86ISD::FNMSUB_RND: |
51318 | case ISD::FMA: |
51319 | case ISD::STRICT_FMA: return combineFMA(N, DAG, DCI, Subtarget); |
51320 | case X86ISD::FMADDSUB_RND: |
51321 | case X86ISD::FMSUBADD_RND: |
51322 | case X86ISD::FMADDSUB: |
51323 | case X86ISD::FMSUBADD: return combineFMADDSUB(N, DAG, DCI); |
51324 | case X86ISD::MOVMSK: return combineMOVMSK(N, DAG, DCI, Subtarget); |
51325 | case X86ISD::MGATHER: |
51326 | case X86ISD::MSCATTER: return combineX86GatherScatter(N, DAG, DCI); |
51327 | case ISD::MGATHER: |
51328 | case ISD::MSCATTER: return combineGatherScatter(N, DAG, DCI); |
51329 | case X86ISD::PCMPEQ: |
51330 | case X86ISD::PCMPGT: return combineVectorCompare(N, DAG, Subtarget); |
51331 | case X86ISD::PMULDQ: |
51332 | case X86ISD::PMULUDQ: return combinePMULDQ(N, DAG, DCI, Subtarget); |
51333 | case X86ISD::KSHIFTL: |
51334 | case X86ISD::KSHIFTR: return combineKSHIFT(N, DAG, DCI); |
51335 | case ISD::FP16_TO_FP: return combineFP16_TO_FP(N, DAG, Subtarget); |
51336 | case ISD::STRICT_FP_EXTEND: |
51337 | case ISD::FP_EXTEND: return combineFP_EXTEND(N, DAG, Subtarget); |
51338 | case ISD::FP_ROUND: return combineFP_ROUND(N, DAG, Subtarget); |
51339 | case X86ISD::VBROADCAST_LOAD: |
51340 | case X86ISD::SUBV_BROADCAST_LOAD: return combineBROADCAST_LOAD(N, DAG, DCI); |
51341 | case X86ISD::MOVDQ2Q: return combineMOVDQ2Q(N, DAG); |
51342 | case X86ISD::PDEP: return combinePDEP(N, DAG, DCI); |
51343 | } |
51344 | |
51345 | return SDValue(); |
51346 | } |
51347 | |
51348 | bool X86TargetLowering::isTypeDesirableForOp(unsigned Opc, EVT VT) const { |
51349 | if (!isTypeLegal(VT)) |
51350 | return false; |
51351 | |
51352 | |
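      | // There are no vXi8 vector shifts, so an i8-element SHL is never |
      | // desirable. |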
51353 | if (Opc == ISD::SHL && VT.isVector() && VT.getVectorElementType() == MVT::i8) |
51354 | return false; |
51355 | |
51356 | |
51357 | |
51358 | |
51359 | |
51360 | |
51361 | |
51362 | |
51363 | |
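      | // i8 multiplies and shifts are generally no cheaper than their i32 |
      | // forms (which can also become LEA and friends), so don't keep them |
      | // narrow; IsDesirableToPromoteOp below applies the matching promotion. |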
51364 | if ((Opc == ISD::MUL || Opc == ISD::SHL) && VT == MVT::i8) |
51365 | return false; |
51366 | |
51367 | |
51368 | |
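      | // i16 operations need an operand-size prefix and some are slow, so the |
      | // common ALU, shift and extend cases are better promoted to i32. |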
51369 | if (VT == MVT::i16) { |
51370 | switch (Opc) { |
51371 | default: |
51372 | break; |
51373 | case ISD::LOAD: |
51374 | case ISD::SIGN_EXTEND: |
51375 | case ISD::ZERO_EXTEND: |
51376 | case ISD::ANY_EXTEND: |
51377 | case ISD::SHL: |
51378 | case ISD::SRA: |
51379 | case ISD::SRL: |
51380 | case ISD::SUB: |
51381 | case ISD::ADD: |
51382 | case ISD::MUL: |
51383 | case ISD::AND: |
51384 | case ISD::OR: |
51385 | case ISD::XOR: |
51386 | return false; |
51387 | } |
51388 | } |
51389 | |
51390 | |
51391 | return true; |
51392 | } |
51393 | |
51394 | SDValue X86TargetLowering::expandIndirectJTBranch(const SDLoc& dl, |
51395 | SDValue Value, SDValue Addr, |
51396 | SelectionDAG &DAG) const { |
51397 | const Module *M = DAG.getMachineFunction().getMMI().getModule(); |
51398 | Metadata *IsCFProtectionSupported = M->getModuleFlag("cf-protection-branch"); |
51399 | if (IsCFProtectionSupported) { |
51400 | |
51401 | |
51402 | |
51403 | |
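      | // With cf-protection-branch enabled, emit the jump-table dispatch as a |
      | // NOTRACK indirect branch (NT_BRIND); its targets then don't need ENDBR |
      | // landing pads. |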
51404 | return DAG.getNode(X86ISD::NT_BRIND, dl, MVT::Other, Value, Addr); |
51405 | } |
51406 | |
51407 | return TargetLowering::expandIndirectJTBranch(dl, Value, Addr, DAG); |
51408 | } |
51409 | |
51410 | bool X86TargetLowering::IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const { |
51411 | EVT VT = Op.getValueType(); |
51412 | bool Is8BitMulByConstant = VT == MVT::i8 && Op.getOpcode() == ISD::MUL && |
51413 | isa<ConstantSDNode>(Op.getOperand(1)); |
51414 | |
51415 | |
51416 | |
51417 | |
51418 | |
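      | // Promote i16 ops (and i8 multiplies by a constant) to i32 unless that |
      | // would break a load fold or a read-modify-write memory pattern, which |
      | // the two lambdas below detect. |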
51419 | if (VT != MVT::i16 && !Is8BitMulByConstant) |
51420 | return false; |
51421 | |
51422 | auto IsFoldableRMW = [](SDValue Load, SDValue Op) { |
51423 | if (!Op.hasOneUse()) |
51424 | return false; |
51425 | SDNode *User = *Op->use_begin(); |
51426 | if (!ISD::isNormalStore(User)) |
51427 | return false; |
51428 | auto *Ld = cast<LoadSDNode>(Load); |
51429 | auto *St = cast<StoreSDNode>(User); |
51430 | return Ld->getBasePtr() == St->getBasePtr(); |
51431 | }; |
51432 | |
51433 | auto IsFoldableAtomicRMW = [](SDValue Load, SDValue Op) { |
51434 | if (!Load.hasOneUse() || Load.getOpcode() != ISD::ATOMIC_LOAD) |
51435 | return false; |
51436 | if (!Op.hasOneUse()) |
51437 | return false; |
51438 | SDNode *User = *Op->use_begin(); |
51439 | if (User->getOpcode() != ISD::ATOMIC_STORE) |
51440 | return false; |
51441 | auto *Ld = cast<AtomicSDNode>(Load); |
51442 | auto *St = cast<AtomicSDNode>(User); |
51443 | return Ld->getBasePtr() == St->getBasePtr(); |
51444 | }; |
51445 | |
51446 | bool Commute = false; |
51447 | switch (Op.getOpcode()) { |
51448 | default: return false; |
51449 | case ISD::SIGN_EXTEND: |
51450 | case ISD::ZERO_EXTEND: |
51451 | case ISD::ANY_EXTEND: |
51452 | break; |
51453 | case ISD::SHL: |
51454 | case ISD::SRA: |
51455 | case ISD::SRL: { |
51456 | SDValue N0 = Op.getOperand(0); |
51457 | |
51458 | if (MayFoldLoad(N0) && IsFoldableRMW(N0, Op)) |
51459 | return false; |
51460 | break; |
51461 | } |
51462 | case ISD::ADD: |
51463 | case ISD::MUL: |
51464 | case ISD::AND: |
51465 | case ISD::OR: |
51466 | case ISD::XOR: |
51467 | Commute = true; |
51468 | LLVM_FALLTHROUGH; |
51469 | case ISD::SUB: { |
51470 | SDValue N0 = Op.getOperand(0); |
51471 | SDValue N1 = Op.getOperand(1); |
51472 | |
51473 | if (MayFoldLoad(N1) && |
51474 | (!Commute || !isa<ConstantSDNode>(N0) || |
51475 | (Op.getOpcode() != ISD::MUL && IsFoldableRMW(N1, Op)))) |
51476 | return false; |
51477 | if (MayFoldLoad(N0) && |
51478 | ((Commute && !isa<ConstantSDNode>(N1)) || |
51479 | (Op.getOpcode() != ISD::MUL && IsFoldableRMW(N0, Op)))) |
51480 | return false; |
51481 | if (IsFoldableAtomicRMW(N0, Op) || |
51482 | (Commute && IsFoldableAtomicRMW(N1, Op))) |
51483 | return false; |
51484 | } |
51485 | } |
51486 | |
51487 | PVT = MVT::i32; |
51488 | return true; |
51489 | } |
51490 | |
51491 | |
51492 | |
51493 | |
51494 | |
51495 | |
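      | // Match S against a sequence of asm pieces separated by at least one |
      | // space or tab, with nothing left over at the end. |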
51496 | static bool matchAsm(StringRef S, ArrayRef<const char *> Pieces) { |
51497 | S = S.substr(S.find_first_not_of(" \t")); |
51498 | |
51499 | for (StringRef Piece : Pieces) { |
51500 | if (!S.startswith(Piece)) |
51501 | return false; |
51502 | |
51503 | S = S.substr(Piece.size()); |
51504 | StringRef::size_type Pos = S.find_first_not_of(" \t"); |
51505 | if (Pos == 0) |
51506 | return false; |
51507 | |
51508 | S = S.substr(Pos); |
51509 | } |
51510 | |
51511 | return S.empty(); |
51512 | } |
51513 | |
51514 | static bool clobbersFlagRegisters(const SmallVector<StringRef, 4> &AsmPieces) { |
51515 | |
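      | // Replacing the bswap idioms is only safe if the asm already declares |
      | // cc, flags and fpsr clobbered (plus dirflag when four clobbers are |
      | // listed). |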
51516 | if (AsmPieces.size() == 3 || AsmPieces.size() == 4) { |
51517 | if (std::count(AsmPieces.begin(), AsmPieces.end(), "~{cc}") && |
51518 | std::count(AsmPieces.begin(), AsmPieces.end(), "~{flags}") && |
51519 | std::count(AsmPieces.begin(), AsmPieces.end(), "~{fpsr}")) { |
51520 | |
51521 | if (AsmPieces.size() == 3) |
51522 | return true; |
51523 | else if (std::count(AsmPieces.begin(), AsmPieces.end(), "~{dirflag}")) |
51524 | return true; |
51525 | } |
51526 | } |
51527 | return false; |
51528 | } |
51529 | |
51530 | bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const { |
51531 | InlineAsm *IA = cast<InlineAsm>(CI->getCalledOperand()); |
51532 | |
51533 | const std::string &AsmStr = IA->getAsmString(); |
51534 | |
51535 | IntegerType *Ty = dyn_cast<IntegerType>(CI->getType()); |
51536 | if (!Ty || Ty->getBitWidth() % 16 != 0) |
51537 | return false; |
51538 | |
51539 | |
51540 | SmallVector<StringRef, 4> AsmPieces; |
51541 | SplitString(AsmStr, AsmPieces, ";\n"); |
51542 | |
51543 | switch (AsmPieces.size()) { |
51544 | default: return false; |
51545 | case 1: |
51546 | |
51547 | |
51548 | |
51549 | |
51550 | |
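      | // A single "bswap $0" (with optional l/q suffix or {0:q} modifier) maps |
      | // directly onto the bswap intrinsic. |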
51551 | if (matchAsm(AsmPieces[0], {"bswap", "$0"}) || |
51552 | matchAsm(AsmPieces[0], {"bswapl", "$0"}) || |
51553 | matchAsm(AsmPieces[0], {"bswapq", "$0"}) || |
51554 | matchAsm(AsmPieces[0], {"bswap", "${0:q}"}) || |
51555 | matchAsm(AsmPieces[0], {"bswapl", "${0:q}"}) || |
51556 | matchAsm(AsmPieces[0], {"bswapq", "${0:q}"})) { |
51557 | |
51558 | |
51559 | return IntrinsicLowering::LowerToByteSwap(CI); |
51560 | } |
51561 | |
51562 | |
51563 | if (CI->getType()->isIntegerTy(16) && |
51564 | IA->getConstraintString().compare(0, 5, "=r,0,") == 0 && |
51565 | (matchAsm(AsmPieces[0], {"rorw", "$$8,", "${0:w}"}) || |
51566 | matchAsm(AsmPieces[0], {"rolw", "$$8,", "${0:w}"}))) { |
51567 | AsmPieces.clear(); |
51568 | StringRef ConstraintsStr = IA->getConstraintString(); |
51569 | SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ","); |
51570 | array_pod_sort(AsmPieces.begin(), AsmPieces.end()); |
51571 | if (clobbersFlagRegisters(AsmPieces)) |
51572 | return IntrinsicLowering::LowerToByteSwap(CI); |
51573 | } |
51574 | break; |
51575 | case 3: |
51576 | if (CI->getType()->isIntegerTy(32) && |
51577 | IA->getConstraintString().compare(0, 5, "=r,0,") == 0 && |
51578 | matchAsm(AsmPieces[0], {"rorw", "$$8,", "${0:w}"}) && |
51579 | matchAsm(AsmPieces[1], {"rorl", "$$16,", "$0"}) && |
51580 | matchAsm(AsmPieces[2], {"rorw", "$$8,", "${0:w}"})) { |
51581 | AsmPieces.clear(); |
51582 | StringRef ConstraintsStr = IA->getConstraintString(); |
51583 | SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ","); |
51584 | array_pod_sort(AsmPieces.begin(), AsmPieces.end()); |
51585 | if (clobbersFlagRegisters(AsmPieces)) |
51586 | return IntrinsicLowering::LowerToByteSwap(CI); |
51587 | } |
51588 | |
51589 | if (CI->getType()->isIntegerTy(64)) { |
51590 | InlineAsm::ConstraintInfoVector Constraints = IA->ParseConstraints(); |
51591 | if (Constraints.size() >= 2 && |
51592 | Constraints[0].Codes.size() == 1 && Constraints[0].Codes[0] == "A" && |
51593 | Constraints[1].Codes.size() == 1 && Constraints[1].Codes[0] == "0") { |
51594 | |
51595 | if (matchAsm(AsmPieces[0], {"bswap", "%eax"}) && |
51596 | matchAsm(AsmPieces[1], {"bswap", "%edx"}) && |
51597 | matchAsm(AsmPieces[2], {"xchgl", "%eax,", "%edx"})) |
51598 | return IntrinsicLowering::LowerToByteSwap(CI); |
51599 | } |
51600 | } |
51601 | break; |
51602 | } |
51603 | return false; |
51604 | } |
51605 | |
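      | // Map flag-output constraints such as "{@ccz}" or "{@ccae}" onto X86 |
      | // condition codes; synonymous spellings (ccz/cce, ccc/ccb, ...) share |
      | // one code. |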
51606 | static X86::CondCode parseConstraintCode(llvm::StringRef Constraint) { |
51607 | X86::CondCode Cond = StringSwitch<X86::CondCode>(Constraint) |
51608 | .Case("{@cca}", X86::COND_A) |
51609 | .Case("{@ccae}", X86::COND_AE) |
51610 | .Case("{@ccb}", X86::COND_B) |
51611 | .Case("{@ccbe}", X86::COND_BE) |
51612 | .Case("{@ccc}", X86::COND_B) |
51613 | .Case("{@cce}", X86::COND_E) |
51614 | .Case("{@ccz}", X86::COND_E) |
51615 | .Case("{@ccg}", X86::COND_G) |
51616 | .Case("{@ccge}", X86::COND_GE) |
51617 | .Case("{@ccl}", X86::COND_L) |
51618 | .Case("{@ccle}", X86::COND_LE) |
51619 | .Case("{@ccna}", X86::COND_BE) |
51620 | .Case("{@ccnae}", X86::COND_B) |
51621 | .Case("{@ccnb}", X86::COND_AE) |
51622 | .Case("{@ccnbe}", X86::COND_A) |
51623 | .Case("{@ccnc}", X86::COND_AE) |
51624 | .Case("{@ccne}", X86::COND_NE) |
51625 | .Case("{@ccnz}", X86::COND_NE) |
51626 | .Case("{@ccng}", X86::COND_LE) |
51627 | .Case("{@ccnge}", X86::COND_L) |
51628 | .Case("{@ccnl}", X86::COND_GE) |
51629 | .Case("{@ccnle}", X86::COND_G) |
51630 | .Case("{@ccno}", X86::COND_NO) |
51631 | .Case("{@ccnp}", X86::COND_NP) |
51632 | .Case("{@ccns}", X86::COND_NS) |
51633 | .Case("{@cco}", X86::COND_O) |
51634 | .Case("{@ccp}", X86::COND_P) |
51635 | .Case("{@ccs}", X86::COND_S) |
51636 | .Default(X86::COND_INVALID); |
51637 | return Cond; |
51638 | } |
51639 | |
51640 | |
51641 | X86TargetLowering::ConstraintType |
51642 | X86TargetLowering::getConstraintType(StringRef Constraint) const { |
51643 | if (Constraint.size() == 1) { |
51644 | switch (Constraint[0]) { |
51645 | case 'R': |
51646 | case 'q': |
51647 | case 'Q': |
51648 | case 'f': |
51649 | case 't': |
51650 | case 'u': |
51651 | case 'y': |
51652 | case 'x': |
51653 | case 'v': |
51654 | case 'l': |
51655 | case 'k': |
51656 | return C_RegisterClass; |
51657 | case 'a': |
51658 | case 'b': |
51659 | case 'c': |
51660 | case 'd': |
51661 | case 'S': |
51662 | case 'D': |
51663 | case 'A': |
51664 | return C_Register; |
51665 | case 'I': |
51666 | case 'J': |
51667 | case 'K': |
51668 | case 'N': |
51669 | case 'G': |
51670 | case 'L': |
51671 | case 'M': |
51672 | return C_Immediate; |
51673 | case 'C': |
51674 | case 'e': |
51675 | case 'Z': |
51676 | return C_Other; |
51677 | default: |
51678 | break; |
51679 | } |
51680 | } |
51681 | else if (Constraint.size() == 2) { |
51682 | switch (Constraint[0]) { |
51683 | default: |
51684 | break; |
51685 | case 'Y': |
51686 | switch (Constraint[1]) { |
51687 | default: |
51688 | break; |
51689 | case 'z': |
51690 | return C_Register; |
51691 | case 'i': |
51692 | case 'm': |
51693 | case 'k': |
51694 | case 't': |
51695 | case '2': |
51696 | return C_RegisterClass; |
51697 | } |
51698 | } |
51699 | } else if (parseConstraintCode(Constraint) != X86::COND_INVALID) |
51700 | return C_Other; |
51701 | return TargetLowering::getConstraintType(Constraint); |
51702 | } |
51703 | |
51704 | |
51705 | |
51706 | |
51707 | TargetLowering::ConstraintWeight |
51708 | X86TargetLowering::getSingleConstraintMatchWeight( |
51709 | AsmOperandInfo &info, const char *constraint) const { |
51710 | ConstraintWeight weight = CW_Invalid; |
51711 | Value *CallOperandVal = info.CallOperandVal; |
51712 | |
51713 | |
51714 | if (!CallOperandVal) |
51715 | return CW_Default; |
51716 | Type *type = CallOperandVal->getType(); |
51717 | |
51718 | switch (*constraint) { |
51719 | default: |
51720 | weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint); |
51721 | LLVM_FALLTHROUGH; |
51722 | case 'R': |
51723 | case 'q': |
51724 | case 'Q': |
51725 | case 'a': |
51726 | case 'b': |
51727 | case 'c': |
51728 | case 'd': |
51729 | case 'S': |
51730 | case 'D': |
51731 | case 'A': |
51732 | if (CallOperandVal->getType()->isIntegerTy()) |
51733 | weight = CW_SpecificReg; |
51734 | break; |
51735 | case 'f': |
51736 | case 't': |
51737 | case 'u': |
51738 | if (type->isFloatingPointTy()) |
51739 | weight = CW_SpecificReg; |
51740 | break; |
51741 | case 'y': |
51742 | if (type->isX86_MMXTy() && Subtarget.hasMMX()) |
51743 | weight = CW_SpecificReg; |
51744 | break; |
51745 | case 'Y': |
51746 | if (StringRef(constraint).size() != 2) |
51747 | break; |
51748 | switch (constraint[1]) { |
51749 | default: |
51750 | return CW_Invalid; |
51751 | |
51752 | case 'z': |
51753 | if (((type->getPrimitiveSizeInBits() == 128) && Subtarget.hasSSE1()) || |
51754 | ((type->getPrimitiveSizeInBits() == 256) && Subtarget.hasAVX()) || |
51755 | ((type->getPrimitiveSizeInBits() == 512) && Subtarget.hasAVX512())) |
51756 | return CW_SpecificReg; |
51757 | return CW_Invalid; |
51758 | |
51759 | case 'k': |
51760 | if ((type->getPrimitiveSizeInBits() == 64) && Subtarget.hasAVX512()) |
51761 | return CW_Register; |
51762 | return CW_Invalid; |
51763 | |
51764 | case 'm': |
51765 | if (type->isX86_MMXTy() && Subtarget.hasMMX()) |
51766 | return weight; |
51767 | return CW_Invalid; |
51768 | |
51769 | case 'i': |
51770 | case 't': |
51771 | case '2': |
51772 | if (!Subtarget.hasSSE2()) |
51773 | return CW_Invalid; |
51774 | break; |
51775 | } |
51776 | break; |
51777 | case 'v': |
51778 | if ((type->getPrimitiveSizeInBits() == 512) && Subtarget.hasAVX512()) |
51779 | weight = CW_Register; |
51780 | LLVM_FALLTHROUGH; |
51781 | case 'x': |
51782 | if (((type->getPrimitiveSizeInBits() == 128) && Subtarget.hasSSE1()) || |
51783 | ((type->getPrimitiveSizeInBits() == 256) && Subtarget.hasAVX())) |
51784 | weight = CW_Register; |
51785 | break; |
51786 | case 'k': |
51787 | |
51788 | if ((type->getPrimitiveSizeInBits() == 64) && Subtarget.hasAVX512()) |
51789 | weight = CW_Register; |
51790 | break; |
51791 | case 'I': |
51792 | if (ConstantInt *C = dyn_cast<ConstantInt>(info.CallOperandVal)) { |
51793 | if (C->getZExtValue() <= 31) |
51794 | weight = CW_Constant; |
51795 | } |
51796 | break; |
51797 | case 'J': |
51798 | if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) { |
51799 | if (C->getZExtValue() <= 63) |
51800 | weight = CW_Constant; |
51801 | } |
51802 | break; |
51803 | case 'K': |
51804 | if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) { |
51805 | if ((C->getSExtValue() >= -0x80) && (C->getSExtValue() <= 0x7f)) |
51806 | weight = CW_Constant; |
51807 | } |
51808 | break; |
51809 | case 'L': |
51810 | if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) { |
51811 | if ((C->getZExtValue() == 0xff) || (C->getZExtValue() == 0xffff)) |
51812 | weight = CW_Constant; |
51813 | } |
51814 | break; |
51815 | case 'M': |
51816 | if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) { |
51817 | if (C->getZExtValue() <= 3) |
51818 | weight = CW_Constant; |
51819 | } |
51820 | break; |
51821 | case 'N': |
51822 | if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) { |
51823 | if (C->getZExtValue() <= 0xff) |
51824 | weight = CW_Constant; |
51825 | } |
51826 | break; |
51827 | case 'G': |
51828 | case 'C': |
51829 | if (isa<ConstantFP>(CallOperandVal)) { |
51830 | weight = CW_Constant; |
51831 | } |
51832 | break; |
51833 | case 'e': |
51834 | if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) { |
51835 | if ((C->getSExtValue() >= -0x80000000LL) && |
51836 | (C->getSExtValue() <= 0x7fffffffLL)) |
51837 | weight = CW_Constant; |
51838 | } |
51839 | break; |
51840 | case 'Z': |
51841 | if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) { |
51842 | if (C->getZExtValue() <= 0xffffffff) |
51843 | weight = CW_Constant; |
51844 | } |
51845 | break; |
51846 | } |
51847 | return weight; |
51848 | } |
51849 | |
51850 | |
51851 | |
51852 | |
51853 | const char *X86TargetLowering:: |
51854 | LowerXConstraint(EVT ConstraintVT) const { |
51855 | |
51856 | |
51857 | if (ConstraintVT.isFloatingPoint()) { |
51858 | if (Subtarget.hasSSE1()) |
51859 | return "x"; |
51860 | } |
51861 | |
51862 | return TargetLowering::LowerXConstraint(ConstraintVT); |
51863 | } |
51864 | |
51865 | |
51866 | SDValue X86TargetLowering::LowerAsmOutputForConstraint( |
51867 | SDValue &Chain, SDValue &Flag, const SDLoc &DL, |
51868 | const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const { |
51869 | X86::CondCode Cond = parseConstraintCode(OpInfo.ConstraintCode); |
51870 | if (Cond == X86::COND_INVALID) |
51871 | return SDValue(); |
51872 | |
51873 | if (OpInfo.ConstraintVT.isVector() || !OpInfo.ConstraintVT.isInteger() || |
51874 | OpInfo.ConstraintVT.getSizeInBits() < 8) |
51875 | report_fatal_error("Flag output operand is of invalid type"); |
51876 | |
51877 | |
51878 | if (Flag.getNode()) { |
51879 | Flag = DAG.getCopyFromReg(Chain, DL, X86::EFLAGS, MVT::i32, Flag); |
51880 | Chain = Flag.getValue(1); |
51881 | } else |
51882 | Flag = DAG.getCopyFromReg(Chain, DL, X86::EFLAGS, MVT::i32); |
51883 | |
51884 | SDValue CC = getSETCC(Cond, Flag, DL, DAG); |
51885 | |
51886 | SDValue Result = DAG.getNode(ISD::ZERO_EXTEND, DL, OpInfo.ConstraintVT, CC); |
51887 | |
51888 | return Result; |
51889 | } |
51890 | |
51891 | |
51892 | |
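      | // Validate immediate-constraint operands and turn them into |
      | // TargetConstants (I: 0..31, J: 0..63, K: signed imm8, M: 0..3, |
      | // N: 0..255, O: 0..127, L: the 0xff/0xffff masks, e/Z: signed/unsigned |
      | // 32-bit); anything unhandled falls back to the generic implementation. |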
51893 | void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op, |
51894 | std::string &Constraint, |
51895 | std::vector<SDValue>&Ops, |
51896 | SelectionDAG &DAG) const { |
51897 | SDValue Result; |
51898 | |
51899 | |
51900 | if (Constraint.length() > 1) return; |
51901 | |
51902 | char ConstraintLetter = Constraint[0]; |
51903 | switch (ConstraintLetter) { |
51904 | default: break; |
51905 | case 'I': |
51906 | if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { |
51907 | if (C->getZExtValue() <= 31) { |
51908 | Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op), |
51909 | Op.getValueType()); |
51910 | break; |
51911 | } |
51912 | } |
51913 | return; |
51914 | case 'J': |
51915 | if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { |
51916 | if (C->getZExtValue() <= 63) { |
51917 | Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op), |
51918 | Op.getValueType()); |
51919 | break; |
51920 | } |
51921 | } |
51922 | return; |
51923 | case 'K': |
51924 | if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { |
51925 | if (isInt<8>(C->getSExtValue())) { |
51926 | Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op), |
51927 | Op.getValueType()); |
51928 | break; |
51929 | } |
51930 | } |
51931 | return; |
51932 | case 'L': |
51933 | if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { |
51934 | if (C->getZExtValue() == 0xff || C->getZExtValue() == 0xffff || |
51935 | (Subtarget.is64Bit() && C->getZExtValue() == 0xffffffff)) { |
51936 | Result = DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op), |
51937 | Op.getValueType()); |
51938 | break; |
51939 | } |
51940 | } |
51941 | return; |
51942 | case 'M': |
51943 | if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { |
51944 | if (C->getZExtValue() <= 3) { |
51945 | Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op), |
51946 | Op.getValueType()); |
51947 | break; |
51948 | } |
51949 | } |
51950 | return; |
51951 | case 'N': |
51952 | if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { |
51953 | if (C->getZExtValue() <= 255) { |
51954 | Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op), |
51955 | Op.getValueType()); |
51956 | break; |
51957 | } |
51958 | } |
51959 | return; |
51960 | case 'O': |
51961 | if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { |
51962 | if (C->getZExtValue() <= 127) { |
51963 | Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op), |
51964 | Op.getValueType()); |
51965 | break; |
51966 | } |
51967 | } |
51968 | return; |
51969 | case 'e': { |
51970 | |
51971 | if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { |
51972 | if (ConstantInt::isValueValidForType(Type::getInt32Ty(*DAG.getContext()), |
51973 | C->getSExtValue())) { |
51974 | |
51975 | Result = DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op), MVT::i64); |
51976 | break; |
51977 | } |
51978 | |
51979 | |
51980 | } |
51981 | return; |
51982 | } |
51983 | case 'Z': { |
51984 | |
51985 | if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) { |
51986 | if (ConstantInt::isValueValidForType(Type::getInt32Ty(*DAG.getContext()), |
51987 | C->getZExtValue())) { |
51988 | Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op), |
51989 | Op.getValueType()); |
51990 | break; |
51991 | } |
51992 | } |
51993 | |
51994 | |
51995 | return; |
51996 | } |
51997 | case 'i': { |
51998 | |
51999 | if (ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op)) { |
52000 | bool IsBool = CST->getConstantIntValue()->getBitWidth() == 1; |
52001 | BooleanContent BCont = getBooleanContents(MVT::i64); |
52002 | ISD::NodeType ExtOpc = IsBool ? getExtendForContent(BCont) |
52003 | : ISD::SIGN_EXTEND; |
52004 | int64_t ExtVal = ExtOpc == ISD::ZERO_EXTEND ? CST->getZExtValue() |
52005 | : CST->getSExtValue(); |
52006 | Result = DAG.getTargetConstant(ExtVal, SDLoc(Op), MVT::i64); |
52007 | break; |
52008 | } |
52009 | |
52010 | |
52011 | |
52012 | |
52013 | if (Subtarget.isPICStyleGOT() || Subtarget.isPICStyleStubPIC()) |
52014 | return; |
52015 | |
52016 | |
52017 | |
52018 | if (auto *GA = dyn_cast<GlobalAddressSDNode>(Op)) |
52019 | |
52020 | |
52021 | if (isGlobalStubReference( |
52022 | Subtarget.classifyGlobalReference(GA->getGlobal()))) |
52023 | return; |
52024 | break; |
52025 | } |
52026 | } |
52027 | |
52028 | if (Result.getNode()) { |
52029 | Ops.push_back(Result); |
52030 | return; |
52031 | } |
52032 | return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); |
52033 | } |
52034 | |
52035 | |
52036 | |
52037 | static bool isGRClass(const TargetRegisterClass &RC) { |
52038 | return RC.hasSuperClassEq(&X86::GR8RegClass) || |
52039 | RC.hasSuperClassEq(&X86::GR16RegClass) || |
52040 | RC.hasSuperClassEq(&X86::GR32RegClass) || |
52041 | RC.hasSuperClassEq(&X86::GR64RegClass) || |
52042 | RC.hasSuperClassEq(&X86::LOW32_ADDR_ACCESS_RBPRegClass); |
52043 | } |
52044 | |
52045 | |
52046 | |
52047 | static bool isFRClass(const TargetRegisterClass &RC) { |
52048 | return RC.hasSuperClassEq(&X86::FR32XRegClass) || |
52049 | RC.hasSuperClassEq(&X86::FR64XRegClass) || |
52050 | RC.hasSuperClassEq(&X86::VR128XRegClass) || |
52051 | RC.hasSuperClassEq(&X86::VR256XRegClass) || |
52052 | RC.hasSuperClassEq(&X86::VR512RegClass); |
52053 | } |
52054 | |
52055 | |
52056 | |
52057 | static bool isVKClass(const TargetRegisterClass &RC) { |
52058 | return RC.hasSuperClassEq(&X86::VK1RegClass) || |
52059 | RC.hasSuperClassEq(&X86::VK2RegClass) || |
52060 | RC.hasSuperClassEq(&X86::VK4RegClass) || |
52061 | RC.hasSuperClassEq(&X86::VK8RegClass) || |
52062 | RC.hasSuperClassEq(&X86::VK16RegClass) || |
52063 | RC.hasSuperClassEq(&X86::VK32RegClass) || |
52064 | RC.hasSuperClassEq(&X86::VK64RegClass); |
52065 | } |
52066 | |
52067 | std::pair<unsigned, const TargetRegisterClass *> |
52068 | X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, |
52069 | StringRef Constraint, |
52070 | MVT VT) const { |
52071 | |
52072 | |
52073 | if (Constraint.size() == 1) { |
52074 | |
52075 | switch (Constraint[0]) { |
52076 | default: break; |
52077 | |
52078 | case 'A': |
52079 | if (Subtarget.is64Bit()) |
52080 | return std::make_pair(X86::RAX, &X86::GR64_ADRegClass); |
52081 | assert((Subtarget.is32Bit() || Subtarget.is16Bit()) && |
52082 | "Expecting 64, 32 or 16 bit subtarget"); |
52083 | return std::make_pair(X86::EAX, &X86::GR32_ADRegClass); |
52084 | |
52085 | |
52086 | |
52087 | |
52088 | case 'k': |
52089 | if (Subtarget.hasAVX512()) { |
52090 | if (VT == MVT::i1) |
52091 | return std::make_pair(0U, &X86::VK1RegClass); |
52092 | if (VT == MVT::i8) |
52093 | return std::make_pair(0U, &X86::VK8RegClass); |
52094 | if (VT == MVT::i16) |
52095 | return std::make_pair(0U, &X86::VK16RegClass); |
52096 | } |
52097 | if (Subtarget.hasBWI()) { |
52098 | if (VT == MVT::i32) |
52099 | return std::make_pair(0U, &X86::VK32RegClass); |
52100 | if (VT == MVT::i64) |
52101 | return std::make_pair(0U, &X86::VK64RegClass); |
52102 | } |
52103 | break; |
52104 | case 'q': |
52105 | if (Subtarget.is64Bit()) { |
52106 | if (VT == MVT::i8 || VT == MVT::i1) |
52107 | return std::make_pair(0U, &X86::GR8RegClass); |
52108 | if (VT == MVT::i16) |
52109 | return std::make_pair(0U, &X86::GR16RegClass); |
52110 | if (VT == MVT::i32 || VT == MVT::f32) |
52111 | return std::make_pair(0U, &X86::GR32RegClass); |
52112 | if (VT != MVT::f80 && !VT.isVector()) |
52113 | return std::make_pair(0U, &X86::GR64RegClass); |
52114 | break; |
52115 | } |
52116 | LLVM_FALLTHROUGH; |
52117 | |
52118 | case 'Q': |
52119 | if (VT == MVT::i8 || VT == MVT::i1) |
52120 | return std::make_pair(0U, &X86::GR8_ABCD_LRegClass); |
52121 | if (VT == MVT::i16) |
52122 | return std::make_pair(0U, &X86::GR16_ABCDRegClass); |
52123 | if (VT == MVT::i32 || VT == MVT::f32 || |
52124 | (!VT.isVector() && !Subtarget.is64Bit())) |
52125 | return std::make_pair(0U, &X86::GR32_ABCDRegClass); |
52126 | if (VT != MVT::f80 && !VT.isVector()) |
52127 | return std::make_pair(0U, &X86::GR64_ABCDRegClass); |
52128 | break; |
52129 | case 'r': |
52130 | case 'l': |
52131 | if (VT == MVT::i8 || VT == MVT::i1) |
52132 | return std::make_pair(0U, &X86::GR8RegClass); |
52133 | if (VT == MVT::i16) |
52134 | return std::make_pair(0U, &X86::GR16RegClass); |
52135 | if (VT == MVT::i32 || VT == MVT::f32 || |
52136 | (!VT.isVector() && !Subtarget.is64Bit())) |
52137 | return std::make_pair(0U, &X86::GR32RegClass); |
52138 | if (VT != MVT::f80 && !VT.isVector()) |
52139 | return std::make_pair(0U, &X86::GR64RegClass); |
52140 | break; |
52141 | case 'R': |
52142 | if (VT == MVT::i8 || VT == MVT::i1) |
52143 | return std::make_pair(0U, &X86::GR8_NOREXRegClass); |
52144 | if (VT == MVT::i16) |
52145 | return std::make_pair(0U, &X86::GR16_NOREXRegClass); |
52146 | if (VT == MVT::i32 || VT == MVT::f32 || |
52147 | (!VT.isVector() && !Subtarget.is64Bit())) |
52148 | return std::make_pair(0U, &X86::GR32_NOREXRegClass); |
52149 | if (VT != MVT::f80 && !VT.isVector()) |
52150 | return std::make_pair(0U, &X86::GR64_NOREXRegClass); |
52151 | break; |
52152 | case 'f': |
52153 | |
52154 | |
52155 | if (VT == MVT::f32 && !isScalarFPTypeInSSEReg(VT)) |
52156 | return std::make_pair(0U, &X86::RFP32RegClass); |
52157 | if (VT == MVT::f64 && !isScalarFPTypeInSSEReg(VT)) |
52158 | return std::make_pair(0U, &X86::RFP64RegClass); |
52159 | if (VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f80) |
52160 | return std::make_pair(0U, &X86::RFP80RegClass); |
52161 | break; |
52162 | case 'y': |
52163 | if (!Subtarget.hasMMX()) break; |
52164 | return std::make_pair(0U, &X86::VR64RegClass); |
52165 | case 'v': |
52166 | case 'x': |
52167 | if (!Subtarget.hasSSE1()) break; |
52168 | bool VConstraint = (Constraint[0] == 'v'); |
52169 | |
52170 | switch (VT.SimpleTy) { |
52171 | default: break; |
52172 | |
52173 | case MVT::f32: |
52174 | case MVT::i32: |
52175 | if (VConstraint && Subtarget.hasVLX()) |
52176 | return std::make_pair(0U, &X86::FR32XRegClass); |
52177 | return std::make_pair(0U, &X86::FR32RegClass); |
52178 | case MVT::f64: |
52179 | case MVT::i64: |
52180 | if (VConstraint && Subtarget.hasVLX()) |
52181 | return std::make_pair(0U, &X86::FR64XRegClass); |
52182 | return std::make_pair(0U, &X86::FR64RegClass); |
52183 | case MVT::i128: |
52184 | if (Subtarget.is64Bit()) { |
52185 | if (VConstraint && Subtarget.hasVLX()) |
52186 | return std::make_pair(0U, &X86::VR128XRegClass); |
52187 | return std::make_pair(0U, &X86::VR128RegClass); |
52188 | } |
52189 | break; |
52190 | |
52191 | case MVT::f128: |
52192 | case MVT::v16i8: |
52193 | case MVT::v8i16: |
52194 | case MVT::v4i32: |
52195 | case MVT::v2i64: |
52196 | case MVT::v4f32: |
52197 | case MVT::v2f64: |
52198 | if (VConstraint && Subtarget.hasVLX()) |
52199 | return std::make_pair(0U, &X86::VR128XRegClass); |
52200 | return std::make_pair(0U, &X86::VR128RegClass); |
52201 | |
52202 | case MVT::v32i8: |
52203 | case MVT::v16i16: |
52204 | case MVT::v8i32: |
52205 | case MVT::v4i64: |
52206 | case MVT::v8f32: |
52207 | case MVT::v4f64: |
52208 | if (VConstraint && Subtarget.hasVLX()) |
52209 | return std::make_pair(0U, &X86::VR256XRegClass); |
52210 | if (Subtarget.hasAVX()) |
52211 | return std::make_pair(0U, &X86::VR256RegClass); |
52212 | break; |
52213 | case MVT::v64i8: |
52214 | case MVT::v32i16: |
52215 | case MVT::v8f64: |
52216 | case MVT::v16f32: |
52217 | case MVT::v16i32: |
52218 | case MVT::v8i64: |
52219 | if (!Subtarget.hasAVX512()) break; |
52220 | if (VConstraint) |
52221 | return std::make_pair(0U, &X86::VR512RegClass); |
52222 | return std::make_pair(0U, &X86::VR512_0_15RegClass); |
52223 | } |
52224 | break; |
52225 | } |
52226 | } else if (Constraint.size() == 2 && Constraint[0] == 'Y') { |
52227 | switch (Constraint[1]) { |
52228 | default: |
52229 | break; |
52230 | case 'i': |
52231 | case 't': |
52232 | case '2': |
52233 | return getRegForInlineAsmConstraint(TRI, "x", VT); |
52234 | case 'm': |
52235 | if (!Subtarget.hasMMX()) break; |
52236 | return std::make_pair(0U, &X86::VR64RegClass); |
52237 | case 'z': |
52238 | if (!Subtarget.hasSSE1()) break; |
52239 | switch (VT.SimpleTy) { |
52240 | default: break; |
52241 | |
52242 | case MVT::f32: |
52243 | case MVT::i32: |
52244 | return std::make_pair(X86::XMM0, &X86::FR32RegClass); |
52245 | case MVT::f64: |
52246 | case MVT::i64: |
52247 | return std::make_pair(X86::XMM0, &X86::FR64RegClass); |
52248 | case MVT::f128: |
52249 | case MVT::v16i8: |
52250 | case MVT::v8i16: |
52251 | case MVT::v4i32: |
52252 | case MVT::v2i64: |
52253 | case MVT::v4f32: |
52254 | case MVT::v2f64: |
52255 | return std::make_pair(X86::XMM0, &X86::VR128RegClass); |
52256 | |
52257 | case MVT::v32i8: |
52258 | case MVT::v16i16: |
52259 | case MVT::v8i32: |
52260 | case MVT::v4i64: |
52261 | case MVT::v8f32: |
52262 | case MVT::v4f64: |
52263 | if (Subtarget.hasAVX()) |
52264 | return std::make_pair(X86::YMM0, &X86::VR256RegClass); |
52265 | break; |
52266 | case MVT::v64i8: |
52267 | case MVT::v32i16: |
52268 | case MVT::v8f64: |
52269 | case MVT::v16f32: |
52270 | case MVT::v16i32: |
52271 | case MVT::v8i64: |
52272 | if (Subtarget.hasAVX512()) |
52273 | return std::make_pair(X86::ZMM0, &X86::VR512_0_15RegClass); |
52274 | break; |
52275 | } |
52276 | break; |
52277 | case 'k': |
52278 | |
52279 | if (Subtarget.hasAVX512()) { |
52280 | if (VT == MVT::i1) |
52281 | return std::make_pair(0U, &X86::VK1WMRegClass); |
52282 | if (VT == MVT::i8) |
52283 | return std::make_pair(0U, &X86::VK8WMRegClass); |
52284 | if (VT == MVT::i16) |
52285 | return std::make_pair(0U, &X86::VK16WMRegClass); |
52286 | } |
52287 | if (Subtarget.hasBWI()) { |
52288 | if (VT == MVT::i32) |
52289 | return std::make_pair(0U, &X86::VK32WMRegClass); |
52290 | if (VT == MVT::i64) |
52291 | return std::make_pair(0U, &X86::VK64WMRegClass); |
52292 | } |
52293 | break; |
52294 | } |
52295 | } |

  if (parseConstraintCode(Constraint) != X86::COND_INVALID)
    return std::make_pair(0U, &X86::GR32RegClass);

  // Use the default implementation in TargetLowering to convert the register
  // constraint into a member of a register class.
  std::pair<Register, const TargetRegisterClass *> Res;
  Res = TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);

  // Not found as a standard register?
  if (!Res.second) {
    // Only match x87 registers if the VT is one SelectionDAGBuilder can
    // convert.
    if (VT == MVT::Other || VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f80) {
      // Map st(0) .. st(7) to the corresponding FP register.
      if (Constraint.size() == 7 && Constraint[0] == '{' &&
          tolower(Constraint[1]) == 's' && tolower(Constraint[2]) == 't' &&
          Constraint[3] == '(' &&
          (Constraint[4] >= '0' && Constraint[4] <= '7') &&
          Constraint[5] == ')' && Constraint[6] == '}') {
        // st(7) is not allocatable and thus not a member of RFP80. Return
        // the singleton class containing only it for references to st(7).
        if (Constraint[4] == '7')
          return std::make_pair(X86::FP7, &X86::RFP80_7RegClass);
        return std::make_pair(X86::FP0 + Constraint[4] - '0',
                              &X86::RFP80RegClass);
      }

      // GCC allows "st(0)" to be called just plain "st".
      if (StringRef("{st}").equals_insensitive(Constraint))
        return std::make_pair(X86::FP0, &X86::RFP80RegClass);
    }

    // flags -> EFLAGS.
    if (StringRef("{flags}").equals_insensitive(Constraint))
      return std::make_pair(X86::EFLAGS, &X86::CCRRegClass);

    // dirflag -> DF. Only allowed as a clobber, which is why VT must be
    // MVT::Other here.
    if (StringRef("{dirflag}").equals_insensitive(Constraint) &&
        VT == MVT::Other)
      return std::make_pair(X86::DF, &X86::DFCCRRegClass);

    // fpsr -> FPSW (the x87 status word).
    if (StringRef("{fpsr}").equals_insensitive(Constraint))
      return std::make_pair(X86::FPSW, &X86::FPCCRRegClass);

    return Res;
  }
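  // Illustrative sketch (not from the original source): the named-register
  // fallbacks above let C code clobber x87 and flag state explicitly, e.g.:
  //   asm volatile("fninit" ::: "st", "st(1)", "fpsr", "flags");
  // Each clobber resolves through the equals_insensitive checks to FP0/FP1,
  // FPSW and EFLAGS respectively; "dirflag" is accepted only as a clobber
  // because its VT must be MVT::Other.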

  // Make sure it isn't a register that requires 64-bit mode.
  if (!Subtarget.is64Bit() &&
      (isFRClass(*Res.second) || isGRClass(*Res.second)) &&
      TRI->getEncodingValue(Res.first) >= 8) {
    // Register requires a REX prefix, but we're in 32-bit mode.
    return std::make_pair(0, nullptr);
  }

  // Make sure it isn't a register that requires AVX512.
  if (!Subtarget.hasAVX512() && isFRClass(*Res.second) &&
      TRI->getEncodingValue(Res.first) & 0x10) {
    // Register requires an EVEX prefix.
    return std::make_pair(0, nullptr);
  }

  // Otherwise, check to see if this is a register class of the wrong value
  // type. For example, we want to map "{ax},i32" -> {eax}, not reject it.
  // MVT::Other is used to specify clobber names, which never need resizing.
  if (TRI->isTypeLegalForClass(*Res.second, VT) || VT == MVT::Other)
    return Res; // Correct type already, nothing to do.

  // Get a matching register of the correct size, i.e. "ax" with MVT::i32
  // should be returned as {eax}.
  const TargetRegisterClass *Class = Res.second;

  // Handle general-purpose registers: map the matched register to its sub-
  // or super-register of the requested size.
  if (isGRClass(*Class)) {
    unsigned Size = VT.getSizeInBits();
    if (Size == 1) Size = 8;
    Register DestReg = getX86SubSuperRegisterOrZero(Res.first, Size);
    if (DestReg > 0) {
      bool is64Bit = Subtarget.is64Bit();
      const TargetRegisterClass *RC =
          Size == 8 ? (is64Bit ? &X86::GR8RegClass : &X86::GR8_NOREXRegClass)
        : Size == 16 ? (is64Bit ? &X86::GR16RegClass : &X86::GR16_NOREXRegClass)
        : Size == 32 ? (is64Bit ? &X86::GR32RegClass : &X86::GR32_NOREXRegClass)
        : Size == 64 ? (is64Bit ? &X86::GR64RegClass : nullptr)
        : nullptr;
      if (Size == 64 && !is64Bit) {
        // Model GCC's behavior for 64-bit values in 32-bit mode: map the
        // named register to a paired 32-bit class (EAX+EDX, EDX+ECX, ...).
        switch (DestReg) {
        case X86::RAX:
          return std::make_pair(X86::EAX, &X86::GR32_ADRegClass);
        case X86::RDX:
          return std::make_pair(X86::EDX, &X86::GR32_DCRegClass);
        case X86::RCX:
          return std::make_pair(X86::ECX, &X86::GR32_CBRegClass);
        case X86::RBX:
          return std::make_pair(X86::EBX, &X86::GR32_BSIRegClass);
        case X86::RSI:
          return std::make_pair(X86::ESI, &X86::GR32_SIDIRegClass);
        case X86::RDI:
          return std::make_pair(X86::EDI, &X86::GR32_DIBPRegClass);
        case X86::RBP:
          return std::make_pair(X86::EBP, &X86::GR32_BPSPRegClass);
        default:
          return std::make_pair(0, nullptr);
        }
      }
      if (RC && RC->contains(DestReg))
        return std::make_pair(DestReg, RC);
      return Res;
    }
    // No register of the requested size exists.
    return std::make_pair(0, nullptr);
  } else if (isFRClass(*Class)) {
    // Handle references to XMM physical registers that got mapped into the
    // wrong class. This can happen with constraints like {xmm0} where the
    // target independent register mapper will just pick the first match it
    // can find, ignoring the required type. Use the X-suffixed (EVEX-capable)
    // classes so that registers above xmm15 remain representable.
    if (VT == MVT::f32 || VT == MVT::i32)
      Res.second = &X86::FR32XRegClass;
    else if (VT == MVT::f64 || VT == MVT::i64)
      Res.second = &X86::FR64XRegClass;
    else if (TRI->isTypeLegalForClass(X86::VR128XRegClass, VT))
      Res.second = &X86::VR128XRegClass;
    else if (TRI->isTypeLegalForClass(X86::VR256XRegClass, VT))
      Res.second = &X86::VR256XRegClass;
    else if (TRI->isTypeLegalForClass(X86::VR512RegClass, VT))
      Res.second = &X86::VR512RegClass;
    else {
      // The requested type is not legal for any vector class; signal failure.
      Res.first = 0;
      Res.second = nullptr;
    }
  } else if (isVKClass(*Class)) {
    // AVX-512 mask registers: pick the VK class matching the operand width.
    if (VT == MVT::i1)
      Res.second = &X86::VK1RegClass;
    else if (VT == MVT::i8)
      Res.second = &X86::VK8RegClass;
    else if (VT == MVT::i16)
      Res.second = &X86::VK16RegClass;
    else if (VT == MVT::i32)
      Res.second = &X86::VK32RegClass;
    else if (VT == MVT::i64)
      Res.second = &X86::VK64RegClass;
    else {
      // No mask class of this width; signal failure.
      Res.first = 0;
      Res.second = nullptr;
    }
  }

  return Res;
}
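// Illustrative sketch (not from the original source): the resizing logic at
// the end of getRegForInlineAsmConstraint is what lets LLVM IR such as
//   %x = call i32 asm "movl $$42, $0", "={ax}"()
// succeed: the constraint names the 16-bit AX, but the i32 result causes
// Res to be rewritten to (EAX, GR32RegClass) instead of being rejected.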

InstructionCost X86TargetLowering::getScalingFactorCost(const DataLayout &DL,
                                                        const AddrMode &AM,
                                                        Type *Ty,
                                                        unsigned AS) const {
  // Scaling factors are not free at all. An indexed folded instruction,
  // i.e. inst (reg1, reg2, scale), ties up an extra register compared to the
  // plain addressing mode inst (reg1). E.g.,
  //   vaddps (%rsi,%rdx), %ymm0, %ymm1
  // needs both %rsi and %rdx live, whereas
  //   vaddps (%rsi), %ymm0, %ymm1
  // only needs %rsi, freeing a register for other work. On some subtargets a
  // complex addressing mode is also slower on its own; e.g. on Haswell a
  // store with an index register cannot use the dedicated store-address port.
  if (isLegalAddressingMode(DL, AM, Ty, AS))
    // AM.Scale represents reg2 * scale, so charge 1 as soon as a second
    // (index) register is used.
    return AM.Scale != 0;
  return -1;
}
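// Illustrative sketch (not from the original source): a pass such as
// LoopStrengthReduce consults this hook roughly as follows; the fields set
// here are the ones TargetLowering::AddrMode actually carries.
//   TargetLowering::AddrMode AM;
//   AM.HasBaseReg = true;
//   AM.Scale = 4;                  // addressing mode: base + 4*index
//   InstructionCost Cost =
//       TLI.getScalingFactorCost(DL, AM, Int32Ty, /*AS=*/0);
//   // Cost == 1 here: the mode is legal, but it consumes an index register.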

bool X86TargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const {
  // Integer division on x86 is expensive. However, when aggressively
  // optimizing for code size, we prefer to use a div instruction, as it is
  // usually smaller than the alternative sequence.
  // The exception to this is vector division. Since x86 doesn't have vector
  // integer division, leaving the division as-is is a loss even in terms of
  // size, because it will have to be scalarized, while the alternative code
  // sequence can be performed in vector form.
  bool OptSize = Attr.hasFnAttribute(Attribute::MinSize);
  return OptSize && !VT.isVector();
}
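// Illustrative sketch (not from the original source): the effect of this
// hook is visible on IR like
//   define i32 @f(i32 %a) minsize {
//     %r = sdiv i32 %a, 7    ; kept as idiv: smaller than the
//     ret i32 %r             ; multiply/shift expansion
//   }
// whereas the same sdiv on <4 x i32> is still expanded, since x86 has no
// vector integer divide and a per-lane idiv scalarization would be larger.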

void X86TargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const {
  if (!Subtarget.is64Bit())
    return;

  // Update IsSplitCSR in X86MachineFunctionInfo.
  X86MachineFunctionInfo *AFI =
      Entry->getParent()->getInfo<X86MachineFunctionInfo>();
  AFI->setIsSplitCSR(true);
}

void X86TargetLowering::insertCopiesSplitCSR(
    MachineBasicBlock *Entry,
    const SmallVectorImpl<MachineBasicBlock *> &Exits) const {
  const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
  const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent());
  if (!IStart)
    return;

  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo();
  MachineBasicBlock::iterator MBBI = Entry->begin();
  for (const MCPhysReg *I = IStart; *I; ++I) {
    const TargetRegisterClass *RC = nullptr;
    if (X86::GR64RegClass.contains(*I))
      RC = &X86::GR64RegClass;
    else
      llvm_unreachable("Unexpected register class in CSRsViaCopy!");

    Register NewVR = MRI->createVirtualRegister(RC);
    // Create a copy of the physical CSR into a virtual register on entry,
    // and copy it back to the physical register on every exit.
    // FIXME: this currently does not emit CFI pseudo-instructions. That is
    // fine for CXX_FAST_TLS, since the C++-style TLS access functions should
    // be nounwind, but would need fixing to generalize.
    assert(
        Entry->getParent()->getFunction().hasFnAttribute(Attribute::NoUnwind) &&
        "Function should be nounwind in insertCopiesSplitCSR!");
    Entry->addLiveIn(*I);
    BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR)
        .addReg(*I);

    // Insert the copy-back instructions right before each terminator.
    for (auto *Exit : Exits)
      BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(),
              TII->get(TargetOpcode::COPY), *I)
          .addReg(NewVR);
  }
}
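// Illustrative sketch (not from the original source): split-CSR is used for
// the CXX_FAST_TLS calling convention, i.e. thread-local variable access
// functions. IR of roughly this shape triggers the two hooks above:
//   define cxx_fast_tlscc i8* @_ZTW1x() nounwind { ... }
// Each GR64 CSR is copied into a fresh vreg on entry and copied back before
// every return, letting the register allocator elide the save/restore on
// paths that never touch the CSR.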

bool X86TargetLowering::supportSwiftError() const {
  return Subtarget.is64Bit();
}
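// Illustrative sketch (not from the original source): because this hook is
// false on 32-bit x86, IR using a swifterror parameter, e.g.
//   declare swiftcc void @g(%swift.error** swifterror %err)
// is only lowered to the register-based error convention on 64-bit
// subtargets (where, if memory serves, R12 carries the swifterror value).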

/// Returns true if stack probing through a function call is requested.
bool X86TargetLowering::hasStackProbeSymbol(MachineFunction &MF) const {
  return !getStackProbeSymbolName(MF).empty();
}

/// Returns true if stack probing through inline assembly is requested.
bool X86TargetLowering::hasInlineStackProbe(MachineFunction &MF) const {
  // No inline stack probes on Windows, which has its own mechanism; the
  // "no-stack-arg-probe" attribute disables probing entirely.
  if (Subtarget.isOSWindows() ||
      MF.getFunction().hasFnAttribute("no-stack-arg-probe"))
    return false;

  // If the function explicitly requests inline stack probes, honor that.
  if (MF.getFunction().hasFnAttribute("probe-stack"))
    return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
           "inline-asm";

  return false;
}
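// Illustrative sketch (not from the original source): the attribute checked
// above comes straight from the IR, e.g.
//   define void @big() "probe-stack"="inline-asm" {
//     %buf = alloca [8192 x i8]
//     ...
//   }
// selects inline probing, while any other "probe-stack" value is treated as
// the name of a probe function by getStackProbeSymbolName below.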

/// Returns the name of the symbol used to emit stack probes, or the empty
/// string if not applicable.
StringRef
X86TargetLowering::getStackProbeSymbolName(MachineFunction &MF) const {
  // Inline-asm stack probes don't have a symbol.
  if (hasInlineStackProbe(MF))
    return "";

  // If the function specifically requests a probe symbol, use it.
  if (MF.getFunction().hasFnAttribute("probe-stack"))
    return MF.getFunction().getFnAttribute("probe-stack").getValueAsString();

  // Generally, if we aren't on Windows, the platform ABI does not include
  // support for stack probes, so don't emit them.
  if (!Subtarget.isOSWindows() || Subtarget.isTargetMachO() ||
      MF.getFunction().hasFnAttribute("no-stack-arg-probe"))
    return "";

  // We need a stack probe to conform to the Windows ABI. Choose the right
  // symbol.
  if (Subtarget.is64Bit())
    return Subtarget.isTargetCygMing() ? "___chkstk_ms" : "__chkstk";
  return Subtarget.isTargetCygMing() ? "_alloca" : "_chkstk";
}
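// Illustrative sketch (not from the original source): how the defaults fall
// out, assuming no "probe-stack"/"no-stack-arg-probe" attributes:
//   x86_64-pc-windows-msvc  -> __chkstk
//   x86_64-w64-windows-gnu  -> ___chkstk_ms
//   i686-pc-windows-msvc    -> _chkstk
//   i686-w64-windows-gnu    -> _alloca
//   any non-Windows triple  -> "" (no probe symbol)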

unsigned
X86TargetLowering::getStackProbeSize(MachineFunction &MF) const {
  // Probe every 4096 bytes unless the function carries a "stack-probe-size"
  // attribute that overrides the default.
  unsigned StackProbeSize = 4096;
  const Function &Fn = MF.getFunction();
  if (Fn.hasFnAttribute("stack-probe-size"))
    Fn.getFnAttribute("stack-probe-size")
        .getValueAsString()
        .getAsInteger(0, StackProbeSize);
  return StackProbeSize;
}
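// Illustrative sketch (not from the original source): overriding the
// 4096-byte default from IR:
//   define void @f() "stack-probe-size"="8192" { ... }
// The radix argument of 0 lets StringRef::getAsInteger auto-detect decimal,
// octal, or hex spellings of the attribute value.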

Align X86TargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
  if (ML->isInnermost() &&
      ExperimentalPrefInnermostLoopAlignment.getNumOccurrences())
    return Align(1ULL << ExperimentalPrefInnermostLoopAlignment);
  return TargetLowering::getPrefLoopAlignment();
}
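// Illustrative sketch (not from the original source): the cl::opt behind
// ExperimentalPrefInnermostLoopAlignment stores a log2 value, so passing 5
// (spelled -x86-experimental-pref-innermost-loop-alignment=5, if memory
// serves) asks for 32-byte alignment of innermost loops. The flag is only
// consulted when it actually appears on the command line, which is what the
// getNumOccurrences() check above tests.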