clang -cc1 -cc1 -triple amd64-unknown-openbsd7.0 -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name X86TargetTransformInfo.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 1 -fhalf-no-semantic-interposition -mframe-pointer=all -relaxed-aliasing -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/usr/src/gnu/usr.bin/clang/libLLVM/obj -resource-dir /usr/local/lib/clang/13.0.0 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I 
/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Analysis -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ASMParser -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/BinaryFormat -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Bitcode -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Bitcode -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Bitstream -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /include/llvm/CodeGen -I /include/llvm/CodeGen/PBQP -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/IR -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/IR -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Coroutines -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ProfileData/Coverage -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/CodeView -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/DWARF -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/MSF -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/PDB -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Demangle -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ExecutionEngine -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ExecutionEngine/JITLink -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ExecutionEngine/Orc -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend -I 
/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend/OpenACC -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend/OpenMP -I /include/llvm/CodeGen/GlobalISel -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/IRReader -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/InstCombine -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/Transforms/InstCombine -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/LTO -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Linker -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/MC -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/MC/MCParser -I /include/llvm/CodeGen/MIRParser -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Object -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Option -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Passes -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ProfileData -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Scalar -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ADT -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Support -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/Symbolize -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Target -I 
/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Utils -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Vectorize -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/IPO -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include -I /usr/src/gnu/usr.bin/clang/libLLVM/../include -I /usr/src/gnu/usr.bin/clang/libLLVM/obj -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include -D NDEBUG -D __STDC_LIMIT_MACROS -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D LLVM_PREFIX="/usr" -D PIC -internal-isystem /usr/include/c++/v1 -internal-isystem /usr/local/lib/clang/13.0.0/include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/usr/src/gnu/usr.bin/clang/libLLVM/obj -ferror-limit 19 -fvisibility-inlines-hidden -fwrapv -D_RET_PROTECTOR -ret-protector -fno-rtti -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -fno-builtin-malloc 
-fno-builtin-calloc -fno-builtin-realloc -fno-builtin-valloc -fno-builtin-free -fno-builtin-strdup -fno-builtin-strndup -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /home/ben/Projects/vmm/scan-build/2022-01-12-194120-40624-1 -x c++ /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
1 | |
2 | |
3 | |
4 | |
5 | |
6 | |
7 | |
8 | |
9 | |
10 | |
11 | |
12 | |
13 | |
14 | |
15 | |
16 | |
17 | |
18 | |
19 | |
20 | |
21 | |
22 | |
23 | |
24 | |
25 | |
26 | |
27 | |
28 | |
29 | |
30 | |
31 | |
32 | |
33 | |
34 | |
35 | |
36 | |
37 | |
38 | |
39 | |
40 | |
41 | #include "X86TargetTransformInfo.h" |
42 | #include "llvm/Analysis/TargetTransformInfo.h" |
43 | #include "llvm/CodeGen/BasicTTIImpl.h" |
44 | #include "llvm/CodeGen/CostTable.h" |
45 | #include "llvm/CodeGen/TargetLowering.h" |
46 | #include "llvm/IR/IntrinsicInst.h" |
47 | #include "llvm/Support/Debug.h" |
48 | |
49 | using namespace llvm; |
50 | |
51 | #define DEBUG_TYPE "x86tti" |
52 | |
53 | |
54 | |
55 | |
56 | |
57 | |
58 | |
59 | TargetTransformInfo::PopcntSupportKind |
60 | X86TTIImpl::getPopcntSupport(unsigned TyWidth) { |
61 | assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2"); |
62 | |
63 | |
64 | |
65 | return ST->hasPOPCNT() ? TTI::PSK_FastHardware : TTI::PSK_Software; |
66 | } |
67 | |
68 | llvm::Optional<unsigned> X86TTIImpl::getCacheSize( |
69 | TargetTransformInfo::CacheLevel Level) const { |
70 | switch (Level) { |
71 | case TargetTransformInfo::CacheLevel::L1D: |
72 | |
73 | |
74 | |
75 | |
76 | |
77 | |
78 | |
79 | |
80 | |
81 | return 32 * 1024; |
82 | case TargetTransformInfo::CacheLevel::L2D: |
83 | |
84 | |
85 | |
86 | |
87 | |
88 | |
89 | |
90 | |
91 | |
92 | return 256 * 1024; |
93 | } |
94 | |
95 | llvm_unreachable("Unknown TargetTransformInfo::CacheLevel"); |
96 | } |
97 | |
98 | llvm::Optional<unsigned> X86TTIImpl::getCacheAssociativity( |
99 | TargetTransformInfo::CacheLevel Level) const { |
100 | |
101 | |
102 | |
103 | |
104 | |
105 | |
106 | |
107 | |
108 | |
109 | switch (Level) { |
110 | case TargetTransformInfo::CacheLevel::L1D: |
111 | LLVM_FALLTHROUGH; |
112 | case TargetTransformInfo::CacheLevel::L2D: |
113 | return 8; |
114 | } |
115 | |
116 | llvm_unreachable("Unknown TargetTransformInfo::CacheLevel"); |
117 | } |
118 | |
119 | unsigned X86TTIImpl::getNumberOfRegisters(unsigned ClassID) const { |
120 | bool Vector = (ClassID == 1); |
121 | if (Vector && !ST->hasSSE1()) |
122 | return 0; |
123 | |
124 | if (ST->is64Bit()) { |
125 | if (Vector && ST->hasAVX512()) |
126 | return 32; |
127 | return 16; |
128 | } |
129 | return 8; |
130 | } |
131 | |
132 | TypeSize |
133 | X86TTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const { |
134 | unsigned PreferVectorWidth = ST->getPreferVectorWidth(); |
135 | switch (K) { |
136 | case TargetTransformInfo::RGK_Scalar: |
137 | return TypeSize::getFixed(ST->is64Bit() ? 64 : 32); |
138 | case TargetTransformInfo::RGK_FixedWidthVector: |
139 | if (ST->hasAVX512() && PreferVectorWidth >= 512) |
140 | return TypeSize::getFixed(512); |
141 | if (ST->hasAVX() && PreferVectorWidth >= 256) |
142 | return TypeSize::getFixed(256); |
143 | if (ST->hasSSE1() && PreferVectorWidth >= 128) |
144 | return TypeSize::getFixed(128); |
145 | return TypeSize::getFixed(0); |
146 | case TargetTransformInfo::RGK_ScalableVector: |
147 | return TypeSize::getScalable(0); |
148 | } |
149 | |
150 | llvm_unreachable("Unsupported register kind"); |
151 | } |
152 | |
153 | unsigned X86TTIImpl::getLoadStoreVecRegBitWidth(unsigned) const { |
154 | return getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector) |
155 | .getFixedSize(); |
156 | } |
157 | |
158 | unsigned X86TTIImpl::getMaxInterleaveFactor(unsigned VF) { |
159 | |
160 | |
161 | |
162 | if (VF == 1) |
163 | return 1; |
164 | |
165 | if (ST->isAtom()) |
166 | return 1; |
167 | |
168 | |
169 | |
170 | if (ST->hasAVX()) |
171 | return 4; |
172 | |
173 | return 2; |
174 | } |
175 | |
176 | InstructionCost X86TTIImpl::getArithmeticInstrCost( |
177 | unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, |
178 | TTI::OperandValueKind Op1Info, TTI::OperandValueKind Op2Info, |
179 | TTI::OperandValueProperties Opd1PropInfo, |
180 | TTI::OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args, |
181 | const Instruction *CxtI) { |
182 | |
183 | if (CostKind != TTI::TCK_RecipThroughput) |
184 | return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, |
185 | Op2Info, Opd1PropInfo, |
186 | Opd2PropInfo, Args, CxtI); |
187 | |
188 | |
189 | if (Opcode == Instruction::Mul && Ty->isVectorTy() && |
190 | Ty->getScalarSizeInBits() == 8) { |
191 | Type *WideVecTy = |
192 | VectorType::getExtendedElementVectorType(cast<VectorType>(Ty)); |
193 | return getCastInstrCost(Instruction::ZExt, WideVecTy, Ty, |
194 | TargetTransformInfo::CastContextHint::None, |
195 | CostKind) + |
196 | getCastInstrCost(Instruction::Trunc, Ty, WideVecTy, |
197 | TargetTransformInfo::CastContextHint::None, |
198 | CostKind) + |
199 | getArithmeticInstrCost(Opcode, WideVecTy, CostKind, Op1Info, Op2Info, |
200 | Opd1PropInfo, Opd2PropInfo); |
201 | } |
202 | |
203 | |
204 | std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty); |
205 | |
206 | int ISD = TLI->InstructionOpcodeToISD(Opcode); |
207 | assert(ISD && "Invalid opcode"); |
208 | |
209 | static const CostTblEntry GLMCostTable[] = { |
210 | { ISD::FDIV, MVT::f32, 18 }, |
211 | { ISD::FDIV, MVT::v4f32, 35 }, |
212 | { ISD::FDIV, MVT::f64, 33 }, |
213 | { ISD::FDIV, MVT::v2f64, 65 }, |
214 | }; |
215 | |
216 | if (ST->useGLMDivSqrtCosts()) |
217 | if (const auto *Entry = CostTableLookup(GLMCostTable, ISD, |
218 | LT.second)) |
219 | return LT.first * Entry->Cost; |
220 | |
221 | static const CostTblEntry SLMCostTable[] = { |
222 | { ISD::MUL, MVT::v4i32, 11 }, |
223 | { ISD::MUL, MVT::v8i16, 2 }, |
224 | { ISD::FMUL, MVT::f64, 2 }, |
225 | { ISD::FMUL, MVT::v2f64, 4 }, |
226 | { ISD::FMUL, MVT::v4f32, 2 }, |
227 | { ISD::FDIV, MVT::f32, 17 }, |
228 | { ISD::FDIV, MVT::v4f32, 39 }, |
229 | { ISD::FDIV, MVT::f64, 32 }, |
230 | { ISD::FDIV, MVT::v2f64, 69 }, |
231 | { ISD::FADD, MVT::v2f64, 2 }, |
232 | { ISD::FSUB, MVT::v2f64, 2 }, |
233 | |
234 | |
235 | |
236 | |
237 | |
238 | { ISD::MUL, MVT::v2i64, 17 }, |
239 | |
240 | { ISD::ADD, MVT::v2i64, 4 }, |
241 | { ISD::SUB, MVT::v2i64, 4 }, |
242 | }; |
243 | |
244 | if (ST->isSLM()) { |
245 | if (Args.size() == 2 && ISD == ISD::MUL && LT.second == MVT::v4i32) { |
246 | |
247 | bool Op1Signed = false; |
248 | unsigned Op1MinSize = BaseT::minRequiredElementSize(Args[0], Op1Signed); |
249 | bool Op2Signed = false; |
250 | unsigned Op2MinSize = BaseT::minRequiredElementSize(Args[1], Op2Signed); |
251 | |
252 | bool SignedMode = Op1Signed || Op2Signed; |
253 | unsigned OpMinSize = std::max(Op1MinSize, Op2MinSize); |
254 | |
255 | if (OpMinSize <= 7) |
256 | return LT.first * 3; |
257 | if (!SignedMode && OpMinSize <= 8) |
258 | return LT.first * 3; |
259 | if (OpMinSize <= 15) |
260 | return LT.first * 5; |
261 | if (!SignedMode && OpMinSize <= 16) |
262 | return LT.first * 5; |
263 | } |
264 | |
265 | if (const auto *Entry = CostTableLookup(SLMCostTable, ISD, |
266 | LT.second)) { |
267 | return LT.first * Entry->Cost; |
268 | } |
269 | } |
270 | |
271 | if ((ISD == ISD::SDIV || ISD == ISD::SREM || ISD == ISD::UDIV || |
272 | ISD == ISD::UREM) && |
273 | (Op2Info == TargetTransformInfo::OK_UniformConstantValue || |
274 | Op2Info == TargetTransformInfo::OK_NonUniformConstantValue) && |
275 | Opd2PropInfo == TargetTransformInfo::OP_PowerOf2) { |
276 | if (ISD == ISD::SDIV || ISD == ISD::SREM) { |
277 | |
278 | |
279 | |
280 | |
281 | InstructionCost Cost = |
282 | 2 * getArithmeticInstrCost(Instruction::AShr, Ty, CostKind, Op1Info, |
283 | Op2Info, TargetTransformInfo::OP_None, |
284 | TargetTransformInfo::OP_None); |
285 | Cost += getArithmeticInstrCost(Instruction::LShr, Ty, CostKind, Op1Info, |
286 | Op2Info, |
287 | TargetTransformInfo::OP_None, |
288 | TargetTransformInfo::OP_None); |
289 | Cost += getArithmeticInstrCost(Instruction::Add, Ty, CostKind, Op1Info, |
290 | Op2Info, |
291 | TargetTransformInfo::OP_None, |
292 | TargetTransformInfo::OP_None); |
293 | |
294 | if (ISD == ISD::SREM) { |
295 | |
296 | Cost += getArithmeticInstrCost(Instruction::Mul, Ty, CostKind, Op1Info, |
297 | Op2Info); |
298 | Cost += getArithmeticInstrCost(Instruction::Sub, Ty, CostKind, Op1Info, |
299 | Op2Info); |
300 | } |
301 | |
302 | return Cost; |
303 | } |
304 | |
305 | |
306 | if (ISD == ISD::UDIV) |
307 | return getArithmeticInstrCost(Instruction::LShr, Ty, CostKind, |
308 | Op1Info, Op2Info, |
309 | TargetTransformInfo::OP_None, |
310 | TargetTransformInfo::OP_None); |
311 | |
312 | else |
313 | return getArithmeticInstrCost(Instruction::And, Ty, CostKind, |
314 | Op1Info, Op2Info, |
315 | TargetTransformInfo::OP_None, |
316 | TargetTransformInfo::OP_None); |
317 | } |
318 | |
319 | static const CostTblEntry AVX512BWUniformConstCostTable[] = { |
320 | { ISD::SHL, MVT::v64i8, 2 }, |
321 | { ISD::SRL, MVT::v64i8, 2 }, |
322 | { ISD::SRA, MVT::v64i8, 4 }, |
323 | }; |
324 | |
325 | if (Op2Info == TargetTransformInfo::OK_UniformConstantValue && |
326 | ST->hasBWI()) { |
327 | if (const auto *Entry = CostTableLookup(AVX512BWUniformConstCostTable, ISD, |
328 | LT.second)) |
329 | return LT.first * Entry->Cost; |
330 | } |
331 | |
332 | static const CostTblEntry AVX512UniformConstCostTable[] = { |
333 | { ISD::SRA, MVT::v2i64, 1 }, |
334 | { ISD::SRA, MVT::v4i64, 1 }, |
335 | { ISD::SRA, MVT::v8i64, 1 }, |
336 | |
337 | { ISD::SHL, MVT::v64i8, 4 }, |
338 | { ISD::SRL, MVT::v64i8, 4 }, |
339 | { ISD::SRA, MVT::v64i8, 8 }, |
340 | |
341 | { ISD::SDIV, MVT::v16i32, 6 }, |
342 | { ISD::SREM, MVT::v16i32, 8 }, |
343 | { ISD::UDIV, MVT::v16i32, 5 }, |
344 | { ISD::UREM, MVT::v16i32, 7 }, |
345 | }; |
346 | |
347 | if (Op2Info == TargetTransformInfo::OK_UniformConstantValue && |
348 | ST->hasAVX512()) { |
349 | if (const auto *Entry = CostTableLookup(AVX512UniformConstCostTable, ISD, |
350 | LT.second)) |
351 | return LT.first * Entry->Cost; |
352 | } |
353 | |
354 | static const CostTblEntry AVX2UniformConstCostTable[] = { |
355 | { ISD::SHL, MVT::v32i8, 2 }, |
356 | { ISD::SRL, MVT::v32i8, 2 }, |
357 | { ISD::SRA, MVT::v32i8, 4 }, |
358 | |
359 | { ISD::SRA, MVT::v4i64, 4 }, |
360 | |
361 | { ISD::SDIV, MVT::v8i32, 6 }, |
362 | { ISD::SREM, MVT::v8i32, 8 }, |
363 | { ISD::UDIV, MVT::v8i32, 5 }, |
364 | { ISD::UREM, MVT::v8i32, 7 }, |
365 | }; |
366 | |
367 | if (Op2Info == TargetTransformInfo::OK_UniformConstantValue && |
368 | ST->hasAVX2()) { |
369 | if (const auto *Entry = CostTableLookup(AVX2UniformConstCostTable, ISD, |
370 | LT.second)) |
371 | return LT.first * Entry->Cost; |
372 | } |
373 | |
374 | static const CostTblEntry SSE2UniformConstCostTable[] = { |
375 | { ISD::SHL, MVT::v16i8, 2 }, |
376 | { ISD::SRL, MVT::v16i8, 2 }, |
377 | { ISD::SRA, MVT::v16i8, 4 }, |
378 | |
379 | { ISD::SHL, MVT::v32i8, 4+2 }, |
380 | { ISD::SRL, MVT::v32i8, 4+2 }, |
381 | { ISD::SRA, MVT::v32i8, 8+2 }, |
382 | |
383 | { ISD::SDIV, MVT::v8i32, 12+2 }, |
384 | { ISD::SREM, MVT::v8i32, 16+2 }, |
385 | { ISD::SDIV, MVT::v4i32, 6 }, |
386 | { ISD::SREM, MVT::v4i32, 8 }, |
387 | { ISD::UDIV, MVT::v8i32, 10+2 }, |
388 | { ISD::UREM, MVT::v8i32, 14+2 }, |
389 | { ISD::UDIV, MVT::v4i32, 5 }, |
390 | { ISD::UREM, MVT::v4i32, 7 }, |
391 | }; |
392 | |
393 | |
394 | if (Op2Info == TargetTransformInfo::OK_UniformConstantValue && |
395 | ST->hasSSE2() && !ST->hasXOP()) { |
396 | if (const auto *Entry = |
397 | CostTableLookup(SSE2UniformConstCostTable, ISD, LT.second)) |
398 | return LT.first * Entry->Cost; |
399 | } |
400 | |
401 | static const CostTblEntry AVX512BWConstCostTable[] = { |
402 | { ISD::SDIV, MVT::v64i8, 14 }, |
403 | { ISD::SREM, MVT::v64i8, 16 }, |
404 | { ISD::UDIV, MVT::v64i8, 14 }, |
405 | { ISD::UREM, MVT::v64i8, 16 }, |
406 | { ISD::SDIV, MVT::v32i16, 6 }, |
407 | { ISD::SREM, MVT::v32i16, 8 }, |
408 | { ISD::UDIV, MVT::v32i16, 6 }, |
409 | { ISD::UREM, MVT::v32i16, 8 }, |
410 | }; |
411 | |
412 | if ((Op2Info == TargetTransformInfo::OK_UniformConstantValue || |
413 | Op2Info == TargetTransformInfo::OK_NonUniformConstantValue) && |
414 | ST->hasBWI()) { |
415 | if (const auto *Entry = |
416 | CostTableLookup(AVX512BWConstCostTable, ISD, LT.second)) |
417 | return LT.first * Entry->Cost; |
418 | } |
419 | |
420 | static const CostTblEntry AVX512ConstCostTable[] = { |
421 | { ISD::SDIV, MVT::v16i32, 15 }, |
422 | { ISD::SREM, MVT::v16i32, 17 }, |
423 | { ISD::UDIV, MVT::v16i32, 15 }, |
424 | { ISD::UREM, MVT::v16i32, 17 }, |
425 | { ISD::SDIV, MVT::v64i8, 28 }, |
426 | { ISD::SREM, MVT::v64i8, 32 }, |
427 | { ISD::UDIV, MVT::v64i8, 28 }, |
428 | { ISD::UREM, MVT::v64i8, 32 }, |
429 | { ISD::SDIV, MVT::v32i16, 12 }, |
430 | { ISD::SREM, MVT::v32i16, 16 }, |
431 | { ISD::UDIV, MVT::v32i16, 12 }, |
432 | { ISD::UREM, MVT::v32i16, 16 }, |
433 | }; |
434 | |
435 | if ((Op2Info == TargetTransformInfo::OK_UniformConstantValue || |
436 | Op2Info == TargetTransformInfo::OK_NonUniformConstantValue) && |
437 | ST->hasAVX512()) { |
438 | if (const auto *Entry = |
439 | CostTableLookup(AVX512ConstCostTable, ISD, LT.second)) |
440 | return LT.first * Entry->Cost; |
441 | } |
442 | |
443 | static const CostTblEntry AVX2ConstCostTable[] = { |
444 | { ISD::SDIV, MVT::v32i8, 14 }, |
445 | { ISD::SREM, MVT::v32i8, 16 }, |
446 | { ISD::UDIV, MVT::v32i8, 14 }, |
447 | { ISD::UREM, MVT::v32i8, 16 }, |
448 | { ISD::SDIV, MVT::v16i16, 6 }, |
449 | { ISD::SREM, MVT::v16i16, 8 }, |
450 | { ISD::UDIV, MVT::v16i16, 6 }, |
451 | { ISD::UREM, MVT::v16i16, 8 }, |
452 | { ISD::SDIV, MVT::v8i32, 15 }, |
453 | { ISD::SREM, MVT::v8i32, 19 }, |
454 | { ISD::UDIV, MVT::v8i32, 15 }, |
455 | { ISD::UREM, MVT::v8i32, 19 }, |
456 | }; |
457 | |
458 | if ((Op2Info == TargetTransformInfo::OK_UniformConstantValue || |
459 | Op2Info == TargetTransformInfo::OK_NonUniformConstantValue) && |
460 | ST->hasAVX2()) { |
461 | if (const auto *Entry = CostTableLookup(AVX2ConstCostTable, ISD, LT.second)) |
462 | return LT.first * Entry->Cost; |
463 | } |
464 | |
465 | static const CostTblEntry SSE2ConstCostTable[] = { |
466 | { ISD::SDIV, MVT::v32i8, 28+2 }, |
467 | { ISD::SREM, MVT::v32i8, 32+2 }, |
468 | { ISD::SDIV, MVT::v16i8, 14 }, |
469 | { ISD::SREM, MVT::v16i8, 16 }, |
470 | { ISD::UDIV, MVT::v32i8, 28+2 }, |
471 | { ISD::UREM, MVT::v32i8, 32+2 }, |
472 | { ISD::UDIV, MVT::v16i8, 14 }, |
473 | { ISD::UREM, MVT::v16i8, 16 }, |
474 | { ISD::SDIV, MVT::v16i16, 12+2 }, |
475 | { ISD::SREM, MVT::v16i16, 16+2 }, |
476 | { ISD::SDIV, MVT::v8i16, 6 }, |
477 | { ISD::SREM, MVT::v8i16, 8 }, |
478 | { ISD::UDIV, MVT::v16i16, 12+2 }, |
479 | { ISD::UREM, MVT::v16i16, 16+2 }, |
480 | { ISD::UDIV, MVT::v8i16, 6 }, |
481 | { ISD::UREM, MVT::v8i16, 8 }, |
482 | { ISD::SDIV, MVT::v8i32, 38+2 }, |
483 | { ISD::SREM, MVT::v8i32, 48+2 }, |
484 | { ISD::SDIV, MVT::v4i32, 19 }, |
485 | { ISD::SREM, MVT::v4i32, 24 }, |
486 | { ISD::UDIV, MVT::v8i32, 30+2 }, |
487 | { ISD::UREM, MVT::v8i32, 40+2 }, |
488 | { ISD::UDIV, MVT::v4i32, 15 }, |
489 | { ISD::UREM, MVT::v4i32, 20 }, |
490 | }; |
491 | |
492 | if ((Op2Info == TargetTransformInfo::OK_UniformConstantValue || |
493 | Op2Info == TargetTransformInfo::OK_NonUniformConstantValue) && |
494 | ST->hasSSE2()) { |
495 | |
496 | if (ISD == ISD::SDIV && LT.second == MVT::v8i32 && ST->hasAVX()) |
497 | return LT.first * 32; |
498 | if (ISD == ISD::SREM && LT.second == MVT::v8i32 && ST->hasAVX()) |
499 | return LT.first * 38; |
500 | if (ISD == ISD::SDIV && LT.second == MVT::v4i32 && ST->hasSSE41()) |
501 | return LT.first * 15; |
502 | if (ISD == ISD::SREM && LT.second == MVT::v4i32 && ST->hasSSE41()) |
503 | return LT.first * 20; |
504 | |
505 | if (const auto *Entry = CostTableLookup(SSE2ConstCostTable, ISD, LT.second)) |
506 | return LT.first * Entry->Cost; |
507 | } |
508 | |
509 | static const CostTblEntry AVX512BWShiftCostTable[] = { |
510 | { ISD::SHL, MVT::v16i8, 4 }, |
511 | { ISD::SRL, MVT::v16i8, 4 }, |
512 | { ISD::SRA, MVT::v16i8, 4 }, |
513 | { ISD::SHL, MVT::v32i8, 4 }, |
514 | { ISD::SRL, MVT::v32i8, 4 }, |
515 | { ISD::SRA, MVT::v32i8, 6 }, |
516 | { ISD::SHL, MVT::v64i8, 6 }, |
517 | { ISD::SRL, MVT::v64i8, 7 }, |
518 | { ISD::SRA, MVT::v64i8, 15 }, |
519 | |
520 | { ISD::SHL, MVT::v8i16, 1 }, |
521 | { ISD::SRL, MVT::v8i16, 1 }, |
522 | { ISD::SRA, MVT::v8i16, 1 }, |
523 | { ISD::SHL, MVT::v16i16, 1 }, |
524 | { ISD::SRL, MVT::v16i16, 1 }, |
525 | { ISD::SRA, MVT::v16i16, 1 }, |
526 | { ISD::SHL, MVT::v32i16, 1 }, |
527 | { ISD::SRL, MVT::v32i16, 1 }, |
528 | { ISD::SRA, MVT::v32i16, 1 }, |
529 | }; |
530 | |
531 | if (ST->hasBWI()) |
532 | if (const auto *Entry = CostTableLookup(AVX512BWShiftCostTable, ISD, LT.second)) |
533 | return LT.first * Entry->Cost; |
534 | |
535 | static const CostTblEntry AVX2UniformCostTable[] = { |
536 | |
537 | { ISD::SHL, MVT::v16i16, 1 }, |
538 | { ISD::SRL, MVT::v16i16, 1 }, |
539 | { ISD::SRA, MVT::v16i16, 1 }, |
540 | { ISD::SHL, MVT::v32i16, 2 }, |
541 | { ISD::SRL, MVT::v32i16, 2 }, |
542 | { ISD::SRA, MVT::v32i16, 2 }, |
543 | |
544 | { ISD::SHL, MVT::v8i32, 1 }, |
545 | { ISD::SRL, MVT::v8i32, 1 }, |
546 | { ISD::SRA, MVT::v8i32, 1 }, |
547 | { ISD::SHL, MVT::v4i64, 1 }, |
548 | { ISD::SRL, MVT::v4i64, 1 }, |
549 | }; |
550 | |
551 | if (ST->hasAVX2() && |
552 | ((Op2Info == TargetTransformInfo::OK_UniformConstantValue) || |
553 | (Op2Info == TargetTransformInfo::OK_UniformValue))) { |
554 | if (const auto *Entry = |
555 | CostTableLookup(AVX2UniformCostTable, ISD, LT.second)) |
556 | return LT.first * Entry->Cost; |
557 | } |
558 | |
559 | static const CostTblEntry SSE2UniformCostTable[] = { |
560 | |
561 | { ISD::SHL, MVT::v8i16, 1 }, |
562 | { ISD::SHL, MVT::v4i32, 1 }, |
563 | { ISD::SHL, MVT::v2i64, 1 }, |
564 | |
565 | { ISD::SRL, MVT::v8i16, 1 }, |
566 | { ISD::SRL, MVT::v4i32, 1 }, |
567 | { ISD::SRL, MVT::v2i64, 1 }, |
568 | |
569 | { ISD::SRA, MVT::v8i16, 1 }, |
570 | { ISD::SRA, MVT::v4i32, 1 }, |
571 | }; |
572 | |
573 | if (ST->hasSSE2() && |
574 | ((Op2Info == TargetTransformInfo::OK_UniformConstantValue) || |
575 | (Op2Info == TargetTransformInfo::OK_UniformValue))) { |
576 | if (const auto *Entry = |
577 | CostTableLookup(SSE2UniformCostTable, ISD, LT.second)) |
578 | return LT.first * Entry->Cost; |
579 | } |
580 | |
581 | static const CostTblEntry AVX512DQCostTable[] = { |
582 | { ISD::MUL, MVT::v2i64, 2 }, |
583 | { ISD::MUL, MVT::v4i64, 2 }, |
584 | { ISD::MUL, MVT::v8i64, 2 } |
585 | }; |
586 | |
587 | |
588 | if (ST->hasDQI()) |
589 | if (const auto *Entry = CostTableLookup(AVX512DQCostTable, ISD, LT.second)) |
590 | return LT.first * Entry->Cost; |
591 | |
592 | static const CostTblEntry AVX512BWCostTable[] = { |
593 | { ISD::SHL, MVT::v64i8, 11 }, |
594 | { ISD::SRL, MVT::v64i8, 11 }, |
595 | { ISD::SRA, MVT::v64i8, 24 }, |
596 | }; |
597 | |
598 | |
599 | if (ST->hasBWI()) |
600 | if (const auto *Entry = CostTableLookup(AVX512BWCostTable, ISD, LT.second)) |
601 | return LT.first * Entry->Cost; |
602 | |
603 | static const CostTblEntry AVX512CostTable[] = { |
604 | { ISD::SHL, MVT::v4i32, 1 }, |
605 | { ISD::SRL, MVT::v4i32, 1 }, |
606 | { ISD::SRA, MVT::v4i32, 1 }, |
607 | { ISD::SHL, MVT::v8i32, 1 }, |
608 | { ISD::SRL, MVT::v8i32, 1 }, |
609 | { ISD::SRA, MVT::v8i32, 1 }, |
610 | { ISD::SHL, MVT::v16i32, 1 }, |
611 | { ISD::SRL, MVT::v16i32, 1 }, |
612 | { ISD::SRA, MVT::v16i32, 1 }, |
613 | |
614 | { ISD::SHL, MVT::v2i64, 1 }, |
615 | { ISD::SRL, MVT::v2i64, 1 }, |
616 | { ISD::SHL, MVT::v4i64, 1 }, |
617 | { ISD::SRL, MVT::v4i64, 1 }, |
618 | { ISD::SHL, MVT::v8i64, 1 }, |
619 | { ISD::SRL, MVT::v8i64, 1 }, |
620 | |
621 | { ISD::SRA, MVT::v2i64, 1 }, |
622 | { ISD::SRA, MVT::v4i64, 1 }, |
623 | { ISD::SRA, MVT::v8i64, 1 }, |
624 | |
625 | { ISD::MUL, MVT::v16i32, 1 }, |
626 | { ISD::MUL, MVT::v8i32, 1 }, |
627 | { ISD::MUL, MVT::v4i32, 1 }, |
628 | { ISD::MUL, MVT::v8i64, 6 }, |
629 | |
630 | { ISD::FNEG, MVT::v8f64, 1 }, |
631 | { ISD::FADD, MVT::v8f64, 1 }, |
632 | { ISD::FSUB, MVT::v8f64, 1 }, |
633 | { ISD::FMUL, MVT::v8f64, 1 }, |
634 | { ISD::FDIV, MVT::f64, 4 }, |
635 | { ISD::FDIV, MVT::v2f64, 4 }, |
636 | { ISD::FDIV, MVT::v4f64, 8 }, |
637 | { ISD::FDIV, MVT::v8f64, 16 }, |
638 | |
639 | { ISD::FNEG, MVT::v16f32, 1 }, |
640 | { ISD::FADD, MVT::v16f32, 1 }, |
641 | { ISD::FSUB, MVT::v16f32, 1 }, |
642 | { ISD::FMUL, MVT::v16f32, 1 }, |
643 | { ISD::FDIV, MVT::f32, 3 }, |
644 | { ISD::FDIV, MVT::v4f32, 3 }, |
645 | { ISD::FDIV, MVT::v8f32, 5 }, |
646 | { ISD::FDIV, MVT::v16f32, 10 }, |
647 | }; |
648 | |
649 | if (ST->hasAVX512()) |
650 | if (const auto *Entry = CostTableLookup(AVX512CostTable, ISD, LT.second)) |
651 | return LT.first * Entry->Cost; |
652 | |
653 | static const CostTblEntry AVX2ShiftCostTable[] = { |
654 | |
655 | |
656 | { ISD::SHL, MVT::v4i32, 2 }, |
657 | { ISD::SRL, MVT::v4i32, 2 }, |
658 | { ISD::SRA, MVT::v4i32, 2 }, |
659 | { ISD::SHL, MVT::v8i32, 2 }, |
660 | { ISD::SRL, MVT::v8i32, 2 }, |
661 | { ISD::SRA, MVT::v8i32, 2 }, |
662 | { ISD::SHL, MVT::v2i64, 1 }, |
663 | { ISD::SRL, MVT::v2i64, 1 }, |
664 | { ISD::SHL, MVT::v4i64, 1 }, |
665 | { ISD::SRL, MVT::v4i64, 1 }, |
666 | }; |
667 | |
668 | if (ST->hasAVX512()) { |
669 | if (ISD == ISD::SHL && LT.second == MVT::v32i16 && |
670 | (Op2Info == TargetTransformInfo::OK_UniformConstantValue || |
671 | Op2Info == TargetTransformInfo::OK_NonUniformConstantValue)) |
672 | |
673 | |
674 | return getArithmeticInstrCost(Instruction::Mul, Ty, CostKind, |
675 | Op1Info, Op2Info, |
676 | TargetTransformInfo::OP_None, |
677 | TargetTransformInfo::OP_None); |
678 | } |
679 | |
680 | |
681 | if (ST->hasAVX2() && !(ST->hasXOP() && LT.second == MVT::v4i32)) { |
682 | if (ISD == ISD::SHL && LT.second == MVT::v16i16 && |
683 | (Op2Info == TargetTransformInfo::OK_UniformConstantValue || |
684 | Op2Info == TargetTransformInfo::OK_NonUniformConstantValue)) |
685 | |
686 | |
687 | return getArithmeticInstrCost(Instruction::Mul, Ty, CostKind, |
688 | Op1Info, Op2Info, |
689 | TargetTransformInfo::OP_None, |
690 | TargetTransformInfo::OP_None); |
691 | |
692 | if (const auto *Entry = CostTableLookup(AVX2ShiftCostTable, ISD, LT.second)) |
693 | return LT.first * Entry->Cost; |
694 | } |
695 | |
696 | static const CostTblEntry XOPShiftCostTable[] = { |
697 | |
698 | { ISD::SHL, MVT::v16i8, 1 }, |
699 | { ISD::SRL, MVT::v16i8, 2 }, |
700 | { ISD::SRA, MVT::v16i8, 2 }, |
701 | { ISD::SHL, MVT::v8i16, 1 }, |
702 | { ISD::SRL, MVT::v8i16, 2 }, |
703 | { ISD::SRA, MVT::v8i16, 2 }, |
704 | { ISD::SHL, MVT::v4i32, 1 }, |
705 | { ISD::SRL, MVT::v4i32, 2 }, |
706 | { ISD::SRA, MVT::v4i32, 2 }, |
707 | { ISD::SHL, MVT::v2i64, 1 }, |
708 | { ISD::SRL, MVT::v2i64, 2 }, |
709 | { ISD::SRA, MVT::v2i64, 2 }, |
710 | |
711 | { ISD::SHL, MVT::v32i8, 2+2 }, |
712 | { ISD::SRL, MVT::v32i8, 4+2 }, |
713 | { ISD::SRA, MVT::v32i8, 4+2 }, |
714 | { ISD::SHL, MVT::v16i16, 2+2 }, |
715 | { ISD::SRL, MVT::v16i16, 4+2 }, |
716 | { ISD::SRA, MVT::v16i16, 4+2 }, |
717 | { ISD::SHL, MVT::v8i32, 2+2 }, |
718 | { ISD::SRL, MVT::v8i32, 4+2 }, |
719 | { ISD::SRA, MVT::v8i32, 4+2 }, |
720 | { ISD::SHL, MVT::v4i64, 2+2 }, |
721 | { ISD::SRL, MVT::v4i64, 4+2 }, |
722 | { ISD::SRA, MVT::v4i64, 4+2 }, |
723 | }; |
724 | |
725 | |
726 | if (ST->hasXOP()) { |
727 | |
728 | |
729 | int ShiftISD = ISD; |
730 | if ((ShiftISD == ISD::SRL || ShiftISD == ISD::SRA) && |
731 | (Op2Info == TargetTransformInfo::OK_UniformConstantValue || |
732 | Op2Info == TargetTransformInfo::OK_NonUniformConstantValue)) |
733 | ShiftISD = ISD::SHL; |
734 | if (const auto *Entry = |
735 | CostTableLookup(XOPShiftCostTable, ShiftISD, LT.second)) |
736 | return LT.first * Entry->Cost; |
737 | } |
738 | |
739 | static const CostTblEntry SSE2UniformShiftCostTable[] = { |
740 | |
741 | { ISD::SHL, MVT::v16i16, 2+2 }, |
742 | { ISD::SHL, MVT::v8i32, 2+2 }, |
743 | { ISD::SHL, MVT::v4i64, 2+2 }, |
744 | |
745 | { ISD::SRL, MVT::v16i16, 2+2 }, |
746 | { ISD::SRL, MVT::v8i32, 2+2 }, |
747 | { ISD::SRL, MVT::v4i64, 2+2 }, |
748 | |
749 | { ISD::SRA, MVT::v16i16, 2+2 }, |
750 | { ISD::SRA, MVT::v8i32, 2+2 }, |
751 | { ISD::SRA, MVT::v2i64, 4 }, |
752 | { ISD::SRA, MVT::v4i64, 8+2 }, |
753 | }; |
754 | |
755 | if (ST->hasSSE2() && |
756 | ((Op2Info == TargetTransformInfo::OK_UniformConstantValue) || |
757 | (Op2Info == TargetTransformInfo::OK_UniformValue))) { |
758 | |
759 | |
760 | if (ISD == ISD::SRA && LT.second == MVT::v4i64 && ST->hasAVX2()) |
761 | return LT.first * 4; |
762 | |
763 | if (const auto *Entry = |
764 | CostTableLookup(SSE2UniformShiftCostTable, ISD, LT.second)) |
765 | return LT.first * Entry->Cost; |
766 | } |
767 | |
768 | if (ISD == ISD::SHL && |
769 | Op2Info == TargetTransformInfo::OK_NonUniformConstantValue) { |
770 | MVT VT = LT.second; |
771 | |
772 | |
773 | if (((VT == MVT::v8i16 || VT == MVT::v4i32) && ST->hasSSE2()) || |
774 | ((VT == MVT::v16i16 || VT == MVT::v8i32) && ST->hasAVX())) |
775 | ISD = ISD::MUL; |
776 | } |
777 | |
778 | static const CostTblEntry AVX2CostTable[] = { |
779 | { ISD::SHL, MVT::v16i8, 6 }, |
780 | { ISD::SHL, MVT::v32i8, 6 }, |
781 | { ISD::SHL, MVT::v64i8, 12 }, |
782 | { ISD::SHL, MVT::v8i16, 5 }, |
783 | { ISD::SHL, MVT::v16i16, 7 }, |
784 | { ISD::SHL, MVT::v32i16, 14 }, |
785 | |
786 | { ISD::SRL, MVT::v16i8, 6 }, |
787 | { ISD::SRL, MVT::v32i8, 6 }, |
788 | { ISD::SRL, MVT::v64i8, 12 }, |
789 | { ISD::SRL, MVT::v8i16, 5 }, |
790 | { ISD::SRL, MVT::v16i16, 7 }, |
791 | { ISD::SRL, MVT::v32i16, 14 }, |
792 | |
793 | { ISD::SRA, MVT::v16i8, 17 }, |
794 | { ISD::SRA, MVT::v32i8, 17 }, |
795 | { ISD::SRA, MVT::v64i8, 34 }, |
796 | { ISD::SRA, MVT::v8i16, 5 }, |
797 | { ISD::SRA, MVT::v16i16, 7 }, |
798 | { ISD::SRA, MVT::v32i16, 14 }, |
799 | { ISD::SRA, MVT::v2i64, 2 }, |
800 | { ISD::SRA, MVT::v4i64, 2 }, |
801 | |
802 | { ISD::SUB, MVT::v32i8, 1 }, |
803 | { ISD::ADD, MVT::v32i8, 1 }, |
804 | { ISD::SUB, MVT::v16i16, 1 }, |
805 | { ISD::ADD, MVT::v16i16, 1 }, |
806 | { ISD::SUB, MVT::v8i32, 1 }, |
807 | { ISD::ADD, MVT::v8i32, 1 }, |
808 | { ISD::SUB, MVT::v4i64, 1 }, |
809 | { ISD::ADD, MVT::v4i64, 1 }, |
810 | |
811 | { ISD::MUL, MVT::v16i16, 1 }, |
812 | { ISD::MUL, MVT::v8i32, 2 }, |
813 | { ISD::MUL, MVT::v4i64, 6 }, |
814 | |
815 | { ISD::FNEG, MVT::v4f64, 1 }, |
816 | { ISD::FNEG, MVT::v8f32, 1 }, |
817 | { ISD::FADD, MVT::v4f64, 1 }, |
818 | { ISD::FADD, MVT::v8f32, 1 }, |
819 | { ISD::FSUB, MVT::v4f64, 1 }, |
820 | { ISD::FSUB, MVT::v8f32, 1 }, |
821 | { ISD::FMUL, MVT::f64, 1 }, |
822 | { ISD::FMUL, MVT::v2f64, 1 }, |
823 | { ISD::FMUL, MVT::v4f64, 1 }, |
824 | { ISD::FMUL, MVT::v8f32, 1 }, |
825 | |
826 | { ISD::FDIV, MVT::f32, 7 }, |
827 | { ISD::FDIV, MVT::v4f32, 7 }, |
828 | { ISD::FDIV, MVT::v8f32, 14 }, |
829 | { ISD::FDIV, MVT::f64, 14 }, |
830 | { ISD::FDIV, MVT::v2f64, 14 }, |
831 | { ISD::FDIV, MVT::v4f64, 28 }, |
832 | }; |
833 | |
834 | |
835 | if (ST->hasAVX2()) |
836 | if (const auto *Entry = CostTableLookup(AVX2CostTable, ISD, LT.second)) |
837 | return LT.first * Entry->Cost; |
838 | |
839 | static const CostTblEntry AVX1CostTable[] = { |
840 | |
841 | |
842 | |
843 | { ISD::MUL, MVT::v16i16, 4 }, |
844 | { ISD::MUL, MVT::v8i32, 5 }, |
845 | { ISD::MUL, MVT::v4i64, 12 }, |
846 | |
847 | { ISD::SUB, MVT::v32i8, 4 }, |
848 | { ISD::ADD, MVT::v32i8, 4 }, |
849 | { ISD::SUB, MVT::v16i16, 4 }, |
850 | { ISD::ADD, MVT::v16i16, 4 }, |
851 | { ISD::SUB, MVT::v8i32, 4 }, |
852 | { ISD::ADD, MVT::v8i32, 4 }, |
853 | { ISD::SUB, MVT::v4i64, 4 }, |
854 | { ISD::ADD, MVT::v4i64, 4 }, |
855 | |
856 | { ISD::SHL, MVT::v32i8, 22 }, |
857 | { ISD::SHL, MVT::v8i16, 6 }, |
858 | { ISD::SHL, MVT::v16i16, 13 }, |
859 | { ISD::SHL, MVT::v4i32, 3 }, |
860 | { ISD::SHL, MVT::v8i32, 9 }, |
861 | { ISD::SHL, MVT::v2i64, 2 }, |
862 | { ISD::SHL, MVT::v4i64, 6 }, |
863 | |
864 | { ISD::SRL, MVT::v32i8, 23 }, |
865 | { ISD::SRL, MVT::v16i16, 28 }, |
866 | { ISD::SRL, MVT::v4i32, 6 }, |
867 | { ISD::SRL, MVT::v8i32, 14 }, |
868 | { ISD::SRL, MVT::v2i64, 2 }, |
869 | { ISD::SRL, MVT::v4i64, 6 }, |
870 | |
871 | { ISD::SRA, MVT::v32i8, 44 }, |
872 | { ISD::SRA, MVT::v16i16, 28 }, |
873 | { ISD::SRA, MVT::v4i32, 6 }, |
874 | { ISD::SRA, MVT::v8i32, 14 }, |
875 | { ISD::SRA, MVT::v2i64, 5 }, |
876 | { ISD::SRA, MVT::v4i64, 12 }, |
877 | |
878 | { ISD::FNEG, MVT::v4f64, 2 }, |
879 | { ISD::FNEG, MVT::v8f32, 2 }, |
880 | |
881 | { ISD::FMUL, MVT::f64, 2 }, |
882 | { ISD::FMUL, MVT::v2f64, 2 }, |
883 | { ISD::FMUL, MVT::v4f64, 4 }, |
884 | |
885 | { ISD::FDIV, MVT::f32, 14 }, |
886 | { ISD::FDIV, MVT::v4f32, 14 }, |
887 | { ISD::FDIV, MVT::v8f32, 28 }, |
888 | { ISD::FDIV, MVT::f64, 22 }, |
889 | { ISD::FDIV, MVT::v2f64, 22 }, |
890 | { ISD::FDIV, MVT::v4f64, 44 }, |
891 | }; |
892 | |
893 | if (ST->hasAVX()) |
894 | if (const auto *Entry = CostTableLookup(AVX1CostTable, ISD, LT.second)) |
895 | return LT.first * Entry->Cost; |
896 | |
897 | static const CostTblEntry SSE42CostTable[] = { |
898 | { ISD::FADD, MVT::f64, 1 }, |
899 | { ISD::FADD, MVT::f32, 1 }, |
900 | { ISD::FADD, MVT::v2f64, 1 }, |
901 | { ISD::FADD, MVT::v4f32, 1 }, |
902 | |
903 | { ISD::FSUB, MVT::f64, 1 }, |
904 | { ISD::FSUB, MVT::f32 , 1 }, |
905 | { ISD::FSUB, MVT::v2f64, 1 }, |
906 | { ISD::FSUB, MVT::v4f32, 1 }, |
907 | |
908 | { ISD::FMUL, MVT::f64, 1 }, |
909 | { ISD::FMUL, MVT::f32, 1 }, |
910 | { ISD::FMUL, MVT::v2f64, 1 }, |
911 | { ISD::FMUL, MVT::v4f32, 1 }, |
912 | |
913 | { ISD::FDIV, MVT::f32, 14 }, |
914 | { ISD::FDIV, MVT::v4f32, 14 }, |
915 | { ISD::FDIV, MVT::f64, 22 }, |
916 | { ISD::FDIV, MVT::v2f64, 22 }, |
917 | |
918 | { ISD::MUL, MVT::v2i64, 6 } |
919 | }; |
920 | |
921 | if (ST->hasSSE42()) |
922 | if (const auto *Entry = CostTableLookup(SSE42CostTable, ISD, LT.second)) |
923 | return LT.first * Entry->Cost; |
924 | |
925 | static const CostTblEntry SSE41CostTable[] = { |
926 | { ISD::SHL, MVT::v16i8, 10 }, |
927 | { ISD::SHL, MVT::v8i16, 11 }, |
928 | { ISD::SHL, MVT::v4i32, 4 }, |
929 | |
930 | { ISD::SRL, MVT::v16i8, 11 }, |
931 | { ISD::SRL, MVT::v8i16, 13 }, |
932 | { ISD::SRL, MVT::v4i32, 16 }, |
933 | |
934 | { ISD::SRA, MVT::v16i8, 21 }, |
935 | { ISD::SRA, MVT::v8i16, 13 }, |
936 | |
937 | { ISD::MUL, MVT::v4i32, 2 } |
938 | }; |
939 | |
940 | if (ST->hasSSE41()) |
941 | if (const auto *Entry = CostTableLookup(SSE41CostTable, ISD, LT.second)) |
942 | return LT.first * Entry->Cost; |
943 | |
944 | static const CostTblEntry SSE2CostTable[] = { |
945 | |
946 | |
947 | { ISD::SHL, MVT::v16i8, 13 }, |
948 | { ISD::SHL, MVT::v8i16, 25 }, |
949 | { ISD::SHL, MVT::v4i32, 16 }, |
950 | { ISD::SHL, MVT::v2i64, 4 }, |
951 | |
952 | { ISD::SRL, MVT::v16i8, 14 }, |
953 | { ISD::SRL, MVT::v8i16, 16 }, |
954 | { ISD::SRL, MVT::v4i32, 12 }, |
955 | { ISD::SRL, MVT::v2i64, 4 }, |
956 | |
957 | { ISD::SRA, MVT::v16i8, 27 }, |
958 | { ISD::SRA, MVT::v8i16, 16 }, |
959 | { ISD::SRA, MVT::v4i32, 12 }, |
960 | { ISD::SRA, MVT::v2i64, 8 }, |
961 | |
962 | { ISD::MUL, MVT::v8i16, 1 }, |
963 | { ISD::MUL, MVT::v4i32, 6 }, |
964 | { ISD::MUL, MVT::v2i64, 8 }, |
965 | |
966 | { ISD::FDIV, MVT::f32, 23 }, |
967 | { ISD::FDIV, MVT::v4f32, 39 }, |
968 | { ISD::FDIV, MVT::f64, 38 }, |
969 | { ISD::FDIV, MVT::v2f64, 69 }, |
970 | |
971 | { ISD::FNEG, MVT::f32, 1 }, |
972 | { ISD::FNEG, MVT::f64, 1 }, |
973 | { ISD::FNEG, MVT::v4f32, 1 }, |
974 | { ISD::FNEG, MVT::v2f64, 1 }, |
975 | |
976 | { ISD::FADD, MVT::f32, 2 }, |
977 | { ISD::FADD, MVT::f64, 2 }, |
978 | |
979 | { ISD::FSUB, MVT::f32, 2 }, |
980 | { ISD::FSUB, MVT::f64, 2 }, |
981 | }; |
982 | |
983 | if (ST->hasSSE2()) |
984 | if (const auto *Entry = CostTableLookup(SSE2CostTable, ISD, LT.second)) |
985 | return LT.first * Entry->Cost; |
986 | |
987 | static const CostTblEntry SSE1CostTable[] = { |
988 | { ISD::FDIV, MVT::f32, 17 }, |
989 | { ISD::FDIV, MVT::v4f32, 34 }, |
990 | |
991 | { ISD::FNEG, MVT::f32, 2 }, |
992 | { ISD::FNEG, MVT::v4f32, 2 }, |
993 | |
994 | { ISD::FADD, MVT::f32, 1 }, |
995 | { ISD::FADD, MVT::v4f32, 2 }, |
996 | |
997 | { ISD::FSUB, MVT::f32, 1 }, |
998 | { ISD::FSUB, MVT::v4f32, 2 }, |
999 | }; |
1000 | |
1001 | if (ST->hasSSE1()) |
1002 | if (const auto *Entry = CostTableLookup(SSE1CostTable, ISD, LT.second)) |
1003 | return LT.first * Entry->Cost; |
1004 | |
1005 | static const CostTblEntry X64CostTbl[] = { |
1006 | { ISD::ADD, MVT::i64, 1 }, |
1007 | { ISD::SUB, MVT::i64, 1 }, |
1008 | }; |
1009 | |
1010 | if (ST->is64Bit()) |
1011 | if (const auto *Entry = CostTableLookup(X64CostTbl, ISD, LT.second)) |
1012 | return LT.first * Entry->Cost; |
1013 | |
1014 | static const CostTblEntry X86CostTbl[] = { |
1015 | { ISD::ADD, MVT::i8, 1 }, |
1016 | { ISD::ADD, MVT::i16, 1 }, |
1017 | { ISD::ADD, MVT::i32, 1 }, |
1018 | |
1019 | { ISD::SUB, MVT::i8, 1 }, |
1020 | { ISD::SUB, MVT::i16, 1 }, |
1021 | { ISD::SUB, MVT::i32, 1 }, |
1022 | }; |
1023 | |
1024 | if (const auto *Entry = CostTableLookup(X86CostTbl, ISD, LT.second)) |
1025 | return LT.first * Entry->Cost; |
1026 | |
1027 | |
1028 | |
1029 | |
1030 | |
1031 | |
1032 | |
1033 | if (LT.second.isVector() && (ISD == ISD::SDIV || ISD == ISD::SREM || |
1034 | ISD == ISD::UDIV || ISD == ISD::UREM)) { |
1035 | InstructionCost ScalarCost = getArithmeticInstrCost( |
1036 | Opcode, Ty->getScalarType(), CostKind, Op1Info, Op2Info, |
1037 | TargetTransformInfo::OP_None, TargetTransformInfo::OP_None); |
1038 | return 20 * LT.first * LT.second.getVectorNumElements() * ScalarCost; |
1039 | } |
1040 | |
1041 | |
1042 | return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, Op2Info); |
1043 | } |
1044 | |
1045 | InstructionCost X86TTIImpl::getShuffleCost(TTI::ShuffleKind Kind, |
1046 | VectorType *BaseTp, |
1047 | ArrayRef<int> Mask, int Index, |
1048 | VectorType *SubTp) { |
1049 | /* Estimates the cost of a shuffle of kind Kind on BaseTp for the current subtarget; Index and SubTp are read by the subvector extract/insert paths and forwarded to the generic fallback. */ |
1050 | /* Legalize BaseTp: LT.first scales the table costs below, LT.second is the legalized MVT used as the lookup key. */ |
1051 | std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, BaseTp); |
1052 | |
1053 | Kind = improveShuffleKindFromMask(Kind, Mask); |
1054 | /* A transpose is costed exactly like a two-source permute. */ |
1055 | if (Kind == TTI::SK_Transpose) |
1056 | Kind = TTI::SK_PermuteTwoSrc; |
1057 | |
1058 | |
1059 | |
1060 | /* Broadcasts reset the LT.first multiplier to 1, so a table hit costs a single entry regardless of how many registers the type splits into. */ |
1061 | if (Kind == TTI::SK_Broadcast) |
1062 | LT.first = 1; |
1063 | |
1064 | |
1065 | /* Subvector extract: free when Index is a multiple of the legalized element count; costs SubLT.first when Index and NumElts are both multiples of NumSubElts. */ |
1066 | if (Kind == TTI::SK_ExtractSubvector && LT.second.isVector()) { |
1067 | int NumElts = LT.second.getVectorNumElements(); |
1068 | if ((Index % NumElts) == 0) |
1069 | return 0; |
1070 | std::pair<InstructionCost, MVT> SubLT = |
1071 | TLI->getTypeLegalizationCost(DL, SubTp); |
1072 | if (SubLT.second.isVector()) { |
1073 | int NumSubElts = SubLT.second.getVectorNumElements(); |
1074 | if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0) |
1075 | return SubLT.first; |
1076 | |
1077 | |
1078 | |
1079 | |
1080 | |
1081 | /* SubTp legalized to a wider vector (NumSubElts > OrigSubElts) with matching element types: cost an aligned extract of the legalized subvector, plus a fixed surcharge chosen below. */ |
1082 | int OrigSubElts = cast<FixedVectorType>(SubTp)->getNumElements(); |
1083 | if (NumSubElts > OrigSubElts && (Index % OrigSubElts) == 0 && |
1084 | (NumSubElts % OrigSubElts) == 0 && |
1085 | LT.second.getVectorElementType() == |
1086 | SubLT.second.getVectorElementType() && |
1087 | LT.second.getVectorElementType().getSizeInBits() == |
1088 | BaseTp->getElementType()->getPrimitiveSizeInBits()) { |
1089 | assert(NumElts >= NumSubElts && NumElts > OrigSubElts && |
1090 | "Unexpected number of elements!"); |
1091 | auto *VecTy = FixedVectorType::get(BaseTp->getElementType(), |
1092 | LT.second.getVectorNumElements()); |
1093 | auto *SubTy = FixedVectorType::get(BaseTp->getElementType(), |
1094 | SubLT.second.getVectorNumElements()); |
1095 | int ExtractIndex = alignDown((Index % NumElts), NumSubElts); |
1096 | InstructionCost ExtractCost = getShuffleCost( |
1097 | TTI::SK_ExtractSubvector, VecTy, None, ExtractIndex, SubTy); |
1098 | |
1099 | |
1100 | /* Surcharge is 1 when SubTp is at least 32 bits wide or SSSE3 is available; otherwise SubTp is asserted to be 16 bits and the surcharge is 2. */ |
1101 | if (SubTp->getPrimitiveSizeInBits() >= 32 || ST->hasSSSE3()) |
1102 | return ExtractCost + 1; |
1103 | |
1104 | assert(SubTp->getPrimitiveSizeInBits() == 16 && |
1105 | "Unexpected vector size"); |
1106 | |
1107 | return ExtractCost + 2; |
1108 | } |
1109 | } |
1110 | } |
1111 | |
1112 | |
1113 | |
1114 | /* Subvector insert: costs SubLT.first when Index and NumElts are both multiples of NumSubElts; any other insert falls through to the code below. */ |
1115 | if (Kind == TTI::SK_InsertSubvector && LT.second.isVector()) { |
1116 | int NumElts = LT.second.getVectorNumElements(); |
1117 | std::pair<InstructionCost, MVT> SubLT = |
1118 | TLI->getTypeLegalizationCost(DL, SubTp); |
1119 | if (SubLT.second.isVector()) { |
1120 | int NumSubElts = SubLT.second.getVectorNumElements(); |
1121 | if ((Index % NumSubElts) == 0 && (NumElts % NumSubElts) == 0) |
1122 | return SubLT.first; |
1123 | } |
1124 | } |
1125 | |
1126 | |
1127 | /* Simple vector types narrower than 128 bits, on SSE2 targets without SSSE3, use a table keyed on the un-legalized type; a hit returns Entry->Cost without the LT.first multiplier. */ |
1128 | EVT VT = TLI->getValueType(DL, BaseTp); |
1129 | if (VT.isSimple() && VT.isVector() && VT.getSizeInBits() < 128 && |
1130 | !ST->hasSSSE3()) { |
1131 | static const CostTblEntry SSE2SubVectorShuffleTbl[] = { |
1132 | {TTI::SK_Broadcast, MVT::v4i16, 1}, |
1133 | {TTI::SK_Broadcast, MVT::v2i16, 1}, |
1134 | {TTI::SK_Broadcast, MVT::v8i8, 2}, |
1135 | {TTI::SK_Broadcast, MVT::v4i8, 2}, |
1136 | {TTI::SK_Broadcast, MVT::v2i8, 1}, |
1137 | |
1138 | {TTI::SK_Reverse, MVT::v4i16, 1}, |
1139 | {TTI::SK_Reverse, MVT::v2i16, 1}, |
1140 | {TTI::SK_Reverse, MVT::v4i8, 3}, |
1141 | {TTI::SK_Reverse, MVT::v2i8, 1}, |
1142 | |
1143 | {TTI::SK_PermuteTwoSrc, MVT::v4i16, 2}, |
1144 | {TTI::SK_PermuteTwoSrc, MVT::v2i16, 2}, |
1145 | {TTI::SK_PermuteTwoSrc, MVT::v8i8, 7}, |
1146 | {TTI::SK_PermuteTwoSrc, MVT::v4i8, 4}, |
1147 | {TTI::SK_PermuteTwoSrc, MVT::v2i8, 2}, |
1148 | |
1149 | {TTI::SK_PermuteSingleSrc, MVT::v4i16, 1}, |
1150 | {TTI::SK_PermuteSingleSrc, MVT::v2i16, 1}, |
1151 | {TTI::SK_PermuteSingleSrc, MVT::v8i8, 5}, |
1152 | {TTI::SK_PermuteSingleSrc, MVT::v4i8, 3}, |
1153 | {TTI::SK_PermuteSingleSrc, MVT::v2i8, 1}, |
1154 | }; |
1155 | |
1156 | if (ST->hasSSE2()) |
1157 | if (const auto *Entry = |
1158 | CostTableLookup(SSE2SubVectorShuffleTbl, Kind, VT.getSimpleVT())) |
1159 | return Entry->Cost; |
1160 | } |
1161 | |
1162 | |
1163 | |
1164 | /* Single-source permute of a type that legalizes into more than one register (LT.first != 1). */ |
1165 | if (Kind == TTI::SK_PermuteSingleSrc && LT.first != 1) { |
1166 | MVT LegalVT = LT.second; |
1167 | if (LegalVT.isVector() && |
1168 | LegalVT.getVectorElementType().getSizeInBits() == |
1169 | BaseTp->getElementType()->getPrimitiveSizeInBits() && |
1170 | LegalVT.getVectorNumElements() < |
1171 | cast<FixedVectorType>(BaseTp)->getNumElements()) { |
1172 | /* NumOfSrcs: how many legal-width registers the original vector occupies, from store sizes rounded up. */ |
1173 | unsigned VecTySize = DL.getTypeStoreSize(BaseTp); |
1174 | unsigned LegalVTSize = LegalVT.getStoreSize(); |
1175 | |
1176 | unsigned NumOfSrcs = (VecTySize + LegalVTSize - 1) / LegalVTSize; |
1177 | /* LT.first is taken as the number of destination registers. */ |
1178 | InstructionCost NumOfDests = LT.first; |
1179 | |
1180 | auto *SingleOpTy = FixedVectorType::get(BaseTp->getElementType(), |
1181 | LegalVT.getVectorNumElements()); |
1182 | /* Total = (NumOfSrcs - 1) two-source permutes of the legal-width type per destination register. */ |
1183 | InstructionCost NumOfShuffles = (NumOfSrcs - 1) * NumOfDests; |
1184 | return NumOfShuffles * getShuffleCost(TTI::SK_PermuteTwoSrc, SingleOpTy, |
1185 | None, 0, nullptr); |
1186 | } |
1187 | /* Conditions above not met: defer to the generic implementation. */ |
1188 | return BaseT::getShuffleCost(Kind, BaseTp, Mask, Index, SubTp); |
1189 | } |
1190 | |
1191 | /* Two-source permute of a split type: replace LT.first with NumOfDests * (2 * LT.first - 1) so the table lookups below scale by that shuffle count. */ |
1192 | if (Kind == TTI::SK_PermuteTwoSrc && LT.first != 1) { |
1193 | |
1194 | InstructionCost NumOfDests = LT.first; |
1195 | InstructionCost NumOfShufflesPerDest = LT.first * 2 - 1; |
1196 | LT.first = NumOfDests * NumOfShufflesPerDest; |
1197 | } |
1198 | /* Feature-gated tables follow and are tried in order; the first (Kind, LT.second) hit returns LT.first * Entry->Cost. */ |
1199 | static const CostTblEntry AVX512VBMIShuffleTbl[] = { |
1200 | {TTI::SK_Reverse, MVT::v64i8, 1}, |
1201 | {TTI::SK_Reverse, MVT::v32i8, 1}, |
1202 | |
1203 | {TTI::SK_PermuteSingleSrc, MVT::v64i8, 1}, |
1204 | {TTI::SK_PermuteSingleSrc, MVT::v32i8, 1}, |
1205 | |
1206 | {TTI::SK_PermuteTwoSrc, MVT::v64i8, 2}, |
1207 | {TTI::SK_PermuteTwoSrc, MVT::v32i8, 2}, |
1208 | {TTI::SK_PermuteTwoSrc, MVT::v16i8, 2} |
1209 | }; |
1210 | |
1211 | if (ST->hasVBMI()) |
1212 | if (const auto *Entry = |
1213 | CostTableLookup(AVX512VBMIShuffleTbl, Kind, LT.second)) |
1214 | return LT.first * Entry->Cost; |
1215 | |
1216 | static const CostTblEntry AVX512BWShuffleTbl[] = { |
1217 | {TTI::SK_Broadcast, MVT::v32i16, 1}, |
1218 | {TTI::SK_Broadcast, MVT::v64i8, 1}, |
1219 | |
1220 | {TTI::SK_Reverse, MVT::v32i16, 2}, |
1221 | {TTI::SK_Reverse, MVT::v16i16, 2}, |
1222 | {TTI::SK_Reverse, MVT::v64i8, 2}, |
1223 | |
1224 | {TTI::SK_PermuteSingleSrc, MVT::v32i16, 2}, |
1225 | {TTI::SK_PermuteSingleSrc, MVT::v16i16, 2}, |
1226 | {TTI::SK_PermuteSingleSrc, MVT::v64i8, 8}, |
1227 | |
1228 | {TTI::SK_PermuteTwoSrc, MVT::v32i16, 2}, |
1229 | {TTI::SK_PermuteTwoSrc, MVT::v16i16, 2}, |
1230 | {TTI::SK_PermuteTwoSrc, MVT::v8i16, 2}, |
1231 | {TTI::SK_PermuteTwoSrc, MVT::v64i8, 19}, |
1232 | |
1233 | {TTI::SK_Select, MVT::v32i16, 1}, |
1234 | {TTI::SK_Select, MVT::v64i8, 1}, |
1235 | }; |
1236 | |
1237 | if (ST->hasBWI()) |
1238 | if (const auto *Entry = |
1239 | CostTableLookup(AVX512BWShuffleTbl, Kind, LT.second)) |
1240 | return LT.first * Entry->Cost; |
1241 | |
1242 | static const CostTblEntry AVX512ShuffleTbl[] = { |
1243 | {TTI::SK_Broadcast, MVT::v8f64, 1}, |
1244 | {TTI::SK_Broadcast, MVT::v16f32, 1}, |
1245 | {TTI::SK_Broadcast, MVT::v8i64, 1}, |
1246 | {TTI::SK_Broadcast, MVT::v16i32, 1}, |
1247 | {TTI::SK_Broadcast, MVT::v32i16, 1}, |
1248 | {TTI::SK_Broadcast, MVT::v64i8, 1}, |
1249 | |
1250 | {TTI::SK_Reverse, MVT::v8f64, 1}, |
1251 | {TTI::SK_Reverse, MVT::v16f32, 1}, |
1252 | {TTI::SK_Reverse, MVT::v8i64, 1}, |
1253 | {TTI::SK_Reverse, MVT::v16i32, 1}, |
1254 | {TTI::SK_Reverse, MVT::v32i16, 7}, |
1255 | {TTI::SK_Reverse, MVT::v64i8, 7}, |
1256 | |
1257 | {TTI::SK_PermuteSingleSrc, MVT::v8f64, 1}, |
1258 | {TTI::SK_PermuteSingleSrc, MVT::v4f64, 1}, |
1259 | {TTI::SK_PermuteSingleSrc, MVT::v2f64, 1}, |
1260 | {TTI::SK_PermuteSingleSrc, MVT::v16f32, 1}, |
1261 | {TTI::SK_PermuteSingleSrc, MVT::v8f32, 1}, |
1262 | {TTI::SK_PermuteSingleSrc, MVT::v4f32, 1}, |
1263 | {TTI::SK_PermuteSingleSrc, MVT::v8i64, 1}, |
1264 | {TTI::SK_PermuteSingleSrc, MVT::v4i64, 1}, |
1265 | {TTI::SK_PermuteSingleSrc, MVT::v2i64, 1}, |
1266 | {TTI::SK_PermuteSingleSrc, MVT::v16i32, 1}, |
1267 | {TTI::SK_PermuteSingleSrc, MVT::v8i32, 1}, |
1268 | {TTI::SK_PermuteSingleSrc, MVT::v4i32, 1}, |
1269 | {TTI::SK_PermuteSingleSrc, MVT::v16i8, 1}, |
1270 | |
1271 | {TTI::SK_PermuteTwoSrc, MVT::v8f64, 1}, |
1272 | {TTI::SK_PermuteTwoSrc, MVT::v16f32, 1}, |
1273 | {TTI::SK_PermuteTwoSrc, MVT::v8i64, 1}, |
1274 | {TTI::SK_PermuteTwoSrc, MVT::v16i32, 1}, |
1275 | {TTI::SK_PermuteTwoSrc, MVT::v4f64, 1}, |
1276 | {TTI::SK_PermuteTwoSrc, MVT::v8f32, 1}, |
1277 | {TTI::SK_PermuteTwoSrc, MVT::v4i64, 1}, |
1278 | {TTI::SK_PermuteTwoSrc, MVT::v8i32, 1}, |
1279 | {TTI::SK_PermuteTwoSrc, MVT::v2f64, 1}, |
1280 | {TTI::SK_PermuteTwoSrc, MVT::v4f32, 1}, |
1281 | {TTI::SK_PermuteTwoSrc, MVT::v2i64, 1}, |
1282 | {TTI::SK_PermuteTwoSrc, MVT::v4i32, 1}, |
1283 | |
1284 | |
1285 | |
1286 | {TTI::SK_PermuteSingleSrc, MVT::v32i16, 14}, |
1287 | {TTI::SK_PermuteSingleSrc, MVT::v64i8, 14}, |
1288 | {TTI::SK_PermuteTwoSrc, MVT::v32i16, 42}, |
1289 | {TTI::SK_PermuteTwoSrc, MVT::v64i8, 42}, |
1290 | |
1291 | {TTI::SK_Select, MVT::v32i16, 1}, |
1292 | {TTI::SK_Select, MVT::v64i8, 1}, |
1293 | {TTI::SK_Select, MVT::v8f64, 1}, |
1294 | {TTI::SK_Select, MVT::v16f32, 1}, |
1295 | {TTI::SK_Select, MVT::v8i64, 1}, |
1296 | {TTI::SK_Select, MVT::v16i32, 1}, |
1297 | }; |
1298 | |
1299 | if (ST->hasAVX512()) |
1300 | if (const auto *Entry = CostTableLookup(AVX512ShuffleTbl, Kind, LT.second)) |
1301 | return LT.first * Entry->Cost; |
1302 | |
1303 | static const CostTblEntry AVX2ShuffleTbl[] = { |
1304 | {TTI::SK_Broadcast, MVT::v4f64, 1}, |
1305 | {TTI::SK_Broadcast, MVT::v8f32, 1}, |
1306 | {TTI::SK_Broadcast, MVT::v4i64, 1}, |
1307 | {TTI::SK_Broadcast, MVT::v8i32, 1}, |
1308 | {TTI::SK_Broadcast, MVT::v16i16, 1}, |
1309 | {TTI::SK_Broadcast, MVT::v32i8, 1}, |
1310 | |
1311 | {TTI::SK_Reverse, MVT::v4f64, 1}, |
1312 | {TTI::SK_Reverse, MVT::v8f32, 1}, |
1313 | {TTI::SK_Reverse, MVT::v4i64, 1}, |
1314 | {TTI::SK_Reverse, MVT::v8i32, 1}, |
1315 | {TTI::SK_Reverse, MVT::v16i16, 2}, |
1316 | {TTI::SK_Reverse, MVT::v32i8, 2}, |
1317 | |
1318 | {TTI::SK_Select, MVT::v16i16, 1}, |
1319 | {TTI::SK_Select, MVT::v32i8, 1}, |
1320 | |
1321 | {TTI::SK_PermuteSingleSrc, MVT::v4f64, 1}, |
1322 | {TTI::SK_PermuteSingleSrc, MVT::v8f32, 1}, |
1323 | {TTI::SK_PermuteSingleSrc, MVT::v4i64, 1}, |
1324 | {TTI::SK_PermuteSingleSrc, MVT::v8i32, 1}, |
1325 | {TTI::SK_PermuteSingleSrc, MVT::v16i16, 4}, |
1326 | |
1327 | {TTI::SK_PermuteSingleSrc, MVT::v32i8, 4}, |
1328 | |
1329 | |
1330 | {TTI::SK_PermuteTwoSrc, MVT::v4f64, 3}, |
1331 | {TTI::SK_PermuteTwoSrc, MVT::v8f32, 3}, |
1332 | {TTI::SK_PermuteTwoSrc, MVT::v4i64, 3}, |
1333 | {TTI::SK_PermuteTwoSrc, MVT::v8i32, 3}, |
1334 | {TTI::SK_PermuteTwoSrc, MVT::v16i16, 7}, |
1335 | |
1336 | {TTI::SK_PermuteTwoSrc, MVT::v32i8, 7}, |
1337 | |
1338 | }; |
1339 | |
1340 | if (ST->hasAVX2()) |
1341 | if (const auto *Entry = CostTableLookup(AVX2ShuffleTbl, Kind, LT.second)) |
1342 | return LT.first * Entry->Cost; |
1343 | |
1344 | static const CostTblEntry XOPShuffleTbl[] = { |
1345 | {TTI::SK_PermuteSingleSrc, MVT::v4f64, 2}, |
1346 | {TTI::SK_PermuteSingleSrc, MVT::v8f32, 2}, |
1347 | {TTI::SK_PermuteSingleSrc, MVT::v4i64, 2}, |
1348 | {TTI::SK_PermuteSingleSrc, MVT::v8i32, 2}, |
1349 | {TTI::SK_PermuteSingleSrc, MVT::v16i16, 4}, |
1350 | |
1351 | {TTI::SK_PermuteSingleSrc, MVT::v32i8, 4}, |
1352 | |
1353 | |
1354 | {TTI::SK_PermuteTwoSrc, MVT::v16i16, 9}, |
1355 | |
1356 | {TTI::SK_PermuteTwoSrc, MVT::v8i16, 1}, |
1357 | {TTI::SK_PermuteTwoSrc, MVT::v32i8, 9}, |
1358 | |
1359 | {TTI::SK_PermuteTwoSrc, MVT::v16i8, 1}, |
1360 | }; |
1361 | |
1362 | if (ST->hasXOP()) |
1363 | if (const auto *Entry = CostTableLookup(XOPShuffleTbl, Kind, LT.second)) |
1364 | return LT.first * Entry->Cost; |
1365 | |
1366 | static const CostTblEntry AVX1ShuffleTbl[] = { |
1367 | {TTI::SK_Broadcast, MVT::v4f64, 2}, |
1368 | {TTI::SK_Broadcast, MVT::v8f32, 2}, |
1369 | {TTI::SK_Broadcast, MVT::v4i64, 2}, |
1370 | {TTI::SK_Broadcast, MVT::v8i32, 2}, |
1371 | {TTI::SK_Broadcast, MVT::v16i16, 3}, |
1372 | {TTI::SK_Broadcast, MVT::v32i8, 2}, |
1373 | |
1374 | {TTI::SK_Reverse, MVT::v4f64, 2}, |
1375 | {TTI::SK_Reverse, MVT::v8f32, 2}, |
1376 | {TTI::SK_Reverse, MVT::v4i64, 2}, |
1377 | {TTI::SK_Reverse, MVT::v8i32, 2}, |
1378 | {TTI::SK_Reverse, MVT::v16i16, 4}, |
1379 | |
1380 | {TTI::SK_Reverse, MVT::v32i8, 4}, |
1381 | |
1382 | |
1383 | {TTI::SK_Select, MVT::v4i64, 1}, |
1384 | {TTI::SK_Select, MVT::v4f64, 1}, |
1385 | {TTI::SK_Select, MVT::v8i32, 1}, |
1386 | {TTI::SK_Select, MVT::v8f32, 1}, |
1387 | {TTI::SK_Select, MVT::v16i16, 3}, |
1388 | {TTI::SK_Select, MVT::v32i8, 3}, |
1389 | |
1390 | {TTI::SK_PermuteSingleSrc, MVT::v4f64, 2}, |
1391 | {TTI::SK_PermuteSingleSrc, MVT::v4i64, 2}, |
1392 | {TTI::SK_PermuteSingleSrc, MVT::v8f32, 4}, |
1393 | {TTI::SK_PermuteSingleSrc, MVT::v8i32, 4}, |
1394 | {TTI::SK_PermuteSingleSrc, MVT::v16i16, 8}, |
1395 | |
1396 | {TTI::SK_PermuteSingleSrc, MVT::v32i8, 8}, |
1397 | |
1398 | |
1399 | {TTI::SK_PermuteTwoSrc, MVT::v4f64, 3}, |
1400 | {TTI::SK_PermuteTwoSrc, MVT::v4i64, 3}, |
1401 | {TTI::SK_PermuteTwoSrc, MVT::v8f32, 4}, |
1402 | {TTI::SK_PermuteTwoSrc, MVT::v8i32, 4}, |
1403 | {TTI::SK_PermuteTwoSrc, MVT::v16i16, 15}, |
1404 | |
1405 | {TTI::SK_PermuteTwoSrc, MVT::v32i8, 15}, |
1406 | |
1407 | }; |
1408 | |
1409 | if (ST->hasAVX()) |
1410 | if (const auto *Entry = CostTableLookup(AVX1ShuffleTbl, Kind, LT.second)) |
1411 | return LT.first * Entry->Cost; |
1412 | |
1413 | static const CostTblEntry SSE41ShuffleTbl[] = { |
1414 | {TTI::SK_Select, MVT::v2i64, 1}, |
1415 | {TTI::SK_Select, MVT::v2f64, 1}, |
1416 | {TTI::SK_Select, MVT::v4i32, 1}, |
1417 | {TTI::SK_Select, MVT::v4f32, 1}, |
1418 | {TTI::SK_Select, MVT::v8i16, 1}, |
1419 | {TTI::SK_Select, MVT::v16i8, 1} |
1420 | }; |
1421 | |
1422 | if (ST->hasSSE41()) |
1423 | if (const auto *Entry = CostTableLookup(SSE41ShuffleTbl, Kind, LT.second)) |
1424 | return LT.first * Entry->Cost; |
1425 | |
1426 | static const CostTblEntry SSSE3ShuffleTbl[] = { |
1427 | {TTI::SK_Broadcast, MVT::v8i16, 1}, |
1428 | {TTI::SK_Broadcast, MVT::v16i8, 1}, |
1429 | |
1430 | {TTI::SK_Reverse, MVT::v8i16, 1}, |
1431 | {TTI::SK_Reverse, MVT::v16i8, 1}, |
1432 | |
1433 | {TTI::SK_Select, MVT::v8i16, 3}, |
1434 | {TTI::SK_Select, MVT::v16i8, 3}, |
1435 | |
1436 | {TTI::SK_PermuteSingleSrc, MVT::v8i16, 1}, |
1437 | {TTI::SK_PermuteSingleSrc, MVT::v16i8, 1}, |
1438 | |
1439 | {TTI::SK_PermuteTwoSrc, MVT::v8i16, 3}, |
1440 | {TTI::SK_PermuteTwoSrc, MVT::v16i8, 3}, |
1441 | }; |
1442 | |
1443 | if (ST->hasSSSE3()) |
1444 | if (const auto *Entry = CostTableLookup(SSSE3ShuffleTbl, Kind, LT.second)) |
1445 | return LT.first * Entry->Cost; |
1446 | |
1447 | static const CostTblEntry SSE2ShuffleTbl[] = { |
1448 | {TTI::SK_Broadcast, MVT::v2f64, 1}, |
1449 | {TTI::SK_Broadcast, MVT::v2i64, 1}, |
1450 | {TTI::SK_Broadcast, MVT::v4i32, 1}, |
1451 | {TTI::SK_Broadcast, MVT::v8i16, 2}, |
1452 | {TTI::SK_Broadcast, MVT::v16i8, 3}, |
1453 | |
1454 | {TTI::SK_Reverse, MVT::v2f64, 1}, |
1455 | {TTI::SK_Reverse, MVT::v2i64, 1}, |
1456 | {TTI::SK_Reverse, MVT::v4i32, 1}, |
1457 | {TTI::SK_Reverse, MVT::v8i16, 3}, |
1458 | {TTI::SK_Reverse, MVT::v16i8, 9}, |
1459 | |
1460 | |
1461 | {TTI::SK_Select, MVT::v2i64, 1}, |
1462 | {TTI::SK_Select, MVT::v2f64, 1}, |
1463 | {TTI::SK_Select, MVT::v4i32, 2}, |
1464 | {TTI::SK_Select, MVT::v8i16, 3}, |
1465 | {TTI::SK_Select, MVT::v16i8, 3}, |
1466 | |
1467 | {TTI::SK_PermuteSingleSrc, MVT::v2f64, 1}, |
1468 | {TTI::SK_PermuteSingleSrc, MVT::v2i64, 1}, |
1469 | {TTI::SK_PermuteSingleSrc, MVT::v4i32, 1}, |
1470 | {TTI::SK_PermuteSingleSrc, MVT::v8i16, 5}, |
1471 | |
1472 | { TTI::SK_PermuteSingleSrc, MVT::v16i8, 10 }, |
1473 | |
1474 | |
1475 | { TTI::SK_PermuteTwoSrc, MVT::v2f64, 1 }, |
1476 | { TTI::SK_PermuteTwoSrc, MVT::v2i64, 1 }, |
1477 | { TTI::SK_PermuteTwoSrc, MVT::v4i32, 2 }, |
1478 | { TTI::SK_PermuteTwoSrc, MVT::v8i16, 8 }, |
1479 | { TTI::SK_PermuteTwoSrc, MVT::v16i8, 13 }, |
1480 | }; |
1481 | |
1482 | if (ST->hasSSE2()) |
1483 | if (const auto *Entry = CostTableLookup(SSE2ShuffleTbl, Kind, LT.second)) |
1484 | return LT.first * Entry->Cost; |
1485 | |
1486 | static const CostTblEntry SSE1ShuffleTbl[] = { |
1487 | { TTI::SK_Broadcast, MVT::v4f32, 1 }, |
1488 | { TTI::SK_Reverse, MVT::v4f32, 1 }, |
1489 | { TTI::SK_Select, MVT::v4f32, 2 }, |
1490 | { TTI::SK_PermuteSingleSrc, MVT::v4f32, 1 }, |
1491 | { TTI::SK_PermuteTwoSrc, MVT::v4f32, 2 }, |
1492 | }; |
1493 | |
1494 | if (ST->hasSSE1()) |
1495 | if (const auto *Entry = CostTableLookup(SSE1ShuffleTbl, Kind, LT.second)) |
1496 | return LT.first * Entry->Cost; |
1497 | /* No table matched: defer to the generic implementation. */ |
1498 | return BaseT::getShuffleCost(Kind, BaseTp, Mask, Index, SubTp); |
1499 | } |
1500 | |
1501 | InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, |
1502 | Type *Src, |
1503 | TTI::CastContextHint CCH, |
1504 | TTI::TargetCostKind CostKind, |
1505 | const Instruction *I) { |
1506 | int ISD = TLI->InstructionOpcodeToISD(Opcode); |
1507 | assert(ISD && "Invalid opcode"); |
1508 | |
1509 | |
1510 | auto AdjustCost = [&CostKind](InstructionCost Cost) -> InstructionCost { |
1511 | if (CostKind != TTI::TCK_RecipThroughput) |
1512 | return Cost == 0 ? 0 : 1; |
1513 | return Cost; |
1514 | }; |
1515 | |
1516 | |
1517 | |
1518 | |
1519 | |
1520 | |
1521 | static const TypeConversionCostTblEntry AVX512BWConversionTbl[] { |
1522 | { ISD::SIGN_EXTEND, MVT::v32i16, MVT::v32i8, 1 }, |
1523 | { ISD::ZERO_EXTEND, MVT::v32i16, MVT::v32i8, 1 }, |
1524 | |
1525 | |
1526 | { ISD::SIGN_EXTEND, MVT::v2i8, MVT::v2i1, 1 }, |
1527 | { ISD::SIGN_EXTEND, MVT::v2i16, MVT::v2i1, 1 }, |
1528 | { ISD::SIGN_EXTEND, MVT::v4i8, MVT::v4i1, 1 }, |
1529 | { ISD::SIGN_EXTEND, MVT::v4i16, MVT::v4i1, 1 }, |
1530 | { ISD::SIGN_EXTEND, MVT::v8i8, MVT::v8i1, 1 }, |
1531 | { ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i1, 1 }, |
1532 | { ISD::SIGN_EXTEND, MVT::v16i8, MVT::v16i1, 1 }, |
1533 | { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i1, 1 }, |
1534 | { ISD::SIGN_EXTEND, MVT::v32i8, MVT::v32i1, 1 }, |
1535 | { ISD::SIGN_EXTEND, MVT::v32i16, MVT::v32i1, 1 }, |
1536 | { ISD::SIGN_EXTEND, MVT::v64i8, MVT::v64i1, 1 }, |
1537 | |
1538 | |
1539 | { ISD::ZERO_EXTEND, MVT::v2i8, MVT::v2i1, 2 }, |
1540 | { ISD::ZERO_EXTEND, MVT::v2i16, MVT::v2i1, 2 }, |
1541 | { ISD::ZERO_EXTEND, MVT::v4i8, MVT::v4i1, 2 }, |
1542 | { ISD::ZERO_EXTEND, MVT::v4i16, MVT::v4i1, 2 }, |
1543 | { ISD::ZERO_EXTEND, MVT::v8i8, MVT::v8i1, 2 }, |
1544 | { ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i1, 2 }, |
1545 | { ISD::ZERO_EXTEND, MVT::v16i8, MVT::v16i1, 2 }, |
1546 | { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i1, 2 }, |
1547 | { ISD::ZERO_EXTEND, MVT::v32i8, MVT::v32i1, 2 }, |
1548 | { ISD::ZERO_EXTEND, MVT::v32i16, MVT::v32i1, 2 }, |
1549 | { ISD::ZERO_EXTEND, MVT::v64i8, MVT::v64i1, 2 }, |
1550 | |
1551 | { ISD::TRUNCATE, MVT::v32i8, MVT::v32i16, 2 }, |
1552 | { ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 2 }, |
1553 | { ISD::TRUNCATE, MVT::v2i1, MVT::v2i8, 2 }, |
1554 | { ISD::TRUNCATE, MVT::v2i1, MVT::v2i16, 2 }, |
1555 | { ISD::TRUNCATE, MVT::v2i8, MVT::v2i16, 2 }, |
1556 | { ISD::TRUNCATE, MVT::v4i1, MVT::v4i8, 2 }, |
1557 | { ISD::TRUNCATE, MVT::v4i1, MVT::v4i16, 2 }, |
1558 | { ISD::TRUNCATE, MVT::v4i8, MVT::v4i16, 2 }, |
1559 | { ISD::TRUNCATE, MVT::v8i1, MVT::v8i8, 2 }, |
1560 | { ISD::TRUNCATE, MVT::v8i1, MVT::v8i16, 2 }, |
1561 | { ISD::TRUNCATE, MVT::v8i8, MVT::v8i16, 2 }, |
1562 | { ISD::TRUNCATE, MVT::v16i1, MVT::v16i8, 2 }, |
1563 | { ISD::TRUNCATE, MVT::v16i1, MVT::v16i16, 2 }, |
1564 | { ISD::TRUNCATE, MVT::v32i1, MVT::v32i8, 2 }, |
1565 | { ISD::TRUNCATE, MVT::v32i1, MVT::v32i16, 2 }, |
1566 | { ISD::TRUNCATE, MVT::v64i1, MVT::v64i8, 2 }, |
1567 | }; |
1568 | |
1569 | static const TypeConversionCostTblEntry AVX512DQConversionTbl[] = { |
1570 | { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i64, 1 }, |
1571 | { ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i64, 1 }, |
1572 | |
1573 | { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i64, 1 }, |
1574 | { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i64, 1 }, |
1575 | |
1576 | { ISD::FP_TO_SINT, MVT::v8i64, MVT::v8f32, 1 }, |
1577 | { ISD::FP_TO_SINT, MVT::v8i64, MVT::v8f64, 1 }, |
1578 | |
1579 | { ISD::FP_TO_UINT, MVT::v8i64, MVT::v8f32, 1 }, |
1580 | { ISD::FP_TO_UINT, MVT::v8i64, MVT::v8f64, 1 }, |
1581 | }; |
1582 | |
1583 | |
1584 | |
1585 | |
// AVX512FConversionTbl: conversion-cost entries written as
// { ISD opcode, destination MVT, source MVT, cost }.
// The lookups further down in this listing consult it only when
// ST->useAVX512Regs() and ST->hasAVX512() hold, after the AVX512BW and
// AVX512DQ tables (when those features are present) produced no match.
// NOTE(review): the unit of the cost column is not visible in this excerpt.
// NOTE(review): some entries pair vectors with different element counts
// (e.g. v8f64 from v16i8) - presumably the source is given as a full
// register type; confirm against the table's users.
1586 | static const TypeConversionCostTblEntry AVX512FConversionTbl[] = { |
// Floating-point extend and round.
1587 | { ISD::FP_EXTEND, MVT::v8f64, MVT::v8f32, 1 }, |
1588 | { ISD::FP_EXTEND, MVT::v8f64, MVT::v16f32, 3 }, |
1589 | { ISD::FP_ROUND, MVT::v8f32, MVT::v8f64, 1 }, |
1590 | |
// Truncation to i1-element vectors.
1591 | { ISD::TRUNCATE, MVT::v2i1, MVT::v2i8, 3 }, |
1592 | { ISD::TRUNCATE, MVT::v4i1, MVT::v4i8, 3 }, |
1593 | { ISD::TRUNCATE, MVT::v8i1, MVT::v8i8, 3 }, |
1594 | { ISD::TRUNCATE, MVT::v16i1, MVT::v16i8, 3 }, |
1595 | { ISD::TRUNCATE, MVT::v2i1, MVT::v2i16, 3 }, |
1596 | { ISD::TRUNCATE, MVT::v4i1, MVT::v4i16, 3 }, |
1597 | { ISD::TRUNCATE, MVT::v8i1, MVT::v8i16, 3 }, |
1598 | { ISD::TRUNCATE, MVT::v16i1, MVT::v16i16, 3 }, |
1599 | { ISD::TRUNCATE, MVT::v2i1, MVT::v2i32, 2 }, |
1600 | { ISD::TRUNCATE, MVT::v4i1, MVT::v4i32, 2 }, |
1601 | { ISD::TRUNCATE, MVT::v8i1, MVT::v8i32, 2 }, |
1602 | { ISD::TRUNCATE, MVT::v16i1, MVT::v16i32, 2 }, |
1603 | { ISD::TRUNCATE, MVT::v2i1, MVT::v2i64, 2 }, |
1604 | { ISD::TRUNCATE, MVT::v4i1, MVT::v4i64, 2 }, |
1605 | { ISD::TRUNCATE, MVT::v8i1, MVT::v8i64, 2 }, |
// Truncation between wider integer element types.
1606 | { ISD::TRUNCATE, MVT::v2i8, MVT::v2i32, 2 }, |
1607 | { ISD::TRUNCATE, MVT::v4i8, MVT::v4i32, 2 }, |
1608 | { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 2 }, |
1609 | { ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 2 }, |
1610 | { ISD::TRUNCATE, MVT::v2i8, MVT::v2i64, 2 }, |
1611 | { ISD::TRUNCATE, MVT::v2i16, MVT::v2i64, 1 }, |
1612 | { ISD::TRUNCATE, MVT::v8i8, MVT::v8i64, 2 }, |
1613 | { ISD::TRUNCATE, MVT::v8i16, MVT::v8i64, 2 }, |
1614 | { ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 1 }, |
1615 | { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 1 }, |
1616 | { ISD::TRUNCATE, MVT::v16i8, MVT::v16i64, 5 }, |
1617 | |
1618 | { ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 3 }, |
1619 | { ISD::TRUNCATE, MVT::v32i8, MVT::v32i16, 8 }, |
1620 | |
1621 | |
1622 | |
// Sign/zero extension from i1-element vectors to i8 elements.
1623 | { ISD::SIGN_EXTEND, MVT::v2i8, MVT::v2i1, 3 }, |
1624 | { ISD::ZERO_EXTEND, MVT::v2i8, MVT::v2i1, 4 }, |
1625 | { ISD::SIGN_EXTEND, MVT::v4i8, MVT::v4i1, 3 }, |
1626 | { ISD::ZERO_EXTEND, MVT::v4i8, MVT::v4i1, 4 }, |
1627 | { ISD::SIGN_EXTEND, MVT::v8i8, MVT::v8i1, 3 }, |
1628 | { ISD::ZERO_EXTEND, MVT::v8i8, MVT::v8i1, 4 }, |
1629 | { ISD::SIGN_EXTEND, MVT::v16i8, MVT::v16i1, 3 }, |
1630 | { ISD::ZERO_EXTEND, MVT::v16i8, MVT::v16i1, 4 }, |
1631 | |
1632 | |
1633 | |
// Sign/zero extension from i1-element vectors to i16 elements.
1634 | { ISD::SIGN_EXTEND, MVT::v2i16, MVT::v2i1, 3 }, |
1635 | { ISD::ZERO_EXTEND, MVT::v2i16, MVT::v2i1, 4 }, |
1636 | { ISD::SIGN_EXTEND, MVT::v4i16, MVT::v4i1, 3 }, |
1637 | { ISD::ZERO_EXTEND, MVT::v4i16, MVT::v4i1, 4 }, |
1638 | { ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i1, 3 }, |
1639 | { ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i1, 4 }, |
1640 | { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i1, 3 }, |
1641 | { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i1, 4 }, |
1642 | |
// Sign/zero extension from i1-element vectors to i32/i64 elements.
1643 | { ISD::SIGN_EXTEND, MVT::v2i32, MVT::v2i1, 1 }, |
1644 | { ISD::ZERO_EXTEND, MVT::v2i32, MVT::v2i1, 2 }, |
1645 | { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i1, 1 }, |
1646 | { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i1, 2 }, |
1647 | { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 1 }, |
1648 | { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 2 }, |
1649 | { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i1, 1 }, |
1650 | { ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i1, 2 }, |
1651 | { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i1, 1 }, |
1652 | { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i1, 2 }, |
1653 | |
1654 | { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i1, 1 }, |
1655 | { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i1, 2 }, |
1656 | { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i1, 1 }, |
1657 | { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i1, 2 }, |
1658 | |
// Sign/zero extension between i8/i16/i32/i64 element types.
1659 | { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 1 }, |
1660 | { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 1 }, |
1661 | { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, 1 }, |
1662 | { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, 1 }, |
1663 | { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i8, 1 }, |
1664 | { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i8, 1 }, |
1665 | { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i16, 1 }, |
1666 | { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i16, 1 }, |
1667 | { ISD::SIGN_EXTEND, MVT::v8i64, MVT::v8i32, 1 }, |
1668 | { ISD::ZERO_EXTEND, MVT::v8i64, MVT::v8i32, 1 }, |
1669 | |
1670 | { ISD::SIGN_EXTEND, MVT::v32i16, MVT::v32i8, 3 }, |
1671 | { ISD::ZERO_EXTEND, MVT::v32i16, MVT::v32i8, 3 }, |
1672 | |
// Signed integer to floating point.
1673 | { ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i1, 4 }, |
1674 | { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i1, 3 }, |
1675 | { ISD::SINT_TO_FP, MVT::v8f64, MVT::v16i8, 2 }, |
1676 | { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i8, 1 }, |
1677 | { ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i16, 2 }, |
1678 | { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i16, 1 }, |
1679 | { ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i32, 1 }, |
1680 | { ISD::SINT_TO_FP, MVT::v16f32, MVT::v16i32, 1 }, |
1681 | |
// Unsigned integer to floating point.
1682 | { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i1, 4 }, |
1683 | { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i1, 3 }, |
1684 | { ISD::UINT_TO_FP, MVT::v8f64, MVT::v16i8, 2 }, |
1685 | { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i8, 1 }, |
1686 | { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i16, 2 }, |
1687 | { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i16, 1 }, |
1688 | { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i32, 1 }, |
1689 | { ISD::UINT_TO_FP, MVT::v16f32, MVT::v16i32, 1 }, |
1690 | { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i64, 26 }, |
1691 | { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i64, 5 }, |
1692 | |
// Floating point to signed integer.
1693 | { ISD::FP_TO_SINT, MVT::v16i8, MVT::v16f32, 2 }, |
1694 | { ISD::FP_TO_SINT, MVT::v16i8, MVT::v16f64, 7 }, |
1695 | { ISD::FP_TO_SINT, MVT::v32i8, MVT::v32f64,15 }, |
1696 | { ISD::FP_TO_SINT, MVT::v64i8, MVT::v64f32,11 }, |
1697 | { ISD::FP_TO_SINT, MVT::v64i8, MVT::v64f64,31 }, |
1698 | { ISD::FP_TO_SINT, MVT::v8i16, MVT::v8f64, 3 }, |
1699 | { ISD::FP_TO_SINT, MVT::v16i16, MVT::v16f64, 7 }, |
1700 | { ISD::FP_TO_SINT, MVT::v32i16, MVT::v32f32, 5 }, |
1701 | { ISD::FP_TO_SINT, MVT::v32i16, MVT::v32f64,15 }, |
1702 | { ISD::FP_TO_SINT, MVT::v8i32, MVT::v8f64, 1 }, |
1703 | { ISD::FP_TO_SINT, MVT::v16i32, MVT::v16f64, 3 }, |
1704 | |
// Floating point to unsigned integer.
1705 | { ISD::FP_TO_UINT, MVT::v8i32, MVT::v8f64, 1 }, |
1706 | { ISD::FP_TO_UINT, MVT::v8i16, MVT::v8f64, 3 }, |
1707 | { ISD::FP_TO_UINT, MVT::v8i8, MVT::v8f64, 3 }, |
1708 | { ISD::FP_TO_UINT, MVT::v16i32, MVT::v16f32, 1 }, |
1709 | { ISD::FP_TO_UINT, MVT::v16i16, MVT::v16f32, 3 }, |
1710 | { ISD::FP_TO_UINT, MVT::v16i8, MVT::v16f32, 3 }, |
1711 | }; |
1712 | |
// AVX512BWVLConversionTbl: conversion-cost entries written as
// { ISD opcode, destination MVT, source MVT, cost }.
// The lookups further down in this listing consult it whenever ST->hasBWI()
// holds, independently of ST->useAVX512Regs().
// NOTE(review): declared with `[] {` (no `=`), unlike the sibling tables;
// both spellings initialize the array the same way.
1713 | static const TypeConversionCostTblEntry AVX512BWVLConversionTbl[] { |
1714 | |
// Sign extension from i1-element vectors.
1715 | { ISD::SIGN_EXTEND, MVT::v2i8, MVT::v2i1, 1 }, |
1716 | { ISD::SIGN_EXTEND, MVT::v2i16, MVT::v2i1, 1 }, |
1717 | { ISD::SIGN_EXTEND, MVT::v4i8, MVT::v4i1, 1 }, |
1718 | { ISD::SIGN_EXTEND, MVT::v4i16, MVT::v4i1, 1 }, |
1719 | { ISD::SIGN_EXTEND, MVT::v8i8, MVT::v8i1, 1 }, |
1720 | { ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i1, 1 }, |
1721 | { ISD::SIGN_EXTEND, MVT::v16i8, MVT::v16i1, 1 }, |
1722 | { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i1, 1 }, |
1723 | { ISD::SIGN_EXTEND, MVT::v32i8, MVT::v32i1, 1 }, |
1724 | |
1725 | |
// Zero extension from i1-element vectors.
1726 | { ISD::ZERO_EXTEND, MVT::v2i8, MVT::v2i1, 2 }, |
1727 | { ISD::ZERO_EXTEND, MVT::v2i16, MVT::v2i1, 2 }, |
1728 | { ISD::ZERO_EXTEND, MVT::v4i8, MVT::v4i1, 2 }, |
1729 | { ISD::ZERO_EXTEND, MVT::v4i16, MVT::v4i1, 2 }, |
1730 | { ISD::ZERO_EXTEND, MVT::v8i8, MVT::v8i1, 2 }, |
1731 | { ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i1, 2 }, |
1732 | { ISD::ZERO_EXTEND, MVT::v16i8, MVT::v16i1, 2 }, |
1733 | { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i1, 2 }, |
1734 | { ISD::ZERO_EXTEND, MVT::v32i8, MVT::v32i1, 2 }, |
1735 | |
// Truncation, mostly to i1-element vectors.
1736 | { ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 2 }, |
1737 | { ISD::TRUNCATE, MVT::v2i1, MVT::v2i8, 2 }, |
1738 | { ISD::TRUNCATE, MVT::v2i1, MVT::v2i16, 2 }, |
1739 | { ISD::TRUNCATE, MVT::v4i1, MVT::v4i8, 2 }, |
1740 | { ISD::TRUNCATE, MVT::v4i1, MVT::v4i16, 2 }, |
1741 | { ISD::TRUNCATE, MVT::v8i1, MVT::v8i8, 2 }, |
1742 | { ISD::TRUNCATE, MVT::v8i1, MVT::v8i16, 2 }, |
1743 | { ISD::TRUNCATE, MVT::v16i1, MVT::v16i8, 2 }, |
1744 | { ISD::TRUNCATE, MVT::v16i1, MVT::v16i16, 2 }, |
1745 | { ISD::TRUNCATE, MVT::v32i1, MVT::v32i8, 2 }, |
1746 | }; |
1747 | |
// AVX512DQVLConversionTbl: conversion-cost entries written as
// { ISD opcode, destination MVT, source MVT, cost }.
// The lookups further down in this listing consult it whenever ST->hasDQI()
// holds, independently of ST->useAVX512Regs().
// Every entry converts between 64-bit integer elements and floating point,
// and every entry has cost 1.
1748 | static const TypeConversionCostTblEntry AVX512DQVLConversionTbl[] = { |
1749 | { ISD::SINT_TO_FP, MVT::v2f32, MVT::v2i64, 1 }, |
1750 | { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 }, |
1751 | { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i64, 1 }, |
1752 | { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i64, 1 }, |
1753 | |
1754 | { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i64, 1 }, |
1755 | { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 }, |
1756 | { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i64, 1 }, |
1757 | { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i64, 1 }, |
1758 | |
1759 | { ISD::FP_TO_SINT, MVT::v2i64, MVT::v4f32, 1 }, |
1760 | { ISD::FP_TO_SINT, MVT::v4i64, MVT::v4f32, 1 }, |
1761 | { ISD::FP_TO_SINT, MVT::v2i64, MVT::v2f64, 1 }, |
1762 | { ISD::FP_TO_SINT, MVT::v4i64, MVT::v4f64, 1 }, |
1763 | |
1764 | { ISD::FP_TO_UINT, MVT::v2i64, MVT::v4f32, 1 }, |
1765 | { ISD::FP_TO_UINT, MVT::v4i64, MVT::v4f32, 1 }, |
1766 | { ISD::FP_TO_UINT, MVT::v2i64, MVT::v2f64, 1 }, |
1767 | { ISD::FP_TO_UINT, MVT::v4i64, MVT::v4f64, 1 }, |
1768 | }; |
1769 | |
// AVX512VLConversionTbl: conversion-cost entries written as
// { ISD opcode, destination MVT, source MVT, cost }.
// The lookups further down in this listing consult it whenever
// ST->hasAVX512() holds, after the AVX512BWVL and AVX512DQVL tables (when
// those features are present) produced no match.
// NOTE(review): the table name suggests the AVX512VL feature, but the visible
// guard is only ST->hasAVX512() - confirm this is intended.
1770 | static const TypeConversionCostTblEntry AVX512VLConversionTbl[] = { |
// Truncation to i1-element vectors.
1771 | { ISD::TRUNCATE, MVT::v2i1, MVT::v2i8, 3 }, |
1772 | { ISD::TRUNCATE, MVT::v4i1, MVT::v4i8, 3 }, |
1773 | { ISD::TRUNCATE, MVT::v8i1, MVT::v8i8, 3 }, |
1774 | { ISD::TRUNCATE, MVT::v16i1, MVT::v16i8, 8 }, |
1775 | { ISD::TRUNCATE, MVT::v2i1, MVT::v2i16, 3 }, |
1776 | { ISD::TRUNCATE, MVT::v4i1, MVT::v4i16, 3 }, |
1777 | { ISD::TRUNCATE, MVT::v8i1, MVT::v8i16, 3 }, |
1778 | { ISD::TRUNCATE, MVT::v16i1, MVT::v16i16, 8 }, |
1779 | { ISD::TRUNCATE, MVT::v2i1, MVT::v2i32, 2 }, |
1780 | { ISD::TRUNCATE, MVT::v4i1, MVT::v4i32, 2 }, |
1781 | { ISD::TRUNCATE, MVT::v8i1, MVT::v8i32, 2 }, |
1782 | { ISD::TRUNCATE, MVT::v2i1, MVT::v2i64, 2 }, |
1783 | { ISD::TRUNCATE, MVT::v4i1, MVT::v4i64, 2 }, |
// Truncation between wider integer element types.
1784 | { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 1 }, |
1785 | { ISD::TRUNCATE, MVT::v4i8, MVT::v4i64, 2 }, |
1786 | { ISD::TRUNCATE, MVT::v4i16, MVT::v4i64, 2 }, |
1787 | { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 2 }, |
1788 | |
1789 | |
1790 | |
// Sign/zero extension from i1-element vectors to i8 elements.
1791 | { ISD::SIGN_EXTEND, MVT::v2i8, MVT::v2i1, 5 }, |
1792 | { ISD::ZERO_EXTEND, MVT::v2i8, MVT::v2i1, 6 }, |
1793 | { ISD::SIGN_EXTEND, MVT::v4i8, MVT::v4i1, 5 }, |
1794 | { ISD::ZERO_EXTEND, MVT::v4i8, MVT::v4i1, 6 }, |
1795 | { ISD::SIGN_EXTEND, MVT::v8i8, MVT::v8i1, 5 }, |
1796 | { ISD::ZERO_EXTEND, MVT::v8i8, MVT::v8i1, 6 }, |
1797 | { ISD::SIGN_EXTEND, MVT::v16i8, MVT::v16i1, 10 }, |
1798 | { ISD::ZERO_EXTEND, MVT::v16i8, MVT::v16i1, 12 }, |
1799 | |
1800 | |
1801 | |
// Sign/zero extension from i1-element vectors to i16 elements.
1802 | { ISD::SIGN_EXTEND, MVT::v2i16, MVT::v2i1, 4 }, |
1803 | { ISD::ZERO_EXTEND, MVT::v2i16, MVT::v2i1, 5 }, |
1804 | { ISD::SIGN_EXTEND, MVT::v4i16, MVT::v4i1, 4 }, |
1805 | { ISD::ZERO_EXTEND, MVT::v4i16, MVT::v4i1, 5 }, |
1806 | { ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i1, 4 }, |
1807 | { ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i1, 5 }, |
1808 | { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i1, 10 }, |
1809 | { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i1, 12 }, |
1810 | |
// Sign/zero extension from i1-element vectors to i32/i64 elements.
1811 | { ISD::SIGN_EXTEND, MVT::v2i32, MVT::v2i1, 1 }, |
1812 | { ISD::ZERO_EXTEND, MVT::v2i32, MVT::v2i1, 2 }, |
1813 | { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i1, 1 }, |
1814 | { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i1, 2 }, |
1815 | { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 1 }, |
1816 | { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 2 }, |
1817 | { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v2i1, 1 }, |
1818 | { ISD::ZERO_EXTEND, MVT::v2i64, MVT::v2i1, 2 }, |
1819 | { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i1, 1 }, |
1820 | { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i1, 2 }, |
1821 | |
// Sign/zero extension between i8/i16/i32/i64 element types.
1822 | { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v16i8, 1 }, |
1823 | { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v16i8, 1 }, |
1824 | { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v16i8, 1 }, |
1825 | { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v16i8, 1 }, |
1826 | { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 1 }, |
1827 | { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 1 }, |
1828 | { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v8i16, 1 }, |
1829 | { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v8i16, 1 }, |
1830 | { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 1 }, |
1831 | { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 1 }, |
1832 | { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 1 }, |
1833 | { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 1 }, |
1834 | |
// Signed integer to floating point.
1835 | { ISD::SINT_TO_FP, MVT::v2f64, MVT::v16i8, 1 }, |
1836 | { ISD::SINT_TO_FP, MVT::v8f32, MVT::v16i8, 1 }, |
1837 | { ISD::SINT_TO_FP, MVT::v2f64, MVT::v8i16, 1 }, |
1838 | { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 1 }, |
1839 | |
// Unsigned integer to floating point (includes scalar i64 entries).
1840 | { ISD::UINT_TO_FP, MVT::f32, MVT::i64, 1 }, |
1841 | { ISD::UINT_TO_FP, MVT::f64, MVT::i64, 1 }, |
1842 | { ISD::UINT_TO_FP, MVT::v2f64, MVT::v16i8, 1 }, |
1843 | { ISD::UINT_TO_FP, MVT::v8f32, MVT::v16i8, 1 }, |
1844 | { ISD::UINT_TO_FP, MVT::v2f64, MVT::v8i16, 1 }, |
1845 | { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 1 }, |
1846 | { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 1 }, |
1847 | { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 }, |
1848 | { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i32, 1 }, |
1849 | { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 1 }, |
1850 | { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i64, 5 }, |
1851 | { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 5 }, |
1852 | { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i64, 5 }, |
1853 | |
// Floating point to signed integer.
1854 | { ISD::FP_TO_SINT, MVT::v16i8, MVT::v8f32, 2 }, |
1855 | { ISD::FP_TO_SINT, MVT::v16i8, MVT::v16f32, 2 }, |
1856 | { ISD::FP_TO_SINT, MVT::v32i8, MVT::v32f32, 5 }, |
1857 | |
// Floating point to unsigned integer (includes scalar i64 entries).
1858 | { ISD::FP_TO_UINT, MVT::i64, MVT::f32, 1 }, |
1859 | { ISD::FP_TO_UINT, MVT::i64, MVT::f64, 1 }, |
1860 | { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 1 }, |
1861 | { ISD::FP_TO_UINT, MVT::v4i32, MVT::v2f64, 1 }, |
1862 | { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f64, 1 }, |
1863 | { ISD::FP_TO_UINT, MVT::v8i32, MVT::v8f32, 1 }, |
1864 | { ISD::FP_TO_UINT, MVT::v8i32, MVT::v8f64, 1 }, |
1865 | }; |
1866 | |
// AVX2ConversionTbl: conversion-cost entries written as
// { ISD opcode, destination MVT, source MVT, cost }.
// The lookups further down in this listing consult it when ST->hasAVX2()
// holds and none of the AVX-512 tables produced a match.
1867 | static const TypeConversionCostTblEntry AVX2ConversionTbl[] = { |
// Sign/zero extension from i1-element vectors.
1868 | { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i1, 3 }, |
1869 | { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i1, 3 }, |
1870 | { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 3 }, |
1871 | { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 3 }, |
1872 | { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i1, 1 }, |
1873 | { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i1, 1 }, |
1874 | |
// Sign/zero extension between i8/i16/i32/i64 element types.
1875 | { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v16i8, 2 }, |
1876 | { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v16i8, 2 }, |
1877 | { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v16i8, 2 }, |
1878 | { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v16i8, 2 }, |
1879 | { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 2 }, |
1880 | { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 2 }, |
1881 | { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v8i16, 2 }, |
1882 | { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v8i16, 2 }, |
1883 | { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 2 }, |
1884 | { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 2 }, |
1885 | { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, 3 }, |
1886 | { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, 3 }, |
1887 | { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 2 }, |
1888 | { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 2 }, |
1889 | |
// Truncation.
1890 | { ISD::TRUNCATE, MVT::v8i1, MVT::v8i32, 2 }, |
1891 | |
1892 | { ISD::TRUNCATE, MVT::v16i8, MVT::v8i16, 1 }, |
1893 | { ISD::TRUNCATE, MVT::v16i8, MVT::v4i32, 1 }, |
1894 | { ISD::TRUNCATE, MVT::v16i8, MVT::v2i64, 1 }, |
1895 | { ISD::TRUNCATE, MVT::v16i8, MVT::v8i32, 4 }, |
1896 | { ISD::TRUNCATE, MVT::v16i8, MVT::v4i64, 4 }, |
1897 | { ISD::TRUNCATE, MVT::v8i16, MVT::v4i32, 1 }, |
1898 | { ISD::TRUNCATE, MVT::v8i16, MVT::v2i64, 1 }, |
1899 | { ISD::TRUNCATE, MVT::v8i16, MVT::v4i64, 5 }, |
1900 | { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 1 }, |
1901 | { ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 2 }, |
1902 | |
// Floating-point extend and round.
1903 | { ISD::FP_EXTEND, MVT::v8f64, MVT::v8f32, 3 }, |
1904 | { ISD::FP_ROUND, MVT::v8f32, MVT::v8f64, 3 }, |
1905 | |
// Floating point to signed integer.
1906 | { ISD::FP_TO_SINT, MVT::v16i16, MVT::v8f32, 1 }, |
1907 | { ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f64, 1 }, |
1908 | { ISD::FP_TO_SINT, MVT::v8i32, MVT::v8f32, 1 }, |
1909 | { ISD::FP_TO_SINT, MVT::v8i32, MVT::v8f64, 3 }, |
1910 | |
// Floating point to unsigned integer (includes scalar i64 entries).
1911 | { ISD::FP_TO_UINT, MVT::i64, MVT::f32, 3 }, |
1912 | { ISD::FP_TO_UINT, MVT::i64, MVT::f64, 3 }, |
1913 | { ISD::FP_TO_UINT, MVT::v16i16, MVT::v8f32, 1 }, |
1914 | { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 3 }, |
1915 | { ISD::FP_TO_UINT, MVT::v4i32, MVT::v2f64, 4 }, |
1916 | { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f64, 4 }, |
1917 | { ISD::FP_TO_UINT, MVT::v8i32, MVT::v8f32, 3 }, |
1918 | { ISD::FP_TO_UINT, MVT::v8i32, MVT::v4f64, 4 }, |
1919 | |
// Signed integer to floating point.
1920 | { ISD::SINT_TO_FP, MVT::v2f64, MVT::v16i8, 2 }, |
1921 | { ISD::SINT_TO_FP, MVT::v8f32, MVT::v16i8, 2 }, |
1922 | { ISD::SINT_TO_FP, MVT::v2f64, MVT::v8i16, 2 }, |
1923 | { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 2 }, |
1924 | { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i32, 1 }, |
1925 | { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i32, 1 }, |
1926 | { ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i32, 3 }, |
1927 | |
// Unsigned integer to floating point.
1928 | { ISD::UINT_TO_FP, MVT::v2f64, MVT::v16i8, 2 }, |
1929 | { ISD::UINT_TO_FP, MVT::v8f32, MVT::v16i8, 2 }, |
1930 | { ISD::UINT_TO_FP, MVT::v2f64, MVT::v8i16, 2 }, |
1931 | { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 2 }, |
1932 | { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 2 }, |
1933 | { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 1 }, |
1934 | { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 2 }, |
1935 | { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i32, 2 }, |
1936 | { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 2 }, |
1937 | { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i32, 4 }, |
1938 | }; |
1939 | |
// AVXConversionTbl: conversion-cost entries written as
// { ISD opcode, destination MVT, source MVT, cost }.
// The lookups further down in this listing consult it when ST->hasAVX()
// holds and neither the AVX-512 tables nor the AVX2 table produced a match.
1940 | static const TypeConversionCostTblEntry AVXConversionTbl[] = { |
// Sign/zero extension from i1-element vectors.
1941 | { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i1, 6 }, |
1942 | { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i1, 4 }, |
1943 | { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 7 }, |
1944 | { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 4 }, |
1945 | { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i1, 4 }, |
1946 | { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i1, 4 }, |
1947 | |
// Sign/zero extension between i8/i16/i32/i64 element types.
1948 | { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v16i8, 3 }, |
1949 | { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v16i8, 3 }, |
1950 | { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v16i8, 3 }, |
1951 | { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v16i8, 3 }, |
1952 | { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 3 }, |
1953 | { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 3 }, |
1954 | { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v8i16, 3 }, |
1955 | { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v8i16, 3 }, |
1956 | { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 3 }, |
1957 | { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 3 }, |
1958 | { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 3 }, |
1959 | { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 3 }, |
1960 | |
// Truncation to i1-element vectors.
1961 | { ISD::TRUNCATE, MVT::v4i1, MVT::v4i64, 4 }, |
1962 | { ISD::TRUNCATE, MVT::v8i1, MVT::v8i32, 5 }, |
1963 | { ISD::TRUNCATE, MVT::v16i1, MVT::v16i16, 4 }, |
1964 | { ISD::TRUNCATE, MVT::v8i1, MVT::v8i64, 9 }, |
1965 | { ISD::TRUNCATE, MVT::v16i1, MVT::v16i64, 11 }, |
1966 | |
// Truncation between wider integer element types.
1967 | { ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 2 }, |
1968 | { ISD::TRUNCATE, MVT::v16i8, MVT::v8i32, 5 }, |
1969 | { ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 5 }, |
1970 | { ISD::TRUNCATE, MVT::v16i8, MVT::v4i64, 5 }, |
1971 | { ISD::TRUNCATE, MVT::v8i16, MVT::v4i64, 3 }, |
1972 | { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 2 }, |
1973 | |
// Signed integer to floating point.
1974 | { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i1, 3 }, |
1975 | { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i1, 3 }, |
1976 | { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i1, 8 }, |
1977 | { ISD::SINT_TO_FP, MVT::v8f32, MVT::v16i8, 4 }, |
1978 | { ISD::SINT_TO_FP, MVT::v4f64, MVT::v16i8, 2 }, |
1979 | { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 }, |
1980 | { ISD::SINT_TO_FP, MVT::v4f64, MVT::v8i16, 2 }, |
1981 | { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i32, 2 }, |
1982 | { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i32, 2 }, |
1983 | { ISD::SINT_TO_FP, MVT::v8f64, MVT::v8i32, 4 }, |
1984 | { ISD::SINT_TO_FP, MVT::v4f32, MVT::v2i64, 5 }, |
1985 | { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i64, 8 }, |
1986 | |
// Unsigned integer to floating point.
1987 | { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i1, 7 }, |
1988 | { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i1, 7 }, |
1989 | { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i1, 6 }, |
1990 | { ISD::UINT_TO_FP, MVT::v8f32, MVT::v16i8, 4 }, |
1991 | { ISD::UINT_TO_FP, MVT::v4f64, MVT::v16i8, 2 }, |
1992 | { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i16, 4 }, |
1993 | { ISD::UINT_TO_FP, MVT::v4f64, MVT::v8i16, 2 }, |
1994 | { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 4 }, |
1995 | { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i32, 4 }, |
1996 | { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 5 }, |
1997 | { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i32, 6 }, |
1998 | { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i32, 8 }, |
1999 | { ISD::UINT_TO_FP, MVT::v8f64, MVT::v8i32, 10 }, |
2000 | { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i64, 10 }, |
2001 | { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i64, 18 }, |
2002 | { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 5 }, |
2003 | { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i64, 10 }, |
2004 | |
// Floating point to signed integer.
2005 | { ISD::FP_TO_SINT, MVT::v16i8, MVT::v8f32, 2 }, |
2006 | { ISD::FP_TO_SINT, MVT::v16i8, MVT::v4f64, 2 }, |
2007 | { ISD::FP_TO_SINT, MVT::v32i8, MVT::v8f32, 2 }, |
2008 | { ISD::FP_TO_SINT, MVT::v32i8, MVT::v4f64, 2 }, |
2009 | { ISD::FP_TO_SINT, MVT::v8i16, MVT::v8f32, 2 }, |
2010 | { ISD::FP_TO_SINT, MVT::v8i16, MVT::v4f64, 2 }, |
2011 | { ISD::FP_TO_SINT, MVT::v16i16, MVT::v8f32, 2 }, |
2012 | { ISD::FP_TO_SINT, MVT::v16i16, MVT::v4f64, 2 }, |
2013 | { ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f64, 2 }, |
2014 | { ISD::FP_TO_SINT, MVT::v8i32, MVT::v8f32, 2 }, |
2015 | { ISD::FP_TO_SINT, MVT::v8i32, MVT::v8f64, 5 }, |
2016 | |
// Floating point to unsigned integer.
2017 | { ISD::FP_TO_UINT, MVT::v16i8, MVT::v8f32, 2 }, |
2018 | { ISD::FP_TO_UINT, MVT::v16i8, MVT::v4f64, 2 }, |
2019 | { ISD::FP_TO_UINT, MVT::v32i8, MVT::v8f32, 2 }, |
2020 | { ISD::FP_TO_UINT, MVT::v32i8, MVT::v4f64, 2 }, |
2021 | { ISD::FP_TO_UINT, MVT::v8i16, MVT::v8f32, 2 }, |
2022 | { ISD::FP_TO_UINT, MVT::v8i16, MVT::v4f64, 2 }, |
2023 | { ISD::FP_TO_UINT, MVT::v16i16, MVT::v8f32, 2 }, |
2024 | { ISD::FP_TO_UINT, MVT::v16i16, MVT::v4f64, 2 }, |
2025 | { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 3 }, |
2026 | { ISD::FP_TO_UINT, MVT::v4i32, MVT::v2f64, 4 }, |
2027 | { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f64, 6 }, |
2028 | { ISD::FP_TO_UINT, MVT::v8i32, MVT::v8f32, 7 }, |
2029 | { ISD::FP_TO_UINT, MVT::v8i32, MVT::v4f64, 7 }, |
2030 | |
// Floating-point extend and round.
2031 | { ISD::FP_EXTEND, MVT::v4f64, MVT::v4f32, 1 }, |
2032 | { ISD::FP_ROUND, MVT::v4f32, MVT::v4f64, 1 }, |
2033 | }; |
2034 | |
// SSE41ConversionTbl: conversion-cost entries written as
// { ISD opcode, destination MVT, source MVT, cost }.
// The lookups further down in this listing consult it when ST->hasSSE41()
// holds and none of the AVX-512, AVX2 or AVX tables produced a match.
// Unlike the AVX tables above it, this one also carries scalar entries
// (i32/i64 <-> f32/f64).
2035 | static const TypeConversionCostTblEntry SSE41ConversionTbl[] = { |
// Sign/zero extension between i8/i16/i32/i64 element types.
2036 | { ISD::ZERO_EXTEND, MVT::v2i64, MVT::v16i8, 1 }, |
2037 | { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v16i8, 1 }, |
2038 | { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v16i8, 1 }, |
2039 | { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v16i8, 1 }, |
2040 | { ISD::ZERO_EXTEND, MVT::v8i16, MVT::v16i8, 1 }, |
2041 | { ISD::SIGN_EXTEND, MVT::v8i16, MVT::v16i8, 1 }, |
2042 | { ISD::ZERO_EXTEND, MVT::v2i64, MVT::v8i16, 1 }, |
2043 | { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v8i16, 1 }, |
2044 | { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v8i16, 1 }, |
2045 | { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v8i16, 1 }, |
2046 | { ISD::ZERO_EXTEND, MVT::v2i64, MVT::v4i32, 1 }, |
2047 | { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v4i32, 1 }, |
2048 | |
2049 | |
// Truncation to i1-element vectors.
2050 | { ISD::TRUNCATE, MVT::v2i1, MVT::v2i8, 1 }, |
2051 | { ISD::TRUNCATE, MVT::v2i1, MVT::v2i16, 1 }, |
2052 | { ISD::TRUNCATE, MVT::v4i1, MVT::v4i8, 1 }, |
2053 | |
// Truncation between wider integer element types.
2054 | { ISD::TRUNCATE, MVT::v16i8, MVT::v4i32, 2 }, |
2055 | { ISD::TRUNCATE, MVT::v8i16, MVT::v4i32, 2 }, |
2056 | { ISD::TRUNCATE, MVT::v16i8, MVT::v2i64, 2 }, |
2057 | |
// Signed integer to floating point.
2058 | { ISD::SINT_TO_FP, MVT::f32, MVT::i32, 1 }, |
2059 | { ISD::SINT_TO_FP, MVT::f64, MVT::i32, 1 }, |
2060 | { ISD::SINT_TO_FP, MVT::f32, MVT::i64, 1 }, |
2061 | { ISD::SINT_TO_FP, MVT::f64, MVT::i64, 1 }, |
2062 | { ISD::SINT_TO_FP, MVT::v4f32, MVT::v16i8, 1 }, |
2063 | { ISD::SINT_TO_FP, MVT::v2f64, MVT::v16i8, 1 }, |
2064 | { ISD::SINT_TO_FP, MVT::v4f32, MVT::v8i16, 1 }, |
2065 | { ISD::SINT_TO_FP, MVT::v2f64, MVT::v8i16, 1 }, |
2066 | { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 1 }, |
2067 | { ISD::SINT_TO_FP, MVT::v2f64, MVT::v4i32, 1 }, |
2068 | { ISD::SINT_TO_FP, MVT::v4f64, MVT::v4i32, 2 }, |
2069 | |
// Unsigned integer to floating point.
2070 | { ISD::UINT_TO_FP, MVT::f32, MVT::i32, 1 }, |
2071 | { ISD::UINT_TO_FP, MVT::f64, MVT::i32, 1 }, |
2072 | { ISD::UINT_TO_FP, MVT::f32, MVT::i64, 4 }, |
2073 | { ISD::UINT_TO_FP, MVT::f64, MVT::i64, 4 }, |
2074 | { ISD::UINT_TO_FP, MVT::v4f32, MVT::v16i8, 1 }, |
2075 | { ISD::UINT_TO_FP, MVT::v2f64, MVT::v16i8, 1 }, |
2076 | { ISD::UINT_TO_FP, MVT::v4f32, MVT::v8i16, 1 }, |
2077 | { ISD::UINT_TO_FP, MVT::v2f64, MVT::v8i16, 1 }, |
2078 | { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 3 }, |
2079 | { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 3 }, |
2080 | { ISD::UINT_TO_FP, MVT::v2f64, MVT::v4i32, 2 }, |
2081 | { ISD::UINT_TO_FP, MVT::v4f32, MVT::v2i64, 12 }, |
2082 | { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i64, 22 }, |
2083 | { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 4 }, |
2084 | |
// Floating point to signed integer.
2085 | { ISD::FP_TO_SINT, MVT::i32, MVT::f32, 1 }, |
2086 | { ISD::FP_TO_SINT, MVT::i64, MVT::f32, 1 }, |
2087 | { ISD::FP_TO_SINT, MVT::i32, MVT::f64, 1 }, |
2088 | { ISD::FP_TO_SINT, MVT::i64, MVT::f64, 1 }, |
2089 | { ISD::FP_TO_SINT, MVT::v16i8, MVT::v4f32, 2 }, |
2090 | { ISD::FP_TO_SINT, MVT::v16i8, MVT::v2f64, 2 }, |
2091 | { ISD::FP_TO_SINT, MVT::v8i16, MVT::v4f32, 1 }, |
2092 | { ISD::FP_TO_SINT, MVT::v8i16, MVT::v2f64, 1 }, |
2093 | { ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 1 }, |
2094 | { ISD::FP_TO_SINT, MVT::v4i32, MVT::v2f64, 1 }, |
2095 | |
// Floating point to unsigned integer.
2096 | { ISD::FP_TO_UINT, MVT::i32, MVT::f32, 1 }, |
2097 | { ISD::FP_TO_UINT, MVT::i64, MVT::f32, 4 }, |
2098 | { ISD::FP_TO_UINT, MVT::i32, MVT::f64, 1 }, |
2099 | { ISD::FP_TO_UINT, MVT::i64, MVT::f64, 4 }, |
2100 | { ISD::FP_TO_UINT, MVT::v16i8, MVT::v4f32, 2 }, |
2101 | { ISD::FP_TO_UINT, MVT::v16i8, MVT::v2f64, 2 }, |
2102 | { ISD::FP_TO_UINT, MVT::v8i16, MVT::v4f32, 1 }, |
2103 | { ISD::FP_TO_UINT, MVT::v8i16, MVT::v2f64, 1 }, |
2104 | { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 4 }, |
2105 | { ISD::FP_TO_UINT, MVT::v4i32, MVT::v2f64, 4 }, |
2106 | }; |
2107 | |
// SSE2ConversionTbl: conversion-cost entries written as
// { ISD opcode, destination MVT, source MVT, cost }.
// It is the last table the lookups further down in this listing try, guarded
// by ST->hasSSE2(). It covers the same kinds of conversions as the SSE4.1
// table, and wherever both tables list the same (opcode, destination,
// source) triple the cost here is equal or higher.
2108 | static const TypeConversionCostTblEntry SSE2ConversionTbl[] = { |
2109 | |
2110 | |
2111 | |
// Signed integer to floating point.
2112 | { ISD::SINT_TO_FP, MVT::f32, MVT::i32, 3 }, |
2113 | { ISD::SINT_TO_FP, MVT::f64, MVT::i32, 3 }, |
2114 | { ISD::SINT_TO_FP, MVT::f32, MVT::i64, 3 }, |
2115 | { ISD::SINT_TO_FP, MVT::f64, MVT::i64, 3 }, |
2116 | { ISD::SINT_TO_FP, MVT::v4f32, MVT::v16i8, 3 }, |
2117 | { ISD::SINT_TO_FP, MVT::v2f64, MVT::v16i8, 4 }, |
2118 | { ISD::SINT_TO_FP, MVT::v4f32, MVT::v8i16, 3 }, |
2119 | { ISD::SINT_TO_FP, MVT::v2f64, MVT::v8i16, 4 }, |
2120 | { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 3 }, |
2121 | { ISD::SINT_TO_FP, MVT::v2f64, MVT::v4i32, 4 }, |
2122 | { ISD::SINT_TO_FP, MVT::v4f32, MVT::v2i64, 8 }, |
2123 | { ISD::SINT_TO_FP, MVT::v2f64, MVT::v2i64, 8 }, |
2124 | |
// Unsigned integer to floating point.
2125 | { ISD::UINT_TO_FP, MVT::f32, MVT::i32, 3 }, |
2126 | { ISD::UINT_TO_FP, MVT::f64, MVT::i32, 3 }, |
2127 | { ISD::UINT_TO_FP, MVT::f32, MVT::i64, 8 }, |
2128 | { ISD::UINT_TO_FP, MVT::f64, MVT::i64, 9 }, |
2129 | { ISD::UINT_TO_FP, MVT::v2f64, MVT::v16i8, 4 }, |
2130 | { ISD::UINT_TO_FP, MVT::v4f32, MVT::v16i8, 4 }, |
2131 | { ISD::UINT_TO_FP, MVT::v4f32, MVT::v8i16, 4 }, |
2132 | { ISD::UINT_TO_FP, MVT::v2f64, MVT::v8i16, 4 }, |
2133 | { ISD::UINT_TO_FP, MVT::v2f32, MVT::v2i32, 7 }, |
2134 | { ISD::UINT_TO_FP, MVT::v2f64, MVT::v4i32, 7 }, |
2135 | { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i32, 5 }, |
2136 | { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 15 }, |
2137 | { ISD::UINT_TO_FP, MVT::v4f32, MVT::v2i64, 18 }, |
2138 | |
// Floating point to signed integer.
2139 | { ISD::FP_TO_SINT, MVT::i32, MVT::f32, 4 }, |
2140 | { ISD::FP_TO_SINT, MVT::i64, MVT::f32, 4 }, |
2141 | { ISD::FP_TO_SINT, MVT::i32, MVT::f64, 4 }, |
2142 | { ISD::FP_TO_SINT, MVT::i64, MVT::f64, 4 }, |
2143 | { ISD::FP_TO_SINT, MVT::v16i8, MVT::v4f32, 6 }, |
2144 | { ISD::FP_TO_SINT, MVT::v16i8, MVT::v2f64, 6 }, |
2145 | { ISD::FP_TO_SINT, MVT::v8i16, MVT::v4f32, 5 }, |
2146 | { ISD::FP_TO_SINT, MVT::v8i16, MVT::v2f64, 5 }, |
2147 | { ISD::FP_TO_SINT, MVT::v4i32, MVT::v4f32, 4 }, |
2148 | { ISD::FP_TO_SINT, MVT::v4i32, MVT::v2f64, 4 }, |
2149 | |
// Floating point to unsigned integer.
2150 | { ISD::FP_TO_UINT, MVT::i32, MVT::f32, 4 }, |
2151 | { ISD::FP_TO_UINT, MVT::i64, MVT::f32, 4 }, |
2152 | { ISD::FP_TO_UINT, MVT::i32, MVT::f64, 4 }, |
2153 | { ISD::FP_TO_UINT, MVT::i64, MVT::f64, 15 }, |
2154 | { ISD::FP_TO_UINT, MVT::v16i8, MVT::v4f32, 6 }, |
2155 | { ISD::FP_TO_UINT, MVT::v16i8, MVT::v2f64, 6 }, |
2156 | { ISD::FP_TO_UINT, MVT::v8i16, MVT::v4f32, 5 }, |
2157 | { ISD::FP_TO_UINT, MVT::v8i16, MVT::v2f64, 5 }, |
2158 | { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f32, 8 }, |
2159 | { ISD::FP_TO_UINT, MVT::v4i32, MVT::v2f64, 8 }, |
2160 | |
// Sign/zero extension between i8/i16/i32/i64 element types.
2161 | { ISD::ZERO_EXTEND, MVT::v2i64, MVT::v16i8, 4 }, |
2162 | { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v16i8, 4 }, |
2163 | { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v16i8, 2 }, |
2164 | { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v16i8, 3 }, |
2165 | { ISD::ZERO_EXTEND, MVT::v8i16, MVT::v16i8, 1 }, |
2166 | { ISD::SIGN_EXTEND, MVT::v8i16, MVT::v16i8, 2 }, |
2167 | { ISD::ZERO_EXTEND, MVT::v2i64, MVT::v8i16, 2 }, |
2168 | { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v8i16, 3 }, |
2169 | { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v8i16, 1 }, |
2170 | { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v8i16, 2 }, |
2171 | { ISD::ZERO_EXTEND, MVT::v2i64, MVT::v4i32, 1 }, |
2172 | { ISD::SIGN_EXTEND, MVT::v2i64, MVT::v4i32, 2 }, |
2173 | |
2174 | |
// Truncation to i1-element vectors.
2175 | { ISD::TRUNCATE, MVT::v2i1, MVT::v2i32, 1 }, |
2176 | { ISD::TRUNCATE, MVT::v2i1, MVT::v2i16, 2 }, |
2177 | { ISD::TRUNCATE, MVT::v2i1, MVT::v2i8, 3 }, |
2178 | { ISD::TRUNCATE, MVT::v4i1, MVT::v4i16, 1 }, |
2179 | { ISD::TRUNCATE, MVT::v4i1, MVT::v4i8, 2 }, |
2180 | { ISD::TRUNCATE, MVT::v8i1, MVT::v8i8, 1 }, |
2181 | |
// Truncation between wider integer element types.
2182 | { ISD::TRUNCATE, MVT::v16i8, MVT::v8i16, 2 }, |
2183 | { ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 3 }, |
2184 | { ISD::TRUNCATE, MVT::v16i8, MVT::v4i32, 3 }, |
2185 | { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 7 }, |
2186 | { ISD::TRUNCATE, MVT::v2i16, MVT::v2i32, 1 }, |
2187 | { ISD::TRUNCATE, MVT::v8i16, MVT::v4i32, 3 }, |
2188 | { ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 5 }, |
2189 | { ISD::TRUNCATE, MVT::v16i16, MVT::v16i32,10 }, |
2190 | { ISD::TRUNCATE, MVT::v16i8, MVT::v2i64, 4 }, |
2191 | { ISD::TRUNCATE, MVT::v8i16, MVT::v2i64, 2 }, |
2192 | { ISD::TRUNCATE, MVT::v4i32, MVT::v2i64, 1 }, |
2193 | }; |
2194 | |
2195 | |
// Map the IR types Src and Dst to EVTs through TLI.
2196 | EVT SrcTy = TLI->getValueType(DL, Src); |
2197 | EVT DstTy = TLI->getValueType(DL, Dst); |
2198 | |
2199 | |
// First attempt: when both EVTs are simple, look the (ISD, destination,
// source) triple up directly. Tables are tried in a fixed order, each guarded
// by a subtarget feature check; the first matching entry ends the search and
// its Cost is returned through AdjustCost (defined outside this excerpt).
2200 | if (SrcTy.isSimple() && DstTy.isSimple()) { |
2201 | MVT SimpleSrcTy = SrcTy.getSimpleVT(); |
2202 | MVT SimpleDstTy = DstTy.getSimpleVT(); |
2203 | |
// These three tables are consulted only when ST->useAVX512Regs() holds.
// AVX512BWConversionTbl and AVX512DQConversionTbl are defined before the
// part of the function visible here.
2204 | if (ST->useAVX512Regs()) { |
2205 | if (ST->hasBWI()) |
2206 | if (const auto *Entry = ConvertCostTableLookup( |
2207 | AVX512BWConversionTbl, ISD, SimpleDstTy, SimpleSrcTy)) |
2208 | return AdjustCost(Entry->Cost); |
2209 | |
2210 | if (ST->hasDQI()) |
2211 | if (const auto *Entry = ConvertCostTableLookup( |
2212 | AVX512DQConversionTbl, ISD, SimpleDstTy, SimpleSrcTy)) |
2213 | return AdjustCost(Entry->Cost); |
2214 | |
2215 | if (ST->hasAVX512()) |
2216 | if (const auto *Entry = ConvertCostTableLookup( |
2217 | AVX512FConversionTbl, ISD, SimpleDstTy, SimpleSrcTy)) |
2218 | return AdjustCost(Entry->Cost); |
2219 | } |
2220 | |
// The *VL tables are tried regardless of ST->useAVX512Regs().
2221 | if (ST->hasBWI()) |
2222 | if (const auto *Entry = ConvertCostTableLookup( |
2223 | AVX512BWVLConversionTbl, ISD, SimpleDstTy, SimpleSrcTy)) |
2224 | return AdjustCost(Entry->Cost); |
2225 | |
2226 | if (ST->hasDQI()) |
2227 | if (const auto *Entry = ConvertCostTableLookup( |
2228 | AVX512DQVLConversionTbl, ISD, SimpleDstTy, SimpleSrcTy)) |
2229 | return AdjustCost(Entry->Cost); |
2230 | |
2231 | if (ST->hasAVX512()) |
2232 | if (const auto *Entry = ConvertCostTableLookup(AVX512VLConversionTbl, ISD, |
2233 | SimpleDstTy, SimpleSrcTy)) |
2234 | return AdjustCost(Entry->Cost); |
2235 | |
// Remaining tables in order: AVX2, AVX, SSE4.1, SSE2.
2236 | if (ST->hasAVX2()) { |
2237 | if (const auto *Entry = ConvertCostTableLookup(AVX2ConversionTbl, ISD, |
2238 | SimpleDstTy, SimpleSrcTy)) |
2239 | return AdjustCost(Entry->Cost); |
2240 | } |
2241 | |
2242 | if (ST->hasAVX()) { |
2243 | if (const auto *Entry = ConvertCostTableLookup(AVXConversionTbl, ISD, |
2244 | SimpleDstTy, SimpleSrcTy)) |
2245 | return AdjustCost(Entry->Cost); |
2246 | } |
2247 | |
2248 | if (ST->hasSSE41()) { |
2249 | if (const auto *Entry = ConvertCostTableLookup(SSE41ConversionTbl, ISD, |
2250 | SimpleDstTy, SimpleSrcTy)) |
2251 | return AdjustCost(Entry->Cost); |
2252 | } |
2253 | |
2254 | if (ST->hasSSE2()) { |
2255 | if (const auto *Entry = ConvertCostTableLookup(SSE2ConversionTbl, ISD, |
2256 | SimpleDstTy, SimpleSrcTy)) |
2257 | return AdjustCost(Entry->Cost); |
2258 | } |
2259 | } |
2260 | |
2261 | |
// Second attempt, reached when the direct lookup above returned nothing:
// ask TLI for the type-legalization result of Src and Dst. Each result is a
// pair; .first is an InstructionCost and .second is the MVT used as the
// table key below.
2262 | std::pair<InstructionCost, MVT> LTSrc = TLI->getTypeLegalizationCost(DL, Src); |
2263 | std::pair<InstructionCost, MVT> LTDest = |
2264 | TLI->getTypeLegalizationCost(DL, Dst); |
2265 | |
// Same table order and feature guards as the direct lookup, but keyed on
// (ISD, LTDest.second, LTSrc.second). On a match the entry's Cost is
// multiplied by the larger of the two legalization costs before being
// passed to AdjustCost.
2266 | if (ST->useAVX512Regs()) { |
2267 | if (ST->hasBWI()) |
2268 | if (const auto *Entry = ConvertCostTableLookup( |
2269 | AVX512BWConversionTbl, ISD, LTDest.second, LTSrc.second)) |
2270 | return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost); |
2271 | |
2272 | if (ST->hasDQI()) |
2273 | if (const auto *Entry = ConvertCostTableLookup( |
2274 | AVX512DQConversionTbl, ISD, LTDest.second, LTSrc.second)) |
2275 | return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost); |
2276 | |
2277 | if (ST->hasAVX512()) |
2278 | if (const auto *Entry = ConvertCostTableLookup( |
2279 | AVX512FConversionTbl, ISD, LTDest.second, LTSrc.second)) |
2280 | return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost); |
2281 | } |
2282 | |
// The *VL tables are tried regardless of ST->useAVX512Regs().
2283 | if (ST->hasBWI()) |
2284 | if (const auto *Entry = ConvertCostTableLookup(AVX512BWVLConversionTbl, ISD, |
2285 | LTDest.second, LTSrc.second)) |
2286 | return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost); |
2287 | |
2288 | if (ST->hasDQI()) |
2289 | if (const auto *Entry = ConvertCostTableLookup(AVX512DQVLConversionTbl, ISD, |
2290 | LTDest.second, LTSrc.second)) |
2291 | return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost); |
2292 | |
2293 | if (ST->hasAVX512()) |
2294 | if (const auto *Entry = ConvertCostTableLookup(AVX512VLConversionTbl, ISD, |
2295 | LTDest.second, LTSrc.second)) |
2296 | return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost); |
2297 | |
// Remaining tables in order: AVX2, AVX, SSE4.1, SSE2.
2298 | if (ST->hasAVX2()) |
2299 | if (const auto *Entry = ConvertCostTableLookup(AVX2ConversionTbl, ISD, |
2300 | LTDest.second, LTSrc.second)) |
2301 | return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost); |
2302 | |
2303 | if (ST->hasAVX()) |
2304 | if (const auto *Entry = ConvertCostTableLookup(AVXConversionTbl, ISD, |
2305 | LTDest.second, LTSrc.second)) |
2306 | return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost); |
2307 | |
2308 | if (ST->hasSSE41()) |
2309 | if (const auto *Entry = ConvertCostTableLookup(SSE41ConversionTbl, ISD, |
2310 | LTDest.second, LTSrc.second)) |
2311 | return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost); |
2312 | |
2313 | if (ST->hasSSE2()) |
2314 | if (const auto *Entry = ConvertCostTableLookup(SSE2ConversionTbl, ISD, |
2315 | LTDest.second, LTSrc.second)) |
2316 | return AdjustCost(std::max(LTSrc.first, LTDest.first) * Entry->Cost); |
2317 | |
2318 | |
2319 | |
2320 | if ((ISD == ISD::SINT_TO_FP || ISD == ISD::UINT_TO_FP) && |
2321 | 1 < Src->getScalarSizeInBits() && Src->getScalarSizeInBits() < 32) { |
2322 | Type *ExtSrc = Src->getWithNewBitWidth(32); |
2323 | unsigned ExtOpc = |
2324 | (ISD == ISD::SINT_TO_FP) ? Instruction::SExt : Instruction::ZExt; |
2325 | |
2326 | |
2327 | InstructionCost ExtCost = 0; |
2328 | if (!(Src->isIntegerTy() && I && isa<LoadInst>(I->getOperand(0)))) |
2329 | ExtCost = getCastInstrCost(ExtOpc, ExtSrc, Src, CCH, CostKind); |
2330 | |
2331 | return ExtCost + getCastInstrCost(Instruction::SIToFP, Dst, ExtSrc, |
2332 | TTI::CastContextHint::None, CostKind); |
2333 | } |
2334 | |
2335 | |
2336 | |
2337 | if ((ISD == ISD::FP_TO_SINT || ISD == ISD::FP_TO_UINT) && |
2338 | 1 < Dst->getScalarSizeInBits() && Dst->getScalarSizeInBits() < 32) { |
2339 | Type *TruncDst = Dst->getWithNewBitWidth(32); |
2340 | return getCastInstrCost(Instruction::FPToSI, TruncDst, Src, CCH, CostKind) + |
2341 | getCastInstrCost(Instruction::Trunc, Dst, TruncDst, |
2342 | TTI::CastContextHint::None, CostKind); |
2343 | } |
2344 | |
2345 | return AdjustCost( |
2346 | BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I)); |
2347 | } |
2348 | |
// X86TTIImpl::getCmpSelInstrCost
//
// Cost estimate for a compare or select instruction.
//   Opcode   - IR opcode; mapped to an ISD opcode through
//              TLI->InstructionOpcodeToISD (asserted to be non-zero).
//   ValTy    - type that gets legalized; its legalized MVT keys the tables.
//   CondTy   - only forwarded to BaseT::getCmpSelInstrCost; never read here.
//   VecPred  - only forwarded to BaseT::getCmpSelInstrCost; never read here.
//   CostKind - anything other than TTI::TCK_RecipThroughput is delegated to
//              BaseT immediately.
//   I        - optional instruction; when non-null and Opcode is ICmp/FCmp,
//              its predicate may add an ExtraCost (see below).
//
// Returns LT.first * (ExtraCost + Entry->Cost) for the first cost-table hit,
// otherwise whatever BaseT::getCmpSelInstrCost returns.
//
// NOTE(review): the rows of the form "| NN | | ... | |" below are static
// analyzer path notes (steps 14-56), not code. They end with a report that
// CondTy can reach the final BaseT call as a null pointer. Whether BaseT
// dereferences it is not visible in this chunk -- confirm before acting on it.
2349 | InstructionCost X86TTIImpl::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, |
2350 | Type *CondTy, |
2351 | CmpInst::Predicate VecPred, |
2352 | TTI::TargetCostKind CostKind, |
2353 | const Instruction *I) { |
2354 | |
// The tables below are only consulted for reciprocal-throughput queries;
// every other cost kind goes straight to the base implementation.
2355 | if (CostKind != TTI::TCK_RecipThroughput) |
| |
2356 | return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, |
2357 | I); |
2358 | |
2359 | |
// LT.first is the legalization cost factor, LT.second the legalized type.
2360 | std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy); |
2361 | |
2362 | MVT MTy = LT.second; |
2363 | |
2364 | int ISD = TLI->InstructionOpcodeToISD(Opcode); |
2365 | assert(ISD && "Invalid opcode"); |
2366 | |
// ExtraCost is added to the table cost for certain integer predicates. It is
// only computed when the instruction is available, the opcode is a compare,
// MTy is a vector, and none of these hold:
//   - XOP, and either no AVX2 or a 128-bit vector
//   - AVX512 with elements of at least 32 bits
//   - BWI
// presumably because those targets handle every predicate directly -- TODO
// confirm.
2367 | unsigned ExtraCost = 0; |
2368 | if (I && (Opcode == Instruction::ICmp || Opcode == Instruction::FCmp)) { |
2369 | |
2370 | if (MTy.isVector() && |
2371 | !((ST->hasXOP() && (!ST->hasAVX2() || MTy.is128BitVector())) || |
2372 | (ST->hasAVX512() && 32 <= MTy.getScalarSizeInBits()) || |
2373 | ST->hasBWI())) { |
// Predicate -> ExtraCost: NE 1; SGE/SLE 1; ULT/UGT 2; ULE/UGE 1 or 3.
// All other predicates leave ExtraCost at 0.
2374 | switch (cast<CmpInst>(I)->getPredicate()) { |
2375 | case CmpInst::Predicate::ICMP_NE: |
2376 | |
2377 | ExtraCost = 1; |
2378 | break; |
2379 | case CmpInst::Predicate::ICMP_SGE: |
2380 | case CmpInst::Predicate::ICMP_SLE: |
2381 | |
2382 | ExtraCost = 1; |
2383 | break; |
2384 | case CmpInst::Predicate::ICMP_ULT: |
2385 | case CmpInst::Predicate::ICMP_UGT: |
2386 | |
2387 | |
2388 | ExtraCost = 2; |
2389 | break; |
2390 | case CmpInst::Predicate::ICMP_ULE: |
2391 | case CmpInst::Predicate::ICMP_UGE: |
// ULE/UGE cost 1 with SSE4.1 on 32-bit elements or SSE2 on elements narrower
// than 32 bits; otherwise 3.
2392 | if ((ST->hasSSE41() && MTy.getScalarSizeInBits() == 32) || |
2393 | (ST->hasSSE2() && MTy.getScalarSizeInBits() < 32)) { |
2394 | |
2395 | |
2396 | ExtraCost = 1; |
2397 | } else { |
2398 | |
2399 | ExtraCost = 3; |
2400 | } |
2401 | break; |
2402 | default: |
2403 | break; |
2404 | } |
2405 | } |
2406 | } |
2407 | |
// Function-local cost tables, one per subtarget feature level. Each entry is
// { ISD opcode, legalized MVT, cost }. Only SETCC and SELECT appear.
2408 | static const CostTblEntry SLMCostTbl[] = { |
2409 | |
2410 | { ISD::SETCC, MVT::v2i64, 2 }, |
2411 | }; |
2412 | |
2413 | static const CostTblEntry AVX512BWCostTbl[] = { |
2414 | { ISD::SETCC, MVT::v32i16, 1 }, |
2415 | { ISD::SETCC, MVT::v64i8, 1 }, |
2416 | |
2417 | { ISD::SELECT, MVT::v32i16, 1 }, |
2418 | { ISD::SELECT, MVT::v64i8, 1 }, |
2419 | }; |
2420 | |
// The v32i16/v64i8 entries at the end of this table (cost 2) are only reached
// when BWI is absent or the AVX512BW table had no hit, because that table is
// consulted first.
2421 | static const CostTblEntry AVX512CostTbl[] = { |
2422 | { ISD::SETCC, MVT::v8i64, 1 }, |
2423 | { ISD::SETCC, MVT::v16i32, 1 }, |
2424 | { ISD::SETCC, MVT::v8f64, 1 }, |
2425 | { ISD::SETCC, MVT::v16f32, 1 }, |
2426 | |
2427 | { ISD::SELECT, MVT::v8i64, 1 }, |
2428 | { ISD::SELECT, MVT::v16i32, 1 }, |
2429 | { ISD::SELECT, MVT::v8f64, 1 }, |
2430 | { ISD::SELECT, MVT::v16f32, 1 }, |
2431 | |
2432 | { ISD::SETCC, MVT::v32i16, 2 }, |
2433 | { ISD::SETCC, MVT::v64i8, 2 }, |
2434 | |
2435 | { ISD::SELECT, MVT::v32i16, 2 }, |
2436 | { ISD::SELECT, MVT::v64i8, 2 }, |
2437 | }; |
2438 | |
2439 | static const CostTblEntry AVX2CostTbl[] = { |
2440 | { ISD::SETCC, MVT::v4i64, 1 }, |
2441 | { ISD::SETCC, MVT::v8i32, 1 }, |
2442 | { ISD::SETCC, MVT::v16i16, 1 }, |
2443 | { ISD::SETCC, MVT::v32i8, 1 }, |
2444 | |
2445 | { ISD::SELECT, MVT::v4i64, 1 }, |
2446 | { ISD::SELECT, MVT::v8i32, 1 }, |
2447 | { ISD::SELECT, MVT::v16i16, 1 }, |
2448 | { ISD::SELECT, MVT::v32i8, 1 }, |
2449 | }; |
2450 | |
2451 | static const CostTblEntry AVX1CostTbl[] = { |
2452 | { ISD::SETCC, MVT::v4f64, 1 }, |
2453 | { ISD::SETCC, MVT::v8f32, 1 }, |
2454 | |
2455 | { ISD::SETCC, MVT::v4i64, 4 }, |
2456 | { ISD::SETCC, MVT::v8i32, 4 }, |
2457 | { ISD::SETCC, MVT::v16i16, 4 }, |
2458 | { ISD::SETCC, MVT::v32i8, 4 }, |
2459 | |
2460 | { ISD::SELECT, MVT::v4f64, 1 }, |
2461 | { ISD::SELECT, MVT::v8f32, 1 }, |
2462 | { ISD::SELECT, MVT::v4i64, 1 }, |
2463 | { ISD::SELECT, MVT::v8i32, 1 }, |
2464 | { ISD::SELECT, MVT::v16i16, 3 }, |
2465 | { ISD::SELECT, MVT::v32i8, 3 }, |
2466 | }; |
2467 | |
2468 | static const CostTblEntry SSE42CostTbl[] = { |
2469 | { ISD::SETCC, MVT::v2f64, 1 }, |
2470 | { ISD::SETCC, MVT::v4f32, 1 }, |
2471 | { ISD::SETCC, MVT::v2i64, 1 }, |
2472 | }; |
2473 | |
2474 | static const CostTblEntry SSE41CostTbl[] = { |
2475 | { ISD::SELECT, MVT::v2f64, 1 }, |
2476 | { ISD::SELECT, MVT::v4f32, 1 }, |
2477 | { ISD::SELECT, MVT::v2i64, 1 }, |
2478 | { ISD::SELECT, MVT::v4i32, 1 }, |
2479 | { ISD::SELECT, MVT::v8i16, 1 }, |
2480 | { ISD::SELECT, MVT::v16i8, 1 }, |
2481 | }; |
2482 | |
2483 | static const CostTblEntry SSE2CostTbl[] = { |
2484 | { ISD::SETCC, MVT::v2f64, 2 }, |
2485 | { ISD::SETCC, MVT::f64, 1 }, |
2486 | { ISD::SETCC, MVT::v2i64, 8 }, |
2487 | { ISD::SETCC, MVT::v4i32, 1 }, |
2488 | { ISD::SETCC, MVT::v8i16, 1 }, |
2489 | { ISD::SETCC, MVT::v16i8, 1 }, |
2490 | |
2491 | { ISD::SELECT, MVT::v2f64, 3 }, |
2492 | { ISD::SELECT, MVT::v2i64, 3 }, |
2493 | { ISD::SELECT, MVT::v4i32, 3 }, |
2494 | { ISD::SELECT, MVT::v8i16, 3 }, |
2495 | { ISD::SELECT, MVT::v16i8, 3 }, |
2496 | }; |
2497 | |
2498 | static const CostTblEntry SSE1CostTbl[] = { |
2499 | { ISD::SETCC, MVT::v4f32, 2 }, |
2500 | { ISD::SETCC, MVT::f32, 1 }, |
2501 | |
2502 | { ISD::SELECT, MVT::v4f32, 3 }, |
2503 | }; |
2504 | |
// Lookup chain: each table is tried only if the subtarget reports the matching
// feature, in the order written (SLM, BWI, AVX512, AVX2, AVX, SSE4.2, SSE4.1,
// SSE2, SSE1). The first table with an (ISD, MTy) entry decides the result; a
// miss falls through to the next check.
2505 | if (ST->isSLM()) |
| 14 | | Calling 'X86Subtarget::isSLM' | |
|
| 17 | | Returning from 'X86Subtarget::isSLM' | |
|
| |
2506 | if (const auto *Entry = CostTableLookup(SLMCostTbl, ISD, MTy)) |
2507 | return LT.first * (ExtraCost + Entry->Cost); |
2508 | |
2509 | if (ST->hasBWI()) |
| 19 | | Assuming the condition is false | |
|
| |
2510 | if (const auto *Entry = CostTableLookup(AVX512BWCostTbl, ISD, MTy)) |
2511 | return LT.first * (ExtraCost + Entry->Cost); |
2512 | |
2513 | if (ST->hasAVX512()) |
| 21 | | Calling 'X86Subtarget::hasAVX512' | |
|
| 23 | | Returning from 'X86Subtarget::hasAVX512' | |
|
| |
2514 | if (const auto *Entry = CostTableLookup(AVX512CostTbl, ISD, MTy)) |
2515 | return LT.first * (ExtraCost + Entry->Cost); |
2516 | |
2517 | if (ST->hasAVX2()) |
| 25 | | Calling 'X86Subtarget::hasAVX2' | |
|
| 28 | | Returning from 'X86Subtarget::hasAVX2' | |
|
| |
2518 | if (const auto *Entry = CostTableLookup(AVX2CostTbl, ISD, MTy)) |
2519 | return LT.first * (ExtraCost + Entry->Cost); |
2520 | |
2521 | if (ST->hasAVX()) |
| 30 | | Calling 'X86Subtarget::hasAVX' | |
|
| 33 | | Returning from 'X86Subtarget::hasAVX' | |
|
| |
2522 | if (const auto *Entry = CostTableLookup(AVX1CostTbl, ISD, MTy)) |
2523 | return LT.first * (ExtraCost + Entry->Cost); |
2524 | |
2525 | if (ST->hasSSE42()) |
| 35 | | Calling 'X86Subtarget::hasSSE42' | |
|
| 38 | | Returning from 'X86Subtarget::hasSSE42' | |
|
| |
2526 | if (const auto *Entry = CostTableLookup(SSE42CostTbl, ISD, MTy)) |
2527 | return LT.first * (ExtraCost + Entry->Cost); |
2528 | |
2529 | if (ST->hasSSE41()) |
| 40 | | Calling 'X86Subtarget::hasSSE41' | |
|
| 43 | | Returning from 'X86Subtarget::hasSSE41' | |
|
| |
2530 | if (const auto *Entry = CostTableLookup(SSE41CostTbl, ISD, MTy)) |
2531 | return LT.first * (ExtraCost + Entry->Cost); |
2532 | |
2533 | if (ST->hasSSE2()) |
| 45 | | Calling 'X86Subtarget::hasSSE2' | |
|
| 48 | | Returning from 'X86Subtarget::hasSSE2' | |
|
| |
2534 | if (const auto *Entry = CostTableLookup(SSE2CostTbl, ISD, MTy)) |
2535 | return LT.first * (ExtraCost + Entry->Cost); |
2536 | |
2537 | if (ST->hasSSE1()) |
| 50 | | Calling 'X86Subtarget::hasSSE1' | |
|
| 53 | | Returning from 'X86Subtarget::hasSSE1' | |
|
| |
2538 | if (const auto *Entry = CostTableLookup(SSE1CostTbl, ISD, MTy)) |
2539 | return LT.first * (ExtraCost + Entry->Cost); |
2540 | |
// No table matched: defer to the base implementation. ExtraCost is not applied
// on this path. NOTE(review): the analyzer path ends here with CondTy passed
// as a possibly-null pointer -- confirm the callee contract for a null CondTy.
2541 | return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I); |
| 55 | | Passing null pointer value via 3rd parameter 'CondTy' | |
|
| 56 | | Calling 'BasicTTIImplBase::getCmpSelInstrCost' | |
|
2542 | } |
2543 | |
// X86TTIImpl::getAtomicMemIntrinsicMaxElementSize
//
// Returns the fixed value 16 as the maximum element size for atomic memory
// intrinsics on this target. The value does not depend on the subtarget.
// NOTE(review): the unit is presumably bytes -- confirm against the
// TargetTransformInfo interface documentation.
2544 | unsigned X86TTIImpl::getAtomicMemIntrinsicMaxElementSize() const { return 16; } |
2545 | |
2546 | InstructionCost |
2547 | X86TTIImpl::getTypeBasedIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, |
2548 | TTI::TargetCostKind CostKind) { |
2549 | |
2550 | |
2551 | |
2552 | |
2553 | |
2554 | |
2555 | |
2556 | |
2557 | |
2558 | |
2559 | static const CostTblEntry AVX512CDCostTbl[] = { |
2560 | { ISD::CTLZ, MVT::v8i64, 1 }, |
2561 | { ISD::CTLZ, MVT::v16i32, 1 }, |
2562 | { ISD::CTLZ, MVT::v32i16, 8 }, |
2563 | { ISD::CTLZ, MVT::v64i8, 20 }, |
2564 | { ISD::CTLZ, MVT::v4i64, 1 }, |
2565 | { ISD::CTLZ, MVT::v8i32, 1 }, |
2566 | { ISD::CTLZ, MVT::v16i16, 4 }, |
2567 | { ISD::CTLZ, MVT::v32i8, 10 }, |
2568 | { ISD::CTLZ, MVT::v2i64, 1 }, |
2569 | { ISD::CTLZ, MVT::v4i32, 1 }, |
2570 | { ISD::CTLZ, MVT::v8i16, 4 }, |
2571 | { ISD::CTLZ, MVT::v16i8, 4 }, |
2572 | }; |
2573 | static const CostTblEntry AVX512BWCostTbl[] = { |
2574 | { ISD::ABS, MVT::v32i16, 1 }, |
2575 | { ISD::ABS, MVT::v64i8, 1 }, |
2576 | { ISD::BITREVERSE, MVT::v8i64, 5 }, |
2577 | { ISD::BITREVERSE, MVT::v16i32, 5 }, |
2578 | { ISD::BITREVERSE, MVT::v32i16, 5 }, |
2579 | { ISD::BITREVERSE, MVT::v64i8, 5 }, |
2580 | { ISD::BSWAP, MVT::v8i64, 1 }, |
2581 | { ISD::BSWAP, MVT::v16i32, 1 }, |
2582 | { ISD::BSWAP, MVT::v32i16, 1 }, |
2583 | { ISD::CTLZ, MVT::v8i64, 23 }, |
2584 | { ISD::CTLZ, MVT::v16i32, 22 }, |
2585 | { ISD::CTLZ, MVT::v32i16, 18 }, |
2586 | { ISD::CTLZ, MVT::v64i8, 17 }, |
2587 | { ISD::CTPOP, MVT::v8i64, 7 }, |
2588 | { ISD::CTPOP, MVT::v16i32, 11 }, |
2589 | { ISD::CTPOP, MVT::v32i16, 9 }, |
2590 | { ISD::CTPOP, MVT::v64i8, 6 }, |
2591 | { ISD::CTTZ, MVT::v8i64, 10 }, |
2592 | { ISD::CTTZ, MVT::v16i32, 14 }, |
2593 | { ISD::CTTZ, MVT::v32i16, 12 }, |
2594 | { ISD::CTTZ, MVT::v64i8, 9 }, |
2595 | { ISD::SADDSAT, MVT::v32i16, 1 }, |
2596 | { ISD::SADDSAT, MVT::v64i8, 1 }, |
2597 | { ISD::SMAX, MVT::v32i16, 1 }, |
2598 | { ISD::SMAX, MVT::v64i8, 1 }, |
2599 | { ISD::SMIN, MVT::v32i16, 1 }, |
2600 | { ISD::SMIN, MVT::v64i8, 1 }, |
2601 | { ISD::SSUBSAT, MVT::v32i16, 1 }, |
2602 | { ISD::SSUBSAT, MVT::v64i8, 1 }, |
2603 | { ISD::UADDSAT, MVT::v32i16, 1 }, |
2604 | { ISD::UADDSAT, MVT::v64i8, 1 }, |
2605 | { ISD::UMAX, MVT::v32i16, 1 }, |
2606 | { ISD::UMAX, MVT::v64i8, 1 }, |
2607 | { ISD::UMIN, MVT::v32i16, 1 }, |
2608 | { ISD::UMIN, MVT::v64i8, 1 }, |
2609 | { ISD::USUBSAT, MVT::v32i16, 1 }, |
2610 | { ISD::USUBSAT, MVT::v64i8, 1 }, |
2611 | }; |
2612 | static const CostTblEntry AVX512CostTbl[] = { |
2613 | { ISD::ABS, MVT::v8i64, 1 }, |
2614 | { ISD::ABS, MVT::v16i32, 1 }, |
2615 | { ISD::ABS, MVT::v32i16, 2 }, |
2616 | { ISD::ABS, MVT::v64i8, 2 }, |
2617 | { ISD::ABS, MVT::v4i64, 1 }, |
2618 | { ISD::ABS, MVT::v2i64, 1 }, |
2619 | { ISD::BITREVERSE, MVT::v8i64, 36 }, |
2620 | { ISD::BITREVERSE, MVT::v16i32, 24 }, |
2621 | { ISD::BITREVERSE, MVT::v32i16, 10 }, |
2622 | { ISD::BITREVERSE, MVT::v64i8, 10 }, |
2623 | { ISD::BSWAP, MVT::v8i64, 4 }, |
2624 | { ISD::BSWAP, MVT::v16i32, 4 }, |
2625 | { ISD::BSWAP, MVT::v32i16, 4 }, |
2626 | { ISD::CTLZ, MVT::v8i64, 29 }, |
2627 | { ISD::CTLZ, MVT::v16i32, 35 }, |
2628 | { ISD::CTLZ, MVT::v32i16, 28 }, |
2629 | { ISD::CTLZ, MVT::v64i8, 18 }, |
2630 | { ISD::CTPOP, MVT::v8i64, 16 }, |
2631 | { ISD::CTPOP, MVT::v16i32, 24 }, |
2632 | { ISD::CTPOP, MVT::v32i16, 18 }, |
2633 | { ISD::CTPOP, MVT::v64i8, 12 }, |
2634 | { ISD::CTTZ, MVT::v8i64, 20 }, |
2635 | { ISD::CTTZ, MVT::v16i32, 28 }, |
2636 | { ISD::CTTZ, MVT::v32i16, 24 }, |
2637 | { ISD::CTTZ, MVT::v64i8, 18 }, |
2638 | { ISD::SMAX, MVT::v8i64, 1 }, |
2639 | { ISD::SMAX, MVT::v16i32, 1 }, |
2640 | { ISD::SMAX, MVT::v32i16, 2 }, |
2641 | { ISD::SMAX, MVT::v64i8, 2 }, |
2642 | { ISD::SMAX, MVT::v4i64, 1 }, |
2643 | { ISD::SMAX, MVT::v2i64, 1 }, |
2644 | { ISD::SMIN, MVT::v8i64, 1 }, |
2645 | { ISD::SMIN, MVT::v16i32, 1 }, |
2646 | { ISD::SMIN, MVT::v32i16, 2 }, |
2647 | { ISD::SMIN, MVT::v64i8, 2 }, |
2648 | { ISD::SMIN, MVT::v4i64, 1 }, |
2649 | { ISD::SMIN, MVT::v2i64, 1 }, |
2650 | { ISD::UMAX, MVT::v8i64, 1 }, |
2651 | { ISD::UMAX, MVT::v16i32, 1 }, |
2652 | { ISD::UMAX, MVT::v32i16, 2 }, |
2653 | { ISD::UMAX, MVT::v64i8, 2 }, |
2654 | { ISD::UMAX, MVT::v4i64, 1 }, |
2655 | { ISD::UMAX, MVT::v2i64, 1 }, |
2656 | { ISD::UMIN, MVT::v8i64, 1 }, |
2657 | { ISD::UMIN, MVT::v16i32, 1 }, |
2658 | { ISD::UMIN, MVT::v32i16, 2 }, |
2659 | { ISD::UMIN, MVT::v64i8, 2 }, |
2660 | { ISD::UMIN, MVT::v4i64, 1 }, |
2661 | { ISD::UMIN, MVT::v2i64, 1 }, |
2662 | { ISD::USUBSAT, MVT::v16i32, 2 }, |
2663 | { ISD::USUBSAT, MVT::v2i64, 2 }, |
2664 | { ISD::USUBSAT, MVT::v4i64, 2 }, |
2665 | { ISD::USUBSAT, MVT::v8i64, 2 }, |
2666 | { ISD::UADDSAT, MVT::v16i32, 3 }, |
2667 | { ISD::UADDSAT, MVT::v2i64, 3 }, |
2668 | { ISD::UADDSAT, MVT::v4i64, 3 }, |
2669 | { ISD::UADDSAT, MVT::v8i64, 3 }, |
2670 | { ISD::SADDSAT, MVT::v32i16, 2 }, |
2671 | { ISD::SADDSAT, MVT::v64i8, 2 }, |
2672 | { ISD::SSUBSAT, MVT::v32i16, 2 }, |
2673 | { ISD::SSUBSAT, MVT::v64i8, 2 }, |
2674 | { ISD::UADDSAT, MVT::v32i16, 2 }, |
2675 | { ISD::UADDSAT, MVT::v64i8, 2 }, |
2676 | { ISD::USUBSAT, MVT::v32i16, 2 }, |
2677 | { ISD::USUBSAT, MVT::v64i8, 2 }, |
2678 | { ISD::FMAXNUM, MVT::f32, 2 }, |
2679 | { ISD::FMAXNUM, MVT::v4f32, 2 }, |
2680 | { ISD::FMAXNUM, MVT::v8f32, 2 }, |
2681 | { ISD::FMAXNUM, MVT::v16f32, 2 }, |
2682 | { ISD::FMAXNUM, MVT::f64, 2 }, |
2683 | { ISD::FMAXNUM, MVT::v2f64, 2 }, |
2684 | { ISD::FMAXNUM, MVT::v4f64, 2 }, |
2685 | { ISD::FMAXNUM, MVT::v8f64, 2 }, |
2686 | }; |
2687 | static const CostTblEntry XOPCostTbl[] = { |
2688 | { ISD::BITREVERSE, MVT::v4i64, 4 }, |
2689 | { ISD::BITREVERSE, MVT::v8i32, 4 }, |
2690 | { ISD::BITREVERSE, MVT::v16i16, 4 }, |
2691 | { ISD::BITREVERSE, MVT::v32i8, 4 }, |
2692 | { ISD::BITREVERSE, MVT::v2i64, 1 }, |
2693 | { ISD::BITREVERSE, MVT::v4i32, 1 }, |
2694 | { ISD::BITREVERSE, MVT::v8i16, 1 }, |
2695 | { ISD::BITREVERSE, MVT::v16i8, 1 }, |
2696 | { ISD::BITREVERSE, MVT::i64, 3 }, |
2697 | { ISD::BITREVERSE, MVT::i32, 3 }, |
2698 | { ISD::BITREVERSE, MVT::i16, 3 }, |
2699 | { ISD::BITREVERSE, MVT::i8, 3 } |
2700 | }; |
2701 | static const CostTblEntry AVX2CostTbl[] = { |
2702 | { ISD::ABS, MVT::v4i64, 2 }, |
2703 | { ISD::ABS, MVT::v8i32, 1 }, |
2704 | { ISD::ABS, MVT::v16i16, 1 }, |
2705 | { ISD::ABS, MVT::v32i8, 1 }, |
2706 | { ISD::BITREVERSE, MVT::v4i64, 5 }, |
2707 | { ISD::BITREVERSE, MVT::v8i32, 5 }, |
2708 | { ISD::BITREVERSE, MVT::v16i16, 5 }, |
2709 | { ISD::BITREVERSE, MVT::v32i8, 5 }, |
2710 | { ISD::BSWAP, MVT::v4i64, 1 }, |
2711 | { ISD::BSWAP, MVT::v8i32, 1 }, |
2712 | { ISD::BSWAP, MVT::v16i16, 1 }, |
2713 | { ISD::CTLZ, MVT::v4i64, 23 }, |
2714 | { ISD::CTLZ, MVT::v8i32, 18 }, |
2715 | { ISD::CTLZ, MVT::v16i16, 14 }, |
2716 | { ISD::CTLZ, MVT::v32i8, 9 }, |
2717 | { ISD::CTPOP, MVT::v4i64, 7 }, |
2718 | { ISD::CTPOP, MVT::v8i32, 11 }, |
2719 | { ISD::CTPOP, MVT::v16i16, 9 }, |
2720 | { ISD::CTPOP, MVT::v32i8, 6 }, |
2721 | { ISD::CTTZ, MVT::v4i64, 10 }, |
2722 | { ISD::CTTZ, MVT::v8i32, 14 }, |
2723 | { ISD::CTTZ, MVT::v16i16, 12 }, |
2724 | { ISD::CTTZ, MVT::v32i8, 9 }, |
2725 | { ISD::SADDSAT, MVT::v16i16, 1 }, |
2726 | { ISD::SADDSAT, MVT::v32i8, 1 }, |
2727 | { ISD::SMAX, MVT::v8i32, 1 }, |
2728 | { ISD::SMAX, MVT::v16i16, 1 }, |
2729 | { ISD::SMAX, MVT::v32i8, 1 }, |
2730 | { ISD::SMIN, MVT::v8i32, 1 }, |
2731 | { ISD::SMIN, MVT::v16i16, 1 }, |
2732 | { ISD::SMIN, MVT::v32i8, 1 }, |
2733 | { ISD::SSUBSAT, MVT::v16i16, 1 }, |
2734 | { ISD::SSUBSAT, MVT::v32i8, 1 }, |
2735 | { ISD::UADDSAT, MVT::v16i16, 1 }, |
2736 | { ISD::UADDSAT, MVT::v32i8, 1 }, |
2737 | { ISD::UADDSAT, MVT::v8i32, 3 }, |
2738 | { ISD::UMAX, MVT::v8i32, 1 }, |
2739 | { ISD::UMAX, MVT::v16i16, 1 }, |
2740 | { ISD::UMAX, MVT::v32i8, 1 }, |
2741 | { ISD::UMIN, MVT::v8i32, 1 }, |
2742 | { ISD::UMIN, MVT::v16i16, 1 }, |
2743 | { ISD::UMIN, MVT::v32i8, 1 }, |
2744 | { ISD::USUBSAT, MVT::v16i16, 1 }, |
2745 | { ISD::USUBSAT, MVT::v32i8, 1 }, |
2746 | { ISD::USUBSAT, MVT::v8i32, 2 }, |
2747 | { ISD::FMAXNUM, MVT::v8f32, 3 }, |
2748 | { ISD::FMAXNUM, MVT::v4f64, 3 }, |
2749 | { ISD::FSQRT, MVT::f32, 7 }, |
2750 | { ISD::FSQRT, MVT::v4f32, 7 }, |
2751 | { ISD::FSQRT, MVT::v8f32, 14 }, |
2752 | { ISD::FSQRT, MVT::f64, 14 }, |
2753 | { ISD::FSQRT, MVT::v2f64, 14 }, |
2754 | { ISD::FSQRT, MVT::v4f64, 28 }, |
2755 | }; |
2756 | static const CostTblEntry AVX1CostTbl[] = { |
2757 | { ISD::ABS, MVT::v4i64, 5 }, |
2758 | { ISD::ABS, MVT::v8i32, 3 }, |
2759 | { ISD::ABS, MVT::v16i16, 3 }, |
2760 | { ISD::ABS, MVT::v32i8, 3 }, |
2761 | { ISD::BITREVERSE, MVT::v4i64, 12 }, |
2762 | { ISD::BITREVERSE, MVT::v8i32, 12 }, |
2763 | { ISD::BITREVERSE, MVT::v16i16, 12 }, |
2764 | { ISD::BITREVERSE, MVT::v32i8, 12 }, |
2765 | { ISD::BSWAP, MVT::v4i64, 4 }, |
2766 | { ISD::BSWAP, MVT::v8i32, 4 }, |
2767 | { ISD::BSWAP, MVT::v16i16, 4 }, |
2768 | { ISD::CTLZ, MVT::v4i64, 48 }, |
2769 | { ISD::CTLZ, MVT::v8i32, 38 }, |
2770 | { ISD::CTLZ, MVT::v16i16, 30 }, |
2771 | { ISD::CTLZ, MVT::v32i8, 20 }, |
2772 | { ISD::CTPOP, MVT::v4i64, 16 }, |
2773 | { ISD::CTPOP, MVT::v8i32, 24 }, |
2774 | { ISD::CTPOP, MVT::v16i16, 20 }, |
2775 | { ISD::CTPOP, MVT::v32i8, 14 }, |
2776 | { ISD::CTTZ, MVT::v4i64, 22 }, |
2777 | { ISD::CTTZ, MVT::v8i32, 30 }, |
2778 | { ISD::CTTZ, MVT::v16i16, 26 }, |
2779 | { ISD::CTTZ, MVT::v32i8, 20 }, |
2780 | { ISD::SADDSAT, MVT::v16i16, 4 }, |
2781 | { ISD::SADDSAT, MVT::v32i8, 4 }, |
2782 | { ISD::SMAX, MVT::v8i32, 4 }, |
2783 | { ISD::SMAX, MVT::v16i16, 4 }, |
2784 | { ISD::SMAX, MVT::v32i8, 4 }, |
2785 | { ISD::SMIN, MVT::v8i32, 4 }, |
2786 | { ISD::SMIN, MVT::v16i16, 4 }, |
2787 | { ISD::SMIN, MVT::v32i8, 4 }, |
2788 | { ISD::SSUBSAT, MVT::v16i16, 4 }, |
2789 | { ISD::SSUBSAT, MVT::v32i8, 4 }, |
2790 | { ISD::UADDSAT, MVT::v16i16, 4 }, |
2791 | { ISD::UADDSAT, MVT::v32i8, 4 }, |
2792 | { ISD::UADDSAT, MVT::v8i32, 8 }, |
2793 | { ISD::UMAX, MVT::v8i32, 4 }, |
2794 | { ISD::UMAX, MVT::v16i16, 4 }, |
2795 | { ISD::UMAX, MVT::v32i8, 4 }, |
2796 | { ISD::UMIN, MVT::v8i32, 4 }, |
2797 | { ISD::UMIN, MVT::v16i16, 4 }, |
2798 | { ISD::UMIN, MVT::v32i8, 4 }, |
2799 | { ISD::USUBSAT, MVT::v16i16, 4 }, |
2800 | { ISD::USUBSAT, MVT::v32i8, 4 }, |
2801 | { ISD::USUBSAT, MVT::v8i32, 6 }, |
2802 | { ISD::FMAXNUM, MVT::f32, 3 }, |
2803 | { ISD::FMAXNUM, MVT::v4f32, 3 }, |
2804 | { ISD::FMAXNUM, MVT::v8f32, 5 }, |
2805 | { ISD::FMAXNUM, MVT::f64, 3 }, |
2806 | { ISD::FMAXNUM, MVT::v2f64, 3 }, |
2807 | { ISD::FMAXNUM, MVT::v4f64, 5 }, |
2808 | { ISD::FSQRT, MVT::f32, 14 }, |
2809 | { ISD::FSQRT, MVT::v4f32, 14 }, |
2810 | { ISD::FSQRT, MVT::v8f32, 28 }, |
2811 | { ISD::FSQRT, MVT::f64, 21 }, |
2812 | { ISD::FSQRT, MVT::v2f64, 21 }, |
2813 | { ISD::FSQRT, MVT::v4f64, 43 }, |
2814 | }; |
2815 | static const CostTblEntry GLMCostTbl[] = { |
2816 | { ISD::FSQRT, MVT::f32, 19 }, |
2817 | { ISD::FSQRT, MVT::v4f32, 37 }, |
2818 | { ISD::FSQRT, MVT::f64, 34 }, |
2819 | { ISD::FSQRT, MVT::v2f64, 67 }, |
2820 | }; |
2821 | static const CostTblEntry SLMCostTbl[] = { |
2822 | { ISD::FSQRT, MVT::f32, 20 }, |
2823 | { ISD::FSQRT, MVT::v4f32, 40 }, |
2824 | { ISD::FSQRT, MVT::f64, 35 }, |
2825 | { ISD::FSQRT, MVT::v2f64, 70 }, |
2826 | }; |
2827 | static const CostTblEntry SSE42CostTbl[] = { |
2828 | { ISD::USUBSAT, MVT::v4i32, 2 }, |
2829 | { ISD::UADDSAT, MVT::v4i32, 3 }, |
2830 | { ISD::FSQRT, MVT::f32, 18 }, |
2831 | { ISD::FSQRT, MVT::v4f32, 18 }, |
2832 | }; |
2833 | static const CostTblEntry SSE41CostTbl[] = { |
2834 | { ISD::ABS, MVT::v2i64, 2 }, |
2835 | { ISD::SMAX, MVT::v4i32, 1 }, |
2836 | { ISD::SMAX, MVT::v16i8, 1 }, |
2837 | { ISD::SMIN, MVT::v4i32, 1 }, |
2838 | { ISD::SMIN, MVT::v16i8, 1 }, |
2839 | { ISD::UMAX, MVT::v4i32, 1 }, |
2840 | { ISD::UMAX, MVT::v8i16, 1 }, |
2841 | { ISD::UMIN, MVT::v4i32, 1 }, |
2842 | { ISD::UMIN, MVT::v8i16, 1 }, |
2843 | }; |
2844 | static const CostTblEntry SSSE3CostTbl[] = { |
2845 | { ISD::ABS, MVT::v4i32, 1 }, |
2846 | { ISD::ABS, MVT::v8i16, 1 }, |
2847 | { ISD::ABS, MVT::v16i8, 1 }, |
2848 | { ISD::BITREVERSE, MVT::v2i64, 5 }, |
2849 | { ISD::BITREVERSE, MVT::v4i32, 5 }, |
2850 | { ISD::BITREVERSE, MVT::v8i16, 5 }, |
2851 | { ISD::BITREVERSE, MVT::v16i8, 5 }, |
2852 | { ISD::BSWAP, MVT::v2i64, 1 }, |
2853 | { ISD::BSWAP, MVT::v4i32, 1 }, |
2854 | { ISD::BSWAP, MVT::v8i16, 1 }, |
2855 | { ISD::CTLZ, MVT::v2i64, 23 }, |
2856 | { ISD::CTLZ, MVT::v4i32, 18 }, |
2857 | { ISD::CTLZ, MVT::v8i16, 14 }, |
2858 | { ISD::CTLZ, MVT::v16i8, 9 }, |
2859 | { ISD::CTPOP, MVT::v2i64, 7 }, |
2860 | { ISD::CTPOP, MVT::v4i32, 11 }, |
2861 | { ISD::CTPOP, MVT::v8i16, 9 }, |
2862 | { ISD::CTPOP, MVT::v16i8, 6 }, |
2863 | { ISD::CTTZ, MVT::v2i64, 10 }, |
2864 | { ISD::CTTZ, MVT::v4i32, 14 }, |
2865 | { ISD::CTTZ, MVT::v8i16, 12 }, |
2866 | { ISD::CTTZ, MVT::v16i8, 9 } |
2867 | }; |
2868 | static const CostTblEntry SSE2CostTbl[] = { |
2869 | { ISD::ABS, MVT::v2i64, 4 }, |
2870 | { ISD::ABS, MVT::v4i32, 3 }, |
2871 | { ISD::ABS, MVT::v8i16, 2 }, |
2872 | { ISD::ABS, MVT::v16i8, 2 }, |
2873 | { ISD::BITREVERSE, MVT::v2i64, 29 }, |
2874 | { ISD::BITREVERSE, MVT::v4i32, 27 }, |
2875 | { ISD::BITREVERSE, MVT::v8i16, 27 }, |
2876 | { ISD::BITREVERSE, MVT::v16i8, 20 }, |
2877 | { ISD::BSWAP, MVT::v2i64, 7 }, |
2878 | { ISD::BSWAP, MVT::v4i32, 7 }, |
2879 | { ISD::BSWAP, MVT::v8i16, 7 }, |
2880 | { ISD::CTLZ, MVT::v2i64, 25 }, |
2881 | { ISD::CTLZ, MVT::v4i32, 26 }, |
2882 | { ISD::CTLZ, MVT::v8i16, 20 }, |
2883 | { ISD::CTLZ, MVT::v16i8, 17 }, |
2884 | { ISD::CTPOP, MVT::v2i64, 12 }, |
2885 | { ISD::CTPOP, MVT::v4i32, 15 }, |
2886 | { ISD::CTPOP, MVT::v8i16, 13 }, |
2887 | { ISD::CTPOP, MVT::v16i8, 10 }, |
2888 | { ISD::CTTZ, MVT::v2i64, 14 }, |
2889 | { ISD::CTTZ, MVT::v4i32, 18 }, |
2890 | { ISD::CTTZ, MVT::v8i16, 16 }, |
2891 | { ISD::CTTZ, MVT::v16i8, 13 }, |
2892 | { ISD::SADDSAT, MVT::v8i16, 1 }, |
2893 | { ISD::SADDSAT, MVT::v16i8, 1 }, |
2894 | { ISD::SMAX, MVT::v8i16, 1 }, |
2895 | { ISD::SMIN, MVT::v8i16, 1 }, |
2896 | { ISD::SSUBSAT, MVT::v8i16, 1 }, |
2897 | { ISD::SSUBSAT, MVT::v16i8, 1 }, |
2898 | { ISD::UADDSAT, MVT::v8i16, 1 }, |
2899 | { ISD::UADDSAT, MVT::v16i8, 1 }, |
2900 | { ISD::UMAX, MVT::v8i16, 2 }, |
2901 | { ISD::UMAX, MVT::v16i8, 1 }, |
2902 | { ISD::UMIN, MVT::v8i16, 2 }, |
2903 | { ISD::UMIN, MVT::v16i8, 1 }, |
2904 | { ISD::USUBSAT, MVT::v8i16, 1 }, |
2905 | { ISD::USUBSAT, MVT::v16i8, 1 }, |
2906 | { ISD::FMAXNUM, MVT::f64, 4 }, |
2907 | { ISD::FMAXNUM, MVT::v2f64, 4 }, |
2908 | { ISD::FSQRT, MVT::f64, 32 }, |
2909 | { ISD::FSQRT, MVT::v2f64, 32 }, |
2910 | }; |
2911 | static const CostTblEntry SSE1CostTbl[] = { |
2912 | { ISD::FMAXNUM, MVT::f32, 4 }, |
2913 | { ISD::FMAXNUM, MVT::v4f32, 4 }, |
2914 | { ISD::FSQRT, MVT::f32, 28 }, |
2915 | { ISD::FSQRT, MVT::v4f32, 56 }, |
2916 | }; |
2917 | static const CostTblEntry BMI64CostTbl[] = { |
2918 | { ISD::CTTZ, MVT::i64, 1 }, |
2919 | }; |
2920 | static const CostTblEntry BMI32CostTbl[] = { |
2921 | { ISD::CTTZ, MVT::i32, 1 }, |
2922 | { ISD::CTTZ, MVT::i16, 1 }, |
2923 | { ISD::CTTZ, MVT::i8, 1 }, |
2924 | }; |
2925 | static const CostTblEntry LZCNT64CostTbl[] = { |
2926 | { ISD::CTLZ, MVT::i64, 1 }, |
2927 | }; |
2928 | static const CostTblEntry LZCNT32CostTbl[] = { |
2929 | { ISD::CTLZ, MVT::i32, 1 }, |
2930 | { ISD::CTLZ, MVT::i16, 1 }, |
2931 | { ISD::CTLZ, MVT::i8, 1 }, |
2932 | }; |
2933 | static const CostTblEntry POPCNT64CostTbl[] = { |
2934 | { ISD::CTPOP, MVT::i64, 1 }, |
2935 | }; |
2936 | static const CostTblEntry POPCNT32CostTbl[] = { |
2937 | { ISD::CTPOP, MVT::i32, 1 }, |
2938 | { ISD::CTPOP, MVT::i16, 1 }, |
2939 | { ISD::CTPOP, MVT::i8, 1 }, |
2940 | }; |
2941 | static const CostTblEntry X64CostTbl[] = { |
2942 | { ISD::ABS, MVT::i64, 2 }, |
2943 | { ISD::BITREVERSE, MVT::i64, 14 }, |
2944 | { ISD::BSWAP, MVT::i64, 1 }, |
2945 | { ISD::CTLZ, MVT::i64, 4 }, |
2946 | { ISD::CTTZ, MVT::i64, 3 }, |
2947 | { ISD::CTPOP, MVT::i64, 10 }, |
2948 | { ISD::SADDO, MVT::i64, 1 }, |
2949 | { ISD::UADDO, MVT::i64, 1 }, |
2950 | { ISD::UMULO, MVT::i64, 2 }, |
2951 | }; |
2952 | static const CostTblEntry X86CostTbl[] = { |
2953 | { ISD::ABS, MVT::i32, 2 }, |
2954 | { ISD::ABS, MVT::i16, 2 }, |
2955 | { ISD::BITREVERSE, MVT::i32, 14 }, |
2956 | { ISD::BITREVERSE, MVT::i16, 14 }, |
2957 | { ISD::BITREVERSE, MVT::i8, 11 }, |
2958 | { ISD::BSWAP, MVT::i32, 1 }, |
2959 | { ISD::BSWAP, MVT::i16, 1 }, |
2960 | { ISD::CTLZ, MVT::i32, 4 }, |
2961 | { ISD::CTLZ, MVT::i16, 4 }, |
2962 | { ISD::CTLZ, MVT::i8, 4 }, |
2963 | { ISD::CTTZ, MVT::i32, 3 }, |
2964 | { ISD::CTTZ, MVT::i16, 3 }, |
2965 | { ISD::CTTZ, MVT::i8, 3 }, |
2966 | { ISD::CTPOP, MVT::i32, 8 }, |
2967 | { ISD::CTPOP, MVT::i16, 9 }, |
2968 | { ISD::CTPOP, MVT::i8, 7 }, |
2969 | { ISD::SADDO, MVT::i32, 1 }, |
2970 | { ISD::SADDO, MVT::i16, 1 }, |
2971 | { ISD::SADDO, MVT::i8, 1 }, |
2972 | { ISD::UADDO, MVT::i32, 1 }, |
2973 | { ISD::UADDO, MVT::i16, 1 }, |
2974 | { ISD::UADDO, MVT::i8, 1 }, |
2975 | { ISD::UMULO, MVT::i32, 2 }, |
2976 | { ISD::UMULO, MVT::i16, 2 }, |
2977 | { ISD::UMULO, MVT::i8, 2 }, |
2978 | }; |
2979 | |
2980 | Type *RetTy = ICA.getReturnType(); |
2981 | Type *OpTy = RetTy; |
2982 | Intrinsic::ID IID = ICA.getID(); |
2983 | unsigned ISD = ISD::DELETED_NODE; |
2984 | switch (IID) { |
2985 | default: |
2986 | break; |
2987 | case Intrinsic::abs: |
2988 | ISD = ISD::ABS; |
2989 | break; |
2990 | case Intrinsic::bitreverse: |
2991 | ISD = ISD::BITREVERSE; |
2992 | break; |
2993 | case Intrinsic::bswap: |
2994 | ISD = ISD::BSWAP; |
2995 | break; |
2996 | case Intrinsic::ctlz: |
2997 | ISD = ISD::CTLZ; |
2998 | break; |
2999 | case Intrinsic::ctpop: |
3000 | ISD = ISD::CTPOP; |
3001 | break; |
3002 | case Intrinsic::cttz: |
3003 | ISD = ISD::CTTZ; |
3004 | break; |
3005 | case Intrinsic::maxnum: |
3006 | case Intrinsic::minnum: |
3007 | |
3008 | ISD = ISD::FMAXNUM; |
3009 | break; |
3010 | case Intrinsic::sadd_sat: |
3011 | ISD = ISD::SADDSAT; |
3012 | break; |
3013 | case Intrinsic::smax: |
3014 | ISD = ISD::SMAX; |
3015 | break; |
3016 | case Intrinsic::smin: |
3017 | ISD = ISD::SMIN; |
3018 | break; |
3019 | case Intrinsic::ssub_sat: |
3020 | ISD = ISD::SSUBSAT; |
3021 | break; |
3022 | case Intrinsic::uadd_sat: |
3023 | ISD = ISD::UADDSAT; |
3024 | break; |
3025 | case Intrinsic::umax: |
3026 | ISD = ISD::UMAX; |
3027 | break; |
3028 | case Intrinsic::umin: |
3029 | ISD = ISD::UMIN; |
3030 | break; |
3031 | case Intrinsic::usub_sat: |
3032 | ISD = ISD::USUBSAT; |
3033 | break; |
3034 | case Intrinsic::sqrt: |
3035 | ISD = ISD::FSQRT; |
3036 | break; |
3037 | case Intrinsic::sadd_with_overflow: |
3038 | case Intrinsic::ssub_with_overflow: |
3039 | |
3040 | ISD = ISD::SADDO; |
3041 | OpTy = RetTy->getContainedType(0); |
3042 | break; |
3043 | case Intrinsic::uadd_with_overflow: |
3044 | case Intrinsic::usub_with_overflow: |
3045 | |
3046 | ISD = ISD::UADDO; |
3047 | OpTy = RetTy->getContainedType(0); |
3048 | break; |
3049 | case Intrinsic::umul_with_overflow: |
3050 | case Intrinsic::smul_with_overflow: |
3051 | |
3052 | ISD = ISD::UMULO; |
3053 | OpTy = RetTy->getContainedType(0); |
3054 | break; |
3055 | } |
3056 | |
3057 | if (ISD != ISD::DELETED_NODE) { |
3058 | |
3059 | std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, OpTy); |
3060 | MVT MTy = LT.second; |
3061 | |
3062 | |
3063 | if (ISD == ISD::BITREVERSE && ST->hasGFNI() && ST->hasSSSE3() && |
3064 | MTy.isVector()) { |
3065 | |
3066 | |
3067 | |
3068 | unsigned Cost = MTy.getVectorElementType() == MVT::i8 ? 1 : 2; |
3069 | |
3070 | |
3071 | |
3072 | if (!(MTy.is128BitVector() || (ST->hasAVX2() && MTy.is256BitVector()) || |
3073 | (ST->hasBWI() && MTy.is512BitVector()))) |
3074 | Cost = Cost * 2 + 2; |
3075 | |
3076 | return LT.first * Cost; |
3077 | } |
3078 | |
3079 | auto adjustTableCost = [](const CostTblEntry &Entry, |
3080 | InstructionCost LegalizationCost, |
3081 | FastMathFlags FMF) { |
3082 | |
3083 | |
3084 | |
3085 | if (Entry.ISD == ISD::FMAXNUM || Entry.ISD == ISD::FMINNUM) { |
3086 | if (FMF.noNaNs()) |
3087 | return LegalizationCost * 1; |
3088 | } |
3089 | return LegalizationCost * (int)Entry.Cost; |
3090 | }; |
3091 | |
3092 | if (ST->useGLMDivSqrtCosts()) |
3093 | if (const auto *Entry = CostTableLookup(GLMCostTbl, ISD, MTy)) |
3094 | return adjustTableCost(*Entry, LT.first, ICA.getFlags()); |
3095 | |
3096 | if (ST->isSLM()) |
3097 | if (const auto *Entry = CostTableLookup(SLMCostTbl, ISD, MTy)) |
3098 | return adjustTableCost(*Entry, LT.first, ICA.getFlags()); |
3099 | |
3100 | if (ST->hasCDI()) |
3101 | if (const auto *Entry = CostTableLookup(AVX512CDCostTbl, ISD, MTy)) |
3102 | return adjustTableCost(*Entry, LT.first, ICA.getFlags()); |
3103 | |
3104 | if (ST->hasBWI()) |
3105 | if (const auto *Entry = CostTableLookup(AVX512BWCostTbl, ISD, MTy)) |
3106 | return adjustTableCost(*Entry, LT.first, ICA.getFlags()); |
3107 | |
3108 | if (ST->hasAVX512()) |
3109 | if (const auto *Entry = CostTableLookup(AVX512CostTbl, ISD, MTy)) |
3110 | return adjustTableCost(*Entry, LT.first, ICA.getFlags()); |
3111 | |
3112 | if (ST->hasXOP()) |
3113 | if (const auto *Entry = CostTableLookup(XOPCostTbl, ISD, MTy)) |
3114 | return adjustTableCost(*Entry, LT.first, ICA.getFlags()); |
3115 | |
3116 | if (ST->hasAVX2()) |
3117 | if (const auto *Entry = CostTableLookup(AVX2CostTbl, ISD, MTy)) |
3118 | return adjustTableCost(*Entry, LT.first, ICA.getFlags()); |
3119 | |
3120 | if (ST->hasAVX()) |
3121 | if (const auto *Entry = CostTableLookup(AVX1CostTbl, ISD, MTy)) |
3122 | return adjustTableCost(*Entry, LT.first, ICA.getFlags()); |
3123 | |
3124 | if (ST->hasSSE42()) |
3125 | if (const auto *Entry = CostTableLookup(SSE42CostTbl, ISD, MTy)) |
3126 | return adjustTableCost(*Entry, LT.first, ICA.getFlags()); |
3127 | |
3128 | if (ST->hasSSE41()) |
3129 | if (const auto *Entry = CostTableLookup(SSE41CostTbl, ISD, MTy)) |
3130 | return adjustTableCost(*Entry, LT.first, ICA.getFlags()); |
3131 | |
3132 | if (ST->hasSSSE3()) |
3133 | if (const auto *Entry = CostTableLookup(SSSE3CostTbl, ISD, MTy)) |
3134 | return adjustTableCost(*Entry, LT.first, ICA.getFlags()); |
3135 | |
3136 | if (ST->hasSSE2()) |
3137 | if (const auto *Entry = CostTableLookup(SSE2CostTbl, ISD, MTy)) |
3138 | return adjustTableCost(*Entry, LT.first, ICA.getFlags()); |
3139 | |
3140 | if (ST->hasSSE1()) |
3141 | if (const auto *Entry = CostTableLookup(SSE1CostTbl, ISD, MTy)) |
3142 | return adjustTableCost(*Entry, LT.first, ICA.getFlags()); |
3143 | |
3144 | if (ST->hasBMI()) { |
3145 | if (ST->is64Bit()) |
3146 | if (const auto *Entry = CostTableLookup(BMI64CostTbl, ISD, MTy)) |
3147 | return adjustTableCost(*Entry, LT.first, ICA.getFlags()); |
3148 | |
3149 | if (const auto *Entry = CostTableLookup(BMI32CostTbl, ISD, MTy)) |
3150 | return adjustTableCost(*Entry, LT.first, ICA.getFlags()); |
3151 | } |
3152 | |
3153 | if (ST->hasLZCNT()) { |
3154 | if (ST->is64Bit()) |
3155 | if (const auto *Entry = CostTableLookup(LZCNT64CostTbl, ISD, MTy)) |
3156 | return adjustTableCost(*Entry, LT.first, ICA.getFlags()); |
3157 | |
3158 | if (const auto *Entry = CostTableLookup(LZCNT32CostTbl, ISD, MTy)) |
3159 | return adjustTableCost(*Entry, LT.first, ICA.getFlags()); |
3160 | } |
3161 | |
3162 | if (ST->hasPOPCNT()) { |
3163 | if (ST->is64Bit()) |
3164 | if (const auto *Entry = CostTableLookup(POPCNT64CostTbl, ISD, MTy)) |
3165 | return adjustTableCost(*Entry, LT.first, ICA.getFlags()); |
3166 | |
3167 | if (const auto *Entry = CostTableLookup(POPCNT32CostTbl, ISD, MTy)) |
3168 | return adjustTableCost(*Entry, LT.first, ICA.getFlags()); |
3169 | } |
3170 | |
3171 | if (ISD == ISD::BSWAP && ST->hasMOVBE() && ST->hasFastMOVBE()) { |
3172 | if (const Instruction *II = ICA.getInst()) { |
3173 | if (II->hasOneUse() && isa<StoreInst>(II->user_back())) |
3174 | return TTI::TCC_Free; |
3175 | if (auto *LI = dyn_cast<LoadInst>(II->getOperand(0))) { |
3176 | if (LI->hasOneUse()) |
3177 | return TTI::TCC_Free; |
3178 | } |
3179 | } |
3180 | } |
3181 | |
3182 | |
3183 | |
3184 | if (ST->is64Bit()) |
3185 | if (const auto *Entry = CostTableLookup(X64CostTbl, ISD, MTy)) |
3186 | return adjustTableCost(*Entry, LT.first, ICA.getFlags()); |
3187 | |
3188 | if (const auto *Entry = CostTableLookup(X86CostTbl, ISD, MTy)) |
3189 | return adjustTableCost(*Entry, LT.first, ICA.getFlags()); |
3190 | } |
3191 | |
3192 | return BaseT::getIntrinsicInstrCost(ICA, CostKind); |
3193 | } |
3194 | /* Cost of an intrinsic call. Type-only queries are forwarded to getTypeBasedIntrinsicInstrCost; fshl/fshr are priced from the tables below; anything else is left to BaseT. */ |
3195 | InstructionCost |
3196 | X86TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, |
3197 | TTI::TargetCostKind CostKind) { |
3198 | if (ICA.isTypeBasedOnly()) |
3199 | return getTypeBasedIntrinsicInstrCost(ICA, CostKind); |
3200 | /* Cost tables keyed by (ISD opcode, MVT); which table is consulted depends on the subtarget checks further down. */ |
3201 | static const CostTblEntry AVX512CostTbl[] = { |
3202 | { ISD::ROTL, MVT::v8i64, 1 }, |
3203 | { ISD::ROTL, MVT::v4i64, 1 }, |
3204 | { ISD::ROTL, MVT::v2i64, 1 }, |
3205 | { ISD::ROTL, MVT::v16i32, 1 }, |
3206 | { ISD::ROTL, MVT::v8i32, 1 }, |
3207 | { ISD::ROTL, MVT::v4i32, 1 }, |
3208 | { ISD::ROTR, MVT::v8i64, 1 }, |
3209 | { ISD::ROTR, MVT::v4i64, 1 }, |
3210 | { ISD::ROTR, MVT::v2i64, 1 }, |
3211 | { ISD::ROTR, MVT::v16i32, 1 }, |
3212 | { ISD::ROTR, MVT::v8i32, 1 }, |
3213 | { ISD::ROTR, MVT::v4i32, 1 } |
3214 | }; |
3215 | /* XOP: the 256-bit rows cost more than the 128-bit rows (4 vs 1 for ROTL, 6 vs 2 for ROTR). */ |
3216 | static const CostTblEntry XOPCostTbl[] = { |
3217 | { ISD::ROTL, MVT::v4i64, 4 }, |
3218 | { ISD::ROTL, MVT::v8i32, 4 }, |
3219 | { ISD::ROTL, MVT::v16i16, 4 }, |
3220 | { ISD::ROTL, MVT::v32i8, 4 }, |
3221 | { ISD::ROTL, MVT::v2i64, 1 }, |
3222 | { ISD::ROTL, MVT::v4i32, 1 }, |
3223 | { ISD::ROTL, MVT::v8i16, 1 }, |
3224 | { ISD::ROTL, MVT::v16i8, 1 }, |
3225 | { ISD::ROTR, MVT::v4i64, 6 }, |
3226 | { ISD::ROTR, MVT::v8i32, 6 }, |
3227 | { ISD::ROTR, MVT::v16i16, 6 }, |
3228 | { ISD::ROTR, MVT::v32i8, 6 }, |
3229 | { ISD::ROTR, MVT::v2i64, 2 }, |
3230 | { ISD::ROTR, MVT::v4i32, 2 }, |
3231 | { ISD::ROTR, MVT::v8i16, 2 }, |
3232 | { ISD::ROTR, MVT::v16i8, 2 } |
3233 | }; |
3234 | static const CostTblEntry X64CostTbl[] = { |
3235 | { ISD::ROTL, MVT::i64, 1 }, |
3236 | { ISD::ROTR, MVT::i64, 1 }, |
3237 | { ISD::FSHL, MVT::i64, 4 } |
3238 | }; |
3239 | static const CostTblEntry X86CostTbl[] = { |
3240 | { ISD::ROTL, MVT::i32, 1 }, |
3241 | { ISD::ROTL, MVT::i16, 1 }, |
3242 | { ISD::ROTL, MVT::i8, 1 }, |
3243 | { ISD::ROTR, MVT::i32, 1 }, |
3244 | { ISD::ROTR, MVT::i16, 1 }, |
3245 | { ISD::ROTR, MVT::i8, 1 }, |
3246 | { ISD::FSHL, MVT::i32, 4 }, |
3247 | { ISD::FSHL, MVT::i16, 4 }, |
3248 | { ISD::FSHL, MVT::i8, 4 } |
3249 | }; |
3250 | /* Map the intrinsic to an ISD opcode; ISD::DELETED_NODE is used as the "not handled here" marker. */ |
3251 | Intrinsic::ID IID = ICA.getID(); |
3252 | Type *RetTy = ICA.getReturnType(); |
3253 | const SmallVectorImpl<const Value *> &Args = ICA.getArgs(); |
3254 | unsigned ISD = ISD::DELETED_NODE; |
3255 | switch (IID) { |
3256 | default: |
3257 | break; |
3258 | case Intrinsic::fshl: |
3259 | ISD = ISD::FSHL; |
3260 | if (Args[0] == Args[1]) /* both data operands are the same Value: price as a rotate */ |
3261 | ISD = ISD::ROTL; |
3262 | break; |
3263 | case Intrinsic::fshr: |
3264 | /* fshr is looked up under ISD::FSHL (the tables above have no FSHR rows); only the equal-operands case gets its own opcode, ROTR. */ |
3265 | ISD = ISD::FSHL; |
3266 | if (Args[0] == Args[1]) |
3267 | ISD = ISD::ROTR; |
3268 | break; |
3269 | } |
3270 | |
3271 | if (ISD != ISD::DELETED_NODE) { |
3272 | /* Legalize the return type: LT.second is the table lookup key, LT.first scales the per-entry cost. */ |
3273 | std::pair<InstructionCost, MVT> LT = |
3274 | TLI->getTypeLegalizationCost(DL, RetTy); |
3275 | MVT MTy = LT.second; |
3276 | |
3277 | /* Probe the tables in the order AVX512, XOP, X64 (64-bit targets only), X86; the first hit is returned. */ |
3278 | if (ST->hasAVX512()) |
3279 | if (const auto *Entry = CostTableLookup(AVX512CostTbl, ISD, MTy)) |
3280 | return LT.first * Entry->Cost; |
3281 | |
3282 | if (ST->hasXOP()) |
3283 | if (const auto *Entry = CostTableLookup(XOPCostTbl, ISD, MTy)) |
3284 | return LT.first * Entry->Cost; |
3285 | |
3286 | if (ST->is64Bit()) |
3287 | if (const auto *Entry = CostTableLookup(X64CostTbl, ISD, MTy)) |
3288 | return LT.first * Entry->Cost; |
3289 | |
3290 | if (const auto *Entry = CostTableLookup(X86CostTbl, ISD, MTy)) |
3291 | return LT.first * Entry->Cost; |
3292 | } |
3293 | /* Unhandled intrinsic, or no table entry for the legalized type: use the generic implementation. */ |
3294 | return BaseT::getIntrinsicInstrCost(ICA, CostKind); |
3295 | } |
3296 | /* Cost of an insert/extract element on vector type Val at position Index. Index == -1U takes a separate memory round-trip path (presumably "index not known" - confirm against the TTI interface). */ |
3297 | InstructionCost X86TTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val, |
3298 | unsigned Index) { |
3299 | static const CostTblEntry SLMCostTbl[] = { |
3300 | { ISD::EXTRACT_VECTOR_ELT, MVT::i8, 4 }, |
3301 | { ISD::EXTRACT_VECTOR_ELT, MVT::i16, 4 }, |
3302 | { ISD::EXTRACT_VECTOR_ELT, MVT::i32, 4 }, |
3303 | { ISD::EXTRACT_VECTOR_ELT, MVT::i64, 7 } |
3304 | }; |
3305 | /* SLMCostTbl above holds extract costs per scalar type; it is only consulted when ST->isSLM(). */ |
3306 | assert(Val->isVectorTy() && "This must be a vector type"); |
3307 | Type *ScalarType = Val->getScalarType(); |
3308 | int RegisterFileMoveCost = 0; |
3309 | |
3310 | |
3311 | /* Index == -1U: price the operation as a round trip through memory. */ |
3312 | if (Index == -1U && (Opcode == Instruction::ExtractElement || |
3313 | Opcode == Instruction::InsertElement)) { |
3314 | |
3315 | /* Extract = store the whole vector + load one scalar. */ |
3316 | /* Insert = store the vector + store the scalar + reload the vector. */ |
3317 | /* All memory ops are costed at TCK_RecipThroughput using the preferred alignments below. */ |
3318 | assert(isa<FixedVectorType>(Val) && "Fixed vector type expected"); |
3319 | Align VecAlign = DL.getPrefTypeAlign(Val); |
3320 | Align SclAlign = DL.getPrefTypeAlign(ScalarType); |
3321 | |
3322 | |
3323 | if (Opcode == Instruction::ExtractElement) { |
3324 | return getMemoryOpCost(Instruction::Store, Val, VecAlign, 0, |
3325 | TTI::TargetCostKind::TCK_RecipThroughput) + |
3326 | getMemoryOpCost(Instruction::Load, ScalarType, SclAlign, 0, |
3327 | TTI::TargetCostKind::TCK_RecipThroughput); |
3328 | } |
3329 | |
3330 | if (Opcode == Instruction::InsertElement) { |
3331 | return getMemoryOpCost(Instruction::Store, Val, VecAlign, 0, |
3332 | TTI::TargetCostKind::TCK_RecipThroughput) + |
3333 | getMemoryOpCost(Instruction::Store, ScalarType, SclAlign, 0, |
3334 | TTI::TargetCostKind::TCK_RecipThroughput) + |
3335 | getMemoryOpCost(Instruction::Load, Val, VecAlign, 0, |
3336 | TTI::TargetCostKind::TCK_RecipThroughput); |
3337 | } |
3338 | } |
3339 | |
3340 | if (Index != -1U && (Opcode == Instruction::ExtractElement || |
3341 | Opcode == Instruction::InsertElement)) { |
3342 | /* Concrete index: work on the legalized type. */ |
3343 | std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Val); |
3344 | |
3345 | /* The vector legalizes to a scalar: the element access is costed as free. */ |
3346 | if (!LT.second.isVector()) |
3347 | return 0; |
3348 | |
3349 | /* Wrap Index into the element count of the legalized vector. */ |
3350 | unsigned NumElts = LT.second.getVectorNumElements(); |
3351 | unsigned SubNumElts = NumElts; |
3352 | Index = Index % NumElts; |
3353 | |
3354 | /* Legal type wider than 128 bits: reduce Index to a position inside one 128-bit chunk. */ |
3355 | /* An element outside the first chunk adds 1 (extract) or 2 (insert) to RegisterFileMoveCost. */ |
3356 | if (LT.second.getSizeInBits() > 128) { |
3357 | assert((LT.second.getSizeInBits() % 128) == 0 && "Illegal vector"); |
3358 | unsigned NumSubVecs = LT.second.getSizeInBits() / 128; |
3359 | SubNumElts = NumElts / NumSubVecs; |
3360 | if (SubNumElts <= Index) { |
3361 | RegisterFileMoveCost += (Opcode == Instruction::InsertElement ? 2 : 1); |
3362 | Index %= SubNumElts; |
3363 | } |
3364 | } |
3365 | |
3366 | if (Index == 0) { |
3367 | |
3368 | |
3369 | /* Element 0 with a floating-point scalar type: only the chunk adjustment above is charged. */ |
3370 | if (ScalarType->isFloatingPointTy()) |
3371 | return RegisterFileMoveCost; |
3372 | |
3373 | /* Element 0, integer extract: one extra unit. */ |
3374 | if (ScalarType->isIntegerTy() && Opcode == Instruction::ExtractElement) |
3375 | return 1 + RegisterFileMoveCost; |
3376 | } |
3377 | |
3378 | int ISD = TLI->InstructionOpcodeToISD(Opcode); |
3379 | assert(ISD && "Unexpected vector opcode"); |
3380 | MVT MScalarTy = LT.second.getScalarType(); |
3381 | if (ST->isSLM()) |
3382 | if (auto *Entry = CostTableLookup(SLMCostTbl, ISD, MScalarTy)) |
3383 | return Entry->Cost + RegisterFileMoveCost; |
3384 | |
3385 | /* i16 elements with SSE2, or any integer element with SSE4.1: one unit. */ |
3386 | if ((MScalarTy == MVT::i16 && ST->hasSSE2()) || |
3387 | (MScalarTy.isInteger() && ST->hasSSE41())) |
3388 | return 1 + RegisterFileMoveCost; |
3389 | |
3390 | /* f32 insert with SSE4.1: one unit. */ |
3391 | if (MScalarTy == MVT::f32 && ST->hasSSE41() && |
3392 | Opcode == Instruction::InsertElement) |
3393 | return 1 + RegisterFileMoveCost; |
3394 | |
3395 | |
3396 | |
3397 | /* Fallback: a shuffle cost (default 1; for inserts, a two-source permute on SubTy), */ |
3398 | /* plus one unit when the scalar type is not floating point, plus the chunk adjustment. */ |
3399 | /* For inserts SubTy is narrowed to SubNumElts elements of ScalarType when the value type's */ |
3400 | /* scalar type differs from the legalized one or the value type is at least 128 bits wide. */ |
3401 | InstructionCost ShuffleCost = 1; |
3402 | if (Opcode == Instruction::InsertElement) { |
3403 | auto *SubTy = cast<VectorType>(Val); |
3404 | EVT VT = TLI->getValueType(DL, Val); |
3405 | if (VT.getScalarType() != MScalarTy || VT.getSizeInBits() >= 128) |
3406 | SubTy = FixedVectorType::get(ScalarType, SubNumElts); |
3407 | ShuffleCost = |
3408 | getShuffleCost(TTI::SK_PermuteTwoSrc, SubTy, None, 0, SubTy); |
3409 | } |
3410 | int IntOrFpCost = ScalarType->isFloatingPointTy() ? 0 : 1; |
3411 | return ShuffleCost + IntOrFpCost + RegisterFileMoveCost; |
3412 | } |
3413 | |
3414 | /* NOTE(review): both insert/extract branches above return on every path, so control reaches here only for */ |
3415 | /* other opcodes and the ExtractElement test below looks unreachable - confirm before relying on it. */ |
3416 | if (Opcode == Instruction::ExtractElement && ScalarType->isPointerTy()) |
3417 | RegisterFileMoveCost += 1; |
3418 | |
3419 | return BaseT::getVectorInstrCost(Opcode, Val, Index) + RegisterFileMoveCost; |
3420 | } |
3421 | /* Cost of inserting (Insert) and/or extracting (Extract) the DemandedElts lanes of Ty. Inserts are modelled here; extracts are delegated to BaseT. */ |
3422 | InstructionCost X86TTIImpl::getScalarizationOverhead(VectorType *Ty, |
3423 | const APInt &DemandedElts, |
3424 | bool Insert, |
3425 | bool Extract) { |
3426 | InstructionCost Cost = 0; |
3427 | |
3428 | |
3429 | /* Insert side. */ |
3430 | if (Insert) { |
3431 | std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty); |
3432 | MVT MScalarTy = LT.second.getScalarType(); |
3433 | /* Legalized element types selected here: i16 with SSE2, any integer with SSE4.1, f32 with SSE4.1. */ |
3434 | if ((MScalarTy == MVT::i16 && ST->hasSSE2()) || |
3435 | (MScalarTy.isInteger() && ST->hasSSE41()) || |
3436 | (MScalarTy == MVT::f32 && ST->hasSSE41())) { |
3437 | |
3438 | /* Legal type fits in 128 bits: use the generic insert-only overhead. */ |
3439 | if (LT.second.getSizeInBits() <= 128) { |
3440 | Cost += |
3441 | BaseT::getScalarizationOverhead(Ty, DemandedElts, Insert, false); |
3442 | } else { |
3443 | |
3444 | |
3445 | |
3446 | |
3447 | |
3448 | |
3449 | /* Wider legal type: scale lane and element counts by the number of legalized registers (LT.first), */ |
3450 | /* zero-extend the demanded mask to that width, and walk it one 128-bit lane (Scale elements) at a time. */ |
3451 | /* Every lane with at least one demanded element adds 1. */ |
3452 | /* A partially demanded lane that does not start a legalized register adds 1 more. */ |
3453 | /* After the loop, one unit is added per demanded element. */ |
3454 | |
3455 | const int CostValue = *LT.first.getValue(); |
3456 | assert(CostValue >= 0 && "Negative cost!"); |
3457 | unsigned Num128Lanes = LT.second.getSizeInBits() / 128 * CostValue; |
3458 | unsigned NumElts = LT.second.getVectorNumElements() * CostValue; |
3459 | APInt WidenedDemandedElts = DemandedElts.zextOrSelf(NumElts); |
3460 | unsigned Scale = NumElts / Num128Lanes; |
3461 | |
3462 | |
3463 | for (unsigned I = 0; I < NumElts; I += Scale) { |
3464 | APInt Mask = WidenedDemandedElts.getBitsSet(NumElts, I, I + Scale); |
3465 | APInt MaskedDE = Mask & WidenedDemandedElts; |
3466 | unsigned Population = MaskedDE.countPopulation(); |
3467 | Cost += (Population > 0 && Population != Scale && |
3468 | I % LT.second.getVectorNumElements() != 0); |
3469 | Cost += Population > 0; |
3470 | } |
3471 | Cost += DemandedElts.countPopulation(); |
3472 | |
3473 | |
3474 | |
3475 | /* f32: take one unit back for every demanded element whose index is a multiple of 4. */ |
3476 | if (MScalarTy == MVT::f32) |
3477 | for (unsigned i = 0, e = cast<FixedVectorType>(Ty)->getNumElements(); |
3478 | i < e; i += 4) |
3479 | if (DemandedElts[i]) |
3480 | Cost--; |
3481 | } |
3482 | } else if (LT.second.isVector()) { |
3483 | |
3484 | |
3485 | |
3486 | /* Remaining vector cases: integer element types add one unit per demanded element ... */ |
3487 | if (Ty->isIntOrIntVectorTy()) |
3488 | Cost += DemandedElts.countPopulation(); |
3489 | |
3490 | |
3491 | |
3492 | /* ... and every case adds (min(legal element count, source element count rounded up to a power of 2) - 1) per legalized register. */ |
3493 | unsigned NumElts = LT.second.getVectorNumElements(); |
3494 | unsigned Pow2Elts = |
3495 | PowerOf2Ceil(cast<FixedVectorType>(Ty)->getNumElements()); |
3496 | Cost += (std::min<unsigned>(NumElts, Pow2Elts) - 1) * LT.first; |
3497 | } |
3498 | } |
3499 | |
3500 | |
3501 | /* Extract side: no X86-specific modelling in this function, use the generic overhead. */ |
3502 | if (Extract) |
3503 | Cost += BaseT::getScalarizationOverhead(Ty, DemandedElts, false, Extract); |
3504 | |
3505 | return Cost; |
3506 | } |
3507 | /* Cost of a load or store of Src. Cost kinds other than TCK_RecipThroughput get a flat answer; for throughput, fixed vectors are costed as a sequence of progressively smaller memory operations. */ |
3508 | InstructionCost X86TTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src, |
3509 | MaybeAlign Alignment, |
3510 | unsigned AddressSpace, |
3511 | TTI::TargetCostKind CostKind, |
3512 | const Instruction *I) { |
3513 | /* Any cost kind except reciprocal throughput: TCC_Basic ... */ |
3514 | if (CostKind != TTI::TCK_RecipThroughput) { |
3515 | if (auto *SI = dyn_cast_or_null<StoreInst>(I)) { |
3516 | |
3517 | /* ... doubled for a store whose pointer operand is a GEP with at least one non-constant index. */ |
3518 | if (auto *GEP = dyn_cast<GetElementPtrInst>(SI->getPointerOperand())) { |
3519 | if (!all_of(GEP->indices(), [](Value *V) { return isa<Constant>(V); })) |
3520 | return TTI::TCC_Basic * 2; |
3521 | } |
3522 | } |
3523 | return TTI::TCC_Basic; |
3524 | } |
3525 | |
3526 | assert((Opcode == Instruction::Load || Opcode == Instruction::Store) && |
3527 | "Invalid Opcode"); |
3528 | /* Types whose value type is MVT::Other are left to the generic implementation. */ |
3529 | if (TLI->getValueType(DL, Src, true) == MVT::Other) |
3530 | return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, |
3531 | CostKind); |
3532 | |
3533 | /* Legalize the type. */ |
3534 | std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Src); |
3535 | |
3536 | auto *VTy = dyn_cast<FixedVectorType>(Src); |
3537 | |
3538 | |
3539 | /* Not a fixed vector, or the vector legalizes to a scalar: one unit per legalized piece. */ |
3540 | if (!VTy || !LT.second.isVector()) |
3541 | |
3542 | return LT.first * 1; |
3543 | |
3544 | bool IsLoad = Opcode == Instruction::Load; |
3545 | |
3546 | Type *EltTy = VTy->getElementType(); |
3547 | |
3548 | const int EltTyBits = DL.getTypeSizeInBits(EltTy); |
3549 | |
3550 | InstructionCost Cost = 0; |
3551 | |
3552 | /* Element count of the source vector, and how many elements still need a memory operation. */ |
3553 | const unsigned SrcNumElt = VTy->getNumElements(); |
3554 | |
3555 | |
3556 | int NumEltRemaining = SrcNumElt; |
3557 | /* Number of elements already covered by the operations costed so far. */ |
3558 | auto NumEltDone = [&]() { return SrcNumElt - NumEltRemaining; }; |
3559 | /* Size in bytes of the legalized vector type: the widest operation tried. */ |
3560 | const int MaxLegalOpSizeBytes = divideCeil(LT.second.getSizeInBits(), 8); |
3561 | |
3562 | /* 128 bits must hold a whole number of elements; otherwise use the generic cost. */ |
3563 | const unsigned XMMBits = 128; |
3564 | if (XMMBits % EltTyBits != 0) |
3565 | |
3566 | return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, |
3567 | CostKind); |
3568 | const int NumEltPerXMM = XMMBits / EltTyBits; |
3569 | /* 128-bit vector of the element type; the narrowest sub-vector type used below. */ |
3570 | auto *XMMVecTy = FixedVectorType::get(EltTy, NumEltPerXMM); |
3571 | /* Outer loop: halve the operation size each round. Inner loop: issue operations of the current size. */ |
3572 | for (int CurrOpSizeBytes = MaxLegalOpSizeBytes, SubVecEltsLeft = 0; |
3573 | NumEltRemaining > 0; CurrOpSizeBytes /= 2) { |
3574 | /* Operation size is not a whole number of elements: use the generic cost. */ |
3575 | if ((8 * CurrOpSizeBytes) % EltTyBits != 0) |
3576 | |
3577 | return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, |
3578 | CostKind); |
3579 | int CurrNumEltPerOp = (8 * CurrOpSizeBytes) / EltTyBits; |
3580 | |
3581 | assert(CurrOpSizeBytes > 0 && CurrNumEltPerOp > 0 && "How'd we get here?"); |
3582 | assert((((NumEltRemaining * EltTyBits) < (2 * 8 * CurrOpSizeBytes)) || |
3583 | (CurrOpSizeBytes == MaxLegalOpSizeBytes)) && |
3584 | "Unless we haven't halved the op size yet, " |
3585 | "we have less than two op's sized units of work left."); |
3586 | /* Sub-vector type for this operation size, never narrower than the 128-bit type. */ |
3587 | auto *CurrVecTy = CurrNumEltPerOp > NumEltPerXMM |
3588 | ? FixedVectorType::get(EltTy, CurrNumEltPerOp) |
3589 | : XMMVecTy; |
3590 | |
3591 | assert(CurrVecTy->getNumElements() % CurrNumEltPerOp == 0 && |
3592 | "After halving sizes, the vector elt count is no longer a multiple " |
3593 | "of number of elements per operation?"); |
3594 | auto *CoalescedVecTy = |
3595 | CurrNumEltPerOp == 1 |
3596 | ? CurrVecTy |
3597 | : FixedVectorType::get( |
3598 | IntegerType::get(Src->getContext(), |
3599 | EltTyBits * CurrNumEltPerOp), |
3600 | CurrVecTy->getNumElements() / CurrNumEltPerOp); |
3601 | assert(DL.getTypeSizeInBits(CoalescedVecTy) == |
3602 | DL.getTypeSizeInBits(CurrVecTy) && |
3603 | "coalesciing elements doesn't change vector width."); |
3604 | |
3605 | while (NumEltRemaining > 0) { |
3606 | assert(SubVecEltsLeft >= 0 && "Subreg element count overconsumtion?"); |
3607 | |
3608 | /* Fewer elements left than one operation of this size covers: drop to the next smaller size, */ |
3609 | /* unless this is a load aligned to at least the operation size, or the operation is already one byte wide. */ |
3610 | if (NumEltRemaining < CurrNumEltPerOp && |
3611 | (!IsLoad || Alignment.valueOrOne() < CurrOpSizeBytes) && |
3612 | CurrOpSizeBytes != 1) |
3613 | break; |
3614 | /* True when the elements done so far end exactly on a legalized-register boundary. */ |
3615 | bool Is0thSubVec = (NumEltDone() % LT.second.getVectorNumElements()) == 0; |
3616 | |
3617 | /* Current sub-vector used up: refill the counter and, unless at a register boundary, charge a subvector insert (load) or extract (store). */ |
3618 | if (SubVecEltsLeft == 0) { |
3619 | SubVecEltsLeft += CurrVecTy->getNumElements(); |
3620 | |
3621 | if (!Is0thSubVec) |
3622 | Cost += getShuffleCost(IsLoad ? TTI::ShuffleKind::SK_InsertSubvector |
3623 | : TTI::ShuffleKind::SK_ExtractSubvector, |
3624 | VTy, None, NumEltDone(), CurrVecTy); |
3625 | } |
3626 | |
3627 | |
3628 | |
3629 | /* Operations of 4 bytes or less that are not at a register boundary also pay for a single-element */ |
3630 | /* insert (load) or extract (store) on the coalesced vector type. */ |
3631 | if (CurrOpSizeBytes <= 32 / 8 && !Is0thSubVec) { |
3632 | int NumEltDoneInCurrXMM = NumEltDone() % NumEltPerXMM; |
3633 | assert(NumEltDoneInCurrXMM % CurrNumEltPerOp == 0 && ""); |
3634 | int CoalescedVecEltIdx = NumEltDoneInCurrXMM / CurrNumEltPerOp; |
3635 | APInt DemandedElts = |
3636 | APInt::getBitsSet(CoalescedVecTy->getNumElements(), |
3637 | CoalescedVecEltIdx, CoalescedVecEltIdx + 1); |
3638 | assert(DemandedElts.countPopulation() == 1 && "Inserting single value"); |
3639 | Cost += getScalarizationOverhead(CoalescedVecTy, DemandedElts, IsLoad, |
3640 | !IsLoad); |
3641 | } |
3642 | |
3643 | |
3644 | |
3645 | /* The memory operation itself: 2 units for a 32-byte operation when ST->isUnalignedMem32Slow(), otherwise 1. */ |
3646 | if (CurrOpSizeBytes == 32 && ST->isUnalignedMem32Slow()) |
3647 | Cost += 2; |
3648 | else |
3649 | Cost += 1; |
3650 | |
3651 | SubVecEltsLeft -= CurrNumEltPerOp; |
3652 | NumEltRemaining -= CurrNumEltPerOp; |
3653 | Alignment = commonAlignment(Alignment.valueOrOne(), CurrOpSizeBytes); |
3654 | } |
3655 | } |
3656 | |
3657 | assert(NumEltRemaining <= 0 && "Should have processed all the elements."); |
3658 | |
3659 | return Cost; |
3660 | } |
3661 | /* Cost of a masked load/store of SrcTy: a scalarized estimate when the masked operation is not legal for the type/alignment, otherwise a per-register cost plus any mask/data shuffles. */ |
3662 | InstructionCost |
3663 | X86TTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *SrcTy, Align Alignment, |
3664 | unsigned AddressSpace, |
3665 | TTI::TargetCostKind CostKind) { |
3666 | bool IsLoad = (Instruction::Load == Opcode); |
3667 | bool IsStore = (Instruction::Store == Opcode); |
3668 | |
3669 | auto *SrcVTy = dyn_cast<FixedVectorType>(SrcTy); |
3670 | if (!SrcVTy) |
3671 | /* Not a fixed vector: price it as an ordinary memory operation. */ |
3672 | return getMemoryOpCost(Opcode, SrcTy, Alignment, AddressSpace, CostKind); |
3673 | /* The mask is modelled as a vector of i8 with the same element count as the data. */ |
3674 | unsigned NumElem = SrcVTy->getNumElements(); |
3675 | auto *MaskTy = |
3676 | FixedVectorType::get(Type::getInt8Ty(SrcVTy->getContext()), NumElem); |
3677 | if ((IsLoad && !isLegalMaskedLoad(SrcVTy, Alignment)) || |
3678 | (IsStore && !isLegalMaskedStore(SrcVTy, Alignment))) { |
3679 | /* Illegal masked op: sum of extracting every mask element, a compare + branch per element, splitting/rebuilding the data vector, and one scalar memory op per element. */ |
3680 | APInt DemandedElts = APInt::getAllOnesValue(NumElem); |
3681 | InstructionCost MaskSplitCost = |
3682 | getScalarizationOverhead(MaskTy, DemandedElts, false, true); |
3683 | InstructionCost ScalarCompareCost = getCmpSelInstrCost( |
3684 | Instruction::ICmp, Type::getInt8Ty(SrcVTy->getContext()), nullptr, |
3685 | CmpInst::BAD_ICMP_PREDICATE, CostKind); |
3686 | InstructionCost BranchCost = getCFInstrCost(Instruction::Br, CostKind); |
3687 | InstructionCost MaskCmpCost = NumElem * (BranchCost + ScalarCompareCost); |
3688 | InstructionCost ValueSplitCost = |
3689 | getScalarizationOverhead(SrcVTy, DemandedElts, IsLoad, IsStore); |
3690 | InstructionCost MemopCost = |
3691 | NumElem * BaseT::getMemoryOpCost(Opcode, SrcVTy->getScalarType(), |
3692 | Alignment, AddressSpace, CostKind); |
3693 | return MemopCost + ValueSplitCost + MaskSplitCost + MaskCmpCost; |
3694 | } |
3695 | |
3696 | /* Legal masked operation. */ |
3697 | std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, SrcVTy); |
3698 | auto VT = TLI->getValueType(DL, SrcVTy); |
3699 | InstructionCost Cost = 0; |
3700 | if (VT.isSimple() && LT.second != VT.getSimpleVT() && |
3701 | LT.second.getVectorNumElements() == NumElem) |
3702 | /* Legalized type differs from the simple value type but keeps the element count: charge a two-source permute for the data and for the mask. */ |
3703 | Cost += getShuffleCost(TTI::SK_PermuteTwoSrc, SrcVTy, None, 0, nullptr) + |
3704 | getShuffleCost(TTI::SK_PermuteTwoSrc, MaskTy, None, 0, nullptr); |
3705 | |
3706 | else if (LT.first * LT.second.getVectorNumElements() > NumElem) { |
3707 | auto *NewMaskTy = FixedVectorType::get(MaskTy->getElementType(), |
3708 | LT.second.getVectorNumElements()); |
3709 | /* Legalized registers hold more elements than requested: charge inserting the mask into a mask vector of the legal element count. */ |
3710 | Cost += getShuffleCost(TTI::SK_InsertSubvector, NewMaskTy, None, 0, MaskTy); |
3711 | } |
3712 | |
3713 | /* Without AVX-512: 2 per legalized register for loads, 8 for stores. */ |
3714 | if (!ST->hasAVX512()) |
3715 | return Cost + LT.first * (IsLoad ? 2 : 8); |
3716 | |
3717 | /* With AVX-512: 1 per legalized register. */ |
3718 | return Cost + LT.first; |
3719 | } |
3720 | /* Address-computation cost for an access of type Ty. Vector accesses with ScalarEvolution available get X86-specific values; everything else is left to BaseT. */ |
3721 | InstructionCost X86TTIImpl::getAddressComputationCost(Type *Ty, |
3722 | ScalarEvolution *SE, |
3723 | const SCEV *Ptr) { |
3724 | |
3725 | |
3726 | |
3727 | /* Cost reported for a vector access that is not strided. */ |
3728 | const unsigned NumVectorInstToHideOverhead = 10; |
3729 | |
3730 | |
3731 | |
3732 | |
3733 | /* Vector type with SE available: */ |
3734 | /*   - not a strided access: NumVectorInstToHideOverhead; */ |
3735 | /*   - strided but getConstantStrideStep yields nothing: 1; */ |
3736 | /*   - strided with a constant step: falls through to BaseT below. */ |
3737 | if (Ty->isVectorTy() && SE) { |
3738 | if (!BaseT::isStridedAccess(Ptr)) |
3739 | return NumVectorInstToHideOverhead; |
3740 | if (!BaseT::getConstantStrideStep(SE, Ptr)) |
3741 | return 1; |
3742 | } |
3743 | |
3744 | return BaseT::getAddressComputationCost(Ty, SE, Ptr); |
3745 | } |
3746 | /* Cost of reducing vector ValTy with Opcode. Ordered reductions go to BaseT; otherwise table lookups are tried first and a halving shuffle/shift + arithmetic ladder is the fallback. */ |
3747 | InstructionCost |
3748 | X86TTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *ValTy, |
3749 | Optional<FastMathFlags> FMF, |
3750 | TTI::TargetCostKind CostKind) { |
3751 | if (TTI::requiresOrderedReduction(FMF)) |
3752 | return BaseT::getArithmeticReductionCost(Opcode, ValTy, FMF, CostKind); |
3753 | |
3754 | |
3755 | |
3756 | /* Whole-reduction costs keyed by (ISD opcode, MVT), selected by subtarget below. */ |
3757 | static const CostTblEntry SLMCostTblNoPairWise[] = { |
3758 | { ISD::FADD, MVT::v2f64, 3 }, |
3759 | { ISD::ADD, MVT::v2i64, 5 }, |
3760 | }; |
3761 | |
3762 | static const CostTblEntry SSE2CostTblNoPairWise[] = { |
3763 | { ISD::FADD, MVT::v2f64, 2 }, |
3764 | { ISD::FADD, MVT::v2f32, 2 }, |
3765 | { ISD::FADD, MVT::v4f32, 4 }, |
3766 | { ISD::ADD, MVT::v2i64, 2 }, |
3767 | { ISD::ADD, MVT::v2i32, 2 }, |
3768 | { ISD::ADD, MVT::v4i32, 3 }, |
3769 | { ISD::ADD, MVT::v2i16, 2 }, |
3770 | { ISD::ADD, MVT::v4i16, 3 }, |
3771 | { ISD::ADD, MVT::v8i16, 4 }, |
3772 | { ISD::ADD, MVT::v2i8, 2 }, |
3773 | { ISD::ADD, MVT::v4i8, 2 }, |
3774 | { ISD::ADD, MVT::v8i8, 2 }, |
3775 | { ISD::ADD, MVT::v16i8, 3 }, |
3776 | }; |
3777 | |
3778 | static const CostTblEntry AVX1CostTblNoPairWise[] = { |
3779 | { ISD::FADD, MVT::v4f64, 3 }, |
3780 | { ISD::FADD, MVT::v4f32, 3 }, |
3781 | { ISD::FADD, MVT::v8f32, 4 }, |
3782 | { ISD::ADD, MVT::v2i64, 1 }, |
3783 | { ISD::ADD, MVT::v4i64, 3 }, |
3784 | { ISD::ADD, MVT::v8i32, 5 }, |
3785 | { ISD::ADD, MVT::v16i16, 5 }, |
3786 | { ISD::ADD, MVT::v32i8, 4 }, |
3787 | }; |
3788 | |
3789 | int ISD = TLI->InstructionOpcodeToISD(Opcode); |
3790 | assert(ISD && "Invalid opcode"); |
3791 | |
3792 | |
3793 | |
3794 | /* First pass: look up the un-legalized value type when it is a simple MVT; a hit returns the table cost unchanged. */ |
3795 | EVT VT = TLI->getValueType(DL, ValTy); |
3796 | if (VT.isSimple()) { |
3797 | MVT MTy = VT.getSimpleVT(); |
3798 | if (ST->isSLM()) |
3799 | if (const auto *Entry = CostTableLookup(SLMCostTblNoPairWise, ISD, MTy)) |
3800 | return Entry->Cost; |
3801 | |
3802 | if (ST->hasAVX()) |
3803 | if (const auto *Entry = CostTableLookup(AVX1CostTblNoPairWise, ISD, MTy)) |
3804 | return Entry->Cost; |
3805 | |
3806 | if (ST->hasSSE2()) |
3807 | if (const auto *Entry = CostTableLookup(SSE2CostTblNoPairWise, ISD, MTy)) |
3808 | return Entry->Cost; |
3809 | } |
3810 | /* Second pass works on the legalized type. */ |
3811 | std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy); |
3812 | |
3813 | MVT MTy = LT.second; |
3814 | |
3815 | auto *ValVTy = cast<FixedVectorType>(ValTy); |
3816 | |
3817 | /* i8 multiply: cost a zero-extend to i16 elements plus the same reduction on the widened vector. */ |
3818 | if (ISD == ISD::MUL && MTy.getScalarType() == MVT::i8) { |
3819 | auto *WideSclTy = IntegerType::get(ValVTy->getContext(), 16); |
3820 | auto *WideVecTy = FixedVectorType::get(WideSclTy, ValVTy->getNumElements()); |
3821 | return getCastInstrCost(Instruction::ZExt, WideVecTy, ValTy, |
3822 | TargetTransformInfo::CastContextHint::None, |
3823 | CostKind) + |
3824 | getArithmeticReductionCost(Opcode, WideVecTy, FMF, CostKind); |
3825 | } |
3826 | |
3827 | InstructionCost ArithmeticCost = 0; |
3828 | if (LT.first != 1 && MTy.isVector() && |
3829 | MTy.getVectorNumElements() < ValVTy->getNumElements()) { |
3830 | /* Type splits into several legal registers: charge LT.first - 1 ordinary vector ops on the legal-width type. */ |
3831 | auto *SingleOpTy = FixedVectorType::get(ValVTy->getElementType(), |
3832 | MTy.getVectorNumElements()); |
3833 | ArithmeticCost = getArithmeticInstrCost(Opcode, SingleOpTy, CostKind); |
3834 | ArithmeticCost *= LT.first - 1; |
3835 | } |
3836 | |
3837 | if (ST->isSLM()) |
3838 | if (const auto *Entry = CostTableLookup(SLMCostTblNoPairWise, ISD, MTy)) |
3839 | return ArithmeticCost + Entry->Cost; |
3840 | |
3841 | if (ST->hasAVX()) |
3842 | if (const auto *Entry = CostTableLookup(AVX1CostTblNoPairWise, ISD, MTy)) |
3843 | return ArithmeticCost + Entry->Cost; |
3844 | |
3845 | if (ST->hasSSE2()) |
3846 | if (const auto *Entry = CostTableLookup(SSE2CostTblNoPairWise, ISD, MTy)) |
3847 | return ArithmeticCost + Entry->Cost; |
3848 | |
3849 | |
3850 | /* AND/OR reduction tables, used only for vectors of i1 elements and keyed by the legalized type. */ |
3851 | static const CostTblEntry AVX512BoolReduction[] = { |
3852 | { ISD::AND, MVT::v2i1, 3 }, |
3853 | { ISD::AND, MVT::v4i1, 5 }, |
3854 | { ISD::AND, MVT::v8i1, 7 }, |
3855 | { ISD::AND, MVT::v16i1, 9 }, |
3856 | { ISD::AND, MVT::v32i1, 11 }, |
3857 | { ISD::AND, MVT::v64i1, 13 }, |
3858 | { ISD::OR, MVT::v2i1, 3 }, |
3859 | { ISD::OR, MVT::v4i1, 5 }, |
3860 | { ISD::OR, MVT::v8i1, 7 }, |
3861 | { ISD::OR, MVT::v16i1, 9 }, |
3862 | { ISD::OR, MVT::v32i1, 11 }, |
3863 | { ISD::OR, MVT::v64i1, 13 }, |
3864 | }; |
3865 | |
3866 | static const CostTblEntry AVX2BoolReduction[] = { |
3867 | { ISD::AND, MVT::v16i16, 2 }, |
3868 | { ISD::AND, MVT::v32i8, 2 }, |
3869 | { ISD::OR, MVT::v16i16, 2 }, |
3870 | { ISD::OR, MVT::v32i8, 2 }, |
3871 | }; |
3872 | |
3873 | static const CostTblEntry AVX1BoolReduction[] = { |
3874 | { ISD::AND, MVT::v4i64, 2 }, |
3875 | { ISD::AND, MVT::v8i32, 2 }, |
3876 | { ISD::AND, MVT::v16i16, 4 }, |
3877 | { ISD::AND, MVT::v32i8, 4 }, |
3878 | { ISD::OR, MVT::v4i64, 2 }, |
3879 | { ISD::OR, MVT::v8i32, 2 }, |
3880 | { ISD::OR, MVT::v16i16, 4 }, |
3881 | { ISD::OR, MVT::v32i8, 4 }, |
3882 | }; |
3883 | |
3884 | static const CostTblEntry SSE2BoolReduction[] = { |
3885 | { ISD::AND, MVT::v2i64, 2 }, |
3886 | { ISD::AND, MVT::v4i32, 2 }, |
3887 | { ISD::AND, MVT::v8i16, 2 }, |
3888 | { ISD::AND, MVT::v16i8, 2 }, |
3889 | { ISD::OR, MVT::v2i64, 2 }, |
3890 | { ISD::OR, MVT::v4i32, 2 }, |
3891 | { ISD::OR, MVT::v8i16, 2 }, |
3892 | { ISD::OR, MVT::v16i8, 2 }, |
3893 | }; |
3894 | |
3895 | /* i1 element vectors: table lookup (plus the split cost) or BaseT; they never reach the ladder below. */ |
3896 | if (ValVTy->getElementType()->isIntegerTy(1)) { |
3897 | InstructionCost ArithmeticCost = 0; |
3898 | if (LT.first != 1 && MTy.isVector() && |
3899 | MTy.getVectorNumElements() < ValVTy->getNumElements()) { |
3900 | /* Same split cost as computed above; this local shadows the outer ArithmeticCost. */ |
3901 | auto *SingleOpTy = FixedVectorType::get(ValVTy->getElementType(), |
3902 | MTy.getVectorNumElements()); |
3903 | ArithmeticCost = getArithmeticInstrCost(Opcode, SingleOpTy, CostKind); |
3904 | ArithmeticCost *= LT.first - 1; |
3905 | } |
3906 | |
3907 | if (ST->hasAVX512()) |
3908 | if (const auto *Entry = CostTableLookup(AVX512BoolReduction, ISD, MTy)) |
3909 | return ArithmeticCost + Entry->Cost; |
3910 | if (ST->hasAVX2()) |
3911 | if (const auto *Entry = CostTableLookup(AVX2BoolReduction, ISD, MTy)) |
3912 | return ArithmeticCost + Entry->Cost; |
3913 | if (ST->hasAVX()) |
3914 | if (const auto *Entry = CostTableLookup(AVX1BoolReduction, ISD, MTy)) |
3915 | return ArithmeticCost + Entry->Cost; |
3916 | if (ST->hasSSE2()) |
3917 | if (const auto *Entry = CostTableLookup(SSE2BoolReduction, ISD, MTy)) |
3918 | return ArithmeticCost + Entry->Cost; |
3919 | |
3920 | return BaseT::getArithmeticReductionCost(Opcode, ValVTy, FMF, CostKind); |
3921 | } |
3922 | |
3923 | unsigned NumVecElts = ValVTy->getNumElements(); |
3924 | unsigned ScalarSize = ValVTy->getScalarSizeInBits(); |
3925 | |
3926 | |
3927 | /* The ladder below is used only for a power-of-2 element count whose element width is unchanged by legalization. */ |
3928 | if (!isPowerOf2_32(NumVecElts) || ScalarSize != MTy.getScalarSizeInBits()) |
3929 | return BaseT::getArithmeticReductionCost(Opcode, ValVTy, FMF, CostKind); |
3930 | |
3931 | InstructionCost ReductionCost = 0; |
3932 | |
3933 | auto *Ty = ValVTy; |
3934 | if (LT.first != 1 && MTy.isVector() && |
3935 | MTy.getVectorNumElements() < ValVTy->getNumElements()) { |
3936 | /* Split type: start with LT.first - 1 vector ops and continue on a single legal-width vector. */ |
3937 | Ty = FixedVectorType::get(ValVTy->getElementType(), |
3938 | MTy.getVectorNumElements()); |
3939 | ReductionCost = getArithmeticInstrCost(Opcode, Ty, CostKind); |
3940 | ReductionCost *= LT.first - 1; |
3941 | NumVecElts = MTy.getVectorNumElements(); |
3942 | } |
3943 | |
3944 | |
3945 | /* Halve the element count until one element is left; each step pays one shuffle or shift plus one arithmetic op. */ |
3946 | while (NumVecElts > 1) { |
3947 | /* Size is the bit width before this halving step. */ |
3948 | unsigned Size = NumVecElts * ScalarSize; |
3949 | NumVecElts /= 2; |
3950 | /* Above 128 bits: extract the half-width subvector and continue on it. */ |
3951 | if (Size > 128) { |
3952 | auto *SubTy = FixedVectorType::get(ValVTy->getElementType(), NumVecElts); |
3953 | ReductionCost += |
3954 | getShuffleCost(TTI::SK_ExtractSubvector, Ty, None, NumVecElts, SubTy); |
3955 | Ty = SubTy; |
3956 | } else if (Size == 128) { |
3957 | /* 128 bits: single-source permute on <2 x double> for FP elements, <2 x i64> otherwise. The element type is tested because ValVTy is a vector type and Type::isFloatingPointTy() is only true for scalar FP types. */ |
3958 | FixedVectorType *ShufTy; |
3959 | if (ValVTy->getElementType()->isFloatingPointTy()) |
3960 | ShufTy = |
3961 | FixedVectorType::get(Type::getDoubleTy(ValVTy->getContext()), 2); |
3962 | else |
3963 | ShufTy = |
3964 | FixedVectorType::get(Type::getInt64Ty(ValVTy->getContext()), 2); |
3965 | ReductionCost += |
3966 | getShuffleCost(TTI::SK_PermuteSingleSrc, ShufTy, None, 0, nullptr); |
3967 | } else if (Size == 64) { |
3968 | /* 64 bits: single-source permute on <4 x float> for FP elements, <4 x i32> otherwise (element type tested, as above). */ |
3969 | FixedVectorType *ShufTy; |
3970 | if (ValVTy->getElementType()->isFloatingPointTy()) |
3971 | ShufTy = |
3972 | FixedVectorType::get(Type::getFloatTy(ValVTy->getContext()), 4); |
3973 | else |
3974 | ShufTy = |
3975 | FixedVectorType::get(Type::getInt32Ty(ValVTy->getContext()), 4); |
3976 | ReductionCost += |
3977 | getShuffleCost(TTI::SK_PermuteSingleSrc, ShufTy, None, 0, nullptr); |
3978 | } else { |
3979 | /* Below 64 bits: a logical shift right by a uniform constant on a 128-bit vector of Size-bit integers. */ |
3980 | auto *ShiftTy = FixedVectorType::get( |
3981 | Type::getIntNTy(ValVTy->getContext(), Size), 128 / Size); |
3982 | ReductionCost += getArithmeticInstrCost( |
3983 | Instruction::LShr, ShiftTy, CostKind, |
3984 | TargetTransformInfo::OK_AnyValue, |
3985 | TargetTransformInfo::OK_UniformConstantValue, |
3986 | TargetTransformInfo::OP_None, TargetTransformInfo::OP_None); |
3987 | } |
3988 | |
3989 | /* The combining arithmetic op for this step. */ |
3990 | ReductionCost += getArithmeticInstrCost(Opcode, Ty, CostKind); |
3991 | } |
3992 | |
3993 | /* Finish with the cost of extracting element 0. */ |
3994 | return ReductionCost + getVectorInstrCost(Instruction::ExtractElement, Ty, 0); |
3995 | } |
3996 | |
3997 | InstructionCost X86TTIImpl::getMinMaxCost(Type *Ty, Type *CondTy, |
3998 | bool IsUnsigned) { |
3999 | std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty); |
4000 | |
4001 | MVT MTy = LT.second; |
4002 | |
4003 | int ISD; |
4004 | if (Ty->isIntOrIntVectorTy()) { |
4005 | ISD = IsUnsigned ? ISD::UMIN : ISD::SMIN; |
4006 | } else { |
4007 | assert(Ty->isFPOrFPVectorTy() && |
4008 | "Expected float point or integer vector type."); |
4009 | ISD = ISD::FMINNUM; |
4010 | } |
4011 | |
4012 | static const CostTblEntry SSE1CostTbl[] = { |
4013 | {ISD::FMINNUM, MVT::v4f32, 1}, |
4014 | }; |
4015 | |
4016 | static const CostTblEntry SSE2CostTbl[] = { |
4017 | {ISD::FMINNUM, MVT::v2f64, 1}, |
4018 | {ISD::SMIN, MVT::v8i16, 1}, |
4019 | {ISD::UMIN, MVT::v16i8, 1}, |
4020 | }; |
4021 | |
4022 | static const CostTblEntry SSE41CostTbl[] = { |
4023 | {ISD::SMIN, MVT::v4i32, 1}, |
4024 | {ISD::UMIN, MVT::v4i32, 1}, |
4025 | {ISD::UMIN, MVT::v8i16, 1}, |
4026 | {ISD::SMIN, MVT::v16i8, 1}, |
4027 | }; |
4028 | |
4029 | static const CostTblEntry SSE42CostTbl[] = { |
4030 | {ISD::UMIN, MVT::v2i64, 3}, |
4031 | }; |
4032 | |
4033 | static const CostTblEntry AVX1CostTbl[] = { |
4034 | {ISD::FMINNUM, MVT::v8f32, 1}, |
4035 | {ISD::FMINNUM, MVT::v4f64, 1}, |
4036 | {ISD::SMIN, MVT::v8i32, 3}, |
4037 | {ISD::UMIN, MVT::v8i32, 3}, |
4038 | {ISD::SMIN, MVT::v16i16, 3}, |
4039 | {ISD::UMIN, MVT::v16i16, 3}, |
4040 | {ISD::SMIN, MVT::v32i8, 3}, |
4041 | {ISD::UMIN, MVT::v32i8, 3}, |
4042 | }; |
4043 | |
4044 | static const CostTblEntry AVX2CostTbl[] = { |
4045 | {ISD::SMIN, MVT::v8i32, 1}, |
4046 | {ISD::UMIN, MVT::v8i32, 1}, |
4047 | {ISD::SMIN, MVT::v16i16, 1}, |
4048 | {ISD::UMIN, MVT::v16i16, 1}, |
4049 | {ISD::SMIN, MVT::v32i8, 1}, |
4050 | {ISD::UMIN, MVT::v32i8, 1}, |
4051 | }; |
4052 | |
4053 | static const CostTblEntry AVX512CostTbl[] = { |
4054 | {ISD::FMINNUM, MVT::v16f32, 1}, |
4055 | {ISD::FMINNUM, MVT::v8f64, 1}, |
4056 | {ISD::SMIN, MVT::v2i64, 1}, |
4057 | {ISD::UMIN, MVT::v2i64, 1}, |
4058 | {ISD::SMIN, MVT::v4i64, 1}, |
4059 | {ISD::UMIN, MVT::v4i64, 1}, |
4060 | {ISD::SMIN, MVT::v8i64, 1}, |
4061 | {ISD::UMIN, MVT::v8i64, 1}, |
4062 | {ISD::SMIN, MVT::v16i32, 1}, |
4063 | {ISD::UMIN, MVT::v16i32, 1}, |
4064 | }; |
4065 | |
4066 | static const CostTblEntry AVX512BWCostTbl[] = { |
4067 | {ISD::SMIN, MVT::v32i16, 1}, |
4068 | {ISD::UMIN, MVT::v32i16, 1}, |
4069 | {ISD::SMIN, MVT::v64i8, 1}, |
4070 | {ISD::UMIN, MVT::v64i8, 1}, |
4071 | }; |
4072 | |
4073 | |
4074 | if (ST->hasBWI()) |
4075 | if (const auto *Entry = CostTableLookup(AVX512BWCostTbl, ISD, MTy)) |
4076 | return LT.first * Entry->Cost; |
4077 | |
4078 | if (ST->hasAVX512()) |
4079 | if (const auto *Entry = CostTableLookup(AVX512CostTbl, ISD, MTy)) |
4080 | return LT.first * Entry->Cost; |
4081 | |
4082 | if (ST->hasAVX2()) |
4083 | if (const auto *Entry = CostTableLookup(AVX2CostTbl, ISD, MTy)) |
4084 | return LT.first * Entry->Cost; |
4085 | |
4086 | if (ST->hasAVX()) |
4087 | if (const auto *Entry = CostTableLookup(AVX1CostTbl, ISD, MTy)) |
4088 | return LT.first * Entry->Cost; |
4089 | |
4090 | if (ST->hasSSE42()) |
4091 | if (const auto *Entry = CostTableLookup(SSE42CostTbl, ISD, MTy)) |
4092 | return LT.first * Entry->Cost; |
4093 | |
4094 | if (ST->hasSSE41()) |
4095 | if (const auto *Entry = CostTableLookup(SSE41CostTbl, ISD, MTy)) |
4096 | return LT.first * Entry->Cost; |
4097 | |
4098 | if (ST->hasSSE2()) |
4099 | if (const auto *Entry = CostTableLookup(SSE2CostTbl, ISD, MTy)) |
4100 | return LT.first * Entry->Cost; |
4101 | |
4102 | if (ST->hasSSE1()) |
4103 | if (const auto *Entry = CostTableLookup(SSE1CostTbl, ISD, MTy)) |
4104 | return LT.first * Entry->Cost; |
4105 | |
4106 | unsigned CmpOpcode; |
4107 | if (Ty->isFPOrFPVectorTy()) { |
4108 | CmpOpcode = Instruction::FCmp; |
4109 | } else { |
4110 | assert(Ty->isIntOrIntVectorTy() && |
4111 | "expecting floating point or integer type for min/max reduction"); |
4112 | CmpOpcode = Instruction::ICmp; |
4113 | } |
4114 | |
4115 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; |
4116 | |
4117 | InstructionCost Result = |
4118 | getCmpSelInstrCost(CmpOpcode, Ty, CondTy, CmpInst::BAD_ICMP_PREDICATE, |
4119 | CostKind) + |
4120 | getCmpSelInstrCost(Instruction::Select, Ty, CondTy, |
4121 | CmpInst::BAD_ICMP_PREDICATE, CostKind); |
4122 | return Result; |
4123 | } |
4124 | |
4125 | InstructionCost |
4126 | X86TTIImpl::getMinMaxReductionCost(VectorType *ValTy, VectorType *CondTy, |
4127 | bool IsUnsigned, |
4128 | TTI::TargetCostKind CostKind) { |
4129 | std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy); |
4130 | |
4131 | MVT MTy = LT.second; |
4132 | |
4133 | int ISD; |
4134 | if (ValTy->isIntOrIntVectorTy()) { |
4135 | ISD = IsUnsigned ? ISD::UMIN : ISD::SMIN; |
4136 | } else { |
4137 | assert(ValTy->isFPOrFPVectorTy() && |
4138 | "Expected float point or integer vector type."); |
4139 | ISD = ISD::FMINNUM; |
4140 | } |
4141 | |
4142 | |
4143 | |
4144 | |
4145 | static const CostTblEntry SSE2CostTblNoPairWise[] = { |
4146 | {ISD::UMIN, MVT::v2i16, 5}, |
4147 | {ISD::UMIN, MVT::v4i16, 7}, |
4148 | {ISD::UMIN, MVT::v8i16, 9}, |
4149 | }; |
4150 | |
4151 | static const CostTblEntry SSE41CostTblNoPairWise[] = { |
4152 | {ISD::SMIN, MVT::v2i16, 3}, |
4153 | {ISD::SMIN, MVT::v4i16, 5}, |
4154 | {ISD::UMIN, MVT::v2i16, 5}, |
4155 | {ISD::UMIN, MVT::v4i16, 7}, |
4156 | {ISD::SMIN, MVT::v8i16, 4}, |
4157 | {ISD::UMIN, MVT::v8i16, 4}, |
4158 | {ISD::SMIN, MVT::v2i8, 3}, |
4159 | {ISD::SMIN, MVT::v4i8, 5}, |
4160 | {ISD::SMIN, MVT::v8i8, 7}, |
4161 | {ISD::SMIN, MVT::v16i8, 6}, |
4162 | {ISD::UMIN, MVT::v2i8, 3}, |
4163 | {ISD::UMIN, MVT::v4i8, 5}, |
4164 | {ISD::UMIN, MVT::v8i8, 7}, |
4165 | {ISD::UMIN, MVT::v16i8, 6}, |
4166 | }; |
4167 | |
4168 | static const CostTblEntry AVX1CostTblNoPairWise[] = { |
4169 | {ISD::SMIN, MVT::v16i16, 6}, |
4170 | {ISD::UMIN, MVT::v16i16, 6}, |
4171 | {ISD::SMIN, MVT::v32i8, 8}, |
4172 | {ISD::UMIN, MVT::v32i8, 8}, |
4173 | }; |
4174 | |
4175 | static const CostTblEntry AVX512BWCostTblNoPairWise[] = { |
4176 | {ISD::SMIN, MVT::v32i16, 8}, |
4177 | {ISD::UMIN, MVT::v32i16, 8}, |
4178 | {ISD::SMIN, MVT::v64i8, 10}, |
4179 | {ISD::UMIN, MVT::v64i8, 10}, |
4180 | }; |
4181 | |
4182 | |
4183 | |
4184 | |
4185 | EVT VT = TLI->getValueType(DL, ValTy); |
4186 | if (VT.isSimple()) { |
4187 | MVT MTy = VT.getSimpleVT(); |
4188 | if (ST->hasBWI()) |
4189 | if (const auto *Entry = CostTableLookup(AVX512BWCostTblNoPairWise, ISD, MTy)) |
4190 | return Entry->Cost; |
4191 | |
4192 | if (ST->hasAVX()) |
4193 | if (const auto *Entry = CostTableLookup(AVX1CostTblNoPairWise, ISD, MTy)) |
4194 | return Entry->Cost; |
4195 | |
4196 | if (ST->hasSSE41()) |
4197 | if (const auto *Entry = CostTableLookup(SSE41CostTblNoPairWise, ISD, MTy)) |
4198 | return Entry->Cost; |
4199 | |
4200 | if (ST->hasSSE2()) |
4201 | if (const auto *Entry = CostTableLookup(SSE2CostTblNoPairWise, ISD, MTy)) |
4202 | return Entry->Cost; |
4203 | } |
4204 | |
4205 | auto *ValVTy = cast<FixedVectorType>(ValTy); |
4206 | unsigned NumVecElts = ValVTy->getNumElements(); |
4207 | |
4208 | auto *Ty = ValVTy; |
4209 | InstructionCost MinMaxCost = 0; |
4210 | if (LT.first != 1 && MTy.isVector() && |
4211 | MTy.getVectorNumElements() < ValVTy->getNumElements()) { |
4212 | |
4213 | Ty = FixedVectorType::get(ValVTy->getElementType(), |
4214 | MTy.getVectorNumElements()); |
4215 | auto *SubCondTy = FixedVectorType::get(CondTy->getElementType(), |
4216 | MTy.getVectorNumElements()); |
4217 | MinMaxCost = getMinMaxCost(Ty, SubCondTy, IsUnsigned); |
4218 | MinMaxCost *= LT.first - 1; |
4219 | NumVecElts = MTy.getVectorNumElements(); |
4220 | } |
4221 | |
4222 | if (ST->hasBWI()) |
4223 | if (const auto *Entry = CostTableLookup(AVX512BWCostTblNoPairWise, ISD, MTy)) |
4224 | return MinMaxCost + Entry->Cost; |
4225 | |
4226 | if (ST->hasAVX()) |
4227 | if (const auto *Entry = CostTableLookup(AVX1CostTblNoPairWise, ISD, MTy)) |
4228 | return MinMaxCost + Entry->Cost; |
4229 | |
4230 | if (ST->hasSSE41()) |
4231 | if (const auto *Entry = CostTableLookup(SSE41CostTblNoPairWise, ISD, MTy)) |
4232 | return MinMaxCost + Entry->Cost; |
4233 | |
4234 | if (ST->hasSSE2()) |
4235 | if (const auto *Entry = CostTableLookup(SSE2CostTblNoPairWise, ISD, MTy)) |
4236 | return MinMaxCost + Entry->Cost; |
4237 | |
4238 | unsigned ScalarSize = ValTy->getScalarSizeInBits(); |
4239 | |
4240 | |
4241 | |
4242 | if (!isPowerOf2_32(ValVTy->getNumElements()) || |
4243 | ScalarSize != MTy.getScalarSizeInBits()) |
4244 | return BaseT::getMinMaxReductionCost(ValTy, CondTy, IsUnsigned, CostKind); |
4245 | |
4246 | |
4247 | |
4248 | while (NumVecElts > 1) { |
4249 | |
4250 | unsigned Size = NumVecElts * ScalarSize; |
4251 | NumVecElts /= 2; |
4252 | |
4253 | if (Size > 128) { |
4254 | auto *SubTy = FixedVectorType::get(ValVTy->getElementType(), NumVecElts); |
4255 | MinMaxCost += |
4256 | getShuffleCost(TTI::SK_ExtractSubvector, Ty, None, NumVecElts, SubTy); |
4257 | Ty = SubTy; |
4258 | } else if (Size == 128) { |
4259 | |
4260 | VectorType *ShufTy; |
4261 | if (ValTy->isFloatingPointTy()) |
4262 | ShufTy = |
4263 | FixedVectorType::get(Type::getDoubleTy(ValTy->getContext()), 2); |
4264 | else |
4265 | ShufTy = FixedVectorType::get(Type::getInt64Ty(ValTy->getContext()), 2); |
4266 | MinMaxCost += |
4267 | getShuffleCost(TTI::SK_PermuteSingleSrc, ShufTy, None, 0, nullptr); |
4268 | } else if (Size == 64) { |
4269 | |
4270 | FixedVectorType *ShufTy; |
4271 | if (ValTy->isFloatingPointTy()) |
4272 | ShufTy = FixedVectorType::get(Type::getFloatTy(ValTy->getContext()), 4); |
4273 | else |
4274 | ShufTy = FixedVectorType::get(Type::getInt32Ty(ValTy->getContext()), 4); |
4275 | MinMaxCost += |
4276 | getShuffleCost(TTI::SK_PermuteSingleSrc, ShufTy, None, 0, nullptr); |
4277 | } else { |
4278 | |
4279 | auto *ShiftTy = FixedVectorType::get( |
4280 | Type::getIntNTy(ValTy->getContext(), Size), 128 / Size); |
4281 | MinMaxCost += getArithmeticInstrCost( |
4282 | Instruction::LShr, ShiftTy, TTI::TCK_RecipThroughput, |
4283 | TargetTransformInfo::OK_AnyValue, |
4284 | TargetTransformInfo::OK_UniformConstantValue, |
4285 | TargetTransformInfo::OP_None, TargetTransformInfo::OP_None); |
4286 | } |
4287 | |
4288 | |
4289 | auto *SubCondTy = |
4290 | FixedVectorType::get(CondTy->getElementType(), Ty->getNumElements()); |
4291 | MinMaxCost += getMinMaxCost(Ty, SubCondTy, IsUnsigned); |
4292 | } |
4293 | |
4294 | |
4295 | return MinMaxCost + getVectorInstrCost(Instruction::ExtractElement, Ty, 0); |
4296 | } |
4297 | |
4298 | |
4299 | |
4300 | |
4301 | InstructionCost X86TTIImpl::getIntImmCost(int64_t Val) { |
4302 | if (Val == 0) |
4303 | return TTI::TCC_Free; |
4304 | |
4305 | if (isInt<32>(Val)) |
4306 | return TTI::TCC_Basic; |
4307 | |
4308 | return 2 * TTI::TCC_Basic; |
4309 | } |
4310 | |
4311 | InstructionCost X86TTIImpl::getIntImmCost(const APInt &Imm, Type *Ty, |
4312 | TTI::TargetCostKind CostKind) { |
4313 | assert(Ty->isIntegerTy()); |
4314 | |
4315 | unsigned BitSize = Ty->getPrimitiveSizeInBits(); |
4316 | if (BitSize == 0) |
4317 | return ~0U; |
4318 | |
4319 | |
4320 | |
4321 | |
4322 | |
4323 | if (BitSize > 128) |
4324 | return TTI::TCC_Free; |
4325 | |
4326 | if (Imm == 0) |
4327 | return TTI::TCC_Free; |
4328 | |
4329 | |
4330 | APInt ImmVal = Imm; |
4331 | if (BitSize % 64 != 0) |
4332 | ImmVal = Imm.sext(alignTo(BitSize, 64)); |
4333 | |
4334 | |
4335 | |
4336 | InstructionCost Cost = 0; |
4337 | for (unsigned ShiftVal = 0; ShiftVal < BitSize; ShiftVal += 64) { |
4338 | APInt Tmp = ImmVal.ashr(ShiftVal).sextOrTrunc(64); |
4339 | int64_t Val = Tmp.getSExtValue(); |
4340 | Cost += getIntImmCost(Val); |
4341 | } |
4342 | |
4343 | return std::max<InstructionCost>(1, Cost); |
4344 | } |
4345 | |
4346 | InstructionCost X86TTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx, |
4347 | const APInt &Imm, Type *Ty, |
4348 | TTI::TargetCostKind CostKind, |
4349 | Instruction *Inst) { |
4350 | assert(Ty->isIntegerTy()); |
4351 | |
4352 | unsigned BitSize = Ty->getPrimitiveSizeInBits(); |
4353 | |
4354 | |
4355 | if (BitSize == 0) |
4356 | return TTI::TCC_Free; |
4357 | |
4358 | unsigned ImmIdx = ~0U; |
4359 | switch (Opcode) { |
4360 | default: |
4361 | return TTI::TCC_Free; |
4362 | case Instruction::GetElementPtr: |
4363 | |
4364 | |
4365 | |
4366 | if (Idx == 0) |
4367 | return 2 * TTI::TCC_Basic; |
4368 | return TTI::TCC_Free; |
4369 | case Instruction::Store: |
4370 | ImmIdx = 0; |
4371 | break; |
4372 | case Instruction::ICmp: |
4373 | |
4374 | |
4375 | |
4376 | |
4377 | |
4378 | if (Idx == 1 && Imm.getBitWidth() == 64) { |
4379 | uint64_t ImmVal = Imm.getZExtValue(); |
4380 | if (ImmVal == 0x100000000ULL || ImmVal == 0xffffffff) |
4381 | return TTI::TCC_Free; |
4382 | } |
4383 | ImmIdx = 1; |
4384 | break; |
4385 | case Instruction::And: |
4386 | |
4387 | |
4388 | |
4389 | if (Idx == 1 && Imm.getBitWidth() == 64 && isUInt<32>(Imm.getZExtValue())) |
4390 | return TTI::TCC_Free; |
4391 | ImmIdx = 1; |
4392 | break; |
4393 | case Instruction::Add: |
4394 | case Instruction::Sub: |
4395 | |
4396 | if (Idx == 1 && Imm.getBitWidth() == 64 && Imm.getZExtValue() == 0x80000000) |
4397 | return TTI::TCC_Free; |
4398 | ImmIdx = 1; |
4399 | break; |
4400 | case Instruction::UDiv: |
4401 | case Instruction::SDiv: |
4402 | case Instruction::URem: |
4403 | case Instruction::SRem: |
4404 | |
4405 | |
4406 | |
4407 | return TTI::TCC_Free; |
4408 | case Instruction::Mul: |
4409 | case Instruction::Or: |
4410 | case Instruction::Xor: |
4411 | ImmIdx = 1; |
4412 | break; |
4413 | |
4414 | case Instruction::Shl: |
4415 | case Instruction::LShr: |
4416 | case Instruction::AShr: |
4417 | if (Idx == 1) |
4418 | return TTI::TCC_Free; |
4419 | break; |
4420 | case Instruction::Trunc: |
4421 | case Instruction::ZExt: |
4422 | case Instruction::SExt: |
4423 | case Instruction::IntToPtr: |
4424 | case Instruction::PtrToInt: |
4425 | case Instruction::BitCast: |
4426 | case Instruction::PHI: |
4427 | case Instruction::Call: |
4428 | case Instruction::Select: |
4429 | case Instruction::Ret: |
4430 | case Instruction::Load: |
4431 | break; |
4432 | } |
4433 | |
4434 | if (Idx == ImmIdx) { |
4435 | int NumConstants = divideCeil(BitSize, 64); |
4436 | InstructionCost Cost = X86TTIImpl::getIntImmCost(Imm, Ty, CostKind); |
4437 | return (Cost <= NumConstants * TTI::TCC_Basic) |
4438 | ? static_cast<int>(TTI::TCC_Free) |
4439 | : Cost; |
4440 | } |
4441 | |
4442 | return X86TTIImpl::getIntImmCost(Imm, Ty, CostKind); |
4443 | } |
4444 | |
4445 | InstructionCost X86TTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, |
4446 | const APInt &Imm, Type *Ty, |
4447 | TTI::TargetCostKind CostKind) { |
4448 | assert(Ty->isIntegerTy()); |
4449 | |
4450 | unsigned BitSize = Ty->getPrimitiveSizeInBits(); |
4451 | |
4452 | |
4453 | if (BitSize == 0) |
4454 | return TTI::TCC_Free; |
4455 | |
4456 | switch (IID) { |
4457 | default: |
4458 | return TTI::TCC_Free; |
4459 | case Intrinsic::sadd_with_overflow: |
4460 | case Intrinsic::uadd_with_overflow: |
4461 | case Intrinsic::ssub_with_overflow: |
4462 | case Intrinsic::usub_with_overflow: |
4463 | case Intrinsic::smul_with_overflow: |
4464 | case Intrinsic::umul_with_overflow: |
4465 | if ((Idx == 1) && Imm.getBitWidth() <= 64 && isInt<32>(Imm.getSExtValue())) |
4466 | return TTI::TCC_Free; |
4467 | break; |
4468 | case Intrinsic::experimental_stackmap: |
4469 | if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue()))) |
4470 | return TTI::TCC_Free; |
4471 | break; |
4472 | case Intrinsic::experimental_patchpoint_void: |
4473 | case Intrinsic::experimental_patchpoint_i64: |
4474 | if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue()))) |
4475 | return TTI::TCC_Free; |
4476 | break; |
4477 | } |
4478 | return X86TTIImpl::getIntImmCost(Imm, Ty, CostKind); |
4479 | } |
4480 | |
4481 | InstructionCost X86TTIImpl::getCFInstrCost(unsigned Opcode, |
4482 | TTI::TargetCostKind CostKind, |
4483 | const Instruction *I) { |
4484 | if (CostKind != TTI::TCK_RecipThroughput) |
4485 | return Opcode == Instruction::PHI ? 0 : 1; |
4486 | |
4487 | return 0; |
4488 | } |
4489 | |
4490 | int X86TTIImpl::getGatherOverhead() const { |
4491 | |
4492 | |
4493 | |
4494 | |
4495 | |
4496 | |
4497 | if (ST->hasAVX512() || (ST->hasAVX2() && ST->hasFastGather())) |
4498 | return 2; |
4499 | |
4500 | return 1024; |
4501 | } |
4502 | |
4503 | int X86TTIImpl::getScatterOverhead() const { |
4504 | if (ST->hasAVX512()) |
4505 | return 2; |
4506 | |
4507 | return 1024; |
4508 | } |
4509 | |
4510 | |
4511 | |
4512 | InstructionCost X86TTIImpl::getGSVectorCost(unsigned Opcode, Type *SrcVTy, |
4513 | const Value *Ptr, Align Alignment, |
4514 | unsigned AddressSpace) { |
4515 | |
4516 | assert(isa<VectorType>(SrcVTy) && "Unexpected type in getGSVectorCost"); |
4517 | unsigned VF = cast<FixedVectorType>(SrcVTy)->getNumElements(); |
4518 | |
4519 | |
4520 | |
4521 | |
4522 | |
4523 | |
4524 | auto getIndexSizeInBits = [](const Value *Ptr, const DataLayout &DL) { |
4525 | unsigned IndexSize = DL.getPointerSizeInBits(); |
4526 | const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr); |
4527 | if (IndexSize < 64 || !GEP) |
4528 | return IndexSize; |
4529 | |
4530 | unsigned NumOfVarIndices = 0; |
4531 | const Value *Ptrs = GEP->getPointerOperand(); |
4532 | if (Ptrs->getType()->isVectorTy() && !getSplatValue(Ptrs)) |
4533 | return IndexSize; |
4534 | for (unsigned i = 1; i < GEP->getNumOperands(); ++i) { |
4535 | if (isa<Constant>(GEP->getOperand(i))) |
4536 | continue; |
4537 | Type *IndxTy = GEP->getOperand(i)->getType(); |
4538 | if (auto *IndexVTy = dyn_cast<VectorType>(IndxTy)) |
4539 | IndxTy = IndexVTy->getElementType(); |
4540 | if ((IndxTy->getPrimitiveSizeInBits() == 64 && |
4541 | !isa<SExtInst>(GEP->getOperand(i))) || |
4542 | ++NumOfVarIndices > 1) |
4543 | return IndexSize; |
4544 | } |
4545 | return (unsigned)32; |
4546 | }; |
4547 | |
4548 | |
4549 | |
4550 | unsigned IndexSize = (ST->hasAVX512() && VF >= 16) |
4551 | ? getIndexSizeInBits(Ptr, DL) |
4552 | : DL.getPointerSizeInBits(); |
4553 | |
4554 | auto *IndexVTy = FixedVectorType::get( |
4555 | IntegerType::get(SrcVTy->getContext(), IndexSize), VF); |
4556 | std::pair<InstructionCost, MVT> IdxsLT = |
4557 | TLI->getTypeLegalizationCost(DL, IndexVTy); |
4558 | std::pair<InstructionCost, MVT> SrcLT = |
4559 | TLI->getTypeLegalizationCost(DL, SrcVTy); |
4560 | InstructionCost::CostType SplitFactor = |
4561 | *std::max(IdxsLT.first, SrcLT.first).getValue(); |
4562 | if (SplitFactor > 1) { |
4563 | |
4564 | auto *SplitSrcTy = |
4565 | FixedVectorType::get(SrcVTy->getScalarType(), VF / SplitFactor); |
4566 | return SplitFactor * getGSVectorCost(Opcode, SplitSrcTy, Ptr, Alignment, |
4567 | AddressSpace); |
4568 | } |
4569 | |
4570 | |
4571 | |
4572 | const int GSOverhead = (Opcode == Instruction::Load) |
4573 | ? getGatherOverhead() |
4574 | : getScatterOverhead(); |
4575 | return GSOverhead + VF * getMemoryOpCost(Opcode, SrcVTy->getScalarType(), |
4576 | MaybeAlign(Alignment), AddressSpace, |
4577 | TTI::TCK_RecipThroughput); |
4578 | } |
4579 | |
4580 | |
4581 | |
4582 | |
4583 | |
4584 | |
4585 | |
4586 | |
4587 | |
4588 | |
4589 | InstructionCost X86TTIImpl::getGSScalarCost(unsigned Opcode, Type *SrcVTy, |
4590 | bool VariableMask, Align Alignment, |
4591 | unsigned AddressSpace) { |
4592 | unsigned VF = cast<FixedVectorType>(SrcVTy)->getNumElements(); |
| 8 | | 'SrcVTy' is a 'FixedVectorType' | |
|
4593 | APInt DemandedElts = APInt::getAllOnesValue(VF); |
4594 | TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; |
4595 | |
4596 | InstructionCost MaskUnpackCost = 0; |
4597 | if (VariableMask) { |
| 9 | | Assuming 'VariableMask' is true | |
|
| |
4598 | auto *MaskTy = |
4599 | FixedVectorType::get(Type::getInt1Ty(SrcVTy->getContext()), VF); |
4600 | MaskUnpackCost = |
4601 | getScalarizationOverhead(MaskTy, DemandedElts, false, true); |
4602 | InstructionCost ScalarCompareCost = getCmpSelInstrCost( |
| 12 | | Calling 'X86TTIImpl::getCmpSelInstrCost' | |
|
4603 | Instruction::ICmp, Type::getInt1Ty(SrcVTy->getContext()), nullptr, |
| 11 | | Passing null pointer value via 3rd parameter 'CondTy' | |
|
4604 | CmpInst::BAD_ICMP_PREDICATE, CostKind); |
4605 | InstructionCost BranchCost = getCFInstrCost(Instruction::Br, CostKind); |
4606 | MaskUnpackCost += VF * (BranchCost + ScalarCompareCost); |
4607 | } |
4608 | |
4609 | |
4610 | InstructionCost MemoryOpCost = |
4611 | VF * getMemoryOpCost(Opcode, SrcVTy->getScalarType(), |
4612 | MaybeAlign(Alignment), AddressSpace, CostKind); |
4613 | |
4614 | InstructionCost InsertExtractCost = 0; |
4615 | if (Opcode == Instruction::Load) |
4616 | for (unsigned i = 0; i < VF; ++i) |
4617 | |
4618 | InsertExtractCost += |
4619 | getVectorInstrCost(Instruction::InsertElement, SrcVTy, i); |
4620 | else |
4621 | for (unsigned i = 0; i < VF; ++i) |
4622 | |
4623 | InsertExtractCost += |
4624 | getVectorInstrCost(Instruction::ExtractElement, SrcVTy, i); |
4625 | |
4626 | return MemoryOpCost + MaskUnpackCost + InsertExtractCost; |
4627 | } |
4628 | |
4629 | |
4630 | InstructionCost X86TTIImpl::getGatherScatterOpCost( |
4631 | unsigned Opcode, Type *SrcVTy, const Value *Ptr, bool VariableMask, |
4632 | Align Alignment, TTI::TargetCostKind CostKind, |
4633 | const Instruction *I = nullptr) { |
4634 | if (CostKind != TTI::TCK_RecipThroughput) { |
| 1 | Assuming 'CostKind' is equal to TCK_RecipThroughput | |
|
| |
4635 | if ((Opcode == Instruction::Load && |
4636 | isLegalMaskedGather(SrcVTy, Align(Alignment))) || |
4637 | (Opcode == Instruction::Store && |
4638 | isLegalMaskedScatter(SrcVTy, Align(Alignment)))) |
4639 | return 1; |
4640 | return BaseT::getGatherScatterOpCost(Opcode, SrcVTy, Ptr, VariableMask, |
4641 | Alignment, CostKind, I); |
4642 | } |
4643 | |
4644 | assert(SrcVTy->isVectorTy() && "Unexpected data type for Gather/Scatter"); |
4645 | PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType()); |
| 3 | | Assuming the object is a 'PointerType' | |
|
4646 | if (!PtrTy && Ptr->getType()->isVectorTy()) |
4647 | PtrTy = dyn_cast<PointerType>( |
4648 | cast<VectorType>(Ptr->getType())->getElementType()); |
4649 | assert(PtrTy && "Unexpected type for Ptr argument"); |
4650 | unsigned AddressSpace = PtrTy->getAddressSpace(); |
4651 | |
4652 | if ((Opcode == Instruction::Load && |
| 4 | | Assuming 'Opcode' is not equal to Load | |
|
| |
4653 | !isLegalMaskedGather(SrcVTy, Align(Alignment))) || |
4654 | (Opcode == Instruction::Store && |
| 5 | | Assuming 'Opcode' is equal to Store | |
|
4655 | !isLegalMaskedScatter(SrcVTy, Align(Alignment)))) |
4656 | return getGSScalarCost(Opcode, SrcVTy, VariableMask, Alignment, |
| 7 | | Calling 'X86TTIImpl::getGSScalarCost' | |
|
4657 | AddressSpace); |
4658 | |
4659 | return getGSVectorCost(Opcode, SrcVTy, Ptr, Alignment, AddressSpace); |
4660 | } |
4661 | |
4662 | bool X86TTIImpl::isLSRCostLess(TargetTransformInfo::LSRCost &C1, |
4663 | TargetTransformInfo::LSRCost &C2) { |
4664 | |
4665 | return std::tie(C1.Insns, C1.NumRegs, C1.AddRecCost, |
4666 | C1.NumIVMuls, C1.NumBaseAdds, |
4667 | C1.ScaleCost, C1.ImmCost, C1.SetupCost) < |
4668 | std::tie(C2.Insns, C2.NumRegs, C2.AddRecCost, |
4669 | C2.NumIVMuls, C2.NumBaseAdds, |
4670 | C2.ScaleCost, C2.ImmCost, C2.SetupCost); |
4671 | } |
4672 | |
4673 | bool X86TTIImpl::canMacroFuseCmp() { |
4674 | return ST->hasMacroFusion() || ST->hasBranchFusion(); |
4675 | } |
4676 | |
4677 | bool X86TTIImpl::isLegalMaskedLoad(Type *DataTy, Align Alignment) { |
4678 | if (!ST->hasAVX()) |
4679 | return false; |
4680 | |
4681 | |
4682 | if (isa<VectorType>(DataTy) && |
4683 | cast<FixedVectorType>(DataTy)->getNumElements() == 1) |
4684 | return false; |
4685 | Type *ScalarTy = DataTy->getScalarType(); |
4686 | |
4687 | if (ScalarTy->isPointerTy()) |
4688 | return true; |
4689 | |
4690 | if (ScalarTy->isFloatTy() || ScalarTy->isDoubleTy()) |
4691 | return true; |
4692 | |
4693 | if (!ScalarTy->isIntegerTy()) |
4694 | return false; |
4695 | |
4696 | unsigned IntWidth = ScalarTy->getIntegerBitWidth(); |
4697 | return IntWidth == 32 || IntWidth == 64 || |
4698 | ((IntWidth == 8 || IntWidth == 16) && ST->hasBWI()); |
4699 | } |
4700 | |
4701 | bool X86TTIImpl::isLegalMaskedStore(Type *DataType, Align Alignment) { |
4702 | return isLegalMaskedLoad(DataType, Alignment); |
4703 | } |
4704 | |
4705 | bool X86TTIImpl::isLegalNTLoad(Type *DataType, Align Alignment) { |
4706 | unsigned DataSize = DL.getTypeStoreSize(DataType); |
4707 | |
4708 | |
4709 | |
4710 | if (Alignment >= DataSize && (DataSize == 16 || DataSize == 32)) |
4711 | return DataSize == 16 ? ST->hasSSE1() : ST->hasAVX2(); |
4712 | |
4713 | return false; |
4714 | } |
4715 | |
4716 | bool X86TTIImpl::isLegalNTStore(Type *DataType, Align Alignment) { |
4717 | unsigned DataSize = DL.getTypeStoreSize(DataType); |
4718 | |
4719 | |
4720 | |
4721 | if (ST->hasSSE4A() && (DataType->isFloatTy() || DataType->isDoubleTy())) |
4722 | return true; |
4723 | |
4724 | |
4725 | |
4726 | |
4727 | if (Alignment < DataSize || DataSize < 4 || DataSize > 32 || |
4728 | !isPowerOf2_32(DataSize)) |
4729 | return false; |
4730 | |
4731 | |
4732 | |
4733 | if (DataSize == 32) |
4734 | return ST->hasAVX(); |
4735 | else if (DataSize == 16) |
4736 | return ST->hasSSE1(); |
4737 | return true; |
4738 | } |
4739 | |
4740 | bool X86TTIImpl::isLegalMaskedExpandLoad(Type *DataTy) { |
4741 | if (!isa<VectorType>(DataTy)) |
4742 | return false; |
4743 | |
4744 | if (!ST->hasAVX512()) |
4745 | return false; |
4746 | |
4747 | |
4748 | if (cast<FixedVectorType>(DataTy)->getNumElements() == 1) |
4749 | return false; |
4750 | |
4751 | Type *ScalarTy = cast<VectorType>(DataTy)->getElementType(); |
4752 | |
4753 | if (ScalarTy->isFloatTy() || ScalarTy->isDoubleTy()) |
4754 | return true; |
4755 | |
4756 | if (!ScalarTy->isIntegerTy()) |
4757 | return false; |
4758 | |
4759 | unsigned IntWidth = ScalarTy->getIntegerBitWidth(); |
4760 | return IntWidth == 32 || IntWidth == 64 || |
4761 | ((IntWidth == 8 || IntWidth == 16) && ST->hasVBMI2()); |
4762 | } |
4763 | |
4764 | bool X86TTIImpl::isLegalMaskedCompressStore(Type *DataTy) { |
4765 | return isLegalMaskedExpandLoad(DataTy); |
4766 | } |
4767 | |
4768 | bool X86TTIImpl::isLegalMaskedGather(Type *DataTy, Align Alignment) { |
4769 | |
4770 | |
4771 | |
4772 | if (!(ST->hasAVX512() || (ST->hasFastGather() && ST->hasAVX2()))) |
4773 | return false; |
4774 | |
4775 | |
4776 | |
4777 | |
4778 | |
4779 | |
4780 | |
4781 | |
4782 | |
4783 | |
4784 | |
4785 | |
4786 | |
4787 | if (auto *DataVTy = dyn_cast<FixedVectorType>(DataTy)) { |
4788 | unsigned NumElts = DataVTy->getNumElements(); |
4789 | if (NumElts == 1) |
4790 | return false; |
4791 | |
4792 | |
4793 | |
4794 | |
4795 | |
4796 | |
4797 | if (ST->hasAVX512() && (NumElts == 2 || (NumElts == 4 && !ST->hasVLX()))) |
4798 | return false; |
4799 | } |
4800 | Type *ScalarTy = DataTy->getScalarType(); |
4801 | if (ScalarTy->isPointerTy()) |
4802 | return true; |
4803 | |
4804 | if (ScalarTy->isFloatTy() || ScalarTy->isDoubleTy()) |
4805 | return true; |
4806 | |
4807 | if (!ScalarTy->isIntegerTy()) |
4808 | return false; |
4809 | |
4810 | unsigned IntWidth = ScalarTy->getIntegerBitWidth(); |
4811 | return IntWidth == 32 || IntWidth == 64; |
4812 | } |
4813 | |
4814 | bool X86TTIImpl::isLegalMaskedScatter(Type *DataType, Align Alignment) { |
4815 | |
4816 | if (!ST->hasAVX512()) |
4817 | return false; |
4818 | return isLegalMaskedGather(DataType, Alignment); |
4819 | } |
4820 | |
4821 | bool X86TTIImpl::hasDivRemOp(Type *DataType, bool IsSigned) { |
4822 | EVT VT = TLI->getValueType(DL, DataType); |
4823 | return TLI->isOperationLegal(IsSigned ? ISD::SDIVREM : ISD::UDIVREM, VT); |
4824 | } |
4825 | |
4826 | bool X86TTIImpl::isFCmpOrdCheaperThanFCmpZero(Type *Ty) { |
4827 | return false; |
4828 | } |
4829 | |
4830 | bool X86TTIImpl::areInlineCompatible(const Function *Caller, |
4831 | const Function *Callee) const { |
4832 | const TargetMachine &TM = getTLI()->getTargetMachine(); |
4833 | |
4834 | |
4835 | const FeatureBitset &CallerBits = |
4836 | TM.getSubtargetImpl(*Caller)->getFeatureBits(); |
4837 | const FeatureBitset &CalleeBits = |
4838 | TM.getSubtargetImpl(*Callee)->getFeatureBits(); |
4839 | |
4840 | FeatureBitset RealCallerBits = CallerBits & ~InlineFeatureIgnoreList; |
4841 | FeatureBitset RealCalleeBits = CalleeBits & ~InlineFeatureIgnoreList; |
4842 | return (RealCallerBits & RealCalleeBits) == RealCalleeBits; |
4843 | } |
4844 | |
4845 | bool X86TTIImpl::areFunctionArgsABICompatible( |
4846 | const Function *Caller, const Function *Callee, |
4847 | SmallPtrSetImpl<Argument *> &Args) const { |
4848 | if (!BaseT::areFunctionArgsABICompatible(Caller, Callee, Args)) |
4849 | return false; |
4850 | |
4851 | |
4852 | |
4853 | |
4854 | const TargetMachine &TM = getTLI()->getTargetMachine(); |
4855 | |
4856 | if (TM.getSubtarget<X86Subtarget>(*Caller).useAVX512Regs() == |
4857 | TM.getSubtarget<X86Subtarget>(*Callee).useAVX512Regs()) |
4858 | return true; |
4859 | |
4860 | |
4861 | |
4862 | |
4863 | |
4864 | |
4865 | |
4866 | return llvm::none_of(Args, [](Argument *A) { |
4867 | auto *EltTy = cast<PointerType>(A->getType())->getElementType(); |
4868 | return EltTy->isVectorTy() || EltTy->isAggregateType(); |
4869 | }); |
4870 | } |
4871 | |
4872 | X86TTIImpl::TTI::MemCmpExpansionOptions |
4873 | X86TTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const { |
4874 | TTI::MemCmpExpansionOptions Options; |
4875 | Options.MaxNumLoads = TLI->getMaxExpandSizeMemcmp(OptSize); |
4876 | Options.NumLoadsPerBlock = 2; |
4877 | |
4878 | Options.AllowOverlappingLoads = true; |
4879 | if (IsZeroCmp) { |
4880 | |
4881 | |
4882 | const unsigned PreferredWidth = ST->getPreferVectorWidth(); |
4883 | if (PreferredWidth >= 512 && ST->hasAVX512()) Options.LoadSizes.push_back(64); |
4884 | if (PreferredWidth >= 256 && ST->hasAVX()) Options.LoadSizes.push_back(32); |
4885 | if (PreferredWidth >= 128 && ST->hasSSE2()) Options.LoadSizes.push_back(16); |
4886 | } |
4887 | if (ST->is64Bit()) { |
4888 | Options.LoadSizes.push_back(8); |
4889 | } |
4890 | Options.LoadSizes.push_back(4); |
4891 | Options.LoadSizes.push_back(2); |
4892 | Options.LoadSizes.push_back(1); |
4893 | return Options; |
4894 | } |
4895 | |
4896 | bool X86TTIImpl::enableInterleavedAccessVectorization() { |
4897 | |
4898 | |
4899 | |
4900 | return !(ST->isAtom()); |
4901 | } |
4902 | |
4903 | |
4904 | |
4905 | |
4906 | |
4907 | |
4908 | |
4909 | |
4910 | |
4911 | |
4912 | |
4913 | |
4914 | |
4915 | InstructionCost X86TTIImpl::getInterleavedMemoryOpCostAVX2( |
4916 | unsigned Opcode, FixedVectorType *VecTy, unsigned Factor, |
4917 | ArrayRef<unsigned> Indices, Align Alignment, unsigned AddressSpace, |
4918 | TTI::TargetCostKind CostKind, bool UseMaskForCond, bool UseMaskForGaps) { |
4919 | |
4920 | if (UseMaskForCond || UseMaskForGaps) |
4921 | return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, |
4922 | Alignment, AddressSpace, CostKind, |
4923 | UseMaskForCond, UseMaskForGaps); |
4924 | |
4925 | |
4926 | |
4927 | if (Indices.size() && Indices.size() != Factor) |
4928 | return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, |
4929 | Alignment, AddressSpace, CostKind); |
4930 | |
4931 | |
4932 | |
4933 | |
4934 | MVT LegalVT = getTLI()->getTypeLegalizationCost(DL, VecTy).second; |
4935 | |
4936 | |
4937 | |
4938 | |
4939 | if (!LegalVT.isVector()) |
4940 | return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, |
4941 | Alignment, AddressSpace, CostKind); |
4942 | |
4943 | unsigned VF = VecTy->getNumElements() / Factor; |
4944 | Type *ScalarTy = VecTy->getElementType(); |
4945 | |
4946 | if (!ScalarTy->isIntegerTy()) |
4947 | ScalarTy = |
4948 | Type::getIntNTy(ScalarTy->getContext(), DL.getTypeSizeInBits(ScalarTy)); |
4949 | |
4950 | |
4951 | InstructionCost MemOpCosts = getMemoryOpCost( |
4952 | Opcode, VecTy, MaybeAlign(Alignment), AddressSpace, CostKind); |
4953 | |
4954 | auto *VT = FixedVectorType::get(ScalarTy, VF); |
4955 | EVT ETy = TLI->getValueType(DL, VT); |
4956 | if (!ETy.isSimple()) |
4957 | return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, |
4958 | Alignment, AddressSpace, CostKind); |
4959 | |
4960 | |
4961 | |
4962 | |
4963 | |
4964 | |
4965 | |
4966 | |
4967 | static const CostTblEntry AVX2InterleavedLoadTbl[] = { |
4968 | {2, MVT::v4i64, 6}, |
4969 | |
4970 | {3, MVT::v2i8, 10}, |
4971 | {3, MVT::v4i8, 4}, |
4972 | {3, MVT::v8i8, 9}, |
4973 | {3, MVT::v16i8, 11}, |
4974 | {3, MVT::v32i8, 13}, |
4975 | |
4976 | {3, MVT::v8i32, 17}, |
4977 | |
4978 | {4, MVT::v2i8, 12}, |
4979 | {4, MVT::v4i8, 4}, |
4980 | {4, MVT::v8i8, 20}, |
4981 | {4, MVT::v16i8, 39}, |
4982 | {4, MVT::v32i8, 80}, |
4983 | |
4984 | {8, MVT::v8i32, 40} |
4985 | }; |
4986 | |
4987 | static const CostTblEntry AVX2InterleavedStoreTbl[] = { |
4988 | {2, MVT::v4i64, 6}, |
4989 | |
4990 | {3, MVT::v2i8, 7}, |
4991 | {3, MVT::v4i8, 8}, |
4992 | {3, MVT::v8i8, 11}, |
4993 | {3, MVT::v16i8, 11}, |
4994 | {3, MVT::v32i8, 13}, |
4995 | |
4996 | {4, MVT::v2i8, 12}, |
4997 | {4, MVT::v4i8, 9}, |
4998 | {4, MVT::v8i8, 10}, |
4999 | {4, MVT::v16i8, 10}, |
5000 | {4, MVT::v32i8, 12} |
5001 | }; |
5002 | |
5003 | if (Opcode == Instruction::Load) { |
5004 | if (const auto *Entry = |
5005 | CostTableLookup(AVX2InterleavedLoadTbl, Factor, ETy.getSimpleVT())) |
5006 | return MemOpCosts + Entry->Cost; |
5007 | } else { |
5008 | assert(Opcode == Instruction::Store && |
5009 | "Expected Store Instruction at this point"); |
5010 | if (const auto *Entry = |
5011 | CostTableLookup(AVX2InterleavedStoreTbl, Factor, ETy.getSimpleVT())) |
5012 | return MemOpCosts + Entry->Cost; |
5013 | } |
5014 | |
5015 | return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, |
5016 | Alignment, AddressSpace, CostKind); |
5017 | } |
5018 | |
5019 | |
5020 | |
5021 | |
5022 | |
5023 | InstructionCost X86TTIImpl::getInterleavedMemoryOpCostAVX512( |
5024 | unsigned Opcode, FixedVectorType *VecTy, unsigned Factor, |
5025 | ArrayRef<unsigned> Indices, Align Alignment, unsigned AddressSpace, |
5026 | TTI::TargetCostKind CostKind, bool UseMaskForCond, bool UseMaskForGaps) { |
5027 | |
5028 | if (UseMaskForCond || UseMaskForGaps) |
5029 | return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, |
5030 | Alignment, AddressSpace, CostKind, |
5031 | UseMaskForCond, UseMaskForGaps); |
5032 | |
5033 | |
5034 | |
5035 | |
5036 | |
5037 | |
5038 | |
5039 | MVT LegalVT = getTLI()->getTypeLegalizationCost(DL, VecTy).second; |
5040 | unsigned VecTySize = DL.getTypeStoreSize(VecTy); |
5041 | unsigned LegalVTSize = LegalVT.getStoreSize(); |
5042 | unsigned NumOfMemOps = (VecTySize + LegalVTSize - 1) / LegalVTSize; |
5043 | |
5044 | |
5045 | auto *SingleMemOpTy = FixedVectorType::get(VecTy->getElementType(), |
5046 | LegalVT.getVectorNumElements()); |
5047 | InstructionCost MemOpCost = getMemoryOpCost( |
5048 | Opcode, SingleMemOpTy, MaybeAlign(Alignment), AddressSpace, CostKind); |
5049 | |
5050 | unsigned VF = VecTy->getNumElements() / Factor; |
5051 | MVT VT = MVT::getVectorVT(MVT::getVT(VecTy->getScalarType()), VF); |
5052 | |
5053 | if (Opcode == Instruction::Load) { |
5054 | |
5055 | |
5056 | |
5057 | |
5058 | |
5059 | |
5060 | static const CostTblEntry AVX512InterleavedLoadTbl[] = { |
5061 | {3, MVT::v16i8, 12}, |
5062 | {3, MVT::v32i8, 14}, |
5063 | {3, MVT::v64i8, 22}, |
5064 | }; |
5065 | |
5066 | if (const auto *Entry = |
5067 | CostTableLookup(AVX512InterleavedLoadTbl, Factor, VT)) |
5068 | return NumOfMemOps * MemOpCost + Entry->Cost; |
5069 | |
5070 | |
5071 | |
5072 | |
5073 | |
5074 | TTI::ShuffleKind ShuffleKind = |
5075 | (NumOfMemOps > 1) ? TTI::SK_PermuteTwoSrc : TTI::SK_PermuteSingleSrc; |
5076 | |
5077 | InstructionCost ShuffleCost = |
5078 | getShuffleCost(ShuffleKind, SingleMemOpTy, None, 0, nullptr); |
5079 | |
5080 | unsigned NumOfLoadsInInterleaveGrp = |
5081 | Indices.size() ? Indices.size() : Factor; |
5082 | auto *ResultTy = FixedVectorType::get(VecTy->getElementType(), |
5083 | VecTy->getNumElements() / Factor); |
5084 | InstructionCost NumOfResults = |
5085 | getTLI()->getTypeLegalizationCost(DL, ResultTy).first * |
5086 | NumOfLoadsInInterleaveGrp; |
5087 | |
5088 | |
5089 | |
5090 | unsigned NumOfUnfoldedLoads = |
5091 | NumOfResults > 1 ? NumOfMemOps : NumOfMemOps / 2; |
5092 | |
5093 | |
5094 | unsigned NumOfShufflesPerResult = |
5095 | std::max((unsigned)1, (unsigned)(NumOfMemOps - 1)); |
5096 | |
5097 | |
5098 | |
5099 | |
5100 | InstructionCost NumOfMoves = 0; |
5101 | if (NumOfResults > 1 && ShuffleKind == TTI::SK_PermuteTwoSrc) |
5102 | NumOfMoves = NumOfResults * NumOfShufflesPerResult / 2; |
5103 | |
5104 | InstructionCost Cost = NumOfResults * NumOfShufflesPerResult * ShuffleCost + |
5105 | NumOfUnfoldedLoads * MemOpCost + NumOfMoves; |
5106 | |
5107 | return Cost; |
5108 | } |
5109 | |
5110 | |
5111 | assert(Opcode == Instruction::Store && |
5112 | "Expected Store Instruction at this point"); |
5113 | |
5114 | static const CostTblEntry AVX512InterleavedStoreTbl[] = { |
5115 | {3, MVT::v16i8, 12}, |
5116 | {3, MVT::v32i8, 14}, |
5117 | {3, MVT::v64i8, 26}, |
5118 | |
5119 | {4, MVT::v8i8, 10}, |
5120 | {4, MVT::v16i8, 11}, |
5121 | {4, MVT::v32i8, 14}, |
5122 | {4, MVT::v64i8, 24} |
5123 | }; |
5124 | |
5125 | if (const auto *Entry = |
5126 | CostTableLookup(AVX512InterleavedStoreTbl, Factor, VT)) |
5127 | return NumOfMemOps * MemOpCost + Entry->Cost; |
5128 | |
5129 | |
5130 | |
5131 | |
5132 | unsigned NumOfSources = Factor; |
5133 | InstructionCost ShuffleCost = |
5134 | getShuffleCost(TTI::SK_PermuteTwoSrc, SingleMemOpTy, None, 0, nullptr); |
5135 | unsigned NumOfShufflesPerStore = NumOfSources - 1; |
5136 | |
5137 | |
5138 | |
5139 | unsigned NumOfMoves = NumOfMemOps * NumOfShufflesPerStore / 2; |
5140 | InstructionCost Cost = |
5141 | NumOfMemOps * (MemOpCost + NumOfShufflesPerStore * ShuffleCost) + |
5142 | NumOfMoves; |
5143 | return Cost; |
5144 | } |
5145 | |
5146 | InstructionCost X86TTIImpl::getInterleavedMemoryOpCost( |
5147 | unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices, |
5148 | Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, |
5149 | bool UseMaskForCond, bool UseMaskForGaps) { |
5150 | auto isSupportedOnAVX512 = [](Type *VecTy, bool HasBW) { |
5151 | Type *EltTy = cast<VectorType>(VecTy)->getElementType(); |
5152 | if (EltTy->isFloatTy() || EltTy->isDoubleTy() || EltTy->isIntegerTy(64) || |
5153 | EltTy->isIntegerTy(32) || EltTy->isPointerTy()) |
5154 | return true; |
5155 | if (EltTy->isIntegerTy(16) || EltTy->isIntegerTy(8)) |
5156 | return HasBW; |
5157 | return false; |
5158 | }; |
5159 | if (ST->hasAVX512() && isSupportedOnAVX512(VecTy, ST->hasBWI())) |
5160 | return getInterleavedMemoryOpCostAVX512( |
5161 | Opcode, cast<FixedVectorType>(VecTy), Factor, Indices, Alignment, |
5162 | AddressSpace, CostKind, UseMaskForCond, UseMaskForGaps); |
5163 | if (ST->hasAVX2()) |
5164 | return getInterleavedMemoryOpCostAVX2( |
5165 | Opcode, cast<FixedVectorType>(VecTy), Factor, Indices, Alignment, |
5166 | AddressSpace, CostKind, UseMaskForCond, UseMaskForGaps); |
5167 | |
5168 | return BaseT::getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices, |
5169 | Alignment, AddressSpace, CostKind, |
5170 | UseMaskForCond, UseMaskForGaps); |
5171 | } |