Bug Summary

File: src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
Warning: line 1114, column 10
Called C++ object pointer is null

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple amd64-unknown-openbsd7.0 -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name X86ISelLowering.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 1 -fhalf-no-semantic-interposition -mframe-pointer=all -relaxed-aliasing -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/usr/src/gnu/usr.bin/clang/libLLVM/obj -resource-dir /usr/local/lib/clang/13.0.0 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I 
/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Analysis -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ASMParser -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/BinaryFormat -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Bitcode -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Bitcode -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Bitstream -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /include/llvm/CodeGen -I /include/llvm/CodeGen/PBQP -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/IR -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/IR -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Coroutines -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ProfileData/Coverage -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/CodeView -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/DWARF -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/MSF -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/PDB -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Demangle -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ExecutionEngine -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ExecutionEngine/JITLink -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ExecutionEngine/Orc -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend -I 
/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend/OpenACC -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend/OpenMP -I /include/llvm/CodeGen/GlobalISel -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/IRReader -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/InstCombine -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/Transforms/InstCombine -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/LTO -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Linker -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/MC -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/MC/MCParser -I /include/llvm/CodeGen/MIRParser -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Object -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Option -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Passes -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ProfileData -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Scalar -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ADT -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Support -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/Symbolize -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Target -I 
/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Utils -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Vectorize -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/IPO -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include -I /usr/src/gnu/usr.bin/clang/libLLVM/../include -I /usr/src/gnu/usr.bin/clang/libLLVM/obj -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include -D NDEBUG -D __STDC_LIMIT_MACROS -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D LLVM_PREFIX="/usr" -D PIC -internal-isystem /usr/include/c++/v1 -internal-isystem /usr/local/lib/clang/13.0.0/include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/usr/src/gnu/usr.bin/clang/libLLVM/obj -ferror-limit 19 -fvisibility-inlines-hidden -fwrapv -D_RET_PROTECTOR -ret-protector -fno-rtti -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -fno-builtin-malloc 
-fno-builtin-calloc -fno-builtin-realloc -fno-builtin-valloc -fno-builtin-free -fno-builtin-strdup -fno-builtin-strndup -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /home/ben/Projects/vmm/scan-build/2022-01-12-194120-40624-1 -x c++ /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86/X86ISelLowering.cpp

/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86/X86ISelLowering.cpp

1//===-- X86ISelLowering.cpp - X86 DAG Lowering Implementation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that X86 uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "X86ISelLowering.h"
15#include "MCTargetDesc/X86ShuffleDecode.h"
16#include "X86.h"
17#include "X86CallingConv.h"
18#include "X86FrameLowering.h"
19#include "X86InstrBuilder.h"
20#include "X86IntrinsicsInfo.h"
21#include "X86MachineFunctionInfo.h"
22#include "X86TargetMachine.h"
23#include "X86TargetObjectFile.h"
24#include "llvm/ADT/SmallBitVector.h"
25#include "llvm/ADT/SmallSet.h"
26#include "llvm/ADT/Statistic.h"
27#include "llvm/ADT/StringExtras.h"
28#include "llvm/ADT/StringSwitch.h"
29#include "llvm/Analysis/BlockFrequencyInfo.h"
30#include "llvm/Analysis/EHPersonalities.h"
31#include "llvm/Analysis/ObjCARCUtil.h"
32#include "llvm/Analysis/ProfileSummaryInfo.h"
33#include "llvm/Analysis/VectorUtils.h"
34#include "llvm/CodeGen/IntrinsicLowering.h"
35#include "llvm/CodeGen/MachineFrameInfo.h"
36#include "llvm/CodeGen/MachineFunction.h"
37#include "llvm/CodeGen/MachineInstrBuilder.h"
38#include "llvm/CodeGen/MachineJumpTableInfo.h"
39#include "llvm/CodeGen/MachineLoopInfo.h"
40#include "llvm/CodeGen/MachineModuleInfo.h"
41#include "llvm/CodeGen/MachineRegisterInfo.h"
42#include "llvm/CodeGen/TargetLowering.h"
43#include "llvm/CodeGen/WinEHFuncInfo.h"
44#include "llvm/IR/CallingConv.h"
45#include "llvm/IR/Constants.h"
46#include "llvm/IR/DerivedTypes.h"
47#include "llvm/IR/DiagnosticInfo.h"
48#include "llvm/IR/Function.h"
49#include "llvm/IR/GlobalAlias.h"
50#include "llvm/IR/GlobalVariable.h"
51#include "llvm/IR/Instructions.h"
52#include "llvm/IR/Intrinsics.h"
53#include "llvm/IR/IRBuilder.h"
54#include "llvm/MC/MCAsmInfo.h"
55#include "llvm/MC/MCContext.h"
56#include "llvm/MC/MCExpr.h"
57#include "llvm/MC/MCSymbol.h"
58#include "llvm/Support/CommandLine.h"
59#include "llvm/Support/Debug.h"
60#include "llvm/Support/ErrorHandling.h"
61#include "llvm/Support/KnownBits.h"
62#include "llvm/Support/MathExtras.h"
63#include "llvm/Target/TargetOptions.h"
64#include <algorithm>
65#include <bitset>
66#include <cctype>
67#include <numeric>
68using namespace llvm;
69
70#define DEBUG_TYPE "x86-isel"
71
72STATISTIC(NumTailCalls, "Number of tail calls"); // expands to: static llvm::Statistic NumTailCalls = {"x86-isel", "NumTailCalls", "Number of tail calls"}
73
/// Hidden integer command-line option "x86-experimental-pref-loop-alignment",
/// initialized to 4. Per its description string, the value is the preferred
/// loop alignment expressed as log2 of a byte count. Where the value is
/// consumed is not visible in this excerpt.
/// NOTE(review): the first two description literals are concatenated with no
/// space between "bytes)" and "(the" -- confirm whether that is intended.
74static cl::opt<int> ExperimentalPrefLoopAlignment(
75 "x86-experimental-pref-loop-alignment", cl::init(4),
76 cl::desc(
77 "Sets the preferable loop alignment for experiments (as log2 bytes)"
78 "(the last x86-experimental-pref-loop-alignment bits"
79 " of the loop header PC will be 0)."),
80 cl::Hidden);
81
/// Hidden integer command-line option
/// "x86-experimental-pref-innermost-loop-alignment", initialized to 4. Per its
/// description string, the value is a log2 byte alignment that applies to
/// innermost loops only and, when specified, overrides the alignment set by
/// x86-experimental-pref-loop-alignment. Where the value is consumed is not
/// visible in this excerpt.
82static cl::opt<int> ExperimentalPrefInnermostLoopAlignment(
83 "x86-experimental-pref-innermost-loop-alignment", cl::init(4),
84 cl::desc(
85 "Sets the preferable loop alignment for experiments (as log2 bytes) "
86 "for innermost loops only. If specified, this option overrides "
87 "alignment set by x86-experimental-pref-loop-alignment."),
88 cl::Hidden);
89
/// Hidden boolean command-line option "mul-constant-optimization", initialized
/// to true. Per its description string, it controls replacing 'mul x, Const'
/// with other instructions such as SHIFT and LEA. The code that reads this
/// flag is not visible in this excerpt.
90static cl::opt<bool> MulConstantOptimization(
91 "mul-constant-optimization", cl::init(true),
92 cl::desc("Replace 'mul x, Const' with more effective instructions like "
93 "SHIFT, LEA, etc."),
94 cl::Hidden);
95
/// Hidden boolean command-line option "x86-experimental-unordered-atomic-isel",
/// initialized to false. Per its description string, enabling it selects
/// LoadSDNode/StoreSDNode instead of AtomicSDNode for unordered atomic loads
/// and stores. The code that reads this flag is not visible in this excerpt.
96static cl::opt<bool> ExperimentalUnorderedISEL(
97 "x86-experimental-unordered-atomic-isel", cl::init(false),
98 cl::desc("Use LoadSDNode and StoreSDNode instead of "
99 "AtomicSDNode for unordered atomic loads and "
100 "stores respectively."),
101 cl::Hidden);
102
103/// Call this when the user attempts to do something unsupported, like
104/// returning a double without SSE2 enabled on x86_64. This is not fatal, unlike
105/// report_fatal_error, so calling code should attempt to recover without
106/// crashing.
///
/// \param DAG supplies both the MachineFunction whose Function is named in the
///            diagnostic and the context the diagnostic is reported to.
/// \param dl  location of the offending node; only its DebugLoc is used.
/// \param Msg message text forwarded to DiagnosticInfoUnsupported.
107static void errorUnsupported(SelectionDAG &DAG, const SDLoc &dl,
108 const char *Msg) {
109 MachineFunction &MF = DAG.getMachineFunction();
// Build the diagnostic from the enclosing function, the message and the debug
// location, then hand it to the DAG's context; nothing is returned to the caller.
110 DAG.getContext()->diagnose(
111 DiagnosticInfoUnsupported(MF.getFunction(), Msg, dl.getDebugLoc()));
112}
113
114X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
115 const X86Subtarget &STI)
116 : TargetLowering(TM), Subtarget(STI) {
117 bool UseX87 = !Subtarget.useSoftFloat() && Subtarget.hasX87();
118 X86ScalarSSEf64 = Subtarget.hasSSE2();
119 X86ScalarSSEf32 = Subtarget.hasSSE1();
120 MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
121
122 // Set up the TargetLowering object.
123
124 // X86 is weird. It always uses i8 for shift amounts and setcc results.
125 setBooleanContents(ZeroOrOneBooleanContent);
126 // X86-SSE is even stranger. It uses -1 or 0 for vector masks.
127 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
128
129 // For 64-bit, since we have so many registers, use the ILP scheduler.
130 // For 32-bit, use the register pressure specific scheduling.
131 // For Atom, always use ILP scheduling.
132 if (Subtarget.isAtom())
133 setSchedulingPreference(Sched::ILP);
134 else if (Subtarget.is64Bit())
135 setSchedulingPreference(Sched::ILP);
136 else
137 setSchedulingPreference(Sched::RegPressure);
138 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
139 setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister());
140
141 // Bypass expensive divides and use cheaper ones.
142 if (TM.getOptLevel() >= CodeGenOpt::Default) {
143 if (Subtarget.hasSlowDivide32())
144 addBypassSlowDiv(32, 8);
145 if (Subtarget.hasSlowDivide64() && Subtarget.is64Bit())
146 addBypassSlowDiv(64, 32);
147 }
148
149 // Setup Windows compiler runtime calls.
150 if (Subtarget.isTargetWindowsMSVC() || Subtarget.isTargetWindowsItanium()) {
151 static const struct {
152 const RTLIB::Libcall Op;
153 const char * const Name;
154 const CallingConv::ID CC;
155 } LibraryCalls[] = {
156 { RTLIB::SDIV_I64, "_alldiv", CallingConv::X86_StdCall },
157 { RTLIB::UDIV_I64, "_aulldiv", CallingConv::X86_StdCall },
158 { RTLIB::SREM_I64, "_allrem", CallingConv::X86_StdCall },
159 { RTLIB::UREM_I64, "_aullrem", CallingConv::X86_StdCall },
160 { RTLIB::MUL_I64, "_allmul", CallingConv::X86_StdCall },
161 };
162
163 for (const auto &LC : LibraryCalls) {
164 setLibcallName(LC.Op, LC.Name);
165 setLibcallCallingConv(LC.Op, LC.CC);
166 }
167 }
168
169 if (Subtarget.getTargetTriple().isOSMSVCRT()) {
170 // MSVCRT doesn't have powi; fall back to pow
171 setLibcallName(RTLIB::POWI_F32, nullptr);
172 setLibcallName(RTLIB::POWI_F64, nullptr);
173 }
174
175 // If we don't have cmpxchg8b (meaning this is a 386/486), limit atomic size to
176 // 32 bits so the AtomicExpandPass will expand it so we don't need cmpxchg8b.
177 // FIXME: Should we be limiting the atomic size on other configs? Default is
178 // 1024.
179 if (!Subtarget.hasCmpxchg8b())
180 setMaxAtomicSizeInBitsSupported(32);
181
182 // Set up the register classes.
183 addRegisterClass(MVT::i8, &X86::GR8RegClass);
184 addRegisterClass(MVT::i16, &X86::GR16RegClass);
185 addRegisterClass(MVT::i32, &X86::GR32RegClass);
186 if (Subtarget.is64Bit())
187 addRegisterClass(MVT::i64, &X86::GR64RegClass);
188
189 for (MVT VT : MVT::integer_valuetypes())
190 setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
191
192 // We don't accept any truncstore of integer registers.
193 setTruncStoreAction(MVT::i64, MVT::i32, Expand);
194 setTruncStoreAction(MVT::i64, MVT::i16, Expand);
195 setTruncStoreAction(MVT::i64, MVT::i8 , Expand);
196 setTruncStoreAction(MVT::i32, MVT::i16, Expand);
197 setTruncStoreAction(MVT::i32, MVT::i8 , Expand);
198 setTruncStoreAction(MVT::i16, MVT::i8, Expand);
199
200 setTruncStoreAction(MVT::f64, MVT::f32, Expand);
201
202 // SETOEQ and SETUNE require checking two conditions.
203 for (auto VT : {MVT::f32, MVT::f64, MVT::f80}) {
204 setCondCodeAction(ISD::SETOEQ, VT, Expand);
205 setCondCodeAction(ISD::SETUNE, VT, Expand);
206 }
207
208 // Integer absolute.
209 if (Subtarget.hasCMov()) {
210 setOperationAction(ISD::ABS , MVT::i16 , Custom);
211 setOperationAction(ISD::ABS , MVT::i32 , Custom);
212 if (Subtarget.is64Bit())
213 setOperationAction(ISD::ABS , MVT::i64 , Custom);
214 }
215
216 // Funnel shifts.
217 for (auto ShiftOp : {ISD::FSHL, ISD::FSHR}) {
218 // For slow shld targets we only lower for code size.
219 LegalizeAction ShiftDoubleAction = Subtarget.isSHLDSlow() ? Custom : Legal;
220
221 setOperationAction(ShiftOp , MVT::i8 , Custom);
222 setOperationAction(ShiftOp , MVT::i16 , Custom);
223 setOperationAction(ShiftOp , MVT::i32 , ShiftDoubleAction);
224 if (Subtarget.is64Bit())
225 setOperationAction(ShiftOp , MVT::i64 , ShiftDoubleAction);
226 }
227
228 if (!Subtarget.useSoftFloat()) {
229 // Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
230 // operation.
231 setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
232 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i8, Promote);
233 setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
234 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i16, Promote);
235 // We have an algorithm for SSE2, and we turn this into a 64-bit
236 // FILD or VCVTUSI2SS/SD for other targets.
237 setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
238 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom);
239 // We have an algorithm for SSE2->double, and we turn this into a
240 // 64-bit FILD followed by conditional FADD for other targets.
241 setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
242 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom);
243
244 // Promote i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
245 // this operation.
246 setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
247 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i8, Promote);
248 // SSE has no i16 to fp conversion, only i32. We promote in the handler
249 // to allow f80 to use i16 and f64 to use i16 with sse1 only
250 setOperationAction(ISD::SINT_TO_FP, MVT::i16, Custom);
251 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i16, Custom);
252 // f32 and f64 cases are Legal with SSE1/SSE2, f80 case is not
253 setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
254 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
255 // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
256 // are Legal, f80 is custom lowered.
257 setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
258 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
259
260 // Promote i8 FP_TO_SINT to larger FP_TO_SINTS's, as X86 doesn't have
261 // this operation.
262 setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
263 // FIXME: This doesn't generate invalid exception when it should. PR44019.
264 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i8, Promote);
265 setOperationAction(ISD::FP_TO_SINT, MVT::i16, Custom);
266 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i16, Custom);
267 setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
268 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
269 // In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
270 // are Legal, f80 is custom lowered.
271 setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
272 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
273
274 // Handle FP_TO_UINT by promoting the destination to a larger signed
275 // conversion.
276 setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
277 // FIXME: This doesn't generate invalid exception when it should. PR44019.
278 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i8, Promote);
279 setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
280 // FIXME: This doesn't generate invalid exception when it should. PR44019.
281 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i16, Promote);
282 setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
283 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
284 setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
285 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom);
286
287 setOperationAction(ISD::LRINT, MVT::f32, Custom);
288 setOperationAction(ISD::LRINT, MVT::f64, Custom);
289 setOperationAction(ISD::LLRINT, MVT::f32, Custom);
290 setOperationAction(ISD::LLRINT, MVT::f64, Custom);
291
292 if (!Subtarget.is64Bit()) {
293 setOperationAction(ISD::LRINT, MVT::i64, Custom);
294 setOperationAction(ISD::LLRINT, MVT::i64, Custom);
295 }
296 }
297
298 if (Subtarget.hasSSE2()) {
299 // Custom lowering for saturating float to int conversions.
300 // We handle promotion to larger result types manually.
301 for (MVT VT : { MVT::i8, MVT::i16, MVT::i32 }) {
302 setOperationAction(ISD::FP_TO_UINT_SAT, VT, Custom);
303 setOperationAction(ISD::FP_TO_SINT_SAT, VT, Custom);
304 }
305 if (Subtarget.is64Bit()) {
306 setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i64, Custom);
307 setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i64, Custom);
308 }
309 }
310
311 // Handle address space casts between mixed sized pointers.
312 setOperationAction(ISD::ADDRSPACECAST, MVT::i32, Custom);
313 setOperationAction(ISD::ADDRSPACECAST, MVT::i64, Custom);
314
315 // TODO: when we have SSE, these could be more efficient, by using movd/movq.
316 if (!X86ScalarSSEf64) {
317 setOperationAction(ISD::BITCAST , MVT::f32 , Expand);
318 setOperationAction(ISD::BITCAST , MVT::i32 , Expand);
319 if (Subtarget.is64Bit()) {
320 setOperationAction(ISD::BITCAST , MVT::f64 , Expand);
321 // Without SSE, i64->f64 goes through memory.
322 setOperationAction(ISD::BITCAST , MVT::i64 , Expand);
323 }
324 } else if (!Subtarget.is64Bit())
325 setOperationAction(ISD::BITCAST , MVT::i64 , Custom);
326
327 // Scalar integer divide and remainder are lowered to use operations that
328 // produce two results, to match the available instructions. This exposes
329 // the two-result form to trivial CSE, which is able to combine x/y and x%y
330 // into a single instruction.
331 //
332 // Scalar integer multiply-high is also lowered to use two-result
333 // operations, to match the available instructions. However, plain multiply
334 // (low) operations are left as Legal, as there are single-result
335 // instructions for this in x86. Using the two-result multiply instructions
336 // when both high and low results are needed must be arranged by dagcombine.
337 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
338 setOperationAction(ISD::MULHS, VT, Expand);
339 setOperationAction(ISD::MULHU, VT, Expand);
340 setOperationAction(ISD::SDIV, VT, Expand);
341 setOperationAction(ISD::UDIV, VT, Expand);
342 setOperationAction(ISD::SREM, VT, Expand);
343 setOperationAction(ISD::UREM, VT, Expand);
344 }
345
346 setOperationAction(ISD::BR_JT , MVT::Other, Expand);
347 setOperationAction(ISD::BRCOND , MVT::Other, Custom);
348 for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128,
349 MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
350 setOperationAction(ISD::BR_CC, VT, Expand);
351 setOperationAction(ISD::SELECT_CC, VT, Expand);
352 }
353 if (Subtarget.is64Bit())
354 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
355 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal);
356 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal);
357 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
358
359 setOperationAction(ISD::FREM , MVT::f32 , Expand);
360 setOperationAction(ISD::FREM , MVT::f64 , Expand);
361 setOperationAction(ISD::FREM , MVT::f80 , Expand);
362 setOperationAction(ISD::FREM , MVT::f128 , Expand);
363
364 if (!Subtarget.useSoftFloat() && Subtarget.hasX87()) {
365 setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom);
366 setOperationAction(ISD::SET_ROUNDING , MVT::Other, Custom);
367 }
368
369 // Promote the i8 variants and force them on up to i32 which has a shorter
370 // encoding.
371 setOperationPromotedToType(ISD::CTTZ , MVT::i8 , MVT::i32);
372 setOperationPromotedToType(ISD::CTTZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
373
374 if (Subtarget.hasBMI()) {
375 // Promote the i16 zero undef variant and force it on up to i32 when tzcnt
376 // is enabled.
377 setOperationPromotedToType(ISD::CTTZ_ZERO_UNDEF, MVT::i16, MVT::i32);
378 } else {
379 setOperationAction(ISD::CTTZ, MVT::i16, Custom);
380 setOperationAction(ISD::CTTZ , MVT::i32 , Custom);
381 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16 , Legal);
382 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32 , Legal);
383 if (Subtarget.is64Bit()) {
384 setOperationAction(ISD::CTTZ , MVT::i64 , Custom);
385 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Legal);
386 }
387 }
388
389 if (Subtarget.hasLZCNT()) {
390 // When promoting the i8 variants, force them to i32 for a shorter
391 // encoding.
392 setOperationPromotedToType(ISD::CTLZ , MVT::i8 , MVT::i32);
393 setOperationPromotedToType(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
394 } else {
395 for (auto VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64}) {
396 if (VT == MVT::i64 && !Subtarget.is64Bit())
397 continue;
398 setOperationAction(ISD::CTLZ , VT, Custom);
399 setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Custom);
400 }
401 }
402
403 for (auto Op : {ISD::FP16_TO_FP, ISD::STRICT_FP16_TO_FP, ISD::FP_TO_FP16,
404 ISD::STRICT_FP_TO_FP16}) {
405 // Special handling for half-precision floating point conversions.
406 // If we don't have F16C support, then lower half float conversions
407 // into library calls.
408 setOperationAction(
409 Op, MVT::f32,
410 (!Subtarget.useSoftFloat() && Subtarget.hasF16C()) ? Custom : Expand);
411 // There's never any support for operations beyond MVT::f32.
412 setOperationAction(Op, MVT::f64, Expand);
413 setOperationAction(Op, MVT::f80, Expand);
414 setOperationAction(Op, MVT::f128, Expand);
415 }
416
417 setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
418 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
419 setLoadExtAction(ISD::EXTLOAD, MVT::f80, MVT::f16, Expand);
420 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f16, Expand);
421 setTruncStoreAction(MVT::f32, MVT::f16, Expand);
422 setTruncStoreAction(MVT::f64, MVT::f16, Expand);
423 setTruncStoreAction(MVT::f80, MVT::f16, Expand);
424 setTruncStoreAction(MVT::f128, MVT::f16, Expand);
425
426 setOperationAction(ISD::PARITY, MVT::i8, Custom);
427 if (Subtarget.hasPOPCNT()) {
428 setOperationPromotedToType(ISD::CTPOP, MVT::i8, MVT::i32);
429 } else {
430 setOperationAction(ISD::CTPOP , MVT::i8 , Expand);
431 setOperationAction(ISD::CTPOP , MVT::i16 , Expand);
432 setOperationAction(ISD::CTPOP , MVT::i32 , Expand);
433 if (Subtarget.is64Bit())
434 setOperationAction(ISD::CTPOP , MVT::i64 , Expand);
435 else
436 setOperationAction(ISD::CTPOP , MVT::i64 , Custom);
437
438 setOperationAction(ISD::PARITY, MVT::i16, Custom);
439 setOperationAction(ISD::PARITY, MVT::i32, Custom);
440 if (Subtarget.is64Bit())
441 setOperationAction(ISD::PARITY, MVT::i64, Custom);
442 }
443
444 setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);
445
446 if (!Subtarget.hasMOVBE())
447 setOperationAction(ISD::BSWAP , MVT::i16 , Expand);
448
449 // X86 wants to expand cmov itself.
450 for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128 }) {
451 setOperationAction(ISD::SELECT, VT, Custom);
452 setOperationAction(ISD::SETCC, VT, Custom);
453 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
454 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
455 }
456 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
457 if (VT == MVT::i64 && !Subtarget.is64Bit())
458 continue;
459 setOperationAction(ISD::SELECT, VT, Custom);
460 setOperationAction(ISD::SETCC, VT, Custom);
461 }
462
463 // Custom action for SELECT MMX and expand action for SELECT_CC MMX
464 setOperationAction(ISD::SELECT, MVT::x86mmx, Custom);
465 setOperationAction(ISD::SELECT_CC, MVT::x86mmx, Expand);
466
467 setOperationAction(ISD::EH_RETURN , MVT::Other, Custom);
468 // NOTE: EH_SJLJ_SETJMP/_LONGJMP are not recommended, since
469 // LLVM/Clang supports zero-cost DWARF and SEH exception handling.
470 setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
471 setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom);
472 setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);
473 if (TM.Options.ExceptionModel == ExceptionHandling::SjLj)
474 setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
475
476 // Darwin ABI issue.
477 for (auto VT : { MVT::i32, MVT::i64 }) {
478 if (VT == MVT::i64 && !Subtarget.is64Bit())
479 continue;
480 setOperationAction(ISD::ConstantPool , VT, Custom);
481 setOperationAction(ISD::JumpTable , VT, Custom);
482 setOperationAction(ISD::GlobalAddress , VT, Custom);
483 setOperationAction(ISD::GlobalTLSAddress, VT, Custom);
484 setOperationAction(ISD::ExternalSymbol , VT, Custom);
485 setOperationAction(ISD::BlockAddress , VT, Custom);
486 }
487
488 // 64-bit shl, sra, srl (iff 32-bit x86)
489 for (auto VT : { MVT::i32, MVT::i64 }) {
490 if (VT == MVT::i64 && !Subtarget.is64Bit())
491 continue;
492 setOperationAction(ISD::SHL_PARTS, VT, Custom);
493 setOperationAction(ISD::SRA_PARTS, VT, Custom);
494 setOperationAction(ISD::SRL_PARTS, VT, Custom);
495 }
496
497 if (Subtarget.hasSSEPrefetch() || Subtarget.has3DNow())
498 setOperationAction(ISD::PREFETCH , MVT::Other, Legal);
499
500 setOperationAction(ISD::ATOMIC_FENCE , MVT::Other, Custom);
501
502 // Expand certain atomics
503 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
504 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom);
505 setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
506 setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Custom);
507 setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Custom);
508 setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Custom);
509 setOperationAction(ISD::ATOMIC_LOAD_AND, VT, Custom);
510 setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
511 }
512
513 if (!Subtarget.is64Bit())
514 setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Custom);
515
516 if (Subtarget.hasCmpxchg16b()) {
517 setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom);
518 }
519
520 // FIXME - use subtarget debug flags
521 if (!Subtarget.isTargetDarwin() && !Subtarget.isTargetELF() &&
522 !Subtarget.isTargetCygMing() && !Subtarget.isTargetWin64() &&
523 TM.Options.ExceptionModel != ExceptionHandling::SjLj) {
524 setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
525 }
526
527 setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom);
528 setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i64, Custom);
529
530 setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom);
531 setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom);
532
533 setOperationAction(ISD::TRAP, MVT::Other, Legal);
534 setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
535 setOperationAction(ISD::UBSANTRAP, MVT::Other, Legal);
536
537 // VASTART needs to be custom lowered to use the VarArgsFrameIndex
538 setOperationAction(ISD::VASTART , MVT::Other, Custom);
539 setOperationAction(ISD::VAEND , MVT::Other, Expand);
540 bool Is64Bit = Subtarget.is64Bit();
541 setOperationAction(ISD::VAARG, MVT::Other, Is64Bit ? Custom : Expand);
542 setOperationAction(ISD::VACOPY, MVT::Other, Is64Bit ? Custom : Expand);
543
544 setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
545 setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
546
547 setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom);
548
549 // GC_TRANSITION_START and GC_TRANSITION_END need custom lowering.
550 setOperationAction(ISD::GC_TRANSITION_START, MVT::Other, Custom);
551 setOperationAction(ISD::GC_TRANSITION_END, MVT::Other, Custom);
552
553 if (!Subtarget.useSoftFloat() && X86ScalarSSEf64) {
554 // f32 and f64 use SSE.
555 // Set up the FP register classes.
556 addRegisterClass(MVT::f32, Subtarget.hasAVX512() ? &X86::FR32XRegClass
557 : &X86::FR32RegClass);
558 addRegisterClass(MVT::f64, Subtarget.hasAVX512() ? &X86::FR64XRegClass
559 : &X86::FR64RegClass);
560
561 // Disable f32->f64 extload as we can only generate this in one instruction
562 // under optsize. So it's easier to pattern match (fpext (load)) for that
563 // case instead of needing to emit 2 instructions for extload in the
564 // non-optsize case.
565 setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
566
567 for (auto VT : { MVT::f32, MVT::f64 }) {
568 // Use ANDPD to simulate FABS.
569 setOperationAction(ISD::FABS, VT, Custom);
570
571 // Use XORP to simulate FNEG.
572 setOperationAction(ISD::FNEG, VT, Custom);
573
574 // Use ANDPD and ORPD to simulate FCOPYSIGN.
575 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
576
577 // These might be better off as horizontal vector ops.
578 setOperationAction(ISD::FADD, VT, Custom);
579 setOperationAction(ISD::FSUB, VT, Custom);
580
581 // We don't support sin/cos/fmod
582 setOperationAction(ISD::FSIN , VT, Expand);
583 setOperationAction(ISD::FCOS , VT, Expand);
584 setOperationAction(ISD::FSINCOS, VT, Expand);
585 }
586
587 // Lower this to MOVMSK plus an AND.
588 setOperationAction(ISD::FGETSIGN, MVT::i64, Custom);
589 setOperationAction(ISD::FGETSIGN, MVT::i32, Custom);
590
591 } else if (!Subtarget.useSoftFloat() && X86ScalarSSEf32 &&
592 (UseX87 || Is64Bit)) {
593 // Use SSE for f32, x87 for f64.
594 // Set up the FP register classes.
595 addRegisterClass(MVT::f32, &X86::FR32RegClass);
596 if (UseX87)
597 addRegisterClass(MVT::f64, &X86::RFP64RegClass);
598
599 // Use ANDPS to simulate FABS.
600 setOperationAction(ISD::FABS , MVT::f32, Custom);
601
602 // Use XORP to simulate FNEG.
603 setOperationAction(ISD::FNEG , MVT::f32, Custom);
604
605 if (UseX87)
606 setOperationAction(ISD::UNDEF, MVT::f64, Expand);
607
608 // Use ANDPS and ORPS to simulate FCOPYSIGN.
609 if (UseX87)
610 setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
611 setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom);
612
613 // We don't support sin/cos/fmod
614 setOperationAction(ISD::FSIN , MVT::f32, Expand);
615 setOperationAction(ISD::FCOS , MVT::f32, Expand);
616 setOperationAction(ISD::FSINCOS, MVT::f32, Expand);
617
618 if (UseX87) {
619 // Always expand sin/cos functions even though x87 has an instruction.
620 setOperationAction(ISD::FSIN, MVT::f64, Expand);
621 setOperationAction(ISD::FCOS, MVT::f64, Expand);
622 setOperationAction(ISD::FSINCOS, MVT::f64, Expand);
623 }
624 } else if (UseX87) {
625 // f32 and f64 in x87.
626 // Set up the FP register classes.
627 addRegisterClass(MVT::f64, &X86::RFP64RegClass);
628 addRegisterClass(MVT::f32, &X86::RFP32RegClass);
629
630 for (auto VT : { MVT::f32, MVT::f64 }) {
631 setOperationAction(ISD::UNDEF, VT, Expand);
632 setOperationAction(ISD::FCOPYSIGN, VT, Expand);
633
634 // Always expand sin/cos functions even though x87 has an instruction.
635 setOperationAction(ISD::FSIN , VT, Expand);
636 setOperationAction(ISD::FCOS , VT, Expand);
637 setOperationAction(ISD::FSINCOS, VT, Expand);
638 }
639 }
640
641 // Expand FP32 immediates into loads from the stack, save special cases.
642 if (isTypeLegal(MVT::f32)) {
643 if (UseX87 && (getRegClassFor(MVT::f32) == &X86::RFP32RegClass)) {
644 addLegalFPImmediate(APFloat(+0.0f)); // FLD0
645 addLegalFPImmediate(APFloat(+1.0f)); // FLD1
646 addLegalFPImmediate(APFloat(-0.0f)); // FLD0/FCHS
647 addLegalFPImmediate(APFloat(-1.0f)); // FLD1/FCHS
648 } else // SSE immediates.
649 addLegalFPImmediate(APFloat(+0.0f)); // xorps
650 }
651 // Expand FP64 immediates into loads from the stack, save special cases.
652 if (isTypeLegal(MVT::f64)) {
653 if (UseX87 && getRegClassFor(MVT::f64) == &X86::RFP64RegClass) {
654 addLegalFPImmediate(APFloat(+0.0)); // FLD0
655 addLegalFPImmediate(APFloat(+1.0)); // FLD1
656 addLegalFPImmediate(APFloat(-0.0)); // FLD0/FCHS
657 addLegalFPImmediate(APFloat(-1.0)); // FLD1/FCHS
658 } else // SSE immediates.
659 addLegalFPImmediate(APFloat(+0.0)); // xorpd
660 }
661 // Handle constrained floating-point operations of scalar.
662 setOperationAction(ISD::STRICT_FADD, MVT::f32, Legal);
663 setOperationAction(ISD::STRICT_FADD, MVT::f64, Legal);
664 setOperationAction(ISD::STRICT_FSUB, MVT::f32, Legal);
665 setOperationAction(ISD::STRICT_FSUB, MVT::f64, Legal);
666 setOperationAction(ISD::STRICT_FMUL, MVT::f32, Legal);
667 setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal);
668 setOperationAction(ISD::STRICT_FDIV, MVT::f32, Legal);
669 setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal);
670 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal);
671 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal);
672 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Legal);
673 setOperationAction(ISD::STRICT_FSQRT, MVT::f32, Legal);
674 setOperationAction(ISD::STRICT_FSQRT, MVT::f64, Legal);
675
676 // We don't support FMA.
677 setOperationAction(ISD::FMA, MVT::f64, Expand);
678 setOperationAction(ISD::FMA, MVT::f32, Expand);
679
680 // f80 always uses X87.
681 if (UseX87) {
682 addRegisterClass(MVT::f80, &X86::RFP80RegClass);
683 setOperationAction(ISD::UNDEF, MVT::f80, Expand);
684 setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand);
685 {
686 APFloat TmpFlt = APFloat::getZero(APFloat::x87DoubleExtended());
687 addLegalFPImmediate(TmpFlt); // FLD0
688 TmpFlt.changeSign();
689 addLegalFPImmediate(TmpFlt); // FLD0/FCHS
690
691 bool ignored;
692 APFloat TmpFlt2(+1.0);
693 TmpFlt2.convert(APFloat::x87DoubleExtended(), APFloat::rmNearestTiesToEven,
694 &ignored);
695 addLegalFPImmediate(TmpFlt2); // FLD1
696 TmpFlt2.changeSign();
697 addLegalFPImmediate(TmpFlt2); // FLD1/FCHS
698 }
699
700 // Always expand sin/cos functions even though x87 has an instruction.
701 setOperationAction(ISD::FSIN , MVT::f80, Expand);
702 setOperationAction(ISD::FCOS , MVT::f80, Expand);
703 setOperationAction(ISD::FSINCOS, MVT::f80, Expand);
704
705 setOperationAction(ISD::FFLOOR, MVT::f80, Expand);
706 setOperationAction(ISD::FCEIL, MVT::f80, Expand);
707 setOperationAction(ISD::FTRUNC, MVT::f80, Expand);
708 setOperationAction(ISD::FRINT, MVT::f80, Expand);
709 setOperationAction(ISD::FNEARBYINT, MVT::f80, Expand);
710 setOperationAction(ISD::FMA, MVT::f80, Expand);
711 setOperationAction(ISD::LROUND, MVT::f80, Expand);
712 setOperationAction(ISD::LLROUND, MVT::f80, Expand);
713 setOperationAction(ISD::LRINT, MVT::f80, Custom);
714 setOperationAction(ISD::LLRINT, MVT::f80, Custom);
715
716 // Handle constrained floating-point operations of scalar.
717 setOperationAction(ISD::STRICT_FADD , MVT::f80, Legal);
718 setOperationAction(ISD::STRICT_FSUB , MVT::f80, Legal);
719 setOperationAction(ISD::STRICT_FMUL , MVT::f80, Legal);
720 setOperationAction(ISD::STRICT_FDIV , MVT::f80, Legal);
721 setOperationAction(ISD::STRICT_FSQRT , MVT::f80, Legal);
722 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f80, Legal);
723 // FIXME: When the target is 64-bit, STRICT_FP_ROUND will be overwritten
724 // as Custom.
725 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f80, Legal);
726 }
727
728 // f128 uses xmm registers, but most operations require libcalls.
729 if (!Subtarget.useSoftFloat() && Subtarget.is64Bit() && Subtarget.hasSSE1()) {
730 addRegisterClass(MVT::f128, Subtarget.hasVLX() ? &X86::VR128XRegClass
731 : &X86::VR128RegClass);
732
733 addLegalFPImmediate(APFloat::getZero(APFloat::IEEEquad())); // xorps
734
735 setOperationAction(ISD::FADD, MVT::f128, LibCall);
736 setOperationAction(ISD::STRICT_FADD, MVT::f128, LibCall);
737 setOperationAction(ISD::FSUB, MVT::f128, LibCall);
738 setOperationAction(ISD::STRICT_FSUB, MVT::f128, LibCall);
739 setOperationAction(ISD::FDIV, MVT::f128, LibCall);
740 setOperationAction(ISD::STRICT_FDIV, MVT::f128, LibCall);
741 setOperationAction(ISD::FMUL, MVT::f128, LibCall);
742 setOperationAction(ISD::STRICT_FMUL, MVT::f128, LibCall);
743 setOperationAction(ISD::FMA, MVT::f128, LibCall);
744 setOperationAction(ISD::STRICT_FMA, MVT::f128, LibCall);
745
746 setOperationAction(ISD::FABS, MVT::f128, Custom);
747 setOperationAction(ISD::FNEG, MVT::f128, Custom);
748 setOperationAction(ISD::FCOPYSIGN, MVT::f128, Custom);
749
750 setOperationAction(ISD::FSIN, MVT::f128, LibCall);
751 setOperationAction(ISD::STRICT_FSIN, MVT::f128, LibCall);
752 setOperationAction(ISD::FCOS, MVT::f128, LibCall);
753 setOperationAction(ISD::STRICT_FCOS, MVT::f128, LibCall);
754 setOperationAction(ISD::FSINCOS, MVT::f128, LibCall);
755 // No STRICT_FSINCOS
756 setOperationAction(ISD::FSQRT, MVT::f128, LibCall);
757 setOperationAction(ISD::STRICT_FSQRT, MVT::f128, LibCall);
758
759 setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom);
760 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f128, Custom);
761 // We need to custom handle any FP_ROUND with an f128 input, but
762 // LegalizeDAG uses the result type to know when to run a custom handler.
763 // So we have to list all legal floating point result types here.
764 if (isTypeLegal(MVT::f32)) {
765 setOperationAction(ISD::FP_ROUND, MVT::f32, Custom);
766 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Custom);
767 }
768 if (isTypeLegal(MVT::f64)) {
769 setOperationAction(ISD::FP_ROUND, MVT::f64, Custom);
770 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Custom);
771 }
772 if (isTypeLegal(MVT::f80)) {
773 setOperationAction(ISD::FP_ROUND, MVT::f80, Custom);
774 setOperationAction(ISD::STRICT_FP_ROUND, MVT::f80, Custom);
775 }
776
777 setOperationAction(ISD::SETCC, MVT::f128, Custom);
778
779 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand);
780 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand);
781 setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f80, Expand);
782 setTruncStoreAction(MVT::f128, MVT::f32, Expand);
783 setTruncStoreAction(MVT::f128, MVT::f64, Expand);
784 setTruncStoreAction(MVT::f128, MVT::f80, Expand);
785 }
786
787 // Always use a library call for pow.
788 setOperationAction(ISD::FPOW , MVT::f32 , Expand);
789 setOperationAction(ISD::FPOW , MVT::f64 , Expand);
790 setOperationAction(ISD::FPOW , MVT::f80 , Expand);
791 setOperationAction(ISD::FPOW , MVT::f128 , Expand);
792
793 setOperationAction(ISD::FLOG, MVT::f80, Expand);
794 setOperationAction(ISD::FLOG2, MVT::f80, Expand);
795 setOperationAction(ISD::FLOG10, MVT::f80, Expand);
796 setOperationAction(ISD::FEXP, MVT::f80, Expand);
797 setOperationAction(ISD::FEXP2, MVT::f80, Expand);
798 setOperationAction(ISD::FMINNUM, MVT::f80, Expand);
799 setOperationAction(ISD::FMAXNUM, MVT::f80, Expand);
800
801 // Some FP actions are always expanded for vector types.
802 for (auto VT : { MVT::v4f32, MVT::v8f32, MVT::v16f32,
803 MVT::v2f64, MVT::v4f64, MVT::v8f64 }) {
804 setOperationAction(ISD::FSIN, VT, Expand);
805 setOperationAction(ISD::FSINCOS, VT, Expand);
806 setOperationAction(ISD::FCOS, VT, Expand);
807 setOperationAction(ISD::FREM, VT, Expand);
808 setOperationAction(ISD::FCOPYSIGN, VT, Expand);
809 setOperationAction(ISD::FPOW, VT, Expand);
810 setOperationAction(ISD::FLOG, VT, Expand);
811 setOperationAction(ISD::FLOG2, VT, Expand);
812 setOperationAction(ISD::FLOG10, VT, Expand);
813 setOperationAction(ISD::FEXP, VT, Expand);
814 setOperationAction(ISD::FEXP2, VT, Expand);
815 }
816
817 // First set operation action for all vector types to either promote
818 // (for widening) or expand (for scalarization). Then we will selectively
819 // turn on ones that can be effectively codegen'd.
820 for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
821 setOperationAction(ISD::SDIV, VT, Expand);
822 setOperationAction(ISD::UDIV, VT, Expand);
823 setOperationAction(ISD::SREM, VT, Expand);
824 setOperationAction(ISD::UREM, VT, Expand);
825 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT,Expand);
826 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand);
827 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT,Expand);
828 setOperationAction(ISD::INSERT_SUBVECTOR, VT,Expand);
829 setOperationAction(ISD::FMA, VT, Expand);
830 setOperationAction(ISD::FFLOOR, VT, Expand);
831 setOperationAction(ISD::FCEIL, VT, Expand);
832 setOperationAction(ISD::FTRUNC, VT, Expand);
833 setOperationAction(ISD::FRINT, VT, Expand);
834 setOperationAction(ISD::FNEARBYINT, VT, Expand);
835 setOperationAction(ISD::SMUL_LOHI, VT, Expand);
836 setOperationAction(ISD::MULHS, VT, Expand);
837 setOperationAction(ISD::UMUL_LOHI, VT, Expand);
838 setOperationAction(ISD::MULHU, VT, Expand);
839 setOperationAction(ISD::SDIVREM, VT, Expand);
840 setOperationAction(ISD::UDIVREM, VT, Expand);
841 setOperationAction(ISD::CTPOP, VT, Expand);
842 setOperationAction(ISD::CTTZ, VT, Expand);
843 setOperationAction(ISD::CTLZ, VT, Expand);
844 setOperationAction(ISD::ROTL, VT, Expand);
845 setOperationAction(ISD::ROTR, VT, Expand);
846 setOperationAction(ISD::BSWAP, VT, Expand);
847 setOperationAction(ISD::SETCC, VT, Expand);
848 setOperationAction(ISD::FP_TO_UINT, VT, Expand);
849 setOperationAction(ISD::FP_TO_SINT, VT, Expand);
850 setOperationAction(ISD::UINT_TO_FP, VT, Expand);
851 setOperationAction(ISD::SINT_TO_FP, VT, Expand);
852 setOperationAction(ISD::SIGN_EXTEND_INREG, VT,Expand);
853 setOperationAction(ISD::TRUNCATE, VT, Expand);
854 setOperationAction(ISD::SIGN_EXTEND, VT, Expand);
855 setOperationAction(ISD::ZERO_EXTEND, VT, Expand);
856 setOperationAction(ISD::ANY_EXTEND, VT, Expand);
857 setOperationAction(ISD::SELECT_CC, VT, Expand);
858 for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
859 setTruncStoreAction(InnerVT, VT, Expand);
860
861 setLoadExtAction(ISD::SEXTLOAD, InnerVT, VT, Expand);
862 setLoadExtAction(ISD::ZEXTLOAD, InnerVT, VT, Expand);
863
864 // N.b. ISD::EXTLOAD legality is basically ignored except for i1-like
865 // types, we have to deal with them whether we ask for Expansion or not.
866 // Setting Expand causes its own optimisation problems though, so leave
867 // them legal.
868 if (VT.getVectorElementType() == MVT::i1)
869 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
870
871 // EXTLOAD for MVT::f16 vectors is not legal because f16 vectors are
872 // split/scalarized right now.
873 if (VT.getVectorElementType() == MVT::f16)
874 setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand);
875 }
876 }
877
878 // FIXME: In order to prevent SSE instructions being expanded to MMX ones
879 // with -msoft-float, disable use of MMX as well.
880 if (!Subtarget.useSoftFloat() && Subtarget.hasMMX()) {
881 addRegisterClass(MVT::x86mmx, &X86::VR64RegClass);
882 // No operations on x86mmx supported, everything uses intrinsics.
883 }
884
885 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE1()) {
886 addRegisterClass(MVT::v4f32, Subtarget.hasVLX() ? &X86::VR128XRegClass
887 : &X86::VR128RegClass);
888
889 setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
890 setOperationAction(ISD::FABS, MVT::v4f32, Custom);
891 setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Custom);
892 setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom);
893 setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom);
894 setOperationAction(ISD::VSELECT, MVT::v4f32, Custom);
895 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
896 setOperationAction(ISD::SELECT, MVT::v4f32, Custom);
897
898 setOperationAction(ISD::LOAD, MVT::v2f32, Custom);
899 setOperationAction(ISD::STORE, MVT::v2f32, Custom);
900
901 setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal);
902 setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal);
903 setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal);
904 setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal);
905 setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal);
906 }
907
908 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) {
909 addRegisterClass(MVT::v2f64, Subtarget.hasVLX() ? &X86::VR128XRegClass
910 : &X86::VR128RegClass);
911
912 // FIXME: Unfortunately, -soft-float and -no-implicit-float mean XMM
913 // registers cannot be used even for integer operations.
914 addRegisterClass(MVT::v16i8, Subtarget.hasVLX() ? &X86::VR128XRegClass
915 : &X86::VR128RegClass);
916 addRegisterClass(MVT::v8i16, Subtarget.hasVLX() ? &X86::VR128XRegClass
917 : &X86::VR128RegClass);
918 addRegisterClass(MVT::v4i32, Subtarget.hasVLX() ? &X86::VR128XRegClass
919 : &X86::VR128RegClass);
920 addRegisterClass(MVT::v2i64, Subtarget.hasVLX() ? &X86::VR128XRegClass
921 : &X86::VR128RegClass);
922
923 for (auto VT : { MVT::v2i8, MVT::v4i8, MVT::v8i8,
924 MVT::v2i16, MVT::v4i16, MVT::v2i32 }) {
925 setOperationAction(ISD::SDIV, VT, Custom);
926 setOperationAction(ISD::SREM, VT, Custom);
927 setOperationAction(ISD::UDIV, VT, Custom);
928 setOperationAction(ISD::UREM, VT, Custom);
929 }
930
931 setOperationAction(ISD::MUL, MVT::v2i8, Custom);
932 setOperationAction(ISD::MUL, MVT::v4i8, Custom);
933 setOperationAction(ISD::MUL, MVT::v8i8, Custom);
934
935 setOperationAction(ISD::MUL, MVT::v16i8, Custom);
936 setOperationAction(ISD::MUL, MVT::v4i32, Custom);
937 setOperationAction(ISD::MUL, MVT::v2i64, Custom);
938 setOperationAction(ISD::MULHU, MVT::v4i32, Custom);
939 setOperationAction(ISD::MULHS, MVT::v4i32, Custom);
940 setOperationAction(ISD::MULHU, MVT::v16i8, Custom);
941 setOperationAction(ISD::MULHS, MVT::v16i8, Custom);
942 setOperationAction(ISD::MULHU, MVT::v8i16, Legal);
943 setOperationAction(ISD::MULHS, MVT::v8i16, Legal);
944 setOperationAction(ISD::MUL, MVT::v8i16, Legal);
945
946 setOperationAction(ISD::SMULO, MVT::v16i8, Custom);
947 setOperationAction(ISD::UMULO, MVT::v16i8, Custom);
948
949 setOperationAction(ISD::FNEG, MVT::v2f64, Custom);
950 setOperationAction(ISD::FABS, MVT::v2f64, Custom);
951 setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Custom);
952
953 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
954 setOperationAction(ISD::SMAX, VT, VT == MVT::v8i16 ? Legal : Custom);
955 setOperationAction(ISD::SMIN, VT, VT == MVT::v8i16 ? Legal : Custom);
956 setOperationAction(ISD::UMAX, VT, VT == MVT::v16i8 ? Legal : Custom);
957 setOperationAction(ISD::UMIN, VT, VT == MVT::v16i8 ? Legal : Custom);
958 }
959
960 setOperationAction(ISD::UADDSAT, MVT::v16i8, Legal);
961 setOperationAction(ISD::SADDSAT, MVT::v16i8, Legal);
962 setOperationAction(ISD::USUBSAT, MVT::v16i8, Legal);
963 setOperationAction(ISD::SSUBSAT, MVT::v16i8, Legal);
964 setOperationAction(ISD::UADDSAT, MVT::v8i16, Legal);
965 setOperationAction(ISD::SADDSAT, MVT::v8i16, Legal);
966 setOperationAction(ISD::USUBSAT, MVT::v8i16, Legal);
967 setOperationAction(ISD::SSUBSAT, MVT::v8i16, Legal);
968 setOperationAction(ISD::USUBSAT, MVT::v4i32, Custom);
969 setOperationAction(ISD::USUBSAT, MVT::v2i64, Custom);
970
971 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom);
972 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
973 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
974
975 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
976 setOperationAction(ISD::SETCC, VT, Custom);
977 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
978 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
979 setOperationAction(ISD::CTPOP, VT, Custom);
980 setOperationAction(ISD::ABS, VT, Custom);
981
982 // The condition codes aren't legal in SSE/AVX and under AVX512 we use
983 // setcc all the way to isel and prefer SETGT in some isel patterns.
984 setCondCodeAction(ISD::SETLT, VT, Custom);
985 setCondCodeAction(ISD::SETLE, VT, Custom);
986 }
987
988 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
989 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
990 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
991 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
992 setOperationAction(ISD::VSELECT, VT, Custom);
993 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
994 }
995
996 for (auto VT : { MVT::v2f64, MVT::v2i64 }) {
997 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
998 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
999 setOperationAction(ISD::VSELECT, VT, Custom);
1000
1001 if (VT == MVT::v2i64 && !Subtarget.is64Bit())
1002 continue;
1003
1004 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1005 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1006 }
1007
1008 // Custom lower v2i64 and v2f64 selects.
1009 setOperationAction(ISD::SELECT, MVT::v2f64, Custom);
1010 setOperationAction(ISD::SELECT, MVT::v2i64, Custom);
1011 setOperationAction(ISD::SELECT, MVT::v4i32, Custom);
1012 setOperationAction(ISD::SELECT, MVT::v8i16, Custom);
1013 setOperationAction(ISD::SELECT, MVT::v16i8, Custom);
1014
1015 setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal);
1016 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Custom);
1017 setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom);
1018 setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Custom);
1019 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Legal);
1020 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i32, Custom);
1021
1022 // Custom legalize these to avoid over promotion or custom promotion.
1023 for (auto VT : {MVT::v2i8, MVT::v4i8, MVT::v8i8, MVT::v2i16, MVT::v4i16}) {
1024 setOperationAction(ISD::FP_TO_SINT, VT, Custom);
1025 setOperationAction(ISD::FP_TO_UINT, VT, Custom);
1026 setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Custom);
1027 setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Custom);
1028 }
1029
1030 setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal);
1031 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Legal);
1032 setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom);
1033 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i32, Custom);
1034
1035 setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom);
1036 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i32, Custom);
1037
1038 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom);
1039 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, Custom);
1040
1041 // Fast v2f32 UINT_TO_FP( v2i32 ) custom conversion.
1042 setOperationAction(ISD::SINT_TO_FP, MVT::v2f32, Custom);
1043 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2f32, Custom);
1044 setOperationAction(ISD::UINT_TO_FP, MVT::v2f32, Custom);
1045 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2f32, Custom);
1046
1047 setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom);
1048 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v2f32, Custom);
1049 setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom);
1050 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v2f32, Custom);
1051
1052 // We want to legalize this to an f64 load rather than an i64 load on
1053 // 64-bit targets and two 32-bit loads on a 32-bit target. Similar for
1054 // store.
1055 setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
1056 setOperationAction(ISD::LOAD, MVT::v4i16, Custom);
1057 setOperationAction(ISD::LOAD, MVT::v8i8, Custom);
1058 setOperationAction(ISD::STORE, MVT::v2i32, Custom);
1059 setOperationAction(ISD::STORE, MVT::v4i16, Custom);
1060 setOperationAction(ISD::STORE, MVT::v8i8, Custom);
1061
1062 setOperationAction(ISD::BITCAST, MVT::v2i32, Custom);
1063 setOperationAction(ISD::BITCAST, MVT::v4i16, Custom);
1064 setOperationAction(ISD::BITCAST, MVT::v8i8, Custom);
1065 if (!Subtarget.hasAVX512())
1066 setOperationAction(ISD::BITCAST, MVT::v16i1, Custom);
1067
1068 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v2i64, Custom);
1069 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i32, Custom);
1070 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i16, Custom);
1071
1072 setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom);
1073
1074 setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom);
1075 setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom);
1076 setOperationAction(ISD::TRUNCATE, MVT::v2i32, Custom);
1077 setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom);
1078 setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom);
1079 setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom);
1080
1081 // In the customized shift lowering, the legal v4i32/v2i64 cases
1082 // in AVX2 will be recognized.
1083 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1084 setOperationAction(ISD::SRL, VT, Custom);
1085 setOperationAction(ISD::SHL, VT, Custom);
1086 setOperationAction(ISD::SRA, VT, Custom);
1087 }
1088
1089 setOperationAction(ISD::ROTL, MVT::v4i32, Custom);
1090 setOperationAction(ISD::ROTL, MVT::v8i16, Custom);
1091
1092 // With 512-bit registers or AVX512VL+BW, expanding (and promoting the
1093 // shifts) is better.
1094 if (!Subtarget.useAVX512Regs() &&
1095 !(Subtarget.hasBWI() && Subtarget.hasVLX()))
1096 setOperationAction(ISD::ROTL, MVT::v16i8, Custom);
1097
1098 setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal);
1099 setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal);
1100 setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal);
1101 setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal);
1102 setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal);
1103 }
1104
1105 if (!Subtarget.useSoftFloat() && Subtarget.hasSSSE3()) {
1106 setOperationAction(ISD::ABS, MVT::v16i8, Legal);
1107 setOperationAction(ISD::ABS, MVT::v8i16, Legal);
1108 setOperationAction(ISD::ABS, MVT::v4i32, Legal);
1109 setOperationAction(ISD::BITREVERSE, MVT::v16i8, Custom);
1110 setOperationAction(ISD::CTLZ, MVT::v16i8, Custom);
1111 setOperationAction(ISD::CTLZ, MVT::v8i16, Custom);
1112 setOperationAction(ISD::CTLZ, MVT::v4i32, Custom);
1113 setOperationAction(ISD::CTLZ, MVT::v2i64, Custom);
1114
1115 // These might be better off as horizontal vector ops.
1116 setOperationAction(ISD::ADD, MVT::i16, Custom);
1117 setOperationAction(ISD::ADD, MVT::i32, Custom);
1118 setOperationAction(ISD::SUB, MVT::i16, Custom);
1119 setOperationAction(ISD::SUB, MVT::i32, Custom);
1120 }
1121
1122 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE41()) {
1123 for (MVT RoundedTy : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) {
1124 setOperationAction(ISD::FFLOOR, RoundedTy, Legal);
1125 setOperationAction(ISD::STRICT_FFLOOR, RoundedTy, Legal);
1126 setOperationAction(ISD::FCEIL, RoundedTy, Legal);
1127 setOperationAction(ISD::STRICT_FCEIL, RoundedTy, Legal);
1128 setOperationAction(ISD::FTRUNC, RoundedTy, Legal);
1129 setOperationAction(ISD::STRICT_FTRUNC, RoundedTy, Legal);
1130 setOperationAction(ISD::FRINT, RoundedTy, Legal);
1131 setOperationAction(ISD::STRICT_FRINT, RoundedTy, Legal);
1132 setOperationAction(ISD::FNEARBYINT, RoundedTy, Legal);
1133 setOperationAction(ISD::STRICT_FNEARBYINT, RoundedTy, Legal);
1134 setOperationAction(ISD::FROUNDEVEN, RoundedTy, Legal);
1135 setOperationAction(ISD::STRICT_FROUNDEVEN, RoundedTy, Legal);
1136
1137 setOperationAction(ISD::FROUND, RoundedTy, Custom);
1138 }
1139
1140 setOperationAction(ISD::SMAX, MVT::v16i8, Legal);
1141 setOperationAction(ISD::SMAX, MVT::v4i32, Legal);
1142 setOperationAction(ISD::UMAX, MVT::v8i16, Legal);
1143 setOperationAction(ISD::UMAX, MVT::v4i32, Legal);
1144 setOperationAction(ISD::SMIN, MVT::v16i8, Legal);
1145 setOperationAction(ISD::SMIN, MVT::v4i32, Legal);
1146 setOperationAction(ISD::UMIN, MVT::v8i16, Legal);
1147 setOperationAction(ISD::UMIN, MVT::v4i32, Legal);
1148
1149 setOperationAction(ISD::UADDSAT, MVT::v4i32, Custom);
1150
1151 // FIXME: Do we need to handle scalar-to-vector here?
1152 setOperationAction(ISD::MUL, MVT::v4i32, Legal);
1153
1154 // We directly match byte blends in the backend as they match the VSELECT
1155 // condition form.
1156 setOperationAction(ISD::VSELECT, MVT::v16i8, Legal);
1157
1158 // SSE41 brings specific instructions for doing vector sign extend even in
1159 // cases where we don't have SRA.
1160 for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1161 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Legal);
1162 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Legal);
1163 }
1164
1165 // SSE41 also has vector sign/zero extending loads, PMOV[SZ]X
1166 for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
1167 setLoadExtAction(LoadExtOp, MVT::v8i16, MVT::v8i8, Legal);
1168 setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i8, Legal);
1169 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i8, Legal);
1170 setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i16, Legal);
1171 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i16, Legal);
1172 setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i32, Legal);
1173 }
1174
1175 // i8 vectors are custom because the source register and source
1176 // memory operand types are not the same width.
1177 setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom);
1178
1179 if (Subtarget.is64Bit() && !Subtarget.hasAVX512()) {
1180 // We need to scalarize v4i64->v4f32 uint_to_fp using cvtsi2ss, but we can
1181 // do the pre and post work in the vector domain.
1182 setOperationAction(ISD::UINT_TO_FP, MVT::v4i64, Custom);
1183 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i64, Custom);
1184 // We need to mark SINT_TO_FP as Custom even though we want to expand it
1185 // so that DAG combine doesn't try to turn it into uint_to_fp.
1186 setOperationAction(ISD::SINT_TO_FP, MVT::v4i64, Custom);
1187 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i64, Custom);
1188 }
1189 }
1190
1191 if (!Subtarget.useSoftFloat() && Subtarget.hasSSE42()) {
1192 setOperationAction(ISD::UADDSAT, MVT::v2i64, Custom);
1193 }
1194
1195 if (!Subtarget.useSoftFloat() && Subtarget.hasXOP()) {
1196 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1197 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
1198 setOperationAction(ISD::ROTL, VT, Custom);
1199
1200 // XOP can efficiently perform BITREVERSE with VPPERM.
1201 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 })
1202 setOperationAction(ISD::BITREVERSE, VT, Custom);
1203
1204 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1205 MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 })
1206 setOperationAction(ISD::BITREVERSE, VT, Custom);
1207 }
1208
1209 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX()) {
1210 bool HasInt256 = Subtarget.hasInt256();
1211
1212 addRegisterClass(MVT::v32i8, Subtarget.hasVLX() ? &X86::VR256XRegClass
1213 : &X86::VR256RegClass);
1214 addRegisterClass(MVT::v16i16, Subtarget.hasVLX() ? &X86::VR256XRegClass
1215 : &X86::VR256RegClass);
1216 addRegisterClass(MVT::v8i32, Subtarget.hasVLX() ? &X86::VR256XRegClass
1217 : &X86::VR256RegClass);
1218 addRegisterClass(MVT::v8f32, Subtarget.hasVLX() ? &X86::VR256XRegClass
1219 : &X86::VR256RegClass);
1220 addRegisterClass(MVT::v4i64, Subtarget.hasVLX() ? &X86::VR256XRegClass
1221 : &X86::VR256RegClass);
1222 addRegisterClass(MVT::v4f64, Subtarget.hasVLX() ? &X86::VR256XRegClass
1223 : &X86::VR256RegClass);
1224
1225 for (auto VT : { MVT::v8f32, MVT::v4f64 }) {
1226 setOperationAction(ISD::FFLOOR, VT, Legal);
1227 setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
1228 setOperationAction(ISD::FCEIL, VT, Legal);
1229 setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
1230 setOperationAction(ISD::FTRUNC, VT, Legal);
1231 setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
1232 setOperationAction(ISD::FRINT, VT, Legal);
1233 setOperationAction(ISD::STRICT_FRINT, VT, Legal);
1234 setOperationAction(ISD::FNEARBYINT, VT, Legal);
1235 setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
1236 setOperationAction(ISD::FROUNDEVEN, VT, Legal);
1237 setOperationAction(ISD::STRICT_FROUNDEVEN, VT, Legal);
1238
1239 setOperationAction(ISD::FROUND, VT, Custom);
1240
1241 setOperationAction(ISD::FNEG, VT, Custom);
1242 setOperationAction(ISD::FABS, VT, Custom);
1243 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
1244 }
1245
1246 // (fp_to_int:v8i16 (v8f32 ..)) requires the result type to be promoted
1247 // even though v8i16 is a legal type.
1248 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i16, MVT::v8i32);
1249 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i16, MVT::v8i32);
1250 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v8i16, MVT::v8i32);
1251 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v8i16, MVT::v8i32);
1252 setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal);
1253 setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Custom);
1254 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v8i32, Legal);
1255
1256 setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal);
1257 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v8i32, Legal);
1258
1259 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v4f32, Legal);
1260 setOperationAction(ISD::STRICT_FADD, MVT::v8f32, Legal);
1261 setOperationAction(ISD::STRICT_FADD, MVT::v4f64, Legal);
1262 setOperationAction(ISD::STRICT_FSUB, MVT::v8f32, Legal);
1263 setOperationAction(ISD::STRICT_FSUB, MVT::v4f64, Legal);
1264 setOperationAction(ISD::STRICT_FMUL, MVT::v8f32, Legal);
1265 setOperationAction(ISD::STRICT_FMUL, MVT::v4f64, Legal);
1266 setOperationAction(ISD::STRICT_FDIV, MVT::v8f32, Legal);
1267 setOperationAction(ISD::STRICT_FDIV, MVT::v4f64, Legal);
1268 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v4f64, Legal);
1269 setOperationAction(ISD::STRICT_FSQRT, MVT::v8f32, Legal);
1270 setOperationAction(ISD::STRICT_FSQRT, MVT::v4f64, Legal);
1271
1272 if (!Subtarget.hasAVX512())
1273 setOperationAction(ISD::BITCAST, MVT::v32i1, Custom);
1274
1275 // In the customized shift lowering, the legal v8i32/v4i64 cases
1276 // in AVX2 will be recognized.
1277 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1278 setOperationAction(ISD::SRL, VT, Custom);
1279 setOperationAction(ISD::SHL, VT, Custom);
1280 setOperationAction(ISD::SRA, VT, Custom);
1281 }
1282
1283 // These types need custom splitting if their input is a 128-bit vector.
1284 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
1285 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
1286 setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
1287 setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
1288
1289 setOperationAction(ISD::ROTL, MVT::v8i32, Custom);
1290 setOperationAction(ISD::ROTL, MVT::v16i16, Custom);
1291
1292 // With BWI, expanding (and promoting the shifts) is better.
1293 if (!Subtarget.useBWIRegs())
1294 setOperationAction(ISD::ROTL, MVT::v32i8, Custom);
1295
1296 setOperationAction(ISD::SELECT, MVT::v4f64, Custom);
1297 setOperationAction(ISD::SELECT, MVT::v4i64, Custom);
1298 setOperationAction(ISD::SELECT, MVT::v8i32, Custom);
1299 setOperationAction(ISD::SELECT, MVT::v16i16, Custom);
1300 setOperationAction(ISD::SELECT, MVT::v32i8, Custom);
1301 setOperationAction(ISD::SELECT, MVT::v8f32, Custom);
1302
1303 for (auto VT : { MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1304 setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
1305 setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
1306 setOperationAction(ISD::ANY_EXTEND, VT, Custom);
1307 }
1308
1309 setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom);
1310 setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom);
1311 setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom);
1312 setOperationAction(ISD::BITREVERSE, MVT::v32i8, Custom);
1313
1314 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1315 setOperationAction(ISD::SETCC, VT, Custom);
1316 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
1317 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
1318 setOperationAction(ISD::CTPOP, VT, Custom);
1319 setOperationAction(ISD::CTLZ, VT, Custom);
1320
1321 // The condition codes aren't legal in SSE/AVX and under AVX512 we use
1322 // setcc all the way to isel and prefer SETGT in some isel patterns.
1323 setCondCodeAction(ISD::SETLT, VT, Custom);
1324 setCondCodeAction(ISD::SETLE, VT, Custom);
1325 }
1326
1327 if (Subtarget.hasAnyFMA()) {
1328 for (auto VT : { MVT::f32, MVT::f64, MVT::v4f32, MVT::v8f32,
1329 MVT::v2f64, MVT::v4f64 }) {
1330 setOperationAction(ISD::FMA, VT, Legal);
1331 setOperationAction(ISD::STRICT_FMA, VT, Legal);
1332 }
1333 }
1334
1335 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) {
1336 setOperationAction(ISD::ADD, VT, HasInt256 ? Legal : Custom);
1337 setOperationAction(ISD::SUB, VT, HasInt256 ? Legal : Custom);
1338 }
1339
1340 setOperationAction(ISD::MUL, MVT::v4i64, Custom);
1341 setOperationAction(ISD::MUL, MVT::v8i32, HasInt256 ? Legal : Custom);
1342 setOperationAction(ISD::MUL, MVT::v16i16, HasInt256 ? Legal : Custom);
1343 setOperationAction(ISD::MUL, MVT::v32i8, Custom);
1344
1345 setOperationAction(ISD::MULHU, MVT::v8i32, Custom);
1346 setOperationAction(ISD::MULHS, MVT::v8i32, Custom);
1347 setOperationAction(ISD::MULHU, MVT::v16i16, HasInt256 ? Legal : Custom);
1348 setOperationAction(ISD::MULHS, MVT::v16i16, HasInt256 ? Legal : Custom);
1349 setOperationAction(ISD::MULHU, MVT::v32i8, Custom);
1350 setOperationAction(ISD::MULHS, MVT::v32i8, Custom);
1351
1352 setOperationAction(ISD::SMULO, MVT::v32i8, Custom);
1353 setOperationAction(ISD::UMULO, MVT::v32i8, Custom);
1354
1355 setOperationAction(ISD::ABS, MVT::v4i64, Custom);
1356 setOperationAction(ISD::SMAX, MVT::v4i64, Custom);
1357 setOperationAction(ISD::UMAX, MVT::v4i64, Custom);
1358 setOperationAction(ISD::SMIN, MVT::v4i64, Custom);
1359 setOperationAction(ISD::UMIN, MVT::v4i64, Custom);
1360
1361 setOperationAction(ISD::UADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1362 setOperationAction(ISD::SADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1363 setOperationAction(ISD::USUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1364 setOperationAction(ISD::SSUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom);
1365 setOperationAction(ISD::UADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1366 setOperationAction(ISD::SADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1367 setOperationAction(ISD::USUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1368 setOperationAction(ISD::SSUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom);
1369 setOperationAction(ISD::UADDSAT, MVT::v8i32, Custom);
1370 setOperationAction(ISD::USUBSAT, MVT::v8i32, Custom);
1371 setOperationAction(ISD::UADDSAT, MVT::v4i64, Custom);
1372 setOperationAction(ISD::USUBSAT, MVT::v4i64, Custom);
1373
1374 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) {
1375 setOperationAction(ISD::ABS, VT, HasInt256 ? Legal : Custom);
1376 setOperationAction(ISD::SMAX, VT, HasInt256 ? Legal : Custom);
1377 setOperationAction(ISD::UMAX, VT, HasInt256 ? Legal : Custom);
1378 setOperationAction(ISD::SMIN, VT, HasInt256 ? Legal : Custom);
1379 setOperationAction(ISD::UMIN, VT, HasInt256 ? Legal : Custom);
1380 }
1381
1382 for (auto VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64}) {
1383 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
1384 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);
1385 }
1386
1387 if (HasInt256) {
1388 // The custom lowering for UINT_TO_FP for v8i32 becomes interesting
1389 // when we have a 256bit-wide blend with immediate.
1390 setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Custom);
1391 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v8i32, Custom);
1392
1393 // AVX2 also has wider vector sign/zero extending loads, VPMOV[SZ]X
1394 for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) {
1395 setLoadExtAction(LoadExtOp, MVT::v16i16, MVT::v16i8, Legal);
1396 setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i8, Legal);
1397 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i8, Legal);
1398 setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i16, Legal);
1399 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i16, Legal);
1400 setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i32, Legal);
1401 }
1402 }
1403
1404 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1405 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) {
1406 setOperationAction(ISD::MLOAD, VT, Subtarget.hasVLX() ? Legal : Custom);
1407 setOperationAction(ISD::MSTORE, VT, Legal);
1408 }
1409
1410 // Extract subvector is special because the value type
1411 // (result) is 128-bit but the source is 256-bit wide.
1412 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64,
1413 MVT::v4f32, MVT::v2f64 }) {
1414 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
1415 }
1416
1417 // Custom lower several nodes for 256-bit types.
1418 for (MVT VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
1419 MVT::v8f32, MVT::v4f64 }) {
1420 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1421 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1422 setOperationAction(ISD::VSELECT, VT, Custom);
1423 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1424 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1425 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1426 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
1427 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1428 setOperationAction(ISD::STORE, VT, Custom);
1429 }
1430
1431 if (HasInt256) {
1432 setOperationAction(ISD::VSELECT, MVT::v32i8, Legal);
1433
1434 // Custom legalize 2x32 to get a little better code.
1435 setOperationAction(ISD::MGATHER, MVT::v2f32, Custom);
1436 setOperationAction(ISD::MGATHER, MVT::v2i32, Custom);
1437
1438 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1439 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })
1440 setOperationAction(ISD::MGATHER, VT, Custom);
1441 }
1442 }
1443
1444 // This block controls legalization of the mask vector sizes that are
1445 // available with AVX512. 512-bit vectors are in a separate block controlled
1446 // by useAVX512Regs.
1447 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
1448 addRegisterClass(MVT::v1i1, &X86::VK1RegClass);
1449 addRegisterClass(MVT::v2i1, &X86::VK2RegClass);
1450 addRegisterClass(MVT::v4i1, &X86::VK4RegClass);
1451 addRegisterClass(MVT::v8i1, &X86::VK8RegClass);
1452 addRegisterClass(MVT::v16i1, &X86::VK16RegClass);
1453
1454 setOperationAction(ISD::SELECT, MVT::v1i1, Custom);
1455 setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v1i1, Custom);
1456 setOperationAction(ISD::BUILD_VECTOR, MVT::v1i1, Custom);
1457
1458 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i1, MVT::v8i32);
1459 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i1, MVT::v8i32);
1460 setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v4i1, MVT::v4i32);
1461 setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v4i1, MVT::v4i32);
1462 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v8i1, MVT::v8i32);
1463 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v8i1, MVT::v8i32);
1464 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v4i1, MVT::v4i32);
1465 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v4i1, MVT::v4i32);
1466 setOperationAction(ISD::FP_TO_SINT, MVT::v2i1, Custom);
1467 setOperationAction(ISD::FP_TO_UINT, MVT::v2i1, Custom);
1468 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i1, Custom);
1469 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i1, Custom);
1470
1471 // There is no byte sized k-register load or store without AVX512DQ.
1472 if (!Subtarget.hasDQI()) {
1473 setOperationAction(ISD::LOAD, MVT::v1i1, Custom);
1474 setOperationAction(ISD::LOAD, MVT::v2i1, Custom);
1475 setOperationAction(ISD::LOAD, MVT::v4i1, Custom);
1476 setOperationAction(ISD::LOAD, MVT::v8i1, Custom);
1477
1478 setOperationAction(ISD::STORE, MVT::v1i1, Custom);
1479 setOperationAction(ISD::STORE, MVT::v2i1, Custom);
1480 setOperationAction(ISD::STORE, MVT::v4i1, Custom);
1481 setOperationAction(ISD::STORE, MVT::v8i1, Custom);
1482 }
1483
1484 // Extends of v16i1/v8i1/v4i1/v2i1 to 128-bit vectors.
1485 for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) {
1486 setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
1487 setOperationAction(ISD::ZERO_EXTEND, VT, Custom);
1488 setOperationAction(ISD::ANY_EXTEND, VT, Custom);
1489 }
1490
1491 for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 })
1492 setOperationAction(ISD::VSELECT, VT, Expand);
1493
1494 for (auto VT : { MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 }) {
1495 setOperationAction(ISD::SETCC, VT, Custom);
1496 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
1497 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
1498 setOperationAction(ISD::SELECT, VT, Custom);
1499 setOperationAction(ISD::TRUNCATE, VT, Custom);
1500
1501 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1502 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1503 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1504 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
1505 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1506 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1507 }
1508
1509 for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1 })
1510 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1511 }
1512
1513 // This block controls legalization for 512-bit operations with 32/64 bit
1514 // elements. 512-bits can be disabled based on prefer-vector-width and
1515 // required-vector-width function attributes.
1516 if (!Subtarget.useSoftFloat() && Subtarget.useAVX512Regs()) {
1517 bool HasBWI = Subtarget.hasBWI();
1518
1519 addRegisterClass(MVT::v16i32, &X86::VR512RegClass);
1520 addRegisterClass(MVT::v16f32, &X86::VR512RegClass);
1521 addRegisterClass(MVT::v8i64, &X86::VR512RegClass);
1522 addRegisterClass(MVT::v8f64, &X86::VR512RegClass);
1523 addRegisterClass(MVT::v32i16, &X86::VR512RegClass);
1524 addRegisterClass(MVT::v64i8, &X86::VR512RegClass);
1525
1526 for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD}) {
1527 setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i8, Legal);
1528 setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i16, Legal);
1529 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i8, Legal);
1530 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i16, Legal);
1531 setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i32, Legal);
1532 if (HasBWI)
1533 setLoadExtAction(ExtType, MVT::v32i16, MVT::v32i8, Legal);
1534 }
1535
1536 for (MVT VT : { MVT::v16f32, MVT::v8f64 }) {
1537 setOperationAction(ISD::FNEG, VT, Custom);
1538 setOperationAction(ISD::FABS, VT, Custom);
1539 setOperationAction(ISD::FMA, VT, Legal);
1540 setOperationAction(ISD::STRICT_FMA, VT, Legal);
1541 setOperationAction(ISD::FCOPYSIGN, VT, Custom);
1542 }
1543
1544 for (MVT VT : { MVT::v16i1, MVT::v16i8, MVT::v16i16 }) {
1545 setOperationPromotedToType(ISD::FP_TO_SINT , VT, MVT::v16i32);
1546 setOperationPromotedToType(ISD::FP_TO_UINT , VT, MVT::v16i32);
1547 setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, VT, MVT::v16i32);
1548 setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, VT, MVT::v16i32);
1549 }
1550 setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal);
1551 setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal);
1552 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v16i32, Legal);
1553 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v16i32, Legal);
1554 setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal);
1555 setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Legal);
1556 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v16i32, Legal);
1557 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v16i32, Legal);
1558
1559 setOperationAction(ISD::STRICT_FADD, MVT::v16f32, Legal);
1560 setOperationAction(ISD::STRICT_FADD, MVT::v8f64, Legal);
1561 setOperationAction(ISD::STRICT_FSUB, MVT::v16f32, Legal);
1562 setOperationAction(ISD::STRICT_FSUB, MVT::v8f64, Legal);
1563 setOperationAction(ISD::STRICT_FMUL, MVT::v16f32, Legal);
1564 setOperationAction(ISD::STRICT_FMUL, MVT::v8f64, Legal);
1565 setOperationAction(ISD::STRICT_FDIV, MVT::v16f32, Legal);
1566 setOperationAction(ISD::STRICT_FDIV, MVT::v8f64, Legal);
1567 setOperationAction(ISD::STRICT_FSQRT, MVT::v16f32, Legal);
1568 setOperationAction(ISD::STRICT_FSQRT, MVT::v8f64, Legal);
1569 setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v8f64, Legal);
1570 setOperationAction(ISD::STRICT_FP_ROUND, MVT::v8f32, Legal);
1571
1572 setTruncStoreAction(MVT::v8i64, MVT::v8i8, Legal);
1573 setTruncStoreAction(MVT::v8i64, MVT::v8i16, Legal);
1574 setTruncStoreAction(MVT::v8i64, MVT::v8i32, Legal);
1575 setTruncStoreAction(MVT::v16i32, MVT::v16i8, Legal);
1576 setTruncStoreAction(MVT::v16i32, MVT::v16i16, Legal);
1577 if (HasBWI)
1578 setTruncStoreAction(MVT::v32i16, MVT::v32i8, Legal);
1579
1580 // With 512-bit vectors and no VLX, we prefer to widen MLOAD/MSTORE
1581 // to 512-bit rather than use the AVX2 instructions so that we can use
1582 // k-masks.
1583 if (!Subtarget.hasVLX()) {
1584 for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1585 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64}) {
1586 setOperationAction(ISD::MLOAD, VT, Custom);
1587 setOperationAction(ISD::MSTORE, VT, Custom);
1588 }
1589 }
1590
1591 setOperationAction(ISD::TRUNCATE, MVT::v8i32, Legal);
1592 setOperationAction(ISD::TRUNCATE, MVT::v16i16, Legal);
1593 setOperationAction(ISD::TRUNCATE, MVT::v32i8, HasBWI ? Legal : Custom);
1594 setOperationAction(ISD::TRUNCATE, MVT::v16i64, Custom);
1595 setOperationAction(ISD::ZERO_EXTEND, MVT::v32i16, Custom);
1596 setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom);
1597 setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom);
1598 setOperationAction(ISD::ANY_EXTEND, MVT::v32i16, Custom);
1599 setOperationAction(ISD::ANY_EXTEND, MVT::v16i32, Custom);
1600 setOperationAction(ISD::ANY_EXTEND, MVT::v8i64, Custom);
1601 setOperationAction(ISD::SIGN_EXTEND, MVT::v32i16, Custom);
1602 setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom);
1603 setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom);
1604
1605 if (HasBWI) {
1606 // Extends from v64i1 masks to 512-bit vectors.
1607 setOperationAction(ISD::SIGN_EXTEND, MVT::v64i8, Custom);
1608 setOperationAction(ISD::ZERO_EXTEND, MVT::v64i8, Custom);
1609 setOperationAction(ISD::ANY_EXTEND, MVT::v64i8, Custom);
1610 }
1611
1612 for (auto VT : { MVT::v16f32, MVT::v8f64 }) {
1613 setOperationAction(ISD::FFLOOR, VT, Legal);
1614 setOperationAction(ISD::STRICT_FFLOOR, VT, Legal);
1615 setOperationAction(ISD::FCEIL, VT, Legal);
1616 setOperationAction(ISD::STRICT_FCEIL, VT, Legal);
1617 setOperationAction(ISD::FTRUNC, VT, Legal);
1618 setOperationAction(ISD::STRICT_FTRUNC, VT, Legal);
1619 setOperationAction(ISD::FRINT, VT, Legal);
1620 setOperationAction(ISD::STRICT_FRINT, VT, Legal);
1621 setOperationAction(ISD::FNEARBYINT, VT, Legal);
1622 setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal);
1623 setOperationAction(ISD::FROUNDEVEN, VT, Legal);
1624 setOperationAction(ISD::STRICT_FROUNDEVEN, VT, Legal);
1625
1626 setOperationAction(ISD::FROUND, VT, Custom);
1627 }
1628
1629 for (auto VT : {MVT::v32i16, MVT::v16i32, MVT::v8i64}) {
1630 setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
1631 setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);
1632 }
1633
1634 setOperationAction(ISD::ADD, MVT::v32i16, HasBWI ? Legal : Custom);
1635 setOperationAction(ISD::SUB, MVT::v32i16, HasBWI ? Legal : Custom);
1636 setOperationAction(ISD::ADD, MVT::v64i8, HasBWI ? Legal : Custom);
1637 setOperationAction(ISD::SUB, MVT::v64i8, HasBWI ? Legal : Custom);
1638
1639 setOperationAction(ISD::MUL, MVT::v8i64, Custom);
1640 setOperationAction(ISD::MUL, MVT::v16i32, Legal);
1641 setOperationAction(ISD::MUL, MVT::v32i16, HasBWI ? Legal : Custom);
1642 setOperationAction(ISD::MUL, MVT::v64i8, Custom);
1643
1644 setOperationAction(ISD::MULHU, MVT::v16i32, Custom);
1645 setOperationAction(ISD::MULHS, MVT::v16i32, Custom);
1646 setOperationAction(ISD::MULHS, MVT::v32i16, HasBWI ? Legal : Custom);
1647 setOperationAction(ISD::MULHU, MVT::v32i16, HasBWI ? Legal : Custom);
1648 setOperationAction(ISD::MULHS, MVT::v64i8, Custom);
1649 setOperationAction(ISD::MULHU, MVT::v64i8, Custom);
1650
1651 setOperationAction(ISD::SMULO, MVT::v64i8, Custom);
1652 setOperationAction(ISD::UMULO, MVT::v64i8, Custom);
1653
1654 setOperationAction(ISD::BITREVERSE, MVT::v64i8, Custom);
1655
1656 for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32, MVT::v8i64 }) {
1657 setOperationAction(ISD::SRL, VT, Custom);
1658 setOperationAction(ISD::SHL, VT, Custom);
1659 setOperationAction(ISD::SRA, VT, Custom);
1660 setOperationAction(ISD::SETCC, VT, Custom);
1661
1662 // The condition codes aren't legal in SSE/AVX and under AVX512 we use
1663 // setcc all the way to isel and prefer SETGT in some isel patterns.
1664 setCondCodeAction(ISD::SETLT, VT, Custom);
1665 setCondCodeAction(ISD::SETLE, VT, Custom);
1666 }
1667 for (auto VT : { MVT::v16i32, MVT::v8i64 }) {
1668 setOperationAction(ISD::SMAX, VT, Legal);
1669 setOperationAction(ISD::UMAX, VT, Legal);
1670 setOperationAction(ISD::SMIN, VT, Legal);
1671 setOperationAction(ISD::UMIN, VT, Legal);
1672 setOperationAction(ISD::ABS, VT, Legal);
1673 setOperationAction(ISD::CTPOP, VT, Custom);
1674 setOperationAction(ISD::ROTL, VT, Custom);
1675 setOperationAction(ISD::ROTR, VT, Custom);
1676 setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
1677 setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
1678 }
1679
1680 for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
1681 setOperationAction(ISD::ABS, VT, HasBWI ? Legal : Custom);
1682 setOperationAction(ISD::CTPOP, VT, Subtarget.hasBITALG() ? Legal : Custom);
1683 setOperationAction(ISD::CTLZ, VT, Custom);
1684 setOperationAction(ISD::SMAX, VT, HasBWI ? Legal : Custom);
1685 setOperationAction(ISD::UMAX, VT, HasBWI ? Legal : Custom);
1686 setOperationAction(ISD::SMIN, VT, HasBWI ? Legal : Custom);
1687 setOperationAction(ISD::UMIN, VT, HasBWI ? Legal : Custom);
1688 setOperationAction(ISD::UADDSAT, VT, HasBWI ? Legal : Custom);
1689 setOperationAction(ISD::SADDSAT, VT, HasBWI ? Legal : Custom);
1690 setOperationAction(ISD::USUBSAT, VT, HasBWI ? Legal : Custom);
1691 setOperationAction(ISD::SSUBSAT, VT, HasBWI ? Legal : Custom);
1692 }
1693
1694 if (Subtarget.hasDQI()) {
1695 setOperationAction(ISD::SINT_TO_FP, MVT::v8i64, Legal);
1696 setOperationAction(ISD::UINT_TO_FP, MVT::v8i64, Legal);
1697 setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v8i64, Legal);
1698 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v8i64, Legal);
1699 setOperationAction(ISD::FP_TO_SINT, MVT::v8i64, Legal);
1700 setOperationAction(ISD::FP_TO_UINT, MVT::v8i64, Legal);
1701 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v8i64, Legal);
1702 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i64, Legal);
1703
1704 setOperationAction(ISD::MUL, MVT::v8i64, Legal);
1705 }
1706
1707 if (Subtarget.hasCDI()) {
1708 // NonVLX sub-targets extend 128/256 vectors to use the 512 version.
1709 for (auto VT : { MVT::v16i32, MVT::v8i64} ) {
1710 setOperationAction(ISD::CTLZ, VT, Legal);
1711 }
1712 } // Subtarget.hasCDI()
1713
1714 if (Subtarget.hasVPOPCNTDQ()) {
1715 for (auto VT : { MVT::v16i32, MVT::v8i64 })
1716 setOperationAction(ISD::CTPOP, VT, Legal);
1717 }
1718
1719 // Extract subvector is special because the value type
1720 // (result) is 256-bit but the source is 512-bit wide.
1721 // 128-bit was made Legal under AVX1.
1722 for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64,
1723 MVT::v8f32, MVT::v4f64 })
1724 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal);
1725
1726 for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32, MVT::v8i64,
1727 MVT::v16f32, MVT::v8f64 }) {
1728 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1729 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal);
1730 setOperationAction(ISD::SELECT, VT, Custom);
1731 setOperationAction(ISD::VSELECT, VT, Custom);
1732 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1733 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1734 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1735 setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
1736 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1737 }
1738
1739 for (auto VT : { MVT::v16i32, MVT::v8i64, MVT::v16f32, MVT::v8f64 }) {
1740 setOperationAction(ISD::MLOAD, VT, Legal);
1741 setOperationAction(ISD::MSTORE, VT, Legal);
1742 setOperationAction(ISD::MGATHER, VT, Custom);
1743 setOperationAction(ISD::MSCATTER, VT, Custom);
1744 }
1745 if (HasBWI) {
1746 for (auto VT : { MVT::v64i8, MVT::v32i16 }) {
1747 setOperationAction(ISD::MLOAD, VT, Legal);
1748 setOperationAction(ISD::MSTORE, VT, Legal);
1749 }
1750 } else {
1751 setOperationAction(ISD::STORE, MVT::v32i16, Custom);
1752 setOperationAction(ISD::STORE, MVT::v64i8, Custom);
1753 }
1754
1755 if (Subtarget.hasVBMI2()) {
1756 for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64,
1757 MVT::v16i16, MVT::v8i32, MVT::v4i64,
1758 MVT::v32i16, MVT::v16i32, MVT::v8i64 }) {
1759 setOperationAction(ISD::FSHL, VT, Custom);
1760 setOperationAction(ISD::FSHR, VT, Custom);
1761 }
1762
1763 setOperationAction(ISD::ROTL, MVT::v32i16, Custom);
1764 setOperationAction(ISD::ROTR, MVT::v8i16, Custom);
1765 setOperationAction(ISD::ROTR, MVT::v16i16, Custom);
1766 setOperationAction(ISD::ROTR, MVT::v32i16, Custom);
1767 }
1768 }// useAVX512Regs
1769
1770 // This block controls legalization for operations that don't have
1771 // pre-AVX512 equivalents. Without VLX we use 512-bit operations for
1772 // narrower widths.
1773 if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) {
1774 // These operations are handled on non-VLX by artificially widening in
1775 // isel patterns.
1776
1777 setOperationAction(ISD::FP_TO_UINT, MVT::v8i32,
1778 Subtarget.hasVLX() ? Legal : Custom);
1779 setOperationAction(ISD::FP_TO_UINT, MVT::v4i32,
1780 Subtarget.hasVLX() ? Legal : Custom);
1781 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i32,
1782 Subtarget.hasVLX() ? Legal : Custom);
1783 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32,
1784 Subtarget.hasVLX() ? Legal : Custom);
1785 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i32, Custom);
1786 setOperationAction(ISD::UINT_TO_FP, MVT::v8i32,
1787 Subtarget.hasVLX() ? Legal : Custom);
1788 setOperationAction(ISD::UINT_TO_FP, MVT::v4i32,
1789 Subtarget.hasVLX() ? Legal : Custom);
1790 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v8i32,
1791 Subtarget.hasVLX() ? Legal : Custom);
1792 setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32,
1793 Subtarget.hasVLX() ? Legal : Custom);
1794
1795 if (Subtarget.hasDQI()) {
1796 // Fast v2f32 SINT_TO_FP( v2i64 ) custom conversion.
1797 // v2f32 UINT_TO_FP is already custom under SSE2.
1798 assert(isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) &&((void)0)
1799 isOperationCustom(ISD::STRICT_UINT_TO_FP, MVT::v2f32) &&((void)0)
1800 "Unexpected operation action!")((void)0);
1801 // v2i64 FP_TO_S/UINT(v2f32) custom conversion.
1802 setOperationAction(ISD::FP_TO_SINT, MVT::v2f32, Custom);
1803 setOperationAction(ISD::FP_TO_UINT, MVT::v2f32, Custom);
1804 setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2f32, Custom);
1805 setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2f32, Custom);
1806 }
1807
1808 for (auto VT : { MVT::v2i64, MVT::v4i64 }) {
1809 setOperationAction(ISD::SMAX, VT, Legal);
1810 setOperationAction(ISD::UMAX, VT, Legal);
1811 setOperationAction(ISD::SMIN, VT, Legal);
1812 setOperationAction(ISD::UMIN, VT, Legal);
1813 setOperationAction(ISD::ABS, VT, Legal);
1814 }
1815
1816 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {
1817 setOperationAction(ISD::ROTL, VT, Custom);
1818 setOperationAction(ISD::ROTR, VT, Custom);
1819 }
1820
1821 // Custom legalize 2x32 to get a little better code.
1822 setOperationAction(ISD::MSCATTER, MVT::v2f32, Custom);
1823 setOperationAction(ISD::MSCATTER, MVT::v2i32, Custom);
1824
1825 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64,
1826 MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 })
1827 setOperationAction(ISD::MSCATTER, VT, Custom);
1828
1829 if (Subtarget.hasDQI()) {
1830 for (auto VT : { MVT::v2i64, MVT::v4i64 }) {
1831 setOperationAction(ISD::SINT_TO_FP, VT,
1832 Subtarget.hasVLX() ? Legal : Custom);
1833 setOperationAction(ISD::UINT_TO_FP, VT,
1834 Subtarget.hasVLX() ? Legal : Custom);
1835 setOperationAction(ISD::STRICT_SINT_TO_FP, VT,
1836 Subtarget.hasVLX() ? Legal : Custom);
1837 setOperationAction(ISD::STRICT_UINT_TO_FP, VT,
1838 Subtarget.hasVLX() ? Legal : Custom);
1839 setOperationAction(ISD::FP_TO_SINT, VT,
1840 Subtarget.hasVLX() ? Legal : Custom);
1841 setOperationAction(ISD::FP_TO_UINT, VT,
1842 Subtarget.hasVLX() ? Legal : Custom);
1843 setOperationAction(ISD::STRICT_FP_TO_SINT, VT,
1844 Subtarget.hasVLX() ? Legal : Custom);
1845 setOperationAction(ISD::STRICT_FP_TO_UINT, VT,
1846 Subtarget.hasVLX() ? Legal : Custom);
1847 setOperationAction(ISD::MUL, VT, Legal);
1848 }
1849 }
1850
1851 if (Subtarget.hasCDI()) {
1852 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) {
1853 setOperationAction(ISD::CTLZ, VT, Legal);
1854 }
1855 } // Subtarget.hasCDI()
1856
1857 if (Subtarget.hasVPOPCNTDQ()) {
1858 for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 })
1859 setOperationAction(ISD::CTPOP, VT, Legal);
1860 }
1861 }
1862
1863 // This block controls legalization of v32i1/v64i1 which are available with
1864 // AVX512BW. 512-bit v32i16 and v64i8 vector legalization is controlled with
1865 // useBWIRegs.
1866 if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) {
1867 addRegisterClass(MVT::v32i1, &X86::VK32RegClass);
1868 addRegisterClass(MVT::v64i1, &X86::VK64RegClass);
1869
1870 for (auto VT : { MVT::v32i1, MVT::v64i1 }) {
1871 setOperationAction(ISD::VSELECT, VT, Expand);
1872 setOperationAction(ISD::TRUNCATE, VT, Custom);
1873 setOperationAction(ISD::SETCC, VT, Custom);
1874 setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
1875 setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
1876 setOperationAction(ISD::SELECT, VT, Custom);
1877 setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
1878 setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
1879 setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
1880 setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
1881 }
1882
1883 for (auto VT : { MVT::v16i1, MVT::v32i1 })
1884 setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);
1885
1886 // Extends from v32i1 masks to 256-bit vectors.
1887 setOperationAction(ISD::SIGN_EXTEND, MVT::v32i8, Custom);
1888 setOperationAction(ISD::ZERO_EXTEND, MVT::v32i8, Custom);
1889 setOperationAction(ISD::ANY_EXTEND, MVT::v32i8, Custom);
1890
1891 for (auto VT : { MVT::v32i8, MVT::v16i8, MVT::v16i16, MVT::v8i16 }) {
1892 setOperationAction(ISD::MLOAD, VT, Subtarget.hasVLX() ? Legal : Custom);
1893 setOperationAction(ISD::MSTORE, VT, Subtarget.hasVLX() ? Legal : Custom);
1894 }
1895
1896 // These operations are handled on non-VLX by artificially widening in
1897 // isel patterns.
1898 // TODO: Custom widen in lowering on non-VLX and drop the isel patterns?
1899
1900 if (Subtarget.hasBITALG()) {
1901 for (auto VT : { MVT::v16i8, MVT::v32i8, MVT::v8i16, MVT::v16i16 })
1902 setOperationAction(ISD::CTPOP, VT, Legal);
1903 }
1904 }
1905
1906 if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) {
1907 setTruncStoreAction(MVT::v4i64, MVT::v4i8, Legal);
1908 setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal);
1909 setTruncStoreAction(MVT::v4i64, MVT::v4i32, Legal);
1910 setTruncStoreAction(MVT::v8i32, MVT::v8i8, Legal);
1911 setTruncStoreAction(MVT::v8i32, MVT::v8i16, Legal);
1912
1913 setTruncStoreAction(MVT::v2i64, MVT::v2i8, Legal);
1914 setTruncStoreAction(MVT::v2i64, MVT::v2i16, Legal);
1915 setTruncStoreAction(MVT::v2i64, MVT::v2i32, Legal);
1916 setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal);
1917 setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal);
1918
1919 if (Subtarget.hasBWI()) {
1920 setTruncStoreAction(MVT::v16i16, MVT::v16i8, Legal);
1921 setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal);
1922 }
1923
1924 setOperationAction(ISD::TRUNCATE, MVT::v16i32, Custom);
1925 setOperationAction(ISD::TRUNCATE, MVT::v8i64, Custom);
1926 setOperationAction(ISD::TRUNCATE, MVT::v16i64, Custom);
1927 }
1928
1929 if (Subtarget.hasAMXTILE()) {
1930 addRegisterClass(MVT::x86amx, &X86::TILERegClass);
1931 }
1932
1933 // We want to custom lower some of our intrinsics.
1934 setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
1935 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
1936 setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
1937 if (!Subtarget.is64Bit()) {
1938 setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
1939 }
1940
1941 // Only custom-lower 64-bit SADDO and friends on 64-bit because we don't
1942 // handle type legalization for these operations here.
1943 //
1944 // FIXME: We really should do custom legalization for addition and
1945 // subtraction on x86-32 once PR3203 is fixed. We really can't do much better
1946 // than generic legalization for 64-bit multiplication-with-overflow, though.
1947 for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
1948 if (VT == MVT::i64 && !Subtarget.is64Bit())
1949 continue;
1950 // Add/Sub/Mul with overflow operations are custom lowered.
1951 setOperationAction(ISD::SADDO, VT, Custom);
1952 setOperationAction(ISD::UADDO, VT, Custom);
1953 setOperationAction(ISD::SSUBO, VT, Custom);
1954 setOperationAction(ISD::USUBO, VT, Custom);
1955 setOperationAction(ISD::SMULO, VT, Custom);
1956 setOperationAction(ISD::UMULO, VT, Custom);
1957
1958 // Support carry in as value rather than glue.
1959 setOperationAction(ISD::ADDCARRY, VT, Custom);
1960 setOperationAction(ISD::SUBCARRY, VT, Custom);
1961 setOperationAction(ISD::SETCCCARRY, VT, Custom);
1962 setOperationAction(ISD::SADDO_CARRY, VT, Custom);
1963 setOperationAction(ISD::SSUBO_CARRY, VT, Custom);
1964 }
1965
1966 if (!Subtarget.is64Bit()) {
1967 // These libcalls are not available in 32-bit.
1968 setLibcallName(RTLIB::SHL_I128, nullptr);
1969 setLibcallName(RTLIB::SRL_I128, nullptr);
1970 setLibcallName(RTLIB::SRA_I128, nullptr);
1971 setLibcallName(RTLIB::MUL_I128, nullptr);
1972 }
1973
1974 // Combine sin / cos into _sincos_stret if it is available.
1975 if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr &&
1976 getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) {
1977 setOperationAction(ISD::FSINCOS, MVT::f64, Custom);
1978 setOperationAction(ISD::FSINCOS, MVT::f32, Custom);
1979 }
1980
1981 if (Subtarget.isTargetWin64()) {
1982 setOperationAction(ISD::SDIV, MVT::i128, Custom);
1983 setOperationAction(ISD::UDIV, MVT::i128, Custom);
1984 setOperationAction(ISD::SREM, MVT::i128, Custom);
1985 setOperationAction(ISD::UREM, MVT::i128, Custom);
1986 }
1987
1988 // On 32 bit MSVC, `fmodf(f32)` is not defined - only `fmod(f64)`
1989 // is. We should promote the value to 64-bits to solve this.
1990 // This is what the CRT headers do - `fmodf` is an inline header
1991 // function casting to f64 and calling `fmod`.
1992 if (Subtarget.is32Bit() &&
1993 (Subtarget.isTargetWindowsMSVC() || Subtarget.isTargetWindowsItanium()))
1994 for (ISD::NodeType Op :
1995 {ISD::FCEIL, ISD::STRICT_FCEIL,
1996 ISD::FCOS, ISD::STRICT_FCOS,
1997 ISD::FEXP, ISD::STRICT_FEXP,
1998 ISD::FFLOOR, ISD::STRICT_FFLOOR,
1999 ISD::FREM, ISD::STRICT_FREM,
2000 ISD::FLOG, ISD::STRICT_FLOG,
2001 ISD::FLOG10, ISD::STRICT_FLOG10,
2002 ISD::FPOW, ISD::STRICT_FPOW,
2003 ISD::FSIN, ISD::STRICT_FSIN})
2004 if (isOperationExpand(Op, MVT::f32))
2005 setOperationAction(Op, MVT::f32, Promote);
2006
2007 // We have target-specific dag combine patterns for the following nodes:
2008 setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
2009 setTargetDAGCombine(ISD::SCALAR_TO_VECTOR);
2010 setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
2011 setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
2012 setTargetDAGCombine(ISD::CONCAT_VECTORS);
2013 setTargetDAGCombine(ISD::INSERT_SUBVECTOR);
2014 setTargetDAGCombine(ISD::EXTRACT_SUBVECTOR);
2015 setTargetDAGCombine(ISD::BITCAST);
2016 setTargetDAGCombine(ISD::VSELECT);
2017 setTargetDAGCombine(ISD::SELECT);
2018 setTargetDAGCombine(ISD::SHL);
2019 setTargetDAGCombine(ISD::SRA);
2020 setTargetDAGCombine(ISD::SRL);
2021 setTargetDAGCombine(ISD::OR);
2022 setTargetDAGCombine(ISD::AND);
2023 setTargetDAGCombine(ISD::ADD);
2024 setTargetDAGCombine(ISD::FADD);
2025 setTargetDAGCombine(ISD::FSUB);
2026 setTargetDAGCombine(ISD::FNEG);
2027 setTargetDAGCombine(ISD::FMA);
2028 setTargetDAGCombine(ISD::STRICT_FMA);
2029 setTargetDAGCombine(ISD::FMINNUM);
2030 setTargetDAGCombine(ISD::FMAXNUM);
2031 setTargetDAGCombine(ISD::SUB);
2032 setTargetDAGCombine(ISD::LOAD);
2033 setTargetDAGCombine(ISD::MLOAD);
2034 setTargetDAGCombine(ISD::STORE);
2035 setTargetDAGCombine(ISD::MSTORE);
2036 setTargetDAGCombine(ISD::TRUNCATE);
2037 setTargetDAGCombine(ISD::ZERO_EXTEND);
2038 setTargetDAGCombine(ISD::ANY_EXTEND);
2039 setTargetDAGCombine(ISD::SIGN_EXTEND);
2040 setTargetDAGCombine(ISD::SIGN_EXTEND_INREG);
2041 setTargetDAGCombine(ISD::ANY_EXTEND_VECTOR_INREG);
2042 setTargetDAGCombine(ISD::SIGN_EXTEND_VECTOR_INREG);
2043 setTargetDAGCombine(ISD::ZERO_EXTEND_VECTOR_INREG);
2044 setTargetDAGCombine(ISD::SINT_TO_FP);
2045 setTargetDAGCombine(ISD::UINT_TO_FP);
2046 setTargetDAGCombine(ISD::STRICT_SINT_TO_FP);
2047 setTargetDAGCombine(ISD::STRICT_UINT_TO_FP);
2048 setTargetDAGCombine(ISD::SETCC);
2049 setTargetDAGCombine(ISD::MUL);
2050 setTargetDAGCombine(ISD::XOR);
2051 setTargetDAGCombine(ISD::MSCATTER);
2052 setTargetDAGCombine(ISD::MGATHER);
2053 setTargetDAGCombine(ISD::FP16_TO_FP);
2054 setTargetDAGCombine(ISD::FP_EXTEND);
2055 setTargetDAGCombine(ISD::STRICT_FP_EXTEND);
2056 setTargetDAGCombine(ISD::FP_ROUND);
2057
2058 computeRegisterProperties(Subtarget.getRegisterInfo());
2059
2060 MaxStoresPerMemset = 16; // For @llvm.memset -> sequence of stores
2061 MaxStoresPerMemsetOptSize = 8;
2062 MaxStoresPerMemcpy = 8; // For @llvm.memcpy -> sequence of stores
2063 MaxStoresPerMemcpyOptSize = 4;
2064 MaxStoresPerMemmove = 8; // For @llvm.memmove -> sequence of stores
2065 MaxStoresPerMemmoveOptSize = 4;
2066
2067 // TODO: These control memcmp expansion in CGP and could be raised higher, but
2068 // that needs to be benchmarked and balanced with the potential use of vector
2069 // load/store types (PR33329, PR33914).
2070 MaxLoadsPerMemcmp = 2;
2071 MaxLoadsPerMemcmpOptSize = 2;
2072
2073 // Set loop alignment to 2^ExperimentalPrefLoopAlignment bytes (default: 2^4).
2074 setPrefLoopAlignment(Align(1ULL << ExperimentalPrefLoopAlignment));
2075
2076 // An out-of-order CPU can speculatively execute past a predictable branch,
2077 // but a conditional move could be stalled by an expensive earlier operation.
2078 PredictableSelectIsExpensive = Subtarget.getSchedModel().isOutOfOrder();
2079 EnableExtLdPromotion = true;
2080 setPrefFunctionAlignment(Align(16));
2081
2082 verifyIntrinsicTables();
2083
2084 // Default to having -disable-strictnode-mutation on
2085 IsStrictFPEnabled = true;
2086}
2087
2088// This has so far only been implemented for 64-bit MachO.
2089bool X86TargetLowering::useLoadStackGuardNode() const {
2090 return Subtarget.isTargetMachO() && Subtarget.is64Bit();
2091}
2092
2093bool X86TargetLowering::useStackGuardXorFP() const {
2094 // Currently only MSVC CRTs XOR the frame pointer into the stack guard value.
2095 return Subtarget.getTargetTriple().isOSMSVCRT() && !Subtarget.isTargetMachO();
2096}
2097
2098SDValue X86TargetLowering::emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val,
2099 const SDLoc &DL) const {
2100 EVT PtrTy = getPointerTy(DAG.getDataLayout());
2101 unsigned XorOp = Subtarget.is64Bit() ? X86::XOR64_FP : X86::XOR32_FP;
2102 MachineSDNode *Node = DAG.getMachineNode(XorOp, DL, PtrTy, Val);
2103 return SDValue(Node, 0);
2104}
2105
2106TargetLoweringBase::LegalizeTypeAction
2107X86TargetLowering::getPreferredVectorAction(MVT VT) const {
2108 if ((VT == MVT::v32i1 || VT == MVT::v64i1) && Subtarget.hasAVX512() &&
2109 !Subtarget.hasBWI())
2110 return TypeSplitVector;
2111
2112 if (!VT.isScalableVector() && VT.getVectorNumElements() != 1 &&
2113 VT.getVectorElementType() != MVT::i1)
2114 return TypeWidenVector;
2115
2116 return TargetLoweringBase::getPreferredVectorAction(VT);
2117}
2118
2119static std::pair<MVT, unsigned>
2120handleMaskRegisterForCallingConv(unsigned NumElts, CallingConv::ID CC,
2121 const X86Subtarget &Subtarget) {
2122 // v2i1/v4i1/v8i1/v16i1 all pass in xmm registers unless the calling
2123 // convention is one that uses k registers.
2124 if (NumElts == 2)
2125 return {MVT::v2i64, 1};
2126 if (NumElts == 4)
2127 return {MVT::v4i32, 1};
2128 if (NumElts == 8 && CC != CallingConv::X86_RegCall &&
2129 CC != CallingConv::Intel_OCL_BI)
2130 return {MVT::v8i16, 1};
2131 if (NumElts == 16 && CC != CallingConv::X86_RegCall &&
2132 CC != CallingConv::Intel_OCL_BI)
2133 return {MVT::v16i8, 1};
2134 // v32i1 passes in ymm unless we have BWI and the calling convention is
2135 // regcall.
2136 if (NumElts == 32 && (!Subtarget.hasBWI() || CC != CallingConv::X86_RegCall))
2137 return {MVT::v32i8, 1};
2138 // Split v64i1 vectors if we don't have v64i8 available.
2139 if (NumElts == 64 && Subtarget.hasBWI() && CC != CallingConv::X86_RegCall) {
2140 if (Subtarget.useAVX512Regs())
2141 return {MVT::v64i8, 1};
2142 return {MVT::v32i8, 2};
2143 }
2144
2145 // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
2146 if (!isPowerOf2_32(NumElts) || (NumElts == 64 && !Subtarget.hasBWI()) ||
2147 NumElts > 64)
2148 return {MVT::i8, NumElts};
2149
2150 return {MVT::INVALID_SIMPLE_VALUE_TYPE, 0};
2151}
2152
2153MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
2154 CallingConv::ID CC,
2155 EVT VT) const {
2156 if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
2157 Subtarget.hasAVX512()) {
2158 unsigned NumElts = VT.getVectorNumElements();
2159
2160 MVT RegisterVT;
2161 unsigned NumRegisters;
2162 std::tie(RegisterVT, NumRegisters) =
2163 handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
2164 if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
2165 return RegisterVT;
2166 }
2167
2168 return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
2169}
2170
2171unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
2172 CallingConv::ID CC,
2173 EVT VT) const {
2174 if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
2175 Subtarget.hasAVX512()) {
2176 unsigned NumElts = VT.getVectorNumElements();
2177
2178 MVT RegisterVT;
2179 unsigned NumRegisters;
2180 std::tie(RegisterVT, NumRegisters) =
2181 handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
2182 if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
2183 return NumRegisters;
2184 }
2185
2186 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
2187}
2188
2189unsigned X86TargetLowering::getVectorTypeBreakdownForCallingConv(
2190 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
2191 unsigned &NumIntermediates, MVT &RegisterVT) const {
2192 // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
2193 if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
2194 Subtarget.hasAVX512() &&
2195 (!isPowerOf2_32(VT.getVectorNumElements()) ||
2196 (VT.getVectorNumElements() == 64 && !Subtarget.hasBWI()) ||
2197 VT.getVectorNumElements() > 64)) {
2198 RegisterVT = MVT::i8;
2199 IntermediateVT = MVT::i1;
2200 NumIntermediates = VT.getVectorNumElements();
2201 return NumIntermediates;
2202 }
2203
2204 // Split v64i1 vectors if we don't have v64i8 available.
2205 if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
2206 CC != CallingConv::X86_RegCall) {
2207 RegisterVT = MVT::v32i8;
2208 IntermediateVT = MVT::v32i1;
2209 NumIntermediates = 2;
2210 return 2;
2211 }
2212
2213 return TargetLowering::getVectorTypeBreakdownForCallingConv(Context, CC, VT, IntermediateVT,
2214 NumIntermediates, RegisterVT);
2215}
2216
2217EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL,
2218 LLVMContext& Context,
2219 EVT VT) const {
2220 if (!VT.isVector())
2221 return MVT::i8;
2222
2223 if (Subtarget.hasAVX512()) {
2224 // Figure out what this type will be legalized to.
2225 EVT LegalVT = VT;
2226 while (getTypeAction(Context, LegalVT) != TypeLegal)
2227 LegalVT = getTypeToTransformTo(Context, LegalVT);
2228
2229 // If we got a 512-bit vector then we'll definitely have a vXi1 compare.
2230 if (LegalVT.getSimpleVT().is512BitVector())
2231 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
2232
2233 if (LegalVT.getSimpleVT().isVector() && Subtarget.hasVLX()) {
2234 // If we legalized to less than a 512-bit vector, then we will use a vXi1
2235 // compare for vXi32/vXi64 for sure. If we have BWI we will also support
2236 // vXi16/vXi8.
2237 MVT EltVT = LegalVT.getSimpleVT().getVectorElementType();
2238 if (Subtarget.hasBWI() || EltVT.getSizeInBits() >= 32)
2239 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
2240 }
2241 }
2242
2243 return VT.changeVectorElementTypeToInteger();
2244}
2245
2246/// Helper for getByValTypeAlignment to determine
2247/// the desired ByVal argument alignment.
2248static void getMaxByValAlign(Type *Ty, Align &MaxAlign) {
2249 if (MaxAlign == 16)
2250 return;
2251 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
2252 if (VTy->getPrimitiveSizeInBits().getFixedSize() == 128)
2253 MaxAlign = Align(16);
2254 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
2255 Align EltAlign;
2256 getMaxByValAlign(ATy->getElementType(), EltAlign);
2257 if (EltAlign > MaxAlign)
2258 MaxAlign = EltAlign;
2259 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
2260 for (auto *EltTy : STy->elements()) {
2261 Align EltAlign;
2262 getMaxByValAlign(EltTy, EltAlign);
2263 if (EltAlign > MaxAlign)
2264 MaxAlign = EltAlign;
2265 if (MaxAlign == 16)
2266 break;
2267 }
2268 }
2269}
2270
2271/// Return the desired alignment for ByVal aggregate
2272/// function arguments in the caller parameter area. For X86, aggregates
2273/// that contain SSE vectors are placed at 16-byte boundaries while the rest
2274/// are at 4-byte boundaries.
2275unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty,
2276 const DataLayout &DL) const {
2277 if (Subtarget.is64Bit()) {
2278 // Max of 8 and alignment of type.
2279 Align TyAlign = DL.getABITypeAlign(Ty);
2280 if (TyAlign > 8)
2281 return TyAlign.value();
2282 return 8;
2283 }
2284
2285 Align Alignment(4);
2286 if (Subtarget.hasSSE1())
2287 getMaxByValAlign(Ty, Alignment);
2288 return Alignment.value();
2289}
2290
2291/// It returns EVT::Other if the type should be determined using generic
2292/// target-independent logic.
2293/// For vector ops we check that the overall size isn't larger than our
2294/// preferred vector width.
2295EVT X86TargetLowering::getOptimalMemOpType(
2296 const MemOp &Op, const AttributeList &FuncAttributes) const {
2297 if (!FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat)) {
2298 if (Op.size() >= 16 &&
2299 (!Subtarget.isUnalignedMem16Slow() || Op.isAligned(Align(16)))) {
2300 // FIXME: Check if unaligned 64-byte accesses are slow.
2301 if (Op.size() >= 64 && Subtarget.hasAVX512() &&
2302 (Subtarget.getPreferVectorWidth() >= 512)) {
2303 return Subtarget.hasBWI() ? MVT::v64i8 : MVT::v16i32;
2304 }
2305 // FIXME: Check if unaligned 32-byte accesses are slow.
2306 if (Op.size() >= 32 && Subtarget.hasAVX() &&
2307 (Subtarget.getPreferVectorWidth() >= 256)) {
2308 // Although this isn't a well-supported type for AVX1, we'll let
2309 // legalization and shuffle lowering produce the optimal codegen. If we
2310 // choose an optimal type with a vector element larger than a byte,
2311 // getMemsetStores() may create an intermediate splat (using an integer
2312 // multiply) before we splat as a vector.
2313 return MVT::v32i8;
2314 }
2315 if (Subtarget.hasSSE2() && (Subtarget.getPreferVectorWidth() >= 128))
2316 return MVT::v16i8;
2317 // TODO: Can SSE1 handle a byte vector?
2318 // If we have SSE1 registers we should be able to use them.
2319 if (Subtarget.hasSSE1() && (Subtarget.is64Bit() || Subtarget.hasX87()) &&
2320 (Subtarget.getPreferVectorWidth() >= 128))
2321 return MVT::v4f32;
2322 } else if (((Op.isMemcpy() && !Op.isMemcpyStrSrc()) || Op.isZeroMemset()) &&
2323 Op.size() >= 8 && !Subtarget.is64Bit() && Subtarget.hasSSE2()) {
2324 // Do not use f64 to lower memcpy if source is string constant. It's
2325 // better to use i32 to avoid the loads.
2326 // Also, do not use f64 to lower memset unless this is a memset of zeros.
2327 // The gymnastics of splatting a byte value into an XMM register and then
2328 // only using 8-byte stores (because this is a CPU with slow unaligned
2329 // 16-byte accesses) makes that a loser.
2330 return MVT::f64;
2331 }
2332 }
2333 // This is a compromise. If we reach here, unaligned accesses may be slow on
2334 // this target. However, creating smaller, aligned accesses could be even
2335 // slower and would certainly be a lot more code.
2336 if (Subtarget.is64Bit() && Op.size() >= 8)
2337 return MVT::i64;
2338 return MVT::i32;
2339}
2340
2341bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
2342 if (VT == MVT::f32)
2343 return X86ScalarSSEf32;
2344 if (VT == MVT::f64)
2345 return X86ScalarSSEf64;
2346 return true;
2347}
2348
2349bool X86TargetLowering::allowsMisalignedMemoryAccesses(
2350 EVT VT, unsigned, Align Alignment, MachineMemOperand::Flags Flags,
2351 bool *Fast) const {
2352 if (Fast) {
2353 switch (VT.getSizeInBits()) {
2354 default:
2355 // 8-byte and under are always assumed to be fast.
2356 *Fast = true;
2357 break;
2358 case 128:
2359 *Fast = !Subtarget.isUnalignedMem16Slow();
2360 break;
2361 case 256:
2362 *Fast = !Subtarget.isUnalignedMem32Slow();
2363 break;
2364 // TODO: What about AVX-512 (512-bit) accesses?
2365 }
2366 }
2367 // NonTemporal vector memory ops must be aligned.
2368 if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) {
2369 // NT loads can only be vector aligned, so if its less aligned than the
2370 // minimum vector size (which we can split the vector down to), we might as
2371 // well use a regular unaligned vector load.
2372 // We don't have any NT loads pre-SSE41.
2373 if (!!(Flags & MachineMemOperand::MOLoad))
2374 return (Alignment < 16 || !Subtarget.hasSSE41());
2375 return false;
2376 }
2377 // Misaligned accesses of any size are always allowed.
2378 return true;
2379}
2380
2381/// Return the entry encoding for a jump table in the
2382/// current function. The returned value is a member of the
2383/// MachineJumpTableInfo::JTEntryKind enum.
2384unsigned X86TargetLowering::getJumpTableEncoding() const {
2385 // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF
2386 // symbol.
2387 if (isPositionIndependent() && Subtarget.isPICStyleGOT())
2388 return MachineJumpTableInfo::EK_Custom32;
2389
2390 // Otherwise, use the normal jump table encoding heuristics.
2391 return TargetLowering::getJumpTableEncoding();
2392}
2393
2394bool X86TargetLowering::useSoftFloat() const {
2395 return Subtarget.useSoftFloat();
2396}
2397
2398void X86TargetLowering::markLibCallAttributes(MachineFunction *MF, unsigned CC,
2399 ArgListTy &Args) const {
2400
2401 // Only relabel X86-32 for C / Stdcall CCs.
2402 if (Subtarget.is64Bit())
2403 return;
2404 if (CC != CallingConv::C && CC != CallingConv::X86_StdCall)
2405 return;
2406 unsigned ParamRegs = 0;
2407 if (auto *M = MF->getFunction().getParent())
2408 ParamRegs = M->getNumberRegisterParameters();
2409
2410 // Mark the first N int arguments as having reg
2411 for (auto &Arg : Args) {
2412 Type *T = Arg.Ty;
2413 if (T->isIntOrPtrTy())
2414 if (MF->getDataLayout().getTypeAllocSize(T) <= 8) {
2415 unsigned numRegs = 1;
2416 if (MF->getDataLayout().getTypeAllocSize(T) > 4)
2417 numRegs = 2;
2418 if (ParamRegs < numRegs)
2419 return;
2420 ParamRegs -= numRegs;
2421 Arg.IsInReg = true;
2422 }
2423 }
2424}
2425
2426const MCExpr *
2427X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
2428 const MachineBasicBlock *MBB,
2429 unsigned uid,MCContext &Ctx) const{
2430 assert(isPositionIndependent() && Subtarget.isPICStyleGOT())((void)0);
2431 // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
2432 // entries.
2433 return MCSymbolRefExpr::create(MBB->getSymbol(),
2434 MCSymbolRefExpr::VK_GOTOFF, Ctx);
2435}
2436
2437/// Returns relocation base for the given PIC jumptable.
2438SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
2439 SelectionDAG &DAG) const {
2440 if (!Subtarget.is64Bit())
2441 // This doesn't have SDLoc associated with it, but is not really the
2442 // same as a Register.
2443 return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
2444 getPointerTy(DAG.getDataLayout()));
2445 return Table;
2446}
2447
2448/// This returns the relocation base for the given PIC jumptable,
2449/// the same as getPICJumpTableRelocBase, but as an MCExpr.
2450const MCExpr *X86TargetLowering::
2451getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
2452 MCContext &Ctx) const {
2453 // X86-64 uses RIP relative addressing based on the jump table label.
2454 if (Subtarget.isPICStyleRIPRel())
2455 return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
2456
2457 // Otherwise, the reference is relative to the PIC base.
2458 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
2459}
2460
2461std::pair<const TargetRegisterClass *, uint8_t>
2462X86TargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
2463 MVT VT) const {
2464 const TargetRegisterClass *RRC = nullptr;
2465 uint8_t Cost = 1;
2466 switch (VT.SimpleTy) {
2467 default:
2468 return TargetLowering::findRepresentativeClass(TRI, VT);
2469 case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
2470 RRC = Subtarget.is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass;
2471 break;
2472 case MVT::x86mmx:
2473 RRC = &X86::VR64RegClass;
2474 break;
2475 case MVT::f32: case MVT::f64:
2476 case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
2477 case MVT::v4f32: case MVT::v2f64:
2478 case MVT::v32i8: case MVT::v16i16: case MVT::v8i32: case MVT::v4i64:
2479 case MVT::v8f32: case MVT::v4f64:
2480 case MVT::v64i8: case MVT::v32i16: case MVT::v16i32: case MVT::v8i64:
2481 case MVT::v16f32: case MVT::v8f64:
2482 RRC = &X86::VR128XRegClass;
2483 break;
2484 }
2485 return std::make_pair(RRC, Cost);
2486}
2487
2488unsigned X86TargetLowering::getAddressSpace() const {
2489 if (Subtarget.is64Bit())
2490 return (getTargetMachine().getCodeModel() == CodeModel::Kernel) ? 256 : 257;
2491 return 256;
2492}
2493
2494static bool hasStackGuardSlotTLS(const Triple &TargetTriple) {
2495 return TargetTriple.isOSGlibc() || TargetTriple.isOSFuchsia() ||
2496 (TargetTriple.isAndroid() && !TargetTriple.isAndroidVersionLT(17));
2497}
2498
2499static Constant* SegmentOffset(IRBuilderBase &IRB,
2500 int Offset, unsigned AddressSpace) {
2501 return ConstantExpr::getIntToPtr(
2502 ConstantInt::get(Type::getInt32Ty(IRB.getContext()), Offset),
2503 Type::getInt8PtrTy(IRB.getContext())->getPointerTo(AddressSpace));
2504}
2505
2506Value *X86TargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
2507 // glibc, bionic, and Fuchsia have a special slot for the stack guard in
2508 // tcbhead_t; use it instead of the usual global variable (see
2509 // sysdeps/{i386,x86_64}/nptl/tls.h)
2510 if (hasStackGuardSlotTLS(Subtarget.getTargetTriple())) {
2511 if (Subtarget.isTargetFuchsia()) {
2512 // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
2513 return SegmentOffset(IRB, 0x10, getAddressSpace());
2514 } else {
2515 unsigned AddressSpace = getAddressSpace();
2516 Module *M = IRB.GetInsertBlock()->getParent()->getParent();
2517 // Specially, some users may customize the base reg and offset.
2518 int Offset = M->getStackProtectorGuardOffset();
2519 // If we don't set -stack-protector-guard-offset value:
2520 // %fs:0x28, unless we're using a Kernel code model, in which case
2521 // it's %gs:0x28. gs:0x14 on i386.
2522 if (Offset == INT_MAX2147483647)
2523 Offset = (Subtarget.is64Bit()) ? 0x28 : 0x14;
2524
2525 StringRef GuardReg = M->getStackProtectorGuardReg();
2526 if (GuardReg == "fs")
2527 AddressSpace = X86AS::FS;
2528 else if (GuardReg == "gs")
2529 AddressSpace = X86AS::GS;
2530 return SegmentOffset(IRB, Offset, AddressSpace);
2531 }
2532 }
2533 return TargetLowering::getIRStackGuard(IRB);
2534}
2535
2536void X86TargetLowering::insertSSPDeclarations(Module &M) const {
2537 // MSVC CRT provides functionalities for stack protection.
2538 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
2539 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
2540 // MSVC CRT has a global variable holding security cookie.
2541 M.getOrInsertGlobal("__security_cookie",
2542 Type::getInt8PtrTy(M.getContext()));
2543
2544 // MSVC CRT has a function to validate security cookie.
2545 FunctionCallee SecurityCheckCookie = M.getOrInsertFunction(
2546 "__security_check_cookie", Type::getVoidTy(M.getContext()),
2547 Type::getInt8PtrTy(M.getContext()));
2548 if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee())) {
2549 F->setCallingConv(CallingConv::X86_FastCall);
2550 F->addAttribute(1, Attribute::AttrKind::InReg);
2551 }
2552 return;
2553 }
2554
2555 StringRef GuardMode = M.getStackProtectorGuard();
2556
2557 // glibc, bionic, and Fuchsia have a special slot for the stack guard.
2558 if ((GuardMode == "tls" || GuardMode.empty()) &&
2559 hasStackGuardSlotTLS(Subtarget.getTargetTriple()))
2560 return;
2561 TargetLowering::insertSSPDeclarations(M);
2562}
2563
2564Value *X86TargetLowering::getSDagStackGuard(const Module &M) const {
2565 // MSVC CRT has a global variable holding security cookie.
2566 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
2567 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
2568 return M.getGlobalVariable("__security_cookie");
2569 }
2570 return TargetLowering::getSDagStackGuard(M);
2571}
2572
2573Function *X86TargetLowering::getSSPStackGuardCheck(const Module &M) const {
2574 // MSVC CRT has a function to validate security cookie.
2575 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
2576 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
2577 return M.getFunction("__security_check_cookie");
2578 }
2579 return TargetLowering::getSSPStackGuardCheck(M);
2580}
2581
2582Value *
2583X86TargetLowering::getSafeStackPointerLocation(IRBuilderBase &IRB) const {
2584 if (Subtarget.getTargetTriple().isOSContiki())
2585 return getDefaultSafeStackPointerLocation(IRB, false);
2586
2587 // Android provides a fixed TLS slot for the SafeStack pointer. See the
2588 // definition of TLS_SLOT_SAFESTACK in
2589 // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
2590 if (Subtarget.isTargetAndroid()) {
2591 // %fs:0x48, unless we're using a Kernel code model, in which case it's %gs:
2592 // %gs:0x24 on i386
2593 int Offset = (Subtarget.is64Bit()) ? 0x48 : 0x24;
2594 return SegmentOffset(IRB, Offset, getAddressSpace());
2595 }
2596
2597 // Fuchsia is similar.
2598 if (Subtarget.isTargetFuchsia()) {
2599 // <zircon/tls.h> defines ZX_TLS_UNSAFE_SP_OFFSET with this value.
2600 return SegmentOffset(IRB, 0x18, getAddressSpace());
2601 }
2602
2603 return TargetLowering::getSafeStackPointerLocation(IRB);
2604}
2605
2606//===----------------------------------------------------------------------===//
2607// Return Value Calling Convention Implementation
2608//===----------------------------------------------------------------------===//
2609
2610bool X86TargetLowering::CanLowerReturn(
2611 CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
2612 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
2613 SmallVector<CCValAssign, 16> RVLocs;
2614 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
2615 return CCInfo.CheckReturn(Outs, RetCC_X86);
2616}
2617
2618const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
2619 static const MCPhysReg ScratchRegs[] = { X86::R11, 0 };
2620 return ScratchRegs;
2621}
2622
2623/// Lowers masks values (v*i1) to the local register values
2624/// \returns DAG node after lowering to register type
2625static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc,
2626 const SDLoc &Dl, SelectionDAG &DAG) {
2627 EVT ValVT = ValArg.getValueType();
2628
2629 if (ValVT == MVT::v1i1)
2630 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, Dl, ValLoc, ValArg,
2631 DAG.getIntPtrConstant(0, Dl));
2632
2633 if ((ValVT == MVT::v8i1 && (ValLoc == MVT::i8 || ValLoc == MVT::i32)) ||
2634 (ValVT == MVT::v16i1 && (ValLoc == MVT::i16 || ValLoc == MVT::i32))) {
2635 // Two stage lowering might be required
2636 // bitcast: v8i1 -> i8 / v16i1 -> i16
2637 // anyextend: i8 -> i32 / i16 -> i32
2638 EVT TempValLoc = ValVT == MVT::v8i1 ? MVT::i8 : MVT::i16;
2639 SDValue ValToCopy = DAG.getBitcast(TempValLoc, ValArg);
2640 if (ValLoc == MVT::i32)
2641 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, Dl, ValLoc, ValToCopy);
2642 return ValToCopy;
2643 }
2644
2645 if ((ValVT == MVT::v32i1 && ValLoc == MVT::i32) ||
2646 (ValVT == MVT::v64i1 && ValLoc == MVT::i64)) {
2647 // One stage lowering is required
2648 // bitcast: v32i1 -> i32 / v64i1 -> i64
2649 return DAG.getBitcast(ValLoc, ValArg);
2650 }
2651
2652 return DAG.getNode(ISD::ANY_EXTEND, Dl, ValLoc, ValArg);
2653}
2654
2655/// Breaks v64i1 value into two registers and adds the new node to the DAG
2656static void Passv64i1ArgInRegs(
2657 const SDLoc &Dl, SelectionDAG &DAG, SDValue &Arg,
2658 SmallVectorImpl<std::pair<Register, SDValue>> &RegsToPass, CCValAssign &VA,
2659 CCValAssign &NextVA, const X86Subtarget &Subtarget) {
2660 assert(Subtarget.hasBWI() && "Expected AVX512BW target!")((void)0);
2661 assert(Subtarget.is32Bit() && "Expecting 32 bit target")((void)0);
2662 assert(Arg.getValueType() == MVT::i64 && "Expecting 64 bit value")((void)0);
2663 assert(VA.isRegLoc() && NextVA.isRegLoc() &&((void)0)
2664 "The value should reside in two registers")((void)0);
2665
2666 // Before splitting the value we cast it to i64
2667 Arg = DAG.getBitcast(MVT::i64, Arg);
2668
2669 // Splitting the value into two i32 types
2670 SDValue Lo, Hi;
2671 Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32, Arg,
2672 DAG.getConstant(0, Dl, MVT::i32));
2673 Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32, Arg,
2674 DAG.getConstant(1, Dl, MVT::i32));
2675
2676 // Attach the two i32 types into corresponding registers
2677 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo));
2678 RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Hi));
2679}
2680
/// Lower the return of the current function into an X86 return node.
///
/// Each value in \p OutVals is extended/bitcast as demanded by the location
/// that RetCC_X86 assigned to it, copied into its return register (or pushed
/// directly as an operand for FP0/FP1), and the resulting operands are
/// attached to an X86ISD::RET_FLAG node (X86ISD::IRET for the X86_INTR
/// calling convention).
///
/// \param Chain     Incoming chain; the updated chain becomes operand #0.
/// \param CallConv  Calling convention of the function being lowered.
/// \param isVarArg  Forwarded to the CCState used for the return analysis.
/// \param Outs      Return-value descriptors passed to AnalyzeReturn.
/// \param OutVals   The values being returned.
/// \param dl        Debug location used for every node created here.
/// \param DAG       The SelectionDAG being built.
/// \returns the return node, of type MVT::Other.
2681SDValue
2682X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
2683 bool isVarArg,
2684 const SmallVectorImpl<ISD::OutputArg> &Outs,
2685 const SmallVectorImpl<SDValue> &OutVals,
2686 const SDLoc &dl, SelectionDAG &DAG) const {
2687 MachineFunction &MF = DAG.getMachineFunction();
2688 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
2689
2690 // In some cases we need to disable registers from the default CSR list.
2691 // For example, when they are used for argument passing.
2692 bool ShouldDisableCalleeSavedRegister =
2693 CallConv == CallingConv::X86_RegCall ||
2694 MF.getFunction().hasFnAttribute("no_caller_saved_registers");
2695
2696 if (CallConv == CallingConv::X86_INTR && !Outs.empty())
2697 report_fatal_error("X86 interrupts may not return any value");
2698
2699 SmallVector<CCValAssign, 16> RVLocs;
2700 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
2701 CCInfo.AnalyzeReturn(Outs, RetCC_X86);
2702
 // I walks RVLocs while OutsIndex walks OutVals. The two diverge when a
 // custom (v64i1) value consumes two locations: I is bumped an extra time in
 // the needsCustom() branch below, OutsIndex is not.
2703 SmallVector<std::pair<Register, SDValue>, 4> RetVals;
2704 for (unsigned I = 0, OutsIndex = 0, E = RVLocs.size(); I != E;
2705 ++I, ++OutsIndex) {
2706 CCValAssign &VA = RVLocs[I];
2707 assert(VA.isRegLoc() && "Can only return in registers!")((void)0);
2708
2709 // Add the register to the CalleeSaveDisableRegs list.
2710 if (ShouldDisableCalleeSavedRegister)
2711 MF.getRegInfo().disableCalleeSavedRegister(VA.getLocReg());
2712
2713 SDValue ValToCopy = OutVals[OutsIndex];
2714 EVT ValVT = ValToCopy.getValueType();
2715
2716 // Promote values to the appropriate types.
2717 if (VA.getLocInfo() == CCValAssign::SExt)
2718 ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
2719 else if (VA.getLocInfo() == CCValAssign::ZExt)
2720 ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy);
2721 else if (VA.getLocInfo() == CCValAssign::AExt) {
2722 if (ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1)
2723 ValToCopy = lowerMasksToReg(ValToCopy, VA.getLocVT(), dl, DAG);
2724 else
2725 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
2726 }
2727 else if (VA.getLocInfo() == CCValAssign::BCvt)
2728 ValToCopy = DAG.getBitcast(VA.getLocVT(), ValToCopy);
2729
2730 assert(VA.getLocInfo() != CCValAssign::FPExt &&((void)0)
2731 "Unexpected FP-extend for return value.")((void)0);
2732
2733 // Report an error if we have attempted to return a value via an XMM
2734 // register and SSE was disabled.
2735 if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
2736 errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
2737 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
2738 } else if (!Subtarget.hasSSE2() &&
2739 X86::FR64XRegClass.contains(VA.getLocReg()) &&
2740 ValVT == MVT::f64) {
2741 // When returning a double via an XMM register, report an error if SSE2 is
2742 // not enabled.
2743 errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
2744 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
2745 }
2746
2747 // Returns in ST0/ST1 are handled specially: these are pushed as operands to
2748 // the RET instruction and handled by the FP Stackifier.
2749 if (VA.getLocReg() == X86::FP0 ||
2750 VA.getLocReg() == X86::FP1) {
2751 // If this is a copy from an xmm register to ST(0), use an FPExtend to
2752 // change the value to the FP stack register class.
2753 if (isScalarFPTypeInSSEReg(VA.getValVT()))
2754 ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
2755 RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
2756 // Don't emit a copytoreg.
2757 continue;
2758 }
2759
2760 // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
2761 // which is returned in RAX / RDX.
2762 if (Subtarget.is64Bit()) {
2763 if (ValVT == MVT::x86mmx) {
2764 if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
2765 ValToCopy = DAG.getBitcast(MVT::i64, ValToCopy);
2766 ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
2767 ValToCopy);
2768 // If we don't have SSE2 available, convert to v4f32 so the generated
2769 // register is legal.
2770 if (!Subtarget.hasSSE2())
2771 ValToCopy = DAG.getBitcast(MVT::v4f32, ValToCopy);
2772 }
2773 }
2774 }
2775
2776 if (VA.needsCustom()) {
2777 assert(VA.getValVT() == MVT::v64i1 &&((void)0)
2778 "Currently the only custom case is when we split v64i1 to 2 regs")((void)0);
2779
 // The split value occupies this location and the next one, hence ++I.
2780 Passv64i1ArgInRegs(dl, DAG, ValToCopy, RetVals, VA, RVLocs[++I],
2781 Subtarget);
2782
2783 // Add the second register to the CalleeSaveDisableRegs list.
2784 if (ShouldDisableCalleeSavedRegister)
2785 MF.getRegInfo().disableCalleeSavedRegister(RVLocs[I].getLocReg());
2786 } else {
2787 RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
2788 }
2789 }
2790
2791 SDValue Flag;
2792 SmallVector<SDValue, 6> RetOps;
2793 RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
2794 // Operand #1 = Bytes To Pop
2795 RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), dl,
2796 MVT::i32));
2797
2798 // Copy the result values into the output registers.
2799 for (auto &RetVal : RetVals) {
2800 if (RetVal.first == X86::FP0 || RetVal.first == X86::FP1) {
2801 RetOps.push_back(RetVal.second);
2802 continue; // Don't emit a copytoreg.
2803 }
2804
 // Each CopyToReg is glued to the previous one through Flag.
2805 Chain = DAG.getCopyToReg(Chain, dl, RetVal.first, RetVal.second, Flag);
2806 Flag = Chain.getValue(1);
2807 RetOps.push_back(
2808 DAG.getRegister(RetVal.first, RetVal.second.getValueType()));
2809 }
2810
2811 // Swift calling convention does not require we copy the sret argument
2812 // into %rax/%eax for the return, and SRetReturnReg is not set for Swift.
2813
2814 // All x86 ABIs require that for returning structs by value we copy
2815 // the sret argument into %rax/%eax (depending on ABI) for the return.
2816 // We saved the argument into a virtual register in the entry block,
2817 // so now we copy the value out and into %rax/%eax.
2818 //
2819 // Checking Function.hasStructRetAttr() here is insufficient because the IR
2820 // may not have an explicit sret argument. If FuncInfo.CanLowerReturn is
2821 // false, then an sret argument may be implicitly inserted in the SelDAG. In
2822 // either case FuncInfo->setSRetReturnReg() will have been called.
2823 if (Register SRetReg = FuncInfo->getSRetReturnReg()) {
2824 // When we have both sret and another return value, we should use the
2825 // original Chain stored in RetOps[0], instead of the current Chain updated
2826 // in the above loop. If we only have sret, RetOps[0] equals to Chain.
2827
2828 // For the case of sret and another return value, we have
2829 // Chain_0 at the function entry
2830 // Chain_1 = getCopyToReg(Chain_0) in the above loop
2831 // If we use Chain_1 in getCopyFromReg, we will have
2832 // Val = getCopyFromReg(Chain_1)
2833 // Chain_2 = getCopyToReg(Chain_1, Val) from below
2834
2835 // getCopyToReg(Chain_0) will be glued together with
2836 // getCopyToReg(Chain_1, Val) into Unit A, getCopyFromReg(Chain_1) will be
2837 // in Unit B, and we will have cyclic dependency between Unit A and Unit B:
2838 // Data dependency from Unit B to Unit A due to usage of Val in
2839 // getCopyToReg(Chain_1, Val)
2840 // Chain dependency from Unit A to Unit B
2841
2842 // So here, we use RetOps[0] (i.e Chain_0) for getCopyFromReg.
2843 SDValue Val = DAG.getCopyFromReg(RetOps[0], dl, SRetReg,
2844 getPointerTy(MF.getDataLayout()));
2845
2846 Register RetValReg
2847 = (Subtarget.is64Bit() && !Subtarget.isTarget64BitILP32()) ?
2848 X86::RAX : X86::EAX;
2849 Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Flag);
2850 Flag = Chain.getValue(1);
2851
2852 // RAX/EAX now acts like a return value.
2853 RetOps.push_back(
2854 DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
2855
2856 // Add the returned register to the CalleeSaveDisableRegs list.
2857 if (ShouldDisableCalleeSavedRegister)
2858 MF.getRegInfo().disableCalleeSavedRegister(RetValReg);
2859 }
2860
 // The list is walked until a zero entry; every register in it must belong
 // to GR64 and is appended as an extra i64 register operand of the return.
2861 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
2862 const MCPhysReg *I =
2863 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
2864 if (I) {
2865 for (; *I; ++I) {
2866 if (X86::GR64RegClass.contains(*I))
2867 RetOps.push_back(DAG.getRegister(*I, MVT::i64));
2868 else
2869 llvm_unreachable("Unexpected register class in CSRsViaCopy!")__builtin_unreachable();
2870 }
2871 }
2872
2873 RetOps[0] = Chain; // Update chain.
2874
2875 // Add the flag if we have it.
2876 if (Flag.getNode())
2877 RetOps.push_back(Flag);
2878
2879 X86ISD::NodeType opcode = X86ISD::RET_FLAG;
2880 if (CallConv == CallingConv::X86_INTR)
2881 opcode = X86ISD::IRET;
2882 return DAG.getNode(opcode, dl, MVT::Other, RetOps);
2883}
2884
2885bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
2886 if (N->getNumValues() != 1 || !N->hasNUsesOfValue(1, 0))
2887 return false;
2888
2889 SDValue TCChain = Chain;
2890 SDNode *Copy = *N->use_begin();
2891 if (Copy->getOpcode() == ISD::CopyToReg) {
2892 // If the copy has a glue operand, we conservatively assume it isn't safe to
2893 // perform a tail call.
2894 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
2895 return false;
2896 TCChain = Copy->getOperand(0);
2897 } else if (Copy->getOpcode() != ISD::FP_EXTEND)
2898 return false;
2899
2900 bool HasRet = false;
2901 for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
2902 UI != UE; ++UI) {
2903 if (UI->getOpcode() != X86ISD::RET_FLAG)
2904 return false;
2905 // If we are returning more than one value, we can definitely
2906 // not make a tail call see PR19530
2907 if (UI->getNumOperands() > 4)
2908 return false;
2909 if (UI->getNumOperands() == 4 &&
2910 UI->getOperand(UI->getNumOperands()-1).getValueType() != MVT::Glue)
2911 return false;
2912 HasRet = true;
2913 }
2914
2915 if (!HasRet)
2916 return false;
2917
2918 Chain = TCChain;
2919 return true;
2920}
2921
2922EVT X86TargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT,
2923 ISD::NodeType ExtendKind) const {
2924 MVT ReturnMVT = MVT::i32;
2925
2926 bool Darwin = Subtarget.getTargetTriple().isOSDarwin();
2927 if (VT == MVT::i1 || (!Darwin && (VT == MVT::i8 || VT == MVT::i16))) {
2928 // The ABI does not require i1, i8 or i16 to be extended.
2929 //
2930 // On Darwin, there is code in the wild relying on Clang's old behaviour of
2931 // always extending i8/i16 return values, so keep doing that for now.
2932 // (PR26665).
2933 ReturnMVT = MVT::i8;
2934 }
2935
2936 EVT MinVT = getRegisterType(Context, ReturnMVT);
2937 return VT.bitsLT(MinVT) ? MinVT : VT;
2938}
2939
2940/// Reads two 32 bit registers and creates a 64 bit mask value.
2941/// \param VA The current 32 bit value that need to be assigned.
2942/// \param NextVA The next 32 bit value that need to be assigned.
2943/// \param Root The parent DAG node.
2944/// \param [in,out] InFlag Represents SDvalue in the parent DAG node for
2945/// glue purposes. In the case the DAG is already using
2946/// physical register instead of virtual, we should glue
2947/// our new SDValue to InFlag SDvalue.
2948/// \return a new SDvalue of size 64bit.
2949static SDValue getv64i1Argument(CCValAssign &VA, CCValAssign &NextVA,
2950 SDValue &Root, SelectionDAG &DAG,
2951 const SDLoc &Dl, const X86Subtarget &Subtarget,
2952 SDValue *InFlag = nullptr) {
2953 assert((Subtarget.hasBWI()) && "Expected AVX512BW target!")((void)0);
2954 assert(Subtarget.is32Bit() && "Expecting 32 bit target")((void)0);
2955 assert(VA.getValVT() == MVT::v64i1 &&((void)0)
2956 "Expecting first location of 64 bit width type")((void)0);
2957 assert(NextVA.getValVT() == VA.getValVT() &&((void)0)
2958 "The locations should have the same type")((void)0);
2959 assert(VA.isRegLoc() && NextVA.isRegLoc() &&((void)0)
2960 "The values should reside in two registers")((void)0);
2961
2962 SDValue Lo, Hi;
2963 SDValue ArgValueLo, ArgValueHi;
2964
2965 MachineFunction &MF = DAG.getMachineFunction();
2966 const TargetRegisterClass *RC = &X86::GR32RegClass;
2967
2968 // Read a 32 bit value from the registers.
2969 if (nullptr == InFlag) {
2970 // When no physical register is present,
2971 // create an intermediate virtual register.
2972 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
2973 ArgValueLo = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32);
2974 Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
2975 ArgValueHi = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32);
2976 } else {
2977 // When a physical register is available read the value from it and glue
2978 // the reads together.
2979 ArgValueLo =
2980 DAG.getCopyFromReg(Root, Dl, VA.getLocReg(), MVT::i32, *InFlag);
2981 *InFlag = ArgValueLo.getValue(2);
2982 ArgValueHi =
2983 DAG.getCopyFromReg(Root, Dl, NextVA.getLocReg(), MVT::i32, *InFlag);
2984 *InFlag = ArgValueHi.getValue(2);
2985 }
2986
2987 // Convert the i32 type into v32i1 type.
2988 Lo = DAG.getBitcast(MVT::v32i1, ArgValueLo);
2989
2990 // Convert the i32 type into v32i1 type.
2991 Hi = DAG.getBitcast(MVT::v32i1, ArgValueHi);
2992
2993 // Concatenate the two values together.
2994 return DAG.getNode(ISD::CONCAT_VECTORS, Dl, MVT::v64i1, Lo, Hi);
2995}
2996
2997/// The function will lower a register of various sizes (8/16/32/64)
2998/// to a mask value of the expected size (v8i1/v16i1/v32i1/v64i1)
2999/// \returns a DAG node contains the operand after lowering to mask type.
3000static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT,
3001 const EVT &ValLoc, const SDLoc &Dl,
3002 SelectionDAG &DAG) {
3003 SDValue ValReturned = ValArg;
3004
3005 if (ValVT == MVT::v1i1)
3006 return DAG.getNode(ISD::SCALAR_TO_VECTOR, Dl, MVT::v1i1, ValReturned);
3007
3008 if (ValVT == MVT::v64i1) {
3009 // In 32 bit machine, this case is handled by getv64i1Argument
3010 assert(ValLoc == MVT::i64 && "Expecting only i64 locations")((void)0);
3011 // In 64 bit machine, There is no need to truncate the value only bitcast
3012 } else {
3013 MVT maskLen;
3014 switch (ValVT.getSimpleVT().SimpleTy) {
3015 case MVT::v8i1:
3016 maskLen = MVT::i8;
3017 break;
3018 case MVT::v16i1:
3019 maskLen = MVT::i16;
3020 break;
3021 case MVT::v32i1:
3022 maskLen = MVT::i32;
3023 break;
3024 default:
3025 llvm_unreachable("Expecting a vector of i1 types")__builtin_unreachable();
3026 }
3027
3028 ValReturned = DAG.getNode(ISD::TRUNCATE, Dl, maskLen, ValReturned);
3029 }
3030 return DAG.getBitcast(ValVT, ValReturned);
3031}
3032
3033/// Lower the result values of a call into the
3034/// appropriate copies out of appropriate physical registers.
3035///
/// \param Chain    Chain to attach the register reads to.
/// \param InFlag   Glue threaded through the register reads.
/// \param CallConv Calling convention of the call.
/// \param isVarArg Forwarded to the CCState used for the result analysis.
/// \param Ins      Result descriptors passed to AnalyzeCallResult.
/// \param InVals   [out] Receives one lowered value per iteration of the
///                 location loop.
/// \param RegMask  If non-null, the bits of every returned register and of its
///                 sub-registers are cleared in this mask.
/// \returns the chain after the register reads.
3036SDValue X86TargetLowering::LowerCallResult(
3037 SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
3038 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3039 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
3040 uint32_t *RegMask) const {
3041
3042 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
3043 // Assign locations to each value returned by this call.
3044 SmallVector<CCValAssign, 16> RVLocs;
3045 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
3046 *DAG.getContext());
3047 CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
3048
3049 // Copy all of the result registers out of their specified physreg.
 // NOTE(review): InsIndex is incremented but never read in this loop.
3050 for (unsigned I = 0, InsIndex = 0, E = RVLocs.size(); I != E;
3051 ++I, ++InsIndex) {
3052 CCValAssign &VA = RVLocs[I];
3053 EVT CopyVT = VA.getLocVT();
3054
3055 // In some calling conventions we need to remove the used registers
3056 // from the register mask.
3057 if (RegMask) {
3058 for (MCSubRegIterator SubRegs(VA.getLocReg(), TRI, /*IncludeSelf=*/true);
3059 SubRegs.isValid(); ++SubRegs)
3060 RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));
3061 }
3062
3063 // Report an error if there was an attempt to return FP values via XMM
3064 // registers.
3065 if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
3066 errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
3067 if (VA.getLocReg() == X86::XMM1)
3068 VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
3069 else
3070 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
3071 } else if (!Subtarget.hasSSE2() &&
3072 X86::FR64XRegClass.contains(VA.getLocReg()) &&
3073 CopyVT == MVT::f64) {
3074 errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
3075 if (VA.getLocReg() == X86::XMM1)
3076 VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
3077 else
3078 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
3079 }
3080
3081 // If we prefer to use the value in xmm registers, copy it out as f80 and
3082 // use a truncate to move it from fp stack reg to xmm reg.
3083 bool RoundAfterCopy = false;
3084 if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) &&
3085 isScalarFPTypeInSSEReg(VA.getValVT())) {
3086 if (!Subtarget.hasX87())
3087 report_fatal_error("X87 register return with X87 disabled");
3088 CopyVT = MVT::f80;
3089 RoundAfterCopy = (CopyVT != VA.getLocVT());
3090 }
3091
3092 SDValue Val;
3093 if (VA.needsCustom()) {
3094 assert(VA.getValVT() == MVT::v64i1 &&((void)0)
3095 "Currently the only custom case is when we split v64i1 to 2 regs")((void)0);
 // The split value occupies this location and the next one, hence ++I.
 // NOTE(review): Chain is not reassigned on this path; only InFlag is
 // updated (through the pointer) by getv64i1Argument.
3096 Val =
3097 getv64i1Argument(VA, RVLocs[++I], Chain, DAG, dl, Subtarget, &InFlag);
3098 } else {
 // Result 0 of the CopyFromReg is the value, 1 the chain, 2 the glue.
3099 Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), CopyVT, InFlag)
3100 .getValue(1);
3101 Val = Chain.getValue(0);
3102 InFlag = Chain.getValue(2);
3103 }
3104
3105 if (RoundAfterCopy)
3106 Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
3107 // This truncation won't change the value.
3108 DAG.getIntPtrConstant(1, dl));
3109
3110 if (VA.isExtInLoc()) {
3111 if (VA.getValVT().isVector() &&
3112 VA.getValVT().getScalarType() == MVT::i1 &&
3113 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
3114 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
3115 // promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
3116 Val = lowerRegToMasks(Val, VA.getValVT(), VA.getLocVT(), dl, DAG);
3117 } else
3118 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
3119 }
3120
3121 if (VA.getLocInfo() == CCValAssign::BCvt)
3122 Val = DAG.getBitcast(VA.getValVT(), Val);
3123
3124 InVals.push_back(Val);
3125 }
3126
3127 return Chain;
3128}
3129
3130//===----------------------------------------------------------------------===//
3131// C & StdCall & Fast Calling Convention implementation
3132//===----------------------------------------------------------------------===//
3133// The StdCall calling convention is used by many Windows API routines.
3134// It differs from the C calling convention only slightly: the callee,
3135// not the caller, cleans up the stack, and symbol names are also
3136// decorated. It doesn't support any vector arguments.
3137// For the fast calling convention, see the Fast Calling Convention (tail
3138// call) implementation, LowerX86_32FastCCCallTo.
3139
3140/// CallIsStructReturn - Determines whether a call uses struct return
3141/// semantics.
3142enum StructReturnType {
3143 NotStructReturn,
3144 RegStructReturn,
3145 StackStructReturn
3146};
3147static StructReturnType
3148callIsStructReturn(ArrayRef<ISD::OutputArg> Outs, bool IsMCU) {
3149 if (Outs.empty())
3150 return NotStructReturn;
3151
3152 const ISD::ArgFlagsTy &Flags = Outs[0].Flags;
3153 if (!Flags.isSRet())
3154 return NotStructReturn;
3155 if (Flags.isInReg() || IsMCU)
3156 return RegStructReturn;
3157 return StackStructReturn;
3158}
3159
3160/// Determines whether a function uses struct return semantics.
3161static StructReturnType
3162argsAreStructReturn(ArrayRef<ISD::InputArg> Ins, bool IsMCU) {
3163 if (Ins.empty())
3164 return NotStructReturn;
3165
3166 const ISD::ArgFlagsTy &Flags = Ins[0].Flags;
3167 if (!Flags.isSRet())
3168 return NotStructReturn;
3169 if (Flags.isInReg() || IsMCU)
3170 return RegStructReturn;
3171 return StackStructReturn;
3172}
3173
3174/// Make a copy of an aggregate at address specified by "Src" to address
3175/// "Dst" with size and alignment information specified by the specific
3176/// parameter attribute. The copy will be passed as a byval function parameter.
3177static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
3178 SDValue Chain, ISD::ArgFlagsTy Flags,
3179 SelectionDAG &DAG, const SDLoc &dl) {
3180 SDValue SizeNode = DAG.getIntPtrConstant(Flags.getByValSize(), dl);
3181
3182 return DAG.getMemcpy(
3183 Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(),
3184 /*isVolatile*/ false, /*AlwaysInline=*/true,
3185 /*isTailCall*/ false, MachinePointerInfo(), MachinePointerInfo());
3186}
3187
3188/// Return true if the calling convention is one that we can guarantee TCO for.
3189static bool canGuaranteeTCO(CallingConv::ID CC) {
3190 return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
3191 CC == CallingConv::X86_RegCall || CC == CallingConv::HiPE ||
3192 CC == CallingConv::HHVM || CC == CallingConv::Tail ||
3193 CC == CallingConv::SwiftTail);
3194}
3195
3196/// Return true if we might ever do TCO for calls with this calling convention.
3197static bool mayTailCallThisCC(CallingConv::ID CC) {
3198 switch (CC) {
3199 // C calling conventions:
3200 case CallingConv::C:
3201 case CallingConv::Win64:
3202 case CallingConv::X86_64_SysV:
3203 // Callee pop conventions:
3204 case CallingConv::X86_ThisCall:
3205 case CallingConv::X86_StdCall:
3206 case CallingConv::X86_VectorCall:
3207 case CallingConv::X86_FastCall:
3208 // Swift:
3209 case CallingConv::Swift:
3210 return true;
3211 default:
3212 return canGuaranteeTCO(CC);
3213 }
3214}
3215
3216/// Return true if the function is being made into a tailcall target by
3217/// changing its ABI.
3218static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) {
3219 return (GuaranteedTailCallOpt && canGuaranteeTCO(CC)) ||
3220 CC == CallingConv::Tail || CC == CallingConv::SwiftTail;
3221}
3222
3223bool X86TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
3224 if (!CI->isTailCall())
3225 return false;
3226
3227 CallingConv::ID CalleeCC = CI->getCallingConv();
3228 if (!mayTailCallThisCC(CalleeCC))
3229 return false;
3230
3231 return true;
3232}
3233
/// Produce the DAG value for an incoming argument that \p VA places in memory.
///
/// For a byval argument the result is the frame index of a fixed stack object
/// covering the aggregate. Otherwise the result is a load from a fixed stack
/// object at VA.getLocMemOffset(), followed by a SCALAR_TO_VECTOR or TRUNCATE
/// when an i1-based value was extended in memory.
///
/// \param Chain    Chain used for the generated load.
/// \param CallConv Calling convention of the function being lowered.
/// \param Ins      Incoming argument descriptors.
/// \param VA       Location assigned to the argument part being lowered.
/// \param MFI      Frame info in which the fixed objects are created.
/// \param i        Index into \p Ins of the argument part being lowered.
3234SDValue
3235X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
3236 const SmallVectorImpl<ISD::InputArg> &Ins,
3237 const SDLoc &dl, SelectionDAG &DAG,
3238 const CCValAssign &VA,
3239 MachineFrameInfo &MFI, unsigned i) const {
3240 // Create the nodes corresponding to a load from this parameter slot.
3241 ISD::ArgFlagsTy Flags = Ins[i].Flags;
3242 bool AlwaysUseMutable = shouldGuaranteeTCO(
3243 CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt);
3244 bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
3245 EVT ValVT;
3246 MVT PtrVT = getPointerTy(DAG.getDataLayout());
3247
3248 // If value is passed by pointer we have address passed instead of the value
3249 // itself. No need to extend if the mask value and location share the same
3250 // absolute size.
3251 bool ExtendedInMem =
3252 VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1 &&
3253 VA.getValVT().getSizeInBits() != VA.getLocVT().getSizeInBits();
3254
3255 if (VA.getLocInfo() == CCValAssign::Indirect || ExtendedInMem)
3256 ValVT = VA.getLocVT();
3257 else
3258 ValVT = VA.getValVT();
3259
3260 // FIXME: For now, all byval parameter objects are marked mutable. This can be
3261 // changed with more analysis.
3262 // In case of tail call optimization mark all arguments mutable. Since they
3263 // could be overwritten by lowering of arguments in case of a tail call.
3264 if (Flags.isByVal()) {
3265 unsigned Bytes = Flags.getByValSize();
3266 if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
3267
3268 // FIXME: For now, all byval parameter objects are marked as aliasing. This
3269 // can be improved with deeper analysis.
3270 int FI = MFI.CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable,
3271 /*isAliased=*/true);
3272 return DAG.getFrameIndex(FI, PtrVT);
3273 }
3274
3275 EVT ArgVT = Ins[i].ArgVT;
3276
3277 // If this is a vector that has been split into multiple parts, and the
3278 // scalar size of the parts don't match the vector element size, then we can't
3279 // elide the copy. The parts will have padding between them instead of being
3280 // packed like a vector.
3281 bool ScalarizedAndExtendedVector =
3282 ArgVT.isVector() && !VA.getLocVT().isVector() &&
3283 VA.getLocVT().getSizeInBits() != ArgVT.getScalarSizeInBits();
3284
3285 // This is an argument in memory. We might be able to perform copy elision.
3286 // If the argument is passed directly in memory without any extension, then we
3287 // can perform copy elision. Large vector types, for example, may be passed
3288 // indirectly by pointer.
3289 if (Flags.isCopyElisionCandidate() &&
3290 VA.getLocInfo() != CCValAssign::Indirect && !ExtendedInMem &&
3291 !ScalarizedAndExtendedVector) {
3292 SDValue PartAddr;
3293 if (Ins[i].PartOffset == 0) {
3294 // If this is a one-part value or the first part of a multi-part value,
3295 // create a stack object for the entire argument value type and return a
3296 // load from our portion of it. This assumes that if the first part of an
3297 // argument is in memory, the rest will also be in memory.
3298 int FI = MFI.CreateFixedObject(ArgVT.getStoreSize(), VA.getLocMemOffset(),
3299 /*IsImmutable=*/false);
3300 PartAddr = DAG.getFrameIndex(FI, PtrVT);
3301 return DAG.getLoad(
3302 ValVT, dl, Chain, PartAddr,
3303 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
3304 } else {
3305 // This is not the first piece of an argument in memory. See if there is
3306 // already a fixed stack object including this offset. If so, assume it
3307 // was created by the PartOffset == 0 branch above and create a load from
3308 // the appropriate offset into it.
3309 int64_t PartBegin = VA.getLocMemOffset();
3310 int64_t PartEnd = PartBegin + ValVT.getSizeInBits() / 8;
 // Scan the fixed objects for one whose extent fully contains this part.
3311 int FI = MFI.getObjectIndexBegin();
3312 for (; MFI.isFixedObjectIndex(FI); ++FI) {
3313 int64_t ObjBegin = MFI.getObjectOffset(FI);
3314 int64_t ObjEnd = ObjBegin + MFI.getObjectSize(FI);
3315 if (ObjBegin <= PartBegin && PartEnd <= ObjEnd)
3316 break;
3317 }
 // FI is still a fixed-object index only if the scan stopped on a match.
3318 if (MFI.isFixedObjectIndex(FI)) {
3319 SDValue Addr =
3320 DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getFrameIndex(FI, PtrVT),
3321 DAG.getIntPtrConstant(Ins[i].PartOffset, dl));
3322 return DAG.getLoad(
3323 ValVT, dl, Chain, Addr,
3324 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI,
3325 Ins[i].PartOffset));
3326 }
3327 }
3328 }
3329
 // No copy elision (or no enclosing object found): create a dedicated fixed
 // object for this part and load from it.
3330 int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
3331 VA.getLocMemOffset(), isImmutable);
3332
3333 // Set SExt or ZExt flag.
3334 if (VA.getLocInfo() == CCValAssign::ZExt) {
3335 MFI.setObjectZExt(FI, true);
3336 } else if (VA.getLocInfo() == CCValAssign::SExt) {
3337 MFI.setObjectSExt(FI, true);
3338 }
3339
3340 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
3341 SDValue Val = DAG.getLoad(
3342 ValVT, dl, Chain, FIN,
3343 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
3344 return ExtendedInMem
3345 ? (VA.getValVT().isVector()
3346 ? DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VA.getValVT(), Val)
3347 : DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val))
3348 : Val;
3349}
3350
3351// FIXME: Get this from tablegen.
3352static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv,
3353 const X86Subtarget &Subtarget) {
3354 assert(Subtarget.is64Bit())((void)0);
3355
3356 if (Subtarget.isCallingConvWin64(CallConv)) {
3357 static const MCPhysReg GPR64ArgRegsWin64[] = {
3358 X86::RCX, X86::RDX, X86::R8, X86::R9
3359 };
3360 return makeArrayRef(std::begin(GPR64ArgRegsWin64), std::end(GPR64ArgRegsWin64));
3361 }
3362
3363 static const MCPhysReg GPR64ArgRegs64Bit[] = {
3364 X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
3365 };
3366 return makeArrayRef(std::begin(GPR64ArgRegs64Bit), std::end(GPR64ArgRegs64Bit));
3367}
3368
3369// FIXME: Get this from tablegen.
3370static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF,
3371 CallingConv::ID CallConv,
3372 const X86Subtarget &Subtarget) {
3373 assert(Subtarget.is64Bit())((void)0);
3374 if (Subtarget.isCallingConvWin64(CallConv)) {
3375 // The XMM registers which might contain var arg parameters are shadowed
3376 // in their paired GPR. So we only need to save the GPR to their home
3377 // slots.
3378 // TODO: __vectorcall will change this.
3379 return None;
3380 }
3381
3382 bool isSoftFloat = Subtarget.useSoftFloat();
3383 if (isSoftFloat || !Subtarget.hasSSE1())
3384 // Kernel mode asks for SSE to be disabled, so there are no XMM argument
3385 // registers.
3386 return None;
3387
3388 static const MCPhysReg XMMArgRegs64Bit[] = {
3389 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
3390 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
3391 };
3392 return makeArrayRef(std::begin(XMMArgRegs64Bit), std::end(XMMArgRegs64Bit));
3393}
3394
3395#ifndef NDEBUG1
3396static bool isSortedByValueNo(ArrayRef<CCValAssign> ArgLocs) {
3397 return llvm::is_sorted(
3398 ArgLocs, [](const CCValAssign &A, const CCValAssign &B) -> bool {
3399 return A.getValNo() < B.getValNo();
3400 });
3401}
3402#endif
3403
3404namespace {
3405/// This is a helper class for lowering variable arguments parameters.
///
/// It bundles references to the DAG, subtarget, machine function, frame info
/// and calling-convention state so the individual lowering steps do not need
/// them passed in. All reference members are bound in the constructor; the
/// helper stores \p Loc by reference, so the SDLoc passed in has to outlive
/// the helper.
3406class VarArgsLoweringHelper {
3407public:
3408 VarArgsLoweringHelper(X86MachineFunctionInfo *FuncInfo, const SDLoc &Loc,
3409 SelectionDAG &DAG, const X86Subtarget &Subtarget,
3410 CallingConv::ID CallConv, CCState &CCInfo)
3411 : FuncInfo(FuncInfo), DL(Loc), DAG(DAG), Subtarget(Subtarget),
3412 TheMachineFunction(DAG.getMachineFunction()),
3413 TheFunction(TheMachineFunction.getFunction()),
3414 FrameInfo(TheMachineFunction.getFrameInfo()),
3415 FrameLowering(*Subtarget.getFrameLowering()),
3416 TargLowering(DAG.getTargetLoweringInfo()), CallConv(CallConv),
3417 CCInfo(CCInfo) {}
3418
3419 // Lower variable arguments parameters.
3420 void lowerVarArgsParameters(SDValue &Chain, unsigned StackSize);
3421
3422private:
3423 void createVarArgAreaAndStoreRegisters(SDValue &Chain, unsigned StackSize);
3424
3425 void forwardMustTailParameters(SDValue &Chain);
3426
 // Convenience queries on the subtarget / calling convention.
3427 bool is64Bit() const { return Subtarget.is64Bit(); }
3428 bool isWin64() const { return Subtarget.isCallingConvWin64(CallConv); }
3429
 // Members are initialized in declaration order: TheFunction and FrameInfo
 // are derived from TheMachineFunction and must stay declared after it.
3430 X86MachineFunctionInfo *FuncInfo;
3431 const SDLoc &DL;
3432 SelectionDAG &DAG;
3433 const X86Subtarget &Subtarget;
3434 MachineFunction &TheMachineFunction;
3435 const Function &TheFunction;
3436 MachineFrameInfo &FrameInfo;
3437 const TargetFrameLowering &FrameLowering;
3438 const TargetLowering &TargLowering;
3439 CallingConv::ID CallConv;
3440 CCState &CCInfo;
3441};
3442} // namespace
3443
3444void VarArgsLoweringHelper::createVarArgAreaAndStoreRegisters(
3445 SDValue &Chain, unsigned StackSize) {
3446 // If the function takes variable number of arguments, make a frame index for
3447 // the start of the first vararg value... for expansion of llvm.va_start. We
3448 // can skip this if there are no va_start calls.
3449 if (is64Bit() || (CallConv != CallingConv::X86_FastCall &&
3450 CallConv != CallingConv::X86_ThisCall)) {
3451 FuncInfo->setVarArgsFrameIndex(
3452 FrameInfo.CreateFixedObject(1, StackSize, true));
3453 }
3454
3455 // 64-bit calling conventions support varargs and register parameters, so we
3456 // have to do extra work to spill them in the prologue.
3457 if (is64Bit()) {
3458 // Find the first unallocated argument registers.
3459 ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget);
3460 ArrayRef<MCPhysReg> ArgXMMs =
3461 get64BitArgumentXMMs(TheMachineFunction, CallConv, Subtarget);
3462 unsigned NumIntRegs = CCInfo.getFirstUnallocated(ArgGPRs);
3463 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(ArgXMMs);
3464
3465 assert(!(NumXMMRegs && !Subtarget.hasSSE1()) &&((void)0)
3466 "SSE register cannot be used when SSE is disabled!")((void)0);
3467
3468 if (isWin64()) {
3469 // Get to the caller-allocated home save location. Add 8 to account
3470 // for the return address.
3471 int HomeOffset = FrameLowering.getOffsetOfLocalArea() + 8;
3472 FuncInfo->setRegSaveFrameIndex(
3473 FrameInfo.CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
3474 // Fixup to set vararg frame on shadow area (4 x i64).
3475 if (NumIntRegs < 4)
3476 FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
3477 } else {
3478 // For X86-64, if there are vararg parameters that are passed via
3479 // registers, then we must store them to their spots on the stack so
3480 // they may be loaded by dereferencing the result of va_next.
3481 FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
3482 FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16);
3483 FuncInfo->setRegSaveFrameIndex(FrameInfo.CreateStackObject(
3484 ArgGPRs.size() * 8 + ArgXMMs.size() * 16, Align(16), false));
3485 }
3486
3487 SmallVector<SDValue, 6>
3488 LiveGPRs; // list of SDValue for GPR registers keeping live input value
3489 SmallVector<SDValue, 8> LiveXMMRegs; // list of SDValue for XMM registers
3490 // keeping live input value
3491 SDValue ALVal; // if applicable keeps SDValue for %al register
3492
3493 // Gather all the live in physical registers.
3494 for (MCPhysReg Reg : ArgGPRs.slice(NumIntRegs)) {
3495 Register GPR = TheMachineFunction.addLiveIn(Reg, &X86::GR64RegClass);
3496 LiveGPRs.push_back(DAG.getCopyFromReg(Chain, DL, GPR, MVT::i64));
3497 }
3498 const auto &AvailableXmms = ArgXMMs.slice(NumXMMRegs);
3499 if (!AvailableXmms.empty()) {
3500 Register AL = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass);
3501 ALVal = DAG.getCopyFromReg(Chain, DL, AL, MVT::i8);
3502 for (MCPhysReg Reg : AvailableXmms) {
3503 // FastRegisterAllocator spills virtual registers at basic
3504 // block boundary. That leads to usages of xmm registers
3505 // outside of check for %al. Pass physical registers to
3506 // VASTART_SAVE_XMM_REGS to avoid unnecessary spilling.
3507 TheMachineFunction.getRegInfo().addLiveIn(Reg);
3508 LiveXMMRegs.push_back(DAG.getRegister(Reg, MVT::v4f32));
3509 }
3510 }
3511
3512 // Store the integer parameter registers.
3513 SmallVector<SDValue, 8> MemOps;
3514 SDValue RSFIN =
3515 DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
3516 TargLowering.getPointerTy(DAG.getDataLayout()));
3517 unsigned Offset = FuncInfo->getVarArgsGPOffset();
3518 for (SDValue Val : LiveGPRs) {
3519 SDValue FIN = DAG.getNode(ISD::ADD, DL,
3520 TargLowering.getPointerTy(DAG.getDataLayout()),
3521 RSFIN, DAG.getIntPtrConstant(Offset, DL));
3522 SDValue Store =
3523 DAG.getStore(Val.getValue(1), DL, Val, FIN,
3524 MachinePointerInfo::getFixedStack(
3525 DAG.getMachineFunction(),
3526 FuncInfo->getRegSaveFrameIndex(), Offset));
3527 MemOps.push_back(Store);
3528 Offset += 8;
3529 }
3530
3531 // Now store the XMM (fp + vector) parameter registers.
3532 if (!LiveXMMRegs.empty()) {
3533 SmallVector<SDValue, 12> SaveXMMOps;
3534 SaveXMMOps.push_back(Chain);
3535 SaveXMMOps.push_back(ALVal);
3536 SaveXMMOps.push_back(
3537 DAG.getTargetConstant(FuncInfo->getRegSaveFrameIndex(), DL, MVT::i32));
3538 SaveXMMOps.push_back(
3539 DAG.getTargetConstant(FuncInfo->getVarArgsFPOffset(), DL, MVT::i32));
3540 llvm::append_range(SaveXMMOps, LiveXMMRegs);
3541 MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, DL,
3542 MVT::Other, SaveXMMOps));
3543 }
3544
3545 if (!MemOps.empty())
3546 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
3547 }
3548}
3549
3550void VarArgsLoweringHelper::forwardMustTailParameters(SDValue &Chain) {
3551 // Find the largest legal vector type.
3552 MVT VecVT = MVT::Other;
3553 // FIXME: Only some x86_32 calling conventions support AVX512.
3554 if (Subtarget.useAVX512Regs() &&
3555 (is64Bit() || (CallConv == CallingConv::X86_VectorCall ||
3556 CallConv == CallingConv::Intel_OCL_BI)))
3557 VecVT = MVT::v16f32;
3558 else if (Subtarget.hasAVX())
3559 VecVT = MVT::v8f32;
3560 else if (Subtarget.hasSSE2())
3561 VecVT = MVT::v4f32;
3562
3563 // We forward some GPRs and some vector types.
3564 SmallVector<MVT, 2> RegParmTypes;
3565 MVT IntVT = is64Bit() ? MVT::i64 : MVT::i32;
3566 RegParmTypes.push_back(IntVT);
3567 if (VecVT != MVT::Other)
3568 RegParmTypes.push_back(VecVT);
3569
3570 // Compute the set of forwarded registers. The rest are scratch.
3571 SmallVectorImpl<ForwardedRegister> &Forwards =
3572 FuncInfo->getForwardedMustTailRegParms();
3573 CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, CC_X86);
3574
3575 // Forward AL for SysV x86_64 targets, since it is used for varargs.
3576 if (is64Bit() && !isWin64() && !CCInfo.isAllocated(X86::AL)) {
3577 Register ALVReg = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass);
3578 Forwards.push_back(ForwardedRegister(ALVReg, X86::AL, MVT::i8));
3579 }
3580
3581 // Copy all forwards from physical to virtual registers.
3582 for (ForwardedRegister &FR : Forwards) {
3583 // FIXME: Can we use a less constrained schedule?
3584 SDValue RegVal = DAG.getCopyFromReg(Chain, DL, FR.VReg, FR.VT);
3585 FR.VReg = TheMachineFunction.getRegInfo().createVirtualRegister(
3586 TargLowering.getRegClassFor(FR.VT));
3587 Chain = DAG.getCopyToReg(Chain, DL, FR.VReg, RegVal);
3588 }
3589}
3590
3591void VarArgsLoweringHelper::lowerVarArgsParameters(SDValue &Chain,
3592 unsigned StackSize) {
3593 // Set FrameIndex to the 0xAAAAAAA value to mark unset state.
3594 // If necessary, it would be set into the correct value later.
3595 FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
3596 FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
3597
3598 if (FrameInfo.hasVAStart())
3599 createVarArgAreaAndStoreRegisters(Chain, StackSize);
3600
3601 if (FrameInfo.hasMustTailInVarArgFunc())
3602 forwardMustTailParameters(Chain);
3603}
3604
3605SDValue X86TargetLowering::LowerFormalArguments(
3606 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
3607 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3608 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3609 MachineFunction &MF = DAG.getMachineFunction();
3610 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
3611
3612 const Function &F = MF.getFunction();
3613 if (F.hasExternalLinkage() && Subtarget.isTargetCygMing() &&
3614 F.getName() == "main")
3615 FuncInfo->setForceFramePointer(true);
3616
3617 MachineFrameInfo &MFI = MF.getFrameInfo();
3618 bool Is64Bit = Subtarget.is64Bit();
3619 bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
3620
3621 assert(((void)0)
3622 !(IsVarArg && canGuaranteeTCO(CallConv)) &&((void)0)
3623 "Var args not supported with calling conv' regcall, fastcc, ghc or hipe")((void)0);
3624
3625 // Assign locations to all of the incoming arguments.
3626 SmallVector<CCValAssign, 16> ArgLocs;
3627 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
3628
3629 // Allocate shadow area for Win64.
3630 if (IsWin64)
3631 CCInfo.AllocateStack(32, Align(8));
3632
3633 CCInfo.AnalyzeArguments(Ins, CC_X86);
3634
3635 // In vectorcall calling convention a second pass is required for the HVA
3636 // types.
3637 if (CallingConv::X86_VectorCall == CallConv) {
3638 CCInfo.AnalyzeArgumentsSecondPass(Ins, CC_X86);
3639 }
3640
3641 // The next loop assumes that the locations are in the same order of the
3642 // input arguments.
3643 assert(isSortedByValueNo(ArgLocs) &&((void)0)
3644 "Argument Location list must be sorted before lowering")((void)0);
3645
3646 SDValue ArgValue;
3647 for (unsigned I = 0, InsIndex = 0, E = ArgLocs.size(); I != E;
3648 ++I, ++InsIndex) {
3649 assert(InsIndex < Ins.size() && "Invalid Ins index")((void)0);
3650 CCValAssign &VA = ArgLocs[I];
3651
3652 if (VA.isRegLoc()) {
3653 EVT RegVT = VA.getLocVT();
3654 if (VA.needsCustom()) {
3655 assert(((void)0)
3656 VA.getValVT() == MVT::v64i1 &&((void)0)
3657 "Currently the only custom case is when we split v64i1 to 2 regs")((void)0);
3658
3659 // v64i1 values, in regcall calling convention, that are
3660 // compiled to 32 bit arch, are split up into two registers.
3661 ArgValue =
3662 getv64i1Argument(VA, ArgLocs[++I], Chain, DAG, dl, Subtarget);
3663 } else {
3664 const TargetRegisterClass *RC;
3665 if (RegVT == MVT::i8)
3666 RC = &X86::GR8RegClass;
3667 else if (RegVT == MVT::i16)
3668 RC = &X86::GR16RegClass;
3669 else if (RegVT == MVT::i32)
3670 RC = &X86::GR32RegClass;
3671 else if (Is64Bit && RegVT == MVT::i64)
3672 RC = &X86::GR64RegClass;
3673 else if (RegVT == MVT::f32)
3674 RC = Subtarget.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass;
3675 else if (RegVT == MVT::f64)
3676 RC = Subtarget.hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass;
3677 else if (RegVT == MVT::f80)
3678 RC = &X86::RFP80RegClass;
3679 else if (RegVT == MVT::f128)
3680 RC = &X86::VR128RegClass;
3681 else if (RegVT.is512BitVector())
3682 RC = &X86::VR512RegClass;
3683 else if (RegVT.is256BitVector())
3684 RC = Subtarget.hasVLX() ? &X86::VR256XRegClass : &X86::VR256RegClass;
3685 else if (RegVT.is128BitVector())
3686 RC = Subtarget.hasVLX() ? &X86::VR128XRegClass : &X86::VR128RegClass;
3687 else if (RegVT == MVT::x86mmx)
3688 RC = &X86::VR64RegClass;
3689 else if (RegVT == MVT::v1i1)
3690 RC = &X86::VK1RegClass;
3691 else if (RegVT == MVT::v8i1)
3692 RC = &X86::VK8RegClass;
3693 else if (RegVT == MVT::v16i1)
3694 RC = &X86::VK16RegClass;
3695 else if (RegVT == MVT::v32i1)
3696 RC = &X86::VK32RegClass;
3697 else if (RegVT == MVT::v64i1)
3698 RC = &X86::VK64RegClass;
3699 else
3700 llvm_unreachable("Unknown argument type!")__builtin_unreachable();
3701
3702 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
3703 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
3704 }
3705
3706 // If this is an 8 or 16-bit value, it is really passed promoted to 32
3707 // bits. Insert an assert[sz]ext to capture this, then truncate to the
3708 // right size.
3709 if (VA.getLocInfo() == CCValAssign::SExt)
3710 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
3711 DAG.getValueType(VA.getValVT()));
3712 else if (VA.getLocInfo() == CCValAssign::ZExt)
3713 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
3714 DAG.getValueType(VA.getValVT()));
3715 else if (VA.getLocInfo() == CCValAssign::BCvt)
3716 ArgValue = DAG.getBitcast(VA.getValVT(), ArgValue);
3717
3718 if (VA.isExtInLoc()) {
3719 // Handle MMX values passed in XMM regs.
3720 if (RegVT.isVector() && VA.getValVT().getScalarType() != MVT::i1)
3721 ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue);
3722 else if (VA.getValVT().isVector() &&
3723 VA.getValVT().getScalarType() == MVT::i1 &&
3724 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
3725 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
3726 // Promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
3727 ArgValue = lowerRegToMasks(ArgValue, VA.getValVT(), RegVT, dl, DAG);
3728 } else
3729 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
3730 }
3731 } else {
3732 assert(VA.isMemLoc())((void)0);
3733 ArgValue =
3734 LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, InsIndex);
3735 }
3736
3737 // If value is passed via pointer - do a load.
3738 if (VA.getLocInfo() == CCValAssign::Indirect && !Ins[I].Flags.isByVal())
3739 ArgValue =
3740 DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, MachinePointerInfo());
3741
3742 InVals.push_back(ArgValue);
3743 }
3744
3745 for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
3746 if (Ins[I].Flags.isSwiftAsync()) {
3747 auto X86FI = MF.getInfo<X86MachineFunctionInfo>();
3748 if (Subtarget.is64Bit())
3749 X86FI->setHasSwiftAsyncContext(true);
3750 else {
3751 int FI = MF.getFrameInfo().CreateStackObject(4, Align(4), false);
3752 X86FI->setSwiftAsyncContextFrameIdx(FI);
3753 SDValue St = DAG.getStore(DAG.getEntryNode(), dl, InVals[I],
3754 DAG.getFrameIndex(FI, MVT::i32),
3755 MachinePointerInfo::getFixedStack(MF, FI));
3756 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, St, Chain);
3757 }
3758 }
3759
3760 // Swift calling convention does not require we copy the sret argument
3761 // into %rax/%eax for the return. We don't set SRetReturnReg for Swift.
3762 if (CallConv == CallingConv::Swift || CallConv == CallingConv::SwiftTail)
3763 continue;
3764
3765 // All x86 ABIs require that for returning structs by value we copy the
3766 // sret argument into %rax/%eax (depending on ABI) for the return. Save
3767 // the argument into a virtual register so that we can access it from the
3768 // return points.
3769 if (Ins[I].Flags.isSRet()) {
3770 Register Reg = FuncInfo->getSRetReturnReg();
3771 if (!Reg) {
3772 MVT PtrTy = getPointerTy(DAG.getDataLayout());
3773 Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
3774 FuncInfo->setSRetReturnReg(Reg);
3775 }
3776 SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[I]);
3777 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
3778 break;
3779 }
3780 }
3781
3782 unsigned StackSize = CCInfo.getNextStackOffset();
3783 // Align stack specially for tail calls.
3784 if (shouldGuaranteeTCO(CallConv,
3785 MF.getTarget().Options.GuaranteedTailCallOpt))
3786 StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
3787
3788 if (IsVarArg)
3789 VarArgsLoweringHelper(FuncInfo, dl, DAG, Subtarget, CallConv, CCInfo)
3790 .lowerVarArgsParameters(Chain, StackSize);
3791
3792 // Some CCs need callee pop.
3793 if (X86::isCalleePop(CallConv, Is64Bit, IsVarArg,
3794 MF.getTarget().Options.GuaranteedTailCallOpt)) {
3795 FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
3796 } else if (CallConv == CallingConv::X86_INTR && Ins.size() == 2) {
3797 // X86 interrupts must pop the error code (and the alignment padding) if
3798 // present.
3799 FuncInfo->setBytesToPopOnReturn(Is64Bit ? 16 : 4);
3800 } else {
3801 FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
3802 // If this is an sret function, the return should pop the hidden pointer.
3803 if (!Is64Bit && !canGuaranteeTCO(CallConv) &&
3804 !Subtarget.getTargetTriple().isOSMSVCRT() &&
3805 argsAreStructReturn(Ins, Subtarget.isTargetMCU()) == StackStructReturn)
3806 FuncInfo->setBytesToPopOnReturn(4);
3807 }
3808
3809 if (!Is64Bit) {
3810 // RegSaveFrameIndex is X86-64 only.
3811 FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
3812 }
3813
3814 FuncInfo->setArgumentStackSize(StackSize);
3815
3816 if (WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo()) {
3817 EHPersonality Personality = classifyEHPersonality(F.getPersonalityFn());
3818 if (Personality == EHPersonality::CoreCLR) {
3819 assert(Is64Bit)((void)0);
3820 // TODO: Add a mechanism to frame lowering that will allow us to indicate
3821 // that we'd prefer this slot be allocated towards the bottom of the frame
3822 // (i.e. near the stack pointer after allocating the frame). Every
3823 // funclet needs a copy of this slot in its (mostly empty) frame, and the
3824 // offset from the bottom of this and each funclet's frame must be the
3825 // same, so the size of funclets' (mostly empty) frames is dictated by
3826 // how far this slot is from the bottom (since they allocate just enough
3827 // space to accommodate holding this slot at the correct offset).
3828 int PSPSymFI = MFI.CreateStackObject(8, Align(8), /*isSpillSlot=*/false);
3829 EHInfo->PSPSymFrameIdx = PSPSymFI;
3830 }
3831 }
3832
3833 if (CallConv == CallingConv::X86_RegCall ||
3834 F.hasFnAttribute("no_caller_saved_registers")) {
3835 MachineRegisterInfo &MRI = MF.getRegInfo();
3836 for (std::pair<Register, Register> Pair : MRI.liveins())
3837 MRI.disableCalleeSavedRegister(Pair.first);
3838 }
3839
3840 return Chain;
3841}
3842
3843SDValue X86TargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
3844 SDValue Arg, const SDLoc &dl,
3845 SelectionDAG &DAG,
3846 const CCValAssign &VA,
3847 ISD::ArgFlagsTy Flags,
3848 bool isByVal) const {
3849 unsigned LocMemOffset = VA.getLocMemOffset();
3850 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
3851 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
3852 StackPtr, PtrOff);
3853 if (isByVal)
3854 return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
3855
3856 return DAG.getStore(
3857 Chain, dl, Arg, PtrOff,
3858 MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset));
3859}
3860
3861/// Emit a load of return address if tail call
3862/// optimization is performed and it is required.
3863SDValue X86TargetLowering::EmitTailCallLoadRetAddr(
3864 SelectionDAG &DAG, SDValue &OutRetAddr, SDValue Chain, bool IsTailCall,
3865 bool Is64Bit, int FPDiff, const SDLoc &dl) const {
3866 // Adjust the Return address stack slot.
3867 EVT VT = getPointerTy(DAG.getDataLayout());
3868 OutRetAddr = getReturnAddressFrameIndex(DAG);
3869
3870 // Load the "old" Return address.
3871 OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo());
3872 return SDValue(OutRetAddr.getNode(), 1);
3873}
3874
3875/// Emit a store of the return address if tail call
3876/// optimization is performed and it is required (FPDiff!=0).
3877static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF,
3878 SDValue Chain, SDValue RetAddrFrIdx,
3879 EVT PtrVT, unsigned SlotSize,
3880 int FPDiff, const SDLoc &dl) {
3881 // Store the return address to the appropriate stack slot.
3882 if (!FPDiff) return Chain;
3883 // Calculate the new stack slot for the return address.
3884 int NewReturnAddrFI =
3885 MF.getFrameInfo().CreateFixedObject(SlotSize, (int64_t)FPDiff - SlotSize,
3886 false);
3887 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT);
3888 Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
3889 MachinePointerInfo::getFixedStack(
3890 DAG.getMachineFunction(), NewReturnAddrFI));
3891 return Chain;
3892}
3893
3894/// Returns a vector_shuffle mask for an movs{s|d}, movd
3895/// operation of specified width.
3896static SDValue getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT, SDValue V1,
3897 SDValue V2) {
3898 unsigned NumElems = VT.getVectorNumElements();
3899 SmallVector<int, 8> Mask;
3900 Mask.push_back(NumElems);
3901 for (unsigned i = 1; i != NumElems; ++i)
3902 Mask.push_back(i);
3903 return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
3904}
3905
3906SDValue
3907X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
3908 SmallVectorImpl<SDValue> &InVals) const {
3909 SelectionDAG &DAG = CLI.DAG;
3910 SDLoc &dl = CLI.DL;
3911 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
3912 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
3913 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
3914 SDValue Chain = CLI.Chain;
3915 SDValue Callee = CLI.Callee;
3916 CallingConv::ID CallConv = CLI.CallConv;
3917 bool &isTailCall = CLI.IsTailCall;
3918 bool isVarArg = CLI.IsVarArg;
3919 const auto *CB = CLI.CB;
3920
3921 MachineFunction &MF = DAG.getMachineFunction();
3922 bool Is64Bit = Subtarget.is64Bit();
3923 bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
3924 StructReturnType SR = callIsStructReturn(Outs, Subtarget.isTargetMCU());
3925 bool IsSibcall = false;
3926 bool IsGuaranteeTCO = MF.getTarget().Options.GuaranteedTailCallOpt ||
3927 CallConv == CallingConv::Tail || CallConv == CallingConv::SwiftTail;
3928 X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
3929 bool HasNCSR = (CB && isa<CallInst>(CB) &&
3930 CB->hasFnAttr("no_caller_saved_registers"));
3931 bool HasNoCfCheck = (CB && CB->doesNoCfCheck());
3932 bool IsIndirectCall = (CB && isa<CallInst>(CB) && CB->isIndirectCall());
3933 const Module *M = MF.getMMI().getModule();
3934 Metadata *IsCFProtectionSupported = M->getModuleFlag("cf-protection-branch");
3935
3936 MachineFunction::CallSiteInfo CSInfo;
3937 if (CallConv == CallingConv::X86_INTR)
3938 report_fatal_error("X86 interrupts may not be called directly");
3939
3940 bool IsMustTail = CLI.CB && CLI.CB->isMustTailCall();
3941 if (Subtarget.isPICStyleGOT() && !IsGuaranteeTCO && !IsMustTail) {
3942 // If we are using a GOT, disable tail calls to external symbols with
3943 // default visibility. Tail calling such a symbol requires using a GOT
3944 // relocation, which forces early binding of the symbol. This breaks code
3945 // that require lazy function symbol resolution. Using musttail or
3946 // GuaranteedTailCallOpt will override this.
3947 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
3948 if (!G || (!G->getGlobal()->hasLocalLinkage() &&
3949 G->getGlobal()->hasDefaultVisibility()))
3950 isTailCall = false;
3951 }
3952
3953
3954 if (isTailCall && !IsMustTail) {
3955 // Check if it's really possible to do a tail call.
3956 isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
3957 isVarArg, SR != NotStructReturn,
3958 MF.getFunction().hasStructRetAttr(), CLI.RetTy,
3959 Outs, OutVals, Ins, DAG);
3960
3961 // Sibcalls are automatically detected tailcalls which do not require
3962 // ABI changes.
3963 if (!IsGuaranteeTCO && isTailCall)
3964 IsSibcall = true;
3965
3966 if (isTailCall)
3967 ++NumTailCalls;
3968 }
3969
3970 if (IsMustTail && !isTailCall)
3971 report_fatal_error("failed to perform tail call elimination on a call "
3972 "site marked musttail");
3973
3974 assert(!(isVarArg && canGuaranteeTCO(CallConv)) &&((void)0)
3975 "Var args not supported with calling convention fastcc, ghc or hipe")((void)0);
3976
3977 // Analyze operands of the call, assigning locations to each operand.
3978 SmallVector<CCValAssign, 16> ArgLocs;
3979 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
3980
3981 // Allocate shadow area for Win64.
3982 if (IsWin64)
3983 CCInfo.AllocateStack(32, Align(8));
3984
3985 CCInfo.AnalyzeArguments(Outs, CC_X86);
3986
3987 // In vectorcall calling convention a second pass is required for the HVA
3988 // types.
3989 if (CallingConv::X86_VectorCall == CallConv) {
3990 CCInfo.AnalyzeArgumentsSecondPass(Outs, CC_X86);
3991 }
3992
3993 // Get a count of how many bytes are to be pushed on the stack.
3994 unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
3995 if (IsSibcall)
3996 // This is a sibcall. The memory operands are available in caller's
3997 // own caller's stack.
3998 NumBytes = 0;
3999 else if (IsGuaranteeTCO && canGuaranteeTCO(CallConv))
4000 NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
4001
4002 int FPDiff = 0;
4003 if (isTailCall &&
4004 shouldGuaranteeTCO(CallConv,
4005 MF.getTarget().Options.GuaranteedTailCallOpt)) {
4006 // Lower arguments at fp - stackoffset + fpdiff.
4007 unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();
4008
4009 FPDiff = NumBytesCallerPushed - NumBytes;
4010
4011 // Set the delta of movement of the returnaddr stackslot.
4012 // But only set if delta is greater than previous delta.
4013 if (FPDiff < X86Info->getTCReturnAddrDelta())
4014 X86Info->setTCReturnAddrDelta(FPDiff);
4015 }
4016
4017 unsigned NumBytesToPush = NumBytes;
4018 unsigned NumBytesToPop = NumBytes;
4019
4020 // If we have an inalloca argument, all stack space has already been allocated
4021 // for us and be right at the top of the stack. We don't support multiple
4022 // arguments passed in memory when using inalloca.
4023 if (!Outs.empty() && Outs.back().Flags.isInAlloca()) {
4024 NumBytesToPush = 0;
4025 if (!ArgLocs.back().isMemLoc())
4026 report_fatal_error("cannot use inalloca attribute on a register "
4027 "parameter");
4028 if (ArgLocs.back().getLocMemOffset() != 0)
4029 report_fatal_error("any parameter with the inalloca attribute must be "
4030 "the only memory argument");
4031 } else if (CLI.IsPreallocated) {
4032 assert(ArgLocs.back().isMemLoc() &&((void)0)
4033 "cannot use preallocated attribute on a register "((void)0)
4034 "parameter")((void)0);
4035 SmallVector<size_t, 4> PreallocatedOffsets;
4036 for (size_t i = 0; i < CLI.OutVals.size(); ++i) {
4037 if (CLI.CB->paramHasAttr(i, Attribute::Preallocated)) {
4038 PreallocatedOffsets.push_back(ArgLocs[i].getLocMemOffset());
4039 }
4040 }
4041 auto *MFI = DAG.getMachineFunction().getInfo<X86MachineFunctionInfo>();
4042 size_t PreallocatedId = MFI->getPreallocatedIdForCallSite(CLI.CB);
4043 MFI->setPreallocatedStackSize(PreallocatedId, NumBytes);
4044 MFI->setPreallocatedArgOffsets(PreallocatedId, PreallocatedOffsets);
4045 NumBytesToPush = 0;
4046 }
4047
4048 if (!IsSibcall && !IsMustTail)
4049 Chain = DAG.getCALLSEQ_START(Chain, NumBytesToPush,
4050 NumBytes - NumBytesToPush, dl);
4051
4052 SDValue RetAddrFrIdx;
4053 // Load return address for tail calls.
4054 if (isTailCall && FPDiff)
4055 Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
4056 Is64Bit, FPDiff, dl);
4057
4058 SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
4059 SmallVector<SDValue, 8> MemOpChains;
4060 SDValue StackPtr;
4061
4062 // The next loop assumes that the locations are in the same order of the
4063 // input arguments.
4064 assert(isSortedByValueNo(ArgLocs) &&((void)0)
4065 "Argument Location list must be sorted before lowering")((void)0);
4066
4067 // Walk the register/memloc assignments, inserting copies/loads. In the case
4068 // of tail call optimization arguments are handle later.
4069 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
4070 for (unsigned I = 0, OutIndex = 0, E = ArgLocs.size(); I != E;
4071 ++I, ++OutIndex) {
4072 assert(OutIndex < Outs.size() && "Invalid Out index")((void)0);
4073 // Skip inalloca/preallocated arguments, they have already been written.
4074 ISD::ArgFlagsTy Flags = Outs[OutIndex].Flags;
4075 if (Flags.isInAlloca() || Flags.isPreallocated())
4076 continue;
4077
4078 CCValAssign &VA = ArgLocs[I];
4079 EVT RegVT = VA.getLocVT();
4080 SDValue Arg = OutVals[OutIndex];
4081 bool isByVal = Flags.isByVal();
4082
4083 // Promote the value if needed.
4084 switch (VA.getLocInfo()) {
4085 default: llvm_unreachable("Unknown loc info!")__builtin_unreachable();
4086 case CCValAssign::Full: break;
4087 case CCValAssign::SExt:
4088 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
4089 break;
4090 case CCValAssign::ZExt:
4091 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
4092 break;
4093 case CCValAssign::AExt:
4094 if (Arg.getValueType().isVector() &&
4095 Arg.getValueType().getVectorElementType() == MVT::i1)
4096 Arg = lowerMasksToReg(Arg, RegVT, dl, DAG);
4097 else if (RegVT.is128BitVector()) {
4098 // Special case: passing MMX values in XMM registers.
4099 Arg = DAG.getBitcast(MVT::i64, Arg);
4100 Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
4101 Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
4102 } else
4103 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
4104 break;
4105 case CCValAssign::BCvt:
4106 Arg = DAG.getBitcast(RegVT, Arg);
4107 break;
4108 case CCValAssign::Indirect: {
4109 if (isByVal) {
4110 // Memcpy the argument to a temporary stack slot to prevent
4111 // the caller from seeing any modifications the callee may make
4112 // as guaranteed by the `byval` attribute.
4113 int FrameIdx = MF.getFrameInfo().CreateStackObject(
4114 Flags.getByValSize(),
4115 std::max(Align(16), Flags.getNonZeroByValAlign()), false);
4116 SDValue StackSlot =
4117 DAG.getFrameIndex(FrameIdx, getPointerTy(DAG.getDataLayout()));
4118 Chain =
4119 CreateCopyOfByValArgument(Arg, StackSlot, Chain, Flags, DAG, dl);
4120 // From now on treat this as a regular pointer
4121 Arg = StackSlot;
4122 isByVal = false;
4123 } else {
4124 // Store the argument.
4125 SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
4126 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
4127 Chain = DAG.getStore(
4128 Chain, dl, Arg, SpillSlot,
4129 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
4130 Arg = SpillSlot;
4131 }
4132 break;
4133 }
4134 }
4135
4136 if (VA.needsCustom()) {
4137 assert(VA.getValVT() == MVT::v64i1 &&((void)0)
4138 "Currently the only custom case is when we split v64i1 to 2 regs")((void)0);
4139 // Split v64i1 value into two registers
4140 Passv64i1ArgInRegs(dl, DAG, Arg, RegsToPass, VA, ArgLocs[++I], Subtarget);
4141 } else if (VA.isRegLoc()) {
4142 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
4143 const TargetOptions &Options = DAG.getTarget().Options;
4144 if (Options.EmitCallSiteInfo)
4145 CSInfo.emplace_back(VA.getLocReg(), I);
4146 if (isVarArg && IsWin64) {
4147 // Win64 ABI requires argument XMM reg to be copied to the corresponding
4148 // shadow reg if callee is a varargs function.
4149 Register ShadowReg;
4150 switch (VA.getLocReg()) {
4151 case X86::XMM0: ShadowReg = X86::RCX; break;
4152 case X86::XMM1: ShadowReg = X86::RDX; break;
4153 case X86::XMM2: ShadowReg = X86::R8; break;
4154 case X86::XMM3: ShadowReg = X86::R9; break;
4155 }
4156 if (ShadowReg)
4157 RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
4158 }
4159 } else if (!IsSibcall && (!isTailCall || isByVal)) {
4160 assert(VA.isMemLoc())((void)0);
4161 if (!StackPtr.getNode())
4162 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
4163 getPointerTy(DAG.getDataLayout()));
4164 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
4165 dl, DAG, VA, Flags, isByVal));
4166 }
4167 }
4168
4169 if (!MemOpChains.empty())
4170 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
4171
4172 if (Subtarget.isPICStyleGOT()) {
4173 // ELF / PIC requires GOT in the EBX register before function calls via PLT
4174 // GOT pointer (except regcall).
4175 if (!isTailCall) {
4176 // Indirect call with RegCall calling convention may use up all the
4177 // general registers, so it is not suitable to bind EBX register for
4178 // GOT address, just let register allocator handle it.
4179 if (CallConv != CallingConv::X86_RegCall)
4180 RegsToPass.push_back(std::make_pair(
4181 Register(X86::EBX), DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
4182 getPointerTy(DAG.getDataLayout()))));
4183 } else {
4184 // If we are tail calling and generating PIC/GOT style code load the
4185 // address of the callee into ECX. The value in ecx is used as target of
4186 // the tail jump. This is done to circumvent the ebx/callee-saved problem
4187 // for tail calls on PIC/GOT architectures. Normally we would just put the
4188 // address of GOT into ebx and then call target@PLT. But for tail calls
4189 // ebx would be restored (since ebx is callee saved) before jumping to the
4190 // target@PLT.
4191
4192 // Note: The actual moving to ECX is done further down.
4193 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
4194 if (G && !G->getGlobal()->hasLocalLinkage() &&
4195 G->getGlobal()->hasDefaultVisibility())
4196 Callee = LowerGlobalAddress(Callee, DAG);
4197 else if (isa<ExternalSymbolSDNode>(Callee))
4198 Callee = LowerExternalSymbol(Callee, DAG);
4199 }
4200 }
4201
4202 if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail) {
4203 // From AMD64 ABI document:
4204 // For calls that may call functions that use varargs or stdargs
4205 // (prototype-less calls or calls to functions containing ellipsis (...) in
4206 // the declaration) %al is used as hidden argument to specify the number
4207 // of SSE registers used. The contents of %al do not need to match exactly
4208 // the number of registers, but must be an ubound on the number of SSE
4209 // registers used and is in the range 0 - 8 inclusive.
4210
4211 // Count the number of XMM registers allocated.
4212 static const MCPhysReg XMMArgRegs[] = {
4213 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
4214 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
4215 };
4216 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
4217 assert((Subtarget.hasSSE1() || !NumXMMRegs)((void)0)
4218 && "SSE registers cannot be used when SSE is disabled")((void)0);
4219 RegsToPass.push_back(std::make_pair(Register(X86::AL),
4220 DAG.getConstant(NumXMMRegs, dl,
4221 MVT::i8)));
4222 }
4223
4224 if (isVarArg && IsMustTail) {
4225 const auto &Forwards = X86Info->getForwardedMustTailRegParms();
4226 for (const auto &F : Forwards) {
4227 SDValue Val = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
4228 RegsToPass.push_back(std::make_pair(F.PReg, Val));
4229 }
4230 }
4231
4232 // For tail calls lower the arguments to the 'real' stack slots. Sibcalls
4233 // don't need this because the eligibility check rejects calls that require
4234 // shuffling arguments passed in memory.
4235 if (!IsSibcall && isTailCall) {
4236 // Force all the incoming stack arguments to be loaded from the stack
4237 // before any new outgoing arguments are stored to the stack, because the
4238 // outgoing stack slots may alias the incoming argument stack slots, and
4239 // the alias isn't otherwise explicit. This is slightly more conservative
4240 // than necessary, because it means that each store effectively depends
4241 // on every argument instead of just those arguments it would clobber.
4242 SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain);
4243
4244 SmallVector<SDValue, 8> MemOpChains2;
4245 SDValue FIN;
4246 int FI = 0;
4247 for (unsigned I = 0, OutsIndex = 0, E = ArgLocs.size(); I != E;
4248 ++I, ++OutsIndex) {
4249 CCValAssign &VA = ArgLocs[I];
4250
4251 if (VA.isRegLoc()) {
4252 if (VA.needsCustom()) {
4253 assert((CallConv == CallingConv::X86_RegCall) &&((void)0)
4254 "Expecting custom case only in regcall calling convention")((void)0);
4255 // This means that we are in special case where one argument was
4256 // passed through two register locations - Skip the next location
4257 ++I;
4258 }
4259
4260 continue;
4261 }
4262
4263 assert(VA.isMemLoc())((void)0);
4264 SDValue Arg = OutVals[OutsIndex];
4265 ISD::ArgFlagsTy Flags = Outs[OutsIndex].Flags;
4266 // Skip inalloca/preallocated arguments. They don't require any work.
4267 if (Flags.isInAlloca() || Flags.isPreallocated())
4268 continue;
4269 // Create frame index.
4270 int32_t Offset = VA.getLocMemOffset()+FPDiff;
4271 uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
4272 FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
4273 FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
4274
4275 if (Flags.isByVal()) {
4276 // Copy relative to framepointer.
4277 SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset(), dl);
4278 if (!StackPtr.getNode())
4279 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
4280 getPointerTy(DAG.getDataLayout()));
4281 Source = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
4282 StackPtr, Source);
4283
4284 MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN,
4285 ArgChain,
4286 Flags, DAG, dl));
4287 } else {
4288 // Store relative to framepointer.
4289 MemOpChains2.push_back(DAG.getStore(
4290 ArgChain, dl, Arg, FIN,
4291 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
4292 }
4293 }
4294
4295 if (!MemOpChains2.empty())
4296 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
4297
4298 // Store the return address to the appropriate stack slot.
4299 Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx,
4300 getPointerTy(DAG.getDataLayout()),
4301 RegInfo->getSlotSize(), FPDiff, dl);
4302 }
4303
4304 // Build a sequence of copy-to-reg nodes chained together with token chain
4305 // and flag operands which copy the outgoing args into registers.
4306 SDValue InFlag;
4307 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
4308 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
4309 RegsToPass[i].second, InFlag);
4310 InFlag = Chain.getValue(1);
4311 }
4312
4313 if (DAG.getTarget().getCodeModel() == CodeModel::Large) {
4314 assert(Is64Bit && "Large code model is only legal in 64-bit mode.")((void)0);
4315 // In the 64-bit large code model, we have to make all calls
4316 // through a register, since the call instruction's 32-bit
4317 // pc-relative offset may not be large enough to hold the whole
4318 // address.
4319 } else if (Callee->getOpcode() == ISD::GlobalAddress ||
4320 Callee->getOpcode() == ISD::ExternalSymbol) {
4321 // Lower direct calls to global addresses and external symbols. Setting
4322 // ForCall to true here has the effect of removing WrapperRIP when possible
4323 // to allow direct calls to be selected without first materializing the
4324 // address into a register.
4325 Callee = LowerGlobalOrExternal(Callee, DAG, /*ForCall=*/true);
4326 } else if (Subtarget.isTarget64BitILP32() &&
4327 Callee->getValueType(0) == MVT::i32) {
4328 // Zero-extend the 32-bit Callee address into a 64-bit according to x32 ABI
4329 Callee = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Callee);
4330 }
4331
4332 // Returns a chain & a flag for retval copy to use.
4333 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
4334 SmallVector<SDValue, 8> Ops;
4335
4336 if (!IsSibcall && isTailCall && !IsMustTail) {
4337 Chain = DAG.getCALLSEQ_END(Chain,
4338 DAG.getIntPtrConstant(NumBytesToPop, dl, true),
4339 DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
4340 InFlag = Chain.getValue(1);
4341 }
4342
4343 Ops.push_back(Chain);
4344 Ops.push_back(Callee);
4345
4346 if (isTailCall)
4347 Ops.push_back(DAG.getTargetConstant(FPDiff, dl, MVT::i32));
4348
4349 // Add argument registers to the end of the list so that they are known live
4350 // into the call.
4351 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
4352 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
4353 RegsToPass[i].second.getValueType()));
4354
4355 // Add a register mask operand representing the call-preserved registers.
4356 const uint32_t *Mask = [&]() {
4357 auto AdaptedCC = CallConv;
4358 // If HasNCSR is asserted (attribute NoCallerSavedRegisters exists),
4359 // use X86_INTR calling convention because it has the same CSR mask
4360 // (same preserved registers).
4361 if (HasNCSR)
4362 AdaptedCC = (CallingConv::ID)CallingConv::X86_INTR;
4363 // If NoCalleeSavedRegisters is requested, then use GHC since it happens
4364 // to use the CSR_NoRegs_RegMask.
4365 if (CB && CB->hasFnAttr("no_callee_saved_registers"))
4366 AdaptedCC = (CallingConv::ID)CallingConv::GHC;
4367 return RegInfo->getCallPreservedMask(MF, AdaptedCC);
4368 }();
4369 assert(Mask && "Missing call preserved mask for calling convention")((void)0);
4370
4371 // If this is an invoke in a 32-bit function using a funclet-based
4372 // personality, assume the function clobbers all registers. If an exception
4373 // is thrown, the runtime will not restore CSRs.
4374 // FIXME: Model this more precisely so that we can register allocate across
4375 // the normal edge and spill and fill across the exceptional edge.
4376 if (!Is64Bit && CLI.CB && isa<InvokeInst>(CLI.CB)) {
4377 const Function &CallerFn = MF.getFunction();
4378 EHPersonality Pers =
4379 CallerFn.hasPersonalityFn()
4380 ? classifyEHPersonality(CallerFn.getPersonalityFn())
4381 : EHPersonality::Unknown;
4382 if (isFuncletEHPersonality(Pers))
4383 Mask = RegInfo->getNoPreservedMask();
4384 }
4385
4386 // Define a new register mask from the existing mask.
4387 uint32_t *RegMask = nullptr;
4388
4389 // In some calling conventions we need to remove the used physical registers
4390 // from the reg mask.
4391 if (CallConv == CallingConv::X86_RegCall || HasNCSR) {
4392 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
4393
4394 // Allocate a new Reg Mask and copy Mask.
4395 RegMask = MF.allocateRegMask();
4396 unsigned RegMaskSize = MachineOperand::getRegMaskSize(TRI->getNumRegs());
4397 memcpy(RegMask, Mask, sizeof(RegMask[0]) * RegMaskSize);
4398
4399 // Make sure all sub registers of the argument registers are reset
4400 // in the RegMask.
4401 for (auto const &RegPair : RegsToPass)
4402 for (MCSubRegIterator SubRegs(RegPair.first, TRI, /*IncludeSelf=*/true);
4403 SubRegs.isValid(); ++SubRegs)
4404 RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));
4405
4406 // Create the RegMask Operand according to our updated mask.
4407 Ops.push_back(DAG.getRegisterMask(RegMask));
4408 } else {
4409 // Create the RegMask Operand according to the static mask.
4410 Ops.push_back(DAG.getRegisterMask(Mask));
4411 }
4412
4413 if (InFlag.getNode())
4414 Ops.push_back(InFlag);
4415
4416 if (isTailCall) {
4417 // We used to do:
4418 //// If this is the first return lowered for this function, add the regs
4419 //// to the liveout set for the function.
4420 // This isn't right, although it's probably harmless on x86; liveouts
4421 // should be computed from returns not tail calls. Consider a void
4422 // function making a tail call to a function returning int.
4423 MF.getFrameInfo().setHasTailCall();
4424 SDValue Ret = DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, Ops);
4425 DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
4426 return Ret;
4427 }
4428
4429 if (HasNoCfCheck && IsCFProtectionSupported && IsIndirectCall) {
4430 Chain = DAG.getNode(X86ISD::NT_CALL, dl, NodeTys, Ops);
4431 } else if (CLI.CB && objcarc::hasAttachedCallOpBundle(CLI.CB)) {
4432 // Calls with a "clang.arc.attachedcall" bundle are special. They should be
4433 // expanded to the call, directly followed by a special marker sequence and
4434 // a call to a ObjC library function. Use the CALL_RVMARKER to do that.
4435 assert(!isTailCall &&((void)0)
4436 "tail calls cannot be marked with clang.arc.attachedcall")((void)0);
4437 assert(Is64Bit && "clang.arc.attachedcall is only supported in 64bit mode")((void)0);
4438
4439 // Add target constant to select ObjC runtime call just before the call
4440 // target. RuntimeCallType == 0 selects objc_retainAutoreleasedReturnValue,
4441 // RuntimeCallType == 1 selects objc_unsafeClaimAutoreleasedReturnValue when
4442 // expanding the pseudo.
4443 unsigned RuntimeCallType =
4444 objcarc::hasAttachedCallOpBundle(CLI.CB, true) ? 0 : 1;
4445 Ops.insert(Ops.begin() + 1,
4446 DAG.getTargetConstant(RuntimeCallType, dl, MVT::i32));
4447 Chain = DAG.getNode(X86ISD::CALL_RVMARKER, dl, NodeTys, Ops);
4448 } else {
4449 Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops);
4450 }
4451
4452 InFlag = Chain.getValue(1);
4453 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
4454 DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
4455
4456 // Save heapallocsite metadata.
4457 if (CLI.CB)
4458 if (MDNode *HeapAlloc = CLI.CB->getMetadata("heapallocsite"))
4459 DAG.addHeapAllocSite(Chain.getNode(), HeapAlloc);
4460
4461 // Create the CALLSEQ_END node.
4462 unsigned NumBytesForCalleeToPop;
4463 if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
4464 DAG.getTarget().Options.GuaranteedTailCallOpt))
4465 NumBytesForCalleeToPop = NumBytes; // Callee pops everything
4466 else if (!Is64Bit && !canGuaranteeTCO(CallConv) &&
4467 !Subtarget.getTargetTriple().isOSMSVCRT() &&
4468 SR == StackStructReturn)
4469 // If this is a call to a struct-return function, the callee
4470 // pops the hidden struct pointer, so we have to push it back.
4471 // This is common for Darwin/X86, Linux & Mingw32 targets.
4472 // For MSVC Win32 targets, the caller pops the hidden struct pointer.
4473 NumBytesForCalleeToPop = 4;
4474 else
4475 NumBytesForCalleeToPop = 0; // Callee pops nothing.
4476
4477 // Returns a flag for retval copy to use.
4478 if (!IsSibcall) {
4479 Chain = DAG.getCALLSEQ_END(Chain,
4480 DAG.getIntPtrConstant(NumBytesToPop, dl, true),
4481 DAG.getIntPtrConstant(NumBytesForCalleeToPop, dl,
4482 true),
4483 InFlag, dl);
4484 InFlag = Chain.getValue(1);
4485 }
4486
4487 // Handle result values, copying them out of physregs into vregs that we
4488 // return.
4489 return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
4490 InVals, RegMask);
4491}
4492
4493//===----------------------------------------------------------------------===//
4494// Fast Calling Convention (tail call) implementation
4495//===----------------------------------------------------------------------===//
4496
4497// Like the stdcall convention (callee cleans arguments), except that ECX is
4498// reserved for storing the tail called function address. Only 2 registers are
4499// free for argument passing (inreg). Tail call optimization is performed
4500// provided:
4501// * tailcallopt is enabled
4502// * caller/callee are fastcc
4503// On X86_64 architecture with GOT-style position independent code only local
4504// (within module) calls are supported at the moment.
4505// To keep the stack aligned according to platform abi the function
4506// GetAlignedArgumentStackSize ensures that argument delta is always multiples
4507// of stack alignment. (Dynamic linkers need this - Darwin's dyld for example)
4508// If a tail called function callee has more arguments than the caller the
4509// caller needs to make sure that there is room to move the RETADDR to. This is
4510// achieved by reserving an area the size of the argument delta right after the
4511// original RETADDR, but before the saved framepointer or the spilled registers
4512// e.g. caller(arg1, arg2) calls callee(arg1, arg2,arg3,arg4)
4513// stack layout:
4514// arg1
4515// arg2
4516// RETADDR
4517// [ new RETADDR
4518// move area ]
4519// (possible EBP)
4520// ESI
4521// EDI
4522// local1 ..
4523
4524/// Make the stack size align e.g 16n + 12 aligned for a 16-byte align
4525/// requirement.
4526unsigned
4527X86TargetLowering::GetAlignedArgumentStackSize(const unsigned StackSize,
4528 SelectionDAG &DAG) const {
4529 const Align StackAlignment = Subtarget.getFrameLowering()->getStackAlign();
4530 const uint64_t SlotSize = Subtarget.getRegisterInfo()->getSlotSize();
4531 assert(StackSize % SlotSize == 0 &&((void)0)
4532 "StackSize must be a multiple of SlotSize")((void)0);
4533 return alignTo(StackSize + SlotSize, StackAlignment) - SlotSize;
4534}
4535
4536/// Return true if the given stack call argument is already available in the
4537/// same position (relatively) of the caller's incoming argument stack.
4538static
4539bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
4540 MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
4541 const X86InstrInfo *TII, const CCValAssign &VA) {
4542 unsigned Bytes = Arg.getValueSizeInBits() / 8;
4543
4544 for (;;) {
4545 // Look through nodes that don't alter the bits of the incoming value.
4546 unsigned Op = Arg.getOpcode();
4547 if (Op == ISD::ZERO_EXTEND || Op == ISD::ANY_EXTEND || Op == ISD::BITCAST) {
4548 Arg = Arg.getOperand(0);
4549 continue;
4550 }
4551 if (Op == ISD::TRUNCATE) {
4552 const SDValue &TruncInput = Arg.getOperand(0);
4553 if (TruncInput.getOpcode() == ISD::AssertZext &&
4554 cast<VTSDNode>(TruncInput.getOperand(1))->getVT() ==
4555 Arg.getValueType()) {
4556 Arg = TruncInput.getOperand(0);
4557 continue;
4558 }
4559 }
4560 break;
4561 }
4562
4563 int FI = INT_MAX2147483647;
4564 if (Arg.getOpcode() == ISD::CopyFromReg) {
4565 Register VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
4566 if (!VR.isVirtual())
4567 return false;
4568 MachineInstr *Def = MRI->getVRegDef(VR);
4569 if (!Def)
4570 return false;
4571 if (!Flags.isByVal()) {
4572 if (!TII->isLoadFromStackSlot(*Def, FI))
4573 return false;
4574 } else {
4575 unsigned Opcode = Def->getOpcode();
4576 if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
4577 Opcode == X86::LEA64_32r) &&
4578 Def->getOperand(1).isFI()) {
4579 FI = Def->getOperand(1).getIndex();
4580 Bytes = Flags.getByValSize();
4581 } else
4582 return false;
4583 }
4584 } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
4585 if (Flags.isByVal())
4586 // ByVal argument is passed in as a pointer but it's now being
4587 // dereferenced. e.g.
4588 // define @foo(%struct.X* %A) {
4589 // tail call @bar(%struct.X* byval %A)
4590 // }
4591 return false;
4592 SDValue Ptr = Ld->getBasePtr();
4593 FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
4594 if (!FINode)
4595 return false;
4596 FI = FINode->getIndex();
4597 } else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) {
4598 FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Arg);
4599 FI = FINode->getIndex();
4600 Bytes = Flags.getByValSize();
4601 } else
4602 return false;
4603
4604 assert(FI != INT_MAX)((void)0);
4605 if (!MFI.isFixedObjectIndex(FI))
4606 return false;
4607
4608 if (Offset != MFI.getObjectOffset(FI))
4609 return false;
4610
4611 // If this is not byval, check that the argument stack object is immutable.
4612 // inalloca and argument copy elision can create mutable argument stack
4613 // objects. Byval objects can be mutated, but a byval call intends to pass the
4614 // mutated memory.
4615 if (!Flags.isByVal() && !MFI.isImmutableObjectIndex(FI))
4616 return false;
4617
4618 if (VA.getLocVT().getFixedSizeInBits() >
4619 Arg.getValueSizeInBits().getFixedSize()) {
4620 // If the argument location is wider than the argument type, check that any
4621 // extension flags match.
4622 if (Flags.isZExt() != MFI.isObjectZExt(FI) ||
4623 Flags.isSExt() != MFI.isObjectSExt(FI)) {
4624 return false;
4625 }
4626 }
4627
4628 return Bytes == MFI.getObjectSize(FI);
4629}
4630
4631/// Check whether the call is eligible for tail call optimization. Targets
4632/// that want to do tail call optimization should implement this function.
4633bool X86TargetLowering::IsEligibleForTailCallOptimization(
4634 SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
4635 bool isCalleeStructRet, bool isCallerStructRet, Type *RetTy,
4636 const SmallVectorImpl<ISD::OutputArg> &Outs,
4637 const SmallVectorImpl<SDValue> &OutVals,
4638 const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
4639 if (!mayTailCallThisCC(CalleeCC))
4640 return false;
4641
4642 // If -tailcallopt is specified, make fastcc functions tail-callable.
4643 MachineFunction &MF = DAG.getMachineFunction();
4644 const Function &CallerF = MF.getFunction();
4645
4646 // If the function return type is x86_fp80 and the callee return type is not,
4647 // then the FP_EXTEND of the call result is not a nop. It's not safe to
4648 // perform a tailcall optimization here.
4649 if (CallerF.getReturnType()->isX86_FP80Ty() && !RetTy->isX86_FP80Ty())
4650 return false;
4651
4652 CallingConv::ID CallerCC = CallerF.getCallingConv();
4653 bool CCMatch = CallerCC == CalleeCC;
4654 bool IsCalleeWin64 = Subtarget.isCallingConvWin64(CalleeCC);
4655 bool IsCallerWin64 = Subtarget.isCallingConvWin64(CallerCC);
4656 bool IsGuaranteeTCO = DAG.getTarget().Options.GuaranteedTailCallOpt ||
4657 CalleeCC == CallingConv::Tail || CalleeCC == CallingConv::SwiftTail;
4658
4659 // Win64 functions have extra shadow space for argument homing. Don't do the
4660 // sibcall if the caller and callee have mismatched expectations for this
4661 // space.
4662 if (IsCalleeWin64 != IsCallerWin64)
4663 return false;
4664
4665 if (IsGuaranteeTCO) {
4666 if (canGuaranteeTCO(CalleeCC) && CCMatch)
4667 return true;
4668 return false;
4669 }
4670
4671 // Look for obvious safe cases to perform tail call optimization that do not
4672 // require ABI changes. This is what gcc calls sibcall.
4673
4674 // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
4675 // emit a special epilogue.
4676 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
4677 if (RegInfo->hasStackRealignment(MF))
4678 return false;
4679
4680 // Also avoid sibcall optimization if either caller or callee uses struct
4681 // return semantics.
4682 if (isCalleeStructRet || isCallerStructRet)
4683 return false;
4684
4685 // Do not sibcall optimize vararg calls unless all arguments are passed via
4686 // registers.
4687 LLVMContext &C = *DAG.getContext();
4688 if (isVarArg && !Outs.empty()) {
4689 // Optimizing for varargs on Win64 is unlikely to be safe without
4690 // additional testing.
4691 if (IsCalleeWin64 || IsCallerWin64)
4692 return false;
4693
4694 SmallVector<CCValAssign, 16> ArgLocs;
4695 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
4696
4697 CCInfo.AnalyzeCallOperands(Outs, CC_X86);
4698 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i)
4699 if (!ArgLocs[i].isRegLoc())
4700 return false;
4701 }
4702
4703 // If the call result is in ST0 / ST1, it needs to be popped off the x87
4704 // stack. Therefore, if it's not used by the call it is not safe to optimize
4705 // this into a sibcall.
4706 bool Unused = false;
4707 for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
4708 if (!Ins[i].Used) {
4709 Unused = true;
4710 break;
4711 }
4712 }
4713 if (Unused) {
4714 SmallVector<CCValAssign, 16> RVLocs;
4715 CCState CCInfo(CalleeCC, false, MF, RVLocs, C);
4716 CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
4717 for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
4718 CCValAssign &VA = RVLocs[i];
4719 if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
4720 return false;
4721 }
4722 }
4723
4724 // Check that the call results are passed in the same way.
4725 if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
4726 RetCC_X86, RetCC_X86))
4727 return false;
4728 // The callee has to preserve all registers the caller needs to preserve.
4729 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
4730 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
4731 if (!CCMatch) {
4732 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
4733 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
4734 return false;
4735 }
4736
4737 unsigned StackArgsSize = 0;
4738
4739 // If the callee takes no arguments then go on to check the results of the
4740 // call.
4741 if (!Outs.empty()) {
4742 // Check if stack adjustment is needed. For now, do not do this if any
4743 // argument is passed on the stack.
4744 SmallVector<CCValAssign, 16> ArgLocs;
4745 CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);
4746
4747 // Allocate shadow area for Win64
4748 if (IsCalleeWin64)
4749 CCInfo.AllocateStack(32, Align(8));
4750
4751 CCInfo.AnalyzeCallOperands(Outs, CC_X86);
4752 StackArgsSize = CCInfo.getNextStackOffset();
4753
4754 if (CCInfo.getNextStackOffset()) {
4755 // Check if the arguments are already laid out in the right way as
4756 // the caller's fixed stack objects.
4757 MachineFrameInfo &MFI = MF.getFrameInfo();
4758 const MachineRegisterInfo *MRI = &MF.getRegInfo();
4759 const X86InstrInfo *TII = Subtarget.getInstrInfo();
4760 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4761 CCValAssign &VA = ArgLocs[i];
4762 SDValue Arg = OutVals[i];
4763 ISD::ArgFlagsTy Flags = Outs[i].Flags;
4764 if (VA.getLocInfo() == CCValAssign::Indirect)
4765 return false;
4766 if (!VA.isRegLoc()) {
4767 if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
4768 MFI, MRI, TII, VA))
4769 return false;
4770 }
4771 }
4772 }
4773
4774 bool PositionIndependent = isPositionIndependent();
4775 // If the tailcall address may be in a register, then make sure it's
4776 // possible to register allocate for it. In 32-bit, the call address can
4777 // only target EAX, EDX, or ECX since the tail call must be scheduled after
4778 // callee-saved registers are restored. These happen to be the same
4779 // registers used to pass 'inreg' arguments so watch out for those.
4780 if (!Subtarget.is64Bit() && ((!isa<GlobalAddressSDNode>(Callee) &&
4781 !isa<ExternalSymbolSDNode>(Callee)) ||
4782 PositionIndependent)) {
4783 unsigned NumInRegs = 0;
4784 // In PIC we need an extra register to formulate the address computation
4785 // for the callee.
4786 unsigned MaxInRegs = PositionIndependent ? 2 : 3;
4787
4788 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4789 CCValAssign &VA = ArgLocs[i];
4790 if (!VA.isRegLoc())
4791 continue;
4792 Register Reg = VA.getLocReg();
4793 switch (Reg) {
4794 default: break;
4795 case X86::EAX: case X86::EDX: case X86::ECX:
4796 if (++NumInRegs == MaxInRegs)
4797 return false;
4798 break;
4799 }
4800 }
4801 }
4802
4803 const MachineRegisterInfo &MRI = MF.getRegInfo();
4804 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
4805 return false;
4806 }
4807
4808 bool CalleeWillPop =
4809 X86::isCalleePop(CalleeCC, Subtarget.is64Bit(), isVarArg,
4810 MF.getTarget().Options.GuaranteedTailCallOpt);
4811
4812 if (unsigned BytesToPop =
4813 MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn()) {
4814 // If we have bytes to pop, the callee must pop them.
4815 bool CalleePopMatches = CalleeWillPop && BytesToPop == StackArgsSize;
4816 if (!CalleePopMatches)
4817 return false;
4818 } else if (CalleeWillPop && StackArgsSize > 0) {
4819 // If we don't have bytes to pop, make sure the callee doesn't pop any.
4820 return false;
4821 }
4822
4823 return true;
4824}
4825
4826FastISel *
4827X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
4828 const TargetLibraryInfo *libInfo) const {
4829 return X86::createFastISel(funcInfo, libInfo);
4830}
4831
4832//===----------------------------------------------------------------------===//
4833// Other Lowering Hooks
4834//===----------------------------------------------------------------------===//
4835
4836static bool MayFoldLoad(SDValue Op) {
4837 return Op.hasOneUse() && ISD::isNormalLoad(Op.getNode());
4838}
4839
4840static bool MayFoldIntoStore(SDValue Op) {
4841 return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->use_begin());
4842}
4843
4844static bool MayFoldIntoZeroExtend(SDValue Op) {
4845 if (Op.hasOneUse()) {
4846 unsigned Opcode = Op.getNode()->use_begin()->getOpcode();
4847 return (ISD::ZERO_EXTEND == Opcode);
4848 }
4849 return false;
4850}
4851
4852static bool isTargetShuffle(unsigned Opcode) {
4853 switch(Opcode) {
4854 default: return false;
4855 case X86ISD::BLENDI:
4856 case X86ISD::PSHUFB:
4857 case X86ISD::PSHUFD:
4858 case X86ISD::PSHUFHW:
4859 case X86ISD::PSHUFLW:
4860 case X86ISD::SHUFP:
4861 case X86ISD::INSERTPS:
4862 case X86ISD::EXTRQI:
4863 case X86ISD::INSERTQI:
4864 case X86ISD::VALIGN:
4865 case X86ISD::PALIGNR:
4866 case X86ISD::VSHLDQ:
4867 case X86ISD::VSRLDQ:
4868 case X86ISD::MOVLHPS:
4869 case X86ISD::MOVHLPS:
4870 case X86ISD::MOVSHDUP:
4871 case X86ISD::MOVSLDUP:
4872 case X86ISD::MOVDDUP:
4873 case X86ISD::MOVSS:
4874 case X86ISD::MOVSD:
4875 case X86ISD::UNPCKL:
4876 case X86ISD::UNPCKH:
4877 case X86ISD::VBROADCAST:
4878 case X86ISD::VPERMILPI:
4879 case X86ISD::VPERMILPV:
4880 case X86ISD::VPERM2X128:
4881 case X86ISD::SHUF128:
4882 case X86ISD::VPERMIL2:
4883 case X86ISD::VPERMI:
4884 case X86ISD::VPPERM:
4885 case X86ISD::VPERMV:
4886 case X86ISD::VPERMV3:
4887 case X86ISD::VZEXT_MOVL:
4888 return true;
4889 }
4890}
4891
4892static bool isTargetShuffleVariableMask(unsigned Opcode) {
4893 switch (Opcode) {
4894 default: return false;
4895 // Target Shuffles.
4896 case X86ISD::PSHUFB:
4897 case X86ISD::VPERMILPV:
4898 case X86ISD::VPERMIL2:
4899 case X86ISD::VPPERM:
4900 case X86ISD::VPERMV:
4901 case X86ISD::VPERMV3:
4902 return true;
4903 // 'Faux' Target Shuffles.
4904 case ISD::OR:
4905 case ISD::AND:
4906 case X86ISD::ANDNP:
4907 return true;
4908 }
4909}
4910
4911static bool isTargetShuffleSplat(SDValue Op) {
4912 unsigned Opcode = Op.getOpcode();
4913 if (Opcode == ISD::EXTRACT_SUBVECTOR)
4914 return isTargetShuffleSplat(Op.getOperand(0));
4915 return Opcode == X86ISD::VBROADCAST || Opcode == X86ISD::VBROADCAST_LOAD;
4916}
4917
4918SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
4919 MachineFunction &MF = DAG.getMachineFunction();
4920 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
4921 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
4922 int ReturnAddrIndex = FuncInfo->getRAIndex();
4923
4924 if (ReturnAddrIndex == 0) {
4925 // Set up a frame object for the return address.
4926 unsigned SlotSize = RegInfo->getSlotSize();
4927 ReturnAddrIndex = MF.getFrameInfo().CreateFixedObject(SlotSize,
4928 -(int64_t)SlotSize,
4929 false);
4930 FuncInfo->setRAIndex(ReturnAddrIndex);
4931 }
4932
4933 return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy(DAG.getDataLayout()));
4934}
4935
4936bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M,
4937 bool hasSymbolicDisplacement) {
4938 // Offset should fit into 32 bit immediate field.
4939 if (!isInt<32>(Offset))
4940 return false;
4941
4942 // If we don't have a symbolic displacement - we don't have any extra
4943 // restrictions.
4944 if (!hasSymbolicDisplacement)
4945 return true;
4946
4947 // FIXME: Some tweaks might be needed for medium code model.
4948 if (M != CodeModel::Small && M != CodeModel::Kernel)
4949 return false;
4950
4951 // For small code model we assume that latest object is 16MB before end of 31
4952 // bits boundary. We may also accept pretty large negative constants knowing
4953 // that all objects are in the positive half of address space.
4954 if (M == CodeModel::Small && Offset < 16*1024*1024)
4955 return true;
4956
4957 // For kernel code model we know that all object resist in the negative half
4958 // of 32bits address space. We may not accept negative offsets, since they may
4959 // be just off and we may accept pretty large positive ones.
4960 if (M == CodeModel::Kernel && Offset >= 0)
4961 return true;
4962
4963 return false;
4964}
4965
4966/// Determines whether the callee is required to pop its own arguments.
4967/// Callee pop is necessary to support tail calls.
4968bool X86::isCalleePop(CallingConv::ID CallingConv,
4969 bool is64Bit, bool IsVarArg, bool GuaranteeTCO) {
4970 // If GuaranteeTCO is true, we force some calls to be callee pop so that we
4971 // can guarantee TCO.
4972 if (!IsVarArg && shouldGuaranteeTCO(CallingConv, GuaranteeTCO))
4973 return true;
4974
4975 switch (CallingConv) {
4976 default:
4977 return false;
4978 case CallingConv::X86_StdCall:
4979 case CallingConv::X86_FastCall:
4980 case CallingConv::X86_ThisCall:
4981 case CallingConv::X86_VectorCall:
4982 return !is64Bit;
4983 }
4984}
4985
4986/// Return true if the condition is an signed comparison operation.
4987static bool isX86CCSigned(unsigned X86CC) {
4988 switch (X86CC) {
4989 default:
4990 llvm_unreachable("Invalid integer condition!")__builtin_unreachable();
4991 case X86::COND_E:
4992 case X86::COND_NE:
4993 case X86::COND_B:
4994 case X86::COND_A:
4995 case X86::COND_BE:
4996 case X86::COND_AE:
4997 return false;
4998 case X86::COND_G:
4999 case X86::COND_GE:
5000 case X86::COND_L:
5001 case X86::COND_LE:
5002 return true;
5003 }
5004}
5005
5006static X86::CondCode TranslateIntegerX86CC(ISD::CondCode SetCCOpcode) {
5007 switch (SetCCOpcode) {
5008 default: llvm_unreachable("Invalid integer condition!")__builtin_unreachable();
5009 case ISD::SETEQ: return X86::COND_E;
5010 case ISD::SETGT: return X86::COND_G;
5011 case ISD::SETGE: return X86::COND_GE;
5012 case ISD::SETLT: return X86::COND_L;
5013 case ISD::SETLE: return X86::COND_LE;
5014 case ISD::SETNE: return X86::COND_NE;
5015 case ISD::SETULT: return X86::COND_B;
5016 case ISD::SETUGT: return X86::COND_A;
5017 case ISD::SETULE: return X86::COND_BE;
5018 case ISD::SETUGE: return X86::COND_AE;
5019 }
5020}
5021
5022/// Do a one-to-one translation of a ISD::CondCode to the X86-specific
5023/// condition code, returning the condition code and the LHS/RHS of the
5024/// comparison to make.
5025static X86::CondCode TranslateX86CC(ISD::CondCode SetCCOpcode, const SDLoc &DL,
5026 bool isFP, SDValue &LHS, SDValue &RHS,
5027 SelectionDAG &DAG) {
5028 if (!isFP) {
5029 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
5030 if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) {
5031 // X > -1 -> X == 0, jump !sign.
5032 RHS = DAG.getConstant(0, DL, RHS.getValueType());
5033 return X86::COND_NS;
5034 }
5035 if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) {
5036 // X < 0 -> X == 0, jump on sign.
5037 return X86::COND_S;
5038 }
5039 if (SetCCOpcode == ISD::SETGE && RHSC->isNullValue()) {
5040 // X >= 0 -> X == 0, jump on !sign.
5041 return X86::COND_NS;
5042 }
5043 if (SetCCOpcode == ISD::SETLT && RHSC->isOne()) {
5044 // X < 1 -> X <= 0
5045 RHS = DAG.getConstant(0, DL, RHS.getValueType());
5046 return X86::COND_LE;
5047 }
5048 }
5049
5050 return TranslateIntegerX86CC(SetCCOpcode);
5051 }
5052
5053 // First determine if it is required or is profitable to flip the operands.
5054
5055 // If LHS is a foldable load, but RHS is not, flip the condition.
5056 if (ISD::isNON_EXTLoad(LHS.getNode()) &&
5057 !ISD::isNON_EXTLoad(RHS.getNode())) {
5058 SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode);
5059 std::swap(LHS, RHS);
5060 }
5061
5062 switch (SetCCOpcode) {
5063 default: break;
5064 case ISD::SETOLT:
5065 case ISD::SETOLE:
5066 case ISD::SETUGT:
5067 case ISD::SETUGE:
5068 std::swap(LHS, RHS);
5069 break;
5070 }
5071
5072 // On a floating point condition, the flags are set as follows:
5073 // ZF PF CF op
5074 // 0 | 0 | 0 | X > Y
5075 // 0 | 0 | 1 | X < Y
5076 // 1 | 0 | 0 | X == Y
5077 // 1 | 1 | 1 | unordered
5078 switch (SetCCOpcode) {
5079 default: llvm_unreachable("Condcode should be pre-legalized away")__builtin_unreachable();
5080 case ISD::SETUEQ:
5081 case ISD::SETEQ: return X86::COND_E;
5082 case ISD::SETOLT: // flipped
5083 case ISD::SETOGT:
5084 case ISD::SETGT: return X86::COND_A;
5085 case ISD::SETOLE: // flipped
5086 case ISD::SETOGE:
5087 case ISD::SETGE: return X86::COND_AE;
5088 case ISD::SETUGT: // flipped
5089 case ISD::SETULT:
5090 case ISD::SETLT: return X86::COND_B;
5091 case ISD::SETUGE: // flipped
5092 case ISD::SETULE:
5093 case ISD::SETLE: return X86::COND_BE;
5094 case ISD::SETONE:
5095 case ISD::SETNE: return X86::COND_NE;
5096 case ISD::SETUO: return X86::COND_P;
5097 case ISD::SETO: return X86::COND_NP;
5098 case ISD::SETOEQ:
5099 case ISD::SETUNE: return X86::COND_INVALID;
5100 }
5101}
5102
5103/// Is there a floating point cmov for the specific X86 condition code?
5104/// Current x86 isa includes the following FP cmov instructions:
5105/// fcmovb, fcomvbe, fcomve, fcmovu, fcmovae, fcmova, fcmovne, fcmovnu.
5106static bool hasFPCMov(unsigned X86CC) {
5107 switch (X86CC) {
5108 default:
5109 return false;
5110 case X86::COND_B:
5111 case X86::COND_BE:
5112 case X86::COND_E:
5113 case X86::COND_P:
5114 case X86::COND_A:
5115 case X86::COND_AE:
5116 case X86::COND_NE:
5117 case X86::COND_NP:
5118 return true;
5119 }
5120}
5121
5122
5123bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
5124 const CallInst &I,
5125 MachineFunction &MF,
5126 unsigned Intrinsic) const {
5127 Info.flags = MachineMemOperand::MONone;
5128 Info.offset = 0;
5129
5130 const IntrinsicData* IntrData = getIntrinsicWithChain(Intrinsic);
5131 if (!IntrData) {
5132 switch (Intrinsic) {
5133 case Intrinsic::x86_aesenc128kl:
5134 case Intrinsic::x86_aesdec128kl:
5135 Info.opc = ISD::INTRINSIC_W_CHAIN;
5136 Info.ptrVal = I.getArgOperand(1);
5137 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 48);
5138 Info.align = Align(1);
5139 Info.flags |= MachineMemOperand::MOLoad;
5140 return true;
5141 case Intrinsic::x86_aesenc256kl:
5142 case Intrinsic::x86_aesdec256kl:
5143 Info.opc = ISD::INTRINSIC_W_CHAIN;
5144 Info.ptrVal = I.getArgOperand(1);
5145 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 64);
5146 Info.align = Align(1);
5147 Info.flags |= MachineMemOperand::MOLoad;
5148 return true;
5149 case Intrinsic::x86_aesencwide128kl:
5150 case Intrinsic::x86_aesdecwide128kl:
5151 Info.opc = ISD::INTRINSIC_W_CHAIN;
5152 Info.ptrVal = I.getArgOperand(0);
5153 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 48);
5154 Info.align = Align(1);
5155 Info.flags |= MachineMemOperand::MOLoad;
5156 return true;
5157 case Intrinsic::x86_aesencwide256kl:
5158 case Intrinsic::x86_aesdecwide256kl:
5159 Info.opc = ISD::INTRINSIC_W_CHAIN;
5160 Info.ptrVal = I.getArgOperand(0);
5161 Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 64);
5162 Info.align = Align(1);
5163 Info.flags |= MachineMemOperand::MOLoad;
5164 return true;
5165 }
5166 return false;
5167 }
5168
5169 switch (IntrData->Type) {
5170 case TRUNCATE_TO_MEM_VI8:
5171 case TRUNCATE_TO_MEM_VI16:
5172 case TRUNCATE_TO_MEM_VI32: {
5173 Info.opc = ISD::INTRINSIC_VOID;
5174 Info.ptrVal = I.getArgOperand(0);
5175 MVT VT = MVT::getVT(I.getArgOperand(1)->getType());
5176 MVT ScalarVT = MVT::INVALID_SIMPLE_VALUE_TYPE;
5177 if (IntrData->Type == TRUNCATE_TO_MEM_VI8)
5178 ScalarVT = MVT::i8;
5179 else if (IntrData->Type == TRUNCATE_TO_MEM_VI16)
5180 ScalarVT = MVT::i16;
5181 else if (IntrData->Type == TRUNCATE_TO_MEM_VI32)
5182 ScalarVT = MVT::i32;
5183
5184 Info.memVT = MVT::getVectorVT(ScalarVT, VT.getVectorNumElements());
5185 Info.align = Align(1);
5186 Info.flags |= MachineMemOperand::MOStore;
5187 break;
5188 }
5189 case GATHER:
5190 case GATHER_AVX2: {
5191 Info.opc = ISD::INTRINSIC_W_CHAIN;
5192 Info.ptrVal = nullptr;
5193 MVT DataVT = MVT::getVT(I.getType());
5194 MVT IndexVT = MVT::getVT(I.getArgOperand(2)->getType());
5195 unsigned NumElts = std::min(DataVT.getVectorNumElements(),
5196 IndexVT.getVectorNumElements());
5197 Info.memVT = MVT::getVectorVT(DataVT.getVectorElementType(), NumElts);
5198 Info.align = Align(1);
5199 Info.flags |= MachineMemOperand::MOLoad;
5200 break;
5201 }
5202 case SCATTER: {
5203 Info.opc = ISD::INTRINSIC_VOID;
5204 Info.ptrVal = nullptr;
5205 MVT DataVT = MVT::getVT(I.getArgOperand(3)->getType());
5206 MVT IndexVT = MVT::getVT(I.getArgOperand(2)->getType());
5207 unsigned NumElts = std::min(DataVT.getVectorNumElements(),
5208 IndexVT.getVectorNumElements());
5209 Info.memVT = MVT::getVectorVT(DataVT.getVectorElementType(), NumElts);
5210 Info.align = Align(1);
5211 Info.flags |= MachineMemOperand::MOStore;
5212 break;
5213 }
5214 default:
5215 return false;
5216 }
5217
5218 return true;
5219}
5220
5221/// Returns true if the target can instruction select the
5222/// specified FP immediate natively. If false, the legalizer will
5223/// materialize the FP immediate as a load from a constant pool.
5224bool X86TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
5225 bool ForCodeSize) const {
5226 for (unsigned i = 0, e = LegalFPImmediates.size(); i != e; ++i) {
5227 if (Imm.bitwiseIsEqual(LegalFPImmediates[i]))
5228 return true;
5229 }
5230 return false;
5231}
5232
5233bool X86TargetLowering::shouldReduceLoadWidth(SDNode *Load,
5234 ISD::LoadExtType ExtTy,
5235 EVT NewVT) const {
5236 assert(cast<LoadSDNode>(Load)->isSimple() && "illegal to narrow")((void)0);
5237
5238 // "ELF Handling for Thread-Local Storage" specifies that R_X86_64_GOTTPOFF
5239 // relocation target a movq or addq instruction: don't let the load shrink.
5240 SDValue BasePtr = cast<LoadSDNode>(Load)->getBasePtr();
5241 if (BasePtr.getOpcode() == X86ISD::WrapperRIP)
5242 if (const auto *GA = dyn_cast<GlobalAddressSDNode>(BasePtr.getOperand(0)))
5243 return GA->getTargetFlags() != X86II::MO_GOTTPOFF;
5244
5245 // If this is an (1) AVX vector load with (2) multiple uses and (3) all of
5246 // those uses are extracted directly into a store, then the extract + store
5247 // can be store-folded. Therefore, it's probably not worth splitting the load.
5248 EVT VT = Load->getValueType(0);
5249 if ((VT.is256BitVector() || VT.is512BitVector()) && !Load->hasOneUse()) {
5250 for (auto UI = Load->use_begin(), UE = Load->use_end(); UI != UE; ++UI) {
5251 // Skip uses of the chain value. Result 0 of the node is the load value.
5252 if (UI.getUse().getResNo() != 0)
5253 continue;
5254
5255 // If this use is not an extract + store, it's probably worth splitting.
5256 if (UI->getOpcode() != ISD::EXTRACT_SUBVECTOR || !UI->hasOneUse() ||
5257 UI->use_begin()->getOpcode() != ISD::STORE)
5258 return true;
5259 }
5260 // All non-chain uses are extract + store.
5261 return false;
5262 }
5263
5264 return true;
5265}
5266
5267/// Returns true if it is beneficial to convert a load of a constant
5268/// to just the constant itself.
5269bool X86TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm,
5270 Type *Ty) const {
5271 assert(Ty->isIntegerTy())((void)0);
5272
5273 unsigned BitSize = Ty->getPrimitiveSizeInBits();
5274 if (BitSize == 0 || BitSize > 64)
5275 return false;
5276 return true;
5277}
5278
5279bool X86TargetLowering::reduceSelectOfFPConstantLoads(EVT CmpOpVT) const {
5280 // If we are using XMM registers in the ABI and the condition of the select is
5281 // a floating-point compare and we have blendv or conditional move, then it is
5282 // cheaper to select instead of doing a cross-register move and creating a
5283 // load that depends on the compare result.
5284 bool IsFPSetCC = CmpOpVT.isFloatingPoint() && CmpOpVT != MVT::f128;
5285 return !IsFPSetCC || !Subtarget.isTarget64BitLP64() || !Subtarget.hasAVX();
5286}
5287
5288bool X86TargetLowering::convertSelectOfConstantsToMath(EVT VT) const {
5289 // TODO: It might be a win to ease or lift this restriction, but the generic
5290 // folds in DAGCombiner conflict with vector folds for an AVX512 target.
5291 if (VT.isVector() && Subtarget.hasAVX512())
5292 return false;
5293
5294 return true;
5295}
5296
5297bool X86TargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
5298 SDValue C) const {
5299 // TODO: We handle scalars using custom code, but generic combining could make
5300 // that unnecessary.
5301 APInt MulC;
5302 if (!ISD::isConstantSplatVector(C.getNode(), MulC))
5303 return false;
5304
5305 // Find the type this will be legalized too. Otherwise we might prematurely
5306 // convert this to shl+add/sub and then still have to type legalize those ops.
5307 // Another choice would be to defer the decision for illegal types until
5308 // after type legalization. But constant splat vectors of i64 can't make it
5309 // through type legalization on 32-bit targets so we would need to special
5310 // case vXi64.
5311 while (getTypeAction(Context, VT) != TypeLegal)
5312 VT = getTypeToTransformTo(Context, VT);
5313
5314 // If vector multiply is legal, assume that's faster than shl + add/sub.
5315 // TODO: Multiply is a complex op with higher latency and lower throughput in
5316 // most implementations, so this check could be loosened based on type
5317 // and/or a CPU attribute.
5318 if (isOperationLegal(ISD::MUL, VT))
5319 return false;
5320
5321 // shl+add, shl+sub, shl+add+neg
5322 return (MulC + 1).isPowerOf2() || (MulC - 1).isPowerOf2() ||
5323 (1 - MulC).isPowerOf2() || (-(MulC + 1)).isPowerOf2();
5324}
5325
5326bool X86TargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
5327 unsigned Index) const {
5328 if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
5329 return false;
5330
5331 // Mask vectors support all subregister combinations and operations that
5332 // extract half of vector.
5333 if (ResVT.getVectorElementType() == MVT::i1)
5334 return Index == 0 || ((ResVT.getSizeInBits() == SrcVT.getSizeInBits()*2) &&
5335 (Index == ResVT.getVectorNumElements()));
5336
5337 return (Index % ResVT.getVectorNumElements()) == 0;
5338}
5339
5340bool X86TargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
5341 unsigned Opc = VecOp.getOpcode();
5342
5343 // Assume target opcodes can't be scalarized.
5344 // TODO - do we have any exceptions?
5345 if (Opc >= ISD::BUILTIN_OP_END)
5346 return false;
5347
5348 // If the vector op is not supported, try to convert to scalar.
5349 EVT VecVT = VecOp.getValueType();
5350 if (!isOperationLegalOrCustomOrPromote(Opc, VecVT))
5351 return true;
5352
5353 // If the vector op is supported, but the scalar op is not, the transform may
5354 // not be worthwhile.
5355 EVT ScalarVT = VecVT.getScalarType();
5356 return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
5357}
5358
5359bool X86TargetLowering::shouldFormOverflowOp(unsigned Opcode, EVT VT,
5360 bool) const {
5361 // TODO: Allow vectors?
5362 if (VT.isVector())
5363 return false;
5364 return VT.isSimple() || !isOperationExpand(Opcode, VT);
5365}
5366
5367bool X86TargetLowering::isCheapToSpeculateCttz() const {
5368 // Speculate cttz only if we can directly use TZCNT.
5369 return Subtarget.hasBMI();
5370}
5371
5372bool X86TargetLowering::isCheapToSpeculateCtlz() const {
5373 // Speculate ctlz only if we can directly use LZCNT.
5374 return Subtarget.hasLZCNT();
5375}
5376
5377bool X86TargetLowering::isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
5378 const SelectionDAG &DAG,
5379 const MachineMemOperand &MMO) const {
5380 if (!Subtarget.hasAVX512() && !LoadVT.isVector() && BitcastVT.isVector() &&
5381 BitcastVT.getVectorElementType() == MVT::i1)
5382 return false;
5383
5384 if (!Subtarget.hasDQI() && BitcastVT == MVT::v8i1 && LoadVT == MVT::i8)
5385 return false;
5386
5387 // If both types are legal vectors, it's always ok to convert them.
5388 if (LoadVT.isVector() && BitcastVT.isVector() &&
5389 isTypeLegal(LoadVT) && isTypeLegal(BitcastVT))
5390 return true;
5391
5392 return TargetLowering::isLoadBitCastBeneficial(LoadVT, BitcastVT, DAG, MMO);
5393}
5394
5395bool X86TargetLowering::canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
5396 const SelectionDAG &DAG) const {
5397 // Do not merge to float value size (128 bytes) if no implicit
5398 // float attribute is set.
5399 bool NoFloat = DAG.getMachineFunction().getFunction().hasFnAttribute(
5400 Attribute::NoImplicitFloat);
5401
5402 if (NoFloat) {
5403 unsigned MaxIntSize = Subtarget.is64Bit() ? 64 : 32;
5404 return (MemVT.getSizeInBits() <= MaxIntSize);
5405 }
5406 // Make sure we don't merge greater than our preferred vector
5407 // width.
5408 if (MemVT.getSizeInBits() > Subtarget.getPreferVectorWidth())
5409 return false;
5410
5411 return true;
5412}
5413
5414bool X86TargetLowering::isCtlzFast() const {
5415 return Subtarget.hasFastLZCNT();
5416}
5417
5418bool X86TargetLowering::isMaskAndCmp0FoldingBeneficial(
5419 const Instruction &AndI) const {
5420 return true;
5421}
5422
5423bool X86TargetLowering::hasAndNotCompare(SDValue Y) const {
5424 EVT VT = Y.getValueType();
5425
5426 if (VT.isVector())
5427 return false;
5428
5429 if (!Subtarget.hasBMI())
5430 return false;
5431
5432 // There are only 32-bit and 64-bit forms for 'andn'.
5433 if (VT != MVT::i32 && VT != MVT::i64)
5434 return false;
5435
5436 return !isa<ConstantSDNode>(Y);
5437}
5438
5439bool X86TargetLowering::hasAndNot(SDValue Y) const {
5440 EVT VT = Y.getValueType();
5441
5442 if (!VT.isVector())
5443 return hasAndNotCompare(Y);
5444
5445 // Vector.
5446
5447 if (!Subtarget.hasSSE1() || VT.getSizeInBits() < 128)
5448 return false;
5449
5450 if (VT == MVT::v4i32)
5451 return true;
5452
5453 return Subtarget.hasSSE2();
5454}
5455
5456bool X86TargetLowering::hasBitTest(SDValue X, SDValue Y) const {
5457 return X.getValueType().isScalarInteger(); // 'bt'
5458}
5459
5460bool X86TargetLowering::
5461 shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
5462 SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
5463 unsigned OldShiftOpcode, unsigned NewShiftOpcode,
5464 SelectionDAG &DAG) const {
5465 // Does baseline recommend not to perform the fold by default?
5466 if (!TargetLowering::shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
5467 X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG))
5468 return false;
5469 // For scalars this transform is always beneficial.
5470 if (X.getValueType().isScalarInteger())
5471 return true;
5472 // If all the shift amounts are identical, then transform is beneficial even
5473 // with rudimentary SSE2 shifts.
5474 if (DAG.isSplatValue(Y, /*AllowUndefs=*/true))
5475 return true;
5476 // If we have AVX2 with it's powerful shift operations, then it's also good.
5477 if (Subtarget.hasAVX2())
5478 return true;
5479 // Pre-AVX2 vector codegen for this pattern is best for variant with 'shl'.
5480 return NewShiftOpcode == ISD::SHL;
5481}
5482
5483bool X86TargetLowering::shouldFoldConstantShiftPairToMask(
5484 const SDNode *N, CombineLevel Level) const {
5485 assert(((N->getOpcode() == ISD::SHL &&((void)0)
5486 N->getOperand(0).getOpcode() == ISD::SRL) ||((void)0)
5487 (N->getOpcode() == ISD::SRL &&((void)0)
5488 N->getOperand(0).getOpcode() == ISD::SHL)) &&((void)0)
5489 "Expected shift-shift mask")((void)0);
5490 EVT VT = N->getValueType(0);
5491 if ((Subtarget.hasFastVectorShiftMasks() && VT.isVector()) ||
5492 (Subtarget.hasFastScalarShiftMasks() && !VT.isVector())) {
5493 // Only fold if the shift values are equal - so it folds to AND.
5494 // TODO - we should fold if either is a non-uniform vector but we don't do
5495 // the fold for non-splats yet.
5496 return N->getOperand(1) == N->getOperand(0).getOperand(1);
5497 }
5498 return TargetLoweringBase::shouldFoldConstantShiftPairToMask(N, Level);
5499}
5500
5501bool X86TargetLowering::shouldFoldMaskToVariableShiftPair(SDValue Y) const {
5502 EVT VT = Y.getValueType();
5503
5504 // For vectors, we don't have a preference, but we probably want a mask.
5505 if (VT.isVector())
5506 return false;
5507
5508 // 64-bit shifts on 32-bit targets produce really bad bloated code.
5509 if (VT == MVT::i64 && !Subtarget.is64Bit())
5510 return false;
5511
5512 return true;
5513}
5514
5515bool X86TargetLowering::shouldExpandShift(SelectionDAG &DAG,
5516 SDNode *N) const {
5517 if (DAG.getMachineFunction().getFunction().hasMinSize() &&
5518 !Subtarget.isOSWindows())
5519 return false;
5520 return true;
5521}
5522
5523bool X86TargetLowering::shouldSplatInsEltVarIndex(EVT VT) const {
5524 // Any legal vector type can be splatted more efficiently than
5525 // loading/spilling from memory.
5526 return isTypeLegal(VT);
5527}
5528
5529MVT X86TargetLowering::hasFastEqualityCompare(unsigned NumBits) const {
5530 MVT VT = MVT::getIntegerVT(NumBits);
5531 if (isTypeLegal(VT))
5532 return VT;
5533
5534 // PMOVMSKB can handle this.
5535 if (NumBits == 128 && isTypeLegal(MVT::v16i8))
5536 return MVT::v16i8;
5537
5538 // VPMOVMSKB can handle this.
5539 if (NumBits == 256 && isTypeLegal(MVT::v32i8))
5540 return MVT::v32i8;
5541
5542 // TODO: Allow 64-bit type for 32-bit target.
5543 // TODO: 512-bit types should be allowed, but make sure that those
5544 // cases are handled in combineVectorSizedSetCCEquality().
5545
5546 return MVT::INVALID_SIMPLE_VALUE_TYPE;
5547}
5548
5549/// Val is the undef sentinel value or equal to the specified value.
5550static bool isUndefOrEqual(int Val, int CmpVal) {
5551 return ((Val == SM_SentinelUndef) || (Val == CmpVal));
5552}
5553
5554/// Return true if every element in Mask is the undef sentinel value or equal to
5555/// the specified value..
5556static bool isUndefOrEqual(ArrayRef<int> Mask, int CmpVal) {
5557 return llvm::all_of(Mask, [CmpVal](int M) {
5558 return (M == SM_SentinelUndef) || (M == CmpVal);
5559 });
5560}
5561
5562/// Val is either the undef or zero sentinel value.
5563static bool isUndefOrZero(int Val) {
5564 return ((Val == SM_SentinelUndef) || (Val == SM_SentinelZero));
5565}
5566
5567/// Return true if every element in Mask, beginning from position Pos and ending
5568/// in Pos+Size is the undef sentinel value.
5569static bool isUndefInRange(ArrayRef<int> Mask, unsigned Pos, unsigned Size) {
5570 return llvm::all_of(Mask.slice(Pos, Size),
5571 [](int M) { return M == SM_SentinelUndef; });
5572}
5573
5574/// Return true if the mask creates a vector whose lower half is undefined.
5575static bool isUndefLowerHalf(ArrayRef<int> Mask) {
5576 unsigned NumElts = Mask.size();
5577 return isUndefInRange(Mask, 0, NumElts / 2);
5578}
5579
5580/// Return true if the mask creates a vector whose upper half is undefined.
5581static bool isUndefUpperHalf(ArrayRef<int> Mask) {
5582 unsigned NumElts = Mask.size();
5583 return isUndefInRange(Mask, NumElts / 2, NumElts / 2);
5584}
5585
/// Return true if Val falls within the half-open range [Low, Hi), i.e.
/// Low is included and Hi is excluded.
static bool isInRange(int Val, int Low, int Hi) {
  return Low <= Val && Val < Hi;
}
5590
5591/// Return true if the value of any element in Mask falls within the specified
5592/// range (L, H].
5593static bool isAnyInRange(ArrayRef<int> Mask, int Low, int Hi) {
5594 return llvm::any_of(Mask, [Low, Hi](int M) { return isInRange(M, Low, Hi); });
5595}
5596
5597/// Return true if the value of any element in Mask is the zero sentinel value.
5598static bool isAnyZero(ArrayRef<int> Mask) {
5599 return llvm::any_of(Mask, [](int M) { return M == SM_SentinelZero; });
5600}
5601
5602/// Return true if the value of any element in Mask is the zero or undef
5603/// sentinel values.
5604static bool isAnyZeroOrUndef(ArrayRef<int> Mask) {
5605 return llvm::any_of(Mask, [](int M) {
5606 return M == SM_SentinelZero || M == SM_SentinelUndef;
5607 });
5608}
5609
5610/// Return true if Val is undef or if its value falls within the
5611/// specified range (L, H].
5612static bool isUndefOrInRange(int Val, int Low, int Hi) {
5613 return (Val == SM_SentinelUndef) || isInRange(Val, Low, Hi);
5614}
5615
5616/// Return true if every element in Mask is undef or if its value
5617/// falls within the specified range (L, H].
5618static bool isUndefOrInRange(ArrayRef<int> Mask, int Low, int Hi) {
5619 return llvm::all_of(
5620 Mask, [Low, Hi](int M) { return isUndefOrInRange(M, Low, Hi); });
5621}
5622
5623/// Return true if Val is undef, zero or if its value falls within the
5624/// specified range (L, H].
5625static bool isUndefOrZeroOrInRange(int Val, int Low, int Hi) {
5626 return isUndefOrZero(Val) || isInRange(Val, Low, Hi);
5627}
5628
5629/// Return true if every element in Mask is undef, zero or if its value
5630/// falls within the specified range (L, H].
5631static bool isUndefOrZeroOrInRange(ArrayRef<int> Mask, int Low, int Hi) {
5632 return llvm::all_of(
5633 Mask, [Low, Hi](int M) { return isUndefOrZeroOrInRange(M, Low, Hi); });
5634}
5635
5636/// Return true if every element in Mask, beginning
5637/// from position Pos and ending in Pos + Size, falls within the specified
5638/// sequence (Low, Low + Step, ..., Low + (Size - 1) * Step) or is undef.
5639static bool isSequentialOrUndefInRange(ArrayRef<int> Mask, unsigned Pos,
5640 unsigned Size, int Low, int Step = 1) {
5641 for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step)
5642 if (!isUndefOrEqual(Mask[i], Low))
5643 return false;
5644 return true;
5645}
5646
5647/// Return true if every element in Mask, beginning
5648/// from position Pos and ending in Pos+Size, falls within the specified
5649/// sequential range (Low, Low+Size], or is undef or is zero.
5650static bool isSequentialOrUndefOrZeroInRange(ArrayRef<int> Mask, unsigned Pos,
5651 unsigned Size, int Low,
5652 int Step = 1) {
5653 for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step)
5654 if (!isUndefOrZero(Mask[i]) && Mask[i] != Low)
5655 return false;
5656 return true;
5657}
5658
5659/// Return true if every element in Mask, beginning
5660/// from position Pos and ending in Pos+Size is undef or is zero.
5661static bool isUndefOrZeroInRange(ArrayRef<int> Mask, unsigned Pos,
5662 unsigned Size) {
5663 return llvm::all_of(Mask.slice(Pos, Size),
5664 [](int M) { return isUndefOrZero(M); });
5665}
5666
5667/// Helper function to test whether a shuffle mask could be
5668/// simplified by widening the elements being shuffled.
5669///
5670/// Appends the mask for wider elements in WidenedMask if valid. Otherwise
5671/// leaves it in an unspecified state.
5672///
5673/// NOTE: This must handle normal vector shuffle masks and *target* vector
5674/// shuffle masks. The latter have the special property of a '-2' representing
5675/// a zero-ed lane of a vector.
5676static bool canWidenShuffleElements(ArrayRef<int> Mask,
5677 SmallVectorImpl<int> &WidenedMask) {
5678 WidenedMask.assign(Mask.size() / 2, 0);
5679 for (int i = 0, Size = Mask.size(); i < Size; i += 2) {
5680 int M0 = Mask[i];
5681 int M1 = Mask[i + 1];
5682
5683 // If both elements are undef, its trivial.
5684 if (M0 == SM_SentinelUndef && M1 == SM_SentinelUndef) {
5685 WidenedMask[i / 2] = SM_SentinelUndef;
5686 continue;
5687 }
5688
5689 // Check for an undef mask and a mask value properly aligned to fit with
5690 // a pair of values. If we find such a case, use the non-undef mask's value.
5691 if (M0 == SM_SentinelUndef && M1 >= 0 && (M1 % 2) == 1) {
5692 WidenedMask[i / 2] = M1 / 2;
5693 continue;
5694 }
5695 if (M1 == SM_SentinelUndef && M0 >= 0 && (M0 % 2) == 0) {
5696 WidenedMask[i / 2] = M0 / 2;
5697 continue;
5698 }
5699
5700 // When zeroing, we need to spread the zeroing across both lanes to widen.
5701 if (M0 == SM_SentinelZero || M1 == SM_SentinelZero) {
5702 if ((M0 == SM_SentinelZero || M0 == SM_SentinelUndef) &&
5703 (M1 == SM_SentinelZero || M1 == SM_SentinelUndef)) {
5704 WidenedMask[i / 2] = SM_SentinelZero;
5705 continue;
5706 }
5707 return false;
5708 }
5709
5710 // Finally check if the two mask values are adjacent and aligned with
5711 // a pair.
5712 if (M0 != SM_SentinelUndef && (M0 % 2) == 0 && (M0 + 1) == M1) {
5713 WidenedMask[i / 2] = M0 / 2;
5714 continue;
5715 }
5716
5717 // Otherwise we can't safely widen the elements used in this shuffle.
5718 return false;
5719 }
5720 assert(WidenedMask.size() == Mask.size() / 2 &&((void)0)
5721 "Incorrect size of mask after widening the elements!")((void)0);
5722
5723 return true;
5724}
5725
5726static bool canWidenShuffleElements(ArrayRef<int> Mask,
5727 const APInt &Zeroable,
5728 bool V2IsZero,
5729 SmallVectorImpl<int> &WidenedMask) {
5730 // Create an alternative mask with info about zeroable elements.
5731 // Here we do not set undef elements as zeroable.
5732 SmallVector<int, 64> ZeroableMask(Mask.begin(), Mask.end());
5733 if (V2IsZero) {
5734 assert(!Zeroable.isNullValue() && "V2's non-undef elements are used?!")((void)0);
5735 for (int i = 0, Size = Mask.size(); i != Size; ++i)
5736 if (Mask[i] != SM_SentinelUndef && Zeroable[i])
5737 ZeroableMask[i] = SM_SentinelZero;
5738 }
5739 return canWidenShuffleElements(ZeroableMask, WidenedMask);
5740}
5741
5742static bool canWidenShuffleElements(ArrayRef<int> Mask) {
5743 SmallVector<int, 32> WidenedMask;
5744 return canWidenShuffleElements(Mask, WidenedMask);
5745}
5746
5747// Attempt to narrow/widen shuffle mask until it matches the target number of
5748// elements.
5749static bool scaleShuffleElements(ArrayRef<int> Mask, unsigned NumDstElts,
5750 SmallVectorImpl<int> &ScaledMask) {
5751 unsigned NumSrcElts = Mask.size();
5752 assert(((NumSrcElts % NumDstElts) == 0 || (NumDstElts % NumSrcElts) == 0) &&((void)0)
5753 "Illegal shuffle scale factor")((void)0);
5754
5755 // Narrowing is guaranteed to work.
5756 if (NumDstElts >= NumSrcElts) {
5757 int Scale = NumDstElts / NumSrcElts;
5758 llvm::narrowShuffleMaskElts(Scale, Mask, ScaledMask);
5759 return true;
5760 }
5761
5762 // We have to repeat the widening until we reach the target size, but we can
5763 // split out the first widening as it sets up ScaledMask for us.
5764 if (canWidenShuffleElements(Mask, ScaledMask)) {
5765 while (ScaledMask.size() > NumDstElts) {
5766 SmallVector<int, 16> WidenedMask;
5767 if (!canWidenShuffleElements(ScaledMask, WidenedMask))
5768 return false;
5769 ScaledMask = std::move(WidenedMask);
5770 }
5771 return true;
5772 }
5773
5774 return false;
5775}
5776
5777/// Returns true if Elt is a constant zero or a floating point constant +0.0.
5778bool X86::isZeroNode(SDValue Elt) {
5779 return isNullConstant(Elt) || isNullFPConstant(Elt);
5780}
5781
5782// Build a vector of constants.
5783// Use an UNDEF node if MaskElt == -1.
5784// Split 64-bit constants in the 32-bit mode.
5785static SDValue getConstVector(ArrayRef<int> Values, MVT VT, SelectionDAG &DAG,
5786 const SDLoc &dl, bool IsMask = false) {
5787
5788 SmallVector<SDValue, 32> Ops;
5789 bool Split = false;
5790
5791 MVT ConstVecVT = VT;
5792 unsigned NumElts = VT.getVectorNumElements();
5793 bool In64BitMode = DAG.getTargetLoweringInfo().isTypeLegal(MVT::i64);
5794 if (!In64BitMode && VT.getVectorElementType() == MVT::i64) {
5795 ConstVecVT = MVT::getVectorVT(MVT::i32, NumElts * 2);
5796 Split = true;
5797 }
5798
5799 MVT EltVT = ConstVecVT.getVectorElementType();
5800 for (unsigned i = 0; i < NumElts; ++i) {
5801 bool IsUndef = Values[i] < 0 && IsMask;
5802 SDValue OpNode = IsUndef ? DAG.getUNDEF(EltVT) :
5803 DAG.getConstant(Values[i], dl, EltVT);
5804 Ops.push_back(OpNode);
5805 if (Split)
5806 Ops.push_back(IsUndef ? DAG.getUNDEF(EltVT) :
5807 DAG.getConstant(0, dl, EltVT));
5808 }
5809 SDValue ConstsNode = DAG.getBuildVector(ConstVecVT, dl, Ops);
5810 if (Split)
5811 ConstsNode = DAG.getBitcast(VT, ConstsNode);
5812 return ConstsNode;
5813}
5814
5815static SDValue getConstVector(ArrayRef<APInt> Bits, APInt &Undefs,
5816 MVT VT, SelectionDAG &DAG, const SDLoc &dl) {
5817 assert(Bits.size() == Undefs.getBitWidth() &&((void)0)
5818 "Unequal constant and undef arrays")((void)0);
5819 SmallVector<SDValue, 32> Ops;
5820 bool Split = false;
5821
5822 MVT ConstVecVT = VT;
5823 unsigned NumElts = VT.getVectorNumElements();
5824 bool In64BitMode = DAG.getTargetLoweringInfo().isTypeLegal(MVT::i64);
5825 if (!In64BitMode && VT.getVectorElementType() == MVT::i64) {
5826 ConstVecVT = MVT::getVectorVT(MVT::i32, NumElts * 2);
5827 Split = true;
5828 }
5829
5830 MVT EltVT = ConstVecVT.getVectorElementType();
5831 for (unsigned i = 0, e = Bits.size(); i != e; ++i) {
5832 if (Undefs[i]) {
5833 Ops.append(Split ? 2 : 1, DAG.getUNDEF(EltVT));
5834 continue;
5835 }
5836 const APInt &V = Bits[i];
5837 assert(V.getBitWidth() == VT.getScalarSizeInBits() && "Unexpected sizes")((void)0);
5838 if (Split) {
5839 Ops.push_back(DAG.getConstant(V.trunc(32), dl, EltVT));
5840 Ops.push_back(DAG.getConstant(V.lshr(32).trunc(32), dl, EltVT));
5841 } else if (EltVT == MVT::f32) {
5842 APFloat FV(APFloat::IEEEsingle(), V);
5843 Ops.push_back(DAG.getConstantFP(FV, dl, EltVT));
5844 } else if (EltVT == MVT::f64) {
5845 APFloat FV(APFloat::IEEEdouble(), V);
5846 Ops.push_back(DAG.getConstantFP(FV, dl, EltVT));
5847 } else {
5848 Ops.push_back(DAG.getConstant(V, dl, EltVT));
5849 }
5850 }
5851
5852 SDValue ConstsNode = DAG.getBuildVector(ConstVecVT, dl, Ops);
5853 return DAG.getBitcast(VT, ConstsNode);
5854}
5855
5856/// Returns a vector of specified type with all zero elements.
5857static SDValue getZeroVector(MVT VT, const X86Subtarget &Subtarget,
5858 SelectionDAG &DAG, const SDLoc &dl) {
5859 assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector() ||((void)0)
5860 VT.getVectorElementType() == MVT::i1) &&((void)0)
5861 "Unexpected vector type")((void)0);
5862
5863 // Try to build SSE/AVX zero vectors as <N x i32> bitcasted to their dest
5864 // type. This ensures they get CSE'd. But if the integer type is not
5865 // available, use a floating-point +0.0 instead.
5866 SDValue Vec;
5867 if (!Subtarget.hasSSE2() && VT.is128BitVector()) {
5868 Vec = DAG.getConstantFP(+0.0, dl, MVT::v4f32);
5869 } else if (VT.isFloatingPoint()) {
5870 Vec = DAG.getConstantFP(+0.0, dl, VT);
5871 } else if (VT.getVectorElementType() == MVT::i1) {
5872 assert((Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) &&((void)0)
5873 "Unexpected vector type")((void)0);
5874 Vec = DAG.getConstant(0, dl, VT);
5875 } else {
5876 unsigned Num32BitElts = VT.getSizeInBits() / 32;
5877 Vec = DAG.getConstant(0, dl, MVT::getVectorVT(MVT::i32, Num32BitElts));
5878 }
5879 return DAG.getBitcast(VT, Vec);
5880}
5881
5882static SDValue extractSubVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG,
5883 const SDLoc &dl, unsigned vectorWidth) {
5884 EVT VT = Vec.getValueType();
5885 EVT ElVT = VT.getVectorElementType();
5886 unsigned Factor = VT.getSizeInBits() / vectorWidth;
5887 EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
5888 VT.getVectorNumElements() / Factor);
5889
5890 // Extract the relevant vectorWidth bits. Generate an EXTRACT_SUBVECTOR
5891 unsigned ElemsPerChunk = vectorWidth / ElVT.getSizeInBits();
5892 assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2")((void)0);
5893
5894 // This is the index of the first element of the vectorWidth-bit chunk
5895 // we want. Since ElemsPerChunk is a power of 2 just need to clear bits.
5896 IdxVal &= ~(ElemsPerChunk - 1);
5897
5898 // If the input is a buildvector just emit a smaller one.
5899 if (Vec.getOpcode() == ISD::BUILD_VECTOR)
5900 return DAG.getBuildVector(ResultVT, dl,
5901 Vec->ops().slice(IdxVal, ElemsPerChunk));
5902
5903 SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl);
5904 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec, VecIdx);
5905}
5906
5907/// Generate a DAG to grab 128-bits from a vector > 128 bits. This
5908/// sets things up to match to an AVX VEXTRACTF128 / VEXTRACTI128
5909/// or AVX-512 VEXTRACTF32x4 / VEXTRACTI32x4
5910/// instructions or a simple subregister reference. Idx is an index in the
5911/// 128 bits we want. It need not be aligned to a 128-bit boundary. That makes
5912/// lowering EXTRACT_VECTOR_ELT operations easier.
5913static SDValue extract128BitVector(SDValue Vec, unsigned IdxVal,
5914 SelectionDAG &DAG, const SDLoc &dl) {
5915 assert((Vec.getValueType().is256BitVector() ||((void)0)
5916 Vec.getValueType().is512BitVector()) && "Unexpected vector size!")((void)0);
5917 return extractSubVector(Vec, IdxVal, DAG, dl, 128);
5918}
5919
5920/// Generate a DAG to grab 256-bits from a 512-bit vector.
5921static SDValue extract256BitVector(SDValue Vec, unsigned IdxVal,
5922 SelectionDAG &DAG, const SDLoc &dl) {
5923 assert(Vec.getValueType().is512BitVector() && "Unexpected vector size!")((void)0);
5924 return extractSubVector(Vec, IdxVal, DAG, dl, 256);
5925}
5926
5927static SDValue insertSubVector(SDValue Result, SDValue Vec, unsigned IdxVal,
5928 SelectionDAG &DAG, const SDLoc &dl,
5929 unsigned vectorWidth) {
5930 assert((vectorWidth == 128 || vectorWidth == 256) &&((void)0)
5931 "Unsupported vector width")((void)0);
5932 // Inserting UNDEF is Result
5933 if (Vec.isUndef())
5934 return Result;
5935 EVT VT = Vec.getValueType();
5936 EVT ElVT = VT.getVectorElementType();
5937 EVT ResultVT = Result.getValueType();
5938
5939 // Insert the relevant vectorWidth bits.
5940 unsigned ElemsPerChunk = vectorWidth/ElVT.getSizeInBits();
5941 assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2")((void)0);
5942
5943 // This is the index of the first element of the vectorWidth-bit chunk
5944 // we want. Since ElemsPerChunk is a power of 2 just need to clear bits.
5945 IdxVal &= ~(ElemsPerChunk - 1);
5946
5947 SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl);
5948 return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec, VecIdx);
5949}
5950
5951/// Generate a DAG to put 128-bits into a vector > 128 bits. This
5952/// sets things up to match to an AVX VINSERTF128/VINSERTI128 or
5953/// AVX-512 VINSERTF32x4/VINSERTI32x4 instructions or a
5954/// simple superregister reference. Idx is an index in the 128 bits
5955/// we want. It need not be aligned to a 128-bit boundary. That makes
5956/// lowering INSERT_VECTOR_ELT operations easier.
5957static SDValue insert128BitVector(SDValue Result, SDValue Vec, unsigned IdxVal,
5958 SelectionDAG &DAG, const SDLoc &dl) {
5959 assert(Vec.getValueType().is128BitVector() && "Unexpected vector size!")((void)0);
5960 return insertSubVector(Result, Vec, IdxVal, DAG, dl, 128);
5961}
5962
5963/// Widen a vector to a larger size with the same scalar type, with the new
5964/// elements either zero or undef.
5965static SDValue widenSubVector(MVT VT, SDValue Vec, bool ZeroNewElements,
5966 const X86Subtarget &Subtarget, SelectionDAG &DAG,
5967 const SDLoc &dl) {
5968 assert(Vec.getValueSizeInBits().getFixedSize() < VT.getFixedSizeInBits() &&((void)0)
5969 Vec.getValueType().getScalarType() == VT.getScalarType() &&((void)0)
5970 "Unsupported vector widening type")((void)0);
5971 SDValue Res = ZeroNewElements ? getZeroVector(VT, Subtarget, DAG, dl)
5972 : DAG.getUNDEF(VT);
5973 return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT, Res, Vec,
5974 DAG.getIntPtrConstant(0, dl));
5975}
5976
5977/// Widen a vector to a larger size with the same scalar type, with the new
5978/// elements either zero or undef.
5979static SDValue widenSubVector(SDValue Vec, bool ZeroNewElements,
5980 const X86Subtarget &Subtarget, SelectionDAG &DAG,
5981 const SDLoc &dl, unsigned WideSizeInBits) {
5982 assert(Vec.getValueSizeInBits() < WideSizeInBits &&((void)0)
5983 (WideSizeInBits % Vec.getScalarValueSizeInBits()) == 0 &&((void)0)
5984 "Unsupported vector widening type")((void)0);
5985 unsigned WideNumElts = WideSizeInBits / Vec.getScalarValueSizeInBits();
5986 MVT SVT = Vec.getSimpleValueType().getScalarType();
5987 MVT VT = MVT::getVectorVT(SVT, WideNumElts);
5988 return widenSubVector(VT, Vec, ZeroNewElements, Subtarget, DAG, dl);
5989}
5990
5991// Helper function to collect subvector ops that are concatenated together,
5992// either by ISD::CONCAT_VECTORS or a ISD::INSERT_SUBVECTOR series.
5993// The subvectors in Ops are guaranteed to be the same type.
5994static bool collectConcatOps(SDNode *N, SmallVectorImpl<SDValue> &Ops) {
5995 assert(Ops.empty() && "Expected an empty ops vector")((void)0);
5996
5997 if (N->getOpcode() == ISD::CONCAT_VECTORS) {
5998 Ops.append(N->op_begin(), N->op_end());
5999 return true;
6000 }
6001
6002 if (N->getOpcode() == ISD::INSERT_SUBVECTOR) {
6003 SDValue Src = N->getOperand(0);
6004 SDValue Sub = N->getOperand(1);
6005 const APInt &Idx = N->getConstantOperandAPInt(2);
6006 EVT VT = Src.getValueType();
6007 EVT SubVT = Sub.getValueType();
6008
6009 // TODO - Handle more general insert_subvector chains.
6010 if (VT.getSizeInBits() == (SubVT.getSizeInBits() * 2) &&
6011 Idx == (VT.getVectorNumElements() / 2)) {
6012 // insert_subvector(insert_subvector(undef, x, lo), y, hi)
6013 if (Src.getOpcode() == ISD::INSERT_SUBVECTOR &&
6014 Src.getOperand(1).getValueType() == SubVT &&
6015 isNullConstant(Src.getOperand(2))) {
6016 Ops.push_back(Src.getOperand(1));
6017 Ops.push_back(Sub);
6018 return true;
6019 }
6020 // insert_subvector(x, extract_subvector(x, lo), hi)
6021 if (Sub.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
6022 Sub.getOperand(0) == Src && isNullConstant(Sub.getOperand(1))) {
6023 Ops.append(2, Sub);
6024 return true;
6025 }
6026 }
6027 }
6028
6029 return false;
6030}
6031
6032static std::pair<SDValue, SDValue> splitVector(SDValue Op, SelectionDAG &DAG,
6033 const SDLoc &dl) {
6034 EVT VT = Op.getValueType();
6035 unsigned NumElems = VT.getVectorNumElements();
6036 unsigned SizeInBits = VT.getSizeInBits();
6037 assert((NumElems % 2) == 0 && (SizeInBits % 2) == 0 &&((void)0)
6038 "Can't split odd sized vector")((void)0);
6039
6040 SDValue Lo = extractSubVector(Op, 0, DAG, dl, SizeInBits / 2);
6041 SDValue Hi = extractSubVector(Op, NumElems / 2, DAG, dl, SizeInBits / 2);
6042 return std::make_pair(Lo, Hi);
6043}
6044
6045// Split an unary integer op into 2 half sized ops.
6046static SDValue splitVectorIntUnary(SDValue Op, SelectionDAG &DAG) {
6047 EVT VT = Op.getValueType();
6048
6049 // Make sure we only try to split 256/512-bit types to avoid creating
6050 // narrow vectors.
6051 assert((Op.getOperand(0).getValueType().is256BitVector() ||((void)0)
6052 Op.getOperand(0).getValueType().is512BitVector()) &&((void)0)
6053 (VT.is256BitVector() || VT.is512BitVector()) && "Unsupported VT!")((void)0);
6054 assert(Op.getOperand(0).getValueType().getVectorNumElements() ==((void)0)
6055 VT.getVectorNumElements() &&((void)0)
6056 "Unexpected VTs!")((void)0);
6057
6058 SDLoc dl(Op);
6059
6060 // Extract the Lo/Hi vectors
6061 SDValue Lo, Hi;
6062 std::tie(Lo, Hi) = splitVector(Op.getOperand(0), DAG, dl);
6063
6064 EVT LoVT, HiVT;
6065 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
6066 return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
6067 DAG.getNode(Op.getOpcode(), dl, LoVT, Lo),
6068 DAG.getNode(Op.getOpcode(), dl, HiVT, Hi));
6069}
6070
6071/// Break a binary integer operation into 2 half sized ops and then
6072/// concatenate the result back.
6073static SDValue splitVectorIntBinary(SDValue Op, SelectionDAG &DAG) {
6074 EVT VT = Op.getValueType();
6075
6076 // Sanity check that all the types match.
6077 assert(Op.getOperand(0).getValueType() == VT &&((void)0)
6078 Op.getOperand(1).getValueType() == VT && "Unexpected VTs!")((void)0);
6079 assert((VT.is256BitVector() || VT.is512BitVector()) && "Unsupported VT!")((void)0);
6080
6081 SDLoc dl(Op);
6082
6083 // Extract the LHS Lo/Hi vectors
6084 SDValue LHS1, LHS2;
6085 std::tie(LHS1, LHS2) = splitVector(Op.getOperand(0), DAG, dl);
6086
6087 // Extract the RHS Lo/Hi vectors
6088 SDValue RHS1, RHS2;
6089 std::tie(RHS1, RHS2) = splitVector(Op.getOperand(1), DAG, dl);
6090
6091 EVT LoVT, HiVT;
6092 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
6093 return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
6094 DAG.getNode(Op.getOpcode(), dl, LoVT, LHS1, RHS1),
6095 DAG.getNode(Op.getOpcode(), dl, HiVT, LHS2, RHS2));
6096}
6097
6098// Helper for splitting operands of an operation to legal target size and
6099// apply a function on each part.
6100// Useful for operations that are available on SSE2 in 128-bit, on AVX2 in
6101// 256-bit and on AVX512BW in 512-bit. The argument VT is the type used for
6102// deciding if/how to split Ops. Ops elements do *not* have to be of type VT.
6103// The argument Builder is a function that will be applied on each split part:
6104// SDValue Builder(SelectionDAG&G, SDLoc, ArrayRef<SDValue>)
6105template <typename F>
6106SDValue SplitOpsAndApply(SelectionDAG &DAG, const X86Subtarget &Subtarget,
6107 const SDLoc &DL, EVT VT, ArrayRef<SDValue> Ops,
6108 F Builder, bool CheckBWI = true) {
6109 assert(Subtarget.hasSSE2() && "Target assumed to support at least SSE2")((void)0);
6110 unsigned NumSubs = 1;
6111 if ((CheckBWI && Subtarget.useBWIRegs()) ||
6112 (!CheckBWI && Subtarget.useAVX512Regs())) {
6113 if (VT.getSizeInBits() > 512) {