clang -cc1 -cc1 -triple amd64-unknown-openbsd7.0 -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name X86ISelLowering.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 1 -fhalf-no-semantic-interposition -mframe-pointer=all -relaxed-aliasing -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/usr/src/gnu/usr.bin/clang/libLLVM/obj -resource-dir /usr/local/lib/clang/13.0.0 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I 
/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Analysis -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ASMParser -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/BinaryFormat -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Bitcode -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Bitcode -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Bitstream -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /include/llvm/CodeGen -I /include/llvm/CodeGen/PBQP -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/IR -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/IR -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Coroutines -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ProfileData/Coverage -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/CodeView -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/DWARF -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/MSF -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/PDB -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Demangle -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ExecutionEngine -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ExecutionEngine/JITLink -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ExecutionEngine/Orc -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend -I 
/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend/OpenACC -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend/OpenMP -I /include/llvm/CodeGen/GlobalISel -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/IRReader -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/InstCombine -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/Transforms/InstCombine -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/LTO -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Linker -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/MC -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/MC/MCParser -I /include/llvm/CodeGen/MIRParser -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Object -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Option -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Passes -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ProfileData -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Scalar -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ADT -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Support -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/Symbolize -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Target -I 
/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Utils -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Vectorize -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/IPO -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include -I /usr/src/gnu/usr.bin/clang/libLLVM/../include -I /usr/src/gnu/usr.bin/clang/libLLVM/obj -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include -D NDEBUG -D __STDC_LIMIT_MACROS -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D LLVM_PREFIX="/usr" -D PIC -internal-isystem /usr/include/c++/v1 -internal-isystem /usr/local/lib/clang/13.0.0/include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/usr/src/gnu/usr.bin/clang/libLLVM/obj -ferror-limit 19 -fvisibility-inlines-hidden -fwrapv -D_RET_PROTECTOR -ret-protector -fno-rtti -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -fno-builtin-malloc 
-fno-builtin-calloc -fno-builtin-realloc -fno-builtin-valloc -fno-builtin-free -fno-builtin-strdup -fno-builtin-strndup -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /home/ben/Projects/vmm/scan-build/2022-01-12-194120-40624-1 -x c++ /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86/X86ISelLowering.cpp
| 1 | |
| 2 | |
| 3 | |
| 4 | |
| 5 | |
| 6 | |
| 7 | |
| 8 | |
| 9 | |
| 10 | |
| 11 | |
| 12 | |
| 13 | |
| 14 | #include "X86ISelLowering.h" |
| 15 | #include "MCTargetDesc/X86ShuffleDecode.h" |
| 16 | #include "X86.h" |
| 17 | #include "X86CallingConv.h" |
| 18 | #include "X86FrameLowering.h" |
| 19 | #include "X86InstrBuilder.h" |
| 20 | #include "X86IntrinsicsInfo.h" |
| 21 | #include "X86MachineFunctionInfo.h" |
| 22 | #include "X86TargetMachine.h" |
| 23 | #include "X86TargetObjectFile.h" |
| 24 | #include "llvm/ADT/SmallBitVector.h" |
| 25 | #include "llvm/ADT/SmallSet.h" |
| 26 | #include "llvm/ADT/Statistic.h" |
| 27 | #include "llvm/ADT/StringExtras.h" |
| 28 | #include "llvm/ADT/StringSwitch.h" |
| 29 | #include "llvm/Analysis/BlockFrequencyInfo.h" |
| 30 | #include "llvm/Analysis/EHPersonalities.h" |
| 31 | #include "llvm/Analysis/ObjCARCUtil.h" |
| 32 | #include "llvm/Analysis/ProfileSummaryInfo.h" |
| 33 | #include "llvm/Analysis/VectorUtils.h" |
| 34 | #include "llvm/CodeGen/IntrinsicLowering.h" |
| 35 | #include "llvm/CodeGen/MachineFrameInfo.h" |
| 36 | #include "llvm/CodeGen/MachineFunction.h" |
| 37 | #include "llvm/CodeGen/MachineInstrBuilder.h" |
| 38 | #include "llvm/CodeGen/MachineJumpTableInfo.h" |
| 39 | #include "llvm/CodeGen/MachineLoopInfo.h" |
| 40 | #include "llvm/CodeGen/MachineModuleInfo.h" |
| 41 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
| 42 | #include "llvm/CodeGen/TargetLowering.h" |
| 43 | #include "llvm/CodeGen/WinEHFuncInfo.h" |
| 44 | #include "llvm/IR/CallingConv.h" |
| 45 | #include "llvm/IR/Constants.h" |
| 46 | #include "llvm/IR/DerivedTypes.h" |
| 47 | #include "llvm/IR/DiagnosticInfo.h" |
| 48 | #include "llvm/IR/Function.h" |
| 49 | #include "llvm/IR/GlobalAlias.h" |
| 50 | #include "llvm/IR/GlobalVariable.h" |
| 51 | #include "llvm/IR/Instructions.h" |
| 52 | #include "llvm/IR/Intrinsics.h" |
| 53 | #include "llvm/IR/IRBuilder.h" |
| 54 | #include "llvm/MC/MCAsmInfo.h" |
| 55 | #include "llvm/MC/MCContext.h" |
| 56 | #include "llvm/MC/MCExpr.h" |
| 57 | #include "llvm/MC/MCSymbol.h" |
| 58 | #include "llvm/Support/CommandLine.h" |
| 59 | #include "llvm/Support/Debug.h" |
| 60 | #include "llvm/Support/ErrorHandling.h" |
| 61 | #include "llvm/Support/KnownBits.h" |
| 62 | #include "llvm/Support/MathExtras.h" |
| 63 | #include "llvm/Target/TargetOptions.h" |
| 64 | #include <algorithm> |
| 65 | #include <bitset> |
| 66 | #include <cctype> |
| 67 | #include <numeric> |
| 68 | using namespace llvm; |
| 69 | |
| 70 | #define DEBUG_TYPE "x86-isel" |
| 71 | |
| 72 | STATISTIC(NumTailCalls, "Number of tail calls"); |
| 73 | |
| 74 | static cl::opt<int> ExperimentalPrefLoopAlignment( |
| 75 | "x86-experimental-pref-loop-alignment", cl::init(4), |
| 76 | cl::desc( |
| 77 | "Sets the preferable loop alignment for experiments (as log2 bytes)" |
| 78 | "(the last x86-experimental-pref-loop-alignment bits" |
| 79 | " of the loop header PC will be 0)."), |
| 80 | cl::Hidden); |
| 81 | |
| 82 | static cl::opt<int> ExperimentalPrefInnermostLoopAlignment( |
| 83 | "x86-experimental-pref-innermost-loop-alignment", cl::init(4), |
| 84 | cl::desc( |
| 85 | "Sets the preferable loop alignment for experiments (as log2 bytes) " |
| 86 | "for innermost loops only. If specified, this option overrides " |
| 87 | "alignment set by x86-experimental-pref-loop-alignment."), |
| 88 | cl::Hidden); |
| 89 | |
| 90 | static cl::opt<bool> MulConstantOptimization( |
| 91 | "mul-constant-optimization", cl::init(true), |
| 92 | cl::desc("Replace 'mul x, Const' with more effective instructions like " |
| 93 | "SHIFT, LEA, etc."), |
| 94 | cl::Hidden); |
| 95 | |
| 96 | static cl::opt<bool> ExperimentalUnorderedISEL( |
| 97 | "x86-experimental-unordered-atomic-isel", cl::init(false), |
| 98 | cl::desc("Use LoadSDNode and StoreSDNode instead of " |
| 99 | "AtomicSDNode for unordered atomic loads and " |
| 100 | "stores respectively."), |
| 101 | cl::Hidden); |
| 102 | |
| 103 | |
| 104 | |
| 105 | |
| 106 | |
| 107 | static void errorUnsupported(SelectionDAG &DAG, const SDLoc &dl, |
| 108 | const char *Msg) { |
| 109 | MachineFunction &MF = DAG.getMachineFunction(); |
| 110 | DAG.getContext()->diagnose( |
| 111 | DiagnosticInfoUnsupported(MF.getFunction(), Msg, dl.getDebugLoc())); |
| 112 | } |
| 113 | |
| 114 | X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, |
| 115 | const X86Subtarget &STI) |
| 116 | : TargetLowering(TM), Subtarget(STI) { |
| 117 | bool UseX87 = !Subtarget.useSoftFloat() && Subtarget.hasX87(); |
| 118 | X86ScalarSSEf64 = Subtarget.hasSSE2(); |
| 119 | X86ScalarSSEf32 = Subtarget.hasSSE1(); |
| 120 | MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0)); |
| 121 | |
| 122 | |
| 123 | |
| 124 | |
| 125 | setBooleanContents(ZeroOrOneBooleanContent); |
| 126 | |
| 127 | setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); |
| 128 | |
| 129 | |
| 130 | |
| 131 | |
| 132 | if (Subtarget.isAtom()) |
| 133 | setSchedulingPreference(Sched::ILP); |
| 134 | else if (Subtarget.is64Bit()) |
| 135 | setSchedulingPreference(Sched::ILP); |
| 136 | else |
| 137 | setSchedulingPreference(Sched::RegPressure); |
| 138 | const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); |
| 139 | setStackPointerRegisterToSaveRestore(RegInfo->getStackRegister()); |
| 140 | |
| 141 | |
| 142 | if (TM.getOptLevel() >= CodeGenOpt::Default) { |
| 143 | if (Subtarget.hasSlowDivide32()) |
| 144 | addBypassSlowDiv(32, 8); |
| 145 | if (Subtarget.hasSlowDivide64() && Subtarget.is64Bit()) |
| 146 | addBypassSlowDiv(64, 32); |
| 147 | } |
| 148 | |
| 149 | |
| 150 | if (Subtarget.isTargetWindowsMSVC() || Subtarget.isTargetWindowsItanium()) { |
| 151 | static const struct { |
| 152 | const RTLIB::Libcall Op; |
| 153 | const char * const Name; |
| 154 | const CallingConv::ID CC; |
| 155 | } LibraryCalls[] = { |
| 156 | { RTLIB::SDIV_I64, "_alldiv", CallingConv::X86_StdCall }, |
| 157 | { RTLIB::UDIV_I64, "_aulldiv", CallingConv::X86_StdCall }, |
| 158 | { RTLIB::SREM_I64, "_allrem", CallingConv::X86_StdCall }, |
| 159 | { RTLIB::UREM_I64, "_aullrem", CallingConv::X86_StdCall }, |
| 160 | { RTLIB::MUL_I64, "_allmul", CallingConv::X86_StdCall }, |
| 161 | }; |
| 162 | |
| 163 | for (const auto &LC : LibraryCalls) { |
| 164 | setLibcallName(LC.Op, LC.Name); |
| 165 | setLibcallCallingConv(LC.Op, LC.CC); |
| 166 | } |
| 167 | } |
| 168 | |
| 169 | if (Subtarget.getTargetTriple().isOSMSVCRT()) { |
| 170 | |
| 171 | setLibcallName(RTLIB::POWI_F32, nullptr); |
| 172 | setLibcallName(RTLIB::POWI_F64, nullptr); |
| 173 | } |
| 174 | |
| 175 | |
| 176 | |
| 177 | |
| 178 | |
| 179 | if (!Subtarget.hasCmpxchg8b()) |
| 180 | setMaxAtomicSizeInBitsSupported(32); |
| 181 | |
| 182 | |
| 183 | addRegisterClass(MVT::i8, &X86::GR8RegClass); |
| 184 | addRegisterClass(MVT::i16, &X86::GR16RegClass); |
| 185 | addRegisterClass(MVT::i32, &X86::GR32RegClass); |
| 186 | if (Subtarget.is64Bit()) |
| 187 | addRegisterClass(MVT::i64, &X86::GR64RegClass); |
| 188 | |
| 189 | for (MVT VT : MVT::integer_valuetypes()) |
| 190 | setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote); |
| 191 | |
| 192 | |
| 193 | setTruncStoreAction(MVT::i64, MVT::i32, Expand); |
| 194 | setTruncStoreAction(MVT::i64, MVT::i16, Expand); |
| 195 | setTruncStoreAction(MVT::i64, MVT::i8 , Expand); |
| 196 | setTruncStoreAction(MVT::i32, MVT::i16, Expand); |
| 197 | setTruncStoreAction(MVT::i32, MVT::i8 , Expand); |
| 198 | setTruncStoreAction(MVT::i16, MVT::i8, Expand); |
| 199 | |
| 200 | setTruncStoreAction(MVT::f64, MVT::f32, Expand); |
| 201 | |
| 202 | |
| 203 | for (auto VT : {MVT::f32, MVT::f64, MVT::f80}) { |
| 204 | setCondCodeAction(ISD::SETOEQ, VT, Expand); |
| 205 | setCondCodeAction(ISD::SETUNE, VT, Expand); |
| 206 | } |
| 207 | |
| 208 | |
| 209 | if (Subtarget.hasCMov()) { |
| 210 | setOperationAction(ISD::ABS , MVT::i16 , Custom); |
| 211 | setOperationAction(ISD::ABS , MVT::i32 , Custom); |
| 212 | if (Subtarget.is64Bit()) |
| 213 | setOperationAction(ISD::ABS , MVT::i64 , Custom); |
| 214 | } |
| 215 | |
| 216 | |
| 217 | for (auto ShiftOp : {ISD::FSHL, ISD::FSHR}) { |
| 218 | |
| 219 | LegalizeAction ShiftDoubleAction = Subtarget.isSHLDSlow() ? Custom : Legal; |
| 220 | |
| 221 | setOperationAction(ShiftOp , MVT::i8 , Custom); |
| 222 | setOperationAction(ShiftOp , MVT::i16 , Custom); |
| 223 | setOperationAction(ShiftOp , MVT::i32 , ShiftDoubleAction); |
| 224 | if (Subtarget.is64Bit()) |
| 225 | setOperationAction(ShiftOp , MVT::i64 , ShiftDoubleAction); |
| 226 | } |
| 227 | |
| 228 | if (!Subtarget.useSoftFloat()) { |
| 229 | |
| 230 | |
| 231 | setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote); |
| 232 | setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i8, Promote); |
| 233 | setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote); |
| 234 | setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i16, Promote); |
| 235 | |
| 236 | |
| 237 | setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom); |
| 238 | setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom); |
| 239 | |
| 240 | |
| 241 | setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom); |
| 242 | setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom); |
| 243 | |
| 244 | |
| 245 | |
| 246 | setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote); |
| 247 | setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i8, Promote); |
| 248 | |
| 249 | |
| 250 | setOperationAction(ISD::SINT_TO_FP, MVT::i16, Custom); |
| 251 | setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i16, Custom); |
| 252 | |
| 253 | setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom); |
| 254 | setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom); |
| 255 | |
| 256 | |
| 257 | setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom); |
| 258 | setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom); |
| 259 | |
| 260 | |
| 261 | |
| 262 | setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote); |
| 263 | |
| 264 | setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i8, Promote); |
| 265 | setOperationAction(ISD::FP_TO_SINT, MVT::i16, Custom); |
| 266 | setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i16, Custom); |
| 267 | setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom); |
| 268 | setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom); |
| 269 | |
| 270 | |
| 271 | setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom); |
| 272 | setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom); |
| 273 | |
| 274 | |
| 275 | |
| 276 | setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote); |
| 277 | |
| 278 | setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i8, Promote); |
| 279 | setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote); |
| 280 | |
| 281 | setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i16, Promote); |
| 282 | setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom); |
| 283 | setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom); |
| 284 | setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom); |
| 285 | setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom); |
| 286 | |
| 287 | setOperationAction(ISD::LRINT, MVT::f32, Custom); |
| 288 | setOperationAction(ISD::LRINT, MVT::f64, Custom); |
| 289 | setOperationAction(ISD::LLRINT, MVT::f32, Custom); |
| 290 | setOperationAction(ISD::LLRINT, MVT::f64, Custom); |
| 291 | |
| 292 | if (!Subtarget.is64Bit()) { |
| 293 | setOperationAction(ISD::LRINT, MVT::i64, Custom); |
| 294 | setOperationAction(ISD::LLRINT, MVT::i64, Custom); |
| 295 | } |
| 296 | } |
| 297 | |
| 298 | if (Subtarget.hasSSE2()) { |
| 299 | |
| 300 | |
| 301 | for (MVT VT : { MVT::i8, MVT::i16, MVT::i32 }) { |
| 302 | setOperationAction(ISD::FP_TO_UINT_SAT, VT, Custom); |
| 303 | setOperationAction(ISD::FP_TO_SINT_SAT, VT, Custom); |
| 304 | } |
| 305 | if (Subtarget.is64Bit()) { |
| 306 | setOperationAction(ISD::FP_TO_UINT_SAT, MVT::i64, Custom); |
| 307 | setOperationAction(ISD::FP_TO_SINT_SAT, MVT::i64, Custom); |
| 308 | } |
| 309 | } |
| 310 | |
| 311 | |
| 312 | setOperationAction(ISD::ADDRSPACECAST, MVT::i32, Custom); |
| 313 | setOperationAction(ISD::ADDRSPACECAST, MVT::i64, Custom); |
| 314 | |
| 315 | |
| 316 | if (!X86ScalarSSEf64) { |
| 317 | setOperationAction(ISD::BITCAST , MVT::f32 , Expand); |
| 318 | setOperationAction(ISD::BITCAST , MVT::i32 , Expand); |
| 319 | if (Subtarget.is64Bit()) { |
| 320 | setOperationAction(ISD::BITCAST , MVT::f64 , Expand); |
| 321 | |
| 322 | setOperationAction(ISD::BITCAST , MVT::i64 , Expand); |
| 323 | } |
| 324 | } else if (!Subtarget.is64Bit()) |
| 325 | setOperationAction(ISD::BITCAST , MVT::i64 , Custom); |
| 326 | |
| 327 | |
| 328 | |
| 329 | |
| 330 | |
| 331 | |
| 332 | |
| 333 | |
| 334 | |
| 335 | |
| 336 | |
| 337 | for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) { |
| 338 | setOperationAction(ISD::MULHS, VT, Expand); |
| 339 | setOperationAction(ISD::MULHU, VT, Expand); |
| 340 | setOperationAction(ISD::SDIV, VT, Expand); |
| 341 | setOperationAction(ISD::UDIV, VT, Expand); |
| 342 | setOperationAction(ISD::SREM, VT, Expand); |
| 343 | setOperationAction(ISD::UREM, VT, Expand); |
| 344 | } |
| 345 | |
| 346 | setOperationAction(ISD::BR_JT , MVT::Other, Expand); |
| 347 | setOperationAction(ISD::BRCOND , MVT::Other, Custom); |
| 348 | for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128, |
| 349 | MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) { |
| 350 | setOperationAction(ISD::BR_CC, VT, Expand); |
| 351 | setOperationAction(ISD::SELECT_CC, VT, Expand); |
| 352 | } |
| 353 | if (Subtarget.is64Bit()) |
| 354 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal); |
| 355 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal); |
| 356 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal); |
| 357 | setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand); |
| 358 | |
| 359 | setOperationAction(ISD::FREM , MVT::f32 , Expand); |
| 360 | setOperationAction(ISD::FREM , MVT::f64 , Expand); |
| 361 | setOperationAction(ISD::FREM , MVT::f80 , Expand); |
| 362 | setOperationAction(ISD::FREM , MVT::f128 , Expand); |
| 363 | |
| 364 | if (!Subtarget.useSoftFloat() && Subtarget.hasX87()) { |
| 365 | setOperationAction(ISD::FLT_ROUNDS_ , MVT::i32 , Custom); |
| 366 | setOperationAction(ISD::SET_ROUNDING , MVT::Other, Custom); |
| 367 | } |
| 368 | |
| 369 | |
| 370 | |
| 371 | setOperationPromotedToType(ISD::CTTZ , MVT::i8 , MVT::i32); |
| 372 | setOperationPromotedToType(ISD::CTTZ_ZERO_UNDEF, MVT::i8 , MVT::i32); |
| 373 | |
| 374 | if (Subtarget.hasBMI()) { |
| 375 | |
| 376 | |
| 377 | setOperationPromotedToType(ISD::CTTZ_ZERO_UNDEF, MVT::i16, MVT::i32); |
| 378 | } else { |
| 379 | setOperationAction(ISD::CTTZ, MVT::i16, Custom); |
| 380 | setOperationAction(ISD::CTTZ , MVT::i32 , Custom); |
| 381 | setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i16 , Legal); |
| 382 | setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32 , Legal); |
| 383 | if (Subtarget.is64Bit()) { |
| 384 | setOperationAction(ISD::CTTZ , MVT::i64 , Custom); |
| 385 | setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Legal); |
| 386 | } |
| 387 | } |
| 388 | |
| 389 | if (Subtarget.hasLZCNT()) { |
| 390 | |
| 391 | |
| 392 | setOperationPromotedToType(ISD::CTLZ , MVT::i8 , MVT::i32); |
| 393 | setOperationPromotedToType(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , MVT::i32); |
| 394 | } else { |
| 395 | for (auto VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64}) { |
| 396 | if (VT == MVT::i64 && !Subtarget.is64Bit()) |
| 397 | continue; |
| 398 | setOperationAction(ISD::CTLZ , VT, Custom); |
| 399 | setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Custom); |
| 400 | } |
| 401 | } |
| 402 | |
| 403 | for (auto Op : {ISD::FP16_TO_FP, ISD::STRICT_FP16_TO_FP, ISD::FP_TO_FP16, |
| 404 | ISD::STRICT_FP_TO_FP16}) { |
| 405 | |
| 406 | |
| 407 | |
| 408 | setOperationAction( |
| 409 | Op, MVT::f32, |
| 410 | (!Subtarget.useSoftFloat() && Subtarget.hasF16C()) ? Custom : Expand); |
| 411 | |
| 412 | setOperationAction(Op, MVT::f64, Expand); |
| 413 | setOperationAction(Op, MVT::f80, Expand); |
| 414 | setOperationAction(Op, MVT::f128, Expand); |
| 415 | } |
| 416 | |
| 417 | setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand); |
| 418 | setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand); |
| 419 | setLoadExtAction(ISD::EXTLOAD, MVT::f80, MVT::f16, Expand); |
| 420 | setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f16, Expand); |
| 421 | setTruncStoreAction(MVT::f32, MVT::f16, Expand); |
| 422 | setTruncStoreAction(MVT::f64, MVT::f16, Expand); |
| 423 | setTruncStoreAction(MVT::f80, MVT::f16, Expand); |
| 424 | setTruncStoreAction(MVT::f128, MVT::f16, Expand); |
| 425 | |
| 426 | setOperationAction(ISD::PARITY, MVT::i8, Custom); |
| 427 | if (Subtarget.hasPOPCNT()) { |
| 428 | setOperationPromotedToType(ISD::CTPOP, MVT::i8, MVT::i32); |
| 429 | } else { |
| 430 | setOperationAction(ISD::CTPOP , MVT::i8 , Expand); |
| 431 | setOperationAction(ISD::CTPOP , MVT::i16 , Expand); |
| 432 | setOperationAction(ISD::CTPOP , MVT::i32 , Expand); |
| 433 | if (Subtarget.is64Bit()) |
| 434 | setOperationAction(ISD::CTPOP , MVT::i64 , Expand); |
| 435 | else |
| 436 | setOperationAction(ISD::CTPOP , MVT::i64 , Custom); |
| 437 | |
| 438 | setOperationAction(ISD::PARITY, MVT::i16, Custom); |
| 439 | setOperationAction(ISD::PARITY, MVT::i32, Custom); |
| 440 | if (Subtarget.is64Bit()) |
| 441 | setOperationAction(ISD::PARITY, MVT::i64, Custom); |
| 442 | } |
| 443 | |
| 444 | setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom); |
| 445 | |
| 446 | if (!Subtarget.hasMOVBE()) |
| 447 | setOperationAction(ISD::BSWAP , MVT::i16 , Expand); |
| 448 | |
| 449 | |
| 450 | for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128 }) { |
| 451 | setOperationAction(ISD::SELECT, VT, Custom); |
| 452 | setOperationAction(ISD::SETCC, VT, Custom); |
| 453 | setOperationAction(ISD::STRICT_FSETCC, VT, Custom); |
| 454 | setOperationAction(ISD::STRICT_FSETCCS, VT, Custom); |
| 455 | } |
| 456 | for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) { |
| 457 | if (VT == MVT::i64 && !Subtarget.is64Bit()) |
| 458 | continue; |
| 459 | setOperationAction(ISD::SELECT, VT, Custom); |
| 460 | setOperationAction(ISD::SETCC, VT, Custom); |
| 461 | } |
| 462 | |
| 463 | |
| 464 | setOperationAction(ISD::SELECT, MVT::x86mmx, Custom); |
| 465 | setOperationAction(ISD::SELECT_CC, MVT::x86mmx, Expand); |
| 466 | |
| 467 | setOperationAction(ISD::EH_RETURN , MVT::Other, Custom); |
| 468 | |
| 469 | |
| 470 | setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom); |
| 471 | setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Custom); |
| 472 | setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom); |
| 473 | if (TM.Options.ExceptionModel == ExceptionHandling::SjLj) |
| 474 | setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume"); |
| 475 | |
| 476 | |
| 477 | for (auto VT : { MVT::i32, MVT::i64 }) { |
| 478 | if (VT == MVT::i64 && !Subtarget.is64Bit()) |
| 479 | continue; |
| 480 | setOperationAction(ISD::ConstantPool , VT, Custom); |
| 481 | setOperationAction(ISD::JumpTable , VT, Custom); |
| 482 | setOperationAction(ISD::GlobalAddress , VT, Custom); |
| 483 | setOperationAction(ISD::GlobalTLSAddress, VT, Custom); |
| 484 | setOperationAction(ISD::ExternalSymbol , VT, Custom); |
| 485 | setOperationAction(ISD::BlockAddress , VT, Custom); |
| 486 | } |
| 487 | |
| 488 | |
| 489 | for (auto VT : { MVT::i32, MVT::i64 }) { |
| 490 | if (VT == MVT::i64 && !Subtarget.is64Bit()) |
| 491 | continue; |
| 492 | setOperationAction(ISD::SHL_PARTS, VT, Custom); |
| 493 | setOperationAction(ISD::SRA_PARTS, VT, Custom); |
| 494 | setOperationAction(ISD::SRL_PARTS, VT, Custom); |
| 495 | } |
| 496 | |
| 497 | if (Subtarget.hasSSEPrefetch() || Subtarget.has3DNow()) |
| 498 | setOperationAction(ISD::PREFETCH , MVT::Other, Legal); |
| 499 | |
| 500 | setOperationAction(ISD::ATOMIC_FENCE , MVT::Other, Custom); |
| 501 | |
| 502 | |
| 503 | for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) { |
| 504 | setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom); |
| 505 | setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom); |
| 506 | setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Custom); |
| 507 | setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Custom); |
| 508 | setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Custom); |
| 509 | setOperationAction(ISD::ATOMIC_LOAD_AND, VT, Custom); |
| 510 | setOperationAction(ISD::ATOMIC_STORE, VT, Custom); |
| 511 | } |
| 512 | |
| 513 | if (!Subtarget.is64Bit()) |
| 514 | setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Custom); |
| 515 | |
| 516 | if (Subtarget.hasCmpxchg16b()) { |
| 517 | setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, MVT::i128, Custom); |
| 518 | } |
| 519 | |
| 520 | |
| 521 | if (!Subtarget.isTargetDarwin() && !Subtarget.isTargetELF() && |
| 522 | !Subtarget.isTargetCygMing() && !Subtarget.isTargetWin64() && |
| 523 | TM.Options.ExceptionModel != ExceptionHandling::SjLj) { |
| 524 | setOperationAction(ISD::EH_LABEL, MVT::Other, Expand); |
| 525 | } |
| 526 | |
| 527 | setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i32, Custom); |
| 528 | setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i64, Custom); |
| 529 | |
| 530 | setOperationAction(ISD::INIT_TRAMPOLINE, MVT::Other, Custom); |
| 531 | setOperationAction(ISD::ADJUST_TRAMPOLINE, MVT::Other, Custom); |
| 532 | |
| 533 | setOperationAction(ISD::TRAP, MVT::Other, Legal); |
| 534 | setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal); |
| 535 | setOperationAction(ISD::UBSANTRAP, MVT::Other, Legal); |
| 536 | |
| 537 | |
| 538 | setOperationAction(ISD::VASTART , MVT::Other, Custom); |
| 539 | setOperationAction(ISD::VAEND , MVT::Other, Expand); |
| 540 | bool Is64Bit = Subtarget.is64Bit(); |
| 541 | setOperationAction(ISD::VAARG, MVT::Other, Is64Bit ? Custom : Expand); |
| 542 | setOperationAction(ISD::VACOPY, MVT::Other, Is64Bit ? Custom : Expand); |
| 543 | |
| 544 | setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); |
| 545 | setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); |
| 546 | |
| 547 | setOperationAction(ISD::DYNAMIC_STACKALLOC, PtrVT, Custom); |
| 548 | |
| 549 | |
| 550 | setOperationAction(ISD::GC_TRANSITION_START, MVT::Other, Custom); |
| 551 | setOperationAction(ISD::GC_TRANSITION_END, MVT::Other, Custom); |
| 552 | |
| 553 | if (!Subtarget.useSoftFloat() && X86ScalarSSEf64) { |
| 554 | |
| 555 | |
| 556 | addRegisterClass(MVT::f32, Subtarget.hasAVX512() ? &X86::FR32XRegClass |
| 557 | : &X86::FR32RegClass); |
| 558 | addRegisterClass(MVT::f64, Subtarget.hasAVX512() ? &X86::FR64XRegClass |
| 559 | : &X86::FR64RegClass); |
| 560 | |
| 561 | |
| 562 | |
| 563 | |
| 564 | |
| 565 | setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); |
| 566 | |
| 567 | for (auto VT : { MVT::f32, MVT::f64 }) { |
| 568 | |
| 569 | setOperationAction(ISD::FABS, VT, Custom); |
| 570 | |
| 571 | |
| 572 | setOperationAction(ISD::FNEG, VT, Custom); |
| 573 | |
| 574 | |
| 575 | setOperationAction(ISD::FCOPYSIGN, VT, Custom); |
| 576 | |
| 577 | |
| 578 | setOperationAction(ISD::FADD, VT, Custom); |
| 579 | setOperationAction(ISD::FSUB, VT, Custom); |
| 580 | |
| 581 | |
| 582 | setOperationAction(ISD::FSIN , VT, Expand); |
| 583 | setOperationAction(ISD::FCOS , VT, Expand); |
| 584 | setOperationAction(ISD::FSINCOS, VT, Expand); |
| 585 | } |
| 586 | |
| 587 | |
| 588 | setOperationAction(ISD::FGETSIGN, MVT::i64, Custom); |
| 589 | setOperationAction(ISD::FGETSIGN, MVT::i32, Custom); |
| 590 | |
| 591 | } else if (!Subtarget.useSoftFloat() && X86ScalarSSEf32 && |
| 592 | (UseX87 || Is64Bit)) { |
| 593 | |
| 594 | |
| 595 | addRegisterClass(MVT::f32, &X86::FR32RegClass); |
| 596 | if (UseX87) |
| 597 | addRegisterClass(MVT::f64, &X86::RFP64RegClass); |
| 598 | |
| 599 | |
| 600 | setOperationAction(ISD::FABS , MVT::f32, Custom); |
| 601 | |
| 602 | |
| 603 | setOperationAction(ISD::FNEG , MVT::f32, Custom); |
| 604 | |
| 605 | if (UseX87) |
| 606 | setOperationAction(ISD::UNDEF, MVT::f64, Expand); |
| 607 | |
| 608 | |
| 609 | if (UseX87) |
| 610 | setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand); |
| 611 | setOperationAction(ISD::FCOPYSIGN, MVT::f32, Custom); |
| 612 | |
| 613 | |
| 614 | setOperationAction(ISD::FSIN , MVT::f32, Expand); |
| 615 | setOperationAction(ISD::FCOS , MVT::f32, Expand); |
| 616 | setOperationAction(ISD::FSINCOS, MVT::f32, Expand); |
| 617 | |
| 618 | if (UseX87) { |
| 619 | |
| 620 | setOperationAction(ISD::FSIN, MVT::f64, Expand); |
| 621 | setOperationAction(ISD::FCOS, MVT::f64, Expand); |
| 622 | setOperationAction(ISD::FSINCOS, MVT::f64, Expand); |
| 623 | } |
| 624 | } else if (UseX87) { |
| 625 | |
| 626 | |
| 627 | addRegisterClass(MVT::f64, &X86::RFP64RegClass); |
| 628 | addRegisterClass(MVT::f32, &X86::RFP32RegClass); |
| 629 | |
| 630 | for (auto VT : { MVT::f32, MVT::f64 }) { |
| 631 | setOperationAction(ISD::UNDEF, VT, Expand); |
| 632 | setOperationAction(ISD::FCOPYSIGN, VT, Expand); |
| 633 | |
| 634 | |
| 635 | setOperationAction(ISD::FSIN , VT, Expand); |
| 636 | setOperationAction(ISD::FCOS , VT, Expand); |
| 637 | setOperationAction(ISD::FSINCOS, VT, Expand); |
| 638 | } |
| 639 | } |
| 640 | |
| 641 | |
| 642 | if (isTypeLegal(MVT::f32)) { |
| 643 | if (UseX87 && (getRegClassFor(MVT::f32) == &X86::RFP32RegClass)) { |
| 644 | addLegalFPImmediate(APFloat(+0.0f)); |
| 645 | addLegalFPImmediate(APFloat(+1.0f)); |
| 646 | addLegalFPImmediate(APFloat(-0.0f)); |
| 647 | addLegalFPImmediate(APFloat(-1.0f)); |
| 648 | } else |
| 649 | addLegalFPImmediate(APFloat(+0.0f)); |
| 650 | } |
| 651 | |
| 652 | if (isTypeLegal(MVT::f64)) { |
| 653 | if (UseX87 && getRegClassFor(MVT::f64) == &X86::RFP64RegClass) { |
| 654 | addLegalFPImmediate(APFloat(+0.0)); |
| 655 | addLegalFPImmediate(APFloat(+1.0)); |
| 656 | addLegalFPImmediate(APFloat(-0.0)); |
| 657 | addLegalFPImmediate(APFloat(-1.0)); |
| 658 | } else |
| 659 | addLegalFPImmediate(APFloat(+0.0)); |
| 660 | } |
| 661 | |
| 662 | setOperationAction(ISD::STRICT_FADD, MVT::f32, Legal); |
| 663 | setOperationAction(ISD::STRICT_FADD, MVT::f64, Legal); |
| 664 | setOperationAction(ISD::STRICT_FSUB, MVT::f32, Legal); |
| 665 | setOperationAction(ISD::STRICT_FSUB, MVT::f64, Legal); |
| 666 | setOperationAction(ISD::STRICT_FMUL, MVT::f32, Legal); |
| 667 | setOperationAction(ISD::STRICT_FMUL, MVT::f64, Legal); |
| 668 | setOperationAction(ISD::STRICT_FDIV, MVT::f32, Legal); |
| 669 | setOperationAction(ISD::STRICT_FDIV, MVT::f64, Legal); |
| 670 | setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal); |
| 671 | setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Legal); |
| 672 | setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Legal); |
| 673 | setOperationAction(ISD::STRICT_FSQRT, MVT::f32, Legal); |
| 674 | setOperationAction(ISD::STRICT_FSQRT, MVT::f64, Legal); |
| 675 | |
| 676 | |
| 677 | setOperationAction(ISD::FMA, MVT::f64, Expand); |
| 678 | setOperationAction(ISD::FMA, MVT::f32, Expand); |
| 679 | |
| 680 | |
| 681 | if (UseX87) { |
| 682 | addRegisterClass(MVT::f80, &X86::RFP80RegClass); |
| 683 | setOperationAction(ISD::UNDEF, MVT::f80, Expand); |
| 684 | setOperationAction(ISD::FCOPYSIGN, MVT::f80, Expand); |
| 685 | { |
| 686 | APFloat TmpFlt = APFloat::getZero(APFloat::x87DoubleExtended()); |
| 687 | addLegalFPImmediate(TmpFlt); |
| 688 | TmpFlt.changeSign(); |
| 689 | addLegalFPImmediate(TmpFlt); |
| 690 | |
| 691 | bool ignored; |
| 692 | APFloat TmpFlt2(+1.0); |
| 693 | TmpFlt2.convert(APFloat::x87DoubleExtended(), APFloat::rmNearestTiesToEven, |
| 694 | &ignored); |
| 695 | addLegalFPImmediate(TmpFlt2); |
| 696 | TmpFlt2.changeSign(); |
| 697 | addLegalFPImmediate(TmpFlt2); |
| 698 | } |
| 699 | |
| 700 | |
| 701 | setOperationAction(ISD::FSIN , MVT::f80, Expand); |
| 702 | setOperationAction(ISD::FCOS , MVT::f80, Expand); |
| 703 | setOperationAction(ISD::FSINCOS, MVT::f80, Expand); |
| 704 | |
| 705 | setOperationAction(ISD::FFLOOR, MVT::f80, Expand); |
| 706 | setOperationAction(ISD::FCEIL, MVT::f80, Expand); |
| 707 | setOperationAction(ISD::FTRUNC, MVT::f80, Expand); |
| 708 | setOperationAction(ISD::FRINT, MVT::f80, Expand); |
| 709 | setOperationAction(ISD::FNEARBYINT, MVT::f80, Expand); |
| 710 | setOperationAction(ISD::FMA, MVT::f80, Expand); |
| 711 | setOperationAction(ISD::LROUND, MVT::f80, Expand); |
| 712 | setOperationAction(ISD::LLROUND, MVT::f80, Expand); |
| 713 | setOperationAction(ISD::LRINT, MVT::f80, Custom); |
| 714 | setOperationAction(ISD::LLRINT, MVT::f80, Custom); |
| 715 | |
| 716 | |
| 717 | setOperationAction(ISD::STRICT_FADD , MVT::f80, Legal); |
| 718 | setOperationAction(ISD::STRICT_FSUB , MVT::f80, Legal); |
| 719 | setOperationAction(ISD::STRICT_FMUL , MVT::f80, Legal); |
| 720 | setOperationAction(ISD::STRICT_FDIV , MVT::f80, Legal); |
| 721 | setOperationAction(ISD::STRICT_FSQRT , MVT::f80, Legal); |
| 722 | setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f80, Legal); |
| 723 | |
| 724 | |
| 725 | setOperationAction(ISD::STRICT_FP_ROUND, MVT::f80, Legal); |
| 726 | } |
| 727 | |
| 728 | |
| 729 | if (!Subtarget.useSoftFloat() && Subtarget.is64Bit() && Subtarget.hasSSE1()) { |
| 730 | addRegisterClass(MVT::f128, Subtarget.hasVLX() ? &X86::VR128XRegClass |
| 731 | : &X86::VR128RegClass); |
| 732 | |
| 733 | addLegalFPImmediate(APFloat::getZero(APFloat::IEEEquad())); |
| 734 | |
| 735 | setOperationAction(ISD::FADD, MVT::f128, LibCall); |
| 736 | setOperationAction(ISD::STRICT_FADD, MVT::f128, LibCall); |
| 737 | setOperationAction(ISD::FSUB, MVT::f128, LibCall); |
| 738 | setOperationAction(ISD::STRICT_FSUB, MVT::f128, LibCall); |
| 739 | setOperationAction(ISD::FDIV, MVT::f128, LibCall); |
| 740 | setOperationAction(ISD::STRICT_FDIV, MVT::f128, LibCall); |
| 741 | setOperationAction(ISD::FMUL, MVT::f128, LibCall); |
| 742 | setOperationAction(ISD::STRICT_FMUL, MVT::f128, LibCall); |
| 743 | setOperationAction(ISD::FMA, MVT::f128, LibCall); |
| 744 | setOperationAction(ISD::STRICT_FMA, MVT::f128, LibCall); |
| 745 | |
| 746 | setOperationAction(ISD::FABS, MVT::f128, Custom); |
| 747 | setOperationAction(ISD::FNEG, MVT::f128, Custom); |
| 748 | setOperationAction(ISD::FCOPYSIGN, MVT::f128, Custom); |
| 749 | |
| 750 | setOperationAction(ISD::FSIN, MVT::f128, LibCall); |
| 751 | setOperationAction(ISD::STRICT_FSIN, MVT::f128, LibCall); |
| 752 | setOperationAction(ISD::FCOS, MVT::f128, LibCall); |
| 753 | setOperationAction(ISD::STRICT_FCOS, MVT::f128, LibCall); |
| 754 | setOperationAction(ISD::FSINCOS, MVT::f128, LibCall); |
| 755 | |
| 756 | setOperationAction(ISD::FSQRT, MVT::f128, LibCall); |
| 757 | setOperationAction(ISD::STRICT_FSQRT, MVT::f128, LibCall); |
| 758 | |
| 759 | setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom); |
| 760 | setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f128, Custom); |
| 761 | |
| 762 | |
| 763 | |
| 764 | if (isTypeLegal(MVT::f32)) { |
| 765 | setOperationAction(ISD::FP_ROUND, MVT::f32, Custom); |
| 766 | setOperationAction(ISD::STRICT_FP_ROUND, MVT::f32, Custom); |
| 767 | } |
| 768 | if (isTypeLegal(MVT::f64)) { |
| 769 | setOperationAction(ISD::FP_ROUND, MVT::f64, Custom); |
| 770 | setOperationAction(ISD::STRICT_FP_ROUND, MVT::f64, Custom); |
| 771 | } |
| 772 | if (isTypeLegal(MVT::f80)) { |
| 773 | setOperationAction(ISD::FP_ROUND, MVT::f80, Custom); |
| 774 | setOperationAction(ISD::STRICT_FP_ROUND, MVT::f80, Custom); |
| 775 | } |
| 776 | |
| 777 | setOperationAction(ISD::SETCC, MVT::f128, Custom); |
| 778 | |
| 779 | setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f32, Expand); |
| 780 | setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f64, Expand); |
| 781 | setLoadExtAction(ISD::EXTLOAD, MVT::f128, MVT::f80, Expand); |
| 782 | setTruncStoreAction(MVT::f128, MVT::f32, Expand); |
| 783 | setTruncStoreAction(MVT::f128, MVT::f64, Expand); |
| 784 | setTruncStoreAction(MVT::f128, MVT::f80, Expand); |
| 785 | } |
| 786 | |
| 787 | |
| 788 | setOperationAction(ISD::FPOW , MVT::f32 , Expand); |
| 789 | setOperationAction(ISD::FPOW , MVT::f64 , Expand); |
| 790 | setOperationAction(ISD::FPOW , MVT::f80 , Expand); |
| 791 | setOperationAction(ISD::FPOW , MVT::f128 , Expand); |
| 792 | |
| 793 | setOperationAction(ISD::FLOG, MVT::f80, Expand); |
| 794 | setOperationAction(ISD::FLOG2, MVT::f80, Expand); |
| 795 | setOperationAction(ISD::FLOG10, MVT::f80, Expand); |
| 796 | setOperationAction(ISD::FEXP, MVT::f80, Expand); |
| 797 | setOperationAction(ISD::FEXP2, MVT::f80, Expand); |
| 798 | setOperationAction(ISD::FMINNUM, MVT::f80, Expand); |
| 799 | setOperationAction(ISD::FMAXNUM, MVT::f80, Expand); |
| 800 | |
| 801 | |
| 802 | for (auto VT : { MVT::v4f32, MVT::v8f32, MVT::v16f32, |
| 803 | MVT::v2f64, MVT::v4f64, MVT::v8f64 }) { |
| 804 | setOperationAction(ISD::FSIN, VT, Expand); |
| 805 | setOperationAction(ISD::FSINCOS, VT, Expand); |
| 806 | setOperationAction(ISD::FCOS, VT, Expand); |
| 807 | setOperationAction(ISD::FREM, VT, Expand); |
| 808 | setOperationAction(ISD::FCOPYSIGN, VT, Expand); |
| 809 | setOperationAction(ISD::FPOW, VT, Expand); |
| 810 | setOperationAction(ISD::FLOG, VT, Expand); |
| 811 | setOperationAction(ISD::FLOG2, VT, Expand); |
| 812 | setOperationAction(ISD::FLOG10, VT, Expand); |
| 813 | setOperationAction(ISD::FEXP, VT, Expand); |
| 814 | setOperationAction(ISD::FEXP2, VT, Expand); |
| 815 | } |
| 816 | |
| 817 | |
| 818 | |
| 819 | |
| 820 | for (MVT VT : MVT::fixedlen_vector_valuetypes()) { |
| 821 | setOperationAction(ISD::SDIV, VT, Expand); |
| 822 | setOperationAction(ISD::UDIV, VT, Expand); |
| 823 | setOperationAction(ISD::SREM, VT, Expand); |
| 824 | setOperationAction(ISD::UREM, VT, Expand); |
| 825 | setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT,Expand); |
| 826 | setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Expand); |
| 827 | setOperationAction(ISD::EXTRACT_SUBVECTOR, VT,Expand); |
| 828 | setOperationAction(ISD::INSERT_SUBVECTOR, VT,Expand); |
| 829 | setOperationAction(ISD::FMA, VT, Expand); |
| 830 | setOperationAction(ISD::FFLOOR, VT, Expand); |
| 831 | setOperationAction(ISD::FCEIL, VT, Expand); |
| 832 | setOperationAction(ISD::FTRUNC, VT, Expand); |
| 833 | setOperationAction(ISD::FRINT, VT, Expand); |
| 834 | setOperationAction(ISD::FNEARBYINT, VT, Expand); |
| 835 | setOperationAction(ISD::SMUL_LOHI, VT, Expand); |
| 836 | setOperationAction(ISD::MULHS, VT, Expand); |
| 837 | setOperationAction(ISD::UMUL_LOHI, VT, Expand); |
| 838 | setOperationAction(ISD::MULHU, VT, Expand); |
| 839 | setOperationAction(ISD::SDIVREM, VT, Expand); |
| 840 | setOperationAction(ISD::UDIVREM, VT, Expand); |
| 841 | setOperationAction(ISD::CTPOP, VT, Expand); |
| 842 | setOperationAction(ISD::CTTZ, VT, Expand); |
| 843 | setOperationAction(ISD::CTLZ, VT, Expand); |
| 844 | setOperationAction(ISD::ROTL, VT, Expand); |
| 845 | setOperationAction(ISD::ROTR, VT, Expand); |
| 846 | setOperationAction(ISD::BSWAP, VT, Expand); |
| 847 | setOperationAction(ISD::SETCC, VT, Expand); |
| 848 | setOperationAction(ISD::FP_TO_UINT, VT, Expand); |
| 849 | setOperationAction(ISD::FP_TO_SINT, VT, Expand); |
| 850 | setOperationAction(ISD::UINT_TO_FP, VT, Expand); |
| 851 | setOperationAction(ISD::SINT_TO_FP, VT, Expand); |
| 852 | setOperationAction(ISD::SIGN_EXTEND_INREG, VT,Expand); |
| 853 | setOperationAction(ISD::TRUNCATE, VT, Expand); |
| 854 | setOperationAction(ISD::SIGN_EXTEND, VT, Expand); |
| 855 | setOperationAction(ISD::ZERO_EXTEND, VT, Expand); |
| 856 | setOperationAction(ISD::ANY_EXTEND, VT, Expand); |
| 857 | setOperationAction(ISD::SELECT_CC, VT, Expand); |
| 858 | for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) { |
| 859 | setTruncStoreAction(InnerVT, VT, Expand); |
| 860 | |
| 861 | setLoadExtAction(ISD::SEXTLOAD, InnerVT, VT, Expand); |
| 862 | setLoadExtAction(ISD::ZEXTLOAD, InnerVT, VT, Expand); |
| 863 | |
| 864 | |
| 865 | |
| 866 | |
| 867 | |
| 868 | if (VT.getVectorElementType() == MVT::i1) |
| 869 | setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand); |
| 870 | |
| 871 | |
| 872 | |
| 873 | if (VT.getVectorElementType() == MVT::f16) |
| 874 | setLoadExtAction(ISD::EXTLOAD, InnerVT, VT, Expand); |
| 875 | } |
| 876 | } |
| 877 | |
| 878 | |
| 879 | |
| 880 | if (!Subtarget.useSoftFloat() && Subtarget.hasMMX()) { |
| 881 | addRegisterClass(MVT::x86mmx, &X86::VR64RegClass); |
| 882 | |
| 883 | } |
| 884 | |
| 885 | if (!Subtarget.useSoftFloat() && Subtarget.hasSSE1()) { |
| 886 | addRegisterClass(MVT::v4f32, Subtarget.hasVLX() ? &X86::VR128XRegClass |
| 887 | : &X86::VR128RegClass); |
| 888 | |
| 889 | setOperationAction(ISD::FNEG, MVT::v4f32, Custom); |
| 890 | setOperationAction(ISD::FABS, MVT::v4f32, Custom); |
| 891 | setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Custom); |
| 892 | setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); |
| 893 | setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom); |
| 894 | setOperationAction(ISD::VSELECT, MVT::v4f32, Custom); |
| 895 | setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom); |
| 896 | setOperationAction(ISD::SELECT, MVT::v4f32, Custom); |
| 897 | |
| 898 | setOperationAction(ISD::LOAD, MVT::v2f32, Custom); |
| 899 | setOperationAction(ISD::STORE, MVT::v2f32, Custom); |
| 900 | |
| 901 | setOperationAction(ISD::STRICT_FADD, MVT::v4f32, Legal); |
| 902 | setOperationAction(ISD::STRICT_FSUB, MVT::v4f32, Legal); |
| 903 | setOperationAction(ISD::STRICT_FMUL, MVT::v4f32, Legal); |
| 904 | setOperationAction(ISD::STRICT_FDIV, MVT::v4f32, Legal); |
| 905 | setOperationAction(ISD::STRICT_FSQRT, MVT::v4f32, Legal); |
| 906 | } |
| 907 | |
| 908 | if (!Subtarget.useSoftFloat() && Subtarget.hasSSE2()) { |
| 909 | addRegisterClass(MVT::v2f64, Subtarget.hasVLX() ? &X86::VR128XRegClass |
| 910 | : &X86::VR128RegClass); |
| 911 | |
| 912 | |
| 913 | |
| 914 | addRegisterClass(MVT::v16i8, Subtarget.hasVLX() ? &X86::VR128XRegClass |
| 915 | : &X86::VR128RegClass); |
| 916 | addRegisterClass(MVT::v8i16, Subtarget.hasVLX() ? &X86::VR128XRegClass |
| 917 | : &X86::VR128RegClass); |
| 918 | addRegisterClass(MVT::v4i32, Subtarget.hasVLX() ? &X86::VR128XRegClass |
| 919 | : &X86::VR128RegClass); |
| 920 | addRegisterClass(MVT::v2i64, Subtarget.hasVLX() ? &X86::VR128XRegClass |
| 921 | : &X86::VR128RegClass); |
| 922 | |
| 923 | for (auto VT : { MVT::v2i8, MVT::v4i8, MVT::v8i8, |
| 924 | MVT::v2i16, MVT::v4i16, MVT::v2i32 }) { |
| 925 | setOperationAction(ISD::SDIV, VT, Custom); |
| 926 | setOperationAction(ISD::SREM, VT, Custom); |
| 927 | setOperationAction(ISD::UDIV, VT, Custom); |
| 928 | setOperationAction(ISD::UREM, VT, Custom); |
| 929 | } |
| 930 | |
| 931 | setOperationAction(ISD::MUL, MVT::v2i8, Custom); |
| 932 | setOperationAction(ISD::MUL, MVT::v4i8, Custom); |
| 933 | setOperationAction(ISD::MUL, MVT::v8i8, Custom); |
| 934 | |
| 935 | setOperationAction(ISD::MUL, MVT::v16i8, Custom); |
| 936 | setOperationAction(ISD::MUL, MVT::v4i32, Custom); |
| 937 | setOperationAction(ISD::MUL, MVT::v2i64, Custom); |
| 938 | setOperationAction(ISD::MULHU, MVT::v4i32, Custom); |
| 939 | setOperationAction(ISD::MULHS, MVT::v4i32, Custom); |
| 940 | setOperationAction(ISD::MULHU, MVT::v16i8, Custom); |
| 941 | setOperationAction(ISD::MULHS, MVT::v16i8, Custom); |
| 942 | setOperationAction(ISD::MULHU, MVT::v8i16, Legal); |
| 943 | setOperationAction(ISD::MULHS, MVT::v8i16, Legal); |
| 944 | setOperationAction(ISD::MUL, MVT::v8i16, Legal); |
| 945 | |
| 946 | setOperationAction(ISD::SMULO, MVT::v16i8, Custom); |
| 947 | setOperationAction(ISD::UMULO, MVT::v16i8, Custom); |
| 948 | |
| 949 | setOperationAction(ISD::FNEG, MVT::v2f64, Custom); |
| 950 | setOperationAction(ISD::FABS, MVT::v2f64, Custom); |
| 951 | setOperationAction(ISD::FCOPYSIGN, MVT::v2f64, Custom); |
| 952 | |
| 953 | for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) { |
| 954 | setOperationAction(ISD::SMAX, VT, VT == MVT::v8i16 ? Legal : Custom); |
| 955 | setOperationAction(ISD::SMIN, VT, VT == MVT::v8i16 ? Legal : Custom); |
| 956 | setOperationAction(ISD::UMAX, VT, VT == MVT::v16i8 ? Legal : Custom); |
| 957 | setOperationAction(ISD::UMIN, VT, VT == MVT::v16i8 ? Legal : Custom); |
| 958 | } |
| 959 | |
| 960 | setOperationAction(ISD::UADDSAT, MVT::v16i8, Legal); |
| 961 | setOperationAction(ISD::SADDSAT, MVT::v16i8, Legal); |
| 962 | setOperationAction(ISD::USUBSAT, MVT::v16i8, Legal); |
| 963 | setOperationAction(ISD::SSUBSAT, MVT::v16i8, Legal); |
| 964 | setOperationAction(ISD::UADDSAT, MVT::v8i16, Legal); |
| 965 | setOperationAction(ISD::SADDSAT, MVT::v8i16, Legal); |
| 966 | setOperationAction(ISD::USUBSAT, MVT::v8i16, Legal); |
| 967 | setOperationAction(ISD::SSUBSAT, MVT::v8i16, Legal); |
| 968 | setOperationAction(ISD::USUBSAT, MVT::v4i32, Custom); |
| 969 | setOperationAction(ISD::USUBSAT, MVT::v2i64, Custom); |
| 970 | |
| 971 | setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i16, Custom); |
| 972 | setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom); |
| 973 | setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom); |
| 974 | |
| 975 | for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) { |
| 976 | setOperationAction(ISD::SETCC, VT, Custom); |
| 977 | setOperationAction(ISD::STRICT_FSETCC, VT, Custom); |
| 978 | setOperationAction(ISD::STRICT_FSETCCS, VT, Custom); |
| 979 | setOperationAction(ISD::CTPOP, VT, Custom); |
| 980 | setOperationAction(ISD::ABS, VT, Custom); |
| 981 | |
| 982 | |
| 983 | |
| 984 | setCondCodeAction(ISD::SETLT, VT, Custom); |
| 985 | setCondCodeAction(ISD::SETLE, VT, Custom); |
| 986 | } |
| 987 | |
| 988 | for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) { |
| 989 | setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); |
| 990 | setOperationAction(ISD::BUILD_VECTOR, VT, Custom); |
| 991 | setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); |
| 992 | setOperationAction(ISD::VSELECT, VT, Custom); |
| 993 | setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); |
| 994 | } |
| 995 | |
| 996 | for (auto VT : { MVT::v2f64, MVT::v2i64 }) { |
| 997 | setOperationAction(ISD::BUILD_VECTOR, VT, Custom); |
| 998 | setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); |
| 999 | setOperationAction(ISD::VSELECT, VT, Custom); |
| 1000 | |
| 1001 | if (VT == MVT::v2i64 && !Subtarget.is64Bit()) |
| 1002 | continue; |
| 1003 | |
| 1004 | setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); |
| 1005 | setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); |
| 1006 | } |
| 1007 | |
| 1008 | |
| 1009 | setOperationAction(ISD::SELECT, MVT::v2f64, Custom); |
| 1010 | setOperationAction(ISD::SELECT, MVT::v2i64, Custom); |
| 1011 | setOperationAction(ISD::SELECT, MVT::v4i32, Custom); |
| 1012 | setOperationAction(ISD::SELECT, MVT::v8i16, Custom); |
| 1013 | setOperationAction(ISD::SELECT, MVT::v16i8, Custom); |
| 1014 | |
| 1015 | setOperationAction(ISD::FP_TO_SINT, MVT::v4i32, Legal); |
| 1016 | setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, Custom); |
| 1017 | setOperationAction(ISD::FP_TO_SINT, MVT::v2i32, Custom); |
| 1018 | setOperationAction(ISD::FP_TO_UINT, MVT::v2i32, Custom); |
| 1019 | setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v4i32, Legal); |
| 1020 | setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i32, Custom); |
| 1021 | |
| 1022 | |
| 1023 | for (auto VT : {MVT::v2i8, MVT::v4i8, MVT::v8i8, MVT::v2i16, MVT::v4i16}) { |
| 1024 | setOperationAction(ISD::FP_TO_SINT, VT, Custom); |
| 1025 | setOperationAction(ISD::FP_TO_UINT, VT, Custom); |
| 1026 | setOperationAction(ISD::STRICT_FP_TO_SINT, VT, Custom); |
| 1027 | setOperationAction(ISD::STRICT_FP_TO_UINT, VT, Custom); |
| 1028 | } |
| 1029 | |
| 1030 | setOperationAction(ISD::SINT_TO_FP, MVT::v4i32, Legal); |
| 1031 | setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i32, Legal); |
| 1032 | setOperationAction(ISD::SINT_TO_FP, MVT::v2i32, Custom); |
| 1033 | setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2i32, Custom); |
| 1034 | |
| 1035 | setOperationAction(ISD::UINT_TO_FP, MVT::v2i32, Custom); |
| 1036 | setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2i32, Custom); |
| 1037 | |
| 1038 | setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, Custom); |
| 1039 | setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, Custom); |
| 1040 | |
| 1041 | |
| 1042 | setOperationAction(ISD::SINT_TO_FP, MVT::v2f32, Custom); |
| 1043 | setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v2f32, Custom); |
| 1044 | setOperationAction(ISD::UINT_TO_FP, MVT::v2f32, Custom); |
| 1045 | setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v2f32, Custom); |
| 1046 | |
| 1047 | setOperationAction(ISD::FP_EXTEND, MVT::v2f32, Custom); |
| 1048 | setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v2f32, Custom); |
| 1049 | setOperationAction(ISD::FP_ROUND, MVT::v2f32, Custom); |
| 1050 | setOperationAction(ISD::STRICT_FP_ROUND, MVT::v2f32, Custom); |
| 1051 | |
| 1052 | |
| 1053 | |
| 1054 | |
| 1055 | setOperationAction(ISD::LOAD, MVT::v2i32, Custom); |
| 1056 | setOperationAction(ISD::LOAD, MVT::v4i16, Custom); |
| 1057 | setOperationAction(ISD::LOAD, MVT::v8i8, Custom); |
| 1058 | setOperationAction(ISD::STORE, MVT::v2i32, Custom); |
| 1059 | setOperationAction(ISD::STORE, MVT::v4i16, Custom); |
| 1060 | setOperationAction(ISD::STORE, MVT::v8i8, Custom); |
| 1061 | |
| 1062 | setOperationAction(ISD::BITCAST, MVT::v2i32, Custom); |
| 1063 | setOperationAction(ISD::BITCAST, MVT::v4i16, Custom); |
| 1064 | setOperationAction(ISD::BITCAST, MVT::v8i8, Custom); |
| 1065 | if (!Subtarget.hasAVX512()) |
| 1066 | setOperationAction(ISD::BITCAST, MVT::v16i1, Custom); |
| 1067 | |
| 1068 | setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v2i64, Custom); |
| 1069 | setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v4i32, Custom); |
| 1070 | setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, MVT::v8i16, Custom); |
| 1071 | |
| 1072 | setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom); |
| 1073 | |
| 1074 | setOperationAction(ISD::TRUNCATE, MVT::v2i8, Custom); |
| 1075 | setOperationAction(ISD::TRUNCATE, MVT::v2i16, Custom); |
| 1076 | setOperationAction(ISD::TRUNCATE, MVT::v2i32, Custom); |
| 1077 | setOperationAction(ISD::TRUNCATE, MVT::v4i8, Custom); |
| 1078 | setOperationAction(ISD::TRUNCATE, MVT::v4i16, Custom); |
| 1079 | setOperationAction(ISD::TRUNCATE, MVT::v8i8, Custom); |
| 1080 | |
| 1081 | |
| 1082 | |
| 1083 | for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) { |
| 1084 | setOperationAction(ISD::SRL, VT, Custom); |
| 1085 | setOperationAction(ISD::SHL, VT, Custom); |
| 1086 | setOperationAction(ISD::SRA, VT, Custom); |
| 1087 | } |
| 1088 | |
| 1089 | setOperationAction(ISD::ROTL, MVT::v4i32, Custom); |
| 1090 | setOperationAction(ISD::ROTL, MVT::v8i16, Custom); |
| 1091 | |
| 1092 | |
| 1093 | |
| 1094 | if (!Subtarget.useAVX512Regs() && |
| 1095 | !(Subtarget.hasBWI() && Subtarget.hasVLX())) |
| 1096 | setOperationAction(ISD::ROTL, MVT::v16i8, Custom); |
| 1097 | |
| 1098 | setOperationAction(ISD::STRICT_FSQRT, MVT::v2f64, Legal); |
| 1099 | setOperationAction(ISD::STRICT_FADD, MVT::v2f64, Legal); |
| 1100 | setOperationAction(ISD::STRICT_FSUB, MVT::v2f64, Legal); |
| 1101 | setOperationAction(ISD::STRICT_FMUL, MVT::v2f64, Legal); |
| 1102 | setOperationAction(ISD::STRICT_FDIV, MVT::v2f64, Legal); |
| 1103 | } |
| 1104 | |
| 1105 | if (!Subtarget.useSoftFloat() && Subtarget.hasSSSE3()) { |
| 1106 | setOperationAction(ISD::ABS, MVT::v16i8, Legal); |
| 1107 | setOperationAction(ISD::ABS, MVT::v8i16, Legal); |
| 1108 | setOperationAction(ISD::ABS, MVT::v4i32, Legal); |
| 1109 | setOperationAction(ISD::BITREVERSE, MVT::v16i8, Custom); |
| 1110 | setOperationAction(ISD::CTLZ, MVT::v16i8, Custom); |
| 1111 | setOperationAction(ISD::CTLZ, MVT::v8i16, Custom); |
| 1112 | setOperationAction(ISD::CTLZ, MVT::v4i32, Custom); |
| 1113 | setOperationAction(ISD::CTLZ, MVT::v2i64, Custom); |
| 1114 | |
| 1115 | |
| 1116 | setOperationAction(ISD::ADD, MVT::i16, Custom); |
| 1117 | setOperationAction(ISD::ADD, MVT::i32, Custom); |
| 1118 | setOperationAction(ISD::SUB, MVT::i16, Custom); |
| 1119 | setOperationAction(ISD::SUB, MVT::i32, Custom); |
| 1120 | } |
| 1121 | |
| 1122 | if (!Subtarget.useSoftFloat() && Subtarget.hasSSE41()) { |
| 1123 | for (MVT RoundedTy : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64}) { |
| 1124 | setOperationAction(ISD::FFLOOR, RoundedTy, Legal); |
| 1125 | setOperationAction(ISD::STRICT_FFLOOR, RoundedTy, Legal); |
| 1126 | setOperationAction(ISD::FCEIL, RoundedTy, Legal); |
| 1127 | setOperationAction(ISD::STRICT_FCEIL, RoundedTy, Legal); |
| 1128 | setOperationAction(ISD::FTRUNC, RoundedTy, Legal); |
| 1129 | setOperationAction(ISD::STRICT_FTRUNC, RoundedTy, Legal); |
| 1130 | setOperationAction(ISD::FRINT, RoundedTy, Legal); |
| 1131 | setOperationAction(ISD::STRICT_FRINT, RoundedTy, Legal); |
| 1132 | setOperationAction(ISD::FNEARBYINT, RoundedTy, Legal); |
| 1133 | setOperationAction(ISD::STRICT_FNEARBYINT, RoundedTy, Legal); |
| 1134 | setOperationAction(ISD::FROUNDEVEN, RoundedTy, Legal); |
| 1135 | setOperationAction(ISD::STRICT_FROUNDEVEN, RoundedTy, Legal); |
| 1136 | |
| 1137 | setOperationAction(ISD::FROUND, RoundedTy, Custom); |
| 1138 | } |
| 1139 | |
| 1140 | setOperationAction(ISD::SMAX, MVT::v16i8, Legal); |
| 1141 | setOperationAction(ISD::SMAX, MVT::v4i32, Legal); |
| 1142 | setOperationAction(ISD::UMAX, MVT::v8i16, Legal); |
| 1143 | setOperationAction(ISD::UMAX, MVT::v4i32, Legal); |
| 1144 | setOperationAction(ISD::SMIN, MVT::v16i8, Legal); |
| 1145 | setOperationAction(ISD::SMIN, MVT::v4i32, Legal); |
| 1146 | setOperationAction(ISD::UMIN, MVT::v8i16, Legal); |
| 1147 | setOperationAction(ISD::UMIN, MVT::v4i32, Legal); |
| 1148 | |
| 1149 | setOperationAction(ISD::UADDSAT, MVT::v4i32, Custom); |
| 1150 | |
| 1151 | |
| 1152 | setOperationAction(ISD::MUL, MVT::v4i32, Legal); |
| 1153 | |
| 1154 | |
| 1155 | |
| 1156 | setOperationAction(ISD::VSELECT, MVT::v16i8, Legal); |
| 1157 | |
| 1158 | |
| 1159 | |
| 1160 | for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64 }) { |
| 1161 | setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Legal); |
| 1162 | setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Legal); |
| 1163 | } |
| 1164 | |
| 1165 | |
| 1166 | for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) { |
| 1167 | setLoadExtAction(LoadExtOp, MVT::v8i16, MVT::v8i8, Legal); |
| 1168 | setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i8, Legal); |
| 1169 | setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i8, Legal); |
| 1170 | setLoadExtAction(LoadExtOp, MVT::v4i32, MVT::v4i16, Legal); |
| 1171 | setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i16, Legal); |
| 1172 | setLoadExtAction(LoadExtOp, MVT::v2i64, MVT::v2i32, Legal); |
| 1173 | } |
| 1174 | |
| 1175 | |
| 1176 | |
| 1177 | setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i8, Custom); |
| 1178 | |
| 1179 | if (Subtarget.is64Bit() && !Subtarget.hasAVX512()) { |
| 1180 | |
| 1181 | |
| 1182 | setOperationAction(ISD::UINT_TO_FP, MVT::v4i64, Custom); |
| 1183 | setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i64, Custom); |
| 1184 | |
| 1185 | |
| 1186 | setOperationAction(ISD::SINT_TO_FP, MVT::v4i64, Custom); |
| 1187 | setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v4i64, Custom); |
| 1188 | } |
| 1189 | } |
| 1190 | |
| 1191 | if (!Subtarget.useSoftFloat() && Subtarget.hasSSE42()) { |
| 1192 | setOperationAction(ISD::UADDSAT, MVT::v2i64, Custom); |
| 1193 | } |
| 1194 | |
| 1195 | if (!Subtarget.useSoftFloat() && Subtarget.hasXOP()) { |
| 1196 | for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, |
| 1197 | MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) |
| 1198 | setOperationAction(ISD::ROTL, VT, Custom); |
| 1199 | |
| 1200 | |
| 1201 | for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) |
| 1202 | setOperationAction(ISD::BITREVERSE, VT, Custom); |
| 1203 | |
| 1204 | for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, |
| 1205 | MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) |
| 1206 | setOperationAction(ISD::BITREVERSE, VT, Custom); |
| 1207 | } |
| 1208 | |
| 1209 | if (!Subtarget.useSoftFloat() && Subtarget.hasAVX()) { |
| 1210 | bool HasInt256 = Subtarget.hasInt256(); |
| 1211 | |
| 1212 | addRegisterClass(MVT::v32i8, Subtarget.hasVLX() ? &X86::VR256XRegClass |
| 1213 | : &X86::VR256RegClass); |
| 1214 | addRegisterClass(MVT::v16i16, Subtarget.hasVLX() ? &X86::VR256XRegClass |
| 1215 | : &X86::VR256RegClass); |
| 1216 | addRegisterClass(MVT::v8i32, Subtarget.hasVLX() ? &X86::VR256XRegClass |
| 1217 | : &X86::VR256RegClass); |
| 1218 | addRegisterClass(MVT::v8f32, Subtarget.hasVLX() ? &X86::VR256XRegClass |
| 1219 | : &X86::VR256RegClass); |
| 1220 | addRegisterClass(MVT::v4i64, Subtarget.hasVLX() ? &X86::VR256XRegClass |
| 1221 | : &X86::VR256RegClass); |
| 1222 | addRegisterClass(MVT::v4f64, Subtarget.hasVLX() ? &X86::VR256XRegClass |
| 1223 | : &X86::VR256RegClass); |
| 1224 | |
| 1225 | for (auto VT : { MVT::v8f32, MVT::v4f64 }) { |
| 1226 | setOperationAction(ISD::FFLOOR, VT, Legal); |
| 1227 | setOperationAction(ISD::STRICT_FFLOOR, VT, Legal); |
| 1228 | setOperationAction(ISD::FCEIL, VT, Legal); |
| 1229 | setOperationAction(ISD::STRICT_FCEIL, VT, Legal); |
| 1230 | setOperationAction(ISD::FTRUNC, VT, Legal); |
| 1231 | setOperationAction(ISD::STRICT_FTRUNC, VT, Legal); |
| 1232 | setOperationAction(ISD::FRINT, VT, Legal); |
| 1233 | setOperationAction(ISD::STRICT_FRINT, VT, Legal); |
| 1234 | setOperationAction(ISD::FNEARBYINT, VT, Legal); |
| 1235 | setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal); |
| 1236 | setOperationAction(ISD::FROUNDEVEN, VT, Legal); |
| 1237 | setOperationAction(ISD::STRICT_FROUNDEVEN, VT, Legal); |
| 1238 | |
| 1239 | setOperationAction(ISD::FROUND, VT, Custom); |
| 1240 | |
| 1241 | setOperationAction(ISD::FNEG, VT, Custom); |
| 1242 | setOperationAction(ISD::FABS, VT, Custom); |
| 1243 | setOperationAction(ISD::FCOPYSIGN, VT, Custom); |
| 1244 | } |
| 1245 | |
| 1246 | |
| 1247 | |
| 1248 | setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i16, MVT::v8i32); |
| 1249 | setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i16, MVT::v8i32); |
| 1250 | setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v8i16, MVT::v8i32); |
| 1251 | setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v8i16, MVT::v8i32); |
| 1252 | setOperationAction(ISD::FP_TO_SINT, MVT::v8i32, Legal); |
| 1253 | setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, Custom); |
| 1254 | setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v8i32, Legal); |
| 1255 | |
| 1256 | setOperationAction(ISD::SINT_TO_FP, MVT::v8i32, Legal); |
| 1257 | setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v8i32, Legal); |
| 1258 | |
| 1259 | setOperationAction(ISD::STRICT_FP_ROUND, MVT::v4f32, Legal); |
| 1260 | setOperationAction(ISD::STRICT_FADD, MVT::v8f32, Legal); |
| 1261 | setOperationAction(ISD::STRICT_FADD, MVT::v4f64, Legal); |
| 1262 | setOperationAction(ISD::STRICT_FSUB, MVT::v8f32, Legal); |
| 1263 | setOperationAction(ISD::STRICT_FSUB, MVT::v4f64, Legal); |
| 1264 | setOperationAction(ISD::STRICT_FMUL, MVT::v8f32, Legal); |
| 1265 | setOperationAction(ISD::STRICT_FMUL, MVT::v4f64, Legal); |
| 1266 | setOperationAction(ISD::STRICT_FDIV, MVT::v8f32, Legal); |
| 1267 | setOperationAction(ISD::STRICT_FDIV, MVT::v4f64, Legal); |
| 1268 | setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v4f64, Legal); |
| 1269 | setOperationAction(ISD::STRICT_FSQRT, MVT::v8f32, Legal); |
| 1270 | setOperationAction(ISD::STRICT_FSQRT, MVT::v4f64, Legal); |
| 1271 | |
| 1272 | if (!Subtarget.hasAVX512()) |
| 1273 | setOperationAction(ISD::BITCAST, MVT::v32i1, Custom); |
| 1274 | |
| 1275 | |
| 1276 | |
| 1277 | for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) { |
| 1278 | setOperationAction(ISD::SRL, VT, Custom); |
| 1279 | setOperationAction(ISD::SHL, VT, Custom); |
| 1280 | setOperationAction(ISD::SRA, VT, Custom); |
| 1281 | } |
| 1282 | |
| 1283 | |
| 1284 | setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom); |
| 1285 | setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom); |
| 1286 | setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom); |
| 1287 | setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom); |
| 1288 | |
| 1289 | setOperationAction(ISD::ROTL, MVT::v8i32, Custom); |
| 1290 | setOperationAction(ISD::ROTL, MVT::v16i16, Custom); |
| 1291 | |
| 1292 | |
| 1293 | if (!Subtarget.useBWIRegs()) |
| 1294 | setOperationAction(ISD::ROTL, MVT::v32i8, Custom); |
| 1295 | |
| 1296 | setOperationAction(ISD::SELECT, MVT::v4f64, Custom); |
| 1297 | setOperationAction(ISD::SELECT, MVT::v4i64, Custom); |
| 1298 | setOperationAction(ISD::SELECT, MVT::v8i32, Custom); |
| 1299 | setOperationAction(ISD::SELECT, MVT::v16i16, Custom); |
| 1300 | setOperationAction(ISD::SELECT, MVT::v32i8, Custom); |
| 1301 | setOperationAction(ISD::SELECT, MVT::v8f32, Custom); |
| 1302 | |
| 1303 | for (auto VT : { MVT::v16i16, MVT::v8i32, MVT::v4i64 }) { |
| 1304 | setOperationAction(ISD::SIGN_EXTEND, VT, Custom); |
| 1305 | setOperationAction(ISD::ZERO_EXTEND, VT, Custom); |
| 1306 | setOperationAction(ISD::ANY_EXTEND, VT, Custom); |
| 1307 | } |
| 1308 | |
| 1309 | setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom); |
| 1310 | setOperationAction(ISD::TRUNCATE, MVT::v8i16, Custom); |
| 1311 | setOperationAction(ISD::TRUNCATE, MVT::v4i32, Custom); |
| 1312 | setOperationAction(ISD::BITREVERSE, MVT::v32i8, Custom); |
| 1313 | |
| 1314 | for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) { |
| 1315 | setOperationAction(ISD::SETCC, VT, Custom); |
| 1316 | setOperationAction(ISD::STRICT_FSETCC, VT, Custom); |
| 1317 | setOperationAction(ISD::STRICT_FSETCCS, VT, Custom); |
| 1318 | setOperationAction(ISD::CTPOP, VT, Custom); |
| 1319 | setOperationAction(ISD::CTLZ, VT, Custom); |
| 1320 | |
| 1321 | |
| 1322 | |
| 1323 | setCondCodeAction(ISD::SETLT, VT, Custom); |
| 1324 | setCondCodeAction(ISD::SETLE, VT, Custom); |
| 1325 | } |
| 1326 | |
| 1327 | if (Subtarget.hasAnyFMA()) { |
| 1328 | for (auto VT : { MVT::f32, MVT::f64, MVT::v4f32, MVT::v8f32, |
| 1329 | MVT::v2f64, MVT::v4f64 }) { |
| 1330 | setOperationAction(ISD::FMA, VT, Legal); |
| 1331 | setOperationAction(ISD::STRICT_FMA, VT, Legal); |
| 1332 | } |
| 1333 | } |
| 1334 | |
| 1335 | for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64 }) { |
| 1336 | setOperationAction(ISD::ADD, VT, HasInt256 ? Legal : Custom); |
| 1337 | setOperationAction(ISD::SUB, VT, HasInt256 ? Legal : Custom); |
| 1338 | } |
| 1339 | |
| 1340 | setOperationAction(ISD::MUL, MVT::v4i64, Custom); |
| 1341 | setOperationAction(ISD::MUL, MVT::v8i32, HasInt256 ? Legal : Custom); |
| 1342 | setOperationAction(ISD::MUL, MVT::v16i16, HasInt256 ? Legal : Custom); |
| 1343 | setOperationAction(ISD::MUL, MVT::v32i8, Custom); |
| 1344 | |
| 1345 | setOperationAction(ISD::MULHU, MVT::v8i32, Custom); |
| 1346 | setOperationAction(ISD::MULHS, MVT::v8i32, Custom); |
| 1347 | setOperationAction(ISD::MULHU, MVT::v16i16, HasInt256 ? Legal : Custom); |
| 1348 | setOperationAction(ISD::MULHS, MVT::v16i16, HasInt256 ? Legal : Custom); |
| 1349 | setOperationAction(ISD::MULHU, MVT::v32i8, Custom); |
| 1350 | setOperationAction(ISD::MULHS, MVT::v32i8, Custom); |
| 1351 | |
| 1352 | setOperationAction(ISD::SMULO, MVT::v32i8, Custom); |
| 1353 | setOperationAction(ISD::UMULO, MVT::v32i8, Custom); |
| 1354 | |
| 1355 | setOperationAction(ISD::ABS, MVT::v4i64, Custom); |
| 1356 | setOperationAction(ISD::SMAX, MVT::v4i64, Custom); |
| 1357 | setOperationAction(ISD::UMAX, MVT::v4i64, Custom); |
| 1358 | setOperationAction(ISD::SMIN, MVT::v4i64, Custom); |
| 1359 | setOperationAction(ISD::UMIN, MVT::v4i64, Custom); |
| 1360 | |
| 1361 | setOperationAction(ISD::UADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom); |
| 1362 | setOperationAction(ISD::SADDSAT, MVT::v32i8, HasInt256 ? Legal : Custom); |
| 1363 | setOperationAction(ISD::USUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom); |
| 1364 | setOperationAction(ISD::SSUBSAT, MVT::v32i8, HasInt256 ? Legal : Custom); |
| 1365 | setOperationAction(ISD::UADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom); |
| 1366 | setOperationAction(ISD::SADDSAT, MVT::v16i16, HasInt256 ? Legal : Custom); |
| 1367 | setOperationAction(ISD::USUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom); |
| 1368 | setOperationAction(ISD::SSUBSAT, MVT::v16i16, HasInt256 ? Legal : Custom); |
| 1369 | setOperationAction(ISD::UADDSAT, MVT::v8i32, Custom); |
| 1370 | setOperationAction(ISD::USUBSAT, MVT::v8i32, Custom); |
| 1371 | setOperationAction(ISD::UADDSAT, MVT::v4i64, Custom); |
| 1372 | setOperationAction(ISD::USUBSAT, MVT::v4i64, Custom); |
| 1373 | |
| 1374 | for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32 }) { |
| 1375 | setOperationAction(ISD::ABS, VT, HasInt256 ? Legal : Custom); |
| 1376 | setOperationAction(ISD::SMAX, VT, HasInt256 ? Legal : Custom); |
| 1377 | setOperationAction(ISD::UMAX, VT, HasInt256 ? Legal : Custom); |
| 1378 | setOperationAction(ISD::SMIN, VT, HasInt256 ? Legal : Custom); |
| 1379 | setOperationAction(ISD::UMIN, VT, HasInt256 ? Legal : Custom); |
| 1380 | } |
| 1381 | |
| 1382 | for (auto VT : {MVT::v16i16, MVT::v8i32, MVT::v4i64}) { |
| 1383 | setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom); |
| 1384 | setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom); |
| 1385 | } |
| 1386 | |
| 1387 | if (HasInt256) { |
| 1388 | |
| 1389 | |
| 1390 | setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, Custom); |
| 1391 | setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v8i32, Custom); |
| 1392 | |
| 1393 | |
| 1394 | for (auto LoadExtOp : { ISD::SEXTLOAD, ISD::ZEXTLOAD }) { |
| 1395 | setLoadExtAction(LoadExtOp, MVT::v16i16, MVT::v16i8, Legal); |
| 1396 | setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i8, Legal); |
| 1397 | setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i8, Legal); |
| 1398 | setLoadExtAction(LoadExtOp, MVT::v8i32, MVT::v8i16, Legal); |
| 1399 | setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i16, Legal); |
| 1400 | setLoadExtAction(LoadExtOp, MVT::v4i64, MVT::v4i32, Legal); |
| 1401 | } |
| 1402 | } |
| 1403 | |
| 1404 | for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64, |
| 1405 | MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) { |
| 1406 | setOperationAction(ISD::MLOAD, VT, Subtarget.hasVLX() ? Legal : Custom); |
| 1407 | setOperationAction(ISD::MSTORE, VT, Legal); |
| 1408 | } |
| 1409 | |
| 1410 | |
| 1411 | |
| 1412 | for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, |
| 1413 | MVT::v4f32, MVT::v2f64 }) { |
| 1414 | setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal); |
| 1415 | } |
| 1416 | |
| 1417 | |
| 1418 | for (MVT VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, |
| 1419 | MVT::v8f32, MVT::v4f64 }) { |
| 1420 | setOperationAction(ISD::BUILD_VECTOR, VT, Custom); |
| 1421 | setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); |
| 1422 | setOperationAction(ISD::VSELECT, VT, Custom); |
| 1423 | setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); |
| 1424 | setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); |
| 1425 | setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); |
| 1426 | setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal); |
| 1427 | setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); |
| 1428 | setOperationAction(ISD::STORE, VT, Custom); |
| 1429 | } |
| 1430 | |
| 1431 | if (HasInt256) { |
| 1432 | setOperationAction(ISD::VSELECT, MVT::v32i8, Legal); |
| 1433 | |
| 1434 | |
| 1435 | setOperationAction(ISD::MGATHER, MVT::v2f32, Custom); |
| 1436 | setOperationAction(ISD::MGATHER, MVT::v2i32, Custom); |
| 1437 | |
| 1438 | for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64, |
| 1439 | MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) |
| 1440 | setOperationAction(ISD::MGATHER, VT, Custom); |
| 1441 | } |
| 1442 | } |
| 1443 | |
| 1444 | |
| 1445 | |
| 1446 | |
| 1447 | if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) { |
| 1448 | addRegisterClass(MVT::v1i1, &X86::VK1RegClass); |
| 1449 | addRegisterClass(MVT::v2i1, &X86::VK2RegClass); |
| 1450 | addRegisterClass(MVT::v4i1, &X86::VK4RegClass); |
| 1451 | addRegisterClass(MVT::v8i1, &X86::VK8RegClass); |
| 1452 | addRegisterClass(MVT::v16i1, &X86::VK16RegClass); |
| 1453 | |
| 1454 | setOperationAction(ISD::SELECT, MVT::v1i1, Custom); |
| 1455 | setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v1i1, Custom); |
| 1456 | setOperationAction(ISD::BUILD_VECTOR, MVT::v1i1, Custom); |
| 1457 | |
| 1458 | setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v8i1, MVT::v8i32); |
| 1459 | setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v8i1, MVT::v8i32); |
| 1460 | setOperationPromotedToType(ISD::FP_TO_SINT, MVT::v4i1, MVT::v4i32); |
| 1461 | setOperationPromotedToType(ISD::FP_TO_UINT, MVT::v4i1, MVT::v4i32); |
| 1462 | setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v8i1, MVT::v8i32); |
| 1463 | setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v8i1, MVT::v8i32); |
| 1464 | setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, MVT::v4i1, MVT::v4i32); |
| 1465 | setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, MVT::v4i1, MVT::v4i32); |
| 1466 | setOperationAction(ISD::FP_TO_SINT, MVT::v2i1, Custom); |
| 1467 | setOperationAction(ISD::FP_TO_UINT, MVT::v2i1, Custom); |
| 1468 | setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2i1, Custom); |
| 1469 | setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i1, Custom); |
| 1470 | |
| 1471 | |
| 1472 | if (!Subtarget.hasDQI()) { |
| 1473 | setOperationAction(ISD::LOAD, MVT::v1i1, Custom); |
| 1474 | setOperationAction(ISD::LOAD, MVT::v2i1, Custom); |
| 1475 | setOperationAction(ISD::LOAD, MVT::v4i1, Custom); |
| 1476 | setOperationAction(ISD::LOAD, MVT::v8i1, Custom); |
| 1477 | |
| 1478 | setOperationAction(ISD::STORE, MVT::v1i1, Custom); |
| 1479 | setOperationAction(ISD::STORE, MVT::v2i1, Custom); |
| 1480 | setOperationAction(ISD::STORE, MVT::v4i1, Custom); |
| 1481 | setOperationAction(ISD::STORE, MVT::v8i1, Custom); |
| 1482 | } |
| 1483 | |
| 1484 | |
| 1485 | for (auto VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64 }) { |
| 1486 | setOperationAction(ISD::SIGN_EXTEND, VT, Custom); |
| 1487 | setOperationAction(ISD::ZERO_EXTEND, VT, Custom); |
| 1488 | setOperationAction(ISD::ANY_EXTEND, VT, Custom); |
| 1489 | } |
| 1490 | |
| 1491 | for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 }) |
| 1492 | setOperationAction(ISD::VSELECT, VT, Expand); |
| 1493 | |
| 1494 | for (auto VT : { MVT::v2i1, MVT::v4i1, MVT::v8i1, MVT::v16i1 }) { |
| 1495 | setOperationAction(ISD::SETCC, VT, Custom); |
| 1496 | setOperationAction(ISD::STRICT_FSETCC, VT, Custom); |
| 1497 | setOperationAction(ISD::STRICT_FSETCCS, VT, Custom); |
| 1498 | setOperationAction(ISD::SELECT, VT, Custom); |
| 1499 | setOperationAction(ISD::TRUNCATE, VT, Custom); |
| 1500 | |
| 1501 | setOperationAction(ISD::BUILD_VECTOR, VT, Custom); |
| 1502 | setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); |
| 1503 | setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); |
| 1504 | setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); |
| 1505 | setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); |
| 1506 | setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); |
| 1507 | } |
| 1508 | |
| 1509 | for (auto VT : { MVT::v1i1, MVT::v2i1, MVT::v4i1, MVT::v8i1 }) |
| 1510 | setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); |
| 1511 | } |
| 1512 | |
| 1513 | |
| 1514 | |
| 1515 | |
| 1516 | if (!Subtarget.useSoftFloat() && Subtarget.useAVX512Regs()) { |
| 1517 | bool HasBWI = Subtarget.hasBWI(); |
| 1518 | |
| 1519 | addRegisterClass(MVT::v16i32, &X86::VR512RegClass); |
| 1520 | addRegisterClass(MVT::v16f32, &X86::VR512RegClass); |
| 1521 | addRegisterClass(MVT::v8i64, &X86::VR512RegClass); |
| 1522 | addRegisterClass(MVT::v8f64, &X86::VR512RegClass); |
| 1523 | addRegisterClass(MVT::v32i16, &X86::VR512RegClass); |
| 1524 | addRegisterClass(MVT::v64i8, &X86::VR512RegClass); |
| 1525 | |
| 1526 | for (auto ExtType : {ISD::ZEXTLOAD, ISD::SEXTLOAD}) { |
| 1527 | setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i8, Legal); |
| 1528 | setLoadExtAction(ExtType, MVT::v16i32, MVT::v16i16, Legal); |
| 1529 | setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i8, Legal); |
| 1530 | setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i16, Legal); |
| 1531 | setLoadExtAction(ExtType, MVT::v8i64, MVT::v8i32, Legal); |
| 1532 | if (HasBWI) |
| 1533 | setLoadExtAction(ExtType, MVT::v32i16, MVT::v32i8, Legal); |
| 1534 | } |
| 1535 | |
| 1536 | for (MVT VT : { MVT::v16f32, MVT::v8f64 }) { |
| 1537 | setOperationAction(ISD::FNEG, VT, Custom); |
| 1538 | setOperationAction(ISD::FABS, VT, Custom); |
| 1539 | setOperationAction(ISD::FMA, VT, Legal); |
| 1540 | setOperationAction(ISD::STRICT_FMA, VT, Legal); |
| 1541 | setOperationAction(ISD::FCOPYSIGN, VT, Custom); |
| 1542 | } |
| 1543 | |
| 1544 | for (MVT VT : { MVT::v16i1, MVT::v16i8, MVT::v16i16 }) { |
| 1545 | setOperationPromotedToType(ISD::FP_TO_SINT , VT, MVT::v16i32); |
| 1546 | setOperationPromotedToType(ISD::FP_TO_UINT , VT, MVT::v16i32); |
| 1547 | setOperationPromotedToType(ISD::STRICT_FP_TO_SINT, VT, MVT::v16i32); |
| 1548 | setOperationPromotedToType(ISD::STRICT_FP_TO_UINT, VT, MVT::v16i32); |
| 1549 | } |
| 1550 | setOperationAction(ISD::FP_TO_SINT, MVT::v16i32, Legal); |
| 1551 | setOperationAction(ISD::FP_TO_UINT, MVT::v16i32, Legal); |
| 1552 | setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v16i32, Legal); |
| 1553 | setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v16i32, Legal); |
| 1554 | setOperationAction(ISD::SINT_TO_FP, MVT::v16i32, Legal); |
| 1555 | setOperationAction(ISD::UINT_TO_FP, MVT::v16i32, Legal); |
| 1556 | setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v16i32, Legal); |
| 1557 | setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v16i32, Legal); |
| 1558 | |
| 1559 | setOperationAction(ISD::STRICT_FADD, MVT::v16f32, Legal); |
| 1560 | setOperationAction(ISD::STRICT_FADD, MVT::v8f64, Legal); |
| 1561 | setOperationAction(ISD::STRICT_FSUB, MVT::v16f32, Legal); |
| 1562 | setOperationAction(ISD::STRICT_FSUB, MVT::v8f64, Legal); |
| 1563 | setOperationAction(ISD::STRICT_FMUL, MVT::v16f32, Legal); |
| 1564 | setOperationAction(ISD::STRICT_FMUL, MVT::v8f64, Legal); |
| 1565 | setOperationAction(ISD::STRICT_FDIV, MVT::v16f32, Legal); |
| 1566 | setOperationAction(ISD::STRICT_FDIV, MVT::v8f64, Legal); |
| 1567 | setOperationAction(ISD::STRICT_FSQRT, MVT::v16f32, Legal); |
| 1568 | setOperationAction(ISD::STRICT_FSQRT, MVT::v8f64, Legal); |
| 1569 | setOperationAction(ISD::STRICT_FP_EXTEND, MVT::v8f64, Legal); |
| 1570 | setOperationAction(ISD::STRICT_FP_ROUND, MVT::v8f32, Legal); |
| 1571 | |
| 1572 | setTruncStoreAction(MVT::v8i64, MVT::v8i8, Legal); |
| 1573 | setTruncStoreAction(MVT::v8i64, MVT::v8i16, Legal); |
| 1574 | setTruncStoreAction(MVT::v8i64, MVT::v8i32, Legal); |
| 1575 | setTruncStoreAction(MVT::v16i32, MVT::v16i8, Legal); |
| 1576 | setTruncStoreAction(MVT::v16i32, MVT::v16i16, Legal); |
| 1577 | if (HasBWI) |
| 1578 | setTruncStoreAction(MVT::v32i16, MVT::v32i8, Legal); |
| 1579 | |
| 1580 | |
| 1581 | |
| 1582 | |
| 1583 | if (!Subtarget.hasVLX()) { |
| 1584 | for (auto VT : {MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64, |
| 1585 | MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64}) { |
| 1586 | setOperationAction(ISD::MLOAD, VT, Custom); |
| 1587 | setOperationAction(ISD::MSTORE, VT, Custom); |
| 1588 | } |
| 1589 | } |
| 1590 | |
| 1591 | setOperationAction(ISD::TRUNCATE, MVT::v8i32, Legal); |
| 1592 | setOperationAction(ISD::TRUNCATE, MVT::v16i16, Legal); |
| 1593 | setOperationAction(ISD::TRUNCATE, MVT::v32i8, HasBWI ? Legal : Custom); |
| 1594 | setOperationAction(ISD::TRUNCATE, MVT::v16i64, Custom); |
| 1595 | setOperationAction(ISD::ZERO_EXTEND, MVT::v32i16, Custom); |
| 1596 | setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom); |
| 1597 | setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom); |
| 1598 | setOperationAction(ISD::ANY_EXTEND, MVT::v32i16, Custom); |
| 1599 | setOperationAction(ISD::ANY_EXTEND, MVT::v16i32, Custom); |
| 1600 | setOperationAction(ISD::ANY_EXTEND, MVT::v8i64, Custom); |
| 1601 | setOperationAction(ISD::SIGN_EXTEND, MVT::v32i16, Custom); |
| 1602 | setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom); |
| 1603 | setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom); |
| 1604 | |
| 1605 | if (HasBWI) { |
| 1606 | |
| 1607 | setOperationAction(ISD::SIGN_EXTEND, MVT::v64i8, Custom); |
| 1608 | setOperationAction(ISD::ZERO_EXTEND, MVT::v64i8, Custom); |
| 1609 | setOperationAction(ISD::ANY_EXTEND, MVT::v64i8, Custom); |
| 1610 | } |
| 1611 | |
| 1612 | for (auto VT : { MVT::v16f32, MVT::v8f64 }) { |
| 1613 | setOperationAction(ISD::FFLOOR, VT, Legal); |
| 1614 | setOperationAction(ISD::STRICT_FFLOOR, VT, Legal); |
| 1615 | setOperationAction(ISD::FCEIL, VT, Legal); |
| 1616 | setOperationAction(ISD::STRICT_FCEIL, VT, Legal); |
| 1617 | setOperationAction(ISD::FTRUNC, VT, Legal); |
| 1618 | setOperationAction(ISD::STRICT_FTRUNC, VT, Legal); |
| 1619 | setOperationAction(ISD::FRINT, VT, Legal); |
| 1620 | setOperationAction(ISD::STRICT_FRINT, VT, Legal); |
| 1621 | setOperationAction(ISD::FNEARBYINT, VT, Legal); |
| 1622 | setOperationAction(ISD::STRICT_FNEARBYINT, VT, Legal); |
| 1623 | setOperationAction(ISD::FROUNDEVEN, VT, Legal); |
| 1624 | setOperationAction(ISD::STRICT_FROUNDEVEN, VT, Legal); |
| 1625 | |
| 1626 | setOperationAction(ISD::FROUND, VT, Custom); |
| 1627 | } |
| 1628 | |
| 1629 | for (auto VT : {MVT::v32i16, MVT::v16i32, MVT::v8i64}) { |
| 1630 | setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom); |
| 1631 | setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom); |
| 1632 | } |
| 1633 | |
| 1634 | setOperationAction(ISD::ADD, MVT::v32i16, HasBWI ? Legal : Custom); |
| 1635 | setOperationAction(ISD::SUB, MVT::v32i16, HasBWI ? Legal : Custom); |
| 1636 | setOperationAction(ISD::ADD, MVT::v64i8, HasBWI ? Legal : Custom); |
| 1637 | setOperationAction(ISD::SUB, MVT::v64i8, HasBWI ? Legal : Custom); |
| 1638 | |
| 1639 | setOperationAction(ISD::MUL, MVT::v8i64, Custom); |
| 1640 | setOperationAction(ISD::MUL, MVT::v16i32, Legal); |
| 1641 | setOperationAction(ISD::MUL, MVT::v32i16, HasBWI ? Legal : Custom); |
| 1642 | setOperationAction(ISD::MUL, MVT::v64i8, Custom); |
| 1643 | |
| 1644 | setOperationAction(ISD::MULHU, MVT::v16i32, Custom); |
| 1645 | setOperationAction(ISD::MULHS, MVT::v16i32, Custom); |
| 1646 | setOperationAction(ISD::MULHS, MVT::v32i16, HasBWI ? Legal : Custom); |
| 1647 | setOperationAction(ISD::MULHU, MVT::v32i16, HasBWI ? Legal : Custom); |
| 1648 | setOperationAction(ISD::MULHS, MVT::v64i8, Custom); |
| 1649 | setOperationAction(ISD::MULHU, MVT::v64i8, Custom); |
| 1650 | |
| 1651 | setOperationAction(ISD::SMULO, MVT::v64i8, Custom); |
| 1652 | setOperationAction(ISD::UMULO, MVT::v64i8, Custom); |
| 1653 | |
| 1654 | setOperationAction(ISD::BITREVERSE, MVT::v64i8, Custom); |
| 1655 | |
| 1656 | for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32, MVT::v8i64 }) { |
| 1657 | setOperationAction(ISD::SRL, VT, Custom); |
| 1658 | setOperationAction(ISD::SHL, VT, Custom); |
| 1659 | setOperationAction(ISD::SRA, VT, Custom); |
| 1660 | setOperationAction(ISD::SETCC, VT, Custom); |
| 1661 | |
| 1662 | |
| 1663 | |
| 1664 | setCondCodeAction(ISD::SETLT, VT, Custom); |
| 1665 | setCondCodeAction(ISD::SETLE, VT, Custom); |
| 1666 | } |
| 1667 | for (auto VT : { MVT::v16i32, MVT::v8i64 }) { |
| 1668 | setOperationAction(ISD::SMAX, VT, Legal); |
| 1669 | setOperationAction(ISD::UMAX, VT, Legal); |
| 1670 | setOperationAction(ISD::SMIN, VT, Legal); |
| 1671 | setOperationAction(ISD::UMIN, VT, Legal); |
| 1672 | setOperationAction(ISD::ABS, VT, Legal); |
| 1673 | setOperationAction(ISD::CTPOP, VT, Custom); |
| 1674 | setOperationAction(ISD::ROTL, VT, Custom); |
| 1675 | setOperationAction(ISD::ROTR, VT, Custom); |
| 1676 | setOperationAction(ISD::STRICT_FSETCC, VT, Custom); |
| 1677 | setOperationAction(ISD::STRICT_FSETCCS, VT, Custom); |
| 1678 | } |
| 1679 | |
| 1680 | for (auto VT : { MVT::v64i8, MVT::v32i16 }) { |
| 1681 | setOperationAction(ISD::ABS, VT, HasBWI ? Legal : Custom); |
| 1682 | setOperationAction(ISD::CTPOP, VT, Subtarget.hasBITALG() ? Legal : Custom); |
| 1683 | setOperationAction(ISD::CTLZ, VT, Custom); |
| 1684 | setOperationAction(ISD::SMAX, VT, HasBWI ? Legal : Custom); |
| 1685 | setOperationAction(ISD::UMAX, VT, HasBWI ? Legal : Custom); |
| 1686 | setOperationAction(ISD::SMIN, VT, HasBWI ? Legal : Custom); |
| 1687 | setOperationAction(ISD::UMIN, VT, HasBWI ? Legal : Custom); |
| 1688 | setOperationAction(ISD::UADDSAT, VT, HasBWI ? Legal : Custom); |
| 1689 | setOperationAction(ISD::SADDSAT, VT, HasBWI ? Legal : Custom); |
| 1690 | setOperationAction(ISD::USUBSAT, VT, HasBWI ? Legal : Custom); |
| 1691 | setOperationAction(ISD::SSUBSAT, VT, HasBWI ? Legal : Custom); |
| 1692 | } |
| 1693 | |
| 1694 | if (Subtarget.hasDQI()) { |
| 1695 | setOperationAction(ISD::SINT_TO_FP, MVT::v8i64, Legal); |
| 1696 | setOperationAction(ISD::UINT_TO_FP, MVT::v8i64, Legal); |
| 1697 | setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::v8i64, Legal); |
| 1698 | setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v8i64, Legal); |
| 1699 | setOperationAction(ISD::FP_TO_SINT, MVT::v8i64, Legal); |
| 1700 | setOperationAction(ISD::FP_TO_UINT, MVT::v8i64, Legal); |
| 1701 | setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v8i64, Legal); |
| 1702 | setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i64, Legal); |
| 1703 | |
| 1704 | setOperationAction(ISD::MUL, MVT::v8i64, Legal); |
| 1705 | } |
| 1706 | |
| 1707 | if (Subtarget.hasCDI()) { |
| 1708 | |
| 1709 | for (auto VT : { MVT::v16i32, MVT::v8i64} ) { |
| 1710 | setOperationAction(ISD::CTLZ, VT, Legal); |
| 1711 | } |
| 1712 | } |
| 1713 | |
| 1714 | if (Subtarget.hasVPOPCNTDQ()) { |
| 1715 | for (auto VT : { MVT::v16i32, MVT::v8i64 }) |
| 1716 | setOperationAction(ISD::CTPOP, VT, Legal); |
| 1717 | } |
| 1718 | |
| 1719 | |
| 1720 | |
| 1721 | |
| 1722 | for (auto VT : { MVT::v32i8, MVT::v16i16, MVT::v8i32, MVT::v4i64, |
| 1723 | MVT::v8f32, MVT::v4f64 }) |
| 1724 | setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Legal); |
| 1725 | |
| 1726 | for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v16i32, MVT::v8i64, |
| 1727 | MVT::v16f32, MVT::v8f64 }) { |
| 1728 | setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); |
| 1729 | setOperationAction(ISD::INSERT_SUBVECTOR, VT, Legal); |
| 1730 | setOperationAction(ISD::SELECT, VT, Custom); |
| 1731 | setOperationAction(ISD::VSELECT, VT, Custom); |
| 1732 | setOperationAction(ISD::BUILD_VECTOR, VT, Custom); |
| 1733 | setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); |
| 1734 | setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); |
| 1735 | setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); |
| 1736 | setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); |
| 1737 | } |
| 1738 | |
| 1739 | for (auto VT : { MVT::v16i32, MVT::v8i64, MVT::v16f32, MVT::v8f64 }) { |
| 1740 | setOperationAction(ISD::MLOAD, VT, Legal); |
| 1741 | setOperationAction(ISD::MSTORE, VT, Legal); |
| 1742 | setOperationAction(ISD::MGATHER, VT, Custom); |
| 1743 | setOperationAction(ISD::MSCATTER, VT, Custom); |
| 1744 | } |
| 1745 | if (HasBWI) { |
| 1746 | for (auto VT : { MVT::v64i8, MVT::v32i16 }) { |
| 1747 | setOperationAction(ISD::MLOAD, VT, Legal); |
| 1748 | setOperationAction(ISD::MSTORE, VT, Legal); |
| 1749 | } |
| 1750 | } else { |
| 1751 | setOperationAction(ISD::STORE, MVT::v32i16, Custom); |
| 1752 | setOperationAction(ISD::STORE, MVT::v64i8, Custom); |
| 1753 | } |
| 1754 | |
| 1755 | if (Subtarget.hasVBMI2()) { |
| 1756 | for (auto VT : { MVT::v8i16, MVT::v4i32, MVT::v2i64, |
| 1757 | MVT::v16i16, MVT::v8i32, MVT::v4i64, |
| 1758 | MVT::v32i16, MVT::v16i32, MVT::v8i64 }) { |
| 1759 | setOperationAction(ISD::FSHL, VT, Custom); |
| 1760 | setOperationAction(ISD::FSHR, VT, Custom); |
| 1761 | } |
| 1762 | |
| 1763 | setOperationAction(ISD::ROTL, MVT::v32i16, Custom); |
| 1764 | setOperationAction(ISD::ROTR, MVT::v8i16, Custom); |
| 1765 | setOperationAction(ISD::ROTR, MVT::v16i16, Custom); |
| 1766 | setOperationAction(ISD::ROTR, MVT::v32i16, Custom); |
| 1767 | } |
| 1768 | } |
| 1769 | |
| 1770 | |
| 1771 | |
| 1772 | |
| 1773 | if (!Subtarget.useSoftFloat() && Subtarget.hasAVX512()) { |
| 1774 | |
| 1775 | |
| 1776 | |
| 1777 | setOperationAction(ISD::FP_TO_UINT, MVT::v8i32, |
| 1778 | Subtarget.hasVLX() ? Legal : Custom); |
| 1779 | setOperationAction(ISD::FP_TO_UINT, MVT::v4i32, |
| 1780 | Subtarget.hasVLX() ? Legal : Custom); |
| 1781 | setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v8i32, |
| 1782 | Subtarget.hasVLX() ? Legal : Custom); |
| 1783 | setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v4i32, |
| 1784 | Subtarget.hasVLX() ? Legal : Custom); |
| 1785 | setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2i32, Custom); |
| 1786 | setOperationAction(ISD::UINT_TO_FP, MVT::v8i32, |
| 1787 | Subtarget.hasVLX() ? Legal : Custom); |
| 1788 | setOperationAction(ISD::UINT_TO_FP, MVT::v4i32, |
| 1789 | Subtarget.hasVLX() ? Legal : Custom); |
| 1790 | setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v8i32, |
| 1791 | Subtarget.hasVLX() ? Legal : Custom); |
| 1792 | setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::v4i32, |
| 1793 | Subtarget.hasVLX() ? Legal : Custom); |
| 1794 | |
| 1795 | if (Subtarget.hasDQI()) { |
| 1796 | |
| 1797 | |
| 1798 | assert(isOperationCustom(ISD::UINT_TO_FP, MVT::v2f32) && |
| 1799 | isOperationCustom(ISD::STRICT_UINT_TO_FP, MVT::v2f32) && |
| 1800 | "Unexpected operation action!"); |
| 1801 | |
| 1802 | setOperationAction(ISD::FP_TO_SINT, MVT::v2f32, Custom); |
| 1803 | setOperationAction(ISD::FP_TO_UINT, MVT::v2f32, Custom); |
| 1804 | setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::v2f32, Custom); |
| 1805 | setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::v2f32, Custom); |
| 1806 | } |
| 1807 | |
| 1808 | for (auto VT : { MVT::v2i64, MVT::v4i64 }) { |
| 1809 | setOperationAction(ISD::SMAX, VT, Legal); |
| 1810 | setOperationAction(ISD::UMAX, VT, Legal); |
| 1811 | setOperationAction(ISD::SMIN, VT, Legal); |
| 1812 | setOperationAction(ISD::UMIN, VT, Legal); |
| 1813 | setOperationAction(ISD::ABS, VT, Legal); |
| 1814 | } |
| 1815 | |
| 1816 | for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) { |
| 1817 | setOperationAction(ISD::ROTL, VT, Custom); |
| 1818 | setOperationAction(ISD::ROTR, VT, Custom); |
| 1819 | } |
| 1820 | |
| 1821 | |
| 1822 | setOperationAction(ISD::MSCATTER, MVT::v2f32, Custom); |
| 1823 | setOperationAction(ISD::MSCATTER, MVT::v2i32, Custom); |
| 1824 | |
| 1825 | for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64, |
| 1826 | MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) |
| 1827 | setOperationAction(ISD::MSCATTER, VT, Custom); |
| 1828 | |
| 1829 | if (Subtarget.hasDQI()) { |
| 1830 | for (auto VT : { MVT::v2i64, MVT::v4i64 }) { |
| 1831 | setOperationAction(ISD::SINT_TO_FP, VT, |
| 1832 | Subtarget.hasVLX() ? Legal : Custom); |
| 1833 | setOperationAction(ISD::UINT_TO_FP, VT, |
| 1834 | Subtarget.hasVLX() ? Legal : Custom); |
| 1835 | setOperationAction(ISD::STRICT_SINT_TO_FP, VT, |
| 1836 | Subtarget.hasVLX() ? Legal : Custom); |
| 1837 | setOperationAction(ISD::STRICT_UINT_TO_FP, VT, |
| 1838 | Subtarget.hasVLX() ? Legal : Custom); |
| 1839 | setOperationAction(ISD::FP_TO_SINT, VT, |
| 1840 | Subtarget.hasVLX() ? Legal : Custom); |
| 1841 | setOperationAction(ISD::FP_TO_UINT, VT, |
| 1842 | Subtarget.hasVLX() ? Legal : Custom); |
| 1843 | setOperationAction(ISD::STRICT_FP_TO_SINT, VT, |
| 1844 | Subtarget.hasVLX() ? Legal : Custom); |
| 1845 | setOperationAction(ISD::STRICT_FP_TO_UINT, VT, |
| 1846 | Subtarget.hasVLX() ? Legal : Custom); |
| 1847 | setOperationAction(ISD::MUL, VT, Legal); |
| 1848 | } |
| 1849 | } |
| 1850 | |
| 1851 | if (Subtarget.hasCDI()) { |
| 1852 | for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) { |
| 1853 | setOperationAction(ISD::CTLZ, VT, Legal); |
| 1854 | } |
| 1855 | } |
| 1856 | |
| 1857 | if (Subtarget.hasVPOPCNTDQ()) { |
| 1858 | for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64 }) |
| 1859 | setOperationAction(ISD::CTPOP, VT, Legal); |
| 1860 | } |
| 1861 | } |
| 1862 | |
| 1863 | |
| 1864 | |
| 1865 | |
| 1866 | if (!Subtarget.useSoftFloat() && Subtarget.hasBWI()) { |
| 1867 | addRegisterClass(MVT::v32i1, &X86::VK32RegClass); |
| 1868 | addRegisterClass(MVT::v64i1, &X86::VK64RegClass); |
| 1869 | |
| 1870 | for (auto VT : { MVT::v32i1, MVT::v64i1 }) { |
| 1871 | setOperationAction(ISD::VSELECT, VT, Expand); |
| 1872 | setOperationAction(ISD::TRUNCATE, VT, Custom); |
| 1873 | setOperationAction(ISD::SETCC, VT, Custom); |
| 1874 | setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); |
| 1875 | setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom); |
| 1876 | setOperationAction(ISD::SELECT, VT, Custom); |
| 1877 | setOperationAction(ISD::BUILD_VECTOR, VT, Custom); |
| 1878 | setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); |
| 1879 | setOperationAction(ISD::CONCAT_VECTORS, VT, Custom); |
| 1880 | setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom); |
| 1881 | } |
| 1882 | |
| 1883 | for (auto VT : { MVT::v16i1, MVT::v32i1 }) |
| 1884 | setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom); |
| 1885 | |
| 1886 | |
| 1887 | setOperationAction(ISD::SIGN_EXTEND, MVT::v32i8, Custom); |
| 1888 | setOperationAction(ISD::ZERO_EXTEND, MVT::v32i8, Custom); |
| 1889 | setOperationAction(ISD::ANY_EXTEND, MVT::v32i8, Custom); |
| 1890 | |
| 1891 | for (auto VT : { MVT::v32i8, MVT::v16i8, MVT::v16i16, MVT::v8i16 }) { |
| 1892 | setOperationAction(ISD::MLOAD, VT, Subtarget.hasVLX() ? Legal : Custom); |
| 1893 | setOperationAction(ISD::MSTORE, VT, Subtarget.hasVLX() ? Legal : Custom); |
| 1894 | } |
| 1895 | |
| 1896 | |
| 1897 | |
| 1898 | |
| 1899 | |
| 1900 | if (Subtarget.hasBITALG()) { |
| 1901 | for (auto VT : { MVT::v16i8, MVT::v32i8, MVT::v8i16, MVT::v16i16 }) |
| 1902 | setOperationAction(ISD::CTPOP, VT, Legal); |
| 1903 | } |
| 1904 | } |
| 1905 | |
| 1906 | if (!Subtarget.useSoftFloat() && Subtarget.hasVLX()) { |
| 1907 | setTruncStoreAction(MVT::v4i64, MVT::v4i8, Legal); |
| 1908 | setTruncStoreAction(MVT::v4i64, MVT::v4i16, Legal); |
| 1909 | setTruncStoreAction(MVT::v4i64, MVT::v4i32, Legal); |
| 1910 | setTruncStoreAction(MVT::v8i32, MVT::v8i8, Legal); |
| 1911 | setTruncStoreAction(MVT::v8i32, MVT::v8i16, Legal); |
| 1912 | |
| 1913 | setTruncStoreAction(MVT::v2i64, MVT::v2i8, Legal); |
| 1914 | setTruncStoreAction(MVT::v2i64, MVT::v2i16, Legal); |
| 1915 | setTruncStoreAction(MVT::v2i64, MVT::v2i32, Legal); |
| 1916 | setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal); |
| 1917 | setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal); |
| 1918 | |
| 1919 | if (Subtarget.hasBWI()) { |
| 1920 | setTruncStoreAction(MVT::v16i16, MVT::v16i8, Legal); |
| 1921 | setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal); |
| 1922 | } |
| 1923 | |
| 1924 | setOperationAction(ISD::TRUNCATE, MVT::v16i32, Custom); |
| 1925 | setOperationAction(ISD::TRUNCATE, MVT::v8i64, Custom); |
| 1926 | setOperationAction(ISD::TRUNCATE, MVT::v16i64, Custom); |
| 1927 | } |
| 1928 | |
| 1929 | if (Subtarget.hasAMXTILE()) { |
| 1930 | addRegisterClass(MVT::x86amx, &X86::TILERegClass); |
| 1931 | } |
| 1932 | |
| 1933 | |
| 1934 | setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); |
| 1935 | setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); |
| 1936 | setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); |
| 1937 | if (!Subtarget.is64Bit()) { |
| 1938 | setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom); |
| 1939 | } |
| 1940 | |
| 1941 | |
| 1942 | |
| 1943 | |
| 1944 | |
| 1945 | |
| 1946 | |
| 1947 | for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) { |
| 1948 | if (VT == MVT::i64 && !Subtarget.is64Bit()) |
| 1949 | continue; |
| 1950 | |
| 1951 | setOperationAction(ISD::SADDO, VT, Custom); |
| 1952 | setOperationAction(ISD::UADDO, VT, Custom); |
| 1953 | setOperationAction(ISD::SSUBO, VT, Custom); |
| 1954 | setOperationAction(ISD::USUBO, VT, Custom); |
| 1955 | setOperationAction(ISD::SMULO, VT, Custom); |
| 1956 | setOperationAction(ISD::UMULO, VT, Custom); |
| 1957 | |
| 1958 | |
| 1959 | setOperationAction(ISD::ADDCARRY, VT, Custom); |
| 1960 | setOperationAction(ISD::SUBCARRY, VT, Custom); |
| 1961 | setOperationAction(ISD::SETCCCARRY, VT, Custom); |
| 1962 | setOperationAction(ISD::SADDO_CARRY, VT, Custom); |
| 1963 | setOperationAction(ISD::SSUBO_CARRY, VT, Custom); |
| 1964 | } |
| 1965 | |
| 1966 | if (!Subtarget.is64Bit()) { |
| 1967 | |
| 1968 | setLibcallName(RTLIB::SHL_I128, nullptr); |
| 1969 | setLibcallName(RTLIB::SRL_I128, nullptr); |
| 1970 | setLibcallName(RTLIB::SRA_I128, nullptr); |
| 1971 | setLibcallName(RTLIB::MUL_I128, nullptr); |
| 1972 | } |
| 1973 | |
| 1974 | |
| 1975 | if (getLibcallName(RTLIB::SINCOS_STRET_F32) != nullptr && |
| 1976 | getLibcallName(RTLIB::SINCOS_STRET_F64) != nullptr) { |
| 1977 | setOperationAction(ISD::FSINCOS, MVT::f64, Custom); |
| 1978 | setOperationAction(ISD::FSINCOS, MVT::f32, Custom); |
| 1979 | } |
| 1980 | |
| 1981 | if (Subtarget.isTargetWin64()) { |
| 1982 | setOperationAction(ISD::SDIV, MVT::i128, Custom); |
| 1983 | setOperationAction(ISD::UDIV, MVT::i128, Custom); |
| 1984 | setOperationAction(ISD::SREM, MVT::i128, Custom); |
| 1985 | setOperationAction(ISD::UREM, MVT::i128, Custom); |
| 1986 | } |
| 1987 | |
| 1988 | |
| 1989 | |
| 1990 | |
| 1991 | |
| 1992 | if (Subtarget.is32Bit() && |
| 1993 | (Subtarget.isTargetWindowsMSVC() || Subtarget.isTargetWindowsItanium())) |
| 1994 | for (ISD::NodeType Op : |
| 1995 | {ISD::FCEIL, ISD::STRICT_FCEIL, |
| 1996 | ISD::FCOS, ISD::STRICT_FCOS, |
| 1997 | ISD::FEXP, ISD::STRICT_FEXP, |
| 1998 | ISD::FFLOOR, ISD::STRICT_FFLOOR, |
| 1999 | ISD::FREM, ISD::STRICT_FREM, |
| 2000 | ISD::FLOG, ISD::STRICT_FLOG, |
| 2001 | ISD::FLOG10, ISD::STRICT_FLOG10, |
| 2002 | ISD::FPOW, ISD::STRICT_FPOW, |
| 2003 | ISD::FSIN, ISD::STRICT_FSIN}) |
| 2004 | if (isOperationExpand(Op, MVT::f32)) |
| 2005 | setOperationAction(Op, MVT::f32, Promote); |
| 2006 | |
| 2007 | |
| 2008 | setTargetDAGCombine(ISD::VECTOR_SHUFFLE); |
| 2009 | setTargetDAGCombine(ISD::SCALAR_TO_VECTOR); |
| 2010 | setTargetDAGCombine(ISD::INSERT_VECTOR_ELT); |
| 2011 | setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT); |
| 2012 | setTargetDAGCombine(ISD::CONCAT_VECTORS); |
| 2013 | setTargetDAGCombine(ISD::INSERT_SUBVECTOR); |
| 2014 | setTargetDAGCombine(ISD::EXTRACT_SUBVECTOR); |
| 2015 | setTargetDAGCombine(ISD::BITCAST); |
| 2016 | setTargetDAGCombine(ISD::VSELECT); |
| 2017 | setTargetDAGCombine(ISD::SELECT); |
| 2018 | setTargetDAGCombine(ISD::SHL); |
| 2019 | setTargetDAGCombine(ISD::SRA); |
| 2020 | setTargetDAGCombine(ISD::SRL); |
| 2021 | setTargetDAGCombine(ISD::OR); |
| 2022 | setTargetDAGCombine(ISD::AND); |
| 2023 | setTargetDAGCombine(ISD::ADD); |
| 2024 | setTargetDAGCombine(ISD::FADD); |
| 2025 | setTargetDAGCombine(ISD::FSUB); |
| 2026 | setTargetDAGCombine(ISD::FNEG); |
| 2027 | setTargetDAGCombine(ISD::FMA); |
| 2028 | setTargetDAGCombine(ISD::STRICT_FMA); |
| 2029 | setTargetDAGCombine(ISD::FMINNUM); |
| 2030 | setTargetDAGCombine(ISD::FMAXNUM); |
| 2031 | setTargetDAGCombine(ISD::SUB); |
| 2032 | setTargetDAGCombine(ISD::LOAD); |
| 2033 | setTargetDAGCombine(ISD::MLOAD); |
| 2034 | setTargetDAGCombine(ISD::STORE); |
| 2035 | setTargetDAGCombine(ISD::MSTORE); |
| 2036 | setTargetDAGCombine(ISD::TRUNCATE); |
| 2037 | setTargetDAGCombine(ISD::ZERO_EXTEND); |
| 2038 | setTargetDAGCombine(ISD::ANY_EXTEND); |
| 2039 | setTargetDAGCombine(ISD::SIGN_EXTEND); |
| 2040 | setTargetDAGCombine(ISD::SIGN_EXTEND_INREG); |
| 2041 | setTargetDAGCombine(ISD::ANY_EXTEND_VECTOR_INREG); |
| 2042 | setTargetDAGCombine(ISD::SIGN_EXTEND_VECTOR_INREG); |
| 2043 | setTargetDAGCombine(ISD::ZERO_EXTEND_VECTOR_INREG); |
| 2044 | setTargetDAGCombine(ISD::SINT_TO_FP); |
| 2045 | setTargetDAGCombine(ISD::UINT_TO_FP); |
| 2046 | setTargetDAGCombine(ISD::STRICT_SINT_TO_FP); |
| 2047 | setTargetDAGCombine(ISD::STRICT_UINT_TO_FP); |
| 2048 | setTargetDAGCombine(ISD::SETCC); |
| 2049 | setTargetDAGCombine(ISD::MUL); |
| 2050 | setTargetDAGCombine(ISD::XOR); |
| 2051 | setTargetDAGCombine(ISD::MSCATTER); |
| 2052 | setTargetDAGCombine(ISD::MGATHER); |
| 2053 | setTargetDAGCombine(ISD::FP16_TO_FP); |
| 2054 | setTargetDAGCombine(ISD::FP_EXTEND); |
| 2055 | setTargetDAGCombine(ISD::STRICT_FP_EXTEND); |
| 2056 | setTargetDAGCombine(ISD::FP_ROUND); |
| 2057 | |
| 2058 | computeRegisterProperties(Subtarget.getRegisterInfo()); |
| 2059 | |
| 2060 | MaxStoresPerMemset = 16; |
| 2061 | MaxStoresPerMemsetOptSize = 8; |
| 2062 | MaxStoresPerMemcpy = 8; |
| 2063 | MaxStoresPerMemcpyOptSize = 4; |
| 2064 | MaxStoresPerMemmove = 8; |
| 2065 | MaxStoresPerMemmoveOptSize = 4; |
| 2066 | |
| 2067 | |
| 2068 | |
| 2069 | |
| 2070 | MaxLoadsPerMemcmp = 2; |
| 2071 | MaxLoadsPerMemcmpOptSize = 2; |
| 2072 | |
| 2073 | |
| 2074 | setPrefLoopAlignment(Align(1ULL << ExperimentalPrefLoopAlignment)); |
| 2075 | |
| 2076 | |
| 2077 | |
| 2078 | PredictableSelectIsExpensive = Subtarget.getSchedModel().isOutOfOrder(); |
| 2079 | EnableExtLdPromotion = true; |
| 2080 | setPrefFunctionAlignment(Align(16)); |
| 2081 | |
| 2082 | verifyIntrinsicTables(); |
| 2083 | |
| 2084 | |
| 2085 | IsStrictFPEnabled = true; |
| 2086 | } |
| 2087 | |
| 2088 | |
| 2089 | bool X86TargetLowering::useLoadStackGuardNode() const { |
| 2090 | return Subtarget.isTargetMachO() && Subtarget.is64Bit(); |
| 2091 | } |
| 2092 | |
| 2093 | bool X86TargetLowering::useStackGuardXorFP() const { |
| 2094 | |
| 2095 | return Subtarget.getTargetTriple().isOSMSVCRT() && !Subtarget.isTargetMachO(); |
| 2096 | } |
| 2097 | |
| 2098 | SDValue X86TargetLowering::emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val, |
| 2099 | const SDLoc &DL) const { |
| 2100 | EVT PtrTy = getPointerTy(DAG.getDataLayout()); |
| 2101 | unsigned XorOp = Subtarget.is64Bit() ? X86::XOR64_FP : X86::XOR32_FP; |
| 2102 | MachineSDNode *Node = DAG.getMachineNode(XorOp, DL, PtrTy, Val); |
| 2103 | return SDValue(Node, 0); |
| 2104 | } |
| 2105 | |
| 2106 | TargetLoweringBase::LegalizeTypeAction |
| 2107 | X86TargetLowering::getPreferredVectorAction(MVT VT) const { |
| 2108 | if ((VT == MVT::v32i1 || VT == MVT::v64i1) && Subtarget.hasAVX512() && |
| 2109 | !Subtarget.hasBWI()) |
| 2110 | return TypeSplitVector; |
| 2111 | |
| 2112 | if (!VT.isScalableVector() && VT.getVectorNumElements() != 1 && |
| 2113 | VT.getVectorElementType() != MVT::i1) |
| 2114 | return TypeWidenVector; |
| 2115 | |
| 2116 | return TargetLoweringBase::getPreferredVectorAction(VT); |
| 2117 | } |
| 2118 | |
/// Map an i1 mask vector of NumElts elements to the register type and register
/// count used to pass it for calling convention CC on an AVX-512 target.
/// Returns {MVT::INVALID_SIMPLE_VALUE_TYPE, 0} when the default lowering
/// should handle the type instead.
static std::pair<MVT, unsigned>
handleMaskRegisterForCallingConv(unsigned NumElts, CallingConv::ID CC,
                                 const X86Subtarget &Subtarget) {
  // Small masks are promoted to an equally-wide integer-element vector so they
  // travel in a single vector register.
  if (NumElts == 2)
    return {MVT::v2i64, 1};
  if (NumElts == 4)
    return {MVT::v4i32, 1};
  // RegCall and Intel OCL BI handle v8i1/v16i1 themselves, so those
  // conventions fall through to the default lowering.
  if (NumElts == 8 && CC != CallingConv::X86_RegCall &&
      CC != CallingConv::Intel_OCL_BI)
    return {MVT::v8i16, 1};
  if (NumElts == 16 && CC != CallingConv::X86_RegCall &&
      CC != CallingConv::Intel_OCL_BI)
    return {MVT::v16i8, 1};

  // v32i1: one 256-bit byte vector, unless a BWI RegCall target keeps it.
  if (NumElts == 32 && (!Subtarget.hasBWI() || CC != CallingConv::X86_RegCall))
    return {MVT::v32i8, 1};

  // v64i1 with BWI (non-RegCall): one 512-bit vector when 512-bit registers
  // are preferred, otherwise two 256-bit halves.
  if (NumElts == 64 && Subtarget.hasBWI() && CC != CallingConv::X86_RegCall) {
    if (Subtarget.useAVX512Regs())
      return {MVT::v64i8, 1};
    return {MVT::v32i8, 2};
  }

  // Irregular sizes (non-power-of-2, more than 64 elements, or 64 elements
  // without BWI) are fully scalarized: one i8 register per mask element.
  if (!isPowerOf2_32(NumElts) || (NumElts == 64 && !Subtarget.hasBWI()) ||
      NumElts > 64)
    return {MVT::i8, NumElts};

  // Sentinel: caller should use the default calling-convention lowering.
  return {MVT::INVALID_SIMPLE_VALUE_TYPE, 0};
}
| 2152 | |
| 2153 | MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context, |
| 2154 | CallingConv::ID CC, |
| 2155 | EVT VT) const { |
| 2156 | if (VT.isVector() && VT.getVectorElementType() == MVT::i1 && |
| 2157 | Subtarget.hasAVX512()) { |
| 2158 | unsigned NumElts = VT.getVectorNumElements(); |
| 2159 | |
| 2160 | MVT RegisterVT; |
| 2161 | unsigned NumRegisters; |
| 2162 | std::tie(RegisterVT, NumRegisters) = |
| 2163 | handleMaskRegisterForCallingConv(NumElts, CC, Subtarget); |
| 2164 | if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE) |
| 2165 | return RegisterVT; |
| 2166 | } |
| 2167 | |
| 2168 | return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT); |
| 2169 | } |
| 2170 | |
| 2171 | unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context, |
| 2172 | CallingConv::ID CC, |
| 2173 | EVT VT) const { |
| 2174 | if (VT.isVector() && VT.getVectorElementType() == MVT::i1 && |
| 2175 | Subtarget.hasAVX512()) { |
| 2176 | unsigned NumElts = VT.getVectorNumElements(); |
| 2177 | |
| 2178 | MVT RegisterVT; |
| 2179 | unsigned NumRegisters; |
| 2180 | std::tie(RegisterVT, NumRegisters) = |
| 2181 | handleMaskRegisterForCallingConv(NumElts, CC, Subtarget); |
| 2182 | if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE) |
| 2183 | return NumRegisters; |
| 2184 | } |
| 2185 | |
| 2186 | return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT); |
| 2187 | } |
| 2188 | |
// Break VT down into intermediate/register types for argument passing.
// Returns the number of registers; fills the three out-parameters.
unsigned X86TargetLowering::getVectorTypeBreakdownForCallingConv(
    LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
    unsigned &NumIntermediates, MVT &RegisterVT) const {
  // Irregular i1 mask vectors (non-power-of-2 length, more than 64 elements,
  // or exactly 64 without BWI) are scalarized: each i1 travels in its own i8.
  if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
      Subtarget.hasAVX512() &&
      (!isPowerOf2_32(VT.getVectorNumElements()) ||
       (VT.getVectorNumElements() == 64 && !Subtarget.hasBWI()) ||
       VT.getVectorNumElements() > 64)) {
    RegisterVT = MVT::i8;
    IntermediateVT = MVT::i1;
    NumIntermediates = VT.getVectorNumElements();
    return NumIntermediates;
  }

  // v64i1 with BWI but without 512-bit registers: split into two v32i1
  // halves, each passed as v32i8 (must match handleMaskRegisterForCallingConv).
  if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
      CC != CallingConv::X86_RegCall) {
    RegisterVT = MVT::v32i8;
    IntermediateVT = MVT::v32i1;
    NumIntermediates = 2;
    return 2;
  }

  return TargetLowering::getVectorTypeBreakdownForCallingConv(Context, CC, VT, IntermediateVT,
                                                              NumIntermediates, RegisterVT);
}
| 2216 | |
// Return the value type a SETCC on VT produces.
EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL,
                                          LLVMContext& Context,
                                          EVT VT) const {
  // Scalar compares produce a single condition byte.
  if (!VT.isVector())
    return MVT::i8;

  if (Subtarget.hasAVX512()) {
    // Walk the legalization chain so the decision reflects the type the
    // compare will actually execute on after type legalization.
    EVT LegalVT = VT;
    while (getTypeAction(Context, LegalVT) != TypeLegal)
      LegalVT = getTypeToTransformTo(Context, LegalVT);

    // 512-bit compares always produce an i1 mask vector (k-register).
    if (LegalVT.getSimpleVT().is512BitVector())
      return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());

    if (LegalVT.getSimpleVT().isVector() && Subtarget.hasVLX()) {
      // With VLX, narrower vectors can use mask registers too — but byte and
      // word element compares additionally require BWI.
      MVT EltVT = LegalVT.getSimpleVT().getVectorElementType();
      if (Subtarget.hasBWI() || EltVT.getSizeInBits() >= 32)
        return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
    }
  }

  // Default: an integer vector with the same element width as the operands.
  return VT.changeVectorElementTypeToInteger();
}
| 2245 | |
| 2246 | |
| 2247 | |
| 2248 | static void getMaxByValAlign(Type *Ty, Align &MaxAlign) { |
| 2249 | if (MaxAlign == 16) |
| 2250 | return; |
| 2251 | if (VectorType *VTy = dyn_cast<VectorType>(Ty)) { |
| 2252 | if (VTy->getPrimitiveSizeInBits().getFixedSize() == 128) |
| 2253 | MaxAlign = Align(16); |
| 2254 | } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) { |
| 2255 | Align EltAlign; |
| 2256 | getMaxByValAlign(ATy->getElementType(), EltAlign); |
| 2257 | if (EltAlign > MaxAlign) |
| 2258 | MaxAlign = EltAlign; |
| 2259 | } else if (StructType *STy = dyn_cast<StructType>(Ty)) { |
| 2260 | for (auto *EltTy : STy->elements()) { |
| 2261 | Align EltAlign; |
| 2262 | getMaxByValAlign(EltTy, EltAlign); |
| 2263 | if (EltAlign > MaxAlign) |
| 2264 | MaxAlign = EltAlign; |
| 2265 | if (MaxAlign == 16) |
| 2266 | break; |
| 2267 | } |
| 2268 | } |
| 2269 | } |
| 2270 | |
| 2271 | |
| 2272 | |
| 2273 | |
| 2274 | |
| 2275 | unsigned X86TargetLowering::getByValTypeAlignment(Type *Ty, |
| 2276 | const DataLayout &DL) const { |
| 2277 | if (Subtarget.is64Bit()) { |
| 2278 | |
| 2279 | Align TyAlign = DL.getABITypeAlign(Ty); |
| 2280 | if (TyAlign > 8) |
| 2281 | return TyAlign.value(); |
| 2282 | return 8; |
| 2283 | } |
| 2284 | |
| 2285 | Align Alignment(4); |
| 2286 | if (Subtarget.hasSSE1()) |
| 2287 | getMaxByValAlign(Ty, Alignment); |
| 2288 | return Alignment.value(); |
| 2289 | } |
| 2290 | |
| 2291 | |
| 2292 | |
| 2293 | |
| 2294 | |
// Pick the widest profitable value type for lowering a memcpy/memset/memmove
// described by Op, given the caller's attributes.
EVT X86TargetLowering::getOptimalMemOpType(
    const MemOp &Op, const AttributeList &FuncAttributes) const {
  // Vector types are only usable when implicit FP/vector use is permitted.
  if (!FuncAttributes.hasFnAttribute(Attribute::NoImplicitFloat)) {
    // 16+ byte ops can use vectors if unaligned 16-byte access is not slow,
    // or the operation is known 16-byte aligned.
    if (Op.size() >= 16 &&
        (!Subtarget.isUnalignedMem16Slow() || Op.isAligned(Align(16)))) {
      // Prefer the widest vector the subtarget is configured to use: 512-bit
      // first (v64i8 needs BWI, otherwise v16i32).
      if (Op.size() >= 64 && Subtarget.hasAVX512() &&
          (Subtarget.getPreferVectorWidth() >= 512)) {
        return Subtarget.hasBWI() ? MVT::v64i8 : MVT::v16i32;
      }
      // Then 256-bit with AVX.
      if (Op.size() >= 32 && Subtarget.hasAVX() &&
          (Subtarget.getPreferVectorWidth() >= 256)) {
        return MVT::v32i8;
      }
      // Then 128-bit integer vectors with SSE2.
      if (Subtarget.hasSSE2() && (Subtarget.getPreferVectorWidth() >= 128))
        return MVT::v16i8;
      // SSE1-only: use v4f32, but only when 64-bit mode or x87 is present —
      // presumably so a scalar FP fallback stays lowerable (TODO confirm).
      if (Subtarget.hasSSE1() && (Subtarget.is64Bit() || Subtarget.hasX87()) &&
          (Subtarget.getPreferVectorWidth() >= 128))
        return MVT::v4f32;
    } else if (((Op.isMemcpy() && !Op.isMemcpyStrSrc()) || Op.isZeroMemset()) &&
               Op.size() >= 8 && !Subtarget.is64Bit() && Subtarget.hasSSE2()) {
      // 32-bit target, 8+ bytes, plain memcpy or zero memset: move 8 bytes at
      // a time through an SSE f64 register.
      return MVT::f64;
    }
  }

  // Scalar fallback: the native integer word size.
  if (Subtarget.is64Bit() && Op.size() >= 8)
    return MVT::i64;
  return MVT::i32;
}
| 2340 | |
| 2341 | bool X86TargetLowering::isSafeMemOpType(MVT VT) const { |
| 2342 | if (VT == MVT::f32) |
| 2343 | return X86ScalarSSEf32; |
| 2344 | if (VT == MVT::f64) |
| 2345 | return X86ScalarSSEf64; |
| 2346 | return true; |
| 2347 | } |
| 2348 | |
// Report whether a misaligned access of VT is allowed, and (via *Fast)
// whether it is expected to be cheap on this subtarget.
bool X86TargetLowering::allowsMisalignedMemoryAccesses(
    EVT VT, unsigned, Align Alignment, MachineMemOperand::Flags Flags,
    bool *Fast) const {
  if (Fast) {
    switch (VT.getSizeInBits()) {
    default:
      // Scalar and sub-128-bit accesses: no misalignment penalty modeled.
      *Fast = true;
      break;
    case 128:
      *Fast = !Subtarget.isUnalignedMem16Slow();
      break;
    case 256:
      *Fast = !Subtarget.isUnalignedMem32Slow();
      break;
      // NOTE: 512-bit accesses are not modeled here and take the default.
    }
  }

  // Non-temporal vector accesses get special treatment:
  if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) {
    // NT loads: a misaligned access is only "allowed" (i.e. treated as a
    // regular load) when it is under-aligned or SSE4.1 is missing; an aligned
    // SSE4.1 NT load is rejected here so it keeps its NT lowering.
    if (!!(Flags & MachineMemOperand::MOLoad))
      return (Alignment < 16 || !Subtarget.hasSSE41());
    // NT vector stores are never allowed misaligned.
    return false;
  }

  // Everything else is permitted (speed reported through *Fast above).
  return true;
}
| 2380 | |
| 2381 | |
| 2382 | |
| 2383 | |
| 2384 | unsigned X86TargetLowering::getJumpTableEncoding() const { |
| 2385 | |
| 2386 | |
| 2387 | if (isPositionIndependent() && Subtarget.isPICStyleGOT()) |
| 2388 | return MachineJumpTableInfo::EK_Custom32; |
| 2389 | |
| 2390 | |
| 2391 | return TargetLowering::getJumpTableEncoding(); |
| 2392 | } |
| 2393 | |
// Forward the subtarget's soft-float setting to the generic lowering query.
bool X86TargetLowering::useSoftFloat() const {
  return Subtarget.useSoftFloat();
}
| 2397 | |
// Mark leading integer/pointer libcall arguments "inreg" on 32-bit targets
// when the module requests register parameters (regparm-style passing).
void X86TargetLowering::markLibCallAttributes(MachineFunction *MF, unsigned CC,
                                              ArgListTy &Args) const {
  // 64-bit targets pass arguments in registers by default — nothing to do.
  if (Subtarget.is64Bit())
    return;
  // Only the C and stdcall conventions participate.
  if (CC != CallingConv::C && CC != CallingConv::X86_StdCall)
    return;
  // Number of register-parameter slots requested at the module level.
  unsigned ParamRegs = 0;
  if (auto *M = MF->getFunction().getParent())
    ParamRegs = M->getNumberRegisterParameters();

  // Consume slots argument by argument; stop at the first argument that no
  // longer fits (subsequent arguments go on the stack).
  for (auto &Arg : Args) {
    Type *T = Arg.Ty;
    if (T->isIntOrPtrTy())
      if (MF->getDataLayout().getTypeAllocSize(T) <= 8) {
        // Values wider than 4 bytes occupy a register pair.
        unsigned numRegs = 1;
        if (MF->getDataLayout().getTypeAllocSize(T) > 4)
          numRegs = 2;
        if (ParamRegs < numRegs)
          return;
        ParamRegs -= numRegs;
        Arg.IsInReg = true;
      }
  }
}
| 2425 | |
| 2426 | const MCExpr * |
| 2427 | X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, |
| 2428 | const MachineBasicBlock *MBB, |
| 2429 | unsigned uid,MCContext &Ctx) const{ |
| 2430 | assert(isPositionIndependent() && Subtarget.isPICStyleGOT()); |
| 2431 | |
| 2432 | |
| 2433 | return MCSymbolRefExpr::create(MBB->getSymbol(), |
| 2434 | MCSymbolRefExpr::VK_GOTOFF, Ctx); |
| 2435 | } |
| 2436 | |
| 2437 | |
| 2438 | SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table, |
| 2439 | SelectionDAG &DAG) const { |
| 2440 | if (!Subtarget.is64Bit()) |
| 2441 | |
| 2442 | |
| 2443 | return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), |
| 2444 | getPointerTy(DAG.getDataLayout())); |
| 2445 | return Table; |
| 2446 | } |
| 2447 | |
| 2448 | |
| 2449 | |
| 2450 | const MCExpr *X86TargetLowering:: |
| 2451 | getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, |
| 2452 | MCContext &Ctx) const { |
| 2453 | |
| 2454 | if (Subtarget.isPICStyleRIPRel()) |
| 2455 | return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx); |
| 2456 | |
| 2457 | |
| 2458 | return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx); |
| 2459 | } |
| 2460 | |
// Map VT to a representative register class (and a relative cost) for
// register-pressure estimation.
std::pair<const TargetRegisterClass *, uint8_t>
X86TargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
                                           MVT VT) const {
  const TargetRegisterClass *RRC = nullptr;
  uint8_t Cost = 1;
  switch (VT.SimpleTy) {
  default:
    return TargetLowering::findRepresentativeClass(TRI, VT);
  case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
    // All scalar integers map to the GPR class of the native word width.
    RRC = Subtarget.is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass;
    break;
  case MVT::x86mmx:
    RRC = &X86::VR64RegClass;
    break;
  case MVT::f32: case MVT::f64:
  case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
  case MVT::v4f32: case MVT::v2f64:
  case MVT::v32i8: case MVT::v16i16: case MVT::v8i32: case MVT::v4i64:
  case MVT::v8f32: case MVT::v4f64:
  case MVT::v64i8: case MVT::v32i16: case MVT::v16i32: case MVT::v8i64:
  case MVT::v16f32: case MVT::v8f64:
    // Scalar FP and all vector widths share the 128-bit XMM (VR128X) class.
    RRC = &X86::VR128XRegClass;
    break;
  }
  return std::make_pair(RRC, Cost);
}
| 2487 | |
| 2488 | unsigned X86TargetLowering::getAddressSpace() const { |
| 2489 | if (Subtarget.is64Bit()) |
| 2490 | return (getTargetMachine().getCodeModel() == CodeModel::Kernel) ? 256 : 257; |
| 2491 | return 256; |
| 2492 | } |
| 2493 | |
| 2494 | static bool hasStackGuardSlotTLS(const Triple &TargetTriple) { |
| 2495 | return TargetTriple.isOSGlibc() || TargetTriple.isOSFuchsia() || |
| 2496 | (TargetTriple.isAndroid() && !TargetTriple.isAndroidVersionLT(17)); |
| 2497 | } |
| 2498 | |
| 2499 | static Constant* SegmentOffset(IRBuilderBase &IRB, |
| 2500 | int Offset, unsigned AddressSpace) { |
| 2501 | return ConstantExpr::getIntToPtr( |
| 2502 | ConstantInt::get(Type::getInt32Ty(IRB.getContext()), Offset), |
| 2503 | Type::getInt8PtrTy(IRB.getContext())->getPointerTo(AddressSpace)); |
| 2504 | } |
| 2505 | |
// Return the IR value to load the stack guard from. Targets with a reserved
// TLS slot address the guard directly through the thread segment register
// instead of the __stack_chk_guard global.
Value *X86TargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
  if (hasStackGuardSlotTLS(Subtarget.getTargetTriple())) {
    if (Subtarget.isTargetFuchsia()) {
      // Fuchsia: fixed slot at offset 0x10 in the thread segment.
      return SegmentOffset(IRB, 0x10, getAddressSpace());
    } else {
      unsigned AddressSpace = getAddressSpace();
      Module *M = IRB.GetInsertBlock()->getParent()->getParent();

      // The module may override the guard's TLS offset.
      int Offset = M->getStackProtectorGuardOffset();

      // INT_MAX means "unset": use the conventional offsets
      // (0x28 in 64-bit mode, 0x14 in 32-bit mode).
      if (Offset == INT_MAX)
        Offset = (Subtarget.is64Bit()) ? 0x28 : 0x14;

      // An explicit guard segment register ("fs"/"gs") overrides the
      // default address space.
      StringRef GuardReg = M->getStackProtectorGuardReg();
      if (GuardReg == "fs")
        AddressSpace = X86AS::FS;
      else if (GuardReg == "gs")
        AddressSpace = X86AS::GS;
      return SegmentOffset(IRB, Offset, AddressSpace);
    }
  }
  return TargetLowering::getIRStackGuard(IRB);
}
| 2535 | |
// Declare the symbols the stack-protector lowering expects to exist.
void X86TargetLowering::insertSSPDeclarations(Module &M) const {
  // MSVC-style environments use the __security_cookie /
  // __security_check_cookie scheme instead of __stack_chk_guard/fail.
  if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
      Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
    // The cookie the prologue loads and the epilogue re-checks.
    M.getOrInsertGlobal("__security_cookie",
                        Type::getInt8PtrTy(M.getContext()));

    // void __security_check_cookie(i8*), fastcall with the argument in a
    // register.
    FunctionCallee SecurityCheckCookie = M.getOrInsertFunction(
        "__security_check_cookie", Type::getVoidTy(M.getContext()),
        Type::getInt8PtrTy(M.getContext()));
    if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee())) {
      F->setCallingConv(CallingConv::X86_FastCall);
      F->addAttribute(1, Attribute::AttrKind::InReg);
    }
    return;
  }

  StringRef GuardMode = M.getStackProtectorGuard();

  // TLS-slot targets need no declarations — the guard is addressed directly
  // through the segment register (see getIRStackGuard).
  if ((GuardMode == "tls" || GuardMode.empty()) &&
      hasStackGuardSlotTLS(Subtarget.getTargetTriple()))
    return;
  TargetLowering::insertSSPDeclarations(M);
}
| 2563 | |
| 2564 | Value *X86TargetLowering::getSDagStackGuard(const Module &M) const { |
| 2565 | |
| 2566 | if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() || |
| 2567 | Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) { |
| 2568 | return M.getGlobalVariable("__security_cookie"); |
| 2569 | } |
| 2570 | return TargetLowering::getSDagStackGuard(M); |
| 2571 | } |
| 2572 | |
| 2573 | Function *X86TargetLowering::getSSPStackGuardCheck(const Module &M) const { |
| 2574 | |
| 2575 | if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() || |
| 2576 | Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) { |
| 2577 | return M.getFunction("__security_check_cookie"); |
| 2578 | } |
| 2579 | return TargetLowering::getSSPStackGuardCheck(M); |
| 2580 | } |
| 2581 | |
// Where SafeStack keeps the unsafe-stack pointer for this target.
Value *
X86TargetLowering::getSafeStackPointerLocation(IRBuilderBase &IRB) const {
  // Contiki: plain global variable (non-TLS default location).
  if (Subtarget.getTargetTriple().isOSContiki())
    return getDefaultSafeStackPointerLocation(IRB, false);

  // Android: fixed offset in the thread segment — 0x48 in 64-bit mode,
  // 0x24 in 32-bit mode.
  if (Subtarget.isTargetAndroid()) {
    int Offset = (Subtarget.is64Bit()) ? 0x48 : 0x24;
    return SegmentOffset(IRB, Offset, getAddressSpace());
  }

  // Fuchsia: fixed slot at offset 0x18 in the thread segment.
  if (Subtarget.isTargetFuchsia()) {
    return SegmentOffset(IRB, 0x18, getAddressSpace());
  }

  return TargetLowering::getSafeStackPointerLocation(IRB);
}
| 2605 | |
| 2606 | |
| 2607 | |
| 2608 | |
| 2609 | |
| 2610 | bool X86TargetLowering::CanLowerReturn( |
| 2611 | CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, |
| 2612 | const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const { |
| 2613 | SmallVector<CCValAssign, 16> RVLocs; |
| 2614 | CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context); |
| 2615 | return CCInfo.CheckReturn(Outs, RetCC_X86); |
| 2616 | } |
| 2617 | |
// Scratch registers available regardless of calling convention; the array is
// null-terminated.
const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
  static const MCPhysReg ScratchRegs[] = { X86::R11, 0 };
  return ScratchRegs;
}
| 2622 | |
| 2623 | |
| 2624 | |
/// Lower an i1-element mask vector value into the scalar location type ValLoc
/// it is passed/returned in, bitcasting and/or extending as needed.
static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc,
                               const SDLoc &Dl, SelectionDAG &DAG) {
  EVT ValVT = ValArg.getValueType();

  // A one-element mask is just its element, extended to the location type.
  if (ValVT == MVT::v1i1)
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, Dl, ValLoc, ValArg,
                       DAG.getIntPtrConstant(0, Dl));

  if ((ValVT == MVT::v8i1 && (ValLoc == MVT::i8 || ValLoc == MVT::i32)) ||
      (ValVT == MVT::v16i1 && (ValLoc == MVT::i16 || ValLoc == MVT::i32))) {
    // Bitcast the mask to its natural-width integer first, then any-extend
    // if the location (i32) is wider.
    EVT TempValLoc = ValVT == MVT::v8i1 ? MVT::i8 : MVT::i16;
    SDValue ValToCopy = DAG.getBitcast(TempValLoc, ValArg);
    if (ValLoc == MVT::i32)
      ValToCopy = DAG.getNode(ISD::ANY_EXTEND, Dl, ValLoc, ValToCopy);
    return ValToCopy;
  }

  if ((ValVT == MVT::v32i1 && ValLoc == MVT::i32) ||
      (ValVT == MVT::v64i1 && ValLoc == MVT::i64)) {
    // Same total bit width: a plain bitcast suffices.
    return DAG.getBitcast(ValLoc, ValArg);
  }

  // Remaining cases: any-extend the vector into the location type.
  return DAG.getNode(ISD::ANY_EXTEND, Dl, ValLoc, ValArg);
}
| 2654 | |
| 2655 | |
/// Split a v64i1 value (already carried as an i64) into two i32 halves and
/// record them against the two registers assigned by VA and NextVA. Used on
/// 32-bit AVX512BW targets, where no 64-bit GPR exists.
static void Passv64i1ArgInRegs(
    const SDLoc &Dl, SelectionDAG &DAG, SDValue &Arg,
    SmallVectorImpl<std::pair<Register, SDValue>> &RegsToPass, CCValAssign &VA,
    CCValAssign &NextVA, const X86Subtarget &Subtarget) {
  assert(Subtarget.hasBWI() && "Expected AVX512BW target!");
  assert(Subtarget.is32Bit() && "Expecting 32 bit target");
  assert(Arg.getValueType() == MVT::i64 && "Expecting 64 bit value");
  assert(VA.isRegLoc() && NextVA.isRegLoc() &&
         "The value should reside in two registers");

  // Normalize to an i64 so EXTRACT_ELEMENT below is well-formed.
  Arg = DAG.getBitcast(MVT::i64, Arg);

  // Split into the low (element 0) and high (element 1) 32-bit halves.
  SDValue Lo, Hi;
  Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32, Arg,
                   DAG.getConstant(0, Dl, MVT::i32));
  Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, Dl, MVT::i32, Arg,
                   DAG.getConstant(1, Dl, MVT::i32));

  // Pair each half with its assigned register (low first, then high).
  RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo));
  RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Hi));
}
| 2680 | |
// Lower a function's return: assign each return value to its register(s),
// chain the copies together, and emit the X86 return node.
SDValue
X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                               bool isVarArg,
                               const SmallVectorImpl<ISD::OutputArg> &Outs,
                               const SmallVectorImpl<SDValue> &OutVals,
                               const SDLoc &dl, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();

  // RegCall and "no_caller_saved_registers" functions must not treat return
  // registers as callee-saved.
  bool ShouldDisableCalleeSavedRegister =
      CallConv == CallingConv::X86_RegCall ||
      MF.getFunction().hasFnAttribute("no_caller_saved_registers");

  if (CallConv == CallingConv::X86_INTR && !Outs.empty())
    report_fatal_error("X86 interrupts may not return any value");

  // Let the return calling convention assign a register to every value.
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
  CCInfo.AnalyzeReturn(Outs, RetCC_X86);

  // (register, value) pairs to copy into place before the return.
  SmallVector<std::pair<Register, SDValue>, 4> RetVals;
  for (unsigned I = 0, OutsIndex = 0, E = RVLocs.size(); I != E;
       ++I, ++OutsIndex) {
    CCValAssign &VA = RVLocs[I];
    assert(VA.isRegLoc() && "Can only return in registers!");

    if (ShouldDisableCalleeSavedRegister)
      MF.getRegInfo().disableCalleeSavedRegister(VA.getLocReg());

    SDValue ValToCopy = OutVals[OutsIndex];
    EVT ValVT = ValToCopy.getValueType();

    // Promote the value to its location type as the CC analysis requested.
    if (VA.getLocInfo() == CCValAssign::SExt)
      ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
    else if (VA.getLocInfo() == CCValAssign::ZExt)
      ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy);
    else if (VA.getLocInfo() == CCValAssign::AExt) {
      // i1 mask vectors have their own bitcast/extend lowering.
      if (ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1)
        ValToCopy = lowerMasksToReg(ValToCopy, VA.getLocVT(), dl, DAG);
      else
        ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
    }
    else if (VA.getLocInfo() == CCValAssign::BCvt)
      ValToCopy = DAG.getBitcast(VA.getLocVT(), ValToCopy);

    assert(VA.getLocInfo() != CCValAssign::FPExt &&
           "Unexpected FP-extend for return value.");

    // If an FP value was assigned to an SSE register but SSE is disabled,
    // report the error and retarget the value to the x87 stack (FP0).
    if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
      errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
      VA.convertToReg(X86::FP0);
    } else if (!Subtarget.hasSSE2() &&
               X86::FR64XRegClass.contains(VA.getLocReg()) &&
               ValVT == MVT::f64) {
      errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
      VA.convertToReg(X86::FP0);
    }

    // x87 stack returns (FP0/FP1): extend SSE-typed scalars to f80 and defer
    // the copy — these values become direct return-node operands later.
    if (VA.getLocReg() == X86::FP0 ||
        VA.getLocReg() == X86::FP1) {
      if (isScalarFPTypeInSSEReg(VA.getValVT()))
        ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
      RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));

      continue;
    }

    // 64-bit: an x86mmx value returned in XMM0/XMM1 is wrapped in a v2i64
    // (or v4f32 without SSE2) vector.
    if (Subtarget.is64Bit()) {
      if (ValVT == MVT::x86mmx) {
        if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
          ValToCopy = DAG.getBitcast(MVT::i64, ValToCopy);
          ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
                                  ValToCopy);

          if (!Subtarget.hasSSE2())
            ValToCopy = DAG.getBitcast(MVT::v4f32, ValToCopy);
        }
      }
    }

    if (VA.needsCustom()) {
      assert(VA.getValVT() == MVT::v64i1 &&
             "Currently the only custom case is when we split v64i1 to 2 regs");

      // Split the v64i1 value across this location and the next one
      // (consumes RVLocs[I+1]).
      Passv64i1ArgInRegs(dl, DAG, ValToCopy, RetVals, VA, RVLocs[++I],
                         Subtarget);

      // The second half's register also needs its CSR status cleared.
      if (ShouldDisableCalleeSavedRegister)
        MF.getRegInfo().disableCalleeSavedRegister(RVLocs[I].getLocReg());
    } else {
      RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
    }
  }

  SDValue Flag;
  SmallVector<SDValue, 6> RetOps;
  RetOps.push_back(Chain);

  // Operand #1: number of bytes the callee pops on return.
  RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), dl,
                                         MVT::i32));

  // Emit the register copies, glueing them together so they stay adjacent to
  // the return; x87 values bypass CopyToReg and ride on the return node.
  for (auto &RetVal : RetVals) {
    if (RetVal.first == X86::FP0 || RetVal.first == X86::FP1) {
      RetOps.push_back(RetVal.second);
      continue;
    }

    Chain = DAG.getCopyToReg(Chain, dl, RetVal.first, RetVal.second, Flag);
    Flag = Chain.getValue(1);
    RetOps.push_back(
        DAG.getRegister(RetVal.first, RetVal.second.getValueType()));
  }

  // If the function returns a struct through an sret pointer, the ABI also
  // requires that pointer back in RAX/EAX on return.
  if (Register SRetReg = FuncInfo->getSRetReturnReg()) {
    SDValue Val = DAG.getCopyFromReg(RetOps[0], dl, SRetReg,
                                     getPointerTy(MF.getDataLayout()));

    Register RetValReg
        = (Subtarget.is64Bit() && !Subtarget.isTarget64BitILP32()) ?
          X86::RAX : X86::EAX;
    Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Flag);
    Flag = Chain.getValue(1);

    // RAX/EAX now holds a live-out value.
    RetOps.push_back(
        DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));

    if (ShouldDisableCalleeSavedRegister)
      MF.getRegInfo().disableCalleeSavedRegister(RetValReg);
  }

  // Callee-saved registers returned via copy (null-terminated list, GR64
  // only) are added as extra return operands.
  const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
  const MCPhysReg *I =
      TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
  if (I) {
    for (; *I; ++I) {
      if (X86::GR64RegClass.contains(*I))
        RetOps.push_back(DAG.getRegister(*I, MVT::i64));
      else
        llvm_unreachable("Unexpected register class in CSRsViaCopy!");
    }
  }

  // Update the chain operand with the fully-built copy chain.
  RetOps[0] = Chain;

  // Append the glue from the last copy, if any.
  if (Flag.getNode())
    RetOps.push_back(Flag);

  // Interrupt handlers return with IRET instead of RET.
  X86ISD::NodeType opcode = X86ISD::RET_FLAG;
  if (CallConv == CallingConv::X86_INTR)
    opcode = X86ISD::IRET;
  return DAG.getNode(opcode, dl, MVT::Other, RetOps);
}
| 2884 | |
/// Return true if the single use of \p N (possibly looking through a
/// CopyToReg or FP_EXTEND) is an X86 return node, in which case update
/// \p Chain to the chain a tail call could use.  Used when deciding whether
/// a libcall may be emitted as a tail call.
bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
  // N must produce exactly one value with exactly one use.
  if (N->getNumValues() != 1 || !N->hasNUsesOfValue(1, 0))
    return false;

  SDValue TCChain = Chain;
  SDNode *Copy = *N->use_begin();
  if (Copy->getOpcode() == ISD::CopyToReg) {
    // If the copy has a glue operand, we conservatively assume it isn't safe
    // to perform a tail call.
    if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
      return false;
    TCChain = Copy->getOperand(0);
  } else if (Copy->getOpcode() != ISD::FP_EXTEND)
    return false;

  bool HasRet = false;
  for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end();
       UI != UE; ++UI) {
    // Every user of the copy must itself be a RET_FLAG node.
    if (UI->getOpcode() != X86ISD::RET_FLAG)
      return false;
    // If the return carries more than one result value we cannot tail call;
    // the 4-operand form is only allowed when the last operand is glue.
    if (UI->getNumOperands() > 4)
      return false;
    if (UI->getNumOperands() == 4 &&
        UI->getOperand(UI->getNumOperands()-1).getValueType() != MVT::Glue)
      return false;
    HasRet = true;
  }

  if (!HasRet)
    return false;

  Chain = TCChain;
  return true;
}
| 2921 | |
| 2922 | EVT X86TargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT, |
| 2923 | ISD::NodeType ExtendKind) const { |
| 2924 | MVT ReturnMVT = MVT::i32; |
| 2925 | |
| 2926 | bool Darwin = Subtarget.getTargetTriple().isOSDarwin(); |
| 2927 | if (VT == MVT::i1 || (!Darwin && (VT == MVT::i8 || VT == MVT::i16))) { |
| 2928 | |
| 2929 | |
| 2930 | |
| 2931 | |
| 2932 | |
| 2933 | ReturnMVT = MVT::i8; |
| 2934 | } |
| 2935 | |
| 2936 | EVT MinVT = getRegisterType(Context, ReturnMVT); |
| 2937 | return VT.bitsLT(MinVT) ? MinVT : VT; |
| 2938 | } |
| 2939 | |
| 2940 | |
| 2941 | |
| 2942 | |
| 2943 | |
| 2944 | |
| 2945 | |
| 2946 | |
| 2947 | |
| 2948 | |
/// Read two 32-bit registers and reassemble them into a single v64i1 mask
/// value (used on 32-bit AVX512BW targets where a v64i1 is split across two
/// GR32 locations).
/// \param VA      location of the low 32 bits.
/// \param NextVA  location of the high 32 bits.
/// \param Root    chain/root node the copies hang off.
/// \param InFlag  when non-null, the copies read physical registers and are
///                glued together through *InFlag (updated in place);
///                when null, intermediate virtual registers are created.
static SDValue getv64i1Argument(CCValAssign &VA, CCValAssign &NextVA,
                                SDValue &Root, SelectionDAG &DAG,
                                const SDLoc &Dl, const X86Subtarget &Subtarget,
                                SDValue *InFlag = nullptr) {
  assert((Subtarget.hasBWI()) && "Expected AVX512BW target!");
  assert(Subtarget.is32Bit() && "Expecting 32 bit target");
  assert(VA.getValVT() == MVT::v64i1 &&
         "Expecting first location of 64 bit width type");
  assert(NextVA.getValVT() == VA.getValVT() &&
         "The locations should have the same type");
  assert(VA.isRegLoc() && NextVA.isRegLoc() &&
         "The values should reside in two registers");

  SDValue Lo, Hi;
  SDValue ArgValueLo, ArgValueHi;

  MachineFunction &MF = DAG.getMachineFunction();
  const TargetRegisterClass *RC = &X86::GR32RegClass;

  // Read the two 32-bit halves from their registers.
  if (nullptr == InFlag) {
    // No glue requested: create intermediate virtual registers (live-ins)
    // and copy out of them.
    Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
    ArgValueLo = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32);
    Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
    ArgValueHi = DAG.getCopyFromReg(Root, Dl, Reg, MVT::i32);
  } else {
    // Glued form: read directly from the physical registers, threading the
    // glue value through both copies so they stay adjacent.
    ArgValueLo =
        DAG.getCopyFromReg(Root, Dl, VA.getLocReg(), MVT::i32, *InFlag);
    *InFlag = ArgValueLo.getValue(2);
    ArgValueHi =
        DAG.getCopyFromReg(Root, Dl, NextVA.getLocReg(), MVT::i32, *InFlag);
    *InFlag = ArgValueHi.getValue(2);
  }

  // Reinterpret the low i32 as a v32i1 mask.
  Lo = DAG.getBitcast(MVT::v32i1, ArgValueLo);

  // Reinterpret the high i32 as a v32i1 mask.
  Hi = DAG.getBitcast(MVT::v32i1, ArgValueHi);

  // Concatenate the halves into the full 64-bit mask.
  return DAG.getNode(ISD::CONCAT_VECTORS, Dl, MVT::v64i1, Lo, Hi);
}
| 2996 | |
| 2997 | |
| 2998 | |
| 2999 | |
| 3000 | static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT, |
| 3001 | const EVT &ValLoc, const SDLoc &Dl, |
| 3002 | SelectionDAG &DAG) { |
| 3003 | SDValue ValReturned = ValArg; |
| 3004 | |
| 3005 | if (ValVT == MVT::v1i1) |
| 3006 | return DAG.getNode(ISD::SCALAR_TO_VECTOR, Dl, MVT::v1i1, ValReturned); |
| 3007 | |
| 3008 | if (ValVT == MVT::v64i1) { |
| 3009 | |
| 3010 | assert(ValLoc == MVT::i64 && "Expecting only i64 locations"); |
| 3011 | |
| 3012 | } else { |
| 3013 | MVT maskLen; |
| 3014 | switch (ValVT.getSimpleVT().SimpleTy) { |
| 3015 | case MVT::v8i1: |
| 3016 | maskLen = MVT::i8; |
| 3017 | break; |
| 3018 | case MVT::v16i1: |
| 3019 | maskLen = MVT::i16; |
| 3020 | break; |
| 3021 | case MVT::v32i1: |
| 3022 | maskLen = MVT::i32; |
| 3023 | break; |
| 3024 | default: |
| 3025 | llvm_unreachable("Expecting a vector of i1 types"); |
| 3026 | } |
| 3027 | |
| 3028 | ValReturned = DAG.getNode(ISD::TRUNCATE, Dl, maskLen, ValReturned); |
| 3029 | } |
| 3030 | return DAG.getBitcast(ValVT, ValReturned); |
| 3031 | } |
| 3032 | |
| 3033 | |
| 3034 | |
| 3035 | |
/// Lower the result values of a call into the appropriate copies out of
/// physical registers, appending one SDValue per result to \p InVals.
/// \p RegMask, when non-null, has the bits for every returned register (and
/// its sub-registers) cleared so those registers are not treated as preserved.
SDValue X86TargetLowering::LowerCallResult(
    SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
    uint32_t *RegMask) const {

  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();

  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
                 *DAG.getContext());
  CCInfo.AnalyzeCallResult(Ins, RetCC_X86);

  // Copy each result register out of its specified physreg.
  for (unsigned I = 0, InsIndex = 0, E = RVLocs.size(); I != E;
       ++I, ++InsIndex) {
    CCValAssign &VA = RVLocs[I];
    EVT CopyVT = VA.getLocVT();

    // In some calling conventions we need to remove the used registers
    // (and all their sub-registers) from the register mask.
    if (RegMask) {
      for (MCSubRegIterator SubRegs(VA.getLocReg(), TRI, /*IncludeSelf=*/true);
           SubRegs.isValid(); ++SubRegs)
        RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));
    }

    // Report an error if an FP value was assigned to an XMM return register
    // but the required SSE level is disabled; retarget the location to an
    // x87 stack register so later code doesn't assert.
    if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
      errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
      if (VA.getLocReg() == X86::XMM1)
        VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
      else
        VA.convertToReg(X86::FP0);
    } else if (!Subtarget.hasSSE2() &&
               X86::FR64XRegClass.contains(VA.getLocReg()) &&
               CopyVT == MVT::f64) {
      errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
      if (VA.getLocReg() == X86::XMM1)
        VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
      else
        VA.convertToReg(X86::FP0);
    }

    // If we prefer the value in an SSE register but it is being returned in
    // an x87 stack register, copy it out as f80 and round afterwards.
    bool RoundAfterCopy = false;
    if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) &&
        isScalarFPTypeInSSEReg(VA.getValVT())) {
      if (!Subtarget.hasX87())
        report_fatal_error("X87 register return with X87 disabled");
      CopyVT = MVT::f80;
      RoundAfterCopy = (CopyVT != VA.getLocVT());
    }

    SDValue Val;
    if (VA.needsCustom()) {
      assert(VA.getValVT() == MVT::v64i1 &&
             "Currently the only custom case is when we split v64i1 to 2 regs");
      // Consumes this location and the next one (the second half of the mask).
      Val =
          getv64i1Argument(VA, RVLocs[++I], Chain, DAG, dl, Subtarget, &InFlag);
    } else {
      Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), CopyVT, InFlag)
                  .getValue(1);
      Val = Chain.getValue(0);
      InFlag = Chain.getValue(2);
    }

    if (RoundAfterCopy)
      Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
                        // This truncation won't change the value.
                        DAG.getIntPtrConstant(1, dl));

    if (VA.isExtInLoc()) {
      if (VA.getValVT().isVector() &&
          VA.getValVT().getScalarType() == MVT::i1 &&
          ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
           (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
        // Mask type (v*i1) promoted into an i8/i16/i32/i64 register.
        Val = lowerRegToMasks(Val, VA.getValVT(), VA.getLocVT(), dl, DAG);
      } else
        Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
    }

    if (VA.getLocInfo() == CCValAssign::BCvt)
      Val = DAG.getBitcast(VA.getValVT(), Val);

    InVals.push_back(Val);
  }

  return Chain;
}
| 3129 | |
| 3130 | |
| 3131 | |
| 3132 | |
| 3133 | |
| 3134 | |
| 3135 | |
| 3136 | |
| 3137 | |
| 3138 | |
| 3139 | |
| 3140 | |
| 3141 | |
/// Classification of how an sret (struct-return) pointer is conveyed.
enum StructReturnType {
  NotStructReturn,  // No sret argument present.
  RegStructReturn,  // sret pointer passed in a register (inreg, or MCU target).
  StackStructReturn // sret pointer passed on the stack.
};
| 3147 | static StructReturnType |
| 3148 | callIsStructReturn(ArrayRef<ISD::OutputArg> Outs, bool IsMCU) { |
| 3149 | if (Outs.empty()) |
| 3150 | return NotStructReturn; |
| 3151 | |
| 3152 | const ISD::ArgFlagsTy &Flags = Outs[0].Flags; |
| 3153 | if (!Flags.isSRet()) |
| 3154 | return NotStructReturn; |
| 3155 | if (Flags.isInReg() || IsMCU) |
| 3156 | return RegStructReturn; |
| 3157 | return StackStructReturn; |
| 3158 | } |
| 3159 | |
| 3160 | |
| 3161 | static StructReturnType |
| 3162 | argsAreStructReturn(ArrayRef<ISD::InputArg> Ins, bool IsMCU) { |
| 3163 | if (Ins.empty()) |
| 3164 | return NotStructReturn; |
| 3165 | |
| 3166 | const ISD::ArgFlagsTy &Flags = Ins[0].Flags; |
| 3167 | if (!Flags.isSRet()) |
| 3168 | return NotStructReturn; |
| 3169 | if (Flags.isInReg() || IsMCU) |
| 3170 | return RegStructReturn; |
| 3171 | return StackStructReturn; |
| 3172 | } |
| 3173 | |
| 3174 | |
| 3175 | |
| 3176 | |
/// Make a copy of an aggregate from address \p Src to address \p Dst using
/// the size and alignment recorded in the byval parameter attribute flags.
/// The copy is emitted as a non-volatile, always-inline memcpy.
static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
                                         SDValue Chain, ISD::ArgFlagsTy Flags,
                                         SelectionDAG &DAG, const SDLoc &dl) {
  SDValue SizeNode = DAG.getIntPtrConstant(Flags.getByValSize(), dl);

  return DAG.getMemcpy(
      Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(),
      /*isVolatile*/ false, /*AlwaysInline=*/true,
      /*isTailCall*/ false, MachinePointerInfo(), MachinePointerInfo());
}
| 3187 | |
| 3188 | |
| 3189 | static bool canGuaranteeTCO(CallingConv::ID CC) { |
| 3190 | return (CC == CallingConv::Fast || CC == CallingConv::GHC || |
| 3191 | CC == CallingConv::X86_RegCall || CC == CallingConv::HiPE || |
| 3192 | CC == CallingConv::HHVM || CC == CallingConv::Tail || |
| 3193 | CC == CallingConv::SwiftTail); |
| 3194 | } |
| 3195 | |
| 3196 | |
/// Return true if we might ever do a tail call with this calling convention.
static bool mayTailCallThisCC(CallingConv::ID CC) {
  switch (CC) {
  // C calling conventions:
  case CallingConv::C:
  case CallingConv::Win64:
  case CallingConv::X86_64_SysV:
  // Callee pop conventions:
  case CallingConv::X86_ThisCall:
  case CallingConv::X86_StdCall:
  case CallingConv::X86_VectorCall:
  case CallingConv::X86_FastCall:
  // Swift:
  case CallingConv::Swift:
    return true;
  default:
    // Anything else is tail-callable only if TCO can be guaranteed for it.
    return canGuaranteeTCO(CC);
  }
}
| 3215 | |
| 3216 | |
| 3217 | |
| 3218 | static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) { |
| 3219 | return (GuaranteedTailCallOpt && canGuaranteeTCO(CC)) || |
| 3220 | CC == CallingConv::Tail || CC == CallingConv::SwiftTail; |
| 3221 | } |
| 3222 | |
| 3223 | bool X86TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const { |
| 3224 | if (!CI->isTailCall()) |
| 3225 | return false; |
| 3226 | |
| 3227 | CallingConv::ID CalleeCC = CI->getCallingConv(); |
| 3228 | if (!mayTailCallThisCC(CalleeCC)) |
| 3229 | return false; |
| 3230 | |
| 3231 | return true; |
| 3232 | } |
| 3233 | |
/// Materialize one incoming argument that was passed in memory: create (or
/// find) the fixed stack object holding it and return either its address
/// (byval) or a load of its value, possibly with copy elision.
SDValue
X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
                                    const SmallVectorImpl<ISD::InputArg> &Ins,
                                    const SDLoc &dl, SelectionDAG &DAG,
                                    const CCValAssign &VA,
                                    MachineFrameInfo &MFI, unsigned i) const {
  // Create the nodes corresponding to a load from this parameter slot.
  ISD::ArgFlagsTy Flags = Ins[i].Flags;
  // Under guaranteed TCO, argument slots may be overwritten when lowering a
  // tail call's outgoing arguments, so they must stay mutable.
  bool AlwaysUseMutable = shouldGuaranteeTCO(
      CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt);
  bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
  EVT ValVT;
  MVT PtrVT = getPointerTy(DAG.getDataLayout());

  // A mask (v*i1) extended in its memory location needs special handling
  // below, but only when the value and location sizes actually differ.
  bool ExtendedInMem =
      VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1 &&
      VA.getValVT().getSizeInBits() != VA.getLocVT().getSizeInBits();

  // For indirect or in-memory-extended arguments, load the location type and
  // convert to the value type afterwards.
  if (VA.getLocInfo() == CCValAssign::Indirect || ExtendedInMem)
    ValVT = VA.getLocVT();
  else
    ValVT = VA.getValVT();

  // byval: hand back the address of the caller-created copy rather than
  // loading the aggregate.
  if (Flags.isByVal()) {
    unsigned Bytes = Flags.getByValSize();
    if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.

    int FI = MFI.CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable,
                                   /*isAliased=*/true);
    return DAG.getFrameIndex(FI, PtrVT);
  }

  EVT ArgVT = Ins[i].ArgVT;

  // If this is a vector that has been split into multiple scalar parts, don't
  // elide the copy: the stack layout may not match the packed in-memory
  // layout of the vector.
  bool ScalarizedAndExtendedVector =
      ArgVT.isVector() && !VA.getLocVT().isVector() &&
      VA.getLocVT().getSizeInBits() != ArgVT.getScalarSizeInBits();

  // Copy elision: when the argument lies directly in memory with no
  // extension or indirection, load it from its original stack slot instead
  // of creating a fresh one.
  if (Flags.isCopyElisionCandidate() &&
      VA.getLocInfo() != CCValAssign::Indirect && !ExtendedInMem &&
      !ScalarizedAndExtendedVector) {
    SDValue PartAddr;
    if (Ins[i].PartOffset == 0) {
      // First (or only) part of the value: create a fixed object covering the
      // whole argument and load from it.
      int FI = MFI.CreateFixedObject(ArgVT.getStoreSize(), VA.getLocMemOffset(),
                                     /*IsImmutable=*/false);
      PartAddr = DAG.getFrameIndex(FI, PtrVT);
      return DAG.getLoad(
          ValVT, dl, Chain, PartAddr,
          MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
    } else {
      // A later part of a multi-part value: scan the existing fixed objects
      // for one that fully contains this part's byte range (presumably
      // created by the PartOffset == 0 branch above) and load from the
      // appropriate offset into it.
      int64_t PartBegin = VA.getLocMemOffset();
      int64_t PartEnd = PartBegin + ValVT.getSizeInBits() / 8;
      int FI = MFI.getObjectIndexBegin();
      for (; MFI.isFixedObjectIndex(FI); ++FI) {
        int64_t ObjBegin = MFI.getObjectOffset(FI);
        int64_t ObjEnd = ObjBegin + MFI.getObjectSize(FI);
        if (ObjBegin <= PartBegin && PartEnd <= ObjEnd)
          break;
      }
      if (MFI.isFixedObjectIndex(FI)) {
        SDValue Addr =
            DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getFrameIndex(FI, PtrVT),
                        DAG.getIntPtrConstant(Ins[i].PartOffset, dl));
        return DAG.getLoad(
            ValVT, dl, Chain, Addr,
            MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI,
                                              Ins[i].PartOffset));
      }
      // No containing object found: fall through to the generic path.
    }
  }

  // Generic path: create a fixed object for this argument's slot.
  int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
                                 VA.getLocMemOffset(), isImmutable);

  // Record how the in-memory value was extended, so later loads can use it.
  if (VA.getLocInfo() == CCValAssign::ZExt) {
    MFI.setObjectZExt(FI, true);
  } else if (VA.getLocInfo() == CCValAssign::SExt) {
    MFI.setObjectSExt(FI, true);
  }

  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
  SDValue Val = DAG.getLoad(
      ValVT, dl, Chain, FIN,
      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
  // For in-memory-extended masks, convert back to the value type: wrap a
  // scalar into a vector, or truncate the wider load.
  return ExtendedInMem
             ? (VA.getValVT().isVector()
                    ? DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VA.getValVT(), Val)
                    : DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val))
             : Val;
}
| 3350 | |
| 3351 | |
| 3352 | static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv, |
| 3353 | const X86Subtarget &Subtarget) { |
| 3354 | assert(Subtarget.is64Bit()); |
| 3355 | |
| 3356 | if (Subtarget.isCallingConvWin64(CallConv)) { |
| 3357 | static const MCPhysReg GPR64ArgRegsWin64[] = { |
| 3358 | X86::RCX, X86::RDX, X86::R8, X86::R9 |
| 3359 | }; |
| 3360 | return makeArrayRef(std::begin(GPR64ArgRegsWin64), std::end(GPR64ArgRegsWin64)); |
| 3361 | } |
| 3362 | |
| 3363 | static const MCPhysReg GPR64ArgRegs64Bit[] = { |
| 3364 | X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9 |
| 3365 | }; |
| 3366 | return makeArrayRef(std::begin(GPR64ArgRegs64Bit), std::end(GPR64ArgRegs64Bit)); |
| 3367 | } |
| 3368 | |
| 3369 | |
| 3370 | static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF, |
| 3371 | CallingConv::ID CallConv, |
| 3372 | const X86Subtarget &Subtarget) { |
| 3373 | assert(Subtarget.is64Bit()); |
| 3374 | if (Subtarget.isCallingConvWin64(CallConv)) { |
| 3375 | |
| 3376 | |
| 3377 | |
| 3378 | |
| 3379 | return None; |
| 3380 | } |
| 3381 | |
| 3382 | bool isSoftFloat = Subtarget.useSoftFloat(); |
| 3383 | if (isSoftFloat || !Subtarget.hasSSE1()) |
| 3384 | |
| 3385 | |
| 3386 | return None; |
| 3387 | |
| 3388 | static const MCPhysReg XMMArgRegs64Bit[] = { |
| 3389 | X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3, |
| 3390 | X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7 |
| 3391 | }; |
| 3392 | return makeArrayRef(std::begin(XMMArgRegs64Bit), std::end(XMMArgRegs64Bit)); |
| 3393 | } |
| 3394 | |
| 3395 | #ifndef NDEBUG |
| 3396 | static bool isSortedByValueNo(ArrayRef<CCValAssign> ArgLocs) { |
| 3397 | return llvm::is_sorted( |
| 3398 | ArgLocs, [](const CCValAssign &A, const CCValAssign &B) -> bool { |
| 3399 | return A.getValNo() < B.getValNo(); |
| 3400 | }); |
| 3401 | } |
| 3402 | #endif |
| 3403 | |
namespace {
/// Bundles the state needed to lower the variadic portion of an x86
/// function's formal arguments: the va_start register-save area and the
/// registers forwarded for a musttail call.
class VarArgsLoweringHelper {
public:
  VarArgsLoweringHelper(X86MachineFunctionInfo *FuncInfo, const SDLoc &Loc,
                        SelectionDAG &DAG, const X86Subtarget &Subtarget,
                        CallingConv::ID CallConv, CCState &CCInfo)
      : FuncInfo(FuncInfo), DL(Loc), DAG(DAG), Subtarget(Subtarget),
        TheMachineFunction(DAG.getMachineFunction()),
        TheFunction(TheMachineFunction.getFunction()),
        FrameInfo(TheMachineFunction.getFrameInfo()),
        FrameLowering(*Subtarget.getFrameLowering()),
        TargLowering(DAG.getTargetLoweringInfo()), CallConv(CallConv),
        CCInfo(CCInfo) {}

  // Lower variable-argument parameters (entry point).
  void lowerVarArgsParameters(SDValue &Chain, unsigned StackSize);

private:
  // Create the vararg frame area and spill unallocated argument registers.
  void createVarArgAreaAndStoreRegisters(SDValue &Chain, unsigned StackSize);

  // Copy registers that must be forwarded to a musttail call into vregs.
  void forwardMustTailParameters(SDValue &Chain);

  bool is64Bit() const { return Subtarget.is64Bit(); }
  bool isWin64() const { return Subtarget.isCallingConvWin64(CallConv); }

  X86MachineFunctionInfo *FuncInfo;
  const SDLoc &DL;
  SelectionDAG &DAG;
  const X86Subtarget &Subtarget;
  MachineFunction &TheMachineFunction;
  const Function &TheFunction;
  MachineFrameInfo &FrameInfo;
  const TargetFrameLowering &FrameLowering;
  const TargetLowering &TargLowering;
  CallingConv::ID CallConv;
  CCState &CCInfo;
};
} // namespace
| 3443 | |
void VarArgsLoweringHelper::createVarArgAreaAndStoreRegisters(
    SDValue &Chain, unsigned StackSize) {
  // Make a frame index for the start of the first vararg value, for
  // expansion of llvm.va_start. Skipped for 32-bit fastcall/thiscall, which
  // handle the vararg area differently.
  if (is64Bit() || (CallConv != CallingConv::X86_FastCall &&
                    CallConv != CallingConv::X86_ThisCall)) {
    FuncInfo->setVarArgsFrameIndex(
        FrameInfo.CreateFixedObject(1, StackSize, true));
  }

  // 64-bit conventions also pass varargs in registers, so those registers
  // must be spilled into a save area in the prologue.
  if (is64Bit()) {
    // Find the first unallocated argument registers.
    ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget);
    ArrayRef<MCPhysReg> ArgXMMs =
        get64BitArgumentXMMs(TheMachineFunction, CallConv, Subtarget);
    unsigned NumIntRegs = CCInfo.getFirstUnallocated(ArgGPRs);
    unsigned NumXMMRegs = CCInfo.getFirstUnallocated(ArgXMMs);

    assert(!(NumXMMRegs && !Subtarget.hasSSE1()) &&
           "SSE register cannot be used when SSE is disabled!");

    if (isWin64()) {
      // Point the register-save frame index at the caller-allocated home
      // area; +8 accounts for the return address slot.
      int HomeOffset = FrameLowering.getOffsetOfLocalArea() + 8;
      FuncInfo->setRegSaveFrameIndex(
          FrameInfo.CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
      // If some of the 4 shadow-area slots are still unallocated, varargs
      // start inside the register save area itself.
      if (NumIntRegs < 4)
        FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
    } else {
      // SysV: record offsets of the GP and FP portions of the save area and
      // allocate a local stack object big enough for all of it
      // (6 GPRs * 8 bytes + 8 XMM regs * 16 bytes).
      FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
      FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16);
      FuncInfo->setRegSaveFrameIndex(FrameInfo.CreateStackObject(
          ArgGPRs.size() * 8 + ArgXMMs.size() * 16, Align(16), false));
    }

    SmallVector<SDValue, 6>
        LiveGPRs; // Live-in values for the remaining GP argument registers.
    SmallVector<SDValue, 8>
        LiveXMMRegs; // Register nodes for the remaining XMM argument regs.

    SDValue ALVal; // Live-in AL (used by the XMM save pseudo below).

    // Gather all the live-in physical registers.
    for (MCPhysReg Reg : ArgGPRs.slice(NumIntRegs)) {
      Register GPR = TheMachineFunction.addLiveIn(Reg, &X86::GR64RegClass);
      LiveGPRs.push_back(DAG.getCopyFromReg(Chain, DL, GPR, MVT::i64));
    }
    const auto &AvailableXmms = ArgXMMs.slice(NumXMMRegs);
    if (!AvailableXmms.empty()) {
      Register AL = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass);
      ALVal = DAG.getCopyFromReg(Chain, DL, AL, MVT::i8);
      for (MCPhysReg Reg : AvailableXmms) {
        // Pass the physical registers (not copies through vregs) to
        // VASTART_SAVE_XMM_REGS; spilling vregs at block boundaries would
        // otherwise create xmm uses outside the %al guard.
        TheMachineFunction.getRegInfo().addLiveIn(Reg);
        LiveXMMRegs.push_back(DAG.getRegister(Reg, MVT::v4f32));
      }
    }

    // Store the integer parameter registers into the save area.
    SmallVector<SDValue, 8> MemOps;
    SDValue RSFIN =
        DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
                          TargLowering.getPointerTy(DAG.getDataLayout()));
    unsigned Offset = FuncInfo->getVarArgsGPOffset();
    for (SDValue Val : LiveGPRs) {
      SDValue FIN = DAG.getNode(ISD::ADD, DL,
                                TargLowering.getPointerTy(DAG.getDataLayout()),
                                RSFIN, DAG.getIntPtrConstant(Offset, DL));
      SDValue Store =
          DAG.getStore(Val.getValue(1), DL, Val, FIN,
                       MachinePointerInfo::getFixedStack(
                           DAG.getMachineFunction(),
                           FuncInfo->getRegSaveFrameIndex(), Offset));
      MemOps.push_back(Store);
      Offset += 8; // Each GPR slot is 8 bytes.
    }

    // Store the XMM (fp + vector) parameter registers via a pseudo that is
    // expanded later, guarded by the value in AL.
    if (!LiveXMMRegs.empty()) {
      SmallVector<SDValue, 12> SaveXMMOps;
      SaveXMMOps.push_back(Chain);
      SaveXMMOps.push_back(ALVal);
      SaveXMMOps.push_back(
          DAG.getTargetConstant(FuncInfo->getRegSaveFrameIndex(), DL, MVT::i32));
      SaveXMMOps.push_back(
          DAG.getTargetConstant(FuncInfo->getVarArgsFPOffset(), DL, MVT::i32));
      llvm::append_range(SaveXMMOps, LiveXMMRegs);
      MemOps.push_back(DAG.getNode(X86ISD::VASTART_SAVE_XMM_REGS, DL,
                                   MVT::Other, SaveXMMOps));
    }

    if (!MemOps.empty())
      Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
  }
}
| 3549 | |
void VarArgsLoweringHelper::forwardMustTailParameters(SDValue &Chain) {
  // Find the largest legal vector type for forwarding vector registers.
  MVT VecVT = MVT::Other;
  if (Subtarget.useAVX512Regs() &&
      (is64Bit() || (CallConv == CallingConv::X86_VectorCall ||
                     CallConv == CallingConv::Intel_OCL_BI)))
    VecVT = MVT::v16f32;
  else if (Subtarget.hasAVX())
    VecVT = MVT::v8f32;
  else if (Subtarget.hasSSE2())
    VecVT = MVT::v4f32;

  // We forward some GPRs and, when available, some vector registers.
  SmallVector<MVT, 2> RegParmTypes;
  MVT IntVT = is64Bit() ? MVT::i64 : MVT::i32;
  RegParmTypes.push_back(IntVT);
  if (VecVT != MVT::Other)
    RegParmTypes.push_back(VecVT);

  // Compute the set of registers that must be forwarded to the musttail call.
  SmallVectorImpl<ForwardedRegister> &Forwards =
      FuncInfo->getForwardedMustTailRegParms();
  CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, CC_X86);

  // On non-Win64 64-bit targets also forward AL if it wasn't allocated as a
  // parameter (it carries vararg state for the callee).
  if (is64Bit() && !isWin64() && !CCInfo.isAllocated(X86::AL)) {
    Register ALVReg = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass);
    Forwards.push_back(ForwardedRegister(ALVReg, X86::AL, MVT::i8));
  }

  // Copy each forwarded register out of its live-in vreg into a fresh
  // virtual register of the appropriate class, updating the record in place.
  for (ForwardedRegister &FR : Forwards) {
    SDValue RegVal = DAG.getCopyFromReg(Chain, DL, FR.VReg, FR.VT);
    FR.VReg = TheMachineFunction.getRegInfo().createVirtualRegister(
        TargLowering.getRegClassFor(FR.VT));
    Chain = DAG.getCopyToReg(Chain, DL, FR.VReg, RegVal);
  }
}
| 3590 | |
void VarArgsLoweringHelper::lowerVarArgsParameters(SDValue &Chain,
                                                   unsigned StackSize) {
  // Start with recognizable sentinel values so accidental uses of an
  // uninitialized frame index are easy to spot; overwritten below as needed.
  FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
  FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);

  // Only materialize the register-save area if the function calls va_start.
  if (FrameInfo.hasVAStart())
    createVarArgAreaAndStoreRegisters(Chain, StackSize);

  // Forward registers needed by a musttail call inside a varargs function.
  if (FrameInfo.hasMustTailInVarArgFunc())
    forwardMustTailParameters(Chain);
}
| 3604 | |
| 3605 | SDValue X86TargetLowering::LowerFormalArguments( |
| 3606 | SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, |
| 3607 | const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl, |
| 3608 | SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const { |
| 3609 | MachineFunction &MF = DAG.getMachineFunction(); |
| 3610 | X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>(); |
| 3611 | |
| 3612 | const Function &F = MF.getFunction(); |
| 3613 | if (F.hasExternalLinkage() && Subtarget.isTargetCygMing() && |
| 3614 | F.getName() == "main") |
| 3615 | FuncInfo->setForceFramePointer(true); |
| 3616 | |
| 3617 | MachineFrameInfo &MFI = MF.getFrameInfo(); |
| 3618 | bool Is64Bit = Subtarget.is64Bit(); |
| 3619 | bool IsWin64 = Subtarget.isCallingConvWin64(CallConv); |
| 3620 | |
| 3621 | assert( |
| 3622 | !(IsVarArg && canGuaranteeTCO(CallConv)) && |
| 3623 | "Var args not supported with calling conv' regcall, fastcc, ghc or hipe"); |
| 3624 | |
| 3625 | |
| 3626 | SmallVector<CCValAssign, 16> ArgLocs; |
| 3627 | CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); |
| 3628 | |
| 3629 | |
| 3630 | if (IsWin64) |
| 3631 | CCInfo.AllocateStack(32, Align(8)); |
| 3632 | |
| 3633 | CCInfo.AnalyzeArguments(Ins, CC_X86); |
| 3634 | |
| 3635 | |
| 3636 | |
| 3637 | if (CallingConv::X86_VectorCall == CallConv) { |
| 3638 | CCInfo.AnalyzeArgumentsSecondPass(Ins, CC_X86); |
| 3639 | } |
| 3640 | |
| 3641 | |
| 3642 | |
| 3643 | assert(isSortedByValueNo(ArgLocs) && |
| 3644 | "Argument Location list must be sorted before lowering"); |
| 3645 | |
| 3646 | SDValue ArgValue; |
| 3647 | for (unsigned I = 0, InsIndex = 0, E = ArgLocs.size(); I != E; |
| 3648 | ++I, ++InsIndex) { |
| 3649 | assert(InsIndex < Ins.size() && "Invalid Ins index"); |
| 3650 | CCValAssign &VA = ArgLocs[I]; |
| 3651 | |
| 3652 | if (VA.isRegLoc()) { |
| 3653 | EVT RegVT = VA.getLocVT(); |
| 3654 | if (VA.needsCustom()) { |
| 3655 | assert( |
| 3656 | VA.getValVT() == MVT::v64i1 && |
| 3657 | "Currently the only custom case is when we split v64i1 to 2 regs"); |
| 3658 | |
| 3659 | |
| 3660 | |
| 3661 | ArgValue = |
| 3662 | getv64i1Argument(VA, ArgLocs[++I], Chain, DAG, dl, Subtarget); |
| 3663 | } else { |
| 3664 | const TargetRegisterClass *RC; |
| 3665 | if (RegVT == MVT::i8) |
| 3666 | RC = &X86::GR8RegClass; |
| 3667 | else if (RegVT == MVT::i16) |
| 3668 | RC = &X86::GR16RegClass; |
| 3669 | else if (RegVT == MVT::i32) |
| 3670 | RC = &X86::GR32RegClass; |
| 3671 | else if (Is64Bit && RegVT == MVT::i64) |
| 3672 | RC = &X86::GR64RegClass; |
| 3673 | else if (RegVT == MVT::f32) |
| 3674 | RC = Subtarget.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass; |
| 3675 | else if (RegVT == MVT::f64) |
| 3676 | RC = Subtarget.hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass; |
| 3677 | else if (RegVT == MVT::f80) |
| 3678 | RC = &X86::RFP80RegClass; |
| 3679 | else if (RegVT == MVT::f128) |
| 3680 | RC = &X86::VR128RegClass; |
| 3681 | else if (RegVT.is512BitVector()) |
| 3682 | RC = &X86::VR512RegClass; |
| 3683 | else if (RegVT.is256BitVector()) |
| 3684 | RC = Subtarget.hasVLX() ? &X86::VR256XRegClass : &X86::VR256RegClass; |
| 3685 | else if (RegVT.is128BitVector()) |
| 3686 | RC = Subtarget.hasVLX() ? &X86::VR128XRegClass : &X86::VR128RegClass; |
| 3687 | else if (RegVT == MVT::x86mmx) |
| 3688 | RC = &X86::VR64RegClass; |
| 3689 | else if (RegVT == MVT::v1i1) |
| 3690 | RC = &X86::VK1RegClass; |
| 3691 | else if (RegVT == MVT::v8i1) |
| 3692 | RC = &X86::VK8RegClass; |
| 3693 | else if (RegVT == MVT::v16i1) |
| 3694 | RC = &X86::VK16RegClass; |
| 3695 | else if (RegVT == MVT::v32i1) |
| 3696 | RC = &X86::VK32RegClass; |
| 3697 | else if (RegVT == MVT::v64i1) |
| 3698 | RC = &X86::VK64RegClass; |
| 3699 | else |
| 3700 | llvm_unreachable("Unknown argument type!"); |
| 3701 | |
| 3702 | Register Reg = MF.addLiveIn(VA.getLocReg(), RC); |
| 3703 | ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT); |
| 3704 | } |
| 3705 | |
| 3706 | |
| 3707 | |
| 3708 | |
| 3709 | if (VA.getLocInfo() == CCValAssign::SExt) |
| 3710 | ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue, |
| 3711 | DAG.getValueType(VA.getValVT())); |
| 3712 | else if (VA.getLocInfo() == CCValAssign::ZExt) |
| 3713 | ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue, |
| 3714 | DAG.getValueType(VA.getValVT())); |
| 3715 | else if (VA.getLocInfo() == CCValAssign::BCvt) |
| 3716 | ArgValue = DAG.getBitcast(VA.getValVT(), ArgValue); |
| 3717 | |
| 3718 | if (VA.isExtInLoc()) { |
| 3719 | |
| 3720 | if (RegVT.isVector() && VA.getValVT().getScalarType() != MVT::i1) |
| 3721 | ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue); |
| 3722 | else if (VA.getValVT().isVector() && |
| 3723 | VA.getValVT().getScalarType() == MVT::i1 && |
| 3724 | ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) || |
| 3725 | (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) { |
| 3726 | |
| 3727 | ArgValue = lowerRegToMasks(ArgValue, VA.getValVT(), RegVT, dl, DAG); |
| 3728 | } else |
| 3729 | ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue); |
| 3730 | } |
| 3731 | } else { |
| 3732 | assert(VA.isMemLoc()); |
| 3733 | ArgValue = |
| 3734 | LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, InsIndex); |
| 3735 | } |
| 3736 | |
| 3737 | |
| 3738 | if (VA.getLocInfo() == CCValAssign::Indirect && !Ins[I].Flags.isByVal()) |
| 3739 | ArgValue = |
| 3740 | DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, MachinePointerInfo()); |
| 3741 | |
| 3742 | InVals.push_back(ArgValue); |
| 3743 | } |
| 3744 | |
| 3745 | for (unsigned I = 0, E = Ins.size(); I != E; ++I) { |
| 3746 | if (Ins[I].Flags.isSwiftAsync()) { |
| 3747 | auto X86FI = MF.getInfo<X86MachineFunctionInfo>(); |
| 3748 | if (Subtarget.is64Bit()) |
| 3749 | X86FI->setHasSwiftAsyncContext(true); |
| 3750 | else { |
| 3751 | int FI = MF.getFrameInfo().CreateStackObject(4, Align(4), false); |
| 3752 | X86FI->setSwiftAsyncContextFrameIdx(FI); |
| 3753 | SDValue St = DAG.getStore(DAG.getEntryNode(), dl, InVals[I], |
| 3754 | DAG.getFrameIndex(FI, MVT::i32), |
| 3755 | MachinePointerInfo::getFixedStack(MF, FI)); |
| 3756 | Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, St, Chain); |
| 3757 | } |
| 3758 | } |
| 3759 | |
| 3760 | |
| 3761 | |
| 3762 | if (CallConv == CallingConv::Swift || CallConv == CallingConv::SwiftTail) |
| 3763 | continue; |
| 3764 | |
| 3765 | |
| 3766 | |
| 3767 | |
| 3768 | |
| 3769 | if (Ins[I].Flags.isSRet()) { |
| 3770 | Register Reg = FuncInfo->getSRetReturnReg(); |
| 3771 | if (!Reg) { |
| 3772 | MVT PtrTy = getPointerTy(DAG.getDataLayout()); |
| 3773 | Reg = MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy)); |
| 3774 | FuncInfo->setSRetReturnReg(Reg); |
| 3775 | } |
| 3776 | SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[I]); |
| 3777 | Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain); |
| 3778 | break; |
| 3779 | } |
| 3780 | } |
| 3781 | |
| 3782 | unsigned StackSize = CCInfo.getNextStackOffset(); |
| 3783 | |
| 3784 | if (shouldGuaranteeTCO(CallConv, |
| 3785 | MF.getTarget().Options.GuaranteedTailCallOpt)) |
| 3786 | StackSize = GetAlignedArgumentStackSize(StackSize, DAG); |
| 3787 | |
| 3788 | if (IsVarArg) |
| 3789 | VarArgsLoweringHelper(FuncInfo, dl, DAG, Subtarget, CallConv, CCInfo) |
| 3790 | .lowerVarArgsParameters(Chain, StackSize); |
| 3791 | |
| 3792 | |
| 3793 | if (X86::isCalleePop(CallConv, Is64Bit, IsVarArg, |
| 3794 | MF.getTarget().Options.GuaranteedTailCallOpt)) { |
| 3795 | FuncInfo->setBytesToPopOnReturn(StackSize); |
| 3796 | } else if (CallConv == CallingConv::X86_INTR && Ins.size() == 2) { |
| 3797 | |
| 3798 | |
| 3799 | FuncInfo->setBytesToPopOnReturn(Is64Bit ? 16 : 4); |
| 3800 | } else { |
| 3801 | FuncInfo->setBytesToPopOnReturn(0); |
| 3802 | |
| 3803 | if (!Is64Bit && !canGuaranteeTCO(CallConv) && |
| 3804 | !Subtarget.getTargetTriple().isOSMSVCRT() && |
| 3805 | argsAreStructReturn(Ins, Subtarget.isTargetMCU()) == StackStructReturn) |
| 3806 | FuncInfo->setBytesToPopOnReturn(4); |
| 3807 | } |
| 3808 | |
| 3809 | if (!Is64Bit) { |
| 3810 | |
| 3811 | FuncInfo->setRegSaveFrameIndex(0xAAAAAAA); |
| 3812 | } |
| 3813 | |
| 3814 | FuncInfo->setArgumentStackSize(StackSize); |
| 3815 | |
| 3816 | if (WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo()) { |
| 3817 | EHPersonality Personality = classifyEHPersonality(F.getPersonalityFn()); |
| 3818 | if (Personality == EHPersonality::CoreCLR) { |
| 3819 | assert(Is64Bit); |
| 3820 | |
| 3821 | |
| 3822 | |
| 3823 | |
| 3824 | |
| 3825 | |
| 3826 | |
| 3827 | |
| 3828 | int PSPSymFI = MFI.CreateStackObject(8, Align(8), false); |
| 3829 | EHInfo->PSPSymFrameIdx = PSPSymFI; |
| 3830 | } |
| 3831 | } |
| 3832 | |
| 3833 | if (CallConv == CallingConv::X86_RegCall || |
| 3834 | F.hasFnAttribute("no_caller_saved_registers")) { |
| 3835 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
| 3836 | for (std::pair<Register, Register> Pair : MRI.liveins()) |
| 3837 | MRI.disableCalleeSavedRegister(Pair.first); |
| 3838 | } |
| 3839 | |
| 3840 | return Chain; |
| 3841 | } |
| 3842 | |
| 3843 | SDValue X86TargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, |
| 3844 | SDValue Arg, const SDLoc &dl, |
| 3845 | SelectionDAG &DAG, |
| 3846 | const CCValAssign &VA, |
| 3847 | ISD::ArgFlagsTy Flags, |
| 3848 | bool isByVal) const { |
| 3849 | unsigned LocMemOffset = VA.getLocMemOffset(); |
| 3850 | SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl); |
| 3851 | PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()), |
| 3852 | StackPtr, PtrOff); |
| 3853 | if (isByVal) |
| 3854 | return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl); |
| 3855 | |
| 3856 | return DAG.getStore( |
| 3857 | Chain, dl, Arg, PtrOff, |
| 3858 | MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset)); |
| 3859 | } |
| 3860 | |
| 3861 | |
| 3862 | |
| 3863 | SDValue X86TargetLowering::EmitTailCallLoadRetAddr( |
| 3864 | SelectionDAG &DAG, SDValue &OutRetAddr, SDValue Chain, bool IsTailCall, |
| 3865 | bool Is64Bit, int FPDiff, const SDLoc &dl) const { |
| 3866 | |
| 3867 | EVT VT = getPointerTy(DAG.getDataLayout()); |
| 3868 | OutRetAddr = getReturnAddressFrameIndex(DAG); |
| 3869 | |
| 3870 | |
| 3871 | OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo()); |
| 3872 | return SDValue(OutRetAddr.getNode(), 1); |
| 3873 | } |
| 3874 | |
| 3875 | |
| 3876 | |
| 3877 | static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF, |
| 3878 | SDValue Chain, SDValue RetAddrFrIdx, |
| 3879 | EVT PtrVT, unsigned SlotSize, |
| 3880 | int FPDiff, const SDLoc &dl) { |
| 3881 | |
| 3882 | if (!FPDiff) return Chain; |
| 3883 | |
| 3884 | int NewReturnAddrFI = |
| 3885 | MF.getFrameInfo().CreateFixedObject(SlotSize, (int64_t)FPDiff - SlotSize, |
| 3886 | false); |
| 3887 | SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT); |
| 3888 | Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx, |
| 3889 | MachinePointerInfo::getFixedStack( |
| 3890 | DAG.getMachineFunction(), NewReturnAddrFI)); |
| 3891 | return Chain; |
| 3892 | } |
| 3893 | |
| 3894 | |
| 3895 | |
| 3896 | static SDValue getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT, SDValue V1, |
| 3897 | SDValue V2) { |
| 3898 | unsigned NumElems = VT.getVectorNumElements(); |
| 3899 | SmallVector<int, 8> Mask; |
| 3900 | Mask.push_back(NumElems); |
| 3901 | for (unsigned i = 1; i != NumElems; ++i) |
| 3902 | Mask.push_back(i); |
| 3903 | return DAG.getVectorShuffle(VT, dl, V1, V2, Mask); |
| 3904 | } |
| 3905 | |
/// Lower an outgoing call on x86/x86-64: assign argument locations, emit the
/// call sequence (CALLSEQ_START, argument stores/copies, the call node,
/// CALLSEQ_END) and extract the return values. Handles sibcalls, guaranteed
/// tail calls (including musttail), inalloca/preallocated arguments, Win64
/// and System V varargs, regcall/NCSR register masks, and CET/objc-arc call
/// variants.
SDValue
X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                             SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc &dl = CLI.DL;
  SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  CallingConv::ID CallConv = CLI.CallConv;
  bool &isTailCall = CLI.IsTailCall;
  bool isVarArg = CLI.IsVarArg;
  const auto *CB = CLI.CB;

  MachineFunction &MF = DAG.getMachineFunction();
  bool Is64Bit = Subtarget.is64Bit();
  bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
  StructReturnType SR = callIsStructReturn(Outs, Subtarget.isTargetMCU());
  bool IsSibcall = false;
  // Tail-call optimization is guaranteed (not merely opportunistic) under
  // -tailcallopt or for the tailcc/swifttailcc conventions.
  bool IsGuaranteeTCO = MF.getTarget().Options.GuaranteedTailCallOpt ||
      CallConv == CallingConv::Tail || CallConv == CallingConv::SwiftTail;
  X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
  bool HasNCSR = (CB && isa<CallInst>(CB) &&
                  CB->hasFnAttr("no_caller_saved_registers"));
  bool HasNoCfCheck = (CB && CB->doesNoCfCheck());
  bool IsIndirectCall = (CB && isa<CallInst>(CB) && CB->isIndirectCall());
  const Module *M = MF.getMMI().getModule();
  Metadata *IsCFProtectionSupported = M->getModuleFlag("cf-protection-branch");

  MachineFunction::CallSiteInfo CSInfo;
  // Interrupt handlers have a special frame layout; calling one directly is
  // meaningless.
  if (CallConv == CallingConv::X86_INTR)
    report_fatal_error("X86 interrupts may not be called directly");

  bool IsMustTail = CLI.CB && CLI.CB->isMustTailCall();
  if (Subtarget.isPICStyleGOT() && !IsGuaranteeTCO && !IsMustTail) {
    // GOT-PIC calls to preemptible symbols go through the PLT, which expects
    // EBX to hold the GOT base — incompatible with a tail call. Only allow
    // tail calls to local/hidden symbols here; musttail and guaranteed-TCO
    // calls are exempt because they must stay tail calls regardless.
    GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
    if (!G || (!G->getGlobal()->hasLocalLinkage() &&
               G->getGlobal()->hasDefaultVisibility()))
      isTailCall = false;
  }

  if (isTailCall && !IsMustTail) {
    // Check if it's really possible to do a tail call.
    isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
                    isVarArg, SR != NotStructReturn,
                    MF.getFunction().hasStructRetAttr(), CLI.RetTy,
                    Outs, OutVals, Ins, DAG);

    // A "sibcall" is a tail call detected automatically (without guaranteed
    // TCO); it reuses the caller's argument area and needs no ABI changes.
    if (!IsGuaranteeTCO && isTailCall)
      IsSibcall = true;

    if (isTailCall)
      ++NumTailCalls;
  }

  if (IsMustTail && !isTailCall)
    report_fatal_error("failed to perform tail call elimination on a call "
                       "site marked musttail");

  assert(!(isVarArg && canGuaranteeTCO(CallConv)) &&
         "Var args not supported with calling convention fastcc, ghc or hipe");

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());

  // Allocate the 32-byte shadow (home) area required by the Win64 ABI.
  if (IsWin64)
    CCInfo.AllocateStack(32, Align(8));

  CCInfo.AnalyzeArguments(Outs, CC_X86);

  // Vectorcall performs a second pass so HVA arguments get register
  // assignments after all other arguments.
  if (CallingConv::X86_VectorCall == CallConv) {
    CCInfo.AnalyzeArgumentsSecondPass(Outs, CC_X86);
  }

  // Get a count of how many bytes are to be pushed on the stack.
  unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
  if (IsSibcall)
    // A sibcall's memory arguments already sit in the caller's incoming
    // argument area, so no new stack space is needed.
    NumBytes = 0;
  else if (IsGuaranteeTCO && canGuaranteeTCO(CallConv))
    NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);

  // FPDiff: byte delta between the caller's and the callee's argument areas
  // for guaranteed tail calls (negative when the callee needs more space).
  int FPDiff = 0;
  if (isTailCall &&
      shouldGuaranteeTCO(CallConv,
                         MF.getTarget().Options.GuaranteedTailCallOpt)) {
    // Lower arguments at fp - stackoffset + fpdiff.
    unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();

    FPDiff = NumBytesCallerPushed - NumBytes;

    // Record the most negative return-address displacement seen so far so
    // the frame lowering can reserve enough space.
    if (FPDiff < X86Info->getTCReturnAddrDelta())
      X86Info->setTCReturnAddrDelta(FPDiff);
  }

  unsigned NumBytesToPush = NumBytes;
  unsigned NumBytesToPop = NumBytes;

  // With inalloca or preallocated arguments, the caller has already set up
  // the stack for this call, so nothing is pushed here (NumBytesToPush = 0);
  // the space is still popped/accounted for afterwards.
  if (!Outs.empty() && Outs.back().Flags.isInAlloca()) {
    NumBytesToPush = 0;
    if (!ArgLocs.back().isMemLoc())
      report_fatal_error("cannot use inalloca attribute on a register "
                         "parameter");
    if (ArgLocs.back().getLocMemOffset() != 0)
      report_fatal_error("any parameter with the inalloca attribute must be "
                         "the only memory argument");
  } else if (CLI.IsPreallocated) {
    assert(ArgLocs.back().isMemLoc() &&
           "cannot use preallocated attribute on a register "
           "parameter");
    // Remember each preallocated argument's stack offset for this call site
    // so the PREALLOCATED_ARG nodes can find them later.
    SmallVector<size_t, 4> PreallocatedOffsets;
    for (size_t i = 0; i < CLI.OutVals.size(); ++i) {
      if (CLI.CB->paramHasAttr(i, Attribute::Preallocated)) {
        PreallocatedOffsets.push_back(ArgLocs[i].getLocMemOffset());
      }
    }
    auto *MFI = DAG.getMachineFunction().getInfo<X86MachineFunctionInfo>();
    size_t PreallocatedId = MFI->getPreallocatedIdForCallSite(CLI.CB);
    MFI->setPreallocatedStackSize(PreallocatedId, NumBytes);
    MFI->setPreallocatedArgOffsets(PreallocatedId, PreallocatedOffsets);
    NumBytesToPush = 0;
  }

  // Sibcalls and musttail calls don't adjust the stack at all.
  if (!IsSibcall && !IsMustTail)
    Chain = DAG.getCALLSEQ_START(Chain, NumBytesToPush,
                                 NumBytes - NumBytesToPush, dl);

  // Load the return address for guaranteed tail calls that resize the frame,
  // so it can be stored at the shifted slot below.
  SDValue RetAddrFrIdx;

  if (isTailCall && FPDiff)
    Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
                                    Is64Bit, FPDiff, dl);

  SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
  SmallVector<SDValue, 8> MemOpChains;
  SDValue StackPtr;

  // The next two loops walk ArgLocs/Outs in lockstep; that only works if the
  // locations are in argument order.
  assert(isSortedByValueNo(ArgLocs) &&
         "Argument Location list must be sorted before lowering");

  // Walk the register/memloc assignments, inserting copies/loads. For tail
  // calls, memory operands are lowered later (second loop below).
  const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  for (unsigned I = 0, OutIndex = 0, E = ArgLocs.size(); I != E;
       ++I, ++OutIndex) {
    assert(OutIndex < Outs.size() && "Invalid Out index");
    // Skip inalloca/preallocated arguments: they were stored by the caller.
    ISD::ArgFlagsTy Flags = Outs[OutIndex].Flags;
    if (Flags.isInAlloca() || Flags.isPreallocated())
      continue;

    CCValAssign &VA = ArgLocs[I];
    EVT RegVT = VA.getLocVT();
    SDValue Arg = OutVals[OutIndex];
    bool isByVal = Flags.isByVal();

    // Promote/convert the value to match its assigned location type.
    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
      break;
    case CCValAssign::AExt:
      if (Arg.getValueType().isVector() &&
          Arg.getValueType().getVectorElementType() == MVT::i1)
        // i1 mask vectors travel in GPRs.
        Arg = lowerMasksToReg(Arg, RegVT, dl, DAG);
      else if (RegVT.is128BitVector()) {
        // Special case: widen a scalar passed in an XMM register by placing
        // it into lane 0 of a v2i64 (the remaining lanes come from undef).
        Arg = DAG.getBitcast(MVT::i64, Arg);
        Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
        Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
      } else
        Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
      break;
    case CCValAssign::BCvt:
      Arg = DAG.getBitcast(RegVT, Arg);
      break;
    case CCValAssign::Indirect: {
      if (isByVal) {
        // Pass the byval aggregate indirectly: copy it to an over-aligned
        // stack slot and pass that slot's address instead.
        // NOTE(review): the 16-byte minimum alignment here presumably
        // matches an ABI requirement for indirect byval — confirm upstream.
        int FrameIdx = MF.getFrameInfo().CreateStackObject(
            Flags.getByValSize(),
            std::max(Align(16), Flags.getNonZeroByValAlign()), false);
        SDValue StackSlot =
            DAG.getFrameIndex(FrameIdx, getPointerTy(DAG.getDataLayout()));
        Chain =
            CreateCopyOfByValArgument(Arg, StackSlot, Chain, Flags, DAG, dl);

        // From here on, treat the argument as the pointer to the slot.
        Arg = StackSlot;
        isByVal = false;
      } else {
        // Spill the value and pass the spill slot's address.
        SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
        int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
        Chain = DAG.getStore(
            Chain, dl, Arg, SpillSlot,
            MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
        Arg = SpillSlot;
      }
      break;
    }
    }

    if (VA.needsCustom()) {
      assert(VA.getValVT() == MVT::v64i1 &&
             "Currently the only custom case is when we split v64i1 to 2 regs");
      // Consumes the next location too (++I): the value is split in half.
      Passv64i1ArgInRegs(dl, DAG, Arg, RegsToPass, VA, ArgLocs[++I], Subtarget);
    } else if (VA.isRegLoc()) {
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
      const TargetOptions &Options = DAG.getTarget().Options;
      if (Options.EmitCallSiteInfo)
        CSInfo.emplace_back(VA.getLocReg(), I);
      if (isVarArg && IsWin64) {
        // Win64 varargs: XMM arguments are shadowed into the corresponding
        // integer register so the callee's va_arg machinery can find them.
        Register ShadowReg;
        switch (VA.getLocReg()) {
        case X86::XMM0: ShadowReg = X86::RCX; break;
        case X86::XMM1: ShadowReg = X86::RDX; break;
        case X86::XMM2: ShadowReg = X86::R8; break;
        case X86::XMM3: ShadowReg = X86::R9; break;
        }
        if (ShadowReg)
          RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
      }
    } else if (!IsSibcall && (!isTailCall || isByVal)) {
      assert(VA.isMemLoc());
      if (!StackPtr.getNode())
        StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
                                      getPointerTy(DAG.getDataLayout()));
      MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
                                             dl, DAG, VA, Flags, isByVal));
    }
  }

  if (!MemOpChains.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);

  if (Subtarget.isPICStyleGOT()) {
    // ELF / PIC requires a GOT pointer in EBX for PLT calls.
    if (!isTailCall) {
      // RegCall uses EBX for arguments, so don't clobber it there.
      if (CallConv != CallingConv::X86_RegCall)
        RegsToPass.push_back(std::make_pair(
            Register(X86::EBX), DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
                                            getPointerTy(DAG.getDataLayout()))));
    } else {
      // Tail calls can't set up EBX for the PLT; rewrite the callee to a
      // direct (non-PLT) reference. The filter in the function prologue
      // above only allowed local/hidden symbols here, so this is safe.
      GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
      if (G && !G->getGlobal()->hasLocalLinkage() &&
          G->getGlobal()->hasDefaultVisibility())
        Callee = LowerGlobalAddress(Callee, DAG);
      else if (isa<ExternalSymbolSDNode>(Callee))
        Callee = LowerExternalSymbol(Callee, DAG);
    }
  }

  if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail) {
    // System V x86-64 varargs ABI: AL carries the number of XMM registers
    // used, so the callee knows how many vector registers to spill.
    static const MCPhysReg XMMArgRegs[] = {
      X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
      X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
    };
    unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
    assert((Subtarget.hasSSE1() || !NumXMMRegs)
           && "SSE registers cannot be used when SSE is disabled");
    RegsToPass.push_back(std::make_pair(Register(X86::AL),
                                        DAG.getConstant(NumXMMRegs, dl,
                                                        MVT::i8)));
  }

  if (isVarArg && IsMustTail) {
    // A musttail vararg call forwards the caller's incoming register
    // arguments unchanged.
    const auto &Forwards = X86Info->getForwardedMustTailRegParms();
    for (const auto &F : Forwards) {
      SDValue Val = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
      RegsToPass.push_back(std::make_pair(F.PReg, Val));
    }
  }

  // Second pass for guaranteed tail calls: now that register arguments are
  // ready, store the memory arguments into their final (caller-frame) slots.
  if (!IsSibcall && isTailCall) {
    // Token-factor over the incoming stack arguments so the stores below
    // don't clobber values that are still being read.
    SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain);

    SmallVector<SDValue, 8> MemOpChains2;
    SDValue FIN;
    int FI = 0;
    for (unsigned I = 0, OutsIndex = 0, E = ArgLocs.size(); I != E;
         ++I, ++OutsIndex) {
      CCValAssign &VA = ArgLocs[I];

      if (VA.isRegLoc()) {
        if (VA.needsCustom()) {
          assert((CallConv == CallingConv::X86_RegCall) &&
                 "Expecting custom case only in regcall calling convention");
          // A custom (split) value consumed two locations in the first pass;
          // skip its second half here too.
          ++I;
        }

        continue;
      }

      assert(VA.isMemLoc());
      SDValue Arg = OutVals[OutsIndex];
      ISD::ArgFlagsTy Flags = Outs[OutsIndex].Flags;

      // Caller-managed storage; nothing to store.
      if (Flags.isInAlloca() || Flags.isPreallocated())
        continue;

      // Create frame index for the adjusted slot (shifted by FPDiff).
      int32_t Offset = VA.getLocMemOffset()+FPDiff;
      uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
      FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
      FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));

      if (Flags.isByVal()) {
        // Copy the byval aggregate from its current stack position
        // (StackPtr + LocMemOffset) into the new slot.
        SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset(), dl);
        if (!StackPtr.getNode())
          StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
                                        getPointerTy(DAG.getDataLayout()));
        Source = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
                             StackPtr, Source);

        MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN,
                                                         ArgChain,
                                                         Flags, DAG, dl));
      } else {
        // Store relative to frame pointer.
        MemOpChains2.push_back(DAG.getStore(
            ArgChain, dl, Arg, FIN,
            MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
      }
    }

    if (!MemOpChains2.empty())
      Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);

    // Store the return address to its (possibly shifted) new slot.
    Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx,
                                     getPointerTy(DAG.getDataLayout()),
                                     RegInfo->getSlotSize(), FPDiff, dl);
  }

  // Build a sequence of copies into physical registers, chained and glued so
  // they stay adjacent to the call.
  SDValue InFlag;
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
                             RegsToPass[i].second, InFlag);
    InFlag = Chain.getValue(1);
  }

  if (DAG.getTarget().getCodeModel() == CodeModel::Large) {
    assert(Is64Bit && "Large code model is only legal in 64-bit mode.");
    // In the large code model the callee address doesn't fit in a 32-bit
    // immediate, so leave Callee as-is (it will be materialized into a
    // register).
  } else if (Callee->getOpcode() == ISD::GlobalAddress ||
             Callee->getOpcode() == ISD::ExternalSymbol) {
    // Turn the global/symbol into a direct target address. ForCall=true:
    // the exact reference kind (PLT/GOT/plain) depends on the target; the
    // Wrapper node is skipped for direct calls.
    Callee = LowerGlobalOrExternal(Callee, DAG, /*ForCall=*/true);
  } else if (Subtarget.isTarget64BitILP32() &&
             Callee->getValueType(0) == MVT::i32) {
    // x32: zero-extend the 32-bit callee pointer to 64 bits.
    Callee = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Callee);
  }

  // Assemble the operand list for the call node.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  SmallVector<SDValue, 8> Ops;

  if (!IsSibcall && isTailCall && !IsMustTail) {
    // Guaranteed tail call: close the call sequence before the jump; the
    // callee pops nothing extra here.
    Chain = DAG.getCALLSEQ_END(Chain,
                               DAG.getIntPtrConstant(NumBytesToPop, dl, true),
                               DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
    InFlag = Chain.getValue(1);
  }

  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // TC_RETURN carries the frame-pointer delta as an extra operand.
  if (isTailCall)
    Ops.push_back(DAG.getTargetConstant(FPDiff, dl, MVT::i32));

  // Add argument registers to the end of the list so that they are known
  // live into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  // Select the register mask (callee-preserved registers) for this call.
  const uint32_t *Mask = [&]() {
    auto AdaptedCC = CallConv;
    // "no_caller_saved_registers" call sites reuse the X86_INTR mask, which
    // preserves everything (the custom mask is refined further below).
    if (HasNCSR)
      AdaptedCC = (CallingConv::ID)CallingConv::X86_INTR;
    // "no_callee_saved_registers" callees preserve nothing; GHC's mask
    // (empty preserved set) models that.
    if (CB && CB->hasFnAttr("no_callee_saved_registers"))
      AdaptedCC = (CallingConv::ID)CallingConv::GHC;
    return RegInfo->getCallPreservedMask(MF, AdaptedCC);
  }();
  assert(Mask && "Missing call preserved mask for calling convention");

  // 32-bit funclet-based EH (MSVC/CoreCLR): invokes can unwind through
  // funclets, which do not preserve any registers, so use the no-preserved
  // mask instead.
  if (!Is64Bit && CLI.CB && isa<InvokeInst>(CLI.CB)) {
    const Function &CallerFn = MF.getFunction();
    EHPersonality Pers =
        CallerFn.hasPersonalityFn()
            ? classifyEHPersonality(CallerFn.getPersonalityFn())
            : EHPersonality::Unknown;
    if (isFuncletEHPersonality(Pers))
      Mask = RegInfo->getNoPreservedMask();
  }

  // Define registers of the call (also passed to LowerCallResult at the end).
  uint32_t *RegMask = nullptr;

  // For regcall / NCSR calls, start from the conservative mask and clear the
  // argument registers (and their aliases): they are defined by this call.
  if (CallConv == CallingConv::X86_RegCall || HasNCSR) {
    const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();

    // Allocate a new mask and copy the conservative one into it.
    RegMask = MF.allocateRegMask();
    unsigned RegMaskSize = MachineOperand::getRegMaskSize(TRI->getNumRegs());
    memcpy(RegMask, Mask, sizeof(RegMask[0]) * RegMaskSize);

    // Clear the bit for every argument register and all of its sub/super
    // registers (IncludeSelf=true).
    for (auto const &RegPair : RegsToPass)
      for (MCSubRegIterator SubRegs(RegPair.first, TRI, true);
           SubRegs.isValid(); ++SubRegs)
        RegMask[*SubRegs / 32] &= ~(1u << (*SubRegs % 32));

    // Create the RegMask Operand according to our updated mask.
    Ops.push_back(DAG.getRegisterMask(RegMask));
  } else {
    // Create the RegMask Operand according to the static mask.
    Ops.push_back(DAG.getRegisterMask(Mask));
  }

  if (InFlag.getNode())
    Ops.push_back(InFlag);

  if (isTailCall) {
    // Emit the tail-call jump. TC_RETURN both ends the call sequence and
    // transfers control; callers of this function stop lowering here.
    MF.getFrameInfo().setHasTailCall();
    SDValue Ret = DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, Ops);
    DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
    return Ret;
  }

  if (HasNoCfCheck && IsCFProtectionSupported && IsIndirectCall) {
    // CET: nocf_check indirect call — no ENDBR check at the target.
    Chain = DAG.getNode(X86ISD::NT_CALL, dl, NodeTys, Ops);
  } else if (CLI.CB && objcarc::hasAttachedCallOpBundle(CLI.CB)) {
    // Calls with a "clang.arc.attachedcall" bundle are special: the
    // objc_retainAutoreleasedReturnValue / objc_unsafeClaim... call must
    // immediately follow this one, which CALL_RVMARKER guarantees.
    assert(!isTailCall &&
           "tail calls cannot be marked with clang.arc.attachedcall");
    assert(Is64Bit && "clang.arc.attachedcall is only supported in 64bit mode");

    // Encode which ARC runtime function follows: 0 = retainRV, 1 = claimRV.
    unsigned RuntimeCallType =
        objcarc::hasAttachedCallOpBundle(CLI.CB, true) ? 0 : 1;
    Ops.insert(Ops.begin() + 1,
               DAG.getTargetConstant(RuntimeCallType, dl, MVT::i32));
    Chain = DAG.getNode(X86ISD::CALL_RVMARKER, dl, NodeTys, Ops);
  } else {
    Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops);
  }

  InFlag = Chain.getValue(1);
  DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
  DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));

  // Record heap-allocation call sites for CodeView debug info.
  if (CLI.CB)
    if (MDNode *HeapAlloc = CLI.CB->getMetadata("heapallocsite"))
      DAG.addHeapAllocSite(Chain.getNode(), HeapAlloc);

  // Compute how many bytes the callee pops on return.
  unsigned NumBytesForCalleeToPop;
  if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
                       DAG.getTarget().Options.GuaranteedTailCallOpt))
    NumBytesForCalleeToPop = NumBytes;  // Callee pops everything
  else if (!Is64Bit && !canGuaranteeTCO(CallConv) &&
           !Subtarget.getTargetTriple().isOSMSVCRT() &&
           SR == StackStructReturn)
    // On 32-bit non-MSVCRT targets, a callee returning a struct through a
    // hidden sret pointer pops that 4-byte pointer itself.
    NumBytesForCalleeToPop = 4;
  else
    NumBytesForCalleeToPop = 0;  // Callee pops nothing.

  // Returns a flag for retval copy to use.
  if (!IsSibcall) {
    Chain = DAG.getCALLSEQ_END(Chain,
                               DAG.getIntPtrConstant(NumBytesToPop, dl, true),
                               DAG.getIntPtrConstant(NumBytesForCalleeToPop, dl,
                                                     true),
                               InFlag, dl);
    InFlag = Chain.getValue(1);
  }

  // Handle result values, copying them out of physregs into vregs that we
  // return.
  return LowerCallResult(Chain, InFlag, CallConv, isVarArg, Ins, dl, DAG,
                         InVals, RegMask);
}
| 4492 | |
| 4493 | |
| 4494 | |
| 4495 | |
| 4496 | |
| 4497 | |
| 4498 | |
| 4499 | |
| 4500 | |
| 4501 | |
| 4502 | |
| 4503 | |
| 4504 | |
| 4505 | |
| 4506 | |
| 4507 | |
| 4508 | |
| 4509 | |
| 4510 | |
| 4511 | |
| 4512 | |
| 4513 | |
| 4514 | |
| 4515 | |
| 4516 | |
| 4517 | |
| 4518 | |
| 4519 | |
| 4520 | |
| 4521 | |
| 4522 | |
| 4523 | |
| 4524 | |
| 4525 | |
| 4526 | unsigned |
| 4527 | X86TargetLowering::GetAlignedArgumentStackSize(const unsigned StackSize, |
| 4528 | SelectionDAG &DAG) const { |
| 4529 | const Align StackAlignment = Subtarget.getFrameLowering()->getStackAlign(); |
| 4530 | const uint64_t SlotSize = Subtarget.getRegisterInfo()->getSlotSize(); |
| 4531 | assert(StackSize % SlotSize == 0 && |
| 4532 | "StackSize must be a multiple of SlotSize"); |
| 4533 | return alignTo(StackSize + SlotSize, StackAlignment) - SlotSize; |
| 4534 | } |
| 4535 | |
| 4536 | |
| 4537 | |
/// Return true if Arg is already stored at offset Offset in the caller's
/// fixed-object stack area with matching size and extension flags, so a
/// guaranteed tail call can leave the argument in place instead of copying
/// it into the new frame.
static
bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
                         MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
                         const X86InstrInfo *TII, const CCValAssign &VA) {
  unsigned Bytes = Arg.getValueSizeInBits() / 8;

  // Walk through nodes that do not change which bytes of the source value
  // are observed.
  for (;;) {
    unsigned Op = Arg.getOpcode();
    if (Op == ISD::ZERO_EXTEND || Op == ISD::ANY_EXTEND || Op == ISD::BITCAST) {
      Arg = Arg.getOperand(0);
      continue;
    }
    if (Op == ISD::TRUNCATE) {
      // Only look through trunc(assertzext X) when the asserted width equals
      // the truncated width — then the truncate is a no-op on the bits.
      const SDValue &TruncInput = Arg.getOperand(0);
      if (TruncInput.getOpcode() == ISD::AssertZext &&
          cast<VTSDNode>(TruncInput.getOperand(1))->getVT() ==
              Arg.getValueType()) {
        Arg = TruncInput.getOperand(0);
        continue;
      }
    }
    break;
  }

  // Try to recover the frame index the value ultimately comes from.
  int FI = INT_MAX;
  if (Arg.getOpcode() == ISD::CopyFromReg) {
    Register VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
    if (!VR.isVirtual())
      return false;
    MachineInstr *Def = MRI->getVRegDef(VR);
    if (!Def)
      return false;
    if (!Flags.isByVal()) {
      // Plain argument: the vreg must be a reload from a stack slot.
      if (!TII->isLoadFromStackSlot(*Def, FI))
        return false;
    } else {
      // ByVal argument: the vreg must be the address of a frame object,
      // materialized by an LEA of a frame index.
      unsigned Opcode = Def->getOpcode();
      if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
           Opcode == X86::LEA64_32r) &&
          Def->getOperand(1).isFI()) {
        FI = Def->getOperand(1).getIndex();
        // Compare the aggregate's size, not the pointer's.
        Bytes = Flags.getByValSize();
      } else
        return false;
    }
  } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
    if (Flags.isByVal())
      // A byval parameter arrives as a pointer; a load of it yields that
      // pointer, not the aggregate itself, so it cannot match the slot.
      return false;
    SDValue Ptr = Ld->getBasePtr();
    FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
    if (!FINode)
      return false;
    FI = FINode->getIndex();
  } else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) {
    // ByVal argument given directly as a frame index.
    FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Arg);
    FI = FINode->getIndex();
    Bytes = Flags.getByValSize();
  } else
    return false;

  assert(FI != INT_MAX);
  // Only fixed objects live in the caller's incoming-argument area.
  if (!MFI.isFixedObjectIndex(FI))
    return false;

  // The object must sit at exactly the offset the callee expects.
  if (Offset != MFI.getObjectOffset(FI))
    return false;

  // If this is not a byval argument, the caller-frame copy must not have
  // been modified (immutable object); otherwise the callee would observe
  // a stale or clobbered value.
  if (!Flags.isByVal() && !MFI.isImmutableObjectIndex(FI))
    return false;

  if (VA.getLocVT().getFixedSizeInBits() >
      Arg.getValueSizeInBits().getFixedSize()) {
    // The location is wider than the value, so the upper bits matter: the
    // in-memory extension kind must match the flags on this argument.
    if (Flags.isZExt() != MFI.isObjectZExt(FI) ||
        Flags.isSExt() != MFI.isObjectSExt(FI)) {
      return false;
    }
  }

  // Finally, the sizes must agree exactly.
  return Bytes == MFI.getObjectSize(FI);
}
| 4630 | |
| 4631 | |
| 4632 | |
/// Check whether the call is eligible for tail call optimization. Targets
/// that want to do tail call optimization should implement this function.
/// Returns true only when the call can be emitted as a sibcall (or, under
/// GuaranteedTailCallOpt, as a guaranteed tail call).
bool X86TargetLowering::IsEligibleForTailCallOptimization(
    SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg,
    bool isCalleeStructRet, bool isCallerStructRet, Type *RetTy,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
  if (!mayTailCallThisCC(CalleeCC))
    return false;

  MachineFunction &MF = DAG.getMachineFunction();
  const Function &CallerF = MF.getFunction();

  // If the caller returns x86_fp80 but the callee does not, the FP_EXTEND of
  // the call result is not a nop, so it is not safe to tail call here.
  if (CallerF.getReturnType()->isX86_FP80Ty() && !RetTy->isX86_FP80Ty())
    return false;

  CallingConv::ID CallerCC = CallerF.getCallingConv();
  bool CCMatch = CallerCC == CalleeCC;
  bool IsCalleeWin64 = Subtarget.isCallingConvWin64(CalleeCC);
  bool IsCallerWin64 = Subtarget.isCallingConvWin64(CallerCC);
  // Guaranteed TCO applies under -tailcallopt or for the tail/swifttail CCs.
  bool IsGuaranteeTCO = DAG.getTarget().Options.GuaranteedTailCallOpt ||
      CalleeCC == CallingConv::Tail || CalleeCC == CallingConv::SwiftTail;

  // Win64 functions have extra shadow space for argument homing. Don't do the
  // sibcall if the caller and callee have mismatched expectations for this
  // space.
  if (IsCalleeWin64 != IsCallerWin64)
    return false;

  if (IsGuaranteeTCO) {
    if (canGuaranteeTCO(CalleeCC) && CCMatch)
      return true;
    return false;
  }

  // Look for obvious safe cases to perform tail call optimization that do not
  // require ABI changes. This is what gcc calls sibcall.

  // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
  // emit a special epilogue.
  const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  if (RegInfo->hasStackRealignment(MF))
    return false;

  // Also avoid sibcall optimization if either caller or callee uses struct
  // return semantics.
  if (isCalleeStructRet || isCallerStructRet)
    return false;

  // Do not sibcall optimize vararg calls unless all arguments are passed via
  // registers.
  LLVMContext &C = *DAG.getContext();
  if (isVarArg && !Outs.empty()) {
    // Optimizing for varargs on Win64 is unlikely to be safe without
    // additional testing.
    if (IsCalleeWin64 || IsCallerWin64)
      return false;

    SmallVector<CCValAssign, 16> ArgLocs;
    CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);

    CCInfo.AnalyzeCallOperands(Outs, CC_X86);
    for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i)
      if (!ArgLocs[i].isRegLoc())
        return false;
  }

  // If the call result is in ST0 / ST1, it needs to be popped off the x87
  // stack.  Therefore, if it's not used by the call it is not safe to optimize
  // this into a sibcall.
  bool Unused = false;
  for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
    if (!Ins[i].Used) {
      Unused = true;
      break;
    }
  }
  if (Unused) {
    SmallVector<CCValAssign, 16> RVLocs;
    CCState CCInfo(CalleeCC, false, MF, RVLocs, C);
    CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
    for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
      CCValAssign &VA = RVLocs[i];
      if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
        return false;
    }
  }

  // Check that the call results are passed in the same way.
  if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
                                  RetCC_X86, RetCC_X86))
    return false;

  // The callee has to preserve all registers the caller needs to preserve.
  const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
  const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
  if (!CCMatch) {
    const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
    if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
      return false;
  }

  unsigned StackArgsSize = 0;

  // If the callee takes no arguments then go on to check the results of the
  // call.
  if (!Outs.empty()) {
    // Check if stack adjustment is needed. For now, do not do this if any
    // argument is passed on the stack.
    SmallVector<CCValAssign, 16> ArgLocs;
    CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C);

    // Allocate shadow area for Win64.
    if (IsCalleeWin64)
      CCInfo.AllocateStack(32, Align(8));

    CCInfo.AnalyzeCallOperands(Outs, CC_X86);
    StackArgsSize = CCInfo.getNextStackOffset();

    if (CCInfo.getNextStackOffset()) {
      // Check if the arguments are already laid out in the right way as
      // the caller's fixed stack objects.
      MachineFrameInfo &MFI = MF.getFrameInfo();
      const MachineRegisterInfo *MRI = &MF.getRegInfo();
      const X86InstrInfo *TII = Subtarget.getInstrInfo();
      for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
        CCValAssign &VA = ArgLocs[i];
        SDValue Arg = OutVals[i];
        ISD::ArgFlagsTy Flags = Outs[i].Flags;
        if (VA.getLocInfo() == CCValAssign::Indirect)
          return false;
        if (!VA.isRegLoc()) {
          if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags,
                                   MFI, MRI, TII, VA))
            return false;
        }
      }
    }

    bool PositionIndependent = isPositionIndependent();
    // If the tailcall address may be in a register, then make sure it's
    // possible to register allocate for it. In 32-bit, the call address can
    // only target EAX, EDX, or ECX since the tail call must be scheduled after
    // callee-saved registers are restored. These happen to be the same
    // registers used to pass 'inreg' arguments, so watch out for those.
    if (!Subtarget.is64Bit() && ((!isa<GlobalAddressSDNode>(Callee) &&
                                  !isa<ExternalSymbolSDNode>(Callee)) ||
                                 PositionIndependent)) {
      unsigned NumInRegs = 0;
      // In PIC we need an extra register to formulate the address computation
      // for the callee.
      unsigned MaxInRegs = PositionIndependent ? 2 : 3;

      for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
        CCValAssign &VA = ArgLocs[i];
        if (!VA.isRegLoc())
          continue;
        Register Reg = VA.getLocReg();
        switch (Reg) {
        default: break;
        case X86::EAX: case X86::EDX: case X86::ECX:
          if (++NumInRegs == MaxInRegs)
            return false;
          break;
        }
      }
    }

    const MachineRegisterInfo &MRI = MF.getRegInfo();
    if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
      return false;
  }

  bool CalleeWillPop =
      X86::isCalleePop(CalleeCC, Subtarget.is64Bit(), isVarArg,
                       MF.getTarget().Options.GuaranteedTailCallOpt);

  if (unsigned BytesToPop =
          MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn()) {
    // If the caller pops bytes on return, the callee must pop exactly the
    // same amount for the tail call to be legal.
    bool CalleePopMatches = CalleeWillPop && BytesToPop == StackArgsSize;
    if (!CalleePopMatches)
      return false;
  } else if (CalleeWillPop && StackArgsSize > 0) {
    // If we don't have bytes to pop, make sure the callee doesn't pop any.
    return false;
  }

  return true;
}
| 4825 | |
/// Create an X86-specific FastISel instance; thin factory wrapper around
/// X86::createFastISel.
FastISel *
X86TargetLowering::createFastISel(FunctionLoweringInfo &funcInfo,
                                  const TargetLibraryInfo *libInfo) const {
  return X86::createFastISel(funcInfo, libInfo);
}
| 4831 | |
| 4832 | |
| 4833 | |
| 4834 | |
| 4835 | |
| 4836 | static bool MayFoldLoad(SDValue Op) { |
| 4837 | return Op.hasOneUse() && ISD::isNormalLoad(Op.getNode()); |
| 4838 | } |
| 4839 | |
| 4840 | static bool MayFoldIntoStore(SDValue Op) { |
| 4841 | return Op.hasOneUse() && ISD::isNormalStore(*Op.getNode()->use_begin()); |
| 4842 | } |
| 4843 | |
| 4844 | static bool MayFoldIntoZeroExtend(SDValue Op) { |
| 4845 | if (Op.hasOneUse()) { |
| 4846 | unsigned Opcode = Op.getNode()->use_begin()->getOpcode(); |
| 4847 | return (ISD::ZERO_EXTEND == Opcode); |
| 4848 | } |
| 4849 | return false; |
| 4850 | } |
| 4851 | |
/// Return true if Opcode is one of the X86-specific shuffle node opcodes.
static bool isTargetShuffle(unsigned Opcode) {
  switch(Opcode) {
  default: return false;
  case X86ISD::BLENDI:
  case X86ISD::PSHUFB:
  case X86ISD::PSHUFD:
  case X86ISD::PSHUFHW:
  case X86ISD::PSHUFLW:
  case X86ISD::SHUFP:
  case X86ISD::INSERTPS:
  case X86ISD::EXTRQI:
  case X86ISD::INSERTQI:
  case X86ISD::VALIGN:
  case X86ISD::PALIGNR:
  case X86ISD::VSHLDQ:
  case X86ISD::VSRLDQ:
  case X86ISD::MOVLHPS:
  case X86ISD::MOVHLPS:
  case X86ISD::MOVSHDUP:
  case X86ISD::MOVSLDUP:
  case X86ISD::MOVDDUP:
  case X86ISD::MOVSS:
  case X86ISD::MOVSD:
  case X86ISD::UNPCKL:
  case X86ISD::UNPCKH:
  case X86ISD::VBROADCAST:
  case X86ISD::VPERMILPI:
  case X86ISD::VPERMILPV:
  case X86ISD::VPERM2X128:
  case X86ISD::SHUF128:
  case X86ISD::VPERMIL2:
  case X86ISD::VPERMI:
  case X86ISD::VPPERM:
  case X86ISD::VPERMV:
  case X86ISD::VPERMV3:
  case X86ISD::VZEXT_MOVL:
    return true;
  }
}
| 4891 | |
/// Return true if Opcode is a shuffle whose mask comes from a variable
/// (non-immediate) operand.
static bool isTargetShuffleVariableMask(unsigned Opcode) {
  switch (Opcode) {
  default: return false;
  // Target shuffles that read their mask from a register/memory operand.
  case X86ISD::PSHUFB:
  case X86ISD::VPERMILPV:
  case X86ISD::VPERMIL2:
  case X86ISD::VPPERM:
  case X86ISD::VPERMV:
  case X86ISD::VPERMV3:
    return true;
  // Logic ops that shuffle-combining treats as variable "blend" masks.
  case ISD::OR:
  case ISD::AND:
  case X86ISD::ANDNP:
    return true;
  }
}
| 4910 | |
| 4911 | static bool isTargetShuffleSplat(SDValue Op) { |
| 4912 | unsigned Opcode = Op.getOpcode(); |
| 4913 | if (Opcode == ISD::EXTRACT_SUBVECTOR) |
| 4914 | return isTargetShuffleSplat(Op.getOperand(0)); |
| 4915 | return Opcode == X86ISD::VBROADCAST || Opcode == X86ISD::VBROADCAST_LOAD; |
| 4916 | } |
| 4917 | |
/// Return a frame index node for the slot holding the return address,
/// creating (and caching in X86MachineFunctionInfo) the fixed object on
/// first use.
SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
  int ReturnAddrIndex = FuncInfo->getRAIndex();

  if (ReturnAddrIndex == 0) {
    // Set up a frame object for the return address: one slot immediately
    // below the incoming stack pointer.
    unsigned SlotSize = RegInfo->getSlotSize();
    ReturnAddrIndex = MF.getFrameInfo().CreateFixedObject(SlotSize,
                                                          -(int64_t)SlotSize,
                                                          false);
    FuncInfo->setRAIndex(ReturnAddrIndex);
  }

  return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy(DAG.getDataLayout()));
}
| 4935 | |
| 4936 | bool X86::isOffsetSuitableForCodeModel(int64_t Offset, CodeModel::Model M, |
| 4937 | bool hasSymbolicDisplacement) { |
| 4938 | |
| 4939 | if (!isInt<32>(Offset)) |
| 4940 | return false; |
| 4941 | |
| 4942 | |
| 4943 | |
| 4944 | if (!hasSymbolicDisplacement) |
| 4945 | return true; |
| 4946 | |
| 4947 | |
| 4948 | if (M != CodeModel::Small && M != CodeModel::Kernel) |
| 4949 | return false; |
| 4950 | |
| 4951 | |
| 4952 | |
| 4953 | |
| 4954 | if (M == CodeModel::Small && Offset < 16*1024*1024) |
| 4955 | return true; |
| 4956 | |
| 4957 | |
| 4958 | |
| 4959 | |
| 4960 | if (M == CodeModel::Kernel && Offset >= 0) |
| 4961 | return true; |
| 4962 | |
| 4963 | return false; |
| 4964 | } |
| 4965 | |
| 4966 | |
| 4967 | |
| 4968 | bool X86::isCalleePop(CallingConv::ID CallingConv, |
| 4969 | bool is64Bit, bool IsVarArg, bool GuaranteeTCO) { |
| 4970 | |
| 4971 | |
| 4972 | if (!IsVarArg && shouldGuaranteeTCO(CallingConv, GuaranteeTCO)) |
| 4973 | return true; |
| 4974 | |
| 4975 | switch (CallingConv) { |
| 4976 | default: |
| 4977 | return false; |
| 4978 | case CallingConv::X86_StdCall: |
| 4979 | case CallingConv::X86_FastCall: |
| 4980 | case CallingConv::X86_ThisCall: |
| 4981 | case CallingConv::X86_VectorCall: |
| 4982 | return !is64Bit; |
| 4983 | } |
| 4984 | } |
| 4985 | |
| 4986 | |
| 4987 | static bool isX86CCSigned(unsigned X86CC) { |
| 4988 | switch (X86CC) { |
| 4989 | default: |
| 4990 | llvm_unreachable("Invalid integer condition!"); |
| 4991 | case X86::COND_E: |
| 4992 | case X86::COND_NE: |
| 4993 | case X86::COND_B: |
| 4994 | case X86::COND_A: |
| 4995 | case X86::COND_BE: |
| 4996 | case X86::COND_AE: |
| 4997 | return false; |
| 4998 | case X86::COND_G: |
| 4999 | case X86::COND_GE: |
| 5000 | case X86::COND_L: |
| 5001 | case X86::COND_LE: |
| 5002 | return true; |
| 5003 | } |
| 5004 | } |
| 5005 | |
/// Translate an integer ISD::CondCode to the corresponding X86 condition
/// code (signed codes for signed compares, B/A/BE/AE for unsigned).
static X86::CondCode TranslateIntegerX86CC(ISD::CondCode SetCCOpcode) {
  switch (SetCCOpcode) {
  default: llvm_unreachable("Invalid integer condition!");
  case ISD::SETEQ: return X86::COND_E;
  case ISD::SETGT: return X86::COND_G;
  case ISD::SETGE: return X86::COND_GE;
  case ISD::SETLT: return X86::COND_L;
  case ISD::SETLE: return X86::COND_LE;
  case ISD::SETNE: return X86::COND_NE;
  case ISD::SETULT: return X86::COND_B;
  case ISD::SETUGT: return X86::COND_A;
  case ISD::SETULE: return X86::COND_BE;
  case ISD::SETUGE: return X86::COND_AE;
  }
}
| 5021 | |
| 5022 | |
| 5023 | |
| 5024 | |
/// Translate an ISD::CondCode into an X86 condition code, possibly
/// canonicalizing/swapping LHS and RHS of the comparison in the process.
/// Returns X86::COND_INVALID for FP condcodes that need extra handling.
static X86::CondCode TranslateX86CC(ISD::CondCode SetCCOpcode, const SDLoc &DL,
                                    bool isFP, SDValue &LHS, SDValue &RHS,
                                    SelectionDAG &DAG) {
  if (!isFP) {
    if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
      if (SetCCOpcode == ISD::SETGT && RHSC->isAllOnesValue()) {
        // X > -1   -> X == 0, jump !sign.
        RHS = DAG.getConstant(0, DL, RHS.getValueType());
        return X86::COND_NS;
      }
      if (SetCCOpcode == ISD::SETLT && RHSC->isNullValue()) {
        // X < 0   -> X == 0, jump on sign.
        return X86::COND_S;
      }
      if (SetCCOpcode == ISD::SETGE && RHSC->isNullValue()) {
        // X >= 0   -> X == 0, jump on !sign.
        return X86::COND_NS;
      }
      if (SetCCOpcode == ISD::SETLT && RHSC->isOne()) {
        // X < 1   -> X <= 0
        RHS = DAG.getConstant(0, DL, RHS.getValueType());
        return X86::COND_LE;
      }
    }

    return TranslateIntegerX86CC(SetCCOpcode);
  }

  // First determine if it is required or is profitable to flip the operands.

  // If LHS is a foldable load, but RHS is not, flip the condition.
  if (ISD::isNON_EXTLoad(LHS.getNode()) &&
      !ISD::isNON_EXTLoad(RHS.getNode())) {
    SetCCOpcode = getSetCCSwappedOperands(SetCCOpcode);
    std::swap(LHS, RHS);
  }

  switch (SetCCOpcode) {
  default: break;
  case ISD::SETOLT:
  case ISD::SETOLE:
  case ISD::SETUGT:
  case ISD::SETUGE:
    // Swap so these map onto the "above"/"below" style codes selected below.
    std::swap(LHS, RHS);
    break;
  }

  // Map the FP condcode (after any swaps above) onto the unsigned-style X86
  // condition codes used for FP compares; parity codes handle unordered.
  switch (SetCCOpcode) {
  default: llvm_unreachable("Condcode should be pre-legalized away");
  case ISD::SETUEQ:
  case ISD::SETEQ: return X86::COND_E;
  case ISD::SETOLT:
  case ISD::SETOGT:
  case ISD::SETGT: return X86::COND_A;
  case ISD::SETOLE:
  case ISD::SETOGE:
  case ISD::SETGE: return X86::COND_AE;
  case ISD::SETUGT:
  case ISD::SETULT:
  case ISD::SETLT: return X86::COND_B;
  case ISD::SETUGE:
  case ISD::SETULE:
  case ISD::SETLE: return X86::COND_BE;
  case ISD::SETONE:
  case ISD::SETNE: return X86::COND_NE;
  case ISD::SETUO: return X86::COND_P;
  case ISD::SETO:  return X86::COND_NP;
  case ISD::SETOEQ:
  case ISD::SETUNE: return X86::COND_INVALID;
  }
}
| 5102 | |
| 5103 | |
| 5104 | |
| 5105 | |
/// Return true if there is an fcmov-compatible form for the given X86
/// condition code (only the unsigned/equality/parity codes qualify).
static bool hasFPCMov(unsigned X86CC) {
  switch (X86CC) {
  default:
    return false;
  case X86::COND_B:
  case X86::COND_BE:
  case X86::COND_E:
  case X86::COND_P:
  case X86::COND_A:
  case X86::COND_AE:
  case X86::COND_NE:
  case X86::COND_NP:
    return true;
  }
}
| 5121 | |
| 5122 | |
/// Identify intrinsics that touch memory and describe their memory behavior
/// (opcode, pointer operand, memory VT, alignment, load/store flags) so the
/// DAG builder can attach the right MachineMemOperand. Returns false for
/// intrinsics with no memory semantics handled here.
bool X86TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                           const CallInst &I,
                                           MachineFunction &MF,
                                           unsigned Intrinsic) const {
  Info.flags = MachineMemOperand::MONone;
  Info.offset = 0;

  const IntrinsicData* IntrData = getIntrinsicWithChain(Intrinsic);
  if (!IntrData) {
    // Not in the chained-intrinsic table: handle a few special cases.
    switch (Intrinsic) {
    case Intrinsic::x86_aesenc128kl:
    case Intrinsic::x86_aesdec128kl:
      // Loads a 384-bit key handle from the second argument.
      Info.opc = ISD::INTRINSIC_W_CHAIN;
      Info.ptrVal = I.getArgOperand(1);
      Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 48);
      Info.align = Align(1);
      Info.flags |= MachineMemOperand::MOLoad;
      return true;
    case Intrinsic::x86_aesenc256kl:
    case Intrinsic::x86_aesdec256kl:
      // Loads a 512-bit key handle from the second argument.
      Info.opc = ISD::INTRINSIC_W_CHAIN;
      Info.ptrVal = I.getArgOperand(1);
      Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 64);
      Info.align = Align(1);
      Info.flags |= MachineMemOperand::MOLoad;
      return true;
    case Intrinsic::x86_aesencwide128kl:
    case Intrinsic::x86_aesdecwide128kl:
      // Wide variants take the key handle pointer as the first argument.
      Info.opc = ISD::INTRINSIC_W_CHAIN;
      Info.ptrVal = I.getArgOperand(0);
      Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 48);
      Info.align = Align(1);
      Info.flags |= MachineMemOperand::MOLoad;
      return true;
    case Intrinsic::x86_aesencwide256kl:
    case Intrinsic::x86_aesdecwide256kl:
      Info.opc = ISD::INTRINSIC_W_CHAIN;
      Info.ptrVal = I.getArgOperand(0);
      Info.memVT = EVT::getIntegerVT(I.getType()->getContext(), 64);
      Info.align = Align(1);
      Info.flags |= MachineMemOperand::MOLoad;
      return true;
    }
    return false;
  }

  switch (IntrData->Type) {
  case TRUNCATE_TO_MEM_VI8:
  case TRUNCATE_TO_MEM_VI16:
  case TRUNCATE_TO_MEM_VI32: {
    // Truncating vector store: memory VT has the narrowed element type with
    // the source's element count.
    Info.opc = ISD::INTRINSIC_VOID;
    Info.ptrVal = I.getArgOperand(0);
    MVT VT  = MVT::getVT(I.getArgOperand(1)->getType());
    MVT ScalarVT = MVT::INVALID_SIMPLE_VALUE_TYPE;
    if (IntrData->Type == TRUNCATE_TO_MEM_VI8)
      ScalarVT = MVT::i8;
    else if (IntrData->Type == TRUNCATE_TO_MEM_VI16)
      ScalarVT = MVT::i16;
    else if (IntrData->Type == TRUNCATE_TO_MEM_VI32)
      ScalarVT = MVT::i32;

    Info.memVT = MVT::getVectorVT(ScalarVT, VT.getVectorNumElements());
    Info.align = Align(1);
    Info.flags |= MachineMemOperand::MOStore;
    break;
  }
  case GATHER:
  case GATHER_AVX2: {
    // Gather: no single base pointer; element count limited by both the data
    // and index vectors.
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.ptrVal = nullptr;
    MVT DataVT = MVT::getVT(I.getType());
    MVT IndexVT = MVT::getVT(I.getArgOperand(2)->getType());
    unsigned NumElts = std::min(DataVT.getVectorNumElements(),
                                IndexVT.getVectorNumElements());
    Info.memVT = MVT::getVectorVT(DataVT.getVectorElementType(), NumElts);
    Info.align = Align(1);
    Info.flags |= MachineMemOperand::MOLoad;
    break;
  }
  case SCATTER: {
    // Scatter: mirror of gather, storing the value operand (arg 3).
    Info.opc = ISD::INTRINSIC_VOID;
    Info.ptrVal = nullptr;
    MVT DataVT = MVT::getVT(I.getArgOperand(3)->getType());
    MVT IndexVT = MVT::getVT(I.getArgOperand(2)->getType());
    unsigned NumElts = std::min(DataVT.getVectorNumElements(),
                                IndexVT.getVectorNumElements());
    Info.memVT = MVT::getVectorVT(DataVT.getVectorElementType(), NumElts);
    Info.align = Align(1);
    Info.flags |= MachineMemOperand::MOStore;
    break;
  }
  default:
    return false;
  }

  return true;
}
| 5220 | |
| 5221 | |
| 5222 | |
| 5223 | |
| 5224 | bool X86TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT, |
| 5225 | bool ForCodeSize) const { |
| 5226 | for (unsigned i = 0, e = LegalFPImmediates.size(); i != e; ++i) { |
| 5227 | if (Imm.bitwiseIsEqual(LegalFPImmediates[i])) |
| 5228 | return true; |
| 5229 | } |
| 5230 | return false; |
| 5231 | } |
| 5232 | |
/// Decide whether a wide load may be narrowed to NewVT. Disallows narrowing
/// for TLS GOTTPOFF-addressed loads and for wide vector loads whose every use
/// is an extract feeding a store (those fold better as-is).
bool X86TargetLowering::shouldReduceLoadWidth(SDNode *Load,
                                              ISD::LoadExtType ExtTy,
                                              EVT NewVT) const {
  assert(cast<LoadSDNode>(Load)->isSimple() && "illegal to narrow");

  // GOTTPOFF-relocated loads must keep their full width so the relocation
  // still targets the expected instruction form.
  SDValue BasePtr = cast<LoadSDNode>(Load)->getBasePtr();
  if (BasePtr.getOpcode() == X86ISD::WrapperRIP)
    if (const auto *GA = dyn_cast<GlobalAddressSDNode>(BasePtr.getOperand(0)))
      return GA->getTargetFlags() != X86II::MO_GOTTPOFF;

  // If this is a 256/512-bit vector load with multiple uses and every value
  // use is an extract_subvector that feeds directly into a store, the
  // extract+store pairs can fold; splitting the load isn't worthwhile.
  EVT VT = Load->getValueType(0);
  if ((VT.is256BitVector() || VT.is512BitVector()) && !Load->hasOneUse()) {
    for (auto UI = Load->use_begin(), UE = Load->use_end(); UI != UE; ++UI) {
      // Skip uses of the chain result; only the value result (res 0) matters.
      if (UI.getUse().getResNo() != 0)
        continue;

      // Any use that is not extract(+single store) makes narrowing useful.
      if (UI->getOpcode() != ISD::EXTRACT_SUBVECTOR || !UI->hasOneUse() ||
          UI->use_begin()->getOpcode() != ISD::STORE)
        return true;
    }
    // All value uses are extract+store: keep the wide load.
    return false;
  }

  return true;
}
| 5266 | |
| 5267 | |
| 5268 | |
| 5269 | bool X86TargetLowering::shouldConvertConstantLoadToIntImm(const APInt &Imm, |
| 5270 | Type *Ty) const { |
| 5271 | assert(Ty->isIntegerTy()); |
| 5272 | |
| 5273 | unsigned BitSize = Ty->getPrimitiveSizeInBits(); |
| 5274 | if (BitSize == 0 || BitSize > 64) |
| 5275 | return false; |
| 5276 | return true; |
| 5277 | } |
| 5278 | |
| 5279 | bool X86TargetLowering::reduceSelectOfFPConstantLoads(EVT CmpOpVT) const { |
| 5280 | |
| 5281 | |
| 5282 | |
| 5283 | |
| 5284 | bool IsFPSetCC = CmpOpVT.isFloatingPoint() && CmpOpVT != MVT::f128; |
| 5285 | return !IsFPSetCC || !Subtarget.isTarget64BitLP64() || !Subtarget.hasAVX(); |
| 5286 | } |
| 5287 | |
| 5288 | bool X86TargetLowering::convertSelectOfConstantsToMath(EVT VT) const { |
| 5289 | |
| 5290 | |
| 5291 | if (VT.isVector() && Subtarget.hasAVX512()) |
| 5292 | return false; |
| 5293 | |
| 5294 | return true; |
| 5295 | } |
| 5296 | |
/// Return true if a multiply by the constant C should be decomposed into
/// shift+add/sub sequences instead of being kept as a MUL.
bool X86TargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
                                               SDValue C) const {
  // Only handle constant (splat) multipliers.
  APInt MulC;
  if (!ISD::isConstantSplatVector(C.getNode(), MulC))
    return false;

  // Find the type this will be legalized to, so we don't prematurely create
  // shl+add/sub on a type that type legalization would change anyway.
  while (getTypeAction(Context, VT) != TypeLegal)
    VT = getTypeToTransformTo(Context, VT);

  // If a MUL in the legalized type is directly supported, keep it — don't
  // decompose.
  if (isOperationLegal(ISD::MUL, VT))
    return false;

  // Otherwise decompose when the constant is one off (or one-minus/neg-one-
  // off) a power of two, i.e. expressible as shl plus a single add/sub/neg.
  return (MulC + 1).isPowerOf2() || (MulC - 1).isPowerOf2() ||
         (1 - MulC).isPowerOf2() || (-(MulC + 1)).isPowerOf2();
}
| 5325 | |
/// Return true if extracting a subvector of type ResVT at element Index from
/// SrcVT is cheap on this target.
bool X86TargetLowering::isExtractSubvectorCheap(EVT ResVT, EVT SrcVT,
                                                unsigned Index) const {
  if (!isOperationLegalOrCustom(ISD::EXTRACT_SUBVECTOR, ResVT))
    return false;

  // Mask (vXi1) vectors: only extraction of the low subvector, or of the
  // exact upper half, is cheap.
  if (ResVT.getVectorElementType() == MVT::i1)
    return Index == 0 || ((ResVT.getSizeInBits() == SrcVT.getSizeInBits()*2) &&
                          (Index == ResVT.getVectorNumElements()));

  // Otherwise extraction is cheap when aligned to a whole result-vector
  // boundary.
  return (Index % ResVT.getVectorNumElements()) == 0;
}
| 5339 | |
/// Return true if a vector binop should be scalarized (extract + scalar op).
bool X86TargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
  unsigned Opc = VecOp.getOpcode();

  // Assume target opcodes can't be scalarized.
  if (Opc >= ISD::BUILTIN_OP_END)
    return false;

  // If the vector op is not supported, try to convert to scalar.
  EVT VecVT = VecOp.getValueType();
  if (!isOperationLegalOrCustomOrPromote(Opc, VecVT))
    return true;

  // If the vector op is supported, but the scalar op is not, the transform
  // may not be worthwhile.
  EVT ScalarVT = VecVT.getScalarType();
  return isOperationLegalOrCustomOrPromote(Opc, ScalarVT);
}
| 5358 | |
| 5359 | bool X86TargetLowering::shouldFormOverflowOp(unsigned Opcode, EVT VT, |
| 5360 | bool) const { |
| 5361 | |
| 5362 | if (VT.isVector()) |
| 5363 | return false; |
| 5364 | return VT.isSimple() || !isOperationExpand(Opcode, VT); |
| 5365 | } |
| 5366 | |
/// cttz is cheap to speculate only when BMI is available (TZCNT has defined
/// zero-input behavior).
bool X86TargetLowering::isCheapToSpeculateCttz() const {
  return Subtarget.hasBMI();
}
| 5371 | |
/// ctlz is cheap to speculate only when LZCNT is available (defined behavior
/// on zero input).
bool X86TargetLowering::isCheapToSpeculateCtlz() const {
  return Subtarget.hasLZCNT();
}
| 5376 | |
/// Return true if folding a bitcast into a load of LoadVT producing
/// BitcastVT is beneficial.
bool X86TargetLowering::isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
                                                const SelectionDAG &DAG,
                                                const MachineMemOperand &MMO) const {
  // Without AVX512, scalar loads bitcast to vXi1 mask vectors don't pay off.
  if (!Subtarget.hasAVX512() && !LoadVT.isVector() && BitcastVT.isVector() &&
      BitcastVT.getVectorElementType() == MVT::i1)
    return false;

  // i8 -> v8i1 additionally requires DQI support to be worthwhile.
  if (!Subtarget.hasDQI() && BitcastVT == MVT::v8i1 && LoadVT == MVT::i8)
    return false;

  // If both types are legal vectors, it's always ok to convert them.
  if (LoadVT.isVector() && BitcastVT.isVector() &&
      isTypeLegal(LoadVT) && isTypeLegal(BitcastVT))
    return true;

  return TargetLowering::isLoadBitCastBeneficial(LoadVT, BitcastVT, DAG, MMO);
}
| 5394 | |
| 5395 | bool X86TargetLowering::canMergeStoresTo(unsigned AddressSpace, EVT MemVT, |
| 5396 | const SelectionDAG &DAG) const { |
| 5397 | |
| 5398 | |
| 5399 | bool NoFloat = DAG.getMachineFunction().getFunction().hasFnAttribute( |
| 5400 | Attribute::NoImplicitFloat); |
| 5401 | |
| 5402 | if (NoFloat) { |
| 5403 | unsigned MaxIntSize = Subtarget.is64Bit() ? 64 : 32; |
| 5404 | return (MemVT.getSizeInBits() <= MaxIntSize); |
| 5405 | } |
| 5406 | |
| 5407 | |
| 5408 | if (MemVT.getSizeInBits() > Subtarget.getPreferVectorWidth()) |
| 5409 | return false; |
| 5410 | |
| 5411 | return true; |
| 5412 | } |
| 5413 | |
/// ctlz is "fast" exactly when the subtarget reports fast LZCNT.
bool X86TargetLowering::isCtlzFast() const {
  return Subtarget.hasFastLZCNT();
}
| 5417 | |
/// On x86, (and x, mask) == 0 folds well (TEST-style), so always report the
/// folding as beneficial.
bool X86TargetLowering::isMaskAndCmp0FoldingBeneficial(
    const Instruction &AndI) const {
  return true;
}
| 5422 | |
| 5423 | bool X86TargetLowering::hasAndNotCompare(SDValue Y) const { |
| 5424 | EVT VT = Y.getValueType(); |
| 5425 | |
| 5426 | if (VT.isVector()) |
| 5427 | return false; |
| 5428 | |
| 5429 | if (!Subtarget.hasBMI()) |
| 5430 | return false; |
| 5431 | |
| 5432 | |
| 5433 | if (VT != MVT::i32 && VT != MVT::i64) |
| 5434 | return false; |
| 5435 | |
| 5436 | return !isa<ConstantSDNode>(Y); |
| 5437 | } |
| 5438 | |
| 5439 | bool X86TargetLowering::hasAndNot(SDValue Y) const { |
| 5440 | EVT VT = Y.getValueType(); |
| 5441 | |
| 5442 | if (!VT.isVector()) |
| 5443 | return hasAndNotCompare(Y); |
| 5444 | |
| 5445 | |
| 5446 | |
| 5447 | if (!Subtarget.hasSSE1() || VT.getSizeInBits() < 128) |
| 5448 | return false; |
| 5449 | |
| 5450 | if (VT == MVT::v4i32) |
| 5451 | return true; |
| 5452 | |
| 5453 | return Subtarget.hasSSE2(); |
| 5454 | } |
| 5455 | |
/// Bit-test instructions exist for all scalar integer types.
bool X86TargetLowering::hasBitTest(SDValue X, SDValue Y) const {
  return X.getValueType().isScalarInteger();
}
| 5459 | |
/// Decide whether hoisting a constant out of the shifted LHS of an AND is
/// profitable on x86, refining the generic TargetLowering heuristic.
bool X86TargetLowering::
shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
    SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y,
    unsigned OldShiftOpcode, unsigned NewShiftOpcode,
    SelectionDAG &DAG) const {
  // Respect the baseline recommendation first.
  if (!TargetLowering::shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd(
          X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG))
    return false;

  // The transform is always beneficial for scalar integers.
  if (X.getValueType().isScalarInteger())
    return true;

  // Vectors: beneficial when the shift amount is a splat (uniform shifts).
  if (DAG.isSplatValue(Y, true))
    return true;

  // AVX2's variable vector shifts also make it beneficial.
  if (Subtarget.hasAVX2())
    return true;

  // Otherwise only the SHL form is preferred.
  return NewShiftOpcode == ISD::SHL;
}
| 5482 | |
/// Decide whether a (shl (srl x, c), c) / (srl (shl x, c), c) pair should
/// be folded into an AND mask.
bool X86TargetLowering::shouldFoldConstantShiftPairToMask(
    const SDNode *N, CombineLevel Level) const {
  assert(((N->getOpcode() == ISD::SHL &&
           N->getOperand(0).getOpcode() == ISD::SRL) ||
          (N->getOpcode() == ISD::SRL &&
           N->getOperand(0).getOpcode() == ISD::SHL)) &&
         "Expected shift-shift mask");
  EVT VT = N->getValueType(0);
  if ((Subtarget.hasFastVectorShiftMasks() && VT.isVector()) ||
      (Subtarget.hasFastScalarShiftMasks() && !VT.isVector())) {
    // Only fold if the shift amounts match — then the pair reduces to a
    // single AND.
    return N->getOperand(1) == N->getOperand(0).getOperand(1);
  }
  return TargetLoweringBase::shouldFoldConstantShiftPairToMask(N, Level);
}
| 5500 | |
| 5501 | bool X86TargetLowering::shouldFoldMaskToVariableShiftPair(SDValue Y) const { |
| 5502 | EVT VT = Y.getValueType(); |
| 5503 | |
| 5504 | |
| 5505 | if (VT.isVector()) |
| 5506 | return false; |
| 5507 | |
| 5508 | |
| 5509 | if (VT == MVT::i64 && !Subtarget.is64Bit()) |
| 5510 | return false; |
| 5511 | |
| 5512 | return true; |
| 5513 | } |
| 5514 | |
| 5515 | bool X86TargetLowering::shouldExpandShift(SelectionDAG &DAG, |
| 5516 | SDNode *N) const { |
| 5517 | if (DAG.getMachineFunction().getFunction().hasMinSize() && |
| 5518 | !Subtarget.isOSWindows()) |
| 5519 | return false; |
| 5520 | return true; |
| 5521 | } |
| 5522 | |
/// Variable-index insertelement can be lowered as a splat+blend whenever the
/// vector type is legal.
bool X86TargetLowering::shouldSplatInsEltVarIndex(EVT VT) const {
  return isTypeLegal(VT);
}
| 5528 | |
/// Return the preferred type for a fast NumBits-wide equality compare, or
/// INVALID_SIMPLE_VALUE_TYPE when none is available.
MVT X86TargetLowering::hasFastEqualityCompare(unsigned NumBits) const {
  // A legal scalar integer type of the exact width is always preferred.
  MVT VT = MVT::getIntegerVT(NumBits);
  if (isTypeLegal(VT))
    return VT;

  // 128-bit compares can use a vector register when v16i8 is legal.
  if (NumBits == 128 && isTypeLegal(MVT::v16i8))
    return MVT::v16i8;

  // Likewise 256-bit compares with v32i8.
  if (NumBits == 256 && isTypeLegal(MVT::v32i8))
    return MVT::v32i8;

  // No fast equality compare for other widths (512-bit not handled here).
  return MVT::INVALID_SIMPLE_VALUE_TYPE;
}
| 5548 | |
| 5549 | |
| 5550 | static bool isUndefOrEqual(int Val, int CmpVal) { |
| 5551 | return ((Val == SM_SentinelUndef) || (Val == CmpVal)); |
| 5552 | } |
| 5553 | |
| 5554 | |
| 5555 | |
| 5556 | static bool isUndefOrEqual(ArrayRef<int> Mask, int CmpVal) { |
| 5557 | return llvm::all_of(Mask, [CmpVal](int M) { |
| 5558 | return (M == SM_SentinelUndef) || (M == CmpVal); |
| 5559 | }); |
| 5560 | } |
| 5561 | |
| 5562 | |
| 5563 | static bool isUndefOrZero(int Val) { |
| 5564 | return ((Val == SM_SentinelUndef) || (Val == SM_SentinelZero)); |
| 5565 | } |
| 5566 | |
| 5567 | |
| 5568 | |
| 5569 | static bool isUndefInRange(ArrayRef<int> Mask, unsigned Pos, unsigned Size) { |
| 5570 | return llvm::all_of(Mask.slice(Pos, Size), |
| 5571 | [](int M) { return M == SM_SentinelUndef; }); |
| 5572 | } |
| 5573 | |
| 5574 | |
| 5575 | static bool isUndefLowerHalf(ArrayRef<int> Mask) { |
| 5576 | unsigned NumElts = Mask.size(); |
| 5577 | return isUndefInRange(Mask, 0, NumElts / 2); |
| 5578 | } |
| 5579 | |
| 5580 | |
| 5581 | static bool isUndefUpperHalf(ArrayRef<int> Mask) { |
| 5582 | unsigned NumElts = Mask.size(); |
| 5583 | return isUndefInRange(Mask, NumElts / 2, NumElts / 2); |
| 5584 | } |
| 5585 | |
| 5586 | |
/// Return true if Val lies in the half-open interval [Low, Hi).
static bool isInRange(int Val, int Low, int Hi) {
  return Low <= Val && Val < Hi;
}
| 5590 | |
| 5591 | |
| 5592 | |
| 5593 | static bool isAnyInRange(ArrayRef<int> Mask, int Low, int Hi) { |
| 5594 | return llvm::any_of(Mask, [Low, Hi](int M) { return isInRange(M, Low, Hi); }); |
| 5595 | } |
| 5596 | |
| 5597 | |
| 5598 | static bool isAnyZero(ArrayRef<int> Mask) { |
| 5599 | return llvm::any_of(Mask, [](int M) { return M == SM_SentinelZero; }); |
| 5600 | } |
| 5601 | |
| 5602 | |
| 5603 | |
| 5604 | static bool isAnyZeroOrUndef(ArrayRef<int> Mask) { |
| 5605 | return llvm::any_of(Mask, [](int M) { |
| 5606 | return M == SM_SentinelZero || M == SM_SentinelUndef; |
| 5607 | }); |
| 5608 | } |
| 5609 | |
| 5610 | |
| 5611 | |
| 5612 | static bool isUndefOrInRange(int Val, int Low, int Hi) { |
| 5613 | return (Val == SM_SentinelUndef) || isInRange(Val, Low, Hi); |
| 5614 | } |
| 5615 | |
| 5616 | |
| 5617 | |
| 5618 | static bool isUndefOrInRange(ArrayRef<int> Mask, int Low, int Hi) { |
| 5619 | return llvm::all_of( |
| 5620 | Mask, [Low, Hi](int M) { return isUndefOrInRange(M, Low, Hi); }); |
| 5621 | } |
| 5622 | |
| 5623 | |
| 5624 | |
| 5625 | static bool isUndefOrZeroOrInRange(int Val, int Low, int Hi) { |
| 5626 | return isUndefOrZero(Val) || isInRange(Val, Low, Hi); |
| 5627 | } |
| 5628 | |
| 5629 | |
| 5630 | |
| 5631 | static bool isUndefOrZeroOrInRange(ArrayRef<int> Mask, int Low, int Hi) { |
| 5632 | return llvm::all_of( |
| 5633 | Mask, [Low, Hi](int M) { return isUndefOrZeroOrInRange(M, Low, Hi); }); |
| 5634 | } |
| 5635 | |
| 5636 | |
| 5637 | |
| 5638 | |
| 5639 | static bool isSequentialOrUndefInRange(ArrayRef<int> Mask, unsigned Pos, |
| 5640 | unsigned Size, int Low, int Step = 1) { |
| 5641 | for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step) |
| 5642 | if (!isUndefOrEqual(Mask[i], Low)) |
| 5643 | return false; |
| 5644 | return true; |
| 5645 | } |
| 5646 | |
| 5647 | |
| 5648 | |
| 5649 | |
| 5650 | static bool isSequentialOrUndefOrZeroInRange(ArrayRef<int> Mask, unsigned Pos, |
| 5651 | unsigned Size, int Low, |
| 5652 | int Step = 1) { |
| 5653 | for (unsigned i = Pos, e = Pos + Size; i != e; ++i, Low += Step) |
| 5654 | if (!isUndefOrZero(Mask[i]) && Mask[i] != Low) |
| 5655 | return false; |
| 5656 | return true; |
| 5657 | } |
| 5658 | |
| 5659 | |
| 5660 | |
| 5661 | static bool isUndefOrZeroInRange(ArrayRef<int> Mask, unsigned Pos, |
| 5662 | unsigned Size) { |
| 5663 | return llvm::all_of(Mask.slice(Pos, Size), |
| 5664 | [](int M) { return isUndefOrZero(M); }); |
| 5665 | } |
| 5666 | |
| 5667 | |
| 5668 | |
| 5669 | |
| 5670 | |
| 5671 | |
| 5672 | |
| 5673 | |
| 5674 | |
| 5675 | |
/// Test whether a shuffle mask can be simplified by widening the elements
/// being shuffled (pairing adjacent elements into one element of twice the
/// width). On success, fills \p WidenedMask (half the size of \p Mask) and
/// returns true; on failure, returns false and leaves \p WidenedMask in an
/// unspecified state.
///
/// NOTE: handles both normal shuffle masks and target shuffle masks, where
/// SM_SentinelZero (-2) marks a known-zero lane and SM_SentinelUndef (-1)
/// marks a don't-care lane.
static bool canWidenShuffleElements(ArrayRef<int> Mask,
                                    SmallVectorImpl<int> &WidenedMask) {
  WidenedMask.assign(Mask.size() / 2, 0);
  for (int i = 0, Size = Mask.size(); i < Size; i += 2) {
    int M0 = Mask[i];
    int M1 = Mask[i + 1];

    // If both elements are undef, its trivial.
    if (M0 == SM_SentinelUndef && M1 == SM_SentinelUndef) {
      WidenedMask[i / 2] = SM_SentinelUndef;
      continue;
    }

    // Check for an undef mask and a mask value properly aligned to fit with
    // a pair of values. If we find such a case, use the non-undef mask's
    // value.
    if (M0 == SM_SentinelUndef && M1 >= 0 && (M1 % 2) == 1) {
      WidenedMask[i / 2] = M1 / 2;
      continue;
    }
    if (M1 == SM_SentinelUndef && M0 >= 0 && (M0 % 2) == 0) {
      WidenedMask[i / 2] = M0 / 2;
      continue;
    }

    // When zeroing, we need to spread the zeroing across both lanes to widen.
    if (M0 == SM_SentinelZero || M1 == SM_SentinelZero) {
      if ((M0 == SM_SentinelZero || M0 == SM_SentinelUndef) &&
          (M1 == SM_SentinelZero || M1 == SM_SentinelUndef)) {
        WidenedMask[i / 2] = SM_SentinelZero;
        continue;
      }
      return false;
    }

    // Finally check if the two mask values are adjacent and aligned with
    // a pair.
    if (M0 != SM_SentinelUndef && (M0 % 2) == 0 && (M0 + 1) == M1) {
      WidenedMask[i / 2] = M0 / 2;
      continue;
    }

    // Otherwise we can't safely widen the elements used in this shuffle.
    return false;
  }
  assert(WidenedMask.size() == Mask.size() / 2 &&
         "Incorrect size of mask after widening the elements!");

  return true;
}
| 5725 | |
| 5726 | static bool canWidenShuffleElements(ArrayRef<int> Mask, |
| 5727 | const APInt &Zeroable, |
| 5728 | bool V2IsZero, |
| 5729 | SmallVectorImpl<int> &WidenedMask) { |
| 5730 | |
| 5731 | |
| 5732 | SmallVector<int, 64> ZeroableMask(Mask.begin(), Mask.end()); |
| 5733 | if (V2IsZero) { |
| 5734 | assert(!Zeroable.isNullValue() && "V2's non-undef elements are used?!"); |
| 5735 | for (int i = 0, Size = Mask.size(); i != Size; ++i) |
| 5736 | if (Mask[i] != SM_SentinelUndef && Zeroable[i]) |
| 5737 | ZeroableMask[i] = SM_SentinelZero; |
| 5738 | } |
| 5739 | return canWidenShuffleElements(ZeroableMask, WidenedMask); |
| 5740 | } |
| 5741 | |
| 5742 | static bool canWidenShuffleElements(ArrayRef<int> Mask) { |
| 5743 | SmallVector<int, 32> WidenedMask; |
| 5744 | return canWidenShuffleElements(Mask, WidenedMask); |
| 5745 | } |
| 5746 | |
| 5747 | |
| 5748 | |
/// Attempt to rescale \p Mask to have exactly \p NumDstElts elements, either
/// by narrowing (each element splits into several) or widening (adjacent
/// elements merge). Returns true and fills \p ScaledMask on success. The
/// scale factor between source and destination counts must be a whole number
/// in one direction or the other.
static bool scaleShuffleElements(ArrayRef<int> Mask, unsigned NumDstElts,
                                 SmallVectorImpl<int> &ScaledMask) {
  unsigned NumSrcElts = Mask.size();
  assert(((NumSrcElts % NumDstElts) == 0 || (NumDstElts % NumSrcElts) == 0) &&
         "Illegal shuffle scale factor");

  // Narrowing is always possible - each source element simply expands to
  // Scale consecutive destination elements.
  if (NumDstElts >= NumSrcElts) {
    int Scale = NumDstElts / NumSrcElts;
    llvm::narrowShuffleMaskElts(Scale, Mask, ScaledMask);
    return true;
  }

  // Widening can fail; repeatedly halve the mask (each step merges element
  // pairs) until the target width is reached or a step is impossible.
  if (canWidenShuffleElements(Mask, ScaledMask)) {
    while (ScaledMask.size() > NumDstElts) {
      SmallVector<int, 16> WidenedMask;
      if (!canWidenShuffleElements(ScaledMask, WidenedMask))
        return false;
      ScaledMask = std::move(WidenedMask);
    }
    return true;
  }

  return false;
}
| 5776 | |
| 5777 | |
| 5778 | bool X86::isZeroNode(SDValue Elt) { |
| 5779 | return isNullConstant(Elt) || isNullFPConstant(Elt); |
| 5780 | } |
| 5781 | |
| 5782 | |
| 5783 | |
| 5784 | |
/// Build a BUILD_VECTOR of integer constants from \p Values with type \p VT.
/// If \p IsMask is set, negative values become undef elements. When i64 is
/// not a legal type (32-bit mode), i64 elements are emitted as lo/hi pairs of
/// i32 constants and the result is bitcast back to \p VT.
static SDValue getConstVector(ArrayRef<int> Values, MVT VT, SelectionDAG &DAG,
                              const SDLoc &dl, bool IsMask = false) {

  SmallVector<SDValue, 32> Ops;
  bool Split = false;

  MVT ConstVecVT = VT;
  unsigned NumElts = VT.getVectorNumElements();
  bool In64BitMode = DAG.getTargetLoweringInfo().isTypeLegal(MVT::i64);
  if (!In64BitMode && VT.getVectorElementType() == MVT::i64) {
    // Split each i64 element into two i32s.
    ConstVecVT = MVT::getVectorVT(MVT::i32, NumElts * 2);
    Split = true;
  }

  MVT EltVT = ConstVecVT.getVectorElementType();
  for (unsigned i = 0; i < NumElts; ++i) {
    bool IsUndef = Values[i] < 0 && IsMask;
    SDValue OpNode = IsUndef ? DAG.getUNDEF(EltVT) :
      DAG.getConstant(Values[i], dl, EltVT);
    Ops.push_back(OpNode);
    if (Split)
      // High half of the split i64; the int inputs fit in 32 bits, so the
      // upper word is zero (or undef to match the low half).
      Ops.push_back(IsUndef ? DAG.getUNDEF(EltVT) :
                    DAG.getConstant(0, dl, EltVT));
  }
  SDValue ConstsNode = DAG.getBuildVector(ConstVecVT, dl, Ops);
  if (Split)
    ConstsNode = DAG.getBitcast(VT, ConstsNode);
  return ConstsNode;
}
| 5814 | |
/// Build a BUILD_VECTOR from raw element bit patterns \p Bits, with \p Undefs
/// marking which elements are undef. Handles f32/f64 element types by
/// re-interpreting the bits as floats, and splits i64 elements into i32 pairs
/// when i64 is not legal. The result is bitcast to \p VT.
static SDValue getConstVector(ArrayRef<APInt> Bits, APInt &Undefs,
                              MVT VT, SelectionDAG &DAG, const SDLoc &dl) {
  assert(Bits.size() == Undefs.getBitWidth() &&
         "Unequal constant and undef arrays");
  SmallVector<SDValue, 32> Ops;
  bool Split = false;

  MVT ConstVecVT = VT;
  unsigned NumElts = VT.getVectorNumElements();
  bool In64BitMode = DAG.getTargetLoweringInfo().isTypeLegal(MVT::i64);
  if (!In64BitMode && VT.getVectorElementType() == MVT::i64) {
    // Emit each i64 element as two i32 constants.
    ConstVecVT = MVT::getVectorVT(MVT::i32, NumElts * 2);
    Split = true;
  }

  MVT EltVT = ConstVecVT.getVectorElementType();
  for (unsigned i = 0, e = Bits.size(); i != e; ++i) {
    if (Undefs[i]) {
      // Undef element (two undefs when split into i32 halves).
      Ops.append(Split ? 2 : 1, DAG.getUNDEF(EltVT));
      continue;
    }
    const APInt &V = Bits[i];
    assert(V.getBitWidth() == VT.getScalarSizeInBits() && "Unexpected sizes");
    if (Split) {
      // Low 32 bits, then high 32 bits.
      Ops.push_back(DAG.getConstant(V.trunc(32), dl, EltVT));
      Ops.push_back(DAG.getConstant(V.lshr(32).trunc(32), dl, EltVT));
    } else if (EltVT == MVT::f32) {
      APFloat FV(APFloat::IEEEsingle(), V);
      Ops.push_back(DAG.getConstantFP(FV, dl, EltVT));
    } else if (EltVT == MVT::f64) {
      APFloat FV(APFloat::IEEEdouble(), V);
      Ops.push_back(DAG.getConstantFP(FV, dl, EltVT));
    } else {
      Ops.push_back(DAG.getConstant(V, dl, EltVT));
    }
  }

  SDValue ConstsNode = DAG.getBuildVector(ConstVecVT, dl, Ops);
  return DAG.getBitcast(VT, ConstsNode);
}
| 5855 | |
| 5856 | |
/// Returns a vector of the specified type with all elements zero.
static SDValue getZeroVector(MVT VT, const X86Subtarget &Subtarget,
                             SelectionDAG &DAG, const SDLoc &dl) {
  assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector() ||
          VT.getVectorElementType() == MVT::i1) &&
         "Unexpected vector type");

  // Try to build SSE/AVX zero vectors as <N x i32> bitcast to their dest
  // type. This ensures they get CSE'd. But if the integer type is not
  // available, use a floating-point +0.0 instead.
  SDValue Vec;
  if (!Subtarget.hasSSE2() && VT.is128BitVector()) {
    // Pre-SSE2 only the FP unit is available for 128-bit vectors.
    Vec = DAG.getConstantFP(+0.0, dl, MVT::v4f32);
  } else if (VT.isFloatingPoint()) {
    Vec = DAG.getConstantFP(+0.0, dl, VT);
  } else if (VT.getVectorElementType() == MVT::i1) {
    // Mask vectors are emitted directly; wide i1 vectors need AVX512BW.
    assert((Subtarget.hasBWI() || VT.getVectorNumElements() <= 16) &&
           "Unexpected vector type");
    Vec = DAG.getConstant(0, dl, VT);
  } else {
    unsigned Num32BitElts = VT.getSizeInBits() / 32;
    Vec = DAG.getConstant(0, dl, MVT::getVectorVT(MVT::i32, Num32BitElts));
  }
  return DAG.getBitcast(VT, Vec);
}
| 5881 | |
/// Extract the \p vectorWidth-bit chunk of \p Vec containing element
/// \p IdxVal. The index is rounded down to a chunk boundary, so this only
/// extracts chunk-aligned subvectors.
static SDValue extractSubVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG,
                                const SDLoc &dl, unsigned vectorWidth) {
  EVT VT = Vec.getValueType();
  EVT ElVT = VT.getVectorElementType();
  unsigned Factor = VT.getSizeInBits() / vectorWidth;
  EVT ResultVT = EVT::getVectorVT(*DAG.getContext(), ElVT,
                                  VT.getVectorNumElements() / Factor);

  // Extract the relevant vectorWidth bits. Generate an EXTRACT_SUBVECTOR.
  unsigned ElemsPerChunk = vectorWidth / ElVT.getSizeInBits();
  assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2");

  // This is the index of the first element of the vectorWidth-bit chunk
  // we want. Since ElemsPerChunk is a power of 2 just need to clear bits.
  IdxVal &= ~(ElemsPerChunk - 1);

  // If the input is a buildvector just emit a smaller one.
  if (Vec.getOpcode() == ISD::BUILD_VECTOR)
    return DAG.getBuildVector(ResultVT, dl,
                              Vec->ops().slice(IdxVal, ElemsPerChunk));

  SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl);
  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResultVT, Vec, VecIdx);
}
| 5906 | |
| 5907 | |
| 5908 | |
| 5909 | |
| 5910 | |
| 5911 | |
| 5912 | |
| 5913 | static SDValue extract128BitVector(SDValue Vec, unsigned IdxVal, |
| 5914 | SelectionDAG &DAG, const SDLoc &dl) { |
| 5915 | assert((Vec.getValueType().is256BitVector() || |
| 5916 | Vec.getValueType().is512BitVector()) && "Unexpected vector size!"); |
| 5917 | return extractSubVector(Vec, IdxVal, DAG, dl, 128); |
| 5918 | } |
| 5919 | |
| 5920 | |
| 5921 | static SDValue extract256BitVector(SDValue Vec, unsigned IdxVal, |
| 5922 | SelectionDAG &DAG, const SDLoc &dl) { |
| 5923 | assert(Vec.getValueType().is512BitVector() && "Unexpected vector size!"); |
| 5924 | return extractSubVector(Vec, IdxVal, DAG, dl, 256); |
| 5925 | } |
| 5926 | |
/// Insert \p Vec (a \p vectorWidth-bit subvector) into \p Result at chunk
/// index containing element \p IdxVal. The index is rounded down to a chunk
/// boundary. Inserting undef is a no-op.
static SDValue insertSubVector(SDValue Result, SDValue Vec, unsigned IdxVal,
                               SelectionDAG &DAG, const SDLoc &dl,
                               unsigned vectorWidth) {
  assert((vectorWidth == 128 || vectorWidth == 256) &&
         "Unsupported vector width");

  // Inserting UNDEF is Result.
  if (Vec.isUndef())
    return Result;
  EVT VT = Vec.getValueType();
  EVT ElVT = VT.getVectorElementType();
  EVT ResultVT = Result.getValueType();

  // Insert the relevant vectorWidth bits.
  unsigned ElemsPerChunk = vectorWidth/ElVT.getSizeInBits();
  assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2");

  // This is the index of the first element of the vectorWidth-bit chunk
  // we want. Since ElemsPerChunk is a power of 2 just need to clear bits.
  IdxVal &= ~(ElemsPerChunk - 1);

  SDValue VecIdx = DAG.getIntPtrConstant(IdxVal, dl);
  return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResultVT, Result, Vec, VecIdx);
}
| 5950 | |
| 5951 | |
| 5952 | |
| 5953 | |
| 5954 | |
| 5955 | |
| 5956 | |
| 5957 | static SDValue insert128BitVector(SDValue Result, SDValue Vec, unsigned IdxVal, |
| 5958 | SelectionDAG &DAG, const SDLoc &dl) { |
| 5959 | assert(Vec.getValueType().is128BitVector() && "Unexpected vector size!"); |
| 5960 | return insertSubVector(Result, Vec, IdxVal, DAG, dl, 128); |
| 5961 | } |
| 5962 | |
| 5963 | |
| 5964 | |
/// Widen a vector to a larger size with the same scalar type, with the new
/// elements either zero or undef.
static SDValue widenSubVector(MVT VT, SDValue Vec, bool ZeroNewElements,
                              const X86Subtarget &Subtarget, SelectionDAG &DAG,
                              const SDLoc &dl) {
  assert(Vec.getValueSizeInBits().getFixedSize() < VT.getFixedSizeInBits() &&
         Vec.getValueType().getScalarType() == VT.getScalarType() &&
         "Unsupported vector widening type");
  // Start from a zero or undef vector of the wide type and insert Vec at
  // element 0.
  SDValue Res = ZeroNewElements ? getZeroVector(VT, Subtarget, DAG, dl)
                                : DAG.getUNDEF(VT);
  return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT, Res, Vec,
                     DAG.getIntPtrConstant(0, dl));
}
| 5976 | |
| 5977 | |
| 5978 | |
/// Widen a vector to the target width \p WideSizeInBits with the same scalar
/// type, with the new elements either zero or undef.
static SDValue widenSubVector(SDValue Vec, bool ZeroNewElements,
                              const X86Subtarget &Subtarget, SelectionDAG &DAG,
                              const SDLoc &dl, unsigned WideSizeInBits) {
  assert(Vec.getValueSizeInBits() < WideSizeInBits &&
         (WideSizeInBits % Vec.getScalarValueSizeInBits()) == 0 &&
         "Unsupported vector widening type");
  // Compute the wide vector type and defer to the MVT-based overload.
  unsigned WideNumElts = WideSizeInBits / Vec.getScalarValueSizeInBits();
  MVT SVT = Vec.getSimpleValueType().getScalarType();
  MVT VT = MVT::getVectorVT(SVT, WideNumElts);
  return widenSubVector(VT, Vec, ZeroNewElements, Subtarget, DAG, dl);
}
| 5990 | |
| 5991 | |
| 5992 | |
| 5993 | |
/// Collect the subvector operands that \p N concatenates together, either a
/// direct CONCAT_VECTORS or an INSERT_SUBVECTOR pattern that builds a vector
/// from two equal-sized halves. Returns true and fills \p Ops on success.
static bool collectConcatOps(SDNode *N, SmallVectorImpl<SDValue> &Ops) {
  assert(Ops.empty() && "Expected an empty ops vector");

  if (N->getOpcode() == ISD::CONCAT_VECTORS) {
    Ops.append(N->op_begin(), N->op_end());
    return true;
  }

  if (N->getOpcode() == ISD::INSERT_SUBVECTOR) {
    SDValue Src = N->getOperand(0);
    SDValue Sub = N->getOperand(1);
    const APInt &Idx = N->getConstantOperandAPInt(2);
    EVT VT = Src.getValueType();
    EVT SubVT = Sub.getValueType();

    // Only handle the case where the subvector is exactly the upper half of
    // the result.
    if (VT.getSizeInBits() == (SubVT.getSizeInBits() * 2) &&
        Idx == (VT.getVectorNumElements() / 2)) {
      // insert_subvector(insert_subvector(undef/x, lo, 0), hi, N/2)
      if (Src.getOpcode() == ISD::INSERT_SUBVECTOR &&
          Src.getOperand(1).getValueType() == SubVT &&
          isNullConstant(Src.getOperand(2))) {
        Ops.push_back(Src.getOperand(1));
        Ops.push_back(Sub);
        return true;
      }
      // insert_subvector(x, extract_subvector(x, 0), N/2) - a splat of the
      // low half of x.
      if (Sub.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
          Sub.getOperand(0) == Src && isNullConstant(Sub.getOperand(1))) {
        Ops.append(2, Sub);
        return true;
      }
    }
  }

  return false;
}
| 6031 | |
/// Split \p Op into its lower and upper halves, returned as a (Lo, Hi) pair.
static std::pair<SDValue, SDValue> splitVector(SDValue Op, SelectionDAG &DAG,
                                               const SDLoc &dl) {
  EVT VT = Op.getValueType();
  unsigned NumElems = VT.getVectorNumElements();
  unsigned SizeInBits = VT.getSizeInBits();
  assert((NumElems % 2) == 0 && (SizeInBits % 2) == 0 &&
         "Can't split odd sized vector");

  SDValue Lo = extractSubVector(Op, 0, DAG, dl, SizeInBits / 2);
  SDValue Hi = extractSubVector(Op, NumElems / 2, DAG, dl, SizeInBits / 2);
  return std::make_pair(Lo, Hi);
}
| 6044 | |
| 6045 | |
/// Break a unary integer operation into two half-sized operations and then
/// concatenate the results.
static SDValue splitVectorIntUnary(SDValue Op, SelectionDAG &DAG) {
  EVT VT = Op.getValueType();

  // Make sure we only try to split 256/512-bit types to avoid creating
  // narrow vectors.
  assert((Op.getOperand(0).getValueType().is256BitVector() ||
          Op.getOperand(0).getValueType().is512BitVector()) &&
         (VT.is256BitVector() || VT.is512BitVector()) && "Unsupported VT!");
  assert(Op.getOperand(0).getValueType().getVectorNumElements() ==
             VT.getVectorNumElements() &&
         "Unexpected VTs!");

  SDLoc dl(Op);

  // Extract the Lo/Hi vectors.
  SDValue Lo, Hi;
  std::tie(Lo, Hi) = splitVector(Op.getOperand(0), DAG, dl);

  EVT LoVT, HiVT;
  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
  return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
                     DAG.getNode(Op.getOpcode(), dl, LoVT, Lo),
                     DAG.getNode(Op.getOpcode(), dl, HiVT, Hi));
}
| 6070 | |
| 6071 | |
| 6072 | |
/// Break a binary integer operation into two half-sized operations and then
/// concatenate the results.
static SDValue splitVectorIntBinary(SDValue Op, SelectionDAG &DAG) {
  EVT VT = Op.getValueType();

  // Sanity check that all the types match.
  assert(Op.getOperand(0).getValueType() == VT &&
         Op.getOperand(1).getValueType() == VT && "Unexpected VTs!");
  assert((VT.is256BitVector() || VT.is512BitVector()) && "Unsupported VT!");

  SDLoc dl(Op);

  // Extract the LHS Lo/Hi vectors.
  SDValue LHS1, LHS2;
  std::tie(LHS1, LHS2) = splitVector(Op.getOperand(0), DAG, dl);

  // Extract the RHS Lo/Hi vectors.
  SDValue RHS1, RHS2;
  std::tie(RHS1, RHS2) = splitVector(Op.getOperand(1), DAG, dl);

  EVT LoVT, HiVT;
  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
  return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
                     DAG.getNode(Op.getOpcode(), dl, LoVT, LHS1, RHS1),
                     DAG.getNode(Op.getOpcode(), dl, HiVT, LHS2, RHS2));
}
| 6097 | |
| 6098 | |
| 6099 | |
| 6100 | |
| 6101 | |
| 6102 | |
| 6103 | |
| 6104 | |
/// Helper for splitting operands into chunks the subtarget can handle
/// natively (512-bit with AVX512/BWI, 256-bit with AVX2, otherwise 128-bit),
/// applying \p Builder to each chunk's operands, and concatenating the
/// per-chunk results back into a \p VT value. If no split is needed, Builder
/// is applied to \p Ops directly.
template <typename F>
SDValue SplitOpsAndApply(SelectionDAG &DAG, const X86Subtarget &Subtarget,
                         const SDLoc &DL, EVT VT, ArrayRef<SDValue> Ops,
                         F Builder, bool CheckBWI = true) {
  assert(Subtarget.hasSSE2() && "Target assumed to support at least SSE2");
  unsigned NumSubs = 1;
  // Pick the widest chunk the subtarget supports for this operation.
  if ((CheckBWI && Subtarget.useBWIRegs()) ||
      (!CheckBWI && Subtarget.useAVX512Regs())) {
    if (VT.getSizeInBits() > 512) {
      NumSubs = VT.getSizeInBits() / 512;
      assert((VT.getSizeInBits() % 512) == 0 && "Illegal vector size");
    }
  } else if (Subtarget.hasAVX2()) {
    if (VT.getSizeInBits() > 256) {
      NumSubs = VT.getSizeInBits() / 256;
      assert((VT.getSizeInBits() % 256) == 0 && "Illegal vector size");
    }
  } else {
    if (VT.getSizeInBits() > 128) {
      NumSubs = VT.getSizeInBits() / 128;
      assert((VT.getSizeInBits() % 128) == 0 && "Illegal vector size");
    }
  }

  if (NumSubs == 1)
    return Builder(DAG, DL, Ops);

  // Build each chunk by extracting the matching subvector of every operand.
  SmallVector<SDValue, 4> Subs;
  for (unsigned i = 0; i != NumSubs; ++i) {
    SmallVector<SDValue, 2> SubOps;
    for (SDValue Op : Ops) {
      EVT OpVT = Op.getValueType();
      unsigned NumSubElts = OpVT.getVectorNumElements() / NumSubs;
      unsigned SizeSub = OpVT.getSizeInBits() / NumSubs;
      SubOps.push_back(extractSubVector(Op, i * NumSubElts, DAG, DL, SizeSub));
    }
    Subs.push_back(Builder(DAG, DL, SubOps));
  }
  return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Subs);
}
| 6145 | |
| 6146 | |
/// Lower INSERT_SUBVECTOR for i1 mask vectors, using KSHIFTL/KSHIFTR and
/// logic ops to place the subvector bits at the requested index. The mask is
/// widened to a kshift-capable width (v8i1/v16i1) when needed, then narrowed
/// back via EXTRACT_SUBVECTOR.
static SDValue insert1BitVector(SDValue Op, SelectionDAG &DAG,
                                const X86Subtarget &Subtarget) {

  SDLoc dl(Op);
  SDValue Vec = Op.getOperand(0);
  SDValue SubVec = Op.getOperand(1);
  SDValue Idx = Op.getOperand(2);
  unsigned IdxVal = Op.getConstantOperandVal(2);

  // Inserting undef is a nop. We can just return the original vector.
  if (SubVec.isUndef())
    return Vec;

  // Inserting at the front of an undef vector is already legal as-is.
  if (IdxVal == 0 && Vec.isUndef())
    return Op;

  MVT OpVT = Op.getSimpleValueType();
  unsigned NumElems = OpVT.getVectorNumElements();
  SDValue ZeroIdx = DAG.getIntPtrConstant(0, dl);

  // Widen to a type that supports the kshift instructions natively.
  MVT WideOpVT = OpVT;
  if ((!Subtarget.hasDQI() && NumElems == 8) || NumElems < 8)
    WideOpVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1;

  // Inserting into the lsbs of a zero vector: insert into a wide zero vector
  // and extract the original width back out.
  if (IdxVal == 0 && ISD::isBuildVectorAllZeros(Vec.getNode())) {

    Op = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
                     DAG.getConstant(0, dl, WideOpVT),
                     SubVec, Idx);
    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx);
  }

  MVT SubVecVT = SubVec.getSimpleValueType();
  unsigned SubVecNumElems = SubVecVT.getVectorNumElements();
  // NOTE(review): the second conjunct uses getSizeInBits(); for i1 vectors
  // size-in-bits equals the element count, so this is equivalent to an
  // element-aligned index check.
  assert(IdxVal + SubVecNumElems <= NumElems &&
         IdxVal % SubVecVT.getSizeInBits() == 0 &&
         "Unexpected index value in INSERT_SUBVECTOR");

  SDValue Undef = DAG.getUNDEF(WideOpVT);

  if (IdxVal == 0) {
    // Zero the lower SubVecNumElems bits of Vec with a shift-right/left pair.
    SDValue ShiftBits = DAG.getTargetConstant(SubVecNumElems, dl, MVT::i8);
    Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec,
                      ZeroIdx);
    Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, ShiftBits);
    Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, ShiftBits);
    // Merge them together; SubVec is zero-extended into the wide type.
    SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
                         DAG.getConstant(0, dl, WideOpVT),
                         SubVec, ZeroIdx);
    Op = DAG.getNode(ISD::OR, dl, WideOpVT, Vec, SubVec);
    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx);
  }

  // Widen the subvector so it can be shifted into position.
  SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
                       Undef, SubVec, ZeroIdx);

  if (Vec.isUndef()) {
    // Only the subvector bits matter - just shift them into place.
    assert(IdxVal != 0 && "Unexpected index");
    SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec,
                         DAG.getTargetConstant(IdxVal, dl, MVT::i8));
    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, SubVec, ZeroIdx);
  }

  if (ISD::isBuildVectorAllZeros(Vec.getNode())) {
    // Shift left to the top, then right to the final position, zeroing the
    // bits outside the subvector.
    assert(IdxVal != 0 && "Unexpected index");
    NumElems = WideOpVT.getVectorNumElements();
    unsigned ShiftLeft = NumElems - SubVecNumElems;
    unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal;
    SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec,
                         DAG.getTargetConstant(ShiftLeft, dl, MVT::i8));
    if (ShiftRight != 0)
      SubVec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, SubVec,
                           DAG.getTargetConstant(ShiftRight, dl, MVT::i8));
    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, SubVec, ZeroIdx);
  }

  // Simple case when we put the subvector in the upper part.
  if (IdxVal + SubVecNumElems == NumElems) {
    SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec,
                         DAG.getTargetConstant(IdxVal, dl, MVT::i8));
    if (SubVecNumElems * 2 == NumElems) {
      // Special case: use a legal zero-extending insert_subvector to clear
      // the upper bits, which lets isel optimize known-zero bits.
      Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SubVecVT, Vec, ZeroIdx);
      Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
                        DAG.getConstant(0, dl, WideOpVT),
                        Vec, ZeroIdx);
    } else {
      // Otherwise use explicit shifts to zero the upper bits of Vec.
      Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT,
                        Undef, Vec, ZeroIdx);
      NumElems = WideOpVT.getVectorNumElements();
      SDValue ShiftBits = DAG.getTargetConstant(NumElems - IdxVal, dl, MVT::i8);
      Vec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec, ShiftBits);
      Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec, ShiftBits);
    }
    Op = DAG.getNode(ISD::OR, dl, WideOpVT, Vec, SubVec);
    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx);
  }

  // Inserting into the middle is more complicated.

  NumElems = WideOpVT.getVectorNumElements();

  // Widen the vector if needed.
  Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideOpVT, Undef, Vec, ZeroIdx);

  unsigned ShiftLeft = NumElems - SubVecNumElems;
  unsigned ShiftRight = NumElems - SubVecNumElems - IdxVal;

  // If the constant mask is representable in an integer of the mask width
  // (always, except v64i1 on 32-bit targets), mask out the insertion window
  // with an AND and OR in the shifted subvector.
  if (WideOpVT != MVT::v64i1 || Subtarget.is64Bit()) {
    APInt Mask0 = APInt::getBitsSet(NumElems, IdxVal, IdxVal + SubVecNumElems);
    Mask0.flipAllBits();
    SDValue CMask0 = DAG.getConstant(Mask0, dl, MVT::getIntegerVT(NumElems));
    SDValue VMask0 = DAG.getNode(ISD::BITCAST, dl, WideOpVT, CMask0);
    Vec = DAG.getNode(ISD::AND, dl, WideOpVT, Vec, VMask0);
    SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec,
                         DAG.getTargetConstant(ShiftLeft, dl, MVT::i8));
    SubVec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, SubVec,
                         DAG.getTargetConstant(ShiftRight, dl, MVT::i8));
    Op = DAG.getNode(ISD::OR, dl, WideOpVT, Vec, SubVec);

    // Reduce to the original width if needed.
    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, Op, ZeroIdx);
  }

  // Clear the upper bits of the subvector and move it to its insert position.
  SubVec = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, SubVec,
                       DAG.getTargetConstant(ShiftLeft, dl, MVT::i8));
  SubVec = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, SubVec,
                       DAG.getTargetConstant(ShiftRight, dl, MVT::i8));

  // Isolate the bits below the insertion point.
  unsigned LowShift = NumElems - IdxVal;
  SDValue Low = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, Vec,
                            DAG.getTargetConstant(LowShift, dl, MVT::i8));
  Low = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Low,
                    DAG.getTargetConstant(LowShift, dl, MVT::i8));

  // Isolate the bits after the last inserted bit.
  unsigned HighShift = IdxVal + SubVecNumElems;
  SDValue High = DAG.getNode(X86ISD::KSHIFTR, dl, WideOpVT, Vec,
                             DAG.getTargetConstant(HighShift, dl, MVT::i8));
  High = DAG.getNode(X86ISD::KSHIFTL, dl, WideOpVT, High,
                     DAG.getTargetConstant(HighShift, dl, MVT::i8));

  // Now OR all 3 pieces together.
  Vec = DAG.getNode(ISD::OR, dl, WideOpVT, Low, High);
  SubVec = DAG.getNode(ISD::OR, dl, WideOpVT, SubVec, Vec);

  // Reduce to original width if needed.
  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OpVT, SubVec, ZeroIdx);
}
| 6306 | |
| 6307 | static SDValue concatSubVectors(SDValue V1, SDValue V2, SelectionDAG &DAG, |
| 6308 | const SDLoc &dl) { |
| 6309 | assert(V1.getValueType() == V2.getValueType() && "subvector type mismatch"); |
| 6310 | EVT SubVT = V1.getValueType(); |
| 6311 | EVT SubSVT = SubVT.getScalarType(); |
| 6312 | unsigned SubNumElts = SubVT.getVectorNumElements(); |
| 6313 | unsigned SubVectorWidth = SubVT.getSizeInBits(); |
| 6314 | EVT VT = EVT::getVectorVT(*DAG.getContext(), SubSVT, 2 * SubNumElts); |
| 6315 | SDValue V = insertSubVector(DAG.getUNDEF(VT), V1, 0, DAG, dl, SubVectorWidth); |
| 6316 | return insertSubVector(V, V2, SubNumElts, DAG, dl, SubVectorWidth); |
| 6317 | } |
| 6318 | |
| 6319 | |
| 6320 | |
| 6321 | |
| 6322 | static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, const SDLoc &dl) { |
| 6323 | assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) && |
| 6324 | "Expected a 128/256/512-bit vector type"); |
| 6325 | |
| 6326 | APInt Ones = APInt::getAllOnesValue(32); |
| 6327 | unsigned NumElts = VT.getSizeInBits() / 32; |
| 6328 | SDValue Vec = DAG.getConstant(Ones, dl, MVT::getVectorVT(MVT::i32, NumElts)); |
| 6329 | return DAG.getBitcast(VT, Vec); |
| 6330 | } |
| 6331 | |
| 6332 | |
| 6333 | static unsigned getOpcode_EXTEND(unsigned Opcode) { |
| 6334 | switch (Opcode) { |
| 6335 | case ISD::ANY_EXTEND: |
| 6336 | case ISD::ANY_EXTEND_VECTOR_INREG: |
| 6337 | return ISD::ANY_EXTEND; |
| 6338 | case ISD::ZERO_EXTEND: |
| 6339 | case ISD::ZERO_EXTEND_VECTOR_INREG: |
| 6340 | return ISD::ZERO_EXTEND; |
| 6341 | case ISD::SIGN_EXTEND: |
| 6342 | case ISD::SIGN_EXTEND_VECTOR_INREG: |
| 6343 | return ISD::SIGN_EXTEND; |
| 6344 | } |
| 6345 | llvm_unreachable("Unknown opcode"); |
| 6346 | } |
| 6347 | |
| 6348 | |
| 6349 | static unsigned getOpcode_EXTEND_VECTOR_INREG(unsigned Opcode) { |
| 6350 | switch (Opcode) { |
| 6351 | case ISD::ANY_EXTEND: |
| 6352 | case ISD::ANY_EXTEND_VECTOR_INREG: |
| 6353 | return ISD::ANY_EXTEND_VECTOR_INREG; |
| 6354 | case ISD::ZERO_EXTEND: |
| 6355 | case ISD::ZERO_EXTEND_VECTOR_INREG: |
| 6356 | return ISD::ZERO_EXTEND_VECTOR_INREG; |
| 6357 | case ISD::SIGN_EXTEND: |
| 6358 | case ISD::SIGN_EXTEND_VECTOR_INREG: |
| 6359 | return ISD::SIGN_EXTEND_VECTOR_INREG; |
| 6360 | } |
| 6361 | llvm_unreachable("Unknown opcode"); |
| 6362 | } |
| 6363 | |
/// Emit an any/sign/zero extension of \p In to \p VT, switching to the
/// *_EXTEND_VECTOR_INREG opcode when the element counts differ. Inputs wider
/// than needed are first narrowed by extracting the low subvector.
static SDValue getEXTEND_VECTOR_INREG(unsigned Opcode, const SDLoc &DL, EVT VT,
                                      SDValue In, SelectionDAG &DAG) {
  EVT InVT = In.getValueType();
  assert(VT.isVector() && InVT.isVector() && "Expected vector VTs.");
  assert((ISD::ANY_EXTEND == Opcode || ISD::SIGN_EXTEND == Opcode ||
          ISD::ZERO_EXTEND == Opcode) &&
         "Unknown extension opcode");

  // For 256-bit vectors, we only need the lower (128-bit) input half.
  // For 512-bit vectors, we only need the lower input half or quarter.
  if (InVT.getSizeInBits() > 128) {
    assert(VT.getSizeInBits() == InVT.getSizeInBits() &&
           "Expected VTs to be the same size!");
    unsigned Scale = VT.getScalarSizeInBits() / InVT.getScalarSizeInBits();
    In = extractSubVector(In, 0, DAG, DL,
                          std::max(128U, (unsigned)VT.getSizeInBits() / Scale));
    InVT = In.getValueType();
  }

  if (VT.getVectorNumElements() != InVT.getVectorNumElements())
    Opcode = getOpcode_EXTEND_VECTOR_INREG(Opcode);

  return DAG.getNode(Opcode, DL, VT, In);
}
| 6388 | |
| 6389 | |
| 6390 | |
| 6391 | |
/// Match 'NOT(V)', i.e. XOR(V, all-ones), looking through bitcasts. Also
/// recurses through EXTRACT_SUBVECTOR and concatenations of NOTs. Returns the
/// un-negated operand on success, or an empty SDValue on failure.
static SDValue IsNOT(SDValue V, SelectionDAG &DAG) {
  V = peekThroughBitcasts(V);
  if (V.getOpcode() == ISD::XOR &&
      ISD::isBuildVectorAllOnes(V.getOperand(1).getNode()))
    return V.getOperand(0);
  // not(extract_subvector(x)) -> extract_subvector(not(x)), but only when the
  // extract is free (index 0) or x has a single use.
  if (V.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
      (isNullConstant(V.getOperand(1)) || V.getOperand(0).hasOneUse())) {
    if (SDValue Not = IsNOT(V.getOperand(0), DAG)) {
      Not = DAG.getBitcast(V.getOperand(0).getValueType(), Not);
      return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(Not), V.getValueType(),
                         Not, V.getOperand(1));
    }
  }
  // not(concat(x, y)) -> concat(not(x), not(y)) if every piece is a NOT.
  SmallVector<SDValue, 2> CatOps;
  if (collectConcatOps(V.getNode(), CatOps)) {
    for (SDValue &CatOp : CatOps) {
      SDValue NotCat = IsNOT(CatOp, DAG);
      if (!NotCat) return SDValue();
      CatOp = DAG.getBitcast(CatOp.getValueType(), NotCat);
    }
    return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(V), V.getValueType(), CatOps);
  }
  return SDValue();
}
| 6416 | |
/// Build the shuffle mask matching the x86 UNPCKL/UNPCKH pattern for \p VT,
/// operating per 128-bit lane. \p Lo selects the low half of each lane
/// (UNPCKL) versus the high half (UNPCKH); \p Unary interleaves a vector with
/// itself rather than with a second vector.
void llvm::createUnpackShuffleMask(EVT VT, SmallVectorImpl<int> &Mask,
                                   bool Lo, bool Unary) {
  assert(VT.getScalarType().isSimple() && (VT.getSizeInBits() % 128) == 0 &&
         "Illegal vector type to unpack");
  assert(Mask.empty() && "Expected an empty shuffle mask vector");
  int NumElts = VT.getVectorNumElements();
  int NumEltsInLane = 128 / VT.getScalarSizeInBits();
  for (int i = 0; i < NumElts; ++i) {
    unsigned LaneStart = (i / NumEltsInLane) * NumEltsInLane;
    int Pos = (i % NumEltsInLane) / 2 + LaneStart;
    // Odd positions pull from the second operand (offset NumElts) unless
    // this is a unary unpack.
    Pos += (Unary ? 0 : NumElts * (i % 2));
    // Select the low or high half of the lane.
    Pos += (Lo ? 0 : NumEltsInLane / 2);
    Mask.push_back(Pos);
  }
}
| 6432 | |
| 6433 | |
| 6434 | |
| 6435 | |
| 6436 | |
| 6437 | void llvm::createSplat2ShuffleMask(MVT VT, SmallVectorImpl<int> &Mask, |
| 6438 | bool Lo) { |
| 6439 | assert(Mask.empty() && "Expected an empty shuffle mask vector"); |
| 6440 | int NumElts = VT.getVectorNumElements(); |
| 6441 | for (int i = 0; i < NumElts; ++i) { |
| 6442 | int Pos = i / 2; |
| 6443 | Pos += (Lo ? 0 : NumElts / 2); |
| 6444 | Mask.push_back(Pos); |
| 6445 | } |
| 6446 | } |
| 6447 | |
| 6448 | |
| 6449 | static SDValue getUnpackl(SelectionDAG &DAG, const SDLoc &dl, EVT VT, |
| 6450 | SDValue V1, SDValue V2) { |
| 6451 | SmallVector<int, 8> Mask; |
| 6452 | createUnpackShuffleMask(VT, Mask, true, false); |
| 6453 | return DAG.getVectorShuffle(VT, dl, V1, V2, Mask); |
| 6454 | } |
| 6455 | |
| 6456 | |
| 6457 | static SDValue getUnpackh(SelectionDAG &DAG, const SDLoc &dl, EVT VT, |
| 6458 | SDValue V1, SDValue V2) { |
| 6459 | SmallVector<int, 8> Mask; |
| 6460 | createUnpackShuffleMask(VT, Mask, false, false); |
| 6461 | return DAG.getVectorShuffle(VT, dl, V1, V2, Mask); |
| 6462 | } |
| 6463 | |
| 6464 | |
| 6465 | |
| 6466 | |
| 6467 | |
| 6468 | static SDValue getShuffleVectorZeroOrUndef(SDValue V2, int Idx, |
| 6469 | bool IsZero, |
| 6470 | const X86Subtarget &Subtarget, |
| 6471 | SelectionDAG &DAG) { |
| 6472 | MVT VT = V2.getSimpleValueType(); |
| 6473 | SDValue V1 = IsZero |
| 6474 | ? getZeroVector(VT, Subtarget, DAG, SDLoc(V2)) : DAG.getUNDEF(VT); |
| 6475 | int NumElems = VT.getVectorNumElements(); |
| 6476 | SmallVector<int, 16> MaskVec(NumElems); |
| 6477 | for (int i = 0; i != NumElems; ++i) |
| 6478 | |
| 6479 | MaskVec[i] = (i == Idx) ? NumElems : i; |
| 6480 | return DAG.getVectorShuffle(VT, SDLoc(V2), V1, V2, MaskVec); |
| 6481 | } |
| 6482 | |
| 6483 | static const Constant *getTargetConstantFromBasePtr(SDValue Ptr) { |
| 6484 | if (Ptr.getOpcode() == X86ISD::Wrapper || |
| 6485 | Ptr.getOpcode() == X86ISD::WrapperRIP) |
| 6486 | Ptr = Ptr.getOperand(0); |
| 6487 | |
| 6488 | auto *CNode = dyn_cast<ConstantPoolSDNode>(Ptr); |
| 6489 | if (!CNode || CNode->isMachineConstantPoolEntry() || CNode->getOffset() != 0) |
| 6490 | return nullptr; |
| 6491 | |
| 6492 | return CNode->getConstVal(); |
| 6493 | } |
| 6494 | |
| 6495 | static const Constant *getTargetConstantFromNode(LoadSDNode *Load) { |
| 6496 | if (!Load || !ISD::isNormalLoad(Load)) |
| 6497 | return nullptr; |
| 6498 | return getTargetConstantFromBasePtr(Load->getBasePtr()); |
| 6499 | } |
| 6500 | |
| 6501 | static const Constant *getTargetConstantFromNode(SDValue Op) { |
| 6502 | Op = peekThroughBitcasts(Op); |
| 6503 | return getTargetConstantFromNode(dyn_cast<LoadSDNode>(Op)); |
| 6504 | } |
| 6505 | |
// Public wrapper used by target-independent code: return the IR Constant
// backing LD if it is a normal load from an offset-zero constant-pool
// entry, otherwise nullptr.
const Constant *
X86TargetLowering::getTargetConstantFromLoad(LoadSDNode *LD) const {
  assert(LD && "Unexpected null LoadSDNode");
  return getTargetConstantFromNode(LD);
}
| 6511 | |
| 6512 | |
// Attempt to extract the raw constant bits of Op, re-split into elements of
// width EltSizeInBits. On success UndefElts flags which result elements are
// undef and EltBits holds each element's bits. Handles scalar constants,
// build vectors, constant-pool loads, broadcast loads, VZEXT_MOVL,
// INSERT/EXTRACT_SUBVECTOR and shuffles of constants; bitcasts are looked
// through. AllowWholeUndefs/AllowPartialUndefs control whether fully or
// partially undefined result elements cause failure.
static bool getTargetConstantBitsFromNode(SDValue Op, unsigned EltSizeInBits,
                                          APInt &UndefElts,
                                          SmallVectorImpl<APInt> &EltBits,
                                          bool AllowWholeUndefs = true,
                                          bool AllowPartialUndefs = true) {
  assert(EltBits.empty() && "Expected an empty EltBits vector");

  // Bitcasts don't change the underlying constant data.
  Op = peekThroughBitcasts(Op);

  EVT VT = Op.getValueType();
  unsigned SizeInBits = VT.getSizeInBits();
  assert((SizeInBits % EltSizeInBits) == 0 && "Can't split constant!");
  unsigned NumElts = SizeInBits / EltSizeInBits;

  // Re-cast source constant bits (NumSrcElts elements of SrcEltSizeInBits
  // bits each) into the requested element width, tracking undefs through
  // the re-split. Returns false if the undef policy is violated.
  auto CastBitData = [&](APInt &UndefSrcElts, ArrayRef<APInt> SrcEltBits) {
    unsigned NumSrcElts = UndefSrcElts.getBitWidth();
    unsigned SrcEltSizeInBits = SrcEltBits[0].getBitWidth();
    assert((NumSrcElts * SrcEltSizeInBits) == SizeInBits &&
           "Constant bit sizes don't match");

    // Reject any undefs up front if the caller forbids both kinds.
    bool AllowUndefs = AllowWholeUndefs || AllowPartialUndefs;
    if (UndefSrcElts.getBoolValue() && !AllowUndefs)
      return false;

    // Same element count: no re-splitting needed.
    if (NumSrcElts == NumElts) {
      UndefElts = UndefSrcElts;
      EltBits.assign(SrcEltBits.begin(), SrcEltBits.end());
      return true;
    }

    // Flatten the source elements (and their undef flags) into one wide
    // bit vector each, then slice at the requested granularity.
    APInt UndefBits(SizeInBits, 0);
    APInt MaskBits(SizeInBits, 0);

    for (unsigned i = 0; i != NumSrcElts; ++i) {
      unsigned BitOffset = i * SrcEltSizeInBits;
      if (UndefSrcElts[i])
        UndefBits.setBits(BitOffset, BitOffset + SrcEltSizeInBits);
      MaskBits.insertBits(SrcEltBits[i], BitOffset);
    }

    // Extract the destination elements, applying the undef policy per slice.
    UndefElts = APInt(NumElts, 0);
    EltBits.resize(NumElts, APInt(EltSizeInBits, 0));

    for (unsigned i = 0; i != NumElts; ++i) {
      unsigned BitOffset = i * EltSizeInBits;
      APInt UndefEltBits = UndefBits.extractBits(EltSizeInBits, BitOffset);

      // Entirely undef slice -> whole-undef result element.
      if (UndefEltBits.isAllOnesValue()) {
        if (!AllowWholeUndefs)
          return false;
        UndefElts.setBit(i);
        continue;
      }

      // Mixed defined/undef slice: only allowed with AllowPartialUndefs;
      // undef source bits are treated as zero in the extracted value.
      if (UndefEltBits.getBoolValue() && !AllowPartialUndefs)
        return false;

      EltBits[i] = MaskBits.extractBits(EltSizeInBits, BitOffset);
    }
    return true;
  };

  // Decode a single IR constant-pool element into raw bits, flagging undef
  // elements in Undefs at UndefBitIndex. Fails for anything that is not an
  // integer, FP or undef scalar constant.
  auto CollectConstantBits = [](const Constant *Cst, APInt &Mask, APInt &Undefs,
                                unsigned UndefBitIndex) {
    if (!Cst)
      return false;
    if (isa<UndefValue>(Cst)) {
      Undefs.setBit(UndefBitIndex);
      return true;
    }
    if (auto *CInt = dyn_cast<ConstantInt>(Cst)) {
      Mask = CInt->getValue();
      return true;
    }
    if (auto *CFP = dyn_cast<ConstantFP>(Cst)) {
      Mask = CFP->getValueAPF().bitcastToAPInt();
      return true;
    }
    return false;
  };

  // Handle UNDEF: every element is undef with zero bits.
  if (Op.isUndef()) {
    APInt UndefSrcElts = APInt::getAllOnesValue(NumElts);
    SmallVector<APInt, 64> SrcEltBits(NumElts, APInt(EltSizeInBits, 0));
    return CastBitData(UndefSrcElts, SrcEltBits);
  }

  // Handle scalar integer constants (one source "element" of full width).
  if (auto *Cst = dyn_cast<ConstantSDNode>(Op)) {
    APInt UndefSrcElts = APInt::getNullValue(1);
    SmallVector<APInt, 64> SrcEltBits(1, Cst->getAPIntValue());
    return CastBitData(UndefSrcElts, SrcEltBits);
  }
  // Handle scalar FP constants via their raw bit pattern.
  if (auto *Cst = dyn_cast<ConstantFPSDNode>(Op)) {
    APInt UndefSrcElts = APInt::getNullValue(1);
    APInt RawBits = Cst->getValueAPF().bitcastToAPInt();
    SmallVector<APInt, 64> SrcEltBits(1, RawBits);
    return CastBitData(UndefSrcElts, SrcEltBits);
  }

  // Handle BUILD_VECTOR of integer constants (possibly with undef operands).
  if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
    unsigned SrcEltSizeInBits = VT.getScalarSizeInBits();
    unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;

    APInt UndefSrcElts(NumSrcElts, 0);
    SmallVector<APInt, 64> SrcEltBits(NumSrcElts, APInt(SrcEltSizeInBits, 0));
    for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
      const SDValue &Src = Op.getOperand(i);
      if (Src.isUndef()) {
        UndefSrcElts.setBit(i);
        continue;
      }
      auto *Cst = cast<ConstantSDNode>(Src);
      SrcEltBits[i] = Cst->getAPIntValue().zextOrTrunc(SrcEltSizeInBits);
    }
    return CastBitData(UndefSrcElts, SrcEltBits);
  }
  // Handle BUILD_VECTOR of FP constants (possibly with undef operands).
  if (ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode())) {
    unsigned SrcEltSizeInBits = VT.getScalarSizeInBits();
    unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;

    APInt UndefSrcElts(NumSrcElts, 0);
    SmallVector<APInt, 64> SrcEltBits(NumSrcElts, APInt(SrcEltSizeInBits, 0));
    for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) {
      const SDValue &Src = Op.getOperand(i);
      if (Src.isUndef()) {
        UndefSrcElts.setBit(i);
        continue;
      }
      auto *Cst = cast<ConstantFPSDNode>(Src);
      APInt RawBits = Cst->getValueAPF().bitcastToAPInt();
      SrcEltBits[i] = RawBits.zextOrTrunc(SrcEltSizeInBits);
    }
    return CastBitData(UndefSrcElts, SrcEltBits);
  }

  // Handle loads from the constant pool.
  if (auto *Cst = getTargetConstantFromNode(Op)) {
    Type *CstTy = Cst->getType();
    unsigned CstSizeInBits = CstTy->getPrimitiveSizeInBits();
    if (!CstTy->isVectorTy() || (CstSizeInBits % SizeInBits) != 0)
      return false;

    unsigned SrcEltSizeInBits = CstTy->getScalarSizeInBits();
    unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;

    APInt UndefSrcElts(NumSrcElts, 0);
    SmallVector<APInt, 64> SrcEltBits(NumSrcElts, APInt(SrcEltSizeInBits, 0));
    for (unsigned i = 0; i != NumSrcElts; ++i)
      if (!CollectConstantBits(Cst->getAggregateElement(i), SrcEltBits[i],
                               UndefSrcElts, i))
        return false;

    return CastBitData(UndefSrcElts, SrcEltBits);
  }

  // Handle broadcast of a single constant-pool scalar: replicate the loaded
  // element across the whole vector.
  if (Op.getOpcode() == X86ISD::VBROADCAST_LOAD &&
      EltSizeInBits <= VT.getScalarSizeInBits()) {
    auto *MemIntr = cast<MemIntrinsicSDNode>(Op);
    if (MemIntr->getMemoryVT().getScalarSizeInBits() != VT.getScalarSizeInBits())
      return false;

    SDValue Ptr = MemIntr->getBasePtr();
    if (const Constant *C = getTargetConstantFromBasePtr(Ptr)) {
      unsigned SrcEltSizeInBits = C->getType()->getScalarSizeInBits();
      unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;

      APInt UndefSrcElts(NumSrcElts, 0);
      SmallVector<APInt, 64> SrcEltBits(1, APInt(SrcEltSizeInBits, 0));
      if (CollectConstantBits(C, SrcEltBits[0], UndefSrcElts, 0)) {
        // An undef broadcast source makes every element undef.
        if (UndefSrcElts[0])
          UndefSrcElts.setBits(0, NumSrcElts);
        SrcEltBits.append(NumSrcElts - 1, SrcEltBits[0]);
        return CastBitData(UndefSrcElts, SrcEltBits);
      }
    }
  }

  // Handle broadcast of a constant-pool subvector: replicate the loaded
  // subvector's elements across all subvector positions.
  if (Op.getOpcode() == X86ISD::SUBV_BROADCAST_LOAD) {
    auto *MemIntr = cast<MemIntrinsicSDNode>(Op);
    SDValue Ptr = MemIntr->getBasePtr();

    // The source constant may be larger than the broadcast memory type.
    if (const Constant *Cst = getTargetConstantFromBasePtr(Ptr)) {
      Type *CstTy = Cst->getType();
      unsigned CstSizeInBits = CstTy->getPrimitiveSizeInBits();
      unsigned SubVecSizeInBits = MemIntr->getMemoryVT().getStoreSizeInBits();
      if (!CstTy->isVectorTy() || (CstSizeInBits % SubVecSizeInBits) != 0 ||
          (SizeInBits % SubVecSizeInBits) != 0)
        return false;
      unsigned CstEltSizeInBits = CstTy->getScalarSizeInBits();
      unsigned NumSubElts = SubVecSizeInBits / CstEltSizeInBits;
      unsigned NumSubVecs = SizeInBits / SubVecSizeInBits;
      APInt UndefSubElts(NumSubElts, 0);
      SmallVector<APInt, 64> SubEltBits(NumSubElts * NumSubVecs,
                                        APInt(CstEltSizeInBits, 0));
      for (unsigned i = 0; i != NumSubElts; ++i) {
        if (!CollectConstantBits(Cst->getAggregateElement(i), SubEltBits[i],
                                 UndefSubElts, i))
          return false;
        // Copy each decoded element into every repeated subvector.
        for (unsigned j = 1; j != NumSubVecs; ++j)
          SubEltBits[i + (j * NumSubElts)] = SubEltBits[i];
      }
      UndefSubElts = APInt::getSplat(NumSubVecs * UndefSubElts.getBitWidth(),
                                     UndefSubElts);
      return CastBitData(UndefSubElts, SubEltBits);
    }
  }

  // Handle VZEXT_MOVL(SCALAR_TO_VECTOR(C)): element 0 is the constant, the
  // remaining elements are zero.
  if (Op.getOpcode() == X86ISD::VZEXT_MOVL &&
      Op.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
      isa<ConstantSDNode>(Op.getOperand(0).getOperand(0))) {
    unsigned SrcEltSizeInBits = VT.getScalarSizeInBits();
    unsigned NumSrcElts = SizeInBits / SrcEltSizeInBits;

    APInt UndefSrcElts(NumSrcElts, 0);
    SmallVector<APInt, 64> SrcEltBits;
    auto *CN = cast<ConstantSDNode>(Op.getOperand(0).getOperand(0));
    SrcEltBits.push_back(CN->getAPIntValue().zextOrTrunc(SrcEltSizeInBits));
    SrcEltBits.append(NumSrcElts - 1, APInt(SrcEltSizeInBits, 0));
    return CastBitData(UndefSrcElts, SrcEltBits);
  }

  // Handle INSERT_SUBVECTOR: extract bits from both the base vector and the
  // inserted subvector and splice them together.
  if (Op.getOpcode() == ISD::INSERT_SUBVECTOR) {
    // Extracting at a wider granularity than the source scalar could merge
    // undef and defined bits, so only permit undefs when widening.
    unsigned SrcEltSizeInBits = VT.getScalarSizeInBits();
    bool AllowUndefs = EltSizeInBits >= SrcEltSizeInBits;

    APInt UndefSrcElts, UndefSubElts;
    SmallVector<APInt, 32> EltSrcBits, EltSubBits;
    if (getTargetConstantBitsFromNode(Op.getOperand(1), SrcEltSizeInBits,
                                      UndefSubElts, EltSubBits,
                                      AllowWholeUndefs && AllowUndefs,
                                      AllowPartialUndefs && AllowUndefs) &&
        getTargetConstantBitsFromNode(Op.getOperand(0), SrcEltSizeInBits,
                                      UndefSrcElts, EltSrcBits,
                                      AllowWholeUndefs && AllowUndefs,
                                      AllowPartialUndefs && AllowUndefs)) {
      unsigned BaseIdx = Op.getConstantOperandVal(2);
      UndefSrcElts.insertBits(UndefSubElts, BaseIdx);
      for (unsigned i = 0, e = EltSubBits.size(); i != e; ++i)
        EltSrcBits[BaseIdx + i] = EltSubBits[i];
      return CastBitData(UndefSrcElts, EltSrcBits);
    }
  }

  // Handle EXTRACT_SUBVECTOR: take the relevant slice of the source's bits.
  if (Op.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
    // Only handle the case where we're extracting at the scalar granularity.
    if (EltSizeInBits != VT.getScalarSizeInBits())
      return false;

    if (getTargetConstantBitsFromNode(Op.getOperand(0), EltSizeInBits,
                                      UndefElts, EltBits, AllowWholeUndefs,
                                      AllowPartialUndefs)) {
      EVT SrcVT = Op.getOperand(0).getValueType();
      unsigned NumSrcElts = SrcVT.getVectorNumElements();
      unsigned NumSubElts = VT.getVectorNumElements();
      unsigned BaseIdx = Op.getConstantOperandVal(1);
      UndefElts = UndefElts.extractBits(NumSubElts, BaseIdx);
      // Trim EltBits down to the [BaseIdx, BaseIdx+NumSubElts) window.
      if ((BaseIdx + NumSubElts) != NumSrcElts)
        EltBits.erase(EltBits.begin() + BaseIdx + NumSubElts, EltBits.end());
      if (BaseIdx != 0)
        EltBits.erase(EltBits.begin(), EltBits.begin() + BaseIdx);
      return true;
    }
  }

  // Handle shuffles of constant inputs by applying the mask to the decoded
  // element lists.
  if (auto *SVN = dyn_cast<ShuffleVectorSDNode>(Op)) {
    // Only handle the case where we're shuffling at the scalar granularity.
    if (EltSizeInBits != VT.getScalarSizeInBits())
      return false;

    ArrayRef<int> Mask = SVN->getMask();
    // Sentinel (negative) mask entries produce undef elements; bail if the
    // caller forbids any form of undef.
    if ((!AllowWholeUndefs || !AllowPartialUndefs) &&
        llvm::any_of(Mask, [](int M) { return M < 0; }))
      return false;

    // Only decode an input if the mask actually references it.
    APInt UndefElts0, UndefElts1;
    SmallVector<APInt, 32> EltBits0, EltBits1;
    if (isAnyInRange(Mask, 0, NumElts) &&
        !getTargetConstantBitsFromNode(Op.getOperand(0), EltSizeInBits,
                                       UndefElts0, EltBits0, AllowWholeUndefs,
                                       AllowPartialUndefs))
      return false;
    if (isAnyInRange(Mask, NumElts, 2 * NumElts) &&
        !getTargetConstantBitsFromNode(Op.getOperand(1), EltSizeInBits,
                                       UndefElts1, EltBits1, AllowWholeUndefs,
                                       AllowPartialUndefs))
      return false;

    UndefElts = APInt::getNullValue(NumElts);
    for (int i = 0; i != (int)NumElts; ++i) {
      int M = Mask[i];
      if (M < 0) {
        // Sentinel entry -> undef result element (zero bits).
        UndefElts.setBit(i);
        EltBits.push_back(APInt::getNullValue(EltSizeInBits));
      } else if (M < (int)NumElts) {
        if (UndefElts0[M])
          UndefElts.setBit(i);
        EltBits.push_back(EltBits0[M]);
      } else {
        if (UndefElts1[M - NumElts])
          UndefElts.setBit(i);
        EltBits.push_back(EltBits1[M - NumElts]);
      }
    }
    return true;
  }

  return false;
}
| 6842 | |
| 6843 | namespace llvm { |
| 6844 | namespace X86 { |
| 6845 | bool isConstantSplat(SDValue Op, APInt &SplatVal, bool AllowPartialUndefs) { |
| 6846 | APInt UndefElts; |
| 6847 | SmallVector<APInt, 16> EltBits; |
| 6848 | if (getTargetConstantBitsFromNode(Op, Op.getScalarValueSizeInBits(), |
| 6849 | UndefElts, EltBits, true, |
| 6850 | AllowPartialUndefs)) { |
| 6851 | int SplatIndex = -1; |
| 6852 | for (int i = 0, e = EltBits.size(); i != e; ++i) { |
| 6853 | if (UndefElts[i]) |
| 6854 | continue; |
| 6855 | if (0 <= SplatIndex && EltBits[i] != EltBits[SplatIndex]) { |
| 6856 | SplatIndex = -1; |
| 6857 | break; |
| 6858 | } |
| 6859 | SplatIndex = i; |
| 6860 | } |
| 6861 | if (0 <= SplatIndex) { |
| 6862 | SplatVal = EltBits[SplatIndex]; |
| 6863 | return true; |
| 6864 | } |
| 6865 | } |
| 6866 | |
| 6867 | return false; |
| 6868 | } |
| 6869 | } |
| 6870 | } |
| 6871 | |
| 6872 | static bool getTargetShuffleMaskIndices(SDValue MaskNode, |
| 6873 | unsigned MaskEltSizeInBits, |
| 6874 | SmallVectorImpl<uint64_t> &RawMask, |
| 6875 | APInt &UndefElts) { |
| 6876 | |
| 6877 | SmallVector<APInt, 64> EltBits; |
| 6878 | if (!getTargetConstantBitsFromNode(MaskNode, MaskEltSizeInBits, UndefElts, |
| 6879 | EltBits, true, |
| 6880 | false)) |
| 6881 | return false; |
| 6882 | |
| 6883 | |
| 6884 | for (const APInt &Elt : EltBits) |
| 6885 | RawMask.push_back(Elt.getZExtValue()); |
| 6886 | |
| 6887 | return true; |
| 6888 | } |
| 6889 | |
| 6890 | |
| 6891 | |
| 6892 | |
| 6893 | static void createPackShuffleMask(MVT VT, SmallVectorImpl<int> &Mask, |
| 6894 | bool Unary, unsigned NumStages = 1) { |
| 6895 | assert(Mask.empty() && "Expected an empty shuffle mask vector"); |
| 6896 | unsigned NumElts = VT.getVectorNumElements(); |
| 6897 | unsigned NumLanes = VT.getSizeInBits() / 128; |
| 6898 | unsigned NumEltsPerLane = 128 / VT.getScalarSizeInBits(); |
| 6899 | unsigned Offset = Unary ? 0 : NumElts; |
| 6900 | unsigned Repetitions = 1u << (NumStages - 1); |
| 6901 | unsigned Increment = 1u << NumStages; |
| 6902 | assert((NumEltsPerLane >> NumStages) > 0 && "Illegal packing compaction"); |
| 6903 | |
| 6904 | for (unsigned Lane = 0; Lane != NumLanes; ++Lane) { |
| 6905 | for (unsigned Stage = 0; Stage != Repetitions; ++Stage) { |
| 6906 | for (unsigned Elt = 0; Elt != NumEltsPerLane; Elt += Increment) |
| 6907 | Mask.push_back(Elt + (Lane * NumEltsPerLane)); |
| 6908 | for (unsigned Elt = 0; Elt != NumEltsPerLane; Elt += Increment) |
| 6909 | Mask.push_back(Elt + (Lane * NumEltsPerLane) + Offset); |
| 6910 | } |
| 6911 | } |
| 6912 | } |
| 6913 | |
| 6914 | |
| 6915 | static void getPackDemandedElts(EVT VT, const APInt &DemandedElts, |
| 6916 | APInt &DemandedLHS, APInt &DemandedRHS) { |
| 6917 | int NumLanes = VT.getSizeInBits() / 128; |
| 6918 | int NumElts = DemandedElts.getBitWidth(); |
| 6919 | int NumInnerElts = NumElts / 2; |
| 6920 | int NumEltsPerLane = NumElts / NumLanes; |
| 6921 | int NumInnerEltsPerLane = NumInnerElts / NumLanes; |
| 6922 | |
| 6923 | DemandedLHS = APInt::getNullValue(NumInnerElts); |
| 6924 | DemandedRHS = APInt::getNullValue(NumInnerElts); |
| 6925 | |
| 6926 | |
| 6927 | for (int Lane = 0; Lane != NumLanes; ++Lane) { |
| 6928 | for (int Elt = 0; Elt != NumInnerEltsPerLane; ++Elt) { |
| 6929 | int OuterIdx = (Lane * NumEltsPerLane) + Elt; |
| 6930 | int InnerIdx = (Lane * NumInnerEltsPerLane) + Elt; |
| 6931 | if (DemandedElts[OuterIdx]) |
| 6932 | DemandedLHS.setBit(InnerIdx); |
| 6933 | if (DemandedElts[OuterIdx + NumInnerEltsPerLane]) |
| 6934 | DemandedRHS.setBit(InnerIdx); |
| 6935 | } |
| 6936 | } |
| 6937 | } |
| 6938 | |
| 6939 | |
| 6940 | static void getHorizDemandedElts(EVT VT, const APInt &DemandedElts, |
| 6941 | APInt &DemandedLHS, APInt &DemandedRHS) { |
| 6942 | int NumLanes = VT.getSizeInBits() / 128; |
| 6943 | int NumElts = DemandedElts.getBitWidth(); |
| 6944 | int NumEltsPerLane = NumElts / NumLanes; |
| 6945 | int HalfEltsPerLane = NumEltsPerLane / 2; |
| 6946 | |
| 6947 | DemandedLHS = APInt::getNullValue(NumElts); |
| 6948 | DemandedRHS = APInt::getNullValue(NumElts); |
| 6949 | |
| 6950 | |
| 6951 | for (int Idx = 0; Idx != NumElts; ++Idx) { |
| 6952 | if (!DemandedElts[Idx]) |
| 6953 | continue; |
| 6954 | int LaneIdx = (Idx / NumEltsPerLane) * NumEltsPerLane; |
| 6955 | int LocalIdx = Idx % NumEltsPerLane; |
| 6956 | if (LocalIdx < HalfEltsPerLane) { |
| 6957 | DemandedLHS.setBit(LaneIdx + 2 * LocalIdx + 0); |
| 6958 | DemandedLHS.setBit(LaneIdx + 2 * LocalIdx + 1); |
| 6959 | } else { |
| 6960 | LocalIdx -= HalfEltsPerLane; |
| 6961 | DemandedRHS.setBit(LaneIdx + 2 * LocalIdx + 0); |
| 6962 | DemandedRHS.setBit(LaneIdx + 2 * LocalIdx + 1); |
| 6963 | } |
| 6964 | } |
| 6965 | } |
| 6966 | |
| 6967 | |
| 6968 | |
| 6969 | |
| 6970 | |
| 6971 | |
| 6972 | |
| 6973 | |
| 6974 | static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero, |
| 6975 | SmallVectorImpl<SDValue> &Ops, |
| 6976 | SmallVectorImpl<int> &Mask, bool &IsUnary) { |
| 6977 | unsigned NumElems = VT.getVectorNumElements(); |
| 6978 | unsigned MaskEltSize = VT.getScalarSizeInBits(); |
| 6979 | SmallVector<uint64_t, 32> RawMask; |
| 6980 | APInt RawUndefs; |
| 6981 | uint64_t ImmN; |
| 6982 | |
| 6983 | assert(Mask.empty() && "getTargetShuffleMask expects an empty Mask vector"); |
| 6984 | assert(Ops.empty() && "getTargetShuffleMask expects an empty Ops vector"); |
| 6985 | |
| 6986 | IsUnary = false; |
| 6987 | bool IsFakeUnary = false; |
| 6988 | switch (N->getOpcode()) { |
| 6989 | case X86ISD::BLENDI: |
| 6990 | assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); |
| 6991 | assert(N->getOperand(1).getValueType() == VT && "Unexpected value type"); |
| 6992 | ImmN = N->getConstantOperandVal(N->getNumOperands() - 1); |
| 6993 | DecodeBLENDMask(NumElems, ImmN, Mask); |
| 6994 | IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); |
| 6995 | break; |
| 6996 | case X86ISD::SHUFP: |
| 6997 | assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); |
| 6998 | assert(N->getOperand(1).getValueType() == VT && "Unexpected value type"); |
| 6999 | ImmN = N->getConstantOperandVal(N->getNumOperands() - 1); |
| 7000 | DecodeSHUFPMask(NumElems, MaskEltSize, ImmN, Mask); |
| 7001 | IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); |
| 7002 | break; |
| 7003 | case X86ISD::INSERTPS: |
| 7004 | assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); |
| 7005 | assert(N->getOperand(1).getValueType() == VT && "Unexpected value type"); |
| 7006 | ImmN = N->getConstantOperandVal(N->getNumOperands() - 1); |
| 7007 | DecodeINSERTPSMask(ImmN, Mask); |
| 7008 | IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); |
| 7009 | break; |
| 7010 | case X86ISD::EXTRQI: |
| 7011 | assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); |
| 7012 | if (isa<ConstantSDNode>(N->getOperand(1)) && |
| 7013 | isa<ConstantSDNode>(N->getOperand(2))) { |
| 7014 | int BitLen = N->getConstantOperandVal(1); |
| 7015 | int BitIdx = N->getConstantOperandVal(2); |
| 7016 | DecodeEXTRQIMask(NumElems, MaskEltSize, BitLen, BitIdx, Mask); |
| 7017 | IsUnary = true; |
| 7018 | } |
| 7019 | break; |
| 7020 | case X86ISD::INSERTQI: |
| 7021 | assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); |
| 7022 | assert(N->getOperand(1).getValueType() == VT && "Unexpected value type"); |
| 7023 | if (isa<ConstantSDNode>(N->getOperand(2)) && |
| 7024 | isa<ConstantSDNode>(N->getOperand(3))) { |
| 7025 | int BitLen = N->getConstantOperandVal(2); |
| 7026 | int BitIdx = N->getConstantOperandVal(3); |
| 7027 | DecodeINSERTQIMask(NumElems, MaskEltSize, BitLen, BitIdx, Mask); |
| 7028 | IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); |
| 7029 | } |
| 7030 | break; |
| 7031 | case X86ISD::UNPCKH: |
| 7032 | assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); |
| 7033 | assert(N->getOperand(1).getValueType() == VT && "Unexpected value type"); |
| 7034 | DecodeUNPCKHMask(NumElems, MaskEltSize, Mask); |
| 7035 | IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); |
| 7036 | break; |
| 7037 | case X86ISD::UNPCKL: |
| 7038 | assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); |
| 7039 | assert(N->getOperand(1).getValueType() == VT && "Unexpected value type"); |
| 7040 | DecodeUNPCKLMask(NumElems, MaskEltSize, Mask); |
| 7041 | IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); |
| 7042 | break; |
| 7043 | case X86ISD::MOVHLPS: |
| 7044 | assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); |
| 7045 | assert(N->getOperand(1).getValueType() == VT && "Unexpected value type"); |
| 7046 | DecodeMOVHLPSMask(NumElems, Mask); |
| 7047 | IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); |
| 7048 | break; |
| 7049 | case X86ISD::MOVLHPS: |
| 7050 | assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); |
| 7051 | assert(N->getOperand(1).getValueType() == VT && "Unexpected value type"); |
| 7052 | DecodeMOVLHPSMask(NumElems, Mask); |
| 7053 | IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); |
| 7054 | break; |
| 7055 | case X86ISD::VALIGN: |
| 7056 | assert((VT.getScalarType() == MVT::i32 || VT.getScalarType() == MVT::i64) && |
| 7057 | "Only 32-bit and 64-bit elements are supported!"); |
| 7058 | assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); |
| 7059 | assert(N->getOperand(1).getValueType() == VT && "Unexpected value type"); |
| 7060 | ImmN = N->getConstantOperandVal(N->getNumOperands() - 1); |
| 7061 | DecodeVALIGNMask(NumElems, ImmN, Mask); |
| 7062 | IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); |
| 7063 | Ops.push_back(N->getOperand(1)); |
| 7064 | Ops.push_back(N->getOperand(0)); |
| 7065 | break; |
| 7066 | case X86ISD::PALIGNR: |
| 7067 | assert(VT.getScalarType() == MVT::i8 && "Byte vector expected"); |
| 7068 | assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); |
| 7069 | assert(N->getOperand(1).getValueType() == VT && "Unexpected value type"); |
| 7070 | ImmN = N->getConstantOperandVal(N->getNumOperands() - 1); |
| 7071 | DecodePALIGNRMask(NumElems, ImmN, Mask); |
| 7072 | IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); |
| 7073 | Ops.push_back(N->getOperand(1)); |
| 7074 | Ops.push_back(N->getOperand(0)); |
| 7075 | break; |
| 7076 | case X86ISD::VSHLDQ: |
| 7077 | assert(VT.getScalarType() == MVT::i8 && "Byte vector expected"); |
| 7078 | assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); |
| 7079 | ImmN = N->getConstantOperandVal(N->getNumOperands() - 1); |
| 7080 | DecodePSLLDQMask(NumElems, ImmN, Mask); |
| 7081 | IsUnary = true; |
| 7082 | break; |
| 7083 | case X86ISD::VSRLDQ: |
| 7084 | assert(VT.getScalarType() == MVT::i8 && "Byte vector expected"); |
| 7085 | assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); |
| 7086 | ImmN = N->getConstantOperandVal(N->getNumOperands() - 1); |
| 7087 | DecodePSRLDQMask(NumElems, ImmN, Mask); |
| 7088 | IsUnary = true; |
| 7089 | break; |
| 7090 | case X86ISD::PSHUFD: |
| 7091 | case X86ISD::VPERMILPI: |
| 7092 | assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); |
| 7093 | ImmN = N->getConstantOperandVal(N->getNumOperands() - 1); |
| 7094 | DecodePSHUFMask(NumElems, MaskEltSize, ImmN, Mask); |
| 7095 | IsUnary = true; |
| 7096 | break; |
| 7097 | case X86ISD::PSHUFHW: |
| 7098 | assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); |
| 7099 | ImmN = N->getConstantOperandVal(N->getNumOperands() - 1); |
| 7100 | DecodePSHUFHWMask(NumElems, ImmN, Mask); |
| 7101 | IsUnary = true; |
| 7102 | break; |
| 7103 | case X86ISD::PSHUFLW: |
| 7104 | assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); |
| 7105 | ImmN = N->getConstantOperandVal(N->getNumOperands() - 1); |
| 7106 | DecodePSHUFLWMask(NumElems, ImmN, Mask); |
| 7107 | IsUnary = true; |
| 7108 | break; |
| 7109 | case X86ISD::VZEXT_MOVL: |
| 7110 | assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); |
| 7111 | DecodeZeroMoveLowMask(NumElems, Mask); |
| 7112 | IsUnary = true; |
| 7113 | break; |
| 7114 | case X86ISD::VBROADCAST: |
| 7115 | |
| 7116 | |
| 7117 | |
| 7118 | if (N->getOperand(0).getValueType() == VT) { |
| 7119 | DecodeVectorBroadcast(NumElems, Mask); |
| 7120 | IsUnary = true; |
| 7121 | break; |
| 7122 | } |
| 7123 | return false; |
| 7124 | case X86ISD::VPERMILPV: { |
| 7125 | assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); |
| 7126 | IsUnary = true; |
| 7127 | SDValue MaskNode = N->getOperand(1); |
| 7128 | if (getTargetShuffleMaskIndices(MaskNode, MaskEltSize, RawMask, |
| 7129 | RawUndefs)) { |
| 7130 | DecodeVPERMILPMask(NumElems, MaskEltSize, RawMask, RawUndefs, Mask); |
| 7131 | break; |
| 7132 | } |
| 7133 | return false; |
| 7134 | } |
| 7135 | case X86ISD::PSHUFB: { |
| 7136 | assert(VT.getScalarType() == MVT::i8 && "Byte vector expected"); |
| 7137 | assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); |
| 7138 | assert(N->getOperand(1).getValueType() == VT && "Unexpected value type"); |
| 7139 | IsUnary = true; |
| 7140 | SDValue MaskNode = N->getOperand(1); |
| 7141 | if (getTargetShuffleMaskIndices(MaskNode, 8, RawMask, RawUndefs)) { |
| 7142 | DecodePSHUFBMask(RawMask, RawUndefs, Mask); |
| 7143 | break; |
| 7144 | } |
| 7145 | return false; |
| 7146 | } |
| 7147 | case X86ISD::VPERMI: |
| 7148 | assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); |
| 7149 | ImmN = N->getConstantOperandVal(N->getNumOperands() - 1); |
| 7150 | DecodeVPERMMask(NumElems, ImmN, Mask); |
| 7151 | IsUnary = true; |
| 7152 | break; |
| 7153 | case X86ISD::MOVSS: |
| 7154 | case X86ISD::MOVSD: |
| 7155 | assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); |
| 7156 | assert(N->getOperand(1).getValueType() == VT && "Unexpected value type"); |
| 7157 | DecodeScalarMoveMask(NumElems, false, Mask); |
| 7158 | break; |
| 7159 | case X86ISD::VPERM2X128: |
| 7160 | assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); |
| 7161 | assert(N->getOperand(1).getValueType() == VT && "Unexpected value type"); |
| 7162 | ImmN = N->getConstantOperandVal(N->getNumOperands() - 1); |
| 7163 | DecodeVPERM2X128Mask(NumElems, ImmN, Mask); |
| 7164 | IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); |
| 7165 | break; |
| 7166 | case X86ISD::SHUF128: |
| 7167 | assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); |
| 7168 | assert(N->getOperand(1).getValueType() == VT && "Unexpected value type"); |
| 7169 | ImmN = N->getConstantOperandVal(N->getNumOperands() - 1); |
| 7170 | decodeVSHUF64x2FamilyMask(NumElems, MaskEltSize, ImmN, Mask); |
| 7171 | IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); |
| 7172 | break; |
| 7173 | case X86ISD::MOVSLDUP: |
| 7174 | assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); |
| 7175 | DecodeMOVSLDUPMask(NumElems, Mask); |
| 7176 | IsUnary = true; |
| 7177 | break; |
| 7178 | case X86ISD::MOVSHDUP: |
| 7179 | assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); |
| 7180 | DecodeMOVSHDUPMask(NumElems, Mask); |
| 7181 | IsUnary = true; |
| 7182 | break; |
| 7183 | case X86ISD::MOVDDUP: |
| 7184 | assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); |
| 7185 | DecodeMOVDDUPMask(NumElems, Mask); |
| 7186 | IsUnary = true; |
| 7187 | break; |
| 7188 | case X86ISD::VPERMIL2: { |
| 7189 | assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); |
| 7190 | assert(N->getOperand(1).getValueType() == VT && "Unexpected value type"); |
| 7191 | IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); |
| 7192 | SDValue MaskNode = N->getOperand(2); |
| 7193 | SDValue CtrlNode = N->getOperand(3); |
| 7194 | if (ConstantSDNode *CtrlOp = dyn_cast<ConstantSDNode>(CtrlNode)) { |
| 7195 | unsigned CtrlImm = CtrlOp->getZExtValue(); |
| 7196 | if (getTargetShuffleMaskIndices(MaskNode, MaskEltSize, RawMask, |
| 7197 | RawUndefs)) { |
| 7198 | DecodeVPERMIL2PMask(NumElems, MaskEltSize, CtrlImm, RawMask, RawUndefs, |
| 7199 | Mask); |
| 7200 | break; |
| 7201 | } |
| 7202 | } |
| 7203 | return false; |
| 7204 | } |
| 7205 | case X86ISD::VPPERM: { |
| 7206 | assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); |
| 7207 | assert(N->getOperand(1).getValueType() == VT && "Unexpected value type"); |
| 7208 | IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(1); |
| 7209 | SDValue MaskNode = N->getOperand(2); |
| 7210 | if (getTargetShuffleMaskIndices(MaskNode, 8, RawMask, RawUndefs)) { |
| 7211 | DecodeVPPERMMask(RawMask, RawUndefs, Mask); |
| 7212 | break; |
| 7213 | } |
| 7214 | return false; |
| 7215 | } |
| 7216 | case X86ISD::VPERMV: { |
| 7217 | assert(N->getOperand(1).getValueType() == VT && "Unexpected value type"); |
| 7218 | IsUnary = true; |
| 7219 | |
| 7220 | Ops.push_back(N->getOperand(1)); |
| 7221 | SDValue MaskNode = N->getOperand(0); |
| 7222 | if (getTargetShuffleMaskIndices(MaskNode, MaskEltSize, RawMask, |
| 7223 | RawUndefs)) { |
| 7224 | DecodeVPERMVMask(RawMask, RawUndefs, Mask); |
| 7225 | break; |
| 7226 | } |
| 7227 | return false; |
| 7228 | } |
| 7229 | case X86ISD::VPERMV3: { |
| 7230 | assert(N->getOperand(0).getValueType() == VT && "Unexpected value type"); |
| 7231 | assert(N->getOperand(2).getValueType() == VT && "Unexpected value type"); |
| 7232 | IsUnary = IsFakeUnary = N->getOperand(0) == N->getOperand(2); |
| 7233 | |
| 7234 | Ops.push_back(N->getOperand(0)); |
| 7235 | Ops.push_back(N->getOperand(2)); |
| 7236 | SDValue MaskNode = N->getOperand(1); |
| 7237 | if (getTargetShuffleMaskIndices(MaskNode, MaskEltSize, RawMask, |
| 7238 | RawUndefs)) { |
| 7239 | DecodeVPERMV3Mask(RawMask, RawUndefs, Mask); |
| 7240 | break; |
| 7241 | } |
| 7242 | return false; |
| 7243 | } |
| 7244 | default: llvm_unreachable("unknown target shuffle node"); |
| 7245 | } |
| 7246 | |
| 7247 | |
| 7248 | if (Mask.empty()) |
| 7249 | return false; |
| 7250 | |
| 7251 | |
| 7252 | if (!AllowSentinelZero && isAnyZero(Mask)) |
| 7253 | return false; |
| 7254 | |
| 7255 | |
| 7256 | |
| 7257 | |
| 7258 | if (IsFakeUnary) |
| 7259 | for (int &M : Mask) |
| 7260 | if (M >= (int)Mask.size()) |
| 7261 | M -= Mask.size(); |
| 7262 | |
| 7263 | |
| 7264 | |
| 7265 | if (Ops.empty()) { |
| 7266 | Ops.push_back(N->getOperand(0)); |
| 7267 | if (!IsUnary || IsFakeUnary) |
| 7268 | Ops.push_back(N->getOperand(1)); |
| 7269 | } |
| 7270 | |
| 7271 | return true; |
| 7272 | } |
| 7273 | |
| 7274 | |
| 7275 | static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero, |
| 7276 | SmallVectorImpl<SDValue> &Ops, |
| 7277 | SmallVectorImpl<int> &Mask) { |
| 7278 | bool IsUnary; |
| 7279 | return getTargetShuffleMask(N, VT, AllowSentinelZero, Ops, Mask, IsUnary); |
| 7280 | } |
| 7281 | |
| 7282 | |
| 7283 | |
| 7284 | |
| 7285 | |
| 7286 | |
| 7287 | |
| 7288 | |
| 7289 | |
/// Compute whether each element of a shuffle is zeroable.
///
/// A "zeroable" vector shuffle element is one which can be lowered to zero.
/// Inspects the build-vector operands of V1/V2 (after stripping bitcasts) and
/// sets one bit per mask element in \p KnownUndef / \p KnownZero for elements
/// that are provably undef or zero.  Handles mask widths that are a whole
/// multiple or divisor of the source build-vector's operand count.
static void computeZeroableShuffleElements(ArrayRef<int> Mask,
                                           SDValue V1, SDValue V2,
                                           APInt &KnownUndef, APInt &KnownZero) {
  int Size = Mask.size();
  KnownUndef = KnownZero = APInt::getNullValue(Size);

  // Bitcasts don't change which bits are zero/undef, so look through them.
  V1 = peekThroughBitcasts(V1);
  V2 = peekThroughBitcasts(V2);

  bool V1IsZero = ISD::isBuildVectorAllZeros(V1.getNode());
  bool V2IsZero = ISD::isBuildVectorAllZeros(V2.getNode());

  int VectorSizeInBits = V1.getValueSizeInBits();
  int ScalarSizeInBits = VectorSizeInBits / Size;
  assert(!(VectorSizeInBits % ScalarSizeInBits) && "Illegal shuffle mask size");

  for (int i = 0; i < Size; ++i) {
    int M = Mask[i];
    // Handle the easy cases: negative mask entries are undef, and any lane
    // sourced from an all-zeros vector is zero.
    if (M < 0) {
      KnownUndef.setBit(i);
      continue;
    }
    if ((M >= 0 && M < Size && V1IsZero) || (M >= Size && V2IsZero)) {
      KnownZero.setBit(i);
      continue;
    }

    // Determine shuffle input and normalize the mask index into it.
    SDValue V = M < Size ? V1 : V2;
    M %= Size;

    // Currently we can only search BUILD_VECTOR for UNDEF/ZERO elements.
    if (V.getOpcode() != ISD::BUILD_VECTOR)
      continue;

    // If the BUILD_VECTOR has fewer elements then the bitcasted portion of
    // the mask, each mask element maps to a sub-portion of one build-vector
    // operand; extract the relevant bits and test them for zero.
    if ((Size % V.getNumOperands()) == 0) {
      int Scale = Size / V->getNumOperands();
      SDValue Op = V.getOperand(M / Scale);
      if (Op.isUndef())
        KnownUndef.setBit(i);
      if (X86::isZeroNode(Op))
        KnownZero.setBit(i);
      else if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
        APInt Val = Cst->getAPIntValue();
        Val = Val.extractBits(ScalarSizeInBits, (M % Scale) * ScalarSizeInBits);
        if (Val == 0)
          KnownZero.setBit(i);
      } else if (ConstantFPSDNode *Cst = dyn_cast<ConstantFPSDNode>(Op)) {
        // Floating-point constants are zeroable only if their bit pattern is
        // all zeros (i.e. +0.0 after the bitcast).
        APInt Val = Cst->getValueAPF().bitcastToAPInt();
        Val = Val.extractBits(ScalarSizeInBits, (M % Scale) * ScalarSizeInBits);
        if (Val == 0)
          KnownZero.setBit(i);
      }
      continue;
    }

    // If the BUILD_VECTOR has more elements then all the (widened) source
    // operands it maps to must be undef/zero for the mask element to be so.
    if ((V.getNumOperands() % Size) == 0) {
      int Scale = V->getNumOperands() / Size;
      bool AllUndef = true;
      bool AllZero = true;
      for (int j = 0; j < Scale; ++j) {
        SDValue Op = V.getOperand((M * Scale) + j);
        AllUndef &= Op.isUndef();
        AllZero &= X86::isZeroNode(Op);
      }
      if (AllUndef)
        KnownUndef.setBit(i);
      if (AllZero)
        KnownZero.setBit(i);
      continue;
    }
  }
}
| 7368 | |
| 7369 | |
| 7370 | |
| 7371 | |
| 7372 | |
/// Decode a target shuffle mask and its inputs and see if any values are
/// known to be undef or zero from their inputs.
/// Returns true if the target shuffle mask was decoded; on success \p Mask,
/// \p Ops, \p KnownUndef and \p KnownZero describe the shuffle.
static bool getTargetShuffleAndZeroables(SDValue N, SmallVectorImpl<int> &Mask,
                                         SmallVectorImpl<SDValue> &Ops,
                                         APInt &KnownUndef, APInt &KnownZero) {
  bool IsUnary;
  if (!isTargetShuffle(N.getOpcode()))
    return false;

  MVT VT = N.getSimpleValueType();
  // Decode with sentinel-zero allowed so SM_SentinelZero entries survive.
  if (!getTargetShuffleMask(N.getNode(), VT, true, Ops, Mask, IsUnary))
    return false;

  int Size = Mask.size();
  SDValue V1 = Ops[0];
  SDValue V2 = IsUnary ? V1 : Ops[1];
  KnownUndef = KnownZero = APInt::getNullValue(Size);

  V1 = peekThroughBitcasts(V1);
  V2 = peekThroughBitcasts(V2);

  assert((VT.getSizeInBits() % Size) == 0 &&
         "Illegal split of shuffle value type");
  unsigned EltSizeInBits = VT.getSizeInBits() / Size;

  // Extract known constant input data.
  APInt UndefSrcElts[2];
  SmallVector<APInt, 32> SrcEltBits[2];
  bool IsSrcConstant[2] = {
      getTargetConstantBitsFromNode(V1, EltSizeInBits, UndefSrcElts[0],
                                    SrcEltBits[0], true, false),
      getTargetConstantBitsFromNode(V2, EltSizeInBits, UndefSrcElts[1],
                                    SrcEltBits[1], true, false)};

  for (int i = 0; i < Size; ++i) {
    int M = Mask[i];

    // Already decoded as SM_SentinelZero / SM_SentinelUndef.
    if (M < 0) {
      assert(isUndefOrZero(M) && "Unknown shuffle sentinel value!");
      if (SM_SentinelUndef == M)
        KnownUndef.setBit(i);
      if (SM_SentinelZero == M)
        KnownZero.setBit(i);
      continue;
    }

    // Determine shuffle input and normalize the mask index into it.
    unsigned SrcIdx = M / Size;
    SDValue V = M < Size ? V1 : V2;
    M %= Size;

    // We are referencing an UNDEF input.
    if (V.isUndef()) {
      KnownUndef.setBit(i);
      continue;
    }

    // SCALAR_TO_VECTOR - only the first element is defined, and the rest UNDEF.
    // TODO: We currently only set UNDEF for integer types - floats use the same
    // registers as vectors and many of the scalar folded loads rely on the
    // SCALAR_TO_VECTOR pattern.
    if (V.getOpcode() == ISD::SCALAR_TO_VECTOR &&
        (Size % V.getValueType().getVectorNumElements()) == 0) {
      int Scale = Size / V.getValueType().getVectorNumElements();
      int Idx = M / Scale;
      if (Idx != 0 && !VT.isFloatingPoint())
        KnownUndef.setBit(i);
      else if (Idx == 0 && X86::isZeroNode(V.getOperand(0)))
        KnownZero.setBit(i);
      continue;
    }

    // INSERT_SUBVECTOR - to widen vectors we often insert them into UNDEF
    // base vectors; lanes outside the inserted subvector are undef.
    if (V.getOpcode() == ISD::INSERT_SUBVECTOR) {
      SDValue Vec = V.getOperand(0);
      int NumVecElts = Vec.getValueType().getVectorNumElements();
      if (Vec.isUndef() && Size == NumVecElts) {
        int Idx = V.getConstantOperandVal(2);
        int NumSubElts = V.getOperand(1).getValueType().getVectorNumElements();
        if (M < Idx || (Idx + NumSubElts) <= M)
          KnownUndef.setBit(i);
      }
      continue;
    }

    // Attempt to extract from the source's constant bits.
    if (IsSrcConstant[SrcIdx]) {
      if (UndefSrcElts[SrcIdx][M])
        KnownUndef.setBit(i);
      else if (SrcEltBits[SrcIdx][M] == 0)
        KnownZero.setBit(i);
    }
  }

  assert(VT.getVectorNumElements() == (unsigned)Size &&
         "Different mask size from vector size!");
  return true;
}
| 7471 | |
| 7472 | |
| 7473 | static void resolveTargetShuffleFromZeroables(SmallVectorImpl<int> &Mask, |
| 7474 | const APInt &KnownUndef, |
| 7475 | const APInt &KnownZero, |
| 7476 | bool ResolveKnownZeros= true) { |
| 7477 | unsigned NumElts = Mask.size(); |
| 7478 | assert(KnownUndef.getBitWidth() == NumElts && |
| 7479 | KnownZero.getBitWidth() == NumElts && "Shuffle mask size mismatch"); |
| 7480 | |
| 7481 | for (unsigned i = 0; i != NumElts; ++i) { |
| 7482 | if (KnownUndef[i]) |
| 7483 | Mask[i] = SM_SentinelUndef; |
| 7484 | else if (ResolveKnownZeros && KnownZero[i]) |
| 7485 | Mask[i] = SM_SentinelZero; |
| 7486 | } |
| 7487 | } |
| 7488 | |
| 7489 | |
| 7490 | static void resolveZeroablesFromTargetShuffle(const SmallVectorImpl<int> &Mask, |
| 7491 | APInt &KnownUndef, |
| 7492 | APInt &KnownZero) { |
| 7493 | unsigned NumElts = Mask.size(); |
| 7494 | KnownUndef = KnownZero = APInt::getNullValue(NumElts); |
| 7495 | |
| 7496 | for (unsigned i = 0; i != NumElts; ++i) { |
| 7497 | int M = Mask[i]; |
| 7498 | if (SM_SentinelUndef == M) |
| 7499 | KnownUndef.setBit(i); |
| 7500 | if (SM_SentinelZero == M) |
| 7501 | KnownZero.setBit(i); |
| 7502 | } |
| 7503 | } |
| 7504 | |
| 7505 | |
| 7506 | |
| 7507 | static bool getTargetShuffleInputs(SDValue Op, SmallVectorImpl<SDValue> &Inputs, |
| 7508 | SmallVectorImpl<int> &Mask, |
| 7509 | const SelectionDAG &DAG, unsigned Depth, |
| 7510 | bool ResolveKnownElts); |
| 7511 | |
| 7512 | |
| 7513 | |
| 7514 | |
// Attempt to decode ops that could be represented as a shuffle mask.
// The decoded shuffle mask may contain a different number of elements to the
// destination value type.
// NOTE: this returns the raw mask (sentinels included) - callers resolve
// zeroable elements separately.
static bool getFauxShuffleMask(SDValue N, const APInt &DemandedElts,
                               SmallVectorImpl<int> &Mask,
                               SmallVectorImpl<SDValue> &Ops,
                               const SelectionDAG &DAG, unsigned Depth,
                               bool ResolveKnownElts) {
  Mask.clear();
  Ops.clear();

  MVT VT = N.getSimpleValueType();
  unsigned NumElts = VT.getVectorNumElements();
  unsigned NumSizeInBits = VT.getSizeInBits();
  unsigned NumBitsPerElt = VT.getScalarSizeInBits();
  // Only byte-aligned elements/vectors can be modelled by the cases below.
  if ((NumBitsPerElt % 8) != 0 || (NumSizeInBits % 8) != 0)
    return false;
  assert(NumElts == DemandedElts.getBitWidth() && "Unexpected vector size");
  unsigned NumSizeInBytes = NumSizeInBits / 8;
  unsigned NumBytesPerElt = NumBitsPerElt / 8;

  unsigned Opcode = N.getOpcode();
  switch (Opcode) {
  case ISD::VECTOR_SHUFFLE: {
    // Don't treat ISD::VECTOR_SHUFFLE as a target shuffle so decode it here.
    ArrayRef<int> ShuffleMask = cast<ShuffleVectorSDNode>(N)->getMask();
    if (isUndefOrInRange(ShuffleMask, 0, 2 * NumElts)) {
      Mask.append(ShuffleMask.begin(), ShuffleMask.end());
      Ops.push_back(N.getOperand(0));
      Ops.push_back(N.getOperand(1));
      return true;
    }
    return false;
  }
  case ISD::AND:
  case X86ISD::ANDNP: {
    // Attempt to decode as a per-byte mask.
    APInt UndefElts;
    SmallVector<APInt, 32> EltBits;
    SDValue N0 = N.getOperand(0);
    SDValue N1 = N.getOperand(1);
    bool IsAndN = (X86ISD::ANDNP == Opcode);
    // For ANDNP the constant operand is inverted: 0xFF selects zero.
    uint64_t ZeroMask = IsAndN ? 255 : 0;
    if (!getTargetConstantBitsFromNode(IsAndN ? N0 : N1, 8, UndefElts, EltBits))
      return false;
    for (int i = 0, e = (int)EltBits.size(); i != e; ++i) {
      if (UndefElts[i]) {
        Mask.push_back(SM_SentinelUndef);
        continue;
      }
      const APInt &ByteBits = EltBits[i];
      // Only all-zeros / all-ones bytes act as a pass-through or zero lane.
      if (ByteBits != 0 && ByteBits != 255)
        return false;
      Mask.push_back(ByteBits == ZeroMask ? SM_SentinelZero : i);
    }
    Ops.push_back(IsAndN ? N1 : N0);
    return true;
  }
  case ISD::OR: {
    // Handle OR(SHUFFLE,SHUFFLE) case where one source is zero and the other
    // is a valid shuffle index.
    SDValue N0 = peekThroughBitcasts(N.getOperand(0));
    SDValue N1 = peekThroughBitcasts(N.getOperand(1));
    if (!N0.getValueType().isVector() || !N1.getValueType().isVector())
      return false;
    SmallVector<int, 64> SrcMask0, SrcMask1;
    SmallVector<SDValue, 2> SrcInputs0, SrcInputs1;
    if (!getTargetShuffleInputs(N0, SrcInputs0, SrcMask0, DAG, Depth + 1,
                                true) ||
        !getTargetShuffleInputs(N1, SrcInputs1, SrcMask1, DAG, Depth + 1,
                                true))
      return false;

    // Scale both masks up to the widest of the two so they can be merged.
    size_t MaskSize = std::max(SrcMask0.size(), SrcMask1.size());
    SmallVector<int, 64> Mask0, Mask1;
    narrowShuffleMaskElts(MaskSize / SrcMask0.size(), SrcMask0, Mask0);
    narrowShuffleMaskElts(MaskSize / SrcMask1.size(), SrcMask1, Mask1);
    for (int i = 0; i != (int)MaskSize; ++i) {
      // NOTE: Don't handle SM_SentinelUndef, as we can end up in infinite
      // loops converting between OR and BLEND shuffles due to
      // canWidenShuffleElements merging away undef elements, meaning we
      // alternate between creating the narrower OR and wider BLEND forms.
      if (Mask0[i] == SM_SentinelZero && Mask1[i] == SM_SentinelZero)
        Mask.push_back(SM_SentinelZero);
      else if (Mask1[i] == SM_SentinelZero)
        Mask.push_back(i);
      else if (Mask0[i] == SM_SentinelZero)
        Mask.push_back(i + MaskSize);
      else
        return false;
    }
    Ops.push_back(N0);
    Ops.push_back(N1);
    return true;
  }
  case ISD::INSERT_SUBVECTOR: {
    SDValue Src = N.getOperand(0);
    SDValue Sub = N.getOperand(1);
    EVT SubVT = Sub.getValueType();
    unsigned NumSubElts = SubVT.getVectorNumElements();
    if (!N->isOnlyUserOf(Sub.getNode()))
      return false;
    uint64_t InsertIdx = N.getConstantOperandVal(2);
    // Handle INSERT_SUBVECTOR(SRC0, EXTRACT_SUBVECTOR(SRC1)) directly.
    if (Sub.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
        Sub.getOperand(0).getValueType() == VT) {
      uint64_t ExtractIdx = Sub.getConstantOperandVal(1);
      // Identity mask for Src, then overwrite the inserted window with
      // elements pulled from the extract source (second input).
      for (int i = 0; i != (int)NumElts; ++i)
        Mask.push_back(i);
      for (int i = 0; i != (int)NumSubElts; ++i)
        Mask[InsertIdx + i] = NumElts + ExtractIdx + i;
      Ops.push_back(Src);
      Ops.push_back(Sub.getOperand(0));
      return true;
    }
    // Handle INSERT_SUBVECTOR(SRC0, SHUFFLE(SRC1)).
    SmallVector<int, 64> SubMask;
    SmallVector<SDValue, 2> SubInputs;
    if (!getTargetShuffleInputs(peekThroughOneUseBitcasts(Sub), SubInputs,
                                SubMask, DAG, Depth + 1, ResolveKnownElts))
      return false;

    // Subvector shuffle inputs must not be larger than the subvector.
    if (llvm::any_of(SubInputs, [SubVT](SDValue SubInput) {
          return SubVT.getFixedSizeInBits() <
                 SubInput.getValueSizeInBits().getFixedSize();
        }))
      return false;

    if (SubMask.size() != NumSubElts) {
      assert(((SubMask.size() % NumSubElts) == 0 ||
              (NumSubElts % SubMask.size()) == 0) && "Illegal submask scale");
      if ((NumSubElts % SubMask.size()) == 0) {
        // Widen the submask to match the subvector element count.
        int Scale = NumSubElts / SubMask.size();
        SmallVector<int,64> ScaledSubMask;
        narrowShuffleMaskElts(Scale, SubMask, ScaledSubMask);
        SubMask = ScaledSubMask;
      } else {
        // Narrower submask - rescale the whole decode to the submask grain.
        int Scale = SubMask.size() / NumSubElts;
        NumSubElts = SubMask.size();
        NumElts *= Scale;
        InsertIdx *= Scale;
      }
    }
    Ops.push_back(Src);
    Ops.append(SubInputs.begin(), SubInputs.end());
    if (ISD::isBuildVectorAllZeros(Src.getNode()))
      Mask.append(NumElts, SM_SentinelZero);
    else
      for (int i = 0; i != (int)NumElts; ++i)
        Mask.push_back(i);
    // Remap the submask indices into the [NumElts, ...] range of the
    // appended subvector inputs.
    for (int i = 0; i != (int)NumSubElts; ++i) {
      int M = SubMask[i];
      if (0 <= M) {
        int InputIdx = M / NumSubElts;
        M = (NumElts * (1 + InputIdx)) + (M % NumSubElts);
      }
      Mask[i + InsertIdx] = M;
    }
    return true;
  }
  case X86ISD::PINSRB:
  case X86ISD::PINSRW:
  case ISD::SCALAR_TO_VECTOR:
  case ISD::INSERT_VECTOR_ELT: {
    // Match against a insert_vector_elt/scalar_to_vector of an extract from a
    // vector, for matching src/dst vector types.
    SDValue Scl = N.getOperand(Opcode == ISD::SCALAR_TO_VECTOR ? 0 : 1);

    unsigned DstIdx = 0;
    if (Opcode != ISD::SCALAR_TO_VECTOR) {
      // Check we have an in-range constant insertion index.
      if (!isa<ConstantSDNode>(N.getOperand(2)) ||
          N.getConstantOperandAPInt(2).uge(NumElts))
        return false;
      DstIdx = N.getConstantOperandVal(2);

      // Attempt to recognise an INSERT*(VEC, 0, DstIdx) shuffle pattern.
      if (X86::isZeroNode(Scl)) {
        Ops.push_back(N.getOperand(0));
        for (unsigned i = 0; i != NumElts; ++i)
          Mask.push_back(i == DstIdx ? SM_SentinelZero : (int)i);
        return true;
      }
    }

    // Peek through trunc/aext/zext - as they don't alter the lowest bits of
    // the scalar value, only its effective width shrinks.
    // TODO: zext(scalar) still requires zero upper elements.
    unsigned MinBitsPerElt = Scl.getScalarValueSizeInBits();
    while (Scl.getOpcode() == ISD::TRUNCATE ||
           Scl.getOpcode() == ISD::ANY_EXTEND ||
           Scl.getOpcode() == ISD::ZERO_EXTEND) {
      Scl = Scl.getOperand(0);
      MinBitsPerElt =
          std::min<unsigned>(MinBitsPerElt, Scl.getScalarValueSizeInBits());
    }
    if ((MinBitsPerElt % 8) != 0)
      return false;

    // Attempt to find the source vector the scalar was extracted from.
    SDValue SrcExtract;
    if ((Scl.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
         Scl.getOpcode() == X86ISD::PEXTRW ||
         Scl.getOpcode() == X86ISD::PEXTRB) &&
        Scl.getOperand(0).getValueSizeInBits() == NumSizeInBits) {
      SrcExtract = Scl;
    }
    if (!SrcExtract || !isa<ConstantSDNode>(SrcExtract.getOperand(1)))
      return false;

    SDValue SrcVec = SrcExtract.getOperand(0);
    EVT SrcVT = SrcVec.getValueType();
    if (!SrcVT.getScalarType().isByteSized())
      return false;
    unsigned SrcIdx = SrcExtract.getConstantOperandVal(1);
    unsigned SrcByte = SrcIdx * (SrcVT.getScalarSizeInBits() / 8);
    unsigned DstByte = DstIdx * NumBytesPerElt;
    MinBitsPerElt =
        std::min<unsigned>(MinBitsPerElt, SrcVT.getScalarSizeInBits());

    // Create 'identity' byte level shuffle mask and then add inserted bytes.
    if (Opcode == ISD::SCALAR_TO_VECTOR) {
      Ops.push_back(SrcVec);
      Mask.append(NumSizeInBytes, SM_SentinelUndef);
    } else {
      Ops.push_back(SrcVec);
      Ops.push_back(N.getOperand(0));
      for (int i = 0; i != (int)NumSizeInBytes; ++i)
        Mask.push_back(NumSizeInBytes + i);
    }

    unsigned MinBytesPerElts = MinBitsPerElt / 8;
    MinBytesPerElts = std::min(MinBytesPerElts, NumBytesPerElt);
    for (unsigned i = 0; i != MinBytesPerElts; ++i)
      Mask[DstByte + i] = SrcByte + i;
    // Bytes above the effective scalar width are zeroed by the extension.
    for (unsigned i = MinBytesPerElts; i < NumBytesPerElt; ++i)
      Mask[DstByte + i] = SM_SentinelZero;
    return true;
  }
  case X86ISD::PACKSS:
  case X86ISD::PACKUS: {
    SDValue N0 = N.getOperand(0);
    SDValue N1 = N.getOperand(1);
    assert(N0.getValueType().getVectorNumElements() == (NumElts / 2) &&
           N1.getValueType().getVectorNumElements() == (NumElts / 2) &&
           "Unexpected input value type");

    APInt EltsLHS, EltsRHS;
    getPackDemandedElts(VT, DemandedElts, EltsLHS, EltsRHS);

    // If we know input saturation won't happen (or we don't care for
    // particular lanes), we can treat this as a truncation shuffle.
    bool Offset0 = false, Offset1 = false;
    if (Opcode == X86ISD::PACKSS) {
      // PACKSS: signed saturation is a no-op if each source has more sign
      // bits than the narrow element width.
      if ((!(N0.isUndef() || EltsLHS.isNullValue()) &&
           DAG.ComputeNumSignBits(N0, EltsLHS, Depth + 1) <= NumBitsPerElt) ||
          (!(N1.isUndef() || EltsRHS.isNullValue()) &&
           DAG.ComputeNumSignBits(N1, EltsRHS, Depth + 1) <= NumBitsPerElt))
        return false;
      // We can't easily fold ASHR into a shuffle, but if it was feeding a
      // PACKSS then it was likely being used for sign-extension for a
      // truncation, so just peek through and adjust the mask by one element.
      if (N0.getOpcode() == X86ISD::VSRAI && N->isOnlyUserOf(N0.getNode()) &&
          N0.getConstantOperandAPInt(1) == NumBitsPerElt) {
        Offset0 = true;
        N0 = N0.getOperand(0);
      }
      if (N1.getOpcode() == X86ISD::VSRAI && N->isOnlyUserOf(N1.getNode()) &&
          N1.getConstantOperandAPInt(1) == NumBitsPerElt) {
        Offset1 = true;
        N1 = N1.getOperand(0);
      }
    } else {
      // PACKUS: unsigned saturation is a no-op if the upper half of each
      // source element is known zero.
      APInt ZeroMask = APInt::getHighBitsSet(2 * NumBitsPerElt, NumBitsPerElt);
      if ((!(N0.isUndef() || EltsLHS.isNullValue()) &&
           !DAG.MaskedValueIsZero(N0, ZeroMask, EltsLHS, Depth + 1)) ||
          (!(N1.isUndef() || EltsRHS.isNullValue()) &&
           !DAG.MaskedValueIsZero(N1, ZeroMask, EltsRHS, Depth + 1)))
        return false;
    }

    bool IsUnary = (N0 == N1);

    Ops.push_back(N0);
    if (!IsUnary)
      Ops.push_back(N1);

    createPackShuffleMask(VT, Mask, IsUnary);

    if (Offset0 || Offset1) {
      // Shift the mask right by one element for any input we peeked through
      // a VSRAI above (the pack consumed the odd sub-elements).
      for (int &M : Mask)
        if ((Offset0 && isInRange(M, 0, NumElts)) ||
            (Offset1 && isInRange(M, NumElts, 2 * NumElts)))
          ++M;
    }
    return true;
  }
  case X86ISD::VTRUNC: {
    SDValue Src = N.getOperand(0);
    EVT SrcVT = Src.getValueType();
    // Truncated source must be a simple vector.
    if (!SrcVT.isSimple() || (SrcVT.getSizeInBits() % 128) != 0 ||
        (SrcVT.getScalarSizeInBits() % 8) != 0)
      return false;
    unsigned NumSrcElts = SrcVT.getVectorNumElements();
    unsigned NumBitsPerSrcElt = SrcVT.getScalarSizeInBits();
    unsigned Scale = NumBitsPerSrcElt / NumBitsPerElt;
    assert((NumBitsPerSrcElt % NumBitsPerElt) == 0 && "Illegal truncation");
    // Keep the low sub-element of every source element; VTRUNC zeroes the
    // remaining destination elements.
    for (unsigned i = 0; i != NumSrcElts; ++i)
      Mask.push_back(i * Scale);
    Mask.append(NumElts - NumSrcElts, SM_SentinelZero);
    Ops.push_back(Src);
    return true;
  }
  case X86ISD::VSHLI:
  case X86ISD::VSRLI: {
    uint64_t ShiftVal = N.getConstantOperandVal(1);
    // Out of range bit shifts are guaranteed to be zero.
    if (NumBitsPerElt <= ShiftVal) {
      Mask.append(NumElts, SM_SentinelZero);
      return true;
    }

    // We can only decode 'whole byte' bit shifts as shuffles.
    if ((ShiftVal % 8) != 0)
      break;

    uint64_t ByteShift = ShiftVal / 8;
    Ops.push_back(N.getOperand(0));

    // Clear mask to all zeros and insert the shifted byte indices.
    Mask.append(NumSizeInBytes, SM_SentinelZero);

    if (X86ISD::VSHLI == Opcode) {
      for (unsigned i = 0; i != NumSizeInBytes; i += NumBytesPerElt)
        for (unsigned j = ByteShift; j != NumBytesPerElt; ++j)
          Mask[i + j] = i + j - ByteShift;
    } else {
      for (unsigned i = 0; i != NumSizeInBytes; i += NumBytesPerElt)
        for (unsigned j = ByteShift; j != NumBytesPerElt; ++j)
          Mask[i + j - ByteShift] = i + j;
    }
    return true;
  }
  case X86ISD::VROTLI:
  case X86ISD::VROTRI: {
    // We can only decode 'whole byte' bit rotates as shuffles.
    uint64_t RotateVal = N.getConstantOperandAPInt(1).urem(NumBitsPerElt);
    if ((RotateVal % 8) != 0)
      return false;
    Ops.push_back(N.getOperand(0));
    int Offset = RotateVal / 8;
    // Express both rotate directions as a left byte-rotate offset.
    Offset = (X86ISD::VROTLI == Opcode ? NumBytesPerElt - Offset : Offset);
    for (int i = 0; i != (int)NumElts; ++i) {
      int BaseIdx = i * NumBytesPerElt;
      for (int j = 0; j != (int)NumBytesPerElt; ++j) {
        Mask.push_back(BaseIdx + ((Offset + j) % NumBytesPerElt));
      }
    }
    return true;
  }
  case X86ISD::VBROADCAST: {
    SDValue Src = N.getOperand(0);
    if (!Src.getSimpleValueType().isVector()) {
      // Scalar broadcast: only handled if it is an extract of element 0 of a
      // vector with a matching scalar type.
      if (Src.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
          !isNullConstant(Src.getOperand(1)) ||
          Src.getOperand(0).getValueType().getScalarType() !=
              VT.getScalarType())
        return false;
      Src = Src.getOperand(0);
    }
    Ops.push_back(Src);
    // Broadcast replicates element 0 into every lane.
    Mask.append(NumElts, 0);
    return true;
  }
  case ISD::ZERO_EXTEND:
  case ISD::ANY_EXTEND:
  case ISD::ZERO_EXTEND_VECTOR_INREG:
  case ISD::ANY_EXTEND_VECTOR_INREG: {
    SDValue Src = N.getOperand(0);
    EVT SrcVT = Src.getValueType();

    // Extended source must be a simple vector.
    if (!SrcVT.isSimple() || (SrcVT.getSizeInBits() % 128) != 0 ||
        (SrcVT.getScalarSizeInBits() % 8) != 0)
      return false;

    bool IsAnyExtend =
        (ISD::ANY_EXTEND == Opcode || ISD::ANY_EXTEND_VECTOR_INREG == Opcode);
    DecodeZeroExtendMask(SrcVT.getScalarSizeInBits(), NumBitsPerElt, NumElts,
                         IsAnyExtend, Mask);
    Ops.push_back(Src);
    return true;
  }
  }

  return false;
}
| 7911 | |
| 7912 | |
/// Removes unused/repeated shuffle source inputs and adjusts the shuffle mask.
/// Each input owns a contiguous [lo, hi) index window of width MaskWidth in
/// the mask; dropping or folding an input rebases all higher windows down.
static void resolveTargetShuffleInputsAndMask(SmallVectorImpl<SDValue> &Inputs,
                                              SmallVectorImpl<int> &Mask) {
  int MaskWidth = Mask.size();
  SmallVector<SDValue, 16> UsedInputs;
  for (int i = 0, e = Inputs.size(); i < e; ++i) {
    int lo = UsedInputs.size() * MaskWidth;
    int hi = lo + MaskWidth;

    // Strip UNDEF input usage - any reference into an undef input is itself
    // undef.
    if (Inputs[i].isUndef())
      for (int &M : Mask)
        if ((lo <= M) && (M < hi))
          M = SM_SentinelUndef;

    // Check for unused inputs: no mask element references this window, so
    // drop the input and shift later windows down.
    if (none_of(Mask, [lo, hi](int i) { return (lo <= i) && (i < hi); })) {
      for (int &M : Mask)
        if (lo <= M)
          M -= MaskWidth;
      continue;
    }

    // Check for repeated inputs - redirect references to the first
    // occurrence's window and rebase the rest.
    bool IsRepeat = false;
    for (int j = 0, ue = UsedInputs.size(); j != ue; ++j) {
      if (UsedInputs[j] != Inputs[i])
        continue;
      for (int &M : Mask)
        if (lo <= M)
          M = (M < hi) ? ((M - lo) + (j * MaskWidth)) : (M - MaskWidth);
      IsRepeat = true;
      break;
    }
    if (IsRepeat)
      continue;

    UsedInputs.push_back(Inputs[i]);
  }
  Inputs = UsedInputs;
}
| 7953 | |
| 7954 | |
| 7955 | |
| 7956 | |
/// Calls getTargetShuffleAndZeroables to resolve a target shuffle mask's
/// inputs and the known undef/zero elements, falling back to
/// getFauxShuffleMask for nodes that aren't target shuffles but can still be
/// modelled as one. Returns true on successful decode.
static bool getTargetShuffleInputs(SDValue Op, const APInt &DemandedElts,
                                   SmallVectorImpl<SDValue> &Inputs,
                                   SmallVectorImpl<int> &Mask,
                                   APInt &KnownUndef, APInt &KnownZero,
                                   const SelectionDAG &DAG, unsigned Depth,
                                   bool ResolveKnownElts) {
  EVT VT = Op.getValueType();
  if (!VT.isSimple() || !VT.isVector())
    return false;

  if (getTargetShuffleAndZeroables(Op, Mask, Inputs, KnownUndef, KnownZero)) {
    // Optionally fold the known undef/zero bits back into the mask as
    // sentinel values.
    if (ResolveKnownElts)
      resolveTargetShuffleFromZeroables(Mask, KnownUndef, KnownZero);
    return true;
  }
  if (getFauxShuffleMask(Op, DemandedElts, Mask, Inputs, DAG, Depth,
                         ResolveKnownElts)) {
    // Faux masks already carry sentinels; derive the known bits from them.
    resolveZeroablesFromTargetShuffle(Mask, KnownUndef, KnownZero);
    return true;
  }
  return false;
}
| 7979 | |
| 7980 | static bool getTargetShuffleInputs(SDValue Op, SmallVectorImpl<SDValue> &Inputs, |
| 7981 | SmallVectorImpl<int> &Mask, |
| 7982 | const SelectionDAG &DAG, unsigned Depth = 0, |
| 7983 | bool ResolveKnownElts = true) { |
| 7984 | EVT VT = Op.getValueType(); |
| 7985 | if (!VT.isSimple() || !VT.isVector()) |
| 7986 | return false; |
| 7987 | |
| 7988 | APInt KnownUndef, KnownZero; |
| 7989 | unsigned NumElts = Op.getValueType().getVectorNumElements(); |
| 7990 | APInt DemandedElts = APInt::getAllOnesValue(NumElts); |
| 7991 | return getTargetShuffleInputs(Op, DemandedElts, Inputs, Mask, KnownUndef, |
| 7992 | KnownZero, DAG, Depth, ResolveKnownElts); |
| 7993 | } |
| 7994 | |
| 7995 | |
/// Attempts to create a new (VBROADCAST_LOAD / SUBV_BROADCAST_LOAD) node from
/// an existing memory node \p Mem, reading \p MemVT at byte offset \p Offset
/// from its base pointer. Returns SDValue() if the source memory access is
/// not a plain, simple, non-temporal-free read.
static SDValue getBROADCAST_LOAD(unsigned Opcode, const SDLoc &DL, EVT VT,
                                 EVT MemVT, MemSDNode *Mem, unsigned Offset,
                                 SelectionDAG &DAG) {
  assert((Opcode == X86ISD::VBROADCAST_LOAD ||
          Opcode == X86ISD::SUBV_BROADCAST_LOAD) &&
         "Unknown broadcast load type");

  // Only fold simple (non-atomic, non-volatile), temporal reads.
  if (!Mem || !Mem->readMem() || !Mem->isSimple() || Mem->isNonTemporal())
    return SDValue();

  SDValue Ptr =
      DAG.getMemBasePlusOffset(Mem->getBasePtr(), TypeSize::Fixed(Offset), DL);
  SDVTList Tys = DAG.getVTList(VT, MVT::Other);
  SDValue Ops[] = {Mem->getChain(), Ptr};
  SDValue BcstLd = DAG.getMemIntrinsicNode(
      Opcode, DL, Tys, Ops, MemVT,
      DAG.getMachineFunction().getMachineMemOperand(
          Mem->getMemOperand(), Offset, MemVT.getStoreSize()));
  // Preserve memory ordering: make the new load equivalent to the old one's
  // chain so later users aren't reordered across it.
  DAG.makeEquivalentMemoryOrdering(SDValue(Mem, 1), BcstLd.getValue(1));
  return BcstLd;
}
| 8018 | |
| 8019 | |
| 8020 | |
| 8021 | static SDValue getShuffleScalarElt(SDValue Op, unsigned Index, |
| 8022 | SelectionDAG &DAG, unsigned Depth) { |
| 8023 | if (Depth >= SelectionDAG::MaxRecursionDepth) |
| 8024 | return SDValue(); |
| 8025 | |
| 8026 | EVT VT = Op.getValueType(); |
| 8027 | unsigned Opcode = Op.getOpcode(); |
| 8028 | unsigned NumElems = VT.getVectorNumElements(); |
| 8029 | |
| 8030 | |
| 8031 | if (auto *SV = dyn_cast<ShuffleVectorSDNode>(Op)) { |
| 8032 | int Elt = SV->getMaskElt(Index); |
| 8033 | |
| 8034 | if (Elt < 0) |
| 8035 | return DAG.getUNDEF(VT.getVectorElementType()); |
| 8036 | |
| 8037 | SDValue Src = (Elt < (int)NumElems) ? SV->getOperand(0) : SV->getOperand(1); |
| 8038 | return getShuffleScalarElt(Src, Elt % NumElems, DAG, Depth + 1); |
| 8039 | } |
| 8040 | |
| 8041 | |
| 8042 | if (isTargetShuffle(Opcode)) { |
| 8043 | MVT ShufVT = VT.getSimpleVT(); |
| 8044 | MVT ShufSVT = ShufVT.getVectorElementType(); |
| 8045 | int NumElems = (int)ShufVT.getVectorNumElements(); |
| 8046 | SmallVector<int, 16> ShuffleMask; |
| 8047 | SmallVector<SDValue, 16> ShuffleOps; |
| 8048 | if (!getTargetShuffleMask(Op.getNode(), ShufVT, true, ShuffleOps, |
| 8049 | ShuffleMask)) |
| 8050 | return SDValue(); |
| 8051 | |
| 8052 | int Elt = ShuffleMask[Index]; |
| 8053 | if (Elt == SM_SentinelZero) |
| 8054 | return ShufSVT.isInteger() ? DAG.getConstant(0, SDLoc(Op), ShufSVT) |
| 8055 | : DAG.getConstantFP(+0.0, SDLoc(Op), ShufSVT); |
| 8056 | if (Elt == SM_SentinelUndef) |
| 8057 | return DAG.getUNDEF(ShufSVT); |
| 8058 | |
| 8059 | assert(0 <= Elt && Elt < (2 * NumElems) && "Shuffle index out of range"); |
| 8060 | SDValue Src = (Elt < NumElems) ? ShuffleOps[0] : ShuffleOps[1]; |
| 8061 | return getShuffleScalarElt(Src, Elt % NumElems, DAG, Depth + 1); |
| 8062 | } |
| 8063 | |
| 8064 | |
| 8065 | if (Opcode == ISD::INSERT_SUBVECTOR) { |
| 8066 | SDValue Vec = Op.getOperand(0); |
| 8067 | SDValue Sub = Op.getOperand(1); |
| 8068 | uint64_t SubIdx = Op.getConstantOperandVal(2); |
| 8069 | unsigned NumSubElts = Sub.getValueType().getVectorNumElements(); |
| 8070 | |
| 8071 | if (SubIdx <= Index && Index < (SubIdx + NumSubElts)) |
| 8072 | return getShuffleScalarElt(Sub, Index - SubIdx, DAG, Depth + 1); |
| 8073 | return getShuffleScalarElt(Vec, Index, DAG, Depth + 1); |
| 8074 | } |
| 8075 | |
| 8076 | |
| 8077 | if (Opcode == ISD::CONCAT_VECTORS) { |
| 8078 | EVT SubVT = Op.getOperand(0).getValueType(); |
| 8079 | unsigned NumSubElts = SubVT.getVectorNumElements(); |
| 8080 | uint64_t SubIdx = Index / NumSubElts; |
| 8081 | uint64_t SubElt = Index % NumSubElts; |
| 8082 | return getShuffleScalarElt(Op.getOperand(SubIdx), SubElt, DAG, Depth + 1); |
| 8083 | } |
| 8084 | |
| 8085 | |
| 8086 | if (Opcode == ISD::EXTRACT_SUBVECTOR) { |
| 8087 | SDValue Src = Op.getOperand(0); |
| 8088 | uint64_t SrcIdx = Op.getConstantOperandVal(1); |
| 8089 | return getShuffleScalarElt(Src, Index + SrcIdx, DAG, Depth + 1); |
| 8090 | } |
| 8091 | |
| 8092 | |
| 8093 | if (Opcode == ISD::BITCAST) { |
| 8094 | SDValue Src = Op.getOperand(0); |
| 8095 | EVT SrcVT = Src.getValueType(); |
| 8096 | if (SrcVT.isVector() && SrcVT.getVectorNumElements() == NumElems) |
| 8097 | return getShuffleScalarElt(Src, Index, DAG, Depth + 1); |
| 8098 | return SDValue(); |
| 8099 | } |
| 8100 | |
| 8101 | |
| 8102 | |
| 8103 | |
| 8104 | |
| 8105 | if (Opcode == ISD::INSERT_VECTOR_ELT && |
| 8106 | isa<ConstantSDNode>(Op.getOperand(2))) { |
| 8107 | if (Op.getConstantOperandAPInt(2) == Index) |
| 8108 | return Op.getOperand(1); |
| 8109 | return getShuffleScalarElt(Op.getOperand(0), Index, DAG, Depth + 1); |
| 8110 | } |
| 8111 | |
| 8112 | if (Opcode == ISD::SCALAR_TO_VECTOR) |
| 8113 | return (Index == 0) ? Op.getOperand(0) |
| 8114 | : DAG.getUNDEF(VT.getVectorElementType()); |
| 8115 | |
| 8116 | if (Opcode == ISD::BUILD_VECTOR) |
| 8117 | return Op.getOperand(Index); |
| 8118 | |
| 8119 | return SDValue(); |
| 8120 | } |
| 8121 | |
| 8122 | |
| 8123 | static SDValue LowerBuildVectorAsInsert(SDValue Op, const APInt &NonZeroMask, |
| 8124 | unsigned NumNonZero, unsigned NumZero, |
| 8125 | SelectionDAG &DAG, |
| 8126 | const X86Subtarget &Subtarget) { |
| 8127 | MVT VT = Op.getSimpleValueType(); |
| 8128 | unsigned NumElts = VT.getVectorNumElements(); |
| 8129 | assert(((VT == MVT::v8i16 && Subtarget.hasSSE2()) || |
| 8130 | ((VT == MVT::v16i8 || VT == MVT::v4i32) && Subtarget.hasSSE41())) && |
| 8131 | "Illegal vector insertion"); |
| 8132 | |
| 8133 | SDLoc dl(Op); |
| 8134 | SDValue V; |
| 8135 | bool First = true; |
| 8136 | |
| 8137 | for (unsigned i = 0; i < NumElts; ++i) { |
| 8138 | bool IsNonZero = NonZeroMask[i]; |
| 8139 | if (!IsNonZero) |
| 8140 | continue; |
| 8141 | |
| 8142 | |
| 8143 | |
| 8144 | |
| 8145 | if (First) { |
| 8146 | First = false; |
| 8147 | if (NumZero || 0 != i) |
| 8148 | V = getZeroVector(VT, Subtarget, DAG, dl); |
| 8149 | else { |
| 8150 | assert(0 == i && "Expected insertion into zero-index"); |
| 8151 | V = DAG.getAnyExtOrTrunc(Op.getOperand(i), dl, MVT::i32); |
| 8152 | V = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, V); |
| 8153 | V = DAG.getBitcast(VT, V); |
| 8154 | continue; |
| 8155 | } |
| 8156 | } |
| 8157 | V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, V, Op.getOperand(i), |
| 8158 | DAG.getIntPtrConstant(i, dl)); |
| 8159 | } |
| 8160 | |
| 8161 | return V; |
| 8162 | } |
| 8163 | |
| 8164 | |
| 8165 | static SDValue LowerBuildVectorv16i8(SDValue Op, const APInt &NonZeroMask, |
| 8166 | unsigned NumNonZero, unsigned NumZero, |
| 8167 | SelectionDAG &DAG, |
| 8168 | const X86Subtarget &Subtarget) { |
| 8169 | if (NumNonZero > 8 && !Subtarget.hasSSE41()) |
| 8170 | return SDValue(); |
| 8171 | |
| 8172 | |
| 8173 | if (Subtarget.hasSSE41()) |
| 8174 | return LowerBuildVectorAsInsert(Op, NonZeroMask, NumNonZero, NumZero, DAG, |
| 8175 | Subtarget); |
| 8176 | |
| 8177 | SDLoc dl(Op); |
| 8178 | SDValue V; |
| 8179 | |
| 8180 | |
| 8181 | for (unsigned i = 0; i < 16; i += 2) { |
| 8182 | bool ThisIsNonZero = NonZeroMask[i]; |
| 8183 | bool NextIsNonZero = NonZeroMask[i + 1]; |
| 8184 | if (!ThisIsNonZero && !NextIsNonZero) |
| 8185 | continue; |
| 8186 | |
| 8187 | |
| 8188 | SDValue Elt; |
| 8189 | if (ThisIsNonZero) { |
| 8190 | if (NumZero || NextIsNonZero) |
| 8191 | Elt = DAG.getZExtOrTrunc(Op.getOperand(i), dl, MVT::i32); |
| 8192 | else |
| 8193 | Elt = DAG.getAnyExtOrTrunc(Op.getOperand(i), dl, MVT::i32); |
| 8194 | } |
| 8195 | |
| 8196 | if (NextIsNonZero) { |
| 8197 | SDValue NextElt = Op.getOperand(i + 1); |
| 8198 | if (i == 0 && NumZero) |
| 8199 | NextElt = DAG.getZExtOrTrunc(NextElt, dl, MVT::i32); |
| 8200 | else |
| 8201 | NextElt = DAG.getAnyExtOrTrunc(NextElt, dl, MVT::i32); |
| 8202 | NextElt = DAG.getNode(ISD::SHL, dl, MVT::i32, NextElt, |
| 8203 | DAG.getConstant(8, dl, MVT::i8)); |
| 8204 | if (ThisIsNonZero) |
| 8205 | Elt = DAG.getNode(ISD::OR, dl, MVT::i32, NextElt, Elt); |
| 8206 | else |
| 8207 | Elt = NextElt; |
| 8208 | } |
| 8209 | |
| 8210 | |
| 8211 | |
| 8212 | |
| 8213 | if (!V) { |
| 8214 | if (i != 0 || NumZero) |
| 8215 | V = getZeroVector(MVT::v8i16, Subtarget, DAG, dl); |
| 8216 | else { |
| 8217 | V = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Elt); |
| 8218 | V = DAG.getBitcast(MVT::v8i16, V); |
| 8219 | continue; |
| 8220 | } |
| 8221 | } |
| 8222 | Elt = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, Elt); |
| 8223 | V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16, V, Elt, |
| 8224 | DAG.getIntPtrConstant(i / 2, dl)); |
| 8225 | } |
| 8226 | |
| 8227 | return DAG.getBitcast(MVT::v16i8, V); |
| 8228 | } |
| 8229 | |
| 8230 | |
| 8231 | static SDValue LowerBuildVectorv8i16(SDValue Op, const APInt &NonZeroMask, |
| 8232 | unsigned NumNonZero, unsigned NumZero, |
| 8233 | SelectionDAG &DAG, |
| 8234 | const X86Subtarget &Subtarget) { |
| 8235 | if (NumNonZero > 4 && !Subtarget.hasSSE41()) |
| 8236 | return SDValue(); |
| 8237 | |
| 8238 | |
| 8239 | return LowerBuildVectorAsInsert(Op, NonZeroMask, NumNonZero, NumZero, DAG, |
| 8240 | Subtarget); |
| 8241 | } |
| 8242 | |
| 8243 | |
| 8244 | static SDValue LowerBuildVectorv4x32(SDValue Op, SelectionDAG &DAG, |
| 8245 | const X86Subtarget &Subtarget) { |
| 8246 | |
| 8247 | |
| 8248 | |
| 8249 | |
| 8250 | if (Subtarget.hasSSE3() && !Subtarget.hasXOP() && |
| 8251 | Op.getOperand(0) == Op.getOperand(2) && |
| 8252 | Op.getOperand(1) == Op.getOperand(3) && |
| 8253 | Op.getOperand(0) != Op.getOperand(1)) { |
| 8254 | SDLoc DL(Op); |
| 8255 | MVT VT = Op.getSimpleValueType(); |
| 8256 | MVT EltVT = VT.getVectorElementType(); |
| 8257 | |
| 8258 | |
| 8259 | SDValue Ops[4] = { Op.getOperand(0), Op.getOperand(1), |
| 8260 | DAG.getUNDEF(EltVT), DAG.getUNDEF(EltVT) }; |
| 8261 | SDValue NewBV = DAG.getBitcast(MVT::v2f64, DAG.getBuildVector(VT, DL, Ops)); |
| 8262 | SDValue Dup = DAG.getNode(X86ISD::MOVDDUP, DL, MVT::v2f64, NewBV); |
| 8263 | return DAG.getBitcast(VT, Dup); |
| 8264 | } |
| 8265 | |
| 8266 | |
| 8267 | std::bitset<4> Zeroable, Undefs; |
| 8268 | for (int i = 0; i < 4; ++i) { |
| 8269 | SDValue Elt = Op.getOperand(i); |
| 8270 | Undefs[i] = Elt.isUndef(); |
| 8271 | Zeroable[i] = (Elt.isUndef() || X86::isZeroNode(Elt)); |
| 8272 | } |
| 8273 | assert(Zeroable.size() - Zeroable.count() > 1 && |
| 8274 | "We expect at least two non-zero elements!"); |
| 8275 | |
| 8276 | |
| 8277 | |
| 8278 | SDValue FirstNonZero; |
| 8279 | unsigned FirstNonZeroIdx; |
| 8280 | for (unsigned i = 0; i < 4; ++i) { |
| 8281 | if (Zeroable[i]) |
| 8282 | continue; |
| 8283 | SDValue Elt = Op.getOperand(i); |
| 8284 | if (Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT || |
| 8285 | !isa<ConstantSDNode>(Elt.getOperand(1))) |
| 8286 | return SDValue(); |
| 8287 | |
| 8288 | MVT VT = Elt.getOperand(0).getSimpleValueType(); |
| 8289 | if (!VT.is128BitVector()) |
| 8290 | return SDValue(); |
| 8291 | if (!FirstNonZero.getNode()) { |
| 8292 | FirstNonZero = Elt; |
| 8293 | FirstNonZeroIdx = i; |
| 8294 | } |
| 8295 | } |
| 8296 | |
| 8297 | assert(FirstNonZero.getNode() && "Unexpected build vector of all zeros!"); |
| 8298 | SDValue V1 = FirstNonZero.getOperand(0); |
| 8299 | MVT VT = V1.getSimpleValueType(); |
| 8300 | |
| 8301 | |
| 8302 | SDValue Elt; |
| 8303 | unsigned EltMaskIdx, EltIdx; |
| 8304 | int Mask[4]; |
| 8305 | for (EltIdx = 0; EltIdx < 4; ++EltIdx) { |
| 8306 | if (Zeroable[EltIdx]) { |
| 8307 | |
| 8308 | Mask[EltIdx] = EltIdx+4; |
| 8309 | continue; |
| 8310 | } |
| 8311 | |
| 8312 | Elt = Op->getOperand(EltIdx); |
| 8313 | |
| 8314 | EltMaskIdx = Elt.getConstantOperandVal(1); |
| 8315 | if (Elt.getOperand(0) != V1 || EltMaskIdx != EltIdx) |
| 8316 | break; |
| 8317 | Mask[EltIdx] = EltIdx; |
| 8318 | } |
| 8319 | |
| 8320 | if (EltIdx == 4) { |
| 8321 | |
| 8322 | SDValue VZeroOrUndef = (Zeroable == Undefs) |
| 8323 | ? DAG.getUNDEF(VT) |
| 8324 | : getZeroVector(VT, Subtarget, DAG, SDLoc(Op)); |
| 8325 | if (V1.getSimpleValueType() != VT) |
| 8326 | V1 = DAG.getBitcast(VT, V1); |
| 8327 | return DAG.getVectorShuffle(VT, SDLoc(V1), V1, VZeroOrUndef, Mask); |
| 8328 | } |
| 8329 | |
| 8330 | |
| 8331 | if (!Subtarget.hasSSE41()) |
| 8332 | return SDValue(); |
| 8333 | |
| 8334 | SDValue V2 = Elt.getOperand(0); |
| 8335 | if (Elt == FirstNonZero && EltIdx == FirstNonZeroIdx) |
| 8336 | V1 = SDValue(); |
| 8337 | |
| 8338 | bool CanFold = true; |
| 8339 | for (unsigned i = EltIdx + 1; i < 4 && CanFold; ++i) { |
| 8340 | if (Zeroable[i]) |
| 8341 | continue; |
| 8342 | |
| 8343 | SDValue Current = Op->getOperand(i); |
| 8344 | SDValue SrcVector = Current->getOperand(0); |
| 8345 | if (!V1.getNode()) |
| 8346 | V1 = SrcVector; |
| 8347 | CanFold = (SrcVector == V1) && (Current.getConstantOperandAPInt(1) == i); |
| 8348 | } |
| 8349 | |
| 8350 | if (!CanFold) |
| 8351 | return SDValue(); |
| 8352 | |
| 8353 | assert(V1.getNode() && "Expected at least two non-zero elements!"); |
| 8354 | if (V1.getSimpleValueType() != MVT::v4f32) |
| 8355 | V1 = DAG.getBitcast(MVT::v4f32, V1); |
| 8356 | if (V2.getSimpleValueType() != MVT::v4f32) |
| 8357 | V2 = DAG.getBitcast(MVT::v4f32, V2); |
| 8358 | |
| 8359 | |
| 8360 | unsigned ZMask = Zeroable.to_ulong(); |
| 8361 | |
| 8362 | unsigned InsertPSMask = EltMaskIdx << 6 | EltIdx << 4 | ZMask; |
| 8363 | assert((InsertPSMask & ~0xFFu) == 0 && "Invalid mask!"); |
| 8364 | SDLoc DL(Op); |
| 8365 | SDValue Result = DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32, V1, V2, |
| 8366 | DAG.getIntPtrConstant(InsertPSMask, DL, true)); |
| 8367 | return DAG.getBitcast(VT, Result); |
| 8368 | } |
| 8369 | |
| 8370 | |
| 8371 | static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp, unsigned NumBits, |
| 8372 | SelectionDAG &DAG, const TargetLowering &TLI, |
| 8373 | const SDLoc &dl) { |
| 8374 | assert(VT.is128BitVector() && "Unknown type for VShift"); |
| 8375 | MVT ShVT = MVT::v16i8; |
| 8376 | unsigned Opc = isLeft ? X86ISD::VSHLDQ : X86ISD::VSRLDQ; |
| 8377 | SrcOp = DAG.getBitcast(ShVT, SrcOp); |
| 8378 | assert(NumBits % 8 == 0 && "Only support byte sized shifts"); |
| 8379 | SDValue ShiftVal = DAG.getTargetConstant(NumBits / 8, dl, MVT::i8); |
| 8380 | return DAG.getBitcast(VT, DAG.getNode(Opc, dl, ShVT, SrcOp, ShiftVal)); |
| 8381 | } |
| 8382 | |
| 8383 | static SDValue LowerAsSplatVectorLoad(SDValue SrcOp, MVT VT, const SDLoc &dl, |
| 8384 | SelectionDAG &DAG) { |
| 8385 | |
| 8386 | |
| 8387 | |
| 8388 | |
| 8389 | if (LoadSDNode *LD = dyn_cast<LoadSDNode>(SrcOp)) { |
| 8390 | SDValue Ptr = LD->getBasePtr(); |
| 8391 | if (!ISD::isNormalLoad(LD) || !LD->isSimple()) |
| 8392 | return SDValue(); |
| 8393 | EVT PVT = LD->getValueType(0); |
| 8394 | if (PVT != MVT::i32 && PVT != MVT::f32) |
| 8395 | return SDValue(); |
| 8396 | |
| 8397 | int FI = -1; |
| 8398 | int64_t Offset = 0; |
| 8399 | if (FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr)) { |
| 8400 | FI = FINode->getIndex(); |
| 8401 | Offset = 0; |
| 8402 | } else if (DAG.isBaseWithConstantOffset(Ptr) && |
| 8403 | isa<FrameIndexSDNode>(Ptr.getOperand(0))) { |
| 8404 | FI = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex(); |
| 8405 | Offset = Ptr.getConstantOperandVal(1); |
| 8406 | Ptr = Ptr.getOperand(0); |
| 8407 | } else { |
| 8408 | return SDValue(); |
| 8409 | } |
| 8410 | |
| 8411 | |
| 8412 | |
| 8413 | Align RequiredAlign(VT.getSizeInBits() / 8); |
| 8414 | SDValue Chain = LD->getChain(); |
| 8415 | |
| 8416 | MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); |
| 8417 | MaybeAlign InferredAlign = DAG.InferPtrAlign(Ptr); |
| 8418 | if (!InferredAlign || *InferredAlign < RequiredAlign) { |
| 8419 | if (MFI.isFixedObjectIndex(FI)) { |
| 8420 | |
| 8421 | |
| 8422 | |
| 8423 | return SDValue(); |
| 8424 | } else { |
| 8425 | MFI.setObjectAlignment(FI, RequiredAlign); |
| 8426 | } |
| 8427 | } |
| 8428 | |
| 8429 | |
| 8430 | |
| 8431 | if (Offset < 0) |
| 8432 | return SDValue(); |
| 8433 | if ((Offset % RequiredAlign.value()) & 3) |
| 8434 | return SDValue(); |
| 8435 | int64_t StartOffset = Offset & ~int64_t(RequiredAlign.value() - 1); |
| 8436 | if (StartOffset) { |
| 8437 | SDLoc DL(Ptr); |
| 8438 | Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, |
| 8439 | DAG.getConstant(StartOffset, DL, Ptr.getValueType())); |
| 8440 | } |
| 8441 | |
| 8442 | int EltNo = (Offset - StartOffset) >> 2; |
| 8443 | unsigned NumElems = VT.getVectorNumElements(); |
| 8444 | |
| 8445 | EVT NVT = EVT::getVectorVT(*DAG.getContext(), PVT, NumElems); |
| 8446 | SDValue V1 = DAG.getLoad(NVT, dl, Chain, Ptr, |
| 8447 | LD->getPointerInfo().getWithOffset(StartOffset)); |
| 8448 | |
| 8449 | SmallVector<int, 8> Mask(NumElems, EltNo); |
| 8450 | |
| 8451 | return DAG.getVectorShuffle(NVT, dl, V1, DAG.getUNDEF(NVT), Mask); |
| 8452 | } |
| 8453 | |
| 8454 | return SDValue(); |
| 8455 | } |
| 8456 | |
| 8457 | |
| 8458 | static bool findEltLoadSrc(SDValue Elt, LoadSDNode *&Ld, int64_t &ByteOffset) { |
| 8459 | if (ISD::isNON_EXTLoad(Elt.getNode())) { |
| 8460 | auto *BaseLd = cast<LoadSDNode>(Elt); |
| 8461 | if (!BaseLd->isSimple()) |
| 8462 | return false; |
| 8463 | Ld = BaseLd; |
| 8464 | ByteOffset = 0; |
| 8465 | return true; |
| 8466 | } |
| 8467 | |
| 8468 | switch (Elt.getOpcode()) { |
| 8469 | case ISD::BITCAST: |
| 8470 | case ISD::TRUNCATE: |
| 8471 | case ISD::SCALAR_TO_VECTOR: |
| 8472 | return findEltLoadSrc(Elt.getOperand(0), Ld, ByteOffset); |
| 8473 | case ISD::SRL: |
| 8474 | if (auto *IdxC = dyn_cast<ConstantSDNode>(Elt.getOperand(1))) { |
| 8475 | uint64_t Idx = IdxC->getZExtValue(); |
| 8476 | if ((Idx % 8) == 0 && findEltLoadSrc(Elt.getOperand(0), Ld, ByteOffset)) { |
| 8477 | ByteOffset += Idx / 8; |
| 8478 | return true; |
| 8479 | } |
| 8480 | } |
| 8481 | break; |
| 8482 | case ISD::EXTRACT_VECTOR_ELT: |
| 8483 | if (auto *IdxC = dyn_cast<ConstantSDNode>(Elt.getOperand(1))) { |
| 8484 | SDValue Src = Elt.getOperand(0); |
| 8485 | unsigned SrcSizeInBits = Src.getScalarValueSizeInBits(); |
| 8486 | unsigned DstSizeInBits = Elt.getScalarValueSizeInBits(); |
| 8487 | if (DstSizeInBits == SrcSizeInBits && (SrcSizeInBits % 8) == 0 && |
| 8488 | findEltLoadSrc(Src, Ld, ByteOffset)) { |
| 8489 | uint64_t Idx = IdxC->getZExtValue(); |
| 8490 | ByteOffset += Idx * (SrcSizeInBits / 8); |
| 8491 | return true; |
| 8492 | } |
| 8493 | } |
| 8494 | break; |
| 8495 | } |
| 8496 | |
| 8497 | return false; |
| 8498 | } |
| 8499 | |
| 8500 | |
| 8501 | |
| 8502 | |
| 8503 | |
| 8504 | |
| 8505 | static SDValue EltsFromConsecutiveLoads(EVT VT, ArrayRef<SDValue> Elts, |
| 8506 | const SDLoc &DL, SelectionDAG &DAG, |
| 8507 | const X86Subtarget &Subtarget, |
| 8508 | bool IsAfterLegalize) { |
| 8509 | if ((VT.getScalarSizeInBits() % 8) != 0) |
| 8510 | return SDValue(); |
| 8511 | |
| 8512 | unsigned NumElems = Elts.size(); |
| 8513 | |
| 8514 | int LastLoadedElt = -1; |
| 8515 | APInt LoadMask = APInt::getNullValue(NumElems); |
| 8516 | APInt ZeroMask = APInt::getNullValue(NumElems); |
| 8517 | APInt UndefMask = APInt::getNullValue(NumElems); |
| 8518 | |
| 8519 | SmallVector<LoadSDNode*, 8> Loads(NumElems, nullptr); |
| 8520 | SmallVector<int64_t, 8> ByteOffsets(NumElems, 0); |
| 8521 | |
| 8522 | |
| 8523 | |
| 8524 | for (unsigned i = 0; i < NumElems; ++i) { |
| 8525 | SDValue Elt = peekThroughBitcasts(Elts[i]); |
| 8526 | if (!Elt.getNode()) |
| 8527 | return SDValue(); |
| 8528 | if (Elt.isUndef()) { |
| 8529 | UndefMask.setBit(i); |
| 8530 | continue; |
| 8531 | } |
| 8532 | if (X86::isZeroNode(Elt) || ISD::isBuildVectorAllZeros(Elt.getNode())) { |
| 8533 | ZeroMask.setBit(i); |
| 8534 | continue; |
| 8535 | } |
| 8536 | |
| 8537 | |
| 8538 | |
| 8539 | unsigned EltSizeInBits = Elt.getValueSizeInBits(); |
| 8540 | if ((NumElems * EltSizeInBits) != VT.getSizeInBits()) |
| 8541 | return SDValue(); |
| 8542 | |
| 8543 | if (!findEltLoadSrc(Elt, Loads[i], ByteOffsets[i]) || ByteOffsets[i] < 0) |
| 8544 | return SDValue(); |
| 8545 | unsigned LoadSizeInBits = Loads[i]->getValueSizeInBits(0); |
| 8546 | if (((ByteOffsets[i] * 8) + EltSizeInBits) > LoadSizeInBits) |
| 8547 | return SDValue(); |
| 8548 | |
| 8549 | LoadMask.setBit(i); |
| 8550 | LastLoadedElt = i; |
| 8551 | } |
| 8552 | assert((ZeroMask.countPopulation() + UndefMask.countPopulation() + |
| 8553 | LoadMask.countPopulation()) == NumElems && |
| 8554 | "Incomplete element masks"); |
| 8555 | |
| 8556 | |
| 8557 | if (UndefMask.countPopulation() == NumElems) |
| 8558 | return DAG.getUNDEF(VT); |
| 8559 | if ((ZeroMask.countPopulation() + UndefMask.countPopulation()) == NumElems) |
| 8560 | return VT.isInteger() ? DAG.getConstant(0, DL, VT) |
| 8561 | : DAG.getConstantFP(0.0, DL, VT); |
| 8562 | |
| 8563 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
| 8564 | int FirstLoadedElt = LoadMask.countTrailingZeros(); |
| 8565 | SDValue EltBase = peekThroughBitcasts(Elts[FirstLoadedElt]); |
| 8566 | EVT EltBaseVT = EltBase.getValueType(); |
| 8567 | assert(EltBaseVT.getSizeInBits() == EltBaseVT.getStoreSizeInBits() && |
| 8568 | "Register/Memory size mismatch"); |
| 8569 | LoadSDNode *LDBase = Loads[FirstLoadedElt]; |
| 8570 | assert(LDBase && "Did not find base load for merging consecutive loads"); |
| 8571 | unsigned BaseSizeInBits = EltBaseVT.getStoreSizeInBits(); |
| 8572 | unsigned BaseSizeInBytes = BaseSizeInBits / 8; |
| 8573 | int NumLoadedElts = (1 + LastLoadedElt - FirstLoadedElt); |
| 8574 | int LoadSizeInBits = NumLoadedElts * BaseSizeInBits; |
| 8575 | assert((BaseSizeInBits % 8) == 0 && "Sub-byte element loads detected"); |
| 8576 | |
| 8577 | |
| 8578 | if (ByteOffsets[FirstLoadedElt] != 0) |
| 8579 | return SDValue(); |
| 8580 | |
| 8581 | |
| 8582 | |
| 8583 | auto CheckConsecutiveLoad = [&](LoadSDNode *Base, int EltIdx) { |
| 8584 | LoadSDNode *Ld = Loads[EltIdx]; |
| 8585 | int64_t ByteOffset = ByteOffsets[EltIdx]; |
| 8586 | if (ByteOffset && (ByteOffset % BaseSizeInBytes) == 0) { |
| 8587 | int64_t BaseIdx = EltIdx - (ByteOffset / BaseSizeInBytes); |
| 8588 | return (0 <= BaseIdx && BaseIdx < (int)NumElems && LoadMask[BaseIdx] && |
| 8589 | Loads[BaseIdx] == Ld && ByteOffsets[BaseIdx] == 0); |
| 8590 | } |
| 8591 | return DAG.areNonVolatileConsecutiveLoads(Ld, Base, BaseSizeInBytes, |
| 8592 | EltIdx - FirstLoadedElt); |
| 8593 | }; |
| 8594 | |
| 8595 | |
| 8596 | |
| 8597 | |
| 8598 | bool IsConsecutiveLoad = true; |
| 8599 | bool IsConsecutiveLoadWithZeros = true; |
| 8600 | for (int i = FirstLoadedElt + 1; i <= LastLoadedElt; ++i) { |
| 8601 | if (LoadMask[i]) { |
| 8602 | if (!CheckConsecutiveLoad(LDBase, i)) { |
| 8603 | IsConsecutiveLoad = false; |
| 8604 | IsConsecutiveLoadWithZeros = false; |
| 8605 | break; |
| 8606 | } |
| 8607 | } else if (ZeroMask[i]) { |
| 8608 | IsConsecutiveLoad = false; |
| 8609 | } |
| 8610 | } |
| 8611 | |
| 8612 | auto CreateLoad = [&DAG, &DL, &Loads](EVT VT, LoadSDNode *LDBase) { |
| 8613 | auto MMOFlags = LDBase->getMemOperand()->getFlags(); |
| 8614 | assert(LDBase->isSimple() && |
| 8615 | "Cannot merge volatile or atomic loads."); |
| 8616 | SDValue NewLd = |
| 8617 | DAG.getLoad(VT, DL, LDBase->getChain(), LDBase->getBasePtr(), |
| 8618 | LDBase->getPointerInfo(), LDBase->getOriginalAlign(), |
| 8619 | MMOFlags); |
| 8620 | for (auto *LD : Loads) |
| 8621 | if (LD) |
| 8622 | DAG.makeEquivalentMemoryOrdering(LD, NewLd); |
| 8623 | return NewLd; |
| 8624 | }; |
| 8625 | |
| 8626 | |
| 8627 | bool IsDereferenceable = LDBase->getPointerInfo().isDereferenceable( |
| 8628 | VT.getSizeInBits() / 8, *DAG.getContext(), DAG.getDataLayout()); |
| 8629 | |
| 8630 | |
| 8631 | |
| 8632 | |
| 8633 | |
| 8634 | |
| 8635 | if (FirstLoadedElt == 0 && |
| 8636 | (NumLoadedElts == (int)NumElems || IsDereferenceable) && |
| 8637 | (IsConsecutiveLoad || IsConsecutiveLoadWithZeros)) { |
| 8638 | if (IsAfterLegalize && !TLI.isOperationLegal(ISD::LOAD, VT)) |
| 8639 | return SDValue(); |
| 8640 | |
| 8641 | |
| 8642 | |
| 8643 | if (LDBase->isNonTemporal() && LDBase->getAlignment() >= 32 && |
| 8644 | VT.is256BitVector() && !Subtarget.hasInt256()) |
| 8645 | return SDValue(); |
| 8646 | |
| 8647 | if (NumElems == 1) |
| 8648 | return DAG.getBitcast(VT, Elts[FirstLoadedElt]); |
| 8649 | |
| 8650 | if (!ZeroMask) |
| 8651 | return CreateLoad(VT, LDBase); |
| 8652 | |
| 8653 | |
| 8654 | |
| 8655 | if (!IsAfterLegalize && VT.isVector()) { |
| 8656 | unsigned NumMaskElts = VT.getVectorNumElements(); |
| 8657 | if ((NumMaskElts % NumElems) == 0) { |
| 8658 | unsigned Scale = NumMaskElts / NumElems; |
| 8659 | SmallVector<int, 4> ClearMask(NumMaskElts, -1); |
| 8660 | for (unsigned i = 0; i < NumElems; ++i) { |
| 8661 | if (UndefMask[i]) |
| 8662 | continue; |
| 8663 | int Offset = ZeroMask[i] ? NumMaskElts : 0; |
| 8664 | for (unsigned j = 0; j != Scale; ++j) |
| 8665 | ClearMask[(i * Scale) + j] = (i * Scale) + j + Offset; |
| 8666 | } |
| 8667 | SDValue V = CreateLoad(VT, LDBase); |
| 8668 | SDValue Z = VT.isInteger() ? DAG.getConstant(0, DL, VT) |
| 8669 | : DAG.getConstantFP(0.0, DL, VT); |
| 8670 | return DAG.getVectorShuffle(VT, DL, V, Z, ClearMask); |
| 8671 | } |
| 8672 | } |
| 8673 | } |
| 8674 | |
| 8675 | |
| 8676 | if (VT.is256BitVector() || VT.is512BitVector()) { |
| 8677 | unsigned HalfNumElems = NumElems / 2; |
| 8678 | if (UndefMask.extractBits(HalfNumElems, HalfNumElems).isAllOnesValue()) { |
| 8679 | EVT HalfVT = |
| 8680 | EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(), HalfNumElems); |
| 8681 | SDValue HalfLD = |
| 8682 | EltsFromConsecutiveLoads(HalfVT, Elts.drop_back(HalfNumElems), DL, |
| 8683 | DAG, Subtarget, IsAfterLegalize); |
| 8684 | if (HalfLD) |
| 8685 | return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), |
| 8686 | HalfLD, DAG.getIntPtrConstant(0, DL)); |
| 8687 | } |
| 8688 | } |
| 8689 | |
| 8690 | |
| 8691 | if (IsConsecutiveLoad && FirstLoadedElt == 0 && |
| 8692 | (LoadSizeInBits == 32 || LoadSizeInBits == 64) && |
| 8693 | ((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()))) { |
| 8694 | MVT VecSVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(LoadSizeInBits) |
| 8695 | : MVT::getIntegerVT(LoadSizeInBits); |
| 8696 | MVT VecVT = MVT::getVectorVT(VecSVT, VT.getSizeInBits() / LoadSizeInBits); |
| 8697 | |
| 8698 | |
| 8699 | if (!Subtarget.hasSSE2() && VT == MVT::v4f32) |
| 8700 | VecVT = MVT::v4f32; |
| 8701 | if (TLI.isTypeLegal(VecVT)) { |
| 8702 | SDVTList Tys = DAG.getVTList(VecVT, MVT::Other); |
| 8703 | SDValue Ops[] = { LDBase->getChain(), LDBase->getBasePtr() }; |
| 8704 | SDValue ResNode = DAG.getMemIntrinsicNode( |
| 8705 | X86ISD::VZEXT_LOAD, DL, Tys, Ops, VecSVT, LDBase->getPointerInfo(), |
| 8706 | LDBase->getOriginalAlign(), MachineMemOperand::MOLoad); |
| 8707 | for (auto *LD : Loads) |
| 8708 | if (LD) |
| 8709 | DAG.makeEquivalentMemoryOrdering(LD, ResNode); |
| 8710 | return DAG.getBitcast(VT, ResNode); |
| 8711 | } |
| 8712 | } |
| 8713 | |
| 8714 | |
| 8715 | |
| 8716 | if (ZeroMask.isNullValue() && isPowerOf2_32(NumElems) && Subtarget.hasAVX() && |
| 8717 | (VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector())) { |
| 8718 | for (unsigned SubElems = 1; SubElems < NumElems; SubElems *= 2) { |
| 8719 | unsigned RepeatSize = SubElems * BaseSizeInBits; |
| 8720 | unsigned ScalarSize = std::min(RepeatSize, 64u); |
| 8721 | if (!Subtarget.hasAVX2() && ScalarSize < 32) |
| 8722 | continue; |
| 8723 | |
| 8724 | |
| 8725 | |
| 8726 | if (RepeatSize > ScalarSize && SubElems == 1) |
| 8727 | continue; |
| 8728 | |
| 8729 | bool Match = true; |
| 8730 | SmallVector<SDValue, 8> RepeatedLoads(SubElems, DAG.getUNDEF(EltBaseVT)); |
| 8731 | for (unsigned i = 0; i != NumElems && Match; ++i) { |
| 8732 | if (!LoadMask[i]) |
| 8733 | continue; |
| 8734 | SDValue Elt = peekThroughBitcasts(Elts[i]); |
| 8735 | if (RepeatedLoads[i % SubElems].isUndef()) |
| 8736 | RepeatedLoads[i % SubElems] = Elt; |
| 8737 | else |
| 8738 | Match &= (RepeatedLoads[i % SubElems] == Elt); |
| 8739 | } |
| 8740 | |
| 8741 | |
| 8742 | Match &= !RepeatedLoads.front().isUndef(); |
| 8743 | Match &= !RepeatedLoads.back().isUndef(); |
| 8744 | if (!Match) |
| 8745 | continue; |
| 8746 | |
| 8747 | EVT RepeatVT = |
| 8748 | VT.isInteger() && (RepeatSize != 64 || TLI.isTypeLegal(MVT::i64)) |
| 8749 | ? EVT::getIntegerVT(*DAG.getContext(), ScalarSize) |
| 8750 | : EVT::getFloatingPointVT(ScalarSize); |
| 8751 | if (RepeatSize > ScalarSize) |
| 8752 | RepeatVT = EVT::getVectorVT(*DAG.getContext(), RepeatVT, |
| 8753 | RepeatSize / ScalarSize); |
| 8754 | EVT BroadcastVT = |
| 8755 | EVT::getVectorVT(*DAG.getContext(), RepeatVT.getScalarType(), |
| 8756 | VT.getSizeInBits() / ScalarSize); |
| 8757 | if (TLI.isTypeLegal(BroadcastVT)) { |
| 8758 | if (SDValue RepeatLoad = EltsFromConsecutiveLoads( |
| 8759 | RepeatVT, RepeatedLoads, DL, DAG, Subtarget, IsAfterLegalize)) { |
| 8760 | SDValue Broadcast = RepeatLoad; |
| 8761 | if (RepeatSize > ScalarSize) { |
| 8762 | while (Broadcast.getValueSizeInBits() < VT.getSizeInBits()) |
| 8763 | Broadcast = concatSubVectors(Broadcast, Broadcast, DAG, DL); |
| 8764 | } else { |
| 8765 | Broadcast = |
| 8766 | DAG.getNode(X86ISD::VBROADCAST, DL, BroadcastVT, RepeatLoad); |
| 8767 | } |
| 8768 | return DAG.getBitcast(VT, Broadcast); |
| 8769 | } |
| 8770 | } |
| 8771 | } |
| 8772 | } |
| 8773 | |
| 8774 | return SDValue(); |
| 8775 | } |
| 8776 | |
| 8777 | |
| 8778 | |
| 8779 | |
| 8780 | static SDValue combineToConsecutiveLoads(EVT VT, SDValue Op, const SDLoc &DL, |
| 8781 | SelectionDAG &DAG, |
| 8782 | const X86Subtarget &Subtarget, |
| 8783 | bool IsAfterLegalize) { |
| 8784 | SmallVector<SDValue, 64> Elts; |
| 8785 | for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) { |
| 8786 | if (SDValue Elt = getShuffleScalarElt(Op, i, DAG, 0)) { |
| 8787 | Elts.push_back(Elt); |
| 8788 | continue; |
| 8789 | } |
| 8790 | return SDValue(); |
| 8791 | } |
| 8792 | assert(Elts.size() == VT.getVectorNumElements()); |
| 8793 | return EltsFromConsecutiveLoads(VT, Elts, DL, DAG, Subtarget, |
| 8794 | IsAfterLegalize); |
| 8795 | } |
| 8796 | |
| 8797 | static Constant *getConstantVector(MVT VT, const APInt &SplatValue, |
| 8798 | unsigned SplatBitSize, LLVMContext &C) { |
| 8799 | unsigned ScalarSize = VT.getScalarSizeInBits(); |
| 8800 | unsigned NumElm = SplatBitSize / ScalarSize; |
| 8801 | |
| 8802 | SmallVector<Constant *, 32> ConstantVec; |
| 8803 | for (unsigned i = 0; i < NumElm; i++) { |
| 8804 | APInt Val = SplatValue.extractBits(ScalarSize, ScalarSize * i); |
| 8805 | Constant *Const; |
| 8806 | if (VT.isFloatingPoint()) { |
| 8807 | if (ScalarSize == 32) { |
| 8808 | Const = ConstantFP::get(C, APFloat(APFloat::IEEEsingle(), Val)); |
| 8809 | } else { |
| 8810 | assert(ScalarSize == 64 && "Unsupported floating point scalar size"); |
| 8811 | Const = ConstantFP::get(C, APFloat(APFloat::IEEEdouble(), Val)); |
| 8812 | } |
| 8813 | } else |
| 8814 | Const = Constant::getIntegerValue(Type::getIntNTy(C, ScalarSize), Val); |
| 8815 | ConstantVec.push_back(Const); |
| 8816 | } |
| 8817 | return ConstantVector::get(ArrayRef<Constant *>(ConstantVec)); |
| 8818 | } |
| 8819 | |
| 8820 | static bool isFoldableUseOfShuffle(SDNode *N) { |
| 8821 | for (auto *U : N->uses()) { |
| 8822 | unsigned Opc = U->getOpcode(); |
| 8823 | |
| 8824 | if (Opc == X86ISD::VPERMV && U->getOperand(0).getNode() == N) |
| 8825 | return false; |
| 8826 | if (Opc == X86ISD::VPERMV3 && U->getOperand(1).getNode() == N) |
| 8827 | return false; |
| 8828 | if (isTargetShuffle(Opc)) |
| 8829 | return true; |
| 8830 | if (Opc == ISD::BITCAST) |
| 8831 | return isFoldableUseOfShuffle(U); |
| 8832 | if (N->hasOneUse()) |
| 8833 | return true; |
| 8834 | } |
| 8835 | return false; |
| 8836 | } |
| 8837 | |
| 8838 | |
| 8839 | |
| 8840 | |
| 8841 | |
| 8842 | |
| 8843 | |
| 8844 | |
| 8845 | static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp, |
| 8846 | const X86Subtarget &Subtarget, |
| 8847 | SelectionDAG &DAG) { |
| 8848 | |
| 8849 | |
| 8850 | |
| 8851 | if (!Subtarget.hasAVX()) |
| 8852 | return SDValue(); |
| 8853 | |
| 8854 | MVT VT = BVOp->getSimpleValueType(0); |
| 8855 | unsigned NumElts = VT.getVectorNumElements(); |
| 8856 | SDLoc dl(BVOp); |
| 8857 | |
| 8858 | assert((VT.is128BitVector() || VT.is256BitVector() || VT.is512BitVector()) && |
| 8859 | "Unsupported vector type for broadcast."); |
| 8860 | |
| 8861 | |
| 8862 | SDValue Ld; |
| 8863 | BitVector UndefElements; |
| 8864 | SmallVector<SDValue, 16> Sequence; |
| 8865 | if (BVOp->getRepeatedSequence(Sequence, &UndefElements)) { |
| 8866 | assert((NumElts % Sequence.size()) == 0 && "Sequence doesn't fit."); |
| 8867 | if (Sequence.size() == 1) |
| 8868 | Ld = Sequence[0]; |
| 8869 | } |
| 8870 | |
| 8871 | |
| 8872 | |
| 8873 | |
| 8874 | |
| 8875 | |
| 8876 | |
| 8877 | if (!Sequence.empty() && Subtarget.hasCDI()) { |
| 8878 | |
| 8879 | unsigned SeqLen = Sequence.size(); |
| 8880 | bool UpperZeroOrUndef = |
| 8881 | SeqLen == 1 || |
| 8882 | llvm::all_of(makeArrayRef(Sequence).drop_front(), [](SDValue V) { |
| 8883 | return !V || V.isUndef() || isNullConstant(V); |
| 8884 | }); |
| 8885 | SDValue Op0 = Sequence[0]; |
| 8886 | if (UpperZeroOrUndef && ((Op0.getOpcode() == ISD::BITCAST) || |
| 8887 | (Op0.getOpcode() == ISD::ZERO_EXTEND && |
| 8888 | Op0.getOperand(0).getOpcode() == ISD::BITCAST))) { |
| 8889 | SDValue BOperand = Op0.getOpcode() == ISD::BITCAST |
| 8890 | ? Op0.getOperand(0) |
| 8891 | : Op0.getOperand(0).getOperand(0); |
| 8892 | MVT MaskVT = BOperand.getSimpleValueType(); |
| 8893 | MVT EltType = MVT::getIntegerVT(VT.getScalarSizeInBits() * SeqLen); |
| 8894 | if ((EltType == MVT::i64 && MaskVT == MVT::v8i1) || |
| 8895 | (EltType == MVT::i32 && MaskVT == MVT::v16i1)) { |
| 8896 | MVT BcstVT = MVT::getVectorVT(EltType, NumElts / SeqLen); |
| 8897 | if (!VT.is512BitVector() && !Subtarget.hasVLX()) { |
| 8898 | unsigned Scale = 512 / VT.getSizeInBits(); |
| 8899 | BcstVT = MVT::getVectorVT(EltType, Scale * (NumElts / SeqLen)); |
| 8900 | } |
| 8901 | SDValue Bcst = DAG.getNode(X86ISD::VBROADCASTM, dl, BcstVT, BOperand); |
| 8902 | if (BcstVT.getSizeInBits() != VT.getSizeInBits()) |
| 8903 | Bcst = extractSubVector(Bcst, 0, DAG, dl, VT.getSizeInBits()); |
| 8904 | return DAG.getBitcast(VT, Bcst); |
| 8905 | } |
| 8906 | } |
| 8907 | } |
| 8908 | |
| 8909 | unsigned NumUndefElts = UndefElements.count(); |
| 8910 | if (!Ld || (NumElts - NumUndefElts) <= 1) { |
| 8911 | APInt SplatValue, Undef; |
| 8912 | unsigned SplatBitSize; |
| 8913 | bool HasUndef; |
| 8914 | |
| 8915 | if (BVOp->isConstantSplat(SplatValue, Undef, SplatBitSize, HasUndef) && |
| 8916 | SplatBitSize > VT.getScalarSizeInBits() && |
| 8917 | SplatBitSize < VT.getSizeInBits()) { |
| 8918 | |
| 8919 | |
| 8920 | if (isFoldableUseOfShuffle(BVOp)) |
| 8921 | return SDValue(); |
| 8922 | |
| 8923 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
| 8924 | LLVMContext *Ctx = DAG.getContext(); |
| 8925 | MVT PVT = TLI.getPointerTy(DAG.getDataLayout()); |
| 8926 | if (Subtarget.hasAVX()) { |
| 8927 | if (SplatBitSize == 32 || SplatBitSize == 64 || |
| 8928 | (SplatBitSize < 32 && Subtarget.hasAVX2())) { |
| 8929 | |
| 8930 | |
| 8931 | MVT CVT = MVT::getIntegerVT(SplatBitSize); |
| 8932 | Type *ScalarTy = Type::getIntNTy(*Ctx, SplatBitSize); |
| 8933 | Constant *C = Constant::getIntegerValue(ScalarTy, SplatValue); |
| 8934 | SDValue CP = DAG.getConstantPool(C, PVT); |
| 8935 | unsigned Repeat = VT.getSizeInBits() / SplatBitSize; |
| 8936 | |
| 8937 | Align Alignment = cast<ConstantPoolSDNode>(CP)->getAlign(); |
| 8938 | SDVTList Tys = |
| 8939 | DAG.getVTList(MVT::getVectorVT(CVT, Repeat), MVT::Other); |
| 8940 | SDValue Ops[] = {DAG.getEntryNode(), CP}; |
| 8941 | MachinePointerInfo MPI = |
| 8942 | MachinePointerInfo::getConstantPool(DAG.getMachineFunction()); |
| 8943 | SDValue Brdcst = DAG.getMemIntrinsicNode( |
| 8944 | X86ISD::VBROADCAST_LOAD, dl, Tys, Ops, CVT, MPI, Alignment, |
| 8945 | MachineMemOperand::MOLoad); |
| 8946 | return DAG.getBitcast(VT, Brdcst); |
| 8947 | } |
| 8948 | if (SplatBitSize > 64) { |
| 8949 | |
| 8950 | Constant *VecC = getConstantVector(VT, SplatValue, SplatBitSize, |
| 8951 | *Ctx); |
| 8952 | SDValue VCP = DAG.getConstantPool(VecC, PVT); |
| 8953 | unsigned NumElm = SplatBitSize / VT.getScalarSizeInBits(); |
| 8954 | MVT VVT = MVT::getVectorVT(VT.getScalarType(), NumElm); |
| 8955 | Align Alignment = cast<ConstantPoolSDNode>(VCP)->getAlign(); |
| 8956 | SDVTList Tys = DAG.getVTList(VT, MVT::Other); |
| 8957 | SDValue Ops[] = {DAG.getEntryNode(), VCP}; |
| 8958 | MachinePointerInfo MPI = |
| 8959 | MachinePointerInfo::getConstantPool(DAG.getMachineFunction()); |
| 8960 | return DAG.getMemIntrinsicNode( |
| 8961 | X86ISD::SUBV_BROADCAST_LOAD, dl, Tys, Ops, VVT, MPI, Alignment, |
| 8962 | MachineMemOperand::MOLoad); |
| 8963 | } |
| 8964 | } |
| 8965 | } |
| 8966 | |
| 8967 | |
| 8968 | |
| 8969 | |
| 8970 | |
| 8971 | |
| 8972 | if (!Ld || NumElts - NumUndefElts != 1) |
| 8973 | return SDValue(); |
| 8974 | unsigned ScalarSize = Ld.getValueSizeInBits(); |
| 8975 | if (!(UndefElements[0] || (ScalarSize != 32 && ScalarSize != 64))) |
| 8976 | return SDValue(); |
| 8977 | } |
| 8978 | |
| 8979 | bool ConstSplatVal = |
| 8980 | (Ld.getOpcode() == ISD::Constant || Ld.getOpcode() == ISD::ConstantFP); |
| 8981 | bool IsLoad = ISD::isNormalLoad(Ld.getNode()); |
| 8982 | |
| 8983 | |
| 8984 | |
| 8985 | |
| 8986 | |
| 8987 | |
| 8988 | if (!ConstSplatVal && !IsLoad && !BVOp->isOnlyUserOf(Ld.getNode())) |
| 8989 | return SDValue(); |
| 8990 | |
| 8991 | unsigned ScalarSize = Ld.getValueSizeInBits(); |
| 8992 | bool IsGE256 = (VT.getSizeInBits() >= 256); |
| 8993 | |
| 8994 | |
| 8995 | |
| 8996 | |
| 8997 | |
| 8998 | |
| 8999 | bool OptForSize = DAG.shouldOptForSize(); |
| 9000 | |
| 9001 | |
| 9002 | |
| 9003 | |
| 9004 | |
| 9005 | |
| 9006 | |
| 9007 | if (ConstSplatVal && (Subtarget.hasAVX2() || OptForSize)) { |
| 9008 | EVT CVT = Ld.getValueType(); |
| 9009 | assert(!CVT.isVector() && "Must not broadcast a vector type"); |
| 9010 | |
| 9011 | |
| 9012 | |
| 9013 | |
| 9014 | |
| 9015 | if (ScalarSize == 32 || (IsGE256 && ScalarSize == 64) || |
| 9016 | (OptForSize && (ScalarSize == 64 || Subtarget.hasAVX2()))) { |
| 9017 | const Constant *C = nullptr; |
| 9018 | if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Ld)) |
| 9019 | C = CI->getConstantIntValue(); |
| 9020 | else if (ConstantFPSDNode *CF = dyn_cast<ConstantFPSDNode>(Ld)) |
| 9021 | C = CF->getConstantFPValue(); |
| 9022 | |
| 9023 | assert(C && "Invalid constant type"); |
| 9024 | |
| 9025 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
| 9026 | SDValue CP = |
| 9027 | DAG.getConstantPool(C, TLI.getPointerTy(DAG.getDataLayout())); |
| 9028 | Align Alignment = cast<ConstantPoolSDNode>(CP)->getAlign(); |
| 9029 | |
| 9030 | SDVTList Tys = DAG.getVTList(VT, MVT::Other); |
| 9031 | SDValue Ops[] = {DAG.getEntryNode(), CP}; |
| 9032 | MachinePointerInfo MPI = |
| 9033 | MachinePointerInfo::getConstantPool(DAG.getMachineFunction()); |
| 9034 | return DAG.getMemIntrinsicNode(X86ISD::VBROADCAST_LOAD, dl, Tys, Ops, CVT, |
| 9035 | MPI, Alignment, MachineMemOperand::MOLoad); |
| 9036 | } |
| 9037 | } |
| 9038 | |
| 9039 | |
| 9040 | if (!IsLoad && Subtarget.hasInt256() && |
| 9041 | (ScalarSize == 32 || (IsGE256 && ScalarSize == 64))) |
| 9042 | return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld); |
| 9043 | |
| 9044 | |
| 9045 | if (!IsLoad) |
| 9046 | return SDValue(); |
| 9047 | |
| 9048 | |
| 9049 | if (!Ld->hasNUsesOfValue(NumElts - NumUndefElts, 0)) |
| 9050 | return SDValue(); |
| 9051 | |
| 9052 | if (ScalarSize == 32 || (IsGE256 && ScalarSize == 64) || |
| 9053 | (Subtarget.hasVLX() && ScalarSize == 64)) { |
| 9054 | auto *LN = cast<LoadSDNode>(Ld); |
| 9055 | SDVTList Tys = DAG.getVTList(VT, MVT::Other); |
| 9056 | SDValue Ops[] = {LN->getChain(), LN->getBasePtr()}; |
| 9057 | SDValue BCast = |
| 9058 | DAG.getMemIntrinsicNode(X86ISD::VBROADCAST_LOAD, dl, Tys, Ops, |
| 9059 | LN->getMemoryVT(), LN->getMemOperand()); |
| 9060 | DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BCast.getValue(1)); |
| 9061 | return BCast; |
| 9062 | } |
| 9063 | |
| 9064 | |
| 9065 | |
| 9066 | if (Subtarget.hasInt256() && Ld.getValueType().isInteger() && |
| 9067 | (ScalarSize == 8 || ScalarSize == 16 || ScalarSize == 64)) { |
| 9068 | auto *LN = cast<LoadSDNode>(Ld); |
| 9069 | SDVTList Tys = DAG.getVTList(VT, MVT::Other); |
| 9070 | SDValue Ops[] = {LN->getChain(), LN->getBasePtr()}; |
| 9071 | SDValue BCast = |
| 9072 | DAG.getMemIntrinsicNode(X86ISD::VBROADCAST_LOAD, dl, Tys, Ops, |
| 9073 | LN->getMemoryVT(), LN->getMemOperand()); |
| 9074 | DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BCast.getValue(1)); |
| 9075 | return BCast; |
| 9076 | } |
| 9077 | |
| 9078 | |
| 9079 | return SDValue(); |
| 9080 | } |
| 9081 | |
| 9082 | |
| 9083 | |
| 9084 | |
| 9085 | |
| 9086 | |
| 9087 | static int getUnderlyingExtractedFromVec(SDValue &ExtractedFromVec, |
| 9088 | SDValue ExtIdx) { |
| 9089 | int Idx = cast<ConstantSDNode>(ExtIdx)->getZExtValue(); |
| 9090 | if (!isa<ShuffleVectorSDNode>(ExtractedFromVec)) |
| 9091 | return Idx; |
| 9092 | |
| 9093 | |
| 9094 | |
| 9095 | |
| 9096 | |
| 9097 | |
| 9098 | |
| 9099 | |
| 9100 | |
| 9101 | |
| 9102 | |
| 9103 | ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(ExtractedFromVec); |
| 9104 | SDValue ShuffleVec = SVOp->getOperand(0); |
| 9105 | MVT ShuffleVecVT = ShuffleVec.getSimpleValueType(); |
| 9106 | assert(ShuffleVecVT.getVectorElementType() == |
| 9107 | ExtractedFromVec.getSimpleValueType().getVectorElementType()); |
| 9108 | |
| 9109 | int ShuffleIdx = SVOp->getMaskElt(Idx); |
| 9110 | if (isUndefOrInRange(ShuffleIdx, 0, ShuffleVecVT.getVectorNumElements())) { |
| 9111 | ExtractedFromVec = ShuffleVec; |
| 9112 | return ShuffleIdx; |
| 9113 | } |
| 9114 | return Idx; |
| 9115 | } |
| 9116 | |
/// Try to lower a BUILD_VECTOR whose elements are mostly EXTRACT_VECTOR_ELTs
/// from at most two source vectors (of the same type as the result) into a
/// single vector shuffle, followed by INSERT_VECTOR_ELT for the few elements
/// (at most two) that are not extracts. Returns SDValue() when the pattern
/// does not match.
static SDValue buildFromShuffleMostly(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getSimpleValueType();

  // The fallback inserts require INSERT_VECTOR_ELT to be lowerable.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (!TLI.isOperationLegalOrCustom(ISD::INSERT_VECTOR_ELT, VT))
    return SDValue();

  SDLoc DL(Op);
  unsigned NumElems = Op.getNumOperands();

  // The (up to) two source vectors the extracts come from.
  SDValue VecIn1;
  SDValue VecIn2;
  // Positions of non-extract elements that must be inserted afterwards.
  SmallVector<unsigned, 4> InsertIndices;
  // Shuffle mask: -1 (undef) unless filled in by a matched extract.
  SmallVector<int, 8> Mask(NumElems, -1);

  for (unsigned i = 0; i != NumElems; ++i) {
    unsigned Opc = Op.getOperand(i).getOpcode();

    if (Opc == ISD::UNDEF)
      continue;

    if (Opc != ISD::EXTRACT_VECTOR_ELT) {
      // Quit if more than 2 elements need inserting.
      if (InsertIndices.size() > 1)
        return SDValue();

      InsertIndices.push_back(i);
      continue;
    }

    SDValue ExtractedFromVec = Op.getOperand(i).getOperand(0);
    SDValue ExtIdx = Op.getOperand(i).getOperand(1);

    // Only constant extract indices can be turned into a shuffle mask.
    if (!isa<ConstantSDNode>(ExtIdx))
      return SDValue();
    // Look through a shuffle feeding the extract (may rewrite
    // ExtractedFromVec and the index).
    int Idx = getUnderlyingExtractedFromVec(ExtractedFromVec, ExtIdx);

    // Source vector must match the result type for a plain shuffle.
    if (ExtractedFromVec.getValueType() != VT)
      return SDValue();

    // Track at most two distinct source vectors; a third means no match.
    if (!VecIn1.getNode())
      VecIn1 = ExtractedFromVec;
    else if (VecIn1 != ExtractedFromVec) {
      if (!VecIn2.getNode())
        VecIn2 = ExtractedFromVec;
      else if (VecIn2 != ExtractedFromVec)
        // Quit if more than 2 vectors to shuffle
        return SDValue();
    }

    // Mask indices >= NumElems address the second shuffle input.
    if (ExtractedFromVec == VecIn1)
      Mask[i] = Idx;
    else if (ExtractedFromVec == VecIn2)
      Mask[i] = Idx + NumElems;
  }

  // No extracts matched at all: nothing to shuffle.
  if (!VecIn1.getNode())
    return SDValue();

  VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);
  SDValue NV = DAG.getVectorShuffle(VT, DL, VecIn1, VecIn2, Mask);

  // Re-insert the non-extract elements on top of the shuffle result.
  for (unsigned Idx : InsertIndices)
    NV = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, NV, Op.getOperand(Idx),
                     DAG.getIntPtrConstant(Idx, DL));

  return NV;
}
| 9188 | |
| 9189 | |
/// Lower a BUILD_VECTOR of vXi1 (AVX-512 mask) elements. Constant elements
/// are packed into an immediate and bitcast into the mask register; variable
/// elements are inserted individually. Splats become a select of all-ones vs
/// all-zeroes. v64i1 on 32-bit targets is built as two v32i1 halves because
/// a 64-bit scalar immediate is not available there.
static SDValue LowerBUILD_VECTORvXi1(SDValue Op, SelectionDAG &DAG,
                                     const X86Subtarget &Subtarget) {

  MVT VT = Op.getSimpleValueType();
  assert((VT.getVectorElementType() == MVT::i1) &&
         "Unexpected type in LowerBUILD_VECTORvXi1!");

  SDLoc dl(Op);
  // All-zeros / all-ones masks are handled elsewhere; return unchanged.
  if (ISD::isBuildVectorAllZeros(Op.getNode()) ||
      ISD::isBuildVectorAllOnes(Op.getNode()))
    return Op;

  // Bit i of Immediate holds the value of constant element i.
  uint64_t Immediate = 0;
  // Positions of the non-constant elements, inserted at the end.
  SmallVector<unsigned, 16> NonConstIdx;
  bool IsSplat = true;
  bool HasConstElts = false;
  int SplatIdx = -1;
  for (unsigned idx = 0, e = Op.getNumOperands(); idx < e; ++idx) {
    SDValue In = Op.getOperand(idx);
    if (In.isUndef())
      continue;
    if (auto *InC = dyn_cast<ConstantSDNode>(In)) {
      Immediate |= (InC->getZExtValue() & 0x1) << idx;
      HasConstElts = true;
    } else {
      NonConstIdx.push_back(idx);
    }
    // Splat detection: every defined element must equal the first one seen.
    if (SplatIdx < 0)
      SplatIdx = idx;
    else if (In != Op.getOperand(SplatIdx))
      IsSplat = false;
  }

  // For splats, lower as: select(splat_elt, all-ones, all-zeroes).
  if (IsSplat) {
    SDValue Cond = Op.getOperand(SplatIdx);
    assert(Cond.getValueType() == MVT::i8 && "Unexpected VT!");
    // A SETCC already produces 0/1; anything else must be masked to bit 0.
    if (Cond.getOpcode() != ISD::SETCC)
      Cond = DAG.getNode(ISD::AND, dl, MVT::i8, Cond,
                         DAG.getConstant(1, dl, MVT::i8));

    // v64i1 without 64-bit scalars: build one v32i1 half and concat it
    // with itself.
    if (VT == MVT::v64i1 && !Subtarget.is64Bit()) {
      SDValue Select = DAG.getSelect(dl, MVT::i32, Cond,
                                     DAG.getAllOnesConstant(dl, MVT::i32),
                                     DAG.getConstant(0, dl, MVT::i32));
      Select = DAG.getBitcast(MVT::v32i1, Select);
      return DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v64i1, Select, Select);
    } else {
      // Select an integer of at least 8 bits, bitcast it to a mask vector of
      // matching width, then extract the subvector the caller asked for.
      MVT ImmVT = MVT::getIntegerVT(std::max((unsigned)VT.getSizeInBits(), 8U));
      SDValue Select = DAG.getSelect(dl, ImmVT, Cond,
                                     DAG.getAllOnesConstant(dl, ImmVT),
                                     DAG.getConstant(0, dl, ImmVT));
      MVT VecVT = VT.getSizeInBits() >= 8 ? VT : MVT::v8i1;
      Select = DAG.getBitcast(VecVT, Select);
      return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Select,
                         DAG.getIntPtrConstant(0, dl));
    }
  }

  // Materialize the constant elements (if any) as an immediate bitcast into
  // the mask register; variable elements are inserted below.
  SDValue DstVec;
  if (HasConstElts) {
    if (VT == MVT::v64i1 && !Subtarget.is64Bit()) {
      // 32-bit target: split the 64-bit immediate into two 32-bit halves.
      SDValue ImmL = DAG.getConstant(Lo_32(Immediate), dl, MVT::i32);
      SDValue ImmH = DAG.getConstant(Hi_32(Immediate), dl, MVT::i32);
      ImmL = DAG.getBitcast(MVT::v32i1, ImmL);
      ImmH = DAG.getBitcast(MVT::v32i1, ImmH);
      DstVec = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v64i1, ImmL, ImmH);
    } else {
      MVT ImmVT = MVT::getIntegerVT(std::max((unsigned)VT.getSizeInBits(), 8U));
      SDValue Imm = DAG.getConstant(Immediate, dl, ImmVT);
      MVT VecVT = VT.getSizeInBits() >= 8 ? VT : MVT::v8i1;
      DstVec = DAG.getBitcast(VecVT, Imm);
      DstVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, DstVec,
                           DAG.getIntPtrConstant(0, dl));
    }
  } else
    DstVec = DAG.getUNDEF(VT);

  // Insert each non-constant element into the partially built vector.
  for (unsigned i = 0, e = NonConstIdx.size(); i != e; ++i) {
    unsigned InsertIdx = NonConstIdx[i];
    DstVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, DstVec,
                         Op.getOperand(InsertIdx),
                         DAG.getIntPtrConstant(InsertIdx, dl));
  }
  return DstVec;
}
| 9282 | |
| 9283 | LLVM_ATTRIBUTE_UNUSED static bool isHorizOp(unsigned Opcode) { |
| 9284 | switch (Opcode) { |
| 9285 | case X86ISD::PACKSS: |
| 9286 | case X86ISD::PACKUS: |
| 9287 | case X86ISD::FHADD: |
| 9288 | case X86ISD::FHSUB: |
| 9289 | case X86ISD::HADD: |
| 9290 | case X86ISD::HSUB: |
| 9291 | return true; |
| 9292 | } |
| 9293 | return false; |
| 9294 | } |
| 9295 | |
| 9296 | |
| 9297 | |
| 9298 | |
| 9299 | |
| 9300 | |
| 9301 | |
| 9302 | |
| 9303 | |
| 9304 | |
| 9305 | |
| 9306 | |
| 9307 | |
| 9308 | |
| 9309 | |
| 9310 | |
| 9311 | |
| 9312 | |
| 9313 | |
| 9314 | |
| 9315 | |
/// Check whether BUILD_VECTOR elements [BaseIdx, LastIdx) of 256-bit vector
/// \p N form one half of a horizontal binop of kind \p Opcode: each element
/// must be `Opcode(extract(Src, 2k), extract(Src, 2k+1))` with consecutive
/// even/odd extract indices. On success, \p V0 receives the source feeding
/// the first half of the scanned range and \p V1 the source feeding the
/// second half (either may be left undef if that half is all-undef).
/// Commutable opcodes (ADD/FADD) may also match with the extracts swapped.
static bool isHorizontalBinOpPart(const BuildVectorSDNode *N, unsigned Opcode,
                                  SelectionDAG &DAG,
                                  unsigned BaseIdx, unsigned LastIdx,
                                  SDValue &V0, SDValue &V1) {
  EVT VT = N->getValueType(0);
  assert(VT.is256BitVector() && "Only use for matching partial 256-bit h-ops");
  assert(BaseIdx * 2 <= LastIdx && "Invalid Indices in input!");
  assert(VT.isVector() && VT.getVectorNumElements() >= LastIdx &&
         "Invalid Vector in input!");

  // ADD/FADD allow the two extracts to appear in either operand order.
  bool IsCommutable = (Opcode == ISD::ADD || Opcode == ISD::FADD);
  bool CanFold = true;
  // Extract index the next element's first operand is expected to use.
  unsigned ExpectedVExtractIdx = BaseIdx;
  unsigned NumElts = LastIdx - BaseIdx;
  V0 = DAG.getUNDEF(VT);
  V1 = DAG.getUNDEF(VT);

  // Check if N implements a horizontal binop.
  for (unsigned i = 0, e = NumElts; i != e && CanFold; ++i) {
    SDValue Op = N->getOperand(i + BaseIdx);

    // Skip UNDEFs, but still advance the expected extract index.
    if (Op->isUndef()) {
      // Quit if more than 1 undef occurs per half of the range: the second
      // half restarts its expected index from BaseIdx.
      if (i * 2 == NumElts)
        ExpectedVExtractIdx = BaseIdx;
      ExpectedVExtractIdx += 2;
      continue;
    }

    CanFold = Op->getOpcode() == Opcode && Op->hasOneUse();

    if (!CanFold)
      break;

    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    // Both operands must be extracts from the same vector, at constant
    // indices.
    CanFold = (Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
               Op1.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
               Op0.getOperand(0) == Op1.getOperand(0) &&
               isa<ConstantSDNode>(Op0.getOperand(1)) &&
               isa<ConstantSDNode>(Op1.getOperand(1)));
    if (!CanFold)
      break;

    unsigned I0 = Op0.getConstantOperandVal(1);
    unsigned I1 = Op1.getConstantOperandVal(1);

    if (i * 2 < NumElts) {
      // First half of the range: lock in source V0.
      if (V0.isUndef()) {
        V0 = Op0.getOperand(0);
        if (V0.getValueType() != VT)
          return false;
      }
    } else {
      // Second half of the range: lock in source V1.
      if (V1.isUndef()) {
        V1 = Op0.getOperand(0);
        if (V1.getValueType() != VT)
          return false;
      }
      // Crossing into the second half resets the expected extract index.
      if (i * 2 == NumElts)
        ExpectedVExtractIdx = BaseIdx;
    }

    SDValue Expected = (i * 2 < NumElts) ? V0 : V1;
    if (I0 == ExpectedVExtractIdx)
      // Straight match: operands extract indices (2k, 2k+1).
      CanFold = I1 == I0 + 1 && Op0.getOperand(0) == Expected;
    else if (IsCommutable && I1 == ExpectedVExtractIdx) {
      // Commuted match: operands extract indices (2k+1, 2k).
      CanFold = I0 == I1 + 1 && Op1.getOperand(0) == Expected;
    } else
      CanFold = false;

    ExpectedVExtractIdx += 2;
  }

  return CanFold;
}
| 9398 | |
| 9399 | |
| 9400 | |
| 9401 | |
| 9402 | |
| 9403 | |
| 9404 | |
| 9405 | |
| 9406 | |
| 9407 | |
| 9408 | |
| 9409 | |
| 9410 | |
| 9411 | |
| 9412 | |
| 9413 | |
| 9414 | |
| 9415 | |
| 9416 | |
| 9417 | |
| 9418 | |
| 9419 | |
| 9420 | |
| 9421 | |
| 9422 | |
| 9423 | |
| 9424 | |
| 9425 | |
| 9426 | |
| 9427 | |
| 9428 | |
| 9429 | |
| 9430 | static SDValue ExpandHorizontalBinOp(const SDValue &V0, const SDValue &V1, |
| 9431 | const SDLoc &DL, SelectionDAG &DAG, |
| 9432 | unsigned X86Opcode, bool Mode, |
| 9433 | bool isUndefLO, bool isUndefHI) { |
| 9434 | MVT VT = V0.getSimpleValueType(); |
| 9435 | assert(VT.is256BitVector() && VT == V1.getSimpleValueType() && |
| 9436 | "Invalid nodes in input!"); |
| 9437 | |
| 9438 | unsigned NumElts = VT.getVectorNumElements(); |
| 9439 | SDValue V0_LO = extract128BitVector(V0, 0, DAG, DL); |
| 9440 | SDValue V0_HI = extract128BitVector(V0, NumElts/2, DAG, DL); |
| 9441 | SDValue V1_LO = extract128BitVector(V1, 0, DAG, DL); |
| 9442 | SDValue V1_HI = extract128BitVector(V1, NumElts/2, DAG, DL); |
| 9443 | MVT NewVT = V0_LO.getSimpleValueType(); |
| 9444 | |
| 9445 | SDValue LO = DAG.getUNDEF(NewVT); |
| 9446 | SDValue HI = DAG.getUNDEF(NewVT); |
| 9447 | |
| 9448 | if (Mode) { |
| 9449 | |
| 9450 | if (!isUndefLO && !V0->isUndef()) |
| 9451 | LO = DAG.getNode(X86Opcode, DL, NewVT, V0_LO, V0_HI); |
| 9452 | if (!isUndefHI && !V1->isUndef()) |
| 9453 | HI = DAG.getNode(X86Opcode, DL, NewVT, V1_LO, V1_HI); |
| 9454 | } else { |
| 9455 | |
| 9456 | if (!isUndefLO && (!V0_LO->isUndef() || !V1_LO->isUndef())) |
| 9457 | LO = DAG.getNode(X86Opcode, DL, NewVT, V0_LO, V1_LO); |
| 9458 | |
| 9459 | if (!isUndefHI && (!V0_HI->isUndef() || !V1_HI->isUndef())) |
| 9460 | HI = DAG.getNode(X86Opcode, DL, NewVT, V0_HI, V1_HI); |
| 9461 | } |
| 9462 | |
| 9463 | return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, LO, HI); |
| 9464 | } |
| 9465 | |
| 9466 | |
| 9467 | |
| 9468 | |
| 9469 | |
| 9470 | |
/// Match a BUILD_VECTOR that alternates FADD and FSUB on corresponding lanes
/// of two source vectors, i.e. the pattern behind ADDSUB/SUBADD. Element i
/// must be `fadd/fsub(extract(InVec0, i), extract(InVec1, i))`, with even and
/// odd lanes using opposite opcodes. On success fills \p Opnd0 / \p Opnd1
/// with the two sources, \p NumExtracts with the number of matched lanes,
/// and \p IsSubAdd (true when even lanes FADD, odd lanes FSUB).
static bool isAddSubOrSubAdd(const BuildVectorSDNode *BV,
                             const X86Subtarget &Subtarget, SelectionDAG &DAG,
                             SDValue &Opnd0, SDValue &Opnd1,
                             unsigned &NumExtracts,
                             bool &IsSubAdd) {

  MVT VT = BV->getSimpleValueType(0);
  // ADDSUB/SUBADD require SSE3 and only exist for FP vectors.
  if (!Subtarget.hasSSE3() || !VT.isFloatingPoint())
    return false;

  unsigned NumElts = VT.getVectorNumElements();
  SDValue InVec0 = DAG.getUNDEF(VT);
  SDValue InVec1 = DAG.getUNDEF(VT);

  NumExtracts = 0;

  // Opc[0] is the opcode required on even lanes, Opc[1] on odd lanes;
  // 0 means "not yet determined".
  unsigned Opc[2] = {0, 0};
  for (unsigned i = 0, e = NumElts; i != e; ++i) {
    SDValue Op = BV->getOperand(i);

    // Skip 'undef' values.
    unsigned Opcode = Op.getOpcode();
    if (Opcode == ISD::UNDEF)
      continue;

    // Early exit if we found an unexpected opcode.
    if (Opcode != ISD::FADD && Opcode != ISD::FSUB)
      return false;

    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    // Both operands must be extracts at the same constant index.
    if (Op0.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
        Op1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
        !isa<ConstantSDNode>(Op0.getOperand(1)) ||
        Op0.getOperand(1) != Op1.getOperand(1))
      return false;

    // The extract index must equal the lane being built.
    unsigned I0 = Op0.getConstantOperandVal(1);
    if (I0 != i)
      return false;

    // This lane's opcode must agree with whatever its parity has already
    // committed to.
    if (Opc[i % 2] != 0 && Opc[i % 2] != Opcode)
      return false;
    Opc[i % 2] = Opcode;

    // Lock in the two source vectors on first sight; they must match VT.
    if (InVec0.isUndef()) {
      InVec0 = Op0.getOperand(0);
      if (InVec0.getSimpleValueType() != VT)
        return false;
    }
    if (InVec1.isUndef()) {
      InVec1 = Op1.getOperand(0);
      if (InVec1.getSimpleValueType() != VT)
        return false;
    }

    // If the first operand doesn't come from InVec0, the lane may still
    // match with its operands swapped — but only for commutative FADD.
    if (InVec0 != Op0.getOperand(0)) {
      if (Opcode == ISD::FSUB)
        return false;

      // FADD is commutable. Try to commute the operands and then test again.
      std::swap(Op0, Op1);
      if (InVec0 != Op0.getOperand(0))
        return false;
    }

    if (InVec1 != Op1.getOperand(0))
      return false;

    // Increment the number of extractions done.
    ++NumExtracts;
  }

  // The match fails unless both parities were seen, with opposite opcodes,
  // and both sources were established.
  if (!Opc[0] || !Opc[1] || Opc[0] == Opc[1] ||
      InVec0.isUndef() || InVec1.isUndef())
    return false;

  // SUBADD = FADD on even lanes, FSUB on odd lanes.
  IsSubAdd = Opc[0] == ISD::FADD;

  Opnd0 = InVec0;
  Opnd1 = InVec1;
  return true;
}
| 9571 | |
| 9572 | |
| 9573 | |
| 9574 | |
| 9575 | |
| 9576 | |
| 9577 | |
| 9578 | |
| 9579 | |
| 9580 | |
| 9581 | |
| 9582 | |
| 9583 | |
| 9584 | |
| 9585 | |
| 9586 | |
| 9587 | |
| 9588 | |
| 9589 | |
| 9590 | |
| 9591 | |
| 9592 | |
| 9593 | |
| 9594 | |
| 9595 | |
| 9596 | |
| 9597 | |
| 9598 | static bool isFMAddSubOrFMSubAdd(const X86Subtarget &Subtarget, |
| 9599 | SelectionDAG &DAG, |
| 9600 | SDValue &Opnd0, SDValue &Opnd1, SDValue &Opnd2, |
| 9601 | unsigned ExpectedUses) { |
| 9602 | if (Opnd0.getOpcode() != ISD::FMUL || |
| 9603 | !Opnd0->hasNUsesOfValue(ExpectedUses, 0) || !Subtarget.hasAnyFMA()) |
| 9604 | return false; |
| 9605 | |
| 9606 | |
| 9607 | |
| 9608 | |
| 9609 | |
| 9610 | const TargetOptions &Options = DAG.getTarget().Options; |
| 9611 | bool AllowFusion = |
| 9612 | (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath); |
| 9613 | if (!AllowFusion) |
| 9614 | return false; |
| 9615 | |
| 9616 | Opnd2 = Opnd1; |
| 9617 | Opnd1 = Opnd0.getOperand(1); |
| 9618 | Opnd0 = Opnd0.getOperand(0); |
| 9619 | |
| 9620 | return true; |
| 9621 | } |
| 9622 | |
| 9623 | |
| 9624 | |
| 9625 | |
| 9626 | static SDValue lowerToAddSubOrFMAddSub(const BuildVectorSDNode *BV, |
| 9627 | const X86Subtarget &Subtarget, |
| 9628 | SelectionDAG &DAG) { |
| 9629 | SDValue Opnd0, Opnd1; |
| 9630 | unsigned NumExtracts; |
| 9631 | bool IsSubAdd; |
| 9632 | if (!isAddSubOrSubAdd(BV, Subtarget, DAG, Opnd0, Opnd1, NumExtracts, |
| 9633 | IsSubAdd)) |
| 9634 | return SDValue(); |
| 9635 | |
| 9636 | MVT VT = BV->getSimpleValueType(0); |
| 9637 | SDLoc DL(BV); |
| 9638 | |
| 9639 | |
| 9640 | SDValue Opnd2; |
| 9641 | if (isFMAddSubOrFMSubAdd(Subtarget, DAG, Opnd0, Opnd1, Opnd2, NumExtracts)) { |
| 9642 | unsigned Opc = IsSubAdd ? X86ISD::FMSUBADD : X86ISD::FMADDSUB; |
| 9643 | return DAG.getNode(Opc, DL, VT, Opnd0, Opnd1, Opnd2); |
| 9644 | } |
| 9645 | |
| 9646 | |
| 9647 | if (IsSubAdd) |
| 9648 | return SDValue(); |
| 9649 | |
| 9650 | |
| 9651 | |
| 9652 | |
| 9653 | |
| 9654 | |
| 9655 | if (VT.is512BitVector()) |
| 9656 | return SDValue(); |
| 9657 | |
| 9658 | return DAG.getNode(X86ISD::ADDSUB, DL, VT, Opnd0, Opnd1); |
| 9659 | } |
| 9660 | |
/// Check whether BUILD_VECTOR \p BV matches the lane layout of an x86
/// horizontal add/sub (HADD/HSUB/FHADD/FHSUB). All defined elements must use
/// the same generic opcode, and within each 128-bit chunk each element must
/// combine a consecutive even/odd extract pair from one source. On success
/// \p HOpcode receives the X86ISD node kind and \p V0 / \p V1 the sources
/// feeding the low and high 64 bits of each chunk.
static bool isHopBuildVector(const BuildVectorSDNode *BV, SelectionDAG &DAG,
                             unsigned &HOpcode, SDValue &V0, SDValue &V1) {

  MVT VT = BV->getSimpleValueType(0);
  // ISD::DELETED_NODE doubles as "not determined yet".
  HOpcode = ISD::DELETED_NODE;
  V0 = DAG.getUNDEF(VT);
  V1 = DAG.getUNDEF(VT);

  // x86 256-bit horizontal ops work independently on each 128-bit lane,
  // so the matching is done per 128-bit chunk.
  unsigned NumElts = VT.getVectorNumElements();
  unsigned GenericOpcode = ISD::DELETED_NODE;
  unsigned Num128BitChunks = VT.is256BitVector() ? 2 : 1;
  unsigned NumEltsIn128Bits = NumElts / Num128BitChunks;
  unsigned NumEltsIn64Bits = NumEltsIn128Bits / 2;
  for (unsigned i = 0; i != Num128BitChunks; ++i) {
    for (unsigned j = 0; j != NumEltsIn128Bits; ++j) {
      // Ignore undef elements.
      SDValue Op = BV->getOperand(i * NumEltsIn128Bits + j);
      if (Op.isUndef())
        continue;

      // Every defined element must use the same opcode as the first one.
      if (HOpcode != ISD::DELETED_NODE && Op.getOpcode() != GenericOpcode)
        return false;

      // First defined element decides the horizontal-op kind.
      if (HOpcode == ISD::DELETED_NODE) {
        GenericOpcode = Op.getOpcode();
        switch (GenericOpcode) {
        case ISD::ADD: HOpcode = X86ISD::HADD; break;
        case ISD::SUB: HOpcode = X86ISD::HSUB; break;
        case ISD::FADD: HOpcode = X86ISD::FHADD; break;
        case ISD::FSUB: HOpcode = X86ISD::FHSUB; break;
        default: return false;
        }
      }

      // Both operands must be one-use constant-index extracts from the
      // same source vector.
      SDValue Op0 = Op.getOperand(0);
      SDValue Op1 = Op.getOperand(1);
      if (Op0.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
          Op1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
          Op0.getOperand(0) != Op1.getOperand(0) ||
          !isa<ConstantSDNode>(Op0.getOperand(1)) ||
          !isa<ConstantSDNode>(Op1.getOperand(1)) || !Op.hasOneUse())
        return false;

      // The low 64 bits of a chunk come from V0, the high 64 bits from V1;
      // lock each in on first sight.
      if (j < NumEltsIn64Bits) {
        if (V0.isUndef())
          V0 = Op0.getOperand(0);
      } else {
        if (V1.isUndef())
          V1 = Op0.getOperand(0);
      }

      SDValue SourceVec = (j < NumEltsIn64Bits) ? V0 : V1;
      if (SourceVec != Op0.getOperand(0))
        return false;

      // The extract indices must be a consecutive even/odd pair at the
      // chunk-relative position this element occupies.
      unsigned ExtIndex0 = Op0.getConstantOperandVal(1);
      unsigned ExtIndex1 = Op1.getConstantOperandVal(1);
      unsigned ExpectedIndex = i * NumEltsIn128Bits +
                               (j % NumEltsIn64Bits) * 2;
      if (ExpectedIndex == ExtIndex0 && ExtIndex1 == ExtIndex0 + 1)
        continue;

      // Swapped operands are only acceptable for commutative ops.
      if (GenericOpcode != ISD::ADD && GenericOpcode != ISD::FADD)
        return false;

      // Accept the commuted form: (2k+1, 2k).
      if (ExpectedIndex == ExtIndex1 && ExtIndex0 == ExtIndex1 + 1)
        continue;

      // Neither orientation matched.
      return false;
    }
  }

  return true;
}
| 9747 | |
| 9748 | static SDValue getHopForBuildVector(const BuildVectorSDNode *BV, |
| 9749 | SelectionDAG &DAG, unsigned HOpcode, |
| 9750 | SDValue V0, SDValue V1) { |
| 9751 | |
| 9752 | |
| 9753 | |
| 9754 | MVT VT = BV->getSimpleValueType(0); |
| 9755 | unsigned Width = VT.getSizeInBits(); |
| 9756 | if (V0.getValueSizeInBits() > Width) |
| 9757 | V0 = extractSubVector(V0, 0, DAG, SDLoc(BV), Width); |
| 9758 | else if (V0.getValueSizeInBits() < Width) |
| 9759 | V0 = insertSubVector(DAG.getUNDEF(VT), V0, 0, DAG, SDLoc(BV), Width); |
| 9760 | |
| 9761 | if (V1.getValueSizeInBits() > Width) |
| 9762 | V1 = extractSubVector(V1, 0, DAG, SDLoc(BV), Width); |
| 9763 | else if (V1.getValueSizeInBits() < Width) |
| 9764 | V1 = insertSubVector(DAG.getUNDEF(VT), V1, 0, DAG, SDLoc(BV), Width); |
| 9765 | |
| 9766 | unsigned NumElts = VT.getVectorNumElements(); |
| 9767 | APInt DemandedElts = APInt::getAllOnesValue(NumElts); |
| 9768 | for (unsigned i = 0; i != NumElts; ++i) |
| 9769 | if (BV->getOperand(i).isUndef()) |
| 9770 | DemandedElts.clearBit(i); |
| 9771 | |
| 9772 | |
| 9773 | unsigned HalfNumElts = NumElts / 2; |
| 9774 | if (VT.is256BitVector() && DemandedElts.lshr(HalfNumElts) == 0) { |
| 9775 | MVT HalfVT = VT.getHalfNumVectorElementsVT(); |
| 9776 | V0 = extractSubVector(V0, 0, DAG, SDLoc(BV), 128); |
| 9777 | V1 = extractSubVector(V1, 0, DAG, SDLoc(BV), 128); |
| 9778 | SDValue Half = DAG.getNode(HOpcode, SDLoc(BV), HalfVT, V0, V1); |
| 9779 | return insertSubVector(DAG.getUNDEF(VT), Half, 0, DAG, SDLoc(BV), 256); |
| 9780 | } |
| 9781 | |
| 9782 | return DAG.getNode(HOpcode, SDLoc(BV), VT, V0, V1); |
| 9783 | } |
| 9784 | |
| 9785 | |
/// Try to lower BUILD_VECTOR \p BV as an x86 horizontal add/sub. First a
/// direct match via isHopBuildVector for types with native h-op support;
/// otherwise, for 256-bit vectors on AVX, match each (or both) halves with
/// isHorizontalBinOpPart and expand through ExpandHorizontalBinOp.
static SDValue LowerToHorizontalOp(const BuildVectorSDNode *BV,
                                   const X86Subtarget &Subtarget,
                                   SelectionDAG &DAG) {
  // A horizontal op needs at least two defined elements to be worthwhile.
  unsigned NumNonUndefs =
      count_if(BV->op_values(), [](SDValue V) { return !V.isUndef(); });
  if (NumNonUndefs < 2)
    return SDValue();

  // Direct match: only attempted for types whose h-op instruction exists on
  // this subtarget (SSE3 FP, SSSE3 int, AVX 256-bit FP, AVX2 256-bit int).
  MVT VT = BV->getSimpleValueType(0);
  if (((VT == MVT::v4f32 || VT == MVT::v2f64) && Subtarget.hasSSE3()) ||
      ((VT == MVT::v8i16 || VT == MVT::v4i32) && Subtarget.hasSSSE3()) ||
      ((VT == MVT::v8f32 || VT == MVT::v4f64) && Subtarget.hasAVX()) ||
      ((VT == MVT::v16i16 || VT == MVT::v8i32) && Subtarget.hasAVX2())) {
    unsigned HOpcode;
    SDValue V0, V1;
    if (isHopBuildVector(BV, DAG, HOpcode, V0, V1))
      return getHopForBuildVector(BV, DAG, HOpcode, V0, V1);
  }

  // The partial-match paths below only apply to 256-bit vectors on AVX.
  if (!Subtarget.hasAVX() || !VT.is256BitVector())
    return SDValue();

  // Count undefs in each half; used to skip unprofitable expansions and to
  // mark whole-undef halves for ExpandHorizontalBinOp.
  unsigned NumElts = VT.getVectorNumElements();
  unsigned Half = NumElts / 2;
  unsigned NumUndefsLO = 0;
  unsigned NumUndefsHI = 0;
  for (unsigned i = 0, e = Half; i != e; ++i)
    if (BV->getOperand(i)->isUndef())
      NumUndefsLO++;

  for (unsigned i = Half, e = NumElts; i != e; ++i)
    if (BV->getOperand(i)->isUndef())
      NumUndefsHI++;

  SDLoc DL(BV);
  SDValue InVec0, InVec1;
  // Integer 256-bit case (pre-AVX2 path): match each half independently,
  // then require the halves to agree on (or leave undef) their sources.
  if (VT == MVT::v8i32 || VT == MVT::v16i16) {
    SDValue InVec2, InVec3;
    unsigned X86Opcode;
    bool CanFold = true;

    if (isHorizontalBinOpPart(BV, ISD::ADD, DAG, 0, Half, InVec0, InVec1) &&
        isHorizontalBinOpPart(BV, ISD::ADD, DAG, Half, NumElts, InVec2,
                              InVec3) &&
        ((InVec0.isUndef() || InVec2.isUndef()) || InVec0 == InVec2) &&
        ((InVec1.isUndef() || InVec3.isUndef()) || InVec1 == InVec3))
      X86Opcode = X86ISD::HADD;
    else if (isHorizontalBinOpPart(BV, ISD::SUB, DAG, 0, Half, InVec0,
                                   InVec1) &&
             isHorizontalBinOpPart(BV, ISD::SUB, DAG, Half, NumElts, InVec2,
                                   InVec3) &&
             ((InVec0.isUndef() || InVec2.isUndef()) || InVec0 == InVec2) &&
             ((InVec1.isUndef() || InVec3.isUndef()) || InVec1 == InVec3))
      X86Opcode = X86ISD::HSUB;
    else
      CanFold = false;

    if (CanFold) {
      // Do not try to expand this build_vector into a pair of horizontal
      // add/sub if we can emit a pair of scalar add/sub instead: a half
      // with exactly one defined element is cheaper as a scalar op.
      if (NumUndefsLO + 1 == Half || NumUndefsHI + 1 == Half)
        return SDValue();

      // Pick whichever source each operand slot actually established.
      SDValue V0 = InVec0.isUndef() ? InVec2 : InVec0;
      SDValue V1 = InVec1.isUndef() ? InVec3 : InVec1;
      assert((!V0.isUndef() || !V1.isUndef()) && "Horizontal-op of undefs?");
      bool isUndefLO = NumUndefsLO == Half;
      bool isUndefHI = NumUndefsHI == Half;
      // Mode == false: combine matching halves of the two sources.
      return ExpandHorizontalBinOp(V0, V1, DL, DAG, X86Opcode, false, isUndefLO,
                                   isUndefHI);
    }
  }

  // Whole-range match: all elements come from one pair of sources.
  if (VT == MVT::v8f32 || VT == MVT::v4f64 || VT == MVT::v8i32 ||
      VT == MVT::v16i16) {
    unsigned X86Opcode;
    if (isHorizontalBinOpPart(BV, ISD::ADD, DAG, 0, NumElts, InVec0, InVec1))
      X86Opcode = X86ISD::HADD;
    else if (isHorizontalBinOpPart(BV, ISD::SUB, DAG, 0, NumElts, InVec0,
                                   InVec1))
      X86Opcode = X86ISD::HSUB;
    else if (isHorizontalBinOpPart(BV, ISD::FADD, DAG, 0, NumElts, InVec0,
                                   InVec1))
      X86Opcode = X86ISD::FHADD;
    else if (isHorizontalBinOpPart(BV, ISD::FSUB, DAG, 0, NumElts, InVec0,
                                   InVec1))
      X86Opcode = X86ISD::FHSUB;
    else
      return SDValue();

    // Same profitability cutoff as above.
    if (NumUndefsLO + 1 == Half || NumUndefsHI + 1 == Half)
      return SDValue();

    bool isUndefLO = NumUndefsLO == Half;
    bool isUndefHI = NumUndefsHI == Half;
    // Mode == true: each output half is built from one full source.
    return ExpandHorizontalBinOp(InVec0, InVec1, DL, DAG, X86Opcode, true,
                                 isUndefLO, isUndefHI);
  }

  return SDValue();
}
| 9900 | |
| 9901 | static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget, |
| 9902 | SelectionDAG &DAG); |
| 9903 | |
| 9904 | |
| 9905 | |
| 9906 | |
| 9907 | |
| 9908 | |
| 9909 | |
/// If every element of a BUILD_VECTOR applies the same binary bit operation
/// (shift by constant, or AND/XOR/OR with a constant) to a scalar, rebuild it
/// as two BUILD_VECTORs (LHS elements and RHS constants) combined by a single
/// vector bit operation. Shifts are then lowered via LowerShift.
/// Returns SDValue() if the pattern does not match.
static SDValue lowerBuildVectorToBitOp(BuildVectorSDNode *Op,
                                       const X86Subtarget &Subtarget,
                                       SelectionDAG &DAG) {
  SDLoc DL(Op);
  MVT VT = Op->getSimpleValueType(0);
  unsigned NumElems = VT.getVectorNumElements();
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  // All elements must use the same opcode.
  unsigned Opcode = Op->getOperand(0).getOpcode();
  for (unsigned i = 1; i < NumElems; ++i)
    if (Opcode != Op->getOperand(i).getOpcode())
      return SDValue();

  // Only a small set of bitwise ops is handled.
  bool IsShift = false;
  switch (Opcode) {
  default:
    return SDValue();
  case ISD::SHL:
  case ISD::SRL:
  case ISD::SRA:
    IsShift = true;
    break;
  case ISD::AND:
  case ISD::XOR:
  case ISD::OR:
    // A splat of identical logic ops is better handled by the generic
    // splat/broadcast paths, so bail out here.
    if (Op->getSplatValue())
      return SDValue();
    // The vector form of the logic op must be available for this VT.
    if (!TLI.isOperationLegalOrPromote(Opcode, VT))
      return SDValue();
    break;
  }

  SmallVector<SDValue, 4> LHSElts, RHSElts;
  for (SDValue Elt : Op->ops()) {
    SDValue LHS = Elt.getOperand(0);
    SDValue RHS = Elt.getOperand(1);

    // The right-hand operand of every element must be a constant.
    if (!isa<ConstantSDNode>(RHS))
      return SDValue();

    // If the constant's width differs from the vector's scalar width, only
    // shift amounts may be resized (zext/trunc preserves a shift amount).
    if (RHS.getValueSizeInBits() != VT.getScalarSizeInBits()) {
      if (!IsShift)
        return SDValue();
      RHS = DAG.getZExtOrTrunc(RHS, DL, VT.getScalarType());
    }

    LHSElts.push_back(LHS);
    RHSElts.push_back(RHS);
  }

  // For shifts, all lanes must use the same amount so the result can be
  // lowered as a uniform vector shift.
  if (IsShift && any_of(RHSElts, [&](SDValue V) { return RHSElts[0] != V; }))
    return SDValue();

  SDValue LHS = DAG.getBuildVector(VT, DL, LHSElts);
  SDValue RHS = DAG.getBuildVector(VT, DL, RHSElts);
  SDValue Res = DAG.getNode(Opcode, DL, VT, LHS, RHS);

  if (!IsShift)
    return Res;

  // Immediately lower the vector shift so later passes see a legal node.
  return LowerShift(Res, Subtarget, DAG);
}
| 9984 | |
| 9985 | |
| 9986 | |
| 9987 | |
| 9988 | static SDValue materializeVectorConstant(SDValue Op, SelectionDAG &DAG, |
| 9989 | const X86Subtarget &Subtarget) { |
| 9990 | SDLoc DL(Op); |
| 9991 | MVT VT = Op.getSimpleValueType(); |
| 9992 | |
| 9993 | |
| 9994 | if (ISD::isBuildVectorAllZeros(Op.getNode())) |
| 9995 | return Op; |
| 9996 | |
| 9997 | |
| 9998 | |
| 9999 | |
| 10000 | if (Subtarget.hasSSE2() && ISD::isBuildVectorAllOnes(Op.getNode())) { |
| 10001 | if (VT == MVT::v4i32 || VT == MVT::v8i32 || VT == MVT::v16i32) |
| 10002 | return Op; |
| 10003 | |
| 10004 | return getOnesVector(VT, DAG, DL); |
| 10005 | } |
| 10006 | |
| 10007 | return SDValue(); |
| 10008 | } |
| 10009 | |
| 10010 | |
| 10011 | |
| 10012 | |
/// Emit a shuffle of SrcVec whose per-lane selectors come from IndicesVec
/// (a runtime-variable permute). Picks the cheapest subtarget instruction
/// (PSHUFB / VPERMILPV / VPERMV / VPPERM / VPERMIL2, or a compare+select
/// fallback) based on VT and available features. Returns SDValue() when no
/// profitable lowering exists for this VT/subtarget combination.
static SDValue createVariablePermute(MVT VT, SDValue SrcVec, SDValue IndicesVec,
                                     SDLoc &DL, SelectionDAG &DAG,
                                     const X86Subtarget &Subtarget) {
  MVT ShuffleVT = VT;
  EVT IndicesVT = EVT(VT).changeVectorElementTypeToInteger();
  unsigned NumElts = VT.getVectorNumElements();
  unsigned SizeInBits = VT.getSizeInBits();

  // Adjust IndicesVec to match VT size: it may have more (but never fewer)
  // elements than the destination type.
  assert(IndicesVec.getValueType().getVectorNumElements() >= NumElts &&
         "Illegal variable permute mask size");
  if (IndicesVec.getValueType().getVectorNumElements() > NumElts) {
    // Narrow/widen the index vector to the destination width first, then
    // zero-extend the low elements in-register if it is still too wide.
    if (IndicesVec.getValueSizeInBits() > SizeInBits)
      IndicesVec = extractSubVector(IndicesVec, 0, DAG, SDLoc(IndicesVec),
                                    NumElts * VT.getScalarSizeInBits());
    else if (IndicesVec.getValueSizeInBits() < SizeInBits)
      IndicesVec = widenSubVector(IndicesVec, false, Subtarget, DAG,
                                  SDLoc(IndicesVec), SizeInBits);

    if (IndicesVec.getValueType().getVectorNumElements() > NumElts)
      IndicesVec = DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, SDLoc(IndicesVec),
                               IndicesVT, IndicesVec);
  }
  IndicesVec = DAG.getZExtOrTrunc(IndicesVec, SDLoc(IndicesVec), IndicesVT);

  // Handle SrcVec that doesn't match VT type.
  if (SrcVec.getValueSizeInBits() != SizeInBits) {
    if ((SrcVec.getValueSizeInBits() % SizeInBits) == 0) {
      // Source is wider: permute at the source's width (recursively) and
      // extract the low destination-sized subvector of the result.
      unsigned Scale = SrcVec.getValueSizeInBits() / SizeInBits;
      VT = MVT::getVectorVT(VT.getScalarType(), Scale * NumElts);
      IndicesVT = EVT(VT).changeVectorElementTypeToInteger();
      IndicesVec = widenSubVector(IndicesVT.getSimpleVT(), IndicesVec, false,
                                  Subtarget, DAG, SDLoc(IndicesVec));
      SDValue NewSrcVec =
          createVariablePermute(VT, SrcVec, IndicesVec, DL, DAG, Subtarget);
      if (NewSrcVec)
        return extractSubVector(NewSrcVec, 0, DAG, DL, SizeInBits);
      return SDValue();
    } else if (SrcVec.getValueSizeInBits() < SizeInBits) {
      // Source is narrower: widen it (upper lanes become undef).
      SrcVec = widenSubVector(VT, SrcVec, false, Subtarget, DAG, SDLoc(SrcVec));
    } else
      return SDValue();
  }

  // Rescale element indices for a narrower shuffle element type: each index i
  // becomes Scale consecutive byte/sub-element indices
  //   i*Scale + 0, i*Scale + 1, ..., i*Scale + (Scale-1)
  // computed vectorized as (Idx * IndexScale) + IndexOffset.
  auto ScaleIndices = [&DAG](SDValue Idx, uint64_t Scale) {
    assert(isPowerOf2_64(Scale) && "Illegal variable permute shuffle scale");
    EVT SrcVT = Idx.getValueType();
    unsigned NumDstBits = SrcVT.getScalarSizeInBits() / Scale;
    uint64_t IndexScale = 0;
    uint64_t IndexOffset = 0;

    // Build per-sub-element multiplier and offset constants packed into a
    // single scalar, replicated across the vector below.
    for (uint64_t i = 0; i != Scale; ++i) {
      IndexScale |= Scale << (i * NumDstBits);
      IndexOffset |= i << (i * NumDstBits);
    }

    Idx = DAG.getNode(ISD::MUL, SDLoc(Idx), SrcVT, Idx,
                      DAG.getConstant(IndexScale, SDLoc(Idx), SrcVT));
    Idx = DAG.getNode(ISD::ADD, SDLoc(Idx), SrcVT, Idx,
                      DAG.getConstant(IndexOffset, SDLoc(Idx), SrcVT));
    return Idx;
  };

  // Select the best permute opcode (and possibly a different shuffle type)
  // per value type and subtarget feature set. Cases that need a bespoke DAG
  // (select/concat/recursion) return directly from inside the switch.
  unsigned Opcode = 0;
  switch (VT.SimpleTy) {
  default:
    break;
  case MVT::v16i8:
    if (Subtarget.hasSSSE3())
      Opcode = X86ISD::PSHUFB;
    break;
  case MVT::v8i16:
    if (Subtarget.hasVLX() && Subtarget.hasBWI())
      Opcode = X86ISD::VPERMV;
    else if (Subtarget.hasSSSE3()) {
      // No 16-bit permute: do it as a byte shuffle (indices rescaled later).
      Opcode = X86ISD::PSHUFB;
      ShuffleVT = MVT::v16i8;
    }
    break;
  case MVT::v4f32:
  case MVT::v4i32:
    if (Subtarget.hasAVX()) {
      Opcode = X86ISD::VPERMILPV;
      ShuffleVT = MVT::v4f32;
    } else if (Subtarget.hasSSSE3()) {
      Opcode = X86ISD::PSHUFB;
      ShuffleVT = MVT::v16i8;
    }
    break;
  case MVT::v2f64:
  case MVT::v2i64:
    if (Subtarget.hasAVX()) {
      // VPERMILPD selects using bit#1 of the index vector, so double the
      // index (x + x) to move the selector into that bit.
      IndicesVec = DAG.getNode(ISD::ADD, DL, IndicesVT, IndicesVec, IndicesVec);
      Opcode = X86ISD::VPERMILPV;
      ShuffleVT = MVT::v2f64;
    } else if (Subtarget.hasSSE41()) {
      // SSE41: select between broadcast-of-lane-0 and broadcast-of-lane-1
      // based on whether each index equals zero.
      return DAG.getSelectCC(
          DL, IndicesVec,
          getZeroVector(IndicesVT.getSimpleVT(), Subtarget, DAG, DL),
          DAG.getVectorShuffle(VT, DL, SrcVec, SrcVec, {0, 0}),
          DAG.getVectorShuffle(VT, DL, SrcVec, SrcVec, {1, 1}),
          ISD::CondCode::SETEQ);
    }
    break;
  case MVT::v32i8:
    if (Subtarget.hasVLX() && Subtarget.hasVBMI())
      Opcode = X86ISD::VPERMV;
    else if (Subtarget.hasXOP()) {
      // XOP VPPERM can pull bytes from two 128-bit sources at once.
      SDValue LoSrc = extract128BitVector(SrcVec, 0, DAG, DL);
      SDValue HiSrc = extract128BitVector(SrcVec, 16, DAG, DL);
      SDValue LoIdx = extract128BitVector(IndicesVec, 0, DAG, DL);
      SDValue HiIdx = extract128BitVector(IndicesVec, 16, DAG, DL);
      return DAG.getNode(
          ISD::CONCAT_VECTORS, DL, VT,
          DAG.getNode(X86ISD::VPPERM, DL, MVT::v16i8, LoSrc, HiSrc, LoIdx),
          DAG.getNode(X86ISD::VPPERM, DL, MVT::v16i8, LoSrc, HiSrc, HiIdx));
    } else if (Subtarget.hasAVX()) {
      SDValue Lo = extract128BitVector(SrcVec, 0, DAG, DL);
      SDValue Hi = extract128BitVector(SrcVec, 16, DAG, DL);
      SDValue LoLo = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Lo);
      SDValue HiHi = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Hi, Hi);
      // PSHUFB only shuffles within a 128-bit lane: shuffle both duplicated
      // halves and select per byte on index > 15 (high half) vs <= 15 (low).
      auto PSHUFBBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
                              ArrayRef<SDValue> Ops) {
        SDValue Idx = Ops[2];
        EVT VT = Idx.getValueType();
        return DAG.getSelectCC(DL, Idx, DAG.getConstant(15, DL, VT),
                               DAG.getNode(X86ISD::PSHUFB, DL, VT, Ops[1], Idx),
                               DAG.getNode(X86ISD::PSHUFB, DL, VT, Ops[0], Idx),
                               ISD::CondCode::SETGT);
      };
      SDValue Ops[] = {LoLo, HiHi, IndicesVec};
      return SplitOpsAndApply(DAG, Subtarget, DL, MVT::v32i8, Ops,
                              PSHUFBBuilder);
    }
    break;
  case MVT::v16i16:
    if (Subtarget.hasVLX() && Subtarget.hasBWI())
      Opcode = X86ISD::VPERMV;
    else if (Subtarget.hasAVX()) {
      // Scale to byte indices and recurse through the v32i8 path.
      IndicesVec = ScaleIndices(IndicesVec, 2);
      return DAG.getBitcast(
          VT, createVariablePermute(
                  MVT::v32i8, DAG.getBitcast(MVT::v32i8, SrcVec),
                  DAG.getBitcast(MVT::v32i8, IndicesVec), DL, DAG, Subtarget));
    }
    break;
  case MVT::v8f32:
  case MVT::v8i32:
    if (Subtarget.hasAVX2())
      Opcode = X86ISD::VPERMV;
    else if (Subtarget.hasAVX()) {
      SrcVec = DAG.getBitcast(MVT::v8f32, SrcVec);
      SDValue LoLo = DAG.getVectorShuffle(MVT::v8f32, DL, SrcVec, SrcVec,
                                          {0, 1, 2, 3, 0, 1, 2, 3});
      SDValue HiHi = DAG.getVectorShuffle(MVT::v8f32, DL, SrcVec, SrcVec,
                                          {4, 5, 6, 7, 4, 5, 6, 7});
      if (Subtarget.hasXOP())
        return DAG.getBitcast(
            VT, DAG.getNode(X86ISD::VPERMIL2, DL, MVT::v8f32, LoLo, HiHi,
                            IndicesVec, DAG.getTargetConstant(0, DL, MVT::i8)));
      // VPERMILPS only handles in-lane shuffles: shuffle duplicated low and
      // high halves and then select per element on index > 3.
      SDValue Res = DAG.getSelectCC(
          DL, IndicesVec, DAG.getConstant(3, DL, MVT::v8i32),
          DAG.getNode(X86ISD::VPERMILPV, DL, MVT::v8f32, HiHi, IndicesVec),
          DAG.getNode(X86ISD::VPERMILPV, DL, MVT::v8f32, LoLo, IndicesVec),
          ISD::CondCode::SETGT);
      return DAG.getBitcast(VT, Res);
    }
    break;
  case MVT::v4i64:
  case MVT::v4f64:
    if (Subtarget.hasAVX512()) {
      if (!Subtarget.hasVLX()) {
        // Without VLX, 256-bit VPERMV is unavailable: widen to 512 bits,
        // recurse, and extract the low 256 bits.
        MVT WidenSrcVT = MVT::getVectorVT(VT.getScalarType(), 8);
        SrcVec = widenSubVector(WidenSrcVT, SrcVec, false, Subtarget, DAG,
                                SDLoc(SrcVec));
        IndicesVec = widenSubVector(MVT::v8i64, IndicesVec, false, Subtarget,
                                    DAG, SDLoc(IndicesVec));
        SDValue Res = createVariablePermute(WidenSrcVT, SrcVec, IndicesVec, DL,
                                            DAG, Subtarget);
        return extract256BitVector(Res, 0, DAG, DL);
      }
      Opcode = X86ISD::VPERMV;
    } else if (Subtarget.hasAVX()) {
      SrcVec = DAG.getBitcast(MVT::v4f64, SrcVec);
      SDValue LoLo =
          DAG.getVectorShuffle(MVT::v4f64, DL, SrcVec, SrcVec, {0, 1, 0, 1});
      SDValue HiHi =
          DAG.getVectorShuffle(MVT::v4f64, DL, SrcVec, SrcVec, {2, 3, 2, 3});
      // VPERMILPD selects with bit#1 of the index, so double the indices.
      IndicesVec = DAG.getNode(ISD::ADD, DL, IndicesVT, IndicesVec, IndicesVec);
      if (Subtarget.hasXOP())
        return DAG.getBitcast(
            VT, DAG.getNode(X86ISD::VPERMIL2, DL, MVT::v4f64, LoLo, HiHi,
                            IndicesVec, DAG.getTargetConstant(0, DL, MVT::i8)));
      // In-lane permute of each duplicated half, then per-element select on
      // the (already doubled) index exceeding the low-half range.
      SDValue Res = DAG.getSelectCC(
          DL, IndicesVec, DAG.getConstant(2, DL, MVT::v4i64),
          DAG.getNode(X86ISD::VPERMILPV, DL, MVT::v4f64, HiHi, IndicesVec),
          DAG.getNode(X86ISD::VPERMILPV, DL, MVT::v4f64, LoLo, IndicesVec),
          ISD::CondCode::SETGT);
      return DAG.getBitcast(VT, Res);
    }
    break;
  case MVT::v64i8:
    if (Subtarget.hasVBMI())
      Opcode = X86ISD::VPERMV;
    break;
  case MVT::v32i16:
    if (Subtarget.hasBWI())
      Opcode = X86ISD::VPERMV;
    break;
  case MVT::v16f32:
  case MVT::v16i32:
  case MVT::v8f64:
  case MVT::v8i64:
    if (Subtarget.hasAVX512())
      Opcode = X86ISD::VPERMV;
    break;
  }
  if (!Opcode)
    return SDValue();

  assert((VT.getSizeInBits() == ShuffleVT.getSizeInBits()) &&
         (VT.getScalarSizeInBits() % ShuffleVT.getScalarSizeInBits()) == 0 &&
         "Illegal variable permute shuffle type");

  // If shuffling at a narrower element type, rescale the indices accordingly.
  uint64_t Scale = VT.getScalarSizeInBits() / ShuffleVT.getScalarSizeInBits();
  if (Scale > 1)
    IndicesVec = ScaleIndices(IndicesVec, Scale);

  EVT ShuffleIdxVT = EVT(ShuffleVT).changeVectorElementTypeToInteger();
  IndicesVec = DAG.getBitcast(ShuffleIdxVT, IndicesVec);

  SrcVec = DAG.getBitcast(ShuffleVT, SrcVec);
  // VPERMV takes (indices, src); the other permute nodes take (src, indices).
  SDValue Res = Opcode == X86ISD::VPERMV
                    ? DAG.getNode(Opcode, DL, ShuffleVT, IndicesVec, SrcVec)
                    : DAG.getNode(Opcode, DL, ShuffleVT, SrcVec, IndicesVec);
  return DAG.getBitcast(VT, Res);
}
| 10269 | |
| 10270 | |
| 10271 | |
| 10272 | |
| 10273 | |
| 10274 | |
| 10275 | |
| 10276 | |
| 10277 | |
| 10278 | |
| 10279 | |
| 10280 | |
| 10281 | static SDValue |
| 10282 | LowerBUILD_VECTORAsVariablePermute(SDValue V, SelectionDAG &DAG, |
| 10283 | const X86Subtarget &Subtarget) { |
| 10284 | SDValue SrcVec, IndicesVec; |
| 10285 | |
| 10286 | |
| 10287 | |
| 10288 | for (unsigned Idx = 0, E = V.getNumOperands(); Idx != E; ++Idx) { |
| 10289 | SDValue Op = V.getOperand(Idx); |
| 10290 | if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT) |
| 10291 | return SDValue(); |
| 10292 | |
| 10293 | |
| 10294 | |
| 10295 | |
| 10296 | if (!SrcVec) |
| 10297 | SrcVec = Op.getOperand(0); |
| 10298 | else if (SrcVec != Op.getOperand(0)) |
| 10299 | return SDValue(); |
| 10300 | SDValue ExtractedIndex = Op->getOperand(1); |
| 10301 | |
| 10302 | if (ExtractedIndex.getOpcode() == ISD::ZERO_EXTEND || |
| 10303 | ExtractedIndex.getOpcode() == ISD::SIGN_EXTEND) |
| 10304 | ExtractedIndex = ExtractedIndex.getOperand(0); |
| 10305 | if (ExtractedIndex.getOpcode() != ISD::EXTRACT_VECTOR_ELT) |
| 10306 | return SDValue(); |
| 10307 | |
| 10308 | |
| 10309 | |
| 10310 | |
| 10311 | if (!IndicesVec) |
| 10312 | IndicesVec = ExtractedIndex.getOperand(0); |
| 10313 | else if (IndicesVec != ExtractedIndex.getOperand(0)) |
| 10314 | return SDValue(); |
| 10315 | |
| 10316 | auto *PermIdx = dyn_cast<ConstantSDNode>(ExtractedIndex.getOperand(1)); |
| 10317 | if (!PermIdx || PermIdx->getAPIntValue() != Idx) |
| 10318 | return SDValue(); |
| 10319 | } |
| 10320 | |
| 10321 | SDLoc DL(V); |
| 10322 | MVT VT = V.getSimpleValueType(); |
| 10323 | return createVariablePermute(VT, SrcVec, IndicesVec, DL, DAG, Subtarget); |
| 10324 | } |
| 10325 | |
/// Custom lowering for BUILD_VECTOR: classifies the elements (undef / zero /
/// non-zero, constant / variable) and tries a cascade of strategies, from
/// special-case lowerings (vXi1, constants, horizontal ops, broadcasts,
/// bit-ops, variable permutes, consecutive loads) down to generic
/// scalar_to_vector + shuffle/insert expansion.
SDValue
X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);

  MVT VT = Op.getSimpleValueType();
  MVT EltVT = VT.getVectorElementType();
  unsigned NumElems = Op.getNumOperands();

  // Generate vectors for predicate vectors.
  if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512())
    return LowerBUILD_VECTORvXi1(Op, DAG, Subtarget);

  if (SDValue VectorConstant = materializeVectorConstant(Op, DAG, Subtarget))
    return VectorConstant;

  // Scan the elements once, recording which are undef / zero / non-zero and
  // whether everything is constant.
  unsigned EVTBits = EltVT.getSizeInBits();
  APInt UndefMask = APInt::getNullValue(NumElems);
  APInt ZeroMask = APInt::getNullValue(NumElems);
  APInt NonZeroMask = APInt::getNullValue(NumElems);
  bool IsAllConstants = true;
  SmallSet<SDValue, 8> Values;
  unsigned NumConstants = NumElems;
  for (unsigned i = 0; i < NumElems; ++i) {
    SDValue Elt = Op.getOperand(i);
    if (Elt.isUndef()) {
      UndefMask.setBit(i);
      continue;
    }
    Values.insert(Elt);
    if (!isa<ConstantSDNode>(Elt) && !isa<ConstantFPSDNode>(Elt)) {
      IsAllConstants = false;
      NumConstants--;
    }
    if (X86::isZeroNode(Elt)) {
      ZeroMask.setBit(i);
    } else {
      NonZeroMask.setBit(i);
    }
  }

  // All undef vector. Return an UNDEF (all-zero was handled by
  // materializeVectorConstant above).
  if (NonZeroMask == 0) {
    assert(UndefMask.isAllOnesValue() && "Fully undef mask expected");
    return DAG.getUNDEF(VT);
  }

  BuildVectorSDNode *BV = cast<BuildVectorSDNode>(Op.getNode());

  // If the upper elements of a ymm/zmm are undef/zero then build a narrower
  // BUILD_VECTOR and widen it (padding with zeros when the upper elements
  // were zero rather than undef).
  if ((VT.is256BitVector() || VT.is512BitVector()) &&
      !isFoldableUseOfShuffle(BV)) {
    unsigned UpperElems = NumElems / 2;
    APInt UndefOrZeroMask = UndefMask | ZeroMask;
    unsigned NumUpperUndefsOrZeros = UndefOrZeroMask.countLeadingOnes();
    if (NumUpperUndefsOrZeros >= UpperElems) {
      // For 512-bit vectors, try to narrow all the way down to a quarter.
      if (VT.is512BitVector() &&
          NumUpperUndefsOrZeros >= (NumElems - (NumElems / 4)))
        UpperElems = NumElems - (NumElems / 4);
      bool UndefUpper = UndefMask.countLeadingOnes() >= UpperElems;
      MVT LowerVT = MVT::getVectorVT(EltVT, NumElems - UpperElems);
      SDValue NewBV =
          DAG.getBuildVector(LowerVT, dl, Op->ops().drop_back(UpperElems));
      return widenSubVector(VT, NewBV, !UndefUpper, Subtarget, DAG, dl);
    }
  }

  // Try the specialized BUILD_VECTOR lowerings in turn.
  if (SDValue AddSub = lowerToAddSubOrFMAddSub(BV, Subtarget, DAG))
    return AddSub;
  if (SDValue HorizontalOp = LowerToHorizontalOp(BV, Subtarget, DAG))
    return HorizontalOp;
  if (SDValue Broadcast = lowerBuildVectorAsBroadcast(BV, Subtarget, DAG))
    return Broadcast;
  if (SDValue BitOp = lowerBuildVectorToBitOp(BV, Subtarget, DAG))
    return BitOp;

  unsigned NumZero = ZeroMask.countPopulation();
  unsigned NumNonZero = NonZeroMask.countPopulation();

  // If we are inserting one variable into a vector of non-zero constants, try
  // to avoid loading each constant element individually: load the whole
  // constant vector from the constant pool, then insert the variable element.
  // (The NumNonZero != 1 restriction leaves the single-non-zero case to the
  // cheaper paths below.)
  if (NumConstants == NumElems - 1 && NumNonZero != 1 &&
      (isOperationLegalOrCustom(ISD::INSERT_VECTOR_ELT, VT) ||
       isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, VT))) {
    // Build the constant vector, leaving the slot of the single variable
    // element as undef, and remember that element and its index.
    LLVMContext &Context = *DAG.getContext();
    Type *EltType = Op.getValueType().getScalarType().getTypeForEVT(Context);
    SmallVector<Constant *, 16> ConstVecOps(NumElems, UndefValue::get(EltType));
    SDValue VarElt;
    SDValue InsIndex;
    for (unsigned i = 0; i != NumElems; ++i) {
      SDValue Elt = Op.getOperand(i);
      if (auto *C = dyn_cast<ConstantSDNode>(Elt))
        ConstVecOps[i] = ConstantInt::get(Context, C->getAPIntValue());
      else if (auto *C = dyn_cast<ConstantFPSDNode>(Elt))
        ConstVecOps[i] = ConstantFP::get(Context, C->getValueAPF());
      else if (!Elt.isUndef()) {
        assert(!VarElt.getNode() && !InsIndex.getNode() &&
               "Expected one variable element in this vector");
        VarElt = Elt;
        InsIndex = DAG.getVectorIdxConstant(i, dl);
      }
    }
    Constant *CV = ConstantVector::get(ConstVecOps);
    SDValue DAGConstVec = DAG.getConstantPool(CV, VT);

    // Legalize the constant-pool address now and load the vector.
    SDValue LegalDAGConstVec = LowerConstantPool(DAGConstVec, DAG);
    MachineFunction &MF = DAG.getMachineFunction();
    MachinePointerInfo MPI = MachinePointerInfo::getConstantPool(MF);
    SDValue Ld = DAG.getLoad(VT, dl, DAG.getEntryNode(), LegalDAGConstVec, MPI);
    unsigned InsertC = cast<ConstantSDNode>(InsIndex)->getZExtValue();
    unsigned NumEltsInLow128Bits = 128 / VT.getScalarSizeInBits();
    // Insertion into the low 128 bits has a direct instruction.
    if (InsertC < NumEltsInLow128Bits)
      return DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Ld, VarElt, InsIndex);

    // There is no instruction to insert into the upper lanes of a ymm/zmm,
    // so blend the variable element in with a shuffle instead.
    assert(VT.getSizeInBits() > 128 && "Invalid insertion index?");
    assert(Subtarget.hasAVX() && "Must have AVX with >16-byte vector");
    SmallVector<int, 8> ShuffleMask;
    unsigned NumElts = VT.getVectorNumElements();
    for (unsigned i = 0; i != NumElts; ++i)
      ShuffleMask.push_back(i == InsertC ? NumElts : i);
    SDValue S2V = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, VarElt);
    return DAG.getVectorShuffle(VT, dl, Ld, S2V, ShuffleMask);
  }

  // Special case for a single non-zero, non-undef element.
  if (NumNonZero == 1) {
    unsigned Idx = NonZeroMask.countTrailingZeros();
    SDValue Item = Op.getOperand(Idx);

    // The non-zero element is in position 0.
    if (Idx == 0) {
      // With no zero elements, a plain scalar_to_vector suffices (the other
      // lanes are undef).
      if (NumZero == 0)
        return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);

      // Element types with a native move-to-vector form: insert into lane 0
      // and zero the remaining lanes with a shuffle.
      if (EltVT == MVT::i32 || EltVT == MVT::f32 || EltVT == MVT::f64 ||
          (EltVT == MVT::i64 && Subtarget.is64Bit())) {
        assert((VT.is128BitVector() || VT.is256BitVector() ||
                VT.is512BitVector()) &&
               "Expected an SSE value type!");
        Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);

        return getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
      }

      // Narrow integer elements: zero-extend to i32 first so the existing
      // i32 path can be reused, then bitcast back.
      if (EltVT == MVT::i16 || EltVT == MVT::i8) {
        Item = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, Item);
        MVT ShufVT = MVT::getVectorVT(MVT::i32, VT.getSizeInBits() / 32);
        Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, ShufVT, Item);
        Item = getShuffleVectorZeroOrUndef(Item, 0, true, Subtarget, DAG);
        return DAG.getBitcast(VT, Item);
      }
    }

    // (0, x) in a 2-element vector: emit as a vector shift of a
    // scalar_to_vector of x.
    if (NumElems == 2 && Idx == 1 &&
        X86::isZeroNode(Op.getOperand(0)) &&
        !X86::isZeroNode(Op.getOperand(1))) {
      unsigned NumBits = VT.getSizeInBits();
      return getVShift(true, VT,
                       DAG.getNode(ISD::SCALAR_TO_VECTOR, dl,
                                   VT, Op.getOperand(1)),
                       NumBits/2, DAG, *this, dl);
    }

    // Constant vectors are better materialized from the constant pool.
    if (IsAllConstants)
      return SDValue();

    // 32-bit elements: scalar_to_vector then shuffle the element into place,
    // zeroing or leaving undef the other lanes as appropriate.
    if (EVTBits == 32) {
      Item = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Item);
      return getShuffleVectorZeroOrUndef(Item, Idx, NumZero > 0, Subtarget, DAG);
    }
  }

  // Splat of a single value: try a splat vector load when this BUILD_VECTOR
  // is the value's only user; otherwise let later passes handle the splat.
  if (Values.size() == 1) {
    if (EVTBits == 32) {
      unsigned Idx = NonZeroMask.countTrailingZeros();
      SDValue Item = Op.getOperand(Idx);
      if (Op.getNode()->isOnlyUserOf(Item.getNode()))
        return LowerAsSplatVectorLoad(Item, VT, dl, DAG);
    }
    return SDValue();
  }

  // Remaining all-constant vectors: materialize from the constant pool
  // (handled by generic legalization when we return SDValue()).
  if (IsAllConstants)
    return SDValue();

  if (SDValue V = LowerBUILD_VECTORAsVariablePermute(Op, DAG, Subtarget))
    return V;

  // See if the elements can be loaded as one consecutive vector load.
  {
    SmallVector<SDValue, 64> Ops(Op->op_begin(), Op->op_begin() + NumElems);
    if (SDValue LD =
            EltsFromConsecutiveLoads(VT, Ops, dl, DAG, Subtarget, false))
      return LD;
  }

  // AVX2, 32-bit elements, exactly two distinct values alternating (a, b, a,
  // b, ...): build a small vector of the pair and broadcast the 64-bit pair.
  if (Subtarget.hasAVX2() && EVTBits == 32 && Values.size() == 2) {
    SDValue Ops[4] = { Op.getOperand(0), Op.getOperand(1),
                       DAG.getUNDEF(EltVT), DAG.getUNDEF(EltVT) };
    auto CanSplat = [](SDValue Op, unsigned NumElems, ArrayRef<SDValue> Ops) {
      // Every even lane must repeat operand 0, every odd lane operand 1.
      for (unsigned i = 2; i != NumElems; ++i)
        if (Ops[i % 2] != Op.getOperand(i))
          return false;
      return true;
    };
    if (CanSplat(Op, NumElems, Ops)) {
      MVT WideEltVT = VT.isFloatingPoint() ? MVT::f64 : MVT::i64;
      MVT NarrowVT = MVT::getVectorVT(EltVT, 4);
      // Pack the (a, b) pair into one 64-bit element.
      SDValue NewBV = DAG.getBitcast(MVT::getVectorVT(WideEltVT, 2),
                                     DAG.getBuildVector(NarrowVT, dl, Ops));
      // Broadcast the 64-bit pair across the whole vector.
      MVT BcastVT = MVT::getVectorVT(WideEltVT, NumElems / 2);
      return DAG.getBitcast(VT, DAG.getNode(X86ISD::VBROADCAST, dl, BcastVT,
                                            NewBV));
    }
  }

  // For AVX-length vectors, build the individual 128-bit halves and then
  // concatenate them.
  if (VT.getSizeInBits() > 128) {
    MVT HVT = MVT::getVectorVT(EltVT, NumElems / 2);

    // Build both halves as independent BUILD_VECTORs.
    SDValue Lower =
        DAG.getBuildVector(HVT, dl, Op->ops().slice(0, NumElems / 2));
    SDValue Upper = DAG.getBuildVector(
        HVT, dl, Op->ops().slice(NumElems / 2, NumElems /2));

    // Recreate the wider vector with the lower and upper part.
    return concatSubVectors(Lower, Upper, DAG, dl);
  }

  // 64-bit elements in a 128-bit vector.
  if (EVTBits == 64) {
    if (NumNonZero == 1) {
      // One half is zero or undef.
      unsigned Idx = NonZeroMask.countTrailingZeros();
      SDValue V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT,
                               Op.getOperand(Idx));
      return getShuffleVectorZeroOrUndef(V2, Idx, true, Subtarget, DAG);
    }
    return SDValue();
  }

  // If element VT is < 32 bits, convert it to inserts into a zero vector.
  if (EVTBits == 8 && NumElems == 16)
    if (SDValue V = LowerBuildVectorv16i8(Op, NonZeroMask, NumNonZero, NumZero,
                                          DAG, Subtarget))
      return V;

  if (EVTBits == 16 && NumElems == 8)
    if (SDValue V = LowerBuildVectorv8i16(Op, NonZeroMask, NumNonZero, NumZero,
                                          DAG, Subtarget))
      return V;

  // If element VT is == 32 bits and has 4 elems, try to generate an INSERTPS.
  if (EVTBits == 32 && NumElems == 4)
    if (SDValue V = LowerBuildVectorv4x32(Op, DAG, Subtarget))
      return V;

  // If element VT is == 32 bits, turn it into a number of shuffles.
  if (NumElems == 4 && NumZero > 0) {
    // Lift each element into its own vector (zero vector for zero lanes).
    SmallVector<SDValue, 8> Ops(NumElems);
    for (unsigned i = 0; i < 4; ++i) {
      bool isZero = !NonZeroMask[i];
      if (isZero)
        Ops[i] = getZeroVector(VT, Subtarget, DAG, dl);
      else
        Ops[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(i));
    }

    // Combine adjacent pairs; the 2-bit pattern of non-zero lanes picks the
    // cheapest combiner (reuse, MOVL either way round, or UNPCKL).
    for (unsigned i = 0; i < 2; ++i) {
      switch (NonZeroMask.extractBitsAsZExtValue(2, i * 2)) {
        default: llvm_unreachable("Unexpected NonZero count");
        case 0:
          Ops[i] = Ops[i*2];  // Must be a zero vector.
          break;
        case 1:
          Ops[i] = getMOVL(DAG, dl, VT, Ops[i*2+1], Ops[i*2]);
          break;
        case 2:
          Ops[i] = getMOVL(DAG, dl, VT, Ops[i*2], Ops[i*2+1]);
          break;
        case 3:
          Ops[i] = getUnpackl(DAG, dl, VT, Ops[i*2], Ops[i*2+1]);
          break;
      }
    }

    // The MOVL combiners above place the element in lane 0, so a pair that
    // was combined as "2" (only the high lane non-zero) is now reversed.
    bool Reverse1 = NonZeroMask.extractBitsAsZExtValue(2, 0) == 2;
    bool Reverse2 = NonZeroMask.extractBitsAsZExtValue(2, 2) == 2;
    int MaskVec[] = {
      Reverse1 ? 1 : 0,
      Reverse1 ? 0 : 1,
      static_cast<int>(Reverse2 ? NumElems+1 : NumElems),
      static_cast<int>(Reverse2 ? NumElems : NumElems+1)
    };
    return DAG.getVectorShuffle(VT, dl, Ops[0], Ops[1], MaskVec);
  }

  assert(Values.size() > 1 && "Expected non-undef and non-splat vector");

  // Check for a build vector from mostly shuffle plus few inserting.
  if (SDValue Sh = buildFromShuffleMostly(Op, DAG))
    return Sh;

  // For SSE 4.1, use insertps to put the high elements into the low element.
  if (Subtarget.hasSSE41()) {
    SDValue Result;
    if (!Op.getOperand(0).isUndef())
      Result = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(0));
    else
      Result = DAG.getUNDEF(VT);

    for (unsigned i = 1; i < NumElems; ++i) {
      if (Op.getOperand(i).isUndef()) continue;
      Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Result,
                           Op.getOperand(i), DAG.getIntPtrConstant(i, dl));
    }
    return Result;
  }

  // Otherwise, expand into a number of unpckl*, starting by placing each
  // element into a vector of its own.
  SmallVector<SDValue, 8> Ops(NumElems);
  for (unsigned i = 0; i < NumElems; ++i) {
    if (!Op.getOperand(i).isUndef())
      Ops[i] = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Op.getOperand(i));
    else
      Ops[i] = DAG.getUNDEF(VT);
  }

  // Next, merge adjacent vectors pairwise, doubling the number of defined
  // low elements each round until everything is in Ops[0]. For example, for
  // v4f32: <a, u, u, u>, <b, u, u, u> -> <a, b, u, u>; then
  // <a, b, u, u>, <c, d, u, u> -> <a, b, c, d>.
  for (unsigned Scale = 1; Scale < NumElems; Scale *= 2) {
    // Generate a mask that interleaves the first Scale elements of each pair.
    SmallVector<int, 16> Mask;
    for(unsigned i = 0; i != Scale; ++i)
      Mask.push_back(i);
    for (unsigned i = 0; i != Scale; ++i)
      Mask.push_back(NumElems+i);
    Mask.append(NumElems - Mask.size(), SM_SentinelUndef);

    for (unsigned i = 0, e = NumElems / (2 * Scale); i != e; ++i)
      Ops[i] = DAG.getVectorShuffle(VT, dl, Ops[2*i], Ops[(2*i)+1], Mask);
  }
  return Ops[0];
}
| 10715 | |
| 10716 | |
| 10717 | |
| 10718 | |
| 10719 | static SDValue LowerAVXCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG, |
| 10720 | const X86Subtarget &Subtarget) { |
| 10721 | SDLoc dl(Op); |
| 10722 | MVT ResVT = Op.getSimpleValueType(); |
| 10723 | |
| 10724 | assert((ResVT.is256BitVector() || |
| 10725 | ResVT.is512BitVector()) && "Value type must be 256-/512-bit wide"); |
| 10726 | |
| 10727 | unsigned NumOperands = Op.getNumOperands(); |
| 10728 | unsigned NumZero = 0; |
| 10729 | unsigned NumNonZero = 0; |
| 10730 | unsigned NonZeros = 0; |
| 10731 | for (unsigned i = 0; i != NumOperands; ++i) { |
| 10732 | SDValue SubVec = Op.getOperand(i); |
| 10733 | if (SubVec.isUndef()) |
| 10734 | continue; |
| 10735 | if (ISD::isBuildVectorAllZeros(SubVec.getNode())) |
| 10736 | ++NumZero; |
| 10737 | else { |
| 10738 | assert(i < sizeof(NonZeros) * CHAR_BIT); |
| 10739 | NonZeros |= 1 << i; |
| 10740 | ++NumNonZero; |
| 10741 | } |
| 10742 | } |
| 10743 | |
| 10744 | |
| 10745 | if (NumNonZero > 2) { |
| 10746 | MVT HalfVT = ResVT.getHalfNumVectorElementsVT(); |
| 10747 | ArrayRef<SDUse> Ops = Op->ops(); |
| 10748 | SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfVT, |
| 10749 | Ops.slice(0, NumOperands/2)); |
| 10750 | SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfVT, |
| 10751 | Ops.slice(NumOperands/2)); |
| 10752 | return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi); |
| 10753 | } |
| 10754 | |
| 10755 | |
| 10756 | SDValue Vec = NumZero ? getZeroVector(ResVT, Subtarget, DAG, dl) |
| 10757 | : DAG.getUNDEF(ResVT); |
| 10758 | |
| 10759 | MVT SubVT = Op.getOperand(0).getSimpleValueType(); |
| 10760 | unsigned NumSubElems = SubVT.getVectorNumElements(); |
| 10761 | for (unsigned i = 0; i != NumOperands; ++i) { |
| 10762 | if ((NonZeros & (1 << i)) == 0) |
| 10763 | continue; |
| 10764 | |
| 10765 | Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Vec, |
| 10766 | Op.getOperand(i), |
| 10767 | DAG.getIntPtrConstant(i * NumSubElems, dl)); |
| 10768 | } |
| 10769 | |
| 10770 | return Vec; |
| 10771 | } |
| 10772 | |
| 10773 | |
| 10774 | |
| 10775 | |
| 10776 | |
// Lower CONCAT_VECTORS with an i1 element type (AVX-512 mask vectors), using
// mask shifts and subvector inserts where possible.
static SDValue LowerCONCAT_VECTORSvXi1(SDValue Op,
                                       const X86Subtarget &Subtarget,
                                       SelectionDAG & DAG) {
  SDLoc dl(Op);
  MVT ResVT = Op.getSimpleValueType();
  unsigned NumOperands = Op.getNumOperands();

  assert(NumOperands > 1 && isPowerOf2_32(NumOperands) &&
         "Unexpected number of operands in CONCAT_VECTORS");

  // Record which operands are all-zeros and which are "non-zero" as bitmasks
  // indexed by operand number. Undef operands appear in neither mask.
  uint64_t Zeros = 0;
  uint64_t NonZeros = 0;
  for (unsigned i = 0; i != NumOperands; ++i) {
    SDValue SubVec = Op.getOperand(i);
    if (SubVec.isUndef())
      continue;
    assert(i < sizeof(NonZeros) * CHAR_BIT); // Keep the shifts below in range.
    if (ISD::isBuildVectorAllZeros(SubVec.getNode()))
      Zeros |= (uint64_t)1 << i;
    else
      NonZeros |= (uint64_t)1 << i;
  }

  unsigned NumElems = ResVT.getVectorNumElements();

  // If there is exactly one non-zero operand (a single bit in NonZeros), at
  // least one zero operand, and the non-zero operand is neither the lowest
  // operand (NonZeros > Zeros rules that out) nor the last one, produce the
  // result with a single KSHIFTL: the shift supplies the zero bits below.
  if (isPowerOf2_64(NonZeros) && Zeros != 0 && NonZeros > Zeros &&
      Log2_64(NonZeros) != NumOperands - 1) {
    // Small mask types are widened to v8i1/v16i1 for the shift; which one
    // depends on DQI support.
    MVT ShiftVT = ResVT;
    if ((!Subtarget.hasDQI() && NumElems == 8) || NumElems < 8)
      ShiftVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1;
    unsigned Idx = Log2_64(NonZeros);
    SDValue SubVec = Op.getOperand(Idx);
    unsigned SubVecNumElts = SubVec.getSimpleValueType().getVectorNumElements();
    SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ShiftVT,
                         DAG.getUNDEF(ShiftVT), SubVec,
                         DAG.getIntPtrConstant(0, dl));
    Op = DAG.getNode(X86ISD::KSHIFTL, dl, ShiftVT, SubVec,
                     DAG.getTargetConstant(Idx * SubVecNumElts, dl, MVT::i8));
    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, ResVT, Op,
                       DAG.getIntPtrConstant(0, dl));
  }

  // At most one non-zero operand: insert it into a zero/undef base vector.
  if (NonZeros == 0 || isPowerOf2_64(NonZeros)) {
    SDValue Vec = Zeros ? DAG.getConstant(0, dl, ResVT) : DAG.getUNDEF(ResVT);
    if (!NonZeros)
      return Vec;
    unsigned Idx = Log2_64(NonZeros);
    SDValue SubVec = Op.getOperand(Idx);
    unsigned SubVecNumElts = SubVec.getSimpleValueType().getVectorNumElements();
    return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Vec, SubVec,
                       DAG.getIntPtrConstant(Idx * SubVecNumElts, dl));
  }

  // More than two operands: split into two halves and concat recursively.
  if (NumOperands > 2) {
    MVT HalfVT = ResVT.getHalfNumVectorElementsVT();
    ArrayRef<SDUse> Ops = Op->ops();
    SDValue Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfVT,
                             Ops.slice(0, NumOperands/2));
    SDValue Hi = DAG.getNode(ISD::CONCAT_VECTORS, dl, HalfVT,
                             Ops.slice(NumOperands/2));
    return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Lo, Hi);
  }

  // Exactly two non-zero, non-undef operands remain.
  assert(countPopulation(NonZeros) == 2 && "Simple cases not handled?");

  // NOTE(review): wide (>= v16i1) two-operand concats are returned unchanged —
  // presumably handled by later isel/legalization; confirm against callers.
  if (ResVT.getVectorNumElements() >= 16)
    return Op;

  // Insert both subvectors into an undef base at offsets 0 and NumElems/2.
  SDValue Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT,
                            DAG.getUNDEF(ResVT), Op.getOperand(0),
                            DAG.getIntPtrConstant(0, dl));
  return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, ResVT, Vec, Op.getOperand(1),
                     DAG.getIntPtrConstant(NumElems/2, dl));
}
| 10855 | |
// Entry point for CONCAT_VECTORS lowering: dispatch vXi1 mask vectors to the
// dedicated mask path and everything else to the AVX insert/split path.
static SDValue LowerCONCAT_VECTORS(SDValue Op,
                                   const X86Subtarget &Subtarget,
                                   SelectionDAG &DAG) {
  MVT VT = Op.getSimpleValueType();
  if (VT.getVectorElementType() == MVT::i1)
    return LowerCONCAT_VECTORSvXi1(Op, Subtarget, DAG);

  // Only 2-operand 256-bit and 2/4-operand 512-bit concats are expected here.
  assert((VT.is256BitVector() && Op.getNumOperands() == 2) ||
         (VT.is512BitVector() && (Op.getNumOperands() == 2 ||
          Op.getNumOperands() == 4)));

  return LowerAVXCONCAT_VECTORS(Op, DAG, Subtarget);
}
| 10873 | |
| 10874 | |
| 10875 | |
| 10876 | |
| 10877 | |
| 10878 | |
| 10879 | |
| 10880 | |
| 10881 | |
| 10882 | |
| 10883 | |
| 10884 | |
| 10885 | |
| 10886 | |
| 10887 | |
| 10888 | |
| 10889 | |
| 10890 | |
| 10891 | |
| 10892 | static bool isNoopShuffleMask(ArrayRef<int> Mask) { |
| 10893 | for (int i = 0, Size = Mask.size(); i < Size; ++i) { |
| 10894 | assert(Mask[i] >= -1 && "Out of bound mask element!"); |
| 10895 | if (Mask[i] >= 0 && Mask[i] != i) |
| 10896 | return false; |
| 10897 | } |
| 10898 | return true; |
| 10899 | } |
| 10900 | |
| 10901 | |
| 10902 | |
| 10903 | |
| 10904 | |
| 10905 | |
| 10906 | static bool isLaneCrossingShuffleMask(unsigned LaneSizeInBits, |
| 10907 | unsigned ScalarSizeInBits, |
| 10908 | ArrayRef<int> Mask) { |
| 10909 | assert(LaneSizeInBits && ScalarSizeInBits && |
| 10910 | (LaneSizeInBits % ScalarSizeInBits) == 0 && |
| 10911 | "Illegal shuffle lane size"); |
| 10912 | int LaneSize = LaneSizeInBits / ScalarSizeInBits; |
| 10913 | int Size = Mask.size(); |
| 10914 | for (int i = 0; i < Size; ++i) |
| 10915 | if (Mask[i] >= 0 && (Mask[i] % Size) / LaneSize != i / LaneSize) |
| 10916 | return true; |
| 10917 | return false; |
| 10918 | } |
| 10919 | |
| 10920 | |
| 10921 | |
/// Test whether there are elements crossing 128-bit lanes in this
/// shuffle mask.
static bool is128BitLaneCrossingShuffleMask(MVT VT, ArrayRef<int> Mask) {
  return isLaneCrossingShuffleMask(128, VT.getScalarSizeInBits(), Mask);
}
| 10925 | |
| 10926 | |
| 10927 | |
| 10928 | |
| 10929 | static bool isMultiLaneShuffleMask(unsigned LaneSizeInBits, |
| 10930 | unsigned ScalarSizeInBits, |
| 10931 | ArrayRef<int> Mask) { |
| 10932 | assert(LaneSizeInBits && ScalarSizeInBits && |
| 10933 | (LaneSizeInBits % ScalarSizeInBits) == 0 && |
| 10934 | "Illegal shuffle lane size"); |
| 10935 | int NumElts = Mask.size(); |
| 10936 | int NumEltsPerLane = LaneSizeInBits / ScalarSizeInBits; |
| 10937 | int NumLanes = NumElts / NumEltsPerLane; |
| 10938 | if (NumLanes > 1) { |
| 10939 | for (int i = 0; i != NumLanes; ++i) { |
| 10940 | int SrcLane = -1; |
| 10941 | for (int j = 0; j != NumEltsPerLane; ++j) { |
| 10942 | int M = Mask[(i * NumEltsPerLane) + j]; |
| 10943 | if (M < 0) |
| 10944 | continue; |
| 10945 | int Lane = (M % NumElts) / NumEltsPerLane; |
| 10946 | if (SrcLane >= 0 && SrcLane != Lane) |
| 10947 | return true; |
| 10948 | SrcLane = Lane; |
| 10949 | } |
| 10950 | } |
| 10951 | } |
| 10952 | return false; |
| 10953 | } |
| 10954 | |
| 10955 | |
| 10956 | |
| 10957 | |
| 10958 | |
| 10959 | |
| 10960 | |
| 10961 | |
| 10962 | |
| 10963 | |
| 10964 | |
| 10965 | |
| 10966 | static bool isRepeatedShuffleMask(unsigned LaneSizeInBits, MVT VT, |
| 10967 | ArrayRef<int> Mask, |
| 10968 | SmallVectorImpl<int> &RepeatedMask) { |
| 10969 | auto LaneSize = LaneSizeInBits / VT.getScalarSizeInBits(); |
| 10970 | RepeatedMask.assign(LaneSize, -1); |
| 10971 | int Size = Mask.size(); |
| 10972 | for (int i = 0; i < Size; ++i) { |
| 10973 | assert(Mask[i] == SM_SentinelUndef || Mask[i] >= 0); |
| 10974 | if (Mask[i] < 0) |
| 10975 | continue; |
| 10976 | if ((Mask[i] % Size) / LaneSize != i / LaneSize) |
| 10977 | |
| 10978 | return false; |
| 10979 | |
| 10980 | |
| 10981 | |
| 10982 | int LocalM = Mask[i] < Size ? Mask[i] % LaneSize |
| 10983 | : Mask[i] % LaneSize + LaneSize; |
| 10984 | if (RepeatedMask[i % LaneSize] < 0) |
| 10985 | |
| 10986 | RepeatedMask[i % LaneSize] = LocalM; |
| 10987 | else if (RepeatedMask[i % LaneSize] != LocalM) |
| 10988 | |
| 10989 | return false; |
| 10990 | } |
| 10991 | return true; |
| 10992 | } |
| 10993 | |
| 10994 | |
/// Test whether a shuffle mask is equivalent within each 128-bit lane,
/// returning the repeated per-lane mask in \p RepeatedMask.
static bool
is128BitLaneRepeatedShuffleMask(MVT VT, ArrayRef<int> Mask,
                                SmallVectorImpl<int> &RepeatedMask) {
  return isRepeatedShuffleMask(128, VT, Mask, RepeatedMask);
}
| 11000 | |
/// As above, but discarding the repeated mask — only the predicate result is
/// of interest to the caller.
static bool
is128BitLaneRepeatedShuffleMask(MVT VT, ArrayRef<int> Mask) {
  SmallVector<int, 32> RepeatedMask;
  return isRepeatedShuffleMask(128, VT, Mask, RepeatedMask);
}
| 11006 | |
| 11007 | |
/// Test whether a shuffle mask is equivalent within each 256-bit lane,
/// returning the repeated per-lane mask in \p RepeatedMask.
static bool
is256BitLaneRepeatedShuffleMask(MVT VT, ArrayRef<int> Mask,
                                SmallVectorImpl<int> &RepeatedMask) {
  return isRepeatedShuffleMask(256, VT, Mask, RepeatedMask);
}
| 11013 | |
| 11014 | |
| 11015 | |
| 11016 | static bool isRepeatedTargetShuffleMask(unsigned LaneSizeInBits, |
| 11017 | unsigned EltSizeInBits, |
| 11018 | ArrayRef<int> Mask, |
| 11019 | SmallVectorImpl<int> &RepeatedMask) { |
| 11020 | int LaneSize = LaneSizeInBits / EltSizeInBits; |
| 11021 | RepeatedMask.assign(LaneSize, SM_SentinelUndef); |
| 11022 | int Size = Mask.size(); |
| 11023 | for (int i = 0; i < Size; ++i) { |
| 11024 | assert(isUndefOrZero(Mask[i]) || (Mask[i] >= 0)); |
| 11025 | if (Mask[i] == SM_SentinelUndef) |
| 11026 | continue; |
| 11027 | if (Mask[i] == SM_SentinelZero) { |
| 11028 | if (!isUndefOrZero(RepeatedMask[i % LaneSize])) |
| 11029 | return false; |
| 11030 | RepeatedMask[i % LaneSize] = SM_SentinelZero; |
| 11031 | continue; |
| 11032 | } |
| 11033 | if ((Mask[i] % Size) / LaneSize != i / LaneSize) |
| 11034 | |
| 11035 | return false; |
| 11036 | |
| 11037 | |
| 11038 | |
| 11039 | int LaneM = Mask[i] / Size; |
| 11040 | int LocalM = (Mask[i] % LaneSize) + (LaneM * LaneSize); |
| 11041 | if (RepeatedMask[i % LaneSize] == SM_SentinelUndef) |
| 11042 | |
| 11043 | RepeatedMask[i % LaneSize] = LocalM; |
| 11044 | else if (RepeatedMask[i % LaneSize] != LocalM) |
| 11045 | |
| 11046 | return false; |
| 11047 | } |
| 11048 | return true; |
| 11049 | } |
| 11050 | |
| 11051 | |
| 11052 | |
/// Convenience overload taking the element size from the vector type.
static bool isRepeatedTargetShuffleMask(unsigned LaneSizeInBits, MVT VT,
                                        ArrayRef<int> Mask,
                                        SmallVectorImpl<int> &RepeatedMask) {
  return isRepeatedTargetShuffleMask(LaneSizeInBits, VT.getScalarSizeInBits(),
                                     Mask, RepeatedMask);
}
| 11059 | |
| 11060 | |
| 11061 | |
/// Returns true if element Idx of Op is known to hold the same value as
/// element ExpectedIdx of ExpectedOp, by looking through a limited set of
/// value-duplicating node kinds.
static bool IsElementEquivalent(int MaskSize, SDValue Op, SDValue ExpectedOp,
                                int Idx, int ExpectedIdx) {
  assert(0 <= Idx && Idx < MaskSize && 0 <= ExpectedIdx &&
         ExpectedIdx < MaskSize && "Out of range element index");
  if (!Op || !ExpectedOp || Op.getOpcode() != ExpectedOp.getOpcode())
    return false;

  switch (Op.getOpcode()) {
  case ISD::BUILD_VECTOR:
    // For build vectors, just compare the two scalar operands directly.
    // Only handled when the operand counts match the mask size.
    if (MaskSize == (int)Op.getNumOperands() &&
        MaskSize == (int)ExpectedOp.getNumOperands())
      return Op.getOperand(Idx) == ExpectedOp.getOperand(ExpectedIdx);
    break;
  case X86ISD::VBROADCAST:
  case X86ISD::VBROADCAST_LOAD:
    // A broadcast holds the same value in every element, so any pair of
    // indices into the same node is equivalent.
    return (Op == ExpectedOp &&
            (int)Op.getValueType().getVectorNumElements() == MaskSize);
  case X86ISD::HADD:
  case X86ISD::HSUB:
  case X86ISD::FHADD:
  case X86ISD::FHSUB:
  case X86ISD::PACKSS:
  case X86ISD::PACKUS:
    // For horizontal-op/pack nodes with identical operands, the low and high
    // halves of each 128-bit lane carry the same values, so indices that agree
    // modulo a half-lane (within the same lane) are equivalent.
    if (Op == ExpectedOp && Op.getOperand(0) == Op.getOperand(1)) {
      MVT VT = Op.getSimpleValueType();
      int NumElts = VT.getVectorNumElements();
      if (MaskSize == NumElts) {
        int NumLanes = VT.getSizeInBits() / 128;
        int NumEltsPerLane = NumElts / NumLanes;
        int NumHalfEltsPerLane = NumEltsPerLane / 2;
        bool SameLane =
            (Idx / NumEltsPerLane) == (ExpectedIdx / NumEltsPerLane);
        bool SameElt =
            (Idx % NumHalfEltsPerLane) == (ExpectedIdx % NumHalfEltsPerLane);
        return SameLane && SameElt;
      }
    }
    break;
  }

  return false;
}
| 11111 | |
| 11112 | |
| 11113 | |
| 11114 | |
| 11115 | |
| 11116 | |
| 11117 | |
| 11118 | |
| 11119 | |
| 11120 | |
| 11121 | |
| 11122 | static bool isShuffleEquivalent(ArrayRef<int> Mask, ArrayRef<int> ExpectedMask, |
| 11123 | SDValue V1 = SDValue(), |
| 11124 | SDValue V2 = SDValue()) { |
| 11125 | int Size = Mask.size(); |
| 11126 | if (Size != (int)ExpectedMask.size()) |
| 11127 | return false; |
| 11128 | |
| 11129 | for (int i = 0; i < Size; ++i) { |
| 11130 | assert(Mask[i] >= -1 && "Out of bound mask element!"); |
| 11131 | int MaskIdx = Mask[i]; |
| 11132 | int ExpectedIdx = ExpectedMask[i]; |
| 11133 | if (0 <= MaskIdx && MaskIdx != ExpectedIdx) { |
| 11134 | SDValue MaskV = MaskIdx < Size ? V1 : V2; |
| 11135 | SDValue ExpectedV = ExpectedIdx < Size ? V1 : V2; |
| 11136 | MaskIdx = MaskIdx < Size ? MaskIdx : (MaskIdx - Size); |
| 11137 | ExpectedIdx = ExpectedIdx < Size ? ExpectedIdx : (ExpectedIdx - Size); |
| 11138 | if (!IsElementEquivalent(Size, MaskV, ExpectedV, MaskIdx, ExpectedIdx)) |
| 11139 | return false; |
| 11140 | } |
| 11141 | } |
| 11142 | return true; |
| 11143 | } |
| 11144 | |
| 11145 | |
| 11146 | |
| 11147 | |
| 11148 | |
| 11149 | |
| 11150 | |
| 11151 | |
| 11152 | |
| 11153 | |
/// Checks whether a target shuffle mask is equivalent to an expected mask.
/// Unlike isShuffleEquivalent, the masks may contain SM_SentinelZero entries;
/// ExpectedMask must already be a legal target mask (undef/zero/in-range).
/// V1/V2 optionally enable element-equivalence checks via IsElementEquivalent.
static bool isTargetShuffleEquivalent(MVT VT, ArrayRef<int> Mask,
                                      ArrayRef<int> ExpectedMask,
                                      SDValue V1 = SDValue(),
                                      SDValue V2 = SDValue()) {
  int Size = Mask.size();
  if (Size != (int)ExpectedMask.size())
    return false;
  assert(isUndefOrZeroOrInRange(ExpectedMask, 0, 2 * Size) &&
         "Illegal target shuffle mask");

  // Reject out-of-range mask indices up front.
  if (!isUndefOrZeroOrInRange(Mask, 0, 2 * Size))
    return false;

  // Only use V1/V2 for element equivalence if they match the mask's type size;
  // otherwise drop them so IsElementEquivalent bails.
  if (V1 && V1.getValueSizeInBits() != VT.getSizeInBits())
    V1 = SDValue();
  if (V2 && V2.getValueSizeInBits() != VT.getSizeInBits())
    V2 = SDValue();

  for (int i = 0; i < Size; ++i) {
    int MaskIdx = Mask[i];
    int ExpectedIdx = ExpectedMask[i];
    // Undef entries and identical indices are trivially equivalent.
    if (MaskIdx == SM_SentinelUndef || MaskIdx == ExpectedIdx)
      continue;
    // Otherwise try to prove the referenced elements hold the same value.
    if (0 <= MaskIdx && 0 <= ExpectedIdx) {
      SDValue MaskV = MaskIdx < Size ? V1 : V2;
      SDValue ExpectedV = ExpectedIdx < Size ? V1 : V2;
      MaskIdx = MaskIdx < Size ? MaskIdx : (MaskIdx - Size);
      ExpectedIdx = ExpectedIdx < Size ? ExpectedIdx : (ExpectedIdx - Size);
      if (IsElementEquivalent(Size, MaskV, ExpectedV, MaskIdx, ExpectedIdx))
        continue;
    }
    // Mismatched zero sentinels (and anything else) fail the comparison.
    return false;
  }
  return true;
}
| 11192 | |
| 11193 | |
// Attempt to build a blend shuffle mask from a constant VSELECT condition:
// element i selects from the first operand (index i) when the condition bit
// is set, otherwise from the second (index i + NumElts). Returns false if the
// condition's constant bits cannot be extracted.
static bool createShuffleMaskFromVSELECT(SmallVectorImpl<int> &Mask,
                                         SDValue Cond) {
  EVT CondVT = Cond.getValueType();
  unsigned EltSizeInBits = CondVT.getScalarSizeInBits();
  unsigned NumElts = CondVT.getVectorNumElements();

  APInt UndefElts;
  SmallVector<APInt, 32> EltBits;
  if (!getTargetConstantBitsFromNode(Cond, EltSizeInBits, UndefElts, EltBits,
                                     true, false))
    return false;

  Mask.resize(NumElts, SM_SentinelUndef);

  for (int i = 0; i != (int)NumElts; ++i) {
    Mask[i] = i;
    // Arbitrarily choose from the 2nd operand if the select condition element
    // is undef (zero condition bits also select the 2nd operand).
    if (UndefElts[i] || EltBits[i].isNullValue())
      Mask[i] += NumElts;
  }

  return true;
}
| 11219 | |
| 11220 | |
| 11221 | |
| 11222 | static bool isUnpackWdShuffleMask(ArrayRef<int> Mask, MVT VT) { |
| 11223 | if (VT != MVT::v8i32 && VT != MVT::v8f32) |
| 11224 | return false; |
| 11225 | |
| 11226 | SmallVector<int, 8> Unpcklwd; |
| 11227 | createUnpackShuffleMask(MVT::v8i16, Unpcklwd, true, |
| 11228 | false); |
| 11229 | SmallVector<int, 8> Unpckhwd; |
| 11230 | createUnpackShuffleMask(MVT::v8i16, Unpckhwd, false, |
| 11231 | false); |
| 11232 | bool IsUnpackwdMask = (isTargetShuffleEquivalent(VT, Mask, Unpcklwd) || |
| 11233 | isTargetShuffleEquivalent(VT, Mask, Unpckhwd)); |
| 11234 | return IsUnpackwdMask; |
| 11235 | } |
| 11236 | |
| 11237 | static bool is128BitUnpackShuffleMask(ArrayRef<int> Mask) { |
| 11238 | |
| 11239 | MVT EltVT = MVT::getIntegerVT(128 / Mask.size()); |
| 11240 | MVT VT = MVT::getVectorVT(EltVT, Mask.size()); |
| 11241 | |
| 11242 | |
| 11243 | SmallVector<int, 4> CommutedMask(Mask.begin(), Mask.end()); |
| 11244 | ShuffleVectorSDNode::commuteMask(CommutedMask); |
| 11245 | |
| 11246 | |
| 11247 | for (unsigned i = 0; i != 4; ++i) { |
| 11248 | SmallVector<int, 16> UnpackMask; |
| 11249 | createUnpackShuffleMask(VT, UnpackMask, (i >> 1) % 2, i % 2); |
| 11250 | if (isTargetShuffleEquivalent(VT, Mask, UnpackMask) || |
| 11251 | isTargetShuffleEquivalent(VT, CommutedMask, UnpackMask)) |
| 11252 | return true; |
| 11253 | } |
| 11254 | return false; |
| 11255 | } |
| 11256 | |
| 11257 | |
| 11258 | |
| 11259 | |
| 11260 | |
| 11261 | static bool hasIdenticalHalvesShuffleMask(ArrayRef<int> Mask) { |
| 11262 | assert(Mask.size() % 2 == 0 && "Expecting even number of elements in mask"); |
| 11263 | unsigned HalfSize = Mask.size() / 2; |
| 11264 | for (unsigned i = 0; i != HalfSize; ++i) { |
| 11265 | if (Mask[i] != Mask[i + HalfSize]) |
| 11266 | return false; |
| 11267 | } |
| 11268 | return true; |
| 11269 | } |
| 11270 | |
| 11271 | |
| 11272 | |
| 11273 | |
| 11274 | |
| 11275 | |
| 11276 | |
| 11277 | |
| 11278 | |
| 11279 | static unsigned getV4X86ShuffleImm(ArrayRef<int> Mask) { |
| 11280 | assert(Mask.size() == 4 && "Only 4-lane shuffle masks"); |
| 11281 | assert(Mask[0] >= -1 && Mask[0] < 4 && "Out of bound mask element!"); |
| 11282 | assert(Mask[1] >= -1 && Mask[1] < 4 && "Out of bound mask element!"); |
| 11283 | assert(Mask[2] >= -1 && Mask[2] < 4 && "Out of bound mask element!"); |
| 11284 | assert(Mask[3] >= -1 && Mask[3] < 4 && "Out of bound mask element!"); |
| 11285 | |
| 11286 | |
| 11287 | |
| 11288 | int FirstIndex = find_if(Mask, [](int M) { return M >= 0; }) - Mask.begin(); |
| 11289 | assert(0 <= FirstIndex && FirstIndex < 4 && "All undef shuffle mask"); |
| 11290 | |
| 11291 | int FirstElt = Mask[FirstIndex]; |
| 11292 | if (all_of(Mask, [FirstElt](int M) { return M < 0 || M == FirstElt; })) |
| 11293 | return (FirstElt << 6) | (FirstElt << 4) | (FirstElt << 2) | FirstElt; |
| 11294 | |
| 11295 | unsigned Imm = 0; |
| 11296 | Imm |= (Mask[0] < 0 ? 0 : Mask[0]) << 0; |
| 11297 | Imm |= (Mask[1] < 0 ? 1 : Mask[1]) << 2; |
| 11298 | Imm |= (Mask[2] < 0 ? 2 : Mask[2]) << 4; |
| 11299 | Imm |= (Mask[3] < 0 ? 3 : Mask[3]) << 6; |
| 11300 | return Imm; |
| 11301 | } |
| 11302 | |
/// Wrap getV4X86ShuffleImm's immediate in an i8 target constant node.
static SDValue getV4X86ShuffleImm8ForMask(ArrayRef<int> Mask, const SDLoc &DL,
                                          SelectionDAG &DAG) {
  return DAG.getTargetConstant(getV4X86ShuffleImm(Mask), DL, MVT::i8);
}
| 11307 | |
| 11308 | |
| 11309 | |
| 11310 | |
| 11311 | |
| 11312 | |
| 11313 | |
| 11314 | |
// Check that the mask's non-zeroable elements select a consecutive, in-order
// run starting at element 0 of one of the two sources (expansion pattern).
// IsZeroSideLeft is set to true when the run comes from the second source
// (i.e. the zeroed elements sit on the "left"). Any undef element fails.
static bool isNonZeroElementsInOrder(const APInt &Zeroable,
                                     ArrayRef<int> Mask, const EVT &VectorType,
                                     bool &IsZeroSideLeft) {
  int NextElement = -1;
  for (int i = 0, e = Mask.size(); i < e; i++) {
    // Undef elements can't participate in an expansion pattern.
    assert(Mask[i] >= -1 && "Out of bound mask element!");
    if (Mask[i] < 0)
      return false;
    // Zeroable elements are skipped; only the non-zero run is checked.
    if (Zeroable[i])
      continue;
    // The first non-zero element must be index 0 of either source
    // (0 for the first operand, NumElements for the second).
    if (NextElement < 0) {
      NextElement = Mask[i] != 0 ? VectorType.getVectorNumElements() : 0;
      IsZeroSideLeft = NextElement != 0;
    }
    // Each subsequent non-zero element must continue the run.
    if (NextElement != Mask[i])
      return false;
    NextElement++;
  }
  return true;
}
| 11339 | |
| 11340 | |
/// Try to lower the shuffle as a single PSHUFB on one input (V1 or V2, not
/// both). Returns SDValue() if two inputs are needed or a byte would have to
/// cross a 128-bit lane (PSHUFB operates per lane).
static SDValue lowerShuffleWithPSHUFB(const SDLoc &DL, MVT VT,
                                      ArrayRef<int> Mask, SDValue V1,
                                      SDValue V2, const APInt &Zeroable,
                                      const X86Subtarget &Subtarget,
                                      SelectionDAG &DAG) {
  int Size = Mask.size();
  int LaneSize = 128 / VT.getScalarSizeInBits();
  const int NumBytes = VT.getSizeInBits() / 8;
  const int NumEltBytes = VT.getScalarSizeInBits() / 8;

  // PSHUFB/VPSHUFB availability per vector width.
  assert((Subtarget.hasSSSE3() && VT.is128BitVector()) ||
         (Subtarget.hasAVX2() && VT.is256BitVector()) ||
         (Subtarget.hasBWI() && VT.is512BitVector()));

  SmallVector<SDValue, 64> PSHUFBMask(NumBytes);
  // A control byte with the sign bit set (0x80) zeroes the output byte.
  SDValue ZeroMask = DAG.getConstant(0x80, DL, MVT::i8);

  // V tracks the single source input; selecting from both inputs fails.
  SDValue V;
  for (int i = 0; i < NumBytes; ++i) {
    int M = Mask[i / NumEltBytes];
    if (M < 0) {
      PSHUFBMask[i] = DAG.getUNDEF(MVT::i8);
      continue;
    }
    if (Zeroable[i / NumEltBytes]) {
      PSHUFBMask[i] = ZeroMask;
      continue;
    }

    // All non-zero, non-undef bytes must come from one input.
    SDValue SrcV = (M >= Size ? V2 : V1);
    if (V && V != SrcV)
      return SDValue();
    V = SrcV;
    M %= Size;

    // PSHUFB can't cross 128-bit lanes.
    if ((M / LaneSize) != ((i / NumEltBytes) / LaneSize))
      return SDValue();

    // Convert the element index to a lane-local byte index.
    M = M % LaneSize;
    M = M * NumEltBytes + (i % NumEltBytes);
    PSHUFBMask[i] = DAG.getConstant(M, DL, MVT::i8);
  }
  assert(V && "Failed to find a source input");

  // Perform the byte shuffle in the i8 domain and bitcast back.
  MVT I8VT = MVT::getVectorVT(MVT::i8, NumBytes);
  return DAG.getBitcast(
      VT, DAG.getNode(X86ISD::PSHUFB, DL, I8VT, DAG.getBitcast(I8VT, V),
                      DAG.getBuildVector(I8VT, DL, PSHUFBMask)));
}
| 11393 | |
// Forward declaration (defined later in this file); materializes \p Mask as a
// value of mask type \p MaskVT for use by masked operations.
static SDValue getMaskNode(SDValue Mask, MVT MaskVT,
                           const X86Subtarget &Subtarget, SelectionDAG &DAG,
                           const SDLoc &dl);
| 11397 | |
| 11398 | |
// Try to lower the shuffle as an AVX-512 EXPAND: the non-zeroable elements
// must be a consecutive in-order run from one source, the rest are zeroed.
static SDValue lowerShuffleToEXPAND(const SDLoc &DL, MVT VT,
                                    const APInt &Zeroable,
                                    ArrayRef<int> Mask, SDValue &V1,
                                    SDValue &V2, SelectionDAG &DAG,
                                    const X86Subtarget &Subtarget) {
  bool IsLeftZeroSide = true;
  if (!isNonZeroElementsInOrder(Zeroable, Mask, V1.getValueType(),
                                IsLeftZeroSide))
    return SDValue();
  // The non-zeroable elements become the expansion's write-mask bits.
  unsigned VEXPANDMask = (~Zeroable).getZExtValue();
  // Mask constants need at least an i8 integer type.
  MVT IntegerType =
      MVT::getIntegerVT(std::max((int)VT.getVectorNumElements(), 8));
  SDValue MaskNode = DAG.getConstant(VEXPANDMask, DL, IntegerType);
  unsigned NumElts = VT.getVectorNumElements();
  assert((NumElts == 4 || NumElts == 8 || NumElts == 16) &&
         "Unexpected number of vector elements");
  SDValue VMask = getMaskNode(MaskNode, MVT::getVectorVT(MVT::i1, NumElts),
                              Subtarget, DAG, DL);
  SDValue ZeroVector = getZeroVector(VT, Subtarget, DAG, DL);
  // Expand from whichever operand supplies the non-zero run; the zeroed
  // elements come from ZeroVector via the mask.
  SDValue ExpandedVector = IsLeftZeroSide ? V2 : V1;
  return DAG.getNode(X86ISD::EXPAND, DL, VT, ExpandedVector, ZeroVector, VMask);
}
| 11421 | |
// Try to match a target shuffle mask as an UNPCKL/UNPCKH, setting
// UnpackOpcode and rewriting V1/V2 on success (including undef/zero operand
// substitution and operand commutation).
static bool matchShuffleWithUNPCK(MVT VT, SDValue &V1, SDValue &V2,
                                  unsigned &UnpackOpcode, bool IsUnary,
                                  ArrayRef<int> TargetMask, const SDLoc &DL,
                                  SelectionDAG &DAG,
                                  const X86Subtarget &Subtarget) {
  int NumElts = VT.getVectorNumElements();

  // Track whether the even (1) / odd (2) interleave slots are entirely
  // undef or entirely undef-or-zero.
  bool Undef1 = true, Undef2 = true, Zero1 = true, Zero2 = true;
  for (int i = 0; i != NumElts; i += 2) {
    int M1 = TargetMask[i + 0];
    int M2 = TargetMask[i + 1];
    Undef1 &= (SM_SentinelUndef == M1);
    Undef2 &= (SM_SentinelUndef == M2);
    Zero1 &= isUndefOrZero(M1);
    Zero2 &= isUndefOrZero(M2);
  }
  assert(!((Undef1 || Zero1) && (Undef2 || Zero2)) &&
         "Zeroable shuffle detected");

  // Attempt a direct match against the LO pattern.
  SmallVector<int, 64> Unpckl, Unpckh;
  createUnpackShuffleMask(VT, Unpckl, true, IsUnary);
  if (isTargetShuffleEquivalent(VT, TargetMask, Unpckl, V1,
                                (IsUnary ? V1 : V2))) {
    UnpackOpcode = X86ISD::UNPCKL;
    V2 = (Undef2 ? DAG.getUNDEF(VT) : (IsUnary ? V1 : V2));
    V1 = (Undef1 ? DAG.getUNDEF(VT) : V1);
    return true;
  }

  // Attempt a direct match against the HI pattern.
  createUnpackShuffleMask(VT, Unpckh, false, IsUnary);
  if (isTargetShuffleEquivalent(VT, TargetMask, Unpckh, V1,
                                (IsUnary ? V1 : V2))) {
    UnpackOpcode = X86ISD::UNPCKH;
    V2 = (Undef2 ? DAG.getUNDEF(VT) : (IsUnary ? V1 : V2));
    V1 = (Undef1 ? DAG.getUNDEF(VT) : V1);
    return true;
  }

  // If an unary shuffle, attempt to match as an unpack with zero elements
  // interleaved from a zero vector.
  if (IsUnary && (Zero1 || Zero2)) {
    // Don't bother if we can blend instead (SSE4.1+, or 64-bit elements).
    if ((Subtarget.hasSSE41() || VT == MVT::v2i64 || VT == MVT::v2f64) &&
        isSequentialOrUndefOrZeroInRange(TargetMask, 0, NumElts, 0))
      return false;

    // Check whether the non-zero slots follow the LO or HI pattern.
    bool MatchLo = true, MatchHi = true;
    for (int i = 0; (i != NumElts) && (MatchLo || MatchHi); ++i) {
      int M = TargetMask[i];

      // Ignore slots covered by the zeroed interleave operand, and undefs.
      if ((((i & 1) == 0) && Zero1) || (((i & 1) == 1) && Zero2) ||
          (M == SM_SentinelUndef))
        continue;

      MatchLo &= (M == Unpckl[i]);
      MatchHi &= (M == Unpckh[i]);
    }

    if (MatchLo || MatchHi) {
      UnpackOpcode = MatchLo ? X86ISD::UNPCKL : X86ISD::UNPCKH;
      V2 = Zero2 ? getZeroVector(VT, Subtarget, DAG, DL) : V1;
      V1 = Zero1 ? getZeroVector(VT, Subtarget, DAG, DL) : V1;
      return true;
    }
  }

  // If a binary shuffle, commute and try again.
  if (!IsUnary) {
    ShuffleVectorSDNode::commuteMask(Unpckl);
    if (isTargetShuffleEquivalent(VT, TargetMask, Unpckl)) {
      UnpackOpcode = X86ISD::UNPCKL;
      std::swap(V1, V2);
      return true;
    }

    ShuffleVectorSDNode::commuteMask(Unpckh);
    if (isTargetShuffleEquivalent(VT, TargetMask, Unpckh)) {
      UnpackOpcode = X86ISD::UNPCKH;
      std::swap(V1, V2);
      return true;
    }
  }

  return false;
}
| 11508 | |
| 11509 | |
| 11510 | |
| 11511 | static SDValue lowerShuffleWithUNPCK(const SDLoc &DL, MVT VT, |
| 11512 | ArrayRef<int> Mask, SDValue V1, SDValue V2, |
| 11513 | SelectionDAG &DAG) { |
| 11514 | SmallVector<int, 8> Unpckl; |
| 11515 | createUnpackShuffleMask(VT, Unpckl, true, false); |
| 11516 | if (isShuffleEquivalent(Mask, Unpckl, V1, V2)) |
| 11517 | return DAG.getNode(X86ISD::UNPCKL, DL, VT, V1, V2); |
| 11518 | |
| 11519 | SmallVector<int, 8> Unpckh; |
| 11520 | createUnpackShuffleMask(VT, Unpckh, false, false); |
| 11521 | if (isShuffleEquivalent(Mask, Unpckh, V1, V2)) |
| 11522 | return DAG.getNode(X86ISD::UNPCKH, DL, VT, V1, V2); |
| 11523 | |
| 11524 | |
| 11525 | ShuffleVectorSDNode::commuteMask(Unpckl); |
| 11526 | if (isShuffleEquivalent(Mask, Unpckl, V1, V2)) |
| 11527 | return DAG.getNode(X86ISD::UNPCKL, DL, VT, V2, V1); |
| 11528 | |
| 11529 | ShuffleVectorSDNode::commuteMask(Unpckh); |
| 11530 | if (isShuffleEquivalent(Mask, Unpckh, V1, V2)) |
| 11531 | return DAG.getNode(X86ISD::UNPCKH, DL, VT, V2, V1); |
| 11532 | |
| 11533 | return SDValue(); |
| 11534 | } |
| 11535 | |
| 11536 | |
| 11537 | |
// Try to lower a "splat-2" shuffle (e.g. <0,0,1,1>) as a cross-128-bit-lane
// unpack by pre-permuting the 64-bit chunks of the input.
static SDValue lowerShuffleWithUNPCK256(const SDLoc &DL, MVT VT,
                                        ArrayRef<int> Mask, SDValue V1,
                                        SDValue V2, SelectionDAG &DAG) {
  SmallVector<int, 32> Unpckl, Unpckh;
  createSplat2ShuffleMask(VT, Unpckl, true);
  createSplat2ShuffleMask(VT, Unpckh, false);

  unsigned UnpackOpcode;
  if (isShuffleEquivalent(Mask, Unpckl, V1, V2))
    UnpackOpcode = X86ISD::UNPCKL;
  else if (isShuffleEquivalent(Mask, Unpckh, V1, V2))
    UnpackOpcode = X86ISD::UNPCKH;
  else
    return SDValue();

  // AVX's UNPCK works within 128-bit sectors; interleave the input's 64-bit
  // chunks first (0,2,1,3) so the per-sector unpack yields the splat-2 result.
  // NOTE(review): the v4f64 bitcast implies this path expects 256-bit types
  // with four 64-bit elements — confirm against the callers.
  V1 = DAG.getVectorShuffle(MVT::v4f64, DL, DAG.getBitcast(MVT::v4f64, V1),
                            DAG.getUNDEF(MVT::v4f64), {0, 2, 1, 3});
  V1 = DAG.getBitcast(VT, V1);
  return DAG.getNode(UnpackOpcode, DL, VT, V1, V1);
}
| 11561 | |
| 11562 | |
| 11563 | |
// Try to match the shuffle as an element truncation: the low elements select
// every Scale-th source element in order and the upper elements are all
// zeroable. On success sets SrcVT/DstVT for the truncation node.
static bool matchShuffleAsVTRUNC(MVT &SrcVT, MVT &DstVT, MVT VT,
                                 ArrayRef<int> Mask, const APInt &Zeroable,
                                 const X86Subtarget &Subtarget) {
  // Truncation needs AVX512VL for sub-512-bit vectors.
  if (!VT.is512BitVector() && !Subtarget.hasVLX())
    return false;

  unsigned NumElts = Mask.size();
  unsigned EltSizeInBits = VT.getScalarSizeInBits();
  unsigned MaxScale = 64 / EltSizeInBits;

  // Try each power-of-2 truncation ratio from widest source element down.
  for (unsigned Scale = 2; Scale <= MaxScale; Scale += Scale) {
    unsigned SrcEltBits = EltSizeInBits * Scale;
    // Sub-32-bit source elements need AVX512BW truncations.
    if (SrcEltBits < 32 && !Subtarget.hasBWI())
      continue;
    unsigned NumSrcElts = NumElts / Scale;
    // Low elements must be the sequence 0, Scale, 2*Scale, ... (undef ok).
    if (!isSequentialOrUndefInRange(Mask, 0, NumSrcElts, 0, Scale))
      continue;
    // All remaining upper elements must be zeroable.
    unsigned UpperElts = NumElts - NumSrcElts;
    if (!Zeroable.extractBits(UpperElts, NumSrcElts).isAllOnesValue())
      continue;
    SrcVT = MVT::getIntegerVT(EltSizeInBits * Scale);
    SrcVT = MVT::getVectorVT(SrcVT, NumSrcElts);
    DstVT = MVT::getIntegerVT(EltSizeInBits);
    if ((NumSrcElts * EltSizeInBits) >= 128) {
      // The destination is itself at least 128 bits wide.
      DstVT = MVT::getVectorVT(DstVT, NumSrcElts);
    } else {
      // Smaller results are widened to a full 128-bit vector type.
      DstVT = MVT::getVectorVT(DstVT, 128 / EltSizeInBits);
    }
    return true;
  }

  return false;
}
| 11599 | |
| 11600 | |
| 11601 | |
// Helper to create a TRUNCATE/VTRUNC node from Src down to DstVT, widening or
// extracting as needed. ZeroUppers selects zero (vs undef) filling for any
// widened upper elements. Returns SDValue() if SrcVT is not legal.
static SDValue getAVX512TruncNode(const SDLoc &DL, MVT DstVT, SDValue Src,
                                  const X86Subtarget &Subtarget,
                                  SelectionDAG &DAG, bool ZeroUppers) {
  MVT SrcVT = Src.getSimpleValueType();
  MVT DstSVT = DstVT.getScalarType();
  unsigned NumDstElts = DstVT.getVectorNumElements();
  unsigned NumSrcElts = SrcVT.getVectorNumElements();
  unsigned DstEltSizeInBits = DstVT.getScalarSizeInBits();

  if (!DAG.getTargetLoweringInfo().isTypeLegal(SrcVT))
    return SDValue();

  // Same element count: a plain truncate suffices.
  if (NumSrcElts == NumDstElts)
    return DAG.getNode(ISD::TRUNCATE, DL, DstVT, Src);

  // More source than destination elements: truncate at the source element
  // count, then extract the destination-sized subvector.
  if (NumSrcElts > NumDstElts) {
    MVT TruncVT = MVT::getVectorVT(DstSVT, NumSrcElts);
    SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, Src);
    return extractSubVector(Trunc, 0, DAG, DL, DstVT.getSizeInBits());
  }

  // Fewer source elements, but the truncated result is still >= 128 bits:
  // truncate then widen (zero- or undef-filled) to the destination size.
  if ((NumSrcElts * DstEltSizeInBits) >= 128) {
    MVT TruncVT = MVT::getVectorVT(DstSVT, NumSrcElts);
    SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, Src);
    return widenSubVector(Trunc, ZeroUppers, Subtarget, DAG, DL,
                          DstVT.getSizeInBits());
  }

  // Without VLX, VTRUNC needs a 512-bit source: widen and recurse.
  if (!Subtarget.hasVLX() && !SrcVT.is512BitVector()) {
    SDValue NewSrc = widenSubVector(Src, ZeroUppers, Subtarget, DAG, DL, 512);
    return getAVX512TruncNode(DL, DstVT, NewSrc, Subtarget, DAG, ZeroUppers);
  }

  // Fall back to VTRUNC producing a 128-bit result, widening if necessary.
  MVT TruncVT = MVT::getVectorVT(DstSVT, 128 / DstEltSizeInBits);
  SDValue Trunc = DAG.getNode(X86ISD::VTRUNC, DL, TruncVT, Src);
  if (DstVT != TruncVT)
    Trunc = widenSubVector(Trunc, ZeroUppers, Subtarget, DAG, DL,
                           DstVT.getSizeInBits());
  return Trunc;
}
| 11646 | |
| 11647 | |
| 11648 | |
| 11649 | |
| 11650 | |
| 11651 | |
| 11652 | |
| 11653 | |
| 11654 | |
| 11655 | |
| 11656 | |
| 11657 | |
| 11658 | |
| 11659 | |
| 11660 | |
| 11661 | |
| 11662 | |
// Try to fold the shuffle into an existing truncation: if V1 is (a bitcast
// of) an ISD::TRUNCATE whose elements the mask strides over by Scale, the
// whole pattern is a narrower AVX512 truncation of the truncate's source.
static SDValue lowerShuffleWithVPMOV(const SDLoc &DL, MVT VT, SDValue V1,
                                     SDValue V2, ArrayRef<int> Mask,
                                     const APInt &Zeroable,
                                     const X86Subtarget &Subtarget,
                                     SelectionDAG &DAG) {
  assert((VT == MVT::v16i8 || VT == MVT::v8i16) && "Unexpected VTRUNC type");
  if (!Subtarget.hasAVX512())
    return SDValue();

  unsigned NumElts = VT.getVectorNumElements();
  unsigned EltSizeInBits = VT.getScalarSizeInBits();
  unsigned MaxScale = 64 / EltSizeInBits;
  for (unsigned Scale = 2; Scale <= MaxScale; Scale += Scale) {
    // The low NumElts/Scale elements must be a stride-Scale sequence and the
    // remaining upper elements must all be zeroable.
    unsigned NumSrcElts = NumElts / Scale;
    unsigned UpperElts = NumElts - NumSrcElts;
    if (!isSequentialOrUndefInRange(Mask, 0, NumSrcElts, 0, Scale) ||
        !Zeroable.extractBits(UpperElts, NumSrcElts).isAllOnesValue())
      continue;

    SDValue Src = V1;
    // Only fold if we are the sole user of the existing truncate.
    if (!Src.hasOneUse())
      return SDValue();

    // Peel one-use bitcasts; require an existing TRUNCATE whose elements are
    // exactly EltSizeInBits * Scale bits wide.
    Src = peekThroughOneUseBitcasts(Src);
    if (Src.getOpcode() != ISD::TRUNCATE ||
        Src.getScalarValueSizeInBits() != (EltSizeInBits * Scale))
      return SDValue();
    Src = Src.getOperand(0);

    // i16 -> v16i8 truncation needs AVX512BW (presumably lowered via
    // VPMOVWB — confirm against getAVX512TruncNode's legality checks).
    MVT SrcVT = Src.getSimpleValueType();
    if (SrcVT.getVectorElementType() == MVT::i16 && VT == MVT::v16i8 &&
        !Subtarget.hasBWI())
      return SDValue();

    // If every upper element is undef we can skip zeroing them.
    bool UndefUppers = isUndefInRange(Mask, NumSrcElts, UpperElts);
    return getAVX512TruncNode(DL, VT, Src, Subtarget, DAG, !UndefUppers);
  }

  return SDValue();
}
| 11704 | |
| 11705 | |
// Lower a two-input shuffle as a truncation of the inputs' concatenation:
// concat(V1, V2) is bitcast to wider elements and truncated with AVX512
// truncate nodes, with any leftover upper elements zeroed or left undef.
static SDValue lowerShuffleAsVTRUNC(const SDLoc &DL, MVT VT, SDValue V1,
                                    SDValue V2, ArrayRef<int> Mask,
                                    const APInt &Zeroable,
                                    const X86Subtarget &Subtarget,
                                    SelectionDAG &DAG) {
  assert((VT.is128BitVector() || VT.is256BitVector()) &&
         "Unexpected VTRUNC type");
  if (!Subtarget.hasAVX512())
    return SDValue();

  unsigned NumElts = VT.getVectorNumElements();
  unsigned EltSizeInBits = VT.getScalarSizeInBits();
  unsigned MaxScale = 64 / EltSizeInBits;
  for (unsigned Scale = 2; Scale <= MaxScale; Scale += Scale) {
    // Sub-32-bit truncations require AVX512BW.
    unsigned SrcEltBits = EltSizeInBits * Scale;
    if (SrcEltBits < 32 && !Subtarget.hasBWI())
      continue;

    // The mask must stride by Scale over the concat of both inputs; reject
    // if the second half of the sequential elements is entirely undef
    // (presumably better served by a single-input lowering — confirm).
    unsigned NumHalfSrcElts = NumElts / Scale;
    unsigned NumSrcElts = 2 * NumHalfSrcElts;
    if (!isSequentialOrUndefInRange(Mask, 0, NumSrcElts, 0, Scale) ||
        isUndefInRange(Mask, NumHalfSrcElts, NumHalfSrcElts))
      continue;

    // Any remaining upper elements must be zeroable.
    unsigned UpperElts = NumElts - NumSrcElts;
    if (UpperElts > 0 &&
        !Zeroable.extractBits(UpperElts, NumSrcElts).isAllOnesValue())
      continue;
    // If all the uppers are undef we can skip zeroing them.
    bool UndefUppers =
        UpperElts > 0 && isUndefInRange(Mask, NumSrcElts, UpperElts);

    // Build the source: concat(V1, V2) reinterpreted as a vector of
    // SrcEltBits-wide integers, then truncate back down to VT's elements.
    MVT ConcatVT = MVT::getVectorVT(VT.getScalarType(), NumElts * 2);
    SDValue Src = DAG.getNode(ISD::CONCAT_VECTORS, DL, ConcatVT, V1, V2);

    MVT SrcSVT = MVT::getIntegerVT(SrcEltBits);
    MVT SrcVT = MVT::getVectorVT(SrcSVT, NumSrcElts);
    Src = DAG.getBitcast(SrcVT, Src);
    return getAVX512TruncNode(DL, VT, Src, Subtarget, DAG, !UndefUppers);
  }

  return SDValue();
}
| 11754 | |
| 11755 | |
| 11756 | |
| 11757 | |
| 11758 | |
| 11759 | |
| 11760 | |
| 11761 | |
| 11762 | |
| 11763 | |
| 11764 | |
| 11765 | |
| 11766 | |
| 11767 | |
| 11768 | |
| 11769 | |
| 11770 | |
| 11771 | |
| 11772 | |
| 11773 | |
| 11774 | |
| 11775 | |
| 11776 | static int canLowerByDroppingEvenElements(ArrayRef<int> Mask, |
| 11777 | bool IsSingleInput) { |
| 11778 | |
| 11779 | |
| 11780 | int ShuffleModulus = Mask.size() * (IsSingleInput ? 1 : 2); |
| 11781 | assert(isPowerOf2_32((uint32_t)ShuffleModulus) && |
| 11782 | "We should only be called with masks with a power-of-2 size!"); |
| 11783 | |
| 11784 | uint64_t ModMask = (uint64_t)ShuffleModulus - 1; |
| 11785 | |
| 11786 | |
| 11787 | |
| 11788 | |
| 11789 | bool ViableForN[3] = {true, true, true}; |
| 11790 | |
| 11791 | for (int i = 0, e = Mask.size(); i < e; ++i) { |
| 11792 | |
| 11793 | |
| 11794 | if (Mask[i] < 0) |
| 11795 | continue; |
| 11796 | |
| 11797 | bool IsAnyViable = false; |
| 11798 | for (unsigned j = 0; j != array_lengthof(ViableForN); ++j) |
| 11799 | if (ViableForN[j]) { |
| 11800 | uint64_t N = j + 1; |
| 11801 | |
| 11802 | |
| 11803 | if ((uint64_t)Mask[i] == (((uint64_t)i << N) & ModMask)) |
| 11804 | IsAnyViable = true; |
| 11805 | else |
| 11806 | ViableForN[j] = false; |
| 11807 | } |
| 11808 | |
| 11809 | if (!IsAnyViable) |
| 11810 | break; |
| 11811 | } |
| 11812 | |
| 11813 | for (unsigned j = 0; j != array_lengthof(ViableForN); ++j) |
| 11814 | if (ViableForN[j]) |
| 11815 | return j + 1; |
| 11816 | |
| 11817 | |
| 11818 | return 0; |
| 11819 | } |
| 11820 | |
| 11821 | |
| 11822 | |
| 11823 | |
| 11824 | |
// Check whether the target shuffle can be performed as one or more
// PACKSS/PACKUS stages (each stage halves the element width). On success,
// V1/V2/SrcVT describe the pack inputs and PackOpcode the pack kind.
static bool matchShuffleWithPACK(MVT VT, MVT &SrcVT, SDValue &V1, SDValue &V2,
                                 unsigned &PackOpcode, ArrayRef<int> TargetMask,
                                 const SelectionDAG &DAG,
                                 const X86Subtarget &Subtarget,
                                 unsigned MaxStages = 1) {
  unsigned NumElts = VT.getVectorNumElements();
  unsigned BitSize = VT.getScalarSizeInBits();
  assert(0 < MaxStages && MaxStages <= 3 && (BitSize << MaxStages) <= 64 &&
         "Illegal maximum compaction");

  // Can (N1, N2) be packed down from PackVT's element width to BitSize
  // without losing information? Sets V1/V2/SrcVT/PackOpcode on success.
  auto MatchPACK = [&](SDValue N1, SDValue N2, MVT PackVT) {
    unsigned NumSrcBits = PackVT.getScalarSizeInBits();
    unsigned NumPackedBits = NumSrcBits - BitSize;
    N1 = peekThroughBitcasts(N1);
    N2 = peekThroughBitcasts(N2);
    unsigned NumBits1 = N1.getScalarValueSizeInBits();
    unsigned NumBits2 = N2.getScalarValueSizeInBits();
    bool IsZero1 = llvm::isNullOrNullSplat(N1, false);
    bool IsZero2 = llvm::isNullOrNullSplat(N2, false);
    // Each input must already have PackVT's element width, unless it is
    // undef or a zero splat (those pack to anything).
    if ((!N1.isUndef() && !IsZero1 && NumBits1 != NumSrcBits) ||
        (!N2.isUndef() && !IsZero2 && NumBits2 != NumSrcBits))
      return false;
    // PACKUS: valid when the bits being discarded are known zero. Only
    // attempted for 8-bit results or with SSE4.1 (presumably because the
    // 32->16 PACKUSDW form first appears there — confirm).
    if (Subtarget.hasSSE41() || BitSize == 8) {
      APInt ZeroMask = APInt::getHighBitsSet(NumSrcBits, NumPackedBits);
      if ((N1.isUndef() || IsZero1 || DAG.MaskedValueIsZero(N1, ZeroMask)) &&
          (N2.isUndef() || IsZero2 || DAG.MaskedValueIsZero(N2, ZeroMask))) {
        V1 = N1;
        V2 = N2;
        SrcVT = PackVT;
        PackOpcode = X86ISD::PACKUS;
        return true;
      }
    }
    // PACKSS: valid when the discarded bits are all sign-bit copies
    // (i.e. enough known sign bits), or the input is all-zeros/all-ones.
    bool IsAllOnes1 = llvm::isAllOnesOrAllOnesSplat(N1, false);
    bool IsAllOnes2 = llvm::isAllOnesOrAllOnesSplat(N2, false);
    if ((N1.isUndef() || IsZero1 || IsAllOnes1 ||
         DAG.ComputeNumSignBits(N1) > NumPackedBits) &&
        (N2.isUndef() || IsZero2 || IsAllOnes2 ||
         DAG.ComputeNumSignBits(N2) > NumPackedBits)) {
      V1 = N1;
      V2 = N2;
      SrcVT = PackVT;
      PackOpcode = X86ISD::PACKSS;
      return true;
    }
    return false;
  };

  // Try progressively deeper compactions (NumStages halvings at once).
  for (unsigned NumStages = 1; NumStages <= MaxStages; ++NumStages) {
    MVT PackSVT = MVT::getIntegerVT(BitSize << NumStages);
    MVT PackVT = MVT::getVectorVT(PackSVT, NumElts >> NumStages);

    // Binary pack: result combines elements of both V1 and V2.
    SmallVector<int, 32> BinaryMask;
    createPackShuffleMask(VT, BinaryMask, false, NumStages);
    if (isTargetShuffleEquivalent(VT, TargetMask, BinaryMask, V1, V2))
      if (MatchPACK(V1, V2, PackVT))
        return true;

    // Unary pack: both pack operands are V1.
    SmallVector<int, 32> UnaryMask;
    createPackShuffleMask(VT, UnaryMask, true, NumStages);
    if (isTargetShuffleEquivalent(VT, TargetMask, UnaryMask, V1))
      if (MatchPACK(V1, V1, PackVT))
        return true;
  }

  return false;
}
| 11895 | |
// Lower a compaction shuffle as a chain of PACKSS/PACKUS nodes, each stage
// halving the element width until VT is reached.
static SDValue lowerShuffleWithPACK(const SDLoc &DL, MVT VT, ArrayRef<int> Mask,
                                    SDValue V1, SDValue V2, SelectionDAG &DAG,
                                    const X86Subtarget &Subtarget) {
  MVT PackVT;
  unsigned PackOpcode;
  unsigned SizeBits = VT.getSizeInBits();
  unsigned EltBits = VT.getScalarSizeInBits();
  unsigned MaxStages = Log2_32(64 / EltBits);
  if (!matchShuffleWithPACK(VT, PackVT, V1, V2, PackOpcode, Mask, DAG,
                            Subtarget, MaxStages))
    return SDValue();

  unsigned CurrentEltBits = PackVT.getScalarSizeInBits();
  unsigned NumStages = Log2_32(CurrentEltBits / EltBits);

  // Skip multi-stage 128-bit packs when VLX is available (presumably the
  // VLX truncation instructions are preferable there — confirm).
  if (NumStages != 1 && SizeBits == 128 && Subtarget.hasVLX())
    return SDValue();

  // Widest element size a single pack stage can consume: 16-bit always;
  // 32-bit only for PACKSS or with SSE4.1 (PACKUSDW).
  unsigned MaxPackBits = 16;
  if (CurrentEltBits > 16 &&
      (PackOpcode == X86ISD::PACKSS || Subtarget.hasSSE41()))
    MaxPackBits = 32;

  // Repeatedly halve the element width, feeding each stage's result back in
  // as both operands of the next stage.
  SDValue Res;
  for (unsigned i = 0; i != NumStages; ++i) {
    unsigned SrcEltBits = std::min(MaxPackBits, CurrentEltBits);
    unsigned NumSrcElts = SizeBits / SrcEltBits;
    MVT SrcSVT = MVT::getIntegerVT(SrcEltBits);
    MVT DstSVT = MVT::getIntegerVT(SrcEltBits / 2);
    MVT SrcVT = MVT::getVectorVT(SrcSVT, NumSrcElts);
    MVT DstVT = MVT::getVectorVT(DstSVT, NumSrcElts * 2);
    Res = DAG.getNode(PackOpcode, DL, DstVT, DAG.getBitcast(SrcVT, V1),
                      DAG.getBitcast(SrcVT, V2));
    V1 = V2 = Res;
    CurrentEltBits /= 2;
  }
  assert(Res && Res.getValueType() == VT &&
         "Failed to lower compaction shuffle");
  return Res;
}
| 11940 | |
| 11941 | |
| 11942 | |
| 11943 | |
| 11944 | |
// Lower a shuffle as an AND with a constant mask: every non-zeroable
// element must stay in place and come from the same single input; zeroable
// elements are cleared by the mask.
static SDValue lowerShuffleAsBitMask(const SDLoc &DL, MVT VT, SDValue V1,
                                     SDValue V2, ArrayRef<int> Mask,
                                     const APInt &Zeroable,
                                     const X86Subtarget &Subtarget,
                                     SelectionDAG &DAG) {
  MVT MaskVT = VT;
  MVT EltVT = VT.getVectorElementType();
  SDValue Zero, AllOnes;
  // On non-64-bit targets build the i64 mask with f64 elements instead
  // (only the bit pattern matters; presumably i64 scalar constants are
  // awkward there — confirm).
  if (EltVT == MVT::i64 && !Subtarget.is64Bit()) {
    EltVT = MVT::f64;
    MaskVT = MVT::getVectorVT(EltVT, Mask.size());
  }

  MVT LogicVT = VT;
  if (EltVT == MVT::f32 || EltVT == MVT::f64) {
    // FP elements: build FP zero/all-ones constants but perform the AND in
    // the same-width integer vector type.
    Zero = DAG.getConstantFP(0.0, DL, EltVT);
    APFloat AllOnesValue = APFloat::getAllOnesValue(
        SelectionDAG::EVTToAPFloatSemantics(EltVT), EltVT.getSizeInBits());
    AllOnes = DAG.getConstantFP(AllOnesValue, DL, EltVT);
    LogicVT =
        MVT::getVectorVT(EltVT == MVT::f64 ? MVT::i64 : MVT::i32, Mask.size());
  } else {
    Zero = DAG.getConstant(0, DL, EltVT);
    AllOnes = DAG.getAllOnesConstant(DL, EltVT);
  }

  // Build the per-element mask while verifying the shuffle is expressible:
  // in-place elements become all-ones lanes, zeroable ones stay zero.
  SmallVector<SDValue, 16> VMaskOps(Mask.size(), Zero);
  SDValue V;
  for (int i = 0, Size = Mask.size(); i < Size; ++i) {
    if (Zeroable[i])
      continue; // Lane stays zero in the mask.
    if (Mask[i] % Size != i)
      return SDValue(); // Element moves; an AND can't express this.
    if (!V)
      V = Mask[i] < Size ? V1 : V2;
    else if (V != (Mask[i] < Size ? V1 : V2))
      return SDValue(); // Elements mix both inputs.

    VMaskOps[i] = AllOnes;
  }
  if (!V)
    return SDValue(); // No non-zeroable elements at all.

  SDValue VMask = DAG.getBuildVector(MaskVT, DL, VMaskOps);
  VMask = DAG.getBitcast(LogicVT, VMask);
  V = DAG.getBitcast(LogicVT, V);
  SDValue And = DAG.getNode(ISD::AND, DL, LogicVT, V, VMask);
  return DAG.getBitcast(VT, And);
}
| 11995 | |
| 11996 | |
| 11997 | |
| 11998 | |
| 11999 | |
| 12000 | |
| 12001 | static SDValue lowerShuffleAsBitBlend(const SDLoc &DL, MVT VT, SDValue V1, |
| 12002 | SDValue V2, ArrayRef<int> Mask, |
| 12003 | SelectionDAG &DAG) { |
| 12004 | assert(VT.isInteger() && "Only supports integer vector types!"); |
| 12005 | MVT EltVT = VT.getVectorElementType(); |
| 12006 | SDValue Zero = DAG.getConstant(0, DL, EltVT); |
| 12007 | SDValue AllOnes = DAG.getAllOnesConstant(DL, EltVT); |
| 12008 | SmallVector<SDValue, 16> MaskOps; |
| 12009 | for (int i = 0, Size = Mask.size(); i < Size; ++i) { |
| 12010 | if (Mask[i] >= 0 && Mask[i] != i && Mask[i] != i + Size) |
| 12011 | return SDValue(); |
| 12012 | MaskOps.push_back(Mask[i] < Size ? AllOnes : Zero); |
| 12013 | } |
| 12014 | |
| 12015 | SDValue V1Mask = DAG.getBuildVector(VT, DL, MaskOps); |
| 12016 | V1 = DAG.getNode(ISD::AND, DL, VT, V1, V1Mask); |
| 12017 | V2 = DAG.getNode(X86ISD::ANDNP, DL, VT, V1Mask, V2); |
| 12018 | return DAG.getNode(ISD::OR, DL, VT, V1, V2); |
| 12019 | } |
| 12020 | |
| 12021 | static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask, |
| 12022 | SDValue PreservedSrc, |
| 12023 | const X86Subtarget &Subtarget, |
| 12024 | SelectionDAG &DAG); |
| 12025 | |
| 12026 | static bool matchShuffleAsBlend(SDValue V1, SDValue V2, |
| 12027 | MutableArrayRef<int> Mask, |
| 12028 | const APInt &Zeroable, bool &ForceV1Zero, |
| 12029 | bool &ForceV2Zero, uint64_t &BlendMask) { |
| 12030 | bool V1IsZeroOrUndef = |
| 12031 | V1.isUndef() || ISD::isBuildVectorAllZeros(V1.getNode()); |
| 12032 | bool V2IsZeroOrUndef = |
| 12033 | V2.isUndef() || ISD::isBuildVectorAllZeros(V2.getNode()); |
| 12034 | |
| 12035 | BlendMask = 0; |
| 12036 | ForceV1Zero = false, ForceV2Zero = false; |
| 12037 | assert(Mask.size() <= 64 && "Shuffle mask too big for blend mask"); |
| 12038 | |
| 12039 | |
| 12040 | |
| 12041 | for (int i = 0, Size = Mask.size(); i < Size; ++i) { |
| 12042 | int M = Mask[i]; |
| 12043 | if (M == SM_SentinelUndef) |
| 12044 | continue; |
| 12045 | if (M == i) |
| 12046 | continue; |
| 12047 | if (M == i + Size) { |
| 12048 | BlendMask |= 1ull << i; |
| 12049 | continue; |
| 12050 | } |
| 12051 | if (Zeroable[i]) { |
| 12052 | if (V1IsZeroOrUndef) { |
| 12053 | ForceV1Zero = true; |
| 12054 | Mask[i] = i; |
| 12055 | continue; |
| 12056 | } |
| 12057 | if (V2IsZeroOrUndef) { |
| 12058 | ForceV2Zero = true; |
| 12059 | BlendMask |= 1ull << i; |
| 12060 | Mask[i] = i + Size; |
| 12061 | continue; |
| 12062 | } |
| 12063 | } |
| 12064 | return false; |
| 12065 | } |
| 12066 | return true; |
| 12067 | } |
| 12068 | |
// Widen a blend-control mask: each set bit i of BlendMask (for i < Size)
// becomes a run of Scale consecutive set bits starting at bit i * Scale
// (e.g. 0b101 with Scale=2 -> 0b110011).
static uint64_t scaleVectorShuffleBlendMask(uint64_t BlendMask, int Size,
                                            int Scale) {
  const uint64_t Run = (1ull << Scale) - 1; // Scale consecutive ones.
  uint64_t Scaled = 0;
  for (int Bit = 0; Bit != Size; ++Bit)
    if ((BlendMask >> Bit) & 1)
      Scaled |= Run << (Bit * Scale);
  return Scaled;
}
| 12077 | |
| 12078 | |
| 12079 | |
| 12080 | |
| 12081 | |
| 12082 | |
| 12083 | |
// Lower a shuffle as a blend, where each result element is taken in-place
// from either V1 or V2 (zeroable elements may force one input to zero).
// Dispatches on VT to select the cheapest blend form available.
static SDValue lowerShuffleAsBlend(const SDLoc &DL, MVT VT, SDValue V1,
                                   SDValue V2, ArrayRef<int> Original,
                                   const APInt &Zeroable,
                                   const X86Subtarget &Subtarget,
                                   SelectionDAG &DAG) {
  uint64_t BlendMask = 0;
  bool ForceV1Zero = false, ForceV2Zero = false;
  // Work on a copy: matchShuffleAsBlend canonicalizes the mask in place.
  SmallVector<int, 64> Mask(Original.begin(), Original.end());
  if (!matchShuffleAsBlend(V1, V2, Mask, Zeroable, ForceV1Zero, ForceV2Zero,
                           BlendMask))
    return SDValue();

  // Substitute zero vectors for inputs the matcher decided must be zero.
  if (ForceV1Zero)
    V1 = getZeroVector(VT, Subtarget, DAG, DL);
  if (ForceV2Zero)
    V2 = getZeroVector(VT, Subtarget, DAG, DL);

  switch (VT.SimpleTy) {
  case MVT::v4i64:
  case MVT::v8i32:
    assert(Subtarget.hasAVX2() && "256-bit integer blends require AVX2!");
    LLVM_FALLTHROUGH;
  case MVT::v4f64:
  case MVT::v8f32:
    assert(Subtarget.hasAVX() && "256-bit float blends require AVX!");
    LLVM_FALLTHROUGH;
  case MVT::v2f64:
  case MVT::v2i64:
  case MVT::v4f32:
  case MVT::v4i32:
  case MVT::v8i16:
    assert(Subtarget.hasSSE41() && "128-bit blends require SSE41!");
    // These types map directly onto an immediate-controlled BLENDI node.
    return DAG.getNode(X86ISD::BLENDI, DL, VT, V1, V2,
                       DAG.getTargetConstant(BlendMask, DL, MVT::i8));
  case MVT::v16i16: {
    assert(Subtarget.hasAVX2() && "v16i16 blends require AVX2!");
    SmallVector<int, 8> RepeatedMask;
    if (is128BitLaneRepeatedShuffleMask(MVT::v16i16, Mask, RepeatedMask)) {
      // The blend repeats identically per 128-bit lane, so an 8-bit
      // immediate covers both lanes.
      assert(RepeatedMask.size() == 8 && "Repeated mask size doesn't match!");
      BlendMask = 0;
      for (int i = 0; i < 8; ++i)
        if (RepeatedMask[i] >= 8)
          BlendMask |= 1ull << i;
      return DAG.getNode(X86ISD::BLENDI, DL, MVT::v16i16, V1, V2,
                         DAG.getTargetConstant(BlendMask, DL, MVT::i8));
    }
    // If either half's blend mask is trivial (all zeros or all ones), emit
    // two whole-vector blends and stitch the desired halves together.
    uint64_t LoMask = BlendMask & 0xFF;
    uint64_t HiMask = (BlendMask >> 8) & 0xFF;
    if (LoMask == 0 || LoMask == 255 || HiMask == 0 || HiMask == 255) {
      SDValue Lo = DAG.getNode(X86ISD::BLENDI, DL, MVT::v16i16, V1, V2,
                               DAG.getTargetConstant(LoMask, DL, MVT::i8));
      SDValue Hi = DAG.getNode(X86ISD::BLENDI, DL, MVT::v16i16, V1, V2,
                               DAG.getTargetConstant(HiMask, DL, MVT::i8));
      return DAG.getVectorShuffle(
          MVT::v16i16, DL, Lo, Hi,
          {0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31});
    }
    LLVM_FALLTHROUGH;
  }
  case MVT::v32i8:
    assert(Subtarget.hasAVX2() && "256-bit byte-blends require AVX2!");
    LLVM_FALLTHROUGH;
  case MVT::v16i8: {
    assert(Subtarget.hasSSE41() && "128-bit byte-blends require SSE41!");

    // Prefer a simple constant AND bitmask when one suffices.
    if (SDValue Masked = lowerShuffleAsBitMask(DL, VT, V1, V2, Mask, Zeroable,
                                               Subtarget, DAG))
      return Masked;

    // With BWI+VLX, use a mask-register select instead of a vector blend.
    if (Subtarget.hasBWI() && Subtarget.hasVLX()) {
      MVT IntegerType =
          MVT::getIntegerVT(std::max((int)VT.getVectorNumElements(), 8));
      SDValue MaskNode = DAG.getConstant(BlendMask, DL, IntegerType);
      return getVectorMaskingNode(V2, MaskNode, V1, Subtarget, DAG);
    }

    // With VLX, try a constant bit blend (AND/ANDNP/OR — presumably folded
    // to VPTERNLOG later; confirm).
    if (Subtarget.hasVLX())
      if (SDValue BitBlend =
              lowerShuffleAsBitBlend(DL, VT, V1, V2, Mask, DAG))
        return BitBlend;

    // Fall back to a variable byte blend (VSELECT). Scale from VT's
    // element granularity down to bytes.
    int Scale = VT.getScalarSizeInBits() / 8;

    MVT BlendVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8);

    // If only V2 is a plain load, commute the operands so the load ends up
    // as the first select operand (presumably enabling load folding in the
    // variable blend — confirm).
    if (!ISD::isNormalLoad(V1.getNode()) && ISD::isNormalLoad(V2.getNode())) {
      ShuffleVectorSDNode::commuteMask(Mask);
      std::swap(V1, V2);
    }

    // Build the byte-level select condition: all-ones bytes pick V1, zero
    // bytes pick V2, undef mask elements stay undef.
    SmallVector<SDValue, 32> VSELECTMask;
    for (int i = 0, Size = Mask.size(); i < Size; ++i)
      for (int j = 0; j < Scale; ++j)
        VSELECTMask.push_back(
            Mask[i] < 0 ? DAG.getUNDEF(MVT::i8)
                        : DAG.getConstant(Mask[i] < Size ? -1 : 0, DL,
                                          MVT::i8));

    V1 = DAG.getBitcast(BlendVT, V1);
    V2 = DAG.getBitcast(BlendVT, V2);
    return DAG.getBitcast(
        VT,
        DAG.getSelect(DL, BlendVT, DAG.getBuildVector(BlendVT, DL, VSELECTMask),
                      V1, V2));
  }
  case MVT::v16f32:
  case MVT::v8f64:
  case MVT::v8i64:
  case MVT::v16i32:
  case MVT::v32i16:
  case MVT::v64i8: {
    // 512-bit types: try the constant AND bitmask unless optimizing for
    // size (the constant pool entry costs bytes).
    bool OptForSize = DAG.shouldOptForSize();
    if (!OptForSize) {
      if (SDValue Masked = lowerShuffleAsBitMask(DL, VT, V1, V2, Mask, Zeroable,
                                                 Subtarget, DAG))
        return Masked;
    }

    // Otherwise emit a mask-register select; the integer mask is padded to
    // at least 8 bits.
    MVT IntegerType =
        MVT::getIntegerVT(std::max((int)VT.getVectorNumElements(), 8));
    SDValue MaskNode = DAG.getConstant(BlendMask, DL, IntegerType);
    return getVectorMaskingNode(V2, MaskNode, V1, Subtarget, DAG);
  }
  default:
    llvm_unreachable("Not a supported integer vector type!");
  }
}
| 12239 | |
| 12240 | |
| 12241 | |
| 12242 | |
| 12243 | |
| 12244 | |
| 12245 | static SDValue lowerShuffleAsBlendAndPermute(const SDLoc &DL, MVT VT, |
| 12246 | SDValue V1, SDValue V2, |
| 12247 | ArrayRef<int> Mask, |
| 12248 | SelectionDAG &DAG, |
| 12249 | bool ImmBlends = false) { |
| 12250 | |
| 12251 | |
| 12252 | SmallVector<int, 32> BlendMask(Mask.size(), -1); |
| 12253 | SmallVector<int, 32> PermuteMask(Mask.size(), -1); |
| 12254 | |
| 12255 | for (int i = 0, Size = Mask.size(); i < Size; ++i) { |
| 12256 | if (Mask[i] < 0) |
| 12257 | continue; |
| 12258 | |
| 12259 | assert(Mask[i] < Size * 2 && "Shuffle input is out of bounds."); |
| 12260 | |
| 12261 | if (BlendMask[Mask[i] % Size] < 0) |
| 12262 | BlendMask[Mask[i] % Size] = Mask[i]; |
| 12263 | else if (BlendMask[Mask[i] % Size] != Mask[i]) |
| 12264 | return SDValue(); |
| 12265 | |
| 12266 | PermuteMask[i] = Mask[i] % Size; |
| 12267 | } |
| 12268 | |
| 12269 | |
| 12270 | |
| 12271 | unsigned EltSize = VT.getScalarSizeInBits(); |
| 12272 | if (ImmBlends && EltSize == 8 && !canWidenShuffleElements(BlendMask)) |
| 12273 | return SDValue(); |
| 12274 | |
| 12275 | SDValue V = DAG.getVectorShuffle(VT, DL, V1, V2, BlendMask); |
| 12276 | return DAG.getVectorShuffle(VT, DL, V, DAG.getUNDEF(VT), PermuteMask); |
| 12277 | } |
| 12278 | |
| 12279 | |
| 12280 | |
| 12281 | |
| 12282 | |
| 12283 | |
// Try to lower the shuffle as an UNPCKL/UNPCKH of the inputs followed by an
// in-lane permute of the unpacked result.
static SDValue lowerShuffleAsUNPCKAndPermute(const SDLoc &DL, MVT VT,
                                             SDValue V1, SDValue V2,
                                             ArrayRef<int> Mask,
                                             SelectionDAG &DAG) {
  int NumElts = Mask.size();
  int NumLanes = VT.getSizeInBits() / 128;
  int NumLaneElts = NumElts / NumLanes;
  int NumHalfLaneElts = NumLaneElts / 2;

  bool MatchLo = true, MatchHi = true;
  SDValue Ops[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT)};

  // Determine the unpack kind and operand order: even result positions come
  // from Ops[0] and odd from Ops[1], with all referenced elements in either
  // the low (UNPCKL) or high (UNPCKH) half of each 128-bit lane.
  for (int Lane = 0; Lane != NumElts; Lane += NumLaneElts) {
    for (int Elt = 0; Elt != NumLaneElts; ++Elt) {
      int M = Mask[Lane + Elt];
      if (M < 0)
        continue;

      // The element's even/odd position fixes which unpack operand it must
      // use; each operand slot may bind to only one of V1/V2.
      SDValue &Op = Ops[Elt & 1];
      if (M < NumElts && (Op.isUndef() || Op == V1))
        Op = V1;
      else if (NumElts <= M && (Op.isUndef() || Op == V2))
        Op = V2;
      else
        return SDValue();

      int Lo = Lane, Mid = Lane + NumHalfLaneElts, Hi = Lane + NumLaneElts;
      MatchLo &= isUndefOrInRange(M, Lo, Mid) ||
                 isUndefOrInRange(M, NumElts + Lo, NumElts + Mid);
      MatchHi &= isUndefOrInRange(M, Mid, Hi) ||
                 isUndefOrInRange(M, NumElts + Mid, NumElts + Hi);
      if (!MatchLo && !MatchHi)
        return SDValue();
    }
  }
  assert((MatchLo ^ MatchHi) && "Failed to match UNPCKLO/UNPCKHI");

  // Build the post-unpack permute: the unpack places half-lane offset k of
  // its operands at positions 2k (from Ops[0]) and 2k+1 (from Ops[1]), so
  // each adjacent result pair must reference the same half-lane offset.
  SmallVector<int, 32> PermuteMask(NumElts, -1);
  for (int Lane = 0; Lane != NumElts; Lane += NumLaneElts) {
    for (int Elt = 0; Elt != NumLaneElts; Elt += 2) {
      int M0 = Mask[Lane + Elt + 0];
      int M1 = Mask[Lane + Elt + 1];
      if (0 <= M0 && 0 <= M1 &&
          (M0 % NumHalfLaneElts) != (M1 % NumHalfLaneElts))
        return SDValue();
      if (0 <= M0)
        PermuteMask[Lane + Elt + 0] = Lane + (2 * (M0 % NumHalfLaneElts));
      if (0 <= M1)
        PermuteMask[Lane + Elt + 1] = Lane + (2 * (M1 % NumHalfLaneElts)) + 1;
    }
  }

  unsigned UnpckOp = MatchLo ? X86ISD::UNPCKL : X86ISD::UNPCKH;
  SDValue Unpck = DAG.getNode(UnpckOp, DL, VT, Ops);
  return DAG.getVectorShuffle(VT, DL, Unpck, DAG.getUNDEF(VT), PermuteMask);
}
| 12344 | |
| 12345 | |
| 12346 | |
// Try to lower the shuffle as a PALIGNR byte rotation of the two inputs
// followed by a per-lane permute. Works when the per-lane element ranges
// used from V1 and V2 are disjoint, so one rotation can merge them into a
// single vector.
static SDValue lowerShuffleAsByteRotateAndPermute(
    const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
    const X86Subtarget &Subtarget, SelectionDAG &DAG) {
  // PALIGNR availability by vector width.
  if ((VT.is128BitVector() && !Subtarget.hasSSSE3()) ||
      (VT.is256BitVector() && !Subtarget.hasAVX2()) ||
      (VT.is512BitVector() && !Subtarget.hasBWI()))
    return SDValue();

  // The rotation works within 128-bit lanes only.
  if (is128BitLaneCrossingShuffleMask(VT, Mask))
    return SDValue();

  int Scale = VT.getScalarSizeInBits() / 8; // Bytes per element.
  int NumLanes = VT.getSizeInBits() / 128;
  int NumElts = VT.getVectorNumElements();
  int NumEltsPerLane = NumElts / NumLanes;

  // Compute the per-lane index range each input contributes, and whether
  // each input is used only at identity positions (a pure blend).
  bool Blend1 = true;
  bool Blend2 = true;
  std::pair<int, int> Range1 = std::make_pair(INT_MAX, INT_MIN);
  std::pair<int, int> Range2 = std::make_pair(INT_MAX, INT_MIN);
  for (int Lane = 0; Lane != NumElts; Lane += NumEltsPerLane) {
    for (int Elt = 0; Elt != NumEltsPerLane; ++Elt) {
      int M = Mask[Lane + Elt];
      if (M < 0)
        continue;
      if (M < NumElts) {
        Blend1 &= (M == (Lane + Elt));
        assert(Lane <= M && M < (Lane + NumEltsPerLane) && "Out of range mask");
        M = M % NumEltsPerLane;
        Range1.first = std::min(Range1.first, M);
        Range1.second = std::max(Range1.second, M);
      } else {
        M -= NumElts;
        Blend2 &= (M == (Lane + Elt));
        assert(Lane <= M && M < (Lane + NumEltsPerLane) && "Out of range mask");
        M = M % NumEltsPerLane;
        Range2.first = std::min(Range2.first, M);
        Range2.second = std::max(Range2.second, M);
      }
    }
  }

  // Both inputs must contribute at least one element (otherwise the ranges
  // stay at their INT_MAX/INT_MIN sentinels and the checks fail).
  if (!(0 <= Range1.first && Range1.second < NumEltsPerLane) ||
      !(0 <= Range2.first && Range2.second < NumEltsPerLane))
    return SDValue();

  // For wider-than-128-bit vectors, if either side is a pure blend, bail
  // (presumably a plain blend-based lowering is cheaper — confirm).
  if (VT.getSizeInBits() > 128 && (Blend1 || Blend2))
    return SDValue();

  // Rotate Hi:Lo right by RotAmt elements with PALIGNR, then rebuild the
  // mask relative to the rotated vector. Ofs is the mask-index bias of the
  // input that became Lo (0 for V1, NumElts for V2).
  auto RotateAndPermute = [&](SDValue Lo, SDValue Hi, int RotAmt, int Ofs) {
    MVT ByteVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8);
    SDValue Rotate = DAG.getBitcast(
        VT, DAG.getNode(X86ISD::PALIGNR, DL, ByteVT, DAG.getBitcast(ByteVT, Hi),
                        DAG.getBitcast(ByteVT, Lo),
                        DAG.getTargetConstant(Scale * RotAmt, DL, MVT::i8)));
    SmallVector<int, 64> PermMask(NumElts, SM_SentinelUndef);
    for (int Lane = 0; Lane != NumElts; Lane += NumEltsPerLane) {
      for (int Elt = 0; Elt != NumEltsPerLane; ++Elt) {
        int M = Mask[Lane + Elt];
        if (M < 0)
          continue;
        if (M < NumElts)
          PermMask[Lane + Elt] = Lane + ((M + Ofs - RotAmt) % NumEltsPerLane);
        else
          PermMask[Lane + Elt] = Lane + ((M - Ofs - RotAmt) % NumEltsPerLane);
      }
    }
    return DAG.getVectorShuffle(VT, DL, Rotate, DAG.getUNDEF(VT), PermMask);
  };

  // Rotate whichever input holds the higher range down next to the other's
  // range; the ranges must be strictly disjoint.
  if (Range2.second < Range1.first)
    return RotateAndPermute(V1, V2, Range1.first, 0);
  if (Range1.second < Range2.first)
    return RotateAndPermute(V2, V1, Range2.first, NumElts);
  return SDValue();
}
| 12430 | |
| 12431 | |
| 12432 | |
| 12433 | |
| 12434 | |
| 12435 | |
| 12436 | |
| 12437 | |
// Generic two-input fallback: decompose the shuffle into two single-input
// shuffles followed by a merge of their results, first trying cheaper
// "merge then single permute" forms when both inputs need rearranging.
static SDValue lowerShuffleAsDecomposedShuffleMerge(
    const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
    const X86Subtarget &Subtarget, SelectionDAG &DAG) {
  int NumElts = Mask.size();
  int NumLanes = VT.getSizeInBits() / 128;
  int NumEltsPerLane = NumElts / NumLanes;

  // Split Mask into a per-input shuffle plus a final blend mask, tracking
  // whether V1 elements land only at even positions and V2 only at odd.
  bool IsAlternating = true;
  SmallVector<int, 32> V1Mask(NumElts, -1);
  SmallVector<int, 32> V2Mask(NumElts, -1);
  SmallVector<int, 32> FinalMask(NumElts, -1);
  for (int i = 0; i < NumElts; ++i) {
    int M = Mask[i];
    if (M >= 0 && M < NumElts) {
      V1Mask[i] = M;
      FinalMask[i] = i;
      IsAlternating &= (i & 1) == 0;
    } else if (M >= NumElts) {
      V2Mask[i] = M - NumElts;
      FinalMask[i] = i + NumElts;
      IsAlternating &= (i & 1) == 1;
    }
  }

  // If both inputs need a non-trivial shuffle, first try forms that merge
  // the inputs and then need only one permute afterwards.
  if (!isNoopShuffleMask(V1Mask) && !isNoopShuffleMask(V2Mask)) {
    // Immediate blend + permute.
    if (SDValue BlendPerm = lowerShuffleAsBlendAndPermute(DL, VT, V1, V2, Mask,
                                                          DAG, true))
      return BlendPerm;
    // Unpack + permute.
    if (SDValue UnpackPerm = lowerShuffleAsUNPCKAndPermute(DL, VT, V1, V2, Mask,
                                                           DAG))
      return UnpackPerm;
    // Byte-rotate + permute.
    if (SDValue RotatePerm = lowerShuffleAsByteRotateAndPermute(
            DL, VT, V1, V2, Mask, Subtarget, DAG))
      return RotatePerm;
    // Variable (non-immediate) blend + permute, as a last resort.
    if (SDValue BlendPerm = lowerShuffleAsBlendAndPermute(DL, VT, V1, V2, Mask,
                                                          DAG))
      return BlendPerm;
  }

  // For alternating small-element masks, instead compact each input's used
  // elements into the low half of each lane; the final shuffle then reads
  // alternately from those compacted halves (an interleave-style merge).
  if (IsAlternating && VT.getScalarSizeInBits() < 32) {
    V1Mask.assign(NumElts, -1);
    V2Mask.assign(NumElts, -1);
    FinalMask.assign(NumElts, -1);
    for (int i = 0; i != NumElts; i += NumEltsPerLane)
      for (int j = 0; j != NumEltsPerLane; ++j) {
        int M = Mask[i + j];
        if (M >= 0 && M < NumElts) {
          V1Mask[i + (j / 2)] = M;
          FinalMask[i + j] = i + (j / 2);
        } else if (M >= NumElts) {
          V2Mask[i + (j / 2)] = M - NumElts;
          FinalMask[i + j] = i + (j / 2) + NumElts;
        }
      }
  }

  V1 = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), V1Mask);
  V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), V2Mask);
  return DAG.getVectorShuffle(VT, DL, V1, V2, FinalMask);
}
| 12511 | |
| 12512 | |
| 12513 | |
| 12514 | |
| 12515 | |
| 12516 | static int matchShuffleAsBitRotate(ArrayRef<int> Mask, int NumSubElts) { |
| 12517 | int NumElts = Mask.size(); |
| 12518 | assert((NumElts % NumSubElts) == 0 && "Illegal shuffle mask"); |
| 12519 | |
| 12520 | int RotateAmt = -1; |
| 12521 | for (int i = 0; i != NumElts; i += NumSubElts) { |
| 12522 | for (int j = 0; j != NumSubElts; ++j) { |
| 12523 | int M = Mask[i + j]; |
| 12524 | if (M < 0) |
| 12525 | continue; |
| 12526 | if (!isInRange(M, i, i + NumSubElts)) |
| 12527 | return -1; |
| 12528 | int Offset = (NumSubElts - (M - (i + j))) % NumSubElts; |
| 12529 | if (0 <= RotateAmt && Offset != RotateAmt) |
| 12530 | return -1; |
| 12531 | RotateAmt = Offset; |
| 12532 | } |
| 12533 | } |
| 12534 | return RotateAmt; |
| 12535 | } |
| 12536 | |
/// Try to match the shuffle as a bit-rotation of wider integer elements:
/// pick the smallest legal sub-group size whose elements all rotate by the
/// same amount. On success sets RotateVT to the widened vector type and
/// returns the rotation amount in bits; returns -1 on failure.
static int matchShuffleAsBitRotate(MVT &RotateVT, int EltSizeInBits,
                                   const X86Subtarget &Subtarget,
                                   ArrayRef<int> Mask) {
  assert(!isNoopShuffleMask(Mask) && "We shouldn't lower no-op shuffles!");
  assert(EltSizeInBits < 64 && "Can't rotate 64-bit integers");

  // AVX512 only has vXi32/vXi64 rotates, so limit the minimum rotation
  // sub-group size there; otherwise start at pairs of elements. The rotated
  // element can be at most 64 bits wide.
  int MinSubElts = Subtarget.hasAVX512() ? std::max(32 / EltSizeInBits, 2) : 2;
  int MaxSubElts = 64 / EltSizeInBits;
  for (int NumSubElts = MinSubElts; NumSubElts <= MaxSubElts; NumSubElts *= 2) {
    int RotateAmt = matchShuffleAsBitRotate(Mask, NumSubElts);
    if (RotateAmt < 0)
      continue;

    // Found a match - build the widened integer vector type to rotate in.
    int NumElts = Mask.size();
    MVT RotateSVT = MVT::getIntegerVT(EltSizeInBits * NumSubElts);
    RotateVT = MVT::getVectorVT(RotateSVT, NumElts / NumSubElts);
    return RotateAmt * EltSizeInBits;
  }

  return -1;
}
| 12559 | |
| 12560 | |
/// Lower a single-input shuffle as a bit rotation (X86ISD::VROTLI), or as an
/// emulated SHL+SRL+OR rotation when no native rotate exists.
static SDValue lowerShuffleAsBitRotate(const SDLoc &DL, MVT VT, SDValue V1,
                                       ArrayRef<int> Mask,
                                       const X86Subtarget &Subtarget,
                                       SelectionDAG &DAG) {
  // Only XOP (128-bit) and AVX512 targets have native bit-rotate
  // instructions. NOTE(review): the gate below bails when SSE3 is available
  // but rotation isn't legal — presumably because other shuffle lowerings
  // are preferable there; confirm against upstream rationale.
  bool IsLegal =
      (VT.is128BitVector() && Subtarget.hasXOP()) || Subtarget.hasAVX512();
  if (!IsLegal && Subtarget.hasSSE3())
    return SDValue();

  MVT RotateVT;
  int RotateAmt = matchShuffleAsBitRotate(RotateVT, VT.getScalarSizeInBits(),
                                          Subtarget, Mask);
  if (RotateAmt < 0)
    return SDValue();

  // No native rotate: emulate as (X << Amt) | (X >> (EltBits - Amt)) on the
  // widened rotate type. Whole 16-bit multiples are rejected — those can be
  // expressed as word-granularity shuffles instead (inferred from the
  // early-out; verify).
  if (!IsLegal) {
    if ((RotateAmt % 16) == 0)
      return SDValue();

    unsigned ShlAmt = RotateAmt;
    unsigned SrlAmt = RotateVT.getScalarSizeInBits() - RotateAmt;
    V1 = DAG.getBitcast(RotateVT, V1);
    SDValue SHL = DAG.getNode(X86ISD::VSHLI, DL, RotateVT, V1,
                              DAG.getTargetConstant(ShlAmt, DL, MVT::i8));
    SDValue SRL = DAG.getNode(X86ISD::VSRLI, DL, RotateVT, V1,
                              DAG.getTargetConstant(SrlAmt, DL, MVT::i8));
    SDValue Rot = DAG.getNode(ISD::OR, DL, RotateVT, SHL, SRL);
    return DAG.getBitcast(VT, Rot);
  }

  // Native rotate-left-by-immediate on the widened type.
  SDValue Rot =
      DAG.getNode(X86ISD::VROTLI, DL, RotateVT, DAG.getBitcast(RotateVT, V1),
                  DAG.getTargetConstant(RotateAmt, DL, MVT::i8));
  return DAG.getBitcast(VT, Rot);
}
| 12601 | |
| 12602 | |
| 12603 | |
| 12604 | |
/// Try to match a vector shuffle as an element rotation of the concatenation
/// of two inputs. On success, V1/V2 are rewritten to the (possibly swapped or
/// duplicated) low/high sources and the rotation amount in elements is
/// returned; -1 means no rotation matched.
static int matchShuffleAsElementRotate(SDValue &V1, SDValue &V2,
                                       ArrayRef<int> Mask) {
  int NumElts = Mask.size();

  // We need to detect various ways of spelling a rotation:
  //   [11, 12, 13, 14, 15,  0,  1,  2]
  //   [-1, 12, 13, 14, -1, -1,  1, -1]
  //   [-1, -1, -1, -1, -1, -1,  1,  2]
  //   [ 3,  4,  5,  6,  7,  8,  9, 10]
  //   [-1,  4,  5,  6, -1, -1,  9, -1]
  //   [-1,  4,  5,  6, -1, -1, -1, -1]
  int Rotation = 0;
  SDValue Lo, Hi;
  for (int i = 0; i < NumElts; ++i) {
    int M = Mask[i];
    assert((M == SM_SentinelUndef || (0 <= M && M < (2*NumElts))) &&
           "Unexpected mask index.");
    if (M < 0)
      continue;

    // Determine where a rotated vector would have started.
    int StartIdx = i - (M % NumElts);
    if (StartIdx == 0)
      // The identity rotation isn't interesting, stop.
      return -1;

    // If we found the tail of a vector the rotation must be the missing
    // front. If we found the head of a vector, it must be how much of the
    // head.
    int CandidateRotation = StartIdx < 0 ? -StartIdx : NumElts - StartIdx;

    if (Rotation == 0)
      Rotation = CandidateRotation;
    else if (Rotation != CandidateRotation)
      // The rotations don't match, so we can't match this mask.
      return -1;

    // Compute which value this mask is pointing at.
    SDValue MaskV = M < NumElts ? V1 : V2;

    // Compute which of the two target values this index should be assigned
    // to. This reflects whether the high elements are remaining or the low
    // elements are remaining.
    SDValue &TargetV = StartIdx < 0 ? Hi : Lo;

    // Either set up this value if we've not encountered it before, or check
    // that it remains consistent.
    if (!TargetV)
      TargetV = MaskV;
    else if (TargetV != MaskV)
      // This may be a rotation, but it pulls from the inputs in some
      // unsupported interleaving.
      return -1;
  }

  // Check that we successfully analyzed the mask, and normalize the results.
  assert(Rotation != 0 && "Failed to locate a viable rotation!");
  assert((Lo || Hi) && "Failed to find a rotated input vector!");
  if (!Lo)
    Lo = Hi;
  else if (!Hi)
    Hi = Lo;

  V1 = Lo;
  V2 = Hi;

  return Rotation;
}
| 12673 | |
| 12674 | |
| 12675 | |
| 12676 | |
| 12677 | |
| 12678 | |
| 12679 | |
| 12680 | |
| 12681 | |
| 12682 | |
| 12683 | |
| 12684 | |
| 12685 | |
| 12686 | |
| 12687 | |
| 12688 | |
| 12689 | |
| 12690 | static int matchShuffleAsByteRotate(MVT VT, SDValue &V1, SDValue &V2, |
| 12691 | ArrayRef<int> Mask) { |
| 12692 | |
| 12693 | if (isAnyZero(Mask)) |
| 12694 | return -1; |
| 12695 | |
| 12696 | |
| 12697 | SmallVector<int, 16> RepeatedMask; |
| 12698 | if (!is128BitLaneRepeatedShuffleMask(VT, Mask, RepeatedMask)) |
| 12699 | return -1; |
| 12700 | |
| 12701 | int Rotation = matchShuffleAsElementRotate(V1, V2, RepeatedMask); |
| 12702 | if (Rotation <= 0) |
| 12703 | return -1; |
| 12704 | |
| 12705 | |
| 12706 | |
| 12707 | int NumElts = RepeatedMask.size(); |
| 12708 | int Scale = 16 / NumElts; |
| 12709 | return Rotation * Scale; |
| 12710 | } |
| 12711 | |
/// Try to lower a vector shuffle as a byte rotation.
///
/// SSSE3 has a generic PALIGNR instruction in x86 that will do an arbitrary
/// byte-rotation of the concatenation of two vectors. This routine will try
/// to generically lower a vector shuffle through such an instruction. On
/// plain SSE2 it falls back to an equivalent PSLLDQ + PSRLDQ + POR sequence
/// (128-bit v16i8 only).
static SDValue lowerShuffleAsByteRotate(const SDLoc &DL, MVT VT, SDValue V1,
                                        SDValue V2, ArrayRef<int> Mask,
                                        const X86Subtarget &Subtarget,
                                        SelectionDAG &DAG) {
  assert(!isNoopShuffleMask(Mask) && "We shouldn't lower no-op shuffles!");

  SDValue Lo = V1, Hi = V2;
  int ByteRotation = matchShuffleAsByteRotate(VT, Lo, Hi, Mask);
  if (ByteRotation <= 0)
    return SDValue();

  // Cast the inputs to i8 vectors of the full width to match PALIGNR or
  // the PSLLDQ/PSRLDQ/POR fallback.
  MVT ByteVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8);
  Lo = DAG.getBitcast(ByteVT, Lo);
  Hi = DAG.getBitcast(ByteVT, Hi);

  // SSSE3 targets can use the palignr instruction.
  if (Subtarget.hasSSSE3()) {
    assert((!VT.is512BitVector() || Subtarget.hasBWI()) &&
           "512-bit PALIGNR requires BWI instructions");
    return DAG.getBitcast(
        VT, DAG.getNode(X86ISD::PALIGNR, DL, ByteVT, Lo, Hi,
                        DAG.getTargetConstant(ByteRotation, DL, MVT::i8)));
  }

  assert(VT.is128BitVector() &&
         "Rotate-based lowering only supports 128-bit lowering!");
  assert(Mask.size() <= 16 &&
         "Can shuffle at most 16 bytes in a 128-bit vector!");
  assert(ByteVT == MVT::v16i8 &&
         "SSE2 rotate lowering only needed for v16i8!");

  // Default SSE2 implementation: shift the low source up, the high source
  // down, and OR the two halves together.
  int LoByteShift = 16 - ByteRotation;
  int HiByteShift = ByteRotation;

  SDValue LoShift =
      DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, Lo,
                  DAG.getTargetConstant(LoByteShift, DL, MVT::i8));
  SDValue HiShift =
      DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v16i8, Hi,
                  DAG.getTargetConstant(HiByteShift, DL, MVT::i8));
  return DAG.getBitcast(VT,
                        DAG.getNode(ISD::OR, DL, MVT::v16i8, LoShift, HiShift));
}
| 12758 | |
| 12759 | |
| 12760 | |
| 12761 | |
| 12762 | |
| 12763 | |
| 12764 | |
| 12765 | |
| 12766 | |
| 12767 | |
| 12768 | |
| 12769 | static SDValue lowerShuffleAsVALIGN(const SDLoc &DL, MVT VT, SDValue V1, |
| 12770 | SDValue V2, ArrayRef<int> Mask, |
| 12771 | const X86Subtarget &Subtarget, |
| 12772 | SelectionDAG &DAG) { |
| 12773 | assert((VT.getScalarType() == MVT::i32 || VT.getScalarType() == MVT::i64) && |
| 12774 | "Only 32-bit and 64-bit elements are supported!"); |
| 12775 | |
| 12776 | |
| 12777 | assert((Subtarget.hasVLX() || (!VT.is128BitVector() && !VT.is256BitVector())) |
| 12778 | && "VLX required for 128/256-bit vectors"); |
| 12779 | |
| 12780 | SDValue Lo = V1, Hi = V2; |
| 12781 | int Rotation = matchShuffleAsElementRotate(Lo, Hi, Mask); |
| 12782 | if (Rotation <= 0) |
| 12783 | return SDValue(); |
| 12784 | |
| 12785 | return DAG.getNode(X86ISD::VALIGN, DL, VT, Lo, Hi, |
| 12786 | DAG.getTargetConstant(Rotation, DL, MVT::i8)); |
| 12787 | } |
| 12788 | |
| 12789 | |
/// Try to lower a vector shuffle as a byte shift sequence that clears the
/// zeroable ends of the vector with PSLLDQ/PSRLDQ pairs.
static SDValue lowerShuffleAsByteShiftMask(const SDLoc &DL, MVT VT, SDValue V1,
                                           SDValue V2, ArrayRef<int> Mask,
                                           const APInt &Zeroable,
                                           const X86Subtarget &Subtarget,
                                           SelectionDAG &DAG) {
  assert(!isNoopShuffleMask(Mask) && "We shouldn't lower no-op shuffles!");
  assert(VT.is128BitVector() && "Only 128-bit vectors supported");

  // We need a shuffle that has zeros at one/both ends and a sequential
  // shuffle from one source within.
  unsigned ZeroLo = Zeroable.countTrailingOnes();
  unsigned ZeroHi = Zeroable.countLeadingOnes();
  if (!ZeroLo && !ZeroHi)
    return SDValue();

  // The non-zero middle must be one sequential run...
  unsigned NumElts = Mask.size();
  unsigned Len = NumElts - (ZeroLo + ZeroHi);
  if (!isSequentialOrUndefInRange(Mask, ZeroLo, Len, Mask[ZeroLo]))
    return SDValue();

  // ...drawn entirely from a single source operand.
  unsigned Scale = VT.getScalarSizeInBits() / 8;
  ArrayRef<int> StubMask = Mask.slice(ZeroLo, Len);
  if (!isUndefOrInRange(StubMask, 0, NumElts) &&
      !isUndefOrInRange(StubMask, NumElts, 2 * NumElts))
    return SDValue();

  SDValue Res = Mask[ZeroLo] < (int)NumElts ? V1 : V2;
  Res = DAG.getBitcast(MVT::v16i8, Res);

  // Use VSHLDQ/VSRLDQ ops to zero the ends of a vector and leave an
  // inner sequential set of elements, possibly offset:
  // 01234567 --> zzzzzz01 --> 1zzzzzzz
  // 01234567 --> 4567zzzz --> zzzzz456
  // 01234567 --> z0123456 --> 3456zzzz --> zz3456zz
  if (ZeroLo == 0) {
    unsigned Shift = (NumElts - 1) - (Mask[ZeroLo + Len - 1] % NumElts);
    Res = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, Res,
                      DAG.getTargetConstant(Scale * Shift, DL, MVT::i8));
    Res = DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v16i8, Res,
                      DAG.getTargetConstant(Scale * ZeroHi, DL, MVT::i8));
  } else if (ZeroHi == 0) {
    unsigned Shift = Mask[ZeroLo] % NumElts;
    Res = DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v16i8, Res,
                      DAG.getTargetConstant(Scale * Shift, DL, MVT::i8));
    Res = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, Res,
                      DAG.getTargetConstant(Scale * ZeroLo, DL, MVT::i8));
  } else if (!Subtarget.hasSSSE3()) {
    // Without PSHUFB, zeroing both ends takes three byte shifts; with PSHUFB
    // available, better lowerings exist elsewhere so we bail (see the final
    // else). NOTE(review): profitability rationale inferred from the guard.
    unsigned Shift = (NumElts - 1) - (Mask[ZeroLo + Len - 1] % NumElts);
    Res = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, Res,
                      DAG.getTargetConstant(Scale * Shift, DL, MVT::i8));
    Shift += Mask[ZeroLo] % NumElts;
    Res = DAG.getNode(X86ISD::VSRLDQ, DL, MVT::v16i8, Res,
                      DAG.getTargetConstant(Scale * Shift, DL, MVT::i8));
    Res = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, Res,
                      DAG.getTargetConstant(Scale * ZeroLo, DL, MVT::i8));
  } else
    return SDValue();

  return DAG.getBitcast(VT, Res);
}
| 12853 | |
| 12854 | |
| 12855 | |
| 12856 | |
| 12857 | |
| 12858 | |
| 12859 | |
| 12860 | |
| 12861 | |
| 12862 | |
| 12863 | |
| 12864 | |
| 12865 | |
| 12866 | |
| 12867 | |
| 12868 | |
| 12869 | |
| 12870 | |
| 12871 | |
| 12872 | |
| 12873 | |
| 12874 | |
| 12875 | |
| 12876 | |
/// Try to match a shuffle against a bit/byte shift (shifting in zeros).
///
/// Matches a shuffle mask against the PSLL(W/D/Q/DQ) and PSRL(W/D/Q/DQ)
/// logical shift instructions: elements from one input shuffled left or
/// right with zeroable elements 'shifted in'. Handles both bit-wise element
/// shifts and whole-byte shifts across a 16-byte lane.
///
/// PSHL : (little-endian) left bit shift.
/// [ zz, 0, zz,  2 ]
/// [ -1, 4, zz, -1 ]
/// PSRL : (little-endian) right bit shift.
/// [  1, zz,  3, zz]
/// [ -1, -1,  7, zz]
/// PSLLDQ : (little-endian) left byte shift
/// [ zz,  0,  1,  2,  3,  4,  5,  6]
/// [ zz, zz, -1, -1,  2,  3,  4, -1]
/// [ zz, zz, zz, zz, zz, zz, -1,  1]
/// PSRLDQ : (little-endian) right byte shift
/// [  1, 2, 3, 4, 5, 6, 7, zz]
/// [ -1, -1, -1, -1, zz, zz, -1, zz]
/// [  1, 2, -1, -1, -1, -1, zz, zz]
///
/// On success sets ShiftVT/Opcode and returns the shift amount; -1 otherwise.
static int matchShuffleAsShift(MVT &ShiftVT, unsigned &Opcode,
                               unsigned ScalarSizeInBits, ArrayRef<int> Mask,
                               int MaskOffset, const APInt &Zeroable,
                               const X86Subtarget &Subtarget) {
  int Size = Mask.size();
  unsigned SizeInBits = Size * ScalarSizeInBits;

  // The elements that would be shifted in must all be zeroable.
  auto CheckZeros = [&](int Shift, int Scale, bool Left) {
    for (int i = 0; i < Size; i += Scale)
      for (int j = 0; j < Shift; ++j)
        if (!Zeroable[i + j + (Left ? 0 : (Scale - Shift))])
          return false;

    return true;
  };

  // Check the remaining elements form a sequential run; on success pick the
  // opcode and vector type for the shift.
  auto MatchShift = [&](int Shift, int Scale, bool Left) {
    for (int i = 0; i != Size; i += Scale) {
      unsigned Pos = Left ? i + Shift : i;
      unsigned Low = Left ? i : i + Shift;
      unsigned Len = Scale - Shift;
      if (!isSequentialOrUndefInRange(Mask, Pos, Len, Low + MaskOffset))
        return -1;
    }

    // Shifts wider than 64-bit elements must use the byte-shift forms.
    int ShiftEltBits = ScalarSizeInBits * Scale;
    bool ByteShift = ShiftEltBits > 64;
    Opcode = Left ? (ByteShift ? X86ISD::VSHLDQ : X86ISD::VSHLI)
                  : (ByteShift ? X86ISD::VSRLDQ : X86ISD::VSRLI);
    int ShiftAmt = Shift * ScalarSizeInBits / (ByteShift ? 8 : 1);

    // Normalize the scale for byte shifts to still produce an i64 element
    // type.
    Scale = ByteShift ? Scale / 2 : Scale;

    // We need to round trip through the appropriate type for the shift.
    MVT ShiftSVT = MVT::getIntegerVT(ScalarSizeInBits * Scale);
    ShiftVT = ByteShift ? MVT::getVectorVT(MVT::i8, SizeInBits / 8)
                        : MVT::getVectorVT(ShiftSVT, Size / Scale);
    return (int)ShiftAmt;
  };

  // SSE/AVX supports logical shifts up to 64-bit integers - so we can just
  // keep doubling the size of the integer elements up to that. We can then
  // shift the elements of the integer vector by whole multiples of their
  // width within the elements of the larger integer vector. Test each
  // multiple to see if we can find a match with the moved element indices
  // and that the shifted in elements are all zeroable. 512-bit targets
  // without BWI can't shift sub-64-bit elements, hence the narrower cap.
  unsigned MaxWidth = ((SizeInBits == 512) && !Subtarget.hasBWI() ? 64 : 128);
  for (int Scale = 2; Scale * ScalarSizeInBits <= MaxWidth; Scale *= 2)
    for (int Shift = 1; Shift != Scale; ++Shift)
      for (bool Left : {true, false})
        if (CheckZeros(Shift, Scale, Left)) {
          int ShiftAmt = MatchShift(Shift, Scale, Left);
          if (0 < ShiftAmt)
            return ShiftAmt;
        }

  // no match
  return -1;
}
| 12938 | |
/// Lower a shuffle as a bit/byte shift of one of the inputs, shifting in
/// zeros for the zeroable elements.
static SDValue lowerShuffleAsShift(const SDLoc &DL, MVT VT, SDValue V1,
                                   SDValue V2, ArrayRef<int> Mask,
                                   const APInt &Zeroable,
                                   const X86Subtarget &Subtarget,
                                   SelectionDAG &DAG) {
  int Size = Mask.size();
  assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");

  MVT ShiftVT;
  SDValue V = V1;
  unsigned Opcode;

  // Try to match shuffle against V1 shift.
  int ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
                                     Mask, 0, Zeroable, Subtarget);

  // If V1 failed, try to match shuffle against V2 shift.
  if (ShiftAmt < 0) {
    ShiftAmt = matchShuffleAsShift(ShiftVT, Opcode, VT.getScalarSizeInBits(),
                                   Mask, Size, Zeroable, Subtarget);
    V = V2;
  }

  if (ShiftAmt < 0)
    return SDValue();

  // Round trip through the matched shift type, then cast back.
  assert(DAG.getTargetLoweringInfo().isTypeLegal(ShiftVT) &&
         "Illegal integer vector type");
  V = DAG.getBitcast(ShiftVT, V);
  V = DAG.getNode(Opcode, DL, ShiftVT, V,
                  DAG.getTargetConstant(ShiftAmt, DL, MVT::i8));
  return DAG.getBitcast(VT, V);
}
| 12972 | |
| 12973 | |
| 12974 | |
// EXTRQ: Extract Len elements from the lower half of the source, starting at
// Idx. The remainder of the lower half of the result is zero and the upper
// half is undef. On success V1 is rewritten to the matched source and
// BitLen/BitIdx hold the immediate fields (6 bits each).
static bool matchShuffleAsEXTRQ(MVT VT, SDValue &V1, SDValue &V2,
                                ArrayRef<int> Mask, uint64_t &BitLen,
                                uint64_t &BitIdx, const APInt &Zeroable) {
  int Size = Mask.size();
  int HalfSize = Size / 2;
  assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");
  assert(!Zeroable.isAllOnesValue() && "Fully zeroable shuffle mask");

  // Upper half must be undefined.
  if (!isUndefUpperHalf(Mask))
    return false;

  // Determine the extraction length from the part of the
  // lower half that isn't zeroable.
  int Len = HalfSize;
  for (; Len > 0; --Len)
    if (!Zeroable[Len - 1])
      break;
  assert(Len > 0 && "Zeroable shuffle mask");

  // Attempt to match first Len sequential elements from the lower half.
  SDValue Src;
  int Idx = -1;
  for (int i = 0; i != Len; ++i) {
    int M = Mask[i];
    if (M == SM_SentinelUndef)
      continue;
    SDValue &V = (M < Size ? V1 : V2);
    M = M % Size;

    // The extracted elements must start at a valid index and all mask
    // elements must be in the lower half.
    if (i > M || M >= HalfSize)
      return false;

    // All defined elements must agree on the source and the base offset.
    if (Idx < 0 || (Src == V && Idx == (M - i))) {
      Src = V;
      Idx = M - i;
      continue;
    }
    return false;
  }

  // Entirely-undef run: nothing concrete matched.
  if (!Src || Idx < 0)
    return false;

  assert((Idx + Len) <= HalfSize && "Illegal extraction mask");
  BitLen = (Len * VT.getScalarSizeInBits()) & 0x3f;
  BitIdx = (Idx * VT.getScalarSizeInBits()) & 0x3f;
  V1 = Src;
  return true;
}
| 13027 | |
| 13028 | |
| 13029 | |
| 13030 | |
// INSERTQ: Extract the lowest Len elements from the lower half of the second
// source and insert them over the first source, starting at Idx:
// { A[0], .., A[Idx-1], B[0], .., B[Len-1], A[Idx+Len], .., UNDEF, ... }
// On success V1/V2 are rewritten to base/insert and BitLen/BitIdx hold the
// immediate fields (6 bits each).
static bool matchShuffleAsINSERTQ(MVT VT, SDValue &V1, SDValue &V2,
                                  ArrayRef<int> Mask, uint64_t &BitLen,
                                  uint64_t &BitIdx) {
  int Size = Mask.size();
  int HalfSize = Size / 2;
  assert(Size == (int)VT.getVectorNumElements() && "Unexpected mask size");

  // Upper half must be undefined.
  if (!isUndefUpperHalf(Mask))
    return false;

  // Try every possible insertion point in the lower half.
  for (int Idx = 0; Idx != HalfSize; ++Idx) {
    SDValue Base;

    // Attempt to match the base source from the mask before the insertion
    // point.
    if (isUndefInRange(Mask, 0, Idx)) {
      /* EMPTY - base stays undetermined */
    } else if (isSequentialOrUndefInRange(Mask, 0, Idx, 0)) {
      Base = V1;
    } else if (isSequentialOrUndefInRange(Mask, 0, Idx, Size)) {
      Base = V2;
    } else {
      continue;
    }

    // Extend the extraction length looking to match both the insertion of
    // the second source and the remaining elements of the first.
    for (int Hi = Idx + 1; Hi <= HalfSize; ++Hi) {
      SDValue Insert;
      int Len = Hi - Idx;

      // Match the inserted run [Idx, Hi) against the start of either source.
      if (isSequentialOrUndefInRange(Mask, Idx, Len, 0)) {
        Insert = V1;
      } else if (isSequentialOrUndefInRange(Mask, Idx, Len, Size)) {
        Insert = V2;
      } else {
        continue;
      }

      // Match the remaining elements of the lower half against the base,
      // which must be consistent with any base found above.
      if (isUndefInRange(Mask, Hi, HalfSize - Hi)) {
        /* EMPTY - trailing elements are undef */
      } else if ((!Base || (Base == V1)) &&
                 isSequentialOrUndefInRange(Mask, Hi, HalfSize - Hi, Hi)) {
        Base = V1;
      } else if ((!Base || (Base == V2)) &&
                 isSequentialOrUndefInRange(Mask, Hi, HalfSize - Hi,
                                            Size + Hi)) {
        Base = V2;
      } else {
        continue;
      }

      BitLen = (Len * VT.getScalarSizeInBits()) & 0x3f;
      BitIdx = (Idx * VT.getScalarSizeInBits()) & 0x3f;
      V1 = Base;
      V2 = Insert;
      return true;
    }
  }

  return false;
}
| 13095 | |
| 13096 | |
| 13097 | static SDValue lowerShuffleWithSSE4A(const SDLoc &DL, MVT VT, SDValue V1, |
| 13098 | SDValue V2, ArrayRef<int> Mask, |
| 13099 | const APInt &Zeroable, SelectionDAG &DAG) { |
| 13100 | uint64_t BitLen, BitIdx; |
| 13101 | if (matchShuffleAsEXTRQ(VT, V1, V2, Mask, BitLen, BitIdx, Zeroable)) |
| 13102 | return DAG.getNode(X86ISD::EXTRQI, DL, VT, V1, |
| 13103 | DAG.getTargetConstant(BitLen, DL, MVT::i8), |
| 13104 | DAG.getTargetConstant(BitIdx, DL, MVT::i8)); |
| 13105 | |
| 13106 | if (matchShuffleAsINSERTQ(VT, V1, V2, Mask, BitLen, BitIdx)) |
| 13107 | return DAG.getNode(X86ISD::INSERTQI, DL, VT, V1 ? V1 : DAG.getUNDEF(VT), |
| 13108 | V2 ? V2 : DAG.getUNDEF(VT), |
| 13109 | DAG.getTargetConstant(BitLen, DL, MVT::i8), |
| 13110 | DAG.getTargetConstant(BitIdx, DL, MVT::i8)); |
| 13111 | |
| 13112 | return SDValue(); |
| 13113 | } |
| 13114 | |
| 13115 | |
| 13116 | |
| 13117 | |
| 13118 | |
| 13119 | |
| 13120 | |
| 13121 | |
| 13122 | |
| 13123 | |
/// Lower a vector shuffle as a zero or any extension.
///
/// Given a specific number of elements, element bit width, and extension
/// stride, produce either a zero or any extension based on the available
/// features of the subtarget. The extension may start at an offsetted
/// element index of the input; the offset must either be in the bottom lane
/// or start an upper lane, and all extended elements must come from the
/// offset's lane.
static SDValue lowerShuffleAsSpecificZeroOrAnyExtend(
    const SDLoc &DL, MVT VT, int Scale, int Offset, bool AnyExt, SDValue InputV,
    ArrayRef<int> Mask, const X86Subtarget &Subtarget, SelectionDAG &DAG) {
  assert(Scale > 1 && "Need a scale to extend.");
  int EltBits = VT.getScalarSizeInBits();
  int NumElements = VT.getVectorNumElements();
  int NumEltsPerLane = 128 / EltBits;
  int OffsetLane = Offset / NumEltsPerLane;
  assert((EltBits == 8 || EltBits == 16 || EltBits == 32) &&
         "Only 8, 16, and 32 bit elements can be extended.");
  assert(Scale * EltBits <= 64 && "Cannot zero extend past 64 bits.");
  assert(0 <= Offset && "Extension offset must be positive.");
  assert((Offset < NumEltsPerLane || Offset % NumEltsPerLane == 0) &&
         "Extension offset must be in the first lane or start an upper lane.");

  // Check that an index is in same lane as the base offset.
  auto SafeOffset = [&](int Idx) {
    return OffsetLane == (Idx / NumEltsPerLane);
  };

  // Shift along an input so that the offset base moves to the first element.
  auto ShuffleOffset = [&](SDValue V) {
    if (!Offset)
      return V;

    SmallVector<int, 8> ShMask((unsigned)NumElements, -1);
    for (int i = 0; i * Scale < NumElements; ++i) {
      int SrcIdx = i + Offset;
      ShMask[i] = SafeOffset(SrcIdx) ? SrcIdx : -1;
    }
    return DAG.getVectorShuffle(VT, DL, V, DAG.getUNDEF(VT), ShMask);
  };

  // SSE4.1 has the *EXTEND_VECTOR_INREG nodes (PMOVZX/PMOVSX family) - use
  // them directly after normalizing the offset.
  if (Subtarget.hasSSE41()) {
    // Not worth offsetting 128-bit vectors if scale == 2, a pattern using
    // PUNPCK will catch this in a later shuffle match.
    if (Offset && Scale == 2 && VT.is128BitVector())
      return SDValue();
    MVT ExtVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Scale),
                                 NumElements / Scale);
    InputV = ShuffleOffset(InputV);
    InputV = getEXTEND_VECTOR_INREG(AnyExt ? ISD::ANY_EXTEND : ISD::ZERO_EXTEND,
                                    DL, ExtVT, InputV, DAG);
    return DAG.getBitcast(VT, InputV);
  }

  assert(VT.is128BitVector() && "Only 128-bit vectors can be extended.");

  // For any extends we can cheat for larger element sizes and use shuffle
  // instructions that can fold with a load and/or copy.
  if (AnyExt && EltBits == 32) {
    int PSHUFDMask[4] = {Offset, -1, SafeOffset(Offset + 1) ? Offset + 1 : -1,
                         -1};
    return DAG.getBitcast(
        VT, DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32,
                        DAG.getBitcast(MVT::v4i32, InputV),
                        getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG)));
  }
  if (AnyExt && EltBits == 16 && Scale > 2) {
    int PSHUFDMask[4] = {Offset / 2, -1,
                         SafeOffset(Offset + 1) ? (Offset + 1) / 2 : -1, -1};
    InputV = DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32,
                         DAG.getBitcast(MVT::v4i32, InputV),
                         getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG));
    int PSHUFWMask[4] = {1, -1, -1, -1};
    unsigned OddEvenOp = (Offset & 1) ? X86ISD::PSHUFLW : X86ISD::PSHUFHW;
    return DAG.getBitcast(
        VT, DAG.getNode(OddEvenOp, DL, MVT::v8i16,
                        DAG.getBitcast(MVT::v8i16, InputV),
                        getV4X86ShuffleImm8ForMask(PSHUFWMask, DL, DAG)));
  }

  // The SSE4A EXTRQ instruction can efficiently extend the first 2 lanes
  // to 64-bits.
  if ((Scale * EltBits) == 64 && EltBits < 32 && Subtarget.hasSSE4A()) {
    assert(NumElements == (int)Mask.size() && "Unexpected shuffle mask size!");
    assert(VT.is128BitVector() && "Unexpected vector width!");

    int LoIdx = Offset * EltBits;
    SDValue Lo = DAG.getBitcast(
        MVT::v2i64, DAG.getNode(X86ISD::EXTRQI, DL, VT, InputV,
                                DAG.getTargetConstant(EltBits, DL, MVT::i8),
                                DAG.getTargetConstant(LoIdx, DL, MVT::i8)));

    if (isUndefUpperHalf(Mask) || !SafeOffset(Offset + 1))
      return DAG.getBitcast(VT, Lo);

    int HiIdx = (Offset + 1) * EltBits;
    SDValue Hi = DAG.getBitcast(
        MVT::v2i64, DAG.getNode(X86ISD::EXTRQI, DL, VT, InputV,
                                DAG.getTargetConstant(EltBits, DL, MVT::i8),
                                DAG.getTargetConstant(HiIdx, DL, MVT::i8)));
    return DAG.getBitcast(VT,
                          DAG.getNode(X86ISD::UNPCKL, DL, MVT::v2i64, Lo, Hi));
  }

  // If this would require more than 2 unpack instructions to expand, use
  // pshufb when available. We can only use more than 2 unpack instructions
  // when zero extending i8 elements which also makes it easier to use pshufb.
  if (Scale > 4 && EltBits == 8 && Subtarget.hasSSSE3()) {
    assert(NumElements == 16 && "Unexpected byte vector width!");
    SDValue PSHUFBMask[16];
    for (int i = 0; i < 16; ++i) {
      int Idx = Offset + (i / Scale);
      // In-lane base elements pass through; the rest are zeroed (0x80 lane
      // selector) or left undef for any-extends.
      if ((i % Scale == 0 && SafeOffset(Idx))) {
        PSHUFBMask[i] = DAG.getConstant(Idx, DL, MVT::i8);
        continue;
      }
      PSHUFBMask[i] =
          AnyExt ? DAG.getUNDEF(MVT::i8) : DAG.getConstant(0x80, DL, MVT::i8);
    }
    InputV = DAG.getBitcast(MVT::v16i8, InputV);
    return DAG.getBitcast(
        VT, DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8, InputV,
                        DAG.getBuildVector(MVT::v16i8, DL, PSHUFBMask)));
  }

  // If we are extending from an offset, ensure we start on a boundary that
  // we can unpack from.
  int AlignToUnpack = Offset % (NumElements / Scale);
  if (AlignToUnpack) {
    SmallVector<int, 8> ShMask((unsigned)NumElements, -1);
    for (int i = AlignToUnpack; i < NumElements; ++i)
      ShMask[i - AlignToUnpack] = i;
    InputV = DAG.getVectorShuffle(VT, DL, InputV, DAG.getUNDEF(VT), ShMask);
    Offset -= AlignToUnpack;
  }

  // Otherwise emit a sequence of unpacks, halving the scale (and doubling the
  // element width) each iteration until fully extended.
  do {
    unsigned UnpackLoHi = X86ISD::UNPCKL;
    if (Offset >= (NumElements / 2)) {
      UnpackLoHi = X86ISD::UNPCKH;
      Offset -= (NumElements / 2);
    }

    MVT InputVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits), NumElements);
    SDValue Ext = AnyExt ? DAG.getUNDEF(InputVT)
                         : getZeroVector(InputVT, Subtarget, DAG, DL);
    InputV = DAG.getBitcast(InputVT, InputV);
    InputV = DAG.getNode(UnpackLoHi, DL, InputVT, InputV, Ext);
    Scale /= 2;
    EltBits *= 2;
    NumElements /= 2;
  } while (Scale > 1);
  return DAG.getBitcast(VT, InputV);
}
| 13273 | |
| 13274 | |
| 13275 | |
| 13276 | |
| 13277 | |
| 13278 | |
| 13279 | |
| 13280 | |
| 13281 | |
| 13282 | |
| 13283 | |
| 13284 | |
| 13285 | |
/// Try to lower a vector shuffle as a zero extension on any microarch.
///
/// This routine tries aggressively to match shuffles that look like a zero
/// extend - it doesn't check profitability. It handles both blends with
/// all-zero inputs (explicit zero-extends) and undef-lane cases where the
/// zero doesn't matter (any-extends).
static SDValue lowerShuffleAsZeroOrAnyExtend(
    const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
    const APInt &Zeroable, const X86Subtarget &Subtarget,
    SelectionDAG &DAG) {
  int Bits = VT.getSizeInBits();
  int NumLanes = Bits / 128;
  int NumElements = VT.getVectorNumElements();
  int NumEltsPerLane = NumElements / NumLanes;
  assert(VT.getScalarSizeInBits() <= 32 &&
         "Exceeds 32-bit integer zero extension limit");
  assert((int)Mask.size() == NumElements && "Unexpected shuffle mask size");

  // Define a helper function to check a particular ext-scale and lower to it
  // if valid.
  auto Lower = [&](int Scale) -> SDValue {
    SDValue InputV;
    bool AnyExt = true;
    int Offset = 0;
    int Matches = 0;
    for (int i = 0; i < NumElements; ++i) {
      int M = Mask[i];
      if (M < 0)
        continue;
      if (i % Scale != 0) {
        // Each of the extended elements need to be zeroable.
        if (!Zeroable[i])
          return SDValue();

        // We no longer are in the anyext case.
        AnyExt = false;
        continue;
      }

      // Each of the base elements needs to be consecutive indices into the
      // same input vector.
      SDValue V = M < NumElements ? V1 : V2;
      M = M % NumElements;
      if (!InputV) {
        InputV = V;
        Offset = M - (i / Scale);
      } else if (InputV != V)
        return SDValue();

      // Offset must start in the lowest 128-bit lane or at the start of an
      // upper lane.
      if (!((0 <= Offset && Offset < NumEltsPerLane) ||
            (Offset % NumEltsPerLane) == 0))
        return SDValue();

      // The referenced element must be in the same lane as the offset.
      if (Offset && (Offset / NumEltsPerLane) != (M / NumEltsPerLane))
        return SDValue();

      // And the run must be sequential relative to the offset.
      if ((M % NumElements) != (Offset + (i / Scale)))
        return SDValue();
      Matches++;
    }

    // If we failed to find a vector to extend, this is an all-undef/zero
    // shuffle which should have been handled elsewhere.
    if (!InputV)
      return SDValue();

    // If we are offsetting, don't extend if we only match a single input;
    // a basic PSHUF or PUNPCK would do better (inferred: single-match
    // offsets are rejected here).
    if (Offset != 0 && Matches < 2)
      return SDValue();

    return lowerShuffleAsSpecificZeroOrAnyExtend(DL, VT, Scale, Offset, AnyExt,
                                                 InputV, Mask, Subtarget, DAG);
  };

  // The widest scale possible for extending is to a 64-bit integer.
  assert(Bits % 64 == 0 &&
         "The number of bits in a vector must be divisible by 64 on x86!");
  int NumExtElements = Bits / 64;

  // Each iteration, try extending the elements half as much, but into twice
  // as many elements.
  for (; NumExtElements < NumElements; NumExtElements *= 2) {
    assert(NumElements % NumExtElements == 0 &&
           "The input vector size must be divisible by the extended size.");
    if (SDValue V = Lower(NumElements / NumExtElements))
      return V;
  }

  // General extends failed, but 128-bit vectors may be able to use MOVQ.
  if (Bits != 128)
    return SDValue();

  // Returns one of the source operands if the shuffle can be reduced to a
  // MOVQ, copying the lower 64-bits and zero-extending to the upper 64-bits.
  auto CanZExtLowHalf = [&]() {
    for (int i = NumElements / 2; i != NumElements; ++i)
      if (!Zeroable[i])
        return SDValue();
    if (isSequentialOrUndefInRange(Mask, 0, NumElements / 2, 0))
      return V1;
    if (isSequentialOrUndefInRange(Mask, 0, NumElements / 2, NumElements))
      return V2;
    return SDValue();
  };

  if (SDValue V = CanZExtLowHalf()) {
    V = DAG.getBitcast(MVT::v2i64, V);
    V = DAG.getNode(X86ISD::VZEXT_MOVL, DL, MVT::v2i64, V);
    return DAG.getBitcast(VT, V);
  }

  // No viable ext lowering found.
  return SDValue();
}
| 13401 | |
| 13402 | |
| 13403 | |
| 13404 | |
| 13405 | static SDValue getScalarValueForVectorElement(SDValue V, int Idx, |
| 13406 | SelectionDAG &DAG) { |
| 13407 | MVT VT = V.getSimpleValueType(); |
| 13408 | MVT EltVT = VT.getVectorElementType(); |
| 13409 | V = peekThroughBitcasts(V); |
| 13410 | |
| 13411 | |
| 13412 | |
| 13413 | MVT NewVT = V.getSimpleValueType(); |
| 13414 | if (!NewVT.isVector() || NewVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) |
| 13415 | return SDValue(); |
| 13416 | |
| 13417 | if (V.getOpcode() == ISD::BUILD_VECTOR || |
| 13418 | (Idx == 0 && V.getOpcode() == ISD::SCALAR_TO_VECTOR)) { |
| 13419 | |
| 13420 | |
| 13421 | SDValue S = V.getOperand(Idx); |
| 13422 | if (EltVT.getSizeInBits() == S.getSimpleValueType().getSizeInBits()) |
| 13423 | return DAG.getBitcast(EltVT, S); |
| 13424 | } |
| 13425 | |
| 13426 | return SDValue(); |
| 13427 | } |
| 13428 | |
| 13429 | |
| 13430 | |
| 13431 | |
| 13432 | |
| 13433 | static bool isShuffleFoldableLoad(SDValue V) { |
| 13434 | V = peekThroughBitcasts(V); |
| 13435 | return ISD::isNON_EXTLoad(V.getNode()); |
| 13436 | } |
| 13437 | |
| 13438 | |
| 13439 | |
| 13440 | |
| 13441 | |
/// Try to lower a shuffle as insertion of a single (V2) element into an
/// otherwise zeroable (or in-place V1) vector.
///
/// There is exactly one mask lane reading from V2; all other lanes must either
/// be zeroable or read V1 in place. Depending on the situation this lowers to
/// VZEXT_MOVL (+ shift/shuffle into position) or MOVSS/MOVSD.
static SDValue lowerShuffleAsElementInsertion(
    const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
    const APInt &Zeroable, const X86Subtarget &Subtarget,
    SelectionDAG &DAG) {
  MVT ExtVT = VT;
  MVT EltVT = VT.getVectorElementType();

  // Find the single mask lane that reads from V2.
  int V2Index =
      find_if(Mask, [&Mask](int M) { return M >= (int)Mask.size(); }) -
      Mask.begin();
  // V1 can be dropped entirely only if every other lane is zeroable.
  bool IsV1Zeroable = true;
  for (int i = 0, Size = Mask.size(); i < Size; ++i)
    if (i != V2Index && !Zeroable[i]) {
      IsV1Zeroable = false;
      break;
    }

  // Check whether the inserted element is available as a scalar (from a
  // BUILD_VECTOR / SCALAR_TO_VECTOR feeding V2) with a legal type.
  SDValue V2S = getScalarValueForVectorElement(V2, Mask[V2Index] - Mask.size(),
                                               DAG);
  if (V2S && DAG.getTargetLoweringInfo().isTypeLegal(V2S.getValueType())) {
    V2S = DAG.getBitcast(EltVT, V2S);
    if (EltVT == MVT::i8 || EltVT == MVT::i16) {
      // Sub-i32 elements are handled by zero-extending the scalar to i32,
      // which only produces the right result when everything else is zero.
      if (!IsV1Zeroable)
        return SDValue();

      // Widen the element type to i32 and zero-extend the scalar into it.
      ExtVT = MVT::getVectorVT(MVT::i32, ExtVT.getSizeInBits() / 32);
      V2S = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, V2S);
    }
    V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, ExtVT, V2S);
  } else if (Mask[V2Index] != (int)Mask.size() || EltVT == MVT::i8 ||
             EltVT == MVT::i16) {
    // Without a usable scalar we can only handle inserting the low element
    // of V2, and only for element types wide enough for VZEXT_MOVL.
    return SDValue();
  }

  if (!IsV1Zeroable) {
    // If V1 can't be treated as zero, our options narrow: we can only blend
    // element zero of a 128-bit floating-point vector, where the remaining
    // V1 lanes must already be a no-op shuffle.
    assert(VT == ExtVT && "Cannot change extended type when non-zeroable!");
    if (!VT.isFloatingPoint() || V2Index != 0)
      return SDValue();
    SmallVector<int, 8> V1Mask(Mask.begin(), Mask.end());
    V1Mask[V2Index] = -1;
    if (!isNoopShuffleMask(V1Mask))
      return SDValue();
    if (!VT.is128BitVector())
      return SDValue();

    // Blend element zero of V2 into V1 with MOVSS/MOVSD.
    assert((EltVT == MVT::f32 || EltVT == MVT::f64) &&
           "Only two types of floating point element types to handle!");
    return DAG.getNode(EltVT == MVT::f32 ? X86ISD::MOVSS : X86ISD::MOVSD, DL,
                       ExtVT, V1, V2);
  }

  // For floating-point vectors this lowering only works for the low element.
  if (VT.isFloatingPoint() && V2Index != 0)
    return SDValue();

  // Move the low element of V2 into a zero vector.
  V2 = DAG.getNode(X86ISD::VZEXT_MOVL, DL, ExtVT, V2);
  if (ExtVT != VT)
    V2 = DAG.getBitcast(VT, V2);

  if (V2Index != 0) {
    // The element landed in lane 0 but is wanted elsewhere. With 4 or fewer
    // lanes (or FP) a shuffle moves it cheaply; otherwise use a whole-vector
    // byte shift left, which is safe because all other lanes are zero.
    if (VT.isFloatingPoint() || VT.getVectorNumElements() <= 4) {
      SmallVector<int, 4> V2Shuffle(Mask.size(), 1);
      V2Shuffle[V2Index] = 0;
      V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), V2Shuffle);
    } else {
      V2 = DAG.getBitcast(MVT::v16i8, V2);
      V2 = DAG.getNode(X86ISD::VSHLDQ, DL, MVT::v16i8, V2,
                       DAG.getTargetConstant(
                           V2Index * EltVT.getSizeInBits() / 8, DL, MVT::i8));
      V2 = DAG.getBitcast(VT, V2);
    }
  }
  return V2;
}
| 13535 | |
| 13536 | |
| 13537 | |
| 13538 | |
| 13539 | |
/// Try to lower a broadcast of a single, truncated, integer element coming
/// from a SCALAR_TO_VECTOR / BUILD_VECTOR node \p V0 with larger elements.
///
/// Requires AVX2 (asserted); produces VBROADCAST of a truncated scalar, with
/// an SRL first when the broadcast element is not the low chunk of its
/// wider source element.
static SDValue lowerShuffleAsTruncBroadcast(const SDLoc &DL, MVT VT, SDValue V0,
                                            int BroadcastIdx,
                                            const X86Subtarget &Subtarget,
                                            SelectionDAG &DAG) {
  assert(Subtarget.hasAVX2() &&
         "We can only lower integer broadcasts with AVX2!");

  MVT EltVT = VT.getVectorElementType();
  MVT V0VT = V0.getSimpleValueType();

  assert(VT.isInteger() && "Unexpected non-integer trunc broadcast!");
  assert(V0VT.isVector() && "Unexpected non-vector vector-sized value!");

  MVT V0EltVT = V0VT.getVectorElementType();
  if (!V0EltVT.isInteger())
    return SDValue();

  const unsigned EltSize = EltVT.getSizeInBits();
  const unsigned V0EltSize = V0EltVT.getSizeInBits();

  // This is only a truncation when the source element is strictly wider.
  if (V0EltSize <= EltSize)
    return SDValue();

  assert(((V0EltSize % EltSize) == 0) &&
         "Scalar type sizes must all be powers of 2 on x86!");

  const unsigned V0Opc = V0.getOpcode();
  // Scale wide source elements to narrow result elements, and map the
  // broadcast index into the wide vector's index space.
  const unsigned Scale = V0EltSize / EltSize;
  const unsigned V0BroadcastIdx = BroadcastIdx / Scale;

  // We can only look through SCALAR_TO_VECTOR when broadcasting its (only)
  // low element, or through any BUILD_VECTOR operand.
  if ((V0Opc != ISD::SCALAR_TO_VECTOR || V0BroadcastIdx != 0) &&
      V0Opc != ISD::BUILD_VECTOR)
    return SDValue();

  SDValue Scalar = V0.getOperand(V0BroadcastIdx);

  // If we're broadcasting a non-least-significant chunk of the wide scalar,
  // shift it down first so the truncate picks up the right bits.
  if (const int OffsetIdx = BroadcastIdx % Scale)
    Scalar = DAG.getNode(ISD::SRL, DL, Scalar.getValueType(), Scalar,
                         DAG.getConstant(OffsetIdx * EltSize, DL, MVT::i8));

  return DAG.getNode(X86ISD::VBROADCAST, DL, VT,
                     DAG.getNode(ISD::TRUNCATE, DL, EltVT, Scalar));
}
| 13588 | |
| 13589 | |
| 13590 | |
| 13591 | |
| 13592 | |
| 13593 | static bool isSingleSHUFPSMask(ArrayRef<int> Mask) { |
| 13594 | |
| 13595 | assert(Mask.size() == 4 && "Unsupported mask size!"); |
| 13596 | assert(Mask[0] >= -1 && Mask[0] < 8 && "Out of bound mask element!"); |
| 13597 | assert(Mask[1] >= -1 && Mask[1] < 8 && "Out of bound mask element!"); |
| 13598 | assert(Mask[2] >= -1 && Mask[2] < 8 && "Out of bound mask element!"); |
| 13599 | assert(Mask[3] >= -1 && Mask[3] < 8 && "Out of bound mask element!"); |
| 13600 | |
| 13601 | |
| 13602 | |
| 13603 | if (Mask[0] >= 0 && Mask[1] >= 0 && (Mask[0] < 4) != (Mask[1] < 4)) |
| 13604 | return false; |
| 13605 | if (Mask[2] >= 0 && Mask[3] >= 0 && (Mask[2] < 4) != (Mask[3] < 4)) |
| 13606 | return false; |
| 13607 | |
| 13608 | return true; |
| 13609 | } |
| 13610 | |
| 13611 | |
| 13612 | |
| 13613 | |
/// If both shuffle operands are extracts of the low/high 128-bit halves of
/// the same 256-bit vector, shuffle the wide vector directly and extract the
/// low half of the result, saving one extract.
static SDValue lowerShuffleOfExtractsAsVperm(const SDLoc &DL, SDValue N0,
                                             SDValue N1, ArrayRef<int> Mask,
                                             SelectionDAG &DAG) {
  MVT VT = N0.getSimpleValueType();
  assert((VT.is128BitVector() &&
          (VT.getScalarSizeInBits() == 32 || VT.getScalarSizeInBits() == 64)) &&
         "VPERM* family of shuffles requires 32-bit or 64-bit elements");

  // Both operands must be single-use extracts of the same source vector.
  if (!N0.hasOneUse() || !N1.hasOneUse() ||
      N0.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
      N1.getOpcode() != ISD::EXTRACT_SUBVECTOR ||
      N0.getOperand(0) != N1.getOperand(0))
    return SDValue();

  SDValue WideVec = N0.getOperand(0);
  MVT WideVT = WideVec.getSimpleValueType();
  if (!WideVT.is256BitVector())
    return SDValue();

  // Match extracts of the low and high halves; commute the mask if N1 is the
  // low-half extract, bail if it is any other pair of indices.
  unsigned NumElts = VT.getVectorNumElements();
  SmallVector<int, 4> NewMask(Mask.begin(), Mask.end());
  const APInt &ExtIndex0 = N0.getConstantOperandAPInt(1);
  const APInt &ExtIndex1 = N1.getConstantOperandAPInt(1);
  if (ExtIndex1 == 0 && ExtIndex0 == NumElts)
    ShuffleVectorSDNode::commuteMask(NewMask);
  else if (ExtIndex0 != 0 || ExtIndex1 != NumElts)
    return SDValue();

  // If the 128-bit mask is already cheap (single SHUFPS or an unpack), prefer
  // that to a cross-lane permute of the wide vector.
  if (NumElts == 4 &&
      (isSingleSHUFPSMask(NewMask) || is128BitUnpackShuffleMask(NewMask)))
    return SDValue();

  // The upper lanes of the wide shuffle are don't-cares.
  NewMask.append(NumElts, -1);

  // shuf (extract X, 0), (extract X, NumElts), M
  //   --> extract (shuf X, undef, M'), 0
  SDValue Shuf = DAG.getVectorShuffle(WideVT, DL, WideVec, DAG.getUNDEF(WideVT),
                                      NewMask);

  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Shuf,
                     DAG.getIntPtrConstant(0, DL));
}
| 13662 | |
| 13663 | |
| 13664 | |
| 13665 | |
| 13666 | |
| 13667 | |
/// Try to lower a splat shuffle as a broadcast (MOVDDUP / VBROADCAST /
/// VBROADCAST_LOAD).
///
/// Bundles the subtarget feature filtering: MOVDDUP needs SSE3 (v2f64 only),
/// VBROADCAST needs AVX for FP types and AVX2 for integer types. Walks up
/// the DAG from V1 to find a scalar or load source that can be broadcast
/// directly.
static SDValue lowerShuffleAsBroadcast(const SDLoc &DL, MVT VT, SDValue V1,
                                       SDValue V2, ArrayRef<int> Mask,
                                       const X86Subtarget &Subtarget,
                                       SelectionDAG &DAG) {
  // Filter to broadcasts the subtarget can actually perform.
  if (!((Subtarget.hasSSE3() && VT == MVT::v2f64) ||
        (Subtarget.hasAVX() && VT.isFloatingPoint()) ||
        (Subtarget.hasAVX2() && VT.isInteger())))
    return SDValue();

  // With MOVDDUP (SSE3) we can broadcast from a register or a load; otherwise
  // a register source requires AVX2.
  unsigned NumEltBits = VT.getScalarSizeInBits();
  unsigned Opcode = (VT == MVT::v2f64 && !Subtarget.hasAVX2())
                        ? X86ISD::MOVDDUP
                        : X86ISD::VBROADCAST;
  bool BroadcastFromReg = (Opcode == X86ISD::MOVDDUP) || Subtarget.hasAVX2();

  // The mask must be a splat (all defined lanes read one element of V1).
  int BroadcastIdx = getSplatIndex(Mask);
  if (BroadcastIdx < 0)
    return SDValue();
  assert(BroadcastIdx < (int)Mask.size() && "We only expect to be called with "
                                            "a sorted mask where the broadcast "
                                            "comes from V1.");

  // Walk up the chain of vector-producing nodes, tracking the bit offset of
  // the broadcast element, to find a narrower source we can broadcast from.
  int BitOffset = BroadcastIdx * NumEltBits;
  SDValue V = V1;
  for (;;) {
    switch (V.getOpcode()) {
    case ISD::BITCAST: {
      // Bitcasts don't move bits; just look through.
      V = V.getOperand(0);
      continue;
    }
    case ISD::CONCAT_VECTORS: {
      // Descend into the operand that contains the target bit.
      int OpBitWidth = V.getOperand(0).getValueSizeInBits();
      int OpIdx = BitOffset / OpBitWidth;
      V = V.getOperand(OpIdx);
      BitOffset %= OpBitWidth;
      continue;
    }
    case ISD::EXTRACT_SUBVECTOR: {
      // The extract index adds to the accumulated bit offset.
      unsigned EltBitWidth = V.getScalarValueSizeInBits();
      unsigned Idx = V.getConstantOperandVal(1);
      unsigned BeginOffset = Idx * EltBitWidth;
      BitOffset += BeginOffset;
      V = V.getOperand(0);
      continue;
    }
    case ISD::INSERT_SUBVECTOR: {
      // Descend into the inserted subvector if the target bit lands inside
      // it, otherwise keep walking the base vector.
      SDValue VOuter = V.getOperand(0), VInner = V.getOperand(1);
      int EltBitWidth = VOuter.getScalarValueSizeInBits();
      int Idx = (int)V.getConstantOperandVal(2);
      int NumSubElts = (int)VInner.getSimpleValueType().getVectorNumElements();
      int BeginOffset = Idx * EltBitWidth;
      int EndOffset = BeginOffset + NumSubElts * EltBitWidth;
      if (BeginOffset <= BitOffset && BitOffset < EndOffset) {
        BitOffset -= BeginOffset;
        V = VInner;
      } else {
        V = VOuter;
      }
      continue;
    }
    }
    break;
  }
  assert((BitOffset % NumEltBits) == 0 && "Illegal bit-offset");
  BroadcastIdx = BitOffset / NumEltBits;

  // Did the walk end on a node with a different element width (i.e. we went
  // through a size-changing bitcast)?
  bool BitCastSrc = V.getScalarValueSizeInBits() != NumEltBits;

  // If the source element is wider than the result element, the broadcast is
  // in essence a truncating broadcast; try the dedicated lowering for that.
  if (BitCastSrc && VT.isInteger())
    if (SDValue TruncBroadcast = lowerShuffleAsTruncBroadcast(
            DL, VT, V, BroadcastIdx, Subtarget, DAG))
      return TruncBroadcast;

  // Also handle the simpler case where the scalar source is directly visible.
  if (!BitCastSrc &&
      ((V.getOpcode() == ISD::BUILD_VECTOR && V.hasOneUse()) ||
       (V.getOpcode() == ISD::SCALAR_TO_VECTOR && BroadcastIdx == 0))) {
    V = V.getOperand(BroadcastIdx);

    // Without register broadcasts the scalar must come from a foldable load.
    if (!BroadcastFromReg && !isShuffleFoldableLoad(V))
      return SDValue();
  } else if (ISD::isNormalLoad(V.getNode()) &&
             cast<LoadSDNode>(V)->isSimple()) {
    // Broadcasting a whole-vector load: narrow it to a scalar load of just
    // the broadcast element at the adjusted address.
    LoadSDNode *Ld = cast<LoadSDNode>(V);
    SDValue BaseAddr = Ld->getOperand(1);
    MVT SVT = VT.getScalarType();
    unsigned Offset = BroadcastIdx * SVT.getStoreSize();
    assert((int)(Offset * 8) == BitOffset && "Unexpected bit-offset");
    SDValue NewAddr =
        DAG.getMemBasePlusOffset(BaseAddr, TypeSize::Fixed(Offset), DL);

    // Directly form VBROADCAST_LOAD when using the VBROADCAST opcode, so the
    // load is fused into the broadcast node.
    if (Opcode == X86ISD::VBROADCAST) {
      SDVTList Tys = DAG.getVTList(VT, MVT::Other);
      SDValue Ops[] = {Ld->getChain(), NewAddr};
      V = DAG.getMemIntrinsicNode(
          X86ISD::VBROADCAST_LOAD, DL, Tys, Ops, SVT,
          DAG.getMachineFunction().getMachineMemOperand(
              Ld->getMemOperand(), Offset, SVT.getStoreSize()));
      // Preserve memory ordering relative to the original wide load.
      DAG.makeEquivalentMemoryOrdering(Ld, V);
      return DAG.getBitcast(VT, V);
    }
    // MOVDDUP path: only f64 scalars reach here.
    assert(SVT == MVT::f64 && "Unexpected VT!");
    V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
                    DAG.getMachineFunction().getMachineMemOperand(
                        Ld->getMemOperand(), Offset, SVT.getStoreSize()));
    DAG.makeEquivalentMemoryOrdering(Ld, V);
  } else if (!BroadcastFromReg) {
    // We can't broadcast from a vector register on this subtarget.
    return SDValue();
  } else if (BitOffset != 0) {
    // Register broadcasts read element zero, so a non-zero offset only works
    // when we can extract a 128-bit subvector whose element zero it is.
    if (!VT.is256BitVector() && !VT.is512BitVector())
      return SDValue();

    // For these types a cross-lane permute (VPERMQ/VPERMPD) is available
    // instead, so don't bother.
    if (VT == MVT::v4f64 || VT == MVT::v4i64)
      return SDValue();

    // Only broadcast the zero-element of a 128-bit subvector.
    if ((BitOffset % 128) != 0)
      return SDValue();

    assert((BitOffset % V.getScalarValueSizeInBits()) == 0 &&
           "Unexpected bit-offset");
    assert((V.getValueSizeInBits() == 256 || V.getValueSizeInBits() == 512) &&
           "Unexpected vector size");
    unsigned ExtractIdx = BitOffset / V.getScalarValueSizeInBits();
    V = extract128BitVector(V, ExtractIdx, DAG, DL);
  }

  // For scalar sources under the MOVDDUP opcode: with AVX prefer VBROADCAST,
  // otherwise build a v2f64 vector for MOVDDUP to work on.
  if (Opcode == X86ISD::MOVDDUP && !V.getValueType().isVector()) {
    V = DAG.getBitcast(MVT::f64, V);
    if (Subtarget.hasAVX()) {
      V = DAG.getNode(X86ISD::VBROADCAST, DL, MVT::v2f64, V);
      return DAG.getBitcast(VT, V);
    }
    V = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f64, V);
  }

  // If we still have a scalar, broadcast it in its own type and bitcast.
  if (!V.getValueType().isVector()) {
    assert(V.getScalarValueSizeInBits() == NumEltBits &&
           "Unexpected scalar size");
    MVT BroadcastVT = MVT::getVectorVT(V.getSimpleValueType(),
                                       VT.getVectorNumElements());
    return DAG.getBitcast(VT, DAG.getNode(Opcode, DL, BroadcastVT, V));
  }

  // Reduce wide vector sources to 128 bits (element zero is what we
  // broadcast), looking through bitcasts.
  if (V.getValueSizeInBits() > 128)
    V = extract128BitVector(peekThroughBitcasts(V), 0, DAG, DL);

  // Cast V to a vector with the result's element type (possibly narrower than
  // VT) and emit the broadcast.
  unsigned NumSrcElts = V.getValueSizeInBits() / NumEltBits;
  MVT CastVT = MVT::getVectorVT(VT.getVectorElementType(), NumSrcElts);
  return DAG.getNode(Opcode, DL, VT, DAG.getBitcast(CastVT, V));
}
| 13854 | |
| 13855 | |
| 13856 | |
| 13857 | |
| 13858 | |
| 13859 | |
| 13860 | |
/// Try to match a 4-lane shuffle onto a single INSERTPS instruction.
///
/// INSERTPS copies one 32-bit lane from its second operand into any lane of
/// its first operand and can zero an arbitrary subset of lanes. On success,
/// V1/V2 are rewritten (in place) into the instruction's two operands and
/// \p InsertPSMask receives the immediate:
/// (src-lane << 6) | (dst-lane << 4) | zero-mask.
static bool matchShuffleAsInsertPS(SDValue &V1, SDValue &V2,
                                   unsigned &InsertPSMask,
                                   const APInt &Zeroable,
                                   ArrayRef<int> Mask, SelectionDAG &DAG) {
  assert(V1.getSimpleValueType().is128BitVector() && "Bad operand type!");
  assert(V2.getSimpleValueType().is128BitVector() && "Bad operand type!");
  assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");

  // Attempt to match with one element from VA or VB inserted into VA (or
  // undef). Updates V1, V2 and InsertPSMask on success.
  auto matchAsInsertPS = [&](SDValue VA, SDValue VB,
                             ArrayRef<int> CandidateMask) {
    unsigned ZMask = 0;
    int VADstIndex = -1;
    int VBDstIndex = -1;
    bool VAUsedInPlace = false;

    for (int i = 0; i < 4; ++i) {
      // Synthesize a zero mask from the zeroable elements (includes undefs).
      if (Zeroable[i]) {
        ZMask |= 1 << i;
        continue;
      }

      // Lanes reading VA in place cost nothing; just note that VA is live.
      if (i == CandidateMask[i]) {
        VAUsedInPlace = true;
        continue;
      }

      // We can only insert a single out-of-place, non-zeroable element.
      if (VADstIndex >= 0 || VBDstIndex >= 0)
        return false;

      if (CandidateMask[i] < 4) {
        // VA input out of place for insertion.
        VADstIndex = i;
      } else {
        // VB input for insertion.
        VBDstIndex = i;
      }
    }

    // Don't bother if there is no element to insert.
    if (VADstIndex < 0 && VBDstIndex < 0)
      return false;

    // Determine the source lane. The immediate's source index counts from the
    // start of the inserted operand, not the concatenated index space.
    unsigned VBSrcIndex = 0;
    if (VADstIndex >= 0) {
      // An out-of-place VA element: use VA itself as the "insert" operand and
      // drop the original VB entirely.
      VBSrcIndex = CandidateMask[VADstIndex];
      VBDstIndex = VADstIndex;
      VB = VA;
    } else {
      VBSrcIndex = CandidateMask[VBDstIndex] - 4;
    }

    // If no VA lanes are used in place the result comes only from the zero
    // mask and the inserted element, so drop the VA dependency.
    if (!VAUsedInPlace)
      VA = DAG.getUNDEF(MVT::v4f32);

    // Publish the (possibly rewritten) operands.
    V1 = VA;
    V2 = VB;

    // Encode the INSERTPS immediate.
    InsertPSMask = VBSrcIndex << 6 | VBDstIndex << 4 | ZMask;
    assert((InsertPSMask & ~0xFFu) == 0 && "Invalid mask!");
    return true;
  };

  if (matchAsInsertPS(V1, V2, Mask))
    return true;

  // Commute the operands and mask and try again.
  SmallVector<int, 4> CommutedMask(Mask.begin(), Mask.end());
  ShuffleVectorSDNode::commuteMask(CommutedMask);
  if (matchAsInsertPS(V2, V1, CommutedMask))
    return true;

  return false;
}
| 13948 | |
| 13949 | static SDValue lowerShuffleAsInsertPS(const SDLoc &DL, SDValue V1, SDValue V2, |
| 13950 | ArrayRef<int> Mask, const APInt &Zeroable, |
| 13951 | SelectionDAG &DAG) { |
| 13952 | assert(V1.getSimpleValueType() == MVT::v4f32 && "Bad operand type!"); |
| 13953 | assert(V2.getSimpleValueType() == MVT::v4f32 && "Bad operand type!"); |
| 13954 | |
| 13955 | |
| 13956 | unsigned InsertPSMask = 0; |
| 13957 | if (!matchShuffleAsInsertPS(V1, V2, InsertPSMask, Zeroable, Mask, DAG)) |
| 13958 | return SDValue(); |
| 13959 | |
| 13960 | |
| 13961 | return DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32, V1, V2, |
| 13962 | DAG.getTargetConstant(InsertPSMask, DL, MVT::i8)); |
| 13963 | } |
| 13964 | |
| 13965 | |
| 13966 | |
| 13967 | |
| 13968 | |
| 13969 | |
| 13970 | |
| 13971 | |
| 13972 | |
| 13973 | |
/// Try to lower a two-input 128-bit integer shuffle as a permute of each
/// input followed by a single UNPCKL/UNPCKH.
///
/// Each input is pre-shuffled so that, after interleaving, every requested
/// element lands in its target lane. Unpack widths are tried from 64 bits
/// down to the element width. As a fallback, when all inputs come from a
/// single half of the sources, it unpacks first and permutes the interleaved
/// result instead.
static SDValue lowerShuffleAsPermuteAndUnpack(
    const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
    const X86Subtarget &Subtarget, SelectionDAG &DAG) {
  assert(!VT.isFloatingPoint() &&
         "This routine only supports integer vectors.");
  assert(VT.is128BitVector() &&
         "This routine only works on 128-bit vectors.");
  assert(!V2.isUndef() &&
         "This routine should only be used when blending two inputs.");
  assert(Mask.size() >= 2 && "Single element masks are invalid.");

  int Size = Mask.size();

  // Count how many mask lanes read from the low vs. high half of an input.
  int NumLoInputs =
      count_if(Mask, [Size](int M) { return M >= 0 && M % Size < Size / 2; });
  int NumHiInputs =
      count_if(Mask, [Size](int M) { return M % Size >= Size / 2; });

  // Unpack from whichever half supplies the majority of the inputs.
  bool UnpackLo = NumLoInputs >= NumHiInputs;

  auto TryUnpack = [&](int ScalarSize, int Scale) {
    SmallVector<int, 16> V1Mask((unsigned)Size, -1);
    SmallVector<int, 16> V2Mask((unsigned)Size, -1);

    for (int i = 0; i < Size; ++i) {
      if (Mask[i] < 0)
        continue;

      // Each unpack element covers Scale mask lanes at this granularity.
      int UnpackIdx = i / Scale;

      // Unpacks interleave V1 into even slots and V2 into odd slots; the mask
      // lane must agree with that assignment.
      if ((UnpackIdx % 2 == 0) != (Mask[i] < Size))
        return SDValue();

      // Record where the pre-shuffle must place this element so that the
      // unpack moves it into lane i; the offset accounts for the unpack
      // stride and for reading the low vs. high half.
      SmallVectorImpl<int> &VMask = (UnpackIdx % 2 == 0) ? V1Mask : V2Mask;
      VMask[(UnpackIdx / 2) * Scale + i % Scale + (UnpackLo ? 0 : Size / 2)] =
          Mask[i] % Size;
    }

    // If all inputs come from one half and both pre-shuffles are non-trivial,
    // skip: the unpack-then-permute fallback below is cheaper in that case.
    if ((NumLoInputs == 0 || NumHiInputs == 0) && !isNoopShuffleMask(V1Mask) &&
        !isNoopShuffleMask(V2Mask))
      return SDValue();

    // Shuffle the inputs into place.
    V1 = DAG.getVectorShuffle(VT, DL, V1, DAG.getUNDEF(VT), V1Mask);
    V2 = DAG.getVectorShuffle(VT, DL, V2, DAG.getUNDEF(VT), V2Mask);

    // Cast the inputs to the integer type we will unpack at.
    MVT UnpackVT = MVT::getVectorVT(MVT::getIntegerVT(ScalarSize), Size / Scale);
    V1 = DAG.getBitcast(UnpackVT, V1);
    V2 = DAG.getBitcast(UnpackVT, V2);

    // Unpack and cast the result back to the requested type.
    return DAG.getBitcast(
        VT, DAG.getNode(UnpackLo ? X86ISD::UNPCKL : X86ISD::UNPCKH, DL,
                        UnpackVT, V1, V2));
  };

  // Try each unpack width from the widest down, to find one that fits.
  int OrigScalarSize = VT.getScalarSizeInBits();
  for (int ScalarSize = 64; ScalarSize >= OrigScalarSize; ScalarSize /= 2)
    if (SDValue Unpack = TryUnpack(ScalarSize, ScalarSize / OrigScalarSize))
      return Unpack;

  // Don't wrap an all-zero input in shuffle-of-unpack: that would lose track
  // of the known-zero elements.
  if (ISD::isBuildVectorAllZeros(V1.getNode()) ||
      ISD::isBuildVectorAllZeros(V2.getNode()))
    return SDValue();

  // Fallback: when every input comes from a single half, unpack first and
  // then permute the interleaved result into place.
  if (NumLoInputs == 0 || NumHiInputs == 0) {
    assert((NumLoInputs > 0 || NumHiInputs > 0) &&
           "We have to have *some* inputs!");
    int HalfOffset = NumLoInputs == 0 ? Size / 2 : 0;

    // After UNPCKL/UNPCKH, source element (M % Size) from half HalfOffset
    // sits at interleaved position 2*(M%Size - HalfOffset) (+1 for V2).
    SmallVector<int, 32> PermMask((unsigned)Size, -1);
    for (int i = 0; i < Size; ++i) {
      if (Mask[i] < 0)
        continue;

      assert(Mask[i] % Size >= HalfOffset && "Found input from wrong half!");

      PermMask[i] =
          2 * ((Mask[i] % Size) - HalfOffset) + (Mask[i] < Size ? 0 : 1);
    }
    return DAG.getVectorShuffle(
        VT, DL, DAG.getNode(NumLoInputs == 0 ? X86ISD::UNPCKH : X86ISD::UNPCKL,
                            DL, VT, V1, V2),
        DAG.getUNDEF(VT), PermMask);
  }

  return SDValue();
}
| 14081 | |
| 14082 | |
| 14083 | |
| 14084 | |
| 14085 | |
| 14086 | |
| 14087 | |
| 14088 | |
/// Handle lowering of 2-lane 64-bit floating point shuffles.
///
/// Tries broadcast, VPERMILPI/SHUFP for single-input masks, then a cascade of
/// strategies (vperm-of-extracts, element insertion, MOVSD, blend, unpack)
/// for two-input masks, with SHUFPD as the final fallback.
static SDValue lowerV2F64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
                                 const APInt &Zeroable, SDValue V1, SDValue V2,
                                 const X86Subtarget &Subtarget,
                                 SelectionDAG &DAG) {
  assert(V1.getSimpleValueType() == MVT::v2f64 && "Bad operand type!");
  assert(V2.getSimpleValueType() == MVT::v2f64 && "Bad operand type!");
  assert(Mask.size() == 2 && "Unexpected mask size for v2 shuffle!");

  if (V2.isUndef()) {
    // Check for being able to broadcast a single element.
    if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v2f64, V1, V2,
                                                    Mask, Subtarget, DAG))
      return Broadcast;

    // Straight shuffle of a single input: encode the two lane selections
    // into a SHUFPD-style immediate.
    unsigned SHUFPDMask = (Mask[0] == 1) | ((Mask[1] == 1) << 1);

    if (Subtarget.hasAVX()) {
      // With AVX, VPERMILPI takes a single operand, which also allows folding
      // a load into the shuffle.
      return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v2f64, V1,
                         DAG.getTargetConstant(SHUFPDMask, DL, MVT::i8));
    }

    // Otherwise use SHUFP with V1 as both inputs (undef lanes stay undef).
    return DAG.getNode(
        X86ISD::SHUFP, DL, MVT::v2f64,
        Mask[0] == SM_SentinelUndef ? DAG.getUNDEF(MVT::v2f64) : V1,
        Mask[1] == SM_SentinelUndef ? DAG.getUNDEF(MVT::v2f64) : V1,
        DAG.getTargetConstant(SHUFPDMask, DL, MVT::i8));
  }
  assert(Mask[0] >= 0 && "No undef lanes in multi-input v2 shuffles!");
  assert(Mask[1] >= 0 && "No undef lanes in multi-input v2 shuffles!");
  assert(Mask[0] < 2 && "We sort V1 to be the first input.");
  assert(Mask[1] >= 2 && "We sort V2 to be the second input.");

  // Shuffles of two extracts of the same wide vector can use VPERM directly.
  if (Subtarget.hasAVX2())
    if (SDValue Extract = lowerShuffleOfExtractsAsVperm(DL, V1, V2, Mask, DAG))
      return Extract;

  // When loading a scalar and then shuffling it into a vector we can often do
  // the insertion cheaply.
  if (SDValue Insertion = lowerShuffleAsElementInsertion(
          DL, MVT::v2f64, V1, V2, Mask, Zeroable, Subtarget, DAG))
    return Insertion;

  // Try the commuted insertion too; for v2 masks flipping source vectors is
  // just a XOR of each index with 2.
  int InverseMask[2] = {Mask[0] < 0 ? -1 : (Mask[0] ^ 2),
                        Mask[1] < 0 ? -1 : (Mask[1] ^ 2)};
  if (SDValue Insertion = lowerShuffleAsElementInsertion(
          DL, MVT::v2f64, V2, V1, InverseMask, Zeroable, Subtarget, DAG))
    return Insertion;

  // Masks equivalent to {0,3} or {1,3} with a scalar-backed V1 low element
  // can use MOVSD over V2.
  if (isShuffleEquivalent(Mask, {0, 3}, V1, V2) ||
      isShuffleEquivalent(Mask, {1, 3}, V1, V2))
    if (SDValue V1S = getScalarValueForVectorElement(V1, Mask[0], DAG))
      // Move the scalar into the low double of V2.
      return DAG.getNode(
          X86ISD::MOVSD, DL, MVT::v2f64, V2,
          DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f64, V1S));

  if (Subtarget.hasSSE41())
    if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v2f64, V1, V2, Mask,
                                            Zeroable, Subtarget, DAG))
      return Blend;

  // Use dedicated unpack instructions for masks that match their pattern.
  if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v2f64, Mask, V1, V2, DAG))
    return V;

  // Fall back to SHUFPD of the two inputs.
  unsigned SHUFPDMask = (Mask[0] == 1) | (((Mask[1] - 2) == 1) << 1);
  return DAG.getNode(X86ISD::SHUFP, DL, MVT::v2f64, V1, V2,
                     DAG.getTargetConstant(SHUFPDMask, DL, MVT::i8));
}
| 14166 | |
| 14167 | |
| 14168 | |
| 14169 | |
| 14170 | |
| 14171 | |
| 14172 | |
/// Handle lowering of 2-lane 64-bit integer shuffles.
///
/// Prefers integer-domain instructions (PSHUFD, shifts, blends, unpacks,
/// rotates) and only falls back to a bitcast through v2f64 — which crosses
/// the int/fp execution domain — as a last resort.
static SDValue lowerV2I64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
                                 const APInt &Zeroable, SDValue V1, SDValue V2,
                                 const X86Subtarget &Subtarget,
                                 SelectionDAG &DAG) {
  assert(V1.getSimpleValueType() == MVT::v2i64 && "Bad operand type!");
  assert(V2.getSimpleValueType() == MVT::v2i64 && "Bad operand type!");
  assert(Mask.size() == 2 && "Unexpected mask size for v2 shuffle!");

  if (V2.isUndef()) {
    // Check for being able to broadcast a single element.
    if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v2i64, V1, V2,
                                                    Mask, Subtarget, DAG))
      return Broadcast;

    // Straight shuffle of a single input: widen the v2 mask to a v4i32 mask
    // (each i64 lane becomes two consecutive i32 lanes) and use PSHUFD.
    V1 = DAG.getBitcast(MVT::v4i32, V1);
    int WidenedMask[4] = {Mask[0] < 0 ? -1 : (Mask[0] * 2),
                          Mask[0] < 0 ? -1 : ((Mask[0] * 2) + 1),
                          Mask[1] < 0 ? -1 : (Mask[1] * 2),
                          Mask[1] < 0 ? -1 : ((Mask[1] * 2) + 1)};
    return DAG.getBitcast(
        MVT::v2i64,
        DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, V1,
                    getV4X86ShuffleImm8ForMask(WidenedMask, DL, DAG)));
  }
  assert(Mask[0] != -1 && "No undef lanes in multi-input v2 shuffles!");
  assert(Mask[1] != -1 && "No undef lanes in multi-input v2 shuffles!");
  assert(Mask[0] < 2 && "We sort V1 to be the first input.");
  assert(Mask[1] >= 2 && "We sort V2 to be the second input.");

  // Shuffles of two extracts of the same wide vector can use VPERM directly.
  if (Subtarget.hasAVX2())
    if (SDValue Extract = lowerShuffleOfExtractsAsVperm(DL, V1, V2, Mask, DAG))
      return Extract;

  // Try to use shift instructions.
  if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v2i64, V1, V2, Mask,
                                          Zeroable, Subtarget, DAG))
    return Shift;

  // When loading a scalar and then shuffling it into a vector we can often do
  // the insertion cheaply.
  if (SDValue Insertion = lowerShuffleAsElementInsertion(
          DL, MVT::v2i64, V1, V2, Mask, Zeroable, Subtarget, DAG))
    return Insertion;

  // Try the commuted insertion too; for v2 masks flipping source vectors is
  // just a XOR of each index with 2.
  int InverseMask[2] = {Mask[0] ^ 2, Mask[1] ^ 2};
  if (SDValue Insertion = lowerShuffleAsElementInsertion(
          DL, MVT::v2i64, V2, V1, InverseMask, Zeroable, Subtarget, DAG))
    return Insertion;

  // Blend availability gates both the blend attempt here and the decomposed
  // merge below; keep the predicate in one place.
  bool IsBlendSupported = Subtarget.hasSSE41();
  if (IsBlendSupported)
    if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v2i64, V1, V2, Mask,
                                            Zeroable, Subtarget, DAG))
      return Blend;

  // Use dedicated unpack instructions for masks that match their pattern.
  if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v2i64, Mask, V1, V2, DAG))
    return V;

  // Try rotate instructions (VALIGN needs VLX; byte rotates need SSSE3).
  if (Subtarget.hasSSSE3()) {
    if (Subtarget.hasVLX())
      if (SDValue Rotate = lowerShuffleAsVALIGN(DL, MVT::v2i64, V1, V2, Mask,
                                                Subtarget, DAG))
        return Rotate;

    if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v2i64, V1, V2, Mask,
                                                  Subtarget, DAG))
      return Rotate;
  }

  // With direct blend support, decompose into per-input permutes plus a
  // blend rather than crossing into the FP domain below.
  if (IsBlendSupported)
    return lowerShuffleAsDecomposedShuffleMerge(DL, MVT::v2i64, V1, V2, Mask,
                                                Subtarget, DAG);

  // Last resort: bitcast to v2f64 and shuffle there. The int/fp domain cross
  // can cost extra cycles on some chips, but there is no better SSE2 option.
  V1 = DAG.getBitcast(MVT::v2f64, V1);
  V2 = DAG.getBitcast(MVT::v2f64, V2);
  return DAG.getBitcast(MVT::v2i64,
                        DAG.getVectorShuffle(MVT::v2f64, DL, V1, V2, Mask));
}
| 14266 | |
| 14267 | |
| 14268 | |
| 14269 | |
| 14270 | |
| 14271 | |
/// Lower a 4-lane vector shuffle using (possibly chained) SHUFP instructions.
///
/// SHUFP picks its two low result lanes from the first operand and its two
/// high result lanes from the second, so mixed masks need a preparatory
/// SHUFP to gather elements before the final one. This routine makes no
/// claim that SHUFPS is the *best* lowering; it simply uses it.
static SDValue lowerShuffleWithSHUFPS(const SDLoc &DL, MVT VT,
                                      ArrayRef<int> Mask, SDValue V1,
                                      SDValue V2, SelectionDAG &DAG) {
  SDValue LowV = V1, HighV = V2;
  SmallVector<int, 4> NewMask(Mask.begin(), Mask.end());
  int NumV2Elements = count_if(Mask, [](int M) { return M >= 4; });

  if (NumV2Elements == 1) {
    int V2Index = find_if(Mask, [](int M) { return M >= 4; }) - Mask.begin();

    // The lane paired with V2Index in the same result half (low bit toggled).
    int V2AdjIndex = V2Index ^ 1;

    if (Mask[V2AdjIndex] < 0) {
      // The adjacent lane is undef, so the half containing the V2 element can
      // read entirely from V2: make V2 the operand feeding that half.
      if (V2Index < 2)
        std::swap(LowV, HighV);
      NewMask[V2Index] -= 4;
    } else {
      // The V2 element sits next to a live V1 element. First blend the two
      // needed elements into one register...
      int V1Index = V2AdjIndex;
      int BlendMask[4] = {Mask[V2Index] - 4, 0, Mask[V1Index], 0};
      V2 = DAG.getNode(X86ISD::SHUFP, DL, VT, V2, V1,
                       getV4X86ShuffleImm8ForMask(BlendMask, DL, DAG));

      // ...then feed that register into whichever half of the final SHUFP
      // needs it.
      if (V2Index < 2) {
        LowV = V2;
        HighV = V1;
      } else {
        HighV = V2;
      }
      NewMask[V1Index] = 2; // The V1 element was blended into lane 2.
      NewMask[V2Index] = 0; // The V2 element was blended into lane 0.
    }
  } else if (NumV2Elements == 2) {
    if (Mask[0] < 4 && Mask[1] < 4) {
      // Easy case: V1 supplies the low half and V2 the high half; just
      // rebase the V2 indices.
      NewMask[2] -= 4;
      NewMask[3] -= 4;
    } else if (Mask[2] < 4 && Mask[3] < 4) {
      // Reversed case: swap the operands so each half still reads from a
      // single source.
      NewMask[0] -= 4;
      NewMask[1] -= 4;
      HighV = V1;
      LowV = V2;
    } else {
      // V1 and V2 are mixed within each half. Gather the needed elements
      // with one SHUFP (first two blend lanes from V1, last two from V2)...
      int BlendMask[4] = {Mask[0] < 4 ? Mask[0] : Mask[1],
                          Mask[2] < 4 ? Mask[2] : Mask[3],
                          (Mask[0] >= 4 ? Mask[0] : Mask[1]) - 4,
                          (Mask[2] >= 4 ? Mask[2] : Mask[3]) - 4};
      V1 = DAG.getNode(X86ISD::SHUFP, DL, VT, V1, V2,
                       getV4X86ShuffleImm8ForMask(BlendMask, DL, DAG));

      // ...then shuffle the gathered register against itself into final
      // position.
      LowV = HighV = V1;
      NewMask[0] = Mask[0] < 4 ? 0 : 2;
      NewMask[1] = Mask[0] < 4 ? 2 : 0;
      NewMask[2] = Mask[2] < 4 ? 1 : 3;
      NewMask[3] = Mask[2] < 4 ? 3 : 1;
    }
  } else if (NumV2Elements == 3) {
    // Mostly-V2 masks: commute so at most one element comes from the (new)
    // V2, then recurse into the single-element path above.
    ShuffleVectorSDNode::commuteMask(NewMask);
    return lowerShuffleWithSHUFPS(DL, VT, NewMask, V2, V1, DAG);
  }
  return DAG.getNode(X86ISD::SHUFP, DL, VT, LowV, HighV,
                     getV4X86ShuffleImm8ForMask(NewMask, DL, DAG));
}
| 14358 | |
| 14359 | |
| 14360 | |
| 14361 | |
| 14362 | |
| 14363 | |
/// Lower 4-lane 32-bit floating point shuffles.
///
/// Uses instructions exclusively from the floating point unit to minimize
/// domain crossing penalties, as these are sufficient to implement all v4f32
/// shuffles.
static SDValue lowerV4F32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
                                 const APInt &Zeroable, SDValue V1, SDValue V2,
                                 const X86Subtarget &Subtarget,
                                 SelectionDAG &DAG) {
  assert(V1.getSimpleValueType() == MVT::v4f32 && "Bad operand type!");
  assert(V2.getSimpleValueType() == MVT::v4f32 && "Bad operand type!");
  assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");

  int NumV2Elements = count_if(Mask, [](int M) { return M >= 4; });

  if (NumV2Elements == 0) {
    // Check for being able to broadcast a single element.
    if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v4f32, V1, V2,
                                                    Mask, Subtarget, DAG))
      return Broadcast;

    // Use even/odd duplicate instructions for masks that match their pattern.
    if (Subtarget.hasSSE3()) {
      if (isShuffleEquivalent(Mask, {0, 0, 2, 2}, V1, V2))
        return DAG.getNode(X86ISD::MOVSLDUP, DL, MVT::v4f32, V1);
      if (isShuffleEquivalent(Mask, {1, 1, 3, 3}, V1, V2))
        return DAG.getNode(X86ISD::MOVSHDUP, DL, MVT::v4f32, V1);
    }

    if (Subtarget.hasAVX()) {
      // With AVX the single-input permute is handled directly by VPERMILPS,
      // which only needs one register operand.
      return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v4f32, V1,
                         getV4X86ShuffleImm8ForMask(Mask, DL, DAG));
    }

    // Without SSE2, use MOVLHPS/MOVHLPS against V1 itself for the matching
    // duplicate-half patterns.
    if (!Subtarget.hasSSE2()) {
      if (isShuffleEquivalent(Mask, {0, 1, 0, 1}, V1, V2))
        return DAG.getNode(X86ISD::MOVLHPS, DL, MVT::v4f32, V1, V1);
      if (isShuffleEquivalent(Mask, {2, 3, 2, 3}, V1, V2))
        return DAG.getNode(X86ISD::MOVHLPS, DL, MVT::v4f32, V1, V1);
    }

    // Otherwise, use a straight shuffle of a single input vector. We pass the
    // input vector to both operands to simulate this with a SHUFPS.
    return DAG.getNode(X86ISD::SHUFP, DL, MVT::v4f32, V1, V1,
                       getV4X86ShuffleImm8ForMask(Mask, DL, DAG));
  }

  if (Subtarget.hasAVX2())
    if (SDValue Extract = lowerShuffleOfExtractsAsVperm(DL, V1, V2, Mask, DAG))
      return Extract;

  // There are special ways we can lower some single-element blends. However,
  // we only attempt element insertion here when the single V2 element targets
  // lane 0 (the fast case); other single-element blends are handled by the
  // SSE4.1 paths below.
  if (NumV2Elements == 1 && Mask[0] >= 4)
    if (SDValue V = lowerShuffleAsElementInsertion(
            DL, MVT::v4f32, V1, V2, Mask, Zeroable, Subtarget, DAG))
      return V;

  if (Subtarget.hasSSE41()) {
    if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v4f32, V1, V2, Mask,
                                            Zeroable, Subtarget, DAG))
      return Blend;

    // Use INSERTPS if we can complete the shuffle efficiently.
    if (SDValue V = lowerShuffleAsInsertPS(DL, V1, V2, Mask, Zeroable, DAG))
      return V;

    // If a single SHUFPS would do, skip the blend+permute decomposition.
    if (!isSingleSHUFPSMask(Mask))
      if (SDValue BlendPerm = lowerShuffleAsBlendAndPermute(DL, MVT::v4f32, V1,
                                                            V2, Mask, DAG))
        return BlendPerm;
  }

  // Without SSE2, use the two-input MOVLHPS/MOVHLPS patterns directly.
  if (!Subtarget.hasSSE2()) {
    if (isShuffleEquivalent(Mask, {0, 1, 4, 5}, V1, V2))
      return DAG.getNode(X86ISD::MOVLHPS, DL, MVT::v4f32, V1, V2);
    if (isShuffleEquivalent(Mask, {2, 3, 6, 7}, V1, V2))
      return DAG.getNode(X86ISD::MOVHLPS, DL, MVT::v4f32, V2, V1);
  }

  // Use dedicated unpack instructions for masks that match their pattern.
  if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v4f32, Mask, V1, V2, DAG))
    return V;

  // Otherwise fall back to a SHUFPS lowering strategy.
  return lowerShuffleWithSHUFPS(DL, MVT::v4f32, Mask, V1, V2, DAG);
}
| 14455 | |
| 14456 | |
| 14457 | |
| 14458 | |
| 14459 | |
/// Lower 4-lane i32 vector shuffles.
///
/// We try to handle these with integer-domain shuffles where possible, but at
/// the end fall back to a floating-point SHUFPS via bitcasts when no integer
/// lowering matches.
static SDValue lowerV4I32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
                                 const APInt &Zeroable, SDValue V1, SDValue V2,
                                 const X86Subtarget &Subtarget,
                                 SelectionDAG &DAG) {
  assert(V1.getSimpleValueType() == MVT::v4i32 && "Bad operand type!");
  assert(V2.getSimpleValueType() == MVT::v4i32 && "Bad operand type!");
  assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");

  // Whenever we can lower this as a zext, that instruction is strictly faster
  // than any alternative. It also allows us to fold memory operands into the
  // shuffle in many cases.
  if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend(DL, MVT::v4i32, V1, V2, Mask,
                                                   Zeroable, Subtarget, DAG))
    return ZExt;

  int NumV2Elements = count_if(Mask, [](int M) { return M >= 4; });

  if (NumV2Elements == 0) {
    // Try to use broadcast unless the mask only has one non-undef element.
    if (count_if(Mask, [](int M) { return M >= 0 && M < 4; }) > 1) {
      if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v4i32, V1, V2,
                                                      Mask, Subtarget, DAG))
        return Broadcast;
    }

    // Straight shuffle of a single input vector. For everything from SSE2
    // onward this has a single fast instruction with no scary immediates.
    // We coerce the shuffle pattern to be compatible with UNPCK instructions
    // but we aren't actually going to use the UNPCK instruction because doing
    // so prevents folding a load into this instruction or making a copy.
    const int UnpackLoMask[] = {0, 0, 1, 1};
    const int UnpackHiMask[] = {2, 2, 3, 3};
    if (isShuffleEquivalent(Mask, {0, 0, 1, 1}, V1, V2))
      Mask = UnpackLoMask;
    else if (isShuffleEquivalent(Mask, {2, 2, 3, 3}, V1, V2))
      Mask = UnpackHiMask;

    return DAG.getNode(X86ISD::PSHUFD, DL, MVT::v4i32, V1,
                       getV4X86ShuffleImm8ForMask(Mask, DL, DAG));
  }

  if (Subtarget.hasAVX2())
    if (SDValue Extract = lowerShuffleOfExtractsAsVperm(DL, V1, V2, Mask, DAG))
      return Extract;

  // Try to use shift instructions.
  if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v4i32, V1, V2, Mask,
                                          Zeroable, Subtarget, DAG))
    return Shift;

  // There are special ways we can lower some single-element blends.
  if (NumV2Elements == 1)
    if (SDValue V = lowerShuffleAsElementInsertion(
            DL, MVT::v4i32, V1, V2, Mask, Zeroable, Subtarget, DAG))
      return V;

  // We have different paths for blend lowering, but they all must use the
  // *exact* same predicate.
  bool IsBlendSupported = Subtarget.hasSSE41();
  if (IsBlendSupported)
    if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v4i32, V1, V2, Mask,
                                            Zeroable, Subtarget, DAG))
      return Blend;

  if (SDValue Masked = lowerShuffleAsBitMask(DL, MVT::v4i32, V1, V2, Mask,
                                             Zeroable, Subtarget, DAG))
    return Masked;

  // Use dedicated unpack instructions for masks that match their pattern.
  if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v4i32, Mask, V1, V2, DAG))
    return V;

  // Try to use rotation instructions (VALIGN with VLX, otherwise byte
  // rotates); these are only considered from SSSE3 onward.
  if (Subtarget.hasSSSE3()) {
    if (Subtarget.hasVLX())
      if (SDValue Rotate = lowerShuffleAsVALIGN(DL, MVT::v4i32, V1, V2, Mask,
                                                Subtarget, DAG))
        return Rotate;

    if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v4i32, V1, V2, Mask,
                                                  Subtarget, DAG))
      return Rotate;
  }

  // If a single SHUFPS would not suffice, try decomposition or a
  // permute-then-unpack sequence before paying the domain-crossing cost.
  if (!isSingleSHUFPSMask(Mask)) {
    // If we have direct support for blends, we should lower by decomposing
    // into a permute + blend.
    if (IsBlendSupported)
      return lowerShuffleAsDecomposedShuffleMerge(DL, MVT::v4i32, V1, V2, Mask,
                                                  Subtarget, DAG);

    // Try to lower by permuting the inputs into an unpack instruction.
    if (SDValue Unpack = lowerShuffleAsPermuteAndUnpack(DL, MVT::v4i32, V1, V2,
                                                        Mask, Subtarget, DAG))
      return Unpack;
  }

  // We implement this with SHUFPS because it can blend from two vectors.
  // Because we're going to eventually use SHUFPS, we use SHUFPS even to build
  // up the inputs, bypassing domain shift penalties that we would incur if we
  // directly used PSHUFD on Nehalem and older. For newer chips, this isn't
  // relevant.
  SDValue CastV1 = DAG.getBitcast(MVT::v4f32, V1);
  SDValue CastV2 = DAG.getBitcast(MVT::v4f32, V2);
  SDValue ShufPS = DAG.getVectorShuffle(MVT::v4f32, DL, CastV1, CastV2, Mask);
  return DAG.getBitcast(MVT::v4i32, ShufPS);
}
| 14571 | |
| 14572 | |
| 14573 | |
| 14574 | |
| 14575 | |
| 14576 | |
| 14577 | |
| 14578 | |
| 14579 | |
| 14580 | |
| 14581 | |
| 14582 | |
| 14583 | |
| 14584 | |
| 14585 | |
| 14586 | |
| 14587 | |
/// Generic single-input lowering of 8-lane i16 shuffles.
///
/// The strategy is to form pairs of input lanes targeted at the same half of
/// the final vector (with PSHUFLW/PSHUFHW), move those dword pairs onto the
/// correct half with a PSHUFD, and finally shuffle each half into its final
/// position. The mask is mutated in place as the input is progressively
/// rearranged. Note that \p Mask must always be an 8-element mask; for wider
/// i16 types VT the same 8-lane pattern is applied per 128-bit lane (PSHUFD
/// operates on VT.getVectorNumElements()/2 i32 lanes via PSHUFDVT).
static SDValue lowerV8I16GeneralSingleInputShuffle(
    const SDLoc &DL, MVT VT, SDValue V, MutableArrayRef<int> Mask,
    const X86Subtarget &Subtarget, SelectionDAG &DAG) {
  assert(VT.getVectorElementType() == MVT::i16 && "Bad input type!");
  MVT PSHUFDVT = MVT::getVectorVT(MVT::i32, VT.getVectorNumElements() / 2);

  assert(Mask.size() == 8 && "Shuffle mask length doesn't match!");
  MutableArrayRef<int> LoMask = Mask.slice(0, 4);
  MutableArrayRef<int> HiMask = Mask.slice(4, 4);

  // Attempt to directly match PSHUFLW or PSHUFHW.
  if (isUndefOrInRange(LoMask, 0, 4) &&
      isSequentialOrUndefInRange(HiMask, 0, 4, 4)) {
    return DAG.getNode(X86ISD::PSHUFLW, DL, VT, V,
                       getV4X86ShuffleImm8ForMask(LoMask, DL, DAG));
  }
  if (isUndefOrInRange(HiMask, 4, 8) &&
      isSequentialOrUndefInRange(LoMask, 0, 4, 0)) {
    // Rebase the high-half indices to 0..3 for the PSHUFHW immediate.
    for (int i = 0; i != 4; ++i)
      HiMask[i] = (HiMask[i] < 0 ? HiMask[i] : (HiMask[i] - 4));
    return DAG.getNode(X86ISD::PSHUFHW, DL, VT, V,
                       getV4X86ShuffleImm8ForMask(HiMask, DL, DAG));
  }

  // Collect the sorted, deduplicated input indices feeding each half, then
  // partition them by which half of the *source* they come from:
  //   L->L, H->L (inputs to the low half), L->H, H->H (inputs to the high
  //   half).
  SmallVector<int, 4> LoInputs;
  copy_if(LoMask, std::back_inserter(LoInputs), [](int M) { return M >= 0; });
  array_pod_sort(LoInputs.begin(), LoInputs.end());
  LoInputs.erase(std::unique(LoInputs.begin(), LoInputs.end()), LoInputs.end());
  SmallVector<int, 4> HiInputs;
  copy_if(HiMask, std::back_inserter(HiInputs), [](int M) { return M >= 0; });
  array_pod_sort(HiInputs.begin(), HiInputs.end());
  HiInputs.erase(std::unique(HiInputs.begin(), HiInputs.end()), HiInputs.end());
  int NumLToL = llvm::lower_bound(LoInputs, 4) - LoInputs.begin();
  int NumHToL = LoInputs.size() - NumLToL;
  int NumLToH = llvm::lower_bound(HiInputs, 4) - HiInputs.begin();
  int NumHToH = HiInputs.size() - NumLToH;
  MutableArrayRef<int> LToLInputs(LoInputs.data(), NumLToL);
  MutableArrayRef<int> LToHInputs(HiInputs.data(), NumLToH);
  MutableArrayRef<int> HToLInputs(LoInputs.data() + NumLToL, NumHToL);
  MutableArrayRef<int> HToHInputs(HiInputs.data() + NumLToH, NumHToH);

  // If we are shuffling values from one half only, check how many distinct
  // dword pairs we need to create. If only 1 or 2, we can perform this as a
  // single PSHUFLW/PSHUFHW (to form the pairs) followed by a PSHUFD (to place
  // them), instead of the longer generic sequence below.
  auto ShuffleDWordPairs = [&](ArrayRef<int> PSHUFHalfMask,
                               ArrayRef<int> PSHUFDMask, unsigned ShufWOp) {
    V = DAG.getNode(ShufWOp, DL, VT, V,
                    getV4X86ShuffleImm8ForMask(PSHUFHalfMask, DL, DAG));
    V = DAG.getBitcast(PSHUFDVT, V);
    V = DAG.getNode(X86ISD::PSHUFD, DL, PSHUFDVT, V,
                    getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG));
    return DAG.getBitcast(VT, V);
  };

  if ((NumHToL + NumHToH) == 0 || (NumLToL + NumLToH) == 0) {
    int PSHUFDMask[4] = { -1, -1, -1, -1 };
    SmallVector<std::pair<int, int>, 4> DWordPairs;
    // Formed pairs live in dwords 0/1 for a low-half source, 2/3 for a
    // high-half source.
    int DOffset = ((NumHToL + NumHToH) == 0 ? 0 : 2);

    // Collect the different DWORD pairs, merging compatible (undef-matching)
    // pairs and recording for each result dword which pair it reads.
    for (int DWord = 0; DWord != 4; ++DWord) {
      int M0 = Mask[2 * DWord + 0];
      int M1 = Mask[2 * DWord + 1];
      M0 = (M0 >= 0 ? M0 % 4 : M0);
      M1 = (M1 >= 0 ? M1 % 4 : M1);
      if (M0 < 0 && M1 < 0)
        continue;

      bool Match = false;
      for (int j = 0, e = DWordPairs.size(); j < e; ++j) {
        auto &DWordPair = DWordPairs[j];
        if ((M0 < 0 || isUndefOrEqual(DWordPair.first, M0)) &&
            (M1 < 0 || isUndefOrEqual(DWordPair.second, M1))) {
          // Refine the existing pair with any newly-defined lanes.
          DWordPair.first = (M0 >= 0 ? M0 : DWordPair.first);
          DWordPair.second = (M1 >= 0 ? M1 : DWordPair.second);
          PSHUFDMask[DWord] = DOffset + j;
          Match = true;
          break;
        }
      }
      if (!Match) {
        PSHUFDMask[DWord] = DOffset + DWordPairs.size();
        DWordPairs.push_back(std::make_pair(M0, M1));
      }
    }

    if (DWordPairs.size() <= 2) {
      DWordPairs.resize(2, std::make_pair(-1, -1));
      int PSHUFHalfMask[4] = {DWordPairs[0].first, DWordPairs[0].second,
                              DWordPairs[1].first, DWordPairs[1].second};
      if ((NumHToL + NumHToH) == 0)
        return ShuffleDWordPairs(PSHUFHalfMask, PSHUFDMask, X86ISD::PSHUFLW);
      if ((NumLToL + NumLToH) == 0)
        return ShuffleDWordPairs(PSHUFHalfMask, PSHUFDMask, X86ISD::PSHUFHW);
    }
  }

  // Simplify the 1-into-3 and 3-into-1 cases with a single PSHUFD. For all
  // such inputs we can swap two of the dwords across the half mark and end up
  // with <=2 inputs to each half in each half, at which point the generic
  // code below applies. For example:
  //
  // Input: [a, b, c, d, e, f, g, h] -PSHUFD[0,2,1,3]-> [a, b, e, f, c, d, g, h]
  // Mask:  [0, 1, 2, 7] -----------------------------> [0, 1, 4, 7]
  //
  // However, in rare cases there is also an existing 2-into-2 shuffle on the
  // other half whose inputs would be clobbered by that dword swap. When
  // exactly one of its inputs lands in a swapped dword (so the swap would
  // split the pair), we first do a word shuffle within the affected half to
  // move the problematic word out of the way (see FixFlippedInputs below)
  // before applying the dword swap. This also prevents the recursion from
  // re-creating a 3:1 imbalance and cycling endlessly.
  auto balanceSides = [&](ArrayRef<int> AToAInputs, ArrayRef<int> BToAInputs,
                          ArrayRef<int> BToBInputs, ArrayRef<int> AToBInputs,
                          int AOffset, int BOffset) {
    assert((AToAInputs.size() == 3 || AToAInputs.size() == 1) &&
           "Must call this with A having 3 or 1 inputs from the A half.");
    assert((BToAInputs.size() == 1 || BToAInputs.size() == 3) &&
           "Must call this with B having 1 or 3 inputs from the B half.");
    assert(AToAInputs.size() + BToAInputs.size() == 4 &&
           "Must call this with either 3:1 or 1:3 inputs (summing to 4).");

    bool ThreeAInputs = AToAInputs.size() == 3;

    // Compute the index of the dword with only one word among the three
    // inputs in a half: take the sum of all four indices of that half and
    // subtract the sum of the three actual inputs; the difference is the
    // remaining (non-input) slot.
    int ADWord = 0, BDWord = 0;
    int &TripleDWord = ThreeAInputs ? ADWord : BDWord;
    int &OneInputDWord = ThreeAInputs ? BDWord : ADWord;
    int TripleInputOffset = ThreeAInputs ? AOffset : BOffset;
    ArrayRef<int> TripleInputs = ThreeAInputs ? AToAInputs : BToAInputs;
    int OneInput = ThreeAInputs ? BToAInputs[0] : AToAInputs[0];
    int TripleInputSum = 0 + 1 + 2 + 3 + (4 * TripleInputOffset);
    int TripleNonInputIdx =
        TripleInputSum - std::accumulate(TripleInputs.begin(), TripleInputs.end(), 0);
    TripleDWord = TripleNonInputIdx / 2;

    // We use xor with one to compute the adjacent DWord to whichever one the
    // OneInput is in.
    OneInputDWord = (OneInput / 2) ^ 1;

    // Guard the 2-into-2 shuffle on the other half against the dword swap
    // splitting one of its pairs (see the block comment above balanceSides).
    if (BToBInputs.size() == 2 && AToBInputs.size() == 2) {
      // Count how many of each half's "stay" inputs fall into the dwords the
      // PSHUFD below will swap (the "flipped" dwords).
      int NumFlippedAToBInputs =
          std::count(AToBInputs.begin(), AToBInputs.end(), 2 * ADWord) +
          std::count(AToBInputs.begin(), AToBInputs.end(), 2 * ADWord + 1);
      int NumFlippedBToBInputs =
          std::count(BToBInputs.begin(), BToBInputs.end(), 2 * BDWord) +
          std::count(BToBInputs.begin(), BToBInputs.end(), 2 * BDWord + 1);
      // Only a lone straddling input is problematic; 0 or 2 flipped inputs
      // survive the swap as an intact (or untouched) pair.
      if ((NumFlippedAToBInputs == 1 &&
           (NumFlippedBToBInputs == 0 || NumFlippedBToBInputs == 2)) ||
          (NumFlippedBToBInputs == 1 &&
           (NumFlippedAToBInputs == 0 || NumFlippedAToBInputs == 2))) {
        // Swap the straddling word with a free word in the same half (via a
        // PSHUFLW/PSHUFHW), keeping the pinned input word fixed, and update
        // the mask to match.
        auto FixFlippedInputs = [&V, &DL, &Mask, &DAG](int PinnedIdx, int DWord,
                                                       ArrayRef<int> Inputs) {
          int FixIdx = PinnedIdx ^ 1; // The adjacent slot to the pinned one.
          bool IsFixIdxInput = is_contained(Inputs, PinnedIdx ^ 1);
          // Pick a candidate destination word in the dword that is NOT the
          // pinned input's dword, then adjust so the swap genuinely changes
          // the number of flipped inputs.
          int FixFreeIdx = 2 * (DWord ^ (PinnedIdx / 2 == DWord));
          bool IsFixFreeIdxInput = is_contained(Inputs, FixFreeIdx);
          if (IsFixIdxInput == IsFixFreeIdxInput)
            FixFreeIdx += 1;
          IsFixFreeIdxInput = is_contained(Inputs, FixFreeIdx);
          assert(IsFixIdxInput != IsFixFreeIdxInput &&
                 "We need to be changing the number of flipped inputs!");
          int PSHUFHalfMask[] = {0, 1, 2, 3};
          std::swap(PSHUFHalfMask[FixFreeIdx % 4], PSHUFHalfMask[FixIdx % 4]);
          V = DAG.getNode(
              FixIdx < 4 ? X86ISD::PSHUFLW : X86ISD::PSHUFHW, DL,
              MVT::getVectorVT(MVT::i16, V.getValueSizeInBits() / 16), V,
              getV4X86ShuffleImm8ForMask(PSHUFHalfMask, DL, DAG));

          // Reflect the word swap in the (mutable) mask.
          for (int &M : Mask)
            if (M >= 0 && M == FixIdx)
              M = FixFreeIdx;
            else if (M >= 0 && M == FixFreeIdx)
              M = FixIdx;
        };
        if (NumFlippedBToBInputs != 0) {
          int BPinnedIdx =
              BToAInputs.size() == 3 ? TripleNonInputIdx : OneInput;
          FixFlippedInputs(BPinnedIdx, BDWord, BToBInputs);
        } else {
          assert(NumFlippedAToBInputs != 0 && "Impossible given predicates!");
          int APinnedIdx = ThreeAInputs ? TripleNonInputIdx : OneInput;
          FixFlippedInputs(APinnedIdx, ADWord, AToBInputs);
        }
      }
    }

    // Swap the chosen dwords across the half boundary.
    int PSHUFDMask[] = {0, 1, 2, 3};
    PSHUFDMask[ADWord] = BDWord;
    PSHUFDMask[BDWord] = ADWord;
    V = DAG.getBitcast(
        VT,
        DAG.getNode(X86ISD::PSHUFD, DL, PSHUFDVT, DAG.getBitcast(PSHUFDVT, V),
                    getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG)));

    // Adjust the mask to match the new locations of the swapped dwords.
    for (int &M : Mask)
      if (M >= 0 && M/2 == ADWord)
        M = 2 * BDWord + M % 2;
      else if (M >= 0 && M/2 == BDWord)
        M = 2 * ADWord + M % 2;

    // Recurse back into this routine to re-compute state now that this isn't
    // a 3-and-1 problem.
    return lowerV8I16GeneralSingleInputShuffle(DL, VT, V, Mask, Subtarget, DAG);
  };
  if ((NumLToL == 3 && NumHToL == 1) || (NumLToL == 1 && NumHToL == 3))
    return balanceSides(LToLInputs, HToLInputs, HToHInputs, LToHInputs, 0, 4);
  if ((NumHToH == 3 && NumLToH == 1) || (NumHToH == 1 && NumLToH == 3))
    return balanceSides(HToHInputs, LToHInputs, LToLInputs, HToLInputs, 4, 0);

  // At this point there are at most two inputs to each half from each half.
  // We compute a word shuffle for each source half (PSHUFLMask/PSHUFHMask)
  // that packs cross-half inputs into dwords, a dword shuffle (PSHUFDMask)
  // that moves those dwords to the destination half, and finally shuffle each
  // half into final position with LoMask/HiMask.
  int PSHUFLMask[4] = {-1, -1, -1, -1};
  int PSHUFHMask[4] = {-1, -1, -1, -1};
  int PSHUFDMask[4] = {-1, -1, -1, -1};

  // First fix the masks for all the inputs that are staying in their original
  // halves. This will then dictate the targets of the cross-half shuffles.
  auto fixInPlaceInputs =
      [&PSHUFDMask](ArrayRef<int> InPlaceInputs, ArrayRef<int> IncomingInputs,
                    MutableArrayRef<int> SourceHalfMask,
                    MutableArrayRef<int> HalfMask, int HalfOffset) {
    if (InPlaceInputs.empty())
      return;
    if (InPlaceInputs.size() == 1) {
      // Pin the single in-place input and its dword where they are.
      SourceHalfMask[InPlaceInputs[0] - HalfOffset] =
          InPlaceInputs[0] - HalfOffset;
      PSHUFDMask[InPlaceInputs[0] / 2] = InPlaceInputs[0] / 2;
      return;
    }
    if (IncomingInputs.empty()) {
      // No cross-half inputs will land here; just pin all the in-place
      // inputs.
      for (int Input : InPlaceInputs) {
        SourceHalfMask[Input - HalfOffset] = Input - HalfOffset;
        PSHUFDMask[Input / 2] = Input / 2;
      }
      return;
    }

    assert(InPlaceInputs.size() == 2 && "Cannot handle 3 or 4 inputs!");
    SourceHalfMask[InPlaceInputs[0] - HalfOffset] =
        InPlaceInputs[0] - HalfOffset;
    // Pack the two in-place inputs into one dword: place the second input in
    // the slot adjacent to the first (adjacent index = index XOR 1).
    int AdjIndex = InPlaceInputs[0] ^ 1;
    SourceHalfMask[AdjIndex - HalfOffset] = InPlaceInputs[1] - HalfOffset;
    std::replace(HalfMask.begin(), HalfMask.end(), InPlaceInputs[1], AdjIndex);
    PSHUFDMask[AdjIndex / 2] = AdjIndex / 2;
  };
  fixInPlaceInputs(LToLInputs, HToLInputs, PSHUFLMask, LoMask, 0);
  fixInPlaceInputs(HToHInputs, LToHInputs, PSHUFHMask, HiMask, 4);

  // Now gather the cross-half inputs: pack them into a viable dword of their
  // source half (fixing any clobbers with the in-place inputs above), then
  // route that dword to a free dword of the destination half via PSHUFDMask,
  // rewriting the half mask to track every move.
  auto moveInputsToRightHalf = [&PSHUFDMask](
      MutableArrayRef<int> IncomingInputs, ArrayRef<int> ExistingInputs,
      MutableArrayRef<int> SourceHalfMask, MutableArrayRef<int> HalfMask,
      MutableArrayRef<int> FinalSourceHalfMask, int SourceOffset,
      int DestOffset) {
    // A word slot is "clobbered" when the source-half shuffle already moves a
    // different word into it.
    auto isWordClobbered = [](ArrayRef<int> SourceHalfMask, int Word) {
      return SourceHalfMask[Word] >= 0 && SourceHalfMask[Word] != Word;
    };
    auto isDWordClobbered = [&isWordClobbered](ArrayRef<int> SourceHalfMask,
                                               int Word) {
      int LowWord = Word & ~1;
      int HighWord = Word | 1;
      return isWordClobbered(SourceHalfMask, LowWord) ||
             isWordClobbered(SourceHalfMask, HighWord);
    };

    if (IncomingInputs.empty())
      return;

    if (ExistingInputs.empty()) {
      // The destination half has no inputs of its own, so every incoming
      // dword can be mapped across wholesale.
      for (int Input : IncomingInputs) {
        // If the source half mask already relocates this input, follow the
        // relocation (materializing it as a swap if the partner slot is still
        // free).
        if (isWordClobbered(SourceHalfMask, Input - SourceOffset)) {
          if (SourceHalfMask[SourceHalfMask[Input - SourceOffset]] < 0) {
            SourceHalfMask[SourceHalfMask[Input - SourceOffset]] =
                Input - SourceOffset;
            // Swap the uses in our half mask in one sweep.
            for (int &M : HalfMask)
              if (M == SourceHalfMask[Input - SourceOffset] + SourceOffset)
                M = Input;
              else if (M == Input)
                M = SourceHalfMask[Input - SourceOffset] + SourceOffset;
          } else {
            assert(SourceHalfMask[SourceHalfMask[Input - SourceOffset]] ==
                       Input - SourceOffset &&
                   "Previous placement doesn't match!");
          }
          // Chase the relocation so the dword mapping below uses the input's
          // post-shuffle position.
          Input = SourceHalfMask[Input - SourceOffset] + SourceOffset;
        }

        // Map the input's dword into the corresponding destination dword.
        if (PSHUFDMask[(Input - SourceOffset + DestOffset) / 2] < 0)
          PSHUFDMask[(Input - SourceOffset + DestOffset) / 2] = Input / 2;
        else
          assert(PSHUFDMask[(Input - SourceOffset + DestOffset) / 2] ==
                     Input / 2 &&
                 "Previous placement doesn't match!");
      }

      // All source-half references in this half mask now have a same-half
      // equivalent after the dword move; retarget them.
      for (int &M : HalfMask)
        if (M >= SourceOffset && M < SourceOffset + 4) {
          M = M - SourceOffset + DestOffset;
          assert(M >= 0 && "This should never wrap below zero!");
        }
      return;
    }

    // Ensure the incoming input(s) occupy a viable (unclobbered, and for two
    // inputs a shared) dword of their current half before moving it.
    if (IncomingInputs.size() == 1) {
      if (isWordClobbered(SourceHalfMask, IncomingInputs[0] - SourceOffset)) {
        // Relocate the lone input into any free slot of its half.
        int InputFixed = find(SourceHalfMask, -1) - std::begin(SourceHalfMask) +
                         SourceOffset;
        SourceHalfMask[InputFixed - SourceOffset] =
            IncomingInputs[0] - SourceOffset;
        std::replace(HalfMask.begin(), HalfMask.end(), IncomingInputs[0],
                     InputFixed);
        IncomingInputs[0] = InputFixed;
      }
    } else if (IncomingInputs.size() == 2) {
      if (IncomingInputs[0] / 2 != IncomingInputs[1] / 2 ||
          isDWordClobbered(SourceHalfMask, IncomingInputs[0] - SourceOffset)) {
        // The two inputs are not already packed into a clean dword; map them
        // into some adjacent pair of slots in the source half.
        int InputsFixed[2] = {IncomingInputs[0] - SourceOffset,
                              IncomingInputs[1] - SourceOffset};

        // Case 1/2: one input is unclobbered and the slot adjacent to it is
        // free — move the other input next to it.
        if (!isWordClobbered(SourceHalfMask, InputsFixed[0]) &&
            SourceHalfMask[InputsFixed[0] ^ 1] < 0) {
          SourceHalfMask[InputsFixed[0]] = InputsFixed[0];
          SourceHalfMask[InputsFixed[0] ^ 1] = InputsFixed[1];
          InputsFixed[1] = InputsFixed[0] ^ 1;
        } else if (!isWordClobbered(SourceHalfMask, InputsFixed[1]) &&
                   SourceHalfMask[InputsFixed[1] ^ 1] < 0) {
          SourceHalfMask[InputsFixed[1]] = InputsFixed[1];
          SourceHalfMask[InputsFixed[1] ^ 1] = InputsFixed[0];
          InputsFixed[0] = InputsFixed[1] ^ 1;
        } else if (SourceHalfMask[2 * ((InputsFixed[0] / 2) ^ 1)] < 0 &&
                   SourceHalfMask[2 * ((InputsFixed[0] / 2) ^ 1) + 1] < 0) {
          // Case 3: the adjacent dword is entirely free — move both inputs
          // into it.
          SourceHalfMask[2 * ((InputsFixed[0] / 2) ^ 1)] = InputsFixed[0];
          SourceHalfMask[2 * ((InputsFixed[0] / 2) ^ 1) + 1] = InputsFixed[1];
          InputsFixed[0] = 2 * ((InputsFixed[0] / 2) ^ 1);
          InputsFixed[1] = 2 * ((InputsFixed[0] / 2) ^ 1) + 1;
        } else {
          // Case 4: no clobbers exist (only identity entries) and no adjacent
          // slot is free, so swap the second input with the non-input word
          // adjacent to the first input.
          for (int i = 0; i < 4; ++i)
            assert((SourceHalfMask[i] < 0 || SourceHalfMask[i] == i) &&
                   "We can't handle any clobbers here!");
          assert(InputsFixed[1] != (InputsFixed[0] ^ 1) &&
                 "Cannot have adjacent inputs here!");

          SourceHalfMask[InputsFixed[0] ^ 1] = InputsFixed[1];
          SourceHalfMask[InputsFixed[1]] = InputsFixed[0] ^ 1;

          // The final source mask may reference the swapped words; update it
          // to undo the swap.
          for (int &M : FinalSourceHalfMask)
            if (M == (InputsFixed[0] ^ 1) + SourceOffset)
              M = InputsFixed[1] + SourceOffset;
            else if (M == InputsFixed[1] + SourceOffset)
              M = (InputsFixed[0] ^ 1) + SourceOffset;

          InputsFixed[1] = InputsFixed[0] ^ 1;
        }

        // Retarget the half mask at the fixed-up input positions.
        for (int &M : HalfMask)
          if (M == IncomingInputs[0])
            M = InputsFixed[0] + SourceOffset;
          else if (M == IncomingInputs[1])
            M = InputsFixed[1] + SourceOffset;

        IncomingInputs[0] = InputsFixed[0] + SourceOffset;
        IncomingInputs[1] = InputsFixed[1] + SourceOffset;
      }
    } else {
      llvm_unreachable("Unhandled input size!");
    }

    // Move the packed dword into a free dword of the destination half and
    // retarget the half mask accordingly.
    int FreeDWord = (PSHUFDMask[DestOffset / 2] < 0 ? 0 : 1) + DestOffset / 2;
    assert(PSHUFDMask[FreeDWord] < 0 && "DWord not free");
    PSHUFDMask[FreeDWord] = IncomingInputs[0] / 2;
    for (int &M : HalfMask)
      for (int Input : IncomingInputs)
        if (M == Input)
          M = FreeDWord * 2 + Input % 2;
  };
  moveInputsToRightHalf(HToLInputs, LToLInputs, PSHUFHMask, LoMask, HiMask,
                        /*SourceOffset=*/4, /*DestOffset=*/0);
  moveInputsToRightHalf(LToHInputs, HToHInputs, PSHUFLMask, HiMask, LoMask,
                        /*SourceOffset=*/0, /*DestOffset=*/4);

  // Now enact all the shuffles we've computed to move the inputs into their
  // target halves.
  if (!isNoopShuffleMask(PSHUFLMask))
    V = DAG.getNode(X86ISD::PSHUFLW, DL, VT, V,
                    getV4X86ShuffleImm8ForMask(PSHUFLMask, DL, DAG));
  if (!isNoopShuffleMask(PSHUFHMask))
    V = DAG.getNode(X86ISD::PSHUFHW, DL, VT, V,
                    getV4X86ShuffleImm8ForMask(PSHUFHMask, DL, DAG));
  if (!isNoopShuffleMask(PSHUFDMask))
    V = DAG.getBitcast(
        VT,
        DAG.getNode(X86ISD::PSHUFD, DL, PSHUFDVT, DAG.getBitcast(PSHUFDVT, V),
                    getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG)));

  // At this point each half must read only from itself; the remaining work is
  // one in-half word shuffle per half.
  assert(count_if(LoMask, [](int M) { return M >= 4; }) == 0 &&
         "Failed to lift all the high half inputs to the low mask!");
  assert(count_if(HiMask, [](int M) { return M >= 0 && M < 4; }) == 0 &&
         "Failed to lift all the low half inputs to the high mask!");

  // Do a half shuffle for the low mask.
  if (!isNoopShuffleMask(LoMask))
    V = DAG.getNode(X86ISD::PSHUFLW, DL, VT, V,
                    getV4X86ShuffleImm8ForMask(LoMask, DL, DAG));

  // Do a half shuffle with the high mask after rebasing its values to 0..3.
  for (int &M : HiMask)
    if (M >= 0)
      M -= 4;
  if (!isNoopShuffleMask(HiMask))
    V = DAG.getNode(X86ISD::PSHUFHW, DL, VT, V,
                    getV4X86ShuffleImm8ForMask(HiMask, DL, DAG));

  return V;
}
| 15081 | |
| 15082 | |
| 15083 | |
| 15084 | static SDValue lowerShuffleAsBlendOfPSHUFBs( |
| 15085 | const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask, |
| 15086 | const APInt &Zeroable, SelectionDAG &DAG, bool &V1InUse, bool &V2InUse) { |
| 15087 | assert(!is128BitLaneCrossingShuffleMask(VT, Mask) && |
| 15088 | "Lane crossing shuffle masks not supported"); |
| 15089 | |
| 15090 | int NumBytes = VT.getSizeInBits() / 8; |
| 15091 | int Size = Mask.size(); |
| 15092 | int Scale = NumBytes / Size; |
| 15093 | |
| 15094 | SmallVector<SDValue, 64> V1Mask(NumBytes, DAG.getUNDEF(MVT::i8)); |
| 15095 | SmallVector<SDValue, 64> V2Mask(NumBytes, DAG.getUNDEF(MVT::i8)); |
| 15096 | V1InUse = false; |
| 15097 | V2InUse = false; |
| 15098 | |
| 15099 | for (int i = 0; i < NumBytes; ++i) { |
| 15100 | int M = Mask[i / Scale]; |
| 15101 | if (M < 0) |
| 15102 | continue; |
| 15103 | |
| 15104 | const int ZeroMask = 0x80; |
| 15105 | int V1Idx = M < Size ? M * Scale + i % Scale : ZeroMask; |
| 15106 | int V2Idx = M < Size ? ZeroMask : (M - Size) * Scale + i % Scale; |
| 15107 | if (Zeroable[i / Scale]) |
| 15108 | V1Idx = V2Idx = ZeroMask; |
| 15109 | |
| 15110 | V1Mask[i] = DAG.getConstant(V1Idx, DL, MVT::i8); |
| 15111 | V2Mask[i] = DAG.getConstant(V2Idx, DL, MVT::i8); |
| 15112 | V1InUse |= (ZeroMask != V1Idx); |
| 15113 | V2InUse |= (ZeroMask != V2Idx); |
| 15114 | } |
| 15115 | |
| 15116 | MVT ShufVT = MVT::getVectorVT(MVT::i8, NumBytes); |
| 15117 | if (V1InUse) |
| 15118 | V1 = DAG.getNode(X86ISD::PSHUFB, DL, ShufVT, DAG.getBitcast(ShufVT, V1), |
| 15119 | DAG.getBuildVector(ShufVT, DL, V1Mask)); |
| 15120 | if (V2InUse) |
| 15121 | V2 = DAG.getNode(X86ISD::PSHUFB, DL, ShufVT, DAG.getBitcast(ShufVT, V2), |
| 15122 | DAG.getBuildVector(ShufVT, DL, V2Mask)); |
| 15123 | |
| 15124 | |
| 15125 | SDValue V; |
| 15126 | if (V1InUse && V2InUse) |
| 15127 | V = DAG.getNode(ISD::OR, DL, ShufVT, V1, V2); |
| 15128 | else |
| 15129 | V = V1InUse ? V1 : V2; |
| 15130 | |
| 15131 | |
| 15132 | return DAG.getBitcast(VT, V); |
| 15133 | } |
| 15134 | |
| 15135 | |
| 15136 | |
| 15137 | |
| 15138 | |
| 15139 | |
| 15140 | |
| 15141 | |
| 15142 | |
| 15143 | |
| 15144 | |
| 15145 | |
| 15146 | |
/// Generic lowering of 8-lane 16-bit integer shuffles.
///
/// Tries a fixed cascade of lowering strategies, cheapest first. Single-input
/// shuffles (no V2 elements referenced) get their own sub-cascade ending in
/// the general half-shuffle lowering; two-input shuffles fall through shifts,
/// blends, unpacks, rotates and finally PSHUFB / decomposed-merge fallbacks.
/// The order of the attempts is significant: each strategy is only reached
/// when every strictly-preferable one has declined.
///
/// \param Mask      8-element shuffle mask; entries >= 8 select from V2,
///                  entries < 0 are undef.
/// \param Zeroable  Lanes known to be zero regardless of the shuffle inputs.
static SDValue lowerV8I16Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
                                 const APInt &Zeroable, SDValue V1, SDValue V2,
                                 const X86Subtarget &Subtarget,
                                 SelectionDAG &DAG) {
  assert(V1.getSimpleValueType() == MVT::v8i16 && "Bad operand type!");
  assert(V2.getSimpleValueType() == MVT::v8i16 && "Bad operand type!");
  assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");

  // Whenever we can lower this as a zext, that instruction is strictly faster
  // than any alternative.
  if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend(DL, MVT::v8i16, V1, V2, Mask,
                                                   Zeroable, Subtarget, DAG))
    return ZExt;

  // Try to use lower using a truncation.
  if (SDValue V = lowerShuffleWithVPMOV(DL, MVT::v8i16, V1, V2, Mask, Zeroable,
                                        Subtarget, DAG))
    return V;

  int NumV2Inputs = count_if(Mask, [](int M) { return M >= 8; });

  if (NumV2Inputs == 0) {
    // Try to use shift instructions.
    if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v8i16, V1, V1, Mask,
                                            Zeroable, Subtarget, DAG))
      return Shift;

    // Check for being able to broadcast a single element.
    if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v8i16, V1, V2,
                                                    Mask, Subtarget, DAG))
      return Broadcast;

    // Try to use bit rotation instructions.
    if (SDValue Rotate = lowerShuffleAsBitRotate(DL, MVT::v8i16, V1, Mask,
                                                 Subtarget, DAG))
      return Rotate;

    // Use dedicated unpack instructions for masks that match their pattern.
    if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v8i16, Mask, V1, V2, DAG))
      return V;

    // Use dedicated pack instructions for masks that match their pattern.
    if (SDValue V = lowerShuffleWithPACK(DL, MVT::v8i16, Mask, V1, V2, DAG,
                                         Subtarget))
      return V;

    // Try to use byte rotation instructions.
    if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v8i16, V1, V1, Mask,
                                                  Subtarget, DAG))
      return Rotate;

    // Make a copy of the mask so it can be modified by the general lowering.
    SmallVector<int, 8> MutableMask(Mask.begin(), Mask.end());
    return lowerV8I16GeneralSingleInputShuffle(DL, MVT::v8i16, V1, MutableMask,
                                               Subtarget, DAG);
  }

  assert(llvm::any_of(Mask, [](int M) { return M >= 0 && M < 8; }) &&
         "All single-input shuffles should be canonicalized to be V1-input "
         "shuffles.");

  // Try to use shift instructions.
  if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v8i16, V1, V2, Mask,
                                          Zeroable, Subtarget, DAG))
    return Shift;

  // See if we can use SSE4A Extraction / Insertion.
  if (Subtarget.hasSSE4A())
    if (SDValue V = lowerShuffleWithSSE4A(DL, MVT::v8i16, V1, V2, Mask,
                                          Zeroable, DAG))
      return V;

  // There are special ways we can lower some single-element blends.
  if (NumV2Inputs == 1)
    if (SDValue V = lowerShuffleAsElementInsertion(
            DL, MVT::v8i16, V1, V2, Mask, Zeroable, Subtarget, DAG))
      return V;

  // We have different paths for blend lowering, but they all must use the
  // *exact* same predicate.
  bool IsBlendSupported = Subtarget.hasSSE41();
  if (IsBlendSupported)
    if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v8i16, V1, V2, Mask,
                                            Zeroable, Subtarget, DAG))
      return Blend;

  if (SDValue Masked = lowerShuffleAsBitMask(DL, MVT::v8i16, V1, V2, Mask,
                                             Zeroable, Subtarget, DAG))
    return Masked;

  // Use dedicated unpack instructions for masks that match their pattern.
  if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v8i16, Mask, V1, V2, DAG))
    return V;

  // Use dedicated pack instructions for masks that match their pattern.
  if (SDValue V = lowerShuffleWithPACK(DL, MVT::v8i16, Mask, V1, V2, DAG,
                                       Subtarget))
    return V;

  // Try to use lower using a truncation.
  if (SDValue V = lowerShuffleAsVTRUNC(DL, MVT::v8i16, V1, V2, Mask, Zeroable,
                                       Subtarget, DAG))
    return V;

  // Try to use byte rotation instructions.
  if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v8i16, V1, V2, Mask,
                                                Subtarget, DAG))
    return Rotate;

  if (SDValue BitBlend =
          lowerShuffleAsBitBlend(DL, MVT::v8i16, V1, V2, Mask, DAG))
    return BitBlend;

  // Try to use byte shift instructions to mask.
  if (SDValue V = lowerShuffleAsByteShiftMask(DL, MVT::v8i16, V1, V2, Mask,
                                              Zeroable, Subtarget, DAG))
    return V;

  // Attempt to lower using compaction: SSE41 is free, otherwise the cost of
  // the extra PAND needs to be weighed against the sequence it replaces.
  // Masking the odd words to zero and then packing lets us drop the even
  // elements of both inputs with PACKUS.
  int NumEvenDrops = canLowerByDroppingEvenElements(Mask, false);
  if ((NumEvenDrops == 1 || NumEvenDrops == 2) && Subtarget.hasSSE41() &&
      !Subtarget.hasVLX()) {
    // Build a dword mask that clears the upper word of every kept element.
    SmallVector<SDValue, 8> DWordClearOps(4, DAG.getConstant(0, DL, MVT::i32));
    for (unsigned i = 0; i != 4; i += 1 << (NumEvenDrops - 1))
      DWordClearOps[i] = DAG.getConstant(0xFFFF, DL, MVT::i32);
    SDValue DWordClearMask = DAG.getBuildVector(MVT::v4i32, DL, DWordClearOps);
    V1 = DAG.getNode(ISD::AND, DL, MVT::v4i32, DAG.getBitcast(MVT::v4i32, V1),
                     DWordClearMask);
    V2 = DAG.getNode(ISD::AND, DL, MVT::v4i32, DAG.getBitcast(MVT::v4i32, V2),
                     DWordClearMask);
    // Now pack things back together.
    SDValue Result = DAG.getNode(X86ISD::PACKUS, DL, MVT::v8i16, V1, V2);
    if (NumEvenDrops == 2) {
      Result = DAG.getBitcast(MVT::v4i32, Result);
      Result = DAG.getNode(X86ISD::PACKUS, DL, MVT::v8i16, Result, Result);
    }
    return Result;
  }

  // Try to lower by permuting the inputs into an unpack instruction.
  if (SDValue Unpack = lowerShuffleAsPermuteAndUnpack(DL, MVT::v8i16, V1, V2,
                                                      Mask, Subtarget, DAG))
    return Unpack;

  // If we can't directly blend but can use PSHUFB, that will be better as it
  // can both shuffle and set up the inefficient blend.
  if (!IsBlendSupported && Subtarget.hasSSSE3()) {
    bool V1InUse, V2InUse;
    return lowerShuffleAsBlendOfPSHUFBs(DL, MVT::v8i16, V1, V2, Mask,
                                        Zeroable, DAG, V1InUse, V2InUse);
  }

  // We can always bit-blend if we have to so the fallback strategy is to
  // decompose into single-input permutes and blends/unpacks.
  return lowerShuffleAsDecomposedShuffleMerge(DL, MVT::v8i16, V1, V2,
                                              Mask, Subtarget, DAG);
}
| 15306 | |
| 15307 | |
| 15308 | |
| 15309 | |
| 15310 | static SDValue lowerShuffleWithPERMV(const SDLoc &DL, MVT VT, |
| 15311 | ArrayRef<int> Mask, SDValue V1, SDValue V2, |
| 15312 | const X86Subtarget &Subtarget, |
| 15313 | SelectionDAG &DAG) { |
| 15314 | MVT MaskVT = VT.changeTypeToInteger(); |
| 15315 | SDValue MaskNode; |
| 15316 | MVT ShuffleVT = VT; |
| 15317 | if (!VT.is512BitVector() && !Subtarget.hasVLX()) { |
| 15318 | V1 = widenSubVector(V1, false, Subtarget, DAG, DL, 512); |
| 15319 | V2 = widenSubVector(V2, false, Subtarget, DAG, DL, 512); |
| 15320 | ShuffleVT = V1.getSimpleValueType(); |
| 15321 | |
| 15322 | |
| 15323 | int NumElts = VT.getVectorNumElements(); |
| 15324 | unsigned Scale = 512 / VT.getSizeInBits(); |
| 15325 | SmallVector<int, 32> AdjustedMask(Mask.begin(), Mask.end()); |
| 15326 | for (int &M : AdjustedMask) |
| 15327 | if (NumElts <= M) |
| 15328 | M += (Scale - 1) * NumElts; |
| 15329 | MaskNode = getConstVector(AdjustedMask, MaskVT, DAG, DL, true); |
| 15330 | MaskNode = widenSubVector(MaskNode, false, Subtarget, DAG, DL, 512); |
| 15331 | } else { |
| 15332 | MaskNode = getConstVector(Mask, MaskVT, DAG, DL, true); |
| 15333 | } |
| 15334 | |
| 15335 | SDValue Result; |
| 15336 | if (V2.isUndef()) |
| 15337 | Result = DAG.getNode(X86ISD::VPERMV, DL, ShuffleVT, MaskNode, V1); |
| 15338 | else |
| 15339 | Result = DAG.getNode(X86ISD::VPERMV3, DL, ShuffleVT, V1, MaskNode, V2); |
| 15340 | |
| 15341 | if (VT != ShuffleVT) |
| 15342 | Result = extractSubVector(Result, 0, DAG, DL, VT.getSizeInBits()); |
| 15343 | |
| 15344 | return Result; |
| 15345 | } |
| 15346 | |
| 15347 | |
| 15348 | |
| 15349 | |
| 15350 | |
| 15351 | |
| 15352 | |
| 15353 | |
/// Generic lowering of v16i8 shuffles.
///
/// This is a hybrid strategy, hitting the common cheap cases (shifts,
/// rotates, packs, zext, SSE4A) first, then trying to widen pure duplication
/// masks to i16 shuffles, and finally falling back to PSHUFB blends, PACKUS
/// compaction, or splitting into two v8i16 half-shuffles re-packed at the
/// end.
///
/// \param Mask      16-element mask; entries >= 16 select from V2, entries
///                  < 0 are undef.
/// \param Zeroable  Lanes known to be zero regardless of the shuffle inputs.
static SDValue lowerV16I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
                                 const APInt &Zeroable, SDValue V1, SDValue V2,
                                 const X86Subtarget &Subtarget,
                                 SelectionDAG &DAG) {
  assert(V1.getSimpleValueType() == MVT::v16i8 && "Bad operand type!");
  assert(V2.getSimpleValueType() == MVT::v16i8 && "Bad operand type!");
  assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");

  // Try to use shift instructions.
  if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v16i8, V1, V2, Mask,
                                          Zeroable, Subtarget, DAG))
    return Shift;

  // Try to use byte rotation instructions.
  if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v16i8, V1, V2, Mask,
                                                Subtarget, DAG))
    return Rotate;

  // Use dedicated pack instructions for masks that match their pattern.
  if (SDValue V = lowerShuffleWithPACK(DL, MVT::v16i8, Mask, V1, V2, DAG,
                                       Subtarget))
    return V;

  // Try to use a zext lowering.
  if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend(DL, MVT::v16i8, V1, V2, Mask,
                                                   Zeroable, Subtarget, DAG))
    return ZExt;

  // Try to use lower using a truncation.
  if (SDValue V = lowerShuffleWithVPMOV(DL, MVT::v16i8, V1, V2, Mask, Zeroable,
                                        Subtarget, DAG))
    return V;

  if (SDValue V = lowerShuffleAsVTRUNC(DL, MVT::v16i8, V1, V2, Mask, Zeroable,
                                       Subtarget, DAG))
    return V;

  // See if we can use SSE4A Extraction / Insertion.
  if (Subtarget.hasSSE4A())
    if (SDValue V = lowerShuffleWithSSE4A(DL, MVT::v16i8, V1, V2, Mask,
                                          Zeroable, DAG))
      return V;

  int NumV2Elements = count_if(Mask, [](int M) { return M >= 16; });

  // For single-input shuffles, there are some nicer lowering tricks we can
  // use.
  if (NumV2Elements == 0) {
    // Check for being able to broadcast a single element.
    if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v16i8, V1, V2,
                                                    Mask, Subtarget, DAG))
      return Broadcast;

    // Try to use bit rotation instructions.
    if (SDValue Rotate = lowerShuffleAsBitRotate(DL, MVT::v16i8, V1, Mask,
                                                 Subtarget, DAG))
      return Rotate;

    if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v16i8, Mask, V1, V2, DAG))
      return V;

    // Check whether we can widen this to an i16 shuffle by duplicating bytes.
    // Notably, this handles splat and partial-splat shuffles more efficiently.
    //
    // The lambda below checks that every pair of adjacent output bytes either
    // disagrees in at most one defined entry or requests the same source byte
    // — i.e. the shuffle only *duplicates* bytes and so can be expressed as a
    // pre-shuffle of i16 elements, an unpack that duplicates, and a
    // post-shuffle of i16 elements.
    auto canWidenViaDuplication = [](ArrayRef<int> Mask) {
      for (int i = 0; i < 16; i += 2)
        if (Mask[i] >= 0 && Mask[i + 1] >= 0 && Mask[i] != Mask[i + 1])
          return false;

      return true;
    };
    auto tryToWidenViaDuplication = [&]() -> SDValue {
      if (!canWidenViaDuplication(Mask))
        return SDValue();
      // Collect the distinct source bytes referenced from the low half...
      SmallVector<int, 4> LoInputs;
      copy_if(Mask, std::back_inserter(LoInputs),
              [](int M) { return M >= 0 && M < 8; });
      array_pod_sort(LoInputs.begin(), LoInputs.end());
      LoInputs.erase(std::unique(LoInputs.begin(), LoInputs.end()),
                     LoInputs.end());
      // ...and from the high half.
      SmallVector<int, 4> HiInputs;
      copy_if(Mask, std::back_inserter(HiInputs), [](int M) { return M >= 8; });
      array_pod_sort(HiInputs.begin(), HiInputs.end());
      HiInputs.erase(std::unique(HiInputs.begin(), HiInputs.end()),
                     HiInputs.end());

      // Target the half with more inputs; elements from the other half get
      // moved over by the pre-duplication i16 shuffle.
      bool TargetLo = LoInputs.size() >= HiInputs.size();
      ArrayRef<int> InPlaceInputs = TargetLo ? LoInputs : HiInputs;
      ArrayRef<int> MovingInputs = TargetLo ? HiInputs : LoInputs;

      // PreDupI16Shuffle places i16 elements; LaneMap records, per source
      // byte, where that byte lands after the pre-shuffle.
      int PreDupI16Shuffle[] = {-1, -1, -1, -1, -1, -1, -1, -1};
      SmallDenseMap<int, int, 8> LaneMap;
      for (int I : InPlaceInputs) {
        PreDupI16Shuffle[I/2] = I/2;
        LaneMap[I] = I;
      }
      // Pack the moving inputs into the target half's free i16 slots.
      int j = TargetLo ? 0 : 4, je = j + 4;
      for (int i = 0, ie = MovingInputs.size(); i < ie; ++i) {
        // Check if j is already a shuffle of this input. This happens when
        // two bytes of the same i16 element move together.
        if (PreDupI16Shuffle[j] != MovingInputs[i] / 2) {
          // Advance to the next free slot in the target half.
          while (j < je && PreDupI16Shuffle[j] >= 0)
            ++j;

          if (j == je)
            // We can't place the inputs into a single half with a simple
            // i16 shuffle; give up.
            return SDValue();

          // Map this input with the i16 shuffle.
          PreDupI16Shuffle[j] = MovingInputs[i] / 2;
        }

        // Update the lane map based on the mapping we ended up with.
        LaneMap[MovingInputs[i]] = 2 * j + MovingInputs[i] % 2;
      }
      V1 = DAG.getBitcast(
          MVT::v16i8,
          DAG.getVectorShuffle(MVT::v8i16, DL, DAG.getBitcast(MVT::v8i16, V1),
                               DAG.getUNDEF(MVT::v8i16), PreDupI16Shuffle));

      // Unpack the bytes to form the i16s that will be shuffled into place.
      // Only materialize the sides of the unpack that are actually read.
      bool EvenInUse = false, OddInUse = false;
      for (int i = 0; i < 16; i += 2) {
        EvenInUse |= (Mask[i + 0] >= 0);
        OddInUse |= (Mask[i + 1] >= 0);
        if (EvenInUse && OddInUse)
          break;
      }
      V1 = DAG.getNode(TargetLo ? X86ISD::UNPCKL : X86ISD::UNPCKH, DL,
                       MVT::v16i8, EvenInUse ? V1 : DAG.getUNDEF(MVT::v16i8),
                       OddInUse ? V1 : DAG.getUNDEF(MVT::v16i8));

      // Build the final i16 shuffle from the duplicated bytes' positions.
      int PostDupI16Shuffle[8] = {-1, -1, -1, -1, -1, -1, -1, -1};
      for (int i = 0; i < 16; ++i)
        if (Mask[i] >= 0) {
          int MappedMask = LaneMap[Mask[i]] - (TargetLo ? 0 : 8);
          assert(MappedMask < 8 && "Invalid v8 shuffle mask!");
          if (PostDupI16Shuffle[i / 2] < 0)
            PostDupI16Shuffle[i / 2] = MappedMask;
          else
            assert(PostDupI16Shuffle[i / 2] == MappedMask &&
                   "Conflicting entries in the original shuffle!");
        }
      return DAG.getBitcast(
          MVT::v16i8,
          DAG.getVectorShuffle(MVT::v8i16, DL, DAG.getBitcast(MVT::v8i16, V1),
                               DAG.getUNDEF(MVT::v8i16), PostDupI16Shuffle));
    };
    if (SDValue V = tryToWidenViaDuplication())
      return V;
  }

  if (SDValue Masked = lowerShuffleAsBitMask(DL, MVT::v16i8, V1, V2, Mask,
                                             Zeroable, Subtarget, DAG))
    return Masked;

  // Use dedicated unpack instructions for masks that match their pattern.
  if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v16i8, Mask, V1, V2, DAG))
    return V;

  // Try to use byte shift instructions to mask.
  if (SDValue V = lowerShuffleAsByteShiftMask(DL, MVT::v16i8, V1, V2, Mask,
                                              Zeroable, Subtarget, DAG))
    return V;

  // Check whether a compaction lowering can be done by dropping even elements
  // and packing. The result of canLowerByDroppingEvenElements is the number
  // of times we can do this (0 means it doesn't apply).
  bool IsSingleInput = V2.isUndef();
  int NumEvenDrops = canLowerByDroppingEvenElements(Mask, IsSingleInput);

  // Check for SSSE3 which lets us lower all v16i8 shuffles much more directly
  // with PSHUFB. It is important to do this before we attempt to generate any
  // blends but after all of the single-input lowerings. If the single input
  // lowerings can find an instruction sequence that is faster than a PSHUFB,
  // we should run that, but once it works up to 3 full vector operations in
  // cost the PSHUFB-based lowering tends to win out. When targeting a
  // one-PSHUFB compaction (NumEvenDrops == 1 with two inputs) the PACKUS path
  // below is preferred, hence the guard.
  if (Subtarget.hasSSSE3() && (IsSingleInput || NumEvenDrops != 1)) {
    bool V1InUse = false;
    bool V2InUse = false;

    SDValue PSHUFB = lowerShuffleAsBlendOfPSHUFBs(
        DL, MVT::v16i8, V1, V2, Mask, Zeroable, DAG, V1InUse, V2InUse);

    // If both V1 and V2 are in use and we can use a direct blend or an even
    // element interleaving, prefer those over a PSHUFB-based blend which
    // requires three ops and leaves the mask in a register.
    if (V1InUse && V2InUse) {
      if (Subtarget.hasSSE41())
        if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v16i8, V1, V2, Mask,
                                                Zeroable, Subtarget, DAG))
          return Blend;

      // We can use an unpack to do the blending rather than an or in some
      // cases. Even though the or may be (very minorly) more efficient, we
      // preference this lowering because there are common cases where part of
      // the complexity of the shuffles goes away when we do the final blend as
      // an unpack.
      // FIXME: It might be worth trying to detect if the unpack-feeding
      // shuffles will both be pshufb, in which case we shouldn't bother with
      // this.
      if (SDValue Unpack = lowerShuffleAsPermuteAndUnpack(
              DL, MVT::v16i8, V1, V2, Mask, Subtarget, DAG))
        return Unpack;

      // AVX512VBMI can lower to VPERMB (non-VLX will pad to v64i8).
      if (Subtarget.hasVBMI())
        return lowerShuffleWithPERMV(DL, MVT::v16i8, Mask, V1, V2, Subtarget,
                                     DAG);

      // If we have XOP we can use one VPPERM instead of multiple PSHUFBs.
      if (Subtarget.hasXOP()) {
        SDValue MaskNode = getConstVector(Mask, MVT::v16i8, DAG, DL, true);
        return DAG.getNode(X86ISD::VPPERM, DL, MVT::v16i8, V1, V2, MaskNode);
      }

      // Use PALIGNR+Permute if possible - permute might become PSHUFB but the
      // PALIGNR will be cheaper than the second PSHUFB+OR.
      if (SDValue V = lowerShuffleAsByteRotateAndPermute(
              DL, MVT::v16i8, V1, V2, Mask, Subtarget, DAG))
        return V;
    }

    return PSHUFB;
  }

  // There are special ways we can lower some single-element blends.
  if (NumV2Elements == 1)
    if (SDValue V = lowerShuffleAsElementInsertion(
            DL, MVT::v16i8, V1, V2, Mask, Zeroable, Subtarget, DAG))
      return V;

  if (SDValue Blend = lowerShuffleAsBitBlend(DL, MVT::v16i8, V1, V2, Mask, DAG))
    return Blend;

  // Check whether a compaction lowering can be done. This handles shuffles
  // which take every Nth element for some even N. See the helper function for
  // details.
  //
  // We special case these as they can be particularly efficiently handled with
  // the PACKUSB instruction on x86 and they show up in common patterns of
  // rearranging bytes to truncate wide elements.
  if (NumEvenDrops) {
    // NumEvenDrops == N implies that the more significant bytes are dropped N
    // times, so we can clear them with an AND and pack repeatedly.
    assert(NumEvenDrops <= 3 &&
           "No support for dropping even elements more than 3 times.");
    // Build a mask clearing the high byte of every kept i16 element.
    SmallVector<SDValue, 8> WordClearOps(8, DAG.getConstant(0, DL, MVT::i16));
    for (unsigned i = 0; i != 8; i += 1 << (NumEvenDrops - 1))
      WordClearOps[i] = DAG.getConstant(0xFF, DL, MVT::i16);
    SDValue WordClearMask = DAG.getBuildVector(MVT::v8i16, DL, WordClearOps);
    V1 = DAG.getNode(ISD::AND, DL, MVT::v8i16, DAG.getBitcast(MVT::v8i16, V1),
                     WordClearMask);
    if (!IsSingleInput)
      V2 = DAG.getNode(ISD::AND, DL, MVT::v8i16, DAG.getBitcast(MVT::v8i16, V2),
                       WordClearMask);

    // Now pack things back together, repeating the pack once per extra drop.
    SDValue Result = DAG.getNode(X86ISD::PACKUS, DL, MVT::v16i8, V1,
                                 IsSingleInput ? V1 : V2);
    for (int i = 1; i < NumEvenDrops; ++i) {
      Result = DAG.getBitcast(MVT::v8i16, Result);
      Result = DAG.getNode(X86ISD::PACKUS, DL, MVT::v16i8, Result, Result);
    }
    return Result;
  }

  // Handle multi-input cases by blending/unpacking single-input shuffles.
  if (NumV2Elements > 0)
    return lowerShuffleAsDecomposedShuffleMerge(DL, MVT::v16i8, V1, V2, Mask,
                                                Subtarget, DAG);

  // The fallback path for single-input shuffles widens this into two v8i16
  // vectors with unpacks, shuffles those, and then pulls them back together
  // with a pack.
  SDValue V = V1;

  std::array<int, 8> LoBlendMask = {{-1, -1, -1, -1, -1, -1, -1, -1}};
  std::array<int, 8> HiBlendMask = {{-1, -1, -1, -1, -1, -1, -1, -1}};
  for (int i = 0; i < 16; ++i)
    if (Mask[i] >= 0)
      (i < 8 ? LoBlendMask[i] : HiBlendMask[i % 8]) = Mask[i];

  SDValue VLoHalf, VHiHalf;
  // Check if any of the odd lanes in the v16i8 are used. If not, we can mask
  // them out and avoid using UNPCK{L,H} to extract the elements of V as
  // i16s.
  if (none_of(LoBlendMask, [](int M) { return M >= 0 && M % 2 == 1; }) &&
      none_of(HiBlendMask, [](int M) { return M >= 0 && M % 2 == 1; })) {
    // Use a mask to drop the high bytes.
    VLoHalf = DAG.getBitcast(MVT::v8i16, V);
    VLoHalf = DAG.getNode(ISD::AND, DL, MVT::v8i16, VLoHalf,
                          DAG.getConstant(0x00FF, DL, MVT::v8i16));

    // This will be a single vector shuffle instead of a blend so nuke VHiHalf.
    VHiHalf = DAG.getUNDEF(MVT::v8i16);

    // Squash the masks to point directly into VLoHalf.
    for (int &M : LoBlendMask)
      if (M >= 0)
        M /= 2;
    for (int &M : HiBlendMask)
      if (M >= 0)
        M /= 2;
  } else {
    // Otherwise just unpack the low half of V into VLoHalf and the high half
    // into VHiHalf so that we can blend them as i16s.
    SDValue Zero = getZeroVector(MVT::v16i8, Subtarget, DAG, DL);

    VLoHalf = DAG.getBitcast(
        MVT::v8i16, DAG.getNode(X86ISD::UNPCKL, DL, MVT::v16i8, V, Zero));
    VHiHalf = DAG.getBitcast(
        MVT::v8i16, DAG.getNode(X86ISD::UNPCKH, DL, MVT::v16i8, V, Zero));
  }

  SDValue LoV = DAG.getVectorShuffle(MVT::v8i16, DL, VLoHalf, VHiHalf, LoBlendMask);
  SDValue HiV = DAG.getVectorShuffle(MVT::v8i16, DL, VLoHalf, VHiHalf, HiBlendMask);

  return DAG.getNode(X86ISD::PACKUS, DL, MVT::v16i8, LoV, HiV);
}
| 15691 | |
| 15692 | |
| 15693 | |
| 15694 | |
| 15695 | |
| 15696 | static SDValue lower128BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, |
| 15697 | MVT VT, SDValue V1, SDValue V2, |
| 15698 | const APInt &Zeroable, |
| 15699 | const X86Subtarget &Subtarget, |
| 15700 | SelectionDAG &DAG) { |
| 15701 | switch (VT.SimpleTy) { |
| 15702 | case MVT::v2i64: |
| 15703 | return lowerV2I64Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); |
| 15704 | case MVT::v2f64: |
| 15705 | return lowerV2F64Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); |
| 15706 | case MVT::v4i32: |
| 15707 | return lowerV4I32Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); |
| 15708 | case MVT::v4f32: |
| 15709 | return lowerV4F32Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); |
| 15710 | case MVT::v8i16: |
| 15711 | return lowerV8I16Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); |
| 15712 | case MVT::v16i8: |
| 15713 | return lowerV16I8Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); |
| 15714 | |
| 15715 | default: |
| 15716 | llvm_unreachable("Unimplemented!"); |
| 15717 | } |
| 15718 | } |
| 15719 | |
| 15720 | |
| 15721 | |
| 15722 | |
| 15723 | |
| 15724 | |
/// Generic routine to split a 256-bit-or-wider vector shuffle into halves.
///
/// Splits both operands into their low and high halves, builds each half of
/// the result as an independent half-width shuffle (possibly a blend of
/// per-input shuffles), and concatenates the two halves back together.
///
/// The mask must use half-width-relative indices in each half; elements that
/// cross between the result halves are handled by blending per-input
/// half shuffles inside HalfBlend.
static SDValue splitAndLowerShuffle(const SDLoc &DL, MVT VT, SDValue V1,
                                    SDValue V2, ArrayRef<int> Mask,
                                    SelectionDAG &DAG) {
  assert(VT.getSizeInBits() >= 256 &&
         "Only for 256-bit or wider vector shuffles!");
  assert(V1.getSimpleValueType() == VT && "Bad operand type!");
  assert(V2.getSimpleValueType() == VT && "Bad operand type!");

  // The mask for the low and high halves of the result.
  ArrayRef<int> LoMask = Mask.slice(0, Mask.size() / 2);
  ArrayRef<int> HiMask = Mask.slice(Mask.size() / 2);

  int NumElements = VT.getVectorNumElements();
  int SplitNumElements = NumElements / 2;
  MVT ScalarVT = VT.getVectorElementType();
  MVT SplitVT = MVT::getVectorVT(ScalarVT, SplitNumElements);

  // Use splitVector/extractSubVector so that split build-vectors just build
  // two narrower build vectors. This helps shuffling with splats and zeros.
  auto SplitVector = [&](SDValue V) {
    SDValue LoV, HiV;
    std::tie(LoV, HiV) = splitVector(peekThroughBitcasts(V), DAG, DL);
    return std::make_pair(DAG.getBitcast(SplitVT, LoV),
                          DAG.getBitcast(SplitVT, HiV));
  };

  SDValue LoV1, HiV1, LoV2, HiV2;
  std::tie(LoV1, HiV1) = SplitVector(V1);
  std::tie(LoV2, HiV2) = SplitVector(V2);

  // Lower one half of the result: shuffle each referenced input into shape,
  // then blend the two shuffles together.
  auto HalfBlend = [&](ArrayRef<int> HalfMask) {
    bool UseLoV1 = false, UseHiV1 = false, UseLoV2 = false, UseHiV2 = false;
    SmallVector<int, 32> V1BlendMask((unsigned)SplitNumElements, -1);
    SmallVector<int, 32> V2BlendMask((unsigned)SplitNumElements, -1);
    SmallVector<int, 32> BlendMask((unsigned)SplitNumElements, -1);
    for (int i = 0; i < SplitNumElements; ++i) {
      int M = HalfMask[i];
      if (M >= NumElements) {
        // Element of V2: record which half of V2 it comes from and route the
        // blend to pick the V2-shuffle result at this position.
        if (M >= NumElements + SplitNumElements)
          UseHiV2 = true;
        else
          UseLoV2 = true;
        V2BlendMask[i] = M - NumElements;
        BlendMask[i] = SplitNumElements + i;
      } else if (M >= 0) {
        // Element of V1, analogously.
        if (M >= SplitNumElements)
          UseHiV1 = true;
        else
          UseLoV1 = true;
        V1BlendMask[i] = M;
        BlendMask[i] = i;
      }
    }

    // Because the lowering happens after all combining takes place, we need
    // to manually combine these blend masks as much as possible so that we
    // create a minimal number of high-level vector shuffle nodes.

    // First try just blending the halves of V1 or V2.
    if (!UseLoV1 && !UseHiV1 && !UseLoV2 && !UseHiV2)
      return DAG.getUNDEF(SplitVT);
    if (!UseLoV2 && !UseHiV2)
      return DAG.getVectorShuffle(SplitVT, DL, LoV1, HiV1, V1BlendMask);
    if (!UseLoV1 && !UseHiV1)
      return DAG.getVectorShuffle(SplitVT, DL, LoV2, HiV2, V2BlendMask);

    SDValue V1Blend, V2Blend;
    if (UseLoV1 && UseHiV1) {
      V1Blend =
        DAG.getVectorShuffle(SplitVT, DL, LoV1, HiV1, V1BlendMask);
    } else {
      // We only use half of V1 so map the usage down into the final blend
      // mask directly.
      V1Blend = UseLoV1 ? LoV1 : HiV1;
      for (int i = 0; i < SplitNumElements; ++i)
        if (BlendMask[i] >= 0 && BlendMask[i] < SplitNumElements)
          BlendMask[i] = V1BlendMask[i] - (UseLoV1 ? 0 : SplitNumElements);
    }
    if (UseLoV2 && UseHiV2) {
      V2Blend =
        DAG.getVectorShuffle(SplitVT, DL, LoV2, HiV2, V2BlendMask);
    } else {
      // We only use half of V2 so map the usage down into the final blend
      // mask directly.
      V2Blend = UseLoV2 ? LoV2 : HiV2;
      for (int i = 0; i < SplitNumElements; ++i)
        if (BlendMask[i] >= SplitNumElements)
          BlendMask[i] = V2BlendMask[i] + (UseLoV2 ? SplitNumElements : 0);
    }
    return DAG.getVectorShuffle(SplitVT, DL, V1Blend, V2Blend, BlendMask);
  };
  SDValue Lo = HalfBlend(LoMask);
  SDValue Hi = HalfBlend(HiMask);
  return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
}
| 15818 | |
| 15819 | |
| 15820 | |
| 15821 | |
| 15822 | |
| 15823 | |
| 15824 | |
| 15825 | |
| 15826 | |
| 15827 | static SDValue lowerShuffleAsSplitOrBlend(const SDLoc &DL, MVT VT, SDValue V1, |
| 15828 | SDValue V2, ArrayRef<int> Mask, |
| 15829 | const X86Subtarget &Subtarget, |
| 15830 | SelectionDAG &DAG) { |
| 15831 | assert(!V2.isUndef() && "This routine must not be used to lower single-input " |
| 15832 | "shuffles as it could then recurse on itself."); |
| 15833 | int Size = Mask.size(); |
| 15834 | |
| 15835 | |
| 15836 | |
| 15837 | |
| 15838 | auto DoBothBroadcast = [&] { |
| 15839 | int V1BroadcastIdx = -1, V2BroadcastIdx = -1; |
| 15840 | for (int M : Mask) |
| 15841 | if (M >= Size) { |
| 15842 | if (V2BroadcastIdx < 0) |
| 15843 | V2BroadcastIdx = M - Size; |
| 15844 | else if (M - Size != V2BroadcastIdx) |
| 15845 | return false; |
| 15846 | } else if (M >= 0) { |
| 15847 | if (V1BroadcastIdx < 0) |
| 15848 | V1BroadcastIdx = M; |
| 15849 | else if (M != V1BroadcastIdx) |
| 15850 | return false; |
| 15851 | } |
| 15852 | return true; |
| 15853 | }; |
| 15854 | if (DoBothBroadcast()) |
| 15855 | return lowerShuffleAsDecomposedShuffleMerge(DL, VT, V1, V2, Mask, Subtarget, |
| 15856 | DAG); |
| 15857 | |
| 15858 | |
| 15859 | |
| 15860 | |
| 15861 | int LaneCount = VT.getSizeInBits() / 128; |
| 15862 | int LaneSize = Size / LaneCount; |
| 15863 | SmallBitVector LaneInputs[2]; |
| 15864 | LaneInputs[0].resize(LaneCount, false); |
| 15865 | LaneInputs[1].resize(LaneCount, false); |
| 15866 | for (int i = 0; i < Size; ++i) |
| 15867 | if (Mask[i] >= 0) |
| 15868 | LaneInputs[Mask[i] / Size][(Mask[i] % Size) / LaneSize] = true; |
| 15869 | if (LaneInputs[0].count() <= 1 && LaneInputs[1].count() <= 1) |
| 15870 | return splitAndLowerShuffle(DL, VT, V1, V2, Mask, DAG); |
| 15871 | |
| 15872 | |
| 15873 | |
| 15874 | return lowerShuffleAsDecomposedShuffleMerge(DL, VT, V1, V2, Mask, Subtarget, |
| 15875 | DAG); |
| 15876 | } |
| 15877 | |
| 15878 | |
| 15879 | |
| 15880 | static SDValue lowerShuffleAsLanePermuteAndSHUFP(const SDLoc &DL, MVT VT, |
| 15881 | SDValue V1, SDValue V2, |
| 15882 | ArrayRef<int> Mask, |
| 15883 | SelectionDAG &DAG) { |
| 15884 | assert(VT == MVT::v4f64 && "Only for v4f64 shuffles"); |
| 15885 | |
| 15886 | int LHSMask[4] = {-1, -1, -1, -1}; |
| 15887 | int RHSMask[4] = {-1, -1, -1, -1}; |
| 15888 | unsigned SHUFPMask = 0; |
| 15889 | |
| 15890 | |
| 15891 | |
| 15892 | for (int i = 0; i != 4; ++i) { |
| 15893 | int M = Mask[i]; |
| 15894 | if (M < 0) |
| 15895 | continue; |
| 15896 | int LaneBase = i & ~1; |
| 15897 | auto &LaneMask = (i & 1) ? RHSMask : LHSMask; |
| 15898 | LaneMask[LaneBase + (M & 1)] = M; |
| 15899 | SHUFPMask |= (M & 1) << i; |
| 15900 | } |
| 15901 | |
| 15902 | SDValue LHS = DAG.getVectorShuffle(VT, DL, V1, V2, LHSMask); |
| 15903 | SDValue RHS = DAG.getVectorShuffle(VT, DL, V1, V2, RHSMask); |
| 15904 | return DAG.getNode(X86ISD::SHUFP, DL, VT, LHS, RHS, |
| 15905 | DAG.getTargetConstant(SHUFPMask, DL, MVT::i8)); |
| 15906 | } |
| 15907 | |
| 15908 | |
| 15909 | |
| 15910 | |
| 15911 | |
| 15912 | |
| 15913 | |
| 15914 | |
| 15915 | |
/// Lower a vector shuffle crossing multiple 128-bit lanes as a lane-crossing
/// permute followed by an in-lane permute.
///
/// This is mainly for cases where we can have non-repeating permutes in each
/// lane. Tries progressively finer "sublane" granularities: whole 128-bit
/// lanes first, then half-lanes, then (only with fast variable cross-lane
/// shuffles) quarter-lanes. Finer granularity only applies to single-input
/// AVX2 shuffles (CanUseSublanes).
static SDValue lowerShuffleAsLanePermuteAndPermute(
    const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
    SelectionDAG &DAG, const X86Subtarget &Subtarget) {
  int NumElts = VT.getVectorNumElements();
  int NumLanes = VT.getSizeInBits() / 128;
  int NumEltsPerLane = NumElts / NumLanes;
  bool CanUseSublanes = Subtarget.hasAVX2() && V2.isUndef();

  /// Attempt the lowering at a given sublane granularity. Returns the
  /// combined (cross-lane shuffle + in-lane shuffle) node, or SDValue() if
  /// some mask element cannot be routed at this granularity.
  auto getSublanePermute = [&](int NumSublanes) -> SDValue {
    int NumSublanesPerLane = NumSublanes / NumLanes;
    int NumEltsPerSublane = NumElts / NumSublanes;

    SmallVector<int, 16> CrossLaneMask;
    SmallVector<int, 16> InLaneMask(NumElts, SM_SentinelUndef);
    // CrossLaneMaskLarge is indexed by sublane, not by element.
    SmallVector<int, 16> CrossLaneMaskLarge(NumSublanes, SM_SentinelUndef);

    for (int i = 0; i != NumElts; ++i) {
      int M = Mask[i];
      if (M < 0)
        continue;

      int SrcSublane = M / NumEltsPerSublane;
      int DstLane = i / NumEltsPerLane;

      // We only need to get the elements into the right lane, not the right
      // sublane — search all sublanes of the destination lane for a slot that
      // already carries (or can carry) the needed source sublane.
      bool Found = false;
      int DstSubStart = DstLane * NumSublanesPerLane;
      int DstSubEnd = DstSubStart + NumSublanesPerLane;
      for (int DstSublane = DstSubStart; DstSublane < DstSubEnd; ++DstSublane) {
        if (!isUndefOrEqual(CrossLaneMaskLarge[DstSublane], SrcSublane))
          continue;

        Found = true;
        CrossLaneMaskLarge[DstSublane] = SrcSublane;
        int DstSublaneOffset = DstSublane * NumEltsPerSublane;
        InLaneMask[i] = DstSublaneOffset + M % NumEltsPerSublane;
        break;
      }
      if (!Found)
        return SDValue();
    }

    // Fill CrossLaneMask using CrossLaneMaskLarge.
    narrowShuffleMaskElts(NumEltsPerSublane, CrossLaneMaskLarge, CrossLaneMask);

    if (!CanUseSublanes) {
      // If we're only shuffling a single lowest lane and the rest are
      // identity then don't bother.
      // TODO - isShuffleMaskInputInPlace could be extended to something like
      // this.
      int NumIdentityLanes = 0;
      bool OnlyShuffleLowestLane = true;
      for (int i = 0; i != NumLanes; ++i) {
        int LaneOffset = i * NumEltsPerLane;
        if (isSequentialOrUndefInRange(InLaneMask, LaneOffset, NumEltsPerLane,
                                       i * NumEltsPerLane))
          NumIdentityLanes++;
        else if (CrossLaneMask[LaneOffset] != 0)
          OnlyShuffleLowestLane = false;
      }
      if (OnlyShuffleLowestLane && NumIdentityLanes == (NumLanes - 1))
        return SDValue();
    }

    SDValue CrossLane = DAG.getVectorShuffle(VT, DL, V1, V2, CrossLaneMask);
    return DAG.getVectorShuffle(VT, DL, CrossLane, DAG.getUNDEF(VT),
                                InLaneMask);
  };

  // First attempt a solution with full lanes.
  if (SDValue V = getSublanePermute(NumLanes))
    return V;

  // The rest of the solutions use sublanes.
  if (!CanUseSublanes)
    return SDValue();

  // Then attempt a solution with 64-bit sublanes (vpermq).
  if (SDValue V = getSublanePermute(NumLanes * 2))
    return V;

  // If that doesn't work and we have fast variable cross-lane shuffle,
  // attempt 32-bit sublanes (vpermd).
  if (!Subtarget.hasFastVariableCrossLaneShuffle())
    return SDValue();

  return getSublanePermute(NumLanes * 4);
}
| 16011 | |
| 16012 | |
| 16013 | |
| 16014 | |
| 16015 | |
| 16016 | |
| 16017 | |
| 16018 | |
/// Lower a 256-bit vector shuffle by first fixing the 128-bit lane crossings
/// and then permuting the elements within each lane.
///
/// Strategy: try the v4f64 SHUFPD special case, then fall back to splitting
/// when profitable (AVX1: at most one lane actually crosses; AVX2: only one
/// lane is referenced at all). Otherwise flip the two 128-bit lanes of the
/// single input and blend between the original and the flipped copy with an
/// in-lane shuffle mask.
static SDValue lowerShuffleAsLanePermuteAndShuffle(
    const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
    SelectionDAG &DAG, const X86Subtarget &Subtarget) {
  assert(VT.is256BitVector() && "Only for 256-bit vector shuffles!");
  int Size = Mask.size();
  int LaneSize = Size / 2;

  // If there are only inputs from one 128-bit lane, splitting will in fact be
  // less expensive. The flags track whether the given lane contains an
  // element that crosses to another lane.
  if (VT == MVT::v4f64 &&
      !all_of(Mask, [LaneSize](int M) { return M < LaneSize; }))
    if (SDValue V =
            lowerShuffleAsLanePermuteAndSHUFP(DL, VT, V1, V2, Mask, DAG))
      return V;

  if (!Subtarget.hasAVX2()) {
    // Without AVX2, split unless *both* lanes have cross-lane elements (the
    // lane-flip trick below costs more than a split otherwise).
    bool LaneCrossing[2] = {false, false};
    for (int i = 0; i < Size; ++i)
      if (Mask[i] >= 0 && ((Mask[i] % Size) / LaneSize) != (i / LaneSize))
        LaneCrossing[(Mask[i] % Size) / LaneSize] = true;
    if (!LaneCrossing[0] || !LaneCrossing[1])
      return splitAndLowerShuffle(DL, VT, V1, V2, Mask, DAG);
  } else {
    // With AVX2, split only when a single source lane feeds everything.
    bool LaneUsed[2] = {false, false};
    for (int i = 0; i < Size; ++i)
      if (Mask[i] >= 0)
        LaneUsed[(Mask[i] % Size) / LaneSize] = true;
    if (!LaneUsed[0] || !LaneUsed[1])
      return splitAndLowerShuffle(DL, VT, V1, V2, Mask, DAG);
  }

  // TODO: Handle multiple-input shuffles here; currently only the flip trick
  // below is implemented, which needs a single input.
  assert(V2.isUndef() &&
         "This last part of this routine only works on single input shuffles");

  // Rewrite cross-lane references to point at the lane-flipped copy (offset
  // by Size, i.e. the second shuffle operand below).
  SmallVector<int, 32> InLaneMask(Mask.begin(), Mask.end());
  for (int i = 0; i < Size; ++i) {
    int &M = InLaneMask[i];
    if (M < 0)
      continue;
    if (((M % Size) / LaneSize) != (i / LaneSize))
      M = (M % LaneSize) + ((i / LaneSize) * LaneSize) + Size;
  }
  assert(!is128BitLaneCrossingShuffleMask(VT, InLaneMask) &&
         "In-lane shuffle mask expected");

  // Flip the lanes, and shuffle the results which should now be in-lane.
  MVT PVT = VT.isFloatingPoint() ? MVT::v4f64 : MVT::v4i64;
  SDValue Flipped = DAG.getBitcast(PVT, V1);
  Flipped =
      DAG.getVectorShuffle(PVT, DL, Flipped, DAG.getUNDEF(PVT), {2, 3, 0, 1});
  Flipped = DAG.getBitcast(VT, Flipped);
  return DAG.getVectorShuffle(VT, DL, V1, Flipped, InLaneMask);
}
| 16078 | |
| 16079 | |
/// Handle lowering of shuffles treated as two 128-bit "super elements"
/// (v4f64/v4i64 widened to 2 x 128-bit), using subvector broadcasts,
/// insertions, blends, SHUF128 or VPERM2X128 as appropriate.
static SDValue lowerV2X128Shuffle(const SDLoc &DL, MVT VT, SDValue V1,
                                  SDValue V2, ArrayRef<int> Mask,
                                  const APInt &Zeroable,
                                  const X86Subtarget &Subtarget,
                                  SelectionDAG &DAG) {
  if (V2.isUndef()) {
    // Attempt to match a 128-bit subvector broadcast directly from a load
    // (VBROADCASTF128/VBROADCASTI128 style) when one half is splatted.
    bool SplatLo = isShuffleEquivalent(Mask, {0, 1, 0, 1}, V1);
    bool SplatHi = isShuffleEquivalent(Mask, {2, 3, 2, 3}, V1);
    if ((SplatLo || SplatHi) && !Subtarget.hasAVX512() && V1.hasOneUse() &&
        MayFoldLoad(peekThroughOneUseBitcasts(V1))) {
      auto *Ld = cast<LoadSDNode>(peekThroughOneUseBitcasts(V1));
      if (!Ld->isNonTemporal()) {
        // Build a SUBV_BROADCAST_LOAD from the selected half of the original
        // load, and re-chain users of the old load's chain result.
        MVT MemVT = VT.getHalfNumVectorElementsVT();
        unsigned Ofs = SplatLo ? 0 : MemVT.getStoreSize();
        SDVTList Tys = DAG.getVTList(VT, MVT::Other);
        SDValue Ptr = DAG.getMemBasePlusOffset(Ld->getBasePtr(),
                                               TypeSize::Fixed(Ofs), DL);
        SDValue Ops[] = {Ld->getChain(), Ptr};
        SDValue BcastLd = DAG.getMemIntrinsicNode(
            X86ISD::SUBV_BROADCAST_LOAD, DL, Tys, Ops, MemVT,
            DAG.getMachineFunction().getMachineMemOperand(
                Ld->getMemOperand(), Ofs, MemVT.getStoreSize()));
        DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), BcastLd.getValue(1));
        return BcastLd;
      }
    }

    // With AVX2, prefer VPERMQ/VPERMPD for unary shuffles (allows memory
    // folding), so don't lower here.
    if (Subtarget.hasAVX2())
      return SDValue();
  }

  bool V2IsZero = !V2.isUndef() && ISD::isBuildVectorAllZeros(V2.getNode());

  // The mask must be expressible as a shuffle of 128-bit halves.
  SmallVector<int, 4> WidenedMask;
  if (!canWidenShuffleElements(Mask, Zeroable, V2IsZero, WidenedMask))
    return SDValue();

  bool IsLowZero = (Zeroable & 0x3) == 0x3;
  bool IsHighZero = (Zeroable & 0xc) == 0xc;

  // Try to use an insert of the low half of V1 into a zero vector.
  if (WidenedMask[0] == 0 && IsHighZero) {
    MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(), 2);
    SDValue LoV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V1,
                              DAG.getIntPtrConstant(0, DL));
    return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
                       getZeroVector(VT, Subtarget, DAG, DL), LoV,
                       DAG.getIntPtrConstant(0, DL));
  }

  // TODO: If minimizing size and one of the inputs is a zero vector and the
  // zero vector has only one use, we could use a VPERM2X128 to save the
  // instruction bytes needed to explicitly generate the zero vector.

  // Blends are faster and handle all the non-lane-crossing cases.
  if (SDValue Blend = lowerShuffleAsBlend(DL, VT, V1, V2, Mask, Zeroable,
                                          Subtarget, DAG))
    return Blend;

  // If either half is zeroable we fall through to VPERM2X128, whose immediate
  // can materialize the zero half implicitly.
  if (!IsLowZero && !IsHighZero) {
    // Check for patterns which can be matched with a single INSERTF128.
    bool OnlyUsesV1 = isShuffleEquivalent(Mask, {0, 1, 0, 1}, V1, V2);
    if (OnlyUsesV1 || isShuffleEquivalent(Mask, {0, 1, 4, 5}, V1, V2)) {
      // Prefer VPERM2X128 (below) when V1 is a load so the 128-bit memop can
      // fold; a VINSERTF128 formed here couldn't fold a 256-bit load.
      if (!isa<LoadSDNode>(peekThroughBitcasts(V1))) {
        MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(), 2);
        SDValue SubVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT,
                                     OnlyUsesV1 ? V1 : V2,
                                     DAG.getIntPtrConstant(0, DL));
        return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, V1, SubVec,
                           DAG.getIntPtrConstant(2, DL));
      }
    }

    // Try to use SHUF128 (AVX512VL) when low half comes from V1 and high half
    // from V2.
    if (Subtarget.hasVLX()) {
      if (WidenedMask[0] < 2 && WidenedMask[1] >= 2) {
        unsigned PermMask = ((WidenedMask[0] % 2) << 0) |
                            ((WidenedMask[1] % 2) << 1);
        return DAG.getNode(X86ISD::SHUF128, DL, VT, V1, V2,
                           DAG.getTargetConstant(PermMask, DL, MVT::i8));
      }
    }
  }

  // Otherwise form a 128-bit permutation. The VPERM2X128 immediate control
  // byte is laid out as:
  //    [1:0] - select 128 bits from sources for low half of destination
  //    [2]   - ignore
  //    [3]   - zero low half of destination
  //    [5:4] - select 128 bits from sources for high half of destination
  //    [6]   - ignore
  //    [7]   - zero high half of destination
  assert((WidenedMask[0] >= 0 || IsLowZero) &&
         (WidenedMask[1] >= 0 || IsHighZero) && "Undef half?");

  unsigned PermMask = 0;
  PermMask |= IsLowZero ? 0x08 : (WidenedMask[0] << 0);
  PermMask |= IsHighZero ? 0x80 : (WidenedMask[1] << 4);

  // Check the immediate mask and replace unused sources with undef.
  if ((PermMask & 0x0a) != 0x00 && (PermMask & 0xa0) != 0x00)
    V1 = DAG.getUNDEF(VT);
  if ((PermMask & 0x0a) != 0x02 && (PermMask & 0xa0) != 0x20)
    V2 = DAG.getUNDEF(VT);

  return DAG.getNode(X86ISD::VPERM2X128, DL, VT, V1, V2,
                     DAG.getTargetConstant(PermMask, DL, MVT::i8));
}
| 16201 | |
| 16202 | |
| 16203 | |
| 16204 | |
| 16205 | |
| 16206 | |
| 16207 | |
| 16208 | |
/// Lower a two-input shuffle by first shuffling the 128-bit lanes of each
/// input into position (as NewV1/NewV2) and then applying a single repeated
/// per-lane mask to combine them. Each destination lane may use at most two
/// source lanes, and all lanes must agree on one repeating in-lane mask.
static SDValue lowerShuffleAsLanePermuteAndRepeatedMask(
    const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
    const X86Subtarget &Subtarget, SelectionDAG &DAG) {
  assert(!V2.isUndef() && "This is only useful with multiple inputs.");

  // Already a repeated lane mask - nothing for this routine to do.
  if (is128BitLaneRepeatedShuffleMask(VT, Mask))
    return SDValue();

  int NumElts = Mask.size();
  int NumLanes = VT.getSizeInBits() / 128;
  int NumLaneElts = 128 / VT.getScalarSizeInBits();
  // RepeatMask accumulates the common in-lane mask; LaneSrcs[Lane] records
  // which source lanes feed operand 0 and operand 1 of that destination lane.
  SmallVector<int, 16> RepeatMask(NumLaneElts, -1);
  SmallVector<std::array<int, 2>, 2> LaneSrcs(NumLanes, {{-1, -1}});

  // First pass: handle destination lanes that draw from two source lanes,
  // trying to fit each lane's local mask into RepeatMask (possibly commuted).
  for (int Lane = 0; Lane != NumLanes; ++Lane) {
    int Srcs[2] = {-1, -1};
    SmallVector<int, 16> InLaneMask(NumLaneElts, -1);
    for (int i = 0; i != NumLaneElts; ++i) {
      int M = Mask[(Lane * NumLaneElts) + i];
      if (M < 0)
        continue;

      // Determine which of the possible source lanes this element comes from
      // and assign it as one of (up to two) sources for this destination
      // lane; a third distinct source lane means we can't use this lowering.
      int LaneSrc = M / NumLaneElts;
      int Src;
      if (Srcs[0] < 0 || Srcs[0] == LaneSrc)
        Src = 0;
      else if (Srcs[1] < 0 || Srcs[1] == LaneSrc)
        Src = 1;
      else
        return SDValue();

      Srcs[Src] = LaneSrc;
      InLaneMask[i] = (M % NumLaneElts) + Src * NumElts;
    }

    // Lanes with at most one source are deferred to the second pass.
    if (Srcs[1] < 0)
      continue;

    LaneSrcs[Lane][0] = Srcs[0];
    LaneSrcs[Lane][1] = Srcs[1];

    auto MatchMasks = [](ArrayRef<int> M1, ArrayRef<int> M2) {
      assert(M1.size() == M2.size() && "Unexpected mask size");
      for (int i = 0, e = M1.size(); i != e; ++i)
        if (M1[i] >= 0 && M2[i] >= 0 && M1[i] != M2[i])
          return false;
      return true;
    };

    auto MergeMasks = [](ArrayRef<int> Mask, MutableArrayRef<int> MergedMask) {
      assert(Mask.size() == MergedMask.size() && "Unexpected mask size");
      for (int i = 0, e = MergedMask.size(); i != e; ++i) {
        int M = Mask[i];
        if (M < 0)
          continue;
        assert((MergedMask[i] < 0 || MergedMask[i] == M) &&
               "Unexpected mask element");
        MergedMask[i] = M;
      }
    };

    if (MatchMasks(InLaneMask, RepeatMask)) {
      // This lane's mask is compatible - merge its elements into RepeatMask.
      MergeMasks(InLaneMask, RepeatMask);
      continue;
    }

    // Didn't match - commute the lane's operands and try the other order.
    std::swap(LaneSrcs[Lane][0], LaneSrcs[Lane][1]);
    ShuffleVectorSDNode::commuteMask(InLaneMask);

    if (MatchMasks(InLaneMask, RepeatMask)) {
      // Merge the commuted mask into RepeatMask.
      MergeMasks(InLaneMask, RepeatMask);
      continue;
    }

    // Neither operand order fits the repeated mask.
    return SDValue();
  }

  // Second pass: fit single-source (or undef) lanes against RepeatMask.
  for (int Lane = 0; Lane != NumLanes; ++Lane) {
    // Skip lanes that were fully resolved by the first pass.
    if (LaneSrcs[Lane][0] >= 0)
      continue;

    for (int i = 0; i != NumLaneElts; ++i) {
      int M = Mask[(Lane * NumLaneElts) + i];
      if (M < 0)
        continue;

      // If RepeatMask hasn't constrained this element yet, claim it for
      // operand 0 (values < NumElts select operand 0, >= NumElts operand 1).
      if (RepeatMask[i] < 0)
        RepeatMask[i] = M % NumLaneElts;

      if (RepeatMask[i] < NumElts) {
        if (RepeatMask[i] != M % NumLaneElts)
          return SDValue();
        LaneSrcs[Lane][0] = M / NumLaneElts;
      } else {
        if (RepeatMask[i] != ((M % NumLaneElts) + NumElts))
          return SDValue();
        LaneSrcs[Lane][1] = M / NumLaneElts;
      }
    }

    if (LaneSrcs[Lane][0] < 0 && LaneSrcs[Lane][1] < 0)
      return SDValue();
  }

  // Build the lane permute feeding operand 0 of the repeated shuffle.
  SmallVector<int, 16> NewMask(NumElts, -1);
  for (int Lane = 0; Lane != NumLanes; ++Lane) {
    int Src = LaneSrcs[Lane][0];
    for (int i = 0; i != NumLaneElts; ++i) {
      int M = -1;
      if (Src >= 0)
        M = Src * NumLaneElts + i;
      NewMask[Lane * NumLaneElts + i] = M;
    }
  }
  SDValue NewV1 = DAG.getVectorShuffle(VT, DL, V1, V2, NewMask);

  // Bail if we produced the mask we started with - retrying the same shuffle
  // would recurse indefinitely.
  if (isa<ShuffleVectorSDNode>(NewV1) &&
      cast<ShuffleVectorSDNode>(NewV1)->getMask() == Mask)
    return SDValue();

  // Build the lane permute feeding operand 1 of the repeated shuffle.
  for (int Lane = 0; Lane != NumLanes; ++Lane) {
    int Src = LaneSrcs[Lane][1];
    for (int i = 0; i != NumLaneElts; ++i) {
      int M = -1;
      if (Src >= 0)
        M = Src * NumLaneElts + i;
      NewMask[Lane * NumLaneElts + i] = M;
    }
  }
  SDValue NewV2 = DAG.getVectorShuffle(VT, DL, V1, V2, NewMask);

  // Same infinite-recursion guard for the second operand.
  if (isa<ShuffleVectorSDNode>(NewV2) &&
      cast<ShuffleVectorSDNode>(NewV2)->getMask() == Mask)
    return SDValue();

  // Replicate RepeatMask across every lane, rebased to in-lane indices.
  for (int i = 0; i != NumElts; ++i) {
    NewMask[i] = RepeatMask[i % NumLaneElts];
    if (NewMask[i] < 0)
      continue;

    NewMask[i] += (i / NumLaneElts) * NumLaneElts;
  }
  return DAG.getVectorShuffle(VT, DL, NewV1, NewV2, NewMask);
}
| 16370 | |
| 16371 | |
| 16372 | |
| 16373 | |
| 16374 | |
| 16375 | |
| 16376 | |
/// If the shuffle mask leaves exactly one half of the result undef and uses at
/// most two halves of the inputs, return true and fill in HalfMask with a
/// half-width mask (indices rebased onto the chosen halves). HalfIdx1/HalfIdx2
/// identify the halves used: 0 = lower V1, 1 = upper V1, 2 = lower V2,
/// 3 = upper V2, -1 = unused.
static bool
getHalfShuffleMask(ArrayRef<int> Mask, MutableArrayRef<int> HalfMask,
                   int &HalfIdx1, int &HalfIdx2) {
  assert((Mask.size() == HalfMask.size() * 2) &&
         "Expected input mask to be twice as long as output");

  // Exactly one half of the result must be undef to allow narrowing.
  bool UndefLower = isUndefLowerHalf(Mask);
  bool UndefUpper = isUndefUpperHalf(Mask);
  if (UndefLower == UndefUpper)
    return false;

  unsigned HalfNumElts = HalfMask.size();
  unsigned MaskIndexOffset = UndefLower ? HalfNumElts : 0;
  HalfIdx1 = -1;
  HalfIdx2 = -1;
  for (unsigned i = 0; i != HalfNumElts; ++i) {
    int M = Mask[i + MaskIndexOffset];
    if (M < 0) {
      HalfMask[i] = M;
      continue;
    }

    // Determine which of the 4 half vectors this element is from.
    // i.e. 0 = Lower V1, 1 = Upper V1, 2 = Lower V2, 3 = Upper V2.
    int HalfIdx = M / HalfNumElts;

    // Determine the element index into its half vector source.
    int HalfElt = M % HalfNumElts;

    // We can shuffle with up to 2 half vectors, set the new 'half'
    // shuffle mask accordingly.
    if (HalfIdx1 < 0 || HalfIdx1 == HalfIdx) {
      HalfMask[i] = HalfElt;
      HalfIdx1 = HalfIdx;
      continue;
    }
    if (HalfIdx2 < 0 || HalfIdx2 == HalfIdx) {
      HalfMask[i] = HalfElt + HalfNumElts;
      HalfIdx2 = HalfIdx;
      continue;
    }

    // Too many half vectors referenced.
    return false;
  }

  return true;
}
| 16426 | |
| 16427 | |
| 16428 | |
| 16429 | static SDValue getShuffleHalfVectors(const SDLoc &DL, SDValue V1, SDValue V2, |
| 16430 | ArrayRef<int> HalfMask, int HalfIdx1, |
| 16431 | int HalfIdx2, bool UndefLower, |
| 16432 | SelectionDAG &DAG, bool UseConcat = false) { |
| 16433 | assert(V1.getValueType() == V2.getValueType() && "Different sized vectors?"); |
| 16434 | assert(V1.getValueType().isSimple() && "Expecting only simple types"); |
| 16435 | |
| 16436 | MVT VT = V1.getSimpleValueType(); |
| 16437 | MVT HalfVT = VT.getHalfNumVectorElementsVT(); |
| 16438 | unsigned HalfNumElts = HalfVT.getVectorNumElements(); |
| 16439 | |
| 16440 | auto getHalfVector = [&](int HalfIdx) { |
| 16441 | if (HalfIdx < 0) |
| 16442 | return DAG.getUNDEF(HalfVT); |
| 16443 | SDValue V = (HalfIdx < 2 ? V1 : V2); |
| 16444 | HalfIdx = (HalfIdx % 2) * HalfNumElts; |
| 16445 | return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V, |
| 16446 | DAG.getIntPtrConstant(HalfIdx, DL)); |
| 16447 | }; |
| 16448 | |
| 16449 | |
| 16450 | SDValue Half1 = getHalfVector(HalfIdx1); |
| 16451 | SDValue Half2 = getHalfVector(HalfIdx2); |
| 16452 | SDValue V = DAG.getVectorShuffle(HalfVT, DL, Half1, Half2, HalfMask); |
| 16453 | if (UseConcat) { |
| 16454 | SDValue Op0 = V; |
| 16455 | SDValue Op1 = DAG.getUNDEF(HalfVT); |
| 16456 | if (UndefLower) |
| 16457 | std::swap(Op0, Op1); |
| 16458 | return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Op0, Op1); |
| 16459 | } |
| 16460 | |
| 16461 | unsigned Offset = UndefLower ? HalfNumElts : 0; |
| 16462 | return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, |
| 16463 | DAG.getIntPtrConstant(Offset, DL)); |
| 16464 | } |
| 16465 | |
| 16466 | |
| 16467 | |
| 16468 | |
/// Lower shuffles where an entire half of a 256 or 512-bit vector is UNDEF.
/// This allows for fast cases such as subvector extraction/insertion
/// or shuffling smaller vector types which can lower more efficiently.
static SDValue lowerShuffleWithUndefHalf(const SDLoc &DL, MVT VT, SDValue V1,
                                         SDValue V2, ArrayRef<int> Mask,
                                         const X86Subtarget &Subtarget,
                                         SelectionDAG &DAG) {
  assert((VT.is256BitVector() || VT.is512BitVector()) &&
         "Expected 256-bit or 512-bit vector");

  bool UndefLower = isUndefLowerHalf(Mask);
  if (!UndefLower && !isUndefUpperHalf(Mask))
    return SDValue();

  assert((!UndefLower || !isUndefUpperHalf(Mask)) &&
         "Completely undef shuffle mask should have been simplified already");

  // Upper half is undef and lower half is whole upper subvector.
  // e.g. vector_shuffle <4, 5, 6, 7, u, u, u, u> or <2, 3, u, u>
  MVT HalfVT = VT.getHalfNumVectorElementsVT();
  unsigned HalfNumElts = HalfVT.getVectorNumElements();
  if (!UndefLower &&
      isSequentialOrUndefInRange(Mask, 0, HalfNumElts, HalfNumElts)) {
    SDValue Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V1,
                             DAG.getIntPtrConstant(HalfNumElts, DL));
    return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), Hi,
                       DAG.getIntPtrConstant(0, DL));
  }

  // Lower half is undef and upper half is whole lower subvector.
  // e.g. vector_shuffle <u, u, u, u, 0, 1, 2, 3> or <u, u, 0, 1>
  if (UndefLower &&
      isSequentialOrUndefInRange(Mask, HalfNumElts, HalfNumElts, 0)) {
    SDValue Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HalfVT, V1,
                             DAG.getIntPtrConstant(0, DL));
    return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), Hi,
                       DAG.getIntPtrConstant(HalfNumElts, DL));
  }

  // If the defined half references at most 2 input halves, narrow to a
  // half-width shuffle of those halves.
  int HalfIdx1, HalfIdx2;
  SmallVector<int, 8> HalfMask(HalfNumElts);
  if (!getHalfShuffleMask(Mask, HalfMask, HalfIdx1, HalfIdx2))
    return SDValue();

  assert(HalfMask.size() == HalfNumElts && "Unexpected shuffle mask length");

  // Count how many of the referenced halves are lower/upper subvectors
  // (extracting an upper half is not a free subreg operation).
  unsigned NumLowerHalves =
      (HalfIdx1 == 0 || HalfIdx1 == 2) + (HalfIdx2 == 0 || HalfIdx2 == 2);
  unsigned NumUpperHalves =
      (HalfIdx1 == 1 || HalfIdx1 == 3) + (HalfIdx2 == 1 || HalfIdx2 == 3);
  assert(NumLowerHalves + NumUpperHalves <= 2 && "Only 1 or 2 halves allowed");

  // Decide whether extract + narrow shuffle + insert beats a full-width
  // shuffle, based on element width and available subtarget features.
  unsigned EltWidth = VT.getVectorElementType().getSizeInBits();
  if (!UndefLower) {
    // Lower-half extracts are free subreg operations - always profitable
    // when no upper halves are needed.
    if (NumUpperHalves == 0)
      return getShuffleHalfVectors(DL, V1, V2, HalfMask, HalfIdx1, HalfIdx2,
                                   UndefLower, DAG);

    if (NumUpperHalves == 1) {
      // AVX2 has efficient 32/64-bit element cross-lane shuffles.
      if (Subtarget.hasAVX2()) {
        // extract128 + vunpckhps/vshufps is better than vblend + vpermps.
        if (EltWidth == 32 && NumLowerHalves && HalfVT.is128BitVector() &&
            !is128BitUnpackShuffleMask(HalfMask) &&
            (!isSingleSHUFPSMask(HalfMask) ||
             Subtarget.hasFastVariableCrossLaneShuffle()))
          return SDValue();
        // If this is a unary shuffle (V2 canonicalized to undef), we can use
        // vpermpd/vpermq directly - don't narrow.
        if (EltWidth == 64 && V2.isUndef())
          return SDValue();
      }
      // AVX512 has efficient cross-lane shuffles for all legal 512-bit types.
      if (Subtarget.hasAVX512() && VT.is512BitVector())
        return SDValue();
      // Extract + narrow shuffle is better than the wide alternative.
      return getShuffleHalfVectors(DL, V1, V2, HalfMask, HalfIdx1, HalfIdx2,
                                   UndefLower, DAG);
    }

    // Don't extract both upper halves - that requires two extracts on top of
    // the narrow shuffle.
    assert(NumUpperHalves == 2 && "Half vector count went wrong");
    return SDValue();
  }

  // UndefLower - uuuuXXXX: an insert into the upper half is required.
  if (NumUpperHalves == 0) {
    // AVX2 has efficient 64-bit element cross-lane shuffles.
    // TODO: Refine to account for unary shuffle, splat, and other masks?
    if (Subtarget.hasAVX2() && EltWidth == 64)
      return SDValue();
    // AVX512 has efficient cross-lane shuffles for all legal 512-bit types.
    if (Subtarget.hasAVX512() && VT.is512BitVector())
      return SDValue();
    // Narrow shuffle + insert is better than the wide alternative.
    return getShuffleHalfVectors(DL, V1, V2, HalfMask, HalfIdx1, HalfIdx2,
                                 UndefLower, DAG);
  }

  // NumUpperHalves != 0: don't bother with extract + shuffle + insert.
  return SDValue();
}
| 16575 | |
| 16576 | |
| 16577 | |
| 16578 | |
| 16579 | |
| 16580 | |
| 16581 | static bool isShuffleMaskInputInPlace(int Input, ArrayRef<int> Mask) { |
| 16582 | assert((Input == 0 || Input == 1) && "Only two inputs to shuffles."); |
| 16583 | int Size = Mask.size(); |
| 16584 | for (int i = 0; i < Size; ++i) |
| 16585 | if (Mask[i] >= 0 && Mask[i] / Size == Input && Mask[i] % Size != i) |
| 16586 | return false; |
| 16587 | |
| 16588 | return true; |
| 16589 | } |
| 16590 | |
| 16591 | |
| 16592 | |
| 16593 | |
| 16594 | |
/// Handle cases where we can form a repeating (possibly sub-lane) shuffle of
/// the inputs and then permute the shuffled result into the destination
/// lanes/sub-lanes. Also matches broadcast-of-a-repeating-pattern shuffles
/// on AVX2.
static SDValue lowerShuffleAsRepeatedMaskAndLanePermute(
    const SDLoc &DL, MVT VT, SDValue V1, SDValue V2, ArrayRef<int> Mask,
    const X86Subtarget &Subtarget, SelectionDAG &DAG) {
  int NumElts = VT.getVectorNumElements();
  int NumLanes = VT.getSizeInBits() / 128;
  int NumLaneElts = NumElts / NumLanes;

  // On AVX2 we may be able to just shuffle the lowest elements into place and
  // then broadcast the result (16/32/64-bit groups).
  if (Subtarget.hasAVX2()) {
    for (unsigned BroadcastSize : {16, 32, 64}) {
      if (BroadcastSize <= VT.getScalarSizeInBits())
        continue;
      int NumBroadcastElts = BroadcastSize / VT.getScalarSizeInBits();

      // Attempt to match a repeating pattern every NumBroadcastElts,
      // accounting for UNDEFs, but only if every element references the
      // lowest 128-bit lane.
      auto FindRepeatingBroadcastMask = [&](SmallVectorImpl<int> &RepeatMask) {
        for (int i = 0; i != NumElts; i += NumBroadcastElts)
          for (int j = 0; j != NumBroadcastElts; ++j) {
            int M = Mask[i + j];
            if (M < 0)
              continue;
            int &R = RepeatMask[j];
            if (0 != ((M % NumElts) / NumLaneElts))
              return false;
            if (0 <= R && R != M)
              return false;
            R = M;
          }
        return true;
      };

      SmallVector<int, 8> RepeatMask((unsigned)NumElts, -1);
      if (!FindRepeatingBroadcastMask(RepeatMask))
        continue;

      // Shuffle the (lowest) repeated elements in place for broadcast.
      SDValue RepeatShuf = DAG.getVectorShuffle(VT, DL, V1, V2, RepeatMask);

      // Shuffle the actual broadcast.
      SmallVector<int, 8> BroadcastMask((unsigned)NumElts, -1);
      for (int i = 0; i != NumElts; i += NumBroadcastElts)
        for (int j = 0; j != NumBroadcastElts; ++j)
          BroadcastMask[i + j] = j;
      return DAG.getVectorShuffle(VT, DL, RepeatShuf, DAG.getUNDEF(VT),
                                  BroadcastMask);
    }
  }

  // Bail if the shuffle mask doesn't cross 128-bit lanes.
  if (!is128BitLaneCrossingShuffleMask(VT, Mask))
    return SDValue();

  // Bail if we already have a repeated lane shuffle mask.
  SmallVector<int, 8> RepeatedShuffleMask;
  if (is128BitLaneRepeatedShuffleMask(VT, Mask, RepeatedShuffleMask))
    return SDValue();

  // On AVX2 targets we can permute 256-bit vectors as 64-bit sub-lanes
  // (with PERMQ/PERMPD); otherwise we can only permute whole 128-bit lanes.
  int SubLaneScale = Subtarget.hasAVX2() && VT.is256BitVector() ? 2 : 1;
  int NumSubLanes = NumLanes * SubLaneScale;
  int NumSubLaneElts = NumLaneElts / SubLaneScale;

  // Check that all the sources come from a single lane per destination
  // sub-lane and that we can form a repeating sub-lane shuffle mask.
  // Dst2SrcSubLanes records which source sub-lane feeds each destination
  // sub-lane; TopSrcSubLane is the highest source sub-lane referenced.
  int TopSrcSubLane = -1;
  SmallVector<int, 8> Dst2SrcSubLanes((unsigned)NumSubLanes, -1);
  SmallVector<int, 8> RepeatedSubLaneMasks[2] = {
      SmallVector<int, 8>((unsigned)NumSubLaneElts, SM_SentinelUndef),
      SmallVector<int, 8>((unsigned)NumSubLaneElts, SM_SentinelUndef)};

  for (int DstSubLane = 0; DstSubLane != NumSubLanes; ++DstSubLane) {
    // Extract the sub-lane's local mask, ensuring every used element comes
    // from a single source 128-bit lane.
    int SrcLane = -1;
    SmallVector<int, 8> SubLaneMask((unsigned)NumSubLaneElts, -1);
    for (int Elt = 0; Elt != NumSubLaneElts; ++Elt) {
      int M = Mask[(DstSubLane * NumSubLaneElts) + Elt];
      if (M < 0)
        continue;
      int Lane = (M % NumElts) / NumLaneElts;
      if ((0 <= SrcLane) && (SrcLane != Lane))
        return SDValue();
      SrcLane = Lane;
      int LocalM = (M % NumLaneElts) + (M < NumElts ? 0 : NumElts);
      SubLaneMask[Elt] = LocalM;
    }

    // Whole sub-lane is undef.
    if (SrcLane < 0)
      continue;

    // Attempt to match against one of the candidate repeated sub-lane masks.
    for (int SubLane = 0; SubLane != SubLaneScale; ++SubLane) {
      auto MatchMasks = [NumSubLaneElts](ArrayRef<int> M1, ArrayRef<int> M2) {
        for (int i = 0; i != NumSubLaneElts; ++i) {
          if (M1[i] < 0 || M2[i] < 0)
            continue;
          if (M1[i] != M2[i])
            return false;
        }
        return true;
      };

      auto &RepeatedSubLaneMask = RepeatedSubLaneMasks[SubLane];
      if (!MatchMasks(SubLaneMask, RepeatedSubLaneMask))
        continue;

      // Merge the sub-lane mask into the matching repeated sub-lane mask.
      for (int i = 0; i != NumSubLaneElts; ++i) {
        int M = SubLaneMask[i];
        if (M < 0)
          continue;
        assert((RepeatedSubLaneMask[i] < 0 || RepeatedSubLaneMask[i] == M) &&
               "Unexpected mask element");
        RepeatedSubLaneMask[i] = M;
      }

      // Track the top-most source sub-lane - by design we only permute
      // within a sub-lane's source, so this limits the repeated mask size.
      int SrcSubLane = (SrcLane * SubLaneScale) + SubLane;
      TopSrcSubLane = std::max(TopSrcSubLane, SrcSubLane);
      Dst2SrcSubLanes[DstSubLane] = SrcSubLane;
      break;
    }

    // Bail if we failed to find a matching repeated sub-lane mask.
    if (Dst2SrcSubLanes[DstSubLane] < 0)
      return SDValue();
  }
  assert(0 <= TopSrcSubLane && TopSrcSubLane < NumSubLanes &&
         "Unexpected source lane");

  // Create a repeating shuffle mask for the entire vector.
  SmallVector<int, 8> RepeatedMask((unsigned)NumElts, -1);
  for (int SubLane = 0; SubLane <= TopSrcSubLane; ++SubLane) {
    int Lane = SubLane / SubLaneScale;
    auto &RepeatedSubLaneMask = RepeatedSubLaneMasks[SubLane % SubLaneScale];
    for (int Elt = 0; Elt != NumSubLaneElts; ++Elt) {
      int M = RepeatedSubLaneMask[Elt];
      if (M < 0)
        continue;
      int Idx = (SubLane * NumSubLaneElts) + Elt;
      RepeatedMask[Idx] = M + (Lane * NumLaneElts);
    }
  }
  SDValue RepeatedShuffle = DAG.getVectorShuffle(VT, DL, V1, V2, RepeatedMask);

  // Shuffle each source sub-lane to its destination.
  SmallVector<int, 8> SubLaneMask((unsigned)NumElts, -1);
  for (int i = 0; i != NumElts; i += NumSubLaneElts) {
    int SrcSubLane = Dst2SrcSubLanes[i / NumSubLaneElts];
    if (SrcSubLane < 0)
      continue;
    for (int j = 0; j != NumSubLaneElts; ++j)
      SubLaneMask[i + j] = j + (SrcSubLane * NumSubLaneElts);
  }

  return DAG.getVectorShuffle(VT, DL, RepeatedShuffle, DAG.getUNDEF(VT),
                              SubLaneMask);
}
| 16760 | |
/// Try to match a 64-bit element shuffle to SHUFPD, producing the immediate
/// in ShuffleImm. May commute V1/V2 if only the commuted form matches, and
/// reports via ForceV1Zero/ForceV2Zero when an input must be replaced with a
/// zero vector (because all elements drawn from it are zeroable).
static bool matchShuffleWithSHUFPD(MVT VT, SDValue &V1, SDValue &V2,
                                   bool &ForceV1Zero, bool &ForceV2Zero,
                                   unsigned &ShuffleImm, ArrayRef<int> Mask,
                                   const APInt &Zeroable) {
  int NumElts = VT.getVectorNumElements();
  assert(VT.getScalarSizeInBits() == 64 &&
         (NumElts == 2 || NumElts == 4 || NumElts == 8) &&
         "Unexpected data type for VSHUFPD");
  assert(isUndefOrZeroOrInRange(Mask, 0, 2 * NumElts) &&
         "Illegal shuffle mask");

  // ZeroLane[0]/[1] - all even/odd destination elements are zeroable, i.e.
  // the whole corresponding input operand can be a zero vector.
  bool ZeroLane[2] = { true, true };
  for (int i = 0; i < NumElts; ++i)
    ZeroLane[i & 1] &= Zeroable[i];

  // SHUFPD element selection pattern (even elements from op0, odd from op1):
  // Mask for V8F64: 0/1,  8/9,  2/3,  10/11, 4/5, ..
  // Mask for V4F64: 0/1,  4/5,  2/3,  6/7, ..
  ShuffleImm = 0;
  bool ShufpdMask = true;
  bool CommutableMask = true;
  for (int i = 0; i < NumElts; ++i) {
    if (Mask[i] == SM_SentinelUndef || ZeroLane[i & 1])
      continue;
    if (Mask[i] < 0)
      return false;
    // Val/CommutVal - the pair of candidate indices for this position in the
    // direct and operand-swapped forms respectively.
    int Val = (i & 6) + NumElts * (i & 1);
    int CommutVal = (i & 0xe) + NumElts * ((i & 1) ^ 1);
    if (Mask[i] < Val || Mask[i] > Val + 1)
      ShufpdMask = false;
    if (Mask[i] < CommutVal || Mask[i] > CommutVal + 1)
      CommutableMask = false;
    // Bit i of the immediate selects the high (1) or low (0) element of the
    // source pair.
    ShuffleImm |= (Mask[i] % 2) << i;
  }

  if (!ShufpdMask && !CommutableMask)
    return false;

  // Only the commuted form matched - swap the operands.
  if (!ShufpdMask && CommutableMask)
    std::swap(V1, V2);

  ForceV1Zero = ZeroLane[0];
  ForceV2Zero = ZeroLane[1];
  return true;
}
| 16805 | |
| 16806 | static SDValue lowerShuffleWithSHUFPD(const SDLoc &DL, MVT VT, SDValue V1, |
| 16807 | SDValue V2, ArrayRef<int> Mask, |
| 16808 | const APInt &Zeroable, |
| 16809 | const X86Subtarget &Subtarget, |
| 16810 | SelectionDAG &DAG) { |
| 16811 | assert((VT == MVT::v2f64 || VT == MVT::v4f64 || VT == MVT::v8f64) && |
| 16812 | "Unexpected data type for VSHUFPD"); |
| 16813 | |
| 16814 | unsigned Immediate = 0; |
| 16815 | bool ForceV1Zero = false, ForceV2Zero = false; |
| 16816 | if (!matchShuffleWithSHUFPD(VT, V1, V2, ForceV1Zero, ForceV2Zero, Immediate, |
| 16817 | Mask, Zeroable)) |
| 16818 | return SDValue(); |
| 16819 | |
| 16820 | |
| 16821 | if (ForceV1Zero) |
| 16822 | V1 = getZeroVector(VT, Subtarget, DAG, DL); |
| 16823 | if (ForceV2Zero) |
| 16824 | V2 = getZeroVector(VT, Subtarget, DAG, DL); |
| 16825 | |
| 16826 | return DAG.getNode(X86ISD::SHUFP, DL, VT, V1, V2, |
| 16827 | DAG.getTargetConstant(Immediate, DL, MVT::i8)); |
| 16828 | } |
| 16829 | |
| 16830 | |
| 16831 | |
| 16832 | |
/// Lower a v32i8 shuffle whose first 8 elements truncate the qwords of the
/// two inputs to bytes and whose remaining 24 elements are zeroable, as two
/// VTRUNCs combined with an unpack and widened with zeros.
static SDValue lowerShuffleAsVTRUNCAndUnpack(const SDLoc &DL, MVT VT,
                                             SDValue V1, SDValue V2,
                                             ArrayRef<int> Mask,
                                             const APInt &Zeroable,
                                             SelectionDAG &DAG) {
  assert(VT == MVT::v32i8 && "Unexpected type!");

  // The first 8 indices should be every 8th element.
  if (!isSequentialOrUndefInRange(Mask, 0, 8, 0, 8))
    return SDValue();

  // Remaining elements need to be zeroable.
  if (Zeroable.countLeadingOnes() < (Mask.size() - 8))
    return SDValue();

  V1 = DAG.getBitcast(MVT::v4i64, V1);
  V2 = DAG.getBitcast(MVT::v4i64, V2);

  V1 = DAG.getNode(X86ISD::VTRUNC, DL, MVT::v16i8, V1);
  V2 = DAG.getNode(X86ISD::VTRUNC, DL, MVT::v16i8, V2);

  // The VTRUNCs leave zeros in the upper 12 bytes; interleave the low 4-byte
  // groups of both truncations (unpckldq-style).
  SDValue Unpack = DAG.getVectorShuffle(MVT::v16i8, DL, V1, V2,
                                        { 0, 1, 2, 3, 16, 17, 18, 19,
                                          4, 5, 6, 7, 20, 21, 22, 23 });

  // Insert the unpack into a zero vector to widen back to v32i8.
  return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, MVT::v32i8,
                     DAG.getConstant(0, DL, MVT::v32i8), Unpack,
                     DAG.getIntPtrConstant(0, DL));
}
| 16864 | |
| 16865 | |
| 16866 | |
| 16867 | |
| 16868 | |
| 16869 | |
/// Handle lowering of 4-lane 64-bit floating point shuffles.
///
/// Also ends up handling lowering of 4-lane 64-bit integer shuffles when AVX2
/// isn't available.
static SDValue lowerV4F64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
                                 const APInt &Zeroable, SDValue V1, SDValue V2,
                                 const X86Subtarget &Subtarget,
                                 SelectionDAG &DAG) {
  assert(V1.getSimpleValueType() == MVT::v4f64 && "Bad operand type!");
  assert(V2.getSimpleValueType() == MVT::v4f64 && "Bad operand type!");
  assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");

  if (SDValue V = lowerV2X128Shuffle(DL, MVT::v4f64, V1, V2, Mask, Zeroable,
                                     Subtarget, DAG))
    return V;

  if (V2.isUndef()) {
    // Check for being able to broadcast a single element.
    if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v4f64, V1, V2,
                                                    Mask, Subtarget, DAG))
      return Broadcast;

    // Use low duplicate instructions for masks that match their pattern.
    if (isShuffleEquivalent(Mask, {0, 0, 2, 2}, V1, V2))
      return DAG.getNode(X86ISD::MOVDDUP, DL, MVT::v4f64, V1);

    if (!is128BitLaneCrossingShuffleMask(MVT::v4f64, Mask)) {
      // Non-half-crossing single input shuffles can be lowered with an
      // interleaved permutation (VPERMILPD immediate).
      unsigned VPERMILPMask = (Mask[0] == 1) | ((Mask[1] == 1) << 1) |
                              ((Mask[2] == 3) << 2) | ((Mask[3] == 3) << 3);
      return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v4f64, V1,
                         DAG.getTargetConstant(VPERMILPMask, DL, MVT::i8));
    }

    // With AVX2 we have direct support for this permutation.
    if (Subtarget.hasAVX2())
      return DAG.getNode(X86ISD::VPERMI, DL, MVT::v4f64, V1,
                         getV4X86ShuffleImm8ForMask(Mask, DL, DAG));

    // Try to create an in-lane repeating shuffle mask and then shuffle the
    // results into the target lanes.
    if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute(
            DL, MVT::v4f64, V1, V2, Mask, Subtarget, DAG))
      return V;

    // Try to permute the lanes and then use a per-lane permute.
    if (SDValue V = lowerShuffleAsLanePermuteAndPermute(DL, MVT::v4f64, V1, V2,
                                                        Mask, DAG, Subtarget))
      return V;

    // Otherwise, fall back.
    return lowerShuffleAsLanePermuteAndShuffle(DL, MVT::v4f64, V1, V2, Mask,
                                               DAG, Subtarget);
  }

  // Use dedicated unpack instructions for masks that match their pattern.
  if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v4f64, Mask, V1, V2, DAG))
    return V;

  if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v4f64, V1, V2, Mask,
                                          Zeroable, Subtarget, DAG))
    return Blend;

  // Check if the blend happens to exactly fit that of SHUFPD.
  if (SDValue Op = lowerShuffleWithSHUFPD(DL, MVT::v4f64, V1, V2, Mask,
                                          Zeroable, Subtarget, DAG))
    return Op;

  // If we have lane crossing shuffles AND they don't all come from the lower
  // lane elements, lower to SHUFPD(VPERM2F128(V1, V2), VPERM2F128(V1, V2)).
  // TODO: Handle BUILD_VECTOR sources which getVectorShuffle currently
  // canonicalizes to a blend of splat which isn't necessary for this combine.
  if (is128BitLaneCrossingShuffleMask(MVT::v4f64, Mask) &&
      !all_of(Mask, [](int M) { return M < 2 || (4 <= M && M < 6); }) &&
      (V1.getOpcode() != ISD::BUILD_VECTOR) &&
      (V2.getOpcode() != ISD::BUILD_VECTOR))
    if (SDValue Op = lowerShuffleAsLanePermuteAndSHUFP(DL, MVT::v4f64, V1, V2,
                                                       Mask, DAG))
      return Op;

  // If we have one input in place, then we can permute the other input and
  // blend the result.
  if (isShuffleMaskInputInPlace(0, Mask) || isShuffleMaskInputInPlace(1, Mask))
    return lowerShuffleAsDecomposedShuffleMerge(DL, MVT::v4f64, V1, V2, Mask,
                                                Subtarget, DAG);

  // Try to create an in-lane repeating shuffle mask and then shuffle the
  // results into the target lanes.
  if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute(
          DL, MVT::v4f64, V1, V2, Mask, Subtarget, DAG))
    return V;

  // Try to simplify this by merging 128-bit lanes to enable a lane-based
  // shuffle. However, if we have AVX2 and either input is already in place,
  // we will be able to shuffle even across lanes the other input in a single
  // instruction so skip this pattern.
  if (!(Subtarget.hasAVX2() && (isShuffleMaskInputInPlace(0, Mask) ||
                                isShuffleMaskInputInPlace(1, Mask))))
    if (SDValue V = lowerShuffleAsLanePermuteAndRepeatedMask(
            DL, MVT::v4f64, V1, V2, Mask, Subtarget, DAG))
      return V;

  // If we have VLX support, we can use VEXPAND.
  if (Subtarget.hasVLX())
    if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v4f64, Zeroable, Mask, V1, V2,
                                         DAG, Subtarget))
      return V;

  // If we have AVX2 then we always want to lower with a blend because an v4 we
  // can fully permute the elements.
  if (Subtarget.hasAVX2())
    return lowerShuffleAsDecomposedShuffleMerge(DL, MVT::v4f64, V1, V2, Mask,
                                                Subtarget, DAG);

  // Otherwise fall back on generic lowering.
  return lowerShuffleAsSplitOrBlend(DL, MVT::v4f64, V1, V2, Mask,
                                    Subtarget, DAG);
}
| 16985 | |
| 16986 | |
| 16987 | |
| 16988 | |
| 16989 | |
/// Handle lowering of 4-lane 64-bit integer shuffles.
///
/// Tries a priority-ordered cascade of lowering strategies and returns the
/// first one that produces a node. Only reached with AVX2 available
/// (asserted below); without AVX2 the integer type is retargeted to f64
/// by the 256-bit dispatcher.
static SDValue lowerV4I64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
                                 const APInt &Zeroable, SDValue V1, SDValue V2,
                                 const X86Subtarget &Subtarget,
                                 SelectionDAG &DAG) {
  assert(V1.getSimpleValueType() == MVT::v4i64 && "Bad operand type!");
  assert(V2.getSimpleValueType() == MVT::v4i64 && "Bad operand type!");
  assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
  assert(Subtarget.hasAVX2() && "We can only lower v4i64 with AVX2!");

  // Whole 128-bit lane moves (VPERM2X128-style patterns).
  if (SDValue V = lowerV2X128Shuffle(DL, MVT::v4i64, V1, V2, Mask, Zeroable,
                                     Subtarget, DAG))
    return V;

  // Two-input blends.
  if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v4i64, V1, V2, Mask,
                                          Zeroable, Subtarget, DAG))
    return Blend;

  // Check for being able to broadcast a single element.
  if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v4i64, V1, V2, Mask,
                                                  Subtarget, DAG))
    return Broadcast;

  // Single-input shuffles.
  if (V2.isUndef()) {
    // When the shuffle is mirrored in both 128-bit lanes, widen to a v8i32
    // PSHUFD which operates per-lane (avoids a cross-lane permute).
    SmallVector<int, 2> RepeatedMask;
    if (is128BitLaneRepeatedShuffleMask(MVT::v4i64, Mask, RepeatedMask)) {
      SmallVector<int, 4> PSHUFDMask;
      narrowShuffleMaskElts(2, RepeatedMask, PSHUFDMask);
      return DAG.getBitcast(
          MVT::v4i64,
          DAG.getNode(X86ISD::PSHUFD, DL, MVT::v8i32,
                      DAG.getBitcast(MVT::v8i32, V1),
                      getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG)));
    }

    // Otherwise AVX2 can do an arbitrary single-input cross-lane permute
    // (VPERMQ) directly from the 4-element immediate mask.
    return DAG.getNode(X86ISD::VPERMI, DL, MVT::v4i64, V1,
                       getV4X86ShuffleImm8ForMask(Mask, DL, DAG));
  }

  // Try to use shift instructions.
  if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v4i64, V1, V2, Mask,
                                          Zeroable, Subtarget, DAG))
    return Shift;

  // If we have VLX support, we can use VALIGN or VEXPAND.
  if (Subtarget.hasVLX()) {
    if (SDValue Rotate = lowerShuffleAsVALIGN(DL, MVT::v4i64, V1, V2, Mask,
                                              Subtarget, DAG))
      return Rotate;

    if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v4i64, Zeroable, Mask, V1, V2,
                                         DAG, Subtarget))
      return V;
  }

  // Try to use byte rotation instructions.
  if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v4i64, V1, V2, Mask,
                                                Subtarget, DAG))
    return Rotate;

  // Use dedicated unpack instructions for masks that match their pattern.
  if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v4i64, Mask, V1, V2, DAG))
    return V;

  // If one input is already in place, we can permute the other input (even
  // across lanes) and merge the results with a blend.
  if (isShuffleMaskInputInPlace(0, Mask) || isShuffleMaskInputInPlace(1, Mask))
    return lowerShuffleAsDecomposedShuffleMerge(DL, MVT::v4i64, V1, V2, Mask,
                                                Subtarget, DAG);

  // Try to create an in-lane repeating shuffle mask and then permute the
  // lanes into position.
  if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute(
          DL, MVT::v4i64, V1, V2, Mask, Subtarget, DAG))
    return V;

  // Try a lane permute followed by an in-lane repeated mask; skipped when an
  // input is already in place, since that case returned via the
  // decomposed-merge path above.
  if (!isShuffleMaskInputInPlace(0, Mask) &&
      !isShuffleMaskInputInPlace(1, Mask))
    if (SDValue Result = lowerShuffleAsLanePermuteAndRepeatedMask(
            DL, MVT::v4i64, V1, V2, Mask, Subtarget, DAG))
      return Result;

  // Otherwise fall back on the generic decompose-into-permutes-plus-blend.
  return lowerShuffleAsDecomposedShuffleMerge(DL, MVT::v4i64, V1, V2, Mask,
                                              Subtarget, DAG);
}
| 17083 | |
| 17084 | |
| 17085 | |
| 17086 | |
| 17087 | |
/// Handle lowering of 8-lane 32-bit floating point shuffles.
///
/// Tries a priority-ordered cascade of lowering strategies and returns the
/// first one that produces a node.
static SDValue lowerV8F32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
                                 const APInt &Zeroable, SDValue V1, SDValue V2,
                                 const X86Subtarget &Subtarget,
                                 SelectionDAG &DAG) {
  assert(V1.getSimpleValueType() == MVT::v8f32 && "Bad operand type!");
  assert(V2.getSimpleValueType() == MVT::v8f32 && "Bad operand type!");
  assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");

  // Two-input blends.
  if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v8f32, V1, V2, Mask,
                                          Zeroable, Subtarget, DAG))
    return Blend;

  // Check for being able to broadcast a single element.
  if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v8f32, V1, V2, Mask,
                                                  Subtarget, DAG))
    return Broadcast;

  // If the shuffle mask is repeated in each 128-bit lane, we have many more
  // options: duplicates, immediate permutes, unpacks and SHUFPS.
  SmallVector<int, 4> RepeatedMask;
  if (is128BitLaneRepeatedShuffleMask(MVT::v8f32, Mask, RepeatedMask)) {
    assert(RepeatedMask.size() == 4 &&
           "Repeated masks must be half the mask width!");

    // Use even/odd duplicate instructions for masks that match their pattern.
    if (isShuffleEquivalent(RepeatedMask, {0, 0, 2, 2}, V1, V2))
      return DAG.getNode(X86ISD::MOVSLDUP, DL, MVT::v8f32, V1);
    if (isShuffleEquivalent(RepeatedMask, {1, 1, 3, 3}, V1, V2))
      return DAG.getNode(X86ISD::MOVSHDUP, DL, MVT::v8f32, V1);

    // Single input: an immediate in-lane permute covers it.
    if (V2.isUndef())
      return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v8f32, V1,
                         getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG));

    // Use dedicated unpack instructions for masks that match their pattern.
    if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v8f32, Mask, V1, V2, DAG))
      return V;

    // Otherwise, fall back to a SHUFPS sequence built from the repeated
    // 4-element mask.
    return lowerShuffleWithSHUFPS(DL, MVT::v8f32, RepeatedMask, V1, V2, DAG);
  }

  // Try to create an in-lane repeating shuffle mask and then permute the
  // lanes into position.
  if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute(
          DL, MVT::v8f32, V1, V2, Mask, Subtarget, DAG))
    return V;

  // Single-input shuffles whose mask is not per-lane repeated.
  if (V2.isUndef()) {
    // Non-lane-crossing masks can use a variable VPERMILPS with a constant
    // index vector.
    if (!is128BitLaneCrossingShuffleMask(MVT::v8f32, Mask)) {
      SDValue VPermMask = getConstVector(Mask, MVT::v8i32, DAG, DL, true);
      return DAG.getNode(X86ISD::VPERMILPV, DL, MVT::v8f32, V1, VPermMask);
    }
    // AVX2 provides a full cross-lane variable permute (VPERMPS).
    if (Subtarget.hasAVX2()) {
      SDValue VPermMask = getConstVector(Mask, MVT::v8i32, DAG, DL, true);
      return DAG.getNode(X86ISD::VPERMV, DL, MVT::v8f32, VPermMask, V1);
    }

    // Otherwise, permute the lanes and then shuffle within them.
    return lowerShuffleAsLanePermuteAndShuffle(DL, MVT::v8f32, V1, V2, Mask,
                                               DAG, Subtarget);
  }

  // Try a lane permute followed by an in-lane repeated mask.
  if (SDValue Result = lowerShuffleAsLanePermuteAndRepeatedMask(
          DL, MVT::v8f32, V1, V2, Mask, Subtarget, DAG))
    return Result;

  // If we have VLX support, we can use VEXPAND.
  if (Subtarget.hasVLX())
    if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v8f32, Zeroable, Mask, V1, V2,
                                         DAG, Subtarget))
      return V;

  // Without AVX-512, prefer splitting/blending for unpack-WD-shaped masks
  // rather than the generic paths below.
  if (!Subtarget.hasAVX512() && isUnpackWdShuffleMask(Mask, MVT::v8f32))
    return lowerShuffleAsSplitOrBlend(DL, MVT::v8f32, V1, V2, Mask, Subtarget,
                                      DAG);

  // With AVX2, decompose into per-input permutes plus a blend.
  if (Subtarget.hasAVX2())
    return lowerShuffleAsDecomposedShuffleMerge(DL, MVT::v8f32, V1, V2, Mask,
                                                Subtarget, DAG);

  // Otherwise fall back on splitting into 128-bit halves or blending.
  return lowerShuffleAsSplitOrBlend(DL, MVT::v8f32, V1, V2, Mask,
                                    Subtarget, DAG);
}
| 17182 | |
| 17183 | |
| 17184 | |
| 17185 | |
| 17186 | |
/// Handle lowering of 8-lane 32-bit integer shuffles.
///
/// Tries a priority-ordered cascade of lowering strategies and returns the
/// first one that produces a node. Only reached with AVX2 (asserted below).
static SDValue lowerV8I32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
                                 const APInt &Zeroable, SDValue V1, SDValue V2,
                                 const X86Subtarget &Subtarget,
                                 SelectionDAG &DAG) {
  assert(V1.getSimpleValueType() == MVT::v8i32 && "Bad operand type!");
  assert(V2.getSimpleValueType() == MVT::v8i32 && "Bad operand type!");
  assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");
  assert(Subtarget.hasAVX2() && "We can only lower v8i32 with AVX2!");

  // Whenever we can lower this as a zext, that instruction is strictly
  // faster than any alternative, so try it first.
  if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend(DL, MVT::v8i32, V1, V2, Mask,
                                                   Zeroable, Subtarget, DAG))
    return ZExt;

  // Without AVX-512, two-input unpack-WD-shaped masks are better handled by
  // splitting or blending than the generic machinery below.
  if (isUnpackWdShuffleMask(Mask, MVT::v8i32) && !V2.isUndef() &&
      !Subtarget.hasAVX512())
    return lowerShuffleAsSplitOrBlend(DL, MVT::v8i32, V1, V2, Mask, Subtarget,
                                      DAG);

  if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v8i32, V1, V2, Mask,
                                          Zeroable, Subtarget, DAG))
    return Blend;

  // Check for being able to broadcast a single element.
  if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v8i32, V1, V2, Mask,
                                                  Subtarget, DAG))
    return Broadcast;

  // If the shuffle mask is repeated in each 128-bit lane we can use cheaper
  // in-lane instructions; remember the result for the SHUFPS retry below.
  SmallVector<int, 4> RepeatedMask;
  bool Is128BitLaneRepeatedShuffle =
      is128BitLaneRepeatedShuffleMask(MVT::v8i32, Mask, RepeatedMask);
  if (Is128BitLaneRepeatedShuffle) {
    assert(RepeatedMask.size() == 4 && "Unexpected repeated mask size!");
    // Single input: in-lane immediate PSHUFD.
    if (V2.isUndef())
      return DAG.getNode(X86ISD::PSHUFD, DL, MVT::v8i32, V1,
                         getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG));

    // Use dedicated unpack instructions for masks that match their pattern.
    if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v8i32, Mask, V1, V2, DAG))
      return V;
  }

  // Try to use shift instructions.
  if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v8i32, V1, V2, Mask,
                                          Zeroable, Subtarget, DAG))
    return Shift;

  // If we have VLX support, we can use VALIGN or EXPAND.
  if (Subtarget.hasVLX()) {
    if (SDValue Rotate = lowerShuffleAsVALIGN(DL, MVT::v8i32, V1, V2, Mask,
                                              Subtarget, DAG))
      return Rotate;

    if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v8i32, Zeroable, Mask, V1, V2,
                                         DAG, Subtarget))
      return V;
  }

  // Try to use byte rotation instructions.
  if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v8i32, V1, V2, Mask,
                                                Subtarget, DAG))
    return Rotate;

  // Try to create an in-lane repeating shuffle mask and then permute the
  // lanes into position.
  if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute(
          DL, MVT::v8i32, V1, V2, Mask, Subtarget, DAG))
    return V;

  if (V2.isUndef()) {
    // Try to produce a fixed cross-lane unpack on the single input before
    // giving up and using a variable permute.
    if (SDValue V = lowerShuffleWithUNPCK256(DL, MVT::v8i32, Mask, V1, V2, DAG))
      return V;

    // AVX2 provides a full cross-lane variable permute (VPERMD) for any
    // remaining single-input mask.
    SDValue VPermMask = getConstVector(Mask, MVT::v8i32, DAG, DL, true);
    return DAG.getNode(X86ISD::VPERMV, DL, MVT::v8i32, VPermMask, V1);
  }

  // If the per-lane repeated mask fits a single SHUFPS, bitcast to float and
  // use the FP lowering — one instruction instead of an integer sequence.
  if (Is128BitLaneRepeatedShuffle && isSingleSHUFPSMask(RepeatedMask)) {
    SDValue CastV1 = DAG.getBitcast(MVT::v8f32, V1);
    SDValue CastV2 = DAG.getBitcast(MVT::v8f32, V2);
    SDValue ShufPS = lowerShuffleWithSHUFPS(DL, MVT::v8f32, RepeatedMask,
                                            CastV1, CastV2, DAG);
    return DAG.getBitcast(MVT::v8i32, ShufPS);
  }

  // Try a lane permute followed by an in-lane repeated mask.
  if (SDValue Result = lowerShuffleAsLanePermuteAndRepeatedMask(
          DL, MVT::v8i32, V1, V2, Mask, Subtarget, DAG))
    return Result;

  // Otherwise fall back on the generic decompose-into-permutes-plus-blend.
  return lowerShuffleAsDecomposedShuffleMerge(DL, MVT::v8i32, V1, V2, Mask,
                                              Subtarget, DAG);
}
| 17297 | |
| 17298 | |
| 17299 | |
| 17300 | |
| 17301 | |
/// Handle lowering of 16-lane 16-bit integer shuffles.
///
/// Tries a priority-ordered cascade of lowering strategies and returns the
/// first one that produces a node. Only reached with AVX2 (asserted below).
static SDValue lowerV16I16Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
                                  const APInt &Zeroable, SDValue V1, SDValue V2,
                                  const X86Subtarget &Subtarget,
                                  SelectionDAG &DAG) {
  assert(V1.getSimpleValueType() == MVT::v16i16 && "Bad operand type!");
  assert(V2.getSimpleValueType() == MVT::v16i16 && "Bad operand type!");
  assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");
  assert(Subtarget.hasAVX2() && "We can only lower v16i16 with AVX2!");

  // Whenever we can lower this as a zext, that instruction is strictly
  // faster than any alternative, so try it first.
  if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend(
          DL, MVT::v16i16, V1, V2, Mask, Zeroable, Subtarget, DAG))
    return ZExt;

  // Check for being able to broadcast a single element.
  if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v16i16, V1, V2, Mask,
                                                  Subtarget, DAG))
    return Broadcast;

  // Two-input blends.
  if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v16i16, V1, V2, Mask,
                                          Zeroable, Subtarget, DAG))
    return Blend;

  // Use dedicated unpack instructions for masks that match their pattern.
  if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v16i16, Mask, V1, V2, DAG))
    return V;

  // Use dedicated pack instructions for masks that match their pattern.
  if (SDValue V = lowerShuffleWithPACK(DL, MVT::v16i16, Mask, V1, V2, DAG,
                                       Subtarget))
    return V;

  // Try to use a truncation-based lowering.
  if (SDValue V = lowerShuffleAsVTRUNC(DL, MVT::v16i16, V1, V2, Mask, Zeroable,
                                       Subtarget, DAG))
    return V;

  // Try to use shift instructions.
  if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v16i16, V1, V2, Mask,
                                          Zeroable, Subtarget, DAG))
    return Shift;

  // Try to use byte rotation instructions.
  if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v16i16, V1, V2, Mask,
                                                Subtarget, DAG))
    return Rotate;

  // Try to create an in-lane repeating shuffle mask and then permute the
  // lanes into position.
  if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute(
          DL, MVT::v16i16, V1, V2, Mask, Subtarget, DAG))
    return V;

  // Single-input shuffles.
  if (V2.isUndef()) {
    // Try to use bit rotation instructions.
    if (SDValue Rotate =
            lowerShuffleAsBitRotate(DL, MVT::v16i16, V1, Mask, Subtarget, DAG))
      return Rotate;

    // Try a fixed cross-lane unpack before resorting to the generic
    // cross-lane paths.
    if (SDValue V = lowerShuffleWithUNPCK256(DL, MVT::v16i16, Mask, V1, V2, DAG))
      return V;

    // Lane-crossing masks: permute lanes first, then finish with an in-lane
    // permute or shuffle.
    if (is128BitLaneCrossingShuffleMask(MVT::v16i16, Mask)) {
      if (SDValue V = lowerShuffleAsLanePermuteAndPermute(
              DL, MVT::v16i16, V1, V2, Mask, DAG, Subtarget))
        return V;

      return lowerShuffleAsLanePermuteAndShuffle(DL, MVT::v16i16, V1, V2, Mask,
                                                 DAG, Subtarget);
    }

    SmallVector<int, 8> RepeatedMask;
    if (is128BitLaneRepeatedShuffleMask(MVT::v16i16, Mask, RepeatedMask)) {
      // The mask repeats across both 128-bit lanes, so reuse the general
      // single-input v8i16 lowering on the repeated half-width mask.
      return lowerV8I16GeneralSingleInputShuffle(
          DL, MVT::v16i16, V1, RepeatedMask, Subtarget, DAG);
    }
  }

  if (SDValue PSHUFB = lowerShuffleWithPSHUFB(DL, MVT::v16i16, Mask, V1, V2,
                                              Zeroable, Subtarget, DAG))
    return PSHUFB;

  // AVX512BW has a direct two-input variable word permute.
  if (Subtarget.hasBWI())
    return lowerShuffleWithPERMV(DL, MVT::v16i16, Mask, V1, V2, Subtarget, DAG);

  // Try a lane permute followed by an in-lane repeated mask.
  if (SDValue Result = lowerShuffleAsLanePermuteAndRepeatedMask(
          DL, MVT::v16i16, V1, V2, Mask, Subtarget, DAG))
    return Result;

  // Try a lane permute followed by an in-lane permute.
  if (SDValue V = lowerShuffleAsLanePermuteAndPermute(
          DL, MVT::v16i16, V1, V2, Mask, DAG, Subtarget))
    return V;

  // Otherwise fall back on splitting into 128-bit halves or blending.
  return lowerShuffleAsSplitOrBlend(DL, MVT::v16i16, V1, V2, Mask,
                                    Subtarget, DAG);
}
| 17412 | |
| 17413 | |
| 17414 | |
| 17415 | |
| 17416 | |
/// Handle lowering of 32-lane 8-bit integer shuffles.
///
/// Tries a priority-ordered cascade of lowering strategies and returns the
/// first one that produces a node. Only reached with AVX2 (asserted below).
static SDValue lowerV32I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
                                 const APInt &Zeroable, SDValue V1, SDValue V2,
                                 const X86Subtarget &Subtarget,
                                 SelectionDAG &DAG) {
  assert(V1.getSimpleValueType() == MVT::v32i8 && "Bad operand type!");
  assert(V2.getSimpleValueType() == MVT::v32i8 && "Bad operand type!");
  assert(Mask.size() == 32 && "Unexpected mask size for v32 shuffle!");
  assert(Subtarget.hasAVX2() && "We can only lower v32i8 with AVX2!");

  // Whenever we can lower this as a zext, that instruction is strictly
  // faster than any alternative, so try it first.
  if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend(DL, MVT::v32i8, V1, V2, Mask,
                                                   Zeroable, Subtarget, DAG))
    return ZExt;

  // Check for being able to broadcast a single element.
  if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, MVT::v32i8, V1, V2, Mask,
                                                  Subtarget, DAG))
    return Broadcast;

  // Two-input blends.
  if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v32i8, V1, V2, Mask,
                                          Zeroable, Subtarget, DAG))
    return Blend;

  // Use dedicated unpack instructions for masks that match their pattern.
  if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v32i8, Mask, V1, V2, DAG))
    return V;

  // Use dedicated pack instructions for masks that match their pattern.
  if (SDValue V = lowerShuffleWithPACK(DL, MVT::v32i8, Mask, V1, V2, DAG,
                                       Subtarget))
    return V;

  // Try to use a truncation-based lowering.
  if (SDValue V = lowerShuffleAsVTRUNC(DL, MVT::v32i8, V1, V2, Mask, Zeroable,
                                       Subtarget, DAG))
    return V;

  // Try to use shift instructions.
  if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v32i8, V1, V2, Mask,
                                          Zeroable, Subtarget, DAG))
    return Shift;

  // Try to use byte rotation instructions.
  if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v32i8, V1, V2, Mask,
                                                Subtarget, DAG))
    return Rotate;

  // Try to use bit rotation instructions (single input only).
  if (V2.isUndef())
    if (SDValue Rotate =
            lowerShuffleAsBitRotate(DL, MVT::v32i8, V1, Mask, Subtarget, DAG))
      return Rotate;

  // Try to create an in-lane repeating shuffle mask and then permute the
  // lanes into position.
  if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute(
          DL, MVT::v32i8, V1, V2, Mask, Subtarget, DAG))
    return V;

  // Single-input, lane-crossing shuffles: permute the lanes first, then
  // finish with an in-lane permute or shuffle.
  if (V2.isUndef() && is128BitLaneCrossingShuffleMask(MVT::v32i8, Mask)) {
    // Try a fixed cross-lane unpack before the generic lane permutes.
    if (SDValue V = lowerShuffleWithUNPCK256(DL, MVT::v32i8, Mask, V1, V2, DAG))
      return V;

    if (SDValue V = lowerShuffleAsLanePermuteAndPermute(
            DL, MVT::v32i8, V1, V2, Mask, DAG, Subtarget))
      return V;

    return lowerShuffleAsLanePermuteAndShuffle(DL, MVT::v32i8, V1, V2, Mask,
                                               DAG, Subtarget);
  }

  if (SDValue PSHUFB = lowerShuffleWithPSHUFB(DL, MVT::v32i8, Mask, V1, V2,
                                              Zeroable, Subtarget, DAG))
    return PSHUFB;

  // AVX512VBMI has a direct two-input variable byte permute.
  if (Subtarget.hasVBMI())
    return lowerShuffleWithPERMV(DL, MVT::v32i8, Mask, V1, V2, Subtarget, DAG);

  // Try a lane permute followed by an in-lane repeated mask.
  if (SDValue Result = lowerShuffleAsLanePermuteAndRepeatedMask(
          DL, MVT::v32i8, V1, V2, Mask, Subtarget, DAG))
    return Result;

  // Try a lane permute followed by an in-lane permute.
  if (SDValue V = lowerShuffleAsLanePermuteAndPermute(
          DL, MVT::v32i8, V1, V2, Mask, DAG, Subtarget))
    return V;

  // With VLX, try a truncate-then-unpack sequence before splitting.
  if (Subtarget.hasVLX())
    if (SDValue V = lowerShuffleAsVTRUNCAndUnpack(DL, MVT::v32i8, V1, V2,
                                                  Mask, Zeroable, DAG))
      return V;

  // Otherwise fall back on splitting into 128-bit halves or blending.
  return lowerShuffleAsSplitOrBlend(DL, MVT::v32i8, V1, V2, Mask,
                                    Subtarget, DAG);
}
| 17525 | |
| 17526 | |
| 17527 | |
| 17528 | |
| 17529 | |
| 17530 | |
/// Top-level dispatch for 256-bit x86 vector shuffle lowering.
///
/// Handles the cross-type cases (element insertion, undef halves, and the
/// pre-AVX2 integer fallbacks), then dispatches to the per-type lowering
/// routine for the given VT.
static SDValue lower256BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, MVT VT,
                                  SDValue V1, SDValue V2, const APInt &Zeroable,
                                  const X86Subtarget &Subtarget,
                                  SelectionDAG &DAG) {
  // If exactly one element comes from V2 and it lands in lane 0, try to
  // lower the whole shuffle as an element insertion.
  int NumElts = VT.getVectorNumElements();
  int NumV2Elements = count_if(Mask, [NumElts](int M) { return M >= NumElts; });

  if (NumV2Elements == 1 && Mask[0] >= NumElts)
    if (SDValue Insertion = lowerShuffleAsElementInsertion(
            DL, VT, V1, V2, Mask, Zeroable, Subtarget, DAG))
      return Insertion;

  // Handle shuffles where one half of the result is undef or unused.
  if (SDValue V =
          lowerShuffleWithUndefHalf(DL, VT, V1, V2, Mask, Subtarget, DAG))
    return V;

  // Pre-AVX2 there are no 256-bit integer shuffle instructions. For
  // sub-32-bit elements, try bitwise masking/blending and otherwise split
  // into 128-bit halves; for 32/64-bit elements, retarget to the equivalent
  // floating point type and let the FP shuffle lowering handle it.
  if (VT.isInteger() && !Subtarget.hasAVX2()) {
    int ElementBits = VT.getScalarSizeInBits();
    if (ElementBits < 32) {
      // No matching FP element type exists below 32 bits.
      if (SDValue V = lowerShuffleAsBitMask(DL, VT, V1, V2, Mask, Zeroable,
                                            Subtarget, DAG))
        return V;
      if (SDValue V = lowerShuffleAsBitBlend(DL, VT, V1, V2, Mask, DAG))
        return V;
      return splitAndLowerShuffle(DL, VT, V1, V2, Mask, DAG);
    }

    MVT FpVT = MVT::getVectorVT(MVT::getFloatingPointVT(ElementBits),
                                VT.getVectorNumElements());
    V1 = DAG.getBitcast(FpVT, V1);
    V2 = DAG.getBitcast(FpVT, V2);
    return DAG.getBitcast(VT, DAG.getVectorShuffle(FpVT, DL, V1, V2, Mask));
  }

  switch (VT.SimpleTy) {
  case MVT::v4f64:
    return lowerV4F64Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
  case MVT::v4i64:
    return lowerV4I64Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
  case MVT::v8f32:
    return lowerV8F32Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
  case MVT::v8i32:
    return lowerV8I32Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
  case MVT::v16i16:
    return lowerV16I16Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);
  case MVT::v32i8:
    return lowerV32I8Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG);

  default:
    llvm_unreachable("Not a valid 256-bit x86 vector type!");
  }
}
| 17594 | |
| 17595 | |
/// Try to lower a 512-bit vector shuffle (with 64-bit elements) as a shuffle
/// of whole 128-bit subvectors.
///
/// Attempts, in order: zeroing the upper half, inserting a 256-bit
/// subvector, inserting a single 128-bit subvector from V2, and finally a
/// SHUF128 with an immediate. Returns SDValue() if the mask cannot be
/// widened to 128-bit granularity or needs mixed sources per 256-bit half.
static SDValue lowerV4X128Shuffle(const SDLoc &DL, MVT VT, ArrayRef<int> Mask,
                                  const APInt &Zeroable, SDValue V1, SDValue V2,
                                  const X86Subtarget &Subtarget,
                                  SelectionDAG &DAG) {
  assert(VT.getScalarSizeInBits() == 64 &&
         "Unexpected element type size for 128bit shuffle.");

  // To handle as many mask elements as possible, widen pairs of 64-bit
  // elements into single 128-bit "lane" elements.
  assert(VT.is512BitVector() && "Unexpected vector size for 512bit shuffle.");

  // Each widened element selects one of eight 128-bit lanes (0-3 from V1,
  // 4-7 from V2).
  SmallVector<int, 4> Widened128Mask;
  if (!canWidenShuffleElements(Mask, Widened128Mask))
    return SDValue();
  assert(Widened128Mask.size() == 4 && "Shuffle widening mismatch");

  // If lane 0 comes from V1's low lane and everything above the used prefix
  // is zeroable (bits 4-7, and optionally 2-3, of the element-zeroable
  // mask), emit "insert the low subvector of V1 into a zero vector".
  if (Widened128Mask[0] == 0 && (Zeroable & 0xf0) == 0xf0 &&
      (Widened128Mask[1] == 1 || (Zeroable & 0x0c) == 0x0c)) {
    unsigned NumElts = ((Zeroable & 0x0c) == 0x0c) ? 2 : 4;
    MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(), NumElts);
    SDValue LoV = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V1,
                              DAG.getIntPtrConstant(0, DL));
    return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
                       getZeroVector(VT, Subtarget, DAG, DL), LoV,
                       DAG.getIntPtrConstant(0, DL));
  }

  // Check for patterns which can be matched with a single insert of a 256-bit
  // subvector: low half of V1 plus the low half of either input.
  bool OnlyUsesV1 = isShuffleEquivalent(Mask, {0, 1, 2, 3, 0, 1, 2, 3}, V1, V2);
  if (OnlyUsesV1 ||
      isShuffleEquivalent(Mask, {0, 1, 2, 3, 8, 9, 10, 11}, V1, V2)) {
    MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(), 4);
    SDValue SubVec =
        DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, OnlyUsesV1 ? V1 : V2,
                    DAG.getIntPtrConstant(0, DL));
    return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, V1, SubVec,
                       DAG.getIntPtrConstant(4, DL));
  }

  // Detect "V1 with exactly one lane replaced by V2's low lane": every
  // defined V1 lane must be in place, and at most one lane may come from V2
  // and must be V2's lane 0.
  bool IsInsert = true;
  int V2Index = -1;
  for (int i = 0; i < 4; ++i) {
    assert(Widened128Mask[i] >= -1 && "Illegal shuffle sentinel value");
    if (Widened128Mask[i] < 0)
      continue;

    // Make sure all V1 subvectors are in place.
    if (Widened128Mask[i] < 4) {
      if (Widened128Mask[i] != i) {
        IsInsert = false;
        break;
      }
    } else {
      // Make sure we only have a single V2 index and its the lowest 128-bits.
      if (V2Index >= 0 || Widened128Mask[i] != 4) {
        IsInsert = false;
        break;
      }
      V2Index = i;
    }
  }
  if (IsInsert && V2Index >= 0) {
    MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(), 2);
    SDValue Subvec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVT, V2,
                                 DAG.getIntPtrConstant(0, DL));
    return insert128BitVector(V1, Subvec, V2Index * 2, DAG, DL);
  }

  // See if the mask can widen further to 256-bit quadrants; if so, re-narrow
  // it to get a canonical 128-bit mask with fewer undefs, which helps the
  // per-half single-source check below.
  SmallVector<int, 2> Widened256Mask;
  if (canWidenShuffleElements(Widened128Mask, Widened256Mask)) {
    Widened128Mask.clear();
    narrowShuffleMaskElts(2, Widened256Mask, Widened128Mask);
  }

  // Try to lower to VSHUF64x2/VSHUF32x4: each 256-bit half of the result
  // must source all its lanes from a single input.
  SDValue Ops[2] = {DAG.getUNDEF(VT), DAG.getUNDEF(VT)};
  unsigned PermMask = 0;

  for (int i = 0; i < 4; ++i) {
    assert(Widened128Mask[i] >= -1 && "Illegal shuffle sentinel value");
    if (Widened128Mask[i] < 0)
      continue;

    SDValue Op = Widened128Mask[i] >= 4 ? V2 : V1;
    unsigned OpIndex = i / 2;
    if (Ops[OpIndex].isUndef())
      Ops[OpIndex] = Op;
    else if (Ops[OpIndex] != Op)
      // Mixed sources within one half cannot be expressed with SHUF128.
      return SDValue();

    // Two bits of immediate per destination lane select the source lane
    // within the chosen input.
    PermMask |= (Widened128Mask[i] % 4) << (i * 2);
  }

  return DAG.getNode(X86ISD::SHUF128, DL, VT, Ops[0], Ops[1],
                     DAG.getTargetConstant(PermMask, DL, MVT::i8));
}
| 17702 | |
| 17703 | |
/// Handle lowering of 8-lane 64-bit floating point shuffles.
static SDValue lowerV8F64Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
                                 const APInt &Zeroable, SDValue V1, SDValue V2,
                                 const X86Subtarget &Subtarget,
                                 SelectionDAG &DAG) {
  assert(V1.getSimpleValueType() == MVT::v8f64 && "Bad operand type!");
  assert(V2.getSimpleValueType() == MVT::v8f64 && "Bad operand type!");
  assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!");

  if (V2.isUndef()) {
    // Use low duplicate instructions for masks that match their pattern.
    if (isShuffleEquivalent(Mask, {0, 0, 2, 2, 4, 4, 6, 6}, V1, V2))
      return DAG.getNode(X86ISD::MOVDDUP, DL, MVT::v8f64, V1);

    if (!is128BitLaneCrossingShuffleMask(MVT::v8f64, Mask)) {
      // Non-lane-crossing: VPERMILPD with one immediate bit per element
      // selecting the high (odd) element of its 128-bit lane.
      unsigned VPERMILPMask = (Mask[0] == 1) | ((Mask[1] == 1) << 1) |
                              ((Mask[2] == 3) << 2) | ((Mask[3] == 3) << 3) |
                              ((Mask[4] == 5) << 4) | ((Mask[5] == 5) << 5) |
                              ((Mask[6] == 7) << 6) | ((Mask[7] == 7) << 7);
      return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v8f64, V1,
                         DAG.getTargetConstant(VPERMILPMask, DL, MVT::i8));
    }

    // Masks repeated across both 256-bit halves can use an immediate VPERMPD
    // built from the 4-element repeated mask.
    SmallVector<int, 4> RepeatedMask;
    if (is256BitLaneRepeatedShuffleMask(MVT::v8f64, Mask, RepeatedMask))
      return DAG.getNode(X86ISD::VPERMI, DL, MVT::v8f64, V1,
                         getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG));
  }

  // Whole 128-bit lane shuffles.
  if (SDValue Shuf128 = lowerV4X128Shuffle(DL, MVT::v8f64, Mask, Zeroable, V1,
                                           V2, Subtarget, DAG))
    return Shuf128;

  // Use dedicated unpack instructions for masks that match their pattern.
  if (SDValue Unpck = lowerShuffleWithUNPCK(DL, MVT::v8f64, Mask, V1, V2, DAG))
    return Unpck;

  // Check if the blend happens to exactly fit that of SHUFPD.
  if (SDValue Op = lowerShuffleWithSHUFPD(DL, MVT::v8f64, V1, V2, Mask,
                                          Zeroable, Subtarget, DAG))
    return Op;

  if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v8f64, Zeroable, Mask, V1, V2,
                                       DAG, Subtarget))
    return V;

  if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v8f64, V1, V2, Mask,
                                          Zeroable, Subtarget, DAG))
    return Blend;

  // Fall back to the fully general variable permute.
  return lowerShuffleWithPERMV(DL, MVT::v8f64, Mask, V1, V2, Subtarget, DAG);
}
| 17756 | |
| 17757 | |
/// Handle lowering of 16-lane 32-bit floating point shuffles.
static SDValue lowerV16F32Shuffle(const SDLoc &DL, ArrayRef<int> Mask,
                                  const APInt &Zeroable, SDValue V1, SDValue V2,
                                  const X86Subtarget &Subtarget,
                                  SelectionDAG &DAG) {
  assert(V1.getSimpleValueType() == MVT::v16f32 && "Bad operand type!");
  assert(V2.getSimpleValueType() == MVT::v16f32 && "Bad operand type!");
  assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!");

  // If the shuffle mask is repeated in each 128-bit lane, we have many more
  // options: duplicates, immediate permutes, unpacks, blends and SHUFPS.
  SmallVector<int, 4> RepeatedMask;
  if (is128BitLaneRepeatedShuffleMask(MVT::v16f32, Mask, RepeatedMask)) {
    assert(RepeatedMask.size() == 4 && "Unexpected repeated mask size!");

    // Use even/odd duplicate instructions for masks that match their pattern.
    if (isShuffleEquivalent(RepeatedMask, {0, 0, 2, 2}, V1, V2))
      return DAG.getNode(X86ISD::MOVSLDUP, DL, MVT::v16f32, V1);
    if (isShuffleEquivalent(RepeatedMask, {1, 1, 3, 3}, V1, V2))
      return DAG.getNode(X86ISD::MOVSHDUP, DL, MVT::v16f32, V1);

    // Single input: an immediate in-lane permute covers it.
    if (V2.isUndef())
      return DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v16f32, V1,
                         getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG));

    // Use dedicated unpack instructions for masks that match their pattern.
    if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v16f32, Mask, V1, V2, DAG))
      return V;

    if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v16f32, V1, V2, Mask,
                                            Zeroable, Subtarget, DAG))
      return Blend;

    // Otherwise, fall back to a SHUFPS sequence built from the repeated mask.
    return lowerShuffleWithSHUFPS(DL, MVT::v16f32, RepeatedMask, V1, V2, DAG);
  }

  // Try to create an in-lane repeating shuffle mask and then permute the
  // lanes into position.
  if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute(
          DL, MVT::v16f32, V1, V2, Mask, Subtarget, DAG))
    return V;

  // Single-input, non-lane-crossing masks can use a variable VPERMILPS with
  // a constant index vector.
  if (V2.isUndef() &&
      !is128BitLaneCrossingShuffleMask(MVT::v16f32, Mask)) {
    SDValue VPermMask = getConstVector(Mask, MVT::v16i32, DAG, DL, true);
    return DAG.getNode(X86ISD::VPERMILPV, DL, MVT::v16f32, V1, VPermMask);
  }

  // If we have a single input shuffle with different shuffle patterns in the
  // 128-bit lanes, try VEXPAND before the general permute.
  if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v16f32, Zeroable, Mask,
                                       V1, V2, DAG, Subtarget))
    return V;

  // Fall back to the fully general variable permute.
  return lowerShuffleWithPERMV(DL, MVT::v16f32, Mask, V1, V2, Subtarget, DAG);
}
| 17815 | |
| 17816 | |
| 17817 | static SDValue lowerV8I64Shuffle(const SDLoc &DL, ArrayRef<int> Mask, |
| 17818 | const APInt &Zeroable, SDValue V1, SDValue V2, |
| 17819 | const X86Subtarget &Subtarget, |
| 17820 | SelectionDAG &DAG) { |
| 17821 | assert(V1.getSimpleValueType() == MVT::v8i64 && "Bad operand type!"); |
| 17822 | assert(V2.getSimpleValueType() == MVT::v8i64 && "Bad operand type!"); |
| 17823 | assert(Mask.size() == 8 && "Unexpected mask size for v8 shuffle!"); |
| 17824 | |
| 17825 | if (V2.isUndef()) { |
| 17826 | |
| 17827 | |
| 17828 | |
| 17829 | SmallVector<int, 2> Repeated128Mask; |
| 17830 | if (is128BitLaneRepeatedShuffleMask(MVT::v8i64, Mask, Repeated128Mask)) { |
| 17831 | SmallVector<int, 4> PSHUFDMask; |
| 17832 | narrowShuffleMaskElts(2, Repeated128Mask, PSHUFDMask); |
| 17833 | return DAG.getBitcast( |
| 17834 | MVT::v8i64, |
| 17835 | DAG.getNode(X86ISD::PSHUFD, DL, MVT::v16i32, |
| 17836 | DAG.getBitcast(MVT::v16i32, V1), |
| 17837 | getV4X86ShuffleImm8ForMask(PSHUFDMask, DL, DAG))); |
| 17838 | } |
| 17839 | |
| 17840 | SmallVector<int, 4> Repeated256Mask; |
| 17841 | if (is256BitLaneRepeatedShuffleMask(MVT::v8i64, Mask, Repeated256Mask)) |
| 17842 | return DAG.getNode(X86ISD::VPERMI, DL, MVT::v8i64, V1, |
| 17843 | getV4X86ShuffleImm8ForMask(Repeated256Mask, DL, DAG)); |
| 17844 | } |
| 17845 | |
| 17846 | if (SDValue Shuf128 = lowerV4X128Shuffle(DL, MVT::v8i64, Mask, Zeroable, V1, |
| 17847 | V2, Subtarget, DAG)) |
| 17848 | return Shuf128; |
| 17849 | |
| 17850 | |
| 17851 | if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v8i64, V1, V2, Mask, |
| 17852 | Zeroable, Subtarget, DAG)) |
| 17853 | return Shift; |
| 17854 | |
| 17855 | |
| 17856 | if (SDValue Rotate = lowerShuffleAsVALIGN(DL, MVT::v8i64, V1, V2, Mask, |
| 17857 | Subtarget, DAG)) |
| 17858 | return Rotate; |
| 17859 | |
| 17860 | |
| 17861 | if (Subtarget.hasBWI()) |
| 17862 | if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v8i64, V1, V2, Mask, |
| 17863 | Subtarget, DAG)) |
| 17864 | return Rotate; |
| 17865 | |
| 17866 | if (SDValue Unpck = lowerShuffleWithUNPCK(DL, MVT::v8i64, Mask, V1, V2, DAG)) |
| 17867 | return Unpck; |
| 17868 | |
| 17869 | |
| 17870 | if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v8i64, Zeroable, Mask, V1, V2, |
| 17871 | DAG, Subtarget)) |
| 17872 | return V; |
| 17873 | |
| 17874 | if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v8i64, V1, V2, Mask, |
| 17875 | Zeroable, Subtarget, DAG)) |
| 17876 | return Blend; |
| 17877 | |
| 17878 | return lowerShuffleWithPERMV(DL, MVT::v8i64, Mask, V1, V2, Subtarget, DAG); |
| 17879 | } |
| 17880 | |
| 17881 | |
| 17882 | static SDValue lowerV16I32Shuffle(const SDLoc &DL, ArrayRef<int> Mask, |
| 17883 | const APInt &Zeroable, SDValue V1, SDValue V2, |
| 17884 | const X86Subtarget &Subtarget, |
| 17885 | SelectionDAG &DAG) { |
| 17886 | assert(V1.getSimpleValueType() == MVT::v16i32 && "Bad operand type!"); |
| 17887 | assert(V2.getSimpleValueType() == MVT::v16i32 && "Bad operand type!"); |
| 17888 | assert(Mask.size() == 16 && "Unexpected mask size for v16 shuffle!"); |
| 17889 | |
| 17890 | |
| 17891 | |
| 17892 | |
| 17893 | if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend( |
| 17894 | DL, MVT::v16i32, V1, V2, Mask, Zeroable, Subtarget, DAG)) |
| 17895 | return ZExt; |
| 17896 | |
| 17897 | |
| 17898 | |
| 17899 | |
| 17900 | SmallVector<int, 4> RepeatedMask; |
| 17901 | bool Is128BitLaneRepeatedShuffle = |
| 17902 | is128BitLaneRepeatedShuffleMask(MVT::v16i32, Mask, RepeatedMask); |
| 17903 | if (Is128BitLaneRepeatedShuffle) { |
| 17904 | assert(RepeatedMask.size() == 4 && "Unexpected repeated mask size!"); |
| 17905 | if (V2.isUndef()) |
| 17906 | return DAG.getNode(X86ISD::PSHUFD, DL, MVT::v16i32, V1, |
| 17907 | getV4X86ShuffleImm8ForMask(RepeatedMask, DL, DAG)); |
| 17908 | |
| 17909 | |
| 17910 | if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v16i32, Mask, V1, V2, DAG)) |
| 17911 | return V; |
| 17912 | } |
| 17913 | |
| 17914 | |
| 17915 | if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v16i32, V1, V2, Mask, |
| 17916 | Zeroable, Subtarget, DAG)) |
| 17917 | return Shift; |
| 17918 | |
| 17919 | |
| 17920 | if (SDValue Rotate = lowerShuffleAsVALIGN(DL, MVT::v16i32, V1, V2, Mask, |
| 17921 | Subtarget, DAG)) |
| 17922 | return Rotate; |
| 17923 | |
| 17924 | |
| 17925 | if (Subtarget.hasBWI()) |
| 17926 | if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v16i32, V1, V2, Mask, |
| 17927 | Subtarget, DAG)) |
| 17928 | return Rotate; |
| 17929 | |
| 17930 | |
| 17931 | |
| 17932 | if (Is128BitLaneRepeatedShuffle && isSingleSHUFPSMask(RepeatedMask)) { |
| 17933 | SDValue CastV1 = DAG.getBitcast(MVT::v16f32, V1); |
| 17934 | SDValue CastV2 = DAG.getBitcast(MVT::v16f32, V2); |
| 17935 | SDValue ShufPS = lowerShuffleWithSHUFPS(DL, MVT::v16f32, RepeatedMask, |
| 17936 | CastV1, CastV2, DAG); |
| 17937 | return DAG.getBitcast(MVT::v16i32, ShufPS); |
| 17938 | } |
| 17939 | |
| 17940 | |
| 17941 | |
| 17942 | if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute( |
| 17943 | DL, MVT::v16i32, V1, V2, Mask, Subtarget, DAG)) |
| 17944 | return V; |
| 17945 | |
| 17946 | |
| 17947 | if (SDValue V = lowerShuffleToEXPAND(DL, MVT::v16i32, Zeroable, Mask, V1, V2, |
| 17948 | DAG, Subtarget)) |
| 17949 | return V; |
| 17950 | |
| 17951 | if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v16i32, V1, V2, Mask, |
| 17952 | Zeroable, Subtarget, DAG)) |
| 17953 | return Blend; |
| 17954 | |
| 17955 | return lowerShuffleWithPERMV(DL, MVT::v16i32, Mask, V1, V2, Subtarget, DAG); |
| 17956 | } |
| 17957 | |
| 17958 | |
| 17959 | static SDValue lowerV32I16Shuffle(const SDLoc &DL, ArrayRef<int> Mask, |
| 17960 | const APInt &Zeroable, SDValue V1, SDValue V2, |
| 17961 | const X86Subtarget &Subtarget, |
| 17962 | SelectionDAG &DAG) { |
| 17963 | assert(V1.getSimpleValueType() == MVT::v32i16 && "Bad operand type!"); |
| 17964 | assert(V2.getSimpleValueType() == MVT::v32i16 && "Bad operand type!"); |
| 17965 | assert(Mask.size() == 32 && "Unexpected mask size for v32 shuffle!"); |
| 17966 | assert(Subtarget.hasBWI() && "We can only lower v32i16 with AVX-512-BWI!"); |
| 17967 | |
| 17968 | |
| 17969 | |
| 17970 | |
| 17971 | if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend( |
| 17972 | DL, MVT::v32i16, V1, V2, Mask, Zeroable, Subtarget, DAG)) |
| 17973 | return ZExt; |
| 17974 | |
| 17975 | |
| 17976 | if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v32i16, Mask, V1, V2, DAG)) |
| 17977 | return V; |
| 17978 | |
| 17979 | |
| 17980 | if (SDValue V = |
| 17981 | lowerShuffleWithPACK(DL, MVT::v32i16, Mask, V1, V2, DAG, Subtarget)) |
| 17982 | return V; |
| 17983 | |
| 17984 | |
| 17985 | if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v32i16, V1, V2, Mask, |
| 17986 | Zeroable, Subtarget, DAG)) |
| 17987 | return Shift; |
| 17988 | |
| 17989 | |
| 17990 | if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v32i16, V1, V2, Mask, |
| 17991 | Subtarget, DAG)) |
| 17992 | return Rotate; |
| 17993 | |
| 17994 | if (V2.isUndef()) { |
| 17995 | |
| 17996 | if (SDValue Rotate = |
| 17997 | lowerShuffleAsBitRotate(DL, MVT::v32i16, V1, Mask, Subtarget, DAG)) |
| 17998 | return Rotate; |
| 17999 | |
| 18000 | SmallVector<int, 8> RepeatedMask; |
| 18001 | if (is128BitLaneRepeatedShuffleMask(MVT::v32i16, Mask, RepeatedMask)) { |
| 18002 | |
| 18003 | |
| 18004 | |
| 18005 | return lowerV8I16GeneralSingleInputShuffle(DL, MVT::v32i16, V1, |
| 18006 | RepeatedMask, Subtarget, DAG); |
| 18007 | } |
| 18008 | } |
| 18009 | |
| 18010 | if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v32i16, V1, V2, Mask, |
| 18011 | Zeroable, Subtarget, DAG)) |
| 18012 | return Blend; |
| 18013 | |
| 18014 | if (SDValue PSHUFB = lowerShuffleWithPSHUFB(DL, MVT::v32i16, Mask, V1, V2, |
| 18015 | Zeroable, Subtarget, DAG)) |
| 18016 | return PSHUFB; |
| 18017 | |
| 18018 | return lowerShuffleWithPERMV(DL, MVT::v32i16, Mask, V1, V2, Subtarget, DAG); |
| 18019 | } |
| 18020 | |
| 18021 | |
| 18022 | static SDValue lowerV64I8Shuffle(const SDLoc &DL, ArrayRef<int> Mask, |
| 18023 | const APInt &Zeroable, SDValue V1, SDValue V2, |
| 18024 | const X86Subtarget &Subtarget, |
| 18025 | SelectionDAG &DAG) { |
| 18026 | assert(V1.getSimpleValueType() == MVT::v64i8 && "Bad operand type!"); |
| 18027 | assert(V2.getSimpleValueType() == MVT::v64i8 && "Bad operand type!"); |
| 18028 | assert(Mask.size() == 64 && "Unexpected mask size for v64 shuffle!"); |
| 18029 | assert(Subtarget.hasBWI() && "We can only lower v64i8 with AVX-512-BWI!"); |
| 18030 | |
| 18031 | |
| 18032 | |
| 18033 | |
| 18034 | if (SDValue ZExt = lowerShuffleAsZeroOrAnyExtend( |
| 18035 | DL, MVT::v64i8, V1, V2, Mask, Zeroable, Subtarget, DAG)) |
| 18036 | return ZExt; |
| 18037 | |
| 18038 | |
| 18039 | if (SDValue V = lowerShuffleWithUNPCK(DL, MVT::v64i8, Mask, V1, V2, DAG)) |
| 18040 | return V; |
| 18041 | |
| 18042 | |
| 18043 | if (SDValue V = lowerShuffleWithPACK(DL, MVT::v64i8, Mask, V1, V2, DAG, |
| 18044 | Subtarget)) |
| 18045 | return V; |
| 18046 | |
| 18047 | |
| 18048 | if (SDValue Shift = lowerShuffleAsShift(DL, MVT::v64i8, V1, V2, Mask, |
| 18049 | Zeroable, Subtarget, DAG)) |
| 18050 | return Shift; |
| 18051 | |
| 18052 | |
| 18053 | if (SDValue Rotate = lowerShuffleAsByteRotate(DL, MVT::v64i8, V1, V2, Mask, |
| 18054 | Subtarget, DAG)) |
| 18055 | return Rotate; |
| 18056 | |
| 18057 | |
| 18058 | if (V2.isUndef()) |
| 18059 | if (SDValue Rotate = |
| 18060 | lowerShuffleAsBitRotate(DL, MVT::v64i8, V1, Mask, Subtarget, DAG)) |
| 18061 | return Rotate; |
| 18062 | |
| 18063 | |
| 18064 | if (SDValue Masked = lowerShuffleAsBitMask(DL, MVT::v64i8, V1, V2, Mask, |
| 18065 | Zeroable, Subtarget, DAG)) |
| 18066 | return Masked; |
| 18067 | |
| 18068 | if (SDValue PSHUFB = lowerShuffleWithPSHUFB(DL, MVT::v64i8, Mask, V1, V2, |
| 18069 | Zeroable, Subtarget, DAG)) |
| 18070 | return PSHUFB; |
| 18071 | |
| 18072 | |
| 18073 | if (Subtarget.hasVBMI()) |
| 18074 | return lowerShuffleWithPERMV(DL, MVT::v64i8, Mask, V1, V2, Subtarget, DAG); |
| 18075 | |
| 18076 | |
| 18077 | |
| 18078 | if (SDValue V = lowerShuffleAsRepeatedMaskAndLanePermute( |
| 18079 | DL, MVT::v64i8, V1, V2, Mask, Subtarget, DAG)) |
| 18080 | return V; |
| 18081 | |
| 18082 | if (SDValue Blend = lowerShuffleAsBlend(DL, MVT::v64i8, V1, V2, Mask, |
| 18083 | Zeroable, Subtarget, DAG)) |
| 18084 | return Blend; |
| 18085 | |
| 18086 | |
| 18087 | |
| 18088 | if (!V2.isUndef()) |
| 18089 | if (SDValue Result = lowerShuffleAsLanePermuteAndRepeatedMask( |
| 18090 | DL, MVT::v64i8, V1, V2, Mask, Subtarget, DAG)) |
| 18091 | return Result; |
| 18092 | |
| 18093 | |
| 18094 | return splitAndLowerShuffle(DL, MVT::v64i8, V1, V2, Mask, DAG); |
| 18095 | } |
| 18096 | |
| 18097 | |
| 18098 | |
| 18099 | |
| 18100 | |
| 18101 | |
| 18102 | static SDValue lower512BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, |
| 18103 | MVT VT, SDValue V1, SDValue V2, |
| 18104 | const APInt &Zeroable, |
| 18105 | const X86Subtarget &Subtarget, |
| 18106 | SelectionDAG &DAG) { |
| 18107 | assert(Subtarget.hasAVX512() && |
| 18108 | "Cannot lower 512-bit vectors w/ basic ISA!"); |
| 18109 | |
| 18110 | |
| 18111 | |
| 18112 | int NumElts = Mask.size(); |
| 18113 | int NumV2Elements = count_if(Mask, [NumElts](int M) { return M >= NumElts; }); |
| 18114 | |
| 18115 | if (NumV2Elements == 1 && Mask[0] >= NumElts) |
| 18116 | if (SDValue Insertion = lowerShuffleAsElementInsertion( |
| 18117 | DL, VT, V1, V2, Mask, Zeroable, Subtarget, DAG)) |
| 18118 | return Insertion; |
| 18119 | |
| 18120 | |
| 18121 | if (SDValue V = |
| 18122 | lowerShuffleWithUndefHalf(DL, VT, V1, V2, Mask, Subtarget, DAG)) |
| 18123 | return V; |
| 18124 | |
| 18125 | |
| 18126 | if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, VT, V1, V2, Mask, |
| 18127 | Subtarget, DAG)) |
| 18128 | return Broadcast; |
| 18129 | |
| 18130 | if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !Subtarget.hasBWI()) { |
| 18131 | |
| 18132 | |
| 18133 | if (SDValue V = lowerShuffleAsBitMask(DL, VT, V1, V2, Mask, Zeroable, |
| 18134 | Subtarget, DAG)) |
| 18135 | return V; |
| 18136 | if (SDValue V = lowerShuffleAsBitBlend(DL, VT, V1, V2, Mask, DAG)) |
| 18137 | return V; |
| 18138 | |
| 18139 | return splitAndLowerShuffle(DL, VT, V1, V2, Mask, DAG); |
| 18140 | } |
| 18141 | |
| 18142 | |
| 18143 | |
| 18144 | |
| 18145 | |
| 18146 | switch (VT.SimpleTy) { |
| 18147 | case MVT::v8f64: |
| 18148 | return lowerV8F64Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); |
| 18149 | case MVT::v16f32: |
| 18150 | return lowerV16F32Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); |
| 18151 | case MVT::v8i64: |
| 18152 | return lowerV8I64Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); |
| 18153 | case MVT::v16i32: |
| 18154 | return lowerV16I32Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); |
| 18155 | case MVT::v32i16: |
| 18156 | return lowerV32I16Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); |
| 18157 | case MVT::v64i8: |
| 18158 | return lowerV64I8Shuffle(DL, Mask, Zeroable, V1, V2, Subtarget, DAG); |
| 18159 | |
| 18160 | default: |
| 18161 | llvm_unreachable("Not a valid 512-bit x86 vector type!"); |
| 18162 | } |
| 18163 | } |
| 18164 | |
| 18165 | static SDValue lower1BitShuffleAsKSHIFTR(const SDLoc &DL, ArrayRef<int> Mask, |
| 18166 | MVT VT, SDValue V1, SDValue V2, |
| 18167 | const X86Subtarget &Subtarget, |
| 18168 | SelectionDAG &DAG) { |
| 18169 | |
| 18170 | if (!V2.isUndef()) |
| 18171 | return SDValue(); |
| 18172 | |
| 18173 | int ShiftAmt = -1; |
| 18174 | int NumElts = Mask.size(); |
| 18175 | for (int i = 0; i != NumElts; ++i) { |
| 18176 | int M = Mask[i]; |
| 18177 | assert((M == SM_SentinelUndef || (0 <= M && M < NumElts)) && |
| 18178 | "Unexpected mask index."); |
| 18179 | if (M < 0) |
| 18180 | continue; |
| 18181 | |
| 18182 | |
| 18183 | if (ShiftAmt < 0) { |
| 18184 | ShiftAmt = M - i; |
| 18185 | |
| 18186 | if (ShiftAmt <= 0) |
| 18187 | return SDValue(); |
| 18188 | } |
| 18189 | |
| 18190 | if (ShiftAmt != M - i) |
| 18191 | return SDValue(); |
| 18192 | } |
| 18193 | assert(ShiftAmt >= 0 && "All undef?"); |
| 18194 | |
| 18195 | |
| 18196 | MVT WideVT = VT; |
| 18197 | if ((!Subtarget.hasDQI() && NumElts == 8) || NumElts < 8) |
| 18198 | WideVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1; |
| 18199 | SDValue Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideVT, |
| 18200 | DAG.getUNDEF(WideVT), V1, |
| 18201 | DAG.getIntPtrConstant(0, DL)); |
| 18202 | Res = DAG.getNode(X86ISD::KSHIFTR, DL, WideVT, Res, |
| 18203 | DAG.getTargetConstant(ShiftAmt, DL, MVT::i8)); |
| 18204 | return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res, |
| 18205 | DAG.getIntPtrConstant(0, DL)); |
| 18206 | } |
| 18207 | |
| 18208 | |
| 18209 | |
| 18210 | |
| 18211 | static int match1BitShuffleAsKSHIFT(unsigned &Opcode, ArrayRef<int> Mask, |
| 18212 | int MaskOffset, const APInt &Zeroable) { |
| 18213 | int Size = Mask.size(); |
| 18214 | |
| 18215 | auto CheckZeros = [&](int Shift, bool Left) { |
| 18216 | for (int j = 0; j < Shift; ++j) |
| 18217 | if (!Zeroable[j + (Left ? 0 : (Size - Shift))]) |
| 18218 | return false; |
| 18219 | |
| 18220 | return true; |
| 18221 | }; |
| 18222 | |
| 18223 | auto MatchShift = [&](int Shift, bool Left) { |
| 18224 | unsigned Pos = Left ? Shift : 0; |
| 18225 | unsigned Low = Left ? 0 : Shift; |
| 18226 | unsigned Len = Size - Shift; |
| 18227 | return isSequentialOrUndefInRange(Mask, Pos, Len, Low + MaskOffset); |
| 18228 | }; |
| 18229 | |
| 18230 | for (int Shift = 1; Shift != Size; ++Shift) |
| 18231 | for (bool Left : {true, false}) |
| 18232 | if (CheckZeros(Shift, Left) && MatchShift(Shift, Left)) { |
| 18233 | Opcode = Left ? X86ISD::KSHIFTL : X86ISD::KSHIFTR; |
| 18234 | return Shift; |
| 18235 | } |
| 18236 | |
| 18237 | return -1; |
| 18238 | } |
| 18239 | |
| 18240 | |
| 18241 | |
| 18242 | |
| 18243 | |
| 18244 | |
| 18245 | static SDValue lower1BitShuffle(const SDLoc &DL, ArrayRef<int> Mask, |
| 18246 | MVT VT, SDValue V1, SDValue V2, |
| 18247 | const APInt &Zeroable, |
| 18248 | const X86Subtarget &Subtarget, |
| 18249 | SelectionDAG &DAG) { |
| 18250 | assert(Subtarget.hasAVX512() && |
| 18251 | "Cannot lower 512-bit vectors w/o basic ISA!"); |
| 18252 | |
| 18253 | int NumElts = Mask.size(); |
| 18254 | |
| 18255 | |
| 18256 | int SubvecElts = 0; |
| 18257 | int Src = -1; |
| 18258 | for (int i = 0; i != NumElts; ++i) { |
| 18259 | if (Mask[i] >= 0) { |
| 18260 | |
| 18261 | |
| 18262 | if (Src < 0) |
| 18263 | Src = Mask[i] / NumElts; |
| 18264 | if (Src != (Mask[i] / NumElts) || (Mask[i] % NumElts) != i) |
| 18265 | break; |
| 18266 | } |
| 18267 | |
| 18268 | ++SubvecElts; |
| 18269 | } |
| 18270 | assert(SubvecElts != NumElts && "Identity shuffle?"); |
| 18271 | |
| 18272 | |
| 18273 | SubvecElts = PowerOf2Floor(SubvecElts); |
| 18274 | |
| 18275 | |
| 18276 | |
| 18277 | if ((int)Zeroable.countLeadingOnes() >= (NumElts - SubvecElts)) { |
| 18278 | assert(Src >= 0 && "Expected a source!"); |
| 18279 | MVT ExtractVT = MVT::getVectorVT(MVT::i1, SubvecElts); |
| 18280 | SDValue Extract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtractVT, |
| 18281 | Src == 0 ? V1 : V2, |
| 18282 | DAG.getIntPtrConstant(0, DL)); |
| 18283 | return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, |
| 18284 | DAG.getConstant(0, DL, VT), |
| 18285 | Extract, DAG.getIntPtrConstant(0, DL)); |
| 18286 | } |
| 18287 | |
| 18288 | |
| 18289 | if (SDValue Shift = lower1BitShuffleAsKSHIFTR(DL, Mask, VT, V1, V2, Subtarget, |
| 18290 | DAG)) |
| 18291 | return Shift; |
| 18292 | |
| 18293 | |
| 18294 | unsigned Offset = 0; |
| 18295 | for (SDValue V : { V1, V2 }) { |
| 18296 | unsigned Opcode; |
| 18297 | int ShiftAmt = match1BitShuffleAsKSHIFT(Opcode, Mask, Offset, Zeroable); |
| 18298 | if (ShiftAmt >= 0) { |
| 18299 | MVT WideVT = VT; |
| 18300 | if ((!Subtarget.hasDQI() && NumElts == 8) || NumElts < 8) |
| 18301 | WideVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1; |
| 18302 | SDValue Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideVT, |
| 18303 | DAG.getUNDEF(WideVT), V, |
| 18304 | DAG.getIntPtrConstant(0, DL)); |
| 18305 | |
| 18306 | if (Opcode == X86ISD::KSHIFTR && WideVT != VT) { |
| 18307 | int WideElts = WideVT.getVectorNumElements(); |
| 18308 | |
| 18309 | Res = DAG.getNode(X86ISD::KSHIFTL, DL, WideVT, Res, |
| 18310 | DAG.getTargetConstant(WideElts - NumElts, DL, MVT::i8)); |
| 18311 | |
| 18312 | ShiftAmt += WideElts - NumElts; |
| 18313 | } |
| 18314 | |
| 18315 | Res = DAG.getNode(Opcode, DL, WideVT, Res, |
| 18316 | DAG.getTargetConstant(ShiftAmt, DL, MVT::i8)); |
| 18317 | return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res, |
| 18318 | DAG.getIntPtrConstant(0, DL)); |
| 18319 | } |
| 18320 | Offset += NumElts; |
| 18321 | } |
| 18322 | |
| 18323 | |
| 18324 | |
| 18325 | MVT ExtVT; |
| 18326 | switch (VT.SimpleTy) { |
| 18327 | default: |
| 18328 | llvm_unreachable("Expected a vector of i1 elements"); |
| 18329 | case MVT::v2i1: |
| 18330 | ExtVT = MVT::v2i64; |
| 18331 | break; |
| 18332 | case MVT::v4i1: |
| 18333 | ExtVT = MVT::v4i32; |
| 18334 | break; |
| 18335 | case MVT::v8i1: |
| 18336 | |
| 18337 | |
| 18338 | ExtVT = Subtarget.hasVLX() ? MVT::v8i32 : MVT::v8i64; |
| 18339 | break; |
| 18340 | case MVT::v16i1: |
| 18341 | |
| 18342 | |
| 18343 | ExtVT = Subtarget.canExtendTo512DQ() ? MVT::v16i32 : MVT::v16i16; |
| 18344 | break; |
| 18345 | case MVT::v32i1: |
| 18346 | |
| 18347 | |
| 18348 | assert(Subtarget.hasBWI() && "Expected AVX512BW support"); |
| 18349 | ExtVT = Subtarget.canExtendTo512BW() ? MVT::v32i16 : MVT::v32i8; |
| 18350 | break; |
| 18351 | case MVT::v64i1: |
| 18352 | |
| 18353 | |
| 18354 | if (!Subtarget.useBWIRegs()) |
| 18355 | return SDValue(); |
| 18356 | ExtVT = MVT::v64i8; |
| 18357 | break; |
| 18358 | } |
| 18359 | |
| 18360 | V1 = DAG.getNode(ISD::SIGN_EXTEND, DL, ExtVT, V1); |
| 18361 | V2 = DAG.getNode(ISD::SIGN_EXTEND, DL, ExtVT, V2); |
| 18362 | |
| 18363 | SDValue Shuffle = DAG.getVectorShuffle(ExtVT, DL, V1, V2, Mask); |
| 18364 | |
| 18365 | int NumElems = VT.getVectorNumElements(); |
| 18366 | if ((Subtarget.hasBWI() && (NumElems >= 32)) || |
| 18367 | (Subtarget.hasDQI() && (NumElems < 32))) |
| 18368 | return DAG.getSetCC(DL, VT, DAG.getConstant(0, DL, ExtVT), |
| 18369 | Shuffle, ISD::SETGT); |
| 18370 | |
| 18371 | return DAG.getNode(ISD::TRUNCATE, DL, VT, Shuffle); |
| 18372 | } |
| 18373 | |
| 18374 | |
| 18375 | |
| 18376 | static bool canonicalizeShuffleMaskWithCommute(ArrayRef<int> Mask) { |
| 18377 | int NumElements = Mask.size(); |
| 18378 | |
| 18379 | int NumV1Elements = 0, NumV2Elements = 0; |
| 18380 | for (int M : Mask) |
| 18381 | if (M < 0) |
| 18382 | continue; |
| 18383 | else if (M < NumElements) |
| 18384 | ++NumV1Elements; |
| 18385 | else |
| 18386 | ++NumV2Elements; |
| 18387 | |
| 18388 | |
| 18389 | |
| 18390 | |
| 18391 | if (NumV2Elements > NumV1Elements) |
| 18392 | return true; |
| 18393 | |
| 18394 | assert(NumV1Elements > 0 && "No V1 indices"); |
| 18395 | |
| 18396 | if (NumV2Elements == 0) |
| 18397 | return false; |
| 18398 | |
| 18399 | |
| 18400 | |
| 18401 | |
| 18402 | |
| 18403 | |
| 18404 | if (NumV1Elements == NumV2Elements) { |
| 18405 | int LowV1Elements = 0, LowV2Elements = 0; |
| 18406 | for (int M : Mask.slice(0, NumElements / 2)) |
| 18407 | if (M >= NumElements) |
| 18408 | ++LowV2Elements; |
| 18409 | else if (M >= 0) |
| 18410 | ++LowV1Elements; |
| 18411 | if (LowV2Elements > LowV1Elements) |
| 18412 | return true; |
| 18413 | if (LowV2Elements == LowV1Elements) { |
| 18414 | int SumV1Indices = 0, SumV2Indices = 0; |
| 18415 | for (int i = 0, Size = Mask.size(); i < Size; ++i) |
| 18416 | if (Mask[i] >= NumElements) |
| 18417 | SumV2Indices += i; |
| 18418 | else if (Mask[i] >= 0) |
| 18419 | SumV1Indices += i; |
| 18420 | if (SumV2Indices < SumV1Indices) |
| 18421 | return true; |
| 18422 | if (SumV2Indices == SumV1Indices) { |
| 18423 | int NumV1OddIndices = 0, NumV2OddIndices = 0; |
| 18424 | for (int i = 0, Size = Mask.size(); i < Size; ++i) |
| 18425 | if (Mask[i] >= NumElements) |
| 18426 | NumV2OddIndices += i % 2; |
| 18427 | else if (Mask[i] >= 0) |
| 18428 | NumV1OddIndices += i % 2; |
| 18429 | if (NumV2OddIndices < NumV1OddIndices) |
| 18430 | return true; |
| 18431 | } |
| 18432 | } |
| 18433 | } |
| 18434 | |
| 18435 | return false; |
| 18436 | } |
| 18437 | |
| 18438 | |
| 18439 | |
| 18440 | |
| 18441 | |
| 18442 | |
| 18443 | |
| 18444 | |
| 18445 | static SDValue lowerVECTOR_SHUFFLE(SDValue Op, const X86Subtarget &Subtarget, |
| 18446 | SelectionDAG &DAG) { |
| 18447 | ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op); |
| 18448 | ArrayRef<int> OrigMask = SVOp->getMask(); |
| 18449 | SDValue V1 = Op.getOperand(0); |
| 18450 | SDValue V2 = Op.getOperand(1); |
| 18451 | MVT VT = Op.getSimpleValueType(); |
| 18452 | int NumElements = VT.getVectorNumElements(); |
| 18453 | SDLoc DL(Op); |
| 18454 | bool Is1BitVector = (VT.getVectorElementType() == MVT::i1); |
| 18455 | |
| 18456 | assert((VT.getSizeInBits() != 64 || Is1BitVector) && |
| 18457 | "Can't lower MMX shuffles"); |
| 18458 | |
| 18459 | bool V1IsUndef = V1.isUndef(); |
| 18460 | bool V2IsUndef = V2.isUndef(); |
| 18461 | if (V1IsUndef && V2IsUndef) |
| 18462 | return DAG.getUNDEF(VT); |
| 18463 | |
| 18464 | |
| 18465 | |
| 18466 | |
| 18467 | if (V1IsUndef) |
| 18468 | return DAG.getCommutedVectorShuffle(*SVOp); |
| 18469 | |
| 18470 | |
| 18471 | |
| 18472 | |
| 18473 | if (V2IsUndef && |
| 18474 | any_of(OrigMask, [NumElements](int M) { return M >= NumElements; })) { |
| 18475 | SmallVector<int, 8> NewMask(OrigMask.begin(), OrigMask.end()); |
| 18476 | for (int &M : NewMask) |
| 18477 | if (M >= NumElements) |
| 18478 | M = -1; |
| 18479 | return DAG.getVectorShuffle(VT, DL, V1, V2, NewMask); |
| 18480 | } |
| 18481 | |
| 18482 | |
| 18483 | int MaskUpperLimit = OrigMask.size() * (V2IsUndef ? 1 : 2); |
| 18484 | (void)MaskUpperLimit; |
| 18485 | assert(llvm::all_of(OrigMask, |
| 18486 | [&](int M) { return -1 <= M && M < MaskUpperLimit; }) && |
| 18487 | "Out of bounds shuffle index"); |
| 18488 | |
| 18489 | |
| 18490 | |
| 18491 | |
| 18492 | APInt KnownUndef, KnownZero; |
| 18493 | computeZeroableShuffleElements(OrigMask, V1, V2, KnownUndef, KnownZero); |
| 18494 | |
| 18495 | APInt Zeroable = KnownUndef | KnownZero; |
| 18496 | if (Zeroable.isAllOnesValue()) |
| 18497 | return getZeroVector(VT, Subtarget, DAG, DL); |
| 18498 | |
| 18499 | bool V2IsZero = !V2IsUndef && ISD::isBuildVectorAllZeros(V2.getNode()); |
| 18500 | |
| 18501 | |
| 18502 | |
| 18503 | |
| 18504 | |
| 18505 | SmallVector<int, 16> WidenedMask; |
| 18506 | if (VT.getScalarSizeInBits() < 64 && !Is1BitVector && |
| 18507 | canWidenShuffleElements(OrigMask, Zeroable, V2IsZero, WidenedMask)) { |
| 18508 | |
| 18509 | |
| 18510 | |
| 18511 | |
| 18512 | if (SDValue Broadcast = lowerShuffleAsBroadcast(DL, VT, V1, V2, OrigMask, |
| 18513 | Subtarget, DAG)) |
| 18514 | return Broadcast; |
| 18515 | |
| 18516 | MVT NewEltVT = VT.isFloatingPoint() |
| 18517 | ? MVT::getFloatingPointVT(VT.getScalarSizeInBits() * 2) |
| 18518 | : MVT::getIntegerVT(VT.getScalarSizeInBits() * 2); |
| 18519 | int NewNumElts = NumElements / 2; |
| 18520 | MVT NewVT = MVT::getVectorVT(NewEltVT, NewNumElts); |
| 18521 | |
| 18522 | |
| 18523 | if (DAG.getTargetLoweringInfo().isTypeLegal(NewVT)) { |
| 18524 | if (V2IsZero) { |
| 18525 | |
| 18526 | |
| 18527 | bool UsedZeroVector = false; |
| 18528 | assert(is_contained(WidenedMask, SM_SentinelZero) && |
| 18529 | "V2's non-undef elements are used?!"); |
| 18530 | for (int i = 0; i != NewNumElts; ++i) |
| 18531 | if (WidenedMask[i] == SM_SentinelZero) { |
| 18532 | WidenedMask[i] = i + NewNumElts; |
| 18533 | UsedZeroVector = true; |
| 18534 | } |
| 18535 | |
| 18536 | |
| 18537 | if (UsedZeroVector) |
| 18538 | V2 = getZeroVector(NewVT, Subtarget, DAG, DL); |
| 18539 | } |
| 18540 | V1 = DAG.getBitcast(NewVT, V1); |
| 18541 | V2 = DAG.getBitcast(NewVT, V2); |
| 18542 | return DAG.getBitcast( |
| 18543 | VT, DAG.getVectorShuffle(NewVT, DL, V1, V2, WidenedMask)); |
| 18544 | } |
| 18545 | } |
| 18546 | |
| 18547 | |
| 18548 | SmallVector<int, 64> Mask(OrigMask.begin(), OrigMask.end()); |
| 18549 | if (canonicalizeShuffleMaskWithCommute(Mask)) { |
| 18550 | ShuffleVectorSDNode::commuteMask(Mask); |
| 18551 | std::swap(V1, V2); |
| 18552 | } |
| 18553 | |
| 18554 | |
| 18555 | if (VT.is128BitVector()) |
| 18556 | return lower128BitShuffle(DL, Mask, VT, V1, V2, Zeroable, Subtarget, DAG); |
| 18557 | |
| 18558 | if (VT.is256BitVector()) |
| 18559 | return lower256BitShuffle(DL, Mask, VT, V1, V2, Zeroable, Subtarget, DAG); |
| 18560 | |
| 18561 | if (VT.is512BitVector()) |
| 18562 | return lower512BitShuffle(DL, Mask, VT, V1, V2, Zeroable, Subtarget, DAG); |
| 18563 | |
| 18564 | if (Is1BitVector) |
| 18565 | return lower1BitShuffle(DL, Mask, VT, V1, V2, Zeroable, Subtarget, DAG); |
| 18566 | |
| 18567 | llvm_unreachable("Unimplemented!"); |
| 18568 | } |
| 18569 | |
| 18570 | |
| 18571 | static SDValue lowerVSELECTtoVectorShuffle(SDValue Op, |
| 18572 | const X86Subtarget &Subtarget, |
| 18573 | SelectionDAG &DAG) { |
| 18574 | SDValue Cond = Op.getOperand(0); |
| 18575 | SDValue LHS = Op.getOperand(1); |
| 18576 | SDValue RHS = Op.getOperand(2); |
| 18577 | MVT VT = Op.getSimpleValueType(); |
| 18578 | |
| 18579 | |
| 18580 | |
| 18581 | if (ISD::isBuildVectorOfConstantSDNodes(Cond.getNode())) { |
| 18582 | SmallVector<int, 32> Mask; |
| 18583 | if (createShuffleMaskFromVSELECT(Mask, Cond)) |
| 18584 | return DAG.getVectorShuffle(VT, SDLoc(Op), LHS, RHS, Mask); |
| 18585 | } |
| 18586 | |
| 18587 | return SDValue(); |
| 18588 | } |
| 18589 | |
| 18590 | SDValue X86TargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const { |
| 18591 | SDValue Cond = Op.getOperand(0); |
| 18592 | SDValue LHS = Op.getOperand(1); |
| 18593 | SDValue RHS = Op.getOperand(2); |
| 18594 | |
| 18595 | |
| 18596 | |
| 18597 | if (ISD::isBuildVectorOfConstantSDNodes(Cond.getNode()) && |
| 18598 | ISD::isBuildVectorOfConstantSDNodes(LHS.getNode()) && |
| 18599 | ISD::isBuildVectorOfConstantSDNodes(RHS.getNode())) |
| 18600 | return SDValue(); |
| 18601 | |
| 18602 | |
| 18603 | |
| 18604 | if (SDValue BlendOp = lowerVSELECTtoVectorShuffle(Op, Subtarget, DAG)) |
| 18605 | return BlendOp; |
| 18606 | |
| 18607 | |
| 18608 | |
| 18609 | MVT CondVT = Cond.getSimpleValueType(); |
| 18610 | unsigned CondEltSize = Cond.getScalarValueSizeInBits(); |
| 18611 | if (CondEltSize == 1) |
| 18612 | return Op; |
| 18613 | |
| 18614 | |
| 18615 | if (!Subtarget.hasSSE41()) |
| 18616 | return SDValue(); |
| 18617 | |
| 18618 | SDLoc dl(Op); |
| 18619 | MVT VT = Op.getSimpleValueType(); |
| 18620 | unsigned EltSize = VT.getScalarSizeInBits(); |
| 18621 | unsigned NumElts = VT.getVectorNumElements(); |
| 18622 | |
| 18623 | |
| 18624 | if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !Subtarget.hasBWI()) |
| 18625 | return SDValue(); |
| 18626 | |
| 18627 | |
| 18628 | |
| 18629 | |
| 18630 | if (VT.getSizeInBits() == 512) { |
| 18631 | |
| 18632 | MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts); |
| 18633 | SDValue Mask = DAG.getSetCC(dl, MaskVT, Cond, |
| 18634 | DAG.getConstant(0, dl, CondVT), |
| 18635 | ISD::SETNE); |
| 18636 | |
| 18637 | return DAG.getSelect(dl, VT, Mask, LHS, RHS); |
| 18638 | } |
| 18639 | |
| 18640 | |
| 18641 | if (CondEltSize != EltSize) { |
| 18642 | |
| 18643 | if (CondEltSize != DAG.ComputeNumSignBits(Cond)) |
| 18644 | return SDValue(); |
| 18645 | |
| 18646 | MVT NewCondSVT = MVT::getIntegerVT(EltSize); |
| 18647 | MVT NewCondVT = MVT::getVectorVT(NewCondSVT, NumElts); |
| 18648 | Cond = DAG.getSExtOrTrunc(Cond, dl, NewCondVT); |
| 18649 | return DAG.getNode(ISD::VSELECT, dl, VT, Cond, LHS, RHS); |
| 18650 | } |
| 18651 | |
| 18652 | |
| 18653 | |
| 18654 | |
| 18655 | switch (VT.SimpleTy) { |
| 18656 | default: |
| 18657 | |
| 18658 | return Op; |
| 18659 | |
| 18660 | case MVT::v32i8: |
| 18661 | |
| 18662 | if (Subtarget.hasAVX2()) |
| 18663 | return Op; |
| 18664 | |
| 18665 | return SDValue(); |
| 18666 | |
| 18667 | case MVT::v8i16: |
| 18668 | case MVT::v16i16: { |
| 18669 | |
| 18670 | MVT CastVT = MVT::getVectorVT(MVT::i8, NumElts * 2); |
| 18671 | Cond = DAG.getBitcast(CastVT, Cond); |
| 18672 | LHS = DAG.getBitcast(CastVT, LHS); |
| 18673 | RHS = DAG.getBitcast(CastVT, RHS); |
| 18674 | SDValue Select = DAG.getNode(ISD::VSELECT, dl, CastVT, Cond, LHS, RHS); |
| 18675 | return DAG.getBitcast(VT, Select); |
| 18676 | } |
| 18677 | } |
| 18678 | } |
| 18679 | |
/// Lower EXTRACT_VECTOR_ELT using SSE4.1-specific patterns (PEXTRB, integer
/// rerouting of f32 extracts) when the source is a 128-bit vector and the
/// index is a known constant. Returns an empty SDValue when no SSE4.1
/// lowering applies and generic handling should take over.
static SDValue LowerEXTRACT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getSimpleValueType();
  SDValue Vec = Op.getOperand(0);
  SDValue Idx = Op.getOperand(1);
  assert(isa<ConstantSDNode>(Idx) && "Constant index expected");
  SDLoc dl(Op);

  // Only 128-bit source vectors are handled here; the caller narrows wider
  // vectors before reaching this point.
  if (!Vec.getSimpleValueType().is128BitVector())
    return SDValue();

  if (VT.getSizeInBits() == 8) {
    // Extracting lane 0: if the result will not be folded into a zext or a
    // store, a plain i32 extract (MOVD) + truncate avoids a PEXTRB.
    if (llvm::isNullConstant(Idx) && !MayFoldIntoZeroExtend(Op) &&
        !MayFoldIntoStore(Op))
      return DAG.getNode(ISD::TRUNCATE, dl, MVT::i8,
                         DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
                                     DAG.getBitcast(MVT::v4i32, Vec), Idx));

    // Otherwise emit PEXTRB, which produces an i32 we truncate to the byte
    // type the caller asked for.
    unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
    SDValue Extract = DAG.getNode(X86ISD::PEXTRB, dl, MVT::i32, Vec,
                                  DAG.getTargetConstant(IdxVal, dl, MVT::i8));
    return DAG.getNode(ISD::TRUNCATE, dl, VT, Extract);
  }

  if (VT == MVT::f32) {
    // Rerouting an f32 extract through the integer domain is only profitable
    // when its single user is a store of a non-zero lane or an i32 bitcast;
    // in all other cases bail out and let generic lowering handle it.
    if (!Op.hasOneUse())
      return SDValue();
    SDNode *User = *Op.getNode()->use_begin();
    if ((User->getOpcode() != ISD::STORE || isNullConstant(Idx)) &&
        (User->getOpcode() != ISD::BITCAST ||
         User->getValueType(0) != MVT::i32))
      return SDValue();
    SDValue Extract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
                                  DAG.getBitcast(MVT::v4i32, Vec), Idx);
    return DAG.getBitcast(MVT::f32, Extract);
  }

  // i32/i64 extracts are matched directly (PEXTRD/PEXTRQ) by isel patterns.
  if (VT == MVT::i32 || VT == MVT::i64)
    return Op;

  return SDValue();
}
| 18728 | |
| 18729 | |
| 18730 | |
/// Extract a single bit from a vXi1 mask vector (AVX-512 k-register).
/// A non-constant index is handled by widening the mask to an integer vector
/// and extracting from that; a constant non-zero index is handled by shifting
/// the mask right with KSHIFTR so the wanted bit lands in lane 0.
static SDValue ExtractBitFromMaskVector(SDValue Op, SelectionDAG &DAG,
                                        const X86Subtarget &Subtarget) {
  SDValue Vec = Op.getOperand(0);
  SDLoc dl(Vec);
  MVT VecVT = Vec.getSimpleValueType();
  SDValue Idx = Op.getOperand(1);
  auto* IdxC = dyn_cast<ConstantSDNode>(Idx);
  MVT EltVT = Op.getSimpleValueType();

  assert((VecVT.getVectorNumElements() <= 16 || Subtarget.hasBWI()) &&
         "Unexpected vector type in ExtractBitFromMaskVector");

  // Variable index: sign-extend the mask into a 128-bit integer vector
  // (or vNi8 for wide masks), extract the lane, then truncate back to i1.
  if (!IdxC) {
    unsigned NumElts = VecVT.getVectorNumElements();
    // Pick the widest element type that keeps the extended vector at 128
    // bits; fall back to i8 elements for masks wider than 8 lanes.
    MVT ExtEltVT = (NumElts <= 8) ? MVT::getIntegerVT(128 / NumElts) : MVT::i8;
    MVT ExtVecVT = MVT::getVectorVT(ExtEltVT, NumElts);
    SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND, dl, ExtVecVT, Vec);
    SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ExtEltVT, Ext, Idx);
    return DAG.getNode(ISD::TRUNCATE, dl, EltVT, Elt);
  }

  unsigned IdxVal = IdxC->getZExtValue();
  if (IdxVal == 0) // Lane 0 needs no shifting; isel handles it directly.
    return Op;

  // Widen narrow masks so KSHIFTR operates on a legal k-register type
  // (v8i1 requires DQI; otherwise widen to v16i1).
  unsigned NumElems = VecVT.getVectorNumElements();
  MVT WideVecVT = VecVT;
  if ((!Subtarget.hasDQI() && NumElems == 8) || NumElems < 8) {
    WideVecVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1;
    Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideVecVT,
                      DAG.getUNDEF(WideVecVT), Vec,
                      DAG.getIntPtrConstant(0, dl));
  }

  // Shift the wanted bit down to lane 0, then extract lane 0.
  Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideVecVT, Vec,
                    DAG.getTargetConstant(IdxVal, dl, MVT::i8));

  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op.getValueType(), Vec,
                     DAG.getIntPtrConstant(0, dl));
}
| 18777 | |
/// Custom-lower EXTRACT_VECTOR_ELT for x86. Mask (vXi1) vectors get a
/// dedicated path; 256/512-bit sources are narrowed to the relevant 128-bit
/// chunk; within 128 bits the element is extracted via PEXTRW, SSE4.1
/// patterns, sub-element shifts, or shuffles depending on element size.
SDValue
X86TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
                                           SelectionDAG &DAG) const {
  SDLoc dl(Op);
  SDValue Vec = Op.getOperand(0);
  MVT VecVT = Vec.getSimpleValueType();
  SDValue Idx = Op.getOperand(1);
  auto* IdxC = dyn_cast<ConstantSDNode>(Idx);

  // vXi1 mask extracts are k-register operations; see
  // ExtractBitFromMaskVector.
  if (VecVT.getVectorElementType() == MVT::i1)
    return ExtractBitFromMaskVector(Op, DAG, Subtarget);

  // Variable-index extracts have no custom lowering here; let the generic
  // legalizer deal with them (typically via a stack spill).
  if (!IdxC) {
    return SDValue();
  }

  unsigned IdxVal = IdxC->getZExtValue();

  // For wide vectors, peel out the 128-bit chunk containing the element and
  // recurse on that chunk with the index rebased into it.
  if (VecVT.is256BitVector() || VecVT.is512BitVector()) {
    Vec = extract128BitVector(Vec, IdxVal, DAG, dl);
    MVT EltVT = VecVT.getVectorElementType();

    unsigned ElemsPerChunk = 128 / EltVT.getSizeInBits();
    assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2");

    // Rebase the index into the extracted chunk (mask works because
    // ElemsPerChunk is a power of two).
    IdxVal &= ElemsPerChunk - 1;
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, Op.getValueType(), Vec,
                       DAG.getIntPtrConstant(IdxVal, dl));
  }

  assert(VecVT.is128BitVector() && "Unexpected vector length");

  MVT VT = Op.getSimpleValueType();

  if (VT.getSizeInBits() == 16) {
    // Lane-0 i16 extracts that won't fold into a zext or (SSE4.1) store are
    // cheaper as an i32 extract (MOVD) plus truncate.
    if (IdxVal == 0 && !MayFoldIntoZeroExtend(Op) &&
        !(Subtarget.hasSSE41() && MayFoldIntoStore(Op)))
      return DAG.getNode(ISD::TRUNCATE, dl, MVT::i16,
                         DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
                                     DAG.getBitcast(MVT::v4i32, Vec), Idx));

    // Otherwise PEXTRW (available since SSE2) extracts the word as i32.
    SDValue Extract = DAG.getNode(X86ISD::PEXTRW, dl, MVT::i32, Vec,
                                  DAG.getTargetConstant(IdxVal, dl, MVT::i8));
    return DAG.getNode(ISD::TRUNCATE, dl, VT, Extract);
  }

  if (Subtarget.hasSSE41())
    if (SDValue Res = LowerEXTRACT_VECTOR_ELT_SSE4(Op, DAG))
      return Res;

  // Pre-SSE4.1 byte extract: pull out the containing dword (lane 0 chunk) or
  // word, then shift the byte into position and truncate. Only done when the
  // vector has no other users, so the bitcasts don't pessimize them.
  if (VT.getSizeInBits() == 8 && Op->isOnlyUserOf(Vec.getNode())) {
    int DWordIdx = IdxVal / 4;
    if (DWordIdx == 0) {
      // Byte lives in dword 0: extract as i32 and shift the byte down.
      SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32,
                                DAG.getBitcast(MVT::v4i32, Vec),
                                DAG.getIntPtrConstant(DWordIdx, dl));
      int ShiftVal = (IdxVal % 4) * 8;
      if (ShiftVal != 0)
        Res = DAG.getNode(ISD::SRL, dl, MVT::i32, Res,
                          DAG.getConstant(ShiftVal, dl, MVT::i8));
      return DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
    }

    // Otherwise extract the containing word (PEXTRW-able) and shift.
    int WordIdx = IdxVal / 2;
    SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16,
                              DAG.getBitcast(MVT::v8i16, Vec),
                              DAG.getIntPtrConstant(WordIdx, dl));
    int ShiftVal = (IdxVal % 2) * 8;
    if (ShiftVal != 0)
      Res = DAG.getNode(ISD::SRL, dl, MVT::i16, Res,
                        DAG.getConstant(ShiftVal, dl, MVT::i8));
    return DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
  }

  if (VT.getSizeInBits() == 32) {
    if (IdxVal == 0) // Lane 0 is matched directly by isel.
      return Op;

    // Shuffle the wanted lane into position 0, then extract lane 0.
    int Mask[4] = { static_cast<int>(IdxVal), -1, -1, -1 };
    Vec = DAG.getVectorShuffle(VecVT, dl, Vec, DAG.getUNDEF(VecVT), Mask);
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec,
                       DAG.getIntPtrConstant(0, dl));
  }

  if (VT.getSizeInBits() == 64) {
    if (IdxVal == 0) // Lane 0 is matched directly by isel.
      return Op;

    // Only two lanes exist, and IdxVal != 0 here, so the wanted lane is 1:
    // move it to lane 0 with an UNPCKH-style shuffle and extract lane 0.
    int Mask[2] = { 1, -1 };
    Vec = DAG.getVectorShuffle(VecVT, dl, Vec, DAG.getUNDEF(VecVT), Mask);
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Vec,
                       DAG.getIntPtrConstant(0, dl));
  }

  return SDValue();
}
| 18922 | |
| 18923 | |
| 18924 | |
/// Insert one bit into a vXi1 mask vector (AVX-512 k-register).
/// A non-constant index is handled by sign-extending mask and element into an
/// integer vector, inserting there, and truncating back. A constant index is
/// expressed as inserting a v1i1 subvector, which later lowering turns into
/// k-register shift/mask operations.
static SDValue InsertBitToMaskVector(SDValue Op, SelectionDAG &DAG,
                                     const X86Subtarget &Subtarget) {
  SDLoc dl(Op);
  SDValue Vec = Op.getOperand(0);
  SDValue Elt = Op.getOperand(1);
  SDValue Idx = Op.getOperand(2);
  MVT VecVT = Vec.getSimpleValueType();

  if (!isa<ConstantSDNode>(Idx)) {
    // Pick the widest element type that keeps the extended vector at 128
    // bits; fall back to i8 elements for masks wider than 8 lanes.
    unsigned NumElts = VecVT.getVectorNumElements();
    MVT ExtEltVT = (NumElts <= 8) ? MVT::getIntegerVT(128 / NumElts) : MVT::i8;
    MVT ExtVecVT = MVT::getVectorVT(ExtEltVT, NumElts);
    SDValue ExtOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, ExtVecVT,
                                DAG.getNode(ISD::SIGN_EXTEND, dl, ExtVecVT, Vec),
                                DAG.getNode(ISD::SIGN_EXTEND, dl, ExtEltVT, Elt),
                                Idx);
    return DAG.getNode(ISD::TRUNCATE, dl, VecVT, ExtOp);
  }

  // Constant index: wrap the bit in a v1i1 vector and insert it as a
  // subvector at the (constant) position.
  SDValue EltInVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v1i1, Elt);
  return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VecVT, Vec, EltInVec, Idx);
}
| 18949 | |
/// Custom-lower INSERT_VECTOR_ELT for x86. Mask vectors, variable indices,
/// zero/all-ones elements, wide (256/512-bit) vectors, and the various SSE
/// insert instructions (PINSRW/PINSRB/INSERTPS/BLENDI) each get a dedicated
/// path; an empty SDValue defers to generic lowering.
SDValue X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
                                                  SelectionDAG &DAG) const {
  MVT VT = Op.getSimpleValueType();
  MVT EltVT = VT.getVectorElementType();
  unsigned NumElts = VT.getVectorNumElements();
  unsigned EltSizeInBits = EltVT.getScalarSizeInBits();

  // vXi1 mask inserts are k-register operations; see InsertBitToMaskVector.
  if (EltVT == MVT::i1)
    return InsertBitToMaskVector(Op, DAG, Subtarget);

  SDLoc dl(Op);
  SDValue N0 = Op.getOperand(0); // Destination vector.
  SDValue N1 = Op.getOperand(1); // Element to insert.
  SDValue N2 = Op.getOperand(2); // Insertion index.
  auto *N2C = dyn_cast<ConstantSDNode>(N2);

  if (!N2C) {
    // Variable index: lower as a compare-and-blend -- splat the index and the
    // element, compare the index splat against {0,1,...,NumElts-1}, and
    // select the new element in the matching lane. Only do this when the
    // target has a usable VSELECT for this element type.
    if (!(Subtarget.hasBWI() ||
          (Subtarget.hasAVX512() && EltSizeInBits >= 32) ||
          (Subtarget.hasSSE41() && VT.isFloatingPoint())))
      return SDValue();

    // The index vector uses integer elements of the same width as VT's
    // elements; bail if either type isn't legal.
    MVT IdxSVT = MVT::getIntegerVT(EltSizeInBits);
    MVT IdxVT = MVT::getVectorVT(IdxSVT, NumElts);
    if (!isTypeLegal(IdxSVT) || !isTypeLegal(IdxVT))
      return SDValue();

    SDValue IdxExt = DAG.getZExtOrTrunc(N2, dl, IdxSVT);
    SDValue IdxSplat = DAG.getSplatBuildVector(IdxVT, dl, IdxExt);
    SDValue EltSplat = DAG.getSplatBuildVector(VT, dl, N1);

    SmallVector<SDValue, 16> RawIndices;
    for (unsigned I = 0; I != NumElts; ++I)
      RawIndices.push_back(DAG.getConstant(I, dl, IdxSVT));
    SDValue Indices = DAG.getBuildVector(IdxVT, dl, RawIndices);

    // select (splat(idx) == {0,1,...}), splat(elt), N0
    return DAG.getSelectCC(dl, IdxSplat, Indices, EltSplat, N0,
                           ISD::CondCode::SETEQ);
  }

  // Out-of-range constant indices are not lowered here.
  if (N2C->getAPIntValue().uge(NumElts))
    return SDValue();
  uint64_t IdxVal = N2C->getZExtValue();

  bool IsZeroElt = X86::isZeroNode(N1);
  bool IsAllOnesElt = VT.isInteger() && llvm::isAllOnesConstant(N1);

  // Inserting a constant 0 or -1 can be done as a blend against an
  // all-zeros/all-ones vector (SSE4.1 blends, or AND/OR masks for sub-word
  // zero inserts into wide vectors).
  if ((IsZeroElt || IsAllOnesElt) && Subtarget.hasSSE41() &&
      (16 <= EltSizeInBits || (IsZeroElt && !VT.is128BitVector()))) {
    SmallVector<int, 8> BlendMask;
    for (unsigned i = 0; i != NumElts; ++i)
      BlendMask.push_back(i == IdxVal ? i + NumElts : i);
    SDValue CstVector = IsZeroElt ? getZeroVector(VT, Subtarget, DAG, dl)
                                  : getOnesVector(VT, DAG, dl);
    return DAG.getVectorShuffle(VT, dl, N0, CstVector, BlendMask);
  }

  // 256/512-bit vectors: insert into the containing 128-bit chunk, then put
  // the chunk back.
  if (VT.is256BitVector() || VT.is512BitVector()) {
    // Lane-0 insert of a float/double (AVX) or i32 (AVX2) is a single
    // BLENDI with the scalar broadcast into lane 0.
    if (VT.is256BitVector() && IdxVal == 0) {
      if ((Subtarget.hasAVX() && (EltVT == MVT::f64 || EltVT == MVT::f32)) ||
          (Subtarget.hasAVX2() && EltVT == MVT::i32)) {
        SDValue N1Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, N1);
        return DAG.getNode(X86ISD::BLENDI, dl, VT, N0, N1Vec,
                           DAG.getTargetConstant(1, dl, MVT::i8));
      }
    }

    // Extract the 128-bit chunk containing lane IdxVal.
    SDValue V = extract128BitVector(N0, IdxVal, DAG, dl);

    // Rebase the index into that chunk.
    unsigned NumEltsIn128 = 128 / EltSizeInBits;
    assert(isPowerOf2_32(NumEltsIn128));
    unsigned IdxIn128 = IdxVal & (NumEltsIn128 - 1);

    V = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, V.getValueType(), V, N1,
                    DAG.getIntPtrConstant(IdxIn128, dl));

    // Re-insert the updated chunk into the full vector.
    return insert128BitVector(N0, V, IdxVal, DAG, dl);
  }
  assert(VT.is128BitVector() && "Only 128-bit vector types should be left!");

  // Inserting into lane 0 of an all-zeros vector: materialize the scalar as
  // a vector and zero the upper lanes with a shuffle.
  if (IdxVal == 0 && ISD::isBuildVectorAllZeros(N0.getNode())) {
    if (EltVT == MVT::i32 || EltVT == MVT::f32 || EltVT == MVT::f64 ||
        EltVT == MVT::i64) {
      N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, N1);
      return getShuffleVectorZeroOrUndef(N1, 0, true, Subtarget, DAG);
    }

    // Sub-dword elements: widen the scalar to i32, build the zeroing shuffle
    // in the i32 domain, and bitcast back.
    if (EltVT == MVT::i16 || EltVT == MVT::i8) {
      N1 = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, N1);
      MVT ShufVT = MVT::getVectorVT(MVT::i32, VT.getSizeInBits() / 32);
      N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, ShufVT, N1);
      N1 = getShuffleVectorZeroOrUndef(N1, 0, true, Subtarget, DAG);
      return DAG.getBitcast(VT, N1);
    }
  }

  // Word/byte inserts map to PINSRW (SSE2) / PINSRB (SSE4.1); both take the
  // scalar as i32 and the index as an i8 immediate.
  if (VT == MVT::v8i16 || (VT == MVT::v16i8 && Subtarget.hasSSE41())) {
    unsigned Opc;
    if (VT == MVT::v8i16) {
      assert(Subtarget.hasSSE2() && "SSE2 required for PINSRW");
      Opc = X86ISD::PINSRW;
    } else {
      assert(VT == MVT::v16i8 && "PINSRB requires v16i8 vector");
      assert(Subtarget.hasSSE41() && "SSE41 required for PINSRB");
      Opc = X86ISD::PINSRB;
    }

    assert(N1.getValueType() != MVT::i32 && "Unexpected VT");
    N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, N1);
    N2 = DAG.getTargetConstant(IdxVal, dl, MVT::i8);
    return DAG.getNode(Opc, dl, VT, N0, N1, N2);
  }

  if (Subtarget.hasSSE41()) {
    if (EltVT == MVT::f32) {
      // Lane-0 f32 insert: prefer BLENDPS over INSERTPS, except at minsize
      // when the scalar may fold as an INSERTPS load operand.
      bool MinSize = DAG.getMachineFunction().getFunction().hasMinSize();
      if (IdxVal == 0 && (!MinSize || !MayFoldLoad(N1))) {
        N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4f32, N1);
        return DAG.getNode(X86ISD::BLENDI, dl, VT, N0, N1,
                           DAG.getTargetConstant(1, dl, MVT::i8));
      }

      // INSERTPS immediate: destination lane index goes in bits [5:4] (the
      // source lane is 0, so only IdxVal << 4 is needed).
      N1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4f32, N1);
      return DAG.getNode(X86ISD::INSERTPS, dl, VT, N0, N1,
                         DAG.getTargetConstant(IdxVal << 4, dl, MVT::i8));
    }

    // PINSRD/PINSRQ are matched directly by isel patterns.
    if (EltVT == MVT::i32 || EltVT == MVT::i64)
      return Op;
  }

  return SDValue();
}
| 19123 | |
/// Custom-lower SCALAR_TO_VECTOR. Zero scalars become zero vectors; wide
/// results are built in a 128-bit vector and inserted into undef; sub-dword
/// integer scalars are widened to i32 so a single MOVD materializes them.
static SDValue LowerSCALAR_TO_VECTOR(SDValue Op, const X86Subtarget &Subtarget,
                                     SelectionDAG &DAG) {
  SDLoc dl(Op);
  MVT OpVT = Op.getSimpleValueType();

  // A zero scalar makes the whole (implicitly zero-padded) vector zero.
  if (X86::isZeroNode(Op.getOperand(0)))
    return getZeroVector(OpVT, Subtarget, DAG, dl);

  // For 256/512-bit results, build the scalar in a 128-bit vector and insert
  // that at position 0 of an undef wide vector.
  if (!OpVT.is128BitVector()) {
    unsigned SizeFactor = OpVT.getSizeInBits() / 128;
    MVT VT128 = MVT::getVectorVT(OpVT.getVectorElementType(),
                                 OpVT.getVectorNumElements() / SizeFactor);

    Op = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT128, Op.getOperand(0));

    return insert128BitVector(DAG.getUNDEF(OpVT), Op, 0, DAG, dl);
  }
  assert(OpVT.is128BitVector() && OpVT.isInteger() && OpVT != MVT::v2i64 &&
         "Expected an SSE type!");

  // v4i32 is matched directly (MOVD) by isel patterns.
  if (OpVT == MVT::v4i32)
    return Op;

  // Sub-dword scalar: any-extend to i32, build a v4i32, and bitcast to the
  // requested type; the upper bits are don't-care.
  SDValue AnyExt = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Op.getOperand(0));
  return DAG.getBitcast(
      OpVT, DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, AnyExt));
}
| 19158 | |
| 19159 | |
| 19160 | |
| 19161 | |
| 19162 | static SDValue LowerINSERT_SUBVECTOR(SDValue Op, const X86Subtarget &Subtarget, |
| 19163 | SelectionDAG &DAG) { |
| 19164 | assert(Op.getSimpleValueType().getVectorElementType() == MVT::i1); |
| 19165 | |
| 19166 | return insert1BitVector(Op, DAG, Subtarget); |
| 19167 | } |
| 19168 | |
/// Custom-lower EXTRACT_SUBVECTOR for vXi1 mask vectors: shift the wanted
/// bits down to position 0 with KSHIFTR, then extract from position 0.
static SDValue LowerEXTRACT_SUBVECTOR(SDValue Op, const X86Subtarget &Subtarget,
                                      SelectionDAG &DAG) {
  assert(Op.getSimpleValueType().getVectorElementType() == MVT::i1 &&
         "Only vXi1 extract_subvectors need custom lowering");

  SDLoc dl(Op);
  SDValue Vec = Op.getOperand(0);
  uint64_t IdxVal = Op.getConstantOperandVal(1);

  // Extracting from position 0 needs no shifting; isel handles it.
  if (IdxVal == 0)
    return Op;

  MVT VecVT = Vec.getSimpleValueType();
  unsigned NumElems = VecVT.getVectorNumElements();

  // Widen narrow masks so KSHIFTR operates on a legal k-register type
  // (v8i1 requires DQI; otherwise widen to v16i1).
  MVT WideVecVT = VecVT;
  if ((!Subtarget.hasDQI() && NumElems == 8) || NumElems < 8) {
    WideVecVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1;
    Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideVecVT,
                      DAG.getUNDEF(WideVecVT), Vec,
                      DAG.getIntPtrConstant(0, dl));
  }

  // Shift the wanted bits down to position 0, then extract from there.
  Vec = DAG.getNode(X86ISD::KSHIFTR, dl, WideVecVT, Vec,
                    DAG.getTargetConstant(IdxVal, dl, MVT::i8));

  return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, Op.getValueType(), Vec,
                     DAG.getIntPtrConstant(0, dl));
}
| 19200 | |
| 19201 | |
| 19202 | unsigned X86TargetLowering::getGlobalWrapperKind( |
| 19203 | const GlobalValue *GV, const unsigned char OpFlags) const { |
| 19204 | |
| 19205 | if (GV && GV->isAbsoluteSymbolRef()) |
| 19206 | return X86ISD::Wrapper; |
| 19207 | |
| 19208 | CodeModel::Model M = getTargetMachine().getCodeModel(); |
| 19209 | if (Subtarget.isPICStyleRIPRel() && |
| 19210 | (M == CodeModel::Small || M == CodeModel::Kernel)) |
| 19211 | return X86ISD::WrapperRIP; |
| 19212 | |
| 19213 | |
| 19214 | if (OpFlags == X86II::MO_GOTPCREL) |
| 19215 | return X86ISD::WrapperRIP; |
| 19216 | |
| 19217 | return X86ISD::Wrapper; |
| 19218 | } |
| 19219 | |
| 19220 | |
| 19221 | |
| 19222 | |
| 19223 | |
| 19224 | |
| 19225 | |
| 19226 | SDValue |
| 19227 | X86TargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const { |
| 19228 | ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op); |
| 19229 | |
| 19230 | |
| 19231 | |
| 19232 | unsigned char OpFlag = Subtarget.classifyLocalReference(nullptr); |
| 19233 | |
| 19234 | auto PtrVT = getPointerTy(DAG.getDataLayout()); |
| 19235 | SDValue Result = DAG.getTargetConstantPool( |
| 19236 | CP->getConstVal(), PtrVT, CP->getAlign(), CP->getOffset(), OpFlag); |
| 19237 | SDLoc DL(CP); |
| 19238 | Result = DAG.getNode(getGlobalWrapperKind(), DL, PtrVT, Result); |
| 19239 | |
| 19240 | if (OpFlag) { |
| 19241 | Result = |
| 19242 | DAG.getNode(ISD::ADD, DL, PtrVT, |
| 19243 | DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT), Result); |
| 19244 | } |
| 19245 | |
| 19246 | return Result; |
| 19247 | } |
| 19248 | |
| 19249 | SDValue X86TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { |
| 19250 | JumpTableSDNode *JT = cast<JumpTableSDNode>(Op); |
| 19251 | |
| 19252 | |
| 19253 | |
| 19254 | unsigned char OpFlag = Subtarget.classifyLocalReference(nullptr); |
| 19255 | |
| 19256 | auto PtrVT = getPointerTy(DAG.getDataLayout()); |
| 19257 | SDValue Result = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, OpFlag); |
| 19258 | SDLoc DL(JT); |
| 19259 | Result = DAG.getNode(getGlobalWrapperKind(), DL, PtrVT, Result); |
| 19260 | |
| 19261 | |
| 19262 | if (OpFlag) |
| 19263 | Result = |
| 19264 | DAG.getNode(ISD::ADD, DL, PtrVT, |
| 19265 | DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT), Result); |
| 19266 | |
| 19267 | return Result; |
| 19268 | } |
| 19269 | |
// External symbols share the common global/external lowering path; ForCall is
// false because this materializes an address rather than a call target.
SDValue X86TargetLowering::LowerExternalSymbol(SDValue Op,
                                              SelectionDAG &DAG) const {
  return LowerGlobalOrExternal(Op, DAG, false);
}
| 19274 | |
/// Lower a BlockAddress: emit a TargetBlockAddress node with the
/// subtarget-chosen reference flags, wrap it, and add the PIC base register
/// when the flags say the address is PIC-base relative.
SDValue
X86TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
  // Ask the subtarget how block addresses should be referenced (flags encode
  // PIC style / GOT indirection requirements).
  unsigned char OpFlags =
    Subtarget.classifyBlockAddressReference();
  const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
  int64_t Offset = cast<BlockAddressSDNode>(Op)->getOffset();
  SDLoc dl(Op);
  auto PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue Result = DAG.getTargetBlockAddress(BA, PtrVT, Offset, OpFlags);
  Result = DAG.getNode(getGlobalWrapperKind(), dl, PtrVT, Result);

  // PIC-base-relative references need the global base register added.
  if (isGlobalRelativeToPICBase(OpFlags)) {
    Result = DAG.getNode(ISD::ADD, dl, PtrVT,
                         DAG.getNode(X86ISD::GlobalBaseReg, dl, PtrVT), Result);
  }

  return Result;
}
| 19295 | |
| 19296 | |
| 19297 | |
/// Common lowering for global-address and external-symbol nodes. Classifies
/// the reference (PIC base, GOT stub, plain), folds the offset into the
/// symbol when the code model permits, and otherwise materializes the address
/// as wrapper + optional PIC-base add + optional GOT load + optional offset
/// add. When ForCall is true and no indirection is needed, the bare target
/// node is returned so it can be used directly as a call operand.
SDValue X86TargetLowering::LowerGlobalOrExternal(SDValue Op, SelectionDAG &DAG,
                                                bool ForCall) const {
  // Unpack either a GlobalAddressSDNode or an ExternalSymbolSDNode.
  const SDLoc &dl = SDLoc(Op);
  const GlobalValue *GV = nullptr;
  int64_t Offset = 0;
  const char *ExternalSym = nullptr;
  if (const auto *G = dyn_cast<GlobalAddressSDNode>(Op)) {
    GV = G->getGlobal();
    Offset = G->getOffset();
  } else {
    const auto *ES = cast<ExternalSymbolSDNode>(Op);
    ExternalSym = ES->getSymbol();
  }

  // Ask the subtarget how this symbol must be referenced; calls may use a
  // different classification (e.g. PLT) than data references.
  const Module &Mod = *DAG.getMachineFunction().getFunction().getParent();
  unsigned char OpFlags;
  if (ForCall)
    OpFlags = Subtarget.classifyGlobalFunctionReference(GV, Mod);
  else
    OpFlags = Subtarget.classifyGlobalReference(GV, Mod);
  bool HasPICReg = isGlobalRelativeToPICBase(OpFlags);
  bool NeedsLoad = isGlobalStubReference(OpFlags);

  CodeModel::Model M = DAG.getTarget().getCodeModel();
  auto PtrVT = getPointerTy(DAG.getDataLayout());
  SDValue Result;

  if (GV) {
    // Fold a non-negative offset directly into the symbol reference when the
    // code model guarantees the combined displacement still fits; otherwise
    // keep it in Offset and add it after any GOT indirection below.
    int64_t GlobalOffset = 0;
    if (OpFlags == X86II::MO_NO_FLAG && Offset >= 0 &&
        X86::isOffsetSuitableForCodeModel(Offset, M, true)) {
      std::swap(GlobalOffset, Offset);
    }
    Result = DAG.getTargetGlobalAddress(GV, dl, PtrVT, GlobalOffset, OpFlags);
  } else {
    Result = DAG.getTargetExternalSymbol(ExternalSym, PtrVT, OpFlags);
  }

  // Direct call targets need no wrapper/indirection -- return the bare node
  // so the caller can use it as the callee operand.
  if (ForCall && !NeedsLoad && !HasPICReg && Offset == 0)
    return Result;

  Result = DAG.getNode(getGlobalWrapperKind(GV, OpFlags), dl, PtrVT, Result);

  // PIC-base-relative references need the global base register added.
  if (HasPICReg) {
    Result = DAG.getNode(ISD::ADD, dl, PtrVT,
                         DAG.getNode(X86ISD::GlobalBaseReg, dl, PtrVT), Result);
  }

  // Stub references hold the real address in the GOT; load it.
  if (NeedsLoad)
    Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Result,
                         MachinePointerInfo::getGOT(DAG.getMachineFunction()));

  // Any offset that could not be folded into the symbol (see above) is added
  // after indirection, so it applies to the resolved address.
  if (Offset != 0)
    Result = DAG.getNode(ISD::ADD, dl, PtrVT, Result,
                         DAG.getConstant(Offset, dl, PtrVT));

  return Result;
}
| 19371 | |
// Global addresses share the common global/external lowering path; ForCall is
// false because this materializes an address rather than a call target.
SDValue
X86TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
  return LowerGlobalOrExternal(Op, DAG, false);
}
| 19376 | |
/// Emit a TLSADDR (or TLSBASEADDR for local-dynamic) pseudo-call for the
/// given global and read the result out of ReturnReg. InFlag, when non-null,
/// glues the call to a preceding copy (e.g. the EBX GOT setup in 32-bit PIC).
/// The pseudo expands to a call, so the frame is marked accordingly.
static SDValue
GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
           SDValue *InFlag, const EVT PtrVT, unsigned ReturnReg,
           unsigned char OperandFlags, bool LocalDynamic = false) {
  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  SDLoc dl(GA);
  SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
                                           GA->getValueType(0),
                                           GA->getOffset(),
                                           OperandFlags);

  // Local-dynamic uses TLSBASEADDR (returns the module base); general-dynamic
  // uses TLSADDR (returns the variable's address).
  X86ISD::NodeType CallType = LocalDynamic ? X86ISD::TLSBASEADDR
                                           : X86ISD::TLSADDR;

  if (InFlag) {
    // Glue the pseudo-call to the preceding register copy.
    SDValue Ops[] = { Chain, TGA, *InFlag };
    Chain = DAG.getNode(CallType, dl, NodeTys, Ops);
  } else {
    SDValue Ops[] = { Chain, TGA };
    Chain = DAG.getNode(CallType, dl, NodeTys, Ops);
  }

  // TLSADDR is a call in disguise: record that this frame makes calls and
  // adjusts the stack.
  MFI.setAdjustsStack(true);
  MFI.setHasCalls(true);

  // The result comes back in ReturnReg, glued to the pseudo-call.
  SDValue Flag = Chain.getValue(1);
  return DAG.getCopyFromReg(Chain, dl, ReturnReg, PtrVT, Flag);
}
| 19407 | |
| 19408 | |
| 19409 | static SDValue |
| 19410 | LowerToTLSGeneralDynamicModel32(GlobalAddressSDNode *GA, SelectionDAG &DAG, |
| 19411 | const EVT PtrVT) { |
| 19412 | SDValue InFlag; |
| 19413 | SDLoc dl(GA); |
| 19414 | SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, X86::EBX, |
| 19415 | DAG.getNode(X86ISD::GlobalBaseReg, |
| 19416 | SDLoc(), PtrVT), InFlag); |
| 19417 | InFlag = Chain.getValue(1); |
| 19418 | |
| 19419 | return GetTLSADDR(DAG, Chain, GA, &InFlag, PtrVT, X86::EAX, X86II::MO_TLSGD); |
| 19420 | } |
| 19421 | |
| 19422 | |
// Lower a TLS address in the 64-bit LP64 ELF "general dynamic" model: a
// TLSADDR pseudo-call returning the address in RAX. No GOT base register
// setup is needed in 64-bit mode.
static SDValue
LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA, SelectionDAG &DAG,
                                const EVT PtrVT) {
  return GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT,
                    X86::RAX, X86II::MO_TLSGD);
}
| 19429 | |
| 19430 | |
// Lower a TLS address in the x32 (ILP32 on x86-64) "general dynamic" model:
// same as the LP64 variant but the 32-bit result comes back in EAX.
static SDValue
LowerToTLSGeneralDynamicModelX32(GlobalAddressSDNode *GA, SelectionDAG &DAG,
                                 const EVT PtrVT) {
  return GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT,
                    X86::EAX, X86II::MO_TLSGD);
}
| 19437 | |
/// Lower a TLS address in the ELF "local dynamic" model: one TLSBASEADDR
/// pseudo-call fetches the module's TLS base (shareable across variables in
/// the same module), then the variable's DTPOFF offset is added to it.
static SDValue LowerToTLSLocalDynamicModel(GlobalAddressSDNode *GA,
                                           SelectionDAG &DAG, const EVT PtrVT,
                                           bool Is64Bit, bool Is64BitLP64) {
  SDLoc dl(GA);

  // Count local-dynamic accesses so later passes can decide whether sharing
  // the base-address call is worthwhile.
  X86MachineFunctionInfo *MFI = DAG.getMachineFunction()
                                   .getInfo<X86MachineFunctionInfo>();
  MFI->incNumLocalDynamicTLSAccesses();

  // Get the module TLS base address. 64-bit mode calls TLSBASEADDR directly
  // (result in RAX/EAX depending on LP64 vs x32); 32-bit mode must first set
  // up EBX with the GOT base and glue the call to that copy.
  SDValue Base;
  if (Is64Bit) {
    unsigned ReturnReg = Is64BitLP64 ? X86::RAX : X86::EAX;
    Base = GetTLSADDR(DAG, DAG.getEntryNode(), GA, nullptr, PtrVT, ReturnReg,
                      X86II::MO_TLSLD, /*LocalDynamic=*/true);
  } else {
    SDValue InFlag;
    SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, X86::EBX,
        DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT), InFlag);
    InFlag = Chain.getValue(1);
    Base = GetTLSADDR(DAG, Chain, GA, &InFlag, PtrVT, X86::EAX,
                      X86II::MO_TLSLDM, /*LocalDynamic=*/true);
  }

  // Build the variable's offset from the module base (DTPOFF relocation) and
  // add it to the base address.
  unsigned char OperandFlags = X86II::MO_DTPOFF;
  unsigned WrapperKind = X86ISD::Wrapper;
  SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
                                           GA->getValueType(0),
                                           GA->getOffset(), OperandFlags);
  SDValue Offset = DAG.getNode(WrapperKind, dl, PtrVT, TGA);

  return DAG.getNode(ISD::ADD, dl, PtrVT, Offset, Base);
}
| 19476 | |
| 19477 | |
/// Lower a TLS address in the initial-exec or local-exec model: load the
/// thread pointer from the segment register, then add the variable's offset
/// (a link-time constant for local-exec, a GOT-loaded value for initial-exec).
static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
                                   const EVT PtrVT, TLSModel::Model model,
                                   bool is64Bit, bool isPIC) {
  SDLoc dl(GA);

  // The thread pointer lives at offset 0 in the thread's segment. x86
  // address spaces 256/257 denote GS/FS segment-relative addressing, so this
  // load reads %fs:0 (64-bit) or %gs:0 (32-bit).
  Value *Ptr = Constant::getNullValue(Type::getInt8PtrTy(*DAG.getContext(),
                                                         is64Bit ? 257 : 256));

  SDValue ThreadPointer =
      DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), DAG.getIntPtrConstant(0, dl),
                  MachinePointerInfo(Ptr));

  unsigned char OperandFlags = 0;

  // Pick the relocation and wrapper for the offset operand. Local-exec
  // embeds the offset directly; initial-exec reads it from the GOT
  // (RIP-relative in 64-bit, GOT/indirect in 32-bit).
  unsigned WrapperKind = X86ISD::Wrapper;
  if (model == TLSModel::LocalExec) {
    OperandFlags = is64Bit ? X86II::MO_TPOFF : X86II::MO_NTPOFF;
  } else if (model == TLSModel::InitialExec) {
    if (is64Bit) {
      OperandFlags = X86II::MO_GOTTPOFF;
      WrapperKind = X86ISD::WrapperRIP;
    } else {
      OperandFlags = isPIC ? X86II::MO_GOTNTPOFF : X86II::MO_INDNTPOFF;
    }
  } else {
    llvm_unreachable("Unexpected model");
  }

  // Materialize the (wrapped) offset operand.
  SDValue TGA =
      DAG.getTargetGlobalAddress(GA->getGlobal(), dl, GA->getValueType(0),
                                 GA->getOffset(), OperandFlags);
  SDValue Offset = DAG.getNode(WrapperKind, dl, PtrVT, TGA);

  if (model == TLSModel::InitialExec) {
    // 32-bit PIC initial-exec addresses the GOT slot relative to the global
    // base register.
    if (isPIC && !is64Bit) {
      Offset = DAG.getNode(ISD::ADD, dl, PtrVT,
                           DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT),
                           Offset);
    }

    // The actual TP-relative offset is stored in the GOT; load it.
    Offset = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Offset,
                         MachinePointerInfo::getGOT(DAG.getMachineFunction()));
  }

  // Final address = thread pointer + offset.
  return DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, Offset);
}
| 19531 | |
| 19532 | SDValue |
| 19533 | X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { |
| 19534 | |
| 19535 | GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op); |
| 19536 | |
| 19537 | if (DAG.getTarget().useEmulatedTLS()) |
| 19538 | return LowerToTLSEmulatedModel(GA, DAG); |
| 19539 | |
| 19540 | const GlobalValue *GV = GA->getGlobal(); |
| 19541 | auto PtrVT = getPointerTy(DAG.getDataLayout()); |
| 19542 | bool PositionIndependent = isPositionIndependent(); |
| 19543 | |
| 19544 | if (Subtarget.isTargetELF()) { |
| 19545 | TLSModel::Model model = DAG.getTarget().getTLSModel(GV); |
| 19546 | switch (model) { |
| 19547 | case TLSModel::GeneralDynamic: |
| 19548 | if (Subtarget.is64Bit()) { |
| 19549 | if (Subtarget.isTarget64BitLP64()) |
| 19550 | return LowerToTLSGeneralDynamicModel64(GA, DAG, PtrVT); |
| 19551 | return LowerToTLSGeneralDynamicModelX32(GA, DAG, PtrVT); |
| 19552 | } |
| 19553 | return LowerToTLSGeneralDynamicModel32(GA, DAG, PtrVT); |
| 19554 | case TLSModel::LocalDynamic: |
| 19555 | return LowerToTLSLocalDynamicModel(GA, DAG, PtrVT, Subtarget.is64Bit(), |
| 19556 | Subtarget.isTarget64BitLP64()); |
| 19557 | case TLSModel::InitialExec: |
| 19558 | case TLSModel::LocalExec: |
| 19559 | return LowerToTLSExecModel(GA, DAG, PtrVT, model, Subtarget.is64Bit(), |
| 19560 | PositionIndependent); |
| 19561 | } |
| 19562 | llvm_unreachable("Unknown TLS model."); |
| 19563 | } |
| 19564 | |
| 19565 | if (Subtarget.isTargetDarwin()) { |
| 19566 | |
| 19567 | unsigned char OpFlag = 0; |
| 19568 | unsigned WrapperKind = Subtarget.isPICStyleRIPRel() ? |
| 19569 | X86ISD::WrapperRIP : X86ISD::Wrapper; |
| 19570 | |
| 19571 | |
| 19572 | |
| 19573 | bool PIC32 = PositionIndependent && !Subtarget.is64Bit(); |
| 19574 | if (PIC32) |
| 19575 | OpFlag = X86II::MO_TLVP_PIC_BASE; |
| 19576 | else |
| 19577 | OpFlag = X86II::MO_TLVP; |
| 19578 | SDLoc DL(Op); |
| 19579 | SDValue Result = DAG.getTargetGlobalAddress(GA->getGlobal(), DL, |
| 19580 | GA->getValueType(0), |
| 19581 | GA->getOffset(), OpFlag); |
| 19582 | SDValue Offset = DAG.getNode(WrapperKind, DL, PtrVT, Result); |
| 19583 | |
| 19584 | |
| 19585 | if (PIC32) |
| 19586 | Offset = DAG.getNode(ISD::ADD, DL, PtrVT, |
| 19587 | DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(), PtrVT), |
| 19588 | Offset); |
| 19589 | |
| 19590 | |
| 19591 | |
| 19592 | SDValue Chain = DAG.getEntryNode(); |
| 19593 | SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); |
| 19594 | Chain = DAG.getCALLSEQ_START(Chain, 0, 0, DL); |
| 19595 | SDValue Args[] = { Chain, Offset }; |
| 19596 | Chain = DAG.getNode(X86ISD::TLSCALL, DL, NodeTys, Args); |
| 19597 | Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, DL, true), |
| 19598 | DAG.getIntPtrConstant(0, DL, true), |
| 19599 | Chain.getValue(1), DL); |
| 19600 | |
| 19601 | |
| 19602 | MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); |
| 19603 | MFI.setAdjustsStack(true); |
| 19604 | |
| 19605 | |
| 19606 | |
| 19607 | unsigned Reg = Subtarget.is64Bit() ? X86::RAX : X86::EAX; |
| 19608 | return DAG.getCopyFromReg(Chain, DL, Reg, PtrVT, Chain.getValue(1)); |
| 19609 | } |
| 19610 | |
| 19611 | if (Subtarget.isOSWindows()) { |
| 19612 | |
| 19613 | |
| 19614 | |
| 19615 | |
| 19616 | |
| 19617 | |
| 19618 | |
| 19619 | |
| 19620 | |
| 19621 | |
| 19622 | |
| 19623 | SDLoc dl(GA); |
| 19624 | SDValue Chain = DAG.getEntryNode(); |
| 19625 | |
| 19626 | |
| 19627 | |
| 19628 | |
| 19629 | Value *Ptr = Constant::getNullValue(Subtarget.is64Bit() |
| 19630 | ? Type::getInt8PtrTy(*DAG.getContext(), |
| 19631 | 256) |
| 19632 | : Type::getInt32PtrTy(*DAG.getContext(), |
| 19633 | 257)); |
| 19634 | |
| 19635 | SDValue TlsArray = Subtarget.is64Bit() |
| 19636 | ? DAG.getIntPtrConstant(0x58, dl) |
| 19637 | : (Subtarget.isTargetWindowsGNU() |
| 19638 | ? DAG.getIntPtrConstant(0x2C, dl) |
| 19639 | : DAG.getExternalSymbol("_tls_array", PtrVT)); |
| 19640 | |
| 19641 | SDValue ThreadPointer = |
| 19642 | DAG.getLoad(PtrVT, dl, Chain, TlsArray, MachinePointerInfo(Ptr)); |
| 19643 | |
| 19644 | SDValue res; |
| 19645 | if (GV->getThreadLocalMode() == GlobalVariable::LocalExecTLSModel) { |
| 19646 | res = ThreadPointer; |
| 19647 | } else { |
| 19648 | |
| 19649 | SDValue IDX = DAG.getExternalSymbol("_tls_index", PtrVT); |
| 19650 | if (Subtarget.is64Bit()) |
| 19651 | IDX = DAG.getExtLoad(ISD::ZEXTLOAD, dl, PtrVT, Chain, IDX, |
| 19652 | MachinePointerInfo(), MVT::i32); |
| 19653 | else |
| 19654 | IDX = DAG.getLoad(PtrVT, dl, Chain, IDX, MachinePointerInfo()); |
| 19655 | |
| 19656 | const DataLayout &DL = DAG.getDataLayout(); |
| 19657 | SDValue Scale = |
| 19658 | DAG.getConstant(Log2_64_Ceil(DL.getPointerSize()), dl, MVT::i8); |
| 19659 | IDX = DAG.getNode(ISD::SHL, dl, PtrVT, IDX, Scale); |
| 19660 | |
| 19661 | res = DAG.getNode(ISD::ADD, dl, PtrVT, ThreadPointer, IDX); |
| 19662 | } |
| 19663 | |
| 19664 | res = DAG.getLoad(PtrVT, dl, Chain, res, MachinePointerInfo()); |
| 19665 | |
| 19666 | |
| 19667 | SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, |
| 19668 | GA->getValueType(0), |
| 19669 | GA->getOffset(), X86II::MO_SECREL); |
| 19670 | SDValue Offset = DAG.getNode(X86ISD::Wrapper, dl, PtrVT, TGA); |
| 19671 | |
| 19672 | |
| 19673 | |
| 19674 | return DAG.getNode(ISD::ADD, dl, PtrVT, res, Offset); |
| 19675 | } |
| 19676 | |
| 19677 | llvm_unreachable("TLS not implemented for this target."); |
| 19678 | } |
| 19679 | |
| 19680 | |
| 19681 | |
| 19682 | |
| 19683 | static SDValue LowerShiftParts(SDValue Op, SelectionDAG &DAG) { |
| 19684 | SDValue Lo, Hi; |
| 19685 | DAG.getTargetLoweringInfo().expandShiftParts(Op.getNode(), Lo, Hi, DAG); |
| 19686 | return DAG.getMergeValues({Lo, Hi}, SDLoc(Op)); |
| 19687 | } |
| 19688 | |
/// Lower ISD::FSHL/ISD::FSHR (funnel shifts).
/// Vector types are lowered to the AVX512-VBMI2 VSHLD/VSHRD(V) concat-shift
/// instructions; scalar types either map to X86ISD::FSHL/FSHR (SHLD/SHRD),
/// are expanded inline for i8/i16, or are returned unchanged for i32/i64.
static SDValue LowerFunnelShift(SDValue Op, const X86Subtarget &Subtarget,
                                SelectionDAG &DAG) {
  MVT VT = Op.getSimpleValueType();
  assert((Op.getOpcode() == ISD::FSHL || Op.getOpcode() == ISD::FSHR) &&
         "Unexpected funnel shift opcode!");

  SDLoc DL(Op);
  SDValue Op0 = Op.getOperand(0);
  SDValue Op1 = Op.getOperand(1);
  SDValue Amt = Op.getOperand(2);

  bool IsFSHR = Op.getOpcode() == ISD::FSHR;

  if (VT.isVector()) {
    // Vector funnel shifts are only custom-lowered here when VBMI2's
    // double-shift instructions are available.
    assert(Subtarget.hasVBMI2() && "Expected VBMI2");

    // VSHLD/VSHRD differ only in which operand supplies the "spill-in" bits,
    // so FSHR is handled by swapping the two data operands.
    if (IsFSHR)
      std::swap(Op0, Op1);

    // Without VLX only 512-bit forms of the instructions exist, so widen
    // narrower vectors to 512 bits and extract the result afterwards.
    if (!Subtarget.hasVLX() && !VT.is512BitVector()) {
      Op0 = widenSubVector(Op0, false, Subtarget, DAG, DL, 512);
      Op1 = widenSubVector(Op1, false, Subtarget, DAG, DL, 512);
    }

    SDValue Funnel;
    APInt APIntShiftAmt;
    MVT ResultVT = Op0.getSimpleValueType();
    if (X86::isConstantSplat(Amt, APIntShiftAmt)) {
      // Uniform constant amount: use the immediate form. The amount is taken
      // modulo the element width, matching FSHL/FSHR semantics.
      uint64_t ShiftAmt = APIntShiftAmt.urem(VT.getScalarSizeInBits());
      Funnel =
          DAG.getNode(IsFSHR ? X86ISD::VSHRD : X86ISD::VSHLD, DL, ResultVT, Op0,
                      Op1, DAG.getTargetConstant(ShiftAmt, DL, MVT::i8));
    } else {
      // Variable amount: use the per-element (V-suffixed) form; the amount
      // vector must be widened the same way as the data operands.
      if (!Subtarget.hasVLX() && !VT.is512BitVector())
        Amt = widenSubVector(Amt, false, Subtarget, DAG, DL, 512);
      Funnel = DAG.getNode(IsFSHR ? X86ISD::VSHRDV : X86ISD::VSHLDV, DL,
                           ResultVT, Op0, Op1, Amt);
    }
    if (!Subtarget.hasVLX() && !VT.is512BitVector())
      Funnel = extractSubVector(Funnel, 0, DAG, DL, VT.getSizeInBits());
    return Funnel;
  }
  assert(
      (VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) &&
      "Unexpected funnel shift type!");

  // Avoid SHLD/SHRD on subtargets where it is slow, unless optimizing for
  // size (the SHLD encoding is compact).
  bool OptForSize = DAG.shouldOptForSize();
  bool ExpandFunnel = !OptForSize && Subtarget.isSHLDSlow();

  // i8 (always) and i16 (when SHLD is slow) with a variable amount: expand
  // manually by gluing the two operands into an i32 "double-width" value,
  // shifting it, and extracting the wanted half.
  if ((VT == MVT::i8 || (ExpandFunnel && VT == MVT::i16)) &&
      !isa<ConstantSDNode>(Amt)) {
    unsigned EltSizeInBits = VT.getScalarSizeInBits();
    SDValue Mask = DAG.getConstant(EltSizeInBits - 1, DL, Amt.getValueType());
    SDValue HiShift = DAG.getConstant(EltSizeInBits, DL, Amt.getValueType());
    Op0 = DAG.getAnyExtOrTrunc(Op0, DL, MVT::i32);
    Op1 = DAG.getZExtOrTrunc(Op1, DL, MVT::i32);
    // Reduce the amount modulo the element width.
    Amt = DAG.getNode(ISD::AND, DL, Amt.getValueType(), Amt, Mask);
    // Form (Op0 << EltSize) | Op1 so both halves sit in one i32.
    SDValue Res = DAG.getNode(ISD::SHL, DL, MVT::i32, Op0, HiShift);
    Res = DAG.getNode(ISD::OR, DL, MVT::i32, Res, Op1);
    if (IsFSHR) {
      // FSHR: the low EltSize bits of the shifted pair are the result.
      Res = DAG.getNode(ISD::SRL, DL, MVT::i32, Res, Amt);
    } else {
      // FSHL: shift left, then move the high half down into position.
      Res = DAG.getNode(ISD::SHL, DL, MVT::i32, Res, Amt);
      Res = DAG.getNode(ISD::SRL, DL, MVT::i32, Res, HiShift);
    }
    return DAG.getZExtOrTrunc(Res, DL, VT);
  }

  // Otherwise let generic legalization expand it.
  if (VT == MVT::i8 || ExpandFunnel)
    return SDValue();

  // i16 needs an explicit modulo-16 mask because SHLD/SHRD masks the count
  // modulo 32, which would not match FSHL/FSHR semantics for i16.
  if (VT == MVT::i16) {
    Amt = DAG.getNode(ISD::AND, DL, Amt.getValueType(), Amt,
                      DAG.getConstant(15, DL, Amt.getValueType()));
    unsigned FSHOp = (IsFSHR ? X86ISD::FSHR : X86ISD::FSHL);
    return DAG.getNode(FSHOp, DL, VT, Op0, Op1, Amt);
  }

  // i32/i64 are matched directly by SHLD/SHRD patterns.
  return Op;
}
| 19774 | |
| 19775 | |
| 19776 | |
/// Lower a scalar i64 -> f32/f64 conversion on 32-bit targets with AVX512DQ
/// by performing the conversion in a vector register (VCVTQQ2PS/PD family),
/// where 64-bit integer sources are supported even without 64-bit GPRs.
/// Returns SDValue() when this transform does not apply.
static SDValue LowerI64IntToFP_AVX512DQ(SDValue Op, SelectionDAG &DAG,
                                        const X86Subtarget &Subtarget) {
  assert((Op.getOpcode() == ISD::SINT_TO_FP ||
          Op.getOpcode() == ISD::STRICT_SINT_TO_FP ||
          Op.getOpcode() == ISD::STRICT_UINT_TO_FP ||
          Op.getOpcode() == ISD::UINT_TO_FP) &&
         "Unexpected opcode!");
  bool IsStrict = Op->isStrictFPOpcode();
  unsigned OpNo = IsStrict ? 1 : 0; // strict nodes carry the chain in slot 0
  SDValue Src = Op.getOperand(OpNo);
  MVT SrcVT = Src.getSimpleValueType();
  MVT VT = Op.getSimpleValueType();

  // Only i64 -> f32/f64 on a 32-bit target with DQI benefits from this; on
  // 64-bit targets the scalar CVTSI2SS/SD with a 64-bit GPR is used instead.
  if (!Subtarget.hasDQI() || SrcVT != MVT::i64 || Subtarget.is64Bit() ||
      (VT != MVT::f32 && VT != MVT::f64))
    return SDValue();

  // Without VLX only the 512-bit instruction forms exist, so use an
  // 8-element vector in that case; with VLX a 128/256-bit form suffices.
  unsigned NumElts = Subtarget.hasVLX() ? 4 : 8;
  MVT VecInVT = MVT::getVectorVT(MVT::i64, NumElts);
  MVT VecVT = MVT::getVectorVT(VT, NumElts);

  // Place the scalar in lane 0, convert the whole vector, extract lane 0.
  SDLoc dl(Op);
  SDValue InVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecInVT, Src);
  if (IsStrict) {
    SDValue CvtVec = DAG.getNode(Op.getOpcode(), dl, {VecVT, MVT::Other},
                                 {Op.getOperand(0), InVec});
    SDValue Chain = CvtVec.getValue(1);
    SDValue Value = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, CvtVec,
                                DAG.getIntPtrConstant(0, dl));
    return DAG.getMergeValues({Value, Chain}, dl);
  }

  SDValue CvtVec = DAG.getNode(Op.getOpcode(), dl, VecVT, InVec);

  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, CvtVec,
                     DAG.getIntPtrConstant(0, dl));
}
| 19817 | |
| 19818 | static bool useVectorCast(unsigned Opcode, MVT FromVT, MVT ToVT, |
| 19819 | const X86Subtarget &Subtarget) { |
| 19820 | switch (Opcode) { |
| 19821 | case ISD::SINT_TO_FP: |
| 19822 | |
| 19823 | if (!Subtarget.hasSSE2() || FromVT != MVT::v4i32) |
| 19824 | return false; |
| 19825 | |
| 19826 | return ToVT == MVT::v4f32 || (Subtarget.hasAVX() && ToVT == MVT::v4f64); |
| 19827 | |
| 19828 | case ISD::UINT_TO_FP: |
| 19829 | |
| 19830 | if (!Subtarget.hasAVX512() || FromVT != MVT::v4i32) |
| 19831 | return false; |
| 19832 | |
| 19833 | return ToVT == MVT::v4f32 || ToVT == MVT::v4f64; |
| 19834 | |
| 19835 | default: |
| 19836 | return false; |
| 19837 | } |
| 19838 | } |
| 19839 | |
| 19840 | |
| 19841 | |
| 19842 | |
/// Given a scalar cast of an extracted vector element:
///   (cast (extract_elt V, C)), try to vectorize the cast as
///   (extract_elt (cast V'), 0) so the value never leaves vector registers.
/// Returns SDValue() when the pattern or subtarget does not allow it.
static SDValue vectorizeExtractedCast(SDValue Cast, SelectionDAG &DAG,
                                      const X86Subtarget &Subtarget) {
  // The cast operand must be an extract with a constant index.
  SDValue Extract = Cast.getOperand(0);
  MVT DestVT = Cast.getSimpleValueType();
  if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
      !isa<ConstantSDNode>(Extract.getOperand(1)))
    return SDValue();

  // Check that a 128-bit vector cast of this element type is supported.
  SDValue VecOp = Extract.getOperand(0);
  MVT FromVT = VecOp.getSimpleValueType();
  unsigned NumEltsInXMM = 128 / FromVT.getScalarSizeInBits();
  MVT Vec128VT = MVT::getVectorVT(FromVT.getScalarType(), NumEltsInXMM);
  MVT ToVT = MVT::getVectorVT(DestVT, NumEltsInXMM);
  if (!useVectorCast(Cast.getOpcode(), Vec128VT, ToVT, Subtarget))
    return SDValue();

  // If the wanted element is not already in lane 0, shuffle it there (the
  // remaining lanes are don't-care).
  SDLoc DL(Cast);
  if (!isNullConstant(Extract.getOperand(1))) {
    SmallVector<int, 16> Mask(FromVT.getVectorNumElements(), -1);
    Mask[0] = Extract.getConstantOperandVal(1);
    VecOp = DAG.getVectorShuffle(FromVT, DL, VecOp, DAG.getUNDEF(FromVT), Mask);
  }

  // Shrink wider sources down to the 128-bit vector the cast expects.
  if (FromVT != Vec128VT)
    VecOp = extract128BitVector(VecOp, 0, DAG, DL);

  // Cast the whole vector, then pull out lane 0 as the scalar result.
  SDValue VCast = DAG.getNode(Cast.getOpcode(), DL, ToVT, VecOp);
  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, DestVT, VCast,
                     DAG.getIntPtrConstant(0, DL));
}
| 19881 | |
| 19882 | |
| 19883 | |
| 19884 | |
/// Lower a scalar round-trip (sint_to_fp (fp_to_sint X)) — i.e. a float
/// truncation toward zero re-expressed as a float — entirely in vector
/// registers, avoiding a transfer through a GPR. Returns SDValue() when the
/// pattern does not match or the subtarget cannot do it.
static SDValue lowerFPToIntToFP(SDValue CastToFP, SelectionDAG &DAG,
                                const X86Subtarget &Subtarget) {
  // Match a scalar fp -> i32 -> fp chain.
  SDValue CastToInt = CastToFP.getOperand(0);
  MVT VT = CastToFP.getSimpleValueType();
  if (CastToInt.getOpcode() != ISD::FP_TO_SINT || VT.isVector())
    return SDValue();

  MVT IntVT = CastToInt.getSimpleValueType();
  SDValue X = CastToInt.getOperand(0);
  MVT SrcVT = X.getSimpleValueType();
  if (SrcVT != MVT::f32 && SrcVT != MVT::f64)
    return SDValue();

  // Only f32/f64 through i32 is handled here; the packed conversion
  // instructions used below require SSE2.
  if (!Subtarget.hasSSE2() || (VT != MVT::f32 && VT != MVT::f64) ||
      IntVT != MVT::i32)
    return SDValue();

  // Build the 128-bit vector types corresponding to each scalar stage.
  unsigned SrcSize = SrcVT.getSizeInBits();
  unsigned IntSize = IntVT.getSizeInBits();
  unsigned VTSize = VT.getSizeInBits();
  MVT VecSrcVT = MVT::getVectorVT(SrcVT, 128 / SrcSize);
  MVT VecIntVT = MVT::getVectorVT(IntVT, 128 / IntSize);
  MVT VecVT = MVT::getVectorVT(VT, 128 / VTSize);

  // When the element counts differ (f64 <-> i32), use the X86-specific
  // packed nodes that handle the width change; otherwise the generic ISD
  // opcodes map directly to the packed instructions.
  unsigned ToIntOpcode =
      SrcSize != IntSize ? X86ISD::CVTTP2SI : (unsigned)ISD::FP_TO_SINT;
  unsigned ToFPOpcode =
      IntSize != VTSize ? X86ISD::CVTSI2P : (unsigned)ISD::SINT_TO_FP;

  // Lane 0 carries the real value; the other lanes are undefined but are
  // never observed because only element 0 is extracted at the end.
  SDLoc DL(CastToFP);
  SDValue ZeroIdx = DAG.getIntPtrConstant(0, DL);
  SDValue VecX = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecSrcVT, X);
  SDValue VCastToInt = DAG.getNode(ToIntOpcode, DL, VecIntVT, VecX);
  SDValue VCastToFP = DAG.getNode(ToFPOpcode, DL, VecVT, VCastToInt);
  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, VCastToFP, ZeroIdx);
}
| 19931 | |
/// Lower v2i64/v4i64 int-to-fp conversions. With AVX512DQ (but no VLX) the
/// source is widened to v8i64 and converted with the 512-bit instruction.
/// Otherwise, only unsigned v4i64 -> v4f32 is handled, via a round-to-odd
/// halving trick plus per-element signed conversions.
static SDValue lowerINT_TO_FP_vXi64(SDValue Op, SelectionDAG &DAG,
                                    const X86Subtarget &Subtarget) {
  SDLoc DL(Op);
  bool IsStrict = Op->isStrictFPOpcode();
  MVT VT = Op->getSimpleValueType(0);
  SDValue Src = Op->getOperand(IsStrict ? 1 : 0);

  if (Subtarget.hasDQI()) {
    // With VLX the narrow forms would have been legal, so this custom path
    // only runs in the no-VLX configuration.
    assert(!Subtarget.hasVLX() && "Unexpected features");

    assert((Src.getSimpleValueType() == MVT::v2i64 ||
            Src.getSimpleValueType() == MVT::v4i64) &&
           "Unsupported custom type");

    assert((VT == MVT::v4f32 || VT == MVT::v2f64 || VT == MVT::v4f64) &&
           "Unexpected VT!");
    MVT WideVT = VT == MVT::v4f32 ? MVT::v8f32 : MVT::v8f64;

    // Widen to 512 bits. For strict FP use zeroes rather than undef in the
    // upper lanes so the conversion cannot raise spurious FP exceptions.
    SDValue Tmp = IsStrict ? DAG.getConstant(0, DL, MVT::v8i64)
                           : DAG.getUNDEF(MVT::v8i64);
    Src = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, MVT::v8i64, Tmp, Src,
                      DAG.getIntPtrConstant(0, DL));
    SDValue Res, Chain;
    if (IsStrict) {
      Res = DAG.getNode(Op.getOpcode(), DL, {WideVT, MVT::Other},
                        {Op->getOperand(0), Src});
      Chain = Res.getValue(1);
    } else {
      Res = DAG.getNode(Op.getOpcode(), DL, WideVT, Src);
    }

    // Extract the originally-requested narrow result.
    Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
                      DAG.getIntPtrConstant(0, DL));

    if (IsStrict)
      return DAG.getMergeValues({Res, Chain}, DL);
    return Res;
  }

  // Without DQI only unsigned v4i64 -> v4f32 is custom-lowered here.
  bool IsSigned = Op->getOpcode() == ISD::SINT_TO_FP ||
                  Op->getOpcode() == ISD::STRICT_SINT_TO_FP;
  if (VT != MVT::v4f32 || IsSigned)
    return SDValue();

  // For inputs with the sign bit set, halve the value while keeping the
  // sticky low bit ((Src >> 1) | (Src & 1)) so that a signed conversion of
  // the halved value, later doubled, rounds the same as a direct unsigned
  // conversion would (round-to-odd).
  SDValue Zero = DAG.getConstant(0, DL, MVT::v4i64);
  SDValue One = DAG.getConstant(1, DL, MVT::v4i64);
  SDValue Sign = DAG.getNode(ISD::OR, DL, MVT::v4i64,
                             DAG.getNode(ISD::SRL, DL, MVT::v4i64, Src, One),
                             DAG.getNode(ISD::AND, DL, MVT::v4i64, Src, One));
  SDValue IsNeg = DAG.getSetCC(DL, MVT::v4i64, Src, Zero, ISD::SETLT);
  SDValue SignSrc = DAG.getSelect(DL, MVT::v4i64, IsNeg, Sign, Src);
  // Convert each element with the scalar signed i64 conversion.
  SmallVector<SDValue, 4> SignCvts(4);
  SmallVector<SDValue, 4> Chains(4);
  for (int i = 0; i != 4; ++i) {
    SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i64, SignSrc,
                              DAG.getIntPtrConstant(i, DL));
    if (IsStrict) {
      SignCvts[i] =
          DAG.getNode(ISD::STRICT_SINT_TO_FP, DL, {MVT::f32, MVT::Other},
                      {Op.getOperand(0), Elt});
      Chains[i] = SignCvts[i].getValue(1);
    } else {
      SignCvts[i] = DAG.getNode(ISD::SINT_TO_FP, DL, MVT::f32, Elt);
    }
  }
  SDValue SignCvt = DAG.getBuildVector(VT, DL, SignCvts);

  // Doubling (x + x) undoes the earlier halving for the negative lanes.
  SDValue Slow, Chain;
  if (IsStrict) {
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
    Slow = DAG.getNode(ISD::STRICT_FADD, DL, {MVT::v4f32, MVT::Other},
                       {Chain, SignCvt, SignCvt});
    Chain = Slow.getValue(1);
  } else {
    Slow = DAG.getNode(ISD::FADD, DL, MVT::v4f32, SignCvt, SignCvt);
  }

  // Select per lane: doubled value where the input was "negative" (top bit
  // set), plain conversion otherwise.
  IsNeg = DAG.getNode(ISD::TRUNCATE, DL, MVT::v4i32, IsNeg);
  SDValue Cvt = DAG.getSelect(DL, MVT::v4f32, IsNeg, Slow, SignCvt);

  if (IsStrict)
    return DAG.getMergeValues({Cvt, Chain}, DL);

  return Cvt;
}
| 20020 | |
/// Custom lowering for (STRICT_)SINT_TO_FP. Tries a series of vector-based
/// rewrites first, then falls back to storing the integer to the stack and
/// loading it with x87 FILD.
SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op,
                                           SelectionDAG &DAG) const {
  bool IsStrict = Op->isStrictFPOpcode();
  unsigned OpNo = IsStrict ? 1 : 0; // strict nodes carry the chain in slot 0
  SDValue Src = Op.getOperand(OpNo);
  SDValue Chain = IsStrict ? Op->getOperand(0) : DAG.getEntryNode();
  MVT SrcVT = Src.getSimpleValueType();
  MVT VT = Op.getSimpleValueType();
  SDLoc dl(Op);

  // (sint_to_fp (extract_elt V, C)) -> stay in vector registers.
  if (SDValue Extract = vectorizeExtractedCast(Op, DAG, Subtarget))
    return Extract;

  // (sint_to_fp (fp_to_sint X)) round-trip -> packed conversions.
  if (SDValue R = lowerFPToIntToFP(Op, DAG, Subtarget))
    return R;

  if (SrcVT.isVector()) {
    if (SrcVT == MVT::v2i32 && VT == MVT::v2f64) {
      // CVTDQ2PD reads only the low 64 bits of its source, so widen v2i32
      // to v4i32 with undef upper lanes and use the X86-specific node.
      if (IsStrict)
        return DAG.getNode(
            X86ISD::STRICT_CVTSI2P, dl, {VT, MVT::Other},
            {Chain, DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32, Src,
                                DAG.getUNDEF(SrcVT))});
      return DAG.getNode(X86ISD::CVTSI2P, dl, VT,
                         DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32, Src,
                                     DAG.getUNDEF(SrcVT)));
    }
    if (SrcVT == MVT::v2i64 || SrcVT == MVT::v4i64)
      return lowerINT_TO_FP_vXi64(Op, DAG, Subtarget);

    return SDValue();
  }

  assert(SrcVT <= MVT::i64 && SrcVT >= MVT::i16 &&
         "Unknown SINT_TO_FP to lower!");

  bool UseSSEReg = isScalarFPTypeInSSEReg(VT);

  // These cases are legal as-is: CVTSI2SS/SD handles i32 directly, and i64
  // too when 64-bit GPRs exist.
  if (SrcVT == MVT::i32 && UseSSEReg)
    return Op;
  if (SrcVT == MVT::i64 && UseSSEReg && Subtarget.is64Bit())
    return Op;

  // i64 on a 32-bit target with AVX512DQ: convert in a vector register.
  if (SDValue V = LowerI64IntToFP_AVX512DQ(Op, DAG, Subtarget))
    return V;

  // i16 has no direct conversion instruction: sign-extend to i32 first.
  if (SrcVT == MVT::i16 && (UseSSEReg || VT == MVT::f128)) {
    SDValue Ext = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i32, Src);
    if (IsStrict)
      return DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, {VT, MVT::Other},
                         {Chain, Ext});

    return DAG.getNode(ISD::SINT_TO_FP, dl, VT, Ext);
  }

  // f128 results are handled by a libcall elsewhere.
  if (VT == MVT::f128)
    return SDValue();

  SDValue ValueToStore = Src;
  if (SrcVT == MVT::i64 && Subtarget.hasSSE2() && !Subtarget.is64Bit())
    // On 32-bit targets an i64 would be stored as two 32-bit pieces;
    // bitcasting to f64 lets it be stored with a single SSE2 store.
    ValueToStore = DAG.getBitcast(MVT::f64, ValueToStore);

  // Fallback: spill the integer to a fresh stack slot and FILD it.
  unsigned Size = SrcVT.getStoreSize();
  Align Alignment(Size);
  MachineFunction &MF = DAG.getMachineFunction();
  auto PtrVT = getPointerTy(MF.getDataLayout());
  int SSFI = MF.getFrameInfo().CreateStackObject(Size, Alignment, false);
  MachinePointerInfo MPI =
      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI);
  SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
  Chain = DAG.getStore(Chain, dl, ValueToStore, StackSlot, MPI, Alignment);
  std::pair<SDValue, SDValue> Tmp =
      BuildFILD(VT, SrcVT, dl, Chain, StackSlot, MPI, Alignment, DAG);

  if (IsStrict)
    return DAG.getMergeValues({Tmp.first, Tmp.second}, dl);

  return Tmp.first;
}
| 20108 | |
/// Emit an x87 FILD of SrcVT from Pointer, producing a DstVT value and the
/// updated chain. When DstVT lives in SSE registers, the x87 result is
/// computed at f80 precision, stored to a stack temporary, and reloaded so
/// the value ends up in an SSE register.
std::pair<SDValue, SDValue> X86TargetLowering::BuildFILD(
    EVT DstVT, EVT SrcVT, const SDLoc &DL, SDValue Chain, SDValue Pointer,
    MachinePointerInfo PtrInfo, Align Alignment, SelectionDAG &DAG) const {
  // FILD always produces an x87 value; use f80 when we must round-trip
  // through memory to SSE, otherwise produce DstVT directly.
  SDVTList Tys;
  bool useSSE = isScalarFPTypeInSSEReg(DstVT);
  if (useSSE)
    Tys = DAG.getVTList(MVT::f80, MVT::Other);
  else
    Tys = DAG.getVTList(DstVT, MVT::Other);

  SDValue FILDOps[] = {Chain, Pointer};
  SDValue Result =
      DAG.getMemIntrinsicNode(X86ISD::FILD, DL, Tys, FILDOps, SrcVT, PtrInfo,
                              Alignment, MachineMemOperand::MOLoad);
  Chain = Result.getValue(1);

  if (useSSE) {
    // x87 -> SSE transfer: FST to a fresh stack slot at DstVT precision,
    // then reload into an SSE register.
    MachineFunction &MF = DAG.getMachineFunction();
    unsigned SSFISize = DstVT.getStoreSize();
    int SSFI =
        MF.getFrameInfo().CreateStackObject(SSFISize, Align(SSFISize), false);
    auto PtrVT = getPointerTy(MF.getDataLayout());
    SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
    Tys = DAG.getVTList(MVT::Other);
    SDValue FSTOps[] = {Chain, Result, StackSlot};
    MachineMemOperand *StoreMMO = DAG.getMachineFunction().getMachineMemOperand(
        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI),
        MachineMemOperand::MOStore, SSFISize, Align(SSFISize));

    Chain =
        DAG.getMemIntrinsicNode(X86ISD::FST, DL, Tys, FSTOps, DstVT, StoreMMO);
    Result = DAG.getLoad(
        DstVT, DL, Chain, StackSlot,
        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI));
    Chain = Result.getValue(1);
  }

  return { Result, Chain };
}
| 20149 | |
| 20150 | |
| 20151 | |
| 20152 | |
| 20153 | static bool shouldUseHorizontalOp(bool IsSingleSource, SelectionDAG &DAG, |
| 20154 | const X86Subtarget &Subtarget) { |
| 20155 | bool IsOptimizingSize = DAG.shouldOptForSize(); |
| 20156 | bool HasFastHOps = Subtarget.hasFastHorizontalOps(); |
| 20157 | return !IsSingleSource || IsOptimizingSize || HasFastHOps; |
| 20158 | } |
| 20159 | |
| 20160 | |
/// Lower scalar i64 -> f64 UINT_TO_FP using the classic exponent-bias
/// constant-pool trick:
///   - pair the low 32 bits with 0x43300000 to form the double 2^52 + lo,
///   - pair the high 32 bits with 0x45300000 to form 2^84 + hi*2^32,
///   - subtract (2^52 + 2^84) from both and add the results.
static SDValue LowerUINT_TO_FP_i64(SDValue Op, SelectionDAG &DAG,
                                   const X86Subtarget &Subtarget) {
  // This algorithm is not exception-safe (the FSUB can raise inexact), so
  // it is only used for the non-strict opcode.
  assert(!Op->isStrictFPOpcode() && "Expected non-strict uint_to_fp!");

  SDLoc dl(Op);
  LLVMContext *Context = DAG.getContext();

  // 0x43300000 / 0x45300000 are the high words of the doubles 2^52 and
  // 2^84; unpacking them above the i64's 32-bit halves builds the two
  // biased doubles in one shuffle.
  static const uint32_t CV0[] = { 0x43300000, 0x45300000, 0, 0 };
  Constant *C0 = ConstantDataVector::get(*Context, CV0);
  auto PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
  SDValue CPIdx0 = DAG.getConstantPool(C0, PtrVT, Align(16));

  // <2^52, 2^84> as a v2f64, subtracted to remove the bias.
  SmallVector<Constant*,2> CV1;
  CV1.push_back(
      ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble(),
                                        APInt(64, 0x4330000000000000ULL))));
  CV1.push_back(
      ConstantFP::get(*Context, APFloat(APFloat::IEEEdouble(),
                                        APInt(64, 0x4530000000000000ULL))));
  Constant *C1 = ConstantVector::get(CV1);
  SDValue CPIdx1 = DAG.getConstantPool(C1, PtrVT, Align(16));

  // Interleave the i64's two 32-bit halves with the exponent words.
  SDValue XR1 =
      DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Op.getOperand(0));
  SDValue CLod0 = DAG.getLoad(
      MVT::v4i32, dl, DAG.getEntryNode(), CPIdx0,
      MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), Align(16));
  SDValue Unpck1 =
      getUnpackl(DAG, dl, MVT::v4i32, DAG.getBitcast(MVT::v4i32, XR1), CLod0);

  SDValue CLod1 = DAG.getLoad(
      MVT::v2f64, dl, CLod0.getValue(1), CPIdx1,
      MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), Align(16));
  SDValue XR2F = DAG.getBitcast(MVT::v2f64, Unpck1);

  // Remove the bias; the two lanes now hold lo and hi*2^32 as doubles.
  SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, XR2F, CLod1);
  SDValue Result;

  if (Subtarget.hasSSE3() &&
      shouldUseHorizontalOp(true, DAG, Subtarget)) {
    // HADDPD sums the two lanes in one instruction.
    Result = DAG.getNode(X86ISD::FHADD, dl, MVT::v2f64, Sub, Sub);
  } else {
    // Otherwise shuffle the high lane down and add.
    SDValue Shuffle = DAG.getVectorShuffle(MVT::v2f64, dl, Sub, Sub, {1,-1});
    Result = DAG.getNode(ISD::FADD, dl, MVT::v2f64, Shuffle, Sub);
  }
  Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Result,
                       DAG.getIntPtrConstant(0, dl));
  return Result;
}
| 20227 | |
| 20228 | |
/// Lower scalar i32 UINT_TO_FP via the exponent-bias trick: OR the zero-
/// extended i32 into the mantissa of the double 2^52 (0x4330000000000000),
/// then subtract 2^52. The subtraction is exact, so this also works for the
/// strict-FP opcode. The f64 intermediate is rounded to the final type.
static SDValue LowerUINT_TO_FP_i32(SDValue Op, SelectionDAG &DAG,
                                   const X86Subtarget &Subtarget) {
  unsigned OpNo = Op.getNode()->isStrictFPOpcode() ? 1 : 0;
  SDLoc dl(Op);

  // Bias = 2^52 as a double; its 52-bit mantissa field is all zero, leaving
  // room for the 32-bit integer.
  SDValue Bias = DAG.getConstantFP(BitsToDouble(0x4330000000000000ULL), dl,
                                   MVT::f64);

  // Put the i32 into lane 0 of a v4i32.
  SDValue Load =
      DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4i32, Op.getOperand(OpNo));

  // Zero the upper lanes so the later v2i64 bitcast sees a zero-extended
  // 64-bit value.
  Load = getShuffleVectorZeroOrUndef(Load, 0, true, Subtarget, DAG);

  // OR the integer into the Bias double's low bits: yields 2^52 + x.
  SDValue Or = DAG.getNode(
      ISD::OR, dl, MVT::v2i64,
      DAG.getBitcast(MVT::v2i64, Load),
      DAG.getBitcast(MVT::v2i64,
                     DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2f64, Bias)));
  Or =
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
                  DAG.getBitcast(MVT::v2f64, Or), DAG.getIntPtrConstant(0, dl));

  if (Op.getNode()->isStrictFPOpcode()) {
    // Strict path: thread the chain through the FSUB and any final
    // extend/round.
    SDValue Chain = Op.getOperand(0);
    SDValue Sub = DAG.getNode(ISD::STRICT_FSUB, dl, {MVT::f64, MVT::Other},
                              {Chain, Or, Bias});

    if (Op.getValueType() == Sub.getValueType())
      return Sub;

    // Result type differs from f64 (e.g. f32): extend or round as needed.
    std::pair<SDValue, SDValue> ResultPair = DAG.getStrictFPExtendOrRound(
        Sub, Sub.getValue(1), dl, Op.getSimpleValueType());

    return DAG.getMergeValues({ResultPair.first, ResultPair.second}, dl);
  }

  // Non-strict path: (2^52 + x) - 2^52 == x, exactly.
  SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::f64, Or, Bias);

  // Convert the f64 intermediate to the requested result type.
  return DAG.getFPExtendOrRound(Sub, dl, Op.getSimpleValueType());
}
| 20278 | |
/// Lower v2i32 -> v2f64 UINT_TO_FP. With AVX512 the conversion maps to
/// VCVTUDQ2PD (widened when VLX is unavailable); otherwise it uses the
/// vectorized exponent-bias trick. Returns SDValue() when it does not apply.
static SDValue lowerUINT_TO_FP_v2i32(SDValue Op, SelectionDAG &DAG,
                                     const X86Subtarget &Subtarget,
                                     const SDLoc &DL) {
  if (Op.getSimpleValueType() != MVT::v2f64)
    return SDValue();

  bool IsStrict = Op->isStrictFPOpcode();

  SDValue N0 = Op.getOperand(IsStrict ? 1 : 0);
  assert(N0.getSimpleValueType() == MVT::v2i32 && "Unexpected input type");

  if (Subtarget.hasAVX512()) {
    if (!Subtarget.hasVLX()) {
      // Without VLX, widen to the 512-bit form. Only the strict case is
      // handled here; the non-strict case is left to other lowering.
      if (!IsStrict)
        return SDValue();

      // Zero (not undef) padding so the widened strict conversion cannot
      // raise exceptions on garbage lanes.
      N0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4i32, N0,
                       DAG.getConstant(0, DL, MVT::v2i32));
      SDValue Res = DAG.getNode(Op->getOpcode(), DL, {MVT::v4f64, MVT::Other},
                                {Op.getOperand(0), N0});
      SDValue Chain = Res.getValue(1);
      Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v2f64, Res,
                        DAG.getIntPtrConstant(0, DL));
      return DAG.getMergeValues({Res, Chain}, DL);
    }

    // With VLX, VCVTUDQ2PD reads only the low 64 bits of a v4i32, so undef
    // upper lanes are fine.
    N0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4i32, N0,
                     DAG.getUNDEF(MVT::v2i32));
    if (IsStrict)
      return DAG.getNode(X86ISD::STRICT_CVTUI2P, DL, {MVT::v2f64, MVT::Other},
                         {Op.getOperand(0), N0});
    return DAG.getNode(X86ISD::CVTUI2P, DL, MVT::v2f64, N0);
  }

  // No AVX512: zero-extend each u32 into the mantissa of 2^52
  // (0x4330000000000000) via OR, then subtract 2^52. The subtraction is
  // exact, so the strict form is also safe.
  SDValue ZExtIn = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v2i64, N0);
  SDValue VBias =
      DAG.getConstantFP(BitsToDouble(0x4330000000000000ULL), DL, MVT::v2f64);
  SDValue Or = DAG.getNode(ISD::OR, DL, MVT::v2i64, ZExtIn,
                           DAG.getBitcast(MVT::v2i64, VBias));
  Or = DAG.getBitcast(MVT::v2f64, Or);

  if (IsStrict)
    return DAG.getNode(ISD::STRICT_FSUB, DL, {MVT::v2f64, MVT::Other},
                       {Op.getOperand(0), Or, VBias});
  return DAG.getNode(ISD::FSUB, DL, MVT::v2f64, Or, VBias);
}
| 20331 | |
| 20332 | static SDValue lowerUINT_TO_FP_vXi32(SDValue Op, SelectionDAG &DAG, |
| 20333 | const X86Subtarget &Subtarget) { |
| 20334 | SDLoc DL(Op); |
| 20335 | bool IsStrict = Op->isStrictFPOpcode(); |
| 20336 | SDValue V = Op->getOperand(IsStrict ? 1 : 0); |
| 20337 | MVT VecIntVT = V.getSimpleValueType(); |
| 20338 | assert((VecIntVT == MVT::v4i32 || VecIntVT == MVT::v8i32) && |
| 20339 | "Unsupported custom type"); |
| 20340 | |
| 20341 | if (Subtarget.hasAVX512()) { |
| 20342 | |
| 20343 | assert(!Subtarget.hasVLX() && "Unexpected features"); |
| 20344 | MVT VT = Op->getSimpleValueType(0); |
| 20345 | |
| 20346 | |
| 20347 | if (VT == MVT::v8f64) |
| 20348 | return Op; |
| 20349 | |
| 20350 | assert((VT == MVT::v4f32 || VT == MVT::v8f32 || VT == MVT::v4f64) && |
| 20351 | "Unexpected VT!"); |
| 20352 | MVT WideVT = VT == MVT::v4f64 ? MVT::v8f64 : MVT::v16f32; |
| 20353 | MVT WideIntVT = VT == MVT::v4f64 ? MVT::v8i32 : MVT::v16i32; |
| 20354 | |
| 20355 | |
| 20356 | SDValue Tmp = |
| 20357 | IsStrict ? DAG.getConstant(0, DL, WideIntVT) : DAG.getUNDEF(WideIntVT); |
| 20358 | V = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideIntVT, Tmp, V, |
| 20359 | DAG.getIntPtrConstant(0, DL)); |
| 20360 | SDValue Res, Chain; |
| 20361 | if (IsStrict) { |
| 20362 | Res = DAG.getNode(ISD::STRICT_UINT_TO_FP, DL, {WideVT, MVT::Other}, |
| 20363 | {Op->getOperand(0), V}); |
| 20364 | Chain = Res.getValue(1); |
| 20365 | } else { |
| 20366 | Res = DAG.getNode(ISD::UINT_TO_FP, DL, WideVT, V); |
| 20367 | } |
| 20368 | |
| 20369 | Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res, |
| 20370 | DAG.getIntPtrConstant(0, DL)); |
| 20371 | |
| 20372 | if (IsStrict) |
| 20373 | return DAG.getMergeValues({Res, Chain}, DL); |
| 20374 | return Res; |
| 20375 | } |
| 20376 | |
| 20377 | if (Subtarget.hasAVX() && VecIntVT == MVT::v4i32 && |
| 20378 | Op->getSimpleValueType(0) == MVT::v4f64) { |
| 20379 | SDValue ZExtIn = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::v4i64, V); |
| 20380 | Constant *Bias = ConstantFP::get( |
| 20381 | *DAG.getContext(), |
| 20382 | APFloat(APFloat::IEEEdouble(), APInt(64, 0x4330000000000000ULL))); |
| 20383 | auto PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); |
| 20384 | SDValue CPIdx = DAG.getConstantPool(Bias, PtrVT, Align(8)); |
| 20385 | SDVTList Tys = DAG.getVTList(MVT::v4f64, MVT::Other); |
| 20386 | SDValue Ops[] = {DAG.getEntryNode(), CPIdx}; |
| 20387 | SDValue VBias = DAG.getMemIntrinsicNode( |
| 20388 | X86ISD::VBROADCAST_LOAD, DL, Tys, Ops, MVT::f64, |
| 20389 | MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), Align(8), |
| 20390 | MachineMemOperand::MOLoad); |
| 20391 | |
| 20392 | SDValue Or = DAG.getNode(ISD::OR, DL, MVT::v4i64, ZExtIn, |
| 20393 | DAG.getBitcast(MVT::v4i64, VBias)); |
| 20394 | Or = DAG.getBitcast(MVT::v4f64, Or); |
| 20395 | |
| 20396 | if (IsStrict) |
| 20397 | return DAG.getNode(ISD::STRICT_FSUB, DL, {MVT::v4f64, MVT::Other}, |
| 20398 | {Op.getOperand(0), Or, VBias}); |
| 20399 | return DAG.getNode(ISD::FSUB, DL, MVT::v4f64, Or, VBias); |
| 20400 | } |
| 20401 | |
| 20402 | |
| 20403 | |
| 20404 | |
| 20405 | |
| 20406 | |
| 20407 | |
| 20408 | |
| 20409 | |
| 20410 | |
| 20411 | |
| 20412 | |
| 20413 | |
| 20414 | bool Is128 = VecIntVT == MVT::v4i32; |
| 20415 | MVT VecFloatVT = Is128 ? MVT::v4f32 : MVT::v8f32; |
| 20416 | |
| 20417 | |
| 20418 | if (VecFloatVT != Op->getSimpleValueType(0)) |
| 20419 | return SDValue(); |
| 20420 | |
| 20421 | |
| 20422 | |
| 20423 | |
| 20424 | |
| 20425 | |
| 20426 | |
| 20427 | |
| 20428 | |
| 20429 | SDValue VecCstLow = DAG.getConstant(0x4b000000, DL, VecIntVT); |
| 20430 | |
| 20431 | SDValue VecCstHigh = DAG.getConstant(0x53000000, DL, VecIntVT); |
| 20432 | |
| 20433 | |
| 20434 | SDValue VecCstShift = DAG.getConstant(16, DL, VecIntVT); |
| 20435 | SDValue HighShift = DAG.getNode(ISD::SRL, DL, VecIntVT, V, VecCstShift); |
| 20436 | |
| 20437 | SDValue Low, High; |
| 20438 | if (Subtarget.hasSSE41()) { |
| 20439 | MVT VecI16VT = Is128 ? MVT::v8i16 : MVT::v16i16; |
| 20440 | |
| 20441 | SDValue VecCstLowBitcast = DAG.getBitcast(VecI16VT, VecCstLow); |
| 20442 | SDValue VecBitcast = DAG.getBitcast(VecI16VT, V); |
| 20443 | |
| 20444 | |
| 20445 | Low = DAG.getNode(X86ISD::BLENDI, DL, VecI16VT, VecBitcast, |
| 20446 | VecCstLowBitcast, DAG.getTargetConstant(0xaa, DL, MVT::i8)); |
| 20447 | |
| 20448 | |
| 20449 | SDValue VecCstHighBitcast = DAG.getBitcast(VecI16VT, VecCstHigh); |
| 20450 | SDValue VecShiftBitcast = DAG.getBitcast(VecI16VT, HighShift); |
| 20451 | |
| 20452 | |
| 20453 | High = DAG.getNode(X86ISD::BLENDI, DL, VecI16VT, VecShiftBitcast, |
| 20454 | VecCstHighBitcast, DAG.getTargetConstant(0xaa, DL, MVT::i8)); |
| 20455 | } else { |
| 20456 | SDValue VecCstMask = DAG.getConstant(0xffff, DL, VecIntVT); |
| 20457 | |
| 20458 | SDValue LowAnd = DAG.getNode(ISD::AND, DL, VecIntVT, V, VecCstMask); |
| 20459 | Low = DAG.getNode(ISD::OR, DL, VecIntVT, LowAnd, VecCstLow); |
| 20460 | |
| 20461 | |
| 20462 | High = DAG.getNode(ISD::OR, DL, VecIntVT, HighShift, VecCstHigh); |
| 20463 | } |
| 20464 | |
| 20465 | |
| 20466 | SDValue VecCstFSub = DAG.getConstantFP( |
| 20467 | APFloat(APFloat::IEEEsingle(), APInt(32, 0x53000080)), DL, VecFloatVT); |
| 20468 | |
| 20469 | |
| 20470 | |
| 20471 | |
| 20472 | |
| 20473 | SDValue HighBitcast = DAG.getBitcast(VecFloatVT, High); |
| 20474 | |
| 20475 | |
| 20476 | SDValue LowBitcast = DAG.getBitcast(VecFloatVT, Low); |
| 20477 | |
| 20478 | if (IsStrict) { |
| 20479 | SDValue FHigh = DAG.getNode(ISD::STRICT_FSUB, DL, {VecFloatVT, MVT::Other}, |
| 20480 | {Op.getOperand(0), HighBitcast, VecCstFSub}); |
| 20481 | return DAG.getNode(ISD::STRICT_FADD, DL, {VecFloatVT, MVT::Other}, |
| 20482 | {FHigh.getValue(1), LowBitcast, FHigh}); |
| 20483 | } |
| 20484 | |
| 20485 | SDValue FHigh = |
| 20486 | DAG.getNode(ISD::FSUB, DL, VecFloatVT, HighBitcast, VecCstFSub); |
| 20487 | return DAG.getNode(ISD::FADD, DL, VecFloatVT, LowBitcast, FHigh); |
| 20488 | } |
| 20489 | |
| 20490 | static SDValue lowerUINT_TO_FP_vec(SDValue Op, SelectionDAG &DAG, |
| 20491 | const X86Subtarget &Subtarget) { |
| 20492 | unsigned OpNo = Op.getNode()->isStrictFPOpcode() ? 1 : 0; |
| 20493 | SDValue N0 = Op.getOperand(OpNo); |
| 20494 | MVT SrcVT = N0.getSimpleValueType(); |
| 20495 | SDLoc dl(Op); |
| 20496 | |
| 20497 | switch (SrcVT.SimpleTy) { |
| 20498 | default: |
| 20499 | llvm_unreachable("Custom UINT_TO_FP is not supported!"); |
| 20500 | case MVT::v2i32: |
| 20501 | return lowerUINT_TO_FP_v2i32(Op, DAG, Subtarget, dl); |
| 20502 | case MVT::v4i32: |
| 20503 | case MVT::v8i32: |
| 20504 | return lowerUINT_TO_FP_vXi32(Op, DAG, Subtarget); |
| 20505 | case MVT::v2i64: |
| 20506 | case MVT::v4i64: |
| 20507 | return lowerINT_TO_FP_vXi64(Op, DAG, Subtarget); |
| 20508 | } |
| 20509 | } |
| 20510 | |
/// Lower a scalar (or vector, by dispatch) unsigned-integer-to-FP conversion,
/// including the STRICT_UINT_TO_FP form. Scalar cases that cannot use a
/// native instruction fall back to an x87 FILD through a stack slot plus a
/// "fudge factor" addition to correct for the signed interpretation.
SDValue X86TargetLowering::LowerUINT_TO_FP(SDValue Op,
                                           SelectionDAG &DAG) const {
  bool IsStrict = Op->isStrictFPOpcode();
  unsigned OpNo = IsStrict ? 1 : 0; // Strict nodes carry the chain at index 0.
  SDValue Src = Op.getOperand(OpNo);
  SDLoc dl(Op);
  auto PtrVT = getPointerTy(DAG.getDataLayout());
  MVT SrcVT = Src.getSimpleValueType();
  MVT DstVT = Op->getSimpleValueType(0);
  SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();

  // f128 conversions are expanded elsewhere (libcall).
  if (DstVT == MVT::f128)
    return SDValue();

  if (DstVT.isVector())
    return lowerUINT_TO_FP_vec(Op, DAG, Subtarget);

  if (SDValue Extract = vectorizeExtractedCast(Op, DAG, Subtarget))
    return Extract;

  // AVX-512 has native scalar unsigned conversions (vcvtusi2ss/sd);
  // leave the node as-is for instruction selection.
  if (Subtarget.hasAVX512() && isScalarFPTypeInSSEReg(DstVT) &&
      (SrcVT == MVT::i32 || (SrcVT == MVT::i64 && Subtarget.is64Bit()))) {
    return Op;
  }

  // On 64-bit targets an i32 zero-extended to i64 is always non-negative,
  // so the signed conversion gives the correct result.
  if (SrcVT == MVT::i32 && Subtarget.is64Bit()) {
    Src = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Src);
    if (IsStrict)
      return DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, {DstVT, MVT::Other},
                         {Chain, Src});
    return DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Src);
  }

  if (SDValue V = LowerI64IntToFP_AVX512DQ(Op, DAG, Subtarget))
    return V;

  // Specialized SSE2 scalar lowerings.
  if (SrcVT == MVT::i64 && DstVT == MVT::f64 && X86ScalarSSEf64 && !IsStrict)
    return LowerUINT_TO_FP_i64(Op, DAG, Subtarget);
  if (SrcVT == MVT::i32 && X86ScalarSSEf64 && DstVT != MVT::f80)
    return LowerUINT_TO_FP_i32(Op, DAG, Subtarget);
  // 64-bit i64->f32/f64 is expanded by generic legalization.
  if (Subtarget.is64Bit() && SrcVT == MVT::i64 &&
      (DstVT == MVT::f32 || DstVT == MVT::f64))
    return SDValue();

  // Fallback: build an x87 FILD from a 64-bit stack buffer.
  SDValue StackSlot = DAG.CreateStackTemporary(MVT::i64, 8);
  int SSFI = cast<FrameIndexSDNode>(StackSlot)->getIndex();
  Align SlotAlign(8);
  MachinePointerInfo MPI =
      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SSFI);
  if (SrcVT == MVT::i32) {
    // Store the i32 in the low half and zero the high half so the slot
    // holds the zero-extended 64-bit value, then FILD it as i64.
    SDValue OffsetSlot =
        DAG.getMemBasePlusOffset(StackSlot, TypeSize::Fixed(4), dl);
    SDValue Store1 = DAG.getStore(Chain, dl, Src, StackSlot, MPI, SlotAlign);
    SDValue Store2 = DAG.getStore(Store1, dl, DAG.getConstant(0, dl, MVT::i32),
                                  OffsetSlot, MPI.getWithOffset(4), SlotAlign);
    std::pair<SDValue, SDValue> Tmp =
        BuildFILD(DstVT, MVT::i64, dl, Store2, StackSlot, MPI, SlotAlign, DAG);
    if (IsStrict)
      return DAG.getMergeValues({Tmp.first, Tmp.second}, dl);

    return Tmp.first;
  }

  assert(SrcVT == MVT::i64 && "Unexpected type in UINT_TO_FP");
  SDValue ValueToStore = Src;
  if (isScalarFPTypeInSSEReg(Op.getValueType()) && !Subtarget.is64Bit()) {
    // On 32-bit targets an i64 store would be split; bitcast to f64 so the
    // value is stored as a single 64-bit store.
    ValueToStore = DAG.getBitcast(MVT::f64, ValueToStore);
  }
  SDValue Store =
      DAG.getStore(Chain, dl, ValueToStore, StackSlot, MPI, SlotAlign);

  // FILD interprets the slot as a *signed* i64, producing an f80.
  SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other);
  SDValue Ops[] = { Store, StackSlot };
  SDValue Fild =
      DAG.getMemIntrinsicNode(X86ISD::FILD, dl, Tys, Ops, MVT::i64, MPI,
                              SlotAlign, MachineMemOperand::MOLoad);
  Chain = Fild.getValue(1);

  // Detect whether the source had its top bit set (negative when read as
  // signed), in which case FILD produced a value 2^64 too small.
  SDValue SignSet = DAG.getSetCC(
      dl, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), MVT::i64),
      Op.getOperand(OpNo), DAG.getConstant(0, dl, MVT::i64), ISD::SETLT);

  // Constant pool holds two f32 words: {0x00000000, 0x5f800000}, i.e.
  // {0.0f, 2^64}; select which one to add by offsetting the pointer.
  APInt FF(64, 0x5F80000000000000ULL);
  SDValue FudgePtr = DAG.getConstantPool(
      ConstantInt::get(*DAG.getContext(), FF), PtrVT);
  Align CPAlignment = cast<ConstantPoolSDNode>(FudgePtr)->getAlign();

  // Offset 4 selects the 2^64 word when the sign bit was set, else 0.0f.
  SDValue Zero = DAG.getIntPtrConstant(0, dl);
  SDValue Four = DAG.getIntPtrConstant(4, dl);
  SDValue Offset = DAG.getSelect(dl, Zero.getValueType(), SignSet, Four, Zero);
  FudgePtr = DAG.getNode(ISD::ADD, dl, PtrVT, FudgePtr, Offset);

  // Load the selected f32 fudge value extended to f80.
  SDValue Fudge = DAG.getExtLoad(
      ISD::EXTLOAD, dl, MVT::f80, Chain, FudgePtr,
      MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), MVT::f32,
      CPAlignment);
  Chain = Fudge.getValue(1);

  // result = FILD(value) + fudge, then round down to the destination type.
  if (IsStrict) {
    SDValue Add = DAG.getNode(ISD::STRICT_FADD, dl, {MVT::f80, MVT::Other},
                              {Chain, Fild, Fudge});

    if (DstVT == MVT::f80)
      return Add;
    return DAG.getNode(ISD::STRICT_FP_ROUND, dl, {DstVT, MVT::Other},
                       {Add.getValue(1), Add, DAG.getIntPtrConstant(0, dl)});
  }
  SDValue Add = DAG.getNode(ISD::FADD, dl, MVT::f80, Fild, Fudge);
  return DAG.getNode(ISD::FP_ROUND, dl, DstVT, Add,
                     DAG.getIntPtrConstant(0, dl));
}
| 20639 | |
| 20640 | |
| 20641 | |
| 20642 | |
| 20643 | |
| 20644 | |
| 20645 | |
/// Lower scalar FP_TO_SINT/FP_TO_UINT (and their strict forms) via the x87
/// FIST family through a stack slot. For unsigned i64 results, which FIST
/// cannot produce directly, the value is pre-biased below 2^63 and the sign
/// bit of the result is restored with an XOR afterwards.
/// \p Chain is an out-parameter updated with the final memory chain.
SDValue
X86TargetLowering::FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
                                   bool IsSigned, SDValue &Chain) const {
  bool IsStrict = Op->isStrictFPOpcode();
  SDLoc DL(Op);

  EVT DstTy = Op.getValueType();
  SDValue Value = Op.getOperand(IsStrict ? 1 : 0);
  EVT TheVT = Value.getValueType();
  auto PtrVT = getPointerTy(DAG.getDataLayout());

  // Only f32/f64/f80 sources are handled here.
  if (TheVT != MVT::f32 && TheVT != MVT::f64 && TheVT != MVT::f80) {
    return SDValue();
  }

  // Unsigned i64 needs the pre-bias / post-XOR fixup described above.
  bool UnsignedFixup = !IsSigned && DstTy == MVT::i64;

  // An unsigned i32 result fits in a signed i64, so widen and let the
  // signed path handle it.
  if (!IsSigned && DstTy != MVT::i64) {
    assert(DstTy == MVT::i32 && "Unexpected FP_TO_UINT");
    DstTy = MVT::i64;
  }

  assert(DstTy.getSimpleVT() <= MVT::i64 &&
         DstTy.getSimpleVT() >= MVT::i16 &&
         "Unknown FP_TO_INT to lower!");

  // Stack slot that FIST will write the integer result into.
  MachineFunction &MF = DAG.getMachineFunction();
  unsigned MemSize = DstTy.getStoreSize();
  int SSFI =
      MF.getFrameInfo().CreateStackObject(MemSize, Align(MemSize), false);
  SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);

  Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();

  // XOR mask applied to the loaded result for the unsigned fixup.
  SDValue Adjust;

  if (UnsignedFixup) {
    // Threshold = 2^63 (0x5f000000 is 2^63 in IEEE single precision).
    // If Value >= 2^63, compute FIST(Value - 2^63) and set the sign bit of
    // the result via XOR with (1 << 63); otherwise convert directly and
    // XOR with 0. The subtraction is exact, so no precision is lost.
    APFloat Thresh(APFloat::IEEEsingle(), APInt(32, 0x5f000000));
    LLVM_ATTRIBUTE_UNUSED APFloat::opStatus Status = APFloat::opOK;
    bool LosesInfo = false;
    if (TheVT == MVT::f64)
      // 2^63 is exactly representable in double.
      Status = Thresh.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
                              &LosesInfo);
    else if (TheVT == MVT::f80)
      Status = Thresh.convert(APFloat::x87DoubleExtended(),
                              APFloat::rmNearestTiesToEven, &LosesInfo);

    assert(Status == APFloat::opOK && !LosesInfo &&
           "FP conversion should have been exact");

    SDValue ThreshVal = DAG.getConstantFP(Thresh, DL, TheVT);

    EVT ResVT = getSetCCResultType(DAG.getDataLayout(),
                                   *DAG.getContext(), TheVT);
    SDValue Cmp;
    if (IsStrict) {
      // Strict compare threads the chain and signals on exceptions.
      Cmp = DAG.getSetCC(DL, ResVT, Value, ThreshVal, ISD::SETGE, Chain,
                         true);
      Chain = Cmp.getValue(1);
    } else {
      Cmp = DAG.getSetCC(DL, ResVT, Value, ThreshVal, ISD::SETGE);
    }

    // Adjust = Cmp ? (1 << 63) : 0 — the sign bit to restore by XOR.
    SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Cmp);
    SDValue Const63 = DAG.getConstant(63, DL, MVT::i8);
    Adjust = DAG.getNode(ISD::SHL, DL, MVT::i64, Zext, Const63);

    // FltOfs = Cmp ? 2^63 : 0.0 — the bias subtracted before conversion.
    SDValue FltOfs = DAG.getSelect(DL, TheVT, Cmp, ThreshVal,
                                   DAG.getConstantFP(0.0, DL, TheVT));

    if (IsStrict) {
      Value = DAG.getNode(ISD::STRICT_FSUB, DL, { TheVT, MVT::Other},
                          { Chain, Value, FltOfs });
      Chain = Value.getValue(1);
    } else
      Value = DAG.getNode(ISD::FSUB, DL, TheVT, Value, FltOfs);
  }

  MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, SSFI);

  // FIST needs its operand in an x87 register; if the value currently
  // lives in an SSE register, round-trip it through memory with FLD.
  if (isScalarFPTypeInSSEReg(TheVT)) {
    assert(DstTy == MVT::i64 && "Invalid FP_TO_SINT to lower!");
    Chain = DAG.getStore(Chain, DL, Value, StackSlot, MPI);
    SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other);
    SDValue Ops[] = { Chain, StackSlot };

    unsigned FLDSize = TheVT.getStoreSize();
    assert(FLDSize <= MemSize && "Stack slot not big enough");
    MachineMemOperand *MMO = MF.getMachineMemOperand(
        MPI, MachineMemOperand::MOLoad, FLDSize, Align(FLDSize));
    Value = DAG.getMemIntrinsicNode(X86ISD::FLD, DL, Tys, Ops, TheVT, MMO);
    Chain = Value.getValue(1);
  }

  // Emit the conversion-to-memory node, then load the integer result back.
  MachineMemOperand *MMO = MF.getMachineMemOperand(
      MPI, MachineMemOperand::MOStore, MemSize, Align(MemSize));
  SDValue Ops[] = { Chain, Value, StackSlot };
  SDValue FIST = DAG.getMemIntrinsicNode(X86ISD::FP_TO_INT_IN_MEM, DL,
                                         DAG.getVTList(MVT::Other),
                                         Ops, DstTy, MMO);

  SDValue Res = DAG.getLoad(Op.getValueType(), SDLoc(Op), FIST, StackSlot, MPI);
  Chain = Res.getValue(1);

  // Restore the sign bit removed by the pre-bias (no-op XOR with 0 when
  // the value was below 2^63).
  if (UnsignedFixup)
    Res = DAG.getNode(ISD::XOR, DL, MVT::i64, Res, Adjust);

  return Res;
}
| 20799 | |
| 20800 | static SDValue LowerAVXExtend(SDValue Op, SelectionDAG &DAG, |
| 20801 | const X86Subtarget &Subtarget) { |
| 20802 | MVT VT = Op.getSimpleValueType(); |
| 20803 | SDValue In = Op.getOperand(0); |
| 20804 | MVT InVT = In.getSimpleValueType(); |
| 20805 | SDLoc dl(Op); |
| 20806 | unsigned Opc = Op.getOpcode(); |
| 20807 | |
| 20808 | assert(VT.isVector() && InVT.isVector() && "Expected vector type"); |
| 20809 | assert((Opc == ISD::ANY_EXTEND || Opc == ISD::ZERO_EXTEND) && |
| 20810 | "Unexpected extension opcode"); |
| 20811 | assert(VT.getVectorNumElements() == InVT.getVectorNumElements() && |
| 20812 | "Expected same number of elements"); |
| 20813 | assert((VT.getVectorElementType() == MVT::i16 || |
| 20814 | VT.getVectorElementType() == MVT::i32 || |
| 20815 | VT.getVectorElementType() == MVT::i64) && |
| 20816 | "Unexpected element type"); |
| 20817 | assert((InVT.getVectorElementType() == MVT::i8 || |
| 20818 | InVT.getVectorElementType() == MVT::i16 || |
| 20819 | InVT.getVectorElementType() == MVT::i32) && |
| 20820 | "Unexpected element type"); |
| 20821 | |
| 20822 | unsigned ExtendInVecOpc = getOpcode_EXTEND_VECTOR_INREG(Opc); |
| 20823 | |
| 20824 | if (VT == MVT::v32i16 && !Subtarget.hasBWI()) { |
| 20825 | assert(InVT == MVT::v32i8 && "Unexpected VT!"); |
| 20826 | return splitVectorIntUnary(Op, DAG); |
| 20827 | } |
| 20828 | |
| 20829 | if (Subtarget.hasInt256()) |
| 20830 | return Op; |
| 20831 | |
| 20832 | |
| 20833 | |
| 20834 | |
| 20835 | |
| 20836 | |
| 20837 | |
| 20838 | |
| 20839 | |
| 20840 | |
| 20841 | |
| 20842 | |
| 20843 | |
| 20844 | MVT HalfVT = VT.getHalfNumVectorElementsVT(); |
| 20845 | SDValue OpLo = DAG.getNode(ExtendInVecOpc, dl, HalfVT, In); |
| 20846 | |
| 20847 | |
| 20848 | |
| 20849 | if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(In)) |
| 20850 | if (hasIdenticalHalvesShuffleMask(Shuf->getMask())) |
| 20851 | return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpLo); |
| 20852 | |
| 20853 | SDValue ZeroVec = DAG.getConstant(0, dl, InVT); |
| 20854 | SDValue Undef = DAG.getUNDEF(InVT); |
| 20855 | bool NeedZero = Opc == ISD::ZERO_EXTEND; |
| 20856 | SDValue OpHi = getUnpackh(DAG, dl, InVT, In, NeedZero ? ZeroVec : Undef); |
| 20857 | OpHi = DAG.getBitcast(HalfVT, OpHi); |
| 20858 | |
| 20859 | return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi); |
| 20860 | } |
| 20861 | |
| 20862 | |
| 20863 | static SDValue SplitAndExtendv16i1(unsigned ExtOpc, MVT VT, SDValue In, |
| 20864 | const SDLoc &dl, SelectionDAG &DAG) { |
| 20865 | assert((VT == MVT::v16i8 || VT == MVT::v16i16) && "Unexpected VT."); |
| 20866 | SDValue Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v8i1, In, |
| 20867 | DAG.getIntPtrConstant(0, dl)); |
| 20868 | SDValue Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v8i1, In, |
| 20869 | DAG.getIntPtrConstant(8, dl)); |
| 20870 | Lo = DAG.getNode(ExtOpc, dl, MVT::v8i16, Lo); |
| 20871 | Hi = DAG.getNode(ExtOpc, dl, MVT::v8i16, Hi); |
| 20872 | SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v16i16, Lo, Hi); |
| 20873 | return DAG.getNode(ISD::TRUNCATE, dl, VT, Res); |
| 20874 | } |
| 20875 | |
/// Lower ZERO_EXTEND from a vXi1 mask vector. Non-i8 destinations use
/// sign-extend + logical shift right; i8 destinations go through a masked
/// select of 1/0, widening through legal AVX-512 types as needed.
static SDValue LowerZERO_EXTEND_Mask(SDValue Op,
                                     const X86Subtarget &Subtarget,
                                     SelectionDAG &DAG) {
  MVT VT = Op->getSimpleValueType(0);
  SDValue In = Op->getOperand(0);
  MVT InVT = In.getSimpleValueType();
  assert(InVT.getVectorElementType() == MVT::i1 && "Unexpected input type!");
  SDLoc DL(Op);
  unsigned NumElts = VT.getVectorNumElements();

  // For i16/i32/i64 elements: sign-extend (all-ones or all-zeros lanes),
  // then shift right to leave just the low bit.
  if (VT.getVectorElementType() != MVT::i8) {
    SDValue Extend = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, In);
    return DAG.getNode(ISD::SRL, DL, VT, Extend,
                       DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, VT));
  }

  // i8 lanes: without BWI there is no vXi8 mask select, so widen to i32
  // elements (or split 16-element masks that cannot reach 512 bits).
  MVT ExtVT = VT;
  if (!Subtarget.hasBWI()) {
    if (NumElts == 16 && !Subtarget.canExtendTo512DQ())
      return SplitAndExtendv16i1(ISD::ZERO_EXTEND, VT, In, DL, DAG);

    ExtVT = MVT::getVectorVT(MVT::i32, NumElts);
  }

  // Without VLX only 512-bit mask selects are legal: pad the mask with
  // undef elements up to a 512-bit-wide vector type.
  MVT WideVT = ExtVT;
  if (!ExtVT.is512BitVector() && !Subtarget.hasVLX()) {
    NumElts *= 512 / ExtVT.getSizeInBits();
    InVT = MVT::getVectorVT(MVT::i1, NumElts);
    In = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InVT, DAG.getUNDEF(InVT),
                     In, DAG.getIntPtrConstant(0, DL));
    WideVT = MVT::getVectorVT(ExtVT.getVectorElementType(),
                              NumElts);
  }

  // zext(mask) == select(mask, 1, 0).
  SDValue One = DAG.getConstant(1, DL, WideVT);
  SDValue Zero = DAG.getConstant(0, DL, WideVT);

  SDValue SelectedVal = DAG.getSelect(DL, WideVT, In, One, Zero);

  // If we widened the element type for the select, truncate back to i8.
  if (VT != ExtVT) {
    WideVT = MVT::getVectorVT(MVT::i8, NumElts);
    SelectedVal = DAG.getNode(ISD::TRUNCATE, DL, WideVT, SelectedVal);
  }

  // If we padded the vector width, extract the original-width result.
  if (WideVT != VT)
    SelectedVal = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, SelectedVal,
                              DAG.getIntPtrConstant(0, DL));

  return SelectedVal;
}
| 20933 | |
| 20934 | static SDValue LowerZERO_EXTEND(SDValue Op, const X86Subtarget &Subtarget, |
| 20935 | SelectionDAG &DAG) { |
| 20936 | SDValue In = Op.getOperand(0); |
| 20937 | MVT SVT = In.getSimpleValueType(); |
| 20938 | |
| 20939 | if (SVT.getVectorElementType() == MVT::i1) |
| 20940 | return LowerZERO_EXTEND_Mask(Op, Subtarget, DAG); |
| 20941 | |
| 20942 | assert(Subtarget.hasAVX() && "Expected AVX support"); |
| 20943 | return LowerAVXExtend(Op, DAG, Subtarget); |
| 20944 | } |
| 20945 | |
| 20946 | |
| 20947 | |
| 20948 | |
| 20949 | |
| 20950 | |
/// Truncate a vector using repeated PACKSS/PACKUS operations, which pack
/// pairs of wider elements into narrower ones with saturation. The caller
/// guarantees the saturation is a no-op (input already sign/zero fits).
/// Recurses to halve the element width one step at a time.
static SDValue truncateVectorWithPACK(unsigned Opcode, EVT DstVT, SDValue In,
                                      const SDLoc &DL, SelectionDAG &DAG,
                                      const X86Subtarget &Subtarget) {
  assert((Opcode == X86ISD::PACKSS || Opcode == X86ISD::PACKUS) &&
         "Unexpected PACK opcode");
  assert(DstVT.isVector() && "VT not a vector?");

  // PACK instructions require SSE2.
  if (!Subtarget.hasSSE2())
    return SDValue();

  EVT SrcVT = In.getValueType();

  // Recursion base case: nothing left to pack.
  if (SrcVT == DstVT)
    return In;

  // Only handle whole-64-bit destinations from whole-128-bit sources.
  unsigned DstSizeInBits = DstVT.getSizeInBits();
  unsigned SrcSizeInBits = SrcVT.getSizeInBits();
  if ((DstSizeInBits % 64) != 0 || (SrcSizeInBits % 128) != 0)
    return SDValue();

  unsigned NumElems = SrcVT.getVectorNumElements();
  if (!isPowerOf2_32(NumElems))
    return SDValue();

  LLVMContext &Ctx = *DAG.getContext();
  assert(DstVT.getVectorNumElements() == NumElems && "Illegal truncation");
  assert(SrcSizeInBits > DstSizeInBits && "Illegal truncation");

  // Element type after one halving step of the source scalar width.
  EVT PackedSVT = EVT::getIntegerVT(Ctx, SrcVT.getScalarSizeInBits() / 2);

  // Choose the pack granularity: i32->i16 packing needs PACKSSDW (SSE2) or
  // PACKUSDW (SSE4.1); otherwise pack i16->i8.
  EVT InVT = MVT::i16, OutVT = MVT::i8;
  if (SrcVT.getScalarSizeInBits() > 16 &&
      (Opcode == X86ISD::PACKSS || Subtarget.hasSSE41())) {
    InVT = MVT::i32;
    OutVT = MVT::i16;
  }

  // 128-bit source: one pack against undef yields the result in the low
  // 64 bits; extract and recurse implicitly via the bitcast to DstVT.
  if (SrcVT.is128BitVector()) {
    InVT = EVT::getVectorVT(Ctx, InVT, 128 / InVT.getSizeInBits());
    OutVT = EVT::getVectorVT(Ctx, OutVT, 128 / OutVT.getSizeInBits());
    In = DAG.getBitcast(InVT, In);
    SDValue Res = DAG.getNode(Opcode, DL, OutVT, In, DAG.getUNDEF(InVT));
    Res = extractSubVector(Res, 0, DAG, DL, 64);
    return DAG.getBitcast(DstVT, Res);
  }

  // Wider sources: split into halves and pack them against each other.
  SDValue Lo, Hi;
  std::tie(Lo, Hi) = splitVector(In, DAG, DL);

  unsigned SubSizeInBits = SrcSizeInBits / 2;
  InVT = EVT::getVectorVT(Ctx, InVT, SubSizeInBits / InVT.getSizeInBits());
  OutVT = EVT::getVectorVT(Ctx, OutVT, SubSizeInBits / OutVT.getSizeInBits());

  // 256 -> 128: a single pack of the two 128-bit halves.
  if (SrcVT.is256BitVector() && DstVT.is128BitVector()) {
    Lo = DAG.getBitcast(InVT, Lo);
    Hi = DAG.getBitcast(InVT, Hi);
    SDValue Res = DAG.getNode(Opcode, DL, OutVT, Lo, Hi);
    return DAG.getBitcast(DstVT, Res);
  }

  // 512 -> 256 with AVX2: pack the halves, then fix up the lane ordering —
  // 256-bit PACK interleaves per 128-bit lane, so swap the middle 64-bit
  // groups (permute {0,2,1,3}) to restore element order.
  if (SrcVT.is512BitVector() && Subtarget.hasInt256()) {
    Lo = DAG.getBitcast(InVT, Lo);
    Hi = DAG.getBitcast(InVT, Hi);
    SDValue Res = DAG.getNode(Opcode, DL, OutVT, Lo, Hi);

    SmallVector<int, 64> Mask;
    int Scale = 64 / OutVT.getScalarSizeInBits();
    narrowShuffleMaskElts(Scale, { 0, 2, 1, 3 }, Mask);
    Res = DAG.getVectorShuffle(OutVT, DL, Res, Res, Mask);

    if (DstVT.is256BitVector())
      return DAG.getBitcast(DstVT, Res);

    // Destination is narrower still: recurse on the 256-bit intermediate.
    EVT PackedVT = EVT::getVectorVT(Ctx, PackedSVT, NumElems);
    Res = DAG.getBitcast(PackedVT, Res);
    return truncateVectorWithPACK(Opcode, DstVT, Res, DL, DAG, Subtarget);
  }

  // Remaining cases: recursively pack each half one step, concatenate,
  // and keep going until DstVT is reached.
  assert(SrcSizeInBits >= 256 && "Expected 256-bit vector or greater");
  EVT PackedVT = EVT::getVectorVT(Ctx, PackedSVT, NumElems / 2);
  Lo = truncateVectorWithPACK(Opcode, PackedVT, Lo, DL, DAG, Subtarget);
  Hi = truncateVectorWithPACK(Opcode, PackedVT, Hi, DL, DAG, Subtarget);

  PackedVT = EVT::getVectorVT(Ctx, PackedSVT, NumElems);
  SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, PackedVT, Lo, Hi);
  return truncateVectorWithPACK(Opcode, DstVT, Res, DL, DAG, Subtarget);
}
| 21054 | |
/// Lower TRUNCATE to a vXi1 mask vector. The low bit of each element is
/// moved into the sign position (by shifting left, unless it is already
/// known to be sign-replicated), then converted to a mask with a setcc.
static SDValue LowerTruncateVecI1(SDValue Op, SelectionDAG &DAG,
                                  const X86Subtarget &Subtarget) {
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();
  SDValue In = Op.getOperand(0);
  MVT InVT = In.getSimpleValueType();

  assert(VT.getVectorElementType() == MVT::i1 && "Unexpected vector type.");

  // Shift amount that moves bit 0 into the sign bit.
  unsigned ShiftInx = InVT.getScalarSizeInBits() - 1;
  if (InVT.getScalarSizeInBits() <= 16) {
    if (Subtarget.hasBWI()) {
      // Skip the shift if the input is already fully sign-replicated.
      if (DAG.ComputeNumSignBits(In) < InVT.getScalarSizeInBits()) {
        // Shift as i16 lanes — for i8 this shifts bit 0 of each byte into
        // its own sign bit; cross-byte contamination is masked out by the
        // per-element sign test below.
        MVT ExtVT = MVT::getVectorVT(MVT::i16, InVT.getSizeInBits()/16);
        In = DAG.getNode(ISD::SHL, DL, ExtVT,
                         DAG.getBitcast(ExtVT, In),
                         DAG.getConstant(ShiftInx, DL, ExtVT));
        In = DAG.getBitcast(InVT, In);
      }
      // Mask lane is set iff the element is negative: 0 > In.
      return DAG.getSetCC(DL, VT, DAG.getConstant(0, DL, InVT),
                          In, ISD::SETGT);
    }

    assert((InVT.is256BitVector() || InVT.is128BitVector()) &&
           "Unexpected vector type.");
    unsigned NumElts = InVT.getVectorNumElements();
    assert((NumElts == 8 || NumElts == 16) && "Unexpected number of elements");

    // No BWI and 16 elements can't reach a 512-bit DQ type: split into two
    // v8 halves, sign-extend as needed, truncate each to v8i1, and concat.
    if (NumElts == 16 && !Subtarget.canExtendTo512DQ()) {
      SDValue Lo, Hi;
      if (InVT == MVT::v16i8) {
        Lo = DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, DL, MVT::v8i32, In);
        // Move the upper 8 bytes down, then in-reg sign-extend them too.
        Hi = DAG.getVectorShuffle(
            InVT, DL, In, In,
            {8, 9, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1});
        Hi = DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, DL, MVT::v8i32, Hi);
      } else {
        assert(InVT == MVT::v16i16 && "Unexpected VT!");
        Lo = extract128BitVector(In, 0, DAG, DL);
        Hi = extract128BitVector(In, 8, DAG, DL);
      }

      Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::v8i1, Lo);
      Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::v8i1, Hi);
      return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
    }

    // Widen narrow elements to a legal mask-compare element type and fall
    // through to the generic shift + compare below.
    MVT EltVT = Subtarget.hasVLX() ? MVT::i32 : MVT::getIntegerVT(512/NumElts);
    MVT ExtVT = MVT::getVectorVT(EltVT, NumElts);
    In = DAG.getNode(ISD::SIGN_EXTEND, DL, ExtVT, In);
    InVT = ExtVT;
    ShiftInx = InVT.getScalarSizeInBits() - 1;
  }

  // Put bit 0 into the sign position unless it is already there.
  if (DAG.ComputeNumSignBits(In) < InVT.getScalarSizeInBits()) {
    In = DAG.getNode(ISD::SHL, DL, InVT, In,
                     DAG.getConstant(ShiftInx, DL, InVT));
  }

  // With DQI use a signed compare against zero; otherwise test inequality.
  if (Subtarget.hasDQI())
    return DAG.getSetCC(DL, VT, DAG.getConstant(0, DL, InVT), In, ISD::SETGT);
  return DAG.getSetCC(DL, VT, In, DAG.getConstant(0, DL, InVT), ISD::SETNE);
}
| 21135 | |
/// Custom-lower vector TRUNCATE. Tries, in order: splitting illegal source
/// types, the vXi1 mask path, native AVX-512 truncates, PACKUS/PACKSS when
/// the discarded bits are known zero/sign bits, and finally hand-written
/// shuffle sequences for the remaining 256->128-bit cases.
SDValue X86TargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();
  SDValue In = Op.getOperand(0);
  MVT InVT = In.getSimpleValueType();
  unsigned InNumEltBits = InVT.getScalarSizeInBits();

  assert(VT.getVectorNumElements() == InVT.getVectorNumElements() &&
         "Invalid TRUNCATE operation");

  // Source type not legal: only handle specific wide-to-128-bit cases by
  // splitting in half and truncating each half; otherwise defer to type
  // legalization.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (!TLI.isTypeLegal(InVT)) {
    if ((InVT == MVT::v8i64 || InVT == MVT::v16i32 || InVT == MVT::v16i64) &&
        VT.is128BitVector()) {
      assert((InVT == MVT::v16i64 || Subtarget.hasVLX()) &&
             "Unexpected subtarget!");

      SDValue Lo, Hi;
      std::tie(Lo, Hi) = DAG.SplitVector(In, DL);

      EVT LoVT, HiVT;
      std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);

      Lo = DAG.getNode(ISD::TRUNCATE, DL, LoVT, Lo);
      Hi = DAG.getNode(ISD::TRUNCATE, DL, HiVT, Hi);
      return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
    }

    return SDValue();
  }

  // Truncation to a mask vector has its own lowering.
  if (VT.getVectorElementType() == MVT::i1)
    return LowerTruncateVecI1(Op, DAG, Subtarget);

  // AVX-512 mostly selects truncates natively; v32i16 without BWI is split,
  // and v16i16 sources that can't widen to 512 bits fall through below.
  if (Subtarget.hasAVX512()) {
    if (InVT == MVT::v32i16 && !Subtarget.hasBWI()) {
      assert(VT == MVT::v32i8 && "Unexpected VT!");
      return splitVectorIntUnary(Op, DAG);
    }

    if (InVT != MVT::v16i16 || Subtarget.hasBWI() ||
        Subtarget.canExtendTo512DQ())
      return Op;
  }

  unsigned NumPackedSignBits = std::min<unsigned>(VT.getScalarSizeInBits(), 16);
  unsigned NumPackedZeroBits = Subtarget.hasSSE41() ? NumPackedSignBits : 8;

  // If the bits being discarded are known zero, PACKUS is an exact
  // truncation.
  KnownBits Known = DAG.computeKnownBits(In);
  if ((InNumEltBits - NumPackedZeroBits) <= Known.countMinLeadingZeros())
    if (SDValue V =
            truncateVectorWithPACK(X86ISD::PACKUS, VT, In, DL, DAG, Subtarget))
      return V;

  // If the bits being discarded are all copies of the sign bit, PACKSS is
  // an exact truncation.
  if ((InNumEltBits - NumPackedSignBits) < DAG.ComputeNumSignBits(In))
    if (SDValue V =
            truncateVectorWithPACK(X86ISD::PACKSS, VT, In, DL, DAG, Subtarget))
      return V;

  // Manual shuffle lowerings for the remaining 256->128-bit truncations.
  assert(VT.is128BitVector() && InVT.is256BitVector() && "Unexpected types!");

  // v4i64 -> v4i32: pick the even i32 lanes.
  if ((VT == MVT::v4i32) && (InVT == MVT::v4i64)) {
    In = DAG.getBitcast(MVT::v8i32, In);

    if (Subtarget.hasInt256()) {
      static const int ShufMask[] = {0, 2, 4, 6, -1, -1, -1, -1};
      In = DAG.getVectorShuffle(MVT::v8i32, DL, In, In, ShufMask);
      return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, In,
                         DAG.getIntPtrConstant(0, DL));
    }

    // Without AVX2: split, then shuffle the even lanes of both halves.
    SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i32, In,
                               DAG.getIntPtrConstant(0, DL));
    SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v4i32, In,
                               DAG.getIntPtrConstant(4, DL));
    static const int ShufMask[] = {0, 2, 4, 6};
    return DAG.getVectorShuffle(VT, DL, OpLo, OpHi, ShufMask);
  }

  // v8i32 -> v8i16: gather the low two bytes of each i32.
  if ((VT == MVT::v8i16) && (InVT == MVT::v8i32)) {
    In = DAG.getBitcast(MVT::v32i8, In);

    if (Subtarget.hasInt256()) {
      // Collect the low halves within each 128-bit lane, then use a
      // 64-bit-element shuffle to merge the two lanes' results.
      static const int ShufMask1[] = { 0, 1, 4, 5, 8, 9, 12, 13,
                                       -1, -1, -1, -1, -1, -1, -1, -1,
                                       16, 17, 20, 21, 24, 25, 28, 29,
                                       -1, -1, -1, -1, -1, -1, -1, -1 };
      In = DAG.getVectorShuffle(MVT::v32i8, DL, In, In, ShufMask1);
      In = DAG.getBitcast(MVT::v4i64, In);

      static const int ShufMask2[] = {0, 2, -1, -1};
      In = DAG.getVectorShuffle(MVT::v4i64, DL, In, In, ShufMask2);
      return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i16,
                         DAG.getBitcast(MVT::v16i16, In),
                         DAG.getIntPtrConstant(0, DL));
    }

    SDValue OpLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v16i8, In,
                               DAG.getIntPtrConstant(0, DL));
    SDValue OpHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v16i8, In,
                               DAG.getIntPtrConstant(16, DL));

    // Low two bytes of each i32, per 128-bit half.
    static const int ShufMask1[] = {0, 1, 4, 5, 8, 9, 12, 13,
                                    -1, -1, -1, -1, -1, -1, -1, -1};

    OpLo = DAG.getVectorShuffle(MVT::v16i8, DL, OpLo, OpLo, ShufMask1);
    OpHi = DAG.getVectorShuffle(MVT::v16i8, DL, OpHi, OpHi, ShufMask1);

    OpLo = DAG.getBitcast(MVT::v4i32, OpLo);
    OpHi = DAG.getBitcast(MVT::v4i32, OpHi);

    // Merge the two 64-bit results.
    static const int ShufMask2[] = {0, 1, 4, 5};
    SDValue res = DAG.getVectorShuffle(MVT::v4i32, DL, OpLo, OpHi, ShufMask2);
    return DAG.getBitcast(MVT::v8i16, res);
  }

  // v16i16 -> v16i8: clear the high bytes so PACKUS saturation is a no-op,
  // then pack the two halves.
  if (VT == MVT::v16i8 && InVT == MVT::v16i16) {
    In = DAG.getNode(ISD::AND, DL, InVT, In, DAG.getConstant(255, DL, InVT));

    SDValue InLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i16, In,
                               DAG.getIntPtrConstant(0, DL));
    SDValue InHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, MVT::v8i16, In,
                               DAG.getIntPtrConstant(8, DL));
    return DAG.getNode(X86ISD::PACKUS, DL, VT, InLo, InHi);
  }

  llvm_unreachable("All 256->128 cases should have been handled above!");
}
| 21285 | |
| 21286 | |
| 21287 | |
/// Lower a vector FP_TO_UINT to vXi32 on targets that only have signed
/// packed conversions (CVTTP2SI), by converting twice and merging.
static SDValue expandFP_TO_UINT_SSE(MVT VT, SDValue Src, const SDLoc &dl,
                                    SelectionDAG &DAG,
                                    const X86Subtarget &Subtarget) {
  MVT SrcVT = Src.getSimpleValueType();
  unsigned DstBits = VT.getScalarSizeInBits();
  assert(DstBits == 32 && "expandFP_TO_UINT_SSE - only vXi32 supported");

  // Compute the converted result for inputs in [0, 2^31) ("Small") and for
  // inputs in [2^31, 2^32) ("Big", converted after subtracting 2^31).
  SDValue Small = DAG.getNode(X86ISD::CVTTP2SI, dl, VT, Src);
  SDValue Big =
      DAG.getNode(X86ISD::CVTTP2SI, dl, VT,
                  DAG.getNode(ISD::FSUB, dl, SrcVT, Src,
                              DAG.getConstantFP(2147483648.0f, dl, SrcVT)));

  // When the input does not fit in a signed i32, CVTTP2SI produces the
  // "integer indefinite" value 0x80000000 — i.e. only the sign bit is set.
  // So the sign bit of Small tells us which of the two conversions to use:
  //   sign clear -> input < 2^31, Small is already correct;
  //   sign set   -> Small == 0x80000000 and Big holds (input - 2^31) in
  //                 [0, 2^31), so Small | Big reconstructs the full value.

  // Without AVX2 there is no 256-bit integer arithmetic-shift, so for v8i32
  // select with BLENDV instead; BLENDV picks per-element based on the sign
  // bit of the condition operand (Small).
  if (VT == MVT::v8i32 && !Subtarget.hasAVX2()) {
    SDValue Overflow = DAG.getNode(ISD::OR, dl, VT, Small, Big);
    return DAG.getNode(X86ISD::BLENDV, dl, VT, Small, Overflow, Small);
  }

  // Otherwise broadcast the sign bit into a full mask with an arithmetic
  // shift and merge: Small | (Big & mask).
  SDValue IsOverflown =
      DAG.getNode(X86ISD::VSRAI, dl, VT, Small,
                  DAG.getTargetConstant(DstBits - 1, dl, MVT::i8));
  return DAG.getNode(ISD::OR, dl, VT, Small,
                     DAG.getNode(ISD::AND, dl, VT, Big, IsOverflown));
}
| 21323 | |
/// Custom-lower (STRICT_)FP_TO_SINT/FP_TO_UINT. Vector cases are handled by
/// widening to a legal AVX-512 conversion or by the SSE expansion helper;
/// scalar cases fall through to promotion, libcalls (f128) or x87.
SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
  bool IsStrict = Op->isStrictFPOpcode();
  bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
                  Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
  MVT VT = Op->getSimpleValueType(0);
  SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
  MVT SrcVT = Src.getSimpleValueType();
  SDLoc dl(Op);

  if (VT.isVector()) {
    if (VT == MVT::v2i1 && SrcVT == MVT::v2f64) {
      // Convert to a wider integer type, then truncate down to vXi1.
      MVT ResVT = MVT::v4i32;
      MVT TruncVT = MVT::v4i1;
      unsigned Opc;
      if (IsStrict)
        Opc = IsSigned ? X86ISD::STRICT_CVTTP2SI : X86ISD::STRICT_CVTTP2UI;
      else
        Opc = IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI;

      // Unsigned without VLX: widen the source to 512 bits and use the
      // generic opcode on v8f64.
      if (!IsSigned && !Subtarget.hasVLX()) {
        assert(Subtarget.useAVX512Regs() && "Unexpected features!");
        ResVT = MVT::v8i32;
        TruncVT = MVT::v8i1;
        Opc = Op.getOpcode();
        // For strict ops pad with zeros rather than undef so the widened
        // conversion cannot fault on the garbage upper lanes.
        SDValue Tmp = IsStrict ? DAG.getConstantFP(0.0, dl, MVT::v8f64)
                               : DAG.getUNDEF(MVT::v8f64);
        Src = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v8f64, Tmp, Src,
                          DAG.getIntPtrConstant(0, dl));
      }
      SDValue Res, Chain;
      if (IsStrict) {
        Res =
            DAG.getNode(Opc, dl, {ResVT, MVT::Other}, {Op->getOperand(0), Src});
        Chain = Res.getValue(1);
      } else {
        Res = DAG.getNode(Opc, dl, ResVT, Src);
      }

      Res = DAG.getNode(ISD::TRUNCATE, dl, TruncVT, Res);
      Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i1, Res,
                        DAG.getIntPtrConstant(0, dl));
      if (IsStrict)
        return DAG.getMergeValues({Res, Chain}, dl);
      return Res;
    }

    // v8f64->v8i32 unsigned is directly selectable with AVX-512 registers.
    if (VT == MVT::v8i32 && SrcVT == MVT::v8f64) {
      assert(!IsSigned && "Expected unsigned conversion!");
      assert(Subtarget.useAVX512Regs() && "Requires avx512f");
      return Op;
    }

    // Widen vXi32 fp_to_uint to a 512-bit source when we have AVX-512
    // registers but not VLX.
    if ((VT == MVT::v4i32 || VT == MVT::v8i32) &&
        (SrcVT == MVT::v4f64 || SrcVT == MVT::v4f32 || SrcVT == MVT::v8f32) &&
        Subtarget.useAVX512Regs()) {
      assert(!IsSigned && "Expected unsigned conversion!");
      assert(!Subtarget.hasVLX() && "Unexpected features!");
      MVT WideVT = SrcVT == MVT::v4f64 ? MVT::v8f64 : MVT::v16f32;
      MVT ResVT = SrcVT == MVT::v4f64 ? MVT::v8i32 : MVT::v16i32;
      // Strict ops are padded with zeros (not undef) so the widened
      // conversion cannot fault on the unused lanes.
      SDValue Tmp =
          IsStrict ? DAG.getConstantFP(0.0, dl, WideVT) : DAG.getUNDEF(WideVT);
      Src = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideVT, Tmp, Src,
                        DAG.getIntPtrConstant(0, dl));

      SDValue Res, Chain;
      if (IsStrict) {
        Res = DAG.getNode(ISD::STRICT_FP_TO_UINT, dl, {ResVT, MVT::Other},
                          {Op->getOperand(0), Src});
        Chain = Res.getValue(1);
      } else {
        Res = DAG.getNode(ISD::FP_TO_UINT, dl, ResVT, Src);
      }

      Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Res,
                        DAG.getIntPtrConstant(0, dl));

      if (IsStrict)
        return DAG.getMergeValues({Res, Chain}, dl);
      return Res;
    }

    // Widen vXi64 fp_to_int to a 512-bit source with AVX512DQ (no VLX).
    if ((VT == MVT::v2i64 || VT == MVT::v4i64) &&
        (SrcVT == MVT::v2f64 || SrcVT == MVT::v4f64 || SrcVT == MVT::v4f32) &&
        Subtarget.useAVX512Regs() && Subtarget.hasDQI()) {
      assert(!Subtarget.hasVLX() && "Unexpected features!");
      MVT WideVT = SrcVT == MVT::v4f32 ? MVT::v8f32 : MVT::v8f64;
      // Strict ops are padded with zeros (not undef) so the widened
      // conversion cannot fault on the unused lanes.
      SDValue Tmp =
          IsStrict ? DAG.getConstantFP(0.0, dl, WideVT) : DAG.getUNDEF(WideVT);
      Src = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, WideVT, Tmp, Src,
                        DAG.getIntPtrConstant(0, dl));

      SDValue Res, Chain;
      if (IsStrict) {
        Res = DAG.getNode(Op.getOpcode(), dl, {MVT::v8i64, MVT::Other},
                          {Op->getOperand(0), Src});
        Chain = Res.getValue(1);
      } else {
        Res = DAG.getNode(Op.getOpcode(), dl, MVT::v8i64, Src);
      }

      Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, Res,
                        DAG.getIntPtrConstant(0, dl));

      if (IsStrict)
        return DAG.getMergeValues({Res, Chain}, dl);
      return Res;
    }

    if (VT == MVT::v2i64 && SrcVT == MVT::v2f32) {
      if (!Subtarget.hasVLX()) {
        // Non-strict nodes can be widened by the type legalizer; only the
        // strict form needs handling here (zero-padded to avoid faults).
        if (!IsStrict)
          return SDValue();

        SDValue Zero = DAG.getConstantFP(0.0, dl, MVT::v2f32);
        SDValue Tmp = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8f32,
                                  {Src, Zero, Zero, Zero});
        Tmp = DAG.getNode(Op.getOpcode(), dl, {MVT::v8i64, MVT::Other},
                          {Op->getOperand(0), Tmp});
        SDValue Chain = Tmp.getValue(1);
        Tmp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i64, Tmp,
                          DAG.getIntPtrConstant(0, dl));
        return DAG.getMergeValues({Tmp, Chain}, dl);
      }

      // With DQI+VLX, widen v2f32 -> v4f32 (upper half undef) and use the
      // packed X86 conversion node directly.
      assert(Subtarget.hasDQI() && Subtarget.hasVLX() && "Requires AVX512DQVL");
      SDValue Tmp = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src,
                                DAG.getUNDEF(MVT::v2f32));
      if (IsStrict) {
        unsigned Opc = IsSigned ? X86ISD::STRICT_CVTTP2SI
                                : X86ISD::STRICT_CVTTP2UI;
        return DAG.getNode(Opc, dl, {VT, MVT::Other}, {Op->getOperand(0), Tmp});
      }
      unsigned Opc = IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI;
      return DAG.getNode(Opc, dl, VT, Tmp);
    }

    // Remaining unsigned vXi32 cases: expand via the double signed
    // conversion trick.
    if ((VT == MVT::v4i32 && SrcVT == MVT::v4f32) ||
        (VT == MVT::v4i32 && SrcVT == MVT::v4f64) ||
        (VT == MVT::v8i32 && SrcVT == MVT::v8f32)) {
      assert(!IsSigned && "Expected unsigned conversion!");
      return expandFP_TO_UINT_SSE(VT, Src, dl, DAG, Subtarget);
    }

    return SDValue();
  }

  assert(!VT.isVector());

  bool UseSSEReg = isScalarFPTypeInSSEReg(SrcVT);

  if (!IsSigned && UseSSEReg) {
    // Scalar unsigned conversions are directly selectable with AVX-512.
    if (Subtarget.hasAVX512())
      return Op;

    // Use the CVTTS2SI "integer indefinite" behaviour (sign bit set on
    // out-of-range inputs) to build an unsigned conversion from two signed
    // ones, exactly as in expandFP_TO_UINT_SSE.
    if (!IsStrict && ((VT == MVT::i32 && !Subtarget.is64Bit()) ||
                      (VT == MVT::i64 && Subtarget.is64Bit()))) {
      unsigned DstBits = VT.getScalarSizeInBits();
      APInt UIntLimit = APInt::getSignMask(DstBits);
      SDValue FloatOffset = DAG.getNode(ISD::UINT_TO_FP, dl, SrcVT,
                                        DAG.getConstant(UIntLimit, dl, VT));
      MVT SrcVecVT = MVT::getVectorVT(SrcVT, 128 / SrcVT.getScalarSizeInBits());

      // Small: conversion of inputs below 2^(DstBits-1).
      // Big: conversion of (input - 2^(DstBits-1)) for the upper half of
      // the unsigned range.
      SDValue Small =
          DAG.getNode(X86ISD::CVTTS2SI, dl, VT,
                      DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, SrcVecVT, Src));
      SDValue Big = DAG.getNode(
          X86ISD::CVTTS2SI, dl, VT,
          DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, SrcVecVT,
                      DAG.getNode(ISD::FSUB, dl, SrcVT, Src, FloatOffset)));

      // If Small overflowed it is the indefinite value (only the sign bit
      // set); broadcast that sign bit into a mask and merge in Big.
      SDValue IsOverflown = DAG.getNode(
          ISD::SRA, dl, VT, Small, DAG.getConstant(DstBits - 1, dl, MVT::i8));
      return DAG.getNode(ISD::OR, dl, VT, Small,
                         DAG.getNode(ISD::AND, dl, VT, Big, IsOverflown));
    }

    // i64 without AVX-512: use the default expansion.
    if (VT == MVT::i64)
      return SDValue();

    assert(VT == MVT::i32 && "Unexpected VT!");

    // On 64-bit targets promote i32 FP_TO_UINT to an i64 FP_TO_SINT and
    // truncate. NOTE(review): this may not raise an invalid exception when
    // the value does not fit in i32 — confirm against upstream PR44019.
    if (Subtarget.is64Bit()) {
      SDValue Res, Chain;
      if (IsStrict) {
        Res = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { MVT::i64, MVT::Other},
                          { Op.getOperand(0), Src });
        Chain = Res.getValue(1);
      } else
        Res = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i64, Src);

      Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
      if (IsStrict)
        return DAG.getMergeValues({ Res, Chain }, dl);
      return Res;
    }

    // 32-bit targets without SSE3 fall back to the default expansion; with
    // SSE3 we continue to the x87 path below.
    if (!Subtarget.hasSSE3())
      return SDValue();
  }

  // Promote i16 results via an i32 FP_TO_SINT plus truncate when the source
  // is an SSE type or f128.
  if (VT == MVT::i16 && (UseSSEReg || SrcVT == MVT::f128)) {
    assert(IsSigned && "Expected i16 FP_TO_UINT to have been promoted!");
    SDValue Res, Chain;
    if (IsStrict) {
      Res = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, { MVT::i32, MVT::Other},
                        { Op.getOperand(0), Src });
      Chain = Res.getValue(1);
    } else
      Res = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src);

    Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res);
    if (IsStrict)
      return DAG.getMergeValues({ Res, Chain }, dl);
    return Res;
  }

  // Signed conversion from an SSE register is directly selectable.
  if (UseSSEReg && IsSigned)
    return Op;

  // f128 conversions always go through a libcall.
  if (SrcVT == MVT::f128) {
    RTLIB::Libcall LC;
    if (IsSigned)
      LC = RTLIB::getFPTOSINT(SrcVT, VT);
    else
      LC = RTLIB::getFPTOUINT(SrcVT, VT);

    SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
    MakeLibCallOptions CallOptions;
    std::pair<SDValue, SDValue> Tmp = makeLibCall(DAG, LC, VT, Src, CallOptions,
                                                  SDLoc(Op), Chain);

    if (IsStrict)
      return DAG.getMergeValues({ Tmp.first, Tmp.second }, dl);

    return Tmp.first;
  }

  // Everything else is handled by the x87 helper.
  SDValue Chain;
  if (SDValue V = FP_TO_INTHelper(Op, DAG, IsSigned, Chain)) {
    if (IsStrict)
      return DAG.getMergeValues({V, Chain}, dl);
    return V;
  }

  llvm_unreachable("Expected FP_TO_INTHelper to handle all remaining cases.");
}
| 21611 | |
| 21612 | SDValue X86TargetLowering::LowerLRINT_LLRINT(SDValue Op, |
| 21613 | SelectionDAG &DAG) const { |
| 21614 | SDValue Src = Op.getOperand(0); |
| 21615 | MVT SrcVT = Src.getSimpleValueType(); |
| 21616 | |
| 21617 | |
| 21618 | if (isScalarFPTypeInSSEReg(SrcVT)) |
| 21619 | return Op; |
| 21620 | |
| 21621 | return LRINT_LLRINTHelper(Op.getNode(), DAG); |
| 21622 | } |
| 21623 | |
/// Lower LRINT/LLRINT via the x87 FIST instruction, which rounds using the
/// current rounding mode. SSE sources are first bounced through a stack slot
/// into an x87 register (FLD).
SDValue X86TargetLowering::LRINT_LLRINTHelper(SDNode *N,
                                              SelectionDAG &DAG) const {
  EVT DstVT = N->getValueType(0);
  SDValue Src = N->getOperand(0);
  EVT SrcVT = Src.getValueType();

  if (SrcVT != MVT::f32 && SrcVT != MVT::f64 && SrcVT != MVT::f80) {
    // Only f32/f64/f80 can be handled by this x87 sequence; let other
    // source types use the default lowering.
    return SDValue();
  }

  SDLoc DL(N);
  SDValue Chain = DAG.getEntryNode();

  bool UseSSE = isScalarFPTypeInSSEReg(SrcVT);

  // The stack slot is reused for both the spill of the SSE value and the
  // integer result, so size it for whichever of the two types is involved.
  EVT OtherVT = UseSSE ? SrcVT : DstVT;
  SDValue StackPtr = DAG.CreateStackTemporary(DstVT, OtherVT);
  int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
  MachinePointerInfo MPI =
      MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);

  if (UseSSE) {
    assert(DstVT == MVT::i64 && "Invalid LRINT/LLRINT to lower!");
    // Spill the SSE value and reload it into an x87 register.
    Chain = DAG.getStore(Chain, DL, Src, StackPtr, MPI);
    SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other);
    SDValue Ops[] = { Chain, StackPtr };

    Src = DAG.getMemIntrinsicNode(X86ISD::FLD, DL, Tys, Ops, SrcVT, MPI,
                                  /*Alignment=*/None,
                                  MachineMemOperand::MOLoad);
    Chain = Src.getValue(1);
  }

  // FIST stores the rounded integer to the stack slot; load it back as the
  // final result.
  SDValue StoreOps[] = { Chain, Src, StackPtr };
  Chain = DAG.getMemIntrinsicNode(X86ISD::FIST, DL, DAG.getVTList(MVT::Other),
                                  StoreOps, DstVT, MPI, /*Alignment=*/None,
                                  MachineMemOperand::MOStore);

  return DAG.getLoad(DstVT, DL, Chain, StackPtr, MPI);
}
| 21667 | |
SDValue
X86TargetLowering::LowerFP_TO_INT_SAT(SDValue Op, SelectionDAG &DAG) const {
  // Saturating fp-to-int conversion, modelled on the generic
  // TargetLowering::expandFP_TO_INT_SAT but using X86-specific min/max nodes
  // and the CVTT* out-of-range ("integer indefinite") behaviour.
  SDNode *Node = Op.getNode();
  bool IsSigned = Node->getOpcode() == ISD::FP_TO_SINT_SAT;
  unsigned FpToIntOpcode = IsSigned ? ISD::FP_TO_SINT : ISD::FP_TO_UINT;
  SDLoc dl(SDValue(Node, 0));
  SDValue Src = Node->getOperand(0);

  // Three types are involved: SrcVT is the floating-point source, DstVT the
  // final result, and TmpVT the type of the intermediate FP_TO_*INT (which
  // may be a promotion of DstVT).
  EVT SrcVT = Src.getValueType();
  EVT DstVT = Node->getValueType(0);
  EVT TmpVT = DstVT;

  // Only SSE-register scalar floats are handled here; anything else takes
  // the generic expansion.
  if (!isScalarFPTypeInSSEReg(SrcVT))
    return SDValue();

  EVT SatVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
  unsigned SatWidth = SatVT.getScalarSizeInBits();
  unsigned DstWidth = DstVT.getScalarSizeInBits();
  unsigned TmpWidth = TmpVT.getScalarSizeInBits();
  assert(SatWidth <= DstWidth && SatWidth <= TmpWidth &&
         "Expected saturation width smaller than result width");

  // Promote the intermediate conversion to at least 32 bits.
  if (TmpWidth < 32) {
    TmpVT = MVT::i32;
    TmpWidth = 32;
  }

  // On 64-bit targets, promote unsigned-32-bit saturation to a 64-bit
  // intermediate so a native signed conversion can be used.
  if (SatWidth == 32 && !IsSigned && Subtarget.is64Bit()) {
    TmpVT = MVT::i64;
    TmpWidth = 64;
  }

  // If the saturation width is strictly smaller than the intermediate width,
  // a signed conversion (which is native) always suffices.
  if (SatWidth < TmpWidth)
    FpToIntOpcode = ISD::FP_TO_SINT;

  // Compute the integer saturation bounds, widened to the destination
  // width, and their floating-point counterparts.
  APInt MinInt, MaxInt;
  if (IsSigned) {
    MinInt = APInt::getSignedMinValue(SatWidth).sextOrSelf(DstWidth);
    MaxInt = APInt::getSignedMaxValue(SatWidth).sextOrSelf(DstWidth);
  } else {
    MinInt = APInt::getMinValue(SatWidth).zextOrSelf(DstWidth);
    MaxInt = APInt::getMaxValue(SatWidth).zextOrSelf(DstWidth);
  }

  APFloat MinFloat(DAG.EVTToAPFloatSemantics(SrcVT));
  APFloat MaxFloat(DAG.EVTToAPFloatSemantics(SrcVT));

  APFloat::opStatus MinStatus = MinFloat.convertFromAPInt(
    MinInt, IsSigned, APFloat::rmTowardZero);
  APFloat::opStatus MaxStatus = MaxFloat.convertFromAPInt(
    MaxInt, IsSigned, APFloat::rmTowardZero);
  // The clamp-then-convert strategy is only exact when both bounds are
  // exactly representable in the source FP type.
  bool AreExactFloatBounds = !(MinStatus & APFloat::opStatus::opInexact)
                          && !(MaxStatus & APFloat::opStatus::opInexact);

  SDValue MinFloatNode = DAG.getConstantFP(MinFloat, dl, SrcVT);
  SDValue MaxFloatNode = DAG.getConstantFP(MaxFloat, dl, SrcVT);

  // If the bounds are exact, emit min+max+fptoi; otherwise fall back to
  // compare-and-select on the integer side.
  if (AreExactFloatBounds) {
    if (DstVT != TmpVT) {
      // Clamp from below with FMAX(MinFloat, Src); NaN propagates.
      SDValue MinClamped = DAG.getNode(
        X86ISD::FMAX, dl, SrcVT, MinFloatNode, Src);
      // Clamp from above; after the first clamp NaN is the only special
      // case left, handled by the truncation note below.
      SDValue BothClamped = DAG.getNode(
        X86ISD::FMIN, dl, SrcVT, MaxFloatNode, MinClamped);
      // Convert the clamped value.
      SDValue FpToInt = DAG.getNode(FpToIntOpcode, dl, TmpVT, BothClamped);

      // A NaN input converts to the integer-indefinite value (top bit set,
      // rest zero); truncation discards the top bit, yielding 0 as required.
      return DAG.getNode(ISD::TRUNCATE, dl, DstVT, FpToInt);
    }

    // Clamp from below with FMAX(Src, MinFloat); a NaN input produces
    // MinFloat (x86 max returns the second operand on NaN).
    SDValue MinClamped = DAG.getNode(
      X86ISD::FMAX, dl, SrcVT, Src, MinFloatNode);
    // Clamp from above; NaN can no longer occur, so the commutative FMINC
    // is safe.
    SDValue BothClamped = DAG.getNode(
      X86ISD::FMINC, dl, SrcVT, MinClamped, MaxFloatNode);
    // Convert the clamped value.
    SDValue FpToInt = DAG.getNode(FpToIntOpcode, dl, DstVT, BothClamped);

    if (!IsSigned) {
      // Unsigned: NaN was mapped to MinFloat == 0, so we are done.
      return FpToInt;
    }

    // Signed: explicitly select 0 when Src is NaN (unordered with itself).
    SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
    return DAG.getSelectCC(
      dl, Src, Src, ZeroInt, FpToInt, ISD::CondCode::SETUO);
  }

  SDValue MinIntNode = DAG.getConstant(MinInt, dl, DstVT);
  SDValue MaxIntNode = DAG.getConstant(MaxInt, dl, DstVT);

  // Direct conversion; may be replaced by the selects below.
  SDValue FpToInt = DAG.getNode(FpToIntOpcode, dl, TmpVT, Src);

  if (DstVT != TmpVT) {
    // Narrow the promoted intermediate back to the destination type.
    FpToInt = DAG.getNode(ISD::TRUNCATE, dl, DstVT, FpToInt);
  }

  SDValue Select = FpToInt;

  // For signed conversions saturating to the full intermediate width, the
  // integer-indefinite value already equals the integer minimum, so the
  // lower-bound select can be skipped.
  if (!IsSigned || SatWidth != TmpVT.getScalarSizeInBits()) {
    // Src ULT MinFloat selects MinInt; this also catches NaN inputs.
    Select = DAG.getSelectCC(
      dl, Src, MinFloatNode, MinIntNode, Select, ISD::CondCode::SETULT);
  }

  // Src OGT MaxFloat selects MaxInt.
  Select = DAG.getSelectCC(
    dl, Src, MaxFloatNode, MaxIntNode, Select, ISD::CondCode::SETOGT);

  // Unsigned (NaN already mapped to MinInt == 0) and promoted cases are
  // finished here.
  if (!IsSigned || DstVT != TmpVT) {
    return Select;
  }

  // Signed, non-promoted: select 0 when Src is NaN.
  SDValue ZeroInt = DAG.getConstant(0, dl, DstVT);
  return DAG.getSelectCC(
    dl, Src, Src, ZeroInt, Select, ISD::CondCode::SETUO);
}
| 21817 | |
| 21818 | SDValue X86TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const { |
| 21819 | bool IsStrict = Op->isStrictFPOpcode(); |
| 21820 | |
| 21821 | SDLoc DL(Op); |
| 21822 | MVT VT = Op.getSimpleValueType(); |
| 21823 | SDValue In = Op.getOperand(IsStrict ? 1 : 0); |
| 21824 | MVT SVT = In.getSimpleValueType(); |
| 21825 | |
| 21826 | if (VT == MVT::f128) |
| 21827 | return SDValue(); |
| 21828 | |
| 21829 | assert(SVT == MVT::v2f32 && "Only customize MVT::v2f32 type legalization!"); |
| 21830 | |
| 21831 | SDValue Res = |
| 21832 | DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4f32, In, DAG.getUNDEF(SVT)); |
| 21833 | if (IsStrict) |
| 21834 | return DAG.getNode(X86ISD::STRICT_VFPEXT, DL, {VT, MVT::Other}, |
| 21835 | {Op->getOperand(0), Res}); |
| 21836 | return DAG.getNode(X86ISD::VFPEXT, DL, VT, Res); |
| 21837 | } |
| 21838 | |
| 21839 | SDValue X86TargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const { |
| 21840 | bool IsStrict = Op->isStrictFPOpcode(); |
| 21841 | SDValue In = Op.getOperand(IsStrict ? 1 : 0); |
| 21842 | |
| 21843 | if (In.getSimpleValueType() != MVT::f128) |
| 21844 | return Op; |
| 21845 | |
| 21846 | return SDValue(); |
| 21847 | } |
| 21848 | |
/// Lower scalar FP16_TO_FP (i16 half bits -> f32) using the vector
/// CVTPH2PS instruction on lane 0.
static SDValue LowerFP16_TO_FP(SDValue Op, SelectionDAG &DAG) {
  bool IsStrict = Op->isStrictFPOpcode();
  SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
  assert(Src.getValueType() == MVT::i16 && Op.getValueType() == MVT::f32 &&
         "Unexpected VT!");

  SDLoc dl(Op);
  // Put the half bit pattern into lane 0 of a zero v8i16 vector.
  SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v8i16,
                            DAG.getConstant(0, dl, MVT::v8i16), Src,
                            DAG.getIntPtrConstant(0, dl));

  // Convert with (STRICT_)CVTPH2PS, threading the chain for strict ops.
  SDValue Chain;
  if (IsStrict) {
    Res = DAG.getNode(X86ISD::STRICT_CVTPH2PS, dl, {MVT::v4f32, MVT::Other},
                      {Op.getOperand(0), Res});
    Chain = Res.getValue(1);
  } else {
    Res = DAG.getNode(X86ISD::CVTPH2PS, dl, MVT::v4f32, Res);
  }

  // Extract the scalar result from lane 0.
  Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res,
                    DAG.getIntPtrConstant(0, dl));

  if (IsStrict)
    return DAG.getMergeValues({Res, Chain}, dl);

  return Res;
}
| 21877 | |
/// Lower scalar FP_TO_FP16 (f32 -> i16 half bits) using the vector
/// CVTPS2PH instruction on lane 0.
static SDValue LowerFP_TO_FP16(SDValue Op, SelectionDAG &DAG) {
  bool IsStrict = Op->isStrictFPOpcode();
  SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
  assert(Src.getValueType() == MVT::f32 && Op.getValueType() == MVT::i16 &&
         "Unexpected VT!");

  SDLoc dl(Op);
  SDValue Res, Chain;
  if (IsStrict) {
    // For the strict form, build the source vector with explicit zero upper
    // lanes. Immediate 4 selects the MXCSR rounding mode for CVTPS2PH.
    Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v4f32,
                      DAG.getConstantFP(0, dl, MVT::v4f32), Src,
                      DAG.getIntPtrConstant(0, dl));
    Res = DAG.getNode(
        X86ISD::STRICT_CVTPS2PH, dl, {MVT::v8i16, MVT::Other},
        {Op.getOperand(0), Res, DAG.getTargetConstant(4, dl, MVT::i32)});
    Chain = Res.getValue(1);
  } else {
    // Non-strict: upper lanes may stay undefined (SCALAR_TO_VECTOR).
    Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4f32, Src);
    Res = DAG.getNode(X86ISD::CVTPS2PH, dl, MVT::v8i16, Res,
                      DAG.getTargetConstant(4, dl, MVT::i32));
  }

  // Extract the i16 half bits from lane 0.
  Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i16, Res,
                    DAG.getIntPtrConstant(0, dl));

  if (IsStrict)
    return DAG.getMergeValues({Res, Chain}, dl);

  return Res;
}
| 21909 | |
| 21910 | |
| 21911 | |
/// Try to lower a scalar add/sub of two elements extracted from the same
/// vector into a horizontal op (HADD/HSUB/FHADD/FHSUB) plus an extract.
static SDValue lowerAddSubToHorizontalOp(SDValue Op, SelectionDAG &DAG,
                                         const X86Subtarget &Subtarget) {
  // If both extracts have other uses, the transform is unlikely to pay off.
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  if (!LHS.hasOneUse() && !RHS.hasOneUse())
    return Op;

  // FP horizontal ops require SSE3; integer horizontal ops require SSSE3.
  bool IsFP = Op.getSimpleValueType().isFloatingPoint();
  if (IsFP && !Subtarget.hasSSE3())
    return Op;
  if (!IsFP && !Subtarget.hasSSSE3())
    return Op;

  // Both operands must be extracts from the same vector with constant
  // indices, and horizontal ops must be considered profitable here.
  if (LHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
      RHS.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
      LHS.getOperand(0) != RHS.getOperand(0) ||
      !isa<ConstantSDNode>(LHS.getOperand(1)) ||
      !isa<ConstantSDNode>(RHS.getOperand(1)) ||
      !shouldUseHorizontalOp(true, DAG, Subtarget))
    return Op;

  // Map the scalar opcode to its horizontal counterpart.
  unsigned HOpcode;
  switch (Op.getOpcode()) {
  case ISD::ADD: HOpcode = X86ISD::HADD; break;
  case ISD::SUB: HOpcode = X86ISD::HSUB; break;
  case ISD::FADD: HOpcode = X86ISD::FHADD; break;
  case ISD::FSUB: HOpcode = X86ISD::FHSUB; break;
  default:
    llvm_unreachable("Trying to lower unsupported opcode to horizontal op");
  }
  // Adds are commutative, so allow (odd, even) index order by swapping.
  unsigned LExtIndex = LHS.getConstantOperandVal(1);
  unsigned RExtIndex = RHS.getConstantOperandVal(1);
  if ((LExtIndex & 1) == 1 && (RExtIndex & 1) == 0 &&
      (HOpcode == X86ISD::HADD || HOpcode == X86ISD::FHADD))
    std::swap(LExtIndex, RExtIndex);

  // Horizontal ops combine adjacent pairs: indices must be (even, even+1).
  if ((LExtIndex & 1) != 0 || RExtIndex != (LExtIndex + 1))
    return Op;

  SDValue X = LHS.getOperand(0);
  EVT VecVT = X.getValueType();
  unsigned BitWidth = VecVT.getSizeInBits();
  unsigned NumLanes = BitWidth / 128;
  unsigned NumEltsPerLane = VecVT.getVectorNumElements() / NumLanes;
  assert((BitWidth == 128 || BitWidth == 256 || BitWidth == 512) &&
         "Not expecting illegal vector widths here");

  // For wide vectors, narrow to the 128-bit lane that contains the pair and
  // rebase the extract index into that lane.
  SDLoc DL(Op);
  if (BitWidth == 256 || BitWidth == 512) {
    unsigned LaneIdx = LExtIndex / NumEltsPerLane;
    X = extract128BitVector(X, LaneIdx * NumEltsPerLane, DAG, DL);
    LExtIndex %= NumEltsPerLane;
  }

  // op (extractelt X, 2k), (extractelt X, 2k+1)
  //   --> extractelt (hop X, X), k
  // because hop(X, X) element k holds op(X[2k], X[2k+1]).
  SDValue HOp = DAG.getNode(HOpcode, DL, X.getValueType(), X, X);
  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getSimpleValueType(), HOp,
                     DAG.getIntPtrConstant(LExtIndex / 2, DL));
}
| 21981 | |
| 21982 | |
| 21983 | |
| 21984 | SDValue X86TargetLowering::lowerFaddFsub(SDValue Op, SelectionDAG &DAG) const { |
| 21985 | assert((Op.getValueType() == MVT::f32 || Op.getValueType() == MVT::f64) && |
| 21986 | "Only expecting float/double"); |
| 21987 | return lowerAddSubToHorizontalOp(Op, DAG, Subtarget); |
| 21988 | } |
| 21989 | |
| 21990 | |
| 21991 | |
| 21992 | |
| 21993 | |
/// ISD::FROUND rounds to nearest with ties away from zero, which x86 has no
/// instruction for. Emulate it as trunc(x + copysign(nextafter(0.5, 0), x)):
/// using the value just below 0.5 avoids incorrectly rounding values whose
/// fraction is exactly 0.5 - 1ulp upward.
static SDValue LowerFROUND(SDValue Op, SelectionDAG &DAG) {
  SDValue N0 = Op.getOperand(0);
  SDLoc dl(Op);
  MVT VT = Op.getSimpleValueType();

  // Build the constant nextafter(0.5, 0.0) in the result type's semantics.
  const fltSemantics &Sem = SelectionDAG::EVTToAPFloatSemantics(VT);
  bool Ignored;
  APFloat Point5Pred = APFloat(0.5f);
  Point5Pred.convert(Sem, APFloat::rmNearestTiesToEven, &Ignored);
  Point5Pred.next(/*nextDown=*/true); // step down to the value just below 0.5

  // N0 += copysign(Point5Pred, N0)
  SDValue Adder = DAG.getNode(ISD::FCOPYSIGN, dl, VT,
                              DAG.getConstantFP(Point5Pred, dl, VT), N0);
  N0 = DAG.getNode(ISD::FADD, dl, VT, N0, Adder);

  // Truncate toward zero to drop the fraction.
  return DAG.getNode(ISD::FTRUNC, dl, VT, N0);
}
| 22013 | |
| 22014 | |
| 22015 | |
/// Lower FABS and FNEG as a bitwise logic op with a constant mask; the only
/// differences between the two are the mask value and the logic opcode.
static SDValue LowerFABSorFNEG(SDValue Op, SelectionDAG &DAG) {
  assert((Op.getOpcode() == ISD::FABS || Op.getOpcode() == ISD::FNEG) &&
         "Wrong opcode for lowering FABS or FNEG.");

  bool IsFABS = (Op.getOpcode() == ISD::FABS);

  // If this FABS has an FNEG user, bail so the pair can be combined into a
  // single FNABS; the FABS is lowered later if still used.
  if (IsFABS)
    for (SDNode *User : Op->uses())
      if (User->getOpcode() == ISD::FNEG)
        return Op;

  SDLoc dl(Op);
  MVT VT = Op.getSimpleValueType();

  bool IsF128 = (VT == MVT::f128);
  assert((VT == MVT::f64 || VT == MVT::f32 || VT == MVT::f128 ||
          VT == MVT::v2f64 || VT == MVT::v4f64 || VT == MVT::v4f32 ||
          VT == MVT::v8f32 || VT == MVT::v8f64 || VT == MVT::v16f32) &&
         "Unexpected type in LowerFABSorFNEG");

  // Scalar f32/f64 is operated on as a full 128-bit vector ("fake vector")
  // since the logic ops work on whole XMM registers anyway.
  bool IsFakeVector = !VT.isVector() && !IsF128;
  MVT LogicVT = VT;
  if (IsFakeVector)
    LogicVT = (VT == MVT::f64) ? MVT::v2f64 : MVT::v4f32;

  unsigned EltBits = VT.getScalarSizeInBits();

  // FABS clears the sign bit (AND with ~sign-mask); FNEG flips it (XOR with
  // sign-mask).
  APInt MaskElt = IsFABS ? APInt::getSignedMaxValue(EltBits) :
                           APInt::getSignMask(EltBits);
  const fltSemantics &Sem = SelectionDAG::EVTToAPFloatSemantics(VT);
  SDValue Mask = DAG.getConstantFP(APFloat(Sem, MaskElt), dl, LogicVT);

  SDValue Op0 = Op.getOperand(0);
  // FNEG of an FABS folds to FNABS: OR the sign bit into the FABS input.
  bool IsFNABS = !IsFABS && (Op0.getOpcode() == ISD::FABS);
  unsigned LogicOp = IsFABS ? X86ISD::FAND :
                     IsFNABS ? X86ISD::FOR :
                               X86ISD::FXOR;
  SDValue Operand = IsFNABS ? Op0.getOperand(0) : Op0;

  if (VT.isVector() || IsF128)
    return DAG.getNode(LogicOp, dl, LogicVT, Operand, Mask);

  // Scalar case: widen to the fake vector type, apply the logic op, then
  // extract the scalar result back out of lane 0.
  Operand = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LogicVT, Operand);
  SDValue LogicNode = DAG.getNode(LogicOp, dl, LogicVT, Operand, Mask);
  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, LogicNode,
                     DAG.getIntPtrConstant(0, dl));
}
| 22075 | |
/// Lower FCOPYSIGN as bitwise ops: (Mag & ~sign-mask) | (Sign & sign-mask).
static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) {
  SDValue Mag = Op.getOperand(0);
  SDValue Sign = Op.getOperand(1);
  SDLoc dl(Op);

  // If the sign operand is narrower than the result, extend it first.
  MVT VT = Op.getSimpleValueType();
  if (Sign.getSimpleValueType().bitsLT(VT))
    Sign = DAG.getNode(ISD::FP_EXTEND, dl, VT, Sign);

  // And if it is wider, round it down to the result type.
  if (Sign.getSimpleValueType().bitsGT(VT))
    Sign =
        DAG.getNode(ISD::FP_ROUND, dl, VT, Sign, DAG.getIntPtrConstant(0, dl));

  bool IsF128 = (VT == MVT::f128);
  assert((VT == MVT::f64 || VT == MVT::f32 || VT == MVT::f128 ||
          VT == MVT::v2f64 || VT == MVT::v4f64 || VT == MVT::v4f32 ||
          VT == MVT::v8f32 || VT == MVT::v8f64 || VT == MVT::v16f32) &&
         "Unexpected type in LowerFCOPYSIGN");

  const fltSemantics &Sem = SelectionDAG::EVTToAPFloatSemantics(VT);

  // Scalar f32/f64 is processed as a full 128-bit vector ("fake vector"),
  // since the FP logic nodes operate on whole XMM registers.
  bool IsFakeVector = !VT.isVector() && !IsF128;
  MVT LogicVT = VT;
  if (IsFakeVector)
    LogicVT = (VT == MVT::f64) ? MVT::v2f64 : MVT::v4f32;

  // Masks: sign bit only, and all bits except the sign bit.
  unsigned EltSizeInBits = VT.getScalarSizeInBits();
  SDValue SignMask = DAG.getConstantFP(
      APFloat(Sem, APInt::getSignMask(EltSizeInBits)), dl, LogicVT);
  SDValue MagMask = DAG.getConstantFP(
      APFloat(Sem, APInt::getSignedMaxValue(EltSizeInBits)), dl, LogicVT);

  // Isolate the sign bit of the Sign operand.
  if (IsFakeVector)
    Sign = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LogicVT, Sign);
  SDValue SignBit = DAG.getNode(X86ISD::FAND, dl, LogicVT, Sign, SignMask);

  // Isolate the magnitude bits of the Mag operand; for a constant (splat)
  // magnitude, just clear its sign at compile time instead of emitting an
  // AND.
  SDValue MagBits;
  if (ConstantFPSDNode *Op0CN = isConstOrConstSplatFP(Mag)) {
    APFloat APF = Op0CN->getValueAPF();
    APF.clearSign();
    MagBits = DAG.getConstantFP(APF, dl, LogicVT);
  } else {
    if (IsFakeVector)
      Mag = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LogicVT, Mag);
    MagBits = DAG.getNode(X86ISD::FAND, dl, LogicVT, Mag, MagMask);
  }

  // OR the magnitude and the sign bit; scalars extract lane 0 back out.
  SDValue Or = DAG.getNode(X86ISD::FOR, dl, LogicVT, MagBits, SignBit);
  return !IsFakeVector ? Or : DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Or,
                                          DAG.getIntPtrConstant(0, dl));
}
| 22143 | |
| 22144 | static SDValue LowerFGETSIGN(SDValue Op, SelectionDAG &DAG) { |
| 22145 | SDValue N0 = Op.getOperand(0); |
| 22146 | SDLoc dl(Op); |
| 22147 | MVT VT = Op.getSimpleValueType(); |
| 22148 | |
| 22149 | MVT OpVT = N0.getSimpleValueType(); |
| 22150 | assert((OpVT == MVT::f32 || OpVT == MVT::f64) && |
| 22151 | "Unexpected type for FGETSIGN"); |
| 22152 | |
| 22153 | |
| 22154 | MVT VecVT = (OpVT == MVT::f32 ? MVT::v4f32 : MVT::v2f64); |
| 22155 | SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, N0); |
| 22156 | Res = DAG.getNode(X86ISD::MOVMSK, dl, MVT::i32, Res); |
| 22157 | Res = DAG.getZExtOrTrunc(Res, dl, VT); |
| 22158 | Res = DAG.getNode(ISD::AND, dl, VT, Res, DAG.getConstant(1, dl, VT)); |
| 22159 | return Res; |
| 22160 | } |
| 22161 | |
| 22162 | |
| 22163 | static SDValue getSETCC(X86::CondCode Cond, SDValue EFLAGS, const SDLoc &dl, |
| 22164 | SelectionDAG &DAG) { |
| 22165 | return DAG.getNode(X86ISD::SETCC, dl, MVT::i8, |
| 22166 | DAG.getTargetConstant(Cond, dl, MVT::i8), EFLAGS); |
| 22167 | } |
| 22168 | |
| 22169 | |
| 22170 | |
| 22171 | |
| 22172 | |
| 22173 | static bool matchScalarReduction(SDValue Op, ISD::NodeType BinOp, |
| 22174 | SmallVectorImpl<SDValue> &SrcOps, |
| 22175 | SmallVectorImpl<APInt> *SrcMask = nullptr) { |
| 22176 | SmallVector<SDValue, 8> Opnds; |
| 22177 | DenseMap<SDValue, APInt> SrcOpMap; |
| 22178 | EVT VT = MVT::Other; |
| 22179 | |
| 22180 | |
| 22181 | |
| 22182 | assert(Op.getOpcode() == unsigned(BinOp) && |
| 22183 | "Unexpected bit reduction opcode"); |
| 22184 | Opnds.push_back(Op.getOperand(0)); |
| 22185 | Opnds.push_back(Op.getOperand(1)); |
| 22186 | |
| 22187 | for (unsigned Slot = 0, e = Opnds.size(); Slot < e; ++Slot) { |
| 22188 | SmallVectorImpl<SDValue>::const_iterator I = Opnds.begin() + Slot; |
| 22189 | |
| 22190 | if (I->getOpcode() == unsigned(BinOp)) { |
| 22191 | Opnds.push_back(I->getOperand(0)); |
| 22192 | Opnds.push_back(I->getOperand(1)); |
| 22193 | |
| 22194 | e += 2; |
| 22195 | continue; |
| 22196 | } |
| 22197 | |
| 22198 | |
| 22199 | if (I->getOpcode() != ISD::EXTRACT_VECTOR_ELT) |
| 22200 | return false; |
| 22201 | |
| 22202 | |
| 22203 | auto *Idx = dyn_cast<ConstantSDNode>(I->getOperand(1)); |
| 22204 | if (!Idx) |
| 22205 | return false; |
| 22206 | |
| 22207 | SDValue Src = I->getOperand(0); |
| 22208 | DenseMap<SDValue, APInt>::iterator M = SrcOpMap.find(Src); |
| 22209 | if (M == SrcOpMap.end()) { |
| 22210 | VT = Src.getValueType(); |
| 22211 | |
| 22212 | if (!SrcOpMap.empty() && VT != SrcOpMap.begin()->first.getValueType()) |
| 22213 | return false; |
| 22214 | unsigned NumElts = VT.getVectorNumElements(); |
| 22215 | APInt EltCount = APInt::getNullValue(NumElts); |
| 22216 | M = SrcOpMap.insert(std::make_pair(Src, EltCount)).first; |
| 22217 | SrcOps.push_back(Src); |
| 22218 | } |
| 22219 | |
| 22220 | |
| 22221 | unsigned CIdx = Idx->getZExtValue(); |
| 22222 | if (M->second[CIdx]) |
| 22223 | return false; |
| 22224 | M->second.setBit(CIdx); |
| 22225 | } |
| 22226 | |
| 22227 | if (SrcMask) { |
| 22228 | |
| 22229 | for (SDValue &SrcOp : SrcOps) |
| 22230 | SrcMask->push_back(SrcOpMap[SrcOp]); |
| 22231 | } else { |
| 22232 | |
| 22233 | for (const auto &I : SrcOpMap) |
| 22234 | if (!I.second.isAllOnesValue()) |
| 22235 | return false; |
| 22236 | } |
| 22237 | |
| 22238 | return true; |
| 22239 | } |
| 22240 | |
| 22241 | |
/// Emit a flag-setting "(masked) vector is all zeros" test. On success the
/// returned node sets EFLAGS and \p X86CC is set to COND_E (SETEQ) or
/// COND_NE (SETNE).
static SDValue LowerVectorAllZero(const SDLoc &DL, SDValue V, ISD::CondCode CC,
                                  const APInt &Mask,
                                  const X86Subtarget &Subtarget,
                                  SelectionDAG &DAG, X86::CondCode &X86CC) {
  EVT VT = V.getValueType();
  unsigned ScalarSize = VT.getScalarSizeInBits();
  // The per-element mask width must match the element width.
  if (Mask.getBitWidth() != ScalarSize) {
    assert(ScalarSize == 1 && "Element Mask vs Vector bitwidth mismatch");
    return SDValue();
  }

  assert((CC == ISD::SETEQ || CC == ISD::SETNE) && "Unsupported ISD::CondCode");
  X86CC = (CC == ISD::SETEQ ? X86::COND_E : X86::COND_NE);

  // AND each element with Mask, unless the mask is all ones (a no-op).
  auto MaskBits = [&](SDValue Src) {
    if (Mask.isAllOnesValue())
      return Src;
    EVT SrcVT = Src.getValueType();
    SDValue MaskValue = DAG.getConstant(Mask, DL, SrcVT);
    return DAG.getNode(ISD::AND, DL, SrcVT, Src, MaskValue);
  };

  // For sub-128-bit vectors, cast to a (legal) integer and compare to zero.
  if (VT.getSizeInBits() < 128) {
    EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
    if (!DAG.getTargetLoweringInfo().isTypeLegal(IntVT))
      return SDValue();
    return DAG.getNode(X86ISD::CMP, DL, MVT::i32,
                       DAG.getBitcast(IntVT, MaskBits(V)),
                       DAG.getConstant(0, DL, IntVT));
  }

  // Quit if the vector is not splittable down to 128/256 bits.
  if (!isPowerOf2_32(VT.getSizeInBits()))
    return SDValue();

  // Split down to the largest testable width (256-bit with AVX, else
  // 128-bit), OR-ing the halves together at each step: the OR of the halves
  // is zero iff the whole vector is.
  unsigned TestSize = Subtarget.hasAVX() ? 256 : 128;
  while (VT.getSizeInBits() > TestSize) {
    auto Split = DAG.SplitVector(V, DL);
    VT = Split.first.getValueType();
    V = DAG.getNode(ISD::OR, DL, VT, Split.first, Split.second);
  }

  // With SSE4.1, PTEST V,V sets ZF iff V is all zero.
  bool UsePTEST = Subtarget.hasSSE41();
  if (UsePTEST) {
    MVT TestVT = VT.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
    V = DAG.getBitcast(TestVT, MaskBits(V));
    return DAG.getNode(X86ISD::PTEST, DL, MVT::i32, V, V);
  }

  // Without PTEST, a masked test of elements wider than 32 bits is not
  // handled here (no suitable pre-SSE4.1 compare for the mask form).
  if (!Mask.isAllOnesValue() && VT.getScalarSizeInBits() > 32)
    return SDValue();

  // Fallback: byte-wise compare against zero and check that every byte
  // matched via the 16-bit MOVMSK result.
  V = DAG.getBitcast(MVT::v16i8, MaskBits(V));
  V = DAG.getNode(X86ISD::PCMPEQ, DL, MVT::v16i8, V,
                  getZeroVector(MVT::v16i8, Subtarget, DAG, DL));
  V = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, V);
  return DAG.getNode(X86ISD::CMP, DL, MVT::i32, V,
                     DAG.getConstant(0xFFFF, DL, MVT::i32));
}
| 22305 | |
| 22306 | |
| 22307 | |
/// Match an equality/inequality-with-zero compare of an OR reduction (either
/// a tree of OR'd vector extracts or a vector reduce_or) and lower it via
/// LowerVectorAllZero. On success \p X86CC receives the condition constant.
static SDValue MatchVectorAllZeroTest(SDValue Op, ISD::CondCode CC,
                                      const SDLoc &DL,
                                      const X86Subtarget &Subtarget,
                                      SelectionDAG &DAG, SDValue &X86CC) {
  assert((CC == ISD::SETEQ || CC == ISD::SETNE) && "Unsupported ISD::CondCode");

  if (!Subtarget.hasSSE2() || !Op->hasOneUse())
    return SDValue();

  // Look through a TRUNCATE or an AND-with-constant on the reduction result,
  // tracking which bits of the wider value actually matter.
  APInt Mask = APInt::getAllOnesValue(Op.getScalarValueSizeInBits());
  switch (Op.getOpcode()) {
  case ISD::TRUNCATE: {
    // Truncation keeps only the low bits of the wider source.
    SDValue Src = Op.getOperand(0);
    Mask = APInt::getLowBitsSet(Src.getScalarValueSizeInBits(),
                                Op.getScalarValueSizeInBits());
    Op = Src;
    break;
  }
  case ISD::AND: {
    // An AND with a constant restricts the tested bits to that constant.
    if (auto *Cst = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
      Mask = Cst->getAPIntValue();
      Op = Op.getOperand(0);
    }
    break;
  }
  }

  // Match icmp(or(extract(X,0),extract(X,1)),0) any-of reduction patterns.
  SmallVector<SDValue, 8> VecIns;
  if (Op.getOpcode() == ISD::OR && matchScalarReduction(Op, ISD::OR, VecIns)) {
    EVT VT = VecIns[0].getValueType();
    assert(llvm::all_of(VecIns,
                        [VT](SDValue V) { return VT == V.getValueType(); }) &&
           "Reduction source vector mismatch");

    // Quit if less than 128 bits or not splittable to 128/256-bit vectors.
    if (VT.getSizeInBits() < 128 || !isPowerOf2_32(VT.getSizeInBits()))
      return SDValue();

    // If more than one full vector is evaluated, OR them first.
    for (unsigned Slot = 0, e = VecIns.size(); e - Slot > 1;
         Slot += 2, e += 1) {
      // Each iteration ORs 2 nodes and appends the result until there is
      // only 1 node left, i.e. the final OR'd value of all vectors.
      SDValue LHS = VecIns[Slot];
      SDValue RHS = VecIns[Slot + 1];
      VecIns.push_back(DAG.getNode(ISD::OR, DL, VT, LHS, RHS));
    }

    X86::CondCode CCode;
    if (SDValue V = LowerVectorAllZero(DL, VecIns.back(), CC, Mask, Subtarget,
                                       DAG, CCode)) {
      X86CC = DAG.getTargetConstant(CCode, DL, MVT::i8);
      return V;
    }
  }

  // Match icmp(reduce_or(X),0) any-of reduction patterns.
  if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
    ISD::NodeType BinOp;
    if (SDValue Match =
            DAG.matchBinOpReduction(Op.getNode(), BinOp, {ISD::OR})) {
      X86::CondCode CCode;
      if (SDValue V =
              LowerVectorAllZero(DL, Match, CC, Mask, Subtarget, DAG, CCode)) {
        X86CC = DAG.getTargetConstant(CCode, DL, MVT::i8);
        return V;
      }
    }
  }

  return SDValue();
}
| 22381 | |
| 22382 | |
/// Return true if \p Op has a use that doesn't just read flags — i.e. any
/// user other than BRCOND, SETCC, or the condition operand of a SELECT
/// (possibly reached through a single-use TRUNCATE).
static bool hasNonFlagsUse(SDValue Op) {
  for (SDNode::use_iterator UI = Op->use_begin(), UE = Op->use_end(); UI != UE;
       ++UI) {
    SDNode *User = *UI;
    unsigned UOpNo = UI.getOperandNo();
    if (User->getOpcode() == ISD::TRUNCATE && User->hasOneUse()) {
      // Look past a single-use truncate to its sole user, tracking which
      // operand of that user we feed.
      UOpNo = User->use_begin().getOperandNo();
      User = *User->use_begin();
    }

    // SELECT only reads flags through operand 0 (the condition).
    if (User->getOpcode() != ISD::BRCOND && User->getOpcode() != ISD::SETCC &&
        !(User->getOpcode() == ISD::SELECT && UOpNo == 0))
      return true;
  }
  return false;
}
| 22400 | |
| 22401 | |
| 22402 | |
| 22403 | |
| 22404 | static bool isProfitableToUseFlagOp(SDValue Op) { |
| 22405 | for (SDNode *U : Op->uses()) |
| 22406 | if (U->getOpcode() != ISD::CopyToReg && |
| 22407 | U->getOpcode() != ISD::SETCC && |
| 22408 | U->getOpcode() != ISD::STORE) |
| 22409 | return false; |
| 22410 | |
| 22411 | return true; |
| 22412 | } |
| 22413 | |
| 22414 | |
| 22415 | |
/// Emit nodes that will be selected as "test Op0,Op0", or something
/// equivalent that sets EFLAGS suitably for condition code \p X86CC.
static SDValue EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl,
                        SelectionDAG &DAG, const X86Subtarget &Subtarget) {
  // CF and OF aren't always set the way we want them to be when reusing the
  // flags of an arithmetic op, so work out which flags this condition needs.
  bool NeedCF = false;
  bool NeedOF = false;
  switch (X86CC) {
  default: break;
  case X86::COND_A: case X86::COND_AE:
  case X86::COND_B: case X86::COND_BE:
    NeedCF = true;
    break;
  case X86::COND_G: case X86::COND_GE:
  case X86::COND_L: case X86::COND_LE:
  case X86::COND_O: case X86::COND_NO: {
    // Check if we really need to set the overflow flag. If NoSignedWrap is
    // present, the arithmetic cannot overflow, so OF isn't actually needed.
    switch (Op->getOpcode()) {
    case ISD::ADD:
    case ISD::SUB:
    case ISD::MUL:
    case ISD::SHL:
      if (Op.getNode()->getFlags().hasNoSignedWrap())
        break;
      LLVM_FALLTHROUGH;
    default:
      NeedOF = true;
      break;
    }
    break;
  }
  }
  // See if we can use the EFLAGS value from the operand instead of doing a
  // separate TEST. TEST always sets OF and CF to 0, so unless we proved the
  // arithmetic can't overflow, we can't reuse flags when OF/CF are needed.
  // Also, a secondary result (ResNo != 0) can't be tested directly.
  if (Op.getResNo() != 0 || NeedOF || NeedCF) {
    // Emit a CMP with 0, which is the TEST pattern.
    return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
                       DAG.getConstant(0, dl, Op.getValueType()));
  }
  unsigned Opcode = 0;
  unsigned NumOperands = 0;

  SDValue ArithOp = Op;

  // NOTE: ArithOp holds the arithmetic operation; 'Op' is the value whose
  // users we check. They are the same SDValue here, but the distinction
  // mirrors the hasNonFlagsUse/isProfitableToUseFlagOp queries below.
  switch (ArithOp.getOpcode()) {
  case ISD::AND:
    // If the primary 'and' result isn't used, don't bother using
    // X86ISD::AND, because a TEST instruction will be better.
    if (!hasNonFlagsUse(Op))
      break;

    LLVM_FALLTHROUGH;
  case ISD::ADD:
  case ISD::SUB:
  case ISD::OR:
  case ISD::XOR:
    if (!isProfitableToUseFlagOp(Op))
      break;

    // Otherwise use a regular EFLAGS-setting instruction.
    switch (ArithOp.getOpcode()) {
    default: llvm_unreachable("unexpected operator!");
    case ISD::ADD: Opcode = X86ISD::ADD; break;
    case ISD::SUB: Opcode = X86ISD::SUB; break;
    case ISD::XOR: Opcode = X86ISD::XOR; break;
    case ISD::AND: Opcode = X86ISD::AND; break;
    case ISD::OR: Opcode = X86ISD::OR; break;
    }

    NumOperands = 2;
    break;
  case X86ISD::ADD:
  case X86ISD::SUB:
  case X86ISD::OR:
  case X86ISD::XOR:
  case X86ISD::AND:
    // Already an X86 flag-producing node: reuse its flags result directly.
    return SDValue(Op.getNode(), 1);
  case ISD::SSUBO:
  case ISD::USUBO: {
    // SSUBO/USUBO will become an X86ISD::SUB and we can use its Z flag.
    SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
    return DAG.getNode(X86ISD::SUB, dl, VTs, Op->getOperand(0),
                       Op->getOperand(1)).getValue(1);
  }
  default:
    break;
  }

  if (Opcode == 0) {
    // Emit a CMP with 0, which is the TEST pattern.
    return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
                       DAG.getConstant(0, dl, Op.getValueType()));
  }
  // Re-emit the arithmetic as the flag-producing X86 node and replace all
  // uses of the old value so the non-flag version goes away.
  SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
  SmallVector<SDValue, 4> Ops(Op->op_begin(), Op->op_begin() + NumOperands);

  SDValue New = DAG.getNode(Opcode, dl, VTs, Ops);
  DAG.ReplaceAllUsesOfValueWith(SDValue(Op.getNode(), 0), New);
  return SDValue(New.getNode(), 1);
}
| 22521 | |
| 22522 | |
| 22523 | |
/// Emit nodes that will be selected as "cmp Op0,Op1", or something
/// equivalent, returning the flags result.
static SDValue EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
                       const SDLoc &dl, SelectionDAG &DAG,
                       const X86Subtarget &Subtarget) {
  // Comparing against zero is just a TEST.
  if (isNullConstant(Op1))
    return EmitTest(Op0, X86CC, dl, DAG, Subtarget);

  EVT CmpVT = Op0.getValueType();

  assert((CmpVT == MVT::i8 || CmpVT == MVT::i16 ||
          CmpVT == MVT::i32 || CmpVT == MVT::i64) && "Unexpected VT!");

  // Promote i16 compares with an immediate up to i32: 16-bit immediates need
  // a length-changing prefix and are slow to decode (except on Atom, and not
  // worth the size cost under minsize).
  if (CmpVT == MVT::i16 && !Subtarget.isAtom() &&
      !DAG.getMachineFunction().getFunction().hasMinSize()) {
    ConstantSDNode *COp0 = dyn_cast<ConstantSDNode>(Op0);
    ConstantSDNode *COp1 = dyn_cast<ConstantSDNode>(Op1);
    // Only promote when the immediate doesn't fit in a sign-extended 8-bit
    // field (which has no 16-bit-immediate penalty).
    if ((COp0 && !COp0->getAPIntValue().isSignedIntN(8)) ||
        (COp1 && !COp1->getAPIntValue().isSignedIntN(8))) {
      unsigned ExtendOp =
          isX86CCSigned(X86CC) ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
      if (X86CC == X86::COND_E || X86CC == X86::COND_NE) {
        // For equality comparisons either extension preserves the result;
        // prefer SIGN_EXTEND when an operand is a truncate of a value with
        // enough sign bits, so the extend can fold away.
        if (Op0.getOpcode() == ISD::TRUNCATE) {
          SDValue In = Op0.getOperand(0);
          unsigned EffBits =
              In.getScalarValueSizeInBits() - DAG.ComputeNumSignBits(In) + 1;
          if (EffBits <= 16)
            ExtendOp = ISD::SIGN_EXTEND;
        } else if (Op1.getOpcode() == ISD::TRUNCATE) {
          SDValue In = Op1.getOperand(0);
          unsigned EffBits =
              In.getScalarValueSizeInBits() - DAG.ComputeNumSignBits(In) + 1;
          if (EffBits <= 16)
            ExtendOp = ISD::SIGN_EXTEND;
        }
      }

      CmpVT = MVT::i32;
      Op0 = DAG.getNode(ExtendOp, dl, CmpVT, Op0);
      Op1 = DAG.getNode(ExtendOp, dl, CmpVT, Op1);
    }
  }

  // Try to shrink an i64 unsigned compare to i32 when the upper 32 bits of
  // the non-constant operand are known zero and the constant fits in 32 bits.
  if (CmpVT == MVT::i64 && isa<ConstantSDNode>(Op1) && !isX86CCSigned(X86CC) &&
      Op0.hasOneUse() &&
      cast<ConstantSDNode>(Op1)->getAPIntValue().getActiveBits() <= 32 &&
      DAG.MaskedValueIsZero(Op0, APInt::getHighBitsSet(64, 32))) {
    CmpVT = MVT::i32;
    Op0 = DAG.getNode(ISD::TRUNCATE, dl, CmpVT, Op0);
    Op1 = DAG.getNode(ISD::TRUNCATE, dl, CmpVT, Op1);
  }

  // 0-x == y --> x+y == 0
  // 0-x != y --> x+y != 0
  if (Op0.getOpcode() == ISD::SUB && isNullConstant(Op0.getOperand(0)) &&
      Op0.hasOneUse() && (X86CC == X86::COND_E || X86CC == X86::COND_NE)) {
    SDVTList VTs = DAG.getVTList(CmpVT, MVT::i32);
    SDValue Add = DAG.getNode(X86ISD::ADD, dl, VTs, Op0.getOperand(1), Op1);
    return Add.getValue(1);
  }

  // x == 0-y --> x+y == 0
  // x != 0-y --> x+y != 0
  if (Op1.getOpcode() == ISD::SUB && isNullConstant(Op1.getOperand(0)) &&
      Op1.hasOneUse() && (X86CC == X86::COND_E || X86CC == X86::COND_NE)) {
    SDVTList VTs = DAG.getVTList(CmpVT, MVT::i32);
    SDValue Add = DAG.getNode(X86ISD::ADD, dl, VTs, Op0, Op1.getOperand(1));
    return Add.getValue(1);
  }

  // Use SUB instead of CMP to enable CSE between SUB and CMP.
  SDVTList VTs = DAG.getVTList(CmpVT, MVT::i32);
  SDValue Sub = DAG.getNode(X86ISD::SUB, dl, VTs, Op0, Op1);
  return Sub.getValue(1);
}
| 22604 | |
| 22605 | |
| 22606 | bool X86TargetLowering::isFsqrtCheap(SDValue Op, SelectionDAG &DAG) const { |
| 22607 | EVT VT = Op.getValueType(); |
| 22608 | |
| 22609 | |
| 22610 | if (DAG.getNodeIfExists(X86ISD::FRSQRT, DAG.getVTList(VT), Op)) |
| 22611 | return false; |
| 22612 | |
| 22613 | if (VT.isVector()) |
| 22614 | return Subtarget.hasFastVectorFSQRT(); |
| 22615 | return Subtarget.hasFastScalarFSQRT(); |
| 22616 | } |
| 22617 | |
| 22618 | |
| 22619 | |
/// Produce an rsqrt estimate node for \p Op when profitable.
/// The hardware estimate's minimum architected relative accuracy is 2^-12;
/// one Newton-Raphson refinement step gives a usable f32 result.
SDValue X86TargetLowering::getSqrtEstimate(SDValue Op,
                                           SelectionDAG &DAG, int Enabled,
                                           int &RefinementSteps,
                                           bool &UseOneConstNR,
                                           bool Reciprocal) const {
  EVT VT = Op.getValueType();

  // SSE1 has rsqrtss/rsqrtps (f32/v4f32); AVX adds the 256-bit form and
  // AVX-512 provides RSQRT14 for v16f32. f64 is deliberately excluded:
  // prior to FMA, a double-precision rsqrt estimate with refinement costs
  // many more instructions (convert to single, rsqrtss, convert back,
  // multiple refinement steps) and is unlikely to be a win.
  // NOTE(review): v4f32 requires SSE2 in the non-reciprocal (plain sqrt)
  // case but only SSE1 when Reciprocal — presumably the extra refinement
  // math for sqrt needs SSE2; confirm against the refinement expansion.
  if ((VT == MVT::f32 && Subtarget.hasSSE1()) ||
      (VT == MVT::v4f32 && Subtarget.hasSSE1() && Reciprocal) ||
      (VT == MVT::v4f32 && Subtarget.hasSSE2() && !Reciprocal) ||
      (VT == MVT::v8f32 && Subtarget.hasAVX()) ||
      (VT == MVT::v16f32 && Subtarget.useAVX512Regs())) {
    if (RefinementSteps == ReciprocalEstimate::Unspecified)
      RefinementSteps = 1;

    // Use the standard two-constant Newton-Raphson refinement form.
    UseOneConstNR = false;

    // There is no 512-bit FRSQRT, but AVX-512 has RSQRT14 for v16f32.
    unsigned Opcode = VT == MVT::v16f32 ? X86ISD::RSQRT14 : X86ISD::FRSQRT;
    return DAG.getNode(Opcode, SDLoc(Op), VT, Op);
  }
  return SDValue();
}
| 22650 | |
| 22651 | |
| 22652 | |
/// Produce a reciprocal (1/x) estimate node for \p Op when profitable.
SDValue X86TargetLowering::getRecipEstimate(SDValue Op, SelectionDAG &DAG,
                                            int Enabled,
                                            int &RefinementSteps) const {
  EVT VT = Op.getValueType();

  // SSE1 has rcpss/rcpps (f32/v4f32); AVX adds the 256-bit form and AVX-512
  // provides RCP14 for v16f32. f64 is deliberately excluded: prior to FMA, a
  // double-precision reciprocal estimate with refinement costs many more
  // instructions (convert to single, rcpss, convert back, refinement steps)
  // and is unlikely to be a win.
  if ((VT == MVT::f32 && Subtarget.hasSSE1()) ||
      (VT == MVT::v4f32 && Subtarget.hasSSE1()) ||
      (VT == MVT::v8f32 && Subtarget.hasAVX()) ||
      (VT == MVT::v16f32 && Subtarget.useAVX512Regs())) {
    // Enable estimate codegen for vector division only; scalar division
    // estimates are disabled unless explicitly requested, because they break
    // too much real-world code. These defaults are intended to match GCC
    // behavior.
    if (VT == MVT::f32 && Enabled == ReciprocalEstimate::Unspecified)
      return SDValue();

    if (RefinementSteps == ReciprocalEstimate::Unspecified)
      RefinementSteps = 1;

    // There is no 512-bit FRCP, but AVX-512 has RCP14 for v16f32.
    unsigned Opcode = VT == MVT::v16f32 ? X86ISD::RCP14 : X86ISD::FRCP;
    return DAG.getNode(Opcode, SDLoc(Op), VT, Op);
  }
  return SDValue();
}
| 22684 | |
| 22685 | |
| 22686 | |
| 22687 | |
| 22688 | |
| 22689 | |
| 22690 | |
/// If we have at least two divisions that use the same divisor, convert to
/// multiplication by a reciprocal. This may need to be adjusted for a given
/// CPU if a division's cost is not at least twice the cost of a
/// multiplication, because we still need one division to compute the
/// reciprocal and then two multiplies by it to replace the original
/// divisions.
unsigned X86TargetLowering::combineRepeatedFPDivisors() const {
  return 2;
}
| 22694 | |
/// Custom-lower sdiv by a (possibly negative) power of 2 using a CMOV-based
/// round-toward-zero adjustment followed by an arithmetic shift.
SDValue
X86TargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
                                 SelectionDAG &DAG,
                                 SmallVectorImpl<SDNode *> &Created) const {
  AttributeList Attr = DAG.getMachineFunction().getFunction().getAttributes();
  if (isIntDivCheap(N->getValueType(0), Attr))
    return SDValue(N,0); // Lower SDIV as SDIV

  assert((Divisor.isPowerOf2() || (-Divisor).isPowerOf2()) &&
         "Unexpected divisor!");

  // Only perform this transform if CMOV is supported, otherwise the select
  // below will become a branch.
  if (!Subtarget.hasCMov())
    return SDValue();

  // fold (sdiv X, pow2)
  EVT VT = N->getValueType(0);
  // FIXME: Support i8.
  if (VT != MVT::i16 && VT != MVT::i32 &&
      !(Subtarget.is64Bit() && VT == MVT::i64))
    return SDValue();

  unsigned Lg2 = Divisor.countTrailingZeros();

  // If the divisor is 2 or -2, the default expansion is better.
  if (Lg2 == 1)
    return SDValue();

  SDLoc DL(N);
  SDValue N0 = N->getOperand(0);
  SDValue Zero = DAG.getConstant(0, DL, VT);
  APInt Lg2Mask = APInt::getLowBitsSet(VT.getSizeInBits(), Lg2);
  SDValue Pow2MinusOne = DAG.getConstant(Lg2Mask, DL, VT);

  // If N0 is negative, add (Pow2 - 1) before shifting right so the shift
  // rounds toward zero rather than toward negative infinity.
  SDValue Cmp = DAG.getSetCC(DL, MVT::i8, N0, Zero, ISD::SETLT);
  SDValue Add = DAG.getNode(ISD::ADD, DL, VT, N0, Pow2MinusOne);
  SDValue CMov = DAG.getNode(ISD::SELECT, DL, VT, Cmp, Add, N0);

  Created.push_back(Cmp.getNode());
  Created.push_back(Add.getNode());
  Created.push_back(CMov.getNode());

  // Divide by pow2.
  SDValue SRA =
      DAG.getNode(ISD::SRA, DL, VT, CMov, DAG.getConstant(Lg2, DL, MVT::i8));

  // If we're dividing by a positive value, we're done. Otherwise, we must
  // negate the result.
  if (Divisor.isNonNegative())
    return SRA;

  Created.push_back(SRA.getNode());
  return DAG.getNode(ISD::SUB, DL, VT, Zero, SRA);
}
| 22751 | |
| 22752 | |
| 22753 | |
/// Result of 'and' is compared against zero. Change to a BT node if possible.
/// Returns the BT node, setting \p X86CC to the condition code needed to use
/// it (COND_AE for SETEQ, COND_B otherwise).
static SDValue LowerAndToBT(SDValue And, ISD::CondCode CC,
                            const SDLoc &dl, SelectionDAG &DAG,
                            SDValue &X86CC) {
  assert(And.getOpcode() == ISD::AND && "Expected AND node!");
  SDValue Op0 = And.getOperand(0);
  SDValue Op1 = And.getOperand(1);
  // Look through truncates on both operands.
  if (Op0.getOpcode() == ISD::TRUNCATE)
    Op0 = Op0.getOperand(0);
  if (Op1.getOpcode() == ISD::TRUNCATE)
    Op1 = Op1.getOperand(0);

  SDValue Src, BitNo;
  // Canonicalize a (1 << N) operand into Op0.
  if (Op1.getOpcode() == ISD::SHL)
    std::swap(Op0, Op1);
  if (Op0.getOpcode() == ISD::SHL) {
    // Pattern: X & (1 << N)  -->  BT X, N.
    if (isOneConstant(Op0.getOperand(0))) {
      // If we looked past a truncate, check that it only truncated away
      // known-zero bits; otherwise the bit test could see stale high bits.
      unsigned BitWidth = Op0.getValueSizeInBits();
      unsigned AndBitWidth = And.getValueSizeInBits();
      if (BitWidth > AndBitWidth) {
        KnownBits Known = DAG.computeKnownBits(Op0);
        if (Known.countMinLeadingZeros() < BitWidth - AndBitWidth)
          return SDValue();
      }
      Src = Op1;
      BitNo = Op0.getOperand(1);
    }
  } else if (Op1.getOpcode() == ISD::Constant) {
    ConstantSDNode *AndRHS = cast<ConstantSDNode>(Op1);
    uint64_t AndRHSVal = AndRHS->getZExtValue();
    SDValue AndLHS = Op0;

    // Pattern: (X >> N) & 1  -->  BT X, N.
    if (AndRHSVal == 1 && AndLHS.getOpcode() == ISD::SRL) {
      Src = AndLHS.getOperand(0);
      BitNo = AndLHS.getOperand(1);
    } else {
      // Pattern: X & (1 << C)  -->  BT X, C, but only when the immediate
      // can't be encoded in a TEST instruction (doesn't fit 32 bits, or we
      // are optimizing for size and it doesn't fit a byte).
      bool OptForSize = DAG.shouldOptForSize();
      if ((!isUInt<32>(AndRHSVal) || (OptForSize && !isUInt<8>(AndRHSVal))) &&
          isPowerOf2_64(AndRHSVal)) {
        Src = AndLHS;
        BitNo = DAG.getConstant(Log2_64_Ceil(AndRHSVal), dl,
                                Src.getValueType());
      }
    }
  }

  // No patterns found, give up.
  if (!Src.getNode())
    return SDValue();

  // If Src is i8 or i16, promote it to i32 with any_extend: there is no i8
  // BT instruction, and since the shift amount is in-range-or-undefined the
  // bit test on the wider value is still correct. i32 is preferred over i16
  // for encoding size.
  if (Src.getValueType() == MVT::i8 || Src.getValueType() == MVT::i16)
    Src = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, Src);

  // Prefer the 32-bit BT over the 64-bit one for a shorter encoding. The
  // 32-bit form takes BitNo modulo 32 while the 64-bit form takes it modulo
  // 64, so this is only valid when bit 5 of BitNo is known zero.
  if (Src.getValueType() == MVT::i64 &&
      DAG.MaskedValueIsZero(BitNo, APInt(BitNo.getValueSizeInBits(), 32)))
    Src = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, Src);

  // If the operand types disagree, extend the shift amount to match. Since
  // BT ignores high bits (like shifts) we can use anyextend.
  if (Src.getValueType() != BitNo.getValueType())
    BitNo = DAG.getNode(ISD::ANY_EXTEND, dl, Src.getValueType(), BitNo);

  X86CC = DAG.getTargetConstant(CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B,
                                dl, MVT::i8);
  return DAG.getNode(X86ISD::BT, dl, MVT::i32, Src, BitNo);
}
| 22832 | |
| 22833 | |
| 22834 | |
/// Turn an ISD condition code into the SSE/AVX compare-predicate immediate
/// (third operand of CMPP/CMPS). May swap Op0/Op1 to reach a supported
/// predicate, and reports via \p IsAlwaysSignaling whether the chosen
/// predicate always signals on quiet NaN.
static unsigned translateX86FSETCC(ISD::CondCode SetCCOpcode, SDValue &Op0,
                                   SDValue &Op1, bool &IsAlwaysSignaling) {
  unsigned SSECC;
  bool Swap = false;

  // SSE condition-code immediate mapping (values 8+ are AVX-only):
  //  0 - EQ
  //  1 - LT
  //  2 - LE
  //  3 - UNORD
  //  4 - NEQ
  //  5 - NLT
  //  6 - NLE
  //  7 - ORD
  //  8 - EQ_UQ
  // 12 - NEQ_OQ
  switch (SetCCOpcode) {
  default: llvm_unreachable("Unexpected SETCC condition");
  case ISD::SETOEQ:
  case ISD::SETEQ: SSECC = 0; break;
  // GT/GE have no direct predicate: swap operands and use LT/LE.
  case ISD::SETOGT:
  case ISD::SETGT: Swap = true; LLVM_FALLTHROUGH;
  case ISD::SETLT:
  case ISD::SETOLT: SSECC = 1; break;
  case ISD::SETOGE:
  case ISD::SETGE: Swap = true; LLVM_FALLTHROUGH;
  case ISD::SETLE:
  case ISD::SETOLE: SSECC = 2; break;
  case ISD::SETUO: SSECC = 3; break;
  case ISD::SETUNE:
  case ISD::SETNE: SSECC = 4; break;
  // ULE/ULT swap to reach NLT/NLE (their operand-swapped equivalents).
  case ISD::SETULE: Swap = true; LLVM_FALLTHROUGH;
  case ISD::SETUGE: SSECC = 5; break;
  case ISD::SETULT: Swap = true; LLVM_FALLTHROUGH;
  case ISD::SETUGT: SSECC = 6; break;
  case ISD::SETO: SSECC = 7; break;
  case ISD::SETUEQ: SSECC = 8; break;
  case ISD::SETONE: SSECC = 12; break;
  }
  if (Swap)
    std::swap(Op0, Op1);

  // Only the (un)ordered-equality family of predicates is quiet; every
  // ordered-relational predicate signals on QNaN operands.
  switch (SetCCOpcode) {
  default:
    IsAlwaysSignaling = true;
    break;
  case ISD::SETEQ:
  case ISD::SETOEQ:
  case ISD::SETUEQ:
  case ISD::SETNE:
  case ISD::SETONE:
  case ISD::SETUNE:
  case ISD::SETO:
  case ISD::SETUO:
    IsAlwaysSignaling = false;
    break;
  }

  return SSECC;
}
| 22893 | |
| 22894 | |
| 22895 | |
| 22896 | static SDValue splitIntVSETCC(EVT VT, SDValue LHS, SDValue RHS, |
| 22897 | ISD::CondCode Cond, SelectionDAG &DAG, |
| 22898 | const SDLoc &dl) { |
| 22899 | assert(VT.isInteger() && VT == LHS.getValueType() && |
| 22900 | VT == RHS.getValueType() && "Unsupported VTs!"); |
| 22901 | |
| 22902 | SDValue CC = DAG.getCondCode(Cond); |
| 22903 | |
| 22904 | |
| 22905 | SDValue LHS1, LHS2; |
| 22906 | std::tie(LHS1, LHS2) = splitVector(LHS, DAG, dl); |
| 22907 | |
| 22908 | |
| 22909 | SDValue RHS1, RHS2; |
| 22910 | std::tie(RHS1, RHS2) = splitVector(RHS, DAG, dl); |
| 22911 | |
| 22912 | |
| 22913 | EVT LoVT, HiVT; |
| 22914 | std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT); |
| 22915 | return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, |
| 22916 | DAG.getNode(ISD::SETCC, dl, LoVT, LHS1, RHS1, CC), |
| 22917 | DAG.getNode(ISD::SETCC, dl, HiVT, LHS2, RHS2, CC)); |
| 22918 | } |
| 22919 | |
| 22920 | static SDValue LowerIntVSETCC_AVX512(SDValue Op, SelectionDAG &DAG) { |
| 22921 | |
| 22922 | SDValue Op0 = Op.getOperand(0); |
| 22923 | SDValue Op1 = Op.getOperand(1); |
| 22924 | SDValue CC = Op.getOperand(2); |
| 22925 | MVT VT = Op.getSimpleValueType(); |
| 22926 | SDLoc dl(Op); |
| 22927 | |
| 22928 | assert(VT.getVectorElementType() == MVT::i1 && |
| 22929 | "Cannot set masked compare for this operation"); |
| 22930 | |
| 22931 | ISD::CondCode SetCCOpcode = cast<CondCodeSDNode>(CC)->get(); |
| 22932 | |
| 22933 | |
| 22934 | if (SetCCOpcode == ISD::SETLT) { |
| 22935 | SetCCOpcode = ISD::getSetCCSwappedOperands(SetCCOpcode); |
| 22936 | std::swap(Op0, Op1); |
| 22937 | } |
| 22938 | |
| 22939 | return DAG.getSetCC(dl, VT, Op0, Op1, SetCCOpcode); |
| 22940 | } |
| 22941 | |
| 22942 | |
| 22943 | |
| 22944 | |
| 22945 | |
| 22946 | static SDValue incDecVectorConstant(SDValue V, SelectionDAG &DAG, bool IsInc) { |
| 22947 | auto *BV = dyn_cast<BuildVectorSDNode>(V.getNode()); |
| 22948 | if (!BV) |
| 22949 | return SDValue(); |
| 22950 | |
| 22951 | MVT VT = V.getSimpleValueType(); |
| 22952 | MVT EltVT = VT.getVectorElementType(); |
| 22953 | unsigned NumElts = VT.getVectorNumElements(); |
| 22954 | SmallVector<SDValue, 8> NewVecC; |
| 22955 | SDLoc DL(V); |
| 22956 | for (unsigned i = 0; i < NumElts; ++i) { |
| 22957 | auto *Elt = dyn_cast<ConstantSDNode>(BV->getOperand(i)); |
| 22958 | if (!Elt || Elt->isOpaque() || Elt->getSimpleValueType(0) != EltVT) |
| 22959 | return SDValue(); |
| 22960 | |
| 22961 | |
| 22962 | const APInt &EltC = Elt->getAPIntValue(); |
| 22963 | if ((IsInc && EltC.isMaxValue()) || (!IsInc && EltC.isNullValue())) |
| 22964 | return SDValue(); |
| 22965 | |
| 22966 | NewVecC.push_back(DAG.getConstant(EltC + (IsInc ? 1 : -1), DL, EltVT)); |
| 22967 | } |
| 22968 | |
| 22969 | return DAG.getBuildVector(VT, DL, NewVecC); |
| 22970 | } |
| 22971 | |
| 22972 | |
| 22973 | |
| 22974 | |
| 22975 | |
/// As a special case, lower unsigned vector compares with PSUBUS[BW] when
/// profitable. E.g. for Op0 u<= Op1:
///   t = psubus Op0, Op1
///   pcmpeq t, <0..0>
static SDValue LowerVSETCCWithSUBUS(SDValue Op0, SDValue Op1, MVT VT,
                                    ISD::CondCode Cond, const SDLoc &dl,
                                    const X86Subtarget &Subtarget,
                                    SelectionDAG &DAG) {
  if (!Subtarget.hasSSE2())
    return SDValue();

  // PSUBUS exists only for byte and word elements.
  MVT VET = VT.getVectorElementType();
  if (VET != MVT::i8 && VET != MVT::i16)
    return SDValue();

  switch (Cond) {
  default:
    return SDValue();
  case ISD::SETULT: {
    // If the comparison is against a constant we can turn this into a
    // setule (u< C  ==  u<= C-1). With psubus, setule does not require a
    // swap, which keeps the constant operand hoistable out of loops since
    // pre-AVX compares are destructive on the destination. Only do this
    // pre-AVX; vpcmp* is no longer destructive.
    if (Subtarget.hasAVX())
      return SDValue();
    SDValue ULEOp1 = incDecVectorConstant(Op1, DAG, false);
    if (!ULEOp1)
      return SDValue();
    Op1 = ULEOp1;
    break;
  }
  case ISD::SETUGT: {
    // If the comparison is against a constant, turn it into a setuge
    // (u> C  ==  u>= C+1) with swapped operands. Materializing a zero for
    // the PCMPEQ is likely cheaper than the XOR+PCMPGT alternative needing
    // two different vector constants:
    //   cmpgt (xor X, SignMaskC) CmpC --> cmpeq (usubsat (CmpC+1), X), 0
    SDValue UGEOp1 = incDecVectorConstant(Op1, DAG, true);
    if (!UGEOp1)
      return SDValue();
    Op1 = Op0;
    Op0 = UGEOp1;
    break;
  }
  // SETUGE becomes SETULE with swapped operands.
  case ISD::SETUGE:
    std::swap(Op0, Op1);
    break;
  case ISD::SETULE:
    break;
  }

  // Op0 u<= Op1 iff the unsigned saturating subtraction is zero.
  SDValue Result = DAG.getNode(ISD::USUBSAT, dl, VT, Op0, Op1);
  return DAG.getNode(X86ISD::PCMPEQ, dl, VT, Result,
                     DAG.getConstant(0, dl, VT));
}
| 23028 | |
/// Lower a vector SETCC / STRICT_FSETCC / STRICT_FSETCCS node for X86.
/// Handles FP compares (CMPP/CMPM), AVX-512 mask compares, XOP VPCOM, and
/// the many SSE2 tricks needed for integer compares the ISA lacks.
static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
                           SelectionDAG &DAG) {
  bool IsStrict = Op.getOpcode() == ISD::STRICT_FSETCC ||
                  Op.getOpcode() == ISD::STRICT_FSETCCS;
  // Strict nodes carry the chain as operand 0, shifting the rest by one.
  SDValue Op0 = Op.getOperand(IsStrict ? 1 : 0);
  SDValue Op1 = Op.getOperand(IsStrict ? 2 : 1);
  SDValue CC = Op.getOperand(IsStrict ? 3 : 2);
  MVT VT = Op->getSimpleValueType(0);
  ISD::CondCode Cond = cast<CondCodeSDNode>(CC)->get();
  bool isFP = Op1.getSimpleValueType().isFloatingPoint();
  SDLoc dl(Op);

  if (isFP) {
#ifndef NDEBUG
    MVT EltVT = Op0.getSimpleValueType().getVectorElementType();
    assert(EltVT == MVT::f32 || EltVT == MVT::f64);
#endif

    bool IsSignaling = Op.getOpcode() == ISD::STRICT_FSETCCS;
    SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();

    // If we have a strict compare with a vXi1 result and the input is 128/256
    // bits we can't use a masked compare unless we have VLX. If we use a wider
    // compare like we do for non-strict, we might trigger spurious exceptions
    // from the upper elements. Instead emit a legacy compare and convert to a
    // mask afterwards.
    unsigned Opc;
    if (Subtarget.hasAVX512() && VT.getVectorElementType() == MVT::i1 &&
        (!IsStrict || Subtarget.hasVLX() ||
         Op0.getSimpleValueType().is512BitVector())) {
      assert(VT.getVectorNumElements() <= 16);
      Opc = IsStrict ? X86ISD::STRICT_CMPM : X86ISD::CMPM;
    } else {
      Opc = IsStrict ? X86ISD::STRICT_CMPP : X86ISD::CMPP;
      // The SSE/AVX packed FP comparison nodes are defined with a
      // floating-point vector result that matches the operand type. This
      // allows them to work with an SSE1 target (integer vector types are not
      // legal there).
      VT = Op0.getSimpleValueType();
    }

    SDValue Cmp;
    bool IsAlwaysSignaling;
    unsigned SSECC = translateX86FSETCC(Cond, Op0, Op1, IsAlwaysSignaling);
    if (!Subtarget.hasAVX()) {
      // Pre-AVX, only the 8 basic cmpps/cmppd predicates exist, and they have
      // fixed quiet/signaling behavior.

      // If the strict semantics ask for a quiet compare but the only encoding
      // is signaling, we cannot lower it here.
      if (IsStrict && IsAlwaysSignaling && !IsSignaling)
        return SDValue();

      // If a signaling compare is requested but the chosen predicate is quiet,
      // insert an extra signaling compare (LT_OS) just to raise the exception.
      if (IsStrict && !IsAlwaysSignaling && IsSignaling) {
        SDValue SignalCmp = DAG.getNode(
            Opc, dl, {VT, MVT::Other},
            {Chain, Op0, Op1, DAG.getTargetConstant(1, dl, MVT::i8)});
        // Propagate the FP-exception flags so the MI layer knows this node may
        // raise an exception and doesn't drop it.
        SignalCmp->setFlags(Op->getFlags());
        Chain = SignalCmp.getValue(1);
      }

      // In the two cases not handled by SSE compare predicates (SETUEQ /
      // SETONE), emit two comparisons and a logic op to tie them together.
      if (SSECC >= 8) {
        // LLVM predicate is SETUEQ or SETONE.
        unsigned CC0, CC1;
        unsigned CombineOpc;
        if (Cond == ISD::SETUEQ) {
          CC0 = 3; // UNORD
          CC1 = 0; // EQ
          CombineOpc = X86ISD::FOR;
        } else {
          assert(Cond == ISD::SETONE);
          CC0 = 7; // ORD
          CC1 = 4; // NEQ
          CombineOpc = X86ISD::FAND;
        }

        SDValue Cmp0, Cmp1;
        if (IsStrict) {
          Cmp0 = DAG.getNode(
              Opc, dl, {VT, MVT::Other},
              {Chain, Op0, Op1, DAG.getTargetConstant(CC0, dl, MVT::i8)});
          Cmp1 = DAG.getNode(
              Opc, dl, {VT, MVT::Other},
              {Chain, Op0, Op1, DAG.getTargetConstant(CC1, dl, MVT::i8)});
          // Both compares are ordered on the same incoming chain; merge their
          // output chains.
          Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Cmp0.getValue(1),
                              Cmp1.getValue(1));
        } else {
          Cmp0 = DAG.getNode(
              Opc, dl, VT, Op0, Op1, DAG.getTargetConstant(CC0, dl, MVT::i8));
          Cmp1 = DAG.getNode(
              Opc, dl, VT, Op0, Op1, DAG.getTargetConstant(CC1, dl, MVT::i8));
        }
        Cmp = DAG.getNode(CombineOpc, dl, VT, Cmp0, Cmp1);
      } else {
        // Directly encodable predicate: emit a single compare.
        if (IsStrict) {
          Cmp = DAG.getNode(
              Opc, dl, {VT, MVT::Other},
              {Chain, Op0, Op1, DAG.getTargetConstant(SSECC, dl, MVT::i8)});
          Chain = Cmp.getValue(1);
        } else
          Cmp = DAG.getNode(
              Opc, dl, VT, Op0, Op1, DAG.getTargetConstant(SSECC, dl, MVT::i8));
      }
    } else {
      // AVX: all 32 predicates are available; bit 4 selects signaling.
      if (IsStrict) {
        // Flip the signaling bit when the requested semantics differ from the
        // predicate's inherent behavior.
        SSECC |= (IsAlwaysSignaling ^ IsSignaling) << 4;
        Cmp = DAG.getNode(
            Opc, dl, {VT, MVT::Other},
            {Chain, Op0, Op1, DAG.getTargetConstant(SSECC, dl, MVT::i8)});
        Chain = Cmp.getValue(1);
      } else
        Cmp = DAG.getNode(
            Opc, dl, VT, Op0, Op1, DAG.getTargetConstant(SSECC, dl, MVT::i8));
    }

    if (VT.getFixedSizeInBits() >
        Op.getSimpleValueType().getFixedSizeInBits()) {
      // We emitted a compare with an XMM/YMM result (wider than the requested
      // mask type). Finish converting to a mask via an integer SETNE-vs-zero.
      EVT CastVT = EVT(VT).changeVectorElementTypeToInteger();
      Cmp = DAG.getBitcast(CastVT, Cmp);
      Cmp = DAG.getSetCC(dl, Op.getSimpleValueType(), Cmp,
                         DAG.getConstant(0, dl, CastVT), ISD::SETNE);
    } else {
      // The CMPP result is an FP vector; bitcast it back to the integer result
      // type of the SETCC. The bitcast is expected to be optimized away during
      // combining/isel.
      Cmp = DAG.getBitcast(Op.getSimpleValueType(), Cmp);
    }

    if (IsStrict)
      return DAG.getMergeValues({Cmp, Chain}, dl);

    return Cmp;
  }

  assert(!IsStrict && "Strict SETCC only handles FP operands.");

  MVT VTOp0 = Op0.getSimpleValueType();
  (void)VTOp0;
  assert(VTOp0 == Op1.getSimpleValueType() &&
         "Expected operands with same type!");
  assert(VT.getVectorNumElements() == VTOp0.getVectorNumElements() &&
         "Invalid number of packed elements for source and destination!");

  // The non-AVX512 code below works under the assumption that source and
  // destination types are the same.
  assert((Subtarget.hasAVX512() || (VT == VTOp0)) &&
         "Value types for source and destination must be the same!");

  // With AVX-512 the result is a vXi1 mask.
  if (VT.getVectorElementType() == MVT::i1) {
    // KNL has no compare instruction for i8/i16 elements; those require BWI.
    assert((VTOp0.getScalarSizeInBits() >= 32 || Subtarget.hasBWI()) &&
           "Unexpected operand type");
    return LowerIntVSETCC_AVX512(Op, DAG);
  }

  // Lower using XOP integer comparisons.
  if (VT.is128BitVector() && Subtarget.hasXOP()) {
    // Translate the compare code to the XOP VPCOM compare-mode immediate.
    unsigned CmpMode = 0;
    switch (Cond) {
    default: llvm_unreachable("Unexpected SETCC condition");
    case ISD::SETULT:
    case ISD::SETLT: CmpMode = 0x00; break;
    case ISD::SETULE:
    case ISD::SETLE: CmpMode = 0x01; break;
    case ISD::SETUGT:
    case ISD::SETGT: CmpMode = 0x02; break;
    case ISD::SETUGE:
    case ISD::SETGE: CmpMode = 0x03; break;
    case ISD::SETEQ: CmpMode = 0x04; break;
    case ISD::SETNE: CmpMode = 0x05; break;
    }

    // Unsigned conditions use VPCOMU, signed use VPCOM.
    unsigned Opc =
        ISD::isUnsignedIntSetCC(Cond) ? X86ISD::VPCOMU : X86ISD::VPCOM;

    return DAG.getNode(Opc, dl, VT, Op0, Op1,
                       DAG.getTargetConstant(CmpMode, dl, MVT::i8));
  }

  // (X & C) != 0 --> (X & C) == C when every element of C is a power of 2;
  // the equality form maps directly onto PCMPEQ without an invert.
  if (Cond == ISD::SETNE && ISD::isBuildVectorAllZeros(Op1.getNode())) {
    SDValue BC0 = peekThroughBitcasts(Op0);
    if (BC0.getOpcode() == ISD::AND) {
      APInt UndefElts;
      SmallVector<APInt, 64> EltBits;
      if (getTargetConstantBitsFromNode(BC0.getOperand(1),
                                        VT.getScalarSizeInBits(), UndefElts,
                                        EltBits, false, false)) {
        if (llvm::all_of(EltBits, [](APInt &V) { return V.isPowerOf2(); })) {
          Cond = ISD::SETEQ;
          Op1 = DAG.getBitcast(VT, BC0.getOperand(1));
        }
      }
    }
  }

  // ICMP_EQ(AND(X,C),C) -> SRA(SHL(X,LOG2(C)),BW-1) iff C is a power-of-2
  // splat: shift the tested bit into the sign position and broadcast it.
  if (Cond == ISD::SETEQ && Op0.getOpcode() == ISD::AND &&
      Op0.getOperand(1) == Op1 && Op0.hasOneUse()) {
    ConstantSDNode *C1 = isConstOrConstSplat(Op1);
    if (C1 && C1->getAPIntValue().isPowerOf2()) {
      unsigned BitWidth = VT.getScalarSizeInBits();
      unsigned ShiftAmt = BitWidth - C1->getAPIntValue().logBase2() - 1;

      SDValue Result = Op0.getOperand(0);
      Result = DAG.getNode(ISD::SHL, dl, VT, Result,
                           DAG.getConstant(ShiftAmt, dl, VT));
      Result = DAG.getNode(ISD::SRA, dl, VT, Result,
                           DAG.getConstant(BitWidth - 1, dl, VT));
      return Result;
    }
  }

  // Break 256-bit integer vector compare into smaller ones if AVX2 is absent.
  if (VT.is256BitVector() && !Subtarget.hasInt256())
    return splitIntVSETCC(VT, Op0, Op1, Cond, DAG, dl);

  if (VT == MVT::v32i16 || VT == MVT::v64i8) {
    assert(!Subtarget.hasBWI() && "Unexpected VT with AVX512BW!");
    return splitIntVSETCC(VT, Op0, Op1, Cond, DAG, dl);
  }

  // If Op1 is a splat constant at the edge of the signed range, SETNE can be
  // replaced by a single PCMPGT-style compare instead of PCMPEQ+invert:
  //   X != INT_MIN  -->  X >s INT_MIN
  //   X != INT_MAX  -->  X <s INT_MAX
  //   X != 0 (sign bit known zero)  -->  X >s 0
  APInt ConstValue;
  if (Cond == ISD::SETNE &&
      ISD::isConstantSplatVector(Op1.getNode(), ConstValue)) {
    if (ConstValue.isMinSignedValue())
      Cond = ISD::SETGT;
    else if (ConstValue.isMaxSignedValue())
      Cond = ISD::SETLT;
    else if (ConstValue.isNullValue() && DAG.SignBitIsZero(Op0))
      Cond = ISD::SETGT;
  }

  // If both operands have a known-zero sign bit, an unsigned compare is the
  // same as a signed compare and there is no need to flip sign bits.
  bool FlipSigns = ISD::isUnsignedIntSetCC(Cond) &&
                   !(DAG.SignBitIsZero(Op0) && DAG.SignBitIsZero(Op1));

  // Special case: use min/max operations for unsigned compares:
  // X u<= Y  <=>  X == umin(X, Y), etc.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (ISD::isUnsignedIntSetCC(Cond) &&
      (FlipSigns || ISD::isTrueWhenEqual(Cond)) &&
      TLI.isOperationLegal(ISD::UMIN, VT)) {
    // If we have a constant operand, increment/decrement it so the strict
    // condition becomes non-strict and no invert is needed.
    if (Cond == ISD::SETUGT) {
      // X u> C --> X u>= (C+1) --> X == umax(X, C+1)
      if (SDValue UGTOp1 = incDecVectorConstant(Op1, DAG, /*IsInc=*/true)) {
        Op1 = UGTOp1;
        Cond = ISD::SETUGE;
      }
    }
    if (Cond == ISD::SETULT) {
      // X u< C --> X u<= (C-1) --> X == umin(X, C-1)
      if (SDValue ULTOp1 = incDecVectorConstant(Op1, DAG, /*IsInc=*/false)) {
        Op1 = ULTOp1;
        Cond = ISD::SETULE;
      }
    }
    bool Invert = false;
    unsigned Opc;
    switch (Cond) {
    default: llvm_unreachable("Unexpected condition code");
    case ISD::SETUGT: Invert = true; LLVM_FALLTHROUGH; // X u> Y == !(X u<= Y)
    case ISD::SETULE: Opc = ISD::UMIN; break;
    case ISD::SETULT: Invert = true; LLVM_FALLTHROUGH; // X u< Y == !(X u>= Y)
    case ISD::SETUGE: Opc = ISD::UMAX; break;
    }

    SDValue Result = DAG.getNode(Opc, dl, VT, Op0, Op1);
    Result = DAG.getNode(X86ISD::PCMPEQ, dl, VT, Op0, Result);

    // If the logical-not of the result is required, perform that now.
    if (Invert)
      Result = DAG.getNOT(dl, Result, VT);

    return Result;
  }

  // Try the PSUBUS+PCMPEQ trick for unsigned i8/i16 compares.
  if (FlipSigns)
    if (SDValue V =
            LowerVSETCCWithSUBUS(Op0, Op1, VT, Cond, dl, Subtarget, DAG))
      return V;

  // We are handling one of the integer comparisons here. Since SSE only has
  // GT and EQ comparisons for integers, swapping operands and an inversion
  // may be required for the other predicates.
  unsigned Opc = (Cond == ISD::SETEQ || Cond == ISD::SETNE) ? X86ISD::PCMPEQ
                                                            : X86ISD::PCMPGT;
  bool Swap = Cond == ISD::SETLT || Cond == ISD::SETULT ||
              Cond == ISD::SETGE || Cond == ISD::SETUGE;
  bool Invert = Cond == ISD::SETNE ||
                (Cond != ISD::SETEQ && ISD::isTrueWhenEqual(Cond));

  if (Swap)
    std::swap(Op0, Op1);

  // v2i64 compares need extra care: PCMPGTQ requires SSE4.2 and PCMPEQQ
  // requires SSE4.1, so emulate them with v4i32 ops when missing.
  if (VT == MVT::v2i64) {
    if (Opc == X86ISD::PCMPGT && !Subtarget.hasSSE42()) {
      assert(Subtarget.hasSSE2() && "Don't know how to lower!");

      // Special case: 0 >s X is a sign-bit test. Compare the high halves with
      // a v4i32 PCMPGT and splat the odd (high) lanes over the even ones.
      if (!FlipSigns && !Invert && ISD::isBuildVectorAllZeros(Op0.getNode())) {
        Op0 = DAG.getConstant(0, dl, MVT::v4i32);
        Op1 = DAG.getBitcast(MVT::v4i32, Op1);

        SDValue GT = DAG.getNode(X86ISD::PCMPGT, dl, MVT::v4i32, Op0, Op1);
        static const int MaskHi[] = { 1, 1, 3, 3 };
        SDValue Result = DAG.getVectorShuffle(MVT::v4i32, dl, GT, GT, MaskHi);

        return DAG.getBitcast(VT, Result);
      }

      // Special case: X >s -1 only depends on the sign bit as well.
      if (!FlipSigns && !Invert && ISD::isBuildVectorAllOnes(Op1.getNode())) {
        Op0 = DAG.getBitcast(MVT::v4i32, Op0);
        Op1 = DAG.getConstant(-1, dl, MVT::v4i32);

        SDValue GT = DAG.getNode(X86ISD::PCMPGT, dl, MVT::v4i32, Op0, Op1);
        static const int MaskHi[] = { 1, 1, 3, 3 };
        SDValue Result = DAG.getVectorShuffle(MVT::v4i32, dl, GT, GT, MaskHi);

        return DAG.getBitcast(VT, Result);
      }

      // General case. For an unsigned compare flip the sign bit of both
      // halves; for a signed compare only the high (sign-carrying) halves
      // need the flip so both halves can be compared as unsigned.
      SDValue SB;
      if (FlipSigns) {
        SB = DAG.getConstant(0x8000000080000000ULL, dl, MVT::v2i64);
      } else {
        SB = DAG.getConstant(0x0000000080000000ULL, dl, MVT::v2i64);
      }
      Op0 = DAG.getNode(ISD::XOR, dl, MVT::v2i64, Op0, SB);
      Op1 = DAG.getNode(ISD::XOR, dl, MVT::v2i64, Op1, SB);

      // Cast everything to the right type.
      Op0 = DAG.getBitcast(MVT::v4i32, Op0);
      Op1 = DAG.getBitcast(MVT::v4i32, Op1);

      // Emulate PCMPGTQ as (hi1 > hi2) | ((hi1 == hi2) & (lo1 > lo2)).
      SDValue GT = DAG.getNode(X86ISD::PCMPGT, dl, MVT::v4i32, Op0, Op1);
      SDValue EQ = DAG.getNode(X86ISD::PCMPEQ, dl, MVT::v4i32, Op0, Op1);

      // Splat the high/low halves of each 64-bit lane across the full lane.
      static const int MaskHi[] = { 1, 1, 3, 3 };
      static const int MaskLo[] = { 0, 0, 2, 2 };
      SDValue EQHi = DAG.getVectorShuffle(MVT::v4i32, dl, EQ, EQ, MaskHi);
      SDValue GTLo = DAG.getVectorShuffle(MVT::v4i32, dl, GT, GT, MaskLo);
      SDValue GTHi = DAG.getVectorShuffle(MVT::v4i32, dl, GT, GT, MaskHi);

      SDValue Result = DAG.getNode(ISD::AND, dl, MVT::v4i32, EQHi, GTLo);
      Result = DAG.getNode(ISD::OR, dl, MVT::v4i32, Result, GTHi);

      if (Invert)
        Result = DAG.getNOT(dl, Result, MVT::v4i32);

      return DAG.getBitcast(VT, Result);
    }

    if (Opc == X86ISD::PCMPEQ && !Subtarget.hasSSE41()) {
      // Without PCMPEQQ, synthesize a 64-bit equality compare as
      // pcmpeqd + pshufd + pand.
      assert(Subtarget.hasSSE2() && !FlipSigns && "Don't know how to lower!");

      // First cast everything to the right type.
      Op0 = DAG.getBitcast(MVT::v4i32, Op0);
      Op1 = DAG.getBitcast(MVT::v4i32, Op1);

      // Do the 32-bit compare.
      SDValue Result = DAG.getNode(Opc, dl, MVT::v4i32, Op0, Op1);

      // Make sure the lower and upper halves are both all-ones: AND each lane
      // with its swapped partner.
      static const int Mask[] = { 1, 0, 3, 2 };
      SDValue Shuf = DAG.getVectorShuffle(MVT::v4i32, dl, Result, Result, Mask);
      Result = DAG.getNode(ISD::AND, dl, MVT::v4i32, Result, Shuf);

      if (Invert)
        Result = DAG.getNOT(dl, Result, MVT::v4i32);

      return DAG.getBitcast(VT, Result);
    }
  }

  // Since SSE has no unsigned integer comparisons, flip the sign bits of both
  // inputs so the signed compare gives the unsigned result.
  if (FlipSigns) {
    MVT EltVT = VT.getVectorElementType();
    SDValue SM = DAG.getConstant(APInt::getSignMask(EltVT.getSizeInBits()), dl,
                                 VT);
    Op0 = DAG.getNode(ISD::XOR, dl, VT, Op0, SM);
    Op1 = DAG.getNode(ISD::XOR, dl, VT, Op1, SM);
  }

  SDValue Result = DAG.getNode(Opc, dl, VT, Op0, Op1);

  // If the logical-not of the result is required, perform that now.
  if (Invert)
    Result = DAG.getNOT(dl, Result, VT);

  return Result;
}
| 23458 | |
| 23459 | |
/// Try to select an equality compare of a vXi1 mask (bitcast to scalar) as a
/// KTEST+SETCC or KORTEST+SETCC. On success, returns the flag-producing node
/// and sets X86CC to the condition code to test.
static SDValue EmitAVX512Test(SDValue Op0, SDValue Op1, ISD::CondCode CC,
                              const SDLoc &dl, SelectionDAG &DAG,
                              const X86Subtarget &Subtarget,
                              SDValue &X86CC) {
  // Only support equality comparisons.
  if (CC != ISD::SETEQ && CC != ISD::SETNE)
    return SDValue();

  // Must be a bitcast from a vXi1 mask vector.
  if (Op0.getOpcode() != ISD::BITCAST)
    return SDValue();

  Op0 = Op0.getOperand(0);
  MVT VT = Op0.getSimpleValueType();
  // The mask width must have a matching K-instruction: v16i1 with AVX512F,
  // v8i1 with DQI, v32i1/v64i1 with BWI.
  if (!(Subtarget.hasAVX512() && VT == MVT::v16i1) &&
      !(Subtarget.hasDQI() && VT == MVT::v8i1) &&
      !(Subtarget.hasBWI() && (VT == MVT::v32i1 || VT == MVT::v64i1)))
    return SDValue();

  X86::CondCode X86Cond;
  if (isNullConstant(Op1)) {
    // Compare against zero: KORTEST sets ZF when the mask is all zeros.
    X86Cond = CC == ISD::SETEQ ? X86::COND_E : X86::COND_NE;
  } else if (isAllOnesConstant(Op1)) {
    // Compare against all-ones: KORTEST sets CF when the mask is all ones.
    X86Cond = CC == ISD::SETEQ ? X86::COND_B : X86::COND_AE;
  } else
    return SDValue();

  // If the input is an AND we can fold its operands into a KTEST, but only
  // for a compare against zero and only when the subtarget has a KTEST of
  // this width (DQI for 8/16-bit masks, BWI for 32/64-bit masks).
  bool KTestable = false;
  if (Subtarget.hasDQI() && (VT == MVT::v8i1 || VT == MVT::v16i1))
    KTestable = true;
  if (Subtarget.hasBWI() && (VT == MVT::v32i1 || VT == MVT::v64i1))
    KTestable = true;
  if (!isNullConstant(Op1))
    KTestable = false;
  if (KTestable && Op0.getOpcode() == ISD::AND && Op0.hasOneUse()) {
    SDValue LHS = Op0.getOperand(0);
    SDValue RHS = Op0.getOperand(1);
    X86CC = DAG.getTargetConstant(X86Cond, dl, MVT::i8);
    return DAG.getNode(X86ISD::KTEST, dl, MVT::i32, LHS, RHS);
  }

  // If the input is an OR, we can combine its operands into the KORTEST;
  // otherwise KORTEST the mask with itself.
  SDValue LHS = Op0;
  SDValue RHS = Op0;
  if (Op0.getOpcode() == ISD::OR && Op0.hasOneUse()) {
    LHS = Op0.getOperand(0);
    RHS = Op0.getOperand(1);
  }

  X86CC = DAG.getTargetConstant(X86Cond, dl, MVT::i8);
  return DAG.getNode(X86ISD::KORTEST, dl, MVT::i32, LHS, RHS);
}
| 23514 | |
| 23515 | |
| 23516 | |
/// Emit the EFLAGS-producing node for a scalar integer setcc with the given
/// condition and operands, trying several cheaper patterns (BT, vector
/// all-zero tests, K-register tests, setcc reuse, ADD carry reuse) before
/// falling back to a generic CMP. Sets X86CC to the condition to test.
SDValue X86TargetLowering::emitFlagsForSetcc(SDValue Op0, SDValue Op1,
                                             ISD::CondCode CC, const SDLoc &dl,
                                             SelectionDAG &DAG,
                                             SDValue &X86CC) const {
  // Optimize to BT if possible: an AND against a single bit compared with
  // zero can be a bit-test.
  if (Op0.getOpcode() == ISD::AND && Op0.hasOneUse() && isNullConstant(Op1) &&
      (CC == ISD::SETEQ || CC == ISD::SETNE)) {
    if (SDValue BT = LowerAndToBT(Op0, CC, dl, DAG, X86CC))
      return BT;
  }

  // Try to lower a vector "is (all) zero" reduction compared with 0 into a
  // single flag-setting vector test.
  if (isNullConstant(Op1) && (CC == ISD::SETEQ || CC == ISD::SETNE))
    if (SDValue CmpZ =
            MatchVectorAllZeroTest(Op0, CC, dl, Subtarget, DAG, X86CC))
      return CmpZ;

  // Try to lower an AVX-512 mask compare with KORTEST/KTEST.
  if (SDValue Test = EmitAVX512Test(Op0, Op1, CC, dl, DAG, Subtarget, X86CC))
    return Test;

  // Look for X == 0, X == 1, X != 0, or X != 1 where X is itself a setcc;
  // we can then reuse (or invert) the existing flags instead of re-comparing.
  if ((isOneConstant(Op1) || isNullConstant(Op1)) &&
      (CC == ISD::SETEQ || CC == ISD::SETNE)) {

    if (Op0.getOpcode() == X86ISD::SETCC) {
      // (setcc X) == 0 and (setcc X) != 1 both mean the opposite condition.
      bool Invert = (CC == ISD::SETNE) ^ isNullConstant(Op1);

      X86CC = Op0.getOperand(0);
      if (Invert) {
        X86::CondCode CCode = (X86::CondCode)Op0.getConstantOperandVal(0);
        CCode = X86::GetOppositeBranchCondition(CCode);
        X86CC = DAG.getTargetConstant(CCode, dl, MVT::i8);
      }

      // Reuse the flag input of the existing setcc.
      return Op0.getOperand(1);
    }
  }

  // (X + -1) ==/!= -1 can reuse the carry of a flag-producing ADD:
  // adding -1 wraps (sets CF) exactly when X != 0, i.e. X + -1 != -1.
  if (isAllOnesConstant(Op1) && Op0.getOpcode() == ISD::ADD &&
      Op0.getOperand(1) == Op1 && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
    if (isProfitableToUseFlagOp(Op0)) {
      SDVTList VTs = DAG.getVTList(Op0.getValueType(), MVT::i32);

      // Replace the plain ADD with a flag-producing X86ISD::ADD and retarget
      // all users of the old value.
      SDValue New = DAG.getNode(X86ISD::ADD, dl, VTs, Op0.getOperand(0),
                                Op0.getOperand(1));
      DAG.ReplaceAllUsesOfValueWith(SDValue(Op0.getNode(), 0), New);
      X86::CondCode CCode = CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B;
      X86CC = DAG.getTargetConstant(CCode, dl, MVT::i8);
      return SDValue(New.getNode(), 1);
    }
  }

  // Generic case: translate the condition and emit a CMP/TEST.
  X86::CondCode CondCode =
      TranslateX86CC(CC, dl, /*IsFP=*/false, Op0, Op1, DAG);
  assert(CondCode != X86::COND_INVALID && "Unexpected condition code!");

  SDValue EFLAGS = EmitCmp(Op0, Op1, CondCode, dl, DAG, Subtarget);
  X86CC = DAG.getTargetConstant(CondCode, dl, MVT::i8);
  return EFLAGS;
}
| 23586 | |
/// Lower a scalar SETCC / STRICT_FSETCC / STRICT_FSETCCS to an X86ISD::SETCC
/// of the appropriate EFLAGS-producing node. Vector compares are dispatched
/// to LowerVSETCC.
SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {

  bool IsStrict = Op.getOpcode() == ISD::STRICT_FSETCC ||
                  Op.getOpcode() == ISD::STRICT_FSETCCS;
  MVT VT = Op->getSimpleValueType(0);

  if (VT.isVector()) return LowerVSETCC(Op, Subtarget, DAG);

  assert(VT == MVT::i8 && "SetCC type must be 8-bit integer");
  SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
  SDValue Op0 = Op.getOperand(IsStrict ? 1 : 0);
  SDValue Op1 = Op.getOperand(IsStrict ? 2 : 1);
  SDLoc dl(Op);
  ISD::CondCode CC =
      cast<CondCodeSDNode>(Op.getOperand(IsStrict ? 3 : 2))->get();

  // Soften f128 compares into a libcall; the result may either be a final
  // boolean value or a new integer compare to lower below.
  if (Op0.getValueType() == MVT::f128) {
    softenSetCCOperands(DAG, MVT::f128, Op0, Op1, CC, dl, Op0, Op1, Chain,
                        Op.getOpcode() == ISD::STRICT_FSETCCS);

    // If softenSetCCOperands returned a scalar, use it directly.
    if (!Op1.getNode()) {
      assert(Op0.getValueType() == Op.getValueType() &&
             "Unexpected setcc expansion!");
      if (IsStrict)
        return DAG.getMergeValues({Op0, Chain}, dl);
      return Op0;
    }
  }

  if (Op0.getSimpleValueType().isInteger()) {
    // Canonicalize SETGT/SETUGT with a constant RHS into SETGE/SETUGE with
    // RHS+1 when that is safe (no signed/unsigned wrap) and does not make the
    // immediate worse: the new constant must still fit a signed 32-bit
    // immediate, and we only do it if the old constant did not fit in 8 bits
    // or the new one still does, so we never grow an imm8 into an imm32.
    if (auto *Op1C = dyn_cast<ConstantSDNode>(Op1)) {
      const APInt &Op1Val = Op1C->getAPIntValue();
      if (!Op1Val.isNullValue()) {
        // Ensure the constant+1 doesn't overflow.
        if ((CC == ISD::CondCode::SETGT && !Op1Val.isMaxSignedValue()) ||
            (CC == ISD::CondCode::SETUGT && !Op1Val.isMaxValue())) {
          APInt Op1ValPlusOne = Op1Val + 1;
          if (Op1ValPlusOne.isSignedIntN(32) &&
              (!Op1Val.isSignedIntN(8) || Op1ValPlusOne.isSignedIntN(8))) {
            Op1 = DAG.getConstant(Op1ValPlusOne, dl, Op0.getValueType());
            CC = CC == ISD::CondCode::SETGT ? ISD::CondCode::SETGE
                                            : ISD::CondCode::SETUGE;
          }
        }
      }
    }

    SDValue X86CC;
    SDValue EFLAGS = emitFlagsForSetcc(Op0, Op1, CC, dl, DAG, X86CC);
    SDValue Res = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, X86CC, EFLAGS);
    return IsStrict ? DAG.getMergeValues({Res, Chain}, dl) : Res;
  }

  // Handle floating point.
  X86::CondCode CondCode = TranslateX86CC(CC, dl, /*IsFP=*/true, Op0, Op1, DAG);
  if (CondCode == X86::COND_INVALID)
    return SDValue();

  SDValue EFLAGS;
  if (IsStrict) {
    // Strict FP compare keeps the chain and uses the signaling variant for
    // STRICT_FSETCCS.
    bool IsSignaling = Op.getOpcode() == ISD::STRICT_FSETCCS;
    EFLAGS =
        DAG.getNode(IsSignaling ? X86ISD::STRICT_FCMPS : X86ISD::STRICT_FCMP,
                    dl, {MVT::i32, MVT::Other}, {Chain, Op0, Op1});
    Chain = EFLAGS.getValue(1);
  } else {
    EFLAGS = DAG.getNode(X86ISD::FCMP, dl, MVT::i32, Op0, Op1);
  }

  SDValue X86CC = DAG.getTargetConstant(CondCode, dl, MVT::i8);
  SDValue Res = DAG.getNode(X86ISD::SETCC, dl, MVT::i8, X86CC, EFLAGS);
  return IsStrict ? DAG.getMergeValues({Res, Chain}, dl) : Res;
}
| 23673 | |
/// Lower SETCCCARRY: a setcc between LHS and RHS that also consumes an
/// incoming carry. Lowered as SBB (subtract-with-borrow) and a SETCC on the
/// resulting flags.
SDValue X86TargetLowering::LowerSETCCCARRY(SDValue Op, SelectionDAG &DAG) const {
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue Carry = Op.getOperand(2);
  SDValue Cond = Op.getOperand(3);
  SDLoc DL(Op);

  assert(LHS.getSimpleValueType().isInteger() && "SETCCCARRY is integer only.");
  X86::CondCode CC = TranslateIntegerX86CC(cast<CondCodeSDNode>(Cond)->get());

  // Recreate the carry in EFLAGS: adding all-ones to the carry value sets CF
  // exactly when the incoming carry is non-zero.
  EVT CarryVT = Carry.getValueType();
  Carry = DAG.getNode(X86ISD::ADD, DL, DAG.getVTList(CarryVT, MVT::i32),
                      Carry, DAG.getAllOnesConstant(DL, CarryVT));

  // SBB consumes CF; the compare result is in the flags output (value #1).
  SDVTList VTs = DAG.getVTList(LHS.getValueType(), MVT::i32);
  SDValue Cmp = DAG.getNode(X86ISD::SBB, DL, VTs, LHS, RHS, Carry.getValue(1));
  return getSETCC(CC, Cmp.getValue(1), DL, DAG);
}
| 23693 | |
| 23694 | |
| 23695 | |
| 23696 | |
| 23697 | |
/// Map an overflow-detecting arithmetic node (S/UADDO, S/USUBO, S/UMULO) to
/// the corresponding flag-producing X86 node. Returns the {value, overflow}
/// pair and sets Cond to the X86 condition that indicates overflow.
static std::pair<SDValue, SDValue>
getX86XALUOOp(X86::CondCode &Cond, SDValue Op, SelectionDAG &DAG) {
  assert(Op.getResNo() == 0 && "Unexpected result number!");
  SDValue Value, Overflow;
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  unsigned BaseOp = 0;
  SDLoc DL(Op);
  switch (Op.getOpcode()) {
  default: llvm_unreachable("Unknown ovf instruction!");
  case ISD::SADDO:
    BaseOp = X86ISD::ADD;
    Cond = X86::COND_O;
    break;
  case ISD::UADDO:
    BaseOp = X86ISD::ADD;
    // X + 1 overflows exactly when the result wraps to 0, so test ZF instead
    // of CF; this also allows the add to be selected as INC, which does not
    // update CF.
    Cond = isOneConstant(RHS) ? X86::COND_E : X86::COND_B;
    break;
  case ISD::SSUBO:
    BaseOp = X86ISD::SUB;
    Cond = X86::COND_O;
    break;
  case ISD::USUBO:
    BaseOp = X86ISD::SUB;
    Cond = X86::COND_B;
    break;
  case ISD::SMULO:
    BaseOp = X86ISD::SMUL;
    Cond = X86::COND_O;
    break;
  case ISD::UMULO:
    BaseOp = X86ISD::UMUL;
    Cond = X86::COND_O;
    break;
  }

  if (BaseOp) {
    // Emit the node with a second result carrying EFLAGS.
    SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
    Value = DAG.getNode(BaseOp, DL, VTs, LHS, RHS);
    Overflow = Value.getValue(1);
  }

  return std::make_pair(Value, Overflow);
}
| 23743 | |
/// Lower an "arithmetic with overflow" node into the regular flag-producing
/// instruction plus a SETCC that reads the overflow condition. The pair is
/// returned via MERGE_VALUES to match the node's two results.
static SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG) {
  SDLoc DL(Op);
  X86::CondCode Cond;
  SDValue Value, Overflow;
  std::tie(Value, Overflow) = getX86XALUOOp(Cond, Op, DAG);

  SDValue SetCC = getSETCC(Cond, Overflow, DL, DAG);
  assert(Op->getValueType(1) == MVT::i8 && "Unexpected VT!");
  return DAG.getNode(ISD::MERGE_VALUES, DL, Op->getVTList(), Value, SetCC);
}
| 23758 | |
| 23759 | |
| 23760 | static bool isX86LogicalCmp(SDValue Op) { |
| 23761 | unsigned Opc = Op.getOpcode(); |
| 23762 | if (Opc == X86ISD::CMP || Opc == X86ISD::COMI || Opc == X86ISD::UCOMI || |
| 23763 | Opc == X86ISD::FCMP) |
| 23764 | return true; |
| 23765 | if (Op.getResNo() == 1 && |
| 23766 | (Opc == X86ISD::ADD || Opc == X86ISD::SUB || Opc == X86ISD::ADC || |
| 23767 | Opc == X86ISD::SBB || Opc == X86ISD::SMUL || Opc == X86ISD::UMUL || |
| 23768 | Opc == X86ISD::OR || Opc == X86ISD::XOR || Opc == X86ISD::AND)) |
| 23769 | return true; |
| 23770 | |
| 23771 | return false; |
| 23772 | } |
| 23773 | |
| 23774 | static bool isTruncWithZeroHighBitsInput(SDValue V, SelectionDAG &DAG) { |
| 23775 | if (V.getOpcode() != ISD::TRUNCATE) |
| 23776 | return false; |
| 23777 | |
| 23778 | SDValue VOp0 = V.getOperand(0); |
| 23779 | unsigned InBits = VOp0.getValueSizeInBits(); |
| 23780 | unsigned Bits = V.getValueSizeInBits(); |
| 23781 | return DAG.MaskedValueIsZero(VOp0, APInt::getHighBitsSet(InBits,InBits-Bits)); |
| 23782 | } |
| 23783 | |
| 23784 | SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { |
| 23785 | bool AddTest = true; |
| 23786 | SDValue Cond = Op.getOperand(0); |
| 23787 | SDValue Op1 = Op.getOperand(1); |
| 23788 | SDValue Op2 = Op.getOperand(2); |
| 23789 | SDLoc DL(Op); |
| 23790 | MVT VT = Op1.getSimpleValueType(); |
| 23791 | SDValue CC; |
| 23792 | |
| 23793 | |
| 23794 | |
| 23795 | |
| 23796 | if (Cond.getOpcode() == ISD::SETCC && isScalarFPTypeInSSEReg(VT) && |
| 23797 | VT == Cond.getOperand(0).getSimpleValueType() && Cond->hasOneUse()) { |
| 23798 | SDValue CondOp0 = Cond.getOperand(0), CondOp1 = Cond.getOperand(1); |
| 23799 | bool IsAlwaysSignaling; |
| 23800 | unsigned SSECC = |
| 23801 | translateX86FSETCC(cast<CondCodeSDNode>(Cond.getOperand(2))->get(), |
| 23802 | CondOp0, CondOp1, IsAlwaysSignaling); |
| 23803 | |
| 23804 | if (Subtarget.hasAVX512()) { |
| 23805 | SDValue Cmp = |
| 23806 | DAG.getNode(X86ISD::FSETCCM, DL, MVT::v1i1, CondOp0, CondOp1, |
| 23807 | DAG.getTargetConstant(SSECC, DL, MVT::i8)); |
| 23808 | assert(!VT.isVector() && "Not a scalar type?"); |
| 23809 | return DAG.getNode(X86ISD::SELECTS, DL, VT, Cmp, Op1, Op2); |
| 23810 | } |
| 23811 | |
| 23812 | if (SSECC < 8 || Subtarget.hasAVX()) { |
| 23813 | SDValue Cmp = DAG.getNode(X86ISD::FSETCC, DL, VT, CondOp0, CondOp1, |
| 23814 | DAG.getTargetConstant(SSECC, DL, MVT::i8)); |
| 23815 | |
| 23816 | |
| 23817 | |
| 23818 | |
| 23819 | |
| 23820 | |
| 23821 | |
| 23822 | |
| 23823 | |
| 23824 | |
| 23825 | |
| 23826 | |
| 23827 | |
| 23828 | if (Subtarget.hasAVX() && !isNullFPConstant(Op1) && |
| 23829 | !isNullFPConstant(Op2)) { |
| 23830 | |
| 23831 | |
| 23832 | MVT VecVT = VT == MVT::f32 ? MVT::v4f32 : MVT::v2f64; |
| 23833 | SDValue VOp1 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, Op1); |
| 23834 | SDValue VOp2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, Op2); |
| 23835 | SDValue VCmp = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, Cmp); |
| 23836 | |
| 23837 | MVT VCmpVT = VT == MVT::f32 ? MVT::v4i32 : MVT::v2i64; |
| 23838 | VCmp = DAG.getBitcast(VCmpVT, VCmp); |
| 23839 | |
| 23840 | SDValue VSel = DAG.getSelect(DL, VecVT, VCmp, VOp1, VOp2); |
| 23841 | |
| 23842 | return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, |
| 23843 | VSel, DAG.getIntPtrConstant(0, DL)); |
| 23844 | } |
| 23845 | SDValue AndN = DAG.getNode(X86ISD::FANDN, DL, VT, Cmp, Op2); |
| 23846 | SDValue And = DAG.getNode(X86ISD::FAND, DL, VT, Cmp, Op1); |
| 23847 | return DAG.getNode(X86ISD::FOR, DL, VT, AndN, And); |
| 23848 | } |
| 23849 | } |
| 23850 | |
| 23851 | |
| 23852 | if (isScalarFPTypeInSSEReg(VT) && Subtarget.hasAVX512()) { |
| 23853 | SDValue Cmp = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v1i1, Cond); |
| 23854 | return DAG.getNode(X86ISD::SELECTS, DL, VT, Cmp, Op1, Op2); |
| 23855 | } |
| 23856 | |
| 23857 | if (Cond.getOpcode() == ISD::SETCC) { |
| 23858 | if (SDValue NewCond = LowerSETCC(Cond, DAG)) { |
| 23859 | Cond = NewCond; |
| 23860 | |
| 23861 | |
| 23862 | |
| 23863 | Op1 = Op.getOperand(1); |
| 23864 | Op2 = Op.getOperand(2); |
| 23865 | } |
| 23866 | } |
| 23867 | |
| 23868 | |
| 23869 | |
| 23870 | |
| 23871 | |
| 23872 | |
| 23873 | |
| 23874 | if (Cond.getOpcode() == X86ISD::SETCC && |
| 23875 | Cond.getOperand(1).getOpcode() == X86ISD::CMP && |
| 23876 | isNullConstant(Cond.getOperand(1).getOperand(1))) { |
| 23877 | SDValue Cmp = Cond.getOperand(1); |
| 23878 | SDValue CmpOp0 = Cmp.getOperand(0); |
| 23879 | unsigned CondCode = Cond.getConstantOperandVal(0); |
| 23880 | |
| 23881 | |
| 23882 | |
| 23883 | |
| 23884 | |
| 23885 | |
| 23886 | auto MatchFFSMinus1 = [&](SDValue Op1, SDValue Op2) { |
| 23887 | return (Op1.getOpcode() == ISD::CTTZ_ZERO_UNDEF && Op1.hasOneUse() && |
| 23888 | Op1.getOperand(0) == CmpOp0 && isAllOnesConstant(Op2)); |
| 23889 | }; |
| 23890 | if (Subtarget.hasCMov() && (VT == MVT::i32 || VT == MVT::i64) && |
| 23891 | ((CondCode == X86::COND_NE && MatchFFSMinus1(Op1, Op2)) || |
| 23892 | (CondCode == X86::COND_E && MatchFFSMinus1(Op2, Op1)))) { |
| 23893 | |
| 23894 | } else if ((isAllOnesConstant(Op1) || isAllOnesConstant(Op2)) && |
| 23895 | (CondCode == X86::COND_E || CondCode == X86::COND_NE)) { |
| 23896 | SDValue Y = isAllOnesConstant(Op2) ? Op1 : Op2; |
| 23897 | |
| 23898 | SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32); |
| 23899 | SDVTList CmpVTs = DAG.getVTList(CmpOp0.getValueType(), MVT::i32); |
| 23900 | |
| 23901 | |
| 23902 | |
| 23903 | |
| 23904 | if (isNullConstant(Y) && |
| 23905 | (isAllOnesConstant(Op1) == (CondCode == X86::COND_NE))) { |
| 23906 | SDValue Zero = DAG.getConstant(0, DL, CmpOp0.getValueType()); |
| 23907 | SDValue Neg = DAG.getNode(X86ISD::SUB, DL, CmpVTs, Zero, CmpOp0); |
| 23908 | Zero = DAG.getConstant(0, DL, Op.getValueType()); |
| 23909 | return DAG.getNode(X86ISD::SBB, DL, VTs, Zero, Zero, Neg.getValue(1)); |
| 23910 | } |
| 23911 | |
| 23912 | Cmp = DAG.getNode(X86ISD::SUB, DL, CmpVTs, |
| 23913 | CmpOp0, DAG.getConstant(1, DL, CmpOp0.getValueType())); |
| 23914 | |
| 23915 | SDValue Zero = DAG.getConstant(0, DL, Op.getValueType()); |
| 23916 | SDValue Res = |
| 23917 | DAG.getNode(X86ISD::SBB, DL, VTs, Zero, Zero, Cmp.getValue(1)); |
| 23918 | |
| 23919 | if (isAllOnesConstant(Op1) != (CondCode == X86::COND_E)) |
| 23920 | Res = DAG.getNOT(DL, Res, Res.getValueType()); |
| 23921 | |
| 23922 | return DAG.getNode(ISD::OR, DL, Res.getValueType(), Res, Y); |
| 23923 | } else if (!Subtarget.hasCMov() && CondCode == X86::COND_E && |
| 23924 | Cmp.getOperand(0).getOpcode() == ISD::AND && |
| 23925 | isOneConstant(Cmp.getOperand(0).getOperand(1))) { |
| 23926 | SDValue Src1, Src2; |
| 23927 | |
| 23928 | |
| 23929 | |
| 23930 | auto isOrXorPattern = [&]() { |
| 23931 | if ((Op2.getOpcode() == ISD::XOR || Op2.getOpcode() == ISD::OR) && |
| 23932 | (Op2.getOperand(0) == Op1 || Op2.getOperand(1) == Op1)) { |
| 23933 | Src1 = |
| 23934 | Op2.getOperand(0) == Op1 ? Op2.getOperand(1) : Op2.getOperand(0); |
| 23935 | Src2 = Op1; |
| 23936 | return true; |
| 23937 | } |
| 23938 | return false; |
| 23939 | }; |
| 23940 | |
| 23941 | if (isOrXorPattern()) { |
| 23942 | SDValue Neg; |
| 23943 | unsigned int CmpSz = CmpOp0.getSimpleValueType().getSizeInBits(); |
| 23944 | |
| 23945 | |
| 23946 | if (CmpSz > VT.getSizeInBits()) |
| 23947 | Neg = DAG.getNode(ISD::TRUNCATE, DL, VT, CmpOp0); |
| 23948 | else if (CmpSz < VT.getSizeInBits()) |
| 23949 | Neg = DAG.getNode(ISD::AND, DL, VT, |
| 23950 | DAG.getNode(ISD::ANY_EXTEND, DL, VT, CmpOp0.getOperand(0)), |
| 23951 | DAG.getConstant(1, DL, VT)); |
| 23952 | else |
| 23953 | Neg = CmpOp0; |
| 23954 | SDValue Mask = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), |
| 23955 | Neg); |
| 23956 | SDValue And = DAG.getNode(ISD::AND, DL, VT, Mask, Src1); |
| 23957 | return DAG.getNode(Op2.getOpcode(), DL, VT, And, Src2); |
| 23958 | } |
| 23959 | } |
| 23960 | } |
| 23961 | |
| 23962 | |
| 23963 | if (Cond.getOpcode() == ISD::AND && |
| 23964 | Cond.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY && |
| 23965 | isOneConstant(Cond.getOperand(1))) |
| 23966 | Cond = Cond.getOperand(0); |
| 23967 | |
| 23968 | |
| 23969 | |
| 23970 | unsigned CondOpcode = Cond.getOpcode(); |
| 23971 | if (CondOpcode == X86ISD::SETCC || |
| 23972 | CondOpcode == X86ISD::SETCC_CARRY) { |
| 23973 | CC = Cond.getOperand(0); |
| 23974 | |
| 23975 | SDValue Cmp = Cond.getOperand(1); |
| 23976 | bool IllegalFPCMov = false; |
| 23977 | if (VT.isFloatingPoint() && !VT.isVector() && |
| 23978 | !isScalarFPTypeInSSEReg(VT) && Subtarget.hasCMov()) |
| 23979 | IllegalFPCMov = !hasFPCMov(cast<ConstantSDNode>(CC)->getSExtValue()); |
| 23980 | |
| 23981 | if ((isX86LogicalCmp(Cmp) && !IllegalFPCMov) || |
| 23982 | Cmp.getOpcode() == X86ISD::BT) { |
| 23983 | Cond = Cmp; |
| 23984 | AddTest = false; |
| 23985 | } |
| 23986 | } else if (CondOpcode == ISD::USUBO || CondOpcode == ISD::SSUBO || |
| 23987 | CondOpcode == ISD::UADDO || CondOpcode == ISD::SADDO || |
| 23988 | CondOpcode == ISD::UMULO || CondOpcode == ISD::SMULO) { |
| 23989 | SDValue Value; |
| 23990 | X86::CondCode X86Cond; |
| 23991 | std::tie(Value, Cond) = getX86XALUOOp(X86Cond, Cond.getValue(0), DAG); |
| 23992 | |
| 23993 | CC = DAG.getTargetConstant(X86Cond, DL, MVT::i8); |
| 23994 | AddTest = false; |
| 23995 | } |
| 23996 | |
| 23997 | if (AddTest) { |
| 23998 | |
| 23999 | if (isTruncWithZeroHighBitsInput(Cond, DAG)) |
| 24000 | Cond = Cond.getOperand(0); |
| 24001 | |
| 24002 | |
| 24003 | |
| 24004 | if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) { |
| 24005 | SDValue BTCC; |
| 24006 | if (SDValue BT = LowerAndToBT(Cond, ISD::SETNE, DL, DAG, BTCC)) { |
| 24007 | CC = BTCC; |
| 24008 | Cond = BT; |
| 24009 | AddTest = false; |
| 24010 | } |
| 24011 | } |
| 24012 | } |
| 24013 | |
| 24014 | if (AddTest) { |
| 24015 | CC = DAG.getTargetConstant(X86::COND_NE, DL, MVT::i8); |
| 24016 | Cond = EmitTest(Cond, X86::COND_NE, DL, DAG, Subtarget); |
| 24017 | } |
| 24018 | |
| 24019 | |
| 24020 | |
| 24021 | |
| 24022 | |
| 24023 | if (Cond.getOpcode() == X86ISD::SUB) { |
| 24024 | unsigned CondCode = cast<ConstantSDNode>(CC)->getZExtValue(); |
| 24025 | |
| 24026 | if ((CondCode == X86::COND_AE || CondCode == X86::COND_B) && |
| 24027 | (isAllOnesConstant(Op1) || isAllOnesConstant(Op2)) && |
| 24028 | (isNullConstant(Op1) || isNullConstant(Op2))) { |
| 24029 | SDValue Res = |
| 24030 | DAG.getNode(X86ISD::SETCC_CARRY, DL, Op.getValueType(), |
| 24031 | DAG.getTargetConstant(X86::COND_B, DL, MVT::i8), Cond); |
| 24032 | if (isAllOnesConstant(Op1) != (CondCode == X86::COND_B)) |
| 24033 | return DAG.getNOT(DL, Res, Res.getValueType()); |
| 24034 | return Res; |
| 24035 | } |
| 24036 | } |
| 24037 | |
| 24038 | |
| 24039 | |
| 24040 | |
| 24041 | if (Op.getValueType() == MVT::i8 && |
| 24042 | Op1.getOpcode() == ISD::TRUNCATE && Op2.getOpcode() == ISD::TRUNCATE) { |
| 24043 | SDValue T1 = Op1.getOperand(0), T2 = Op2.getOperand(0); |
| 24044 | if (T1.getValueType() == T2.getValueType() && |
| 24045 | |
| 24046 | T1.getOpcode() != ISD::CopyFromReg && T2.getOpcode()!=ISD::CopyFromReg){ |
| 24047 | SDValue Cmov = DAG.getNode(X86ISD::CMOV, DL, T1.getValueType(), T2, T1, |
| 24048 | CC, Cond); |
| 24049 | return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Cmov); |
| 24050 | } |
| 24051 | } |
| 24052 | |
| 24053 | |
| 24054 | |
| 24055 | |
| 24056 | |
| 24057 | |
| 24058 | |
| 24059 | if ((Op.getValueType() == MVT::i8 && Subtarget.hasCMov()) || |
| 24060 | (Op.getValueType() == MVT::i16 && !MayFoldLoad(Op1) && |
| 24061 | !MayFoldLoad(Op2))) { |
| 24062 | Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op1); |
| 24063 | Op2 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Op2); |
| 24064 | SDValue Ops[] = { Op2, Op1, CC, Cond }; |
| 24065 | SDValue Cmov = DAG.getNode(X86ISD::CMOV, DL, MVT::i32, Ops); |
| 24066 | return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Cmov); |
| 24067 | } |
| 24068 | |
| 24069 | |
| 24070 | |
| 24071 | SDValue Ops[] = { Op2, Op1, CC, Cond }; |
| 24072 | return DAG.getNode(X86ISD::CMOV, DL, Op.getValueType(), Ops); |
| 24073 | } |
| 24074 | |
/// Lower a sign/zero/any-extend whose source is a vXi1 mask vector.
/// The mask is materialized into a full integer vector (via the native
/// extend when the subtarget supports it for that element width, or a
/// select between all-ones and zero otherwise), widening to 512 bits
/// when VLX is unavailable and truncating/extracting back at the end.
static SDValue LowerSIGN_EXTEND_Mask(SDValue Op,
                                     const X86Subtarget &Subtarget,
                                     SelectionDAG &DAG) {
  MVT VT = Op->getSimpleValueType(0);
  SDValue In = Op->getOperand(0);
  MVT InVT = In.getSimpleValueType();
  assert(InVT.getVectorElementType() == MVT::i1 && "Unexpected input type!");
  MVT VTElt = VT.getVectorElementType();
  SDLoc dl(Op);

  unsigned NumElts = VT.getVectorNumElements();

  // Extend VT if the scalar type is i8/i16 and BWI is not supported;
  // in that case we go through i32 elements and truncate afterwards.
  MVT ExtVT = VT;
  if (!Subtarget.hasBWI() && VTElt.getSizeInBits() <= 16) {
    // If v16i32 can't be used directly (no 512-bit DQ path), split the
    // 16-element mask and extend the halves separately.
    if (NumElts == 16 && !Subtarget.canExtendTo512DQ())
      return SplitAndExtendv16i1(Op.getOpcode(), VT, In, dl, DAG);

    ExtVT = MVT::getVectorVT(MVT::i32, NumElts);
  }

  // Widen to 512 bits if VLX is not supported, so the AVX512 mask ops
  // have a legal vector width to operate on.
  MVT WideVT = ExtVT;
  if (!ExtVT.is512BitVector() && !Subtarget.hasVLX()) {
    NumElts *= 512 / ExtVT.getSizeInBits();
    InVT = MVT::getVectorVT(MVT::i1, NumElts);
    In = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, InVT, DAG.getUNDEF(InVT),
                     In, DAG.getIntPtrConstant(0, dl));
    WideVT = MVT::getVectorVT(ExtVT.getVectorElementType(), NumElts);
  }

  SDValue V;
  MVT WideEltVT = WideVT.getVectorElementType();
  if ((Subtarget.hasDQI() && WideEltVT.getSizeInBits() >= 32) ||
      (Subtarget.hasBWI() && WideEltVT.getSizeInBits() <= 16)) {
    // Subtarget has a native mask -> vector extend for this element size.
    V = DAG.getNode(Op.getOpcode(), dl, WideVT, In);
  } else {
    // Otherwise emulate with a vector select between -1 and 0.
    SDValue NegOne = DAG.getConstant(-1, dl, WideVT);
    SDValue Zero = DAG.getConstant(0, dl, WideVT);
    V = DAG.getSelect(dl, WideVT, In, NegOne, Zero);
  }

  // Truncate if we had to extend i16/i8 above to i32 elements.
  if (VT != ExtVT) {
    WideVT = MVT::getVectorVT(VTElt, NumElts);
    V = DAG.getNode(ISD::TRUNCATE, dl, WideVT, V);
  }

  // Extract the original-width subvector back out if we widened to 512 bits.
  if (WideVT != VT)
    V = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, V,
                    DAG.getIntPtrConstant(0, dl));

  return V;
}
| 24131 | |
| 24132 | static SDValue LowerANY_EXTEND(SDValue Op, const X86Subtarget &Subtarget, |
| 24133 | SelectionDAG &DAG) { |
| 24134 | SDValue In = Op->getOperand(0); |
| 24135 | MVT InVT = In.getSimpleValueType(); |
| 24136 | |
| 24137 | if (InVT.getVectorElementType() == MVT::i1) |
| 24138 | return LowerSIGN_EXTEND_Mask(Op, Subtarget, DAG); |
| 24139 | |
| 24140 | assert(Subtarget.hasAVX() && "Expected AVX support"); |
| 24141 | return LowerAVXExtend(Op, DAG, Subtarget); |
| 24142 | } |
| 24143 | |
| 24144 | |
| 24145 | |
| 24146 | |
| 24147 | |
/// Lower SIGN/ZERO_EXTEND_VECTOR_INREG: extend the low elements of the
/// input vector into the (wider-element) result vector. Returns SDValue()
/// when the type combination isn't one this custom lowering handles.
static SDValue LowerEXTEND_VECTOR_INREG(SDValue Op,
                                        const X86Subtarget &Subtarget,
                                        SelectionDAG &DAG) {
  SDValue In = Op->getOperand(0);
  MVT VT = Op->getSimpleValueType(0);
  MVT InVT = In.getSimpleValueType();

  MVT SVT = VT.getVectorElementType();
  MVT InSVT = InVT.getVectorElementType();
  assert(SVT.getFixedSizeInBits() > InSVT.getFixedSizeInBits());

  // Bail on element-type combinations we don't handle here.
  if (SVT != MVT::i64 && SVT != MVT::i32 && SVT != MVT::i16)
    return SDValue();
  if (InSVT != MVT::i32 && InSVT != MVT::i16 && InSVT != MVT::i8)
    return SDValue();
  // Require the feature level matching the destination vector width.
  if (!(VT.is128BitVector() && Subtarget.hasSSE2()) &&
      !(VT.is256BitVector() && Subtarget.hasAVX()) &&
      !(VT.is512BitVector() && Subtarget.hasAVX512()))
    return SDValue();

  SDLoc dl(Op);
  unsigned Opc = Op.getOpcode();
  unsigned NumElts = VT.getVectorNumElements();

  // Only the low part of a wide input is needed; shrink it, but keep at
  // least 128 bits so the subvector extraction stays legal.
  if (InVT.getSizeInBits() > 128) {
    int InSize = InSVT.getSizeInBits() * NumElts;
    In = extractSubVector(In, 0, DAG, dl, std::max(InSize, 128));
    InVT = In.getSimpleValueType();
  }

  if (Subtarget.hasInt256()) {
    assert(VT.getSizeInBits() > 128 && "Unexpected 128-bit vector extension");

    // If element counts differ, the *_VECTOR_INREG node itself is what we
    // want (it extends only the low elements).
    if (InVT.getVectorNumElements() != NumElts)
      return DAG.getNode(Op.getOpcode(), dl, VT, In);

    // Same element count: this is really a regular extend; emit it as one.
    unsigned ExtOpc =
        Opc == ISD::SIGN_EXTEND_VECTOR_INREG ? ISD::SIGN_EXTEND
                                             : ISD::ZERO_EXTEND;
    return DAG.getNode(ExtOpc, dl, VT, In);
  }

  // Pre-AVX2: split a 256-bit extension into two 128-bit halves.
  if (Subtarget.hasAVX()) {
    assert(VT.is256BitVector() && "256-bit vector expected");
    MVT HalfVT = VT.getHalfNumVectorElementsVT();
    int HalfNumElts = HalfVT.getVectorNumElements();

    // Shuffle the upper source elements down into the low positions so the
    // second half can use the same in-register extend.
    unsigned NumSrcElts = InVT.getVectorNumElements();
    SmallVector<int, 16> HiMask(NumSrcElts, SM_SentinelUndef);
    for (int i = 0; i != HalfNumElts; ++i)
      HiMask[i] = HalfNumElts + i;

    SDValue Lo = DAG.getNode(Opc, dl, HalfVT, In);
    SDValue Hi = DAG.getVectorShuffle(InVT, dl, In, DAG.getUNDEF(InVT), HiMask);
    Hi = DAG.getNode(Opc, dl, HalfVT, Hi);
    return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi);
  }

  // From here on only the SSE2 sign-extend expansion remains; zero-extend
  // is expected to have been handled elsewhere.
  assert(Opc == ISD::SIGN_EXTEND_VECTOR_INREG && "Unexpected opcode!");
  assert(VT.is128BitVector() && InVT.is128BitVector() && "Unexpected VTs");

  SDValue Curr = In;
  SDValue SignExt = Curr;

  // Place each source element into the high bits of the destination-sized
  // lane, then arithmetic-shift right to sign-extend. v4i32 sources skip
  // this (handled by the v2i64 path below).
  if (InVT != MVT::v4i32) {
    MVT DestVT = VT == MVT::v2i64 ? MVT::v4i32 : VT;

    unsigned DestWidth = DestVT.getScalarSizeInBits();
    unsigned Scale = DestWidth / InSVT.getSizeInBits();

    unsigned InNumElts = InVT.getVectorNumElements();
    unsigned DestElts = DestVT.getVectorNumElements();

    // Shuffle mask: source element i lands in the topmost slot of lane i.
    SmallVector<int, 16> Mask(InNumElts, SM_SentinelUndef);
    for (unsigned i = 0; i != DestElts; ++i)
      Mask[i * Scale + (Scale - 1)] = i;

    Curr = DAG.getVectorShuffle(InVT, dl, In, In, Mask);
    Curr = DAG.getBitcast(DestVT, Curr);

    unsigned SignExtShift = DestWidth - InSVT.getSizeInBits();
    SignExt = DAG.getNode(X86ISD::VSRAI, dl, DestVT, Curr,
                          DAG.getTargetConstant(SignExtShift, dl, MVT::i8));
  }

  // For v2i64, build the high halves by comparing against zero (all-ones
  // when negative) and interleave them with the low halves.
  if (VT == MVT::v2i64) {
    assert(Curr.getValueType() == MVT::v4i32 && "Unexpected input VT");
    SDValue Zero = DAG.getConstant(0, dl, MVT::v4i32);
    SDValue Sign = DAG.getSetCC(dl, MVT::v4i32, Zero, Curr, ISD::SETGT);
    SignExt = DAG.getVectorShuffle(MVT::v4i32, dl, SignExt, Sign, {0, 4, 1, 5});
    SignExt = DAG.getBitcast(VT, SignExt);
  }

  return SignExt;
}
| 24259 | |
/// Lower ISD::SIGN_EXTEND for vector types. Mask (vXi1) sources are
/// delegated to LowerSIGN_EXTEND_Mask; AVX2+ targets keep the node as-is;
/// pre-AVX2 256-bit extends are split into two in-register 128-bit halves.
static SDValue LowerSIGN_EXTEND(SDValue Op, const X86Subtarget &Subtarget,
                                SelectionDAG &DAG) {
  MVT VT = Op->getSimpleValueType(0);
  SDValue In = Op->getOperand(0);
  MVT InVT = In.getSimpleValueType();
  SDLoc dl(Op);

  if (InVT.getVectorElementType() == MVT::i1)
    return LowerSIGN_EXTEND_Mask(Op, Subtarget, DAG);

  assert(VT.isVector() && InVT.isVector() && "Expected vector type");
  assert(VT.getVectorNumElements() == InVT.getVectorNumElements() &&
         "Expected same number of elements");
  assert((VT.getVectorElementType() == MVT::i16 ||
          VT.getVectorElementType() == MVT::i32 ||
          VT.getVectorElementType() == MVT::i64) &&
         "Unexpected element type");
  assert((InVT.getVectorElementType() == MVT::i8 ||
          InVT.getVectorElementType() == MVT::i16 ||
          InVT.getVectorElementType() == MVT::i32) &&
         "Unexpected element type");

  // v32i8 -> v32i16 without BWI: split into two 256-bit halves.
  if (VT == MVT::v32i16 && !Subtarget.hasBWI()) {
    assert(InVT == MVT::v32i8 && "Unexpected VT!");
    return splitVectorIntUnary(Op, DAG);
  }

  // AVX2 can select this directly.
  if (Subtarget.hasInt256())
    return Op;

  // Pre-AVX2: extend the low half in-register, shuffle the high source
  // elements down, extend those too, then concatenate.
  MVT HalfVT = VT.getHalfNumVectorElementsVT();
  SDValue OpLo = DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, dl, HalfVT, In);

  unsigned NumElems = InVT.getVectorNumElements();
  SmallVector<int,8> ShufMask(NumElems, -1);
  for (unsigned i = 0; i != NumElems/2; ++i)
    ShufMask[i] = i + NumElems/2;

  SDValue OpHi = DAG.getVectorShuffle(InVT, dl, In, In, ShufMask);
  OpHi = DAG.getNode(ISD::SIGN_EXTEND_VECTOR_INREG, dl, HalfVT, OpHi);

  return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, OpLo, OpHi);
}
| 24311 | |
| 24312 | |
/// Change a 256/512-bit vector store into a pair of half-width stores
/// joined by a TokenFactor. Returns SDValue() (no change) when the store
/// is volatile/atomic, since splitting such a memory op is not allowed.
static SDValue splitVectorStore(StoreSDNode *Store, SelectionDAG &DAG) {
  SDValue StoredVal = Store->getValue();
  assert((StoredVal.getValueType().is256BitVector() ||
          StoredVal.getValueType().is512BitVector()) &&
         "Expecting 256/512-bit op");

  // Only simple (non-volatile, non-atomic) stores may be split.
  if (!Store->isSimple())
    return SDValue();

  SDLoc DL(Store);
  SDValue Value0, Value1;
  std::tie(Value0, Value1) = splitVector(StoredVal, DAG, DL);
  // Byte offset of the upper half relative to the base pointer.
  unsigned HalfOffset = Value0.getValueType().getStoreSize();
  SDValue Ptr0 = Store->getBasePtr();
  SDValue Ptr1 =
      DAG.getMemBasePlusOffset(Ptr0, TypeSize::Fixed(HalfOffset), DL);
  SDValue Ch0 =
      DAG.getStore(Store->getChain(), DL, Value0, Ptr0, Store->getPointerInfo(),
                   Store->getOriginalAlign(),
                   Store->getMemOperand()->getFlags());
  SDValue Ch1 = DAG.getStore(Store->getChain(), DL, Value1, Ptr1,
                             Store->getPointerInfo().getWithOffset(HalfOffset),
                             Store->getOriginalAlign(),
                             Store->getMemOperand()->getFlags());
  // Both halves hang off the original chain; merge them for consumers.
  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Ch0, Ch1);
}
| 24344 | |
| 24345 | |
| 24346 | |
/// Scalarize a 128-bit vector store: bitcast the value to StoreVT, then
/// emit one scalar store per element, merged with a TokenFactor.
/// Returns SDValue() when the store is volatile/atomic (not splittable).
static SDValue scalarizeVectorStore(StoreSDNode *Store, MVT StoreVT,
                                    SelectionDAG &DAG) {
  SDValue StoredVal = Store->getValue();
  assert(StoreVT.is128BitVector() &&
         StoredVal.getValueType().is128BitVector() && "Expecting 128-bit op");
  // Reinterpret the value in the element type we want to store as.
  StoredVal = DAG.getBitcast(StoreVT, StoredVal);

  // Only simple (non-volatile, non-atomic) stores may be scalarized.
  if (!Store->isSimple())
    return SDValue();

  MVT StoreSVT = StoreVT.getScalarType();
  unsigned NumElems = StoreVT.getVectorNumElements();
  unsigned ScalarSize = StoreSVT.getStoreSize();

  SDLoc DL(Store);
  SmallVector<SDValue, 4> Stores;
  for (unsigned i = 0; i != NumElems; ++i) {
    unsigned Offset = i * ScalarSize;
    SDValue Ptr = DAG.getMemBasePlusOffset(Store->getBasePtr(),
                                           TypeSize::Fixed(Offset), DL);
    SDValue Scl = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, StoreSVT, StoredVal,
                              DAG.getIntPtrConstant(i, DL));
    SDValue Ch = DAG.getStore(Store->getChain(), DL, Scl, Ptr,
                              Store->getPointerInfo().getWithOffset(Offset),
                              Store->getOriginalAlign(),
                              Store->getMemOperand()->getFlags());
    Stores.push_back(Ch);
  }
  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
}
| 24380 | |
/// Custom-lower a vector store. Handles three cases that are visible here:
/// small vXi1 mask stores (AVX512F without DQ), 256-bit (or BWI-less
/// v32i16/v64i8) stores of concatenated ops that are better split, and
/// 64-bit vectors that must be widened and stored as a single scalar.
static SDValue LowerStore(SDValue Op, const X86Subtarget &Subtarget,
                          SelectionDAG &DAG) {
  StoreSDNode *St = cast<StoreSDNode>(Op.getNode());
  SDLoc dl(St);
  SDValue StoredVal = St->getValue();

  // Without AVX512DQ, store v2i1/v4i1/v8i1 masks through a scalar i8.
  if (StoredVal.getValueType().isVector() &&
      StoredVal.getValueType().getVectorElementType() == MVT::i1) {
    unsigned NumElts = StoredVal.getValueType().getVectorNumElements();
    assert(NumElts <= 8 && "Unexpected VT");
    assert(!St->isTruncatingStore() && "Expected non-truncating store");
    assert(Subtarget.hasAVX512() && !Subtarget.hasDQI() &&
           "Expected AVX512F without AVX512DQI");

    // Widen the mask to v16i1 so it can be bitcast to i16, then narrow.
    StoredVal = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v16i1,
                            DAG.getUNDEF(MVT::v16i1), StoredVal,
                            DAG.getIntPtrConstant(0, dl));
    StoredVal = DAG.getBitcast(MVT::i16, StoredVal);
    StoredVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, StoredVal);

    // Clear the undef padding bits so we store zeroes there.
    if (NumElts < 8)
      StoredVal = DAG.getZeroExtendInReg(
          StoredVal, dl, EVT::getIntegerVT(*DAG.getContext(), NumElts));

    return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),
                        St->getPointerInfo(), St->getOriginalAlign(),
                        St->getMemOperand()->getFlags());
  }

  if (St->isTruncatingStore())
    return SDValue();

  // A 256-bit store of a one-use concat (or v32i16/v64i8 without BWI) is
  // better split into two half-width stores, avoiding the concat op.
  MVT StoreVT = StoredVal.getSimpleValueType();
  if (StoreVT.is256BitVector() ||
      ((StoreVT == MVT::v32i16 || StoreVT == MVT::v64i8) &&
       !Subtarget.hasBWI())) {
    SmallVector<SDValue, 4> CatOps;
    if (StoredVal.hasOneUse() && collectConcatOps(StoredVal.getNode(), CatOps))
      return splitVectorStore(St, DAG);
    return SDValue();
  }

  // Remaining case: a 64-bit vector whose type action is widening.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  assert(StoreVT.isVector() && StoreVT.getSizeInBits() == 64 &&
         "Unexpected VT");
  assert(TLI.getTypeAction(*DAG.getContext(), StoreVT) ==
             TargetLowering::TypeWidenVector && "Unexpected type action!");

  // Widen to the 128-bit transform type by concatenating with undef.
  EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), StoreVT);
  StoredVal = DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, StoredVal,
                          DAG.getUNDEF(StoreVT));

  if (Subtarget.hasSSE2()) {
    // Cast to a v2 x 64-bit type, extract element 0 and store that scalar.
    // i64 is used only when 64-bit integer stores are available.
    MVT StVT = Subtarget.is64Bit() && StoreVT.isInteger() ? MVT::i64 : MVT::f64;
    MVT CastVT = MVT::getVectorVT(StVT, 2);
    StoredVal = DAG.getBitcast(CastVT, StoredVal);
    StoredVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, StVT, StoredVal,
                            DAG.getIntPtrConstant(0, dl));

    return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),
                        St->getPointerInfo(), St->getOriginalAlign(),
                        St->getMemOperand()->getFlags());
  }

  // SSE1 only: use the VEXTRACT_STORE memory intrinsic to store the low
  // 64 bits of the widened vector.
  assert(Subtarget.hasSSE1() && "Expected SSE");
  SDVTList Tys = DAG.getVTList(MVT::Other);
  SDValue Ops[] = {St->getChain(), StoredVal, St->getBasePtr()};
  return DAG.getMemIntrinsicNode(X86ISD::VEXTRACT_STORE, dl, Tys, Ops, MVT::i64,
                                 St->getMemOperand());
}
| 24458 | |
| 24459 | |
| 24460 | |
| 24461 | |
| 24462 | |
| 24463 | |
| 24464 | |
| 24465 | |
/// Custom-lower an integer vector load. The only case handled here is a
/// small vXi1 mask load on AVX512F-without-DQ targets: load the byte as
/// an i8, widen to i16, bitcast to v16i1 and extract the low elements.
/// All other vector loads fall through (SDValue()).
static SDValue LowerLoad(SDValue Op, const X86Subtarget &Subtarget,
                         SelectionDAG &DAG) {
  MVT RegVT = Op.getSimpleValueType();
  assert(RegVT.isVector() && "We only custom lower vector loads.");
  assert(RegVT.isInteger() &&
         "We only custom lower integer vector loads.");

  LoadSDNode *Ld = cast<LoadSDNode>(Op.getNode());
  SDLoc dl(Ld);

  // Without AVX512DQ, load v2i1/v4i1/v8i1 masks through a scalar i8.
  if (RegVT.getVectorElementType() == MVT::i1) {
    assert(EVT(RegVT) == Ld->getMemoryVT() && "Expected non-extending load");
    assert(RegVT.getVectorNumElements() <= 8 && "Unexpected VT");
    assert(Subtarget.hasAVX512() && !Subtarget.hasDQI() &&
           "Expected AVX512F without AVX512DQI");

    SDValue NewLd = DAG.getLoad(MVT::i8, dl, Ld->getChain(), Ld->getBasePtr(),
                                Ld->getPointerInfo(), Ld->getOriginalAlign(),
                                Ld->getMemOperand()->getFlags());

    // The replacement below must carry the load's chain to its users.
    assert(NewLd->getNumValues() == 2 && "Loads must carry a chain!");

    SDValue Val = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i16, NewLd);
    Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, RegVT,
                      DAG.getBitcast(MVT::v16i1, Val),
                      DAG.getIntPtrConstant(0, dl));
    return DAG.getMergeValues({Val, NewLd.getValue(1)}, dl);
  }

  return SDValue();
}
| 24499 | |
| 24500 | |
| 24501 | |
| 24502 | static bool isAndOrOfSetCCs(SDValue Op, unsigned &Opc) { |
| 24503 | Opc = Op.getOpcode(); |
| 24504 | if (Opc != ISD::OR && Opc != ISD::AND) |
| 24505 | return false; |
| 24506 | return (Op.getOperand(0).getOpcode() == X86ISD::SETCC && |
| 24507 | Op.getOperand(0).hasOneUse() && |
| 24508 | Op.getOperand(1).getOpcode() == X86ISD::SETCC && |
| 24509 | Op.getOperand(1).hasOneUse()); |
| 24510 | } |
| 24511 | |
/// Lower ISD::BRCOND to X86ISD::BRCOND, translating the condition into an
/// EFLAGS-producing node plus an x86 condition code.
SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  SDValue Cond = Op.getOperand(1);
  SDValue Dest = Op.getOperand(2);
  SDLoc dl(Op);

  // f128 comparisons are softened elsewhere, so skip them here.
  if (Cond.getOpcode() == ISD::SETCC &&
      Cond.getOperand(0).getValueType() != MVT::f128) {
    SDValue LHS = Cond.getOperand(0);
    SDValue RHS = Cond.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();

    // Special case: branch on the overflow bit of an [su]{add,sub,mul}o
    // compared (eq/ne) against 0 or 1 -- use the arithmetic op's flags
    // directly instead of re-materializing the boolean.
    if (ISD::isOverflowIntrOpRes(LHS) &&
        (CC == ISD::SETEQ || CC == ISD::SETNE) &&
        (isNullConstant(RHS) || isOneConstant(RHS))) {
      SDValue Value, Overflow;
      X86::CondCode X86Cond;
      std::tie(Value, Overflow) = getX86XALUOOp(X86Cond, LHS.getValue(0), DAG);

      // "== 0" / "!= 1" test for *no* overflow, so invert the condition.
      if ((CC == ISD::SETEQ) == isNullConstant(RHS))
        X86Cond = X86::GetOppositeBranchCondition(X86Cond);

      SDValue CCVal = DAG.getTargetConstant(X86Cond, dl, MVT::i8);
      return DAG.getNode(X86ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
                         Overflow);
    }

    // Integer compares go through the common flags-emission helper.
    if (LHS.getSimpleValueType().isInteger()) {
      SDValue CCVal;
      SDValue EFLAGS = emitFlagsForSetcc(LHS, RHS, CC, SDLoc(Cond), DAG, CCVal);
      return DAG.getNode(X86ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
                         EFLAGS);
    }

    if (CC == ISD::SETOEQ) {
      // FP ordered-equal: can be emitted as two branches (NE to the false
      // block, then P to the false block) instead of AND-ing two setccs,
      // but only if we can redirect the following unconditional branch.
      if (Op.getNode()->hasOneUse()) {
        SDNode *User = *Op.getNode()->use_begin();
        // Look for the unconditional BR that follows this conditional
        // branch; we must reverse its successors to implement FCMP_OEQ.
        if (User->getOpcode() == ISD::BR) {
          SDValue FalseBB = User->getOperand(1);
          SDNode *NewBR =
              DAG.UpdateNodeOperands(User, User->getOperand(0), Dest);
          assert(NewBR == User);
          (void)NewBR;
          // Branch on "not equal or unordered" to the original false block.
          Dest = FalseBB;

          SDValue Cmp =
              DAG.getNode(X86ISD::FCMP, SDLoc(Cond), MVT::i32, LHS, RHS);
          SDValue CCVal = DAG.getTargetConstant(X86::COND_NE, dl, MVT::i8);
          Chain = DAG.getNode(X86ISD::BRCOND, dl, MVT::Other, Chain, Dest,
                              CCVal, Cmp);
          CCVal = DAG.getTargetConstant(X86::COND_P, dl, MVT::i8);
          return DAG.getNode(X86ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
                             Cmp);
        }
      }
    } else if (CC == ISD::SETUNE) {
      // FP unordered-not-equal: emit two branches (NE, then P) instead of
      // OR-ing two setccs.
      SDValue Cmp = DAG.getNode(X86ISD::FCMP, SDLoc(Cond), MVT::i32, LHS, RHS);
      SDValue CCVal = DAG.getTargetConstant(X86::COND_NE, dl, MVT::i8);
      Chain =
          DAG.getNode(X86ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal, Cmp);
      CCVal = DAG.getTargetConstant(X86::COND_P, dl, MVT::i8);
      return DAG.getNode(X86ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
                         Cmp);
    } else {
      // Other FP conditions map to a single x86 condition code.
      X86::CondCode X86Cond =
          TranslateX86CC(CC, dl, /*IsFP=*/true, LHS, RHS, DAG);
      SDValue Cmp = DAG.getNode(X86ISD::FCMP, SDLoc(Cond), MVT::i32, LHS, RHS);
      SDValue CCVal = DAG.getTargetConstant(X86Cond, dl, MVT::i8);
      return DAG.getNode(X86ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
                         Cmp);
    }
  }

  // Branching directly on the overflow result of an arithmetic-overflow op.
  if (ISD::isOverflowIntrOpRes(Cond)) {
    SDValue Value, Overflow;
    X86::CondCode X86Cond;
    std::tie(Value, Overflow) = getX86XALUOOp(X86Cond, Cond.getValue(0), DAG);

    SDValue CCVal = DAG.getTargetConstant(X86Cond, dl, MVT::i8);
    return DAG.getNode(X86ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
                       Overflow);
  }

  // Look through a truncate whose high input bits are known zero.
  if (isTruncWithZeroHighBitsInput(Cond, DAG))
    Cond = Cond.getOperand(0);

  EVT CondVT = Cond.getValueType();

  // Generic path: test bit 0 of the condition. Mask with 1 unless the
  // condition is already an AND with 1.
  if (!(Cond.getOpcode() == ISD::AND && isOneConstant(Cond.getOperand(1))))
    Cond =
        DAG.getNode(ISD::AND, dl, CondVT, Cond, DAG.getConstant(1, dl, CondVT));

  SDValue LHS = Cond;
  SDValue RHS = DAG.getConstant(0, dl, CondVT);

  SDValue CCVal;
  SDValue EFLAGS = emitFlagsForSetcc(LHS, RHS, ISD::SETNE, dl, DAG, CCVal);
  return DAG.getNode(X86ISD::BRCOND, dl, MVT::Other, Chain, Dest, CCVal,
                     EFLAGS);
}
| 24628 | |
| 24629 | |
| 24630 | |
| 24631 | |
| 24632 | |
| 24633 | |
/// Lower a dynamic stack allocation. Four strategies are used depending on
/// the subtarget/function: a plain SP subtract (the common case), an
/// inline-probed alloca, a segmented-stack alloca, or a Windows-style
/// chkstk call (WIN_ALLOCA). Returns {allocated pointer, chain}.
SDValue
X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
                                           SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  bool SplitStack = MF.shouldSplitStack();
  bool EmitStackProbeCall = hasStackProbeSymbol(MF);
  // Windows (non-MachO), split-stack, and probe-call targets need one of
  // the specialized lowerings below.
  bool Lower = (Subtarget.isOSWindows() && !Subtarget.isTargetMachO()) ||
               SplitStack || EmitStackProbeCall;
  SDLoc dl(Op);

  // Get the inputs: chain, size to allocate, requested alignment.
  SDNode *Node = Op.getNode();
  SDValue Chain = Op.getOperand(0);
  SDValue Size = Op.getOperand(1);
  MaybeAlign Alignment(Op.getConstantOperandVal(2));
  EVT VT = Node->getValueType(0);

  // Chain the dynamic stack allocation so that it doesn't modify the stack
  // pointer while other instructions are using the stack.
  Chain = DAG.getCALLSEQ_START(Chain, 0, 0, dl);

  bool Is64Bit = Subtarget.is64Bit();
  MVT SPTy = getPointerTy(DAG.getDataLayout());

  SDValue Result;
  if (!Lower) {
    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
    Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
    assert(SPReg && "Target cannot require DYNAMIC_STACKALLOC expansion and"
                    " not tell us which reg is the stack pointer!");

    const TargetFrameLowering &TFI = *Subtarget.getFrameLowering();
    const Align StackAlign = TFI.getStackAlign();
    if (hasInlineStackProbe(MF)) {
      // Probe each page as the allocation grows (PROBED_ALLOCA expands to
      // a probing loop later).
      MachineRegisterInfo &MRI = MF.getRegInfo();

      const TargetRegisterClass *AddrRegClass = getRegClassFor(SPTy);
      Register Vreg = MRI.createVirtualRegister(AddrRegClass);
      Chain = DAG.getCopyToReg(Chain, dl, Vreg, Size);
      Result = DAG.getNode(X86ISD::PROBED_ALLOCA, dl, SPTy, Chain,
                           DAG.getRegister(Vreg, SPTy));
    } else {
      // Simple case: SP -= Size.
      SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
      Chain = SP.getValue(1);
      Result = DAG.getNode(ISD::SUB, dl, VT, SP, Size);
    }
    // Round the new SP down only if more than the default stack alignment
    // was requested.
    if (Alignment && *Alignment > StackAlign)
      Result =
          DAG.getNode(ISD::AND, dl, VT, Result,
                      DAG.getConstant(~(Alignment->value() - 1ULL), dl, VT));
    Chain = DAG.getCopyToReg(Chain, dl, SPReg, Result);
  } else if (SplitStack) {
    MachineRegisterInfo &MRI = MF.getRegInfo();

    if (Is64Bit) {
      // The 64-bit segmented-stack expansion clobbers r10/r11, which makes
      // it incompatible with the 'nest' parameter attribute (also r10).
      const Function &F = MF.getFunction();
      for (const auto &A : F.args()) {
        if (A.hasNestAttr())
          report_fatal_error("Cannot use segmented stacks with functions that "
                             "have nested arguments.");
      }
    }

    const TargetRegisterClass *AddrRegClass = getRegClassFor(SPTy);
    Register Vreg = MRI.createVirtualRegister(AddrRegClass);
    Chain = DAG.getCopyToReg(Chain, dl, Vreg, Size);
    Result = DAG.getNode(X86ISD::SEG_ALLOCA, dl, SPTy, Chain,
                         DAG.getRegister(Vreg, SPTy));
  } else {
    // Windows-style allocation: call the stack probe (chkstk) helper.
    SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
    Chain = DAG.getNode(X86ISD::WIN_ALLOCA, dl, NodeTys, Chain, Size);
    MF.getInfo<X86MachineFunctionInfo>()->setHasWinAlloca(true);

    const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
    Register SPReg = RegInfo->getStackRegister();
    SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, SPTy);
    Chain = SP.getValue(1);

    // Align the probed SP if the caller asked for any extra alignment.
    if (Alignment) {
      SP = DAG.getNode(ISD::AND, dl, VT, SP.getValue(0),
                       DAG.getConstant(~(Alignment->value() - 1ULL), dl, VT));
      Chain = DAG.getCopyToReg(Chain, dl, SPReg, SP);
    }

    Result = SP;
  }

  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, dl, true),
                             DAG.getIntPtrConstant(0, dl, true), SDValue(), dl);

  SDValue Ops[2] = {Result, Chain};
  return DAG.getMergeValues(Ops, dl);
}
| 24729 | |
SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  auto PtrVT = getPointerTy(MF.getDataLayout());
  X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();

  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  SDLoc DL(Op);

  if (!Subtarget.is64Bit() ||
      Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv())) {
    // On 32-bit targets and for Win64, vastart just stores the address of the
    // VarArgsFrameIndex slot into the memory location argument.
    SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
    return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
                        MachinePointerInfo(SV));
  }

  // 64-bit SysV: initialize all four fields of the __va_list_tag structure:
  //   gp_offset         (i32)
  //   fp_offset         (i32)
  //   overflow_arg_area (pointer to parameters coming in memory)
  //   reg_save_area     (pointer to the register save area)
  // The stores are collected and glued together with a TokenFactor.
  SmallVector<SDValue, 8> MemOps;
  SDValue FIN = Op.getOperand(1);
  // Store gp_offset at offset 0.
  SDValue Store = DAG.getStore(
      Op.getOperand(0), DL,
      DAG.getConstant(FuncInfo->getVarArgsGPOffset(), DL, MVT::i32), FIN,
      MachinePointerInfo(SV));
  MemOps.push_back(Store);

  // Store fp_offset at offset 4.
  FIN = DAG.getMemBasePlusOffset(FIN, TypeSize::Fixed(4), DL);
  Store = DAG.getStore(
      Op.getOperand(0), DL,
      DAG.getConstant(FuncInfo->getVarArgsFPOffset(), DL, MVT::i32), FIN,
      MachinePointerInfo(SV, 4));
  MemOps.push_back(Store);

  // Store the pointer to overflow_arg_area at offset 8.
  FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getIntPtrConstant(4, DL));
  SDValue OVFIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
  Store =
      DAG.getStore(Op.getOperand(0), DL, OVFIN, FIN, MachinePointerInfo(SV, 8));
  MemOps.push_back(Store);

  // Store the pointer to reg_save_area (offset 16 for LP64, 12 for X32 where
  // pointers are 4 bytes wide).
  FIN = DAG.getNode(ISD::ADD, DL, PtrVT, FIN, DAG.getIntPtrConstant(
                    Subtarget.isTarget64BitLP64() ? 8 : 4, DL));
  SDValue RSFIN = DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(), PtrVT);
  Store = DAG.getStore(
      Op.getOperand(0), DL, RSFIN, FIN,
      MachinePointerInfo(SV, Subtarget.isTarget64BitLP64() ? 16 : 12));
  MemOps.push_back(Store);
  return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
}
| 24786 | |
SDValue X86TargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
  assert(Subtarget.is64Bit() &&
         "LowerVAARG only handles 64-bit va_arg!");
  assert(Op.getNumOperands() == 4);

  MachineFunction &MF = DAG.getMachineFunction();
  if (Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv()))
    // The Win64 ABI uses char* instead of a structure; the generic expansion
    // handles that case.
    return DAG.expandVAArg(Op.getNode());

  SDValue Chain = Op.getOperand(0);
  SDValue SrcPtr = Op.getOperand(1);
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  unsigned Align = Op.getConstantOperandVal(3);
  SDLoc dl(Op);

  EVT ArgVT = Op.getNode()->getValueType(0);
  Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());
  uint32_t ArgSize = DAG.getDataLayout().getTypeAllocSize(ArgTy);
  uint8_t ArgMode;

  // Decide which area of the va_list this value is read from. This simple
  // classification only covers the basic scalar types; f80 in particular is
  // rejected below.
  assert(ArgVT != MVT::f80 && "va_arg for f80 not yet implemented");
  if (ArgVT.isFloatingPoint() && ArgSize <= 16 /*bytes*/) {
    ArgMode = 2;  // Argument passed in XMM register; use fp_offset.
  } else {
    assert(ArgVT.isInteger() && ArgSize <= 32 /*bytes*/ &&
           "Unhandled argument type in LowerVAARG");
    ArgMode = 1;  // Argument passed in GPR64 register(s); use gp_offset.
  }

  if (ArgMode == 2) {
    // Reading from the FP register save area only makes sense when SSE is
    // actually available and implicit FP use is permitted.
    assert(!Subtarget.useSoftFloat() &&
           !(MF.getFunction().hasFnAttribute(Attribute::NoImplicitFloat)) &&
           Subtarget.hasSSE1());
  }

  // Insert the VAARG node into the DAG. It produces two values: the variable
  // argument's address and the updated chain.
  SDValue InstOps[] = {Chain, SrcPtr,
                       DAG.getTargetConstant(ArgSize, dl, MVT::i32),
                       DAG.getTargetConstant(ArgMode, dl, MVT::i8),
                       DAG.getTargetConstant(Align, dl, MVT::i32)};
  SDVTList VTs = DAG.getVTList(getPointerTy(DAG.getDataLayout()), MVT::Other);
  SDValue VAARG = DAG.getMemIntrinsicNode(
      Subtarget.isTarget64BitLP64() ? X86ISD::VAARG_64 : X86ISD::VAARG_X32, dl,
      VTs, InstOps, MVT::i64, MachinePointerInfo(SV),
      /*Alignment=*/None,
      MachineMemOperand::MOLoad | MachineMemOperand::MOStore);
  Chain = VAARG.getValue(1);

  // Load the argument from the computed address and return it.
  return DAG.getLoad(ArgVT, dl, Chain, VAARG, MachinePointerInfo());
}
| 24844 | |
| 24845 | static SDValue LowerVACOPY(SDValue Op, const X86Subtarget &Subtarget, |
| 24846 | SelectionDAG &DAG) { |
| 24847 | |
| 24848 | |
| 24849 | assert(Subtarget.is64Bit() && "This code only handles 64-bit va_copy!"); |
| 24850 | if (Subtarget.isCallingConvWin64( |
| 24851 | DAG.getMachineFunction().getFunction().getCallingConv())) |
| 24852 | |
| 24853 | return DAG.expandVACopy(Op.getNode()); |
| 24854 | |
| 24855 | SDValue Chain = Op.getOperand(0); |
| 24856 | SDValue DstPtr = Op.getOperand(1); |
| 24857 | SDValue SrcPtr = Op.getOperand(2); |
| 24858 | const Value *DstSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue(); |
| 24859 | const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue(); |
| 24860 | SDLoc DL(Op); |
| 24861 | |
| 24862 | return DAG.getMemcpy( |
| 24863 | Chain, DL, DstPtr, SrcPtr, |
| 24864 | DAG.getIntPtrConstant(Subtarget.isTarget64BitLP64() ? 24 : 16, DL), |
| 24865 | Align(Subtarget.isTarget64BitLP64() ? 8 : 4), false, false, |
| 24866 | false, MachinePointerInfo(DstSV), MachinePointerInfo(SrcSV)); |
| 24867 | } |
| 24868 | |
| 24869 | |
| 24870 | static unsigned getTargetVShiftUniformOpcode(unsigned Opc, bool IsVariable) { |
| 24871 | switch (Opc) { |
| 24872 | case ISD::SHL: |
| 24873 | case X86ISD::VSHL: |
| 24874 | case X86ISD::VSHLI: |
| 24875 | return IsVariable ? X86ISD::VSHL : X86ISD::VSHLI; |
| 24876 | case ISD::SRL: |
| 24877 | case X86ISD::VSRL: |
| 24878 | case X86ISD::VSRLI: |
| 24879 | return IsVariable ? X86ISD::VSRL : X86ISD::VSRLI; |
| 24880 | case ISD::SRA: |
| 24881 | case X86ISD::VSRA: |
| 24882 | case X86ISD::VSRAI: |
| 24883 | return IsVariable ? X86ISD::VSRA : X86ISD::VSRAI; |
| 24884 | } |
| 24885 | llvm_unreachable("Unknown target vector shift node"); |
| 24886 | } |
| 24887 | |
| 24888 | |
| 24889 | |
| 24890 | static SDValue getTargetVShiftByConstNode(unsigned Opc, const SDLoc &dl, MVT VT, |
| 24891 | SDValue SrcOp, uint64_t ShiftAmt, |
| 24892 | SelectionDAG &DAG) { |
| 24893 | MVT ElementType = VT.getVectorElementType(); |
| 24894 | |
| 24895 | |
| 24896 | |
| 24897 | if (VT != SrcOp.getSimpleValueType()) |
| 24898 | SrcOp = DAG.getBitcast(VT, SrcOp); |
| 24899 | |
| 24900 | |
| 24901 | if (ShiftAmt == 0) |
| 24902 | return SrcOp; |
| 24903 | |
| 24904 | |
| 24905 | if (ShiftAmt >= ElementType.getSizeInBits()) { |
| 24906 | if (Opc == X86ISD::VSRAI) |
| 24907 | ShiftAmt = ElementType.getSizeInBits() - 1; |
| 24908 | else |
| 24909 | return DAG.getConstant(0, dl, VT); |
| 24910 | } |
| 24911 | |
| 24912 | assert((Opc == X86ISD::VSHLI || Opc == X86ISD::VSRLI || Opc == X86ISD::VSRAI) |
| 24913 | && "Unknown target vector shift-by-constant node"); |
| 24914 | |
| 24915 | |
| 24916 | |
| 24917 | if (ISD::isBuildVectorOfConstantSDNodes(SrcOp.getNode())) { |
| 24918 | SmallVector<SDValue, 8> Elts; |
| 24919 | unsigned NumElts = SrcOp->getNumOperands(); |
| 24920 | |
| 24921 | switch (Opc) { |
| 24922 | default: llvm_unreachable("Unknown opcode!"); |
| 24923 | case X86ISD::VSHLI: |
| 24924 | for (unsigned i = 0; i != NumElts; ++i) { |
| 24925 | SDValue CurrentOp = SrcOp->getOperand(i); |
| 24926 | if (CurrentOp->isUndef()) { |
| 24927 | |
| 24928 | Elts.push_back(DAG.getConstant(0, dl, ElementType)); |
| 24929 | continue; |
| 24930 | } |
| 24931 | auto *ND = cast<ConstantSDNode>(CurrentOp); |
| 24932 | const APInt &C = ND->getAPIntValue(); |
| 24933 | Elts.push_back(DAG.getConstant(C.shl(ShiftAmt), dl, ElementType)); |
| 24934 | } |
| 24935 | break; |
| 24936 | case X86ISD::VSRLI: |
| 24937 | for (unsigned i = 0; i != NumElts; ++i) { |
| 24938 | SDValue CurrentOp = SrcOp->getOperand(i); |
| 24939 | if (CurrentOp->isUndef()) { |
| 24940 | |
| 24941 | Elts.push_back(DAG.getConstant(0, dl, ElementType)); |
| 24942 | continue; |
| 24943 | } |
| 24944 | auto *ND = cast<ConstantSDNode>(CurrentOp); |
| 24945 | const APInt &C = ND->getAPIntValue(); |
| 24946 | Elts.push_back(DAG.getConstant(C.lshr(ShiftAmt), dl, ElementType)); |
| 24947 | } |
| 24948 | break; |
| 24949 | case X86ISD::VSRAI: |
| 24950 | for (unsigned i = 0; i != NumElts; ++i) { |
| 24951 | SDValue CurrentOp = SrcOp->getOperand(i); |
| 24952 | if (CurrentOp->isUndef()) { |
| 24953 | |
| 24954 | Elts.push_back(DAG.getConstant(0, dl, ElementType)); |
| 24955 | continue; |
| 24956 | } |
| 24957 | auto *ND = cast<ConstantSDNode>(CurrentOp); |
| 24958 | const APInt &C = ND->getAPIntValue(); |
| 24959 | Elts.push_back(DAG.getConstant(C.ashr(ShiftAmt), dl, ElementType)); |
| 24960 | } |
| 24961 | break; |
| 24962 | } |
| 24963 | |
| 24964 | return DAG.getBuildVector(VT, dl, Elts); |
| 24965 | } |
| 24966 | |
| 24967 | return DAG.getNode(Opc, dl, VT, SrcOp, |
| 24968 | DAG.getTargetConstant(ShiftAmt, dl, MVT::i8)); |
| 24969 | } |
| 24970 | |
| 24971 | |
| 24972 | |
/// Handle vector element shifts where the shift amount may or may not be a
/// constant. Takes the immediate version of the shift opcode as input.
static SDValue getTargetVShiftNode(unsigned Opc, const SDLoc &dl, MVT VT,
                                   SDValue SrcOp, SDValue ShAmt,
                                   const X86Subtarget &Subtarget,
                                   SelectionDAG &DAG) {
  MVT SVT = ShAmt.getSimpleValueType();
  assert((SVT == MVT::i32 || SVT == MVT::i64) && "Unexpected value type!");

  // Catch shift-by-constant and fold it directly.
  if (ConstantSDNode *CShAmt = dyn_cast<ConstantSDNode>(ShAmt))
    return getTargetVShiftByConstNode(Opc, dl, VT, SrcOp,
                                      CShAmt->getZExtValue(), DAG);

  // Change opcode to the non-immediate (register shift amount) version.
  Opc = getTargetVShiftUniformOpcode(Opc, true);

  // Need to build a vector containing the shift amount. SSE/AVX packed
  // shifts only use the lower 64 bits of the shift count, so the amount is
  // placed in the low element with the rest zeroed where needed:
  // +====================+============+======================================+
  // | ShAmt is           | HasSSE4.1? | Construct ShAmt vector as            |
  // +====================+============+======================================+
  // | i64                | Yes, No    | Use ShAmt as lowest elt              |
  // | i32                | Yes        | zero-extend in-reg                   |
  // | (i32 zext(i16/i8)) | Yes        | zero-extend in-reg                   |
  // | (i32 zext(i16/i8)) | No         | byte-shift-in-reg                    |
  // | i32                | No         | v4i32 build_vector(ShAmt, 0, ud, ud) |
  // +====================+============+======================================+
  if (SVT == MVT::i64)
    ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), MVT::v2i64, ShAmt);
  else if (ShAmt.getOpcode() == ISD::ZERO_EXTEND &&
           ShAmt.getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
           (ShAmt.getOperand(0).getSimpleValueType() == MVT::i16 ||
            ShAmt.getOperand(0).getSimpleValueType() == MVT::i8)) {
    // Peek through the zext of an extracted i8/i16 element and zero-extend
    // the element in-vector instead.
    ShAmt = ShAmt.getOperand(0);
    MVT AmtTy = ShAmt.getSimpleValueType() == MVT::i8 ? MVT::v16i8 : MVT::v8i16;
    ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), AmtTy, ShAmt);
    if (Subtarget.hasSSE41())
      ShAmt = DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, SDLoc(ShAmt),
                          MVT::v2i64, ShAmt);
    else {
      // No SSE4.1: clear the bytes above the element by byte-shifting it to
      // the top of the 128-bit register and back down again.
      SDValue ByteShift = DAG.getTargetConstant(
          (128 - AmtTy.getScalarSizeInBits()) / 8, SDLoc(ShAmt), MVT::i8);
      ShAmt = DAG.getBitcast(MVT::v16i8, ShAmt);
      ShAmt = DAG.getNode(X86ISD::VSHLDQ, SDLoc(ShAmt), MVT::v16i8, ShAmt,
                          ByteShift);
      ShAmt = DAG.getNode(X86ISD::VSRLDQ, SDLoc(ShAmt), MVT::v16i8, ShAmt,
                          ByteShift);
    }
  } else if (Subtarget.hasSSE41() &&
             ShAmt.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
    ShAmt = DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(ShAmt), MVT::v4i32, ShAmt);
    ShAmt = DAG.getNode(ISD::ZERO_EXTEND_VECTOR_INREG, SDLoc(ShAmt),
                        MVT::v2i64, ShAmt);
  } else {
    // Generic fallback: build { ShAmt, 0, undef, undef } as a v4i32.
    SDValue ShOps[4] = {ShAmt, DAG.getConstant(0, dl, SVT), DAG.getUNDEF(SVT),
                        DAG.getUNDEF(SVT)};
    ShAmt = DAG.getBuildVector(MVT::v4i32, dl, ShOps);
  }

  // The shift amount operand must be a 128-bit vector with the same element
  // type as the input vector.
  MVT EltVT = VT.getVectorElementType();
  MVT ShVT = MVT::getVectorVT(EltVT, 128 / EltVT.getSizeInBits());

  ShAmt = DAG.getBitcast(ShVT, ShAmt);
  return DAG.getNode(Opc, dl, VT, SrcOp, ShAmt);
}
| 25040 | |
| 25041 | |
| 25042 | |
| 25043 | static SDValue getMaskNode(SDValue Mask, MVT MaskVT, |
| 25044 | const X86Subtarget &Subtarget, SelectionDAG &DAG, |
| 25045 | const SDLoc &dl) { |
| 25046 | |
| 25047 | if (isAllOnesConstant(Mask)) |
| 25048 | return DAG.getConstant(1, dl, MaskVT); |
| 25049 | if (X86::isZeroNode(Mask)) |
| 25050 | return DAG.getConstant(0, dl, MaskVT); |
| 25051 | |
| 25052 | assert(MaskVT.bitsLE(Mask.getSimpleValueType()) && "Unexpected mask size!"); |
| 25053 | |
| 25054 | if (Mask.getSimpleValueType() == MVT::i64 && Subtarget.is32Bit()) { |
| 25055 | assert(MaskVT == MVT::v64i1 && "Expected v64i1 mask!"); |
| 25056 | assert(Subtarget.hasBWI() && "Expected AVX512BW target!"); |
| 25057 | |
| 25058 | SDValue Lo, Hi; |
| 25059 | Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Mask, |
| 25060 | DAG.getConstant(0, dl, MVT::i32)); |
| 25061 | Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Mask, |
| 25062 | DAG.getConstant(1, dl, MVT::i32)); |
| 25063 | |
| 25064 | Lo = DAG.getBitcast(MVT::v32i1, Lo); |
| 25065 | Hi = DAG.getBitcast(MVT::v32i1, Hi); |
| 25066 | |
| 25067 | return DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v64i1, Lo, Hi); |
| 25068 | } else { |
| 25069 | MVT BitcastVT = MVT::getVectorVT(MVT::i1, |
| 25070 | Mask.getSimpleValueType().getSizeInBits()); |
| 25071 | |
| 25072 | |
| 25073 | return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT, |
| 25074 | DAG.getBitcast(BitcastVT, Mask), |
| 25075 | DAG.getIntPtrConstant(0, dl)); |
| 25076 | } |
| 25077 | } |
| 25078 | |
| 25079 | |
| 25080 | |
| 25081 | |
| 25082 | static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask, |
| 25083 | SDValue PreservedSrc, |
| 25084 | const X86Subtarget &Subtarget, |
| 25085 | SelectionDAG &DAG) { |
| 25086 | MVT VT = Op.getSimpleValueType(); |
| 25087 | MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements()); |
| 25088 | unsigned OpcodeSelect = ISD::VSELECT; |
| 25089 | SDLoc dl(Op); |
| 25090 | |
| 25091 | if (isAllOnesConstant(Mask)) |
| 25092 | return Op; |
| 25093 | |
| 25094 | SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl); |
| 25095 | |
| 25096 | if (PreservedSrc.isUndef()) |
| 25097 | PreservedSrc = getZeroVector(VT, Subtarget, DAG, dl); |
| 25098 | return DAG.getNode(OpcodeSelect, dl, VT, VMask, Op, PreservedSrc); |
| 25099 | } |
| 25100 | |
| 25101 | |
| 25102 | |
| 25103 | |
| 25104 | |
| 25105 | |
| 25106 | |
| 25107 | |
/// Creates an SDNode for a predicated scalar operation:
/// (X86ISD::SELECTS \p Mask, \p Op, \p PreservedSrc).
/// The incoming mask is MVT::i8 and is narrowed to MVT::v1i1 here. Unlike
/// getVectorMaskingNode, this uses X86ISD::SELECTS because a plain vselect
/// cannot be formed for a scalar instruction.
static SDValue getScalarMaskingNode(SDValue Op, SDValue Mask,
                                    SDValue PreservedSrc,
                                    const X86Subtarget &Subtarget,
                                    SelectionDAG &DAG) {
  // If the mask's low bit is known to be set, masking is a no-op.
  if (auto *MaskConst = dyn_cast<ConstantSDNode>(Mask))
    if (MaskConst->getZExtValue() & 0x1)
      return Op;

  MVT VT = Op.getSimpleValueType();
  SDLoc dl(Op);

  assert(Mask.getValueType() == MVT::i8 && "Unexpect type");
  // View the i8 mask as v8i1 and take only its lowest bit.
  SDValue IMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v1i1,
                              DAG.getBitcast(MVT::v8i1, Mask),
                              DAG.getIntPtrConstant(0, dl));
  // Compare/classify nodes produce a mask result directly, so applying the
  // predicate is just an AND.
  if (Op.getOpcode() == X86ISD::FSETCCM ||
      Op.getOpcode() == X86ISD::FSETCCM_SAE ||
      Op.getOpcode() == X86ISD::VFPCLASSS)
    return DAG.getNode(ISD::AND, dl, VT, Op, IMask);

  if (PreservedSrc.isUndef())
    PreservedSrc = getZeroVector(VT, Subtarget, DAG, dl);
  return DAG.getNode(X86ISD::SELECTS, dl, VT, IMask, Op, PreservedSrc);
}
| 25133 | |
| 25134 | static int getSEHRegistrationNodeSize(const Function *Fn) { |
| 25135 | if (!Fn->hasPersonalityFn()) |
| 25136 | report_fatal_error( |
| 25137 | "querying registration node size for function without personality"); |
| 25138 | |
| 25139 | |
| 25140 | switch (classifyEHPersonality(Fn->getPersonalityFn())) { |
| 25141 | case EHPersonality::MSVC_X86SEH: return 24; |
| 25142 | case EHPersonality::MSVC_CXX: return 16; |
| 25143 | default: break; |
| 25144 | } |
| 25145 | report_fatal_error( |
| 25146 | "can only recover FP for 32-bit MSVC EH personality functions"); |
| 25147 | } |
| 25148 | |
| 25149 | |
| 25150 | |
| 25151 | |
| 25152 | |
| 25153 | |
| 25154 | |
| 25155 | |
| 25156 | |
/// When the MSVC runtime transfers control to us, either to an outlined
/// function or when returning to a parent frame after catching an exception,
/// we recover the parent frame pointer by doing arithmetic on the incoming
/// EBP. The math on 32-bit targets is:
///   RegNodeBase = EntryEBP - RegNodeSize
///   ParentFP    = RegNodeBase - ParentFrameOffset
/// Subtracting RegNodeSize takes us to the offset of the registration node,
/// and subtracting the value of ParentFrameOffset takes us back to the
/// parent FP.
static SDValue recoverFramePointer(SelectionDAG &DAG, const Function *Fn,
                                   SDValue EntryEBP) {
  MachineFunction &MF = DAG.getMachineFunction();
  SDLoc dl;

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());

  // It's possible that the parent function no longer has a personality
  // function if the exceptional code was optimized away, in which case we
  // just return the incoming EBP.
  if (!Fn->hasPersonalityFn())
    return EntryEBP;

  // Get an MCSymbol that will ultimately resolve to the frame offset of the
  // EH registration node within the parent's frame.
  MCSymbol *OffsetSym =
      MF.getMMI().getContext().getOrCreateParentFrameOffsetSymbol(
          GlobalValue::dropLLVMManglingEscape(Fn->getName()));
  SDValue OffsetSymVal = DAG.getMCSymbol(OffsetSym, PtrVT);
  SDValue ParentFrameOffset =
      DAG.getNode(ISD::LOCAL_RECOVER, dl, PtrVT, OffsetSymVal);

  // On x86-64 the offset is relative to the incoming EBP/RBP directly, so no
  // registration-node adjustment is needed.
  const X86Subtarget &Subtarget =
      static_cast<const X86Subtarget &>(DAG.getSubtarget());
  if (Subtarget.is64Bit())
    return DAG.getNode(ISD::ADD, dl, PtrVT, EntryEBP, ParentFrameOffset);

  int RegNodeSize = getSEHRegistrationNodeSize(Fn);

  // RegNodeBase = EntryEBP - RegNodeSize
  // ParentFP = RegNodeBase - ParentFrameOffset
  SDValue RegNodeBase = DAG.getNode(ISD::SUB, dl, PtrVT, EntryEBP,
                                    DAG.getConstant(RegNodeSize, dl, PtrVT));
  return DAG.getNode(ISD::SUB, dl, PtrVT, RegNodeBase, ParentFrameOffset);
}
| 25194 | |
| 25195 | SDValue X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, |
| 25196 | SelectionDAG &DAG) const { |
| 25197 | |
| 25198 | auto isRoundModeCurDirection = [](SDValue Rnd) { |
| 25199 | if (auto *C = dyn_cast<ConstantSDNode>(Rnd)) |
| 25200 | return C->getAPIntValue() == X86::STATIC_ROUNDING::CUR_DIRECTION; |
| 25201 | |
| 25202 | return false; |
| 25203 | }; |
| 25204 | auto isRoundModeSAE = [](SDValue Rnd) { |
| 25205 | if (auto *C = dyn_cast<ConstantSDNode>(Rnd)) { |
| 25206 | unsigned RC = C->getZExtValue(); |
| 25207 | if (RC & X86::STATIC_ROUNDING::NO_EXC) { |
| 25208 | |
| 25209 | RC ^= X86::STATIC_ROUNDING::NO_EXC; |
| 25210 | |
| 25211 | |
| 25212 | return RC == 0 || RC == X86::STATIC_ROUNDING::CUR_DIRECTION; |
| 25213 | } |
| 25214 | } |
| 25215 | |
| 25216 | return false; |
| 25217 | }; |
| 25218 | auto isRoundModeSAEToX = [](SDValue Rnd, unsigned &RC) { |
| 25219 | if (auto *C = dyn_cast<ConstantSDNode>(Rnd)) { |
| 25220 | RC = C->getZExtValue(); |
| 25221 | if (RC & X86::STATIC_ROUNDING::NO_EXC) { |
| 25222 | |
| 25223 | RC ^= X86::STATIC_ROUNDING::NO_EXC; |
| 25224 | return RC == X86::STATIC_ROUNDING::TO_NEAREST_INT || |
| 25225 | RC == X86::STATIC_ROUNDING::TO_NEG_INF || |
| 25226 | RC == X86::STATIC_ROUNDING::TO_POS_INF || |
| 25227 | RC == X86::STATIC_ROUNDING::TO_ZERO; |
| 25228 | } |
| 25229 | } |
| 25230 | |
| 25231 | return false; |
| 25232 | }; |
| 25233 | |
| 25234 | SDLoc dl(Op); |
| 25235 | unsigned IntNo = Op.getConstantOperandVal(0); |
| 25236 | MVT VT = Op.getSimpleValueType(); |
| 25237 | const IntrinsicData* IntrData = getIntrinsicWithoutChain(IntNo); |
| 25238 | |
| 25239 | |
| 25240 | SelectionDAG::FlagInserter FlagsInserter(DAG, Op->getFlags()); |
| 25241 | |
| 25242 | if (IntrData) { |
| 25243 | switch(IntrData->Type) { |
| 25244 | case INTR_TYPE_1OP: { |
| 25245 | |
| 25246 | |
| 25247 | |
| 25248 | unsigned IntrWithRoundingModeOpcode = IntrData->Opc1; |
| 25249 | if (IntrWithRoundingModeOpcode != 0) { |
| 25250 | SDValue Rnd = Op.getOperand(2); |
| 25251 | unsigned RC = 0; |
| 25252 | if (isRoundModeSAEToX(Rnd, RC)) |
| 25253 | return DAG.getNode(IntrWithRoundingModeOpcode, dl, Op.getValueType(), |
| 25254 | Op.getOperand(1), |
| 25255 | DAG.getTargetConstant(RC, dl, MVT::i32)); |
| 25256 | if (!isRoundModeCurDirection(Rnd)) |
| 25257 | return SDValue(); |
| 25258 | } |
| 25259 | return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), |
| 25260 | Op.getOperand(1)); |
| 25261 | } |
| 25262 | case INTR_TYPE_1OP_SAE: { |
| 25263 | SDValue Sae = Op.getOperand(2); |
| 25264 | |
| 25265 | unsigned Opc; |
| 25266 | if (isRoundModeCurDirection(Sae)) |
| 25267 | Opc = IntrData->Opc0; |
| 25268 | else if (isRoundModeSAE(Sae)) |
| 25269 | Opc = IntrData->Opc1; |
| 25270 | else |
| 25271 | return SDValue(); |
| 25272 | |
| 25273 | return DAG.getNode(Opc, dl, Op.getValueType(), Op.getOperand(1)); |
| 25274 | } |
| 25275 | case INTR_TYPE_2OP: { |
| 25276 | SDValue Src2 = Op.getOperand(2); |
| 25277 | |
| 25278 | |
| 25279 | |
| 25280 | |
| 25281 | unsigned IntrWithRoundingModeOpcode = IntrData->Opc1; |
| 25282 | if (IntrWithRoundingModeOpcode != 0) { |
| 25283 | SDValue Rnd = Op.getOperand(3); |
| 25284 | unsigned RC = 0; |
| 25285 | if (isRoundModeSAEToX(Rnd, RC)) |
| 25286 | return DAG.getNode(IntrWithRoundingModeOpcode, dl, Op.getValueType(), |
| 25287 | Op.getOperand(1), Src2, |
| 25288 | DAG.getTargetConstant(RC, dl, MVT::i32)); |
| 25289 | if (!isRoundModeCurDirection(Rnd)) |
| 25290 | return SDValue(); |
| 25291 | } |
| 25292 | |
| 25293 | return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), |
| 25294 | Op.getOperand(1), Src2); |
| 25295 | } |
| 25296 | case INTR_TYPE_2OP_SAE: { |
| 25297 | SDValue Sae = Op.getOperand(3); |
| 25298 | |
| 25299 | unsigned Opc; |
| 25300 | if (isRoundModeCurDirection(Sae)) |
| 25301 | Opc = IntrData->Opc0; |
| 25302 | else if (isRoundModeSAE(Sae)) |
| 25303 | Opc = IntrData->Opc1; |
| 25304 | else |
| 25305 | return SDValue(); |
| 25306 | |
| 25307 | return DAG.getNode(Opc, dl, Op.getValueType(), Op.getOperand(1), |
| 25308 | Op.getOperand(2)); |
| 25309 | } |
| 25310 | case INTR_TYPE_3OP: |
| 25311 | case INTR_TYPE_3OP_IMM8: { |
| 25312 | SDValue Src1 = Op.getOperand(1); |
| 25313 | SDValue Src2 = Op.getOperand(2); |
| 25314 | SDValue Src3 = Op.getOperand(3); |
| 25315 | |
| 25316 | if (IntrData->Type == INTR_TYPE_3OP_IMM8 && |
| 25317 | Src3.getValueType() != MVT::i8) { |
| 25318 | Src3 = DAG.getTargetConstant( |
| 25319 | cast<ConstantSDNode>(Src3)->getZExtValue() & 0xff, dl, MVT::i8); |
| 25320 | } |
| 25321 | |
| 25322 | |
| 25323 | |
| 25324 | |
| 25325 | unsigned IntrWithRoundingModeOpcode = IntrData->Opc1; |
| 25326 | if (IntrWithRoundingModeOpcode != 0) { |
| 25327 | SDValue Rnd = Op.getOperand(4); |
| 25328 | unsigned RC = 0; |
| 25329 | if (isRoundModeSAEToX(Rnd, RC)) |
| 25330 | return DAG.getNode(IntrWithRoundingModeOpcode, dl, Op.getValueType(), |
| 25331 | Src1, Src2, Src3, |
| 25332 | DAG.getTargetConstant(RC, dl, MVT::i32)); |
| 25333 | if (!isRoundModeCurDirection(Rnd)) |
| 25334 | return SDValue(); |
| 25335 | } |
| 25336 | |
| 25337 | return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), |
| 25338 | {Src1, Src2, Src3}); |
| 25339 | } |
| 25340 | case INTR_TYPE_4OP_IMM8: { |
| 25341 | assert(Op.getOperand(4)->getOpcode() == ISD::TargetConstant); |
| 25342 | SDValue Src4 = Op.getOperand(4); |
| 25343 | if (Src4.getValueType() != MVT::i8) { |
| 25344 | Src4 = DAG.getTargetConstant( |
| 25345 | cast<ConstantSDNode>(Src4)->getZExtValue() & 0xff, dl, MVT::i8); |
| 25346 | } |
| 25347 | |
| 25348 | return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), |
| 25349 | Op.getOperand(1), Op.getOperand(2), Op.getOperand(3), |
| 25350 | Src4); |
| 25351 | } |
| 25352 | case INTR_TYPE_1OP_MASK: { |
| 25353 | SDValue Src = Op.getOperand(1); |
| 25354 | SDValue PassThru = Op.getOperand(2); |
| 25355 | SDValue Mask = Op.getOperand(3); |
| 25356 | |
| 25357 | |
| 25358 | |
| 25359 | unsigned IntrWithRoundingModeOpcode = IntrData->Opc1; |
| 25360 | if (IntrWithRoundingModeOpcode != 0) { |
| 25361 | SDValue Rnd = Op.getOperand(4); |
| 25362 | unsigned RC = 0; |
| 25363 | if (isRoundModeSAEToX(Rnd, RC)) |
| 25364 | return getVectorMaskingNode( |
| 25365 | DAG.getNode(IntrWithRoundingModeOpcode, dl, Op.getValueType(), |
| 25366 | Src, DAG.getTargetConstant(RC, dl, MVT::i32)), |
| 25367 | Mask, PassThru, Subtarget, DAG); |
| 25368 | if (!isRoundModeCurDirection(Rnd)) |
| 25369 | return SDValue(); |
| 25370 | } |
| 25371 | return getVectorMaskingNode( |
| 25372 | DAG.getNode(IntrData->Opc0, dl, VT, Src), Mask, PassThru, |
| 25373 | Subtarget, DAG); |
| 25374 | } |
| 25375 | case INTR_TYPE_1OP_MASK_SAE: { |
| 25376 | SDValue Src = Op.getOperand(1); |
| 25377 | SDValue PassThru = Op.getOperand(2); |
| 25378 | SDValue Mask = Op.getOperand(3); |
| 25379 | SDValue Rnd = Op.getOperand(4); |
| 25380 | |
| 25381 | unsigned Opc; |
| 25382 | if (isRoundModeCurDirection(Rnd)) |
| 25383 | Opc = IntrData->Opc0; |
| 25384 | else if (isRoundModeSAE(Rnd)) |
| 25385 | Opc = IntrData->Opc1; |
| 25386 | else |
| 25387 | return SDValue(); |
| 25388 | |
| 25389 | return getVectorMaskingNode(DAG.getNode(Opc, dl, VT, Src), Mask, PassThru, |
| 25390 | Subtarget, DAG); |
| 25391 | } |
| 25392 | case INTR_TYPE_SCALAR_MASK: { |
| 25393 | SDValue Src1 = Op.getOperand(1); |
| 25394 | SDValue Src2 = Op.getOperand(2); |
| 25395 | SDValue passThru = Op.getOperand(3); |
| 25396 | SDValue Mask = Op.getOperand(4); |
| 25397 | unsigned IntrWithRoundingModeOpcode = IntrData->Opc1; |
| 25398 | |
| 25399 | |
| 25400 | |
| 25401 | bool HasRounding = IntrWithRoundingModeOpcode != 0; |
| 25402 | if (Op.getNumOperands() == (5U + HasRounding)) { |
| 25403 | if (HasRounding) { |
| 25404 | SDValue Rnd = Op.getOperand(5); |
| 25405 | unsigned RC = 0; |
| 25406 | if (isRoundModeSAEToX(Rnd, RC)) |
| 25407 | return getScalarMaskingNode( |
| 25408 | DAG.getNode(IntrWithRoundingModeOpcode, dl, VT, Src1, Src2, |
| 25409 | DAG.getTargetConstant(RC, dl, MVT::i32)), |
| 25410 | Mask, passThru, Subtarget, DAG); |
| 25411 | if (!isRoundModeCurDirection(Rnd)) |
| 25412 | return SDValue(); |
| 25413 | } |
| 25414 | return getScalarMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src1, |
| 25415 | Src2), |
| 25416 | Mask, passThru, Subtarget, DAG); |
| 25417 | } |
| 25418 | |
| 25419 | assert(Op.getNumOperands() == (6U + HasRounding) && |
| 25420 | "Unexpected intrinsic form"); |
| 25421 | SDValue RoundingMode = Op.getOperand(5); |
| 25422 | unsigned Opc = IntrData->Opc0; |
| 25423 | if (HasRounding) { |
| 25424 | SDValue Sae = Op.getOperand(6); |
| 25425 | if (isRoundModeSAE(Sae)) |
| 25426 | Opc = IntrWithRoundingModeOpcode; |
| 25427 | else if (!isRoundModeCurDirection(Sae)) |
| 25428 | return SDValue(); |
| 25429 | } |
| 25430 | return getScalarMaskingNode(DAG.getNode(Opc, dl, VT, Src1, |
| 25431 | Src2, RoundingMode), |
| 25432 | Mask, passThru, Subtarget, DAG); |
| 25433 | } |
| 25434 | case INTR_TYPE_SCALAR_MASK_RND: { |
| 25435 | SDValue Src1 = Op.getOperand(1); |
| 25436 | SDValue Src2 = Op.getOperand(2); |
| 25437 | SDValue passThru = Op.getOperand(3); |
| 25438 | SDValue Mask = Op.getOperand(4); |
| 25439 | SDValue Rnd = Op.getOperand(5); |
| 25440 | |
| 25441 | SDValue NewOp; |
| 25442 | unsigned RC = 0; |
| 25443 | if (isRoundModeCurDirection(Rnd)) |
| 25444 | NewOp = DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2); |
| 25445 | else if (isRoundModeSAEToX(Rnd, RC)) |
| 25446 | NewOp = DAG.getNode(IntrData->Opc1, dl, VT, Src1, Src2, |
| 25447 | DAG.getTargetConstant(RC, dl, MVT::i32)); |
| 25448 | else |
| 25449 | return SDValue(); |
| 25450 | |
| 25451 | return getScalarMaskingNode(NewOp, Mask, passThru, Subtarget, DAG); |
| 25452 | } |
| 25453 | case INTR_TYPE_SCALAR_MASK_SAE: { |
| 25454 | SDValue Src1 = Op.getOperand(1); |
| 25455 | SDValue Src2 = Op.getOperand(2); |
| 25456 | SDValue passThru = Op.getOperand(3); |
| 25457 | SDValue Mask = Op.getOperand(4); |
| 25458 | SDValue Sae = Op.getOperand(5); |
| 25459 | unsigned Opc; |
| 25460 | if (isRoundModeCurDirection(Sae)) |
| 25461 | Opc = IntrData->Opc0; |
| 25462 | else if (isRoundModeSAE(Sae)) |
| 25463 | Opc = IntrData->Opc1; |
| 25464 | else |
| 25465 | return SDValue(); |
| 25466 | |
| 25467 | return getScalarMaskingNode(DAG.getNode(Opc, dl, VT, Src1, Src2), |
| 25468 | Mask, passThru, Subtarget, DAG); |
| 25469 | } |
| 25470 | case INTR_TYPE_2OP_MASK: { |
| 25471 | SDValue Src1 = Op.getOperand(1); |
| 25472 | SDValue Src2 = Op.getOperand(2); |
| 25473 | SDValue PassThru = Op.getOperand(3); |
| 25474 | SDValue Mask = Op.getOperand(4); |
| 25475 | SDValue NewOp; |
| 25476 | if (IntrData->Opc1 != 0) { |
| 25477 | SDValue Rnd = Op.getOperand(5); |
| 25478 | unsigned RC = 0; |
| 25479 | if (isRoundModeSAEToX(Rnd, RC)) |
| 25480 | NewOp = DAG.getNode(IntrData->Opc1, dl, VT, Src1, Src2, |
| 25481 | DAG.getTargetConstant(RC, dl, MVT::i32)); |
| 25482 | else if (!isRoundModeCurDirection(Rnd)) |
| 25483 | return SDValue(); |
| 25484 | } |
| 25485 | if (!NewOp) |
| 25486 | NewOp = DAG.getNode(IntrData->Opc0, dl, VT, Src1, Src2); |
| 25487 | return getVectorMaskingNode(NewOp, Mask, PassThru, Subtarget, DAG); |
| 25488 | } |
| 25489 | case INTR_TYPE_2OP_MASK_SAE: { |
| 25490 | SDValue Src1 = Op.getOperand(1); |
| 25491 | SDValue Src2 = Op.getOperand(2); |
| 25492 | SDValue PassThru = Op.getOperand(3); |
| 25493 | SDValue Mask = Op.getOperand(4); |
| 25494 | |
| 25495 | unsigned Opc = IntrData->Opc0; |
| 25496 | if (IntrData->Opc1 != 0) { |
| 25497 | SDValue Sae = Op.getOperand(5); |
| 25498 | if (isRoundModeSAE(Sae)) |
| 25499 | Opc = IntrData->Opc1; |
| 25500 | else if (!isRoundModeCurDirection(Sae)) |
| 25501 | return SDValue(); |
| 25502 | } |
| 25503 | |
| 25504 | return getVectorMaskingNode(DAG.getNode(Opc, dl, VT, Src1, Src2), |
| 25505 | Mask, PassThru, Subtarget, DAG); |
| 25506 | } |
| 25507 | case INTR_TYPE_3OP_SCALAR_MASK_SAE: { |
| 25508 | SDValue Src1 = Op.getOperand(1); |
| 25509 | SDValue Src2 = Op.getOperand(2); |
| 25510 | SDValue Src3 = Op.getOperand(3); |
| 25511 | SDValue PassThru = Op.getOperand(4); |
| 25512 | SDValue Mask = Op.getOperand(5); |
| 25513 | SDValue Sae = Op.getOperand(6); |
| 25514 | unsigned Opc; |
| 25515 | if (isRoundModeCurDirection(Sae)) |
| 25516 | Opc = IntrData->Opc0; |
| 25517 | else if (isRoundModeSAE(Sae)) |
| 25518 | Opc = IntrData->Opc1; |
| 25519 | else |
| 25520 | return SDValue(); |
| 25521 | |
| 25522 | return getScalarMaskingNode(DAG.getNode(Opc, dl, VT, Src1, Src2, Src3), |
| 25523 | Mask, PassThru, Subtarget, DAG); |
| 25524 | } |
| 25525 | case INTR_TYPE_3OP_MASK_SAE: { |
| 25526 | SDValue Src1 = Op.getOperand(1); |
| 25527 | SDValue Src2 = Op.getOperand(2); |
| 25528 | SDValue Src3 = Op.getOperand(3); |
| 25529 | SDValue PassThru = Op.getOperand(4); |
| 25530 | SDValue Mask = Op.getOperand(5); |
| 25531 | |
| 25532 | unsigned Opc = IntrData->Opc0; |
| 25533 | if (IntrData->Opc1 != 0) { |
| 25534 | SDValue Sae = Op.getOperand(6); |
| 25535 | if (isRoundModeSAE(Sae)) |
| 25536 | Opc = IntrData->Opc1; |
| 25537 | else if (!isRoundModeCurDirection(Sae)) |
| 25538 | return SDValue(); |
| 25539 | } |
| 25540 | return getVectorMaskingNode(DAG.getNode(Opc, dl, VT, Src1, Src2, Src3), |
| 25541 | Mask, PassThru, Subtarget, DAG); |
| 25542 | } |
| 25543 | case BLENDV: { |
| 25544 | SDValue Src1 = Op.getOperand(1); |
| 25545 | SDValue Src2 = Op.getOperand(2); |
| 25546 | SDValue Src3 = Op.getOperand(3); |
| 25547 | |
| 25548 | EVT MaskVT = Src3.getValueType().changeVectorElementTypeToInteger(); |
| 25549 | Src3 = DAG.getBitcast(MaskVT, Src3); |
| 25550 | |
| 25551 | |
| 25552 | return DAG.getNode(IntrData->Opc0, dl, VT, Src3, Src2, Src1); |
| 25553 | } |
| 25554 | case VPERM_2OP : { |
| 25555 | SDValue Src1 = Op.getOperand(1); |
| 25556 | SDValue Src2 = Op.getOperand(2); |
| 25557 | |
| 25558 | |
| 25559 | return DAG.getNode(IntrData->Opc0, dl, VT,Src2, Src1); |
| 25560 | } |
| 25561 | case IFMA_OP: |
| 25562 | |
| 25563 | |
| 25564 | return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), |
| 25565 | Op.getOperand(2), Op.getOperand(3), Op.getOperand(1)); |
| 25566 | case FPCLASSS: { |
| 25567 | SDValue Src1 = Op.getOperand(1); |
| 25568 | SDValue Imm = Op.getOperand(2); |
| 25569 | SDValue Mask = Op.getOperand(3); |
| 25570 | SDValue FPclass = DAG.getNode(IntrData->Opc0, dl, MVT::v1i1, Src1, Imm); |
| 25571 | SDValue FPclassMask = getScalarMaskingNode(FPclass, Mask, SDValue(), |
| 25572 | Subtarget, DAG); |
| 25573 | |
| 25574 | |
| 25575 | SDValue Ins = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v8i1, |
| 25576 | DAG.getConstant(0, dl, MVT::v8i1), |
| 25577 | FPclassMask, DAG.getIntPtrConstant(0, dl)); |
| 25578 | return DAG.getBitcast(MVT::i8, Ins); |
| 25579 | } |
| 25580 | |
| 25581 | case CMP_MASK_CC: { |
| 25582 | MVT MaskVT = Op.getSimpleValueType(); |
| 25583 | SDValue CC = Op.getOperand(3); |
| 25584 | SDValue Mask = Op.getOperand(4); |
| 25585 | |
| 25586 | |
| 25587 | |
| 25588 | if (IntrData->Opc1 != 0) { |
| 25589 | SDValue Sae = Op.getOperand(5); |
| 25590 | if (isRoundModeSAE(Sae)) |
| 25591 | return DAG.getNode(IntrData->Opc1, dl, MaskVT, Op.getOperand(1), |
| 25592 | Op.getOperand(2), CC, Mask, Sae); |
| 25593 | if (!isRoundModeCurDirection(Sae)) |
| 25594 | return SDValue(); |
| 25595 | } |
| 25596 | |
| 25597 | return DAG.getNode(IntrData->Opc0, dl, MaskVT, |
| 25598 | {Op.getOperand(1), Op.getOperand(2), CC, Mask}); |
| 25599 | } |
| 25600 | case CMP_MASK_SCALAR_CC: { |
| 25601 | SDValue Src1 = Op.getOperand(1); |
| 25602 | SDValue Src2 = Op.getOperand(2); |
| 25603 | SDValue CC = Op.getOperand(3); |
| 25604 | SDValue Mask = Op.getOperand(4); |
| 25605 | |
| 25606 | SDValue Cmp; |
| 25607 | if (IntrData->Opc1 != 0) { |
| 25608 | SDValue Sae = Op.getOperand(5); |
| 25609 | if (isRoundModeSAE(Sae)) |
| 25610 | Cmp = DAG.getNode(IntrData->Opc1, dl, MVT::v1i1, Src1, Src2, CC, Sae); |
| 25611 | else if (!isRoundModeCurDirection(Sae)) |
| 25612 | return SDValue(); |
| 25613 | } |
| 25614 | |
| 25615 | if (!Cmp.getNode()) |
| 25616 | Cmp = DAG.getNode(IntrData->Opc0, dl, MVT::v1i1, Src1, Src2, CC); |
| 25617 | |
| 25618 | SDValue CmpMask = getScalarMaskingNode(Cmp, Mask, SDValue(), |
| 25619 | Subtarget, DAG); |
| 25620 | |
| 25621 | |
| 25622 | SDValue Ins = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v8i1, |
| 25623 | DAG.getConstant(0, dl, MVT::v8i1), |
| 25624 | CmpMask, DAG.getIntPtrConstant(0, dl)); |
| 25625 | return DAG.getBitcast(MVT::i8, Ins); |
| 25626 | } |
| 25627 | case COMI: { |
| 25628 | ISD::CondCode CC = (ISD::CondCode)IntrData->Opc1; |
| 25629 | SDValue LHS = Op.getOperand(1); |
| 25630 | SDValue RHS = Op.getOperand(2); |
| 25631 | |
| 25632 | if (CC == ISD::SETLT || CC == ISD::SETLE) |
| 25633 | std::swap(LHS, RHS); |
| 25634 | |
| 25635 | SDValue Comi = DAG.getNode(IntrData->Opc0, dl, MVT::i32, LHS, RHS); |
| 25636 | SDValue SetCC; |
| 25637 | switch (CC) { |
| 25638 | case ISD::SETEQ: { |
| 25639 | SetCC = getSETCC(X86::COND_E, Comi, dl, DAG); |
| 25640 | SDValue SetNP = getSETCC(X86::COND_NP, Comi, dl, DAG); |
| 25641 | SetCC = DAG.getNode(ISD::AND, dl, MVT::i8, SetCC, SetNP); |
| 25642 | break; |
| 25643 | } |
| 25644 | case ISD::SETNE: { |
| 25645 | SetCC = getSETCC(X86::COND_NE, Comi, dl, DAG); |
| 25646 | SDValue SetP = getSETCC(X86::COND_P, Comi, dl, DAG); |
| 25647 | SetCC = DAG.getNode(ISD::OR, dl, MVT::i8, SetCC, SetP); |
| 25648 | break; |
| 25649 | } |
| 25650 | case ISD::SETGT: |
| 25651 | case ISD::SETLT: { |
| 25652 | SetCC = getSETCC(X86::COND_A, Comi, dl, DAG); |
| 25653 | break; |
| 25654 | } |
| 25655 | case ISD::SETGE: |
| 25656 | case ISD::SETLE: |
| 25657 | SetCC = getSETCC(X86::COND_AE, Comi, dl, DAG); |
| 25658 | break; |
| 25659 | default: |
| 25660 | llvm_unreachable("Unexpected illegal condition!"); |
| 25661 | } |
| 25662 | return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC); |
| 25663 | } |
| 25664 | case COMI_RM: { |
| 25665 | SDValue LHS = Op.getOperand(1); |
| 25666 | SDValue RHS = Op.getOperand(2); |
| 25667 | unsigned CondVal = Op.getConstantOperandVal(3); |
| 25668 | SDValue Sae = Op.getOperand(4); |
| 25669 | |
| 25670 | SDValue FCmp; |
| 25671 | if (isRoundModeCurDirection(Sae)) |
| 25672 | FCmp = DAG.getNode(X86ISD::FSETCCM, dl, MVT::v1i1, LHS, RHS, |
| 25673 | DAG.getTargetConstant(CondVal, dl, MVT::i8)); |
| 25674 | else if (isRoundModeSAE(Sae)) |
| 25675 | FCmp = DAG.getNode(X86ISD::FSETCCM_SAE, dl, MVT::v1i1, LHS, RHS, |
| 25676 | DAG.getTargetConstant(CondVal, dl, MVT::i8), Sae); |
| 25677 | else |
| 25678 | return SDValue(); |
| 25679 | |
| 25680 | |
| 25681 | SDValue Ins = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, MVT::v16i1, |
| 25682 | DAG.getConstant(0, dl, MVT::v16i1), |
| 25683 | FCmp, DAG.getIntPtrConstant(0, dl)); |
| 25684 | return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, |
| 25685 | DAG.getBitcast(MVT::i16, Ins)); |
| 25686 | } |
| 25687 | case VSHIFT: |
| 25688 | return getTargetVShiftNode(IntrData->Opc0, dl, Op.getSimpleValueType(), |
| 25689 | Op.getOperand(1), Op.getOperand(2), Subtarget, |
| 25690 | DAG); |
| 25691 | case COMPRESS_EXPAND_IN_REG: { |
| 25692 | SDValue Mask = Op.getOperand(3); |
| 25693 | SDValue DataToCompress = Op.getOperand(1); |
| 25694 | SDValue PassThru = Op.getOperand(2); |
| 25695 | if (ISD::isBuildVectorAllOnes(Mask.getNode())) |
| 25696 | return Op.getOperand(1); |
| 25697 | |
| 25698 | |
| 25699 | if (PassThru.isUndef()) |
| 25700 | PassThru = DAG.getConstant(0, dl, VT); |
| 25701 | |
| 25702 | return DAG.getNode(IntrData->Opc0, dl, VT, DataToCompress, PassThru, |
| 25703 | Mask); |
| 25704 | } |
| 25705 | case FIXUPIMM: |
| 25706 | case FIXUPIMM_MASKZ: { |
| 25707 | SDValue Src1 = Op.getOperand(1); |
| 25708 | SDValue Src2 = Op.getOperand(2); |
| 25709 | SDValue Src3 = Op.getOperand(3); |
| 25710 | SDValue Imm = Op.getOperand(4); |
| 25711 | SDValue Mask = Op.getOperand(5); |
| 25712 | SDValue Passthru = (IntrData->Type == FIXUPIMM) |
| 25713 | ? Src1 |
| 25714 | : getZeroVector(VT, Subtarget, DAG, dl); |
| 25715 | |
| 25716 | unsigned Opc = IntrData->Opc0; |
| 25717 | if (IntrData->Opc1 != 0) { |
| 25718 | SDValue Sae = Op.getOperand(6); |
| 25719 | if (isRoundModeSAE(Sae)) |
| 25720 | Opc = IntrData->Opc1; |
| 25721 | else if (!isRoundModeCurDirection(Sae)) |
| 25722 | return SDValue(); |
| 25723 | } |
| 25724 | |
| 25725 | SDValue FixupImm = DAG.getNode(Opc, dl, VT, Src1, Src2, Src3, Imm); |
| 25726 | |
| 25727 | if (Opc == X86ISD::VFIXUPIMM || Opc == X86ISD::VFIXUPIMM_SAE) |
| 25728 | return getVectorMaskingNode(FixupImm, Mask, Passthru, Subtarget, DAG); |
| 25729 | |
| 25730 | return getScalarMaskingNode(FixupImm, Mask, Passthru, Subtarget, DAG); |
| 25731 | } |
| 25732 | case ROUNDP: { |
| 25733 | assert(IntrData->Opc0 == X86ISD::VRNDSCALE && "Unexpected opcode"); |
| 25734 | |
| 25735 | |
| 25736 | auto Round = cast<ConstantSDNode>(Op.getOperand(2)); |
| 25737 | SDValue RoundingMode = |
| 25738 | DAG.getTargetConstant(Round->getZExtValue() & 0xf, dl, MVT::i32); |
| 25739 | return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), |
| 25740 | Op.getOperand(1), RoundingMode); |
| 25741 | } |
| 25742 | case ROUNDS: { |
| 25743 | assert(IntrData->Opc0 == X86ISD::VRNDSCALES && "Unexpected opcode"); |
| 25744 | |
| 25745 | |
| 25746 | auto Round = cast<ConstantSDNode>(Op.getOperand(3)); |
| 25747 | SDValue RoundingMode = |
| 25748 | DAG.getTargetConstant(Round->getZExtValue() & 0xf, dl, MVT::i32); |
| 25749 | return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), |
| 25750 | Op.getOperand(1), Op.getOperand(2), RoundingMode); |
| 25751 | } |
| 25752 | case BEXTRI: { |
| 25753 | assert(IntrData->Opc0 == X86ISD::BEXTRI && "Unexpected opcode"); |
| 25754 | |
| 25755 | uint64_t Imm = Op.getConstantOperandVal(2); |
| 25756 | SDValue Control = DAG.getTargetConstant(Imm & 0xffff, dl, |
| 25757 | Op.getValueType()); |
| 25758 | return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), |
| 25759 | Op.getOperand(1), Control); |
| 25760 | } |
| 25761 | |
| 25762 | case ADX: { |
| 25763 | SDVTList CFVTs = DAG.getVTList(Op->getValueType(0), MVT::i32); |
| 25764 | SDVTList VTs = DAG.getVTList(Op.getOperand(2).getValueType(), MVT::i32); |
| 25765 | |
| 25766 | SDValue Res; |
| 25767 | |
| 25768 | |
| 25769 | if (isNullConstant(Op.getOperand(1))) { |
| 25770 | Res = DAG.getNode(IntrData->Opc1, dl, VTs, Op.getOperand(2), |
| 25771 | Op.getOperand(3)); |
| 25772 | } else { |
| 25773 | SDValue GenCF = DAG.getNode(X86ISD::ADD, dl, CFVTs, Op.getOperand(1), |
| 25774 | DAG.getConstant(-1, dl, MVT::i8)); |
| 25775 | Res = DAG.getNode(IntrData->Opc0, dl, VTs, Op.getOperand(2), |
| 25776 | Op.getOperand(3), GenCF.getValue(1)); |
| 25777 | } |
| 25778 | SDValue SetCC = getSETCC(X86::COND_B, Res.getValue(1), dl, DAG); |
| 25779 | SDValue Results[] = { SetCC, Res }; |
| 25780 | return DAG.getMergeValues(Results, dl); |
| 25781 | } |
| 25782 | case CVTPD2PS_MASK: |
| 25783 | case CVTPD2DQ_MASK: |
| 25784 | case CVTQQ2PS_MASK: |
| 25785 | case TRUNCATE_TO_REG: { |
| 25786 | SDValue Src = Op.getOperand(1); |
| 25787 | SDValue PassThru = Op.getOperand(2); |
| 25788 | SDValue Mask = Op.getOperand(3); |
| 25789 | |
| 25790 | if (isAllOnesConstant(Mask)) |
| 25791 | return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Src); |
| 25792 | |
| 25793 | MVT SrcVT = Src.getSimpleValueType(); |
| 25794 | MVT MaskVT = MVT::getVectorVT(MVT::i1, SrcVT.getVectorNumElements()); |
| 25795 | Mask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl); |
| 25796 | return DAG.getNode(IntrData->Opc1, dl, Op.getValueType(), |
| 25797 | {Src, PassThru, Mask}); |
| 25798 | } |
| 25799 | case CVTPS2PH_MASK: { |
| 25800 | SDValue Src = Op.getOperand(1); |
| 25801 | SDValue Rnd = Op.getOperand(2); |
| 25802 | SDValue PassThru = Op.getOperand(3); |
| 25803 | SDValue Mask = Op.getOperand(4); |
| 25804 | |
| 25805 | if (isAllOnesConstant(Mask)) |
| 25806 | return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Src, Rnd); |
| 25807 | |
| 25808 | MVT SrcVT = Src.getSimpleValueType(); |
| 25809 | MVT MaskVT = MVT::getVectorVT(MVT::i1, SrcVT.getVectorNumElements()); |
| 25810 | Mask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl); |
| 25811 | return DAG.getNode(IntrData->Opc1, dl, Op.getValueType(), Src, Rnd, |
| 25812 | PassThru, Mask); |
| 25813 | |
| 25814 | } |
| 25815 | case CVTNEPS2BF16_MASK: { |
| 25816 | SDValue Src = Op.getOperand(1); |
| 25817 | SDValue PassThru = Op.getOperand(2); |
| 25818 | SDValue Mask = Op.getOperand(3); |
| 25819 | |
| 25820 | if (ISD::isBuildVectorAllOnes(Mask.getNode())) |
| 25821 | return DAG.getNode(IntrData->Opc0, dl, Op.getValueType(), Src); |
| 25822 | |
| 25823 | |
| 25824 | if (PassThru.isUndef()) |
| 25825 | PassThru = DAG.getConstant(0, dl, PassThru.getValueType()); |
| 25826 | |
| 25827 | return DAG.getNode(IntrData->Opc1, dl, Op.getValueType(), Src, PassThru, |
| 25828 | Mask); |
| 25829 | } |
| 25830 | default: |
| 25831 | break; |
| 25832 | } |
| 25833 | } |
| 25834 | |
| 25835 | switch (IntNo) { |
| 25836 | default: return SDValue(); |
| 25837 | |
| 25838 | |
| 25839 | |
| 25840 | |
| 25841 | case Intrinsic::x86_avx512_ktestc_b: |
| 25842 | case Intrinsic::x86_avx512_ktestc_w: |
| 25843 | case Intrinsic::x86_avx512_ktestc_d: |
| 25844 | case Intrinsic::x86_avx512_ktestc_q: |
| 25845 | case Intrinsic::x86_avx512_ktestz_b: |
| 25846 | case Intrinsic::x86_avx512_ktestz_w: |
| 25847 | case Intrinsic::x86_avx512_ktestz_d: |
| 25848 | case Intrinsic::x86_avx512_ktestz_q: |
| 25849 | case Intrinsic::x86_sse41_ptestz: |
| 25850 | case Intrinsic::x86_sse41_ptestc: |
| 25851 | case Intrinsic::x86_sse41_ptestnzc: |
| 25852 | case Intrinsic::x86_avx_ptestz_256: |
| 25853 | case Intrinsic::x86_avx_ptestc_256: |
| 25854 | case Intrinsic::x86_avx_ptestnzc_256: |
| 25855 | case Intrinsic::x86_avx_vtestz_ps: |
| 25856 | case Intrinsic::x86_avx_vtestc_ps: |
| 25857 | case Intrinsic::x86_avx_vtestnzc_ps: |
| 25858 | case Intrinsic::x86_avx_vtestz_pd: |
| 25859 | case Intrinsic::x86_avx_vtestc_pd: |
| 25860 | case Intrinsic::x86_avx_vtestnzc_pd: |
| 25861 | case Intrinsic::x86_avx_vtestz_ps_256: |
| 25862 | case Intrinsic::x86_avx_vtestc_ps_256: |
| 25863 | case Intrinsic::x86_avx_vtestnzc_ps_256: |
| 25864 | case Intrinsic::x86_avx_vtestz_pd_256: |
| 25865 | case Intrinsic::x86_avx_vtestc_pd_256: |
| 25866 | case Intrinsic::x86_avx_vtestnzc_pd_256: { |
| 25867 | unsigned TestOpc = X86ISD::PTEST; |
| 25868 | X86::CondCode X86CC; |
| 25869 | switch (IntNo) { |
| 25870 | default: llvm_unreachable("Bad fallthrough in Intrinsic lowering."); |
| 25871 | case Intrinsic::x86_avx512_ktestc_b: |
| 25872 | case Intrinsic::x86_avx512_ktestc_w: |
| 25873 | case Intrinsic::x86_avx512_ktestc_d: |
| 25874 | case Intrinsic::x86_avx512_ktestc_q: |
| 25875 | |
| 25876 | TestOpc = X86ISD::KTEST; |
| 25877 | X86CC = X86::COND_B; |
| 25878 | break; |
| 25879 | case Intrinsic::x86_avx512_ktestz_b: |
| 25880 | case Intrinsic::x86_avx512_ktestz_w: |
| 25881 | case Intrinsic::x86_avx512_ktestz_d: |
| 25882 | case Intrinsic::x86_avx512_ktestz_q: |
| 25883 | TestOpc = X86ISD::KTEST; |
| 25884 | X86CC = X86::COND_E; |
| 25885 | break; |
| 25886 | case Intrinsic::x86_avx_vtestz_ps: |
| 25887 | case Intrinsic::x86_avx_vtestz_pd: |
| 25888 | case Intrinsic::x86_avx_vtestz_ps_256: |
| 25889 | case Intrinsic::x86_avx_vtestz_pd_256: |
| 25890 | TestOpc = X86ISD::TESTP; |
| 25891 | LLVM_FALLTHROUGH; |
| 25892 | case Intrinsic::x86_sse41_ptestz: |
| 25893 | case Intrinsic::x86_avx_ptestz_256: |
| 25894 | |
| 25895 | X86CC = X86::COND_E; |
| 25896 | break; |
| 25897 | case Intrinsic::x86_avx_vtestc_ps: |
| 25898 | case Intrinsic::x86_avx_vtestc_pd: |
| 25899 | case Intrinsic::x86_avx_vtestc_ps_256: |
| 25900 | case Intrinsic::x86_avx_vtestc_pd_256: |
| 25901 | TestOpc = X86ISD::TESTP; |
| 25902 | LLVM_FALLTHROUGH; |
| 25903 | case Intrinsic::x86_sse41_ptestc: |
| 25904 | case Intrinsic::x86_avx_ptestc_256: |
| 25905 | |
| 25906 | X86CC = X86::COND_B; |
| 25907 | break; |
| 25908 | case Intrinsic::x86_avx_vtestnzc_ps: |
| 25909 | case Intrinsic::x86_avx_vtestnzc_pd: |
| 25910 | case Intrinsic::x86_avx_vtestnzc_ps_256: |
| 25911 | case Intrinsic::x86_avx_vtestnzc_pd_256: |
| 25912 | TestOpc = X86ISD::TESTP; |
| 25913 | LLVM_FALLTHROUGH; |
| 25914 | case Intrinsic::x86_sse41_ptestnzc: |
| 25915 | case Intrinsic::x86_avx_ptestnzc_256: |
| 25916 | |
| 25917 | X86CC = X86::COND_A; |
| 25918 | break; |
| 25919 | } |
| 25920 | |
| 25921 | SDValue LHS = Op.getOperand(1); |
| 25922 | SDValue RHS = Op.getOperand(2); |
| 25923 | SDValue Test = DAG.getNode(TestOpc, dl, MVT::i32, LHS, RHS); |
| 25924 | SDValue SetCC = getSETCC(X86CC, Test, dl, DAG); |
| 25925 | return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC); |
| 25926 | } |
| 25927 | |
| 25928 | case Intrinsic::x86_sse42_pcmpistria128: |
| 25929 | case Intrinsic::x86_sse42_pcmpestria128: |
| 25930 | case Intrinsic::x86_sse42_pcmpistric128: |
| 25931 | case Intrinsic::x86_sse42_pcmpestric128: |
| 25932 | case Intrinsic::x86_sse42_pcmpistrio128: |
| 25933 | case Intrinsic::x86_sse42_pcmpestrio128: |
| 25934 | case Intrinsic::x86_sse42_pcmpistris128: |
| 25935 | case Intrinsic::x86_sse42_pcmpestris128: |
| 25936 | case Intrinsic::x86_sse42_pcmpistriz128: |
| 25937 | case Intrinsic::x86_sse42_pcmpestriz128: { |
| 25938 | unsigned Opcode; |
| 25939 | X86::CondCode X86CC; |
| 25940 | switch (IntNo) { |
| 25941 | default: llvm_unreachable("Impossible intrinsic"); |
| 25942 | case Intrinsic::x86_sse42_pcmpistria128: |
| 25943 | Opcode = X86ISD::PCMPISTR; |
| 25944 | X86CC = X86::COND_A; |
| 25945 | break; |
| 25946 | case Intrinsic::x86_sse42_pcmpestria128: |
| 25947 | Opcode = X86ISD::PCMPESTR; |
| 25948 | X86CC = X86::COND_A; |
| 25949 | break; |
| 25950 | case Intrinsic::x86_sse42_pcmpistric128: |
| 25951 | Opcode = X86ISD::PCMPISTR; |
| 25952 | X86CC = X86::COND_B; |
| 25953 | break; |
| 25954 | case Intrinsic::x86_sse42_pcmpestric128: |
| 25955 | Opcode = X86ISD::PCMPESTR; |
| 25956 | X86CC = X86::COND_B; |
| 25957 | break; |
| 25958 | case Intrinsic::x86_sse42_pcmpistrio128: |
| 25959 | Opcode = X86ISD::PCMPISTR; |
| 25960 | X86CC = X86::COND_O; |
| 25961 | break; |
| 25962 | case Intrinsic::x86_sse42_pcmpestrio128: |
| 25963 | Opcode = X86ISD::PCMPESTR; |
| 25964 | X86CC = X86::COND_O; |
| 25965 | break; |
| 25966 | case Intrinsic::x86_sse42_pcmpistris128: |
| 25967 | Opcode = X86ISD::PCMPISTR; |
| 25968 | X86CC = X86::COND_S; |
| 25969 | break; |
| 25970 | case Intrinsic::x86_sse42_pcmpestris128: |
| 25971 | Opcode = X86ISD::PCMPESTR; |
| 25972 | X86CC = X86::COND_S; |
| 25973 | break; |
| 25974 | case Intrinsic::x86_sse42_pcmpistriz128: |
| 25975 | Opcode = X86ISD::PCMPISTR; |
| 25976 | X86CC = X86::COND_E; |
| 25977 | break; |
| 25978 | case Intrinsic::x86_sse42_pcmpestriz128: |
| 25979 | Opcode = X86ISD::PCMPESTR; |
| 25980 | X86CC = X86::COND_E; |
| 25981 | break; |
| 25982 | } |
| 25983 | SmallVector<SDValue, 5> NewOps(Op->op_begin()+1, Op->op_end()); |
| 25984 | SDVTList VTs = DAG.getVTList(MVT::i32, MVT::v16i8, MVT::i32); |
| 25985 | SDValue PCMP = DAG.getNode(Opcode, dl, VTs, NewOps).getValue(2); |
| 25986 | SDValue SetCC = getSETCC(X86CC, PCMP, dl, DAG); |
| 25987 | return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, SetCC); |
| 25988 | } |
| 25989 | |
| 25990 | case Intrinsic::x86_sse42_pcmpistri128: |
| 25991 | case Intrinsic::x86_sse42_pcmpestri128: { |
| 25992 | unsigned Opcode; |
| 25993 | if (IntNo == Intrinsic::x86_sse42_pcmpistri128) |
| 25994 | Opcode = X86ISD::PCMPISTR; |
| 25995 | else |
| 25996 | Opcode = X86ISD::PCMPESTR; |
| 25997 | |
| 25998 | SmallVector<SDValue, 5> NewOps(Op->op_begin()+1, Op->op_end()); |
| 25999 | SDVTList VTs = DAG.getVTList(MVT::i32, MVT::v16i8, MVT::i32); |
| 26000 | return DAG.getNode(Opcode, dl, VTs, NewOps); |
| 26001 | } |
| 26002 | |
| 26003 | case Intrinsic::x86_sse42_pcmpistrm128: |
| 26004 | case Intrinsic::x86_sse42_pcmpestrm128: { |
| 26005 | unsigned Opcode; |
| 26006 | if (IntNo == Intrinsic::x86_sse42_pcmpistrm128) |
| 26007 | Opcode = X86ISD::PCMPISTR; |
| 26008 | else |
| 26009 | Opcode = X86ISD::PCMPESTR; |
| 26010 | |
| 26011 | SmallVector<SDValue, 5> NewOps(Op->op_begin()+1, Op->op_end()); |
| 26012 | SDVTList VTs = DAG.getVTList(MVT::i32, MVT::v16i8, MVT::i32); |
| 26013 | return DAG.getNode(Opcode, dl, VTs, NewOps).getValue(1); |
| 26014 | } |
| 26015 | |
| 26016 | case Intrinsic::eh_sjlj_lsda: { |
| 26017 | MachineFunction &MF = DAG.getMachineFunction(); |
| 26018 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
| 26019 | MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout()); |
| 26020 | auto &Context = MF.getMMI().getContext(); |
| 26021 | MCSymbol *S = Context.getOrCreateSymbol(Twine("GCC_except_table") + |
| 26022 | Twine(MF.getFunctionNumber())); |
| 26023 | return DAG.getNode(getGlobalWrapperKind(), dl, VT, |
| 26024 | DAG.getMCSymbol(S, PtrVT)); |
| 26025 | } |
| 26026 | |
| 26027 | case Intrinsic::x86_seh_lsda: { |
| 26028 | |
| 26029 | MachineFunction &MF = DAG.getMachineFunction(); |
| 26030 | SDValue Op1 = Op.getOperand(1); |
| 26031 | auto *Fn = cast<Function>(cast<GlobalAddressSDNode>(Op1)->getGlobal()); |
| 26032 | MCSymbol *LSDASym = MF.getMMI().getContext().getOrCreateLSDASymbol( |
| 26033 | GlobalValue::dropLLVMManglingEscape(Fn->getName())); |
| 26034 | |
| 26035 | |
| 26036 | |
| 26037 | SDValue Result = DAG.getMCSymbol(LSDASym, VT); |
| 26038 | return DAG.getNode(X86ISD::Wrapper, dl, VT, Result); |
| 26039 | } |
| 26040 | |
| 26041 | case Intrinsic::eh_recoverfp: { |
| 26042 | SDValue FnOp = Op.getOperand(1); |
| 26043 | SDValue IncomingFPOp = Op.getOperand(2); |
| 26044 | GlobalAddressSDNode *GSD = dyn_cast<GlobalAddressSDNode>(FnOp); |
| 26045 | auto *Fn = dyn_cast_or_null<Function>(GSD ? GSD->getGlobal() : nullptr); |
| 26046 | if (!Fn) |
| 26047 | report_fatal_error( |
| 26048 | "llvm.eh.recoverfp must take a function as the first argument"); |
| 26049 | return recoverFramePointer(DAG, Fn, IncomingFPOp); |
| 26050 | } |
| 26051 | |
| 26052 | case Intrinsic::localaddress: { |
| 26053 | |
| 26054 | |
| 26055 | MachineFunction &MF = DAG.getMachineFunction(); |
| 26056 | const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); |
| 26057 | unsigned Reg; |
| 26058 | if (RegInfo->hasBasePointer(MF)) |
| 26059 | Reg = RegInfo->getBaseRegister(); |
| 26060 | else { |
| 26061 | bool CantUseFP = RegInfo->hasStackRealignment(MF); |
| 26062 | if (CantUseFP) |
| 26063 | Reg = RegInfo->getPtrSizedStackRegister(MF); |
| 26064 | else |
| 26065 | Reg = RegInfo->getPtrSizedFrameRegister(MF); |
| 26066 | } |
| 26067 | return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT); |
| 26068 | } |
| 26069 | case Intrinsic::swift_async_context_addr: { |
| 26070 | auto &MF = DAG.getMachineFunction(); |
| 26071 | auto X86FI = MF.getInfo<X86MachineFunctionInfo>(); |
| 26072 | if (Subtarget.is64Bit()) { |
| 26073 | MF.getFrameInfo().setFrameAddressIsTaken(true); |
| 26074 | X86FI->setHasSwiftAsyncContext(true); |
| 26075 | return SDValue( |
| 26076 | DAG.getMachineNode( |
| 26077 | X86::SUB64ri8, dl, MVT::i64, |
| 26078 | DAG.getCopyFromReg(DAG.getEntryNode(), dl, X86::RBP, MVT::i64), |
| 26079 | DAG.getTargetConstant(8, dl, MVT::i32)), |
| 26080 | 0); |
| 26081 | } else { |
| 26082 | |
| 26083 | |
| 26084 | if (!X86FI->getSwiftAsyncContextFrameIdx()) |
| 26085 | X86FI->setSwiftAsyncContextFrameIdx( |
| 26086 | MF.getFrameInfo().CreateStackObject(4, Align(4), false)); |
| 26087 | return DAG.getFrameIndex(*X86FI->getSwiftAsyncContextFrameIdx(), MVT::i32); |
| 26088 | } |
| 26089 | } |
| 26090 | case Intrinsic::x86_avx512_vp2intersect_q_512: |
| 26091 | case Intrinsic::x86_avx512_vp2intersect_q_256: |
| 26092 | case Intrinsic::x86_avx512_vp2intersect_q_128: |
| 26093 | case Intrinsic::x86_avx512_vp2intersect_d_512: |
| 26094 | case Intrinsic::x86_avx512_vp2intersect_d_256: |
| 26095 | case Intrinsic::x86_avx512_vp2intersect_d_128: { |
| 26096 | MVT MaskVT = Op.getSimpleValueType(); |
| 26097 | |
| 26098 | SDVTList VTs = DAG.getVTList(MVT::Untyped, MVT::Other); |
| 26099 | SDLoc DL(Op); |
| 26100 | |
| 26101 | SDValue Operation = |
| 26102 | DAG.getNode(X86ISD::VP2INTERSECT, DL, VTs, |
| 26103 | Op->getOperand(1), Op->getOperand(2)); |
| 26104 | |
| 26105 | SDValue Result0 = DAG.getTargetExtractSubreg(X86::sub_mask_0, DL, |
| 26106 | MaskVT, Operation); |
| 26107 | SDValue Result1 = DAG.getTargetExtractSubreg(X86::sub_mask_1, DL, |
| 26108 | MaskVT, Operation); |
| 26109 | return DAG.getMergeValues({Result0, Result1}, DL); |
| 26110 | } |
| 26111 | case Intrinsic::x86_mmx_pslli_w: |
| 26112 | case Intrinsic::x86_mmx_pslli_d: |
| 26113 | case Intrinsic::x86_mmx_pslli_q: |
| 26114 | case Intrinsic::x86_mmx_psrli_w: |
| 26115 | case Intrinsic::x86_mmx_psrli_d: |
| 26116 | case Intrinsic::x86_mmx_psrli_q: |
| 26117 | case Intrinsic::x86_mmx_psrai_w: |
| 26118 | case Intrinsic::x86_mmx_psrai_d: { |
| 26119 | SDLoc DL(Op); |
| 26120 | SDValue ShAmt = Op.getOperand(2); |
| 26121 | |
| 26122 | if (auto *C = dyn_cast<ConstantSDNode>(ShAmt)) { |
| 26123 | |
| 26124 | |
| 26125 | unsigned ShiftAmount = C->getAPIntValue().getLimitedValue(255); |
| 26126 | if (ShiftAmount == 0) |
| 26127 | return Op.getOperand(1); |
| 26128 | |
| 26129 | return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Op.getValueType(), |
| 26130 | Op.getOperand(0), Op.getOperand(1), |
| 26131 | DAG.getTargetConstant(ShiftAmount, DL, MVT::i32)); |
| 26132 | } |
| 26133 | |
| 26134 | unsigned NewIntrinsic; |
| 26135 | switch (IntNo) { |
| 26136 | default: llvm_unreachable("Impossible intrinsic"); |
| 26137 | case Intrinsic::x86_mmx_pslli_w: |
| 26138 | NewIntrinsic = Intrinsic::x86_mmx_psll_w; |
| 26139 | break; |
| 26140 | case Intrinsic::x86_mmx_pslli_d: |
| 26141 | NewIntrinsic = Intrinsic::x86_mmx_psll_d; |
| 26142 | break; |
| 26143 | case Intrinsic::x86_mmx_pslli_q: |
| 26144 | NewIntrinsic = Intrinsic::x86_mmx_psll_q; |
| 26145 | break; |
| 26146 | case Intrinsic::x86_mmx_psrli_w: |
| 26147 | NewIntrinsic = Intrinsic::x86_mmx_psrl_w; |
| 26148 | break; |
| 26149 | case Intrinsic::x86_mmx_psrli_d: |
| 26150 | NewIntrinsic = Intrinsic::x86_mmx_psrl_d; |
| 26151 | break; |
| 26152 | case Intrinsic::x86_mmx_psrli_q: |
| 26153 | NewIntrinsic = Intrinsic::x86_mmx_psrl_q; |
| 26154 | break; |
| 26155 | case Intrinsic::x86_mmx_psrai_w: |
| 26156 | NewIntrinsic = Intrinsic::x86_mmx_psra_w; |
| 26157 | break; |
| 26158 | case Intrinsic::x86_mmx_psrai_d: |
| 26159 | NewIntrinsic = Intrinsic::x86_mmx_psra_d; |
| 26160 | break; |
| 26161 | } |
| 26162 | |
| 26163 | |
| 26164 | |
| 26165 | |
| 26166 | ShAmt = DAG.getNode(X86ISD::MMX_MOVW2D, DL, MVT::x86mmx, ShAmt); |
| 26167 | return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, Op.getValueType(), |
| 26168 | DAG.getTargetConstant(NewIntrinsic, DL, |
| 26169 | getPointerTy(DAG.getDataLayout())), |
| 26170 | Op.getOperand(1), ShAmt); |
| 26171 | } |
| 26172 | } |
| 26173 | } |
| 26174 | |
| 26175 | static SDValue getAVX2GatherNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, |
| 26176 | SDValue Src, SDValue Mask, SDValue Base, |
| 26177 | SDValue Index, SDValue ScaleOp, SDValue Chain, |
| 26178 | const X86Subtarget &Subtarget) { |
| 26179 | SDLoc dl(Op); |
| 26180 | auto *C = dyn_cast<ConstantSDNode>(ScaleOp); |
| 26181 | |
| 26182 | if (!C) |
| 26183 | return SDValue(); |
| 26184 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
| 26185 | SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, |
| 26186 | TLI.getPointerTy(DAG.getDataLayout())); |
| 26187 | EVT MaskVT = Mask.getValueType().changeVectorElementTypeToInteger(); |
| 26188 | SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Other); |
| 26189 | |
| 26190 | |
| 26191 | |
| 26192 | if (Src.isUndef() || ISD::isBuildVectorAllOnes(Mask.getNode())) |
| 26193 | Src = getZeroVector(Op.getSimpleValueType(), Subtarget, DAG, dl); |
| 26194 | |
| 26195 | |
| 26196 | Mask = DAG.getBitcast(MaskVT, Mask); |
| 26197 | |
| 26198 | MemIntrinsicSDNode *MemIntr = cast<MemIntrinsicSDNode>(Op); |
| 26199 | |
| 26200 | SDValue Ops[] = {Chain, Src, Mask, Base, Index, Scale }; |
| 26201 | SDValue Res = |
| 26202 | DAG.getMemIntrinsicNode(X86ISD::MGATHER, dl, VTs, Ops, |
| 26203 | MemIntr->getMemoryVT(), MemIntr->getMemOperand()); |
| 26204 | return DAG.getMergeValues({Res, Res.getValue(1)}, dl); |
| 26205 | } |
| 26206 | |
| 26207 | static SDValue getGatherNode(SDValue Op, SelectionDAG &DAG, |
| 26208 | SDValue Src, SDValue Mask, SDValue Base, |
| 26209 | SDValue Index, SDValue ScaleOp, SDValue Chain, |
| 26210 | const X86Subtarget &Subtarget) { |
| 26211 | MVT VT = Op.getSimpleValueType(); |
| 26212 | SDLoc dl(Op); |
| 26213 | auto *C = dyn_cast<ConstantSDNode>(ScaleOp); |
| 26214 | |
| 26215 | if (!C) |
| 26216 | return SDValue(); |
| 26217 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
| 26218 | SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, |
| 26219 | TLI.getPointerTy(DAG.getDataLayout())); |
| 26220 | unsigned MinElts = std::min(Index.getSimpleValueType().getVectorNumElements(), |
| 26221 | VT.getVectorNumElements()); |
| 26222 | MVT MaskVT = MVT::getVectorVT(MVT::i1, MinElts); |
| 26223 | |
| 26224 | |
| 26225 | |
| 26226 | if (Mask.getValueType() != MaskVT) |
| 26227 | Mask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl); |
| 26228 | |
| 26229 | SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Other); |
| 26230 | |
| 26231 | |
| 26232 | |
| 26233 | if (Src.isUndef() || ISD::isBuildVectorAllOnes(Mask.getNode())) |
| 26234 | Src = getZeroVector(Op.getSimpleValueType(), Subtarget, DAG, dl); |
| 26235 | |
| 26236 | MemIntrinsicSDNode *MemIntr = cast<MemIntrinsicSDNode>(Op); |
| 26237 | |
| 26238 | SDValue Ops[] = {Chain, Src, Mask, Base, Index, Scale }; |
| 26239 | SDValue Res = |
| 26240 | DAG.getMemIntrinsicNode(X86ISD::MGATHER, dl, VTs, Ops, |
| 26241 | MemIntr->getMemoryVT(), MemIntr->getMemOperand()); |
| 26242 | return DAG.getMergeValues({Res, Res.getValue(1)}, dl); |
| 26243 | } |
| 26244 | |
| 26245 | static SDValue getScatterNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, |
| 26246 | SDValue Src, SDValue Mask, SDValue Base, |
| 26247 | SDValue Index, SDValue ScaleOp, SDValue Chain, |
| 26248 | const X86Subtarget &Subtarget) { |
| 26249 | SDLoc dl(Op); |
| 26250 | auto *C = dyn_cast<ConstantSDNode>(ScaleOp); |
| 26251 | |
| 26252 | if (!C) |
| 26253 | return SDValue(); |
| 26254 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
| 26255 | SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, |
| 26256 | TLI.getPointerTy(DAG.getDataLayout())); |
| 26257 | unsigned MinElts = std::min(Index.getSimpleValueType().getVectorNumElements(), |
| 26258 | Src.getSimpleValueType().getVectorNumElements()); |
| 26259 | MVT MaskVT = MVT::getVectorVT(MVT::i1, MinElts); |
| 26260 | |
| 26261 | |
| 26262 | |
| 26263 | if (Mask.getValueType() != MaskVT) |
| 26264 | Mask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl); |
| 26265 | |
| 26266 | MemIntrinsicSDNode *MemIntr = cast<MemIntrinsicSDNode>(Op); |
| 26267 | |
| 26268 | SDVTList VTs = DAG.getVTList(MVT::Other); |
| 26269 | SDValue Ops[] = {Chain, Src, Mask, Base, Index, Scale}; |
| 26270 | SDValue Res = |
| 26271 | DAG.getMemIntrinsicNode(X86ISD::MSCATTER, dl, VTs, Ops, |
| 26272 | MemIntr->getMemoryVT(), MemIntr->getMemOperand()); |
| 26273 | return Res; |
| 26274 | } |
| 26275 | |
| 26276 | static SDValue getPrefetchNode(unsigned Opc, SDValue Op, SelectionDAG &DAG, |
| 26277 | SDValue Mask, SDValue Base, SDValue Index, |
| 26278 | SDValue ScaleOp, SDValue Chain, |
| 26279 | const X86Subtarget &Subtarget) { |
| 26280 | SDLoc dl(Op); |
| 26281 | auto *C = dyn_cast<ConstantSDNode>(ScaleOp); |
| 26282 | |
| 26283 | if (!C) |
| 26284 | return SDValue(); |
| 26285 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
| 26286 | SDValue Scale = DAG.getTargetConstant(C->getZExtValue(), dl, |
| 26287 | TLI.getPointerTy(DAG.getDataLayout())); |
| 26288 | SDValue Disp = DAG.getTargetConstant(0, dl, MVT::i32); |
| 26289 | SDValue Segment = DAG.getRegister(0, MVT::i32); |
| 26290 | MVT MaskVT = |
| 26291 | MVT::getVectorVT(MVT::i1, Index.getSimpleValueType().getVectorNumElements()); |
| 26292 | SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl); |
| 26293 | SDValue Ops[] = {VMask, Base, Scale, Index, Disp, Segment, Chain}; |
| 26294 | SDNode *Res = DAG.getMachineNode(Opc, dl, MVT::Other, Ops); |
| 26295 | return SDValue(Res, 0); |
| 26296 | } |
| 26297 | |
| 26298 | |
| 26299 | |
| 26300 | |
| 26301 | |
| 26302 | |
| 26303 | |
| 26304 | |
| 26305 | |
// Expand a chained intrinsic into a machine node (TargetOpcode) whose 64-bit
// result is produced in EDX:EAX (RDX:RAX in 64-bit mode), e.g. RDTSC-style
// instructions — TODO confirm the exact opcodes against the callers, which
// are outside this view. If SrcReg is nonzero, operand 2 of N is first copied
// into that physical register and glued to the machine node. Pushes the
// combined i64 result and the output chain onto Results, and returns the
// final glue value for callers that need to consume further glued outputs.
static SDValue expandIntrinsicWChainHelper(SDNode *N, const SDLoc &DL,
                                           SelectionDAG &DAG,
                                           unsigned TargetOpcode,
                                           unsigned SrcReg,
                                           const X86Subtarget &Subtarget,
                                           SmallVectorImpl<SDValue> &Results) {
  SDValue Chain = N->getOperand(0);
  SDValue Glue;

  if (SrcReg) {
    // Copy the extra operand into the requested physical register and glue
    // it so the machine node below sees the register live-in.
    assert(N->getNumOperands() == 3 && "Unexpected number of operands!");
    Chain = DAG.getCopyToReg(Chain, DL, SrcReg, N->getOperand(2), Glue);
    Glue = Chain.getValue(1);
  }

  // Emit the machine node; only pass the glue operand when one exists
  // (hence the variable-length operand list).
  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
  SDValue N1Ops[] = {Chain, Glue};
  SDNode *N1 = DAG.getMachineNode(
      TargetOpcode, DL, Tys, ArrayRef<SDValue>(N1Ops, Glue.getNode() ? 2 : 1));
  Chain = SDValue(N1, 0);

  // Read the two result halves out of the fixed result registers; each copy
  // is chained and glued to the previous one so they stay adjacent to the
  // machine node. SDValue(N1, 1) is the glue result of the machine node.
  SDValue LO, HI;
  if (Subtarget.is64Bit()) {
    LO = DAG.getCopyFromReg(Chain, DL, X86::RAX, MVT::i64, SDValue(N1, 1));
    HI = DAG.getCopyFromReg(LO.getValue(1), DL, X86::RDX, MVT::i64,
                            LO.getValue(2));
  } else {
    LO = DAG.getCopyFromReg(Chain, DL, X86::EAX, MVT::i32, SDValue(N1, 1));
    HI = DAG.getCopyFromReg(LO.getValue(1), DL, X86::EDX, MVT::i32,
                            LO.getValue(2));
  }
  Chain = HI.getValue(1);
  Glue = HI.getValue(2);

  if (Subtarget.is64Bit()) {
    // In 64-bit mode both halves arrive as i64 values with the payload in
    // the low 32 bits; merge them as (HI << 32) | LO.
    SDValue Tmp = DAG.getNode(ISD::SHL, DL, MVT::i64, HI,
                              DAG.getConstant(32, DL, MVT::i8));
    Results.push_back(DAG.getNode(ISD::OR, DL, MVT::i64, LO, Tmp));
    Results.push_back(Chain);
    return Glue;
  }

  // In 32-bit mode combine the two i32 halves into one i64 pair.
  SDValue Ops[] = { LO, HI };
  SDValue Pair = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Ops);
  Results.push_back(Pair);
  Results.push_back(Chain);
  return Glue;
}
| 26357 | |
| 26358 | |
| 26359 | |
| 26360 | |
// Lower a time-stamp-counter read (RDTSC or RDTSCP).
// Results receives {value, chain}; for RDTSCP the ECX result is also read
// out (glued so the register cannot be clobbered first), appended to
// Results, and its output chain replaces the previous one.
static void getReadTimeStampCounter(SDNode *N, const SDLoc &DL, unsigned Opcode,
                                    SelectionDAG &DAG,
                                    const X86Subtarget &Subtarget,
                                    SmallVectorImpl<SDValue> &Results) {
  // SrcReg is 0: these instructions take no source-register operand.
  SDValue Glue = expandIntrinsicWChainHelper(N, DL, DAG, Opcode,
                                             0, Subtarget,
                                             Results);
  if (Opcode != X86::RDTSCP)
    return;

  SDValue Chain = Results[1];

  // RDTSCP also writes ECX; copy it out using the glue from the helper so
  // the read is pinned directly after the instruction.
  SDValue ecx = DAG.getCopyFromReg(Chain, DL, X86::ECX, MVT::i32, Glue);
  Results[1] = ecx;
  Results.push_back(ecx.getValue(1));
}
| 26381 | |
| 26382 | static SDValue LowerREADCYCLECOUNTER(SDValue Op, const X86Subtarget &Subtarget, |
| 26383 | SelectionDAG &DAG) { |
| 26384 | SmallVector<SDValue, 3> Results; |
| 26385 | SDLoc DL(Op); |
| 26386 | getReadTimeStampCounter(Op.getNode(), DL, X86::RDTSC, DAG, Subtarget, |
| 26387 | Results); |
| 26388 | return DAG.getMergeValues(Results, DL); |
| 26389 | } |
| 26390 | |
| 26391 | static SDValue MarkEHRegistrationNode(SDValue Op, SelectionDAG &DAG) { |
| 26392 | MachineFunction &MF = DAG.getMachineFunction(); |
| 26393 | SDValue Chain = Op.getOperand(0); |
| 26394 | SDValue RegNode = Op.getOperand(2); |
| 26395 | WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo(); |
| 26396 | if (!EHInfo) |
| 26397 | report_fatal_error("EH registrations only live in functions using WinEH"); |
| 26398 | |
| 26399 | |
| 26400 | auto *FINode = dyn_cast<FrameIndexSDNode>(RegNode); |
| 26401 | if (!FINode) |
| 26402 | report_fatal_error("llvm.x86.seh.ehregnode expects a static alloca"); |
| 26403 | EHInfo->EHRegNodeFrameIndex = FINode->getIndex(); |
| 26404 | |
| 26405 | |
| 26406 | return Chain; |
| 26407 | } |
| 26408 | |
| 26409 | static SDValue MarkEHGuard(SDValue Op, SelectionDAG &DAG) { |
| 26410 | MachineFunction &MF = DAG.getMachineFunction(); |
| 26411 | SDValue Chain = Op.getOperand(0); |
| 26412 | SDValue EHGuard = Op.getOperand(2); |
| 26413 | WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo(); |
| 26414 | if (!EHInfo) |
| 26415 | report_fatal_error("EHGuard only live in functions using WinEH"); |
| 26416 | |
| 26417 | |
| 26418 | auto *FINode = dyn_cast<FrameIndexSDNode>(EHGuard); |
| 26419 | if (!FINode) |
| 26420 | report_fatal_error("llvm.x86.seh.ehguard expects a static alloca"); |
| 26421 | EHInfo->EHGuardFrameIndex = FINode->getIndex(); |
| 26422 | |
| 26423 | |
| 26424 | return Chain; |
| 26425 | } |
| 26426 | |
| 26427 | |
| 26428 | static SDValue |
| 26429 | EmitTruncSStore(bool SignedSat, SDValue Chain, const SDLoc &Dl, SDValue Val, |
| 26430 | SDValue Ptr, EVT MemVT, MachineMemOperand *MMO, |
| 26431 | SelectionDAG &DAG) { |
| 26432 | SDVTList VTs = DAG.getVTList(MVT::Other); |
| 26433 | SDValue Undef = DAG.getUNDEF(Ptr.getValueType()); |
| 26434 | SDValue Ops[] = { Chain, Val, Ptr, Undef }; |
| 26435 | unsigned Opc = SignedSat ? X86ISD::VTRUNCSTORES : X86ISD::VTRUNCSTOREUS; |
| 26436 | return DAG.getMemIntrinsicNode(Opc, Dl, VTs, Ops, MemVT, MMO); |
| 26437 | } |
| 26438 | |
| 26439 | |
| 26440 | static SDValue |
| 26441 | EmitMaskedTruncSStore(bool SignedSat, SDValue Chain, const SDLoc &Dl, |
| 26442 | SDValue Val, SDValue Ptr, SDValue Mask, EVT MemVT, |
| 26443 | MachineMemOperand *MMO, SelectionDAG &DAG) { |
| 26444 | SDVTList VTs = DAG.getVTList(MVT::Other); |
| 26445 | SDValue Ops[] = { Chain, Val, Ptr, Mask }; |
| 26446 | unsigned Opc = SignedSat ? X86ISD::VMTRUNCSTORES : X86ISD::VMTRUNCSTOREUS; |
| 26447 | return DAG.getMemIntrinsicNode(Opc, Dl, VTs, Ops, MemVT, MMO); |
| 26448 | } |
| 26449 | |
/// Lower an ISD::INTRINSIC_W_CHAIN node for X86. Intrinsics with an entry
/// in the with-chain intrinsic table dispatch on IntrData->Type in the
/// second switch; the remaining special cases are handled explicitly first.
static SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, const X86Subtarget &Subtarget,
                                      SelectionDAG &DAG) {
  unsigned IntNo = Op.getConstantOperandVal(1);
  const IntrinsicData *IntrData = getIntrinsicWithChain(IntNo);
  if (!IntrData) {
    switch (IntNo) {
    case llvm::Intrinsic::x86_seh_ehregnode:
      return MarkEHRegistrationNode(Op, DAG);
    case llvm::Intrinsic::x86_seh_ehguard:
      return MarkEHGuard(Op, DAG);
    case llvm::Intrinsic::x86_rdpkru: {
      SDLoc dl(Op);
      SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
      // The ECX operand of RDPKRU must be 0.
      return DAG.getNode(X86ISD::RDPKRU, dl, VTs, Op.getOperand(0),
                         DAG.getConstant(0, dl, MVT::i32));
    }
    case llvm::Intrinsic::x86_wrpkru: {
      SDLoc dl(Op);
      // The ECX and EDX operands of WRPKRU must be 0, so pass explicit zero
      // constants for both.
      return DAG.getNode(X86ISD::WRPKRU, dl, MVT::Other,
                         Op.getOperand(0), Op.getOperand(2),
                         DAG.getConstant(0, dl, MVT::i32),
                         DAG.getConstant(0, dl, MVT::i32));
    }
    case llvm::Intrinsic::x86_flags_read_u32:
    case llvm::Intrinsic::x86_flags_read_u64:
    case llvm::Intrinsic::x86_flags_write_u32:
    case llvm::Intrinsic::x86_flags_write_u64: {
      // We need a frame pointer because this will get lowered to a PUSH/POP
      // sequence that adjusts the stack.
      MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
      MFI.setHasCopyImplyingStackAdjustment(true);
      // Don't do anything else here; these intrinsics are expanded later.
      return Op;
    }
    case Intrinsic::x86_lwpins32:
    case Intrinsic::x86_lwpins64:
    case Intrinsic::x86_umwait:
    case Intrinsic::x86_tpause: {
      SDLoc dl(Op);
      SDValue Chain = Op->getOperand(0);
      SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
      unsigned Opcode;

      switch (IntNo) {
      default: llvm_unreachable("Impossible intrinsic");
      case Intrinsic::x86_umwait:
        Opcode = X86ISD::UMWAIT;
        break;
      case Intrinsic::x86_tpause:
        Opcode = X86ISD::TPAUSE;
        break;
      case Intrinsic::x86_lwpins32:
      case Intrinsic::x86_lwpins64:
        Opcode = X86ISD::LWPINS;
        break;
      }

      // These instructions report their result in CF; materialize it as an
      // i8 via SETB and merge it with the output chain.
      SDValue Operation =
          DAG.getNode(Opcode, dl, VTs, Chain, Op->getOperand(2),
                      Op->getOperand(3), Op->getOperand(4));
      SDValue SetCC = getSETCC(X86::COND_B, Operation.getValue(0), dl, DAG);
      return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), SetCC,
                         Operation.getValue(1));
    }
    case Intrinsic::x86_enqcmd:
    case Intrinsic::x86_enqcmds: {
      SDLoc dl(Op);
      SDValue Chain = Op.getOperand(0);
      SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
      unsigned Opcode;
      switch (IntNo) {
      default: llvm_unreachable("Impossible intrinsic!");
      case Intrinsic::x86_enqcmd:
        Opcode = X86ISD::ENQCMD;
        break;
      case Intrinsic::x86_enqcmds:
        Opcode = X86ISD::ENQCMDS;
        break;
      }
      // ENQCMD/ENQCMDS report their result in ZF; read it with SETE.
      SDValue Operation = DAG.getNode(Opcode, dl, VTs, Chain, Op.getOperand(2),
                                      Op.getOperand(3));
      SDValue SetCC = getSETCC(X86::COND_E, Operation.getValue(0), dl, DAG);
      return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), SetCC,
                         Operation.getValue(1));
    }
    case Intrinsic::x86_aesenc128kl:
    case Intrinsic::x86_aesdec128kl:
    case Intrinsic::x86_aesenc256kl:
    case Intrinsic::x86_aesdec256kl: {
      SDLoc DL(Op);
      // Results: {data, flags, chain}.
      SDVTList VTs = DAG.getVTList(MVT::v2i64, MVT::i32, MVT::Other);
      SDValue Chain = Op.getOperand(0);
      unsigned Opcode;

      switch (IntNo) {
      default: llvm_unreachable("Impossible intrinsic");
      case Intrinsic::x86_aesenc128kl:
        Opcode = X86ISD::AESENC128KL;
        break;
      case Intrinsic::x86_aesdec128kl:
        Opcode = X86ISD::AESDEC128KL;
        break;
      case Intrinsic::x86_aesenc256kl:
        Opcode = X86ISD::AESENC256KL;
        break;
      case Intrinsic::x86_aesdec256kl:
        Opcode = X86ISD::AESDEC256KL;
        break;
      }

      MemIntrinsicSDNode *MemIntr = cast<MemIntrinsicSDNode>(Op);
      MachineMemOperand *MMO = MemIntr->getMemOperand();
      EVT MemVT = MemIntr->getMemoryVT();
      SDValue Operation = DAG.getMemIntrinsicNode(
          Opcode, DL, VTs, {Chain, Op.getOperand(2), Op.getOperand(3)}, MemVT,
          MMO);
      // Success is signalled through ZF; materialize it with SETE.
      SDValue ZF = getSETCC(X86::COND_E, Operation.getValue(1), DL, DAG);

      return DAG.getNode(ISD::MERGE_VALUES, DL, Op->getVTList(),
                         {ZF, Operation.getValue(0), Operation.getValue(2)});
    }
    case Intrinsic::x86_aesencwide128kl:
    case Intrinsic::x86_aesdecwide128kl:
    case Intrinsic::x86_aesencwide256kl:
    case Intrinsic::x86_aesdecwide256kl: {
      SDLoc DL(Op);
      // Results: {flags, eight v2i64 data blocks, chain}.
      SDVTList VTs = DAG.getVTList(
          {MVT::i32, MVT::v2i64, MVT::v2i64, MVT::v2i64, MVT::v2i64, MVT::v2i64,
           MVT::v2i64, MVT::v2i64, MVT::v2i64, MVT::Other});
      SDValue Chain = Op.getOperand(0);
      unsigned Opcode;

      switch (IntNo) {
      default: llvm_unreachable("Impossible intrinsic");
      case Intrinsic::x86_aesencwide128kl:
        Opcode = X86ISD::AESENCWIDE128KL;
        break;
      case Intrinsic::x86_aesdecwide128kl:
        Opcode = X86ISD::AESDECWIDE128KL;
        break;
      case Intrinsic::x86_aesencwide256kl:
        Opcode = X86ISD::AESENCWIDE256KL;
        break;
      case Intrinsic::x86_aesdecwide256kl:
        Opcode = X86ISD::AESDECWIDE256KL;
        break;
      }

      MemIntrinsicSDNode *MemIntr = cast<MemIntrinsicSDNode>(Op);
      MachineMemOperand *MMO = MemIntr->getMemOperand();
      EVT MemVT = MemIntr->getMemoryVT();
      SDValue Operation = DAG.getMemIntrinsicNode(
          Opcode, DL, VTs,
          {Chain, Op.getOperand(2), Op.getOperand(3), Op.getOperand(4),
           Op.getOperand(5), Op.getOperand(6), Op.getOperand(7),
           Op.getOperand(8), Op.getOperand(9), Op.getOperand(10)},
          MemVT, MMO);
      // Success is signalled through ZF; materialize it with SETE.
      SDValue ZF = getSETCC(X86::COND_E, Operation.getValue(0), DL, DAG);

      return DAG.getNode(ISD::MERGE_VALUES, DL, Op->getVTList(),
                         {ZF, Operation.getValue(1), Operation.getValue(2),
                          Operation.getValue(3), Operation.getValue(4),
                          Operation.getValue(5), Operation.getValue(6),
                          Operation.getValue(7), Operation.getValue(8),
                          Operation.getValue(9)});
    }
    case Intrinsic::x86_testui: {
      SDLoc dl(Op);
      SDValue Chain = Op.getOperand(0);
      SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
      // TESTUI reports its result in CF; read it with SETB.
      SDValue Operation = DAG.getNode(X86ISD::TESTUI, dl, VTs, Chain);
      SDValue SetCC = getSETCC(X86::COND_B, Operation.getValue(0), dl, DAG);
      return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), SetCC,
                         Operation.getValue(1));
    }
    }
    return SDValue();
  }

  // Table-driven lowering for intrinsics with an IntrinsicData entry.
  SDLoc dl(Op);
  switch(IntrData->Type) {
  default: llvm_unreachable("Unknown Intrinsic Type");
  case RDSEED:
  case RDRAND: {
    // Emit the node with the right value type; the result includes a CF
    // output alongside the random value and the chain.
    SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::i32, MVT::Other);
    SDValue Result = DAG.getNode(IntrData->Opc0, dl, VTs, Op.getOperand(0));

    // If the value returned by RDRAND/RDSEED was valid (CF=1), return 1;
    // otherwise return the (zero) value itself, via CMOV on COND_B.
    SDValue Ops[] = {DAG.getZExtOrTrunc(Result, dl, Op->getValueType(1)),
                     DAG.getConstant(1, dl, Op->getValueType(1)),
                     DAG.getTargetConstant(X86::COND_B, dl, MVT::i8),
                     SDValue(Result.getNode(), 1)};
    SDValue isValid = DAG.getNode(X86ISD::CMOV, dl, Op->getValueType(1), Ops);

    // Return {result, isValid, chain}.
    return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(), Result, isValid,
                       SDValue(Result.getNode(), 2));
  }
  case GATHER_AVX2: {
    SDValue Chain = Op.getOperand(0);
    SDValue Src = Op.getOperand(2);
    SDValue Base = Op.getOperand(3);
    SDValue Index = Op.getOperand(4);
    SDValue Mask = Op.getOperand(5);
    SDValue Scale = Op.getOperand(6);
    return getAVX2GatherNode(IntrData->Opc0, Op, DAG, Src, Mask, Base, Index,
                             Scale, Chain, Subtarget);
  }
  case GATHER: {
    // AVX-512 gather: (chain, passthru, base, index, mask, scale).
    SDValue Chain = Op.getOperand(0);
    SDValue Src = Op.getOperand(2);
    SDValue Base = Op.getOperand(3);
    SDValue Index = Op.getOperand(4);
    SDValue Mask = Op.getOperand(5);
    SDValue Scale = Op.getOperand(6);
    return getGatherNode(Op, DAG, Src, Mask, Base, Index, Scale,
                         Chain, Subtarget);
  }
  case SCATTER: {
    // Scatter: (chain, base, mask, index, source, scale) — note the operand
    // order differs from gather.
    SDValue Chain = Op.getOperand(0);
    SDValue Base = Op.getOperand(2);
    SDValue Mask = Op.getOperand(3);
    SDValue Index = Op.getOperand(4);
    SDValue Src = Op.getOperand(5);
    SDValue Scale = Op.getOperand(6);
    return getScatterNode(IntrData->Opc0, Op, DAG, Src, Mask, Base, Index,
                          Scale, Chain, Subtarget);
  }
  case PREFETCH: {
    // The hint selects between the two prefetch opcodes (T0 vs. T1 variant).
    const APInt &HintVal = Op.getConstantOperandAPInt(6);
    assert((HintVal == 2 || HintVal == 3) &&
           "Wrong prefetch hint in intrinsic: should be 2 or 3");
    unsigned Opcode = (HintVal == 2 ? IntrData->Opc1 : IntrData->Opc0);
    SDValue Chain = Op.getOperand(0);
    SDValue Mask = Op.getOperand(2);
    SDValue Index = Op.getOperand(3);
    SDValue Base = Op.getOperand(4);
    SDValue Scale = Op.getOperand(5);
    return getPrefetchNode(Opcode, Op, DAG, Mask, Base, Index, Scale, Chain,
                           Subtarget);
  }
  // Read Time Stamp Counter (RDTSC) and RDTSCP.
  case RDTSC: {
    SmallVector<SDValue, 2> Results;
    getReadTimeStampCounter(Op.getNode(), dl, IntrData->Opc0, DAG, Subtarget,
                            Results);
    return DAG.getMergeValues(Results, dl);
  }
  // Read Performance Monitoring Counters.
  case RDPMC:
  // Read Extended Control Registers.
  case XGETBV: {
    SmallVector<SDValue, 2> Results;

    // RDPMC uses ECX to select the index of the performance counter to read;
    // XGETBV uses ECX to select the XCR — hence X86::ECX as SrcReg here.
    expandIntrinsicWChainHelper(Op.getNode(), dl, DAG, IntrData->Opc0, X86::ECX,
                                Subtarget, Results);
    return DAG.getMergeValues(Results, dl);
  }
  // XTEST intrinsics.
  case XTEST: {
    SDVTList VTs = DAG.getVTList(Op->getValueType(0), MVT::Other);
    SDValue InTrans = DAG.getNode(IntrData->Opc0, dl, VTs, Op.getOperand(0));

    // Convert the NE flag into the i1-ish result callers expect.
    SDValue SetCC = getSETCC(X86::COND_NE, InTrans, dl, DAG);
    SDValue Ret = DAG.getNode(ISD::ZERO_EXTEND, dl, Op->getValueType(0), SetCC);
    return DAG.getNode(ISD::MERGE_VALUES, dl, Op->getVTList(),
                       Ret, SDValue(InTrans.getNode(), 1));
  }
  case TRUNCATE_TO_MEM_VI8:
  case TRUNCATE_TO_MEM_VI16:
  case TRUNCATE_TO_MEM_VI32: {
    SDValue Mask = Op.getOperand(4);
    SDValue DataToTruncate = Op.getOperand(3);
    SDValue Addr = Op.getOperand(2);
    SDValue Chain = Op.getOperand(0);

    MemIntrinsicSDNode *MemIntr = dyn_cast<MemIntrinsicSDNode>(Op);
    assert(MemIntr && "Expected MemIntrinsicSDNode!");

    EVT MemVT = MemIntr->getMemoryVT();

    uint16_t TruncationOp = IntrData->Opc0;
    switch (TruncationOp) {
    case X86ISD::VTRUNC: {
      // All-ones mask: plain truncating store.
      if (isAllOnesConstant(Mask))
        return DAG.getTruncStore(Chain, dl, DataToTruncate, Addr, MemVT,
                                 MemIntr->getMemOperand());

      MVT MaskVT = MVT::getVectorVT(MVT::i1, MemVT.getVectorNumElements());
      SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);
      SDValue Offset = DAG.getUNDEF(VMask.getValueType());

      return DAG.getMaskedStore(Chain, dl, DataToTruncate, Addr, Offset, VMask,
                                MemVT, MemIntr->getMemOperand(), ISD::UNINDEXED,
                                /*IsTruncating=*/true );
    }
    case X86ISD::VTRUNCUS:
    case X86ISD::VTRUNCS: {
      // Saturating truncation, signed or unsigned flavor.
      bool IsSigned = (TruncationOp == X86ISD::VTRUNCS);
      if (isAllOnesConstant(Mask))
        return EmitTruncSStore(IsSigned, Chain, dl, DataToTruncate, Addr, MemVT,
                               MemIntr->getMemOperand(), DAG);

      MVT MaskVT = MVT::getVectorVT(MVT::i1, MemVT.getVectorNumElements());
      SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl);

      return EmitMaskedTruncSStore(IsSigned, Chain, dl, DataToTruncate, Addr,
                                   VMask, MemVT, MemIntr->getMemOperand(), DAG);
    }
    default:
      llvm_unreachable("Unsupported truncstore intrinsic");
    }
  }
  }
}
| 26776 | |
| 26777 | SDValue X86TargetLowering::LowerRETURNADDR(SDValue Op, |
| 26778 | SelectionDAG &DAG) const { |
| 26779 | MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo(); |
| 26780 | MFI.setReturnAddressIsTaken(true); |
| 26781 | |
| 26782 | if (verifyReturnAddressArgumentIsConstant(Op, DAG)) |
| 26783 | return SDValue(); |
| 26784 | |
| 26785 | unsigned Depth = Op.getConstantOperandVal(0); |
| 26786 | SDLoc dl(Op); |
| 26787 | EVT PtrVT = getPointerTy(DAG.getDataLayout()); |
| 26788 | |
| 26789 | if (Depth > 0) { |
| 26790 | SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); |
| 26791 | const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); |
| 26792 | SDValue Offset = DAG.getConstant(RegInfo->getSlotSize(), dl, PtrVT); |
| 26793 | return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), |
| 26794 | DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset), |
| 26795 | MachinePointerInfo()); |
| 26796 | } |
| 26797 | |
| 26798 | |
| 26799 | SDValue RetAddrFI = getReturnAddressFrameIndex(DAG); |
| 26800 | return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI, |
| 26801 | MachinePointerInfo()); |
| 26802 | } |
| 26803 | |
| 26804 | SDValue X86TargetLowering::LowerADDROFRETURNADDR(SDValue Op, |
| 26805 | SelectionDAG &DAG) const { |
| 26806 | DAG.getMachineFunction().getFrameInfo().setReturnAddressIsTaken(true); |
| 26807 | return getReturnAddressFrameIndex(DAG); |
| 26808 | } |
| 26809 | |
// Lower llvm.frameaddress. On Windows-CFI targets this returns a fixed
// stack object instead of reading the frame-pointer register; elsewhere it
// copies the frame register and dereferences it Depth times.
SDValue X86TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
  const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  EVT VT = Op.getValueType();

  MFI.setFrameAddressIsTaken(true);

  if (MF.getTarget().getMCAsmInfo()->usesWindowsCFI()) {
    // With WinCFI, return a fixed stack object rather than reading the
    // frame-pointer register. NOTE(review): this path ignores the Depth
    // operand — every request maps to the same fixed slot.
    int FrameAddrIndex = FuncInfo->getFAIndex();
    if (!FrameAddrIndex) {
      // Lazily create the slot-sized fixed object at offset 0 and cache
      // its index in the function info.
      unsigned SlotSize = RegInfo->getSlotSize();
      FrameAddrIndex = MF.getFrameInfo().CreateFixedObject(
          SlotSize, /*SPOffset=*/0, /*IsImmutable=*/false);
      FuncInfo->setFAIndex(FrameAddrIndex);
    }
    return DAG.getFrameIndex(FrameAddrIndex, VT);
  }

  unsigned FrameReg =
      RegInfo->getPtrSizedFrameRegister(DAG.getMachineFunction());
  SDLoc dl(Op);
  unsigned Depth = Op.getConstantOperandVal(0);
  assert(((FrameReg == X86::RBP && VT == MVT::i64) ||
          (FrameReg == X86::EBP && VT == MVT::i32)) &&
         "Invalid Frame Register!");
  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT);
  // Each extra level of depth loads the saved frame pointer found at the
  // current frame address.
  while (Depth--)
    FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
                            MachinePointerInfo());
  return FrameAddr;
}
| 26847 | |
| 26848 | |
| 26849 | |
| 26850 | Register X86TargetLowering::getRegisterByName(const char* RegName, LLT VT, |
| 26851 | const MachineFunction &MF) const { |
| 26852 | const TargetFrameLowering &TFI = *Subtarget.getFrameLowering(); |
| 26853 | |
| 26854 | Register Reg = StringSwitch<unsigned>(RegName) |
| 26855 | .Case("esp", X86::ESP) |
| 26856 | .Case("rsp", X86::RSP) |
| 26857 | .Case("ebp", X86::EBP) |
| 26858 | .Case("rbp", X86::RBP) |
| 26859 | .Default(0); |
| 26860 | |
| 26861 | if (Reg == X86::EBP || Reg == X86::RBP) { |
| 26862 | if (!TFI.hasFP(MF)) |
| 26863 | report_fatal_error("register " + StringRef(RegName) + |
| 26864 | " is allocatable: function has no frame pointer"); |
| 26865 | #ifndef NDEBUG |
| 26866 | else { |
| 26867 | const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); |
| 26868 | Register FrameReg = RegInfo->getPtrSizedFrameRegister(MF); |
| 26869 | assert((FrameReg == X86::EBP || FrameReg == X86::RBP) && |
| 26870 | "Invalid Frame Register!"); |
| 26871 | } |
| 26872 | #endif |
| 26873 | } |
| 26874 | |
| 26875 | if (Reg) |
| 26876 | return Reg; |
| 26877 | |
| 26878 | report_fatal_error("Invalid register name global variable"); |
| 26879 | } |
| 26880 | |
| 26881 | SDValue X86TargetLowering::LowerFRAME_TO_ARGS_OFFSET(SDValue Op, |
| 26882 | SelectionDAG &DAG) const { |
| 26883 | const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo(); |
| 26884 | return DAG.getIntPtrConstant(2 * RegInfo->getSlotSize(), SDLoc(Op)); |
| 26885 | } |
| 26886 | |
| 26887 | Register X86TargetLowering::getExceptionPointerRegister( |
| 26888 | const Constant *PersonalityFn) const { |
| 26889 | if (classifyEHPersonality(PersonalityFn) == EHPersonality::CoreCLR) |
| 26890 | return Subtarget.isTarget64BitLP64() ? X86::RDX : X86::EDX; |
| 26891 | |
| 26892 | return Subtarget.isTarget64BitLP64() ? X86::RAX : X86::EAX; |
| 26893 | } |
| 26894 | |
| 26895 | Register X86TargetLowering::getExceptionSelectorRegister( |
| 26896 | const Constant *PersonalityFn) const { |
| 26897 | |
| 26898 | if (isFuncletEHPersonality(classifyEHPersonality(PersonalityFn))) |
| 26899 | return X86::NoRegister; |
| 26900 | return Subtarget.isTarget64BitLP64() ? X86::RDX : X86::EDX; |
| 26901 | } |
| 26902 | |
| 26903 | bool X86TargetLowering::needsFixedCatchObjects() const { |
| 26904 | return Subtarget.isTargetWin64(); |
| 26905 | } |
| 26906 | |
// Lower ISD::EH_RETURN: store Handler into the frame's return-address slot
// (displaced by Offset), keep the computed slot address live in (R|E)CX,
// and emit the X86ISD::EH_RETURN node that consumes it.
SDValue X86TargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  SDValue Offset = Op.getOperand(1);
  SDValue Handler = Op.getOperand(2);
  SDLoc dl (Op);

  EVT PtrVT = getPointerTy(DAG.getDataLayout());
  const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  Register FrameReg = RegInfo->getFrameRegister(DAG.getMachineFunction());
  assert(((FrameReg == X86::RBP && PtrVT == MVT::i64) ||
          (FrameReg == X86::EBP && PtrVT == MVT::i32)) &&
         "Invalid Frame Register!");
  SDValue Frame = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, PtrVT);
  Register StoreAddrReg = (PtrVT == MVT::i64) ? X86::RCX : X86::ECX;

  // Slot address = frame pointer + one slot (the return-address slot)
  // + the caller-provided Offset.
  SDValue StoreAddr = DAG.getNode(ISD::ADD, dl, PtrVT, Frame,
                                  DAG.getIntPtrConstant(RegInfo->getSlotSize(),
                                                        dl));
  StoreAddr = DAG.getNode(ISD::ADD, dl, PtrVT, StoreAddr, Offset);
  Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, MachinePointerInfo());
  // Pin the address in (R|E)CX so the EH_RETURN node can reference it.
  Chain = DAG.getCopyToReg(Chain, dl, StoreAddrReg, StoreAddr);

  return DAG.getNode(X86ISD::EH_RETURN, dl, MVT::Other, Chain,
                     DAG.getRegister(StoreAddrReg, PtrVT));
}
| 26932 | |
| 26933 | SDValue X86TargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op, |
| 26934 | SelectionDAG &DAG) const { |
| 26935 | SDLoc DL(Op); |
| 26936 | |
| 26937 | |
| 26938 | |
| 26939 | |
| 26940 | |
| 26941 | |
| 26942 | if (!Subtarget.is64Bit()) { |
| 26943 | const X86InstrInfo *TII = Subtarget.getInstrInfo(); |
| 26944 | (void)TII->getGlobalBaseReg(&DAG.getMachineFunction()); |
| 26945 | } |
| 26946 | return DAG.getNode(X86ISD::EH_SJLJ_SETJMP, DL, |
| 26947 | DAG.getVTList(MVT::i32, MVT::Other), |
| 26948 | Op.getOperand(0), Op.getOperand(1)); |
| 26949 | } |
| 26950 | |
| 26951 | SDValue X86TargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op, |
| 26952 | SelectionDAG &DAG) const { |
| 26953 | SDLoc DL(Op); |
| 26954 | return DAG.getNode(X86ISD::EH_SJLJ_LONGJMP, DL, MVT::Other, |
| 26955 | Op.getOperand(0), Op.getOperand(1)); |
| 26956 | } |
| 26957 | |
| 26958 | SDValue X86TargetLowering::lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, |
| 26959 | SelectionDAG &DAG) const { |
| 26960 | SDLoc DL(Op); |
| 26961 | return DAG.getNode(X86ISD::EH_SJLJ_SETUP_DISPATCH, DL, MVT::Other, |
| 26962 | Op.getOperand(0)); |
| 26963 | } |
| 26964 | |
| 26965 | static SDValue LowerADJUST_TRAMPOLINE(SDValue Op, SelectionDAG &DAG) { |
| 26966 | return Op.getOperand(0); |
| 26967 | } |
| 26968 | |
// Lower llvm.init.trampoline: write a small code stub into the trampoline
// buffer that loads the 'nest' parameter into its dedicated register and
// jumps to the nested function.
SDValue X86TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDValue Root = Op.getOperand(0);    // chain
  SDValue Trmp = Op.getOperand(1);    // trampoline buffer
  SDValue FPtr = Op.getOperand(2);    // nested function
  SDValue Nest = Op.getOperand(3);    // 'nest' parameter value
  SDLoc dl (Op);

  const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();

  if (Subtarget.is64Bit()) {
    SDValue OutChains[6];

    // Raw opcode bytes for the 64-bit stub.
    const unsigned char JMP64r = 0xFF;  // 64-bit jmp through register opcode
    const unsigned char MOV64ri = 0xB8; // X86::MOV64ri opcode

    const unsigned char N86R10 = TRI->getEncodingValue(X86::R10) & 0x7;
    const unsigned char N86R11 = TRI->getEncodingValue(X86::R11) & 0x7;

    const unsigned char REX_WB = 0x40 | 0x08 | 0x01; // REX prefix

    // Load the pointer to the nested function into R11: movabsq r11, FPtr.
    unsigned OpCode = ((MOV64ri | N86R11) << 8) | REX_WB;
    SDValue Addr = Trmp;
    OutChains[0] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, dl, MVT::i16),
                                Addr, MachinePointerInfo(TrmpAddr));

    Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
                       DAG.getConstant(2, dl, MVT::i64));
    OutChains[1] = DAG.getStore(Root, dl, FPtr, Addr,
                                MachinePointerInfo(TrmpAddr, 2), Align(2));

    // Load the 'nest' parameter value into R10: movabsq r10, Nest.
    // R10 is specified as the nest register in X86CallingConv.td.
    OpCode = ((MOV64ri | N86R10) << 8) | REX_WB;
    Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
                       DAG.getConstant(10, dl, MVT::i64));
    OutChains[2] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, dl, MVT::i16),
                                Addr, MachinePointerInfo(TrmpAddr, 10));

    Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
                       DAG.getConstant(12, dl, MVT::i64));
    OutChains[3] = DAG.getStore(Root, dl, Nest, Addr,
                                MachinePointerInfo(TrmpAddr, 12), Align(2));

    // Jump to the nested function: jmpq *r11.
    OpCode = (JMP64r << 8) | REX_WB;
    Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
                       DAG.getConstant(20, dl, MVT::i64));
    OutChains[4] = DAG.getStore(Root, dl, DAG.getConstant(OpCode, dl, MVT::i16),
                                Addr, MachinePointerInfo(TrmpAddr, 20));

    // ModRM byte selecting r11 as the jump target.
    unsigned char ModRM = N86R11 | (4 << 3) | (3 << 6);
    Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
                       DAG.getConstant(22, dl, MVT::i64));
    OutChains[5] = DAG.getStore(Root, dl, DAG.getConstant(ModRM, dl, MVT::i8),
                                Addr, MachinePointerInfo(TrmpAddr, 22));

    return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
  } else {
    const Function *Func =
      cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue());
    CallingConv::ID CC = Func->getCallingConv();
    unsigned NestReg;

    switch (CC) {
    default:
      llvm_unreachable("Unsupported calling convention");
    case CallingConv::C:
    case CallingConv::X86_StdCall: {
      // Pass the 'nest' parameter in ECX.
      // Must be kept in sync with X86CallingConv.td.
      NestReg = X86::ECX;

      // Check that ECX wasn't needed by an 'inreg' parameter.
      FunctionType *FTy = Func->getFunctionType();
      const AttributeList &Attrs = Func->getAttributes();

      if (!Attrs.isEmpty() && !Func->isVarArg()) {
        unsigned InRegCount = 0;
        unsigned Idx = 1;

        for (FunctionType::param_iterator I = FTy->param_begin(),
             E = FTy->param_end(); I != E; ++I, ++Idx)
          if (Attrs.hasAttribute(Idx, Attribute::InReg)) {
            const DataLayout &DL = DAG.getDataLayout();
            // Count the number of 32-bit registers this parameter occupies.
            InRegCount += (DL.getTypeSizeInBits(*I) + 31) / 32;
          }

        if (InRegCount > 2) {
          report_fatal_error("Nest register in use - reduce number of inreg"
                             " parameters!");
        }
      }
      break;
    }
    case CallingConv::X86_FastCall:
    case CallingConv::X86_ThisCall:
    case CallingConv::Fast:
    case CallingConv::Tail:
    case CallingConv::SwiftTail:
      // Pass the 'nest' parameter in EAX.
      // Must be kept in sync with X86CallingConv.td.
      NestReg = X86::EAX;
      break;
    }

    SDValue OutChains[4];
    SDValue Addr, Disp;

    // Relative displacement for the jmp: target minus the address of the
    // byte after the 5-byte jmp instruction (trampoline + 10).
    Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
                       DAG.getConstant(10, dl, MVT::i32));
    Disp = DAG.getNode(ISD::SUB, dl, MVT::i32, FPtr, Addr);

    // mov <NestReg>, Nest — opcode byte encodes the destination register.
    const unsigned char MOV32ri = 0xB8; // X86::MOV32ri opcode
    const unsigned char N86Reg = TRI->getEncodingValue(NestReg) & 0x7;
    OutChains[0] =
        DAG.getStore(Root, dl, DAG.getConstant(MOV32ri | N86Reg, dl, MVT::i8),
                     Trmp, MachinePointerInfo(TrmpAddr));

    Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
                       DAG.getConstant(1, dl, MVT::i32));
    OutChains[1] = DAG.getStore(Root, dl, Nest, Addr,
                                MachinePointerInfo(TrmpAddr, 1), Align(1));

    // jmp rel32 to the nested function.
    const unsigned char JMP = 0xE9; // jmp <32bit dst> opcode
    Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
                       DAG.getConstant(5, dl, MVT::i32));
    OutChains[2] =
        DAG.getStore(Root, dl, DAG.getConstant(JMP, dl, MVT::i8), Addr,
                     MachinePointerInfo(TrmpAddr, 5), Align(1));

    Addr = DAG.getNode(ISD::ADD, dl, MVT::i32, Trmp,
                       DAG.getConstant(6, dl, MVT::i32));
    OutChains[3] = DAG.getStore(Root, dl, Disp, Addr,
                                MachinePointerInfo(TrmpAddr, 6), Align(1));

    return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
  }
}
| 27113 | |
SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op,
                                            SelectionDAG &DAG) const {
  // Lower FLT_ROUNDS_ by reading the x87 control word (FNSTCW) and mapping
  // its rounding-control field RC (bits 11:10) to the generic FLT_ROUNDS
  // encoding:
  //   RC = 00 (to nearest)     -> 1 (round to nearest)
  //   RC = 01 (toward -inf)    -> 3 (round toward -inf)
  //   RC = 10 (toward +inf)    -> 2 (round toward +inf)
  //   RC = 11 (toward zero)    -> 0 (round toward zero)
  // The mapping is done with the 2-bit-per-entry lookup constant 0x2d:
  //   (0x2d >> (RC * 2)) & 3  yields  1, 3, 2, 0  for RC = 0..3.
  MachineFunction &MF = DAG.getMachineFunction();
  MVT VT = Op.getSimpleValueType();
  SDLoc DL(Op);

  // Save the FP control word to a 2-byte stack slot.
  int SSFI = MF.getFrameInfo().CreateStackObject(2, Align(2), false);
  SDValue StackSlot =
      DAG.getFrameIndex(SSFI, getPointerTy(DAG.getDataLayout()));

  MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, SSFI);

  SDValue Chain = Op.getOperand(0);
  SDValue Ops[] = {Chain, StackSlot};
  Chain = DAG.getMemIntrinsicNode(X86ISD::FNSTCW16m, DL,
                                  DAG.getVTList(MVT::Other), Ops, MVT::i16, MPI,
                                  Align(2), MachineMemOperand::MOStore);

  // Load the control word back from the stack slot.
  SDValue CWD = DAG.getLoad(MVT::i16, DL, Chain, StackSlot, MPI, Align(2));
  Chain = CWD.getValue(1);

  // Extract RC: mask bits 11:10 and shift right by 9 so the field lands in
  // bits 2:1, i.e. Shift == RC * 2 — exactly the LUT shift amount.
  SDValue Shift =
    DAG.getNode(ISD::SRL, DL, MVT::i16,
                DAG.getNode(ISD::AND, DL, MVT::i16,
                            CWD, DAG.getConstant(0xc00, DL, MVT::i16)),
                DAG.getConstant(9, DL, MVT::i8));
  Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, Shift);

  // Index the packed lookup table and keep the selected 2-bit entry.
  SDValue LUT = DAG.getConstant(0x2d, DL, MVT::i32);
  SDValue RetVal =
    DAG.getNode(ISD::AND, DL, MVT::i32,
                DAG.getNode(ISD::SRL, DL, MVT::i32, LUT, Shift),
                DAG.getConstant(3, DL, MVT::i32));

  RetVal = DAG.getZExtOrTrunc(RetVal, DL, VT);

  return DAG.getMergeValues({RetVal, Chain}, DL);
}
| 27177 | |
SDValue X86TargetLowering::LowerSET_ROUNDING(SDValue Op,
                                             SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  SDLoc DL(Op);
  SDValue Chain = Op.getNode()->getOperand(0);

  // The x87 control word can only be loaded from memory, so allocate a stack
  // slot to stage the save/modify/restore sequence (4 bytes so the same slot
  // can also hold the 32-bit MXCSR below).
  int OldCWFrameIdx = MF.getFrameInfo().CreateStackObject(4, Align(4), false);
  SDValue StackSlot =
      DAG.getFrameIndex(OldCWFrameIdx, getPointerTy(DAG.getDataLayout()));
  MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, OldCWFrameIdx);
  MachineMemOperand *MMO =
      MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 2, Align(2));

  // Store the current FP control word into the slot.
  SDValue Ops[] = {Chain, StackSlot};
  Chain = DAG.getMemIntrinsicNode(
      X86ISD::FNSTCW16m, DL, DAG.getVTList(MVT::Other), Ops, MVT::i16, MMO);

  // Load it back and clear the rounding-control field (bits 11:10).
  SDValue CWD = DAG.getLoad(MVT::i16, DL, Chain, StackSlot, MPI);
  Chain = CWD.getValue(1);
  CWD = DAG.getNode(ISD::AND, DL, MVT::i16, CWD.getValue(0),
                    DAG.getConstant(0xf3ff, DL, MVT::i16));

  // Compute the new RC bits from the requested llvm::RoundingMode.
  SDValue NewRM = Op.getNode()->getOperand(1);
  SDValue RMBits;
  if (auto *CVal = dyn_cast<ConstantSDNode>(NewRM)) {
    // Constant mode: map directly to the hardware field value.
    uint64_t RM = CVal->getZExtValue();
    int FieldVal;
    switch (static_cast<RoundingMode>(RM)) {
    case RoundingMode::NearestTiesToEven: FieldVal = X86::rmToNearest; break;
    case RoundingMode::TowardNegative:    FieldVal = X86::rmDownward; break;
    case RoundingMode::TowardPositive:    FieldVal = X86::rmUpward; break;
    case RoundingMode::TowardZero:        FieldVal = X86::rmTowardZero; break;
    default:
      llvm_unreachable("rounding mode is not supported by X86 hardware");
    }
    RMBits = DAG.getConstant(FieldVal, DL, MVT::i16);
  } else {
    // Dynamic mode: translate the RoundingMode value RM (TowardZero=0,
    // NearestTiesToEven=1, TowardPositive=2, TowardNegative=3) into RC bits
    // branchlessly by shifting the packed constant 0xc9 left by (RM*2 + 4)
    // and masking bits 11:10:
    //   (0xc9 << 4)  & 0xc00 = 0xc00  RC=11 toward zero
    //   (0xc9 << 6)  & 0xc00 = 0x000  RC=00 to nearest
    //   (0xc9 << 8)  & 0xc00 = 0x800  RC=10 toward +inf
    //   (0xc9 << 10) & 0xc00 = 0x400  RC=01 toward -inf
    SDValue ShiftValue =
        DAG.getNode(ISD::TRUNCATE, DL, MVT::i8,
                    DAG.getNode(ISD::ADD, DL, MVT::i32,
                                DAG.getNode(ISD::SHL, DL, MVT::i32, NewRM,
                                            DAG.getConstant(1, DL, MVT::i8)),
                                DAG.getConstant(4, DL, MVT::i32)));
    SDValue Shifted =
        DAG.getNode(ISD::SHL, DL, MVT::i16, DAG.getConstant(0xc9, DL, MVT::i16),
                    ShiftValue);
    RMBits = DAG.getNode(ISD::AND, DL, MVT::i16, Shifted,
                         DAG.getConstant(0xc00, DL, MVT::i16));
  }

  // Merge the new RC bits into the cleared control word and spill it.
  CWD = DAG.getNode(ISD::OR, DL, MVT::i16, CWD, RMBits);
  Chain = DAG.getStore(Chain, DL, CWD, StackSlot, MPI, 2);

  // Reload the modified control word into the FPU (FLDCW).
  SDValue OpsLD[] = {Chain, StackSlot};
  MachineMemOperand *MMOL =
      MF.getMachineMemOperand(MPI, MachineMemOperand::MOLoad, 2, Align(2));
  Chain = DAG.getMemIntrinsicNode(
      X86ISD::FLDCW16m, DL, DAG.getVTList(MVT::Other), OpsLD, MVT::i16, MMOL);

  // If SSE is available, update MXCSR too: its rounding-control field uses
  // the same 2-bit encoding but lives in bits 14:13.
  if (Subtarget.hasSSE1()) {
    // Store MXCSR into the slot.
    Chain = DAG.getNode(
        ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Chain,
        DAG.getTargetConstant(Intrinsic::x86_sse_stmxcsr, DL, MVT::i32),
        StackSlot);

    // Load MXCSR and clear its RC field (bits 14:13).
    SDValue CWD = DAG.getLoad(MVT::i32, DL, Chain, StackSlot, MPI);
    Chain = CWD.getValue(1);
    CWD = DAG.getNode(ISD::AND, DL, MVT::i32, CWD.getValue(0),
                      DAG.getConstant(0xffff9fff, DL, MVT::i32));

    // Move the x87 RC bits from 11:10 up to MXCSR's 14:13.
    RMBits = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, RMBits);
    RMBits = DAG.getNode(ISD::SHL, DL, MVT::i32, RMBits,
                         DAG.getConstant(3, DL, MVT::i8));

    // Merge and spill the updated MXCSR.
    CWD = DAG.getNode(ISD::OR, DL, MVT::i32, CWD, RMBits);
    Chain = DAG.getStore(Chain, DL, CWD, StackSlot, MPI, 4);

    // Reload MXCSR from the slot (LDMXCSR).
    Chain = DAG.getNode(
        ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other), Chain,
        DAG.getTargetConstant(Intrinsic::x86_sse_ldmxcsr, DL, MVT::i32),
        StackSlot);
  }

  return Chain;
}
| 27289 | |
| 27290 | |
| 27291 | |
| 27292 | |
| 27293 | |
| 27294 | |
| 27295 | |
// Lower vector CTLZ for i8/i16 elements by promoting to an i32 vector,
// performing CTLZ there (natively supported given the hasCDI() gate in the
// caller), and subtracting the bias introduced by the zero-extension.
static SDValue LowerVectorCTLZ_AVX512CDI(SDValue Op, SelectionDAG &DAG,
                                         const X86Subtarget &Subtarget) {
  assert(Op.getOpcode() == ISD::CTLZ);
  SDLoc dl(Op);
  MVT VT = Op.getSimpleValueType();
  MVT EltVT = VT.getVectorElementType();
  unsigned NumElems = VT.getVectorNumElements();

  assert((EltVT == MVT::i8 || EltVT == MVT::i16) &&
          "Unsupported element type");

  // Split the vector if the promoted i32 form would exceed 512 bits, or
  // would need exactly 512 bits on a subtarget that can't extend to 512-bit
  // DQ operations.
  if (NumElems > 16 ||
      (NumElems == 16 && !Subtarget.canExtendTo512DQ()))
    return splitVectorIntUnary(Op, DAG);

  MVT NewVT = MVT::getVectorVT(MVT::i32, NumElems);
  assert((NewVT.is256BitVector() || NewVT.is512BitVector()) &&
         "Unsupported value type for operation");

  // Zero-extend to i32 lanes, count leading zeros there, truncate back.
  Op = DAG.getNode(ISD::ZERO_EXTEND, dl, NewVT, Op.getOperand(0));
  SDValue CtlzNode = DAG.getNode(ISD::CTLZ, dl, NewVT, Op);
  SDValue TruncNode = DAG.getNode(ISD::TRUNCATE, dl, VT, CtlzNode);
  // The zero-extension contributed (32 - EltBits) extra leading zeros to
  // every lane; remove them.
  SDValue Delta = DAG.getConstant(32 - EltVT.getSizeInBits(), dl, VT);

  return DAG.getNode(ISD::SUB, dl, VT, TruncNode, Delta);
}
| 27324 | |
| 27325 | |
// Lower vector CTLZ using an in-register PSHUFB nibble lookup table,
// computing a per-byte CTLZ first and then merging bytes up to the
// requested element width.
static SDValue LowerVectorCTLZInRegLUT(SDValue Op, const SDLoc &DL,
                                       const X86Subtarget &Subtarget,
                                       SelectionDAG &DAG) {
  MVT VT = Op.getSimpleValueType();
  int NumElts = VT.getVectorNumElements();
  int NumBytes = NumElts * (VT.getScalarSizeInBits() / 8);
  MVT CurrVT = MVT::getVectorVT(MVT::i8, NumBytes);

  // CTLZ of a 4-bit nibble, indexed 0..15 (clz(0)=4, clz(1)=3, ...).
  const int LUT[16] = { 4, 3, 2, 2,
                        1, 1, 1, 1,
                        0, 0, 0, 0,
                        0, 0, 0, 0};

  // Replicate the 16-entry LUT across every 128-bit lane for PSHUFB.
  SmallVector<SDValue, 64> LUTVec;
  for (int i = 0; i < NumBytes; ++i)
    LUTVec.push_back(DAG.getConstant(LUT[i % 16], DL, MVT::i8));
  SDValue InRegLUT = DAG.getBuildVector(CurrVT, DL, LUTVec);

  // Per-byte CTLZ: split each byte into lo/hi nibbles, look both up in the
  // LUT, and combine as
  //   CTLZ(byte) = LUT(hi) + (hi == 0 ? LUT(lo) : 0)
  // which works because LUT(0) == 4. HiZ is an all-ones byte mask where the
  // hi nibble is zero; on 512-bit types the compare produces an i1 mask that
  // must be sign-extended back to bytes.
  SDValue Op0 = DAG.getBitcast(CurrVT, Op.getOperand(0));
  SDValue Zero = DAG.getConstant(0, DL, CurrVT);

  SDValue NibbleShift = DAG.getConstant(0x4, DL, CurrVT);
  SDValue Lo = Op0;
  SDValue Hi = DAG.getNode(ISD::SRL, DL, CurrVT, Op0, NibbleShift);
  SDValue HiZ;
  if (CurrVT.is512BitVector()) {
    MVT MaskVT = MVT::getVectorVT(MVT::i1, CurrVT.getVectorNumElements());
    HiZ = DAG.getSetCC(DL, MaskVT, Hi, Zero, ISD::SETEQ);
    HiZ = DAG.getNode(ISD::SIGN_EXTEND, DL, CurrVT, HiZ);
  } else {
    HiZ = DAG.getSetCC(DL, CurrVT, Hi, Zero, ISD::SETEQ);
  }

  Lo = DAG.getNode(X86ISD::PSHUFB, DL, CurrVT, InRegLUT, Lo);
  Hi = DAG.getNode(X86ISD::PSHUFB, DL, CurrVT, InRegLUT, Hi);
  Lo = DAG.getNode(ISD::AND, DL, CurrVT, Lo, HiZ);
  SDValue Res = DAG.getNode(ISD::ADD, DL, CurrVT, Lo, Hi);

  // Widen the per-byte counts up to the original element width. For each
  // doubling step, the new count is
  //   CTLZ(elt) = CTLZ(hi half) + (hi half == 0 ? CTLZ(lo half) : 0)
  // R0 shifts the hi-half count down into place; R1 keeps the lo-half count
  // only where the hi half is entirely zero (HiZ mask shifted likewise).
  while (CurrVT != VT) {
    int CurrScalarSizeInBits = CurrVT.getScalarSizeInBits();
    int CurrNumElts = CurrVT.getVectorNumElements();
    MVT NextSVT = MVT::getIntegerVT(CurrScalarSizeInBits * 2);
    MVT NextVT = MVT::getVectorVT(NextSVT, CurrNumElts / 2);
    SDValue Shift = DAG.getConstant(CurrScalarSizeInBits, DL, NextVT);

    // All-ones mask for current-width elements of the input that are zero.
    if (CurrVT.is512BitVector()) {
      MVT MaskVT = MVT::getVectorVT(MVT::i1, CurrVT.getVectorNumElements());
      HiZ = DAG.getSetCC(DL, MaskVT, DAG.getBitcast(CurrVT, Op0),
                         DAG.getBitcast(CurrVT, Zero), ISD::SETEQ);
      HiZ = DAG.getNode(ISD::SIGN_EXTEND, DL, CurrVT, HiZ);
    } else {
      HiZ = DAG.getSetCC(DL, CurrVT, DAG.getBitcast(CurrVT, Op0),
                         DAG.getBitcast(CurrVT, Zero), ISD::SETEQ);
    }
    HiZ = DAG.getBitcast(NextVT, HiZ);

    // Combine the two half-counts of each doubled element.
    SDValue ResNext = Res = DAG.getBitcast(NextVT, Res);
    SDValue R0 = DAG.getNode(ISD::SRL, DL, NextVT, ResNext, Shift);
    SDValue R1 = DAG.getNode(ISD::SRL, DL, NextVT, HiZ, Shift);
    R1 = DAG.getNode(ISD::AND, DL, NextVT, ResNext, R1);
    Res = DAG.getNode(ISD::ADD, DL, NextVT, R0, R1);
    CurrVT = NextVT;
  }

  return Res;
}
| 27407 | |
// Dispatch vector CTLZ lowering: prefer the CDI i32-promotion path, then
// split illegal-width vectors, finally fall back to the PSHUFB LUT.
static SDValue LowerVectorCTLZ(SDValue Op, const SDLoc &DL,
                               const X86Subtarget &Subtarget,
                               SelectionDAG &DAG) {
  MVT VT = Op.getSimpleValueType();

  if (Subtarget.hasCDI() &&
      // vXi8 vectors would need to be promoted to 512-bits for vXi32.
      (Subtarget.canExtendTo512DQ() || VT.getVectorElementType() != MVT::i8))
    return LowerVectorCTLZ_AVX512CDI(Op, DAG, Subtarget);

  // Decompose 256-bit ops into two 128-bit ops when AVX2 is unavailable.
  if (VT.is256BitVector() && !Subtarget.hasInt256())
    return splitVectorIntUnary(Op, DAG);

  // Decompose 512-bit ops into two 256-bit ops when BWI is unavailable.
  if (VT.is512BitVector() && !Subtarget.hasBWI())
    return splitVectorIntUnary(Op, DAG);

  assert(Subtarget.hasSSSE3() && "Expected SSSE3 support for PSHUFB");
  return LowerVectorCTLZInRegLUT(Op, DL, Subtarget, DAG);
}
| 27429 | |
// Lower scalar CTLZ/CTLZ_ZERO_UNDEF via BSR: since BSR returns the index of
// the highest set bit, CTLZ(x) == (BitWidth - 1) XOR BSR(x).
static SDValue LowerCTLZ(SDValue Op, const X86Subtarget &Subtarget,
                         SelectionDAG &DAG) {
  MVT VT = Op.getSimpleValueType();
  MVT OpVT = VT;
  unsigned NumBits = VT.getSizeInBits();
  SDLoc dl(Op);
  unsigned Opc = Op.getOpcode();

  if (VT.isVector())
    return LowerVectorCTLZ(Op, dl, Subtarget, DAG);

  Op = Op.getOperand(0);
  if (VT == MVT::i8) {
    // Zero-extend to i32 since there is no i8 BSR.
    OpVT = MVT::i32;
    Op = DAG.getNode(ISD::ZERO_EXTEND, dl, OpVT, Op);
  }

  // Issue a BSR (bit scan reverse); the second result is EFLAGS.
  SDVTList VTs = DAG.getVTList(OpVT, MVT::i32);
  Op = DAG.getNode(X86ISD::BSR, dl, VTs, Op);

  if (Opc == ISD::CTLZ) {
    // Zero input (ZF set): select 2*NumBits-1 so the XOR below produces
    // NumBits, the defined CTLZ result for zero. CTLZ_ZERO_UNDEF skips this.
    SDValue Ops[] = {Op, DAG.getConstant(NumBits + NumBits - 1, dl, OpVT),
                     DAG.getTargetConstant(X86::COND_E, dl, MVT::i8),
                     Op.getValue(1)};
    Op = DAG.getNode(X86ISD::CMOV, dl, OpVT, Ops);
  }

  // Convert the bit index into a leading-zero count.
  Op = DAG.getNode(ISD::XOR, dl, OpVT, Op,
                   DAG.getConstant(NumBits - 1, dl, OpVT));

  if (VT == MVT::i8)
    Op = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Op);
  return Op;
}
| 27468 | |
| 27469 | static SDValue LowerCTTZ(SDValue Op, const X86Subtarget &Subtarget, |
| 27470 | SelectionDAG &DAG) { |
| 27471 | MVT VT = Op.getSimpleValueType(); |
| 27472 | unsigned NumBits = VT.getScalarSizeInBits(); |
| 27473 | SDValue N0 = Op.getOperand(0); |
| 27474 | SDLoc dl(Op); |
| 27475 | |
| 27476 | assert(!VT.isVector() && Op.getOpcode() == ISD::CTTZ && |
| 27477 | "Only scalar CTTZ requires custom lowering"); |
| 27478 | |
| 27479 | |
| 27480 | SDVTList VTs = DAG.getVTList(VT, MVT::i32); |
| 27481 | Op = DAG.getNode(X86ISD::BSF, dl, VTs, N0); |
| 27482 | |
| 27483 | |
| 27484 | SDValue Ops[] = {Op, DAG.getConstant(NumBits, dl, VT), |
| 27485 | DAG.getTargetConstant(X86::COND_E, dl, MVT::i8), |
| 27486 | Op.getValue(1)}; |
| 27487 | return DAG.getNode(X86ISD::CMOV, dl, VT, Ops); |
| 27488 | } |
| 27489 | |
| 27490 | static SDValue lowerAddSub(SDValue Op, SelectionDAG &DAG, |
| 27491 | const X86Subtarget &Subtarget) { |
| 27492 | MVT VT = Op.getSimpleValueType(); |
| 27493 | if (VT == MVT::i16 || VT == MVT::i32) |
| 27494 | return lowerAddSubToHorizontalOp(Op, DAG, Subtarget); |
| 27495 | |
| 27496 | if (VT == MVT::v32i16 || VT == MVT::v64i8) |
| 27497 | return splitVectorIntBinary(Op, DAG); |
| 27498 | |
| 27499 | assert(Op.getSimpleValueType().is256BitVector() && |
| 27500 | Op.getSimpleValueType().isInteger() && |
| 27501 | "Only handle AVX 256-bit vector integer operation"); |
| 27502 | return splitVectorIntBinary(Op, DAG); |
| 27503 | } |
| 27504 | |
// Custom lowering for saturating add/sub.
static SDValue LowerADDSAT_SUBSAT(SDValue Op, SelectionDAG &DAG,
                                  const X86Subtarget &Subtarget) {
  MVT VT = Op.getSimpleValueType();
  SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
  unsigned Opcode = Op.getOpcode();
  SDLoc DL(Op);

  // Split 512-bit i16/i8 vectors and 256-bit vectors without AVX2.
  if (VT == MVT::v32i16 || VT == MVT::v64i8 ||
      (VT.is256BitVector() && !Subtarget.hasInt256())) {
    assert(Op.getSimpleValueType().isInteger() &&
           "Only handle AVX vector integer operation");
    return splitVectorIntBinary(Op, DAG);
  }

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  EVT SetCCResultType =
      TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);

  // USUBSAT without a legal UMAX: usubsat X, Y --> (X >u Y) ? X - Y : 0.
  if (Opcode == ISD::USUBSAT && !TLI.isOperationLegal(ISD::UMAX, VT)) {
    SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, X, Y);
    SDValue Cmp = DAG.getSetCC(DL, SetCCResultType, X, Y, ISD::SETUGT);
    // If the compare result is an all-zeros/all-ones mask of the same type,
    // the select can be folded to a simple AND with the subtraction.
    if (SetCCResultType == VT &&
        DAG.ComputeNumSignBits(Cmp) == VT.getScalarSizeInBits())
      return DAG.getNode(ISD::AND, DL, VT, Cmp, Sub);
    return DAG.getSelect(DL, VT, Cmp, Sub, DAG.getConstant(0, DL, VT));
  }

  // Use the target-independent default expansion.
  return SDValue();
}
| 27538 | |
// Custom lowering for integer absolute value.
static SDValue LowerABS(SDValue Op, const X86Subtarget &Subtarget,
                        SelectionDAG &DAG) {
  MVT VT = Op.getSimpleValueType();
  if (VT == MVT::i16 || VT == MVT::i32 || VT == MVT::i64) {
    // Scalar abs: compute 0 - X (which sets EFLAGS) and CMOV on COND_GE to
    // pick X when it was already non-negative.
    SDLoc DL(Op);
    SDValue N0 = Op.getOperand(0);
    SDValue Neg = DAG.getNode(X86ISD::SUB, DL, DAG.getVTList(VT, MVT::i32),
                              DAG.getConstant(0, DL, VT), N0);
    SDValue Ops[] = {N0, Neg, DAG.getTargetConstant(X86::COND_GE, DL, MVT::i8),
                     SDValue(Neg.getNode(), 1)};
    return DAG.getNode(X86ISD::CMOV, DL, VT, Ops);
  }

  // vXi64 abs with SSE4.1: BLENDV selects (0 - X) where X's sign bit is set.
  if ((VT == MVT::v2i64 || VT == MVT::v4i64) && Subtarget.hasSSE41()) {
    SDLoc DL(Op);
    SDValue Src = Op.getOperand(0);
    SDValue Sub =
        DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Src);
    return DAG.getNode(X86ISD::BLENDV, DL, VT, Src, Sub, Src);
  }

  // Decompose 256-bit ops into two 128-bit ops without AVX2.
  if (VT.is256BitVector() && !Subtarget.hasInt256()) {
    assert(VT.isInteger() &&
           "Only handle AVX 256-bit vector integer operation");
    return splitVectorIntUnary(Op, DAG);
  }

  // Decompose 512-bit i16/i8 ops without BWI.
  if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !Subtarget.hasBWI())
    return splitVectorIntUnary(Op, DAG);

  // Default to expand.
  return SDValue();
}
| 27575 | |
| 27576 | static SDValue LowerMINMAX(SDValue Op, SelectionDAG &DAG) { |
| 27577 | MVT VT = Op.getSimpleValueType(); |
| 27578 | |
| 27579 | |
| 27580 | if (VT.getScalarType() != MVT::i64 && VT.is256BitVector()) |
| 27581 | return splitVectorIntBinary(Op, DAG); |
| 27582 | |
| 27583 | if (VT == MVT::v32i16 || VT == MVT::v64i8) |
| 27584 | return splitVectorIntBinary(Op, DAG); |
| 27585 | |
| 27586 | |
| 27587 | return SDValue(); |
| 27588 | } |
| 27589 | |
// Custom lowering for vector multiplies: widen-and-pack for vXi8, the
// pmuludq shuffle trick for pre-SSE4.1 v4i32, and a three-pmuludq schoolbook
// expansion for vXi64.
static SDValue LowerMUL(SDValue Op, const X86Subtarget &Subtarget,
                        SelectionDAG &DAG) {
  SDLoc dl(Op);
  MVT VT = Op.getSimpleValueType();

  // Decompose 256-bit ops into two 128-bit ops without AVX2.
  if (VT.is256BitVector() && !Subtarget.hasInt256())
    return splitVectorIntBinary(Op, DAG);

  // Decompose 512-bit i16/i8 ops without BWI.
  if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !Subtarget.hasBWI())
    return splitVectorIntBinary(Op, DAG);

  SDValue A = Op.getOperand(0);
  SDValue B = Op.getOperand(1);

  // Lower vXi8 multiplies by widening to i16, multiplying, and packing back.
  if (VT == MVT::v16i8 || VT == MVT::v32i8 || VT == MVT::v64i8) {
    unsigned NumElts = VT.getVectorNumElements();

    // If a double-width i16 multiply is available, any-extend, multiply
    // once, and truncate.
    if ((VT == MVT::v16i8 && Subtarget.hasInt256()) ||
        (VT == MVT::v32i8 && Subtarget.canExtendTo512BW())) {
      MVT ExVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements());
      return DAG.getNode(
          ISD::TRUNCATE, dl, VT,
          DAG.getNode(ISD::MUL, dl, ExVT,
                      DAG.getNode(ISD::ANY_EXTEND, dl, ExVT, A),
                      DAG.getNode(ISD::ANY_EXTEND, dl, ExVT, B)));
    }

    MVT ExVT = MVT::getVectorVT(MVT::i16, NumElts / 2);

    // Otherwise unpack lo/hi halves against undef (an any-extend): the high
    // byte of each i16 lane is masked off after the multiply, so its
    // contents don't matter.
    SDValue Undef = DAG.getUNDEF(VT);
    SDValue ALo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, A, Undef));
    SDValue AHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, A, Undef));

    SDValue BLo, BHi;
    if (ISD::isBuildVectorOfConstantSDNodes(B.getNode())) {
      // Constant RHS: build the unpacked lo/hi i16 vectors directly,
      // processing one 16-byte lane at a time.
      SmallVector<SDValue, 16> LoOps, HiOps;
      for (unsigned i = 0; i != NumElts; i += 16) {
        for (unsigned j = 0; j != 8; ++j) {
          LoOps.push_back(DAG.getAnyExtOrTrunc(B.getOperand(i + j), dl,
                                               MVT::i16));
          HiOps.push_back(DAG.getAnyExtOrTrunc(B.getOperand(i + j + 8), dl,
                                               MVT::i16));
        }
      }

      BLo = DAG.getBuildVector(ExVT, dl, LoOps);
      BHi = DAG.getBuildVector(ExVT, dl, HiOps);
    } else {
      BLo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, B, Undef));
      BHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, B, Undef));
    }

    // Multiply, keep only the low byte of each i16 product, and pack the
    // two halves back into a vXi8 result.
    SDValue RLo = DAG.getNode(ISD::MUL, dl, ExVT, ALo, BLo);
    SDValue RHi = DAG.getNode(ISD::MUL, dl, ExVT, AHi, BHi);
    RLo = DAG.getNode(ISD::AND, dl, ExVT, RLo, DAG.getConstant(255, dl, ExVT));
    RHi = DAG.getNode(ISD::AND, dl, ExVT, RHi, DAG.getConstant(255, dl, ExVT));
    return DAG.getNode(X86ISD::PACKUS, dl, VT, RLo, RHi);
  }

  // Pre-SSE4.1 v4i32: lower as 2 shuffles + 2 pmuludq + a merge shuffle.
  if (VT == MVT::v4i32) {
    assert(Subtarget.hasSSE2() && !Subtarget.hasSSE41() &&
           "Should not custom lower when pmulld is available!");

    // Move the odd elements into even positions.
    static const int UnpackMask[] = { 1, -1, 3, -1 };
    SDValue Aodds = DAG.getVectorShuffle(VT, dl, A, A, UnpackMask);
    SDValue Bodds = DAG.getVectorShuffle(VT, dl, B, B, UnpackMask);

    // PMULUDQ multiplies the even i32 lanes, producing i64 products; use it
    // once on the original vectors (even products) ...
    SDValue Evens = DAG.getNode(X86ISD::PMULUDQ, dl, MVT::v2i64,
                                DAG.getBitcast(MVT::v2i64, A),
                                DAG.getBitcast(MVT::v2i64, B));
    // ... and once on the shuffled vectors (odd products).
    SDValue Odds = DAG.getNode(X86ISD::PMULUDQ, dl, MVT::v2i64,
                               DAG.getBitcast(MVT::v2i64, Aodds),
                               DAG.getBitcast(MVT::v2i64, Bodds));

    Evens = DAG.getBitcast(VT, Evens);
    Odds = DAG.getBitcast(VT, Odds);

    // Interleave the low 32 bits of the even and odd products back into
    // element order.
    static const int ShufMask[] = { 0, 4, 2, 6 };
    return DAG.getVectorShuffle(VT, dl, Evens, Odds, ShufMask);
  }

  assert((VT == MVT::v2i64 || VT == MVT::v4i64 || VT == MVT::v8i64) &&
         "Only know how to lower V2I64/V4I64/V8I64 multiply");
  assert(!Subtarget.hasDQI() && "DQI should use MULLQ");

  // vXi64 schoolbook multiply from 32x32->64 pieces:
  //   Ahi = a >> 32;  Bhi = b >> 32;
  //   result = pmuludq(a, b)                       // lo*lo
  //          + ((pmuludq(a, Bhi) + pmuludq(Ahi, b)) << 32)  // cross terms
  // Use known-bits analysis to drop any partial product whose operand half
  // is provably zero.
  KnownBits AKnown = DAG.computeKnownBits(A);
  KnownBits BKnown = DAG.computeKnownBits(B);

  APInt LowerBitsMask = APInt::getLowBitsSet(64, 32);
  bool ALoIsZero = LowerBitsMask.isSubsetOf(AKnown.Zero);
  bool BLoIsZero = LowerBitsMask.isSubsetOf(BKnown.Zero);

  APInt UpperBitsMask = APInt::getHighBitsSet(64, 32);
  bool AHiIsZero = UpperBitsMask.isSubsetOf(AKnown.Zero);
  bool BHiIsZero = UpperBitsMask.isSubsetOf(BKnown.Zero);

  SDValue Zero = DAG.getConstant(0, dl, VT);

  // Only emit the partial products that aren't known to be zero.
  SDValue AloBlo = Zero;
  if (!ALoIsZero && !BLoIsZero)
    AloBlo = DAG.getNode(X86ISD::PMULUDQ, dl, VT, A, B);

  SDValue AloBhi = Zero;
  if (!ALoIsZero && !BHiIsZero) {
    SDValue Bhi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, VT, B, 32, DAG);
    AloBhi = DAG.getNode(X86ISD::PMULUDQ, dl, VT, A, Bhi);
  }

  SDValue AhiBlo = Zero;
  if (!AHiIsZero && !BLoIsZero) {
    SDValue Ahi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, VT, A, 32, DAG);
    AhiBlo = DAG.getNode(X86ISD::PMULUDQ, dl, VT, Ahi, B);
  }

  SDValue Hi = DAG.getNode(ISD::ADD, dl, VT, AloBhi, AhiBlo);
  Hi = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, Hi, 32, DAG);

  return DAG.getNode(ISD::ADD, dl, VT, AloBlo, Hi);
}
| 27734 | |
// Lower a vXi8 multiply-high (and optionally also produce the low half in
// *Low) by unpacking each 128-bit lane's lo/hi bytes into vXi16 lanes,
// multiplying there, then packing the results back to vXi8.
//
// Unsigned: bytes are placed in the LOW byte of each i16 lane (zero-extend
// via unpack with zero) and ISD::MUL yields the full 16-bit product.
// Signed: bytes are placed in the HIGH byte of each i16 lane (value << 8),
// so ISD::MULHS of (a<<8)*(b<<8) — i.e. bits 31:16 of (a*b)<<16 — again
// yields the full 16-bit signed product without needing a sign-extension.
// Either way, byte 0 of each lane is the low product byte and byte 1 the
// high product byte.
static SDValue LowervXi8MulWithUNPCK(SDValue A, SDValue B, const SDLoc &dl,
                                     MVT VT, bool IsSigned,
                                     const X86Subtarget &Subtarget,
                                     SelectionDAG &DAG,
                                     SDValue *Low = nullptr) {
  unsigned NumElts = VT.getVectorNumElements();

  MVT ExVT = MVT::getVectorVT(MVT::i16, NumElts / 2);
  SDValue Zero = DAG.getConstant(0, dl, VT);

  // Unpack A: zero bytes go first (low byte) for signed, last for unsigned.
  SDValue ALo, AHi;
  if (IsSigned) {
    ALo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, Zero, A));
    AHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, Zero, A));
  } else {
    ALo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, A, Zero));
    AHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, A, Zero));
  }

  SDValue BLo, BHi;
  if (ISD::isBuildVectorOfConstantSDNodes(B.getNode())) {
    // Constant RHS: build the unpacked i16 vectors directly, one 16-byte
    // lane at a time; the signed path shifts each value into the high byte.
    SmallVector<SDValue, 16> LoOps, HiOps;
    for (unsigned i = 0; i != NumElts; i += 16) {
      for (unsigned j = 0; j != 8; ++j) {
        SDValue LoOp = B.getOperand(i + j);
        SDValue HiOp = B.getOperand(i + j + 8);

        if (IsSigned) {
          LoOp = DAG.getAnyExtOrTrunc(LoOp, dl, MVT::i16);
          HiOp = DAG.getAnyExtOrTrunc(HiOp, dl, MVT::i16);
          LoOp = DAG.getNode(ISD::SHL, dl, MVT::i16, LoOp,
                             DAG.getConstant(8, dl, MVT::i16));
          HiOp = DAG.getNode(ISD::SHL, dl, MVT::i16, HiOp,
                             DAG.getConstant(8, dl, MVT::i16));
        } else {
          LoOp = DAG.getZExtOrTrunc(LoOp, dl, MVT::i16);
          HiOp = DAG.getZExtOrTrunc(HiOp, dl, MVT::i16);
        }

        LoOps.push_back(LoOp);
        HiOps.push_back(HiOp);
      }
    }

    BLo = DAG.getBuildVector(ExVT, dl, LoOps);
    BHi = DAG.getBuildVector(ExVT, dl, HiOps);
  } else if (IsSigned) {
    BLo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, Zero, B));
    BHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, Zero, B));
  } else {
    BLo = DAG.getBitcast(ExVT, getUnpackl(DAG, dl, VT, B, Zero));
    BHi = DAG.getBitcast(ExVT, getUnpackh(DAG, dl, VT, B, Zero));
  }

  // Multiply (MULHS for the signed shifted form, MUL for unsigned); each
  // i16 lane now holds the full 16-bit product.
  unsigned MulOpc = IsSigned ? ISD::MULHS : ISD::MUL;
  SDValue RLo = DAG.getNode(MulOpc, dl, ExVT, ALo, BLo);
  SDValue RHi = DAG.getNode(MulOpc, dl, ExVT, AHi, BHi);

  if (Low) {
    // Low half requested: mask the low product byte of each lane and pack.
    SDValue Mask = DAG.getConstant(255, dl, ExVT);
    SDValue LLo = DAG.getNode(ISD::AND, dl, ExVT, RLo, Mask);
    SDValue LHi = DAG.getNode(ISD::AND, dl, ExVT, RHi, Mask);
    *Low = DAG.getNode(X86ISD::PACKUS, dl, VT, LLo, LHi);
  }

  // Shift the high product byte down and pack the halves back to vXi8.
  RLo = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExVT, RLo, 8, DAG);
  RHi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExVT, RHi, 8, DAG);

  return DAG.getNode(X86ISD::PACKUS, dl, VT, RLo, RHi);
}
| 27822 | |
// Custom lowering for MULHS/MULHU (high half of a widening multiply).
static SDValue LowerMULH(SDValue Op, const X86Subtarget &Subtarget,
                         SelectionDAG &DAG) {
  SDLoc dl(Op);
  MVT VT = Op.getSimpleValueType();
  bool IsSigned = Op->getOpcode() == ISD::MULHS;
  unsigned NumElts = VT.getVectorNumElements();
  SDValue A = Op.getOperand(0);
  SDValue B = Op.getOperand(1);

  // Decompose 256-bit ops into two 128-bit ops without AVX2.
  if (VT.is256BitVector() && !Subtarget.hasInt256())
    return splitVectorIntBinary(Op, DAG);

  // Decompose 512-bit i16/i8 ops without BWI.
  if ((VT == MVT::v32i16 || VT == MVT::v64i8) && !Subtarget.hasBWI())
    return splitVectorIntBinary(Op, DAG);

  if (VT == MVT::v4i32 || VT == MVT::v8i32 || VT == MVT::v16i32) {
    assert((VT == MVT::v4i32 && Subtarget.hasSSE2()) ||
           (VT == MVT::v8i32 && Subtarget.hasInt256()) ||
           (VT == MVT::v16i32 && Subtarget.hasAVX512()));

    // PMUL(U)DQ multiplies the EVEN i32 lanes of its inputs, producing a
    // 64-bit product per pair of lanes. To get every high half we multiply
    // twice: once with the original vectors (even lanes) and once with the
    // odd lanes shuffled into even positions.
    const int Mask[] = {1, -1, 3, -1, 5, -1, 7, -1,
                        9, -1, 11, -1, 13, -1, 15, -1};

    SDValue Odd0 = DAG.getVectorShuffle(VT, dl, A, A,
                                        makeArrayRef(&Mask[0], NumElts));

    SDValue Odd1 = DAG.getVectorShuffle(VT, dl, B, B,
                                        makeArrayRef(&Mask[0], NumElts));

    // PMULDQ (signed) needs SSE4.1; otherwise do an unsigned multiply and
    // fix up the sign afterwards.
    MVT MulVT = MVT::getVectorVT(MVT::i64, NumElts / 2);
    unsigned Opcode =
        (IsSigned && Subtarget.hasSSE41()) ? X86ISD::PMULDQ : X86ISD::PMULUDQ;

    // 64-bit products of the even lanes.
    SDValue Mul1 = DAG.getBitcast(VT, DAG.getNode(Opcode, dl, MulVT,
                                                  DAG.getBitcast(MulVT, A),
                                                  DAG.getBitcast(MulVT, B)));

    // 64-bit products of the odd lanes.
    SDValue Mul2 = DAG.getBitcast(VT, DAG.getNode(Opcode, dl, MulVT,
                                                  DAG.getBitcast(MulVT, Odd0),
                                                  DAG.getBitcast(MulVT, Odd1)));

    // Pick the high 32 bits of each product and restore element order.
    SmallVector<int, 16> ShufMask(NumElts);
    for (int i = 0; i != (int)NumElts; ++i)
      ShufMask[i] = (i / 2) * 2 + ((i % 2) * NumElts) + 1;

    SDValue Res = DAG.getVectorShuffle(VT, dl, Mul1, Mul2, ShufMask);

    // Signed multiply without PMULDQ: correct the unsigned high half via
    //   mulhs(a,b) = mulhu(a,b) - (a<0 ? b : 0) - (b<0 ? a : 0).
    if (IsSigned && !Subtarget.hasSSE41()) {
      SDValue Zero = DAG.getConstant(0, dl, VT);
      SDValue T1 = DAG.getNode(ISD::AND, dl, VT,
                               DAG.getSetCC(dl, VT, Zero, A, ISD::SETGT), B);
      SDValue T2 = DAG.getNode(ISD::AND, dl, VT,
                               DAG.getSetCC(dl, VT, Zero, B, ISD::SETGT), A);

      SDValue Fixup = DAG.getNode(ISD::ADD, dl, VT, T1, T2);
      Res = DAG.getNode(ISD::SUB, dl, VT, Res, Fixup);
    }

    return Res;
  }

  // Only i8 vectors should remain at this point.
  assert((VT == MVT::v16i8 || (VT == MVT::v32i8 && Subtarget.hasInt256()) ||
          (VT == MVT::v64i8 && Subtarget.hasBWI())) &&
         "Unsupported vector type");

  // If a double-width i16 multiply is available, sign/zero-extend to i16,
  // multiply, shift the high byte down and truncate back to i8.
  if ((VT == MVT::v16i8 && Subtarget.hasInt256()) ||
      (VT == MVT::v32i8 && Subtarget.canExtendTo512BW())) {
    MVT ExVT = MVT::getVectorVT(MVT::i16, NumElts);
    unsigned ExAVX = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
    SDValue ExA = DAG.getNode(ExAVX, dl, ExVT, A);
    SDValue ExB = DAG.getNode(ExAVX, dl, ExVT, B);
    SDValue Mul = DAG.getNode(ISD::MUL, dl, ExVT, ExA, ExB);
    Mul = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExVT, Mul, 8, DAG);
    return DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);
  }

  // Otherwise widen within each 128-bit lane via unpacking.
  return LowervXi8MulWithUNPCK(A, B, dl, VT, IsSigned, Subtarget, DAG);
}
| 27928 | |
| 27929 | |
// Lower ISD::SMULO / ISD::UMULO (multiply with overflow flag) for vector
// types.  Scalar types are forwarded to LowerXALUO.  Vector types compute a
// double-width product and test whether the high half matches what a
// non-overflowing multiply would have produced.
static SDValue LowerMULO(SDValue Op, const X86Subtarget &Subtarget,
                         SelectionDAG &DAG) {
  MVT VT = Op.getSimpleValueType();

  // Scalar overflow multiplies have their own lowering.
  if (!VT.isVector())
    return LowerXALUO(Op, DAG);

  SDLoc dl(Op);
  bool IsSigned = Op->getOpcode() == ISD::SMULO;
  SDValue A = Op.getOperand(0);
  SDValue B = Op.getOperand(1);
  EVT OvfVT = Op->getValueType(1);

  if ((VT == MVT::v32i8 && !Subtarget.hasInt256()) ||
      (VT == MVT::v64i8 && !Subtarget.hasBWI())) {
    // Vector is wider than the target's registers: split the operands and
    // the overflow type in half, lower each half, then concatenate results.
    SDValue LHSLo, LHSHi;
    std::tie(LHSLo, LHSHi) = splitVector(A, DAG, dl);

    SDValue RHSLo, RHSHi;
    std::tie(RHSLo, RHSHi) = splitVector(B, DAG, dl);

    EVT LoOvfVT, HiOvfVT;
    std::tie(LoOvfVT, HiOvfVT) = DAG.GetSplitDestVTs(OvfVT);
    SDVTList LoVTs = DAG.getVTList(LHSLo.getValueType(), LoOvfVT);
    SDVTList HiVTs = DAG.getVTList(LHSHi.getValueType(), HiOvfVT);

    // Re-emit the same SMULO/UMULO on each half.
    SDValue Lo = DAG.getNode(Op.getOpcode(), dl, LoVTs, LHSLo, RHSLo);
    SDValue Hi = DAG.getNode(Op.getOpcode(), dl, HiVTs, LHSHi, RHSHi);

    SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi);
    SDValue Ovf = DAG.getNode(ISD::CONCAT_VECTORS, dl, OvfVT, Lo.getValue(1),
                              Hi.getValue(1));

    return DAG.getMergeValues({Res, Ovf}, dl);
  }

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  EVT SetccVT =
      TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);

  if ((VT == MVT::v16i8 && Subtarget.hasInt256()) ||
      (VT == MVT::v32i8 && Subtarget.canExtendTo512BW())) {
    // Extend the bytes to i16 lanes, multiply there, and truncate the low
    // byte of each product as the result.
    unsigned NumElts = VT.getVectorNumElements();
    MVT ExVT = MVT::getVectorVT(MVT::i16, NumElts);
    unsigned ExAVX = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
    SDValue ExA = DAG.getNode(ExAVX, dl, ExVT, A);
    SDValue ExB = DAG.getNode(ExAVX, dl, ExVT, B);
    SDValue Mul = DAG.getNode(ISD::MUL, dl, ExVT, ExA, ExB);

    SDValue Low = DAG.getNode(ISD::TRUNCATE, dl, VT, Mul);

    SDValue Ovf;
    if (IsSigned) {
      // Signed overflow iff the high byte of the product differs from the
      // sign-extension of the low byte.
      SDValue High, LowSign;
      if (OvfVT.getVectorElementType() == MVT::i1 &&
          (Subtarget.hasBWI() || Subtarget.canExtendTo512DQ())) {
        // Compare straight into a vXi1 mask.
        High = getTargetVShiftByConstNode(X86ISD::VSRAI, dl, ExVT, Mul, 8, DAG);
        // (Mul << 8) >>s 15 replicates the low byte's sign bit across i16.
        LowSign =
            getTargetVShiftByConstNode(X86ISD::VSHLI, dl, ExVT, Mul, 8, DAG);
        LowSign = getTargetVShiftByConstNode(X86ISD::VSRAI, dl, ExVT, LowSign,
                                             15, DAG);
        SetccVT = OvfVT;
        if (!Subtarget.hasBWI()) {
          // Without BWI, widen to v16i32 so the mask compare is legal.
          High = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v16i32, High);
          LowSign = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v16i32, LowSign);
        }
      } else {
        // Otherwise perform the comparison in the byte domain.
        High = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExVT, Mul, 8, DAG);
        High = DAG.getNode(ISD::TRUNCATE, dl, VT, High);
        LowSign =
            DAG.getNode(ISD::SRA, dl, VT, Low, DAG.getConstant(7, dl, VT));
      }

      Ovf = DAG.getSetCC(dl, SetccVT, LowSign, High, ISD::SETNE);
    } else {
      // Unsigned overflow iff the high byte of the product is non-zero.
      SDValue High =
          getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExVT, Mul, 8, DAG);
      if (OvfVT.getVectorElementType() == MVT::i1 &&
          (Subtarget.hasBWI() || Subtarget.canExtendTo512DQ())) {
        // Compare straight into a vXi1 mask.
        SetccVT = OvfVT;
        if (!Subtarget.hasBWI()) {
          // Without BWI, widen to v16i32 so the mask compare is legal.
          High = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v16i32, High);
        }
      } else {
        // Compare in the byte domain.
        High = DAG.getNode(ISD::TRUNCATE, dl, VT, High);
      }

      Ovf =
          DAG.getSetCC(dl, SetccVT, High,
                       DAG.getConstant(0, dl, High.getValueType()), ISD::SETNE);
    }

    Ovf = DAG.getSExtOrTrunc(Ovf, dl, OvfVT);

    return DAG.getMergeValues({Low, Ovf}, dl);
  }

  // Fallback: compute low/high product halves with unpack-based multiplies
  // and apply the same overflow tests in the byte domain.
  SDValue Low;
  SDValue High =
      LowervXi8MulWithUNPCK(A, B, dl, VT, IsSigned, Subtarget, DAG, &Low);

  SDValue Ovf;
  if (IsSigned) {
    // SMULO overflow: high byte != sign-extension of the low byte.
    SDValue LowSign =
        DAG.getNode(ISD::SRA, dl, VT, Low, DAG.getConstant(7, dl, VT));
    Ovf = DAG.getSetCC(dl, SetccVT, LowSign, High, ISD::SETNE);
  } else {
    // UMULO overflow: high byte != 0.
    Ovf =
        DAG.getSetCC(dl, SetccVT, High, DAG.getConstant(0, dl, VT), ISD::SETNE);
  }

  Ovf = DAG.getSExtOrTrunc(Ovf, dl, OvfVT);

  return DAG.getMergeValues({Low, Ovf}, dl);
}
| 28060 | |
| 28061 | SDValue X86TargetLowering::LowerWin64_i128OP(SDValue Op, SelectionDAG &DAG) const { |
| 28062 | assert(Subtarget.isTargetWin64() && "Unexpected target"); |
| 28063 | EVT VT = Op.getValueType(); |
| 28064 | assert(VT.isInteger() && VT.getSizeInBits() == 128 && |
| 28065 | "Unexpected return type for lowering"); |
| 28066 | |
| 28067 | RTLIB::Libcall LC; |
| 28068 | bool isSigned; |
| 28069 | switch (Op->getOpcode()) { |
| 28070 | default: llvm_unreachable("Unexpected request for libcall!"); |
| 28071 | case ISD::SDIV: isSigned = true; LC = RTLIB::SDIV_I128; break; |
| 28072 | case ISD::UDIV: isSigned = false; LC = RTLIB::UDIV_I128; break; |
| 28073 | case ISD::SREM: isSigned = true; LC = RTLIB::SREM_I128; break; |
| 28074 | case ISD::UREM: isSigned = false; LC = RTLIB::UREM_I128; break; |
| 28075 | } |
| 28076 | |
| 28077 | SDLoc dl(Op); |
| 28078 | SDValue InChain = DAG.getEntryNode(); |
| 28079 | |
| 28080 | TargetLowering::ArgListTy Args; |
| 28081 | TargetLowering::ArgListEntry Entry; |
| 28082 | for (unsigned i = 0, e = Op->getNumOperands(); i != e; ++i) { |
| 28083 | EVT ArgVT = Op->getOperand(i).getValueType(); |
| 28084 | assert(ArgVT.isInteger() && ArgVT.getSizeInBits() == 128 && |
| 28085 | "Unexpected argument type for lowering"); |
| 28086 | SDValue StackPtr = DAG.CreateStackTemporary(ArgVT, 16); |
| 28087 | int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); |
| 28088 | MachinePointerInfo MPI = |
| 28089 | MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI); |
| 28090 | Entry.Node = StackPtr; |
| 28091 | InChain = |
| 28092 | DAG.getStore(InChain, dl, Op->getOperand(i), StackPtr, MPI, Align(16)); |
| 28093 | Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); |
| 28094 | Entry.Ty = PointerType::get(ArgTy,0); |
| 28095 | Entry.IsSExt = false; |
| 28096 | Entry.IsZExt = false; |
| 28097 | Args.push_back(Entry); |
| 28098 | } |
| 28099 | |
| 28100 | SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC), |
| 28101 | getPointerTy(DAG.getDataLayout())); |
| 28102 | |
| 28103 | TargetLowering::CallLoweringInfo CLI(DAG); |
| 28104 | CLI.setDebugLoc(dl) |
| 28105 | .setChain(InChain) |
| 28106 | .setLibCallee( |
| 28107 | getLibcallCallingConv(LC), |
| 28108 | static_cast<EVT>(MVT::v2i64).getTypeForEVT(*DAG.getContext()), Callee, |
| 28109 | std::move(Args)) |
| 28110 | .setInRegister() |
| 28111 | .setSExtResult(isSigned) |
| 28112 | .setZExtResult(!isSigned); |
| 28113 | |
| 28114 | std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI); |
| 28115 | return DAG.getBitcast(VT, CallInfo.first); |
| 28116 | } |
| 28117 | |
| 28118 | |
| 28119 | |
| 28120 | static bool SupportedVectorShiftWithImm(MVT VT, const X86Subtarget &Subtarget, |
| 28121 | unsigned Opcode) { |
| 28122 | if (VT.getScalarSizeInBits() < 16) |
| 28123 | return false; |
| 28124 | |
| 28125 | if (VT.is512BitVector() && Subtarget.hasAVX512() && |
| 28126 | (VT.getScalarSizeInBits() > 16 || Subtarget.hasBWI())) |
| 28127 | return true; |
| 28128 | |
| 28129 | bool LShift = (VT.is128BitVector() && Subtarget.hasSSE2()) || |
| 28130 | (VT.is256BitVector() && Subtarget.hasInt256()); |
| 28131 | |
| 28132 | bool AShift = LShift && (Subtarget.hasAVX512() || |
| 28133 | (VT != MVT::v2i64 && VT != MVT::v4i64)); |
| 28134 | return (Opcode == ISD::SRA) ? AShift : LShift; |
| 28135 | } |
| 28136 | |
| 28137 | |
| 28138 | |
| 28139 | static |
| 28140 | bool SupportedVectorShiftWithBaseAmnt(MVT VT, const X86Subtarget &Subtarget, |
| 28141 | unsigned Opcode) { |
| 28142 | return SupportedVectorShiftWithImm(VT, Subtarget, Opcode); |
| 28143 | } |
| 28144 | |
| 28145 | |
| 28146 | |
| 28147 | static bool SupportedVectorVarShift(MVT VT, const X86Subtarget &Subtarget, |
| 28148 | unsigned Opcode) { |
| 28149 | |
| 28150 | if (!Subtarget.hasInt256() || VT.getScalarSizeInBits() < 16) |
| 28151 | return false; |
| 28152 | |
| 28153 | |
| 28154 | if (VT.getScalarSizeInBits() == 16 && !Subtarget.hasBWI()) |
| 28155 | return false; |
| 28156 | |
| 28157 | if (Subtarget.hasAVX512()) |
| 28158 | return true; |
| 28159 | |
| 28160 | bool LShift = VT.is128BitVector() || VT.is256BitVector(); |
| 28161 | bool AShift = LShift && VT != MVT::v2i64 && VT != MVT::v4i64; |
| 28162 | return (Opcode == ISD::SRA) ? AShift : LShift; |
| 28163 | } |
| 28164 | |
// Lower a vector shift whose amount is a constant splat.  Returns SDValue()
// to fall through to the variable-shift lowering when no constant pattern
// applies here.
static SDValue LowerScalarImmediateShift(SDValue Op, SelectionDAG &DAG,
                                         const X86Subtarget &Subtarget) {
  MVT VT = Op.getSimpleValueType();
  SDLoc dl(Op);
  SDValue R = Op.getOperand(0);
  SDValue Amt = Op.getOperand(1);
  unsigned X86Opc = getTargetVShiftUniformOpcode(Op.getOpcode(), false);

  // Emulate a 64-bit arithmetic right shift by reinterpreting the vector as
  // i32 lanes, shifting those, and blending the pieces back together.
  auto ArithmeticShiftRight64 = [&](uint64_t ShiftAmt) {
    assert((VT == MVT::v2i64 || VT == MVT::v4i64) && "Unexpected SRA type");
    MVT ExVT = MVT::getVectorVT(MVT::i32, VT.getVectorNumElements() * 2);
    SDValue Ex = DAG.getBitcast(ExVT, R);

    // Shifting by 63 just broadcasts the sign bit: lower as PCMPGT(0, R).
    if (ShiftAmt == 63 && Subtarget.hasSSE42()) {
      assert((VT != MVT::v4i64 || Subtarget.hasInt256()) &&
             "Unsupported PCMPGT op");
      return DAG.getNode(X86ISD::PCMPGT, dl, VT, DAG.getConstant(0, dl, VT), R);
    }

    if (ShiftAmt >= 32) {
      // The upper 32 bits become pure sign bits (SRAI by 31); the lower 32
      // bits come from the high half shifted by (amount - 32).  The shuffle
      // interleaves the two.
      SDValue Upper =
          getTargetVShiftByConstNode(X86ISD::VSRAI, dl, ExVT, Ex, 31, DAG);
      SDValue Lower = getTargetVShiftByConstNode(X86ISD::VSRAI, dl, ExVT, Ex,
                                                 ShiftAmt - 32, DAG);
      if (VT == MVT::v2i64)
        Ex = DAG.getVectorShuffle(ExVT, dl, Upper, Lower, {5, 1, 7, 3});
      if (VT == MVT::v4i64)
        Ex = DAG.getVectorShuffle(ExVT, dl, Upper, Lower,
                                  {9, 1, 11, 3, 13, 5, 15, 7});
    } else {
      // Amount < 32: arithmetic-shift the i32 lanes for the upper halves,
      // logical-shift the whole i64 lanes for the lower halves, and merge
      // the matching 32-bit pieces.
      SDValue Upper = getTargetVShiftByConstNode(X86ISD::VSRAI, dl, ExVT, Ex,
                                                 ShiftAmt, DAG);
      SDValue Lower =
          getTargetVShiftByConstNode(X86ISD::VSRLI, dl, VT, R, ShiftAmt, DAG);
      Lower = DAG.getBitcast(ExVT, Lower);
      if (VT == MVT::v2i64)
        Ex = DAG.getVectorShuffle(ExVT, dl, Upper, Lower, {4, 1, 6, 3});
      if (VT == MVT::v4i64)
        Ex = DAG.getVectorShuffle(ExVT, dl, Upper, Lower,
                                  {8, 1, 10, 3, 12, 5, 14, 7});
    }
    return DAG.getBitcast(VT, Ex);
  };

  // Only constant splat amounts are handled here.
  APInt APIntShiftAmt;
  if (!X86::isConstantSplat(Amt, APIntShiftAmt))
    return SDValue();

  // Out-of-range shift amounts produce an undefined result.
  if (APIntShiftAmt.uge(VT.getScalarSizeInBits()))
    return DAG.getUNDEF(VT);

  uint64_t ShiftAmt = APIntShiftAmt.getZExtValue();

  // Directly supported immediate shift.
  if (SupportedVectorShiftWithImm(VT, Subtarget, Op.getOpcode()))
    return getTargetVShiftByConstNode(X86Opc, dl, VT, R, ShiftAmt, DAG);

  // i64 SRA needs the emulation above (skipped for v2i64 when XOP provides
  // a native form).
  if (((!Subtarget.hasXOP() && VT == MVT::v2i64) ||
       (Subtarget.hasInt256() && VT == MVT::v4i64)) &&
      Op.getOpcode() == ISD::SRA)
    return ArithmeticShiftRight64(ShiftAmt);

  // vXi8: there are no byte shifts, so shift in the i16 domain and clean up
  // the bits that crossed byte boundaries.
  if (VT == MVT::v16i8 || (Subtarget.hasInt256() && VT == MVT::v32i8) ||
      (Subtarget.hasBWI() && VT == MVT::v64i8)) {
    unsigned NumElts = VT.getVectorNumElements();
    MVT ShiftVT = MVT::getVectorVT(MVT::i16, NumElts / 2);

    // SHL by 1 is just an add.
    if (Op.getOpcode() == ISD::SHL && ShiftAmt == 1)
      return DAG.getNode(ISD::ADD, dl, VT, R, R);

    // SRA by 7 broadcasts the sign bit: compare 0 > R.
    if (Op.getOpcode() == ISD::SRA && ShiftAmt == 7) {
      SDValue Zeros = DAG.getConstant(0, dl, VT);
      if (VT.is512BitVector()) {
        assert(VT == MVT::v64i8 && "Unexpected element type!");
        SDValue CMP = DAG.getSetCC(dl, MVT::v64i1, Zeros, R, ISD::SETGT);
        return DAG.getNode(ISD::SIGN_EXTEND, dl, VT, CMP);
      }
      return DAG.getNode(X86ISD::PCMPGT, dl, VT, Zeros, R);
    }

    // XOP has native byte shifts; leave v16i8 to the variable-shift path.
    if (VT == MVT::v16i8 && Subtarget.hasXOP())
      return SDValue();

    if (Op.getOpcode() == ISD::SHL) {
      // Shift as words, then mask off the low bits that leaked in from the
      // neighbouring byte.
      SDValue SHL = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, ShiftVT, R,
                                               ShiftAmt, DAG);
      SHL = DAG.getBitcast(VT, SHL);
      // Keep only the top (8 - ShiftAmt) bits of each byte.
      APInt Mask = APInt::getHighBitsSet(8, 8 - ShiftAmt);
      return DAG.getNode(ISD::AND, dl, VT, SHL, DAG.getConstant(Mask, dl, VT));
    }
    if (Op.getOpcode() == ISD::SRL) {
      // Shift as words, then mask off the high bits that leaked in from the
      // neighbouring byte.
      SDValue SRL = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ShiftVT, R,
                                               ShiftAmt, DAG);
      SRL = DAG.getBitcast(VT, SRL);
      // Keep only the bottom (8 - ShiftAmt) bits of each byte.
      APInt Mask = APInt::getLowBitsSet(8, 8 - ShiftAmt);
      return DAG.getNode(ISD::AND, dl, VT, SRL, DAG.getConstant(Mask, dl, VT));
    }
    if (Op.getOpcode() == ISD::SRA) {
      // SRA = SRL plus sign propagation: with m = 0x80 >> amt,
      // ((x >>u amt) ^ m) - m sign-extends the shifted value.
      SDValue Res = DAG.getNode(ISD::SRL, dl, VT, R, Amt);
      SDValue Mask = DAG.getConstant(128 >> ShiftAmt, dl, VT);
      Res = DAG.getNode(ISD::XOR, dl, VT, Res, Mask);
      Res = DAG.getNode(ISD::SUB, dl, VT, Res, Mask);
      return Res;
    }
    llvm_unreachable("Unknown shift opcode.");
  }

  return SDValue();
}
| 28288 | |
// Lower a vector shift whose (non-constant) amount is the same in every
// lane, either as a splat value or as a v2i64 BUILD_VECTOR whose element
// pattern repeats every 64 bits.  Returns SDValue() when no uniform pattern
// is found.
static SDValue LowerScalarVariableShift(SDValue Op, SelectionDAG &DAG,
                                        const X86Subtarget &Subtarget) {
  MVT VT = Op.getSimpleValueType();
  SDLoc dl(Op);
  SDValue R = Op.getOperand(0);
  SDValue Amt = Op.getOperand(1);
  unsigned Opcode = Op.getOpcode();
  unsigned X86OpcI = getTargetVShiftUniformOpcode(Opcode, false);
  unsigned X86OpcV = getTargetVShiftUniformOpcode(Opcode, true);

  if (SDValue BaseShAmt = DAG.getSplatValue(Amt)) {
    if (SupportedVectorShiftWithBaseAmnt(VT, Subtarget, Opcode)) {
      // Widen the scalar amount to i32 (or i64 for wide elements) before
      // building the target shift node.
      MVT EltVT = VT.getVectorElementType();
      assert(EltVT.bitsLE(MVT::i64) && "Unexpected element type!");
      if (EltVT != MVT::i64 && EltVT.bitsGT(MVT::i32))
        BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, BaseShAmt);
      else if (EltVT.bitsLT(MVT::i32))
        BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, BaseShAmt);

      return getTargetVShiftNode(X86OpcI, dl, VT, R, BaseShAmt, Subtarget, DAG);
    }

    // vXi8 with a uniform amount: shift in the i16 domain, mask away the
    // bits that crossed byte boundaries, and (for SRA) rebuild sign bits.
    if (((VT == MVT::v16i8 && !Subtarget.canExtendTo512DQ()) ||
         (VT == MVT::v32i8 && !Subtarget.canExtendTo512BW()) ||
         VT == MVT::v64i8) &&
        !Subtarget.hasXOP()) {
      unsigned NumElts = VT.getVectorNumElements();
      MVT ExtVT = MVT::getVectorVT(MVT::i16, NumElts / 2);
      if (SupportedVectorShiftWithBaseAmnt(ExtVT, Subtarget, Opcode)) {
        // SRA is performed as SRL plus the sign fixup below.
        unsigned LogicalOp = (Opcode == ISD::SHL ? ISD::SHL : ISD::SRL);
        unsigned LogicalX86Op = getTargetVShiftUniformOpcode(LogicalOp, false);
        BaseShAmt = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i32, BaseShAmt);

        // Shift an all-ones vector by the same amount to discover which
        // bits of each byte survive, then broadcast the first byte's mask
        // to every lane.  For right shifts, an extra shift by 8 moves the
        // surviving-bit pattern into the low byte of each word first.
        SDValue BitMask = DAG.getConstant(-1, dl, ExtVT);
        BitMask = getTargetVShiftNode(LogicalX86Op, dl, ExtVT, BitMask,
                                      BaseShAmt, Subtarget, DAG);
        if (Opcode != ISD::SHL)
          BitMask = getTargetVShiftByConstNode(LogicalX86Op, dl, ExtVT, BitMask,
                                               8, DAG);
        BitMask = DAG.getBitcast(VT, BitMask);
        BitMask = DAG.getVectorShuffle(VT, dl, BitMask, BitMask,
                                       SmallVector<int, 64>(NumElts, 0));

        // Perform the shift as i16 lanes and mask off the cross-byte bits.
        SDValue Res = getTargetVShiftNode(LogicalX86Op, dl, ExtVT,
                                          DAG.getBitcast(ExtVT, R), BaseShAmt,
                                          Subtarget, DAG);
        Res = DAG.getBitcast(VT, Res);
        Res = DAG.getNode(ISD::AND, dl, VT, Res, BitMask);

        if (Opcode == ISD::SRA) {
          // Sign-extend the logical-shift result: 0x80 per byte marks the
          // sign-bit position; shifting it along with the data and applying
          // (res ^ m) - m propagates the sign.
          SDValue SignMask = DAG.getConstant(0x8080, dl, ExtVT);
          SignMask = getTargetVShiftNode(LogicalX86Op, dl, ExtVT, SignMask,
                                         BaseShAmt, Subtarget, DAG);
          SignMask = DAG.getBitcast(VT, SignMask);
          Res = DAG.getNode(ISD::XOR, dl, VT, Res, SignMask);
          Res = DAG.getNode(ISD::SUB, dl, VT, Res, SignMask);
        }
        return Res;
      }
    }
  }

  // v2i64 amount bitcast from a narrower BUILD_VECTOR: if the element
  // pattern repeats every 64 bits the shift is effectively uniform, so the
  // per-vector target shift can be used with the original amount operand.
  if (VT == MVT::v2i64 && Amt.getOpcode() == ISD::BITCAST &&
      Amt.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
    Amt = Amt.getOperand(0);
    unsigned Ratio = 64 / Amt.getScalarValueSizeInBits();
    std::vector<SDValue> Vals(Ratio);
    // Record the first 64-bit group, then require every later group to
    // match it element-for-element.
    for (unsigned i = 0; i != Ratio; ++i)
      Vals[i] = Amt.getOperand(i);
    for (unsigned i = Ratio, e = Amt.getNumOperands(); i != e; i += Ratio) {
      for (unsigned j = 0; j != Ratio; ++j)
        if (Vals[j] != Amt.getOperand(i + j))
          return SDValue();
    }

    if (SupportedVectorShiftWithBaseAmnt(VT, Subtarget, Op.getOpcode()))
      return DAG.getNode(X86OpcV, dl, VT, R, Op.getOperand(1));
  }
  return SDValue();
}
| 28375 | |
| 28376 | |
| 28377 | static SDValue convertShiftLeftToScale(SDValue Amt, const SDLoc &dl, |
| 28378 | const X86Subtarget &Subtarget, |
| 28379 | SelectionDAG &DAG) { |
| 28380 | MVT VT = Amt.getSimpleValueType(); |
| 28381 | if (!(VT == MVT::v8i16 || VT == MVT::v4i32 || |
| 28382 | (Subtarget.hasInt256() && VT == MVT::v16i16) || |
| 28383 | (Subtarget.hasVBMI2() && VT == MVT::v32i16) || |
| 28384 | (!Subtarget.hasAVX512() && VT == MVT::v16i8))) |
| 28385 | return SDValue(); |
| 28386 | |
| 28387 | if (ISD::isBuildVectorOfConstantSDNodes(Amt.getNode())) { |
| 28388 | SmallVector<SDValue, 8> Elts; |
| 28389 | MVT SVT = VT.getVectorElementType(); |
| 28390 | unsigned SVTBits = SVT.getSizeInBits(); |
| 28391 | APInt One(SVTBits, 1); |
| 28392 | unsigned NumElems = VT.getVectorNumElements(); |
| 28393 | |
| 28394 | for (unsigned i = 0; i != NumElems; ++i) { |
| 28395 | SDValue Op = Amt->getOperand(i); |
| 28396 | if (Op->isUndef()) { |
| 28397 | Elts.push_back(Op); |
| 28398 | continue; |
| 28399 | } |
| 28400 | |
| 28401 | ConstantSDNode *ND = cast<ConstantSDNode>(Op); |
| 28402 | APInt C(SVTBits, ND->getZExtValue()); |
| 28403 | uint64_t ShAmt = C.getZExtValue(); |
| 28404 | if (ShAmt >= SVTBits) { |
| 28405 | Elts.push_back(DAG.getUNDEF(SVT)); |
| 28406 | continue; |
| 28407 | } |
| 28408 | Elts.push_back(DAG.getConstant(One.shl(ShAmt), dl, SVT)); |
| 28409 | } |
| 28410 | return DAG.getBuildVector(VT, dl, Elts); |
| 28411 | } |
| 28412 | |
| 28413 | |
| 28414 | |
| 28415 | if (VT == MVT::v4i32) { |
| 28416 | Amt = DAG.getNode(ISD::SHL, dl, VT, Amt, DAG.getConstant(23, dl, VT)); |
| 28417 | Amt = DAG.getNode(ISD::ADD, dl, VT, Amt, |
| 28418 | DAG.getConstant(0x3f800000U, dl, VT)); |
| 28419 | Amt = DAG.getBitcast(MVT::v4f32, Amt); |
| 28420 | return DAG.getNode(ISD::FP_TO_SINT, dl, VT, Amt); |
| 28421 | } |
| 28422 | |
| 28423 | |
| 28424 | if (VT == MVT::v8i16 && !Subtarget.hasAVX2()) { |
| 28425 | SDValue Z = DAG.getConstant(0, dl, VT); |
| 28426 | SDValue Lo = DAG.getBitcast(MVT::v4i32, getUnpackl(DAG, dl, VT, Amt, Z)); |
| 28427 | SDValue Hi = DAG.getBitcast(MVT::v4i32, getUnpackh(DAG, dl, VT, Amt, Z)); |
| 28428 | Lo = convertShiftLeftToScale(Lo, dl, Subtarget, DAG); |
| 28429 | Hi = convertShiftLeftToScale(Hi, dl, Subtarget, DAG); |
| 28430 | if (Subtarget.hasSSE41()) |
| 28431 | return DAG.getNode(X86ISD::PACKUS, dl, VT, Lo, Hi); |
| 28432 | |
| 28433 | return DAG.getVectorShuffle(VT, dl, DAG.getBitcast(VT, Lo), |
| 28434 | DAG.getBitcast(VT, Hi), |
| 28435 | {0, 2, 4, 6, 8, 10, 12, 14}); |
| 28436 | } |
| 28437 | |
| 28438 | return SDValue(); |
| 28439 | } |
| 28440 | |
| 28441 | static SDValue LowerShift(SDValue Op, const X86Subtarget &Subtarget, |
| 28442 | SelectionDAG &DAG) { |
| 28443 | MVT VT = Op.getSimpleValueType(); |
| 28444 | SDLoc dl(Op); |
| 28445 | SDValue R = Op.getOperand(0); |
| 28446 | SDValue Amt = Op.getOperand(1); |
| 28447 | unsigned EltSizeInBits = VT.getScalarSizeInBits(); |
| 28448 | bool ConstantAmt = ISD::isBuildVectorOfConstantSDNodes(Amt.getNode()); |
| 28449 | |
| 28450 | unsigned Opc = Op.getOpcode(); |
| 28451 | unsigned X86OpcV = getTargetVShiftUniformOpcode(Opc, true); |
| 28452 | unsigned X86OpcI = getTargetVShiftUniformOpcode(Opc, false); |
| 28453 | |
| 28454 | assert(VT.isVector() && "Custom lowering only for vector shifts!"); |
| 28455 | assert(Subtarget.hasSSE2() && "Only custom lower when we have SSE2!"); |
| 28456 | |
| 28457 | if (SDValue V = LowerScalarImmediateShift(Op, DAG, Subtarget)) |
| 28458 | return V; |
| 28459 | |
| 28460 | if (SDValue V = LowerScalarVariableShift(Op, DAG, Subtarget)) |
| 28461 | return V; |
| 28462 | |
| 28463 | if (SupportedVectorVarShift(VT, Subtarget, Opc)) |
| 28464 | return Op; |
| 28465 | |
| 28466 | |
| 28467 | |
| 28468 | if (Subtarget.hasXOP() && (VT == MVT::v2i64 || VT == MVT::v4i32 || |
| 28469 | VT == MVT::v8i16 || VT == MVT::v16i8)) { |
| 28470 | if (Opc == ISD::SRL || Opc == ISD::SRA) { |
| 28471 | SDValue Zero = DAG.getConstant(0, dl, VT); |
| 28472 | Amt = DAG.getNode(ISD::SUB, dl, VT, Zero, Amt); |
| 28473 | } |
| 28474 | if (Opc == ISD::SHL || Opc == ISD::SRL) |
| 28475 | return DAG.getNode(X86ISD::VPSHL, dl, VT, R, Amt); |
| 28476 | if (Opc == ISD::SRA) |
| 28477 | return DAG.getNode(X86ISD::VPSHA, dl, VT, R, Amt); |
| 28478 | } |
| 28479 | |
| 28480 | |
| 28481 | |
| 28482 | if (VT == MVT::v2i64 && Opc != ISD::SRA) { |
| 28483 | |
| 28484 | SDValue Amt0 = DAG.getVectorShuffle(VT, dl, Amt, Amt, {0, 0}); |
| 28485 | SDValue Amt1 = DAG.getVectorShuffle(VT, dl, Amt, Amt, {1, 1}); |
| 28486 | SDValue R0 = DAG.getNode(Opc, dl, VT, R, Amt0); |
| 28487 | SDValue R1 = DAG.getNode(Opc, dl, VT, R, Amt1); |
| 28488 | return DAG.getVectorShuffle(VT, dl, R0, R1, {0, 3}); |
| 28489 | } |
| 28490 | |
| 28491 | |
| 28492 | |
| 28493 | |
| 28494 | if ((VT == MVT::v2i64 || (VT == MVT::v4i64 && Subtarget.hasInt256())) && |
| 28495 | Opc == ISD::SRA) { |
| 28496 | SDValue S = DAG.getConstant(APInt::getSignMask(64), dl, VT); |
| 28497 | SDValue M = DAG.getNode(ISD::SRL, dl, VT, S, Amt); |
| 28498 | R = DAG.getNode(ISD::SRL, dl, VT, R, Amt); |
| 28499 | R = DAG.getNode(ISD::XOR, dl, VT, R, M); |
| 28500 | R = DAG.getNode(ISD::SUB, dl, VT, R, M); |
| 28501 | return R; |
| 28502 | } |
| 28503 | |
| 28504 | |
| 28505 | |
| 28506 | |
| 28507 | |
| 28508 | |
| 28509 | |
| 28510 | |
| 28511 | |
| 28512 | |
| 28513 | |
| 28514 | if (ConstantAmt && (VT == MVT::v8i16 || VT == MVT::v4i32 || |
| 28515 | (VT == MVT::v16i16 && Subtarget.hasInt256()))) { |
| 28516 | SDValue Amt1, Amt2; |
| 28517 | unsigned NumElts = VT.getVectorNumElements(); |
| 28518 | SmallVector<int, 8> ShuffleMask; |
| 28519 | for (unsigned i = 0; i != NumElts; ++i) { |
| 28520 | SDValue A = Amt->getOperand(i); |
| 28521 | if (A.isUndef()) { |
| 28522 | ShuffleMask.push_back(SM_SentinelUndef); |
| 28523 | continue; |
| 28524 | } |
| 28525 | if (!Amt1 || Amt1 == A) { |
| 28526 | ShuffleMask.push_back(i); |
| 28527 | Amt1 = A; |
| 28528 | continue; |
| 28529 | } |
| 28530 | if (!Amt2 || Amt2 == A) { |
| 28531 | ShuffleMask.push_back(i + NumElts); |
| 28532 | Amt2 = A; |
| 28533 | continue; |
| 28534 | } |
| 28535 | break; |
| 28536 | } |
| 28537 | |
| 28538 | |
| 28539 | if (ShuffleMask.size() == NumElts && Amt1 && Amt2 && |
| 28540 | (VT != MVT::v16i16 || |
| 28541 | is128BitLaneRepeatedShuffleMask(VT, ShuffleMask)) && |
| 28542 | (VT == MVT::v4i32 || Subtarget.hasSSE41() || Opc != ISD::SHL || |
| 28543 | canWidenShuffleElements(ShuffleMask))) { |
| 28544 | auto *Cst1 = dyn_cast<ConstantSDNode>(Amt1); |
| 28545 | auto *Cst2 = dyn_cast<ConstantSDNode>(Amt2); |
| 28546 | if (Cst1 && Cst2 && Cst1->getAPIntValue().ult(EltSizeInBits) && |
| 28547 | Cst2->getAPIntValue().ult(EltSizeInBits)) { |
| 28548 | SDValue Shift1 = getTargetVShiftByConstNode(X86OpcI, dl, VT, R, |
| 28549 | Cst1->getZExtValue(), DAG); |
| 28550 | SDValue Shift2 = getTargetVShiftByConstNode(X86OpcI, dl, VT, R, |
| 28551 | Cst2->getZExtValue(), DAG); |
| 28552 | return DAG.getVectorShuffle(VT, dl, Shift1, Shift2, ShuffleMask); |
| 28553 | } |
| 28554 | } |
| 28555 | } |
| 28556 | |
| 28557 | |
| 28558 | |
| 28559 | if (Opc == ISD::SHL) |
| 28560 | if (SDValue Scale = convertShiftLeftToScale(Amt, dl, Subtarget, DAG)) |
| 28561 | return DAG.getNode(ISD::MUL, dl, VT, R, Scale); |
| 28562 | |
| 28563 | |
| 28564 | |
| 28565 | if (Opc == ISD::SRL && ConstantAmt && |
| 28566 | (VT == MVT::v8i16 || (VT == MVT::v16i16 && Subtarget.hasInt256()))) { |
| 28567 | SDValue EltBits = DAG.getConstant(EltSizeInBits, dl, VT); |
| 28568 | SDValue RAmt = DAG.getNode(ISD::SUB, dl, VT, EltBits, Amt); |
| 28569 | if (SDValue Scale = convertShiftLeftToScale(RAmt, dl, Subtarget, DAG)) { |
| 28570 | SDValue Zero = DAG.getConstant(0, dl, VT); |
| 28571 | SDValue ZAmt = DAG.getSetCC(dl, VT, Amt, Zero, ISD::SETEQ); |
| 28572 | SDValue Res = DAG.getNode(ISD::MULHU, dl, VT, R, Scale); |
| 28573 | return DAG.getSelect(dl, VT, ZAmt, R, Res); |
| 28574 | } |
| 28575 | } |
| 28576 | |
| 28577 | |
| 28578 | |
| 28579 | |
| 28580 | |
| 28581 | if (Opc == ISD::SRA && ConstantAmt && |
| 28582 | (VT == MVT::v8i16 || (VT == MVT::v16i16 && Subtarget.hasInt256())) && |
| 28583 | ((Subtarget.hasSSE41() && !Subtarget.hasXOP() && |
| 28584 | !Subtarget.hasAVX512()) || |
| 28585 | DAG.isKnownNeverZero(Amt))) { |
| 28586 | SDValue EltBits = DAG.getConstant(EltSizeInBits, dl, VT); |
| 28587 | SDValue RAmt = DAG.getNode(ISD::SUB, dl, VT, EltBits, Amt); |
| 28588 | if (SDValue Scale = convertShiftLeftToScale(RAmt, dl, Subtarget, DAG)) { |
| 28589 | SDValue Amt0 = |
| 28590 | DAG.getSetCC(dl, VT, Amt, DAG.getConstant(0, dl, VT), ISD::SETEQ); |
| 28591 | SDValue Amt1 = |
| 28592 | DAG.getSetCC(dl, VT, Amt, DAG.getConstant(1, dl, VT), ISD::SETEQ); |
| 28593 | SDValue Sra1 = |
| 28594 | getTargetVShiftByConstNode(X86ISD::VSRAI, dl, VT, R, 1, DAG); |
| 28595 | SDValue Res = DAG.getNode(ISD::MULHS, dl, VT, R, Scale); |
| 28596 | Res = DAG.getSelect(dl, VT, Amt0, R, Res); |
| 28597 | return DAG.getSelect(dl, VT, Amt1, Sra1, Res); |
| 28598 | } |
| 28599 | } |
| 28600 | |
| 28601 | |
| 28602 | |
| 28603 | |
| 28604 | |
| 28605 | |
| 28606 | if (VT == MVT::v4i32) { |
| 28607 | SDValue Amt0, Amt1, Amt2, Amt3; |
| 28608 | if (ConstantAmt) { |
| 28609 | Amt0 = DAG.getVectorShuffle(VT, dl, Amt, DAG.getUNDEF(VT), {0, 0, 0, 0}); |
| 28610 | Amt1 = DAG.getVectorShuffle(VT, dl, Amt, DAG.getUNDEF(VT), {1, 1, 1, 1}); |
| 28611 | Amt2 = DAG.getVectorShuffle(VT, dl, Amt, DAG.getUNDEF(VT), {2, 2, 2, 2}); |
| 28612 | Amt3 = DAG.getVectorShuffle(VT, dl, Amt, DAG.getUNDEF(VT), {3, 3, 3, 3}); |
| 28613 | } else { |
| 28614 | |
| 28615 | |
| 28616 | |
| 28617 | |
| 28618 | if (Subtarget.hasAVX()) { |
| 28619 | SDValue Z = DAG.getConstant(0, dl, VT); |
| 28620 | Amt0 = DAG.getVectorShuffle(VT, dl, Amt, Z, {0, 4, -1, -1}); |
| 28621 | Amt1 = DAG.getVectorShuffle(VT, dl, Amt, Z, {1, 5, -1, -1}); |
| 28622 | Amt2 = DAG.getVectorShuffle(VT, dl, Amt, Z, {2, 6, -1, -1}); |
| 28623 | Amt3 = DAG.getVectorShuffle(VT, dl, Amt, Z, {3, 7, -1, -1}); |
| 28624 | } else { |
| 28625 | SDValue Amt01 = DAG.getBitcast(MVT::v8i16, Amt); |
| 28626 | SDValue Amt23 = DAG.getVectorShuffle(MVT::v8i16, dl, Amt01, Amt01, |
| 28627 | {4, 5, 6, 7, -1, -1, -1, -1}); |
| 28628 | Amt0 = DAG.getVectorShuffle(MVT::v8i16, dl, Amt01, Amt01, |
| 28629 | {0, 1, 1, 1, -1, -1, -1, -1}); |
| 28630 | Amt1 = DAG.getVectorShuffle(MVT::v8i16, dl, Amt01, Amt01, |
| 28631 | {2, 3, 3, 3, -1, -1, -1, -1}); |
| 28632 | Amt2 = DAG.getVectorShuffle(MVT::v8i16, dl, Amt23, Amt23, |
| 28633 | {0, 1, 1, 1, -1, -1, -1, -1}); |
| 28634 | Amt3 = DAG.getVectorShuffle(MVT::v8i16, dl, Amt23, Amt23, |
| 28635 | {2, 3, 3, 3, -1, -1, -1, -1}); |
| 28636 | } |
| 28637 | } |
| 28638 | |
| 28639 | unsigned ShOpc = ConstantAmt ? Opc : X86OpcV; |
| 28640 | SDValue R0 = DAG.getNode(ShOpc, dl, VT, R, DAG.getBitcast(VT, Amt0)); |
| 28641 | SDValue R1 = DAG.getNode(ShOpc, dl, VT, R, DAG.getBitcast(VT, Amt1)); |
| 28642 | SDValue R2 = DAG.getNode(ShOpc, dl, VT, R, DAG.getBitcast(VT, Amt2)); |
| 28643 | SDValue R3 = DAG.getNode(ShOpc, dl, VT, R, DAG.getBitcast(VT, Amt3)); |
| 28644 | |
| 28645 | |
| 28646 | |
| 28647 | if (Subtarget.hasSSE41()) { |
| 28648 | SDValue R02 = DAG.getVectorShuffle(VT, dl, R0, R2, {0, -1, 6, -1}); |
| 28649 | SDValue R13 = DAG.getVectorShuffle(VT, dl, R1, R3, {-1, 1, -1, 7}); |
| 28650 | return DAG.getVectorShuffle(VT, dl, R02, R13, {0, 5, 2, 7}); |
| 28651 | } |
| 28652 | SDValue R01 = DAG.getVectorShuffle(VT, dl, R0, R1, {0, -1, -1, 5}); |
| 28653 | SDValue R23 = DAG.getVectorShuffle(VT, dl, R2, R3, {2, -1, -1, 7}); |
| 28654 | return DAG.getVectorShuffle(VT, dl, R01, R23, {0, 3, 4, 7}); |
| 28655 | } |
| 28656 | |
| 28657 | |
| 28658 | |
| 28659 | |
| 28660 | |
| 28661 | if ((Subtarget.hasInt256() && VT == MVT::v8i16) || |
| 28662 | (Subtarget.canExtendTo512DQ() && VT == MVT::v16i16) || |
| 28663 | (Subtarget.canExtendTo512DQ() && VT == MVT::v16i8) || |
| 28664 | (Subtarget.canExtendTo512BW() && VT == MVT::v32i8) || |
| 28665 | (Subtarget.hasBWI() && Subtarget.hasVLX() && VT == MVT::v16i8)) { |
| 28666 | assert((!Subtarget.hasBWI() || VT == MVT::v32i8 || VT == MVT::v16i8) && |
| 28667 | "Unexpected vector type"); |
| 28668 | MVT EvtSVT = Subtarget.hasBWI() ? MVT::i16 : MVT::i32; |
| 28669 | MVT ExtVT = MVT::getVectorVT(EvtSVT, VT.getVectorNumElements()); |
| 28670 | unsigned ExtOpc = Opc == ISD::SRA ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; |
| 28671 | R = DAG.getNode(ExtOpc, dl, ExtVT, R); |
| 28672 | Amt = DAG.getNode(ISD::ZERO_EXTEND, dl, ExtVT, Amt); |
| 28673 | return DAG.getNode(ISD::TRUNCATE, dl, VT, |
| 28674 | DAG.getNode(Opc, dl, ExtVT, R, Amt)); |
| 28675 | } |
| 28676 | |
| 28677 | |
| 28678 | |
| 28679 | if (ConstantAmt && (Opc == ISD::SRA || Opc == ISD::SRL) && |
| 28680 | (VT == MVT::v16i8 || (VT == MVT::v32i8 && Subtarget.hasInt256()) || |
| 28681 | (VT == MVT::v64i8 && Subtarget.hasBWI())) && |
| 28682 | !Subtarget.hasXOP()) { |
| 28683 | int NumElts = VT.getVectorNumElements(); |
| 28684 | SDValue Cst8 = DAG.getTargetConstant(8, dl, MVT::i8); |
| 28685 | |
| 28686 | |
| 28687 | |
| 28688 | MVT ExVT = MVT::getVectorVT(MVT::i16, NumElts); |
| 28689 | Amt = DAG.getZExtOrTrunc(Amt, dl, ExVT); |
| 28690 | Amt = DAG.getNode(ISD::SUB, dl, ExVT, DAG.getConstant(8, dl, ExVT), Amt); |
| 28691 | Amt = DAG.getNode(ISD::SHL, dl, ExVT, DAG.getConstant(1, dl, ExVT), Amt); |
| 28692 | assert(ISD::isBuildVectorOfConstantSDNodes(Amt.getNode()) && |
| 28693 | "Constant build vector expected"); |
| 28694 | |
| 28695 | if (VT == MVT::v16i8 && Subtarget.hasInt256()) { |
| 28696 | R = Opc == ISD::SRA ? DAG.getSExtOrTrunc(R, dl, ExVT) |
| 28697 | : DAG.getZExtOrTrunc(R, dl, ExVT); |
| 28698 | R = DAG.getNode(ISD::MUL, dl, ExVT, R, Amt); |
| 28699 | R = DAG.getNode(X86ISD::VSRLI, dl, ExVT, R, Cst8); |
| 28700 | return DAG.getZExtOrTrunc(R, dl, VT); |
| 28701 | } |
| 28702 | |
| 28703 | SmallVector<SDValue, 16> LoAmt, HiAmt; |
| 28704 | for (int i = 0; i != NumElts; i += 16) { |
| 28705 | for (int j = 0; j != 8; ++j) { |
| 28706 | LoAmt.push_back(Amt.getOperand(i + j)); |
| 28707 | HiAmt.push_back(Amt.getOperand(i + j + 8)); |
| 28708 | } |
| 28709 | } |
| 28710 | |
| 28711 | MVT VT16 = MVT::getVectorVT(MVT::i16, NumElts / 2); |
| 28712 | SDValue LoA = DAG.getBuildVector(VT16, dl, LoAmt); |
| 28713 | SDValue HiA = DAG.getBuildVector(VT16, dl, HiAmt); |
| 28714 | |
| 28715 | SDValue LoR = DAG.getBitcast(VT16, getUnpackl(DAG, dl, VT, R, R)); |
| 28716 | SDValue HiR = DAG.getBitcast(VT16, getUnpackh(DAG, dl, VT, R, R)); |
| 28717 | LoR = DAG.getNode(X86OpcI, dl, VT16, LoR, Cst8); |
| 28718 | HiR = DAG.getNode(X86OpcI, dl, VT16, HiR, Cst8); |
| 28719 | LoR = DAG.getNode(ISD::MUL, dl, VT16, LoR, LoA); |
| 28720 | HiR = DAG.getNode(ISD::MUL, dl, VT16, HiR, HiA); |
| 28721 | LoR = DAG.getNode(X86ISD::VSRLI, dl, VT16, LoR, Cst8); |
| 28722 | HiR = DAG.getNode(X86ISD::VSRLI, dl, VT16, HiR, Cst8); |
| 28723 | return DAG.getNode(X86ISD::PACKUS, dl, VT, LoR, HiR); |
| 28724 | } |
| 28725 | |
| 28726 | if (VT == MVT::v16i8 || |
| 28727 | (VT == MVT::v32i8 && Subtarget.hasInt256() && !Subtarget.hasXOP()) || |
| 28728 | (VT == MVT::v64i8 && Subtarget.hasBWI())) { |
| 28729 | MVT ExtVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements() / 2); |
| 28730 | |
| 28731 | auto SignBitSelect = [&](MVT SelVT, SDValue Sel, SDValue V0, SDValue V1) { |
| 28732 | if (VT.is512BitVector()) { |
| 28733 | |
| 28734 | |
| 28735 | |
| 28736 | MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements()); |
| 28737 | V0 = DAG.getBitcast(VT, V0); |
| 28738 | V1 = DAG.getBitcast(VT, V1); |
| 28739 | Sel = DAG.getBitcast(VT, Sel); |
| 28740 | Sel = DAG.getSetCC(dl, MaskVT, DAG.getConstant(0, dl, VT), Sel, |
| 28741 | ISD::SETGT); |
| 28742 | return DAG.getBitcast(SelVT, DAG.getSelect(dl, VT, Sel, V0, V1)); |
| 28743 | } else if (Subtarget.hasSSE41()) { |
| 28744 | |
| 28745 | |
| 28746 | V0 = DAG.getBitcast(VT, V0); |
| 28747 | V1 = DAG.getBitcast(VT, V1); |
| 28748 | Sel = DAG.getBitcast(VT, Sel); |
| 28749 | return DAG.getBitcast(SelVT, |
| 28750 | DAG.getNode(X86ISD::BLENDV, dl, VT, Sel, V0, V1)); |
| 28751 | } |
| 28752 | |
| 28753 | |
| 28754 | |
| 28755 | SDValue Z = DAG.getConstant(0, dl, SelVT); |
| 28756 | SDValue C = DAG.getNode(X86ISD::PCMPGT, dl, SelVT, Z, Sel); |
| 28757 | return DAG.getSelect(dl, SelVT, C, V0, V1); |
| 28758 | }; |
| 28759 | |
| 28760 | |
| 28761 | |
| 28762 | |
| 28763 | Amt = DAG.getBitcast(ExtVT, Amt); |
| 28764 | Amt = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, ExtVT, Amt, 5, DAG); |
| 28765 | Amt = DAG.getBitcast(VT, Amt); |
| 28766 | |
| 28767 | if (Opc == ISD::SHL || Opc == ISD::SRL) { |
| 28768 | |
| 28769 | SDValue M = DAG.getNode(Opc, dl, VT, R, DAG.getConstant(4, dl, VT)); |
| 28770 | R = SignBitSelect(VT, Amt, M, R); |
| 28771 | |
| 28772 | |
| 28773 | Amt = DAG.getNode(ISD::ADD, dl, VT, Amt, Amt); |
| 28774 | |
| 28775 | |
| 28776 | M = DAG.getNode(Opc, dl, VT, R, DAG.getConstant(2, dl, VT)); |
| 28777 | R = SignBitSelect(VT, Amt, M, R); |
| 28778 | |
| 28779 | |
| 28780 | Amt = DAG.getNode(ISD::ADD, dl, VT, Amt, Amt); |
| 28781 | |
| 28782 | |
| 28783 | M = DAG.getNode(Opc, dl, VT, R, DAG.getConstant(1, dl, VT)); |
| 28784 | R = SignBitSelect(VT, Amt, M, R); |
| 28785 | return R; |
| 28786 | } |
| 28787 | |
| 28788 | if (Opc == ISD::SRA) { |
| 28789 | |
| 28790 | |
| 28791 | |
| 28792 | SDValue ALo = getUnpackl(DAG, dl, VT, DAG.getUNDEF(VT), Amt); |
| 28793 | SDValue AHi = getUnpackh(DAG, dl, VT, DAG.getUNDEF(VT), Amt); |
| 28794 | SDValue RLo = getUnpackl(DAG, dl, VT, DAG.getUNDEF(VT), R); |
| 28795 | SDValue RHi = getUnpackh(DAG, dl, VT, DAG.getUNDEF(VT), R); |
| 28796 | ALo = DAG.getBitcast(ExtVT, ALo); |
| 28797 | AHi = DAG.getBitcast(ExtVT, AHi); |
| 28798 | RLo = DAG.getBitcast(ExtVT, RLo); |
| 28799 | RHi = DAG.getBitcast(ExtVT, RHi); |
| 28800 | |
| 28801 | |
| 28802 | SDValue MLo = getTargetVShiftByConstNode(X86OpcI, dl, ExtVT, RLo, 4, DAG); |
| 28803 | SDValue MHi = getTargetVShiftByConstNode(X86OpcI, dl, ExtVT, RHi, 4, DAG); |
| 28804 | RLo = SignBitSelect(ExtVT, ALo, MLo, RLo); |
| 28805 | RHi = SignBitSelect(ExtVT, AHi, MHi, RHi); |
| 28806 | |
| 28807 | |
| 28808 | ALo = DAG.getNode(ISD::ADD, dl, ExtVT, ALo, ALo); |
| 28809 | AHi = DAG.getNode(ISD::ADD, dl, ExtVT, AHi, AHi); |
| 28810 | |
| 28811 | |
| 28812 | MLo = getTargetVShiftByConstNode(X86OpcI, dl, ExtVT, RLo, 2, DAG); |
| 28813 | MHi = getTargetVShiftByConstNode(X86OpcI, dl, ExtVT, RHi, 2, DAG); |
| 28814 | RLo = SignBitSelect(ExtVT, ALo, MLo, RLo); |
| 28815 | RHi = SignBitSelect(ExtVT, AHi, MHi, RHi); |
| 28816 | |
| 28817 | |
| 28818 | ALo = DAG.getNode(ISD::ADD, dl, ExtVT, ALo, ALo); |
| 28819 | AHi = DAG.getNode(ISD::ADD, dl, ExtVT, AHi, AHi); |
| 28820 | |
| 28821 | |
| 28822 | MLo = getTargetVShiftByConstNode(X86OpcI, dl, ExtVT, RLo, 1, DAG); |
| 28823 | MHi = getTargetVShiftByConstNode(X86OpcI, dl, ExtVT, RHi, 1, DAG); |
| 28824 | RLo = SignBitSelect(ExtVT, ALo, MLo, RLo); |
| 28825 | RHi = SignBitSelect(ExtVT, AHi, MHi, RHi); |
| 28826 | |
| 28827 | |
| 28828 | |
| 28829 | RLo = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExtVT, RLo, 8, DAG); |
| 28830 | RHi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExtVT, RHi, 8, DAG); |
| 28831 | return DAG.getNode(X86ISD::PACKUS, dl, VT, RLo, RHi); |
| 28832 | } |
| 28833 | } |
| 28834 | |
| 28835 | if (Subtarget.hasInt256() && !Subtarget.hasXOP() && VT == MVT::v16i16) { |
| 28836 | MVT ExtVT = MVT::v8i32; |
| 28837 | SDValue Z = DAG.getConstant(0, dl, VT); |
| 28838 | SDValue ALo = getUnpackl(DAG, dl, VT, Amt, Z); |
| 28839 | SDValue AHi = getUnpackh(DAG, dl, VT, Amt, Z); |
| 28840 | SDValue RLo = getUnpackl(DAG, dl, VT, Z, R); |
| 28841 | SDValue RHi = getUnpackh(DAG, dl, VT, Z, R); |
| 28842 | ALo = DAG.getBitcast(ExtVT, ALo); |
| 28843 | AHi = DAG.getBitcast(ExtVT, AHi); |
| 28844 | RLo = DAG.getBitcast(ExtVT, RLo); |
| 28845 | RHi = DAG.getBitcast(ExtVT, RHi); |
| 28846 | SDValue Lo = DAG.getNode(Opc, dl, ExtVT, RLo, ALo); |
| 28847 | SDValue Hi = DAG.getNode(Opc, dl, ExtVT, RHi, AHi); |
| 28848 | Lo = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExtVT, Lo, 16, DAG); |
| 28849 | Hi = getTargetVShiftByConstNode(X86ISD::VSRLI, dl, ExtVT, Hi, 16, DAG); |
| 28850 | return DAG.getNode(X86ISD::PACKUS, dl, VT, Lo, Hi); |
| 28851 | } |
| 28852 | |
| 28853 | if (VT == MVT::v8i16) { |
| 28854 | |
| 28855 | |
| 28856 | bool UseSSE41 = Subtarget.hasSSE41() && |
| 28857 | !ISD::isBuildVectorOfConstantSDNodes(Amt.getNode()); |
| 28858 | |
| 28859 | auto SignBitSelect = [&](SDValue Sel, SDValue V0, SDValue V1) { |
| 28860 | |
| 28861 | |
| 28862 | if (UseSSE41) { |
| 28863 | MVT ExtVT = MVT::getVectorVT(MVT::i8, VT.getVectorNumElements() * 2); |
| 28864 | V0 = DAG.getBitcast(ExtVT, V0); |
| 28865 | V1 = DAG.getBitcast(ExtVT, V1); |
| 28866 | Sel = DAG.getBitcast(ExtVT, Sel); |
| 28867 | return DAG.getBitcast( |
| 28868 | VT, DAG.getNode(X86ISD::BLENDV, dl, ExtVT, Sel, V0, V1)); |
| 28869 | } |
| 28870 | |
| 28871 | |
| 28872 | |
| 28873 | SDValue C = |
| 28874 | getTargetVShiftByConstNode(X86ISD::VSRAI, dl, VT, Sel, 15, DAG); |
| 28875 | return DAG.getSelect(dl, VT, C, V0, V1); |
| 28876 | }; |
| 28877 | |
| 28878 | |
| 28879 | if (UseSSE41) { |
| 28880 | |
| 28881 | |
| 28882 | Amt = DAG.getNode( |
| 28883 | ISD::OR, dl, VT, |
| 28884 | getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, Amt, 4, DAG), |
| 28885 | getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, Amt, 12, DAG)); |
| 28886 | } else { |
| 28887 | Amt = getTargetVShiftByConstNode(X86ISD::VSHLI, dl, VT, Amt, 12, DAG); |
| 28888 | } |
| 28889 | |
| 28890 | |
| 28891 | SDValue M = getTargetVShiftByConstNode(X86OpcI, dl, VT, R, 8, DAG); |
| 28892 | R = SignBitSelect(Amt, M, R); |
| 28893 | |
| 28894 | |
| 28895 | Amt = DAG.getNode(ISD::ADD, dl, VT, Amt, Amt); |
| 28896 | |
| 28897 | |
| 28898 | M = getTargetVShiftByConstNode(X86OpcI, dl, VT, R, 4, DAG); |
| 28899 | R = SignBitSelect(Amt, M, R); |
| 28900 | |
| 28901 | |
| 28902 | Amt = DAG.getNode(ISD::ADD, dl, VT, Amt, Amt); |
| 28903 | |
| 28904 | |
| 28905 | M = getTargetVShiftByConstNode(X86OpcI, dl, VT, R, 2, DAG); |
| 28906 | R = SignBitSelect(Amt, M, R); |
| 28907 | |
| 28908 | |
| 28909 | Amt = DAG.getNode(ISD::ADD, dl, VT, Amt, Amt); |
| 28910 | |
| 28911 | |
| 28912 | M = getTargetVShiftByConstNode(X86OpcI, dl, VT, R, 1, DAG); |
| 28913 | R = SignBitSelect(Amt, M, R); |
| 28914 | return R; |
| 28915 | } |
| 28916 | |
| 28917 | |
| 28918 | if (VT.is256BitVector()) |
| 28919 | return splitVectorIntBinary(Op, DAG); |
| 28920 | |
| 28921 | if (VT == MVT::v32i16 || VT == MVT::v64i8) |
| 28922 | return splitVectorIntBinary(Op, DAG); |
| 28923 | |
| 28924 | return SDValue(); |
| 28925 | } |
| 28926 | |
/// Lower vector rotate nodes (ISD::ROTL/ROTR) to X86-specific sequences.
static SDValue LowerRotate(SDValue Op, const X86Subtarget &Subtarget,
                           SelectionDAG &DAG) {
  MVT VT = Op.getSimpleValueType();
  assert(VT.isVector() && "Custom lowering only for vector rotates!");

  SDLoc DL(Op);
  SDValue R = Op.getOperand(0);
  SDValue Amt = Op.getOperand(1);
  unsigned Opcode = Op.getOpcode();
  unsigned EltSizeInBits = VT.getScalarSizeInBits();
  int NumElts = VT.getVectorNumElements();

  // Check for a constant splat rotation amount.
  APInt CstSplatValue;
  bool IsCstSplat = X86::isConstantSplat(Amt, CstSplatValue);

  // Rotating by a multiple of the element width is a no-op.
  if (IsCstSplat && CstSplatValue.urem(EltSizeInBits) == 0)
    return R;

  // AVX512 has native vXi32/vXi64 rotates (with implicit modulo amounts).
  if (Subtarget.hasAVX512() && 32 <= EltSizeInBits) {
    // Attempt to rotate by immediate.
    if (IsCstSplat) {
      unsigned RotOpc = (Opcode == ISD::ROTL ? X86ISD::VROTLI : X86ISD::VROTRI);
      uint64_t RotAmt = CstSplatValue.urem(EltSizeInBits);
      return DAG.getNode(RotOpc, DL, VT, R,
                         DAG.getTargetConstant(RotAmt, DL, MVT::i8));
    }

    // Else, fall back on the variable-amount rotate instructions.
    return Op;
  }

  // AVX512 VBMI2 vXi16 - lower to funnel shifts (rot(x, y) == fsh(x, x, y)).
  if (Subtarget.hasVBMI2() && 16 == EltSizeInBits) {
    unsigned FunnelOpc = (Opcode == ISD::ROTL ? ISD::FSHL : ISD::FSHR);
    return DAG.getNode(FunnelOpc, DL, VT, R, R, Amt);
  }

  assert((Opcode == ISD::ROTL) && "Only ROTL supported");

  // XOP has 128-bit vector variable + immediate rotates, with implicit
  // modulo rotation amounts; only ISD::ROTL needs handling here.
  if (Subtarget.hasXOP()) {
    if (VT.is256BitVector())
      return splitVectorIntBinary(Op, DAG);
    assert(VT.is128BitVector() && "Only rotate 128-bit vectors!");

    // Attempt to rotate by immediate.
    if (IsCstSplat) {
      uint64_t RotAmt = CstSplatValue.urem(EltSizeInBits);
      return DAG.getNode(X86ISD::VROTLI, DL, VT, R,
                         DAG.getTargetConstant(RotAmt, DL, MVT::i8));
    }

    // Use general rotate by variable (per-element).
    return Op;
  }

  // Split 256-bit integers on pre-AVX2 targets.
  if (VT.is256BitVector() && !Subtarget.hasAVX2())
    return splitVectorIntBinary(Op, DAG);

  assert((VT == MVT::v4i32 || VT == MVT::v8i16 || VT == MVT::v16i8 ||
          ((VT == MVT::v8i32 || VT == MVT::v16i16 || VT == MVT::v32i8 ||
            VT == MVT::v32i16) &&
           Subtarget.hasAVX2())) &&
         "Only vXi32/vXi16/vXi8 vector rotates supported");

  // Rotate by a uniform constant - leave it to the generic expansion.
  if (IsCstSplat)
    return SDValue();

  bool IsSplatAmt = DAG.isSplatValue(Amt);

  // v16i8/v32i8 variable-amount rotate: decompose into rotate-by-4,
  // rotate-by-2 and rotate-by-1 stages, selecting each stage's result by
  // the corresponding bit of the amount (moved into the byte sign bit).
  if (EltSizeInBits == 8 && !IsSplatAmt) {
    if (ISD::isBuildVectorOfConstantSDNodes(Amt.getNode()))
      return SDValue();

    // Only the low 3 amount bits are inspected, so no explicit modulo needed.
    MVT ExtVT = MVT::getVectorVT(MVT::i16, NumElts / 2);

    auto SignBitSelect = [&](MVT SelVT, SDValue Sel, SDValue V0, SDValue V1) {
      if (Subtarget.hasSSE41()) {
        // On SSE41 targets BLENDV selects bytes based directly on the sign
        // bit of each Sel byte.
        V0 = DAG.getBitcast(VT, V0);
        V1 = DAG.getBitcast(VT, V1);
        Sel = DAG.getBitcast(VT, Sel);
        return DAG.getBitcast(SelVT,
                              DAG.getNode(X86ISD::BLENDV, DL, VT, Sel, V0, V1));
      }
      // Otherwise materialize the sign-bit mask with a compare-greater-than
      // zero and use a generic vector select.
      SDValue Z = DAG.getConstant(0, DL, SelVT);
      SDValue C = DAG.getNode(X86ISD::PCMPGT, DL, SelVT, Z, Sel);
      return DAG.getSelect(DL, SelVT, C, V0, V1);
    };

    // Move amount bit 2 into the byte sign-bit position: Amt <<= 5.
    // (Shift as i16 since vXi8 shifts aren't directly available.)
    Amt = DAG.getBitcast(ExtVT, Amt);
    Amt = DAG.getNode(ISD::SHL, DL, ExtVT, Amt, DAG.getConstant(5, DL, ExtVT));
    Amt = DAG.getBitcast(VT, Amt);

    // r = select(amt-bit, rot(r, 4), r)
    SDValue M;
    M = DAG.getNode(
        ISD::OR, DL, VT,
        DAG.getNode(ISD::SHL, DL, VT, R, DAG.getConstant(4, DL, VT)),
        DAG.getNode(ISD::SRL, DL, VT, R, DAG.getConstant(4, DL, VT)));
    R = SignBitSelect(VT, Amt, M, R);

    // Move the next amount bit into the sign position.
    Amt = DAG.getNode(ISD::ADD, DL, VT, Amt, Amt);

    // r = select(amt-bit, rot(r, 2), r)
    M = DAG.getNode(
        ISD::OR, DL, VT,
        DAG.getNode(ISD::SHL, DL, VT, R, DAG.getConstant(2, DL, VT)),
        DAG.getNode(ISD::SRL, DL, VT, R, DAG.getConstant(6, DL, VT)));
    R = SignBitSelect(VT, Amt, M, R);

    // Move the last amount bit into the sign position.
    Amt = DAG.getNode(ISD::ADD, DL, VT, Amt, Amt);

    // return select(amt-bit, rot(r, 1), r)
    M = DAG.getNode(
        ISD::OR, DL, VT,
        DAG.getNode(ISD::SHL, DL, VT, R, DAG.getConstant(1, DL, VT)),
        DAG.getNode(ISD::SRL, DL, VT, R, DAG.getConstant(7, DL, VT)));
    return SignBitSelect(VT, Amt, M, R);
  }

  // ISD::ROT* uses modulo rotate amounts - mask to the element width.
  if (SDValue BaseRotAmt = DAG.getSplatValue(Amt)) {
    // If the amount is a splat, apply the modulo before re-splatting the
    // scalar amount.
    Amt = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, BaseRotAmt);
    Amt = DAG.getNode(ISD::AND, DL, VT, Amt,
                      DAG.getConstant(EltSizeInBits - 1, DL, VT));
    Amt = DAG.getVectorShuffle(VT, DL, Amt, DAG.getUNDEF(VT),
                               SmallVector<int>(NumElts, 0));
  } else {
    Amt = DAG.getNode(ISD::AND, DL, VT, Amt,
                      DAG.getConstant(EltSizeInBits - 1, DL, VT));
  }

  bool ConstantAmt = ISD::isBuildVectorOfConstantSDNodes(Amt.getNode());
  bool LegalVarShifts = SupportedVectorVarShift(VT, Subtarget, ISD::SHL) &&
                        SupportedVectorVarShift(VT, Subtarget, ISD::SRL);

  // Fall back to the shl/srl/or expansion when splats or fully-variable
  // shifts are supported, or for non-constant amounts on AVX2.
  if (IsSplatAmt || LegalVarShifts || (Subtarget.hasAVX2() && !ConstantAmt)) {
    SDValue AmtR = DAG.getConstant(EltSizeInBits, DL, VT);
    AmtR = DAG.getNode(ISD::SUB, DL, VT, AmtR, Amt);
    SDValue SHL = DAG.getNode(ISD::SHL, DL, VT, R, Amt);
    SDValue SRL = DAG.getNode(ISD::SRL, DL, VT, R, AmtR);
    return DAG.getNode(ISD::OR, DL, VT, SHL, SRL);
  }

  // As with shifts, convert the rotation amount to a multiplication factor.
  SDValue Scale = convertShiftLeftToScale(Amt, DL, Subtarget, DAG);
  assert(Scale && "Failed to convert ROTL amount to scale");

  // vXi16: rot(x, y) == (x * 2^y) | (x *u 2^y >> 16), i.e. the OR of the
  // low and high halves of the widening multiply.
  if (EltSizeInBits == 16) {
    SDValue Lo = DAG.getNode(ISD::MUL, DL, VT, R, Scale);
    SDValue Hi = DAG.getNode(ISD::MULHU, DL, VT, R, Scale);
    return DAG.getNode(ISD::OR, DL, VT, Lo, Hi);
  }

  // v4i32: use PMULUDQ to compute full 64-bit products of the even and odd
  // lanes; the upper 32 bits of each product hold the wrapped-around bits,
  // which are OR'd back with the lower 32 bits.
  assert(VT == MVT::v4i32 && "Only v4i32 vector rotate expected");
  static const int OddMask[] = {1, -1, 3, -1};
  SDValue R13 = DAG.getVectorShuffle(VT, DL, R, R, OddMask);
  SDValue Scale13 = DAG.getVectorShuffle(VT, DL, Scale, Scale, OddMask);

  SDValue Res02 = DAG.getNode(X86ISD::PMULUDQ, DL, MVT::v2i64,
                              DAG.getBitcast(MVT::v2i64, R),
                              DAG.getBitcast(MVT::v2i64, Scale));
  SDValue Res13 = DAG.getNode(X86ISD::PMULUDQ, DL, MVT::v2i64,
                              DAG.getBitcast(MVT::v2i64, R13),
                              DAG.getBitcast(MVT::v2i64, Scale13));
  Res02 = DAG.getBitcast(VT, Res02);
  Res13 = DAG.getBitcast(VT, Res13);

  // Recombine the low halves (shifted-left bits) and high halves (wrapped
  // bits) of the products into the final rotate result.
  return DAG.getNode(ISD::OR, DL, VT,
                     DAG.getVectorShuffle(VT, DL, Res02, Res13, {0, 4, 2, 6}),
                     DAG.getVectorShuffle(VT, DL, Res02, Res13, {1, 5, 3, 7}));
}
| 29127 | |
| 29128 | |
| 29129 | |
| 29130 | |
| 29131 | |
| 29132 | bool X86TargetLowering::needsCmpXchgNb(Type *MemType) const { |
| 29133 | unsigned OpWidth = MemType->getPrimitiveSizeInBits(); |
| 29134 | |
| 29135 | if (OpWidth == 64) |
| 29136 | return Subtarget.hasCmpxchg8b() && !Subtarget.is64Bit(); |
| 29137 | if (OpWidth == 128) |
| 29138 | return Subtarget.hasCmpxchg16b(); |
| 29139 | |
| 29140 | return false; |
| 29141 | } |
| 29142 | |
| 29143 | bool X86TargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const { |
| 29144 | Type *MemType = SI->getValueOperand()->getType(); |
| 29145 | |
| 29146 | bool NoImplicitFloatOps = |
| 29147 | SI->getFunction()->hasFnAttribute(Attribute::NoImplicitFloat); |
| 29148 | if (MemType->getPrimitiveSizeInBits() == 64 && !Subtarget.is64Bit() && |
| 29149 | !Subtarget.useSoftFloat() && !NoImplicitFloatOps && |
| 29150 | (Subtarget.hasSSE1() || Subtarget.hasX87())) |
| 29151 | return false; |
| 29152 | |
| 29153 | return needsCmpXchgNb(MemType); |
| 29154 | } |
| 29155 | |
| 29156 | |
| 29157 | |
| 29158 | TargetLowering::AtomicExpansionKind |
| 29159 | X86TargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const { |
| 29160 | Type *MemType = LI->getType(); |
| 29161 | |
| 29162 | |
| 29163 | |
| 29164 | |
| 29165 | bool NoImplicitFloatOps = |
| 29166 | LI->getFunction()->hasFnAttribute(Attribute::NoImplicitFloat); |
| 29167 | if (MemType->getPrimitiveSizeInBits() == 64 && !Subtarget.is64Bit() && |
| 29168 | !Subtarget.useSoftFloat() && !NoImplicitFloatOps && |
| 29169 | (Subtarget.hasSSE1() || Subtarget.hasX87())) |
| 29170 | return AtomicExpansionKind::None; |
| 29171 | |
| 29172 | return needsCmpXchgNb(MemType) ? AtomicExpansionKind::CmpXChg |
| 29173 | : AtomicExpansionKind::None; |
| 29174 | } |
| 29175 | |
/// Classify how an atomicrmw instruction should be expanded on X86.
TargetLowering::AtomicExpansionKind
X86TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
  unsigned NativeWidth = Subtarget.is64Bit() ? 64 : 32;
  Type *MemType = AI->getType();

  // If the operand is wider than the native width, check whether a
  // cmpxchg8b/16b expansion is possible; otherwise leave it alone (None).
  if (MemType->getPrimitiveSizeInBits() > NativeWidth) {
    return needsCmpXchgNb(MemType) ? AtomicExpansionKind::CmpXChg
                                   : AtomicExpansionKind::None;
  }

  AtomicRMWInst::BinOp Op = AI->getOperation();
  switch (Op) {
  default:
    llvm_unreachable("Unknown atomic operation");
  case AtomicRMWInst::Xchg:
  case AtomicRMWInst::Add:
  case AtomicRMWInst::Sub:
    // These have direct lowerings (xchg/lock xadd etc.) in all cases.
    return AtomicExpansionKind::None;
  case AtomicRMWInst::Or:
  case AtomicRMWInst::And:
  case AtomicRMWInst::Xor:
    // When the result is unused these can become plain lock-prefixed
    // instructions; when the old value is needed, expand to a cmpxchg loop.
    return !AI->use_empty() ? AtomicExpansionKind::CmpXChg
                            : AtomicExpansionKind::None;
  case AtomicRMWInst::Nand:
  case AtomicRMWInst::Max:
  case AtomicRMWInst::Min:
  case AtomicRMWInst::UMax:
  case AtomicRMWInst::UMin:
  case AtomicRMWInst::FAdd:
  case AtomicRMWInst::FSub:
    // No single x86 instruction performs these read-modify-writes; always
    // expand to a cmpxchg loop.
    return AtomicExpansionKind::CmpXChg;
  }
}
| 29216 | |
/// Try to replace an idempotent atomicrmw (one that leaves memory unchanged,
/// used only for its value and/or ordering effect) with an mfence followed
/// by a plain atomic load of the same location.
LoadInst *
X86TargetLowering::lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *AI) const {
  unsigned NativeWidth = Subtarget.is64Bit() ? 64 : 32;
  Type *MemType = AI->getType();

  // Wider-than-native accesses go through cmpxchg expansion instead; nothing
  // useful to do here.
  if (MemType->getPrimitiveSizeInBits() > NativeWidth)
    return nullptr;

  // An `or X, 0` whose result is unused is handled elsewhere (it needs no
  // load at all), so explicitly skip that canonical form here.
  if (auto *C = dyn_cast<ConstantInt>(AI->getValOperand()))
    if (AI->getOperation() == AtomicRMWInst::Or && C->isZero() &&
        AI->use_empty())
      return nullptr;

  IRBuilder<> Builder(AI);
  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
  auto SSID = AI->getSyncScopeID();

  // Use the strongest failure ordering of an equivalent cmpxchg so the
  // replacement load is never given a Release/AcquireRelease ordering.
  auto Order = AtomicCmpXchgInst::getStrongestFailureOrdering(AI->getOrdering());

  // Only the cross-thread (system) sync scope is handled below; for
  // single-thread scope, defer to the default atomicrmw lowering.
  // NOTE(review): a thread-local fence would presumably suffice here -
  // confirm the upstream rationale before extending this path.
  if (SSID == SyncScope::SingleThread)
    return nullptr;

  // The sequence emitted below relies on an explicit mfence; without one
  // available on this subtarget, keep the original atomicrmw.
  if (!Subtarget.hasMFence())
    return nullptr;

  // Emit the fence...
  Function *MFence =
      llvm::Intrinsic::getDeclaration(M, Intrinsic::x86_sse2_mfence);
  Builder.CreateCall(MFence, {});

  // ...then replace the atomicrmw with an atomic load of the same address,
  // forwarding its value to all users of the original instruction.
  LoadInst *Loaded = Builder.CreateAlignedLoad(
      AI->getType(), AI->getPointerOperand(), AI->getAlign());
  Loaded->setAtomic(Order, SSID);
  AI->replaceAllUsesWith(Loaded);
  AI->eraseFromParent();
  return Loaded;
}
| 29282 | |
| 29283 | bool X86TargetLowering::lowerAtomicStoreAsStoreSDNode(const StoreInst &SI) const { |
| 29284 | if (!SI.isUnordered()) |
| 29285 | return false; |
| 29286 | return ExperimentalUnorderedISEL; |
| 29287 | } |
| 29288 | bool X86TargetLowering::lowerAtomicLoadAsLoadSDNode(const LoadInst &LI) const { |
| 29289 | if (!LI.isUnordered()) |
| 29290 | return false; |
| 29291 | return ExperimentalUnorderedISEL; |
| 29292 | } |
| 29293 | |
| 29294 | |
| 29295 | |
| 29296 | |
| 29297 | |
| 29298 | |
/// Emit a locked operation on a stack location that does not change any
/// memory visible to the program: `lock or dword ptr [esp/rsp + SPOffset], 0`.
/// OR-ing with zero leaves the location unchanged, while the LOCK prefix
/// supplies a full memory-barrier effect, making this a substitute for
/// MFENCE (see LowerATOMIC_FENCE).
static SDValue emitLockedStackOp(SelectionDAG &DAG,
                                 const X86Subtarget &Subtarget, SDValue Chain,
                                 const SDLoc &DL) {
  // When the frame has a 128-byte red zone, address the dummy access at
  // sp-64 (inside the red zone); otherwise use sp+0.
  // NOTE(review): the exact -64 displacement is presumably a locality
  // heuristic - confirm against upstream commentary.
  auto &MF = DAG.getMachineFunction();
  auto &TFL = *Subtarget.getFrameLowering();
  const unsigned SPOffset = TFL.has128ByteRedZone(MF) ? -64 : 0;

  if (Subtarget.is64Bit()) {
    SDValue Zero = DAG.getTargetConstant(0, DL, MVT::i32);
    // X86 memory-operand 5-tuple (Base, Scale, Index, Disp, Segment),
    // followed by the OR immediate (0) and the incoming chain.
    SDValue Ops[] = {
      DAG.getRegister(X86::RSP, MVT::i64),            // Base
      DAG.getTargetConstant(1, DL, MVT::i8),          // Scale
      DAG.getRegister(0, MVT::i64),                   // Index
      DAG.getTargetConstant(SPOffset, DL, MVT::i32),  // Disp
      DAG.getRegister(0, MVT::i16),                   // Segment
      Zero,
      Chain};
    SDNode *Res = DAG.getMachineNode(X86::OR32mi8Locked, DL, MVT::i32,
                                     MVT::Other, Ops);
    // Result 0 is the (unused) i32 value; result 1 is the output chain.
    return SDValue(Res, 1);
  }

  // 32-bit mode: same locked OR, addressed off ESP.
  SDValue Zero = DAG.getTargetConstant(0, DL, MVT::i32);
  SDValue Ops[] = {
    DAG.getRegister(X86::ESP, MVT::i32),            // Base
    DAG.getTargetConstant(1, DL, MVT::i8),          // Scale
    DAG.getRegister(0, MVT::i32),                   // Index
    DAG.getTargetConstant(SPOffset, DL, MVT::i32),  // Disp
    DAG.getRegister(0, MVT::i16),                   // Segment
    Zero,
    Chain
  };
  SDNode *Res = DAG.getMachineNode(X86::OR32mi8Locked, DL, MVT::i32,
                                   MVT::Other, Ops);
  // Return the output chain.
  return SDValue(Res, 1);
}
| 29359 | |
| 29360 | static SDValue LowerATOMIC_FENCE(SDValue Op, const X86Subtarget &Subtarget, |
| 29361 | SelectionDAG &DAG) { |
| 29362 | SDLoc dl(Op); |
| 29363 | AtomicOrdering FenceOrdering = |
| 29364 | static_cast<AtomicOrdering>(Op.getConstantOperandVal(1)); |
| 29365 | SyncScope::ID FenceSSID = |
| 29366 | static_cast<SyncScope::ID>(Op.getConstantOperandVal(2)); |
| 29367 | |
| 29368 | |
| 29369 | |
| 29370 | if (FenceOrdering == AtomicOrdering::SequentiallyConsistent && |
| 29371 | FenceSSID == SyncScope::System) { |
| 29372 | if (Subtarget.hasMFence()) |
| 29373 | return DAG.getNode(X86ISD::MFENCE, dl, MVT::Other, Op.getOperand(0)); |
| 29374 | |
| 29375 | SDValue Chain = Op.getOperand(0); |
| 29376 | return emitLockedStackOp(DAG, Subtarget, Chain, dl); |
| 29377 | } |
| 29378 | |
| 29379 | |
| 29380 | return DAG.getNode(X86ISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0)); |
| 29381 | } |
| 29382 | |
/// Lower a scalar compare-and-swap to the native LCMPXCHG, which implicitly
/// uses the accumulator register (AL/AX/EAX/RAX) for the expected value and
/// reports success via ZF in EFLAGS.
static SDValue LowerCMP_SWAP(SDValue Op, const X86Subtarget &Subtarget,
                             SelectionDAG &DAG) {
  MVT T = Op.getSimpleValueType();
  SDLoc DL(Op);
  unsigned Reg = 0;   // Implicit accumulator register used by CMPXCHG.
  unsigned size = 0;  // Operand size in bytes.
  switch(T.SimpleTy) {
  default: llvm_unreachable("Invalid value type!");
  case MVT::i8:  Reg = X86::AL;  size = 1; break;
  case MVT::i16: Reg = X86::AX;  size = 2; break;
  case MVT::i32: Reg = X86::EAX; size = 4; break;
  case MVT::i64:
    assert(Subtarget.is64Bit() && "Node not type legal!");
    Reg = X86::RAX; size = 8;
    break;
  }
  // Glue the expected value (operand 2) into the accumulator register.
  SDValue cpIn = DAG.getCopyToReg(Op.getOperand(0), DL, Reg,
                                  Op.getOperand(2), SDValue());
  // LCMPXCHG operands: chain, pointer, new value, operand size, glue.
  SDValue Ops[] = { cpIn.getValue(0),
                    Op.getOperand(1),
                    Op.getOperand(3),
                    DAG.getTargetConstant(size, DL, MVT::i8),
                    cpIn.getValue(1) };
  SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue);
  MachineMemOperand *MMO = cast<AtomicSDNode>(Op)->getMemOperand();
  SDValue Result = DAG.getMemIntrinsicNode(X86ISD::LCMPXCHG_DAG, DL, Tys,
                                           Ops, T, MMO);

  // Read the old value back out of the accumulator (glued to the cmpxchg)...
  SDValue cpOut =
    DAG.getCopyFromReg(Result.getValue(0), DL, Reg, T, Result.getValue(1));
  // ...and read EFLAGS, whose ZF indicates whether the swap happened.
  SDValue EFLAGS = DAG.getCopyFromReg(cpOut.getValue(1), DL, X86::EFLAGS,
                                      MVT::i32, cpOut.getValue(2));
  SDValue Success = getSETCC(X86::COND_E, EFLAGS, DL, DAG);

  // Return (old value, success flag) plus the chain.
  return DAG.getNode(ISD::MERGE_VALUES, DL, Op->getVTList(),
                     cpOut, Success, EFLAGS.getValue(1));
}
| 29420 | |
| 29421 | |
/// Create a sign-bit mask (MOVMSK) of vector \p V, splitting vector types
/// wider than the widest MOVMSK the subtarget supports and recombining the
/// partial masks with extend/shift/or.
static SDValue getPMOVMSKB(const SDLoc &DL, SDValue V, SelectionDAG &DAG,
                           const X86Subtarget &Subtarget) {
  MVT InVT = V.getSimpleValueType();

  if (InVT == MVT::v64i8) {
    // Split into two v32i8 halves, take each half's mask recursively, then
    // combine into an i64 as (Hi << 32) | zext(Lo).
    SDValue Lo, Hi;
    std::tie(Lo, Hi) = DAG.SplitVector(V, DL);
    Lo = getPMOVMSKB(DL, Lo, DAG, Subtarget);
    Hi = getPMOVMSKB(DL, Hi, DAG, Subtarget);
    Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Lo);
    // ANY_EXTEND suffices for Hi: its undefined upper 32 bits are shifted
    // out by the SHL below.
    Hi = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Hi);
    Hi = DAG.getNode(ISD::SHL, DL, MVT::i64, Hi,
                     DAG.getConstant(32, DL, MVT::i8));
    return DAG.getNode(ISD::OR, DL, MVT::i64, Lo, Hi);
  }
  if (InVT == MVT::v32i8 && !Subtarget.hasInt256()) {
    // No 256-bit MOVMSK without AVX2: mask each 128-bit half separately and
    // combine as (Hi << 16) | Lo.
    SDValue Lo, Hi;
    std::tie(Lo, Hi) = DAG.SplitVector(V, DL);
    Lo = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Lo);
    Hi = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Hi);
    Hi = DAG.getNode(ISD::SHL, DL, MVT::i32, Hi,
                     DAG.getConstant(16, DL, MVT::i8));
    return DAG.getNode(ISD::OR, DL, MVT::i32, Lo, Hi);
  }

  // Directly supported width: a single MOVMSK.
  return DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, V);
}
| 29449 | |
/// Custom-lower BITCAST nodes that need target-specific handling:
/// i64 -> v64i1 splitting on 32-bit targets, vXi1 -> scalar integer via
/// MOVMSK, and i64/small-vector casts through the XMM/MMX domain.
static SDValue LowerBITCAST(SDValue Op, const X86Subtarget &Subtarget,
                            SelectionDAG &DAG) {
  SDValue Src = Op.getOperand(0);
  MVT SrcVT = Src.getSimpleValueType();
  MVT DstVT = Op.getSimpleValueType();

  // i64 -> v64i1 in 32-bit mode: split the i64 into two i32 halves, bitcast
  // each half to v32i1, and concatenate the results.
  if (SrcVT == MVT::i64 && DstVT == MVT::v64i1) {
    assert(!Subtarget.is64Bit() && "Expected 32-bit mode");
    assert(Subtarget.hasBWI() && "Expected BWI target");
    SDLoc dl(Op);
    SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Src,
                             DAG.getIntPtrConstant(0, dl));
    Lo = DAG.getBitcast(MVT::v32i1, Lo);
    SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Src,
                             DAG.getIntPtrConstant(1, dl));
    Hi = DAG.getBitcast(MVT::v32i1, Hi);
    return DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v64i1, Lo, Hi);
  }

  // v16i1/v32i1 -> scalar integer: sign-extend the mask to vXi8 and gather
  // the sign bits with MOVMSK.
  if ((SrcVT == MVT::v16i1 || SrcVT == MVT::v32i1) && DstVT.isScalarInteger()) {
    assert(!Subtarget.hasAVX512() && "Should use K-registers with AVX512");
    MVT SExtVT = SrcVT == MVT::v16i1 ? MVT::v16i8 : MVT::v32i8;
    SDLoc DL(Op);
    SDValue V = DAG.getSExtOrTrunc(Src, DL, SExtVT);
    V = getPMOVMSKB(DL, V, DAG, Subtarget);
    return DAG.getZExtOrTrunc(V, DL, DstVT);
  }

  assert((SrcVT == MVT::v2i32 || SrcVT == MVT::v4i16 || SrcVT == MVT::v8i8 ||
          SrcVT == MVT::i64) && "Unexpected VT!");

  assert(Subtarget.hasSSE2() && "Requires at least SSE2!");
  if (!(DstVT == MVT::f64 && SrcVT == MVT::i64) &&
      !(DstVT == MVT::x86mmx && SrcVT.isVector()))
    // Any other combination: leave it to the generic legalizer.
    return SDValue();

  SDLoc dl(Op);
  if (SrcVT.isVector()) {
    // Widen the 64-bit source vector to 128 bits (upper half undef) so it
    // can be handled in an XMM register.
    MVT NewVT = MVT::getVectorVT(SrcVT.getVectorElementType(),
                                 SrcVT.getVectorNumElements() * 2);
    Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewVT, Src,
                      DAG.getUNDEF(SrcVT));
  } else {
    assert(SrcVT == MVT::i64 && !Subtarget.is64Bit() &&
           "Unexpected source type in LowerBITCAST");
    Src = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Src);
  }

  // Bitcast the widened value to the appropriate 128-bit 64-bit-element type.
  MVT V2X64VT = DstVT == MVT::f64 ? MVT::v2f64 : MVT::v2i64;
  Src = DAG.getNode(ISD::BITCAST, dl, V2X64VT, Src);

  // MMX destinations are extracted with MOVDQ2Q...
  if (DstVT == MVT::x86mmx)
    return DAG.getNode(X86ISD::MOVDQ2Q, dl, DstVT, Src);

  // ...otherwise extract element 0 of the 128-bit vector.
  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, DstVT, Src,
                     DAG.getIntPtrConstant(0, dl));
}
| 29513 | |
| 29514 | |
| 29515 | |
| 29516 | |
| 29517 | |
| 29518 | |
| 29519 | |
/// Sum groups of consecutive i8 elements of \p V (which must have i8 element
/// type and the same total width as \p VT) into the wider elements of \p VT,
/// used as a building block by the vector CTPOP lowerings below.
static SDValue LowerHorizontalByteSum(SDValue V, MVT VT,
                                      const X86Subtarget &Subtarget,
                                      SelectionDAG &DAG) {
  SDLoc DL(V);
  MVT ByteVecVT = V.getSimpleValueType();
  MVT EltVT = VT.getVectorElementType();
  assert(ByteVecVT.getVectorElementType() == MVT::i8 &&
         "Expected value to have byte element type.");
  assert(EltVT != MVT::i8 &&
         "Horizontal byte sum only makes sense for wider elements!");
  unsigned VecSize = VT.getSizeInBits();
  assert(ByteVecVT.getSizeInBits() == VecSize && "Cannot change vector size!");

  // i64 elements: PSADBW against zero sums the eight bytes of each 64-bit
  // lane directly.
  if (EltVT == MVT::i64) {
    SDValue Zeros = DAG.getConstant(0, DL, ByteVecVT);
    MVT SadVecVT = MVT::getVectorVT(MVT::i64, VecSize / 64);
    V = DAG.getNode(X86ISD::PSADBW, DL, SadVecVT, V, Zeros);
    return DAG.getBitcast(VT, V);
  }

  if (EltVT == MVT::i32) {
    // i32 elements: interleave each i32 with zero so every element sits
    // alone in a 64-bit lane, run PSADBW over both interleaved halves, then
    // pack the per-lane sums back down into i32 elements.
    SDValue Zeros = DAG.getConstant(0, DL, VT);
    SDValue V32 = DAG.getBitcast(VT, V);
    SDValue Low = getUnpackl(DAG, DL, VT, V32, Zeros);
    SDValue High = getUnpackh(DAG, DL, VT, V32, Zeros);

    // Horizontal byte sums of both halves via PSADBW against zero.
    Zeros = DAG.getConstant(0, DL, ByteVecVT);
    MVT SadVecVT = MVT::getVectorVT(MVT::i64, VecSize / 64);
    Low = DAG.getNode(X86ISD::PSADBW, DL, SadVecVT,
                      DAG.getBitcast(ByteVecVT, Low), Zeros);
    High = DAG.getNode(X86ISD::PSADBW, DL, SadVecVT,
                       DAG.getBitcast(ByteVecVT, High), Zeros);

    // Recombine the two sets of per-lane sums with PACKUS.
    MVT ShortVecVT = MVT::getVectorVT(MVT::i16, VecSize / 16);
    V = DAG.getNode(X86ISD::PACKUS, DL, ByteVecVT,
                    DAG.getBitcast(ShortVecVT, Low),
                    DAG.getBitcast(ShortVecVT, High));

    return DAG.getBitcast(VT, V);
  }

  // The only remaining element type is i16.
  assert(EltVT == MVT::i16 && "Unknown how to handle type");

  // i16 elements: shift each i16 left by 8, add byte-wise so the high byte
  // of each i16 accumulates (lo + hi) mod 256 (no carry crosses byte lanes
  // in a byte-wise add), then shift right by 8 to move the sum into the low
  // byte. Shifts are done as i16 since vXi8 shifts aren't available; the
  // mod-256 truncation is harmless in the CTPOP use case, where each byte
  // sum is at most 16.
  SDValue ShifterV = DAG.getConstant(8, DL, VT);
  SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, DAG.getBitcast(VT, V), ShifterV);
  V = DAG.getNode(ISD::ADD, DL, ByteVecVT, DAG.getBitcast(ByteVecVT, Shl),
                  DAG.getBitcast(ByteVecVT, V));
  return DAG.getNode(ISD::SRL, DL, VT, DAG.getBitcast(VT, V), ShifterV);
}
| 29583 | |
| 29584 | static SDValue LowerVectorCTPOPInRegLUT(SDValue Op, const SDLoc &DL, |
| 29585 | const X86Subtarget &Subtarget, |
| 29586 | SelectionDAG &DAG) { |
| 29587 | MVT VT = Op.getSimpleValueType(); |
| 29588 | MVT EltVT = VT.getVectorElementType(); |
| 29589 | int NumElts = VT.getVectorNumElements(); |
| 29590 | (void)EltVT; |
| 29591 | assert(EltVT == MVT::i8 && "Only vXi8 vector CTPOP lowering supported."); |
| 29592 | |
| 29593 | |
| 29594 | |
| 29595 | |
| 29596 | |
| 29597 | |
| 29598 | |
| 29599 | |
| 29600 | |
| 29601 | |
| 29602 | |
| 29603 | const int LUT[16] = { 0, 1, 1, 2, |
| 29604 | 1, 2, 2, 3, |
| 29605 | 1, 2, 2, 3, |
| 29606 | 2, 3, 3, 4}; |
| 29607 | |
| 29608 | SmallVector<SDValue, 64> LUTVec; |
| 29609 | for (int i = 0; i < NumElts; ++i) |
| 29610 | LUTVec.push_back(DAG.getConstant(LUT[i % 16], DL, MVT::i8)); |
| 29611 | SDValue InRegLUT = DAG.getBuildVector(VT, DL, LUTVec); |
| 29612 | SDValue M0F = DAG.getConstant(0x0F, DL, VT); |
| 29613 | |
| 29614 | |
| 29615 | SDValue FourV = DAG.getConstant(4, DL, VT); |
| 29616 | SDValue HiNibbles = DAG.getNode(ISD::SRL, DL, VT, Op, FourV); |
| 29617 | |
| 29618 | |
| 29619 | SDValue LoNibbles = DAG.getNode(ISD::AND, DL, VT, Op, M0F); |
| 29620 | |
| 29621 | |
| 29622 | |
| 29623 | |
| 29624 | SDValue HiPopCnt = DAG.getNode(X86ISD::PSHUFB, DL, VT, InRegLUT, HiNibbles); |
| 29625 | SDValue LoPopCnt = DAG.getNode(X86ISD::PSHUFB, DL, VT, InRegLUT, LoNibbles); |
| 29626 | return DAG.getNode(ISD::ADD, DL, VT, HiPopCnt, LoPopCnt); |
| 29627 | } |
| 29628 | |
| 29629 | |
| 29630 | |
// Lower vector CTPOP for 128/256/512-bit vectors, choosing among VPOPCNTDQ,
// splitting into halves, byte-summing, or the in-register LUT approach based
// on the available subtarget features.
static SDValue LowerVectorCTPOP(SDValue Op, const X86Subtarget &Subtarget,
                                SelectionDAG &DAG) {
  MVT VT = Op.getSimpleValueType();
  assert((VT.is512BitVector() || VT.is256BitVector() || VT.is128BitVector()) &&
         "Unknown CTPOP type to handle");
  SDLoc DL(Op.getNode());
  SDValue Op0 = Op.getOperand(0);

  // TRUNC(CTPOP(ZEXT(X))) to make use of the vXi32 VPOPCNT instructions.
  if (Subtarget.hasVPOPCNTDQ()) {
    unsigned NumElems = VT.getVectorNumElements();
    assert((VT.getVectorElementType() == MVT::i8 ||
            VT.getVectorElementType() == MVT::i16) && "Unexpected type");
    // Widening to i32 must fit (or be extendable to) a 512-bit vector.
    if (NumElems < 16 || (NumElems == 16 && Subtarget.canExtendTo512DQ())) {
      MVT NewVT = MVT::getVectorVT(MVT::i32, NumElems);
      Op = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, Op0);
      Op = DAG.getNode(ISD::CTPOP, DL, NewVT, Op);
      return DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
    }
  }

  // Decompose 256-bit ops into two 128-bit ops without AVX2 integer support.
  if (VT.is256BitVector() && !Subtarget.hasInt256())
    return splitVectorIntUnary(Op, DAG);

  // Decompose 512-bit ops into two 256-bit ops without BWI.
  if (VT.is512BitVector() && !Subtarget.hasBWI())
    return splitVectorIntUnary(Op, DAG);

  // For element types wider than i8, do vXi8 popcounts and widen via a
  // horizontal byte sum.
  if (VT.getScalarType() != MVT::i8) {
    MVT ByteVT = MVT::getVectorVT(MVT::i8, VT.getSizeInBits() / 8);
    SDValue ByteOp = DAG.getBitcast(ByteVT, Op0);
    SDValue PopCnt8 = DAG.getNode(ISD::CTPOP, DL, ByteVT, ByteOp);
    return LowerHorizontalByteSum(PopCnt8, VT, Subtarget, DAG);
  }

  // Without SSSE3 we can't use the PSHUFB-based LUT; let LegalizeDAG expand.
  if (!Subtarget.hasSSSE3())
    return SDValue();

  return LowerVectorCTPOPInRegLUT(Op0, DL, Subtarget, DAG);
}
| 29674 | |
| 29675 | static SDValue LowerCTPOP(SDValue Op, const X86Subtarget &Subtarget, |
| 29676 | SelectionDAG &DAG) { |
| 29677 | assert(Op.getSimpleValueType().isVector() && |
| 29678 | "We only do custom lowering for vector population count."); |
| 29679 | return LowerVectorCTPOP(Op, Subtarget, DAG); |
| 29680 | } |
| 29681 | |
// Lower BITREVERSE using the XOP VPPERM instruction, which can bit-reverse
// each selected source byte as part of the permute.
static SDValue LowerBITREVERSE_XOP(SDValue Op, SelectionDAG &DAG) {
  MVT VT = Op.getSimpleValueType();
  SDValue In = Op.getOperand(0);
  SDLoc DL(Op);

  // Scalar input: widen to a 128-bit vector, reverse there, extract lane 0.
  if (!VT.isVector()) {
    MVT VecVT = MVT::getVectorVT(VT, 128 / VT.getSizeInBits());
    SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, In);
    Res = DAG.getNode(ISD::BITREVERSE, DL, VecVT, Res);
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Res,
                       DAG.getIntPtrConstant(0, DL));
  }

  int NumElts = VT.getVectorNumElements();
  int ScalarSizeInBytes = VT.getScalarSizeInBits() / 8;

  // Decompose 256-bit ops into two 128-bit ops.
  if (VT.is256BitVector())
    return splitVectorIntUnary(Op, DAG);

  assert(VT.is128BitVector() &&
         "Only 128-bit vector bitreverse lowering supported.");

  // Build the VPPERM control vector: for each element, select its bytes in
  // reverse order (so whole elements are byte-swapped) and tag each control
  // byte with the permute-op field 2<<5, which makes VPPERM emit the selected
  // byte with its bits reversed.
  SmallVector<SDValue, 16> MaskElts;
  for (int i = 0; i != NumElts; ++i) {
    for (int j = ScalarSizeInBytes - 1; j >= 0; --j) {
      // Byte index 16+n selects byte n of the second source operand.
      int SourceByte = 16 + (i * ScalarSizeInBytes) + j;
      int PermuteByte = SourceByte | (2 << 5);
      MaskElts.push_back(DAG.getConstant(PermuteByte, DL, MVT::i8));
    }
  }

  SDValue Mask = DAG.getBuildVector(MVT::v16i8, DL, MaskElts);
  SDValue Res = DAG.getBitcast(MVT::v16i8, In);
  Res = DAG.getNode(X86ISD::VPPERM, DL, MVT::v16i8, DAG.getUNDEF(MVT::v16i8),
                    Res, Mask);
  return DAG.getBitcast(VT, Res);
}
| 29726 | |
// Lower BITREVERSE for byte vectors using XOP, GFNI, or an SSSE3 PSHUFB
// nibble-lookup sequence.
static SDValue LowerBITREVERSE(SDValue Op, const X86Subtarget &Subtarget,
                               SelectionDAG &DAG) {
  MVT VT = Op.getSimpleValueType();

  // XOP's VPPERM handles everything up to 256 bits directly.
  if (Subtarget.hasXOP() && !VT.is512BitVector())
    return LowerBITREVERSE_XOP(Op, DAG);

  assert(Subtarget.hasSSSE3() && "SSSE3 required for BITREVERSE");

  SDValue In = Op.getOperand(0);
  SDLoc DL(Op);

  assert(VT.getScalarType() == MVT::i8 &&
         "Only byte vector BITREVERSE supported");

  // Split v64i8 without BWI (no 512-bit byte ops available).
  if (VT == MVT::v64i8 && !Subtarget.hasBWI())
    return splitVectorIntUnary(Op, DAG);

  // Split v32i8 without AVX2 (no 256-bit integer ops available).
  if (VT == MVT::v32i8 && !Subtarget.hasInt256())
    return splitVectorIntUnary(Op, DAG);

  unsigned NumElts = VT.getVectorNumElements();

  // With GFNI, a single GF2P8AFFINEQB with the bit-reversal matrix reverses
  // the bits of every byte.
  if (Subtarget.hasGFNI()) {
    MVT MatrixVT = MVT::getVectorVT(MVT::i64, NumElts / 8);
    SDValue Matrix = DAG.getConstant(0x8040201008040201ULL, DL, MatrixVT);
    Matrix = DAG.getBitcast(VT, Matrix);
    return DAG.getNode(X86ISD::GF2P8AFFINEQB, DL, VT, In, Matrix,
                       DAG.getTargetConstant(0, DL, MVT::i8));
  }

  // Otherwise, split each byte into its two nibbles, look up each nibble's
  // bit-reversed value (already shifted into the opposite nibble position)
  // with PSHUFB, and OR the two results together.
  SDValue NibbleMask = DAG.getConstant(0xF, DL, VT);
  SDValue Lo = DAG.getNode(ISD::AND, DL, VT, In, NibbleMask);
  SDValue Hi = DAG.getNode(ISD::SRL, DL, VT, In, DAG.getConstant(4, DL, VT));

  // LoLUT[n] = bitreverse4(n) << 4 ; HiLUT[n] = bitreverse4(n).
  const int LoLUT[16] = {
      0x00, 0x80, 0x40, 0xC0,
      0x20, 0xA0, 0x60, 0xE0,
      0x10, 0x90, 0x50, 0xD0,
      0x30, 0xB0, 0x70, 0xF0};
  const int HiLUT[16] = {
      0x00, 0x08, 0x04, 0x0C,
      0x02, 0x0A, 0x06, 0x0E,
      0x01, 0x09, 0x05, 0x0D,
      0x03, 0x0B, 0x07, 0x0F};

  SmallVector<SDValue, 16> LoMaskElts, HiMaskElts;
  for (unsigned i = 0; i < NumElts; ++i) {
    LoMaskElts.push_back(DAG.getConstant(LoLUT[i % 16], DL, MVT::i8));
    HiMaskElts.push_back(DAG.getConstant(HiLUT[i % 16], DL, MVT::i8));
  }

  SDValue LoMask = DAG.getBuildVector(VT, DL, LoMaskElts);
  SDValue HiMask = DAG.getBuildVector(VT, DL, HiMaskElts);
  Lo = DAG.getNode(X86ISD::PSHUFB, DL, VT, LoMask, Lo);
  Hi = DAG.getNode(X86ISD::PSHUFB, DL, VT, HiMask, Hi);
  return DAG.getNode(ISD::OR, DL, VT, Lo, Hi);
}
| 29791 | |
// Lower ISD::PARITY by folding the value down to 8 bits with XORs, then
// reading the x86 parity flag (PF), which reflects the parity of the low
// byte of the last arithmetic result.
static SDValue LowerPARITY(SDValue Op, const X86Subtarget &Subtarget,
                           SelectionDAG &DAG) {
  SDLoc DL(Op);
  SDValue X = Op.getOperand(0);
  MVT VT = Op.getSimpleValueType();

  // Special case: if only the low 8 bits can be nonzero, compare the i8
  // value against zero and read PF directly.
  if (VT == MVT::i8 ||
      DAG.MaskedValueIsZero(X, APInt::getBitsSetFrom(VT.getSizeInBits(), 8))) {
    X = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, X);
    SDValue Flags = DAG.getNode(X86ISD::CMP, DL, MVT::i32, X,
                                DAG.getConstant(0, DL, MVT::i8));
    // PF is set for even parity, so SETNP yields 1 exactly when the number
    // of set bits is odd — the PARITY result.
    SDValue Setnp = getSETCC(X86::COND_NP, Flags, DL, DAG);
    // Zero-extend the i8 setcc back to the requested result type.
    return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Setnp);
  }

  if (VT == MVT::i64) {
    // Fold i64 to i32: parity(x) == parity(lo32(x) ^ hi32(x)).
    SDValue Hi = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32,
                             DAG.getNode(ISD::SRL, DL, MVT::i64, X,
                                         DAG.getConstant(32, DL, MVT::i8)));
    SDValue Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, X);
    X = DAG.getNode(ISD::XOR, DL, MVT::i32, Lo, Hi);
  }

  if (VT != MVT::i16) {
    // Fold i32 to 16 bits by XORing in the upper half.
    SDValue Hi16 = DAG.getNode(ISD::SRL, DL, MVT::i32, X,
                               DAG.getConstant(16, DL, MVT::i8));
    X = DAG.getNode(ISD::XOR, DL, MVT::i32, X, Hi16);
  } else {
    // i16 input: widen so the remaining folding is done in i32.
    X = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, X);
  }

  // XOR the two remaining bytes using the flag-producing X86ISD::XOR node so
  // PF reflects the parity of the combined byte.
  SDValue Hi = DAG.getNode(
      ISD::TRUNCATE, DL, MVT::i8,
      DAG.getNode(ISD::SRL, DL, MVT::i32, X, DAG.getConstant(8, DL, MVT::i8)));
  SDValue Lo = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8, X);
  SDVTList VTs = DAG.getVTList(MVT::i8, MVT::i32);
  SDValue Flags = DAG.getNode(X86ISD::XOR, DL, VTs, Lo, Hi).getValue(1);

  // SETNP: 1 when parity is odd (PF clear).
  SDValue Setnp = getSETCC(X86::COND_NP, Flags, DL, DAG);

  return DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Setnp);
}
| 29843 | |
| 29844 | static SDValue lowerAtomicArithWithLOCK(SDValue N, SelectionDAG &DAG, |
| 29845 | const X86Subtarget &Subtarget) { |
| 29846 | unsigned NewOpc = 0; |
| 29847 | switch (N->getOpcode()) { |
| 29848 | case ISD::ATOMIC_LOAD_ADD: |
| 29849 | NewOpc = X86ISD::LADD; |
| 29850 | break; |
| 29851 | case ISD::ATOMIC_LOAD_SUB: |
| 29852 | NewOpc = X86ISD::LSUB; |
| 29853 | break; |
| 29854 | case ISD::ATOMIC_LOAD_OR: |
| 29855 | NewOpc = X86ISD::LOR; |
| 29856 | break; |
| 29857 | case ISD::ATOMIC_LOAD_XOR: |
| 29858 | NewOpc = X86ISD::LXOR; |
| 29859 | break; |
| 29860 | case ISD::ATOMIC_LOAD_AND: |
| 29861 | NewOpc = X86ISD::LAND; |
| 29862 | break; |
| 29863 | default: |
| 29864 | llvm_unreachable("Unknown ATOMIC_LOAD_ opcode"); |
| 29865 | } |
| 29866 | |
| 29867 | MachineMemOperand *MMO = cast<MemSDNode>(N)->getMemOperand(); |
| 29868 | |
| 29869 | return DAG.getMemIntrinsicNode( |
| 29870 | NewOpc, SDLoc(N), DAG.getVTList(MVT::i32, MVT::Other), |
| 29871 | {N->getOperand(0), N->getOperand(1), N->getOperand(2)}, |
| 29872 | N->getSimpleValueType(0), MMO); |
| 29873 | } |
| 29874 | |
| 29875 | |
// Lower atomic arithmetic RMW nodes: use LOCK-prefixed flag-only forms when
// the loaded value is unused, rewrite used SUB as ADD of the negation, and
// fold the idempotent "atomicrmw or %p, 0" fence pattern into a cheaper
// barrier.
static SDValue lowerAtomicArith(SDValue N, SelectionDAG &DAG,
                                const X86Subtarget &Subtarget) {
  AtomicSDNode *AN = cast<AtomicSDNode>(N.getNode());
  SDValue Chain = N->getOperand(0);
  SDValue LHS = N->getOperand(1);  // pointer operand
  SDValue RHS = N->getOperand(2);  // value operand
  unsigned Opc = N->getOpcode();
  MVT VT = N->getSimpleValueType(0);
  SDLoc DL(N);

  // If the result is used, the flag-only LOCK forms below don't apply; only
  // ADD (selectable as XADD) can return the old value.
  if (N->hasAnyUseOfValue(0)) {
    // Rewrite (atomic_load_sub p, v) as (atomic_load_add p, -v) so the same
    // XADD lowering covers it.
    if (Opc == ISD::ATOMIC_LOAD_SUB) {
      RHS = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), RHS);
      return DAG.getAtomic(ISD::ATOMIC_LOAD_ADD, DL, VT, Chain, LHS,
                           RHS, AN->getMemOperand());
    }
    assert(Opc == ISD::ATOMIC_LOAD_ADD &&
           "Used AtomicRMW ops other than Add should have been expanded!");
    return N;
  }

  // Canonical idempotent RMW (atomicrmw or %p, 0) used purely for its fence
  // effect: implement the fence without touching the memory location.
  if (Opc == ISD::ATOMIC_LOAD_OR && isNullConstant(RHS)) {
    // seq_cst at system scope needs a full fence; a LOCK-prefixed op on a
    // stack slot provides one (see emitLockedStackOp).
    if (AN->getSuccessOrdering() == AtomicOrdering::SequentiallyConsistent &&
        AN->getSyncScopeID() == SyncScope::System) {
      SDValue NewChain = emitLockedStackOp(DAG, Subtarget, Chain, DL);
      assert(!N->hasAnyUseOfValue(0));
      // Supply an undef for the (unused) value result.
      return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(),
                         DAG.getUNDEF(VT), NewChain);
    }
    // Weaker orderings (or non-system scope) only need a compiler barrier.
    SDValue NewChain = DAG.getNode(X86ISD::MEMBARRIER, DL, MVT::Other, Chain);
    assert(!N->hasAnyUseOfValue(0));
    // Supply an undef for the (unused) value result.
    return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(),
                       DAG.getUNDEF(VT), NewChain);
  }

  // Result unused: emit the flag-only LOCK arithmetic node.
  SDValue LockOp = lowerAtomicArithWithLOCK(N, DAG, Subtarget);
  assert(!N->hasAnyUseOfValue(0));
  // Supply an undef for the (unused) value result; keep the chain.
  return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(),
                     DAG.getUNDEF(VT), LockOp.getValue(1));
}
| 29939 | |
// Lower an atomic store. Legal-typed non-seq_cst stores pass through; i64
// stores with an illegal type (32-bit targets) go through SSE or x87 when FP
// is permitted; everything else becomes an atomic swap with ignored result.
static SDValue LowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG,
                                 const X86Subtarget &Subtarget) {
  auto *Node = cast<AtomicSDNode>(Op.getNode());
  SDLoc dl(Node);
  EVT VT = Node->getMemoryVT();

  bool IsSeqCst =
      Node->getSuccessOrdering() == AtomicOrdering::SequentiallyConsistent;
  bool IsTypeLegal = DAG.getTargetLoweringInfo().isTypeLegal(VT);

  // Non-seq_cst stores of legal types can just be emitted as normal stores.
  if (!IsSeqCst && IsTypeLegal)
    return Op;

  if (VT == MVT::i64 && !IsTypeLegal) {
    // i64 store with i64 illegal (32-bit target): a single 64-bit store is
    // still possible through SSE or the x87 FILD/FIST pair, unless implicit
    // FP use is disallowed.
    bool NoImplicitFloatOps =
        DAG.getMachineFunction().getFunction().hasFnAttribute(
            Attribute::NoImplicitFloat);
    if (!Subtarget.useSoftFloat() && !NoImplicitFloatOps) {
      SDValue Chain;
      if (Subtarget.hasSSE1()) {
        // Move the i64 into a vector register and store its low 64 bits in
        // one instruction.
        SDValue SclToVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
                                       Node->getOperand(2));
        MVT StVT = Subtarget.hasSSE2() ? MVT::v2i64 : MVT::v4f32;
        SclToVec = DAG.getBitcast(StVT, SclToVec);
        SDVTList Tys = DAG.getVTList(MVT::Other);
        SDValue Ops[] = {Node->getChain(), SclToVec, Node->getBasePtr()};
        Chain = DAG.getMemIntrinsicNode(X86ISD::VEXTRACT_STORE, dl, Tys, Ops,
                                        MVT::i64, Node->getMemOperand());
      } else if (Subtarget.hasX87()) {
        // Bounce through a stack slot: load the i64 into an f80 register with
        // FILD, then store it back out as a 64-bit integer with FIST.
        SDValue StackPtr = DAG.CreateStackTemporary(MVT::i64);
        int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex();
        MachinePointerInfo MPI =
            MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI);
        Chain =
            DAG.getStore(Node->getChain(), dl, Node->getOperand(2), StackPtr,
                         MPI, MaybeAlign(), MachineMemOperand::MOStore);
        SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other);
        SDValue LdOps[] = {Chain, StackPtr};
        SDValue Value =
            DAG.getMemIntrinsicNode(X86ISD::FILD, dl, Tys, LdOps, MVT::i64, MPI,
                                    None, MachineMemOperand::MOLoad);
        Chain = Value.getValue(1);

        // Store to the original atomic location with a single 64-bit FIST.
        SDValue StoreOps[] = {Chain, Value, Node->getBasePtr()};
        Chain =
            DAG.getMemIntrinsicNode(X86ISD::FIST, dl, DAG.getVTList(MVT::Other),
                                    StoreOps, MVT::i64, Node->getMemOperand());
      }

      if (Chain) {
        // For seq_cst the plain store needs a trailing full barrier; a
        // locked stack op is used instead of MFENCE.
        if (IsSeqCst)
          Chain = emitLockedStackOp(DAG, Subtarget, Chain, dl);

        return Chain;
      }
    }
  }

  // Fallback: convert the store to an atomic swap and discard the loaded
  // value, returning only the swap's chain result.
  SDValue Swap = DAG.getAtomic(ISD::ATOMIC_SWAP, dl,
                               Node->getMemoryVT(),
                               Node->getOperand(0),
                               Node->getOperand(1), Node->getOperand(2),
                               Node->getMemOperand());
  return Swap.getValue(1);
}
| 30017 | |
// Lower ADDCARRY/SUBCARRY and their signed-overflow variants to X86ISD
// ADC/SBB, first materializing the incoming carry bit into EFLAGS.
static SDValue LowerADDSUBCARRY(SDValue Op, SelectionDAG &DAG) {
  SDNode *N = Op.getNode();
  MVT VT = N->getSimpleValueType(0);
  unsigned Opc = Op.getOpcode();

  // Let legalize expand this if the type isn't legal yet.
  if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
    return SDValue();

  SDVTList VTs = DAG.getVTList(VT, MVT::i32);
  SDLoc DL(N);

  // Set CF from the incoming carry operand: adding all-ones to a nonzero
  // carry value wraps and sets CF, while adding it to zero leaves CF clear.
  SDValue Carry = Op.getOperand(2);
  EVT CarryVT = Carry.getValueType();
  Carry = DAG.getNode(X86ISD::ADD, DL, DAG.getVTList(CarryVT, MVT::i32),
                      Carry, DAG.getAllOnesConstant(DL, CarryVT));

  bool IsAdd = Opc == ISD::ADDCARRY || Opc == ISD::SADDO_CARRY;
  SDValue Sum = DAG.getNode(IsAdd ? X86ISD::ADC : X86ISD::SBB, DL, VTs,
                            Op.getOperand(0), Op.getOperand(1),
                            Carry.getValue(1));

  // Signed variants report overflow (OF); unsigned variants report carry (CF).
  bool IsSigned = Opc == ISD::SADDO_CARRY || Opc == ISD::SSUBO_CARRY;
  SDValue SetCC = getSETCC(IsSigned ? X86::COND_O : X86::COND_B,
                           Sum.getValue(1), DL, DAG);
  // Callers may expect the flag result as i1.
  if (N->getValueType(1) == MVT::i1)
    SetCC = DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, SetCC);

  return DAG.getNode(ISD::MERGE_VALUES, DL, N->getVTList(), Sum, SetCC);
}
| 30049 | |
// Lower FSINCOS on 64-bit Darwin by calling the __sincos_stret runtime
// function, which returns sin and cos together (as a two-field struct for
// f64, or packed into vector lanes 0/1 for f32).
static SDValue LowerFSINCOS(SDValue Op, const X86Subtarget &Subtarget,
                            SelectionDAG &DAG) {
  assert(Subtarget.isTargetDarwin() && Subtarget.is64Bit());

  // Build the single FP argument for the libcall.
  SDLoc dl(Op);
  SDValue Arg = Op.getOperand(0);
  EVT ArgVT = Arg.getValueType();
  Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext());

  TargetLowering::ArgListTy Args;
  TargetLowering::ArgListEntry Entry;

  Entry.Node = Arg;
  Entry.Ty = ArgTy;
  Entry.IsSExt = false;
  Entry.IsZExt = false;
  Args.push_back(Entry);

  bool isF64 = ArgVT == MVT::f64;

  // Pick the f32 or f64 flavor of the sincos_stret libcall.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  RTLIB::Libcall LC = isF64 ? RTLIB::SINCOS_STRET_F64 : RTLIB::SINCOS_STRET_F32;
  const char *LibcallName = TLI.getLibcallName(LC);
  SDValue Callee =
      DAG.getExternalSymbol(LibcallName, TLI.getPointerTy(DAG.getDataLayout()));

  // f64 returns {double, double}; f32 returns its pair inside a <4 x float>.
  Type *RetTy = isF64 ? (Type *)StructType::get(ArgTy, ArgTy)
                      : (Type *)FixedVectorType::get(ArgTy, 4);

  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(dl)
      .setChain(DAG.getEntryNode())
      .setLibCallee(CallingConv::C, RetTy, Callee, std::move(Args));

  std::pair<SDValue, SDValue> CallResult = TLI.LowerCallTo(CLI);

  if (isF64)
    // The struct return already carries both results; use it directly.
    return CallResult.first;

  // f32: extract sin (lane 0) and cos (lane 1) from the returned vector.
  SDValue SinVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ArgVT,
                               CallResult.first, DAG.getIntPtrConstant(0, dl));
  SDValue CosVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ArgVT,
                               CallResult.first, DAG.getIntPtrConstant(1, dl));
  SDVTList Tys = DAG.getVTList(ArgVT, ArgVT);
  return DAG.getNode(ISD::MERGE_VALUES, dl, Tys, SinVal, CosVal);
}
| 30103 | |
| 30104 | |
| 30105 | |
| 30106 | static SDValue ExtendToType(SDValue InOp, MVT NVT, SelectionDAG &DAG, |
| 30107 | bool FillWithZeroes = false) { |
| 30108 | |
| 30109 | MVT InVT = InOp.getSimpleValueType(); |
| 30110 | if (InVT == NVT) |
| 30111 | return InOp; |
| 30112 | |
| 30113 | if (InOp.isUndef()) |
| 30114 | return DAG.getUNDEF(NVT); |
| 30115 | |
| 30116 | assert(InVT.getVectorElementType() == NVT.getVectorElementType() && |
| 30117 | "input and widen element type must match"); |
| 30118 | |
| 30119 | unsigned InNumElts = InVT.getVectorNumElements(); |
| 30120 | unsigned WidenNumElts = NVT.getVectorNumElements(); |
| 30121 | assert(WidenNumElts > InNumElts && WidenNumElts % InNumElts == 0 && |
| 30122 | "Unexpected request for vector widening"); |
| 30123 | |
| 30124 | SDLoc dl(InOp); |
| 30125 | if (InOp.getOpcode() == ISD::CONCAT_VECTORS && |
| 30126 | InOp.getNumOperands() == 2) { |
| 30127 | SDValue N1 = InOp.getOperand(1); |
| 30128 | if ((ISD::isBuildVectorAllZeros(N1.getNode()) && FillWithZeroes) || |
| 30129 | N1.isUndef()) { |
| 30130 | InOp = InOp.getOperand(0); |
| 30131 | InVT = InOp.getSimpleValueType(); |
| 30132 | InNumElts = InVT.getVectorNumElements(); |
| 30133 | } |
| 30134 | } |
| 30135 | if (ISD::isBuildVectorOfConstantSDNodes(InOp.getNode()) || |
| 30136 | ISD::isBuildVectorOfConstantFPSDNodes(InOp.getNode())) { |
| 30137 | SmallVector<SDValue, 16> Ops; |
| 30138 | for (unsigned i = 0; i < InNumElts; ++i) |
| 30139 | Ops.push_back(InOp.getOperand(i)); |
| 30140 | |
| 30141 | EVT EltVT = InOp.getOperand(0).getValueType(); |
| 30142 | |
| 30143 | SDValue FillVal = FillWithZeroes ? DAG.getConstant(0, dl, EltVT) : |
| 30144 | DAG.getUNDEF(EltVT); |
| 30145 | for (unsigned i = 0; i < WidenNumElts - InNumElts; ++i) |
| 30146 | Ops.push_back(FillVal); |
| 30147 | return DAG.getBuildVector(NVT, dl, Ops); |
| 30148 | } |
| 30149 | SDValue FillVal = FillWithZeroes ? DAG.getConstant(0, dl, NVT) : |
| 30150 | DAG.getUNDEF(NVT); |
| 30151 | return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, NVT, FillVal, |
| 30152 | InOp, DAG.getIntPtrConstant(0, dl)); |
| 30153 | } |
| 30154 | |
// Lower a masked scatter for AVX-512, widening data/index/mask vectors to
// 512 bits when the target lacks VLX.
static SDValue LowerMSCATTER(SDValue Op, const X86Subtarget &Subtarget,
                             SelectionDAG &DAG) {
  assert(Subtarget.hasAVX512() &&
         "MGATHER/MSCATTER are supported on AVX-512 arch only");

  MaskedScatterSDNode *N = cast<MaskedScatterSDNode>(Op.getNode());
  SDValue Src = N->getValue();
  MVT VT = Src.getSimpleValueType();
  assert(VT.getScalarSizeInBits() >= 32 && "Unsupported scatter op");
  SDLoc dl(Op);

  SDValue Scale = N->getScale();
  SDValue Index = N->getIndex();
  SDValue Mask = N->getMask();
  SDValue Chain = N->getChain();
  SDValue BasePtr = N->getBasePtr();

  if (VT == MVT::v2f32 || VT == MVT::v2i32) {
    assert(Mask.getValueType() == MVT::v2i1 && "Unexpected mask type");
    // With VLX and a v2i64 index, widen the 2-element source with undef and
    // issue a 128-bit scatter; the v2i1 mask keeps the padded lanes inert.
    if (Index.getValueType() == MVT::v2i64 && Subtarget.hasVLX()) {
      const TargetLowering &TLI = DAG.getTargetLoweringInfo();
      EVT WideVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT);
      Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Src, DAG.getUNDEF(VT));
      SDVTList VTs = DAG.getVTList(MVT::Other);
      SDValue Ops[] = {Chain, Src, Mask, BasePtr, Index, Scale};
      return DAG.getMemIntrinsicNode(X86ISD::MSCATTER, dl, VTs, Ops,
                                     N->getMemoryVT(), N->getMemOperand());
    }
    return SDValue();
  }

  MVT IndexVT = Index.getSimpleValueType();

  // A v2i32 index means we're being re-entered from type legalization; let
  // the default handling take care of it.
  if (IndexVT == MVT::v2i32)
    return SDValue();

  // Without VLX, scatter needs a 512-bit data or index vector; widen until
  // one of them reaches 512 bits.
  if (!Subtarget.hasVLX() && !VT.is512BitVector() &&
      !Index.getSimpleValueType().is512BitVector()) {
    // Determine how much widening makes one of the two types 512-bit.
    unsigned Factor = std::min(512/VT.getSizeInBits(),
                               512/IndexVT.getSizeInBits());
    unsigned NumElts = VT.getVectorNumElements() * Factor;

    VT = MVT::getVectorVT(VT.getVectorElementType(), NumElts);
    IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(), NumElts);
    MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);

    Src = ExtendToType(Src, VT, DAG);
    Index = ExtendToType(Index, IndexVT, DAG);
    // Zero-fill the widened mask so the padded lanes never store.
    Mask = ExtendToType(Mask, MaskVT, DAG, true);
  }

  SDVTList VTs = DAG.getVTList(MVT::Other);
  SDValue Ops[] = {Chain, Src, Mask, BasePtr, Index, Scale};
  return DAG.getMemIntrinsicNode(X86ISD::MSCATTER, dl, VTs, Ops,
                                 N->getMemoryVT(), N->getMemOperand());
}
| 30217 | |
// Lower a masked load: AVX (non-i1 mask) loads with a nonzero passthru are
// split into a zero-passthru load plus a VSELECT; AVX-512-without-VLX loads
// are widened to 512 bits and the result subvector extracted.
static SDValue LowerMLOAD(SDValue Op, const X86Subtarget &Subtarget,
                          SelectionDAG &DAG) {
  MaskedLoadSDNode *N = cast<MaskedLoadSDNode>(Op.getNode());
  MVT VT = Op.getSimpleValueType();
  MVT ScalarVT = VT.getScalarType();
  SDValue Mask = N->getMask();
  MVT MaskVT = Mask.getSimpleValueType();
  SDValue PassThru = N->getPassThru();
  SDLoc dl(Op);

  // Non-i1 mask: AVX masked loads only support a zero (or undef) passthru.
  if (MaskVT.getVectorElementType() != MVT::i1) {
    // Zero/undef passthru is directly selectable.
    if (PassThru.isUndef() || ISD::isBuildVectorAllZeros(PassThru.getNode()))
      return Op;

    // Otherwise re-issue the load with a zero passthru, then blend the real
    // passthru back in with a VSELECT on the same mask.
    SDValue NewLoad = DAG.getMaskedLoad(
        VT, dl, N->getChain(), N->getBasePtr(), N->getOffset(), Mask,
        getZeroVector(VT, Subtarget, DAG, dl), N->getMemoryVT(),
        N->getMemOperand(), N->getAddressingMode(), N->getExtensionType(),
        N->isExpandingLoad());

    SDValue Select = DAG.getNode(ISD::VSELECT, dl, VT, Mask, NewLoad, PassThru);
    return DAG.getMergeValues({ Select, NewLoad.getValue(1) }, dl);
  }

  assert((!N->isExpandingLoad() || Subtarget.hasAVX512()) &&
         "Expanding masked load is supported on AVX-512 target only!");

  assert((!N->isExpandingLoad() || ScalarVT.getSizeInBits() >= 32) &&
         "Expanding masked load is supported for 32 and 64-bit types only!");

  assert(Subtarget.hasAVX512() && !Subtarget.hasVLX() && !VT.is512BitVector() &&
         "Cannot lower masked load op.");

  assert((ScalarVT.getSizeInBits() >= 32 ||
          (Subtarget.hasBWI() &&
              (ScalarVT == MVT::i8 || ScalarVT == MVT::i16))) &&
         "Unsupported masked load op.");

  // Without VLX, sub-512-bit masked loads must be widened to 512 bits.
  unsigned NumEltsInWideVec = 512 / VT.getScalarSizeInBits();
  MVT WideDataVT = MVT::getVectorVT(ScalarVT, NumEltsInWideVec);
  PassThru = ExtendToType(PassThru, WideDataVT, DAG);

  // The mask element type must already be i1 on this path.
  assert(Mask.getSimpleValueType().getScalarType() == MVT::i1 &&
         "Unexpected mask type");

  MVT WideMaskVT = MVT::getVectorVT(MVT::i1, NumEltsInWideVec);

  // Zero-fill the widened mask so the padded lanes stay disabled.
  Mask = ExtendToType(Mask, WideMaskVT, DAG, true);
  SDValue NewLoad = DAG.getMaskedLoad(
      WideDataVT, dl, N->getChain(), N->getBasePtr(), N->getOffset(), Mask,
      PassThru, N->getMemoryVT(), N->getMemOperand(), N->getAddressingMode(),
      N->getExtensionType(), N->isExpandingLoad());

  // Extract the originally requested subvector from the widened result.
  SDValue Extract =
      DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, NewLoad.getValue(0),
                  DAG.getIntPtrConstant(0, dl));
  SDValue RetOps[] = {Extract, NewLoad.getValue(1)};
  return DAG.getMergeValues(RetOps, dl);
}
| 30283 | |
// Lower a masked store on AVX-512 targets without VLX by widening the data
// and mask to 512 bits; padded mask lanes are zero so they never store.
static SDValue LowerMSTORE(SDValue Op, const X86Subtarget &Subtarget,
                           SelectionDAG &DAG) {
  MaskedStoreSDNode *N = cast<MaskedStoreSDNode>(Op.getNode());
  SDValue DataToStore = N->getValue();
  MVT VT = DataToStore.getSimpleValueType();
  MVT ScalarVT = VT.getScalarType();
  SDValue Mask = N->getMask();
  SDLoc dl(Op);

  assert((!N->isCompressingStore() || Subtarget.hasAVX512()) &&
         "Expanding masked load is supported on AVX-512 target only!");

  assert((!N->isCompressingStore() || ScalarVT.getSizeInBits() >= 32) &&
         "Expanding masked load is supported for 32 and 64-bit types only!");

  assert(Subtarget.hasAVX512() && !Subtarget.hasVLX() && !VT.is512BitVector() &&
         "Cannot lower masked store op.");

  assert((ScalarVT.getSizeInBits() >= 32 ||
          (Subtarget.hasBWI() &&
              (ScalarVT == MVT::i8 || ScalarVT == MVT::i16))) &&
         "Unsupported masked store op.");

  // Without VLX, sub-512-bit masked stores must be widened to 512 bits.
  unsigned NumEltsInWideVec = 512/VT.getScalarSizeInBits();
  MVT WideDataVT = MVT::getVectorVT(ScalarVT, NumEltsInWideVec);

  // The mask element type must be i1 on this path.
  assert(Mask.getSimpleValueType().getScalarType() == MVT::i1 &&
         "Unexpected mask type");

  MVT WideMaskVT = MVT::getVectorVT(MVT::i1, NumEltsInWideVec);

  // Widen the data (padding values are don't-care) and zero-extend the mask
  // so the padded lanes are masked off.
  DataToStore = ExtendToType(DataToStore, WideDataVT, DAG);
  Mask = ExtendToType(Mask, WideMaskVT, DAG, true);
  return DAG.getMaskedStore(N->getChain(), dl, DataToStore, N->getBasePtr(),
                            N->getOffset(), Mask, N->getMemoryVT(),
                            N->getMemOperand(), N->getAddressingMode(),
                            N->isTruncatingStore(), N->isCompressingStore());
}
| 30325 | |
// Lower a masked gather, widening data/index/mask to 512 bits on AVX-512
// targets without VLX, then extracting the originally requested subvector.
static SDValue LowerMGATHER(SDValue Op, const X86Subtarget &Subtarget,
                            SelectionDAG &DAG) {
  assert(Subtarget.hasAVX2() &&
         "MGATHER/MSCATTER are supported on AVX-512/AVX-2 arch only");

  MaskedGatherSDNode *N = cast<MaskedGatherSDNode>(Op.getNode());
  SDLoc dl(Op);
  MVT VT = Op.getSimpleValueType();
  SDValue Index = N->getIndex();
  SDValue Mask = N->getMask();
  SDValue PassThru = N->getPassThru();
  MVT IndexVT = Index.getSimpleValueType();

  assert(VT.getScalarSizeInBits() >= 32 && "Unsupported gather op");

  // A v2i32 index means we're being re-entered from type legalization; let
  // the default handling take care of it.
  if (IndexVT == MVT::v2i32)
    return SDValue();

  // Without VLX, gather needs a 512-bit data or index vector; widen until
  // one of them reaches 512 bits. Remember the original type for the final
  // extract.
  MVT OrigVT = VT;
  if (Subtarget.hasAVX512() && !Subtarget.hasVLX() && !VT.is512BitVector() &&
      !IndexVT.is512BitVector()) {
    // Determine how much widening makes one of the two types 512-bit.
    unsigned Factor = std::min(512/VT.getSizeInBits(),
                               512/IndexVT.getSizeInBits());

    unsigned NumElts = VT.getVectorNumElements() * Factor;

    VT = MVT::getVectorVT(VT.getVectorElementType(), NumElts);
    IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(), NumElts);
    MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);

    PassThru = ExtendToType(PassThru, VT, DAG);
    Index = ExtendToType(Index, IndexVT, DAG);
    // Zero-fill the widened mask so the padded lanes never load.
    Mask = ExtendToType(Mask, MaskVT, DAG, true);
  }

  SDValue Ops[] = { N->getChain(), PassThru, Mask, N->getBasePtr(), Index,
                    N->getScale() };
  SDValue NewGather = DAG.getMemIntrinsicNode(
      X86ISD::MGATHER, dl, DAG.getVTList(VT, MVT::Other), Ops, N->getMemoryVT(),
      N->getMemOperand());
  // Return the original-width subvector plus the gather's chain.
  SDValue Extract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, OrigVT,
                                NewGather, DAG.getIntPtrConstant(0, dl));
  return DAG.getMergeValues({Extract, NewGather.getValue(1)}, dl);
}
| 30374 | |
| 30375 | static SDValue LowerADDRSPACECAST(SDValue Op, SelectionDAG &DAG) { |
| 30376 | SDLoc dl(Op); |
| 30377 | SDValue Src = Op.getOperand(0); |
| 30378 | MVT DstVT = Op.getSimpleValueType(); |
| 30379 | |
| 30380 | AddrSpaceCastSDNode *N = cast<AddrSpaceCastSDNode>(Op.getNode()); |
| 30381 | unsigned SrcAS = N->getSrcAddressSpace(); |
| 30382 | |
| 30383 | assert(SrcAS != N->getDestAddressSpace() && |
| 30384 | "addrspacecast must be between different address spaces"); |
| 30385 | |
| 30386 | if (SrcAS == X86AS::PTR32_UPTR && DstVT == MVT::i64) { |
| 30387 | Op = DAG.getNode(ISD::ZERO_EXTEND, dl, DstVT, Src); |
| 30388 | } else if (DstVT == MVT::i64) { |
| 30389 | Op = DAG.getNode(ISD::SIGN_EXTEND, dl, DstVT, Src); |
| 30390 | } else if (DstVT == MVT::i32) { |
| 30391 | Op = DAG.getNode(ISD::TRUNCATE, dl, DstVT, Src); |
| 30392 | } else { |
| 30393 | report_fatal_error("Bad address space in addrspacecast"); |
| 30394 | } |
| 30395 | return Op; |
| 30396 | } |
| 30397 | |
| 30398 | SDValue X86TargetLowering::LowerGC_TRANSITION(SDValue Op, |
| 30399 | SelectionDAG &DAG) const { |
| 30400 | |
| 30401 | |
| 30402 | |
| 30403 | |
| 30404 | |
| 30405 | |
| 30406 | SmallVector<SDValue, 2> Ops; |
| 30407 | |
| 30408 | Ops.push_back(Op.getOperand(0)); |
| 30409 | if (Op->getGluedNode()) |
| 30410 | Ops.push_back(Op->getOperand(Op->getNumOperands() - 1)); |
| 30411 | |
| 30412 | SDLoc OpDL(Op); |
| 30413 | SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); |
| 30414 | SDValue NOOP(DAG.getMachineNode(X86::NOOP, SDLoc(Op), VTs, Ops), 0); |
| 30415 | |
| 30416 | return NOOP; |
| 30417 | } |
| 30418 | |
| 30419 | |
| 30420 | static SDValue LowerCVTPS2PH(SDValue Op, SelectionDAG &DAG) { |
| 30421 | SDLoc dl(Op); |
| 30422 | EVT VT = Op.getValueType(); |
| 30423 | SDValue Lo, Hi; |
| 30424 | std::tie(Lo, Hi) = DAG.SplitVectorOperand(Op.getNode(), 0); |
| 30425 | EVT LoVT, HiVT; |
| 30426 | std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT); |
| 30427 | SDValue RC = Op.getOperand(1); |
| 30428 | Lo = DAG.getNode(X86ISD::CVTPS2PH, dl, LoVT, Lo, RC); |
| 30429 | Hi = DAG.getNode(X86ISD::CVTPS2PH, dl, HiVT, Hi, RC); |
| 30430 | return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi); |
| 30431 | } |
| 30432 | |
| 30433 | |
/// Provide custom lowering hooks for operations the X86 backend marked as
/// Custom: dispatch each opcode to its dedicated lowering routine.  Reaching
/// the default case means the opcode was registered for custom lowering but
/// no handler exists here, which is a backend bug.
SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  default: llvm_unreachable("Should not custom lower this!");
  case ISD::ATOMIC_FENCE: return LowerATOMIC_FENCE(Op, Subtarget, DAG);
  case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
    return LowerCMP_SWAP(Op, Subtarget, DAG);
  case ISD::CTPOP: return LowerCTPOP(Op, Subtarget, DAG);
  case ISD::ATOMIC_LOAD_ADD:
  case ISD::ATOMIC_LOAD_SUB:
  case ISD::ATOMIC_LOAD_OR:
  case ISD::ATOMIC_LOAD_XOR:
  case ISD::ATOMIC_LOAD_AND: return lowerAtomicArith(Op, DAG, Subtarget);
  case ISD::ATOMIC_STORE: return LowerATOMIC_STORE(Op, DAG, Subtarget);
  case ISD::BITREVERSE: return LowerBITREVERSE(Op, Subtarget, DAG);
  case ISD::PARITY: return LowerPARITY(Op, Subtarget, DAG);
  case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
  case ISD::CONCAT_VECTORS: return LowerCONCAT_VECTORS(Op, Subtarget, DAG);
  case ISD::VECTOR_SHUFFLE: return lowerVECTOR_SHUFFLE(Op, Subtarget, DAG);
  case ISD::VSELECT: return LowerVSELECT(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::INSERT_SUBVECTOR: return LowerINSERT_SUBVECTOR(Op, Subtarget,DAG);
  case ISD::EXTRACT_SUBVECTOR: return LowerEXTRACT_SUBVECTOR(Op,Subtarget,DAG);
  case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, Subtarget,DAG);
  case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
  case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
  case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
  case ISD::ExternalSymbol: return LowerExternalSymbol(Op, DAG);
  case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
  case ISD::SHL_PARTS:
  case ISD::SRA_PARTS:
  case ISD::SRL_PARTS: return LowerShiftParts(Op, DAG);
  case ISD::FSHL:
  case ISD::FSHR: return LowerFunnelShift(Op, Subtarget, DAG);
  // Strict FP nodes share the lowering of their non-strict counterparts.
  case ISD::STRICT_SINT_TO_FP:
  case ISD::SINT_TO_FP: return LowerSINT_TO_FP(Op, DAG);
  case ISD::STRICT_UINT_TO_FP:
  case ISD::UINT_TO_FP: return LowerUINT_TO_FP(Op, DAG);
  case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
  case ISD::ZERO_EXTEND: return LowerZERO_EXTEND(Op, Subtarget, DAG);
  case ISD::SIGN_EXTEND: return LowerSIGN_EXTEND(Op, Subtarget, DAG);
  case ISD::ANY_EXTEND: return LowerANY_EXTEND(Op, Subtarget, DAG);
  case ISD::ZERO_EXTEND_VECTOR_INREG:
  case ISD::SIGN_EXTEND_VECTOR_INREG:
    return LowerEXTEND_VECTOR_INREG(Op, Subtarget, DAG);
  case ISD::FP_TO_SINT:
  case ISD::STRICT_FP_TO_SINT:
  case ISD::FP_TO_UINT:
  case ISD::STRICT_FP_TO_UINT: return LowerFP_TO_INT(Op, DAG);
  case ISD::FP_TO_SINT_SAT:
  case ISD::FP_TO_UINT_SAT: return LowerFP_TO_INT_SAT(Op, DAG);
  case ISD::FP_EXTEND:
  case ISD::STRICT_FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
  case ISD::FP_ROUND:
  case ISD::STRICT_FP_ROUND: return LowerFP_ROUND(Op, DAG);
  case ISD::FP16_TO_FP:
  case ISD::STRICT_FP16_TO_FP: return LowerFP16_TO_FP(Op, DAG);
  case ISD::FP_TO_FP16:
  case ISD::STRICT_FP_TO_FP16: return LowerFP_TO_FP16(Op, DAG);
  case ISD::LOAD: return LowerLoad(Op, Subtarget, DAG);
  case ISD::STORE: return LowerStore(Op, Subtarget, DAG);
  case ISD::FADD:
  case ISD::FSUB: return lowerFaddFsub(Op, DAG);
  case ISD::FROUND: return LowerFROUND(Op, DAG);
  case ISD::FABS:
  case ISD::FNEG: return LowerFABSorFNEG(Op, DAG);
  case ISD::FCOPYSIGN: return LowerFCOPYSIGN(Op, DAG);
  case ISD::FGETSIGN: return LowerFGETSIGN(Op, DAG);
  case ISD::LRINT:
  case ISD::LLRINT: return LowerLRINT_LLRINT(Op, DAG);
  case ISD::SETCC:
  case ISD::STRICT_FSETCC:
  case ISD::STRICT_FSETCCS: return LowerSETCC(Op, DAG);
  case ISD::SETCCCARRY: return LowerSETCCCARRY(Op, DAG);
  case ISD::SELECT: return LowerSELECT(Op, DAG);
  case ISD::BRCOND: return LowerBRCOND(Op, DAG);
  case ISD::JumpTable: return LowerJumpTable(Op, DAG);
  case ISD::VASTART: return LowerVASTART(Op, DAG);
  case ISD::VAARG: return LowerVAARG(Op, DAG);
  case ISD::VACOPY: return LowerVACOPY(Op, Subtarget, DAG);
  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
  case ISD::INTRINSIC_VOID:
  case ISD::INTRINSIC_W_CHAIN: return LowerINTRINSIC_W_CHAIN(Op, Subtarget, DAG);
  case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
  case ISD::ADDROFRETURNADDR: return LowerADDROFRETURNADDR(Op, DAG);
  case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
  case ISD::FRAME_TO_ARGS_OFFSET:
    return LowerFRAME_TO_ARGS_OFFSET(Op, DAG);
  case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
  case ISD::EH_RETURN: return LowerEH_RETURN(Op, DAG);
  case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
  case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
  case ISD::EH_SJLJ_SETUP_DISPATCH:
    return lowerEH_SJLJ_SETUP_DISPATCH(Op, DAG);
  case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
  case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
  case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
  case ISD::SET_ROUNDING: return LowerSET_ROUNDING(Op, DAG);
  case ISD::CTLZ:
  case ISD::CTLZ_ZERO_UNDEF: return LowerCTLZ(Op, Subtarget, DAG);
  case ISD::CTTZ:
  case ISD::CTTZ_ZERO_UNDEF: return LowerCTTZ(Op, Subtarget, DAG);
  case ISD::MUL: return LowerMUL(Op, Subtarget, DAG);
  case ISD::MULHS:
  case ISD::MULHU: return LowerMULH(Op, Subtarget, DAG);
  case ISD::ROTL:
  case ISD::ROTR: return LowerRotate(Op, Subtarget, DAG);
  case ISD::SRA:
  case ISD::SRL:
  case ISD::SHL: return LowerShift(Op, Subtarget, DAG);
  case ISD::SADDO:
  case ISD::UADDO:
  case ISD::SSUBO:
  case ISD::USUBO: return LowerXALUO(Op, DAG);
  case ISD::SMULO:
  case ISD::UMULO: return LowerMULO(Op, Subtarget, DAG);
  case ISD::READCYCLECOUNTER: return LowerREADCYCLECOUNTER(Op, Subtarget,DAG);
  case ISD::BITCAST: return LowerBITCAST(Op, Subtarget, DAG);
  case ISD::SADDO_CARRY:
  case ISD::SSUBO_CARRY:
  case ISD::ADDCARRY:
  case ISD::SUBCARRY: return LowerADDSUBCARRY(Op, DAG);
  case ISD::ADD:
  case ISD::SUB: return lowerAddSub(Op, DAG, Subtarget);
  case ISD::UADDSAT:
  case ISD::SADDSAT:
  case ISD::USUBSAT:
  case ISD::SSUBSAT: return LowerADDSAT_SUBSAT(Op, DAG, Subtarget);
  case ISD::SMAX:
  case ISD::SMIN:
  case ISD::UMAX:
  case ISD::UMIN: return LowerMINMAX(Op, DAG);
  case ISD::ABS: return LowerABS(Op, Subtarget, DAG);
  case ISD::FSINCOS: return LowerFSINCOS(Op, Subtarget, DAG);
  case ISD::MLOAD: return LowerMLOAD(Op, Subtarget, DAG);
  case ISD::MSTORE: return LowerMSTORE(Op, Subtarget, DAG);
  case ISD::MGATHER: return LowerMGATHER(Op, Subtarget, DAG);
  case ISD::MSCATTER: return LowerMSCATTER(Op, Subtarget, DAG);
  case ISD::GC_TRANSITION_START:
  case ISD::GC_TRANSITION_END: return LowerGC_TRANSITION(Op, DAG);
  case ISD::ADDRSPACECAST: return LowerADDRSPACECAST(Op, DAG);
  case X86ISD::CVTPS2PH: return LowerCVTPS2PH(Op, DAG);
  }
}
| 30578 | |
| 30579 | |
| 30580 | |
| 30581 | void X86TargetLowering::ReplaceNodeResults(SDNode *N, |
| 30582 | SmallVectorImpl<SDValue>&Results, |
| 30583 | SelectionDAG &DAG) const { |
| 30584 | SDLoc dl(N); |
| 30585 | switch (N->getOpcode()) { |
| 30586 | default: |
| 30587 | #ifndef NDEBUG |
| 30588 | dbgs() << "ReplaceNodeResults: "; |
| 30589 | N->dump(&DAG); |
| 30590 | #endif |
| 30591 | llvm_unreachable("Do not know how to custom type legalize this operation!"); |
| 30592 | case X86ISD::CVTPH2PS: { |
| 30593 | EVT VT = N->getValueType(0); |
| 30594 | SDValue Lo, Hi; |
| 30595 | std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0); |
| 30596 | EVT LoVT, HiVT; |
| 30597 | std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT); |
| 30598 | Lo = DAG.getNode(X86ISD::CVTPH2PS, dl, LoVT, Lo); |
| 30599 | Hi = DAG.getNode(X86ISD::CVTPH2PS, dl, HiVT, Hi); |
| 30600 | SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi); |
| 30601 | Results.push_back(Res); |
| 30602 | return; |
| 30603 | } |
| 30604 | case X86ISD::STRICT_CVTPH2PS: { |
| 30605 | EVT VT = N->getValueType(0); |
| 30606 | SDValue Lo, Hi; |
| 30607 | std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 1); |
| 30608 | EVT LoVT, HiVT; |
| 30609 | std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT); |
| 30610 | Lo = DAG.getNode(X86ISD::STRICT_CVTPH2PS, dl, {LoVT, MVT::Other}, |
| 30611 | {N->getOperand(0), Lo}); |
| 30612 | Hi = DAG.getNode(X86ISD::STRICT_CVTPH2PS, dl, {HiVT, MVT::Other}, |
| 30613 | {N->getOperand(0), Hi}); |
| 30614 | SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, |
| 30615 | Lo.getValue(1), Hi.getValue(1)); |
| 30616 | SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi); |
| 30617 | Results.push_back(Res); |
| 30618 | Results.push_back(Chain); |
| 30619 | return; |
| 30620 | } |
| 30621 | case X86ISD::CVTPS2PH: |
| 30622 | Results.push_back(LowerCVTPS2PH(SDValue(N, 0), DAG)); |
| 30623 | return; |
| 30624 | case ISD::CTPOP: { |
| 30625 | assert(N->getValueType(0) == MVT::i64 && "Unexpected VT!"); |
| 30626 | |
| 30627 | bool NoImplicitFloatOps = |
| 30628 | DAG.getMachineFunction().getFunction().hasFnAttribute( |
| 30629 | Attribute::NoImplicitFloat); |
| 30630 | if (isTypeLegal(MVT::v2i64) && !NoImplicitFloatOps) { |
| 30631 | SDValue Wide = |
| 30632 | DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, N->getOperand(0)); |
| 30633 | Wide = DAG.getNode(ISD::CTPOP, dl, MVT::v2i64, Wide); |
| 30634 | |
| 30635 | |
| 30636 | Wide = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, Wide); |
| 30637 | Wide = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i32, Wide, |
| 30638 | DAG.getIntPtrConstant(0, dl)); |
| 30639 | Wide = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Wide); |
| 30640 | Results.push_back(Wide); |
| 30641 | } |
| 30642 | return; |
| 30643 | } |
| 30644 | case ISD::MUL: { |
| 30645 | EVT VT = N->getValueType(0); |
| 30646 | assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector && |
| 30647 | VT.getVectorElementType() == MVT::i8 && "Unexpected VT!"); |
| 30648 | |
| 30649 | |
| 30650 | MVT MulVT = MVT::getVectorVT(MVT::i16, VT.getVectorNumElements()); |
| 30651 | SDValue Op0 = DAG.getNode(ISD::ANY_EXTEND, dl, MulVT, N->getOperand(0)); |
| 30652 | SDValue Op1 = DAG.getNode(ISD::ANY_EXTEND, dl, MulVT, N->getOperand(1)); |
| 30653 | SDValue Res = DAG.getNode(ISD::MUL, dl, MulVT, Op0, Op1); |
| 30654 | Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res); |
| 30655 | unsigned NumConcats = 16 / VT.getVectorNumElements(); |
| 30656 | SmallVector<SDValue, 8> ConcatOps(NumConcats, DAG.getUNDEF(VT)); |
| 30657 | ConcatOps[0] = Res; |
| 30658 | Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v16i8, ConcatOps); |
| 30659 | Results.push_back(Res); |
| 30660 | return; |
| 30661 | } |
| 30662 | case X86ISD::VPMADDWD: |
| 30663 | case X86ISD::AVG: { |
| 30664 | |
| 30665 | assert(Subtarget.hasSSE2() && "Requires at least SSE2!"); |
| 30666 | |
| 30667 | EVT VT = N->getValueType(0); |
| 30668 | EVT InVT = N->getOperand(0).getValueType(); |
| 30669 | assert(VT.getSizeInBits() < 128 && 128 % VT.getSizeInBits() == 0 && |
| 30670 | "Expected a VT that divides into 128 bits."); |
| 30671 | assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector && |
| 30672 | "Unexpected type action!"); |
| 30673 | unsigned NumConcat = 128 / InVT.getSizeInBits(); |
| 30674 | |
| 30675 | EVT InWideVT = EVT::getVectorVT(*DAG.getContext(), |
| 30676 | InVT.getVectorElementType(), |
| 30677 | NumConcat * InVT.getVectorNumElements()); |
| 30678 | EVT WideVT = EVT::getVectorVT(*DAG.getContext(), |
| 30679 | VT.getVectorElementType(), |
| 30680 | NumConcat * VT.getVectorNumElements()); |
| 30681 | |
| 30682 | SmallVector<SDValue, 16> Ops(NumConcat, DAG.getUNDEF(InVT)); |
| 30683 | Ops[0] = N->getOperand(0); |
| 30684 | SDValue InVec0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, InWideVT, Ops); |
| 30685 | Ops[0] = N->getOperand(1); |
| 30686 | SDValue InVec1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, InWideVT, Ops); |
| 30687 | |
| 30688 | SDValue Res = DAG.getNode(N->getOpcode(), dl, WideVT, InVec0, InVec1); |
| 30689 | Results.push_back(Res); |
| 30690 | return; |
| 30691 | } |
| 30692 | |
| 30693 | case X86ISD::FMINC: |
| 30694 | case X86ISD::FMIN: |
| 30695 | case X86ISD::FMAXC: |
| 30696 | case X86ISD::FMAX: { |
| 30697 | EVT VT = N->getValueType(0); |
| 30698 | assert(VT == MVT::v2f32 && "Unexpected type (!= v2f32) on FMIN/FMAX."); |
| 30699 | SDValue UNDEF = DAG.getUNDEF(VT); |
| 30700 | SDValue LHS = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, |
| 30701 | N->getOperand(0), UNDEF); |
| 30702 | SDValue RHS = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, |
| 30703 | N->getOperand(1), UNDEF); |
| 30704 | Results.push_back(DAG.getNode(N->getOpcode(), dl, MVT::v4f32, LHS, RHS)); |
| 30705 | return; |
| 30706 | } |
| 30707 | case ISD::SDIV: |
| 30708 | case ISD::UDIV: |
| 30709 | case ISD::SREM: |
| 30710 | case ISD::UREM: { |
| 30711 | EVT VT = N->getValueType(0); |
| 30712 | if (VT.isVector()) { |
| 30713 | assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector && |
| 30714 | "Unexpected type action!"); |
| 30715 | |
| 30716 | |
| 30717 | |
| 30718 | APInt SplatVal; |
| 30719 | if (ISD::isConstantSplatVector(N->getOperand(1).getNode(), SplatVal)) { |
| 30720 | unsigned NumConcats = 128 / VT.getSizeInBits(); |
| 30721 | SmallVector<SDValue, 8> Ops0(NumConcats, DAG.getUNDEF(VT)); |
| 30722 | Ops0[0] = N->getOperand(0); |
| 30723 | EVT ResVT = getTypeToTransformTo(*DAG.getContext(), VT); |
| 30724 | SDValue N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, ResVT, Ops0); |
| 30725 | SDValue N1 = DAG.getConstant(SplatVal, dl, ResVT); |
| 30726 | SDValue Res = DAG.getNode(N->getOpcode(), dl, ResVT, N0, N1); |
| 30727 | Results.push_back(Res); |
| 30728 | } |
| 30729 | return; |
| 30730 | } |
| 30731 | |
| 30732 | SDValue V = LowerWin64_i128OP(SDValue(N,0), DAG); |
| 30733 | Results.push_back(V); |
| 30734 | return; |
| 30735 | } |
| 30736 | case ISD::TRUNCATE: { |
| 30737 | MVT VT = N->getSimpleValueType(0); |
| 30738 | if (getTypeAction(*DAG.getContext(), VT) != TypeWidenVector) |
| 30739 | return; |
| 30740 | |
| 30741 | |
| 30742 | |
| 30743 | |
| 30744 | MVT WidenVT = getTypeToTransformTo(*DAG.getContext(), VT).getSimpleVT(); |
| 30745 | SDValue In = N->getOperand(0); |
| 30746 | EVT InVT = In.getValueType(); |
| 30747 | |
| 30748 | unsigned InBits = InVT.getSizeInBits(); |
| 30749 | if (128 % InBits == 0) { |
| 30750 | |
| 30751 | |
| 30752 | |
| 30753 | MVT InEltVT = InVT.getSimpleVT().getVectorElementType(); |
| 30754 | EVT EltVT = VT.getVectorElementType(); |
| 30755 | unsigned WidenNumElts = WidenVT.getVectorNumElements(); |
| 30756 | SmallVector<SDValue, 16> Ops(WidenNumElts, DAG.getUNDEF(EltVT)); |
| 30757 | |
| 30758 | |
| 30759 | unsigned MinElts = VT.getVectorNumElements(); |
| 30760 | for (unsigned i=0; i < MinElts; ++i) { |
| 30761 | SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, In, |
| 30762 | DAG.getIntPtrConstant(i, dl)); |
| 30763 | Ops[i] = DAG.getNode(ISD::TRUNCATE, dl, EltVT, Val); |
| 30764 | } |
| 30765 | Results.push_back(DAG.getBuildVector(WidenVT, dl, Ops)); |
| 30766 | return; |
| 30767 | } |
| 30768 | |
| 30769 | |
| 30770 | |
| 30771 | if (Subtarget.hasAVX512() && isTypeLegal(InVT)) { |
| 30772 | |
| 30773 | if ((InBits == 256 && Subtarget.hasVLX()) || InBits == 512) { |
| 30774 | Results.push_back(DAG.getNode(X86ISD::VTRUNC, dl, WidenVT, In)); |
| 30775 | return; |
| 30776 | } |
| 30777 | |
| 30778 | if (InVT == MVT::v4i64 && VT == MVT::v4i8 && isTypeLegal(MVT::v8i64)) { |
| 30779 | In = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i64, In, |
| 30780 | DAG.getUNDEF(MVT::v4i64)); |
| 30781 | Results.push_back(DAG.getNode(X86ISD::VTRUNC, dl, WidenVT, In)); |
| 30782 | return; |
| 30783 | } |
| 30784 | } |
| 30785 | if (Subtarget.hasVLX() && InVT == MVT::v8i64 && VT == MVT::v8i8 && |
| 30786 | getTypeAction(*DAG.getContext(), InVT) == TypeSplitVector && |
| 30787 | isTypeLegal(MVT::v4i64)) { |
| 30788 | |
| 30789 | |
| 30790 | SDValue Lo, Hi; |
| 30791 | std::tie(Lo, Hi) = DAG.SplitVector(In, dl); |
| 30792 | |
| 30793 | Lo = DAG.getNode(X86ISD::VTRUNC, dl, MVT::v16i8, Lo); |
| 30794 | Hi = DAG.getNode(X86ISD::VTRUNC, dl, MVT::v16i8, Hi); |
| 30795 | SDValue Res = DAG.getVectorShuffle(MVT::v16i8, dl, Lo, Hi, |
| 30796 | { 0, 1, 2, 3, 16, 17, 18, 19, |
| 30797 | -1, -1, -1, -1, -1, -1, -1, -1 }); |
| 30798 | Results.push_back(Res); |
| 30799 | return; |
| 30800 | } |
| 30801 | |
| 30802 | return; |
| 30803 | } |
| 30804 | case ISD::ANY_EXTEND: |
| 30805 | |
| 30806 | |
| 30807 | assert(N->getValueType(0) == MVT::v8i8 && |
| 30808 | "Do not know how to legalize this Node"); |
| 30809 | return; |
| 30810 | case ISD::SIGN_EXTEND: |
| 30811 | case ISD::ZERO_EXTEND: { |
| 30812 | EVT VT = N->getValueType(0); |
| 30813 | SDValue In = N->getOperand(0); |
| 30814 | EVT InVT = In.getValueType(); |
| 30815 | if (!Subtarget.hasSSE41() && VT == MVT::v4i64 && |
| 30816 | (InVT == MVT::v4i16 || InVT == MVT::v4i8)){ |
| 30817 | assert(getTypeAction(*DAG.getContext(), InVT) == TypeWidenVector && |
| 30818 | "Unexpected type action!"); |
| 30819 | assert(N->getOpcode() == ISD::SIGN_EXTEND && "Unexpected opcode"); |
| 30820 | |
| 30821 | |
| 30822 | |
| 30823 | |
| 30824 | In = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, In); |
| 30825 | |
| 30826 | |
| 30827 | SDValue Zero = DAG.getConstant(0, dl, MVT::v4i32); |
| 30828 | SDValue SignBits = DAG.getSetCC(dl, MVT::v4i32, Zero, In, ISD::SETGT); |
| 30829 | |
| 30830 | |
| 30831 | |
| 30832 | SDValue Lo = DAG.getVectorShuffle(MVT::v4i32, dl, In, SignBits, |
| 30833 | {0, 4, 1, 5}); |
| 30834 | Lo = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Lo); |
| 30835 | SDValue Hi = DAG.getVectorShuffle(MVT::v4i32, dl, In, SignBits, |
| 30836 | {2, 6, 3, 7}); |
| 30837 | Hi = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, Hi); |
| 30838 | |
| 30839 | SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi); |
| 30840 | Results.push_back(Res); |
| 30841 | return; |
| 30842 | } |
| 30843 | |
| 30844 | if (VT == MVT::v16i32 || VT == MVT::v8i64) { |
| 30845 | if (!InVT.is128BitVector()) { |
| 30846 | |
| 30847 | |
| 30848 | if (getTypeAction(*DAG.getContext(), InVT) != TypePromoteInteger) |
| 30849 | return; |
| 30850 | InVT = getTypeToTransformTo(*DAG.getContext(), InVT); |
| 30851 | if (!InVT.is128BitVector()) |
| 30852 | return; |
| 30853 | |
| 30854 | |
| 30855 | |
| 30856 | In = DAG.getNode(N->getOpcode(), dl, InVT, In); |
| 30857 | } |
| 30858 | |
| 30859 | |
| 30860 | |
| 30861 | EVT LoVT, HiVT; |
| 30862 | std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); |
| 30863 | assert(isTypeLegal(LoVT) && "Split VT not legal?"); |
| 30864 | |
| 30865 | SDValue Lo = getEXTEND_VECTOR_INREG(N->getOpcode(), dl, LoVT, In, DAG); |
| 30866 | |
| 30867 | |
| 30868 | unsigned NumElts = InVT.getVectorNumElements(); |
| 30869 | unsigned HalfNumElts = NumElts / 2; |
| 30870 | SmallVector<int, 16> ShufMask(NumElts, SM_SentinelUndef); |
| 30871 | for (unsigned i = 0; i != HalfNumElts; ++i) |
| 30872 | ShufMask[i] = i + HalfNumElts; |
| 30873 | |
| 30874 | SDValue Hi = DAG.getVectorShuffle(InVT, dl, In, In, ShufMask); |
| 30875 | Hi = getEXTEND_VECTOR_INREG(N->getOpcode(), dl, HiVT, Hi, DAG); |
| 30876 | |
| 30877 | SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Lo, Hi); |
| 30878 | Results.push_back(Res); |
| 30879 | } |
| 30880 | return; |
| 30881 | } |
| 30882 | case ISD::FP_TO_SINT: |
| 30883 | case ISD::STRICT_FP_TO_SINT: |
| 30884 | case ISD::FP_TO_UINT: |
| 30885 | case ISD::STRICT_FP_TO_UINT: { |
| 30886 | bool IsStrict = N->isStrictFPOpcode(); |
| 30887 | bool IsSigned = N->getOpcode() == ISD::FP_TO_SINT || |
| 30888 | N->getOpcode() == ISD::STRICT_FP_TO_SINT; |
| 30889 | EVT VT = N->getValueType(0); |
| 30890 | SDValue Src = N->getOperand(IsStrict ? 1 : 0); |
| 30891 | EVT SrcVT = Src.getValueType(); |
| 30892 | |
| 30893 | if (VT.isVector() && VT.getScalarSizeInBits() < 32) { |
| 30894 | assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector && |
| 30895 | "Unexpected type action!"); |
| 30896 | |
| 30897 | |
| 30898 | unsigned NewEltWidth = std::min(128 / VT.getVectorNumElements(), 32U); |
| 30899 | MVT PromoteVT = MVT::getVectorVT(MVT::getIntegerVT(NewEltWidth), |
| 30900 | VT.getVectorNumElements()); |
| 30901 | SDValue Res; |
| 30902 | SDValue Chain; |
| 30903 | if (IsStrict) { |
| 30904 | Res = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl, {PromoteVT, MVT::Other}, |
| 30905 | {N->getOperand(0), Src}); |
| 30906 | Chain = Res.getValue(1); |
| 30907 | } else |
| 30908 | Res = DAG.getNode(ISD::FP_TO_SINT, dl, PromoteVT, Src); |
| 30909 | |
| 30910 | |
| 30911 | |
| 30912 | if (PromoteVT == MVT::v2i32) |
| 30913 | Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32, Res, |
| 30914 | DAG.getUNDEF(MVT::v2i32)); |
| 30915 | |
| 30916 | Res = DAG.getNode(!IsSigned ? ISD::AssertZext : ISD::AssertSext, dl, |
| 30917 | Res.getValueType(), Res, |
| 30918 | DAG.getValueType(VT.getVectorElementType())); |
| 30919 | |
| 30920 | if (PromoteVT == MVT::v2i32) |
| 30921 | Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2i32, Res, |
| 30922 | DAG.getIntPtrConstant(0, dl)); |
| 30923 | |
| 30924 | |
| 30925 | Res = DAG.getNode(ISD::TRUNCATE, dl, VT, Res); |
| 30926 | |
| 30927 | |
| 30928 | unsigned NumConcats = 128 / VT.getSizeInBits(); |
| 30929 | MVT ConcatVT = MVT::getVectorVT(VT.getSimpleVT().getVectorElementType(), |
| 30930 | VT.getVectorNumElements() * NumConcats); |
| 30931 | SmallVector<SDValue, 8> ConcatOps(NumConcats, DAG.getUNDEF(VT)); |
| 30932 | ConcatOps[0] = Res; |
| 30933 | Res = DAG.getNode(ISD::CONCAT_VECTORS, dl, ConcatVT, ConcatOps); |
| 30934 | Results.push_back(Res); |
| 30935 | if (IsStrict) |
| 30936 | Results.push_back(Chain); |
| 30937 | return; |
| 30938 | } |
| 30939 | |
| 30940 | |
| 30941 | if (VT == MVT::v2i32) { |
| 30942 | assert((!IsStrict || IsSigned || Subtarget.hasAVX512()) && |
| 30943 | "Strict unsigned conversion requires AVX512"); |
| 30944 | assert(Subtarget.hasSSE2() && "Requires at least SSE2!"); |
| 30945 | assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector && |
| 30946 | "Unexpected type action!"); |
| 30947 | if (Src.getValueType() == MVT::v2f64) { |
| 30948 | if (!IsSigned && !Subtarget.hasAVX512()) { |
| 30949 | SDValue Res = |
| 30950 | expandFP_TO_UINT_SSE(MVT::v4i32, Src, dl, DAG, Subtarget); |
| 30951 | Results.push_back(Res); |
| 30952 | return; |
| 30953 | } |
| 30954 | |
| 30955 | unsigned Opc; |
| 30956 | if (IsStrict) |
| 30957 | Opc = IsSigned ? X86ISD::STRICT_CVTTP2SI : X86ISD::STRICT_CVTTP2UI; |
| 30958 | else |
| 30959 | Opc = IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI; |
| 30960 | |
| 30961 | |
| 30962 | if (!IsSigned && !Subtarget.hasVLX()) { |
| 30963 | |
| 30964 | |
| 30965 | |
| 30966 | |
| 30967 | |
| 30968 | if (!IsStrict) |
| 30969 | return; |
| 30970 | Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f64, Src, |
| 30971 | DAG.getConstantFP(0.0, dl, MVT::v2f64)); |
| 30972 | Opc = N->getOpcode(); |
| 30973 | } |
| 30974 | SDValue Res; |
| 30975 | SDValue Chain; |
| 30976 | if (IsStrict) { |
| 30977 | Res = DAG.getNode(Opc, dl, {MVT::v4i32, MVT::Other}, |
| 30978 | {N->getOperand(0), Src}); |
| 30979 | Chain = Res.getValue(1); |
| 30980 | } else { |
| 30981 | Res = DAG.getNode(Opc, dl, MVT::v4i32, Src); |
| 30982 | } |
| 30983 | Results.push_back(Res); |
| 30984 | if (IsStrict) |
| 30985 | Results.push_back(Chain); |
| 30986 | return; |
| 30987 | } |
| 30988 | |
| 30989 | |
| 30990 | |
| 30991 | if (Src.getValueType() == MVT::v2f32 && IsStrict) { |
| 30992 | Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src, |
| 30993 | DAG.getConstantFP(0.0, dl, MVT::v2f32)); |
| 30994 | SDValue Res = DAG.getNode(N->getOpcode(), dl, {MVT::v4i32, MVT::Other}, |
| 30995 | {N->getOperand(0), Src}); |
| 30996 | Results.push_back(Res); |
| 30997 | Results.push_back(Res.getValue(1)); |
| 30998 | return; |
| 30999 | } |
| 31000 | |
| 31001 | |
| 31002 | |
| 31003 | return; |
| 31004 | } |
| 31005 | |
| 31006 | assert(!VT.isVector() && "Vectors should have been handled above!"); |
| 31007 | |
| 31008 | if (Subtarget.hasDQI() && VT == MVT::i64 && |
| 31009 | (SrcVT == MVT::f32 || SrcVT == MVT::f64)) { |
| 31010 | assert(!Subtarget.is64Bit() && "i64 should be legal"); |
| 31011 | unsigned NumElts = Subtarget.hasVLX() ? 2 : 8; |
| 31012 | |
| 31013 | unsigned SrcElts = |
| 31014 | std::max(NumElts, 128U / (unsigned)SrcVT.getSizeInBits()); |
| 31015 | MVT VecVT = MVT::getVectorVT(MVT::i64, NumElts); |
| 31016 | MVT VecInVT = MVT::getVectorVT(SrcVT.getSimpleVT(), SrcElts); |
| 31017 | unsigned Opc = N->getOpcode(); |
| 31018 | if (NumElts != SrcElts) { |
| 31019 | if (IsStrict) |
| 31020 | Opc = IsSigned ? X86ISD::STRICT_CVTTP2SI : X86ISD::STRICT_CVTTP2UI; |
| 31021 | else |
| 31022 | Opc = IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI; |
| 31023 | } |
| 31024 | |
| 31025 | SDValue ZeroIdx = DAG.getIntPtrConstant(0, dl); |
| 31026 | SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecInVT, |
| 31027 | DAG.getConstantFP(0.0, dl, VecInVT), Src, |
| 31028 | ZeroIdx); |
| 31029 | SDValue Chain; |
| 31030 | if (IsStrict) { |
| 31031 | SDVTList Tys = DAG.getVTList(VecVT, MVT::Other); |
| 31032 | Res = DAG.getNode(Opc, SDLoc(N), Tys, N->getOperand(0), Res); |
| 31033 | Chain = Res.getValue(1); |
| 31034 | } else |
| 31035 | Res = DAG.getNode(Opc, SDLoc(N), VecVT, Res); |
| 31036 | Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Res, ZeroIdx); |
| 31037 | Results.push_back(Res); |
| 31038 | if (IsStrict) |
| 31039 | Results.push_back(Chain); |
| 31040 | return; |
| 31041 | } |
| 31042 | |
| 31043 | SDValue Chain; |
| 31044 | if (SDValue V = FP_TO_INTHelper(SDValue(N, 0), DAG, IsSigned, Chain)) { |
| 31045 | Results.push_back(V); |
| 31046 | if (IsStrict) |
| 31047 | Results.push_back(Chain); |
| 31048 | } |
| 31049 | return; |
| 31050 | } |
| 31051 | case ISD::LRINT: |
| 31052 | case ISD::LLRINT: { |
| 31053 | if (SDValue V = LRINT_LLRINTHelper(N, DAG)) |
| 31054 | Results.push_back(V); |
| 31055 | return; |
| 31056 | } |
| 31057 | |
| 31058 | case ISD::SINT_TO_FP: |
| 31059 | case ISD::STRICT_SINT_TO_FP: |
| 31060 | case ISD::UINT_TO_FP: |
| 31061 | case ISD::STRICT_UINT_TO_FP: { |
| 31062 | bool IsStrict = N->isStrictFPOpcode(); |
| 31063 | bool IsSigned = N->getOpcode() == ISD::SINT_TO_FP || |
| 31064 | N->getOpcode() == ISD::STRICT_SINT_TO_FP; |
| 31065 | EVT VT = N->getValueType(0); |
| 31066 | if (VT != MVT::v2f32) |
| 31067 | return; |
| 31068 | SDValue Src = N->getOperand(IsStrict ? 1 : 0); |
| 31069 | EVT SrcVT = Src.getValueType(); |
| 31070 | if (Subtarget.hasDQI() && Subtarget.hasVLX() && SrcVT == MVT::v2i64) { |
| 31071 | if (IsStrict) { |
| 31072 | unsigned Opc = IsSigned ? X86ISD::STRICT_CVTSI2P |
| 31073 | : X86ISD::STRICT_CVTUI2P; |
| 31074 | SDValue Res = DAG.getNode(Opc, dl, {MVT::v4f32, MVT::Other}, |
| 31075 | {N->getOperand(0), Src}); |
| 31076 | Results.push_back(Res); |
| 31077 | Results.push_back(Res.getValue(1)); |
| 31078 | } else { |
| 31079 | unsigned Opc = IsSigned ? X86ISD::CVTSI2P : X86ISD::CVTUI2P; |
| 31080 | Results.push_back(DAG.getNode(Opc, dl, MVT::v4f32, Src)); |
| 31081 | } |
| 31082 | return; |
| 31083 | } |
| 31084 | if (SrcVT == MVT::v2i64 && !IsSigned && Subtarget.is64Bit() && |
| 31085 | Subtarget.hasSSE41() && !Subtarget.hasAVX512()) { |
| 31086 | SDValue Zero = DAG.getConstant(0, dl, SrcVT); |
| 31087 | SDValue One = DAG.getConstant(1, dl, SrcVT); |
| 31088 | SDValue Sign = DAG.getNode(ISD::OR, dl, SrcVT, |
| 31089 | DAG.getNode(ISD::SRL, dl, SrcVT, Src, One), |
| 31090 | DAG.getNode(ISD::AND, dl, SrcVT, Src, One)); |
| 31091 | SDValue IsNeg = DAG.getSetCC(dl, MVT::v2i64, Src, Zero, ISD::SETLT); |
| 31092 | SDValue SignSrc = DAG.getSelect(dl, SrcVT, IsNeg, Sign, Src); |
| 31093 | SmallVector<SDValue, 4> SignCvts(4, DAG.getConstantFP(0.0, dl, MVT::f32)); |
| 31094 | for (int i = 0; i != 2; ++i) { |
| 31095 | SDValue Elt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, |
| 31096 | SignSrc, DAG.getIntPtrConstant(i, dl)); |
| 31097 | if (IsStrict) |
| 31098 | SignCvts[i] = |
| 31099 | DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, {MVT::f32, MVT::Other}, |
| 31100 | {N->getOperand(0), Elt}); |
| 31101 | else |
| 31102 | SignCvts[i] = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, Elt); |
| 31103 | }; |
| 31104 | SDValue SignCvt = DAG.getBuildVector(MVT::v4f32, dl, SignCvts); |
| 31105 | SDValue Slow, Chain; |
| 31106 | if (IsStrict) { |
| 31107 | Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, |
| 31108 | SignCvts[0].getValue(1), SignCvts[1].getValue(1)); |
| 31109 | Slow = DAG.getNode(ISD::STRICT_FADD, dl, {MVT::v4f32, MVT::Other}, |
| 31110 | {Chain, SignCvt, SignCvt}); |
| 31111 | Chain = Slow.getValue(1); |
| 31112 | } else { |
| 31113 | Slow = DAG.getNode(ISD::FADD, dl, MVT::v4f32, SignCvt, SignCvt); |
| 31114 | } |
| 31115 | IsNeg = DAG.getBitcast(MVT::v4i32, IsNeg); |
| 31116 | IsNeg = |
| 31117 | DAG.getVectorShuffle(MVT::v4i32, dl, IsNeg, IsNeg, {1, 3, -1, -1}); |
| 31118 | SDValue Cvt = DAG.getSelect(dl, MVT::v4f32, IsNeg, Slow, SignCvt); |
| 31119 | Results.push_back(Cvt); |
| 31120 | if (IsStrict) |
| 31121 | Results.push_back(Chain); |
| 31122 | return; |
| 31123 | } |
| 31124 | |
| 31125 | if (SrcVT != MVT::v2i32) |
| 31126 | return; |
| 31127 | |
| 31128 | if (IsSigned || Subtarget.hasAVX512()) { |
| 31129 | if (!IsStrict) |
| 31130 | return; |
| 31131 | |
| 31132 | |
| 31133 | |
| 31134 | Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i32, Src, |
| 31135 | DAG.getConstant(0, dl, MVT::v2i32)); |
| 31136 | SDValue Res = DAG.getNode(N->getOpcode(), dl, {MVT::v4f32, MVT::Other}, |
| 31137 | {N->getOperand(0), Src}); |
| 31138 | Results.push_back(Res); |
| 31139 | Results.push_back(Res.getValue(1)); |
| 31140 | return; |
| 31141 | } |
| 31142 | |
| 31143 | assert(Subtarget.hasSSE2() && "Requires at least SSE2!"); |
| 31144 | SDValue ZExtIn = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::v2i64, Src); |
| 31145 | SDValue VBias = |
| 31146 | DAG.getConstantFP(BitsToDouble(0x4330000000000000ULL), dl, MVT::v2f64); |
| 31147 | SDValue Or = DAG.getNode(ISD::OR, dl, MVT::v2i64, ZExtIn, |
| 31148 | DAG.getBitcast(MVT::v2i64, VBias)); |
| 31149 | Or = DAG.getBitcast(MVT::v2f64, Or); |
| 31150 | if (IsStrict) { |
| 31151 | SDValue Sub = DAG.getNode(ISD::STRICT_FSUB, dl, {MVT::v2f64, MVT::Other}, |
| 31152 | {N->getOperand(0), Or, VBias}); |
| 31153 | SDValue Res = DAG.getNode(X86ISD::STRICT_VFPROUND, dl, |
| 31154 | {MVT::v4f32, MVT::Other}, |
| 31155 | {Sub.getValue(1), Sub}); |
| 31156 | Results.push_back(Res); |
| 31157 | Results.push_back(Res.getValue(1)); |
| 31158 | } else { |
| 31159 | |
| 31160 | SDValue Sub = DAG.getNode(ISD::FSUB, dl, MVT::v2f64, Or, VBias); |
| 31161 | Results.push_back(DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, Sub)); |
| 31162 | } |
| 31163 | return; |
| 31164 | } |
| 31165 | case ISD::STRICT_FP_ROUND: |
| 31166 | case ISD::FP_ROUND: { |
| 31167 | bool IsStrict = N->isStrictFPOpcode(); |
| 31168 | SDValue Src = N->getOperand(IsStrict ? 1 : 0); |
| 31169 | if (!isTypeLegal(Src.getValueType())) |
| 31170 | return; |
| 31171 | SDValue V; |
| 31172 | if (IsStrict) |
| 31173 | V = DAG.getNode(X86ISD::STRICT_VFPROUND, dl, {MVT::v4f32, MVT::Other}, |
| 31174 | {N->getOperand(0), N->getOperand(1)}); |
| 31175 | else |
| 31176 | V = DAG.getNode(X86ISD::VFPROUND, dl, MVT::v4f32, N->getOperand(0)); |
| 31177 | Results.push_back(V); |
| 31178 | if (IsStrict) |
| 31179 | Results.push_back(V.getValue(1)); |
| 31180 | return; |
| 31181 | } |
| 31182 | case ISD::FP_EXTEND: |
| 31183 | case ISD::STRICT_FP_EXTEND: { |
| 31184 | |
| 31185 | |
| 31186 | assert(N->getValueType(0) == MVT::v2f32 && |
| 31187 | "Do not know how to legalize this Node"); |
| 31188 | return; |
| 31189 | } |
| 31190 | case ISD::INTRINSIC_W_CHAIN: { |
| 31191 | unsigned IntNo = N->getConstantOperandVal(1); |
| 31192 | switch (IntNo) { |
| 31193 | default : llvm_unreachable("Do not know how to custom type " |
| 31194 | "legalize this intrinsic operation!"); |
| 31195 | case Intrinsic::x86_rdtsc: |
| 31196 | return getReadTimeStampCounter(N, dl, X86::RDTSC, DAG, Subtarget, |
| 31197 | Results); |
| 31198 | case Intrinsic::x86_rdtscp: |
| 31199 | return getReadTimeStampCounter(N, dl, X86::RDTSCP, DAG, Subtarget, |
| 31200 | Results); |
| 31201 | case Intrinsic::x86_rdpmc: |
| 31202 | expandIntrinsicWChainHelper(N, dl, DAG, X86::RDPMC, X86::ECX, Subtarget, |
| 31203 | Results); |
| 31204 | return; |
| 31205 | case Intrinsic::x86_xgetbv: |
| 31206 | expandIntrinsicWChainHelper(N, dl, DAG, X86::XGETBV, X86::ECX, Subtarget, |
| 31207 | Results); |
| 31208 | return; |
| 31209 | } |
| 31210 | } |
| 31211 | case ISD::READCYCLECOUNTER: { |
| 31212 | return getReadTimeStampCounter(N, dl, X86::RDTSC, DAG, Subtarget, Results); |
| 31213 | } |
| 31214 | case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS: { |
| 31215 | EVT T = N->getValueType(0); |
| 31216 | assert((T == MVT::i64 || T == MVT::i128) && "can only expand cmpxchg pair"); |
| 31217 | bool Regs64bit = T == MVT::i128; |
| 31218 | assert((!Regs64bit || Subtarget.hasCmpxchg16b()) && |
| 31219 | "64-bit ATOMIC_CMP_SWAP_WITH_SUCCESS requires CMPXCHG16B"); |
| 31220 | MVT HalfT = Regs64bit ? MVT::i64 : MVT::i32; |
| 31221 | SDValue cpInL, cpInH; |
| 31222 | cpInL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(2), |
| 31223 | DAG.getConstant(0, dl, HalfT)); |
| 31224 | cpInH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(2), |
| 31225 | DAG.getConstant(1, dl, HalfT)); |
| 31226 | cpInL = DAG.getCopyToReg(N->getOperand(0), dl, |
| 31227 | Regs64bit ? X86::RAX : X86::EAX, |
| 31228 | cpInL, SDValue()); |
| 31229 | cpInH = DAG.getCopyToReg(cpInL.getValue(0), dl, |
| 31230 | Regs64bit ? X86::RDX : X86::EDX, |
| 31231 | cpInH, cpInL.getValue(1)); |
| 31232 | SDValue swapInL, swapInH; |
| 31233 | swapInL = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(3), |
| 31234 | DAG.getConstant(0, dl, HalfT)); |
| 31235 | swapInH = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, HalfT, N->getOperand(3), |
| 31236 | DAG.getConstant(1, dl, HalfT)); |
| 31237 | swapInH = |
| 31238 | DAG.getCopyToReg(cpInH.getValue(0), dl, Regs64bit ? X86::RCX : X86::ECX, |
| 31239 | swapInH, cpInH.getValue(1)); |
| 31240 | |
| 31241 | |
| 31242 | |
| 31243 | |
| 31244 | |
| 31245 | |
| 31246 | |
| 31247 | SDValue Result; |
| 31248 | SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue); |
| 31249 | MachineMemOperand *MMO = cast<AtomicSDNode>(N)->getMemOperand(); |
| 31250 | if (Regs64bit) { |
| 31251 | SDValue Ops[] = {swapInH.getValue(0), N->getOperand(1), swapInL, |
| 31252 | swapInH.getValue(1)}; |
| 31253 | Result = |
| 31254 | DAG.getMemIntrinsicNode(X86ISD::LCMPXCHG16_DAG, dl, Tys, Ops, T, MMO); |
| 31255 | } else { |
| 31256 | swapInL = DAG.getCopyToReg(swapInH.getValue(0), dl, X86::EBX, swapInL, |
| 31257 | swapInH.getValue(1)); |
| 31258 | SDValue Ops[] = {swapInL.getValue(0), N->getOperand(1), |
| 31259 | swapInL.getValue(1)}; |
| 31260 | Result = |
| 31261 | DAG.getMemIntrinsicNode(X86ISD::LCMPXCHG8_DAG, dl, Tys, Ops, T, MMO); |
| 31262 | } |
| 31263 | |
| 31264 | SDValue cpOutL = DAG.getCopyFromReg(Result.getValue(0), dl, |
| 31265 | Regs64bit ? X86::RAX : X86::EAX, |
| 31266 | HalfT, Result.getValue(1)); |
| 31267 | SDValue cpOutH = DAG.getCopyFromReg(cpOutL.getValue(1), dl, |
| 31268 | Regs64bit ? X86::RDX : X86::EDX, |
| 31269 | HalfT, cpOutL.getValue(2)); |
| 31270 | SDValue OpsF[] = { cpOutL.getValue(0), cpOutH.getValue(0)}; |
| 31271 | |
| 31272 | SDValue EFLAGS = DAG.getCopyFromReg(cpOutH.getValue(1), dl, X86::EFLAGS, |
| 31273 | MVT::i32, cpOutH.getValue(2)); |
| 31274 | SDValue Success = getSETCC(X86::COND_E, EFLAGS, dl, DAG); |
| 31275 | Success = DAG.getZExtOrTrunc(Success, dl, N->getValueType(1)); |
| 31276 | |
| 31277 | Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, T, OpsF)); |
| 31278 | Results.push_back(Success); |
| 31279 | Results.push_back(EFLAGS.getValue(1)); |
| 31280 | return; |
| 31281 | } |
| 31282 | case ISD::ATOMIC_LOAD: { |
| 31283 | assert(N->getValueType(0) == MVT::i64 && "Unexpected VT!"); |
| 31284 | bool NoImplicitFloatOps = |
| 31285 | DAG.getMachineFunction().getFunction().hasFnAttribute( |
| 31286 | Attribute::NoImplicitFloat); |
| 31287 | if (!Subtarget.useSoftFloat() && !NoImplicitFloatOps) { |
| 31288 | auto *Node = cast<AtomicSDNode>(N); |
| 31289 | if (Subtarget.hasSSE1()) { |
| 31290 | |
| 31291 | |
| 31292 | MVT LdVT = Subtarget.hasSSE2() ? MVT::v2i64 : MVT::v4f32; |
| 31293 | SDVTList Tys = DAG.getVTList(LdVT, MVT::Other); |
| 31294 | SDValue Ops[] = { Node->getChain(), Node->getBasePtr() }; |
| 31295 | SDValue Ld = DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, |
| 31296 | MVT::i64, Node->getMemOperand()); |
| 31297 | if (Subtarget.hasSSE2()) { |
| 31298 | SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64, Ld, |
| 31299 | DAG.getIntPtrConstant(0, dl)); |
| 31300 | Results.push_back(Res); |
| 31301 | Results.push_back(Ld.getValue(1)); |
| 31302 | return; |
| 31303 | } |
| 31304 | |
| 31305 | |
| 31306 | |
| 31307 | SDValue Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2f32, Ld, |
| 31308 | DAG.getIntPtrConstant(0, dl)); |
| 31309 | Res = DAG.getBitcast(MVT::i64, Res); |
| 31310 | Results.push_back(Res); |
| 31311 | Results.push_back(Ld.getValue(1)); |
| 31312 | return; |
| 31313 | } |
| 31314 | if (Subtarget.hasX87()) { |
| 31315 | |
| 31316 | |
| 31317 | SDVTList Tys = DAG.getVTList(MVT::f80, MVT::Other); |
| 31318 | SDValue Ops[] = { Node->getChain(), Node->getBasePtr() }; |
| 31319 | SDValue Result = DAG.getMemIntrinsicNode(X86ISD::FILD, |
| 31320 | dl, Tys, Ops, MVT::i64, |
| 31321 | Node->getMemOperand()); |
| 31322 | SDValue Chain = Result.getValue(1); |
| 31323 | |
| 31324 | |
| 31325 | |
| 31326 | |
| 31327 | |
| 31328 | SDValue StackPtr = DAG.CreateStackTemporary(MVT::i64); |
| 31329 | int SPFI = cast<FrameIndexSDNode>(StackPtr.getNode())->getIndex(); |
| 31330 | MachinePointerInfo MPI = |
| 31331 | MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), SPFI); |
| 31332 | SDValue StoreOps[] = { Chain, Result, StackPtr }; |
| 31333 | Chain = DAG.getMemIntrinsicNode( |
| 31334 | X86ISD::FIST, dl, DAG.getVTList(MVT::Other), StoreOps, MVT::i64, |
| 31335 | MPI, None , MachineMemOperand::MOStore); |
| 31336 | |
| 31337 | |
| 31338 | |
| 31339 | |
| 31340 | Result = DAG.getLoad(MVT::i64, dl, Chain, StackPtr, MPI); |
| 31341 | Results.push_back(Result); |
| 31342 | Results.push_back(Result.getValue(1)); |
| 31343 | return; |
| 31344 | } |
| 31345 | } |
| 31346 | |
| 31347 | |
| 31348 | |
| 31349 | break; |
| 31350 | } |
| 31351 | case ISD::ATOMIC_SWAP: |
| 31352 | case ISD::ATOMIC_LOAD_ADD: |
| 31353 | case ISD::ATOMIC_LOAD_SUB: |
| 31354 | case ISD::ATOMIC_LOAD_AND: |
| 31355 | case ISD::ATOMIC_LOAD_OR: |
| 31356 | case ISD::ATOMIC_LOAD_XOR: |
| 31357 | case ISD::ATOMIC_LOAD_NAND: |
| 31358 | case ISD::ATOMIC_LOAD_MIN: |
| 31359 | case ISD::ATOMIC_LOAD_MAX: |
| 31360 | case ISD::ATOMIC_LOAD_UMIN: |
| 31361 | case ISD::ATOMIC_LOAD_UMAX: |
| 31362 | |
| 31363 | |
| 31364 | break; |
| 31365 | |
| 31366 | case ISD::BITCAST: { |
| 31367 | assert(Subtarget.hasSSE2() && "Requires at least SSE2!"); |
| 31368 | EVT DstVT = N->getValueType(0); |
| 31369 | EVT SrcVT = N->getOperand(0).getValueType(); |
| 31370 | |
| 31371 | |
| 31372 | |
| 31373 | if (SrcVT == MVT::v64i1 && DstVT == MVT::i64 && Subtarget.hasBWI()) { |
| 31374 | assert(!Subtarget.is64Bit() && "Expected 32-bit mode"); |
| 31375 | SDValue Lo, Hi; |
| 31376 | std::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0); |
| 31377 | Lo = DAG.getBitcast(MVT::i32, Lo); |
| 31378 | Hi = DAG.getBitcast(MVT::i32, Hi); |
| 31379 | SDValue Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi); |
| 31380 | Results.push_back(Res); |
| 31381 | return; |
| 31382 | } |
| 31383 | |
| 31384 | if (DstVT.isVector() && SrcVT == MVT::x86mmx) { |
| 31385 | |
| 31386 | assert(Subtarget.hasSSE2() && "Requires SSE2"); |
| 31387 | assert(getTypeAction(*DAG.getContext(), DstVT) == TypeWidenVector && |
| 31388 | "Unexpected type action!"); |
| 31389 | EVT WideVT = getTypeToTransformTo(*DAG.getContext(), DstVT); |
| 31390 | SDValue Res = DAG.getNode(X86ISD::MOVQ2DQ, dl, MVT::v2i64, |
| 31391 | N->getOperand(0)); |
| 31392 | Res = DAG.getBitcast(WideVT, Res); |
| 31393 | Results.push_back(Res); |
| 31394 | return; |
| 31395 | } |
| 31396 | |
| 31397 | return; |
| 31398 | } |
| 31399 | case ISD::MGATHER: { |
| 31400 | EVT VT = N->getValueType(0); |
| 31401 | if ((VT == MVT::v2f32 || VT == MVT::v2i32) && |
| 31402 | (Subtarget.hasVLX() || !Subtarget.hasAVX512())) { |
| 31403 | auto *Gather = cast<MaskedGatherSDNode>(N); |
| 31404 | SDValue Index = Gather->getIndex(); |
| 31405 | if (Index.getValueType() != MVT::v2i64) |
| 31406 | return; |
| 31407 | assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector && |
| 31408 | "Unexpected type action!"); |
| 31409 | EVT WideVT = getTypeToTransformTo(*DAG.getContext(), VT); |
| 31410 | SDValue Mask = Gather->getMask(); |
| 31411 | assert(Mask.getValueType() == MVT::v2i1 && "Unexpected mask type"); |
| 31412 | SDValue PassThru = DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, |
| 31413 | Gather->getPassThru(), |
| 31414 | DAG.getUNDEF(VT)); |
| 31415 | if (!Subtarget.hasVLX()) { |
| 31416 | |
| 31417 | |
| 31418 | Mask = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4i1, Mask, |
| 31419 | DAG.getUNDEF(MVT::v2i1)); |
| 31420 | Mask = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i32, Mask); |
| 31421 | } |
| 31422 | SDValue Ops[] = { Gather->getChain(), PassThru, Mask, |
| 31423 | Gather->getBasePtr(), Index, Gather->getScale() }; |
| 31424 | SDValue Res = DAG.getMemIntrinsicNode( |
| 31425 | X86ISD::MGATHER, dl, DAG.getVTList(WideVT, MVT::Other), Ops, |
| 31426 | Gather->getMemoryVT(), Gather->getMemOperand()); |
| 31427 | Results.push_back(Res); |
| 31428 | Results.push_back(Res.getValue(1)); |
| 31429 | return; |
| 31430 | } |
| 31431 | return; |
| 31432 | } |
| 31433 | case ISD::LOAD: { |
| 31434 | |
| 31435 | |
| 31436 | |
| 31437 | MVT VT = N->getSimpleValueType(0); |
| 31438 | assert(VT.isVector() && VT.getSizeInBits() == 64 && "Unexpected VT"); |
| 31439 | assert(getTypeAction(*DAG.getContext(), VT) == TypeWidenVector && |
| 31440 | "Unexpected type action!"); |
| 31441 | if (!ISD::isNON_EXTLoad(N)) |
| 31442 | return; |
| 31443 | auto *Ld = cast<LoadSDNode>(N); |
| 31444 | if (Subtarget.hasSSE2()) { |
| 31445 | MVT LdVT = Subtarget.is64Bit() && VT.isInteger() ? MVT::i64 : MVT::f64; |
| 31446 | SDValue Res = DAG.getLoad(LdVT, dl, Ld->getChain(), Ld->getBasePtr(), |
| 31447 | Ld->getPointerInfo(), Ld->getOriginalAlign(), |
| 31448 | Ld->getMemOperand()->getFlags()); |
| 31449 | SDValue Chain = Res.getValue(1); |
| 31450 | MVT VecVT = MVT::getVectorVT(LdVT, 2); |
| 31451 | Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VecVT, Res); |
| 31452 | EVT WideVT = getTypeToTransformTo(*DAG.getContext(), VT); |
| 31453 | Res = DAG.getBitcast(WideVT, Res); |
| 31454 | Results.push_back(Res); |
| 31455 | Results.push_back(Chain); |
| 31456 | return; |
| 31457 | } |
| 31458 | assert(Subtarget.hasSSE1() && "Expected SSE"); |
| 31459 | SDVTList Tys = DAG.getVTList(MVT::v4f32, MVT::Other); |
| 31460 | SDValue Ops[] = {Ld->getChain(), Ld->getBasePtr()}; |
| 31461 | SDValue Res = DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, dl, Tys, Ops, |
| 31462 | MVT::i64, Ld->getMemOperand()); |
| 31463 | Results.push_back(Res); |
| 31464 | Results.push_back(Res.getValue(1)); |
| 31465 | return; |
| 31466 | } |
| 31467 | case ISD::ADDRSPACECAST: { |
| 31468 | SDValue V = LowerADDRSPACECAST(SDValue(N,0), DAG); |
| 31469 | Results.push_back(V); |
| 31470 | return; |
| 31471 | } |
| 31472 | case ISD::BITREVERSE: |
| 31473 | assert(N->getValueType(0) == MVT::i64 && "Unexpected VT!"); |
| 31474 | assert(Subtarget.hasXOP() && "Expected XOP"); |
| 31475 | |
| 31476 | |
| 31477 | Results.push_back(LowerBITREVERSE(SDValue(N, 0), Subtarget, DAG)); |
| 31478 | return; |
| 31479 | } |
| 31480 | } |
| 31481 | |
/// Return the X86-specific DAG node name for \p Opcode, used when printing
/// SelectionDAG nodes (e.g. in -debug dumps). Returns nullptr for opcodes
/// that are not X86 target nodes, so generic printing can take over.
const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch ((X86ISD::NodeType)Opcode) {
  // FIRST_NUMBER is a sentinel, not a real node; fall through to nullptr.
  case X86ISD::FIRST_NUMBER:       break;
// Expand each entry to: case X86ISD::NODE: return "X86ISD::NODE";
#define NODE_NAME_CASE(NODE) case X86ISD::NODE: return "X86ISD::" #NODE;
  NODE_NAME_CASE(BSF)
  NODE_NAME_CASE(BSR)
  NODE_NAME_CASE(FSHL)
  NODE_NAME_CASE(FSHR)
  NODE_NAME_CASE(FAND)
  NODE_NAME_CASE(FANDN)
  NODE_NAME_CASE(FOR)
  NODE_NAME_CASE(FXOR)
  NODE_NAME_CASE(FILD)
  NODE_NAME_CASE(FIST)
  NODE_NAME_CASE(FP_TO_INT_IN_MEM)
  NODE_NAME_CASE(FLD)
  NODE_NAME_CASE(FST)
  NODE_NAME_CASE(CALL)
  NODE_NAME_CASE(CALL_RVMARKER)
  NODE_NAME_CASE(BT)
  NODE_NAME_CASE(CMP)
  NODE_NAME_CASE(FCMP)
  NODE_NAME_CASE(STRICT_FCMP)
  NODE_NAME_CASE(STRICT_FCMPS)
  NODE_NAME_CASE(COMI)
  NODE_NAME_CASE(UCOMI)
  NODE_NAME_CASE(CMPM)
  NODE_NAME_CASE(CMPMM)
  NODE_NAME_CASE(STRICT_CMPM)
  NODE_NAME_CASE(CMPMM_SAE)
  NODE_NAME_CASE(SETCC)
  NODE_NAME_CASE(SETCC_CARRY)
  NODE_NAME_CASE(FSETCC)
  NODE_NAME_CASE(FSETCCM)
  NODE_NAME_CASE(FSETCCM_SAE)
  NODE_NAME_CASE(CMOV)
  NODE_NAME_CASE(BRCOND)
  NODE_NAME_CASE(RET_FLAG)
  NODE_NAME_CASE(IRET)
  NODE_NAME_CASE(REP_STOS)
  NODE_NAME_CASE(REP_MOVS)
  NODE_NAME_CASE(GlobalBaseReg)
  NODE_NAME_CASE(Wrapper)
  NODE_NAME_CASE(WrapperRIP)
  NODE_NAME_CASE(MOVQ2DQ)
  NODE_NAME_CASE(MOVDQ2Q)
  NODE_NAME_CASE(MMX_MOVD2W)
  NODE_NAME_CASE(MMX_MOVW2D)
  NODE_NAME_CASE(PEXTRB)
  NODE_NAME_CASE(PEXTRW)
  NODE_NAME_CASE(INSERTPS)
  NODE_NAME_CASE(PINSRB)
  NODE_NAME_CASE(PINSRW)
  NODE_NAME_CASE(PSHUFB)
  NODE_NAME_CASE(ANDNP)
  NODE_NAME_CASE(BLENDI)
  NODE_NAME_CASE(BLENDV)
  NODE_NAME_CASE(HADD)
  NODE_NAME_CASE(HSUB)
  NODE_NAME_CASE(FHADD)
  NODE_NAME_CASE(FHSUB)
  NODE_NAME_CASE(CONFLICT)
  NODE_NAME_CASE(FMAX)
  NODE_NAME_CASE(FMAXS)
  NODE_NAME_CASE(FMAX_SAE)
  NODE_NAME_CASE(FMAXS_SAE)
  NODE_NAME_CASE(FMIN)
  NODE_NAME_CASE(FMINS)
  NODE_NAME_CASE(FMIN_SAE)
  NODE_NAME_CASE(FMINS_SAE)
  NODE_NAME_CASE(FMAXC)
  NODE_NAME_CASE(FMINC)
  NODE_NAME_CASE(FRSQRT)
  NODE_NAME_CASE(FRCP)
  NODE_NAME_CASE(EXTRQI)
  NODE_NAME_CASE(INSERTQI)
  NODE_NAME_CASE(TLSADDR)
  NODE_NAME_CASE(TLSBASEADDR)
  NODE_NAME_CASE(TLSCALL)
  NODE_NAME_CASE(EH_SJLJ_SETJMP)
  NODE_NAME_CASE(EH_SJLJ_LONGJMP)
  NODE_NAME_CASE(EH_SJLJ_SETUP_DISPATCH)
  NODE_NAME_CASE(EH_RETURN)
  NODE_NAME_CASE(TC_RETURN)
  NODE_NAME_CASE(FNSTCW16m)
  NODE_NAME_CASE(FLDCW16m)
  NODE_NAME_CASE(LCMPXCHG_DAG)
  NODE_NAME_CASE(LCMPXCHG8_DAG)
  NODE_NAME_CASE(LCMPXCHG16_DAG)
  NODE_NAME_CASE(LCMPXCHG16_SAVE_RBX_DAG)
  NODE_NAME_CASE(LADD)
  NODE_NAME_CASE(LSUB)
  NODE_NAME_CASE(LOR)
  NODE_NAME_CASE(LXOR)
  NODE_NAME_CASE(LAND)
  NODE_NAME_CASE(VZEXT_MOVL)
  NODE_NAME_CASE(VZEXT_LOAD)
  NODE_NAME_CASE(VEXTRACT_STORE)
  NODE_NAME_CASE(VTRUNC)
  NODE_NAME_CASE(VTRUNCS)
  NODE_NAME_CASE(VTRUNCUS)
  NODE_NAME_CASE(VMTRUNC)
  NODE_NAME_CASE(VMTRUNCS)
  NODE_NAME_CASE(VMTRUNCUS)
  NODE_NAME_CASE(VTRUNCSTORES)
  NODE_NAME_CASE(VTRUNCSTOREUS)
  NODE_NAME_CASE(VMTRUNCSTORES)
  NODE_NAME_CASE(VMTRUNCSTOREUS)
  NODE_NAME_CASE(VFPEXT)
  NODE_NAME_CASE(STRICT_VFPEXT)
  NODE_NAME_CASE(VFPEXT_SAE)
  NODE_NAME_CASE(VFPEXTS)
  NODE_NAME_CASE(VFPEXTS_SAE)
  NODE_NAME_CASE(VFPROUND)
  NODE_NAME_CASE(STRICT_VFPROUND)
  NODE_NAME_CASE(VMFPROUND)
  NODE_NAME_CASE(VFPROUND_RND)
  NODE_NAME_CASE(VFPROUNDS)
  NODE_NAME_CASE(VFPROUNDS_RND)
  NODE_NAME_CASE(VSHLDQ)
  NODE_NAME_CASE(VSRLDQ)
  NODE_NAME_CASE(VSHL)
  NODE_NAME_CASE(VSRL)
  NODE_NAME_CASE(VSRA)
  NODE_NAME_CASE(VSHLI)
  NODE_NAME_CASE(VSRLI)
  NODE_NAME_CASE(VSRAI)
  NODE_NAME_CASE(VSHLV)
  NODE_NAME_CASE(VSRLV)
  NODE_NAME_CASE(VSRAV)
  NODE_NAME_CASE(VROTLI)
  NODE_NAME_CASE(VROTRI)
  NODE_NAME_CASE(VPPERM)
  NODE_NAME_CASE(CMPP)
  NODE_NAME_CASE(STRICT_CMPP)
  NODE_NAME_CASE(PCMPEQ)
  NODE_NAME_CASE(PCMPGT)
  NODE_NAME_CASE(PHMINPOS)
  NODE_NAME_CASE(ADD)
  NODE_NAME_CASE(SUB)
  NODE_NAME_CASE(ADC)
  NODE_NAME_CASE(SBB)
  NODE_NAME_CASE(SMUL)
  NODE_NAME_CASE(UMUL)
  NODE_NAME_CASE(OR)
  NODE_NAME_CASE(XOR)
  NODE_NAME_CASE(AND)
  NODE_NAME_CASE(BEXTR)
  NODE_NAME_CASE(BEXTRI)
  NODE_NAME_CASE(BZHI)
  NODE_NAME_CASE(PDEP)
  NODE_NAME_CASE(PEXT)
  NODE_NAME_CASE(MUL_IMM)
  NODE_NAME_CASE(MOVMSK)
  NODE_NAME_CASE(PTEST)
  NODE_NAME_CASE(TESTP)
  NODE_NAME_CASE(KORTEST)
  NODE_NAME_CASE(KTEST)
  NODE_NAME_CASE(KADD)
  NODE_NAME_CASE(KSHIFTL)
  NODE_NAME_CASE(KSHIFTR)
  NODE_NAME_CASE(PACKSS)
  NODE_NAME_CASE(PACKUS)
  NODE_NAME_CASE(PALIGNR)
  NODE_NAME_CASE(VALIGN)
  NODE_NAME_CASE(VSHLD)
  NODE_NAME_CASE(VSHRD)
  NODE_NAME_CASE(VSHLDV)
  NODE_NAME_CASE(VSHRDV)
  NODE_NAME_CASE(PSHUFD)
  NODE_NAME_CASE(PSHUFHW)
  NODE_NAME_CASE(PSHUFLW)
  NODE_NAME_CASE(SHUFP)
  NODE_NAME_CASE(SHUF128)
  NODE_NAME_CASE(MOVLHPS)
  NODE_NAME_CASE(MOVHLPS)
  NODE_NAME_CASE(MOVDDUP)
  NODE_NAME_CASE(MOVSHDUP)
  NODE_NAME_CASE(MOVSLDUP)
  NODE_NAME_CASE(MOVSD)
  NODE_NAME_CASE(MOVSS)
  NODE_NAME_CASE(UNPCKL)
  NODE_NAME_CASE(UNPCKH)
  NODE_NAME_CASE(VBROADCAST)
  NODE_NAME_CASE(VBROADCAST_LOAD)
  NODE_NAME_CASE(VBROADCASTM)
  NODE_NAME_CASE(SUBV_BROADCAST_LOAD)
  NODE_NAME_CASE(VPERMILPV)
  NODE_NAME_CASE(VPERMILPI)
  NODE_NAME_CASE(VPERM2X128)
  NODE_NAME_CASE(VPERMV)
  NODE_NAME_CASE(VPERMV3)
  NODE_NAME_CASE(VPERMI)
  NODE_NAME_CASE(VPTERNLOG)
  NODE_NAME_CASE(VFIXUPIMM)
  NODE_NAME_CASE(VFIXUPIMM_SAE)
  NODE_NAME_CASE(VFIXUPIMMS)
  NODE_NAME_CASE(VFIXUPIMMS_SAE)
  NODE_NAME_CASE(VRANGE)
  NODE_NAME_CASE(VRANGE_SAE)
  NODE_NAME_CASE(VRANGES)
  NODE_NAME_CASE(VRANGES_SAE)
  NODE_NAME_CASE(PMULUDQ)
  NODE_NAME_CASE(PMULDQ)
  NODE_NAME_CASE(PSADBW)
  NODE_NAME_CASE(DBPSADBW)
  NODE_NAME_CASE(VASTART_SAVE_XMM_REGS)
  NODE_NAME_CASE(VAARG_64)
  NODE_NAME_CASE(VAARG_X32)
  NODE_NAME_CASE(WIN_ALLOCA)
  NODE_NAME_CASE(MEMBARRIER)
  NODE_NAME_CASE(MFENCE)
  NODE_NAME_CASE(SEG_ALLOCA)
  NODE_NAME_CASE(PROBED_ALLOCA)
  NODE_NAME_CASE(RDRAND)
  NODE_NAME_CASE(RDSEED)
  NODE_NAME_CASE(RDPKRU)
  NODE_NAME_CASE(WRPKRU)
  NODE_NAME_CASE(VPMADDUBSW)
  NODE_NAME_CASE(VPMADDWD)
  NODE_NAME_CASE(VPSHA)
  NODE_NAME_CASE(VPSHL)
  NODE_NAME_CASE(VPCOM)
  NODE_NAME_CASE(VPCOMU)
  NODE_NAME_CASE(VPERMIL2)
  NODE_NAME_CASE(FMSUB)
  NODE_NAME_CASE(STRICT_FMSUB)
  NODE_NAME_CASE(FNMADD)
  NODE_NAME_CASE(STRICT_FNMADD)
  NODE_NAME_CASE(FNMSUB)
  NODE_NAME_CASE(STRICT_FNMSUB)
  NODE_NAME_CASE(FMADDSUB)
  NODE_NAME_CASE(FMSUBADD)
  NODE_NAME_CASE(FMADD_RND)
  NODE_NAME_CASE(FNMADD_RND)
  NODE_NAME_CASE(FMSUB_RND)
  NODE_NAME_CASE(FNMSUB_RND)
  NODE_NAME_CASE(FMADDSUB_RND)
  NODE_NAME_CASE(FMSUBADD_RND)
  NODE_NAME_CASE(VPMADD52H)
  NODE_NAME_CASE(VPMADD52L)
  NODE_NAME_CASE(VRNDSCALE)
  NODE_NAME_CASE(STRICT_VRNDSCALE)
  NODE_NAME_CASE(VRNDSCALE_SAE)
  NODE_NAME_CASE(VRNDSCALES)
  NODE_NAME_CASE(VRNDSCALES_SAE)
  NODE_NAME_CASE(VREDUCE)
  NODE_NAME_CASE(VREDUCE_SAE)
  NODE_NAME_CASE(VREDUCES)
  NODE_NAME_CASE(VREDUCES_SAE)
  NODE_NAME_CASE(VGETMANT)
  NODE_NAME_CASE(VGETMANT_SAE)
  NODE_NAME_CASE(VGETMANTS)
  NODE_NAME_CASE(VGETMANTS_SAE)
  NODE_NAME_CASE(PCMPESTR)
  NODE_NAME_CASE(PCMPISTR)
  NODE_NAME_CASE(XTEST)
  NODE_NAME_CASE(COMPRESS)
  NODE_NAME_CASE(EXPAND)
  NODE_NAME_CASE(SELECTS)
  NODE_NAME_CASE(ADDSUB)
  NODE_NAME_CASE(RCP14)
  NODE_NAME_CASE(RCP14S)
  NODE_NAME_CASE(RCP28)
  NODE_NAME_CASE(RCP28_SAE)
  NODE_NAME_CASE(RCP28S)
  NODE_NAME_CASE(RCP28S_SAE)
  NODE_NAME_CASE(EXP2)
  NODE_NAME_CASE(EXP2_SAE)
  NODE_NAME_CASE(RSQRT14)
  NODE_NAME_CASE(RSQRT14S)
  NODE_NAME_CASE(RSQRT28)
  NODE_NAME_CASE(RSQRT28_SAE)
  NODE_NAME_CASE(RSQRT28S)
  NODE_NAME_CASE(RSQRT28S_SAE)
  NODE_NAME_CASE(FADD_RND)
  NODE_NAME_CASE(FADDS)
  NODE_NAME_CASE(FADDS_RND)
  NODE_NAME_CASE(FSUB_RND)
  NODE_NAME_CASE(FSUBS)
  NODE_NAME_CASE(FSUBS_RND)
  NODE_NAME_CASE(FMUL_RND)
  NODE_NAME_CASE(FMULS)
  NODE_NAME_CASE(FMULS_RND)
  NODE_NAME_CASE(FDIV_RND)
  NODE_NAME_CASE(FDIVS)
  NODE_NAME_CASE(FDIVS_RND)
  NODE_NAME_CASE(FSQRT_RND)
  NODE_NAME_CASE(FSQRTS)
  NODE_NAME_CASE(FSQRTS_RND)
  NODE_NAME_CASE(FGETEXP)
  NODE_NAME_CASE(FGETEXP_SAE)
  NODE_NAME_CASE(FGETEXPS)
  NODE_NAME_CASE(FGETEXPS_SAE)
  NODE_NAME_CASE(SCALEF)
  NODE_NAME_CASE(SCALEF_RND)
  NODE_NAME_CASE(SCALEFS)
  NODE_NAME_CASE(SCALEFS_RND)
  NODE_NAME_CASE(AVG)
  NODE_NAME_CASE(MULHRS)
  NODE_NAME_CASE(SINT_TO_FP_RND)
  NODE_NAME_CASE(UINT_TO_FP_RND)
  NODE_NAME_CASE(CVTTP2SI)
  NODE_NAME_CASE(CVTTP2UI)
  NODE_NAME_CASE(STRICT_CVTTP2SI)
  NODE_NAME_CASE(STRICT_CVTTP2UI)
  NODE_NAME_CASE(MCVTTP2SI)
  NODE_NAME_CASE(MCVTTP2UI)
  NODE_NAME_CASE(CVTTP2SI_SAE)
  NODE_NAME_CASE(CVTTP2UI_SAE)
  NODE_NAME_CASE(CVTTS2SI)
  NODE_NAME_CASE(CVTTS2UI)
  NODE_NAME_CASE(CVTTS2SI_SAE)
  NODE_NAME_CASE(CVTTS2UI_SAE)
  NODE_NAME_CASE(CVTSI2P)
  NODE_NAME_CASE(CVTUI2P)
  NODE_NAME_CASE(STRICT_CVTSI2P)
  NODE_NAME_CASE(STRICT_CVTUI2P)
  NODE_NAME_CASE(MCVTSI2P)
  NODE_NAME_CASE(MCVTUI2P)
  NODE_NAME_CASE(VFPCLASS)
  NODE_NAME_CASE(VFPCLASSS)
  NODE_NAME_CASE(MULTISHIFT)
  NODE_NAME_CASE(SCALAR_SINT_TO_FP)
  NODE_NAME_CASE(SCALAR_SINT_TO_FP_RND)
  NODE_NAME_CASE(SCALAR_UINT_TO_FP)
  NODE_NAME_CASE(SCALAR_UINT_TO_FP_RND)
  NODE_NAME_CASE(CVTPS2PH)
  NODE_NAME_CASE(STRICT_CVTPS2PH)
  NODE_NAME_CASE(MCVTPS2PH)
  NODE_NAME_CASE(CVTPH2PS)
  NODE_NAME_CASE(STRICT_CVTPH2PS)
  NODE_NAME_CASE(CVTPH2PS_SAE)
  NODE_NAME_CASE(CVTP2SI)
  NODE_NAME_CASE(CVTP2UI)
  NODE_NAME_CASE(MCVTP2SI)
  NODE_NAME_CASE(MCVTP2UI)
  NODE_NAME_CASE(CVTP2SI_RND)
  NODE_NAME_CASE(CVTP2UI_RND)
  NODE_NAME_CASE(CVTS2SI)
  NODE_NAME_CASE(CVTS2UI)
  NODE_NAME_CASE(CVTS2SI_RND)
  NODE_NAME_CASE(CVTS2UI_RND)
  NODE_NAME_CASE(CVTNE2PS2BF16)
  NODE_NAME_CASE(CVTNEPS2BF16)
  NODE_NAME_CASE(MCVTNEPS2BF16)
  NODE_NAME_CASE(DPBF16PS)
  NODE_NAME_CASE(LWPINS)
  NODE_NAME_CASE(MGATHER)
  NODE_NAME_CASE(MSCATTER)
  NODE_NAME_CASE(VPDPBUSD)
  NODE_NAME_CASE(VPDPBUSDS)
  NODE_NAME_CASE(VPDPWSSD)
  NODE_NAME_CASE(VPDPWSSDS)
  NODE_NAME_CASE(VPSHUFBITQMB)
  NODE_NAME_CASE(GF2P8MULB)
  NODE_NAME_CASE(GF2P8AFFINEQB)
  NODE_NAME_CASE(GF2P8AFFINEINVQB)
  NODE_NAME_CASE(NT_CALL)
  NODE_NAME_CASE(NT_BRIND)
  NODE_NAME_CASE(UMWAIT)
  NODE_NAME_CASE(TPAUSE)
  NODE_NAME_CASE(ENQCMD)
  NODE_NAME_CASE(ENQCMDS)
  NODE_NAME_CASE(VP2INTERSECT)
  NODE_NAME_CASE(AESENC128KL)
  NODE_NAME_CASE(AESDEC128KL)
  NODE_NAME_CASE(AESENC256KL)
  NODE_NAME_CASE(AESDEC256KL)
  NODE_NAME_CASE(AESENCWIDE128KL)
  NODE_NAME_CASE(AESDECWIDE128KL)
  NODE_NAME_CASE(AESENCWIDE256KL)
  NODE_NAME_CASE(AESDECWIDE256KL)
  NODE_NAME_CASE(TESTUI)
  }
  // Not an X86-specific node; the generic printer handles it.
  return nullptr;
#undef NODE_NAME_CASE
}
| 31860 | |
| 31861 | |
| 31862 | |
/// Return true if the addressing mode represented by AM is legal for this
/// target, for a load/store of the specified type. X86 supports extremely
/// general addressing modes (base + scaled index + 32-bit displacement +
/// global), so most modes are accepted; the checks below reject the few
/// combinations the encodings cannot express.
bool X86TargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                              const AddrMode &AM, Type *Ty,
                                              unsigned AS,
                                              Instruction *I) const {
  // The rules depend on the code model (small/kernel/medium/large).
  CodeModel::Model M = getTargetMachine().getCodeModel();

  // X86 allows a sign-extended 32-bit immediate field as a displacement;
  // whether this particular offset fits also depends on the code model and
  // on whether a global is part of the address.
  if (!X86::isOffsetSuitableForCodeModel(AM.BaseOffs, M, AM.BaseGV != nullptr))
    return false;

  if (AM.BaseGV) {
    unsigned GVFlags = Subtarget.classifyGlobalReference(AM.BaseGV);

    // If a reference to this global requires an extra load (e.g. through a
    // GOT stub), we can't fold it into the address.
    if (isGlobalStubReference(GVFlags))
      return false;

    // If BaseGV requires a register for the PIC base, we cannot also have a
    // BaseReg specified — both would need the single base-register slot.
    if (AM.HasBaseReg && isGlobalRelativeToPICBase(GVFlags))
      return false;

    // If the lower 4G is not available (64-bit non-small code model, or PIC),
    // globals must be addressed RIP-relative, which cannot also carry an
    // immediate offset or a scaled index.
    if ((M != CodeModel::Small || isPositionIndependent()) &&
        Subtarget.is64Bit() && (AM.BaseOffs || AM.Scale > 1))
      return false;
  }

  switch (AM.Scale) {
  case 0:
  case 1:
  case 2:
  case 4:
  case 8:
    // These scales always work (directly encodable in SIB).
    break;
  case 3:
  case 5:
  case 9:
    // These scales are formed as base + scaled-index (e.g. 9*r == r + 8*r),
    // so they are only accepted when no base register is present yet.
    if (AM.HasBaseReg)
      return false;
    break;
  default:
    // Other scales never work.
    return false;
  }

  return true;
}
| 31914 | |
| 31915 | bool X86TargetLowering::isVectorShiftByScalarCheap(Type *Ty) const { |
| 31916 | unsigned Bits = Ty->getScalarSizeInBits(); |
| 31917 | |
| 31918 | |
| 31919 | |
| 31920 | if (Bits == 8) |
| 31921 | return false; |
| 31922 | |
| 31923 | |
| 31924 | |
| 31925 | if (Subtarget.hasXOP() && |
| 31926 | (Bits == 8 || Bits == 16 || Bits == 32 || Bits == 64)) |
| 31927 | return false; |
| 31928 | |
| 31929 | |
| 31930 | |
| 31931 | if (Subtarget.hasAVX2() && (Bits == 32 || Bits == 64)) |
| 31932 | return false; |
| 31933 | |
| 31934 | |
| 31935 | if (Subtarget.hasBWI() && Bits == 16) |
| 31936 | return false; |
| 31937 | |
| 31938 | |
| 31939 | |
| 31940 | return true; |
| 31941 | } |
| 31942 | |
| 31943 | bool X86TargetLowering::isBinOp(unsigned Opcode) const { |
| 31944 | switch (Opcode) { |
| 31945 | |
| 31946 | |
| 31947 | case X86ISD::ANDNP: |
| 31948 | case X86ISD::PCMPGT: |
| 31949 | case X86ISD::FMAX: |
| 31950 | case X86ISD::FMIN: |
| 31951 | case X86ISD::FANDN: |
| 31952 | return true; |
| 31953 | } |
| 31954 | |
| 31955 | return TargetLoweringBase::isBinOp(Opcode); |
| 31956 | } |
| 31957 | |
| 31958 | bool X86TargetLowering::isCommutativeBinOp(unsigned Opcode) const { |
| 31959 | switch (Opcode) { |
| 31960 | |
| 31961 | case X86ISD::PCMPEQ: |
| 31962 | case X86ISD::PMULDQ: |
| 31963 | case X86ISD::PMULUDQ: |
| 31964 | case X86ISD::FMAXC: |
| 31965 | case X86ISD::FMINC: |
| 31966 | case X86ISD::FAND: |
| 31967 | case X86ISD::FOR: |
| 31968 | case X86ISD::FXOR: |
| 31969 | return true; |
| 31970 | } |
| 31971 | |
| 31972 | return TargetLoweringBase::isCommutativeBinOp(Opcode); |
| 31973 | } |
| 31974 | |
| 31975 | bool X86TargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const { |
| 31976 | if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy()) |
| 31977 | return false; |
| 31978 | unsigned NumBits1 = Ty1->getPrimitiveSizeInBits(); |
| 31979 | unsigned NumBits2 = Ty2->getPrimitiveSizeInBits(); |
| 31980 | return NumBits1 > NumBits2; |
| 31981 | } |
| 31982 | |
| 31983 | bool X86TargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const { |
| 31984 | if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy()) |
| 31985 | return false; |
| 31986 | |
| 31987 | if (!isTypeLegal(EVT::getEVT(Ty1))) |
| 31988 | return false; |
| 31989 | |
| 31990 | assert(Ty1->getPrimitiveSizeInBits() <= 64 && "i128 is probably not a noop"); |
| 31991 | |
| 31992 | |
| 31993 | |
| 31994 | return true; |
| 31995 | } |
| 31996 | |
| 31997 | bool X86TargetLowering::isLegalICmpImmediate(int64_t Imm) const { |
| 31998 | return isInt<32>(Imm); |
| 31999 | } |
| 32000 | |
| 32001 | bool X86TargetLowering::isLegalAddImmediate(int64_t Imm) const { |
| 32002 | |
| 32003 | return isInt<32>(Imm); |
| 32004 | } |
| 32005 | |
| 32006 | bool X86TargetLowering::isLegalStoreImmediate(int64_t Imm) const { |
| 32007 | return isInt<32>(Imm); |
| 32008 | } |
| 32009 | |
| 32010 | bool X86TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { |
| 32011 | if (!VT1.isScalarInteger() || !VT2.isScalarInteger()) |
| 32012 | return false; |
| 32013 | unsigned NumBits1 = VT1.getSizeInBits(); |
| 32014 | unsigned NumBits2 = VT2.getSizeInBits(); |
| 32015 | return NumBits1 > NumBits2; |
| 32016 | } |
| 32017 | |
| 32018 | bool X86TargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const { |
| 32019 | |
| 32020 | return Ty1->isIntegerTy(32) && Ty2->isIntegerTy(64) && Subtarget.is64Bit(); |
| 32021 | } |
| 32022 | |
| 32023 | bool X86TargetLowering::isZExtFree(EVT VT1, EVT VT2) const { |
| 32024 | |
| 32025 | return VT1 == MVT::i32 && VT2 == MVT::i64 && Subtarget.is64Bit(); |
| 32026 | } |
| 32027 | |
| 32028 | bool X86TargetLowering::isZExtFree(SDValue Val, EVT VT2) const { |
| 32029 | EVT VT1 = Val.getValueType(); |
| 32030 | if (isZExtFree(VT1, VT2)) |
| 32031 | return true; |
| 32032 | |
| 32033 | if (Val.getOpcode() != ISD::LOAD) |
| 32034 | return false; |
| 32035 | |
| 32036 | if (!VT1.isSimple() || !VT1.isInteger() || |
| 32037 | !VT2.isSimple() || !VT2.isInteger()) |
| 32038 | return false; |
| 32039 | |
| 32040 | switch (VT1.getSimpleVT().SimpleTy) { |
| 32041 | default: break; |
| 32042 | case MVT::i8: |
| 32043 | case MVT::i16: |
| 32044 | case MVT::i32: |
| 32045 | |
| 32046 | return true; |
| 32047 | } |
| 32048 | |
| 32049 | return false; |
| 32050 | } |
| 32051 | |
| 32052 | bool X86TargetLowering::shouldSinkOperands(Instruction *I, |
| 32053 | SmallVectorImpl<Use *> &Ops) const { |
| 32054 | |
| 32055 | |
| 32056 | |
| 32057 | int ShiftAmountOpNum = -1; |
| 32058 | if (I->isShift()) |
| 32059 | ShiftAmountOpNum = 1; |
| 32060 | else if (auto *II = dyn_cast<IntrinsicInst>(I)) { |
| 32061 | if (II->getIntrinsicID() == Intrinsic::fshl || |
| 32062 | II->getIntrinsicID() == Intrinsic::fshr) |
| 32063 | ShiftAmountOpNum = 2; |
| 32064 | } |
| 32065 | |
| 32066 | if (ShiftAmountOpNum == -1) |
| 32067 | return false; |
| 32068 | |
| 32069 | auto *Shuf = dyn_cast<ShuffleVectorInst>(I->getOperand(ShiftAmountOpNum)); |
| 32070 | if (Shuf && getSplatIndex(Shuf->getShuffleMask()) >= 0 && |
| 32071 | isVectorShiftByScalarCheap(I->getType())) { |
| 32072 | Ops.push_back(&I->getOperandUse(ShiftAmountOpNum)); |
| 32073 | return true; |
| 32074 | } |
| 32075 | |
| 32076 | return false; |
| 32077 | } |
| 32078 | |
| 32079 | bool X86TargetLowering::shouldConvertPhiType(Type *From, Type *To) const { |
| 32080 | if (!Subtarget.is64Bit()) |
| 32081 | return false; |
| 32082 | return TargetLowering::shouldConvertPhiType(From, To); |
| 32083 | } |
| 32084 | |
| 32085 | bool X86TargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const { |
| 32086 | if (isa<MaskedLoadSDNode>(ExtVal.getOperand(0))) |
| 32087 | return false; |
| 32088 | |
| 32089 | EVT SrcVT = ExtVal.getOperand(0).getValueType(); |
| 32090 | |
| 32091 | |
| 32092 | if (SrcVT.getScalarType() == MVT::i1) |
| 32093 | return false; |
| 32094 | |
| 32095 | return true; |
| 32096 | } |
| 32097 | |
| 32098 | bool X86TargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, |
| 32099 | EVT VT) const { |
| 32100 | if (!Subtarget.hasAnyFMA()) |
| 32101 | return false; |
| 32102 | |
| 32103 | VT = VT.getScalarType(); |
| 32104 | |
| 32105 | if (!VT.isSimple()) |
| 32106 | return false; |
| 32107 | |
| 32108 | switch (VT.getSimpleVT().SimpleTy) { |
| 32109 | case MVT::f32: |
| 32110 | case MVT::f64: |
| 32111 | return true; |
| 32112 | default: |
| 32113 | break; |
| 32114 | } |
| 32115 | |
| 32116 | return false; |
| 32117 | } |
| 32118 | |
| 32119 | bool X86TargetLowering::isNarrowingProfitable(EVT VT1, EVT VT2) const { |
| 32120 | |
| 32121 | return !(VT1 == MVT::i32 && VT2 == MVT::i16); |
| 32122 | } |
| 32123 | |
| 32124 | |
| 32125 | |
| 32126 | |
| 32127 | |
| 32128 | bool X86TargetLowering::isShuffleMaskLegal(ArrayRef<int> Mask, EVT VT) const { |
| 32129 | if (!VT.isSimple()) |
| 32130 | return false; |
| 32131 | |
| 32132 | |
| 32133 | if (VT.getSimpleVT().getScalarType() == MVT::i1) |
| 32134 | return false; |
| 32135 | |
| 32136 | |
| 32137 | if (VT.getSimpleVT().getSizeInBits() == 64) |
| 32138 | return false; |
| 32139 | |
| 32140 | |
| 32141 | |
| 32142 | return isTypeLegal(VT.getSimpleVT()); |
| 32143 | } |
| 32144 | |
| 32145 | bool X86TargetLowering::isVectorClearMaskLegal(ArrayRef<int> Mask, |
| 32146 | EVT VT) const { |
| 32147 | |
| 32148 | |
| 32149 | if (!Subtarget.hasAVX2()) |
| 32150 | if (VT == MVT::v32i8 || VT == MVT::v16i16) |
| 32151 | return false; |
| 32152 | |
| 32153 | |
| 32154 | return isShuffleMaskLegal(Mask, VT); |
| 32155 | } |
| 32156 | |
| 32157 | bool X86TargetLowering::areJTsAllowed(const Function *Fn) const { |
| 32158 | |
| 32159 | if (Subtarget.useIndirectThunkBranches()) |
| 32160 | return false; |
| 32161 | |
| 32162 | |
| 32163 | return TargetLowering::areJTsAllowed(Fn); |
| 32164 | } |
| 32165 | |
| 32166 | |
| 32167 | |
| 32168 | |
| 32169 | |
| 32170 | |
| 32171 | |
| 32172 | static bool isEFLAGSLiveAfter(MachineBasicBlock::iterator Itr, |
| 32173 | MachineBasicBlock *BB) { |
| 32174 | |
| 32175 | for (MachineBasicBlock::iterator miI = std::next(Itr), miE = BB->end(); |
| 32176 | miI != miE; ++miI) { |
| 32177 | const MachineInstr& mi = *miI; |
| 32178 | if (mi.readsRegister(X86::EFLAGS)) |
| 32179 | return true; |
| 32180 | |
| 32181 | if (mi.definesRegister(X86::EFLAGS)) |
| 32182 | return false; |
| 32183 | } |
| 32184 | |
| 32185 | |
| 32186 | |
| 32187 | for (MachineBasicBlock::succ_iterator sItr = BB->succ_begin(), |
| 32188 | sEnd = BB->succ_end(); |
| 32189 | sItr != sEnd; ++sItr) { |
| 32190 | MachineBasicBlock* succ = *sItr; |
| 32191 | if (succ->isLiveIn(X86::EFLAGS)) |
| 32192 | return true; |
| 32193 | } |
| 32194 | |
| 32195 | return false; |
| 32196 | } |
| 32197 | |
| 32198 | |
// Utility function to emit xbegin specifying the start of an RTM region.
static MachineBasicBlock *emitXBegin(MachineInstr &MI, MachineBasicBlock *MBB,
                                     const TargetInstrInfo *TII) {
  const DebugLoc &DL = MI.getDebugLoc();

  const BasicBlock *BB = MBB->getBasicBlock();
  MachineFunction::iterator I = ++MBB->getIterator();

  // For v = xbegin(), we generate the following control-flow diamond:
  //
  //  thisMBB:
  //   xbegin fallMBB        ; falls through to mainMBB, aborts to fallMBB
  //
  //  mainMBB:
  //   v_main = -1           ; transaction successfully started
  //
  //  fallMBB:
  //   v_fall = EAX          ; abort status delivered by hardware in EAX
  //
  //  sinkMBB:
  //   v = phi(v_main/mainMBB, v_fall/fallMBB)
  MachineBasicBlock *thisMBB = MBB;
  MachineFunction *MF = MBB->getParent();
  MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
  MachineBasicBlock *fallMBB = MF->CreateMachineBasicBlock(BB);
  MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
  MF->insert(I, mainMBB);
  MF->insert(I, fallMBB);
  MF->insert(I, sinkMBB);

  // If EFLAGS is live past MI, propagate it through all new blocks.
  if (isEFLAGSLiveAfter(MI, MBB)) {
    mainMBB->addLiveIn(X86::EFLAGS);
    fallMBB->addLiveIn(X86::EFLAGS);
    sinkMBB->addLiveIn(X86::EFLAGS);
  }

  // Transfer the remainder of MBB and its successor edges to sinkMBB.
  sinkMBB->splice(sinkMBB->begin(), MBB,
                  std::next(MachineBasicBlock::iterator(MI)), MBB->end());
  sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);

  MachineRegisterInfo &MRI = MF->getRegInfo();
  Register DstReg = MI.getOperand(0).getReg();
  const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
  Register mainDstReg = MRI.createVirtualRegister(RC);
  Register fallDstReg = MRI.createVirtualRegister(RC);

  // thisMBB:
  //  xbegin fallMBB
  //  # fallthrough to mainMBB
  //  # abort jumps to fallMBB
  BuildMI(thisMBB, DL, TII->get(X86::XBEGIN_4)).addMBB(fallMBB);
  thisMBB->addSuccessor(mainMBB);
  thisMBB->addSuccessor(fallMBB);

  // mainMBB:
  //  mainDstReg := -1   (the "transaction started" sentinel)
  BuildMI(mainMBB, DL, TII->get(X86::MOV32ri), mainDstReg).addImm(-1);
  BuildMI(mainMBB, DL, TII->get(X86::JMP_1)).addMBB(sinkMBB);
  mainMBB->addSuccessor(sinkMBB);

  // fallMBB:
  //  XABORT_DEF is a pseudo modeling the hardware's definition of EAX on
  //  abort; copy that status into fallDstReg.
  BuildMI(fallMBB, DL, TII->get(X86::XABORT_DEF));
  BuildMI(fallMBB, DL, TII->get(TargetOpcode::COPY), fallDstReg)
      .addReg(X86::EAX);
  fallMBB->addSuccessor(sinkMBB);

  // sinkMBB:
  //  DstReg := phi(mainDstReg/mainMBB, fallDstReg/fallMBB)
  BuildMI(*sinkMBB, sinkMBB->begin(), DL, TII->get(X86::PHI), DstReg)
      .addReg(mainDstReg).addMBB(mainMBB)
      .addReg(fallDstReg).addMBB(fallMBB);

  MI.eraseFromParent();
  return sinkMBB;
}
| 32279 | |
MachineBasicBlock *
X86TargetLowering::EmitVAARGWithCustomInserter(MachineInstr &MI,
                                               MachineBasicBlock *MBB) const {
  // Emit va_arg instruction on x86-64.
  //
  // Operands to this pseudo-instruction (as used below):
  //   0  ) Output   : destination address (reg)
  //   1-5) Input    : va_list address (5-operand x86 memory reference)
  //   6  ) ArgSize  : size (in bytes) of the vararg type
  //   7  ) ArgMode  : 0=overflow only, 1=use gp_offset, 2=use fp_offset
  //   8  ) Align    : alignment of the type
  //   9  ) implicit-def of EFLAGS (clobbered by the CMP/ADD/AND below)
  assert(MI.getNumOperands() == 10 && "VAARG should have 10 operands!");
  static_assert(X86::AddrNumOperands == 5, "VAARG assumes 5 address operands");

  Register DestReg = MI.getOperand(0).getReg();
  MachineOperand &Base = MI.getOperand(1);
  MachineOperand &Scale = MI.getOperand(2);
  MachineOperand &Index = MI.getOperand(3);
  MachineOperand &Disp = MI.getOperand(4);
  MachineOperand &Segment = MI.getOperand(5);
  unsigned ArgSize = MI.getOperand(6).getImm();
  unsigned ArgMode = MI.getOperand(7).getImm();
  Align Alignment = Align(MI.getOperand(8).getImm());

  MachineFunction *MF = MBB->getParent();

  // Memory reference: split the single load+store memoperand into separate
  // load-only and store-only MMOs for the accesses generated below.
  assert(MI.hasOneMemOperand() && "Expected VAARG to have one memoperand");

  MachineMemOperand *OldMMO = MI.memoperands().front();

  MachineMemOperand *LoadOnlyMMO = MF->getMachineMemOperand(
      OldMMO, OldMMO->getFlags() & ~MachineMemOperand::MOStore);
  MachineMemOperand *StoreOnlyMMO = MF->getMachineMemOperand(
      OldMMO, OldMMO->getFlags() & ~MachineMemOperand::MOLoad);

  // Machine information.
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
  const TargetRegisterClass *AddrRegClass =
      getRegClassFor(getPointerTy(MBB->getParent()->getDataLayout()));
  const TargetRegisterClass *OffsetRegClass = getRegClassFor(MVT::i32);
  const DebugLoc &DL = MI.getDebugLoc();

  // SysV x86-64 va_list layout (the displacements used below depend on it):
  //   struct va_list {
  //     i32 gp_offset;          // disp +0
  //     i32 fp_offset;          // disp +4
  //     i64 overflow_area;      // disp +8  (address)
  //     i64 reg_save_area;      // disp +16 (address; +12 on ILP32)
  //   };
  unsigned TotalNumIntRegs = 6;   // GP argument registers in the save area
  unsigned TotalNumXMMRegs = 8;   // XMM argument registers in the save area
  bool UseGPOffset = (ArgMode == 1);
  bool UseFPOffset = (ArgMode == 2);
  unsigned MaxOffset = TotalNumIntRegs * 8 +
                       (UseFPOffset ? TotalNumXMMRegs * 16 : 0);

  // Align ArgSize up to a multiple of 8 (stack slots are 8-byte units).
  unsigned ArgSizeA8 = (ArgSize + 7) & ~7;
  bool NeedsAlign = (Alignment > 8);

  MachineBasicBlock *thisMBB = MBB;
  MachineBasicBlock *overflowMBB;
  MachineBasicBlock *offsetMBB;
  MachineBasicBlock *endMBB;

  unsigned OffsetDestReg = 0;   // Argument address computed by offsetMBB
  unsigned OverflowDestReg = 0; // Argument address computed by overflowMBB
  unsigned OffsetReg = 0;       // gp_offset / fp_offset value

  if (!UseGPOffset && !UseFPOffset) {
    // If we only pull from the overflow region, we don't create a branch
    // and don't need to alter control flow at all.
    OffsetDestReg = 0; // unused
    OverflowDestReg = DestReg;

    offsetMBB = nullptr;
    overflowMBB = thisMBB;
    endMBB = thisMBB;
  } else {
    // First emit code to check if gp_offset (or fp_offset) is below the
    // bound.  If so, pull the argument from the register save area
    // (offsetMBB); otherwise pull it from the overflow area (overflowMBB).
    //
    //   thisMBB
    //    /    \
    // offsetMBB overflowMBB
    //    \    /
    //    endMBB

    // Registers for the PHI in endMBB.
    OffsetDestReg = MRI.createVirtualRegister(AddrRegClass);
    OverflowDestReg = MRI.createVirtualRegister(AddrRegClass);

    const BasicBlock *LLVM_BB = MBB->getBasicBlock();
    overflowMBB = MF->CreateMachineBasicBlock(LLVM_BB);
    offsetMBB = MF->CreateMachineBasicBlock(LLVM_BB);
    endMBB = MF->CreateMachineBasicBlock(LLVM_BB);

    MachineFunction::iterator MBBIter = ++MBB->getIterator();

    // Insert the new basic blocks after thisMBB.
    MF->insert(MBBIter, offsetMBB);
    MF->insert(MBBIter, overflowMBB);
    MF->insert(MBBIter, endMBB);

    // Transfer the remainder of MBB and its successor edges to endMBB.
    endMBB->splice(endMBB->begin(), thisMBB,
                   std::next(MachineBasicBlock::iterator(MI)), thisMBB->end());
    endMBB->transferSuccessorsAndUpdatePHIs(thisMBB);

    // Make offsetMBB and overflowMBB successors of thisMBB.
    thisMBB->addSuccessor(offsetMBB);
    thisMBB->addSuccessor(overflowMBB);

    // endMBB is a successor of both offsetMBB and overflowMBB.
    offsetMBB->addSuccessor(endMBB);
    overflowMBB->addSuccessor(endMBB);

    // Load the gp_offset (disp+0) or fp_offset (disp+4) value.
    OffsetReg = MRI.createVirtualRegister(OffsetRegClass);
    BuildMI(thisMBB, DL, TII->get(X86::MOV32rm), OffsetReg)
        .add(Base)
        .add(Scale)
        .add(Index)
        .addDisp(Disp, UseFPOffset ? 4 : 0)
        .add(Segment)
        .setMemRefs(LoadOnlyMMO);

    // Check if there is enough room left to pull this argument from the
    // register save area.
    BuildMI(thisMBB, DL, TII->get(X86::CMP32ri))
        .addReg(OffsetReg)
        .addImm(MaxOffset + 8 - ArgSizeA8);

    // Branch to "overflowMBB" if offset >= max; fall through to "offsetMBB"
    // otherwise.
    BuildMI(thisMBB, DL, TII->get(X86::JCC_1))
        .addMBB(overflowMBB).addImm(X86::COND_AE);
  }

  // In offsetMBB, emit code to use the reg_save_area.
  if (offsetMBB) {
    assert(OffsetReg != 0);

    // Read the reg_save_area address (disp+16, or +12 on ILP32).
    Register RegSaveReg = MRI.createVirtualRegister(AddrRegClass);
    BuildMI(
        offsetMBB, DL,
        TII->get(Subtarget.isTarget64BitLP64() ? X86::MOV64rm : X86::MOV32rm),
        RegSaveReg)
        .add(Base)
        .add(Scale)
        .add(Index)
        .addDisp(Disp, Subtarget.isTarget64BitLP64() ? 16 : 12)
        .add(Segment)
        .setMemRefs(LoadOnlyMMO);

    if (Subtarget.isTarget64BitLP64()) {
      // Zero-extend the 32-bit offset to pointer width.
      Register OffsetReg64 = MRI.createVirtualRegister(AddrRegClass);
      BuildMI(offsetMBB, DL, TII->get(X86::SUBREG_TO_REG), OffsetReg64)
          .addImm(0)
          .addReg(OffsetReg)
          .addImm(X86::sub_32bit);

      // Add the offset to the reg_save_area to get the final address.
      BuildMI(offsetMBB, DL, TII->get(X86::ADD64rr), OffsetDestReg)
          .addReg(OffsetReg64)
          .addReg(RegSaveReg);
    } else {
      // Add the offset to the reg_save_area to get the final address.
      BuildMI(offsetMBB, DL, TII->get(X86::ADD32rr), OffsetDestReg)
          .addReg(OffsetReg)
          .addReg(RegSaveReg);
    }

    // Compute the offset for the next argument (GP slots are 8 bytes,
    // XMM slots are 16 bytes).
    Register NextOffsetReg = MRI.createVirtualRegister(OffsetRegClass);
    BuildMI(offsetMBB, DL, TII->get(X86::ADD32ri), NextOffsetReg)
        .addReg(OffsetReg)
        .addImm(UseFPOffset ? 16 : 8);

    // Store the updated offset back into the va_list.
    BuildMI(offsetMBB, DL, TII->get(X86::MOV32mr))
        .add(Base)
        .add(Scale)
        .add(Index)
        .addDisp(Disp, UseFPOffset ? 4 : 0)
        .add(Segment)
        .addReg(NextOffsetReg)
        .setMemRefs(StoreOnlyMMO);

    // Jump to endMBB.
    BuildMI(offsetMBB, DL, TII->get(X86::JMP_1))
        .addMBB(endMBB);
  }

  //
  // Emit code to use the overflow area.
  //

  // Load the overflow_area address (disp+8) into a register.
  Register OverflowAddrReg = MRI.createVirtualRegister(AddrRegClass);
  BuildMI(overflowMBB, DL,
          TII->get(Subtarget.isTarget64BitLP64() ? X86::MOV64rm : X86::MOV32rm),
          OverflowAddrReg)
      .add(Base)
      .add(Scale)
      .add(Index)
      .addDisp(Disp, 8)
      .add(Segment)
      .setMemRefs(LoadOnlyMMO);

  // If the type needs more than 8-byte alignment, align the address;
  // otherwise just copy it to OverflowDestReg.
  if (NeedsAlign) {
    Register TmpReg = MRI.createVirtualRegister(AddrRegClass);

    // aligned_addr = (addr + (align-1)) & ~(align-1)
    BuildMI(
        overflowMBB, DL,
        TII->get(Subtarget.isTarget64BitLP64() ? X86::ADD64ri32 : X86::ADD32ri),
        TmpReg)
        .addReg(OverflowAddrReg)
        .addImm(Alignment.value() - 1);

    BuildMI(
        overflowMBB, DL,
        TII->get(Subtarget.isTarget64BitLP64() ? X86::AND64ri32 : X86::AND32ri),
        OverflowDestReg)
        .addReg(TmpReg)
        .addImm(~(uint64_t)(Alignment.value() - 1));
  } else {
    BuildMI(overflowMBB, DL, TII->get(TargetOpcode::COPY), OverflowDestReg)
        .addReg(OverflowAddrReg);
  }

  // Compute the next overflow address after this argument
  // (kept 8-byte aligned via ArgSizeA8).
  Register NextAddrReg = MRI.createVirtualRegister(AddrRegClass);
  BuildMI(
      overflowMBB, DL,
      TII->get(Subtarget.isTarget64BitLP64() ? X86::ADD64ri32 : X86::ADD32ri),
      NextAddrReg)
      .addReg(OverflowDestReg)
      .addImm(ArgSizeA8);

  // Store the new overflow address back into the va_list.
  BuildMI(overflowMBB, DL,
          TII->get(Subtarget.isTarget64BitLP64() ? X86::MOV64mr : X86::MOV32mr))
      .add(Base)
      .add(Scale)
      .add(Index)
      .addDisp(Disp, 8)
      .add(Segment)
      .addReg(NextAddrReg)
      .setMemRefs(StoreOnlyMMO);

  // If we branched, emit the PHI merging the two computed addresses at the
  // front of endMBB.
  if (offsetMBB) {
    BuildMI(*endMBB, endMBB->begin(), DL,
            TII->get(X86::PHI), DestReg)
        .addReg(OffsetDestReg).addMBB(offsetMBB)
        .addReg(OverflowDestReg).addMBB(overflowMBB);
  }

  // Erase the pseudo instruction.
  MI.eraseFromParent();

  return endMBB;
}
| 32560 | |
| 32561 | |
| 32562 | |
| 32563 | |
| 32564 | |
| 32565 | |
| 32566 | static bool checkAndUpdateEFLAGSKill(MachineBasicBlock::iterator SelectItr, |
| 32567 | MachineBasicBlock* BB, |
| 32568 | const TargetRegisterInfo* TRI) { |
| 32569 | if (isEFLAGSLiveAfter(SelectItr, BB)) |
| 32570 | return false; |
| 32571 | |
| 32572 | |
| 32573 | |
| 32574 | SelectItr->addRegisterKilled(X86::EFLAGS, TRI); |
| 32575 | return true; |
| 32576 | } |
| 32577 | |
| 32578 | |
| 32579 | |
| 32580 | |
| 32581 | static bool isCMOVPseudo(MachineInstr &MI) { |
| 32582 | switch (MI.getOpcode()) { |
| 32583 | case X86::CMOV_FR32: |
| 32584 | case X86::CMOV_FR32X: |
| 32585 | case X86::CMOV_FR64: |
| 32586 | case X86::CMOV_FR64X: |
| 32587 | case X86::CMOV_GR8: |
| 32588 | case X86::CMOV_GR16: |
| 32589 | case X86::CMOV_GR32: |
| 32590 | case X86::CMOV_RFP32: |
| 32591 | case X86::CMOV_RFP64: |
| 32592 | case X86::CMOV_RFP80: |
| 32593 | case X86::CMOV_VR64: |
| 32594 | case X86::CMOV_VR128: |
| 32595 | case X86::CMOV_VR128X: |
| 32596 | case X86::CMOV_VR256: |
| 32597 | case X86::CMOV_VR256X: |
| 32598 | case X86::CMOV_VR512: |
| 32599 | case X86::CMOV_VK1: |
| 32600 | case X86::CMOV_VK2: |
| 32601 | case X86::CMOV_VK4: |
| 32602 | case X86::CMOV_VK8: |
| 32603 | case X86::CMOV_VK16: |
| 32604 | case X86::CMOV_VK32: |
| 32605 | case X86::CMOV_VK64: |
| 32606 | return true; |
| 32607 | |
| 32608 | default: |
| 32609 | return false; |
| 32610 | } |
| 32611 | } |
| 32612 | |
| 32613 | |
| 32614 | |
| 32615 | |
| 32616 | |
| 32617 | |
// Helper which inserts, for every CMOV in [MIItBegin, MIItEnd), a PHI
//   %Result(i) = phi [ %FalseValue(i), FalseMBB ], [ %TrueValue(i), TrueMBB ]
// into SinkMBB.  When later CMOVs reference the results of earlier ones, the
// PHIs must instead be chained through the earlier PHIs' true/false inputs;
// RegRewriteTable records the mapping that makes this work.
static MachineInstrBuilder createPHIsForCMOVsInSinkBB(
    MachineBasicBlock::iterator MIItBegin, MachineBasicBlock::iterator MIItEnd,
    MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB,
    MachineBasicBlock *SinkMBB) {
  MachineFunction *MF = TrueMBB->getParent();
  const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
  const DebugLoc &DL = MIItBegin->getDebugLoc();

  // All CMOVs in the range use either CC or its opposite (the caller
  // guarantees this); opposite-condition ones get their operands swapped.
  X86::CondCode CC = X86::CondCode(MIItBegin->getOperand(3).getImm());
  X86::CondCode OppCC = X86::GetOppositeBranchCondition(CC);

  MachineBasicBlock::iterator SinkInsertionPoint = SinkMBB->begin();

  // Map from an earlier PHI's destination register to the (false, true)
  // input pair that produced it, so later PHIs can reference the inputs
  // rather than the (block-local) PHI result.
  DenseMap<unsigned, std::pair<unsigned, unsigned>> RegRewriteTable;
  MachineInstrBuilder MIB;

  for (MachineBasicBlock::iterator MIIt = MIItBegin; MIIt != MIItEnd; ++MIIt) {
    Register DestReg = MIIt->getOperand(0).getReg();
    Register Op1Reg = MIIt->getOperand(1).getReg();
    Register Op2Reg = MIIt->getOperand(2).getReg();

    // If this CMOV uses the opposite condition from the jump we generated,
    // swap the operands of the PHI being generated.
    if (MIIt->getOperand(3).getImm() == OppCC)
      std::swap(Op1Reg, Op2Reg);

    // Rewrite references to earlier PHI results with the corresponding
    // false/true input register.
    if (RegRewriteTable.find(Op1Reg) != RegRewriteTable.end())
      Op1Reg = RegRewriteTable[Op1Reg].first;

    if (RegRewriteTable.find(Op2Reg) != RegRewriteTable.end())
      Op2Reg = RegRewriteTable[Op2Reg].second;

    MIB = BuildMI(*SinkMBB, SinkInsertionPoint, DL, TII->get(X86::PHI), DestReg)
              .addReg(Op1Reg)
              .addMBB(FalseMBB)
              .addReg(Op2Reg)
              .addMBB(TrueMBB);

    // Record this PHI in the rewrite table for any later CMOV that uses it.
    RegRewriteTable[DestReg] = std::make_pair(Op1Reg, Op2Reg);
  }

  return MIB;
}
| 32669 | |
| 32670 | |
// Lower a cascaded select of the form
//   (SecondCascadedCMOV (FirstCMOV F, T, cc1), T, cc2)
// into two successive conditional branches that share a single sink block:
//
//  ThisMBB:
//   jcc1 SinkMBB                  ; cc1 true -> take T directly
//  FirstInsertedMBB:              ; cc1 false
//   jcc2 SinkMBB                  ; cc2 true -> take T
//  SecondInsertedMBB:             ; both false -> take F
//  SinkMBB:
//   %d = phi [F, SecondInsertedMBB], [T, ThisMBB], [T, FirstInsertedMBB]
//   %second_result = COPY %d
MachineBasicBlock *
X86TargetLowering::EmitLoweredCascadedSelect(MachineInstr &FirstCMOV,
                                             MachineInstr &SecondCascadedCMOV,
                                             MachineBasicBlock *ThisMBB) const {
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  const DebugLoc &DL = FirstCMOV.getDebugLoc();

  const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock();
  MachineFunction *F = ThisMBB->getParent();
  MachineBasicBlock *FirstInsertedMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *SecondInsertedMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(LLVM_BB);

  MachineFunction::iterator It = ++ThisMBB->getIterator();
  F->insert(It, FirstInsertedMBB);
  F->insert(It, SecondInsertedMBB);
  F->insert(It, SinkMBB);

  // The second branch still reads EFLAGS, so the flags are live into the
  // first inserted block unconditionally.
  FirstInsertedMBB->addLiveIn(X86::EFLAGS);

  // If the EFLAGS register isn't dead in the terminator, then claim that
  // it's live into the remaining new blocks as well.
  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
  if (!SecondCascadedCMOV.killsRegister(X86::EFLAGS) &&
      !checkAndUpdateEFLAGSKill(SecondCascadedCMOV, ThisMBB, TRI)) {
    SecondInsertedMBB->addLiveIn(X86::EFLAGS);
    SinkMBB->addLiveIn(X86::EFLAGS);
  }

  // Transfer the remainder of ThisMBB and its successor edges to SinkMBB.
  SinkMBB->splice(SinkMBB->begin(), ThisMBB,
                  std::next(MachineBasicBlock::iterator(FirstCMOV)),
                  ThisMBB->end());
  SinkMBB->transferSuccessorsAndUpdatePHIs(ThisMBB);

  // Fallthrough block for ThisMBB.
  ThisMBB->addSuccessor(FirstInsertedMBB);
  // The true-block target of the first branch is SinkMBB.
  ThisMBB->addSuccessor(SinkMBB);
  // Fallthrough block for FirstInsertedMBB.
  FirstInsertedMBB->addSuccessor(SecondInsertedMBB);
  // The true-block target of the second branch is also SinkMBB.
  FirstInsertedMBB->addSuccessor(SinkMBB);
  // SecondInsertedMBB simply falls through.
  SecondInsertedMBB->addSuccessor(SinkMBB);

  // Create the two conditional branches.
  X86::CondCode FirstCC = X86::CondCode(FirstCMOV.getOperand(3).getImm());
  BuildMI(ThisMBB, DL, TII->get(X86::JCC_1)).addMBB(SinkMBB).addImm(FirstCC);

  X86::CondCode SecondCC =
      X86::CondCode(SecondCascadedCMOV.getOperand(3).getImm());
  BuildMI(FirstInsertedMBB, DL, TII->get(X86::JCC_1)).addMBB(SinkMBB).addImm(SecondCC);

  //  SinkMBB:
  //   %Result = phi [ %FalseValue, SecondInsertedMBB ],
  //                 [ %TrueValue,  ThisMBB ], ...
  Register DestReg = FirstCMOV.getOperand(0).getReg();
  Register Op1Reg = FirstCMOV.getOperand(1).getReg();
  Register Op2Reg = FirstCMOV.getOperand(2).getReg();
  MachineInstrBuilder MIB =
      BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII->get(X86::PHI), DestReg)
          .addReg(Op1Reg)
          .addMBB(SecondInsertedMBB)
          .addReg(Op2Reg)
          .addMBB(ThisMBB);

  // Add the third incoming value: coming from FirstInsertedMBB the result
  // is the first CMOV's true operand.
  MIB.addReg(FirstCMOV.getOperand(2).getReg()).addMBB(FirstInsertedMBB);

  // The second CMOV's result is the same PHI value; materialize it with a
  // COPY placed right after the PHI.
  BuildMI(*SinkMBB, std::next(MachineBasicBlock::iterator(MIB.getInstr())), DL,
          TII->get(TargetOpcode::COPY),
          SecondCascadedCMOV.getOperand(0).getReg())
      .addReg(FirstCMOV.getOperand(0).getReg());

  // Now remove the CMOV pseudos.
  FirstCMOV.eraseFromParent();
  SecondCascadedCMOV.eraseFromParent();

  return SinkMBB;
}
| 32826 | |
MachineBasicBlock *
X86TargetLowering::EmitLoweredSelect(MachineInstr &MI,
                                     MachineBasicBlock *ThisMBB) const {
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  const DebugLoc &DL = MI.getDebugLoc();

  // To "insert" a SELECT_CC instruction, we actually have to insert the
  // diamond control-flow pattern:
  //
  //  ThisMBB:
  //   jCC SinkMBB
  //  FalseMBB:          ; fallthrough
  //  SinkMBB:
  //   %Result = phi [ %FalseValue, FalseMBB ], [ %TrueValue, ThisMBB ]
  //
  // Two multi-CMOV shapes are recognized to avoid emitting one diamond per
  // CMOV:
  //  1) a run of consecutive CMOV pseudos using the same condition (or its
  //     opposite) — all share a single diamond, with chained PHIs;
  //  2) a "cascaded" pair where the second CMOV consumes the first's result
  //     — handled by EmitLoweredCascadedSelect.
  X86::CondCode CC = X86::CondCode(MI.getOperand(3).getImm());
  X86::CondCode OppCC = X86::GetOppositeBranchCondition(CC);
  MachineInstr *LastCMOV = &MI;
  MachineBasicBlock::iterator NextMIIt = MachineBasicBlock::iterator(MI);

  // Check for case 1 first; of the two multi-CMOV lowerings it saves the
  // most jumps.
  if (isCMOVPseudo(MI)) {
    // See if we have a string of CMOVs with the same (or opposite)
    // condition, skipping over debug instructions.
    while (NextMIIt != ThisMBB->end() && isCMOVPseudo(*NextMIIt) &&
           (NextMIIt->getOperand(3).getImm() == CC ||
            NextMIIt->getOperand(3).getImm() == OppCC)) {
      LastCMOV = &*NextMIIt;
      NextMIIt = next_nodbg(NextMIIt, ThisMBB->end());
    }
  }

  // This checks for case 2 (a cascaded CMOV pair), but only if we did not
  // already find case 1 (LastCMOV still == MI).
  if (LastCMOV == &MI && NextMIIt != ThisMBB->end() &&
      NextMIIt->getOpcode() == MI.getOpcode() &&
      NextMIIt->getOperand(2).getReg() == MI.getOperand(2).getReg() &&
      NextMIIt->getOperand(1).getReg() == MI.getOperand(0).getReg() &&
      NextMIIt->getOperand(1).isKill()) {
    return EmitLoweredCascadedSelect(MI, *NextMIIt, ThisMBB);
  }

  const BasicBlock *LLVM_BB = ThisMBB->getBasicBlock();
  MachineFunction *F = ThisMBB->getParent();
  MachineBasicBlock *FalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *SinkMBB = F->CreateMachineBasicBlock(LLVM_BB);

  MachineFunction::iterator It = ++ThisMBB->getIterator();
  F->insert(It, FalseMBB);
  F->insert(It, SinkMBB);

  // If the EFLAGS register isn't dead after the last CMOV, then claim that
  // it's live into the fallthrough and sink blocks.
  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
  if (!LastCMOV->killsRegister(X86::EFLAGS) &&
      !checkAndUpdateEFLAGSKill(LastCMOV, ThisMBB, TRI)) {
    FalseMBB->addLiveIn(X86::EFLAGS);
    SinkMBB->addLiveIn(X86::EFLAGS);
  }

  // Move any debug instructions inside the CMOV range into SinkMBB so the
  // range [MI, LastCMOV] contains only CMOVs when the PHIs are built.
  auto DbgEnd = MachineBasicBlock::iterator(LastCMOV);
  auto DbgIt = MachineBasicBlock::iterator(MI);
  while (DbgIt != DbgEnd) {
    auto Next = std::next(DbgIt);
    if (DbgIt->isDebugInstr())
      SinkMBB->push_back(DbgIt->removeFromParent());
    DbgIt = Next;
  }

  // Transfer the remainder of ThisMBB and its successor edges to SinkMBB.
  SinkMBB->splice(SinkMBB->end(), ThisMBB,
                  std::next(MachineBasicBlock::iterator(LastCMOV)),
                  ThisMBB->end());
  SinkMBB->transferSuccessorsAndUpdatePHIs(ThisMBB);

  // Fallthrough block for ThisMBB.
  ThisMBB->addSuccessor(FalseMBB);
  // The true block target of the branch is SinkMBB.
  ThisMBB->addSuccessor(SinkMBB);
  // FalseMBB just falls through.
  FalseMBB->addSuccessor(SinkMBB);

  // Create the conditional branch instruction.
  BuildMI(ThisMBB, DL, TII->get(X86::JCC_1)).addMBB(SinkMBB).addImm(CC);

  //  SinkMBB:
  //   %Result = phi [ %FalseValue, FalseMBB ], [ %TrueValue, ThisMBB ]
  // (one chained PHI per CMOV in the range).
  MachineBasicBlock::iterator MIItBegin = MachineBasicBlock::iterator(MI);
  MachineBasicBlock::iterator MIItEnd =
      std::next(MachineBasicBlock::iterator(LastCMOV));
  createPHIsForCMOVsInSinkBB(MIItBegin, MIItEnd, ThisMBB, FalseMBB, SinkMBB);

  // Now remove the CMOV(s).
  ThisMBB->erase(MIItBegin, MIItEnd);

  return SinkMBB;
}
| 32967 | |
| 32968 | static unsigned getSUBriOpcode(bool IsLP64, int64_t Imm) { |
| 32969 | if (IsLP64) { |
| 32970 | if (isInt<8>(Imm)) |
| 32971 | return X86::SUB64ri8; |
| 32972 | return X86::SUB64ri32; |
| 32973 | } else { |
| 32974 | if (isInt<8>(Imm)) |
| 32975 | return X86::SUB32ri8; |
| 32976 | return X86::SUB32ri; |
| 32977 | } |
| 32978 | } |
| 32979 | |
// Lower a probed dynamic-alloca pseudo into an explicit stack-probing loop.
// Operand 1 is the (virtual-register) allocation size; operand 0 receives the
// final stack pointer. The emitted CFG is:
//   MBB -> testMBB { cmp Final,SP; jge tailMBB } -> blockMBB { probe one page;
//   sub SP, ProbeSize; jmp testMBB } ... -> tailMBB (rest of original block).
MachineBasicBlock *
X86TargetLowering::EmitLoweredProbedAlloca(MachineInstr &MI,
                                           MachineBasicBlock *MBB) const {
  MachineFunction *MF = MBB->getParent();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  const X86FrameLowering &TFI = *Subtarget.getFrameLowering();
  const DebugLoc &DL = MI.getDebugLoc();
  const BasicBlock *LLVM_BB = MBB->getBasicBlock();

  // Distance between successive probes (page-step of the loop).
  const unsigned ProbeSize = getStackProbeSize(*MF);

  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock *testMBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *tailMBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *blockMBB = MF->CreateMachineBasicBlock(LLVM_BB);

  // Place the new blocks immediately after MBB: test, loop body, tail.
  MachineFunction::iterator MBBIter = ++MBB->getIterator();
  MF->insert(MBBIter, testMBB);
  MF->insert(MBBIter, blockMBB);
  MF->insert(MBBIter, tailMBB);

  Register sizeVReg = MI.getOperand(1).getReg();

  Register physSPReg = TFI.Uses64BitFramePtr ? X86::RSP : X86::ESP;

  Register TmpStackPtr = MRI.createVirtualRegister(
      TFI.Uses64BitFramePtr ? &X86::GR64RegClass : &X86::GR32RegClass);
  Register FinalStackPtr = MRI.createVirtualRegister(
      TFI.Uses64BitFramePtr ? &X86::GR64RegClass : &X86::GR32RegClass);

  // FinalStackPtr = SP - size: the target value the loop probes down to.
  BuildMI(*MBB, {MI}, DL, TII->get(TargetOpcode::COPY), TmpStackPtr)
      .addReg(physSPReg);
  {
    const unsigned Opc = TFI.Uses64BitFramePtr ? X86::SUB64rr : X86::SUB32rr;
    BuildMI(*MBB, {MI}, DL, TII->get(Opc), FinalStackPtr)
        .addReg(TmpStackPtr)
        .addReg(sizeVReg);
  }

  // Loop test: once SP has moved at or below the target, we are done.
  BuildMI(testMBB, DL,
          TII->get(TFI.Uses64BitFramePtr ? X86::CMP64rr : X86::CMP32rr))
      .addReg(FinalStackPtr)
      .addReg(physSPReg);

  BuildMI(testMBB, DL, TII->get(X86::JCC_1))
      .addMBB(tailMBB)
      .addImm(X86::COND_GE);
  testMBB->addSuccessor(blockMBB);
  testMBB->addSuccessor(tailMBB);

  // Loop body: touch the page at (SP) with a no-op read-modify-write
  // (XOR mem, 0), then step the stack pointer down by ProbeSize.
  const unsigned XORMIOpc =
      TFI.Uses64BitFramePtr ? X86::XOR64mi8 : X86::XOR32mi8;
  addRegOffset(BuildMI(blockMBB, DL, TII->get(XORMIOpc)), physSPReg, false, 0)
      .addImm(0);

  BuildMI(blockMBB, DL,
          TII->get(getSUBriOpcode(TFI.Uses64BitFramePtr, ProbeSize)), physSPReg)
      .addReg(physSPReg)
      .addImm(ProbeSize);

  // Back-edge to the test block.
  BuildMI(blockMBB, DL, TII->get(X86::JMP_1)).addMBB(testMBB);
  blockMBB->addSuccessor(testMBB);

  // The pseudo's result is the fully-decremented stack pointer.
  BuildMI(tailMBB, DL, TII->get(TargetOpcode::COPY), MI.getOperand(0).getReg())
      .addReg(FinalStackPtr);

  // Move everything after the pseudo into the tail block and rewire the CFG.
  tailMBB->splice(tailMBB->end(), MBB,
                  std::next(MachineBasicBlock::iterator(MI)), MBB->end());
  tailMBB->transferSuccessorsAndUpdatePHIs(MBB);
  MBB->addSuccessor(testMBB);

  // Delete the original pseudo instruction.
  MI.eraseFromParent();

  return tailMBB;
}
| 33073 | |
// Lower a segmented-stack (split-stack) alloca pseudo. Compares the proposed
// new stack pointer against the per-thread stack limit stored in thread-local
// storage; if there is room the stack pointer is simply bumped (bumpMBB),
// otherwise __morestack_allocate_stack_space is called (mallocMBB). The two
// results are merged with a PHI in continueMBB.
MachineBasicBlock *
X86TargetLowering::EmitLoweredSegAlloca(MachineInstr &MI,
                                        MachineBasicBlock *BB) const {
  MachineFunction *MF = BB->getParent();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  const DebugLoc &DL = MI.getDebugLoc();
  const BasicBlock *LLVM_BB = BB->getBasicBlock();

  assert(MF->shouldSplitStack());

  const bool Is64Bit = Subtarget.is64Bit();
  const bool IsLP64 = Subtarget.isTarget64BitLP64();

  // TLS segment register and offset of the stack-limit slot within it.
  // NOTE(review): offsets 0x70/0x40/0x30 presumably match the runtime's TCB
  // layout for LP64/x32/32-bit respectively — confirm against the split-stack
  // runtime before changing.
  const unsigned TlsReg = Is64Bit ? X86::FS : X86::GS;
  const unsigned TlsOffset = IsLP64 ? 0x70 : Is64Bit ? 0x40 : 0x30;

  MachineBasicBlock *mallocMBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *bumpMBB = MF->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *continueMBB = MF->CreateMachineBasicBlock(LLVM_BB);

  MachineRegisterInfo &MRI = MF->getRegInfo();
  const TargetRegisterClass *AddrRegClass =
      getRegClassFor(getPointerTy(MF->getDataLayout()));

  Register mallocPtrVReg = MRI.createVirtualRegister(AddrRegClass),
           bumpSPPtrVReg = MRI.createVirtualRegister(AddrRegClass),
           tmpSPVReg = MRI.createVirtualRegister(AddrRegClass),
           SPLimitVReg = MRI.createVirtualRegister(AddrRegClass),
           sizeVReg = MI.getOperand(1).getReg(),
           physSPReg =
               IsLP64 || Subtarget.isTargetNaCl64() ? X86::RSP : X86::ESP;

  MachineFunction::iterator MBBIter = ++BB->getIterator();

  MF->insert(MBBIter, bumpMBB);
  MF->insert(MBBIter, mallocMBB);
  MF->insert(MBBIter, continueMBB);

  // Everything after the pseudo continues in continueMBB.
  continueMBB->splice(continueMBB->begin(), BB,
                      std::next(MachineBasicBlock::iterator(MI)), BB->end());
  continueMBB->transferSuccessorsAndUpdatePHIs(BB);

  // SPLimitVReg = SP - size; compare it against the TLS stack limit and
  // branch to the malloc path if the limit is exceeded.
  BuildMI(BB, DL, TII->get(TargetOpcode::COPY), tmpSPVReg).addReg(physSPReg);
  BuildMI(BB, DL, TII->get(IsLP64 ? X86::SUB64rr:X86::SUB32rr), SPLimitVReg)
    .addReg(tmpSPVReg).addReg(sizeVReg);
  BuildMI(BB, DL, TII->get(IsLP64 ? X86::CMP64mr:X86::CMP32mr))
    .addReg(0).addImm(1).addReg(0).addImm(TlsOffset).addReg(TlsReg)
    .addReg(SPLimitVReg);
  BuildMI(BB, DL, TII->get(X86::JCC_1)).addMBB(mallocMBB).addImm(X86::COND_G);

  // Fast path: there is room — just move SP down and use that address.
  BuildMI(bumpMBB, DL, TII->get(TargetOpcode::COPY), physSPReg)
    .addReg(SPLimitVReg);
  BuildMI(bumpMBB, DL, TII->get(TargetOpcode::COPY), bumpSPPtrVReg)
    .addReg(SPLimitVReg);
  BuildMI(bumpMBB, DL, TII->get(X86::JMP_1)).addMBB(continueMBB);

  // Slow path: call __morestack_allocate_stack_space. The size argument goes
  // in RDI/EDI (or on the stack for 32-bit) and the result comes back in
  // RAX/EAX.
  const uint32_t *RegMask =
      Subtarget.getRegisterInfo()->getCallPreservedMask(*MF, CallingConv::C);
  if (IsLP64) {
    BuildMI(mallocMBB, DL, TII->get(X86::MOV64rr), X86::RDI)
      .addReg(sizeVReg);
    BuildMI(mallocMBB, DL, TII->get(X86::CALL64pcrel32))
      .addExternalSymbol("__morestack_allocate_stack_space")
      .addRegMask(RegMask)
      .addReg(X86::RDI, RegState::Implicit)
      .addReg(X86::RAX, RegState::ImplicitDefine);
  } else if (Is64Bit) {
    BuildMI(mallocMBB, DL, TII->get(X86::MOV32rr), X86::EDI)
      .addReg(sizeVReg);
    BuildMI(mallocMBB, DL, TII->get(X86::CALL64pcrel32))
      .addExternalSymbol("__morestack_allocate_stack_space")
      .addRegMask(RegMask)
      .addReg(X86::EDI, RegState::Implicit)
      .addReg(X86::EAX, RegState::ImplicitDefine);
  } else {
    // 32-bit: pass the argument on the stack; the 12-byte adjustment plus the
    // 4-byte push keeps the outgoing area 16-byte aligned.
    BuildMI(mallocMBB, DL, TII->get(X86::SUB32ri), physSPReg).addReg(physSPReg)
      .addImm(12);
    BuildMI(mallocMBB, DL, TII->get(X86::PUSH32r)).addReg(sizeVReg);
    BuildMI(mallocMBB, DL, TII->get(X86::CALLpcrel32))
      .addExternalSymbol("__morestack_allocate_stack_space")
      .addRegMask(RegMask)
      .addReg(X86::EAX, RegState::ImplicitDefine);
  }

  // Undo the 16 bytes (12 + pushed argument) of stack adjustment.
  if (!Is64Bit)
    BuildMI(mallocMBB, DL, TII->get(X86::ADD32ri), physSPReg).addReg(physSPReg)
      .addImm(16);

  BuildMI(mallocMBB, DL, TII->get(TargetOpcode::COPY), mallocPtrVReg)
    .addReg(IsLP64 ? X86::RAX : X86::EAX);
  BuildMI(mallocMBB, DL, TII->get(X86::JMP_1)).addMBB(continueMBB);

  // Wire up the CFG.
  BB->addSuccessor(bumpMBB);
  BB->addSuccessor(mallocMBB);
  mallocMBB->addSuccessor(continueMBB);
  bumpMBB->addSuccessor(continueMBB);

  // Merge the two possible result pointers into the pseudo's destination.
  BuildMI(*continueMBB, continueMBB->begin(), DL, TII->get(X86::PHI),
          MI.getOperand(0).getReg())
      .addReg(mallocPtrVReg)
      .addMBB(mallocMBB)
      .addReg(bumpSPPtrVReg)
      .addMBB(bumpMBB);

  // Delete the original pseudo instruction.
  MI.eraseFromParent();

  return continueMBB;
}
| 33207 | |
// Lower a CATCHRET pseudo. On 32-bit targets the catchret target must be a
// distinct EH-pad block, so a RestoreMBB is inserted between BB and the real
// target; 64-bit targets need no extra work here.
MachineBasicBlock *
X86TargetLowering::EmitLoweredCatchRet(MachineInstr &MI,
                                       MachineBasicBlock *BB) const {
  MachineFunction *MF = BB->getParent();
  const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
  MachineBasicBlock *TargetMBB = MI.getOperand(0).getMBB();
  const DebugLoc &DL = MI.getDebugLoc();

  assert(!isAsynchronousEHPersonality(
             classifyEHPersonality(MF->getFunction().getPersonalityFn())) &&
         "SEH does not use catchret!");

  // Only 32-bit targets need the trampoline block below.
  if (!Subtarget.is32Bit())
    return BB;

  // Create RestoreMBB, make it the catchret target, and give it BB's
  // successors.
  MachineBasicBlock *RestoreMBB =
      MF->CreateMachineBasicBlock(BB->getBasicBlock());
  assert(BB->succ_size() == 1);
  MF->insert(std::next(BB->getIterator()), RestoreMBB);
  RestoreMBB->transferSuccessorsAndUpdatePHIs(BB);
  BB->addSuccessor(RestoreMBB);
  MI.getOperand(0).setMBB(RestoreMBB);

  // RestoreMBB is entered via exceptional control flow.
  RestoreMBB->setIsEHPad(true);

  // RestoreMBB just jumps on to the original catchret target.
  auto RestoreMBBI = RestoreMBB->begin();
  BuildMI(*RestoreMBB, RestoreMBBI, DL, TII.get(X86::JMP_4)).addMBB(TargetMBB);
  return BB;
}
| 33242 | |
// Lower a TLS-address pseudo by bracketing it with call-frame setup/destroy
// markers (CALLSEQ_START/CALLSEQ_END), since the pseudo expands to a call
// later. The pseudo itself is left in place between the two markers.
MachineBasicBlock *
X86TargetLowering::EmitLoweredTLSAddr(MachineInstr &MI,
                                      MachineBasicBlock *BB) const {
  const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
  const DebugLoc &DL = MI.getDebugLoc();
  MachineFunction &MF = *BB->getParent();

  // Insert a zero-sized call-frame-setup marker before the pseudo.
  unsigned AdjStackDown = TII.getCallFrameSetupOpcode();
  MachineInstrBuilder CallseqStart =
      BuildMI(MF, DL, TII.get(AdjStackDown)).addImm(0).addImm(0).addImm(0);
  BB->insert(MachineBasicBlock::iterator(MI), CallseqStart);

  // ... and the matching call-frame-destroy marker after it.
  unsigned AdjStackUp = TII.getCallFrameDestroyOpcode();
  MachineInstrBuilder CallseqEnd =
      BuildMI(MF, DL, TII.get(AdjStackUp)).addImm(0).addImm(0);
  BB->insertAfter(MachineBasicBlock::iterator(MI), CallseqEnd);

  return BB;
}
| 33271 | |
// Lower a Darwin TLS-call pseudo (operand 3 is the global TLS variable):
// load the TLV descriptor's function pointer into RDI/EAX and call through
// it; the result is returned implicitly in RAX/EAX.
MachineBasicBlock *
X86TargetLowering::EmitLoweredTLSCall(MachineInstr &MI,
                                      MachineBasicBlock *BB) const {
  MachineFunction *F = BB->getParent();
  const X86InstrInfo *TII = Subtarget.getInstrInfo();
  const DebugLoc &DL = MI.getDebugLoc();

  assert(Subtarget.isTargetDarwin() && "Darwin only instr emitted?");
  assert(MI.getOperand(3).isGlobal() && "This should be a global");

  // Darwin TLS calls preserve more registers than a normal C call on 64-bit;
  // use the dedicated mask there, the plain C mask on 32-bit.
  const uint32_t *RegMask =
      Subtarget.is64Bit() ?
      Subtarget.getRegisterInfo()->getDarwinTLSCallPreservedMask() :
      Subtarget.getRegisterInfo()->getCallPreservedMask(*F, CallingConv::C);
  if (Subtarget.is64Bit()) {
    // RIP-relative load of the descriptor into RDI, then CALL [RDI].
    MachineInstrBuilder MIB =
        BuildMI(*BB, MI, DL, TII->get(X86::MOV64rm), X86::RDI)
            .addReg(X86::RIP)
            .addImm(0)
            .addReg(0)
            .addGlobalAddress(MI.getOperand(3).getGlobal(), 0,
                              MI.getOperand(3).getTargetFlags())
            .addReg(0);
    MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL64m));
    addDirectMem(MIB, X86::RDI);
    MIB.addReg(X86::RAX, RegState::ImplicitDefine).addRegMask(RegMask);
  } else if (!isPositionIndependent()) {
    // Non-PIC 32-bit: absolute load into EAX, then CALL [EAX].
    MachineInstrBuilder MIB =
        BuildMI(*BB, MI, DL, TII->get(X86::MOV32rm), X86::EAX)
            .addReg(0)
            .addImm(0)
            .addReg(0)
            .addGlobalAddress(MI.getOperand(3).getGlobal(), 0,
                              MI.getOperand(3).getTargetFlags())
            .addReg(0);
    MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL32m));
    addDirectMem(MIB, X86::EAX);
    MIB.addReg(X86::EAX, RegState::ImplicitDefine).addRegMask(RegMask);
  } else {
    // PIC 32-bit: load relative to the global base register.
    MachineInstrBuilder MIB =
        BuildMI(*BB, MI, DL, TII->get(X86::MOV32rm), X86::EAX)
            .addReg(TII->getGlobalBaseReg(F))
            .addImm(0)
            .addReg(0)
            .addGlobalAddress(MI.getOperand(3).getGlobal(), 0,
                              MI.getOperand(3).getTargetFlags())
            .addReg(0);
    MIB = BuildMI(*BB, MI, DL, TII->get(X86::CALL32m));
    addDirectMem(MIB, X86::EAX);
    MIB.addReg(X86::EAX, RegState::ImplicitDefine).addRegMask(RegMask);
  }

  // The pseudo is fully replaced by the sequence above.
  MI.eraseFromParent();
  return BB;
}
| 33334 | |
| 33335 | static unsigned getOpcodeForIndirectThunk(unsigned RPOpc) { |
| 33336 | switch (RPOpc) { |
| 33337 | case X86::INDIRECT_THUNK_CALL32: |
| 33338 | return X86::CALLpcrel32; |
| 33339 | case X86::INDIRECT_THUNK_CALL64: |
| 33340 | return X86::CALL64pcrel32; |
| 33341 | case X86::INDIRECT_THUNK_TCRETURN32: |
| 33342 | return X86::TCRETURNdi; |
| 33343 | case X86::INDIRECT_THUNK_TCRETURN64: |
| 33344 | return X86::TCRETURNdi64; |
| 33345 | } |
| 33346 | llvm_unreachable("not indirect thunk opcode"); |
| 33347 | } |
| 33348 | |
// Return the external symbol name of the indirect-call thunk that clobbers
// only \p Reg, chosen by which mitigation feature is enabled: externally
// provided retpoline thunks, LLVM-emitted retpolines, or the LVI CFI thunk.
static const char *getIndirectThunkSymbol(const X86Subtarget &Subtarget,
                                          unsigned Reg) {
  if (Subtarget.useRetpolineExternalThunk()) {
    // Externally supplied thunks use the __x86_indirect_thunk_<reg> naming;
    // each name is tied to the one register the thunk reads the target from.
    switch (Reg) {
    case X86::EAX:
      assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
      return "__x86_indirect_thunk_eax";
    case X86::ECX:
      assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
      return "__x86_indirect_thunk_ecx";
    case X86::EDX:
      assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
      return "__x86_indirect_thunk_edx";
    case X86::EDI:
      assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
      return "__x86_indirect_thunk_edi";
    case X86::R11:
      assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!");
      return "__x86_indirect_thunk_r11";
    }
    llvm_unreachable("unexpected reg for external indirect thunk");
  }

  if (Subtarget.useRetpolineIndirectCalls() ||
      Subtarget.useRetpolineIndirectBranches()) {
    // Compiler-emitted retpolines use the __llvm_retpoline_<reg> naming.
    switch (Reg) {
    case X86::EAX:
      assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
      return "__llvm_retpoline_eax";
    case X86::ECX:
      assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
      return "__llvm_retpoline_ecx";
    case X86::EDX:
      assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
      return "__llvm_retpoline_edx";
    case X86::EDI:
      assert(!Subtarget.is64Bit() && "Should not be using a 32-bit thunk!");
      return "__llvm_retpoline_edi";
    case X86::R11:
      assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!");
      return "__llvm_retpoline_r11";
    }
    llvm_unreachable("unexpected reg for retpoline");
  }

  // LVI control-flow integrity: single r11-based thunk, 64-bit only.
  if (Subtarget.useLVIControlFlowIntegrity()) {
    assert(Subtarget.is64Bit() && "Should not be using a 64-bit thunk!");
    return "__llvm_lvi_thunk_r11";
  }
  llvm_unreachable("getIndirectThunkSymbol() invoked without thunk feature");
}
| 33414 | |
// Lower an INDIRECT_THUNK_{CALL,TCRETURN} pseudo: pick a scratch register not
// used by the call (R11 on 64-bit; EAX/ECX/EDX/EDI on 32-bit), copy the
// callee pointer into it, and rewrite the pseudo into a direct call to the
// matching thunk symbol, which performs the indirect transfer safely.
MachineBasicBlock *
X86TargetLowering::EmitLoweredIndirectThunk(MachineInstr &MI,
                                            MachineBasicBlock *BB) const {
  const DebugLoc &DL = MI.getDebugLoc();
  const X86InstrInfo *TII = Subtarget.getInstrInfo();
  Register CalleeVReg = MI.getOperand(0).getReg();
  unsigned Opc = getOpcodeForIndirectThunk(MI.getOpcode());

  // Candidate scratch registers, in preference order.
  SmallVector<unsigned, 3> AvailableRegs;
  if (Subtarget.is64Bit())
    AvailableRegs.push_back(X86::R11);
  else
    AvailableRegs.append({X86::EAX, X86::ECX, X86::EDX, X86::EDI});

  // Zero out any candidate that the call instruction itself reads
  // (e.g. argument registers) — those must not be clobbered.
  for (const auto &MO : MI.operands()) {
    if (MO.isReg() && MO.isUse())
      for (unsigned &Reg : AvailableRegs)
        if (Reg == MO.getReg())
          Reg = 0;
  }

  // Take the first surviving candidate.
  unsigned AvailableReg = 0;
  for (unsigned MaybeReg : AvailableRegs) {
    if (MaybeReg) {
      AvailableReg = MaybeReg;
      break;
    }
  }
  if (!AvailableReg)
    report_fatal_error("calling convention incompatible with retpoline, no "
                       "available registers");

  const char *Symbol = getIndirectThunkSymbol(Subtarget, AvailableReg);

  // Move the callee into the scratch register, then turn the pseudo into a
  // direct call to the thunk, with the scratch register as an implicit,
  // killed use.
  BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), AvailableReg)
      .addReg(CalleeVReg);
  MI.getOperand(0).ChangeToES(Symbol);
  MI.setDesc(TII->get(Opc));
  MachineInstrBuilder(*BB->getParent(), &MI)
      .addReg(AvailableReg, RegState::Implicit | RegState::Kill);
  return BB;
}
| 33467 | |
| 33468 | |
| 33469 | |
| 33470 | |
| 33471 | |
| 33472 | |
| 33473 | |
| 33474 | |
| 33475 | |
| 33476 | |
| 33477 | |
| 33478 | |
| 33479 | |
// CET shadow-stack support for setjmp: read the current shadow-stack pointer
// (RDSSP) and save it into the jmp_buf at offset 3*ptr-size, so longjmp can
// later unwind the shadow stack to match. MI is the setjmp pseudo whose
// memory operands (starting at slot 1) describe the buffer address.
void X86TargetLowering::emitSetJmpShadowStackFix(MachineInstr &MI,
                                                 MachineBasicBlock *MBB) const {
  const DebugLoc &DL = MI.getDebugLoc();
  MachineFunction *MF = MBB->getParent();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineInstrBuilder MIB;

  // Reuse the pseudo's memory operands for the store below.
  SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands_begin(),
                                           MI.memoperands_end());

  // Materialize zero with XOR of undef inputs (RDSSP leaves its operand
  // unchanged if shadow stacks are disabled, so it must start as zero).
  MVT PVT = getPointerTy(MF->getDataLayout());
  const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
  Register ZReg = MRI.createVirtualRegister(PtrRC);
  unsigned XorRROpc = (PVT == MVT::i64) ? X86::XOR64rr : X86::XOR32rr;
  BuildMI(*MBB, MI, DL, TII->get(XorRROpc))
      .addDef(ZReg)
      .addReg(ZReg, RegState::Undef)
      .addReg(ZReg, RegState::Undef);

  // Read the shadow-stack pointer into SSPCopyReg.
  Register SSPCopyReg = MRI.createVirtualRegister(PtrRC);
  unsigned RdsspOpc = (PVT == MVT::i64) ? X86::RDSSPQ : X86::RDSSPD;
  BuildMI(*MBB, MI, DL, TII->get(RdsspOpc), SSPCopyReg).addReg(ZReg);

  // Store it into the buffer at displacement 3 * pointer-size, copying the
  // pseudo's address operands and adjusting only the displacement.
  unsigned PtrStoreOpc = (PVT == MVT::i64) ? X86::MOV64mr : X86::MOV32mr;
  MIB = BuildMI(*MBB, MI, DL, TII->get(PtrStoreOpc));
  const int64_t SSPOffset = 3 * PVT.getStoreSize();
  const unsigned MemOpndSlot = 1;
  for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
    if (i == X86::AddrDisp)
      MIB.addDisp(MI.getOperand(MemOpndSlot + i), SSPOffset);
    else
      MIB.add(MI.getOperand(MemOpndSlot + i));
  }
  MIB.addReg(SSPCopyReg);
  MIB.setMemRefs(MMOs);
}
| 33521 | |
// Lower an EH SjLj setjmp pseudo. Stores the resume address (restoreMBB) into
// the buffer at offset 1*ptr-size, emits EH_SjLj_Setup, and splits control
// flow so the direct path yields 0 (mainMBB) and the longjmp resume path
// yields 1 (restoreMBB); the two values merge via a PHI in sinkMBB.
MachineBasicBlock *
X86TargetLowering::emitEHSjLjSetJmp(MachineInstr &MI,
                                    MachineBasicBlock *MBB) const {
  const DebugLoc &DL = MI.getDebugLoc();
  MachineFunction *MF = MBB->getParent();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  const BasicBlock *BB = MBB->getBasicBlock();
  MachineFunction::iterator I = ++MBB->getIterator();

  // Keep the pseudo's memory operands for the buffer store.
  SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands_begin(),
                                           MI.memoperands_end());

  unsigned DstReg;
  // Operand layout: result register first, then the address operands.
  unsigned MemOpndSlot = 0;

  unsigned CurOp = 0;

  DstReg = MI.getOperand(CurOp++).getReg();
  const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
  assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
  (void)TRI;
  Register mainDstReg = MRI.createVirtualRegister(RC);
  Register restoreDstReg = MRI.createVirtualRegister(RC);

  MemOpndSlot = CurOp;

  MVT PVT = getPointerTy(MF->getDataLayout());
  assert((PVT == MVT::i64 || PVT == MVT::i32) &&
         "Invalid Pointer Size!");

  // Create the blocks: mainMBB/sinkMBB go right after MBB; restoreMBB is
  // appended at the end of the function and is address-taken (longjmp
  // resumes there).
  MachineBasicBlock *thisMBB = MBB;
  MachineBasicBlock *mainMBB = MF->CreateMachineBasicBlock(BB);
  MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB);
  MachineBasicBlock *restoreMBB = MF->CreateMachineBasicBlock(BB);
  MF->insert(I, mainMBB);
  MF->insert(I, sinkMBB);
  MF->push_back(restoreMBB);
  restoreMBB->setHasAddressTaken();

  MachineInstrBuilder MIB;

  // Everything after the pseudo continues in sinkMBB.
  sinkMBB->splice(sinkMBB->begin(), MBB,
                  std::next(MachineBasicBlock::iterator(MI)), MBB->end());
  sinkMBB->transferSuccessorsAndUpdatePHIs(MBB);

  // Decide how the restore label is stored: as an immediate (small,
  // non-PIC code models) or via a register computed with LEA.
  unsigned PtrStoreOpc = 0;
  unsigned LabelReg = 0;
  const int64_t LabelOffset = 1 * PVT.getStoreSize();
  bool UseImmLabel = (MF->getTarget().getCodeModel() == CodeModel::Small) &&
                     !isPositionIndependent();

  if (!UseImmLabel) {
    PtrStoreOpc = (PVT == MVT::i64) ? X86::MOV64mr : X86::MOV32mr;
    const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
    LabelReg = MRI.createVirtualRegister(PtrRC);
    if (Subtarget.is64Bit()) {
      // LabelReg = RIP-relative address of restoreMBB.
      MIB = BuildMI(*thisMBB, MI, DL, TII->get(X86::LEA64r), LabelReg)
                .addReg(X86::RIP)
                .addImm(0)
                .addReg(0)
                .addMBB(restoreMBB)
                .addReg(0);
    } else {
      // 32-bit PIC: address restoreMBB relative to the global base register.
      const X86InstrInfo *XII = static_cast<const X86InstrInfo*>(TII);
      MIB = BuildMI(*thisMBB, MI, DL, TII->get(X86::LEA32r), LabelReg)
                .addReg(XII->getGlobalBaseReg(MF))
                .addImm(0)
                .addReg(0)
                .addMBB(restoreMBB, Subtarget.classifyBlockAddressReference())
                .addReg(0);
    }
  } else
    PtrStoreOpc = (PVT == MVT::i64) ? X86::MOV64mi32 : X86::MOV32mi;

  // Store the restore label into the buffer at displacement 1 * ptr-size.
  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PtrStoreOpc));
  for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
    if (i == X86::AddrDisp)
      MIB.addDisp(MI.getOperand(MemOpndSlot + i), LabelOffset);
    else
      MIB.add(MI.getOperand(MemOpndSlot + i));
  }
  if (!UseImmLabel)
    MIB.addReg(LabelReg);
  else
    MIB.addMBB(restoreMBB);
  MIB.setMemRefs(MMOs);

  // With CET shadow stacks enabled, also save the shadow-stack pointer.
  if (MF->getMMI().getModule()->getModuleFlag("cf-protection-return")) {
    emitSetJmpShadowStackFix(MI, thisMBB);
  }

  // The setup pseudo marks the point longjmp may return to; nothing is
  // preserved across it.
  MIB = BuildMI(*thisMBB, MI, DL, TII->get(X86::EH_SjLj_Setup))
            .addMBB(restoreMBB);

  const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  MIB.addRegMask(RegInfo->getNoPreservedMask());
  thisMBB->addSuccessor(mainMBB);
  thisMBB->addSuccessor(restoreMBB);

  // Direct fall-through path: result is 0.
  BuildMI(mainMBB, DL, TII->get(X86::MOV32r0), mainDstReg);
  mainMBB->addSuccessor(sinkMBB);

  // Merge the two results into the pseudo's destination register.
  BuildMI(*sinkMBB, sinkMBB->begin(), DL,
          TII->get(X86::PHI), DstReg)
      .addReg(mainDstReg).addMBB(mainMBB)
      .addReg(restoreDstReg).addMBB(restoreMBB);

  // On the longjmp-resume path, reload the base pointer from its save slot
  // if this function uses one.
  if (RegInfo->hasBasePointer(*MF)) {
    const bool Uses64BitFramePtr =
        Subtarget.isTarget64BitLP64() || Subtarget.isTargetNaCl64();
    X86MachineFunctionInfo *X86FI = MF->getInfo<X86MachineFunctionInfo>();
    X86FI->setRestoreBasePointer(MF);
    Register FramePtr = RegInfo->getFrameRegister(*MF);
    Register BasePtr = RegInfo->getBaseRegister();
    unsigned Opm = Uses64BitFramePtr ? X86::MOV64rm : X86::MOV32rm;
    addRegOffset(BuildMI(restoreMBB, DL, TII->get(Opm), BasePtr),
                 FramePtr, true, X86FI->getRestoreBasePointerOffset())
        .setMIFlag(MachineInstr::FrameSetup);
  }
  // Longjmp-resume path: result is 1, then rejoin the normal flow.
  BuildMI(restoreMBB, DL, TII->get(X86::MOV32ri), restoreDstReg).addImm(1);
  BuildMI(restoreMBB, DL, TII->get(X86::JMP_1)).addMBB(sinkMBB);
  restoreMBB->addSuccessor(sinkMBB);

  // Delete the original pseudo instruction.
  MI.eraseFromParent();
  return sinkMBB;
}
| 33676 | |
| 33677 | |
| 33678 | |
| 33679 | |
| 33680 | |
| 33681 | |
| 33682 | MachineBasicBlock * |
| 33683 | X86TargetLowering::emitLongJmpShadowStackFix(MachineInstr &MI, |
| 33684 | MachineBasicBlock *MBB) const { |
| 33685 | const DebugLoc &DL = MI.getDebugLoc(); |
| 33686 | MachineFunction *MF = MBB->getParent(); |
| 33687 | const TargetInstrInfo *TII = Subtarget.getInstrInfo(); |
| 33688 | MachineRegisterInfo &MRI = MF->getRegInfo(); |
| 33689 | |
| 33690 | |
| 33691 | SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands_begin(), |
| 33692 | MI.memoperands_end()); |
| 33693 | |
| 33694 | MVT PVT = getPointerTy(MF->getDataLayout()); |
| 33695 | const TargetRegisterClass *PtrRC = getRegClassFor(PVT); |
| 33696 | |
| 33697 | |
| 33698 | |
| 33699 | |
| 33700 | |
| 33701 | |
| 33702 | |
| 33703 | |
| 33704 | |
| 33705 | |
| 33706 | |
| 33707 | |
| 33708 | |
| 33709 | |
| 33710 | |
| 33711 | |
| 33712 | |
| 33713 | |
| 33714 | |
| 33715 | |
| 33716 | |
| 33717 | |
| 33718 | |
| 33719 | |
| 33720 | |
| 33721 | MachineFunction::iterator I = ++MBB->getIterator(); |
| 33722 | const BasicBlock *BB = MBB->getBasicBlock(); |
| 33723 | |
| 33724 | MachineBasicBlock *checkSspMBB = MF->CreateMachineBasicBlock(BB); |
| 33725 | MachineBasicBlock *fallMBB = MF->CreateMachineBasicBlock(BB); |
| 33726 | MachineBasicBlock *fixShadowMBB = MF->CreateMachineBasicBlock(BB); |
| 33727 | MachineBasicBlock *fixShadowLoopPrepareMBB = MF->CreateMachineBasicBlock(BB); |
| 33728 | MachineBasicBlock *fixShadowLoopMBB = MF->CreateMachineBasicBlock(BB); |
| 33729 | MachineBasicBlock *sinkMBB = MF->CreateMachineBasicBlock(BB); |
| 33730 | MF->insert(I, checkSspMBB); |
| 33731 | MF->insert(I, fallMBB); |
| 33732 | MF->insert(I, fixShadowMBB); |
| 33733 | MF->insert(I, fixShadowLoopPrepareMBB); |
| 33734 | MF->insert(I, fixShadowLoopMBB); |
| 33735 | MF->insert(I, sinkMBB); |
| 33736 | |
| 33737 | |
| 33738 | sinkMBB->splice(sinkMBB->begin(), MBB, MachineBasicBlock::iterator(MI), |
| 33739 | MBB->end()); |
| 33740 | sinkMBB->transferSuccessorsAndUpdatePHIs(MBB); |
| 33741 | |
| 33742 | MBB->addSuccessor(checkSspMBB); |
| 33743 | |
| 33744 | |
| 33745 | Register ZReg = MRI.createVirtualRegister(&X86::GR32RegClass); |
| 33746 | BuildMI(checkSspMBB, DL, TII->get(X86::MOV32r0), ZReg); |
| 33747 | |
| 33748 | if (PVT == MVT::i64) { |
| 33749 | Register TmpZReg = MRI.createVirtualRegister(PtrRC); |
| 33750 | BuildMI(checkSspMBB, DL, TII->get(X86::SUBREG_TO_REG), TmpZReg) |
| 33751 | .addImm(0) |
| 33752 | .addReg(ZReg) |
| 33753 | .addImm(X86::sub_32bit); |
| 33754 | ZReg = TmpZReg; |
| 33755 | } |
| 33756 | |
| 33757 | |
| 33758 | Register SSPCopyReg = MRI.createVirtualRegister(PtrRC); |
| 33759 | unsigned RdsspOpc = (PVT == MVT::i64) ? X86::RDSSPQ : X86::RDSSPD; |
| 33760 | BuildMI(checkSspMBB, DL, TII->get(RdsspOpc), SSPCopyReg).addReg(ZReg); |
| 33761 | |
| 33762 | |
| 33763 | |
| 33764 | unsigned TestRROpc = (PVT == MVT::i64) ? X86::TEST64rr : X86::TEST32rr; |
| 33765 | BuildMI(checkSspMBB, DL, TII->get(TestRROpc)) |
| 33766 | .addReg(SSPCopyReg) |
| 33767 | .addReg(SSPCopyReg); |
| 33768 | BuildMI(checkSspMBB, DL, TII->get(X86::JCC_1)).addMBB(sinkMBB).addImm(X86::COND_E); |
| 33769 | checkSspMBB->addSuccessor(sinkMBB); |
| 33770 | checkSspMBB->addSuccessor(fallMBB); |
| 33771 | |
| 33772 | |
| 33773 | Register PrevSSPReg = MRI.createVirtualRegister(PtrRC); |
| 33774 | unsigned PtrLoadOpc = (PVT == MVT::i64) ? X86::MOV64rm : X86::MOV32rm; |
| 33775 | const int64_t SPPOffset = 3 * PVT.getStoreSize(); |
| 33776 | MachineInstrBuilder MIB = |
| 33777 | BuildMI(fallMBB, DL, TII->get(PtrLoadOpc), PrevSSPReg); |
| 33778 | for (unsigned i = 0; i < X86::AddrNumOperands; ++i) { |
| 33779 | const MachineOperand &MO = MI.getOperand(i); |
| 33780 | if (i == X86::AddrDisp) |
| 33781 | MIB.addDisp(MO, SPPOffset); |
| 33782 | else if (MO.isReg()) |
| 33783 | |
| 33784 | MIB.addReg(MO.getReg()); |
| 33785 | else |
| 33786 | MIB.add(MO); |
| 33787 | } |
| 33788 | MIB.setMemRefs(MMOs); |
| 33789 | |
| 33790 | |
| 33791 | Register SspSubReg = MRI.createVirtualRegister(PtrRC); |
| 33792 | unsigned SubRROpc = (PVT == MVT::i64) ? X86::SUB64rr : X86::SUB32rr; |
| 33793 | BuildMI(fallMBB, DL, TII->get(SubRROpc), SspSubReg) |
| 33794 | .addReg(PrevSSPReg) |
| 33795 | .addReg(SSPCopyReg); |
| 33796 | |
| 33797 | |
| 33798 | BuildMI(fallMBB, DL, TII->get(X86::JCC_1)).addMBB(sinkMBB).addImm(X86::COND_BE); |
| 33799 | fallMBB->addSuccessor(sinkMBB); |
| 33800 | fallMBB->addSuccessor(fixShadowMBB); |
| 33801 | |
| 33802 | |
| 33803 | unsigned ShrRIOpc = (PVT == MVT::i64) ? X86::SHR64ri : X86::SHR32ri; |
| 33804 | unsigned Offset = (PVT == MVT::i64) ? 3 : 2; |
| 33805 | Register SspFirstShrReg = MRI.createVirtualRegister(PtrRC); |
| 33806 | BuildMI(fixShadowMBB, DL, TII->get(ShrRIOpc), SspFirstShrReg) |
| 33807 | .addReg(SspSubReg) |
| 33808 | .addImm(Offset); |
| 33809 | |
| 33810 | |
| 33811 | unsigned IncsspOpc = (PVT == MVT::i64) ? X86::INCSSPQ : X86::INCSSPD; |
| 33812 | BuildMI(fixShadowMBB, DL, TII->get(IncsspOpc)).addReg(SspFirstShrReg); |
| 33813 | |
| 33814 | |
| 33815 | Register SspSecondShrReg = MRI.createVirtualRegister(PtrRC); |
| 33816 | BuildMI(fixShadowMBB, DL, TII->get(ShrRIOpc), SspSecondShrReg) |
| 33817 | .addReg(SspFirstShrReg) |
| 33818 | .addImm(8); |
| 33819 | |
| 33820 | |
| 33821 | BuildMI(fixShadowMBB, DL, TII->get(X86::JCC_1)).addMBB(sinkMBB).addImm(X86::COND_E); |
| 33822 | fixShadowMBB->addSuccessor(sinkMBB); |
| 33823 | fixShadowMBB->addSuccessor(fixShadowLoopPrepareMBB); |
| 33824 | |
| 33825 | |
| 33826 | unsigned ShlR1Opc = (PVT == MVT::i64) ? X86::SHL64r1 : X86::SHL32r1; |
| 33827 | Register SspAfterShlReg = MRI.createVirtualRegister(PtrRC); |
| 33828 | BuildMI(fixShadowLoopPrepareMBB, DL, TII->get(ShlR1Opc), SspAfterShlReg) |
| 33829 | .addReg(SspSecondShrReg); |
| 33830 | |
| 33831 | |
| 33832 | Register Value128InReg = MRI.createVirtualRegister(PtrRC); |
| 33833 | unsigned MovRIOpc = (PVT == MVT::i64) ? X86::MOV64ri32 : X86::MOV32ri; |
| 33834 | BuildMI(fixShadowLoopPrepareMBB, DL, TII->get(MovRIOpc), Value128InReg) |
| 33835 | .addImm(128); |
| 33836 | fixShadowLoopPrepareMBB->addSuccessor(fixShadowLoopMBB); |
| 33837 | |
| 33838 | |
| 33839 | |
| 33840 | Register DecReg = MRI.createVirtualRegister(PtrRC); |
| 33841 | Register CounterReg = MRI.createVirtualRegister(PtrRC); |
| 33842 | BuildMI(fixShadowLoopMBB, DL, TII->get(X86::PHI), CounterReg) |
| 33843 | .addReg(SspAfterShlReg) |
| 33844 | .addMBB(fixShadowLoopPrepareMBB) |
| 33845 | .addReg(DecReg) |
| 33846 | .addMBB(fixShadowLoopMBB); |
| 33847 | |
| 33848 | |
| 33849 | BuildMI(fixShadowLoopMBB, DL, TII->get(IncsspOpc)).addReg(Value128InReg); |
| 33850 | |
| 33851 | |
| 33852 | unsigned DecROpc = (PVT == MVT::i64) ? X86::DEC64r : X86::DEC32r; |
| 33853 | BuildMI(fixShadowLoopMBB, DL, TII->get(DecROpc), DecReg).addReg(CounterReg); |
| 33854 | |
| 33855 | |
| 33856 | BuildMI(fixShadowLoopMBB, DL, TII->get(X86::JCC_1)).addMBB(fixShadowLoopMBB).addImm(X86::COND_NE); |
| 33857 | fixShadowLoopMBB->addSuccessor(sinkMBB); |
| 33858 | fixShadowLoopMBB->addSuccessor(fixShadowLoopMBB); |
| 33859 | |
| 33860 | return sinkMBB; |
| 33861 | } |
| 33862 | |
// Lower the EH_SjLj_LongJmp pseudo: reload the frame pointer, the resume
// address, and the stack pointer from the setjmp buffer addressed by MI's
// memory operands, then indirect-jump to the reloaded resume address.
MachineBasicBlock *
X86TargetLowering::emitEHSjLjLongJmp(MachineInstr &MI,
                                     MachineBasicBlock *MBB) const {
  const DebugLoc &DL = MI.getDebugLoc();
  MachineFunction *MF = MBB->getParent();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  MachineRegisterInfo &MRI = MF->getRegInfo();

  // Memory references: reuse the pseudo's memory operands on every load we
  // emit from the buffer.
  SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands_begin(),
                                           MI.memoperands_end());

  MVT PVT = getPointerTy(MF->getDataLayout());
  assert((PVT == MVT::i64 || PVT == MVT::i32) &&
         "Invalid Pointer Size!");

  const TargetRegisterClass *RC =
      (PVT == MVT::i64) ? &X86::GR64RegClass : &X86::GR32RegClass;
  // Tmp holds the reloaded jump target (the setjmp return label).
  Register Tmp = MRI.createVirtualRegister(RC);

  const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  Register FP = (PVT == MVT::i64) ? X86::RBP : X86::EBP;
  Register SP = RegInfo->getStackRegister();

  MachineInstrBuilder MIB;

  // Buffer layout (pointer-sized slots): slot 0 = saved FP, slot 1 = resume
  // label, slot 2 = saved SP.
  const int64_t LabelOffset = 1 * PVT.getStoreSize();
  const int64_t SPOffset = 2 * PVT.getStoreSize();

  unsigned PtrLoadOpc = (PVT == MVT::i64) ? X86::MOV64rm : X86::MOV32rm;
  unsigned IJmpOpc = (PVT == MVT::i64) ? X86::JMP64r : X86::JMP32r;

  MachineBasicBlock *thisMBB = MBB;

  // If the module enables CET shadow-stack return protection, emit the fixup
  // sequence first so the shadow stack is unwound to match the frame we are
  // jumping back to (see emitLongJmpShadowStackFix).
  if (MF->getMMI().getModule()->getModuleFlag("cf-protection-return")) {
    thisMBB = emitLongJmpShadowStackFix(MI, thisMBB);
  }

  // Reload FP from slot 0 of the buffer.
  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PtrLoadOpc), FP);
  for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
    const MachineOperand &MO = MI.getOperand(i);
    if (MO.isReg())
      // Add only the register, not the whole operand, so stale kill flags
      // from the pseudo are not copied onto the new instruction.
      MIB.addReg(MO.getReg());
    else
      MIB.add(MO);
  }
  MIB.setMemRefs(MMOs);

  // Reload the resume IP from slot 1 into Tmp.
  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PtrLoadOpc), Tmp);
  for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
    const MachineOperand &MO = MI.getOperand(i);
    if (i == X86::AddrDisp)
      MIB.addDisp(MO, LabelOffset);
    else if (MO.isReg())
      // As above: drop kill flags.
      MIB.addReg(MO.getReg());
    else
      MIB.add(MO);
  }
  MIB.setMemRefs(MMOs);

  // Reload SP from slot 2.
  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PtrLoadOpc), SP);
  for (unsigned i = 0; i < X86::AddrNumOperands; ++i) {
    if (i == X86::AddrDisp)
      MIB.addDisp(MI.getOperand(i), SPOffset);
    else
      MIB.add(MI.getOperand(i));
    // Note: kill flags are harmless here since this is the last buffer load.
  }
  MIB.setMemRefs(MMOs);

  // Jump to the restored instruction pointer.
  BuildMI(*thisMBB, MI, DL, TII->get(IJmpOpc)).addReg(Tmp);

  MI.eraseFromParent();
  return thisMBB;
}
| 33945 | |
// Store the address of the SjLj dispatch block into the function context at
// frame index FI. With the small, non-PIC code model the block address can be
// stored as an immediate; otherwise it is materialized into a register with
// LEA and stored from there.
void X86TargetLowering::SetupEntryBlockForSjLj(MachineInstr &MI,
                                               MachineBasicBlock *MBB,
                                               MachineBasicBlock *DispatchBB,
                                               int FI) const {
  const DebugLoc &DL = MI.getDebugLoc();
  MachineFunction *MF = MBB->getParent();
  MachineRegisterInfo *MRI = &MF->getRegInfo();
  const X86InstrInfo *TII = Subtarget.getInstrInfo();

  MVT PVT = getPointerTy(MF->getDataLayout());
  assert((PVT == MVT::i64 || PVT == MVT::i32) && "Invalid Pointer Size!");

  unsigned Op = 0;  // opcode of the store into the function context
  unsigned VR = 0;  // vreg holding the dispatch address when !UseImmLabel

  // A direct mem-imm store of the block address only works when addresses fit
  // in an imm32 and no PIC relocation is needed.
  bool UseImmLabel = (MF->getTarget().getCodeModel() == CodeModel::Small) &&
                     !isPositionIndependent();

  if (UseImmLabel) {
    Op = (PVT == MVT::i64) ? X86::MOV64mi32 : X86::MOV32mi;
  } else {
    const TargetRegisterClass *TRC =
        (PVT == MVT::i64) ? &X86::GR64RegClass : &X86::GR32RegClass;
    VR = MRI->createVirtualRegister(TRC);
    Op = (PVT == MVT::i64) ? X86::MOV64mr : X86::MOV32mr;

    if (Subtarget.is64Bit())
      // 64-bit: RIP-relative LEA of the dispatch block address.
      BuildMI(*MBB, MI, DL, TII->get(X86::LEA64r), VR)
          .addReg(X86::RIP)
          .addImm(1)
          .addReg(0)
          .addMBB(DispatchBB)
          .addReg(0);
    else
      // 32-bit: absolute LEA; the block reference is classified so PIC
      // targets get the right relocation flags.
      BuildMI(*MBB, MI, DL, TII->get(X86::LEA32r), VR)
          .addReg(0)
          .addImm(1)
          .addReg(0)
          .addMBB(DispatchBB, Subtarget.classifyBlockAddressReference())
          .addReg(0);
  }

  // Store into the function context at FI + 56 (64-bit) / FI + 36 (32-bit) —
  // presumably the slot the SjLj personality reads the dispatch address from;
  // the offset must match the context layout used by the EH runtime.
  MachineInstrBuilder MIB = BuildMI(*MBB, MI, DL, TII->get(Op));
  addFrameReference(MIB, FI, Subtarget.is64Bit() ? 56 : 36);
  if (UseImmLabel)
    MIB.addMBB(DispatchBB);
  else
    MIB.addReg(VR);
}
| 33995 | |
// Build the SjLj dispatch table machinery for Int_eh_sjlj_setup_dispatch:
// collect all landing pads by call-site number, emit a dispatch block that
// bounds-checks the call-site index stored in the function context and
// indirect-jumps through a jump table to the right landing pad, and rewire
// every invoke block to target the new dispatch block instead of its pads.
MachineBasicBlock *
X86TargetLowering::EmitSjLjDispatchBlock(MachineInstr &MI,
                                         MachineBasicBlock *BB) const {
  const DebugLoc &DL = MI.getDebugLoc();
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo *MRI = &MF->getRegInfo();
  const X86InstrInfo *TII = Subtarget.getInstrInfo();
  int FI = MF->getFrameInfo().getFunctionContextIndex();

  // Map each call-site number to the landing pads associated with it, keyed
  // by the EH_LABEL symbol that opens each landing-pad block.
  DenseMap<unsigned, SmallVector<MachineBasicBlock *, 2>> CallSiteNumToLPad;
  unsigned MaxCSNum = 0;
  for (auto &MBB : *MF) {
    if (!MBB.isEHPad())
      continue;

    // The first non-debug instruction of an EH pad must be its EH_LABEL.
    MCSymbol *Sym = nullptr;
    for (const auto &MI : MBB) {
      if (MI.isDebugInstr())
        continue;

      assert(MI.isEHLabel() && "expected EH_LABEL");
      Sym = MI.getOperand(0).getMCSymbol();
      break;
    }

    if (!MF->hasCallSiteLandingPad(Sym))
      continue;

    for (unsigned CSI : MF->getCallSiteLandingPad(Sym)) {
      CallSiteNumToLPad[CSI].push_back(&MBB);
      MaxCSNum = std::max(MaxCSNum, CSI);
    }
  }

  // Flatten into a list ordered by call-site number (the jump-table order)
  // and record every predecessor of a landing pad (the invoke blocks).
  std::vector<MachineBasicBlock *> LPadList;
  SmallPtrSet<MachineBasicBlock *, 32> InvokeBBs;
  LPadList.reserve(CallSiteNumToLPad.size());

  for (unsigned CSI = 1; CSI <= MaxCSNum; ++CSI) {
    for (auto &LP : CallSiteNumToLPad[CSI]) {
      LPadList.push_back(LP);
      InvokeBBs.insert(LP->pred_begin(), LP->pred_end());
    }
  }

  assert(!LPadList.empty() &&
         "No landing pad destinations for the dispatch jump table!");

  // Create the dispatch blocks:
  //   DispatchBB  - bounds-check + setup, the function's single EH pad
  //   TrapBB      - reached when the call-site index is out of range
  //   DispContBB  - the indirect jump through the jump table
  MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock();
  DispatchBB->setIsEHPad(true);

  MachineBasicBlock *TrapBB = MF->CreateMachineBasicBlock();
  BuildMI(TrapBB, DL, TII->get(X86::TRAP));
  DispatchBB->addSuccessor(TrapBB);

  MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock();
  DispatchBB->addSuccessor(DispContBB);

  // Insert the new MBBs at the end of the function.
  MF->push_back(DispatchBB);
  MF->push_back(DispContBB);
  MF->push_back(TrapBB);

  // Record the dispatch block's address in the function context so the EH
  // runtime can transfer control here.
  SetupEntryBlockForSjLj(MI, BB, DispatchBB, FI);

  // Create the jump table over the ordered landing-pad list.
  unsigned JTE = getJumpTableEncoding();
  MachineJumpTableInfo *JTI = MF->getOrCreateJumpTableInfo(JTE);
  unsigned MJTI = JTI->createJumpTableIndex(LPadList);

  const X86RegisterInfo &RI = TII->getRegisterInfo();
  // Attach a no-preserved regmask so everything is treated as clobbered on
  // entry to the dispatch; if a base pointer is in use, also reload it from
  // its save slot since control arrives here from the EH runtime.
  if (RI.hasBasePointer(*MF)) {
    const bool FPIs64Bit =
        Subtarget.isTarget64BitLP64() || Subtarget.isTargetNaCl64();
    X86MachineFunctionInfo *MFI = MF->getInfo<X86MachineFunctionInfo>();
    MFI->setRestoreBasePointer(MF);

    Register FP = RI.getFrameRegister(*MF);
    Register BP = RI.getBaseRegister();
    unsigned Op = FPIs64Bit ? X86::MOV64rm : X86::MOV32rm;
    addRegOffset(BuildMI(DispatchBB, DL, TII->get(Op), BP), FP, true,
                 MFI->getRestoreBasePointerOffset())
        .addRegMask(RI.getNoPreservedMask());
  } else {
    // No base pointer: hang the regmask on a NOOP.
    BuildMI(DispatchBB, DL, TII->get(X86::NOOP))
        .addRegMask(RI.getNoPreservedMask());
  }

  // Load the call-site index from the function context. IReg is used as an
  // index in a memory operand below, so it cannot be SP.
  Register IReg = MRI->createVirtualRegister(&X86::GR32_NOSPRegClass);
  addFrameReference(BuildMI(DispatchBB, DL, TII->get(X86::MOV32rm), IReg), FI,
                    Subtarget.is64Bit() ? 8 : 4);
  // Trap if the index is out of the jump table's range.
  BuildMI(DispatchBB, DL, TII->get(X86::CMP32ri))
      .addReg(IReg)
      .addImm(LPadList.size());
  BuildMI(DispatchBB, DL, TII->get(X86::JCC_1)).addMBB(TrapBB).addImm(X86::COND_AE);

  if (Subtarget.is64Bit()) {
    Register BReg = MRI->createVirtualRegister(&X86::GR64RegClass);
    Register IReg64 = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);

    // leaq .LJTI(%rip), BReg  -- jump table base
    BuildMI(DispContBB, DL, TII->get(X86::LEA64r), BReg)
        .addReg(X86::RIP)
        .addImm(1)
        .addReg(0)
        .addJumpTableIndex(MJTI)
        .addReg(0);
    // Zero-extend the 32-bit index to 64 bits for addressing.
    BuildMI(DispContBB, DL, TII->get(TargetOpcode::SUBREG_TO_REG), IReg64)
        .addImm(0)
        .addReg(IReg)
        .addImm(X86::sub_32bit);

    switch (JTE) {
    case MachineJumpTableInfo::EK_BlockAddress:
      // Absolute entries: jmpq *(BReg,IReg64,8)
      BuildMI(DispContBB, DL, TII->get(X86::JMP64m))
          .addReg(BReg)
          .addImm(8)
          .addReg(IReg64)
          .addImm(0)
          .addReg(0);
      break;
    case MachineJumpTableInfo::EK_LabelDifference32: {
      Register OReg = MRI->createVirtualRegister(&X86::GR32RegClass);
      Register OReg64 = MRI->createVirtualRegister(&X86::GR64RegClass);
      Register TReg = MRI->createVirtualRegister(&X86::GR64RegClass);

      // Relative entries: load the 32-bit offset...
      BuildMI(DispContBB, DL, TII->get(X86::MOV32rm), OReg)
          .addReg(BReg)
          .addImm(4)
          .addReg(IReg64)
          .addImm(0)
          .addReg(0);
      // ...sign-extend it...
      BuildMI(DispContBB, DL, TII->get(X86::MOVSX64rr32), OReg64).addReg(OReg);
      // ...add the table base back...
      BuildMI(DispContBB, DL, TII->get(X86::ADD64rr), TReg)
          .addReg(OReg64)
          .addReg(BReg);
      // ...and jump to the computed target.
      BuildMI(DispContBB, DL, TII->get(X86::JMP64r)).addReg(TReg);
      break;
    }
    default:
      llvm_unreachable("Unexpected jump table encoding");
    }
  } else {
    // 32-bit: jmpl *.LJTI(,IReg,4)
    BuildMI(DispContBB, DL, TII->get(X86::JMP32m))
        .addReg(0)
        .addImm(4)
        .addReg(IReg)
        .addJumpTableIndex(MJTI)
        .addReg(0);
  }

  // Add each distinct landing pad as a successor of the dispatch jump.
  SmallPtrSet<MachineBasicBlock *, 8> SeenMBBs;
  for (auto &LP : LPadList)
    if (SeenMBBs.insert(LP).second)
      DispContBB->addSuccessor(LP);

  // Rewire every invoke block: drop its landing-pad successors and make it
  // branch (exceptionally) to the dispatch block instead.
  SmallVector<MachineBasicBlock *, 64> MBBLPads;
  const MCPhysReg *SavedRegs = MF->getRegInfo().getCalleeSavedRegs();
  for (MachineBasicBlock *MBB : InvokeBBs) {
    // Iterate over a copy of the successor list since we modify it below.
    SmallVector<MachineBasicBlock *, 8> Successors(MBB->succ_rbegin(),
                                                   MBB->succ_rend());

    for (auto MBBS : Successors) {
      if (MBBS->isEHPad()) {
        MBB->removeSuccessor(MBBS);
        MBBLPads.push_back(MBBS);
      }
    }

    MBB->addSuccessor(DispatchBB);

    // Find the last call in the block and mark every callee-saved register
    // that is not already an operand of it as implicit-def + dead —
    // presumably so those registers are considered clobbered across the EH
    // edge and get spilled rather than live through the dispatch.
    for (auto &II : reverse(*MBB)) {
      if (!II.isCall())
        continue;

      DenseMap<unsigned, bool> DefRegs;
      for (auto &MOp : II.operands())
        if (MOp.isReg())
          DefRegs[MOp.getReg()] = true;

      MachineInstrBuilder MIB(*MF, &II);
      for (unsigned RegIdx = 0; SavedRegs[RegIdx]; ++RegIdx) {
        unsigned Reg = SavedRegs[RegIdx];
        if (!DefRegs[Reg])
          MIB.addReg(Reg, RegState::ImplicitDefine | RegState::Dead);
      }

      // Only the last call in the block needs this treatment.
      break;
    }
  }

  // The original landing pads are now ordinary blocks; the dispatch block is
  // the function's only EH pad.
  for (auto &LP : MBBLPads)
    LP->setIsEHPad(false);

  // The pseudo has been fully expanded.
  MI.eraseFromParent();
  return BB;
}
| 34223 | |
// Expand pseudo-instructions that were marked usesCustomInserter. Most cases
// delegate to a dedicated Emit* helper; the inline cases below expand the
// pseudo in place and erase it.
MachineBasicBlock *
X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                                               MachineBasicBlock *BB) const {
  MachineFunction *MF = BB->getParent();
  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
  const DebugLoc &DL = MI.getDebugLoc();

  // Map an AMX tile index immediate (0-7) to the corresponding TMM register.
  auto TMMImmToTMMReg = [](unsigned Imm) {
    assert (Imm < 8 && "Illegal tmm index");
    return X86::TMM0 + Imm;
  };
  switch (MI.getOpcode()) {
  default: llvm_unreachable("Unexpected instr type to insert");
  case X86::TLS_addr32:
  case X86::TLS_addr64:
  case X86::TLS_addrX32:
  case X86::TLS_base_addr32:
  case X86::TLS_base_addr64:
  case X86::TLS_base_addrX32:
    return EmitLoweredTLSAddr(MI, BB);
  case X86::INDIRECT_THUNK_CALL32:
  case X86::INDIRECT_THUNK_CALL64:
  case X86::INDIRECT_THUNK_TCRETURN32:
  case X86::INDIRECT_THUNK_TCRETURN64:
    return EmitLoweredIndirectThunk(MI, BB);
  case X86::CATCHRET:
    return EmitLoweredCatchRet(MI, BB);
  case X86::SEG_ALLOCA_32:
  case X86::SEG_ALLOCA_64:
    return EmitLoweredSegAlloca(MI, BB);
  case X86::PROBED_ALLOCA_32:
  case X86::PROBED_ALLOCA_64:
    return EmitLoweredProbedAlloca(MI, BB);
  case X86::TLSCall_32:
  case X86::TLSCall_64:
    return EmitLoweredTLSCall(MI, BB);
  case X86::CMOV_FR32:
  case X86::CMOV_FR32X:
  case X86::CMOV_FR64:
  case X86::CMOV_FR64X:
  case X86::CMOV_GR8:
  case X86::CMOV_GR16:
  case X86::CMOV_GR32:
  case X86::CMOV_RFP32:
  case X86::CMOV_RFP64:
  case X86::CMOV_RFP80:
  case X86::CMOV_VR64:
  case X86::CMOV_VR128:
  case X86::CMOV_VR128X:
  case X86::CMOV_VR256:
  case X86::CMOV_VR256X:
  case X86::CMOV_VR512:
  case X86::CMOV_VK1:
  case X86::CMOV_VK2:
  case X86::CMOV_VK4:
  case X86::CMOV_VK8:
  case X86::CMOV_VK16:
  case X86::CMOV_VK32:
  case X86::CMOV_VK64:
    return EmitLoweredSelect(MI, BB);

  case X86::RDFLAGS32:
  case X86::RDFLAGS64: {
    // Read EFLAGS into a GPR by pushing the flags and popping them into the
    // destination register.
    unsigned PushF =
        MI.getOpcode() == X86::RDFLAGS32 ? X86::PUSHF32 : X86::PUSHF64;
    unsigned Pop = MI.getOpcode() == X86::RDFLAGS32 ? X86::POP32r : X86::POP64r;
    MachineInstr *Push = BuildMI(*BB, MI, DL, TII->get(PushF));
    // Permit reads of the EFLAGS and DF registers without a prior definition
    // by marking the implicit uses undef: this intrinsic exists to read
    // processor state (e.g. flags not modeled by the backend), so the values
    // need not be defined by earlier code in this function.
    assert(Push->getOperand(2).getReg() == X86::EFLAGS &&
           "Unexpected register in operand!");
    Push->getOperand(2).setIsUndef();
    assert(Push->getOperand(3).getReg() == X86::DF &&
           "Unexpected register in operand!");
    Push->getOperand(3).setIsUndef();
    BuildMI(*BB, MI, DL, TII->get(Pop), MI.getOperand(0).getReg());

    MI.eraseFromParent();
    return BB;
  }

  case X86::WRFLAGS32:
  case X86::WRFLAGS64: {
    // Write EFLAGS from a GPR by pushing the source register and popping it
    // into the flags register.
    unsigned Push =
        MI.getOpcode() == X86::WRFLAGS32 ? X86::PUSH32r : X86::PUSH64r;
    unsigned PopF =
        MI.getOpcode() == X86::WRFLAGS32 ? X86::POPF32 : X86::POPF64;
    BuildMI(*BB, MI, DL, TII->get(Push)).addReg(MI.getOperand(0).getReg());
    BuildMI(*BB, MI, DL, TII->get(PopF));

    MI.eraseFromParent();
    return BB;
  }

  case X86::FP32_TO_INT16_IN_MEM:
  case X86::FP32_TO_INT32_IN_MEM:
  case X86::FP32_TO_INT64_IN_MEM:
  case X86::FP64_TO_INT16_IN_MEM:
  case X86::FP64_TO_INT32_IN_MEM:
  case X86::FP64_TO_INT64_IN_MEM:
  case X86::FP80_TO_INT16_IN_MEM:
  case X86::FP80_TO_INT32_IN_MEM:
  case X86::FP80_TO_INT64_IN_MEM: {
    // x87 FP-to-int store with truncation semantics: temporarily switch the
    // FPU control word to round-toward-zero, do the store, then restore it.
    // Save the original control word to a stack slot.
    int OrigCWFrameIdx =
        MF->getFrameInfo().CreateStackObject(2, Align(2), false);
    addFrameReference(BuildMI(*BB, MI, DL,
                              TII->get(X86::FNSTCW16m)), OrigCWFrameIdx);

    // Load it back into a GPR (zero-extended to 32 bits for the OR below).
    Register OldCW = MF->getRegInfo().createVirtualRegister(&X86::GR32RegClass);
    addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOVZX32rm16), OldCW),
                      OrigCWFrameIdx);

    // Set RC=11b (bits 10-11, mask 0xC00): round toward zero.
    Register NewCW = MF->getRegInfo().createVirtualRegister(&X86::GR32RegClass);
    BuildMI(*BB, MI, DL, TII->get(X86::OR32ri), NewCW)
      .addReg(OldCW, RegState::Kill).addImm(0xC00);

    // Narrow the new control word back to 16 bits.
    Register NewCW16 =
        MF->getRegInfo().createVirtualRegister(&X86::GR16RegClass);
    BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), NewCW16)
        .addReg(NewCW, RegState::Kill, X86::sub_16bit);

    // Spill it so FLDCW can load it from memory.
    int NewCWFrameIdx =
        MF->getFrameInfo().CreateStackObject(2, Align(2), false);
    addFrameReference(BuildMI(*BB, MI, DL, TII->get(X86::MOV16mr)),
                      NewCWFrameIdx)
        .addReg(NewCW16, RegState::Kill);

    // Activate the truncating control word.
    addFrameReference(BuildMI(*BB, MI, DL,
                              TII->get(X86::FLDCW16m)), NewCWFrameIdx);

    // Pick the x87 integer-store opcode for the source precision and
    // destination width.
    unsigned Opc;
    switch (MI.getOpcode()) {
    default: llvm_unreachable("illegal opcode!");
    case X86::FP32_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m32; break;
    case X86::FP32_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m32; break;
    case X86::FP32_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m32; break;
    case X86::FP64_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m64; break;
    case X86::FP64_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m64; break;
    case X86::FP64_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m64; break;
    case X86::FP80_TO_INT16_IN_MEM: Opc = X86::IST_Fp16m80; break;
    case X86::FP80_TO_INT32_IN_MEM: Opc = X86::IST_Fp32m80; break;
    case X86::FP80_TO_INT64_IN_MEM: Opc = X86::IST_Fp64m80; break;
    }

    // Perform the store through the pseudo's address operands.
    X86AddressMode AM = getAddressFromInstr(&MI, 0);
    addFullAddress(BuildMI(*BB, MI, DL, TII->get(Opc)), AM)
        .addReg(MI.getOperand(X86::AddrNumOperands).getReg());

    // Restore the original control word.
    addFrameReference(BuildMI(*BB, MI, DL,
                              TII->get(X86::FLDCW16m)), OrigCWFrameIdx);

    MI.eraseFromParent();
    return BB;
  }

  // Thread synchronization.
  case X86::XBEGIN:
    return emitXBegin(MI, BB, Subtarget.getInstrInfo());

  case X86::VAARG_64:
  case X86::VAARG_X32:
    return EmitVAARGWithCustomInserter(MI, BB);

  case X86::EH_SjLj_SetJmp32:
  case X86::EH_SjLj_SetJmp64:
    return emitEHSjLjSetJmp(MI, BB);

  case X86::EH_SjLj_LongJmp32:
  case X86::EH_SjLj_LongJmp64:
    return emitEHSjLjLongJmp(MI, BB);

  case X86::Int_eh_sjlj_setup_dispatch:
    return EmitSjLjDispatchBlock(MI, BB);

  case TargetOpcode::STATEPOINT:
    // As far as custom insertion goes, statepoints are expanded the same way
    // as patchpoints.
    return emitPatchPoint(MI, BB);

  case TargetOpcode::STACKMAP:
  case TargetOpcode::PATCHPOINT:
    return emitPatchPoint(MI, BB);

  case TargetOpcode::PATCHABLE_EVENT_CALL:
  case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
    // No expansion needed here; left for later XRay handling.
    return BB;

  case X86::LCMPXCHG8B: {
    const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
    // CMPXCHG8B implicitly ties up EAX/EBX/ECX/EDX in addition to needing a
    // memory operand. On i686 with a base pointer (ESI), the register
    // allocator can run out of unreserved registers for an address of the
    // form X(%reg,%reg,Y). Work around this by computing the full address
    // into one register with LEA beforehand, and rewriting the cmpxchg to a
    // simple direct address through that register.
    if (!Subtarget.is32Bit() || !TRI->hasBasePointer(*MF))
      return BB;

    // This expansion hard-codes ESI as the base pointer (see LEA placement
    // below); bail loudly if that ever changes.
    assert(TRI->getBaseRegister() == X86::ESI &&
           "LCMPXCHG8B custom insertion for i686 is written with X86::ESI as a "
           "base pointer in mind");

    MachineRegisterInfo &MRI = MF->getRegInfo();
    MVT SPTy = getPointerTy(MF->getDataLayout());
    const TargetRegisterClass *AddrRegClass = getRegClassFor(SPTy);
    Register computedAddrVReg = MRI.createVirtualRegister(AddrRegClass);

    X86AddressMode AM = getAddressFromInstr(&MI, 0);
    // If there is no index register the address is already simple enough;
    // nothing to do.
    if (AM.IndexReg == X86::NoRegister)
      return BB;

    // Place the LEA before any instruction that defines EAX/EBX/ECX/EDX, so
    // it does not get sandwiched between the setup of the cmpxchg's implicit
    // operands and the cmpxchg itself.
    MachineBasicBlock::reverse_iterator RMBBI(MI.getReverseIterator());
    while (RMBBI != BB->rend() && (RMBBI->definesRegister(X86::EAX) ||
                                   RMBBI->definesRegister(X86::EBX) ||
                                   RMBBI->definesRegister(X86::ECX) ||
                                   RMBBI->definesRegister(X86::EDX))) {
      ++RMBBI;
    }
    MachineBasicBlock::iterator MBBI(RMBBI);
    addFullAddress(
        BuildMI(*BB, *MBBI, DL, TII->get(X86::LEA32r), computedAddrVReg), AM);

    // Rewrite the cmpxchg's memory operand to use the computed address.
    setDirectAddressInInstr(&MI, 0, computedAddrVReg);

    return BB;
  }
  case X86::LCMPXCHG16B_NO_RBX: {
    const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
    Register BasePtr = TRI->getBaseRegister();
    if (TRI->hasBasePointer(*MF) &&
        (BasePtr == X86::RBX || BasePtr == X86::EBX)) {
      // RBX is the base pointer, so it must be preserved: save it, use the
      // SAVE_RBX pseudo (which takes the desired RBX value and the saved copy
      // as explicit operands), and let it restore RBX afterwards.
      if (!BB->isLiveIn(BasePtr))
        BB->addLiveIn(BasePtr);

      Register SaveRBX =
          MF->getRegInfo().createVirtualRegister(&X86::GR64RegClass);
      BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), SaveRBX)
          .addReg(X86::RBX);
      Register Dst = MF->getRegInfo().createVirtualRegister(&X86::GR64RegClass);
      MachineInstrBuilder MIB =
          BuildMI(*BB, MI, DL, TII->get(X86::LCMPXCHG16B_SAVE_RBX), Dst);
      for (unsigned Idx = 0; Idx < X86::AddrNumOperands; ++Idx)
        MIB.add(MI.getOperand(Idx));
      MIB.add(MI.getOperand(X86::AddrNumOperands));
      MIB.addReg(SaveRBX);
    } else {
      // RBX is free: move the desired value into it and emit a plain
      // LCMPXCHG16B.
      BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), X86::RBX)
          .add(MI.getOperand(X86::AddrNumOperands));
      MachineInstrBuilder MIB =
          BuildMI(*BB, MI, DL, TII->get(X86::LCMPXCHG16B));
      for (unsigned Idx = 0; Idx < X86::AddrNumOperands; ++Idx)
        MIB.add(MI.getOperand(Idx));
    }
    MI.eraseFromParent();
    return BB;
  }
  case X86::MWAITX: {
    const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
    Register BasePtr = TRI->getBaseRegister();
    bool IsRBX = (BasePtr == X86::RBX || BasePtr == X86::EBX);
    // MWAITXrrr reads ECX, EAX, and EBX implicitly. If RBX is not the base
    // pointer, just marshal the three operands into place.
    if (!IsRBX || !TRI->hasBasePointer(*MF)) {
      BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), X86::ECX)
          .addReg(MI.getOperand(0).getReg());
      BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), X86::EAX)
          .addReg(MI.getOperand(1).getReg());
      BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), X86::EBX)
          .addReg(MI.getOperand(2).getReg());
      BuildMI(*BB, MI, DL, TII->get(X86::MWAITXrrr));
      MI.eraseFromParent();
    } else {
      // RBX is the base pointer: save it and use the SAVE_RBX pseudo, which
      // takes the EBX value and the saved RBX as explicit operands.
      if (!BB->isLiveIn(BasePtr)) {
        BB->addLiveIn(BasePtr);
      }

      BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), X86::ECX)
          .addReg(MI.getOperand(0).getReg());
      BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), X86::EAX)
          .addReg(MI.getOperand(1).getReg());
      assert(Subtarget.is64Bit() && "Expected 64-bit mode!");

      Register SaveRBX =
          MF->getRegInfo().createVirtualRegister(&X86::GR64RegClass);
      BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), SaveRBX)
          .addReg(X86::RBX);

      Register Dst = MF->getRegInfo().createVirtualRegister(&X86::GR64RegClass);
      BuildMI(*BB, MI, DL, TII->get(X86::MWAITX_SAVE_RBX))
          .addDef(Dst)
          .addReg(MI.getOperand(2).getReg())
          .addUse(SaveRBX);
      MI.eraseFromParent();
    }
    return BB;
  }
  case TargetOpcode::PREALLOCATED_SETUP: {
    // Reserve stack space for a preallocated call by bumping ESP down.
    assert(Subtarget.is32Bit() && "preallocated only used in 32-bit");
    auto MFI = MF->getInfo<X86MachineFunctionInfo>();
    MFI->setHasPreallocatedCall(true);
    int64_t PreallocatedId = MI.getOperand(0).getImm();
    size_t StackAdjustment = MFI->getPreallocatedStackSize(PreallocatedId);
    assert(StackAdjustment != 0 && "0 stack adjustment");
    LLVM_DEBUG(dbgs() << "PREALLOCATED_SETUP stack adjustment "
                      << StackAdjustment << "\n");
    BuildMI(*BB, MI, DL, TII->get(X86::SUB32ri), X86::ESP)
        .addReg(X86::ESP)
        .addImm(StackAdjustment);
    MI.eraseFromParent();
    return BB;
  }
  case TargetOpcode::PREALLOCATED_ARG: {
    // Compute the address of one argument slot inside the preallocated area:
    // LEA dst, [ESP + ArgOffset].
    assert(Subtarget.is32Bit() && "preallocated calls only used in 32-bit");
    int64_t PreallocatedId = MI.getOperand(1).getImm();
    int64_t ArgIdx = MI.getOperand(2).getImm();
    auto MFI = MF->getInfo<X86MachineFunctionInfo>();
    size_t ArgOffset = MFI->getPreallocatedArgOffsets(PreallocatedId)[ArgIdx];
    LLVM_DEBUG(dbgs() << "PREALLOCATED_ARG arg index " << ArgIdx
                      << ", arg offset " << ArgOffset << "\n");
    addRegOffset(
        BuildMI(*BB, MI, DL, TII->get(X86::LEA32r), MI.getOperand(0).getReg()),
        X86::ESP, false, ArgOffset);
    MI.eraseFromParent();
    return BB;
  }
  case X86::PTDPBSSD:
  case X86::PTDPBSUD:
  case X86::PTDPBUSD:
  case X86::PTDPBUUD:
  case X86::PTDPBF16PS: {
    // AMX dot-product pseudos: rewrite tile-index immediates into TMM
    // registers. Operand 0 is both read and written (accumulator), so it is
    // added once as a def and once as an undef use.
    unsigned Opc;
    switch (MI.getOpcode()) {
    case X86::PTDPBSSD: Opc = X86::TDPBSSD; break;
    case X86::PTDPBSUD: Opc = X86::TDPBSUD; break;
    case X86::PTDPBUSD: Opc = X86::TDPBUSD; break;
    case X86::PTDPBUUD: Opc = X86::TDPBUUD; break;
    case X86::PTDPBF16PS: Opc = X86::TDPBF16PS; break;
    }

    MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, TII->get(Opc));
    MIB.addReg(TMMImmToTMMReg(MI.getOperand(0).getImm()), RegState::Define);
    MIB.addReg(TMMImmToTMMReg(MI.getOperand(0).getImm()), RegState::Undef);
    MIB.addReg(TMMImmToTMMReg(MI.getOperand(1).getImm()), RegState::Undef);
    MIB.addReg(TMMImmToTMMReg(MI.getOperand(2).getImm()), RegState::Undef);

    MI.eraseFromParent();
    return BB;
  }
  case X86::PTILEZERO: {
    // AMX tile-zero pseudo: immediate tile index -> TMM register def.
    unsigned Imm = MI.getOperand(0).getImm();
    BuildMI(*BB, MI, DL, TII->get(X86::TILEZERO), TMMImmToTMMReg(Imm));
    MI.eraseFromParent();
    return BB;
  }
  case X86::PTILELOADD:
  case X86::PTILELOADDT1:
  case X86::PTILESTORED: {
    // AMX tile load/store pseudos. Loads define the tile register first and
    // then take the 5 address operands; stores take the address operands
    // first and the tile register (as a use) last.
    unsigned Opc;
    switch (MI.getOpcode()) {
    case X86::PTILELOADD: Opc = X86::TILELOADD; break;
    case X86::PTILELOADDT1: Opc = X86::TILELOADDT1; break;
    case X86::PTILESTORED: Opc = X86::TILESTORED; break;
    }

    MachineInstrBuilder MIB = BuildMI(*BB, MI, DL, TII->get(Opc));
    unsigned CurOp = 0;
    if (Opc != X86::TILESTORED)
      MIB.addReg(TMMImmToTMMReg(MI.getOperand(CurOp++).getImm()),
                 RegState::Define);

    // The 5 X86 address-mode operands.
    MIB.add(MI.getOperand(CurOp++)); // base
    MIB.add(MI.getOperand(CurOp++)); // scale
    MIB.add(MI.getOperand(CurOp++)); // index
    MIB.add(MI.getOperand(CurOp++)); // displacement
    MIB.add(MI.getOperand(CurOp++)); // segment

    if (Opc == X86::TILESTORED)
      MIB.addReg(TMMImmToTMMReg(MI.getOperand(CurOp++).getImm()),
                 RegState::Undef);

    MI.eraseFromParent();
    return BB;
  }
  }
}
| 34636 | |
| 34637 | |
| 34638 | |
| 34639 | |
| 34640 | |
// Try to replace the constant operand of Op with a "cheaper" constant, given
// that only DemandedBits/DemandedElts of the result are actually used.
// Returns true if the node was replaced (via TLO.CombineTo) or if the
// existing constant should be kept as-is to stop further shrinking.
bool
X86TargetLowering::targetShrinkDemandedConstant(SDValue Op,
                                                const APInt &DemandedBits,
                                                const APInt &DemandedElts,
                                                TargetLoweringOpt &TLO) const {
  EVT VT = Op.getValueType();
  unsigned Opcode = Op.getOpcode();
  unsigned EltSize = VT.getScalarSizeInBits();

  if (VT.isVector()) {
    // Vector case: if an OR/XOR constant build-vector operand would be
    // reproduced exactly by sign-extending from the demanded low bits,
    // rewrite it as SIGN_EXTEND_INREG of itself. The narrower "in-reg"
    // constant can be cheaper to materialize.
    //
    // NeedsSignExtension: true if some demanded, defined constant element of
    // V, truncated to ActiveBits, consists entirely of sign bits (i.e. the
    // full-width value is not already all sign bits, but the truncation is).
    auto NeedsSignExtension = [&](SDValue V, unsigned ActiveBits) {
      if (!ISD::isBuildVectorOfConstantSDNodes(V.getNode()))
        return false;
      for (unsigned i = 0, e = V.getNumOperands(); i != e; ++i) {
        // Ignore elements that are not demanded or are undef.
        if (!DemandedElts[i] || V.getOperand(i).isUndef())
          continue;
        const APInt &Val = V.getConstantOperandAPInt(i);
        if (Val.getBitWidth() > Val.getNumSignBits() &&
            Val.trunc(ActiveBits).getNumSignBits() == ActiveBits)
          return true;
      }
      return false;
    };

    // Only fire when the demanded bits fit strictly inside the element and
    // the type is legal; operand 1 is the constant side of the OR/XOR.
    unsigned ActiveBits = DemandedBits.getActiveBits();
    if (EltSize > ActiveBits && EltSize > 1 && isTypeLegal(VT) &&
        (Opcode == ISD::OR || Opcode == ISD::XOR) &&
        NeedsSignExtension(Op.getOperand(1), ActiveBits)) {
      EVT ExtSVT = EVT::getIntegerVT(*TLO.DAG.getContext(), ActiveBits);
      EVT ExtVT = EVT::getVectorVT(*TLO.DAG.getContext(), ExtSVT,
                                   VT.getVectorNumElements());
      SDValue NewC =
          TLO.DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(Op), VT,
                          Op.getOperand(1), TLO.DAG.getValueType(ExtVT));
      SDValue NewOp =
          TLO.DAG.getNode(Opcode, SDLoc(Op), VT, Op.getOperand(0), NewC);
      return TLO.CombineTo(Op, NewOp);
    }
    return false;
  }

  // Scalar case: only AND masks are handled below.
  if (Opcode != ISD::AND)
    return false;

  // The mask must be an immediate constant.
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
  if (!C)
    return false;

  const APInt &Mask = C->getAPIntValue();

  // Clear any mask bits that are not demanded by the users of this AND.
  APInt ShrunkMask = Mask & DemandedBits;

  // Number of significant (set) bits remaining in the shrunk mask.
  unsigned Width = ShrunkMask.getActiveBits();

  // Nothing demanded from the mask — let generic combining handle it.
  if (Width == 0)
    return false;

  // Round up to a power-of-2 width of at least 8 bits, so the mask matches a
  // natural x86 zero-extend width (movzx of 8/16/32 bits), then clamp to the
  // element size.
  Width = PowerOf2Ceil(std::max(Width, 8U));

  Width = std::min(Width, EltSize);

  // Candidate replacement: a low-bits mask of that width (a zero-extension).
  APInt ZeroExtendMask = APInt::getLowBitsSet(EltSize, Width);

  // If the mask already has that shape, return true to keep it and prevent
  // callers from further shrinking it into a less favorable constant.
  if (ZeroExtendMask == Mask)
    return true;

  // The new mask may only differ from the old one in bits that are not
  // demanded; otherwise the transform would change observable results.
  if (!ZeroExtendMask.isSubsetOf(Mask | ~DemandedBits))
    return false;

  // Replace the AND's constant with the zero-extend-shaped mask.
  SDLoc DL(Op);
  SDValue NewC = TLO.DAG.getConstant(ZeroExtendMask, DL, VT);
  SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC);
  return TLO.CombineTo(Op, NewOp);
}
| 34732 | |
// Compute known-zero/known-one bits for X86-specific DAG nodes, for the
// elements selected by DemandedElts. Results are accumulated into Known;
// anything not handled leaves Known fully unknown (reset below).
void X86TargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
                                                      KnownBits &Known,
                                                      const APInt &DemandedElts,
                                                      const SelectionDAG &DAG,
                                                      unsigned Depth) const {
  unsigned BitWidth = Known.getBitWidth();
  unsigned NumElts = DemandedElts.getBitWidth();
  unsigned Opc = Op.getOpcode();
  EVT VT = Op.getValueType();
  assert((Opc >= ISD::BUILTIN_OP_END ||
          Opc == ISD::INTRINSIC_WO_CHAIN ||
          Opc == ISD::INTRINSIC_W_CHAIN ||
          Opc == ISD::INTRINSIC_VOID) &&
         "Should use MaskedValueIsZero if you don't know whether Op"
         " is a target node!");

  Known.resetAll();
  switch (Opc) {
  default: break;
  case X86ISD::SETCC:
    // SETCC produces 0 or 1: everything above bit 0 is known zero.
    Known.Zero.setBitsFrom(1);
    break;
  case X86ISD::MOVMSK: {
    // MOVMSK sets one result bit per source element; the rest are zero.
    unsigned NumLoBits = Op.getOperand(0).getValueType().getVectorNumElements();
    Known.Zero.setBitsFrom(NumLoBits);
    break;
  }
  case X86ISD::PEXTRB:
  case X86ISD::PEXTRW: {
    // Extracted element: take the known bits of just that source element,
    // then zero everything above the source element width.
    SDValue Src = Op.getOperand(0);
    EVT SrcVT = Src.getValueType();
    APInt DemandedElt = APInt::getOneBitSet(SrcVT.getVectorNumElements(),
                                            Op.getConstantOperandVal(1));
    Known = DAG.computeKnownBits(Src, DemandedElt, Depth + 1);
    Known = Known.anyextOrTrunc(BitWidth);
    Known.Zero.setBitsFrom(SrcVT.getScalarSizeInBits());
    break;
  }
  case X86ISD::VSRAI:
  case X86ISD::VSHLI:
  case X86ISD::VSRLI: {
    unsigned ShAmt = Op.getConstantOperandVal(1);
    // Out-of-range immediate shifts produce zero on x86 vector shifts.
    if (ShAmt >= VT.getScalarSizeInBits()) {
      Known.setAllZero();
      break;
    }

    Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    if (Opc == X86ISD::VSHLI) {
      Known.Zero <<= ShAmt;
      Known.One <<= ShAmt;
      // Low bits are shifted in as zero.
      Known.Zero.setLowBits(ShAmt);
    } else if (Opc == X86ISD::VSRLI) {
      Known.Zero.lshrInPlace(ShAmt);
      Known.One.lshrInPlace(ShAmt);
      // High bits are shifted in as zero.
      Known.Zero.setHighBits(ShAmt);
    } else {
      // VSRAI: arithmetic shift propagates the known sign bit state.
      Known.Zero.ashrInPlace(ShAmt);
      Known.One.ashrInPlace(ShAmt);
    }
    break;
  }
  case X86ISD::PACKUS: {
    // Map demanded result elements back to the demanded LHS/RHS elements.
    APInt DemandedLHS, DemandedRHS;
    getPackDemandedElts(VT, DemandedElts, DemandedLHS, DemandedRHS);

    // Start fully known (at twice the width, i.e. the source element size)
    // and intersect with each used input below.
    Known.One = APInt::getAllOnesValue(BitWidth * 2);
    Known.Zero = APInt::getAllOnesValue(BitWidth * 2);

    KnownBits Known2;
    if (!!DemandedLHS) {
      Known2 = DAG.computeKnownBits(Op.getOperand(0), DemandedLHS, Depth + 1);
      Known = KnownBits::commonBits(Known, Known2);
    }
    if (!!DemandedRHS) {
      Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedRHS, Depth + 1);
      Known = KnownBits::commonBits(Known, Known2);
    }

    // PACKUS saturates; only if the upper half of each source element is
    // known zero does the pack behave like a plain truncation we can model.
    if (Known.countMinLeadingZeros() < BitWidth)
      Known.resetAll();
    Known = Known.trunc(BitWidth);
    break;
  }
  case X86ISD::VBROADCAST: {
    SDValue Src = Op.getOperand(0);
    // Broadcast of a scalar: every element has the scalar's known bits.
    if (!Src.getSimpleValueType().isVector()) {
      Known = DAG.computeKnownBits(Src, Depth + 1);
      return;
    }
    break;
  }
  case X86ISD::ANDNP: {
    // ANDNP computes (~Op0) & Op1.
    KnownBits Known2;
    Known = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
    Known2 = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);

    // Result bit is one only where Op1 is one AND Op0 is zero;
    // it is zero wherever Op1 is zero OR Op0 is one.
    Known.One &= Known2.Zero;
    Known.Zero |= Known2.One;
    break;
  }
  case X86ISD::FOR: {
    // Bitwise OR (float domain) — combine as a plain OR of known bits.
    KnownBits Known2;
    Known = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
    Known2 = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);

    Known |= Known2;
    break;
  }
  case X86ISD::PSADBW: {
    assert(VT.getScalarType() == MVT::i64 &&
           Op.getOperand(0).getValueType().getScalarType() == MVT::i8 &&
           "Unexpected PSADBW types");

    // Sum of eight byte absolute differences fits in 16 bits (max 8*255),
    // so bits 16..63 of each i64 lane are zero.
    Known.Zero.setBitsFrom(16);
    break;
  }
  case X86ISD::PMULUDQ: {
    KnownBits Known2;
    Known = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
    Known2 = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);

    // PMULUDQ multiplies the low 32 bits of each 64-bit lane, zero-extended.
    Known = Known.trunc(BitWidth / 2).zext(BitWidth);
    Known2 = Known2.trunc(BitWidth / 2).zext(BitWidth);
    Known = KnownBits::mul(Known, Known2);
    break;
  }
  case X86ISD::CMOV: {
    Known = DAG.computeKnownBits(Op.getOperand(1), Depth + 1);
    // Early out if nothing is known about one side.
    if (Known.isUnknown())
      break;
    KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);

    // Either operand may be selected: keep only bits known in both.
    Known = KnownBits::commonBits(Known, Known2);
    break;
  }
  case X86ISD::BEXTR:
  case X86ISD::BEXTRI: {
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    if (auto* Cst1 = dyn_cast<ConstantSDNode>(Op1)) {
      // Control operand encodes start bit in byte 0, length in byte 1.
      unsigned Shift = Cst1->getAPIntValue().extractBitsAsZExtValue(8, 0);
      unsigned Length = Cst1->getAPIntValue().extractBitsAsZExtValue(8, 8);

      // Zero-length extraction yields zero.
      if (Length == 0) {
        Known.setAllZero();
        break;
      }

      // Only model the in-range case; the extracted field is zero-extended.
      if ((Shift + Length) <= BitWidth) {
        Known = DAG.computeKnownBits(Op0, Depth + 1);
        Known = Known.extractBits(Length, Shift);
        Known = Known.zextOrTrunc(BitWidth);
      }
    }
    break;
  }
  case X86ISD::PDEP: {
    KnownBits Known2;
    Known = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
    Known2 = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
    // Result ones depend on the deposit pattern; don't claim any.
    Known.One.clearAllBits();

    // PDEP only writes where the mask (operand 1) has set bits, so the
    // trailing zeros of the mask are zeros in the result.
    Known.Zero.setLowBits(Known2.countMinTrailingZeros());
    break;
  }
  case X86ISD::PEXT: {
    Known = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
    // PEXT packs at most popcount(mask) source bits into the low bits;
    // everything above that many bits is zero.
    unsigned Count = Known.Zero.countPopulation();
    Known.Zero = APInt::getHighBitsSet(BitWidth, Count);
    Known.One.clearAllBits();
    break;
  }
  case X86ISD::VTRUNC:
  case X86ISD::VTRUNCS:
  case X86ISD::VTRUNCUS:
  case X86ISD::CVTSI2P:
  case X86ISD::CVTUI2P:
  case X86ISD::CVTP2SI:
  case X86ISD::CVTP2UI:
  case X86ISD::MCVTP2SI:
  case X86ISD::MCVTP2UI:
  case X86ISD::CVTTP2SI:
  case X86ISD::CVTTP2UI:
  case X86ISD::MCVTTP2SI:
  case X86ISD::MCVTTP2UI:
  case X86ISD::MCVTSI2P:
  case X86ISD::MCVTUI2P:
  case X86ISD::VFPROUND:
  case X86ISD::VMFPROUND:
  case X86ISD::CVTPS2PH:
  case X86ISD::MCVTPS2PH: {
    // Narrowing conversions: elements past the source element count are
    // zero-padded, so demanding only those yields all-zero.
    EVT SrcVT = Op.getOperand(0).getValueType();
    if (SrcVT.isVector()) {
      unsigned NumSrcElts = SrcVT.getVectorNumElements();
      if (NumElts > NumSrcElts &&
          DemandedElts.countTrailingZeros() >= NumSrcElts)
        Known.setAllZero();
    }
    break;
  }
  case X86ISD::STRICT_CVTTP2SI:
  case X86ISD::STRICT_CVTTP2UI:
  case X86ISD::STRICT_CVTSI2P:
  case X86ISD::STRICT_CVTUI2P:
  case X86ISD::STRICT_VFPROUND:
  case X86ISD::STRICT_CVTPS2PH: {
    // Same as above, but the data operand of strict nodes is operand 1
    // (operand 0 is the chain).
    EVT SrcVT = Op.getOperand(1).getValueType();
    if (SrcVT.isVector()) {
      unsigned NumSrcElts = SrcVT.getVectorNumElements();
      if (NumElts > NumSrcElts &&
          DemandedElts.countTrailingZeros() >= NumSrcElts)
        Known.setAllZero();
    }
    break;
  }
  case X86ISD::MOVQ2DQ: {
    // The upper half of the result is zeroed.
    if (DemandedElts.countTrailingZeros() >= (NumElts / 2))
      Known.setAllZero();
    break;
  }
  }

  // For target shuffles, map the demanded result elements back onto the
  // shuffle's inputs and intersect the known bits of every referenced
  // input element.
  if (isTargetShuffle(Opc)) {
    SmallVector<int, 64> Mask;
    SmallVector<SDValue, 2> Ops;
    if (getTargetShuffleMask(Op.getNode(), VT.getSimpleVT(), true, Ops, Mask)) {
      unsigned NumOps = Ops.size();
      unsigned NumElts = VT.getVectorNumElements();
      if (Mask.size() == NumElts) {
        SmallVector<APInt, 2> DemandedOps(NumOps, APInt(NumElts, 0));
        Known.Zero.setAllBits(); Known.One.setAllBits();
        for (unsigned i = 0; i != NumElts; ++i) {
          if (!DemandedElts[i])
            continue;
          int M = Mask[i];
          if (M == SM_SentinelUndef) {
            // Undef element: give up on this shuffle entirely.
            Known.resetAll();
            break;
          }
          if (M == SM_SentinelZero) {
            // Zero element contributes known-zero; drop the known ones.
            Known.One.clearAllBits();
            continue;
          }
          assert(0 <= M && (unsigned)M < (NumOps * NumElts) &&
                 "Shuffle index out of range");

          unsigned OpIdx = (unsigned)M / NumElts;
          unsigned EltIdx = (unsigned)M % NumElts;
          if (Ops[OpIdx].getValueType() != VT) {
            // Mismatched input width — bail out conservatively.
            Known.resetAll();
            break;
          }
          DemandedOps[OpIdx].setBit(EltIdx);
        }
        // Intersect the known bits across every demanded input.
        for (unsigned i = 0; i != NumOps && !Known.isUnknown(); ++i) {
          if (!DemandedOps[i])
            continue;
          KnownBits Known2 =
              DAG.computeKnownBits(Ops[i], DemandedOps[i], Depth + 1);
          Known = KnownBits::commonBits(Known, Known2);
        }
      }
    }
  }
}
| 35020 | |
// Return a lower bound on the number of sign bits of X86-specific DAG nodes
// for the elements selected by DemandedElts. Falls back to 1 (nothing known).
unsigned X86TargetLowering::ComputeNumSignBitsForTargetNode(
    SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
    unsigned Depth) const {
  EVT VT = Op.getValueType();
  unsigned VTBits = VT.getScalarSizeInBits();
  unsigned Opcode = Op.getOpcode();
  switch (Opcode) {
  case X86ISD::SETCC_CARRY:
    // SETCC_CARRY materializes all-zeros or all-ones: every bit is a sign bit.
    return VTBits;

  case X86ISD::VTRUNC: {
    // Truncation keeps whatever sign bits extend below the truncation point.
    SDValue Src = Op.getOperand(0);
    MVT SrcVT = Src.getSimpleValueType();
    unsigned NumSrcBits = SrcVT.getScalarSizeInBits();
    assert(VTBits < NumSrcBits && "Illegal truncation input type");
    APInt DemandedSrc = DemandedElts.zextOrTrunc(SrcVT.getVectorNumElements());
    unsigned Tmp = DAG.ComputeNumSignBits(Src, DemandedSrc, Depth + 1);
    if (Tmp > (NumSrcBits - VTBits))
      return Tmp - (NumSrcBits - VTBits);
    return 1;
  }

  case X86ISD::PACKSS: {
    // PACKSS is a signed-saturating truncation: the result's sign bits are
    // whatever survives of the minimum of both inputs' sign bits.
    APInt DemandedLHS, DemandedRHS;
    getPackDemandedElts(Op.getValueType(), DemandedElts, DemandedLHS,
                        DemandedRHS);

    unsigned SrcBits = Op.getOperand(0).getScalarValueSizeInBits();
    unsigned Tmp0 = SrcBits, Tmp1 = SrcBits;
    if (!!DemandedLHS)
      Tmp0 = DAG.ComputeNumSignBits(Op.getOperand(0), DemandedLHS, Depth + 1);
    if (!!DemandedRHS)
      Tmp1 = DAG.ComputeNumSignBits(Op.getOperand(1), DemandedRHS, Depth + 1);
    unsigned Tmp = std::min(Tmp0, Tmp1);
    if (Tmp > (SrcBits - VTBits))
      return Tmp - (SrcBits - VTBits);
    return 1;
  }

  case X86ISD::VBROADCAST: {
    // Broadcast of a scalar: every element shares the scalar's sign bits.
    SDValue Src = Op.getOperand(0);
    if (!Src.getSimpleValueType().isVector())
      return DAG.ComputeNumSignBits(Src, Depth + 1);
    break;
  }

  case X86ISD::VSHLI: {
    SDValue Src = Op.getOperand(0);
    const APInt &ShiftVal = Op.getConstantOperandAPInt(1);
    // Out-of-range immediate shifts produce zero (all sign bits).
    if (ShiftVal.uge(VTBits))
      return VTBits;
    unsigned Tmp = DAG.ComputeNumSignBits(Src, DemandedElts, Depth + 1);
    if (ShiftVal.uge(Tmp))
      return 1;
    // Left shift consumes sign bits.
    return Tmp - ShiftVal.getZExtValue();
  }

  case X86ISD::VSRAI: {
    SDValue Src = Op.getOperand(0);
    APInt ShiftVal = Op.getConstantOperandAPInt(1);
    // Shifting by >= VTBits-1 splats the sign bit everywhere.
    if (ShiftVal.uge(VTBits - 1))
      return VTBits;
    unsigned Tmp = DAG.ComputeNumSignBits(Src, DemandedElts, Depth + 1);
    // Arithmetic right shift adds sign bits, capped at VTBits.
    ShiftVal += Tmp;
    return ShiftVal.uge(VTBits) ? VTBits : ShiftVal.getZExtValue();
  }

  case X86ISD::FSETCC:
    // cmpss/cmpsd return all-zeros/all-ones in the bottom lane only.
    if (VT == MVT::f32 || VT == MVT::f64 ||
        ((VT == MVT::v4f32 || VT == MVT::v2f64) && DemandedElts == 1))
      return VTBits;
    break;

  case X86ISD::PCMPGT:
  case X86ISD::PCMPEQ:
  case X86ISD::CMPP:
  case X86ISD::VPCOM:
  case X86ISD::VPCOMU:
    // Vector compares produce all-zeros or all-ones per element.
    return VTBits;

  case X86ISD::ANDNP: {
    unsigned Tmp0 =
        DAG.ComputeNumSignBits(Op.getOperand(0), DemandedElts, Depth + 1);
    if (Tmp0 == 1) return 1; // Early out: nothing known about operand 0.
    unsigned Tmp1 =
        DAG.ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1);
    return std::min(Tmp0, Tmp1);
  }

  case X86ISD::CMOV: {
    unsigned Tmp0 = DAG.ComputeNumSignBits(Op.getOperand(0), Depth+1);
    if (Tmp0 == 1) return 1;  // Early out.
    unsigned Tmp1 = DAG.ComputeNumSignBits(Op.getOperand(1), Depth+1);
    // Either side may be selected; take the weaker bound.
    return std::min(Tmp0, Tmp1);
  }
  }

  // For target shuffles, map the demanded result elements back onto the
  // shuffle's inputs and take the minimum sign-bit count across all inputs
  // that are actually referenced.
  if (isTargetShuffle(Opcode)) {
    SmallVector<int, 64> Mask;
    SmallVector<SDValue, 2> Ops;
    if (getTargetShuffleMask(Op.getNode(), VT.getSimpleVT(), true, Ops, Mask)) {
      unsigned NumOps = Ops.size();
      unsigned NumElts = VT.getVectorNumElements();
      if (Mask.size() == NumElts) {
        SmallVector<APInt, 2> DemandedOps(NumOps, APInt(NumElts, 0));
        for (unsigned i = 0; i != NumElts; ++i) {
          if (!DemandedElts[i])
            continue;
          int M = Mask[i];
          if (M == SM_SentinelUndef) {
            // Undef element: nothing can be claimed.
            return 1;
          } else if (M == SM_SentinelZero) {
            // Zero elements have all sign bits; they never lower the bound.
            continue;
          }
          assert(0 <= M && (unsigned)M < (NumOps * NumElts) &&
                 "Shuffle index out of range");

          unsigned OpIdx = (unsigned)M / NumElts;
          unsigned EltIdx = (unsigned)M % NumElts;
          if (Ops[OpIdx].getValueType() != VT) {
            // Mismatched input width — bail out conservatively.
            return 1;
          }
          DemandedOps[OpIdx].setBit(EltIdx);
        }
        unsigned Tmp0 = VTBits;
        for (unsigned i = 0; i != NumOps && Tmp0 > 1; ++i) {
          if (!DemandedOps[i])
            continue;
          unsigned Tmp1 =
              DAG.ComputeNumSignBits(Ops[i], DemandedOps[i], Depth + 1);
          Tmp0 = std::min(Tmp0, Tmp1);
        }
        return Tmp0;
      }
    }
  }

  // Fallback: nothing is known beyond the single guaranteed sign bit.
  return 1;
}
| 35171 | |
| 35172 | SDValue X86TargetLowering::unwrapAddress(SDValue N) const { |
| 35173 | if (N->getOpcode() == X86ISD::Wrapper || N->getOpcode() == X86ISD::WrapperRIP) |
| 35174 | return N->getOperand(0); |
| 35175 | return N; |
| 35176 | } |
| 35177 | |
| 35178 | |
| 35179 | |
| 35180 | static SDValue narrowLoadToVZLoad(LoadSDNode *LN, MVT MemVT, MVT VT, |
| 35181 | SelectionDAG &DAG) { |
| 35182 | |
| 35183 | if (!LN->isSimple()) |
| 35184 | return SDValue(); |
| 35185 | |
| 35186 | SDVTList Tys = DAG.getVTList(VT, MVT::Other); |
| 35187 | SDValue Ops[] = {LN->getChain(), LN->getBasePtr()}; |
| 35188 | return DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, SDLoc(LN), Tys, Ops, MemVT, |
| 35189 | LN->getPointerInfo(), LN->getOriginalAlign(), |
| 35190 | LN->getMemOperand()->getFlags()); |
| 35191 | } |
| 35192 | |
| 35193 | |
| 35194 | |
| 35195 | |
// Attempt to match a shuffle mask against a single-input (unary) shuffle
// instruction. On success, sets Shuffle to the matched opcode and SrcVT/DstVT
// to the types to lower with; V1 may be rewritten (e.g. subvector extracted
// for in-reg extensions).
static bool matchUnaryShuffle(MVT MaskVT, ArrayRef<int> Mask,
                              bool AllowFloatDomain, bool AllowIntDomain,
                              SDValue &V1, const SDLoc &DL, SelectionDAG &DAG,
                              const X86Subtarget &Subtarget, unsigned &Shuffle,
                              MVT &SrcVT, MVT &DstVT) {
  unsigned NumMaskElts = Mask.size();
  unsigned MaskEltSize = MaskVT.getScalarSizeInBits();

  // Match against a VZEXT_MOVL: keep element 0, zero the rest.
  // Pre-SSE2 only the 32-bit float form (v4f32) is available.
  if (MaskEltSize == 32 && Mask[0] == 0) {
    if (isUndefOrZero(Mask[1]) && isUndefInRange(Mask, 2, NumMaskElts - 2)) {
      Shuffle = X86ISD::VZEXT_MOVL;
      SrcVT = DstVT = !Subtarget.hasSSE2() ? MVT::v4f32 : MaskVT;
      return true;
    }
    // SCALAR_TO_VECTOR sources already have undef upper elements, so any
    // undef-or-zero tail matches.
    if (V1.getOpcode() == ISD::SCALAR_TO_VECTOR &&
        isUndefOrZeroInRange(Mask, 1, NumMaskElts - 1)) {
      Shuffle = X86ISD::VZEXT_MOVL;
      SrcVT = DstVT = !Subtarget.hasSSE2() ? MVT::v4f32 : MaskVT;
      return true;
    }
  }

  // Match against an ANY/ZERO_EXTEND(_VECTOR_INREG): sequential source
  // elements in every Scale'th slot, with the gaps undef (aext) or
  // undef-or-zero (zext).
  if (AllowIntDomain && ((MaskVT.is128BitVector() && Subtarget.hasSSE41()) ||
                         (MaskVT.is256BitVector() && Subtarget.hasInt256()))) {
    unsigned MaxScale = 64 / MaskEltSize;
    for (unsigned Scale = 2; Scale <= MaxScale; Scale *= 2) {
      bool MatchAny = true;
      bool MatchZero = true;
      unsigned NumDstElts = NumMaskElts / Scale;
      for (unsigned i = 0; i != NumDstElts && (MatchAny || MatchZero); ++i) {
        if (!isUndefOrEqual(Mask[i * Scale], (int)i)) {
          MatchAny = MatchZero = false;
          break;
        }
        MatchAny &= isUndefInRange(Mask, (i * Scale) + 1, Scale - 1);
        MatchZero &= isUndefOrZeroInRange(Mask, (i * Scale) + 1, Scale - 1);
      }
      if (MatchAny || MatchZero) {
        // Undef-only gaps are also undef-or-zero, so aext implies zext here.
        assert(MatchZero && "Failed to match zext but matched aext?");
        unsigned SrcSize = std::max(128u, NumDstElts * MaskEltSize);
        MVT ScalarTy = MaskVT.isInteger() ? MaskVT.getScalarType() :
                                            MVT::getIntegerVT(MaskEltSize);
        SrcVT = MVT::getVectorVT(ScalarTy, SrcSize / MaskEltSize);

        // The extension only reads part of the input — extract that part.
        if (SrcVT.getSizeInBits() != MaskVT.getSizeInBits())
          V1 = extractSubVector(V1, 0, DAG, DL, SrcSize);

        Shuffle = unsigned(MatchAny ? ISD::ANY_EXTEND : ISD::ZERO_EXTEND);
        // Use the *_VECTOR_INREG form when the source has extra elements.
        if (SrcVT.getVectorNumElements() != NumDstElts)
          Shuffle = getOpcode_EXTEND_VECTOR_INREG(Shuffle);

        DstVT = MVT::getIntegerVT(Scale * MaskEltSize);
        DstVT = MVT::getVectorVT(DstVT, NumDstElts);
        return true;
      }
    }
  }

  // Match against a VZEXT_MOVL for 32/64-bit elements (element 0 kept,
  // everything else undef or zero); 64-bit needs SSE2.
  if (((MaskEltSize == 32) || (MaskEltSize == 64 && Subtarget.hasSSE2())) &&
      isUndefOrEqual(Mask[0], 0) &&
      isUndefOrZeroInRange(Mask, 1, NumMaskElts - 1)) {
    Shuffle = X86ISD::VZEXT_MOVL;
    SrcVT = DstVT = !Subtarget.hasSSE2() ? MVT::v4f32 : MaskVT;
    return true;
  }

  // Check if we have SSE3 which will let us use MOVDDUP/MOVSLDUP/MOVSHDUP
  // (duplicate even/odd lanes). These are only valid in the float domain.
  // 128-bit forms:
  if (MaskVT.is128BitVector() && Subtarget.hasSSE3() && AllowFloatDomain) {
    if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0}, V1)) {
      Shuffle = X86ISD::MOVDDUP;
      SrcVT = DstVT = MVT::v2f64;
      return true;
    }
    if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0, 2, 2}, V1)) {
      Shuffle = X86ISD::MOVSLDUP;
      SrcVT = DstVT = MVT::v4f32;
      return true;
    }
    if (isTargetShuffleEquivalent(MaskVT, Mask, {1, 1, 3, 3}, V1)) {
      Shuffle = X86ISD::MOVSHDUP;
      SrcVT = DstVT = MVT::v4f32;
      return true;
    }
  }

  // 256-bit MOVDDUP/MOVSLDUP/MOVSHDUP forms (per 128-bit lane).
  if (MaskVT.is256BitVector() && AllowFloatDomain) {
    assert(Subtarget.hasAVX() && "AVX required for 256-bit vector shuffles");
    if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0, 2, 2}, V1)) {
      Shuffle = X86ISD::MOVDDUP;
      SrcVT = DstVT = MVT::v4f64;
      return true;
    }
    if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0, 2, 2, 4, 4, 6, 6}, V1)) {
      Shuffle = X86ISD::MOVSLDUP;
      SrcVT = DstVT = MVT::v8f32;
      return true;
    }
    if (isTargetShuffleEquivalent(MaskVT, Mask, {1, 1, 3, 3, 5, 5, 7, 7}, V1)) {
      Shuffle = X86ISD::MOVSHDUP;
      SrcVT = DstVT = MVT::v8f32;
      return true;
    }
  }

  // 512-bit MOVDDUP/MOVSLDUP/MOVSHDUP forms.
  if (MaskVT.is512BitVector() && AllowFloatDomain) {
    assert(Subtarget.hasAVX512() &&
           "AVX512 required for 512-bit vector shuffles");
    if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0, 2, 2, 4, 4, 6, 6}, V1)) {
      Shuffle = X86ISD::MOVDDUP;
      SrcVT = DstVT = MVT::v8f64;
      return true;
    }
    if (isTargetShuffleEquivalent(
            MaskVT, Mask,
            {0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14}, V1)) {
      Shuffle = X86ISD::MOVSLDUP;
      SrcVT = DstVT = MVT::v16f32;
      return true;
    }
    if (isTargetShuffleEquivalent(
            MaskVT, Mask,
            {1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15}, V1)) {
      Shuffle = X86ISD::MOVSHDUP;
      SrcVT = DstVT = MVT::v16f32;
      return true;
    }
  }

  return false;
}
| 35332 | |
| 35333 | |
| 35334 | |
| 35335 | |
// Attempt to match a shuffle mask against a unary immediate permute
// instruction (shuffles taking one source and an 8-bit control immediate).
// On success, sets Shuffle (opcode), ShuffleVT (type to lower with) and
// PermuteImm (the immediate to use).
static bool matchUnaryPermuteShuffle(MVT MaskVT, ArrayRef<int> Mask,
                                     const APInt &Zeroable,
                                     bool AllowFloatDomain, bool AllowIntDomain,
                                     const X86Subtarget &Subtarget,
                                     unsigned &Shuffle, MVT &ShuffleVT,
                                     unsigned &PermuteImm) {
  unsigned NumMaskElts = Mask.size();
  unsigned InputSizeInBits = MaskVT.getSizeInBits();
  unsigned MaskScalarSizeInBits = InputSizeInBits / NumMaskElts;
  MVT MaskEltVT = MVT::getIntegerVT(MaskScalarSizeInBits);
  bool ContainsZeros = isAnyZero(Mask);

  // Handle 64-bit element VPERMI / VPERMILPD.
  if (!ContainsZeros && MaskScalarSizeInBits == 64) {
    // Lane-crossing permutes need VPERMQ/VPERMPD (AVX2) or their
    // 512-bit equivalents.
    if (is128BitLaneCrossingShuffleMask(MaskEltVT, Mask)) {
      // VPERMPD/VPERMQ permutes within a 256-bit vector (AVX2+).
      if (Subtarget.hasAVX2() && MaskVT.is256BitVector()) {
        Shuffle = X86ISD::VPERMI;
        ShuffleVT = (AllowFloatDomain ? MVT::v4f64 : MVT::v4i64);
        PermuteImm = getV4X86ShuffleImm(Mask);
        return true;
      }
      if (Subtarget.hasAVX512() && MaskVT.is512BitVector()) {
        // The 512-bit form repeats the same 4-element pattern per 256-bit
        // half, so it must be a 256-bit-lane-repeating mask.
        SmallVector<int, 4> RepeatedMask;
        if (is256BitLaneRepeatedShuffleMask(MVT::v8f64, Mask, RepeatedMask)) {
          Shuffle = X86ISD::VPERMI;
          ShuffleVT = (AllowFloatDomain ? MVT::v8f64 : MVT::v8i64);
          PermuteImm = getV4X86ShuffleImm(RepeatedMask);
          return true;
        }
      }
    } else if (AllowFloatDomain && Subtarget.hasAVX()) {
      // VPERMILPD can permute within each 128-bit lane; the immediate
      // packs one select bit per element.
      Shuffle = X86ISD::VPERMILPI;
      ShuffleVT = MVT::getVectorVT(MVT::f64, Mask.size());
      PermuteImm = 0;
      for (int i = 0, e = Mask.size(); i != e; ++i) {
        int M = Mask[i];
        if (M == SM_SentinelUndef)
          continue;
        assert(((M / 2) == (i / 2)) && "Out of range shuffle mask index");
        PermuteImm |= (M & 1) << i;
      }
      return true;
    }
  }

  // We are checking for shuffle match or shift match. Loop twice so we can
  // handle 32/64-bit element permutes that repeat per 128-bit lane:
  // PSHUFD (int) or VPERMILPS (float), with 64-bit masks widened to
  // 32-bit word indices first.
  if ((MaskScalarSizeInBits == 64 || MaskScalarSizeInBits == 32) &&
      !ContainsZeros && (AllowIntDomain || Subtarget.hasAVX())) {
    SmallVector<int, 4> RepeatedMask;
    if (is128BitLaneRepeatedShuffleMask(MaskEltVT, Mask, RepeatedMask)) {
      // Narrow the repeated mask to 32-bit word granularity for the imm.
      SmallVector<int, 4> WordMask = RepeatedMask;
      if (MaskScalarSizeInBits == 64)
        narrowShuffleMaskElts(2, RepeatedMask, WordMask);

      Shuffle = (AllowIntDomain ? X86ISD::PSHUFD : X86ISD::VPERMILPI);
      ShuffleVT = (AllowIntDomain ? MVT::i32 : MVT::f32);
      ShuffleVT = MVT::getVectorVT(ShuffleVT, InputSizeInBits / 32);
      PermuteImm = getV4X86ShuffleImm(WordMask);
      return true;
    }
  }

  // Handle PSHUFLW/PSHUFHW: permute 16-bit elements within the low or high
  // half of each 128-bit lane.
  if (!ContainsZeros && AllowIntDomain && MaskScalarSizeInBits == 16 &&
      ((MaskVT.is128BitVector() && Subtarget.hasSSE2()) ||
       (MaskVT.is256BitVector() && Subtarget.hasAVX2()) ||
       (MaskVT.is512BitVector() && Subtarget.hasBWI()))) {
    SmallVector<int, 4> RepeatedMask;
    if (is128BitLaneRepeatedShuffleMask(MaskEltVT, Mask, RepeatedMask)) {
      ArrayRef<int> LoMask(RepeatedMask.data() + 0, 4);
      ArrayRef<int> HiMask(RepeatedMask.data() + 4, 4);

      // PSHUFLW: lower quadword permuted, upper quadword passed through
      // unchanged (HiMask must be the identity 4,5,6,7).
      if (isUndefOrInRange(LoMask, 0, 4) &&
          isSequentialOrUndefInRange(HiMask, 0, 4, 4)) {
        Shuffle = X86ISD::PSHUFLW;
        ShuffleVT = MVT::getVectorVT(MVT::i16, InputSizeInBits / 16);
        PermuteImm = getV4X86ShuffleImm(LoMask);
        return true;
      }

      // PSHUFHW: upper quadword permuted, lower quadword passed through
      // unchanged (LoMask must be the identity 0,1,2,3).
      if (isUndefOrInRange(HiMask, 4, 8) &&
          isSequentialOrUndefInRange(LoMask, 0, 4, 0)) {
        // Offset the HiMask so that we can match it to itself.
        int OffsetHiMask[4];
        for (int i = 0; i != 4; ++i)
          OffsetHiMask[i] = (HiMask[i] < 0 ? HiMask[i] : HiMask[i] - 4);

        Shuffle = X86ISD::PSHUFHW;
        ShuffleVT = MVT::getVectorVT(MVT::i16, InputSizeInBits / 16);
        PermuteImm = getV4X86ShuffleImm(OffsetHiMask);
        return true;
      }
    }
  }

  // Attempt to match against byte/bit shifts.
  // 512-bit narrow-element shifts need BWI.
  if (AllowIntDomain &&
      ((MaskVT.is128BitVector() && Subtarget.hasSSE2()) ||
       (MaskVT.is256BitVector() && Subtarget.hasAVX2()) ||
       (MaskVT.is512BitVector() && Subtarget.hasAVX512()))) {
    int ShiftAmt = matchShuffleAsShift(ShuffleVT, Shuffle, MaskScalarSizeInBits,
                                       Mask, 0, Zeroable, Subtarget);
    if (0 < ShiftAmt && (!ShuffleVT.is512BitVector() || Subtarget.hasBWI() ||
                         32 <= ShuffleVT.getScalarSizeInBits())) {
      PermuteImm = (unsigned)ShiftAmt;
      return true;
    }
  }

  // Attempt to match against bit rotates (XOP VPROT / AVX512 VPROL-style).
  if (!ContainsZeros && AllowIntDomain && MaskScalarSizeInBits < 64 &&
      ((MaskVT.is128BitVector() && Subtarget.hasXOP()) ||
       Subtarget.hasAVX512())) {
    int RotateAmt = matchShuffleAsBitRotate(ShuffleVT, MaskScalarSizeInBits,
                                            Subtarget, Mask);
    if (0 < RotateAmt) {
      Shuffle = X86ISD::VROTLI;
      PermuteImm = (unsigned)RotateAmt;
      return true;
    }
  }

  return false;
}
| 35468 | |
| 35469 | |
| 35470 | |
| 35471 | |
// Attempt to match a combined shuffle mask against supported binary shuffle
// instructions (two-input shuffles). On success sets Shuffle to the matched
// x86 opcode and SrcVT/DstVT to the types to operate on; V1/V2 may be
// rewritten in place (swapped, replaced by undef, etc.) to fit the matched
// instruction's operand order.
| 35472 | static bool matchBinaryShuffle(MVT MaskVT, ArrayRef<int> Mask, |
| 35473 | bool AllowFloatDomain, bool AllowIntDomain, |
| 35474 | SDValue &V1, SDValue &V2, const SDLoc &DL, |
| 35475 | SelectionDAG &DAG, const X86Subtarget &Subtarget, |
| 35476 | unsigned &Shuffle, MVT &SrcVT, MVT &DstVT, |
| 35477 | bool IsUnary) { |
| 35478 | unsigned NumMaskElts = Mask.size(); |
| 35479 | unsigned EltSizeInBits = MaskVT.getScalarSizeInBits(); |
| 35480 | |
// 128-bit special cases: MOVLHPS/MOVHLPS (or UNPCKL/H on SSE2) and the
// MOVSD/MOVSS "insert low element" patterns.
| 35481 | if (MaskVT.is128BitVector()) { |
// Mask {0,0}: broadcast of the low 64-bit half. If the mask element is
// fully undef, feed an undef first operand instead of V1.
| 35482 | if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 0}) && AllowFloatDomain) { |
| 35483 | V2 = V1; |
| 35484 | V1 = (SM_SentinelUndef == Mask[0] ? DAG.getUNDEF(MVT::v4f32) : V1); |
| 35485 | Shuffle = Subtarget.hasSSE2() ? X86ISD::UNPCKL : X86ISD::MOVLHPS; |
| 35486 | SrcVT = DstVT = Subtarget.hasSSE2() ? MVT::v2f64 : MVT::v4f32; |
| 35487 | return true; |
| 35488 | } |
// Mask {1,1}: duplicate of the high 64-bit half.
| 35489 | if (isTargetShuffleEquivalent(MaskVT, Mask, {1, 1}) && AllowFloatDomain) { |
| 35490 | V2 = V1; |
| 35491 | Shuffle = Subtarget.hasSSE2() ? X86ISD::UNPCKH : X86ISD::MOVHLPS; |
| 35492 | SrcVT = DstVT = Subtarget.hasSSE2() ? MVT::v2f64 : MVT::v4f32; |
| 35493 | return true; |
| 35494 | } |
// Mask {0,3}: MOVSD takes its low element from the *second* operand, so
// swap V1/V2 to match the instruction's operand order.
| 35495 | if (isTargetShuffleEquivalent(MaskVT, Mask, {0, 3}) && |
| 35496 | Subtarget.hasSSE2() && (AllowFloatDomain || !Subtarget.hasSSE41())) { |
| 35497 | std::swap(V1, V2); |
| 35498 | Shuffle = X86ISD::MOVSD; |
| 35499 | SrcVT = DstVT = MVT::v2f64; |
| 35500 | return true; |
| 35501 | } |
// Mask {4,1,2,3}: MOVSS — low f32 from V2, rest from V1.
| 35502 | if (isTargetShuffleEquivalent(MaskVT, Mask, {4, 1, 2, 3}) && |
| 35503 | (AllowFloatDomain || !Subtarget.hasSSE41())) { |
| 35504 | Shuffle = X86ISD::MOVSS; |
| 35505 | SrcVT = DstVT = MVT::v4f32; |
| 35506 | return true; |
| 35507 | } |
| 35508 | } |
| 35509 | |
| 35510 | |
// Attempt to match against either a unary or binary PACKSS/PACKUS shuffle.
| 35511 | if (((MaskVT == MVT::v8i16 || MaskVT == MVT::v16i8) && Subtarget.hasSSE2()) || |
| 35512 | ((MaskVT == MVT::v16i16 || MaskVT == MVT::v32i8) && Subtarget.hasInt256()) || |
| 35513 | ((MaskVT == MVT::v32i16 || MaskVT == MVT::v64i8) && Subtarget.hasBWI())) { |
| 35514 | if (matchShuffleWithPACK(MaskVT, SrcVT, V1, V2, Shuffle, Mask, DAG, |
| 35515 | Subtarget)) { |
| 35516 | DstVT = MaskVT; |
| 35517 | return true; |
| 35518 | } |
| 35519 | } |
| 35520 | |
| 35521 | |
// Attempt to match against either a unary or binary UNPCKL/UNPCKH shuffle.
| 35522 | if ((MaskVT == MVT::v4f32 && Subtarget.hasSSE1()) || |
| 35523 | (MaskVT.is128BitVector() && Subtarget.hasSSE2()) || |
| 35524 | (MaskVT.is256BitVector() && 32 <= EltSizeInBits && Subtarget.hasAVX()) || |
| 35525 | (MaskVT.is256BitVector() && Subtarget.hasAVX2()) || |
| 35526 | (MaskVT.is512BitVector() && Subtarget.hasAVX512())) { |
| 35527 | if (matchShuffleWithUNPCK(MaskVT, V1, V2, Shuffle, IsUnary, Mask, DL, DAG, |
| 35528 | Subtarget)) { |
| 35529 | SrcVT = DstVT = MaskVT; |
// AVX1 (no AVX2) only has 256-bit FP unpacks — force a float type.
| 35530 | if (MaskVT.is256BitVector() && !Subtarget.hasAVX2()) |
| 35531 | SrcVT = DstVT = (32 == EltSizeInBits ? MVT::v8f32 : MVT::v4f64); |
| 35532 | return true; |
| 35533 | } |
| 35534 | } |
| 35535 | |
| 35536 | |
| 35537 | |
// Attempt to match the shuffle as a blend of the two inputs that can be
// lowered to a plain OR: for each lane, any element not taken from one
// input must be provably zero in that input (checked via computeKnownBits
// on the demanded-element sets built below).
| 35538 | if ((EltSizeInBits % V1.getScalarValueSizeInBits()) == 0 && |
| 35539 | (EltSizeInBits % V2.getScalarValueSizeInBits()) == 0) { |
| 35540 | bool IsBlend = true; |
| 35541 | unsigned NumV1Elts = V1.getValueType().getVectorNumElements(); |
| 35542 | unsigned NumV2Elts = V2.getValueType().getVectorNumElements(); |
// Scale factors map mask elements onto the (possibly finer-grained)
// native elements of V1/V2.
| 35543 | unsigned Scale1 = NumV1Elts / NumMaskElts; |
| 35544 | unsigned Scale2 = NumV2Elts / NumMaskElts; |
| 35545 | APInt DemandedZeroV1 = APInt::getNullValue(NumV1Elts); |
| 35546 | APInt DemandedZeroV2 = APInt::getNullValue(NumV2Elts); |
| 35547 | for (unsigned i = 0; i != NumMaskElts; ++i) { |
| 35548 | int M = Mask[i]; |
// Undef lanes impose no constraint.
| 35549 | if (M == SM_SentinelUndef) |
| 35550 | continue; |
// Zero lanes must be zero in both inputs (since we OR them).
| 35551 | if (M == SM_SentinelZero) { |
| 35552 | DemandedZeroV1.setBits(i * Scale1, (i + 1) * Scale1); |
| 35553 | DemandedZeroV2.setBits(i * Scale2, (i + 1) * Scale2); |
| 35554 | continue; |
| 35555 | } |
// Lane taken from V1 — the corresponding V2 lane must be zero.
| 35556 | if (M == (int)i) { |
| 35557 | DemandedZeroV2.setBits(i * Scale2, (i + 1) * Scale2); |
| 35558 | continue; |
| 35559 | } |
// Lane taken from V2 — the corresponding V1 lane must be zero.
| 35560 | if (M == (int)(i + NumMaskElts)) { |
| 35561 | DemandedZeroV1.setBits(i * Scale1, (i + 1) * Scale1); |
| 35562 | continue; |
| 35563 | } |
// Any permuting lane disqualifies the OR-blend pattern.
| 35564 | IsBlend = false; |
| 35565 | break; |
| 35566 | } |
| 35567 | if (IsBlend && |
| 35568 | DAG.computeKnownBits(V1, DemandedZeroV1).isZero() && |
| 35569 | DAG.computeKnownBits(V2, DemandedZeroV2).isZero()) { |
| 35570 | Shuffle = ISD::OR; |
| 35571 | SrcVT = DstVT = MaskVT.changeTypeToInteger(); |
| 35572 | return true; |
| 35573 | } |
| 35574 | } |
| 35575 | |
| 35576 | return false; |
| 35577 | } |
| 35578 | |
// Attempt to match a combined shuffle mask against supported binary
// immediate-permute shuffle instructions (two inputs + an 8-bit immediate).
// On success sets Shuffle/ShuffleVT/PermuteImm; V1/V2 may be rewritten in
// place (e.g. replaced by a zero vector) to fit the matched instruction.
| 35579 | static bool matchBinaryPermuteShuffle( |
| 35580 | MVT MaskVT, ArrayRef<int> Mask, const APInt &Zeroable, |
| 35581 | bool AllowFloatDomain, bool AllowIntDomain, SDValue &V1, SDValue &V2, |
| 35582 | const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget, |
| 35583 | unsigned &Shuffle, MVT &ShuffleVT, unsigned &PermuteImm) { |
| 35584 | unsigned NumMaskElts = Mask.size(); |
| 35585 | unsigned EltSizeInBits = MaskVT.getScalarSizeInBits(); |
| 35586 | |
| 35587 | |
// Attempt to match against VALIGND/VALIGNQ (element-granular rotate across
// the two inputs). VALIGN can't zero lanes, hence the isAnyZero guard.
| 35588 | if (AllowIntDomain && (EltSizeInBits == 64 || EltSizeInBits == 32) && |
| 35589 | ((MaskVT.is128BitVector() && Subtarget.hasVLX()) || |
| 35590 | (MaskVT.is256BitVector() && Subtarget.hasVLX()) || |
| 35591 | (MaskVT.is512BitVector() && Subtarget.hasAVX512()))) { |
| 35592 | if (!isAnyZero(Mask)) { |
| 35593 | int Rotation = matchShuffleAsElementRotate(V1, V2, Mask); |
| 35594 | if (0 < Rotation) { |
| 35595 | Shuffle = X86ISD::VALIGN; |
| 35596 | if (EltSizeInBits == 64) |
| 35597 | ShuffleVT = MVT::getVectorVT(MVT::i64, MaskVT.getSizeInBits() / 64); |
| 35598 | else |
| 35599 | ShuffleVT = MVT::getVectorVT(MVT::i32, MaskVT.getSizeInBits() / 32); |
| 35600 | PermuteImm = Rotation; |
| 35601 | return true; |
| 35602 | } |
| 35603 | } |
| 35604 | } |
| 35605 | |
| 35606 | |
// Attempt to match against PALIGNR (byte rotate).
| 35607 | if (AllowIntDomain && ((MaskVT.is128BitVector() && Subtarget.hasSSSE3()) || |
| 35608 | (MaskVT.is256BitVector() && Subtarget.hasAVX2()) || |
| 35609 | (MaskVT.is512BitVector() && Subtarget.hasBWI()))) { |
| 35610 | int ByteRotation = matchShuffleAsByteRotate(MaskVT, V1, V2, Mask); |
| 35611 | if (0 < ByteRotation) { |
| 35612 | Shuffle = X86ISD::PALIGNR; |
| 35613 | ShuffleVT = MVT::getVectorVT(MVT::i8, MaskVT.getSizeInBits() / 8); |
| 35614 | PermuteImm = ByteRotation; |
| 35615 | return true; |
| 35616 | } |
| 35617 | } |
| 35618 | |
| 35619 | |
// Attempt to combine to X86ISD::BLENDI (per-element select by immediate).
| 35620 | if ((NumMaskElts <= 8 && ((Subtarget.hasSSE41() && MaskVT.is128BitVector()) || |
| 35621 | (Subtarget.hasAVX() && MaskVT.is256BitVector()))) || |
| 35622 | (MaskVT == MVT::v16i16 && Subtarget.hasAVX2())) { |
| 35623 | uint64_t BlendMask = 0; |
| 35624 | bool ForceV1Zero = false, ForceV2Zero = false; |
| 35625 | SmallVector<int, 8> TargetMask(Mask.begin(), Mask.end()); |
| 35626 | if (matchShuffleAsBlend(V1, V2, TargetMask, Zeroable, ForceV1Zero, |
| 35627 | ForceV2Zero, BlendMask)) { |
| 35628 | if (MaskVT == MVT::v16i16) { |
// v16i16 VPBLENDW only has an 8-bit immediate, so the blend must
// repeat identically in both 128-bit lanes.
| 35629 | |
| 35630 | SmallVector<int, 8> RepeatedMask; |
| 35631 | if (isRepeatedTargetShuffleMask(128, MaskVT, TargetMask, |
| 35632 | RepeatedMask)) { |
| 35633 | assert(RepeatedMask.size() == 8 && |
| 35634 | "Repeated mask size doesn't match!"); |
| 35635 | PermuteImm = 0; |
// Bit i of the immediate selects V2 for element i of each lane.
| 35636 | for (int i = 0; i < 8; ++i) |
| 35637 | if (RepeatedMask[i] >= 8) |
| 35638 | PermuteImm |= 1 << i; |
| 35639 | V1 = ForceV1Zero ? getZeroVector(MaskVT, Subtarget, DAG, DL) : V1; |
| 35640 | V2 = ForceV2Zero ? getZeroVector(MaskVT, Subtarget, DAG, DL) : V2; |
| 35641 | Shuffle = X86ISD::BLENDI; |
| 35642 | ShuffleVT = MaskVT; |
| 35643 | return true; |
| 35644 | } |
| 35645 | } else { |
| 35646 | V1 = ForceV1Zero ? getZeroVector(MaskVT, Subtarget, DAG, DL) : V1; |
| 35647 | V2 = ForceV2Zero ? getZeroVector(MaskVT, Subtarget, DAG, DL) : V2; |
| 35648 | PermuteImm = (unsigned)BlendMask; |
| 35649 | Shuffle = X86ISD::BLENDI; |
| 35650 | ShuffleVT = MaskVT; |
| 35651 | return true; |
| 35652 | } |
| 35653 | } |
| 35654 | } |
| 35655 | |
| 35656 | |
| 35657 | |
// Attempt to combine to INSERTPS, but only if the mask has elements that
// need zeroing (otherwise prefer the cheaper matches below first).
| 35658 | if (AllowFloatDomain && EltSizeInBits == 32 && Subtarget.hasSSE41() && |
| 35659 | MaskVT.is128BitVector() && isAnyZero(Mask) && |
| 35660 | matchShuffleAsInsertPS(V1, V2, PermuteImm, Zeroable, Mask, DAG)) { |
| 35661 | Shuffle = X86ISD::INSERTPS; |
| 35662 | ShuffleVT = MVT::v4f32; |
| 35663 | return true; |
| 35664 | } |
| 35665 | |
| 35666 | |
// Attempt to combine to SHUFPD.
| 35667 | if (AllowFloatDomain && EltSizeInBits == 64 && |
| 35668 | ((MaskVT.is128BitVector() && Subtarget.hasSSE2()) || |
| 35669 | (MaskVT.is256BitVector() && Subtarget.hasAVX()) || |
| 35670 | (MaskVT.is512BitVector() && Subtarget.hasAVX512()))) { |
| 35671 | bool ForceV1Zero = false, ForceV2Zero = false; |
| 35672 | if (matchShuffleWithSHUFPD(MaskVT, V1, V2, ForceV1Zero, ForceV2Zero, |
| 35673 | PermuteImm, Mask, Zeroable)) { |
| 35674 | V1 = ForceV1Zero ? getZeroVector(MaskVT, Subtarget, DAG, DL) : V1; |
| 35675 | V2 = ForceV2Zero ? getZeroVector(MaskVT, Subtarget, DAG, DL) : V2; |
| 35676 | Shuffle = X86ISD::SHUFP; |
| 35677 | ShuffleVT = MVT::getVectorVT(MVT::f64, MaskVT.getSizeInBits() / 64); |
| 35678 | return true; |
| 35679 | } |
| 35680 | } |
| 35681 | |
| 35682 | |
// Attempt to combine to SHUFPS: lower two result elements come from one
// source, upper two from the other; mask must repeat per 128-bit lane.
| 35683 | if (AllowFloatDomain && EltSizeInBits == 32 && |
| 35684 | ((MaskVT.is128BitVector() && Subtarget.hasSSE1()) || |
| 35685 | (MaskVT.is256BitVector() && Subtarget.hasAVX()) || |
| 35686 | (MaskVT.is512BitVector() && Subtarget.hasAVX512()))) { |
| 35687 | SmallVector<int, 4> RepeatedMask; |
| 35688 | if (isRepeatedTargetShuffleMask(128, MaskVT, Mask, RepeatedMask)) { |
| 35689 | |
| 35690 | |
// Match one half (2 elements) of the repeated mask: returns the source
// vector for that half (undef / zero / V1 / V2) and writes the two
// per-source selector indices into S0/S1, or SDValue() on failure.
| 35691 | auto MatchHalf = [&](unsigned Offset, int &S0, int &S1) { |
| 35692 | int M0 = RepeatedMask[Offset]; |
| 35693 | int M1 = RepeatedMask[Offset + 1]; |
| 35694 | |
| 35695 | if (isUndefInRange(RepeatedMask, Offset, 2)) { |
| 35696 | return DAG.getUNDEF(MaskVT); |
| 35697 | } else if (isUndefOrZeroInRange(RepeatedMask, Offset, 2)) { |
// Zeroable half — read lanes 0/1 of a zero vector.
| 35698 | S0 = (SM_SentinelUndef == M0 ? -1 : 0); |
| 35699 | S1 = (SM_SentinelUndef == M1 ? -1 : 1); |
| 35700 | return getZeroVector(MaskVT, Subtarget, DAG, DL); |
| 35701 | } else if (isUndefOrInRange(M0, 0, 4) && isUndefOrInRange(M1, 0, 4)) { |
// Both elements from V1 (indices 0..3).
| 35702 | S0 = (SM_SentinelUndef == M0 ? -1 : M0 & 3); |
| 35703 | S1 = (SM_SentinelUndef == M1 ? -1 : M1 & 3); |
| 35704 | return V1; |
| 35705 | } else if (isUndefOrInRange(M0, 4, 8) && isUndefOrInRange(M1, 4, 8)) { |
// Both elements from V2 (indices 4..7, masked down to 0..3).
| 35706 | S0 = (SM_SentinelUndef == M0 ? -1 : M0 & 3); |
| 35707 | S1 = (SM_SentinelUndef == M1 ? -1 : M1 & 3); |
| 35708 | return V2; |
| 35709 | } |
| 35710 | |
| 35711 | return SDValue(); |
| 35712 | }; |
| 35713 | |
| 35714 | int ShufMask[4] = {-1, -1, -1, -1}; |
| 35715 | SDValue Lo = MatchHalf(0, ShufMask[0], ShufMask[1]); |
| 35716 | SDValue Hi = MatchHalf(2, ShufMask[2], ShufMask[3]); |
| 35717 | |
| 35718 | if (Lo && Hi) { |
| 35719 | V1 = Lo; |
| 35720 | V2 = Hi; |
| 35721 | Shuffle = X86ISD::SHUFP; |
| 35722 | ShuffleVT = MVT::getVectorVT(MVT::f32, MaskVT.getSizeInBits() / 32); |
| 35723 | PermuteImm = getV4X86ShuffleImm(ShufMask); |
| 35724 | return true; |
| 35725 | } |
| 35726 | } |
| 35727 | } |
| 35728 | |
| 35729 | |
// Attempt to combine to INSERTPS more generally if the SHUFPS match above
// failed (this time without requiring zeroable elements).
| 35730 | if (AllowFloatDomain && EltSizeInBits == 32 && Subtarget.hasSSE41() && |
| 35731 | MaskVT.is128BitVector() && |
| 35732 | matchShuffleAsInsertPS(V1, V2, PermuteImm, Zeroable, Mask, DAG)) { |
| 35733 | Shuffle = X86ISD::INSERTPS; |
| 35734 | ShuffleVT = MVT::v4f32; |
| 35735 | return true; |
| 35736 | } |
| 35737 | |
| 35738 | return false; |
| 35739 | } |
| 35740 | |
// Forward declaration — defined later in this file; used by
// combineX86ShuffleChain below for shuffle chains involving extracted
// subvectors.
| 35741 | static SDValue combineX86ShuffleChainWithExtract( |
| 35742 | ArrayRef<SDValue> Inputs, SDValue Root, ArrayRef<int> BaseMask, int Depth, |
| 35743 | bool HasVariableMask, bool AllowVariableCrossLaneMask, |
| 35744 | bool AllowVariablePerLaneMask, SelectionDAG &DAG, |
| 35745 | const X86Subtarget &Subtarget); |
| 35746 | |
| 35747 | |
| 35748 | |
| 35749 | |
| 35750 | |
| 35751 | |
| 35752 | |
| 35753 | |
| 35754 | |
| 35755 | |
| 35756 | static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root, |
| 35757 | ArrayRef<int> BaseMask, int Depth, |
| 35758 | bool HasVariableMask, |
| 35759 | bool AllowVariableCrossLaneMask, |
| 35760 | bool AllowVariablePerLaneMask, |
| 35761 | SelectionDAG &DAG, |
| 35762 | const X86Subtarget &Subtarget) { |
| 35763 | assert(!BaseMask.empty() && "Cannot combine an empty shuffle mask!"); |
| 35764 | assert((Inputs.size() == 1 || Inputs.size() == 2) && |
| 35765 | "Unexpected number of shuffle inputs!"); |
| 35766 | |
| 35767 | MVT RootVT = Root.getSimpleValueType(); |
| 35768 | unsigned RootSizeInBits = RootVT.getSizeInBits(); |
| 35769 | unsigned NumRootElts = RootVT.getVectorNumElements(); |
| 35770 | |
| 35771 | |
| 35772 | |
| 35773 | auto CanonicalizeShuffleInput = [&](MVT VT, SDValue Op) { |
| 35774 | return DAG.getBitcast(VT, Op); |
| 35775 | }; |
| 35776 | |
| 35777 | |
| 35778 | |
| 35779 | bool UnaryShuffle = (Inputs.size() == 1); |
| 35780 | SDValue V1 = peekThroughBitcasts(Inputs[0]); |
| 35781 | SDValue V2 = (UnaryShuffle ? DAG.getUNDEF(V1.getValueType()) |
| 35782 | : peekThroughBitcasts(Inputs[1])); |
| 35783 | |
| 35784 | MVT VT1 = V1.getSimpleValueType(); |
| 35785 | MVT VT2 = V2.getSimpleValueType(); |
| 35786 | assert(VT1.getSizeInBits() == RootSizeInBits && |
| 35787 | VT2.getSizeInBits() == RootSizeInBits && "Vector size mismatch"); |
| 35788 | |
| 35789 | SDLoc DL(Root); |
| 35790 | SDValue Res; |
| 35791 | |
| 35792 | unsigned NumBaseMaskElts = BaseMask.size(); |
| 35793 | if (NumBaseMaskElts == 1) { |
| 35794 | assert(BaseMask[0] == 0 && "Invalid shuffle index found!"); |
| 35795 | return CanonicalizeShuffleInput(RootVT, V1); |
| 35796 | } |
| 35797 | |
| 35798 | bool OptForSize = DAG.shouldOptForSize(); |
| 35799 | unsigned BaseMaskEltSizeInBits = RootSizeInBits / NumBaseMaskElts; |
| 35800 | bool FloatDomain = VT1.isFloatingPoint() || VT2.isFloatingPoint() || |
| 35801 | (RootVT.isFloatingPoint() && Depth >= 1) || |
| 35802 | (RootVT.is256BitVector() && !Subtarget.hasAVX2()); |
| 35803 | |
| 35804 | |
| 35805 | |
| 35806 | |
| 35807 | bool IsMaskedShuffle = false; |
| 35808 | if (RootSizeInBits == 512 || (Subtarget.hasVLX() && RootSizeInBits >= 128)) { |
| 35809 | if (Root.hasOneUse() && Root->use_begin()->getOpcode() == ISD::VSELECT && |
| 35810 | Root->use_begin()->getOperand(0).getScalarValueSizeInBits() == 1) { |
| 35811 | IsMaskedShuffle = true; |
| 35812 | } |
| 35813 | } |
| 35814 | |
| 35815 | |
| 35816 | |
| 35817 | |
| 35818 | if (UnaryShuffle && isTargetShuffleSplat(V1) && !isAnyZero(BaseMask) && |
| 35819 | (BaseMaskEltSizeInBits % V1.getScalarValueSizeInBits()) == 0 && |
| 35820 | V1.getValueSizeInBits() >= RootSizeInBits) { |
| 35821 | return CanonicalizeShuffleInput(RootVT, V1); |
| 35822 | } |
| 35823 | |
| 35824 | |
| 35825 | |
| 35826 | if (VT1 == VT2 && VT1.getSizeInBits() == RootSizeInBits && VT1.isVector()) { |
| 35827 | SmallVector<int> ScaledMask, IdentityMask; |
| 35828 | unsigned NumElts = VT1.getVectorNumElements(); |
| 35829 | if (BaseMask.size() <= NumElts && |
| 35830 | scaleShuffleElements(BaseMask, NumElts, ScaledMask)) { |
| 35831 | for (unsigned i = 0; i != NumElts; ++i) |
| 35832 | IdentityMask.push_back(i); |
| 35833 | if (isTargetShuffleEquivalent(RootVT, ScaledMask, IdentityMask, V1, V2)) |
| 35834 | return CanonicalizeShuffleInput(RootVT, V1); |
| 35835 | } |
| 35836 | } |
| 35837 | |
| 35838 | |
| 35839 | if (RootVT.is512BitVector() && |
| 35840 | (NumBaseMaskElts == 2 || NumBaseMaskElts == 4)) { |
| 35841 | |
| 35842 | |
| 35843 | |
| 35844 | if (isUndefOrZeroInRange(BaseMask, 1, NumBaseMaskElts - 1)) { |
| 35845 | if (Depth == 0 && Root.getOpcode() == ISD::INSERT_SUBVECTOR) |
| 35846 | return SDValue(); |
| 35847 | assert(isInRange(BaseMask[0], 0, NumBaseMaskElts) && |
| 35848 | "Unexpected lane shuffle"); |
| 35849 | Res = CanonicalizeShuffleInput(RootVT, V1); |
| 35850 | unsigned SubIdx = BaseMask[0] * (NumRootElts / NumBaseMaskElts); |
| 35851 | bool UseZero = isAnyZero(BaseMask); |
| 35852 | Res = extractSubVector(Res, SubIdx, DAG, DL, BaseMaskEltSizeInBits); |
| 35853 | return widenSubVector(Res, UseZero, Subtarget, DAG, DL, RootSizeInBits); |
| 35854 | } |
| 35855 | |
| 35856 | |
| 35857 | SmallVector<int, 4> Mask; |
| 35858 | assert((BaseMaskEltSizeInBits % 128) == 0 && "Illegal mask size"); |
| 35859 | narrowShuffleMaskElts(BaseMaskEltSizeInBits / 128, BaseMask, Mask); |
| 35860 | |
| 35861 | |
| 35862 | auto MatchSHUF128 = [&](MVT ShuffleVT, const SDLoc &DL, ArrayRef<int> Mask, |
| 35863 | SDValue V1, SDValue V2, SelectionDAG &DAG) { |
| 35864 | unsigned PermMask = 0; |
| 35865 | |
| 35866 | SDValue Ops[2] = {DAG.getUNDEF(ShuffleVT), DAG.getUNDEF(ShuffleVT)}; |
| 35867 | for (int i = 0; i < 4; ++i) { |
| 35868 | assert(Mask[i] >= -1 && "Illegal shuffle sentinel value"); |
| 35869 | if (Mask[i] < 0) |
| 35870 | continue; |
| 35871 | |
| 35872 | SDValue Op = Mask[i] >= 4 ? V2 : V1; |
| 35873 | unsigned OpIndex = i / 2; |
| 35874 | if (Ops[OpIndex].isUndef()) |
| 35875 | Ops[OpIndex] = Op; |
| 35876 | else if (Ops[OpIndex] != Op) |
| 35877 | return SDValue(); |
| 35878 | |
| 35879 | |
| 35880 | |
| 35881 | |
| 35882 | PermMask |= (Mask[i] % 4) << (i * 2); |
| 35883 | } |
| 35884 | |
| 35885 | return DAG.getNode(X86ISD::SHUF128, DL, ShuffleVT, |
| 35886 | CanonicalizeShuffleInput(ShuffleVT, Ops[0]), |
| 35887 | CanonicalizeShuffleInput(ShuffleVT, Ops[1]), |
| 35888 | DAG.getTargetConstant(PermMask, DL, MVT::i8)); |
| 35889 | }; |
| 35890 | |
| 35891 | |
| 35892 | |
| 35893 | |
| 35894 | bool PreferPERMQ = |
| 35895 | UnaryShuffle && isUndefOrInRange(Mask[0], 0, 2) && |
| 35896 | isUndefOrInRange(Mask[1], 0, 2) && isUndefOrInRange(Mask[2], 2, 4) && |
| 35897 | isUndefOrInRange(Mask[3], 2, 4) && |
| 35898 | (Mask[0] < 0 || Mask[2] < 0 || Mask[0] == (Mask[2] % 2)) && |
| 35899 | (Mask[1] < 0 || Mask[3] < 0 || Mask[1] == (Mask[3] % 2)); |
| 35900 | |
| 35901 | if (!isAnyZero(Mask) && !PreferPERMQ) { |
| 35902 | if (Depth == 0 && Root.getOpcode() == X86ISD::SHUF128) |
| 35903 | return SDValue(); |
| 35904 | MVT ShuffleVT = (FloatDomain ? MVT::v8f64 : MVT::v8i64); |
| 35905 | if (SDValue V = MatchSHUF128(ShuffleVT, DL, Mask, V1, V2, DAG)) |
| 35906 | return DAG.getBitcast(RootVT, V); |
| 35907 | } |
| 35908 | } |
| 35909 | |
| 35910 | |
| 35911 | if (RootVT.is256BitVector() && NumBaseMaskElts == 2) { |
| 35912 | |
| 35913 | |
| 35914 | |
| 35915 | if (isUndefOrZero(BaseMask[1])) { |
| 35916 | if (Depth == 0 && Root.getOpcode() == ISD::INSERT_SUBVECTOR) |
| 35917 | return SDValue(); |
| 35918 | assert(isInRange(BaseMask[0], 0, 2) && "Unexpected lane shuffle"); |
| 35919 | Res = CanonicalizeShuffleInput(RootVT, V1); |
| 35920 | Res = extract128BitVector(Res, BaseMask[0] * (NumRootElts / 2), DAG, DL); |
| 35921 | return widenSubVector(Res, BaseMask[1] == SM_SentinelZero, Subtarget, DAG, |
| 35922 | DL, 256); |
| 35923 | } |
| 35924 | |
| 35925 | |
| 35926 | |
| 35927 | |
| 35928 | if (BaseMask[0] == 0 && BaseMask[1] == 0 && !Subtarget.hasAVX2()) { |
| 35929 | if (Depth == 0 && Root.getOpcode() == ISD::INSERT_SUBVECTOR) |
| 35930 | return SDValue(); |
| 35931 | Res = CanonicalizeShuffleInput(RootVT, V1); |
| 35932 | Res = extractSubVector(Res, 0, DAG, DL, 128); |
| 35933 | return concatSubVectors(Res, Res, DAG, DL); |
| 35934 | } |
| 35935 | |
| 35936 | if (Depth == 0 && Root.getOpcode() == X86ISD::VPERM2X128) |
| 35937 | return SDValue(); |
| 35938 | |
| 35939 | |
| 35940 | |
| 35941 | |
| 35942 | if (UnaryShuffle && |
| 35943 | !(Subtarget.hasAVX2() && isUndefOrInRange(BaseMask, 0, 2)) && |
| 35944 | (OptForSize || !isSequentialOrUndefOrZeroInRange(BaseMask, 0, 2, 0))) { |
| 35945 | unsigned PermMask = 0; |
| 35946 | PermMask |= ((BaseMask[0] < 0 ? 0x8 : (BaseMask[0] & 1)) << 0); |
| 35947 | PermMask |= ((BaseMask[1] < 0 ? 0x8 : (BaseMask[1] & 1)) << 4); |
| 35948 | return DAG.getNode( |
| 35949 | X86ISD::VPERM2X128, DL, RootVT, CanonicalizeShuffleInput(RootVT, V1), |
| 35950 | DAG.getUNDEF(RootVT), DAG.getTargetConstant(PermMask, DL, MVT::i8)); |
| 35951 | } |
| 35952 | |
| 35953 | if (Depth == 0 && Root.getOpcode() == X86ISD::SHUF128) |
| 35954 | return SDValue(); |
| 35955 | |
| 35956 | |
| 35957 | if (!UnaryShuffle && !IsMaskedShuffle) { |
| 35958 | assert(llvm::all_of(BaseMask, [](int M) { return 0 <= M && M < 4; }) && |
| 35959 | "Unexpected shuffle sentinel value"); |
| 35960 | |
| 35961 | if (!((BaseMask[0] == 0 && BaseMask[1] == 3) || |
| 35962 | (BaseMask[0] == 2 && BaseMask[1] == 1))) { |
| 35963 | unsigned PermMask = 0; |
| 35964 | PermMask |= ((BaseMask[0] & 3) << 0); |
| 35965 | PermMask |= ((BaseMask[1] & 3) << 4); |
| 35966 | SDValue LHS = isInRange(BaseMask[0], 0, 2) ? V1 : V2; |
| 35967 | SDValue RHS = isInRange(BaseMask[1], 0, 2) ? V1 : V2; |
| 35968 | return DAG.getNode(X86ISD::VPERM2X128, DL, RootVT, |
| 35969 | CanonicalizeShuffleInput(RootVT, LHS), |
| 35970 | CanonicalizeShuffleInput(RootVT, RHS), |
| 35971 | DAG.getTargetConstant(PermMask, DL, MVT::i8)); |
| 35972 | } |
| 35973 | } |
| 35974 | } |
| 35975 | |
| 35976 | |
| 35977 | |
| 35978 | SmallVector<int, 64> Mask; |
| 35979 | if (BaseMaskEltSizeInBits > 64) { |
| 35980 | assert((BaseMaskEltSizeInBits % 64) == 0 && "Illegal mask size"); |
| 35981 | int MaskScale = BaseMaskEltSizeInBits / 64; |
| 35982 | narrowShuffleMaskElts(MaskScale, BaseMask, Mask); |
| 35983 | } else { |
| 35984 | Mask.assign(BaseMask.begin(), BaseMask.end()); |
| 35985 | } |
| 35986 | |
| 35987 | |
| 35988 | |
| 35989 | |
| 35990 | if (IsMaskedShuffle && NumRootElts > Mask.size()) { |
| 35991 | assert((NumRootElts % Mask.size()) == 0 && "Illegal mask size"); |
| 35992 | int MaskScale = NumRootElts / Mask.size(); |
| 35993 | SmallVector<int, 64> ScaledMask; |
| 35994 | narrowShuffleMaskElts(MaskScale, Mask, ScaledMask); |
| 35995 | Mask = std::move(ScaledMask); |
| 35996 | } |
| 35997 | |
| 35998 | unsigned NumMaskElts = Mask.size(); |
| 35999 | unsigned MaskEltSizeInBits = RootSizeInBits / NumMaskElts; |
| 36000 | |
| 36001 | |
| 36002 | FloatDomain &= (32 <= MaskEltSizeInBits); |
| 36003 | MVT MaskVT = FloatDomain ? MVT::getFloatingPointVT(MaskEltSizeInBits) |
| 36004 | : MVT::getIntegerVT(MaskEltSizeInBits); |
| 36005 | MaskVT = MVT::getVectorVT(MaskVT, NumMaskElts); |
| 36006 | |
| 36007 | |
| 36008 | if (!DAG.getTargetLoweringInfo().isTypeLegal(MaskVT)) |
| 36009 | return SDValue(); |
| 36010 | |
| 36011 | |
| 36012 | MVT ShuffleSrcVT, ShuffleVT; |
| 36013 | unsigned Shuffle, PermuteImm; |
| 36014 | |
| 36015 | |
| 36016 | |
| 36017 | |
| 36018 | bool AllowFloatDomain = FloatDomain || (Depth >= 3); |
| 36019 | bool AllowIntDomain = (!FloatDomain || (Depth >= 3)) && Subtarget.hasSSE2() && |
| 36020 | (!MaskVT.is256BitVector() || Subtarget.hasAVX2()); |
| 36021 | |
| 36022 | |
| 36023 | APInt KnownUndef, KnownZero; |
| 36024 | resolveZeroablesFromTargetShuffle(Mask, KnownUndef, KnownZero); |
| 36025 | APInt Zeroable = KnownUndef | KnownZero; |
| 36026 | |
| 36027 | if (UnaryShuffle) { |
| 36028 | |
| 36029 | |
| 36030 | if ((Subtarget.hasAVX2() || |
| 36031 | (Subtarget.hasAVX() && 32 <= MaskEltSizeInBits)) && |
| 36032 | (!IsMaskedShuffle || NumRootElts == NumMaskElts)) { |
| 36033 | if (isUndefOrEqual(Mask, 0)) { |
| 36034 | if (V1.getValueType() == MaskVT && |
| 36035 | V1.getOpcode() == ISD::SCALAR_TO_VECTOR && |
| 36036 | MayFoldLoad(V1.getOperand(0))) { |
| 36037 | if (Depth == 0 && Root.getOpcode() == X86ISD::VBROADCAST) |
| 36038 | return SDValue(); |
| 36039 | Res = V1.getOperand(0); |
| 36040 | Res = DAG.getNode(X86ISD::VBROADCAST, DL, MaskVT, Res); |
| 36041 | return DAG.getBitcast(RootVT, Res); |
| 36042 | } |
| 36043 | if (Subtarget.hasAVX2()) { |
| 36044 | if (Depth == 0 && Root.getOpcode() == X86ISD::VBROADCAST) |
| 36045 | return SDValue(); |
| 36046 | Res = CanonicalizeShuffleInput(MaskVT, V1); |
| 36047 | Res = DAG.getNode(X86ISD::VBROADCAST, DL, MaskVT, Res); |
| 36048 | return DAG.getBitcast(RootVT, Res); |
| 36049 | } |
| 36050 | } |
| 36051 | } |
| 36052 | |
| 36053 | SDValue NewV1 = V1; |
| 36054 | if (matchUnaryShuffle(MaskVT, Mask, AllowFloatDomain, AllowIntDomain, NewV1, |
| 36055 | DL, DAG, Subtarget, Shuffle, ShuffleSrcVT, |
| 36056 | ShuffleVT) && |
| 36057 | (!IsMaskedShuffle || |
| 36058 | (NumRootElts == ShuffleVT.getVectorNumElements()))) { |
| 36059 | if (Depth == 0 && Root.getOpcode() == Shuffle) |
| 36060 | return SDValue(); |
| 36061 | Res = CanonicalizeShuffleInput(ShuffleSrcVT, NewV1); |
| 36062 | Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res); |
| 36063 | return DAG.getBitcast(RootVT, Res); |
| 36064 | } |
| 36065 | |
| 36066 | if (matchUnaryPermuteShuffle(MaskVT, Mask, Zeroable, AllowFloatDomain, |
| 36067 | AllowIntDomain, Subtarget, Shuffle, ShuffleVT, |
| 36068 | PermuteImm) && |
| 36069 | (!IsMaskedShuffle || |
| 36070 | (NumRootElts == ShuffleVT.getVectorNumElements()))) { |
| 36071 | if (Depth == 0 && Root.getOpcode() == Shuffle) |
| 36072 | return SDValue(); |
| 36073 | Res = CanonicalizeShuffleInput(ShuffleVT, V1); |
| 36074 | Res = DAG.getNode(Shuffle, DL, ShuffleVT, Res, |
| 36075 | DAG.getTargetConstant(PermuteImm, DL, MVT::i8)); |
| 36076 | return DAG.getBitcast(RootVT, Res); |
| 36077 | } |
| 36078 | } |
| 36079 | |
| 36080 | |
| 36081 | |
| 36082 | |
| 36083 | if (!UnaryShuffle && AllowFloatDomain && RootSizeInBits == 128 && |
| 36084 | Subtarget.hasSSE41() && |
| 36085 | !isTargetShuffleEquivalent(MaskVT, Mask, {4, 1, 2, 3})) { |
| 36086 | if (MaskEltSizeInBits == 32) { |
| 36087 | SDValue SrcV1 = V1, SrcV2 = V2; |
| 36088 | if (matchShuffleAsInsertPS(SrcV1, SrcV2, PermuteImm, Zeroable, Mask, |
| 36089 | DAG) && |
| 36090 | SrcV2.getOpcode() == ISD::SCALAR_TO_VECTOR) { |
| 36091 | if (Depth == 0 && Root.getOpcode() == X86ISD::INSERTPS) |
| 36092 | return SDValue(); |
| 36093 | Res = DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32, |
| 36094 | CanonicalizeShuffleInput(MVT::v4f32, SrcV1), |
| 36095 | CanonicalizeShuffleInput(MVT::v4f32, SrcV2), |
| 36096 | DAG.getTargetConstant(PermuteImm, DL, MVT::i8)); |
| 36097 | return DAG.getBitcast(RootVT, Res); |
| 36098 | } |
| 36099 | } |
| 36100 | if (MaskEltSizeInBits == 64 && |
| 36101 | isTargetShuffleEquivalent(MaskVT, Mask, {0, 2}) && |
| 36102 | V2.getOpcode() == ISD::SCALAR_TO_VECTOR && |
| 36103 | V2.getScalarValueSizeInBits() <= 32) { |
| 36104 | if (Depth == 0 && Root.getOpcode() == X86ISD::INSERTPS) |
| 36105 | return SDValue(); |
| 36106 | PermuteImm = (2 << 4) | (0 << 0); |
| 36107 | Res = DAG.getNode(X86ISD::INSERTPS, DL, MVT::v4f32, |
| 36108 | CanonicalizeShuffleInput(MVT::v4f32, V1), |
| 36109 | CanonicalizeShuffleInput(MVT::v4f32, V2), |
| 36110 | DAG.getTargetConstant(PermuteImm, DL, MVT::i8)); |
| 36111 | return DAG.getBitcast(RootVT, Res); |
| 36112 | } |
| 36113 | } |
| 36114 | |
| 36115 | SDValue NewV1 = V1; |
| 36116 | SDValue NewV2 = V2; |
| 36117 | if (matchBinaryShuffle(MaskVT, Mask, AllowFloatDomain, AllowIntDomain, NewV1, |
| 36118 | NewV2, DL, DAG, Subtarget, Shuffle, ShuffleSrcVT, |
| 36119 | ShuffleVT, UnaryShuffle) && |
| 36120 | (!IsMaskedShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) { |
| 36121 | if (Depth == 0 && Root.getOpcode() == Shuffle) |
| 36122 | return SDValue(); |
| 36123 | NewV1 = CanonicalizeShuffleInput(ShuffleSrcVT, NewV1); |
| 36124 | NewV2 = CanonicalizeShuffleInput(ShuffleSrcVT, NewV2); |
| 36125 | Res = DAG.getNode(Shuffle, DL, ShuffleVT, NewV1, NewV2); |
| 36126 | return DAG.getBitcast(RootVT, Res); |
| 36127 | } |
| 36128 | |
| 36129 | NewV1 = V1; |
| 36130 | NewV2 = V2; |
| 36131 | if (matchBinaryPermuteShuffle(MaskVT, Mask, Zeroable, AllowFloatDomain, |
| 36132 | AllowIntDomain, NewV1, NewV2, DL, DAG, |
| 36133 | Subtarget, Shuffle, ShuffleVT, PermuteImm) && |
| 36134 | (!IsMaskedShuffle || (NumRootElts == ShuffleVT.getVectorNumElements()))) { |
| 36135 | if (Depth == 0 && Root.getOpcode() == Shuffle) |
| 36136 | return SDValue(); |
| 36137 | NewV1 = CanonicalizeShuffleInput(ShuffleVT, NewV1); |
| 36138 | NewV2 = CanonicalizeShuffleInput(ShuffleVT, NewV2); |
| 36139 | Res = DAG.getNode(Shuffle, DL, ShuffleVT, NewV1, NewV2, |
| 36140 | DAG.getTargetConstant(PermuteImm, DL, MVT::i8)); |
| 36141 | return DAG.getBitcast(RootVT, Res); |
| 36142 | } |
| 36143 | |
| 36144 | |
| 36145 | MVT IntMaskVT = MVT::getIntegerVT(MaskEltSizeInBits); |
| 36146 | IntMaskVT = MVT::getVectorVT(IntMaskVT, NumMaskElts); |
| 36147 | |
| 36148 | |
| 36149 | if (Subtarget.hasSSE4A() && AllowIntDomain && RootSizeInBits == 128) { |
| 36150 | uint64_t BitLen, BitIdx; |
| 36151 | if (matchShuffleAsEXTRQ(IntMaskVT, V1, V2, Mask, BitLen, BitIdx, |
| 36152 | Zeroable)) { |
| 36153 | if (Depth == 0 && Root.getOpcode() == X86ISD::EXTRQI) |
| 36154 | return SDValue(); |
| 36155 | V1 = CanonicalizeShuffleInput(IntMaskVT, V1); |
| 36156 | Res = DAG.getNode(X86ISD::EXTRQI, DL, IntMaskVT, V1, |
| 36157 | DAG.getTargetConstant(BitLen, DL, MVT::i8), |
| 36158 | DAG.getTargetConstant(BitIdx, DL, MVT::i8)); |
| 36159 | return DAG.getBitcast(RootVT, Res); |
| 36160 | } |
| 36161 | |
| 36162 | if (matchShuffleAsINSERTQ(IntMaskVT, V1, V2, Mask, BitLen, BitIdx)) { |
| 36163 | if (Depth == 0 && Root.getOpcode() == X86ISD::INSERTQI) |
| 36164 | return SDValue(); |
| 36165 | V1 = CanonicalizeShuffleInput(IntMaskVT, V1); |
| 36166 | V2 = CanonicalizeShuffleInput(IntMaskVT, V2); |
| 36167 | Res = DAG.getNode(X86ISD::INSERTQI, DL, IntMaskVT, V1, V2, |
| 36168 | DAG.getTargetConstant(BitLen, DL, MVT::i8), |
| 36169 | DAG.getTargetConstant(BitIdx, DL, MVT::i8)); |
| 36170 | return DAG.getBitcast(RootVT, Res); |
| 36171 | } |
| 36172 | } |
| 36173 | |
| 36174 | |
| 36175 | if (AllowIntDomain && MaskEltSizeInBits < 64 && Subtarget.hasAVX512()) { |
| 36176 | |
| 36177 | if (matchShuffleAsVTRUNC(ShuffleSrcVT, ShuffleVT, IntMaskVT, Mask, Zeroable, |
| 36178 | Subtarget)) { |
| 36179 | bool IsTRUNCATE = ShuffleVT.getVectorNumElements() == |
| 36180 | ShuffleSrcVT.getVectorNumElements(); |
| 36181 | unsigned Opc = |
| 36182 | IsTRUNCATE ? (unsigned)ISD::TRUNCATE : (unsigned)X86ISD::VTRUNC; |
| 36183 | if (Depth == 0 && Root.getOpcode() == Opc) |
| 36184 | return SDValue(); |
| 36185 | V1 = CanonicalizeShuffleInput(ShuffleSrcVT, V1); |
| 36186 | Res = DAG.getNode(Opc, DL, ShuffleVT, V1); |
| 36187 | if (ShuffleVT.getSizeInBits() < RootSizeInBits) |
| 36188 | Res = widenSubVector(Res, true, Subtarget, DAG, DL, RootSizeInBits); |
| 36189 | return DAG.getBitcast(RootVT, Res); |
| 36190 | } |
| 36191 | |
| 36192 | |
| 36193 | if (RootSizeInBits < 512 && |
| 36194 | ((RootVT.is256BitVector() && Subtarget.useAVX512Regs()) || |
| 36195 | (RootVT.is128BitVector() && Subtarget.hasVLX())) && |
| 36196 | (MaskEltSizeInBits > 8 || Subtarget.hasBWI()) && |
| 36197 | isSequentialOrUndefInRange(Mask, 0, NumMaskElts, 0, 2)) { |
| 36198 | if (Depth == 0 && Root.getOpcode() == ISD::TRUNCATE) |
| 36199 | return SDValue(); |
| 36200 | ShuffleSrcVT = MVT::getIntegerVT(MaskEltSizeInBits * 2); |
| 36201 | ShuffleSrcVT = MVT::getVectorVT(ShuffleSrcVT, NumMaskElts / 2); |
| 36202 | V1 = CanonicalizeShuffleInput(ShuffleSrcVT, V1); |
| 36203 | V2 = CanonicalizeShuffleInput(ShuffleSrcVT, V2); |
| 36204 | ShuffleSrcVT = MVT::getIntegerVT(MaskEltSizeInBits * 2); |
| 36205 | ShuffleSrcVT = MVT::getVectorVT(ShuffleSrcVT, NumMaskElts); |
| 36206 | Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, ShuffleSrcVT, V1, V2); |
| 36207 | Res = DAG.getNode(ISD::TRUNCATE, DL, IntMaskVT, Res); |
| 36208 | return DAG.getBitcast(RootVT, Res); |
| 36209 | } |
| 36210 | } |
| 36211 | |
| 36212 | |
| 36213 | |
| 36214 | if (Depth < 1) |
| 36215 | return SDValue(); |
| 36216 | |
| 36217 | |
| 36218 | int VariableCrossLaneShuffleDepth = |
| 36219 | Subtarget.hasFastVariableCrossLaneShuffle() ? 1 : 2; |
| 36220 | int VariablePerLaneShuffleDepth = |
| 36221 | Subtarget.hasFastVariablePerLaneShuffle() ? 1 : 2; |
| 36222 | AllowVariableCrossLaneMask &= |
| 36223 | (Depth >= VariableCrossLaneShuffleDepth) || HasVariableMask; |
| 36224 | AllowVariablePerLaneMask &= |
| 36225 | (Depth >= VariablePerLaneShuffleDepth) || HasVariableMask; |
| 36226 | |
| 36227 | |
| 36228 | bool AllowBWIVPERMV3 = |
| 36229 | (Depth >= (VariableCrossLaneShuffleDepth + 2) || HasVariableMask); |
| 36230 | |
| 36231 | bool MaskContainsZeros = isAnyZero(Mask); |
| 36232 | |
| 36233 | if (is128BitLaneCrossingShuffleMask(MaskVT, Mask)) { |
| 36234 | |
| 36235 | if (UnaryShuffle && AllowVariableCrossLaneMask && !MaskContainsZeros) { |
| 36236 | if (Subtarget.hasAVX2() && |
| 36237 | (MaskVT == MVT::v8f32 || MaskVT == MVT::v8i32)) { |
| 36238 | SDValue VPermMask = getConstVector(Mask, IntMaskVT, DAG, DL, true); |
| 36239 | Res = CanonicalizeShuffleInput(MaskVT, V1); |
| 36240 | Res = DAG.getNode(X86ISD::VPERMV, DL, MaskVT, VPermMask, Res); |
| 36241 | return DAG.getBitcast(RootVT, Res); |
| 36242 | } |
| 36243 | |
| 36244 | if ((Subtarget.hasAVX512() && |
| 36245 | (MaskVT == MVT::v8f64 || MaskVT == MVT::v8i64 || |
| 36246 | MaskVT == MVT::v16f32 || MaskVT == MVT::v16i32)) || |
| 36247 | (Subtarget.hasBWI() && |
| 36248 | (MaskVT == MVT::v16i16 || MaskVT == MVT::v32i16)) || |
| 36249 | (Subtarget.hasVBMI() && |
| 36250 | (MaskVT == MVT::v32i8 || MaskVT == MVT::v64i8))) { |
| 36251 | V1 = CanonicalizeShuffleInput(MaskVT, V1); |
| 36252 | V2 = DAG.getUNDEF(MaskVT); |
| 36253 | Res = lowerShuffleWithPERMV(DL, MaskVT, Mask, V1, V2, Subtarget, DAG); |
| 36254 | return DAG.getBitcast(RootVT, Res); |
| 36255 | } |
| 36256 | } |
| 36257 | |
| 36258 | |
| 36259 | |
| 36260 | if (UnaryShuffle && AllowVariableCrossLaneMask && |
| 36261 | ((Subtarget.hasAVX512() && |
| 36262 | (MaskVT == MVT::v8f64 || MaskVT == MVT::v8i64 || |
| 36263 | MaskVT == MVT::v4f64 || MaskVT == MVT::v4i64 || |
| 36264 | MaskVT == MVT::v8f32 || MaskVT == MVT::v8i32 || |
| 36265 | MaskVT == MVT::v16f32 || MaskVT == MVT::v16i32)) || |
| 36266 | (Subtarget.hasBWI() && AllowBWIVPERMV3 && |
| 36267 | (MaskVT == MVT::v16i16 || MaskVT == MVT::v32i16)) || |
| 36268 | (Subtarget.hasVBMI() && AllowBWIVPERMV3 && |
| 36269 | (MaskVT == MVT::v32i8 || MaskVT == MVT::v64i8)))) { |
| 36270 | |
| 36271 | for (unsigned i = 0; i != NumMaskElts; ++i) |
| 36272 | if (Mask[i] == SM_SentinelZero) |
| 36273 | Mask[i] = NumMaskElts + i; |
| 36274 | V1 = CanonicalizeShuffleInput(MaskVT, V1); |
| 36275 | V2 = getZeroVector(MaskVT, Subtarget, DAG, DL); |
| 36276 | Res = lowerShuffleWithPERMV(DL, MaskVT, Mask, V1, V2, Subtarget, DAG); |
| 36277 | return DAG.getBitcast(RootVT, Res); |
| 36278 | } |
| 36279 | |
| 36280 | |
| 36281 | |
| 36282 | if (SDValue WideShuffle = combineX86ShuffleChainWithExtract( |
| 36283 | Inputs, Root, BaseMask, Depth, HasVariableMask, |
| 36284 | AllowVariableCrossLaneMask, AllowVariablePerLaneMask, DAG, |
| 36285 | Subtarget)) |
| 36286 | return WideShuffle; |
| 36287 | |
| 36288 | |
| 36289 | |
| 36290 | if (AllowVariableCrossLaneMask && !MaskContainsZeros && |
| 36291 | ((Subtarget.hasAVX512() && |
| 36292 | (MaskVT == MVT::v8f64 || MaskVT == MVT::v8i64 || |
| 36293 | MaskVT == MVT::v4f64 || MaskVT == MVT::v4i64 || |
| 36294 | MaskVT == MVT::v16f32 || MaskVT == MVT::v16i32 || |
| 36295 | MaskVT == MVT::v8f32 || MaskVT == MVT::v8i32)) || |
| 36296 | (Subtarget.hasBWI() && AllowBWIVPERMV3 && |
| 36297 | (MaskVT == MVT::v16i16 || MaskVT == MVT::v32i16)) || |
| 36298 | (Subtarget.hasVBMI() && AllowBWIVPERMV3 && |
| 36299 | (MaskVT == MVT::v32i8 || MaskVT == MVT::v64i8)))) { |
| 36300 | V1 = CanonicalizeShuffleInput(MaskVT, V1); |
| 36301 | V2 = CanonicalizeShuffleInput(MaskVT, V2); |
| 36302 | Res = lowerShuffleWithPERMV(DL, MaskVT, Mask, V1, V2, Subtarget, DAG); |
| 36303 | return DAG.getBitcast(RootVT, Res); |
| 36304 | } |
| 36305 | return SDValue(); |
| 36306 | } |
| 36307 | |
| 36308 | |
| 36309 | |
| 36310 | if (UnaryShuffle && MaskContainsZeros && AllowVariablePerLaneMask && |
| 36311 | isSequentialOrUndefOrZeroInRange(Mask, 0, NumMaskElts, 0) && |
| 36312 | DAG.getTargetLoweringInfo().isTypeLegal(MaskVT)) { |
| 36313 | APInt Zero = APInt::getNullValue(MaskEltSizeInBits); |
| 36314 | APInt AllOnes = APInt::getAllOnesValue(MaskEltSizeInBits); |
| 36315 | APInt UndefElts(NumMaskElts, 0); |
| 36316 | SmallVector<APInt, 64> EltBits(NumMaskElts, Zero); |
| 36317 | for (unsigned i = 0; i != NumMaskElts; ++i) { |
| 36318 | int M = Mask[i]; |
| 36319 | if (M == SM_SentinelUndef) { |
| 36320 | UndefElts.setBit(i); |
| 36321 | continue; |
| 36322 | } |
| 36323 | if (M == SM_SentinelZero) |
| 36324 | continue; |
| 36325 | EltBits[i] = AllOnes; |
| 36326 | } |
| 36327 | SDValue BitMask = getConstVector(EltBits, UndefElts, MaskVT, DAG, DL); |
| 36328 | Res = CanonicalizeShuffleInput(MaskVT, V1); |
| 36329 | unsigned AndOpcode = |
| 36330 | MaskVT.isFloatingPoint() ? unsigned(X86ISD::FAND) : unsigned(ISD::AND); |
| 36331 | Res = DAG.getNode(AndOpcode, DL, MaskVT, Res, BitMask); |
| 36332 | return DAG.getBitcast(RootVT, Res); |
| 36333 | } |
| 36334 | |
| 36335 | |
| 36336 | |
| 36337 | |
| 36338 | if (UnaryShuffle && AllowVariablePerLaneMask && !MaskContainsZeros && |
| 36339 | ((MaskVT == MVT::v8f32 && Subtarget.hasAVX()) || |
| 36340 | (MaskVT == MVT::v16f32 && Subtarget.hasAVX512()))) { |
| 36341 | SmallVector<SDValue, 16> VPermIdx; |
| 36342 | for (int M : Mask) { |
| 36343 | SDValue Idx = |
| 36344 | M < 0 ? DAG.getUNDEF(MVT::i32) : DAG.getConstant(M % 4, DL, MVT::i32); |
| 36345 | VPermIdx.push_back(Idx); |
| 36346 | } |
| 36347 | SDValue VPermMask = DAG.getBuildVector(IntMaskVT, DL, VPermIdx); |
| 36348 | Res = CanonicalizeShuffleInput(MaskVT, V1); |
| 36349 | Res = DAG.getNode(X86ISD::VPERMILPV, DL, MaskVT, Res, VPermMask); |
| 36350 | return DAG.getBitcast(RootVT, Res); |
| 36351 | } |
| 36352 | |
| 36353 | |
| 36354 | |
| 36355 | if (AllowVariablePerLaneMask && Subtarget.hasXOP() && |
| 36356 | (MaskVT == MVT::v2f64 || MaskVT == MVT::v4f64 || MaskVT == MVT::v4f32 || |
| 36357 | MaskVT == MVT::v8f32)) { |
| 36358 | |
| 36359 | |
| 36360 | |
| 36361 | |
| 36362 | unsigned NumLanes = MaskVT.getSizeInBits() / 128; |
| 36363 | unsigned NumEltsPerLane = NumMaskElts / NumLanes; |
| 36364 | SmallVector<int, 8> VPerm2Idx; |
| 36365 | unsigned M2ZImm = 0; |
| 36366 | for (int M : Mask) { |
| 36367 | if (M == SM_SentinelUndef) { |
| 36368 | VPerm2Idx.push_back(-1); |
| 36369 | continue; |
| 36370 | } |
| 36371 | if (M == SM_SentinelZero) { |
| 36372 | M2ZImm = 2; |
| 36373 | VPerm2Idx.push_back(8); |
| 36374 | continue; |
| 36375 | } |
| 36376 | int Index = (M % NumEltsPerLane) + ((M / NumMaskElts) * NumEltsPerLane); |
| 36377 | Index = (MaskVT.getScalarSizeInBits() == 64 ? Index << 1 : Index); |
| 36378 | VPerm2Idx.push_back(Index); |
| 36379 | } |
| 36380 | V1 = CanonicalizeShuffleInput(MaskVT, V1); |
| 36381 | V2 = CanonicalizeShuffleInput(MaskVT, V2); |
| 36382 | SDValue VPerm2MaskOp = getConstVector(VPerm2Idx, IntMaskVT, DAG, DL, true); |
| 36383 | Res = DAG.getNode(X86ISD::VPERMIL2, DL, MaskVT, V1, V2, VPerm2MaskOp, |
| 36384 | DAG.getTargetConstant(M2ZImm, DL, MVT::i8)); |
| 36385 | return DAG.getBitcast(RootVT, Res); |
| 36386 | } |
| 36387 | |
| 36388 | |
| 36389 | |
| 36390 | |
| 36391 | |
| 36392 | |
| 36393 | if (UnaryShuffle && AllowVariablePerLaneMask && |
| 36394 | ((RootVT.is128BitVector() && Subtarget.hasSSSE3()) || |
| 36395 | (RootVT.is256BitVector() && Subtarget.hasAVX2()) || |
| 36396 | (RootVT.is512BitVector() && Subtarget.hasBWI()))) { |
| 36397 | SmallVector<SDValue, 16> PSHUFBMask; |
| 36398 | int NumBytes = RootVT.getSizeInBits() / 8; |
| 36399 | int Ratio = NumBytes / NumMaskElts; |
| 36400 | for (int i = 0; i < NumBytes; ++i) { |
| 36401 | int M = Mask[i / Ratio]; |
| 36402 | if (M == SM_SentinelUndef) { |
| 36403 | PSHUFBMask.push_back(DAG.getUNDEF(MVT::i8)); |
| 36404 | continue; |
| 36405 | } |
| 36406 | if (M == SM_SentinelZero) { |
| 36407 | PSHUFBMask.push_back(DAG.getConstant(0x80, DL, MVT::i8)); |
| 36408 | continue; |
| 36409 | } |
| 36410 | M = Ratio * M + i % Ratio; |
| 36411 | assert((M / 16) == (i / 16) && "Lane crossing detected"); |
| 36412 | PSHUFBMask.push_back(DAG.getConstant(M, DL, MVT::i8)); |
| 36413 | } |
| 36414 | MVT ByteVT = MVT::getVectorVT(MVT::i8, NumBytes); |
| 36415 | Res = CanonicalizeShuffleInput(ByteVT, V1); |
| 36416 | SDValue PSHUFBMaskOp = DAG.getBuildVector(ByteVT, DL, PSHUFBMask); |
| 36417 | Res = DAG.getNode(X86ISD::PSHUFB, DL, ByteVT, Res, PSHUFBMaskOp); |
| 36418 | return DAG.getBitcast(RootVT, Res); |
| 36419 | } |
| 36420 | |
| 36421 | |
| 36422 | |
| 36423 | |
| 36424 | if (AllowVariablePerLaneMask && RootVT.is128BitVector() && |
| 36425 | Subtarget.hasXOP()) { |
| 36426 | |
| 36427 | |
| 36428 | |
| 36429 | SmallVector<SDValue, 16> VPPERMMask; |
| 36430 | int NumBytes = 16; |
| 36431 | int Ratio = NumBytes / NumMaskElts; |
| 36432 | for (int i = 0; i < NumBytes; ++i) { |
| 36433 | int M = Mask[i / Ratio]; |
| 36434 | if (M == SM_SentinelUndef) { |
| 36435 | VPPERMMask.push_back(DAG.getUNDEF(MVT::i8)); |
| 36436 | continue; |
| 36437 | } |
| 36438 | if (M == SM_SentinelZero) { |
| 36439 | VPPERMMask.push_back(DAG.getConstant(0x80, DL, MVT::i8)); |
| 36440 | continue; |
| 36441 | } |
| 36442 | M = Ratio * M + i % Ratio; |
| 36443 | VPPERMMask.push_back(DAG.getConstant(M, DL, MVT::i8)); |
| 36444 | } |
| 36445 | MVT ByteVT = MVT::v16i8; |
| 36446 | V1 = CanonicalizeShuffleInput(ByteVT, V1); |
| 36447 | V2 = CanonicalizeShuffleInput(ByteVT, V2); |
| 36448 | SDValue VPPERMMaskOp = DAG.getBuildVector(ByteVT, DL, VPPERMMask); |
| 36449 | Res = DAG.getNode(X86ISD::VPPERM, DL, ByteVT, V1, V2, VPPERMMaskOp); |
| 36450 | return DAG.getBitcast(RootVT, Res); |
| 36451 | } |
| 36452 | |
| 36453 | |
| 36454 | |
| 36455 | if (SDValue WideShuffle = combineX86ShuffleChainWithExtract( |
| 36456 | Inputs, Root, BaseMask, Depth, HasVariableMask, |
| 36457 | AllowVariableCrossLaneMask, AllowVariablePerLaneMask, DAG, Subtarget)) |
| 36458 | return WideShuffle; |
| 36459 | |
| 36460 | |
| 36461 | |
| 36462 | if (!UnaryShuffle && AllowVariablePerLaneMask && !MaskContainsZeros && |
| 36463 | ((Subtarget.hasAVX512() && |
| 36464 | (MaskVT == MVT::v2f64 || MaskVT == MVT::v4f64 || MaskVT == MVT::v8f64 || |
| 36465 | MaskVT == MVT::v2i64 || MaskVT == MVT::v4i64 || MaskVT == MVT::v8i64 || |
| 36466 | MaskVT == MVT::v4f32 || MaskVT == MVT::v4i32 || MaskVT == MVT::v8f32 || |
| 36467 | MaskVT == MVT::v8i32 || MaskVT == MVT::v16f32 || |
| 36468 | MaskVT == MVT::v16i32)) || |
| 36469 | (Subtarget.hasBWI() && AllowBWIVPERMV3 && |
| 36470 | (MaskVT == MVT::v8i16 || MaskVT == MVT::v16i16 || |
| 36471 | MaskVT == MVT::v32i16)) || |
| 36472 | (Subtarget.hasVBMI() && AllowBWIVPERMV3 && |
| 36473 | (MaskVT == MVT::v16i8 || MaskVT == MVT::v32i8 || |
| 36474 | MaskVT == MVT::v64i8)))) { |
| 36475 | V1 = CanonicalizeShuffleInput(MaskVT, V1); |
| 36476 | V2 = CanonicalizeShuffleInput(MaskVT, V2); |
| 36477 | Res = lowerShuffleWithPERMV(DL, MaskVT, Mask, V1, V2, Subtarget, DAG); |
| 36478 | return DAG.getBitcast(RootVT, Res); |
| 36479 | } |
| 36480 | |
| 36481 | |
| 36482 | return SDValue(); |
| 36483 | } |
| 36484 | |
| 36485 | |
| 36486 | |
| 36487 | |
| 36488 | |
| 36489 | |
| 36490 | |
| 36491 | |
| 36492 | |
// Attempt to widen a shuffle chain whose inputs are all extract_subvector
// nodes of a common wider source: rebuild the shuffle at the wider width,
// combine it there, then extract the low subvector of the result.
// Returns SDValue() (no combine) unless at least one input was actually
// extracted at a non-zero offset and the widened chain can be combined.
static SDValue combineX86ShuffleChainWithExtract(
    ArrayRef<SDValue> Inputs, SDValue Root, ArrayRef<int> BaseMask, int Depth,
    bool HasVariableMask, bool AllowVariableCrossLaneMask,
    bool AllowVariablePerLaneMask, SelectionDAG &DAG,
    const X86Subtarget &Subtarget) {
  unsigned NumMaskElts = BaseMask.size();
  unsigned NumInputs = Inputs.size();
  if (NumInputs == 0)
    return SDValue();

  EVT RootVT = Root.getValueType();
  unsigned RootSizeInBits = RootVT.getSizeInBits();
  assert((RootSizeInBits % NumMaskElts) == 0 && "Unexpected root shuffle mask");

  // Working copies: the (possibly widened) inputs and, per input, the element
  // offset it was extracted from within its wide source.
  SmallVector<SDValue, 4> WideInputs(Inputs.begin(), Inputs.end());
  SmallVector<unsigned, 4> Offsets(NumInputs, 0);

  // Peel off any EXTRACT_SUBVECTOR chains, accumulating the total extraction
  // offset, and track the widest source seen so far.
  unsigned WideSizeInBits = RootSizeInBits;
  for (unsigned i = 0; i != NumInputs; ++i) {
    SDValue &Src = WideInputs[i];
    unsigned &Offset = Offsets[i];
    Src = peekThroughBitcasts(Src);
    EVT BaseVT = Src.getValueType();
    while (Src.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
      // Operand 1 is the constant subvector start index.
      Offset += Src.getConstantOperandVal(1);
      Src = Src.getOperand(0);
    }
    WideSizeInBits = std::max(WideSizeInBits,
                              (unsigned)Src.getValueSizeInBits());
    assert((Offset % BaseVT.getVectorNumElements()) == 0 &&
           "Unexpected subvector extraction");
    // Rescale the offset from BaseVT element units into mask-element units.
    Offset /= BaseVT.getVectorNumElements();
    Offset *= NumMaskElts;
  }

  // If no input was extracted at a non-zero offset there is nothing to gain
  // from widening — bail out.
  if (llvm::all_of(Offsets, [](unsigned Offset) { return Offset == 0; }))
    return SDValue();

  unsigned Scale = WideSizeInBits / RootSizeInBits;
  assert((WideSizeInBits % RootSizeInBits) == 0 &&
         "Unexpected subvector extraction");

  // All wide sources must be type-legal and share a scalar element type,
  // otherwise the widened shuffle could not be lowered directly.
  EVT WideSVT = WideInputs[0].getValueType().getScalarType();
  if (llvm::any_of(WideInputs, [&WideSVT, &DAG](SDValue Op) {
        return !DAG.getTargetLoweringInfo().isTypeLegal(Op.getValueType()) ||
               Op.getValueType().getScalarType() != WideSVT;
      }))
    return SDValue();

  // Pad any narrower inputs up to the common wide width (upper elements
  // become undef — widenSubVector is called with ZeroNewElements=false).
  for (SDValue &NewInput : WideInputs) {
    assert((WideSizeInBits % NewInput.getValueSizeInBits()) == 0 &&
           "Shuffle vector size mismatch");
    if (WideSizeInBits > NewInput.getValueSizeInBits())
      NewInput = widenSubVector(NewInput, false, Subtarget, DAG,
                                SDLoc(NewInput), WideSizeInBits);
    assert(WideSizeInBits == NewInput.getValueSizeInBits() &&
           "Unexpected subvector extraction");
  }

  // Bias each input's offset by its position so mask indices below address a
  // flat concatenation of the widened inputs.
  for (unsigned i = 1; i != NumInputs; ++i)
    Offsets[i] += i * Scale * NumMaskElts;

  // Remap the base mask into the widened index space and pad it with undefs
  // so its length matches the wide type.
  SmallVector<int, 64> WideMask(BaseMask.begin(), BaseMask.end());
  for (int &M : WideMask) {
    if (M < 0)
      continue; // keep SM_SentinelUndef / SM_SentinelZero as-is
    M = (M % NumMaskElts) + Offsets[M / NumMaskElts];
  }
  WideMask.append((Scale - 1) * NumMaskElts, SM_SentinelUndef);

  // Canonicalize duplicate/unused inputs out of the widened shuffle.
  resolveTargetShuffleInputsAndMask(WideInputs, WideMask);
  assert(!WideInputs.empty() && "Shuffle with no inputs detected");

  // combineX86ShuffleChain below only handles unary/binary shuffles.
  if (WideInputs.size() > 2)
    return SDValue();

  // Each peeled extraction counts as an extra "step" of combining — bump the
  // depth so the recursion budget is charged fairly.
  Depth += count_if(Offsets, [](unsigned Offset) { return Offset > 0; });

  // Try to combine the widened chain; on success, extract the original-width
  // low subvector and bitcast back to the root type.
  SDValue WideRoot = WideInputs[0];
  if (SDValue WideShuffle =
          combineX86ShuffleChain(WideInputs, WideRoot, WideMask, Depth,
                                 HasVariableMask, AllowVariableCrossLaneMask,
                                 AllowVariablePerLaneMask, DAG, Subtarget)) {
    WideShuffle =
        extractSubVector(WideShuffle, 0, DAG, SDLoc(Root), RootSizeInBits);
    return DAG.getBitcast(RootVT, WideShuffle);
  }
  return SDValue();
}
| 36594 | |
| 36595 | |
| 36596 | |
// Canonicalize a shuffle whose inputs are all the same horizontal op
// (F/HADD, F/HSUB) or pack op (PACKSS/PACKUS): either fold the shuffle into
// a rebuilt horizontal/pack node directly, or mutate Ops/Mask in place so a
// later combine sees a simpler (commuted / deduplicated) form.
// Returns a replacement node on a full fold, SDValue() otherwise (Mask/Ops
// may still have been canonicalized in place).
static SDValue canonicalizeShuffleMaskWithHorizOp(
    MutableArrayRef<SDValue> Ops, MutableArrayRef<int> Mask,
    unsigned RootSizeInBits, const SDLoc &DL, SelectionDAG &DAG,
    const X86Subtarget &Subtarget) {
  if (Mask.empty() || Ops.empty())
    return SDValue();

  // Look through bitcasts on every input.
  SmallVector<SDValue> BC;
  for (SDValue Op : Ops)
    BC.push_back(peekThroughBitcasts(Op));

  // All inputs must be the same opcode and type, matching the root width.
  SDValue BC0 = BC[0];
  EVT VT0 = BC0.getValueType();
  unsigned Opcode0 = BC0.getOpcode();
  if (VT0.getSizeInBits() != RootSizeInBits || llvm::any_of(BC, [&](SDValue V) {
        return V.getOpcode() != Opcode0 || V.getValueType() != VT0;
      }))
    return SDValue();

  // Only horizontal add/sub and saturating-pack nodes are handled.
  bool isHoriz = (Opcode0 == X86ISD::FHADD || Opcode0 == X86ISD::HADD ||
                  Opcode0 == X86ISD::FHSUB || Opcode0 == X86ISD::HSUB);
  bool isPack = (Opcode0 == X86ISD::PACKSS || Opcode0 == X86ISD::PACKUS);
  if (!isHoriz && !isPack)
    return SDValue();

  // True when every input (and any bitcast above it) has a single use, so
  // rebuilding the op cannot duplicate work.
  bool OneUseOps = llvm::all_of(Ops, [](SDValue Op) {
    return Op.hasOneUse() &&
           peekThroughBitcasts(Op) == peekThroughOneUseBitcasts(Op);
  });

  int NumElts = VT0.getVectorNumElements();
  int NumLanes = VT0.getSizeInBits() / 128;
  int NumEltsPerLane = NumElts / NumLanes;
  int NumHalfEltsPerLane = NumEltsPerLane / 2;
  MVT SrcVT = BC0.getOperand(0).getSimpleValueType();
  unsigned EltSizeInBits = RootSizeInBits / Mask.size();

  // Attempt to fold when the per-128-bit-lane mask can be expressed with 4
  // elements (each index then selects one half of one hop/pack operand).
  if (NumEltsPerLane >= 4 &&
      (isPack || shouldUseHorizontalOp(Ops.size() == 1, DAG, Subtarget))) {
    SmallVector<int> LaneMask, ScaledMask;
    if (isRepeatedTargetShuffleMask(128, EltSizeInBits, Mask, LaneMask) &&
        scaleShuffleElements(LaneMask, 4, ScaledMask)) {
      if (isHoriz) {
        // For nested horizontal ops: map scaled-mask index M to the source
        // operand of the inner hop feeding that position, if one exists.
        auto GetHOpSrc = [&](int M) {
          if (M == SM_SentinelUndef)
            return DAG.getUNDEF(VT0);
          if (M == SM_SentinelZero)
            return getZeroVector(VT0.getSimpleVT(), Subtarget, DAG, DL);
          SDValue Src0 = BC[M / 4];
          SDValue Src1 = Src0.getOperand((M % 4) >= 2);
          if (Src1.getOpcode() == Opcode0 && Src0->isOnlyUserOf(Src1.getNode()))
            return Src1.getOperand(M % 2);
          return SDValue();
        };
        SDValue M0 = GetHOpSrc(ScaledMask[0]);
        SDValue M1 = GetHOpSrc(ScaledMask[1]);
        SDValue M2 = GetHOpSrc(ScaledMask[2]);
        SDValue M3 = GetHOpSrc(ScaledMask[3]);
        // All four positions resolved: rebuild as hop(hop(M0,M1),hop(M2,M3)).
        if (M0 && M1 && M2 && M3) {
          SDValue LHS = DAG.getNode(Opcode0, DL, SrcVT, M0, M1);
          SDValue RHS = DAG.getNode(Opcode0, DL, SrcVT, M2, M3);
          return DAG.getNode(Opcode0, DL, VT0, LHS, RHS);
        }
      }

      // Binary (or more) input case: collect at most two distinct source
      // operands into LHS/RHS, recording a post-shuffle mask, and emit a
      // single hop/pack followed by a SHUFP to reorder the 32-bit chunks.
      if (Ops.size() >= 2) {
        SDValue LHS, RHS;
        auto GetHOpSrc = [&](int M, int &OutM) {
          // Only undef sentinels are tolerated; zeros can't be represented.
          if (M < 0)
            return M == SM_SentinelUndef;
          SDValue Src = BC[M / 4].getOperand((M % 4) >= 2);
          if (!LHS || LHS == Src) {
            LHS = Src;
            OutM = (M % 2);
            return true;
          }
          if (!RHS || RHS == Src) {
            RHS = Src;
            OutM = (M % 2) + 2;
            return true;
          }
          return false; // a third distinct source — give up
        };
        int PostMask[4] = {-1, -1, -1, -1};
        if (GetHOpSrc(ScaledMask[0], PostMask[0]) &&
            GetHOpSrc(ScaledMask[1], PostMask[1]) &&
            GetHOpSrc(ScaledMask[2], PostMask[2]) &&
            GetHOpSrc(ScaledMask[3], PostMask[3])) {
          LHS = DAG.getBitcast(SrcVT, LHS);
          RHS = DAG.getBitcast(SrcVT, RHS ? RHS : LHS);
          SDValue Res = DAG.getNode(Opcode0, DL, VT0, LHS, RHS);
          // Reorder the result with SHUFP on f32 chunks per PostMask.
          MVT ShuffleVT = MVT::getVectorVT(MVT::f32, RootSizeInBits / 32);
          Res = DAG.getBitcast(ShuffleVT, Res);
          return DAG.getNode(X86ISD::SHUFP, DL, ShuffleVT, Res, Res,
                             getV4X86ShuffleImm8ForMask(PostMask, DL, DAG));
        }
      }
    }
  }

  // The in-place canonicalizations below only handle one or two inputs.
  if (2 < Ops.size())
    return SDValue();

  SDValue BC1 = BC[BC.size() - 1];
  if (Mask.size() == VT0.getVectorNumElements()) {
    if (Ops.size() == 2) {
      auto ContainsOps = [](SDValue HOp, SDValue Op) {
        return Op == HOp.getOperand(0) || Op == HOp.getOperand(1);
      };

      // If BC1's operands cover both of BC0's, commute so the "covering" op
      // comes first; then the remap below can fold BC0 away.
      if (ContainsOps(BC1, BC0.getOperand(0)) &&
          ContainsOps(BC1, BC0.getOperand(1))) {
        ShuffleVectorSDNode::commuteMask(Mask);
        std::swap(Ops[0], Ops[1]);
        std::swap(BC0, BC1);
      }

      // If BC0's operands cover both of BC1's, rewrite indices referencing
      // BC1 (M >= NumElts) to address the equivalent half of BC0 instead.
      if (ContainsOps(BC0, BC1.getOperand(0)) &&
          ContainsOps(BC0, BC1.getOperand(1))) {
        for (int &M : Mask) {
          if (M < NumElts)
            continue;
          int SubLane = ((M % NumEltsPerLane) >= NumHalfEltsPerLane) ? 1 : 0;
          M -= NumElts + (SubLane * NumHalfEltsPerLane);
          if (BC1.getOperand(SubLane) != BC0.getOperand(0))
            M += NumHalfEltsPerLane;
        }
      }
    }

    // If a hop/pack has identical operands, its two lane-halves are equal, so
    // fold upper-half references down to the lower half.
    for (int i = 0; i != NumElts; ++i) {
      int &M = Mask[i];
      if (isUndefOrZero(M))
        continue;
      if (M < NumElts && BC0.getOperand(0) == BC0.getOperand(1) &&
          (M % NumEltsPerLane) >= NumHalfEltsPerLane)
        M -= NumHalfEltsPerLane;
      if (NumElts <= M && BC1.getOperand(0) == BC1.getOperand(1) &&
          (M % NumEltsPerLane) >= NumHalfEltsPerLane)
        M -= NumHalfEltsPerLane;
    }
  }

  // Final attempt: if the mask scales to a 2-element per-lane selection, the
  // shuffle is just picking whole operands of the hops — rebuild one node.
  SmallVector<int, 16> TargetMask128, WideMask128;
  if (isRepeatedTargetShuffleMask(128, EltSizeInBits, Mask, TargetMask128) &&
      scaleShuffleElements(TargetMask128, 2, WideMask128)) {
    assert(isUndefOrZeroOrInRange(WideMask128, 0, 4) && "Illegal shuffle");
    bool SingleOp = (Ops.size() == 1);
    if (isPack || OneUseOps ||
        shouldUseHorizontalOp(SingleOp, DAG, Subtarget)) {
      // Indices 0-1 select BC0's operands, 2-3 select BC1's.
      SDValue Lo = isInRange(WideMask128[0], 0, 2) ? BC0 : BC1;
      SDValue Hi = isInRange(WideMask128[1], 0, 2) ? BC0 : BC1;
      Lo = Lo.getOperand(WideMask128[0] & 1);
      Hi = Hi.getOperand(WideMask128[1] & 1);
      if (SingleOp) {
        // Sentinels are only materialized in the unary case.
        SDValue Undef = DAG.getUNDEF(SrcVT);
        SDValue Zero = getZeroVector(SrcVT, Subtarget, DAG, DL);
        Lo = (WideMask128[0] == SM_SentinelZero ? Zero : Lo);
        Hi = (WideMask128[1] == SM_SentinelZero ? Zero : Hi);
        Lo = (WideMask128[0] == SM_SentinelUndef ? Undef : Lo);
        Hi = (WideMask128[1] == SM_SentinelUndef ? Undef : Hi);
      }
      return DAG.getNode(Opcode0, DL, VT0, Lo, Hi);
    }
  }

  return SDValue();
}
| 36783 | |
| 36784 | |
| 36785 | |
| 36786 | |
// Constant-fold a target shuffle whose inputs are all constant build
// vectors: evaluate the mask against the source constants and emit a single
// constant vector (bitcast to the root type). Returns SDValue() if any input
// is non-constant, folding would not be profitable, or the folded type is
// not legal.
static SDValue combineX86ShufflesConstants(ArrayRef<SDValue> Ops,
                                           ArrayRef<int> Mask, SDValue Root,
                                           bool HasVariableMask,
                                           SelectionDAG &DAG,
                                           const X86Subtarget &Subtarget) {
  MVT VT = Root.getSimpleValueType();

  unsigned SizeInBits = VT.getSizeInBits();
  unsigned NumMaskElts = Mask.size();
  unsigned MaskSizeInBits = SizeInBits / NumMaskElts; // bits per mask element
  unsigned NumOps = Ops.size();

  // Extract the constant bits of every source operand; bail if any operand
  // is not constant. Track whether at least one constant is single-use.
  bool OneUseConstantOp = false;
  SmallVector<APInt, 16> UndefEltsOps(NumOps);
  SmallVector<SmallVector<APInt, 16>, 16> RawBitsOps(NumOps);
  for (unsigned i = 0; i != NumOps; ++i) {
    SDValue SrcOp = Ops[i];
    OneUseConstantOp |= SrcOp.hasOneUse();
    if (!getTargetConstantBitsFromNode(SrcOp, MaskSizeInBits, UndefEltsOps[i],
                                       RawBitsOps[i]))
      return SDValue();
  }

  // Only fold when it removes a node (a one-use constant) or replaces a
  // variable shuffle mask — otherwise folding just duplicates constants.
  if (!OneUseConstantOp && !HasVariableMask)
    return SDValue();

  // Classify each output element as undef, zero, or a concrete constant by
  // shuffling the gathered source bits through the mask.
  SDLoc DL(Root);
  APInt UndefElts(NumMaskElts, 0);
  APInt ZeroElts(NumMaskElts, 0);
  APInt ConstantElts(NumMaskElts, 0);
  SmallVector<APInt, 8> ConstantBitData(NumMaskElts,
                                        APInt::getNullValue(MaskSizeInBits));
  for (unsigned i = 0; i != NumMaskElts; ++i) {
    int M = Mask[i];
    if (M == SM_SentinelUndef) {
      UndefElts.setBit(i);
      continue;
    } else if (M == SM_SentinelZero) {
      ZeroElts.setBit(i);
      continue;
    }
    assert(0 <= M && M < (int)(NumMaskElts * NumOps));

    // Mask indices address the flat concatenation of all source operands.
    unsigned SrcOpIdx = (unsigned)M / NumMaskElts;
    unsigned SrcMaskIdx = (unsigned)M % NumMaskElts;

    auto &SrcUndefElts = UndefEltsOps[SrcOpIdx];
    if (SrcUndefElts[SrcMaskIdx]) {
      UndefElts.setBit(i);
      continue;
    }

    auto &SrcEltBits = RawBitsOps[SrcOpIdx];
    APInt &Bits = SrcEltBits[SrcMaskIdx];
    if (!Bits) {
      // All-zero source element — record as zero, not as a constant.
      ZeroElts.setBit(i);
      continue;
    }

    ConstantElts.setBit(i);
    ConstantBitData[i] = Bits;
  }
  assert((UndefElts | ZeroElts | ConstantElts).isAllOnesValue());

  // Nothing but undef/zero elements: the whole result is a zero vector.
  if ((UndefElts | ZeroElts).isAllOnesValue())
    return getZeroVector(Root.getSimpleValueType(), Subtarget, DAG, DL);

  // Pick a scalar type for the constant: keep floating point when the root
  // is FP and the element width permits, otherwise use an integer scalar.
  MVT MaskSVT;
  if (VT.isFloatingPoint() && (MaskSizeInBits == 32 || MaskSizeInBits == 64))
    MaskSVT = MVT::getFloatingPointVT(MaskSizeInBits);
  else
    MaskSVT = MVT::getIntegerVT(MaskSizeInBits);

  MVT MaskVT = MVT::getVectorVT(MaskSVT, NumMaskElts);
  if (!DAG.getTargetLoweringInfo().isTypeLegal(MaskVT))
    return SDValue();

  SDValue CstOp = getConstVector(ConstantBitData, UndefElts, MaskVT, DAG, DL);
  return DAG.getBitcast(VT, CstOp);
}
| 36874 | |
namespace llvm {
namespace X86 {
// Recursion-depth budget for the X86 shuffle combiner
// (combineX86ShufflesRecursively bails out once Depth >= MaxDepth).
enum {
MaxShuffleCombineDepth = 8
};
} // namespace X86
} // namespace llvm
| 36882 | |
| 36883 | |
| 36884 | |
| 36885 | |
| 36886 | |
| 36887 | |
| 36888 | |
| 36889 | |
| 36890 | |
| 36891 | |
| 36892 | |
| 36893 | |
| 36894 | |
| 36895 | |
| 36896 | |
| 36897 | |
| 36898 | |
| 36899 | |
| 36900 | |
| 36901 | |
| 36902 | |
| 36903 | |
| 36904 | |
| 36905 | |
| 36906 | |
| 36907 | |
| 36908 | |
| 36909 | |
| 36910 | |
| 36911 | |
| 36912 | static SDValue combineX86ShufflesRecursively( |
| 36913 | ArrayRef<SDValue> SrcOps, int SrcOpIndex, SDValue Root, |
| 36914 | ArrayRef<int> RootMask, ArrayRef<const SDNode *> SrcNodes, unsigned Depth, |
| 36915 | unsigned MaxDepth, bool HasVariableMask, bool AllowVariableCrossLaneMask, |
| 36916 | bool AllowVariablePerLaneMask, SelectionDAG &DAG, |
| 36917 | const X86Subtarget &Subtarget) { |
| 36918 | assert(RootMask.size() > 0 && |
| 36919 | (RootMask.size() > 1 || (RootMask[0] == 0 && SrcOpIndex == 0)) && |
| 36920 | "Illegal shuffle root mask"); |
| 36921 | assert(Root.getSimpleValueType().isVector() && |
| 36922 | "Shuffles operate on vector types!"); |
| 36923 | unsigned RootSizeInBits = Root.getSimpleValueType().getSizeInBits(); |
| 36924 | |
| 36925 | |
| 36926 | |
| 36927 | if (Depth >= MaxDepth) |
| 1 | Assuming 'Depth' is < 'MaxDepth' | |
|
| |
| 36928 | return SDValue(); |
| 36929 | |
| 36930 | |
| 36931 | SDValue Op = SrcOps[SrcOpIndex]; |
| 36932 | Op = peekThroughOneUseBitcasts(Op); |
| 36933 | |
| 36934 | EVT VT = Op.getValueType(); |
| 36935 | if (!VT.isVector() || !VT.isSimple()) |
| |
| 11 | | Returning from 'EVT::isVector' | |
|
| 12 | | Calling 'EVT::isSimple' | |
|
| 14 | | Returning from 'EVT::isSimple' | |
|
| |
| 36936 | return SDValue(); |
| 36937 | |
| 36938 | assert((RootSizeInBits % VT.getSizeInBits()) == 0 && |
| 36939 | "Can only combine shuffles upto size of the root op."); |
| 36940 | |
| 36941 | |
| 36942 | |
| 36943 | SmallVector<int, 64> OpMask; |
| 36944 | SmallVector<SDValue, 2> OpInputs; |
| 36945 | APInt OpUndef, OpZero; |
| 36946 | APInt OpDemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements()); |
| 36947 | bool IsOpVariableMask = isTargetShuffleVariableMask(Op.getOpcode()); |
| 36948 | if (!getTargetShuffleInputs(Op, OpDemandedElts, OpInputs, OpMask, OpUndef, |
| 16 | | Calling 'getTargetShuffleInputs' | |
|
| 32 | | Returning from 'getTargetShuffleInputs' | |
|
| |
| 36949 | OpZero, DAG, Depth, false)) |
| 36950 | return SDValue(); |
| 36951 | |
| 36952 | |
| 36953 | |
| 36954 | if (llvm::any_of(OpInputs, [VT](SDValue OpInput) { |
| 34 | | Calling 'any_of<llvm::SmallVector<llvm::SDValue, 2> &, (lambda at /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86/X86ISelLowering.cpp:36954:30)>' | |
|
| 41 | | Returning from 'any_of<llvm::SmallVector<llvm::SDValue, 2> &, (lambda at /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86/X86ISelLowering.cpp:36954:30)>' | |
|
| |
| 36955 | return OpInput.getValueSizeInBits() > VT.getSizeInBits(); |
| 36956 | })) |
| 36957 | return SDValue(); |
| 36958 | |
| 36959 | |
| 36960 | |
| 36961 | if (RootSizeInBits > VT.getSizeInBits()) { |
| 43 | | Assuming the condition is false | |
|
| |
| 36962 | unsigned NumSubVecs = RootSizeInBits / VT.getSizeInBits(); |
| 36963 | unsigned OpMaskSize = OpMask.size(); |
| 36964 | if (OpInputs.size() > 1) { |
| 36965 | unsigned PaddedMaskSize = NumSubVecs * OpMaskSize; |
| 36966 | for (int &M : OpMask) { |
| 36967 | if (M < 0) |
| 36968 | continue; |
| 36969 | int EltIdx = M % OpMaskSize; |
| 36970 | int OpIdx = M / OpMaskSize; |
| 36971 | M = (PaddedMaskSize * OpIdx) + EltIdx; |
| 36972 | } |
| 36973 | } |
| 36974 | OpZero = OpZero.zext(NumSubVecs * OpMaskSize); |
| 36975 | OpUndef = OpUndef.zext(NumSubVecs * OpMaskSize); |
| 36976 | OpMask.append((NumSubVecs - 1) * OpMaskSize, SM_SentinelUndef); |
| 36977 | } |
| 36978 | |
| 36979 | SmallVector<int, 64> Mask; |
| 36980 | SmallVector<SDValue, 16> Ops; |
| 36981 | |
| 36982 | |
| 36983 | bool EmptyRoot = (Depth == 0) && (RootMask.size() == 1); |
| 45 | | Assuming 'Depth' is not equal to 0 | |
|
| 36984 | if (EmptyRoot) { |
| |
| 36985 | |
| 36986 | |
| 36987 | bool ResolveKnownZeros = true; |
| 36988 | if (!OpZero.isNullValue()) { |
| 36989 | APInt UsedInputs = APInt::getNullValue(OpInputs.size()); |
| 36990 | for (int i = 0, e = OpMask.size(); i != e; ++i) { |
| 36991 | int M = OpMask[i]; |
| 36992 | if (OpUndef[i] || OpZero[i] || isUndefOrZero(M)) |
| 36993 | continue; |
| 36994 | UsedInputs.setBit(M / OpMask.size()); |
| 36995 | if (UsedInputs.isAllOnesValue()) { |
| 36996 | ResolveKnownZeros = false; |
| 36997 | break; |
| 36998 | } |
| 36999 | } |
| 37000 | } |
| 37001 | resolveTargetShuffleFromZeroables(OpMask, OpUndef, OpZero, |
| 37002 | ResolveKnownZeros); |
| 37003 | |
| 37004 | Mask = OpMask; |
| 37005 | Ops.append(OpInputs.begin(), OpInputs.end()); |
| 37006 | } else { |
| 37007 | resolveTargetShuffleFromZeroables(OpMask, OpUndef, OpZero); |
| 37008 | |
| 37009 | |
| 37010 | Ops.append(SrcOps.begin(), SrcOps.end()); |
| 37011 | |
| 37012 | auto AddOp = [&Ops](SDValue Input, int InsertionPoint) -> int { |
| 37013 | |
| 37014 | SDValue InputBC = peekThroughBitcasts(Input); |
| 37015 | for (int i = 0, e = Ops.size(); i < e; ++i) |
| 37016 | if (InputBC == peekThroughBitcasts(Ops[i])) |
| 37017 | return i; |
| 37018 | |
| 37019 | if (InsertionPoint >= 0) { |
| 37020 | Ops[InsertionPoint] = Input; |
| 37021 | return InsertionPoint; |
| 37022 | } |
| 37023 | |
| 37024 | Ops.push_back(Input); |
| 37025 | return Ops.size() - 1; |
| 37026 | }; |
| 37027 | |
| 37028 | SmallVector<int, 2> OpInputIdx; |
| 37029 | for (SDValue OpInput : OpInputs) |
| 47 | | Assuming '__begin2' is equal to '__end2' | |
|
| 37030 | OpInputIdx.push_back( |
| 37031 | AddOp(OpInput, OpInputIdx.empty() ? SrcOpIndex : -1)); |
| 37032 | |
| 37033 | assert(((RootMask.size() > OpMask.size() && |
| 37034 | RootMask.size() % OpMask.size() == 0) || |
| 37035 | (OpMask.size() > RootMask.size() && |
| 37036 | OpMask.size() % RootMask.size() == 0) || |
| 37037 | OpMask.size() == RootMask.size()) && |
| 37038 | "The smaller number of elements must divide the larger."); |
| 37039 | |
| 37040 | |
| 37041 | |
| 37042 | |
| 37043 | assert(isPowerOf2_32(RootMask.size()) && |
| 37044 | "Non-power-of-2 shuffle mask sizes"); |
| 37045 | assert(isPowerOf2_32(OpMask.size()) && "Non-power-of-2 shuffle mask sizes"); |
| 37046 | unsigned RootMaskSizeLog2 = countTrailingZeros(RootMask.size()); |
| 48 | | Calling 'countTrailingZeros<unsigned long>' | |
|
| 55 | | Returning from 'countTrailingZeros<unsigned long>' | |
|
| 56 | | 'RootMaskSizeLog2' initialized to 64 | |
|
| 37047 | unsigned OpMaskSizeLog2 = countTrailingZeros(OpMask.size()); |
| 37048 | |
| 37049 | unsigned MaskWidth = std::max<unsigned>(OpMask.size(), RootMask.size()); |
| 37050 | unsigned RootRatio = |
| 37051 | std::max<unsigned>(1, OpMask.size() >> RootMaskSizeLog2); |
| 57 | | The result of the right shift is undefined due to shifting by '64', which is greater or equal to the width of type 'size_t' |
|
| 37052 | unsigned OpRatio = std::max<unsigned>(1, RootMask.size() >> OpMaskSizeLog2); |
| 37053 | assert((RootRatio == 1 || OpRatio == 1) && |
| 37054 | "Must not have a ratio for both incoming and op masks!"); |
| 37055 | |
| 37056 | assert(isPowerOf2_32(MaskWidth) && "Non-power-of-2 shuffle mask sizes"); |
| 37057 | assert(isPowerOf2_32(RootRatio) && "Non-power-of-2 shuffle mask sizes"); |
| 37058 | assert(isPowerOf2_32(OpRatio) && "Non-power-of-2 shuffle mask sizes"); |
| 37059 | unsigned RootRatioLog2 = countTrailingZeros(RootRatio); |
| 37060 | unsigned OpRatioLog2 = countTrailingZeros(OpRatio); |
| 37061 | |
| 37062 | Mask.resize(MaskWidth, SM_SentinelUndef); |
| 37063 | |
| 37064 | |
| 37065 | |
| 37066 | |
| 37067 | |
| 37068 | for (unsigned i = 0; i < MaskWidth; ++i) { |
| 37069 | unsigned RootIdx = i >> RootRatioLog2; |
| 37070 | if (RootMask[RootIdx] < 0) { |
| 37071 | |
| 37072 | Mask[i] = RootMask[RootIdx]; |
| 37073 | continue; |
| 37074 | } |
| 37075 | |
| 37076 | unsigned RootMaskedIdx = |
| 37077 | RootRatio == 1 |
| 37078 | ? RootMask[RootIdx] |
| 37079 | : (RootMask[RootIdx] << RootRatioLog2) + (i & (RootRatio - 1)); |
| 37080 | |
| 37081 | |
| 37082 | |
| 37083 | if ((RootMaskedIdx < (SrcOpIndex * MaskWidth)) || |
| 37084 | (((SrcOpIndex + 1) * MaskWidth) <= RootMaskedIdx)) { |
| 37085 | Mask[i] = RootMaskedIdx; |
| 37086 | continue; |
| 37087 | } |
| 37088 | |
| 37089 | RootMaskedIdx = RootMaskedIdx & (MaskWidth - 1); |
| 37090 | unsigned OpIdx = RootMaskedIdx >> OpRatioLog2; |
| 37091 | if (OpMask[OpIdx] < 0) { |
| 37092 | |
| 37093 | |
| 37094 | Mask[i] = OpMask[OpIdx]; |
| 37095 | continue; |
| 37096 | } |
| 37097 | |
| 37098 | |
| 37099 | unsigned OpMaskedIdx = OpRatio == 1 ? OpMask[OpIdx] |
| 37100 | : (OpMask[OpIdx] << OpRatioLog2) + |
| 37101 | (RootMaskedIdx & (OpRatio - 1)); |
| 37102 | |
| 37103 | OpMaskedIdx = OpMaskedIdx & (MaskWidth - 1); |
| 37104 | int InputIdx = OpMask[OpIdx] / (int)OpMask.size(); |
| 37105 | assert(0 <= OpInputIdx[InputIdx] && "Unknown target shuffle input"); |
| 37106 | OpMaskedIdx += OpInputIdx[InputIdx] * MaskWidth; |
| 37107 | |
| 37108 | Mask[i] = OpMaskedIdx; |
| 37109 | } |
| 37110 | } |
| 37111 | |
| 37112 | |
| 37113 | resolveTargetShuffleInputsAndMask(Ops, Mask); |
| 37114 | |
| 37115 | |
| 37116 | if (all_of(Mask, [](int Idx) { return Idx == SM_SentinelUndef; })) |
| 37117 | return DAG.getUNDEF(Root.getValueType()); |
| 37118 | if (all_of(Mask, [](int Idx) { return Idx < 0; })) |
| 37119 | return getZeroVector(Root.getSimpleValueType(), Subtarget, DAG, |
| 37120 | SDLoc(Root)); |
| 37121 | if (Ops.size() == 1 && ISD::isBuildVectorAllOnes(Ops[0].getNode()) && |
| 37122 | none_of(Mask, [](int M) { return M == SM_SentinelZero; })) |
| 37123 | return getOnesVector(Root.getValueType(), DAG, SDLoc(Root)); |
| 37124 | |
| 37125 | assert(!Ops.empty() && "Shuffle with no inputs detected"); |
| 37126 | HasVariableMask |= IsOpVariableMask; |
| 37127 | |
| 37128 | |
| 37129 | SmallVector<const SDNode *, 16> CombinedNodes(SrcNodes.begin(), |
| 37130 | SrcNodes.end()); |
| 37131 | CombinedNodes.push_back(Op.getNode()); |
| 37132 | |
| 37133 | |
| 37134 | |
| 37135 | |
| 37136 | |
| 37137 | |
| 37138 | |
| 37139 | |
| 37140 | if (Ops.size() < (MaxDepth - Depth)) { |
| 37141 | for (int i = 0, e = Ops.size(); i < e; ++i) { |
| 37142 | |
| 37143 | |
| 37144 | SmallVector<int, 64> ResolvedMask = Mask; |
| 37145 | if (EmptyRoot) |
| 37146 | resolveTargetShuffleFromZeroables(ResolvedMask, OpUndef, OpZero); |
| 37147 | bool AllowCrossLaneVar = false; |
| 37148 | bool AllowPerLaneVar = false; |
| 37149 | if (Ops[i].getNode()->hasOneUse() || |
| 37150 | SDNode::areOnlyUsersOf(CombinedNodes, Ops[i].getNode())) { |
| 37151 | AllowCrossLaneVar = AllowVariableCrossLaneMask; |
| 37152 | AllowPerLaneVar = AllowVariablePerLaneMask; |
| 37153 | } |
| 37154 | if (SDValue Res = combineX86ShufflesRecursively( |
| 37155 | Ops, i, Root, ResolvedMask, CombinedNodes, Depth + 1, MaxDepth, |
| 37156 | HasVariableMask, AllowCrossLaneVar, AllowPerLaneVar, DAG, |
| 37157 | Subtarget)) |
| 37158 | return Res; |
| 37159 | } |
| 37160 | } |
| 37161 | |
| 37162 | |
| 37163 | if (SDValue Cst = combineX86ShufflesConstants( |
| 37164 | Ops, Mask, Root, HasVariableMask, DAG, Subtarget)) |
| 37165 | return Cst; |
| 37166 | |
| 37167 | |
| 37168 | |
| 37169 | if (Depth == 0 && llvm::all_of(Ops, [&](SDValue Op) { |
| 37170 | APInt UndefElts; |
| 37171 | SmallVector<APInt> RawBits; |
| 37172 | unsigned EltSizeInBits = RootSizeInBits / Mask.size(); |
| 37173 | return getTargetConstantBitsFromNode(Op, EltSizeInBits, UndefElts, |
| 37174 | RawBits); |
| 37175 | })) { |
| 37176 | return SDValue(); |
| 37177 | } |
| 37178 | |
| 37179 | |
| 37180 | |
| 37181 | if (SDValue HOp = canonicalizeShuffleMaskWithHorizOp( |
| 37182 | Ops, Mask, RootSizeInBits, SDLoc(Root), DAG, Subtarget)) |
| 37183 | return DAG.getBitcast(Root.getValueType(), HOp); |
| 37184 | |
| 37185 | |
| 37186 | if (any_of(Ops, [RootSizeInBits](SDValue Op) { |
| 37187 | return Op.getValueSizeInBits() < RootSizeInBits; |
| 37188 | })) { |
| 37189 | for (SDValue &Op : Ops) |
| 37190 | if (Op.getValueSizeInBits() < RootSizeInBits) |
| 37191 | Op = widenSubVector(Op, false, Subtarget, DAG, SDLoc(Op), |
| 37192 | RootSizeInBits); |
| 37193 | |
| 37194 | resolveTargetShuffleInputsAndMask(Ops, Mask); |
| 37195 | } |
| 37196 | |
| 37197 | |
| 37198 | if (Ops.size() <= 2) { |
| 37199 | |
| 37200 | |
| 37201 | |
| 37202 | |
| 37203 | |
| 37204 | while (Mask.size() > 1) { |
| 37205 | SmallVector<int, 64> WidenedMask; |
| 37206 | if (!canWidenShuffleElements(Mask, WidenedMask)) |
| 37207 | break; |
| 37208 | Mask = std::move(WidenedMask); |
| 37209 | } |
| 37210 | |
| 37211 | |
| 37212 | |
| 37213 | if (Ops.size() == 2 && canonicalizeShuffleMaskWithCommute(Mask)) { |
| 37214 | ShuffleVectorSDNode::commuteMask(Mask); |
| 37215 | std::swap(Ops[0], Ops[1]); |
| 37216 | } |
| 37217 | |
| 37218 | |
| 37219 | return combineX86ShuffleChain(Ops, Root, Mask, Depth, HasVariableMask, |
| 37220 | AllowVariableCrossLaneMask, |
| 37221 | AllowVariablePerLaneMask, DAG, Subtarget); |
| 37222 | } |
| 37223 | |
| 37224 | |
| 37225 | |
| 37226 | return combineX86ShuffleChainWithExtract( |
| 37227 | Ops, Root, Mask, Depth, HasVariableMask, AllowVariableCrossLaneMask, |
| 37228 | AllowVariablePerLaneMask, DAG, Subtarget); |
| 37229 | } |
| 37230 | |
| 37231 | |
| 37232 | static SDValue combineX86ShufflesRecursively(SDValue Op, SelectionDAG &DAG, |
| 37233 | const X86Subtarget &Subtarget) { |
| 37234 | return combineX86ShufflesRecursively( |
| 37235 | {Op}, 0, Op, {0}, {}, 0, X86::MaxShuffleCombineDepth, |
| 37236 | false, |
| 37237 | true, true, DAG, |
| 37238 | Subtarget); |
| 37239 | } |
| 37240 | |
| 37241 | |
| 37242 | |
| 37243 | |
| 37244 | |
| 37245 | static SmallVector<int, 4> getPSHUFShuffleMask(SDValue N) { |
| 37246 | MVT VT = N.getSimpleValueType(); |
| 37247 | SmallVector<int, 4> Mask; |
| 37248 | SmallVector<SDValue, 2> Ops; |
| 37249 | bool HaveMask = |
| 37250 | getTargetShuffleMask(N.getNode(), VT, false, Ops, Mask); |
| 37251 | (void)HaveMask; |
| 37252 | assert(HaveMask); |
| 37253 | |
| 37254 | |
| 37255 | |
| 37256 | if (VT.getSizeInBits() > 128) { |
| 37257 | int LaneElts = 128 / VT.getScalarSizeInBits(); |
| 37258 | #ifndef NDEBUG |
| 37259 | for (int i = 1, NumLanes = VT.getSizeInBits() / 128; i < NumLanes; ++i) |
| 37260 | for (int j = 0; j < LaneElts; ++j) |
| 37261 | assert(Mask[j] == Mask[i * LaneElts + j] - (LaneElts * i) && |
| 37262 | "Mask doesn't repeat in high 128-bit lanes!"); |
| 37263 | #endif |
| 37264 | Mask.resize(LaneElts); |
| 37265 | } |
| 37266 | |
| 37267 | switch (N.getOpcode()) { |
| 37268 | case X86ISD::PSHUFD: |
| 37269 | return Mask; |
| 37270 | case X86ISD::PSHUFLW: |
| 37271 | Mask.resize(4); |
| 37272 | return Mask; |
| 37273 | case X86ISD::PSHUFHW: |
| 37274 | Mask.erase(Mask.begin(), Mask.begin() + 4); |
| 37275 | for (int &M : Mask) |
| 37276 | M -= 4; |
| 37277 | return Mask; |
| 37278 | default: |
| 37279 | llvm_unreachable("No valid shuffle instruction found!"); |
| 37280 | } |
| 37281 | } |
| 37282 | |
| 37283 | |
| 37284 | |
| 37285 | |
| 37286 | |
| 37287 | |
/// Search up a single-use chain ending in a PSHUFD for another shuffle we can
/// merge the dword mask into, skipping over intervening half-word shuffles
/// and unpacks that are compatible with the mask.
///
/// \param N     the PSHUFD node being combined.
/// \param Mask  its 4-element dword mask (mutated in place when merging).
/// Returns the replacement value, or an empty SDValue if nothing combined.
static SDValue
combineRedundantDWordShuffle(SDValue N, MutableArrayRef<int> Mask,
                             SelectionDAG &DAG) {
  assert(N.getOpcode() == X86ISD::PSHUFD &&
         "Called with something other than an x86 128-bit half shuffle!");
  SDLoc DL(N);

  // Walk up the single-use chain. Nodes we skip over (but must re-create on
  // the way back down) are pushed onto Chain, in top-down order.
  SmallVector<SDValue, 8> Chain;
  SDValue V = N.getOperand(0);
  for (; V.hasOneUse(); V = V.getOperand(0)) {
    switch (V.getOpcode()) {
    default:
      return SDValue(); // Nothing combinable found.

    case ISD::BITCAST:
      // Bitcasts are free to look through; the target shuffle nodes carry
      // their own types.
      continue;

    case X86ISD::PSHUFD:
      // Found another dword shuffle to merge with — stop walking.
      break;

    case X86ISD::PSHUFLW:
      // Safe to skip only if the dword mask leaves the low words (which
      // PSHUFLW shuffles) untouched and keeps the high dwords self-contained.
      if (Mask[0] != 0 || Mask[1] != 1 ||
          !(Mask[2] >= 2 && Mask[2] < 4 && Mask[3] >= 2 && Mask[3] < 4))
        return SDValue();

      Chain.push_back(V);
      continue;

    case X86ISD::PSHUFHW:
      // Mirror of the PSHUFLW case: high dwords identity, low dwords
      // self-contained.
      if (Mask[2] != 2 || Mask[3] != 3 ||
          !(Mask[0] >= 0 && Mask[0] < 2 && Mask[1] >= 0 && Mask[1] < 2))
        return SDValue();

      Chain.push_back(V);
      continue;

    case X86ISD::UNPCKL:
    case X86ISD::UNPCKH:
      // Only handle byte/word unpacks here.
      if (V.getSimpleValueType().getVectorElementType() != MVT::i8 &&
          V.getSimpleValueType().getVectorElementType() != MVT::i16)
        return SDValue();

      // A self-unpack selects only one half of its source, so we can keep
      // walking through it looking for the matching half-word shuffle
      // (PSHUFLW under UNPCKL, PSHUFHW under UNPCKH).
      unsigned CombineOp =
          V.getOpcode() == X86ISD::UNPCKL ? X86ISD::PSHUFLW : X86ISD::PSHUFHW;
      if (V.getOperand(0) != V.getOperand(1) ||
          !V->isOnlyUserOf(V.getOperand(0).getNode()))
        return SDValue();
      Chain.push_back(V);
      V = V.getOperand(0);
      do {
        switch (V.getOpcode()) {
        default:
          return SDValue(); // Nothing to combine.

        case X86ISD::PSHUFLW:
        case X86ISD::PSHUFHW:
          if (V.getOpcode() == CombineOp)
            break;

          Chain.push_back(V);

          LLVM_FALLTHROUGH;
        case ISD::BITCAST:
          V = V.getOperand(0);
          continue;
        }
        break; // Found the half-word shuffle; leave the do-while.
      } while (V.hasOneUse());
      break;
    }
    // Break out of the outer loop once a combinable node was found.
    break;
  }

  if (!V.hasOneUse())
    // The loop exited because of a multi-use node, not a combinable shuffle.
    return SDValue();

  // Compose the found shuffle's mask with our incoming dword mask and emit
  // the merged shuffle in its place.
  SmallVector<int, 4> VMask = getPSHUFShuffleMask(V);
  for (int &M : Mask)
    M = VMask[M];
  V = DAG.getNode(V.getOpcode(), DL, V.getValueType(), V.getOperand(0),
                  getV4X86ShuffleImm8ForMask(Mask, DL, DAG));

  // Re-create the skipped-over nodes (bottom-up) around the new shuffle,
  // inserting bitcasts where the types no longer line up.
  while (!Chain.empty()) {
    SDValue W = Chain.pop_back_val();

    if (V.getValueType() != W.getOperand(0).getValueType())
      V = DAG.getBitcast(W.getOperand(0).getValueType(), V);

    switch (W.getOpcode()) {
    default:
      llvm_unreachable("Only PSHUF and UNPCK instructions get here!");

    case X86ISD::UNPCKL:
    case X86ISD::UNPCKH:
      // The skipped unpacks were self-unpacks (checked above).
      V = DAG.getNode(W.getOpcode(), DL, W.getValueType(), V, V);
      break;

    case X86ISD::PSHUFD:
    case X86ISD::PSHUFLW:
    case X86ISD::PSHUFHW:
      V = DAG.getNode(W.getOpcode(), DL, W.getValueType(), V, W.getOperand(1));
      break;
    }
  }
  if (V.getValueType() != N.getValueType())
    V = DAG.getBitcast(N.getValueType(), V);

  // Return the rebuilt chain, which replaces N.
  return V;
}
| 37415 | |
| 37416 | |
| 37417 | |
| 37418 | static SDValue combineCommutableSHUFP(SDValue N, MVT VT, const SDLoc &DL, |
| 37419 | SelectionDAG &DAG) { |
| 37420 | |
| 37421 | if (VT != MVT::v4f32 && VT != MVT::v8f32 && VT != MVT::v16f32) |
| 37422 | return SDValue(); |
| 37423 | |
| 37424 | |
| 37425 | auto commuteSHUFP = [&VT, &DL, &DAG](SDValue Parent, SDValue V) { |
| 37426 | if (V.getOpcode() != X86ISD::SHUFP || !Parent->isOnlyUserOf(V.getNode())) |
| 37427 | return SDValue(); |
| 37428 | SDValue N0 = V.getOperand(0); |
| 37429 | SDValue N1 = V.getOperand(1); |
| 37430 | unsigned Imm = V.getConstantOperandVal(2); |
| 37431 | if (!MayFoldLoad(peekThroughOneUseBitcasts(N0)) || |
| 37432 | MayFoldLoad(peekThroughOneUseBitcasts(N1))) |
| 37433 | return SDValue(); |
| 37434 | Imm = ((Imm & 0x0F) << 4) | ((Imm & 0xF0) >> 4); |
| 37435 | return DAG.getNode(X86ISD::SHUFP, DL, VT, N1, N0, |
| 37436 | DAG.getTargetConstant(Imm, DL, MVT::i8)); |
| 37437 | }; |
| 37438 | |
| 37439 | switch (N.getOpcode()) { |
| 37440 | case X86ISD::VPERMILPI: |
| 37441 | if (SDValue NewSHUFP = commuteSHUFP(N, N.getOperand(0))) { |
| 37442 | unsigned Imm = N.getConstantOperandVal(1); |
| 37443 | return DAG.getNode(X86ISD::VPERMILPI, DL, VT, NewSHUFP, |
| 37444 | DAG.getTargetConstant(Imm ^ 0xAA, DL, MVT::i8)); |
| 37445 | } |
| 37446 | break; |
| 37447 | case X86ISD::SHUFP: { |
| 37448 | SDValue N0 = N.getOperand(0); |
| 37449 | SDValue N1 = N.getOperand(1); |
| 37450 | unsigned Imm = N.getConstantOperandVal(2); |
| 37451 | if (N0 == N1) { |
| 37452 | if (SDValue NewSHUFP = commuteSHUFP(N, N0)) |
| 37453 | return DAG.getNode(X86ISD::SHUFP, DL, VT, NewSHUFP, NewSHUFP, |
| 37454 | DAG.getTargetConstant(Imm ^ 0xAA, DL, MVT::i8)); |
| 37455 | } else if (SDValue NewSHUFP = commuteSHUFP(N, N0)) { |
| 37456 | return DAG.getNode(X86ISD::SHUFP, DL, VT, NewSHUFP, N1, |
| 37457 | DAG.getTargetConstant(Imm ^ 0x0A, DL, MVT::i8)); |
| 37458 | } else if (SDValue NewSHUFP = commuteSHUFP(N, N1)) { |
| 37459 | return DAG.getNode(X86ISD::SHUFP, DL, VT, N0, NewSHUFP, |
| 37460 | DAG.getTargetConstant(Imm ^ 0xA0, DL, MVT::i8)); |
| 37461 | } |
| 37462 | break; |
| 37463 | } |
| 37464 | } |
| 37465 | |
| 37466 | return SDValue(); |
| 37467 | } |
| 37468 | |
| 37469 | |
/// Canonicalize SHUFFLE(BINOP(X,Y)) -> BINOP(SHUFFLE(X),SHUFFLE(Y)) when the
/// shuffled operands are cheap to shuffle (constants or one-use target
/// shuffles), so later combines can fold them further.
static SDValue canonicalizeShuffleWithBinOps(SDValue N, SelectionDAG &DAG,
                                             const SDLoc &DL) {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  EVT ShuffleVT = N.getValueType();

  // An operand is worth pushing a shuffle into if it is a constant build
  // vector or an existing one-use target shuffle (which shuffle combining is
  // then likely to simplify).
  auto IsMergeableWithShuffle = [](SDValue Op) {
    return ISD::isBuildVectorAllOnes(Op.getNode()) ||
           ISD::isBuildVectorAllZeros(Op.getNode()) ||
           ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
           ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()) ||
           (isTargetShuffle(Op.getOpcode()) && Op->hasOneUse());
  };
  // Only move the shuffle across the binop if it shuffles whole source
  // elements, except for bitwise logic ops where element size is irrelevant.
  auto IsSafeToMoveShuffle = [ShuffleVT](SDValue Op, unsigned BinOp) {
    return BinOp == ISD::AND || BinOp == ISD::OR || BinOp == ISD::XOR ||
           (Op.getScalarValueSizeInBits() <= ShuffleVT.getScalarSizeInBits());
  };

  unsigned Opc = N.getOpcode();
  switch (Opc) {
  // Unary and unary-with-permute shuffles.
  case X86ISD::PSHUFB: {
    // Bail out if the PSHUFB mask has zeroing elements (AllowSentinelZero is
    // false, so getTargetShuffleMask fails for them).
    SmallVector<int> Mask;
    SmallVector<SDValue> Ops;
    if (!getTargetShuffleMask(N.getNode(), ShuffleVT.getSimpleVT(), false, Ops,
                              Mask))
      break;
    LLVM_FALLTHROUGH;
  }
  case X86ISD::VBROADCAST:
  case X86ISD::MOVDDUP:
  case X86ISD::PSHUFD:
  case X86ISD::VPERMI:
  case X86ISD::VPERMILPI: {
    if (N.getOperand(0).getValueType() == ShuffleVT &&
        N->isOnlyUserOf(N.getOperand(0).getNode())) {
      SDValue N0 = peekThroughOneUseBitcasts(N.getOperand(0));
      unsigned SrcOpcode = N0.getOpcode();
      if (TLI.isBinOp(SrcOpcode) && IsSafeToMoveShuffle(N0, SrcOpcode)) {
        SDValue Op00 = peekThroughOneUseBitcasts(N0.getOperand(0));
        SDValue Op01 = peekThroughOneUseBitcasts(N0.getOperand(1));
        if (IsMergeableWithShuffle(Op00) || IsMergeableWithShuffle(Op01)) {
          SDValue LHS, RHS;
          Op00 = DAG.getBitcast(ShuffleVT, Op00);
          Op01 = DAG.getBitcast(ShuffleVT, Op01);
          // Re-issue the shuffle on each binop operand, preserving any
          // immediate operand the shuffle node carries.
          if (N.getNumOperands() == 2) {
            LHS = DAG.getNode(Opc, DL, ShuffleVT, Op00, N.getOperand(1));
            RHS = DAG.getNode(Opc, DL, ShuffleVT, Op01, N.getOperand(1));
          } else {
            LHS = DAG.getNode(Opc, DL, ShuffleVT, Op00);
            RHS = DAG.getNode(Opc, DL, ShuffleVT, Op01);
          }
          EVT OpVT = N0.getValueType();
          return DAG.getBitcast(ShuffleVT,
                                DAG.getNode(SrcOpcode, DL, OpVT,
                                            DAG.getBitcast(OpVT, LHS),
                                            DAG.getBitcast(OpVT, RHS)));
        }
      }
    }
    break;
  }
  // Binary and binary-with-permute shuffles.
  case X86ISD::INSERTPS: {
    // Don't merge INSERTPS if it zeroes any elements.
    unsigned InsertPSMask = N.getConstantOperandVal(2);
    unsigned ZeroMask = InsertPSMask & 0xF;
    if (ZeroMask != 0)
      break;
    LLVM_FALLTHROUGH;
  }
  case X86ISD::MOVSD:
  case X86ISD::MOVSS:
  case X86ISD::BLENDI:
  case X86ISD::SHUFP:
  case X86ISD::UNPCKH:
  case X86ISD::UNPCKL: {
    if (N->isOnlyUserOf(N.getOperand(0).getNode()) &&
        N->isOnlyUserOf(N.getOperand(1).getNode())) {
      SDValue N0 = peekThroughOneUseBitcasts(N.getOperand(0));
      SDValue N1 = peekThroughOneUseBitcasts(N.getOperand(1));
      unsigned SrcOpcode = N0.getOpcode();
      // Both shuffle sources must be the same binop opcode.
      if (TLI.isBinOp(SrcOpcode) && N1.getOpcode() == SrcOpcode &&
          IsSafeToMoveShuffle(N0, SrcOpcode) &&
          IsSafeToMoveShuffle(N1, SrcOpcode)) {
        SDValue Op00 = peekThroughOneUseBitcasts(N0.getOperand(0));
        SDValue Op10 = peekThroughOneUseBitcasts(N1.getOperand(0));
        SDValue Op01 = peekThroughOneUseBitcasts(N0.getOperand(1));
        SDValue Op11 = peekThroughOneUseBitcasts(N1.getOperand(1));
        // Profitable if both halves of one binop side merge, or at least one
        // operand on each side does.
        if (((IsMergeableWithShuffle(Op00) && IsMergeableWithShuffle(Op10)) ||
             (IsMergeableWithShuffle(Op01) && IsMergeableWithShuffle(Op11))) ||
            ((IsMergeableWithShuffle(Op00) || IsMergeableWithShuffle(Op10)) &&
             (IsMergeableWithShuffle(Op01) || IsMergeableWithShuffle(Op11)))) {
          SDValue LHS, RHS;
          Op00 = DAG.getBitcast(ShuffleVT, Op00);
          Op10 = DAG.getBitcast(ShuffleVT, Op10);
          Op01 = DAG.getBitcast(ShuffleVT, Op01);
          Op11 = DAG.getBitcast(ShuffleVT, Op11);
          // Re-issue the binary shuffle on corresponding binop operands,
          // preserving any immediate operand.
          if (N.getNumOperands() == 3) {
            LHS = DAG.getNode(Opc, DL, ShuffleVT, Op00, Op10, N.getOperand(2));
            RHS = DAG.getNode(Opc, DL, ShuffleVT, Op01, Op11, N.getOperand(2));
          } else {
            LHS = DAG.getNode(Opc, DL, ShuffleVT, Op00, Op10);
            RHS = DAG.getNode(Opc, DL, ShuffleVT, Op01, Op11);
          }
          EVT OpVT = N0.getValueType();
          return DAG.getBitcast(ShuffleVT,
                                DAG.getNode(SrcOpcode, DL, OpVT,
                                            DAG.getBitcast(OpVT, LHS),
                                            DAG.getBitcast(OpVT, RHS)));
        }
      }
    }
    break;
  }
  }
  return SDValue();
}
| 37596 | |
| 37597 | |
/// Attempt to fold vperm2x128(op(x),op(y)) -> op(vperm2x128(x,y)) when both
/// lane-shuffle sources perform the same operation (with matching immediate,
/// where applicable), so the lane shuffle can be hoisted above it.
static SDValue canonicalizeLaneShuffleWithRepeatedOps(SDValue V,
                                                      SelectionDAG &DAG,
                                                      const SDLoc &DL) {
  assert(V.getOpcode() == X86ISD::VPERM2X128 && "Unknown lane shuffle");

  MVT VT = V.getSimpleValueType();
  SDValue Src0 = peekThroughBitcasts(V.getOperand(0));
  SDValue Src1 = peekThroughBitcasts(V.getOperand(1));
  unsigned SrcOpc0 = Src0.getOpcode();
  unsigned SrcOpc1 = Src1.getOpcode();
  EVT SrcVT0 = Src0.getValueType();
  EVT SrcVT1 = Src1.getValueType();

  // Unless the second source is undef, both sources must match in type and
  // opcode for the transform to be valid.
  if (!Src1.isUndef() && (SrcVT0 != SrcVT1 || SrcOpc0 != SrcOpc1))
    return SDValue();

  switch (SrcOpc0) {
  case X86ISD::MOVDDUP: {
    // vperm2x128(movddup(x),movddup(y)) -> movddup(vperm2x128(x,y)).
    SDValue LHS = Src0.getOperand(0);
    SDValue RHS = Src1.isUndef() ? Src1 : Src1.getOperand(0);
    SDValue Res =
        DAG.getNode(X86ISD::VPERM2X128, DL, SrcVT0, LHS, RHS, V.getOperand(2));
    Res = DAG.getNode(SrcOpc0, DL, SrcVT0, Res);
    return DAG.getBitcast(VT, Res);
  }
  case X86ISD::VPERMILPI:
    // For v4f64 the immediate holds a separate 2-bit selector per 128-bit
    // lane; only hoist if both lanes use the same selector (otherwise the
    // lane swap would change which selector applies).
    if (SrcVT0 == MVT::v4f64) {
      uint64_t Mask = Src0.getConstantOperandVal(1);
      if ((Mask & 0x3) != ((Mask >> 2) & 0x3))
        break;
    }
    LLVM_FALLTHROUGH;
  case X86ISD::VSHLI:
  case X86ISD::VSRLI:
  case X86ISD::VSRAI:
  case X86ISD::PSHUFD:
    // These ops carry an immediate in operand 1; both sources must agree on
    // it (or Src1 is undef) before hoisting the lane shuffle above them.
    if (Src1.isUndef() || Src0.getOperand(1) == Src1.getOperand(1)) {
      SDValue LHS = Src0.getOperand(0);
      SDValue RHS = Src1.isUndef() ? Src1 : Src1.getOperand(0);
      SDValue Res = DAG.getNode(X86ISD::VPERM2X128, DL, SrcVT0, LHS, RHS,
                                V.getOperand(2));
      Res = DAG.getNode(SrcOpc0, DL, SrcVT0, Res, Src0.getOperand(1));
      return DAG.getBitcast(VT, Res);
    }
    break;
  }

  return SDValue();
}
| 37648 | |
| 37649 | |
| 37650 | static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG, |
| 37651 | TargetLowering::DAGCombinerInfo &DCI, |
| 37652 | const X86Subtarget &Subtarget) { |
| 37653 | SDLoc DL(N); |
| 37654 | MVT VT = N.getSimpleValueType(); |
| 37655 | SmallVector<int, 4> Mask; |
| 37656 | unsigned Opcode = N.getOpcode(); |
| 37657 | |
| 37658 | if (SDValue R = combineCommutableSHUFP(N, VT, DL, DAG)) |
| 37659 | return R; |
| 37660 | |
| 37661 | if (SDValue R = canonicalizeShuffleWithBinOps(N, DAG, DL)) |
| 37662 | return R; |
| 37663 | |
| 37664 | |
| 37665 | switch (Opcode) { |
| 37666 | case X86ISD::MOVDDUP: { |
| 37667 | SDValue Src = N.getOperand(0); |
| 37668 | |
| 37669 | if (VT == MVT::v2f64 && Src.hasOneUse() && |
| 37670 | ISD::isNormalLoad(Src.getNode())) { |
| 37671 | LoadSDNode *LN = cast<LoadSDNode>(Src); |
| 37672 | if (SDValue VZLoad = narrowLoadToVZLoad(LN, MVT::f64, MVT::v2f64, DAG)) { |
| 37673 | SDValue Movddup = DAG.getNode(X86ISD::MOVDDUP, DL, MVT::v2f64, VZLoad); |
| 37674 | DCI.CombineTo(N.getNode(), Movddup); |
| 37675 | DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), VZLoad.getValue(1)); |
| 37676 | DCI.recursivelyDeleteUnusedNodes(LN); |
| 37677 | return N; |
| 37678 | } |
| 37679 | } |
| 37680 | |
| 37681 | return SDValue(); |
| 37682 | } |
| 37683 | case X86ISD::VBROADCAST: { |
| 37684 | SDValue Src = N.getOperand(0); |
| 37685 | SDValue BC = peekThroughBitcasts(Src); |
| 37686 | EVT SrcVT = Src.getValueType(); |
| 37687 | EVT BCVT = BC.getValueType(); |
| 37688 | |
| 37689 | |
| 37690 | |
| 37691 | if (isTargetShuffle(BC.getOpcode()) && |
| 37692 | VT.getScalarSizeInBits() % BCVT.getScalarSizeInBits() == 0) { |
| 37693 | unsigned Scale = VT.getScalarSizeInBits() / BCVT.getScalarSizeInBits(); |
| 37694 | SmallVector<int, 16> DemandedMask(BCVT.getVectorNumElements(), |
| 37695 | SM_SentinelUndef); |
| 37696 | for (unsigned i = 0; i != Scale; ++i) |
| 37697 | DemandedMask[i] = i; |
| 37698 | if (SDValue Res = combineX86ShufflesRecursively( |
| 37699 | {BC}, 0, BC, DemandedMask, {}, 0, |
| 37700 | X86::MaxShuffleCombineDepth, |
| 37701 | false, true, |
| 37702 | true, DAG, Subtarget)) |
| 37703 | return DAG.getNode(X86ISD::VBROADCAST, DL, VT, |
| 37704 | DAG.getBitcast(SrcVT, Res)); |
| 37705 | } |
| 37706 | |
| 37707 | |
| 37708 | |
| 37709 | if (Src.getOpcode() == ISD::BITCAST && |
| 37710 | SrcVT.getScalarSizeInBits() == BCVT.getScalarSizeInBits() && |
| 37711 | DAG.getTargetLoweringInfo().isTypeLegal(BCVT) && |
| 37712 | FixedVectorType::isValidElementType( |
| 37713 | BCVT.getScalarType().getTypeForEVT(*DAG.getContext()))) { |
| 37714 | EVT NewVT = EVT::getVectorVT(*DAG.getContext(), BCVT.getScalarType(), |
| 37715 | VT.getVectorNumElements()); |
| 37716 | return DAG.getBitcast(VT, DAG.getNode(X86ISD::VBROADCAST, DL, NewVT, BC)); |
| 37717 | } |
| 37718 | |
| 37719 | |
| 37720 | if (SrcVT.getSizeInBits() > 128) |
| 37721 | return DAG.getNode(X86ISD::VBROADCAST, DL, VT, |
| 37722 | extract128BitVector(Src, 0, DAG, DL)); |
| 37723 | |
| 37724 | |
| 37725 | if (Src.getOpcode() == ISD::SCALAR_TO_VECTOR) |
| 37726 | return DAG.getNode(X86ISD::VBROADCAST, DL, VT, Src.getOperand(0)); |
| 37727 | |
| 37728 | |
| 37729 | |
| 37730 | for (SDNode *User : Src->uses()) |
| 37731 | if (User != N.getNode() && User->getOpcode() == X86ISD::VBROADCAST && |
| 37732 | Src == User->getOperand(0) && |
| 37733 | User->getValueSizeInBits(0).getFixedSize() > |
| 37734 | VT.getFixedSizeInBits()) { |
| 37735 | return extractSubVector(SDValue(User, 0), 0, DAG, DL, |
| 37736 | VT.getSizeInBits()); |
| 37737 | } |
| 37738 | |
| 37739 | |
| 37740 | |
| 37741 | if (!SrcVT.isVector() && (Src.hasOneUse() || VT.isFloatingPoint()) && |
| 37742 | ISD::isNormalLoad(Src.getNode())) { |
| 37743 | LoadSDNode *LN = cast<LoadSDNode>(Src); |
| 37744 | SDVTList Tys = DAG.getVTList(VT, MVT::Other); |
| 37745 | SDValue Ops[] = { LN->getChain(), LN->getBasePtr() }; |
| 37746 | SDValue BcastLd = |
| 37747 | DAG.getMemIntrinsicNode(X86ISD::VBROADCAST_LOAD, DL, Tys, Ops, |
| 37748 | LN->getMemoryVT(), LN->getMemOperand()); |
| 37749 | |
| 37750 | bool NoReplaceExtract = Src.hasOneUse(); |
| 37751 | DCI.CombineTo(N.getNode(), BcastLd); |
| 37752 | if (NoReplaceExtract) { |
| 37753 | DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BcastLd.getValue(1)); |
| 37754 | DCI.recursivelyDeleteUnusedNodes(LN); |
| 37755 | } else { |
| 37756 | SDValue Scl = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, SrcVT, BcastLd, |
| 37757 | DAG.getIntPtrConstant(0, DL)); |
| 37758 | DCI.CombineTo(LN, Scl, BcastLd.getValue(1)); |
| 37759 | } |
| 37760 | return N; |
| 37761 | } |
| 37762 | |
| 37763 | |
| 37764 | |
| 37765 | if (SrcVT == MVT::i16 && Src.getOpcode() == ISD::TRUNCATE && |
| 37766 | Src.hasOneUse() && Src.getOperand(0).hasOneUse()) { |
| 37767 | assert(Subtarget.hasAVX2() && "Expected AVX2"); |
| 37768 | SDValue TruncIn = Src.getOperand(0); |
| 37769 | |
| 37770 | |
| 37771 | |
| 37772 | if (ISD::isNormalLoad(TruncIn.getNode())) { |
| 37773 | LoadSDNode *LN = cast<LoadSDNode>(TruncIn); |
| 37774 | |
| 37775 | if (LN->isSimple()) { |
| 37776 | SDVTList Tys = DAG.getVTList(VT, MVT::Other); |
| 37777 | SDValue Ops[] = { LN->getChain(), LN->getBasePtr() }; |
| 37778 | SDValue BcastLd = DAG.getMemIntrinsicNode( |
| 37779 | X86ISD::VBROADCAST_LOAD, DL, Tys, Ops, MVT::i16, |
| 37780 | LN->getPointerInfo(), LN->getOriginalAlign(), |
| 37781 | LN->getMemOperand()->getFlags()); |
| 37782 | DCI.CombineTo(N.getNode(), BcastLd); |
| 37783 | DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BcastLd.getValue(1)); |
| 37784 | DCI.recursivelyDeleteUnusedNodes(Src.getNode()); |
| 37785 | return N; |
| 37786 | } |
| 37787 | } |
| 37788 | |
| 37789 | |
| 37790 | if (ISD::isUNINDEXEDLoad(Src.getOperand(0).getNode()) && |
| 37791 | ISD::isEXTLoad(Src.getOperand(0).getNode())) { |
| 37792 | LoadSDNode *LN = cast<LoadSDNode>(Src.getOperand(0)); |
| 37793 | if (LN->getMemoryVT().getSizeInBits() == 16) { |
| 37794 | SDVTList Tys = DAG.getVTList(VT, MVT::Other); |
| 37795 | SDValue Ops[] = { LN->getChain(), LN->getBasePtr() }; |
| 37796 | SDValue BcastLd = |
| 37797 | DAG.getMemIntrinsicNode(X86ISD::VBROADCAST_LOAD, DL, Tys, Ops, |
| 37798 | LN->getMemoryVT(), LN->getMemOperand()); |
| 37799 | DCI.CombineTo(N.getNode(), BcastLd); |
| 37800 | DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BcastLd.getValue(1)); |
| 37801 | DCI.recursivelyDeleteUnusedNodes(Src.getNode()); |
| 37802 | return N; |
| 37803 | } |
| 37804 | } |
| 37805 | |
| 37806 | |
| 37807 | |
| 37808 | if (TruncIn.getOpcode() == ISD::SRL && |
| 37809 | TruncIn.getOperand(0).hasOneUse() && |
| 37810 | isa<ConstantSDNode>(TruncIn.getOperand(1)) && |
| 37811 | ISD::isNormalLoad(TruncIn.getOperand(0).getNode())) { |
| 37812 | LoadSDNode *LN = cast<LoadSDNode>(TruncIn.getOperand(0)); |
| 37813 | unsigned ShiftAmt = TruncIn.getConstantOperandVal(1); |
| 37814 | |
| 37815 | |
| 37816 | if (ShiftAmt % 16 == 0 && TruncIn.getValueSizeInBits() % 16 == 0 && |
| 37817 | LN->isSimple()) { |
| 37818 | unsigned Offset = ShiftAmt / 8; |
| 37819 | SDVTList Tys = DAG.getVTList(VT, MVT::Other); |
| 37820 | SDValue Ptr = DAG.getMemBasePlusOffset(LN->getBasePtr(), |
| 37821 | TypeSize::Fixed(Offset), DL); |
| 37822 | SDValue Ops[] = { LN->getChain(), Ptr }; |
| 37823 | SDValue BcastLd = DAG.getMemIntrinsicNode( |
| 37824 | X86ISD::VBROADCAST_LOAD, DL, Tys, Ops, MVT::i16, |
| 37825 | LN->getPointerInfo().getWithOffset(Offset), |
| 37826 | LN->getOriginalAlign(), |
| 37827 | LN->getMemOperand()->getFlags()); |
| 37828 | DCI.CombineTo(N.getNode(), BcastLd); |
| 37829 | DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BcastLd.getValue(1)); |
| 37830 | DCI.recursivelyDeleteUnusedNodes(Src.getNode()); |
| 37831 | return N; |
| 37832 | } |
| 37833 | } |
| 37834 | } |
| 37835 | |
| 37836 | |
| 37837 | if (Src.getOpcode() == X86ISD::VZEXT_LOAD && Src.hasOneUse()) { |
| 37838 | MemSDNode *LN = cast<MemIntrinsicSDNode>(Src); |
| 37839 | if (LN->getMemoryVT().getSizeInBits() == VT.getScalarSizeInBits()) { |
| 37840 | SDVTList Tys = DAG.getVTList(VT, MVT::Other); |
| 37841 | SDValue Ops[] = { LN->getChain(), LN->getBasePtr() }; |
| 37842 | SDValue BcastLd = |
| 37843 | DAG.getMemIntrinsicNode(X86ISD::VBROADCAST_LOAD, DL, Tys, Ops, |
| 37844 | LN->getMemoryVT(), LN->getMemOperand()); |
| 37845 | DCI.CombineTo(N.getNode(), BcastLd); |
| 37846 | DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BcastLd.getValue(1)); |
| 37847 | DCI.recursivelyDeleteUnusedNodes(LN); |
| 37848 | return N; |
| 37849 | } |
| 37850 | } |
| 37851 | |
| 37852 | |
| 37853 | if ((SrcVT == MVT::v2f64 || SrcVT == MVT::v4f32 || SrcVT == MVT::v2i64 || |
| 37854 | SrcVT == MVT::v4i32) && |
| 37855 | Src.hasOneUse() && ISD::isNormalLoad(Src.getNode())) { |
| 37856 | LoadSDNode *LN = cast<LoadSDNode>(Src); |
| 37857 | |
| 37858 | if (LN->isSimple()) { |
| 37859 | SDVTList Tys = DAG.getVTList(VT, MVT::Other); |
| 37860 | SDValue Ops[] = {LN->getChain(), LN->getBasePtr()}; |
| 37861 | SDValue BcastLd = DAG.getMemIntrinsicNode( |
| 37862 | X86ISD::VBROADCAST_LOAD, DL, Tys, Ops, SrcVT.getScalarType(), |
| 37863 | LN->getPointerInfo(), LN->getOriginalAlign(), |
| 37864 | LN->getMemOperand()->getFlags()); |
| 37865 | DCI.CombineTo(N.getNode(), BcastLd); |
| 37866 | DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), BcastLd.getValue(1)); |
| 37867 | DCI.recursivelyDeleteUnusedNodes(LN); |
| 37868 | return N; |
| 37869 | } |
| 37870 | } |
| 37871 | |
| 37872 | return SDValue(); |
| 37873 | } |
| 37874 | case X86ISD::VZEXT_MOVL: { |
| 37875 | SDValue N0 = N.getOperand(0); |
| 37876 | |
| 37877 | |
| 37878 | |
| 37879 | if (N0.hasOneUse() && ISD::isNormalLoad(N0.getNode())) { |
| 37880 | auto *LN = cast<LoadSDNode>(N0); |
| 37881 | if (SDValue VZLoad = |
| 37882 | narrowLoadToVZLoad(LN, VT.getVectorElementType(), VT, DAG)) { |
| 37883 | DCI.CombineTo(N.getNode(), VZLoad); |
| 37884 | DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), VZLoad.getValue(1)); |
| 37885 | DCI.recursivelyDeleteUnusedNodes(LN); |
| 37886 | return N; |
| 37887 | } |
| 37888 | } |
| 37889 | |
| 37890 | |
| 37891 | |
| 37892 | |
| 37893 | if (N0.hasOneUse() && N0.getOpcode() == X86ISD::VBROADCAST_LOAD) { |
| 37894 | auto *LN = cast<MemSDNode>(N0); |
| 37895 | if (VT.getScalarSizeInBits() == LN->getMemoryVT().getSizeInBits()) { |
| 37896 | SDVTList Tys = DAG.getVTList(VT, MVT::Other); |
| 37897 | SDValue Ops[] = {LN->getChain(), LN->getBasePtr()}; |
| 37898 | SDValue VZLoad = |
| 37899 | DAG.getMemIntrinsicNode(X86ISD::VZEXT_LOAD, DL, Tys, Ops, |
| 37900 | LN->getMemoryVT(), LN->getMemOperand()); |
| 37901 | DCI.CombineTo(N.getNode(), VZLoad); |
| 37902 | DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), VZLoad.getValue(1)); |
| 37903 | DCI.recursivelyDeleteUnusedNodes(LN); |
| 37904 | return N; |
| 37905 | } |
| 37906 | } |
| 37907 | |
| 37908 | |
| 37909 | |
| 37910 | |
| 37911 | if (N0.hasOneUse() && N0.getOpcode() == ISD::SCALAR_TO_VECTOR && |
| 37912 | N0.getOperand(0).hasOneUse() && |
| 37913 | N0.getOperand(0).getValueType() == MVT::i64) { |
| 37914 | SDValue In = N0.getOperand(0); |
| 37915 | APInt Mask = APInt::getHighBitsSet(64, 32); |
| 37916 | if (DAG.MaskedValueIsZero(In, Mask)) { |
| 37917 | SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, In); |
| 37918 | MVT VecVT = MVT::getVectorVT(MVT::i32, VT.getVectorNumElements() * 2); |
| 37919 | SDValue SclVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VecVT, Trunc); |
| 37920 | SDValue Movl = DAG.getNode(X86ISD::VZEXT_MOVL, DL, VecVT, SclVec); |
| 37921 | return DAG.getBitcast(VT, Movl); |
| 37922 | } |
| 37923 | } |
| 37924 | |
| 37925 | |
| 37926 | |
| 37927 | |
| 37928 | if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR) { |
| 37929 | if (auto *C = dyn_cast<ConstantSDNode>(N0.getOperand(0))) { |
| 37930 | |
| 37931 | EVT ScalarVT = N0.getOperand(0).getValueType(); |
| 37932 | Type *ScalarTy = ScalarVT.getTypeForEVT(*DAG.getContext()); |
| 37933 | unsigned NumElts = VT.getVectorNumElements(); |
| 37934 | Constant *Zero = ConstantInt::getNullValue(ScalarTy); |
| 37935 | SmallVector<Constant *, 32> ConstantVec(NumElts, Zero); |
| 37936 | ConstantVec[0] = const_cast<ConstantInt *>(C->getConstantIntValue()); |
| 37937 | |
| 37938 | |
| 37939 | MVT PVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); |
| 37940 | SDValue CP = DAG.getConstantPool(ConstantVector::get(ConstantVec), PVT); |
| 37941 | MachinePointerInfo MPI = |
| 37942 | MachinePointerInfo::getConstantPool(DAG.getMachineFunction()); |
| 37943 | Align Alignment = cast<ConstantPoolSDNode>(CP)->getAlign(); |
| 37944 | return DAG.getLoad(VT, DL, DAG.getEntryNode(), CP, MPI, Alignment, |
| 37945 | MachineMemOperand::MOLoad); |
| 37946 | } |
| 37947 | } |
| 37948 | |
| 37949 | |
| 37950 | |
| 37951 | |
| 37952 | |
| 37953 | if (!DCI.isBeforeLegalizeOps() && N0.hasOneUse()) { |
| 37954 | SDValue V = peekThroughOneUseBitcasts(N0); |
| 37955 | |
| 37956 | if (V.getOpcode() == ISD::INSERT_SUBVECTOR && V.getOperand(0).isUndef() && |
| 37957 | isNullConstant(V.getOperand(2))) { |
| 37958 | SDValue In = V.getOperand(1); |
| 37959 | MVT SubVT = MVT::getVectorVT(VT.getVectorElementType(), |
| 37960 | In.getValueSizeInBits() / |
| 37961 | VT.getScalarSizeInBits()); |
| 37962 | In = DAG.getBitcast(SubVT, In); |
| 37963 | SDValue Movl = DAG.getNode(X86ISD::VZEXT_MOVL, DL, SubVT, In); |
| 37964 | return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, |
| 37965 | getZeroVector(VT, Subtarget, DAG, DL), Movl, |
| 37966 | V.getOperand(2)); |
| 37967 | } |
| 37968 | } |
| 37969 | |
| 37970 | return SDValue(); |
| 37971 | } |
| 37972 | case X86ISD::BLENDI: { |
| 37973 | SDValue N0 = N.getOperand(0); |
| 37974 | SDValue N1 = N.getOperand(1); |
| 37975 | |
| 37976 | |
| 37977 | |
| 37978 | if (N0.getOpcode() == ISD::BITCAST && N1.getOpcode() == ISD::BITCAST && |
| 37979 | N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()) { |
| 37980 | MVT SrcVT = N0.getOperand(0).getSimpleValueType(); |
| 37981 | if ((VT.getScalarSizeInBits() % SrcVT.getScalarSizeInBits()) == 0 && |
| 37982 | SrcVT.getScalarSizeInBits() >= 32) { |
| 37983 | unsigned BlendMask = N.getConstantOperandVal(2); |
| 37984 | unsigned Size = VT.getVectorNumElements(); |
| 37985 | unsigned Scale = VT.getScalarSizeInBits() / SrcVT.getScalarSizeInBits(); |
| 37986 | BlendMask = scaleVectorShuffleBlendMask(BlendMask, Size, Scale); |
| 37987 | return DAG.getBitcast( |
| 37988 | VT, DAG.getNode(X86ISD::BLENDI, DL, SrcVT, N0.getOperand(0), |
| 37989 | N1.getOperand(0), |
| 37990 | DAG.getTargetConstant(BlendMask, DL, MVT::i8))); |
| 37991 | } |
| 37992 | } |
| 37993 | return SDValue(); |
| 37994 | } |
| 37995 | case X86ISD::VPERMI: { |
| 37996 | |
| 37997 | |
| 37998 | SDValue N0 = N.getOperand(0); |
| 37999 | SDValue N1 = N.getOperand(1); |
| 38000 | unsigned EltSizeInBits = VT.getScalarSizeInBits(); |
| 38001 | if (N0.getOpcode() == ISD::BITCAST && |
| 38002 | N0.getOperand(0).getScalarValueSizeInBits() == EltSizeInBits) { |
| 38003 | SDValue Src = N0.getOperand(0); |
| 38004 | EVT SrcVT = Src.getValueType(); |
| 38005 | SDValue Res = DAG.getNode(X86ISD::VPERMI, DL, SrcVT, Src, N1); |
| 38006 | return DAG.getBitcast(VT, Res); |
| 38007 | } |
| 38008 | return SDValue(); |
| 38009 | } |
| 38010 | case X86ISD::VPERM2X128: { |
| 38011 | |
| 38012 | SDValue LHS = N->getOperand(0); |
| 38013 | SDValue RHS = N->getOperand(1); |
| 38014 | if (LHS.getOpcode() == ISD::BITCAST && |
| 38015 | (RHS.getOpcode() == ISD::BITCAST || RHS.isUndef())) { |
| 38016 | EVT SrcVT = LHS.getOperand(0).getValueType(); |
| 38017 | if (RHS.isUndef() || SrcVT == RHS.getOperand(0).getValueType()) { |
| 38018 | return DAG.getBitcast(VT, DAG.getNode(X86ISD::VPERM2X128, DL, SrcVT, |
| 38019 | DAG.getBitcast(SrcVT, LHS), |
| 38020 | DAG.getBitcast(SrcVT, RHS), |
| 38021 | N->getOperand(2))); |
| 38022 | } |
| 38023 | } |
| 38024 | |
| 38025 | |
| 38026 | if (SDValue Res = canonicalizeLaneShuffleWithRepeatedOps(N, DAG, DL)) |
| 38027 | return Res; |
| 38028 | |
| 38029 | |
| 38030 | |
| 38031 | auto FindSubVector128 = [&](unsigned Idx) { |
| 38032 | if (Idx > 3) |
| 38033 | return SDValue(); |
| 38034 | SDValue Src = peekThroughBitcasts(N.getOperand(Idx < 2 ? 0 : 1)); |
| 38035 | SmallVector<SDValue> SubOps; |
| 38036 | if (collectConcatOps(Src.getNode(), SubOps) && SubOps.size() == 2) |
| 38037 | return SubOps[Idx & 1]; |
| 38038 | unsigned NumElts = Src.getValueType().getVectorNumElements(); |
| 38039 | if ((Idx & 1) == 1 && Src.getOpcode() == ISD::INSERT_SUBVECTOR && |
| 38040 | Src.getOperand(1).getValueSizeInBits() == 128 && |
| 38041 | Src.getConstantOperandAPInt(2) == (NumElts / 2)) { |
| 38042 | return Src.getOperand(1); |
| 38043 | } |
| 38044 | return SDValue(); |
| 38045 | }; |
| 38046 | unsigned Imm = N.getConstantOperandVal(2); |
| 38047 | if (SDValue SubLo = FindSubVector128(Imm & 0x0F)) { |
| 38048 | if (SDValue SubHi = FindSubVector128((Imm & 0xF0) >> 4)) { |
| 38049 | MVT SubVT = VT.getHalfNumVectorElementsVT(); |
| 38050 | SubLo = DAG.getBitcast(SubVT, SubLo); |
| 38051 | SubHi = DAG.getBitcast(SubVT, SubHi); |
| 38052 | return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, SubLo, SubHi); |
| 38053 | } |
| 38054 | } |
| 38055 | return SDValue(); |
| 38056 | } |
| 38057 | case X86ISD::PSHUFD: |
| 38058 | case X86ISD::PSHUFLW: |
| 38059 | case X86ISD::PSHUFHW: |
| 38060 | Mask = getPSHUFShuffleMask(N); |
| 38061 | assert(Mask.size() == 4); |
| 38062 | break; |
| 38063 | case X86ISD::MOVSD: |
| 38064 | case X86ISD::MOVSS: { |
| 38065 | SDValue N0 = N.getOperand(0); |
| 38066 | SDValue N1 = N.getOperand(1); |
| 38067 | |
| 38068 | |
| 38069 | |
| 38070 | |
| 38071 | unsigned Opcode1 = N1.getOpcode(); |
| 38072 | if (Opcode1 == ISD::FADD || Opcode1 == ISD::FMUL || Opcode1 == ISD::FSUB || |
| 38073 | Opcode1 == ISD::FDIV) { |
| 38074 | SDValue N10 = N1.getOperand(0); |
| 38075 | SDValue N11 = N1.getOperand(1); |
| 38076 | if (N10 == N0 || |
| 38077 | (N11 == N0 && (Opcode1 == ISD::FADD || Opcode1 == ISD::FMUL))) { |
| 38078 | if (N10 != N0) |
| 38079 | std::swap(N10, N11); |
| 38080 | MVT SVT = VT.getVectorElementType(); |
| 38081 | SDValue ZeroIdx = DAG.getIntPtrConstant(0, DL); |
| 38082 | N10 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, SVT, N10, ZeroIdx); |
| 38083 | N11 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, SVT, N11, ZeroIdx); |
| 38084 | SDValue Scl = DAG.getNode(Opcode1, DL, SVT, N10, N11); |
| 38085 | SDValue SclVec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Scl); |
| 38086 | return DAG.getNode(Opcode, DL, VT, N0, SclVec); |
| 38087 | } |
| 38088 | } |
| 38089 | |
| 38090 | return SDValue(); |
| 38091 | } |
| 38092 | case X86ISD::INSERTPS: { |
| 38093 | assert(VT == MVT::v4f32 && "INSERTPS ValueType must be MVT::v4f32"); |
| 38094 | SDValue Op0 = N.getOperand(0); |
| 38095 | SDValue Op1 = N.getOperand(1); |
| 38096 | unsigned InsertPSMask = N.getConstantOperandVal(2); |
| 38097 | unsigned SrcIdx = (InsertPSMask >> 6) & 0x3; |
| 38098 | unsigned DstIdx = (InsertPSMask >> 4) & 0x3; |
| 38099 | unsigned ZeroMask = InsertPSMask & 0xF; |
| 38100 | |
| 38101 | |
| 38102 | if (((ZeroMask | (1u << DstIdx)) == 0xF) && !Op0.isUndef()) |
| 38103 | return DAG.getNode(X86ISD::INSERTPS, DL, VT, DAG.getUNDEF(VT), Op1, |
| 38104 | DAG.getTargetConstant(InsertPSMask, DL, MVT::i8)); |
| 38105 | |
| 38106 | |
| 38107 | if ((ZeroMask & (1u << DstIdx)) && !Op1.isUndef()) |
| 38108 | return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, DAG.getUNDEF(VT), |
| 38109 | DAG.getTargetConstant(InsertPSMask, DL, MVT::i8)); |
| 38110 | |
| 38111 | |
| 38112 | SmallVector<int, 8> TargetMask1; |
| 38113 | SmallVector<SDValue, 2> Ops1; |
| 38114 | APInt KnownUndef1, KnownZero1; |
| 38115 | if (getTargetShuffleAndZeroables(Op1, TargetMask1, Ops1, KnownUndef1, |
| 38116 | KnownZero1)) { |
| 38117 | if (KnownUndef1[SrcIdx] || KnownZero1[SrcIdx]) { |
| 38118 | |
| 38119 | InsertPSMask |= (1u << DstIdx); |
| 38120 | return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, DAG.getUNDEF(VT), |
| 38121 | DAG.getTargetConstant(InsertPSMask, DL, MVT::i8)); |
| 38122 | } |
| 38123 | |
| 38124 | int M = TargetMask1[SrcIdx]; |
| 38125 | assert(0 <= M && M < 8 && "Shuffle index out of range"); |
| 38126 | InsertPSMask = (InsertPSMask & 0x3f) | ((M & 0x3) << 6); |
| 38127 | Op1 = Ops1[M < 4 ? 0 : 1]; |
| 38128 | return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, Op1, |
| 38129 | DAG.getTargetConstant(InsertPSMask, DL, MVT::i8)); |
| 38130 | } |
| 38131 | |
| 38132 | |
| 38133 | SmallVector<int, 8> TargetMask0; |
| 38134 | SmallVector<SDValue, 2> Ops0; |
| 38135 | APInt KnownUndef0, KnownZero0; |
| 38136 | if (getTargetShuffleAndZeroables(Op0, TargetMask0, Ops0, KnownUndef0, |
| 38137 | KnownZero0)) { |
| 38138 | bool Updated = false; |
| 38139 | bool UseInput00 = false; |
| 38140 | bool UseInput01 = false; |
| 38141 | for (int i = 0; i != 4; ++i) { |
| 38142 | if ((InsertPSMask & (1u << i)) || (i == (int)DstIdx)) { |
| 38143 | |
| 38144 | continue; |
| 38145 | } else if (KnownUndef0[i] || KnownZero0[i]) { |
| 38146 | |
| 38147 | InsertPSMask |= (1u << i); |
| 38148 | Updated = true; |
| 38149 | continue; |
| 38150 | } |
| 38151 | |
| 38152 | |
| 38153 | int M = TargetMask0[i]; |
| 38154 | if (M != i && M != (i + 4)) |
| 38155 | return SDValue(); |
| 38156 | |
| 38157 | |
| 38158 | UseInput00 |= (0 <= M && M < 4); |
| 38159 | UseInput01 |= (4 <= M); |
| 38160 | } |
| 38161 | |
| 38162 | |
| 38163 | |
| 38164 | if (UseInput00 && !UseInput01) { |
| 38165 | Updated = true; |
| 38166 | Op0 = Ops0[0]; |
| 38167 | } else if (!UseInput00 && UseInput01) { |
| 38168 | Updated = true; |
| 38169 | Op0 = Ops0[1]; |
| 38170 | } |
| 38171 | |
| 38172 | if (Updated) |
| 38173 | return DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, Op1, |
| 38174 | DAG.getTargetConstant(InsertPSMask, DL, MVT::i8)); |
| 38175 | } |
| 38176 | |
| 38177 | |
| 38178 | |
| 38179 | |
| 38180 | if (Op1.getOpcode() == X86ISD::VBROADCAST_LOAD && Op1.hasOneUse()) { |
| 38181 | auto *MemIntr = cast<MemIntrinsicSDNode>(Op1); |
| 38182 | if (MemIntr->getMemoryVT().getScalarSizeInBits() == 32) { |
| 38183 | SDValue Load = DAG.getLoad(MVT::f32, DL, MemIntr->getChain(), |
| 38184 | MemIntr->getBasePtr(), |
| 38185 | MemIntr->getMemOperand()); |
| 38186 | SDValue Insert = DAG.getNode(X86ISD::INSERTPS, DL, VT, Op0, |
| 38187 | DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, |
| 38188 | Load), |
| 38189 | DAG.getTargetConstant(InsertPSMask & 0x3f, DL, MVT::i8)); |
| 38190 | DAG.ReplaceAllUsesOfValueWith(SDValue(MemIntr, 1), Load.getValue(1)); |
| 38191 | return Insert; |
| 38192 | } |
| 38193 | } |
| 38194 | |
| 38195 | return SDValue(); |
| 38196 | } |
| 38197 | default: |
| 38198 | return SDValue(); |
| 38199 | } |
| 38200 | |
| 38201 | |
| 38202 | if (isNoopShuffleMask(Mask)) |
| 38203 | return N.getOperand(0); |
| 38204 | |
| 38205 | |
| 38206 | SDValue V = N.getOperand(0); |
| 38207 | switch (N.getOpcode()) { |
| 38208 | default: |
| 38209 | break; |
| 38210 | case X86ISD::PSHUFLW: |
| 38211 | case X86ISD::PSHUFHW: |
| 38212 | assert(VT.getVectorElementType() == MVT::i16 && "Bad word shuffle type!"); |
| 38213 | |
| 38214 | |
| 38215 | |
| 38216 | |
| 38217 | if (makeArrayRef(Mask).equals({2, 3, 0, 1})) { |
| 38218 | int DMask[] = {0, 1, 2, 3}; |
| 38219 | int DOffset = N.getOpcode() == X86ISD::PSHUFLW ? 0 : 2; |
| 38220 | DMask[DOffset + 0] = DOffset + 1; |
| 38221 | DMask[DOffset + 1] = DOffset + 0; |
| 38222 | MVT DVT = MVT::getVectorVT(MVT::i32, VT.getVectorNumElements() / 2); |
| 38223 | V = DAG.getBitcast(DVT, V); |
| 38224 | V = DAG.getNode(X86ISD::PSHUFD, DL, DVT, V, |
| 38225 | getV4X86ShuffleImm8ForMask(DMask, DL, DAG)); |
| 38226 | return DAG.getBitcast(VT, V); |
| 38227 | } |
| 38228 | |
| 38229 | |
| 38230 | |
| 38231 | |
| 38232 | if (Mask[0] == Mask[1] && Mask[2] == Mask[3] && |
| 38233 | (V.getOpcode() == X86ISD::PSHUFLW || |
| 38234 | V.getOpcode() == X86ISD::PSHUFHW) && |
| 38235 | V.getOpcode() != N.getOpcode() && |
| 38236 | V.hasOneUse() && V.getOperand(0).hasOneUse()) { |
| 38237 | SDValue D = peekThroughOneUseBitcasts(V.getOperand(0)); |
| 38238 | if (D.getOpcode() == X86ISD::PSHUFD) { |
| 38239 | SmallVector<int, 4> VMask = getPSHUFShuffleMask(V); |
| 38240 | SmallVector<int, 4> DMask = getPSHUFShuffleMask(D); |
| 38241 | int NOffset = N.getOpcode() == X86ISD::PSHUFLW ? 0 : 4; |
| 38242 | int VOffset = V.getOpcode() == X86ISD::PSHUFLW ? 0 : 4; |
| 38243 | int WordMask[8]; |
| 38244 | for (int i = 0; i < 4; ++i) { |
| 38245 | WordMask[i + NOffset] = Mask[i] + NOffset; |
| 38246 | WordMask[i + VOffset] = VMask[i] + VOffset; |
| 38247 | } |
| 38248 | |
| 38249 | int MappedMask[8]; |
| 38250 | for (int i = 0; i < 8; ++i) |
| 38251 | MappedMask[i] = 2 * DMask[WordMask[i] / 2] + WordMask[i] % 2; |
| 38252 | if (makeArrayRef(MappedMask).equals({0, 0, 1, 1, 2, 2, 3, 3}) || |
| 38253 | makeArrayRef(MappedMask).equals({4, 4, 5, 5, 6, 6, 7, 7})) { |
| 38254 | |
| 38255 | V = DAG.getBitcast(VT, D.getOperand(0)); |
| 38256 | return DAG.getNode(MappedMask[0] == 0 ? X86ISD::UNPCKL |
| 38257 | : X86ISD::UNPCKH, |
| 38258 | DL, VT, V, V); |
| 38259 | } |
| 38260 | } |
| 38261 | } |
| 38262 | |
| 38263 | break; |
| 38264 | |
| 38265 | case X86ISD::PSHUFD: |
| 38266 | if (SDValue NewN = combineRedundantDWordShuffle(N, Mask, DAG)) |
| 38267 | return NewN; |
| 38268 | |
| 38269 | break; |
| 38270 | } |
| 38271 | |
| 38272 | return SDValue(); |
| 38273 | } |
| 38274 | |
| 38275 | |
| 38276 | |
| 38277 | |
| 38278 | static bool isAddSubOrSubAddMask(ArrayRef<int> Mask, bool &Op0Even) { |
| 38279 | |
| 38280 | int ParitySrc[2] = {-1, -1}; |
| 38281 | unsigned Size = Mask.size(); |
| 38282 | for (unsigned i = 0; i != Size; ++i) { |
| 38283 | int M = Mask[i]; |
| 38284 | if (M < 0) |
| 38285 | continue; |
| 38286 | |
| 38287 | |
| 38288 | if ((M % Size) != i) |
| 38289 | return false; |
| 38290 | |
| 38291 | |
| 38292 | int Src = M / Size; |
| 38293 | if (ParitySrc[i % 2] >= 0 && ParitySrc[i % 2] != Src) |
| 38294 | return false; |
| 38295 | ParitySrc[i % 2] = Src; |
| 38296 | } |
| 38297 | |
| 38298 | |
| 38299 | if (ParitySrc[0] < 0 || ParitySrc[1] < 0 || ParitySrc[0] == ParitySrc[1]) |
| 38300 | return false; |
| 38301 | |
| 38302 | Op0Even = ParitySrc[0] == 0; |
| 38303 | return true; |
| 38304 | } |
| 38305 | |
| 38306 | |
| 38307 | |
| 38308 | |
| 38309 | |
| 38310 | |
| 38311 | |
| 38312 | |
| 38313 | |
| 38314 | |
/// Returns true if \p N is a vector shuffle of an FADD and an FSUB of the
/// same two operands, arranged so it can be lowered as a single
/// ADDSUB/SUBADD node. On success, \p Opnd0 and \p Opnd1 receive the two
/// shared scalar-op operands and \p IsSubAdd is set to true when the even
/// lanes take the FADD result (i.e. the SUBADD form).
static bool isAddSubOrSubAdd(SDNode *N, const X86Subtarget &Subtarget,
                             SelectionDAG &DAG, SDValue &Opnd0, SDValue &Opnd1,
                             bool &IsSubAdd) {

  // ADDSUB/SUBADD only exist for legal floating-point vector types, and
  // require SSE3 at minimum.
  EVT VT = N->getValueType(0);
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (!Subtarget.hasSSE3() || !TLI.isTypeLegal(VT) ||
      !VT.getSimpleVT().isFloatingPoint())
    return false;

  // Only generic vector shuffles are matched here.
  if (N->getOpcode() != ISD::VECTOR_SHUFFLE)
    return false;

  SDValue V1 = N->getOperand(0);
  SDValue V2 = N->getOperand(1);

  // One input must be an FADD and the other an FSUB (in either order).
  if ((V1.getOpcode() != ISD::FADD && V1.getOpcode() != ISD::FSUB) ||
      (V2.getOpcode() != ISD::FADD && V2.getOpcode() != ISD::FSUB) ||
      V1.getOpcode() == V2.getOpcode())
    return false;

  // Both arithmetic nodes are consumed by the fold, so they must have no
  // other users.
  if (!V1->hasOneUse() || !V2->hasOneUse())
    return false;

  // The FADD and FSUB must operate on the same two values. Take LHS/RHS
  // from the FSUB (subtraction is not commutative, so its operand order is
  // canonical) and allow the FADD's operands in either order.
  SDValue LHS, RHS;
  if (V1.getOpcode() == ISD::FSUB) {
    LHS = V1->getOperand(0); RHS = V1->getOperand(1);
    if ((V2->getOperand(0) != LHS || V2->getOperand(1) != RHS) &&
        (V2->getOperand(0) != RHS || V2->getOperand(1) != LHS))
      return false;
  } else {
    assert(V2.getOpcode() == ISD::FSUB && "Unexpected opcode");
    LHS = V2->getOperand(0); RHS = V2->getOperand(1);
    if ((V1->getOperand(0) != LHS || V1->getOperand(1) != RHS) &&
        (V1->getOperand(0) != RHS || V1->getOperand(1) != LHS))
      return false;
  }

  // The shuffle mask must interleave the two inputs by lane parity.
  ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(N)->getMask();
  bool Op0Even;
  if (!isAddSubOrSubAddMask(Mask, Op0Even))
    return false;

  // SUBADD when the even lanes come from the FADD input.
  IsSubAdd = Op0Even ? V1->getOpcode() == ISD::FADD
                     : V2->getOpcode() == ISD::FADD;

  Opnd0 = LHS;
  Opnd1 = RHS;
  return true;
}
| 38373 | |
| 38374 | |
| 38375 | static SDValue combineShuffleToFMAddSub(SDNode *N, |
| 38376 | const X86Subtarget &Subtarget, |
| 38377 | SelectionDAG &DAG) { |
| 38378 | |
| 38379 | |
| 38380 | |
| 38381 | if (N->getOpcode() != ISD::VECTOR_SHUFFLE) |
| 38382 | return SDValue(); |
| 38383 | |
| 38384 | MVT VT = N->getSimpleValueType(0); |
| 38385 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
| 38386 | if (!Subtarget.hasAnyFMA() || !TLI.isTypeLegal(VT)) |
| 38387 | return SDValue(); |
| 38388 | |
| 38389 | |
| 38390 | SDValue Op0 = N->getOperand(0); |
| 38391 | SDValue Op1 = N->getOperand(1); |
| 38392 | SDValue FMAdd = Op0, FMSub = Op1; |
| 38393 | if (FMSub.getOpcode() != X86ISD::FMSUB) |
| 38394 | std::swap(FMAdd, FMSub); |
| 38395 | |
| 38396 | if (FMAdd.getOpcode() != ISD::FMA || FMSub.getOpcode() != X86ISD::FMSUB || |
| 38397 | FMAdd.getOperand(0) != FMSub.getOperand(0) || !FMAdd.hasOneUse() || |
| 38398 | FMAdd.getOperand(1) != FMSub.getOperand(1) || !FMSub.hasOneUse() || |
| 38399 | FMAdd.getOperand(2) != FMSub.getOperand(2)) |
| 38400 | return SDValue(); |
| 38401 | |
| 38402 | |
| 38403 | ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(N)->getMask(); |
| 38404 | bool Op0Even; |
| 38405 | if (!isAddSubOrSubAddMask(Mask, Op0Even)) |
| 38406 | return SDValue(); |
| 38407 | |
| 38408 | |
| 38409 | SDLoc DL(N); |
| 38410 | bool IsSubAdd = Op0Even ? Op0 == FMAdd : Op1 == FMAdd; |
| 38411 | unsigned Opcode = IsSubAdd ? X86ISD::FMSUBADD : X86ISD::FMADDSUB; |
| 38412 | return DAG.getNode(Opcode, DL, VT, FMAdd.getOperand(0), FMAdd.getOperand(1), |
| 38413 | FMAdd.getOperand(2)); |
| 38414 | } |
| 38415 | |
| 38416 | |
| 38417 | |
| 38418 | static SDValue combineShuffleToAddSubOrFMAddSub(SDNode *N, |
| 38419 | const X86Subtarget &Subtarget, |
| 38420 | SelectionDAG &DAG) { |
| 38421 | if (SDValue V = combineShuffleToFMAddSub(N, Subtarget, DAG)) |
| 38422 | return V; |
| 38423 | |
| 38424 | SDValue Opnd0, Opnd1; |
| 38425 | bool IsSubAdd; |
| 38426 | if (!isAddSubOrSubAdd(N, Subtarget, DAG, Opnd0, Opnd1, IsSubAdd)) |
| 38427 | return SDValue(); |
| 38428 | |
| 38429 | MVT VT = N->getSimpleValueType(0); |
| 38430 | SDLoc DL(N); |
| 38431 | |
| 38432 | |
| 38433 | SDValue Opnd2; |
| 38434 | if (isFMAddSubOrFMSubAdd(Subtarget, DAG, Opnd0, Opnd1, Opnd2, 2)) { |
| 38435 | unsigned Opc = IsSubAdd ? X86ISD::FMSUBADD : X86ISD::FMADDSUB; |
| 38436 | return DAG.getNode(Opc, DL, VT, Opnd0, Opnd1, Opnd2); |
| 38437 | } |
| 38438 | |
| 38439 | if (IsSubAdd) |
| 38440 | return SDValue(); |
| 38441 | |
| 38442 | |
| 38443 | |
| 38444 | |
| 38445 | if (VT.is512BitVector()) |
| 38446 | return SDValue(); |
| 38447 | |
| 38448 | return DAG.getNode(X86ISD::ADDSUB, DL, VT, Opnd0, Opnd1); |
| 38449 | } |
| 38450 | |
| 38451 | |
| 38452 | |
| 38453 | |
| 38454 | static SDValue combineShuffleOfConcatUndef(SDNode *N, SelectionDAG &DAG, |
| 38455 | const X86Subtarget &Subtarget) { |
| 38456 | if (!Subtarget.hasAVX2() || !isa<ShuffleVectorSDNode>(N)) |
| 38457 | return SDValue(); |
| 38458 | |
| 38459 | EVT VT = N->getValueType(0); |
| 38460 | |
| 38461 | |
| 38462 | if (!VT.is128BitVector() && !VT.is256BitVector()) |
| 38463 | return SDValue(); |
| 38464 | |
| 38465 | if (VT.getVectorElementType() != MVT::i32 && |
| 38466 | VT.getVectorElementType() != MVT::i64 && |
| 38467 | VT.getVectorElementType() != MVT::f32 && |
| 38468 | VT.getVectorElementType() != MVT::f64) |
| 38469 | return SDValue(); |
| 38470 | |
| 38471 | SDValue N0 = N->getOperand(0); |
| 38472 | SDValue N1 = N->getOperand(1); |
| 38473 | |
| 38474 | |
| 38475 | if (N0.getOpcode() != ISD::CONCAT_VECTORS || |
| 38476 | N1.getOpcode() != ISD::CONCAT_VECTORS || N0.getNumOperands() != 2 || |
| 38477 | N1.getNumOperands() != 2 || !N0.getOperand(1).isUndef() || |
| 38478 | !N1.getOperand(1).isUndef()) |
| 38479 | return SDValue(); |
| 38480 | |
| 38481 | |
| 38482 | |
| 38483 | SmallVector<int, 8> Mask; |
| 38484 | int NumElts = VT.getVectorNumElements(); |
| 38485 | |
| 38486 | ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N); |
| 38487 | for (int Elt : SVOp->getMask()) |
| 38488 | Mask.push_back(Elt < NumElts ? Elt : (Elt - NumElts / 2)); |
| 38489 | |
| 38490 | SDLoc DL(N); |
| 38491 | SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, N0.getOperand(0), |
| 38492 | N1.getOperand(0)); |
| 38493 | return DAG.getVectorShuffle(VT, DL, Concat, DAG.getUNDEF(VT), Mask); |
| 38494 | } |
| 38495 | |
| 38496 | |
| 38497 | |
| 38498 | |
| 38499 | static SDValue narrowShuffle(ShuffleVectorSDNode *Shuf, SelectionDAG &DAG) { |
| 38500 | if (!Shuf->getValueType(0).isSimple()) |
| 38501 | return SDValue(); |
| 38502 | MVT VT = Shuf->getSimpleValueType(0); |
| 38503 | if (!VT.is256BitVector() && !VT.is512BitVector()) |
| 38504 | return SDValue(); |
| 38505 | |
| 38506 | |
| 38507 | ArrayRef<int> Mask = Shuf->getMask(); |
| 38508 | if (!isUndefUpperHalf(Mask)) |
| 38509 | return SDValue(); |
| 38510 | |
| 38511 | |
| 38512 | |
| 38513 | int HalfIdx1, HalfIdx2; |
| 38514 | SmallVector<int, 8> HalfMask(Mask.size() / 2); |
| 38515 | if (!getHalfShuffleMask(Mask, HalfMask, HalfIdx1, HalfIdx2) || |
| 38516 | (HalfIdx1 % 2 == 1) || (HalfIdx2 % 2 == 1)) |
| 38517 | return SDValue(); |
| 38518 | |
| 38519 | |
| 38520 | |
| 38521 | |
| 38522 | |
| 38523 | |
| 38524 | return getShuffleHalfVectors(SDLoc(Shuf), Shuf->getOperand(0), |
| 38525 | Shuf->getOperand(1), HalfMask, HalfIdx1, |
| 38526 | HalfIdx2, false, DAG, true); |
| 38527 | } |
| 38528 | |
/// Top-level DAG combine for shuffle nodes: tries a sequence of shuffle
/// folds and returns the replacement value, or an empty SDValue if no fold
/// applied. The helpers are attempted in a fixed order; each may bail out
/// on its own preconditions.
static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,
                              TargetLowering::DAGCombinerInfo &DCI,
                              const X86Subtarget &Subtarget) {
  // If the upper half of a wide shuffle result is undef, narrow it to a
  // half-width shuffle.
  if (auto *Shuf = dyn_cast<ShuffleVectorSDNode>(N))
    if (SDValue V = narrowShuffle(Shuf, DAG))
      return V;

  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  // Fold alternating-lane shuffles of FADD/FSUB (or FMA/FMSUB) pairs into
  // ADDSUB / FMADDSUB / FMSUBADD nodes; only for legal types.
  if (TLI.isTypeLegal(VT))
    if (SDValue AddSub = combineShuffleToAddSubOrFMAddSub(N, Subtarget, DAG))
      return AddSub;

  // Try to merge the shuffled loads into a single consecutive load.
  if (SDValue LD = combineToConsecutiveLoads(
          VT, SDValue(N, 0), dl, DAG, Subtarget, true))
    return LD;

  // Fold shuffle(concat(X, undef), concat(Y, undef)) into a shuffle of a
  // single concat (the helper requires AVX2).
  if (SDValue ShufConcat = combineShuffleOfConcatUndef(N, DAG, Subtarget))
    return ShufConcat;

  if (isTargetShuffle(N->getOpcode())) {
    SDValue Op(N, 0);
    // Target-specific shuffle peepholes.
    if (SDValue Shuffle = combineTargetShuffle(Op, DAG, DCI, Subtarget))
      return Shuffle;

    // Try to combine chains of target shuffles into a simpler sequence.
    if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget))
      return Res;

    // Simplify the shuffle's sources based on which elements are demanded;
    // returning SDValue(N, 0) signals the combiner the node was updated in
    // place.
    APInt KnownUndef, KnownZero;
    APInt DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements());
    if (TLI.SimplifyDemandedVectorElts(Op, DemandedElts, KnownUndef, KnownZero,
                                       DCI))
      return SDValue(N, 0);
  }

  return SDValue();
}
| 38583 | |
| 38584 | |
| 38585 | |
/// For a variable-mask target shuffle \p Op, try to simplify the mask
/// operand (operand index \p MaskIndex) given which result elements are
/// demanded. If the mask is loaded from a constant pool, rebuild the
/// constant with undef in the non-demanded positions and reload it.
/// Returns true if any change was made.
bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetShuffle(
    SDValue Op, const APInt &DemandedElts, unsigned MaskIndex,
    TargetLowering::TargetLoweringOpt &TLO, unsigned Depth) const {

  // Nothing to do when every element is demanded.
  unsigned NumElts = DemandedElts.getBitWidth();
  if (DemandedElts.isAllOnesValue())
    return false;

  // Only rewrite a mask we solely own; other users need the full mask.
  SDValue Mask = Op.getOperand(MaskIndex);
  if (!Mask.hasOneUse())
    return false;

  // First let the generic demanded-elements machinery try the mask.
  APInt MaskUndef, MaskZero;
  if (SimplifyDemandedVectorElts(Mask, DemandedElts, MaskUndef, MaskZero, TLO,
                                 Depth + 1))
    return true;

  // Otherwise, look through bitcasts for a constant-pool load feeding the
  // mask.
  SDValue BC = peekThroughOneUseBitcasts(Mask);
  EVT BCVT = BC.getValueType();
  auto *Load = dyn_cast<LoadSDNode>(BC);
  if (!Load)
    return false;

  const Constant *C = getTargetConstantFromNode(Load);
  if (!C)
    return false;

  // The constant must be a vector of the same total width as the mask.
  Type *CTy = C->getType();
  if (!CTy->isVectorTy() ||
      CTy->getPrimitiveSizeInBits() != Mask.getValueSizeInBits())
    return false;

  // Handle scaling for masks whose constant has 1x or 2x the demanded
  // element count (e.g. a mask stored at half the element width).
  unsigned NumCstElts = cast<FixedVectorType>(CTy)->getNumElements();
  if (NumCstElts != NumElts && NumCstElts != (NumElts * 2))
    return false;
  unsigned Scale = NumCstElts / NumElts;

  // Rebuild the constant, replacing elements of non-demanded lanes with
  // undef; track whether anything actually changed.
  bool Simplified = false;
  SmallVector<Constant *, 32> ConstVecOps;
  for (unsigned i = 0; i != NumCstElts; ++i) {
    Constant *Elt = C->getAggregateElement(i);
    if (!DemandedElts[i / Scale] && !isa<UndefValue>(Elt)) {
      ConstVecOps.push_back(UndefValue::get(Elt->getType()));
      Simplified = true;
      continue;
    }
    ConstVecOps.push_back(Elt);
  }
  if (!Simplified)
    return false;

  // Emit a load of the simplified constant and splice it in as the new
  // mask, bitcast back to the original mask type.
  SDLoc DL(Op);
  SDValue CV = TLO.DAG.getConstantPool(ConstantVector::get(ConstVecOps), BCVT);
  SDValue LegalCV = LowerConstantPool(CV, TLO.DAG);
  SDValue NewMask = TLO.DAG.getLoad(
      BCVT, DL, TLO.DAG.getEntryNode(), LegalCV,
      MachinePointerInfo::getConstantPool(TLO.DAG.getMachineFunction()),
      Load->getAlign());
  return TLO.CombineTo(Mask, TLO.DAG.getBitcast(Mask.getValueType(), NewMask));
}
| 38652 | |
| 38653 | bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode( |
| 38654 | SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, APInt &KnownZero, |
| 38655 | TargetLoweringOpt &TLO, unsigned Depth) const { |
| 38656 | int NumElts = DemandedElts.getBitWidth(); |
| 38657 | unsigned Opc = Op.getOpcode(); |
| 38658 | EVT VT = Op.getValueType(); |
| 38659 | |
| 38660 | |
| 38661 | switch (Opc) { |
| 38662 | case X86ISD::PMULDQ: |
| 38663 | case X86ISD::PMULUDQ: { |
| 38664 | APInt LHSUndef, LHSZero; |
| 38665 | APInt RHSUndef, RHSZero; |
| 38666 | SDValue LHS = Op.getOperand(0); |
| 38667 | SDValue RHS = Op.getOperand(1); |
| 38668 | if (SimplifyDemandedVectorElts(LHS, DemandedElts, LHSUndef, LHSZero, TLO, |
| 38669 | Depth + 1)) |
| 38670 | return true; |
| 38671 | if (SimplifyDemandedVectorElts(RHS, DemandedElts, RHSUndef, RHSZero, TLO, |
| 38672 | Depth + 1)) |
| 38673 | return true; |
| 38674 | |
| 38675 | KnownZero = LHSZero | RHSZero; |
| 38676 | break; |
| 38677 | } |
| 38678 | case X86ISD::VSHL: |
| 38679 | case X86ISD::VSRL: |
| 38680 | case X86ISD::VSRA: { |
| 38681 | |
| 38682 | SDValue Amt = Op.getOperand(1); |
| 38683 | MVT AmtVT = Amt.getSimpleValueType(); |
| 38684 | assert(AmtVT.is128BitVector() && "Unexpected value type"); |
| 38685 | |
| 38686 | |
| 38687 | |
| 38688 | bool AssumeSingleUse = llvm::all_of(Amt->uses(), [&Amt](SDNode *Use) { |
| 38689 | unsigned UseOpc = Use->getOpcode(); |
| 38690 | return (UseOpc == X86ISD::VSHL || UseOpc == X86ISD::VSRL || |
| 38691 | UseOpc == X86ISD::VSRA) && |
| 38692 | Use->getOperand(0) != Amt; |
| 38693 | }); |
| 38694 | |
| 38695 | APInt AmtUndef, AmtZero; |
| 38696 | unsigned NumAmtElts = AmtVT.getVectorNumElements(); |
| 38697 | APInt AmtElts = APInt::getLowBitsSet(NumAmtElts, NumAmtElts / 2); |
| 38698 | if (SimplifyDemandedVectorElts(Amt, AmtElts, AmtUndef, AmtZero, TLO, |
| 38699 | Depth + 1, AssumeSingleUse)) |
| 38700 | return true; |
| 38701 | LLVM_FALLTHROUGH; |
| 38702 | } |
| 38703 | case X86ISD::VSHLI: |
| 38704 | case X86ISD::VSRLI: |
| 38705 | case X86ISD::VSRAI: { |
| 38706 | SDValue Src = Op.getOperand(0); |
| 38707 | APInt SrcUndef; |
| 38708 | if (SimplifyDemandedVectorElts(Src, DemandedElts, SrcUndef, KnownZero, TLO, |
| 38709 | Depth + 1)) |
| 38710 | return true; |
| 38711 | |
| 38712 | |
| 38713 | if (!DemandedElts.isAllOnesValue()) |
| 38714 | if (SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts( |
| 38715 | Src, DemandedElts, TLO.DAG, Depth + 1)) |
| 38716 | return TLO.CombineTo( |
| 38717 | Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewSrc, Op.getOperand(1))); |
| 38718 | break; |
| 38719 | } |
| 38720 | case X86ISD::KSHIFTL: { |
| 38721 | SDValue Src = Op.getOperand(0); |
| 38722 | auto *Amt = cast<ConstantSDNode>(Op.getOperand(1)); |
| 38723 | assert(Amt->getAPIntValue().ult(NumElts) && "Out of range shift amount"); |
| 38724 | unsigned ShiftAmt = Amt->getZExtValue(); |
| 38725 | |
| 38726 | if (ShiftAmt == 0) |
| 38727 | return TLO.CombineTo(Op, Src); |
| 38728 | |
| 38729 | |
| 38730 | |
| 38731 | |
| 38732 | if (Src.getOpcode() == X86ISD::KSHIFTR) { |
| 38733 | if (!DemandedElts.intersects(APInt::getLowBitsSet(NumElts, ShiftAmt))) { |
| 38734 | unsigned C1 = Src.getConstantOperandVal(1); |
| 38735 | unsigned NewOpc = X86ISD::KSHIFTL; |
| 38736 | int Diff = ShiftAmt - C1; |
| 38737 | if (Diff < 0) { |
| 38738 | Diff = -Diff; |
| 38739 | NewOpc = X86ISD::KSHIFTR; |
| 38740 | } |
| 38741 | |
| 38742 | SDLoc dl(Op); |
| 38743 | SDValue NewSA = TLO.DAG.getTargetConstant(Diff, dl, MVT::i8); |
| 38744 | return TLO.CombineTo( |
| 38745 | Op, TLO.DAG.getNode(NewOpc, dl, VT, Src.getOperand(0), NewSA)); |
| 38746 | } |
| 38747 | } |
| 38748 | |
| 38749 | APInt DemandedSrc = DemandedElts.lshr(ShiftAmt); |
| 38750 | if (SimplifyDemandedVectorElts(Src, DemandedSrc, KnownUndef, KnownZero, TLO, |
| 38751 | Depth + 1)) |
| 38752 | return true; |
| 38753 | |
| 38754 | KnownUndef <<= ShiftAmt; |
| 38755 | KnownZero <<= ShiftAmt; |
| 38756 | KnownZero.setLowBits(ShiftAmt); |
| 38757 | break; |
| 38758 | } |
| 38759 | case X86ISD::KSHIFTR: { |
| 38760 | SDValue Src = Op.getOperand(0); |
| 38761 | auto *Amt = cast<ConstantSDNode>(Op.getOperand(1)); |
| 38762 | assert(Amt->getAPIntValue().ult(NumElts) && "Out of range shift amount"); |
| 38763 | unsigned ShiftAmt = Amt->getZExtValue(); |
| 38764 | |
| 38765 | if (ShiftAmt == 0) |
| 38766 | return TLO.CombineTo(Op, Src); |
| 38767 | |
| 38768 | |
| 38769 | |
| 38770 | |
| 38771 | if (Src.getOpcode() == X86ISD::KSHIFTL) { |
| 38772 | if (!DemandedElts.intersects(APInt::getHighBitsSet(NumElts, ShiftAmt))) { |
| 38773 | unsigned C1 = Src.getConstantOperandVal(1); |
| 38774 | unsigned NewOpc = X86ISD::KSHIFTR; |
| 38775 | int Diff = ShiftAmt - C1; |
| 38776 | if (Diff < 0) { |
| 38777 | Diff = -Diff; |
| 38778 | NewOpc = X86ISD::KSHIFTL; |
| 38779 | } |
| 38780 | |
| 38781 | SDLoc dl(Op); |
| 38782 | SDValue NewSA = TLO.DAG.getTargetConstant(Diff, dl, MVT::i8); |
| 38783 | return TLO.CombineTo( |
| 38784 | Op, TLO.DAG.getNode(NewOpc, dl, VT, Src.getOperand(0), NewSA)); |
| 38785 | } |
| 38786 | } |
| 38787 | |
| 38788 | APInt DemandedSrc = DemandedElts.shl(ShiftAmt); |
| 38789 | if (SimplifyDemandedVectorElts(Src, DemandedSrc, KnownUndef, KnownZero, TLO, |
| 38790 | Depth + 1)) |
| 38791 | return true; |
| 38792 | |
| 38793 | KnownUndef.lshrInPlace(ShiftAmt); |
| 38794 | KnownZero.lshrInPlace(ShiftAmt); |
| 38795 | KnownZero.setHighBits(ShiftAmt); |
| 38796 | break; |
| 38797 | } |
| 38798 | case X86ISD::CVTSI2P: |
| 38799 | case X86ISD::CVTUI2P: { |
| 38800 | SDValue Src = Op.getOperand(0); |
| 38801 | MVT SrcVT = Src.getSimpleValueType(); |
| 38802 | APInt SrcUndef, SrcZero; |
| 38803 | APInt SrcElts = DemandedElts.zextOrTrunc(SrcVT.getVectorNumElements()); |
| 38804 | if (SimplifyDemandedVectorElts(Src, SrcElts, SrcUndef, SrcZero, TLO, |
| 38805 | Depth + 1)) |
| 38806 | return true; |
| 38807 | break; |
| 38808 | } |
| 38809 | case X86ISD::PACKSS: |
| 38810 | case X86ISD::PACKUS: { |
| 38811 | SDValue N0 = Op.getOperand(0); |
| 38812 | SDValue N1 = Op.getOperand(1); |
| 38813 | |
| 38814 | APInt DemandedLHS, DemandedRHS; |
| 38815 | getPackDemandedElts(VT, DemandedElts, DemandedLHS, DemandedRHS); |
| 38816 | |
| 38817 | APInt LHSUndef, LHSZero; |
| 38818 | if (SimplifyDemandedVectorElts(N0, DemandedLHS, LHSUndef, LHSZero, TLO, |
| 38819 | Depth + 1)) |
| 38820 | return true; |
| 38821 | APInt RHSUndef, RHSZero; |
| 38822 | if (SimplifyDemandedVectorElts(N1, DemandedRHS, RHSUndef, RHSZero, TLO, |
| 38823 | Depth + 1)) |
| 38824 | return true; |
| 38825 | |
| 38826 | |
| 38827 | |
| 38828 | |
| 38829 | |
| 38830 | if (!DemandedElts.isAllOnesValue()) { |
| 38831 | SDValue NewN0 = SimplifyMultipleUseDemandedVectorElts(N0, DemandedLHS, |
| 38832 | TLO.DAG, Depth + 1); |
| 38833 | SDValue NewN1 = SimplifyMultipleUseDemandedVectorElts(N1, DemandedRHS, |
| 38834 | TLO.DAG, Depth + 1); |
| 38835 | if (NewN0 || NewN1) { |
| 38836 | NewN0 = NewN0 ? NewN0 : N0; |
| 38837 | NewN1 = NewN1 ? NewN1 : N1; |
| 38838 | return TLO.CombineTo(Op, |
| 38839 | TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewN0, NewN1)); |
| 38840 | } |
| 38841 | } |
| 38842 | break; |
| 38843 | } |
| 38844 | case X86ISD::HADD: |
| 38845 | case X86ISD::HSUB: |
| 38846 | case X86ISD::FHADD: |
| 38847 | case X86ISD::FHSUB: { |
| 38848 | SDValue N0 = Op.getOperand(0); |
| 38849 | SDValue N1 = Op.getOperand(1); |
| 38850 | |
| 38851 | APInt DemandedLHS, DemandedRHS; |
| 38852 | getHorizDemandedElts(VT, DemandedElts, DemandedLHS, DemandedRHS); |
| 38853 | |
| 38854 | APInt LHSUndef, LHSZero; |
| 38855 | if (SimplifyDemandedVectorElts(N0, DemandedLHS, LHSUndef, LHSZero, TLO, |
| 38856 | Depth + 1)) |
| 38857 | return true; |
| 38858 | APInt RHSUndef, RHSZero; |
| 38859 | if (SimplifyDemandedVectorElts(N1, DemandedRHS, RHSUndef, RHSZero, TLO, |
| 38860 | Depth + 1)) |
| 38861 | return true; |
| 38862 | |
| 38863 | |
| 38864 | |
| 38865 | |
| 38866 | |
| 38867 | if (N0 != N1 && !DemandedElts.isAllOnesValue()) { |
| 38868 | SDValue NewN0 = SimplifyMultipleUseDemandedVectorElts(N0, DemandedLHS, |
| 38869 | TLO.DAG, Depth + 1); |
| 38870 | SDValue NewN1 = SimplifyMultipleUseDemandedVectorElts(N1, DemandedRHS, |
| 38871 | TLO.DAG, Depth + 1); |
| 38872 | if (NewN0 || NewN1) { |
| 38873 | NewN0 = NewN0 ? NewN0 : N0; |
| 38874 | NewN1 = NewN1 ? NewN1 : N1; |
| 38875 | return TLO.CombineTo(Op, |
| 38876 | TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewN0, NewN1)); |
| 38877 | } |
| 38878 | } |
| 38879 | break; |
| 38880 | } |
| 38881 | case X86ISD::VTRUNC: |
| 38882 | case X86ISD::VTRUNCS: |
| 38883 | case X86ISD::VTRUNCUS: { |
| 38884 | SDValue Src = Op.getOperand(0); |
| 38885 | MVT SrcVT = Src.getSimpleValueType(); |
| 38886 | APInt DemandedSrc = DemandedElts.zextOrTrunc(SrcVT.getVectorNumElements()); |
| 38887 | APInt SrcUndef, SrcZero; |
| 38888 | if (SimplifyDemandedVectorElts(Src, DemandedSrc, SrcUndef, SrcZero, TLO, |
| 38889 | Depth + 1)) |
| 38890 | return true; |
| 38891 | KnownZero = SrcZero.zextOrTrunc(NumElts); |
| 38892 | KnownUndef = SrcUndef.zextOrTrunc(NumElts); |
| 38893 | break; |
| 38894 | } |
| 38895 | case X86ISD::BLENDV: { |
| 38896 | APInt SelUndef, SelZero; |
| 38897 | if (SimplifyDemandedVectorElts(Op.getOperand(0), DemandedElts, SelUndef, |
| 38898 | SelZero, TLO, Depth + 1)) |
| 38899 | return true; |
| 38900 | |
| 38901 | |
| 38902 | APInt LHSUndef, LHSZero; |
| 38903 | if (SimplifyDemandedVectorElts(Op.getOperand(1), DemandedElts, LHSUndef, |
| 38904 | LHSZero, TLO, Depth + 1)) |
| 38905 | return true; |
| 38906 | |
| 38907 | APInt RHSUndef, RHSZero; |
| 38908 | if (SimplifyDemandedVectorElts(Op.getOperand(2), DemandedElts, RHSUndef, |
| 38909 | RHSZero, TLO, Depth + 1)) |
| 38910 | return true; |
| 38911 | |
| 38912 | KnownZero = LHSZero & RHSZero; |
| 38913 | KnownUndef = LHSUndef & RHSUndef; |
| 38914 | break; |
| 38915 | } |
| 38916 | case X86ISD::VZEXT_MOVL: { |
| 38917 | |
| 38918 | SDValue Src = Op.getOperand(0); |
| 38919 | APInt DemandedUpperElts = DemandedElts; |
| 38920 | DemandedUpperElts.clearLowBits(1); |
| 38921 | if (TLO.DAG.computeKnownBits(Src, DemandedUpperElts, Depth + 1).isZero()) |
| 38922 | return TLO.CombineTo(Op, Src); |
| 38923 | break; |
| 38924 | } |
| 38925 | case X86ISD::VBROADCAST: { |
| 38926 | SDValue Src = Op.getOperand(0); |
| 38927 | MVT SrcVT = Src.getSimpleValueType(); |
| 38928 | if (!SrcVT.isVector()) |
| 38929 | break; |
| 38930 | |
| 38931 | if (DemandedElts == 1) { |
| 38932 | if (Src.getValueType() != VT) |
| 38933 | Src = widenSubVector(VT.getSimpleVT(), Src, false, Subtarget, TLO.DAG, |
| 38934 | SDLoc(Op)); |
| 38935 | return TLO.CombineTo(Op, Src); |
| 38936 | } |
| 38937 | APInt SrcUndef, SrcZero; |
| 38938 | APInt SrcElts = APInt::getOneBitSet(SrcVT.getVectorNumElements(), 0); |
| 38939 | if (SimplifyDemandedVectorElts(Src, SrcElts, SrcUndef, SrcZero, TLO, |
| 38940 | Depth + 1)) |
| 38941 | return true; |
| 38942 | |
| 38943 | |
| 38944 | if (SDValue NewSrc = SimplifyMultipleUseDemandedVectorElts( |
| 38945 | Src, SrcElts, TLO.DAG, Depth + 1)) |
| 38946 | return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewSrc)); |
| 38947 | break; |
| 38948 | } |
| 38949 | case X86ISD::VPERMV: |
| 38950 | if (SimplifyDemandedVectorEltsForTargetShuffle(Op, DemandedElts, 0, TLO, |
| 38951 | Depth)) |
| 38952 | return true; |
| 38953 | break; |
| 38954 | case X86ISD::PSHUFB: |
| 38955 | case X86ISD::VPERMV3: |
| 38956 | case X86ISD::VPERMILPV: |
| 38957 | if (SimplifyDemandedVectorEltsForTargetShuffle(Op, DemandedElts, 1, TLO, |
| 38958 | Depth)) |
| 38959 | return true; |
| 38960 | break; |
| 38961 | case X86ISD::VPPERM: |
| 38962 | case X86ISD::VPERMIL2: |
| 38963 | if (SimplifyDemandedVectorEltsForTargetShuffle(Op, DemandedElts, 2, TLO, |
| 38964 | Depth)) |
| 38965 | return true; |
| 38966 | break; |
| 38967 | } |
| 38968 | |
| 38969 | |
| 38970 | |
| 38971 | |
| 38972 | if ((VT.is256BitVector() || VT.is512BitVector()) && |
| 38973 | DemandedElts.lshr(NumElts / 2) == 0) { |
| 38974 | unsigned SizeInBits = VT.getSizeInBits(); |
| 38975 | unsigned ExtSizeInBits = SizeInBits / 2; |
| 38976 | |
| 38977 | |
| 38978 | if (VT.is512BitVector() && DemandedElts.lshr(NumElts / 4) == 0) |
| 38979 | ExtSizeInBits = SizeInBits / 4; |
| 38980 | |
| 38981 | switch (Opc) { |
| 38982 | |
| 38983 | case X86ISD::VBROADCAST: { |
| 38984 | SDLoc DL(Op); |
| 38985 | SDValue Src = Op.getOperand(0); |
| 38986 | if (Src.getValueSizeInBits() > ExtSizeInBits) |
| 38987 | Src = extractSubVector(Src, 0, TLO.DAG, DL, ExtSizeInBits); |
| 38988 | EVT BcstVT = EVT::getVectorVT(*TLO.DAG.getContext(), VT.getScalarType(), |
| 38989 | ExtSizeInBits / VT.getScalarSizeInBits()); |
| 38990 | SDValue Bcst = TLO.DAG.getNode(X86ISD::VBROADCAST, DL, BcstVT, Src); |
| 38991 | return TLO.CombineTo(Op, insertSubVector(TLO.DAG.getUNDEF(VT), Bcst, 0, |
| 38992 | TLO.DAG, DL, ExtSizeInBits)); |
| 38993 | } |
| 38994 | case X86ISD::VBROADCAST_LOAD: { |
| 38995 | SDLoc DL(Op); |
| 38996 | auto *MemIntr = cast<MemIntrinsicSDNode>(Op); |
| 38997 | EVT BcstVT = EVT::getVectorVT(*TLO.DAG.getContext(), VT.getScalarType(), |
| 38998 | ExtSizeInBits / VT.getScalarSizeInBits()); |
| 38999 | SDVTList Tys = TLO.DAG.getVTList(BcstVT, MVT::Other); |
| 39000 | SDValue Ops[] = {MemIntr->getOperand(0), MemIntr->getOperand(1)}; |
| 39001 | SDValue Bcst = TLO.DAG.getMemIntrinsicNode( |
| 39002 | X86ISD::VBROADCAST_LOAD, DL, Tys, Ops, MemIntr->getMemoryVT(), |
| 39003 | MemIntr->getMemOperand()); |
| 39004 | TLO.DAG.makeEquivalentMemoryOrdering(SDValue(MemIntr, 1), |
| 39005 | Bcst.getValue(1)); |
| 39006 | return TLO.CombineTo(Op, insertSubVector(TLO.DAG.getUNDEF(VT), Bcst, 0, |
| 39007 | TLO.DAG, DL, ExtSizeInBits)); |
| 39008 | } |
| 39009 | |
| 39010 | case X86ISD::SUBV_BROADCAST_LOAD: { |
| 39011 | auto *MemIntr = cast<MemIntrinsicSDNode>(Op); |
| 39012 | EVT MemVT = MemIntr->getMemoryVT(); |
| 39013 | if (ExtSizeInBits == MemVT.getStoreSizeInBits()) { |
| 39014 | SDLoc DL(Op); |
| 39015 | SDValue Ld = |
| 39016 | TLO.DAG.getLoad(MemVT, DL, MemIntr->getChain(), |
| 39017 | MemIntr->getBasePtr(), MemIntr->getMemOperand()); |
| 39018 | TLO.DAG.makeEquivalentMemoryOrdering(SDValue(MemIntr, 1), |
| 39019 | Ld.getValue(1)); |
| 39020 | return TLO.CombineTo(Op, insertSubVector(TLO.DAG.getUNDEF(VT), Ld, 0, |
| 39021 | TLO.DAG, DL, ExtSizeInBits)); |
| 39022 | } else if ((ExtSizeInBits % MemVT.getStoreSizeInBits()) == 0) { |
| 39023 | SDLoc DL(Op); |
| 39024 | EVT BcstVT = EVT::getVectorVT(*TLO.DAG.getContext(), VT.getScalarType(), |
| 39025 | ExtSizeInBits / VT.getScalarSizeInBits()); |
| 39026 | SDVTList Tys = TLO.DAG.getVTList(BcstVT, MVT::Other); |
| 39027 | SDValue Ops[] = {MemIntr->getOperand(0), MemIntr->getOperand(1)}; |
| 39028 | SDValue Bcst = |
| 39029 | TLO.DAG.getMemIntrinsicNode(X86ISD::SUBV_BROADCAST_LOAD, DL, Tys, |
| 39030 | Ops, MemVT, MemIntr->getMemOperand()); |
| 39031 | TLO.DAG.makeEquivalentMemoryOrdering(SDValue(MemIntr, 1), |
| 39032 | Bcst.getValue(1)); |
| 39033 | return TLO.CombineTo(Op, insertSubVector(TLO.DAG.getUNDEF(VT), Bcst, 0, |
| 39034 | TLO.DAG, DL, ExtSizeInBits)); |
| 39035 | } |
| 39036 | break; |
| 39037 | } |
| 39038 | |
| 39039 | case X86ISD::VSHLDQ: |
| 39040 | case X86ISD::VSRLDQ: |
| 39041 | |
| 39042 | case X86ISD::VSHL: |
| 39043 | case X86ISD::VSRL: |
| 39044 | case X86ISD::VSRA: |
| 39045 | |
| 39046 | case X86ISD::VSHLI: |
| 39047 | case X86ISD::VSRLI: |
| 39048 | case X86ISD::VSRAI: { |
| 39049 | SDLoc DL(Op); |
| 39050 | SDValue Ext0 = |
| 39051 | extractSubVector(Op.getOperand(0), 0, TLO.DAG, DL, ExtSizeInBits); |
| 39052 | SDValue ExtOp = |
| 39053 | TLO.DAG.getNode(Opc, DL, Ext0.getValueType(), Ext0, Op.getOperand(1)); |
| 39054 | SDValue UndefVec = TLO.DAG.getUNDEF(VT); |
| 39055 | SDValue Insert = |
| 39056 | insertSubVector(UndefVec, ExtOp, 0, TLO.DAG, DL, ExtSizeInBits); |
| 39057 | return TLO.CombineTo(Op, Insert); |
| 39058 | } |
| 39059 | case X86ISD::VPERMI: { |
| 39060 | |
| 39061 | |
| 39062 | if (VT == MVT::v4f64 || VT == MVT::v4i64) { |
| 39063 | SmallVector<int, 4> Mask; |
| 39064 | DecodeVPERMMask(NumElts, Op.getConstantOperandVal(1), Mask); |
| 39065 | if (isUndefOrEqual(Mask[0], 2) && isUndefOrEqual(Mask[1], 3)) { |
| 39066 | SDLoc DL(Op); |
| 39067 | SDValue Ext = extractSubVector(Op.getOperand(0), 2, TLO.DAG, DL, 128); |
| 39068 | SDValue UndefVec = TLO.DAG.getUNDEF(VT); |
| 39069 | SDValue Insert = insertSubVector(UndefVec, Ext, 0, TLO.DAG, DL, 128); |
| 39070 | return TLO.CombineTo(Op, Insert); |
| 39071 | } |
| 39072 | } |
| 39073 | break; |
| 39074 | } |
| 39075 | case X86ISD::VPERM2X128: { |
| 39076 | |
| 39077 | SDLoc DL(Op); |
| 39078 | unsigned LoMask = Op.getConstantOperandVal(2) & 0xF; |
| 39079 | if (LoMask & 0x8) |
| 39080 | return TLO.CombineTo( |
| 39081 | Op, getZeroVector(VT.getSimpleVT(), Subtarget, TLO.DAG, DL)); |
| 39082 | unsigned EltIdx = (LoMask & 0x1) * (NumElts / 2); |
| 39083 | unsigned SrcIdx = (LoMask & 0x2) >> 1; |
| 39084 | SDValue ExtOp = |
| 39085 | extractSubVector(Op.getOperand(SrcIdx), EltIdx, TLO.DAG, DL, 128); |
| 39086 | SDValue UndefVec = TLO.DAG.getUNDEF(VT); |
| 39087 | SDValue Insert = |
| 39088 | insertSubVector(UndefVec, ExtOp, 0, TLO.DAG, DL, ExtSizeInBits); |
| 39089 | return TLO.CombineTo(Op, Insert); |
| 39090 | } |
| 39091 | |
| 39092 | case X86ISD::VZEXT_MOVL: |
| 39093 | |
| 39094 | case X86ISD::PSHUFD: |
| 39095 | case X86ISD::PSHUFLW: |
| 39096 | case X86ISD::PSHUFHW: |
| 39097 | case X86ISD::VPERMILPI: |
| 39098 | |
| 39099 | case X86ISD::VPERMILPV: |
| 39100 | case X86ISD::VPERMIL2: |
| 39101 | case X86ISD::PSHUFB: |
| 39102 | case X86ISD::UNPCKL: |
| 39103 | case X86ISD::UNPCKH: |
| 39104 | case X86ISD::BLENDI: |
| 39105 | |
| 39106 | case X86ISD::AVG: |
| 39107 | case X86ISD::PACKSS: |
| 39108 | case X86ISD::PACKUS: |
| 39109 | |
| 39110 | case X86ISD::HADD: |
| 39111 | case X86ISD::HSUB: |
| 39112 | case X86ISD::FHADD: |
| 39113 | case X86ISD::FHSUB: { |
| 39114 | SDLoc DL(Op); |
| 39115 | SmallVector<SDValue, 4> Ops; |
| 39116 | for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i) { |
| 39117 | SDValue SrcOp = Op.getOperand(i); |
| 39118 | EVT SrcVT = SrcOp.getValueType(); |
| 39119 | assert((!SrcVT.isVector() || SrcVT.getSizeInBits() == SizeInBits) && |
| 39120 | "Unsupported vector size"); |
| 39121 | Ops.push_back(SrcVT.isVector() ? extractSubVector(SrcOp, 0, TLO.DAG, DL, |
| 39122 | ExtSizeInBits) |
| 39123 | : SrcOp); |
| 39124 | } |
| 39125 | MVT ExtVT = VT.getSimpleVT(); |
| 39126 | ExtVT = MVT::getVectorVT(ExtVT.getScalarType(), |
| 39127 | ExtSizeInBits / ExtVT.getScalarSizeInBits()); |
| 39128 | SDValue ExtOp = TLO.DAG.getNode(Opc, DL, ExtVT, Ops); |
| 39129 | SDValue UndefVec = TLO.DAG.getUNDEF(VT); |
| 39130 | SDValue Insert = |
| 39131 | insertSubVector(UndefVec, ExtOp, 0, TLO.DAG, DL, ExtSizeInBits); |
| 39132 | return TLO.CombineTo(Op, Insert); |
| 39133 | } |
| 39134 | } |
| 39135 | } |
| 39136 | |
| 39137 | |
| 39138 | APInt OpUndef, OpZero; |
| 39139 | SmallVector<int, 64> OpMask; |
| 39140 | SmallVector<SDValue, 2> OpInputs; |
| 39141 | if (!getTargetShuffleInputs(Op, DemandedElts, OpInputs, OpMask, OpUndef, |
| 39142 | OpZero, TLO.DAG, Depth, false)) |
| 39143 | return false; |
| 39144 | |
| 39145 | |
| 39146 | if (OpMask.size() != (unsigned)NumElts || |
| 39147 | llvm::any_of(OpInputs, [VT](SDValue V) { |
| 39148 | return VT.getSizeInBits() != V.getValueSizeInBits() || |
| 39149 | !V.getValueType().isVector(); |
| 39150 | })) |
| 39151 | return false; |
| 39152 | |
| 39153 | KnownZero = OpZero; |
| 39154 | KnownUndef = OpUndef; |
| 39155 | |
| 39156 | |
| 39157 | int NumSrcs = OpInputs.size(); |
| 39158 | for (int i = 0; i != NumElts; ++i) |
| 39159 | if (!DemandedElts[i]) |
| 39160 | OpMask[i] = SM_SentinelUndef; |
| 39161 | |
| 39162 | if (isUndefInRange(OpMask, 0, NumElts)) { |
| 39163 | KnownUndef.setAllBits(); |
| 39164 | return TLO.CombineTo(Op, TLO.DAG.getUNDEF(VT)); |
| 39165 | } |
| 39166 | if (isUndefOrZeroInRange(OpMask, 0, NumElts)) { |
| 39167 | KnownZero.setAllBits(); |
| 39168 | return TLO.CombineTo( |
| 39169 | Op, getZeroVector(VT.getSimpleVT(), Subtarget, TLO.DAG, SDLoc(Op))); |
| 39170 | } |
| 39171 | for (int Src = 0; Src != NumSrcs; ++Src) |
| 39172 | if (isSequentialOrUndefInRange(OpMask, 0, NumElts, Src * NumElts)) |
| 39173 | return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, OpInputs[Src])); |
| 39174 | |
| 39175 | |
| 39176 | for (int Src = 0; Src != NumSrcs; ++Src) { |
| 39177 | |
| 39178 | if (OpInputs[Src].getValueType() != VT) |
| 39179 | continue; |
| 39180 | |
| 39181 | int Lo = Src * NumElts; |
| 39182 | APInt SrcElts = APInt::getNullValue(NumElts); |
| 39183 | for (int i = 0; i != NumElts; ++i) |
| 39184 | if (DemandedElts[i]) { |
| 39185 | int M = OpMask[i] - Lo; |
| 39186 | if (0 <= M && M < NumElts) |
| 39187 | SrcElts.setBit(M); |
| 39188 | } |
| 39189 | |
| 39190 | |
| 39191 | APInt SrcUndef, SrcZero; |
| 39192 | if (SimplifyDemandedVectorElts(OpInputs[Src], SrcElts, SrcUndef, SrcZero, |
| 39193 | TLO, Depth + 1)) |
| 39194 | return true; |
| 39195 | } |
| 39196 | |
| 39197 | |
| 39198 | |
| 39199 | |
| 39200 | |
| 39201 | |
| 39202 | |
| 39203 | |
| 39204 | if (!DemandedElts.isAllOnesValue()) { |
| 39205 | assert(Depth < X86::MaxShuffleCombineDepth && "Depth out of range"); |
| 39206 | |
| 39207 | SmallVector<int, 64> DemandedMask(NumElts, SM_SentinelUndef); |
| 39208 | for (int i = 0; i != NumElts; ++i) |
| 39209 | if (DemandedElts[i]) |
| 39210 | DemandedMask[i] = i; |
| 39211 | |
| 39212 | SDValue NewShuffle = combineX86ShufflesRecursively( |
| 39213 | {Op}, 0, Op, DemandedMask, {}, 0, X86::MaxShuffleCombineDepth - Depth, |
| 39214 | false, |
| 39215 | true, true, TLO.DAG, |
| 39216 | Subtarget); |
| 39217 | if (NewShuffle) |
| 39218 | return TLO.CombineTo(Op, NewShuffle); |
| 39219 | } |
| 39220 | |
| 39221 | return false; |
| 39222 | } |
| 39223 | |
| 39224 | bool X86TargetLowering::SimplifyDemandedBitsForTargetNode( |
| 39225 | SDValue Op, const APInt &OriginalDemandedBits, |
| 39226 | const APInt &OriginalDemandedElts, KnownBits &Known, TargetLoweringOpt &TLO, |
| 39227 | unsigned Depth) const { |
| 39228 | EVT VT = Op.getValueType(); |
| 39229 | unsigned BitWidth = OriginalDemandedBits.getBitWidth(); |
| 39230 | unsigned Opc = Op.getOpcode(); |
| 39231 | switch(Opc) { |
| 39232 | case X86ISD::VTRUNC: { |
| 39233 | KnownBits KnownOp; |
| 39234 | SDValue Src = Op.getOperand(0); |
| 39235 | MVT SrcVT = Src.getSimpleValueType(); |
| 39236 | |
| 39237 | |
| 39238 | APInt TruncMask = OriginalDemandedBits.zext(SrcVT.getScalarSizeInBits()); |
| 39239 | APInt DemandedElts = OriginalDemandedElts.trunc(SrcVT.getVectorNumElements()); |
| 39240 | if (SimplifyDemandedBits(Src, TruncMask, DemandedElts, KnownOp, TLO, Depth + 1)) |
| 39241 | return true; |
| 39242 | break; |
| 39243 | } |
| 39244 | case X86ISD::PMULDQ: |
| 39245 | case X86ISD::PMULUDQ: { |
| 39246 | |
| 39247 | KnownBits KnownOp; |
| 39248 | SDValue LHS = Op.getOperand(0); |
| 39249 | SDValue RHS = Op.getOperand(1); |
| 39250 | |
| 39251 | APInt DemandedMask = APInt::getLowBitsSet(64, 32); |
| 39252 | if (SimplifyDemandedBits(LHS, DemandedMask, OriginalDemandedElts, KnownOp, |
| 39253 | TLO, Depth + 1)) |
| 39254 | return true; |
| 39255 | if (SimplifyDemandedBits(RHS, DemandedMask, OriginalDemandedElts, KnownOp, |
| 39256 | TLO, Depth + 1)) |
| 39257 | return true; |
| 39258 | |
| 39259 | |
| 39260 | SDValue DemandedLHS = SimplifyMultipleUseDemandedBits( |
| 39261 | LHS, DemandedMask, OriginalDemandedElts, TLO.DAG, Depth + 1); |
| 39262 | SDValue DemandedRHS = SimplifyMultipleUseDemandedBits( |
| 39263 | RHS, DemandedMask, OriginalDemandedElts, TLO.DAG, Depth + 1); |
| 39264 | if (DemandedLHS || DemandedRHS) { |
| 39265 | DemandedLHS = DemandedLHS ? DemandedLHS : LHS; |
| 39266 | DemandedRHS = DemandedRHS ? DemandedRHS : RHS; |
| 39267 | return TLO.CombineTo( |
| 39268 | Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, DemandedLHS, DemandedRHS)); |
| 39269 | } |
| 39270 | break; |
| 39271 | } |
| 39272 | case X86ISD::VSHLI: { |
| 39273 | SDValue Op0 = Op.getOperand(0); |
| 39274 | |
| 39275 | unsigned ShAmt = Op.getConstantOperandVal(1); |
| 39276 | if (ShAmt >= BitWidth) |
| 39277 | break; |
| 39278 | |
| 39279 | APInt DemandedMask = OriginalDemandedBits.lshr(ShAmt); |
| 39280 | |
| 39281 | |
| 39282 | |
| 39283 | |
| 39284 | if (Op0.getOpcode() == X86ISD::VSRLI && |
| 39285 | OriginalDemandedBits.countTrailingZeros() >= ShAmt) { |
| 39286 | unsigned Shift2Amt = Op0.getConstantOperandVal(1); |
| 39287 | if (Shift2Amt < BitWidth) { |
| 39288 | int Diff = ShAmt - Shift2Amt; |
| 39289 | if (Diff == 0) |
| 39290 | return TLO.CombineTo(Op, Op0.getOperand(0)); |
| 39291 | |
| 39292 | unsigned NewOpc = Diff < 0 ? X86ISD::VSRLI : X86ISD::VSHLI; |
| 39293 | SDValue NewShift = TLO.DAG.getNode( |
| 39294 | NewOpc, SDLoc(Op), VT, Op0.getOperand(0), |
| 39295 | TLO.DAG.getTargetConstant(std::abs(Diff), SDLoc(Op), MVT::i8)); |
| 39296 | return TLO.CombineTo(Op, NewShift); |
| 39297 | } |
| 39298 | } |
| 39299 | |
| 39300 | |
| 39301 | unsigned NumSignBits = |
| 39302 | TLO.DAG.ComputeNumSignBits(Op0, OriginalDemandedElts, Depth + 1); |
| 39303 | unsigned UpperDemandedBits = |
| 39304 | BitWidth - OriginalDemandedBits.countTrailingZeros(); |
| 39305 | if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= UpperDemandedBits) |
| 39306 | return TLO.CombineTo(Op, Op0); |
| 39307 | |
| 39308 | if (SimplifyDemandedBits(Op0, DemandedMask, OriginalDemandedElts, Known, |
| 39309 | TLO, Depth + 1)) |
| 39310 | return true; |
| 39311 | |
| 39312 | assert(!Known.hasConflict() && "Bits known to be one AND zero?"); |
| 39313 | Known.Zero <<= ShAmt; |
| 39314 | Known.One <<= ShAmt; |
| 39315 | |
| 39316 | |
| 39317 | Known.Zero.setLowBits(ShAmt); |
| 39318 | return false; |
| 39319 | } |
| 39320 | case X86ISD::VSRLI: { |
| 39321 | unsigned ShAmt = Op.getConstantOperandVal(1); |
| 39322 | if (ShAmt >= BitWidth) |
| 39323 | break; |
| 39324 | |
| 39325 | APInt DemandedMask = OriginalDemandedBits << ShAmt; |
| 39326 | |
| 39327 | if (SimplifyDemandedBits(Op.getOperand(0), DemandedMask, |
| 39328 | OriginalDemandedElts, Known, TLO, Depth + 1)) |
| 39329 | return true; |
| 39330 | |
| 39331 | assert(!Known.hasConflict() && "Bits known to be one AND zero?"); |
| 39332 | Known.Zero.lshrInPlace(ShAmt); |
| 39333 | Known.One.lshrInPlace(ShAmt); |
| 39334 | |
| 39335 | |
| 39336 | Known.Zero.setHighBits(ShAmt); |
| 39337 | return false; |
| 39338 | } |
| 39339 | case X86ISD::VSRAI: { |
| 39340 | SDValue Op0 = Op.getOperand(0); |
| 39341 | SDValue Op1 = Op.getOperand(1); |
| 39342 | |
| 39343 | unsigned ShAmt = cast<ConstantSDNode>(Op1)->getZExtValue(); |
| 39344 | if (ShAmt >= BitWidth) |
| 39345 | break; |
| 39346 | |
| 39347 | APInt DemandedMask = OriginalDemandedBits << ShAmt; |
| 39348 | |
| 39349 | |
| 39350 | if (OriginalDemandedBits.isSignMask()) |
| 39351 | return TLO.CombineTo(Op, Op0); |
| 39352 | |
| 39353 | |
| 39354 | if (Op0.getOpcode() == X86ISD::VSHLI && |
| 39355 | Op.getOperand(1) == Op0.getOperand(1)) { |
| 39356 | SDValue Op00 = Op0.getOperand(0); |
| 39357 | unsigned NumSignBits = |
| 39358 | TLO.DAG.ComputeNumSignBits(Op00, OriginalDemandedElts); |
| 39359 | if (ShAmt < NumSignBits) |
| 39360 | return TLO.CombineTo(Op, Op00); |
| 39361 | } |
| 39362 | |
| 39363 | |
| 39364 | |
| 39365 | if (OriginalDemandedBits.countLeadingZeros() < ShAmt) |
| 39366 | DemandedMask.setSignBit(); |
| 39367 | |
| 39368 | if (SimplifyDemandedBits(Op0, DemandedMask, OriginalDemandedElts, Known, |
| 39369 | TLO, Depth + 1)) |
| 39370 | return true; |
| 39371 | |
| 39372 | assert(!Known.hasConflict() && "Bits known to be one AND zero?"); |
| 39373 | Known.Zero.lshrInPlace(ShAmt); |
| 39374 | Known.One.lshrInPlace(ShAmt); |
| 39375 | |
| 39376 | |
| 39377 | |
| 39378 | if (Known.Zero[BitWidth - ShAmt - 1] || |
| 39379 | OriginalDemandedBits.countLeadingZeros() >= ShAmt) |
| 39380 | return TLO.CombineTo( |
| 39381 | Op, TLO.DAG.getNode(X86ISD::VSRLI, SDLoc(Op), VT, Op0, Op1)); |
| 39382 | |
| 39383 | |
| 39384 | if (Known.One[BitWidth - ShAmt - 1]) |
| 39385 | Known.One.setHighBits(ShAmt); |
| 39386 | return false; |
| 39387 | } |
| 39388 | case X86ISD::PEXTRB: |
| 39389 | case X86ISD::PEXTRW: { |
| 39390 | SDValue Vec = Op.getOperand(0); |
| 39391 | auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(1)); |
| 39392 | MVT VecVT = Vec.getSimpleValueType(); |
| 39393 | unsigned NumVecElts = VecVT.getVectorNumElements(); |
| 39394 | |
| 39395 | if (CIdx && CIdx->getAPIntValue().ult(NumVecElts)) { |
| 39396 | unsigned Idx = CIdx->getZExtValue(); |
| 39397 | unsigned VecBitWidth = VecVT.getScalarSizeInBits(); |
| 39398 | |
| 39399 | |
| 39400 | |
| 39401 | APInt DemandedVecBits = OriginalDemandedBits.trunc(VecBitWidth); |
| 39402 | if (DemandedVecBits == 0) |
| 39403 | return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT)); |
| 39404 | |
| 39405 | APInt KnownUndef, KnownZero; |
| 39406 | APInt DemandedVecElts = APInt::getOneBitSet(NumVecElts, Idx); |
| 39407 | if (SimplifyDemandedVectorElts(Vec, DemandedVecElts, KnownUndef, |
| 39408 | KnownZero, TLO, Depth + 1)) |
| 39409 | return true; |
| 39410 | |
| 39411 | KnownBits KnownVec; |
| 39412 | if (SimplifyDemandedBits(Vec, DemandedVecBits, DemandedVecElts, |
| 39413 | KnownVec, TLO, Depth + 1)) |
| 39414 | return true; |
| 39415 | |
| 39416 | if (SDValue V = SimplifyMultipleUseDemandedBits( |
| 39417 | Vec, DemandedVecBits, DemandedVecElts, TLO.DAG, Depth + 1)) |
| 39418 | return TLO.CombineTo( |
| 39419 | Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, V, Op.getOperand(1))); |
| 39420 | |
| 39421 | Known = KnownVec.zext(BitWidth); |
| 39422 | return false; |
| 39423 | } |
| 39424 | break; |
| 39425 | } |
| 39426 | case X86ISD::PINSRB: |
| 39427 | case X86ISD::PINSRW: { |
| 39428 | SDValue Vec = Op.getOperand(0); |
| 39429 | SDValue Scl = Op.getOperand(1); |
| 39430 | auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2)); |
| 39431 | MVT VecVT = Vec.getSimpleValueType(); |
| 39432 | |
| 39433 | if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements())) { |
| 39434 | unsigned Idx = CIdx->getZExtValue(); |
| 39435 | if (!OriginalDemandedElts[Idx]) |
| 39436 | return TLO.CombineTo(Op, Vec); |
| 39437 | |
| 39438 | KnownBits KnownVec; |
| 39439 | APInt DemandedVecElts(OriginalDemandedElts); |
| 39440 | DemandedVecElts.clearBit(Idx); |
| 39441 | if (SimplifyDemandedBits(Vec, OriginalDemandedBits, DemandedVecElts, |
| 39442 | KnownVec, TLO, Depth + 1)) |
| 39443 | return true; |
| 39444 | |
| 39445 | KnownBits KnownScl; |
| 39446 | unsigned NumSclBits = Scl.getScalarValueSizeInBits(); |
| 39447 | APInt DemandedSclBits = OriginalDemandedBits.zext(NumSclBits); |
| 39448 | if (SimplifyDemandedBits(Scl, DemandedSclBits, KnownScl, TLO, Depth + 1)) |
| 39449 | return true; |
| 39450 | |
| 39451 | KnownScl = KnownScl.trunc(VecVT.getScalarSizeInBits()); |
| 39452 | Known = KnownBits::commonBits(KnownVec, KnownScl); |
| 39453 | return false; |
| 39454 | } |
| 39455 | break; |
| 39456 | } |
| 39457 | case X86ISD::PACKSS: |
| 39458 | |
| 39459 | |
| 39460 | |
| 39461 | if (OriginalDemandedBits.isSignMask()) { |
| 39462 | APInt DemandedLHS, DemandedRHS; |
| 39463 | getPackDemandedElts(VT, OriginalDemandedElts, DemandedLHS, DemandedRHS); |
| 39464 | |
| 39465 | KnownBits KnownLHS, KnownRHS; |
| 39466 | APInt SignMask = APInt::getSignMask(BitWidth * 2); |
| 39467 | if (SimplifyDemandedBits(Op.getOperand(0), SignMask, DemandedLHS, |
| 39468 | KnownLHS, TLO, Depth + 1)) |
| 39469 | return true; |
| 39470 | if (SimplifyDemandedBits(Op.getOperand(1), SignMask, DemandedRHS, |
| 39471 | KnownRHS, TLO, Depth + 1)) |
| 39472 | return true; |
| 39473 | |
| 39474 | |
| 39475 | SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits( |
| 39476 | Op.getOperand(0), SignMask, DemandedLHS, TLO.DAG, Depth + 1); |
| 39477 | SDValue DemandedOp1 = SimplifyMultipleUseDemandedBits( |
| 39478 | Op.getOperand(1), SignMask, DemandedRHS, TLO.DAG, Depth + 1); |
| 39479 | if (DemandedOp0 || DemandedOp1) { |
| 39480 | SDValue Op0 = DemandedOp0 ? DemandedOp0 : Op.getOperand(0); |
| 39481 | SDValue Op1 = DemandedOp1 ? DemandedOp1 : Op.getOperand(1); |
| 39482 | return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, Op0, Op1)); |
| 39483 | } |
| 39484 | } |
| 39485 | |
| 39486 | break; |
| 39487 | case X86ISD::VBROADCAST: { |
| 39488 | SDValue Src = Op.getOperand(0); |
| 39489 | MVT SrcVT = Src.getSimpleValueType(); |
| 39490 | APInt DemandedElts = APInt::getOneBitSet( |
| 39491 | SrcVT.isVector() ? SrcVT.getVectorNumElements() : 1, 0); |
| 39492 | if (SimplifyDemandedBits(Src, OriginalDemandedBits, DemandedElts, Known, |
| 39493 | TLO, Depth + 1)) |
| 39494 | return true; |
| 39495 | |
| 39496 | |
| 39497 | |
| 39498 | if ((BitWidth == 64) && SrcVT.isScalarInteger() && !Subtarget.hasAVX512() && |
| 39499 | OriginalDemandedBits.countLeadingZeros() >= (BitWidth / 2)) { |
| 39500 | MVT NewSrcVT = MVT::getIntegerVT(BitWidth / 2); |
| 39501 | SDValue NewSrc = |
| 39502 | TLO.DAG.getNode(ISD::TRUNCATE, SDLoc(Src), NewSrcVT, Src); |
| 39503 | MVT NewVT = MVT::getVectorVT(NewSrcVT, VT.getVectorNumElements() * 2); |
| 39504 | SDValue NewBcst = |
| 39505 | TLO.DAG.getNode(X86ISD::VBROADCAST, SDLoc(Op), NewVT, NewSrc); |
| 39506 | return TLO.CombineTo(Op, TLO.DAG.getBitcast(VT, NewBcst)); |
| 39507 | } |
| 39508 | break; |
| 39509 | } |
| 39510 | case X86ISD::PCMPGT: |
| 39511 | |
| 39512 | |
| 39513 | if (OriginalDemandedBits.isSignMask() && |
| 39514 | ISD::isBuildVectorAllZeros(Op.getOperand(0).getNode())) |
| 39515 | return TLO.CombineTo(Op, Op.getOperand(1)); |
| 39516 | break; |
| 39517 | case X86ISD::MOVMSK: { |
| 39518 | SDValue Src = Op.getOperand(0); |
| 39519 | MVT SrcVT = Src.getSimpleValueType(); |
| 39520 | unsigned SrcBits = SrcVT.getScalarSizeInBits(); |
| 39521 | unsigned NumElts = SrcVT.getVectorNumElements(); |
| 39522 | |
| 39523 | |
| 39524 | if (OriginalDemandedBits.countTrailingZeros() >= NumElts) |
| 39525 | return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT)); |
| 39526 | |
| 39527 | |
| 39528 | APInt KnownUndef, KnownZero; |
| 39529 | APInt DemandedElts = OriginalDemandedBits.zextOrTrunc(NumElts); |
| 39530 | if (SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef, KnownZero, |
| 39531 | TLO, Depth + 1)) |
| 39532 | return true; |
| 39533 | |
| 39534 | Known.Zero = KnownZero.zextOrSelf(BitWidth); |
| 39535 | Known.Zero.setHighBits(BitWidth - NumElts); |
| 39536 | |
| 39537 | |
| 39538 | KnownBits KnownSrc; |
| 39539 | APInt DemandedSrcBits = APInt::getSignMask(SrcBits); |
| 39540 | if (SimplifyDemandedBits(Src, DemandedSrcBits, DemandedElts, KnownSrc, TLO, |
| 39541 | Depth + 1)) |
| 39542 | return true; |
| 39543 | |
| 39544 | if (KnownSrc.One[SrcBits - 1]) |
| 39545 | Known.One.setLowBits(NumElts); |
| 39546 | else if (KnownSrc.Zero[SrcBits - 1]) |
| 39547 | Known.Zero.setLowBits(NumElts); |
| 39548 | |
| 39549 | |
| 39550 | if (SDValue NewSrc = SimplifyMultipleUseDemandedBits( |
| 39551 | Src, DemandedSrcBits, DemandedElts, TLO.DAG, Depth + 1)) |
| 39552 | return TLO.CombineTo(Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, NewSrc)); |
| 39553 | return false; |
| 39554 | } |
| 39555 | case X86ISD::BEXTR: |
| 39556 | case X86ISD::BEXTRI: { |
| 39557 | SDValue Op0 = Op.getOperand(0); |
| 39558 | SDValue Op1 = Op.getOperand(1); |
| 39559 | |
| 39560 | |
| 39561 | if (auto *Cst1 = dyn_cast<ConstantSDNode>(Op1)) { |
| 39562 | |
| 39563 | uint64_t Val1 = Cst1->getZExtValue(); |
| 39564 | uint64_t MaskedVal1 = Val1 & 0xFFFF; |
| 39565 | if (Opc == X86ISD::BEXTR && MaskedVal1 != Val1) { |
| 39566 | SDLoc DL(Op); |
| 39567 | return TLO.CombineTo( |
| 39568 | Op, TLO.DAG.getNode(X86ISD::BEXTR, DL, VT, Op0, |
| 39569 | TLO.DAG.getConstant(MaskedVal1, DL, VT))); |
| 39570 | } |
| 39571 | |
| 39572 | unsigned Shift = Cst1->getAPIntValue().extractBitsAsZExtValue(8, 0); |
| 39573 | unsigned Length = Cst1->getAPIntValue().extractBitsAsZExtValue(8, 8); |
| 39574 | |
| 39575 | |
| 39576 | if (Length == 0) { |
| 39577 | Known.setAllZero(); |
| 39578 | return false; |
| 39579 | } |
| 39580 | |
| 39581 | if ((Shift + Length) <= BitWidth) { |
| 39582 | APInt DemandedMask = APInt::getBitsSet(BitWidth, Shift, Shift + Length); |
| 39583 | if (SimplifyDemandedBits(Op0, DemandedMask, Known, TLO, Depth + 1)) |
| 39584 | return true; |
| 39585 | |
| 39586 | Known = Known.extractBits(Length, Shift); |
| 39587 | Known = Known.zextOrTrunc(BitWidth); |
| 39588 | return false; |
| 39589 | } |
| 39590 | } else { |
| 39591 | assert(Opc == X86ISD::BEXTR && "Unexpected opcode!"); |
| 39592 | KnownBits Known1; |
| 39593 | APInt DemandedMask(APInt::getLowBitsSet(BitWidth, 16)); |
| 39594 | if (SimplifyDemandedBits(Op1, DemandedMask, Known1, TLO, Depth + 1)) |
| 39595 | return true; |
| 39596 | |
| 39597 | |
| 39598 | KnownBits LengthBits = Known1.extractBits(8, 8); |
| 39599 | if (LengthBits.isZero()) |
| 39600 | return TLO.CombineTo(Op, TLO.DAG.getConstant(0, SDLoc(Op), VT)); |
| 39601 | } |
| 39602 | |
| 39603 | break; |
| 39604 | } |
| 39605 | case X86ISD::PDEP: { |
| 39606 | SDValue Op0 = Op.getOperand(0); |
| 39607 | SDValue Op1 = Op.getOperand(1); |
| 39608 | |
| 39609 | unsigned DemandedBitsLZ = OriginalDemandedBits.countLeadingZeros(); |
| 39610 | APInt LoMask = APInt::getLowBitsSet(BitWidth, BitWidth - DemandedBitsLZ); |
| 39611 | |
| 39612 | |
| 39613 | |
| 39614 | if (SimplifyDemandedBits(Op1, LoMask, Known, TLO, Depth + 1)) |
| 39615 | return true; |
| 39616 | |
| 39617 | |
| 39618 | |
| 39619 | |
| 39620 | KnownBits Known2; |
| 39621 | uint64_t Count = (~Known.Zero & LoMask).countPopulation(); |
| 39622 | APInt DemandedMask(APInt::getLowBitsSet(BitWidth, Count)); |
| 39623 | if (SimplifyDemandedBits(Op0, DemandedMask, Known2, TLO, Depth + 1)) |
| 39624 | return true; |
| 39625 | |
| 39626 | |
| 39627 | Known.One.clearAllBits(); |
| 39628 | |
| 39629 | |
| 39630 | Known.Zero.setLowBits(Known2.countMinTrailingZeros()); |
| 39631 | return false; |
| 39632 | } |
| 39633 | } |
| 39634 | |
| 39635 | return TargetLowering::SimplifyDemandedBitsForTargetNode( |
| 39636 | Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth); |
| 39637 | } |
| 39638 | |
// Attempt to return a simplified source value for Op that supplies all the
// demanded bits/elements, without actually creating new nodes, so bitcasts
// and shuffles of multi-use values can be peeked through.
SDValue X86TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
    SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
    SelectionDAG &DAG, unsigned Depth) const {
  int NumElts = DemandedElts.getBitWidth();
  unsigned Opc = Op.getOpcode();
  EVT VT = Op.getValueType();

  switch (Opc) {
  case X86ISD::PINSRB:
  case X86ISD::PINSRW: {
    // If we don't demand the inserted element, return the base vector.
    SDValue Vec = Op.getOperand(0);
    auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
    MVT VecVT = Vec.getSimpleValueType();
    if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) &&
        !DemandedElts[CIdx->getZExtValue()])
      return Vec;
    break;
  }
  case X86ISD::VSHLI: {
    // If all the demanded bits of the shifted-in value are already sign bits
    // of the source, the shift changes nothing we care about - use the source
    // directly.
    SDValue Op0 = Op.getOperand(0);
    unsigned ShAmt = Op.getConstantOperandVal(1);
    unsigned BitWidth = DemandedBits.getBitWidth();
    unsigned NumSignBits = DAG.ComputeNumSignBits(Op0, DemandedElts, Depth + 1);
    unsigned UpperDemandedBits = BitWidth - DemandedBits.countTrailingZeros();
    if (NumSignBits > ShAmt && (NumSignBits - ShAmt) >= UpperDemandedBits)
      return Op0;
    break;
  }
  case X86ISD::VSRAI:
    // If we only demand the sign bit, an arithmetic shift right doesn't
    // change it - use the source directly.
    if (DemandedBits.isSignMask())
      return Op.getOperand(0);
    break;
  case X86ISD::PCMPGT:
    // icmp sgt(0, R) == ashr(R, BitWidth-1).
    // If we only need the sign bit then we can use R directly.
    if (DemandedBits.isSignMask() &&
        ISD::isBuildVectorAllZeros(Op.getOperand(0).getNode()))
      return Op.getOperand(1);
    break;
  }

  APInt ShuffleUndef, ShuffleZero;
  SmallVector<int, 16> ShuffleMask;
  SmallVector<SDValue, 2> ShuffleOps;
  if (getTargetShuffleInputs(Op, DemandedElts, ShuffleOps, ShuffleMask,
                             ShuffleUndef, ShuffleZero, DAG, Depth, false)) {
    // If all the demanded elements are undef/zero, or come 'inline' (same
    // lane) from a single same-sized shuffle operand, bypass the shuffle.
    int NumOps = ShuffleOps.size();
    if (ShuffleMask.size() == (unsigned)NumElts &&
        llvm::all_of(ShuffleOps, [VT](SDValue V) {
          return VT.getSizeInBits() == V.getValueSizeInBits();
        })) {

      if (DemandedElts.isSubsetOf(ShuffleUndef))
        return DAG.getUNDEF(VT);
      if (DemandedElts.isSubsetOf(ShuffleUndef | ShuffleZero))
        return getZeroVector(VT.getSimpleVT(), Subtarget, DAG, SDLoc(Op));

      // Bitmask of shuffle operands that (so far) supply every demanded
      // element from its own identical lane.
      APInt IdentityOp = APInt::getAllOnesValue(NumOps);
      for (int i = 0; i != NumElts; ++i) {
        int M = ShuffleMask[i];
        if (!DemandedElts[i] || ShuffleUndef[i])
          continue;
        int OpIdx = M / NumElts;
        int EltIdx = M % NumElts;
        // Not an in-place element - no operand can be an identity source.
        if (M < 0 || EltIdx != i) {
          IdentityOp.clearAllBits();
          break;
        }
        IdentityOp &= APInt::getOneBitSet(NumOps, OpIdx);
        if (IdentityOp == 0)
          break;
      }
      assert((IdentityOp == 0 || IdentityOp.countPopulation() == 1) &&
             "Multiple identity shuffles detected");

      if (IdentityOp != 0)
        return DAG.getBitcast(VT, ShuffleOps[IdentityOp.countTrailingZeros()]);
    }
  }

  return TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
      Op, DemandedBits, DemandedElts, DAG, Depth);
}
| 39730 | |
| 39731 | |
| 39732 | |
| 39733 | static bool checkBitcastSrcVectorSize(SDValue Src, unsigned Size, |
| 39734 | bool AllowTruncate) { |
| 39735 | switch (Src.getOpcode()) { |
| 39736 | case ISD::TRUNCATE: |
| 39737 | if (!AllowTruncate) |
| 39738 | return false; |
| 39739 | LLVM_FALLTHROUGH; |
| 39740 | case ISD::SETCC: |
| 39741 | return Src.getOperand(0).getValueSizeInBits() == Size; |
| 39742 | case ISD::AND: |
| 39743 | case ISD::XOR: |
| 39744 | case ISD::OR: |
| 39745 | return checkBitcastSrcVectorSize(Src.getOperand(0), Size, AllowTruncate) && |
| 39746 | checkBitcastSrcVectorSize(Src.getOperand(1), Size, AllowTruncate); |
| 39747 | } |
| 39748 | return false; |
| 39749 | } |
| 39750 | |
| 39751 | |
| 39752 | static unsigned getAltBitOpcode(unsigned Opcode) { |
| 39753 | switch(Opcode) { |
| 39754 | case ISD::AND: return X86ISD::FAND; |
| 39755 | case ISD::OR: return X86ISD::FOR; |
| 39756 | case ISD::XOR: return X86ISD::FXOR; |
| 39757 | case X86ISD::ANDNP: return X86ISD::FANDN; |
| 39758 | } |
| 39759 | llvm_unreachable("Unknown bitwise opcode"); |
| 39760 | } |
| 39761 | |
| 39762 | |
// SSE1-only helper: try to re-express a v4i1 source as a v4f32 value whose
// per-element sign bits carry the mask, so a float MOVMSK can extract it.
// Handles a "setcc slt (v4i32 X), 0" leaf (the mask is X's sign bits) and
// bitwise-logic trees over such leaves (rebuilt with FP logic ops).
// Returns an empty SDValue if no such rewrite applies.
static SDValue adjustBitcastSrcVectorSSE1(SelectionDAG &DAG, SDValue Src,
                                          const SDLoc &DL) {
  EVT SrcVT = Src.getValueType();
  if (SrcVT != MVT::v4i1)
    return SDValue();

  switch (Src.getOpcode()) {
  case ISD::SETCC:
    // Match "X < 0" on v4i32 - the i1 result is exactly the sign bit of X.
    if (Src.getOperand(0).getValueType() == MVT::v4i32 &&
        ISD::isBuildVectorAllZeros(Src.getOperand(1).getNode()) &&
        cast<CondCodeSDNode>(Src.getOperand(2))->get() == ISD::SETLT) {
      SDValue Op0 = Src.getOperand(0);
      // A plain load can be reinterpreted as a float load directly.
      if (ISD::isNormalLoad(Op0.getNode()))
        return DAG.getBitcast(MVT::v4f32, Op0);
      // Peek through an existing v4f32 -> v4i32 bitcast.
      if (Op0.getOpcode() == ISD::BITCAST &&
          Op0.getOperand(0).getValueType() == MVT::v4f32)
        return Op0.getOperand(0);
    }
    break;
  case ISD::AND:
  case ISD::XOR:
  case ISD::OR: {
    // Recurse on both sides; only rewrite if both succeed, replacing the
    // integer logic op with its FP twin (see getAltBitOpcode).
    SDValue Op0 = adjustBitcastSrcVectorSSE1(DAG, Src.getOperand(0), DL);
    SDValue Op1 = adjustBitcastSrcVectorSSE1(DAG, Src.getOperand(1), DL);
    if (Op0 && Op1)
      return DAG.getNode(getAltBitOpcode(Src.getOpcode()), DL, MVT::v4f32, Op0,
                         Op1);
    break;
  }
  }
  return SDValue();
}
| 39795 | |
| 39796 | |
| 39797 | static SDValue signExtendBitcastSrcVector(SelectionDAG &DAG, EVT SExtVT, |
| 39798 | SDValue Src, const SDLoc &DL) { |
| 39799 | switch (Src.getOpcode()) { |
| 39800 | case ISD::SETCC: |
| 39801 | case ISD::TRUNCATE: |
| 39802 | return DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src); |
| 39803 | case ISD::AND: |
| 39804 | case ISD::XOR: |
| 39805 | case ISD::OR: |
| 39806 | return DAG.getNode( |
| 39807 | Src.getOpcode(), DL, SExtVT, |
| 39808 | signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(0), DL), |
| 39809 | signExtendBitcastSrcVector(DAG, SExtVT, Src.getOperand(1), DL)); |
| 39810 | } |
| 39811 | llvm_unreachable("Unexpected node type for vXi1 sign extension"); |
| 39812 | } |
| 39813 | |
| 39814 | |
| 39815 | |
| 39816 | |
| 39817 | |
| 39818 | |
| 39819 | |
// Try to turn a bitcast of a vXi1 mask into a scalar integer via MOVMSK /
// PMOVMSKB, e.g.:
//   (i16 bitcast (v16i1 x)) -> (i16 movmsk (v16i8 sext (v16i1 x)))
// before type legalization scalarizes the illegal vXi1 vector.
// Returns an empty SDValue if the pattern doesn't apply or isn't profitable.
static SDValue combineBitcastvxi1(SelectionDAG &DAG, EVT VT, SDValue Src,
                                  const SDLoc &DL,
                                  const X86Subtarget &Subtarget) {
  EVT SrcVT = Src.getValueType();
  if (!SrcVT.isSimple() || SrcVT.getScalarType() != MVT::i1)
    return SDValue();

  // SSE1-only: if the v4i1 source can be re-expressed as v4f32 sign bits,
  // extract them with a float MOVMSK.
  if (Subtarget.hasSSE1() && !Subtarget.hasSSE2()) {
    if (SDValue V = adjustBitcastSrcVectorSSE1(DAG, Src, DL)) {
      V = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32,
                      DAG.getBitcast(MVT::v4f32, V));
      return DAG.getZExtOrTrunc(V, DL, VT);
    }
  }

  // With AVX512 vXi1 types are legal, so normally we bail below - but for a
  // one-use truncate from vXi8, MOVMSK is still preferred.
  bool PreferMovMsk = Src.getOpcode() == ISD::TRUNCATE && Src.hasOneUse() &&
                      (Src.getOperand(0).getValueType() == MVT::v16i8 ||
                       Src.getOperand(0).getValueType() == MVT::v32i8 ||
                       Src.getOperand(0).getValueType() == MVT::v64i8);

  // Likewise prefer MOVMSK for a one-use sign-bit compare ("X < 0") on
  // element types MOVMSK handles directly, up to 256 bits.
  if (Src.getOpcode() == ISD::SETCC && Src.hasOneUse() &&
      cast<CondCodeSDNode>(Src.getOperand(2))->get() == ISD::SETLT &&
      ISD::isBuildVectorAllZeros(Src.getOperand(1).getNode())) {
    EVT CmpVT = Src.getOperand(0).getValueType();
    EVT EltVT = CmpVT.getVectorElementType();
    if (CmpVT.getSizeInBits() <= 256 &&
        (EltVT == MVT::i8 || EltVT == MVT::i32 || EltVT == MVT::i64))
      PreferMovMsk = true;
  }

  // Need SSE2 for the sext/MOVMSK path; with AVX512 the mask registers are
  // usually better unless one of the preferences above fired.
  if (!Subtarget.hasSSE2() || (Subtarget.hasAVX512() && !PreferMovMsk))
    return SDValue();

  // Pick the vector type to sign-extend the i1 elements into so the sign
  // bits line up with what MOVMSK/PMOVMSKB reads. PropagateSExt means the
  // extension is pushed through logic ops (signExtendBitcastSrcVector) to
  // avoid an extra truncate when the inputs are already wide.
  MVT SExtVT;
  bool PropagateSExt = false;
  switch (SrcVT.getSimpleVT().SimpleTy) {
  default:
    return SDValue();
  case MVT::v2i1:
    SExtVT = MVT::v2i64;
    break;
  case MVT::v4i1:
    SExtVT = MVT::v4i32;
    // If the source is already 256-bit wide, extend to v4i64 instead.
    if (Subtarget.hasAVX() &&
        checkBitcastSrcVectorSize(Src, 256, Subtarget.hasAVX2())) {
      SExtVT = MVT::v4i64;
      PropagateSExt = true;
    }
    break;
  case MVT::v8i1:
    SExtVT = MVT::v8i16;
    // v8i16 has no MOVMSK; it is PACKSSed to v16i8 below. For wide sources,
    // extend to v8i32 instead.
    if (Subtarget.hasAVX() && (checkBitcastSrcVectorSize(Src, 256, true) ||
                               checkBitcastSrcVectorSize(Src, 512, true))) {
      SExtVT = MVT::v8i32;
      PropagateSExt = true;
    }
    break;
  case MVT::v16i1:
    SExtVT = MVT::v16i8;
    break;
  case MVT::v32i1:
    SExtVT = MVT::v32i8;
    break;
  case MVT::v64i1:
    // With AVX512 (sans BWI) split into v64i8; with BWI the mask type is
    // legal, so leave it alone.
    if (Subtarget.hasAVX512()) {
      if (Subtarget.hasBWI())
        return SDValue();
      SExtVT = MVT::v64i8;
      break;
    }
    // Without AVX512, only handle sources already derived from 512 bits.
    if (checkBitcastSrcVectorSize(Src, 512, false)) {
      SExtVT = MVT::v64i8;
      break;
    }
    return SDValue();
  };

  SDValue V = PropagateSExt ? signExtendBitcastSrcVector(DAG, SExtVT, Src, DL)
                            : DAG.getNode(ISD::SIGN_EXTEND, DL, SExtVT, Src);

  if (SExtVT == MVT::v16i8 || SExtVT == MVT::v32i8 || SExtVT == MVT::v64i8) {
    V = getPMOVMSKB(DL, V, DAG, Subtarget);
  } else {
    // No MOVMSK for 16-bit elements: pack to i8 first (sign bits survive
    // PACKSS saturation).
    if (SExtVT == MVT::v8i16)
      V = DAG.getNode(X86ISD::PACKSS, DL, MVT::v16i8, V,
                      DAG.getUNDEF(MVT::v8i16));
    V = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, V);
  }

  // Narrow the mask to one bit per source element, then bitcast to the
  // requested result type.
  EVT IntVT =
      EVT::getIntegerVT(*DAG.getContext(), SrcVT.getVectorNumElements());
  V = DAG.getZExtOrTrunc(V, DL, IntVT);
  return DAG.getBitcast(VT, V);
}
| 39948 | |
| 39949 | |
| 39950 | static SDValue combinevXi1ConstantToInteger(SDValue Op, SelectionDAG &DAG) { |
| 39951 | EVT SrcVT = Op.getValueType(); |
| 39952 | assert(SrcVT.getVectorElementType() == MVT::i1 && |
| 39953 | "Expected a vXi1 vector"); |
| 39954 | assert(ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) && |
| 39955 | "Expected a constant build vector"); |
| 39956 | |
| 39957 | APInt Imm(SrcVT.getVectorNumElements(), 0); |
| 39958 | for (unsigned Idx = 0, e = Op.getNumOperands(); Idx < e; ++Idx) { |
| 39959 | SDValue In = Op.getOperand(Idx); |
| 39960 | if (!In.isUndef() && (cast<ConstantSDNode>(In)->getZExtValue() & 0x1)) |
| 39961 | Imm.setBit(Idx); |
| 39962 | } |
| 39963 | EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), Imm.getBitWidth()); |
| 39964 | return DAG.getConstant(Imm, SDLoc(Op), IntVT); |
| 39965 | } |
| 39966 | |
// Given a bitcast of a bitwise logic op between a vXi1 mask and a scalar
// integer (either direction), try to perform the logic directly in the
// destination type so the inner bitcasts can be folded away. Requires
// AVX512, where vXi1 mask operations are legal (k-registers).
static SDValue combineCastedMaskArithmetic(SDNode *N, SelectionDAG &DAG,
                                           TargetLowering::DAGCombinerInfo &DCI,
                                           const X86Subtarget &Subtarget) {
  assert(N->getOpcode() == ISD::BITCAST && "Expected a bitcast");

  if (!DCI.isBeforeLegalizeOps())
    return SDValue();

  // Only profitable when vXi1 logic is legal.
  if (!Subtarget.hasAVX512())
    return SDValue();

  EVT DstVT = N->getValueType(0);
  SDValue Op = N->getOperand(0);
  EVT SrcVT = Op.getValueType();

  // The logic op must die with this bitcast.
  if (!Op.hasOneUse())
    return SDValue();

  // Only plain bitwise logic ops are handled.
  if (Op.getOpcode() != ISD::AND &&
      Op.getOpcode() != ISD::OR &&
      Op.getOpcode() != ISD::XOR)
    return SDValue();

  // Require a vXi1 <-> scalar-integer bitcast (either direction).
  if (!(SrcVT.isVector() && SrcVT.getVectorElementType() == MVT::i1 &&
        DstVT.isScalarInteger()) &&
      !(DstVT.isVector() && DstVT.getVectorElementType() == MVT::i1 &&
        SrcVT.isScalarInteger()))
    return SDValue();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);

  // If one operand is itself a one-use bitcast from DstVT, do the logic in
  // DstVT: bitcast(logic(bitcast(X), Y)) -> logic(X, bitcast(Y)).
  if (LHS.hasOneUse() && LHS.getOpcode() == ISD::BITCAST &&
      LHS.getOperand(0).getValueType() == DstVT)
    return DAG.getNode(Op.getOpcode(), SDLoc(N), DstVT, LHS.getOperand(0),
                       DAG.getBitcast(DstVT, RHS));

  if (RHS.hasOneUse() && RHS.getOpcode() == ISD::BITCAST &&
      RHS.getOperand(0).getValueType() == DstVT)
    return DAG.getNode(Op.getOpcode(), SDLoc(N), DstVT,
                       DAG.getBitcast(DstVT, LHS), RHS.getOperand(0));

  // A constant vXi1 RHS can always be rewritten as the equivalent integer
  // constant, letting the logic happen in DstVT.
  if (ISD::isBuildVectorOfConstantSDNodes(RHS.getNode())) {
    RHS = combinevXi1ConstantToInteger(RHS, DAG);
    return DAG.getNode(Op.getOpcode(), SDLoc(N), DstVT,
                       DAG.getBitcast(DstVT, LHS), RHS);
  }

  return SDValue();
}
| 40022 | |
// Lower a BUILD_VECTOR destined for an x86mmx value: move each scalar
// element into an MMX register and interleave them together with punpck
// intrinsics (with a pshufw/punpck shortcut for splats when SSE1 is
// available).
static SDValue createMMXBuildVector(BuildVectorSDNode *BV, SelectionDAG &DAG,
                                    const X86Subtarget &Subtarget) {
  SDLoc DL(BV);
  unsigned NumElts = BV->getNumOperands();
  SDValue Splat = BV->getSplatValue();

  // Move a scalar element into the low part of an MMX register.
  auto CreateMMXElement = [&](SDValue V) {
    if (V.isUndef())
      return DAG.getUNDEF(MVT::x86mmx);
    if (V.getValueType().isFloatingPoint()) {
      // Non-constant FP with SSE1: go through a v4f32/v2i64 temporary and
      // MOVDQ2Q rather than an int<->fp domain crossing.
      if (Subtarget.hasSSE1() && !isa<ConstantFPSDNode>(V)) {
        V = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v4f32, V);
        V = DAG.getBitcast(MVT::v2i64, V);
        return DAG.getNode(X86ISD::MOVDQ2Q, DL, MVT::x86mmx, V);
      }
      V = DAG.getBitcast(MVT::i32, V);
    } else {
      V = DAG.getAnyExtOrTrunc(V, DL, MVT::i32);
    }
    return DAG.getNode(X86ISD::MMX_MOVW2D, DL, MVT::x86mmx, V);
  };

  // Per-element MMX values, to be interleaved pairwise below.
  SmallVector<SDValue, 8> Ops;

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  // Splat: build one element, then broadcast it.
  if (Splat) {
    if (Splat.isUndef())
      return DAG.getUNDEF(MVT::x86mmx);

    Splat = CreateMMXElement(Splat);

    if (Subtarget.hasSSE1()) {
      // For 8 x i8 first duplicate the byte into a word so pshufw (which
      // shuffles 16-bit lanes) can finish the splat.
      if (NumElts == 8)
        Splat = DAG.getNode(
            ISD::INTRINSIC_WO_CHAIN, DL, MVT::x86mmx,
            DAG.getTargetConstant(Intrinsic::x86_mmx_punpcklbw, DL,
                                  TLI.getPointerTy(DAG.getDataLayout())),
            Splat, Splat);

      // pshufw immediate: 0 replicates lane 0 everywhere; 0x44 replicates
      // the low two lanes (for the 2-element case).
      unsigned ShufMask = (NumElts > 2 ? 0 : 0x44);
      return DAG.getNode(
          ISD::INTRINSIC_WO_CHAIN, DL, MVT::x86mmx,
          DAG.getTargetConstant(Intrinsic::x86_sse_pshuf_w, DL,
                                TLI.getPointerTy(DAG.getDataLayout())),
          Splat, DAG.getTargetConstant(ShufMask, DL, MVT::i8));
    }
    // No SSE1: fall through to the generic pairwise unpack below.
    Ops.append(NumElts, Splat);
  } else {
    for (unsigned i = 0; i != NumElts; ++i)
      Ops.push_back(CreateMMXElement(BV->getOperand(i)));
  }

  // Interleave adjacent pairs with the element-width-matching punpckl
  // intrinsic, halving the list each round until one value remains.
  while (Ops.size() > 1) {
    unsigned NumOps = Ops.size();
    unsigned IntrinOp =
        (NumOps == 2 ? Intrinsic::x86_mmx_punpckldq
                     : (NumOps == 4 ? Intrinsic::x86_mmx_punpcklwd
                                    : Intrinsic::x86_mmx_punpcklbw));
    SDValue Intrin = DAG.getTargetConstant(
        IntrinOp, DL, TLI.getPointerTy(DAG.getDataLayout()));
    for (unsigned i = 0; i != NumOps; i += 2)
      Ops[i / 2] = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, MVT::x86mmx, Intrin,
                               Ops[i], Ops[i + 1]);
    Ops.resize(NumOps / 2);
  }

  return Ops[0];
}
| 40098 | |
| 40099 | |
| 40100 | |
| 40101 | |
| 40102 | |
// Try to rebuild a scalar-integer value V as the vXi1 bool vector VT by
// recursing through the scalar ops that produced it (bitcast, truncate,
// extend, or, shl) and re-expressing each in mask-vector form. Returns an
// empty SDValue if V cannot be recreated this way.
static SDValue combineBitcastToBoolVector(EVT VT, SDValue V, const SDLoc &DL,
                                          SelectionDAG &DAG,
                                          const X86Subtarget &Subtarget) {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  unsigned Opc = V.getOpcode();
  switch (Opc) {
  case ISD::BITCAST: {
    // A bitcast from a vector or FP source can be redirected straight to VT.
    SDValue Src = V.getOperand(0);
    EVT SrcVT = Src.getValueType();
    if (SrcVT.isVector() || SrcVT.isFloatingPoint())
      return DAG.getBitcast(VT, Src);
    break;
  }
  case ISD::TRUNCATE: {
    // Scalar truncate == taking the low subvector of the wider mask.
    SDValue Src = V.getOperand(0);
    EVT NewSrcVT =
        EVT::getVectorVT(*DAG.getContext(), MVT::i1, Src.getValueSizeInBits());
    if (TLI.isTypeLegal(NewSrcVT))
      if (SDValue N0 =
              combineBitcastToBoolVector(NewSrcVT, Src, DL, DAG, Subtarget))
        return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, N0,
                           DAG.getIntPtrConstant(0, DL));
    break;
  }
  case ISD::ANY_EXTEND:
  case ISD::ZERO_EXTEND: {
    // Scalar extend == inserting the narrow mask into the low lanes of a
    // wider vector (undef upper lanes for anyext, zero for zext).
    SDValue Src = V.getOperand(0);
    EVT NewSrcVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1,
                                    Src.getScalarValueSizeInBits());
    if (TLI.isTypeLegal(NewSrcVT))
      if (SDValue N0 =
              combineBitcastToBoolVector(NewSrcVT, Src, DL, DAG, Subtarget))
        return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
                           Opc == ISD::ANY_EXTEND ? DAG.getUNDEF(VT)
                                                  : DAG.getConstant(0, DL, VT),
                           N0, DAG.getIntPtrConstant(0, DL));
    break;
  }
  case ISD::OR: {
    // OR maps elementwise onto the mask vectors; both sides must convert.
    SDValue Src0 = V.getOperand(0);
    SDValue Src1 = V.getOperand(1);
    if (SDValue N0 = combineBitcastToBoolVector(VT, Src0, DL, DAG, Subtarget))
      if (SDValue N1 = combineBitcastToBoolVector(VT, Src1, DL, DAG, Subtarget))
        return DAG.getNode(Opc, DL, VT, N0, N1);
    break;
  }
  case ISD::SHL: {
    // Constant scalar shift-left == KSHIFTL on the mask vector.
    SDValue Src0 = V.getOperand(0);

    // Only if the corresponding kshift of this width is available (v8i1
    // needs DQI; v32i1/v64i1 need BWI).
    if ((VT == MVT::v8i1 && !Subtarget.hasDQI()) ||
        ((VT == MVT::v32i1 || VT == MVT::v64i1) && !Subtarget.hasBWI()))
      break;

    if (auto *Amt = dyn_cast<ConstantSDNode>(V.getOperand(1)))
      if (SDValue N0 = combineBitcastToBoolVector(VT, Src0, DL, DAG, Subtarget))
        return DAG.getNode(
            X86ISD::KSHIFTL, DL, VT, N0,
            DAG.getTargetConstant(Amt->getZExtValue(), DL, MVT::i8));
    break;
  }
  }
  return SDValue();
}
| 40170 | |
| 40171 | static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG, |
| 40172 | TargetLowering::DAGCombinerInfo &DCI, |
| 40173 | const X86Subtarget &Subtarget) { |
| 40174 | SDValue N0 = N->getOperand(0); |
| 40175 | EVT VT = N->getValueType(0); |
| 40176 | EVT SrcVT = N0.getValueType(); |
| 40177 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
| 40178 | |
| 40179 | |
| 40180 | |
| 40181 | |
| 40182 | |
| 40183 | |
| 40184 | |
| 40185 | if (DCI.isBeforeLegalize()) { |
| 40186 | SDLoc dl(N); |
| 40187 | if (SDValue V = combineBitcastvxi1(DAG, VT, N0, dl, Subtarget)) |
| 40188 | return V; |
| 40189 | |
| 40190 | |
| 40191 | |
| 40192 | if ((VT == MVT::v4i1 || VT == MVT::v2i1) && SrcVT.isScalarInteger() && |
| 40193 | Subtarget.hasAVX512()) { |
| 40194 | N0 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i8, N0); |
| 40195 | N0 = DAG.getBitcast(MVT::v8i1, N0); |
| 40196 | return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, N0, |
| 40197 | DAG.getIntPtrConstant(0, dl)); |
| 40198 | } |
| 40199 | |
| 40200 | |
| 40201 | |
| 40202 | if ((SrcVT == MVT::v4i1 || SrcVT == MVT::v2i1) && VT.isScalarInteger() && |
| 40203 | Subtarget.hasAVX512()) { |
| 40204 | |
| 40205 | |
| 40206 | |
| 40207 | |
| 40208 | |
| 40209 | |
| 40210 | |
| 40211 | if (N0.getOpcode() == ISD::CONCAT_VECTORS) { |
| 40212 | SDValue LastOp = N0.getOperand(N0.getNumOperands() - 1); |
| 40213 | if (ISD::isBuildVectorAllZeros(LastOp.getNode())) { |
| 40214 | SrcVT = LastOp.getValueType(); |
| 40215 | unsigned NumConcats = 8 / SrcVT.getVectorNumElements(); |
| 40216 | SmallVector<SDValue, 4> Ops(N0->op_begin(), N0->op_end()); |
| 40217 | Ops.resize(NumConcats, DAG.getConstant(0, dl, SrcVT)); |
| 40218 | N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i1, Ops); |
| 40219 | N0 = DAG.getBitcast(MVT::i8, N0); |
| 40220 | return DAG.getNode(ISD::TRUNCATE, dl, VT, N0); |
| 40221 | } |
| 40222 | } |
| 40223 | |
| 40224 | unsigned NumConcats = 8 / SrcVT.getVectorNumElements(); |
| 40225 | SmallVector<SDValue, 4> Ops(NumConcats, DAG.getUNDEF(SrcVT)); |
| 40226 | Ops[0] = N0; |
| 40227 | N0 = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i1, Ops); |
| 40228 | N0 = DAG.getBitcast(MVT::i8, N0); |
| 40229 | return DAG.getNode(ISD::TRUNCATE, dl, VT, N0); |
| 40230 | } |
| 40231 | } else { |
| 40232 | |
| 40233 | |
| 40234 | if (VT.isVector() && VT.getScalarType() == MVT::i1 && |
| 40235 | SrcVT.isScalarInteger() && TLI.isTypeLegal(VT)) { |
| 40236 | if (SDValue V = |
| 40237 | combineBitcastToBoolVector(VT, N0, SDLoc(N), DAG, Subtarget)) |
| 40238 | return V; |
| 40239 | } |
| 40240 | } |
| 40241 | |
| 40242 | |
| 40243 | |
| 40244 | |
| 40245 | |
| 40246 | |
| 40247 | if (VT == MVT::i8 && SrcVT == MVT::v8i1 && Subtarget.hasAVX512() && |
| 40248 | !Subtarget.hasDQI() && N0.getOpcode() == ISD::EXTRACT_SUBVECTOR && |
| 40249 | N0.getOperand(0).getValueType() == MVT::v16i1 && |
| 40250 | isNullConstant(N0.getOperand(1))) |
| 40251 | return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, |
| 40252 | DAG.getBitcast(MVT::i16, N0.getOperand(0))); |
| 40253 | |
| 40254 | |
| 40255 | |
| 40256 | |
| 40257 | if (N0.getOpcode() == X86ISD::VBROADCAST_LOAD && N0.hasOneUse() && |
| 40258 | VT.isFloatingPoint() != SrcVT.isFloatingPoint() && VT.isVector()) { |
| 40259 | auto *BCast = cast<MemIntrinsicSDNode>(N0); |
| 40260 | unsigned SrcVTSize = SrcVT.getScalarSizeInBits(); |
| 40261 | unsigned MemSize = BCast->getMemoryVT().getScalarSizeInBits(); |
| 40262 | |
| 40263 | if (MemSize >= 32) { |
| 40264 | MVT MemVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(MemSize) |
| 40265 | : MVT::getIntegerVT(MemSize); |
| 40266 | MVT LoadVT = VT.isFloatingPoint() ? MVT::getFloatingPointVT(SrcVTSize) |
| 40267 | : MVT::getIntegerVT(SrcVTSize); |
| 40268 | LoadVT = MVT::getVectorVT(LoadVT, SrcVT.getVectorNumElements()); |
| 40269 | |
| 40270 | SDVTList Tys = DAG.getVTList(LoadVT, MVT::Other); |
| 40271 | SDValue Ops[] = { BCast->getChain(), BCast->getBasePtr() }; |
| 40272 | SDValue ResNode = |
| 40273 | DAG.getMemIntrinsicNode(X86ISD::VBROADCAST_LOAD, SDLoc(N), Tys, Ops, |
| 40274 | MemVT, BCast->getMemOperand()); |
| 40275 | DAG.ReplaceAllUsesOfValueWith(SDValue(BCast, 1), ResNode.getValue(1)); |
| 40276 | return DAG.getBitcast(VT, ResNode); |
| 40277 | } |
| 40278 | } |
| 40279 | |
| 40280 | |
| 40281 | |
| 40282 | |
| 40283 | if (VT == MVT::x86mmx) { |
| 40284 | |
| 40285 | APInt UndefElts; |
| 40286 | SmallVector<APInt, 1> EltBits; |
| 40287 | if (getTargetConstantBitsFromNode(N0, 64, UndefElts, EltBits)) { |
| 40288 | SDLoc DL(N0); |
| 40289 | |
| 40290 | if (EltBits[0].countLeadingZeros() >= 32) |
| 40291 | return DAG.getNode(X86ISD::MMX_MOVW2D, DL, VT, |
| 40292 | DAG.getConstant(EltBits[0].trunc(32), DL, MVT::i32)); |
| 40293 | |
| 40294 | |
| 40295 | APFloat F64(APFloat::IEEEdouble(), EltBits[0]); |
| 40296 | return DAG.getBitcast(VT, DAG.getConstantFP(F64, DL, MVT::f64)); |
| 40297 | } |
| 40298 | |
| 40299 | |
| 40300 | if (N0.getOpcode() == ISD::BUILD_VECTOR && |
| 40301 | (SrcVT == MVT::v2i32 || SrcVT == MVT::v4i16 || SrcVT == MVT::v8i8) && |
| 40302 | N0.getOperand(0).getValueType() == SrcVT.getScalarType()) { |
| 40303 | bool LowUndef = true, AllUndefOrZero = true; |
| 40304 | for (unsigned i = 1, e = SrcVT.getVectorNumElements(); i != e; ++i) { |
| 40305 | SDValue Op = N0.getOperand(i); |
| 40306 | LowUndef &= Op.isUndef() || (i >= e/2); |
| 40307 | AllUndefOrZero &= (Op.isUndef() || isNullConstant(Op)); |
| 40308 | } |
| 40309 | if (AllUndefOrZero) { |
| 40310 | SDValue N00 = N0.getOperand(0); |
| 40311 | SDLoc dl(N00); |
| 40312 | N00 = LowUndef ? DAG.getAnyExtOrTrunc(N00, dl, MVT::i32) |
| 40313 | : DAG.getZExtOrTrunc(N00, dl, MVT::i32); |
| 40314 | return DAG.getNode(X86ISD::MMX_MOVW2D, dl, VT, N00); |
| 40315 | } |
| 40316 | } |
| 40317 | |
| 40318 | |
| 40319 | |
| 40320 | |
| 40321 | if (N0.getOpcode() == ISD::BUILD_VECTOR && |
| 40322 | (SrcVT == MVT::v2f32 || SrcVT == MVT::v2i32 || SrcVT == MVT::v4i16 || |
| 40323 | SrcVT == MVT::v8i8)) |
| 40324 | return createMMXBuildVector(cast<BuildVectorSDNode>(N0), DAG, Subtarget); |
| 40325 | |
| 40326 | |
| 40327 | if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT || |
| 40328 | N0.getOpcode() == ISD::EXTRACT_SUBVECTOR) && |
| 40329 | isNullConstant(N0.getOperand(1))) { |
| 40330 | SDValue N00 = N0.getOperand(0); |
| 40331 | if (N00.getValueType().is128BitVector()) |
| 40332 | return DAG.getNode(X86ISD::MOVDQ2Q, SDLoc(N00), VT, |
| 40333 | DAG.getBitcast(MVT::v2i64, N00)); |
| 40334 | } |
| 40335 | |
| 40336 | |
| 40337 | if (SrcVT == MVT::v2i32 && N0.getOpcode() == ISD::FP_TO_SINT) { |
| 40338 | SDLoc DL(N0); |
| 40339 | SDValue Res = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4i32, N0, |
| 40340 | DAG.getUNDEF(MVT::v2i32)); |
| 40341 | return DAG.getNode(X86ISD::MOVDQ2Q, DL, VT, |
| 40342 | DAG.getBitcast(MVT::v2i64, Res)); |
| 40343 | } |
| 40344 | } |
| 40345 | |
| 40346 | |
| 40347 | |
| 40348 | if (Subtarget.hasAVX512() && VT.isScalarInteger() && |
| 40349 | SrcVT.isVector() && SrcVT.getVectorElementType() == MVT::i1 && |
| 40350 | ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) { |
| 40351 | return combinevXi1ConstantToInteger(N0, DAG); |
| 40352 | } |
| 40353 | |
| 40354 | if (Subtarget.hasAVX512() && SrcVT.isScalarInteger() && |
| 40355 | VT.isVector() && VT.getVectorElementType() == MVT::i1 && |
| 40356 | isa<ConstantSDNode>(N0)) { |
| 40357 | auto *C = cast<ConstantSDNode>(N0); |
| 40358 | if (C->isAllOnesValue()) |
| 40359 | return DAG.getConstant(1, SDLoc(N0), VT); |
| 40360 | if (C->isNullValue()) |
| 40361 | return DAG.getConstant(0, SDLoc(N0), VT); |
| 40362 | } |
| 40363 | |
| 40364 | |
| 40365 | |
| 40366 | |
| 40367 | if (Subtarget.hasAVX512() && SrcVT.isScalarInteger() && |
| 40368 | VT.isVector() && VT.getVectorElementType() == MVT::i1 && |
| 40369 | isPowerOf2_32(VT.getVectorNumElements())) { |
| 40370 | unsigned NumElts = VT.getVectorNumElements(); |
| 40371 | SDValue Src = N0; |
| 40372 | |
| 40373 | |
| 40374 | if (N0.getOpcode() == ISD::TRUNCATE && N0.hasOneUse()) |
| 40375 | Src = N0.getOperand(0); |
| 40376 | |
| 40377 | if (Src.getOpcode() == X86ISD::MOVMSK && Src.hasOneUse()) { |
| 40378 | SDValue MovmskIn = Src.getOperand(0); |
| 40379 | MVT MovmskVT = MovmskIn.getSimpleValueType(); |
| 40380 | unsigned MovMskElts = MovmskVT.getVectorNumElements(); |
| 40381 | |
| 40382 | |
| 40383 | |
| 40384 | if (MovMskElts <= NumElts && |
| 40385 | (Subtarget.hasBWI() || MovmskVT.getVectorElementType() != MVT::i8)) { |
| 40386 | EVT IntVT = EVT(MovmskVT).changeVectorElementTypeToInteger(); |
| 40387 | MovmskIn = DAG.getBitcast(IntVT, MovmskIn); |
| 40388 | SDLoc dl(N); |
| 40389 | MVT CmpVT = MVT::getVectorVT(MVT::i1, MovMskElts); |
| 40390 | SDValue Cmp = DAG.getSetCC(dl, CmpVT, MovmskIn, |
| 40391 | DAG.getConstant(0, dl, IntVT), ISD::SETLT); |
| 40392 | if (EVT(CmpVT) == VT) |
| 40393 | return Cmp; |
| 40394 | |
| 40395 | |
| 40396 | |
| 40397 | unsigned NumConcats = NumElts / MovMskElts; |
| 40398 | SmallVector<SDValue, 4> Ops(NumConcats, DAG.getConstant(0, dl, CmpVT)); |
| 40399 | Ops[0] = Cmp; |
| 40400 | return DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, Ops); |
| 40401 | } |
| 40402 | } |
| 40403 | } |
| 40404 | |
| 40405 | |
| 40406 | |
| 40407 | if (SDValue V = combineCastedMaskArithmetic(N, DAG, DCI, Subtarget)) |
| 40408 | return V; |
| 40409 | |
| 40410 | |
| 40411 | |
| 40412 | |
| 40413 | |
| 40414 | |
| 40415 | unsigned FPOpcode; |
| 40416 | switch (N0.getOpcode()) { |
| 40417 | case ISD::AND: FPOpcode = X86ISD::FAND; break; |
| 40418 | case ISD::OR: FPOpcode = X86ISD::FOR; break; |
| 40419 | case ISD::XOR: FPOpcode = X86ISD::FXOR; break; |
| 40420 | default: return SDValue(); |
| 40421 | } |
| 40422 | |
| 40423 | |
| 40424 | if (!((Subtarget.hasSSE1() && VT == MVT::f32) || |
| 40425 | (Subtarget.hasSSE2() && VT == MVT::f64) || |
| 40426 | (Subtarget.hasSSE2() && VT.isInteger() && VT.isVector() && |
| 40427 | TLI.isTypeLegal(VT)))) |
| 40428 | return SDValue(); |
| 40429 | |
| 40430 | SDValue LogicOp0 = N0.getOperand(0); |
| 40431 | SDValue LogicOp1 = N0.getOperand(1); |
| 40432 | SDLoc DL0(N0); |
| 40433 | |
| 40434 | |
| 40435 | if (N0.hasOneUse() && LogicOp0.getOpcode() == ISD::BITCAST && |
| 40436 | LogicOp0.hasOneUse() && LogicOp0.getOperand(0).hasOneUse() && |
| 40437 | LogicOp0.getOperand(0).getValueType() == VT && |
| 40438 | !isa<ConstantSDNode>(LogicOp0.getOperand(0))) { |
| 40439 | SDValue CastedOp1 = DAG.getBitcast(VT, LogicOp1); |
| 40440 | unsigned Opcode = VT.isFloatingPoint() ? FPOpcode : N0.getOpcode(); |
| 40441 | return DAG.getNode(Opcode, DL0, VT, LogicOp0.getOperand(0), CastedOp1); |
| 40442 | } |
| 40443 | |
| 40444 | if (N0.hasOneUse() && LogicOp1.getOpcode() == ISD::BITCAST && |
| 40445 | LogicOp1.hasOneUse() && LogicOp1.getOperand(0).hasOneUse() && |
| 40446 | LogicOp1.getOperand(0).getValueType() == VT && |
| 40447 | !isa<ConstantSDNode>(LogicOp1.getOperand(0))) { |
| 40448 | SDValue CastedOp0 = DAG.getBitcast(VT, LogicOp0); |
| 40449 | unsigned Opcode = VT.isFloatingPoint() ? FPOpcode : N0.getOpcode(); |
| 40450 | return DAG.getNode(Opcode, DL0, VT, LogicOp1.getOperand(0), CastedOp0); |
| 40451 | } |
| 40452 | |
| 40453 | return SDValue(); |
| 40454 | } |
| 40455 | |
| 40456 | |
| 40457 | |
| 40458 | |
| 40459 | static bool detectZextAbsDiff(const SDValue &Abs, SDValue &Op0, SDValue &Op1) { |
| 40460 | SDValue AbsOp1 = Abs->getOperand(0); |
| 40461 | if (AbsOp1.getOpcode() != ISD::SUB) |
| 40462 | return false; |
| 40463 | |
| 40464 | Op0 = AbsOp1.getOperand(0); |
| 40465 | Op1 = AbsOp1.getOperand(1); |
| 40466 | |
| 40467 | |
| 40468 | if (Op0.getOpcode() != ISD::ZERO_EXTEND || |
| 40469 | Op0.getOperand(0).getValueType().getVectorElementType() != MVT::i8 || |
| 40470 | Op1.getOpcode() != ISD::ZERO_EXTEND || |
| 40471 | Op1.getOperand(0).getValueType().getVectorElementType() != MVT::i8) |
| 40472 | return false; |
| 40473 | |
| 40474 | return true; |
| 40475 | } |
| 40476 | |
| 40477 | |
| 40478 | |
| 40479 | static SDValue createPSADBW(SelectionDAG &DAG, const SDValue &Zext0, |
| 40480 | const SDValue &Zext1, const SDLoc &DL, |
| 40481 | const X86Subtarget &Subtarget) { |
| 40482 | |
| 40483 | EVT InVT = Zext0.getOperand(0).getValueType(); |
| 40484 | unsigned RegSize = std::max(128u, (unsigned)InVT.getSizeInBits()); |
| 40485 | |
| 40486 | |
| 40487 | |
| 40488 | unsigned NumConcat = RegSize / InVT.getSizeInBits(); |
| 40489 | SmallVector<SDValue, 16> Ops(NumConcat, DAG.getConstant(0, DL, InVT)); |
| 40490 | Ops[0] = Zext0.getOperand(0); |
| 40491 | MVT ExtendedVT = MVT::getVectorVT(MVT::i8, RegSize / 8); |
| 40492 | SDValue SadOp0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, ExtendedVT, Ops); |
| 40493 | Ops[0] = Zext1.getOperand(0); |
| 40494 | SDValue SadOp1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, ExtendedVT, Ops); |
| 40495 | |
| 40496 | |
| 40497 | auto PSADBWBuilder = [](SelectionDAG &DAG, const SDLoc &DL, |
| 40498 | ArrayRef<SDValue> Ops) { |
| 40499 | MVT VT = MVT::getVectorVT(MVT::i64, Ops[0].getValueSizeInBits() / 64); |
| 40500 | return DAG.getNode(X86ISD::PSADBW, DL, VT, Ops); |
| 40501 | }; |
| 40502 | MVT SadVT = MVT::getVectorVT(MVT::i64, RegSize / 64); |
| 40503 | return SplitOpsAndApply(DAG, Subtarget, DL, SadVT, { SadOp0, SadOp1 }, |
| 40504 | PSADBWBuilder); |
| 40505 | } |
| 40506 | |
| 40507 | |
| 40508 | |
// Attempt to replace an i8/i16 min/max horizontal reduction (a tree of
// SMAX/SMIN/UMAX/UMIN ops ending in an extract of element 0) with
// X86ISD::PHMINPOS (the SSE4.1 PHMINPOSUW instruction).
static SDValue combineMinMaxReduction(SDNode *Extract, SelectionDAG &DAG,
                                      const X86Subtarget &Subtarget) {
  // PHMINPOSUW requires SSE4.1.
  if (!Subtarget.hasSSE41())
    return SDValue();

  // PHMINPOSUW operates on i16 lanes; i8 is handled by pairing bytes below.
  EVT ExtractVT = Extract->getValueType(0);
  if (ExtractVT != MVT::i16 && ExtractVT != MVT::i8)
    return SDValue();

  // Match a min/max reduction tree feeding the extract.
  ISD::NodeType BinOp;
  SDValue Src = DAG.matchBinOpReduction(
      Extract, BinOp, {ISD::SMAX, ISD::SMIN, ISD::UMAX, ISD::UMIN}, true);
  if (!Src)
    return SDValue();

  // The reduction source must have the extracted element type and be a
  // multiple of 128 bits so it can be halved down to one 128-bit vector.
  EVT SrcVT = Src.getValueType();
  EVT SrcSVT = SrcVT.getScalarType();
  if (SrcSVT != ExtractVT || (SrcVT.getSizeInBits() % 128) != 0)
    return SDValue();

  SDLoc DL(Extract);
  SDValue MinPos = Src;

  // First reduce the source to a single 128-bit vector by repeatedly
  // splitting in half and combining the halves with the reduction op.
  while (SrcVT.getSizeInBits() > 128) {
    SDValue Lo, Hi;
    std::tie(Lo, Hi) = splitVector(MinPos, DAG, DL);
    SrcVT = Lo.getValueType();
    MinPos = DAG.getNode(BinOp, DL, SrcVT, Lo, Hi);
  }
  assert(((SrcVT == MVT::v8i16 && ExtractVT == MVT::i16) ||
          (SrcVT == MVT::v16i8 && ExtractVT == MVT::i8)) &&
         "Unexpected value type");

  // PHMINPOSUW computes an *unsigned minimum*. Map the other three reduction
  // kinds into the unsigned-min domain by XORing with a per-kind mask; the
  // same XOR is re-applied after the PHMINPOS to undo the conversion.
  // UMIN needs no adjustment (Mask stays null).
  SDValue Mask;
  unsigned MaskEltsBits = ExtractVT.getSizeInBits();
  if (BinOp == ISD::SMAX)
    Mask = DAG.getConstant(APInt::getSignedMaxValue(MaskEltsBits), DL, SrcVT);
  else if (BinOp == ISD::SMIN)
    Mask = DAG.getConstant(APInt::getSignedMinValue(MaskEltsBits), DL, SrcVT);
  else if (BinOp == ISD::UMAX)
    Mask = DAG.getConstant(APInt::getAllOnesValue(MaskEltsBits), DL, SrcVT);

  if (Mask)
    MinPos = DAG.getNode(ISD::XOR, DL, SrcVT, Mask, MinPos);

  // For v16i8: min each adjacent byte pair first. Shuffle the odd bytes into
  // the even lanes (filling the odd lanes with zero from the second shuffle
  // operand), then UMIN with the original. Each i16 lane now holds the pair's
  // byte minimum in its low byte with a zero high byte, so a v8i16
  // PHMINPOSUW yields the overall byte minimum.
  if (ExtractVT == MVT::i8) {
    SDValue Upper = DAG.getVectorShuffle(
        SrcVT, DL, MinPos, DAG.getConstant(0, DL, MVT::v16i8),
        {1, 16, 3, 16, 5, 16, 7, 16, 9, 16, 11, 16, 13, 16, 15, 16});
    MinPos = DAG.getNode(ISD::UMIN, DL, SrcVT, MinPos, Upper);
  }

  // Perform the horizontal unsigned-min; the result lands in element 0.
  MinPos = DAG.getBitcast(MVT::v8i16, MinPos);
  MinPos = DAG.getNode(X86ISD::PHMINPOS, DL, MVT::v8i16, MinPos);
  MinPos = DAG.getBitcast(SrcVT, MinPos);

  // Undo the domain-conversion XOR applied above, if any.
  if (Mask)
    MinPos = DAG.getNode(ISD::XOR, DL, SrcVT, Mask, MinPos);

  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ExtractVT, MinPos,
                     DAG.getIntPtrConstant(0, DL));
}
| 40581 | |
| 40582 | |
// Attempt to replace a boolean predicate reduction — an all_of (AND),
// any_of (OR), or parity (XOR, i1 only) tree of per-element predicate
// results ending in an extract — with a MOVMSK-based scalar test.
static SDValue combinePredicateReduction(SDNode *Extract, SelectionDAG &DAG,
                                         const X86Subtarget &Subtarget) {
  // MOVMSK requires SSE2.
  if (!Subtarget.hasSSE2())
    return SDValue();

  EVT ExtractVT = Extract->getValueType(0);
  unsigned BitWidth = ExtractVT.getSizeInBits();
  if (ExtractVT != MVT::i64 && ExtractVT != MVT::i32 && ExtractVT != MVT::i16 &&
      ExtractVT != MVT::i8 && ExtractVT != MVT::i1)
    return SDValue();

  // Match an OR/AND reduction; for i1 results also accept XOR (parity).
  ISD::NodeType BinOp;
  SDValue Match = DAG.matchBinOpReduction(Extract, BinOp, {ISD::OR, ISD::AND});
  if (!Match && ExtractVT == MVT::i1)
    Match = DAG.matchBinOpReduction(Extract, BinOp, {ISD::XOR});
  if (!Match)
    return SDValue();

  // The reduction's element width must match the extracted width (the
  // extract may otherwise implicitly extend/truncate).
  if (Match.getScalarValueSizeInBits() != BitWidth)
    return SDValue();

  SDValue Movmsk;
  SDLoc DL(Extract);
  EVT MatchVT = Match.getValueType();
  unsigned NumElts = MatchVT.getVectorNumElements();
  // Widest MOVMSK available: 32 bits with AVX2 (256-bit PMOVMSKB), else 16.
  unsigned MaxElts = Subtarget.hasInt256() ? 32 : 16;
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  if (ExtractVT == MVT::i1) {
    // vXi1 reduction: turn the whole mask into one integer.
    if (NumElts > 64 || !isPowerOf2_32(NumElts))
      return SDValue();
    if (TLI.isTypeLegal(MatchVT)) {
      // Legal vXi1 mask types bitcast directly to an iN integer.
      EVT MovmskVT = EVT::getIntegerVT(*DAG.getContext(), NumElts);
      Movmsk = DAG.getBitcast(MovmskVT, Match);
    } else {
      // all_of(setcc(x, 0, eq)) on i64 elements without SSE4.1: an i64
      // element is zero iff both of its i32 halves are zero, so recast as a
      // twice-as-wide i32 compare (i32 compares map better onto MOVMSKPS).
      if (BinOp == ISD::AND && !Subtarget.hasSSE41() &&
          Match.getOpcode() == ISD::SETCC &&
          ISD::isBuildVectorAllZeros(Match.getOperand(1).getNode()) &&
          cast<CondCodeSDNode>(Match.getOperand(2))->get() ==
              ISD::CondCode::SETEQ) {
        SDValue Vec = Match.getOperand(0);
        if (Vec.getValueType().getScalarType() == MVT::i64 &&
            (2 * NumElts) <= MaxElts) {
          NumElts *= 2;
          EVT CmpVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts);
          MatchVT = EVT::getVectorVT(*DAG.getContext(), MVT::i1, NumElts);
          Match = DAG.getSetCC(
              DL, MatchVT, DAG.getBitcast(CmpVT, Match.getOperand(0)),
              DAG.getBitcast(CmpVT, Match.getOperand(1)), ISD::CondCode::SETEQ);
        }
      }

      // Halve the vector (combining halves with the reduction op) until it
      // fits in a single MOVMSK.
      while (NumElts > MaxElts) {
        SDValue Lo, Hi;
        std::tie(Lo, Hi) = DAG.SplitVector(Match, DL);
        Match = DAG.getNode(BinOp, DL, Lo.getValueType(), Lo, Hi);
        NumElts /= 2;
      }
      EVT MovmskVT = EVT::getIntegerVT(*DAG.getContext(), NumElts);
      Movmsk = combineBitcastvxi1(DAG, MovmskVT, Match, DL, Subtarget);
    }
    if (!Movmsk)
      return SDValue();
    Movmsk = DAG.getZExtOrTrunc(Movmsk, DL, NumElts > 32 ? MVT::i64 : MVT::i32);
  } else {
    // Wider-element reduction: source must be one (or, with AVX, two)
    // 128-bit registers.
    unsigned MatchSizeInBits = Match.getValueSizeInBits();
    if (!(MatchSizeInBits == 128 ||
          (MatchSizeInBits == 256 && Subtarget.hasAVX())))
      return SDValue();

    // A single-element "reduction" isn't worth rewriting.
    if (Match.getValueType().getVectorNumElements() < 2)
      return SDValue();

    // Every element must be an all-ones/all-zeros boolean (fully
    // sign-splatted) so MOVMSK's per-element sign bit captures its value.
    if (DAG.ComputeNumSignBits(Match) != BitWidth)
      return SDValue();

    // No 256-bit byte/word MOVMSK before AVX2: fold 256 bits down to 128
    // first by combining the halves.
    if (MatchSizeInBits == 256 && BitWidth < 32 && !Subtarget.hasInt256()) {
      SDValue Lo, Hi;
      std::tie(Lo, Hi) = DAG.SplitVector(Match, DL);
      Match = DAG.getNode(BinOp, DL, Lo.getValueType(), Lo, Hi);
      MatchSizeInBits = Match.getValueSizeInBits();
    }

    // Pick the MOVMSK source type: f32/f64 vectors for 32/64-bit elements
    // (MOVMSKPS/PD), otherwise bytes (PMOVMSKB).
    MVT MaskSrcVT;
    if (64 == BitWidth || 32 == BitWidth)
      MaskSrcVT = MVT::getVectorVT(MVT::getFloatingPointVT(BitWidth),
                                   MatchSizeInBits / BitWidth);
    else
      MaskSrcVT = MVT::getVectorVT(MVT::i8, MatchSizeInBits / 8);

    SDValue BitcastLogicOp = DAG.getBitcast(MaskSrcVT, Match);
    Movmsk = getPMOVMSKB(DL, BitcastLogicOp, DAG, Subtarget);
    NumElts = MaskSrcVT.getVectorNumElements();
  }
  assert((NumElts <= 32 || NumElts == 64) &&
         "Not expecting more than 64 elements");

  MVT CmpVT = NumElts == 64 ? MVT::i64 : MVT::i32;
  if (BinOp == ISD::XOR) {
    // Parity reduction: parity of the mask bits.
    SDValue Result = DAG.getNode(ISD::PARITY, DL, CmpVT, Movmsk);
    return DAG.getZExtOrTrunc(Result, DL, ExtractVT);
  }

  SDValue CmpC;
  ISD::CondCode CondCode;
  if (BinOp == ISD::OR) {
    // any_of: true iff any mask bit is set.
    CmpC = DAG.getConstant(0, DL, CmpVT);
    CondCode = ISD::CondCode::SETNE;
  } else {
    // all_of: true iff all NumElts mask bits are set.
    CmpC = DAG.getConstant(APInt::getLowBitsSet(CmpVT.getSizeInBits(), NumElts),
                           DL, CmpVT);
    CondCode = ISD::CondCode::SETEQ;
  }

  // Compare the mask, then negate the zero-extended boolean so the result
  // is 0 or all-ones, matching the AND/OR of the boolean elements.
  EVT SetccVT =
      TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT);
  SDValue Setcc = DAG.getSetCC(DL, SetccVT, Movmsk, CmpC, CondCode);
  SDValue Zext = DAG.getZExtOrTrunc(Setcc, DL, ExtractVT);
  SDValue Zero = DAG.getConstant(0, DL, ExtractVT);
  return DAG.getNode(ISD::SUB, DL, ExtractVT, Zero, Zext);
}
| 40725 | |
// Attempt to replace an ADD reduction of abs(sub(zext(i8), zext(i8)))
// (a sum-of-absolute-differences) with X86ISD::PSADBW.
static SDValue combineBasicSADPattern(SDNode *Extract, SelectionDAG &DAG,
                                      const X86Subtarget &Subtarget) {
  // PSADBW requires SSE2.
  if (!Subtarget.hasSSE2())
    return SDValue();

  // PSADBW accumulates into 64-bit lanes; only i32/i64 extracts make sense.
  EVT ExtractVT = Extract->getValueType(0);
  if (ExtractVT != MVT::i32 && ExtractVT != MVT::i64)
    return SDValue();

  // The shuffle-based reduction below needs a power-of-2 element count.
  EVT VT = Extract->getOperand(0).getValueType();
  if (!isPowerOf2_32(VT.getVectorNumElements()))
    return SDValue();

  // Match an ADD reduction tree feeding the extract.
  ISD::NodeType BinOp;
  SDValue Root = DAG.matchBinOpReduction(Extract, BinOp, {ISD::ADD});

  // The reduced value may have been extended before being summed; peel one
  // extension to reach the ABS node.
  if (Root && (Root.getOpcode() == ISD::SIGN_EXTEND ||
               Root.getOpcode() == ISD::ZERO_EXTEND ||
               Root.getOpcode() == ISD::ANY_EXTEND))
    Root = Root.getOperand(0);

  // The reduction source must be the absolute value of a difference.
  if (!Root || Root.getOpcode() != ISD::ABS)
    return SDValue();

  // The difference must be between two zero-extended i8 vectors.
  SDValue Zext0, Zext1;
  if (!detectZextAbsDiff(Root, Zext0, Zext1))
    return SDValue();

  // Build the PSADBW; each 64-bit lane holds the SAD of 8 bytes.
  SDLoc DL(Extract);
  SDValue SAD = createPSADBW(DAG, Zext0, Zext1, DL, Subtarget);

  // PSADBW already sums groups of 8 elements (Stages == 3 covers 8 inputs);
  // for larger reductions fold the remaining partial sums together by
  // repeatedly shuffling the upper half down and adding.
  unsigned Stages = Log2_32(VT.getVectorNumElements());
  EVT SadVT = SAD.getValueType();
  if (Stages > 3) {
    unsigned SadElems = SadVT.getVectorNumElements();

    for(unsigned i = Stages - 3; i > 0; --i) {
      // Move lanes [MaskEnd, 2*MaskEnd) down to [0, MaskEnd).
      SmallVector<int, 16> Mask(SadElems, -1);
      for(unsigned j = 0, MaskEnd = 1 << (i - 1); j < MaskEnd; ++j)
        Mask[j] = MaskEnd + j;

      SDValue Shuffle =
          DAG.getVectorShuffle(SadVT, DL, SAD, DAG.getUNDEF(SadVT), Mask);
      SAD = DAG.getNode(ISD::ADD, DL, SadVT, SAD, Shuffle);
    }
  }

  unsigned ExtractSizeInBits = ExtractVT.getSizeInBits();
  // Reinterpret the SAD result with the extract's element type and pull out
  // the requested element (the total now sits in the low lane).
  EVT ResVT = EVT::getVectorVT(*DAG.getContext(), ExtractVT,
                               SadVT.getSizeInBits() / ExtractSizeInBits);
  SAD = DAG.getBitcast(ResVT, SAD);
  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ExtractVT, SAD,
                     Extract->getOperand(1));
}
| 40799 | |
| 40800 | |
| 40801 | |
// Attempt to simplify an EXTRACT_VECTOR_ELT whose source is (or hides
// behind bitcasts of) a broadcast, scalar_to_vector, truncate, or target
// shuffle — resolving the extract to a scalar value or a cheaper extract.
static SDValue combineExtractWithShuffle(SDNode *N, SelectionDAG &DAG,
                                         TargetLowering::DAGCombinerInfo &DCI,
                                         const X86Subtarget &Subtarget) {
  if (DCI.isBeforeLegalizeOps())
    return SDValue();

  SDLoc dl(N);
  SDValue Src = N->getOperand(0);
  SDValue Idx = N->getOperand(1);

  EVT VT = N->getValueType(0);
  EVT SrcVT = Src.getValueType();
  EVT SrcSVT = SrcVT.getVectorElementType();
  unsigned SrcEltBits = SrcSVT.getSizeInBits();
  unsigned NumSrcElts = SrcVT.getVectorNumElements();

  // Only handle non-boolean sources with a constant, in-range index.
  if (SrcSVT == MVT::i1 || !isa<ConstantSDNode>(Idx))
    return SDValue();

  const APInt &IdxC = N->getConstantOperandAPInt(1);
  if (IdxC.uge(NumSrcElts))
    return SDValue();

  SDValue SrcBC = peekThroughBitcasts(Src);

  // Extract from a (bitcast of a) broadcast of a scalar integer: every
  // element replicates the scalar, so at sub-element offset 0 the result is
  // just a zext/trunc of the broadcast's scalar operand.
  if (X86ISD::VBROADCAST == SrcBC.getOpcode()) {
    SDValue SrcOp = SrcBC.getOperand(0);
    EVT SrcOpVT = SrcOp.getValueType();
    if (SrcOpVT.isScalarInteger() && VT.isInteger() &&
        (SrcOpVT.getSizeInBits() % SrcEltBits) == 0) {
      unsigned Scale = SrcOpVT.getSizeInBits() / SrcEltBits;
      unsigned Offset = IdxC.urem(Scale) * SrcEltBits;
      // TODO: non-zero offsets would need a shift; only offset 0 handled.
      if (Offset == 0) {
        SrcOp = DAG.getZExtOrTrunc(SrcOp, dl, SrcVT.getScalarType());
        SrcOp = DAG.getZExtOrTrunc(SrcOp, dl, VT);
        return SrcOp;
      }
    }
  }

  // Extract from a single-use broadcast-load whose memory width matches
  // both the broadcast element and the result: replace with a plain scalar
  // load (re-using the chain so ordering is preserved).
  if (SrcBC.getOpcode() == X86ISD::VBROADCAST_LOAD && SrcBC.hasOneUse()) {
    auto *MemIntr = cast<MemIntrinsicSDNode>(SrcBC);
    unsigned SrcBCWidth = SrcBC.getScalarValueSizeInBits();
    if (MemIntr->getMemoryVT().getSizeInBits() == SrcBCWidth &&
        VT.getSizeInBits() == SrcBCWidth && SrcEltBits == SrcBCWidth) {
      SDValue Load = DAG.getLoad(VT, dl, MemIntr->getChain(),
                                 MemIntr->getBasePtr(),
                                 MemIntr->getPointerInfo(),
                                 MemIntr->getOriginalAlign(),
                                 MemIntr->getMemOperand()->getFlags());
      DAG.ReplaceAllUsesOfValueWith(SDValue(MemIntr, 1), Load.getValue(1));
      return Load;
    }
  }

  // Extract from a (bitcast of a) scalar_to_vector: the requested element
  // is a bitfield of the inserted scalar, recoverable with a shift+trunc.
  if (SrcBC.getOpcode() == ISD::SCALAR_TO_VECTOR && VT.isInteger() &&
      SrcBC.getValueType().isInteger() &&
      (SrcBC.getScalarValueSizeInBits() % SrcEltBits) == 0 &&
      SrcBC.getScalarValueSizeInBits() ==
          SrcBC.getOperand(0).getValueSizeInBits()) {
    unsigned Scale = SrcBC.getScalarValueSizeInBits() / SrcEltBits;
    // Only indices inside the (single defined) scalar lane are recoverable.
    if (IdxC.ult(Scale)) {
      unsigned Offset = IdxC.getZExtValue() * SrcVT.getScalarSizeInBits();
      SDValue Scl = SrcBC.getOperand(0);
      EVT SclVT = Scl.getValueType();
      if (Offset) {
        Scl = DAG.getNode(ISD::SRL, dl, SclVT, Scl,
                          DAG.getShiftAmountConstant(Offset, SclVT, dl));
      }
      Scl = DAG.getZExtOrTrunc(Scl, dl, SrcVT.getScalarType());
      Scl = DAG.getZExtOrTrunc(Scl, dl, VT);
      return Scl;
    }
  }

  // Extract of element 0 from a truncate: narrow the pre-truncated source
  // to its low 128 bits and extract from that instead, avoiding the
  // full-width truncate.
  if (ISD::TRUNCATE == Src.getOpcode() && IdxC == 0 &&
      (SrcVT.getSizeInBits() % 128) == 0) {
    Src = extract128BitVector(Src.getOperand(0), 0, DAG, dl);
    MVT ExtractVT = MVT::getVectorVT(SrcSVT.getSimpleVT(), 128 / SrcEltBits);
    return DAG.getNode(N->getOpcode(), dl, VT, DAG.getBitcast(ExtractVT, Src),
                       Idx);
  }

  // Helper: extract element Idx from Vec (viewed as VecVT) using only
  // operations legal on this subtarget, or return SDValue() if none apply.
  auto GetLegalExtract = [&Subtarget, &DAG, &dl](SDValue Vec, EVT VecVT,
                                                 unsigned Idx) {
    // For wide integer vectors, first narrow to the 128-bit lane holding
    // the element and adjust the index into that lane.
    EVT VecSVT = VecVT.getScalarType();
    if ((VecVT.is256BitVector() || VecVT.is512BitVector()) &&
        (VecSVT == MVT::i8 || VecSVT == MVT::i16 || VecSVT == MVT::i32 ||
         VecSVT == MVT::i64)) {
      unsigned EltSizeInBits = VecSVT.getSizeInBits();
      unsigned NumEltsPerLane = 128 / EltSizeInBits;
      unsigned LaneOffset = (Idx & ~(NumEltsPerLane - 1)) * EltSizeInBits;
      unsigned LaneIdx = LaneOffset / Vec.getScalarValueSizeInBits();
      VecVT = EVT::getVectorVT(*DAG.getContext(), VecSVT, NumEltsPerLane);
      Vec = extract128BitVector(Vec, LaneIdx, DAG, dl);
      Idx &= (NumEltsPerLane - 1);
    }
    // i32/i64: a plain extract (element 0 is free with SSE2, any index with
    // SSE4.1 PEXTRD/PEXTRQ).
    if ((VecVT == MVT::v4i32 || VecVT == MVT::v2i64) &&
        ((Idx == 0 && Subtarget.hasSSE2()) || Subtarget.hasSSE41())) {
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VecVT.getScalarType(),
                         DAG.getBitcast(VecVT, Vec),
                         DAG.getIntPtrConstant(Idx, dl));
    }
    // i16/i8: PEXTRW (SSE2) / PEXTRB (SSE4.1), which produce an i32.
    if ((VecVT == MVT::v8i16 && Subtarget.hasSSE2()) ||
        (VecVT == MVT::v16i8 && Subtarget.hasSSE41())) {
      unsigned OpCode = (VecVT == MVT::v8i16 ? X86ISD::PEXTRW : X86ISD::PEXTRB);
      return DAG.getNode(OpCode, dl, MVT::i32, DAG.getBitcast(VecVT, Vec),
                         DAG.getTargetConstant(Idx, dl, MVT::i8));
    }
    return SDValue();
  };

  // Resolve the source to a target shuffle (inputs + mask).
  SmallVector<int, 16> Mask;
  SmallVector<SDValue, 2> Ops;
  if (!getTargetShuffleInputs(SrcBC, Ops, Mask, DAG))
    return SDValue();

  // All shuffle inputs must be full-width so indices map 1:1.
  if (llvm::any_of(Ops, [SrcVT](SDValue Op) {
        return SrcVT.getSizeInBits() != Op.getValueSizeInBits();
      }))
    return SDValue();

  // Rescale the shuffle mask to the source's element count.
  if (Mask.size() != NumSrcElts) {
    if ((NumSrcElts % Mask.size()) == 0) {
      // Mask is coarser: split each mask element into Scale sub-elements.
      SmallVector<int, 16> ScaledMask;
      int Scale = NumSrcElts / Mask.size();
      narrowShuffleMaskElts(Scale, Mask, ScaledMask);
      Mask = std::move(ScaledMask);
    } else if ((Mask.size() % NumSrcElts) == 0) {
      // Mask is finer: keep only the sub-elements covering the extracted
      // element, mark the rest undef, then try to widen the mask back.
      int ExtractIdx = (int)IdxC.getZExtValue();
      int Scale = Mask.size() / NumSrcElts;
      int Lo = Scale * ExtractIdx;
      int Hi = Scale * (ExtractIdx + 1);
      for (int i = 0, e = (int)Mask.size(); i != e; ++i)
        if (i < Lo || Hi <= i)
          Mask[i] = SM_SentinelUndef;

      SmallVector<int, 16> WidenedMask;
      while (Mask.size() > NumSrcElts &&
             canWidenShuffleElements(Mask, WidenedMask))
        Mask = std::move(WidenedMask);
    }
  }

  // Determine which source element the shuffle routes to the extract index.
  int ExtractIdx;
  EVT ExtractVT;
  if (Mask.size() == NumSrcElts) {
    ExtractIdx = Mask[IdxC.getZExtValue()];
    ExtractVT = SrcVT;
  } else {
    // Mask stayed finer than the source: the extract is valid only if the
    // extra sub-elements of the requested element are undef/zero; view the
    // source with correspondingly narrower integer elements.
    unsigned Scale = Mask.size() / NumSrcElts;
    if ((Mask.size() % NumSrcElts) != 0 || SrcVT.isFloatingPoint())
      return SDValue();
    unsigned ScaledIdx = Scale * IdxC.getZExtValue();
    if (!isUndefOrZeroInRange(Mask, ScaledIdx + 1, Scale - 1))
      return SDValue();
    ExtractIdx = Mask[ScaledIdx];
    EVT ExtractSVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltBits / Scale);
    ExtractVT = EVT::getVectorVT(*DAG.getContext(), ExtractSVT, Mask.size());
    assert(SrcVT.getSizeInBits() == ExtractVT.getSizeInBits() &&
           "Failed to widen vector type");
  }

  // The shuffle routes undef or zero into the requested lane.
  if (ExtractIdx == SM_SentinelUndef)
    return DAG.getUNDEF(VT);

  if (ExtractIdx == SM_SentinelZero)
    return VT.isFloatingPoint() ? DAG.getConstantFP(0.0, dl, VT)
                                : DAG.getConstant(0, dl, VT);

  // Extract directly from the routed-to shuffle input.
  SDValue SrcOp = Ops[ExtractIdx / Mask.size()];
  ExtractIdx = ExtractIdx % Mask.size();
  if (SDValue V = GetLegalExtract(SrcOp, ExtractVT, ExtractIdx))
    return DAG.getZExtOrTrunc(V, dl, VT);

  return SDValue();
}
| 40998 | |
| 40999 | |
| 41000 | |
// Scalarize an extract of element 0 from a single-use floating-point vector
// op: extract the operands first and perform the op on scalars, so the
// vector op (and the rest of the vector computation) can die.
static SDValue scalarizeExtEltFP(SDNode *ExtElt, SelectionDAG &DAG) {
  assert(ExtElt->getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Expected extract");
  SDValue Vec = ExtElt->getOperand(0);
  SDValue Index = ExtElt->getOperand(1);
  EVT VT = ExtElt->getValueType(0);
  EVT VecVT = Vec.getValueType();

  // The vector op must have only this use, the extract must be of element 0
  // (so no shuffle is needed), and there must be no implicit ext/trunc.
  if (!Vec.hasOneUse() || !isNullConstant(Index) || VecVT.getScalarType() != VT)
    return SDValue();

  // extract (setcc fpvec, fpvec) --> setcc (extract fp), (extract fp)
  if (Vec.getOpcode() == ISD::SETCC && VT == MVT::i1) {
    // Only scalarize comparisons of f32/f64 operands.
    EVT OpVT = Vec.getOperand(0).getValueType().getScalarType();
    if (OpVT != MVT::f32 && OpVT != MVT::f64)
      return SDValue();

    SDLoc DL(ExtElt);
    SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT,
                               Vec.getOperand(0), Index);
    SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, OpVT,
                               Vec.getOperand(1), Index);
    // Operand 2 is the condition code, carried over unchanged.
    return DAG.getNode(Vec.getOpcode(), DL, VT, Ext0, Ext1, Vec.getOperand(2));
  }

  // Everything below produces an f32/f64 scalar.
  if (VT != MVT::f32 && VT != MVT::f64)
    return SDValue();

  // extract (vselect (setcc ...), t, f) -->
  //   select (extract setcc), (extract t), (extract f)
  // The setcc's i1-element operand must match the data vector type so the
  // same Index selects corresponding elements of condition and data.
  if (Vec.getOpcode() == ISD::VSELECT &&
      Vec.getOperand(0).getOpcode() == ISD::SETCC &&
      Vec.getOperand(0).getValueType().getScalarType() == MVT::i1 &&
      Vec.getOperand(0).getOperand(0).getValueType() == VecVT) {
    SDLoc DL(ExtElt);
    SDValue Ext0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
                               Vec.getOperand(0).getValueType().getScalarType(),
                               Vec.getOperand(0), Index);
    SDValue Ext1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
                               Vec.getOperand(1), Index);
    SDValue Ext2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT,
                               Vec.getOperand(2), Index);
    return DAG.getNode(ISD::SELECT, DL, VT, Ext0, Ext1, Ext2);
  }

  // Lane-wise FP ops: extract every operand and redo the op on scalars.
  switch (Vec.getOpcode()) {
  case ISD::FMA:
  case ISD::FMAD:
  case ISD::FADD:
  case ISD::FSUB:
  case ISD::FMUL:
  case ISD::FDIV:
  case ISD::FREM:
  case ISD::FCOPYSIGN:
  case ISD::FMINNUM:
  case ISD::FMAXNUM:
  case ISD::FMINNUM_IEEE:
  case ISD::FMAXNUM_IEEE:
  case ISD::FMAXIMUM:
  case ISD::FMINIMUM:
  case X86ISD::FMAX:
  case X86ISD::FMIN:
  case ISD::FABS:
  case ISD::FSQRT:
  case ISD::FRINT:
  case ISD::FCEIL:
  case ISD::FTRUNC:
  case ISD::FNEARBYINT:
  case ISD::FROUND:
  case ISD::FFLOOR:
  case X86ISD::FRCP:
  case X86ISD::FRSQRT: {
    SDLoc DL(ExtElt);
    SmallVector<SDValue, 4> ExtOps;
    for (SDValue Op : Vec->ops())
      ExtOps.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Op, Index));
    return DAG.getNode(Vec.getOpcode(), DL, VT, ExtOps);
  }
  default:
    return SDValue();
  }
  llvm_unreachable("All opcodes should return within switch");
}
| 41096 | |
| 41097 | |
| 41098 | |
// Attempt to lower an ADD/MUL/FADD horizontal reduction (ending in an
// extract of element 0) using X86-specific tricks: widening for i8 MUL,
// PSADBW-against-zero for i8 ADD, and HADD/HADDPS-style horizontal ops.
static SDValue combineArithReduction(SDNode *ExtElt, SelectionDAG &DAG,
                                     const X86Subtarget &Subtarget) {
  assert(ExtElt->getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Unexpected caller");

  if (!Subtarget.hasSSE2())
    return SDValue();

  // Match an ADD/MUL/FADD reduction tree feeding the extract.
  ISD::NodeType Opc;
  SDValue Rdx = DAG.matchBinOpReduction(ExtElt, Opc,
                                        {ISD::ADD, ISD::MUL, ISD::FADD}, true);
  if (!Rdx)
    return SDValue();

  SDValue Index = ExtElt->getOperand(1);
  assert(isNullConstant(Index) &&
         "Reduction doesn't end in an extract from index 0");

  // No implicit truncation/extension of the reduced elements.
  EVT VT = ExtElt->getValueType(0);
  EVT VecVT = Rdx.getValueType();
  if (VecVT.getScalarType() != VT)
    return SDValue();

  SDLoc DL(ExtElt);

  // vXi8 MUL reduction: widen bytes to i16 (where PMULLW exists), reduce
  // there, then truncate back via a bitcast.
  if (Opc == ISD::MUL) {
    unsigned NumElts = VecVT.getVectorNumElements();
    if (VT != MVT::i8 || NumElts < 4 || !isPowerOf2_32(NumElts))
      return SDValue();
    if (VecVT.getSizeInBits() >= 128) {
      // Unpack lo/hi bytes into i16 lanes, multiply, then halve down to a
      // single 128-bit vector.
      EVT WideVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16, NumElts / 2);
      SDValue Lo = getUnpackl(DAG, DL, VecVT, Rdx, DAG.getUNDEF(VecVT));
      SDValue Hi = getUnpackh(DAG, DL, VecVT, Rdx, DAG.getUNDEF(VecVT));
      Lo = DAG.getBitcast(WideVT, Lo);
      Hi = DAG.getBitcast(WideVT, Hi);
      Rdx = DAG.getNode(Opc, DL, WideVT, Lo, Hi);
      while (Rdx.getValueSizeInBits() > 128) {
        std::tie(Lo, Hi) = splitVector(Rdx, DAG, DL);
        Rdx = DAG.getNode(Opc, DL, Lo.getValueType(), Lo, Hi);
      }
    } else {
      // Sub-128-bit: pad with undef up to v16i8 and unpack bytes into the
      // low bytes of i16 lanes (the undef high parts don't affect the
      // product bits kept after the final v16i8 bitcast).
      if (VecVT == MVT::v4i8)
        Rdx = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i8, Rdx,
                          DAG.getUNDEF(MVT::v4i8));
      Rdx = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, Rdx,
                        DAG.getUNDEF(MVT::v8i8));
      Rdx = getUnpackl(DAG, DL, MVT::v16i8, Rdx, DAG.getUNDEF(MVT::v16i8));
      Rdx = DAG.getBitcast(MVT::v8i16, Rdx);
    }
    // Tree-multiply the v8i16 lanes down to lane 0, then read the low byte.
    if (NumElts >= 8)
      Rdx = DAG.getNode(Opc, DL, MVT::v8i16, Rdx,
                        DAG.getVectorShuffle(MVT::v8i16, DL, Rdx, Rdx,
                                             {4, 5, 6, 7, -1, -1, -1, -1}));
    Rdx = DAG.getNode(Opc, DL, MVT::v8i16, Rdx,
                      DAG.getVectorShuffle(MVT::v8i16, DL, Rdx, Rdx,
                                           {2, 3, -1, -1, -1, -1, -1, -1}));
    Rdx = DAG.getNode(Opc, DL, MVT::v8i16, Rdx,
                      DAG.getVectorShuffle(MVT::v8i16, DL, Rdx, Rdx,
                                           {1, -1, -1, -1, -1, -1, -1, -1}));
    Rdx = DAG.getBitcast(MVT::v16i8, Rdx);
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Rdx, Index);
  }

  // Sub-128-bit vXi8 ADD reduction: pad to v16i8 and use PSADBW against
  // zero, which sums all 16 bytes into the low i64 lane.
  if (VecVT == MVT::v4i8 || VecVT == MVT::v8i8) {
    if (VecVT == MVT::v4i8) {
      // v4i8: zero the upper 4 bytes so they don't contribute to the sum.
      if (Subtarget.hasSSE41()) {
        Rdx = DAG.getBitcast(MVT::i32, Rdx);
        Rdx = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4i32,
                          DAG.getConstant(0, DL, MVT::v4i32), Rdx,
                          DAG.getIntPtrConstant(0, DL));
        Rdx = DAG.getBitcast(MVT::v16i8, Rdx);
      } else {
        Rdx = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v8i8, Rdx,
                          DAG.getConstant(0, DL, VecVT));
      }
    }
    if (Rdx.getValueType() == MVT::v8i8) {
      // v8i8: PSADBW only reads the low 8 bytes here; upper half is undef.
      Rdx = DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v16i8, Rdx,
                        DAG.getUNDEF(MVT::v8i8));
    }
    Rdx = DAG.getNode(X86ISD::PSADBW, DL, MVT::v2i64, Rdx,
                      DAG.getConstant(0, DL, MVT::v16i8));
    Rdx = DAG.getBitcast(MVT::v16i8, Rdx);
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Rdx, Index);
  }

  // Remaining strategies need a power-of-2 multiple of 128 bits.
  if ((VecVT.getSizeInBits() % 128) != 0 ||
      !isPowerOf2_32(VecVT.getVectorNumElements()))
    return SDValue();

  // 128-bit-or-wider vXi8 ADD reduction: halve down to v16i8, add the two
  // 8-byte halves, then PSADBW against zero to sum the bytes.
  if (VT == MVT::i8) {
    while (Rdx.getValueSizeInBits() > 128) {
      SDValue Lo, Hi;
      std::tie(Lo, Hi) = splitVector(Rdx, DAG, DL);
      VecVT = Lo.getValueType();
      Rdx = DAG.getNode(ISD::ADD, DL, VecVT, Lo, Hi);
    }
    assert(VecVT == MVT::v16i8 && "v16i8 reduction expected");

    SDValue Hi = DAG.getVectorShuffle(
        MVT::v16i8, DL, Rdx, Rdx,
        {8, 9, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1});
    Rdx = DAG.getNode(ISD::ADD, DL, MVT::v16i8, Rdx, Hi);
    Rdx = DAG.getNode(X86ISD::PSADBW, DL, MVT::v2i64, Rdx,
                      getZeroVector(MVT::v16i8, Subtarget, DAG, DL));
    Rdx = DAG.getBitcast(MVT::v16i8, Rdx);
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Rdx, Index);
  }

  // Only use horizontal ops if the target considers them profitable here.
  if (!shouldUseHorizontalOp(true, DAG, Subtarget))
    return SDValue();

  unsigned HorizOpcode = Opc == ISD::ADD ? X86ISD::HADD : X86ISD::FHADD;

  // 256-bit sources: a single cross-half HADD/FHADD folds them to 128 bits
  // (requires SSSE3 for integer HADD, SSE3 for FHADD).
  if (((VecVT == MVT::v16i16 || VecVT == MVT::v8i32) && Subtarget.hasSSSE3()) ||
      ((VecVT == MVT::v8f32 || VecVT == MVT::v4f64) && Subtarget.hasSSE3())) {
    unsigned NumElts = VecVT.getVectorNumElements();
    SDValue Hi = extract128BitVector(Rdx, NumElts / 2, DAG, DL);
    SDValue Lo = extract128BitVector(Rdx, 0, DAG, DL);
    Rdx = DAG.getNode(HorizOpcode, DL, Lo.getValueType(), Hi, Lo);
    VecVT = Rdx.getValueType();
  }
  if (!((VecVT == MVT::v8i16 || VecVT == MVT::v4i32) && Subtarget.hasSSSE3()) &&
      !((VecVT == MVT::v4f32 || VecVT == MVT::v2f64) && Subtarget.hasSSE3()))
    return SDValue();

  // log2(N) horizontal-add steps fold the whole vector into lane 0.
  unsigned ReductionSteps = Log2_32(VecVT.getVectorNumElements());
  for (unsigned i = 0; i != ReductionSteps; ++i)
    Rdx = DAG.getNode(HorizOpcode, DL, VecVT, Rdx, Rdx);

  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, Rdx, Index);
}
| 41243 | |
| 41244 | |
| 41245 | |
| 41246 | |
| 41247 | |
/// Combine an extract-element node. This handles both generic
/// ISD::EXTRACT_VECTOR_ELT and the x86 pextr nodes (any other opcode is
/// treated as a PEXTRB/PEXTRW-style extract below, which implicitly
/// zero-extends its result instead of producing undef).
static SDValue combineExtractVectorElt(SDNode *N, SelectionDAG &DAG,
                                       TargetLowering::DAGCombinerInfo &DCI,
                                       const X86Subtarget &Subtarget) {
  // First try to fold the extract through a shuffle of the source vector.
  if (SDValue NewOp = combineExtractWithShuffle(N, DAG, DCI, Subtarget))
    return NewOp;

  SDValue InputVector = N->getOperand(0);
  SDValue EltIdx = N->getOperand(1);
  auto *CIdx = dyn_cast<ConstantSDNode>(EltIdx); // non-null iff index is constant

  EVT SrcVT = InputVector.getValueType();
  EVT VT = N->getValueType(0);
  SDLoc dl(InputVector);
  bool IsPextr = N->getOpcode() != ISD::EXTRACT_VECTOR_ELT;
  unsigned NumSrcElts = SrcVT.getVectorNumElements();

  // Out-of-range constant index: a pextr zero-extends, so it folds to zero;
  // a generic extract with an out-of-range index is undef.
  if (CIdx && CIdx->getAPIntValue().uge(NumSrcElts))
    return IsPextr ? DAG.getConstant(0, dl, VT) : DAG.getUNDEF(VT);

  // Integer constant folding: if the (possibly bitcast) source's element bits
  // are known constants, extract the requested element directly.
  if (CIdx && VT.isInteger()) {
    APInt UndefVecElts;
    SmallVector<APInt, 16> EltBits;
    unsigned VecEltBitWidth = SrcVT.getScalarSizeInBits();
    if (getTargetConstantBitsFromNode(InputVector, VecEltBitWidth, UndefVecElts,
                                      EltBits, true, false)) {
      uint64_t Idx = CIdx->getZExtValue();
      if (UndefVecElts[Idx])
        return IsPextr ? DAG.getConstant(0, dl, VT) : DAG.getUNDEF(VT);
      // Zero-extend the element to the (wider or equal) result type.
      return DAG.getConstant(EltBits[Idx].zextOrSelf(VT.getScalarSizeInBits()),
                             dl, VT);
    }
  }

  if (IsPextr) {
    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
    // Try to simplify the source vector based on the bits the pextr demands.
    if (TLI.SimplifyDemandedBits(
            SDValue(N, 0), APInt::getAllOnesValue(VT.getSizeInBits()), DCI))
      return SDValue(N, 0);

    // PEXTR*(PINSR*(v, s, c), c) -> s: extracting the element we just
    // inserted. Truncate to the element type, then zext/trunc to the result.
    if ((InputVector.getOpcode() == X86ISD::PINSRB ||
         InputVector.getOpcode() == X86ISD::PINSRW) &&
        InputVector.getOperand(2) == EltIdx) {
      assert(SrcVT == InputVector.getOperand(0).getValueType() &&
             "Vector type mismatch");
      SDValue Scl = InputVector.getOperand(1);
      Scl = DAG.getNode(ISD::TRUNCATE, dl, SrcVT.getScalarType(), Scl);
      return DAG.getZExtOrTrunc(Scl, dl, VT);
    }

    // NOTE: the reduction/MMX combines below are only attempted for the
    // generic extract form, so the pextr path stops here.
    return SDValue();
  }

  // Detect an MMX extract of all bits as an i64: it works better as a bitcast
  // of the v1i64 value.
  if (InputVector.getOpcode() == ISD::BITCAST && InputVector.hasOneUse() &&
      VT == MVT::i64 && SrcVT == MVT::v1i64 && isNullConstant(EltIdx)) {
    SDValue MMXSrc = InputVector.getOperand(0);

    // The bitcast source must be a direct mmx result.
    if (MMXSrc.getValueType() == MVT::x86mmx)
      return DAG.getBitcast(VT, InputVector);
  }

  // Detect an MMX-to-i32 conversion done via an element-0 extract from v2i32;
  // lower it to the dedicated MMX_MOVD2W node.
  if (InputVector.getOpcode() == ISD::BITCAST && InputVector.hasOneUse() &&
      VT == MVT::i32 && SrcVT == MVT::v2i32 && isNullConstant(EltIdx)) {
    SDValue MMXSrc = InputVector.getOperand(0);

    // The bitcast source must be a direct mmx result.
    if (MMXSrc.getValueType() == MVT::x86mmx)
      return DAG.getNode(X86ISD::MMX_MOVD2W, dl, MVT::i32, MMXSrc);
  }

  // Check whether this extract is the root of a sum-of-absolute-differences
  // pattern (PSADBW).
  if (SDValue SAD = combineBasicSADPattern(N, DAG, Subtarget))
    return SAD;

  // Attempt to replace an all_of/any_of style predicate reduction.
  if (SDValue Cmp = combinePredicateReduction(N, DAG, Subtarget))
    return Cmp;

  // Attempt to replace a min/max reduction.
  if (SDValue MinMax = combineMinMaxReduction(N, DAG, Subtarget))
    return MinMax;

  // Attempt to optimize arithmetic (ADD/FADD/MUL) reductions.
  if (SDValue V = combineArithReduction(N, DAG, Subtarget))
    return V;

  // Attempt to scalarize FP extracts.
  if (SDValue V = scalarizeExtEltFP(N, DAG))
    return V;

  // Attempt to extract i1 elements by bitcasting the vXi1 source to an
  // iN integer and testing individual bits, but only when *every* use of the
  // source (on this result number) is a constant-index i1 extract and there
  // is more than one such extract to amortize the conversion.
  if (CIdx && SrcVT.getScalarType() == MVT::i1) {
    SmallVector<SDNode *, 16> BoolExtracts;
    unsigned ResNo = InputVector.getResNo();
    // Collect qualifying extracts; reject any other kind of use. Note the
    // getResNo() check: only extracts from the *same* result number count.
    auto IsBoolExtract = [&BoolExtracts, &ResNo](SDNode *Use) {
      if (Use->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
          isa<ConstantSDNode>(Use->getOperand(1)) &&
          Use->getOperand(0).getResNo() == ResNo &&
          Use->getValueType(0) == MVT::i1) {
        BoolExtracts.push_back(Use);
        return true;
      }
      return false;
    };
    if (all_of(InputVector->uses(), IsBoolExtract) &&
        BoolExtracts.size() > 1) {
      EVT BCVT = EVT::getIntegerVT(*DAG.getContext(), NumSrcElts);
      if (SDValue BC =
              combineBitcastvxi1(DAG, BCVT, InputVector, dl, Subtarget)) {
        for (SDNode *Use : BoolExtracts) {
          // extractelement vXi1 X, MaskIdx --> ((bitcast X) & Mask) == Mask
          unsigned MaskIdx = Use->getConstantOperandVal(1);
          APInt MaskBit = APInt::getOneBitSet(NumSrcElts, MaskIdx);
          SDValue Mask = DAG.getConstant(MaskBit, dl, BCVT);
          SDValue Res = DAG.getNode(ISD::AND, dl, BCVT, BC, Mask);
          Res = DAG.getSetCC(dl, MVT::i1, Res, Mask, ISD::SETEQ);
          DCI.CombineTo(Use, Res);
        }
        // N itself was rewritten via CombineTo; signal a change.
        return SDValue(N, 0);
      }
    }
  }

  return SDValue();
}
| 41390 | |
| 41391 | |
| 41392 | |
| 41393 | |
/// If a vector select has an all-ones or all-zeros operand, try to fold the
/// select into bitwise logic using the condition as a sign-splat mask:
///   vselect M, 111..., 000... --> bitcast M
///   vselect M, 111..., X      --> or   M, X
///   vselect M, X,      000... --> and  M, X
///   vselect M, 000..., X      --> andn M, X
static SDValue
combineVSelectWithAllOnesOrZeros(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const X86Subtarget &Subtarget) {
  SDValue Cond = N->getOperand(0);
  SDValue LHS = N->getOperand(1);
  SDValue RHS = N->getOperand(2);
  EVT VT = LHS.getValueType();
  EVT CondVT = Cond.getValueType();
  SDLoc DL(N);
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  // Only plain VSELECT nodes are handled here.
  if (N->getOpcode() != ISD::VSELECT)
    return SDValue();

  assert(CondVT.isVector() && "Vector select expects a vector selector!");

  bool TValIsAllZeros = ISD::isBuildVectorAllZeros(LHS.getNode());
  bool FValIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());

  // If both arms are zero, the result is zero regardless of the condition.
  if (TValIsAllZeros && FValIsAllZeros) {
    if (VT.isFloatingPoint())
      return DAG.getConstantFP(0.0, DL, VT);
    return DAG.getConstant(0, DL, VT);
  }

  // To use the condition as a bitwise mask, its element width must match the
  // value element width exactly (otherwise the bit pattern wouldn't line up).
  if (CondVT.getScalarSizeInBits() != VT.getScalarSizeInBits())
    return SDValue();

  // Try to invert the condition when the true value is all-zeros or the false
  // value is all-ones, so the patterns below can fire after swapping the arms.
  // Only profitable when the condition has a single use.
  bool TValIsAllOnes = ISD::isBuildVectorAllOnes(LHS.getNode());
  if (!TValIsAllOnes && !FValIsAllZeros && Cond.hasOneUse() &&
      // The selector must come from a SETCC so we can cheaply invert it.
      Cond.getOpcode() == ISD::SETCC &&
      // And the SETCC must not have been promoted to a different type.
      TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT) ==
          CondVT) {
    bool FValIsAllOnes = ISD::isBuildVectorAllOnes(RHS.getNode());

    if (TValIsAllZeros || FValIsAllOnes) {
      SDValue CC = Cond.getOperand(2);
      ISD::CondCode NewCC = ISD::getSetCCInverse(
          cast<CondCodeSDNode>(CC)->get(), Cond.getOperand(0).getValueType());
      Cond = DAG.getSetCC(DL, CondVT, Cond.getOperand(0), Cond.getOperand(1),
                          NewCC);
      std::swap(LHS, RHS);
      // Keep the all-ones/all-zeros bookkeeping in sync with the swap.
      TValIsAllOnes = FValIsAllOnes;
      FValIsAllZeros = TValIsAllZeros;
    }
  }

  // The condition must be a "sign splat" (each element all-ones or all-zeros)
  // for any of the bitwise folds below to be valid.
  if (DAG.ComputeNumSignBits(Cond) != CondVT.getScalarSizeInBits())
    return SDValue();

  // vselect Cond, 111..., 000... -> Cond
  if (TValIsAllOnes && FValIsAllZeros)
    return DAG.getBitcast(VT, Cond);

  // The remaining folds create ops in CondVT, so it must be legal.
  if (!TLI.isTypeLegal(CondVT))
    return SDValue();

  // vselect Cond, 111..., X -> or Cond, X
  if (TValIsAllOnes) {
    SDValue CastRHS = DAG.getBitcast(CondVT, RHS);
    SDValue Or = DAG.getNode(ISD::OR, DL, CondVT, Cond, CastRHS);
    return DAG.getBitcast(VT, Or);
  }

  // vselect Cond, X, 000... -> and Cond, X
  if (FValIsAllZeros) {
    SDValue CastLHS = DAG.getBitcast(CondVT, LHS);
    SDValue And = DAG.getNode(ISD::AND, DL, CondVT, Cond, CastLHS);
    return DAG.getBitcast(VT, And);
  }

  // vselect Cond, 000..., X -> andn Cond, X
  if (TValIsAllZeros) {
    SDValue CastRHS = DAG.getBitcast(CondVT, RHS);
    SDValue AndN;
    // vXi1 masks have no ANDNP node; build (~Cond) & X explicitly.
    if (CondVT.getScalarType() == MVT::i1)
      AndN = DAG.getNode(ISD::AND, DL, CondVT, DAG.getNOT(DL, Cond, CondVT),
                         CastRHS);
    else
      AndN = DAG.getNode(X86ISD::ANDNP, DL, CondVT, Cond, CastRHS);
    return DAG.getBitcast(VT, AndN);
  }

  return SDValue();
}
| 41496 | |
| 41497 | |
| 41498 | |
| 41499 | |
| 41500 | |
| 41501 | static SDValue narrowVectorSelect(SDNode *N, SelectionDAG &DAG, |
| 41502 | const X86Subtarget &Subtarget) { |
| 41503 | unsigned Opcode = N->getOpcode(); |
| 41504 | if (Opcode != X86ISD::BLENDV && Opcode != ISD::VSELECT) |
| 41505 | return SDValue(); |
| 41506 | |
| 41507 | |
| 41508 | EVT VT = N->getValueType(0); |
| 41509 | if (!VT.is256BitVector()) |
| 41510 | return SDValue(); |
| 41511 | |
| 41512 | |
| 41513 | SDValue Cond = N->getOperand(0); |
| 41514 | SDValue TVal = N->getOperand(1); |
| 41515 | SDValue FVal = N->getOperand(2); |
| 41516 | SmallVector<SDValue, 4> CatOpsT, CatOpsF; |
| 41517 | if (!TVal.hasOneUse() || !FVal.hasOneUse() || |
| 41518 | !collectConcatOps(TVal.getNode(), CatOpsT) || |
| 41519 | !collectConcatOps(FVal.getNode(), CatOpsF)) |
| 41520 | return SDValue(); |
| 41521 | |
| 41522 | auto makeBlend = [Opcode](SelectionDAG &DAG, const SDLoc &DL, |
| 41523 | ArrayRef<SDValue> Ops) { |
| 41524 | return DAG.getNode(Opcode, DL, Ops[1].getValueType(), Ops); |
| 41525 | }; |
| 41526 | return SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT, { Cond, TVal, FVal }, |
| 41527 | makeBlend, false); |
| 41528 | } |
| 41529 | |
| 41530 | static SDValue combineSelectOfTwoConstants(SDNode *N, SelectionDAG &DAG) { |
| 41531 | SDValue Cond = N->getOperand(0); |
| 41532 | SDValue LHS = N->getOperand(1); |
| 41533 | SDValue RHS = N->getOperand(2); |
| 41534 | SDLoc DL(N); |
| 41535 | |
| 41536 | auto *TrueC = dyn_cast<ConstantSDNode>(LHS); |
| 41537 | auto *FalseC = dyn_cast<ConstantSDNode>(RHS); |
| 41538 | if (!TrueC || !FalseC) |
| 41539 | return SDValue(); |
| 41540 | |
| 41541 | |
| 41542 | EVT VT = N->getValueType(0); |
| 41543 | if (!DAG.getTargetLoweringInfo().isTypeLegal(VT)) |
| 41544 | return SDValue(); |
| 41545 | |
| 41546 | |
| 41547 | |
| 41548 | |
| 41549 | if (Cond.getValueType() != MVT::i1) |
| 41550 | return SDValue(); |
| 41551 | |
| 41552 | |
| 41553 | |
| 41554 | |
| 41555 | |
| 41556 | const APInt &TrueVal = TrueC->getAPIntValue(); |
| 41557 | const APInt &FalseVal = FalseC->getAPIntValue(); |
| 41558 | bool OV; |
| 41559 | APInt Diff = TrueVal.ssub_ov(FalseVal, OV); |
| 41560 | if (OV) |
| 41561 | return SDValue(); |
| 41562 | |
| 41563 | APInt AbsDiff = Diff.abs(); |
| 41564 | if (AbsDiff.isPowerOf2() || |
| 41565 | ((VT == MVT::i32 || VT == MVT::i64) && |
| 41566 | (AbsDiff == 3 || AbsDiff == 5 || AbsDiff == 9))) { |
| 41567 | |
| 41568 | |
| 41569 | |
| 41570 | |
| 41571 | if (TrueVal.slt(FalseVal)) { |
| 41572 | Cond = DAG.getNOT(DL, Cond, MVT::i1); |
| 41573 | std::swap(TrueC, FalseC); |
| 41574 | } |
| 41575 | |
| 41576 | |
| 41577 | SDValue R = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Cond); |
| 41578 | |
| 41579 | |
| 41580 | if (!AbsDiff.isOneValue()) |
| 41581 | R = DAG.getNode(ISD::MUL, DL, VT, R, DAG.getConstant(AbsDiff, DL, VT)); |
| 41582 | |
| 41583 | |
| 41584 | if (!FalseC->isNullValue()) |
| 41585 | R = DAG.getNode(ISD::ADD, DL, VT, R, SDValue(FalseC, 0)); |
| 41586 | |
| 41587 | return R; |
| 41588 | } |
| 41589 | |
| 41590 | return SDValue(); |
| 41591 | } |
| 41592 | |
| 41593 | |
| 41594 | |
| 41595 | |
| 41596 | |
| 41597 | |
/// If this is a *dynamic* select (non-constant condition) that can map to a
/// variable-blend instruction, restructure the condition so that the blend
/// only relies on the high (sign) bit of each element, and let
/// SimplifyDemandedBits shrink the condition computation.
static SDValue combineVSelectToBLENDV(SDNode *N, SelectionDAG &DAG,
                                      TargetLowering::DAGCombinerInfo &DCI,
                                      const X86Subtarget &Subtarget) {
  SDValue Cond = N->getOperand(0);
  // Constant-condition selects are handled elsewhere (shuffle lowering).
  if ((N->getOpcode() != ISD::VSELECT &&
       N->getOpcode() != X86ISD::BLENDV) ||
      ISD::isBuildVectorOfConstantSDNodes(Cond.getNode()))
    return SDValue();

  // Only element widths that a variable blend can honor (8..64 bits).
  unsigned BitWidth = Cond.getScalarValueSizeInBits();
  if (BitWidth < 8 || BitWidth > 64)
    return SDValue();

  // VSELECT must be legal or custom for this type on the subtarget, and we
  // explicitly exclude the type/feature combinations below where a dynamic
  // blend is not available.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  EVT VT = N->getValueType(0);
  if (!TLI.isOperationLegalOrCustom(ISD::VSELECT, VT))
    return SDValue();

  // i16-element selects are excluded here — NOTE(review): presumably because
  // there is no 16-bit-granularity variable blend; confirm against lowering.
  if (VT.getVectorElementType() == MVT::i16)
    return SDValue();
  // 128-bit variable blends need SSE4.1.
  if (VT.is128BitVector() && !Subtarget.hasSSE41())
    return SDValue();
  // v32i8 blends need AVX2.
  if (VT == MVT::v32i8 && !Subtarget.hasAVX2())
    return SDValue();
  // 512-bit selects don't use BLENDV at all.
  if (VT.is512BitVector())
    return SDValue();

  // True iff every use of Cond is as the condition (operand 0) of a
  // select/blend, so it is safe to change Cond in a sign-bit-only way.
  auto OnlyUsedAsSelectCond = [](SDValue Cond) {
    for (SDNode::use_iterator UI = Cond->use_begin(), UE = Cond->use_end();
         UI != UE; ++UI)
      if ((UI->getOpcode() != ISD::VSELECT &&
           UI->getOpcode() != X86ISD::BLENDV) ||
          UI.getOperandNo() != 0)
        return false;

    return true;
  };

  // BLENDV only tests the sign bit of each condition element.
  APInt DemandedBits(APInt::getSignMask(BitWidth));

  if (OnlyUsedAsSelectCond(Cond)) {
    KnownBits Known;
    TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
                                          !DCI.isBeforeLegalizeOps());
    if (!TLI.SimplifyDemandedBits(Cond, DemandedBits, Known, TLO, 0, true))
      return SDValue();

    // The simplification may have changed Cond's non-sign bits, which all of
    // its select users could otherwise observe. Rewrite every remaining
    // VSELECT user to X86ISD::BLENDV, which only reads the sign bit.
    for (SDNode *U : Cond->uses()) {
      if (U->getOpcode() == X86ISD::BLENDV)
        continue;

      SDValue SB = DAG.getNode(X86ISD::BLENDV, SDLoc(U), U->getValueType(0),
                               Cond, U->getOperand(1), U->getOperand(2));
      DAG.ReplaceAllUsesOfValueWith(SDValue(U, 0), SB);
      DCI.AddToWorklist(U);
    }
    // Commit the demanded-bits rewrite and report N as changed in place.
    DCI.CommitTargetLoweringOpt(TLO);
    return SDValue(N, 0);
  }

  // Cond has other users: we can't rewrite it, but we may still be able to
  // peek through multiple-use bits for this one node.
  if (SDValue V = TLI.SimplifyMultipleUseDemandedBits(Cond, DemandedBits, DAG))
    return DAG.getNode(X86ISD::BLENDV, SDLoc(N), N->getValueType(0), V,
                       N->getOperand(1), N->getOperand(2));

  return SDValue();
}
| 41687 | |
| 41688 | |
| 41689 | |
| 41690 | |
| 41691 | |
| 41692 | |
| 41693 | |
| 41694 | |
| 41695 | |
| 41696 | |
| 41697 | |
| 41698 | |
| 41699 | |
| 41700 | |
| 41701 | |
| 41702 | |
| 41703 | |
| 41704 | static SDValue combineLogicBlendIntoConditionalNegate( |
| 41705 | EVT VT, SDValue Mask, SDValue X, SDValue Y, const SDLoc &DL, |
| 41706 | SelectionDAG &DAG, const X86Subtarget &Subtarget) { |
| 41707 | EVT MaskVT = Mask.getValueType(); |
| 41708 | assert(MaskVT.isInteger() && |
| 41709 | DAG.ComputeNumSignBits(Mask) == MaskVT.getScalarSizeInBits() && |
| 41710 | "Mask must be zero/all-bits"); |
| 41711 | |
| 41712 | if (X.getValueType() != MaskVT || Y.getValueType() != MaskVT) |
| 41713 | return SDValue(); |
| 41714 | if (!DAG.getTargetLoweringInfo().isOperationLegal(ISD::SUB, MaskVT)) |
| 41715 | return SDValue(); |
| 41716 | |
| 41717 | auto IsNegV = [](SDNode *N, SDValue V) { |
| 41718 | return N->getOpcode() == ISD::SUB && N->getOperand(1) == V && |
| 41719 | ISD::isBuildVectorAllZeros(N->getOperand(0).getNode()); |
| 41720 | }; |
| 41721 | |
| 41722 | SDValue V; |
| 41723 | if (IsNegV(Y.getNode(), X)) |
| 41724 | V = X; |
| 41725 | else if (IsNegV(X.getNode(), Y)) |
| 41726 | V = Y; |
| 41727 | else |
| 41728 | return SDValue(); |
| 41729 | |
| 41730 | SDValue SubOp1 = DAG.getNode(ISD::XOR, DL, MaskVT, V, Mask); |
| 41731 | SDValue SubOp2 = Mask; |
| 41732 | |
| 41733 | |
| 41734 | |
| 41735 | |
| 41736 | |
| 41737 | |
| 41738 | |
| 41739 | |
| 41740 | |
| 41741 | |
| 41742 | |
| 41743 | if (V == Y) |
| 41744 | std::swap(SubOp1, SubOp2); |
| 41745 | |
| 41746 | SDValue Res = DAG.getNode(ISD::SUB, DL, MaskVT, SubOp1, SubOp2); |
| 41747 | return DAG.getBitcast(VT, Res); |
| 41748 | } |
| 41749 | |
| 41750 | |
| 41751 | static SDValue combineSelect(SDNode *N, SelectionDAG &DAG, |
| 41752 | TargetLowering::DAGCombinerInfo &DCI, |
| 41753 | const X86Subtarget &Subtarget) { |
| 41754 | SDLoc DL(N); |
| 41755 | SDValue Cond = N->getOperand(0); |
| 41756 | SDValue LHS = N->getOperand(1); |
| 41757 | SDValue RHS = N->getOperand(2); |
| 41758 | |
| 41759 | |
| 41760 | |
| 41761 | if (SDValue V = DAG.simplifySelect(Cond, LHS, RHS)) |
| 41762 | return V; |
| 41763 | |
| 41764 | EVT VT = LHS.getValueType(); |
| 41765 | EVT CondVT = Cond.getValueType(); |
| 41766 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
| 41767 | bool CondConstantVector = ISD::isBuildVectorOfConstantSDNodes(Cond.getNode()); |
| 41768 | |
| 41769 | |
| 41770 | |
| 41771 | |
| 41772 | if (CondVT.isVector() && CondVT.isInteger() && |
| 41773 | CondVT.getScalarSizeInBits() == VT.getScalarSizeInBits() && |
| 41774 | (!CondConstantVector || CondVT.getScalarType() == MVT::i8) && |
| 41775 | DAG.ComputeNumSignBits(Cond) == CondVT.getScalarSizeInBits()) |
| 41776 | if (SDValue V = combineLogicBlendIntoConditionalNegate(VT, Cond, RHS, LHS, |
| 41777 | DL, DAG, Subtarget)) |
| 41778 | return V; |
| 41779 | |
| 41780 | |
| 41781 | if (CondConstantVector && DCI.isBeforeLegalizeOps()) { |
| 41782 | SmallVector<int, 64> Mask; |
| 41783 | if (createShuffleMaskFromVSELECT(Mask, Cond)) |
| 41784 | return DAG.getVectorShuffle(VT, DL, LHS, RHS, Mask); |
| 41785 | } |
| 41786 | |
| 41787 | |
| 41788 | |
| 41789 | |
| 41790 | if (N->getOpcode() == ISD::VSELECT && CondVT.isVector() && |
| 41791 | LHS.getOpcode() == X86ISD::PSHUFB && RHS.getOpcode() == X86ISD::PSHUFB && |
| 41792 | LHS.hasOneUse() && RHS.hasOneUse()) { |
| 41793 | MVT SimpleVT = VT.getSimpleVT(); |
| 41794 | SmallVector<SDValue, 1> LHSOps, RHSOps; |
| 41795 | SmallVector<int, 64> LHSMask, RHSMask, CondMask; |
| 41796 | if (createShuffleMaskFromVSELECT(CondMask, Cond) && |
| 41797 | getTargetShuffleMask(LHS.getNode(), SimpleVT, true, LHSOps, LHSMask) && |
| 41798 | getTargetShuffleMask(RHS.getNode(), SimpleVT, true, RHSOps, RHSMask)) { |
| 41799 | int NumElts = VT.getVectorNumElements(); |
| 41800 | for (int i = 0; i != NumElts; ++i) { |
| 41801 | if (CondMask[i] < NumElts) |
| 41802 | RHSMask[i] = 0x80; |
| 41803 | else |
| 41804 | LHSMask[i] = 0x80; |
| 41805 | } |
| 41806 | LHS = DAG.getNode(X86ISD::PSHUFB, DL, VT, LHS.getOperand(0), |
| 41807 | getConstVector(LHSMask, SimpleVT, DAG, DL, true)); |
| 41808 | RHS = DAG.getNode(X86ISD::PSHUFB, DL, VT, RHS.getOperand(0), |
| 41809 | getConstVector(RHSMask, SimpleVT, DAG, DL, true)); |
| 41810 | return DAG.getNode(ISD::OR, DL, VT, LHS, RHS); |
| 41811 | } |
| 41812 | } |
| 41813 | |
| 41814 | |
| 41815 | |
| 41816 | |
| 41817 | |
| 41818 | |
| 41819 | if (Cond.getOpcode() == ISD::SETCC && VT.isFloatingPoint() && |
| 41820 | VT != MVT::f80 && VT != MVT::f128 && |
| 41821 | (TLI.isTypeLegal(VT) || VT == MVT::v2f32) && |
| 41822 | (Subtarget.hasSSE2() || |
| 41823 | (Subtarget.hasSSE1() && VT.getScalarType() == MVT::f32))) { |
| 41824 | ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get(); |
| 41825 | |
| 41826 | unsigned Opcode = 0; |
| 41827 | |
| 41828 | if (DAG.isEqualTo(LHS, Cond.getOperand(0)) && |
| 41829 | DAG.isEqualTo(RHS, Cond.getOperand(1))) { |
| 41830 | switch (CC) { |
| 41831 | default: break; |
| 41832 | case ISD::SETULT: |
| 41833 | |
| 41834 | |
| 41835 | |
| 41836 | if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) { |
| 41837 | if (!DAG.getTarget().Options.NoSignedZerosFPMath && |
| 41838 | !(DAG.isKnownNeverZeroFloat(LHS) || |
| 41839 | DAG.isKnownNeverZeroFloat(RHS))) |
| 41840 | break; |
| 41841 | std::swap(LHS, RHS); |
| 41842 | } |
| 41843 | Opcode = X86ISD::FMIN; |
| 41844 | break; |
| 41845 | case ISD::SETOLE: |
| 41846 | |
| 41847 | |
| 41848 | if (!DAG.getTarget().Options.NoSignedZerosFPMath && |
| 41849 | !DAG.isKnownNeverZeroFloat(LHS) && !DAG.isKnownNeverZeroFloat(RHS)) |
| 41850 | break; |
| 41851 | Opcode = X86ISD::FMIN; |
| 41852 | break; |
| 41853 | case ISD::SETULE: |
| 41854 | |
| 41855 | |
| 41856 | std::swap(LHS, RHS); |
| 41857 | LLVM_FALLTHROUGH; |
| 41858 | case ISD::SETOLT: |
| 41859 | case ISD::SETLT: |
| 41860 | case ISD::SETLE: |
| 41861 | Opcode = X86ISD::FMIN; |
| 41862 | break; |
| 41863 | |
| 41864 | case ISD::SETOGE: |
| 41865 | |
| 41866 | |
| 41867 | if (!DAG.getTarget().Options.NoSignedZerosFPMath && |
| 41868 | !DAG.isKnownNeverZeroFloat(LHS) && !DAG.isKnownNeverZeroFloat(RHS)) |
| 41869 | break; |
| 41870 | Opcode = X86ISD::FMAX; |
| 41871 | break; |
| 41872 | case ISD::SETUGT: |
| 41873 | |
| 41874 | |
| 41875 | |
| 41876 | if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) { |
| 41877 | if (!DAG.getTarget().Options.NoSignedZerosFPMath && |
| 41878 | !(DAG.isKnownNeverZeroFloat(LHS) || |
| 41879 | DAG.isKnownNeverZeroFloat(RHS))) |
| 41880 | break; |
| 41881 | std::swap(LHS, RHS); |
| 41882 | } |
| 41883 | Opcode = X86ISD::FMAX; |
| 41884 | break; |
| 41885 | case ISD::SETUGE: |
| 41886 | |
| 41887 | |
| 41888 | std::swap(LHS, RHS); |
| 41889 | LLVM_FALLTHROUGH; |
| 41890 | case ISD::SETOGT: |
| 41891 | case ISD::SETGT: |
| 41892 | case ISD::SETGE: |
| 41893 | Opcode = X86ISD::FMAX; |
| 41894 | break; |
| 41895 | } |
| 41896 | |
| 41897 | } else if (DAG.isEqualTo(LHS, Cond.getOperand(1)) && |
| 41898 | DAG.isEqualTo(RHS, Cond.getOperand(0))) { |
| 41899 | switch (CC) { |
| 41900 | default: break; |
| 41901 | case ISD::SETOGE: |
| 41902 | |
| 41903 | |
| 41904 | |
| 41905 | if (!DAG.getTarget().Options.NoSignedZerosFPMath && |
| 41906 | !(DAG.isKnownNeverZeroFloat(LHS) || |
| 41907 | DAG.isKnownNeverZeroFloat(RHS))) { |
| 41908 | if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) |
| 41909 | break; |
| 41910 | std::swap(LHS, RHS); |
| 41911 | } |
| 41912 | Opcode = X86ISD::FMIN; |
| 41913 | break; |
| 41914 | case ISD::SETUGT: |
| 41915 | |
| 41916 | if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) |
| 41917 | break; |
| 41918 | Opcode = X86ISD::FMIN; |
| 41919 | break; |
| 41920 | case ISD::SETUGE: |
| 41921 | |
| 41922 | |
| 41923 | std::swap(LHS, RHS); |
| 41924 | LLVM_FALLTHROUGH; |
| 41925 | case ISD::SETOGT: |
| 41926 | case ISD::SETGT: |
| 41927 | case ISD::SETGE: |
| 41928 | Opcode = X86ISD::FMIN; |
| 41929 | break; |
| 41930 | |
| 41931 | case ISD::SETULT: |
| 41932 | |
| 41933 | if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) |
| 41934 | break; |
| 41935 | Opcode = X86ISD::FMAX; |
| 41936 | break; |
| 41937 | case ISD::SETOLE: |
| 41938 | |
| 41939 | |
| 41940 | |
| 41941 | if (!DAG.getTarget().Options.NoSignedZerosFPMath && |
| 41942 | !DAG.isKnownNeverZeroFloat(LHS) && |
| 41943 | !DAG.isKnownNeverZeroFloat(RHS)) { |
| 41944 | if (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)) |
| 41945 | break; |
| 41946 | std::swap(LHS, RHS); |
| 41947 | } |
| 41948 | Opcode = X86ISD::FMAX; |
| 41949 | break; |
| 41950 | case ISD::SETULE: |
| 41951 | |
| 41952 | |
| 41953 | std::swap(LHS, RHS); |
| 41954 | LLVM_FALLTHROUGH; |
| 41955 | case ISD::SETOLT: |
| 41956 | case ISD::SETLT: |
| 41957 | case ISD::SETLE: |
| 41958 | Opcode = X86ISD::FMAX; |
| 41959 | break; |
| 41960 | } |
| 41961 | } |
| 41962 | |
| 41963 | if (Opcode) |
| 41964 | return DAG.getNode(Opcode, DL, N->getValueType(0), LHS, RHS); |
| 41965 | } |
| 41966 | |
| 41967 | |
| 41968 | |
| 41969 | |
| 41970 | |
| 41971 | |
| 41972 | if (Subtarget.hasAVX512() && N->getOpcode() == ISD::SELECT && |
| 41973 | Cond.getOpcode() == ISD::SETCC && (VT == MVT::f32 || VT == MVT::f64)) { |
| 41974 | ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get(); |
| 41975 | SDValue AndNode = Cond.getOperand(0); |
| 41976 | if (AndNode.getOpcode() == ISD::AND && CC == ISD::SETEQ && |
| 41977 | isNullConstant(Cond.getOperand(1)) && |
| 41978 | isOneConstant(AndNode.getOperand(1))) { |
| 41979 | |
| 41980 | |
| 41981 | AndNode = DAG.getZExtOrTrunc(AndNode, DL, MVT::i8); |
| 41982 | return DAG.getNode(ISD::SELECT, DL, VT, AndNode, RHS, LHS); |
| 41983 | } |
| 41984 | } |
| 41985 | |
| 41986 | |
| 41987 | |
| 41988 | |
| 41989 | |
| 41990 | |
| 41991 | |
| 41992 | |
| 41993 | if (Subtarget.hasAVX512() && !Subtarget.hasBWI() && CondVT.isVector() && |
| 41994 | CondVT.getVectorElementType() == MVT::i1 && |
| 41995 | (VT.getVectorElementType() == MVT::i8 || |
| 41996 | VT.getVectorElementType() == MVT::i16)) { |
| 41997 | Cond = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Cond); |
| 41998 | return DAG.getNode(N->getOpcode(), DL, VT, Cond, LHS, RHS); |
| 41999 | } |
| 42000 | |
| 42001 | |
| 42002 | |
| 42003 | |
| 42004 | |
| 42005 | if (Subtarget.hasAVX512() && CondVT.isVector() && |
| 42006 | CondVT.getVectorElementType() == MVT::i1) { |
| 42007 | auto SelectableOp = [&TLI](SDValue Op) { |
| 42008 | return Op.getOpcode() == ISD::EXTRACT_SUBVECTOR && |
| 42009 | isTargetShuffle(Op.getOperand(0).getOpcode()) && |
| 42010 | isNullConstant(Op.getOperand(1)) && |
| 42011 | TLI.isTypeLegal(Op.getOperand(0).getValueType()) && |
| 42012 | Op.hasOneUse() && Op.getOperand(0).hasOneUse(); |
| 42013 | }; |
| 42014 | |
| 42015 | bool SelectableLHS = SelectableOp(LHS); |
| 42016 | bool SelectableRHS = SelectableOp(RHS); |
| 42017 | bool ZeroLHS = ISD::isBuildVectorAllZeros(LHS.getNode()); |
| 42018 | bool ZeroRHS = ISD::isBuildVectorAllZeros(RHS.getNode()); |
| 42019 | |
| 42020 | if ((SelectableLHS && ZeroRHS) || (SelectableRHS && ZeroLHS)) { |
| 42021 | EVT SrcVT = SelectableLHS ? LHS.getOperand(0).getValueType() |
| 42022 | : RHS.getOperand(0).getValueType(); |
| 42023 | EVT SrcCondVT = SrcVT.changeVectorElementType(MVT::i1); |
| 42024 | LHS = insertSubVector(DAG.getUNDEF(SrcVT), LHS, 0, DAG, DL, |
| 42025 | VT.getSizeInBits()); |
| 42026 | RHS = insertSubVector(DAG.getUNDEF(SrcVT), RHS, 0, DAG, DL, |
| 42027 | VT.getSizeInBits()); |
| 42028 | Cond = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, SrcCondVT, |
| 42029 | DAG.getUNDEF(SrcCondVT), Cond, |
| 42030 | DAG.getIntPtrConstant(0, DL)); |
| 42031 | SDValue Res = DAG.getSelect(DL, SrcVT, Cond, LHS, RHS); |
| 42032 | return extractSubVector(Res, 0, DAG, DL, VT.getSizeInBits()); |
| 42033 | } |
| 42034 | } |
| 42035 | |
| 42036 | if (SDValue V = combineSelectOfTwoConstants(N, DAG)) |
| 42037 | return V; |
| 42038 | |
| 42039 | if (N->getOpcode() == ISD::SELECT && Cond.getOpcode() == ISD::SETCC && |
| 42040 | Cond.hasOneUse()) { |
| 42041 | EVT CondVT = Cond.getValueType(); |
| 42042 | SDValue Cond0 = Cond.getOperand(0); |
| 42043 | SDValue Cond1 = Cond.getOperand(1); |
| 42044 | ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get(); |
| 42045 | |
| 42046 | |
| 42047 | |
| 42048 | |
| 42049 | |
| 42050 | |
| 42051 | |
| 42052 | |
| 42053 | |
| 42054 | |
| 42055 | |
| 42056 | |
| 42057 | |
| 42058 | |
| 42059 | |
| 42060 | |
| 42061 | |
| 42062 | |
| 42063 | |
| 42064 | |
| 42065 | if (LHS == Cond0 && RHS == Cond1) { |
| 42066 | if ((CC == ISD::SETGT && (isNullConstant(RHS) || isOneConstant(RHS))) || |
| 42067 | (CC == ISD::SETLT && isAllOnesConstant(RHS))) { |
| 42068 | ISD::CondCode NewCC = CC == ISD::SETGT ? ISD::SETGE : ISD::SETLE; |
| 42069 | Cond = DAG.getSetCC(SDLoc(Cond), CondVT, Cond0, Cond1, NewCC); |
| 42070 | return DAG.getSelect(DL, VT, Cond, LHS, RHS); |
| 42071 | } |
| 42072 | if (CC == ISD::SETUGT && isOneConstant(RHS)) { |
| 42073 | ISD::CondCode NewCC = ISD::SETUGE; |
| 42074 | Cond = DAG.getSetCC(SDLoc(Cond), CondVT, Cond0, Cond1, NewCC); |
| 42075 | return DAG.getSelect(DL, VT, Cond, LHS, RHS); |
| 42076 | } |
| 42077 | } |
| 42078 | |
| 42079 | |
| 42080 | |
| 42081 | |
| 42082 | |
| 42083 | |
| 42084 | |
| 42085 | |
| 42086 | if (RHS.getOpcode() == ISD::SELECT && RHS.getOperand(1) == LHS && |
| 42087 | RHS.getOperand(0).getOpcode() == ISD::SETCC) { |
| 42088 | SDValue InnerSetCC = RHS.getOperand(0); |
| 42089 | ISD::CondCode InnerCC = |
| 42090 | cast<CondCodeSDNode>(InnerSetCC.getOperand(2))->get(); |
| 42091 | if ((CC == ISD::SETEQ || InnerCC == ISD::SETEQ) && |
| 42092 | Cond0 == InnerSetCC.getOperand(0) && |
| 42093 | Cond1 == InnerSetCC.getOperand(1)) { |
| 42094 | ISD::CondCode NewCC; |
| 42095 | switch (CC == ISD::SETEQ ? InnerCC : CC) { |
| 42096 | case ISD::SETGT: NewCC = ISD::SETGE; break; |
| 42097 | case ISD::SETLT: NewCC = ISD::SETLE; break; |
| 42098 | case ISD::SETUGT: NewCC = ISD::SETUGE; break; |
| 42099 | case ISD::SETULT: NewCC = ISD::SETULE; break; |
| 42100 | default: NewCC = ISD::SETCC_INVALID; break; |
| 42101 | } |
| 42102 | if (NewCC != ISD::SETCC_INVALID) { |
| 42103 | Cond = DAG.getSetCC(DL, CondVT, Cond0, Cond1, NewCC); |
| 42104 | return DAG.getSelect(DL, VT, Cond, LHS, RHS.getOperand(2)); |
| 42105 | } |
| 42106 | } |
| 42107 | } |
| 42108 | } |
| 42109 | |
| 42110 | |
| 42111 | |
| 42112 | |
| 42113 | if (N->getOpcode() == ISD::VSELECT && Cond.hasOneUse() && |
| 42114 | Subtarget.hasAVX512() && CondVT.getVectorElementType() == MVT::i1 && |
| 42115 | ISD::isBuildVectorAllZeros(LHS.getNode()) && |
| 42116 | !ISD::isBuildVectorAllZeros(RHS.getNode())) { |
| 42117 | |
| 42118 | SDValue CondNew = DAG.getNOT(DL, Cond, CondVT); |
| 42119 | |
| 42120 | return DAG.getSelect(DL, VT, CondNew, RHS, LHS); |
| 42121 | } |
| 42122 | |
| 42123 | |
| 42124 | if (!TLI.isTypeLegal(VT)) |
| 42125 | return SDValue(); |
| 42126 | |
| 42127 | if (SDValue V = combineVSelectWithAllOnesOrZeros(N, DAG, DCI, Subtarget)) |
| 42128 | return V; |
| 42129 | |
| 42130 | if (SDValue V = combineVSelectToBLENDV(N, DAG, DCI, Subtarget)) |
| 42131 | return V; |
| 42132 | |
| 42133 | if (SDValue V = narrowVectorSelect(N, DAG, Subtarget)) |
| 42134 | return V; |
| 42135 | |
| 42136 | |
| 42137 | if (CondVT.getScalarType() != MVT::i1) { |
| 42138 | if (SDValue CondNot = IsNOT(Cond, DAG)) |
| 42139 | return DAG.getNode(N->getOpcode(), DL, VT, |
| 42140 | DAG.getBitcast(CondVT, CondNot), RHS, LHS); |
| 42141 | |
| 42142 | if (Cond.getOpcode() == X86ISD::PCMPGT && Cond.hasOneUse() && |
| 42143 | ISD::isBuildVectorAllOnes(Cond.getOperand(1).getNode())) { |
| 42144 | Cond = DAG.getNode(X86ISD::PCMPGT, DL, CondVT, |
| 42145 | DAG.getConstant(0, DL, CondVT), Cond.getOperand(0)); |
| 42146 | return DAG.getNode(N->getOpcode(), DL, VT, Cond, RHS, LHS); |
| 42147 | } |
| 42148 | } |
| 42149 | |
| 42150 | |
| 42151 | |
| 42152 | |
| 42153 | |
| 42154 | |
| 42155 | if (N->getOpcode() == ISD::SELECT && VT.isVector() && |
| 42156 | VT.getVectorElementType() == MVT::i1 && |
| 42157 | (DCI.isBeforeLegalize() || (VT != MVT::v64i1 || Subtarget.is64Bit()))) { |
| 42158 | EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getVectorNumElements()); |
| 42159 | bool LHSIsConst = ISD::isBuildVectorOfConstantSDNodes(LHS.getNode()); |
| 42160 | bool RHSIsConst = ISD::isBuildVectorOfConstantSDNodes(RHS.getNode()); |
| 42161 | |
| 42162 | if ((LHSIsConst || |
| 42163 | (LHS.getOpcode() == ISD::BITCAST && |
| 42164 | LHS.getOperand(0).getValueType() == IntVT)) && |
| 42165 | (RHSIsConst || |
| 42166 | (RHS.getOpcode() == ISD::BITCAST && |
| 42167 | RHS.getOperand(0).getValueType() == IntVT))) { |
| 42168 | if (LHSIsConst) |
| 42169 | LHS = combinevXi1ConstantToInteger(LHS, DAG); |
| 42170 | else |
| 42171 | LHS = LHS.getOperand(0); |
| 42172 | |
| 42173 | if (RHSIsConst) |
| 42174 | RHS = combinevXi1ConstantToInteger(RHS, DAG); |
| 42175 | else |
| 42176 | RHS = RHS.getOperand(0); |
| 42177 | |
| 42178 | SDValue Select = DAG.getSelect(DL, IntVT, Cond, LHS, RHS); |
| 42179 | return DAG.getBitcast(VT, Select); |
| 42180 | } |
| 42181 | } |
| 42182 | |
| 42183 | |
| 42184 | |
| 42185 | |
| 42186 | if (DCI.isBeforeLegalize() && !Subtarget.hasAVX512() && |
| 42187 | N->getOpcode() == ISD::VSELECT && Cond.getOpcode() == ISD::SETCC && |
| 42188 | Cond.hasOneUse() && CondVT.getVectorElementType() == MVT::i1 && |
| 42189 | Cond.getOperand(0).getOpcode() == ISD::AND && |
| 42190 | isNullOrNullSplat(Cond.getOperand(1)) && |
| 42191 | cast<CondCodeSDNode>(Cond.getOperand(2))->get() == ISD::SETEQ && |
| 42192 | Cond.getOperand(0).getValueType() == VT) { |
| 42193 | |
| 42194 | SDValue And = Cond.getOperand(0); |
| 42195 | auto *C = isConstOrConstSplat(And.getOperand(1)); |
| 42196 | if (C && C->getAPIntValue().isPowerOf2()) { |
| 42197 | |
| 42198 | SDValue NotCond = |
| 42199 | DAG.getSetCC(DL, CondVT, And, Cond.getOperand(1), ISD::SETNE); |
| 42200 | return DAG.getSelect(DL, VT, NotCond, RHS, LHS); |
| 42201 | } |
| 42202 | |
| 42203 | |
| 42204 | |
| 42205 | |
| 42206 | unsigned EltBitWidth = VT.getScalarSizeInBits(); |
| 42207 | bool CanShiftBlend = |
| 42208 | TLI.isTypeLegal(VT) && ((Subtarget.hasAVX() && EltBitWidth == 32) || |
| 42209 | (Subtarget.hasAVX2() && EltBitWidth == 64) || |
| 42210 | (Subtarget.hasXOP())); |
| 42211 | if (CanShiftBlend && |
| 42212 | ISD::matchUnaryPredicate(And.getOperand(1), [](ConstantSDNode *C) { |
| 42213 | return C->getAPIntValue().isPowerOf2(); |
| 42214 | })) { |
| 42215 | |
| 42216 | SDValue Mask = And.getOperand(1); |
| 42217 | SmallVector<int, 32> ShlVals; |
| 42218 | for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) { |
| 42219 | auto *MaskVal = cast<ConstantSDNode>(Mask.getOperand(i)); |
| 42220 | ShlVals.push_back(EltBitWidth - 1 - |
| 42221 | MaskVal->getAPIntValue().exactLogBase2()); |
| 42222 | } |
| 42223 | |
| 42224 | SDValue ShlAmt = getConstVector(ShlVals, VT.getSimpleVT(), DAG, DL); |
| 42225 | SDValue Shl = DAG.getNode(ISD::SHL, DL, VT, And.getOperand(0), ShlAmt); |
| 42226 | SDValue NewCond = |
| 42227 | DAG.getSetCC(DL, CondVT, Shl, Cond.getOperand(1), ISD::SETLT); |
| 42228 | return DAG.getSelect(DL, VT, NewCond, RHS, LHS); |
| 42229 | } |
| 42230 | } |
| 42231 | |
| 42232 | return SDValue(); |
| 42233 | } |
| 42234 | |
| 42235 | |
| 42236 | |
| 42237 | |
| 42238 | |
| 42239 | |
| 42240 | |
// Combine a comparison of the result of an atomic RMW add/sub against a
// constant so that the condition can be read directly from the EFLAGS
// produced by a LOCKed arithmetic instruction, instead of materializing the
// old value and comparing it separately.
//
// On success the atomic op is rewritten via lowerAtomicArithWithLOCK, CC is
// updated in place to a flag condition valid on the LOCK op's EFLAGS, and the
// new flag-producing node is returned. Returns SDValue() when no fold applies.
static SDValue combineSetCCAtomicArith(SDValue Cmp, X86::CondCode &CC,
                                       SelectionDAG &DAG,
                                       const X86Subtarget &Subtarget) {
  // Only handle a real CMP, or a SUB whose arithmetic result is unused
  // (i.e. a SUB acting purely as a flag producer).
  if (!(Cmp.getOpcode() == X86ISD::CMP ||
        (Cmp.getOpcode() == X86ISD::SUB && !Cmp->hasAnyUseOfValue(0))))
    return SDValue();

  // The flags must feed exactly one consumer, since we are going to rewrite
  // the flag-producing chain.
  if (!Cmp.hasOneUse())
    return SDValue();

  SDValue CmpLHS = Cmp.getOperand(0);
  SDValue CmpRHS = Cmp.getOperand(1);
  EVT CmpVT = CmpLHS.getValueType();

  // The loaded-back atomic value must only feed this compare; otherwise we
  // cannot drop it in favor of the flags.
  if (!CmpLHS.hasOneUse())
    return SDValue();

  unsigned Opc = CmpLHS.getOpcode();
  if (Opc != ISD::ATOMIC_LOAD_ADD && Opc != ISD::ATOMIC_LOAD_SUB)
    return SDValue();

  // Operand 2 of an atomic RMW node is the value operand; it must be a
  // constant so we can reason about the comparison arithmetically.
  SDValue OpRHS = CmpLHS.getOperand(2);
  auto *OpRHSC = dyn_cast<ConstantSDNode>(OpRHS);
  if (!OpRHSC)
    return SDValue();

  // Normalize to an addend: a SUB of C is an ADD of -C.
  APInt Addend = OpRHSC->getAPIntValue();
  if (Opc == ISD::ATOMIC_LOAD_SUB)
    Addend = -Addend;

  auto *CmpRHSC = dyn_cast<ConstantSDNode>(CmpRHS);
  if (!CmpRHSC)
    return SDValue();

  APInt Comparison = CmpRHSC->getAPIntValue();
  APInt NegAddend = -Addend;

  // If the compared-against constant is off by one from -Addend, try to
  // adjust CC so the comparison becomes exactly against -Addend (i.e. the
  // new value is compared against zero). The isMax/isMin guards prevent the
  // +/-1 adjustment from wrapping.
  if (Comparison != NegAddend) {
    APInt IncComparison = Comparison + 1;
    if (IncComparison == NegAddend) {
      if (CC == X86::COND_A && !Comparison.isMaxValue()) {
        // (x > C) == (x >= C+1) for unsigned, when C+1 doesn't wrap.
        Comparison = IncComparison;
        CC = X86::COND_AE;
      } else if (CC == X86::COND_LE && !Comparison.isMaxSignedValue()) {
        // (x <= C) == (x < C+1) for signed, when C+1 doesn't overflow.
        Comparison = IncComparison;
        CC = X86::COND_L;
      }
    }
    APInt DecComparison = Comparison - 1;
    if (DecComparison == NegAddend) {
      if (CC == X86::COND_AE && !Comparison.isMinValue()) {
        // (x >= C) == (x > C-1) for unsigned, when C-1 doesn't wrap.
        Comparison = DecComparison;
        CC = X86::COND_A;
      } else if (CC == X86::COND_L && !Comparison.isMinSignedValue()) {
        // (x < C) == (x <= C-1) for signed, when C-1 doesn't underflow.
        Comparison = DecComparison;
        CC = X86::COND_LE;
      }
    }
  }

  // If we now compare the pre-op value against -Addend, then the post-op
  // value is compared against zero: rewrite as an ATOMIC_LOAD_SUB of
  // -Addend and take the flags from the resulting LOCKed instruction.
  if (Comparison == NegAddend) {
    auto *AN = cast<AtomicSDNode>(CmpLHS.getNode());
    auto AtomicSub = DAG.getAtomic(
        ISD::ATOMIC_LOAD_SUB, SDLoc(CmpLHS), CmpVT,
        CmpLHS.getOperand(0), CmpLHS.getOperand(1),
        DAG.getConstant(NegAddend, SDLoc(CmpRHS), CmpVT),
        AN->getMemOperand());
    auto LockOp = lowerAtomicArithWithLOCK(AtomicSub, DAG, Subtarget);
    // The loaded value (result 0) has no remaining users (checked above);
    // rewire the chain (result 1) to the new op.
    DAG.ReplaceAllUsesOfValueWith(CmpLHS.getValue(0), DAG.getUNDEF(CmpVT));
    DAG.ReplaceAllUsesOfValueWith(CmpLHS.getValue(1), LockOp.getValue(1));
    return LockOp;
  }

  // Otherwise we only handle comparisons of the *old* value against zero,
  // for the +/-1 increments/decrements below.
  if (!Comparison.isNullValue())
    return SDValue();

  // Translate a condition on the old value to an equivalent condition on the
  // new value (old = new - Addend), restricted to Addend == +/-1 where the
  // mapping is exact on the flags of the LOCKed op.
  if (CC == X86::COND_S && Addend == 1)
    CC = X86::COND_LE;
  else if (CC == X86::COND_NS && Addend == 1)
    CC = X86::COND_G;
  else if (CC == X86::COND_G && Addend == -1)
    CC = X86::COND_GE;
  else if (CC == X86::COND_LE && Addend == -1)
    CC = X86::COND_L;
  else
    return SDValue();

  SDValue LockOp = lowerAtomicArithWithLOCK(CmpLHS, DAG, Subtarget);
  DAG.ReplaceAllUsesOfValueWith(CmpLHS.getValue(0), DAG.getUNDEF(CmpVT));
  DAG.ReplaceAllUsesOfValueWith(CmpLHS.getValue(1), LockOp.getValue(1));
  return LockOp;
}
| 42357 | |
| 42358 | |
| 42359 | |
| 42360 | |
| 42361 | |
| 42362 | |
| 42363 | |
| 42364 | |
| 42365 | |
| 42366 | |
| 42367 | |
| 42368 | |
| 42369 | |
| 42370 | |
| 42371 | |
| 42372 | |
// Check whether Cmp is a boolean test (compare against 0 or 1) of a value
// that was itself produced by X86ISD::SETCC / SETCC_CARRY / CMOV. If so,
// update CC in place to the condition that should be tested on the original
// flags and return those flags; otherwise return SDValue().
static SDValue checkBoolTestSetCCCombine(SDValue Cmp, X86::CondCode &CC) {
  // Accept a CMP, or a SUB used purely for its flags.
  if (!(Cmp.getOpcode() == X86ISD::CMP ||
        (Cmp.getOpcode() == X86ISD::SUB && !Cmp->hasAnyUseOfValue(0))))
    return SDValue();

  // Only equality tests can be folded through a boolean producer.
  if (CC != X86::COND_E && CC != X86::COND_NE)
    return SDValue();

  SDValue Op1 = Cmp.getOperand(0);
  SDValue Op2 = Cmp.getOperand(1);

  SDValue SetCC;
  const ConstantSDNode* C = nullptr;
  // (bool == 0) inverts the producer's condition; (bool != 0) keeps it.
  bool needOppositeCond = (CC == X86::COND_E);
  bool checkAgainstTrue = false; // Is it a comparison against 1?

  // One side must be a constant; the other is the boolean under test.
  if ((C = dyn_cast<ConstantSDNode>(Op1)))
    SetCC = Op2;
  else if ((C = dyn_cast<ConstantSDNode>(Op2)))
    SetCC = Op1;
  else
    return SDValue();

  if (C->getZExtValue() == 1) {
    // (bool == 1) is (bool != 0) inverted once more.
    needOppositeCond = !needOppositeCond;
    checkAgainstTrue = true;
  } else if (C->getZExtValue() != 0)
    // Only comparisons against 0 or 1 are boolean tests.
    return SDValue();

  bool truncatedToBoolWithAnd = false;
  // Skip value-preserving wrappers: (zext/trunc/and-with-1)* around the
  // boolean producer. An AND with 1 proves the value was masked to a bool.
  while (SetCC.getOpcode() == ISD::ZERO_EXTEND ||
         SetCC.getOpcode() == ISD::TRUNCATE ||
         SetCC.getOpcode() == ISD::AND) {
    if (SetCC.getOpcode() == ISD::AND) {
      int OpIdx = -1;
      if (isOneConstant(SetCC.getOperand(0)))
        OpIdx = 1;
      if (isOneConstant(SetCC.getOperand(1)))
        OpIdx = 0;
      if (OpIdx < 0)
        break;
      SetCC = SetCC.getOperand(OpIdx);
      truncatedToBoolWithAnd = true;
    } else
      SetCC = SetCC.getOperand(0);
  }

  switch (SetCC.getOpcode()) {
  case X86ISD::SETCC_CARRY:
    // Since SETCC_CARRY gives an all-ones/all-zeros value, comparing it
    // against 1 is only meaningful if the value was first masked down to a
    // single bit with an AND; otherwise bail out of this case.
    if (checkAgainstTrue && !truncatedToBoolWithAnd)
      break;
    assert(X86::CondCode(SetCC.getConstantOperandVal(0)) == X86::COND_B &&
           "Invalid use of SETCC_CARRY!");
    LLVM_FALLTHROUGH;
  case X86ISD::SETCC:
    // Use the producer's condition (possibly inverted) on its own flags.
    CC = X86::CondCode(SetCC.getConstantOperandVal(0));
    if (needOppositeCond)
      CC = X86::GetOppositeBranchCondition(CC);
    return SetCC.getOperand(1);
  case X86ISD::CMOV: {
    // A CMOV selecting between the constants 0 and 1 is also a boolean
    // producer; check the operands.
    ConstantSDNode *FVal = dyn_cast<ConstantSDNode>(SetCC.getOperand(0));
    ConstantSDNode *TVal = dyn_cast<ConstantSDNode>(SetCC.getOperand(1));
    // The "true" operand must be a constant.
    if (!TVal)
      return SDValue();

    if (!FVal) {
      SDValue Op = SetCC.getOperand(0);
      // Skip 'zext' or 'trunc' around the false operand.
      if (Op.getOpcode() == ISD::ZERO_EXTEND ||
          Op.getOpcode() == ISD::TRUNCATE)
        Op = Op.getOperand(0);

      // A non-constant false operand is only acceptable for the
      // RDRAND/RDSEED value result (result 0), whose CMOV pattern is known
      // to produce a 0/1 value.
      if ((Op.getOpcode() != X86ISD::RDRAND &&
           Op.getOpcode() != X86ISD::RDSEED) || Op.getResNo() != 0)
        return SDValue();
    }

    bool FValIsFalse = true;
    if (FVal && FVal->getZExtValue() != 0) {
      if (FVal->getZExtValue() != 1)
        return SDValue();
      // If FVal is 1, the operands are swapped relative to the canonical
      // (0,1) form — invert the resulting condition.
      needOppositeCond = !needOppositeCond;
      FValIsFalse = false;
    }

    // The pair must be exactly {0,1} (in either order) to be a boolean.
    if (FValIsFalse && TVal->getZExtValue() != 1)
      return SDValue();
    if (!FValIsFalse && TVal->getZExtValue() != 0)
      return SDValue();
    CC = X86::CondCode(SetCC.getConstantOperandVal(2));
    if (needOppositeCond)
      CC = X86::GetOppositeBranchCondition(CC);
    return SetCC.getOperand(3);
  }
  }

  return SDValue();
}
| 42486 | |
| 42487 | |
| 42488 | |
| 42489 | |
| 42490 | |
| 42491 | static bool checkBoolTestAndOrSetCCCombine(SDValue Cond, X86::CondCode &CC0, |
| 42492 | X86::CondCode &CC1, SDValue &Flags, |
| 42493 | bool &isAnd) { |
| 42494 | if (Cond->getOpcode() == X86ISD::CMP) { |
| 42495 | if (!isNullConstant(Cond->getOperand(1))) |
| 42496 | return false; |
| 42497 | |
| 42498 | Cond = Cond->getOperand(0); |
| 42499 | } |
| 42500 | |
| 42501 | isAnd = false; |
| 42502 | |
| 42503 | SDValue SetCC0, SetCC1; |
| 42504 | switch (Cond->getOpcode()) { |
| 42505 | default: return false; |
| 42506 | case ISD::AND: |
| 42507 | case X86ISD::AND: |
| 42508 | isAnd = true; |
| 42509 | LLVM_FALLTHROUGH; |
| 42510 | case ISD::OR: |
| 42511 | case X86ISD::OR: |
| 42512 | SetCC0 = Cond->getOperand(0); |
| 42513 | SetCC1 = Cond->getOperand(1); |
| 42514 | break; |
| 42515 | }; |
| 42516 | |
| 42517 | |
| 42518 | if (SetCC0.getOpcode() != X86ISD::SETCC || |
| 42519 | SetCC1.getOpcode() != X86ISD::SETCC || |
| 42520 | SetCC0->getOperand(1) != SetCC1->getOperand(1)) |
| 42521 | return false; |
| 42522 | |
| 42523 | CC0 = (X86::CondCode)SetCC0->getConstantOperandVal(0); |
| 42524 | CC1 = (X86::CondCode)SetCC1->getConstantOperandVal(0); |
| 42525 | Flags = SetCC0->getOperand(1); |
| 42526 | return true; |
| 42527 | } |
| 42528 | |
| 42529 | |
| 42530 | |
| 42531 | |
// If EFLAGS is produced by (X86ISD::ADD carry, -1) — the pattern used to
// re-materialize a carry bit into CF — try to return the original flag
// producer so the carry can be consumed directly. Returns SDValue() if the
// pattern doesn't match.
static SDValue combineCarryThroughADD(SDValue EFLAGS, SelectionDAG &DAG) {
  if (EFLAGS.getOpcode() == X86ISD::ADD) {
    // (add x, -1) sets CF iff x != 0, so a 0/1 carry value round-trips.
    if (isAllOnesConstant(EFLAGS.getOperand(1))) {
      SDValue Carry = EFLAGS.getOperand(0);
      // Peel value-preserving wrappers off the carry: truncations,
      // extensions, and AND-with-1 masks.
      while (Carry.getOpcode() == ISD::TRUNCATE ||
             Carry.getOpcode() == ISD::ZERO_EXTEND ||
             Carry.getOpcode() == ISD::SIGN_EXTEND ||
             Carry.getOpcode() == ISD::ANY_EXTEND ||
             (Carry.getOpcode() == ISD::AND &&
              isOneConstant(Carry.getOperand(1))))
        Carry = Carry.getOperand(0);
      if (Carry.getOpcode() == X86ISD::SETCC ||
          Carry.getOpcode() == X86ISD::SETCC_CARRY) {
        // TODO: Merge this code with equivalent in combineAddOrSubToADCOrSBB?
        uint64_t CarryCC = Carry.getConstantOperandVal(0);
        SDValue CarryOp1 = Carry.getOperand(1);
        if (CarryCC == X86::COND_B)
          // setcc_b reads CF directly — its flags are exactly what we want.
          return CarryOp1;
        if (CarryCC == X86::COND_A) {
          // COND_A (above) tests CF==0 && ZF==0; there is no single flag for
          // that. But if the SUB's operands are commuted, COND_A on the
          // original is CF on the commuted SUB — provided ZF can't fire,
          // i.e. the operands can't be equal. A constant RHS could equal the
          // LHS, so bail on constants (guaranteed-unequal case only).
          if (CarryOp1.getOpcode() == X86ISD::SUB &&
              CarryOp1.getNode()->hasOneUse() &&
              CarryOp1.getValueType().isInteger() &&
              !isa<ConstantSDNode>(CarryOp1.getOperand(1))) {
            SDValue SubCommute =
                DAG.getNode(X86ISD::SUB, SDLoc(CarryOp1), CarryOp1->getVTList(),
                            CarryOp1.getOperand(1), CarryOp1.getOperand(0));
            return SDValue(SubCommute.getNode(), CarryOp1.getResNo());
          }
        }
        // If this is a check of the z flag of an add with 1, switch to the
        // C flag of the (add x, 1): x+1 == 0 exactly when the add carried.
        if (CarryCC == X86::COND_E &&
            CarryOp1.getOpcode() == X86ISD::ADD &&
            isOneConstant(CarryOp1.getOperand(1)))
          return CarryOp1;
      }
    }
  }

  return SDValue();
}
| 42579 | |
| 42580 | |
| 42581 | |
// Simplify a PTEST/TESTP flag producer, adjusting CC as needed.
// PTEST sets ZF from (X & Y) == 0 and CF from (~X & Y) == 0 (per the Intel
// SDM), which is what lets the NOT-folding below swap ZF- and CF-based
// conditions. Returns a replacement flag node or SDValue().
static SDValue combinePTESTCC(SDValue EFLAGS, X86::CondCode &CC,
                              SelectionDAG &DAG,
                              const X86Subtarget &Subtarget) {
  if (EFLAGS.getOpcode() != X86ISD::PTEST &&
      EFLAGS.getOpcode() != X86ISD::TESTP)
    return SDValue();

  EVT VT = EFLAGS.getValueType();
  SDValue Op0 = EFLAGS.getOperand(0);
  SDValue Op1 = EFLAGS.getOperand(1);
  EVT OpVT = Op0.getValueType();

  // If Op0 is a NOT, strip it and swap the ZF/CF meaning of the condition:
  // test(~X, Y) and test(X, Y) exchange the roles of ZF and CF.
  if (SDValue NotOp0 = IsNOT(Op0, DAG)) {
    X86::CondCode InvCC;
    switch (CC) {
    case X86::COND_B:
      // testc -> testz.
      InvCC = X86::COND_E;
      break;
    case X86::COND_AE:
      // !testc -> !testz.
      InvCC = X86::COND_NE;
      break;
    case X86::COND_E:
      // testz -> testc.
      InvCC = X86::COND_B;
      break;
    case X86::COND_NE:
      // !testz -> !testc.
      InvCC = X86::COND_AE;
      break;
    case X86::COND_A:
    case X86::COND_BE:
      // testnzc stays testnzc — the NOT just swaps which flag is which.
      InvCC = CC;
      break;
    default:
      InvCC = X86::COND_INVALID;
      break;
    }

    if (InvCC != X86::COND_INVALID) {
      CC = InvCC;
      return DAG.getNode(EFLAGS.getOpcode(), SDLoc(EFLAGS), VT,
                         DAG.getBitcast(OpVT, NotOp0), Op1);
    }
  }

  if (CC == X86::COND_E || CC == X86::COND_NE) {
    // TESTZ(~X,Y) == TESTC(X,Y): strip the NOT from Op1 by swapping operands
    // and flipping between the ZF and CF conditions.
    if (SDValue NotOp1 = IsNOT(Op1, DAG)) {
      CC = (CC == X86::COND_E ? X86::COND_B : X86::COND_AE);
      return DAG.getNode(EFLAGS.getOpcode(), SDLoc(EFLAGS), VT,
                         DAG.getBitcast(OpVT, NotOp1), Op0);
    }

    if (Op0 == Op1) {
      SDValue BC = peekThroughBitcasts(Op0);
      EVT BCVT = BC.getValueType();
      assert(BCVT.isVector() && DAG.getTargetLoweringInfo().isTypeLegal(BCVT) &&
             "Unexpected vector type");

      // TESTZ(AND(X,Y),AND(X,Y)) == TESTZ(X,Y) — ZF only cares about X&Y.
      if (BC.getOpcode() == ISD::AND || BC.getOpcode() == X86ISD::FAND) {
        return DAG.getNode(EFLAGS.getOpcode(), SDLoc(EFLAGS), VT,
                           DAG.getBitcast(OpVT, BC.getOperand(0)),
                           DAG.getBitcast(OpVT, BC.getOperand(1)));
      }

      // TESTZ(ANDN(X,Y),ANDN(X,Y)) == TESTC(X,Y): ~X&Y zero is exactly the
      // CF condition of test(X,Y).
      if (BC.getOpcode() == X86ISD::ANDNP || BC.getOpcode() == X86ISD::FANDN) {
        CC = (CC == X86::COND_E ? X86::COND_B : X86::COND_AE);
        return DAG.getNode(EFLAGS.getOpcode(), SDLoc(EFLAGS), VT,
                           DAG.getBitcast(OpVT, BC.getOperand(0)),
                           DAG.getBitcast(OpVT, BC.getOperand(1)));
      }

      // If every element is a sign-splat (all sign bits), the zero-test of
      // the whole vector equals a zero-test of the sign-bit mask: try to
      // reduce to MOVMSK + CMP against 0, which demands only the sign bits.
      unsigned EltBits = BCVT.getScalarSizeInBits();
      if (DAG.ComputeNumSignBits(BC) == EltBits) {
        assert(VT == MVT::i32 && "Expected i32 EFLAGS comparison result");
        APInt SignMask = APInt::getSignMask(EltBits);
        const TargetLowering &TLI = DAG.getTargetLoweringInfo();
        if (SDValue Res =
                TLI.SimplifyMultipleUseDemandedBits(BC, SignMask, DAG)) {
          SDLoc DL(EFLAGS);
          if (EltBits == 16) {
            // No 16-bit MOVMSK: use the byte form and keep only the high
            // byte of each pair (mask 0xAAAAAAAA selects odd byte lanes).
            MVT MovmskVT = BCVT.is128BitVector() ? MVT::v16i8 : MVT::v32i8;
            Res = DAG.getBitcast(MovmskVT, Res);
            Res = getPMOVMSKB(DL, Res, DAG, Subtarget);
            Res = DAG.getNode(ISD::AND, DL, MVT::i32, Res,
                              DAG.getConstant(0xAAAAAAAA, DL, MVT::i32));
          } else {
            Res = getPMOVMSKB(DL, Res, DAG, Subtarget);
          }
          return DAG.getNode(X86ISD::CMP, DL, MVT::i32, Res,
                             DAG.getConstant(0, DL, MVT::i32));
        }
      }
    }

    // TESTZ(-1,X) == TESTZ(X,X)
    if (ISD::isBuildVectorAllOnes(Op0.getNode()))
      return DAG.getNode(EFLAGS.getOpcode(), SDLoc(EFLAGS), VT, Op1, Op1);

    // TESTZ(X,-1) == TESTZ(X,X)
    if (ISD::isBuildVectorAllOnes(Op1.getNode()))
      return DAG.getNode(EFLAGS.getOpcode(), SDLoc(EFLAGS), VT, Op0, Op0);
  }

  return SDValue();
}
| 42706 | |
| 42707 | |
// Attempt to simplify "any-of" (MOVMSK(X) != 0) and "all-of"
// (MOVMSK(X) == all-ones-mask) comparison patterns, adjusting CC if needed.
// Returns a replacement flag-producing node or SDValue().
static SDValue combineSetCCMOVMSK(SDValue EFLAGS, X86::CondCode &CC,
                                  SelectionDAG &DAG,
                                  const X86Subtarget &Subtarget) {
  // Only equality tests of the mask are handled.
  if (!(CC == X86::COND_E || CC == X86::COND_NE))
    return SDValue();
  if (EFLAGS.getValueType() != MVT::i32)
    return SDValue();
  unsigned CmpOpcode = EFLAGS.getOpcode();
  if (CmpOpcode != X86ISD::CMP && CmpOpcode != X86ISD::SUB)
    return SDValue();
  auto *CmpConstant = dyn_cast<ConstantSDNode>(EFLAGS.getOperand(1));
  if (!CmpConstant)
    return SDValue();
  const APInt &CmpVal = CmpConstant->getAPIntValue();

  SDValue CmpOp = EFLAGS.getOperand(0);
  unsigned CmpBits = CmpOp.getValueSizeInBits();
  assert(CmpBits == CmpVal.getBitWidth() && "Value size mismatch");

  // Peek through a truncate of the MOVMSK result.
  if (CmpOp.getOpcode() == ISD::TRUNCATE)
    CmpOp = CmpOp.getOperand(0);

  // The compared value must come from a MOVMSK.
  if (CmpOp.getOpcode() != X86ISD::MOVMSK)
    return SDValue();

  SDValue Vec = CmpOp.getOperand(0);
  MVT VecVT = Vec.getSimpleValueType();
  assert((VecVT.is128BitVector() || VecVT.is256BitVector()) &&
         "Unexpected MOVMSK operand");
  unsigned NumElts = VecVT.getVectorNumElements();
  unsigned NumEltBits = VecVT.getScalarSizeInBits();

  // any_of: mask compared against 0; all_of: SUB against the full mask of
  // NumElts one-bits.
  bool IsAnyOf = CmpOpcode == X86ISD::CMP && CmpVal.isNullValue();
  bool IsAllOf = CmpOpcode == X86ISD::SUB && NumElts <= CmpBits &&
                 CmpVal.isMask(NumElts);
  if (!IsAnyOf && !IsAllOf)
    return SDValue();

  // If the bitcast source has wider (32/64-bit) elements whose lanes are
  // fully sign-splat across the narrower view, use a MOVMSK of the wider
  // type: fewer mask bits, same any_of/all_of answer.
  if (Vec.getOpcode() == ISD::BITCAST) {
    SDValue BC = peekThroughBitcasts(Vec);
    MVT BCVT = BC.getSimpleValueType();
    unsigned BCNumElts = BCVT.getVectorNumElements();
    unsigned BCNumEltBits = BCVT.getScalarSizeInBits();
    if ((BCNumEltBits == 32 || BCNumEltBits == 64) &&
        BCNumEltBits > NumEltBits &&
        DAG.ComputeNumSignBits(BC) > (BCNumEltBits - NumEltBits)) {
      SDLoc DL(EFLAGS);
      unsigned CmpMask = IsAnyOf ? 0 : ((1 << BCNumElts) - 1);
      return DAG.getNode(X86ISD::CMP, DL, MVT::i32,
                         DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, BC),
                         DAG.getConstant(CmpMask, DL, MVT::i32));
    }
  }

  // MOVMSK(PCMPEQ(X,0)) == -1 -> PTESTZ(X,X): "all elements equal zero" is
  // exactly the ZF condition of PTEST(X,X). Requires SSE4.1 for PTEST.
  if (IsAllOf && Subtarget.hasSSE41()) {
    SDValue BC = peekThroughBitcasts(Vec);
    if (BC.getOpcode() == X86ISD::PCMPEQ &&
        ISD::isBuildVectorAllZeros(BC.getOperand(1).getNode())) {
      MVT TestVT = VecVT.is128BitVector() ? MVT::v2i64 : MVT::v4i64;
      SDValue V = DAG.getBitcast(TestVT, BC.getOperand(0));
      return DAG.getNode(X86ISD::PTEST, SDLoc(EFLAGS), MVT::i32, V, V);
    }
  }

  // See if we can avoid a PACKSS feeding the MOVMSK by taking the mask of
  // the un-packed source directly.
  if (Vec.getOpcode() == X86ISD::PACKSS && VecVT == MVT::v16i8) {
    SDValue VecOp0 = Vec.getOperand(0);
    SDValue VecOp1 = Vec.getOperand(1);
    // If each v8i16 lane is fully sign-splat, every byte of the pair
    // carries the sign, so the byte mask needs no correction.
    bool SignExt0 = DAG.ComputeNumSignBits(VecOp0) > 8;
    bool SignExt1 = DAG.ComputeNumSignBits(VecOp1) > 8;

    // PMOVMSKB(PACKSSBW(X, undef)) -> PMOVMSKB(BITCAST_v16i8(X)), keeping
    // only the high byte of each i16 lane (0xAAAA) unless X is sign-splat.
    if (IsAnyOf && CmpBits == 8 && VecOp1.isUndef()) {
      SDLoc DL(EFLAGS);
      SDValue Result = DAG.getBitcast(MVT::v16i8, VecOp0);
      Result = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Result);
      Result = DAG.getZExtOrTrunc(Result, DL, MVT::i16);
      if (!SignExt0) {
        Result = DAG.getNode(ISD::AND, DL, MVT::i16, Result,
                             DAG.getConstant(0xAAAA, DL, MVT::i16));
      }
      return DAG.getNode(X86ISD::CMP, DL, MVT::i32, Result,
                         DAG.getConstant(0, DL, MVT::i16));
    }

    // PMOVMSKB(PACKSSBW(LO(X), HI(X))) -> PMOVMSKB(BITCAST_v32i8(X)) when
    // the operands are the low/high halves (subvector indices 0 and 8) of
    // the same v16i16 source. Needs AVX2 for the 256-bit byte MOVMSK.
    if (CmpBits >= 16 && Subtarget.hasInt256() &&
        VecOp0.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
        VecOp1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
        VecOp0.getOperand(0) == VecOp1.getOperand(0) &&
        VecOp0.getConstantOperandAPInt(1) == 0 &&
        VecOp1.getConstantOperandAPInt(1) == 8 &&
        (IsAnyOf || (SignExt0 && SignExt1))) {
      SDLoc DL(EFLAGS);
      SDValue Result = DAG.getBitcast(MVT::v32i8, VecOp0.getOperand(0));
      Result = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Result);
      unsigned CmpMask = IsAnyOf ? 0 : 0xFFFFFFFF;
      if (!SignExt0 || !SignExt1) {
        assert(IsAnyOf && "Only perform v16i16 signmasks for any_of patterns");
        Result = DAG.getNode(ISD::AND, DL, MVT::i32, Result,
                             DAG.getConstant(0xAAAAAAAA, DL, MVT::i32));
      }
      return DAG.getNode(X86ISD::CMP, DL, MVT::i32, Result,
                         DAG.getConstant(CmpMask, DL, MVT::i32));
    }
  }

  // MOVMSK(SHUFFLE(X,u)) -> MOVMSK(X) iff every element is referenced: a
  // permutation of lanes doesn't change an any_of/all_of answer.
  SmallVector<int, 32> ShuffleMask;
  SmallVector<SDValue, 2> ShuffleInputs;
  if (NumElts <= CmpBits &&
      getTargetShuffleInputs(peekThroughBitcasts(Vec), ShuffleInputs,
                             ShuffleMask, DAG) &&
      ShuffleInputs.size() == 1 && !isAnyZeroOrUndef(ShuffleMask) &&
      ShuffleInputs[0].getValueSizeInBits() == VecVT.getSizeInBits()) {
    unsigned NumShuffleElts = ShuffleMask.size();
    APInt DemandedElts = APInt::getNullValue(NumShuffleElts);
    for (int M : ShuffleMask) {
      assert(0 <= M && M < (int)NumShuffleElts && "Bad unary shuffle index");
      DemandedElts.setBit(M);
    }
    if (DemandedElts.isAllOnesValue()) {
      SDLoc DL(EFLAGS);
      SDValue Result = DAG.getBitcast(VecVT, ShuffleInputs[0]);
      Result = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Result);
      Result =
          DAG.getZExtOrTrunc(Result, DL, EFLAGS.getOperand(0).getValueType());
      return DAG.getNode(X86ISD::CMP, DL, MVT::i32, Result,
                         EFLAGS.getOperand(1));
    }
  }

  return SDValue();
}
| 42855 | |
| 42856 | |
| 42857 | |
| 42858 | |
| 42859 | static SDValue combineSetCCEFLAGS(SDValue EFLAGS, X86::CondCode &CC, |
| 42860 | SelectionDAG &DAG, |
| 42861 | const X86Subtarget &Subtarget) { |
| 42862 | if (CC == X86::COND_B) |
| 42863 | if (SDValue Flags = combineCarryThroughADD(EFLAGS, DAG)) |
| 42864 | return Flags; |
| 42865 | |
| 42866 | if (SDValue R = checkBoolTestSetCCCombine(EFLAGS, CC)) |
| 42867 | return R; |
| 42868 | |
| 42869 | if (SDValue R = combinePTESTCC(EFLAGS, CC, DAG, Subtarget)) |
| 42870 | return R; |
| 42871 | |
| 42872 | if (SDValue R = combineSetCCMOVMSK(EFLAGS, CC, DAG, Subtarget)) |
| 42873 | return R; |
| 42874 | |
| 42875 | return combineSetCCAtomicArith(EFLAGS, CC, DAG, Subtarget); |
| 42876 | } |
| 42877 | |
| 42878 | |
| 42879 | static SDValue combineCMov(SDNode *N, SelectionDAG &DAG, |
| 42880 | TargetLowering::DAGCombinerInfo &DCI, |
| 42881 | const X86Subtarget &Subtarget) { |
| 42882 | SDLoc DL(N); |
| 42883 | |
| 42884 | SDValue FalseOp = N->getOperand(0); |
| 42885 | SDValue TrueOp = N->getOperand(1); |
| 42886 | X86::CondCode CC = (X86::CondCode)N->getConstantOperandVal(2); |
| 42887 | SDValue Cond = N->getOperand(3); |
| 42888 | |
| 42889 | |
| 42890 | if (TrueOp == FalseOp) |
| 42891 | return TrueOp; |
| 42892 | |
| 42893 | |
| 42894 | |
| 42895 | if (SDValue Flags = combineSetCCEFLAGS(Cond, CC, DAG, Subtarget)) { |
| 42896 | if (!(FalseOp.getValueType() == MVT::f80 || |
| 42897 | (FalseOp.getValueType() == MVT::f64 && !Subtarget.hasSSE2()) || |
| 42898 | (FalseOp.getValueType() == MVT::f32 && !Subtarget.hasSSE1())) || |
| 42899 | !Subtarget.hasCMov() || hasFPCMov(CC)) { |
| 42900 | SDValue Ops[] = {FalseOp, TrueOp, DAG.getTargetConstant(CC, DL, MVT::i8), |
| 42901 | Flags}; |
| 42902 | return DAG.getNode(X86ISD::CMOV, DL, N->getValueType(0), Ops); |
| 42903 | } |
| 42904 | } |
| 42905 | |
| 42906 | |
| 42907 | |
| 42908 | |
| 42909 | if (ConstantSDNode *TrueC = dyn_cast<ConstantSDNode>(TrueOp)) { |
| 42910 | if (ConstantSDNode *FalseC = dyn_cast<ConstantSDNode>(FalseOp)) { |
| 42911 | |
| 42912 | |
| 42913 | if (TrueC->getAPIntValue().ult(FalseC->getAPIntValue())) { |
| 42914 | CC = X86::GetOppositeBranchCondition(CC); |
| 42915 | std::swap(TrueC, FalseC); |
| 42916 | std::swap(TrueOp, FalseOp); |
| 42917 | } |
| 42918 | |
| 42919 | |
| 42920 | |
| 42921 | |
| 42922 | if (FalseC->getAPIntValue() == 0 && TrueC->getAPIntValue().isPowerOf2()) { |
| 42923 | Cond = getSETCC(CC, Cond, DL, DAG); |
| 42924 | |
| 42925 | |
| 42926 | Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, TrueC->getValueType(0), Cond); |
| 42927 | |
| 42928 | unsigned ShAmt = TrueC->getAPIntValue().logBase2(); |
| 42929 | Cond = DAG.getNode(ISD::SHL, DL, Cond.getValueType(), Cond, |
| 42930 | DAG.getConstant(ShAmt, DL, MVT::i8)); |
| 42931 | return Cond; |
| 42932 | } |
| 42933 | |
| 42934 | |
| 42935 | |
| 42936 | if (FalseC->getAPIntValue()+1 == TrueC->getAPIntValue()) { |
| 42937 | Cond = getSETCC(CC, Cond, DL, DAG); |
| 42938 | |
| 42939 | |
| 42940 | Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, |
| 42941 | FalseC->getValueType(0), Cond); |
| 42942 | Cond = DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond, |
| 42943 | SDValue(FalseC, 0)); |
| 42944 | return Cond; |
| 42945 | } |
| 42946 | |
| 42947 | |
| 42948 | |
| 42949 | if (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i64) { |
| 42950 | APInt Diff = TrueC->getAPIntValue() - FalseC->getAPIntValue(); |
| 42951 | assert(Diff.getBitWidth() == N->getValueType(0).getSizeInBits() && |
| 42952 | "Implicit constant truncation"); |
| 42953 | |
| 42954 | bool isFastMultiplier = false; |
| 42955 | if (Diff.ult(10)) { |
| 42956 | switch (Diff.getZExtValue()) { |
| 42957 | default: break; |
| 42958 | case 1: |
| 42959 | case 2: |
| 42960 | case 3: |
| 42961 | case 4: |
| 42962 | case 5: |
| 42963 | case 8: |
| 42964 | case 9: |
| 42965 | isFastMultiplier = true; |
| 42966 | break; |
| 42967 | } |
| 42968 | } |
| 42969 | |
| 42970 | if (isFastMultiplier) { |
| 42971 | Cond = getSETCC(CC, Cond, DL ,DAG); |
| 42972 | |
| 42973 | Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, FalseC->getValueType(0), |
| 42974 | Cond); |
| 42975 | |
| 42976 | if (Diff != 1) |
| 42977 | Cond = DAG.getNode(ISD::MUL, DL, Cond.getValueType(), Cond, |
| 42978 | DAG.getConstant(Diff, DL, Cond.getValueType())); |
| 42979 | |
| 42980 | |
| 42981 | if (FalseC->getAPIntValue() != 0) |
| 42982 | Cond = DAG.getNode(ISD::ADD, DL, Cond.getValueType(), Cond, |
| 42983 | SDValue(FalseC, 0)); |
| 42984 | return Cond; |
| 42985 | } |
| 42986 | } |
| 42987 | } |
| 42988 | } |
| 42989 | |
| 42990 | |
| 42991 | |
| 42992 | |
| 42993 | |
| 42994 | |
| 42995 | |
| 42996 | |
| 42997 | |
| 42998 | |
| 42999 | |
| 43000 | |
| 43001 | |
| 43002 | |
| 43003 | |
| 43004 | if (!DCI.isBeforeLegalize() && !DCI.isBeforeLegalizeOps()) { |
| 43005 | |
| 43006 | |
| 43007 | |
| 43008 | ConstantSDNode *CmpAgainst = nullptr; |
| 43009 | if ((Cond.getOpcode() == X86ISD::CMP || Cond.getOpcode() == X86ISD::SUB) && |
| 43010 | (CmpAgainst = dyn_cast<ConstantSDNode>(Cond.getOperand(1))) && |
| 43011 | !isa<ConstantSDNode>(Cond.getOperand(0))) { |
| 43012 | |
| 43013 | if (CC == X86::COND_NE && |
| 43014 | CmpAgainst == dyn_cast<ConstantSDNode>(FalseOp)) { |
| 43015 | CC = X86::GetOppositeBranchCondition(CC); |
| 43016 | std::swap(TrueOp, FalseOp); |
| 43017 | } |
| 43018 | |
| 43019 | if (CC == X86::COND_E && |
| 43020 | CmpAgainst == dyn_cast<ConstantSDNode>(TrueOp)) { |
| 43021 | SDValue Ops[] = {FalseOp, Cond.getOperand(0), |
| 43022 | DAG.getTargetConstant(CC, DL, MVT::i8), Cond}; |
| 43023 | return DAG.getNode(X86ISD::CMOV, DL, N->getValueType(0), Ops); |
| 43024 | } |
| 43025 | } |
| 43026 | } |
| 43027 | |
| 43028 | |
| 43029 | |
| 43030 | |
| 43031 | |
| 43032 | |
| 43033 | |
| 43034 | |
| 43035 | |
| 43036 | |
| 43037 | |
| 43038 | |
| 43039 | |
| 43040 | |
| 43041 | |
| 43042 | |
| 43043 | |
| 43044 | |
| 43045 | if (CC == X86::COND_NE) { |
| 43046 | SDValue Flags; |
| 43047 | X86::CondCode CC0, CC1; |
| 43048 | bool isAndSetCC; |
| 43049 | if (checkBoolTestAndOrSetCCCombine(Cond, CC0, CC1, Flags, isAndSetCC)) { |
| 43050 | if (isAndSetCC) { |
| 43051 | std::swap(FalseOp, TrueOp); |
| 43052 | CC0 = X86::GetOppositeBranchCondition(CC0); |
| 43053 | CC1 = X86::GetOppositeBranchCondition(CC1); |
| 43054 | } |
| 43055 | |
| 43056 | SDValue LOps[] = {FalseOp, TrueOp, |
| 43057 | DAG.getTargetConstant(CC0, DL, MVT::i8), Flags}; |
| 43058 | SDValue LCMOV = DAG.getNode(X86ISD::CMOV, DL, N->getValueType(0), LOps); |
| 43059 | SDValue Ops[] = {LCMOV, TrueOp, DAG.getTargetConstant(CC1, DL, MVT::i8), |
| 43060 | Flags}; |
| 43061 | SDValue CMOV = DAG.getNode(X86ISD::CMOV, DL, N->getValueType(0), Ops); |
| 43062 | return CMOV; |
| 43063 | } |
| 43064 | } |
| 43065 | |
| 43066 | |
| 43067 | |
| 43068 | |
| 43069 | |
| 43070 | if ((CC == X86::COND_NE || CC == X86::COND_E) && |
| 43071 | Cond.getOpcode() == X86ISD::CMP && isNullConstant(Cond.getOperand(1))) { |
| 43072 | SDValue Add = TrueOp; |
| 43073 | SDValue Const = FalseOp; |
| 43074 | |
| 43075 | if (CC == X86::COND_E) |
| 43076 | std::swap(Add, Const); |
| 43077 | |
| 43078 | |
| 43079 | |
| 43080 | if (Const == Cond.getOperand(0)) |
| 43081 | Const = Cond.getOperand(1); |
| 43082 | |
| 43083 | |
| 43084 | if (isa<ConstantSDNode>(Const) && Add.getOpcode() == ISD::ADD && |
| 43085 | Add.hasOneUse() && isa<ConstantSDNode>(Add.getOperand(1)) && |
| 43086 | (Add.getOperand(0).getOpcode() == ISD::CTTZ_ZERO_UNDEF || |
| 43087 | Add.getOperand(0).getOpcode() == ISD::CTTZ) && |
| 43088 | Add.getOperand(0).getOperand(0) == Cond.getOperand(0)) { |
| 43089 | EVT VT = N->getValueType(0); |
| 43090 | |
| 43091 | SDValue Diff = DAG.getNode(ISD::SUB, DL, VT, Const, Add.getOperand(1)); |
| 43092 | SDValue CMov = |
| 43093 | DAG.getNode(X86ISD::CMOV, DL, VT, Diff, Add.getOperand(0), |
| 43094 | DAG.getTargetConstant(X86::COND_NE, DL, MVT::i8), Cond); |
| 43095 | return DAG.getNode(ISD::ADD, DL, VT, CMov, Add.getOperand(1)); |
| 43096 | } |
| 43097 | } |
| 43098 | |
| 43099 | return SDValue(); |
| 43100 | } |
| 43101 | |
| 43102 | |
// Different mul shrinking modes for a 32-bit vector multiply: the effective
// operand width (8 or 16 bits) and whether the narrow multiply is signed (S)
// or unsigned (U).
enum class ShrinkMode { MULS8, MULU8, MULS16, MULU16 };
| 43104 | |
| 43105 | static bool canReduceVMulWidth(SDNode *N, SelectionDAG &DAG, ShrinkMode &Mode) { |
| 43106 | EVT VT = N->getOperand(0).getValueType(); |
| 43107 | if (VT.getScalarSizeInBits() != 32) |
| 43108 | return false; |
| 43109 | |
| 43110 | assert(N->getNumOperands() == 2 && "NumOperands of Mul are 2"); |
| 43111 | unsigned SignBits[2] = {1, 1}; |
| 43112 | bool IsPositive[2] = {false, false}; |
| 43113 | for (unsigned i = 0; i < 2; i++) { |
| 43114 | SDValue Opd = N->getOperand(i); |
| 43115 | |
| 43116 | SignBits[i] = DAG.ComputeNumSignBits(Opd); |
| 43117 | IsPositive[i] = DAG.SignBitIsZero(Opd); |
| 43118 | } |
| 43119 | |
| 43120 | bool AllPositive = IsPositive[0] && IsPositive[1]; |
| 43121 | unsigned MinSignBits = std::min(SignBits[0], SignBits[1]); |
| 43122 | |
| 43123 | if (MinSignBits >= 25) |
| 43124 | Mode = ShrinkMode::MULS8; |
| 43125 | |
| 43126 | else if (AllPositive && MinSignBits >= 24) |
| 43127 | Mode = ShrinkMode::MULU8; |
| 43128 | |
| 43129 | else if (MinSignBits >= 17) |
| 43130 | Mode = ShrinkMode::MULS16; |
| 43131 | |
| 43132 | else if (AllPositive && MinSignBits >= 16) |
| 43133 | Mode = ShrinkMode::MULU16; |
| 43134 | else |
| 43135 | return false; |
| 43136 | return true; |
| 43137 | } |
| 43138 | |
| 43139 | |
| 43140 | |
| 43141 | |
| 43142 | |
| 43143 | |
| 43144 | |
| 43145 | |
| 43146 | |
| 43147 | |
| 43148 | |
| 43149 | |
| 43150 | |
| 43151 | |
| 43152 | |
| 43153 | |
| 43154 | |
| 43155 | |
| 43156 | |
| 43157 | |
| 43158 | |
| 43159 | |
| 43160 | |
| 43161 | |
| 43162 | |
| 43163 | |
| 43164 | |
| 43165 | |
| 43166 | |
/// Shrink a 32-bit-element vector multiply to 16-bit multiplies when the
/// operands' known sign/zero bits allow it (see canReduceVMulWidth).
/// For the 8-bit-effective modes only the low 16-bit product is needed; for
/// the 16-bit modes the low and high halves of the product are recombined
/// with interleaving shuffles.
static SDValue reduceVMULWidth(SDNode *N, SelectionDAG &DAG,
                               const X86Subtarget &Subtarget) {
  // The 16-bit multiply nodes used below require SSE2.
  if (!Subtarget.hasSSE2())
    return SDValue();

  // With a fast 32-bit PMULLD (SSE4.1) there is nothing to gain, unless we
  // are optimizing for size.
  bool OptForMinSize = DAG.getMachineFunction().getFunction().hasMinSize();
  if (Subtarget.hasSSE41() && (OptForMinSize || !Subtarget.isPMULLDSlow()))
    return SDValue();

  ShrinkMode Mode;
  if (!canReduceVMulWidth(N, DAG, Mode))
    return SDValue();

  SDLoc DL(N);
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getOperand(0).getValueType();
  unsigned NumElts = VT.getVectorNumElements();
  // The interleave shuffles below assume an even element count.
  if ((NumElts % 2) != 0)
    return SDValue();

  // Same element count, but i16 elements.
  EVT ReducedVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16, NumElts);

  // Truncate both operands to the narrow type.
  SDValue NewN0 = DAG.getNode(ISD::TRUNCATE, DL, ReducedVT, N0);
  SDValue NewN1 = DAG.getNode(ISD::TRUNCATE, DL, ReducedVT, N1);

  // Low 16 bits of the product.
  SDValue MulLo = DAG.getNode(ISD::MUL, DL, ReducedVT, NewN0, NewN1);
  // For the 8-bit-effective modes the low half is the whole product: just
  // extend it back to the original element width.
  if (Mode == ShrinkMode::MULU8 || Mode == ShrinkMode::MULS8)
    return DAG.getNode((Mode == ShrinkMode::MULU8) ? ISD::ZERO_EXTEND
                                                   : ISD::SIGN_EXTEND,
                       DL, VT, MulLo);

  // i32 vector with half the elements — the type of each recombined half.
  EVT ResVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts / 2);

  // High 16 bits of the product (signed or unsigned per the mode).
  SDValue MulHi =
      DAG.getNode(Mode == ShrinkMode::MULS16 ? ISD::MULHS : ISD::MULHU, DL,
                  ReducedVT, NewN0, NewN1);

  // Interleave lo/hi 16-bit lanes so that each pair forms one 32-bit result
  // element. First shuffle handles the lower half of the elements:
  // element 2*i takes lane i of MulLo, element 2*i+1 takes lane i of MulHi
  // (indices >= NumElts address the second shuffle operand).
  SmallVector<int, 16> ShuffleMask(NumElts);
  for (unsigned i = 0, e = NumElts / 2; i < e; i++) {
    ShuffleMask[2 * i] = i;
    ShuffleMask[2 * i + 1] = i + NumElts;
  }
  SDValue ResLo =
      DAG.getVectorShuffle(ReducedVT, DL, MulLo, MulHi, ShuffleMask);
  ResLo = DAG.getBitcast(ResVT, ResLo);

  // Second shuffle interleaves the upper half of the lanes the same way.
  for (unsigned i = 0, e = NumElts / 2; i < e; i++) {
    ShuffleMask[2 * i] = i + NumElts / 2;
    ShuffleMask[2 * i + 1] = i + NumElts * 3 / 2;
  }
  SDValue ResHi =
      DAG.getVectorShuffle(ReducedVT, DL, MulLo, MulHi, ShuffleMask);
  ResHi = DAG.getBitcast(ResVT, ResHi);
  // Glue the two halves back into the original vector type.
  return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ResLo, ResHi);
}
| 43236 | |
/// Lower `mul x, MulAmt` for selected constants into cheaper sequences of
/// LEA-style multiplies (x3/x5/x9), shifts, and add/sub.
static SDValue combineMulSpecial(uint64_t MulAmt, SDNode *N, SelectionDAG &DAG,
                                 EVT VT, const SDLoc &DL) {
  // Build ((x * Mult) << Shift) +/- x.
  auto combineMulShlAddOrSub = [&](int Mult, int Shift, bool isAdd) {
    SDValue Result = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0),
                                 DAG.getConstant(Mult, DL, VT));
    Result = DAG.getNode(ISD::SHL, DL, VT, Result,
                         DAG.getConstant(Shift, DL, MVT::i8));
    Result = DAG.getNode(isAdd ? ISD::ADD : ISD::SUB, DL, VT, Result,
                         N->getOperand(0));
    return Result;
  };

  // Build ((x * Mul1) * Mul2) +/- x.
  auto combineMulMulAddOrSub = [&](int Mul1, int Mul2, bool isAdd) {
    SDValue Result = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0),
                                 DAG.getConstant(Mul1, DL, VT));
    Result = DAG.getNode(X86ISD::MUL_IMM, DL, VT, Result,
                         DAG.getConstant(Mul2, DL, VT));
    Result = DAG.getNode(isAdd ? ISD::ADD : ISD::SUB, DL, VT, Result,
                         N->getOperand(0));
    return Result;
  };

  switch (MulAmt) {
  default:
    break;
  case 11:
    // mul x, 11 => add ((shl (mul x, 5), 1), x)
    return combineMulShlAddOrSub(5, 1, /*isAdd*/ true);
  case 21:
    // mul x, 21 => add ((shl (mul x, 5), 2), x)
    return combineMulShlAddOrSub(5, 2, /*isAdd*/ true);
  case 41:
    // mul x, 41 => add ((shl (mul x, 5), 3), x)
    return combineMulShlAddOrSub(5, 3, /*isAdd*/ true);
  case 22:
    // mul x, 22 => add (add ((shl (mul x, 5), 2), x), x)
    return DAG.getNode(ISD::ADD, DL, VT, N->getOperand(0),
                       combineMulShlAddOrSub(5, 2, /*isAdd*/ true));
  case 19:
    // mul x, 19 => add ((shl (mul x, 9), 1), x)
    return combineMulShlAddOrSub(9, 1, /*isAdd*/ true);
  case 37:
    // mul x, 37 => add ((shl (mul x, 9), 2), x)
    return combineMulShlAddOrSub(9, 2, /*isAdd*/ true);
  case 73:
    // mul x, 73 => add ((shl (mul x, 9), 3), x)
    return combineMulShlAddOrSub(9, 3, /*isAdd*/ true);
  case 13:
    // mul x, 13 => add ((shl (mul x, 3), 2), x)
    return combineMulShlAddOrSub(3, 2, /*isAdd*/ true);
  case 23:
    // mul x, 23 => sub ((shl (mul x, 3), 3), x)
    return combineMulShlAddOrSub(3, 3, /*isAdd*/ false);
  case 26:
    // mul x, 26 => add ((mul (mul x, 5), 5), x)
    return combineMulMulAddOrSub(5, 5, /*isAdd*/ true);
  case 28:
    // mul x, 28 => add ((mul (mul x, 9), 3), x)
    return combineMulMulAddOrSub(9, 3, /*isAdd*/ true);
  case 29:
    // mul x, 29 => add (add ((mul (mul x, 9), 3), x), x)
    return DAG.getNode(ISD::ADD, DL, VT, N->getOperand(0),
                       combineMulMulAddOrSub(9, 3, /*isAdd*/ true));
  }

  // If the amount has exactly two set bits (MulAmt with its lowest set bit
  // cleared is a power of two) and the low set bit is within shift-scale
  // range, use two shifts and an add:
  //   mul x, ((1 << ShiftAmt) + (1 << ScaleShift))
  //     => add (shl x, ShiftAmt), (shl x, ScaleShift)
  if (isPowerOf2_64(MulAmt & (MulAmt - 1))) {
    unsigned ScaleShift = countTrailingZeros(MulAmt);
    if (ScaleShift >= 1 && ScaleShift < 4) {
      unsigned ShiftAmt = Log2_64((MulAmt & (MulAmt - 1)));
      SDValue Shift1 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
                                   DAG.getConstant(ShiftAmt, DL, MVT::i8));
      SDValue Shift2 = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
                                   DAG.getConstant(ScaleShift, DL, MVT::i8));
      return DAG.getNode(ISD::ADD, DL, VT, Shift1, Shift2);
    }
  }

  return SDValue();
}
| 43322 | |
| 43323 | |
| 43324 | |
/// Try to lower a vXi32 multiply to X86ISD::VPMADDWD when the upper 17 bits
/// of both operands are known zero (so each 32-bit lane behaves like a
/// non-negative 16-bit value and pmaddwd's pairwise multiply-add reproduces
/// the plain multiply).
static SDValue combineMulToPMADDWD(SDNode *N, SelectionDAG &DAG,
                                   const X86Subtarget &Subtarget) {
  if (!Subtarget.hasSSE2())
    return SDValue();

  // Not worthwhile on targets where pmaddwd itself is slow.
  if (Subtarget.isPMADDWDSlow())
    return SDValue();

  EVT VT = N->getValueType(0);

  // Only support vXi32 vectors.
  if (!VT.isVector() || VT.getVectorElementType() != MVT::i32)
    return SDValue();

  // Make sure the type is legal or can split/widen to a legal type.
  // With AVX512 but without BWI, we would still split v32i16 to v16i16.
  if (VT != MVT::v2i32 && !DAG.getTargetLoweringInfo().isTypeLegal(VT))
    return SDValue();

  // Matching i16 vector type with twice the element count.
  MVT WVT = MVT::getVectorVT(MVT::i16, 2 * VT.getVectorNumElements());

  // v32i16 multiplies need AVX512BW.
  if (WVT == MVT::v32i16 && !Subtarget.hasBWI())
    return SDValue();

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  // Without SSE4.1, if both inputs are zero-extended from <= 8-bit sources
  // the multiply is better handled by other lowering — skip.
  if (!Subtarget.hasSSE41() &&
      (N0.getOpcode() == ISD::ZERO_EXTEND &&
       N0.getOperand(0).getScalarValueSizeInBits() <= 8) &&
      (N1.getOpcode() == ISD::ZERO_EXTEND &&
       N1.getOperand(0).getScalarValueSizeInBits() <= 8))
    return SDValue();

  // Both operands must have the top 17 bits clear so that neither the
  // 16-bit products nor their pairwise sum can overflow/sign-contaminate.
  APInt Mask17 = APInt::getHighBitsSet(32, 17);
  if (!DAG.MaskedValueIsZero(N1, Mask17) ||
      !DAG.MaskedValueIsZero(N0, Mask17))
    return SDValue();

  // Use SplitOpsAndApply to handle AVX splitting of smaller-than-legal types.
  auto PMADDWDBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
                           ArrayRef<SDValue> Ops) {
    MVT OpVT = MVT::getVectorVT(MVT::i32, Ops[0].getValueSizeInBits() / 32);
    return DAG.getNode(X86ISD::VPMADDWD, DL, OpVT, Ops);
  };
  return SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT,
                          { DAG.getBitcast(WVT, N0), DAG.getBitcast(WVT, N1) },
                          PMADDWDBuilder);
}
| 43376 | |
| 43377 | static SDValue combineMulToPMULDQ(SDNode *N, SelectionDAG &DAG, |
| 43378 | const X86Subtarget &Subtarget) { |
| 43379 | if (!Subtarget.hasSSE2()) |
| 43380 | return SDValue(); |
| 43381 | |
| 43382 | EVT VT = N->getValueType(0); |
| 43383 | |
| 43384 | |
| 43385 | if (!VT.isVector() || VT.getVectorElementType() != MVT::i64 || |
| 43386 | VT.getVectorNumElements() < 2 || |
| 43387 | !isPowerOf2_32(VT.getVectorNumElements())) |
| 43388 | return SDValue(); |
| 43389 | |
| 43390 | SDValue N0 = N->getOperand(0); |
| 43391 | SDValue N1 = N->getOperand(1); |
| 43392 | |
| 43393 | |
| 43394 | |
| 43395 | if (Subtarget.hasSSE41() && DAG.ComputeNumSignBits(N0) > 32 && |
| 43396 | DAG.ComputeNumSignBits(N1) > 32) { |
| 43397 | auto PMULDQBuilder = [](SelectionDAG &DAG, const SDLoc &DL, |
| 43398 | ArrayRef<SDValue> Ops) { |
| 43399 | return DAG.getNode(X86ISD::PMULDQ, DL, Ops[0].getValueType(), Ops); |
| 43400 | }; |
| 43401 | return SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT, { N0, N1 }, |
| 43402 | PMULDQBuilder, false); |
| 43403 | } |
| 43404 | |
| 43405 | |
| 43406 | APInt Mask = APInt::getHighBitsSet(64, 32); |
| 43407 | if (DAG.MaskedValueIsZero(N0, Mask) && DAG.MaskedValueIsZero(N1, Mask)) { |
| 43408 | auto PMULUDQBuilder = [](SelectionDAG &DAG, const SDLoc &DL, |
| 43409 | ArrayRef<SDValue> Ops) { |
| 43410 | return DAG.getNode(X86ISD::PMULUDQ, DL, Ops[0].getValueType(), Ops); |
| 43411 | }; |
| 43412 | return SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT, { N0, N1 }, |
| 43413 | PMULUDQBuilder, false); |
| 43414 | } |
| 43415 | |
| 43416 | return SDValue(); |
| 43417 | } |
| 43418 | |
| 43419 | |
| 43420 | |
/// DAG combine for ISD::MUL: try vector-specific lowerings first, then
/// replace scalar multiplies by non-power-of-two constants with cheaper
/// LEA/shift/add-sub sequences.
static SDValue combineMul(SDNode *N, SelectionDAG &DAG,
                          TargetLowering::DAGCombinerInfo &DCI,
                          const X86Subtarget &Subtarget) {
  EVT VT = N->getValueType(0);

  if (SDValue V = combineMulToPMADDWD(N, DAG, Subtarget))
    return V;

  if (SDValue V = combineMulToPMULDQ(N, DAG, Subtarget))
    return V;

  // Narrow vector multiplies before legalization when profitable.
  if (DCI.isBeforeLegalize() && VT.isVector())
    return reduceVMULWidth(N, DAG, Subtarget);

  // Everything below rewrites `mul x, C` into longer instruction sequences;
  // bail out if that optimization is disabled or undesirable.
  if (!MulConstantOptimization)
    return SDValue();

  // An imul is usually smaller than the alternative sequence.
  if (DAG.getMachineFunction().getFunction().hasMinSize())
    return SDValue();

  if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
    return SDValue();

  if (VT != MVT::i64 && VT != MVT::i32)
    return SDValue();

  ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
  if (!C)
    return SDValue();
  // Power-of-two multiplies are handled elsewhere (plain shift).
  if (isPowerOf2_64(C->getZExtValue()))
    return SDValue();

  int64_t SignMulAmt = C->getSExtValue();
  assert(SignMulAmt != INT64_MIN && "Int min should have been handled!");
  uint64_t AbsMulAmt = SignMulAmt < 0 ? -SignMulAmt : SignMulAmt;

  SDLoc DL(N);
  // 3, 5, 9 map directly onto a single LEA (MUL_IMM); negate afterwards for
  // negative amounts.
  if (AbsMulAmt == 3 || AbsMulAmt == 5 || AbsMulAmt == 9) {
    SDValue NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0),
                                 DAG.getConstant(AbsMulAmt, DL, VT));
    if (SignMulAmt < 0)
      NewMul = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
                           NewMul);

    return NewMul;
  }

  // Try to factor the amount as MulAmt1 * MulAmt2 with MulAmt1 in {9,5,3}.
  uint64_t MulAmt1 = 0;
  uint64_t MulAmt2 = 0;
  if ((AbsMulAmt % 9) == 0) {
    MulAmt1 = 9;
    MulAmt2 = AbsMulAmt / 9;
  } else if ((AbsMulAmt % 5) == 0) {
    MulAmt1 = 5;
    MulAmt2 = AbsMulAmt / 5;
  } else if ((AbsMulAmt % 3) == 0) {
    MulAmt1 = 3;
    MulAmt2 = AbsMulAmt / 3;
  }

  SDValue NewMul;
  // Usable only if the second factor is a power of two (shift) or, for
  // non-negative amounts, itself an LEA-friendly 3/5/9.
  if (MulAmt2 &&
      (isPowerOf2_64(MulAmt2) ||
       (SignMulAmt >= 0 && (MulAmt2 == 3 || MulAmt2 == 5 || MulAmt2 == 9)))) {

    // Emit the LEA multiply last when the node's single user is an ADD —
    // the add may then fold into the LEA — otherwise do the shift first so
    // the LEA-style multiply sits on the outside.
    if (isPowerOf2_64(MulAmt2) &&
        !(SignMulAmt >= 0 && N->hasOneUse() &&
          N->use_begin()->getOpcode() == ISD::ADD))
      std::swap(MulAmt1, MulAmt2);

    if (isPowerOf2_64(MulAmt1))
      NewMul = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
                           DAG.getConstant(Log2_64(MulAmt1), DL, MVT::i8));
    else
      NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, N->getOperand(0),
                           DAG.getConstant(MulAmt1, DL, VT));

    if (isPowerOf2_64(MulAmt2))
      NewMul = DAG.getNode(ISD::SHL, DL, VT, NewMul,
                           DAG.getConstant(Log2_64(MulAmt2), DL, MVT::i8));
    else
      NewMul = DAG.getNode(X86ISD::MUL_IMM, DL, VT, NewMul,
                           DAG.getConstant(MulAmt2, DL, VT));

    // Negate the result for negative multiply amounts.
    if (SignMulAmt < 0)
      NewMul = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT),
                           NewMul);
  } else if (!Subtarget.slowLEA())
    NewMul = combineMulSpecial(C->getZExtValue(), N, DAG, VT, DL);

  if (!NewMul) {
    assert(C->getZExtValue() != 0 &&
           C->getZExtValue() != (VT == MVT::i64 ? UINT64_MAX : UINT32_MAX) &&
           "Both cases that could cause potential overflows should have "
           "already been handled.");
    if (isPowerOf2_64(AbsMulAmt - 1)) {
      // (mul x, 2^N + 1) => (add (shl x, N), x)
      NewMul = DAG.getNode(
          ISD::ADD, DL, VT, N->getOperand(0),
          DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
                      DAG.getConstant(Log2_64(AbsMulAmt - 1), DL,
                                      MVT::i8)));
      // To negate, subtract the number from zero.
      if (SignMulAmt < 0)
        NewMul = DAG.getNode(ISD::SUB, DL, VT,
                             DAG.getConstant(0, DL, VT), NewMul);
    } else if (isPowerOf2_64(AbsMulAmt + 1)) {
      // (mul x, 2^N - 1) => (sub (shl x, N), x)
      NewMul = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
                           DAG.getConstant(Log2_64(AbsMulAmt + 1),
                                           DL, MVT::i8));
      // To negate, reverse the operands of the subtract.
      if (SignMulAmt < 0)
        NewMul = DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), NewMul);
      else
        NewMul = DAG.getNode(ISD::SUB, DL, VT, NewMul, N->getOperand(0));
    } else if (SignMulAmt >= 0 && isPowerOf2_64(AbsMulAmt - 2)) {
      // (mul x, 2^N + 2) => (add (shl x, N), (add x, x)) folded as two adds
      NewMul = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
                           DAG.getConstant(Log2_64(AbsMulAmt - 2),
                                           DL, MVT::i8));
      NewMul = DAG.getNode(ISD::ADD, DL, VT, NewMul, N->getOperand(0));
      NewMul = DAG.getNode(ISD::ADD, DL, VT, NewMul, N->getOperand(0));
    } else if (SignMulAmt >= 0 && isPowerOf2_64(AbsMulAmt + 2)) {
      // (mul x, 2^N - 2) => (sub (sub (shl x, N), x), x)
      NewMul = DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
                           DAG.getConstant(Log2_64(AbsMulAmt + 2),
                                           DL, MVT::i8));
      NewMul = DAG.getNode(ISD::SUB, DL, VT, NewMul, N->getOperand(0));
      NewMul = DAG.getNode(ISD::SUB, DL, VT, NewMul, N->getOperand(0));
    }
  }

  return NewMul;
}
| 43562 | |
| 43563 | |
| 43564 | |
| 43565 | |
| 43566 | |
| 43567 | |
| 43568 | |
| 43569 | |
/// Fold (srl/sra (mul (ext x), (ext y)), 16) into (ext (mulhs/mulhu x, y))
/// when x and y are i16 vectors: a 16-bit right shift of a widened 16x16
/// multiply is exactly the high half of the 16-bit product.
static SDValue combineShiftToPMULH(SDNode *N, SelectionDAG &DAG,
                                   const X86Subtarget &Subtarget) {
  assert((N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SRA) &&
         "SRL or SRA node is required here!");
  SDLoc DL(N);

  // Only do this with SSE4.1.
  if (!Subtarget.hasSSE41())
    return SDValue();

  // The shifted value must be a multiply with no other users.
  SDValue ShiftOperand = N->getOperand(0);
  if (ShiftOperand.getOpcode() != ISD::MUL || !ShiftOperand.hasOneUse())
    return SDValue();

  // Input type should be at least vXi32 (the widened multiply type).
  EVT VT = N->getValueType(0);
  if (!VT.isVector() || VT.getVectorElementType().getSizeInBits() < 32)
    return SDValue();

  // Need a splat shift amount of exactly 16.
  APInt ShiftAmt;
  if (!ISD::isConstantSplatVector(N->getOperand(1).getNode(), ShiftAmt) ||
      ShiftAmt != 16)
    return SDValue();

  SDValue LHS = ShiftOperand.getOperand(0);
  SDValue RHS = ShiftOperand.getOperand(1);

  // Both multiply operands must be the same kind of extend (both sext or
  // both zext).
  unsigned ExtOpc = LHS.getOpcode();
  if ((ExtOpc != ISD::SIGN_EXTEND && ExtOpc != ISD::ZERO_EXTEND) ||
      RHS.getOpcode() != ExtOpc)
    return SDValue();

  // Peek through the extends to the narrow sources.
  LHS = LHS.getOperand(0);
  RHS = RHS.getOperand(0);

  // Both sources must be matching vXi16 vectors.
  EVT MulVT = LHS.getValueType();
  if (MulVT.getVectorElementType() != MVT::i16 || RHS.getValueType() != MulVT)
    return SDValue();

  // Signed extends -> signed high-multiply; unsigned -> unsigned.
  unsigned Opc = ExtOpc == ISD::SIGN_EXTEND ? ISD::MULHS : ISD::MULHU;
  SDValue Mulh = DAG.getNode(Opc, DL, MulVT, LHS, RHS);

  // Re-extend the i16 high half to the original result type, matching the
  // original shift kind (sra -> sext, srl -> zext).
  ExtOpc = N->getOpcode() == ISD::SRA ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
  return DAG.getNode(ExtOpc, DL, VT, Mulh);
}
| 43620 | |
/// DAG combine for ISD::SHL.
static SDValue combineShiftLeft(SDNode *N, SelectionDAG &DAG) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N0.getValueType();

  // fold (shl (and (setcc_c), c1), c2) -> (and setcc_c, (c1 << c2))
  // Valid because SETCC_CARRY is all-ones or all-zeros, so shifting then
  // masking is the same as masking with the shifted mask.
  if (VT.isInteger() && !VT.isVector() &&
      N1C && N0.getOpcode() == ISD::AND &&
      N0.getOperand(1).getOpcode() == ISD::Constant) {
    SDValue N00 = N0.getOperand(0);
    APInt Mask = N0.getConstantOperandAPInt(1);
    // Pre-shift the mask by the shift amount.
    Mask <<= N1C->getAPIntValue();
    bool MaskOK = false;
    // The transform is safe when the AND's source is known to be a
    // SETCC_CARRY (possibly through an extend). For zext/any_extend, the
    // shifted mask must still fit in the pre-extension width, since the
    // extend zero/arbitrary-fills the upper bits rather than replicating the
    // carry pattern.
    if (N00.getOpcode() == X86ISD::SETCC_CARRY) {
      MaskOK = true;
    } else if (N00.getOpcode() == ISD::SIGN_EXTEND &&
               N00.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) {
      MaskOK = true;
    } else if ((N00.getOpcode() == ISD::ZERO_EXTEND ||
                N00.getOpcode() == ISD::ANY_EXTEND) &&
               N00.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) {
      MaskOK = Mask.isIntN(N00.getOperand(0).getValueSizeInBits());
    }
    if (MaskOK && Mask != 0) {
      SDLoc DL(N);
      return DAG.getNode(ISD::AND, DL, VT, N00, DAG.getConstant(Mask, DL, VT));
    }
  }

  // Turn a vector shift-left by a splat of 1 into (add x, x): vector adds
  // are generally cheaper/more uniformly available than vector shifts.
  if (auto *N1BV = dyn_cast<BuildVectorSDNode>(N1))
    if (auto *N1SplatC = N1BV->getConstantSplatNode()) {
      assert(N0.getValueType().isVector() && "Invalid vector shift type");
      if (N1SplatC->isOne())
        return DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N0);
    }

  return SDValue();
}
| 43679 | |
/// DAG combine for ISD::SRA.
static SDValue combineShiftRightArithmetic(SDNode *N, SelectionDAG &DAG,
                                           const X86Subtarget &Subtarget) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  unsigned Size = VT.getSizeInBits();

  if (SDValue V = combineShiftToPMULH(N, DAG, Subtarget))
    return V;

  // fold (sra (shl X, ShlConst), SarConst)
  // when the shl fills exactly the bits above a smaller type SVT
  // (ShlConst == Size - sizeof(SVT)) into
  //   sign_extend_inreg(X, SVT)       if SarConst == ShlConst
  //   shl(sext_inreg, ShlConst-SarConst)  if SarConst <  ShlConst
  //   sra(sext_inreg, SarConst-ShlConst)  if SarConst >  ShlConst
  // Scalar, constant-shift, single-use cases only.
  if (VT.isVector() || N1.getOpcode() != ISD::Constant ||
      N0.getOpcode() != ISD::SHL || !N0.hasOneUse() ||
      N0.getOperand(1).getOpcode() != ISD::Constant)
    return SDValue();

  SDValue N00 = N0.getOperand(0);
  SDValue N01 = N0.getOperand(1);
  APInt ShlConst = (cast<ConstantSDNode>(N01))->getAPIntValue();
  APInt SarConst = (cast<ConstantSDNode>(N1))->getAPIntValue();
  EVT CVT = N1.getValueType();

  // A negative (i.e. out-of-range) sra amount is not handled here.
  if (SarConst.isNegative())
    return SDValue();

  // Try each narrower type the shl could be sign-extending from.
  for (MVT SVT : { MVT::i8, MVT::i16, MVT::i32 }) {
    unsigned ShiftSize = SVT.getSizeInBits();
    // The shl must move X's low ShiftSize bits exactly to the top.
    if (ShiftSize >= Size || ShlConst != Size - ShiftSize)
      continue;
    SDLoc DL(N);
    SDValue NN =
        DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, N00, DAG.getValueType(SVT));
    // Residual shift after folding the shl into the sign-extension.
    SarConst = SarConst - (Size - ShiftSize);
    if (SarConst == 0)
      return NN;
    else if (SarConst.isNegative())
      return DAG.getNode(ISD::SHL, DL, VT, NN,
                         DAG.getConstant(-SarConst, DL, CVT));
    else
      return DAG.getNode(ISD::SRA, DL, VT, NN,
                         DAG.getConstant(SarConst, DL, CVT));
  }
  return SDValue();
}
| 43736 | |
/// DAG combine for ISD::SRL.
static SDValue combineShiftRightLogical(SDNode *N, SelectionDAG &DAG,
                                        TargetLowering::DAGCombinerInfo &DCI,
                                        const X86Subtarget &Subtarget) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();

  if (SDValue V = combineShiftToPMULH(N, DAG, Subtarget))
    return V;

  // Only do this on the last DAG combine so it cannot interfere with
  // other shift/and combines.
  if (!DCI.isAfterLegalizeDAG())
    return SDValue();

  // Try to improve a sequence of srl (and X, C1), C2 by inverting the order:
  // and (srl X, C2), (C1 >> C2) — when the new mask has a smaller encoding.
  // Single-use AND with two constants required.
  if (N0.getOpcode() != ISD::AND || !N0.hasOneUse())
    return SDValue();

  auto *ShiftC = dyn_cast<ConstantSDNode>(N1);
  auto *AndC = dyn_cast<ConstantSDNode>(N0.getOperand(1));
  if (!ShiftC || !AndC)
    return SDValue();

  APInt MaskVal = AndC->getAPIntValue();

  // Keep low-bit masks whose width is a power of two >= 8: those match
  // cheap movzx/subregister patterns and should not be rewritten.
  if (MaskVal.isMask()) {
    unsigned TO = MaskVal.countTrailingOnes();
    if (TO >= 8 && isPowerOf2_32(TO))
      return SDValue();
  }

  // Only profitable if the shifted-down mask crosses an immediate-size
  // boundary (> i8 becomes <= i8, or > i32 becomes <= i32).
  APInt NewMaskVal = MaskVal.lshr(ShiftC->getAPIntValue());
  unsigned OldMaskSize = MaskVal.getMinSignedBits();
  unsigned NewMaskSize = NewMaskVal.getMinSignedBits();
  if ((OldMaskSize > 8 && NewMaskSize <= 8) ||
      (OldMaskSize > 32 && NewMaskSize <= 32)) {
    // srl (and X, AndC), ShiftC --> and (srl X, ShiftC), (AndC >> ShiftC)
    SDLoc DL(N);
    SDValue NewMask = DAG.getConstant(NewMaskVal, DL, VT);
    SDValue NewShift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), N1);
    return DAG.getNode(ISD::AND, DL, VT, NewShift, NewMask);
  }
  return SDValue();
}
| 43789 | |
/// Attempt to fold shuffles feeding a horizontal op (hadd/hsub/pack) into a
/// horizontal op on the un-shuffled inputs followed by a single post-shuffle
/// of the result, reducing the total shuffle count.
static SDValue combineHorizOpWithShuffle(SDNode *N, SelectionDAG &DAG,
                                         const X86Subtarget &Subtarget) {
  unsigned Opcode = N->getOpcode();
  assert(isHorizOp(Opcode) && "Unexpected hadd/hsub/pack opcode");

  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT SrcVT = N0.getValueType();

  // Look through bitcasts, but only when this node is the operand's sole
  // user (otherwise the original value is still needed elsewhere).
  SDValue BC0 =
      N->isOnlyUserOf(N0.getNode()) ? peekThroughOneUseBitcasts(N0) : N0;
  SDValue BC1 =
      N->isOnlyUserOf(N1.getNode()) ? peekThroughOneUseBitcasts(N1) : N1;

  // Case 1: both operands are the lo/hi halves (extract_subvector at index 0
  // and at the half boundary) of the SAME 256-bit shuffled vector. Split the
  // shuffle's single 256-bit source in half, do the horizontal op on the
  // halves, and apply the (scaled) shuffle mask afterwards.
  if (VT.is128BitVector() && SrcVT.getScalarSizeInBits() <= 32 &&
      BC0.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
      BC1.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
      BC0.getOperand(0) == BC1.getOperand(0) &&
      BC0.getOperand(0).getValueType().is256BitVector() &&
      BC0.getConstantOperandAPInt(1) == 0 &&
      BC1.getConstantOperandAPInt(1) ==
          BC0.getValueType().getVectorNumElements()) {
    SmallVector<SDValue> ShuffleOps;
    SmallVector<int> ShuffleMask, ScaledMask;
    SDValue Vec = peekThroughBitcasts(BC0.getOperand(0));
    if (getTargetShuffleInputs(Vec, ShuffleOps, ShuffleMask, DAG)) {
      resolveTargetShuffleInputsAndMask(ShuffleOps, ShuffleMask);
      // Need a single 256-bit source, no zeroed lanes, and a mask that can
      // be expressed on 4 (64-bit-wide) elements.
      if (!isAnyZero(ShuffleMask) && ShuffleOps.size() == 1 &&
          ShuffleOps[0].getValueType().is256BitVector() &&
          scaleShuffleElements(ShuffleMask, 4, ScaledMask)) {
        SDValue Lo, Hi;
        MVT ShufVT = VT.isFloatingPoint() ? MVT::v4f32 : MVT::v4i32;
        std::tie(Lo, Hi) = DAG.SplitVector(ShuffleOps[0], DL);
        Lo = DAG.getBitcast(SrcVT, Lo);
        Hi = DAG.getBitcast(SrcVT, Hi);
        SDValue Res = DAG.getNode(Opcode, DL, VT, Lo, Hi);
        Res = DAG.getBitcast(ShufVT, Res);
        Res = DAG.getVectorShuffle(ShufVT, DL, Res, Res, ScaledMask);
        return DAG.getBitcast(VT, Res);
      }
    }
  }

  // Case 2 (128-bit): one or both operands are 128-bit shuffles. Replace
  // with the horizontal op on the shuffles' sources plus one post-shuffle
  // whose 64-bit-lane mask is recomputed below.
  if (VT.is128BitVector() && SrcVT.getScalarSizeInBits() <= 32) {
    // Decode each operand's shuffle (if any) and scale its mask to 2
    // elements (i.e. 64-bit lanes).
    SmallVector<SDValue> Ops0, Ops1;
    SmallVector<int> Mask0, Mask1, ScaledMask0, ScaledMask1;
    bool IsShuf0 =
        getTargetShuffleInputs(BC0, Ops0, Mask0, DAG) && !isAnyZero(Mask0) &&
        scaleShuffleElements(Mask0, 2, ScaledMask0) &&
        all_of(Ops0, [](SDValue Op) { return Op.getValueSizeInBits() == 128; });
    bool IsShuf1 =
        getTargetShuffleInputs(BC1, Ops1, Mask1, DAG) && !isAnyZero(Mask1) &&
        scaleShuffleElements(Mask1, 2, ScaledMask1) &&
        all_of(Ops1, [](SDValue Op) { return Op.getValueSizeInBits() == 128; });
    if (IsShuf0 || IsShuf1) {
      // Treat a non-shuffle operand as an identity shuffle of itself.
      if (!IsShuf0) {
        Ops0.assign({BC0});
        ScaledMask0.assign({0, 1});
      }
      if (!IsShuf1) {
        Ops1.assign({BC1});
        ScaledMask1.assign({0, 1});
      }

      // Map each 64-bit lane reference onto at most two distinct sources
      // (LHS/RHS), recording where each result lane should come from.
      SDValue LHS, RHS;
      int PostShuffle[4] = {-1, -1, -1, -1};
      auto FindShuffleOpAndIdx = [&](int M, int &Idx, ArrayRef<SDValue> Ops) {
        if (M < 0)
          return true; // undef lane — leave PostShuffle entry as -1.
        Idx = M % 2;
        SDValue Src = Ops[M / 2];
        if (!LHS || LHS == Src) {
          LHS = Src;
          return true;
        }
        if (!RHS || RHS == Src) {
          Idx += 2; // lanes 2,3 address the RHS operand.
          RHS = Src;
          return true;
        }
        return false; // would need a third source — give up.
      };
      if (FindShuffleOpAndIdx(ScaledMask0[0], PostShuffle[0], Ops0) &&
          FindShuffleOpAndIdx(ScaledMask0[1], PostShuffle[1], Ops0) &&
          FindShuffleOpAndIdx(ScaledMask1[0], PostShuffle[2], Ops1) &&
          FindShuffleOpAndIdx(ScaledMask1[1], PostShuffle[3], Ops1)) {
        LHS = DAG.getBitcast(SrcVT, LHS);
        RHS = DAG.getBitcast(SrcVT, RHS ? RHS : LHS);
        MVT ShufVT = VT.isFloatingPoint() ? MVT::v4f32 : MVT::v4i32;
        SDValue Res = DAG.getNode(Opcode, DL, VT, LHS, RHS);
        Res = DAG.getBitcast(ShufVT, Res);
        Res = DAG.getVectorShuffle(ShufVT, DL, Res, Res, PostShuffle);
        return DAG.getBitcast(VT, Res);
      }
    }
  }

  // Case 3 (256-bit, AVX2): both operands are 2-source 256-bit shuffles of
  // the same pair of vectors. Do the horizontal op on the raw pair and apply
  // one combined 4 x 64-bit post-shuffle.
  if (VT.is256BitVector() && Subtarget.hasInt256()) {
    SmallVector<int> Mask0, Mask1;
    SmallVector<SDValue> Ops0, Ops1;
    SmallVector<int, 2> ScaledMask0, ScaledMask1;
    if (getTargetShuffleInputs(BC0, Ops0, Mask0, DAG) && !isAnyZero(Mask0) &&
        getTargetShuffleInputs(BC1, Ops1, Mask1, DAG) && !isAnyZero(Mask1) &&
        !Ops0.empty() && !Ops1.empty() &&
        all_of(Ops0,
               [](SDValue Op) { return Op.getValueType().is256BitVector(); }) &&
        all_of(Ops1,
               [](SDValue Op) { return Op.getValueType().is256BitVector(); }) &&
        scaleShuffleElements(Mask0, 2, ScaledMask0) &&
        scaleShuffleElements(Mask1, 2, ScaledMask1)) {
      SDValue Op00 = peekThroughBitcasts(Ops0.front());
      SDValue Op10 = peekThroughBitcasts(Ops1.front());
      SDValue Op01 = peekThroughBitcasts(Ops0.back());
      SDValue Op11 = peekThroughBitcasts(Ops1.back());
      // If operand 1 references the pair in swapped order, commute its mask
      // so both sides agree on which source is which.
      if ((Op00 == Op11) && (Op01 == Op10)) {
        std::swap(Op10, Op11);
        ShuffleVectorSDNode::commuteMask(ScaledMask1);
      }
      if ((Op00 == Op10) && (Op01 == Op11)) {
        // Map remaps lane indices to account for the horizontal op's
        // lo/lo, hi/hi interleaving of its two inputs.
        const int Map[4] = {0, 2, 1, 3};
        SmallVector<int, 4> ShuffleMask(
            {Map[ScaledMask0[0]], Map[ScaledMask1[0]], Map[ScaledMask0[1]],
             Map[ScaledMask1[1]]});
        MVT ShufVT = VT.isFloatingPoint() ? MVT::v4f64 : MVT::v4i64;
        SDValue Res = DAG.getNode(Opcode, DL, VT, DAG.getBitcast(SrcVT, Op00),
                                  DAG.getBitcast(SrcVT, Op01));
        Res = DAG.getBitcast(ShufVT, Res);
        Res = DAG.getVectorShuffle(ShufVT, DL, Res, Res, ShuffleMask);
        return DAG.getBitcast(VT, Res);
      }
    }
  }

  return SDValue();
}
| 43938 | |
// Combine X86ISD::PACKSS/PACKUS nodes: constant-fold the saturating pack,
// fold packs of shuffles/extends, and finally try a generic shuffle combine.
static SDValue combineVectorPack(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const X86Subtarget &Subtarget) {
  unsigned Opcode = N->getOpcode();
  assert((X86ISD::PACKSS == Opcode || X86ISD::PACKUS == Opcode) &&
         "Unexpected pack opcode");

  EVT VT = N->getValueType(0);
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  unsigned NumDstElts = VT.getVectorNumElements();
  unsigned DstBitsPerElt = VT.getScalarSizeInBits();
  // Pack halves the element width: sources are twice as wide as the result.
  unsigned SrcBitsPerElt = 2 * DstBitsPerElt;
  assert(N0.getScalarValueSizeInBits() == SrcBitsPerElt &&
         N1.getScalarValueSizeInBits() == SrcBitsPerElt &&
         "Unexpected PACKSS/PACKUS input type");

  bool IsSigned = (X86ISD::PACKSS == Opcode);

  // Constant-fold the pack when both inputs are constant build vectors and
  // this node is their only user (so the wide constants become dead).
  APInt UndefElts0, UndefElts1;
  SmallVector<APInt, 32> EltBits0, EltBits1;
  if ((N0.isUndef() || N->isOnlyUserOf(N0.getNode())) &&
      (N1.isUndef() || N->isOnlyUserOf(N1.getNode())) &&
      getTargetConstantBitsFromNode(N0, SrcBitsPerElt, UndefElts0, EltBits0) &&
      getTargetConstantBitsFromNode(N1, SrcBitsPerElt, UndefElts1, EltBits1)) {
    unsigned NumLanes = VT.getSizeInBits() / 128;
    unsigned NumSrcElts = NumDstElts / 2;
    unsigned NumDstEltsPerLane = NumDstElts / NumLanes;
    unsigned NumSrcEltsPerLane = NumSrcElts / NumLanes;

    APInt Undefs(NumDstElts, 0);
    SmallVector<APInt, 32> Bits(NumDstElts, APInt::getNullValue(DstBitsPerElt));
    for (unsigned Lane = 0; Lane != NumLanes; ++Lane) {
      for (unsigned Elt = 0; Elt != NumDstEltsPerLane; ++Elt) {
        // Within each 128-bit lane, the low half of the results comes from
        // N0 and the high half from N1.
        unsigned SrcIdx = Lane * NumSrcEltsPerLane + Elt % NumSrcEltsPerLane;
        auto &UndefElts = (Elt >= NumSrcEltsPerLane ? UndefElts1 : UndefElts0);
        auto &EltBits = (Elt >= NumSrcEltsPerLane ? EltBits1 : EltBits0);

        if (UndefElts[SrcIdx]) {
          Undefs.setBit(Lane * NumDstEltsPerLane + Elt);
          continue;
        }

        APInt &Val = EltBits[SrcIdx];
        if (IsSigned) {
          // PACKSS: signed saturation - clamp values outside the signed
          // range of the narrow type to its signed min/max.
          if (Val.isSignedIntN(DstBitsPerElt))
            Val = Val.trunc(DstBitsPerElt);
          else if (Val.isNegative())
            Val = APInt::getSignedMinValue(DstBitsPerElt);
          else
            Val = APInt::getSignedMaxValue(DstBitsPerElt);
        } else {
          // PACKUS: unsigned saturation - negative values clamp to zero,
          // too-large values clamp to all-ones.
          if (Val.isIntN(DstBitsPerElt))
            Val = Val.trunc(DstBitsPerElt);
          else if (Val.isNegative())
            Val = APInt::getNullValue(DstBitsPerElt);
          else
            Val = APInt::getAllOnesValue(DstBitsPerElt);
        }
        Bits[Lane * NumDstEltsPerLane + Elt] = Val;
      }
    }

    return getConstVector(Bits, Undefs, VT.getSimpleVT(), DAG, SDLoc(N));
  }

  // Try to fold the pack through shuffled inputs (see
  // combineHorizOpWithShuffle).
  if (SDValue V = combineHorizOpWithShuffle(N, DAG, Subtarget))
    return V;

  // With AVX512, a PACK of a v8i32 truncate (with undef high operand) can be
  // lowered as a single VTRUNC when the truncation is known lossless for the
  // pack's saturation semantics.
  if (Subtarget.hasAVX512() &&
      N0.getOpcode() == ISD::TRUNCATE && N1.isUndef() && VT == MVT::v16i8 &&
      N0.getOperand(0).getValueType() == MVT::v8i32) {
    if ((IsSigned && DAG.ComputeNumSignBits(N0) > 8) ||
        (!IsSigned &&
         DAG.MaskedValueIsZero(N0, APInt::getHighBitsSet(16, 8)))) {
      if (Subtarget.hasVLX())
        return DAG.getNode(X86ISD::VTRUNC, SDLoc(N), VT, N0.getOperand(0));

      // Without VLX, widen the input to 512 bits and let the v16i32
      // truncate be lowered instead.
      SDLoc dl(N);
      SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v16i32,
                                   N0.getOperand(0), DAG.getUNDEF(MVT::v8i32));
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Concat);
    }
  }

  // PACK(EXT(X), EXT(Y)) -> CONCAT(X, Y): packing values that were just
  // extended from the destination width is a plain concatenation.
  if (VT.is128BitVector()) {
    unsigned ExtOpc = IsSigned ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
    SDValue Src0, Src1;
    if (N0.getOpcode() == ExtOpc &&
        N0.getOperand(0).getValueType().is64BitVector() &&
        N0.getOperand(0).getScalarValueSizeInBits() == DstBitsPerElt) {
      Src0 = N0.getOperand(0);
    }
    if (N1.getOpcode() == ExtOpc &&
        N1.getOperand(0).getValueType().is64BitVector() &&
        N1.getOperand(0).getScalarValueSizeInBits() == DstBitsPerElt) {
      Src1 = N1.getOperand(0);
    }
    if ((Src0 || N0.isUndef()) && (Src1 || N1.isUndef())) {
      assert((Src0 || Src1) && "Found PACK(UNDEF,UNDEF)");
      Src0 = Src0 ? Src0 : DAG.getUNDEF(Src1.getValueType());
      Src1 = Src1 ? Src1 : DAG.getUNDEF(Src0.getValueType());
      return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Src0, Src1);
    }
  }

  // Finally, attempt to combine this node into a target shuffle.
  SDValue Op(N, 0);
  if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget))
    return Res;

  return SDValue();
}
| 44064 | |
// Combine X86 horizontal add/sub nodes (HADD/HSUB/FHADD/FHSUB). When
// horizontal ops are not generally profitable, try to share the work of
// sibling horizontal ops via shuffles instead of emitting more of them.
static SDValue combineVectorHADDSUB(SDNode *N, SelectionDAG &DAG,
                                    TargetLowering::DAGCombinerInfo &DCI,
                                    const X86Subtarget &Subtarget) {
  assert((X86ISD::HADD == N->getOpcode() || X86ISD::FHADD == N->getOpcode() ||
          X86ISD::HSUB == N->getOpcode() || X86ISD::FHSUB == N->getOpcode()) &&
         "Unexpected horizontal add/sub opcode");

  if (!shouldUseHorizontalOp(true, DAG, Subtarget)) {
    MVT VT = N->getSimpleValueType(0);
    SDValue LHS = N->getOperand(0);
    SDValue RHS = N->getOperand(1);

    // HOP(X,X): if a sibling node with the same opcode already computes
    // HOP(X,Y) (or HOP(Y,X)), our result is just one half of the sibling's
    // result duplicated, e.g. for HADD(X,X): sibling HADD(X,Y) holds our
    // values in elements {0,1}, so shuffle {0,1,0,1} reproduces HADD(X,X).
    if (VT.is128BitVector() && LHS == RHS) {
      for (SDNode *User : LHS->uses()) {
        if (User != N && User->getOpcode() == N->getOpcode()) {
          MVT ShufVT = VT.isFloatingPoint() ? MVT::v4f32 : MVT::v4i32;
          if (User->getOperand(0) == LHS && !User->getOperand(1).isUndef()) {
            // Sibling is HOP(LHS, Other): duplicate its low half.
            return DAG.getBitcast(
                VT,
                DAG.getVectorShuffle(ShufVT, SDLoc(N),
                                     DAG.getBitcast(ShufVT, SDValue(User, 0)),
                                     DAG.getUNDEF(ShufVT), {0, 1, 0, 1}));
          }
          if (User->getOperand(1) == LHS && !User->getOperand(0).isUndef()) {
            // Sibling is HOP(Other, LHS): duplicate its high half.
            return DAG.getBitcast(
                VT,
                DAG.getVectorShuffle(ShufVT, SDLoc(N),
                                     DAG.getBitcast(ShufVT, SDValue(User, 0)),
                                     DAG.getUNDEF(ShufVT), {2, 3, 2, 3}));
          }
        }
      }
    }

    // HOP(HOP'(A,B),HOP'(A,B)) where LHS and RHS are equal (allowing undef
    // operands): merge them into a single inner HOP' and rebuild LHS/RHS as
    // lane-duplicating PSHUFDs of it. Note: this rewrites the operands in
    // place via RAUW and returns this node so the combiner revisits it.
    if (LHS != RHS && LHS.getOpcode() == N->getOpcode() &&
        LHS.getOpcode() == RHS.getOpcode() &&
        LHS.getValueType() == RHS.getValueType()) {
      // LHS0/RHS0 are LHS's operands; LHS1/RHS1 are RHS's operands.
      SDValue LHS0 = LHS.getOperand(0);
      SDValue RHS0 = LHS.getOperand(1);
      SDValue LHS1 = RHS.getOperand(0);
      SDValue RHS1 = RHS.getOperand(1);
      if ((LHS0 == RHS0 || LHS0.isUndef() || RHS0.isUndef()) &&
          (LHS1 == RHS1 || LHS1.isUndef() || RHS1.isUndef())) {
        SDLoc DL(N);
        SDValue Res = DAG.getNode(LHS.getOpcode(), DL, LHS.getValueType(),
                                  LHS0.isUndef() ? RHS0 : LHS0,
                                  LHS1.isUndef() ? RHS1 : LHS1);
        MVT ShufVT = MVT::getVectorVT(MVT::i32, VT.getSizeInBits() / 32);
        Res = DAG.getBitcast(ShufVT, Res);
        SDValue NewLHS =
            DAG.getNode(X86ISD::PSHUFD, DL, ShufVT, Res,
                        getV4X86ShuffleImm8ForMask({0, 1, 0, 1}, DL, DAG));
        SDValue NewRHS =
            DAG.getNode(X86ISD::PSHUFD, DL, ShufVT, Res,
                        getV4X86ShuffleImm8ForMask({2, 3, 2, 3}, DL, DAG));
        DAG.ReplaceAllUsesOfValueWith(LHS, DAG.getBitcast(VT, NewLHS));
        DAG.ReplaceAllUsesOfValueWith(RHS, DAG.getBitcast(VT, NewRHS));
        return SDValue(N, 0);
      }
    }
  }

  // Try to fold the horizontal op through shuffled inputs.
  if (SDValue V = combineHorizOpWithShuffle(N, DAG, Subtarget))
    return V;

  return SDValue();
}
| 44135 | |
// Combine X86 variable-amount vector shifts (VSHL/VSRA/VSRL, amount taken
// from a vector operand): fold shifts of zero, turn constant amounts into
// the immediate-shift form, and simplify demanded elements.
static SDValue combineVectorShiftVar(SDNode *N, SelectionDAG &DAG,
                                     TargetLowering::DAGCombinerInfo &DCI,
                                     const X86Subtarget &Subtarget) {
  assert((X86ISD::VSHL == N->getOpcode() || X86ISD::VSRA == N->getOpcode() ||
          X86ISD::VSRL == N->getOpcode()) &&
         "Unexpected shift opcode");
  EVT VT = N->getValueType(0);
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  // Shifting zero yields zero regardless of the amount.
  if (ISD::isBuildVectorAllZeros(N0.getNode()))
    return DAG.getConstant(0, SDLoc(N), VT);

  // Detect a constant shift amount. The amount is read as a 64-bit element
  // (EltBits[0]) and the node is rebuilt as the uniform immediate-shift
  // opcode via getTargetVShiftByConstNode.
  APInt UndefElts;
  SmallVector<APInt, 32> EltBits;
  if (getTargetConstantBitsFromNode(N1, 64, UndefElts, EltBits, true, false)) {
    unsigned X86Opc = getTargetVShiftUniformOpcode(N->getOpcode(), false);
    return getTargetVShiftByConstNode(X86Opc, SDLoc(N), VT.getSimpleVT(), N0,
                                      EltBits[0].getZExtValue(), DAG);
  }

  // Otherwise, try to simplify based on which result elements are demanded.
  APInt KnownUndef, KnownZero;
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  APInt DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements());
  if (TLI.SimplifyDemandedVectorElts(SDValue(N, 0), DemandedElts, KnownUndef,
                                     KnownZero, DCI))
    return SDValue(N, 0);

  return SDValue();
}
| 44168 | |
| 44169 | static SDValue combineVectorShiftImm(SDNode *N, SelectionDAG &DAG, |
| 44170 | TargetLowering::DAGCombinerInfo &DCI, |
| 44171 | const X86Subtarget &Subtarget) { |
| 44172 | unsigned Opcode = N->getOpcode(); |
| 44173 | assert((X86ISD::VSHLI == Opcode || X86ISD::VSRAI == Opcode || |
| 44174 | X86ISD::VSRLI == Opcode) && |
| 44175 | "Unexpected shift opcode"); |
| 44176 | bool LogicalShift = X86ISD::VSHLI == Opcode || X86ISD::VSRLI == Opcode; |
| 44177 | EVT VT = N->getValueType(0); |
| 44178 | SDValue N0 = N->getOperand(0); |
| 44179 | unsigned NumBitsPerElt = VT.getScalarSizeInBits(); |
| 44180 | assert(VT == N0.getValueType() && (NumBitsPerElt % 8) == 0 && |
| 44181 | "Unexpected value type"); |
| 44182 | assert(N->getOperand(1).getValueType() == MVT::i8 && |
| 44183 | "Unexpected shift amount type"); |
| 44184 | |
| 44185 | |
| 44186 | if (N0.isUndef()) |
| 44187 | return DAG.getConstant(0, SDLoc(N), VT); |
| 44188 | |
| 44189 | |
| 44190 | |
| 44191 | unsigned ShiftVal = N->getConstantOperandVal(1); |
| 44192 | if (ShiftVal >= NumBitsPerElt) { |
| 44193 | if (LogicalShift) |
| 44194 | return DAG.getConstant(0, SDLoc(N), VT); |
| 44195 | ShiftVal = NumBitsPerElt - 1; |
| 44196 | } |
| 44197 | |
| 44198 | |
| 44199 | if (!ShiftVal) |
| 44200 | return N0; |
| 44201 | |
| 44202 | |
| 44203 | if (ISD::isBuildVectorAllZeros(N0.getNode())) |
| 44204 | |
| 44205 | |
| 44206 | return DAG.getConstant(0, SDLoc(N), VT); |
| 44207 | |
| 44208 | |
| 44209 | if (!LogicalShift && ISD::isBuildVectorAllOnes(N0.getNode())) |
| 44210 | |
| 44211 | |
| 44212 | return DAG.getConstant(-1, SDLoc(N), VT); |
| 44213 | |
| 44214 | |
| 44215 | if (Opcode == N0.getOpcode()) { |
| 44216 | unsigned ShiftVal2 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue(); |
| 44217 | unsigned NewShiftVal = ShiftVal + ShiftVal2; |
| 44218 | if (NewShiftVal >= NumBitsPerElt) { |
| 44219 | |
| 44220 | |
| 44221 | if (LogicalShift) |
| 44222 | return DAG.getConstant(0, SDLoc(N), VT); |
| 44223 | NewShiftVal = NumBitsPerElt - 1; |
| 44224 | } |
| 44225 | return DAG.getNode(Opcode, SDLoc(N), VT, N0.getOperand(0), |
| 44226 | DAG.getTargetConstant(NewShiftVal, SDLoc(N), MVT::i8)); |
| 44227 | } |
| 44228 | |
| 44229 | |
| 44230 | if (LogicalShift && (ShiftVal % 8) == 0) { |
| 44231 | SDValue Op(N, 0); |
| 44232 | if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget)) |
| 44233 | return Res; |
| 44234 | } |
| 44235 | |
| 44236 | |
| 44237 | APInt UndefElts; |
| 44238 | SmallVector<APInt, 32> EltBits; |
| 44239 | if (N->isOnlyUserOf(N0.getNode()) && |
| 44240 | getTargetConstantBitsFromNode(N0, NumBitsPerElt, UndefElts, EltBits)) { |
| 44241 | assert(EltBits.size() == VT.getVectorNumElements() && |
| 44242 | "Unexpected shift value type"); |
| 44243 | |
| 44244 | |
| 44245 | |
| 44246 | for (unsigned i = 0, e = EltBits.size(); i != e; ++i) { |
| 44247 | APInt &Elt = EltBits[i]; |
| 44248 | if (UndefElts[i]) |
| 44249 | Elt = 0; |
| 44250 | else if (X86ISD::VSHLI == Opcode) |
| 44251 | Elt <<= ShiftVal; |
| 44252 | else if (X86ISD::VSRAI == Opcode) |
| 44253 | Elt.ashrInPlace(ShiftVal); |
| 44254 | else |
| 44255 | Elt.lshrInPlace(ShiftVal); |
| 44256 | } |
| 44257 | |
| 44258 | UndefElts = 0; |
| 44259 | return getConstVector(EltBits, UndefElts, VT.getSimpleVT(), DAG, SDLoc(N)); |
| 44260 | } |
| 44261 | |
| 44262 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
| 44263 | if (TLI.SimplifyDemandedBits(SDValue(N, 0), |
| 44264 | APInt::getAllOnesValue(NumBitsPerElt), DCI)) |
| 44265 | return SDValue(N, 0); |
| 44266 | |
| 44267 | return SDValue(); |
| 44268 | } |
| 44269 | |
| 44270 | static SDValue combineVectorInsert(SDNode *N, SelectionDAG &DAG, |
| 44271 | TargetLowering::DAGCombinerInfo &DCI, |
| 44272 | const X86Subtarget &Subtarget) { |
| 44273 | EVT VT = N->getValueType(0); |
| 44274 | assert(((N->getOpcode() == X86ISD::PINSRB && VT == MVT::v16i8) || |
| 44275 | (N->getOpcode() == X86ISD::PINSRW && VT == MVT::v8i16) || |
| 44276 | N->getOpcode() == ISD::INSERT_VECTOR_ELT) && |
| 44277 | "Unexpected vector insertion"); |
| 44278 | |
| 44279 | if (N->getOpcode() == X86ISD::PINSRB || N->getOpcode() == X86ISD::PINSRW) { |
| 44280 | unsigned NumBitsPerElt = VT.getScalarSizeInBits(); |
| 44281 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
| 44282 | if (TLI.SimplifyDemandedBits(SDValue(N, 0), |
| 44283 | APInt::getAllOnesValue(NumBitsPerElt), DCI)) |
| 44284 | return SDValue(N, 0); |
| 44285 | } |
| 44286 | |
| 44287 | |
| 44288 | if (VT.isSimple() && DCI.isAfterLegalizeDAG()) { |
| 44289 | SDValue Op(N, 0); |
| 44290 | if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget)) |
| 44291 | return Res; |
| 44292 | } |
| 44293 | |
| 44294 | return SDValue(); |
| 44295 | } |
| 44296 | |
| 44297 | |
| 44298 | |
| 44299 | |
// Recognize AND/OR of two setccs that read the flags of the same scalar FP
// compare (testing the zero and parity flags together, i.e. "equal and
// ordered" / "not-equal or unordered") and rewrite them as a single vector
// FP compare (CMPEQ/CMPNEQ) reduced to one boolean bit.
static SDValue combineCompareEqual(SDNode *N, SelectionDAG &DAG,
                                   TargetLowering::DAGCombinerInfo &DCI,
                                   const X86Subtarget &Subtarget) {
  unsigned opcode;

  // isAndOrOfSetCCs fills 'opcode' with the AND/OR opcode; the value itself
  // is not needed afterwards. SSE2 is required to materialize the result
  // with vector compare instructions.
  if (Subtarget.hasSSE2() && isAndOrOfSetCCs(SDValue(N, 0U), opcode)) {
    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);
    SDValue CMP0 = N0.getOperand(1);
    SDValue CMP1 = N1.getOperand(1);
    SDLoc DL(N);

    // Both setccs must read the very same FP compare node.
    if (CMP0.getOpcode() != X86ISD::FCMP || CMP0 != CMP1)
      return SDValue();

    SDValue CMP00 = CMP0->getOperand(0);
    SDValue CMP01 = CMP0->getOperand(1);
    EVT VT = CMP00.getValueType();

    if (VT == MVT::f32 || VT == MVT::f64) {
      bool ExpectingFlags = false;
      // If any user wants flag-like semantics (branch/select - and,
      // conservatively, any unrecognized user via the default case), keep
      // the setcc form; only rewrite pure value uses (copies/extends).
      for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
           !ExpectingFlags && UI != UE; ++UI)
        switch (UI->getOpcode()) {
        default:
        case ISD::BR_CC:
        case ISD::BRCOND:
        case ISD::SELECT:
          ExpectingFlags = true;
          break;
        case ISD::CopyToReg:
        case ISD::SIGN_EXTEND:
        case ISD::ZERO_EXTEND:
        case ISD::ANY_EXTEND:
          break;
        }

      if (!ExpectingFlags) {
        enum X86::CondCode cc0 = (enum X86::CondCode)N0.getConstantOperandVal(0);
        enum X86::CondCode cc1 = (enum X86::CondCode)N1.getConstantOperandVal(0);

        // Canonicalize so cc0 holds the E/NE condition.
        if (cc1 == X86::COND_E || cc1 == X86::COND_NE) {
          X86::CondCode tmp = cc0;
          cc0 = cc1;
          cc1 = tmp;
        }

        if ((cc0 == X86::COND_E && cc1 == X86::COND_NP) ||
            (cc0 == X86::COND_NE && cc1 == X86::COND_P)) {
          // SSE compare-predicate immediate: 0 = EQ, 4 = NEQ.
          unsigned x86cc = (cc0 == X86::COND_E) ? 0 : 4;
          if (Subtarget.hasAVX512()) {
            SDValue FSetCC =
                DAG.getNode(X86ISD::FSETCCM, DL, MVT::v1i1, CMP00, CMP01,
                            DAG.getTargetConstant(x86cc, DL, MVT::i8));
            // Widen the v1i1 mask into a zeroed v16i1 so it can be bitcast
            // to i16 and resized to the expected scalar result type.
            SDValue Ins = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, MVT::v16i1,
                                      DAG.getConstant(0, DL, MVT::v16i1),
                                      FSetCC, DAG.getIntPtrConstant(0, DL));
            return DAG.getZExtOrTrunc(DAG.getBitcast(MVT::i16, Ins), DL,
                                      N->getSimpleValueType(0));
          }
          // Non-AVX512: FSETCC yields an all-ones/all-zeros FP mask.
          SDValue OnesOrZeroesF =
              DAG.getNode(X86ISD::FSETCC, DL, CMP00.getValueType(), CMP00,
                          CMP01, DAG.getTargetConstant(x86cc, DL, MVT::i8));

          bool is64BitFP = (CMP00.getValueType() == MVT::f64);
          MVT IntVT = is64BitFP ? MVT::i64 : MVT::i32;

          if (is64BitFP && !Subtarget.is64Bit()) {
            // On a 32-bit target an f64->i64 bitcast isn't available, so
            // extract the low 32 bits of the f64 mask through a vector
            // round-trip; the mask is all-ones/all-zeros, so any 32-bit
            // chunk carries the answer.
            SDValue Vector64 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f64,
                                           OnesOrZeroesF);
            SDValue Vector32 = DAG.getBitcast(MVT::v4f32, Vector64);
            OnesOrZeroesF = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32,
                                        Vector32, DAG.getIntPtrConstant(0, DL));
            IntVT = MVT::i32;
          }

          // Reduce the all-ones/all-zeros mask to a single i8 boolean.
          SDValue OnesOrZeroesI = DAG.getBitcast(IntVT, OnesOrZeroesF);
          SDValue ANDed = DAG.getNode(ISD::AND, DL, IntVT, OnesOrZeroesI,
                                      DAG.getConstant(1, DL, IntVT));
          SDValue OneBitOfTruth = DAG.getNode(ISD::TRUNCATE, DL, MVT::i8,
                                              ANDed);
          return OneBitOfTruth;
        }
      }
    }
  }
  return SDValue();
}
| 44401 | |
| 44402 | |
| 44403 | static SDValue combineANDXORWithAllOnesIntoANDNP(SDNode *N, SelectionDAG &DAG) { |
| 44404 | assert(N->getOpcode() == ISD::AND); |
| 44405 | |
| 44406 | MVT VT = N->getSimpleValueType(0); |
| 44407 | if (!VT.is128BitVector() && !VT.is256BitVector() && !VT.is512BitVector()) |
| 44408 | return SDValue(); |
| 44409 | |
| 44410 | SDValue X, Y; |
| 44411 | SDValue N0 = N->getOperand(0); |
| 44412 | SDValue N1 = N->getOperand(1); |
| 44413 | |
| 44414 | auto GetNot = [&VT, &DAG](SDValue V) { |
| 44415 | |
| 44416 | if (SDValue Not = IsNOT(V, DAG)) |
| 44417 | return Not; |
| 44418 | |
| 44419 | if (V.getOpcode() == X86ISD::VBROADCAST) { |
| 44420 | SDValue Src = V.getOperand(0); |
| 44421 | EVT SrcVT = Src.getValueType(); |
| 44422 | if (!SrcVT.isVector()) |
| 44423 | return SDValue(); |
| 44424 | if (SDValue Not = IsNOT(Src, DAG)) |
| 44425 | return DAG.getNode(X86ISD::VBROADCAST, SDLoc(V), VT, |
| 44426 | DAG.getBitcast(SrcVT, Not)); |
| 44427 | } |
| 44428 | return SDValue(); |
| 44429 | }; |
| 44430 | |
| 44431 | if (SDValue Not = GetNot(N0)) { |
| 44432 | X = Not; |
| 44433 | Y = N1; |
| 44434 | } else if (SDValue Not = GetNot(N1)) { |
| 44435 | X = Not; |
| 44436 | Y = N0; |
| 44437 | } else |
| 44438 | return SDValue(); |
| 44439 | |
| 44440 | X = DAG.getBitcast(VT, X); |
| 44441 | Y = DAG.getBitcast(VT, Y); |
| 44442 | return DAG.getNode(X86ISD::ANDNP, SDLoc(N), VT, X, Y); |
| 44443 | } |
| 44444 | |
| 44445 | |
| 44446 | |
| 44447 | |
| 44448 | |
| 44449 | |
| 44450 | |
| 44451 | |
| 44452 | |
| 44453 | |
// Try to re-express a chain of vector logic ops (AND/OR/XOR) whose operands
// are truncated from the wider type VT so that the logic happens in VT
// itself. Recurses through nested logic ops up to the DAG recursion limit.
// Returns the promoted value in VT, or SDValue() if the pattern does not
// match.
static SDValue PromoteMaskArithmetic(SDNode *N, EVT VT, SelectionDAG &DAG,
                                     unsigned Depth) {
  // Bound the recursion to keep compile time sane.
  if (Depth >= SelectionDAG::MaxRecursionDepth)
    return SDValue();

  // Only bitwise logic ops are promoted.
  if (N->getOpcode() != ISD::XOR && N->getOpcode() != ISD::AND &&
      N->getOpcode() != ISD::OR)
    return SDValue();

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDLoc DL(N);

  // The wide operation must be legal (or promotable) on the target.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (!TLI.isOperationLegalOrPromote(N->getOpcode(), VT))
    return SDValue();

  // LHS: either a nested promotable logic op, or a truncate from VT.
  if (SDValue NN0 = PromoteMaskArithmetic(N0.getNode(), VT, DAG, Depth + 1))
    N0 = NN0;
  else {
    // The left side has to be a truncate.
    if (N0.getOpcode() != ISD::TRUNCATE)
      return SDValue();

    // The type of the truncated value must match the wide type.
    if (N0.getOperand(0).getValueType() != VT)
      return SDValue();

    N0 = N0.getOperand(0);
  }

  // RHS: a nested promotable logic op, a truncate from VT, or a constant
  // build vector (which can simply be zero-extended to VT).
  if (SDValue NN1 = PromoteMaskArithmetic(N1.getNode(), VT, DAG, Depth + 1))
    N1 = NN1;
  else {
    bool RHSTrunc = N1.getOpcode() == ISD::TRUNCATE &&
                    N1.getOperand(0).getValueType() == VT;
    if (!RHSTrunc && !ISD::isBuildVectorOfConstantSDNodes(N1.getNode()))
      return SDValue();

    if (RHSTrunc)
      N1 = N1.getOperand(0);
    else
      N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N1);
  }

  // Rebuild the logic op directly in the wide type.
  return DAG.getNode(N->getOpcode(), DL, VT, N0, N1);
}
| 44503 | |
| 44504 | |
| 44505 | |
| 44506 | |
| 44507 | |
| 44508 | |
| 44509 | |
// Given a sext/zext/aext whose source is a narrow chain of vector logic
// ops, perform the logic directly in the wide result type (see the
// recursive overload above) and then re-apply the extension semantics
// in-register on the wide value.
static SDValue PromoteMaskArithmetic(SDNode *N, SelectionDAG &DAG,
                                     const X86Subtarget &Subtarget) {
  EVT VT = N->getValueType(0);
  assert(VT.isVector() && "Expected vector type");

  SDLoc DL(N);
  assert((N->getOpcode() == ISD::ANY_EXTEND ||
          N->getOpcode() == ISD::ZERO_EXTEND ||
          N->getOpcode() == ISD::SIGN_EXTEND) && "Invalid Node");

  SDValue Narrow = N->getOperand(0);
  EVT NarrowVT = Narrow.getValueType();

  // Promote the narrow logic chain into VT, if possible.
  SDValue Op = PromoteMaskArithmetic(Narrow.getNode(), VT, DAG, 0);
  if (!Op)
    return SDValue();
  switch (N->getOpcode()) {
  default: llvm_unreachable("Unexpected opcode");
  case ISD::ANY_EXTEND:
    // Any-extend: the promoted value is acceptable as-is.
    return Op;
  case ISD::ZERO_EXTEND:
    // Clear the bits above NarrowVT's width in each element.
    return DAG.getZeroExtendInReg(Op, DL, NarrowVT);
  case ISD::SIGN_EXTEND:
    // Sign-extend from NarrowVT's width within each wide element.
    return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT,
                       Op, DAG.getValueType(NarrowVT));
  }
}
| 44538 | |
| 44539 | static unsigned convertIntLogicToFPLogicOpcode(unsigned Opcode) { |
| 44540 | unsigned FPOpcode; |
| 44541 | switch (Opcode) { |
| 44542 | default: llvm_unreachable("Unexpected input node for FP logic conversion"); |
| 44543 | case ISD::AND: FPOpcode = X86ISD::FAND; break; |
| 44544 | case ISD::OR: FPOpcode = X86ISD::FOR; break; |
| 44545 | case ISD::XOR: FPOpcode = X86ISD::FXOR; break; |
| 44546 | } |
| 44547 | return FPOpcode; |
| 44548 | } |
| 44549 | |
| 44550 | |
| 44551 | |
| 44552 | |
| 44553 | static SDValue convertIntLogicToFPLogic(SDNode *N, SelectionDAG &DAG, |
| 44554 | const X86Subtarget &Subtarget) { |
| 44555 | EVT VT = N->getValueType(0); |
| 44556 | SDValue N0 = N->getOperand(0); |
| 44557 | SDValue N1 = N->getOperand(1); |
| 44558 | SDLoc DL(N); |
| 44559 | |
| 44560 | if (N0.getOpcode() != ISD::BITCAST || N1.getOpcode() != ISD::BITCAST) |
| 44561 | return SDValue(); |
| 44562 | |
| 44563 | SDValue N00 = N0.getOperand(0); |
| 44564 | SDValue N10 = N1.getOperand(0); |
| 44565 | EVT N00Type = N00.getValueType(); |
| 44566 | EVT N10Type = N10.getValueType(); |
| 44567 | |
| 44568 | |
| 44569 | if (N00Type != N10Type || |
| 44570 | !((Subtarget.hasSSE1() && N00Type == MVT::f32) || |
| 44571 | (Subtarget.hasSSE2() && N00Type == MVT::f64))) |
| 44572 | return SDValue(); |
| 44573 | |
| 44574 | unsigned FPOpcode = convertIntLogicToFPLogicOpcode(N->getOpcode()); |
| 44575 | SDValue FPLogic = DAG.getNode(FPOpcode, DL, N00Type, N00, N10); |
| 44576 | return DAG.getBitcast(VT, FPLogic); |
| 44577 | } |
| 44578 | |
| 44579 | |
| 44580 | |
| 44581 | static SDValue combineBitOpWithMOVMSK(SDNode *N, SelectionDAG &DAG) { |
| 44582 | unsigned Opc = N->getOpcode(); |
| 44583 | assert((Opc == ISD::OR || Opc == ISD::AND || Opc == ISD::XOR) && |
| 44584 | "Unexpected bit opcode"); |
| 44585 | |
| 44586 | SDValue N0 = N->getOperand(0); |
| 44587 | SDValue N1 = N->getOperand(1); |
| 44588 | |
| 44589 | |
| 44590 | if (N0.getOpcode() != X86ISD::MOVMSK || !N0.hasOneUse() || |
| 44591 | N1.getOpcode() != X86ISD::MOVMSK || !N1.hasOneUse()) |
| 44592 | return SDValue(); |
| 44593 | |
| 44594 | SDValue Vec0 = N0.getOperand(0); |
| 44595 | SDValue Vec1 = N1.getOperand(0); |
| 44596 | EVT VecVT0 = Vec0.getValueType(); |
| 44597 | EVT VecVT1 = Vec1.getValueType(); |
| 44598 | |
| 44599 | |
| 44600 | |
| 44601 | if (VecVT0.getSizeInBits() != VecVT1.getSizeInBits() || |
| 44602 | VecVT0.getScalarSizeInBits() != VecVT1.getScalarSizeInBits()) |
| 44603 | return SDValue(); |
| 44604 | |
| 44605 | SDLoc DL(N); |
| 44606 | unsigned VecOpc = |
| 44607 | VecVT0.isFloatingPoint() ? convertIntLogicToFPLogicOpcode(Opc) : Opc; |
| 44608 | SDValue Result = |
| 44609 | DAG.getNode(VecOpc, DL, VecVT0, Vec0, DAG.getBitcast(VecVT0, Vec1)); |
| 44610 | return DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Result); |
| 44611 | } |
| 44612 | |
| 44613 | |
| 44614 | |
| 44615 | |
// Fold (and X, (splat of low-bits mask 2^k - 1)) -> (vsrli X, width - k)
// when every bit of X is known to equal its element's sign bit (i.e. X is a
// per-element all-ones/all-zeros "boolean" vector), so masking the low k
// bits is equivalent to a logical right shift.
static SDValue combineAndMaskToShift(SDNode *N, SelectionDAG &DAG,
                                     const X86Subtarget &Subtarget) {
  SDValue Op0 = peekThroughBitcasts(N->getOperand(0));
  SDValue Op1 = peekThroughBitcasts(N->getOperand(1));
  EVT VT0 = Op0.getValueType();
  EVT VT1 = Op1.getValueType();

  if (VT0 != VT1 || !VT0.isSimple() || !VT0.isInteger())
    return SDValue();

  // The mask operand must be a splat of a low-bits mask (2^k - 1).
  APInt SplatVal;
  if (!ISD::isConstantSplatVector(Op1.getNode(), SplatVal) ||
      !SplatVal.isMask())
    return SDValue();

  // Bail on bitwise-NOT sources. NOTE(review): presumably left for
  // ANDNP-style folds (see combineANDXORWithAllOnesIntoANDNP) - confirm.
  if (isBitwiseNot(Op0))
    return SDValue();

  // The target must support an immediate logical right shift for VT0.
  if (!SupportedVectorShiftWithImm(VT0.getSimpleVT(), Subtarget, ISD::SRL))
    return SDValue();

  // Op0 must be all sign bits: every element is all-ones or all-zeros.
  unsigned EltBitWidth = VT0.getScalarSizeInBits();
  if (EltBitWidth != DAG.ComputeNumSignBits(Op0))
    return SDValue();

  SDLoc DL(N);
  // k = number of mask bits; shift right by (width - k).
  unsigned ShiftVal = SplatVal.countTrailingOnes();
  SDValue ShAmt = DAG.getTargetConstant(EltBitWidth - ShiftVal, DL, MVT::i8);
  SDValue Shift = DAG.getNode(X86ISD::VSRLI, DL, VT0, Op0, ShAmt);
  return DAG.getBitcast(N->getValueType(0), Shift);
}
| 44648 | |
| 44649 | |
| 44650 | |
| 44651 | static SDValue getIndexFromUnindexedLoad(LoadSDNode *Ld) { |
| 44652 | if (Ld->isIndexed()) |
| 44653 | return SDValue(); |
| 44654 | |
| 44655 | SDValue Base = Ld->getBasePtr(); |
| 44656 | |
| 44657 | if (Base.getOpcode() != ISD::ADD) |
| 44658 | return SDValue(); |
| 44659 | |
| 44660 | SDValue ShiftedIndex = Base.getOperand(0); |
| 44661 | |
| 44662 | if (ShiftedIndex.getOpcode() != ISD::SHL) |
| 44663 | return SDValue(); |
| 44664 | |
| 44665 | return ShiftedIndex.getOperand(0); |
| 44666 | |
| 44667 | } |
| 44668 | |
| 44669 | static bool hasBZHI(const X86Subtarget &Subtarget, MVT VT) { |
| 44670 | if (Subtarget.hasBMI2() && VT.isScalarInteger()) { |
| 44671 | switch (VT.getSizeInBits()) { |
| 44672 | default: return false; |
| 44673 | case 64: return Subtarget.is64Bit() ? true : false; |
| 44674 | case 32: return true; |
| 44675 | } |
| 44676 | } |
| 44677 | return false; |
| 44678 | } |
| 44679 | |
| 44680 | |
| 44681 | |
| 44682 | |
| 44683 | |
| 44684 | |
| 44685 | |
| 44686 | |
| 44687 | |
| 44688 | |
| 44689 | |
// Recognize (and x, (load mask_table[i])) where mask_table is a constant
// global array with mask_table[j] == (1 << j) - 1, and rewrite it without
// the table load as (and x, (srl all-ones, size - i)), which matches the
// BZHI instruction pattern on BMI2 targets.
static SDValue combineAndLoadToBZHI(SDNode *Node, SelectionDAG &DAG,
                                    const X86Subtarget &Subtarget) {
  MVT VT = Node->getSimpleValueType(0);
  SDLoc dl(Node);

  // BZHI only exists for 32/64-bit scalar integers on BMI2 targets.
  if (!hasBZHI(Subtarget, VT))
    return SDValue();

  // Either AND operand may be the table load; try both.
  // NOTE(review): a non-load operand 0 aborts the whole combine before
  // operand 1 is examined (the early return below) - confirm intentional.
  for (unsigned i = 0; i < 2; i++) {
    SDValue N = Node->getOperand(i);
    LoadSDNode *Ld = dyn_cast<LoadSDNode>(N.getNode());

    if (!Ld)
      return SDValue();

    // The load needs an IR-level value so we can inspect the global it
    // reads from.
    const Value *MemOp = Ld->getMemOperand()->getValue();

    if (!MemOp)
      return SDValue();

    if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(MemOp)) {
      if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0))) {
        if (GV->isConstant() && GV->hasDefinitiveInitializer()) {
          // The global must be a constant array of integers whose element
          // width matches VT, with no more entries than that bit width (so
          // every (1 << j) - 1 value is representable).
          Constant *Init = GV->getInitializer();
          Type *Ty = Init->getType();
          if (!isa<ConstantDataArray>(Init) ||
              !Ty->getArrayElementType()->isIntegerTy() ||
              Ty->getArrayElementType()->getScalarSizeInBits() !=
                  VT.getSizeInBits() ||
              Ty->getArrayNumElements() >
                  Ty->getArrayElementType()->getScalarSizeInBits())
            continue;

          // Every entry j must equal (1 << j) - 1.
          uint64_t ArrayElementCount = Init->getType()->getArrayNumElements();
          bool ConstantsMatch = true;
          for (uint64_t j = 0; j < ArrayElementCount; j++) {
            auto *Elem = cast<ConstantInt>(Init->getAggregateElement(j));
            if (Elem->getZExtValue() != (((uint64_t)1 << j) - 1)) {
              ConstantsMatch = false;
              break;
            }
          }
          if (!ConstantsMatch)
            continue;

          // Rewrite: x & table[i]  ==>  x & (all-ones >> (size - i)).
          SDValue Inp = (i == 0) ? Node->getOperand(1) : Node->getOperand(0);
          SDValue SizeC = DAG.getConstant(VT.getSizeInBits(), dl, MVT::i32);

          // Recover the index i from the (add (shl Index, C), Base)
          // addressing of the load.
          SDValue Index = getIndexFromUnindexedLoad(Ld);
          if (!Index)
            return SDValue();
          Index = DAG.getZExtOrTrunc(Index, dl, MVT::i32);

          SDValue Sub = DAG.getNode(ISD::SUB, dl, MVT::i32, SizeC, Index);
          Sub = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Sub);

          SDValue AllOnes = DAG.getAllOnesConstant(dl, VT);
          SDValue LShr = DAG.getNode(ISD::SRL, dl, VT, AllOnes, Sub);

          return DAG.getNode(ISD::AND, dl, VT, Inp, LShr);
        }
      }
    }
  }
  return SDValue();
}
| 44766 | |
| 44767 | |
| 44768 | |
| 44769 | |
| 44770 | |
| 44771 | |
/// Fold "and (bitcast (concat_vectors (setcc ...), ...)), mask" into a
/// bitcast of a concat_vectors whose trailing subvectors are explicit zeros,
/// when the scalar constant mask selects exactly the lanes produced by the
/// leading setcc subvector. The scalar AND then becomes redundant.
static SDValue combineScalarAndWithMaskSetcc(SDNode *N, SelectionDAG &DAG,
                                             const X86Subtarget &Subtarget) {
  assert(N->getOpcode() == ISD::AND && "Unexpected opcode!");

  EVT VT = N->getValueType(0);

  // Only fold against a constant mask on the RHS.
  if (!isa<ConstantSDNode>(N->getOperand(1)))
    return SDValue();

  // A constant RHS implies a scalar result type here.
  assert(!VT.isVector() && "Expected scalar VT!");

  // LHS must be a single-use bitcast of a single-use value.
  if (N->getOperand(0).getOpcode() != ISD::BITCAST ||
      !N->getOperand(0).hasOneUse() ||
      !N->getOperand(0).getOperand(0).hasOneUse())
    return SDValue();

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  SDValue Src = N->getOperand(0).getOperand(0);
  EVT SrcVT = Src.getValueType();
  // The bitcast source must be a legal vXi1 mask vector.
  if (!SrcVT.isVector() || SrcVT.getVectorElementType() != MVT::i1 ||
      !TLI.isTypeLegal(SrcVT))
    return SDValue();

  if (Src.getOpcode() != ISD::CONCAT_VECTORS)
    return SDValue();

  // Only the first subvector of the concat is kept; everything else is
  // replaced by zeros below.
  SDValue SubVec = Src.getOperand(0);
  EVT SubVecVT = SubVec.getValueType();

  // The leading operand must be a setcc of a legal type, and the AND mask
  // must be a low-bits mask covering exactly that subvector's lane count.
  if (SubVec.getOpcode() != ISD::SETCC || !TLI.isTypeLegal(SubVecVT) ||
      !N->getConstantOperandAPInt(1).isMask(SubVecVT.getVectorNumElements()))
    return SDValue();

  EVT SetccVT = SubVec.getOperand(0).getValueType();
  // The compared type must be legal and usable as an AVX-512 mask compare:
  // either VLX is available for the narrower vectors, or it is 512-bit.
  if (!TLI.isTypeLegal(SetccVT) ||
      !(Subtarget.hasVLX() || SetccVT.is512BitVector()))
    return SDValue();

  // Sub-dword element compares additionally require BWI.
  if (!(Subtarget.hasBWI() || SetccVT.getScalarSizeInBits() >= 32))
    return SDValue();

  // Rebuild the concat with explicit zero subvectors in the upper lanes; the
  // scalar AND with the low-bits mask is then a no-op and is dropped.
  SDLoc dl(N);
  SmallVector<SDValue, 4> Ops(Src.getNumOperands(),
                              DAG.getConstant(0, dl, SubVecVT));
  Ops[0] = SubVec;
  SDValue Concat = DAG.getNode(ISD::CONCAT_VECTORS, dl, SrcVT,
                               Ops);
  return DAG.getBitcast(VT, Concat);
}
| 44830 | |
/// DAG combine for ISD::AND on x86: tries a sequence of target folds and
/// returns the replacement node, or an empty SDValue if nothing applied.
static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
                          TargetLowering::DAGCombinerInfo &DCI,
                          const X86Subtarget &Subtarget) {
  EVT VT = N->getValueType(0);

  // With SSE1-only, integer v4i32 logic must go through the FP domain
  // (X86ISD::FAND, i.e. ANDPS).
  if (Subtarget.hasSSE1() && !Subtarget.hasSSE2() && VT == MVT::v4i32) {
    return DAG.getBitcast(
        MVT::v4i32, DAG.getNode(X86ISD::FAND, SDLoc(N), MVT::v4f32,
                                DAG.getBitcast(MVT::v4f32, N->getOperand(0)),
                                DAG.getBitcast(MVT::v4f32, N->getOperand(1))));
  }

  // Narrow i64 AND to i32 when the upper 32 bits of either side are known
  // zero: trunc + and + zext is cheaper to encode than a 64-bit and.
  if (VT == MVT::i64 && Subtarget.is64Bit() &&
      !isa<ConstantSDNode>(N->getOperand(1))) {
    APInt HiMask = APInt::getHighBitsSet(64, 32);
    if (DAG.MaskedValueIsZero(N->getOperand(1), HiMask) ||
        DAG.MaskedValueIsZero(N->getOperand(0), HiMask)) {
      SDLoc dl(N);
      SDValue LHS = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, N->getOperand(0));
      SDValue RHS = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, N->getOperand(1));
      return DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64,
                         DAG.getNode(ISD::AND, dl, MVT::i32, LHS, RHS));
    }
  }

  // Match an "all lanes true" boolean reduction (and of extracted i1 lanes)
  // and turn it into: (bitcast-to-int mask) & partial == partial.
  if (VT == MVT::i1) {
    SmallVector<SDValue, 2> SrcOps;
    SmallVector<APInt, 2> SrcPartials;
    if (matchScalarReduction(SDValue(N, 0), ISD::AND, SrcOps, &SrcPartials) &&
        SrcOps.size() == 1) {
      SDLoc dl(N);
      const TargetLowering &TLI = DAG.getTargetLoweringInfo();
      unsigned NumElts = SrcOps[0].getValueType().getVectorNumElements();
      EVT MaskVT = EVT::getIntegerVT(*DAG.getContext(), NumElts);
      SDValue Mask = combineBitcastvxi1(DAG, MaskVT, SrcOps[0], dl, Subtarget);
      // Fall back to a plain bitcast if the vXi1 type is already legal.
      if (!Mask && TLI.isTypeLegal(SrcOps[0].getValueType()))
        Mask = DAG.getBitcast(MaskVT, SrcOps[0]);
      if (Mask) {
        assert(SrcPartials[0].getBitWidth() == NumElts &&
               "Unexpected partial reduction mask");
        SDValue PartialBits = DAG.getConstant(SrcPartials[0], dl, MaskVT);
        Mask = DAG.getNode(ISD::AND, dl, MaskVT, Mask, PartialBits);
        // All demanded lanes set <=> (mask & partial) == partial.
        return DAG.getSetCC(dl, MVT::i1, Mask, PartialBits, ISD::SETEQ);
      }
    }
  }

  if (SDValue V = combineScalarAndWithMaskSetcc(N, DAG, Subtarget))
    return V;

  if (SDValue R = combineBitOpWithMOVMSK(N, DAG))
    return R;

  // The remaining folds rely on legalized operations.
  if (DCI.isBeforeLegalizeOps())
    return SDValue();

  if (SDValue R = combineCompareEqual(N, DAG, DCI, Subtarget))
    return R;

  if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, Subtarget))
    return FPLogic;

  if (SDValue R = combineANDXORWithAllOnesIntoANDNP(N, DAG))
    return R;

  if (SDValue ShiftRight = combineAndMaskToShift(N, DAG, Subtarget))
    return ShiftRight;

  if (SDValue R = combineAndLoadToBZHI(N, DAG, Subtarget))
    return R;

  // Attempt to recursively combine a bitmask AND with shuffles.
  if (VT.isVector() && (VT.getScalarSizeInBits() % 8) == 0) {
    SDValue Op(N, 0);
    if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget))
      return Res;
  }

  // Attempt to combine "and (extract_vector_elt V, i), bytemask" into an
  // extract from a zeroing shuffle of V, when the mask zeroes/keeps whole
  // bytes only.
  if ((VT.getScalarSizeInBits() % 8) == 0 &&
      N->getOperand(0).getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
      isa<ConstantSDNode>(N->getOperand(0).getOperand(1))) {
    SDValue BitMask = N->getOperand(1);
    SDValue SrcVec = N->getOperand(0).getOperand(0);
    EVT SrcVecVT = SrcVec.getValueType();

    // Each 8-bit chunk of the mask must be all-ones (keep) or zero (clear).
    APInt UndefElts;
    SmallVector<APInt, 64> EltBits;
    if (VT == SrcVecVT.getScalarType() &&
        N->getOperand(0)->isOnlyUserOf(SrcVec.getNode()) &&
        getTargetConstantBitsFromNode(BitMask, 8, UndefElts, EltBits) &&
        llvm::all_of(EltBits, [](const APInt &M) {
          return M.isNullValue() || M.isAllOnesValue();
        })) {
      unsigned NumElts = SrcVecVT.getVectorNumElements();
      unsigned Scale = SrcVecVT.getScalarSizeInBits() / 8;
      unsigned Idx = N->getOperand(0).getConstantOperandVal(1);

      // Build a byte shuffle mask that zeroes the masked-off bytes of the
      // extracted element and leaves everything else undef.
      SmallVector<int, 16> ShuffleMask(NumElts * Scale, SM_SentinelUndef);
      for (unsigned i = 0; i != Scale; ++i) {
        if (UndefElts[i])
          continue;
        int VecIdx = Scale * Idx + i;
        ShuffleMask[VecIdx] =
            EltBits[i].isNullValue() ? SM_SentinelZero : VecIdx;
      }

      // NOTE(review): the trailing bool/int arguments configure the recursion
      // depth and variable-mask permissions of combineX86ShufflesRecursively —
      // confirm against its declaration before changing them.
      if (SDValue Shuffle = combineX86ShufflesRecursively(
              {SrcVec}, 0, SrcVec, ShuffleMask, {}, 1,
              X86::MaxShuffleCombineDepth,
              false, true,
              true, DAG, Subtarget))
        return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), VT, Shuffle,
                           N->getOperand(0).getOperand(1));
    }
  }

  return SDValue();
}
| 44956 | |
| 44957 | |
/// Match or(and(X, C), and(Y, ~C)) — a bit-select with constant masks — and
/// canonicalize it either to a single VPTERNLOG (AVX512) or to
/// or(X', andnp(C, Y')) so it can later match XOP's VPCMOV.
static SDValue canonicalizeBitSelect(SDNode *N, SelectionDAG &DAG,
                                     const X86Subtarget &Subtarget) {
  assert(N->getOpcode() == ISD::OR && "Unexpected Opcode");

  MVT VT = N->getSimpleValueType(0);
  // Constant-bit extraction below works in byte granularity.
  if (!VT.isVector() || (VT.getScalarSizeInBits() % 8) != 0)
    return SDValue();

  SDValue N0 = peekThroughBitcasts(N->getOperand(0));
  SDValue N1 = peekThroughBitcasts(N->getOperand(1));
  if (N0.getOpcode() != ISD::AND || N1.getOpcode() != ISD::AND)
    return SDValue();

  // Without XOP/VPTERNLOG only bother if at least one of the constant masks
  // has multiple uses (otherwise the rewrite saves nothing).
  bool UseVPTERNLOG = (Subtarget.hasAVX512() && VT.is512BitVector()) ||
                      Subtarget.hasVLX();
  if (!(Subtarget.hasXOP() || UseVPTERNLOG ||
        !N0.getOperand(1).hasOneUse() || !N1.getOperand(1).hasOneUse()))
    return SDValue();

  // Both AND RHS operands must be constants we can read bit-by-bit.
  APInt UndefElts0, UndefElts1;
  SmallVector<APInt, 32> EltBits0, EltBits1;
  if (!getTargetConstantBitsFromNode(N0.getOperand(1), 8, UndefElts0, EltBits0,
                                     false, false))
    return SDValue();
  if (!getTargetConstantBitsFromNode(N1.getOperand(1), 8, UndefElts1, EltBits1,
                                     false, false))
    return SDValue();

  for (unsigned i = 0, e = EltBits0.size(); i != e; ++i) {
    // Undef bits have unknown values — can't prove the masks are complements.
    if (UndefElts0[i] || UndefElts1[i])
      return SDValue();
    // The two masks must be exact bitwise complements of each other.
    if (EltBits0[i] != ~EltBits1[i])
      return SDValue();
  }

  SDLoc DL(N);

  if (UseVPTERNLOG) {
    // Emit a VPTERNLOG node directly: imm 0xCA selects B where A is set and
    // C where A is clear (A?B:C).
    SDValue A = DAG.getBitcast(VT, N0.getOperand(1));
    SDValue B = DAG.getBitcast(VT, N0.getOperand(0));
    SDValue C = DAG.getBitcast(VT, N1.getOperand(0));
    SDValue Imm = DAG.getTargetConstant(0xCA, DL, MVT::i8);
    return DAG.getNode(X86ISD::VPTERNLOG, DL, VT, A, B, C, Imm);
  }

  // Canonical form: or(and(X,C), andnp(C, Y)) — reuses the original LHS and
  // rewrites only the RHS with ANDNP.
  SDValue X = N->getOperand(0);
  SDValue Y =
      DAG.getNode(X86ISD::ANDNP, DL, VT, DAG.getBitcast(VT, N0.getOperand(1)),
                  DAG.getBitcast(VT, N1.getOperand(0)));
  return DAG.getNode(ISD::OR, DL, VT, X, Y);
}
| 45014 | |
| 45015 | |
| 45016 | static bool matchLogicBlend(SDNode *N, SDValue &X, SDValue &Y, SDValue &Mask) { |
| 45017 | if (N->getOpcode() != ISD::OR) |
| 45018 | return false; |
| 45019 | |
| 45020 | SDValue N0 = N->getOperand(0); |
| 45021 | SDValue N1 = N->getOperand(1); |
| 45022 | |
| 45023 | |
| 45024 | if (N1.getOpcode() == ISD::AND) |
| 45025 | std::swap(N0, N1); |
| 45026 | |
| 45027 | |
| 45028 | if (N0.getOpcode() != ISD::AND || N1.getOpcode() != X86ISD::ANDNP) |
| 45029 | return false; |
| 45030 | |
| 45031 | Mask = N1.getOperand(0); |
| 45032 | X = N1.getOperand(1); |
| 45033 | |
| 45034 | |
| 45035 | if (N0.getOperand(0) == Mask) |
| 45036 | Y = N0.getOperand(1); |
| 45037 | else if (N0.getOperand(1) == Mask) |
| 45038 | Y = N0.getOperand(0); |
| 45039 | else |
| 45040 | return false; |
| 45041 | |
| 45042 | |
| 45043 | |
| 45044 | return true; |
| 45045 | } |
| 45046 | |
| 45047 | |
| 45048 | |
| 45049 | |
| 45050 | |
| 45051 | |
| 45052 | |
| 45053 | |
| 45054 | |
/// Fold or(and(Mask, Y), andnp(Mask, X)) into a vector select when every
/// mask element is a splat of its sign bit, so it can become PBLENDVB (which
/// keys off the sign bit of each mask byte).
static SDValue combineLogicBlendIntoPBLENDV(SDNode *N, SelectionDAG &DAG,
                                            const X86Subtarget &Subtarget) {
  assert(N->getOpcode() == ISD::OR && "Unexpected Opcode");

  EVT VT = N->getValueType(0);
  // 128-bit vectors need SSE2; 256-bit vectors need AVX2 integer support.
  if (!((VT.is128BitVector() && Subtarget.hasSSE2()) ||
        (VT.is256BitVector() && Subtarget.hasInt256())))
    return SDValue();

  SDValue X, Y, Mask;
  if (!matchLogicBlend(N, X, Y, Mask))
    return SDValue();

  // Look through bitcasts to the underlying mask and operands.
  Mask = peekThroughBitcasts(Mask);
  X = peekThroughBitcasts(X);
  Y = peekThroughBitcasts(Y);

  EVT MaskVT = Mask.getValueType();
  unsigned EltBits = MaskVT.getScalarSizeInBits();

  // The select is legal only when each mask lane is all-ones or all-zeroes,
  // i.e. the whole element is a copy of its sign bit.
  if (!MaskVT.isInteger() || DAG.ComputeNumSignBits(Mask) != EltBits)
    return SDValue();

  SDLoc DL(N);

  // First try the cheaper conditional-negate form.
  if (SDValue Res = combineLogicBlendIntoConditionalNegate(VT, Mask, X, Y, DL,
                                                           DAG, Subtarget))
    return Res;

  // PBLENDVB is only available from SSE4.1.
  if (!Subtarget.hasSSE41())
    return SDValue();

  // NOTE(review): with AVX512VL the fold is skipped — presumably the
  // ternlog/mask-select lowerings are preferable there; confirm before
  // relying on this.
  if (Subtarget.hasVLX())
    return SDValue();

  // Blend at byte granularity: the sign-splat property guarantees every byte
  // of a mask element has the same value.
  MVT BlendVT = VT.is256BitVector() ? MVT::v32i8 : MVT::v16i8;

  X = DAG.getBitcast(BlendVT, X);
  Y = DAG.getBitcast(BlendVT, Y);
  Mask = DAG.getBitcast(BlendVT, Mask);
  // Mask set => take Y (the AND side), otherwise X (the ANDNP side).
  Mask = DAG.getSelect(DL, BlendVT, Mask, Y, X);
  return DAG.getBitcast(VT, Mask);
}
| 45103 | |
| 45104 | |
| 45105 | |
| 45106 | |
| 45107 | |
| 45108 | |
| 45109 | |
| 45110 | static SDValue lowerX86CmpEqZeroToCtlzSrl(SDValue Op, EVT ExtTy, |
| 45111 | SelectionDAG &DAG) { |
| 45112 | SDValue Cmp = Op.getOperand(1); |
| 45113 | EVT VT = Cmp.getOperand(0).getValueType(); |
| 45114 | unsigned Log2b = Log2_32(VT.getSizeInBits()); |
| 45115 | SDLoc dl(Op); |
| 45116 | SDValue Clz = DAG.getNode(ISD::CTLZ, dl, VT, Cmp->getOperand(0)); |
| 45117 | |
| 45118 | |
| 45119 | SDValue Trunc = DAG.getZExtOrTrunc(Clz, dl, MVT::i32); |
| 45120 | SDValue Scc = DAG.getNode(ISD::SRL, dl, MVT::i32, Trunc, |
| 45121 | DAG.getConstant(Log2b, dl, MVT::i8)); |
| 45122 | return DAG.getZExtOrTrunc(Scc, dl, ExtTy); |
| 45123 | } |
| 45124 | |
| 45125 | |
| 45126 | |
| 45127 | |
| 45128 | |
| 45129 | |
| 45130 | |
| 45131 | |
/// Try to transform a chain like
///   zext(or(setcc(eq, cmp(x, 0)), setcc(eq, cmp(y, 0)), ...))
/// into
///   srl(or(ctlz(x), ctlz(y), ...), log2(bitwidth))
/// replacing each compare-with-zero by a ctlz-based zero test
/// (see lowerX86CmpEqZeroToCtlzSrl). Only done when ctlz is fast.
static SDValue combineOrCmpEqZeroToCtlzSrl(SDNode *N, SelectionDAG &DAG,
                                           TargetLowering::DAGCombinerInfo &DCI,
                                           const X86Subtarget &Subtarget) {
  // X86ISD::SETCC nodes only exist after legalization, and the rewrite is
  // only profitable when LZCNT is fast on this subtarget.
  if (DCI.isBeforeLegalize() || !Subtarget.getTargetLowering()->isCtlzFast())
    return SDValue();

  auto isORCandidate = [](SDValue N) {
    return (N->getOpcode() == ISD::OR && N->hasOneUse());
  };

  // N itself must be a single-use zext-like node of at least i32 whose
  // operand is a single-use OR.
  if (!N->hasOneUse() || !N->getSimpleValueType(0).bitsGE(MVT::i32) ||
      !isORCandidate(N->getOperand(0)))
    return SDValue();

  // Matches a single-use setcc(eq, cmp(x, 0)) with an i32-or-wider compare.
  auto isSetCCCandidate = [](SDValue N) {
    return N->getOpcode() == X86ISD::SETCC && N->hasOneUse() &&
           X86::CondCode(N->getConstantOperandVal(0)) == X86::COND_E &&
           N->getOperand(1).getOpcode() == X86ISD::CMP &&
           isNullConstant(N->getOperand(1).getOperand(1)) &&
           N->getOperand(1).getValueType().bitsGE(MVT::i32);
  };

  SDNode *OR = N->getOperand(0).getNode();
  SDValue LHS = OR->getOperand(0);
  SDValue RHS = OR->getOperand(1);

  // Walk down a chain where one OR operand is a setcc candidate and the
  // other is a nested OR, remembering every OR visited so it can be rebuilt.
  SmallVector<SDNode *, 2> ORNodes;
  while (((isORCandidate(LHS) && isSetCCCandidate(RHS)) ||
          (isORCandidate(RHS) && isSetCCCandidate(LHS)))) {
    ORNodes.push_back(OR);
    OR = (LHS->getOpcode() == ISD::OR) ? LHS.getNode() : RHS.getNode();
    LHS = OR->getOperand(0);
    RHS = OR->getOperand(1);
  }

  // The innermost OR must combine two setcc candidates directly.
  if (!(isSetCCCandidate(LHS) && isSetCCCandidate(RHS)) ||
      !isORCandidate(SDValue(OR, 0)))
    return SDValue();

  // Rewrite the innermost pair first: or(ctlz(x)>>k, ctlz(y)>>k).
  EVT VT = OR->getValueType(0);
  SDValue NewLHS = lowerX86CmpEqZeroToCtlzSrl(LHS, VT, DAG);
  SDValue Ret, NewRHS;
  if (NewLHS && (NewRHS = lowerX86CmpEqZeroToCtlzSrl(RHS, VT, DAG)))
    Ret = DAG.getNode(ISD::OR, SDLoc(OR), VT, NewLHS, NewRHS);

  if (!Ret)
    return SDValue();

  // Unwind the saved OR chain, converting each remaining setcc operand and
  // OR-ing it into the accumulated result.
  while (ORNodes.size() > 0) {
    OR = ORNodes.pop_back_val();
    LHS = OR->getOperand(0);
    RHS = OR->getOperand(1);

    // Ensure RHS is the setcc side (LHS may be the nested OR just handled).
    if (RHS->getOpcode() == ISD::OR)
      std::swap(LHS, RHS);
    NewRHS = lowerX86CmpEqZeroToCtlzSrl(RHS, VT, DAG);
    if (!NewRHS)
      return SDValue();
    Ret = DAG.getNode(ISD::OR, SDLoc(OR), VT, Ret, NewRHS);
  }

  // Extend the combined bit back to N's original result type.
  if (Ret)
    Ret = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), Ret);

  return Ret;
}
| 45210 | |
/// DAG combine for ISD::OR on x86: tries a sequence of target folds and
/// returns the replacement node, or an empty SDValue if nothing applied.
static SDValue combineOr(SDNode *N, SelectionDAG &DAG,
                         TargetLowering::DAGCombinerInfo &DCI,
                         const X86Subtarget &Subtarget) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);

  // With SSE1-only, integer v4i32 logic must go through the FP domain
  // (X86ISD::FOR, i.e. ORPS).
  if (Subtarget.hasSSE1() && !Subtarget.hasSSE2() && VT == MVT::v4i32) {
    return DAG.getBitcast(MVT::v4i32,
                          DAG.getNode(X86ISD::FOR, SDLoc(N), MVT::v4f32,
                                      DAG.getBitcast(MVT::v4f32, N0),
                                      DAG.getBitcast(MVT::v4f32, N1)));
  }

  // Match an "any lane true" boolean reduction (or of extracted i1 lanes)
  // and turn it into: (bitcast-to-int mask) & partial != 0.
  if (VT == MVT::i1) {
    SmallVector<SDValue, 2> SrcOps;
    SmallVector<APInt, 2> SrcPartials;
    if (matchScalarReduction(SDValue(N, 0), ISD::OR, SrcOps, &SrcPartials) &&
        SrcOps.size() == 1) {
      SDLoc dl(N);
      const TargetLowering &TLI = DAG.getTargetLoweringInfo();
      unsigned NumElts = SrcOps[0].getValueType().getVectorNumElements();
      EVT MaskVT = EVT::getIntegerVT(*DAG.getContext(), NumElts);
      SDValue Mask = combineBitcastvxi1(DAG, MaskVT, SrcOps[0], dl, Subtarget);
      // Fall back to a plain bitcast if the vXi1 type is already legal.
      if (!Mask && TLI.isTypeLegal(SrcOps[0].getValueType()))
        Mask = DAG.getBitcast(MaskVT, SrcOps[0]);
      if (Mask) {
        assert(SrcPartials[0].getBitWidth() == NumElts &&
               "Unexpected partial reduction mask");
        SDValue ZeroBits = DAG.getConstant(0, dl, MaskVT);
        SDValue PartialBits = DAG.getConstant(SrcPartials[0], dl, MaskVT);
        Mask = DAG.getNode(ISD::AND, dl, MaskVT, Mask, PartialBits);
        // Any demanded lane set <=> (mask & partial) != 0.
        return DAG.getSetCC(dl, MVT::i1, Mask, ZeroBits, ISD::SETNE);
      }
    }
  }

  if (SDValue R = combineBitOpWithMOVMSK(N, DAG))
    return R;

  // The remaining folds rely on legalized operations.
  if (DCI.isBeforeLegalizeOps())
    return SDValue();

  if (SDValue R = combineCompareEqual(N, DAG, DCI, Subtarget))
    return R;

  if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, Subtarget))
    return FPLogic;

  if (SDValue R = canonicalizeBitSelect(N, DAG, Subtarget))
    return R;

  if (SDValue R = combineLogicBlendIntoPBLENDV(N, DAG, Subtarget))
    return R;

  // Combine or(X, kshiftl(Y, Elts/2)) -> concat_vectors(X, Y) when the upper
  // half of X is known zero — a vXi1 mask concat (KUNPCK-style). Both operand
  // orders are handled; only done for wide enough masks (>= 16 elements).
  if (N0.getOpcode() == X86ISD::KSHIFTL || N1.getOpcode() == X86ISD::KSHIFTL) {
    unsigned NumElts = VT.getVectorNumElements();
    unsigned HalfElts = NumElts / 2;
    APInt UpperElts = APInt::getHighBitsSet(NumElts, HalfElts);
    if (NumElts >= 16 && N1.getOpcode() == X86ISD::KSHIFTL &&
        N1.getConstantOperandAPInt(1) == HalfElts &&
        DAG.MaskedValueIsZero(N0, APInt(1, 1), UpperElts)) {
      SDLoc dl(N);
      return DAG.getNode(
          ISD::CONCAT_VECTORS, dl, VT,
          extractSubVector(N0, 0, DAG, dl, HalfElts),
          extractSubVector(N1.getOperand(0), 0, DAG, dl, HalfElts));
    }
    if (NumElts >= 16 && N0.getOpcode() == X86ISD::KSHIFTL &&
        N0.getConstantOperandAPInt(1) == HalfElts &&
        DAG.MaskedValueIsZero(N1, APInt(1, 1), UpperElts)) {
      SDLoc dl(N);
      return DAG.getNode(
          ISD::CONCAT_VECTORS, dl, VT,
          extractSubVector(N1, 0, DAG, dl, HalfElts),
          extractSubVector(N0.getOperand(0), 0, DAG, dl, HalfElts));
    }
  }

  // Attempt to recursively combine an OR of shuffles.
  if (VT.isVector() && (VT.getScalarSizeInBits() % 8) == 0) {
    SDValue Op(N, 0);
    if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget))
      return Res;
  }

  return SDValue();
}
| 45306 | |
| 45307 | |
| 45308 | |
| 45309 | |
| 45310 | |
| 45311 | static SDValue foldXorTruncShiftIntoCmp(SDNode *N, SelectionDAG &DAG) { |
| 45312 | |
| 45313 | EVT ResultType = N->getValueType(0); |
| 45314 | if (ResultType != MVT::i8 && ResultType != MVT::i1) |
| 45315 | return SDValue(); |
| 45316 | |
| 45317 | SDValue N0 = N->getOperand(0); |
| 45318 | SDValue N1 = N->getOperand(1); |
| 45319 | |
| 45320 | |
| 45321 | if (N0.getOpcode() != ISD::TRUNCATE || !N0.hasOneUse()) |
| 45322 | return SDValue(); |
| 45323 | |
| 45324 | |
| 45325 | if (!isOneConstant(N1)) |
| 45326 | return SDValue(); |
| 45327 | |
| 45328 | |
| 45329 | SDValue Shift = N0.getOperand(0); |
| 45330 | if (Shift.getOpcode() != ISD::SRL || !Shift.hasOneUse()) |
| 45331 | return SDValue(); |
| 45332 | |
| 45333 | |
| 45334 | EVT ShiftTy = Shift.getValueType(); |
| 45335 | if (ShiftTy != MVT::i16 && ShiftTy != MVT::i32 && ShiftTy != MVT::i64) |
| 45336 | return SDValue(); |
| 45337 | |
| 45338 | |
| 45339 | if (!isa<ConstantSDNode>(Shift.getOperand(1)) || |
| 45340 | Shift.getConstantOperandAPInt(1) != (ShiftTy.getSizeInBits() - 1)) |
| 45341 | return SDValue(); |
| 45342 | |
| 45343 | |
| 45344 | |
| 45345 | |
| 45346 | SDLoc DL(N); |
| 45347 | SDValue ShiftOp = Shift.getOperand(0); |
| 45348 | EVT ShiftOpTy = ShiftOp.getValueType(); |
| 45349 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
| 45350 | EVT SetCCResultType = TLI.getSetCCResultType(DAG.getDataLayout(), |
| 45351 | *DAG.getContext(), ResultType); |
| 45352 | SDValue Cond = DAG.getSetCC(DL, SetCCResultType, ShiftOp, |
| 45353 | DAG.getConstant(-1, DL, ShiftOpTy), ISD::SETGT); |
| 45354 | if (SetCCResultType != ResultType) |
| 45355 | Cond = DAG.getNode(ISD::ZERO_EXTEND, DL, ResultType, Cond); |
| 45356 | return Cond; |
| 45357 | } |
| 45358 | |
| 45359 | |
| 45360 | |
| 45361 | |
| 45362 | |
| 45363 | |
| 45364 | |
| 45365 | |
| 45366 | static SDValue foldVectorXorShiftIntoCmp(SDNode *N, SelectionDAG &DAG, |
| 45367 | const X86Subtarget &Subtarget) { |
| 45368 | EVT VT = N->getValueType(0); |
| 45369 | if (!VT.isSimple()) |
| 45370 | return SDValue(); |
| 45371 | |
| 45372 | switch (VT.getSimpleVT().SimpleTy) { |
| 45373 | default: return SDValue(); |
| 45374 | case MVT::v16i8: |
| 45375 | case MVT::v8i16: |
| 45376 | case MVT::v4i32: |
| 45377 | case MVT::v2i64: if (!Subtarget.hasSSE2()) return SDValue(); break; |
| 45378 | case MVT::v32i8: |
| 45379 | case MVT::v16i16: |
| 45380 | case MVT::v8i32: |
| 45381 | case MVT::v4i64: if (!Subtarget.hasAVX2()) return SDValue(); break; |
| 45382 | } |
| 45383 | |
| 45384 | |
| 45385 | |
| 45386 | SDValue Shift = N->getOperand(0); |
| 45387 | SDValue Ones = N->getOperand(1); |
| 45388 | if (Shift.getOpcode() != ISD::SRA || !Shift.hasOneUse() || |
| 45389 | !ISD::isBuildVectorAllOnes(Ones.getNode())) |
| 45390 | return SDValue(); |
| 45391 | |
| 45392 | |
| 45393 | auto *ShiftAmt = |
| 45394 | isConstOrConstSplat(Shift.getOperand(1), true); |
| 45395 | if (!ShiftAmt || |
| 45396 | ShiftAmt->getAPIntValue() != (Shift.getScalarValueSizeInBits() - 1)) |
| 45397 | return SDValue(); |
| 45398 | |
| 45399 | |
| 45400 | |
| 45401 | return DAG.getSetCC(SDLoc(N), VT, Shift.getOperand(0), Ones, ISD::SETGT); |
| 45402 | } |
| 45403 | |
| 45404 | |
| 45405 | |
| 45406 | |
| 45407 | |
| 45408 | |
| 45409 | |
| 45410 | |
| 45411 | |
| 45412 | |
| 45413 | |
| 45414 | |
| 45415 | |
| 45416 | |
| 45417 | |
| 45418 | |
| 45419 | |
/// Detect truncation-with-unsigned-saturation patterns on In (the value
/// being truncated to VT). Handles three clamp shapes:
///   1. umin(x, umax_of_dest)
///   2. smin(smax(x, C1), C2)   with C1 >= 0 and C2 == umax_of_dest
///   3. smax(smin(x, C2), C1)   with 0 <= C1 <= C2 == umax_of_dest
/// Returns the value to truncate with unsigned saturation (for case 3 a
/// fresh smax is built so the clamp order matches cases 1/2), or an empty
/// SDValue if no pattern matched.
static SDValue detectUSatPattern(SDValue In, EVT VT, SelectionDAG &DAG,
                                 const SDLoc &DL) {
  EVT InVT = In.getValueType();

  // This is a genuine narrowing truncation.
  assert(InVT.getScalarSizeInBits() > VT.getScalarSizeInBits() &&
         "Unexpected types for truncate operation");

  // Match "Opcode V0, splat(C)" and return V0, storing the splat in Limit.
  auto MatchMinMax = [](SDValue V, unsigned Opcode, APInt &Limit) -> SDValue {
    if (V.getOpcode() == Opcode &&
        ISD::isConstantSplatVector(V.getOperand(1).getNode(), Limit))
      return V.getOperand(0);
    return SDValue();
  };

  APInt C1, C2; // Splat constants of the matched min/max nodes.
  if (SDValue UMin = MatchMinMax(In, ISD::UMIN, C2))
    // Case 1: the umin bound must be exactly the destination's unsigned max
    // (a low-bits mask of the destination width).
    if (C2.isMask(VT.getScalarSizeInBits()))
      return UMin;

  if (SDValue SMin = MatchMinMax(In, ISD::SMIN, C2))
    // Case 2: smin(smax(x, C1 >= 0), umax_of_dest) — return the smin, the
    // inner smax already guarantees non-negativity.
    if (MatchMinMax(SMin, ISD::SMAX, C1))
      if (C1.isNonNegative() && C2.isMask(VT.getScalarSizeInBits()))
        return SMin;

  if (SDValue SMax = MatchMinMax(In, ISD::SMAX, C1))
    if (SDValue SMin = MatchMinMax(SMax, ISD::SMIN, C2))
      // Case 3: smax(smin(x, C2), C1) with 0 <= C1 <= C2. Rebuild as
      // smax(smin-input, C1): with C1 <= C2 the two clamps commute, giving
      // the smin-outermost shape the saturating truncate expects.
      if (C1.isNonNegative() && C2.isMask(VT.getScalarSizeInBits()) &&
          C2.uge(C1)) {
        return DAG.getNode(ISD::SMAX, DL, InVT, SMin, In.getOperand(1));
      }

  return SDValue();
}
| 45457 | |
| 45458 | |
| 45459 | |
| 45460 | |
| 45461 | |
| 45462 | |
| 45463 | |
| 45464 | |
| 45465 | |
| 45466 | |
| 45467 | static SDValue detectSSatPattern(SDValue In, EVT VT, bool MatchPackUS = false) { |
| 45468 | unsigned NumDstBits = VT.getScalarSizeInBits(); |
| 45469 | unsigned NumSrcBits = In.getScalarValueSizeInBits(); |
| 45470 | assert(NumSrcBits > NumDstBits && "Unexpected types for truncate operation"); |
| 45471 | |
| 45472 | auto MatchMinMax = [](SDValue V, unsigned Opcode, |
| 45473 | const APInt &Limit) -> SDValue { |
| 45474 | APInt C; |
| 45475 | if (V.getOpcode() == Opcode && |
| 45476 | ISD::isConstantSplatVector(V.getOperand(1).getNode(), C) && C == Limit) |
| 45477 | return V.getOperand(0); |
| 45478 | return SDValue(); |
| 45479 | }; |
| 45480 | |
| 45481 | APInt SignedMax, SignedMin; |
| 45482 | if (MatchPackUS) { |
| 45483 | SignedMax = APInt::getAllOnesValue(NumDstBits).zext(NumSrcBits); |
| 45484 | SignedMin = APInt(NumSrcBits, 0); |
| 45485 | } else { |
| 45486 | SignedMax = APInt::getSignedMaxValue(NumDstBits).sext(NumSrcBits); |
| 45487 | SignedMin = APInt::getSignedMinValue(NumDstBits).sext(NumSrcBits); |
| 45488 | } |
| 45489 | |
| 45490 | if (SDValue SMin = MatchMinMax(In, ISD::SMIN, SignedMax)) |
| 45491 | if (SDValue SMax = MatchMinMax(SMin, ISD::SMAX, SignedMin)) |
| 45492 | return SMax; |
| 45493 | |
| 45494 | if (SDValue SMax = MatchMinMax(In, ISD::SMAX, SignedMin)) |
| 45495 | if (SDValue SMin = MatchMinMax(SMax, ISD::SMIN, SignedMax)) |
| 45496 | return SMin; |
| 45497 | |
| 45498 | return SDValue(); |
| 45499 | } |
| 45500 | |
/// Turn "truncate of a saturating clamp" (In clamped then truncated to VT)
/// into the best x86 saturating-truncate sequence for this subtarget:
/// PACKSS/PACKUS on SSE targets, or VTRUNCS/VTRUNCUS on AVX-512.
static SDValue combineTruncateWithSat(SDValue In, EVT VT, const SDLoc &DL,
                                      SelectionDAG &DAG,
                                      const X86Subtarget &Subtarget) {
  if (!Subtarget.hasSSE2() || !VT.isVector())
    return SDValue();

  EVT SVT = VT.getVectorElementType();
  EVT InVT = In.getValueType();
  EVT InSVT = InVT.getVectorElementType();

  // Special case: BWI without 512-bit registers, v16i32 clamped to [0,255]
  // and truncated to v16i8. Pack down to v16i16 first, then use a 256-bit
  // VTRUNCUS for the final step.
  if (Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
      InVT == MVT::v16i32 && VT == MVT::v16i8) {
    if (auto USatVal = detectSSatPattern(In, VT, true)) {
      SDValue Mid = truncateVectorWithPACK(X86ISD::PACKUS, MVT::v16i16, USatVal,
                                           DL, DAG, Subtarget);
      assert(Mid && "Failed to pack!");
      return DAG.getNode(X86ISD::VTRUNCUS, DL, VT, Mid);
    }
  }

  // Decide whether the AVX-512 VTRUNCS/VTRUNCUS path (further below) should
  // be preferred over PACKSS/PACKUS: wide enough input, the right element
  // types, and register width the subtarget actually wants to use.
  bool PreferAVX512 = ((Subtarget.hasAVX512() && InSVT == MVT::i32) ||
                       (Subtarget.hasBWI() && InSVT == MVT::i16)) &&
                      (InVT.getSizeInBits() > 128) &&
                      (Subtarget.hasVLX() || InVT.getSizeInBits() > 256) &&
                      !(!Subtarget.useAVX512Regs() && VT.getSizeInBits() >= 256);

  // PACKSS/PACKUS path: power-of-2 element counts, >= 64-bit results, and
  // the i16/i32 -> i8/i16 shapes the pack instructions handle.
  if (isPowerOf2_32(VT.getVectorNumElements()) && !PreferAVX512 &&
      VT.getSizeInBits() >= 64 &&
      (SVT == MVT::i8 || SVT == MVT::i16) &&
      (InSVT == MVT::i16 || InSVT == MVT::i32)) {
    if (auto USatVal = detectSSatPattern(In, VT, true)) {
      // vXi32 -> vXi8 goes PACKSS (i32->i16) then PACKUS (i16->i8); the
      // signed first step is safe since the value is clamped to [0, 255].
      if (SVT == MVT::i8 && InSVT == MVT::i32) {
        EVT MidVT = VT.changeVectorElementType(MVT::i16);
        SDValue Mid = truncateVectorWithPACK(X86ISD::PACKSS, MidVT, USatVal, DL,
                                             DAG, Subtarget);
        assert(Mid && "Failed to pack!");
        SDValue V = truncateVectorWithPACK(X86ISD::PACKUS, VT, Mid, DL, DAG,
                                           Subtarget);
        assert(V && "Failed to pack!");
        return V;
      } else if (SVT == MVT::i8 || Subtarget.hasSSE41())
        // PACKUSWB is SSE2, but i32->i16 PACKUSDW needs SSE4.1.
        return truncateVectorWithPACK(X86ISD::PACKUS, VT, USatVal, DL, DAG,
                                      Subtarget);
    }
    // Signed saturation maps straight onto PACKSS.
    if (auto SSatVal = detectSSatPattern(In, VT))
      return truncateVectorWithPACK(X86ISD::PACKSS, VT, SSatVal, DL, DAG,
                                    Subtarget);
  }

  // AVX-512 path: use the dedicated saturating truncate nodes.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (TLI.isTypeLegal(InVT) && InVT.isVector() && SVT != MVT::i1 &&
      Subtarget.hasAVX512() && (InSVT != MVT::i16 || Subtarget.hasBWI()) &&
      (SVT == MVT::i32 || SVT == MVT::i16 || SVT == MVT::i8)) {
    unsigned TruncOpc = 0;
    SDValue SatVal;
    if (auto SSatVal = detectSSatPattern(In, VT)) {
      SatVal = SSatVal;
      TruncOpc = X86ISD::VTRUNCS;
    } else if (auto USatVal = detectUSatPattern(In, VT, DAG, DL)) {
      SatVal = USatVal;
      TruncOpc = X86ISD::VTRUNCUS;
    }
    if (SatVal) {
      unsigned ResElts = VT.getVectorNumElements();

      // Without VLX only 512-bit inputs are supported: widen narrower inputs
      // to 512 bits by concatenating with undef.
      if (!Subtarget.hasVLX() && !InVT.is512BitVector()) {
        unsigned NumConcats = 512 / InVT.getSizeInBits();
        ResElts *= NumConcats;
        SmallVector<SDValue, 4> ConcatOps(NumConcats, DAG.getUNDEF(InVT));
        ConcatOps[0] = SatVal;
        InVT = EVT::getVectorVT(*DAG.getContext(), InSVT,
                                NumConcats * InVT.getVectorNumElements());
        SatVal = DAG.getNode(ISD::CONCAT_VECTORS, DL, InVT, ConcatOps);
      }

      // The truncate result must be at least 128 bits wide; extract the
      // original VT back out afterwards.
      if (ResElts * SVT.getSizeInBits() < 128)
        ResElts = 128 / SVT.getSizeInBits();
      EVT TruncVT = EVT::getVectorVT(*DAG.getContext(), SVT, ResElts);
      SDValue Res = DAG.getNode(TruncOpc, DL, TruncVT, SatVal);
      return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
                         DAG.getIntPtrConstant(0, DL));
    }
  }

  return SDValue();
}
| 45602 | |
| 45603 | |
| 45604 | |
| 45605 | |
/// Detect the AVG pattern between vectors of unsigned i8/i16:
///   c = truncate(lshr(add(add(zext(a), zext(b)), 1), 1))
/// i.e. c = (a + b + 1) / 2, and replace it with the X86ISD::AVG
/// (PAVGB/PAVGW) node. Returns the replacement value, or SDValue() if the
/// pattern does not match.
static SDValue detectAVGPattern(SDValue In, EVT VT, SelectionDAG &DAG,
                                const X86Subtarget &Subtarget,
                                const SDLoc &DL) {
  if (!VT.isVector())
    return SDValue();
  EVT InVT = In.getValueType();
  unsigned NumElems = VT.getVectorNumElements();

  // PAVGB/PAVGW only exist for vXi8/vXi16 element types.
  EVT ScalarVT = VT.getVectorElementType();
  if (!((ScalarVT == MVT::i8 || ScalarVT == MVT::i16) && NumElems >= 2))
    return SDValue();

  // InScalarVT is the intermediate type in the AVG pattern and it should be
  // strictly wider than the destination scalar type (i8/i16), so the add
  // cannot wrap.
  EVT InScalarVT = InVT.getVectorElementType();
  if (InScalarVT.getFixedSizeInBits() <= ScalarVT.getFixedSizeInBits())
    return SDValue();

  if (!Subtarget.hasSSE2())
    return SDValue();

  // Detect the following pattern (the caller passes in the source of the
  // truncate, so the root seen here is the logical shift right):
  //
  //   %1 = zext <N x i8> %a to <N x i32>
  //   %2 = zext <N x i8> %b to <N x i32>
  //   %3 = add nuw nsw <N x i32> %1, <i32 1 x N>
  //   %4 = add nuw nsw <N x i32> %3, %2
  //   %5 = lshr <N x i32> %4, <i32 1 x N>
  //   %6 = trunc <N x i32> %5 to <N x i8>
  if (In.getOpcode() != ISD::SRL)
    return SDValue();

  // A lambda checking that the given SDValue is a constant vector whose
  // every element lies in the inclusive range [Min, Max].
  auto IsConstVectorInRange = [](SDValue V, unsigned Min, unsigned Max) {
    return ISD::matchUnaryPredicate(V, [Min, Max](ConstantSDNode *C) {
      return !(C->getAPIntValue().ult(Min) || C->getAPIntValue().ugt(Max));
    });
  };

  // Check if each element of the vector is right-shifted by one.
  SDValue LHS = In.getOperand(0);
  SDValue RHS = In.getOperand(1);
  if (!IsConstVectorInRange(RHS, 1, 1))
    return SDValue();
  if (LHS.getOpcode() != ISD::ADD)
    return SDValue();

  // Detect a pattern of a + b + 1 where the order doesn't matter.
  SDValue Operands[3];
  Operands[0] = LHS.getOperand(0);
  Operands[1] = LHS.getOperand(1);

  // Builder used by SplitOpsAndApply to emit the AVG node on each
  // legal-width chunk.
  auto AVGBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
                       ArrayRef<SDValue> Ops) {
    return DAG.getNode(X86ISD::AVG, DL, Ops[0].getValueType(), Ops);
  };

  auto AVGSplitter = [&](SDValue Op0, SDValue Op1) {
    // Pad to a power-of-2 vector, split + apply, then extract the original
    // (possibly narrower) vector from the result.
    unsigned NumElemsPow2 = PowerOf2Ceil(NumElems);
    EVT Pow2VT = EVT::getVectorVT(*DAG.getContext(), ScalarVT, NumElemsPow2);
    if (NumElemsPow2 != NumElems) {
      // Rebuild both operands as power-of-2 vectors, padding with undef.
      SmallVector<SDValue, 32> Ops0(NumElemsPow2, DAG.getUNDEF(ScalarVT));
      SmallVector<SDValue, 32> Ops1(NumElemsPow2, DAG.getUNDEF(ScalarVT));
      for (unsigned i = 0; i != NumElems; ++i) {
        SDValue Idx = DAG.getIntPtrConstant(i, DL);
        Ops0[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, Op0, Idx);
        Ops1[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, Op1, Idx);
      }
      Op0 = DAG.getBuildVector(Pow2VT, DL, Ops0);
      Op1 = DAG.getBuildVector(Pow2VT, DL, Ops1);
    }
    SDValue Res =
        SplitOpsAndApply(DAG, Subtarget, DL, Pow2VT, {Op0, Op1}, AVGBuilder);
    if (NumElemsPow2 == NumElems)
      return Res;
    return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
                       DAG.getIntPtrConstant(0, DL));
  };

  // Take care of the case when one of the operands is a constant vector whose
  // element is in the range [1, 256] (for i8; [1, 65536] for i16).
  if (IsConstVectorInRange(Operands[1], 1, ScalarVT == MVT::i8 ? 256 : 65536) &&
      Operands[0].getOpcode() == ISD::ZERO_EXTEND &&
      Operands[0].getOperand(0).getValueType() == VT) {
    // The pattern is detected. Subtract one from the constant vector, then
    // demote it and emit X86ISD::AVG instruction.
    SDValue VecOnes = DAG.getConstant(1, DL, InVT);
    Operands[1] = DAG.getNode(ISD::SUB, DL, InVT, Operands[1], VecOnes);
    Operands[1] = DAG.getNode(ISD::TRUNCATE, DL, VT, Operands[1]);
    return AVGSplitter(Operands[0].getOperand(0), Operands[1]);
  }

  // Matches 'add like' patterns: add(Op0,Op1) + zext(or(Op0,Op1)).
  // Match the or case only if it's 'add-like' - i.e. the operands share no
  // common bits, so the or can be replaced by an add.
  auto FindAddLike = [&](SDValue V, SDValue &Op0, SDValue &Op1) {
    if (ISD::ADD == V.getOpcode()) {
      Op0 = V.getOperand(0);
      Op1 = V.getOperand(1);
      return true;
    }
    if (ISD::ZERO_EXTEND != V.getOpcode())
      return false;
    V = V.getOperand(0);
    if (V.getValueType() != VT || ISD::OR != V.getOpcode() ||
        !DAG.haveNoCommonBitsSet(V.getOperand(0), V.getOperand(1)))
      return false;
    Op0 = V.getOperand(0);
    Op1 = V.getOperand(1);
    return true;
  };

  SDValue Op0, Op1;
  if (FindAddLike(Operands[0], Op0, Op1))
    std::swap(Operands[0], Operands[1]);
  else if (!FindAddLike(Operands[1], Op0, Op1))
    return SDValue();
  Operands[2] = Op0;
  Operands[1] = Op1;

  // Now we have three operands of two additions. Check that one of them is a
  // constant vector with ones, and the other two are promoted from i8/i16.
  for (int i = 0; i < 3; ++i) {
    if (!IsConstVectorInRange(Operands[i], 1, 1))
      continue;
    std::swap(Operands[i], Operands[2]);

    // Check if Operands[0] and Operands[1] are results of type promotion.
    for (int j = 0; j < 2; ++j)
      if (Operands[j].getValueType() != VT) {
        if (Operands[j].getOpcode() != ISD::ZERO_EXTEND ||
            Operands[j].getOperand(0).getValueType() != VT)
          return SDValue();
        Operands[j] = Operands[j].getOperand(0);
      }

    // The pattern is detected, emit X86ISD::AVG instruction(s).
    return AVGSplitter(Operands[0], Operands[1]);
  }

  return SDValue();
}
| 45751 | |
/// DAG combine for ISD::LOAD: splits slow/non-temporal 256-bit loads,
/// rewrites vXi1 loads as integer loads, reuses wider subvector broadcast
/// loads, and normalizes ptr32/ptr64 address-space pointers.
static SDValue combineLoad(SDNode *N, SelectionDAG &DAG,
                           TargetLowering::DAGCombinerInfo &DCI,
                           const X86Subtarget &Subtarget) {
  LoadSDNode *Ld = cast<LoadSDNode>(N);
  EVT RegVT = Ld->getValueType(0);
  EVT MemVT = Ld->getMemoryVT();
  SDLoc dl(Ld);
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  // For chips with slow 32-byte unaligned loads, break the 32-byte operation
  // into two 16-byte operations. Also split non-temporal aligned loads on
  // pre-AVX2 targets (the 256-bit non-temporal load requires Int256/AVX2).
  ISD::LoadExtType Ext = Ld->getExtensionType();
  bool Fast;
  if (RegVT.is256BitVector() && !DCI.isBeforeLegalizeOps() &&
      Ext == ISD::NON_EXTLOAD &&
      ((Ld->isNonTemporal() && !Subtarget.hasInt256() &&
        Ld->getAlignment() >= 16) ||
       (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), RegVT,
                               *Ld->getMemOperand(), &Fast) &&
        !Fast))) {
    unsigned NumElems = RegVT.getVectorNumElements();
    if (NumElems < 2)
      return SDValue();

    unsigned HalfOffset = 16;
    SDValue Ptr1 = Ld->getBasePtr();
    SDValue Ptr2 =
        DAG.getMemBasePlusOffset(Ptr1, TypeSize::Fixed(HalfOffset), dl);
    EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(),
                                  NumElems / 2);
    SDValue Load1 =
        DAG.getLoad(HalfVT, dl, Ld->getChain(), Ptr1, Ld->getPointerInfo(),
                    Ld->getOriginalAlign(),
                    Ld->getMemOperand()->getFlags());
    SDValue Load2 = DAG.getLoad(HalfVT, dl, Ld->getChain(), Ptr2,
                                Ld->getPointerInfo().getWithOffset(HalfOffset),
                                Ld->getOriginalAlign(),
                                Ld->getMemOperand()->getFlags());
    // Join the two half-load chains so memory ordering is preserved.
    SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
                             Load1.getValue(1), Load2.getValue(1));

    SDValue NewVec = DAG.getNode(ISD::CONCAT_VECTORS, dl, RegVT, Load1, Load2);
    return DCI.CombineTo(N, NewVec, TF, true);
  }

  // Bool vector load - attempt to cast to an integer, as we have good
  // custom handling of vXi1 bitcast(iX) patterns.
  if (Ext == ISD::NON_EXTLOAD && !Subtarget.hasAVX512() && RegVT.isVector() &&
      RegVT.getScalarType() == MVT::i1 && DCI.isBeforeLegalize()) {
    unsigned NumElts = RegVT.getVectorNumElements();
    EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), NumElts);
    if (TLI.isTypeLegal(IntVT)) {
      SDValue IntLoad = DAG.getLoad(IntVT, dl, Ld->getChain(), Ld->getBasePtr(),
                                    Ld->getPointerInfo(),
                                    Ld->getOriginalAlign(),
                                    Ld->getMemOperand()->getFlags());
      SDValue BoolVec = DAG.getBitcast(RegVT, IntLoad);
      return DCI.CombineTo(N, BoolVec, IntLoad.getValue(1), true);
    }
  }

  // If this same memory is also loaded by a wider SUBV_BROADCAST_LOAD, just
  // extract the lowest subvector of the broadcast instead of reloading.
  if (Ext == ISD::NON_EXTLOAD && Subtarget.hasAVX() && Ld->isSimple() &&
      (RegVT.is128BitVector() || RegVT.is256BitVector())) {
    SDValue Ptr = Ld->getBasePtr();
    SDValue Chain = Ld->getChain();
    for (SDNode *User : Ptr->uses()) {
      // Match a broadcast-load user of the same pointer/chain that reads the
      // same number of bits from memory but produces a wider register value.
      if (User != N && User->getOpcode() == X86ISD::SUBV_BROADCAST_LOAD &&
          cast<MemIntrinsicSDNode>(User)->getBasePtr() == Ptr &&
          cast<MemIntrinsicSDNode>(User)->getChain() == Chain &&
          cast<MemIntrinsicSDNode>(User)->getMemoryVT().getSizeInBits() ==
              MemVT.getSizeInBits() &&
          !User->hasAnyUseOfValue(1) &&
          User->getValueSizeInBits(0).getFixedSize() >
              RegVT.getFixedSizeInBits()) {
        SDValue Extract = extractSubVector(SDValue(User, 0), 0, DAG, SDLoc(N),
                                           RegVT.getSizeInBits());
        Extract = DAG.getBitcast(RegVT, Extract);
        return DCI.CombineTo(N, Extract, SDValue(User, 1));
      }
    }
  }

  // Cast ptr32 and ptr64 pointers to the default address space before a load.
  unsigned AddrSpace = Ld->getAddressSpace();
  if (AddrSpace == X86AS::PTR64 || AddrSpace == X86AS::PTR32_SPTR ||
      AddrSpace == X86AS::PTR32_UPTR) {
    MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
    if (PtrVT != Ld->getBasePtr().getSimpleValueType()) {
      SDValue Cast =
          DAG.getAddrSpaceCast(dl, PtrVT, Ld->getBasePtr(), AddrSpace, 0);
      return DAG.getLoad(RegVT, dl, Ld->getChain(), Cast, Ld->getPointerInfo(),
                         Ld->getOriginalAlign(),
                         Ld->getMemOperand()->getFlags());
    }
  }

  return SDValue();
}
| 45853 | |
| 45854 | |
| 45855 | |
| 45856 | |
| 45857 | static int getOneTrueElt(SDValue V) { |
| 45858 | |
| 45859 | |
| 45860 | |
| 45861 | |
| 45862 | |
| 45863 | |
| 45864 | |
| 45865 | auto *BV = dyn_cast<BuildVectorSDNode>(V); |
| 45866 | if (!BV || BV->getValueType(0).getVectorElementType() != MVT::i1) |
| 45867 | return -1; |
| 45868 | |
| 45869 | int TrueIndex = -1; |
| 45870 | unsigned NumElts = BV->getValueType(0).getVectorNumElements(); |
| 45871 | for (unsigned i = 0; i < NumElts; ++i) { |
| 45872 | const SDValue &Op = BV->getOperand(i); |
| 45873 | if (Op.isUndef()) |
| 45874 | continue; |
| 45875 | auto *ConstNode = dyn_cast<ConstantSDNode>(Op); |
| 45876 | if (!ConstNode) |
| 45877 | return -1; |
| 45878 | if (ConstNode->getAPIntValue().countTrailingOnes() >= 1) { |
| 45879 | |
| 45880 | if (TrueIndex >= 0) |
| 45881 | return -1; |
| 45882 | TrueIndex = i; |
| 45883 | } |
| 45884 | } |
| 45885 | return TrueIndex; |
| 45886 | } |
| 45887 | |
| 45888 | |
| 45889 | |
| 45890 | |
| 45891 | |
/// Given a masked memory load/store operation, return true if it has exactly
/// one mask bit set. If so, also compute out-params for the scalar access:
/// the memory address of the element (Addr), the vector lane as a constant
/// (Index), the byte offset from the base pointer (Offset), and the alignment
/// of the scalar access (Alignment).
static bool getParamsForOneTrueMaskedElt(MaskedLoadStoreSDNode *MaskedOp,
                                         SelectionDAG &DAG, SDValue &Addr,
                                         SDValue &Index, Align &Alignment,
                                         unsigned &Offset) {
  int TrueMaskElt = getOneTrueElt(MaskedOp->getMask());
  if (TrueMaskElt < 0)
    return false;

  // Get the address of the one scalar element that is specified by the mask
  // using the appropriate offset from the base pointer.
  EVT EltVT = MaskedOp->getMemoryVT().getVectorElementType();
  Offset = 0;
  Addr = MaskedOp->getBasePtr();
  if (TrueMaskElt != 0) {
    Offset = TrueMaskElt * EltVT.getStoreSize();
    Addr = DAG.getMemBasePlusOffset(Addr, TypeSize::Fixed(Offset),
                                    SDLoc(MaskedOp));
  }

  Index = DAG.getIntPtrConstant(TrueMaskElt, SDLoc(MaskedOp));
  // The scalar access is only as aligned as the original access allows at
  // this element's offset.
  Alignment = commonAlignment(MaskedOp->getOriginalAlign(),
                              EltVT.getStoreSize());
  return true;
}
| 45916 | |
| 45917 | |
| 45918 | |
| 45919 | |
| 45920 | |
/// If exactly one element of the mask is set for a non-extending masked load,
/// it is a scalar load and vector insert.
/// Note: the degenerate all-zeros / all-ones mask cases are expected to have
/// been optimized earlier, so they are not handled here.
static SDValue
reduceMaskedLoadToScalarLoad(MaskedLoadSDNode *ML, SelectionDAG &DAG,
                             TargetLowering::DAGCombinerInfo &DCI,
                             const X86Subtarget &Subtarget) {
  assert(ML->isUnindexed() && "Unexpected indexed masked load!");
  // TODO: This is not x86-specific, so it could be lifted to DAGCombiner.
  // However, some target hooks may need to be added to know when the transform
  // is profitable; endianness would also have to be considered.

  SDValue Addr, VecIndex;
  Align Alignment;
  unsigned Offset;
  if (!getParamsForOneTrueMaskedElt(ML, DAG, Addr, VecIndex, Alignment, Offset))
    return SDValue();

  // Load the one scalar element that is specified by the mask using the
  // appropriate offset from the base pointer.
  SDLoc DL(ML);
  EVT VT = ML->getValueType(0);
  EVT EltVT = VT.getVectorElementType();

  EVT CastVT = VT;
  if (EltVT == MVT::i64 && !Subtarget.is64Bit()) {
    // On 32-bit targets, do the scalar access as f64 and bitcast around it so
    // the i64 element does not need to be legalized.
    EltVT = MVT::f64;
    CastVT = VT.changeVectorElementType(EltVT);
  }

  SDValue Load =
      DAG.getLoad(EltVT, DL, ML->getChain(), Addr,
                  ML->getPointerInfo().getWithOffset(Offset),
                  Alignment, ML->getMemOperand()->getFlags());

  SDValue PassThru = DAG.getBitcast(CastVT, ML->getPassThru());

  // Insert the loaded element into the appropriate place in the vector.
  SDValue Insert =
      DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, CastVT, PassThru, Load, VecIndex);
  Insert = DAG.getBitcast(VT, Insert);
  return DCI.CombineTo(ML, Insert, Load.getValue(1), true);
}
| 45961 | |
/// Combine a masked load whose mask is a build vector of constants: either
/// replace it with an unmasked load + select, or re-emit it with an undef
/// pass-through plus a select so a cheaper blend can be used.
static SDValue
combineMaskedLoadConstantMask(MaskedLoadSDNode *ML, SelectionDAG &DAG,
                              TargetLowering::DAGCombinerInfo &DCI) {
  assert(ML->isUnindexed() && "Unexpected indexed masked load!");
  if (!ISD::isBuildVectorOfConstantSDNodes(ML->getMask().getNode()))
    return SDValue();

  SDLoc DL(ML);
  EVT VT = ML->getValueType(0);

  // If we are loading the first and last elements of a vector, it is safe and
  // always faster to load the whole vector. Replace the masked load with a
  // vector load and select.
  unsigned NumElts = VT.getVectorNumElements();
  BuildVectorSDNode *MaskBV = cast<BuildVectorSDNode>(ML->getMask());
  bool LoadFirstElt = !isNullConstant(MaskBV->getOperand(0));
  bool LoadLastElt = !isNullConstant(MaskBV->getOperand(NumElts - 1));
  if (LoadFirstElt && LoadLastElt) {
    SDValue VecLd = DAG.getLoad(VT, DL, ML->getChain(), ML->getBasePtr(),
                                ML->getMemOperand());
    SDValue Blend = DAG.getSelect(DL, VT, ML->getMask(), VecLd,
                                  ML->getPassThru());
    return DCI.CombineTo(ML, Blend, VecLd.getValue(1), true);
  }

  // Convert a masked load with a constant mask into a masked load and a
  // select. This allows the select operation to use a faster kind of select
  // instruction (for example, vblendvps -> vblendps).

  // Don't try this if the pass-through operand is already undefined - that
  // would cause an infinite loop because that's what we're about to create.
  if (ML->getPassThru().isUndef())
    return SDValue();

  // An all-zeros pass-through is left alone as well.
  if (ISD::isBuildVectorAllZeros(ML->getPassThru().getNode()))
    return SDValue();

  // The new masked load has an undef pass-through operand. The select uses
  // the original pass-through operand.
  SDValue NewML = DAG.getMaskedLoad(
      VT, DL, ML->getChain(), ML->getBasePtr(), ML->getOffset(), ML->getMask(),
      DAG.getUNDEF(VT), ML->getMemoryVT(), ML->getMemOperand(),
      ML->getAddressingMode(), ML->getExtensionType());
  SDValue Blend = DAG.getSelect(DL, VT, ML->getMask(), NewML,
                                ML->getPassThru());

  return DCI.CombineTo(ML, Blend, NewML.getValue(1), true);
}
| 46010 | |
/// DAG combine for masked loads: try scalarization / constant-mask blends,
/// then simplify a legalized (non-boolean) mask via demanded bits.
static SDValue combineMaskedLoad(SDNode *N, SelectionDAG &DAG,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const X86Subtarget &Subtarget) {
  auto *Mld = cast<MaskedLoadSDNode>(N);

  // TODO: Expanding load with constant mask may be optimized as well.
  if (Mld->isExpandingLoad())
    return SDValue();

  if (Mld->getExtensionType() == ISD::NON_EXTLOAD) {
    // A mask with a single true bit becomes a scalar load + vector insert.
    if (SDValue ScalarLoad =
            reduceMaskedLoadToScalarLoad(Mld, DAG, DCI, Subtarget))
      return ScalarLoad;

    // TODO: Do some AVX512 subsets benefit from this transform?
    if (!Subtarget.hasAVX512())
      if (SDValue Blend = combineMaskedLoadConstantMask(Mld, DAG, DCI))
        return Blend;
  }

  // If the mask value has been legalized to a non-boolean vector, try to
  // simplify ops leading up to it. We only demand the MSB of each lane.
  SDValue Mask = Mld->getMask();
  if (Mask.getScalarValueSizeInBits() != 1) {
    EVT VT = Mld->getValueType(0);
    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
    APInt DemandedBits(APInt::getSignMask(VT.getScalarSizeInBits()));
    if (TLI.SimplifyDemandedBits(Mask, DemandedBits, DCI)) {
      // The mask was simplified in place - revisit this node, unless the
      // simplification deleted it.
      if (N->getOpcode() != ISD::DELETED_NODE)
        DCI.AddToWorklist(N);
      return SDValue(N, 0);
    }
    // Otherwise, see if a simpler multi-use value can serve as the mask.
    if (SDValue NewMask =
            TLI.SimplifyMultipleUseDemandedBits(Mask, DemandedBits, DAG))
      return DAG.getMaskedLoad(
          VT, SDLoc(N), Mld->getChain(), Mld->getBasePtr(), Mld->getOffset(),
          NewMask, Mld->getPassThru(), Mld->getMemoryVT(), Mld->getMemOperand(),
          Mld->getAddressingMode(), Mld->getExtensionType());
  }

  return SDValue();
}
| 46053 | |
| 46054 | |
| 46055 | |
| 46056 | |
| 46057 | |
/// If exactly one element of the mask is set for a non-truncating masked
/// store, it is a vector extract and scalar store.
/// Note: the degenerate all-zeros / all-ones mask cases are expected to have
/// been optimized earlier, so they are not handled here.
static SDValue reduceMaskedStoreToScalarStore(MaskedStoreSDNode *MS,
                                              SelectionDAG &DAG,
                                              const X86Subtarget &Subtarget) {
  // TODO: This is not x86-specific, so it could be lifted to DAGCombiner.
  // However, some target hooks may need to be added to know when the transform
  // is profitable; endianness would also have to be considered.

  SDValue Addr, VecIndex;
  Align Alignment;
  unsigned Offset;
  if (!getParamsForOneTrueMaskedElt(MS, DAG, Addr, VecIndex, Alignment, Offset))
    return SDValue();

  // Extract the one scalar element that is actually being stored.
  SDLoc DL(MS);
  SDValue Value = MS->getValue();
  EVT VT = Value.getValueType();
  EVT EltVT = VT.getVectorElementType();
  if (EltVT == MVT::i64 && !Subtarget.is64Bit()) {
    // On 32-bit targets, bitcast to f64 lanes so the scalar store does not
    // require an illegal i64 operation.
    EltVT = MVT::f64;
    EVT CastVT = VT.changeVectorElementType(EltVT);
    Value = DAG.getBitcast(CastVT, Value);
  }
  SDValue Extract =
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Value, VecIndex);

  // Store that element at the appropriate offset from the base pointer.
  return DAG.getStore(MS->getChain(), DL, Extract, Addr,
                      MS->getPointerInfo().getWithOffset(Offset),
                      Alignment, MS->getMemOperand()->getFlags());
}
| 46089 | |
/// DAG combine for masked stores: scalarize single-element stores, simplify a
/// legalized (non-boolean) mask via demanded bits, and fold a truncate of the
/// stored value into a truncating masked store.
static SDValue combineMaskedStore(SDNode *N, SelectionDAG &DAG,
                                  TargetLowering::DAGCombinerInfo &DCI,
                                  const X86Subtarget &Subtarget) {
  MaskedStoreSDNode *Mst = cast<MaskedStoreSDNode>(N);
  if (Mst->isCompressingStore())
    return SDValue();

  EVT VT = Mst->getValue().getValueType();
  SDLoc dl(Mst);
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  // Already-truncating masked stores are left alone.
  if (Mst->isTruncatingStore())
    return SDValue();

  // A mask with a single true bit becomes a vector extract + scalar store.
  if (SDValue ScalarStore = reduceMaskedStoreToScalarStore(Mst, DAG, Subtarget))
    return ScalarStore;

  // If the mask value has been legalized to a non-boolean vector, try to
  // simplify ops leading up to it. We only demand the MSB of each lane.
  SDValue Mask = Mst->getMask();
  if (Mask.getScalarValueSizeInBits() != 1) {
    APInt DemandedBits(APInt::getSignMask(VT.getScalarSizeInBits()));
    if (TLI.SimplifyDemandedBits(Mask, DemandedBits, DCI)) {
      // The mask was simplified in place - revisit this node, unless the
      // simplification deleted it.
      if (N->getOpcode() != ISD::DELETED_NODE)
        DCI.AddToWorklist(N);
      return SDValue(N, 0);
    }
    // Otherwise, see if a simpler multi-use value can serve as the mask.
    if (SDValue NewMask =
            TLI.SimplifyMultipleUseDemandedBits(Mask, DemandedBits, DAG))
      return DAG.getMaskedStore(Mst->getChain(), SDLoc(N), Mst->getValue(),
                                Mst->getBasePtr(), Mst->getOffset(), NewMask,
                                Mst->getMemoryVT(), Mst->getMemOperand(),
                                Mst->getAddressingMode());
  }

  // If the stored value is a one-use truncate and a truncating store of the
  // wider source type is legal, fold the truncate into the masked store.
  SDValue Value = Mst->getValue();
  if (Value.getOpcode() == ISD::TRUNCATE && Value.getNode()->hasOneUse() &&
      TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
                            Mst->getMemoryVT())) {
    return DAG.getMaskedStore(Mst->getChain(), SDLoc(N), Value.getOperand(0),
                              Mst->getBasePtr(), Mst->getOffset(), Mask,
                              Mst->getMemoryVT(), Mst->getMemOperand(),
                              Mst->getAddressingMode(), true);
  }

  return SDValue();
}
| 46137 | |
/// DAG combine for ISD::STORE. Applies an ordered sequence of store rewrites:
/// vXi1 store legalization, splitting of slow/under-aligned wide stores,
/// folding truncations and saturation/average patterns into truncating
/// stores, address-space normalization, and i64 load->store pairs on 32-bit
/// targets via f64.
static SDValue combineStore(SDNode *N, SelectionDAG &DAG,
                            TargetLowering::DAGCombinerInfo &DCI,
                            const X86Subtarget &Subtarget) {
  StoreSDNode *St = cast<StoreSDNode>(N);
  EVT StVT = St->getMemoryVT();
  SDLoc dl(St);
  SDValue StoredVal = St->getValue();
  EVT VT = StoredVal.getValueType();
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  // Without AVX512, convert a store of vXi1 into a store of iX and a bitcast.
  if (!Subtarget.hasAVX512() && VT == StVT && VT.isVector() &&
      VT.getVectorElementType() == MVT::i1) {

    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), VT.getVectorNumElements());
    StoredVal = DAG.getBitcast(NewVT, StoredVal);

    return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),
                        St->getPointerInfo(), St->getOriginalAlign(),
                        St->getMemOperand()->getFlags());
  }

  // If this is a store of a scalar_to_vector of v1i1, just store the i8
  // scalar directly.
  if (VT == MVT::v1i1 && VT == StVT && Subtarget.hasAVX512() &&
      StoredVal.getOpcode() == ISD::SCALAR_TO_VECTOR &&
      StoredVal.getOperand(0).getValueType() == MVT::i8) {
    SDValue Val = StoredVal.getOperand(0);
    // We must store zeros to the unused bits.
    Val = DAG.getZeroExtendInReg(Val, dl, MVT::i1);
    return DAG.getStore(St->getChain(), dl, Val,
                        St->getBasePtr(), St->getPointerInfo(),
                        St->getOriginalAlign(),
                        St->getMemOperand()->getFlags());
  }

  // Widen v1i1/v2i1/v4i1 stores to v8i1.
  if ((VT == MVT::v1i1 || VT == MVT::v2i1 || VT == MVT::v4i1) && VT == StVT &&
      Subtarget.hasAVX512()) {
    unsigned NumConcats = 8 / VT.getVectorNumElements();
    // We must store zeros to the unused bits.
    SmallVector<SDValue, 4> Ops(NumConcats, DAG.getConstant(0, dl, VT));
    Ops[0] = StoredVal;
    StoredVal = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i1, Ops);
    return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),
                        St->getPointerInfo(), St->getOriginalAlign(),
                        St->getMemOperand()->getFlags());
  }

  // Turn vXi1 stores of constants into a scalar store.
  if ((VT == MVT::v8i1 || VT == MVT::v16i1 || VT == MVT::v32i1 ||
       VT == MVT::v64i1) && VT == StVT && TLI.isTypeLegal(VT) &&
      ISD::isBuildVectorOfConstantSDNodes(StoredVal.getNode())) {
    // If it's a v64i1 constant store on a 32-bit target, split it into two
    // 32-bit halves stored at offsets 0 and 4.
    if (!DCI.isBeforeLegalize() && VT == MVT::v64i1 && !Subtarget.is64Bit()) {
      SDValue Lo = DAG.getBuildVector(MVT::v32i1, dl,
                                      StoredVal->ops().slice(0, 32));
      Lo = combinevXi1ConstantToInteger(Lo, DAG);
      SDValue Hi = DAG.getBuildVector(MVT::v32i1, dl,
                                      StoredVal->ops().slice(32, 32));
      Hi = combinevXi1ConstantToInteger(Hi, DAG);

      SDValue Ptr0 = St->getBasePtr();
      SDValue Ptr1 = DAG.getMemBasePlusOffset(Ptr0, TypeSize::Fixed(4), dl);

      SDValue Ch0 =
          DAG.getStore(St->getChain(), dl, Lo, Ptr0, St->getPointerInfo(),
                       St->getOriginalAlign(),
                       St->getMemOperand()->getFlags());
      SDValue Ch1 =
          DAG.getStore(St->getChain(), dl, Hi, Ptr1,
                       St->getPointerInfo().getWithOffset(4),
                       St->getOriginalAlign(),
                       St->getMemOperand()->getFlags());
      return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Ch0, Ch1);
    }

    StoredVal = combinevXi1ConstantToInteger(StoredVal, DAG);
    return DAG.getStore(St->getChain(), dl, StoredVal, St->getBasePtr(),
                        St->getPointerInfo(), St->getOriginalAlign(),
                        St->getMemOperand()->getFlags());
  }

  // If this is a 256-bit store and the target reports this memory access as
  // slow, perform two 128-bit stores instead.
  bool Fast;
  if (VT.is256BitVector() && StVT == VT &&
      TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
                             *St->getMemOperand(), &Fast) &&
      !Fast) {
    unsigned NumElems = VT.getVectorNumElements();
    if (NumElems < 2)
      return SDValue();

    return splitVectorStore(St, DAG);
  }

  // Split under-aligned vector non-temporal stores.
  if (St->isNonTemporal() && StVT == VT &&
      St->getAlignment() < VT.getStoreSize()) {
    // 256/512-bit stores: split in half and retry - the halves may reach the
    // required alignment.
    if (VT.is256BitVector() || VT.is512BitVector()) {
      unsigned NumElems = VT.getVectorNumElements();
      if (NumElems < 2)
        return SDValue();
      return splitVectorStore(St, DAG);
    }

    // 128-bit stores: scalarize to f64 non-temporal stores with SSE4A,
    // otherwise to i64/i32 element stores.
    if (VT.is128BitVector() && Subtarget.hasSSE2()) {
      MVT NTVT = Subtarget.hasSSE4A()
                     ? MVT::v2f64
                     : (TLI.isTypeLegal(MVT::i64) ? MVT::v2i64 : MVT::v4i32);
      return scalarizeVectorStore(St, NTVT, DAG);
    }
  }

  // Try to optimize v16i16->v16i8 truncating stores when BWI is not
  // supported, but a v16i32->v16i8 truncating store is legal: extend to
  // v16i32 and emit a truncating store.
  if (!St->isTruncatingStore() && VT == MVT::v16i8 && !Subtarget.hasBWI() &&
      St->getValue().getOpcode() == ISD::TRUNCATE &&
      St->getValue().getOperand(0).getValueType() == MVT::v16i16 &&
      TLI.isTruncStoreLegal(MVT::v16i32, MVT::v16i8) &&
      St->getValue().hasOneUse() && !DCI.isBeforeLegalizeOps()) {
    SDValue Ext = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::v16i32, St->getValue());
    return DAG.getTruncStore(St->getChain(), dl, Ext, St->getBasePtr(),
                             MVT::v16i8, St->getMemOperand());
  }

  // Try to fold a VTRUNCUS or VTRUNCS into a truncating store.
  if (!St->isTruncatingStore() && StoredVal.hasOneUse() &&
      (StoredVal.getOpcode() == X86ISD::VTRUNCUS ||
       StoredVal.getOpcode() == X86ISD::VTRUNCS) &&
      TLI.isTruncStoreLegal(StoredVal.getOperand(0).getValueType(), VT)) {
    bool IsSigned = StoredVal.getOpcode() == X86ISD::VTRUNCS;
    return EmitTruncSStore(IsSigned, St->getChain(),
                           dl, StoredVal.getOperand(0), St->getBasePtr(),
                           VT, St->getMemOperand(), DAG);
  }

  // Try to fold an extract_element(VTRUNC) pattern into a truncating store.
  if (!St->isTruncatingStore() && StoredVal.hasOneUse()) {
    // Peek through a one-use truncate to an extraction of element 0.
    auto IsExtractedElement = [](SDValue V) {
      if (V.getOpcode() == ISD::TRUNCATE && V.getOperand(0).hasOneUse())
        V = V.getOperand(0);
      unsigned Opc = V.getOpcode();
      if (Opc == ISD::EXTRACT_VECTOR_ELT || Opc == X86ISD::PEXTRW) {
        if (V.getOperand(0).hasOneUse() && isNullConstant(V.getOperand(1)))
          return V.getOperand(0);
      }
      return SDValue();
    };
    if (SDValue Extract = IsExtractedElement(StoredVal)) {
      SDValue Trunc = peekThroughOneUseBitcasts(Extract);
      if (Trunc.getOpcode() == X86ISD::VTRUNC) {
        SDValue Src = Trunc.getOperand(0);
        MVT DstVT = Trunc.getSimpleValueType();
        MVT SrcVT = Src.getSimpleValueType();
        unsigned NumSrcElts = SrcVT.getVectorNumElements();
        unsigned NumTruncBits = DstVT.getScalarSizeInBits() * NumSrcElts;
        MVT TruncVT = MVT::getVectorVT(DstVT.getScalarType(), NumSrcElts);
        // Only fold if the truncated result is exactly the stored width.
        if (NumTruncBits == VT.getSizeInBits() &&
            TLI.isTruncStoreLegal(SrcVT, TruncVT)) {
          return DAG.getTruncStore(St->getChain(), dl, Src, St->getBasePtr(),
                                   TruncVT, St->getMemOperand());
        }
      }
    }
  }

  // Optimize trunc store (of multiple scalars) to shuffle and store.
  if (St->isTruncatingStore() && VT.isVector()) {
    // Check if we can detect an AVG pattern from the truncation. If yes,
    // replace the trunc store by a normal store with the result of the
    // X86ISD::AVG instruction.
    if (DCI.isBeforeLegalize() || TLI.isTypeLegal(St->getMemoryVT()))
      if (SDValue Avg = detectAVGPattern(St->getValue(), St->getMemoryVT(), DAG,
                                         Subtarget, dl))
        return DAG.getStore(St->getChain(), dl, Avg, St->getBasePtr(),
                            St->getPointerInfo(), St->getOriginalAlign(),
                            St->getMemOperand()->getFlags());

    // Likewise, detect signed/unsigned saturation patterns and emit the
    // matching saturating truncating store.
    if (TLI.isTruncStoreLegal(VT, StVT)) {
      if (SDValue Val = detectSSatPattern(St->getValue(), St->getMemoryVT()))
        return EmitTruncSStore(true /* Signed saturation */, St->getChain(),
                               dl, Val, St->getBasePtr(),
                               St->getMemoryVT(), St->getMemOperand(), DAG);
      if (SDValue Val = detectUSatPattern(St->getValue(), St->getMemoryVT(),
                                          DAG, dl))
        return EmitTruncSStore(false /* Unsigned saturation */, St->getChain(),
                               dl, Val, St->getBasePtr(),
                               St->getMemoryVT(), St->getMemOperand(), DAG);
    }

    return SDValue();
  }

  // Cast ptr32 and ptr64 pointers to the default address space before a store.
  unsigned AddrSpace = St->getAddressSpace();
  if (AddrSpace == X86AS::PTR64 || AddrSpace == X86AS::PTR32_SPTR ||
      AddrSpace == X86AS::PTR32_UPTR) {
    MVT PtrVT = TLI.getPointerTy(DAG.getDataLayout());
    if (PtrVT != St->getBasePtr().getSimpleValueType()) {
      SDValue Cast =
          DAG.getAddrSpaceCast(dl, PtrVT, St->getBasePtr(), AddrSpace, 0);
      return DAG.getStore(St->getChain(), dl, StoredVal, Cast,
                          St->getPointerInfo(), St->getOriginalAlign(),
                          St->getMemOperand()->getFlags(), St->getAAInfo());
    }
  }

  // The remaining transforms only apply to 64-bit stored values.
  if (VT.getSizeInBits() != 64)
    return SDValue();

  const Function &F = DAG.getMachineFunction().getFunction();
  bool NoImplicitFloatOps = F.hasFnAttribute(Attribute::NoImplicitFloat);
  bool F64IsLegal =
      !Subtarget.useSoftFloat() && !NoImplicitFloatOps && Subtarget.hasSSE2();
  // On 32-bit targets with SSE2, turn an i64 load->store pair into a single
  // f64 load/store pair instead of splitting it into two i32 operations.
  if ((VT == MVT::i64 && F64IsLegal && !Subtarget.is64Bit()) &&
      isa<LoadSDNode>(St->getValue()) &&
      cast<LoadSDNode>(St->getValue())->isSimple() &&
      St->getChain().hasOneUse() && St->isSimple()) {
    LoadSDNode *Ld = cast<LoadSDNode>(St->getValue().getNode());

    if (!ISD::isNormalLoad(Ld))
      return SDValue();

    // Avoid the transformation if there are multiple uses of the loaded value.
    if (!Ld->hasNUsesOfValue(1, 0))
      return SDValue();

    SDLoc LdDL(Ld);
    SDLoc StDL(N);
    // Reload the value as f64 and store that instead.
    SDValue NewLd = DAG.getLoad(MVT::f64, LdDL, Ld->getChain(),
                                Ld->getBasePtr(), Ld->getMemOperand());

    // Make sure the new load is placed in the same chain order.
    DAG.makeEquivalentMemoryOrdering(Ld, NewLd);
    return DAG.getStore(St->getChain(), StDL, NewLd, St->getBasePtr(),
                        St->getMemOperand());
  }

  // Similarly, on a 32-bit target an i64 store of a vector element can be
  // done as an f64 extract+store by bitcasting the source vector to f64
  // lanes, avoiding illegal i64 operations.
  if (VT == MVT::i64 && F64IsLegal && !Subtarget.is64Bit() &&
      St->getOperand(1).getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
    SDValue OldExtract = St->getOperand(1);
    SDValue ExtOp0 = OldExtract.getOperand(0);
    unsigned VecSize = ExtOp0.getValueSizeInBits();
    EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, VecSize / 64);
    SDValue BitCast = DAG.getBitcast(VecVT, ExtOp0);
    SDValue NewExtract = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64,
                                     BitCast, OldExtract.getOperand(1));
    return DAG.getStore(St->getChain(), dl, NewExtract, St->getBasePtr(),
                        St->getPointerInfo(), St->getOriginalAlign(),
                        St->getMemOperand()->getFlags());
  }

  return SDValue();
}
| 46413 | |
/// DAG combine for X86ISD::VEXTRACT_STORE: simplify the stored vector value
/// based on which of its elements the store actually writes to memory.
static SDValue combineVEXTRACT_STORE(SDNode *N, SelectionDAG &DAG,
                                     TargetLowering::DAGCombinerInfo &DCI,
                                     const X86Subtarget &Subtarget) {
  auto *St = cast<MemIntrinsicSDNode>(N);

  SDValue StoredVal = N->getOperand(1);
  MVT VT = StoredVal.getSimpleValueType();
  EVT MemVT = St->getMemoryVT();

  // Figure out which elements are demanded: only the low MemVT-sized portion
  // of the vector is stored.
  unsigned StElts = MemVT.getSizeInBits() / VT.getScalarSizeInBits();
  APInt DemandedElts = APInt::getLowBitsSet(VT.getVectorNumElements(), StElts);

  APInt KnownUndef, KnownZero;
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (TLI.SimplifyDemandedVectorElts(StoredVal, DemandedElts, KnownUndef,
                                     KnownZero, DCI)) {
    // The stored value was simplified in place - revisit this node, unless
    // the simplification deleted it.
    if (N->getOpcode() != ISD::DELETED_NODE)
      DCI.AddToWorklist(N);
    return SDValue(N, 0);
  }

  return SDValue();
}
| 46438 | |
| 46439 | |
| 46440 | |
| 46441 | |
| 46442 | |
| 46443 | |
| 46444 | |
| 46445 | |
| 46446 | |
| 46447 | |
| 46448 | |
| 46449 | |
| 46450 | |
| 46451 | |
/// Return 'true' if this vector operation can be lowered to a horizontal
/// add/sub of the given X86 opcode \p HOpcode. On success, \p LHS and \p RHS
/// are rewritten (bitcast to VT) to the two source vectors of the horizontal
/// op, and \p PostShuffleMask receives a mask (possibly empty) that must be
/// applied to the horizontal op's result to recreate the original element
/// order. \p IsCommutative allows matching operand pairs in either order.
static bool isHorizontalBinOp(unsigned HOpcode, SDValue &LHS, SDValue &RHS,
                              SelectionDAG &DAG, const X86Subtarget &Subtarget,
                              bool IsCommutative,
                              SmallVectorImpl<int> &PostShuffleMask) {
  // If either operand is undef, bail out - other combines should simplify
  // the binop first.
  if (LHS.isUndef() || RHS.isUndef())
    return false;

  MVT VT = LHS.getSimpleValueType();
  assert((VT.is128BitVector() || VT.is256BitVector()) &&
         "Unsupported vector type for horizontal add/sub");
  unsigned NumElts = VT.getVectorNumElements();

  // Decompose Op into a shuffle of inputs N0/N1 with mask ShuffleMask (scaled
  // to NumElts elements). If Op is the low half extract of a 256-bit shuffle,
  // look through the extract and split the wider shuffle instead.
  auto GetShuffle = [&](SDValue Op, SDValue &N0, SDValue &N1,
                        SmallVectorImpl<int> &ShuffleMask) {
    bool UseSubVector = false;
    if (Op.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
        Op.getOperand(0).getValueType().is256BitVector() &&
        llvm::isNullConstant(Op.getOperand(1))) {
      Op = Op.getOperand(0);
      UseSubVector = true;
    }
    SmallVector<SDValue, 2> SrcOps;
    SmallVector<int, 16> SrcMask, ScaledMask;
    SDValue BC = peekThroughBitcasts(Op);
    // Only accept shuffles without zeroed lanes and whose inputs are all the
    // same width as the shuffle itself.
    if (getTargetShuffleInputs(BC, SrcOps, SrcMask, DAG) &&
        !isAnyZero(SrcMask) && all_of(SrcOps, [BC](SDValue Op) {
          return Op.getValueSizeInBits() == BC.getValueSizeInBits();
        })) {
      resolveTargetShuffleInputsAndMask(SrcOps, SrcMask);
      if (!UseSubVector && SrcOps.size() <= 2 &&
          scaleShuffleElements(SrcMask, NumElts, ScaledMask)) {
        N0 = SrcOps.size() > 0 ? SrcOps[0] : SDValue();
        N1 = SrcOps.size() > 1 ? SrcOps[1] : SDValue();
        ShuffleMask.assign(ScaledMask.begin(), ScaledMask.end());
      }
      if (UseSubVector && SrcOps.size() == 1 &&
          scaleShuffleElements(SrcMask, 2 * NumElts, ScaledMask)) {
        // Split the single wide source into two halves and keep only the
        // low-half portion of the mask (the part the extract selected).
        std::tie(N0, N1) = DAG.SplitVector(SrcOps[0], SDLoc(Op));
        ArrayRef<int> Mask = ArrayRef<int>(ScaledMask).slice(0, NumElts);
        ShuffleMask.assign(Mask.begin(), Mask.end());
      }
    }
  };

  // View LHS in the form
  //   LHS = VECTOR_SHUFFLE A, B, LMask
  SDValue A, B;
  SmallVector<int, 16> LMask;
  GetShuffle(LHS, A, B, LMask);

  // Likewise, view RHS in the form
  //   RHS = VECTOR_SHUFFLE C, D, RMask
  SDValue C, D;
  SmallVector<int, 16> RMask;
  GetShuffle(RHS, C, D, RMask);

  // At least one of the operands should be a vector shuffle.
  unsigned NumShuffles = (LMask.empty() ? 0 : 1) + (RMask.empty() ? 0 : 1);
  if (NumShuffles == 0)
    return false;

  // If LHS is not a shuffle, treat it as the identity shuffle of itself.
  if (LMask.empty()) {
    A = LHS;
    for (unsigned i = 0; i != NumElts; ++i)
      LMask.push_back(i);
  }

  // Likewise for RHS.
  if (RMask.empty()) {
    C = RHS;
    for (unsigned i = 0; i != NumElts; ++i)
      RMask.push_back(i);
  }

  // If a mask only references one of its two inputs, drop the unused one so
  // the A==C / B==D matching below isn't confused by dead inputs.
  if (isUndefOrInRange(LMask, 0, NumElts))
    B = SDValue();
  else if (isUndefOrInRange(LMask, NumElts, NumElts * 2))
    A = SDValue();

  if (isUndefOrInRange(RMask, 0, NumElts))
    D = SDValue();
  else if (isUndefOrInRange(RMask, NumElts, NumElts * 2))
    C = SDValue();

  // If A and C are not the same, try commuting RHS's inputs (and its mask)
  // to line the sources up.
  if (A != C) {
    std::swap(C, D);
    ShuffleVectorSDNode::commuteMask(RMask);
  }

  // Both shuffles must now read from the same pair of sources.
  if (!(A == C && B == D))
    return false;

  PostShuffleMask.clear();
  PostShuffleMask.append(NumElts, SM_SentinelUndef);

  // Horizontal ops work per 128-bit lane: within each lane, the op combines
  // adjacent element pairs (LIdx, LIdx+1). Validate that LMask/RMask select
  // such pairs and build the post-shuffle that restores the requested order.
  unsigned Num128BitChunks = VT.getSizeInBits() / 128;
  unsigned NumEltsPer128BitChunk = NumElts / Num128BitChunks;
  unsigned NumEltsPer64BitChunk = NumEltsPer128BitChunk / 2;
  assert((NumEltsPer128BitChunk % 2 == 0) &&
         "Vector type should have an even number of elements in each lane");
  for (unsigned j = 0; j != NumElts; j += NumEltsPer128BitChunk) {
    for (unsigned i = 0; i != NumEltsPer128BitChunk; ++i) {
      // Ignore undefined components - also treat a lane as undef if it reads
      // a source that has been dropped above.
      int LIdx = LMask[i + j], RIdx = RMask[i + j];
      if (LIdx < 0 || RIdx < 0 ||
          (!A.getNode() && (LIdx < (int)NumElts || RIdx < (int)NumElts)) ||
          (!B.getNode() && (LIdx >= (int)NumElts || RIdx >= (int)NumElts)))
        continue;

      // The low half of the 128-bit result must choose from A.
      // The high half of the 128-bit result must choose from B,
      // unless B is undef. For commutative ops, the (RIdx, LIdx) order is
      // also acceptable.
      if (!((RIdx & 1) == 1 && (LIdx + 1) == RIdx) &&
          !((LIdx & 1) == 1 && (RIdx + 1) == LIdx && IsCommutative))
        return false;

      // Compute the result index of this element pair in the horizontal op:
      // pair position within its lane, plus the lane's base offset.
      int Base = LIdx & ~1u;
      int Index = ((Base % NumEltsPer128BitChunk) / 2) +
                  ((Base % NumElts) & ~(NumEltsPer128BitChunk - 1));

      // Pairs drawn from the second source (B) land in the upper 64-bit half
      // of each lane; with a single source, the second half of the lane also
      // maps there.
      if ((B && Base >= (int)NumElts) || (!B && i >= NumEltsPer64BitChunk))
        Index += NumEltsPer64BitChunk;
      PostShuffleMask[i + j] = Index;
    }
  }

  // If one source was dropped as unused, feed the surviving source to both
  // operands of the horizontal op.
  SDValue NewLHS = A.getNode() ? A : B;
  SDValue NewRHS = B.getNode() ? B : A;

  // An identity post-shuffle is a no-op - drop it.
  bool IsIdentityPostShuffle =
      isSequentialOrUndefInRange(PostShuffleMask, 0, NumElts, 0);
  if (IsIdentityPostShuffle)
    PostShuffleMask.clear();

  // Avoid cross-lane FP post-shuffles on pre-AVX2 targets, where they are
  // harder to lower (see the isMultiLaneShuffleMask gate).
  if (!IsIdentityPostShuffle && !Subtarget.hasAVX2() && VT.isFloatingPoint() &&
      isMultiLaneShuffleMask(128, VT.getScalarSizeInBits(), PostShuffleMask))
    return false;

  // If both sources already feed an existing horizontal op of the same kind
  // and type, forming another one is assumed profitable regardless of the
  // usual heuristic.
  bool FoundHorizLHS = llvm::any_of(NewLHS->uses(), [&](SDNode *User) {
    return User->getOpcode() == HOpcode && User->getValueType(0) == VT;
  });
  bool FoundHorizRHS = llvm::any_of(NewRHS->uses(), [&](SDNode *User) {
    return User->getOpcode() == HOpcode && User->getValueType(0) == VT;
  });
  bool ForceHorizOp = FoundHorizLHS && FoundHorizRHS;

  // Otherwise defer to the target heuristic; the flag passed in reflects
  // whether this op only reads the lower half of a single repeated source.
  if (!ForceHorizOp &&
      !shouldUseHorizontalOp(NewLHS == NewRHS &&
                                 (NumShuffles < 2 || !IsIdentityPostShuffle),
                             DAG, Subtarget))
    return false;

  LHS = DAG.getBitcast(VT, NewLHS);
  RHS = DAG.getBitcast(VT, NewRHS);
  return true;
}
| 46638 | |
| 46639 | |
/// Try to fold a vector add/sub/fadd/fsub of interleaved elements into an
/// X86 horizontal add/sub node (HADD/HSUB/FHADD/FHSUB), optionally followed
/// by a shuffle that restores the original element order.
static SDValue combineToHorizontalAddSub(SDNode *N, SelectionDAG &DAG,
                                         const X86Subtarget &Subtarget) {
  EVT VT = N->getValueType(0);
  unsigned Opcode = N->getOpcode();
  bool IsAdd = (Opcode == ISD::FADD) || (Opcode == ISD::ADD);
  SmallVector<int, 8> PostShuffleMask;

  switch (Opcode) {
  case ISD::FADD:
  case ISD::FSUB:
    // FP horizontal ops: 128-bit forms need SSE3, 256-bit forms need AVX.
    if ((Subtarget.hasSSE3() && (VT == MVT::v4f32 || VT == MVT::v2f64)) ||
        (Subtarget.hasAVX() && (VT == MVT::v8f32 || VT == MVT::v4f64))) {
      SDValue LHS = N->getOperand(0);
      SDValue RHS = N->getOperand(1);
      auto HorizOpcode = IsAdd ? X86ISD::FHADD : X86ISD::FHSUB;
      // isHorizontalBinOp rewrites LHS/RHS and may request a post-shuffle.
      if (isHorizontalBinOp(HorizOpcode, LHS, RHS, DAG, Subtarget, IsAdd,
                            PostShuffleMask)) {
        SDValue HorizBinOp = DAG.getNode(HorizOpcode, SDLoc(N), VT, LHS, RHS);
        if (!PostShuffleMask.empty())
          HorizBinOp = DAG.getVectorShuffle(VT, SDLoc(HorizBinOp), HorizBinOp,
                                            DAG.getUNDEF(VT), PostShuffleMask);
        return HorizBinOp;
      }
    }
    break;
  case ISD::ADD:
  case ISD::SUB:
    // Integer horizontal ops need SSSE3. 256-bit types are accepted here and
    // split as needed by SplitOpsAndApply below.
    if (Subtarget.hasSSSE3() && (VT == MVT::v8i16 || VT == MVT::v4i32 ||
                                 VT == MVT::v16i16 || VT == MVT::v8i32)) {
      SDValue LHS = N->getOperand(0);
      SDValue RHS = N->getOperand(1);
      auto HorizOpcode = IsAdd ? X86ISD::HADD : X86ISD::HSUB;
      if (isHorizontalBinOp(HorizOpcode, LHS, RHS, DAG, Subtarget, IsAdd,
                            PostShuffleMask)) {
        // Builder for SplitOpsAndApply: emit the horizontal op on each
        // (possibly split) operand piece.
        auto HOpBuilder = [HorizOpcode](SelectionDAG &DAG, const SDLoc &DL,
                                        ArrayRef<SDValue> Ops) {
          return DAG.getNode(HorizOpcode, DL, Ops[0].getValueType(), Ops);
        };
        SDValue HorizBinOp = SplitOpsAndApply(DAG, Subtarget, SDLoc(N), VT,
                                              {LHS, RHS}, HOpBuilder);
        if (!PostShuffleMask.empty())
          HorizBinOp = DAG.getVectorShuffle(VT, SDLoc(HorizBinOp), HorizBinOp,
                                            DAG.getUNDEF(VT), PostShuffleMask);
        return HorizBinOp;
      }
    }
    break;
  }

  return SDValue();
}
| 46691 | |
| 46692 | |
| 46693 | static SDValue combineFaddFsub(SDNode *N, SelectionDAG &DAG, |
| 46694 | const X86Subtarget &Subtarget) { |
| 46695 | if (SDValue HOp = combineToHorizontalAddSub(N, DAG, Subtarget)) |
| 46696 | return HOp; |
| 46697 | return SDValue(); |
| 46698 | } |
| 46699 | |
| 46700 | |
| 46701 | |
| 46702 | |
| 46703 | |
| 46704 | |
/// Attempt to pre-truncate the inputs of binary arithmetic:
///   trunc (binop X, Y) --> binop (trunc X), (trunc Y)
/// This is only done when the narrow binop is legal and at least one operand
/// truncates for free (or the operands are identical), so no extra cost is
/// introduced.
static SDValue combineTruncatedArithmetic(SDNode *N, SelectionDAG &DAG,
                                          const X86Subtarget &Subtarget,
                                          const SDLoc &DL) {
  assert(N->getOpcode() == ISD::TRUNCATE && "Wrong opcode");
  SDValue Src = N->getOperand(0);
  unsigned SrcOpcode = Src.getOpcode();
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  EVT VT = N->getValueType(0);
  EVT SrcVT = Src.getValueType();

  // A truncation is "free" when the operand is an any/sign/zero extend from a
  // type no wider than the truncated result (the trunc simply undoes the
  // extend), or a build vector of constants (which constant-folds).
  auto IsFreeTruncation = [VT](SDValue Op) {
    unsigned TruncSizeInBits = VT.getScalarSizeInBits();

    unsigned Opcode = Op.getOpcode();
    if ((Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND ||
         Opcode == ISD::ZERO_EXTEND) &&
        Op.getOperand(0).getScalarValueSizeInBits() <= TruncSizeInBits)
      return true;

    return ISD::isBuildVectorOfConstantSDNodes(Op.getNode());
  };

  // Build the narrowed binop: truncate both operands, then re-emit SrcOpcode
  // at the narrow type.
  auto TruncateArithmetic = [&](SDValue N0, SDValue N1) {
    SDValue Trunc0 = DAG.getNode(ISD::TRUNCATE, DL, VT, N0);
    SDValue Trunc1 = DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
    return DAG.getNode(SrcOpcode, DL, VT, Trunc0, Trunc1);
  };

  // Don't combine if the binop has other uses - the wide op would survive
  // anyway and we'd just add nodes.
  if (!Src.hasOneUse())
    return SDValue();

  // Only vector truncations are handled here.
  if (!VT.isVector())
    return SDValue();

  switch (SrcOpcode) {
  case ISD::MUL:
    // Narrow an i64 multiply unconditionally when the narrow multiply is
    // legal but the wide one is not (vector i64 multiplies are expensive on
    // x86 without AVX512DQ).
    if (SrcVT.getScalarType() == MVT::i64 &&
        TLI.isOperationLegal(SrcOpcode, VT) &&
        !TLI.isOperationLegal(SrcOpcode, SrcVT))
      return TruncateArithmetic(Src.getOperand(0), Src.getOperand(1));
    LLVM_FALLTHROUGH;
  case ISD::AND:
  case ISD::XOR:
  case ISD::OR:
  case ISD::ADD:
  case ISD::SUB: {
    // Otherwise only pre-truncate when at most one real truncation is paid:
    // the operands are identical, or at least one truncates for free.
    SDValue Op0 = Src.getOperand(0);
    SDValue Op1 = Src.getOperand(1);
    if (TLI.isOperationLegal(SrcOpcode, VT) &&
        (Op0 == Op1 || IsFreeTruncation(Op0) || IsFreeTruncation(Op1)))
      return TruncateArithmetic(Op0, Op1);
    break;
  }
  }

  return SDValue();
}
| 46778 | |
| 46779 | |
| 46780 | |
| 46781 | |
| 46782 | |
| 46783 | static SDValue combineVectorTruncationWithPACKUS(SDNode *N, const SDLoc &DL, |
| 46784 | const X86Subtarget &Subtarget, |
| 46785 | SelectionDAG &DAG) { |
| 46786 | SDValue In = N->getOperand(0); |
| 46787 | EVT InVT = In.getValueType(); |
| 46788 | EVT OutVT = N->getValueType(0); |
| 46789 | |
| 46790 | APInt Mask = APInt::getLowBitsSet(InVT.getScalarSizeInBits(), |
| 46791 | OutVT.getScalarSizeInBits()); |
| 46792 | In = DAG.getNode(ISD::AND, DL, InVT, In, DAG.getConstant(Mask, DL, InVT)); |
| 46793 | return truncateVectorWithPACK(X86ISD::PACKUS, OutVT, In, DL, DAG, Subtarget); |
| 46794 | } |
| 46795 | |
| 46796 | |
| 46797 | static SDValue combineVectorTruncationWithPACKSS(SDNode *N, const SDLoc &DL, |
| 46798 | const X86Subtarget &Subtarget, |
| 46799 | SelectionDAG &DAG) { |
| 46800 | SDValue In = N->getOperand(0); |
| 46801 | EVT InVT = In.getValueType(); |
| 46802 | EVT OutVT = N->getValueType(0); |
| 46803 | In = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, InVT, In, |
| 46804 | DAG.getValueType(OutVT)); |
| 46805 | return truncateVectorWithPACK(X86ISD::PACKSS, OutVT, In, DL, DAG, Subtarget); |
| 46806 | } |
| 46807 | |
| 46808 | |
| 46809 | |
| 46810 | |
| 46811 | |
| 46812 | |
/// Transform a vector truncation (vXi64/vXi32/vXi16 -> vXi16/vXi8) into a
/// sequence of PACKUS/PACKSS operations on pre-AVX512 targets, which is
/// cheaper than the default legalization.
static SDValue combineVectorTruncation(SDNode *N, SelectionDAG &DAG,
                                       const X86Subtarget &Subtarget) {
  EVT OutVT = N->getValueType(0);
  if (!OutVT.isVector())
    return SDValue();

  SDValue In = N->getOperand(0);
  if (!In.getValueType().isSimple())
    return SDValue();

  EVT InVT = In.getValueType();
  unsigned NumElems = OutVT.getVectorNumElements();

  // AVX512 has its own fast truncate instructions; SSE2 is the minimum for
  // the PACK instructions used here.
  if (!Subtarget.hasSSE2() || Subtarget.hasAVX512())
    return SDValue();

  // Only handle power-of-2-sized truncations from i16/i32/i64 elements down
  // to i8/i16, with at least 8 result elements.
  EVT OutSVT = OutVT.getVectorElementType();
  EVT InSVT = InVT.getVectorElementType();
  if (!((InSVT == MVT::i16 || InSVT == MVT::i32 || InSVT == MVT::i64) &&
        (OutSVT == MVT::i8 || OutSVT == MVT::i16) && isPowerOf2_32(NumElems) &&
        NumElems >= 8))
    return SDValue();

  // With SSSE3, an 8-element non-i64 truncation is better served by a
  // shuffle (pshufb) lowering - leave it alone.
  if (Subtarget.hasSSSE3() && NumElems == 8 && InSVT != MVT::i64)
    return SDValue();

  SDLoc DL(N);

  // PACKUS (mask + pack) is usable whenever PACKUSDW/PACKUSWB are available:
  // always for i8 results, and with SSE4.1 for i16 results. Otherwise fall
  // back to PACKSS (sign-extend-inreg + pack), which pre-SSE4.1 only helps
  // for i32 sources.
  if (Subtarget.hasSSE41() || OutSVT == MVT::i8)
    return combineVectorTruncationWithPACKUS(N, DL, Subtarget, DAG);
  if (InSVT == MVT::i32)
    return combineVectorTruncationWithPACKSS(N, DL, Subtarget, DAG);

  return SDValue();
}
| 46852 | |
| 46853 | |
| 46854 | |
| 46855 | |
/// Transform a vector truncation of "extended sign-bit" or "extended
/// zero-bit" values into just a PACKSS/PACKUS of the original value - the
/// pack's saturation is a no-op when the discarded bits are known copies of
/// the sign bit (PACKSS) or known zero (PACKUS).
static SDValue combineVectorSignBitsTruncation(SDNode *N, const SDLoc &DL,
                                               SelectionDAG &DAG,
                                               const X86Subtarget &Subtarget) {
  // Requires SSE2.
  if (!Subtarget.hasSSE2())
    return SDValue();

  if (!N->getValueType(0).isVector() || !N->getValueType(0).isSimple())
    return SDValue();

  SDValue In = N->getOperand(0);
  if (!In.getValueType().isSimple())
    return SDValue();

  MVT VT = N->getValueType(0).getSimpleVT();
  MVT SVT = VT.getScalarType();

  MVT InVT = In.getValueType().getSimpleVT();
  MVT InSVT = InVT.getScalarType();

  // Check we have a truncation suited for PACKSS/PACKUS: power-of-2 element
  // count, i8/i16/i32 results, i16/i32/i64 sources.
  if (!isPowerOf2_32(VT.getVectorNumElements()))
    return SDValue();
  if (SVT != MVT::i8 && SVT != MVT::i16 && SVT != MVT::i32)
    return SDValue();
  if (InSVT != MVT::i16 && InSVT != MVT::i32 && InSVT != MVT::i64)
    return SDValue();

  // Truncation to a sub-128-bit vXi32 result is better handled elsewhere.
  if (SVT == MVT::i32 && VT.getSizeInBits() < 128)
    return SDValue();

  // AVX512 has fast native truncates, so normally skip the pack lowering.
  // The exception carved out below: 512->256 truncation when 512-bit regs
  // are disabled, and <=128-bit results whose source is a concatenation.
  if (Subtarget.hasAVX512() &&
      !(!Subtarget.useAVX512Regs() && VT.is256BitVector() &&
        InVT.is512BitVector())) {
    SmallVector<SDValue> ConcatOps;
    if (VT.getSizeInBits() > 128 || !collectConcatOps(In.getNode(), ConcatOps))
      return SDValue();
  }

  // PACKSS saturates to 16 (or fewer) bits; pre-SSE4.1 the only unsigned
  // pack is PACKUSWB, which packs to 8 bits.
  unsigned NumPackedSignBits = std::min<unsigned>(SVT.getSizeInBits(), 16);
  unsigned NumPackedZeroBits = Subtarget.hasSSE41() ? NumPackedSignBits : 8;

  // Use PACKUS if the input's discarded high bits are all known zero
  // (e.g. masks, zext_in_reg patterns).
  KnownBits Known = DAG.computeKnownBits(In);
  unsigned NumLeadingZeroBits = Known.countMinLeadingZeros();
  if (NumLeadingZeroBits >= (InSVT.getSizeInBits() - NumPackedZeroBits))
    return truncateVectorWithPACK(X86ISD::PACKUS, VT, In, DL, DAG, Subtarget);

  // Otherwise look for replicated sign bits (e.g. comparison results,
  // sext_in_reg patterns) and use PACKSS.
  unsigned NumSignBits = DAG.ComputeNumSignBits(In);

  // For i32 results, require a full sign splat (every source bit is a copy
  // of the sign bit) before using PACKSS.
  if (SVT == MVT::i32 && NumSignBits != InSVT.getSizeInBits())
    return SDValue();

  unsigned MinSignBits = InSVT.getSizeInBits() - NumPackedSignBits;
  if (NumSignBits > MinSignBits)
    return truncateVectorWithPACK(X86ISD::PACKSS, VT, In, DL, DAG, Subtarget);

  // If the input is a logical shift right whose shifted-in zeros are exactly
  // the bits the truncation discards, an arithmetic shift produces the same
  // kept bits but with replicated sign bits - convert srl -> sra and pack
  // with PACKSS. (SimplifyDemandedBits often relaxes sra to srl, so this
  // effectively reverses that.)
  if (In.getOpcode() == ISD::SRL && N->isOnlyUserOf(In.getNode()))
    if (const APInt *ShAmt = DAG.getValidShiftAmountConstant(
            In, APInt::getAllOnesValue(VT.getVectorNumElements()))) {
      if (*ShAmt == MinSignBits) {
        SDValue NewIn = DAG.getNode(ISD::SRA, DL, InVT, In->ops());
        return truncateVectorWithPACK(X86ISD::PACKSS, VT, NewIn, DL, DAG,
                                      Subtarget);
      }
    }

  return SDValue();
}
| 46939 | |
| 46940 | |
| 46941 | |
| 46942 | |
| 46943 | |
| 46944 | |
| 46945 | |
| 46946 | |
/// Attempt to fold a high-half multiply pattern into PMULHW/PMULHUW:
///   trunc-to-vXi16 (srl (mul (ext X), (ext Y)), 16) --> mulhs/mulhu X, Y
/// \p Src is the truncate's source; \p VT is the vXi16 result type.
static SDValue combinePMULH(SDValue Src, EVT VT, const SDLoc &DL,
                            SelectionDAG &DAG, const X86Subtarget &Subtarget) {
  // First instruction must be a right shift of a multiply.
  if (Src.getOpcode() != ISD::SRL ||
      Src.getOperand(0).getOpcode() != ISD::MUL)
    return SDValue();

  if (!Subtarget.hasSSE2())
    return SDValue();

  // Only handle vXi16 result types.
  if (!VT.isVector() || VT.getVectorElementType() != MVT::i16)
    return SDValue();

  // The widened multiply must be on elements of at least 32 bits so the
  // high 16 bits of each product are actually present.
  EVT InVT = Src.getValueType();
  if (InVT.getVectorElementType().getSizeInBits() < 32)
    return SDValue();

  // The shift must be a uniform shift by exactly 16.
  APInt ShiftAmt;
  if (!ISD::isConstantSplatVector(Src.getOperand(1).getNode(), ShiftAmt) ||
      ShiftAmt != 16)
    return SDValue();

  SDValue LHS = Src.getOperand(0).getOperand(0);
  SDValue RHS = Src.getOperand(0).getOperand(1);

  // Both multiply operands must be the same kind of extend: sign-extend
  // selects MULHS, zero-extend selects MULHU.
  unsigned ExtOpc = LHS.getOpcode();
  if ((ExtOpc != ISD::SIGN_EXTEND && ExtOpc != ISD::ZERO_EXTEND) ||
      RHS.getOpcode() != ExtOpc)
    return SDValue();

  // Peek through the extends.
  LHS = LHS.getOperand(0);
  RHS = RHS.getOperand(0);

  // The pre-extension operands must already be the vXi16 result type.
  if (LHS.getValueType() != VT || RHS.getValueType() != VT)
    return SDValue();

  unsigned Opc = ExtOpc == ISD::SIGN_EXTEND ? ISD::MULHS : ISD::MULHU;
  return DAG.getNode(Opc, DL, VT, LHS, RHS);
}
| 46992 | |
| 46993 | |
| 46994 | |
| 46995 | |
| 46996 | |
| 46997 | |
| 46998 | |
| 46999 | |
| 47000 | |
/// Attempt to match the PMADDUBSW pattern on a truncate source \p In:
///   ssat16(add(mul(zext(i8 a), sext(i8 b)), mul(zext(i8 c), sext(i8 d))))
/// where each result element i sums the products of adjacent (2*i, 2*i+1)
/// elements of one zero-extended and one sign-extended i8 vector.
static SDValue detectPMADDUBSW(SDValue In, EVT VT, SelectionDAG &DAG,
                               const X86Subtarget &Subtarget,
                               const SDLoc &DL) {
  if (!VT.isVector() || !Subtarget.hasSSSE3())
    return SDValue();

  // PMADDUBSW produces vXi16; require at least 8 power-of-2 elements.
  unsigned NumElems = VT.getVectorNumElements();
  EVT ScalarVT = VT.getVectorElementType();
  if (ScalarVT != MVT::i16 || NumElems < 8 || !isPowerOf2_32(NumElems))
    return SDValue();

  // The truncation must be a signed saturation of an ADD.
  SDValue SSatVal = detectSSatPattern(In, VT);
  if (!SSatVal || SSatVal.getOpcode() != ISD::ADD)
    return SDValue();

  // Both add operands must be multiplies.
  SDValue N0 = SSatVal.getOperand(0);
  SDValue N1 = SSatVal.getOperand(1);

  if (N0.getOpcode() != ISD::MUL || N1.getOpcode() != ISD::MUL)
    return SDValue();

  SDValue N00 = N0.getOperand(0);
  SDValue N01 = N0.getOperand(1);
  SDValue N10 = N1.getOperand(0);
  SDValue N11 = N1.getOperand(1);

  // Canonicalize so the zero-extended operand is N00/N10.
  if (N01.getOpcode() == ISD::ZERO_EXTEND)
    std::swap(N00, N01);
  if (N11.getOpcode() == ISD::ZERO_EXTEND)
    std::swap(N10, N11);

  // Each multiply must be (zext x sext) - the asymmetry is what PMADDUBSW
  // implements (unsigned * signed).
  if (N00.getOpcode() != ISD::ZERO_EXTEND ||
      N01.getOpcode() != ISD::SIGN_EXTEND ||
      N10.getOpcode() != ISD::ZERO_EXTEND ||
      N11.getOpcode() != ISD::SIGN_EXTEND)
    return SDValue();

  // Peek through the extends.
  N00 = N00.getOperand(0);
  N01 = N01.getOperand(0);
  N10 = N10.getOperand(0);
  N11 = N11.getOperand(0);

  // All inputs must be i8 vectors.
  if (N00.getValueType().getVectorElementType() != MVT::i8 ||
      N01.getValueType().getVectorElementType() != MVT::i8 ||
      N10.getValueType().getVectorElementType() != MVT::i8 ||
      N11.getValueType().getVectorElementType() != MVT::i8)
    return SDValue();

  // All inputs must be build_vectors of element extracts so the interleave
  // pattern can be verified element by element below.
  if (N00.getOpcode() != ISD::BUILD_VECTOR ||
      N01.getOpcode() != ISD::BUILD_VECTOR ||
      N10.getOpcode() != ISD::BUILD_VECTOR ||
      N11.getOpcode() != ISD::BUILD_VECTOR)
    return SDValue();

  // For each result element i, the two products must read elements 2*i and
  // 2*i+1 of a single zext source and a single sext source (in either
  // add-operand order). ZExtIn/SExtIn record those common sources.
  SDValue ZExtIn, SExtIn;
  for (unsigned i = 0; i != NumElems; ++i) {
    SDValue N00Elt = N00.getOperand(i);
    SDValue N01Elt = N01.getOperand(i);
    SDValue N10Elt = N10.getOperand(i);
    SDValue N11Elt = N11.getOperand(i);

    // Every build_vector element must be an extract with a constant index.
    if (N00Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
        N01Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
        N10Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
        N11Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
      return SDValue();
    auto *ConstN00Elt = dyn_cast<ConstantSDNode>(N00Elt.getOperand(1));
    auto *ConstN01Elt = dyn_cast<ConstantSDNode>(N01Elt.getOperand(1));
    auto *ConstN10Elt = dyn_cast<ConstantSDNode>(N10Elt.getOperand(1));
    auto *ConstN11Elt = dyn_cast<ConstantSDNode>(N11Elt.getOperand(1));
    if (!ConstN00Elt || !ConstN01Elt || !ConstN10Elt || !ConstN11Elt)
      return SDValue();
    unsigned IdxN00 = ConstN00Elt->getZExtValue();
    unsigned IdxN01 = ConstN01Elt->getZExtValue();
    unsigned IdxN10 = ConstN10Elt->getZExtValue();
    unsigned IdxN11 = ConstN11Elt->getZExtValue();

    // Add is commutative - normalize so the even index comes first.
    if (IdxN00 > IdxN10) {
      std::swap(IdxN00, IdxN10);
      std::swap(IdxN01, IdxN11);
    }

    // Indices must be exactly the adjacent pair (2*i, 2*i+1) on both the
    // zext side and the sext side.
    if (IdxN00 != 2 * i || IdxN10 != 2 * i + 1 ||
        IdxN01 != 2 * i || IdxN11 != 2 * i + 1)
      return SDValue();
    SDValue N00In = N00Elt.getOperand(0);
    SDValue N01In = N01Elt.getOperand(0);
    SDValue N10In = N10Elt.getOperand(0);
    SDValue N11In = N11Elt.getOperand(0);

    // First time through: record the sources.
    if (!ZExtIn) {
      ZExtIn = N00In;
      SExtIn = N01In;
    }
    // All elements must extract from the same two source vectors.
    if (ZExtIn != N00In || SExtIn != N01In ||
        ZExtIn != N10In || SExtIn != N11In)
      return SDValue();
  }

  // Emit VPMADDUBSW on each (possibly split) operand piece: a pair of vXi8
  // inputs produces half as many i16 results.
  auto PMADDBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
                         ArrayRef<SDValue> Ops) {
    EVT InVT = Ops[0].getValueType();
    assert(InVT.getScalarType() == MVT::i8 &&
           "Unexpected scalar element type");
    assert(InVT == Ops[1].getValueType() && "Operands' types mismatch");
    EVT ResVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16,
                                 InVT.getVectorNumElements() / 2);
    return DAG.getNode(X86ISD::VPMADDUBSW, DL, ResVT, Ops[0], Ops[1]);
  };
  return SplitOpsAndApply(DAG, Subtarget, DL, VT, { ZExtIn, SExtIn },
                          PMADDBuilder);
}
| 47131 | |
/// Do target-specific dag combines on ISD::TRUNCATE nodes. The individual
/// combines are attempted in order; the first one that fires wins.
static SDValue combineTruncate(SDNode *N, SelectionDAG &DAG,
                               const X86Subtarget &Subtarget) {
  EVT VT = N->getValueType(0);
  SDValue Src = N->getOperand(0);
  SDLoc DL(N);

  // Attempt to pre-truncate the inputs of arithmetic ops.
  if (SDValue V = combineTruncatedArithmetic(N, DAG, Subtarget, DL))
    return V;

  // Try to detect an averaging (AVG) pattern.
  if (SDValue Avg = detectAVGPattern(Src, VT, DAG, Subtarget, DL))
    return Avg;

  // Try to detect the PMADDUBSW multiply-add pattern.
  if (SDValue PMAdd = detectPMADDUBSW(Src, VT, DAG, Subtarget, DL))
    return PMAdd;

  // Try to combine the truncation with a signed/unsigned saturation.
  if (SDValue Val = combineTruncateWithSat(Src, VT, DL, DAG, Subtarget))
    return Val;

  // Try to form PMULHW/PMULHUW from a high-half multiply.
  if (SDValue V = combinePMULH(Src, VT, DL, DAG, Subtarget))
    return V;

  // trunc(bitcast x86mmx to i64) -> MMX_MOVD2W, moving the low 32 bits of
  // an MMX value directly to a GPR.
  if (Src.getOpcode() == ISD::BITCAST && VT == MVT::i32) {
    SDValue BCSrc = Src.getOperand(0);
    if (BCSrc.getValueType() == MVT::x86mmx)
      return DAG.getNode(X86ISD::MMX_MOVD2W, DL, MVT::i32, BCSrc);
  }

  // Try to truncate extended sign/zero bits with PACKSS/PACKUS.
  if (SDValue V = combineVectorSignBitsTruncation(N, DL, DAG, Subtarget))
    return V;

  // Fall back to the generic PACK-based vector truncation lowering.
  return combineVectorTruncation(N, DAG, Subtarget);
}
| 47172 | |
/// Do target-specific dag combines on X86 VTRUNC nodes: fold saturation
/// patterns into the saturating truncate variants, then try to simplify
/// based on the demanded result bits.
static SDValue combineVTRUNC(SDNode *N, SelectionDAG &DAG,
                             TargetLowering::DAGCombinerInfo &DCI) {
  EVT VT = N->getValueType(0);
  SDValue In = N->getOperand(0);
  SDLoc DL(N);

  // vtrunc(ssat(x)) -> VTRUNCS x; vtrunc(usat(x)) -> VTRUNCUS x.
  if (auto SSatVal = detectSSatPattern(In, VT))
    return DAG.getNode(X86ISD::VTRUNCS, DL, VT, SSatVal);
  if (auto USatVal = detectUSatPattern(In, VT, DAG, DL))
    return DAG.getNode(X86ISD::VTRUNCUS, DL, VT, USatVal);

  // Simplify the operands given that all result scalar bits are demanded.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  APInt DemandedMask(APInt::getAllOnesValue(VT.getScalarSizeInBits()));
  if (TLI.SimplifyDemandedBits(SDValue(N, 0), DemandedMask, DCI))
    return SDValue(N, 0);

  return SDValue();
}
| 47191 | |
| 47192 | |
| 47193 | |
| 47194 | |
| 47195 | |
| 47196 | |
| 47197 | |
| 47198 | |
| 47199 | |
| 47200 | |
/// Returns the negated value if the node \p N flips the sign of its result.
/// Besides plain FNEG this looks through bitcasts, shuffles/inserts of
/// negated values, and the xor/fsub-with-sign-mask idioms x86 uses to
/// implement FP negation.
static SDValue isFNEG(SelectionDAG &DAG, SDNode *N, unsigned Depth = 0) {
  if (N->getOpcode() == ISD::FNEG)
    return N->getOperand(0);

  // Don't recurse exponentially.
  if (Depth > SelectionDAG::MaxRecursionDepth)
    return SDValue();

  unsigned ScalarSize = N->getValueType(0).getScalarSizeInBits();

  SDValue Op = peekThroughBitcasts(SDValue(N, 0));
  EVT VT = Op->getValueType(0);

  // Only allow bitcasts that keep the scalar size: a sign-mask match below
  // is only a negation if element boundaries are preserved.
  if (VT.getScalarSizeInBits() != ScalarSize)
    return SDValue();

  unsigned Opc = Op.getOpcode();
  switch (Opc) {
  case ISD::VECTOR_SHUFFLE: {
    // shuffle(-x, undef, mask) is the negation of shuffle(x, undef, mask);
    // the mask itself doesn't matter since the second input is undef.
    if (!Op.getOperand(1).isUndef())
      return SDValue();
    if (SDValue NegOp0 = isFNEG(DAG, Op.getOperand(0).getNode(), Depth + 1))
      if (NegOp0.getValueType() == VT) // FIXME: Can we do better?
        return DAG.getVectorShuffle(VT, SDLoc(Op), NegOp0, DAG.getUNDEF(VT),
                                    cast<ShuffleVectorSDNode>(Op)->getMask());
    break;
  }
  case ISD::INSERT_VECTOR_ELT: {
    // insert(undef, -v, idx) is the negation of insert(undef, v, idx): the
    // undef lanes stay undef either way.
    SDValue InsVector = Op.getOperand(0);
    SDValue InsVal = Op.getOperand(1);
    if (!InsVector.isUndef())
      return SDValue();
    if (SDValue NegInsVal = isFNEG(DAG, InsVal.getNode(), Depth + 1))
      if (NegInsVal.getValueType() == VT.getVectorElementType()) // FIXME
        return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(Op), VT, InsVector,
                           NegInsVal, Op.getOperand(2));
    break;
  }
  case ISD::FSUB:
  case ISD::XOR:
  case X86ISD::FXOR: {
    SDValue Op1 = Op.getOperand(1);
    SDValue Op0 = Op.getOperand(0);

    // For (F)XOR the sign-mask constant is expected in operand 1; an FSUB is
    // a negation when operand 0 is the sign-mask pattern (-0.0 - x), so swap
    // to put the candidate constant in Op1 for the check below.
    if (Opc == ISD::FSUB)
      std::swap(Op0, Op1);

    APInt UndefElts;
    SmallVector<APInt, 16> EltBits;
    // Extract the constant bits of Op1 and require every defined element to
    // be exactly a sign-bit mask (undef elements are tolerated, partially
    // undef elements are not).
    if (getTargetConstantBitsFromNode(Op1, ScalarSize, UndefElts, EltBits,
                                      /*AllowWholeUndefs*/ true,
                                      /*AllowPartialUndefs*/ false)) {
      for (unsigned I = 0, E = EltBits.size(); I < E; I++)
        if (!UndefElts[I] && !EltBits[I].isSignMask())
          return SDValue();

      return peekThroughBitcasts(Op0);
    }
  }
  }

  return SDValue();
}
| 47275 | |
/// Given an FMA-family opcode, return the opcode obtained by negating the
/// multiplication result (\p NegMul), negating the accumulator (\p NegAcc),
/// and/or negating the overall result (\p NegRes). Asserts on opcodes the
/// requested transform doesn't support (note the NegRes table has no STRICT_
/// or ADDSUB entries, so those must not be combined with NegRes).
static unsigned negateFMAOpcode(unsigned Opcode, bool NegMul, bool NegAcc,
                                bool NegRes) {
  if (NegMul) {
    // FMA <-> FNMADD, FMSUB <-> FNMSUB (plus _RND and STRICT_ variants).
    switch (Opcode) {
    default: llvm_unreachable("Unexpected opcode");
    case ISD::FMA:              Opcode = X86ISD::FNMADD;        break;
    case ISD::STRICT_FMA:       Opcode = X86ISD::STRICT_FNMADD; break;
    case X86ISD::FMADD_RND:     Opcode = X86ISD::FNMADD_RND;    break;
    case X86ISD::FMSUB:         Opcode = X86ISD::FNMSUB;        break;
    case X86ISD::STRICT_FMSUB:  Opcode = X86ISD::STRICT_FNMSUB; break;
    case X86ISD::FMSUB_RND:     Opcode = X86ISD::FNMSUB_RND;    break;
    case X86ISD::FNMADD:        Opcode = ISD::FMA;              break;
    case X86ISD::STRICT_FNMADD: Opcode = ISD::STRICT_FMA;       break;
    case X86ISD::FNMADD_RND:    Opcode = X86ISD::FMADD_RND;     break;
    case X86ISD::FNMSUB:        Opcode = X86ISD::FMSUB;         break;
    case X86ISD::STRICT_FNMSUB: Opcode = X86ISD::STRICT_FMSUB;  break;
    case X86ISD::FNMSUB_RND:    Opcode = X86ISD::FMSUB_RND;     break;
    }
  }

  if (NegAcc) {
    // ADD <-> SUB forms; ADDSUB <-> SUBADD for the mixed ops.
    switch (Opcode) {
    default: llvm_unreachable("Unexpected opcode");
    case ISD::FMA:               Opcode = X86ISD::FMSUB;         break;
    case ISD::STRICT_FMA:        Opcode = X86ISD::STRICT_FMSUB;  break;
    case X86ISD::FMADD_RND:      Opcode = X86ISD::FMSUB_RND;     break;
    case X86ISD::FMSUB:          Opcode = ISD::FMA;              break;
    case X86ISD::STRICT_FMSUB:   Opcode = ISD::STRICT_FMA;       break;
    case X86ISD::FMSUB_RND:      Opcode = X86ISD::FMADD_RND;     break;
    case X86ISD::FNMADD:         Opcode = X86ISD::FNMSUB;        break;
    case X86ISD::STRICT_FNMADD:  Opcode = X86ISD::STRICT_FNMSUB; break;
    case X86ISD::FNMADD_RND:     Opcode = X86ISD::FNMSUB_RND;    break;
    case X86ISD::FNMSUB:         Opcode = X86ISD::FNMADD;        break;
    case X86ISD::STRICT_FNMSUB:  Opcode = X86ISD::STRICT_FNMADD; break;
    case X86ISD::FNMSUB_RND:     Opcode = X86ISD::FNMADD_RND;    break;
    case X86ISD::FMADDSUB:       Opcode = X86ISD::FMSUBADD;      break;
    case X86ISD::FMADDSUB_RND:   Opcode = X86ISD::FMSUBADD_RND;  break;
    case X86ISD::FMSUBADD:       Opcode = X86ISD::FMADDSUB;      break;
    case X86ISD::FMSUBADD_RND:   Opcode = X86ISD::FMADDSUB_RND;  break;
    }
  }

  if (NegRes) {
    // -(fma) == fnmsub etc. No STRICT_ entries here: result negation is not
    // combined with strict FP opcodes.
    switch (Opcode) {
    default: llvm_unreachable("Unexpected opcode");
    case ISD::FMA:           Opcode = X86ISD::FNMSUB;     break;
    case X86ISD::FMADD_RND:  Opcode = X86ISD::FNMSUB_RND; break;
    case X86ISD::FMSUB:      Opcode = X86ISD::FNMADD;     break;
    case X86ISD::FMSUB_RND:  Opcode = X86ISD::FNMADD_RND; break;
    case X86ISD::FNMADD:     Opcode = X86ISD::FMSUB;      break;
    case X86ISD::FNMADD_RND: Opcode = X86ISD::FMSUB_RND;  break;
    case X86ISD::FNMSUB:     Opcode = ISD::FMA;           break;
    case X86ISD::FNMSUB_RND: Opcode = X86ISD::FMADD_RND;  break;
    }
  }

  return Opcode;
}
| 47335 | |
| 47336 | |
/// Do target-specific dag combines on floating point negations.
static SDValue combineFneg(SDNode *N, SelectionDAG &DAG,
                           TargetLowering::DAGCombinerInfo &DCI,
                           const X86Subtarget &Subtarget) {
  EVT OrigVT = N->getValueType(0);
  // isFNEG looks through bitcasts for a node computing the negation of Arg;
  // bail out if this node is not such a pattern.
  SDValue Arg = isFNEG(DAG, N);
  if (!Arg)
    return SDValue();

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  EVT VT = Arg.getValueType();
  EVT SVT = VT.getScalarType();
  SDLoc DL(N);

  // Let legalize expand this if it isn't a legal type yet.
  if (!TLI.isTypeLegal(VT))
    return SDValue();

  // If we're negating an FMUL on an FMA-capable target, build
  // FNMSUB(A, B, 0) = -(A*B) - 0 instead of materializing a sign-mask
  // constant. This requires the nsz flag: without it, -(A*B) and
  // -(A*B) - 0.0 can differ in the sign of a zero result.
  if (Arg.getOpcode() == ISD::FMUL && (SVT == MVT::f32 || SVT == MVT::f64) &&
      Arg->getFlags().hasNoSignedZeros() && Subtarget.hasAnyFMA()) {
    SDValue Zero = DAG.getConstantFP(0.0, DL, VT);
    SDValue NewNode = DAG.getNode(X86ISD::FNMSUB, DL, VT, Arg.getOperand(0),
                                  Arg.getOperand(1), Zero);
    return DAG.getBitcast(OrigVT, NewNode);
  }

  // Otherwise ask TLI whether a cheaper negated form of Arg exists and use
  // that directly.
  bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
  bool LegalOperations = !DCI.isBeforeLegalizeOps();
  if (SDValue NegArg =
          TLI.getNegatedExpression(Arg, DAG, LegalOperations, CodeSize))
    return DAG.getBitcast(OrigVT, NegArg);

  return SDValue();
}
| 47373 | |
/// Return the negated form of Op if it is cheaper (or free) to compute,
/// recording how the cost compares in \p Cost. Falls back to the generic
/// TargetLowering implementation when no X86-specific form applies.
SDValue X86TargetLowering::getNegatedExpression(SDValue Op, SelectionDAG &DAG,
                                                bool LegalOperations,
                                                bool ForCodeSize,
                                                NegatibleCost &Cost,
                                                unsigned Depth) const {
  // If Op itself is already a (possibly disguised) fneg pattern, its
  // negation is just the inner value - strictly cheaper.
  if (SDValue Arg = isFNEG(DAG, Op.getNode(), Depth)) {
    Cost = NegatibleCost::Cheaper;
    return DAG.getBitcast(Op.getValueType(), Arg);
  }

  EVT VT = Op.getValueType();
  EVT SVT = VT.getScalarType();
  unsigned Opc = Op.getOpcode();
  SDNodeFlags Flags = Op.getNode()->getFlags();
  switch (Opc) {
  case ISD::FMA:
  case X86ISD::FMSUB:
  case X86ISD::FNMADD:
  case X86ISD::FNMSUB:
  case X86ISD::FMADD_RND:
  case X86ISD::FMSUB_RND:
  case X86ISD::FNMADD_RND:
  case X86ISD::FNMSUB_RND: {
    // FMA variants can absorb negations of their operands by switching to a
    // sibling opcode - but only for single-use f32/f64 FMA-legal cases.
    if (!Op.hasOneUse() || !Subtarget.hasAnyFMA() || !isTypeLegal(VT) ||
        !(SVT == MVT::f32 || SVT == MVT::f64) ||
        !isOperationLegal(ISD::FMA, VT))
      break;

    // Negating the whole FMA result is only sound with no-signed-zeros.
    if (!Flags.hasNoSignedZeros())
      break;

    // Try to cheaply negate each of the three value operands; a null entry
    // means that operand has no cheaper negated form.
    SmallVector<SDValue, 4> NewOps(Op.getNumOperands(), SDValue());
    for (int i = 0; i != 3; ++i)
      NewOps[i] = getCheaperNegatedExpression(
          Op.getOperand(i), DAG, LegalOperations, ForCodeSize, Depth + 1);

    bool NegA = !!NewOps[0];
    bool NegB = !!NewOps[1];
    bool NegC = !!NewOps[2];
    // NegA != NegB: negating exactly one multiplicand negates the product.
    // The final 'true' requests the negated-result form of the opcode.
    unsigned NewOpc = negateFMAOpcode(Opc, NegA != NegB, NegC, true);

    // If any operand got cheaper by negation, the whole thing is cheaper;
    // otherwise it costs the same (only the opcode changed).
    Cost = (NegA || NegB || NegC) ? NegatibleCost::Cheaper
                                  : NegatibleCost::Neutral;

    // Fill in the un-negated operands with the original values (this also
    // copies any trailing operands such as the rounding control).
    for (int i = 0, e = Op.getNumOperands(); i != e; ++i)
      if (!NewOps[i])
        NewOps[i] = Op.getOperand(i);
    return DAG.getNode(NewOpc, SDLoc(Op), VT, NewOps);
  }
  case X86ISD::FRCP:
    // -(1/x) == 1/(-x), so push the negation into the FRCP operand.
    if (SDValue NegOp0 =
            getNegatedExpression(Op.getOperand(0), DAG, LegalOperations,
                                 ForCodeSize, Cost, Depth + 1))
      return DAG.getNode(Opc, SDLoc(Op), VT, NegOp0);
    break;
  }

  return TargetLowering::getNegatedExpression(Op, DAG, LegalOperations,
                                              ForCodeSize, Cost, Depth);
}
| 47440 | |
| 47441 | static SDValue lowerX86FPLogicOp(SDNode *N, SelectionDAG &DAG, |
| 47442 | const X86Subtarget &Subtarget) { |
| 47443 | MVT VT = N->getSimpleValueType(0); |
| 47444 | |
| 47445 | if (!VT.isVector() || !Subtarget.hasSSE2()) |
| 47446 | return SDValue(); |
| 47447 | |
| 47448 | SDLoc dl(N); |
| 47449 | |
| 47450 | unsigned IntBits = VT.getScalarSizeInBits(); |
| 47451 | MVT IntSVT = MVT::getIntegerVT(IntBits); |
| 47452 | MVT IntVT = MVT::getVectorVT(IntSVT, VT.getSizeInBits() / IntBits); |
| 47453 | |
| 47454 | SDValue Op0 = DAG.getBitcast(IntVT, N->getOperand(0)); |
| 47455 | SDValue Op1 = DAG.getBitcast(IntVT, N->getOperand(1)); |
| 47456 | unsigned IntOpcode; |
| 47457 | switch (N->getOpcode()) { |
| 47458 | default: llvm_unreachable("Unexpected FP logic op"); |
| 47459 | case X86ISD::FOR: IntOpcode = ISD::OR; break; |
| 47460 | case X86ISD::FXOR: IntOpcode = ISD::XOR; break; |
| 47461 | case X86ISD::FAND: IntOpcode = ISD::AND; break; |
| 47462 | case X86ISD::FANDN: IntOpcode = X86ISD::ANDNP; break; |
| 47463 | } |
| 47464 | SDValue IntOp = DAG.getNode(IntOpcode, dl, IntVT, Op0, Op1); |
| 47465 | return DAG.getBitcast(VT, IntOp); |
| 47466 | } |
| 47467 | |
| 47468 | |
| 47469 | |
| 47470 | static SDValue foldXor1SetCC(SDNode *N, SelectionDAG &DAG) { |
| 47471 | if (N->getOpcode() != ISD::XOR) |
| 47472 | return SDValue(); |
| 47473 | |
| 47474 | SDValue LHS = N->getOperand(0); |
| 47475 | if (!isOneConstant(N->getOperand(1)) || LHS->getOpcode() != X86ISD::SETCC) |
| 47476 | return SDValue(); |
| 47477 | |
| 47478 | X86::CondCode NewCC = X86::GetOppositeBranchCondition( |
| 47479 | X86::CondCode(LHS->getConstantOperandVal(0))); |
| 47480 | SDLoc DL(N); |
| 47481 | return getSETCC(NewCC, LHS->getOperand(1), DL, DAG); |
| 47482 | } |
| 47483 | |
/// Do target-specific dag combines on ISD::XOR nodes. The folds are tried in
/// a deliberate order; each returns early on success.
static SDValue combineXor(SDNode *N, SelectionDAG &DAG,
                          TargetLowering::DAGCombinerInfo &DCI,
                          const X86Subtarget &Subtarget) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);

  // With SSE1 but no SSE2 there is no v4i32 integer xor; perform it as an
  // FXOR on a v4f32 bitcast instead.
  if (Subtarget.hasSSE1() && !Subtarget.hasSSE2() && VT == MVT::v4i32) {
    return DAG.getBitcast(MVT::v4i32,
                          DAG.getNode(X86ISD::FXOR, SDLoc(N), MVT::v4f32,
                                      DAG.getBitcast(MVT::v4f32, N0),
                                      DAG.getBitcast(MVT::v4f32, N1)));
  }

  if (SDValue Cmp = foldVectorXorShiftIntoCmp(N, DAG, Subtarget))
    return Cmp;

  if (SDValue R = combineBitOpWithMOVMSK(N, DAG))
    return R;

  // All remaining folds are only performed after operation legalization.
  if (DCI.isBeforeLegalizeOps())
    return SDValue();

  if (SDValue SetCC = foldXor1SetCC(N, DAG))
    return SetCC;

  if (SDValue RV = foldXorTruncShiftIntoCmp(N, DAG))
    return RV;

  // Fold not(bitcast(vXi1 x)) -> bitcast(not(x)) so the NOT is done on the
  // legal mask type rather than on the scalar integer.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (llvm::isAllOnesConstant(N1) && N0.getOpcode() == ISD::BITCAST &&
      N0.getOperand(0).getValueType().isVector() &&
      N0.getOperand(0).getValueType().getVectorElementType() == MVT::i1 &&
      TLI.isTypeLegal(N0.getOperand(0).getValueType()) && N0.hasOneUse()) {
    return DAG.getBitcast(VT, DAG.getNOT(SDLoc(N), N0.getOperand(0),
                                         N0.getOperand(0).getValueType()));
  }

  // For vXi1 masks: fold not(insert_subvector(undef, x)) ->
  // insert_subvector(undef, not(x)) when the subvector type is legal.
  if (ISD::isBuildVectorAllOnes(N1.getNode()) && VT.isVector() &&
      VT.getVectorElementType() == MVT::i1 &&
      N0.getOpcode() == ISD::INSERT_SUBVECTOR && N0.getOperand(0).isUndef() &&
      TLI.isTypeLegal(N0.getOperand(1).getValueType())) {
    return DAG.getNode(
        ISD::INSERT_SUBVECTOR, SDLoc(N), VT, N0.getOperand(0),
        DAG.getNOT(SDLoc(N), N0.getOperand(1), N0.getOperand(1).getValueType()),
        N0.getOperand(2));
  }

  // Fold xor(truncate/zext(xor(x, c1)), c2) -> xor(zext-or-trunc(x),
  // xor(zext-or-trunc(c1), c2)) so the two constants can be merged. Opaque
  // constants are skipped since they must not be folded.
  if ((N0.getOpcode() == ISD::TRUNCATE || N0.getOpcode() == ISD::ZERO_EXTEND) &&
      N0.getOperand(0).getOpcode() == N->getOpcode()) {
    SDValue TruncExtSrc = N0.getOperand(0);
    auto *N1C = dyn_cast<ConstantSDNode>(N1);
    auto *N001C = dyn_cast<ConstantSDNode>(TruncExtSrc.getOperand(1));
    if (N1C && !N1C->isOpaque() && N001C && !N001C->isOpaque()) {
      SDLoc DL(N);
      SDValue LHS = DAG.getZExtOrTrunc(TruncExtSrc.getOperand(0), DL, VT);
      SDValue RHS = DAG.getZExtOrTrunc(TruncExtSrc.getOperand(1), DL, VT);
      return DAG.getNode(ISD::XOR, DL, VT, LHS,
                         DAG.getNode(ISD::XOR, DL, VT, RHS, N1));
    }
  }

  if (SDValue FPLogic = convertIntLogicToFPLogic(N, DAG, Subtarget))
    return FPLogic;

  // Finally, this xor may really be an fneg of FP data (xor with sign mask).
  return combineFneg(N, DAG, DCI, Subtarget);
}
| 47558 | |
| 47559 | static SDValue combineBEXTR(SDNode *N, SelectionDAG &DAG, |
| 47560 | TargetLowering::DAGCombinerInfo &DCI, |
| 47561 | const X86Subtarget &Subtarget) { |
| 47562 | EVT VT = N->getValueType(0); |
| 47563 | unsigned NumBits = VT.getSizeInBits(); |
| 47564 | |
| 47565 | |
| 47566 | |
| 47567 | |
| 47568 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
| 47569 | APInt DemandedMask(APInt::getAllOnesValue(NumBits)); |
| 47570 | if (TLI.SimplifyDemandedBits(SDValue(N, 0), DemandedMask, DCI)) |
| 47571 | return SDValue(N, 0); |
| 47572 | |
| 47573 | return SDValue(); |
| 47574 | } |
| 47575 | |
| 47576 | static bool isNullFPScalarOrVectorConst(SDValue V) { |
| 47577 | return isNullFPConstant(V) || ISD::isBuildVectorAllZeros(V.getNode()); |
| 47578 | } |
| 47579 | |
| 47580 | |
| 47581 | |
| 47582 | |
| 47583 | |
| 47584 | |
| 47585 | |
| 47586 | static SDValue getNullFPConstForNullVal(SDValue V, SelectionDAG &DAG, |
| 47587 | const X86Subtarget &Subtarget) { |
| 47588 | if (!isNullFPScalarOrVectorConst(V)) |
| 47589 | return SDValue(); |
| 47590 | |
| 47591 | if (V.getValueType().isVector()) |
| 47592 | return getZeroVector(V.getSimpleValueType(), Subtarget, DAG, SDLoc(V)); |
| 47593 | |
| 47594 | return V; |
| 47595 | } |
| 47596 | |
| 47597 | static SDValue combineFAndFNotToFAndn(SDNode *N, SelectionDAG &DAG, |
| 47598 | const X86Subtarget &Subtarget) { |
| 47599 | SDValue N0 = N->getOperand(0); |
| 47600 | SDValue N1 = N->getOperand(1); |
| 47601 | EVT VT = N->getValueType(0); |
| 47602 | SDLoc DL(N); |
| 47603 | |
| 47604 | |
| 47605 | if (!((VT == MVT::f32 && Subtarget.hasSSE1()) || |
| 47606 | (VT == MVT::f64 && Subtarget.hasSSE2()) || |
| 47607 | (VT == MVT::v4f32 && Subtarget.hasSSE1() && !Subtarget.hasSSE2()))) |
| 47608 | return SDValue(); |
| 47609 | |
| 47610 | auto isAllOnesConstantFP = [](SDValue V) { |
| 47611 | if (V.getSimpleValueType().isVector()) |
| 47612 | return ISD::isBuildVectorAllOnes(V.getNode()); |
| 47613 | auto *C = dyn_cast<ConstantFPSDNode>(V); |
| 47614 | return C && C->getConstantFPValue()->isAllOnesValue(); |
| 47615 | }; |
| 47616 | |
| 47617 | |
| 47618 | if (N0.getOpcode() == X86ISD::FXOR && isAllOnesConstantFP(N0.getOperand(1))) |
| 47619 | return DAG.getNode(X86ISD::FANDN, DL, VT, N0.getOperand(0), N1); |
| 47620 | |
| 47621 | |
| 47622 | if (N1.getOpcode() == X86ISD::FXOR && isAllOnesConstantFP(N1.getOperand(1))) |
| 47623 | return DAG.getNode(X86ISD::FANDN, DL, VT, N1.getOperand(0), N0); |
| 47624 | |
| 47625 | return SDValue(); |
| 47626 | } |
| 47627 | |
| 47628 | |
| 47629 | static SDValue combineFAnd(SDNode *N, SelectionDAG &DAG, |
| 47630 | const X86Subtarget &Subtarget) { |
| 47631 | |
| 47632 | if (SDValue V = getNullFPConstForNullVal(N->getOperand(0), DAG, Subtarget)) |
| 47633 | return V; |
| 47634 | |
| 47635 | |
| 47636 | if (SDValue V = getNullFPConstForNullVal(N->getOperand(1), DAG, Subtarget)) |
| 47637 | return V; |
| 47638 | |
| 47639 | if (SDValue V = combineFAndFNotToFAndn(N, DAG, Subtarget)) |
| 47640 | return V; |
| 47641 | |
| 47642 | return lowerX86FPLogicOp(N, DAG, Subtarget); |
| 47643 | } |
| 47644 | |
| 47645 | |
| 47646 | static SDValue combineFAndn(SDNode *N, SelectionDAG &DAG, |
| 47647 | const X86Subtarget &Subtarget) { |
| 47648 | |
| 47649 | if (isNullFPScalarOrVectorConst(N->getOperand(0))) |
| 47650 | return N->getOperand(1); |
| 47651 | |
| 47652 | |
| 47653 | if (SDValue V = getNullFPConstForNullVal(N->getOperand(1), DAG, Subtarget)) |
| 47654 | return V; |
| 47655 | |
| 47656 | return lowerX86FPLogicOp(N, DAG, Subtarget); |
| 47657 | } |
| 47658 | |
| 47659 | |
| 47660 | static SDValue combineFOr(SDNode *N, SelectionDAG &DAG, |
| 47661 | TargetLowering::DAGCombinerInfo &DCI, |
| 47662 | const X86Subtarget &Subtarget) { |
| 47663 | assert(N->getOpcode() == X86ISD::FOR || N->getOpcode() == X86ISD::FXOR); |
| 47664 | |
| 47665 | |
| 47666 | if (isNullFPScalarOrVectorConst(N->getOperand(0))) |
| 47667 | return N->getOperand(1); |
| 47668 | |
| 47669 | |
| 47670 | if (isNullFPScalarOrVectorConst(N->getOperand(1))) |
| 47671 | return N->getOperand(0); |
| 47672 | |
| 47673 | if (SDValue NewVal = combineFneg(N, DAG, DCI, Subtarget)) |
| 47674 | return NewVal; |
| 47675 | |
| 47676 | return lowerX86FPLogicOp(N, DAG, Subtarget); |
| 47677 | } |
| 47678 | |
| 47679 | |
| 47680 | static SDValue combineFMinFMax(SDNode *N, SelectionDAG &DAG) { |
| 47681 | assert(N->getOpcode() == X86ISD::FMIN || N->getOpcode() == X86ISD::FMAX); |
| 47682 | |
| 47683 | |
| 47684 | if (!DAG.getTarget().Options.NoNaNsFPMath || |
| 47685 | !DAG.getTarget().Options.NoSignedZerosFPMath) |
| 47686 | return SDValue(); |
| 47687 | |
| 47688 | |
| 47689 | |
| 47690 | unsigned NewOp = 0; |
| 47691 | switch (N->getOpcode()) { |
| 47692 | default: llvm_unreachable("unknown opcode"); |
| 47693 | case X86ISD::FMIN: NewOp = X86ISD::FMINC; break; |
| 47694 | case X86ISD::FMAX: NewOp = X86ISD::FMAXC; break; |
| 47695 | } |
| 47696 | |
| 47697 | return DAG.getNode(NewOp, SDLoc(N), N->getValueType(0), |
| 47698 | N->getOperand(0), N->getOperand(1)); |
| 47699 | } |
| 47700 | |
/// Lower ISD::FMINNUM / ISD::FMAXNUM to the X86 FMIN/FMAX nodes, adding the
/// extra compare+select needed to preserve the "return the non-NaN operand"
/// semantics when NaNs are possible.
static SDValue combineFMinNumFMaxNum(SDNode *N, SelectionDAG &DAG,
                                     const X86Subtarget &Subtarget) {
  if (Subtarget.useSoftFloat())
    return SDValue();

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  // Only types with a native SSE min/max instruction.
  EVT VT = N->getValueType(0);
  if (!((Subtarget.hasSSE1() && VT == MVT::f32) ||
        (Subtarget.hasSSE2() && VT == MVT::f64) ||
        (VT.isVector() && TLI.isTypeLegal(VT))))
    return SDValue();

  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  SDLoc DL(N);
  auto MinMaxOp = N->getOpcode() == ISD::FMAXNUM ? X86ISD::FMAX : X86ISD::FMIN;

  // If no NaN can occur, the raw X86 min/max already implements
  // fminnum/fmaxnum and no fixup is needed.
  if (DAG.getTarget().Options.NoNaNsFPMath || N->getFlags().hasNoNaNs())
    return DAG.getNode(MinMaxOp, DL, VT, Op0, Op1, N->getFlags());

  // X86 FMIN/FMAX produce the SECOND operand when either input is a NaN.
  // So if exactly one side is known never-NaN, place it second: a NaN in
  // the other operand then yields the non-NaN value, as fminnum/fmaxnum
  // require.
  if (DAG.isKnownNeverNaN(Op1))
    return DAG.getNode(MinMaxOp, DL, VT, Op0, Op1, N->getFlags());
  if (DAG.isKnownNeverNaN(Op0))
    return DAG.getNode(MinMaxOp, DL, VT, Op1, Op0, N->getFlags());

  // The general expansion below costs an extra compare and select; for
  // scalars under minsize, prefer the (smaller) default expansion.
  if (!VT.isVector() && DAG.getMachineFunction().getFunction().hasMinSize())
    return SDValue();

  EVT SetCCType = TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
                                         VT);

  // General case - both operands may be NaN:
  //   MinOrMax = minmax(Op1, Op0)   // NaN Op1 -> Op0 (correct already)
  //   result   = Op0 is NaN ? Op1 : MinOrMax
  // The commuted min/max handles a NaN in Op1 by itself (second operand is
  // returned), and the unordered-compare select fixes the Op0-is-NaN case.
  SDValue MinOrMax = DAG.getNode(MinMaxOp, DL, VT, Op1, Op0);
  // SETUO is true iff Op0 is unordered with itself, i.e. Op0 is a NaN.
  SDValue IsOp0Nan = DAG.getSetCC(DL, SetCCType, Op0, Op0, ISD::SETUO);

  // If Op0 is a NaN, select Op1 (which may still be NaN, matching the
  // both-NaN behavior of fminnum/fmaxnum returning a NaN).
  return DAG.getSelect(DL, VT, IsOp0Nan, Op1, MinOrMax);
}
| 47765 | |
/// Combines for X86 int-to-fp conversion nodes: prune unused result
/// elements, and shrink a full-width vector load feeding the conversion to a
/// narrow zero-extending load when only the low elements are converted.
static SDValue combineX86INT_TO_FP(SDNode *N, SelectionDAG &DAG,
                                   TargetLowering::DAGCombinerInfo &DCI) {
  EVT VT = N->getValueType(0);
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  // Simplify based on which elements of the result are actually demanded.
  APInt KnownUndef, KnownZero;
  APInt DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements());
  if (TLI.SimplifyDemandedVectorElts(SDValue(N, 0), DemandedElts, KnownUndef,
                                     KnownZero, DCI))
    return SDValue(N, 0);

  // If the conversion consumes fewer elements than its 128-bit source vector
  // provides, and the source is a plain single-use load, replace the load
  // with a VZLoad of just the converted bytes.
  SDValue In = N->getOperand(0);
  MVT InVT = In.getSimpleValueType();
  if (VT.getVectorNumElements() < InVT.getVectorNumElements() &&
      ISD::isNormalLoad(In.getNode()) && In.hasOneUse()) {
    assert(InVT.is128BitVector() && "Expected 128-bit input vector");
    LoadSDNode *LN = cast<LoadSDNode>(N->getOperand(0));
    // Total bits actually read: source element size times converted count.
    unsigned NumBits = InVT.getScalarSizeInBits() * VT.getVectorNumElements();
    MVT MemVT = MVT::getIntegerVT(NumBits);
    MVT LoadVT = MVT::getVectorVT(MemVT, 128 / NumBits);
    if (SDValue VZLoad = narrowLoadToVZLoad(LN, MemVT, LoadVT, DAG)) {
      SDLoc dl(N);
      SDValue Convert = DAG.getNode(N->getOpcode(), dl, VT,
                                    DAG.getBitcast(InVT, VZLoad));
      DCI.CombineTo(N, Convert);
      // Re-point the old load's chain users at the new load, then delete the
      // now-dead original load.
      DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), VZLoad.getValue(1));
      DCI.recursivelyDeleteUnusedNodes(LN);
      return SDValue(N, 0);
    }
  }

  return SDValue();
}
| 47800 | |
/// Combines for CVTP2I/CVTTP2I (fp-to-int) nodes: narrow a full-width vector
/// load feeding the conversion to a zero-extending load of only the
/// converted elements. Handles both normal and strict-FP forms.
static SDValue combineCVTP2I_CVTTP2I(SDNode *N, SelectionDAG &DAG,
                                     TargetLowering::DAGCombinerInfo &DCI) {
  bool IsStrict = N->isTargetStrictFPOpcode();
  EVT VT = N->getValueType(0);

  // Strict nodes carry the chain as operand 0; the value input follows it.
  SDValue In = N->getOperand(IsStrict ? 1 : 0);
  MVT InVT = In.getSimpleValueType();
  if (VT.getVectorNumElements() < InVT.getVectorNumElements() &&
      ISD::isNormalLoad(In.getNode()) && In.hasOneUse()) {
    assert(InVT.is128BitVector() && "Expected 128-bit input vector");
    LoadSDNode *LN = cast<LoadSDNode>(In);
    // Total bits actually read from memory.
    unsigned NumBits = InVT.getScalarSizeInBits() * VT.getVectorNumElements();
    MVT MemVT = MVT::getFloatingPointVT(NumBits);
    MVT LoadVT = MVT::getVectorVT(MemVT, 128 / NumBits);
    if (SDValue VZLoad = narrowLoadToVZLoad(LN, MemVT, LoadVT, DAG)) {
      SDLoc dl(N);
      if (IsStrict) {
        // Preserve the chain result on the strict form.
        SDValue Convert =
            DAG.getNode(N->getOpcode(), dl, {VT, MVT::Other},
                        {N->getOperand(0), DAG.getBitcast(InVT, VZLoad)});
        DCI.CombineTo(N, Convert, Convert.getValue(1));
      } else {
        SDValue Convert =
            DAG.getNode(N->getOpcode(), dl, VT, DAG.getBitcast(InVT, VZLoad));
        DCI.CombineTo(N, Convert);
      }
      // Forward chain users of the old load to the narrowed load and delete
      // the dead original.
      DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), VZLoad.getValue(1));
      DCI.recursivelyDeleteUnusedNodes(LN);
      return SDValue(N, 0);
    }
  }

  return SDValue();
}
| 47836 | |
| 47837 | |
| 47838 | static SDValue combineAndnp(SDNode *N, SelectionDAG &DAG, |
| 47839 | TargetLowering::DAGCombinerInfo &DCI, |
| 47840 | const X86Subtarget &Subtarget) { |
| 47841 | MVT VT = N->getSimpleValueType(0); |
| 47842 | |
| 47843 | |
| 47844 | if (ISD::isBuildVectorAllZeros(N->getOperand(0).getNode())) |
| 47845 | return N->getOperand(1); |
| 47846 | |
| 47847 | |
| 47848 | if (ISD::isBuildVectorAllZeros(N->getOperand(1).getNode())) |
| 47849 | return DAG.getConstant(0, SDLoc(N), VT); |
| 47850 | |
| 47851 | |
| 47852 | if (SDValue Not = IsNOT(N->getOperand(0), DAG)) |
| 47853 | return DAG.getNode(ISD::AND, SDLoc(N), VT, DAG.getBitcast(VT, Not), |
| 47854 | N->getOperand(1)); |
| 47855 | |
| 47856 | |
| 47857 | if (VT.isVector() && (VT.getScalarSizeInBits() % 8) == 0) { |
| 47858 | SDValue Op(N, 0); |
| 47859 | if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget)) |
| 47860 | return Res; |
| 47861 | } |
| 47862 | |
| 47863 | return SDValue(); |
| 47864 | } |
| 47865 | |
| 47866 | static SDValue combineBT(SDNode *N, SelectionDAG &DAG, |
| 47867 | TargetLowering::DAGCombinerInfo &DCI) { |
| 47868 | SDValue N1 = N->getOperand(1); |
| 47869 | |
| 47870 | |
| 47871 | unsigned BitWidth = N1.getValueSizeInBits(); |
| 47872 | APInt DemandedMask = APInt::getLowBitsSet(BitWidth, Log2_32(BitWidth)); |
| 47873 | if (DAG.getTargetLoweringInfo().SimplifyDemandedBits(N1, DemandedMask, DCI)) { |
| 47874 | if (N->getOpcode() != ISD::DELETED_NODE) |
| 47875 | DCI.AddToWorklist(N); |
| 47876 | return SDValue(N, 0); |
| 47877 | } |
| 47878 | |
| 47879 | return SDValue(); |
| 47880 | } |
| 47881 | |
/// Combines for (STRICT_)CVTPH2PS: the v8i16 -> v4f32 form only converts the
/// low four source elements, so prune the unused ones and narrow any feeding
/// full-width load.
static SDValue combineCVTPH2PS(SDNode *N, SelectionDAG &DAG,
                               TargetLowering::DAGCombinerInfo &DCI) {
  bool IsStrict = N->getOpcode() == X86ISD::STRICT_CVTPH2PS;
  // Strict nodes carry the chain as operand 0.
  SDValue Src = N->getOperand(IsStrict ? 1 : 0);

  if (N->getValueType(0) == MVT::v4f32 && Src.getValueType() == MVT::v8i16) {
    // Only the low 4 of the 8 source elements are used.
    APInt KnownUndef, KnownZero;
    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
    APInt DemandedElts = APInt::getLowBitsSet(8, 4);
    if (TLI.SimplifyDemandedVectorElts(Src, DemandedElts, KnownUndef, KnownZero,
                                       DCI)) {
      // Revisit N unless the simplification deleted it.
      if (N->getOpcode() != ISD::DELETED_NODE)
        DCI.AddToWorklist(N);
      return SDValue(N, 0);
    }

    // A single-use normal load of the full 128 bits can be shrunk to a
    // 64-bit zero-extending load of just the converted half.
    if (ISD::isNormalLoad(Src.getNode()) && Src.hasOneUse()) {
      LoadSDNode *LN = cast<LoadSDNode>(N->getOperand(IsStrict ? 1 : 0));
      if (SDValue VZLoad = narrowLoadToVZLoad(LN, MVT::i64, MVT::v2i64, DAG)) {
        SDLoc dl(N);
        if (IsStrict) {
          // Preserve the chain result on the strict form.
          SDValue Convert = DAG.getNode(
              N->getOpcode(), dl, {MVT::v4f32, MVT::Other},
              {N->getOperand(0), DAG.getBitcast(MVT::v8i16, VZLoad)});
          DCI.CombineTo(N, Convert, Convert.getValue(1));
        } else {
          SDValue Convert = DAG.getNode(N->getOpcode(), dl, MVT::v4f32,
                                        DAG.getBitcast(MVT::v8i16, VZLoad));
          DCI.CombineTo(N, Convert);
        }

        // Forward chain users of the old load, then delete it.
        DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), VZLoad.getValue(1));
        DCI.recursivelyDeleteUnusedNodes(LN);
        return SDValue(N, 0);
      }
    }
  }

  return SDValue();
}
| 47923 | |
| 47924 | |
/// Fold sext_in_reg of a CMOV of constants by sign-extending the constants
/// themselves, eliminating the sext_in_reg entirely.
static SDValue combineSextInRegCmov(SDNode *N, SelectionDAG &DAG) {
  assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG);

  EVT DstVT = N->getValueType(0);

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT ExtraVT = cast<VTSDNode>(N1)->getVT();

  // Only extend from i8/i16.
  if (ExtraVT != MVT::i8 && ExtraVT != MVT::i16)
    return SDValue();

  // Look through a single-use any_extend or truncate sitting between the
  // sext_in_reg and the cmov; it is re-applied to the constants below.
  SDValue IntermediateBitwidthOp;
  if ((N0.getOpcode() == ISD::ANY_EXTEND || N0.getOpcode() == ISD::TRUNCATE) &&
      N0.hasOneUse()) {
    IntermediateBitwidthOp = N0;
    N0 = N0.getOperand(0);
  }

  // Only single-use CMOV nodes are worth rewriting.
  if (N0.getOpcode() != X86ISD::CMOV || !N0.hasOneUse())
    return SDValue();

  SDValue CMovOp0 = N0.getOperand(0);
  SDValue CMovOp1 = N0.getOperand(1);

  // Both CMOV inputs must be constants so all the folds below are free.
  if (!isa<ConstantSDNode>(CMovOp0.getNode()) ||
      !isa<ConstantSDNode>(CMovOp1.getNode()))
    return SDValue();

  SDLoc DL(N);

  // Re-apply the skipped intermediate width change to each constant.
  if (IntermediateBitwidthOp) {
    unsigned IntermediateOpc = IntermediateBitwidthOp.getOpcode();
    CMovOp0 = DAG.getNode(IntermediateOpc, DL, DstVT, CMovOp0);
    CMovOp1 = DAG.getNode(IntermediateOpc, DL, DstVT, CMovOp1);
  }

  // Fold the in-register sign extension into the constants (constant
  // folding collapses these nodes immediately).
  CMovOp0 = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, DstVT, CMovOp0, N1);
  CMovOp1 = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, DstVT, CMovOp1, N1);

  EVT CMovVT = DstVT;
  // An i16 CMOV is avoided: widen the operands to i32 and truncate the
  // result back afterwards.
  if (DstVT == MVT::i16) {
    CMovVT = MVT::i32;
    CMovOp0 = DAG.getNode(ISD::ZERO_EXTEND, DL, CMovVT, CMovOp0);
    CMovOp1 = DAG.getNode(ISD::ZERO_EXTEND, DL, CMovVT, CMovOp1);
  }

  // Rebuild the CMOV with the extended constants, keeping the original
  // condition code (operand 2) and flags (operand 3).
  SDValue CMov = DAG.getNode(X86ISD::CMOV, DL, CMovVT, CMovOp0, CMovOp1,
                             N0.getOperand(2), N0.getOperand(3));

  if (CMovVT != DstVT)
    CMov = DAG.getNode(ISD::TRUNCATE, DL, DstVT, CMov);

  return CMov;
}
| 47985 | |
/// Do target-specific dag combines on ISD::SIGN_EXTEND_INREG nodes.
static SDValue combineSignExtendInReg(SDNode *N, SelectionDAG &DAG,
                                      const X86Subtarget &Subtarget) {
  assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG);

  // First try folding into a CMOV of constants.
  if (SDValue V = combineSextInRegCmov(N, DAG))
    return V;

  EVT VT = N->getValueType(0);
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT ExtraVT = cast<VTSDNode>(N1)->getVT();
  SDLoc dl(N);

  // Handle sext_in_reg of an any/sign extend producing v4i64: do the
  // in-register extension at the narrower width and widen afterwards.
  if (VT == MVT::v4i64 && (N0.getOpcode() == ISD::ANY_EXTEND ||
                           N0.getOpcode() == ISD::SIGN_EXTEND)) {
    SDValue N00 = N0.getOperand(0);

    // On AVX2, bail out for non-normal (e.g. extending) loads — presumably
    // so they stay matchable as extending loads; confirm against callers.
    if (N00.getOpcode() == ISD::LOAD && Subtarget.hasInt256())
      if (!ISD::isNormalLoad(N00.getNode()))
        return SDValue();

    // Try promoting any mask arithmetic feeding the extend first, keeping
    // the sext_in_reg on top of the promoted value.
    if (SDValue Promote = PromoteMaskArithmetic(N0.getNode(), DAG, Subtarget))
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT, Promote, N1);

    // sext_in_reg(ext(v4i32 x)) -> sign_extend(sext_in_reg(x)): perform the
    // in-register extension on the 128-bit type, then sign-extend to v4i64.
    if (N00.getValueType() == MVT::v4i32 && ExtraVT.getSizeInBits() < 128) {
      SDValue Tmp =
          DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, MVT::v4i32, N00, N1);
      return DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::v4i64, Tmp);
    }
  }
  return SDValue();
}
| 48027 | |
| 48028 | |
| 48029 | |
| 48030 | |
| 48031 | |
| 48032 | |
| 48033 | static SDValue promoteExtBeforeAdd(SDNode *Ext, SelectionDAG &DAG, |
| 48034 | const X86Subtarget &Subtarget) { |
| 48035 | if (Ext->getOpcode() != ISD::SIGN_EXTEND && |
| 48036 | Ext->getOpcode() != ISD::ZERO_EXTEND) |
| 48037 | return SDValue(); |
| 48038 | |
| 48039 | |
| 48040 | EVT VT = Ext->getValueType(0); |
| 48041 | if (VT != MVT::i64) |
| 48042 | return SDValue(); |
| 48043 | |
| 48044 | SDValue Add = Ext->getOperand(0); |
| 48045 | if (Add.getOpcode() != ISD::ADD) |
| 48046 | return SDValue(); |
| 48047 | |
| 48048 | bool Sext = Ext->getOpcode() == ISD::SIGN_EXTEND; |
| 48049 | bool NSW = Add->getFlags().hasNoSignedWrap(); |
| 48050 | bool NUW = Add->getFlags().hasNoUnsignedWrap(); |
| 48051 | |
| 48052 | |
| 48053 | |
| 48054 | if ((Sext && !NSW) || (!Sext && !NUW)) |
| 48055 | return SDValue(); |
| 48056 | |
| 48057 | |
| 48058 | |
| 48059 | |
| 48060 | auto *AddOp1 = dyn_cast<ConstantSDNode>(Add.getOperand(1)); |
| 48061 | if (!AddOp1) |
| 48062 | return SDValue(); |
| 48063 | |
| 48064 | |
| 48065 | |
| 48066 | |
| 48067 | |
| 48068 | |
| 48069 | bool HasLEAPotential = false; |
| 48070 | for (auto *User : Ext->uses()) { |
| 48071 | if (User->getOpcode() == ISD::ADD || User->getOpcode() == ISD::SHL) { |
| 48072 | HasLEAPotential = true; |
| 48073 | break; |
| 48074 | } |
| 48075 | } |
| 48076 | if (!HasLEAPotential) |
| 48077 | return SDValue(); |
| 48078 | |
| 48079 | |
| 48080 | int64_t AddConstant = Sext ? AddOp1->getSExtValue() : AddOp1->getZExtValue(); |
| 48081 | SDValue AddOp0 = Add.getOperand(0); |
| 48082 | SDValue NewExt = DAG.getNode(Ext->getOpcode(), SDLoc(Ext), VT, AddOp0); |
| 48083 | SDValue NewConstant = DAG.getConstant(AddConstant, SDLoc(Add), VT); |
| 48084 | |
| 48085 | |
| 48086 | |
| 48087 | SDNodeFlags Flags; |
| 48088 | Flags.setNoSignedWrap(NSW); |
| 48089 | Flags.setNoUnsignedWrap(NUW); |
| 48090 | return DAG.getNode(ISD::ADD, SDLoc(Add), VT, NewExt, NewConstant, Flags); |
| 48091 | } |
| 48092 | |
| 48093 | |
| 48094 | |
| 48095 | |
| 48096 | |
| 48097 | |
| 48098 | |
| 48099 | |
| 48100 | |
| 48101 | |
| 48102 | |
| 48103 | |
| 48104 | |
| 48105 | static SDValue combineToExtendCMOV(SDNode *Extend, SelectionDAG &DAG) { |
| 48106 | SDValue CMovN = Extend->getOperand(0); |
| 48107 | if (CMovN.getOpcode() != X86ISD::CMOV || !CMovN.hasOneUse()) |
| 48108 | return SDValue(); |
| 48109 | |
| 48110 | EVT TargetVT = Extend->getValueType(0); |
| 48111 | unsigned ExtendOpcode = Extend->getOpcode(); |
| 48112 | SDLoc DL(Extend); |
| 48113 | |
| 48114 | EVT VT = CMovN.getValueType(); |
| 48115 | SDValue CMovOp0 = CMovN.getOperand(0); |
| 48116 | SDValue CMovOp1 = CMovN.getOperand(1); |
| 48117 | |
| 48118 | if (!isa<ConstantSDNode>(CMovOp0.getNode()) || |
| 48119 | !isa<ConstantSDNode>(CMovOp1.getNode())) |
| 48120 | return SDValue(); |
| 48121 | |
| 48122 | |
| 48123 | if (TargetVT != MVT::i32 && TargetVT != MVT::i64) |
| 48124 | return SDValue(); |
| 48125 | |
| 48126 | |
| 48127 | |
| 48128 | if (VT != MVT::i16 && !(ExtendOpcode == ISD::SIGN_EXTEND && VT == MVT::i32)) |
| 48129 | return SDValue(); |
| 48130 | |
| 48131 | |
| 48132 | |
| 48133 | EVT ExtendVT = TargetVT; |
| 48134 | if (TargetVT == MVT::i64 && ExtendOpcode != ISD::SIGN_EXTEND) |
| 48135 | ExtendVT = MVT::i32; |
| 48136 | |
| 48137 | CMovOp0 = DAG.getNode(ExtendOpcode, DL, ExtendVT, CMovOp0); |
| 48138 | CMovOp1 = DAG.getNode(ExtendOpcode, DL, ExtendVT, CMovOp1); |
| 48139 | |
| 48140 | SDValue Res = DAG.getNode(X86ISD::CMOV, DL, ExtendVT, CMovOp0, CMovOp1, |
| 48141 | CMovN.getOperand(2), CMovN.getOperand(3)); |
| 48142 | |
| 48143 | |
| 48144 | if (ExtendVT != TargetVT) |
| 48145 | Res = DAG.getNode(ExtendOpcode, DL, TargetVT, Res); |
| 48146 | |
| 48147 | return Res; |
| 48148 | } |
| 48149 | |
| 48150 | |
| 48151 | |
// Try to turn (sext/zext/aext (bitcast (iN scalar) to vNi1)) into a vector
// "broadcast + bit-mask + compare" sequence: replicate the scalar across the
// result vector, AND each lane with the single bit it should hold, and
// compare against that mask to materialize an all-ones / all-zeros lane.
// Only used on pre-AVX512 targets (AVX512 extends mask registers natively).
static SDValue
combineToExtendBoolVectorInReg(SDNode *N, SelectionDAG &DAG,
                               TargetLowering::DAGCombinerInfo &DCI,
                               const X86Subtarget &Subtarget) {
  unsigned Opcode = N->getOpcode();
  // Only the three integer-extension opcodes are handled.
  if (Opcode != ISD::SIGN_EXTEND && Opcode != ISD::ZERO_EXTEND &&
      Opcode != ISD::ANY_EXTEND)
    return SDValue();
  // Only fire before operation legalization, while arbitrary shuffles and
  // build_vectors are still acceptable.
  if (!DCI.isBeforeLegalizeOps())
    return SDValue();
  // Needs SSE2 shuffles; skip on AVX512, which has native mask extension.
  if (!Subtarget.hasSSE2() || Subtarget.hasAVX512())
    return SDValue();

  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT SVT = VT.getScalarType();
  EVT InSVT = N0.getValueType().getScalarType();
  unsigned EltSizeInBits = SVT.getSizeInBits();

  // Input must be a bitcast of a scalar integer to a vXi1 vector, and the
  // result must be a vector of standard integer element widths.
  if (!VT.isVector())
    return SDValue();
  if (SVT != MVT::i64 && SVT != MVT::i32 && SVT != MVT::i16 && SVT != MVT::i8)
    return SDValue();
  if (InSVT != MVT::i1 || N0.getOpcode() != ISD::BITCAST)
    return SDValue();

  SDValue N00 = N0.getOperand(0);
  EVT SclVT = N0.getOperand(0).getValueType();
  if (!SclVT.isScalarInteger())
    return SDValue();

  SDLoc DL(N);
  SDValue Vec;
  SmallVector<int, 32> ShuffleMask;
  unsigned NumElts = VT.getVectorNumElements();
  // One result element per bit of the source scalar.
  assert(NumElts == SclVT.getSizeInBits() && "Unexpected bool vector size");

  // Broadcast the scalar so that the element covering result lane i holds
  // source bit (i % EltSizeInBits).
  if (NumElts > EltSizeInBits) {
    // Scalar is wider than a result element: splat the scalar into
    // EltSizeInBits copies, bitcast to the result type, then replicate each
    // EltSizeInBits-sized chunk so lane i sees the chunk containing its bit.
    assert((NumElts % EltSizeInBits) == 0 && "Unexpected integer scale");
    unsigned Scale = NumElts / EltSizeInBits;
    EVT BroadcastVT =
        EVT::getVectorVT(*DAG.getContext(), SclVT, EltSizeInBits);
    Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, BroadcastVT, N00);
    Vec = DAG.getBitcast(VT, Vec);

    for (unsigned i = 0; i != Scale; ++i)
      ShuffleMask.append(EltSizeInBits, i);
    Vec = DAG.getVectorShuffle(VT, DL, Vec, Vec, ShuffleMask);
  } else if (Subtarget.hasAVX2() && NumElts < EltSizeInBits &&
             (SclVT == MVT::i8 || SclVT == MVT::i16 || SclVT == MVT::i32)) {
    // Scalar is narrower than a result element: broadcast it in its own
    // width (cheap element broadcast on AVX2) and bitcast, so each result
    // lane contains (EltSizeInBits / NumElts) copies of the scalar.
    assert((EltSizeInBits % NumElts) == 0 && "Unexpected integer scale");
    unsigned Scale = EltSizeInBits / NumElts;
    EVT BroadcastVT =
        EVT::getVectorVT(*DAG.getContext(), SclVT, NumElts * Scale);
    Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, BroadcastVT, N00);
    ShuffleMask.append(NumElts * Scale, 0);
    Vec = DAG.getVectorShuffle(BroadcastVT, DL, Vec, Vec, ShuffleMask);
    Vec = DAG.getBitcast(VT, Vec);
  } else {
    // Fallback: extend/truncate the scalar to the element width and splat
    // it across all lanes with an all-zero shuffle mask.
    SDValue Scl = DAG.getAnyExtOrTrunc(N00, DL, SVT);
    Vec = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Scl);
    ShuffleMask.append(NumElts, 0);
    Vec = DAG.getVectorShuffle(VT, DL, Vec, Vec, ShuffleMask);
  }

  // Isolate each lane's bit: lane i keeps only bit (i % EltSizeInBits).
  SmallVector<SDValue, 32> Bits;
  for (unsigned i = 0; i != NumElts; ++i) {
    int BitIdx = (i % EltSizeInBits);
    APInt Bit = APInt::getBitsSet(EltSizeInBits, BitIdx, BitIdx + 1);
    Bits.push_back(DAG.getConstant(Bit, DL, SVT));
  }
  SDValue BitMask = DAG.getBuildVector(VT, DL, Bits);
  Vec = DAG.getNode(ISD::AND, DL, VT, Vec, BitMask);

  // Compare each masked lane against its mask bit and sign-extend the i1
  // result: a set bit becomes all-ones in its lane, a clear bit all-zeros.
  EVT CCVT = VT.changeVectorElementType(MVT::i1);
  Vec = DAG.getSetCC(DL, CCVT, Vec, BitMask, ISD::SETEQ);
  Vec = DAG.getSExtOrTrunc(Vec, DL, VT);

  // The all-ones/all-zeros lanes are exactly the sign-extended result; for
  // zero/any extend, shift the sign bit down to produce 0/1 per lane.
  if (Opcode == ISD::SIGN_EXTEND)
    return Vec;
  return DAG.getNode(ISD::SRL, DL, VT, Vec,
                     DAG.getConstant(EltSizeInBits - 1, DL, VT));
}
| 48253 | |
| 48254 | |
| 48255 | |
// Fold (sext/zext (setcc x, y, cc)) -> (setcc x, y, cc) producing the wide
// vector result directly, when an AVX512 compare can write it in one step.
static SDValue combineExtSetcc(SDNode *N, SelectionDAG &DAG,
                               const X86Subtarget &Subtarget) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  SDLoc dl(N);

  // Only do this combine with AVX512, and only for vector extends of setcc.
  if (!Subtarget.hasAVX512() || !VT.isVector() || N0.getOpcode() != ISD::SETCC)
    return SDValue();

  // Restrict to the element types handled below.
  EVT SVT = VT.getVectorElementType();
  if (SVT != MVT::i8 && SVT != MVT::i16 && SVT != MVT::i32 &&
      SVT != MVT::i64 && SVT != MVT::f32 && SVT != MVT::f64)
    return SDValue();

  // When 512-bit registers are in use, wider-than-256-bit results are left
  // alone (the mask-register form is preferred there).
  unsigned Size = VT.getSizeInBits();
  if (Size > 256 && Subtarget.useAVX512Regs())
    return SDValue();

  // Unsigned integer predicates are not handled by this fold.
  ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
  if (ISD::isUnsignedIntSetCC(CC))
    return SDValue();

  // The integer analogue of the compared operands must match the result
  // size bit-for-bit, so the setcc can simply be re-issued at type VT.
  EVT N00VT = N0.getOperand(0).getValueType();
  EVT MatchingVecType = N00VT.changeVectorElementTypeToInteger();
  if (Size != MatchingVecType.getSizeInBits())
    return SDValue();

  SDValue Res = DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);

  // A widened compare yields all-ones lanes (sign-extension semantics); for
  // zero-extension, mask back down to the original narrow lane value.
  if (N->getOpcode() == ISD::ZERO_EXTEND)
    Res = DAG.getZeroExtendInReg(Res, dl, N0.getValueType());

  return Res;
}
| 48296 | |
// DAG combine driver for ISD::SIGN_EXTEND.
static SDValue combineSext(SDNode *N, SelectionDAG &DAG,
                           TargetLowering::DAGCombinerInfo &DCI,
                           const X86Subtarget &Subtarget) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // (sext (SETCC_CARRY)) -> SETCC_CARRY at the wider type: the carry
  // materialization already produces an all-ones/all-zeros value, so the
  // extension is free.
  if (!DCI.isBeforeLegalizeOps() &&
      N0.getOpcode() == X86ISD::SETCC_CARRY) {
    SDValue Setcc = DAG.getNode(X86ISD::SETCC_CARRY, DL, VT, N0->getOperand(0),
                                N0->getOperand(1));
    bool ReplaceOtherUses = !N0.hasOneUse();
    DCI.CombineTo(N, Setcc);
    // Rewire any other users of the narrow node to a truncate of the wide
    // value so the old SETCC_CARRY can go away.
    if (ReplaceOtherUses) {
      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
                                  N0.getValueType(), Setcc);
      DCI.CombineTo(N0.getNode(), Trunc);
    }

    return SDValue(N, 0);
  }

  if (SDValue NewCMov = combineToExtendCMOV(N, DAG))
    return NewCMov;

  // The remaining folds only run before operation legalization.
  if (!DCI.isBeforeLegalizeOps())
    return SDValue();

  if (SDValue V = combineExtSetcc(N, DAG, Subtarget))
    return V;

  if (SDValue V = combineToExtendBoolVectorInReg(N, DAG, DCI, Subtarget))
    return V;

  if (VT.isVector()) {
    if (SDValue R = PromoteMaskArithmetic(N, DAG, Subtarget))
      return R;

    // (sext (sext_invec x)) -> (sext_invec x) at the wider result type.
    if (N0.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG)
      return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0));
  }

  if (SDValue NewAdd = promoteExtBeforeAdd(N, DAG, Subtarget))
    return NewAdd;

  return SDValue();
}
| 48346 | |
// Combine FMA-family nodes: unfold to fmul+fadd when FMA would be expanded
// anyway (and reassociation is allowed), and absorb negated operands into
// the negated FMA opcode variants via negateFMAOpcode.
static SDValue combineFMA(SDNode *N, SelectionDAG &DAG,
                          TargetLowering::DAGCombinerInfo &DCI,
                          const X86Subtarget &Subtarget) {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  bool IsStrict = N->isStrictFPOpcode() || N->isTargetStrictFPOpcode();

  // Let legalize deal with illegal types first.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (!TLI.isTypeLegal(VT))
    return SDValue();

  // Strict nodes carry the chain in operand 0, shifting the A/B/C indices.
  SDValue A = N->getOperand(IsStrict ? 1 : 0);
  SDValue B = N->getOperand(IsStrict ? 2 : 1);
  SDValue C = N->getOperand(IsStrict ? 3 : 2);

  // If FMA would be expanded for this type and reassociation is permitted,
  // split into a separate multiply and add instead.
  SDNodeFlags Flags = N->getFlags();
  if (!IsStrict && Flags.hasAllowReassociation() &&
      TLI.isOperationExpand(ISD::FMA, VT)) {
    SDValue Fmul = DAG.getNode(ISD::FMUL, dl, VT, A, B, Flags);
    return DAG.getNode(ISD::FADD, dl, VT, Fmul, C, Flags);
  }

  // Negation folding below only applies to f32/f64 with FMA support.
  EVT ScalarVT = VT.getScalarType();
  if ((ScalarVT != MVT::f32 && ScalarVT != MVT::f64) || !Subtarget.hasAnyFMA())
    return SDValue();

  // If V has a cheaper negated form (possibly behind an extract of vector
  // lane 0), strip the negation in place and report true.
  auto invertIfNegative = [&DAG, &TLI, &DCI](SDValue &V) {
    bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
    bool LegalOperations = !DCI.isBeforeLegalizeOps();
    if (SDValue NegV = TLI.getCheaperNegatedExpression(V, DAG, LegalOperations,
                                                       CodeSize)) {
      V = NegV;
      return true;
    }

    // Look through an extract_vector_elt of element 0.
    if (V.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
        isNullConstant(V.getOperand(1))) {
      SDValue Vec = V.getOperand(0);
      if (SDValue NegV = TLI.getCheaperNegatedExpression(
              Vec, DAG, LegalOperations, CodeSize)) {
        V = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(V), V.getValueType(),
                        NegV, V.getOperand(1));
        return true;
      }
    }

    return false;
  };

  // Strip any negations from the three inputs.
  bool NegA = invertIfNegative(A);
  bool NegB = invertIfNegative(B);
  bool NegC = invertIfNegative(C);

  if (!NegA && !NegB && !NegC)
    return SDValue();

  // Negating exactly one of A/B flips the product's sign; NegC flips the
  // accumulator. negateFMAOpcode picks the matching FNMADD/FMSUB/... form.
  unsigned NewOpcode =
      negateFMAOpcode(N->getOpcode(), NegA != NegB, NegC, false);

  // Propagate the original node's flags onto everything created below.
  SelectionDAG::FlagInserter FlagsInserter(DAG, Flags);
  if (IsStrict) {
    assert(N->getNumOperands() == 4 && "Shouldn't be greater than 4");
    return DAG.getNode(NewOpcode, dl, {VT, MVT::Other},
                       {N->getOperand(0), A, B, C});
  } else {
    // A 4th operand, when present, is carried over unchanged (presumably a
    // rounding-control immediate — confirm against the opcode definitions).
    if (N->getNumOperands() == 4)
      return DAG.getNode(NewOpcode, dl, VT, A, B, C, N->getOperand(3));
    return DAG.getNode(NewOpcode, dl, VT, A, B, C);
  }
}
| 48424 | |
| 48425 | |
| 48426 | |
// Combine FMADDSUB-style nodes whose accumulator is cheaper in negated
// form: switch to the opcode variant that negates the accumulator (via
// negateFMAOpcode with NegAcc=true) and use the negated expression.
static SDValue combineFMADDSUB(SDNode *N, SelectionDAG &DAG,
                               TargetLowering::DAGCombinerInfo &DCI) {
  SDLoc dl(N);
  EVT VT = N->getValueType(0);
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
  bool LegalOperations = !DCI.isBeforeLegalizeOps();

  SDValue N2 = N->getOperand(2);

  // Only profitable when the negated accumulator is strictly cheaper.
  SDValue NegN2 =
      TLI.getCheaperNegatedExpression(N2, DAG, LegalOperations, CodeSize);
  if (!NegN2)
    return SDValue();
  unsigned NewOpcode = negateFMAOpcode(N->getOpcode(), false, true, false);

  // A 4th operand, when present, is carried over unchanged (presumably a
  // rounding-control immediate — confirm against the opcode definitions).
  if (N->getNumOperands() == 4)
    return DAG.getNode(NewOpcode, dl, VT, N->getOperand(0), N->getOperand(1),
                       NegN2, N->getOperand(3));
  return DAG.getNode(NewOpcode, dl, VT, N->getOperand(0), N->getOperand(1),
                     NegN2);
}
| 48449 | |
// DAG combine driver for ISD::ZERO_EXTEND / ISD::ANY_EXTEND.
static SDValue combineZext(SDNode *N, SelectionDAG &DAG,
                           TargetLowering::DAGCombinerInfo &DCI,
                           const X86Subtarget &Subtarget) {
  SDLoc dl(N);
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // (aext (SETCC_CARRY)) -> SETCC_CARRY at the wider type; any-extend does
  // not care that the carry value is sign- rather than zero-extended.
  if (!DCI.isBeforeLegalizeOps() && N->getOpcode() == ISD::ANY_EXTEND &&
      N0.getOpcode() == X86ISD::SETCC_CARRY) {
    SDValue Setcc = DAG.getNode(X86ISD::SETCC_CARRY, dl, VT, N0->getOperand(0),
                                N0->getOperand(1));
    bool ReplaceOtherUses = !N0.hasOneUse();
    DCI.CombineTo(N, Setcc);
    // Rewire any other users of the narrow node to a truncate of the wide
    // value so the old SETCC_CARRY can go away.
    if (ReplaceOtherUses) {
      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
                                  N0.getValueType(), Setcc);
      DCI.CombineTo(N0.getNode(), Trunc);
    }

    return SDValue(N, 0);
  }

  if (SDValue NewCMov = combineToExtendCMOV(N, DAG))
    return NewCMov;

  if (DCI.isBeforeLegalizeOps())
    if (SDValue V = combineExtSetcc(N, DAG, Subtarget))
      return V;

  if (SDValue V = combineToExtendBoolVectorInReg(N, DAG, DCI, Subtarget))
    return V;

  if (VT.isVector())
    if (SDValue R = PromoteMaskArithmetic(N, DAG, Subtarget))
      return R;

  if (SDValue NewAdd = promoteExtBeforeAdd(N, DAG, Subtarget))
    return NewAdd;

  if (SDValue R = combineOrCmpEqZeroToCtlzSrl(N, DAG, DCI, Subtarget))
    return R;

  // (ext (packus x, y)) -> (concat x, y) when the top half of each source
  // element is already known zero, i.e. the pack discarded nothing the
  // extension would need to restore.
  if (N0.getOpcode() == X86ISD::PACKUS && N0.getValueSizeInBits() == 128 &&
      VT.getScalarSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits()) {
    SDValue N00 = N0.getOperand(0);
    SDValue N01 = N0.getOperand(1);
    unsigned NumSrcEltBits = N00.getScalarValueSizeInBits();
    APInt ZeroMask = APInt::getHighBitsSet(NumSrcEltBits, NumSrcEltBits / 2);
    if ((N00.isUndef() || DAG.MaskedValueIsZero(N00, ZeroMask)) &&
        (N01.isUndef() || DAG.MaskedValueIsZero(N01, ZeroMask))) {
      return concatSubVectors(N00, N01, DAG, dl);
    }
  }

  return SDValue();
}
| 48510 | |
| 48511 | |
| 48512 | |
| 48513 | static bool isOrXorXorTree(SDValue X, bool Root = true) { |
| 48514 | if (X.getOpcode() == ISD::OR) |
| 48515 | return isOrXorXorTree(X.getOperand(0), false) && |
| 48516 | isOrXorXorTree(X.getOperand(1), false); |
| 48517 | if (Root) |
| 48518 | return false; |
| 48519 | return X.getOpcode() == ISD::XOR; |
| 48520 | } |
| 48521 | |
| 48522 | |
| 48523 | |
| 48524 | template<typename F> |
| 48525 | static SDValue emitOrXorXorTree(SDValue X, SDLoc &DL, SelectionDAG &DAG, |
| 48526 | EVT VecVT, EVT CmpVT, bool HasPT, F SToV) { |
| 48527 | SDValue Op0 = X.getOperand(0); |
| 48528 | SDValue Op1 = X.getOperand(1); |
| 48529 | if (X.getOpcode() == ISD::OR) { |
| 48530 | SDValue A = emitOrXorXorTree(Op0, DL, DAG, VecVT, CmpVT, HasPT, SToV); |
| 48531 | SDValue B = emitOrXorXorTree(Op1, DL, DAG, VecVT, CmpVT, HasPT, SToV); |
| 48532 | if (VecVT != CmpVT) |
| 48533 | return DAG.getNode(ISD::OR, DL, CmpVT, A, B); |
| 48534 | if (HasPT) |
| 48535 | return DAG.getNode(ISD::OR, DL, VecVT, A, B); |
| 48536 | return DAG.getNode(ISD::AND, DL, CmpVT, A, B); |
| 48537 | } else if (X.getOpcode() == ISD::XOR) { |
| 48538 | SDValue A = SToV(Op0); |
| 48539 | SDValue B = SToV(Op1); |
| 48540 | if (VecVT != CmpVT) |
| 48541 | return DAG.getSetCC(DL, CmpVT, A, B, ISD::SETNE); |
| 48542 | if (HasPT) |
| 48543 | return DAG.getNode(ISD::XOR, DL, VecVT, A, B); |
| 48544 | return DAG.getSetCC(DL, CmpVT, A, B, ISD::SETEQ); |
| 48545 | } |
| 48546 | llvm_unreachable("Impossible"); |
| 48547 | } |
| 48548 | |
| 48549 | |
| 48550 | |
// Try to map a 128-bit-or-wider scalar integer equality comparison (and
// or/xor/xor trees compared against zero) onto vector compare hardware:
// PTEST on SSE4.1+, MOVMSK on plain SSE2, or mask-register KORTEST-style
// compares when mask registers are preferred.
static SDValue combineVectorSizedSetCCEquality(SDNode *SetCC, SelectionDAG &DAG,
                                               const X86Subtarget &Subtarget) {
  ISD::CondCode CC = cast<CondCodeSDNode>(SetCC->getOperand(2))->get();
  assert((CC == ISD::SETNE || CC == ISD::SETEQ) && "Bad comparison predicate");

  // Only oversized (>= 128-bit) scalar integer equality comparisons.
  SDValue X = SetCC->getOperand(0);
  SDValue Y = SetCC->getOperand(1);
  EVT OpVT = X.getValueType();
  unsigned OpSize = OpVT.getSizeInBits();
  if (!OpVT.isScalarInteger() || OpSize < 128)
    return SDValue();

  // Comparisons against zero are skipped — except when the LHS is an
  // or/xor/xor tree, which emitOrXorXorTree can lower without ever
  // materializing the wide scalar.
  bool IsOrXorXorTreeCCZero = isNullConstant(Y) && isOrXorXorTree(X);
  if (isNullConstant(Y) && !IsOrXorXorTreeCCZero)
    return SDValue();

  // Bail if moving either operand into a vector register would be
  // expensive; constants, existing vectors (behind bitcasts) and loads are
  // considered cheap.
  auto IsVectorBitCastCheap = [](SDValue X) {
    X = peekThroughBitcasts(X);
    return isa<ConstantSDNode>(X) || X.getValueType().isVector() ||
           X.getOpcode() == ISD::LOAD;
  };
  if ((!IsVectorBitCastCheap(X) || !IsVectorBitCastCheap(Y)) &&
      !IsOrXorXorTreeCCZero)
    return SDValue();

  EVT VT = SetCC->getValueType(0);
  SDLoc DL(SetCC);

  // Require vector registers wide enough for the operand size.
  if ((OpSize == 128 && Subtarget.hasSSE2()) ||
      (OpSize == 256 && Subtarget.hasAVX()) ||
      (OpSize == 512 && Subtarget.useAVX512Regs())) {
    bool HasPT = Subtarget.hasSSE41();

    // On targets that prefer mask registers, compare into vXi1 and test the
    // mask; NeedZExt widens sub-512-bit operands into a zeroed 512-bit
    // vector when no VLX is available for the narrow mask compares.
    bool PreferKOT = Subtarget.preferMaskRegisters();
    bool NeedZExt = PreferKOT && !Subtarget.hasVLX() && OpSize != 512;

    EVT VecVT = MVT::v16i8;
    EVT CmpVT = PreferKOT ? MVT::v16i1 : VecVT;
    if (OpSize == 256) {
      VecVT = MVT::v32i8;
      CmpVT = PreferKOT ? MVT::v32i1 : VecVT;
    }
    EVT CastVT = VecVT;
    bool NeedsAVX512FCast = false;
    if (OpSize == 512 || NeedZExt) {
      if (Subtarget.hasBWI()) {
        VecVT = MVT::v64i8;
        CmpVT = MVT::v64i1;
        if (OpSize == 512)
          CastVT = VecVT;
      } else {
        // Without BWI, fall back to i32 element compares (AVX512F).
        VecVT = MVT::v16i32;
        CmpVT = MVT::v16i1;
        CastVT = OpSize == 512 ? VecVT :
                 OpSize == 256 ? MVT::v8i32 : MVT::v4i32;
        NeedsAVX512FCast = true;
      }
    }

    // Bitcast a scalar operand into vector form, looking through a
    // zero-extend from 128/256 bits (the zeroed upper part is recreated by
    // inserting into a zero vector), and widening into VecVT when needed.
    auto ScalarToVector = [&](SDValue X) -> SDValue {
      bool TmpZext = false;
      EVT TmpCastVT = CastVT;
      if (X.getOpcode() == ISD::ZERO_EXTEND) {
        SDValue OrigX = X.getOperand(0);
        unsigned OrigSize = OrigX.getScalarValueSizeInBits();
        if (OrigSize < OpSize) {
          if (OrigSize == 128) {
            TmpCastVT = NeedsAVX512FCast ? MVT::v4i32 : MVT::v16i8;
            X = OrigX;
            TmpZext = true;
          } else if (OrigSize == 256) {
            TmpCastVT = NeedsAVX512FCast ? MVT::v8i32 : MVT::v32i8;
            X = OrigX;
            TmpZext = true;
          }
        }
      }
      X = DAG.getBitcast(TmpCastVT, X);
      if (!NeedZExt && !TmpZext)
        return X;
      // Widen by inserting into a zero vector at index 0.
      return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT,
                         DAG.getConstant(0, DL, VecVT), X,
                         DAG.getVectorIdxConstant(0, DL));
    };

    SDValue Cmp;
    if (IsOrXorXorTreeCCZero) {
      // Lower the or/xor/xor tree directly in vector form; the leaves are
      // compared pairwise so the wide scalar OR never materializes.
      Cmp = emitOrXorXorTree(X, DL, DAG, VecVT, CmpVT, HasPT, ScalarToVector);
    } else {
      SDValue VecX = ScalarToVector(X);
      SDValue VecY = ScalarToVector(Y);
      if (VecVT != CmpVT) {
        // Mask-register form: per-lane inequality mask.
        Cmp = DAG.getSetCC(DL, CmpVT, VecX, VecY, ISD::SETNE);
      } else if (HasPT) {
        // PTEST form: nonzero XOR means "not equal".
        Cmp = DAG.getNode(ISD::XOR, DL, VecVT, VecX, VecY);
      } else {
        // MOVMSK form: per-lane equality mask.
        Cmp = DAG.getSetCC(DL, CmpVT, VecX, VecY, ISD::SETEQ);
      }
    }

    // Mask-register path: compare the mask (as a scalar k-register value)
    // against zero.
    if (VecVT != CmpVT) {
      EVT KRegVT = CmpVT == MVT::v64i1 ? MVT::i64 :
                   CmpVT == MVT::v32i1 ? MVT::i32 : MVT::i16;
      return DAG.getSetCC(DL, VT, DAG.getBitcast(KRegVT, Cmp),
                          DAG.getConstant(0, DL, KRegVT), CC);
    }
    // PTEST path: test the XOR difference against itself; ZF set <=> equal.
    if (HasPT) {
      SDValue BCCmp = DAG.getBitcast(OpSize == 256 ? MVT::v4i64 : MVT::v2i64,
                                     Cmp);
      SDValue PT = DAG.getNode(X86ISD::PTEST, DL, MVT::i32, BCCmp, BCCmp);
      X86::CondCode X86CC = CC == ISD::SETEQ ? X86::COND_E : X86::COND_NE;
      SDValue X86SetCC = getSETCC(X86CC, PT, DL, DAG);
      return DAG.getNode(ISD::TRUNCATE, DL, VT, X86SetCC.getValue(0));
    }

    // MOVMSK path (pre-SSE4.1, 128-bit only): all 16 byte lanes equal
    // <=> the movmsk of the equality mask is 0xFFFF.
    assert(Cmp.getValueType() == MVT::v16i8 &&
           "Non 128-bit vector on pre-SSE41 target");
    SDValue MovMsk = DAG.getNode(X86ISD::MOVMSK, DL, MVT::i32, Cmp);
    SDValue FFFFs = DAG.getConstant(0xFFFF, DL, MVT::i32);
    return DAG.getSetCC(DL, VT, MovMsk, FFFFs, CC);
  }

  return SDValue();
}
| 48694 | |
// DAG combine driver for ISD::SETCC.
static SDValue combineSetCC(SDNode *N, SelectionDAG &DAG,
                            TargetLowering::DAGCombinerInfo &DCI,
                            const X86Subtarget &Subtarget) {
  const ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
  const SDValue LHS = N->getOperand(0);
  const SDValue RHS = N->getOperand(1);
  EVT VT = N->getValueType(0);
  EVT OpVT = LHS.getValueType();
  SDLoc DL(N);

  if (CC == ISD::SETNE || CC == ISD::SETEQ) {
    // Wide scalar equality -> vector compare hardware.
    if (SDValue V = combineVectorSizedSetCCEquality(N, DAG, Subtarget))
      return V;

    // (setcc i1 X, 0) where X reduces to an all-zero vector test.
    if (VT == MVT::i1 && isNullConstant(RHS)) {
      SDValue X86CC;
      if (SDValue V =
              MatchVectorAllZeroTest(LHS, CC, DL, Subtarget, DAG, X86CC))
        return DAG.getNode(ISD::TRUNCATE, DL, VT,
                           DAG.getNode(X86ISD::SETCC, DL, MVT::i8, X86CC, V));
    }

    if (OpVT.isScalarInteger()) {
      // cmpeq(or(X,Y), X) --> cmpeq(and(~X,Y), 0)
      // cmpne(or(X,Y), X) --> cmpne(and(~X,Y), 0)
      auto MatchOrCmpEq = [&](SDValue N0, SDValue N1) {
        if (N0.getOpcode() == ISD::OR && N0->hasOneUse()) {
          if (N0.getOperand(0) == N1)
            return DAG.getNode(ISD::AND, DL, OpVT, DAG.getNOT(DL, N1, OpVT),
                               N0.getOperand(1));
          if (N0.getOperand(1) == N1)
            return DAG.getNode(ISD::AND, DL, OpVT, DAG.getNOT(DL, N1, OpVT),
                               N0.getOperand(0));
        }
        return SDValue();
      };
      if (SDValue AndN = MatchOrCmpEq(LHS, RHS))
        return DAG.getSetCC(DL, VT, AndN, DAG.getConstant(0, DL, OpVT), CC);
      if (SDValue AndN = MatchOrCmpEq(RHS, LHS))
        return DAG.getSetCC(DL, VT, AndN, DAG.getConstant(0, DL, OpVT), CC);

      // cmpeq(and(X,Y), Y) --> cmpeq(and(~X,Y), 0)
      // cmpne(and(X,Y), Y) --> cmpne(and(~X,Y), 0)
      auto MatchAndCmpEq = [&](SDValue N0, SDValue N1) {
        if (N0.getOpcode() == ISD::AND && N0->hasOneUse()) {
          if (N0.getOperand(0) == N1)
            return DAG.getNode(ISD::AND, DL, OpVT, N1,
                               DAG.getNOT(DL, N0.getOperand(1), OpVT));
          if (N0.getOperand(1) == N1)
            return DAG.getNode(ISD::AND, DL, OpVT, N1,
                               DAG.getNOT(DL, N0.getOperand(0), OpVT));
        }
        return SDValue();
      };
      if (SDValue AndN = MatchAndCmpEq(LHS, RHS))
        return DAG.getSetCC(DL, VT, AndN, DAG.getConstant(0, DL, OpVT), CC);
      if (SDValue AndN = MatchAndCmpEq(RHS, LHS))
        return DAG.getSetCC(DL, VT, AndN, DAG.getConstant(0, DL, OpVT), CC);

      // cmpeq(trunc(x),0) --> cmpeq(x,0) when the truncated bits are known
      // zero, so the compare can happen at the (legal) source width.
      if (LHS.getOpcode() == ISD::TRUNCATE &&
          LHS.getOperand(0).getScalarValueSizeInBits() >= 32 &&
          isNullConstant(RHS) && !DCI.isBeforeLegalize()) {
        EVT SrcVT = LHS.getOperand(0).getValueType();
        APInt UpperBits = APInt::getBitsSetFrom(SrcVT.getScalarSizeInBits(),
                                                OpVT.getScalarSizeInBits());
        const TargetLowering &TLI = DAG.getTargetLoweringInfo();
        if (DAG.MaskedValueIsZero(LHS.getOperand(0), UpperBits) &&
            TLI.isTypeLegal(LHS.getOperand(0).getValueType()))
          return DAG.getSetCC(DL, VT, LHS.getOperand(0),
                              DAG.getConstant(0, DL, SrcVT), CC);
      }
    }
  }

  if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
      (CC == ISD::SETNE || CC == ISD::SETEQ || ISD::isSignedIntSetCC(CC))) {
    // Simplify compares of (sext vXi1) against an all-zeros build_vector.
    // Canonicalize so the build_vector is on the RHS.
    SDValue Op0 = LHS;
    SDValue Op1 = RHS;
    ISD::CondCode TmpCC = CC;

    if (Op0.getOpcode() == ISD::BUILD_VECTOR) {
      std::swap(Op0, Op1);
      TmpCC = ISD::getSetCCSwappedOperands(TmpCC);
    }

    bool IsSEXT0 =
        (Op0.getOpcode() == ISD::SIGN_EXTEND) &&
        (Op0.getOperand(0).getValueType().getVectorElementType() == MVT::i1);
    bool IsVZero1 = ISD::isBuildVectorAllZeros(Op1.getNode());

    // sext(vXi1) lanes are 0 or -1, so against 0: GT is never true, LE is
    // always true, EQ/GE is the lane's logical NOT, NE/LT is the lane.
    if (IsSEXT0 && IsVZero1) {
      assert(VT == Op0.getOperand(0).getValueType() &&
             "Unexpected operand type");
      if (TmpCC == ISD::SETGT)
        return DAG.getConstant(0, DL, VT);
      if (TmpCC == ISD::SETLE)
        return DAG.getConstant(1, DL, VT);
      if (TmpCC == ISD::SETEQ || TmpCC == ISD::SETGE)
        return DAG.getNOT(DL, Op0.getOperand(0), VT);

      assert((TmpCC == ISD::SETNE || TmpCC == ISD::SETLT) &&
             "Unexpected condition code!");
      return Op0.getOperand(0);
    }
  }

  // On AVX512 without BWI, i8/i16 vector compares into vXi1 are not legal;
  // compare at the element type and truncate, so later combines see the
  // truncate-of-setcc form.
  if (Subtarget.hasAVX512() && !Subtarget.hasBWI() && VT.isVector() &&
      VT.getVectorElementType() == MVT::i1 &&
      (OpVT.getVectorElementType() == MVT::i8 ||
       OpVT.getVectorElementType() == MVT::i16)) {
    SDValue Setcc = DAG.getSetCC(DL, OpVT, LHS, RHS, CC);
    return DAG.getNode(ISD::TRUNCATE, DL, VT, Setcc);
  }

  // SSE1 (no SSE2): custom-lower v4f32 compares producing v4i32 now, since
  // type legalization would otherwise have to scalarize them.
  if (Subtarget.hasSSE1() && !Subtarget.hasSSE2() && VT == MVT::v4i32 &&
      LHS.getValueType() == MVT::v4f32)
    return LowerVSETCC(SDValue(N, 0), Subtarget, DAG);

  return SDValue();
}
| 48828 | |
// DAG combine for X86ISD::MOVMSK (extract the sign bit of each vector lane
// into a scalar bitmask).
static SDValue combineMOVMSK(SDNode *N, SelectionDAG &DAG,
                             TargetLowering::DAGCombinerInfo &DCI,
                             const X86Subtarget &Subtarget) {
  SDValue Src = N->getOperand(0);
  MVT SrcVT = Src.getSimpleValueType();
  MVT VT = N->getSimpleValueType(0);
  unsigned NumBits = VT.getScalarSizeInBits();
  unsigned NumElts = SrcVT.getVectorNumElements();

  // Constant-fold: set result bit Idx iff constant lane Idx is negative
  // (sign bit set). Undef lanes contribute 0.
  if (ISD::isBuildVectorOfConstantSDNodes(Src.getNode())) {
    assert(VT == MVT::i32 && "Unexpected result type");
    APInt Imm(32, 0);
    for (unsigned Idx = 0, e = Src.getNumOperands(); Idx < e; ++Idx) {
      if (!Src.getOperand(Idx).isUndef() &&
          Src.getConstantOperandAPInt(Idx).isNegative())
        Imm.setBit(Idx);
    }
    return DAG.getConstant(Imm, SDLoc(N), VT);
  }

  // Look through a bitcast that preserves the element width; the sign bits
  // are the same either way.
  unsigned EltWidth = SrcVT.getScalarSizeInBits();
  if (Subtarget.hasSSE2() && Src.getOpcode() == ISD::BITCAST &&
      Src.getOperand(0).getScalarValueSizeInBits() == EltWidth)
    return DAG.getNode(X86ISD::MOVMSK, SDLoc(N), VT, Src.getOperand(0));

  // movmsk(not(x)) == ~movmsk(x) restricted to the low NumElts bits, so
  // fold the NOT into an XOR with the lane mask after the movmsk.
  if (SDValue NotSrc = IsNOT(Src, DAG)) {
    SDLoc DL(N);
    APInt NotMask = APInt::getLowBitsSet(NumBits, NumElts);
    NotSrc = DAG.getBitcast(SrcVT, NotSrc);
    return DAG.getNode(ISD::XOR, DL, VT,
                       DAG.getNode(X86ISD::MOVMSK, DL, VT, NotSrc),
                       DAG.getConstant(NotMask, DL, VT));
  }

  // pcmpgt(x, -1) flags the non-negative lanes, i.e. the complement of the
  // sign bits of x — so movmsk(pcmpgt(x,-1)) == movmsk(x) ^ lane-mask.
  if (Src.getOpcode() == X86ISD::PCMPGT &&
      ISD::isBuildVectorAllOnes(Src.getOperand(1).getNode())) {
    SDLoc DL(N);
    APInt NotMask = APInt::getLowBitsSet(NumBits, NumElts);
    return DAG.getNode(ISD::XOR, DL, VT,
                       DAG.getNode(X86ISD::MOVMSK, DL, VT, Src.getOperand(0)),
                       DAG.getConstant(NotMask, DL, VT));
  }

  // Let generic demanded-bits simplification shrink the source.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  APInt DemandedMask(APInt::getAllOnesValue(NumBits));
  if (TLI.SimplifyDemandedBits(SDValue(N, 0), DemandedMask, DCI))
    return SDValue(N, 0);

  return SDValue();
}
| 48886 | |
| 48887 | static SDValue combineX86GatherScatter(SDNode *N, SelectionDAG &DAG, |
| 48888 | TargetLowering::DAGCombinerInfo &DCI) { |
| 48889 | |
| 48890 | SDValue Mask = cast<X86MaskedGatherScatterSDNode>(N)->getMask(); |
| 48891 | if (Mask.getScalarValueSizeInBits() != 1) { |
| 48892 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
| 48893 | APInt DemandedMask(APInt::getSignMask(Mask.getScalarValueSizeInBits())); |
| 48894 | if (TLI.SimplifyDemandedBits(Mask, DemandedMask, DCI)) { |
| 48895 | if (N->getOpcode() != ISD::DELETED_NODE) |
| 48896 | DCI.AddToWorklist(N); |
| 48897 | return SDValue(N, 0); |
| 48898 | } |
| 48899 | } |
| 48900 | |
| 48901 | return SDValue(); |
| 48902 | } |
| 48903 | |
| 48904 | static SDValue rebuildGatherScatter(MaskedGatherScatterSDNode *GorS, |
| 48905 | SDValue Index, SDValue Base, SDValue Scale, |
| 48906 | SelectionDAG &DAG) { |
| 48907 | SDLoc DL(GorS); |
| 48908 | |
| 48909 | if (auto *Gather = dyn_cast<MaskedGatherSDNode>(GorS)) { |
| 48910 | SDValue Ops[] = { Gather->getChain(), Gather->getPassThru(), |
| 48911 | Gather->getMask(), Base, Index, Scale } ; |
| 48912 | return DAG.getMaskedGather(Gather->getVTList(), |
| 48913 | Gather->getMemoryVT(), DL, Ops, |
| 48914 | Gather->getMemOperand(), |
| 48915 | Gather->getIndexType(), |
| 48916 | Gather->getExtensionType()); |
| 48917 | } |
| 48918 | auto *Scatter = cast<MaskedScatterSDNode>(GorS); |
| 48919 | SDValue Ops[] = { Scatter->getChain(), Scatter->getValue(), |
| 48920 | Scatter->getMask(), Base, Index, Scale }; |
| 48921 | return DAG.getMaskedScatter(Scatter->getVTList(), |
| 48922 | Scatter->getMemoryVT(), DL, |
| 48923 | Ops, Scatter->getMemOperand(), |
| 48924 | Scatter->getIndexType(), |
| 48925 | Scatter->isTruncatingStore()); |
| 48926 | } |
| 48927 | |
// DAG combine for generic masked gather/scatter: narrow oversized index
// vectors to i32 where safe, legalize odd index widths, and shrink wide
// mask elements to their sign bit.
static SDValue combineGatherScatter(SDNode *N, SelectionDAG &DAG,
                                    TargetLowering::DAGCombinerInfo &DCI) {
  SDLoc DL(N);
  auto *GorS = cast<MaskedGatherScatterSDNode>(N);
  SDValue Index = GorS->getIndex();
  SDValue Base = GorS->getBasePtr();
  SDValue Scale = GorS->getScale();

  if (DCI.isBeforeLegalize()) {
    unsigned IndexWidth = Index.getScalarValueSizeInBits();

    // Shrink a >32-bit constant index vector to i32 when every element is
    // provably sign-representable in 32 bits (> IndexWidth-32 sign bits).
    if (auto *BV = dyn_cast<BuildVectorSDNode>(Index)) {
      if (BV->isConstant() && IndexWidth > 32 &&
          DAG.ComputeNumSignBits(Index) > (IndexWidth - 32)) {
        EVT NewVT = Index.getValueType().changeVectorElementType(MVT::i32);
        Index = DAG.getNode(ISD::TRUNCATE, DL, NewVT, Index);
        return rebuildGatherScatter(GorS, Index, Base, Scale, DAG);
      }
    }

    // Likewise, a >32-bit index that is an extension of a <=32-bit value
    // with enough sign bits can simply be truncated back to i32.
    if ((Index.getOpcode() == ISD::SIGN_EXTEND ||
         Index.getOpcode() == ISD::ZERO_EXTEND) &&
        IndexWidth > 32 &&
        Index.getOperand(0).getScalarValueSizeInBits() <= 32 &&
        DAG.ComputeNumSignBits(Index) > (IndexWidth - 32)) {
      EVT NewVT = Index.getValueType().changeVectorElementType(MVT::i32);
      Index = DAG.getNode(ISD::TRUNCATE, DL, NewVT, Index);
      return rebuildGatherScatter(GorS, Index, Base, Scale, DAG);
    }
  }

  if (DCI.isBeforeLegalizeOps()) {
    unsigned IndexWidth = Index.getScalarValueSizeInBits();

    // Only i32 and i64 index element types are supported; sign-extend or
    // truncate anything else to the nearest supported width.
    if (IndexWidth != 32 && IndexWidth != 64) {
      MVT EltVT = IndexWidth > 32 ? MVT::i64 : MVT::i32;
      EVT IndexVT = Index.getValueType().changeVectorElementType(EltVT);
      Index = DAG.getSExtOrTrunc(Index, DL, IndexVT);
      return rebuildGatherScatter(GorS, Index, Base, Scale, DAG);
    }
  }

  // Non-i1 masks only use the sign bit of each element; let demanded-bits
  // simplification shrink the mask computation.
  SDValue Mask = GorS->getMask();
  if (Mask.getScalarValueSizeInBits() != 1) {
    const TargetLowering &TLI = DAG.getTargetLoweringInfo();
    APInt DemandedMask(APInt::getSignMask(Mask.getScalarValueSizeInBits()));
    if (TLI.SimplifyDemandedBits(Mask, DemandedMask, DCI)) {
      if (N->getOpcode() != ISD::DELETED_NODE)
        DCI.AddToWorklist(N);
      return SDValue(N, 0);
    }
  }

  return SDValue();
}
| 48995 | |
| 48996 | |
| 48997 | static SDValue combineX86SetCC(SDNode *N, SelectionDAG &DAG, |
| 48998 | const X86Subtarget &Subtarget) { |
| 48999 | SDLoc DL(N); |
| 49000 | X86::CondCode CC = X86::CondCode(N->getConstantOperandVal(0)); |
| 49001 | SDValue EFLAGS = N->getOperand(1); |
| 49002 | |
| 49003 | |
| 49004 | if (SDValue Flags = combineSetCCEFLAGS(EFLAGS, CC, DAG, Subtarget)) |
| 49005 | return getSETCC(CC, Flags, DL, DAG); |
| 49006 | |
| 49007 | return SDValue(); |
| 49008 | } |
| 49009 | |
| 49010 | |
| 49011 | static SDValue combineBrCond(SDNode *N, SelectionDAG &DAG, |
| 49012 | const X86Subtarget &Subtarget) { |
| 49013 | SDLoc DL(N); |
| 49014 | SDValue EFLAGS = N->getOperand(3); |
| 49015 | X86::CondCode CC = X86::CondCode(N->getConstantOperandVal(2)); |
| 49016 | |
| 49017 | |
| 49018 | |
| 49019 | |
| 49020 | if (SDValue Flags = combineSetCCEFLAGS(EFLAGS, CC, DAG, Subtarget)) { |
| 49021 | SDValue Cond = DAG.getTargetConstant(CC, DL, MVT::i8); |
| 49022 | return DAG.getNode(X86ISD::BRCOND, DL, N->getVTList(), N->getOperand(0), |
| 49023 | N->getOperand(1), Cond, Flags); |
| 49024 | } |
| 49025 | |
| 49026 | return SDValue(); |
| 49027 | } |
| 49028 | |
| 49029 | |
// Fold a unary FP conversion of a masked constant:
//
//   UNARYOP(AND(VECTOR_CMP(x,y), constant))
//     --> AND(VECTOR_CMP(x,y), UNARYOP(constant))
//
// The AND's first operand must be an all-ones/all-zeros per-element mask
// (verified via ComputeNumSignBits below), so ANDing with the converted
// constant produces the same result as converting the AND. Handles both the
// plain and strict-FP forms of the conversion node (the strict form carries a
// chain as operand 0).
static SDValue combineVectorCompareAndMaskUnaryOp(SDNode *N,
                                                  SelectionDAG &DAG) {
  // Early exit if this isn't a vector operation, if the operand of the unary
  // operation isn't a bitwise AND, if the AND's first operand isn't a
  // sign-splat mask, or if the sizes of the two operations don't match.
  EVT VT = N->getValueType(0);
  bool IsStrict = N->isStrictFPOpcode();
  unsigned NumEltBits = VT.getScalarSizeInBits();
  SDValue Op0 = N->getOperand(IsStrict ? 1 : 0);
  if (!VT.isVector() || Op0.getOpcode() != ISD::AND ||
      DAG.ComputeNumSignBits(Op0.getOperand(0)) != NumEltBits ||
      VT.getSizeInBits() != Op0.getValueSizeInBits())
    return SDValue();

  // Now check that the other operand of the AND is a constant build_vector.
  // We could handle non-constant splats as well, but it's unclear that would
  // be a benefit: it wouldn't eliminate any operations, just move one more
  // step into scalar code before entering the vector unit.
  if (auto *BV = dyn_cast<BuildVectorSDNode>(Op0.getOperand(1))) {
    // Bail out if the vector isn't fully constant.
    if (!BV->isConstant())
      return SDValue();

    // Everything checks out. Build up the new and improved node.
    SDLoc DL(N);
    EVT IntVT = BV->getValueType(0);

    // Fold the unary op into the constant, threading the chain through for
    // the strict-FP form.
    SDValue SourceConst;
    if (IsStrict)
      SourceConst = DAG.getNode(N->getOpcode(), DL, {VT, MVT::Other},
                                {N->getOperand(0), SDValue(BV, 0)});
    else
      SourceConst = DAG.getNode(N->getOpcode(), DL, VT, SDValue(BV, 0));

    // The new AND operates in the integer domain; bitcast the folded constant
    // into it and the AND result back out to the FP type.
    SDValue MaskConst = DAG.getBitcast(IntVT, SourceConst);
    SDValue NewAnd = DAG.getNode(ISD::AND, DL, IntVT, Op0->getOperand(0),
                                 MaskConst);
    SDValue Res = DAG.getBitcast(VT, NewAnd);
    if (IsStrict)
      // Preserve the chain produced by the folded strict conversion.
      return DAG.getMergeValues({Res, SourceConst.getValue(1)}, DL);
    return Res;
  }

  return SDValue();
}
| 49084 | |
| 49085 | |
| 49086 | |
| 49087 | |
/// If we are converting a value to floating-point, try to replace a scalar
/// truncate of an extracted vector element with a bitcast of the vector:
///   inttofp (trunc (extelt X, 0)) --> inttofp (extelt (bitcast X), 0)
/// This keeps the sequence on vector registers instead of bouncing the value
/// through scalar (GPR) code.
static SDValue combineToFPTruncExtElt(SDNode *N, SelectionDAG &DAG) {
  // Match a one-use truncate of a one-use extract of element 0.
  SDValue Trunc = N->getOperand(0);
  if (!Trunc.hasOneUse() || Trunc.getOpcode() != ISD::TRUNCATE)
    return SDValue();

  SDValue ExtElt = Trunc.getOperand(0);
  if (!ExtElt.hasOneUse() || ExtElt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
      !isNullConstant(ExtElt.getOperand(1)))
    return SDValue();

  // The narrow type must evenly divide the extracted element type so that
  // element 0 of the rebitcast vector corresponds to the truncated value.
  EVT TruncVT = Trunc.getValueType();
  EVT SrcVT = ExtElt.getValueType();
  unsigned DestWidth = TruncVT.getSizeInBits();
  unsigned SrcWidth = SrcVT.getSizeInBits();
  if (SrcWidth % DestWidth != 0)
    return SDValue();

  // Bitcast the source vector to a vector of the narrow element type and
  // extract element 0 (same index operand as the original extract), then
  // apply the original conversion to the narrow scalar.
  EVT SrcVecVT = ExtElt.getOperand(0).getValueType();
  unsigned VecWidth = SrcVecVT.getSizeInBits();
  unsigned NumElts = VecWidth / DestWidth;
  EVT BitcastVT = EVT::getVectorVT(*DAG.getContext(), TruncVT, NumElts);
  SDValue BitcastVec = DAG.getBitcast(BitcastVT, ExtElt.getOperand(0));
  SDLoc DL(N);
  SDValue NewExtElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TruncVT,
                                  BitcastVec, ExtElt.getOperand(1));
  return DAG.getNode(N->getOpcode(), DL, N->getValueType(0), NewExtElt);
}
| 49119 | |
// Combine (STRICT_)UINT_TO_FP nodes.
static SDValue combineUIntToFP(SDNode *N, SelectionDAG &DAG,
                               const X86Subtarget &Subtarget) {
  bool IsStrict = N->isStrictFPOpcode();
  SDValue Op0 = N->getOperand(IsStrict ? 1 : 0);
  EVT VT = N->getValueType(0);
  EVT InVT = Op0.getValueType();

  // UINT_TO_FP(vXi1)  -> SINT_TO_FP(ZEXT(vXi1  to vXi32))
  // UINT_TO_FP(vXi8)  -> SINT_TO_FP(ZEXT(vXi8  to vXi32))
  // UINT_TO_FP(vXi16) -> SINT_TO_FP(ZEXT(vXi16 to vXi32))
  if (InVT.isVector() && InVT.getScalarSizeInBits() < 32) {
    SDLoc dl(N);
    EVT DstVT = InVT.changeVectorElementType(MVT::i32);
    SDValue P = DAG.getNode(ISD::ZERO_EXTEND, dl, DstVT, Op0);

    // The zero-extended value fits in 31 bits, so a *signed* conversion
    // yields the same result and is better supported.
    if (IsStrict)
      return DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, {VT, MVT::Other},
                         {N->getOperand(0), P});
    return DAG.getNode(ISD::SINT_TO_FP, dl, VT, P);
  }

  // If the sign bit of the input is known to be zero, an unsigned and a
  // signed conversion produce the same value, so prefer the signed form
  // (the generic combiner may not perform this rewrite for us here).
  if (DAG.SignBitIsZero(Op0)) {
    if (IsStrict)
      return DAG.getNode(ISD::STRICT_SINT_TO_FP, SDLoc(N), {VT, MVT::Other},
                         {N->getOperand(0), Op0});
    return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, Op0);
  }

  return SDValue();
}
| 49154 | |
// Combine (STRICT_)SINT_TO_FP nodes.
static SDValue combineSIntToFP(SDNode *N, SelectionDAG &DAG,
                               TargetLowering::DAGCombinerInfo &DCI,
                               const X86Subtarget &Subtarget) {
  // First try to optimize away the conversion entirely when it's
  // conditionally from a constant. Vectors only.
  bool IsStrict = N->isStrictFPOpcode();
  if (SDValue Res = combineVectorCompareAndMaskUnaryOp(N, DAG))
    return Res;

  // Now move on to more general possibilities.
  SDValue Op0 = N->getOperand(IsStrict ? 1 : 0);
  EVT VT = N->getValueType(0);
  EVT InVT = Op0.getValueType();

  // SINT_TO_FP(vXi1)  -> SINT_TO_FP(SEXT(vXi1  to vXi32))
  // SINT_TO_FP(vXi8)  -> SINT_TO_FP(SEXT(vXi8  to vXi32))
  // SINT_TO_FP(vXi16) -> SINT_TO_FP(SEXT(vXi16 to vXi32))
  if (InVT.isVector() && InVT.getScalarSizeInBits() < 32) {
    SDLoc dl(N);
    EVT DstVT = InVT.changeVectorElementType(MVT::i32);
    SDValue P = DAG.getNode(ISD::SIGN_EXTEND, dl, DstVT, Op0);
    if (IsStrict)
      return DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, {VT, MVT::Other},
                         {N->getOperand(0), P});
    return DAG.getNode(ISD::SINT_TO_FP, dl, VT, P);
  }

  // Without DQI, wide integer conversions are expensive. If the value fits
  // in 32 signed bits (enough sign bits to truncate losslessly), shrink the
  // conversion to operate on i32 instead.
  if (InVT.getScalarSizeInBits() > 32 && !Subtarget.hasDQI()) {
    unsigned BitWidth = InVT.getScalarSizeInBits();
    unsigned NumSignBits = DAG.ComputeNumSignBits(Op0);
    if (NumSignBits >= (BitWidth - 31)) {
      EVT TruncVT = MVT::i32;
      if (InVT.isVector())
        TruncVT = InVT.changeVectorElementType(TruncVT);
      SDLoc dl(N);
      if (DCI.isBeforeLegalize() || TruncVT != MVT::v2i32) {
        SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, TruncVT, Op0);
        if (IsStrict)
          return DAG.getNode(ISD::STRICT_SINT_TO_FP, dl, {VT, MVT::Other},
                             {N->getOperand(0), Trunc});
        return DAG.getNode(ISD::SINT_TO_FP, dl, VT, Trunc);
      }

      // v2i32 is not a legal truncate target after legalization, so instead
      // shuffle the low halves of the v2i64 into the even v4i32 lanes and use
      // the X86-specific CVTSI2P node, which only reads those lanes.
      assert(InVT == MVT::v2i64 && "Unexpected VT!");
      SDValue Cast = DAG.getBitcast(MVT::v4i32, Op0);
      SDValue Shuf = DAG.getVectorShuffle(MVT::v4i32, dl, Cast, Cast,
                                          { 0, 2, -1, -1 });
      if (IsStrict)
        return DAG.getNode(X86ISD::STRICT_CVTSI2P, dl, {VT, MVT::Other},
                           {N->getOperand(0), Shuf});
      return DAG.getNode(X86ISD::CVTSI2P, dl, VT, Shuf);
    }
  }

  // Transform (SINT_TO_FP (load ...)) into an x87 FILD when the x87 is
  // available and the load can be folded directly.
  if (!Subtarget.useSoftFloat() && Subtarget.hasX87() &&
      Op0.getOpcode() == ISD::LOAD) {
    LoadSDNode *Ld = cast<LoadSDNode>(Op0.getNode());

    // This transformation is not supported for f16 or f128 results.
    if (VT == MVT::f16 || VT == MVT::f128)
      return SDValue();

    // With DQI we prefer the packed conversion instructions, except for f80
    // which they can't produce.
    if (Subtarget.hasDQI() && VT != MVT::f80)
      return SDValue();

    // Only fold a simple, normal, single-use i64 load on 32-bit targets,
    // where SSE has no i64->FP conversion.
    if (Ld->isSimple() && !VT.isVector() && ISD::isNormalLoad(Op0.getNode()) &&
        Op0.hasOneUse() && !Subtarget.is64Bit() && InVT == MVT::i64) {
      std::pair<SDValue, SDValue> Tmp =
          Subtarget.getTargetLowering()->BuildFILD(
              VT, InVT, SDLoc(N), Ld->getChain(), Ld->getBasePtr(),
              Ld->getPointerInfo(), Ld->getOriginalAlign(), DAG);
      // Replace the load's chain result so its other users see the FILD's
      // chain, then return the loaded+converted value.
      DAG.ReplaceAllUsesOfValueWith(Op0.getValue(1), Tmp.second);
      return Tmp.first;
    }
  }

  // The remaining combine does not handle strict FP semantics.
  if (IsStrict)
    return SDValue();

  if (SDValue V = combineToFPTruncExtElt(N, DAG))
    return V;

  return SDValue();
}
| 49247 | |
| 49248 | static bool needCarryOrOverflowFlag(SDValue Flags) { |
| 49249 | assert(Flags.getValueType() == MVT::i32 && "Unexpected VT!"); |
| 49250 | |
| 49251 | for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end(); |
| 49252 | UI != UE; ++UI) { |
| 49253 | SDNode *User = *UI; |
| 49254 | |
| 49255 | X86::CondCode CC; |
| 49256 | switch (User->getOpcode()) { |
| 49257 | default: |
| 49258 | |
| 49259 | return true; |
| 49260 | case X86ISD::SETCC: |
| 49261 | case X86ISD::SETCC_CARRY: |
| 49262 | CC = (X86::CondCode)User->getConstantOperandVal(0); |
| 49263 | break; |
| 49264 | case X86ISD::BRCOND: |
| 49265 | CC = (X86::CondCode)User->getConstantOperandVal(2); |
| 49266 | break; |
| 49267 | case X86ISD::CMOV: |
| 49268 | CC = (X86::CondCode)User->getConstantOperandVal(2); |
| 49269 | break; |
| 49270 | } |
| 49271 | |
| 49272 | switch (CC) { |
| 49273 | default: break; |
| 49274 | case X86::COND_A: case X86::COND_AE: |
| 49275 | case X86::COND_B: case X86::COND_BE: |
| 49276 | case X86::COND_O: case X86::COND_NO: |
| 49277 | case X86::COND_G: case X86::COND_GE: |
| 49278 | case X86::COND_L: case X86::COND_LE: |
| 49279 | return true; |
| 49280 | } |
| 49281 | } |
| 49282 | |
| 49283 | return false; |
| 49284 | } |
| 49285 | |
| 49286 | static bool onlyZeroFlagUsed(SDValue Flags) { |
| 49287 | assert(Flags.getValueType() == MVT::i32 && "Unexpected VT!"); |
| 49288 | |
| 49289 | for (SDNode::use_iterator UI = Flags->use_begin(), UE = Flags->use_end(); |
| 49290 | UI != UE; ++UI) { |
| 49291 | SDNode *User = *UI; |
| 49292 | |
| 49293 | unsigned CCOpNo; |
| 49294 | switch (User->getOpcode()) { |
| 49295 | default: |
| 49296 | |
| 49297 | return false; |
| 49298 | case X86ISD::SETCC: CCOpNo = 0; break; |
| 49299 | case X86ISD::SETCC_CARRY: CCOpNo = 0; break; |
| 49300 | case X86ISD::BRCOND: CCOpNo = 2; break; |
| 49301 | case X86ISD::CMOV: CCOpNo = 2; break; |
| 49302 | } |
| 49303 | |
| 49304 | X86::CondCode CC = (X86::CondCode)User->getConstantOperandVal(CCOpNo); |
| 49305 | if (CC != X86::COND_E && CC != X86::COND_NE) |
| 49306 | return false; |
| 49307 | } |
| 49308 | |
| 49309 | return true; |
| 49310 | } |
| 49311 | |
// Combine X86ISD::CMP nodes that compare against zero (test patterns).
static SDValue combineCMP(SDNode *N, SelectionDAG &DAG) {
  // Only handle comparisons against zero.
  if (!isNullConstant(N->getOperand(1)))
    return SDValue();

  SDLoc dl(N);
  SDValue Op = N->getOperand(0);
  EVT VT = Op.getValueType();

  // If we have a constant logical shift that's only used in a comparison
  // against zero, turn it into an equivalent AND. This allows turning it
  // into a TEST instruction later, as long as all users only test ZF and
  // the resulting mask fits in a signed 32-bit immediate.
  if ((Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) &&
      Op.hasOneUse() && isa<ConstantSDNode>(Op.getOperand(1)) &&
      onlyZeroFlagUsed(SDValue(N, 0))) {
    unsigned BitWidth = VT.getSizeInBits();
    const APInt &ShAmt = Op.getConstantOperandAPInt(1);
    if (ShAmt.ult(BitWidth)) { // Avoid undefined shifts.
      unsigned MaskBits = BitWidth - ShAmt.getZExtValue();
      // SRL shifts out the low bits, SHL the high bits: the surviving bits
      // are what the zero test actually observes.
      APInt Mask = Op.getOpcode() == ISD::SRL
                       ? APInt::getHighBitsSet(BitWidth, MaskBits)
                       : APInt::getLowBitsSet(BitWidth, MaskBits);
      if (Mask.isSignedIntN(32)) {
        Op = DAG.getNode(ISD::AND, dl, VT, Op.getOperand(0),
                         DAG.getConstant(Mask, dl, VT));
        return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
                           DAG.getConstant(0, dl, VT));
      }
    }
  }

  // The remaining combines look through a truncate.
  if (Op.getOpcode() != ISD::TRUNCATE)
    return SDValue();

  SDValue Trunc = Op;
  Op = Op.getOperand(0);

  // See if we can compare with zero against the truncation source, which
  // should help using the Z flag from many ops. Only do this for i32 source
  // ops to avoid creating illegal wide immediates, and only when the bits
  // discarded by the truncate are known zero so ZF is unaffected.
  EVT OpVT = Op.getValueType();
  APInt UpperBits =
      APInt::getBitsSetFrom(OpVT.getSizeInBits(), VT.getSizeInBits());
  if (OpVT == MVT::i32 && DAG.MaskedValueIsZero(Op, UpperBits) &&
      onlyZeroFlagUsed(SDValue(N, 0))) {
    return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
                       DAG.getConstant(0, dl, OpVT));
  }

  // After this point we rewrite the binop itself, so the truncate and the
  // arithmetic op must both have a single use.
  if (!Trunc.hasOneUse() || !Op.hasOneUse())
    return SDValue();

  // Narrow a flag-producing binop to the truncated width and use its flags.
  unsigned NewOpc;
  switch (Op.getOpcode()) {
  default: return SDValue();
  case ISD::AND:
    // Skip AND with a constant; isel has dedicated handling (TEST with
    // immediate) for that form.
    if (isa<ConstantSDNode>(Op.getOperand(1)))
      return SDValue();
    NewOpc = X86ISD::AND;
    break;
  case ISD::OR: NewOpc = X86ISD::OR; break;
  case ISD::XOR: NewOpc = X86ISD::XOR; break;
  case ISD::ADD:
    // Narrowing changes CF/OF, so bail if any user needs them.
    if (needCarryOrOverflowFlag(SDValue(N, 0)))
      return SDValue();
    NewOpc = X86ISD::ADD;
    break;
  case ISD::SUB:
    // Same CF/OF concern as ADD.
    if (needCarryOrOverflowFlag(SDValue(N, 0)))
      return SDValue();
    NewOpc = X86ISD::SUB;
    break;
  }

  // We found an op we can narrow. Truncate its inputs.
  SDValue Op0 = DAG.getNode(ISD::TRUNCATE, dl, VT, Op.getOperand(0));
  SDValue Op1 = DAG.getNode(ISD::TRUNCATE, dl, VT, Op.getOperand(1));

  // Use an X86-specific opcode so generic DAG combine leaves it alone; it
  // also produces the flags result we want.
  SDVTList VTs = DAG.getVTList(VT, MVT::i32);
  Op = DAG.getNode(NewOpc, dl, VTs, Op0, Op1);

  // For AND, keep a CMP so that isel can still match the TEST pattern.
  if (NewOpc == X86ISD::AND)
    return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
                       DAG.getConstant(0, dl, VT));

  // Otherwise return the flags result of the narrowed op directly.
  return Op.getValue(1);
}
| 49412 | |
// Combine the flag-producing X86ISD::ADD / X86ISD::SUB nodes.
static SDValue combineX86AddSub(SDNode *N, SelectionDAG &DAG,
                                TargetLowering::DAGCombinerInfo &DCI) {
  assert((X86ISD::ADD == N->getOpcode() || X86ISD::SUB == N->getOpcode()) &&
         "Expected X86ISD::ADD or X86ISD::SUB");

  SDLoc DL(N);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  MVT VT = LHS.getSimpleValueType();
  unsigned GenericOpc = X86ISD::ADD == N->getOpcode() ? ISD::ADD : ISD::SUB;

  // If the flags result is unused, fold back to the generic ADD/SUB. The
  // merged second value is a dummy for the dead flags result.
  if (!N->hasAnyUseOfValue(1)) {
    SDValue Res = DAG.getNode(GenericOpc, DL, VT, LHS, RHS);
    return DAG.getMergeValues({Res, DAG.getConstant(0, DL, MVT::i32)}, DL);
  }

  // Redirect any existing generic ADD/SUB of the same operands to reuse this
  // node's arithmetic result. For SUB with swapped operands, negate this
  // node's result to recover the swapped difference (B - A = 0 - (A - B)).
  auto MatchGeneric = [&](SDValue N0, SDValue N1, bool Negate) {
    SDValue Ops[] = {N0, N1};
    SDVTList VTs = DAG.getVTList(N->getValueType(0));
    if (SDNode *GenericAddSub = DAG.getNodeIfExists(GenericOpc, VTs, Ops)) {
      SDValue Op(N, 0);
      if (Negate)
        Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
      DCI.CombineTo(GenericAddSub, Op);
    }
  };
  MatchGeneric(LHS, RHS, false);
  // ADD is commutative, so the swapped form needs no negation; SUB does.
  MatchGeneric(RHS, LHS, X86ISD::SUB == N->getOpcode());

  return SDValue();
}
| 49446 | |
| 49447 | static SDValue combineSBB(SDNode *N, SelectionDAG &DAG) { |
| 49448 | if (SDValue Flags = combineCarryThroughADD(N->getOperand(2), DAG)) { |
| 49449 | MVT VT = N->getSimpleValueType(0); |
| 49450 | SDVTList VTs = DAG.getVTList(VT, MVT::i32); |
| 49451 | return DAG.getNode(X86ISD::SBB, SDLoc(N), VTs, |
| 49452 | N->getOperand(0), N->getOperand(1), |
| 49453 | Flags); |
| 49454 | } |
| 49455 | |
| 49456 | |
| 49457 | |
| 49458 | SDValue Op0 = N->getOperand(0); |
| 49459 | SDValue Op1 = N->getOperand(1); |
| 49460 | if (Op0.getOpcode() == ISD::SUB && isNullConstant(Op1) && |
| 49461 | !N->hasAnyUseOfValue(1)) |
| 49462 | return DAG.getNode(X86ISD::SBB, SDLoc(N), N->getVTList(), Op0.getOperand(0), |
| 49463 | Op0.getOperand(1), N->getOperand(2)); |
| 49464 | |
| 49465 | return SDValue(); |
| 49466 | } |
| 49467 | |
| 49468 | |
// Combine X86ISD::ADC nodes.
static SDValue combineADC(SDNode *N, SelectionDAG &DAG,
                          TargetLowering::DAGCombinerInfo &DCI) {
  // If both arithmetic operands are zero, the result is just the incoming
  // carry bit (0 or 1): materialize it with SETCC_CARRY masked to one bit.
  // Only valid when the outgoing carry (result 1) is unused, since we
  // replace it with a constant 0.
  if (X86::isZeroNode(N->getOperand(0)) &&
      X86::isZeroNode(N->getOperand(1)) &&
      SDValue(N, 1).use_empty()) {
    SDLoc DL(N);
    EVT VT = N->getValueType(0);
    SDValue CarryOut = DAG.getConstant(0, DL, N->getValueType(1));
    // SETCC_CARRY(COND_B) produces all-ones when CF is set; AND with 1
    // reduces that to the 0/1 value of the carry bit.
    SDValue Res1 =
        DAG.getNode(ISD::AND, DL, VT,
                    DAG.getNode(X86ISD::SETCC_CARRY, DL, VT,
                                DAG.getTargetConstant(X86::COND_B, DL, MVT::i8),
                                N->getOperand(2)),
                    DAG.getConstant(1, DL, VT));
    return DCI.CombineTo(N, Res1, CarryOut);
  }

  // If the incoming carry came through an ADD carry chain, route the
  // simplified flags straight into this ADC.
  if (SDValue Flags = combineCarryThroughADD(N->getOperand(2), DAG)) {
    MVT VT = N->getSimpleValueType(0);
    SDVTList VTs = DAG.getVTList(VT, MVT::i32);
    return DAG.getNode(X86ISD::ADC, SDLoc(N), VTs,
                       N->getOperand(0), N->getOperand(1),
                       Flags);
  }

  return SDValue();
}
| 49501 | |
| 49502 | |
| 49503 | |
| 49504 | |
/// If this is an add or subtract where one operand is produced by a cmp+setcc,
/// then try to convert it to an ADC or SBB. This replaces TEST+SET+{ADD/SUB}
/// with CMP+{ADC, SBB}.
static SDValue combineAddOrSubToADCOrSBB(SDNode *N, SelectionDAG &DAG) {
  bool IsSub = N->getOpcode() == ISD::SUB;
  SDValue X = N->getOperand(0);
  SDValue Y = N->getOperand(1);

  // For ADD (commutative), canonicalize a zext operand to Y so the peek
  // below can look through it.
  if (!IsSub && X.getOpcode() == ISD::ZERO_EXTEND &&
      Y.getOpcode() != ISD::ZERO_EXTEND)
    std::swap(X, Y);

  // Look through a one-use zero-extend of the setcc.
  bool PeekedThroughZext = false;
  if (Y.getOpcode() == ISD::ZERO_EXTEND && Y.hasOneUse()) {
    Y = Y.getOperand(0);
    PeekedThroughZext = true;
  }

  // For ADD, also canonicalize a bare SETCC operand into Y — but not if we
  // already peeked through a zext, since then X may be the zext source.
  if (!IsSub && !PeekedThroughZext && X.getOpcode() == X86ISD::SETCC &&
      Y.getOpcode() != X86ISD::SETCC)
    std::swap(X, Y);

  if (Y.getOpcode() != X86ISD::SETCC || !Y.hasOneUse())
    return SDValue();

  SDLoc DL(N);
  EVT VT = N->getValueType(0);
  X86::CondCode CC = (X86::CondCode)Y.getConstantOperandVal(0);

  // If X is -1 or 0, we can often avoid the constant operand of the general
  // ADC/SBB forms below.
  auto *ConstantX = dyn_cast<ConstantSDNode>(X);
  if (ConstantX) {
    // -1 + SETAE --> -1 + (!CF) --> CF ? -1 : 0 --> SBB %eax, %eax
    //  0 - SETB  -->  0 -  (CF) --> CF ? -1 : 0 --> SBB %eax, %eax
    if ((!IsSub && CC == X86::COND_AE && ConstantX->isAllOnesValue()) ||
        (IsSub && CC == X86::COND_B && ConstantX->isNullValue())) {
      return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT,
                         DAG.getTargetConstant(X86::COND_B, DL, MVT::i8),
                         Y.getOperand(1));
    }

    // Swapping the SUB's operands flips BE/A into AE/B, giving the pattern
    // above:
    // -1 + SETBE (SUB A, B) --> -1 + SETAE (SUB B, A) --> SUB + SBB
    //  0 - SETA  (SUB A, B) -->  0 - SETB  (SUB B, A) --> SUB + SBB
    if ((!IsSub && CC == X86::COND_BE && ConstantX->isAllOnesValue()) ||
        (IsSub && CC == X86::COND_A && ConstantX->isNullValue())) {
      SDValue EFLAGS = Y->getOperand(1);
      // Don't swap when the second operand is a constant: CMP can't take an
      // immediate as its first operand.
      if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.hasOneUse() &&
          EFLAGS.getValueType().isInteger() &&
          !isa<ConstantSDNode>(EFLAGS.getOperand(1))) {
        SDValue NewSub = DAG.getNode(
            X86ISD::SUB, SDLoc(EFLAGS), EFLAGS.getNode()->getVTList(),
            EFLAGS.getOperand(1), EFLAGS.getOperand(0));
        SDValue NewEFLAGS = SDValue(NewSub.getNode(), EFLAGS.getResNo());
        return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT,
                           DAG.getTargetConstant(X86::COND_B, DL, MVT::i8),
                           NewEFLAGS);
      }
    }
  }

  // X + SETB Z --> adc X, 0
  // X - SETB Z --> sbb X, 0
  if (CC == X86::COND_B) {
    return DAG.getNode(IsSub ? X86ISD::SBB : X86ISD::ADC, DL,
                       DAG.getVTList(VT, MVT::i32), X,
                       DAG.getConstant(0, DL, VT), Y.getOperand(1));
  }

  // Convert COND_A into COND_B by swapping the SUB's operands, to enable the
  // adc/sbb form above.
  if (CC == X86::COND_A) {
    SDValue EFLAGS = Y.getOperand(1);
    // Don't swap when the second operand is a constant: CMP can't take an
    // immediate as its first operand.
    if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.getNode()->hasOneUse() &&
        EFLAGS.getValueType().isInteger() &&
        !isa<ConstantSDNode>(EFLAGS.getOperand(1))) {
      SDValue NewSub = DAG.getNode(X86ISD::SUB, SDLoc(EFLAGS),
                                   EFLAGS.getNode()->getVTList(),
                                   EFLAGS.getOperand(1), EFLAGS.getOperand(0));
      SDValue NewEFLAGS = NewSub.getValue(EFLAGS.getResNo());
      return DAG.getNode(IsSub ? X86ISD::SBB : X86ISD::ADC, DL,
                         DAG.getVTList(VT, MVT::i32), X,
                         DAG.getConstant(0, DL, VT), NewEFLAGS);
    }
  }

  // X + SETAE --> sbb X, -1
  // X - SETAE --> adc X, -1
  if (CC == X86::COND_AE) {
    return DAG.getNode(IsSub ? X86ISD::ADC : X86ISD::SBB, DL,
                       DAG.getVTList(VT, MVT::i32), X,
                       DAG.getConstant(-1, DL, VT), Y.getOperand(1));
  }

  // Convert COND_BE into COND_AE by swapping the SUB's operands, to enable
  // the sbb/adc form above.
  if (CC == X86::COND_BE) {
    SDValue EFLAGS = Y.getOperand(1);
    // Don't swap when the second operand is a constant: CMP can't take an
    // immediate as its first operand.
    if (EFLAGS.getOpcode() == X86ISD::SUB && EFLAGS.getNode()->hasOneUse() &&
        EFLAGS.getValueType().isInteger() &&
        !isa<ConstantSDNode>(EFLAGS.getOperand(1))) {
      SDValue NewSub = DAG.getNode(
          X86ISD::SUB, SDLoc(EFLAGS), EFLAGS.getNode()->getVTList(),
          EFLAGS.getOperand(1), EFLAGS.getOperand(0));
      SDValue NewEFLAGS = NewSub.getValue(EFLAGS.getResNo());
      return DAG.getNode(IsSub ? X86ISD::ADC : X86ISD::SBB, DL,
                         DAG.getVTList(VT, MVT::i32), X,
                         DAG.getConstant(-1, DL, VT), NewEFLAGS);
    }
  }

  // Everything below handles SETE/SETNE of a compare against zero.
  if (CC != X86::COND_E && CC != X86::COND_NE)
    return SDValue();

  SDValue Cmp = Y.getOperand(1);
  if (Cmp.getOpcode() != X86ISD::CMP || !Cmp.hasOneUse() ||
      !X86::isZeroNode(Cmp.getOperand(1)) ||
      !Cmp.getOperand(0).getValueType().isInteger())
    return SDValue();

  SDValue Z = Cmp.getOperand(0);
  EVT ZVT = Z.getValueType();

  // If X is -1 or 0, we again have an opportunity to avoid the constants
  // required by the general ADC/SBB forms at the end.
  if (ConstantX) {
    // 'neg' sets the carry flag when Z != 0, so create 0 or -1 using 'sbb'
    // with fake operands:
    //  0 - (Z != 0) --> sbb %eax, %eax, (neg Z)
    // -1 + (Z == 0) --> sbb %eax, %eax, (neg Z)
    if ((IsSub && CC == X86::COND_NE && ConstantX->isNullValue()) ||
        (!IsSub && CC == X86::COND_E && ConstantX->isAllOnesValue())) {
      SDValue Zero = DAG.getConstant(0, DL, ZVT);
      SDVTList X86SubVTs = DAG.getVTList(ZVT, MVT::i32);
      SDValue Neg = DAG.getNode(X86ISD::SUB, DL, X86SubVTs, Zero, Z);
      return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT,
                         DAG.getTargetConstant(X86::COND_B, DL, MVT::i8),
                         SDValue(Neg.getNode(), 1));
    }

    // cmp with 1 sets the carry flag when Z == 0, so create 0 or -1 using
    // 'sbb' with fake operands:
    //  0 - (Z == 0) --> sbb %eax, %eax, (cmp Z, 1)
    // -1 + (Z != 0) --> sbb %eax, %eax, (cmp Z, 1)
    if ((IsSub && CC == X86::COND_E && ConstantX->isNullValue()) ||
        (!IsSub && CC == X86::COND_NE && ConstantX->isAllOnesValue())) {
      SDValue One = DAG.getConstant(1, DL, ZVT);
      SDVTList X86SubVTs = DAG.getVTList(ZVT, MVT::i32);
      SDValue Cmp1 = DAG.getNode(X86ISD::SUB, DL, X86SubVTs, Z, One);
      return DAG.getNode(X86ISD::SETCC_CARRY, DL, VT,
                         DAG.getTargetConstant(X86::COND_B, DL, MVT::i8),
                         Cmp1.getValue(1));
    }
  }

  // (cmp Z, 1) sets the carry flag iff Z is 0.
  SDValue One = DAG.getConstant(1, DL, ZVT);
  SDVTList X86SubVTs = DAG.getVTList(ZVT, MVT::i32);
  SDValue Cmp1 = DAG.getNode(X86ISD::SUB, DL, X86SubVTs, Z, One);

  // Result/flags type pair for the ADC/SBB nodes below.
  SDVTList VTs = DAG.getVTList(VT, MVT::i32);

  // X - (Z != 0) --> sub X, (zext(setne Z, 0)) --> adc X, -1, (cmp Z, 1)
  // X + (Z != 0) --> add X, (zext(setne Z, 0)) --> sbb X, -1, (cmp Z, 1)
  if (CC == X86::COND_NE)
    return DAG.getNode(IsSub ? X86ISD::ADC : X86ISD::SBB, DL, VTs, X,
                       DAG.getConstant(-1ULL, DL, VT), Cmp1.getValue(1));

  // X - (Z == 0) --> sub X, (zext(sete  Z, 0)) --> sbb X, 0, (cmp Z, 1)
  // X + (Z == 0) --> add X, (zext(sete  Z, 0)) --> adc X, 0, (cmp Z, 1)
  return DAG.getNode(IsSub ? X86ISD::SBB : X86ISD::ADC, DL, VTs, X,
                     DAG.getConstant(0, DL, VT), Cmp1.getValue(1));
}
| 49694 | |
// Try to turn (add Op0, Op1) into X86ISD::VPMADDWD when both operands are
// BUILD_VECTORs of extracts from one wide MUL: Op0/Op1 must gather, pairwise,
// consecutive even/odd elements of the multiply so the add computes
// horizontal sums of adjacent products — exactly what PMADDWD produces.
static SDValue matchPMADDWD(SelectionDAG &DAG, SDValue Op0, SDValue Op1,
                            const SDLoc &DL, EVT VT,
                            const X86Subtarget &Subtarget) {
  // PMADDWD requires SSE2.
  if (!Subtarget.hasSSE2())
    return SDValue();

  // Both add operands must be BUILD_VECTORs of extracts.
  if (Op0.getOpcode() != ISD::BUILD_VECTOR ||
      Op1.getOpcode() != ISD::BUILD_VECTOR)
    return SDValue();

  // Result must be a power-of-2-sized vXi32 with at least 4 elements.
  if (!VT.isVector() || VT.getVectorElementType() != MVT::i32 ||
      VT.getVectorNumElements() < 4 ||
      !isPowerOf2_32(VT.getVectorNumElements()))
    return SDValue();

  // Check that the BUILD_VECTORs gather elements of a single MUL:
  // one collects even indices (0, 2, 4, ...), the other odd (1, 3, 5, ...),
  // with each result pair (i, i+1) covering mul elements 2i..2i+3.
  SDValue Mul;
  for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; i += 2) {
    SDValue Op0L = Op0->getOperand(i), Op1L = Op1->getOperand(i),
            Op0H = Op0->getOperand(i + 1), Op1H = Op1->getOperand(i + 1);
    // All four lanes must be extracts with constant indices.
    if (Op0L.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
        Op1L.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
        Op0H.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
        Op1H.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
      return SDValue();
    auto *Const0L = dyn_cast<ConstantSDNode>(Op0L->getOperand(1));
    auto *Const1L = dyn_cast<ConstantSDNode>(Op1L->getOperand(1));
    auto *Const0H = dyn_cast<ConstantSDNode>(Op0H->getOperand(1));
    auto *Const1H = dyn_cast<ConstantSDNode>(Op1H->getOperand(1));
    if (!Const0L || !Const1L || !Const0H || !Const1H)
      return SDValue();
    unsigned Idx0L = Const0L->getZExtValue(), Idx1L = Const1L->getZExtValue(),
             Idx0H = Const0H->getZExtValue(), Idx1H = Const1H->getZExtValue();

    // Commutativity of the add allows the low pair to appear in either order.
    if (Idx0L > Idx1L)
      std::swap(Idx0L, Idx1L);
    if (Idx0H > Idx1H)
      std::swap(Idx0H, Idx1H);

    // And the two pairs themselves may be swapped between the operands.
    if (Idx0L > Idx0H) {
      std::swap(Idx0L, Idx0H);
      std::swap(Idx1L, Idx1H);
    }

    // After canonicalization, the indices must be exactly 2i..2i+3.
    if (Idx0L != 2 * i || Idx1L != 2 * i + 1 || Idx0H != 2 * i + 2 ||
        Idx1H != 2 * i + 3)
      return SDValue();
    if (!Mul) {
      // First lane: record the common source. It must be a MUL with twice as
      // many elements as the result vector.
      Mul = Op0L->getOperand(0);
      if (Mul->getOpcode() != ISD::MUL ||
          Mul.getValueType().getVectorNumElements() != 2 * e)
        return SDValue();
    }

    // Every extract must come from the same MUL.
    if (Mul != Op0L->getOperand(0) || Mul != Op1L->getOperand(0) ||
        Mul != Op0H->getOperand(0) || Mul != Op1H->getOperand(0))
      return SDValue();
  }

  // The MUL's operands must be shrinkable to i16 with sign semantics intact,
  // since PMADDWD multiplies signed 16-bit elements.
  ShrinkMode Mode;
  if (!canReduceVMulWidth(Mul.getNode(), DAG, Mode) ||
      Mode == ShrinkMode::MULU16)
    return SDValue();

  // Truncate the multiply inputs to i16 vectors and emit VPMADDWD, splitting
  // into legal-width pieces as needed.
  EVT TruncVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16,
                                 VT.getVectorNumElements() * 2);
  SDValue N0 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, Mul.getOperand(0));
  SDValue N1 = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, Mul.getOperand(1));

  auto PMADDBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
                         ArrayRef<SDValue> Ops) {
    EVT InVT = Ops[0].getValueType();
    assert(InVT == Ops[1].getValueType() && "Operands' types mismatch");
    // PMADDWD halves the element count: i16 pairs become one i32 each.
    EVT ResVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32,
                                 InVT.getVectorNumElements() / 2);
    return DAG.getNode(X86ISD::VPMADDWD, DL, ResVT, Ops[0], Ops[1]);
  };
  return SplitOpsAndApply(DAG, Subtarget, DL, VT, { N0, N1 }, PMADDBuilder);
}
| 49799 | |
| 49800 | |
| 49801 | |
| 49802 | |
// Attempt to turn this pattern into X86ISD::VPMADDWD:
//   (add (mul (sext (build_vector)), (sext (build_vector))),
//        (mul (sext (build_vector)), (sext (build_vector))))
// where the build_vectors interleave even/odd elements of two common source
// vectors, i.e. each lane computes A[2i]*B[2i] + A[2i+1]*B[2i+1].
static SDValue matchPMADDWD_2(SelectionDAG &DAG, SDValue N0, SDValue N1,
                              const SDLoc &DL, EVT VT,
                              const X86Subtarget &Subtarget) {
  // PMADDWD requires SSE2.
  if (!Subtarget.hasSSE2())
    return SDValue();

  if (N0.getOpcode() != ISD::MUL || N1.getOpcode() != ISD::MUL)
    return SDValue();

  // Result must be a power-of-2-sized vXi32 with at least 4 elements.
  if (!VT.isVector() || VT.getVectorElementType() != MVT::i32 ||
      VT.getVectorNumElements() < 4 ||
      !isPowerOf2_32(VT.getVectorNumElements()))
    return SDValue();

  SDValue N00 = N0.getOperand(0);
  SDValue N01 = N0.getOperand(1);
  SDValue N10 = N1.getOperand(0);
  SDValue N11 = N1.getOperand(1);

  // All inputs need to be sign extends, since PMADDWD multiplies signed
  // 16-bit values.
  if (N00.getOpcode() != ISD::SIGN_EXTEND ||
      N01.getOpcode() != ISD::SIGN_EXTEND ||
      N10.getOpcode() != ISD::SIGN_EXTEND ||
      N11.getOpcode() != ISD::SIGN_EXTEND)
    return SDValue();

  // Peek through the extends.
  N00 = N00.getOperand(0);
  N01 = N01.getOperand(0);
  N10 = N10.getOperand(0);
  N11 = N11.getOperand(0);

  // Must all be extending from the same vXi16 type.
  EVT InVT = N00.getValueType();
  if (InVT.getVectorElementType() != MVT::i16 || N01.getValueType() != InVT ||
      N10.getValueType() != InVT || N11.getValueType() != InVT)
    return SDValue();

  // All inputs should be build_vectors.
  if (N00.getOpcode() != ISD::BUILD_VECTOR ||
      N01.getOpcode() != ISD::BUILD_VECTOR ||
      N10.getOpcode() != ISD::BUILD_VECTOR ||
      N11.getOpcode() != ISD::BUILD_VECTOR)
    return SDValue();

  // For each lane i, verify the four factors are extracts such that the sum
  // computed is In0[2i]*In1[2i] + In0[2i+1]*In1[2i+1] for two common source
  // vectors In0/In1.
  SDValue In0, In1;
  for (unsigned i = 0; i != N00.getNumOperands(); ++i) {
    SDValue N00Elt = N00.getOperand(i);
    SDValue N01Elt = N01.getOperand(i);
    SDValue N10Elt = N10.getOperand(i);
    SDValue N11Elt = N11.getOperand(i);

    // Every factor must be an extract with a constant index.
    if (N00Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
        N01Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
        N10Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
        N11Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
      return SDValue();
    auto *ConstN00Elt = dyn_cast<ConstantSDNode>(N00Elt.getOperand(1));
    auto *ConstN01Elt = dyn_cast<ConstantSDNode>(N01Elt.getOperand(1));
    auto *ConstN10Elt = dyn_cast<ConstantSDNode>(N10Elt.getOperand(1));
    auto *ConstN11Elt = dyn_cast<ConstantSDNode>(N11Elt.getOperand(1));
    if (!ConstN00Elt || !ConstN01Elt || !ConstN10Elt || !ConstN11Elt)
      return SDValue();
    unsigned IdxN00 = ConstN00Elt->getZExtValue();
    unsigned IdxN01 = ConstN01Elt->getZExtValue();
    unsigned IdxN10 = ConstN10Elt->getZExtValue();
    unsigned IdxN11 = ConstN11Elt->getZExtValue();

    // Add is commutative, so the even/odd mul may appear in either order.
    if (IdxN00 > IdxN10) {
      std::swap(IdxN00, IdxN10);
      std::swap(IdxN01, IdxN11);
    }

    // One mul must use index 2i from both sources, the other 2i+1.
    if (IdxN00 != 2 * i || IdxN10 != 2 * i + 1 ||
        IdxN01 != 2 * i || IdxN11 != 2 * i + 1)
      return SDValue();
    SDValue N00In = N00Elt.getOperand(0);
    SDValue N01In = N01Elt.getOperand(0);
    SDValue N10In = N10Elt.getOperand(0);
    SDValue N11In = N11Elt.getOperand(0);

    // First lane: record the two source vectors.
    if (!In0) {
      In0 = N00In;
      In1 = N01In;

      // The source vectors must be at least as wide as the output; wider
      // sources get their low subvector extracted below.
      if (In0.getValueSizeInBits() < VT.getSizeInBits() ||
          In1.getValueSizeInBits() < VT.getSizeInBits())
        return SDValue();
    }

    // Mul is commutative, so canonicalize the factor order before comparing
    // against the recorded sources.
    if (In0 != N00In)
      std::swap(N00In, N01In);
    if (In0 != N10In)
      std::swap(N10In, N11In);
    if (In0 != N00In || In1 != N01In || In0 != N10In || In1 != N11In)
      return SDValue();
  }

  auto PMADDBuilder = [](SelectionDAG &DAG, const SDLoc &DL,
                         ArrayRef<SDValue> Ops) {
    EVT OpVT = Ops[0].getValueType();
    assert(OpVT.getScalarType() == MVT::i16 &&
           "Unexpected scalar element type");
    assert(OpVT == Ops[1].getValueType() && "Operands' types mismatch");
    // PMADDWD halves the element count: i16 pairs become one i32 each.
    EVT ResVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32,
                                 OpVT.getVectorNumElements() / 2);
    return DAG.getNode(X86ISD::VPMADDWD, DL, ResVT, Ops[0], Ops[1]);
  };

  // If a source is wider than the output needs, use only its low subvector.
  EVT OutVT16 = EVT::getVectorVT(*DAG.getContext(), MVT::i16,
                                 VT.getVectorNumElements() * 2);
  if (OutVT16.bitsLT(In0.getValueType())) {
    In0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, OutVT16, In0,
                      DAG.getIntPtrConstant(0, DL));
  }
  if (OutVT16.bitsLT(In1.getValueType())) {
    In1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, OutVT16, In1,
                      DAG.getIntPtrConstant(0, DL));
  }
  // Emit VPMADDWD, splitting into legal-width pieces as needed.
  return SplitOpsAndApply(DAG, Subtarget, DL, VT, { In0, In1 },
                          PMADDBuilder);
}
| 49938 | |
| 49939 | |
| 49940 | |
| 49941 | |
| 49942 | |
| 49943 | static SDValue pushAddIntoCmovOfConsts(SDNode *N, SelectionDAG &DAG) { |
| 49944 | |
| 49945 | |
| 49946 | |
| 49947 | |
| 49948 | |
| 49949 | auto isSuitableCmov = [](SDValue V) { |
| 49950 | if (V.getOpcode() != X86ISD::CMOV || !V.hasOneUse()) |
| 49951 | return false; |
| 49952 | if (!isa<ConstantSDNode>(V.getOperand(0)) || |
| 49953 | !isa<ConstantSDNode>(V.getOperand(1))) |
| 49954 | return false; |
| 49955 | return isNullConstant(V.getOperand(0)) || isNullConstant(V.getOperand(1)) || |
| 49956 | (V.getConstantOperandAPInt(0).isSignedIntN(32) && |
| 49957 | V.getConstantOperandAPInt(1).isSignedIntN(32)); |
| 49958 | }; |
| 49959 | |
| 49960 | |
| 49961 | SDValue Cmov = N->getOperand(0); |
| 49962 | SDValue OtherOp = N->getOperand(1); |
| 49963 | if (!isSuitableCmov(Cmov)) |
| 49964 | std::swap(Cmov, OtherOp); |
| 49965 | if (!isSuitableCmov(Cmov)) |
| 49966 | return SDValue(); |
| 49967 | |
| 49968 | |
| 49969 | EVT VT = N->getValueType(0); |
| 49970 | SDLoc DL(N); |
| 49971 | SDValue FalseOp = Cmov.getOperand(0); |
| 49972 | SDValue TrueOp = Cmov.getOperand(1); |
| 49973 | FalseOp = DAG.getNode(ISD::ADD, DL, VT, OtherOp, FalseOp); |
| 49974 | TrueOp = DAG.getNode(ISD::ADD, DL, VT, OtherOp, TrueOp); |
| 49975 | return DAG.getNode(X86ISD::CMOV, DL, VT, FalseOp, TrueOp, Cmov.getOperand(2), |
| 49976 | Cmov.getOperand(3)); |
| 49977 | } |
| 49978 | |
| 49979 | static SDValue combineAdd(SDNode *N, SelectionDAG &DAG, |
| 49980 | TargetLowering::DAGCombinerInfo &DCI, |
| 49981 | const X86Subtarget &Subtarget) { |
| 49982 | EVT VT = N->getValueType(0); |
| 49983 | SDValue Op0 = N->getOperand(0); |
| 49984 | SDValue Op1 = N->getOperand(1); |
| 49985 | |
| 49986 | if (SDValue Select = pushAddIntoCmovOfConsts(N, DAG)) |
| 49987 | return Select; |
| 49988 | |
| 49989 | if (SDValue MAdd = matchPMADDWD(DAG, Op0, Op1, SDLoc(N), VT, Subtarget)) |
| 49990 | return MAdd; |
| 49991 | if (SDValue MAdd = matchPMADDWD_2(DAG, Op0, Op1, SDLoc(N), VT, Subtarget)) |
| 49992 | return MAdd; |
| 49993 | |
| 49994 | |
| 49995 | if (SDValue V = combineToHorizontalAddSub(N, DAG, Subtarget)) |
| 49996 | return V; |
| 49997 | |
| 49998 | |
| 49999 | |
| 50000 | |
| 50001 | |
| 50002 | |
| 50003 | if (VT.isVector()) { |
| 50004 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
| 50005 | if (Op0.getOpcode() == ISD::ZERO_EXTEND && |
| 50006 | Op0.getOperand(0).getValueType().getVectorElementType() == MVT::i1 && |
| 50007 | TLI.isTypeLegal(Op0.getOperand(0).getValueType())) { |
| 50008 | SDLoc DL(N); |
| 50009 | SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op0.getOperand(0)); |
| 50010 | return DAG.getNode(ISD::SUB, DL, VT, Op1, SExt); |
| 50011 | } |
| 50012 | |
| 50013 | if (Op1.getOpcode() == ISD::ZERO_EXTEND && |
| 50014 | Op1.getOperand(0).getValueType().getVectorElementType() == MVT::i1 && |
| 50015 | TLI.isTypeLegal(Op1.getOperand(0).getValueType())) { |
| 50016 | SDLoc DL(N); |
| 50017 | SDValue SExt = DAG.getNode(ISD::SIGN_EXTEND, DL, VT, Op1.getOperand(0)); |
| 50018 | return DAG.getNode(ISD::SUB, DL, VT, Op0, SExt); |
| 50019 | } |
| 50020 | } |
| 50021 | |
| 50022 | return combineAddOrSubToADCOrSBB(N, DAG); |
| 50023 | } |
| 50024 | |
| 50025 | static SDValue combineSub(SDNode *N, SelectionDAG &DAG, |
| 50026 | TargetLowering::DAGCombinerInfo &DCI, |
| 50027 | const X86Subtarget &Subtarget) { |
| 50028 | SDValue Op0 = N->getOperand(0); |
| 50029 | SDValue Op1 = N->getOperand(1); |
| 50030 | |
| 50031 | |
| 50032 | auto IsNonOpaqueConstant = [&](SDValue Op) { |
| 50033 | if (SDNode *C = DAG.isConstantIntBuildVectorOrConstantInt(Op)) { |
| 50034 | if (auto *Cst = dyn_cast<ConstantSDNode>(C)) |
| 50035 | return !Cst->isOpaque(); |
| 50036 | return true; |
| 50037 | } |
| 50038 | return false; |
| 50039 | }; |
| 50040 | |
| 50041 | |
| 50042 | |
| 50043 | |
| 50044 | |
| 50045 | if (Op1.getOpcode() == ISD::XOR && IsNonOpaqueConstant(Op0) && |
| 50046 | IsNonOpaqueConstant(Op1.getOperand(1)) && Op1->hasOneUse()) { |
| 50047 | SDLoc DL(N); |
| 50048 | EVT VT = Op0.getValueType(); |
| 50049 | SDValue NewXor = DAG.getNode(ISD::XOR, SDLoc(Op1), VT, Op1.getOperand(0), |
| 50050 | DAG.getNOT(SDLoc(Op1), Op1.getOperand(1), VT)); |
| 50051 | SDValue NewAdd = |
| 50052 | DAG.getNode(ISD::ADD, DL, VT, Op0, DAG.getConstant(1, DL, VT)); |
| 50053 | return DAG.getNode(ISD::ADD, DL, VT, NewXor, NewAdd); |
| 50054 | } |
| 50055 | |
| 50056 | |
| 50057 | if (SDValue V = combineToHorizontalAddSub(N, DAG, Subtarget)) |
| 50058 | return V; |
| 50059 | |
| 50060 | return combineAddOrSubToADCOrSBB(N, DAG); |
| 50061 | } |
| 50062 | |
| 50063 | static SDValue combineVectorCompare(SDNode *N, SelectionDAG &DAG, |
| 50064 | const X86Subtarget &Subtarget) { |
| 50065 | MVT VT = N->getSimpleValueType(0); |
| 50066 | SDLoc DL(N); |
| 50067 | |
| 50068 | if (N->getOperand(0) == N->getOperand(1)) { |
| 50069 | if (N->getOpcode() == X86ISD::PCMPEQ) |
| 50070 | return DAG.getConstant(-1, DL, VT); |
| 50071 | if (N->getOpcode() == X86ISD::PCMPGT) |
| 50072 | return DAG.getConstant(0, DL, VT); |
| 50073 | } |
| 50074 | |
| 50075 | return SDValue(); |
| 50076 | } |
| 50077 | |
| 50078 | |
| 50079 | |
| 50080 | |
// Attempt to fold a concatenation of per-subvector Ops into one operation on
// the wide (concatenated) type VT, instead of performing the operation on
// each half and concatenating the results.
// Returns the folded wide node, or SDValue() if no fold applies.
static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
                                      ArrayRef<SDValue> Ops, SelectionDAG &DAG,
                                      TargetLowering::DAGCombinerInfo &DCI,
                                      const X86Subtarget &Subtarget) {
  assert(Subtarget.hasAVX() && "AVX assumed for concat_vectors");
  unsigned EltSizeInBits = VT.getScalarSizeInBits();

  // Concat of all-undef operands is undef.
  if (llvm::all_of(Ops, [](SDValue Op) { return Op.isUndef(); }))
    return DAG.getUNDEF(VT);

  // Concat of all-zero operands is one wide zero vector.
  if (llvm::all_of(Ops, [](SDValue Op) {
        return ISD::isBuildVectorAllZeros(Op.getNode());
      }))
    return getZeroVector(VT, Subtarget, DAG, DL);

  SDValue Op0 = Ops[0];
  // IsSplat: every operand is the exact same node as Ops[0].
  bool IsSplat = llvm::all_of(Ops, [&Op0](SDValue Op) { return Op == Op0; });

  // Repeating the same subvector: try to express this as a broadcast.
  if (IsSplat &&
      (VT.is256BitVector() || (VT.is512BitVector() && Subtarget.hasAVX512()))) {
    // concat(broadcast(x), broadcast(x)) --> wider broadcast(x).
    if (Op0.getOpcode() == X86ISD::VBROADCAST)
      return DAG.getNode(Op0.getOpcode(), DL, VT, Op0.getOperand(0));

    // concat of identical (subvector-)broadcast-loads --> one wide
    // broadcast-load. Users of the narrow value are redirected to an extract
    // of the wide result; the old memory chain is rewired to the new load.
    if (Op0.getOpcode() == X86ISD::VBROADCAST_LOAD ||
        Op0.getOpcode() == X86ISD::SUBV_BROADCAST_LOAD) {
      auto *MemIntr = cast<MemIntrinsicSDNode>(Op0);
      SDVTList Tys = DAG.getVTList(VT, MVT::Other);
      // NOTE: this local Ops[] (chain + base pointer) shadows the function's
      // Ops parameter within this block.
      SDValue Ops[] = {MemIntr->getChain(), MemIntr->getBasePtr()};
      SDValue BcastLd = DAG.getMemIntrinsicNode(Op0.getOpcode(), DL, Tys, Ops,
                                                MemIntr->getMemoryVT(),
                                                MemIntr->getMemOperand());
      DAG.ReplaceAllUsesOfValueWith(
          Op0, extractSubVector(BcastLd, 0, DAG, DL, Op0.getValueSizeInBits()));
      DAG.ReplaceAllUsesOfValueWith(SDValue(MemIntr, 1), BcastLd.getValue(1));
      return BcastLd;
    }

    // concat of the same simple, non-temporal-free, non-extending load -->
    // subvector broadcast-load from the same address (same chain fixups).
    if (auto *Ld = dyn_cast<LoadSDNode>(Op0)) {
      if (Ld->isSimple() && !Ld->isNonTemporal() &&
          Ld->getExtensionType() == ISD::NON_EXTLOAD) {
        SDVTList Tys = DAG.getVTList(VT, MVT::Other);
        SDValue Ops[] = {Ld->getChain(), Ld->getBasePtr()};
        SDValue BcastLd =
            DAG.getMemIntrinsicNode(X86ISD::SUBV_BROADCAST_LOAD, DL, Tys, Ops,
                                    Ld->getMemoryVT(), Ld->getMemOperand());
        DAG.ReplaceAllUsesOfValueWith(
            Op0,
            extractSubVector(BcastLd, 0, DAG, DL, Op0.getValueSizeInBits()));
        DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), BcastLd.getValue(1));
        return BcastLd;
      }
    }

    // concat of v2f64 movddup --> v4f64 broadcast of element 0.
    if (Op0.getOpcode() == X86ISD::MOVDDUP && VT == MVT::v4f64 &&
        (Subtarget.hasAVX2() || MayFoldLoad(Op0.getOperand(0))))
      return DAG.getNode(X86ISD::VBROADCAST, DL, VT,
                         DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f64,
                                     Op0.getOperand(0),
                                     DAG.getIntPtrConstant(0, DL)));

    // concat of scalar_to_vector(x) --> broadcast(x), when the scalar type
    // matches and the target can broadcast it (AVX2, or a foldable load of
    // a >= 32-bit element).
    if (Op0.getOpcode() == ISD::SCALAR_TO_VECTOR &&
        (Subtarget.hasAVX2() ||
         (EltSizeInBits >= 32 && MayFoldLoad(Op0.getOperand(0)))) &&
        Op0.getOperand(0).getValueType() == VT.getScalarType())
      return DAG.getNode(X86ISD::VBROADCAST, DL, VT, Op0.getOperand(0));

    // concat of identical extracts of a same-type broadcast --> reuse the
    // original wide broadcast directly.
    if (Op0.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
        Op0.getOperand(0).getValueType() == VT) {
      if (Op0.getOperand(0).getOpcode() == X86ISD::VBROADCAST ||
          Op0.getOperand(0).getOpcode() == X86ISD::VBROADCAST_LOAD)
        return Op0.getOperand(0);
    }
  }

  // concat(extract upper-half(A), extract upper-half(B)) of two 256-bit
  // sources --> vperm2x128(A, B) selecting both upper halves (imm 0x31).
  if (VT.is256BitVector() && Ops.size() == 2) {
    SDValue Src0 = peekThroughBitcasts(Ops[0]);
    SDValue Src1 = peekThroughBitcasts(Ops[1]);
    if (Src0.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
        Src1.getOpcode() == ISD::EXTRACT_SUBVECTOR) {
      EVT SrcVT0 = Src0.getOperand(0).getValueType();
      EVT SrcVT1 = Src1.getOperand(0).getValueType();
      unsigned NumSrcElts0 = SrcVT0.getVectorNumElements();
      unsigned NumSrcElts1 = SrcVT1.getVectorNumElements();
      if (SrcVT0.is256BitVector() && SrcVT1.is256BitVector() &&
          Src0.getConstantOperandAPInt(1) == (NumSrcElts0 / 2) &&
          Src1.getConstantOperandAPInt(1) == (NumSrcElts1 / 2)) {
        return DAG.getNode(X86ISD::VPERM2X128, DL, VT,
                           DAG.getBitcast(VT, Src0.getOperand(0)),
                           DAG.getBitcast(VT, Src1.getOperand(0)),
                           DAG.getTargetConstant(0x31, DL, MVT::i8));
      }
    }
  }

  // If all operands share a single opcode, try to push the concat through
  // that operation, subject to per-opcode target legality checks below.
  if (llvm::all_of(Ops, [Op0](SDValue Op) {
        return Op.getOpcode() == Op0.getOpcode();
      })) {
    // Concatenate operand I of every subvector op into one wide value of
    // type VT.
    auto ConcatSubOperand = [&](MVT VT, ArrayRef<SDValue> SubOps, unsigned I) {
      SmallVector<SDValue> Subs;
      for (SDValue SubOp : SubOps)
        Subs.push_back(SubOp.getOperand(I));
      return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Subs);
    };

    unsigned NumOps = Ops.size();
    switch (Op0.getOpcode()) {
    case X86ISD::SHUFP: {
      // All ops must use the same shuffle immediate (operand 2).
      if (!IsSplat && VT.getScalarType() == MVT::f32 &&
          llvm::all_of(Ops, [Op0](SDValue Op) {
            return Op.getOperand(2) == Op0.getOperand(2);
          })) {
        return DAG.getNode(Op0.getOpcode(), DL, VT,
                           ConcatSubOperand(VT, Ops, 0),
                           ConcatSubOperand(VT, Ops, 1), Op0.getOperand(2));
      }
      break;
    }
    case X86ISD::PSHUFHW:
    case X86ISD::PSHUFLW:
    case X86ISD::PSHUFD:
      // 256-bit lane-wise word/dword shuffles with a shared immediate need
      // AVX2 integer support.
      if (!IsSplat && NumOps == 2 && VT.is256BitVector() &&
          Subtarget.hasInt256() && Op0.getOperand(1) == Ops[1].getOperand(1)) {
        return DAG.getNode(Op0.getOpcode(), DL, VT,
                           ConcatSubOperand(VT, Ops, 0), Op0.getOperand(1));
      }
      LLVM_FALLTHROUGH;
    case X86ISD::VPERMILPI:
      // Without the AVX2 path above, widen through the float domain
      // (VPERMILPI on v8f32) and bitcast back.
      if (!IsSplat && NumOps == 2 && (VT == MVT::v8f32 || VT == MVT::v8i32) &&
          Subtarget.hasAVX() && Op0.getOperand(1) == Ops[1].getOperand(1)) {
        SDValue Res = DAG.getBitcast(MVT::v8f32, ConcatSubOperand(VT, Ops, 0));
        Res = DAG.getNode(X86ISD::VPERMILPI, DL, MVT::v8f32, Res,
                          Op0.getOperand(1));
        return DAG.getBitcast(VT, Res);
      }
      break;
    case X86ISD::VPERMV3:
      // Merge two 256-bit VPERMV3 nodes into one 512-bit VPERMV3 by
      // concatenating sources and remapping the shuffle masks.
      if (!IsSplat && NumOps == 2 && VT.is512BitVector()) {
        MVT OpVT = Op0.getSimpleValueType();
        int NumSrcElts = OpVT.getVectorNumElements();
        SmallVector<int, 64> ConcatMask;
        for (unsigned i = 0; i != NumOps; ++i) {
          SmallVector<int, 64> SubMask;
          SmallVector<SDValue, 2> SubOps;
          if (!getTargetShuffleMask(Ops[i].getNode(), OpVT, false, SubOps,
                                    SubMask))
            break;
          // Remap each sub-mask index into the concatenated source space:
          // indices referencing the second source get an extra NumSrcElts
          // offset, and each op's mask is shifted by its position.
          for (int M : SubMask) {
            if (0 <= M) {
              M += M < NumSrcElts ? 0 : NumSrcElts;
              M += i * NumSrcElts;
            }
            ConcatMask.push_back(M);
          }
        }
        // Only fold if every sub-op's mask was successfully extracted.
        if (ConcatMask.size() == (NumOps * NumSrcElts)) {
          SDValue Src0 = concatSubVectors(Ops[0].getOperand(0),
                                          Ops[1].getOperand(0), DAG, DL);
          SDValue Src1 = concatSubVectors(Ops[0].getOperand(2),
                                          Ops[1].getOperand(2), DAG, DL);
          MVT IntMaskSVT = MVT::getIntegerVT(VT.getScalarSizeInBits());
          MVT IntMaskVT = MVT::getVectorVT(IntMaskSVT, NumOps * NumSrcElts);
          SDValue Mask = getConstVector(ConcatMask, IntMaskVT, DAG, DL, true);
          return DAG.getNode(X86ISD::VPERMV3, DL, VT, Src0, Mask, Src1);
        }
      }
      break;
    case X86ISD::VSHLI:
    case X86ISD::VSRLI:
      // Special case: without AVX2, a 64-bit shift by exactly 32 can be
      // expressed as a v8i32 shuffle with zero (select odd/even dwords).
      if (VT == MVT::v4i64 && !Subtarget.hasInt256() &&
          llvm::all_of(Ops, [](SDValue Op) {
            return Op.getConstantOperandAPInt(1) == 32;
          })) {
        SDValue Res = DAG.getBitcast(MVT::v8i32, ConcatSubOperand(VT, Ops, 0));
        SDValue Zero = getZeroVector(MVT::v8i32, Subtarget, DAG, DL);
        if (Op0.getOpcode() == X86ISD::VSHLI) {
          Res = DAG.getVectorShuffle(MVT::v8i32, DL, Res, Zero,
                                     {8, 0, 8, 2, 8, 4, 8, 6});
        } else {
          Res = DAG.getVectorShuffle(MVT::v8i32, DL, Res, Zero,
                                     {1, 8, 3, 8, 5, 8, 7, 8});
        }
        return DAG.getBitcast(VT, Res);
      }
      LLVM_FALLTHROUGH;
    case X86ISD::VSRAI:
      // Shifts by a shared immediate: widen when the target supports the
      // wide shift (AVX2 for 256-bit; AVX512 for 512-bit, with BWI needed
      // for sub-32-bit elements).
      if (((VT.is256BitVector() && Subtarget.hasInt256()) ||
           (VT.is512BitVector() && Subtarget.useAVX512Regs() &&
            (EltSizeInBits >= 32 || Subtarget.useBWIRegs()))) &&
          llvm::all_of(Ops, [Op0](SDValue Op) {
            return Op0.getOperand(1) == Op.getOperand(1);
          })) {
        return DAG.getNode(Op0.getOpcode(), DL, VT,
                           ConcatSubOperand(VT, Ops, 0), Op0.getOperand(1));
      }
      break;
    case X86ISD::VPERMI:
    case X86ISD::VROTLI:
    case X86ISD::VROTRI:
      // Permutes/rotates by a shared immediate: widen to 512-bit.
      if (VT.is512BitVector() && Subtarget.useAVX512Regs() &&
          llvm::all_of(Ops, [Op0](SDValue Op) {
            return Op0.getOperand(1) == Op.getOperand(1);
          })) {
        return DAG.getNode(Op0.getOpcode(), DL, VT,
                           ConcatSubOperand(VT, Ops, 0), Op0.getOperand(1));
      }
      break;
    case ISD::AND:
    case ISD::OR:
    case ISD::XOR:
    case X86ISD::ANDNP:
      // Bitwise logic ops: concat both operand vectors and perform one
      // 512-bit operation.
      if (!IsSplat && VT.is512BitVector()) {
        MVT SrcVT = Op0.getOperand(0).getSimpleValueType();
        SrcVT = MVT::getVectorVT(SrcVT.getScalarType(),
                                 NumOps * SrcVT.getVectorNumElements());
        return DAG.getNode(Op0.getOpcode(), DL, VT,
                           ConcatSubOperand(SrcVT, Ops, 0),
                           ConcatSubOperand(SrcVT, Ops, 1));
      }
      break;
    case X86ISD::HADD:
    case X86ISD::HSUB:
    case X86ISD::FHADD:
    case X86ISD::FHSUB:
    case X86ISD::PACKSS:
    case X86ISD::PACKUS:
      // Horizontal add/sub and pack ops widen to 256-bit; the integer forms
      // additionally require AVX2.
      if (!IsSplat && VT.is256BitVector() &&
          (VT.isFloatingPoint() || Subtarget.hasInt256())) {
        MVT SrcVT = Op0.getOperand(0).getSimpleValueType();
        SrcVT = MVT::getVectorVT(SrcVT.getScalarType(),
                                 NumOps * SrcVT.getVectorNumElements());
        return DAG.getNode(Op0.getOpcode(), DL, VT,
                           ConcatSubOperand(SrcVT, Ops, 0),
                           ConcatSubOperand(SrcVT, Ops, 1));
      }
      break;
    case X86ISD::PALIGNR:
      // PALIGNR with a shared immediate widens when the target has the wide
      // byte ops (AVX2 for 256-bit, BWI for 512-bit).
      if (!IsSplat &&
          ((VT.is256BitVector() && Subtarget.hasInt256()) ||
           (VT.is512BitVector() && Subtarget.useBWIRegs())) &&
          llvm::all_of(Ops, [Op0](SDValue Op) {
            return Op0.getOperand(2) == Op.getOperand(2);
          })) {
        return DAG.getNode(Op0.getOpcode(), DL, VT,
                           ConcatSubOperand(VT, Ops, 0),
                           ConcatSubOperand(VT, Ops, 1), Op0.getOperand(2));
      }
      break;
    }
  }

  // Fold subvector loads into one wide load, if the wide access is allowed
  // and reported as fast for the first load's memory operand.
  if (auto *FirstLd = dyn_cast<LoadSDNode>(peekThroughBitcasts(Op0))) {
    bool Fast;
    const X86TargetLowering *TLI = Subtarget.getTargetLowering();
    if (TLI->allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
                                *FirstLd->getMemOperand(), &Fast) &&
        Fast) {
      if (SDValue Ld =
              EltsFromConsecutiveLoads(VT, Ops, DL, DAG, Subtarget, false))
        return Ld;
    }
  }

  return SDValue();
}
| 50368 | |
| 50369 | static SDValue combineConcatVectors(SDNode *N, SelectionDAG &DAG, |
| 50370 | TargetLowering::DAGCombinerInfo &DCI, |
| 50371 | const X86Subtarget &Subtarget) { |
| 50372 | EVT VT = N->getValueType(0); |
| 50373 | EVT SrcVT = N->getOperand(0).getValueType(); |
| 50374 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
| 50375 | |
| 50376 | |
| 50377 | if (VT.getVectorElementType() == MVT::i1) |
| 50378 | return SDValue(); |
| 50379 | |
| 50380 | if (Subtarget.hasAVX() && TLI.isTypeLegal(VT) && TLI.isTypeLegal(SrcVT)) { |
| 50381 | SmallVector<SDValue, 4> Ops(N->op_begin(), N->op_end()); |
| 50382 | if (SDValue R = combineConcatVectorOps(SDLoc(N), VT.getSimpleVT(), Ops, DAG, |
| 50383 | DCI, Subtarget)) |
| 50384 | return R; |
| 50385 | } |
| 50386 | |
| 50387 | return SDValue(); |
| 50388 | } |
| 50389 | |
// Combine ISD::INSERT_SUBVECTOR nodes (only after operation legalization).
static SDValue combineInsertSubvector(SDNode *N, SelectionDAG &DAG,
                                      TargetLowering::DAGCombinerInfo &DCI,
                                      const X86Subtarget &Subtarget) {
  if (DCI.isBeforeLegalizeOps())
    return SDValue();

  MVT OpVT = N->getSimpleValueType(0);

  // i1 (mask) vectors only take the early undef/zero folds below.
  bool IsI1Vector = OpVT.getVectorElementType() == MVT::i1;

  SDLoc dl(N);
  SDValue Vec = N->getOperand(0);       // destination vector
  SDValue SubVec = N->getOperand(1);    // subvector being inserted

  uint64_t IdxVal = N->getConstantOperandVal(2);
  MVT SubVecVT = SubVec.getSimpleValueType();

  if (Vec.isUndef() && SubVec.isUndef())
    return DAG.getUNDEF(OpVT);

  // Inserting undef/zero into undef/zero yields a wide zero vector.
  if ((Vec.isUndef() || ISD::isBuildVectorAllZeros(Vec.getNode())) &&
      (SubVec.isUndef() || ISD::isBuildVectorAllZeros(SubVec.getNode())))
    return getZeroVector(OpVT, Subtarget, DAG, dl);

  if (ISD::isBuildVectorAllZeros(Vec.getNode())) {
    // insert (insert X, zero, Idx2), zero, Idx
    //   --> insert X, zero, Idx + Idx2  (collapse nested inserts into zero).
    if (SubVec.getOpcode() == ISD::INSERT_SUBVECTOR &&
        ISD::isBuildVectorAllZeros(SubVec.getOperand(0).getNode())) {
      uint64_t Idx2Val = SubVec.getConstantOperandVal(2);
      return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT,
                         getZeroVector(OpVT, Subtarget, DAG, dl),
                         SubVec.getOperand(1),
                         DAG.getIntPtrConstant(IdxVal + Idx2Val, dl));
    }

    // insert into zero of an extract-from-lane-0 of (insert X into zero at
    // lane 0) --> insert X straight into the wide zero vector, provided X
    // is no wider than the subvector type.
    if (SubVec.getOpcode() == ISD::EXTRACT_SUBVECTOR && IdxVal == 0 &&
        isNullConstant(SubVec.getOperand(1)) &&
        SubVec.getOperand(0).getOpcode() == ISD::INSERT_SUBVECTOR) {
      SDValue Ins = SubVec.getOperand(0);
      if (isNullConstant(Ins.getOperand(2)) &&
          ISD::isBuildVectorAllZeros(Ins.getOperand(0).getNode()) &&
          Ins.getOperand(1).getValueSizeInBits().getFixedSize() <=
              SubVecVT.getFixedSizeInBits())
        return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT,
                           getZeroVector(OpVT, Subtarget, DAG, dl),
                           Ins.getOperand(1), N->getOperand(2));
    }
  }

  // Everything below is for non-mask vectors only.
  if (IsI1Vector)
    return SDValue();

  // insert (extract X, ExtIdx), Vec, IdxVal --> shuffle(Vec, X) when the
  // extract is not from lane 0. The lane-0 insert into undef/zero is
  // deliberately left as an insert.
  if (SubVec.getOpcode() == ISD::EXTRACT_SUBVECTOR &&
      SubVec.getOperand(0).getSimpleValueType() == OpVT &&
      (IdxVal != 0 ||
       !(Vec.isUndef() || ISD::isBuildVectorAllZeros(Vec.getNode())))) {
    int ExtIdxVal = SubVec.getConstantOperandVal(1);
    if (ExtIdxVal != 0) {
      int VecNumElts = OpVT.getVectorNumElements();
      int SubVecNumElts = SubVecVT.getVectorNumElements();
      SmallVector<int, 64> Mask(VecNumElts);

      // Start from the identity mask over Vec...
      for (int i = 0; i != VecNumElts; ++i)
        Mask[i] = i;

      // ...then take the inserted lanes from the second shuffle input
      // (indices >= VecNumElts refer to SubVec.getOperand(0)).
      for (int i = 0; i != SubVecNumElts; ++i)
        Mask[i + IdxVal] = i + ExtIdxVal + VecNumElts;

      return DAG.getVectorShuffle(OpVT, dl, Vec, SubVec.getOperand(0), Mask);
    }
  }

  // If this insert completes a chain that concatenates subvectors, try the
  // concat combines on the collected pieces.
  SmallVector<SDValue, 2> SubVectorOps;
  if (collectConcatOps(N, SubVectorOps)) {
    if (SDValue Fold =
            combineConcatVectorOps(dl, OpVT, SubVectorOps, DAG, DCI, Subtarget))
      return Fold;

    // concat(X, zero) --> insert X into a wide zero vector at lane 0.
    if (SubVectorOps.size() == 2 &&
        ISD::isBuildVectorAllZeros(SubVectorOps[1].getNode()))
      return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, OpVT,
                         getZeroVector(OpVT, Subtarget, DAG, dl),
                         SubVectorOps[0], DAG.getIntPtrConstant(0, dl));
  }

  // Inserting a broadcast into an upper lane of an undef vector: just
  // broadcast at the wider type.
  if (Vec.isUndef() && IdxVal != 0 && SubVec.getOpcode() == X86ISD::VBROADCAST)
    return DAG.getNode(X86ISD::VBROADCAST, dl, OpVT, SubVec.getOperand(0));

  // Same for a single-use broadcast-load: re-issue the broadcast-load at the
  // wider type and rewire the memory chain.
  if (Vec.isUndef() && IdxVal != 0 && SubVec.hasOneUse() &&
      SubVec.getOpcode() == X86ISD::VBROADCAST_LOAD) {
    auto *MemIntr = cast<MemIntrinsicSDNode>(SubVec);
    SDVTList Tys = DAG.getVTList(OpVT, MVT::Other);
    SDValue Ops[] = { MemIntr->getChain(), MemIntr->getBasePtr() };
    SDValue BcastLd =
        DAG.getMemIntrinsicNode(X86ISD::VBROADCAST_LOAD, dl, Tys, Ops,
                                MemIntr->getMemoryVT(),
                                MemIntr->getMemOperand());
    DAG.ReplaceAllUsesOfValueWith(SDValue(MemIntr, 1), BcastLd.getValue(1));
    return BcastLd;
  }

  // If we're inserting a load into the upper half and the full vector is a
  // load of the same address (distance 0, i.e. the lower half repeats),
  // replace the whole thing with a subvector broadcast-load.
  if (IdxVal == (OpVT.getVectorNumElements() / 2) && SubVec.hasOneUse() &&
      Vec.getValueSizeInBits() == (2 * SubVec.getValueSizeInBits())) {
    auto *VecLd = dyn_cast<LoadSDNode>(Vec);
    auto *SubLd = dyn_cast<LoadSDNode>(SubVec);
    if (VecLd && SubLd &&
        DAG.areNonVolatileConsecutiveLoads(SubLd, VecLd,
                                           SubVec.getValueSizeInBits() / 8, 0))
      return getBROADCAST_LOAD(X86ISD::SUBV_BROADCAST_LOAD, dl, OpVT, SubVecVT,
                               SubLd, 0, DAG);
  }

  return SDValue();
}
| 50524 | |
| 50525 | |
| 50526 | |
| 50527 | |
| 50528 | |
| 50529 | |
| 50530 | |
// If we're extracting a 128-bit subvector of a VSELECT whose condition was
// built by concatenating subvectors, narrow the select to 128 bits by
// extracting the matching piece of the condition and of both value operands.
static SDValue narrowExtractedVectorSelect(SDNode *Ext, SelectionDAG &DAG) {
  SDValue Sel = peekThroughBitcasts(Ext->getOperand(0));
  SmallVector<SDValue, 4> CatOps;
  if (Sel.getOpcode() != ISD::VSELECT ||
      !collectConcatOps(Sel.getOperand(0).getNode(), CatOps))
    return SDValue();

  // Only narrow to a 128-bit result type.
  MVT VT = Ext->getSimpleValueType(0);
  if (!VT.is128BitVector())
    return SDValue();

  // The select condition must itself be 256 or 512 bits wide.
  MVT SelCondVT = Sel.getOperand(0).getSimpleValueType();
  if (!SelCondVT.is256BitVector() && !SelCondVT.is512BitVector())
    return SDValue();

  // WideVT is the type the extract actually sees; it may be a bitcast of the
  // select's own type SelVT, so their element counts can differ.
  MVT WideVT = Ext->getOperand(0).getSimpleValueType();
  MVT SelVT = Sel.getSimpleValueType();
  assert((SelVT.is256BitVector() || SelVT.is512BitVector()) &&
         "Unexpected vector type with legal operations");

  // Translate the extract index from WideVT element units into SelVT
  // element units.
  unsigned SelElts = SelVT.getVectorNumElements();
  unsigned CastedElts = WideVT.getVectorNumElements();
  unsigned ExtIdx = Ext->getConstantOperandVal(1);
  if (SelElts % CastedElts == 0) {
    // The select type has more (narrower) elements: scale the index up.
    ExtIdx *= (SelElts / CastedElts);
  } else if (CastedElts % SelElts == 0) {
    // The select type has fewer (wider) elements: the index must land on a
    // select-element boundary, then scale it down.
    unsigned IndexDivisor = CastedElts / SelElts;
    if (ExtIdx % IndexDivisor != 0)
      return SDValue();
    ExtIdx /= IndexDivisor;
  } else {
    llvm_unreachable("Element count of simple vector types are not divisible?");
  }

  // Build the narrow select from 128-bit extracts of all three operands,
  // then bitcast back to the requested result type.
  unsigned NarrowingFactor = WideVT.getSizeInBits() / VT.getSizeInBits();
  unsigned NarrowElts = SelElts / NarrowingFactor;
  MVT NarrowSelVT = MVT::getVectorVT(SelVT.getVectorElementType(), NarrowElts);
  SDLoc DL(Ext);
  SDValue ExtCond = extract128BitVector(Sel.getOperand(0), ExtIdx, DAG, DL);
  SDValue ExtT = extract128BitVector(Sel.getOperand(1), ExtIdx, DAG, DL);
  SDValue ExtF = extract128BitVector(Sel.getOperand(2), ExtIdx, DAG, DL);
  SDValue NarrowSel = DAG.getSelect(DL, NarrowSelVT, ExtCond, ExtT, ExtF);
  return DAG.getBitcast(VT, NarrowSel);
}
| 50582 | |
| 50583 | static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG, |
| 50584 | TargetLowering::DAGCombinerInfo &DCI, |
| 50585 | const X86Subtarget &Subtarget) { |
| 50586 | |
| 50587 | |
| 50588 | |
| 50589 | |
| 50590 | |
| 50591 | |
| 50592 | |
| 50593 | |
| 50594 | |
| 50595 | |
| 50596 | if (!N->getValueType(0).isSimple()) |
| 50597 | return SDValue(); |
| 50598 | |
| 50599 | MVT VT = N->getSimpleValueType(0); |
| 50600 | SDValue InVec = N->getOperand(0); |
| 50601 | unsigned IdxVal = N->getConstantOperandVal(1); |
| 50602 | SDValue InVecBC = peekThroughBitcasts(InVec); |
| 50603 | EVT InVecVT = InVec.getValueType(); |
| 50604 | unsigned SizeInBits = VT.getSizeInBits(); |
| 50605 | unsigned InSizeInBits = InVecVT.getSizeInBits(); |
| 50606 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
| 50607 | |
| 50608 | if (Subtarget.hasAVX() && !Subtarget.hasAVX2() && |
| 50609 | TLI.isTypeLegal(InVecVT) && |
| 50610 | InSizeInBits == 256 && InVecBC.getOpcode() == ISD::AND) { |
| 50611 | auto isConcatenatedNot = [](SDValue V) { |
| 50612 | V = peekThroughBitcasts(V); |
| 50613 | if (!isBitwiseNot(V)) |
| 50614 | return false; |
| 50615 | SDValue NotOp = V->getOperand(0); |
| 50616 | return peekThroughBitcasts(NotOp).getOpcode() == ISD::CONCAT_VECTORS; |
| 50617 | }; |
| 50618 | if (isConcatenatedNot(InVecBC.getOperand(0)) || |
| 50619 | isConcatenatedNot(InVecBC.getOperand(1))) { |
| 50620 | |
| 50621 | SDValue Concat = splitVectorIntBinary(InVecBC, DAG); |
| 50622 | return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), VT, |
| 50623 | DAG.getBitcast(InVecVT, Concat), N->getOperand(1)); |
| 50624 | } |
| 50625 | } |
| 50626 | |
| 50627 | if (DCI.isBeforeLegalizeOps()) |
| 50628 | return SDValue(); |
| 50629 | |
| 50630 | if (SDValue V = narrowExtractedVectorSelect(N, DAG)) |
| 50631 | return V; |
| 50632 | |
| 50633 | if (ISD::isBuildVectorAllZeros(InVec.getNode())) |
| 50634 | return getZeroVector(VT, Subtarget, DAG, SDLoc(N)); |
| 50635 | |
| 50636 | if (ISD::isBuildVectorAllOnes(InVec.getNode())) { |
| 50637 | if (VT.getScalarType() == MVT::i1) |
| 50638 | return DAG.getConstant(1, SDLoc(N), VT); |
| 50639 | return getOnesVector(VT, DAG, SDLoc(N)); |
| 50640 | } |
| 50641 | |
| 50642 | if (InVec.getOpcode() == ISD::BUILD_VECTOR) |
| 50643 | return DAG.getBuildVector( |
| 50644 | VT, SDLoc(N), |
| 50645 | InVec.getNode()->ops().slice(IdxVal, VT.getVectorNumElements())); |
| 50646 | |
| 50647 | |
| 50648 | |
| 50649 | |
| 50650 | if (VT.getVectorElementType() != MVT::i1 && |
| 50651 | InVec.getOpcode() == ISD::INSERT_SUBVECTOR && IdxVal == 0 && |
| 50652 | InVec.hasOneUse() && isNullConstant(InVec.getOperand(2)) && |
| 50653 | ISD::isBuildVectorAllZeros(InVec.getOperand(0).getNode()) && |
| 50654 | InVec.getOperand(1).getValueSizeInBits() <= SizeInBits) { |
| 50655 | SDLoc DL(N); |
| 50656 | return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, |
| 50657 | getZeroVector(VT, Subtarget, DAG, DL), |
| 50658 | InVec.getOperand(1), InVec.getOperand(2)); |
| 50659 | } |
| 50660 | |
| 50661 | |
| 50662 | |
| 50663 | |
| 50664 | if (IdxVal != 0 && (InVec.getOpcode() == X86ISD::VBROADCAST || |
| 50665 | InVec.getOpcode() == X86ISD::VBROADCAST_LOAD || |
| 50666 | DAG.isSplatValue(InVec, false))) |
| 50667 | return extractSubVector(InVec, 0, DAG, SDLoc(N), SizeInBits); |
| 50668 | |
| 50669 | |
| 50670 | if (IdxVal != 0 && InVec.getOpcode() == X86ISD::SUBV_BROADCAST_LOAD && |
| 50671 | cast<MemIntrinsicSDNode>(InVec)->getMemoryVT() == VT) |
| 50672 | return extractSubVector(InVec, 0, DAG, SDLoc(N), SizeInBits); |
| 50673 | |
| 50674 | |
| 50675 | if ((InSizeInBits % SizeInBits) == 0 && |
| 50676 | (IdxVal % VT.getVectorNumElements()) == 0) { |
| 50677 | SmallVector<int, 32> ShuffleMask; |
| 50678 | SmallVector<int, 32> ScaledMask; |
| 50679 | SmallVector<SDValue, 2> ShuffleInputs; |
| 50680 | unsigned NumSubVecs = InSizeInBits / SizeInBits; |
| 50681 | |
| 50682 | if (getTargetShuffleInputs(InVecBC, ShuffleInputs, ShuffleMask, DAG) && |
| 50683 | scaleShuffleElements(ShuffleMask, NumSubVecs, ScaledMask)) { |
| 50684 | unsigned SubVecIdx = IdxVal / VT.getVectorNumElements(); |
| 50685 | if (ScaledMask[SubVecIdx] == SM_SentinelUndef) |
| 50686 | return DAG.getUNDEF(VT); |
| 50687 | if (ScaledMask[SubVecIdx] == SM_SentinelZero) |
| 50688 | return getZeroVector(VT, Subtarget, DAG, SDLoc(N)); |
| 50689 | SDValue Src = ShuffleInputs[ScaledMask[SubVecIdx] / NumSubVecs]; |
| 50690 | if (Src.getValueSizeInBits() == InSizeInBits) { |
| 50691 | unsigned SrcSubVecIdx = ScaledMask[SubVecIdx] % NumSubVecs; |
| 50692 | unsigned SrcEltIdx = SrcSubVecIdx * VT.getVectorNumElements(); |
| 50693 | return extractSubVector(DAG.getBitcast(InVecVT, Src), SrcEltIdx, DAG, |
| 50694 | SDLoc(N), SizeInBits); |
| 50695 | } |
| 50696 | } |
| 50697 | } |
| 50698 | |
| 50699 | |
| 50700 | |
| 50701 | unsigned InOpcode = InVec.getOpcode(); |
| 50702 | if (IdxVal == 0 && InVec.hasOneUse()) { |
| 50703 | if (VT == MVT::v2f64 && InVecVT == MVT::v4f64) { |
| 50704 | |
| 50705 | if (InOpcode == ISD::SINT_TO_FP && |
| 50706 | InVec.getOperand(0).getValueType() == MVT::v4i32) { |
| 50707 | return DAG.getNode(X86ISD::CVTSI2P, SDLoc(N), VT, InVec.getOperand(0)); |
| 50708 | } |
| 50709 | |
| 50710 | if (InOpcode == ISD::UINT_TO_FP && Subtarget.hasVLX() && |
| 50711 | InVec.getOperand(0).getValueType() == MVT::v4i32) { |
| 50712 | return DAG.getNode(X86ISD::CVTUI2P, SDLoc(N), VT, InVec.getOperand(0)); |
| 50713 | } |
| 50714 | |
| 50715 | if (InOpcode == ISD::FP_EXTEND && |
| 50716 | InVec.getOperand(0).getValueType() == MVT::v4f32) { |
| 50717 | return DAG.getNode(X86ISD::VFPEXT, SDLoc(N), VT, InVec.getOperand(0)); |
| 50718 | } |
| 50719 | } |
| 50720 | if ((InOpcode == ISD::ANY_EXTEND || |
| 50721 | InOpcode == ISD::ANY_EXTEND_VECTOR_INREG || |
| 50722 | InOpcode == ISD::ZERO_EXTEND || |
| 50723 | InOpcode == ISD::ZERO_EXTEND_VECTOR_INREG || |
| 50724 | InOpcode == ISD::SIGN_EXTEND || |
| 50725 | InOpcode == ISD::SIGN_EXTEND_VECTOR_INREG) && |
| 50726 | (SizeInBits == 128 || SizeInBits == 256) && |
| 50727 | InVec.getOperand(0).getValueSizeInBits() >= SizeInBits) { |
| 50728 | SDLoc DL(N); |
| 50729 | SDValue Ext = InVec.getOperand(0); |
| 50730 | if (Ext.getValueSizeInBits() > SizeInBits) |
| 50731 | Ext = extractSubVector(Ext, 0, DAG, DL, SizeInBits); |
| 50732 | unsigned ExtOp = getOpcode_EXTEND_VECTOR_INREG(InOpcode); |
| 50733 | return DAG.getNode(ExtOp, DL, VT, Ext); |
| 50734 | } |
| 50735 | if (InOpcode == ISD::VSELECT && |
| 50736 | InVec.getOperand(0).getValueType().is256BitVector() && |
| 50737 | InVec.getOperand(1).getValueType().is256BitVector() && |
| 50738 | InVec.getOperand(2).getValueType().is256BitVector()) { |
| 50739 | SDLoc DL(N); |
| 50740 | SDValue Ext0 = extractSubVector(InVec.getOperand(0), 0, DAG, DL, 128); |
| 50741 | SDValue Ext1 = extractSubVector(InVec.getOperand(1), 0, DAG, DL, 128); |
| 50742 | SDValue Ext2 = extractSubVector(InVec.getOperand(2), 0, DAG, DL, 128); |
| 50743 | return DAG.getNode(InOpcode, DL, VT, Ext0, Ext1, Ext2); |
| 50744 | } |
| 50745 | if (InOpcode == ISD::TRUNCATE && Subtarget.hasVLX() && |
| 50746 | (VT.is128BitVector() || VT.is256BitVector())) { |
| 50747 | SDLoc DL(N); |
| 50748 | SDValue InVecSrc = InVec.getOperand(0); |
| 50749 | unsigned Scale = InVecSrc.getValueSizeInBits() / InSizeInBits; |
| 50750 | SDValue Ext = extractSubVector(InVecSrc, 0, DAG, DL, Scale * SizeInBits); |
| 50751 | return DAG.getNode(InOpcode, DL, VT, Ext); |
| 50752 | } |
| 50753 | } |
| 50754 | |
| 50755 | |
| 50756 | |
| 50757 | if ((InOpcode == X86ISD::VSHLI || InOpcode == X86ISD::VSRLI) && |
| 50758 | InVecVT.getScalarSizeInBits() == 64 && |
| 50759 | InVec.getConstantOperandAPInt(1) == 32) { |
| 50760 | SDLoc DL(N); |
| 50761 | SDValue Ext = |
| 50762 | extractSubVector(InVec.getOperand(0), IdxVal, DAG, DL, SizeInBits); |
| 50763 | return DAG.getNode(InOpcode, DL, VT, Ext, InVec.getOperand(1)); |
| 50764 | } |
| 50765 | |
| 50766 | return SDValue(); |
| 50767 | } |
| 50768 | |
// Combine (scalar_to_vector x) into simpler / cheaper X86 forms.
static SDValue combineScalarToVector(SDNode *N, SelectionDAG &DAG) {
  EVT VT = N->getValueType(0);
  SDValue Src = N->getOperand(0);
  SDLoc DL(N);

  // If this is a scalar_to_vector to v1i1 from an AND with 1, bypass the and.
  // This occurs frequently in our masked scalar intrinsic code and our
  // floating point select lowering with AVX512.
  // TODO: SimplifyDemandedBits instead?
  if (VT == MVT::v1i1 && Src.getOpcode() == ISD::AND && Src.hasOneUse())
    if (auto *C = dyn_cast<ConstantSDNode>(Src.getOperand(1)))
      if (C->getAPIntValue().isOneValue())
        return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v1i1,
                           Src.getOperand(0));

  // Combine scalar_to_vector of an extract_vector_elt (from index 0 of a
  // vXi1 vector) into an extract_subvector of the same vector.
  if (VT == MVT::v1i1 && Src.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
      Src.hasOneUse() && Src.getOperand(0).getValueType().isVector() &&
      Src.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
    if (auto *C = dyn_cast<ConstantSDNode>(Src.getOperand(1)))
      if (C->isNullValue())
        return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Src.getOperand(0),
                           Src.getOperand(1));

  // Reduce a v2i64/v2f64 scalar_to_vector of an any-extended i64 to a v4i32
  // scalar_to_vector of the narrow value - the upper bits are not needed.
  // TODO: Move to DAGCombine/SimplifyDemandedBits?
  if (VT == MVT::v2i64 || VT == MVT::v2f64) {
    // Returns the narrow (<= 32-bit) source when Op is a one-use i64
    // any_extend, or Op itself when it is a <= 32-bit extending load;
    // SDValue() otherwise.
    auto IsAnyExt64 = [](SDValue Op) {
      if (Op.getValueType() != MVT::i64 || !Op.hasOneUse())
        return SDValue();
      if (Op.getOpcode() == ISD::ANY_EXTEND &&
          Op.getOperand(0).getScalarValueSizeInBits() <= 32)
        return Op.getOperand(0);
      if (auto *Ld = dyn_cast<LoadSDNode>(Op))
        if (Ld->getExtensionType() == ISD::EXTLOAD &&
            Ld->getMemoryVT().getScalarSizeInBits() <= 32)
          return Op;
      return SDValue();
    };
    if (SDValue ExtSrc = IsAnyExt64(peekThroughOneUseBitcasts(Src)))
      return DAG.getBitcast(
          VT, DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v4i32,
                          DAG.getAnyExtOrTrunc(ExtSrc, DL, MVT::i32)));
  }

  // Combine (v2i64 (scalar_to_vector (i64 (bitconvert (mmx))))) to MOVQ2DQ.
  if (VT == MVT::v2i64 && Src.getOpcode() == ISD::BITCAST &&
      Src.getOperand(0).getValueType() == MVT::x86mmx)
    return DAG.getNode(X86ISD::MOVQ2DQ, DL, VT, Src.getOperand(0));

  // See if we're broadcasting the scalar value already, in which case just
  // reuse (the low part of) that broadcast. Ensure the same SDValue from the
  // SDNode use is being used.
  if (VT.getScalarType() == Src.getValueType())
    for (SDNode *User : Src->uses())
      if (User->getOpcode() == X86ISD::VBROADCAST &&
          Src == User->getOperand(0)) {
        unsigned SizeInBits = VT.getFixedSizeInBits();
        unsigned BroadcastSizeInBits =
            User->getValueSizeInBits(0).getFixedSize();
        if (BroadcastSizeInBits == SizeInBits)
          return SDValue(User, 0);
        if (BroadcastSizeInBits > SizeInBits)
          return extractSubVector(SDValue(User, 0), 0, DAG, DL, SizeInBits);
        // TODO: Handle BroadcastSizeInBits < SizeInBits when we have test
        // coverage.
      }

  return SDValue();
}
| 50838 | |
| 50839 | |
// Simplify PMULDQ and PMULUDQ operations.
static SDValue combinePMULDQ(SDNode *N, SelectionDAG &DAG,
                             TargetLowering::DAGCombinerInfo &DCI,
                             const X86Subtarget &Subtarget) {
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);

  // Canonicalize constant to RHS.
  if (DAG.isConstantIntBuildVectorOrConstantInt(LHS) &&
      !DAG.isConstantIntBuildVectorOrConstantInt(RHS))
    return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), RHS, LHS);

  // Multiply by zero.
  // Don't return RHS as it may contain UNDEFs.
  if (ISD::isBuildVectorAllZeros(RHS.getNode()))
    return DAG.getConstant(0, SDLoc(N), N->getValueType(0));

  // PMULDQ/PMULUDQ only uses lower 32 bits from each vector element.
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  if (TLI.SimplifyDemandedBits(SDValue(N, 0), APInt::getAllOnesValue(64), DCI))
    return SDValue(N, 0);

  // If either input is a zero/sign extend_vector_inreg from v4i32 that
  // SimplifyDemandedBits didn't strip, replace it with an explicit
  // {0,-1,1,-1} shuffle of the v4i32 source. Only the low 32 bits of each
  // 64-bit element are read by PMULDQ/PMULUDQ, so the shuffle is equivalent
  // and exposes further shuffle-combining opportunities.
  // NOTE(review): this looks like a workaround for extend_vector_inreg
  // legality checks elsewhere - confirm against upstream history before
  // changing.
  if (N->getValueType(0) == MVT::v2i64 && LHS.hasOneUse() &&
      (LHS.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG ||
       LHS.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG) &&
      LHS.getOperand(0).getValueType() == MVT::v4i32) {
    SDLoc dl(N);
    LHS = DAG.getVectorShuffle(MVT::v4i32, dl, LHS.getOperand(0),
                               LHS.getOperand(0), { 0, -1, 1, -1 });
    LHS = DAG.getBitcast(MVT::v2i64, LHS);
    return DAG.getNode(N->getOpcode(), dl, MVT::v2i64, LHS, RHS);
  }
  if (N->getValueType(0) == MVT::v2i64 && RHS.hasOneUse() &&
      (RHS.getOpcode() == ISD::ZERO_EXTEND_VECTOR_INREG ||
       RHS.getOpcode() == ISD::SIGN_EXTEND_VECTOR_INREG) &&
      RHS.getOperand(0).getValueType() == MVT::v4i32) {
    SDLoc dl(N);
    RHS = DAG.getVectorShuffle(MVT::v4i32, dl, RHS.getOperand(0),
                               RHS.getOperand(0), { 0, -1, 1, -1 });
    RHS = DAG.getBitcast(MVT::v2i64, RHS);
    return DAG.getNode(N->getOpcode(), dl, MVT::v2i64, LHS, RHS);
  }

  return SDValue();
}
| 50891 | |
// Combine ANY/SIGN/ZERO_EXTEND_VECTOR_INREG nodes.
static SDValue combineEXTEND_VECTOR_INREG(SDNode *N, SelectionDAG &DAG,
                                          TargetLowering::DAGCombinerInfo &DCI,
                                          const X86Subtarget &Subtarget) {
  EVT VT = N->getValueType(0);
  SDValue In = N->getOperand(0);
  unsigned Opcode = N->getOpcode();
  unsigned InOpcode = In.getOpcode();
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  // Try to merge vector loads and extend_inreg to an extload.
  // (ANY_EXTEND_VECTOR_INREG also uses ZEXTLOAD here - any ext can legally be
  // implemented as a zero extension.)
  if (!DCI.isBeforeLegalizeOps() && ISD::isNormalLoad(In.getNode()) &&
      In.hasOneUse()) {
    auto *Ld = cast<LoadSDNode>(In);
    if (Ld->isSimple()) {
      MVT SVT = In.getSimpleValueType().getVectorElementType();
      ISD::LoadExtType Ext = Opcode == ISD::SIGN_EXTEND_VECTOR_INREG
                                 ? ISD::SEXTLOAD
                                 : ISD::ZEXTLOAD;
      EVT MemVT = VT.changeVectorElementType(SVT);
      if (TLI.isLoadExtLegal(Ext, VT, MemVT)) {
        SDValue Load =
            DAG.getExtLoad(Ext, SDLoc(N), VT, Ld->getChain(), Ld->getBasePtr(),
                           Ld->getPointerInfo(), MemVT, Ld->getOriginalAlign(),
                           Ld->getMemOperand()->getFlags());
        // Forward the old load's chain users to the new extload's chain.
        DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Load.getValue(1));
        return Load;
      }
    }
  }

  // Fold EXTEND_VECTOR_INREG(EXTEND_VECTOR_INREG(X)) -> EXTEND_VECTOR_INREG(X)
  if (Opcode == InOpcode)
    return DAG.getNode(Opcode, SDLoc(N), VT, In.getOperand(0));

  // Fold EXTEND_VECTOR_INREG(EXTRACT_SUBVECTOR(EXTEND(X),0))
  // -> EXTEND_VECTOR_INREG(X).
  // TODO: Handle non-zero subvector indices.
  if (InOpcode == ISD::EXTRACT_SUBVECTOR && In.getConstantOperandVal(1) == 0 &&
      In.getOperand(0).getOpcode() == getOpcode_EXTEND(Opcode) &&
      In.getOperand(0).getOperand(0).getValueSizeInBits() ==
          In.getValueSizeInBits())
    return DAG.getNode(Opcode, SDLoc(N), VT, In.getOperand(0).getOperand(0));

  // Attempt to combine as a shuffle.
  // TODO: General ZERO_EXTEND_VECTOR_INREG support.
  if (Opcode == ISD::ANY_EXTEND_VECTOR_INREG ||
      (Opcode == ISD::ZERO_EXTEND_VECTOR_INREG && Subtarget.hasSSE41())) {
    SDValue Op(N, 0);
    if (TLI.isTypeLegal(VT) && TLI.isTypeLegal(In.getValueType()))
      if (SDValue Res = combineX86ShufflesRecursively(Op, DAG, Subtarget))
        return Res;
  }

  return SDValue();
}
| 50947 | |
| 50948 | static SDValue combineKSHIFT(SDNode *N, SelectionDAG &DAG, |
| 50949 | TargetLowering::DAGCombinerInfo &DCI) { |
| 50950 | EVT VT = N->getValueType(0); |
| 50951 | |
| 50952 | if (ISD::isBuildVectorAllZeros(N->getOperand(0).getNode())) |
| 50953 | return DAG.getConstant(0, SDLoc(N), VT); |
| 50954 | |
| 50955 | APInt KnownUndef, KnownZero; |
| 50956 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
| 50957 | APInt DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements()); |
| 50958 | if (TLI.SimplifyDemandedVectorElts(SDValue(N, 0), DemandedElts, KnownUndef, |
| 50959 | KnownZero, DCI)) |
| 50960 | return SDValue(N, 0); |
| 50961 | |
| 50962 | return SDValue(); |
| 50963 | } |
| 50964 | |
| 50965 | |
| 50966 | |
| 50967 | |
// Optimize (fp16_to_fp (fp_to_fp16 X)) on F16C targets: instead of a libcall
// round trip, emit VCVTPS2PH followed by VCVTPH2PS on a widened vector and
// extract the scalar result. The round trip through f16 is preserved.
static SDValue combineFP16_TO_FP(SDNode *N, SelectionDAG &DAG,
                                 const X86Subtarget &Subtarget) {
  if (Subtarget.useSoftFloat() || !Subtarget.hasF16C())
    return SDValue();

  // Only handle the fp16_to_fp(fp_to_fp16 X) round-trip pattern.
  if (N->getOperand(0).getOpcode() != ISD::FP_TO_FP16)
    return SDValue();

  // Only f32 -> f16 -> f32 round trips are supported here.
  if (N->getValueType(0) != MVT::f32 ||
      N->getOperand(0).getOperand(0).getValueType() != MVT::f32)
    return SDValue();

  SDLoc dl(N);
  SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v4f32,
                            N->getOperand(0).getOperand(0));
  // Imm 4 sets imm8[2], selecting the current MXCSR rounding mode for
  // CVTPS2PH (per the Intel VCVTPS2PH encoding).
  Res = DAG.getNode(X86ISD::CVTPS2PH, dl, MVT::v8i16, Res,
                    DAG.getTargetConstant(4, dl, MVT::i32));
  Res = DAG.getNode(X86ISD::CVTPH2PS, dl, MVT::v4f32, Res);
  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f32, Res,
                     DAG.getIntPtrConstant(0, dl));
}
| 50989 | |
// Combine (strict_)fp_extend of a vXf16 source on F16C targets into a
// CVTPH2PS node, widening the input to at least 8 f16 elements first.
static SDValue combineFP_EXTEND(SDNode *N, SelectionDAG &DAG,
                                const X86Subtarget &Subtarget) {
  if (!Subtarget.hasF16C() || Subtarget.useSoftFloat())
    return SDValue();

  bool IsStrict = N->isStrictFPOpcode();
  EVT VT = N->getValueType(0);
  // Operand 0 of a strict node is the chain.
  SDValue Src = N->getOperand(IsStrict ? 1 : 0);
  EVT SrcVT = Src.getValueType();

  if (!SrcVT.isVector() || SrcVT.getVectorElementType() != MVT::f16)
    return SDValue();

  if (VT.getVectorElementType() != MVT::f32 &&
      VT.getVectorElementType() != MVT::f64)
    return SDValue();

  // Require a power-of-2 vector with more than one element.
  unsigned NumElts = VT.getVectorNumElements();
  if (NumElts == 1 || !isPowerOf2_32(NumElts))
    return SDValue();

  SDLoc dl(N);

  // Convert the input to vXi16, the representation CVTPH2PS consumes.
  EVT IntVT = SrcVT.changeVectorElementTypeToInteger();
  Src = DAG.getBitcast(IntVT, Src);

  // Widen to at least 8 input elements. The padding is undef for a 4-element
  // source and zero otherwise.
  // NOTE(review): presumably zero padding is used below 4 elements to avoid
  // spurious FP exceptions on garbage lanes - confirm.
  if (NumElts < 8) {
    unsigned NumConcats = 8 / NumElts;
    SDValue Fill = NumElts == 4 ? DAG.getUNDEF(IntVT)
                                : DAG.getConstant(0, dl, IntVT);
    SmallVector<SDValue, 4> Ops(NumConcats, Fill);
    Ops[0] = Src;
    Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v8i16, Ops);
  }

  // Destination is vXf32 with at least 4 elements.
  EVT CvtVT = EVT::getVectorVT(*DAG.getContext(), MVT::f32,
                               std::max(4U, NumElts));
  SDValue Cvt, Chain;
  if (IsStrict) {
    Cvt = DAG.getNode(X86ISD::STRICT_CVTPH2PS, dl, {CvtVT, MVT::Other},
                      {N->getOperand(0), Src});
    Chain = Cvt.getValue(1);
  } else {
    Cvt = DAG.getNode(X86ISD::CVTPH2PS, dl, CvtVT, Src);
  }

  // Extract back down to the real number of elements.
  if (NumElts < 4) {
    assert(NumElts == 2 && "Unexpected size");
    Cvt = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MVT::v2f32, Cvt,
                      DAG.getIntPtrConstant(0, dl));
  }

  if (IsStrict) {
    // Extend to the original f64 VT if necessary, keeping the chain intact.
    if (Cvt.getValueType() != VT) {
      Cvt = DAG.getNode(ISD::STRICT_FP_EXTEND, dl, {VT, MVT::Other},
                        {Chain, Cvt});
      Chain = Cvt.getValue(1);
    }
    return DAG.getMergeValues({Cvt, Chain}, dl);
  }

  // Extend to the original f64 VT if necessary (no-op for f32 results).
  return DAG.getNode(ISD::FP_EXTEND, dl, VT, Cvt);
}
| 51058 | |
| 51059 | |
| 51060 | |
| 51061 | |
// If a broadcast load's value is also produced by a wider broadcast of the
// same pointer/chain, reuse the wider broadcast and extract the low
// subvector instead of performing a second load.
static SDValue combineBROADCAST_LOAD(SDNode *N, SelectionDAG &DAG,
                                     TargetLowering::DAGCombinerInfo &DCI) {
  assert((N->getOpcode() == X86ISD::VBROADCAST_LOAD ||
          N->getOpcode() == X86ISD::SUBV_BROADCAST_LOAD) &&
         "Unknown broadcast load type");

  // Only do this if our chain result is unused; otherwise rewiring the
  // chain below would be unsafe.
  if (N->hasAnyUseOfValue(1))
    return SDValue();

  auto *MemIntrin = cast<MemIntrinsicSDNode>(N);

  SDValue Ptr = MemIntrin->getBasePtr();
  SDValue Chain = MemIntrin->getChain();
  EVT VT = N->getSimpleValueType(0);
  EVT MemVT = MemIntrin->getMemoryVT();

  // Look at other users of our base pointer for a same-opcode broadcast with
  // identical chain, matching memory width, an unused chain result, and a
  // wider value result than ours.
  for (SDNode *User : Ptr->uses())
    if (User != N && User->getOpcode() == N->getOpcode() &&
        cast<MemIntrinsicSDNode>(User)->getBasePtr() == Ptr &&
        cast<MemIntrinsicSDNode>(User)->getChain() == Chain &&
        cast<MemIntrinsicSDNode>(User)->getMemoryVT().getSizeInBits() ==
            MemVT.getSizeInBits() &&
        !User->hasAnyUseOfValue(1) &&
        User->getValueSizeInBits(0).getFixedSize() > VT.getFixedSizeInBits()) {
      SDValue Extract = extractSubVector(SDValue(User, 0), 0, DAG, SDLoc(N),
                                         VT.getSizeInBits());
      Extract = DAG.getBitcast(VT, Extract);
      // Replace both our value and chain results with the wider load's.
      return DCI.CombineTo(N, Extract, SDValue(User, 1));
    }

  return SDValue();
}
| 51097 | |
// Combine fp_round of a vXf32 source to vXf16 on F16C targets into a
// CVTPS2PH node, widening the input to at least 4 f32 elements first.
static SDValue combineFP_ROUND(SDNode *N, SelectionDAG &DAG,
                               const X86Subtarget &Subtarget) {
  if (!Subtarget.hasF16C() || Subtarget.useSoftFloat())
    return SDValue();

  EVT VT = N->getValueType(0);
  SDValue Src = N->getOperand(0);
  EVT SrcVT = Src.getValueType();

  if (!VT.isVector() || VT.getVectorElementType() != MVT::f16 ||
      SrcVT.getVectorElementType() != MVT::f32)
    return SDValue();

  // Require a power-of-2 vector with more than one element.
  unsigned NumElts = VT.getVectorNumElements();
  if (NumElts == 1 || !isPowerOf2_32(NumElts))
    return SDValue();

  SDLoc dl(N);

  // Widen to at least 4 input elements, padding with zeroes.
  if (NumElts < 4)
    Src = DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v4f32, Src,
                      DAG.getConstantFP(0.0, dl, SrcVT));

  // Destination is a vXi16 with at least 8 elements.
  EVT CvtVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16,
                               std::max(8U, NumElts));
  // Imm 4 sets imm8[2], selecting the current MXCSR rounding mode for
  // CVTPS2PH (per the Intel VCVTPS2PH encoding).
  SDValue Cvt = DAG.getNode(X86ISD::CVTPS2PH, dl, CvtVT, Src,
                            DAG.getTargetConstant(4, dl, MVT::i32));

  // Extract back down to the real number of elements.
  if (NumElts < 8) {
    EVT IntVT = VT.changeVectorElementTypeToInteger();
    Cvt = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, IntVT, Cvt,
                      DAG.getIntPtrConstant(0, dl));
  }

  return DAG.getBitcast(VT, Cvt);
}
| 51137 | |
| 51138 | static SDValue combineMOVDQ2Q(SDNode *N, SelectionDAG &DAG) { |
| 51139 | SDValue Src = N->getOperand(0); |
| 51140 | |
| 51141 | |
| 51142 | if (ISD::isNormalLoad(Src.getNode()) && Src.hasOneUse()) { |
| 51143 | LoadSDNode *LN = cast<LoadSDNode>(Src.getNode()); |
| 51144 | |
| 51145 | if (LN->isSimple()) { |
| 51146 | SDValue NewLd = DAG.getLoad(MVT::x86mmx, SDLoc(N), LN->getChain(), |
| 51147 | LN->getBasePtr(), |
| 51148 | LN->getPointerInfo(), |
| 51149 | LN->getOriginalAlign(), |
| 51150 | LN->getMemOperand()->getFlags()); |
| 51151 | DAG.ReplaceAllUsesOfValueWith(SDValue(LN, 1), NewLd.getValue(1)); |
| 51152 | return NewLd; |
| 51153 | } |
| 51154 | } |
| 51155 | |
| 51156 | return SDValue(); |
| 51157 | } |
| 51158 | |
| 51159 | static SDValue combinePDEP(SDNode *N, SelectionDAG &DAG, |
| 51160 | TargetLowering::DAGCombinerInfo &DCI) { |
| 51161 | unsigned NumBits = N->getSimpleValueType(0).getSizeInBits(); |
| 51162 | const TargetLowering &TLI = DAG.getTargetLoweringInfo(); |
| 51163 | if (TLI.SimplifyDemandedBits(SDValue(N, 0), |
| 51164 | APInt::getAllOnesValue(NumBits), DCI)) |
| 51165 | return SDValue(N, 0); |
| 51166 | |
| 51167 | return SDValue(); |
| 51168 | } |
| 51169 | |
// Central DAG-combine dispatcher: routes each node opcode to its dedicated
// combine helper. Returns SDValue() when no combine applies.
SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
                                             DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  switch (N->getOpcode()) {
  default: break;
  case ISD::SCALAR_TO_VECTOR:
    return combineScalarToVector(N, DAG);
  case ISD::EXTRACT_VECTOR_ELT:
  case X86ISD::PEXTRW:
  case X86ISD::PEXTRB:
    return combineExtractVectorElt(N, DAG, DCI, Subtarget);
  case ISD::CONCAT_VECTORS:
    return combineConcatVectors(N, DAG, DCI, Subtarget);
  case ISD::INSERT_SUBVECTOR:
    return combineInsertSubvector(N, DAG, DCI, Subtarget);
  case ISD::EXTRACT_SUBVECTOR:
    return combineExtractSubvector(N, DAG, DCI, Subtarget);
  case ISD::VSELECT:
  case ISD::SELECT:
  case X86ISD::BLENDV: return combineSelect(N, DAG, DCI, Subtarget);
  case ISD::BITCAST: return combineBitcast(N, DAG, DCI, Subtarget);
  case X86ISD::CMOV: return combineCMov(N, DAG, DCI, Subtarget);
  case X86ISD::CMP: return combineCMP(N, DAG);
  case ISD::ADD: return combineAdd(N, DAG, DCI, Subtarget);
  case ISD::SUB: return combineSub(N, DAG, DCI, Subtarget);
  case X86ISD::ADD:
  case X86ISD::SUB: return combineX86AddSub(N, DAG, DCI);
  case X86ISD::SBB: return combineSBB(N, DAG);
  case X86ISD::ADC: return combineADC(N, DAG, DCI);
  case ISD::MUL: return combineMul(N, DAG, DCI, Subtarget);
  case ISD::SHL: return combineShiftLeft(N, DAG);
  case ISD::SRA: return combineShiftRightArithmetic(N, DAG, Subtarget);
  case ISD::SRL: return combineShiftRightLogical(N, DAG, DCI, Subtarget);
  case ISD::AND: return combineAnd(N, DAG, DCI, Subtarget);
  case ISD::OR: return combineOr(N, DAG, DCI, Subtarget);
  case ISD::XOR: return combineXor(N, DAG, DCI, Subtarget);
  case X86ISD::BEXTR:
  case X86ISD::BEXTRI: return combineBEXTR(N, DAG, DCI, Subtarget);
  case ISD::LOAD: return combineLoad(N, DAG, DCI, Subtarget);
  case ISD::MLOAD: return combineMaskedLoad(N, DAG, DCI, Subtarget);
  case ISD::STORE: return combineStore(N, DAG, DCI, Subtarget);
  case ISD::MSTORE: return combineMaskedStore(N, DAG, DCI, Subtarget);
  case X86ISD::VEXTRACT_STORE:
    return combineVEXTRACT_STORE(N, DAG, DCI, Subtarget);
  case ISD::SINT_TO_FP:
  case ISD::STRICT_SINT_TO_FP:
    return combineSIntToFP(N, DAG, DCI, Subtarget);
  case ISD::UINT_TO_FP:
  case ISD::STRICT_UINT_TO_FP:
    return combineUIntToFP(N, DAG, Subtarget);
  case ISD::FADD:
  case ISD::FSUB: return combineFaddFsub(N, DAG, Subtarget);
  case ISD::FNEG: return combineFneg(N, DAG, DCI, Subtarget);
  case ISD::TRUNCATE: return combineTruncate(N, DAG, Subtarget);
  case X86ISD::VTRUNC: return combineVTRUNC(N, DAG, DCI);
  case X86ISD::ANDNP: return combineAndnp(N, DAG, DCI, Subtarget);
  case X86ISD::FAND: return combineFAnd(N, DAG, Subtarget);
  case X86ISD::FANDN: return combineFAndn(N, DAG, Subtarget);
  case X86ISD::FXOR:
  case X86ISD::FOR: return combineFOr(N, DAG, DCI, Subtarget);
  case X86ISD::FMIN:
  case X86ISD::FMAX: return combineFMinFMax(N, DAG);
  case ISD::FMINNUM:
  case ISD::FMAXNUM: return combineFMinNumFMaxNum(N, DAG, Subtarget);
  case X86ISD::CVTSI2P:
  case X86ISD::CVTUI2P: return combineX86INT_TO_FP(N, DAG, DCI);
  case X86ISD::CVTP2SI:
  case X86ISD::CVTP2UI:
  case X86ISD::STRICT_CVTTP2SI:
  case X86ISD::CVTTP2SI:
  case X86ISD::STRICT_CVTTP2UI:
  case X86ISD::CVTTP2UI:
    return combineCVTP2I_CVTTP2I(N, DAG, DCI);
  case X86ISD::STRICT_CVTPH2PS:
  case X86ISD::CVTPH2PS: return combineCVTPH2PS(N, DAG, DCI);
  case X86ISD::BT: return combineBT(N, DAG, DCI);
  case ISD::ANY_EXTEND:
  case ISD::ZERO_EXTEND: return combineZext(N, DAG, DCI, Subtarget);
  case ISD::SIGN_EXTEND: return combineSext(N, DAG, DCI, Subtarget);
  case ISD::SIGN_EXTEND_INREG: return combineSignExtendInReg(N, DAG, Subtarget);
  case ISD::ANY_EXTEND_VECTOR_INREG:
  case ISD::SIGN_EXTEND_VECTOR_INREG:
  case ISD::ZERO_EXTEND_VECTOR_INREG:
    return combineEXTEND_VECTOR_INREG(N, DAG, DCI, Subtarget);
  case ISD::SETCC: return combineSetCC(N, DAG, DCI, Subtarget);
  case X86ISD::SETCC: return combineX86SetCC(N, DAG, Subtarget);
  case X86ISD::BRCOND: return combineBrCond(N, DAG, Subtarget);
  case X86ISD::PACKSS:
  case X86ISD::PACKUS: return combineVectorPack(N, DAG, DCI, Subtarget);
  case X86ISD::HADD:
  case X86ISD::HSUB:
  case X86ISD::FHADD:
  case X86ISD::FHSUB: return combineVectorHADDSUB(N, DAG, DCI, Subtarget);
  case X86ISD::VSHL:
  case X86ISD::VSRA:
  case X86ISD::VSRL:
    return combineVectorShiftVar(N, DAG, DCI, Subtarget);
  case X86ISD::VSHLI:
  case X86ISD::VSRAI:
  case X86ISD::VSRLI:
    return combineVectorShiftImm(N, DAG, DCI, Subtarget);
  case ISD::INSERT_VECTOR_ELT:
  case X86ISD::PINSRB:
  case X86ISD::PINSRW: return combineVectorInsert(N, DAG, DCI, Subtarget);
  // All target-specific shuffles share one combiner.
  case X86ISD::SHUFP:
  case X86ISD::INSERTPS:
  case X86ISD::EXTRQI:
  case X86ISD::INSERTQI:
  case X86ISD::VALIGN:
  case X86ISD::PALIGNR:
  case X86ISD::VSHLDQ:
  case X86ISD::VSRLDQ:
  case X86ISD::BLENDI:
  case X86ISD::UNPCKH:
  case X86ISD::UNPCKL:
  case X86ISD::MOVHLPS:
  case X86ISD::MOVLHPS:
  case X86ISD::PSHUFB:
  case X86ISD::PSHUFD:
  case X86ISD::PSHUFHW:
  case X86ISD::PSHUFLW:
  case X86ISD::MOVSHDUP:
  case X86ISD::MOVSLDUP:
  case X86ISD::MOVDDUP:
  case X86ISD::MOVSS:
  case X86ISD::MOVSD:
  case X86ISD::VBROADCAST:
  case X86ISD::VPPERM:
  case X86ISD::VPERMI:
  case X86ISD::VPERMV:
  case X86ISD::VPERMV3:
  case X86ISD::VPERMIL2:
  case X86ISD::VPERMILPI:
  case X86ISD::VPERMILPV:
  case X86ISD::VPERM2X128:
  case X86ISD::SHUF128:
  case X86ISD::VZEXT_MOVL:
  case ISD::VECTOR_SHUFFLE: return combineShuffle(N, DAG, DCI,Subtarget);
  // All FMA variants (including strict and rounded forms) share one combiner.
  case X86ISD::FMADD_RND:
  case X86ISD::FMSUB:
  case X86ISD::STRICT_FMSUB:
  case X86ISD::FMSUB_RND:
  case X86ISD::FNMADD:
  case X86ISD::STRICT_FNMADD:
  case X86ISD::FNMADD_RND:
  case X86ISD::FNMSUB:
  case X86ISD::STRICT_FNMSUB:
  case X86ISD::FNMSUB_RND:
  case ISD::FMA:
  case ISD::STRICT_FMA: return combineFMA(N, DAG, DCI, Subtarget);
  case X86ISD::FMADDSUB_RND:
  case X86ISD::FMSUBADD_RND:
  case X86ISD::FMADDSUB:
  case X86ISD::FMSUBADD: return combineFMADDSUB(N, DAG, DCI);
  case X86ISD::MOVMSK: return combineMOVMSK(N, DAG, DCI, Subtarget);
  case X86ISD::MGATHER:
  case X86ISD::MSCATTER: return combineX86GatherScatter(N, DAG, DCI);
  case ISD::MGATHER:
  case ISD::MSCATTER: return combineGatherScatter(N, DAG, DCI);
  case X86ISD::PCMPEQ:
  case X86ISD::PCMPGT: return combineVectorCompare(N, DAG, Subtarget);
  case X86ISD::PMULDQ:
  case X86ISD::PMULUDQ: return combinePMULDQ(N, DAG, DCI, Subtarget);
  case X86ISD::KSHIFTL:
  case X86ISD::KSHIFTR: return combineKSHIFT(N, DAG, DCI);
  case ISD::FP16_TO_FP: return combineFP16_TO_FP(N, DAG, Subtarget);
  case ISD::STRICT_FP_EXTEND:
  case ISD::FP_EXTEND: return combineFP_EXTEND(N, DAG, Subtarget);
  case ISD::FP_ROUND: return combineFP_ROUND(N, DAG, Subtarget);
  case X86ISD::VBROADCAST_LOAD:
  case X86ISD::SUBV_BROADCAST_LOAD: return combineBROADCAST_LOAD(N, DAG, DCI);
  case X86ISD::MOVDQ2Q: return combineMOVDQ2Q(N, DAG);
  case X86ISD::PDEP: return combinePDEP(N, DAG, DCI);
  }

  return SDValue();
}
| 51347 | |
// Return true if Opc on VT is worth keeping at this type; returning false
// encourages the DAG combiner to transform the op to a different type.
bool X86TargetLowering::isTypeDesirableForOp(unsigned Opc, EVT VT) const {
  if (!isTypeLegal(VT))
    return false;

  // There are no vXi8 vector shifts on x86.
  if (Opc == ISD::SHL && VT.isVector() && VT.getVectorElementType() == MVT::i8)
    return false;

  // 8-bit multiplies/shifts are discouraged so they get widened to i32,
  // where they can be turned into LEA or other cheap ops; see also the
  // promotion logic in IsDesirableToPromoteOp below.
  // NOTE(review): presumably this also sidesteps partial-register stalls on
  // 8-bit ops - confirm against upstream rationale.
  if ((Opc == ISD::MUL || Opc == ISD::SHL) && VT == MVT::i8)
    return false;

  // i16 instruction encodings are longer (operand-size prefix) and some i16
  // instructions are slow, so mark common i16 ops undesirable to push them
  // to i32.
  if (VT == MVT::i16) {
    switch (Opc) {
    default:
      break;
    case ISD::LOAD:
    case ISD::SIGN_EXTEND:
    case ISD::ZERO_EXTEND:
    case ISD::ANY_EXTEND:
    case ISD::SHL:
    case ISD::SRA:
    case ISD::SRL:
    case ISD::SUB:
    case ISD::ADD:
    case ISD::MUL:
    case ISD::AND:
    case ISD::OR:
    case ISD::XOR:
      return false;
    }
  }

  // Any legal type not explicitly rejected above is desirable.
  return true;
}
| 51393 | |
| 51394 | SDValue X86TargetLowering::expandIndirectJTBranch(const SDLoc& dl, |
| 51395 | SDValue Value, SDValue Addr, |
| 51396 | SelectionDAG &DAG) const { |
| 51397 | const Module *M = DAG.getMachineFunction().getMMI().getModule(); |
| 51398 | Metadata *IsCFProtectionSupported = M->getModuleFlag("cf-protection-branch"); |
| 51399 | if (IsCFProtectionSupported) { |
| 51400 | |
| 51401 | |
| 51402 | |
| 51403 | |
| 51404 | return DAG.getNode(X86ISD::NT_BRIND, dl, MVT::Other, Value, Addr); |
| 51405 | } |
| 51406 | |
| 51407 | return TargetLowering::expandIndirectJTBranch(dl, Value, Addr, DAG); |
| 51408 | } |
| 51409 | |
// Return true if it is profitable to promote Op to i32 (PVT is set to
// MVT::i32 on success). i16 is legal but has longer encodings and some slow
// instructions; an i8 multiply by constant is also promoted. Promotion is
// rejected when it would break a load-fold / read-modify-write opportunity.
bool X86TargetLowering::IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const {
  EVT VT = Op.getValueType();
  bool Is8BitMulByConstant = VT == MVT::i8 && Op.getOpcode() == ISD::MUL &&
                             isa<ConstantSDNode>(Op.getOperand(1));

  // Only i16 ops and i8 multiplies by a constant are candidates.
  if (VT != MVT::i16 && !Is8BitMulByConstant)
    return false;

  // True when Op's single user is a plain store to the same base pointer the
  // given load reads from, i.e. the op could fold into an RMW memory
  // instruction that promotion would destroy.
  auto IsFoldableRMW = [](SDValue Load, SDValue Op) {
    if (!Op.hasOneUse())
      return false;
    SDNode *User = *Op->use_begin();
    if (!ISD::isNormalStore(User))
      return false;
    auto *Ld = cast<LoadSDNode>(Load);
    auto *St = cast<StoreSDNode>(User);
    return Ld->getBasePtr() == St->getBasePtr();
  };

  // Same check for an atomic load feeding an atomic store.
  auto IsFoldableAtomicRMW = [](SDValue Load, SDValue Op) {
    if (!Load.hasOneUse() || Load.getOpcode() != ISD::ATOMIC_LOAD)
      return false;
    if (!Op.hasOneUse())
      return false;
    SDNode *User = *Op->use_begin();
    if (User->getOpcode() != ISD::ATOMIC_STORE)
      return false;
    auto *Ld = cast<AtomicSDNode>(Load);
    auto *St = cast<AtomicSDNode>(User);
    return Ld->getBasePtr() == St->getBasePtr();
  };

  bool Commute = false;
  switch (Op.getOpcode()) {
  default: return false;
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND:
  case ISD::ANY_EXTEND:
    break;
  case ISD::SHL:
  case ISD::SRA:
  case ISD::SRL: {
    SDValue N0 = Op.getOperand(0);
    // Look out for (store (shl (load), x)) - keep the RMW fold.
    if (MayFoldLoad(N0) && IsFoldableRMW(N0, Op))
      return false;
    break;
  }
  case ISD::ADD:
  case ISD::MUL:
  case ISD::AND:
  case ISD::OR:
  case ISD::XOR:
    Commute = true;
    LLVM_FALLTHROUGH;
  case ISD::SUB: {
    SDValue N0 = Op.getOperand(0);
    SDValue N1 = Op.getOperand(1);
    // Avoid disabling potential load-folding opportunities on either
    // operand (accounting for commutable ops) and RMW folds.
    if (MayFoldLoad(N1) &&
        (!Commute || !isa<ConstantSDNode>(N0) ||
         (Op.getOpcode() != ISD::MUL && IsFoldableRMW(N1, Op))))
      return false;
    if (MayFoldLoad(N0) &&
        ((Commute && !isa<ConstantSDNode>(N1)) ||
         (Op.getOpcode() != ISD::MUL && IsFoldableRMW(N0, Op))))
      return false;
    if (IsFoldableAtomicRMW(N0, Op) ||
        (Commute && IsFoldableAtomicRMW(N1, Op)))
      return false;
  }
  }

  PVT = MVT::i32;
  return true;
}
| 51490 | |
| 51491 | |
| 51492 | |
| 51493 | |
| 51494 | |
| 51495 | |
| 51496 | static bool matchAsm(StringRef S, ArrayRef<const char *> Pieces) { |
| 51497 | S = S.substr(S.find_first_not_of(" \t")); |
| 51498 | |
| 51499 | for (StringRef Piece : Pieces) { |
| 51500 | if (!S.startswith(Piece)) |
| 51501 | return false; |
| 51502 | |
| 51503 | S = S.substr(Piece.size()); |
| 51504 | StringRef::size_type Pos = S.find_first_not_of(" \t"); |
| 51505 | if (Pos == 0) |
| 51506 | return false; |
| 51507 | |
| 51508 | S = S.substr(Pos); |
| 51509 | } |
| 51510 | |
| 51511 | return S.empty(); |
| 51512 | } |
| 51513 | |
| 51514 | static bool clobbersFlagRegisters(const SmallVector<StringRef, 4> &AsmPieces) { |
| 51515 | |
| 51516 | if (AsmPieces.size() == 3 || AsmPieces.size() == 4) { |
| 51517 | if (std::count(AsmPieces.begin(), AsmPieces.end(), "~{cc}") && |
| 51518 | std::count(AsmPieces.begin(), AsmPieces.end(), "~{flags}") && |
| 51519 | std::count(AsmPieces.begin(), AsmPieces.end(), "~{fpsr}")) { |
| 51520 | |
| 51521 | if (AsmPieces.size() == 3) |
| 51522 | return true; |
| 51523 | else if (std::count(AsmPieces.begin(), AsmPieces.end(), "~{dirflag}")) |
| 51524 | return true; |
| 51525 | } |
| 51526 | } |
| 51527 | return false; |
| 51528 | } |
| 51529 | |
// Try to replace an entire inline-asm call with equivalent target-independent
// IR. Recognizes a handful of hand-written byte-swap idioms ("bswap",
// "rorw $8"/"rolw $8", and the 32-bit bswap/bswap/xchgl pair) and lowers them
// to llvm.bswap via IntrinsicLowering::LowerToByteSwap. Returns true if the
// call was replaced.
bool X86TargetLowering::ExpandInlineAsm(CallInst *CI) const {
  InlineAsm *IA = cast<InlineAsm>(CI->getCalledOperand());

  const std::string &AsmStr = IA->getAsmString();

  // Only integer results whose width is a multiple of 16 bits are candidates
  // for a byte-swap rewrite.
  IntegerType *Ty = dyn_cast<IntegerType>(CI->getType());
  if (!Ty || Ty->getBitWidth() % 16 != 0)
    return false;

  // Split the asm string into individual instructions (separated by ';' or
  // newlines).
  SmallVector<StringRef, 4> AsmPieces;
  SplitString(AsmStr, AsmPieces, ";\n");

  switch (AsmPieces.size()) {
  default: return false;
  case 1:
    // Single-instruction patterns: any spelling of bswap on operand 0, with
    // or without the {0:q} modifier.
    if (matchAsm(AsmPieces[0], {"bswap", "$0"}) ||
        matchAsm(AsmPieces[0], {"bswapl", "$0"}) ||
        matchAsm(AsmPieces[0], {"bswapq", "$0"}) ||
        matchAsm(AsmPieces[0], {"bswap", "${0:q}"}) ||
        matchAsm(AsmPieces[0], {"bswapl", "${0:q}"}) ||
        matchAsm(AsmPieces[0], {"bswapq", "${0:q}"})) {
      return IntrinsicLowering::LowerToByteSwap(CI);
    }

    // 16-bit rotate-by-8 is a byte swap, but only when the constraint string
    // is "=r,0,..." and the remaining constraints are just flag clobbers.
    if (CI->getType()->isIntegerTy(16) &&
        IA->getConstraintString().compare(0, 5, "=r,0,") == 0 &&
        (matchAsm(AsmPieces[0], {"rorw", "$$8,", "${0:w}"}) ||
         matchAsm(AsmPieces[0], {"rolw", "$$8,", "${0:w}"}))) {
      AsmPieces.clear();
      StringRef ConstraintsStr = IA->getConstraintString();
      SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ",");
      array_pod_sort(AsmPieces.begin(), AsmPieces.end());
      if (clobbersFlagRegisters(AsmPieces))
        return IntrinsicLowering::LowerToByteSwap(CI);
    }
    break;
  case 3:
    // 32-bit byte swap written as rorw/rorl/rorw, again requiring "=r,0,"
    // followed only by flag clobbers.
    if (CI->getType()->isIntegerTy(32) &&
        IA->getConstraintString().compare(0, 5, "=r,0,") == 0 &&
        matchAsm(AsmPieces[0], {"rorw", "$$8,", "${0:w}"}) &&
        matchAsm(AsmPieces[1], {"rorl", "$$16,", "$0"}) &&
        matchAsm(AsmPieces[2], {"rorw", "$$8,", "${0:w}"})) {
      AsmPieces.clear();
      StringRef ConstraintsStr = IA->getConstraintString();
      SplitString(StringRef(ConstraintsStr).substr(5), AsmPieces, ",");
      array_pod_sort(AsmPieces.begin(), AsmPieces.end());
      if (clobbersFlagRegisters(AsmPieces))
        return IntrinsicLowering::LowerToByteSwap(CI);
    }

    // 64-bit byte swap in the "A" (EDX:EAX) register pair: swap both halves
    // and exchange them.
    if (CI->getType()->isIntegerTy(64)) {
      InlineAsm::ConstraintInfoVector Constraints = IA->ParseConstraints();
      if (Constraints.size() >= 2 &&
          Constraints[0].Codes.size() == 1 && Constraints[0].Codes[0] == "A" &&
          Constraints[1].Codes.size() == 1 && Constraints[1].Codes[0] == "0") {
        if (matchAsm(AsmPieces[0], {"bswap", "%eax"}) &&
            matchAsm(AsmPieces[1], {"bswap", "%edx"}) &&
            matchAsm(AsmPieces[2], {"xchgl", "%eax,", "%edx"}))
          return IntrinsicLowering::LowerToByteSwap(CI);
      }
    }
    break;
  }
  return false;
}
| 51605 | |
// Map a GCC flag-output constraint string of the form "{@cc<cond>}" to the
// corresponding X86 condition code. Synonymous spellings (e.g. "c"/"b",
// "e"/"z", "na"/"be") map to the same code. Returns X86::COND_INVALID when
// the string is not a recognized flag-output constraint.
static X86::CondCode parseConstraintCode(llvm::StringRef Constraint) {
  X86::CondCode Cond = StringSwitch<X86::CondCode>(Constraint)
                           .Case("{@cca}", X86::COND_A)
                           .Case("{@ccae}", X86::COND_AE)
                           .Case("{@ccb}", X86::COND_B)
                           .Case("{@ccbe}", X86::COND_BE)
                           .Case("{@ccc}", X86::COND_B)
                           .Case("{@cce}", X86::COND_E)
                           .Case("{@ccz}", X86::COND_E)
                           .Case("{@ccg}", X86::COND_G)
                           .Case("{@ccge}", X86::COND_GE)
                           .Case("{@ccl}", X86::COND_L)
                           .Case("{@ccle}", X86::COND_LE)
                           .Case("{@ccna}", X86::COND_BE)
                           .Case("{@ccnae}", X86::COND_B)
                           .Case("{@ccnb}", X86::COND_AE)
                           .Case("{@ccnbe}", X86::COND_A)
                           .Case("{@ccnc}", X86::COND_AE)
                           .Case("{@ccne}", X86::COND_NE)
                           .Case("{@ccnz}", X86::COND_NE)
                           .Case("{@ccng}", X86::COND_LE)
                           .Case("{@ccnge}", X86::COND_L)
                           .Case("{@ccnl}", X86::COND_GE)
                           .Case("{@ccnle}", X86::COND_G)
                           .Case("{@ccno}", X86::COND_NO)
                           .Case("{@ccnp}", X86::COND_NP)
                           .Case("{@ccns}", X86::COND_NS)
                           .Case("{@cco}", X86::COND_O)
                           .Case("{@ccp}", X86::COND_P)
                           .Case("{@ccs}", X86::COND_S)
                           .Default(X86::COND_INVALID);
  return Cond;
}
| 51639 | |
| 51640 | |
/// Classify an inline-asm constraint letter for X86: register-class
/// constraints, specific-register constraints, immediates, and "other"
/// (including flag-output "{@cc...}" constraints). Anything unrecognized is
/// deferred to the target-independent implementation.
X86TargetLowering::ConstraintType
X86TargetLowering::getConstraintType(StringRef Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    // Constraints that name a whole register class (GPRs, x87 stack,
    // MMX/SSE/AVX vector registers, mask registers, ...).
    case 'R':
    case 'q':
    case 'Q':
    case 'f':
    case 't':
    case 'u':
    case 'y':
    case 'x':
    case 'v':
    case 'l':
    case 'k':
      return C_RegisterClass;
    // Constraints pinned to one specific register (a/b/c/d = eax/ebx/ecx/edx,
    // S/D = esi/edi, A = the edx:eax pair).
    case 'a':
    case 'b':
    case 'c':
    case 'd':
    case 'S':
    case 'D':
    case 'A':
      return C_Register;
    // Integer-immediate constraints with various range restrictions.
    case 'I':
    case 'J':
    case 'K':
    case 'N':
    case 'G':
    case 'L':
    case 'M':
      return C_Immediate;
    // Constant constraints handled specially in LowerAsmOperandForConstraint.
    case 'C':
    case 'e':
    case 'Z':
      return C_Other;
    default:
      break;
    }
  }
  else if (Constraint.size() == 2) {
    // Two-letter "Y..." constraints.
    switch (Constraint[0]) {
    default:
      break;
    case 'Y':
      switch (Constraint[1]) {
      default:
        break;
      case 'z':
        // "Yz": specifically xmm0.
        return C_Register;
      case 'i':
      case 'm':
      case 'k':
      case 't':
      case '2':
        return C_RegisterClass;
      }
    }
  } else if (parseConstraintCode(Constraint) != X86::COND_INVALID)
    // Flag-output constraints like "{@ccz}".
    return C_Other;
  return TargetLowering::getConstraintType(Constraint);
}
| 51703 | |
| 51704 | |
| 51705 | |
| 51706 | |
/// Examine the given constraint letter against the type of the call operand
/// and return how good a match it is (CW_Invalid .. CW_SpecificReg). Register
/// constraints are weighed by whether the operand's type fits the register
/// class and the subtarget supports it; immediate constraints are weighed by
/// whether the constant operand is in the constraint's range.
TargetLowering::ConstraintWeight
X86TargetLowering::getSingleConstraintMatchWeight(
    AsmOperandInfo &info, const char *constraint) const {
  ConstraintWeight weight = CW_Invalid;
  Value *CallOperandVal = info.CallOperandVal;

  // With no operand value there is nothing to weigh against.
  if (!CallOperandVal)
    return CW_Default;
  Type *type = CallOperandVal->getType();

  switch (*constraint) {
  default:
    // Ask the generic implementation first, then fall through so that a
    // recognized letter below can still upgrade the weight.
    weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
    LLVM_FALLTHROUGH;
  // GPR constraints: a good (specific-register) match for integer operands.
  case 'R':
  case 'q':
  case 'Q':
  case 'a':
  case 'b':
  case 'c':
  case 'd':
  case 'S':
  case 'D':
  case 'A':
    if (CallOperandVal->getType()->isIntegerTy())
      weight = CW_SpecificReg;
    break;
  // x87 stack constraints match floating-point operands.
  case 'f':
  case 't':
  case 'u':
    if (type->isFloatingPointTy())
      weight = CW_SpecificReg;
    break;
  // MMX registers.
  case 'y':
    if (type->isX86_MMXTy() && Subtarget.hasMMX())
      weight = CW_SpecificReg;
    break;
  case 'Y':
    // Two-character "Y..." constraints only; a bare 'Y' matches nothing.
    if (StringRef(constraint).size() != 2)
      break;
    switch (constraint[1]) {
    default:
      return CW_Invalid;
    // XMM0 ("Yz"): the vector width must match an available register size.
    case 'z':
      if (((type->getPrimitiveSizeInBits() == 128) && Subtarget.hasSSE1()) ||
          ((type->getPrimitiveSizeInBits() == 256) && Subtarget.hasAVX()) ||
          ((type->getPrimitiveSizeInBits() == 512) && Subtarget.hasAVX512()))
        return CW_SpecificReg;
      return CW_Invalid;
    // AVX-512 mask registers ("Yk").
    case 'k':
      if ((type->getPrimitiveSizeInBits() == 64) && Subtarget.hasAVX512())
        return CW_Register;
      return CW_Invalid;
    // MMX ("Ym"): keep whatever weight was computed so far.
    case 'm':
      if (type->isX86_MMXTy() && Subtarget.hasMMX())
        return weight;
      return CW_Invalid;
    // SSE2-dependent variants.
    case 'i':
    case 't':
    case '2':
      if (!Subtarget.hasSSE2())
        return CW_Invalid;
      break;
    }
    break;
  case 'v':
    // 'v' additionally allows ZMM registers with AVX-512; fall through to
    // share the XMM/YMM checks with 'x'.
    if ((type->getPrimitiveSizeInBits() == 512) && Subtarget.hasAVX512())
      weight = CW_Register;
    LLVM_FALLTHROUGH;
  case 'x':
    if (((type->getPrimitiveSizeInBits() == 128) && Subtarget.hasSSE1()) ||
        ((type->getPrimitiveSizeInBits() == 256) && Subtarget.hasAVX()))
      weight = CW_Register;
    break;
  case 'k':
    // Mask registers.
    if ((type->getPrimitiveSizeInBits() == 64) && Subtarget.hasAVX512())
      weight = CW_Register;
    break;
  // Immediate-range constraints: weigh as a constant match only when the
  // operand is a ConstantInt within the documented range for the letter.
  case 'I': // 0..31
    if (ConstantInt *C = dyn_cast<ConstantInt>(info.CallOperandVal)) {
      if (C->getZExtValue() <= 31)
        weight = CW_Constant;
    }
    break;
  case 'J': // 0..63
    if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
      if (C->getZExtValue() <= 63)
        weight = CW_Constant;
    }
    break;
  case 'K': // signed 8-bit
    if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
      if ((C->getSExtValue() >= -0x80) && (C->getSExtValue() <= 0x7f))
        weight = CW_Constant;
    }
    break;
  case 'L': // 0xff or 0xffff
    if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
      if ((C->getZExtValue() == 0xff) || (C->getZExtValue() == 0xffff))
        weight = CW_Constant;
    }
    break;
  case 'M': // 0..3
    if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
      if (C->getZExtValue() <= 3)
        weight = CW_Constant;
    }
    break;
  case 'N': // 0..255
    if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
      if (C->getZExtValue() <= 0xff)
        weight = CW_Constant;
    }
    break;
  // Floating-point constants.
  case 'G':
  case 'C':
    if (isa<ConstantFP>(CallOperandVal)) {
      weight = CW_Constant;
    }
    break;
  case 'e': // signed 32-bit
    if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
      if ((C->getSExtValue() >= -0x80000000LL) &&
          (C->getSExtValue() <= 0x7fffffffLL))
        weight = CW_Constant;
    }
    break;
  case 'Z': // unsigned 32-bit
    if (ConstantInt *C = dyn_cast<ConstantInt>(CallOperandVal)) {
      if (C->getZExtValue() <= 0xffffffff)
        weight = CW_Constant;
    }
    break;
  }
  return weight;
}
| 51849 | |
| 51850 | |
| 51851 | |
| 51852 | |
| 51853 | const char *X86TargetLowering:: |
| 51854 | LowerXConstraint(EVT ConstraintVT) const { |
| 51855 | |
| 51856 | |
| 51857 | if (ConstraintVT.isFloatingPoint()) { |
| 51858 | if (Subtarget.hasSSE1()) |
| 51859 | return "x"; |
| 51860 | } |
| 51861 | |
| 51862 | return TargetLowering::LowerXConstraint(ConstraintVT); |
| 51863 | } |
| 51864 | |
| 51865 | |
// Lower a flag-output ("{@cc...}") asm operand: read EFLAGS after the asm,
// materialize the requested condition with SETcc, and zero-extend it to the
// operand's type. Returns an empty SDValue for non-flag-output constraints.
SDValue X86TargetLowering::LowerAsmOutputForConstraint(
    SDValue &Chain, SDValue &Flag, const SDLoc &DL,
    const AsmOperandInfo &OpInfo, SelectionDAG &DAG) const {
  X86::CondCode Cond = parseConstraintCode(OpInfo.ConstraintCode);
  if (Cond == X86::COND_INVALID)
    return SDValue();

  // Flag outputs must be scalar integers of at least 8 bits.
  if (OpInfo.ConstraintVT.isVector() || !OpInfo.ConstraintVT.isInteger() ||
      OpInfo.ConstraintVT.getSizeInBits() < 8)
    report_fatal_error("Flag output operand is of invalid type");

  // Copy EFLAGS out of the asm node, threading the glue value through if one
  // was provided, and update the chain to the copy's output chain.
  if (Flag.getNode()) {
    Flag = DAG.getCopyFromReg(Chain, DL, X86::EFLAGS, MVT::i32, Flag);
    Chain = Flag.getValue(1);
  } else
    Flag = DAG.getCopyFromReg(Chain, DL, X86::EFLAGS, MVT::i32);

  // Produce a 0/1 value for the condition and widen it to the constraint's
  // type.
  SDValue CC = getSETCC(Cond, Flag, DL, DAG);

  SDValue Result = DAG.getNode(ISD::ZERO_EXTEND, DL, OpInfo.ConstraintVT, CC);

  return Result;
}
| 51890 | |
| 51891 | |
| 51892 | |
/// Lower an inline-asm operand for a single-letter immediate constraint.
/// When the operand is a constant that satisfies the constraint's range, a
/// TargetConstant is appended to \p Ops; otherwise handling (including the
/// symbolic 'i' case) is deferred to the generic implementation. Returning
/// without pushing anything rejects the operand for this constraint.
void X86TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                     std::string &Constraint,
                                                     std::vector<SDValue>&Ops,
                                                     SelectionDAG &DAG) const {
  SDValue Result;

  // Only single-letter constraints are handled here.
  if (Constraint.length() > 1) return;

  char ConstraintLetter = Constraint[0];
  switch (ConstraintLetter) {
  default: break;
  case 'I': // unsigned constant in [0, 31]
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
      if (C->getZExtValue() <= 31) {
        Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
                                       Op.getValueType());
        break;
      }
    }
    return;
  case 'J': // unsigned constant in [0, 63]
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
      if (C->getZExtValue() <= 63) {
        Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
                                       Op.getValueType());
        break;
      }
    }
    return;
  case 'K': // signed 8-bit constant
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
      if (isInt<8>(C->getSExtValue())) {
        Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
                                       Op.getValueType());
        break;
      }
    }
    return;
  case 'L': // 0xff, 0xffff, or (64-bit only) 0xffffffff
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
      if (C->getZExtValue() == 0xff || C->getZExtValue() == 0xffff ||
          (Subtarget.is64Bit() && C->getZExtValue() == 0xffffffff)) {
        // Note: emitted sign-extended, matching how the masks are used.
        Result = DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op),
                                       Op.getValueType());
        break;
      }
    }
    return;
  case 'M': // unsigned constant in [0, 3]
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
      if (C->getZExtValue() <= 3) {
        Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
                                       Op.getValueType());
        break;
      }
    }
    return;
  case 'N': // unsigned 8-bit constant
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
      if (C->getZExtValue() <= 255) {
        Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
                                       Op.getValueType());
        break;
      }
    }
    return;
  case 'O': // unsigned constant in [0, 127]
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
      if (C->getZExtValue() <= 127) {
        Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
                                       Op.getValueType());
        break;
      }
    }
    return;
  case 'e': { // constant that fits a signed 32-bit immediate
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
      if (ConstantInt::isValueValidForType(Type::getInt32Ty(*DAG.getContext()),
                                           C->getSExtValue())) {
        // Emitted as i64 so the 32-bit immediate can be sign-extended.
        Result = DAG.getTargetConstant(C->getSExtValue(), SDLoc(Op), MVT::i64);
        break;
      }
    }
    return;
  }
  case 'Z': { // constant that fits an unsigned 32-bit immediate
    if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op)) {
      if (ConstantInt::isValueValidForType(Type::getInt32Ty(*DAG.getContext()),
                                           C->getZExtValue())) {
        Result = DAG.getTargetConstant(C->getZExtValue(), SDLoc(Op),
                                       Op.getValueType());
        break;
      }
    }
    return;
  }
  case 'i': { // any symbolic or integer constant
    // Literal constants: extend to i64. i1 booleans are extended according
    // to the target's boolean contents; everything else is sign-extended.
    if (ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op)) {
      bool IsBool = CST->getConstantIntValue()->getBitWidth() == 1;
      BooleanContent BCont = getBooleanContents(MVT::i64);
      ISD::NodeType ExtOpc = IsBool ? getExtendForContent(BCont)
                                    : ISD::SIGN_EXTEND;
      int64_t ExtVal = ExtOpc == ISD::ZERO_EXTEND ? CST->getZExtValue()
                                                  : CST->getSExtValue();
      Result = DAG.getTargetConstant(ExtVal, SDLoc(Op), MVT::i64);
      break;
    }

    // Symbolic addresses are not valid immediates under GOT/stub PIC
    // code models, so reject them here.
    if (Subtarget.isPICStyleGOT() || Subtarget.isPICStyleStubPIC())
      return;

    // Likewise reject globals that must be referenced through a stub.
    if (auto *GA = dyn_cast<GlobalAddressSDNode>(Op))
      if (isGlobalStubReference(
              Subtarget.classifyGlobalReference(GA->getGlobal())))
        return;
    break;
  }
  }

  if (Result.getNode()) {
    Ops.push_back(Result);
    return;
  }
  // Not handled above (e.g. a plain symbolic 'i' operand): let the generic
  // implementation deal with it.
  return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}
| 52034 | |
| 52035 | |
| 52036 | |
| 52037 | static bool isGRClass(const TargetRegisterClass &RC) { |
| 52038 | return RC.hasSuperClassEq(&X86::GR8RegClass) || |
| 52039 | RC.hasSuperClassEq(&X86::GR16RegClass) || |
| 52040 | RC.hasSuperClassEq(&X86::GR32RegClass) || |
| 52041 | RC.hasSuperClassEq(&X86::GR64RegClass) || |
| 52042 | RC.hasSuperClassEq(&X86::LOW32_ADDR_ACCESS_RBPRegClass); |
| 52043 | } |
| 52044 | |
| 52045 | |
| 52046 | |
| 52047 | static bool isFRClass(const TargetRegisterClass &RC) { |
| 52048 | return RC.hasSuperClassEq(&X86::FR32XRegClass) || |
| 52049 | RC.hasSuperClassEq(&X86::FR64XRegClass) || |
| 52050 | RC.hasSuperClassEq(&X86::VR128XRegClass) || |
| 52051 | RC.hasSuperClassEq(&X86::VR256XRegClass) || |
| 52052 | RC.hasSuperClassEq(&X86::VR512RegClass); |
| 52053 | } |
| 52054 | |
| 52055 | |
| 52056 | |
| 52057 | static bool isVKClass(const TargetRegisterClass &RC) { |
| 52058 | return RC.hasSuperClassEq(&X86::VK1RegClass) || |
| 52059 | RC.hasSuperClassEq(&X86::VK2RegClass) || |
| 52060 | RC.hasSuperClassEq(&X86::VK4RegClass) || |
| 52061 | RC.hasSuperClassEq(&X86::VK8RegClass) || |
| 52062 | RC.hasSuperClassEq(&X86::VK16RegClass) || |
| 52063 | RC.hasSuperClassEq(&X86::VK32RegClass) || |
| 52064 | RC.hasSuperClassEq(&X86::VK64RegClass); |
| 52065 | } |
| 52066 | |
| 52067 | std::pair<unsigned, const TargetRegisterClass *> |
| 52068 | X86TargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, |
| 52069 | StringRef Constraint, |
| 52070 | MVT VT) const { |
| 52071 | |
| 52072 | |
| 52073 | if (Constraint.size() == 1) { |
| 52074 | |
| 52075 | switch (Constraint[0]) { |
| 52076 | default: break; |
| 52077 | |
| 52078 | case 'A': |
| 52079 | if (Subtarget.is64Bit()) |
| 52080 | return std::make_pair(X86::RAX, &X86::GR64_ADRegClass); |
| 52081 | assert((Subtarget.is32Bit() || Subtarget.is16Bit()) && |
| 52082 | "Expecting 64, 32 or 16 bit subtarget"); |
| 52083 | return std::make_pair(X86::EAX, &X86::GR32_ADRegClass); |
| 52084 | |
| 52085 | |
| 52086 | |
| 52087 | |
| 52088 | case 'k': |
| 52089 | if (Subtarget.hasAVX512()) { |
| 52090 | if (VT == MVT::i1) |
| 52091 | return std::make_pair(0U, &X86::VK1RegClass); |
| 52092 | if (VT == MVT::i8) |
| 52093 | return std::make_pair(0U, &X86::VK8RegClass); |
| 52094 | if (VT == MVT::i16) |
| 52095 | return std::make_pair(0U, &X86::VK16RegClass); |
| 52096 | } |
| 52097 | if (Subtarget.hasBWI()) { |
| 52098 | if (VT == MVT::i32) |
| 52099 | return std::make_pair(0U, &X86::VK32RegClass); |
| 52100 | if (VT == MVT::i64) |
| 52101 | return std::make_pair(0U, &X86::VK64RegClass); |
| 52102 | } |
| 52103 | break; |
| 52104 | case 'q': |
| 52105 | if (Subtarget.is64Bit()) { |
| 52106 | if (VT == MVT::i8 || VT == MVT::i1) |
| 52107 | return std::make_pair(0U, &X86::GR8RegClass); |
| 52108 | if (VT == MVT::i16) |
| 52109 | return std::make_pair(0U, &X86::GR16RegClass); |
| 52110 | if (VT == MVT::i32 || VT == MVT::f32) |
| 52111 | return std::make_pair(0U, &X86::GR32RegClass); |
| 52112 | if (VT != MVT::f80 && !VT.isVector()) |
| 52113 | return std::make_pair(0U, &X86::GR64RegClass); |
| 52114 | break; |
| 52115 | } |
| 52116 | LLVM_FALLTHROUGH; |
| 52117 | |
| 52118 | case 'Q': |
| 52119 | if (VT == MVT::i8 || VT == MVT::i1) |
| 52120 | return std::make_pair(0U, &X86::GR8_ABCD_LRegClass); |
| 52121 | if (VT == MVT::i16) |
| 52122 | return std::make_pair(0U, &X86::GR16_ABCDRegClass); |
| 52123 | if (VT == MVT::i32 || VT == MVT::f32 || |
| 52124 | (!VT.isVector() && !Subtarget.is64Bit())) |
| 52125 | return std::make_pair(0U, &X86::GR32_ABCDRegClass); |
| 52126 | if (VT != MVT::f80 && !VT.isVector()) |
| 52127 | return std::make_pair(0U, &X86::GR64_ABCDRegClass); |
| 52128 | break; |
| 52129 | case 'r': |
| 52130 | case 'l': |
| 52131 | if (VT == MVT::i8 || VT == MVT::i1) |
| 52132 | return std::make_pair(0U, &X86::GR8RegClass); |
| 52133 | if (VT == MVT::i16) |
| 52134 | return std::make_pair(0U, &X86::GR16RegClass); |
| 52135 | if (VT == MVT::i32 || VT == MVT::f32 || |
| 52136 | (!VT.isVector() && !Subtarget.is64Bit())) |
| 52137 | return std::make_pair(0U, &X86::GR32RegClass); |
| 52138 | if (VT != MVT::f80 && !VT.isVector()) |
| 52139 | return std::make_pair(0U, &X86::GR64RegClass); |
| 52140 | break; |
| 52141 | case 'R': |
| 52142 | if (VT == MVT::i8 || VT == MVT::i1) |
| 52143 | return std::make_pair(0U, &X86::GR8_NOREXRegClass); |
| 52144 | if (VT == MVT::i16) |
| 52145 | return std::make_pair(0U, &X86::GR16_NOREXRegClass); |
| 52146 | if (VT == MVT::i32 || VT == MVT::f32 || |
| 52147 | (!VT.isVector() && !Subtarget.is64Bit())) |
| 52148 | return std::make_pair(0U, &X86::GR32_NOREXRegClass); |
| 52149 | if (VT != MVT::f80 && !VT.isVector()) |
| 52150 | return std::make_pair(0U, &X86::GR64_NOREXRegClass); |
| 52151 | break; |
| 52152 | case 'f': |
| 52153 | |
| 52154 | |
| 52155 | if (VT == MVT::f32 && !isScalarFPTypeInSSEReg(VT)) |
| 52156 | return std::make_pair(0U, &X86::RFP32RegClass); |
| 52157 | if (VT == MVT::f64 && !isScalarFPTypeInSSEReg(VT)) |
| 52158 | return std::make_pair(0U, &X86::RFP64RegClass); |
| 52159 | if (VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f80) |
| 52160 | return std::make_pair(0U, &X86::RFP80RegClass); |
| 52161 | break; |
| 52162 | case 'y': |
| 52163 | if (!Subtarget.hasMMX()) break; |
| 52164 | return std::make_pair(0U, &X86::VR64RegClass); |
| 52165 | case 'v': |
| 52166 | case 'x': |
| 52167 | if (!Subtarget.hasSSE1()) break; |
| 52168 | bool VConstraint = (Constraint[0] == 'v'); |
| 52169 | |
| 52170 | switch (VT.SimpleTy) { |
| 52171 | default: break; |
| 52172 | |
| 52173 | case MVT::f32: |
| 52174 | case MVT::i32: |
| 52175 | if (VConstraint && Subtarget.hasVLX()) |
| 52176 | return std::make_pair(0U, &X86::FR32XRegClass); |
| 52177 | return std::make_pair(0U, &X86::FR32RegClass); |
| 52178 | case MVT::f64: |
| 52179 | case MVT::i64: |
| 52180 | if (VConstraint && Subtarget.hasVLX()) |
| 52181 | return std::make_pair(0U, &X86::FR64XRegClass); |
| 52182 | return std::make_pair(0U, &X86::FR64RegClass); |
| 52183 | case MVT::i128: |
| 52184 | if (Subtarget.is64Bit()) { |
| 52185 | if (VConstraint && Subtarget.hasVLX()) |
| 52186 | return std::make_pair(0U, &X86::VR128XRegClass); |
| 52187 | return std::make_pair(0U, &X86::VR128RegClass); |
| 52188 | } |
| 52189 | break; |
| 52190 | |
| 52191 | case MVT::f128: |
| 52192 | case MVT::v16i8: |
| 52193 | case MVT::v8i16: |
| 52194 | case MVT::v4i32: |
| 52195 | case MVT::v2i64: |
| 52196 | case MVT::v4f32: |
| 52197 | case MVT::v2f64: |
| 52198 | if (VConstraint && Subtarget.hasVLX()) |
| 52199 | return std::make_pair(0U, &X86::VR128XRegClass); |
| 52200 | return std::make_pair(0U, &X86::VR128RegClass); |
| 52201 | |
| 52202 | case MVT::v32i8: |
| 52203 | case MVT::v16i16: |
| 52204 | case MVT::v8i32: |
| 52205 | case MVT::v4i64: |
| 52206 | case MVT::v8f32: |
| 52207 | case MVT::v4f64: |
| 52208 | if (VConstraint && Subtarget.hasVLX()) |
| 52209 | return std::make_pair(0U, &X86::VR256XRegClass); |
| 52210 | if (Subtarget.hasAVX()) |
| 52211 | return std::make_pair(0U, &X86::VR256RegClass); |
| 52212 | break; |
| 52213 | case MVT::v64i8: |
| 52214 | case MVT::v32i16: |
| 52215 | case MVT::v8f64: |
| 52216 | case MVT::v16f32: |
| 52217 | case MVT::v16i32: |
| 52218 | case MVT::v8i64: |
| 52219 | if (!Subtarget.hasAVX512()) break; |
| 52220 | if (VConstraint) |
| 52221 | return std::make_pair(0U, &X86::VR512RegClass); |
| 52222 | return std::make_pair(0U, &X86::VR512_0_15RegClass); |
| 52223 | } |
| 52224 | break; |
| 52225 | } |
| 52226 | } else if (Constraint.size() == 2 && Constraint[0] == 'Y') { |
| 52227 | switch (Constraint[1]) { |
| 52228 | default: |
| 52229 | break; |
| 52230 | case 'i': |
| 52231 | case 't': |
| 52232 | case '2': |
| 52233 | return getRegForInlineAsmConstraint(TRI, "x", VT); |
| 52234 | case 'm': |
| 52235 | if (!Subtarget.hasMMX()) break; |
| 52236 | return std::make_pair(0U, &X86::VR64RegClass); |
| 52237 | case 'z': |
| 52238 | if (!Subtarget.hasSSE1()) break; |
| 52239 | switch (VT.SimpleTy) { |
| 52240 | default: break; |
| 52241 | |
| 52242 | case MVT::f32: |
| 52243 | case MVT::i32: |
| 52244 | return std::make_pair(X86::XMM0, &X86::FR32RegClass); |
| 52245 | case MVT::f64: |
| 52246 | case MVT::i64: |
| 52247 | return std::make_pair(X86::XMM0, &X86::FR64RegClass); |
| 52248 | case MVT::f128: |
| 52249 | case MVT::v16i8: |
| 52250 | case MVT::v8i16: |
| 52251 | case MVT::v4i32: |
| 52252 | case MVT::v2i64: |
| 52253 | case MVT::v4f32: |
| 52254 | case MVT::v2f64: |
| 52255 | return std::make_pair(X86::XMM0, &X86::VR128RegClass); |
| 52256 | |
| 52257 | case MVT::v32i8: |
| 52258 | case MVT::v16i16: |
| 52259 | case MVT::v8i32: |
| 52260 | case MVT::v4i64: |
| 52261 | case MVT::v8f32: |
| 52262 | case MVT::v4f64: |
| 52263 | if (Subtarget.hasAVX()) |
| 52264 | return std::make_pair(X86::YMM0, &X86::VR256RegClass); |
| 52265 | break; |
| 52266 | case MVT::v64i8: |
| 52267 | case MVT::v32i16: |
| 52268 | case MVT::v8f64: |
| 52269 | case MVT::v16f32: |
| 52270 | case MVT::v16i32: |
| 52271 | case MVT::v8i64: |
| 52272 | if (Subtarget.hasAVX512()) |
| 52273 | return std::make_pair(X86::ZMM0, &X86::VR512_0_15RegClass); |
| 52274 | break; |
| 52275 | } |
| 52276 | break; |
| 52277 | case 'k': |
| 52278 | |
| 52279 | if (Subtarget.hasAVX512()) { |
| 52280 | if (VT == MVT::i1) |
| 52281 | return std::make_pair(0U, &X86::VK1WMRegClass); |
| 52282 | if (VT == MVT::i8) |
| 52283 | return std::make_pair(0U, &X86::VK8WMRegClass); |
| 52284 | if (VT == MVT::i16) |
| 52285 | return std::make_pair(0U, &X86::VK16WMRegClass); |
| 52286 | } |
| 52287 | if (Subtarget.hasBWI()) { |
| 52288 | if (VT == MVT::i32) |
| 52289 | return std::make_pair(0U, &X86::VK32WMRegClass); |
| 52290 | if (VT == MVT::i64) |
| 52291 | return std::make_pair(0U, &X86::VK64WMRegClass); |
| 52292 | } |
| 52293 | break; |
| 52294 | } |
| 52295 | } |
| 52296 | |
| 52297 | if (parseConstraintCode(Constraint) != X86::COND_INVALID) |
| 52298 | return std::make_pair(0U, &X86::GR32RegClass); |
| 52299 | |
| 52300 | |
| 52301 | |
| 52302 | std::pair<Register, const TargetRegisterClass*> Res; |
| 52303 | Res = TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); |
| 52304 | |
| 52305 | |
| 52306 | if (!Res.second) { |
| 52307 | |
| 52308 | |
| 52309 | if (VT == MVT::Other || VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f80) { |
| 52310 | |
| 52311 | if (Constraint.size() == 7 && Constraint[0] == '{' && |
| 52312 | tolower(Constraint[1]) == 's' && tolower(Constraint[2]) == 't' && |
| 52313 | Constraint[3] == '(' && |
| 52314 | (Constraint[4] >= '0' && Constraint[4] <= '7') && |
| 52315 | Constraint[5] == ')' && Constraint[6] == '}') { |
| 52316 | |
| 52317 | |
| 52318 | if (Constraint[4] == '7') |
| 52319 | return std::make_pair(X86::FP7, &X86::RFP80_7RegClass); |
| 52320 | return std::make_pair(X86::FP0 + Constraint[4] - '0', |
| 52321 | &X86::RFP80RegClass); |
| 52322 | } |
| 52323 | |
| 52324 | |
| 52325 | if (StringRef("{st}").equals_insensitive(Constraint)) |
| 52326 | return std::make_pair(X86::FP0, &X86::RFP80RegClass); |
| 52327 | } |
| 52328 | |
| 52329 | |
| 52330 | if (StringRef("{flags}").equals_insensitive(Constraint)) |
| 52331 | return std::make_pair(X86::EFLAGS, &X86::CCRRegClass); |
| 52332 | |
| 52333 | |
| 52334 | |
| 52335 | if (StringRef("{dirflag}").equals_insensitive(Constraint) && |
| 52336 | VT == MVT::Other) |
| 52337 | return std::make_pair(X86::DF, &X86::DFCCRRegClass); |
| 52338 | |
| 52339 | |
| 52340 | if (StringRef("{fpsr}").equals_insensitive(Constraint)) |
| 52341 | return std::make_pair(X86::FPSW, &X86::FPCCRRegClass); |
| 52342 | |
| 52343 | return Res; |
| 52344 | } |
| 52345 | |
| 52346 | |
| 52347 | if (!Subtarget.is64Bit() && |
| 52348 | (isFRClass(*Res.second) || isGRClass(*Res.second)) && |
| 52349 | TRI->getEncodingValue(Res.first) >= 8) { |
| 52350 | |
| 52351 | return std::make_pair(0, nullptr); |
| 52352 | } |
| 52353 | |
| 52354 | |
| 52355 | if (!Subtarget.hasAVX512() && isFRClass(*Res.second) && |
| 52356 | TRI->getEncodingValue(Res.first) & 0x10) { |
| 52357 | |
| 52358 | return std::make_pair(0, nullptr); |
| 52359 | } |
| 52360 | |
| 52361 | |
| 52362 | |
| 52363 | |
| 52364 | |
| 52365 | if (TRI->isTypeLegalForClass(*Res.second, VT) || VT == MVT::Other) |
| 52366 | return Res; |
| 52367 | |
| 52368 | |
| 52369 | |
| 52370 | |
| 52371 | const TargetRegisterClass *Class = Res.second; |
| 52372 | |
| 52373 | |
| 52374 | |
| 52375 | |
| 52376 | if (isGRClass(*Class)) { |
| 52377 | unsigned Size = VT.getSizeInBits(); |
| 52378 | if (Size == 1) Size = 8; |
| 52379 | Register DestReg = getX86SubSuperRegisterOrZero(Res.first, Size); |
| 52380 | if (DestReg > 0) { |
| 52381 | bool is64Bit = Subtarget.is64Bit(); |
| 52382 | const TargetRegisterClass *RC = |
| 52383 | Size == 8 ? (is64Bit ? &X86::GR8RegClass : &X86::GR8_NOREXRegClass) |
| 52384 | : Size == 16 ? (is64Bit ? &X86::GR16RegClass : &X86::GR16_NOREXRegClass) |
| 52385 | : Size == 32 ? (is64Bit ? &X86::GR32RegClass : &X86::GR32_NOREXRegClass) |
| 52386 | : Size == 64 ? (is64Bit ? &X86::GR64RegClass : nullptr) |
| 52387 | : nullptr; |
| 52388 | if (Size == 64 && !is64Bit) { |
| 52389 | |
| 52390 | |
| 52391 | switch (DestReg) { |
| 52392 | case X86::RAX: |
| 52393 | return std::make_pair(X86::EAX, &X86::GR32_ADRegClass); |
| 52394 | case X86::RDX: |
| 52395 | return std::make_pair(X86::EDX, &X86::GR32_DCRegClass); |
| 52396 | case X86::RCX: |
| 52397 | return std::make_pair(X86::ECX, &X86::GR32_CBRegClass); |
| 52398 | case X86::RBX: |
| 52399 | return std::make_pair(X86::EBX, &X86::GR32_BSIRegClass); |
| 52400 | case X86::RSI: |
| 52401 | return std::make_pair(X86::ESI, &X86::GR32_SIDIRegClass); |
| 52402 | case X86::RDI: |
| 52403 | return std::make_pair(X86::EDI, &X86::GR32_DIBPRegClass); |
| 52404 | case X86::RBP: |
| 52405 | return std::make_pair(X86::EBP, &X86::GR32_BPSPRegClass); |
| 52406 | default: |
| 52407 | return std::make_pair(0, nullptr); |
| 52408 | } |
| 52409 | } |
| 52410 | if (RC && RC->contains(DestReg)) |
| 52411 | return std::make_pair(DestReg, RC); |
| 52412 | return Res; |
| 52413 | } |
| 52414 | |
| 52415 | return std::make_pair(0, nullptr); |
| 52416 | } else if (isFRClass(*Class)) { |
| 52417 | |
| 52418 | |
| 52419 | |
| 52420 | |
| 52421 | |
| 52422 | |
| 52423 | if (VT == MVT::f32 || VT == MVT::i32) |
| 52424 | Res.second = &X86::FR32XRegClass; |
| 52425 | else if (VT == MVT::f64 || VT == MVT::i64) |
| 52426 | Res.second = &X86::FR64XRegClass; |
| 52427 | else if (TRI->isTypeLegalForClass(X86::VR128XRegClass, VT)) |
| 52428 | Res.second = &X86::VR128XRegClass; |
| 52429 | else if (TRI->isTypeLegalForClass(X86::VR256XRegClass, VT)) |
| 52430 | Res.second = &X86::VR256XRegClass; |
| 52431 | else if (TRI->isTypeLegalForClass(X86::VR512RegClass, VT)) |
| 52432 | Res.second = &X86::VR512RegClass; |
| 52433 | else { |
| 52434 | |
| 52435 | Res.first = 0; |
| 52436 | Res.second = nullptr; |
| 52437 | } |
| 52438 | } else if (isVKClass(*Class)) { |
| 52439 | if (VT == MVT::i1) |
| 52440 | Res.second = &X86::VK1RegClass; |
| 52441 | else if (VT == MVT::i8) |
| 52442 | Res.second = &X86::VK8RegClass; |
| 52443 | else if (VT == MVT::i16) |
| 52444 | Res.second = &X86::VK16RegClass; |
| 52445 | else if (VT == MVT::i32) |
| 52446 | Res.second = &X86::VK32RegClass; |
| 52447 | else if (VT == MVT::i64) |
| 52448 | Res.second = &X86::VK64RegClass; |
| 52449 | else { |
| 52450 | |
| 52451 | Res.first = 0; |
| 52452 | Res.second = nullptr; |
| 52453 | } |
| 52454 | } |
| 52455 | |
| 52456 | return Res; |
| 52457 | } |
| 52458 | |
| 52459 | InstructionCost X86TargetLowering::getScalingFactorCost(const DataLayout &DL, |
| 52460 | const AddrMode &AM, |
| 52461 | Type *Ty, |
| 52462 | unsigned AS) const { |
| 52463 | |
| 52464 | |
| 52465 | |
| 52466 | |
| 52467 | |
| 52468 | |
| 52469 | |
| 52470 | |
| 52471 | |
| 52472 | |
| 52473 | |
| 52474 | |
| 52475 | |
| 52476 | |
| 52477 | |
| 52478 | |
| 52479 | |
| 52480 | |
| 52481 | if (isLegalAddressingMode(DL, AM, Ty, AS)) |
| 52482 | |
| 52483 | |
| 52484 | return AM.Scale != 0; |
| 52485 | return -1; |
| 52486 | } |
| 52487 | |
| 52488 | bool X86TargetLowering::isIntDivCheap(EVT VT, AttributeList Attr) const { |
| 52489 | |
| 52490 | |
| 52491 | |
| 52492 | |
| 52493 | |
| 52494 | |
| 52495 | |
| 52496 | bool OptSize = Attr.hasFnAttribute(Attribute::MinSize); |
| 52497 | return OptSize && !VT.isVector(); |
| 52498 | } |
| 52499 | |
| 52500 | void X86TargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const { |
| 52501 | if (!Subtarget.is64Bit()) |
| 52502 | return; |
| 52503 | |
| 52504 | |
| 52505 | X86MachineFunctionInfo *AFI = |
| 52506 | Entry->getParent()->getInfo<X86MachineFunctionInfo>(); |
| 52507 | AFI->setIsSplitCSR(true); |
| 52508 | } |
| 52509 | |
| 52510 | void X86TargetLowering::insertCopiesSplitCSR( |
| 52511 | MachineBasicBlock *Entry, |
| 52512 | const SmallVectorImpl<MachineBasicBlock *> &Exits) const { |
| 52513 | const X86RegisterInfo *TRI = Subtarget.getRegisterInfo(); |
| 52514 | const MCPhysReg *IStart = TRI->getCalleeSavedRegsViaCopy(Entry->getParent()); |
| 52515 | if (!IStart) |
| 52516 | return; |
| 52517 | |
| 52518 | const TargetInstrInfo *TII = Subtarget.getInstrInfo(); |
| 52519 | MachineRegisterInfo *MRI = &Entry->getParent()->getRegInfo(); |
| 52520 | MachineBasicBlock::iterator MBBI = Entry->begin(); |
| 52521 | for (const MCPhysReg *I = IStart; *I; ++I) { |
| 52522 | const TargetRegisterClass *RC = nullptr; |
| 52523 | if (X86::GR64RegClass.contains(*I)) |
| 52524 | RC = &X86::GR64RegClass; |
| 52525 | else |
| 52526 | llvm_unreachable("Unexpected register class in CSRsViaCopy!"); |
| 52527 | |
| 52528 | Register NewVR = MRI->createVirtualRegister(RC); |
| 52529 | |
| 52530 | |
| 52531 | |
| 52532 | |
| 52533 | |
| 52534 | assert( |
| 52535 | Entry->getParent()->getFunction().hasFnAttribute(Attribute::NoUnwind) && |
| 52536 | "Function should be nounwind in insertCopiesSplitCSR!"); |
| 52537 | Entry->addLiveIn(*I); |
| 52538 | BuildMI(*Entry, MBBI, DebugLoc(), TII->get(TargetOpcode::COPY), NewVR) |
| 52539 | .addReg(*I); |
| 52540 | |
| 52541 | |
| 52542 | for (auto *Exit : Exits) |
| 52543 | BuildMI(*Exit, Exit->getFirstTerminator(), DebugLoc(), |
| 52544 | TII->get(TargetOpcode::COPY), *I) |
| 52545 | .addReg(NewVR); |
| 52546 | } |
| 52547 | } |
| 52548 | |
// swifterror support is only reported for the 64-bit subtarget.
bool X86TargetLowering::supportSwiftError() const {
  return Subtarget.is64Bit();
}
| 52552 | |
| 52553 | |
| 52554 | bool X86TargetLowering::hasStackProbeSymbol(MachineFunction &MF) const { |
| 52555 | return !getStackProbeSymbolName(MF).empty(); |
| 52556 | } |
| 52557 | |
| 52558 | |
| 52559 | bool X86TargetLowering::hasInlineStackProbe(MachineFunction &MF) const { |
| 52560 | |
| 52561 | |
| 52562 | if (Subtarget.isOSWindows() || |
| 52563 | MF.getFunction().hasFnAttribute("no-stack-arg-probe")) |
| 52564 | return false; |
| 52565 | |
| 52566 | |
| 52567 | if (MF.getFunction().hasFnAttribute("probe-stack")) |
| 52568 | return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() == |
| 52569 | "inline-asm"; |
| 52570 | |
| 52571 | return false; |
| 52572 | } |
| 52573 | |
| 52574 | |
| 52575 | |
| 52576 | StringRef |
| 52577 | X86TargetLowering::getStackProbeSymbolName(MachineFunction &MF) const { |
| 52578 | |
| 52579 | if (hasInlineStackProbe(MF)) |
| 52580 | return ""; |
| 52581 | |
| 52582 | |
| 52583 | if (MF.getFunction().hasFnAttribute("probe-stack")) |
| 52584 | return MF.getFunction().getFnAttribute("probe-stack").getValueAsString(); |
| 52585 | |
| 52586 | |
| 52587 | |
| 52588 | if (!Subtarget.isOSWindows() || Subtarget.isTargetMachO() || |
| 52589 | MF.getFunction().hasFnAttribute("no-stack-arg-probe")) |
| 52590 | return ""; |
| 52591 | |
| 52592 | |
| 52593 | |
| 52594 | if (Subtarget.is64Bit()) |
| 52595 | return Subtarget.isTargetCygMing() ? "___chkstk_ms" : "__chkstk"; |
| 52596 | return Subtarget.isTargetCygMing() ? "_alloca" : "_chkstk"; |
| 52597 | } |
| 52598 | |
| 52599 | unsigned |
| 52600 | X86TargetLowering::getStackProbeSize(MachineFunction &MF) const { |
| 52601 | |
| 52602 | |
| 52603 | unsigned StackProbeSize = 4096; |
| 52604 | const Function &Fn = MF.getFunction(); |
| 52605 | if (Fn.hasFnAttribute("stack-probe-size")) |
| 52606 | Fn.getFnAttribute("stack-probe-size") |
| 52607 | .getValueAsString() |
| 52608 | .getAsInteger(0, StackProbeSize); |
| 52609 | return StackProbeSize; |
| 52610 | } |
| 52611 | |
| 52612 | Align X86TargetLowering::getPrefLoopAlignment(MachineLoop *ML) const { |
| 52613 | if (ML->isInnermost() && |
| 52614 | ExperimentalPrefInnermostLoopAlignment.getNumOccurrences()) |
| 52615 | return Align(1ULL << ExperimentalPrefInnermostLoopAlignment); |
| 52616 | return TargetLowering::getPrefLoopAlignment(); |
| 52617 | } |