Bug Summary

File: src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
Warning: line 3607, column 50
Called C++ object pointer is null
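
This class of warning (typically produced by clang's core.CallAndMessage checker) means the analyzer found a path on which a member function is called through a pointer whose value is provably null at the call site. The flagged call at line 3607 lies outside the excerpt below; the hypothetical sketch that follows only illustrates the general pattern being reported (the names Reg, Candidate and use are invented for illustration):

    struct Reg { bool isValid() const; };

    bool use(const Reg *Candidate, bool TryCandidate) {
      const Reg *R = nullptr;     // R starts out null
      if (TryCandidate)
        R = Candidate;            // only one branch assigns a non-null value
      return R->isValid();        // on the other branch this calls a method
                                  // through a null object pointer -- the
                                  // defect class named in the warning above
    }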

Annotated Source Code


clang -cc1 -cc1 -triple amd64-unknown-openbsd7.0 -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name CombinerHelper.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model static -mframe-pointer=all -relaxed-aliasing -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/usr/src/gnu/usr.bin/clang/libLLVM/obj -resource-dir /usr/local/lib/clang/13.0.0 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Analysis -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ASMParser -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/BinaryFormat -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Bitcode -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Bitcode -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Bitstream -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /include/llvm/CodeGen -I /include/llvm/CodeGen/PBQP -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/IR -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/IR -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Coroutines -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ProfileData/Coverage -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/CodeView -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/DWARF -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/MSF -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/PDB -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Demangle -I 
/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ExecutionEngine -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ExecutionEngine/JITLink -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ExecutionEngine/Orc -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend/OpenACC -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend/OpenMP -I /include/llvm/CodeGen/GlobalISel -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/IRReader -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/InstCombine -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/Transforms/InstCombine -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/LTO -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Linker -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/MC -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/MC/MCParser -I /include/llvm/CodeGen/MIRParser -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Object -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Option -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Passes -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ProfileData -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Scalar -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ADT -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Support -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/Symbolize -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Target -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Utils -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Vectorize -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/IPO -I 
/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include -I /usr/src/gnu/usr.bin/clang/libLLVM/../include -I /usr/src/gnu/usr.bin/clang/libLLVM/obj -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include -D NDEBUG -D __STDC_LIMIT_MACROS -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D LLVM_PREFIX="/usr" -internal-isystem /usr/include/c++/v1 -internal-isystem /usr/local/lib/clang/13.0.0/include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/usr/src/gnu/usr.bin/clang/libLLVM/obj -ferror-limit 19 -fvisibility-inlines-hidden -fwrapv -stack-protector 2 -fno-rtti -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-valloc -fno-builtin-free -fno-builtin-strdup -fno-builtin-strndup -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /home/ben/Projects/vmm/scan-build/2022-01-12-194120-40624-1 -x c++ /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
1//===-- lib/CodeGen/GlobalISel/GICombinerHelper.cpp -----------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
9#include "llvm/ADT/SetVector.h"
10#include "llvm/ADT/SmallBitVector.h"
11#include "llvm/CodeGen/GlobalISel/Combiner.h"
12#include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
13#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
14#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
15#include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
16#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
17#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
18#include "llvm/CodeGen/GlobalISel/Utils.h"
19#include "llvm/CodeGen/LowLevelType.h"
20#include "llvm/CodeGen/MachineBasicBlock.h"
21#include "llvm/CodeGen/MachineDominators.h"
22#include "llvm/CodeGen/MachineFrameInfo.h"
23#include "llvm/CodeGen/MachineInstr.h"
24#include "llvm/CodeGen/MachineMemOperand.h"
25#include "llvm/CodeGen/MachineRegisterInfo.h"
26#include "llvm/CodeGen/TargetInstrInfo.h"
27#include "llvm/CodeGen/TargetLowering.h"
28#include "llvm/CodeGen/TargetOpcodes.h"
29#include "llvm/Support/MathExtras.h"
30#include "llvm/Target/TargetMachine.h"
31#include <tuple>
32
33#define DEBUG_TYPE "gi-combiner"
34
35using namespace llvm;
36using namespace MIPatternMatch;
37
38// Option to allow testing of the combiner while no targets know about indexed
39// addressing.
40static cl::opt<bool>
41 ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false),
42 cl::desc("Force all indexed operations to be "
43 "legal for the GlobalISel combiner"));
44
45CombinerHelper::CombinerHelper(GISelChangeObserver &Observer,
46 MachineIRBuilder &B, GISelKnownBits *KB,
47 MachineDominatorTree *MDT,
48 const LegalizerInfo *LI)
49 : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer),
50 KB(KB), MDT(MDT), LI(LI) {
51 (void)this->KB;
52}
53
54const TargetLowering &CombinerHelper::getTargetLowering() const {
55 return *Builder.getMF().getSubtarget().getTargetLowering();
56}
57
58/// \returns The little endian in-memory byte position of byte \p I in a
59/// \p ByteWidth bytes wide type.
60///
61/// E.g. Given a 4-byte type x, x[0] -> byte 0
62static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I) {
63 assert(I < ByteWidth && "I must be in [0, ByteWidth)");
64 return I;
65}
66
67/// \returns The big endian in-memory byte position of byte \p I in a
68/// \p ByteWidth bytes wide type.
69///
70/// E.g. Given a 4-byte type x, x[0] -> byte 3
71static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I) {
72 assert(I < ByteWidth && "I must be in [0, ByteWidth)");
73 return ByteWidth - I - 1;
74}
75
76/// Given a map from byte offsets in memory to indices in a load/store,
77/// determine if that map corresponds to a little or big endian byte pattern.
78///
79/// \param MemOffset2Idx maps memory offsets to address offsets.
80/// \param LowestIdx is the lowest index in \p MemOffset2Idx.
81///
82/// \returns true if the map corresponds to a big endian byte pattern, false
83/// if it corresponds to a little endian byte pattern, and None otherwise.
84///
85/// E.g. given a 32-bit type x, and x[AddrOffset], the in-memory byte patterns
86/// are as follows:
87///
88/// AddrOffset Little endian Big endian
89/// 0 0 3
90/// 1 1 2
91/// 2 2 1
92/// 3 3 0
93static Optional<bool>
94isBigEndian(const SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx,
95 int64_t LowestIdx) {
96 // Need at least two byte positions to decide on endianness.
97 unsigned Width = MemOffset2Idx.size();
98 if (Width < 2)
99 return None;
100 bool BigEndian = true, LittleEndian = true;
101 for (unsigned MemOffset = 0; MemOffset < Width; ++ MemOffset) {
102 auto MemOffsetAndIdx = MemOffset2Idx.find(MemOffset);
103 if (MemOffsetAndIdx == MemOffset2Idx.end())
104 return None;
105 const int64_t Idx = MemOffsetAndIdx->second - LowestIdx;
106 assert(Idx >= 0 && "Expected non-negative byte offset?");
107 LittleEndian &= Idx == littleEndianByteAt(Width, MemOffset);
108 BigEndian &= Idx == bigEndianByteAt(Width, MemOffset);
109 if (!BigEndian && !LittleEndian)
110 return None;
111 }
112
113 assert((BigEndian != LittleEndian) &&
114 "Pattern cannot be both big and little endian!");
115 return BigEndian;
116}
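// A worked example of the mapping above (editorial illustration, not part of
// the original file): with LowestIdx = 0 and MemOffset2Idx = {0->3, 1->2,
// 2->1, 3->0}, every Idx equals bigEndianByteAt(4, MemOffset), so the
// function returns true (big endian). The identity map {0->0, 1->1, 2->2,
// 3->3} returns false (little endian); any other arrangement returns None.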
117
118bool CombinerHelper::isLegalOrBeforeLegalizer(
119 const LegalityQuery &Query) const {
120 return !LI || LI->getAction(Query).Action == LegalizeActions::Legal;
121}
122
123void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, Register FromReg,
124 Register ToReg) const {
125 Observer.changingAllUsesOfReg(MRI, FromReg);
126
127 if (MRI.constrainRegAttrs(ToReg, FromReg))
128 MRI.replaceRegWith(FromReg, ToReg);
129 else
130 Builder.buildCopy(ToReg, FromReg);
131
132 Observer.finishedChangingAllUsesOfReg();
133}
134
135void CombinerHelper::replaceRegOpWith(MachineRegisterInfo &MRI,
136 MachineOperand &FromRegOp,
137 Register ToReg) const {
138 assert(FromRegOp.getParent() && "Expected an operand in an MI");
139 Observer.changingInstr(*FromRegOp.getParent());
140
141 FromRegOp.setReg(ToReg);
142
143 Observer.changedInstr(*FromRegOp.getParent());
144}
145
146bool CombinerHelper::tryCombineCopy(MachineInstr &MI) {
147 if (matchCombineCopy(MI)) {
148 applyCombineCopy(MI);
149 return true;
150 }
151 return false;
152}
153bool CombinerHelper::matchCombineCopy(MachineInstr &MI) {
154 if (MI.getOpcode() != TargetOpcode::COPY)
155 return false;
156 Register DstReg = MI.getOperand(0).getReg();
157 Register SrcReg = MI.getOperand(1).getReg();
158 return canReplaceReg(DstReg, SrcReg, MRI);
159}
160void CombinerHelper::applyCombineCopy(MachineInstr &MI) {
161 Register DstReg = MI.getOperand(0).getReg();
162 Register SrcReg = MI.getOperand(1).getReg();
163 MI.eraseFromParent();
164 replaceRegWith(MRI, DstReg, SrcReg);
165}
166
167bool CombinerHelper::tryCombineConcatVectors(MachineInstr &MI) {
168 bool IsUndef = false;
169 SmallVector<Register, 4> Ops;
170 if (matchCombineConcatVectors(MI, IsUndef, Ops)) {
171 applyCombineConcatVectors(MI, IsUndef, Ops);
172 return true;
173 }
174 return false;
175}
176
177bool CombinerHelper::matchCombineConcatVectors(MachineInstr &MI, bool &IsUndef,
178 SmallVectorImpl<Register> &Ops) {
179 assert(MI.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
180 "Invalid instruction");
181 IsUndef = true;
182 MachineInstr *Undef = nullptr;
183
184 // Walk over all the operands of concat vectors and check if they are
185 // build_vector themselves or undef.
186 // Then collect their operands in Ops.
187 for (const MachineOperand &MO : MI.uses()) {
188 Register Reg = MO.getReg();
189 MachineInstr *Def = MRI.getVRegDef(Reg);
190 assert(Def && "Operand not defined");
191 switch (Def->getOpcode()) {
192 case TargetOpcode::G_BUILD_VECTOR:
193 IsUndef = false;
194 // Remember the operands of the build_vector to fold
195 // them into the yet-to-build flattened concat vectors.
196 for (const MachineOperand &BuildVecMO : Def->uses())
197 Ops.push_back(BuildVecMO.getReg());
198 break;
199 case TargetOpcode::G_IMPLICIT_DEF: {
200 LLT OpType = MRI.getType(Reg);
201 // Keep one undef value for all the undef operands.
202 if (!Undef) {
203 Builder.setInsertPt(*MI.getParent(), MI);
204 Undef = Builder.buildUndef(OpType.getScalarType());
205 }
206 assert(MRI.getType(Undef->getOperand(0).getReg()) ==
207 OpType.getScalarType() &&
208 "All undefs should have the same type");
209 // Break the undef vector in as many scalar elements as needed
210 // for the flattening.
211 for (unsigned EltIdx = 0, EltEnd = OpType.getNumElements();
212 EltIdx != EltEnd; ++EltIdx)
213 Ops.push_back(Undef->getOperand(0).getReg());
214 break;
215 }
216 default:
217 return false;
218 }
219 }
220 return true;
221}
222void CombinerHelper::applyCombineConcatVectors(
223 MachineInstr &MI, bool IsUndef, const ArrayRef<Register> Ops) {
224 // We determined that the concat_vectors can be flatten.
225 // Generate the flattened build_vector.
226 Register DstReg = MI.getOperand(0).getReg();
227 Builder.setInsertPt(*MI.getParent(), MI);
228 Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
229
230 // Note: IsUndef is sort of redundant. We could have determine it by
231 // checking that at all Ops are undef. Alternatively, we could have
232 // generate a build_vector of undefs and rely on another combine to
233 // clean that up. For now, given we already gather this information
234 // in tryCombineConcatVectors, just save compile time and issue the
235 // right thing.
236 if (IsUndef)
237 Builder.buildUndef(NewDstReg);
238 else
239 Builder.buildBuildVector(NewDstReg, Ops);
240 MI.eraseFromParent();
241 replaceRegWith(MRI, DstReg, NewDstReg);
242}
243
244bool CombinerHelper::tryCombineShuffleVector(MachineInstr &MI) {
245 SmallVector<Register, 4> Ops;
246 if (matchCombineShuffleVector(MI, Ops)) {
247 applyCombineShuffleVector(MI, Ops);
248 return true;
249 }
250 return false;
251}
252
253bool CombinerHelper::matchCombineShuffleVector(MachineInstr &MI,
254 SmallVectorImpl<Register> &Ops) {
255 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
256 "Invalid instruction kind");
257 LLT DstType = MRI.getType(MI.getOperand(0).getReg());
258 Register Src1 = MI.getOperand(1).getReg();
259 LLT SrcType = MRI.getType(Src1);
260 // As bizarre as it may look, shuffle vector can actually produce
261 // scalar! This is because at the IR level a <1 x ty> shuffle
262 // vector is perfectly valid.
263 unsigned DstNumElts = DstType.isVector() ? DstType.getNumElements() : 1;
264 unsigned SrcNumElts = SrcType.isVector() ? SrcType.getNumElements() : 1;
265
266 // If the resulting vector is smaller than the size of the source
267 // vectors being concatenated, we won't be able to replace the
268 // shuffle vector into a concat_vectors.
269 //
270 // Note: We may still be able to produce a concat_vectors fed by
271 // extract_vector_elt and so on. It is less clear that would
272 // be better though, so don't bother for now.
273 //
274 // If the destination is a scalar, the size of the sources doesn't
275 // matter. we will lower the shuffle to a plain copy. This will
276 // work only if the source and destination have the same size. But
277 // that's covered by the next condition.
278 //
279 // TODO: If the size between the source and destination don't match
280 // we could still emit an extract vector element in that case.
281 if (DstNumElts < 2 * SrcNumElts && DstNumElts != 1)
282 return false;
283
284 // Check that the shuffle mask can be broken evenly between the
285 // different sources.
286 if (DstNumElts % SrcNumElts != 0)
287 return false;
288
289 // Mask length is a multiple of the source vector length.
290 // Check if the shuffle is some kind of concatenation of the input
291 // vectors.
292 unsigned NumConcat = DstNumElts / SrcNumElts;
293 SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
294 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
295 for (unsigned i = 0; i != DstNumElts; ++i) {
296 int Idx = Mask[i];
297 // Undef value.
298 if (Idx < 0)
299 continue;
300 // Ensure the indices in each SrcType sized piece are sequential and that
301 // the same source is used for the whole piece.
302 if ((Idx % SrcNumElts != (i % SrcNumElts)) ||
303 (ConcatSrcs[i / SrcNumElts] >= 0 &&
304 ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts)))
305 return false;
306 // Remember which source this index came from.
307 ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts;
308 }
309
310 // The shuffle is concatenating multiple vectors together.
311 // Collect the different operands for that.
312 Register UndefReg;
313 Register Src2 = MI.getOperand(2).getReg();
314 for (auto Src : ConcatSrcs) {
315 if (Src < 0) {
316 if (!UndefReg) {
317 Builder.setInsertPt(*MI.getParent(), MI);
318 UndefReg = Builder.buildUndef(SrcType).getReg(0);
319 }
320 Ops.push_back(UndefReg);
321 } else if (Src == 0)
322 Ops.push_back(Src1);
323 else
324 Ops.push_back(Src2);
325 }
326 return true;
327}
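// Editorial illustration (not part of the original file): with two <4 x s32>
// sources, the mask [0,1,2,3,4,5,6,7] passes the checks above and fills Ops
// with {Src1, Src2}, i.e. a plain concatenation; the mask
// [4,5,6,7,-1,-1,-1,-1] yields {Src2, UndefReg}, where the second slot is a
// freshly built G_IMPLICIT_DEF of the source vector type.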
328
329void CombinerHelper::applyCombineShuffleVector(MachineInstr &MI,
330 const ArrayRef<Register> Ops) {
331 Register DstReg = MI.getOperand(0).getReg();
332 Builder.setInsertPt(*MI.getParent(), MI);
333 Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
334
335 if (Ops.size() == 1)
336 Builder.buildCopy(NewDstReg, Ops[0]);
337 else
338 Builder.buildMerge(NewDstReg, Ops);
339
340 MI.eraseFromParent();
341 replaceRegWith(MRI, DstReg, NewDstReg);
342}
343
344namespace {
345
346/// Select a preference between two uses. CurrentUse is the current preference
347/// while *ForCandidate is attributes of the candidate under consideration.
348PreferredTuple ChoosePreferredUse(PreferredTuple &CurrentUse,
349 const LLT TyForCandidate,
350 unsigned OpcodeForCandidate,
351 MachineInstr *MIForCandidate) {
352 if (!CurrentUse.Ty.isValid()) {
353 if (CurrentUse.ExtendOpcode == OpcodeForCandidate ||
354 CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT)
355 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
356 return CurrentUse;
357 }
358
359 // We permit the extend to hoist through basic blocks but this is only
360 // sensible if the target has extending loads. If you end up lowering back
361 // into a load and extend during the legalizer then the end result is
362 // hoisting the extend up to the load.
363
364 // Prefer defined extensions to undefined extensions as these are more
365 // likely to reduce the number of instructions.
366 if (OpcodeForCandidate == TargetOpcode::G_ANYEXT &&
367 CurrentUse.ExtendOpcode != TargetOpcode::G_ANYEXT)
368 return CurrentUse;
369 else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT &&
370 OpcodeForCandidate != TargetOpcode::G_ANYEXT)
371 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
372
373 // Prefer sign extensions to zero extensions as sign-extensions tend to be
374 // more expensive.
375 if (CurrentUse.Ty == TyForCandidate) {
376 if (CurrentUse.ExtendOpcode == TargetOpcode::G_SEXT &&
377 OpcodeForCandidate == TargetOpcode::G_ZEXT)
378 return CurrentUse;
379 else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ZEXT &&
380 OpcodeForCandidate == TargetOpcode::G_SEXT)
381 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
382 }
383
384 // This is potentially target specific. We've chosen the largest type
385 // because G_TRUNC is usually free. One potential catch with this is that
386 // some targets have a reduced number of larger registers than smaller
387 // registers and this choice potentially increases the live-range for the
388 // larger value.
389 if (TyForCandidate.getSizeInBits() > CurrentUse.Ty.getSizeInBits()) {
390 return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
391 }
392 return CurrentUse;
393}
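// Editorial illustration of the resulting preference order (not part of the
// original file): with a current preference of {s32, G_ZEXT}, a candidate
// {s32, G_SEXT} wins (sign extend preferred at equal width), a candidate
// {s64, G_ANYEXT} loses (defined extensions beat any-extends), and a
// candidate {s64, G_ZEXT} wins purely on the larger type.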
394
395/// Find a suitable place to insert some instructions and insert them. This
396/// function accounts for special cases like inserting before a PHI node.
397/// The current strategy for inserting before PHI's is to duplicate the
398/// instructions for each predecessor. However, while that's ok for G_TRUNC
399/// on most targets since it generally requires no code, other targets/cases may
400/// want to try harder to find a dominating block.
401static void InsertInsnsWithoutSideEffectsBeforeUse(
402 MachineIRBuilder &Builder, MachineInstr &DefMI, MachineOperand &UseMO,
403 std::function<void(MachineBasicBlock *, MachineBasicBlock::iterator,
404 MachineOperand &UseMO)>
405 Inserter) {
406 MachineInstr &UseMI = *UseMO.getParent();
407
408 MachineBasicBlock *InsertBB = UseMI.getParent();
409
410 // If the use is a PHI then we want the predecessor block instead.
411 if (UseMI.isPHI()) {
412 MachineOperand *PredBB = std::next(&UseMO);
413 InsertBB = PredBB->getMBB();
414 }
415
416 // If the block is the same block as the def then we want to insert just after
417 // the def instead of at the start of the block.
418 if (InsertBB == DefMI.getParent()) {
419 MachineBasicBlock::iterator InsertPt = &DefMI;
420 Inserter(InsertBB, std::next(InsertPt), UseMO);
421 return;
422 }
423
424 // Otherwise we want the start of the BB
425 Inserter(InsertBB, InsertBB->getFirstNonPHI(), UseMO);
426}
427} // end anonymous namespace
428
429bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) {
430 PreferredTuple Preferred;
431 if (matchCombineExtendingLoads(MI, Preferred)) {
432 applyCombineExtendingLoads(MI, Preferred);
433 return true;
434 }
435 return false;
436}
437
438bool CombinerHelper::matchCombineExtendingLoads(MachineInstr &MI,
439 PreferredTuple &Preferred) {
440 // We match the loads and follow the uses to the extend instead of matching
441 // the extends and following the def to the load. This is because the load
442 // must remain in the same position for correctness (unless we also add code
443 // to find a safe place to sink it) whereas the extend is freely movable.
444 // It also prevents us from duplicating the load for the volatile case or just
445 // for performance.
446 GAnyLoad *LoadMI = dyn_cast<GAnyLoad>(&MI);
447 if (!LoadMI)
448 return false;
449
450 Register LoadReg = LoadMI->getDstReg();
451
452 LLT LoadValueTy = MRI.getType(LoadReg);
453 if (!LoadValueTy.isScalar())
454 return false;
455
456 // Most architectures are going to legalize <s8 loads into at least a 1 byte
457 // load, and the MMOs can only describe memory accesses in multiples of bytes.
458 // If we try to perform extload combining on those, we can end up with
459 // %a(s8) = extload %ptr (load 1 byte from %ptr)
460 // ... which is an illegal extload instruction.
461 if (LoadValueTy.getSizeInBits() < 8)
462 return false;
463
464 // For non power-of-2 types, they will very likely be legalized into multiple
465 // loads. Don't bother trying to match them into extending loads.
466 if (!isPowerOf2_32(LoadValueTy.getSizeInBits()))
467 return false;
468
469 // Find the preferred type aside from the any-extends (unless it's the only
470 // one) and non-extending ops. We'll emit an extending load to that type and
471 // and emit a variant of (extend (trunc X)) for the others according to the
472 // relative type sizes. At the same time, pick an extend to use based on the
473 // extend involved in the chosen type.
474 unsigned PreferredOpcode =
475 isa<GLoad>(&MI)
476 ? TargetOpcode::G_ANYEXT
477 : isa<GSExtLoad>(&MI) ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
478 Preferred = {LLT(), PreferredOpcode, nullptr};
479 for (auto &UseMI : MRI.use_nodbg_instructions(LoadReg)) {
480 if (UseMI.getOpcode() == TargetOpcode::G_SEXT ||
481 UseMI.getOpcode() == TargetOpcode::G_ZEXT ||
482 (UseMI.getOpcode() == TargetOpcode::G_ANYEXT)) {
483 const auto &MMO = LoadMI->getMMO();
484 // For atomics, only form anyextending loads.
485 if (MMO.isAtomic() && UseMI.getOpcode() != TargetOpcode::G_ANYEXT)
486 continue;
487 // Check for legality.
488 if (LI) {
489 LegalityQuery::MemDesc MMDesc;
490 MMDesc.MemoryTy = MMO.getMemoryType();
491 MMDesc.AlignInBits = MMO.getAlign().value() * 8;
492 MMDesc.Ordering = MMO.getSuccessOrdering();
493 LLT UseTy = MRI.getType(UseMI.getOperand(0).getReg());
494 LLT SrcTy = MRI.getType(LoadMI->getPointerReg());
495 if (LI->getAction({LoadMI->getOpcode(), {UseTy, SrcTy}, {MMDesc}})
496 .Action != LegalizeActions::Legal)
497 continue;
498 }
499 Preferred = ChoosePreferredUse(Preferred,
500 MRI.getType(UseMI.getOperand(0).getReg()),
501 UseMI.getOpcode(), &UseMI);
502 }
503 }
504
505 // There were no extends
506 if (!Preferred.MI)
507 return false;
508 // It should be impossible to chose an extend without selecting a different
509 // type since by definition the result of an extend is larger.
510 assert(Preferred.Ty != LoadValueTy && "Extending to same type?");
511
512 LLVM_DEBUG(dbgs() << "Preferred use is: " << *Preferred.MI);
513 return true;
514}
515
516void CombinerHelper::applyCombineExtendingLoads(MachineInstr &MI,
517 PreferredTuple &Preferred) {
518 // Rewrite the load to the chosen extending load.
519 Register ChosenDstReg = Preferred.MI->getOperand(0).getReg();
520
521 // Inserter to insert a truncate back to the original type at a given point
522 // with some basic CSE to limit truncate duplication to one per BB.
523 DenseMap<MachineBasicBlock *, MachineInstr *> EmittedInsns;
524 auto InsertTruncAt = [&](MachineBasicBlock *InsertIntoBB,
525 MachineBasicBlock::iterator InsertBefore,
526 MachineOperand &UseMO) {
527 MachineInstr *PreviouslyEmitted = EmittedInsns.lookup(InsertIntoBB);
528 if (PreviouslyEmitted) {
529 Observer.changingInstr(*UseMO.getParent());
530 UseMO.setReg(PreviouslyEmitted->getOperand(0).getReg());
531 Observer.changedInstr(*UseMO.getParent());
532 return;
533 }
534
535 Builder.setInsertPt(*InsertIntoBB, InsertBefore);
536 Register NewDstReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg());
537 MachineInstr *NewMI = Builder.buildTrunc(NewDstReg, ChosenDstReg);
538 EmittedInsns[InsertIntoBB] = NewMI;
539 replaceRegOpWith(MRI, UseMO, NewDstReg);
540 };
541
542 Observer.changingInstr(MI);
543 MI.setDesc(
544 Builder.getTII().get(Preferred.ExtendOpcode == TargetOpcode::G_SEXT
545 ? TargetOpcode::G_SEXTLOAD
546 : Preferred.ExtendOpcode == TargetOpcode::G_ZEXT
547 ? TargetOpcode::G_ZEXTLOAD
548 : TargetOpcode::G_LOAD));
549
550 // Rewrite all the uses to fix up the types.
551 auto &LoadValue = MI.getOperand(0);
552 SmallVector<MachineOperand *, 4> Uses;
553 for (auto &UseMO : MRI.use_operands(LoadValue.getReg()))
554 Uses.push_back(&UseMO);
555
556 for (auto *UseMO : Uses) {
557 MachineInstr *UseMI = UseMO->getParent();
558
559 // If the extend is compatible with the preferred extend then we should fix
560 // up the type and extend so that it uses the preferred use.
561 if (UseMI->getOpcode() == Preferred.ExtendOpcode ||
562 UseMI->getOpcode() == TargetOpcode::G_ANYEXT) {
563 Register UseDstReg = UseMI->getOperand(0).getReg();
564 MachineOperand &UseSrcMO = UseMI->getOperand(1);
565 const LLT UseDstTy = MRI.getType(UseDstReg);
566 if (UseDstReg != ChosenDstReg) {
567 if (Preferred.Ty == UseDstTy) {
568 // If the use has the same type as the preferred use, then merge
569 // the vregs and erase the extend. For example:
570 // %1:_(s8) = G_LOAD ...
571 // %2:_(s32) = G_SEXT %1(s8)
572 // %3:_(s32) = G_ANYEXT %1(s8)
573 // ... = ... %3(s32)
574 // rewrites to:
575 // %2:_(s32) = G_SEXTLOAD ...
576 // ... = ... %2(s32)
577 replaceRegWith(MRI, UseDstReg, ChosenDstReg);
578 Observer.erasingInstr(*UseMO->getParent());
579 UseMO->getParent()->eraseFromParent();
580 } else if (Preferred.Ty.getSizeInBits() < UseDstTy.getSizeInBits()) {
581 // If the preferred size is smaller, then keep the extend but extend
582 // from the result of the extending load. For example:
583 // %1:_(s8) = G_LOAD ...
584 // %2:_(s32) = G_SEXT %1(s8)
585 // %3:_(s64) = G_ANYEXT %1(s8)
586 // ... = ... %3(s64)
587 /// rewrites to:
588 // %2:_(s32) = G_SEXTLOAD ...
589 // %3:_(s64) = G_ANYEXT %2:_(s32)
590 // ... = ... %3(s64)
591 replaceRegOpWith(MRI, UseSrcMO, ChosenDstReg);
592 } else {
593 // If the preferred size is large, then insert a truncate. For
594 // example:
595 // %1:_(s8) = G_LOAD ...
596 // %2:_(s64) = G_SEXT %1(s8)
597 // %3:_(s32) = G_ZEXT %1(s8)
598 // ... = ... %3(s32)
599 /// rewrites to:
600 // %2:_(s64) = G_SEXTLOAD ...
601 // %4:_(s8) = G_TRUNC %2:_(s32)
602 // %3:_(s64) = G_ZEXT %2:_(s8)
603 // ... = ... %3(s64)
604 InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO,
605 InsertTruncAt);
606 }
607 continue;
608 }
609 // The use is (one of) the uses of the preferred use we chose earlier.
610 // We're going to update the load to def this value later so just erase
611 // the old extend.
612 Observer.erasingInstr(*UseMO->getParent());
613 UseMO->getParent()->eraseFromParent();
614 continue;
615 }
616
617 // The use isn't an extend. Truncate back to the type we originally loaded.
618 // This is free on many targets.
619 InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO, InsertTruncAt);
620 }
621
622 MI.getOperand(0).setReg(ChosenDstReg);
623 Observer.changedInstr(MI);
624}
625
626bool CombinerHelper::isPredecessor(const MachineInstr &DefMI,
627 const MachineInstr &UseMI) {
628 assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
629 "shouldn't consider debug uses");
630 assert(DefMI.getParent() == UseMI.getParent());
631 if (&DefMI == &UseMI)
632 return false;
633 const MachineBasicBlock &MBB = *DefMI.getParent();
634 auto DefOrUse = find_if(MBB, [&DefMI, &UseMI](const MachineInstr &MI) {
635 return &MI == &DefMI || &MI == &UseMI;
636 });
637 if (DefOrUse == MBB.end())
638 llvm_unreachable("Block must contain both DefMI and UseMI!");
639 return &*DefOrUse == &DefMI;
640}
641
642bool CombinerHelper::dominates(const MachineInstr &DefMI,
643 const MachineInstr &UseMI) {
644 assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
645 "shouldn't consider debug uses");
646 if (MDT)
647 return MDT->dominates(&DefMI, &UseMI);
648 else if (DefMI.getParent() != UseMI.getParent())
649 return false;
650
651 return isPredecessor(DefMI, UseMI);
652}
653
654bool CombinerHelper::matchSextTruncSextLoad(MachineInstr &MI) {
655 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
656 Register SrcReg = MI.getOperand(1).getReg();
657 Register LoadUser = SrcReg;
658
659 if (MRI.getType(SrcReg).isVector())
660 return false;
661
662 Register TruncSrc;
663 if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc))))
664 LoadUser = TruncSrc;
665
666 uint64_t SizeInBits = MI.getOperand(2).getImm();
667 // If the source is a G_SEXTLOAD from the same bit width, then we don't
668 // need any extend at all, just a truncate.
669 if (auto *LoadMI = getOpcodeDef<GSExtLoad>(LoadUser, MRI)) {
670 // If truncating more than the original extended value, abort.
671 auto LoadSizeBits = LoadMI->getMemSizeInBits();
672 if (TruncSrc && MRI.getType(TruncSrc).getSizeInBits() < LoadSizeBits)
673 return false;
674 if (LoadSizeBits == SizeInBits)
675 return true;
676 }
677 return false;
678}
679
680void CombinerHelper::applySextTruncSextLoad(MachineInstr &MI) {
681 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
682 Builder.setInstrAndDebugLoc(MI);
683 Builder.buildCopy(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
684 MI.eraseFromParent();
685}
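// Editorial illustration (not part of the original file):
// %ld:_(s32) = G_SEXTLOAD %ptr (load 1 byte, sign-extended from 8 bits)
// %ext:_(s32) = G_SEXT_INREG %ld, 8
// rewrites to:
// %ext:_(s32) = COPY %ld
// because the load has already sign-extended from the same width.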
686
687bool CombinerHelper::matchSextInRegOfLoad(
688 MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
689 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
690
691 // Only supports scalars for now.
692 if (MRI.getType(MI.getOperand(0).getReg()).isVector())
693 return false;
694
695 Register SrcReg = MI.getOperand(1).getReg();
696 auto *LoadDef = getOpcodeDef<GLoad>(SrcReg, MRI);
697 if (!LoadDef || !MRI.hasOneNonDBGUse(LoadDef->getOperand(0).getReg()) ||
698 !LoadDef->isSimple())
699 return false;
700
701 // If the sign extend extends from a narrower width than the load's width,
702 // then we can narrow the load width when we combine to a G_SEXTLOAD.
703 // Avoid widening the load at all.
704 unsigned NewSizeBits = std::min((uint64_t)MI.getOperand(2).getImm(),
705 LoadDef->getMemSizeInBits());
706
707 // Don't generate G_SEXTLOADs with a < 1 byte width.
708 if (NewSizeBits < 8)
709 return false;
710 // Don't bother creating a non-power-2 sextload, it will likely be broken up
711 // anyway for most targets.
712 if (!isPowerOf2_32(NewSizeBits))
713 return false;
714 MatchInfo = std::make_tuple(LoadDef->getDstReg(), NewSizeBits);
715 return true;
716}
717
718void CombinerHelper::applySextInRegOfLoad(
719 MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
720 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
721 Register LoadReg;
722 unsigned ScalarSizeBits;
723 std::tie(LoadReg, ScalarSizeBits) = MatchInfo;
724 GLoad *LoadDef = cast<GLoad>(MRI.getVRegDef(LoadReg));
725
726 // If we have the following:
727 // %ld = G_LOAD %ptr, (load 2)
728 // %ext = G_SEXT_INREG %ld, 8
729 // ==>
730 // %ld = G_SEXTLOAD %ptr (load 1)
731
732 auto &MMO = LoadDef->getMMO();
733 Builder.setInstrAndDebugLoc(*LoadDef);
734 auto &MF = Builder.getMF();
735 auto PtrInfo = MMO.getPointerInfo();
736 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, ScalarSizeBits / 8);
737 Builder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, MI.getOperand(0).getReg(),
738 LoadDef->getPointerReg(), *NewMMO);
739 MI.eraseFromParent();
740}
741
742bool CombinerHelper::findPostIndexCandidate(MachineInstr &MI, Register &Addr,
743 Register &Base, Register &Offset) {
744 auto &MF = *MI.getParent()->getParent();
745 const auto &TLI = *MF.getSubtarget().getTargetLowering();
746
747#ifndef NDEBUG
748 unsigned Opcode = MI.getOpcode();
749 assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD ||
750 Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE);
751#endif
752
753 Base = MI.getOperand(1).getReg();
754 MachineInstr *BaseDef = MRI.getUniqueVRegDef(Base);
755 if (BaseDef && BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
756 return false;
757
758 LLVM_DEBUG(dbgs() << "Searching for post-indexing opportunity for: " << MI);
759 // FIXME: The following use traversal needs a bail out for patholigical cases.
760 for (auto &Use : MRI.use_nodbg_instructions(Base)) {
761 if (Use.getOpcode() != TargetOpcode::G_PTR_ADD)
762 continue;
763
764 Offset = Use.getOperand(2).getReg();
765 if (!ForceLegalIndexing &&
766 !TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ false, MRI)) {
767 LLVM_DEBUG(dbgs() << " Ignoring candidate with illegal addrmode: "
768 << Use);
769 continue;
770 }
771
772 // Make sure the offset calculation is before the potentially indexed op.
773 // FIXME: we really care about dependency here. The offset calculation might
774 // be movable.
775 MachineInstr *OffsetDef = MRI.getUniqueVRegDef(Offset);
776 if (!OffsetDef || !dominates(*OffsetDef, MI)) {
777 LLVM_DEBUG(dbgs() << " Ignoring candidate with offset after mem-op: "
778 << Use);
779 continue;
780 }
781
782 // FIXME: check whether all uses of Base are load/store with foldable
783 // addressing modes. If so, using the normal addr-modes is better than
784 // forming an indexed one.
785
786 bool MemOpDominatesAddrUses = true;
787 for (auto &PtrAddUse :
788 MRI.use_nodbg_instructions(Use.getOperand(0).getReg())) {
789 if (!dominates(MI, PtrAddUse)) {
790 MemOpDominatesAddrUses = false;
791 break;
792 }
793 }
794
795 if (!MemOpDominatesAddrUses) {
796 LLVM_DEBUG(
797 dbgs() << " Ignoring candidate as memop does not dominate uses: "
798 << Use);
799 continue;
800 }
801
802 LLVM_DEBUG(dbgs() << " Found match: " << Use);
803 Addr = Use.getOperand(0).getReg();
804 return true;
805 }
806
807 return false;
808}
809
810bool CombinerHelper::findPreIndexCandidate(MachineInstr &MI, Register &Addr,
811 Register &Base, Register &Offset) {
812 auto &MF = *MI.getParent()->getParent();
813 const auto &TLI = *MF.getSubtarget().getTargetLowering();
814
815#ifndef NDEBUG
816 unsigned Opcode = MI.getOpcode();
817 assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD ||
818 Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE);
819#endif
820
821 Addr = MI.getOperand(1).getReg();
822 MachineInstr *AddrDef = getOpcodeDef(TargetOpcode::G_PTR_ADD, Addr, MRI);
823 if (!AddrDef || MRI.hasOneNonDBGUse(Addr))
824 return false;
825
826 Base = AddrDef->getOperand(1).getReg();
827 Offset = AddrDef->getOperand(2).getReg();
828
829 LLVM_DEBUG(dbgs() << "Found potential pre-indexed load_store: " << MI);
830
831 if (!ForceLegalIndexing &&
832 !TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ true, MRI)) {
833 LLVM_DEBUG(dbgs() << " Skipping, not legal for target");
834 return false;
835 }
836
837 MachineInstr *BaseDef = getDefIgnoringCopies(Base, MRI);
838 if (BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
839 LLVM_DEBUG(dbgs() << " Skipping, frame index would need copy anyway.");
840 return false;
841 }
842
843 if (MI.getOpcode() == TargetOpcode::G_STORE) {
844 // Would require a copy.
845 if (Base == MI.getOperand(0).getReg()) {
846 LLVM_DEBUG(dbgs() << " Skipping, storing base so need copy anyway.");
847 return false;
848 }
849
850 // We're expecting one use of Addr in MI, but it could also be the
851 // value stored, which isn't actually dominated by the instruction.
852 if (MI.getOperand(0).getReg() == Addr) {
853 LLVM_DEBUG(dbgs() << " Skipping, does not dominate all addr uses");
854 return false;
855 }
856 }
857
858 // FIXME: check whether all uses of the base pointer are constant PtrAdds.
859 // That might allow us to end base's liveness here by adjusting the constant.
860
861 for (auto &UseMI : MRI.use_nodbg_instructions(Addr)) {
862 if (!dominates(MI, UseMI)) {
863 LLVM_DEBUG(dbgs() << " Skipping, does not dominate all addr uses.");
864 return false;
865 }
866 }
867
868 return true;
869}
870
871bool CombinerHelper::tryCombineIndexedLoadStore(MachineInstr &MI) {
872 IndexedLoadStoreMatchInfo MatchInfo;
873 if (matchCombineIndexedLoadStore(MI, MatchInfo)) {
874 applyCombineIndexedLoadStore(MI, MatchInfo);
875 return true;
876 }
877 return false;
878}
879
880bool CombinerHelper::matchCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
881 unsigned Opcode = MI.getOpcode();
882 if (Opcode != TargetOpcode::G_LOAD && Opcode != TargetOpcode::G_SEXTLOAD &&
883 Opcode != TargetOpcode::G_ZEXTLOAD && Opcode != TargetOpcode::G_STORE)
884 return false;
885
886 // For now, no targets actually support these opcodes so don't waste time
887 // running these unless we're forced to for testing.
888 if (!ForceLegalIndexing)
889 return false;
890
891 MatchInfo.IsPre = findPreIndexCandidate(MI, MatchInfo.Addr, MatchInfo.Base,
892 MatchInfo.Offset);
893 if (!MatchInfo.IsPre &&
894 !findPostIndexCandidate(MI, MatchInfo.Addr, MatchInfo.Base,
895 MatchInfo.Offset))
896 return false;
897
898 return true;
899}
900
901void CombinerHelper::applyCombineIndexedLoadStore(
902 MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
903 MachineInstr &AddrDef = *MRI.getUniqueVRegDef(MatchInfo.Addr);
904 MachineIRBuilder MIRBuilder(MI);
905 unsigned Opcode = MI.getOpcode();
906 bool IsStore = Opcode == TargetOpcode::G_STORE;
907 unsigned NewOpcode;
908 switch (Opcode) {
909 case TargetOpcode::G_LOAD:
910 NewOpcode = TargetOpcode::G_INDEXED_LOAD;
911 break;
912 case TargetOpcode::G_SEXTLOAD:
913 NewOpcode = TargetOpcode::G_INDEXED_SEXTLOAD;
914 break;
915 case TargetOpcode::G_ZEXTLOAD:
916 NewOpcode = TargetOpcode::G_INDEXED_ZEXTLOAD;
917 break;
918 case TargetOpcode::G_STORE:
919 NewOpcode = TargetOpcode::G_INDEXED_STORE;
920 break;
921 default:
922 llvm_unreachable("Unknown load/store opcode");
923 }
924
925 auto MIB = MIRBuilder.buildInstr(NewOpcode);
926 if (IsStore) {
927 MIB.addDef(MatchInfo.Addr);
928 MIB.addUse(MI.getOperand(0).getReg());
929 } else {
930 MIB.addDef(MI.getOperand(0).getReg());
931 MIB.addDef(MatchInfo.Addr);
932 }
933
934 MIB.addUse(MatchInfo.Base);
935 MIB.addUse(MatchInfo.Offset);
936 MIB.addImm(MatchInfo.IsPre);
937 MI.eraseFromParent();
938 AddrDef.eraseFromParent();
939
940 LLVM_DEBUG(dbgs() << " Combinined to indexed operation");
941}
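// Editorial illustration of the post-indexed form (not part of the original
// file):
// %val:_(s32) = G_LOAD %base(p0)
// %next:_(p0) = G_PTR_ADD %base, %off(s64)
// rewrites to:
// %val:_(s32), %next:_(p0) = G_INDEXED_LOAD %base, %off, 0
// where the trailing immediate is MatchInfo.IsPre (0 = post-indexed).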
942
943bool CombinerHelper::matchCombineDivRem(MachineInstr &MI,
944 MachineInstr *&OtherMI) {
945 unsigned Opcode = MI.getOpcode();
946 bool IsDiv, IsSigned;
947
948 switch (Opcode) {
949 default:
950 llvm_unreachable("Unexpected opcode!");
951 case TargetOpcode::G_SDIV:
952 case TargetOpcode::G_UDIV: {
953 IsDiv = true;
954 IsSigned = Opcode == TargetOpcode::G_SDIV;
955 break;
956 }
957 case TargetOpcode::G_SREM:
958 case TargetOpcode::G_UREM: {
959 IsDiv = false;
960 IsSigned = Opcode == TargetOpcode::G_SREM;
961 break;
962 }
963 }
964
965 Register Src1 = MI.getOperand(1).getReg();
966 unsigned DivOpcode, RemOpcode, DivremOpcode;
967 if (IsSigned) {
968 DivOpcode = TargetOpcode::G_SDIV;
969 RemOpcode = TargetOpcode::G_SREM;
970 DivremOpcode = TargetOpcode::G_SDIVREM;
971 } else {
972 DivOpcode = TargetOpcode::G_UDIV;
973 RemOpcode = TargetOpcode::G_UREM;
974 DivremOpcode = TargetOpcode::G_UDIVREM;
975 }
976
977 if (!isLegalOrBeforeLegalizer({DivremOpcode, {MRI.getType(Src1)}}))
978 return false;
979
980 // Combine:
981 // %div:_ = G_[SU]DIV %src1:_, %src2:_
982 // %rem:_ = G_[SU]REM %src1:_, %src2:_
983 // into:
984 // %div:_, %rem:_ = G_[SU]DIVREM %src1:_, %src2:_
985
986 // Combine:
987 // %rem:_ = G_[SU]REM %src1:_, %src2:_
988 // %div:_ = G_[SU]DIV %src1:_, %src2:_
989 // into:
990 // %div:_, %rem:_ = G_[SU]DIVREM %src1:_, %src2:_
991
992 for (auto &UseMI : MRI.use_nodbg_instructions(Src1)) {
993 if (MI.getParent() == UseMI.getParent() &&
994 ((IsDiv && UseMI.getOpcode() == RemOpcode) ||
995 (!IsDiv && UseMI.getOpcode() == DivOpcode)) &&
996 matchEqualDefs(MI.getOperand(2), UseMI.getOperand(2))) {
997 OtherMI = &UseMI;
998 return true;
999 }
1000 }
1001
1002 return false;
1003}
1004
1005void CombinerHelper::applyCombineDivRem(MachineInstr &MI,
1006 MachineInstr *&OtherMI) {
1007 unsigned Opcode = MI.getOpcode();
1008 assert(OtherMI && "OtherMI shouldn't be empty.");
1009
1010 Register DestDivReg, DestRemReg;
1011 if (Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_UDIV) {
1012 DestDivReg = MI.getOperand(0).getReg();
1013 DestRemReg = OtherMI->getOperand(0).getReg();
1014 } else {
1015 DestDivReg = OtherMI->getOperand(0).getReg();
1016 DestRemReg = MI.getOperand(0).getReg();
1017 }
1018
1019 bool IsSigned =
1020 Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_SREM;
1021
1022 // Check which instruction is first in the block so we don't break def-use
1023 // deps by "moving" the instruction incorrectly.
1024 if (dominates(MI, *OtherMI))
1025 Builder.setInstrAndDebugLoc(MI);
1026 else
1027 Builder.setInstrAndDebugLoc(*OtherMI);
1028
1029 Builder.buildInstr(IsSigned ? TargetOpcode::G_SDIVREM
1030 : TargetOpcode::G_UDIVREM,
1031 {DestDivReg, DestRemReg},
1032 {MI.getOperand(1).getReg(), MI.getOperand(2).getReg()});
1033 MI.eraseFromParent();
1034 OtherMI->eraseFromParent();
1035}
1036
1037bool CombinerHelper::matchOptBrCondByInvertingCond(MachineInstr &MI,
1038 MachineInstr *&BrCond) {
1039 assert(MI.getOpcode() == TargetOpcode::G_BR);
1040
1041 // Try to match the following:
1042 // bb1:
1043 // G_BRCOND %c1, %bb2
1044 // G_BR %bb3
1045 // bb2:
1046 // ...
1047 // bb3:
1048
1049 // The above pattern does not have a fall through to the successor bb2, always
1050 // resulting in a branch no matter which path is taken. Here we try to find
1051 // and replace that pattern with conditional branch to bb3 and otherwise
1052 // fallthrough to bb2. This is generally better for branch predictors.
1053
1054 MachineBasicBlock *MBB = MI.getParent();
1055 MachineBasicBlock::iterator BrIt(MI);
1056 if (BrIt == MBB->begin())
1057 return false;
1058 assert(std::next(BrIt) == MBB->end() && "expected G_BR to be a terminator");
1059
1060 BrCond = &*std::prev(BrIt);
1061 if (BrCond->getOpcode() != TargetOpcode::G_BRCOND)
1062 return false;
1063
1064 // Check that the next block is the conditional branch target. Also make sure
1065 // that it isn't the same as the G_BR's target (otherwise, this will loop.)
1066 MachineBasicBlock *BrCondTarget = BrCond->getOperand(1).getMBB();
1067 return BrCondTarget != MI.getOperand(0).getMBB() &&
1068 MBB->isLayoutSuccessor(BrCondTarget);
1069}
1070
1071void CombinerHelper::applyOptBrCondByInvertingCond(MachineInstr &MI,
1072 MachineInstr *&BrCond) {
1073 MachineBasicBlock *BrTarget = MI.getOperand(0).getMBB();
1074 Builder.setInstrAndDebugLoc(*BrCond);
1075 LLT Ty = MRI.getType(BrCond->getOperand(0).getReg());
1076 // FIXME: Does int/fp matter for this? If so, we might need to restrict
1077 // this to i1 only since we might not know for sure what kind of
1078 // compare generated the condition value.
1079 auto True = Builder.buildConstant(
1080 Ty, getICmpTrueVal(getTargetLowering(), false, false));
1081 auto Xor = Builder.buildXor(Ty, BrCond->getOperand(0), True);
1082
1083 auto *FallthroughBB = BrCond->getOperand(1).getMBB();
1084 Observer.changingInstr(MI);
1085 MI.getOperand(0).setMBB(FallthroughBB);
1086 Observer.changedInstr(MI);
1087
1088 // Change the conditional branch to use the inverted condition and
1089 // new target block.
1090 Observer.changingInstr(*BrCond);
1091 BrCond->getOperand(0).setReg(Xor.getReg(0));
1092 BrCond->getOperand(1).setMBB(BrTarget);
1093 Observer.changedInstr(*BrCond);
1094}
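// Editorial illustration (not part of the original file): given
// bb1: G_BRCOND %c1, %bb2 ; G_BR %bb3 (with bb2 the layout successor),
// the combine emits %inv = G_XOR %c1, <true constant> and rewrites the
// block to
// bb1: G_BRCOND %inv, %bb3 ; G_BR %bb2
// so the common path can fall through into bb2.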
1095
1096static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
1097 // On Darwin, -Os means optimize for size without hurting performance, so
1098 // only really optimize for size when -Oz (MinSize) is used.
1099 if (MF.getTarget().getTargetTriple().isOSDarwin())
1100 return MF.getFunction().hasMinSize();
1101 return MF.getFunction().hasOptSize();
1102}
1103
1104// Returns a list of types to use for memory op lowering in MemOps. A partial
1105// port of findOptimalMemOpLowering in TargetLowering.
1106static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
1107 unsigned Limit, const MemOp &Op,
1108 unsigned DstAS, unsigned SrcAS,
1109 const AttributeList &FuncAttributes,
1110 const TargetLowering &TLI) {
1111 if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
1112 return false;
1113
1114 LLT Ty = TLI.getOptimalMemOpLLT(Op, FuncAttributes);
1115
1116 if (Ty == LLT()) {
1117 // Use the largest scalar type whose alignment constraints are satisfied.
1118 // We only need to check DstAlign here as SrcAlign is always greater or
1119 // equal to DstAlign (or zero).
1120 Ty = LLT::scalar(64);
1121 if (Op.isFixedDstAlign())
1122 while (Op.getDstAlign() < Ty.getSizeInBytes() &&
1123 !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, Op.getDstAlign()))
1124 Ty = LLT::scalar(Ty.getSizeInBytes());
1125 assert(Ty.getSizeInBits() > 0 && "Could not find valid type");
1126 // FIXME: check for the largest legal type we can load/store to.
1127 }
1128
1129 unsigned NumMemOps = 0;
1130 uint64_t Size = Op.size();
1131 while (Size) {
1132 unsigned TySize = Ty.getSizeInBytes();
1133 while (TySize > Size) {
1134 // For now, only use non-vector load / store's for the left-over pieces.
1135 LLT NewTy = Ty;
1136 // FIXME: check for mem op safety and legality of the types. Not all of
1137 // SDAGisms map cleanly to GISel concepts.
1138 if (NewTy.isVector())
1139 NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32);
1140 NewTy = LLT::scalar(PowerOf2Floor(NewTy.getSizeInBits() - 1));
1141 unsigned NewTySize = NewTy.getSizeInBytes();
1142 assert(NewTySize > 0 && "Could not find appropriate type");
1143
1144 // If the new LLT cannot cover all of the remaining bits, then consider
1145 // issuing a (or a pair of) unaligned and overlapping load / store.
1146 bool Fast;
1147 // Need to get a VT equivalent for allowMisalignedMemoryAccesses().
1148 MVT VT = getMVTForLLT(Ty);
1149 if (NumMemOps && Op.allowOverlap() && NewTySize < Size &&
1150 TLI.allowsMisalignedMemoryAccesses(
1151 VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
1152 MachineMemOperand::MONone, &Fast) &&
1153 Fast)
1154 TySize = Size;
1155 else {
1156 Ty = NewTy;
1157 TySize = NewTySize;
1158 }
1159 }
1160
1161 if (++NumMemOps > Limit)
1162 return false;
1163
1164 MemOps.push_back(Ty);
1165 Size -= TySize;
1166 }
1167
1168 return true;
1169}
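// Editorial illustration (not part of the original file): for a 15-byte
// operation where the target reports no preferred type and the overlapping
// fast path is not taken, the loops above produce MemOps = {s64, s32, s16,
// s8}: one 8-byte op, then progressively narrower ops for the 7-byte tail.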
1170
1171static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
1172 if (Ty.isVector())
1173 return FixedVectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
1174 Ty.getNumElements());
1175 return IntegerType::get(C, Ty.getSizeInBits());
1176}
1177
1178// Get a vectorized representation of the memset value operand, GISel edition.
1179static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) {
1180 MachineRegisterInfo &MRI = *MIB.getMRI();
1181 unsigned NumBits = Ty.getScalarSizeInBits();
1182 auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI);
1183 if (!Ty.isVector() && ValVRegAndVal) {
1184 APInt Scalar = ValVRegAndVal->Value.truncOrSelf(8);
1185 APInt SplatVal = APInt::getSplat(NumBits, Scalar);
1186 return MIB.buildConstant(Ty, SplatVal).getReg(0);
1187 }
1188
1189 // Extend the byte value to the larger type, and then multiply by a magic
1190 // value 0x010101... in order to replicate it across every byte.
1191 // Unless it's zero, in which case just emit a larger G_CONSTANT 0.
1192 if (ValVRegAndVal && ValVRegAndVal->Value == 0) {
1193 return MIB.buildConstant(Ty, 0).getReg(0);
1194 }
1195
1196 LLT ExtType = Ty.getScalarType();
1197 auto ZExt = MIB.buildZExtOrTrunc(ExtType, Val);
1198 if (NumBits > 8) {
1199 APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01));
1200 auto MagicMI = MIB.buildConstant(ExtType, Magic);
1201 Val = MIB.buildMul(ExtType, ZExt, MagicMI).getReg(0);
1202 }
1203
1204 // For vector types create a G_BUILD_VECTOR.
1205 if (Ty.isVector())
1206 Val = MIB.buildSplatVector(Ty, Val).getReg(0);
1207
1208 return Val;
1209}
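// Editorial illustration (not part of the original file): splatting the byte
// value 0xAB into an s32 memset value when it is not a known constant
// zero-extends it and multiplies by the magic constant 0x01010101, giving
// 0xAB * 0x01010101 = 0xABABABAB.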
1210
1211bool CombinerHelper::optimizeMemset(MachineInstr &MI, Register Dst,
1212 Register Val, uint64_t KnownLen,
1213 Align Alignment, bool IsVolatile) {
1214 auto &MF = *MI.getParent()->getParent();
1215 const auto &TLI = *MF.getSubtarget().getTargetLowering();
1216 auto &DL = MF.getDataLayout();
1217 LLVMContext &C = MF.getFunction().getContext();
1218
1219 assert(KnownLen != 0 && "Have a zero length memset length!");
1220
1221 bool DstAlignCanChange = false;
1222 MachineFrameInfo &MFI = MF.getFrameInfo();
1223 bool OptSize = shouldLowerMemFuncForSize(MF);
1224
1225 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
1226 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
1227 DstAlignCanChange = true;
1228
1229 unsigned Limit = TLI.getMaxStoresPerMemset(OptSize);
1230 std::vector<LLT> MemOps;
1231
1232 const auto &DstMMO = **MI.memoperands_begin();
1233 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
1234
1235 auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI);
1236 bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;
1237
1238 if (!findGISelOptimalMemOpLowering(MemOps, Limit,
1239 MemOp::Set(KnownLen, DstAlignCanChange,
1240 Alignment,
1241 /*IsZeroMemset=*/IsZeroVal,
1242 /*IsVolatile=*/IsVolatile),
1243 DstPtrInfo.getAddrSpace(), ~0u,
1244 MF.getFunction().getAttributes(), TLI))
1245 return false;
1246
1247 if (DstAlignCanChange) {
1248 // Get an estimate of the type from the LLT.
1249 Type *IRTy = getTypeForLLT(MemOps[0], C);
1250 Align NewAlign = DL.getABITypeAlign(IRTy);
1251 if (NewAlign > Alignment) {
1252 Alignment = NewAlign;
1253 unsigned FI = FIDef->getOperand(1).getIndex();
1254 // Give the stack frame object a larger alignment if needed.
1255 if (MFI.getObjectAlign(FI) < Alignment)
1256 MFI.setObjectAlignment(FI, Alignment);
1257 }
1258 }
1259
1260 MachineIRBuilder MIB(MI);
1261 // Find the largest store and generate the bit pattern for it.
1262 LLT LargestTy = MemOps[0];
1263 for (unsigned i = 1; i < MemOps.size(); i++)
1264 if (MemOps[i].getSizeInBits() > LargestTy.getSizeInBits())
1265 LargestTy = MemOps[i];
1266
1267 // The memset stored value is always defined as an s8, so in order to make it
1268 // work with larger store types we need to repeat the bit pattern across the
1269 // wider type.
1270 Register MemSetValue = getMemsetValue(Val, LargestTy, MIB);
1271
1272 if (!MemSetValue)
1273 return false;
1274
1275 // Generate the stores. For each store type in the list, we generate the
1276 // matching store of that type to the destination address.
1277 LLT PtrTy = MRI.getType(Dst);
1278 unsigned DstOff = 0;
1279 unsigned Size = KnownLen;
1280 for (unsigned I = 0; I < MemOps.size(); I++) {
1281 LLT Ty = MemOps[I];
1282 unsigned TySize = Ty.getSizeInBytes();
1283 if (TySize > Size) {
1284 // We are issuing an unaligned store that overlaps with the previous
1285 // store. Adjust the offset accordingly.
1286 assert(I == MemOps.size() - 1 && I != 0)((void)0);
1287 DstOff -= TySize - Size;
1288 }
1289
1290 // If this store is smaller than the largest store see whether we can get
1291 // the smaller value for free with a truncate.
1292 Register Value = MemSetValue;
1293 if (Ty.getSizeInBits() < LargestTy.getSizeInBits()) {
1294 MVT VT = getMVTForLLT(Ty);
1295 MVT LargestVT = getMVTForLLT(LargestTy);
1296 if (!LargestTy.isVector() && !Ty.isVector() &&
1297 TLI.isTruncateFree(LargestVT, VT))
1298 Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
1299 else
1300 Value = getMemsetValue(Val, Ty, MIB);
1301 if (!Value)
1302 return false;
1303 }
1304
1305 auto *StoreMMO =
1306 MF.getMachineMemOperand(&DstMMO, DstOff, Ty);
1307
1308 Register Ptr = Dst;
1309 if (DstOff != 0) {
1310 auto Offset =
1311 MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff);
1312 Ptr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
1313 }
1314
1315 MIB.buildStore(Value, Ptr, *StoreMMO);
1316 DstOff += Ty.getSizeInBytes();
1317 Size -= TySize;
1318 }
1319
1320 MI.eraseFromParent();
1321 return true;
1322}
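// As a rough example, assuming the target lets findGISelOptimalMemOpLowering
// pick two s32 stores for a hypothetical 7-byte memset, the loop above emits a
// store at offset 0 and then, because the second s32 no longer fits, pulls its
// offset back from 4 to 3 so the two stores overlap by one byte:
//   G_STORE %splat, %dst              ; bytes [0,3]
//   %p3 = G_PTR_ADD %dst, 3
//   G_STORE %splat, %p3               ; bytes [3,6]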
1323
1324bool CombinerHelper::tryEmitMemcpyInline(MachineInstr &MI) {
1325 assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE)((void)0);
1326
1327 Register Dst = MI.getOperand(0).getReg();
1328 Register Src = MI.getOperand(1).getReg();
1329 Register Len = MI.getOperand(2).getReg();
1330
1331 const auto *MMOIt = MI.memoperands_begin();
1332 const MachineMemOperand *MemOp = *MMOIt;
1333 bool IsVolatile = MemOp->isVolatile();
1334
1335 // See if this is a constant length copy
1336 auto LenVRegAndVal = getConstantVRegValWithLookThrough(Len, MRI);
1337 // FIXME: support dynamically sized G_MEMCPY_INLINE
1338 assert(LenVRegAndVal.hasValue() &&((void)0)
1339 "inline memcpy with dynamic size is not yet supported")((void)0);
1340 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
1341 if (KnownLen == 0) {
1342 MI.eraseFromParent();
1343 return true;
1344 }
1345
1346 const auto &DstMMO = **MI.memoperands_begin();
1347 const auto &SrcMMO = **std::next(MI.memoperands_begin());
1348 Align DstAlign = DstMMO.getBaseAlign();
1349 Align SrcAlign = SrcMMO.getBaseAlign();
1350
1351 return tryEmitMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
1352 IsVolatile);
1353}
1354
1355bool CombinerHelper::tryEmitMemcpyInline(MachineInstr &MI, Register Dst,
1356 Register Src, uint64_t KnownLen,
1357 Align DstAlign, Align SrcAlign,
1358 bool IsVolatile) {
1359 assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE)((void)0);
1360 return optimizeMemcpy(MI, Dst, Src, KnownLen,
1361 std::numeric_limits<uint64_t>::max(), DstAlign,
1362 SrcAlign, IsVolatile);
1363}
1364
1365bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst,
1366 Register Src, uint64_t KnownLen,
1367 uint64_t Limit, Align DstAlign,
1368 Align SrcAlign, bool IsVolatile) {
1369 auto &MF = *MI.getParent()->getParent();
1370 const auto &TLI = *MF.getSubtarget().getTargetLowering();
1371 auto &DL = MF.getDataLayout();
1372 LLVMContext &C = MF.getFunction().getContext();
1373
1374 assert(KnownLen != 0 && "Have a zero length memcpy length!")((void)0);
1375
1376 bool DstAlignCanChange = false;
1377 MachineFrameInfo &MFI = MF.getFrameInfo();
1378 Align Alignment = commonAlignment(DstAlign, SrcAlign);
1379
1380 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
1381 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
1382 DstAlignCanChange = true;
1383
1384 // FIXME: infer better src pointer alignment like SelectionDAG does here.
1385 // FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining
1386 // if the memcpy is in a tail call position.
1387
1388 std::vector<LLT> MemOps;
1389
1390 const auto &DstMMO = **MI.memoperands_begin();
1391 const auto &SrcMMO = **std::next(MI.memoperands_begin());
1392 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
1393 MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
1394
1395 if (!findGISelOptimalMemOpLowering(
1396 MemOps, Limit,
1397 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
1398 IsVolatile),
1399 DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
1400 MF.getFunction().getAttributes(), TLI))
1401 return false;
1402
1403 if (DstAlignCanChange) {
1404 // Get an estimate of the type from the LLT.
1405 Type *IRTy = getTypeForLLT(MemOps[0], C);
1406 Align NewAlign = DL.getABITypeAlign(IRTy);
1407
1408 // Don't promote to an alignment that would require dynamic stack
1409 // realignment.
1410 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
1411 if (!TRI->hasStackRealignment(MF))
1412 while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
1413 NewAlign = NewAlign / 2;
1414
1415 if (NewAlign > Alignment) {
1416 Alignment = NewAlign;
1417 unsigned FI = FIDef->getOperand(1).getIndex();
1418 // Give the stack frame object a larger alignment if needed.
1419 if (MFI.getObjectAlign(FI) < Alignment)
1420 MFI.setObjectAlignment(FI, Alignment);
1421 }
1422 }
1423
1424 LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n")do { } while (false);
1425
1426 MachineIRBuilder MIB(MI);
1427 // Now we need to emit a load/store pair for each of the types we've
1428 // collected. I.e. for each type, generate a load of that type's width from
1429 // the source pointer, and then generate a corresponding store of the loaded
1430 // value to the dest buffer. This can result in a sequence of loads and stores
1431 // of mixed types, depending on what the target specifies as good types to use.
1432 unsigned CurrOffset = 0;
1433 LLT PtrTy = MRI.getType(Src);
1434 unsigned Size = KnownLen;
1435 for (auto CopyTy : MemOps) {
1436 // Issuing an unaligned load / store pair that overlaps with the previous
1437 // pair. Adjust the offset accordingly.
1438 if (CopyTy.getSizeInBytes() > Size)
1439 CurrOffset -= CopyTy.getSizeInBytes() - Size;
1440
1441 // Construct MMOs for the accesses.
1442 auto *LoadMMO =
1443 MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
1444 auto *StoreMMO =
1445 MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
1446
1447 // Create the load.
1448 Register LoadPtr = Src;
1449 Register Offset;
1450 if (CurrOffset != 0) {
1451 Offset = MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset)
1452 .getReg(0);
1453 LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0);
1454 }
1455 auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);
1456
1457 // Create the store.
1458 Register StorePtr =
1459 CurrOffset == 0 ? Dst : MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
1460 MIB.buildStore(LdVal, StorePtr, *StoreMMO);
1461 CurrOffset += CopyTy.getSizeInBytes();
1462 Size -= CopyTy.getSizeInBytes();
1463 }
1464
1465 MI.eraseFromParent();
1466 return true;
1467}
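// For a concrete (hypothetical) shape, if the lowering above settles on
// MemOps = {s64, s32} for a 12-byte copy, the loop emits one load/store pair
// per type, reusing the same constant offset for both sides of each pair:
//   %v0 = G_LOAD %src                 ; bytes [0,7]
//   G_STORE %v0, %dst
//   %o  = G_CONSTANT 8
//   %s1 = G_PTR_ADD %src, %o
//   %v1 = G_LOAD %s1                  ; bytes [8,11]
//   %d1 = G_PTR_ADD %dst, %o
//   G_STORE %v1, %d1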
1468
1469bool CombinerHelper::optimizeMemmove(MachineInstr &MI, Register Dst,
1470 Register Src, uint64_t KnownLen,
1471 Align DstAlign, Align SrcAlign,
1472 bool IsVolatile) {
1473 auto &MF = *MI.getParent()->getParent();
1474 const auto &TLI = *MF.getSubtarget().getTargetLowering();
1475 auto &DL = MF.getDataLayout();
1476 LLVMContext &C = MF.getFunction().getContext();
1477
1478 assert(KnownLen != 0 && "Have a zero length memmove length!")((void)0);
1479
1480 bool DstAlignCanChange = false;
1481 MachineFrameInfo &MFI = MF.getFrameInfo();
1482 bool OptSize = shouldLowerMemFuncForSize(MF);
1483 Align Alignment = commonAlignment(DstAlign, SrcAlign);
1484
1485 MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
1486 if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
1487 DstAlignCanChange = true;
1488
1489 unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize);
1490 std::vector<LLT> MemOps;
1491
1492 const auto &DstMMO = **MI.memoperands_begin();
1493 const auto &SrcMMO = **std::next(MI.memoperands_begin());
1494 MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
1495 MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
1496
1497 // FIXME: SelectionDAG always passes false for 'AllowOverlap', apparently due
1498 // to a bug in its findOptimalMemOpLowering implementation. For now do the
1499 // same thing here.
1500 if (!findGISelOptimalMemOpLowering(
1501 MemOps, Limit,
1502 MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
1503 /*IsVolatile*/ true),
1504 DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
1505 MF.getFunction().getAttributes(), TLI))
1506 return false;
1507
1508 if (DstAlignCanChange) {
1509 // Get an estimate of the type from the LLT.
1510 Type *IRTy = getTypeForLLT(MemOps[0], C);
1511 Align NewAlign = DL.getABITypeAlign(IRTy);
1512
1513 // Don't promote to an alignment that would require dynamic stack
1514 // realignment.
1515 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
1516 if (!TRI->hasStackRealignment(MF))
1517 while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
1518 NewAlign = NewAlign / 2;
1519
1520 if (NewAlign > Alignment) {
1521 Alignment = NewAlign;
1522 unsigned FI = FIDef->getOperand(1).getIndex();
1523 // Give the stack frame object a larger alignment if needed.
1524 if (MFI.getObjectAlign(FI) < Alignment)
1525 MFI.setObjectAlignment(FI, Alignment);
1526 }
1527 }
1528
1529 LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n")do { } while (false);
1530
1531 MachineIRBuilder MIB(MI);
1532 // Memmove requires that we perform all the loads before issuing the stores.
1533 // Apart from that, this loop is pretty much doing the same thing as the
1534 // memcpy codegen function.
1535 unsigned CurrOffset = 0;
1536 LLT PtrTy = MRI.getType(Src);
1537 SmallVector<Register, 16> LoadVals;
1538 for (auto CopyTy : MemOps) {
1539 // Construct MMO for the load.
1540 auto *LoadMMO =
1541 MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
1542
1543 // Create the load.
1544 Register LoadPtr = Src;
1545 if (CurrOffset != 0) {
1546 auto Offset =
1547 MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset);
1548 LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0);
1549 }
1550 LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
1551 CurrOffset += CopyTy.getSizeInBytes();
1552 }
1553
1554 CurrOffset = 0;
1555 for (unsigned I = 0; I < MemOps.size(); ++I) {
1556 LLT CopyTy = MemOps[I];
1557 // Now store the values loaded.
1558 auto *StoreMMO =
1559 MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
1560
1561 Register StorePtr = Dst;
1562 if (CurrOffset != 0) {
1563 auto Offset =
1564 MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset);
1565 StorePtr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
1566 }
1567 MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO);
1568 CurrOffset += CopyTy.getSizeInBytes();
1569 }
1570 MI.eraseFromParent();
1571 return true;
1572}
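// A short sketch of why the two loops above are kept separate: for overlapping
// buffers, e.g. copying bytes [0,7] of an object onto bytes [4,11] of the same
// object, interleaving each store with the following load would clobber source
// bytes before they were read. Issuing every G_LOAD first and only then the
// G_STOREs preserves memmove semantics for any overlap.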
1573
1574bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
1575 const unsigned Opc = MI.getOpcode();
1576 // This combine is fairly complex so it's not written with a separate
1577 // matcher function.
1578 assert((Opc == TargetOpcode::G_MEMCPY || Opc == TargetOpcode::G_MEMMOVE ||((void)0)
1579 Opc == TargetOpcode::G_MEMSET) && "Expected memcpy like instruction")((void)0);
1580
1581 auto MMOIt = MI.memoperands_begin();
1582 const MachineMemOperand *MemOp = *MMOIt;
1583
1584 Align DstAlign = MemOp->getBaseAlign();
1585 Align SrcAlign;
1586 Register Dst = MI.getOperand(0).getReg();
1587 Register Src = MI.getOperand(1).getReg();
1588 Register Len = MI.getOperand(2).getReg();
1589
1590 if (Opc != TargetOpcode::G_MEMSET) {
1591 assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI")((void)0);
1592 MemOp = *(++MMOIt);
1593 SrcAlign = MemOp->getBaseAlign();
1594 }
1595
1596 // See if this is a constant length copy
1597 auto LenVRegAndVal = getConstantVRegValWithLookThrough(Len, MRI);
1598 if (!LenVRegAndVal)
1599 return false; // Leave it to the legalizer to lower it to a libcall.
1600 uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
1601
1602 if (KnownLen == 0) {
1603 MI.eraseFromParent();
1604 return true;
1605 }
1606
1607 bool IsVolatile = MemOp->isVolatile();
1608 if (Opc == TargetOpcode::G_MEMCPY_INLINE)
1609 return tryEmitMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
1610 IsVolatile);
1611
1612 // Don't try to optimize volatile.
1613 if (IsVolatile)
1614 return false;
1615
1616 if (MaxLen && KnownLen > MaxLen)
1617 return false;
1618
1619 if (Opc == TargetOpcode::G_MEMCPY) {
1620 auto &MF = *MI.getParent()->getParent();
1621 const auto &TLI = *MF.getSubtarget().getTargetLowering();
1622 bool OptSize = shouldLowerMemFuncForSize(MF);
1623 uint64_t Limit = TLI.getMaxStoresPerMemcpy(OptSize);
1624 return optimizeMemcpy(MI, Dst, Src, KnownLen, Limit, DstAlign, SrcAlign,
1625 IsVolatile);
1626 }
1627 if (Opc == TargetOpcode::G_MEMMOVE)
1628 return optimizeMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
1629 if (Opc == TargetOpcode::G_MEMSET)
1630 return optimizeMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
1631 return false;
1632}
1633
1634static Optional<APFloat> constantFoldFpUnary(unsigned Opcode, LLT DstTy,
1635 const Register Op,
1636 const MachineRegisterInfo &MRI) {
1637 const ConstantFP *MaybeCst = getConstantFPVRegVal(Op, MRI);
1638 if (!MaybeCst)
1639 return None;
1640
1641 APFloat V = MaybeCst->getValueAPF();
1642 switch (Opcode) {
1643 default:
1644 llvm_unreachable("Unexpected opcode!")__builtin_unreachable();
1645 case TargetOpcode::G_FNEG: {
1646 V.changeSign();
1647 return V;
1648 }
1649 case TargetOpcode::G_FABS: {
1650 V.clearSign();
1651 return V;
1652 }
1653 case TargetOpcode::G_FPTRUNC:
1654 break;
1655 case TargetOpcode::G_FSQRT: {
1656 bool Unused;
1657 V.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &Unused);
1658 V = APFloat(sqrt(V.convertToDouble()));
1659 break;
1660 }
1661 case TargetOpcode::G_FLOG2: {
1662 bool Unused;
1663 V.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &Unused);
1664 V = APFloat(log2(V.convertToDouble()));
1665 break;
1666 }
1667 }
1668 // Convert `APFloat` to the appropriate IEEE type depending on `DstTy`. Otherwise,
1669 // `buildFConstant` will assert on size mismatch. Only `G_FPTRUNC`, `G_FSQRT`,
1670 // and `G_FLOG2` reach here.
1671 bool Unused;
1672 V.convert(getFltSemanticForLLT(DstTy), APFloat::rmNearestTiesToEven, &Unused);
1673 return V;
1674}
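// Illustrative folds (constants invented for the example):
//   G_FNEG  (G_FCONSTANT double 2.0)   -->  G_FCONSTANT double -2.0
//   G_FABS  (G_FCONSTANT double -1.5)  -->  G_FCONSTANT double 1.5
//   G_FSQRT (G_FCONSTANT double 4.0)   -->  G_FCONSTANT double 2.0
// For G_FSQRT, G_FLOG2 and G_FPTRUNC the result is converted back to the
// semantics of DstTy before being materialized, as noted above.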
1675
1676bool CombinerHelper::matchCombineConstantFoldFpUnary(MachineInstr &MI,
1677 Optional<APFloat> &Cst) {
1678 Register DstReg = MI.getOperand(0).getReg();
1679 Register SrcReg = MI.getOperand(1).getReg();
1680 LLT DstTy = MRI.getType(DstReg);
1681 Cst = constantFoldFpUnary(MI.getOpcode(), DstTy, SrcReg, MRI);
1682 return Cst.hasValue();
1683}
1684
1685void CombinerHelper::applyCombineConstantFoldFpUnary(MachineInstr &MI,
1686 Optional<APFloat> &Cst) {
1687 assert(Cst.hasValue() && "Optional is unexpectedly empty!")((void)0);
1688 Builder.setInstrAndDebugLoc(MI);
1689 MachineFunction &MF = Builder.getMF();
1690 auto *FPVal = ConstantFP::get(MF.getFunction().getContext(), *Cst);
1691 Register DstReg = MI.getOperand(0).getReg();
1692 Builder.buildFConstant(DstReg, *FPVal);
1693 MI.eraseFromParent();
1694}
1695
1696bool CombinerHelper::matchPtrAddImmedChain(MachineInstr &MI,
1697 PtrAddChain &MatchInfo) {
1698 // We're trying to match the following pattern:
1699 // %t1 = G_PTR_ADD %base, G_CONSTANT imm1
1700 // %root = G_PTR_ADD %t1, G_CONSTANT imm2
1701 // -->
1702 // %root = G_PTR_ADD %base, G_CONSTANT (imm1 + imm2)
1703
1704 if (MI.getOpcode() != TargetOpcode::G_PTR_ADD)
1705 return false;
1706
1707 Register Add2 = MI.getOperand(1).getReg();
1708 Register Imm1 = MI.getOperand(2).getReg();
1709 auto MaybeImmVal = getConstantVRegValWithLookThrough(Imm1, MRI);
1710 if (!MaybeImmVal)
1711 return false;
1712
1713 // Don't do this combine if there are multiple uses of the first PTR_ADD,
1714 // since we may be able to compute the second PTR_ADD as an immediate
1715 // offset anyway. Folding the first offset into the second may cause us
1716 // to go beyond the bounds of our legal addressing modes.
1717 if (!MRI.hasOneNonDBGUse(Add2))
1718 return false;
1719
1720 MachineInstr *Add2Def = MRI.getUniqueVRegDef(Add2);
1721 if (!Add2Def || Add2Def->getOpcode() != TargetOpcode::G_PTR_ADD)
1722 return false;
1723
1724 Register Base = Add2Def->getOperand(1).getReg();
1725 Register Imm2 = Add2Def->getOperand(2).getReg();
1726 auto MaybeImm2Val = getConstantVRegValWithLookThrough(Imm2, MRI);
1727 if (!MaybeImm2Val)
1728 return false;
1729
1730 // Pass the combined immediate to the apply function.
1731 MatchInfo.Imm = (MaybeImmVal->Value + MaybeImm2Val->Value).getSExtValue();
1732 MatchInfo.Base = Base;
1733 return true;
1734}
1735
1736void CombinerHelper::applyPtrAddImmedChain(MachineInstr &MI,
1737 PtrAddChain &MatchInfo) {
1738 assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD")((void)0);
1739 MachineIRBuilder MIB(MI);
1740 LLT OffsetTy = MRI.getType(MI.getOperand(2).getReg());
1741 auto NewOffset = MIB.buildConstant(OffsetTy, MatchInfo.Imm);
1742 Observer.changingInstr(MI);
1743 MI.getOperand(1).setReg(MatchInfo.Base);
1744 MI.getOperand(2).setReg(NewOffset.getReg(0));
1745 Observer.changedInstr(MI);
1746}
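// Concrete instance of the chain fold above (immediates invented):
//   %t1   = G_PTR_ADD %base, G_CONSTANT 16
//   %root = G_PTR_ADD %t1,   G_CONSTANT 8
// becomes, provided %t1 has no other non-debug uses,
//   %root = G_PTR_ADD %base, G_CONSTANT 24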
1747
1748bool CombinerHelper::matchShiftImmedChain(MachineInstr &MI,
1749 RegisterImmPair &MatchInfo) {
1750 // We're trying to match the following pattern with any of
1751 // G_SHL/G_ASHR/G_LSHR/G_SSHLSAT/G_USHLSAT shift instructions:
1752 // %t1 = SHIFT %base, G_CONSTANT imm1
1753 // %root = SHIFT %t1, G_CONSTANT imm2
1754 // -->
1755 // %root = SHIFT %base, G_CONSTANT (imm1 + imm2)
1756
1757 unsigned Opcode = MI.getOpcode();
1758 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||((void)0)
1759 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||((void)0)
1760 Opcode == TargetOpcode::G_USHLSAT) &&((void)0)
1761 "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT")((void)0);
1762
1763 Register Shl2 = MI.getOperand(1).getReg();
1764 Register Imm1 = MI.getOperand(2).getReg();
1765 auto MaybeImmVal = getConstantVRegValWithLookThrough(Imm1, MRI);
1766 if (!MaybeImmVal)
1767 return false;
1768
1769 MachineInstr *Shl2Def = MRI.getUniqueVRegDef(Shl2);
1770 if (Shl2Def->getOpcode() != Opcode)
1771 return false;
1772
1773 Register Base = Shl2Def->getOperand(1).getReg();
1774 Register Imm2 = Shl2Def->getOperand(2).getReg();
1775 auto MaybeImm2Val = getConstantVRegValWithLookThrough(Imm2, MRI);
1776 if (!MaybeImm2Val)
1777 return false;
1778
1779 // Pass the combined immediate to the apply function.
1780 MatchInfo.Imm =
1781 (MaybeImmVal->Value.getSExtValue() + MaybeImm2Val->Value).getSExtValue();
1782 MatchInfo.Reg = Base;
1783
1784 // There is no simple replacement for a saturating unsigned left shift that
1785 // exceeds the scalar size.
1786 if (Opcode == TargetOpcode::G_USHLSAT &&
1787 MatchInfo.Imm >= MRI.getType(Shl2).getScalarSizeInBits())
1788 return false;
1789
1790 return true;
1791}
1792
1793void CombinerHelper::applyShiftImmedChain(MachineInstr &MI,
1794 RegisterImmPair &MatchInfo) {
1795 unsigned Opcode = MI.getOpcode();
1796 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||((void)0)
1797 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||((void)0)
1798 Opcode == TargetOpcode::G_USHLSAT) &&((void)0)
1799 "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT")((void)0);
1800
1801 Builder.setInstrAndDebugLoc(MI);
1802 LLT Ty = MRI.getType(MI.getOperand(1).getReg());
1803 unsigned const ScalarSizeInBits = Ty.getScalarSizeInBits();
1804 auto Imm = MatchInfo.Imm;
1805
1806 if (Imm >= ScalarSizeInBits) {
1807 // Any logical shift that exceeds scalar size will produce zero.
1808 if (Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_LSHR) {
1809 Builder.buildConstant(MI.getOperand(0), 0);
1810 MI.eraseFromParent();
1811 return;
1812 }
1813 // Arithmetic shift and saturating signed left shift have no effect beyond
1814 // scalar size.
1815 Imm = ScalarSizeInBits - 1;
1816 }
1817
1818 LLT ImmTy = MRI.getType(MI.getOperand(2).getReg());
1819 Register NewImm = Builder.buildConstant(ImmTy, Imm).getReg(0);
1820 Observer.changingInstr(MI);
1821 MI.getOperand(1).setReg(MatchInfo.Reg);
1822 MI.getOperand(2).setReg(NewImm);
1823 Observer.changedInstr(MI);
1824}
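// Worked example of the shift chain fold (widths and immediates invented):
//   %t1   = G_SHL %x, G_CONSTANT 3
//   %root = G_SHL %t1, G_CONSTANT 2   -->   G_SHL %x, G_CONSTANT 5
// If the summed amount reaches the scalar width, the apply above instead emits
// a constant 0 for G_SHL/G_LSHR, or clamps the amount to width-1 for
// G_ASHR/G_SSHLSAT.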
1825
1826bool CombinerHelper::matchShiftOfShiftedLogic(MachineInstr &MI,
1827 ShiftOfShiftedLogic &MatchInfo) {
1828 // We're trying to match the following pattern with any of
1829 // G_SHL/G_ASHR/G_LSHR/G_USHLSAT/G_SSHLSAT shift instructions in combination
1830 // with any of G_AND/G_OR/G_XOR logic instructions.
1831 // %t1 = SHIFT %X, G_CONSTANT C0
1832 // %t2 = LOGIC %t1, %Y
1833 // %root = SHIFT %t2, G_CONSTANT C1
1834 // -->
1835 // %t3 = SHIFT %X, G_CONSTANT (C0+C1)
1836 // %t4 = SHIFT %Y, G_CONSTANT C1
1837 // %root = LOGIC %t3, %t4
1838 unsigned ShiftOpcode = MI.getOpcode();
1839 assert((ShiftOpcode == TargetOpcode::G_SHL ||((void)0)
1840 ShiftOpcode == TargetOpcode::G_ASHR ||((void)0)
1841 ShiftOpcode == TargetOpcode::G_LSHR ||((void)0)
1842 ShiftOpcode == TargetOpcode::G_USHLSAT ||((void)0)
1843 ShiftOpcode == TargetOpcode::G_SSHLSAT) &&((void)0)
1844 "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT")((void)0);
1845
1846 // Match a one-use bitwise logic op.
1847 Register LogicDest = MI.getOperand(1).getReg();
1848 if (!MRI.hasOneNonDBGUse(LogicDest))
1849 return false;
1850
1851 MachineInstr *LogicMI = MRI.getUniqueVRegDef(LogicDest);
1852 unsigned LogicOpcode = LogicMI->getOpcode();
1853 if (LogicOpcode != TargetOpcode::G_AND && LogicOpcode != TargetOpcode::G_OR &&
1854 LogicOpcode != TargetOpcode::G_XOR)
1855 return false;
1856
1857 // Find a matching one-use shift by constant.
1858 const Register C1 = MI.getOperand(2).getReg();
1859 auto MaybeImmVal = getConstantVRegValWithLookThrough(C1, MRI);
1860 if (!MaybeImmVal)
1861 return false;
1862
1863 const uint64_t C1Val = MaybeImmVal->Value.getZExtValue();
1864
1865 auto matchFirstShift = [&](const MachineInstr *MI, uint64_t &ShiftVal) {
1866 // The shift should match the previous one and have a single non-debug use.
1867 if (MI->getOpcode() != ShiftOpcode ||
1868 !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
1869 return false;
1870
1871 // Must be a constant.
1872 auto MaybeImmVal =
1873 getConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
1874 if (!MaybeImmVal)
1875 return false;
1876
1877 ShiftVal = MaybeImmVal->Value.getSExtValue();
1878 return true;
1879 };
1880
1881 // Logic ops are commutative, so check each operand for a match.
1882 Register LogicMIReg1 = LogicMI->getOperand(1).getReg();
1883 MachineInstr *LogicMIOp1 = MRI.getUniqueVRegDef(LogicMIReg1);
1884 Register LogicMIReg2 = LogicMI->getOperand(2).getReg();
1885 MachineInstr *LogicMIOp2 = MRI.getUniqueVRegDef(LogicMIReg2);
1886 uint64_t C0Val;
1887
1888 if (matchFirstShift(LogicMIOp1, C0Val)) {
1889 MatchInfo.LogicNonShiftReg = LogicMIReg2;
1890 MatchInfo.Shift2 = LogicMIOp1;
1891 } else if (matchFirstShift(LogicMIOp2, C0Val)) {
1892 MatchInfo.LogicNonShiftReg = LogicMIReg1;
1893 MatchInfo.Shift2 = LogicMIOp2;
1894 } else
1895 return false;
1896
1897 MatchInfo.ValSum = C0Val + C1Val;
1898
1899 // The fold is not valid if the sum of the shift values exceeds bitwidth.
1900 if (MatchInfo.ValSum >= MRI.getType(LogicDest).getScalarSizeInBits())
1901 return false;
1902
1903 MatchInfo.Logic = LogicMI;
1904 return true;
1905}
1906
1907void CombinerHelper::applyShiftOfShiftedLogic(MachineInstr &MI,
1908 ShiftOfShiftedLogic &MatchInfo) {
1909 unsigned Opcode = MI.getOpcode();
1910 assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||((void)0)
1911 Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_USHLSAT ||((void)0)
1912 Opcode == TargetOpcode::G_SSHLSAT) &&((void)0)
1913 "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT and G_SSHLSAT")((void)0);
1914
1915 LLT ShlType = MRI.getType(MI.getOperand(2).getReg());
1916 LLT DestType = MRI.getType(MI.getOperand(0).getReg());
1917 Builder.setInstrAndDebugLoc(MI);
1918
1919 Register Const = Builder.buildConstant(ShlType, MatchInfo.ValSum).getReg(0);
1920
1921 Register Shift1Base = MatchInfo.Shift2->getOperand(1).getReg();
1922 Register Shift1 =
1923 Builder.buildInstr(Opcode, {DestType}, {Shift1Base, Const}).getReg(0);
1924
1925 Register Shift2Const = MI.getOperand(2).getReg();
1926 Register Shift2 = Builder
1927 .buildInstr(Opcode, {DestType},
1928 {MatchInfo.LogicNonShiftReg, Shift2Const})
1929 .getReg(0);
1930
1931 Register Dest = MI.getOperand(0).getReg();
1932 Builder.buildInstr(MatchInfo.Logic->getOpcode(), {Dest}, {Shift1, Shift2});
1933
1934 // These were single-use, so it's safe to remove them.
1935 MatchInfo.Shift2->eraseFromParent();
1936 MatchInfo.Logic->eraseFromParent();
1937
1938 MI.eraseFromParent();
1939}
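// Instance of the rewrite above with invented constants, for ShiftOpcode ==
// G_SHL and LOGIC == G_AND:
//   %t1   = G_SHL %x, G_CONSTANT 2
//   %t2   = G_AND %t1, %y
//   %root = G_SHL %t2, G_CONSTANT 3
// becomes
//   %t3   = G_SHL %x, G_CONSTANT 5
//   %t4   = G_SHL %y, G_CONSTANT 3
//   %root = G_AND %t3, %t4
// which is only valid because 2 + 3 stays below the scalar bit width.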
1940
1941bool CombinerHelper::matchCombineMulToShl(MachineInstr &MI,
1942 unsigned &ShiftVal) {
1943 assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL")((void)0);
1944 auto MaybeImmVal =
1945 getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
1946 if (!MaybeImmVal)
1947 return false;
1948
1949 ShiftVal = MaybeImmVal->Value.exactLogBase2();
1950 return (static_cast<int32_t>(ShiftVal) != -1);
1951}
1952
1953void CombinerHelper::applyCombineMulToShl(MachineInstr &MI,
1954 unsigned &ShiftVal) {
1955 assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL")((void)0);
1956 MachineIRBuilder MIB(MI);
1957 LLT ShiftTy = MRI.getType(MI.getOperand(0).getReg());
1958 auto ShiftCst = MIB.buildConstant(ShiftTy, ShiftVal);
1959 Observer.changingInstr(MI);
1960 MI.setDesc(MIB.getTII().get(TargetOpcode::G_SHL));
1961 MI.getOperand(2).setReg(ShiftCst.getReg(0));
1962 Observer.changedInstr(MI);
1963}
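// E.g. a multiply by the (invented) power-of-two constant 8 is rewritten in
// place:  %d = G_MUL %x, G_CONSTANT 8   -->   %d = G_SHL %x, G_CONSTANT 3
// since exactLogBase2() of 8 is 3; non-power-of-two constants fail the match.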
1964
1965// shl ([sza]ext x), y => zext (shl x, y), if shift does not overflow source
1966bool CombinerHelper::matchCombineShlOfExtend(MachineInstr &MI,
1967 RegisterImmPair &MatchData) {
1968 assert(MI.getOpcode() == TargetOpcode::G_SHL && KB)((void)0);
1969
1970 Register LHS = MI.getOperand(1).getReg();
1971
1972 Register ExtSrc;
1973 if (!mi_match(LHS, MRI, m_GAnyExt(m_Reg(ExtSrc))) &&
1974 !mi_match(LHS, MRI, m_GZExt(m_Reg(ExtSrc))) &&
1975 !mi_match(LHS, MRI, m_GSExt(m_Reg(ExtSrc))))
1976 return false;
1977
1978 // TODO: Should handle vector splat.
1979 Register RHS = MI.getOperand(2).getReg();
1980 auto MaybeShiftAmtVal = getConstantVRegValWithLookThrough(RHS, MRI);
1981 if (!MaybeShiftAmtVal)
1982 return false;
1983
1984 if (LI) {
1985 LLT SrcTy = MRI.getType(ExtSrc);
1986
1987 // We only really care about the legality with the shifted value. We can
1988 // pick any type for the constant shift amount, so ask the target what to
1989 // use. Otherwise we would have to guess and hope it is reported as legal.
1990 LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(SrcTy);
1991 if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SHL, {SrcTy, ShiftAmtTy}}))
1992 return false;
1993 }
1994
1995 int64_t ShiftAmt = MaybeShiftAmtVal->Value.getSExtValue();
1996 MatchData.Reg = ExtSrc;
1997 MatchData.Imm = ShiftAmt;
1998
1999 unsigned MinLeadingZeros = KB->getKnownZeroes(ExtSrc).countLeadingOnes();
2000 return MinLeadingZeros >= ShiftAmt;
2001}
2002
2003void CombinerHelper::applyCombineShlOfExtend(MachineInstr &MI,
2004 const RegisterImmPair &MatchData) {
2005 Register ExtSrcReg = MatchData.Reg;
2006 int64_t ShiftAmtVal = MatchData.Imm;
2007
2008 LLT ExtSrcTy = MRI.getType(ExtSrcReg);
2009 Builder.setInstrAndDebugLoc(MI);
2010 auto ShiftAmt = Builder.buildConstant(ExtSrcTy, ShiftAmtVal);
2011 auto NarrowShift =
2012 Builder.buildShl(ExtSrcTy, ExtSrcReg, ShiftAmt, MI.getFlags());
2013 Builder.buildZExt(MI.getOperand(0), NarrowShift);
2014 MI.eraseFromParent();
2015}
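// Sketch of the fold above, assuming known-bits proves the shift cannot
// overflow the narrow source:
//   %w = G_ZEXT %x                    ; leading zeros of %x >= shift amount
//   %d = G_SHL %w, G_CONSTANT 2
// becomes
//   %n = G_SHL %x, G_CONSTANT 2       ; done in the narrow type
//   %d = G_ZEXT %n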
2016
2017bool CombinerHelper::matchCombineMergeUnmerge(MachineInstr &MI,
2018 Register &MatchInfo) {
2019 GMerge &Merge = cast<GMerge>(MI);
2020 SmallVector<Register, 16> MergedValues;
2021 for (unsigned I = 0; I < Merge.getNumSources(); ++I)
2022 MergedValues.emplace_back(Merge.getSourceReg(I));
2023
2024 auto *Unmerge = getOpcodeDef<GUnmerge>(MergedValues[0], MRI);
2025 if (!Unmerge || Unmerge->getNumDefs() != Merge.getNumSources())
2026 return false;
2027
2028 for (unsigned I = 0; I < MergedValues.size(); ++I)
2029 if (MergedValues[I] != Unmerge->getReg(I))
2030 return false;
2031
2032 MatchInfo = Unmerge->getSourceReg();
2033 return true;
2034}
2035
2036static Register peekThroughBitcast(Register Reg,
2037 const MachineRegisterInfo &MRI) {
2038 while (mi_match(Reg, MRI, m_GBitcast(m_Reg(Reg))))
2039 ;
2040
2041 return Reg;
2042}
2043
2044bool CombinerHelper::matchCombineUnmergeMergeToPlainValues(
2045 MachineInstr &MI, SmallVectorImpl<Register> &Operands) {
2046 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&((void)0)
2047 "Expected an unmerge")((void)0);
2048 Register SrcReg =
2049 peekThroughBitcast(MI.getOperand(MI.getNumOperands() - 1).getReg(), MRI);
2050
2051 MachineInstr *SrcInstr = MRI.getVRegDef(SrcReg);
2052 if (SrcInstr->getOpcode() != TargetOpcode::G_MERGE_VALUES &&
2053 SrcInstr->getOpcode() != TargetOpcode::G_BUILD_VECTOR &&
2054 SrcInstr->getOpcode() != TargetOpcode::G_CONCAT_VECTORS)
2055 return false;
2056
2057 // Check the source type of the merge.
2058 LLT SrcMergeTy = MRI.getType(SrcInstr->getOperand(1).getReg());
2059 LLT Dst0Ty = MRI.getType(MI.getOperand(0).getReg());
2060 bool SameSize = Dst0Ty.getSizeInBits() == SrcMergeTy.getSizeInBits();
2061 if (SrcMergeTy != Dst0Ty && !SameSize)
2062 return false;
2063 // They are the same now (modulo a bitcast).
2064 // We can collect all the src registers.
2065 for (unsigned Idx = 1, EndIdx = SrcInstr->getNumOperands(); Idx != EndIdx;
2066 ++Idx)
2067 Operands.push_back(SrcInstr->getOperand(Idx).getReg());
2068 return true;
2069}
2070
2071void CombinerHelper::applyCombineUnmergeMergeToPlainValues(
2072 MachineInstr &MI, SmallVectorImpl<Register> &Operands) {
2073 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&((void)0)
2074 "Expected an unmerge")((void)0);
2075 assert((MI.getNumOperands() - 1 == Operands.size()) &&((void)0)
2076 "Not enough operands to replace all defs")((void)0);
2077 unsigned NumElems = MI.getNumOperands() - 1;
2078
2079 LLT SrcTy = MRI.getType(Operands[0]);
2080 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
2081 bool CanReuseInputDirectly = DstTy == SrcTy;
2082 Builder.setInstrAndDebugLoc(MI);
2083 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2084 Register DstReg = MI.getOperand(Idx).getReg();
2085 Register SrcReg = Operands[Idx];
2086 if (CanReuseInputDirectly)
2087 replaceRegWith(MRI, DstReg, SrcReg);
2088 else
2089 Builder.buildCast(DstReg, SrcReg);
2090 }
2091 MI.eraseFromParent();
2092}
2093
2094bool CombinerHelper::matchCombineUnmergeConstant(MachineInstr &MI,
2095 SmallVectorImpl<APInt> &Csts) {
2096 unsigned SrcIdx = MI.getNumOperands() - 1;
2097 Register SrcReg = MI.getOperand(SrcIdx).getReg();
2098 MachineInstr *SrcInstr = MRI.getVRegDef(SrcReg);
2099 if (SrcInstr->getOpcode() != TargetOpcode::G_CONSTANT &&
2100 SrcInstr->getOpcode() != TargetOpcode::G_FCONSTANT)
2101 return false;
2102 // Break down the big constant into smaller ones.
2103 const MachineOperand &CstVal = SrcInstr->getOperand(1);
2104 APInt Val = SrcInstr->getOpcode() == TargetOpcode::G_CONSTANT
2105 ? CstVal.getCImm()->getValue()
2106 : CstVal.getFPImm()->getValueAPF().bitcastToAPInt();
2107
2108 LLT Dst0Ty = MRI.getType(MI.getOperand(0).getReg());
2109 unsigned ShiftAmt = Dst0Ty.getSizeInBits();
2110 // Unmerge a constant.
2111 for (unsigned Idx = 0; Idx != SrcIdx; ++Idx) {
2112 Csts.emplace_back(Val.trunc(ShiftAmt));
2113 Val = Val.lshr(ShiftAmt);
2114 }
2115
2116 return true;
2117}
2118
2119void CombinerHelper::applyCombineUnmergeConstant(MachineInstr &MI,
2120 SmallVectorImpl<APInt> &Csts) {
2121 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&((void)0)
2122 "Expected an unmerge")((void)0);
2123 assert((MI.getNumOperands() - 1 == Csts.size()) &&((void)0)
2124 "Not enough operands to replace all defs")((void)0);
2125 unsigned NumElems = MI.getNumOperands() - 1;
2126 Builder.setInstrAndDebugLoc(MI);
2127 for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2128 Register DstReg = MI.getOperand(Idx).getReg();
2129 Builder.buildConstant(DstReg, Csts[Idx]);
2130 }
2131
2132 MI.eraseFromParent();
2133}
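// For instance (constant invented), unmerging a 64-bit immediate into two s32
// pieces just materializes the low and high halves directly:
//   %lo, %hi = G_UNMERGE_VALUES (G_CONSTANT i64 0x0000000100000002)
// becomes
//   %lo = G_CONSTANT i32 2
//   %hi = G_CONSTANT i32 1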
2134
2135bool CombinerHelper::matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) {
2136 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&((void)0)
2137 "Expected an unmerge")((void)0);
2138 // Check that all the lanes are dead except the first one.
2139 for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
2140 if (!MRI.use_nodbg_empty(MI.getOperand(Idx).getReg()))
2141 return false;
2142 }
2143 return true;
2144}
2145
2146void CombinerHelper::applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) {
2147 Builder.setInstrAndDebugLoc(MI);
2148 Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
2149 // Truncating a vector is going to truncate every single lane,
2150 // whereas we want the low bits of the full value.
2151 // Do the operation on a scalar instead.
2152 LLT SrcTy = MRI.getType(SrcReg);
2153 if (SrcTy.isVector())
2154 SrcReg =
2155 Builder.buildCast(LLT::scalar(SrcTy.getSizeInBits()), SrcReg).getReg(0);
2156
2157 Register Dst0Reg = MI.getOperand(0).getReg();
2158 LLT Dst0Ty = MRI.getType(Dst0Reg);
2159 if (Dst0Ty.isVector()) {
2160 auto MIB = Builder.buildTrunc(LLT::scalar(Dst0Ty.getSizeInBits()), SrcReg);
2161 Builder.buildCast(Dst0Reg, MIB);
2162 } else
2163 Builder.buildTrunc(Dst0Reg, SrcReg);
2164 MI.eraseFromParent();
2165}
2166
2167bool CombinerHelper::matchCombineUnmergeZExtToZExt(MachineInstr &MI) {
2168 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&((void)0)
2169 "Expected an unmerge")((void)0);
2170 Register Dst0Reg = MI.getOperand(0).getReg();
2171 LLT Dst0Ty = MRI.getType(Dst0Reg);
2172 // G_ZEXT on a vector applies to each lane, so it will
2173 // affect all destinations. Therefore we won't be able
2174 // to simplify the unmerge to just the first definition.
2175 if (Dst0Ty.isVector())
2176 return false;
2177 Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
2178 LLT SrcTy = MRI.getType(SrcReg);
2179 if (SrcTy.isVector())
2180 return false;
2181
2182 Register ZExtSrcReg;
2183 if (!mi_match(SrcReg, MRI, m_GZExt(m_Reg(ZExtSrcReg))))
2184 return false;
2185
2186 // Finally we can replace the first definition with
2187 // a zext of the source if the definition is big enough to hold
2188 // all of the bits of ZExtSrc.
2189 LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);
2190 return ZExtSrcTy.getSizeInBits() <= Dst0Ty.getSizeInBits();
2191}
2192
2193void CombinerHelper::applyCombineUnmergeZExtToZExt(MachineInstr &MI) {
2194 assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&((void)0)
2195 "Expected an unmerge")((void)0);
2196
2197 Register Dst0Reg = MI.getOperand(0).getReg();
2198
2199 MachineInstr *ZExtInstr =
2200 MRI.getVRegDef(MI.getOperand(MI.getNumDefs()).getReg());
2201 assert(ZExtInstr && ZExtInstr->getOpcode() == TargetOpcode::G_ZEXT &&((void)0)
2202 "Expecting a G_ZEXT")((void)0);
2203
2204 Register ZExtSrcReg = ZExtInstr->getOperand(1).getReg();
2205 LLT Dst0Ty = MRI.getType(Dst0Reg);
2206 LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);
2207
2208 Builder.setInstrAndDebugLoc(MI);
2209
2210 if (Dst0Ty.getSizeInBits() > ZExtSrcTy.getSizeInBits()) {
2211 Builder.buildZExt(Dst0Reg, ZExtSrcReg);
2212 } else {
2213 assert(Dst0Ty.getSizeInBits() == ZExtSrcTy.getSizeInBits() &&((void)0)
2214 "ZExt src doesn't fit in destination")((void)0);
2215 replaceRegWith(MRI, Dst0Reg, ZExtSrcReg);
2216 }
2217
2218 Register ZeroReg;
2219 for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
2220 if (!ZeroReg)
2221 ZeroReg = Builder.buildConstant(Dst0Ty, 0).getReg(0);
2222 replaceRegWith(MRI, MI.getOperand(Idx).getReg(), ZeroReg);
2223 }
2224 MI.eraseFromParent();
2225}
2226
2227bool CombinerHelper::matchCombineShiftToUnmerge(MachineInstr &MI,
2228 unsigned TargetShiftSize,
2229 unsigned &ShiftVal) {
2230 assert((MI.getOpcode() == TargetOpcode::G_SHL ||((void)0)
2231 MI.getOpcode() == TargetOpcode::G_LSHR ||((void)0)
2232 MI.getOpcode() == TargetOpcode::G_ASHR) && "Expected a shift")((void)0);
2233
2234 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
2235 if (Ty.isVector()) // TODO:
2236 return false;
2237
2238 // Don't narrow further than the requested size.
2239 unsigned Size = Ty.getSizeInBits();
2240 if (Size <= TargetShiftSize)
2241 return false;
2242
2243 auto MaybeImmVal =
2244 getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
2245 if (!MaybeImmVal)
2246 return false;
2247
2248 ShiftVal = MaybeImmVal->Value.getSExtValue();
2249 return ShiftVal >= Size / 2 && ShiftVal < Size;
2250}
2251
2252void CombinerHelper::applyCombineShiftToUnmerge(MachineInstr &MI,
2253 const unsigned &ShiftVal) {
2254 Register DstReg = MI.getOperand(0).getReg();
2255 Register SrcReg = MI.getOperand(1).getReg();
2256 LLT Ty = MRI.getType(SrcReg);
2257 unsigned Size = Ty.getSizeInBits();
2258 unsigned HalfSize = Size / 2;
2259 assert(ShiftVal >= HalfSize)((void)0);
2260
2261 LLT HalfTy = LLT::scalar(HalfSize);
2262
2263 Builder.setInstr(MI);
2264 auto Unmerge = Builder.buildUnmerge(HalfTy, SrcReg);
2265 unsigned NarrowShiftAmt = ShiftVal - HalfSize;
2266
2267 if (MI.getOpcode() == TargetOpcode::G_LSHR) {
2268 Register Narrowed = Unmerge.getReg(1);
2269
2270 // dst = G_LSHR s64:x, C for C >= 32
2271 // =>
2272 // lo, hi = G_UNMERGE_VALUES x
2273 // dst = G_MERGE_VALUES (G_LSHR hi, C - 32), 0
2274
2275 if (NarrowShiftAmt != 0) {
2276 Narrowed = Builder.buildLShr(HalfTy, Narrowed,
2277 Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
2278 }
2279
2280 auto Zero = Builder.buildConstant(HalfTy, 0);
2281 Builder.buildMerge(DstReg, { Narrowed, Zero });
2282 } else if (MI.getOpcode() == TargetOpcode::G_SHL) {
2283 Register Narrowed = Unmerge.getReg(0);
2284 // dst = G_SHL s64:x, C for C >= 32
2285 // =>
2286 // lo, hi = G_UNMERGE_VALUES x
2287 // dst = G_MERGE_VALUES 0, (G_SHL hi, C - 32)
2288 if (NarrowShiftAmt != 0) {
2289 Narrowed = Builder.buildShl(HalfTy, Narrowed,
2290 Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
2291 }
2292
2293 auto Zero = Builder.buildConstant(HalfTy, 0);
2294 Builder.buildMerge(DstReg, { Zero, Narrowed });
2295 } else {
2296 assert(MI.getOpcode() == TargetOpcode::G_ASHR)((void)0);
2297 auto Hi = Builder.buildAShr(
2298 HalfTy, Unmerge.getReg(1),
2299 Builder.buildConstant(HalfTy, HalfSize - 1));
2300
2301 if (ShiftVal == HalfSize) {
2302 // (G_ASHR i64:x, 32) ->
2303 // G_MERGE_VALUES hi_32(x), (G_ASHR hi_32(x), 31)
2304 Builder.buildMerge(DstReg, { Unmerge.getReg(1), Hi });
2305 } else if (ShiftVal == Size - 1) {
2306 // Don't need a second shift.
2307 // (G_ASHR i64:x, 63) ->
2308 // %narrowed = (G_ASHR hi_32(x), 31)
2309 // G_MERGE_VALUES %narrowed, %narrowed
2310 Builder.buildMerge(DstReg, { Hi, Hi });
2311 } else {
2312 auto Lo = Builder.buildAShr(
2313 HalfTy, Unmerge.getReg(1),
2314 Builder.buildConstant(HalfTy, ShiftVal - HalfSize));
2315
2316 // (G_ASHR i64:x, C) ->, for C >= 32
2317 // G_MERGE_VALUES (G_ASHR hi_32(x), C - 32), (G_ASHR hi_32(x), 31)
2318 Builder.buildMerge(DstReg, { Lo, Hi });
2319 }
2320 }
2321
2322 MI.eraseFromParent();
2323}
2324
2325bool CombinerHelper::tryCombineShiftToUnmerge(MachineInstr &MI,
2326 unsigned TargetShiftAmount) {
2327 unsigned ShiftAmt;
2328 if (matchCombineShiftToUnmerge(MI, TargetShiftAmount, ShiftAmt)) {
2329 applyCombineShiftToUnmerge(MI, ShiftAmt);
2330 return true;
2331 }
2332
2333 return false;
2334}
2335
2336bool CombinerHelper::matchCombineI2PToP2I(MachineInstr &MI, Register &Reg) {
2337 assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR")((void)0);
2338 Register DstReg = MI.getOperand(0).getReg();
2339 LLT DstTy = MRI.getType(DstReg);
2340 Register SrcReg = MI.getOperand(1).getReg();
2341 return mi_match(SrcReg, MRI,
2342 m_GPtrToInt(m_all_of(m_SpecificType(DstTy), m_Reg(Reg))));
2343}
2344
2345void CombinerHelper::applyCombineI2PToP2I(MachineInstr &MI, Register &Reg) {
2346 assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR")((void)0);
2347 Register DstReg = MI.getOperand(0).getReg();
2348 Builder.setInstr(MI);
2349 Builder.buildCopy(DstReg, Reg);
2350 MI.eraseFromParent();
2351}
2352
2353bool CombinerHelper::matchCombineP2IToI2P(MachineInstr &MI, Register &Reg) {
2354 assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT")((void)0);
2355 Register SrcReg = MI.getOperand(1).getReg();
2356 return mi_match(SrcReg, MRI, m_GIntToPtr(m_Reg(Reg)));
2357}
2358
2359void CombinerHelper::applyCombineP2IToI2P(MachineInstr &MI, Register &Reg) {
2360 assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT")((void)0);
2361 Register DstReg = MI.getOperand(0).getReg();
2362 Builder.setInstr(MI);
2363 Builder.buildZExtOrTrunc(DstReg, Reg);
2364 MI.eraseFromParent();
2365}
2366
2367bool CombinerHelper::matchCombineAddP2IToPtrAdd(
2368 MachineInstr &MI, std::pair<Register, bool> &PtrReg) {
2369 assert(MI.getOpcode() == TargetOpcode::G_ADD)((void)0);
2370 Register LHS = MI.getOperand(1).getReg();
2371 Register RHS = MI.getOperand(2).getReg();
2372 LLT IntTy = MRI.getType(LHS);
2373
2374 // G_PTR_ADD always has the pointer in the LHS, so we may need to commute the
2375 // instruction.
2376 PtrReg.second = false;
2377 for (Register SrcReg : {LHS, RHS}) {
2378 if (mi_match(SrcReg, MRI, m_GPtrToInt(m_Reg(PtrReg.first)))) {
2379 // Don't handle cases where the integer is implicitly converted to the
2380 // pointer width.
2381 LLT PtrTy = MRI.getType(PtrReg.first);
2382 if (PtrTy.getScalarSizeInBits() == IntTy.getScalarSizeInBits())
2383 return true;
2384 }
2385
2386 PtrReg.second = true;
2387 }
2388
2389 return false;
2390}
2391
2392void CombinerHelper::applyCombineAddP2IToPtrAdd(
2393 MachineInstr &MI, std::pair<Register, bool> &PtrReg) {
2394 Register Dst = MI.getOperand(0).getReg();
2395 Register LHS = MI.getOperand(1).getReg();
2396 Register RHS = MI.getOperand(2).getReg();
2397
2398 const bool DoCommute = PtrReg.second;
2399 if (DoCommute)
2400 std::swap(LHS, RHS);
2401 LHS = PtrReg.first;
2402
2403 LLT PtrTy = MRI.getType(LHS);
2404
2405 Builder.setInstrAndDebugLoc(MI);
2406 auto PtrAdd = Builder.buildPtrAdd(PtrTy, LHS, RHS);
2407 Builder.buildPtrToInt(Dst, PtrAdd);
2408 MI.eraseFromParent();
2409}
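// Shape of the rewrite above (names invented); the commute flag handles the
// case where the G_PTRTOINT feeds the RHS of the add instead of the LHS:
//   %i = G_PTRTOINT %p
//   %d = G_ADD %i, %off
// becomes
//   %a = G_PTR_ADD %p, %off
//   %d = G_PTRTOINT %a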
2410
2411bool CombinerHelper::matchCombineConstPtrAddToI2P(MachineInstr &MI,
2412 int64_t &NewCst) {
2413 assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected a G_PTR_ADD")((void)0);
2414 Register LHS = MI.getOperand(1).getReg();
2415 Register RHS = MI.getOperand(2).getReg();
2416 MachineRegisterInfo &MRI = Builder.getMF().getRegInfo();
2417
2418 if (auto RHSCst = getConstantVRegSExtVal(RHS, MRI)) {
2419 int64_t Cst;
2420 if (mi_match(LHS, MRI, m_GIntToPtr(m_ICst(Cst)))) {
2421 NewCst = Cst + *RHSCst;
2422 return true;
2423 }
2424 }
2425
2426 return false;
2427}
2428
2429void CombinerHelper::applyCombineConstPtrAddToI2P(MachineInstr &MI,
2430 int64_t &NewCst) {
2431 assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected a G_PTR_ADD")((void)0);
2432 Register Dst = MI.getOperand(0).getReg();
2433
2434 Builder.setInstrAndDebugLoc(MI);
2435 Builder.buildConstant(Dst, NewCst);
2436 MI.eraseFromParent();
2437}
2438
2439bool CombinerHelper::matchCombineAnyExtTrunc(MachineInstr &MI, Register &Reg) {
2440 assert(MI.getOpcode() == TargetOpcode::G_ANYEXT && "Expected a G_ANYEXT")((void)0);
2441 Register DstReg = MI.getOperand(0).getReg();
2442 Register SrcReg = MI.getOperand(1).getReg();
2443 LLT DstTy = MRI.getType(DstReg);
2444 return mi_match(SrcReg, MRI,
2445 m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy))));
2446}
2447
2448bool CombinerHelper::matchCombineZextTrunc(MachineInstr &MI, Register &Reg) {
2449 assert(MI.getOpcode() == TargetOpcode::G_ZEXT && "Expected a G_ZEXT")((void)0);
2450 Register DstReg = MI.getOperand(0).getReg();
2451 Register SrcReg = MI.getOperand(1).getReg();
2452 LLT DstTy = MRI.getType(DstReg);
2453 if (mi_match(SrcReg, MRI,
2454 m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy))))) {
2455 unsigned DstSize = DstTy.getScalarSizeInBits();
2456 unsigned SrcSize = MRI.getType(SrcReg).getScalarSizeInBits();
2457 return KB->getKnownBits(Reg).countMinLeadingZeros() >= DstSize - SrcSize;
2458 }
2459 return false;
2460}
2461
2462bool CombinerHelper::matchCombineExtOfExt(
2463 MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
2464 assert((MI.getOpcode() == TargetOpcode::G_ANYEXT ||((void)0)
2465 MI.getOpcode() == TargetOpcode::G_SEXT ||((void)0)
2466 MI.getOpcode() == TargetOpcode::G_ZEXT) &&((void)0)
2467 "Expected a G_[ASZ]EXT")((void)0);
2468 Register SrcReg = MI.getOperand(1).getReg();
2469 MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
2470 // Match exts with the same opcode, anyext([sz]ext) and sext(zext).
2471 unsigned Opc = MI.getOpcode();
2472 unsigned SrcOpc = SrcMI->getOpcode();
2473 if (Opc == SrcOpc ||
2474 (Opc == TargetOpcode::G_ANYEXT &&
2475 (SrcOpc == TargetOpcode::G_SEXT || SrcOpc == TargetOpcode::G_ZEXT)) ||
2476 (Opc == TargetOpcode::G_SEXT && SrcOpc == TargetOpcode::G_ZEXT)) {
2477 MatchInfo = std::make_tuple(SrcMI->getOperand(1).getReg(), SrcOpc);
2478 return true;
2479 }
2480 return false;
2481}
2482
2483void CombinerHelper::applyCombineExtOfExt(
2484 MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
2485 assert((MI.getOpcode() == TargetOpcode::G_ANYEXT ||((void)0)
2486 MI.getOpcode() == TargetOpcode::G_SEXT ||((void)0)
2487 MI.getOpcode() == TargetOpcode::G_ZEXT) &&((void)0)
2488 "Expected a G_[ASZ]EXT")((void)0);
2489
2490 Register Reg = std::get<0>(MatchInfo);
2491 unsigned SrcExtOp = std::get<1>(MatchInfo);
2492
2493 // Combine exts with the same opcode.
2494 if (MI.getOpcode() == SrcExtOp) {
2495 Observer.changingInstr(MI);
2496 MI.getOperand(1).setReg(Reg);
2497 Observer.changedInstr(MI);
2498 return;
2499 }
2500
2501 // Combine:
2502 // - anyext([sz]ext x) to [sz]ext x
2503 // - sext(zext x) to zext x
2504 if (MI.getOpcode() == TargetOpcode::G_ANYEXT ||
2505 (MI.getOpcode() == TargetOpcode::G_SEXT &&
2506 SrcExtOp == TargetOpcode::G_ZEXT)) {
2507 Register DstReg = MI.getOperand(0).getReg();
2508 Builder.setInstrAndDebugLoc(MI);
2509 Builder.buildInstr(SrcExtOp, {DstReg}, {Reg});
2510 MI.eraseFromParent();
2511 }
2512}
2513
2514void CombinerHelper::applyCombineMulByNegativeOne(MachineInstr &MI) {
2515 assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL")((void)0);
2516 Register DstReg = MI.getOperand(0).getReg();
2517 Register SrcReg = MI.getOperand(1).getReg();
2518 LLT DstTy = MRI.getType(DstReg);
2519
2520 Builder.setInstrAndDebugLoc(MI);
2521 Builder.buildSub(DstReg, Builder.buildConstant(DstTy, 0), SrcReg,
2522 MI.getFlags());
2523 MI.eraseFromParent();
2524}
2525
2526bool CombinerHelper::matchCombineFNegOfFNeg(MachineInstr &MI, Register &Reg) {
2527 assert(MI.getOpcode() == TargetOpcode::G_FNEG && "Expected a G_FNEG")((void)0);
2528 Register SrcReg = MI.getOperand(1).getReg();
2529 return mi_match(SrcReg, MRI, m_GFNeg(m_Reg(Reg)));
2530}
2531
2532bool CombinerHelper::matchCombineFAbsOfFAbs(MachineInstr &MI, Register &Src) {
2533 assert(MI.getOpcode() == TargetOpcode::G_FABS && "Expected a G_FABS")((void)0);
2534 Src = MI.getOperand(1).getReg();
2535 Register AbsSrc;
2536 return mi_match(Src, MRI, m_GFabs(m_Reg(AbsSrc)));
2537}
2538
2539bool CombinerHelper::matchCombineTruncOfExt(
2540 MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) {
2541 assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC")((void)0);
2542 Register SrcReg = MI.getOperand(1).getReg();
2543 MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
2544 unsigned SrcOpc = SrcMI->getOpcode();
2545 if (SrcOpc == TargetOpcode::G_ANYEXT || SrcOpc == TargetOpcode::G_SEXT ||
2546 SrcOpc == TargetOpcode::G_ZEXT) {
2547 MatchInfo = std::make_pair(SrcMI->getOperand(1).getReg(), SrcOpc);
2548 return true;
2549 }
2550 return false;
2551}
2552
2553void CombinerHelper::applyCombineTruncOfExt(
2554 MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) {
2555 assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC")((void)0);
2556 Register SrcReg = MatchInfo.first;
2557 unsigned SrcExtOp = MatchInfo.second;
2558 Register DstReg = MI.getOperand(0).getReg();
2559 LLT SrcTy = MRI.getType(SrcReg);
2560 LLT DstTy = MRI.getType(DstReg);
2561 if (SrcTy == DstTy) {
2562 MI.eraseFromParent();
2563 replaceRegWith(MRI, DstReg, SrcReg);
2564 return;
2565 }
2566 Builder.setInstrAndDebugLoc(MI);
2567 if (SrcTy.getSizeInBits() < DstTy.getSizeInBits())
2568 Builder.buildInstr(SrcExtOp, {DstReg}, {SrcReg});
2569 else
2570 Builder.buildTrunc(DstReg, SrcReg);
2571 MI.eraseFromParent();
2572}
2573
2574bool CombinerHelper::matchCombineTruncOfShl(
2575 MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
2576 assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC")((void)0);
2577 Register DstReg = MI.getOperand(0).getReg();
2578 Register SrcReg = MI.getOperand(1).getReg();
2579 LLT DstTy = MRI.getType(DstReg);
2580 Register ShiftSrc;
2581 Register ShiftAmt;
2582
2583 if (MRI.hasOneNonDBGUse(SrcReg) &&
2584 mi_match(SrcReg, MRI, m_GShl(m_Reg(ShiftSrc), m_Reg(ShiftAmt))) &&
2585 isLegalOrBeforeLegalizer(
2586 {TargetOpcode::G_SHL,
2587 {DstTy, getTargetLowering().getPreferredShiftAmountTy(DstTy)}})) {
2588 KnownBits Known = KB->getKnownBits(ShiftAmt);
2589 unsigned Size = DstTy.getSizeInBits();
2590 if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) {
2591 MatchInfo = std::make_pair(ShiftSrc, ShiftAmt);
2592 return true;
2593 }
2594 }
2595 return false;
2596}
2597
2598void CombinerHelper::applyCombineTruncOfShl(
2599 MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
2600 assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC")((void)0);
2601 Register DstReg = MI.getOperand(0).getReg();
2602 Register SrcReg = MI.getOperand(1).getReg();
2603 LLT DstTy = MRI.getType(DstReg);
2604 MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
2605
2606 Register ShiftSrc = MatchInfo.first;
2607 Register ShiftAmt = MatchInfo.second;
2608 Builder.setInstrAndDebugLoc(MI);
2609 auto TruncShiftSrc = Builder.buildTrunc(DstTy, ShiftSrc);
2610 Builder.buildShl(DstReg, TruncShiftSrc, ShiftAmt, SrcMI->getFlags());
2611 MI.eraseFromParent();
2612}
2613
2614bool CombinerHelper::matchAnyExplicitUseIsUndef(MachineInstr &MI) {
2615 return any_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
2616 return MO.isReg() &&
2617 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
2618 });
2619}
2620
2621bool CombinerHelper::matchAllExplicitUsesAreUndef(MachineInstr &MI) {
2622 return all_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
2623 return !MO.isReg() ||
2624 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
2625 });
2626}
2627
2628bool CombinerHelper::matchUndefShuffleVectorMask(MachineInstr &MI) {
2629 assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR)((void)0);
2630 ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
2631 return all_of(Mask, [](int Elt) { return Elt < 0; });
2632}
2633
2634bool CombinerHelper::matchUndefStore(MachineInstr &MI) {
2635 assert(MI.getOpcode() == TargetOpcode::G_STORE)((void)0);
2636 return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(0).getReg(),
2637 MRI);
2638}
2639
2640bool CombinerHelper::matchUndefSelectCmp(MachineInstr &MI) {
2641 assert(MI.getOpcode() == TargetOpcode::G_SELECT)((void)0);
2642 return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(1).getReg(),
2643 MRI);
2644}
2645
2646bool CombinerHelper::matchConstantSelectCmp(MachineInstr &MI, unsigned &OpIdx) {
2647 assert(MI.getOpcode() == TargetOpcode::G_SELECT)((void)0);
2648 if (auto MaybeCstCmp =
2649 getConstantVRegValWithLookThrough(MI.getOperand(1).getReg(), MRI)) {
2650 OpIdx = MaybeCstCmp->Value.isNullValue() ? 3 : 2;
2651 return true;
2652 }
2653 return false;
2654}
2655
2656bool CombinerHelper::eraseInst(MachineInstr &MI) {
2657 MI.eraseFromParent();
2658 return true;
2659}
2660
2661bool CombinerHelper::matchEqualDefs(const MachineOperand &MOP1,
2662 const MachineOperand &MOP2) {
2663 if (!MOP1.isReg() || !MOP2.isReg())
2664 return false;
2665 MachineInstr *I1 = getDefIgnoringCopies(MOP1.getReg(), MRI);
2666 if (!I1)
2667 return false;
2668 MachineInstr *I2 = getDefIgnoringCopies(MOP2.getReg(), MRI);
2669 if (!I2)
2670 return false;
2671
2672 // Handle a case like this:
2673 //
2674 // %0:_(s64), %1:_(s64) = G_UNMERGE_VALUES %2:_(<2 x s64>)
2675 //
2676 // Even though %0 and %1 are produced by the same instruction they are not
2677 // the same values.
2678 if (I1 == I2)
2679 return MOP1.getReg() == MOP2.getReg();
2680
2681 // If we have an instruction which loads or stores, we can't guarantee that
2682 // it is identical.
2683 //
2684 // For example, we may have
2685 //
2686 // %x1 = G_LOAD %addr (load N from @somewhere)
2687 // ...
2688 // call @foo
2689 // ...
2690 // %x2 = G_LOAD %addr (load N from @somewhere)
2691 // ...
2692 // %or = G_OR %x1, %x2
2693 //
2694 // It's possible that @foo will modify whatever lives at the address we're
2695 // loading from. To be safe, let's just assume that all loads and stores
2696 // are different (unless we have something which is guaranteed to not
2697 // change.)
2698 if (I1->mayLoadOrStore() && !I1->isDereferenceableInvariantLoad(nullptr))
2699 return false;
2700
2701 // Check for physical registers on the instructions first to avoid cases
2702 // like this:
2703 //
2704 // %a = COPY $physreg
2705 // ...
2706 // SOMETHING implicit-def $physreg
2707 // ...
2708 // %b = COPY $physreg
2709 //
2710 // These copies are not equivalent.
2711 if (any_of(I1->uses(), [](const MachineOperand &MO) {
2712 return MO.isReg() && MO.getReg().isPhysical();
2713 })) {
2714 // Check if we have a case like this:
2715 //
2716 // %a = COPY $physreg
2717 // %b = COPY %a
2718 //
2719 // In this case, I1 and I2 will both be equal to %a = COPY $physreg.
2720 // From that, we know that they must have the same value, since they must
2721 // have come from the same COPY.
2722 return I1->isIdenticalTo(*I2);
2723 }
2724
2725 // We don't have any physical registers, so we don't necessarily need the
2726 // same vreg defs.
2727 //
2728 // On the off-chance that there's some target instruction feeding into the
2729 // instruction, let's use produceSameValue instead of isIdenticalTo.
2730 return Builder.getTII().produceSameValue(*I1, *I2, &MRI);
2731}
2732
2733bool CombinerHelper::matchConstantOp(const MachineOperand &MOP, int64_t C) {
2734 if (!MOP.isReg())
2735 return false;
2736 // MIPatternMatch doesn't let us look through G_ZEXT etc.
2737 auto ValAndVReg = getConstantVRegValWithLookThrough(MOP.getReg(), MRI);
2738 return ValAndVReg && ValAndVReg->Value == C;
2739}
2740
2741bool CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI,
2742 unsigned OpIdx) {
2743 assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?")((void)0);
2744 Register OldReg = MI.getOperand(0).getReg();
2745 Register Replacement = MI.getOperand(OpIdx).getReg();
2746 assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?")((void)0);
2747 MI.eraseFromParent();
2748 replaceRegWith(MRI, OldReg, Replacement);
2749 return true;
2750}
2751
2752bool CombinerHelper::replaceSingleDefInstWithReg(MachineInstr &MI,
2753 Register Replacement) {
2754 assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?")((void)0);
2755 Register OldReg = MI.getOperand(0).getReg();
2756 assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?")((void)0);
2757 MI.eraseFromParent();
2758 replaceRegWith(MRI, OldReg, Replacement);
2759 return true;
2760}
2761
2762bool CombinerHelper::matchSelectSameVal(MachineInstr &MI) {
2763 assert(MI.getOpcode() == TargetOpcode::G_SELECT)((void)0);
2764 // Match (cond ? x : x)
2765 return matchEqualDefs(MI.getOperand(2), MI.getOperand(3)) &&
2766 canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(2).getReg(),
2767 MRI);
2768}
2769
2770bool CombinerHelper::matchBinOpSameVal(MachineInstr &MI) {
2771 return matchEqualDefs(MI.getOperand(1), MI.getOperand(2)) &&
2772 canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(),
2773 MRI);
2774}
2775
2776bool CombinerHelper::matchOperandIsZero(MachineInstr &MI, unsigned OpIdx) {
2777 return matchConstantOp(MI.getOperand(OpIdx), 0) &&
2778 canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(OpIdx).getReg(),
2779 MRI);
2780}
2781
2782bool CombinerHelper::matchOperandIsUndef(MachineInstr &MI, unsigned OpIdx) {
2783 MachineOperand &MO = MI.getOperand(OpIdx);
2784 return MO.isReg() &&
2785 getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
2786}
2787
2788bool CombinerHelper::matchOperandIsKnownToBeAPowerOfTwo(MachineInstr &MI,
2789 unsigned OpIdx) {
2790 MachineOperand &MO = MI.getOperand(OpIdx);
2791 return isKnownToBeAPowerOfTwo(MO.getReg(), MRI, KB);
2792}
2793
2794bool CombinerHelper::replaceInstWithFConstant(MachineInstr &MI, double C) {
2795 assert(MI.getNumDefs() == 1 && "Expected only one def?")((void)0);
2796 Builder.setInstr(MI);
2797 Builder.buildFConstant(MI.getOperand(0), C);
2798 MI.eraseFromParent();
2799 return true;
2800}
2801
2802bool CombinerHelper::replaceInstWithConstant(MachineInstr &MI, int64_t C) {
2803 assert(MI.getNumDefs() == 1 && "Expected only one def?")((void)0);
2804 Builder.setInstr(MI);
2805 Builder.buildConstant(MI.getOperand(0), C);
2806 MI.eraseFromParent();
2807 return true;
2808}
2809
2810bool CombinerHelper::replaceInstWithConstant(MachineInstr &MI, APInt C) {
2811 assert(MI.getNumDefs() == 1 && "Expected only one def?")((void)0);
2812 Builder.setInstr(MI);
2813 Builder.buildConstant(MI.getOperand(0), C);
2814 MI.eraseFromParent();
2815 return true;
2816}
2817
2818bool CombinerHelper::replaceInstWithUndef(MachineInstr &MI) {
2819 assert(MI.getNumDefs() == 1 && "Expected only one def?")((void)0);
2820 Builder.setInstr(MI);
2821 Builder.buildUndef(MI.getOperand(0));
2822 MI.eraseFromParent();
2823 return true;
2824}
2825
2826bool CombinerHelper::matchSimplifyAddToSub(
2827 MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) {
2828 Register LHS = MI.getOperand(1).getReg();
2829 Register RHS = MI.getOperand(2).getReg();
2830 Register &NewLHS = std::get<0>(MatchInfo);
2831 Register &NewRHS = std::get<1>(MatchInfo);
2832
2833 // Helper lambda to check for opportunities for
2834 // ((0-A) + B) -> B - A
2835 // (A + (0-B)) -> A - B
2836 auto CheckFold = [&](Register &MaybeSub, Register &MaybeNewLHS) {
2837 if (!mi_match(MaybeSub, MRI, m_Neg(m_Reg(NewRHS))))
2838 return false;
2839 NewLHS = MaybeNewLHS;
2840 return true;
2841 };
2842
2843 return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
2844}
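// Editor's note: an illustrative sketch, not part of the analyzed source.
// It spot-checks the scalar identities behind matchSimplifyAddToSub,
// assuming ordinary two's-complement (wrapping) unsigned arithmetic.
#include <cassert>
#include <cstdint>

static void demoSimplifyAddToSub(uint32_t A, uint32_t B) {
  assert((0u - A) + B == B - A); // ((0-A) + B) -> B - A
  assert(A + (0u - B) == A - B); // (A + (0-B)) -> A - B
}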
2845
2846bool CombinerHelper::matchCombineInsertVecElts(
2847 MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) {
2848 assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT &&((void)0)
2849 "Invalid opcode")((void)0);
2850 Register DstReg = MI.getOperand(0).getReg();
2851 LLT DstTy = MRI.getType(DstReg);
2852 assert(DstTy.isVector() && "Invalid G_INSERT_VECTOR_ELT?")((void)0);
2853 unsigned NumElts = DstTy.getNumElements();
2854 // If this MI is part of a sequence of insert_vec_elts, then
2855 // don't do the combine in the middle of the sequence.
2856 if (MRI.hasOneUse(DstReg) && MRI.use_instr_begin(DstReg)->getOpcode() ==
2857 TargetOpcode::G_INSERT_VECTOR_ELT)
2858 return false;
2859 MachineInstr *CurrInst = &MI;
2860 MachineInstr *TmpInst;
2861 int64_t IntImm;
2862 Register TmpReg;
2863 MatchInfo.resize(NumElts);
2864 while (mi_match(
2865 CurrInst->getOperand(0).getReg(), MRI,
2866 m_GInsertVecElt(m_MInstr(TmpInst), m_Reg(TmpReg), m_ICst(IntImm)))) {
2867 if (IntImm >= NumElts)
2868 return false;
2869 if (!MatchInfo[IntImm])
2870 MatchInfo[IntImm] = TmpReg;
2871 CurrInst = TmpInst;
2872 }
2873 // Variable index.
2874 if (CurrInst->getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
2875 return false;
2876 if (TmpInst->getOpcode() == TargetOpcode::G_BUILD_VECTOR) {
2877 for (unsigned I = 1; I < TmpInst->getNumOperands(); ++I) {
2878 if (!MatchInfo[I - 1].isValid())
2879 MatchInfo[I - 1] = TmpInst->getOperand(I).getReg();
2880 }
2881 return true;
2882 }
2883 // If we didn't end in a G_IMPLICIT_DEF, bail out.
2884 return TmpInst->getOpcode() == TargetOpcode::G_IMPLICIT_DEF;
2885}
2886
2887void CombinerHelper::applyCombineInsertVecElts(
2888 MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) {
2889 Builder.setInstr(MI);
2890 Register UndefReg;
2891 auto GetUndef = [&]() {
2892 if (UndefReg)
2893 return UndefReg;
2894 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
2895 UndefReg = Builder.buildUndef(DstTy.getScalarType()).getReg(0);
2896 return UndefReg;
2897 };
2898 for (unsigned I = 0; I < MatchInfo.size(); ++I) {
2899 if (!MatchInfo[I])
2900 MatchInfo[I] = GetUndef();
2901 }
2902 Builder.buildBuildVector(MI.getOperand(0).getReg(), MatchInfo);
2903 MI.eraseFromParent();
2904}
2905
2906void CombinerHelper::applySimplifyAddToSub(
2907 MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) {
2908 Builder.setInstr(MI);
2909 Register SubLHS, SubRHS;
2910 std::tie(SubLHS, SubRHS) = MatchInfo;
2911 Builder.buildSub(MI.getOperand(0).getReg(), SubLHS, SubRHS);
2912 MI.eraseFromParent();
2913}
2914
2915bool CombinerHelper::matchHoistLogicOpWithSameOpcodeHands(
2916 MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) {
2917 // Matches: logic (hand x, ...), (hand y, ...) -> hand (logic x, y), ...
2918 //
2919 // Creates the new hand + logic instruction (but does not insert them.)
2920 //
2921 // On success, MatchInfo is populated with the new instructions. These are
2922 // inserted in applyHoistLogicOpWithSameOpcodeHands.
2923 unsigned LogicOpcode = MI.getOpcode();
2924 assert(LogicOpcode == TargetOpcode::G_AND ||((void)0)
2925 LogicOpcode == TargetOpcode::G_OR ||((void)0)
2926 LogicOpcode == TargetOpcode::G_XOR)((void)0);
2927 MachineIRBuilder MIB(MI);
2928 Register Dst = MI.getOperand(0).getReg();
2929 Register LHSReg = MI.getOperand(1).getReg();
2930 Register RHSReg = MI.getOperand(2).getReg();
2931
2932 // Don't recompute anything.
2933 if (!MRI.hasOneNonDBGUse(LHSReg) || !MRI.hasOneNonDBGUse(RHSReg))
2934 return false;
2935
2936 // Make sure we have (hand x, ...), (hand y, ...)
2937 MachineInstr *LeftHandInst = getDefIgnoringCopies(LHSReg, MRI);
2938 MachineInstr *RightHandInst = getDefIgnoringCopies(RHSReg, MRI);
2939 if (!LeftHandInst || !RightHandInst)
2940 return false;
2941 unsigned HandOpcode = LeftHandInst->getOpcode();
2942 if (HandOpcode != RightHandInst->getOpcode())
2943 return false;
2944 if (!LeftHandInst->getOperand(1).isReg() ||
2945 !RightHandInst->getOperand(1).isReg())
2946 return false;
2947
2948 // Make sure the types match up, and if we're doing this post-legalization,
2949 // we end up with legal types.
2950 Register X = LeftHandInst->getOperand(1).getReg();
2951 Register Y = RightHandInst->getOperand(1).getReg();
2952 LLT XTy = MRI.getType(X);
2953 LLT YTy = MRI.getType(Y);
2954 if (XTy != YTy)
2955 return false;
2956 if (!isLegalOrBeforeLegalizer({LogicOpcode, {XTy, YTy}}))
2957 return false;
2958
2959 // Optional extra source register.
2960 Register ExtraHandOpSrcReg;
2961 switch (HandOpcode) {
2962 default:
2963 return false;
2964 case TargetOpcode::G_ANYEXT:
2965 case TargetOpcode::G_SEXT:
2966 case TargetOpcode::G_ZEXT: {
2967 // Match: logic (ext X), (ext Y) --> ext (logic X, Y)
2968 break;
2969 }
2970 case TargetOpcode::G_AND:
2971 case TargetOpcode::G_ASHR:
2972 case TargetOpcode::G_LSHR:
2973 case TargetOpcode::G_SHL: {
2974 // Match: logic (binop x, z), (binop y, z) -> binop (logic x, y), z
2975 MachineOperand &ZOp = LeftHandInst->getOperand(2);
2976 if (!matchEqualDefs(ZOp, RightHandInst->getOperand(2)))
2977 return false;
2978 ExtraHandOpSrcReg = ZOp.getReg();
2979 break;
2980 }
2981 }
2982
2983 // Record the steps to build the new instructions.
2984 //
2985 // Steps to build (logic x, y)
2986 auto NewLogicDst = MRI.createGenericVirtualRegister(XTy);
2987 OperandBuildSteps LogicBuildSteps = {
2988 [=](MachineInstrBuilder &MIB) { MIB.addDef(NewLogicDst); },
2989 [=](MachineInstrBuilder &MIB) { MIB.addReg(X); },
2990 [=](MachineInstrBuilder &MIB) { MIB.addReg(Y); }};
2991 InstructionBuildSteps LogicSteps(LogicOpcode, LogicBuildSteps);
2992
2993 // Steps to build hand (logic x, y), ...z
2994 OperandBuildSteps HandBuildSteps = {
2995 [=](MachineInstrBuilder &MIB) { MIB.addDef(Dst); },
2996 [=](MachineInstrBuilder &MIB) { MIB.addReg(NewLogicDst); }};
2997 if (ExtraHandOpSrcReg.isValid())
2998 HandBuildSteps.push_back(
2999 [=](MachineInstrBuilder &MIB) { MIB.addReg(ExtraHandOpSrcReg); });
3000 InstructionBuildSteps HandSteps(HandOpcode, HandBuildSteps);
3001
3002 MatchInfo = InstructionStepsMatchInfo({LogicSteps, HandSteps});
3003 return true;
3004}
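// Editor's note: an illustrative sketch, not part of the analyzed source.
// It checks the hoisting rewrite on plain integers: a logic op applied to
// two hands with a shared operand can instead be applied to the hands'
// first operands, e.g. (x << z) & (y << z) == (x & y) << z, and
// (zext x) | (zext y) == zext (x | y).
#include <cassert>
#include <cstdint>

static void demoHoistLogicOpWithSameOpcodeHands(uint32_t X, uint32_t Y,
                                                unsigned Z) {
  Z &= 31; // keep the shared shift amount in range
  assert(((X << Z) & (Y << Z)) == ((X & Y) << Z));
  uint8_t A = static_cast<uint8_t>(X), B = static_cast<uint8_t>(Y);
  assert((uint32_t(A) | uint32_t(B)) == uint32_t(uint8_t(A | B)));
}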
3005
3006void CombinerHelper::applyBuildInstructionSteps(
3007 MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) {
3008 assert(MatchInfo.InstrsToBuild.size() &&((void)0)
3009 "Expected at least one instr to build?")((void)0);
3010 Builder.setInstr(MI);
3011 for (auto &InstrToBuild : MatchInfo.InstrsToBuild) {
3012 assert(InstrToBuild.Opcode && "Expected a valid opcode?")((void)0);
3013 assert(InstrToBuild.OperandFns.size() && "Expected at least one operand?")((void)0);
3014 MachineInstrBuilder Instr = Builder.buildInstr(InstrToBuild.Opcode);
3015 for (auto &OperandFn : InstrToBuild.OperandFns)
3016 OperandFn(Instr);
3017 }
3018 MI.eraseFromParent();
3019}
3020
3021bool CombinerHelper::matchAshrShlToSextInreg(
3022 MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) {
3023 assert(MI.getOpcode() == TargetOpcode::G_ASHR)((void)0);
3024 int64_t ShlCst, AshrCst;
3025 Register Src;
3026 // FIXME: detect splat constant vectors.
3027 if (!mi_match(MI.getOperand(0).getReg(), MRI,
3028 m_GAShr(m_GShl(m_Reg(Src), m_ICst(ShlCst)), m_ICst(AshrCst))))
3029 return false;
3030 if (ShlCst != AshrCst)
3031 return false;
3032 if (!isLegalOrBeforeLegalizer(
3033 {TargetOpcode::G_SEXT_INREG, {MRI.getType(Src)}}))
3034 return false;
3035 MatchInfo = std::make_tuple(Src, ShlCst);
3036 return true;
3037}
3038
3039void CombinerHelper::applyAshShlToSextInreg(
3040 MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) {
3041 assert(MI.getOpcode() == TargetOpcode::G_ASHR)((void)0);
3042 Register Src;
3043 int64_t ShiftAmt;
3044 std::tie(Src, ShiftAmt) = MatchInfo;
3045 unsigned Size = MRI.getType(Src).getScalarSizeInBits();
3046 Builder.setInstrAndDebugLoc(MI);
3047 Builder.buildSExtInReg(MI.getOperand(0).getReg(), Src, Size - ShiftAmt);
3048 MI.eraseFromParent();
3049}
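// Editor's note: an illustrative sketch, not part of the analyzed source.
// It checks that ashr(shl(x, C), C) sign-extends the low (Size - C) bits of
// x, i.e. what G_SEXT_INREG with width Size - C produces. It assumes the
// usual arithmetic right shift of negative signed integers.
#include <cassert>
#include <cstdint>

static void demoAshrShlToSextInreg(int32_t X, unsigned C) {
  C = C % 31 + 1;                // shift amount in [1, 31]
  unsigned Width = 32 - C;       // bits surviving the shl/ashr round trip
  int32_t RoundTrip = (int32_t)((uint32_t)X << C) >> C;
  // Reference sign extension of the low Width bits of X.
  uint32_t Lo = (uint32_t)X & ((1u << Width) - 1);
  uint32_t SignBit = 1u << (Width - 1);
  int32_t Ref = (int32_t)((Lo ^ SignBit) - SignBit);
  assert(RoundTrip == Ref);
}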
3050
3051/// and(and(x, C1), C2) -> C1&C2 ? and(x, C1&C2) : 0
3052bool CombinerHelper::matchOverlappingAnd(
3053 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
3054 assert(MI.getOpcode() == TargetOpcode::G_AND)((void)0);
3055
3056 Register Dst = MI.getOperand(0).getReg();
3057 LLT Ty = MRI.getType(Dst);
3058
3059 Register R;
3060 int64_t C1;
3061 int64_t C2;
3062 if (!mi_match(
3063 Dst, MRI,
3064 m_GAnd(m_GAnd(m_Reg(R), m_ICst(C1)), m_ICst(C2))))
3065 return false;
3066
3067 MatchInfo = [=](MachineIRBuilder &B) {
3068 if (C1 & C2) {
3069 B.buildAnd(Dst, R, B.buildConstant(Ty, C1 & C2));
3070 return;
3071 }
3072 auto Zero = B.buildConstant(Ty, 0);
3073 replaceRegWith(MRI, Dst, Zero->getOperand(0).getReg());
3074 };
3075 return true;
3076}
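// Editor's note: an illustrative sketch, not part of the analyzed source.
// It checks the fold above: nested masks combine into one, and collapse to
// the constant 0 when C1 and C2 share no bits.
#include <cassert>
#include <cstdint>

static void demoOverlappingAnd(uint32_t X, uint32_t C1, uint32_t C2) {
  uint32_t Nested = (X & C1) & C2;
  uint32_t Folded = (C1 & C2) ? (X & (C1 & C2)) : 0u;
  assert(Nested == Folded);
}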
3077
3078bool CombinerHelper::matchRedundantAnd(MachineInstr &MI,
3079 Register &Replacement) {
3080 // Given
3081 //
3082 // %y:_(sN) = G_SOMETHING
3083 // %x:_(sN) = G_SOMETHING
3084 // %res:_(sN) = G_AND %x, %y
3085 //
3086 // Eliminate the G_AND when it is known that x & y == x or x & y == y.
3087 //
3088 // Patterns like this can appear as a result of legalization. E.g.
3089 //
3090 // %cmp:_(s32) = G_ICMP intpred(pred), %x(s32), %y
3091 // %one:_(s32) = G_CONSTANT i32 1
3092 // %and:_(s32) = G_AND %cmp, %one
3093 //
3094 // In this case, G_ICMP only produces a single bit, so x & 1 == x.
3095 assert(MI.getOpcode() == TargetOpcode::G_AND)((void)0);
3096 if (!KB)
3097 return false;
3098
3099 Register AndDst = MI.getOperand(0).getReg();
3100 LLT DstTy = MRI.getType(AndDst);
3101
3102 // FIXME: This should be removed once GISelKnownBits supports vectors.
3103 if (DstTy.isVector())
3104 return false;
3105
3106 Register LHS = MI.getOperand(1).getReg();
3107 Register RHS = MI.getOperand(2).getReg();
3108 KnownBits LHSBits = KB->getKnownBits(LHS);
3109 KnownBits RHSBits = KB->getKnownBits(RHS);
3110
3111 // Check that x & Mask == x.
3112 // x & 1 == x, always
3113 // x & 0 == x, only if x is also 0
3114 // Meaning Mask has no effect if every bit is either one in Mask or zero in x.
3115 //
3116 // Check if we can replace AndDst with the LHS of the G_AND
3117 if (canReplaceReg(AndDst, LHS, MRI) &&
3118 (LHSBits.Zero | RHSBits.One).isAllOnesValue()) {
3119 Replacement = LHS;
3120 return true;
3121 }
3122
3123 // Check if we can replace AndDst with the RHS of the G_AND
3124 if (canReplaceReg(AndDst, RHS, MRI) &&
3125 (LHSBits.One | RHSBits.Zero).isAllOnesValue()) {
3126 Replacement = RHS;
3127 return true;
3128 }
3129
3130 return false;
3131}
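// Editor's note: an illustrative sketch, not part of the analyzed source.
// It models KnownBits with two plain masks (bits proven zero / proven one)
// and checks the predicate used above: if every bit is either known zero in
// x or known one in the mask, then x & mask == x and the G_AND is redundant.
#include <cassert>
#include <cstdint>

struct Known8 {
  uint8_t Zero; // bits proven to be 0
  uint8_t One;  // bits proven to be 1
};

static void demoRedundantAnd(uint8_t XVal, uint8_t MaskVal, Known8 X,
                             Known8 Mask) {
  // Callers promise the known bits really describe the runtime values.
  assert((XVal & X.Zero) == 0 && (XVal & X.One) == X.One);
  assert((MaskVal & Mask.Zero) == 0 && (MaskVal & Mask.One) == Mask.One);
  if (uint8_t(X.Zero | Mask.One) == 0xFF)
    assert(uint8_t(XVal & MaskVal) == XVal);
}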
3132
3133bool CombinerHelper::matchRedundantOr(MachineInstr &MI, Register &Replacement) {
3134 // Given
3135 //
3136 // %y:_(sN) = G_SOMETHING
3137 // %x:_(sN) = G_SOMETHING
3138 // %res:_(sN) = G_OR %x, %y
3139 //
3140 // Eliminate the G_OR when it is known that x | y == x or x | y == y.
3141 assert(MI.getOpcode() == TargetOpcode::G_OR)((void)0);
3142 if (!KB)
3143 return false;
3144
3145 Register OrDst = MI.getOperand(0).getReg();
3146 LLT DstTy = MRI.getType(OrDst);
3147
3148 // FIXME: This should be removed once GISelKnownBits supports vectors.
3149 if (DstTy.isVector())
3150 return false;
3151
3152 Register LHS = MI.getOperand(1).getReg();
3153 Register RHS = MI.getOperand(2).getReg();
3154 KnownBits LHSBits = KB->getKnownBits(LHS);
3155 KnownBits RHSBits = KB->getKnownBits(RHS);
3156
3157 // Check that x | Mask == x.
3158 // x | 0 == x, always
3159 // x | 1 == x, only if x is also 1
3160 // Meaning Mask has no effect if every bit is either zero in Mask or one in x.
3161 //
3162 // Check if we can replace OrDst with the LHS of the G_OR
3163 if (canReplaceReg(OrDst, LHS, MRI) &&
3164 (LHSBits.One | RHSBits.Zero).isAllOnesValue()) {
3165 Replacement = LHS;
3166 return true;
3167 }
3168
3169 // Check if we can replace OrDst with the RHS of the G_OR
3170 if (canReplaceReg(OrDst, RHS, MRI) &&
3171 (LHSBits.Zero | RHSBits.One).isAllOnesValue()) {
3172 Replacement = RHS;
3173 return true;
3174 }
3175
3176 return false;
3177}
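// Editor's note: an illustrative sketch, not part of the analyzed source;
// it reuses the Known8 helper from the G_AND sketch above. The G_OR case is
// the dual: if every bit is either known one in x or known zero in the
// mask, then x | mask == x and the G_OR is redundant.
static void demoRedundantOr(uint8_t XVal, uint8_t MaskVal, Known8 X,
                            Known8 Mask) {
  assert((XVal & X.Zero) == 0 && (XVal & X.One) == X.One);
  assert((MaskVal & Mask.Zero) == 0 && (MaskVal & Mask.One) == Mask.One);
  if (uint8_t(X.One | Mask.Zero) == 0xFF)
    assert(uint8_t(XVal | MaskVal) == XVal);
}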
3178
3179bool CombinerHelper::matchRedundantSExtInReg(MachineInstr &MI) {
3180 // If the input is already sign extended, just drop the extension.
3181 Register Src = MI.getOperand(1).getReg();
3182 unsigned ExtBits = MI.getOperand(2).getImm();
3183 unsigned TypeSize = MRI.getType(Src).getScalarSizeInBits();
3184 return KB->computeNumSignBits(Src) >= (TypeSize - ExtBits + 1);
3185}
3186
3187static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits,
3188 int64_t Cst, bool IsVector, bool IsFP) {
3189 // For i1, Cst will always be -1 regardless of boolean contents.
3190 return (ScalarSizeBits == 1 && Cst == -1) ||
3191 isConstTrueVal(TLI, Cst, IsVector, IsFP);
3192}
3193
3194bool CombinerHelper::matchNotCmp(MachineInstr &MI,
3195 SmallVectorImpl<Register> &RegsToNegate) {
3196 assert(MI.getOpcode() == TargetOpcode::G_XOR)((void)0);
3197 LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3198 const auto &TLI = *Builder.getMF().getSubtarget().getTargetLowering();
3199 Register XorSrc;
3200 Register CstReg;
3201 // We match xor(src, true) here.
3202 if (!mi_match(MI.getOperand(0).getReg(), MRI,
3203 m_GXor(m_Reg(XorSrc), m_Reg(CstReg))))
3204 return false;
3205
3206 if (!MRI.hasOneNonDBGUse(XorSrc))
3207 return false;
3208
3209 // Check that XorSrc is the root of a tree of comparisons combined with ANDs
3210 // and ORs. The suffix of RegsToNegate starting from index I is used as a work
3211 // list of tree nodes to visit.
3212 RegsToNegate.push_back(XorSrc);
3213 // Remember whether the comparisons are all integer or all floating point.
3214 bool IsInt = false;
3215 bool IsFP = false;
3216 for (unsigned I = 0; I < RegsToNegate.size(); ++I) {
3217 Register Reg = RegsToNegate[I];
3218 if (!MRI.hasOneNonDBGUse(Reg))
3219 return false;
3220 MachineInstr *Def = MRI.getVRegDef(Reg);
3221 switch (Def->getOpcode()) {
3222 default:
3223 // Don't match if the tree contains anything other than ANDs, ORs and
3224 // comparisons.
3225 return false;
3226 case TargetOpcode::G_ICMP:
3227 if (IsFP)
3228 return false;
3229 IsInt = true;
3230 // When we apply the combine we will invert the predicate.
3231 break;
3232 case TargetOpcode::G_FCMP:
3233 if (IsInt)
3234 return false;
3235 IsFP = true;
3236 // When we apply the combine we will invert the predicate.
3237 break;
3238 case TargetOpcode::G_AND:
3239 case TargetOpcode::G_OR:
3240 // Implement De Morgan's laws:
3241 // ~(x & y) -> ~x | ~y
3242 // ~(x | y) -> ~x & ~y
3243 // When we apply the combine we will change the opcode and recursively
3244 // negate the operands.
3245 RegsToNegate.push_back(Def->getOperand(1).getReg());
3246 RegsToNegate.push_back(Def->getOperand(2).getReg());
3247 break;
3248 }
3249 }
3250
3251 // Now we know whether the comparisons are integer or floating point, check
3252 // the constant in the xor.
3253 int64_t Cst;
3254 if (Ty.isVector()) {
3255 MachineInstr *CstDef = MRI.getVRegDef(CstReg);
3256 auto MaybeCst = getBuildVectorConstantSplat(*CstDef, MRI);
3257 if (!MaybeCst)
3258 return false;
3259 if (!isConstValidTrue(TLI, Ty.getScalarSizeInBits(), *MaybeCst, true, IsFP))
3260 return false;
3261 } else {
3262 if (!mi_match(CstReg, MRI, m_ICst(Cst)))
3263 return false;
3264 if (!isConstValidTrue(TLI, Ty.getSizeInBits(), Cst, false, IsFP))
3265 return false;
3266 }
3267
3268 return true;
3269}
3270
3271void CombinerHelper::applyNotCmp(MachineInstr &MI,
3272 SmallVectorImpl<Register> &RegsToNegate) {
3273 for (Register Reg : RegsToNegate) {
3274 MachineInstr *Def = MRI.getVRegDef(Reg);
3275 Observer.changingInstr(*Def);
3276 // For each comparison, invert the opcode. For each AND and OR, change the
3277 // opcode.
3278 switch (Def->getOpcode()) {
3279 default:
3280 llvm_unreachable("Unexpected opcode")__builtin_unreachable();
3281 case TargetOpcode::G_ICMP:
3282 case TargetOpcode::G_FCMP: {
3283 MachineOperand &PredOp = Def->getOperand(1);
3284 CmpInst::Predicate NewP = CmpInst::getInversePredicate(
3285 (CmpInst::Predicate)PredOp.getPredicate());
3286 PredOp.setPredicate(NewP);
3287 break;
3288 }
3289 case TargetOpcode::G_AND:
3290 Def->setDesc(Builder.getTII().get(TargetOpcode::G_OR));
3291 break;
3292 case TargetOpcode::G_OR:
3293 Def->setDesc(Builder.getTII().get(TargetOpcode::G_AND));
3294 break;
3295 }
3296 Observer.changedInstr(*Def);
3297 }
3298
3299 replaceRegWith(MRI, MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
3300 MI.eraseFromParent();
3301}
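// Editor's note: an illustrative sketch, not part of the analyzed source.
// applyNotCmp rewrites xor(tree, true) by inverting every comparison and
// swapping G_AND/G_OR per De Morgan; this checks both identities on scalars.
#include <cassert>

static void demoNotCmp(int A, int B, int C, int D) {
  // ~((a < b) & (c == d)) == (a >= b) | (c != d)
  assert(!((A < B) && (C == D)) == ((A >= B) || (C != D)));
  // ~((a < b) | (c == d)) == (a >= b) & (c != d)
  assert(!((A < B) || (C == D)) == ((A >= B) && (C != D)));
}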
3302
3303bool CombinerHelper::matchXorOfAndWithSameReg(
3304 MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
3305 // Match (xor (and x, y), y) (or any of its commuted cases)
3306 assert(MI.getOpcode() == TargetOpcode::G_XOR)((void)0);
3307 Register &X = MatchInfo.first;
3308 Register &Y = MatchInfo.second;
3309 Register AndReg = MI.getOperand(1).getReg();
3310 Register SharedReg = MI.getOperand(2).getReg();
3311
3312 // Find a G_AND on either side of the G_XOR.
3313 // Look for one of
3314 //
3315 // (xor (and x, y), SharedReg)
3316 // (xor SharedReg, (and x, y))
3317 if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y)))) {
3318 std::swap(AndReg, SharedReg);
3319 if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y))))
3320 return false;
3321 }
3322
3323 // Only do this if we'll eliminate the G_AND.
3324 if (!MRI.hasOneNonDBGUse(AndReg))
3325 return false;
3326
3327 // We can combine if SharedReg is the same as either the LHS or RHS of the
3328 // G_AND.
3329 if (Y != SharedReg)
3330 std::swap(X, Y);
3331 return Y == SharedReg;
3332}
3333
3334void CombinerHelper::applyXorOfAndWithSameReg(
3335 MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
3336 // Fold (xor (and x, y), y) -> (and (not x), y)
3337 Builder.setInstrAndDebugLoc(MI);
3338 Register X, Y;
3339 std::tie(X, Y) = MatchInfo;
3340 auto Not = Builder.buildNot(MRI.getType(X), X);
3341 Observer.changingInstr(MI);
3342 MI.setDesc(Builder.getTII().get(TargetOpcode::G_AND));
3343 MI.getOperand(1).setReg(Not->getOperand(0).getReg());
3344 MI.getOperand(2).setReg(Y);
3345 Observer.changedInstr(MI);
3346}
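// Editor's note: an illustrative sketch, not part of the analyzed source.
// It checks the bitwise identity behind the fold above:
// (x & y) ^ y == (~x) & y.
#include <cassert>
#include <cstdint>

static void demoXorOfAndWithSameReg(uint32_t X, uint32_t Y) {
  assert(((X & Y) ^ Y) == (~X & Y));
}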
3347
3348bool CombinerHelper::matchPtrAddZero(MachineInstr &MI) {
3349 Register DstReg = MI.getOperand(0).getReg();
3350 LLT Ty = MRI.getType(DstReg);
3351 const DataLayout &DL = Builder.getMF().getDataLayout();
3352
3353 if (DL.isNonIntegralAddressSpace(Ty.getScalarType().getAddressSpace()))
3354 return false;
3355
3356 if (Ty.isPointer()) {
3357 auto ConstVal = getConstantVRegVal(MI.getOperand(1).getReg(), MRI);
3358 return ConstVal && *ConstVal == 0;
3359 }
3360
3361 assert(Ty.isVector() && "Expecting a vector type")((void)0);
3362 const MachineInstr *VecMI = MRI.getVRegDef(MI.getOperand(1).getReg());
3363 return isBuildVectorAllZeros(*VecMI, MRI);
3364}
3365
3366void CombinerHelper::applyPtrAddZero(MachineInstr &MI) {
3367 assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD)((void)0);
3368 Builder.setInstrAndDebugLoc(MI);
3369 Builder.buildIntToPtr(MI.getOperand(0), MI.getOperand(2));
3370 MI.eraseFromParent();
3371}
3372
3373/// The second source operand is known to be a power of 2.
3374void CombinerHelper::applySimplifyURemByPow2(MachineInstr &MI) {
3375 Register DstReg = MI.getOperand(0).getReg();
3376 Register Src0 = MI.getOperand(1).getReg();
3377 Register Pow2Src1 = MI.getOperand(2).getReg();
3378 LLT Ty = MRI.getType(DstReg);
3379 Builder.setInstrAndDebugLoc(MI);
3380
3381 // Fold (urem x, pow2) -> (and x, pow2-1)
3382 auto NegOne = Builder.buildConstant(Ty, -1);
3383 auto Add = Builder.buildAdd(Ty, Pow2Src1, NegOne);
3384 Builder.buildAnd(DstReg, Src0, Add);
3385 MI.eraseFromParent();
3386}
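// Editor's note: an illustrative sketch, not part of the analyzed source.
// It checks the fold used above for unsigned values:
// urem(x, 2^k) == x & (2^k - 1).
#include <cassert>
#include <cstdint>

static void demoURemByPow2(uint64_t X, unsigned K) {
  K %= 64;                      // keep 2^K representable in 64 bits
  uint64_t Pow2 = 1ULL << K;
  assert(X % Pow2 == (X & (Pow2 - 1)));
}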
3387
3388Optional<SmallVector<Register, 8>>
3389CombinerHelper::findCandidatesForLoadOrCombine(const MachineInstr *Root) const {
3390 assert(Root->getOpcode() == TargetOpcode::G_OR && "Expected G_OR only!")((void)0);
3391 // We want to detect if Root is part of a tree which represents a bunch
3392 // of loads being merged into a larger load. We'll try to recognize patterns
3393 // like, for example:
3394 //
3395 // Reg Reg
3396 // \ /
3397 // OR_1 Reg
3398 // \ /
3399 // OR_2
3400 // \ Reg
3401 // .. /
3402 // Root
3403 //
3404 // Reg Reg Reg Reg
3405 // \ / \ /
3406 // OR_1 OR_2
3407 // \ /
3408 // \ /
3409 // ...
3410 // Root
3411 //
3412 // Each "Reg" may have been produced by a load + some arithmetic. This
3413 // function will save each of them.
3414 SmallVector<Register, 8> RegsToVisit;
3415 SmallVector<const MachineInstr *, 7> Ors = {Root};
3416
3417 // In the "worst" case, we're dealing with a load for each byte. So, there
3418 // are at most #bytes - 1 ORs.
3419 const unsigned MaxIter =
3420 MRI.getType(Root->getOperand(0).getReg()).getSizeInBytes() - 1;
3421 for (unsigned Iter = 0; Iter < MaxIter; ++Iter) {
3422 if (Ors.empty())
3423 break;
3424 const MachineInstr *Curr = Ors.pop_back_val();
3425 Register OrLHS = Curr->getOperand(1).getReg();
3426 Register OrRHS = Curr->getOperand(2).getReg();
3427
3428 // In the combine, we want to eliminate the entire tree.
3429 if (!MRI.hasOneNonDBGUse(OrLHS) || !MRI.hasOneNonDBGUse(OrRHS))
3430 return None;
3431
3432 // If it's a G_OR, save it and continue to walk. If it's not, then it's
3433 // something that may be a load + arithmetic.
3434 if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrLHS, MRI))
3435 Ors.push_back(Or);
3436 else
3437 RegsToVisit.push_back(OrLHS);
3438 if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrRHS, MRI))
3439 Ors.push_back(Or);
3440 else
3441 RegsToVisit.push_back(OrRHS);
3442 }
3443
3444 // We're going to try and merge each register into a wider power-of-2 type,
3445 // so we ought to have an even number of registers.
3446 if (RegsToVisit.empty() || RegsToVisit.size() % 2 != 0)
3447 return None;
3448 return RegsToVisit;
3449}
3450
3451/// Helper function for findLoadOffsetsForLoadOrCombine.
3452///
3453/// Check if \p Reg is the result of loading a \p MemSizeInBits wide value,
3454/// and then moving that value into a specific byte offset.
3455///
3456/// e.g. x[i] << 24
3457///
3458/// \returns The load instruction and the byte offset it is moved into.
3459static Optional<std::pair<GZExtLoad *, int64_t>>
3460matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits,
3461 const MachineRegisterInfo &MRI) {
3462 assert(MRI.hasOneNonDBGUse(Reg) &&((void)0)
3463 "Expected Reg to only have one non-debug use?")((void)0);
3464 Register MaybeLoad;
3465 int64_t Shift;
3466 if (!mi_match(Reg, MRI,
3467 m_OneNonDBGUse(m_GShl(m_Reg(MaybeLoad), m_ICst(Shift))))) {
3468 Shift = 0;
3469 MaybeLoad = Reg;
3470 }
3471
3472 if (Shift % MemSizeInBits != 0)
3473 return None;
3474
3475 // TODO: Handle other types of loads.
3476 auto *Load = getOpcodeDef<GZExtLoad>(MaybeLoad, MRI);
3477 if (!Load)
3478 return None;
3479
3480 if (!Load->isUnordered() || Load->getMemSizeInBits() != MemSizeInBits)
3481 return None;
3482
3483 return std::make_pair(Load, Shift / MemSizeInBits);
3484}
3485
3486Optional<std::tuple<GZExtLoad *, int64_t, GZExtLoad *>>
3487CombinerHelper::findLoadOffsetsForLoadOrCombine(
3488 SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx,
3489 const SmallVector<Register, 8> &RegsToVisit, const unsigned MemSizeInBits) {
3490
3491 // Each load found for the pattern. There should be one for each RegsToVisit.
3492 SmallSetVector<const MachineInstr *, 8> Loads;
3493
3494 // The lowest index used in any load. (The lowest "i" for each x[i].)
3495 int64_t LowestIdx = INT64_MAX0x7fffffffffffffffLL;
3496
3497 // The load which uses the lowest index.
3498 GZExtLoad *LowestIdxLoad = nullptr;
3499
3500 // Keeps track of the load indices we see. We shouldn't see any indices twice.
3501 SmallSet<int64_t, 8> SeenIdx;
3502
3503 // Ensure each load is in the same MBB.
3504 // TODO: Support multiple MachineBasicBlocks.
3505 MachineBasicBlock *MBB = nullptr;
3506 const MachineMemOperand *MMO = nullptr;
3507
3508 // Earliest instruction-order load in the pattern.
3509 GZExtLoad *EarliestLoad = nullptr;
Step 10: 'EarliestLoad' initialized to a null pointer value
3510
3511 // Latest instruction-order load in the pattern.
3512 GZExtLoad *LatestLoad = nullptr;
3513
3514 // Base pointer which every load should share.
3515 Register BasePtr;
3516
3517 // We want to find a load for each register. Each load should have some
3518 // appropriate bit twiddling arithmetic. During this loop, we will also keep
3519 // track of the load which uses the lowest index. Later, we will check if we
3520 // can use its pointer in the final, combined load.
3521 for (auto Reg : RegsToVisit) {
Step 11: Assuming '__begin1' is equal to '__end1' (RegsToVisit is treated as empty, so the loop body below is never entered)
3522 // Find the load, and find the position that it will end up in (e.g. a
3523 // shifted) value.
3524 auto LoadAndPos = matchLoadAndBytePosition(Reg, MemSizeInBits, MRI);
3525 if (!LoadAndPos)
3526 return None;
3527 GZExtLoad *Load;
3528 int64_t DstPos;
3529 std::tie(Load, DstPos) = *LoadAndPos;
3530
3531 // TODO: Handle multiple MachineBasicBlocks. Currently not handled because
3532 // it is difficult to check for stores/calls/etc between loads.
3533 MachineBasicBlock *LoadMBB = Load->getParent();
3534 if (!MBB)
3535 MBB = LoadMBB;
3536 if (LoadMBB != MBB)
3537 return None;
3538
3539 // Make sure that the MachineMemOperands of every seen load are compatible.
3540 auto &LoadMMO = Load->getMMO();
3541 if (!MMO)
3542 MMO = &LoadMMO;
3543 if (MMO->getAddrSpace() != LoadMMO.getAddrSpace())
3544 return None;
3545
3546 // Find out what the base pointer and index for the load is.
3547 Register LoadPtr;
3548 int64_t Idx;
3549 if (!mi_match(Load->getOperand(1).getReg(), MRI,
3550 m_GPtrAdd(m_Reg(LoadPtr), m_ICst(Idx)))) {
3551 LoadPtr = Load->getOperand(1).getReg();
3552 Idx = 0;
3553 }
3554
3555 // Don't combine things like a[i], a[i] -> a bigger load.
3556 if (!SeenIdx.insert(Idx).second)
3557 return None;
3558
3559 // Every load must share the same base pointer; don't combine things like:
3560 //
3561 // a[i], b[i + 1] -> a bigger load.
3562 if (!BasePtr.isValid())
3563 BasePtr = LoadPtr;
3564 if (BasePtr != LoadPtr)
3565 return None;
3566
3567 if (Idx < LowestIdx) {
3568 LowestIdx = Idx;
3569 LowestIdxLoad = Load;
3570 }
3571
3572 // Keep track of the byte offset that this load ends up at. If we have seen
3573 // the byte offset, then stop here. We do not want to combine:
3574 //
3575 // a[i] << 16, a[i + k] << 16 -> a bigger load.
3576 if (!MemOffset2Idx.try_emplace(DstPos, Idx).second)
3577 return None;
3578 Loads.insert(Load);
3579
3580 // Keep track of the position of the earliest/latest loads in the pattern.
3581 // We will check that there are no load fold barriers between them later
3582 // on.
3583 //
3584 // FIXME: Is there a better way to check for load fold barriers?
3585 if (!EarliestLoad || dominates(*Load, *EarliestLoad))
3586 EarliestLoad = Load;
3587 if (!LatestLoad || dominates(*LatestLoad, *Load))
3588 LatestLoad = Load;
3589 }
3590
3591 // We found a load for each register. Let's check if each load satisfies the
3592 // pattern.
3593 assert(Loads.size() == RegsToVisit.size() &&((void)0)
3594 "Expected to find a load for each register?")((void)0);
3595 assert(EarliestLoad != LatestLoad && EarliestLoad &&((void)0)
3596 LatestLoad && "Expected at least two loads?")((void)0);
3597
3598 // Check if there are any stores, calls, etc. between any of the loads. If
3599 // there are, then we can't safely perform the combine.
3600 //
3601 // MaxIter is chosen based off the (worst case) number of iterations it
3602 // typically takes to succeed in the LLVM test suite plus some padding.
3603 //
3604 // FIXME: Is there a better way to check for load fold barriers?
3605 const unsigned MaxIter = 20;
3606 unsigned Iter = 0;
3607 for (const auto &MI : instructionsWithoutDebug(EarliestLoad->getIterator(),
Step 12: Called C++ object pointer is null ('EarliestLoad' is still null when 'EarliestLoad->getIterator()' is called here)
3608 LatestLoad->getIterator())) {
3609 if (Loads.count(&MI))
3610 continue;
3611 if (MI.isLoadFoldBarrier())
3612 return None;
3613 if (Iter++ == MaxIter)
3614 return None;
3615 }
3616
3617 return std::make_tuple(LowestIdxLoad, LowestIdx, LatestLoad);
3618}
3619
3620bool CombinerHelper::matchLoadOrCombine(
3621 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
3622 assert(MI.getOpcode() == TargetOpcode::G_OR)((void)0);
3623 MachineFunction &MF = *MI.getMF();
3624 // Assuming a little-endian target, transform:
3625 // s8 *a = ...
3626 // s32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
3627 // =>
3628 // s32 val = *((i32)a)
3629 //
3630 // s8 *a = ...
3631 // s32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
3632 // =>
3633 // s32 val = BSWAP(*((s32)a))
3634 Register Dst = MI.getOperand(0).getReg();
3635 LLT Ty = MRI.getType(Dst);
3636 if (Ty.isVector())
Step 1: Assuming the condition is false
Step 2: Taking false branch
3637 return false;
3638
3639 // We need to combine at least two loads into this type. Since the smallest
3640 // possible load is into a byte, we need at least a 16-bit wide type.
3641 const unsigned WideMemSizeInBits = Ty.getSizeInBits();
3642 if (WideMemSizeInBits < 16 || WideMemSizeInBits % 8 != 0)
Step 3: Assuming 'WideMemSizeInBits' is >= 16
Step 4: Assuming the condition is false
Step 5: Taking false branch
3643 return false;
3644
3645 // Match a collection of non-OR instructions in the pattern.
3646 auto RegsToVisit = findCandidatesForLoadOrCombine(&MI);
3647 if (!RegsToVisit)
Step 6: Taking false branch
3648 return false;
3649
3650 // We have a collection of non-OR instructions. Figure out how wide each of
3651 // the small loads should be based off of the number of potential loads we
3652 // found.
3653 const unsigned NarrowMemSizeInBits = WideMemSizeInBits / RegsToVisit->size();
3654 if (NarrowMemSizeInBits % 8 != 0)
Step 7: Assuming the condition is false
Step 8: Taking false branch
3655 return false;
3656
3657 // Check if each register feeding into each OR is a load from the same
3658 // base pointer + some arithmetic.
3659 //
3660 // e.g. a[0], a[1] << 8, a[2] << 16, etc.
3661 //
3662 // Also verify that each of these ends up putting a[i] into the same memory
3663 // offset as a load into a wide type would.
3664 SmallDenseMap<int64_t, int64_t, 8> MemOffset2Idx;
3665 GZExtLoad *LowestIdxLoad, *LatestLoad;
3666 int64_t LowestIdx;
3667 auto MaybeLoadInfo = findLoadOffsetsForLoadOrCombine(
Step 9: Calling 'CombinerHelper::findLoadOffsetsForLoadOrCombine'
3668 MemOffset2Idx, *RegsToVisit, NarrowMemSizeInBits);
3669 if (!MaybeLoadInfo)
3670 return false;
3671 std::tie(LowestIdxLoad, LowestIdx, LatestLoad) = *MaybeLoadInfo;
3672
3673 // We have a bunch of loads being OR'd together. Using the addresses + offsets
3674 // we found before, check if this corresponds to a big or little endian byte
3675 // pattern. If it does, then we can represent it using a load + possibly a
3676 // BSWAP.
3677 bool IsBigEndianTarget = MF.getDataLayout().isBigEndian();
3678 Optional<bool> IsBigEndian = isBigEndian(MemOffset2Idx, LowestIdx);
3679 if (!IsBigEndian.hasValue())
3680 return false;
3681 bool NeedsBSwap = IsBigEndianTarget != *IsBigEndian;
3682 if (NeedsBSwap && !isLegalOrBeforeLegalizer({TargetOpcode::G_BSWAP, {Ty}}))
3683 return false;
3684
3685 // Make sure that the load from the lowest index produces offset 0 in the
3686 // final value.
3687 //
3688 // This ensures that we won't combine something like this:
3689 //
3690 // load x[i] -> byte 2
3691 // load x[i+1] -> byte 0 ---> wide_load x[i]
3692 // load x[i+2] -> byte 1
3693 const unsigned NumLoadsInTy = WideMemSizeInBits / NarrowMemSizeInBits;
3694 const unsigned ZeroByteOffset =
3695 *IsBigEndian
3696 ? bigEndianByteAt(NumLoadsInTy, 0)
3697 : littleEndianByteAt(NumLoadsInTy, 0);
3698 auto ZeroOffsetIdx = MemOffset2Idx.find(ZeroByteOffset);
3699 if (ZeroOffsetIdx == MemOffset2Idx.end() ||
3700 ZeroOffsetIdx->second != LowestIdx)
3701 return false;
3702
3703 // We will reuse the pointer from the load which ends up at byte offset 0. It
3704 // may not use index 0.
3705 Register Ptr = LowestIdxLoad->getPointerReg();
3706 const MachineMemOperand &MMO = LowestIdxLoad->getMMO();
3707 LegalityQuery::MemDesc MMDesc;
3708 MMDesc.MemoryTy = Ty;
3709 MMDesc.AlignInBits = MMO.getAlign().value() * 8;
3710 MMDesc.Ordering = MMO.getSuccessOrdering();
3711 if (!isLegalOrBeforeLegalizer(
3712 {TargetOpcode::G_LOAD, {Ty, MRI.getType(Ptr)}, {MMDesc}}))
3713 return false;
3714 auto PtrInfo = MMO.getPointerInfo();
3715 auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, WideMemSizeInBits / 8);
3716
3717 // Load must be allowed and fast on the target.
3718 LLVMContext &C = MF.getFunction().getContext();
3719 auto &DL = MF.getDataLayout();
3720 bool Fast = false;
3721 if (!getTargetLowering().allowsMemoryAccess(C, DL, Ty, *NewMMO, &Fast) ||
3722 !Fast)
3723 return false;
3724
3725 MatchInfo = [=](MachineIRBuilder &MIB) {
3726 MIB.setInstrAndDebugLoc(*LatestLoad);
3727 Register LoadDst = NeedsBSwap ? MRI.cloneVirtualRegister(Dst) : Dst;
3728 MIB.buildLoad(LoadDst, Ptr, *NewMMO);
3729 if (NeedsBSwap)
3730 MIB.buildBSwap(Dst, LoadDst);
3731 };
3732 return true;
3733}
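// Editor's note: an illustrative sketch, not part of the analyzed source.
// On a little-endian host it checks that the byte-wise OR-of-shifted-loads
// pattern recognized above equals a single wide 32-bit load of the same
// bytes (the big-endian byte order would additionally need a bswap).
#include <cassert>
#include <cstdint>
#include <cstring>

static void demoLoadOrCombine(const uint8_t A[4]) {
  uint32_t Narrow = uint32_t(A[0]) | (uint32_t(A[1]) << 8) |
                    (uint32_t(A[2]) << 16) | (uint32_t(A[3]) << 24);
  uint32_t Wide;
  std::memcpy(&Wide, A, sizeof(Wide)); // one 32-bit load of the same bytes
  uint32_t One = 1;
  if (*reinterpret_cast<const uint8_t *>(&One) == 1) // little-endian host
    assert(Narrow == Wide);
}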
3734
3735bool CombinerHelper::matchExtendThroughPhis(MachineInstr &MI,
3736 MachineInstr *&ExtMI) {
3737 assert(MI.getOpcode() == TargetOpcode::G_PHI)((void)0);
3738
3739 Register DstReg = MI.getOperand(0).getReg();
3740
3741 // TODO: Extending a vector may be expensive, don't do this until heuristics
3742 // are better.
3743 if (MRI.getType(DstReg).isVector())
3744 return false;
3745
3746 // Try to match a phi, whose only use is an extend.
3747 if (!MRI.hasOneNonDBGUse(DstReg))
3748 return false;
3749 ExtMI = &*MRI.use_instr_nodbg_begin(DstReg);
3750 switch (ExtMI->getOpcode()) {
3751 case TargetOpcode::G_ANYEXT:
3752 return true; // G_ANYEXT is usually free.
3753 case TargetOpcode::G_ZEXT:
3754 case TargetOpcode::G_SEXT:
3755 break;
3756 default:
3757 return false;
3758 }
3759
3760 // If the target is likely to fold this extend away, don't propagate.
3761 if (Builder.getTII().isExtendLikelyToBeFolded(*ExtMI, MRI))
3762 return false;
3763
3764 // We don't want to propagate the extends unless there's a good chance that
3765 // they'll be optimized in some way.
3766 // Collect the unique incoming values.
3767 SmallPtrSet<MachineInstr *, 4> InSrcs;
3768 for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) {
3769 auto *DefMI = getDefIgnoringCopies(MI.getOperand(Idx).getReg(), MRI);
3770 switch (DefMI->getOpcode()) {
3771 case TargetOpcode::G_LOAD:
3772 case TargetOpcode::G_TRUNC:
3773 case TargetOpcode::G_SEXT:
3774 case TargetOpcode::G_ZEXT:
3775 case TargetOpcode::G_ANYEXT:
3776 case TargetOpcode::G_CONSTANT:
3777 InSrcs.insert(getDefIgnoringCopies(MI.getOperand(Idx).getReg(), MRI));
3778 // Don't try to propagate if there are too many places to create new
3779 // extends, chances are it'll increase code size.
3780 if (InSrcs.size() > 2)
3781 return false;
3782 break;
3783 default:
3784 return false;
3785 }
3786 }
3787 return true;
3788}
3789
3790void CombinerHelper::applyExtendThroughPhis(MachineInstr &MI,
3791 MachineInstr *&ExtMI) {
3792 assert(MI.getOpcode() == TargetOpcode::G_PHI)((void)0);
3793 Register DstReg = ExtMI->getOperand(0).getReg();
3794 LLT ExtTy = MRI.getType(DstReg);
3795
3796 // Propagate the extension into each incoming reg's defining block.
3797 // Use a SetVector here because PHIs can have duplicate edges, and we want
3798 // deterministic iteration order.
3799 SmallSetVector<MachineInstr *, 8> SrcMIs;
3800 SmallDenseMap<MachineInstr *, MachineInstr *, 8> OldToNewSrcMap;
3801 for (unsigned SrcIdx = 1; SrcIdx < MI.getNumOperands(); SrcIdx += 2) {
3802 auto *SrcMI = MRI.getVRegDef(MI.getOperand(SrcIdx).getReg());
3803 if (!SrcMIs.insert(SrcMI))
3804 continue;
3805
3806 // Build an extend after each src inst.
3807 auto *MBB = SrcMI->getParent();
3808 MachineBasicBlock::iterator InsertPt = ++SrcMI->getIterator();
3809 if (InsertPt != MBB->end() && InsertPt->isPHI())
3810 InsertPt = MBB->getFirstNonPHI();
3811
3812 Builder.setInsertPt(*SrcMI->getParent(), InsertPt);
3813 Builder.setDebugLoc(MI.getDebugLoc());
3814 auto NewExt = Builder.buildExtOrTrunc(ExtMI->getOpcode(), ExtTy,
3815 SrcMI->getOperand(0).getReg());
3816 OldToNewSrcMap[SrcMI] = NewExt;
3817 }
3818
3819 // Create a new phi with the extended inputs.
3820 Builder.setInstrAndDebugLoc(MI);
3821 auto NewPhi = Builder.buildInstrNoInsert(TargetOpcode::G_PHI);
3822 NewPhi.addDef(DstReg);
3823 for (unsigned SrcIdx = 1; SrcIdx < MI.getNumOperands(); ++SrcIdx) {
3824 auto &MO = MI.getOperand(SrcIdx);
3825 if (!MO.isReg()) {
3826 NewPhi.addMBB(MO.getMBB());
3827 continue;
3828 }
3829 auto *NewSrc = OldToNewSrcMap[MRI.getVRegDef(MO.getReg())];
3830 NewPhi.addUse(NewSrc->getOperand(0).getReg());
3831 }
3832 Builder.insertInstr(NewPhi);
3833 ExtMI->eraseFromParent();
3834}
3835
3836bool CombinerHelper::matchExtractVecEltBuildVec(MachineInstr &MI,
3837 Register &Reg) {
3838 assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT)((void)0);
3839 // If we have a constant index, look for a G_BUILD_VECTOR source
3840 // and find the source register that the index maps to.
3841 Register SrcVec = MI.getOperand(1).getReg();
3842 LLT SrcTy = MRI.getType(SrcVec);
3843 if (!isLegalOrBeforeLegalizer(
3844 {TargetOpcode::G_BUILD_VECTOR, {SrcTy, SrcTy.getElementType()}}))
3845 return false;
3846
3847 auto Cst = getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
3848 if (!Cst || Cst->Value.getZExtValue() >= SrcTy.getNumElements())
3849 return false;
3850
3851 unsigned VecIdx = Cst->Value.getZExtValue();
3852 MachineInstr *BuildVecMI =
3853 getOpcodeDef(TargetOpcode::G_BUILD_VECTOR, SrcVec, MRI);
3854 if (!BuildVecMI) {
3855 BuildVecMI = getOpcodeDef(TargetOpcode::G_BUILD_VECTOR_TRUNC, SrcVec, MRI);
3856 if (!BuildVecMI)
3857 return false;
3858 LLT ScalarTy = MRI.getType(BuildVecMI->getOperand(1).getReg());
3859 if (!isLegalOrBeforeLegalizer(
3860 {TargetOpcode::G_BUILD_VECTOR_TRUNC, {SrcTy, ScalarTy}}))
3861 return false;
3862 }
3863
3864 EVT Ty(getMVTForLLT(SrcTy));
3865 if (!MRI.hasOneNonDBGUse(SrcVec) &&
3866 !getTargetLowering().aggressivelyPreferBuildVectorSources(Ty))
3867 return false;
3868
3869 Reg = BuildVecMI->getOperand(VecIdx + 1).getReg();
3870 return true;
3871}
3872
3873void CombinerHelper::applyExtractVecEltBuildVec(MachineInstr &MI,
3874 Register &Reg) {
3875 // Check the type of the register, since it may have come from a
3876 // G_BUILD_VECTOR_TRUNC.
3877 LLT ScalarTy = MRI.getType(Reg);
3878 Register DstReg = MI.getOperand(0).getReg();
3879 LLT DstTy = MRI.getType(DstReg);
3880
3881 Builder.setInstrAndDebugLoc(MI);
3882 if (ScalarTy != DstTy) {
3883 assert(ScalarTy.getSizeInBits() > DstTy.getSizeInBits())((void)0);
3884 Builder.buildTrunc(DstReg, Reg);
3885 MI.eraseFromParent();
3886 return;
3887 }
3888 replaceSingleDefInstWithReg(MI, Reg);
3889}
3890
3891bool CombinerHelper::matchExtractAllEltsFromBuildVector(
3892 MachineInstr &MI,
3893 SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) {
3894 assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR)((void)0);
3895 // This combine tries to find build_vector's which have every source element
3896 // extracted using G_EXTRACT_VECTOR_ELT. This can happen when transforms like
3897 // the masked load scalarization is run late in the pipeline. There's already
3898 // a combine for a similar pattern starting from the extract, but that
3899 // doesn't attempt to do it if there are multiple uses of the build_vector,
3900 // which in this case is true. Starting the combine from the build_vector
3901 // feels more natural than trying to find sibling nodes of extracts.
3902 // E.g.
3903 // %vec(<4 x s32>) = G_BUILD_VECTOR %s1(s32), %s2, %s3, %s4
3904 // %ext1 = G_EXTRACT_VECTOR_ELT %vec, 0
3905 // %ext2 = G_EXTRACT_VECTOR_ELT %vec, 1
3906 // %ext3 = G_EXTRACT_VECTOR_ELT %vec, 2
3907 // %ext4 = G_EXTRACT_VECTOR_ELT %vec, 3
3908 // ==>
3909 // replace ext{1,2,3,4} with %s{1,2,3,4}
3910
3911 Register DstReg = MI.getOperand(0).getReg();
3912 LLT DstTy = MRI.getType(DstReg);
3913 unsigned NumElts = DstTy.getNumElements();
3914
3915 SmallBitVector ExtractedElts(NumElts);
3916 for (auto &II : make_range(MRI.use_instr_nodbg_begin(DstReg),
3917 MRI.use_instr_nodbg_end())) {
3918 if (II.getOpcode() != TargetOpcode::G_EXTRACT_VECTOR_ELT)
3919 return false;
3920 auto Cst = getConstantVRegVal(II.getOperand(2).getReg(), MRI);
3921 if (!Cst)
3922 return false;
3923 unsigned Idx = Cst.getValue().getZExtValue();
3924 if (Idx >= NumElts)
3925 return false; // Out of range.
3926 ExtractedElts.set(Idx);
3927 SrcDstPairs.emplace_back(
3928 std::make_pair(MI.getOperand(Idx + 1).getReg(), &II));
3929 }
3930 // Match if every element was extracted.
3931 return ExtractedElts.all();
3932}
3933
3934void CombinerHelper::applyExtractAllEltsFromBuildVector(
3935 MachineInstr &MI,
3936 SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) {
3937 assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR)((void)0);
3938 for (auto &Pair : SrcDstPairs) {
3939 auto *ExtMI = Pair.second;
3940 replaceRegWith(MRI, ExtMI->getOperand(0).getReg(), Pair.first);
3941 ExtMI->eraseFromParent();
3942 }
3943 MI.eraseFromParent();
3944}
3945
3946void CombinerHelper::applyBuildFn(
3947 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
3948 Builder.setInstrAndDebugLoc(MI);
3949 MatchInfo(Builder);
3950 MI.eraseFromParent();
3951}
3952
3953void CombinerHelper::applyBuildFnNoErase(
3954 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
3955 Builder.setInstrAndDebugLoc(MI);
3956 MatchInfo(Builder);
3957}
3958
3959/// Match an FSHL or FSHR that can be combined to a ROTR or ROTL rotate.
3960bool CombinerHelper::matchFunnelShiftToRotate(MachineInstr &MI) {
3961 unsigned Opc = MI.getOpcode();
3962 assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR)((void)0);
3963 Register X = MI.getOperand(1).getReg();
3964 Register Y = MI.getOperand(2).getReg();
3965 if (X != Y)
3966 return false;
3967 unsigned RotateOpc =
3968 Opc == TargetOpcode::G_FSHL ? TargetOpcode::G_ROTL : TargetOpcode::G_ROTR;
3969 return isLegalOrBeforeLegalizer({RotateOpc, {MRI.getType(X), MRI.getType(Y)}});
3970}
3971
3972void CombinerHelper::applyFunnelShiftToRotate(MachineInstr &MI) {
3973 unsigned Opc = MI.getOpcode();
3974 assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR)((void)0);
3975 bool IsFSHL = Opc == TargetOpcode::G_FSHL;
3976 Observer.changingInstr(MI);
3977 MI.setDesc(Builder.getTII().get(IsFSHL ? TargetOpcode::G_ROTL
3978 : TargetOpcode::G_ROTR));
3979 MI.RemoveOperand(2);
3980 Observer.changedInstr(MI);
3981}
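// Editor's note: an illustrative sketch, not part of the analyzed source.
// A funnel shift whose two inputs are the same value is a rotate:
// fshl(x, x, s) == rotl(x, s).
#include <cassert>
#include <cstdint>

static uint32_t fshl32(uint32_t Hi, uint32_t Lo, unsigned S) {
  S &= 31; // funnel shift amounts operate modulo the bit width
  return S ? (Hi << S) | (Lo >> (32 - S)) : Hi;
}

static uint32_t rotl32(uint32_t X, unsigned S) {
  S &= 31;
  return S ? (X << S) | (X >> (32 - S)) : X;
}

static void demoFunnelShiftToRotate(uint32_t X, unsigned S) {
  assert(fshl32(X, X, S) == rotl32(X, S));
}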
3982
3983// Fold (rot x, c) -> (rot x, c % BitSize)
3984bool CombinerHelper::matchRotateOutOfRange(MachineInstr &MI) {
3985 assert(MI.getOpcode() == TargetOpcode::G_ROTL ||((void)0)
3986 MI.getOpcode() == TargetOpcode::G_ROTR)((void)0);
3987 unsigned Bitsize =
3988 MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();
3989 Register AmtReg = MI.getOperand(2).getReg();
3990 bool OutOfRange = false;
3991 auto MatchOutOfRange = [Bitsize, &OutOfRange](const Constant *C) {
3992 if (auto *CI = dyn_cast<ConstantInt>(C))
3993 OutOfRange |= CI->getValue().uge(Bitsize);
3994 return true;
3995 };
3996 return matchUnaryPredicate(MRI, AmtReg, MatchOutOfRange) && OutOfRange;
3997}
3998
3999void CombinerHelper::applyRotateOutOfRange(MachineInstr &MI) {
4000 assert(MI.getOpcode() == TargetOpcode::G_ROTL ||((void)0)
4001 MI.getOpcode() == TargetOpcode::G_ROTR)((void)0);
4002 unsigned Bitsize =
4003 MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();
4004 Builder.setInstrAndDebugLoc(MI);
4005 Register Amt = MI.getOperand(2).getReg();
4006 LLT AmtTy = MRI.getType(Amt);
4007 auto Bits = Builder.buildConstant(AmtTy, Bitsize);
4008 Amt = Builder.buildURem(AmtTy, MI.getOperand(2).getReg(), Bits).getReg(0);
4009 Observer.changingInstr(MI);
4010 MI.getOperand(2).setReg(Amt);
4011 Observer.changedInstr(MI);
4012}
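// Editor's note: an illustrative sketch, not part of the analyzed source.
// It checks the fold above: reducing a rotate amount modulo the bit width
// never changes the result, because rotating by the full width is the
// identity.
#include <cassert>
#include <cstdint>

static uint32_t rotlByOnes(uint32_t X, unsigned C) {
  for (unsigned I = 0; I < C; ++I)
    X = (X << 1) | (X >> 31); // rotate left one bit at a time
  return X;
}

static void demoRotateOutOfRange(uint32_t X, unsigned C) {
  C %= 1024; // keep the reference loop cheap
  assert(rotlByOnes(X, C) == rotlByOnes(X, C % 32));
}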
4013
4014bool CombinerHelper::matchICmpToTrueFalseKnownBits(MachineInstr &MI,
4015 int64_t &MatchInfo) {
4016 assert(MI.getOpcode() == TargetOpcode::G_ICMP)((void)0);
4017 auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
4018 auto KnownLHS = KB->getKnownBits(MI.getOperand(2).getReg());
4019 auto KnownRHS = KB->getKnownBits(MI.getOperand(3).getReg());
4020 Optional<bool> KnownVal;
4021 switch (Pred) {
4022 default:
4023 llvm_unreachable("Unexpected G_ICMP predicate?")__builtin_unreachable();
4024 case CmpInst::ICMP_EQ:
4025 KnownVal = KnownBits::eq(KnownLHS, KnownRHS);
4026 break;
4027 case CmpInst::ICMP_NE:
4028 KnownVal = KnownBits::ne(KnownLHS, KnownRHS);
4029 break;
4030 case CmpInst::ICMP_SGE:
4031 KnownVal = KnownBits::sge(KnownLHS, KnownRHS);
4032 break;
4033 case CmpInst::ICMP_SGT:
4034 KnownVal = KnownBits::sgt(KnownLHS, KnownRHS);
4035 break;
4036 case CmpInst::ICMP_SLE:
4037 KnownVal = KnownBits::sle(KnownLHS, KnownRHS);
4038 break;
4039 case CmpInst::ICMP_SLT:
4040 KnownVal = KnownBits::slt(KnownLHS, KnownRHS);
4041 break;
4042 case CmpInst::ICMP_UGE:
4043 KnownVal = KnownBits::uge(KnownLHS, KnownRHS);
4044 break;
4045 case CmpInst::ICMP_UGT:
4046 KnownVal = KnownBits::ugt(KnownLHS, KnownRHS);
4047 break;
4048 case CmpInst::ICMP_ULE:
4049 KnownVal = KnownBits::ule(KnownLHS, KnownRHS);
4050 break;
4051 case CmpInst::ICMP_ULT:
4052 KnownVal = KnownBits::ult(KnownLHS, KnownRHS);
4053 break;
4054 }
4055 if (!KnownVal)
4056 return false;
4057 MatchInfo =
4058 *KnownVal
4059 ? getICmpTrueVal(getTargetLowering(),
4060 /*IsVector = */
4061 MRI.getType(MI.getOperand(0).getReg()).isVector(),
4062 /* IsFP = */ false)
4063 : 0;
4064 return true;
4065}
4066
4067/// Form a G_SBFX from a G_SEXT_INREG fed by a right shift.
4068bool CombinerHelper::matchBitfieldExtractFromSExtInReg(
4069 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4070 assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG)((void)0);
4071 Register Dst = MI.getOperand(0).getReg();
4072 Register Src = MI.getOperand(1).getReg();
4073 LLT Ty = MRI.getType(Src);
4074 LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
4075 if (!LI || !LI->isLegalOrCustom({TargetOpcode::G_SBFX, {Ty, ExtractTy}}))
4076 return false;
4077 int64_t Width = MI.getOperand(2).getImm();
4078 Register ShiftSrc;
4079 int64_t ShiftImm;
4080 if (!mi_match(
4081 Src, MRI,
4082 m_OneNonDBGUse(m_any_of(m_GAShr(m_Reg(ShiftSrc), m_ICst(ShiftImm)),
4083 m_GLShr(m_Reg(ShiftSrc), m_ICst(ShiftImm))))))
4084 return false;
4085 if (ShiftImm < 0 || ShiftImm + Width > Ty.getScalarSizeInBits())
4086 return false;
4087
4088 MatchInfo = [=](MachineIRBuilder &B) {
4089 auto Cst1 = B.buildConstant(ExtractTy, ShiftImm);
4090 auto Cst2 = B.buildConstant(ExtractTy, Width);
4091 B.buildSbfx(Dst, ShiftSrc, Cst1, Cst2);
4092 };
4093 return true;
4094}
4095
4096/// Form a G_UBFX from "(a srl b) & mask", where b and mask are constants.
4097bool CombinerHelper::matchBitfieldExtractFromAnd(
4098 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4099 assert(MI.getOpcode() == TargetOpcode::G_AND)((void)0);
4100 Register Dst = MI.getOperand(0).getReg();
4101 LLT Ty = MRI.getType(Dst);
4102 if (!getTargetLowering().isConstantUnsignedBitfieldExtactLegal(
4103 TargetOpcode::G_UBFX, Ty, Ty))
4104 return false;
4105
4106 int64_t AndImm, LSBImm;
4107 Register ShiftSrc;
4108 const unsigned Size = Ty.getScalarSizeInBits();
4109 if (!mi_match(MI.getOperand(0).getReg(), MRI,
4110 m_GAnd(m_OneNonDBGUse(m_GLShr(m_Reg(ShiftSrc), m_ICst(LSBImm))),
4111 m_ICst(AndImm))))
4112 return false;
4113
4114 // The mask is a mask of the low bits iff imm & (imm+1) == 0.
4115 auto MaybeMask = static_cast<uint64_t>(AndImm);
4116 if (MaybeMask & (MaybeMask + 1))
4117 return false;
4118
4119 // LSB must fit within the register.
4120 if (static_cast<uint64_t>(LSBImm) >= Size)
4121 return false;
4122
4123 LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
4124 uint64_t Width = APInt(Size, AndImm).countTrailingOnes();
4125 MatchInfo = [=](MachineIRBuilder &B) {
4126 auto WidthCst = B.buildConstant(ExtractTy, Width);
4127 auto LSBCst = B.buildConstant(ExtractTy, LSBImm);
4128 B.buildInstr(TargetOpcode::G_UBFX, {Dst}, {ShiftSrc, LSBCst, WidthCst});
4129 };
4130 return true;
4131}
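// Editor's note: an illustrative sketch, not part of the analyzed source.
// It checks the two facts used above: a constant M masks only low bits iff
// M & (M + 1) == 0, and (x >> lsb) & M then extracts a Width-bit field
// starting at lsb, which is what G_UBFX computes.
#include <cassert>
#include <cstdint>

static uint32_t ubfx32(uint32_t X, unsigned Lsb, unsigned Width) {
  // Reference: move the field to the top, then shift it back down,
  // zero-filling the high bits.
  return (X << (32 - Lsb - Width)) >> (32 - Width);
}

static void demoBitfieldExtractFromAnd(uint32_t X, unsigned Lsb,
                                       unsigned Width) {
  Lsb &= 31;
  Width = Width % (32 - Lsb) + 1; // 1 <= Width <= 32 - Lsb
  uint32_t Mask = (Width == 32) ? ~0u : ((1u << Width) - 1);
  assert((Mask & (Mask + 1)) == 0);            // low-bit-mask test
  assert(((X >> Lsb) & Mask) == ubfx32(X, Lsb, Width));
}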
4132
4133bool CombinerHelper::reassociationCanBreakAddressingModePattern(
4134 MachineInstr &PtrAdd) {
4135 assert(PtrAdd.getOpcode() == TargetOpcode::G_PTR_ADD)((void)0);
4136
4137 Register Src1Reg = PtrAdd.getOperand(1).getReg();
4138 MachineInstr *Src1Def = getOpcodeDef(TargetOpcode::G_PTR_ADD, Src1Reg, MRI);
4139 if (!Src1Def)
4140 return false;
4141
4142 Register Src2Reg = PtrAdd.getOperand(2).getReg();
4143
4144 if (MRI.hasOneNonDBGUse(Src1Reg))
4145 return false;
4146
4147 auto C1 = getConstantVRegVal(Src1Def->getOperand(2).getReg(), MRI);
4148 if (!C1)
4149 return false;
4150 auto C2 = getConstantVRegVal(Src2Reg, MRI);
4151 if (!C2)
4152 return false;
4153
4154 const APInt &C1APIntVal = *C1;
4155 const APInt &C2APIntVal = *C2;
4156 const int64_t CombinedValue = (C1APIntVal + C2APIntVal).getSExtValue();
4157
4158 for (auto &UseMI : MRI.use_nodbg_instructions(Src1Reg)) {
4159 // This combine may end up running before ptrtoint/inttoptr combines
4160 // manage to eliminate redundant conversions, so try to look through them.
4161 MachineInstr *ConvUseMI = &UseMI;
4162 unsigned ConvUseOpc = ConvUseMI->getOpcode();
4163 while (ConvUseOpc == TargetOpcode::G_INTTOPTR ||
4164 ConvUseOpc == TargetOpcode::G_PTRTOINT) {
4165 Register DefReg = ConvUseMI->getOperand(0).getReg();
4166 if (!MRI.hasOneNonDBGUse(DefReg))
4167 break;
4168 ConvUseMI = &*MRI.use_instr_nodbg_begin(DefReg);
4169 ConvUseOpc = ConvUseMI->getOpcode();
4170 }
4171 auto LoadStore = ConvUseOpc == TargetOpcode::G_LOAD ||
4172 ConvUseOpc == TargetOpcode::G_STORE;
4173 if (!LoadStore)
4174 continue;
4175 // Is x[offset2] already not a legal addressing mode? If so then
4176 // reassociating the constants breaks nothing (we test offset2 because
4177 // that's the one we hope to fold into the load or store).
4178 TargetLoweringBase::AddrMode AM;
4179 AM.HasBaseReg = true;
4180 AM.BaseOffs = C2APIntVal.getSExtValue();
4181 unsigned AS =
4182 MRI.getType(ConvUseMI->getOperand(1).getReg()).getAddressSpace();
4183 Type *AccessTy =
4184 getTypeForLLT(MRI.getType(ConvUseMI->getOperand(0).getReg()),
4185 PtrAdd.getMF()->getFunction().getContext());
4186 const auto &TLI = *PtrAdd.getMF()->getSubtarget().getTargetLowering();
4187 if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM,
4188 AccessTy, AS))
4189 continue;
4190
4191 // Would x[offset1+offset2] still be a legal addressing mode?
4192 AM.BaseOffs = CombinedValue;
4193 if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM,
4194 AccessTy, AS))
4195 return true;
4196 }
4197
4198 return false;
4199}
4200
4201bool CombinerHelper::matchReassocPtrAdd(
4202 MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4203 assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD)((void)0);
4204 // We're trying to match a few pointer computation patterns here for
4205 // re-association opportunities.
4206 // 1) Isolating a constant operand to be on the RHS, e.g.:
4207 // G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)
4208 //
4209 // 2) Folding two constants in each sub-tree as long as such folding
4210 // doesn't break a legal addressing mode.
4211 // G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
4212 Register Src1Reg = MI.getOperand(1).getReg();
4213 Register Src2Reg = MI.getOperand(2).getReg();
4214 MachineInstr *LHS = MRI.getVRegDef(Src1Reg);
4215 MachineInstr *RHS = MRI.getVRegDef(Src2Reg);
4216
4217 if (LHS->getOpcode() != TargetOpcode::G_PTR_ADD) {
4218 // Try to match example 1).
4219 if (RHS->getOpcode() != TargetOpcode::G_ADD)
4220 return false;
4221 auto C2 = getConstantVRegVal(RHS->getOperand(2).getReg(), MRI);
4222 if (!C2)
4223 return false;
4224
4225 MatchInfo = [=,&MI](MachineIRBuilder &B) {
4226 LLT PtrTy = MRI.getType(MI.getOperand(0).getReg());
4227
4228 auto NewBase =
4229 Builder.buildPtrAdd(PtrTy, Src1Reg, RHS->getOperand(1).getReg());
4230 Observer.changingInstr(MI);
4231 MI.getOperand(1).setReg(NewBase.getReg(0));
4232 MI.getOperand(2).setReg(RHS->getOperand(2).getReg());
4233 Observer.changedInstr(MI);
4234 };
4235 } else {
4236 // Try to match example 2.
4237 Register LHSSrc1 = LHS->getOperand(1).getReg();
4238 Register LHSSrc2 = LHS->getOperand(2).getReg();
4239 auto C1 = getConstantVRegVal(LHSSrc2, MRI);
4240 if (!C1)
4241 return false;
4242 auto C2 = getConstantVRegVal(Src2Reg, MRI);
4243 if (!C2)
4244 return false;
4245
4246 MatchInfo = [=, &MI](MachineIRBuilder &B) {
4247 auto NewCst = B.buildConstant(MRI.getType(Src2Reg), *C1 + *C2);
4248 Observer.changingInstr(MI);
4249 MI.getOperand(1).setReg(LHSSrc1);
4250 MI.getOperand(2).setReg(NewCst.getReg(0));
4251 Observer.changedInstr(MI);
4252 };
4253 }
4254 return !reassociationCanBreakAddressingModePattern(MI);
4255}
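// Editor's note: an illustrative sketch, not part of the analyzed source.
// It checks the two reassociations matched above on ordinary byte pointers,
// assuming Base points into a buffer large enough that every offset stays
// in bounds: base + (x + c) == (base + x) + c, and
// (base + c1) + c2 == base + (c1 + c2).
#include <cassert>
#include <cstddef>

static void demoReassocPtrAdd(char *Base, std::ptrdiff_t X,
                              std::ptrdiff_t C1, std::ptrdiff_t C2) {
  assert(Base + (X + C1) == (Base + X) + C1);   // pattern 1
  assert((Base + C1) + C2 == Base + (C1 + C2)); // pattern 2
}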
4256
4257bool CombinerHelper::matchConstantFold(MachineInstr &MI, APInt &MatchInfo) {
4258 Register Op1 = MI.getOperand(1).getReg();
4259 Register Op2 = MI.getOperand(2).getReg();
4260 auto MaybeCst = ConstantFoldBinOp(MI.getOpcode(), Op1, Op2, MRI);
4261 if (!MaybeCst)
4262 return false;
4263 MatchInfo = *MaybeCst;
4264 return true;
4265}
4266
4267bool CombinerHelper::tryCombine(MachineInstr &MI) {
4268 if (tryCombineCopy(MI))
4269 return true;
4270 if (tryCombineExtendingLoads(MI))
4271 return true;
4272 if (tryCombineIndexedLoadStore(MI))
4273 return true;
4274 return false;
4275}