Bug Summary

File: src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
Warning: line 2883, column 21
Called C++ object pointer is null
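This diagnostic is emitted (typically by the core.CallAndMessage checker) when a non-static member function is invoked through an object pointer that can be null on at least one path the analyzer explores. A minimal sketch of the defect class, using made-up names rather than the actual code at line 2883:

    // Illustrative only; hypothetical names, not the flagged LLVM code.
    struct Node {
      void visit() {}
    };

    void walk(Node *N, bool HaveNode) {
      Node *Cur = nullptr;
      if (HaveNode)
        Cur = N;
      // On the path where HaveNode is false, Cur is still null here, so the
      // call below goes through a null object pointer, which is the pattern
      // the "Called C++ object pointer is null" diagnostic reports.
      Cur->visit();
    }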

Annotated Source Code


clang -cc1 -cc1 -triple amd64-unknown-openbsd7.0 -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name SimpleLoopUnswitch.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 1 -fhalf-no-semantic-interposition -mframe-pointer=all -relaxed-aliasing -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/usr/src/gnu/usr.bin/clang/libLLVM/obj -resource-dir /usr/local/lib/clang/13.0.0 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Analysis -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ASMParser -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/BinaryFormat -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Bitcode -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Bitcode -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Bitstream -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /include/llvm/CodeGen -I /include/llvm/CodeGen/PBQP -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/IR -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/IR -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Coroutines -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ProfileData/Coverage -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/CodeView -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/DWARF -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/MSF -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/PDB -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Demangle -I 
/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ExecutionEngine -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ExecutionEngine/JITLink -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ExecutionEngine/Orc -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend/OpenACC -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend/OpenMP -I /include/llvm/CodeGen/GlobalISel -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/IRReader -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/InstCombine -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/Transforms/InstCombine -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/LTO -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Linker -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/MC -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/MC/MCParser -I /include/llvm/CodeGen/MIRParser -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Object -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Option -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Passes -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ProfileData -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Scalar -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ADT -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Support -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/Symbolize -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Target -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Utils -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Vectorize -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/IPO -I 
/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include -I /usr/src/gnu/usr.bin/clang/libLLVM/../include -I /usr/src/gnu/usr.bin/clang/libLLVM/obj -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include -D NDEBUG -D __STDC_LIMIT_MACROS -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D LLVM_PREFIX="/usr" -D PIC -internal-isystem /usr/include/c++/v1 -internal-isystem /usr/local/lib/clang/13.0.0/include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/usr/src/gnu/usr.bin/clang/libLLVM/obj -ferror-limit 19 -fvisibility-inlines-hidden -fwrapv -D_RET_PROTECTOR -ret-protector -fno-rtti -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-valloc -fno-builtin-free -fno-builtin-strdup -fno-builtin-strndup -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /home/ben/Projects/vmm/scan-build/2022-01-12-194120-40624-1 -x c++ /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
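One detail of this invocation worth noting: it defines NDEBUG (-D NDEBUG), so every assert(...) in the listing below compiles to a no-op and places no constraint on the values the analyzer considers. Roughly, the standard <cassert> behavior (a sketch, not the exact header text):

    #ifdef NDEBUG
    #  define assert(cond) ((void)0)   /* checks vanish in NDEBUG builds */
    #else
    #  define assert(cond) /* evaluate cond; report and abort if it is false */
    #endif

In such a configuration, a pointer that is only guarded by an assert can still be reported as possibly null at a later dereference.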

/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp

1///===- SimpleLoopUnswitch.cpp - Hoist loop-invariant control flow ---------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
10#include "llvm/ADT/DenseMap.h"
11#include "llvm/ADT/STLExtras.h"
12#include "llvm/ADT/Sequence.h"
13#include "llvm/ADT/SetVector.h"
14#include "llvm/ADT/SmallPtrSet.h"
15#include "llvm/ADT/SmallVector.h"
16#include "llvm/ADT/Statistic.h"
17#include "llvm/ADT/Twine.h"
18#include "llvm/Analysis/AssumptionCache.h"
19#include "llvm/Analysis/CFG.h"
20#include "llvm/Analysis/CodeMetrics.h"
21#include "llvm/Analysis/GuardUtils.h"
22#include "llvm/Analysis/InstructionSimplify.h"
23#include "llvm/Analysis/LoopAnalysisManager.h"
24#include "llvm/Analysis/LoopInfo.h"
25#include "llvm/Analysis/LoopIterator.h"
26#include "llvm/Analysis/LoopPass.h"
27#include "llvm/Analysis/MemorySSA.h"
28#include "llvm/Analysis/MemorySSAUpdater.h"
29#include "llvm/Analysis/MustExecute.h"
30#include "llvm/Analysis/ScalarEvolution.h"
31#include "llvm/IR/BasicBlock.h"
32#include "llvm/IR/Constant.h"
33#include "llvm/IR/Constants.h"
34#include "llvm/IR/Dominators.h"
35#include "llvm/IR/Function.h"
36#include "llvm/IR/IRBuilder.h"
37#include "llvm/IR/InstrTypes.h"
38#include "llvm/IR/Instruction.h"
39#include "llvm/IR/Instructions.h"
40#include "llvm/IR/IntrinsicInst.h"
41#include "llvm/IR/PatternMatch.h"
42#include "llvm/IR/Use.h"
43#include "llvm/IR/Value.h"
44#include "llvm/InitializePasses.h"
45#include "llvm/Pass.h"
46#include "llvm/Support/Casting.h"
47#include "llvm/Support/CommandLine.h"
48#include "llvm/Support/Debug.h"
49#include "llvm/Support/ErrorHandling.h"
50#include "llvm/Support/GenericDomTree.h"
51#include "llvm/Support/raw_ostream.h"
52#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
53#include "llvm/Transforms/Utils/BasicBlockUtils.h"
54#include "llvm/Transforms/Utils/Cloning.h"
55#include "llvm/Transforms/Utils/Local.h"
56#include "llvm/Transforms/Utils/LoopUtils.h"
57#include "llvm/Transforms/Utils/ValueMapper.h"
58#include <algorithm>
59#include <cassert>
60#include <iterator>
61#include <numeric>
62#include <utility>
63
64#define DEBUG_TYPE "simple-loop-unswitch"
65
66using namespace llvm;
67using namespace llvm::PatternMatch;
68
69STATISTIC(NumBranches, "Number of branches unswitched");
70STATISTIC(NumSwitches, "Number of switches unswitched");
71STATISTIC(NumGuards, "Number of guards turned into branches for unswitching");
72STATISTIC(NumTrivial, "Number of unswitches that are trivial");
73STATISTIC(
74 NumCostMultiplierSkipped,
75 "Number of unswitch candidates that had their cost multiplier skipped");
76
77static cl::opt<bool> EnableNonTrivialUnswitch(
78 "enable-nontrivial-unswitch", cl::init(false), cl::Hidden,
79 cl::desc("Forcibly enables non-trivial loop unswitching rather than "
80 "following the configuration passed into the pass."));
81
82static cl::opt<int>
83 UnswitchThreshold("unswitch-threshold", cl::init(50), cl::Hidden,
84 cl::desc("The cost threshold for unswitching a loop."));
85
86static cl::opt<bool> EnableUnswitchCostMultiplier(
87 "enable-unswitch-cost-multiplier", cl::init(true), cl::Hidden,
88 cl::desc("Enable unswitch cost multiplier that prohibits exponential "
89 "explosion in nontrivial unswitch."));
90static cl::opt<int> UnswitchSiblingsToplevelDiv(
91 "unswitch-siblings-toplevel-div", cl::init(2), cl::Hidden,
92 cl::desc("Toplevel siblings divisor for cost multiplier."));
93static cl::opt<int> UnswitchNumInitialUnscaledCandidates(
94 "unswitch-num-initial-unscaled-candidates", cl::init(8), cl::Hidden,
95 cl::desc("Number of unswitch candidates that are ignored when calculating "
96 "cost multiplier."));
97static cl::opt<bool> UnswitchGuards(
98 "simple-loop-unswitch-guards", cl::init(true), cl::Hidden,
99 cl::desc("If enabled, simple loop unswitching will also consider "
100 "llvm.experimental.guard intrinsics as unswitch candidates."));
101static cl::opt<bool> DropNonTrivialImplicitNullChecks(
102 "simple-loop-unswitch-drop-non-trivial-implicit-null-checks",
103 cl::init(false), cl::Hidden,
104 cl::desc("If enabled, drop make.implicit metadata in unswitched implicit "
105 "null checks to save time analyzing if we can keep it."));
106static cl::opt<unsigned>
107 MSSAThreshold("simple-loop-unswitch-memoryssa-threshold",
108 cl::desc("Max number of memory uses to explore during "
109 "partial unswitching analysis"),
110 cl::init(100), cl::Hidden);
111
112/// Collect all of the loop invariant input values transitively used by the
113/// homogeneous instruction graph from a given root.
114///
115/// This essentially walks from a root recursively through loop variant operands
116/// which have the exact same opcode and finds all inputs which are loop
117/// invariant. For some operations these can be re-associated and unswitched out
118/// of the loop entirely.
119static TinyPtrVector<Value *>
120collectHomogenousInstGraphLoopInvariants(Loop &L, Instruction &Root,
121 LoopInfo &LI) {
122 assert(!L.isLoopInvariant(&Root) &&
123 "Only need to walk the graph if root itself is not invariant.");
124 TinyPtrVector<Value *> Invariants;
125
126 bool IsRootAnd = match(&Root, m_LogicalAnd());
127 bool IsRootOr = match(&Root, m_LogicalOr());
128
129 // Build a worklist and recurse through operators collecting invariants.
130 SmallVector<Instruction *, 4> Worklist;
131 SmallPtrSet<Instruction *, 8> Visited;
132 Worklist.push_back(&Root);
133 Visited.insert(&Root);
134 do {
135 Instruction &I = *Worklist.pop_back_val();
136 for (Value *OpV : I.operand_values()) {
137 // Skip constants as unswitching isn't interesting for them.
138 if (isa<Constant>(OpV))
139 continue;
140
141 // Add it to our result if loop invariant.
142 if (L.isLoopInvariant(OpV)) {
143 Invariants.push_back(OpV);
144 continue;
145 }
146
147 // If not an instruction with the same opcode, nothing we can do.
148 Instruction *OpI = dyn_cast<Instruction>(OpV);
149
150 if (OpI && ((IsRootAnd && match(OpI, m_LogicalAnd())) ||
151 (IsRootOr && match(OpI, m_LogicalOr())))) {
152 // Visit this operand.
153 if (Visited.insert(OpI).second)
154 Worklist.push_back(OpI);
155 }
156 }
157 } while (!Worklist.empty());
158
159 return Invariants;
160}
161
162static void replaceLoopInvariantUses(Loop &L, Value *Invariant,
163 Constant &Replacement) {
164 assert(!isa<Constant>(Invariant) && "Why are we unswitching on a constant?");
165
166 // Replace uses of LIC in the loop with the given constant.
167 // We use make_early_inc_range as set invalidates the iterator.
168 for (Use &U : llvm::make_early_inc_range(Invariant->uses())) {
169 Instruction *UserI = dyn_cast<Instruction>(U.getUser());
170
171 // Replace this use within the loop body.
172 if (UserI && L.contains(UserI))
173 U.set(&Replacement);
174 }
175}
176
177/// Check that all the LCSSA PHI nodes in the loop exit block have trivial
178/// incoming values along this edge.
179static bool areLoopExitPHIsLoopInvariant(Loop &L, BasicBlock &ExitingBB,
180 BasicBlock &ExitBB) {
181 for (Instruction &I : ExitBB) {
182 auto *PN = dyn_cast<PHINode>(&I);
183 if (!PN)
184 // No more PHIs to check.
185 return true;
186
187 // If the incoming value for this edge isn't loop invariant the unswitch
188 // won't be trivial.
189 if (!L.isLoopInvariant(PN->getIncomingValueForBlock(&ExitingBB)))
190 return false;
191 }
192 llvm_unreachable("Basic blocks should never be empty!");
193}
194
195/// Copy a set of loop invariant values \p ToDuplicate and insert them at the
196/// end of \p BB and conditionally branch on the copied condition. We only
197/// branch on a single value.
198static void buildPartialUnswitchConditionalBranch(BasicBlock &BB,
199 ArrayRef<Value *> Invariants,
200 bool Direction,
201 BasicBlock &UnswitchedSucc,
202 BasicBlock &NormalSucc) {
203 IRBuilder<> IRB(&BB);
204
205 Value *Cond = Direction ? IRB.CreateOr(Invariants) :
206 IRB.CreateAnd(Invariants);
207 IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
208 Direction ? &NormalSucc : &UnswitchedSucc);
209}
210
211/// Copy a set of loop invariant values, and conditionally branch on them.
212static void buildPartialInvariantUnswitchConditionalBranch(
213 BasicBlock &BB, ArrayRef<Value *> ToDuplicate, bool Direction,
214 BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, Loop &L,
215 MemorySSAUpdater *MSSAU) {
216 ValueToValueMapTy VMap;
217 for (auto *Val : reverse(ToDuplicate)) {
218 Instruction *Inst = cast<Instruction>(Val);
219 Instruction *NewInst = Inst->clone();
220 BB.getInstList().insert(BB.end(), NewInst);
221 RemapInstruction(NewInst, VMap,
222 RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
223 VMap[Val] = NewInst;
224
225 if (!MSSAU)
226 continue;
227
228 MemorySSA *MSSA = MSSAU->getMemorySSA();
229 if (auto *MemUse =
230 dyn_cast_or_null<MemoryUse>(MSSA->getMemoryAccess(Inst))) {
231 auto *DefiningAccess = MemUse->getDefiningAccess();
232 // Get the first defining access before the loop.
233 while (L.contains(DefiningAccess->getBlock())) {
234 // If the defining access is a MemoryPhi, get the incoming
235 // value for the pre-header as defining access.
236 if (auto *MemPhi = dyn_cast<MemoryPhi>(DefiningAccess))
237 DefiningAccess =
238 MemPhi->getIncomingValueForBlock(L.getLoopPreheader());
239 else
240 DefiningAccess = cast<MemoryDef>(DefiningAccess)->getDefiningAccess();
241 }
242 MSSAU->createMemoryAccessInBB(NewInst, DefiningAccess,
243 NewInst->getParent(),
244 MemorySSA::BeforeTerminator);
245 }
246 }
247
248 IRBuilder<> IRB(&BB);
249 Value *Cond = VMap[ToDuplicate[0]];
250 IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
251 Direction ? &NormalSucc : &UnswitchedSucc);
252}
253
254/// Rewrite the PHI nodes in an unswitched loop exit basic block.
255///
256/// Requires that the loop exit and unswitched basic block are the same, and
257/// that the exiting block was a unique predecessor of that block. Rewrites the
258/// PHI nodes in that block such that what were LCSSA PHI nodes become trivial
259/// PHI nodes from the old preheader that now contains the unswitched
260/// terminator.
261static void rewritePHINodesForUnswitchedExitBlock(BasicBlock &UnswitchedBB,
262 BasicBlock &OldExitingBB,
263 BasicBlock &OldPH) {
264 for (PHINode &PN : UnswitchedBB.phis()) {
265 // When the loop exit is directly unswitched we just need to update the
266 // incoming basic block. We loop to handle weird cases with repeated
267 // incoming blocks, but expect to typically only have one operand here.
268 for (auto i : seq<int>(0, PN.getNumOperands())) {
269 assert(PN.getIncomingBlock(i) == &OldExitingBB &&
270 "Found incoming block different from unique predecessor!");
271 PN.setIncomingBlock(i, &OldPH);
272 }
273 }
274}
275
276/// Rewrite the PHI nodes in the loop exit basic block and the split off
277/// unswitched block.
278///
279/// Because the exit block remains an exit from the loop, this rewrites the
280/// LCSSA PHI nodes in it to remove the unswitched edge and introduces PHI
281/// nodes into the unswitched basic block to select between the value in the
282/// old preheader and the loop exit.
283static void rewritePHINodesForExitAndUnswitchedBlocks(BasicBlock &ExitBB,
284 BasicBlock &UnswitchedBB,
285 BasicBlock &OldExitingBB,
286 BasicBlock &OldPH,
287 bool FullUnswitch) {
288 assert(&ExitBB != &UnswitchedBB &&
289 "Must have different loop exit and unswitched blocks!");
290 Instruction *InsertPt = &*UnswitchedBB.begin();
291 for (PHINode &PN : ExitBB.phis()) {
292 auto *NewPN = PHINode::Create(PN.getType(), /*NumReservedValues*/ 2,
293 PN.getName() + ".split", InsertPt);
294
295 // Walk backwards over the old PHI node's inputs to minimize the cost of
296 // removing each one. We have to do this weird loop manually so that we
297 // create the same number of new incoming edges in the new PHI as we expect
298 // each case-based edge to be included in the unswitched switch in some
299 // cases.
300 // FIXME: This is really, really gross. It would be much cleaner if LLVM
301 // allowed us to create a single entry for a predecessor block without
302 // having separate entries for each "edge" even though these edges are
303 // required to produce identical results.
304 for (int i = PN.getNumIncomingValues() - 1; i >= 0; --i) {
305 if (PN.getIncomingBlock(i) != &OldExitingBB)
306 continue;
307
308 Value *Incoming = PN.getIncomingValue(i);
309 if (FullUnswitch)
310 // No more edge from the old exiting block to the exit block.
311 PN.removeIncomingValue(i);
312
313 NewPN->addIncoming(Incoming, &OldPH);
314 }
315
316 // Now replace the old PHI with the new one and wire the old one in as an
317 // input to the new one.
318 PN.replaceAllUsesWith(NewPN);
319 NewPN->addIncoming(&PN, &ExitBB);
320 }
321}
322
323/// Hoist the current loop up to the innermost loop containing a remaining exit.
324///
325/// Because we've removed an exit from the loop, we may have changed the set of
326/// loops reachable and need to move the current loop up the loop nest or even
327/// to an entirely separate nest.
328static void hoistLoopToNewParent(Loop &L, BasicBlock &Preheader,
329 DominatorTree &DT, LoopInfo &LI,
330 MemorySSAUpdater *MSSAU, ScalarEvolution *SE) {
331 // If the loop is already at the top level, we can't hoist it anywhere.
332 Loop *OldParentL = L.getParentLoop();
333 if (!OldParentL)
334 return;
335
336 SmallVector<BasicBlock *, 4> Exits;
337 L.getExitBlocks(Exits);
338 Loop *NewParentL = nullptr;
339 for (auto *ExitBB : Exits)
340 if (Loop *ExitL = LI.getLoopFor(ExitBB))
341 if (!NewParentL || NewParentL->contains(ExitL))
342 NewParentL = ExitL;
343
344 if (NewParentL == OldParentL)
345 return;
346
347 // The new parent loop (if different) should always contain the old one.
348 if (NewParentL)
349 assert(NewParentL->contains(OldParentL) &&
350 "Can only hoist this loop up the nest!");
351
352 // The preheader will need to move with the body of this loop. However,
353 // because it isn't in this loop we also need to update the primary loop map.
354 assert(OldParentL == LI.getLoopFor(&Preheader) &&
355 "Parent loop of this loop should contain this loop's preheader!");
356 LI.changeLoopFor(&Preheader, NewParentL);
357
358 // Remove this loop from its old parent.
359 OldParentL->removeChildLoop(&L);
360
361 // Add the loop either to the new parent or as a top-level loop.
362 if (NewParentL)
363 NewParentL->addChildLoop(&L);
364 else
365 LI.addTopLevelLoop(&L);
366
367 // Remove this loops blocks from the old parent and every other loop up the
368 // nest until reaching the new parent. Also update all of these
369 // no-longer-containing loops to reflect the nesting change.
370 for (Loop *OldContainingL = OldParentL; OldContainingL != NewParentL;
371 OldContainingL = OldContainingL->getParentLoop()) {
372 llvm::erase_if(OldContainingL->getBlocksVector(),
373 [&](const BasicBlock *BB) {
374 return BB == &Preheader || L.contains(BB);
375 });
376
377 OldContainingL->getBlocksSet().erase(&Preheader);
378 for (BasicBlock *BB : L.blocks())
379 OldContainingL->getBlocksSet().erase(BB);
380
381 // Because we just hoisted a loop out of this one, we have essentially
382 // created new exit paths from it. That means we need to form LCSSA PHI
383 // nodes for values used in the no-longer-nested loop.
384 formLCSSA(*OldContainingL, DT, &LI, SE);
385
386 // We shouldn't need to form dedicated exits because the exit introduced
387 // here is the (just split by unswitching) preheader. However, after trivial
388 // unswitching it is possible to get new non-dedicated exits out of parent
389 // loop so let's conservatively form dedicated exit blocks and figure out
390 // if we can optimize later.
391 formDedicatedExitBlocks(OldContainingL, &DT, &LI, MSSAU,
392 /*PreserveLCSSA*/ true);
393 }
394}
395
396// Return the top-most loop containing ExitBB and having ExitBB as exiting block
397// or the loop containing ExitBB, if there is no parent loop containing ExitBB
398// as exiting block.
399static Loop *getTopMostExitingLoop(BasicBlock *ExitBB, LoopInfo &LI) {
400 Loop *TopMost = LI.getLoopFor(ExitBB);
401 Loop *Current = TopMost;
402 while (Current) {
403 if (Current->isLoopExiting(ExitBB))
404 TopMost = Current;
405 Current = Current->getParentLoop();
406 }
407 return TopMost;
408}
409
410/// Unswitch a trivial branch if the condition is loop invariant.
411///
412/// This routine should only be called when loop code leading to the branch has
413/// been validated as trivial (no side effects). This routine checks if the
414/// condition is invariant and one of the successors is a loop exit. This
415/// allows us to unswitch without duplicating the loop, making it trivial.
416///
417/// If this routine fails to unswitch the branch it returns false.
418///
419/// If the branch can be unswitched, this routine splits the preheader and
420/// hoists the branch above that split. Preserves loop simplified form
421/// (splitting the exit block as necessary). It simplifies the branch within
422/// the loop to an unconditional branch but doesn't remove it entirely. Further
423/// cleanup can be done with some simplifycfg like pass.
424///
425/// If `SE` is not null, it will be updated based on the potential loop SCEVs
426/// invalidated by this.
427static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT,
428 LoopInfo &LI, ScalarEvolution *SE,
429 MemorySSAUpdater *MSSAU) {
430 assert(BI.isConditional() && "Can only unswitch a conditional branch!");
431 LLVM_DEBUG(dbgs() << " Trying to unswitch branch: " << BI << "\n");
432
433 // The loop invariant values that we want to unswitch.
434 TinyPtrVector<Value *> Invariants;
435
436 // When true, we're fully unswitching the branch rather than just unswitching
437 // some input conditions to the branch.
438 bool FullUnswitch = false;
439
440 if (L.isLoopInvariant(BI.getCondition())) {
441 Invariants.push_back(BI.getCondition());
442 FullUnswitch = true;
443 } else {
444 if (auto *CondInst = dyn_cast<Instruction>(BI.getCondition()))
445 Invariants = collectHomogenousInstGraphLoopInvariants(L, *CondInst, LI);
446 if (Invariants.empty()) {
447 LLVM_DEBUG(dbgs() << " Couldn't find invariant inputs!\n");
448 return false;
449 }
450 }
451
452 // Check that one of the branch's successors exits, and which one.
453 bool ExitDirection = true;
454 int LoopExitSuccIdx = 0;
455 auto *LoopExitBB = BI.getSuccessor(0);
456 if (L.contains(LoopExitBB)) {
457 ExitDirection = false;
458 LoopExitSuccIdx = 1;
459 LoopExitBB = BI.getSuccessor(1);
460 if (L.contains(LoopExitBB)) {
461 LLVM_DEBUG(dbgs() << " Branch doesn't exit the loop!\n");
462 return false;
463 }
464 }
465 auto *ContinueBB = BI.getSuccessor(1 - LoopExitSuccIdx);
466 auto *ParentBB = BI.getParent();
467 if (!areLoopExitPHIsLoopInvariant(L, *ParentBB, *LoopExitBB)) {
468 LLVM_DEBUG(dbgs() << " Loop exit PHI's aren't loop-invariant!\n");
469 return false;
470 }
471
472 // When unswitching only part of the branch's condition, we need the exit
473 // block to be reached directly from the partially unswitched input. This can
474 // be done when the exit block is along the true edge and the branch condition
475 // is a graph of `or` operations, or the exit block is along the false edge
476 // and the condition is a graph of `and` operations.
477 if (!FullUnswitch) {
478 if (ExitDirection ? !match(BI.getCondition(), m_LogicalOr())
479 : !match(BI.getCondition(), m_LogicalAnd())) {
480 LLVM_DEBUG(dbgs() << " Branch condition is in improper form for "
481 "non-full unswitch!\n");
482 return false;
483 }
484 }
485
486 LLVM_DEBUG({
487 dbgs() << " unswitching trivial invariant conditions for: " << BI
488 << "\n";
489 for (Value *Invariant : Invariants) {
490 dbgs() << " " << *Invariant << " == true";
491 if (Invariant != Invariants.back())
492 dbgs() << " ||";
493 dbgs() << "\n";
494 }
495 });
496
497 // If we have scalar evolutions, we need to invalidate them including this
498 // loop, the loop containing the exit block and the topmost parent loop
499 // exiting via LoopExitBB.
500 if (SE) {
501 if (Loop *ExitL = getTopMostExitingLoop(LoopExitBB, LI))
502 SE->forgetLoop(ExitL);
503 else
504 // Forget the entire nest as this exits the entire nest.
505 SE->forgetTopmostLoop(&L);
506 }
507
508 if (MSSAU && VerifyMemorySSA)
509 MSSAU->getMemorySSA()->verifyMemorySSA();
510
511 // Split the preheader, so that we know that there is a safe place to insert
512 // the conditional branch. We will change the preheader to have a conditional
513 // branch on LoopCond.
514 BasicBlock *OldPH = L.getLoopPreheader();
515 BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI, MSSAU);
516
517 // Now that we have a place to insert the conditional branch, create a place
518 // to branch to: this is the exit block out of the loop that we are
519 // unswitching. We need to split this if there are other loop predecessors.
520 // Because the loop is in simplified form, *any* other predecessor is enough.
521 BasicBlock *UnswitchedBB;
522 if (FullUnswitch && LoopExitBB->getUniquePredecessor()) {
523 assert(LoopExitBB->getUniquePredecessor() == BI.getParent() &&
524 "A branch's parent isn't a predecessor!");
525 UnswitchedBB = LoopExitBB;
526 } else {
527 UnswitchedBB =
528 SplitBlock(LoopExitBB, &LoopExitBB->front(), &DT, &LI, MSSAU);
529 }
530
531 if (MSSAU && VerifyMemorySSA)
532 MSSAU->getMemorySSA()->verifyMemorySSA();
533
534 // Actually move the invariant uses into the unswitched position. If possible,
535 // we do this by moving the instructions, but when doing partial unswitching
536 // we do it by building a new merge of the values in the unswitched position.
537 OldPH->getTerminator()->eraseFromParent();
538 if (FullUnswitch) {
539 // If fully unswitching, we can use the existing branch instruction.
540 // Splice it into the old PH to gate reaching the new preheader and re-point
541 // its successors.
542 OldPH->getInstList().splice(OldPH->end(), BI.getParent()->getInstList(),
543 BI);
544 if (MSSAU) {
545 // Temporarily clone the terminator, to make MSSA update cheaper by
546 // separating "insert edge" updates from "remove edge" ones.
547 ParentBB->getInstList().push_back(BI.clone());
548 } else {
549 // Create a new unconditional branch that will continue the loop as a new
550 // terminator.
551 BranchInst::Create(ContinueBB, ParentBB);
552 }
553 BI.setSuccessor(LoopExitSuccIdx, UnswitchedBB);
554 BI.setSuccessor(1 - LoopExitSuccIdx, NewPH);
555 } else {
556 // Only unswitching a subset of inputs to the condition, so we will need to
557 // build a new branch that merges the invariant inputs.
558 if (ExitDirection)
559 assert(match(BI.getCondition(), m_LogicalOr()) &&
560 "Must have an `or` of `i1`s or `select i1 X, true, Y`s for the "
561 "condition!");
562 else
563 assert(match(BI.getCondition(), m_LogicalAnd()) &&
564 "Must have an `and` of `i1`s or `select i1 X, Y, false`s for the"
565 " condition!");
566 buildPartialUnswitchConditionalBranch(*OldPH, Invariants, ExitDirection,
567 *UnswitchedBB, *NewPH);
568 }
569
570 // Update the dominator tree with the added edge.
571 DT.insertEdge(OldPH, UnswitchedBB);
572
573 // After the dominator tree was updated with the added edge, update MemorySSA
574 // if available.
575 if (MSSAU) {
576 SmallVector<CFGUpdate, 1> Updates;
577 Updates.push_back({cfg::UpdateKind::Insert, OldPH, UnswitchedBB});
578 MSSAU->applyInsertUpdates(Updates, DT);
579 }
580
581 // Finish updating dominator tree and memory ssa for full unswitch.
582 if (FullUnswitch) {
583 if (MSSAU) {
584 // Remove the cloned branch instruction.
585 ParentBB->getTerminator()->eraseFromParent();
586 // Create unconditional branch now.
587 BranchInst::Create(ContinueBB, ParentBB);
588 MSSAU->removeEdge(ParentBB, LoopExitBB);
589 }
590 DT.deleteEdge(ParentBB, LoopExitBB);
591 }
592
593 if (MSSAU && VerifyMemorySSA)
594 MSSAU->getMemorySSA()->verifyMemorySSA();
595
596 // Rewrite the relevant PHI nodes.
597 if (UnswitchedBB == LoopExitBB)
598 rewritePHINodesForUnswitchedExitBlock(*UnswitchedBB, *ParentBB, *OldPH);
599 else
600 rewritePHINodesForExitAndUnswitchedBlocks(*LoopExitBB, *UnswitchedBB,
601 *ParentBB, *OldPH, FullUnswitch);
602
603 // The constant we can replace all of our invariants with inside the loop
604 // body. If any of the invariants have a value other than this the loop won't
605 // be entered.
606 ConstantInt *Replacement = ExitDirection
607 ? ConstantInt::getFalse(BI.getContext())
608 : ConstantInt::getTrue(BI.getContext());
609
610 // Since this is an i1 condition we can also trivially replace uses of it
611 // within the loop with a constant.
612 for (Value *Invariant : Invariants)
613 replaceLoopInvariantUses(L, Invariant, *Replacement);
614
615 // If this was full unswitching, we may have changed the nesting relationship
616 // for this loop so hoist it to its correct parent if needed.
617 if (FullUnswitch)
618 hoistLoopToNewParent(L, *NewPH, DT, LI, MSSAU, SE);
619
620 if (MSSAU && VerifyMemorySSA)
621 MSSAU->getMemorySSA()->verifyMemorySSA();
622
623 LLVM_DEBUG(dbgs() << " done: unswitching trivial branch...\n");
624 ++NumTrivial;
625 ++NumBranches;
626 return true;
627}
628
629/// Unswitch a trivial switch if the condition is loop invariant.
630///
631/// This routine should only be called when loop code leading to the switch has
632/// been validated as trivial (no side effects). This routine checks if the
633/// condition is invariant and that at least one of the successors is a loop
634/// exit. This allows us to unswitch without duplicating the loop, making it
635/// trivial.
636///
637/// If this routine fails to unswitch the switch it returns false.
638///
639/// If the switch can be unswitched, this routine splits the preheader and
640/// copies the switch above that split. If the default case is one of the
641/// exiting cases, it copies the non-exiting cases and points them at the new
642/// preheader. If the default case is not exiting, it copies the exiting cases
643/// and points the default at the preheader. It preserves loop simplified form
644/// (splitting the exit blocks as necessary). It simplifies the switch within
645/// the loop by removing now-dead cases. If the default case is one of those
646/// unswitched, it replaces its destination with a new basic block containing
647/// only unreachable. Such basic blocks, while technically loop exits, are not
648/// considered for unswitching so this is a stable transform and the same
649/// switch will not be revisited. If after unswitching there is only a single
650/// in-loop successor, the switch is further simplified to an unconditional
651/// branch. Still more cleanup can be done with some simplifycfg like pass.
652///
653/// If `SE` is not null, it will be updated based on the potential loop SCEVs
654/// invalidated by this.
655static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
656 LoopInfo &LI, ScalarEvolution *SE,
657 MemorySSAUpdater *MSSAU) {
658 LLVM_DEBUG(dbgs() << " Trying to unswitch switch: " << SI << "\n");
659 Value *LoopCond = SI.getCondition();
660
661 // If this isn't switching on an invariant condition, we can't unswitch it.
662 if (!L.isLoopInvariant(LoopCond))
663 return false;
664
665 auto *ParentBB = SI.getParent();
666
667 // The same check must be used both for the default and the exit cases. We
668 // should never leave edges from the switch instruction to a basic block that
669 // we are unswitching, hence the condition used to determine the default case
670 // needs to also be used to populate ExitCaseIndices, which is then used to
671 // remove cases from the switch.
672 auto IsTriviallyUnswitchableExitBlock = [&](BasicBlock &BBToCheck) {
673 // BBToCheck is not an exit block if it is inside loop L.
674 if (L.contains(&BBToCheck))
675 return false;
676 // BBToCheck is not trivial to unswitch if its phis aren't loop invariant.
677 if (!areLoopExitPHIsLoopInvariant(L, *ParentBB, BBToCheck))
678 return false;
679 // We do not unswitch a block that only has an unreachable statement, as
680 // it's possible this is a previously unswitched block. Only unswitch if
681 // either the terminator is not unreachable, or, if it is, it's not the only
682 // instruction in the block.
683 auto *TI = BBToCheck.getTerminator();
684 bool isUnreachable = isa<UnreachableInst>(TI);
685 return !isUnreachable ||
686 (isUnreachable && (BBToCheck.getFirstNonPHIOrDbg() != TI));
687 };
688
689 SmallVector<int, 4> ExitCaseIndices;
690 for (auto Case : SI.cases())
691 if (IsTriviallyUnswitchableExitBlock(*Case.getCaseSuccessor()))
692 ExitCaseIndices.push_back(Case.getCaseIndex());
693 BasicBlock *DefaultExitBB = nullptr;
694 SwitchInstProfUpdateWrapper::CaseWeightOpt DefaultCaseWeight =
695 SwitchInstProfUpdateWrapper::getSuccessorWeight(SI, 0);
696 if (IsTriviallyUnswitchableExitBlock(*SI.getDefaultDest())) {
697 DefaultExitBB = SI.getDefaultDest();
698 } else if (ExitCaseIndices.empty())
699 return false;
700
701 LLVM_DEBUG(dbgs() << " unswitching trivial switch...\n");
702
703 if (MSSAU && VerifyMemorySSA)
704 MSSAU->getMemorySSA()->verifyMemorySSA();
705
706 // We may need to invalidate SCEVs for the outermost loop reached by any of
707 // the exits.
708 Loop *OuterL = &L;
709
710 if (DefaultExitBB) {
711 // Clear out the default destination temporarily to allow accurate
712 // predecessor lists to be examined below.
713 SI.setDefaultDest(nullptr);
714 // Check the loop containing this exit.
715 Loop *ExitL = LI.getLoopFor(DefaultExitBB);
716 if (!ExitL || ExitL->contains(OuterL))
717 OuterL = ExitL;
718 }
719
720 // Store the exit cases into a separate data structure and remove them from
721 // the switch.
722 SmallVector<std::tuple<ConstantInt *, BasicBlock *,
723 SwitchInstProfUpdateWrapper::CaseWeightOpt>,
724 4> ExitCases;
725 ExitCases.reserve(ExitCaseIndices.size());
726 SwitchInstProfUpdateWrapper SIW(SI);
727 // We walk the case indices backwards so that we remove the last case first
728 // and don't disrupt the earlier indices.
729 for (unsigned Index : reverse(ExitCaseIndices)) {
730 auto CaseI = SI.case_begin() + Index;
731 // Compute the outer loop from this exit.
732 Loop *ExitL = LI.getLoopFor(CaseI->getCaseSuccessor());
733 if (!ExitL || ExitL->contains(OuterL))
734 OuterL = ExitL;
735 // Save the value of this case.
736 auto W = SIW.getSuccessorWeight(CaseI->getSuccessorIndex());
737 ExitCases.emplace_back(CaseI->getCaseValue(), CaseI->getCaseSuccessor(), W);
738 // Delete the unswitched cases.
739 SIW.removeCase(CaseI);
740 }
741
742 if (SE) {
743 if (OuterL)
744 SE->forgetLoop(OuterL);
745 else
746 SE->forgetTopmostLoop(&L);
747 }
748
749 // Check if after this all of the remaining cases point at the same
750 // successor.
751 BasicBlock *CommonSuccBB = nullptr;
752 if (SI.getNumCases() > 0 &&
753 all_of(drop_begin(SI.cases()), [&SI](const SwitchInst::CaseHandle &Case) {
754 return Case.getCaseSuccessor() == SI.case_begin()->getCaseSuccessor();
755 }))
756 CommonSuccBB = SI.case_begin()->getCaseSuccessor();
757 if (!DefaultExitBB) {
758 // If we're not unswitching the default, we need it to match any cases to
759 // have a common successor or if we have no cases it is the common
760 // successor.
761 if (SI.getNumCases() == 0)
762 CommonSuccBB = SI.getDefaultDest();
763 else if (SI.getDefaultDest() != CommonSuccBB)
764 CommonSuccBB = nullptr;
765 }
766
767 // Split the preheader, so that we know that there is a safe place to insert
768 // the switch.
769 BasicBlock *OldPH = L.getLoopPreheader();
770 BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI, MSSAU);
771 OldPH->getTerminator()->eraseFromParent();
772
773 // Now add the unswitched switch.
774 auto *NewSI = SwitchInst::Create(LoopCond, NewPH, ExitCases.size(), OldPH);
775 SwitchInstProfUpdateWrapper NewSIW(*NewSI);
776
777 // Rewrite the IR for the unswitched basic blocks. This requires two steps.
778 // First, we split any exit blocks with remaining in-loop predecessors. Then
779 // we update the PHIs in one of two ways depending on if there was a split.
780 // We walk in reverse so that we split in the same order as the cases
781 // appeared. This is purely for convenience of reading the resulting IR, but
782 // it doesn't cost anything really.
783 SmallPtrSet<BasicBlock *, 2> UnswitchedExitBBs;
784 SmallDenseMap<BasicBlock *, BasicBlock *, 2> SplitExitBBMap;
785 // Handle the default exit if necessary.
786 // FIXME: It'd be great if we could merge this with the loop below but LLVM's
787 // ranges aren't quite powerful enough yet.
788 if (DefaultExitBB) {
789 if (pred_empty(DefaultExitBB)) {
790 UnswitchedExitBBs.insert(DefaultExitBB);
791 rewritePHINodesForUnswitchedExitBlock(*DefaultExitBB, *ParentBB, *OldPH);
792 } else {
793 auto *SplitBB =
794 SplitBlock(DefaultExitBB, &DefaultExitBB->front(), &DT, &LI, MSSAU);
795 rewritePHINodesForExitAndUnswitchedBlocks(*DefaultExitBB, *SplitBB,
796 *ParentBB, *OldPH,
797 /*FullUnswitch*/ true);
798 DefaultExitBB = SplitExitBBMap[DefaultExitBB] = SplitBB;
799 }
800 }
801 // Note that we must use a reference in the for loop so that we update the
802 // container.
803 for (auto &ExitCase : reverse(ExitCases)) {
804 // Grab a reference to the exit block in the pair so that we can update it.
805 BasicBlock *ExitBB = std::get<1>(ExitCase);
806
807 // If this case is the last edge into the exit block, we can simply reuse it
808 // as it will no longer be a loop exit. No mapping necessary.
809 if (pred_empty(ExitBB)) {
810 // Only rewrite once.
811 if (UnswitchedExitBBs.insert(ExitBB).second)
812 rewritePHINodesForUnswitchedExitBlock(*ExitBB, *ParentBB, *OldPH);
813 continue;
814 }
815
816 // Otherwise we need to split the exit block so that we retain an exit
817 // block from the loop and a target for the unswitched condition.
818 BasicBlock *&SplitExitBB = SplitExitBBMap[ExitBB];
819 if (!SplitExitBB) {
820 // If this is the first time we see this, do the split and remember it.
821 SplitExitBB = SplitBlock(ExitBB, &ExitBB->front(), &DT, &LI, MSSAU);
822 rewritePHINodesForExitAndUnswitchedBlocks(*ExitBB, *SplitExitBB,
823 *ParentBB, *OldPH,
824 /*FullUnswitch*/ true);
825 }
826 // Update the case pair to point to the split block.
827 std::get<1>(ExitCase) = SplitExitBB;
828 }
829
830 // Now add the unswitched cases. We do this in reverse order as we built them
831 // in reverse order.
832 for (auto &ExitCase : reverse(ExitCases)) {
833 ConstantInt *CaseVal = std::get<0>(ExitCase);
834 BasicBlock *UnswitchedBB = std::get<1>(ExitCase);
835
836 NewSIW.addCase(CaseVal, UnswitchedBB, std::get<2>(ExitCase));
837 }
838
839 // If the default was unswitched, re-point it and add explicit cases for
840 // entering the loop.
841 if (DefaultExitBB) {
842 NewSIW->setDefaultDest(DefaultExitBB);
843 NewSIW.setSuccessorWeight(0, DefaultCaseWeight);
844
845 // We removed all the exit cases, so we just copy the cases to the
846 // unswitched switch.
847 for (const auto &Case : SI.cases())
848 NewSIW.addCase(Case.getCaseValue(), NewPH,
849 SIW.getSuccessorWeight(Case.getSuccessorIndex()));
850 } else if (DefaultCaseWeight) {
851 // We have to set branch weight of the default case.
852 uint64_t SW = *DefaultCaseWeight;
853 for (const auto &Case : SI.cases()) {
854 auto W = SIW.getSuccessorWeight(Case.getSuccessorIndex());
855 assert(W &&
856 "case weight must be defined as default case weight is defined");
857 SW += *W;
858 }
859 NewSIW.setSuccessorWeight(0, SW);
860 }
861
862 // If we ended up with a common successor for every path through the switch
863 // after unswitching, rewrite it to an unconditional branch to make it easy
864 // to recognize. Otherwise we potentially have to recognize the default case
865 // pointing at unreachable and other complexity.
866 if (CommonSuccBB) {
867 BasicBlock *BB = SI.getParent();
868 // We may have had multiple edges to this common successor block, so remove
869 // them as predecessors. We skip the first one, either the default or the
870 // actual first case.
871 bool SkippedFirst = DefaultExitBB == nullptr;
872 for (auto Case : SI.cases()) {
873 assert(Case.getCaseSuccessor() == CommonSuccBB &&
874 "Non-common successor!");
875 (void)Case;
876 if (!SkippedFirst) {
877 SkippedFirst = true;
878 continue;
879 }
880 CommonSuccBB->removePredecessor(BB,
881 /*KeepOneInputPHIs*/ true);
882 }
883 // Now nuke the switch and replace it with a direct branch.
884 SIW.eraseFromParent();
885 BranchInst::Create(CommonSuccBB, BB);
886 } else if (DefaultExitBB) {
887 assert(SI.getNumCases() > 0 &&
888 "If we had no cases we'd have a common successor!");
889 // Move the last case to the default successor. This is valid as if the
890 // default got unswitched it cannot be reached. This has the advantage of
891 // being simple and keeping the number of edges from this switch to
892 // successors the same, and avoiding any PHI update complexity.
893 auto LastCaseI = std::prev(SI.case_end());
894
895 SI.setDefaultDest(LastCaseI->getCaseSuccessor());
896 SIW.setSuccessorWeight(
897 0, SIW.getSuccessorWeight(LastCaseI->getSuccessorIndex()));
898 SIW.removeCase(LastCaseI);
899 }
900
901 // Walk the unswitched exit blocks and the unswitched split blocks and update
902 // the dominator tree based on the CFG edits. While we are walking unordered
903 // containers here, the API for applyUpdates takes an unordered list of
904 // updates and requires them to not contain duplicates.
905 SmallVector<DominatorTree::UpdateType, 4> DTUpdates;
906 for (auto *UnswitchedExitBB : UnswitchedExitBBs) {
907 DTUpdates.push_back({DT.Delete, ParentBB, UnswitchedExitBB});
908 DTUpdates.push_back({DT.Insert, OldPH, UnswitchedExitBB});
909 }
910 for (auto SplitUnswitchedPair : SplitExitBBMap) {
911 DTUpdates.push_back({DT.Delete, ParentBB, SplitUnswitchedPair.first});
912 DTUpdates.push_back({DT.Insert, OldPH, SplitUnswitchedPair.second});
913 }
914
915 if (MSSAU) {
916 MSSAU->applyUpdates(DTUpdates, DT, /*UpdateDT=*/true);
917 if (VerifyMemorySSA)
918 MSSAU->getMemorySSA()->verifyMemorySSA();
919 } else {
920 DT.applyUpdates(DTUpdates);
921 }
922
923 assert(DT.verify(DominatorTree::VerificationLevel::Fast));
924
925 // We may have changed the nesting relationship for this loop so hoist it to
926 // its correct parent if needed.
927 hoistLoopToNewParent(L, *NewPH, DT, LI, MSSAU, SE);
928
929 if (MSSAU && VerifyMemorySSA)
930 MSSAU->getMemorySSA()->verifyMemorySSA();
931
932 ++NumTrivial;
933 ++NumSwitches;
934 LLVM_DEBUG(dbgs() << " done: unswitching trivial switch...\n");
935 return true;
936}
937
938/// This routine scans the loop to find a branch or switch which occurs before
939/// any side effects occur. These can potentially be unswitched without
940/// duplicating the loop. If a branch or switch is successfully unswitched the
941/// scanning continues to see if subsequent branches or switches have become
942/// trivial. Once all trivial candidates have been unswitched, this routine
943/// returns.
944///
945/// The return value indicates whether anything was unswitched (and therefore
946/// changed).
947///
948/// If `SE` is not null, it will be updated based on the potential loop SCEVs
949/// invalidated by this.
950static bool unswitchAllTrivialConditions(Loop &L, DominatorTree &DT,
951 LoopInfo &LI, ScalarEvolution *SE,
952 MemorySSAUpdater *MSSAU) {
953 bool Changed = false;
954
955 // If loop header has only one reachable successor we should keep looking for
956 // trivial condition candidates in the successor as well. An alternative is
957 // to constant fold conditions and merge successors into loop header (then we
958 // only need to check header's terminator). The reason for not doing this in
959 // LoopUnswitch pass is that it could potentially break LoopPassManager's
960 // invariants. Folding dead branches could either eliminate the current loop
961 // or make other loops unreachable. LCSSA form might also not be preserved
962 // after deleting branches. The following code keeps traversing loop header's
963 // successors until it finds the trivial condition candidate (condition that
964 // is not a constant). Since unswitching generates branches with constant
965 // conditions, this scenario could be very common in practice.
966 BasicBlock *CurrentBB = L.getHeader();
967 SmallPtrSet<BasicBlock *, 8> Visited;
968 Visited.insert(CurrentBB);
969 do {
970 // Check if there are any side-effecting instructions (e.g. stores, calls,
971 // volatile loads) in the part of the loop that the code *would* execute
972 // without unswitching.
973 if (MSSAU) // Possible early exit with MSSA
974 if (auto *Defs = MSSAU->getMemorySSA()->getBlockDefs(CurrentBB))
975 if (!isa<MemoryPhi>(*Defs->begin()) || (++Defs->begin() != Defs->end()))
976 return Changed;
977 if (llvm::any_of(*CurrentBB,
978 [](Instruction &I) { return I.mayHaveSideEffects(); }))
979 return Changed;
980
981 Instruction *CurrentTerm = CurrentBB->getTerminator();
982
983 if (auto *SI = dyn_cast<SwitchInst>(CurrentTerm)) {
984 // Don't bother trying to unswitch past a switch with a constant
985 // condition. This should be removed prior to running this pass by
986 // simplifycfg.
987 if (isa<Constant>(SI->getCondition()))
988 return Changed;
989
990 if (!unswitchTrivialSwitch(L, *SI, DT, LI, SE, MSSAU))
991 // Couldn't unswitch this one so we're done.
992 return Changed;
993
994 // Mark that we managed to unswitch something.
995 Changed = true;
996
997 // If unswitching turned the terminator into an unconditional branch then
998 // we can continue. The unswitching logic specifically works to fold any
999 // cases it can into an unconditional branch to make it easier to
1000 // recognize here.
1001 auto *BI = dyn_cast<BranchInst>(CurrentBB->getTerminator());
1002 if (!BI || BI->isConditional())
1003 return Changed;
1004
1005 CurrentBB = BI->getSuccessor(0);
1006 continue;
1007 }
1008
1009 auto *BI = dyn_cast<BranchInst>(CurrentTerm);
1010 if (!BI)
1011 // We do not understand other terminator instructions.
1012 return Changed;
1013
1014 // Don't bother trying to unswitch past an unconditional branch or a branch
1015 // with a constant value. These should be removed by simplifycfg prior to
1016 // running this pass.
1017 if (!BI->isConditional() || isa<Constant>(BI->getCondition()))
1018 return Changed;
1019
1020 // Found a trivial condition candidate: non-foldable conditional branch. If
1021 // we fail to unswitch this, we can't do anything else that is trivial.
1022 if (!unswitchTrivialBranch(L, *BI, DT, LI, SE, MSSAU))
1023 return Changed;
1024
1025 // Mark that we managed to unswitch something.
1026 Changed = true;
1027
1028 // If we only unswitched some of the conditions feeding the branch, we won't
1029 // have collapsed it to a single successor.
1030 BI = cast<BranchInst>(CurrentBB->getTerminator());
1031 if (BI->isConditional())
1032 return Changed;
1033
1034 // Follow the newly unconditional branch into its successor.
1035 CurrentBB = BI->getSuccessor(0);
1036
1037 // When continuing, if we exit the loop or reach a previous visited block,
1038 // then we can not reach any trivial condition candidates (unfoldable
1039 // branch instructions or switch instructions) and no unswitch can happen.
1040 } while (L.contains(CurrentBB) && Visited.insert(CurrentBB).second);
1041
1042 return Changed;
1043}
1044
1045/// Build the cloned blocks for an unswitched copy of the given loop.
1046///
1047/// The cloned blocks are inserted before the loop preheader (`LoopPH`) and
1048/// after the split block (`SplitBB`) that will be used to select between the
1049/// cloned and original loop.
1050///
1051/// This routine handles cloning all of the necessary loop blocks and exit
1052/// blocks including rewriting their instructions and the relevant PHI nodes.
1053/// Any loop blocks or exit blocks which are dominated by a different successor
1054/// than the one for this clone of the loop blocks can be trivially skipped. We
1055/// use the `DominatingSucc` map to determine whether a block satisfies that
1056/// property with a simple map lookup.
1057///
1058/// It also correctly creates the unconditional branch in the cloned
1059/// unswitched parent block to only point at the unswitched successor.
1060///
1061/// This does not handle most of the necessary updates to `LoopInfo`. Only exit
1062/// block splitting is correctly reflected in `LoopInfo`, essentially all of
1063/// the cloned blocks (and their loops) are left without full `LoopInfo`
1064/// updates. This also doesn't fully update `DominatorTree`. It adds the cloned
1065/// blocks to them but doesn't create the cloned `DominatorTree` structure and
1066/// instead the caller must recompute an accurate DT. It *does* correctly
1067/// update the `AssumptionCache` provided in `AC`.
1068static BasicBlock *buildClonedLoopBlocks(
1069 Loop &L, BasicBlock *LoopPH, BasicBlock *SplitBB,
1070 ArrayRef<BasicBlock *> ExitBlocks, BasicBlock *ParentBB,
1071 BasicBlock *UnswitchedSuccBB, BasicBlock *ContinueSuccBB,
1072 const SmallDenseMap<BasicBlock *, BasicBlock *, 16> &DominatingSucc,
1073 ValueToValueMapTy &VMap,
1074 SmallVectorImpl<DominatorTree::UpdateType> &DTUpdates, AssumptionCache &AC,
1075 DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU) {
1076 SmallVector<BasicBlock *, 4> NewBlocks;
1077 NewBlocks.reserve(L.getNumBlocks() + ExitBlocks.size());
1078
1079 // We will need to clone a bunch of blocks, wrap up the clone operation in
1080 // a helper.
1081 auto CloneBlock = [&](BasicBlock *OldBB) {
1082 // Clone the basic block and insert it before the new preheader.
1083 BasicBlock *NewBB = CloneBasicBlock(OldBB, VMap, ".us", OldBB->getParent());
1084 NewBB->moveBefore(LoopPH);
1085
1086 // Record this block and the mapping.
1087 NewBlocks.push_back(NewBB);
1088 VMap[OldBB] = NewBB;
1089
1090 return NewBB;
1091 };
1092
1093 // We skip cloning blocks when they have a dominating succ that is not the
1094 // succ we are cloning for.
1095 auto SkipBlock = [&](BasicBlock *BB) {
1096 auto It = DominatingSucc.find(BB);
1097 return It != DominatingSucc.end() && It->second != UnswitchedSuccBB;
1098 };
1099
1100 // First, clone the preheader.
1101 auto *ClonedPH = CloneBlock(LoopPH);
1102
1103 // Then clone all the loop blocks, skipping the ones that aren't necessary.
1104 for (auto *LoopBB : L.blocks())
1105 if (!SkipBlock(LoopBB))
1106 CloneBlock(LoopBB);
1107
1108 // Split all the loop exit edges so that when we clone the exit blocks, if
1109 // any of the exit blocks are *also* a preheader for some other loop, we
1110 // don't create multiple predecessors entering the loop header.
1111 for (auto *ExitBB : ExitBlocks) {
1112 if (SkipBlock(ExitBB))
1113 continue;
1114
1115 // When we are going to clone an exit, we don't need to clone all the
1116 // instructions in the exit block and we want to ensure we have an easy
1117 // place to merge the CFG, so split the exit first. This is always safe to
1118 // do because there cannot be any non-loop predecessors of a loop exit in
1119 // loop simplified form.
1120 auto *MergeBB = SplitBlock(ExitBB, &ExitBB->front(), &DT, &LI, MSSAU);
1121
1122 // Rearrange the names to make it easier to write test cases by having the
1123 // exit block carry the suffix rather than the merge block carrying the
1124 // suffix.
1125 MergeBB->takeName(ExitBB);
1126 ExitBB->setName(Twine(MergeBB->getName()) + ".split");
1127
1128 // Now clone the original exit block.
1129 auto *ClonedExitBB = CloneBlock(ExitBB);
1130 assert(ClonedExitBB->getTerminator()->getNumSuccessors() == 1 &&
1131 "Exit block should have been split to have one successor!");
1132 assert(ClonedExitBB->getTerminator()->getSuccessor(0) == MergeBB &&
1133 "Cloned exit block has the wrong successor!");
1134
1135 // Remap any cloned instructions and create a merge phi node for them.
1136 for (auto ZippedInsts : llvm::zip_first(
1137 llvm::make_range(ExitBB->begin(), std::prev(ExitBB->end())),
1138 llvm::make_range(ClonedExitBB->begin(),
1139 std::prev(ClonedExitBB->end())))) {
1140 Instruction &I = std::get<0>(ZippedInsts);
1141 Instruction &ClonedI = std::get<1>(ZippedInsts);
1142
1143 // The only instructions in the exit block should be PHI nodes and
1144 // potentially a landing pad.
1145 assert(
1146 (isa<PHINode>(I) || isa<LandingPadInst>(I) || isa<CatchPadInst>(I)) &&
1147 "Bad instruction in exit block!");
1148 // We should have a value map between the instruction and its clone.
1149 assert(VMap.lookup(&I) == &ClonedI && "Mismatch in the value map!");
1150
1151 auto *MergePN =
1152 PHINode::Create(I.getType(), /*NumReservedValues*/ 2, ".us-phi",
1153 &*MergeBB->getFirstInsertionPt());
1154 I.replaceAllUsesWith(MergePN);
1155 MergePN->addIncoming(&I, ExitBB);
1156 MergePN->addIncoming(&ClonedI, ClonedExitBB);
1157 }
1158 }
1159
1160 // Rewrite the instructions in the cloned blocks to refer to the instructions
1161 // in the cloned blocks. We have to do this as a second pass so that we have
1162 // everything available. Also, we have inserted new instructions which may
1163 // include assume intrinsics, so we update the assumption cache while
1164 // processing this.
1165 for (auto *ClonedBB : NewBlocks)
1166 for (Instruction &I : *ClonedBB) {
1167 RemapInstruction(&I, VMap,
1168 RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
1169 if (auto *II = dyn_cast<AssumeInst>(&I))
1170 AC.registerAssumption(II);
1171 }
1172
1173 // Update any PHI nodes in the cloned successors of the skipped blocks to not
1174 // have spurious incoming values.
1175 for (auto *LoopBB : L.blocks())
1176 if (SkipBlock(LoopBB))
1177 for (auto *SuccBB : successors(LoopBB))
1178 if (auto *ClonedSuccBB = cast_or_null<BasicBlock>(VMap.lookup(SuccBB)))
1179 for (PHINode &PN : ClonedSuccBB->phis())
1180 PN.removeIncomingValue(LoopBB, /*DeletePHIIfEmpty*/ false);
1181
1182 // Remove the cloned parent as a predecessor of any successor we ended up
1183 // cloning other than the unswitched one.
1184 auto *ClonedParentBB = cast<BasicBlock>(VMap.lookup(ParentBB));
1185 for (auto *SuccBB : successors(ParentBB)) {
1186 if (SuccBB == UnswitchedSuccBB)
1187 continue;
1188
1189 auto *ClonedSuccBB = cast_or_null<BasicBlock>(VMap.lookup(SuccBB));
1190 if (!ClonedSuccBB)
1191 continue;
1192
1193 ClonedSuccBB->removePredecessor(ClonedParentBB,
1194 /*KeepOneInputPHIs*/ true);
1195 }
1196
1197 // Replace the cloned branch with an unconditional branch to the cloned
1198 // unswitched successor.
1199 auto *ClonedSuccBB = cast<BasicBlock>(VMap.lookup(UnswitchedSuccBB));
1200 Instruction *ClonedTerminator = ClonedParentBB->getTerminator();
1201 // Trivial simplification: if the terminator was a conditional branch or switch
1202 // and its condition becomes dead, erase the condition as well.
1203 Value *ClonedConditionToErase = nullptr;
1204 if (auto *BI = dyn_cast<BranchInst>(ClonedTerminator))
1205 ClonedConditionToErase = BI->getCondition();
1206 else if (auto *SI = dyn_cast<SwitchInst>(ClonedTerminator))
1207 ClonedConditionToErase = SI->getCondition();
1208
1209 ClonedTerminator->eraseFromParent();
1210 BranchInst::Create(ClonedSuccBB, ClonedParentBB);
1211
1212 if (ClonedConditionToErase)
1213 RecursivelyDeleteTriviallyDeadInstructions(ClonedConditionToErase, nullptr,
1214 MSSAU);
1215
1216 // If there are duplicate entries in the PHI nodes because of multiple edges
1217 // to the unswitched successor, we need to nuke all but one as we replaced it
1218 // with a direct branch.
1219 for (PHINode &PN : ClonedSuccBB->phis()) {
1220 bool Found = false;
1221 // Loop over the incoming operands backwards so we can easily delete as we
1222 // go without invalidating the index.
1223 for (int i = PN.getNumOperands() - 1; i >= 0; --i) {
1224 if (PN.getIncomingBlock(i) != ClonedParentBB)
1225 continue;
1226 if (!Found) {
1227 Found = true;
1228 continue;
1229 }
1230 PN.removeIncomingValue(i, /*DeletePHIIfEmpty*/ false);
1231 }
1232 }
1233
1234 // Record the domtree updates for the new blocks.
1235 SmallPtrSet<BasicBlock *, 4> SuccSet;
1236 for (auto *ClonedBB : NewBlocks) {
1237 for (auto *SuccBB : successors(ClonedBB))
1238 if (SuccSet.insert(SuccBB).second)
1239 DTUpdates.push_back({DominatorTree::Insert, ClonedBB, SuccBB});
1240 SuccSet.clear();
1241 }
1242
1243 return ClonedPH;
1244}
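// Aside (not part of the original SimpleLoopUnswitch.cpp listing): a minimal
// sketch of the two-pass clone-then-remap pattern that buildClonedLoopBlocks
// is built around. The helper name `cloneAndRemapBlocks` and its parameters
// are hypothetical; CloneBasicBlock and RemapInstruction are the same LLVM
// utilities used above.
static void cloneAndRemapBlocks(ArrayRef<BasicBlock *> Blocks,
                                ValueToValueMapTy &VMap) {
  SmallVector<BasicBlock *, 8> Clones;
  // Pass 1: clone each block. CloneBasicBlock records per-instruction
  // mappings in VMap, but the block mapping itself has to be added by hand.
  // The cloned instructions still reference the *original* values here.
  for (BasicBlock *BB : Blocks) {
    BasicBlock *NewBB = CloneBasicBlock(BB, VMap, ".us", BB->getParent());
    VMap[BB] = NewBB;
    Clones.push_back(NewBB);
  }
  // Pass 2: rewrite every operand of the clones through VMap. Doing this as
  // a separate pass means the map is complete no matter in which order the
  // blocks were cloned, which is why the routine above also remaps in a
  // second loop.
  for (BasicBlock *NewBB : Clones)
    for (Instruction &I : *NewBB)
      RemapInstruction(&I, VMap,
                       RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
}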
1245
1246/// Recursively clone the specified loop and all of its children.
1247///
1248/// The target parent loop for the clone should be provided, or can be null if
1249/// the clone is a top-level loop. While cloning, all the blocks are mapped
1250/// with the provided value map. The entire original loop must be present in
1251/// the value map. The cloned loop is returned.
1252static Loop *cloneLoopNest(Loop &OrigRootL, Loop *RootParentL,
1253 const ValueToValueMapTy &VMap, LoopInfo &LI) {
1254 auto AddClonedBlocksToLoop = [&](Loop &OrigL, Loop &ClonedL) {
1255 assert(ClonedL.getBlocks().empty() && "Must start with an empty loop!");
1256 ClonedL.reserveBlocks(OrigL.getNumBlocks());
1257 for (auto *BB : OrigL.blocks()) {
1258 auto *ClonedBB = cast<BasicBlock>(VMap.lookup(BB));
1259 ClonedL.addBlockEntry(ClonedBB);
1260 if (LI.getLoopFor(BB) == &OrigL)
1261 LI.changeLoopFor(ClonedBB, &ClonedL);
1262 }
1263 };
1264
1265 // We specially handle the first loop because it may get cloned into
1266 // a different parent and because we most commonly are cloning leaf loops.
1267 Loop *ClonedRootL = LI.AllocateLoop();
1268 if (RootParentL)
1269 RootParentL->addChildLoop(ClonedRootL);
1270 else
1271 LI.addTopLevelLoop(ClonedRootL);
1272 AddClonedBlocksToLoop(OrigRootL, *ClonedRootL);
1273
1274 if (OrigRootL.isInnermost())
1275 return ClonedRootL;
1276
1277 // If we have a nest, we can quickly clone the entire loop nest using an
1278 // iterative approach because it is a tree. We keep the cloned parent in the
1279 // data structure to avoid repeatedly querying through a map to find it.
1280 SmallVector<std::pair<Loop *, Loop *>, 16> LoopsToClone;
1281 // Build up the loops to clone in reverse order as we'll clone them from the
1282 // back.
1283 for (Loop *ChildL : llvm::reverse(OrigRootL))
1284 LoopsToClone.push_back({ClonedRootL, ChildL});
1285 do {
1286 Loop *ClonedParentL, *L;
1287 std::tie(ClonedParentL, L) = LoopsToClone.pop_back_val();
1288 Loop *ClonedL = LI.AllocateLoop();
1289 ClonedParentL->addChildLoop(ClonedL);
1290 AddClonedBlocksToLoop(*L, *ClonedL);
1291 for (Loop *ChildL : llvm::reverse(*L))
1292 LoopsToClone.push_back({ClonedL, ChildL});
1293 } while (!LoopsToClone.empty());
1294
1295 return ClonedRootL;
1296}
1297
1298/// Build the cloned loops of an original loop from unswitching.
1299///
1300/// Because unswitching simplifies the CFG of the loop, this isn't a trivial
1301/// operation. We need to re-verify that there even is a loop (as the backedge
1302/// may not have been cloned), and even if there are remaining backedges the
1303 /// backedge set may be different. However, we know that each child loop is
1304 /// undisturbed; we only need to find where to place each child loop within
1305/// either any parent loop or within a cloned version of the original loop.
1306///
1307/// Because child loops may end up cloned outside of any cloned version of the
1308/// original loop, multiple cloned sibling loops may be created. All of them
1309/// are returned so that the newly introduced loop nest roots can be
1310/// identified.
1311static void buildClonedLoops(Loop &OrigL, ArrayRef<BasicBlock *> ExitBlocks,
1312 const ValueToValueMapTy &VMap, LoopInfo &LI,
1313 SmallVectorImpl<Loop *> &NonChildClonedLoops) {
1314 Loop *ClonedL = nullptr;
1315
1316 auto *OrigPH = OrigL.getLoopPreheader();
1317 auto *OrigHeader = OrigL.getHeader();
1318
1319 auto *ClonedPH = cast<BasicBlock>(VMap.lookup(OrigPH));
1320 auto *ClonedHeader = cast<BasicBlock>(VMap.lookup(OrigHeader));
1321
1322 // We need to know the loops of the cloned exit blocks to even compute the
1323 // accurate parent loop. If we only clone exits to some parent of the
1324 // original parent, we want to clone into that outer loop. We also keep track
1325 // of the loops that our cloned exit blocks participate in.
1326 Loop *ParentL = nullptr;
1327 SmallVector<BasicBlock *, 4> ClonedExitsInLoops;
1328 SmallDenseMap<BasicBlock *, Loop *, 16> ExitLoopMap;
1329 ClonedExitsInLoops.reserve(ExitBlocks.size());
1330 for (auto *ExitBB : ExitBlocks)
1331 if (auto *ClonedExitBB = cast_or_null<BasicBlock>(VMap.lookup(ExitBB)))
1332 if (Loop *ExitL = LI.getLoopFor(ExitBB)) {
1333 ExitLoopMap[ClonedExitBB] = ExitL;
1334 ClonedExitsInLoops.push_back(ClonedExitBB);
1335 if (!ParentL || (ParentL != ExitL && ParentL->contains(ExitL)))
1336 ParentL = ExitL;
1337 }
1338 assert((!ParentL || ParentL == OrigL.getParentLoop() ||
1339 ParentL->contains(OrigL.getParentLoop())) &&
1340 "The computed parent loop should always contain (or be) the parent of "
1341 "the original loop.");
1342
1343 // We build the set of blocks dominated by the cloned header from the set of
1344 // cloned blocks out of the original loop. While not all of these will
1345 // necessarily be in the cloned loop, it is enough to establish that they
1346 // aren't in unreachable cycles, etc.
1347 SmallSetVector<BasicBlock *, 16> ClonedLoopBlocks;
1348 for (auto *BB : OrigL.blocks())
1349 if (auto *ClonedBB = cast_or_null<BasicBlock>(VMap.lookup(BB)))
1350 ClonedLoopBlocks.insert(ClonedBB);
1351
1352 // Rebuild the set of blocks that will end up in the cloned loop. We may have
1353 // skipped cloning some region of this loop which can in turn skip some of
1354 // the backedges so we have to rebuild the blocks in the loop based on the
1355 // backedges that remain after cloning.
1356 SmallVector<BasicBlock *, 16> Worklist;
1357 SmallPtrSet<BasicBlock *, 16> BlocksInClonedLoop;
1358 for (auto *Pred : predecessors(ClonedHeader)) {
1359 // The only possible non-loop header predecessor is the preheader because
1360 // we know we cloned the loop in simplified form.
1361 if (Pred == ClonedPH)
1362 continue;
1363
1364 // Because the loop was in simplified form, the only non-loop predecessor
1365 // should be the preheader.
1366 assert(ClonedLoopBlocks.count(Pred) && "Found a predecessor of the loop "
1367 "header other than the preheader "
1368 "that is not part of the loop!");
1369
1370 // Insert this block into the loop set and on the first visit (and if it
1371 // isn't the header we're currently walking) put it into the worklist to
1372 // recurse through.
1373 if (BlocksInClonedLoop.insert(Pred).second && Pred != ClonedHeader)
1374 Worklist.push_back(Pred);
1375 }
1376
1377 // If we had any backedges then there *is* a cloned loop. Put the header into
1378 // the loop set and then walk the worklist backwards to find all the blocks
1379 // that remain within the loop after cloning.
1380 if (!BlocksInClonedLoop.empty()) {
1381 BlocksInClonedLoop.insert(ClonedHeader);
1382
1383 while (!Worklist.empty()) {
1384 BasicBlock *BB = Worklist.pop_back_val();
1385 assert(BlocksInClonedLoop.count(BB) &&
1386 "Didn't put block into the loop set!");
1387
1388 // Insert any predecessors that are in the possible set into the cloned
1389 // set, and if the insert is successful, add them to the worklist. Note
1390 // that we filter on the blocks that are definitely reachable via the
1391 // backedge to the loop header so we may prune out dead code within the
1392 // cloned loop.
1393 for (auto *Pred : predecessors(BB))
1394 if (ClonedLoopBlocks.count(Pred) &&
1395 BlocksInClonedLoop.insert(Pred).second)
1396 Worklist.push_back(Pred);
1397 }
1398
1399 ClonedL = LI.AllocateLoop();
1400 if (ParentL) {
1401 ParentL->addBasicBlockToLoop(ClonedPH, LI);
1402 ParentL->addChildLoop(ClonedL);
1403 } else {
1404 LI.addTopLevelLoop(ClonedL);
1405 }
1406 NonChildClonedLoops.push_back(ClonedL);
1407
1408 ClonedL->reserveBlocks(BlocksInClonedLoop.size());
1409 // We don't want to just add the cloned loop blocks based on how we
1410 // discovered them. The original order of blocks was carefully built in
1411 // a way that doesn't rely on predecessor ordering. Rather than re-invent
1412 // that logic, we just re-walk the original blocks (and those of the child
1413 // loops) and filter them as we add them into the cloned loop.
1414 for (auto *BB : OrigL.blocks()) {
1415 auto *ClonedBB = cast_or_null<BasicBlock>(VMap.lookup(BB));
1416 if (!ClonedBB || !BlocksInClonedLoop.count(ClonedBB))
1417 continue;
1418
1419 // Directly add the blocks that are only in this loop.
1420 if (LI.getLoopFor(BB) == &OrigL) {
1421 ClonedL->addBasicBlockToLoop(ClonedBB, LI);
1422 continue;
1423 }
1424
1425 // We want to manually add it to this loop and parents.
1426 // Registering it with LoopInfo will happen when we clone the top
1427 // loop for this block.
1428 for (Loop *PL = ClonedL; PL; PL = PL->getParentLoop())
1429 PL->addBlockEntry(ClonedBB);
1430 }
1431
1432 // Now add each child loop whose header remains within the cloned loop. All
1433 // of the blocks within the loop must satisfy the same constraints as the
1434 // header so once we pass the header checks we can just clone the entire
1435 // child loop nest.
1436 for (Loop *ChildL : OrigL) {
1437 auto *ClonedChildHeader =
1438 cast_or_null<BasicBlock>(VMap.lookup(ChildL->getHeader()));
1439 if (!ClonedChildHeader || !BlocksInClonedLoop.count(ClonedChildHeader))
1440 continue;
1441
1442#ifndef NDEBUG
1443 // We should never have a cloned child loop header but fail to have
1444 // all of the blocks for that child loop.
1445 for (auto *ChildLoopBB : ChildL->blocks())
1446 assert(BlocksInClonedLoop.count(
1447 cast<BasicBlock>(VMap.lookup(ChildLoopBB))) &&
1448 "Child cloned loop has a header within the cloned outer "
1449 "loop but not all of its blocks!");
1450#endif
1451
1452 cloneLoopNest(*ChildL, ClonedL, VMap, LI);
1453 }
1454 }
1455
1456 // Now that we've handled all the components of the original loop that were
1457 // cloned into a new loop, we still need to handle anything from the original
1458 // loop that wasn't in a cloned loop.
1459
1460 // Figure out what blocks are left to place within any loop nest containing
1461 // the unswitched loop. If we never formed a loop, the cloned PH is one of
1462 // them.
1463 SmallPtrSet<BasicBlock *, 16> UnloopedBlockSet;
1464 if (BlocksInClonedLoop.empty())
1465 UnloopedBlockSet.insert(ClonedPH);
1466 for (auto *ClonedBB : ClonedLoopBlocks)
1467 if (!BlocksInClonedLoop.count(ClonedBB))
1468 UnloopedBlockSet.insert(ClonedBB);
1469
1470 // Copy the cloned exits and sort them in ascending loop depth; we'll work
1471 // backwards across these to process them inside out. The order shouldn't
1472 // matter as we're just trying to build up the map from inside-out; we use
1473 // the map in a more stably ordered way below.
1474 auto OrderedClonedExitsInLoops = ClonedExitsInLoops;
1475 llvm::sort(OrderedClonedExitsInLoops, [&](BasicBlock *LHS, BasicBlock *RHS) {
1476 return ExitLoopMap.lookup(LHS)->getLoopDepth() <
1477 ExitLoopMap.lookup(RHS)->getLoopDepth();
1478 });
1479
1480 // Populate the existing ExitLoopMap with everything reachable from each
1481 // exit, starting from the innermost exit.
1482 while (!UnloopedBlockSet.empty() && !OrderedClonedExitsInLoops.empty()) {
1483 assert(Worklist.empty() && "Didn't clear worklist!");
1484
1485 BasicBlock *ExitBB = OrderedClonedExitsInLoops.pop_back_val();
1486 Loop *ExitL = ExitLoopMap.lookup(ExitBB);
1487
1488 // Walk the CFG back until we hit the cloned PH adding everything reachable
1489 // and in the unlooped set to this exit block's loop.
1490 Worklist.push_back(ExitBB);
1491 do {
1492 BasicBlock *BB = Worklist.pop_back_val();
1493 // We can stop recursing at the cloned preheader (if we get there).
1494 if (BB == ClonedPH)
1495 continue;
1496
1497 for (BasicBlock *PredBB : predecessors(BB)) {
1498 // If this pred has already been moved to our set or is part of some
1499 // (inner) loop, no update needed.
1500 if (!UnloopedBlockSet.erase(PredBB)) {
1501 assert(
1502 (BlocksInClonedLoop.count(PredBB) || ExitLoopMap.count(PredBB)) &&
1503 "Predecessor not mapped to a loop!");
1504 continue;
1505 }
1506
1507 // We just insert into the loop set here. We'll add these blocks to the
1508 // exit loop after we build up the set in an order that doesn't rely on
1509 // predecessor order (which in turn relies on use list order).
1510 bool Inserted = ExitLoopMap.insert({PredBB, ExitL}).second;
1511 (void)Inserted;
1512 assert(Inserted && "Should only visit an unlooped block once!");
1513
1514 // And recurse through to its predecessors.
1515 Worklist.push_back(PredBB);
1516 }
1517 } while (!Worklist.empty());
1518 }
1519
1520 // Now that the ExitLoopMap gives us a mapping for all the non-looping cloned
1521 // blocks to their outer loops, walk the cloned blocks and the cloned exits
1522 // in their original order adding them to the correct loop.
1523
1524 // We need a stable insertion order. We use the block order of the original
1525 // loop and map each block into the correct parent loop.
1526 for (auto *BB : llvm::concat<BasicBlock *const>(
1527 makeArrayRef(ClonedPH), ClonedLoopBlocks, ClonedExitsInLoops))
1528 if (Loop *OuterL = ExitLoopMap.lookup(BB))
1529 OuterL->addBasicBlockToLoop(BB, LI);
1530
1531#ifndef NDEBUG
1532 for (auto &BBAndL : ExitLoopMap) {
1533 auto *BB = BBAndL.first;
1534 auto *OuterL = BBAndL.second;
1535 assert(LI.getLoopFor(BB) == OuterL &&
1536 "Failed to put all blocks into outer loops!");
1537 }
1538#endif
1539
1540 // Now that all the blocks are placed into the correct containing loop in the
1541 // absence of child loops, find all the potentially cloned child loops and
1542 // clone them into whatever outer loop we placed their header into.
1543 for (Loop *ChildL : OrigL) {
1544 auto *ClonedChildHeader =
1545 cast_or_null<BasicBlock>(VMap.lookup(ChildL->getHeader()));
1546 if (!ClonedChildHeader || BlocksInClonedLoop.count(ClonedChildHeader))
1547 continue;
1548
1549#ifndef NDEBUG
1550 for (auto *ChildLoopBB : ChildL->blocks())
1551 assert(VMap.count(ChildLoopBB) &&
1552 "Cloned a child loop header but not all of that loop's blocks!");
1553#endif
1554
1555 NonChildClonedLoops.push_back(cloneLoopNest(
1556 *ChildL, ExitLoopMap.lookup(ClonedChildHeader), VMap, LI));
1557 }
1558}
1559
1560static void
1561deleteDeadClonedBlocks(Loop &L, ArrayRef<BasicBlock *> ExitBlocks,
1562 ArrayRef<std::unique_ptr<ValueToValueMapTy>> VMaps,
1563 DominatorTree &DT, MemorySSAUpdater *MSSAU) {
1564 // Find all the dead clones, and remove them from their successors.
1565 SmallVector<BasicBlock *, 16> DeadBlocks;
1566 for (BasicBlock *BB : llvm::concat<BasicBlock *const>(L.blocks(), ExitBlocks))
1567 for (auto &VMap : VMaps)
1568 if (BasicBlock *ClonedBB = cast_or_null<BasicBlock>(VMap->lookup(BB)))
1569 if (!DT.isReachableFromEntry(ClonedBB)) {
1570 for (BasicBlock *SuccBB : successors(ClonedBB))
1571 SuccBB->removePredecessor(ClonedBB);
1572 DeadBlocks.push_back(ClonedBB);
1573 }
1574
1575 // Remove all MemorySSA in the dead blocks
1576 if (MSSAU) {
1577 SmallSetVector<BasicBlock *, 8> DeadBlockSet(DeadBlocks.begin(),
1578 DeadBlocks.end());
1579 MSSAU->removeBlocks(DeadBlockSet);
1580 }
1581
1582 // Drop any remaining references to break cycles.
1583 for (BasicBlock *BB : DeadBlocks)
1584 BB->dropAllReferences();
1585 // Erase them from the IR.
1586 for (BasicBlock *BB : DeadBlocks)
1587 BB->eraseFromParent();
1588}
1589
1590static void
1591deleteDeadBlocksFromLoop(Loop &L,
1592 SmallVectorImpl<BasicBlock *> &ExitBlocks,
1593 DominatorTree &DT, LoopInfo &LI,
1594 MemorySSAUpdater *MSSAU,
1595 function_ref<void(Loop &, StringRef)> DestroyLoopCB) {
1596 // Find all the dead blocks tied to this loop, and remove them from their
1597 // successors.
1598 SmallSetVector<BasicBlock *, 8> DeadBlockSet;
1599
1600 // Start with loop/exit blocks and get a transitive closure of reachable dead
1601 // blocks.
1602 SmallVector<BasicBlock *, 16> DeathCandidates(ExitBlocks.begin(),
1603 ExitBlocks.end());
1604 DeathCandidates.append(L.blocks().begin(), L.blocks().end());
1605 while (!DeathCandidates.empty()) {
1606 auto *BB = DeathCandidates.pop_back_val();
1607 if (!DeadBlockSet.count(BB) && !DT.isReachableFromEntry(BB)) {
1608 for (BasicBlock *SuccBB : successors(BB)) {
1609 SuccBB->removePredecessor(BB);
1610 DeathCandidates.push_back(SuccBB);
1611 }
1612 DeadBlockSet.insert(BB);
1613 }
1614 }
1615
1616 // Remove all MemorySSA in the dead blocks
1617 if (MSSAU)
1618 MSSAU->removeBlocks(DeadBlockSet);
1619
1620 // Filter out the dead blocks from the exit blocks list so that it can be
1621 // used in the caller.
1622 llvm::erase_if(ExitBlocks,
1623 [&](BasicBlock *BB) { return DeadBlockSet.count(BB); });
1624
1625 // Walk from this loop up through its parents removing all of the dead blocks.
1626 for (Loop *ParentL = &L; ParentL; ParentL = ParentL->getParentLoop()) {
1627 for (auto *BB : DeadBlockSet)
1628 ParentL->getBlocksSet().erase(BB);
1629 llvm::erase_if(ParentL->getBlocksVector(),
1630 [&](BasicBlock *BB) { return DeadBlockSet.count(BB); });
1631 }
1632
1633 // Now delete the dead child loops. This raw delete will clear them
1634 // recursively.
1635 llvm::erase_if(L.getSubLoopsVector(), [&](Loop *ChildL) {
1636 if (!DeadBlockSet.count(ChildL->getHeader()))
1637 return false;
1638
1639 assert(llvm::all_of(ChildL->blocks(),
1640 [&](BasicBlock *ChildBB) {
1641 return DeadBlockSet.count(ChildBB);
1642 }) &&
1643 "If the child loop header is dead all blocks in the child loop must "
1644 "be dead as well!");
1645 DestroyLoopCB(*ChildL, ChildL->getName());
1646 LI.destroy(ChildL);
1647 return true;
1648 });
1649
1650 // Remove the loop mappings for the dead blocks and drop all the references
1651 // from these blocks to others to handle cyclic references as we start
1652 // deleting the blocks themselves.
1653 for (auto *BB : DeadBlockSet) {
1654 // Check that the dominator tree has already been updated.
1655 assert(!DT.getNode(BB) && "Should already have cleared domtree!");
1656 LI.changeLoopFor(BB, nullptr);
1657 // Drop all uses of the instructions to make sure we won't have dangling
1658 // uses in other blocks.
1659 for (auto &I : *BB)
1660 if (!I.use_empty())
1661 I.replaceAllUsesWith(UndefValue::get(I.getType()));
1662 BB->dropAllReferences();
1663 }
1664
1665 // Actually delete the blocks now that they've been fully unhooked from the
1666 // IR.
1667 for (auto *BB : DeadBlockSet)
1668 BB->eraseFromParent();
1669}
1670
1671/// Recompute the set of blocks in a loop after unswitching.
1672///
1673/// This walks from the original header's predecessors to rebuild the loop. We
1674/// take advantage of the fact that new blocks can't have been added, and so we
1675/// filter by the original loop's blocks. This also handles potentially
1676/// unreachable code that we don't want to explore but might be found examining
1677/// the predecessors of the header.
1678///
1679/// If the original loop is no longer a loop, this will return an empty set. If
1680/// it remains a loop, all the blocks within it will be added to the set
1681/// (including those blocks in inner loops).
1682static SmallPtrSet<const BasicBlock *, 16> recomputeLoopBlockSet(Loop &L,
1683 LoopInfo &LI) {
1684 SmallPtrSet<const BasicBlock *, 16> LoopBlockSet;
1685
1686 auto *PH = L.getLoopPreheader();
1687 auto *Header = L.getHeader();
1688
1689 // A worklist to use while walking backwards from the header.
1690 SmallVector<BasicBlock *, 16> Worklist;
1691
1692 // First walk the predecessors of the header to find the backedges. This will
1693 // form the basis of our walk.
1694 for (auto *Pred : predecessors(Header)) {
1695 // Skip the preheader.
1696 if (Pred == PH)
1697 continue;
1698
1699 // Because the loop was in simplified form, the only non-loop predecessor
1700 // is the preheader.
1701 assert(L.contains(Pred) && "Found a predecessor of the loop header other "
1702 "than the preheader that is not part of the "
1703 "loop!");
1704
1705 // Insert this block into the loop set, and on the first visit (if it
1706 // isn't the header we're currently walking) put it into the worklist to
1707 // recurse through.
1708 if (LoopBlockSet.insert(Pred).second && Pred != Header)
1709 Worklist.push_back(Pred);
1710 }
1711
1712 // If no backedges were found, we're done.
1713 if (LoopBlockSet.empty())
1714 return LoopBlockSet;
1715
1716 // We found backedges, recurse through them to identify the loop blocks.
1717 while (!Worklist.empty()) {
1718 BasicBlock *BB = Worklist.pop_back_val();
1719 assert(LoopBlockSet.count(BB) && "Didn't put block into the loop set!");
1720
1721 // No need to walk past the header.
1722 if (BB == Header)
1723 continue;
1724
1725 // Because we know the inner loop structure remains valid we can use the
1726 // loop structure to jump immediately across the entire nested loop.
1727 // Further, because it is in loop simplified form, we can directly jump
1728 // to its preheader afterward.
1729 if (Loop *InnerL = LI.getLoopFor(BB))
1730 if (InnerL != &L) {
1731 assert(L.contains(InnerL) &&
1732 "Should not reach a loop *outside* this loop!");
1733 // The preheader is the only possible predecessor of the loop so
1734 // insert it into the set and check whether it was already handled.
1735 auto *InnerPH = InnerL->getLoopPreheader();
1736 assert(L.contains(InnerPH) && "Cannot contain an inner loop block "
1737 "but not contain the inner loop "
1738 "preheader!");
1739 if (!LoopBlockSet.insert(InnerPH).second)
1740 // The only way to reach the preheader is through the loop body
1741 // itself so if it has been visited the loop is already handled.
1742 continue;
1743
1744 // Insert all of the blocks (other than those already present) into
1745 // the loop set. We expect at least the block that led us to find the
1746 // inner loop to be in the block set, but we may also have other loop
1747 // blocks if they were already enqueued as predecessors of some other
1748 // outer loop block.
1749 for (auto *InnerBB : InnerL->blocks()) {
1750 if (InnerBB == BB) {
1751 assert(LoopBlockSet.count(InnerBB) &&
1752 "Block should already be in the set!");
1753 continue;
1754 }
1755
1756 LoopBlockSet.insert(InnerBB);
1757 }
1758
1759 // Add the preheader to the worklist so we will continue past the
1760 // loop body.
1761 Worklist.push_back(InnerPH);
1762 continue;
1763 }
1764
1765 // Insert any predecessors that were in the original loop into the new
1766 // set, and if the insert is successful, add them to the worklist.
1767 for (auto *Pred : predecessors(BB))
1768 if (L.contains(Pred) && LoopBlockSet.insert(Pred).second)
1769 Worklist.push_back(Pred);
1770 }
1771
1772 assert(LoopBlockSet.count(Header) && "Cannot fail to add the header!");
1773
1774 // We've found all the blocks participating in the loop, return our completed
1775 // set.
1776 return LoopBlockSet;
1777}
1778
1779/// Rebuild a loop after unswitching removes some subset of blocks and edges.
1780///
1781/// The removal may have removed some child loops entirely but cannot have
1782/// disturbed any remaining child loops. However, they may need to be hoisted
1783/// to the parent loop (or to be top-level loops). The original loop may be
1784/// completely removed.
1785///
1786/// The sibling loops resulting from this update are returned. If the original
1787/// loop remains a valid loop, it will be the first entry in this list with all
1788/// of the newly sibling loops following it.
1789///
1790/// Returns true if the loop remains a loop after unswitching, and false if it
1791/// is no longer a loop after unswitching (and should not continue to be
1792/// referenced).
1793static bool rebuildLoopAfterUnswitch(Loop &L, ArrayRef<BasicBlock *> ExitBlocks,
1794 LoopInfo &LI,
1795 SmallVectorImpl<Loop *> &HoistedLoops) {
1796 auto *PH = L.getLoopPreheader();
1797
1798 // Compute the actual parent loop from the exit blocks. Because we may have
1799 // pruned some exits the loop may be different from the original parent.
1800 Loop *ParentL = nullptr;
1801 SmallVector<Loop *, 4> ExitLoops;
1802 SmallVector<BasicBlock *, 4> ExitsInLoops;
1803 ExitsInLoops.reserve(ExitBlocks.size());
1804 for (auto *ExitBB : ExitBlocks)
1805 if (Loop *ExitL = LI.getLoopFor(ExitBB)) {
1806 ExitLoops.push_back(ExitL);
1807 ExitsInLoops.push_back(ExitBB);
1808 if (!ParentL || (ParentL != ExitL && ParentL->contains(ExitL)))
1809 ParentL = ExitL;
1810 }
1811
1812 // Recompute the blocks participating in this loop. This may be empty if it
1813 // is no longer a loop.
1814 auto LoopBlockSet = recomputeLoopBlockSet(L, LI);
1815
1816 // If we still have a loop, we need to re-set the loop's parent as the exit
1817 // block set changing may have moved it within the loop nest. Note that this
1818 // can only happen when this loop has a parent as it can only hoist the loop
1819 // *up* the nest.
1820 if (!LoopBlockSet.empty() && L.getParentLoop() != ParentL) {
1821 // Remove this loop's (original) blocks from all of the intervening loops.
1822 for (Loop *IL = L.getParentLoop(); IL != ParentL;
1823 IL = IL->getParentLoop()) {
1824 IL->getBlocksSet().erase(PH);
1825 for (auto *BB : L.blocks())
1826 IL->getBlocksSet().erase(BB);
1827 llvm::erase_if(IL->getBlocksVector(), [&](BasicBlock *BB) {
1828 return BB == PH || L.contains(BB);
1829 });
1830 }
1831
1832 LI.changeLoopFor(PH, ParentL);
1833 L.getParentLoop()->removeChildLoop(&L);
1834 if (ParentL)
1835 ParentL->addChildLoop(&L);
1836 else
1837 LI.addTopLevelLoop(&L);
1838 }
1839
1840 // Now we update all the blocks which are no longer within the loop.
1841 auto &Blocks = L.getBlocksVector();
1842 auto BlocksSplitI =
1843 LoopBlockSet.empty()
1844 ? Blocks.begin()
1845 : std::stable_partition(
1846 Blocks.begin(), Blocks.end(),
1847 [&](BasicBlock *BB) { return LoopBlockSet.count(BB); });
1848
1849 // Before we erase the list of unlooped blocks, build a set of them.
1850 SmallPtrSet<BasicBlock *, 16> UnloopedBlocks(BlocksSplitI, Blocks.end());
1851 if (LoopBlockSet.empty())
1852 UnloopedBlocks.insert(PH);
1853
1854 // Now erase these blocks from the loop.
1855 for (auto *BB : make_range(BlocksSplitI, Blocks.end()))
1856 L.getBlocksSet().erase(BB);
1857 Blocks.erase(BlocksSplitI, Blocks.end());
1858
1859 // Sort the exits in ascending loop depth, we'll work backwards across these
1860 // to process them inside out.
1861 llvm::stable_sort(ExitsInLoops, [&](BasicBlock *LHS, BasicBlock *RHS) {
1862 return LI.getLoopDepth(LHS) < LI.getLoopDepth(RHS);
1863 });
1864
1865 // We'll build up a set for each exit loop.
1866 SmallPtrSet<BasicBlock *, 16> NewExitLoopBlocks;
1867 Loop *PrevExitL = L.getParentLoop(); // The deepest possible exit loop.
1868
1869 auto RemoveUnloopedBlocksFromLoop =
1870 [](Loop &L, SmallPtrSetImpl<BasicBlock *> &UnloopedBlocks) {
1871 for (auto *BB : UnloopedBlocks)
1872 L.getBlocksSet().erase(BB);
1873 llvm::erase_if(L.getBlocksVector(), [&](BasicBlock *BB) {
1874 return UnloopedBlocks.count(BB);
1875 });
1876 };
1877
1878 SmallVector<BasicBlock *, 16> Worklist;
1879 while (!UnloopedBlocks.empty() && !ExitsInLoops.empty()) {
1880 assert(Worklist.empty() && "Didn't clear worklist!");
1881 assert(NewExitLoopBlocks.empty() && "Didn't clear loop set!");
1882
1883 // Grab the next exit block, in decreasing loop depth order.
1884 BasicBlock *ExitBB = ExitsInLoops.pop_back_val();
1885 Loop &ExitL = *LI.getLoopFor(ExitBB);
1886 assert(ExitL.contains(&L) && "Exit loop must contain the inner loop!");
1887
1888 // Erase all of the unlooped blocks from the loops between the previous
1889 // exit loop and this exit loop. This works because the ExitsInLoops list is
1890 // sorted in increasing order of loop depth and thus we visit loops in
1891 // decreasing order of loop depth.
1892 for (; PrevExitL != &ExitL; PrevExitL = PrevExitL->getParentLoop())
1893 RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);
1894
1895 // Walk the CFG back until we hit the preheader (PH), adding everything
1896 // reachable and in the unlooped set to this exit block's loop.
1897 Worklist.push_back(ExitBB);
1898 do {
1899 BasicBlock *BB = Worklist.pop_back_val();
1900 // We can stop recursing at the preheader (if we get there).
1901 if (BB == PH)
1902 continue;
1903
1904 for (BasicBlock *PredBB : predecessors(BB)) {
1905 // If this pred has already been moved to our set or is part of some
1906 // (inner) loop, no update needed.
1907 if (!UnloopedBlocks.erase(PredBB)) {
1908 assert((NewExitLoopBlocks.count(PredBB) ||
1909 ExitL.contains(LI.getLoopFor(PredBB))) &&
1910 "Predecessor not in a nested loop (or already visited)!");
1911 continue;
1912 }
1913
1914 // We just insert into the loop set here. We'll add these blocks to the
1915 // exit loop after we build up the set in a deterministic order rather
1916 // than the predecessor-influenced visit order.
1917 bool Inserted = NewExitLoopBlocks.insert(PredBB).second;
1918 (void)Inserted;
1919 assert(Inserted && "Should only visit an unlooped block once!");
1920
1921 // And recurse through to its predecessors.
1922 Worklist.push_back(PredBB);
1923 }
1924 } while (!Worklist.empty());
1925
1926 // If blocks in this exit loop were directly part of the original loop (as
1927 // opposed to a child loop) update the map to point to this exit loop. This
1928 // just updates a map and so the fact that the order is unstable is fine.
1929 for (auto *BB : NewExitLoopBlocks)
1930 if (Loop *BBL = LI.getLoopFor(BB))
1931 if (BBL == &L || !L.contains(BBL))
1932 LI.changeLoopFor(BB, &ExitL);
1933
1934 // We will remove the remaining unlooped blocks from this loop in the next
1935 // iteration or below.
1936 NewExitLoopBlocks.clear();
1937 }
1938
1939 // Any remaining unlooped blocks are no longer part of any loop unless they
1940 // are part of some child loop.
1941 for (; PrevExitL; PrevExitL = PrevExitL->getParentLoop())
1942 RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);
1943 for (auto *BB : UnloopedBlocks)
1944 if (Loop *BBL = LI.getLoopFor(BB))
1945 if (BBL == &L || !L.contains(BBL))
1946 LI.changeLoopFor(BB, nullptr);
1947
1948 // Sink all the child loops whose headers are no longer in the loop set to
1949 // the parent (or to be top level loops). We reach into the loop and directly
1950 // update its subloop vector to make this batch update efficient.
1951 auto &SubLoops = L.getSubLoopsVector();
1952 auto SubLoopsSplitI =
1953 LoopBlockSet.empty()
1954 ? SubLoops.begin()
1955 : std::stable_partition(
1956 SubLoops.begin(), SubLoops.end(), [&](Loop *SubL) {
1957 return LoopBlockSet.count(SubL->getHeader());
1958 });
1959 for (auto *HoistedL : make_range(SubLoopsSplitI, SubLoops.end())) {
1960 HoistedLoops.push_back(HoistedL);
1961 HoistedL->setParentLoop(nullptr);
1962
1963 // To compute the new parent of this hoisted loop we look at where we
1964 // placed the preheader above. We can't lookup the header itself because we
1965 // retained the mapping from the header to the hoisted loop. But the
1966 // preheader and header should have the exact same new parent computed
1967 // based on the set of exit blocks from the original loop as the preheader
1968 // is a predecessor of the header and so reached in the reverse walk. And
1969 // because the loops were all in simplified form the preheader of the
1970 // hoisted loop can't be part of some *other* loop.
1971 if (auto *NewParentL = LI.getLoopFor(HoistedL->getLoopPreheader()))
1972 NewParentL->addChildLoop(HoistedL);
1973 else
1974 LI.addTopLevelLoop(HoistedL);
1975 }
1976 SubLoops.erase(SubLoopsSplitI, SubLoops.end());
1977
1978 // Actually delete the loop if nothing remained within it.
1979 if (Blocks.empty()) {
1980 assert(SubLoops.empty() &&
1981 "Failed to remove all subloops from the original loop!");
1982 if (Loop *ParentL = L.getParentLoop())
1983 ParentL->removeChildLoop(llvm::find(*ParentL, &L));
1984 else
1985 LI.removeLoop(llvm::find(LI, &L));
1986 // markLoopAsDeleted for L should be triggered by the caller (it is typically
1987 // done by using the UnswitchCB callback).
1988 LI.destroy(&L);
1989 return false;
1990 }
1991
1992 return true;
1993}
1994
1995/// Helper to visit a dominator subtree, invoking a callable on each node.
1996///
1997/// Returning false at any point will stop walking past that node of the tree.
1998template <typename CallableT>
1999void visitDomSubTree(DominatorTree &DT, BasicBlock *BB, CallableT Callable) {
2000 SmallVector<DomTreeNode *, 4> DomWorklist;
2001 DomWorklist.push_back(DT[BB]);
2002#ifndef NDEBUG
2003 SmallPtrSet<DomTreeNode *, 4> Visited;
2004 Visited.insert(DT[BB]);
2005#endif
2006 do {
2007 DomTreeNode *N = DomWorklist.pop_back_val();
2008
2009 // Visit this node.
2010 if (!Callable(N->getBlock()))
2011 continue;
2012
2013 // Accumulate the child nodes.
2014 for (DomTreeNode *ChildN : *N) {
2015 assert(Visited.insert(ChildN).second &&
2016 "Cannot visit a node twice when walking a tree!");
2017 DomWorklist.push_back(ChildN);
2018 }
2019 } while (!DomWorklist.empty());
2020}
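// Aside (not part of the original listing): a small, hypothetical use of
// visitDomSubTree that collects every block dominated by `Root`. This mirrors
// how the helper is used below to populate the DominatingSucc map; returning
// true from the callable keeps descending into that node's children.
static void collectDominatedBlocks(DominatorTree &DT, BasicBlock *Root,
                                   SmallPtrSetImpl<BasicBlock *> &Out) {
  visitDomSubTree(DT, Root, [&](BasicBlock *BB) {
    Out.insert(BB);
    return true; // Keep walking into this node's dominator-tree children.
  });
}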
2021
2022static void unswitchNontrivialInvariants(
2023 Loop &L, Instruction &TI, ArrayRef<Value *> Invariants,
2024 SmallVectorImpl<BasicBlock *> &ExitBlocks, IVConditionInfo &PartialIVInfo,
2025 DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
2026 function_ref<void(bool, bool, ArrayRef<Loop *>)> UnswitchCB,
2027 ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
2028 function_ref<void(Loop &, StringRef)> DestroyLoopCB) {
2029 auto *ParentBB = TI.getParent();
2030 BranchInst *BI = dyn_cast<BranchInst>(&TI);
2031 SwitchInst *SI = BI ? nullptr : cast<SwitchInst>(&TI);
2032
2033 // We can only unswitch switches, conditional branches with an invariant
2034 // condition, combinations of invariant conditions with an instruction, or
2035 // partially invariant instructions.
2036 assert((SI || (BI && BI->isConditional())) &&
2037 "Can only unswitch switches and conditional branch!");
2038 bool PartiallyInvariant = !PartialIVInfo.InstToDuplicate.empty();
2039 bool FullUnswitch =
2040 SI || (BI->getCondition() == Invariants[0] && !PartiallyInvariant);
2041 if (FullUnswitch)
2042 assert(Invariants.size() == 1 &&
2043 "Cannot have other invariants with full unswitching!");
2044 else
2045 assert(isa<Instruction>(BI->getCondition()) &&
2046 "Partial unswitching requires an instruction as the condition!");
2047
2048 if (MSSAU && VerifyMemorySSA)
2049 MSSAU->getMemorySSA()->verifyMemorySSA();
2050
2051 // Constant and BBs tracking the cloned and continuing successor. When we are
2052 // unswitching the entire condition, this can just be trivially chosen to
2053 // unswitch towards `true`. However, when we are unswitching a set of
2054 // invariants combined with `and` or `or` or partially invariant instructions,
2055 // the combining operation determines the best direction to unswitch: we want
2056 // to unswitch the direction that will collapse the branch.
2057 bool Direction = true;
2058 int ClonedSucc = 0;
2059 if (!FullUnswitch) {
2060 Value *Cond = BI->getCondition();
2061 (void)Cond;
2062 assert(((match(Cond, m_LogicalAnd()) ^ match(Cond, m_LogicalOr())) ||
2063 PartiallyInvariant) &&
2064 "Only `or`, `and`, a `select`, or partially invariant instructions "
2065 "can combine invariants being unswitched.");
2066 if (!match(BI->getCondition(), m_LogicalOr())) {
2067 if (match(BI->getCondition(), m_LogicalAnd()) ||
2068 (PartiallyInvariant && !PartialIVInfo.KnownValue->isOneValue())) {
2069 Direction = false;
2070 ClonedSucc = 1;
2071 }
2072 }
2073 }
2074
2075 BasicBlock *RetainedSuccBB =
2076 BI ? BI->getSuccessor(1 - ClonedSucc) : SI->getDefaultDest();
2077 SmallSetVector<BasicBlock *, 4> UnswitchedSuccBBs;
2078 if (BI)
2079 UnswitchedSuccBBs.insert(BI->getSuccessor(ClonedSucc));
2080 else
2081 for (auto Case : SI->cases())
2082 if (Case.getCaseSuccessor() != RetainedSuccBB)
2083 UnswitchedSuccBBs.insert(Case.getCaseSuccessor());
2084
2085 assert(!UnswitchedSuccBBs.count(RetainedSuccBB) &&
2086 "Should not unswitch the same successor we are retaining!");
2087
2088 // The branch should be in this exact loop. Any inner loop's invariant branch
2089 // should be handled by unswitching that inner loop. The caller of this
2090 // routine should filter out any candidates that remain (but were skipped for
2091 // whatever reason).
2092 assert(LI.getLoopFor(ParentBB) == &L && "Branch in an inner loop!");
2093
2094 // Compute the parent loop now before we start hacking on things.
2095 Loop *ParentL = L.getParentLoop();
2096 // Get blocks in RPO order for MSSA update, before changing the CFG.
2097 LoopBlocksRPO LBRPO(&L);
2098 if (MSSAU)
2099 LBRPO.perform(&LI);
2100
2101 // Compute the outer-most loop containing one of our exit blocks. This is the
2102 // furthest up our loopnest which can be mutated, which we will use below to
2103 // update things.
2104 Loop *OuterExitL = &L;
2105 for (auto *ExitBB : ExitBlocks) {
2106 Loop *NewOuterExitL = LI.getLoopFor(ExitBB);
2107 if (!NewOuterExitL) {
2108 // We exited the entire nest with this block, so we're done.
2109 OuterExitL = nullptr;
2110 break;
2111 }
2112 if (NewOuterExitL != OuterExitL && NewOuterExitL->contains(OuterExitL))
2113 OuterExitL = NewOuterExitL;
2114 }
2115
2116 // At this point, we're definitely going to unswitch something so invalidate
2117 // any cached information in ScalarEvolution for the outermost loop
2118 // containing an exit block and all nested loops.
2119 if (SE) {
2120 if (OuterExitL)
2121 SE->forgetLoop(OuterExitL);
2122 else
2123 SE->forgetTopmostLoop(&L);
2124 }
2125
2126 // If the edge from this terminator to a successor dominates that successor,
2127 // store a map from each block in its dominator subtree to it. This lets us
2128 // tell when cloning for a particular successor if a block is dominated by
2129 // some *other* successor with a single data structure. We use this to
2130 // significantly reduce cloning.
2131 SmallDenseMap<BasicBlock *, BasicBlock *, 16> DominatingSucc;
2132 for (auto *SuccBB : llvm::concat<BasicBlock *const>(
2133 makeArrayRef(RetainedSuccBB), UnswitchedSuccBBs))
2134 if (SuccBB->getUniquePredecessor() ||
2135 llvm::all_of(predecessors(SuccBB), [&](BasicBlock *PredBB) {
2136 return PredBB == ParentBB || DT.dominates(SuccBB, PredBB);
2137 }))
2138 visitDomSubTree(DT, SuccBB, [&](BasicBlock *BB) {
2139 DominatingSucc[BB] = SuccBB;
2140 return true;
2141 });
2142
2143 // Split the preheader, so that we know that there is a safe place to insert
2144 // the conditional branch. We will change the preheader to have a conditional
2145 // branch on LoopCond. The original preheader will become the split point
2146 // between the unswitched versions, and we will have a new preheader for the
2147 // original loop.
2148 BasicBlock *SplitBB = L.getLoopPreheader();
2149 BasicBlock *LoopPH = SplitEdge(SplitBB, L.getHeader(), &DT, &LI, MSSAU);
2150
2151 // Keep track of the dominator tree updates needed.
2152 SmallVector<DominatorTree::UpdateType, 4> DTUpdates;
2153
2154 // Clone the loop for each unswitched successor.
2155 SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> VMaps;
2156 VMaps.reserve(UnswitchedSuccBBs.size());
2157 SmallDenseMap<BasicBlock *, BasicBlock *, 4> ClonedPHs;
2158 for (auto *SuccBB : UnswitchedSuccBBs) {
2159 VMaps.emplace_back(new ValueToValueMapTy());
2160 ClonedPHs[SuccBB] = buildClonedLoopBlocks(
2161 L, LoopPH, SplitBB, ExitBlocks, ParentBB, SuccBB, RetainedSuccBB,
2162 DominatingSucc, *VMaps.back(), DTUpdates, AC, DT, LI, MSSAU);
2163 }
2164
2165 // Drop metadata if we may break its semantics by moving this instr into the
2166 // split block.
2167 if (TI.getMetadata(LLVMContext::MD_make_implicit)) {
2168 if (DropNonTrivialImplicitNullChecks)
2169 // Do not spend time trying to understand if we can keep it, just drop it
2170 // to save compile time.
2171 TI.setMetadata(LLVMContext::MD_make_implicit, nullptr);
2172 else {
2173 // It is only legal to preserve make.implicit metadata if we are
2174 // guaranteed not to reach an implicit null check after following this branch.
2175 ICFLoopSafetyInfo SafetyInfo;
2176 SafetyInfo.computeLoopSafetyInfo(&L);
2177 if (!SafetyInfo.isGuaranteedToExecute(TI, &DT, &L))
2178 TI.setMetadata(LLVMContext::MD_make_implicit, nullptr);
2179 }
2180 }
2181
2182 // The stitching of the branched code back together depends on whether we're
2183 // doing full unswitching or not with the exception that we always want to
2184 // nuke the initial terminator placed in the split block.
2185 SplitBB->getTerminator()->eraseFromParent();
2186 if (FullUnswitch) {
2187 // Splice the terminator from the original loop and rewrite its
2188 // successors.
2189 SplitBB->getInstList().splice(SplitBB->end(), ParentBB->getInstList(), TI);
2190
2191 // Keep a clone of the terminator for MSSA updates.
2192 Instruction *NewTI = TI.clone();
2193 ParentBB->getInstList().push_back(NewTI);
2194
2195 // First wire up the moved terminator to the preheaders.
2196 if (BI) {
2197 BasicBlock *ClonedPH = ClonedPHs.begin()->second;
2198 BI->setSuccessor(ClonedSucc, ClonedPH);
2199 BI->setSuccessor(1 - ClonedSucc, LoopPH);
2200 DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
2201 } else {
2202 assert(SI && "Must either be a branch or switch!");
2203
2204 // Walk the cases and directly update their successors.
2205 assert(SI->getDefaultDest() == RetainedSuccBB &&
2206 "Not retaining default successor!");
2207 SI->setDefaultDest(LoopPH);
2208 for (auto &Case : SI->cases())
2209 if (Case.getCaseSuccessor() == RetainedSuccBB)
2210 Case.setSuccessor(LoopPH);
2211 else
2212 Case.setSuccessor(ClonedPHs.find(Case.getCaseSuccessor())->second);
2213
2214 // We need to use the set to populate domtree updates as even when there
2215 // are multiple cases pointing at the same successor we only want to
2216 // remove and insert one edge in the domtree.
2217 for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2218 DTUpdates.push_back(
2219 {DominatorTree::Insert, SplitBB, ClonedPHs.find(SuccBB)->second});
2220 }
2221
2222 if (MSSAU) {
2223 DT.applyUpdates(DTUpdates);
2224 DTUpdates.clear();
2225
2226 // Remove all but one edge to the retained block and all unswitched
2227 // blocks. This is to avoid having duplicate entries in the cloned Phis,
2228 // when we know we only keep a single edge for each case.
2229 MSSAU->removeDuplicatePhiEdgesBetween(ParentBB, RetainedSuccBB);
2230 for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2231 MSSAU->removeDuplicatePhiEdgesBetween(ParentBB, SuccBB);
2232
2233 for (auto &VMap : VMaps)
2234 MSSAU->updateForClonedLoop(LBRPO, ExitBlocks, *VMap,
2235 /*IgnoreIncomingWithNoClones=*/true);
2236 MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMaps, DT);
2237
2238 // Remove all edges to unswitched blocks.
2239 for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2240 MSSAU->removeEdge(ParentBB, SuccBB);
2241 }
2242
2243 // Now unhook the successor relationship as we'll be replacing
2244 // the terminator with a direct branch. This is much simpler for branches
2245 // than switches so we handle those first.
2246 if (BI) {
2247 // Remove the parent as a predecessor of the unswitched successor.
2248 assert(UnswitchedSuccBBs.size() == 1 &&
2249 "Only one possible unswitched block for a branch!");
2250 BasicBlock *UnswitchedSuccBB = *UnswitchedSuccBBs.begin();
2251 UnswitchedSuccBB->removePredecessor(ParentBB,
2252 /*KeepOneInputPHIs*/ true);
2253 DTUpdates.push_back({DominatorTree::Delete, ParentBB, UnswitchedSuccBB});
2254 } else {
2255 // Note that we actually want to remove the parent block as a predecessor
2256 // of *every* case successor. The case successor is either unswitched,
2257 // completely eliminating an edge from the parent to that successor, or it
2258 // is a duplicate edge to the retained successor as the retained successor
2259 // is always the default successor and as we'll replace this with a direct
2260 // branch we no longer need the duplicate entries in the PHI nodes.
2261 SwitchInst *NewSI = cast<SwitchInst>(NewTI);
2262 assert(NewSI->getDefaultDest() == RetainedSuccBB &&
2263 "Not retaining default successor!");
2264 for (auto &Case : NewSI->cases())
2265 Case.getCaseSuccessor()->removePredecessor(
2266 ParentBB,
2267 /*KeepOneInputPHIs*/ true);
2268
2269 // We need to use the set to populate domtree updates as even when there
2270 // are multiple cases pointing at the same successor we only want to
2271 // remove and insert one edge in the domtree.
2272 for (BasicBlock *SuccBB : UnswitchedSuccBBs)
2273 DTUpdates.push_back({DominatorTree::Delete, ParentBB, SuccBB});
2274 }
2275
2276 // After MSSAU update, remove the cloned terminator instruction NewTI.
2277 ParentBB->getTerminator()->eraseFromParent();
2278
2279 // Create a new unconditional branch to the continuing block (as opposed to
2280 // the one cloned).
2281 BranchInst::Create(RetainedSuccBB, ParentBB);
2282 } else {
2283 assert(BI && "Only branches have partial unswitching.");
2284 assert(UnswitchedSuccBBs.size() == 1 &&
2285 "Only one possible unswitched block for a branch!");
2286 BasicBlock *ClonedPH = ClonedPHs.begin()->second;
2287 // When doing a partial unswitch, we have to do a bit more work to build up
2288 // the branch in the split block.
2289 if (PartiallyInvariant)
2290 buildPartialInvariantUnswitchConditionalBranch(
2291 *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH, L, MSSAU);
2292 else
2293 buildPartialUnswitchConditionalBranch(*SplitBB, Invariants, Direction,
2294 *ClonedPH, *LoopPH);
2295 DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
2296
2297 if (MSSAU) {
2298 DT.applyUpdates(DTUpdates);
2299 DTUpdates.clear();
2300
2301 // Perform MSSA cloning updates.
2302 for (auto &VMap : VMaps)
2303 MSSAU->updateForClonedLoop(LBRPO, ExitBlocks, *VMap,
2304 /*IgnoreIncomingWithNoClones=*/true);
2305 MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMaps, DT);
2306 }
2307 }
2308
2309 // Apply the updates accumulated above to get an up-to-date dominator tree.
2310 DT.applyUpdates(DTUpdates);
2311
2312 // Now that we have an accurate dominator tree, first delete the dead cloned
2313 // blocks so that we can accurately build any cloned loops. It is important to
2314 // not delete the blocks from the original loop yet because we still want to
2315 // reference the original loop to understand the cloned loop's structure.
2316 deleteDeadClonedBlocks(L, ExitBlocks, VMaps, DT, MSSAU);
2317
2318 // Build the cloned loop structure itself. This may be substantially
2319 // different from the original structure due to the simplified CFG. This also
2320 // handles inserting all the cloned blocks into the correct loops.
2321 SmallVector<Loop *, 4> NonChildClonedLoops;
2322 for (std::unique_ptr<ValueToValueMapTy> &VMap : VMaps)
2323 buildClonedLoops(L, ExitBlocks, *VMap, LI, NonChildClonedLoops);
2324
2325 // Now that our cloned loops have been built, we can update the original loop.
2326 // First we delete the dead blocks from it and then we rebuild the loop
2327 // structure taking these deletions into account.
2328 deleteDeadBlocksFromLoop(L, ExitBlocks, DT, LI, MSSAU, DestroyLoopCB);
2329
2330 if (MSSAU && VerifyMemorySSA)
2331 MSSAU->getMemorySSA()->verifyMemorySSA();
2332
2333 SmallVector<Loop *, 4> HoistedLoops;
2334 bool IsStillLoop = rebuildLoopAfterUnswitch(L, ExitBlocks, LI, HoistedLoops);
2335
2336 if (MSSAU && VerifyMemorySSA)
2337 MSSAU->getMemorySSA()->verifyMemorySSA();
2338
2339 // This transformation has a high risk of corrupting the dominator tree, and
2340 // the below steps to rebuild loop structures will result in hard to debug
2341 // errors in that case so verify that the dominator tree is sane first.
2342 // FIXME: Remove this when the bugs stop showing up and rely on existing
2343 // verification steps.
2344 assert(DT.verify(DominatorTree::VerificationLevel::Fast));
2345
2346 if (BI && !PartiallyInvariant) {
2347 // If we unswitched a branch which collapses the condition to a known
2348 // constant we want to replace all the uses of the invariants within both
2349 // the original and cloned blocks. We do this here so that we can use the
2350 // now updated dominator tree to identify which side the users are on.
2351 assert(UnswitchedSuccBBs.size() == 1 &&
2352 "Only one possible unswitched block for a branch!");
2353 BasicBlock *ClonedPH = ClonedPHs.begin()->second;
2354
2355 // When considering multiple partially-unswitched invariants,
2356 // we can't just replace them with constants in both branches.
2357 //
2358 // For 'AND' we infer that true branch ("continue") means true
2359 // for each invariant operand.
2360 // For 'OR' we can infer that false branch ("continue") means false
2361 // for each invariant operand.
2362 // So it happens that for the multiple-partial case we don't replace
2363 // in the unswitched branch.
2364 bool ReplaceUnswitched =
2365 FullUnswitch || (Invariants.size() == 1) || PartiallyInvariant;
2366
2367 ConstantInt *UnswitchedReplacement =
2368 Direction ? ConstantInt::getTrue(BI->getContext())
2369 : ConstantInt::getFalse(BI->getContext());
2370 ConstantInt *ContinueReplacement =
2371 Direction ? ConstantInt::getFalse(BI->getContext())
2372 : ConstantInt::getTrue(BI->getContext());
2373 for (Value *Invariant : Invariants)
2374 // Use make_early_inc_range here as set invalidates the iterator.
2375 for (Use &U : llvm::make_early_inc_range(Invariant->uses())) {
2376 Instruction *UserI = dyn_cast<Instruction>(U.getUser());
2377 if (!UserI)
2378 continue;
2379
2380 // Replace it with the 'continue' side if in the main loop body, and the
2381 // unswitched if in the cloned blocks.
2382 if (DT.dominates(LoopPH, UserI->getParent()))
2383 U.set(ContinueReplacement);
2384 else if (ReplaceUnswitched &&
2385 DT.dominates(ClonedPH, UserI->getParent()))
2386 U.set(UnswitchedReplacement);
2387 }
2388 }
2389
2390 // We can change which blocks are exit blocks of all the cloned sibling
2391 // loops, the current loop, and any parent loops which shared exit blocks
2392 // with the current loop. As a consequence, we need to re-form LCSSA for
2393 // them. But we shouldn't need to re-form LCSSA for any child loops.
2394 // FIXME: This could be made more efficient by tracking which exit blocks are
2395 // new, and focusing on them, but that isn't likely to be necessary.
2396 //
2397 // In order to reasonably rebuild LCSSA we need to walk inside-out across the
2398 // loop nest and update every loop that could have had its exits changed. We
2399 // also need to cover any intervening loops. We add all of these loops to
2400 // a list and sort them by loop depth to achieve this without updating
2401 // unnecessary loops.
2402 auto UpdateLoop = [&](Loop &UpdateL) {
2403#ifndef NDEBUG
2404 UpdateL.verifyLoop();
2405 for (Loop *ChildL : UpdateL) {
2406 ChildL->verifyLoop();
2407 assert(ChildL->isRecursivelyLCSSAForm(DT, LI) &&((void)0)
2408 "Perturbed a child loop's LCSSA form!")((void)0);
2409 }
2410#endif
2411 // First build LCSSA for this loop so that we can preserve it when
2412 // forming dedicated exits. We don't want to perturb some other loop's
2413 // LCSSA while doing that CFG edit.
2414 formLCSSA(UpdateL, DT, &LI, SE);
2415
2416 // For loops reached by this loop's original exit blocks we may have
2417 // introduced new, non-dedicated exits. At least try to re-form dedicated
2418 // exits for these loops. This may fail if they couldn't have dedicated
2419 // exits to start with.
2420 formDedicatedExitBlocks(&UpdateL, &DT, &LI, MSSAU, /*PreserveLCSSA*/ true);
2421 };
2422
2423 // For non-child cloned loops and hoisted loops, we just need to update LCSSA
2424 // and we can do it in any order as they don't nest relative to each other.
2425 //
2426 // Also check if any of the loops we have updated have become top-level loops
2427 // as that will necessitate widening the outer loop scope.
2428 for (Loop *UpdatedL :
2429 llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops)) {
2430 UpdateLoop(*UpdatedL);
2431 if (UpdatedL->isOutermost())
2432 OuterExitL = nullptr;
2433 }
2434 if (IsStillLoop) {
2435 UpdateLoop(L);
2436 if (L.isOutermost())
2437 OuterExitL = nullptr;
2438 }
2439
2440 // If the original loop had exit blocks, walk up through the outermost loop
2441 // of those exit blocks to update LCSSA and form updated dedicated exits.
2442 if (OuterExitL != &L)
2443 for (Loop *OuterL = ParentL; OuterL != OuterExitL;
2444 OuterL = OuterL->getParentLoop())
2445 UpdateLoop(*OuterL);
2446
2447#ifndef NDEBUG1
2448 // Verify the entire loop structure to catch any incorrect updates before we
2449 // progress in the pass pipeline.
2450 LI.verify(DT);
2451#endif
2452
2453 // Now that we've unswitched something, make callbacks to report the changes.
2454 // For that we need to merge together the updated loops and the cloned loops
2455 // and check whether the original loop survived.
2456 SmallVector<Loop *, 4> SibLoops;
2457 for (Loop *UpdatedL : llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops))
2458 if (UpdatedL->getParentLoop() == ParentL)
2459 SibLoops.push_back(UpdatedL);
2460 UnswitchCB(IsStillLoop, PartiallyInvariant, SibLoops);
2461
2462 if (MSSAU && VerifyMemorySSA)
2463 MSSAU->getMemorySSA()->verifyMemorySSA();
2464
2465 if (BI)
2466 ++NumBranches;
2467 else
2468 ++NumSwitches;
2469}
2470
2471/// Recursively compute the cost of a dominator subtree based on the per-block
2472/// cost map provided.
2473///
2474 /// The recursive computation is memoized into the provided DT-indexed cost map
2475/// to allow querying it for most nodes in the domtree without it becoming
2476/// quadratic.
2477static InstructionCost computeDomSubtreeCost(
2478 DomTreeNode &N,
2479 const SmallDenseMap<BasicBlock *, InstructionCost, 4> &BBCostMap,
2480 SmallDenseMap<DomTreeNode *, InstructionCost, 4> &DTCostMap) {
2481 // Don't accumulate the cost for (or recurse through) blocks not in our block cost
2482 // map and thus not part of the duplication cost being considered.
2483 auto BBCostIt = BBCostMap.find(N.getBlock());
2484 if (BBCostIt == BBCostMap.end())
2485 return 0;
2486
2487 // Lookup this node to see if we already computed its cost.
2488 auto DTCostIt = DTCostMap.find(&N);
2489 if (DTCostIt != DTCostMap.end())
2490 return DTCostIt->second;
2491
2492 // If not, we have to compute it. We can't use insert above and update
2493 // because computing the cost may insert more things into the map.
2494 InstructionCost Cost = std::accumulate(
2495 N.begin(), N.end(), BBCostIt->second,
2496 [&](InstructionCost Sum, DomTreeNode *ChildN) -> InstructionCost {
2497 return Sum + computeDomSubtreeCost(*ChildN, BBCostMap, DTCostMap);
2498 });
2499 bool Inserted = DTCostMap.insert({&N, Cost}).second;
2500 (void)Inserted;
2501 assert(Inserted && "Should not insert a node while visiting children!")((void)0);
2502 return Cost;
2503}
2504
2505 /// Turns an llvm.experimental.guard intrinsic into an explicit conditional branch,
2506/// making the following replacement:
2507///
2508/// --code before guard--
2509/// call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ]
2510/// --code after guard--
2511///
2512/// into
2513///
2514/// --code before guard--
2515/// br i1 %cond, label %guarded, label %deopt
2516///
2517/// guarded:
2518/// --code after guard--
2519///
2520/// deopt:
2521/// call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"() ]
2522/// unreachable
2523///
2524/// It also makes all relevant DT and LI updates, so that all structures are in
2525 /// a valid state after this transform.
2526static BranchInst *
2527turnGuardIntoBranch(IntrinsicInst *GI, Loop &L,
2528 SmallVectorImpl<BasicBlock *> &ExitBlocks,
2529 DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU) {
2530 SmallVector<DominatorTree::UpdateType, 4> DTUpdates;
2531 LLVM_DEBUG(dbgs() << "Turning " << *GI << " into a branch.\n")do { } while (false);
2532 BasicBlock *CheckBB = GI->getParent();
2533
2534 if (MSSAU && VerifyMemorySSA)
2535 MSSAU->getMemorySSA()->verifyMemorySSA();
2536
2537 // Remove all CheckBB's successors from DomTree. A block can be seen among
2538 // successors more than once, but for DomTree it should be added only once.
2539 SmallPtrSet<BasicBlock *, 4> Successors;
2540 for (auto *Succ : successors(CheckBB))
2541 if (Successors.insert(Succ).second)
2542 DTUpdates.push_back({DominatorTree::Delete, CheckBB, Succ});
2543
2544 Instruction *DeoptBlockTerm =
2545 SplitBlockAndInsertIfThen(GI->getArgOperand(0), GI, true);
2546 BranchInst *CheckBI = cast<BranchInst>(CheckBB->getTerminator());
2547 // SplitBlockAndInsertIfThen inserts control flow that branches to
2548 // DeoptBlockTerm if the condition is true. We want the opposite.
2549 CheckBI->swapSuccessors();
2550
2551 BasicBlock *GuardedBlock = CheckBI->getSuccessor(0);
2552 GuardedBlock->setName("guarded");
2553 CheckBI->getSuccessor(1)->setName("deopt");
2554 BasicBlock *DeoptBlock = CheckBI->getSuccessor(1);
2555
2556 // We now have a new exit block.
2557 ExitBlocks.push_back(CheckBI->getSuccessor(1));
2558
2559 if (MSSAU)
2560 MSSAU->moveAllAfterSpliceBlocks(CheckBB, GuardedBlock, GI);
2561
2562 GI->moveBefore(DeoptBlockTerm);
2563 GI->setArgOperand(0, ConstantInt::getFalse(GI->getContext()));
2564
2565 // Add new successors of CheckBB into DomTree.
2566 for (auto *Succ : successors(CheckBB))
2567 DTUpdates.push_back({DominatorTree::Insert, CheckBB, Succ});
2568
2569 // Now the blocks that used to be CheckBB's successors are GuardedBlock's
2570 // successors.
2571 for (auto *Succ : Successors)
2572 DTUpdates.push_back({DominatorTree::Insert, GuardedBlock, Succ});
2573
2574 // Make proper changes to DT.
2575 DT.applyUpdates(DTUpdates);
2576 // Inform LI of a new loop block.
2577 L.addBasicBlockToLoop(GuardedBlock, LI);
2578
2579 if (MSSAU) {
2580 MemoryDef *MD = cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(GI));
2581 MSSAU->moveToPlace(MD, DeoptBlock, MemorySSA::BeforeTerminator);
2582 if (VerifyMemorySSA)
2583 MSSAU->getMemorySSA()->verifyMemorySSA();
2584 }
2585
2586 ++NumGuards;
2587 return CheckBI;
2588}
2589
2590 /// The cost multiplier is a way to limit the potentially exponential behavior
2591 /// of loop-unswitch. The cost is multiplied in proportion to 2^(number of
2592 /// unswitch candidates available). It also accounts for the number of "sibling"
2593 /// loops, with the idea of accounting for previous unswitches that already
2594 /// happened on this cluster of loops. The intent was to keep this formula
2595 /// simple, just enough to limit the worst-case behavior. Even if it is not that
2596 /// simple now, it is still not an attempt to provide a detailed heuristic size
2597 /// prediction.
2598///
2599/// TODO: Make a proper accounting of "explosion" effect for all kinds of
2600/// unswitch candidates, making adequate predictions instead of wild guesses.
2601/// That requires knowing not just the number of "remaining" candidates but
2602/// also costs of unswitching for each of these candidates.
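///
/// Roughly, as implemented below, the multiplier is
/// min(SiblingsMultiplier * 2^ClonesPower, UnswitchThreshold); for example,
/// with SiblingsMultiplier == 2 and ClonesPower == 3 a candidate's cost would
/// be scaled by min(2 * 8, UnswitchThreshold).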
2603static int CalculateUnswitchCostMultiplier(
2604 Instruction &TI, Loop &L, LoopInfo &LI, DominatorTree &DT,
2605 ArrayRef<std::pair<Instruction *, TinyPtrVector<Value *>>>
2606 UnswitchCandidates) {
2607
2608 // Guards and other exiting conditions do not contribute to exponential
2609 // explosion as long as they dominate the latch (otherwise there might be
2610 // another path to the latch remaining that does not allow eliminating the
2611 // loop copy on unswitch).
2612 BasicBlock *Latch = L.getLoopLatch();
2613 BasicBlock *CondBlock = TI.getParent();
2614 if (DT.dominates(CondBlock, Latch) &&
2615 (isGuard(&TI) ||
2616 llvm::count_if(successors(&TI), [&L](BasicBlock *SuccBB) {
2617 return L.contains(SuccBB);
2618 }) <= 1)) {
2619 NumCostMultiplierSkipped++;
2620 return 1;
2621 }
2622
2623 auto *ParentL = L.getParentLoop();
2624 int SiblingsCount = (ParentL ? ParentL->getSubLoopsVector().size()
2625 : std::distance(LI.begin(), LI.end()));
2626 // Count the number of clones that all the candidates might cause during
2627 // unswitching. A branch/guard counts as 1; a switch counts as log2 of its cases.
2628 int UnswitchedClones = 0;
2629 for (auto Candidate : UnswitchCandidates) {
2630 Instruction *CI = Candidate.first;
2631 BasicBlock *CondBlock = CI->getParent();
2632 bool SkipExitingSuccessors = DT.dominates(CondBlock, Latch);
2633 if (isGuard(CI)) {
2634 if (!SkipExitingSuccessors)
2635 UnswitchedClones++;
2636 continue;
2637 }
2638 int NonExitingSuccessors = llvm::count_if(
2639 successors(CondBlock), [SkipExitingSuccessors, &L](BasicBlock *SuccBB) {
2640 return !SkipExitingSuccessors || L.contains(SuccBB);
2641 });
2642 UnswitchedClones += Log2_32(NonExitingSuccessors);
2643 }
2644
2645 // Ignore up to the "unscaled candidates" number of unswitch candidates
2646 // when calculating the power-of-two scaling of the cost. The main idea
2647 // with this control is to allow a small number of unswitches to happen
2648 // and rely more on the siblings multiplier (see below) when the number
2649 // of candidates is small.
2650 unsigned ClonesPower =
2651 std::max(UnswitchedClones - (int)UnswitchNumInitialUnscaledCandidates, 0);
2652
2653 // Allow top-level loops to spread a bit more than nested ones.
2654 int SiblingsMultiplier =
2655 std::max((ParentL ? SiblingsCount
2656 : SiblingsCount / (int)UnswitchSiblingsToplevelDiv),
2657 1);
2658 // Compute the cost multiplier in a way that won't overflow by saturating
2659 // at an upper bound.
2660 int CostMultiplier;
2661 if (ClonesPower > Log2_32(UnswitchThreshold) ||
2662 SiblingsMultiplier > UnswitchThreshold)
2663 CostMultiplier = UnswitchThreshold;
2664 else
2665 CostMultiplier = std::min(SiblingsMultiplier * (1 << ClonesPower),
2666 (int)UnswitchThreshold);
2667
2668 LLVM_DEBUG(dbgs() << " Computed multiplier " << CostMultiplierdo { } while (false)
2669 << " (siblings " << SiblingsMultiplier << " * clones "do { } while (false)
2670 << (1 << ClonesPower) << ")"do { } while (false)
2671 << " for unswitch candidate: " << TI << "\n")do { } while (false);
2672 return CostMultiplier;
2673}
2674
2675static bool unswitchBestCondition(
2676 Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
2677 AAResults &AA, TargetTransformInfo &TTI,
2678 function_ref<void(bool, bool, ArrayRef<Loop *>)> UnswitchCB,
2679 ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
2680 function_ref<void(Loop &, StringRef)> DestroyLoopCB) {
2681 // Collect all invariant conditions within this loop (as opposed to an inner
2682 // loop which would be handled when visiting that inner loop).
2683 SmallVector<std::pair<Instruction *, TinyPtrVector<Value *>>, 4>
2684 UnswitchCandidates;
2685
2686 // Whether or not we should also collect guards in the loop.
2687 bool CollectGuards = false;
2688 if (UnswitchGuards) {
1
Assuming the condition is false
2
Taking false branch
2689 auto *GuardDecl = L.getHeader()->getParent()->getParent()->getFunction(
2690 Intrinsic::getName(Intrinsic::experimental_guard));
2691 if (GuardDecl && !GuardDecl->use_empty())
2692 CollectGuards = true;
2693 }
2694
2695 IVConditionInfo PartialIVInfo;
3
Calling implicit default constructor for 'IVConditionInfo'
5
Returning from default constructor for 'IVConditionInfo'
2696 for (auto *BB : L.blocks()) {
6
Assuming '__begin1' is equal to '__end1'
2697 if (LI.getLoopFor(BB) != &L)
2698 continue;
2699
2700 if (CollectGuards)
2701 for (auto &I : *BB)
2702 if (isGuard(&I)) {
2703 auto *Cond = cast<IntrinsicInst>(&I)->getArgOperand(0);
2704 // TODO: Support AND, OR conditions and partial unswitching.
2705 if (!isa<Constant>(Cond) && L.isLoopInvariant(Cond))
2706 UnswitchCandidates.push_back({&I, {Cond}});
2707 }
2708
2709 if (auto *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
2710 // We can only consider fully loop-invariant switch conditions as we need
2711 // to completely eliminate the switch after unswitching.
2712 if (!isa<Constant>(SI->getCondition()) &&
2713 L.isLoopInvariant(SI->getCondition()) && !BB->getUniqueSuccessor())
2714 UnswitchCandidates.push_back({SI, {SI->getCondition()}});
2715 continue;
2716 }
2717
2718 auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
2719 if (!BI || !BI->isConditional() || isa<Constant>(BI->getCondition()) ||
2720 BI->getSuccessor(0) == BI->getSuccessor(1))
2721 continue;
2722
2723 // If BI's condition is 'select _, true, false', simplify it so that it does
2724 // not confuse the matchers below.
2725 Value *Cond = BI->getCondition(), *CondNext;
2726 while (match(Cond, m_Select(m_Value(CondNext), m_One(), m_Zero())))
2727 Cond = CondNext;
2728 BI->setCondition(Cond);
2729
2730 if (L.isLoopInvariant(BI->getCondition())) {
2731 UnswitchCandidates.push_back({BI, {BI->getCondition()}});
2732 continue;
2733 }
2734
2735 Instruction &CondI = *cast<Instruction>(BI->getCondition());
2736 if (match(&CondI, m_CombineOr(m_LogicalAnd(), m_LogicalOr()))) {
2737 TinyPtrVector<Value *> Invariants =
2738 collectHomogenousInstGraphLoopInvariants(L, CondI, LI);
2739 if (Invariants.empty())
2740 continue;
2741
2742 UnswitchCandidates.push_back({BI, std::move(Invariants)});
2743 continue;
2744 }
2745 }
2746
2747 Instruction *PartialIVCondBranch = nullptr;
2748 if (MSSAU && !findOptionMDForLoop(&L, "llvm.loop.unswitch.partial.disable") &&
7
Assuming 'MSSAU' is null
8
Taking false branch
2749 !any_of(UnswitchCandidates, [&L](auto &TerminatorAndInvariants) {
2750 return TerminatorAndInvariants.first == L.getHeader()->getTerminator();
2751 })) {
2752 MemorySSA *MSSA = MSSAU->getMemorySSA();
2753 if (auto Info = hasPartialIVCondition(L, MSSAThreshold, *MSSA, AA)) {
2754 LLVM_DEBUG(do { } while (false)
2755 dbgs() << "simple-loop-unswitch: Found partially invariant condition "do { } while (false)
2756 << *Info->InstToDuplicate[0] << "\n")do { } while (false);
2757 PartialIVInfo = *Info;
2758 PartialIVCondBranch = L.getHeader()->getTerminator();
2759 TinyPtrVector<Value *> ValsToDuplicate;
2760 for (auto *Inst : Info->InstToDuplicate)
2761 ValsToDuplicate.push_back(Inst);
2762 UnswitchCandidates.push_back(
2763 {L.getHeader()->getTerminator(), std::move(ValsToDuplicate)});
2764 }
2765 }
2766
2767 // If we didn't find any candidates, we're done.
2768 if (UnswitchCandidates.empty())
9
Calling 'SmallVectorBase::empty'
12
Returning from 'SmallVectorBase::empty'
13
Taking false branch
2769 return false;
2770
2771 // Check if there are irreducible CFG cycles in this loop. If so, we cannot
2772 // easily unswitch non-trivial edges out of the loop. Doing so might turn the
2773 // irreducible control flow into reducible control flow and introduce new
2774 // loops "out of thin air". If we ever discover important use cases for doing
2775 // this, we can add support to loop unswitch, but it is a lot of complexity
2776 // for what seems little or no real world benefit.
2777 LoopBlocksRPO RPOT(&L);
2778 RPOT.perform(&LI);
2779 if (containsIrreducibleCFG<const BasicBlock *>(RPOT, LI))
14
Calling 'containsIrreducibleCFG<const llvm::BasicBlock *, llvm::LoopBlocksRPO, llvm::LoopInfo, llvm::GraphTraits<const llvm::BasicBlock *>>'
16
Returning from 'containsIrreducibleCFG<const llvm::BasicBlock *, llvm::LoopBlocksRPO, llvm::LoopInfo, llvm::GraphTraits<const llvm::BasicBlock *>>'
17
Taking false branch
2780 return false;
2781
2782 SmallVector<BasicBlock *, 4> ExitBlocks;
2783 L.getUniqueExitBlocks(ExitBlocks);
2784
2785 // We cannot unswitch if exit blocks contain a cleanuppad/catchswitch
2786 // instruction as we don't know how to split those exit blocks.
2787 // FIXME: We should teach SplitBlock to handle this and remove this
2788 // restriction.
2789 for (auto *ExitBB : ExitBlocks) {
18
Assuming '__begin1' is equal to '__end1'
2790 auto *I = ExitBB->getFirstNonPHI();
2791 if (isa<CleanupPadInst>(I) || isa<CatchSwitchInst>(I)) {
2792 LLVM_DEBUG(dbgs() << "Cannot unswitch because of cleanuppad/catchswitch "do { } while (false)
2793 "in exit block\n")do { } while (false);
2794 return false;
2795 }
2796 }
2797
2798 LLVM_DEBUG(do { } while (false)
19
Loop condition is false. Exiting loop
2799 dbgs() << "Considering " << UnswitchCandidates.size()do { } while (false)
2800 << " non-trivial loop invariant conditions for unswitching.\n")do { } while (false);
2801
2802 // Given that unswitching these terminators will require duplicating parts of
2803 // the loop, we need to be able to model that cost. Compute the ephemeral
2804 // values and set up a data structure to hold per-BB costs. We cache each
2805 // block's cost so that we don't recompute this when considering different
2806 // subsets of the loop for duplication during unswitching.
2807 SmallPtrSet<const Value *, 4> EphValues;
2808 CodeMetrics::collectEphemeralValues(&L, &AC, EphValues);
2809 SmallDenseMap<BasicBlock *, InstructionCost, 4> BBCostMap;
2810
2811 // Compute the cost of each block, as well as the total loop cost. Also, bail
2812 // out if we see instructions which are incompatible with loop unswitching
2813 // (convergent, noduplicate, or cross-basic-block tokens).
2814 // FIXME: We might be able to safely handle some of these in non-duplicated
2815 // regions.
2816 TargetTransformInfo::TargetCostKind CostKind =
2817 L.getHeader()->getParent()->hasMinSize()
20
Assuming the condition is false
21
'?' condition is false
2818 ? TargetTransformInfo::TCK_CodeSize
2819 : TargetTransformInfo::TCK_SizeAndLatency;
2820 InstructionCost LoopCost = 0;
2821 for (auto *BB : L.blocks()) {
22
Assuming '__begin1' is equal to '__end1'
2822 InstructionCost Cost = 0;
2823 for (auto &I : *BB) {
2824 if (EphValues.count(&I))
2825 continue;
2826
2827 if (I.getType()->isTokenTy() && I.isUsedOutsideOfBlock(BB))
2828 return false;
2829 if (auto *CB = dyn_cast<CallBase>(&I))
2830 if (CB->isConvergent() || CB->cannotDuplicate())
2831 return false;
2832
2833 Cost += TTI.getUserCost(&I, CostKind);
2834 }
2835 assert(Cost >= 0 && "Must not have negative costs!")((void)0);
2836 LoopCost += Cost;
2837 assert(LoopCost >= 0 && "Must not have negative loop costs!")((void)0);
2838 BBCostMap[BB] = Cost;
2839 }
2840 LLVM_DEBUG(dbgs() << " Total loop cost: " << LoopCost << "\n")do { } while (false);
23
Loop condition is false. Exiting loop
2841
2842 // Now we find the best candidate by searching for the one with the following
2843 // properties in order:
2844 //
2845 // 1) An unswitching cost below the threshold
2846 // 2) The smallest number of duplicated unswitch candidates (to avoid
2847 // creating redundant subsequent unswitching)
2848 // 3) The smallest cost after unswitching.
2849 //
2850 // We prioritize reducing fanout of unswitch candidates provided the cost
2851 // remains below the threshold because this has a multiplicative effect.
2852 //
2853 // This requires memoizing each dominator subtree to avoid redundant work.
2854 //
2855 // FIXME: Need to actually do the number of candidates part above.
2856 SmallDenseMap<DomTreeNode *, InstructionCost, 4> DTCostMap;
2857 // Given a terminator which might be unswitched, computes the non-duplicated
2858 // cost for that terminator.
2859 auto ComputeUnswitchedCost = [&](Instruction &TI,
2860 bool FullUnswitch) -> InstructionCost {
2861 BasicBlock &BB = *TI.getParent();
2862 SmallPtrSet<BasicBlock *, 4> Visited;
2863
2864 InstructionCost Cost = 0;
2865 for (BasicBlock *SuccBB : successors(&BB)) {
2866 // Don't count successors more than once.
2867 if (!Visited.insert(SuccBB).second)
28
Assuming field 'second' is true
29
Taking false branch
2868 continue;
2869
2870 // If this is a partial unswitch candidate, then it must be a conditional
2871 // branch with a condition of either `or`, `and`, their corresponding
2872 // select forms or partially invariant instructions. In that case, one of
2873 // the successors is necessarily duplicated, so don't even try to remove
2874 // its cost.
2875 if (!FullUnswitch
29.1
'FullUnswitch' is false
) {
30
Taking true branch
2876 auto &BI = cast<BranchInst>(TI);
31
'TI' is a 'BranchInst'
2877 if (match(BI.getCondition(), m_LogicalAnd())) {
32
Calling 'match<llvm::Value, llvm::PatternMatch::LogicalOp_match<llvm::PatternMatch::class_match<llvm::Value>, llvm::PatternMatch::class_match<llvm::Value>, 28>>'
39
Returning from 'match<llvm::Value, llvm::PatternMatch::LogicalOp_match<llvm::PatternMatch::class_match<llvm::Value>, llvm::PatternMatch::class_match<llvm::Value>, 28>>'
40
Taking false branch
2878 if (SuccBB == BI.getSuccessor(1))
2879 continue;
2880 } else if (match(BI.getCondition(), m_LogicalOr())) {
41
Calling 'match<llvm::Value, llvm::PatternMatch::LogicalOp_match<llvm::PatternMatch::class_match<llvm::Value>, llvm::PatternMatch::class_match<llvm::Value>, 29>>'
48
Returning from 'match<llvm::Value, llvm::PatternMatch::LogicalOp_match<llvm::PatternMatch::class_match<llvm::Value>, llvm::PatternMatch::class_match<llvm::Value>, 29>>'
49
Taking false branch
2881 if (SuccBB == BI.getSuccessor(0))
2882 continue;
2883 } else if ((PartialIVInfo.KnownValue->isOneValue() &&
50
Called C++ object pointer is null
2884 SuccBB == BI.getSuccessor(0)) ||
2885 (!PartialIVInfo.KnownValue->isOneValue() &&
2886 SuccBB == BI.getSuccessor(1)))
2887 continue;
2888 }
2889
2890 // This successor's domtree will not need to be duplicated after
2891 // unswitching if the edge to the successor dominates it (and thus the
2892 // entire tree). This essentially means there is no other path into this
2893 // subtree and so it will end up live in only one clone of the loop.
2894 if (SuccBB->getUniquePredecessor() ||
2895 llvm::all_of(predecessors(SuccBB), [&](BasicBlock *PredBB) {
2896 return PredBB == &BB || DT.dominates(SuccBB, PredBB);
2897 })) {
2898 Cost += computeDomSubtreeCost(*DT[SuccBB], BBCostMap, DTCostMap);
2899 assert(Cost <= LoopCost &&((void)0)
2900 "Non-duplicated cost should never exceed total loop cost!")((void)0);
2901 }
2902 }
2903
2904 // Now scale the cost by the number of unique successors minus one. We
2905 // subtract one because there is already at least one copy of the entire
2906 // loop. This is computing the new cost of unswitching a condition.
2907 // Note that guards always have 2 unique successors that are implicit and
2908 // will be materialized if we decide to unswitch it.
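// As an illustrative example, a fully unswitched switch with 4 distinct
// reachable successors produces 3 additional copies of the duplicated
// portion, so (LoopCost - Cost) is charged 3 times below.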
2909 int SuccessorsCount = isGuard(&TI) ? 2 : Visited.size();
2910 assert(SuccessorsCount > 1 &&((void)0)
2911 "Cannot unswitch a condition without multiple distinct successors!")((void)0);
2912 return (LoopCost - Cost) * (SuccessorsCount - 1);
2913 };
2914 Instruction *BestUnswitchTI = nullptr;
2915 InstructionCost BestUnswitchCost = 0;
2916 ArrayRef<Value *> BestUnswitchInvariants;
2917 for (auto &TerminatorAndInvariants : UnswitchCandidates) {
24
Assuming '__begin1' is not equal to '__end1'
2918 Instruction &TI = *TerminatorAndInvariants.first;
2919 ArrayRef<Value *> Invariants = TerminatorAndInvariants.second;
2920 BranchInst *BI = dyn_cast<BranchInst>(&TI);
25
Assuming the object is a 'BranchInst'
2921 InstructionCost CandidateCost = ComputeUnswitchedCost(
27
Calling 'operator()'
2922 TI, /*FullUnswitch*/ !BI
25.1
'BI' is non-null
|| (Invariants.size() == 1 &&
26
Assuming the condition is false
2923 Invariants[0] == BI->getCondition()));
2924 // Calculate the cost multiplier, which is a tool to limit the potentially
2925 // exponential behavior of loop-unswitch.
2926 if (EnableUnswitchCostMultiplier) {
2927 int CostMultiplier =
2928 CalculateUnswitchCostMultiplier(TI, L, LI, DT, UnswitchCandidates);
2929 assert(((void)0)
2930 (CostMultiplier > 0 && CostMultiplier <= UnswitchThreshold) &&((void)0)
2931 "cost multiplier needs to be in the range of 1..UnswitchThreshold")((void)0);
2932 CandidateCost *= CostMultiplier;
2933 LLVM_DEBUG(dbgs() << " Computed cost of " << CandidateCostdo { } while (false)
2934 << " (multiplier: " << CostMultiplier << ")"do { } while (false)
2935 << " for unswitch candidate: " << TI << "\n")do { } while (false);
2936 } else {
2937 LLVM_DEBUG(dbgs() << " Computed cost of " << CandidateCostdo { } while (false)
2938 << " for unswitch candidate: " << TI << "\n")do { } while (false);
2939 }
2940
2941 if (!BestUnswitchTI || CandidateCost < BestUnswitchCost) {
2942 BestUnswitchTI = &TI;
2943 BestUnswitchCost = CandidateCost;
2944 BestUnswitchInvariants = Invariants;
2945 }
2946 }
2947 assert(BestUnswitchTI && "Failed to find loop unswitch candidate")((void)0);
2948
2949 if (BestUnswitchCost >= UnswitchThreshold) {
2950 LLVM_DEBUG(dbgs() << "Cannot unswitch, lowest cost found: "do { } while (false)
2951 << BestUnswitchCost << "\n")do { } while (false);
2952 return false;
2953 }
2954
2955 if (BestUnswitchTI != PartialIVCondBranch)
2956 PartialIVInfo.InstToDuplicate.clear();
2957
2958 // If the best candidate is a guard, turn it into a branch.
2959 if (isGuard(BestUnswitchTI))
2960 BestUnswitchTI = turnGuardIntoBranch(cast<IntrinsicInst>(BestUnswitchTI), L,
2961 ExitBlocks, DT, LI, MSSAU);
2962
2963 LLVM_DEBUG(dbgs() << " Unswitching non-trivial (cost = "do { } while (false)
2964 << BestUnswitchCost << ") terminator: " << *BestUnswitchTIdo { } while (false)
2965 << "\n")do { } while (false);
2966 unswitchNontrivialInvariants(L, *BestUnswitchTI, BestUnswitchInvariants,
2967 ExitBlocks, PartialIVInfo, DT, LI, AC,
2968 UnswitchCB, SE, MSSAU, DestroyLoopCB);
2969 return true;
2970}
2971
2972/// Unswitch control flow predicated on loop invariant conditions.
2973///
2974 /// This first hoists all branches or switches which are trivial (i.e., do not
2975/// require duplicating any part of the loop) out of the loop body. It then
2976/// looks at other loop invariant control flows and tries to unswitch those as
2977/// well by cloning the loop if the result is small enough.
2978///
2979/// The `DT`, `LI`, `AC`, `AA`, `TTI` parameters are required analyses that are
2980/// also updated based on the unswitch. The `MSSA` analysis is also updated if
2981/// valid (i.e. its use is enabled).
2982///
2983/// If either `NonTrivial` is true or the flag `EnableNonTrivialUnswitch` is
2984/// true, we will attempt to do non-trivial unswitching as well as trivial
2985/// unswitching.
2986///
2987/// The `UnswitchCB` callback provided will be run after unswitching is
2988/// complete, with the first parameter set to `true` if the provided loop
2989/// remains a loop, and a list of new sibling loops created.
2990///
2991/// If `SE` is non-null, we will update that analysis based on the unswitching
2992/// done.
2993static bool
2994unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
2995 AAResults &AA, TargetTransformInfo &TTI, bool Trivial,
2996 bool NonTrivial,
2997 function_ref<void(bool, bool, ArrayRef<Loop *>)> UnswitchCB,
2998 ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
2999 function_ref<void(Loop &, StringRef)> DestroyLoopCB) {
3000 assert(L.isRecursivelyLCSSAForm(DT, LI) &&((void)0)
3001 "Loops must be in LCSSA form before unswitching.")((void)0);
3002
3003 // Must be in loop simplified form: we need a preheader and dedicated exits.
3004 if (!L.isLoopSimplifyForm())
3005 return false;
3006
3007 // Try trivial unswitching first, before looping over other basic blocks in the loop.
3008 if (Trivial && unswitchAllTrivialConditions(L, DT, LI, SE, MSSAU)) {
3009 // If we unswitched successfully we will want to clean up the loop before
3010 // processing it further so just mark it as unswitched and return.
3011 UnswitchCB(/*CurrentLoopValid*/ true, false, {});
3012 return true;
3013 }
3014
3015 // Check whether we should continue with non-trivial conditions.
3016 // EnableNonTrivialUnswitch: Global variable that forces non-trivial
3017 // unswitching for testing and debugging.
3018 // NonTrivial: Parameter that enables non-trivial unswitching for this
3019 // invocation of the transform. But this should be allowed only
3020 // for targets without branch divergence.
3021 //
3022 // FIXME: If divergence analysis becomes available to a loop
3023 // transform, we should allow unswitching for non-trivial uniform
3024 // branches even on targets that have divergence.
3025 // https://bugs.llvm.org/show_bug.cgi?id=48819
3026 bool ContinueWithNonTrivial =
3027 EnableNonTrivialUnswitch || (NonTrivial && !TTI.hasBranchDivergence());
3028 if (!ContinueWithNonTrivial)
3029 return false;
3030
3031 // Skip non-trivial unswitching for optsize functions.
3032 if (L.getHeader()->getParent()->hasOptSize())
3033 return false;
3034
3035 // Skip non-trivial unswitching for loops that cannot be cloned.
3036 if (!L.isSafeToClone())
3037 return false;
3038
3039 // For non-trivial unswitching, because it often creates new loops, we rely on
3040 // the pass manager to iterate on the loops rather than trying to immediately
3041 // reach a fixed point. There is no substantial advantage to iterating
3042 // internally, and if any of the new loops are simplified enough to contain
3043 // trivial unswitching we want to prefer those.
3044
3045 // Try to unswitch the best invariant condition. We prefer a full unswitch to
3046 // a partial unswitch when both are possible below the threshold.
3047 if (unswitchBestCondition(L, DT, LI, AC, AA, TTI, UnswitchCB, SE, MSSAU,
3048 DestroyLoopCB))
3049 return true;
3050
3051 // No other opportunities to unswitch.
3052 return false;
3053}
3054
3055PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM,
3056 LoopStandardAnalysisResults &AR,
3057 LPMUpdater &U) {
3058 Function &F = *L.getHeader()->getParent();
3059 (void)F;
3060
3061 LLVM_DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << Ldo { } while (false)
3062 << "\n")do { } while (false);
3063
3064 // Save the current loop name in a variable so that we can report it even
3065 // after it has been deleted.
3066 std::string LoopName = std::string(L.getName());
3067
3068 auto UnswitchCB = [&L, &U, &LoopName](bool CurrentLoopValid,
3069 bool PartiallyInvariant,
3070 ArrayRef<Loop *> NewLoops) {
3071 // If we did a non-trivial unswitch, we have added new (cloned) loops.
3072 if (!NewLoops.empty())
3073 U.addSiblingLoops(NewLoops);
3074
3075 // If the current loop remains valid, we should revisit it to catch any
3076 // other unswitch opportunities. Otherwise, we need to mark it as deleted.
3077 if (CurrentLoopValid) {
3078 if (PartiallyInvariant) {
3079 // Mark the new loop as partially unswitched, to avoid unswitching on
3080 // the same condition again.
3081 auto &Context = L.getHeader()->getContext();
3082 MDNode *DisableUnswitchMD = MDNode::get(
3083 Context,
3084 MDString::get(Context, "llvm.loop.unswitch.partial.disable"));
3085 MDNode *NewLoopID = makePostTransformationMetadata(
3086 Context, L.getLoopID(), {"llvm.loop.unswitch.partial"},
3087 {DisableUnswitchMD});
3088 L.setLoopID(NewLoopID);
3089 } else
3090 U.revisitCurrentLoop();
3091 } else
3092 U.markLoopAsDeleted(L, LoopName);
3093 };
3094
3095 auto DestroyLoopCB = [&U](Loop &L, StringRef Name) {
3096 U.markLoopAsDeleted(L, Name);
3097 };
3098
3099 Optional<MemorySSAUpdater> MSSAU;
3100 if (AR.MSSA) {
3101 MSSAU = MemorySSAUpdater(AR.MSSA);
3102 if (VerifyMemorySSA)
3103 AR.MSSA->verifyMemorySSA();
3104 }
3105 if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC, AR.AA, AR.TTI, Trivial, NonTrivial,
3106 UnswitchCB, &AR.SE,
3107 MSSAU.hasValue() ? MSSAU.getPointer() : nullptr,
3108 DestroyLoopCB))
3109 return PreservedAnalyses::all();
3110
3111 if (AR.MSSA && VerifyMemorySSA)
3112 AR.MSSA->verifyMemorySSA();
3113
3114 // Historically this pass has had issues with the dominator tree so verify it
3115 // in asserts builds.
3116 assert(AR.DT.verify(DominatorTree::VerificationLevel::Fast))((void)0);
3117
3118 auto PA = getLoopPassPreservedAnalyses();
3119 if (AR.MSSA)
3120 PA.preserve<MemorySSAAnalysis>();
3121 return PA;
3122}
3123
3124namespace {
3125
3126class SimpleLoopUnswitchLegacyPass : public LoopPass {
3127 bool NonTrivial;
3128
3129public:
3130 static char ID; // Pass ID, replacement for typeid
3131
3132 explicit SimpleLoopUnswitchLegacyPass(bool NonTrivial = false)
3133 : LoopPass(ID), NonTrivial(NonTrivial) {
3134 initializeSimpleLoopUnswitchLegacyPassPass(
3135 *PassRegistry::getPassRegistry());
3136 }
3137
3138 bool runOnLoop(Loop *L, LPPassManager &LPM) override;
3139
3140 void getAnalysisUsage(AnalysisUsage &AU) const override {
3141 AU.addRequired<AssumptionCacheTracker>();
3142 AU.addRequired<TargetTransformInfoWrapperPass>();
3143 if (EnableMSSALoopDependency) {
3144 AU.addRequired<MemorySSAWrapperPass>();
3145 AU.addPreserved<MemorySSAWrapperPass>();
3146 }
3147 getLoopAnalysisUsage(AU);
3148 }
3149};
3150
3151} // end anonymous namespace
3152
3153bool SimpleLoopUnswitchLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) {
3154 if (skipLoop(L))
3155 return false;
3156
3157 Function &F = *L->getHeader()->getParent();
3158
3159 LLVM_DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << *Ldo { } while (false)
3160 << "\n")do { } while (false);
3161
3162 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
3163 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
3164 auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
3165 auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
3166 auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
3167 MemorySSA *MSSA = nullptr;
3168 Optional<MemorySSAUpdater> MSSAU;
3169 if (EnableMSSALoopDependency) {
3170 MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
3171 MSSAU = MemorySSAUpdater(MSSA);
3172 }
3173
3174 auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
3175 auto *SE = SEWP ? &SEWP->getSE() : nullptr;
3176
3177 auto UnswitchCB = [&L, &LPM](bool CurrentLoopValid, bool PartiallyInvariant,
3178 ArrayRef<Loop *> NewLoops) {
3179 // If we did a non-trivial unswitch, we have added new (cloned) loops.
3180 for (auto *NewL : NewLoops)
3181 LPM.addLoop(*NewL);
3182
3183 // If the current loop remains valid, re-add it to the queue. This is
3184 // a little wasteful as we'll finish processing the current loop as well,
3185 // but it is the best we can do in the old PM.
3186 if (CurrentLoopValid) {
3187 // If the current loop has been unswitched using a partially invariant
3188 // condition, we should not re-add the current loop to avoid unswitching
3189 // on the same condition again.
3190 if (!PartiallyInvariant)
3191 LPM.addLoop(*L);
3192 } else
3193 LPM.markLoopAsDeleted(*L);
3194 };
3195
3196 auto DestroyLoopCB = [&LPM](Loop &L, StringRef /* Name */) {
3197 LPM.markLoopAsDeleted(L);
3198 };
3199
3200 if (MSSA && VerifyMemorySSA)
3201 MSSA->verifyMemorySSA();
3202
3203 bool Changed =
3204 unswitchLoop(*L, DT, LI, AC, AA, TTI, true, NonTrivial, UnswitchCB, SE,
3205 MSSAU.hasValue() ? MSSAU.getPointer() : nullptr,
3206 DestroyLoopCB);
3207
3208 if (MSSA && VerifyMemorySSA)
3209 MSSA->verifyMemorySSA();
3210
3211 // Historically this pass has had issues with the dominator tree so verify it
3212 // in asserts builds.
3213 assert(DT.verify(DominatorTree::VerificationLevel::Fast))((void)0);
3214
3215 return Changed;
3216}
3217
3218char SimpleLoopUnswitchLegacyPass::ID = 0;
3219INITIALIZE_PASS_BEGIN(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch",static void *initializeSimpleLoopUnswitchLegacyPassPassOnce(PassRegistry
&Registry) {
3220 "Simple unswitch loops", false, false)static void *initializeSimpleLoopUnswitchLegacyPassPassOnce(PassRegistry
&Registry) {
3221INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)initializeAssumptionCacheTrackerPass(Registry);
3222INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)initializeDominatorTreeWrapperPassPass(Registry);
3223INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)initializeLoopInfoWrapperPassPass(Registry);
3224INITIALIZE_PASS_DEPENDENCY(LoopPass)initializeLoopPassPass(Registry);
3225INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)initializeMemorySSAWrapperPassPass(Registry);
3226INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)initializeTargetTransformInfoWrapperPassPass(Registry);
3227INITIALIZE_PASS_END(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch",PassInfo *PI = new PassInfo( "Simple unswitch loops", "simple-loop-unswitch"
, &SimpleLoopUnswitchLegacyPass::ID, PassInfo::NormalCtor_t
(callDefaultCtor<SimpleLoopUnswitchLegacyPass>), false,
false); Registry.registerPass(*PI, true); return PI; } static
llvm::once_flag InitializeSimpleLoopUnswitchLegacyPassPassFlag
; void llvm::initializeSimpleLoopUnswitchLegacyPassPass(PassRegistry
&Registry) { llvm::call_once(InitializeSimpleLoopUnswitchLegacyPassPassFlag
, initializeSimpleLoopUnswitchLegacyPassPassOnce, std::ref(Registry
)); }
3228 "Simple unswitch loops", false, false)PassInfo *PI = new PassInfo( "Simple unswitch loops", "simple-loop-unswitch"
, &SimpleLoopUnswitchLegacyPass::ID, PassInfo::NormalCtor_t
(callDefaultCtor<SimpleLoopUnswitchLegacyPass>), false,
false); Registry.registerPass(*PI, true); return PI; } static
llvm::once_flag InitializeSimpleLoopUnswitchLegacyPassPassFlag
; void llvm::initializeSimpleLoopUnswitchLegacyPassPass(PassRegistry
&Registry) { llvm::call_once(InitializeSimpleLoopUnswitchLegacyPassPassFlag
, initializeSimpleLoopUnswitchLegacyPassPassOnce, std::ref(Registry
)); }
3229
3230Pass *llvm::createSimpleLoopUnswitchLegacyPass(bool NonTrivial) {
3231 return new SimpleLoopUnswitchLegacyPass(NonTrivial);
3232}

/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Utils/LoopUtils.h

1//===- llvm/Transforms/Utils/LoopUtils.h - Loop utilities -------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines some loop transformation utilities.
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_TRANSFORMS_UTILS_LOOPUTILS_H
14#define LLVM_TRANSFORMS_UTILS_LOOPUTILS_H
15
16#include "llvm/ADT/StringRef.h"
17#include "llvm/Analysis/IVDescriptors.h"
18#include "llvm/Analysis/TargetTransformInfo.h"
19#include "llvm/Transforms/Utils/ValueMapper.h"
20
21namespace llvm {
22
23template <typename T> class DomTreeNodeBase;
24using DomTreeNode = DomTreeNodeBase<BasicBlock>;
25class AAResults;
26class AliasSet;
27class AliasSetTracker;
28class BasicBlock;
29class BlockFrequencyInfo;
30class ICFLoopSafetyInfo;
31class IRBuilderBase;
32class Loop;
33class LoopInfo;
34class MemoryAccess;
35class MemorySSA;
36class MemorySSAUpdater;
37class OptimizationRemarkEmitter;
38class PredIteratorCache;
39class ScalarEvolution;
40class ScalarEvolutionExpander;
41class SCEV;
42class SCEVExpander;
43class TargetLibraryInfo;
44class LPPassManager;
45class Instruction;
46struct RuntimeCheckingPtrGroup;
47typedef std::pair<const RuntimeCheckingPtrGroup *,
48 const RuntimeCheckingPtrGroup *>
49 RuntimePointerCheck;
50
51template <typename T> class Optional;
52template <typename T, unsigned N> class SmallSetVector;
53template <typename T, unsigned N> class SmallVector;
54template <typename T> class SmallVectorImpl;
55template <typename T, unsigned N> class SmallPriorityWorklist;
56
57BasicBlock *InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI,
58 MemorySSAUpdater *MSSAU, bool PreserveLCSSA);
59
60/// Ensure that all exit blocks of the loop are dedicated exits.
61///
62/// For any loop exit block with non-loop predecessors, we split the loop
63/// predecessors to use a dedicated loop exit block. We update the dominator
64/// tree and loop info if provided, and will preserve LCSSA if requested.
65bool formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI,
66 MemorySSAUpdater *MSSAU, bool PreserveLCSSA);
67
68/// Ensures LCSSA form for every instruction from the Worklist in the scope of
69/// the innermost containing loop.
70///
71/// For each given instruction which has uses outside of the loop, an LCSSA PHI
72/// node is inserted and the uses outside the loop are rewritten to use this
73/// node.
74///
75/// LoopInfo and DominatorTree are required and, since the routine makes no
76/// changes to CFG, preserved.
77///
78/// Returns true if any modifications are made.
79///
80/// This function may introduce unused PHI nodes. If \p PHIsToRemove is not
81/// nullptr, those are added to it (before removing, the caller has to check if
82/// they still do not have any uses). Otherwise the PHIs are directly removed.
83bool formLCSSAForInstructions(
84 SmallVectorImpl<Instruction *> &Worklist, const DominatorTree &DT,
85 const LoopInfo &LI, ScalarEvolution *SE, IRBuilderBase &Builder,
86 SmallVectorImpl<PHINode *> *PHIsToRemove = nullptr);
87
88/// Put loop into LCSSA form.
89///
90/// Looks at all instructions in the loop which have uses outside of the
91/// current loop. For each, an LCSSA PHI node is inserted and the uses outside
92/// the loop are rewritten to use this node. Sub-loops must be in LCSSA form
93/// already.
94///
95/// LoopInfo and DominatorTree are required and preserved.
96///
97/// If ScalarEvolution is passed in, it will be preserved.
98///
99/// Returns true if any modifications are made to the loop.
100bool formLCSSA(Loop &L, const DominatorTree &DT, const LoopInfo *LI,
101 ScalarEvolution *SE);
102
103/// Put a loop nest into LCSSA form.
104///
105/// This recursively forms LCSSA for a loop nest.
106///
107/// LoopInfo and DominatorTree are required and preserved.
108///
109/// If ScalarEvolution is passed in, it will be preserved.
110///
111/// Returns true if any modifications are made to the loop.
112bool formLCSSARecursively(Loop &L, const DominatorTree &DT, const LoopInfo *LI,
113 ScalarEvolution *SE);
114
115/// Flags controlling how much is checked when sinking or hoisting
116/// instructions. The number of memory accesses in the loop (and whether there
117/// are too many) is determined in the constructors when using MemorySSA.
118class SinkAndHoistLICMFlags {
119public:
120 // Explicitly set limits.
121 SinkAndHoistLICMFlags(unsigned LicmMssaOptCap,
122 unsigned LicmMssaNoAccForPromotionCap, bool IsSink,
123 Loop *L = nullptr, MemorySSA *MSSA = nullptr);
124 // Use default limits.
125 SinkAndHoistLICMFlags(bool IsSink, Loop *L = nullptr,
126 MemorySSA *MSSA = nullptr);
127
128 void setIsSink(bool B) { IsSink = B; }
129 bool getIsSink() { return IsSink; }
130 bool tooManyMemoryAccesses() { return NoOfMemAccTooLarge; }
131 bool tooManyClobberingCalls() { return LicmMssaOptCounter >= LicmMssaOptCap; }
132 void incrementClobberingCalls() { ++LicmMssaOptCounter; }
133
134protected:
135 bool NoOfMemAccTooLarge = false;
136 unsigned LicmMssaOptCounter = 0;
137 unsigned LicmMssaOptCap;
138 unsigned LicmMssaNoAccForPromotionCap;
139 bool IsSink;
140};
141
142/// Walk the specified region of the CFG (defined by all blocks
143/// dominated by the specified block, and that are in the current loop) in
144/// reverse depth first order w.r.t the DominatorTree. This allows us to visit
145/// uses before definitions, allowing us to sink a loop body in one pass without
146/// iteration. Takes DomTreeNode, AAResults, LoopInfo, DominatorTree,
147/// BlockFrequencyInfo, TargetLibraryInfo, Loop, AliasSet information for all
148/// instructions of the loop and loop safety information as
149/// arguments. Diagnostics are emitted via \p ORE. It returns the changed status.
150bool sinkRegion(DomTreeNode *, AAResults *, LoopInfo *, DominatorTree *,
151 BlockFrequencyInfo *, TargetLibraryInfo *,
152 TargetTransformInfo *, Loop *, AliasSetTracker *,
153 MemorySSAUpdater *, ICFLoopSafetyInfo *,
154 SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *);
155
156/// Walk the specified region of the CFG (defined by all blocks
157/// dominated by the specified block, and that are in the current loop) in depth
158/// first order w.r.t the DominatorTree. This allows us to visit definitions
159/// before uses, allowing us to hoist a loop body in one pass without iteration.
160/// Takes DomTreeNode, AAResults, LoopInfo, DominatorTree,
161/// BlockFrequencyInfo, TargetLibraryInfo, Loop, AliasSet information for all
162/// instructions of the loop and loop safety information as arguments.
163/// Diagnostics are emitted via \p ORE. It returns the changed status.
164bool hoistRegion(DomTreeNode *, AAResults *, LoopInfo *, DominatorTree *,
165 BlockFrequencyInfo *, TargetLibraryInfo *, Loop *,
166 AliasSetTracker *, MemorySSAUpdater *, ScalarEvolution *,
167 ICFLoopSafetyInfo *, SinkAndHoistLICMFlags &,
168 OptimizationRemarkEmitter *, bool);
169
170/// This function deletes dead loops. The caller of this function needs to
171/// guarantee that the loop is in fact dead.
172/// The function requires a number of prerequisites to be present:
173/// - The loop needs to be in LCSSA form
174/// - The loop needs to have a Preheader
175/// - A unique dedicated exit block must exist
176///
177/// This also updates the relevant analysis information in \p DT, \p SE, \p LI
178/// and \p MSSA if pointers to those are provided.
179/// It also updates the loop PM if an updater struct is provided.
180
181void deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE,
182 LoopInfo *LI, MemorySSA *MSSA = nullptr);
183
184/// Remove the backedge of the specified loop. Handles loop nests and general
185/// loop structures subject to the precondition that the loop has no parent
186/// loop and has a single latch block. Preserves all listed analyses.
187void breakLoopBackedge(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
188 LoopInfo &LI, MemorySSA *MSSA);
189
190/// Try to promote memory values to scalars by sinking stores out of
191/// the loop and moving loads to before the loop. We do this by looping over
192/// the stores in the loop, looking for stores to Must pointers which are
193/// loop invariant. It takes a set of must-alias values, Loop exit blocks
194/// vector, loop exit blocks insertion point vector, PredIteratorCache,
195/// LoopInfo, DominatorTree, Loop, AliasSet information for all instructions
196/// of the loop and loop safety information as arguments.
197/// Diagnostics are emitted via \p ORE. It returns the changed status.
198bool promoteLoopAccessesToScalars(
199 const SmallSetVector<Value *, 8> &, SmallVectorImpl<BasicBlock *> &,
200 SmallVectorImpl<Instruction *> &, SmallVectorImpl<MemoryAccess *> &,
201 PredIteratorCache &, LoopInfo *, DominatorTree *, const TargetLibraryInfo *,
202 Loop *, AliasSetTracker *, MemorySSAUpdater *, ICFLoopSafetyInfo *,
203 OptimizationRemarkEmitter *);
204
205/// Does a BFS from a given node to all of its children inside a given loop.
206/// The returned vector of nodes includes the starting point.
207SmallVector<DomTreeNode *, 16> collectChildrenInLoop(DomTreeNode *N,
208 const Loop *CurLoop);
209
210/// Returns the instructions that use values defined in the loop.
211SmallVector<Instruction *, 8> findDefsUsedOutsideOfLoop(Loop *L);
212
213/// Find a combination of metadata ("llvm.loop.vectorize.width" and
214/// "llvm.loop.vectorize.scalable.enable") for a loop and use it to construct a
215/// ElementCount. If the metadata "llvm.loop.vectorize.width" cannot be found
216/// then None is returned.
217Optional<ElementCount>
218getOptionalElementCountLoopAttribute(const Loop *TheLoop);
219
220/// Create a new loop identifier for a loop created from a loop transformation.
221///
222/// @param OrigLoopID The loop ID of the loop before the transformation.
223/// @param FollowupAttrs List of attribute names that contain attributes to be
224/// added to the new loop ID.
225/// @param InheritOptionsAttrsPrefix Selects which attributes should be inherited
226/// from the original loop. The following values
227/// are considered:
228/// nullptr : Inherit all attributes from @p OrigLoopID.
229/// "" : Do not inherit any attribute from @p OrigLoopID; only use
230/// those specified by a followup attribute.
231/// "<prefix>": Inherit all attributes except those which start with
232/// <prefix>; commonly used to remove metadata for the
233/// applied transformation.
234/// @param AlwaysNew If true, do not try to reuse OrigLoopID and never return
235/// None.
236///
237/// @return The loop ID for the after-transformation loop. The following values
238/// can be returned:
239/// None : No followup attribute was found; it is up to the
240/// transformation to choose attributes that make sense.
241/// @p OrigLoopID: The original identifier can be reused.
242/// nullptr : The new loop has no attributes.
243/// MDNode* : A new unique loop identifier.
244Optional<MDNode *>
245makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef<StringRef> FollowupAttrs,
246 const char *InheritOptionsAttrsPrefix = "",
247 bool AlwaysNew = false);
248
249/// Look for the loop attribute that disables all transformation heuristics.
250bool hasDisableAllTransformsHint(const Loop *L);
251
252/// Look for the loop attribute that disables the LICM transformation heuristics.
253bool hasDisableLICMTransformsHint(const Loop *L);
254
255/// The mode sets how eager a transformation should be applied.
256enum TransformationMode {
257 /// The pass can use heuristics to determine whether a transformation should
258 /// be applied.
259 TM_Unspecified,
260
261 /// The transformation should be applied without considering a cost model.
262 TM_Enable,
263
264 /// The transformation should not be applied.
265 TM_Disable,
266
267 /// Force is a flag and should not be used alone.
268 TM_Force = 0x04,
269
270 /// The transformation was directed by the user, e.g. by a #pragma in
271 /// the source code. If the transformation could not be applied, a
272 /// warning should be emitted.
273 TM_ForcedByUser = TM_Enable | TM_Force,
274
275 /// The transformation must not be applied. For instance, `#pragma clang loop
276 /// unroll(disable)` explicitly forbids any unrolling to take place. Unlike
277 /// general loop metadata, it must not be dropped. Most passes should not
278 /// behave differently under TM_Disable and TM_SuppressedByUser.
279 TM_SuppressedByUser = TM_Disable | TM_Force
280};
281
282/// @{
283/// Get the mode for LLVM's supported loop transformations.
284TransformationMode hasUnrollTransformation(const Loop *L);
285TransformationMode hasUnrollAndJamTransformation(const Loop *L);
286TransformationMode hasVectorizeTransformation(const Loop *L);
287TransformationMode hasDistributeTransformation(const Loop *L);
288TransformationMode hasLICMVersioningTransformation(const Loop *L);
289/// @}
290
291/// Set the input string into the loop metadata, keeping other values intact.
292/// If the string is already in the loop metadata, update the value if it is
293/// different.
294void addStringMetadataToLoop(Loop *TheLoop, const char *MDString,
295 unsigned V = 0);
296
297/// Returns a loop's estimated trip count based on branch weight metadata.
298/// In addition if \p EstimatedLoopInvocationWeight is not null it is
299/// initialized with weight of loop's latch leading to the exit.
300/// Returns 0 when the count is estimated to be 0, or None when a meaningful
301/// estimate can not be made.
302Optional<unsigned>
303getLoopEstimatedTripCount(Loop *L,
304 unsigned *EstimatedLoopInvocationWeight = nullptr);
305
306/// Set a loop's branch weight metadata to reflect that the loop has \p
307/// EstimatedTripCount iterations and \p EstimatedLoopInvocationWeight exits
308/// through the latch. Returns true if the metadata is successfully updated, false
309/// otherwise. Note that the loop must have a latch block which controls the loop exit
310/// in order to succeed.
311bool setLoopEstimatedTripCount(Loop *L, unsigned EstimatedTripCount,
312 unsigned EstimatedLoopInvocationWeight);
313
314/// Check that the inner loop (L) backedge count is known to be invariant on all
315/// iterations of its outer loop. If the loop has no parent, this is trivially
316/// true.
317bool hasIterationCountInvariantInParent(Loop *L, ScalarEvolution &SE);
318
319/// Helper to consistently add the set of standard passes to a loop pass's \c
320/// AnalysisUsage.
321///
322/// All loop passes should call this as part of implementing their \c
323/// getAnalysisUsage.
324void getLoopAnalysisUsage(AnalysisUsage &AU);
325
326/// Returns true if it is legal to hoist or sink this instruction, disregarding the
327/// possible introduction of faults. Reasoning about potential faulting
328/// instructions is the responsibility of the caller since it is challenging to
329/// do efficiently from within this routine.
330/// \p TargetExecutesOncePerLoop is true only when it is guaranteed that the
331/// target executes at most once per execution of the loop body. This is used
332/// to assess the legality of duplicating atomic loads. Generally, this is
333/// true when moving out of loop and not true when moving into loops.
334/// If \p ORE is set use it to emit optimization remarks.
335bool canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
336 Loop *CurLoop, AliasSetTracker *CurAST,
337 MemorySSAUpdater *MSSAU, bool TargetExecutesOncePerLoop,
338 SinkAndHoistLICMFlags *LICMFlags = nullptr,
339 OptimizationRemarkEmitter *ORE = nullptr);
340
341/// Returns a Min/Max operation corresponding to MinMaxRecurrenceKind.
342/// The Builder's fast-math-flags must be set to propagate the expected values.
343Value *createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left,
344 Value *Right);
345
346/// Generates an ordered vector reduction using extracts to reduce the value.
347Value *getOrderedReduction(IRBuilderBase &Builder, Value *Acc, Value *Src,
348 unsigned Op, RecurKind MinMaxKind = RecurKind::None,
349 ArrayRef<Value *> RedOps = None);
350
351/// Generates a vector reduction using shufflevectors to reduce the value.
352/// Fast-math-flags are propagated using the IRBuilder's setting.
353Value *getShuffleReduction(IRBuilderBase &Builder, Value *Src, unsigned Op,
354 RecurKind MinMaxKind = RecurKind::None,
355 ArrayRef<Value *> RedOps = None);
356
357/// Create a target reduction of the given vector. The reduction operation
358/// is described by the \p Opcode parameter. min/max reductions require
359/// additional information supplied in \p RdxKind.
360/// The target is queried to determine if intrinsics or shuffle sequences are
361/// required to implement the reduction.
362/// Fast-math-flags are propagated using the IRBuilder's setting.
363Value *createSimpleTargetReduction(IRBuilderBase &B,
364 const TargetTransformInfo *TTI, Value *Src,
365 RecurKind RdxKind,
366 ArrayRef<Value *> RedOps = None);
367
368/// Create a generic target reduction using a recurrence descriptor \p Desc
369/// The target is queried to determine if intrinsics or shuffle sequences are
370/// required to implement the reduction.
371/// Fast-math-flags are propagated using the RecurrenceDescriptor.
372Value *createTargetReduction(IRBuilderBase &B, const TargetTransformInfo *TTI,
373 const RecurrenceDescriptor &Desc, Value *Src);
374
375/// Create an ordered reduction intrinsic using the given recurrence
376/// descriptor \p Desc.
377Value *createOrderedReduction(IRBuilderBase &B,
378 const RecurrenceDescriptor &Desc, Value *Src,
379 Value *Start);
380
381/// Get the intersection (logical and) of all of the potential IR flags
382/// of each scalar operation (VL) that will be converted into a vector (I).
383/// If OpValue is non-null, we only consider operations similar to OpValue
384/// when intersecting.
385/// Flag set: NSW, NUW, exact, and all of fast-math.
386void propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue = nullptr);
387
388/// Returns true if we can prove that \p S is defined and always negative in
389/// loop \p L.
390bool isKnownNegativeInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE);
391
392/// Returns true if we can prove that \p S is defined and always non-negative in
393/// loop \p L.
394bool isKnownNonNegativeInLoop(const SCEV *S, const Loop *L,
395 ScalarEvolution &SE);
396
397/// Returns true if \p S is defined and never is equal to signed/unsigned max.
398bool cannotBeMaxInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE,
399 bool Signed);
400
401/// Returns true if \p S is defined and never is equal to signed/unsigned min.
402bool cannotBeMinInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE,
403 bool Signed);
404
405enum ReplaceExitVal { NeverRepl, OnlyCheapRepl, NoHardUse, AlwaysRepl };
406
407/// If the final value of any expressions that are recurrent in the loop can
408/// be computed, substitute the exit values from the loop into any instructions
409/// outside of the loop that use the final values of the current expressions.
410/// Return the number of loop exit values that have been replaced; the
411/// corresponding phi nodes are added to DeadInsts.
412int rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI,
413 ScalarEvolution *SE, const TargetTransformInfo *TTI,
414 SCEVExpander &Rewriter, DominatorTree *DT,
415 ReplaceExitVal ReplaceExitValue,
416 SmallVector<WeakTrackingVH, 16> &DeadInsts);
417
418/// Set weights for \p UnrolledLoop and \p RemainderLoop based on weights for
419/// \p OrigLoop and the following distribution of \p OrigLoop iterations among \p
420/// UnrolledLoop and \p RemainderLoop. \p UnrolledLoop receives weights that
421/// reflect TC/UF iterations, and \p RemainderLoop receives weights that reflect
422/// the remaining TC%UF iterations.
423///
424/// Note that \p OrigLoop may be equal to either \p UnrolledLoop or \p
425/// RemainderLoop in which case weights for \p OrigLoop are updated accordingly.
426/// Note also behavior is undefined if \p UnrolledLoop and \p RemainderLoop are
427/// equal. \p UF must be greater than zero.
428/// If \p OrigLoop has no profile info associated nothing happens.
429///
430/// This utility may be useful for optimizations such as the unroller and
431/// vectorizer, as this is a typical transformation for them.
432void setProfileInfoAfterUnrolling(Loop *OrigLoop, Loop *UnrolledLoop,
433 Loop *RemainderLoop, uint64_t UF);
434
435/// Utility that implements appending of loops onto a worklist given a range.
436/// We want to process loops in postorder, but the worklist is a LIFO data
437/// structure, so we append to it in *reverse* postorder.
438/// For trees, a preorder traversal is a viable reverse postorder, so we
439/// actually append using a preorder walk algorithm.
440template <typename RangeT>
441void appendLoopsToWorklist(RangeT &&, SmallPriorityWorklist<Loop *, 4> &);
442/// Utility that implements appending of loops onto a worklist given a range.
443/// It has the same behavior as appendLoopsToWorklist, but assumes the range of
444/// loops has already been reversed, so it processes loops in the given order.
445template <typename RangeT>
446void appendReversedLoopsToWorklist(RangeT &&,
447 SmallPriorityWorklist<Loop *, 4> &);
448
449/// Utility that implements appending of loops onto a worklist given LoopInfo.
450/// Calls the templated utility taking a Range of loops, handing it the Loops
451/// in LoopInfo, iterated in reverse. This is because the loops are stored in
452/// RPO w.r.t. the control flow graph in LoopInfo. For the purpose of unrolling,
453/// loop deletion, and LICM, we largely want to work forward across the CFG so
454/// that we visit defs before uses and can propagate simplifications from one
455/// loop nest into the next. Calls appendReversedLoopsToWorklist with the
456/// already reversed loops in LI.
457/// FIXME: Consider changing the order in LoopInfo.
458void appendLoopsToWorklist(LoopInfo &, SmallPriorityWorklist<Loop *, 4> &);
459
460/// Recursively clone the specified loop and all of its children,
461/// mapping the blocks with the specified map.
462Loop *cloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM,
463 LoopInfo *LI, LPPassManager *LPM);
464
465/// Add code that checks at runtime if the accessed arrays in \p PointerChecks
466/// overlap.
467///
468/// Returns a pair of instructions where the first element is the first
469/// instruction generated (possibly the start of a sequence of instructions)
470/// and the second is the final comparator value, or NULL if no check is needed.
471std::pair<Instruction *, Instruction *>
472addRuntimeChecks(Instruction *Loc, Loop *TheLoop,
473 const SmallVectorImpl<RuntimePointerCheck> &PointerChecks,
474 SCEVExpander &Expander);
475
476/// Struct to hold information about a partially invariant condition.
477struct IVConditionInfo {
478 /// Instructions that need to be duplicated and checked for the unswitching
479 /// condition.
480 SmallVector<Instruction *> InstToDuplicate;
481
482 /// Constant to indicate for which value the condition is invariant.
483 Constant *KnownValue = nullptr;
4
Null pointer value stored to 'PartialIVInfo.KnownValue'
484
485 /// True if the partially invariant path is no-op (=does not have any
486 /// side-effects and no loop value is used outside the loop).
487 bool PathIsNoop = true;
488
489 /// If the partially invariant path reaches a single exit block, ExitForPath
490 /// is set to that block. Otherwise it is nullptr.
491 BasicBlock *ExitForPath = nullptr;
492};
493
494/// Check if the loop header has a conditional branch that is not
495/// loop-invariant, because it involves load instructions. If all paths from
496/// either the true or false successor to the header or loop exits do not
497/// modify the memory feeding the condition, perform 'partial unswitching'. That
498/// is, duplicate the instructions feeding the condition in the pre-header. Then
499/// unswitch on the duplicated condition. The condition is now known in the
500/// unswitched version for the 'invariant' path through the original loop.
501///
502/// If the branch condition of the header is partially invariant, return a pair
503/// containing the instructions to duplicate and a boolean Constant to update
504/// the condition in the loops created for the true or false successors.
505Optional<IVConditionInfo> hasPartialIVCondition(Loop &L, unsigned MSSAThreshold,
506 MemorySSA &MSSA, AAResults &AA);
507
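A hedged caller sketch for hasPartialIVCondition (the loop L, the threshold MSSAThreshold, and the analyses MSSA and AA are assumed to be in scope). It also shows the kind of null check that KnownValue, which defaults to nullptr in IVConditionInfo above, needs before any member call on it; the null-pointer path in this report runs through exactly that field:

    if (Optional<IVConditionInfo> Info =
            hasPartialIVCondition(L, MSSAThreshold, MSSA, AA)) {
      // KnownValue may still be its default nullptr; guard before using it.
      if (Constant *Known = Info->KnownValue)
        (void)Known; // e.g. feed Known into the unswitched branch condition
    }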
508} // end namespace llvm
509
510#endif // LLVM_TRANSFORMS_UTILS_LOOPUTILS_H

/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ADT/SmallVector.h

1//===- llvm/ADT/SmallVector.h - 'Normally small' vectors --------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the SmallVector class.
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_ADT_SMALLVECTOR_H
14#define LLVM_ADT_SMALLVECTOR_H
15
16#include "llvm/ADT/iterator_range.h"
17#include "llvm/Support/Compiler.h"
18#include "llvm/Support/ErrorHandling.h"
19#include "llvm/Support/MemAlloc.h"
20#include "llvm/Support/type_traits.h"
21#include <algorithm>
22#include <cassert>
23#include <cstddef>
24#include <cstdlib>
25#include <cstring>
26#include <functional>
27#include <initializer_list>
28#include <iterator>
29#include <limits>
30#include <memory>
31#include <new>
32#include <type_traits>
33#include <utility>
34
35namespace llvm {
36
37/// This is all the stuff common to all SmallVectors.
38///
39/// The template parameter specifies the type which should be used to hold the
40/// Size and Capacity of the SmallVector, so it can be adjusted.
41/// Using 32 bit size is desirable to shrink the size of the SmallVector.
42/// Using 64 bit size is desirable for cases like SmallVector<char>, where a
43/// 32 bit size would limit the vector to ~4GB. SmallVectors are used for
44/// buffering bitcode output - which can exceed 4GB.
45template <class Size_T> class SmallVectorBase {
46protected:
47 void *BeginX;
48 Size_T Size = 0, Capacity;
49
50 /// The maximum value of the Size_T used.
51 static constexpr size_t SizeTypeMax() {
52 return std::numeric_limits<Size_T>::max();
53 }
54
55 SmallVectorBase() = delete;
56 SmallVectorBase(void *FirstEl, size_t TotalCapacity)
57 : BeginX(FirstEl), Capacity(TotalCapacity) {}
58
59 /// This is a helper for \a grow() that's out of line to reduce code
60 /// duplication. This function will report a fatal error if it can't grow at
61 /// least to \p MinSize.
62 void *mallocForGrow(size_t MinSize, size_t TSize, size_t &NewCapacity);
63
64 /// This is an implementation of the grow() method which only works
65 /// on POD-like data types and is out of line to reduce code duplication.
66 /// This function will report a fatal error if it cannot increase capacity.
67 void grow_pod(void *FirstEl, size_t MinSize, size_t TSize);
68
69public:
70 size_t size() const { return Size; }
71 size_t capacity() const { return Capacity; }
72
73 LLVM_NODISCARD[[clang::warn_unused_result]] bool empty() const { return !Size; }
10
Assuming field 'Size' is not equal to 0
11
Returning zero, which participates in a condition later
74
75 /// Set the array size to \p N, which the current array must have enough
76 /// capacity for.
77 ///
78 /// This does not construct or destroy any elements in the vector.
79 ///
80 /// Clients can use this in conjunction with capacity() to write past the end
81 /// of the buffer when they know that more elements are available, and only
82 /// update the size later. This avoids the cost of value initializing elements
83 /// which will only be overwritten.
84 void set_size(size_t N) {
85 assert(N <= capacity())((void)0);
86 Size = N;
87 }
88};
89
90template <class T>
91using SmallVectorSizeType =
92 typename std::conditional<sizeof(T) < 4 && sizeof(void *) >= 8, uint64_t,
93 uint32_t>::type;
94
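The size-type selection above can be spot-checked; a small sketch assuming a typical 64-bit host where sizeof(void *) == 8:

    // char (1 byte) is below the 4-byte cutoff, so it gets the 64-bit size type;
    // int (4 bytes) is not, so it keeps the 32-bit size type.
    static_assert(std::is_same<llvm::SmallVectorSizeType<char>, uint64_t>::value, "");
    static_assert(std::is_same<llvm::SmallVectorSizeType<int>, uint32_t>::value, "");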
95/// Figure out the offset of the first element.
96template <class T, typename = void> struct SmallVectorAlignmentAndSize {
97 alignas(SmallVectorBase<SmallVectorSizeType<T>>) char Base[sizeof(
98 SmallVectorBase<SmallVectorSizeType<T>>)];
99 alignas(T) char FirstEl[sizeof(T)];
100};
101
102/// This is the part of SmallVectorTemplateBase which does not depend on whether
103/// the type T is a POD. The extra dummy template argument is used by ArrayRef
104/// to avoid unnecessarily requiring T to be complete.
105template <typename T, typename = void>
106class SmallVectorTemplateCommon
107 : public SmallVectorBase<SmallVectorSizeType<T>> {
108 using Base = SmallVectorBase<SmallVectorSizeType<T>>;
109
110 /// Find the address of the first element. For this pointer math to be valid
111 /// with small-size of 0 for T with lots of alignment, it's important that
112 /// SmallVectorStorage is properly-aligned even for small-size of 0.
113 void *getFirstEl() const {
114 return const_cast<void *>(reinterpret_cast<const void *>(
115 reinterpret_cast<const char *>(this) +
116 offsetof(SmallVectorAlignmentAndSize<T>, FirstEl)__builtin_offsetof(SmallVectorAlignmentAndSize<T>, FirstEl)));
117 }
118 // Space after 'FirstEl' is clobbered, do not add any instance vars after it.
119
120protected:
121 SmallVectorTemplateCommon(size_t Size) : Base(getFirstEl(), Size) {}
122
123 void grow_pod(size_t MinSize, size_t TSize) {
124 Base::grow_pod(getFirstEl(), MinSize, TSize);
125 }
126
127 /// Return true if this is a smallvector which has not had dynamic
128 /// memory allocated for it.
129 bool isSmall() const { return this->BeginX == getFirstEl(); }
130
131 /// Put this vector in a state of being small.
132 void resetToSmall() {
133 this->BeginX = getFirstEl();
134 this->Size = this->Capacity = 0; // FIXME: Setting Capacity to 0 is suspect.
135 }
136
137 /// Return true if V is an internal reference to the given range.
138 bool isReferenceToRange(const void *V, const void *First, const void *Last) const {
139 // Use std::less to avoid UB.
140 std::less<> LessThan;
141 return !LessThan(V, First) && LessThan(V, Last);
142 }
143
144 /// Return true if V is an internal reference to this vector.
145 bool isReferenceToStorage(const void *V) const {
146 return isReferenceToRange(V, this->begin(), this->end());
147 }
148
149 /// Return true if First and Last form a valid (possibly empty) range in this
150 /// vector's storage.
151 bool isRangeInStorage(const void *First, const void *Last) const {
152 // Use std::less to avoid UB.
153 std::less<> LessThan;
154 return !LessThan(First, this->begin()) && !LessThan(Last, First) &&
155 !LessThan(this->end(), Last);
156 }
157
158 /// Return true unless Elt will be invalidated by resizing the vector to
159 /// NewSize.
160 bool isSafeToReferenceAfterResize(const void *Elt, size_t NewSize) {
161 // Past the end.
162 if (LLVM_LIKELY(!isReferenceToStorage(Elt))__builtin_expect((bool)(!isReferenceToStorage(Elt)), true))
163 return true;
164
165 // Return false if Elt will be destroyed by shrinking.
166 if (NewSize <= this->size())
167 return Elt < this->begin() + NewSize;
168
169 // Return false if we need to grow.
170 return NewSize <= this->capacity();
171 }
172
173 /// Check whether Elt will be invalidated by resizing the vector to NewSize.
174 void assertSafeToReferenceAfterResize(const void *Elt, size_t NewSize) {
175 assert(isSafeToReferenceAfterResize(Elt, NewSize) &&((void)0)
176 "Attempting to reference an element of the vector in an operation "((void)0)
177 "that invalidates it")((void)0);
178 }
179
180 /// Check whether Elt will be invalidated by increasing the size of the
181 /// vector by N.
182 void assertSafeToAdd(const void *Elt, size_t N = 1) {
183 this->assertSafeToReferenceAfterResize(Elt, this->size() + N);
184 }
185
186 /// Check whether any part of the range will be invalidated by clearing.
187 void assertSafeToReferenceAfterClear(const T *From, const T *To) {
188 if (From == To)
189 return;
190 this->assertSafeToReferenceAfterResize(From, 0);
191 this->assertSafeToReferenceAfterResize(To - 1, 0);
192 }
193 template <
194 class ItTy,
195 std::enable_if_t<!std::is_same<std::remove_const_t<ItTy>, T *>::value,
196 bool> = false>
197 void assertSafeToReferenceAfterClear(ItTy, ItTy) {}
198
199 /// Check whether any part of the range will be invalidated by growing.
200 void assertSafeToAddRange(const T *From, const T *To) {
201 if (From == To)
202 return;
203 this->assertSafeToAdd(From, To - From);
204 this->assertSafeToAdd(To - 1, To - From);
205 }
206 template <
207 class ItTy,
208 std::enable_if_t<!std::is_same<std::remove_const_t<ItTy>, T *>::value,
209 bool> = false>
210 void assertSafeToAddRange(ItTy, ItTy) {}
211
212 /// Reserve enough space to add one element, and return the updated element
213 /// pointer in case it was a reference to the storage.
214 template <class U>
215 static const T *reserveForParamAndGetAddressImpl(U *This, const T &Elt,
216 size_t N) {
217 size_t NewSize = This->size() + N;
218 if (LLVM_LIKELY(NewSize <= This->capacity())__builtin_expect((bool)(NewSize <= This->capacity()), true))
219 return &Elt;
220
221 bool ReferencesStorage = false;
222 int64_t Index = -1;
223 if (!U::TakesParamByValue) {
224 if (LLVM_UNLIKELY(This->isReferenceToStorage(&Elt))__builtin_expect((bool)(This->isReferenceToStorage(&Elt)), false)) {
225 ReferencesStorage = true;
226 Index = &Elt - This->begin();
227 }
228 }
229 This->grow(NewSize);
230 return ReferencesStorage ? This->begin() + Index : &Elt;
231 }
232
233public:
234 using size_type = size_t;
235 using difference_type = ptrdiff_t;
236 using value_type = T;
237 using iterator = T *;
238 using const_iterator = const T *;
239
240 using const_reverse_iterator = std::reverse_iterator<const_iterator>;
241 using reverse_iterator = std::reverse_iterator<iterator>;
242
243 using reference = T &;
244 using const_reference = const T &;
245 using pointer = T *;
246 using const_pointer = const T *;
247
248 using Base::capacity;
249 using Base::empty;
250 using Base::size;
251
252 // forward iterator creation methods.
253 iterator begin() { return (iterator)this->BeginX; }
254 const_iterator begin() const { return (const_iterator)this->BeginX; }
255 iterator end() { return begin() + size(); }
256 const_iterator end() const { return begin() + size(); }
257
258 // reverse iterator creation methods.
259 reverse_iterator rbegin() { return reverse_iterator(end()); }
260 const_reverse_iterator rbegin() const{ return const_reverse_iterator(end()); }
261 reverse_iterator rend() { return reverse_iterator(begin()); }
262 const_reverse_iterator rend() const { return const_reverse_iterator(begin());}
263
264 size_type size_in_bytes() const { return size() * sizeof(T); }
265 size_type max_size() const {
266 return std::min(this->SizeTypeMax(), size_type(-1) / sizeof(T));
267 }
268
269 size_t capacity_in_bytes() const { return capacity() * sizeof(T); }
270
271 /// Return a pointer to the vector's buffer, even if empty().
272 pointer data() { return pointer(begin()); }
273 /// Return a pointer to the vector's buffer, even if empty().
274 const_pointer data() const { return const_pointer(begin()); }
275
276 reference operator[](size_type idx) {
277 assert(idx < size())((void)0);
278 return begin()[idx];
279 }
280 const_reference operator[](size_type idx) const {
281 assert(idx < size())((void)0);
282 return begin()[idx];
283 }
284
285 reference front() {
286 assert(!empty())((void)0);
287 return begin()[0];
288 }
289 const_reference front() const {
290 assert(!empty())((void)0);
291 return begin()[0];
292 }
293
294 reference back() {
295 assert(!empty())((void)0);
296 return end()[-1];
297 }
298 const_reference back() const {
299 assert(!empty())((void)0);
300 return end()[-1];
301 }
302};
303
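The reserveForParamAndGetAddressImpl helper above is what keeps a self-referencing push_back valid across a reallocating grow; a hedged sketch for a non-trivially-copyable element type:

    llvm::SmallVector<std::string, 2> V{"a", "b"}; // inline storage is now full
    // V[0] is a reference into V's own storage; the helper re-derives its
    // address from the new allocation after the grow, so this stays well-defined.
    V.push_back(V[0]); // V is now {"a", "b", "a"}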
304/// SmallVectorTemplateBase<TriviallyCopyable = false> - This is where we put
305/// method implementations that are designed to work with non-trivial T's.
306///
307/// We approximate is_trivially_copyable with trivial move/copy construction and
308/// trivial destruction. While the standard doesn't specify that you're allowed
309/// copy these types with memcpy, there is no way for the type to observe this.
310/// This catches the important case of std::pair<POD, POD>, which is not
311/// trivially assignable.
312template <typename T, bool = (is_trivially_copy_constructible<T>::value) &&
313 (is_trivially_move_constructible<T>::value) &&
314 std::is_trivially_destructible<T>::value>
315class SmallVectorTemplateBase : public SmallVectorTemplateCommon<T> {
316 friend class SmallVectorTemplateCommon<T>;
317
318protected:
319 static constexpr bool TakesParamByValue = false;
320 using ValueParamT = const T &;
321
322 SmallVectorTemplateBase(size_t Size) : SmallVectorTemplateCommon<T>(Size) {}
323
324 static void destroy_range(T *S, T *E) {
325 while (S != E) {
326 --E;
327 E->~T();
328 }
329 }
330
331 /// Move the range [I, E) into the uninitialized memory starting with "Dest",
332 /// constructing elements as needed.
333 template<typename It1, typename It2>
334 static void uninitialized_move(It1 I, It1 E, It2 Dest) {
335 std::uninitialized_copy(std::make_move_iterator(I),
336 std::make_move_iterator(E), Dest);
337 }
338
339 /// Copy the range [I, E) onto the uninitialized memory starting with "Dest",
340 /// constructing elements as needed.
341 template<typename It1, typename It2>
342 static void uninitialized_copy(It1 I, It1 E, It2 Dest) {
343 std::uninitialized_copy(I, E, Dest);
344 }
345
346 /// Grow the allocated memory (without initializing new elements), doubling
347 /// the size of the allocated memory. Guarantees space for at least one more
348 /// element, or MinSize more elements if specified.
349 void grow(size_t MinSize = 0);
350
351 /// Create a new allocation big enough for \p MinSize and pass back its size
352 /// in \p NewCapacity. This is the first section of \a grow().
353 T *mallocForGrow(size_t MinSize, size_t &NewCapacity) {
354 return static_cast<T *>(
355 SmallVectorBase<SmallVectorSizeType<T>>::mallocForGrow(
356 MinSize, sizeof(T), NewCapacity));
357 }
358
359 /// Move existing elements over to the new allocation \p NewElts, the middle
360 /// section of \a grow().
361 void moveElementsForGrow(T *NewElts);
362
363 /// Transfer ownership of the allocation, finishing up \a grow().
364 void takeAllocationForGrow(T *NewElts, size_t NewCapacity);
365
366 /// Reserve enough space to add one element, and return the updated element
367 /// pointer in case it was a reference to the storage.
368 const T *reserveForParamAndGetAddress(const T &Elt, size_t N = 1) {
369 return this->reserveForParamAndGetAddressImpl(this, Elt, N);
370 }
371
372 /// Reserve enough space to add one element, and return the updated element
373 /// pointer in case it was a reference to the storage.
374 T *reserveForParamAndGetAddress(T &Elt, size_t N = 1) {
375 return const_cast<T *>(
376 this->reserveForParamAndGetAddressImpl(this, Elt, N));
377 }
378
379 static T &&forward_value_param(T &&V) { return std::move(V); }
380 static const T &forward_value_param(const T &V) { return V; }
381
382 void growAndAssign(size_t NumElts, const T &Elt) {
383 // Grow manually in case Elt is an internal reference.
384 size_t NewCapacity;
385 T *NewElts = mallocForGrow(NumElts, NewCapacity);
386 std::uninitialized_fill_n(NewElts, NumElts, Elt);
387 this->destroy_range(this->begin(), this->end());
388 takeAllocationForGrow(NewElts, NewCapacity);
389 this->set_size(NumElts);
390 }
391
392 template <typename... ArgTypes> T &growAndEmplaceBack(ArgTypes &&... Args) {
393 // Grow manually in case one of Args is an internal reference.
394 size_t NewCapacity;
395 T *NewElts = mallocForGrow(0, NewCapacity);
396 ::new ((void *)(NewElts + this->size())) T(std::forward<ArgTypes>(Args)...);
397 moveElementsForGrow(NewElts);
398 takeAllocationForGrow(NewElts, NewCapacity);
399 this->set_size(this->size() + 1);
400 return this->back();
401 }
402
403public:
404 void push_back(const T &Elt) {
405 const T *EltPtr = reserveForParamAndGetAddress(Elt);
406 ::new ((void *)this->end()) T(*EltPtr);
407 this->set_size(this->size() + 1);
408 }
409
410 void push_back(T &&Elt) {
411 T *EltPtr = reserveForParamAndGetAddress(Elt);
412 ::new ((void *)this->end()) T(::std::move(*EltPtr));
413 this->set_size(this->size() + 1);
414 }
415
416 void pop_back() {
417 this->set_size(this->size() - 1);
418 this->end()->~T();
419 }
420};
421
422// Define this out-of-line to dissuade the C++ compiler from inlining it.
423template <typename T, bool TriviallyCopyable>
424void SmallVectorTemplateBase<T, TriviallyCopyable>::grow(size_t MinSize) {
425 size_t NewCapacity;
426 T *NewElts = mallocForGrow(MinSize, NewCapacity);
427 moveElementsForGrow(NewElts);
428 takeAllocationForGrow(NewElts, NewCapacity);
429}
430
431// Define this out-of-line to dissuade the C++ compiler from inlining it.
432template <typename T, bool TriviallyCopyable>
433void SmallVectorTemplateBase<T, TriviallyCopyable>::moveElementsForGrow(
434 T *NewElts) {
435 // Move the elements over.
436 this->uninitialized_move(this->begin(), this->end(), NewElts);
437
438 // Destroy the original elements.
439 destroy_range(this->begin(), this->end());
440}
441
442// Define this out-of-line to dissuade the C++ compiler from inlining it.
443template <typename T, bool TriviallyCopyable>
444void SmallVectorTemplateBase<T, TriviallyCopyable>::takeAllocationForGrow(
445 T *NewElts, size_t NewCapacity) {
446 // If this wasn't grown from the inline copy, deallocate the old space.
447 if (!this->isSmall())
448 free(this->begin());
449
450 this->BeginX = NewElts;
451 this->Capacity = NewCapacity;
452}
453
454/// SmallVectorTemplateBase<TriviallyCopyable = true> - This is where we put
455/// method implementations that are designed to work with trivially copyable
456/// T's. This allows using memcpy in place of copy/move construction and
457/// skipping destruction.
458template <typename T>
459class SmallVectorTemplateBase<T, true> : public SmallVectorTemplateCommon<T> {
460 friend class SmallVectorTemplateCommon<T>;
461
462protected:
463 /// True if it's cheap enough to take parameters by value. Doing so avoids
464 /// overhead related to mitigations for reference invalidation.
465 static constexpr bool TakesParamByValue = sizeof(T) <= 2 * sizeof(void *);
466
467 /// Either const T& or T, depending on whether it's cheap enough to take
468 /// parameters by value.
469 using ValueParamT =
470 typename std::conditional<TakesParamByValue, T, const T &>::type;
471
472 SmallVectorTemplateBase(size_t Size) : SmallVectorTemplateCommon<T>(Size) {}
473
474 // No need to do a destroy loop for POD's.
475 static void destroy_range(T *, T *) {}
476
477 /// Move the range [I, E) onto the uninitialized memory
478 /// starting with "Dest", constructing elements into it as needed.
479 template<typename It1, typename It2>
480 static void uninitialized_move(It1 I, It1 E, It2 Dest) {
481 // Just do a copy.
482 uninitialized_copy(I, E, Dest);
483 }
484
485 /// Copy the range [I, E) onto the uninitialized memory
486 /// starting with "Dest", constructing elements into it as needed.
487 template<typename It1, typename It2>
488 static void uninitialized_copy(It1 I, It1 E, It2 Dest) {
489 // Arbitrary iterator types; just use the basic implementation.
490 std::uninitialized_copy(I, E, Dest);
491 }
492
493 /// Copy the range [I, E) onto the uninitialized memory
494 /// starting with "Dest", constructing elements into it as needed.
495 template <typename T1, typename T2>
496 static void uninitialized_copy(
497 T1 *I, T1 *E, T2 *Dest,
498 std::enable_if_t<std::is_same<typename std::remove_const<T1>::type,
499 T2>::value> * = nullptr) {
500 // Use memcpy for PODs iterated by pointers (which includes SmallVector
501 // iterators): std::uninitialized_copy optimizes to memmove, but we can
502 // use memcpy here. Note that I and E are iterators and thus might be
503 // invalid for memcpy if they are equal.
504 if (I != E)
505 memcpy(reinterpret_cast<void *>(Dest), I, (E - I) * sizeof(T));
506 }
507
508 /// Double the size of the allocated memory, guaranteeing space for at
509 /// least one more element or MinSize if specified.
510 void grow(size_t MinSize = 0) { this->grow_pod(MinSize, sizeof(T)); }
511
512 /// Reserve enough space to add one element, and return the updated element
513 /// pointer in case it was a reference to the storage.
514 const T *reserveForParamAndGetAddress(const T &Elt, size_t N = 1) {
515 return this->reserveForParamAndGetAddressImpl(this, Elt, N);
516 }
517
518 /// Reserve enough space to add one element, and return the updated element
519 /// pointer in case it was a reference to the storage.
520 T *reserveForParamAndGetAddress(T &Elt, size_t N = 1) {
521 return const_cast<T *>(
522 this->reserveForParamAndGetAddressImpl(this, Elt, N));
523 }
524
525 /// Copy \p V or return a reference, depending on \a ValueParamT.
526 static ValueParamT forward_value_param(ValueParamT V) { return V; }
527
528 void growAndAssign(size_t NumElts, T Elt) {
529 // Elt has been copied in case it's an internal reference, side-stepping
530 // reference invalidation problems without losing the realloc optimization.
531 this->set_size(0);
532 this->grow(NumElts);
533 std::uninitialized_fill_n(this->begin(), NumElts, Elt);
534 this->set_size(NumElts);
535 }
536
537 template <typename... ArgTypes> T &growAndEmplaceBack(ArgTypes &&... Args) {
538 // Use push_back with a copy in case Args has an internal reference,
539 // side-stepping reference invalidation problems without losing the realloc
540 // optimization.
541 push_back(T(std::forward<ArgTypes>(Args)...));
542 return this->back();
543 }
544
545public:
546 void push_back(ValueParamT Elt) {
547 const T *EltPtr = reserveForParamAndGetAddress(Elt);
548 memcpy(reinterpret_cast<void *>(this->end()), EltPtr, sizeof(T));
549 this->set_size(this->size() + 1);
550 }
551
552 void pop_back() { this->set_size(this->size() - 1); }
553};
554
555/// This class consists of common code factored out of the SmallVector class to
556/// reduce code duplication based on the SmallVector 'N' template parameter.
557template <typename T>
558class SmallVectorImpl : public SmallVectorTemplateBase<T> {
559 using SuperClass = SmallVectorTemplateBase<T>;
560
561public:
562 using iterator = typename SuperClass::iterator;
563 using const_iterator = typename SuperClass::const_iterator;
564 using reference = typename SuperClass::reference;
565 using size_type = typename SuperClass::size_type;
566
567protected:
568 using SmallVectorTemplateBase<T>::TakesParamByValue;
569 using ValueParamT = typename SuperClass::ValueParamT;
570
571 // Default ctor - Initialize to empty.
572 explicit SmallVectorImpl(unsigned N)
573 : SmallVectorTemplateBase<T>(N) {}
574
575public:
576 SmallVectorImpl(const SmallVectorImpl &) = delete;
577
578 ~SmallVectorImpl() {
579 // Subclass has already destructed this vector's elements.
580 // If this wasn't grown from the inline copy, deallocate the old space.
581 if (!this->isSmall())
582 free(this->begin());
583 }
584
585 void clear() {
586 this->destroy_range(this->begin(), this->end());
587 this->Size = 0;
588 }
589
590private:
591 template <bool ForOverwrite> void resizeImpl(size_type N) {
592 if (N < this->size()) {
593 this->pop_back_n(this->size() - N);
594 } else if (N > this->size()) {
595 this->reserve(N);
596 for (auto I = this->end(), E = this->begin() + N; I != E; ++I)
597 if (ForOverwrite)
598 new (&*I) T;
599 else
600 new (&*I) T();
601 this->set_size(N);
602 }
603 }
604
605public:
606 void resize(size_type N) { resizeImpl<false>(N); }
607
608 /// Like resize, but since \ref T is POD, the new values won't be initialized.
609 void resize_for_overwrite(size_type N) { resizeImpl<true>(N); }
610
611 void resize(size_type N, ValueParamT NV) {
612 if (N == this->size())
613 return;
614
615 if (N < this->size()) {
616 this->pop_back_n(this->size() - N);
617 return;
618 }
619
620 // N > this->size(). Defer to append.
621 this->append(N - this->size(), NV);
622 }
623
624 void reserve(size_type N) {
625 if (this->capacity() < N)
626 this->grow(N);
627 }
628
629 void pop_back_n(size_type NumItems) {
630 assert(this->size() >= NumItems)((void)0);
631 this->destroy_range(this->end() - NumItems, this->end());
632 this->set_size(this->size() - NumItems);
633 }
634
635 LLVM_NODISCARD[[clang::warn_unused_result]] T pop_back_val() {
636 T Result = ::std::move(this->back());
637 this->pop_back();
638 return Result;
639 }
640
641 void swap(SmallVectorImpl &RHS);
642
643 /// Add the specified range to the end of the SmallVector.
644 template <typename in_iter,
645 typename = std::enable_if_t<std::is_convertible<
646 typename std::iterator_traits<in_iter>::iterator_category,
647 std::input_iterator_tag>::value>>
648 void append(in_iter in_start, in_iter in_end) {
649 this->assertSafeToAddRange(in_start, in_end);
650 size_type NumInputs = std::distance(in_start, in_end);
651 this->reserve(this->size() + NumInputs);
652 this->uninitialized_copy(in_start, in_end, this->end());
653 this->set_size(this->size() + NumInputs);
654 }
655
656 /// Append \p NumInputs copies of \p Elt to the end.
657 void append(size_type NumInputs, ValueParamT Elt) {
658 const T *EltPtr = this->reserveForParamAndGetAddress(Elt, NumInputs);
659 std::uninitialized_fill_n(this->end(), NumInputs, *EltPtr);
660 this->set_size(this->size() + NumInputs);
661 }
662
663 void append(std::initializer_list<T> IL) {
664 append(IL.begin(), IL.end());
665 }
666
667 void append(const SmallVectorImpl &RHS) { append(RHS.begin(), RHS.end()); }
668
669 void assign(size_type NumElts, ValueParamT Elt) {
670 // Note that Elt could be an internal reference.
671 if (NumElts > this->capacity()) {
672 this->growAndAssign(NumElts, Elt);
673 return;
674 }
675
676 // Assign over existing elements.
677 std::fill_n(this->begin(), std::min(NumElts, this->size()), Elt);
678 if (NumElts > this->size())
679 std::uninitialized_fill_n(this->end(), NumElts - this->size(), Elt);
680 else if (NumElts < this->size())
681 this->destroy_range(this->begin() + NumElts, this->end());
682 this->set_size(NumElts);
683 }
684
685 // FIXME: Consider assigning over existing elements, rather than clearing &
686 // re-initializing them - for all assign(...) variants.
687
688 template <typename in_iter,
689 typename = std::enable_if_t<std::is_convertible<
690 typename std::iterator_traits<in_iter>::iterator_category,
691 std::input_iterator_tag>::value>>
692 void assign(in_iter in_start, in_iter in_end) {
693 this->assertSafeToReferenceAfterClear(in_start, in_end);
694 clear();
695 append(in_start, in_end);
696 }
697
698 void assign(std::initializer_list<T> IL) {
699 clear();
700 append(IL);
701 }
702
703 void assign(const SmallVectorImpl &RHS) { assign(RHS.begin(), RHS.end()); }
704
705 iterator erase(const_iterator CI) {
706 // Just cast away constness because this is a non-const member function.
707 iterator I = const_cast<iterator>(CI);
708
709 assert(this->isReferenceToStorage(CI) && "Iterator to erase is out of bounds.")((void)0);
710
711 iterator N = I;
712 // Shift all elts down one.
713 std::move(I+1, this->end(), I);
714 // Drop the last elt.
715 this->pop_back();
716 return(N);
717 }
718
719 iterator erase(const_iterator CS, const_iterator CE) {
720 // Just cast away constness because this is a non-const member function.
721 iterator S = const_cast<iterator>(CS);
722 iterator E = const_cast<iterator>(CE);
723
724 assert(this->isRangeInStorage(S, E) && "Range to erase is out of bounds.")((void)0);
725
726 iterator N = S;
727 // Shift all elts down.
728 iterator I = std::move(E, this->end(), S);
729 // Drop the last elts.
730 this->destroy_range(I, this->end());
731 this->set_size(I - this->begin());
732 return(N);
733 }
734
735private:
736 template <class ArgType> iterator insert_one_impl(iterator I, ArgType &&Elt) {
737 // Callers ensure that ArgType is derived from T.
738 static_assert(
739 std::is_same<std::remove_const_t<std::remove_reference_t<ArgType>>,
740 T>::value,
741 "ArgType must be derived from T!");
742
743 if (I == this->end()) { // Important special case for empty vector.
744 this->push_back(::std::forward<ArgType>(Elt));
745 return this->end()-1;
746 }
747
748 assert(this->isReferenceToStorage(I) && "Insertion iterator is out of bounds.")((void)0);
749
750 // Grow if necessary.
751 size_t Index = I - this->begin();
752 std::remove_reference_t<ArgType> *EltPtr =
753 this->reserveForParamAndGetAddress(Elt);
754 I = this->begin() + Index;
755
756 ::new ((void*) this->end()) T(::std::move(this->back()));
757 // Push everything else over.
758 std::move_backward(I, this->end()-1, this->end());
759 this->set_size(this->size() + 1);
760
761 // If we just moved the element we're inserting, be sure to update
762 // the reference (never happens if TakesParamByValue).
763 static_assert(!TakesParamByValue || std::is_same<ArgType, T>::value,
764 "ArgType must be 'T' when taking by value!");
765 if (!TakesParamByValue && this->isReferenceToRange(EltPtr, I, this->end()))
766 ++EltPtr;
767
768 *I = ::std::forward<ArgType>(*EltPtr);
769 return I;
770 }
771
772public:
773 iterator insert(iterator I, T &&Elt) {
774 return insert_one_impl(I, this->forward_value_param(std::move(Elt)));
775 }
776
777 iterator insert(iterator I, const T &Elt) {
778 return insert_one_impl(I, this->forward_value_param(Elt));
779 }
780
781 iterator insert(iterator I, size_type NumToInsert, ValueParamT Elt) {
782 // Convert iterator to elt# to avoid invalidating iterator when we reserve()
783 size_t InsertElt = I - this->begin();
784
785 if (I == this->end()) { // Important special case for empty vector.
786 append(NumToInsert, Elt);
787 return this->begin()+InsertElt;
788 }
789
790 assert(this->isReferenceToStorage(I) && "Insertion iterator is out of bounds.")((void)0);
791
792 // Ensure there is enough space, and get the (maybe updated) address of
793 // Elt.
794 const T *EltPtr = this->reserveForParamAndGetAddress(Elt, NumToInsert);
795
796 // Uninvalidate the iterator.
797 I = this->begin()+InsertElt;
798
799 // If there are more elements between the insertion point and the end of the
800 // range than there are being inserted, we can use a simple approach to
801 // insertion. Since we already reserved space, we know that this won't
802 // reallocate the vector.
803 if (size_t(this->end()-I) >= NumToInsert) {
804 T *OldEnd = this->end();
805 append(std::move_iterator<iterator>(this->end() - NumToInsert),
806 std::move_iterator<iterator>(this->end()));
807
808 // Copy the existing elements that get replaced.
809 std::move_backward(I, OldEnd-NumToInsert, OldEnd);
810
811 // If we just moved the element we're inserting, be sure to update
812 // the reference (never happens if TakesParamByValue).
813 if (!TakesParamByValue && I <= EltPtr && EltPtr < this->end())
814 EltPtr += NumToInsert;
815
816 std::fill_n(I, NumToInsert, *EltPtr);
817 return I;
818 }
819
820 // Otherwise, we're inserting more elements than exist already, and we're
821 // not inserting at the end.
822
823 // Move over the elements that we're about to overwrite.
824 T *OldEnd = this->end();
825 this->set_size(this->size() + NumToInsert);
826 size_t NumOverwritten = OldEnd-I;
827 this->uninitialized_move(I, OldEnd, this->end()-NumOverwritten);
828
829 // If we just moved the element we're inserting, be sure to update
830 // the reference (never happens if TakesParamByValue).
831 if (!TakesParamByValue && I <= EltPtr && EltPtr < this->end())
832 EltPtr += NumToInsert;
833
834 // Replace the overwritten part.
835 std::fill_n(I, NumOverwritten, *EltPtr);
836
837 // Insert the non-overwritten middle part.
838 std::uninitialized_fill_n(OldEnd, NumToInsert - NumOverwritten, *EltPtr);
839 return I;
840 }
841
842 template <typename ItTy,
843 typename = std::enable_if_t<std::is_convertible<
844 typename std::iterator_traits<ItTy>::iterator_category,
845 std::input_iterator_tag>::value>>
846 iterator insert(iterator I, ItTy From, ItTy To) {
847 // Convert iterator to elt# to avoid invalidating iterator when we reserve()
848 size_t InsertElt = I - this->begin();
849
850 if (I == this->end()) { // Important special case for empty vector.
851 append(From, To);
852 return this->begin()+InsertElt;
853 }
854
855 assert(this->isReferenceToStorage(I) && "Insertion iterator is out of bounds.")((void)0);
856
857 // Check that the reserve that follows doesn't invalidate the iterators.
858 this->assertSafeToAddRange(From, To);
859
860 size_t NumToInsert = std::distance(From, To);
861
862 // Ensure there is enough space.
863 reserve(this->size() + NumToInsert);
864
865 // Uninvalidate the iterator.
866 I = this->begin()+InsertElt;
867
868 // If there are more elements between the insertion point and the end of the
869 // range than there are being inserted, we can use a simple approach to
870 // insertion. Since we already reserved space, we know that this won't
871 // reallocate the vector.
872 if (size_t(this->end()-I) >= NumToInsert) {
873 T *OldEnd = this->end();
874 append(std::move_iterator<iterator>(this->end() - NumToInsert),
875 std::move_iterator<iterator>(this->end()));
876
877 // Copy the existing elements that get replaced.
878 std::move_backward(I, OldEnd-NumToInsert, OldEnd);
879
880 std::copy(From, To, I);
881 return I;
882 }
883
884 // Otherwise, we're inserting more elements than exist already, and we're
885 // not inserting at the end.
886
887 // Move over the elements that we're about to overwrite.
888 T *OldEnd = this->end();
889 this->set_size(this->size() + NumToInsert);
890 size_t NumOverwritten = OldEnd-I;
891 this->uninitialized_move(I, OldEnd, this->end()-NumOverwritten);
892
893 // Replace the overwritten part.
894 for (T *J = I; NumOverwritten > 0; --NumOverwritten) {
895 *J = *From;
896 ++J; ++From;
897 }
898
899 // Insert the non-overwritten middle part.
900 this->uninitialized_copy(From, To, OldEnd);
901 return I;
902 }
903
904 void insert(iterator I, std::initializer_list<T> IL) {
905 insert(I, IL.begin(), IL.end());
906 }
907
908 template <typename... ArgTypes> reference emplace_back(ArgTypes &&... Args) {
909 if (LLVM_UNLIKELY(this->size() >= this->capacity())__builtin_expect((bool)(this->size() >= this->capacity()), false))
910 return this->growAndEmplaceBack(std::forward<ArgTypes>(Args)...);
911
912 ::new ((void *)this->end()) T(std::forward<ArgTypes>(Args)...);
913 this->set_size(this->size() + 1);
914 return this->back();
915 }
916
917 SmallVectorImpl &operator=(const SmallVectorImpl &RHS);
918
919 SmallVectorImpl &operator=(SmallVectorImpl &&RHS);
920
921 bool operator==(const SmallVectorImpl &RHS) const {
922 if (this->size() != RHS.size()) return false;
923 return std::equal(this->begin(), this->end(), RHS.begin());
924 }
925 bool operator!=(const SmallVectorImpl &RHS) const {
926 return !(*this == RHS);
927 }
928
929 bool operator<(const SmallVectorImpl &RHS) const {
930 return std::lexicographical_compare(this->begin(), this->end(),
931 RHS.begin(), RHS.end());
932 }
933};
934
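A short usage sketch of the mutation API above (append, insert, pop_back_n), with the contents noted after each call:

    llvm::SmallVector<int, 4> A{1, 2, 3};
    A.append(2, 7);             // {1, 2, 3, 7, 7}
    A.insert(A.begin() + 1, 9); // {1, 9, 2, 3, 7, 7}
    A.pop_back_n(2);            // {1, 9, 2, 3}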
935template <typename T>
936void SmallVectorImpl<T>::swap(SmallVectorImpl<T> &RHS) {
937 if (this == &RHS) return;
938
939 // We can only avoid copying elements if neither vector is small.
940 if (!this->isSmall() && !RHS.isSmall()) {
941 std::swap(this->BeginX, RHS.BeginX);
942 std::swap(this->Size, RHS.Size);
943 std::swap(this->Capacity, RHS.Capacity);
944 return;
945 }
946 this->reserve(RHS.size());
947 RHS.reserve(this->size());
948
949 // Swap the shared elements.
950 size_t NumShared = this->size();
951 if (NumShared > RHS.size()) NumShared = RHS.size();
952 for (size_type i = 0; i != NumShared; ++i)
953 std::swap((*this)[i], RHS[i]);
954
955 // Copy over the extra elts.
956 if (this->size() > RHS.size()) {
957 size_t EltDiff = this->size() - RHS.size();
958 this->uninitialized_copy(this->begin()+NumShared, this->end(), RHS.end());
959 RHS.set_size(RHS.size() + EltDiff);
960 this->destroy_range(this->begin()+NumShared, this->end());
961 this->set_size(NumShared);
962 } else if (RHS.size() > this->size()) {
963 size_t EltDiff = RHS.size() - this->size();
964 this->uninitialized_copy(RHS.begin()+NumShared, RHS.end(), this->end());
965 this->set_size(this->size() + EltDiff);
966 this->destroy_range(RHS.begin()+NumShared, RHS.end());
967 RHS.set_size(NumShared);
968 }
969}
970
971template <typename T>
972SmallVectorImpl<T> &SmallVectorImpl<T>::
973 operator=(const SmallVectorImpl<T> &RHS) {
974 // Avoid self-assignment.
975 if (this == &RHS) return *this;
976
977 // If we already have sufficient space, assign the common elements, then
978 // destroy any excess.
979 size_t RHSSize = RHS.size();
980 size_t CurSize = this->size();
981 if (CurSize >= RHSSize) {
982 // Assign common elements.
983 iterator NewEnd;
984 if (RHSSize)
985 NewEnd = std::copy(RHS.begin(), RHS.begin()+RHSSize, this->begin());
986 else
987 NewEnd = this->begin();
988
989 // Destroy excess elements.
990 this->destroy_range(NewEnd, this->end());
991
992 // Trim.
993 this->set_size(RHSSize);
994 return *this;
995 }
996
997 // If we have to grow to have enough elements, destroy the current elements.
998 // This allows us to avoid copying them during the grow.
999 // FIXME: don't do this if they're efficiently moveable.
1000 if (this->capacity() < RHSSize) {
1001 // Destroy current elements.
1002 this->clear();
1003 CurSize = 0;
1004 this->grow(RHSSize);
1005 } else if (CurSize) {
1006 // Otherwise, use assignment for the already-constructed elements.
1007 std::copy(RHS.begin(), RHS.begin()+CurSize, this->begin());
1008 }
1009
1010 // Copy construct the new elements in place.
1011 this->uninitialized_copy(RHS.begin()+CurSize, RHS.end(),
1012 this->begin()+CurSize);
1013
1014 // Set end.
1015 this->set_size(RHSSize);
1016 return *this;
1017}
1018
1019template <typename T>
1020SmallVectorImpl<T> &SmallVectorImpl<T>::operator=(SmallVectorImpl<T> &&RHS) {
1021 // Avoid self-assignment.
1022 if (this == &RHS) return *this;
1023
1024 // If the RHS isn't small, clear this vector and then steal its buffer.
1025 if (!RHS.isSmall()) {
1026 this->destroy_range(this->begin(), this->end());
1027 if (!this->isSmall()) free(this->begin());
1028 this->BeginX = RHS.BeginX;
1029 this->Size = RHS.Size;
1030 this->Capacity = RHS.Capacity;
1031 RHS.resetToSmall();
1032 return *this;
1033 }
1034
1035 // If we already have sufficient space, assign the common elements, then
1036 // destroy any excess.
1037 size_t RHSSize = RHS.size();
1038 size_t CurSize = this->size();
1039 if (CurSize >= RHSSize) {
1040 // Assign common elements.
1041 iterator NewEnd = this->begin();
1042 if (RHSSize)
1043 NewEnd = std::move(RHS.begin(), RHS.end(), NewEnd);
1044
1045 // Destroy excess elements and trim the bounds.
1046 this->destroy_range(NewEnd, this->end());
1047 this->set_size(RHSSize);
1048
1049 // Clear the RHS.
1050 RHS.clear();
1051
1052 return *this;
1053 }
1054
1055 // If we have to grow to have enough elements, destroy the current elements.
1056 // This allows us to avoid copying them during the grow.
1057 // FIXME: this may not actually make any sense if we can efficiently move
1058 // elements.
1059 if (this->capacity() < RHSSize) {
1060 // Destroy current elements.
1061 this->clear();
1062 CurSize = 0;
1063 this->grow(RHSSize);
1064 } else if (CurSize) {
1065 // Otherwise, use assignment for the already-constructed elements.
1066 std::move(RHS.begin(), RHS.begin()+CurSize, this->begin());
1067 }
1068
1069 // Move-construct the new elements in place.
1070 this->uninitialized_move(RHS.begin()+CurSize, RHS.end(),
1071 this->begin()+CurSize);
1072
1073 // Set end.
1074 this->set_size(RHSSize);
1075
1076 RHS.clear();
1077 return *this;
1078}
1079
1080/// Storage for the SmallVector elements. This is specialized for the N=0 case
1081/// to avoid allocating unnecessary storage.
1082template <typename T, unsigned N>
1083struct SmallVectorStorage {
1084 alignas(T) char InlineElts[N * sizeof(T)];
1085};
1086
1087/// We need the storage to be properly aligned even for small-size of 0 so that
1088/// the pointer math in \a SmallVectorTemplateCommon::getFirstEl() is
1089/// well-defined.
1090template <typename T> struct alignas(T) SmallVectorStorage<T, 0> {};
1091
1092/// Forward declaration of SmallVector so that
1093/// calculateSmallVectorDefaultInlinedElements can reference
1094/// `sizeof(SmallVector<T, 0>)`.
1095template <typename T, unsigned N> class LLVM_GSL_OWNER[[gsl::Owner]] SmallVector;
1096
1097/// Helper class for calculating the default number of inline elements for
1098/// `SmallVector<T>`.
1099///
1100/// This should be migrated to a constexpr function when our minimum
1101/// compiler support is enough for multi-statement constexpr functions.
1102template <typename T> struct CalculateSmallVectorDefaultInlinedElements {
1103 // Parameter controlling the default number of inlined elements
1104 // for `SmallVector<T>`.
1105 //
1106 // The default number of inlined elements ensures that
1107 // 1. There is at least one inlined element.
1108 // 2. `sizeof(SmallVector<T>) <= kPreferredSmallVectorSizeof` unless
1109 // it contradicts 1.
1110 static constexpr size_t kPreferredSmallVectorSizeof = 64;
1111
1112 // static_assert that sizeof(T) is not "too big".
1113 //
1114 // Because our policy guarantees at least one inlined element, it is possible
1115 // for an arbitrarily large inlined element to allocate an arbitrarily large
1116 // amount of inline storage. We generally consider it an antipattern for a
1117 // SmallVector to allocate an excessive amount of inline storage, so we want
1118 // to call attention to these cases and make sure that users are making an
1119 // intentional decision if they request a lot of inline storage.
1120 //
1121 // We want this assertion to trigger in pathological cases, but otherwise
1122 // not be too easy to hit. To accomplish that, the cutoff is actually somewhat
1123 // larger than kPreferredSmallVectorSizeof (otherwise,
1124 // `SmallVector<SmallVector<T>>` would be one easy way to trip it, and that
1125 // pattern seems useful in practice).
1126 //
1127 // One wrinkle is that this assertion is in theory non-portable, since
1128 // sizeof(T) is in general platform-dependent. However, we don't expect this
1129 // to be much of an issue, because most LLVM development happens on 64-bit
1130 // hosts, and therefore sizeof(T) is expected to *decrease* when compiled for
1131 // 32-bit hosts, dodging the issue. The reverse situation, where development
1132 // happens on a 32-bit host and then fails due to sizeof(T) *increasing* on a
1133 // 64-bit host, is expected to be very rare.
1134 static_assert(
1135 sizeof(T) <= 256,
1136 "You are trying to use a default number of inlined elements for "
1137 "`SmallVector<T>` but `sizeof(T)` is really big! Please use an "
1138 "explicit number of inlined elements with `SmallVector<T, N>` to make "
1139 "sure you really want that much inline storage.");
1140
1141 // Discount the size of the header itself when calculating the maximum inline
1142 // bytes.
1143 static constexpr size_t PreferredInlineBytes =
1144 kPreferredSmallVectorSizeof - sizeof(SmallVector<T, 0>);
1145 static constexpr size_t NumElementsThatFit = PreferredInlineBytes / sizeof(T);
1146 static constexpr size_t value =
1147 NumElementsThatFit == 0 ? 1 : NumElementsThatFit;
1148};
1149
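Working the policy above through for T = int on a typical 64-bit host, assuming sizeof(SmallVector<int, 0>) is 16 bytes (one pointer plus two 32-bit size fields):

    PreferredInlineBytes = 64 - 16 = 48
    NumElementsThatFit   = 48 / 4  = 12
    value                = 12   // so SmallVector<int> defaults to 12 inline elements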
1150/// This is a 'vector' (really, a variable-sized array), optimized
1151/// for the case when the array is small. It contains some number of elements
1152/// in-place, which allows it to avoid heap allocation when the actual number of
1153/// elements is below that threshold. This allows normal "small" cases to be
1154/// fast without losing generality for large inputs.
1155///
1156/// \note
1157/// In the absence of a well-motivated choice for the number of inlined
1158/// elements \p N, it is recommended to use \c SmallVector<T> (that is,
1159/// omitting the \p N). This will choose a default number of inlined elements
1160/// reasonable for allocation on the stack (for example, trying to keep \c
1161/// sizeof(SmallVector<T>) around 64 bytes).
1162///
1163/// \warning This does not attempt to be exception safe.
1164///
1165/// \see https://llvm.org/docs/ProgrammersManual.html#llvm-adt-smallvector-h
1166template <typename T,
1167 unsigned N = CalculateSmallVectorDefaultInlinedElements<T>::value>
1168class LLVM_GSL_OWNER[[gsl::Owner]] SmallVector : public SmallVectorImpl<T>,
1169 SmallVectorStorage<T, N> {
1170public:
1171 SmallVector() : SmallVectorImpl<T>(N) {}
1172
1173 ~SmallVector() {
1174 // Destroy the constructed elements in the vector.
1175 this->destroy_range(this->begin(), this->end());
1176 }
1177
1178 explicit SmallVector(size_t Size, const T &Value = T())
1179 : SmallVectorImpl<T>(N) {
1180 this->assign(Size, Value);
1181 }
1182
1183 template <typename ItTy,
1184 typename = std::enable_if_t<std::is_convertible<
1185 typename std::iterator_traits<ItTy>::iterator_category,
1186 std::input_iterator_tag>::value>>
1187 SmallVector(ItTy S, ItTy E) : SmallVectorImpl<T>(N) {
1188 this->append(S, E);
1189 }
1190
1191 template <typename RangeTy>
1192 explicit SmallVector(const iterator_range<RangeTy> &R)
1193 : SmallVectorImpl<T>(N) {
1194 this->append(R.begin(), R.end());
1195 }
1196
1197 SmallVector(std::initializer_list<T> IL) : SmallVectorImpl<T>(N) {
1198 this->assign(IL);
1199 }
1200
1201 SmallVector(const SmallVector &RHS) : SmallVectorImpl<T>(N) {
1202 if (!RHS.empty())
1203 SmallVectorImpl<T>::operator=(RHS);
1204 }
1205
1206 SmallVector &operator=(const SmallVector &RHS) {
1207 SmallVectorImpl<T>::operator=(RHS);
1208 return *this;
1209 }
1210
1211 SmallVector(SmallVector &&RHS) : SmallVectorImpl<T>(N) {
1212 if (!RHS.empty())
1213 SmallVectorImpl<T>::operator=(::std::move(RHS));
1214 }
1215
1216 SmallVector(SmallVectorImpl<T> &&RHS) : SmallVectorImpl<T>(N) {
1217 if (!RHS.empty())
1218 SmallVectorImpl<T>::operator=(::std::move(RHS));
1219 }
1220
1221 SmallVector &operator=(SmallVector &&RHS) {
1222 SmallVectorImpl<T>::operator=(::std::move(RHS));
1223 return *this;
1224 }
1225
1226 SmallVector &operator=(SmallVectorImpl<T> &&RHS) {
1227 SmallVectorImpl<T>::operator=(::std::move(RHS));
1228 return *this;
1229 }
1230
1231 SmallVector &operator=(std::initializer_list<T> IL) {
1232 this->assign(IL);
1233 return *this;
1234 }
1235};
1236
1237template <typename T, unsigned N>
1238inline size_t capacity_in_bytes(const SmallVector<T, N> &X) {
1239 return X.capacity_in_bytes();
1240}
1241
1242/// Given a range of type R, iterate the entire range and return a
1243/// SmallVector with elements of the vector. This is useful, for example,
1244/// when you want to iterate a range and then sort the results.
1245template <unsigned Size, typename R>
1246SmallVector<typename std::remove_const<typename std::remove_reference<
1247 decltype(*std::begin(std::declval<R &>()))>::type>::type,
1248 Size>
1249to_vector(R &&Range) {
1250 return {std::begin(Range), std::end(Range)};
1251}
1252
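A hedged usage sketch for to_vector: materialize a range into a SmallVector so it can be sorted in place:

    int Raw[] = {3, 1, 2};
    auto Sorted = llvm::to_vector<4>(Raw);   // SmallVector<int, 4> holding {3, 1, 2}
    std::sort(Sorted.begin(), Sorted.end()); // now {1, 2, 3}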
1253} // end namespace llvm
1254
1255namespace std {
1256
1257 /// Implement std::swap in terms of SmallVector swap.
1258 template<typename T>
1259 inline void
1260 swap(llvm::SmallVectorImpl<T> &LHS, llvm::SmallVectorImpl<T> &RHS) {
1261 LHS.swap(RHS);
1262 }
1263
1264 /// Implement std::swap in terms of SmallVector swap.
1265 template<typename T, unsigned N>
1266 inline void
1267 swap(llvm::SmallVector<T, N> &LHS, llvm::SmallVector<T, N> &RHS) {
1268 LHS.swap(RHS);
1269 }
1270
1271} // end namespace std
1272
1273#endif // LLVM_ADT_SMALLVECTOR_H

/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Analysis/CFG.h

1//===-- Analysis/CFG.h - BasicBlock Analyses --------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This family of functions performs analyses on basic blocks, and instructions
10// contained within basic blocks.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_ANALYSIS_CFG_H
15#define LLVM_ANALYSIS_CFG_H
16
17#include "llvm/ADT/GraphTraits.h"
18#include "llvm/ADT/SmallPtrSet.h"
19#include <utility>
20
21namespace llvm {
22
23class BasicBlock;
24class DominatorTree;
25class Function;
26class Instruction;
27class LoopInfo;
28template <typename T> class SmallVectorImpl;
29
30/// Analyze the specified function to find all of the loop backedges in the
31/// function and return them. This is a relatively cheap (compared to
32/// computing dominators and loop info) analysis.
33///
34/// The output is added to Result, as pairs of <from,to> edge info.
35void FindFunctionBackedges(
36 const Function &F,
37 SmallVectorImpl<std::pair<const BasicBlock *, const BasicBlock *> > &
38 Result);
39
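A hedged sketch for the declaration above (the Function F is assumed to be in scope):

    // Collect every loop backedge of F as a <from, to> pair of blocks.
    llvm::SmallVector<std::pair<const llvm::BasicBlock *, const llvm::BasicBlock *>, 8>
        Backedges;
    llvm::FindFunctionBackedges(F, Backedges);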
40/// Search for the specified successor of basic block BB and return its position
41/// in the terminator instruction's list of successors. It is an error to call
42/// this with a block that is not a successor.
43unsigned GetSuccessorNumber(const BasicBlock *BB, const BasicBlock *Succ);
44
45/// Return true if the specified edge is a critical edge. Critical edges are
46/// edges from a block with multiple successors to a block with multiple
47/// predecessors.
48///
49bool isCriticalEdge(const Instruction *TI, unsigned SuccNum,
50 bool AllowIdenticalEdges = false);
51bool isCriticalEdge(const Instruction *TI, const BasicBlock *Succ,
52 bool AllowIdenticalEdges = false);
53
54/// Determine whether instruction 'To' is reachable from 'From', without passing
55/// through any blocks in ExclusionSet, returning true if uncertain.
56///
57/// Determine whether there is a path from From to To within a single function.
58/// Returns false only if we can prove that once 'From' has been executed then
59/// 'To' can not be executed. Conservatively returns true.
60///
61/// This function is linear with respect to the number of blocks in the CFG,
62/// walking down successors from From to reach To, with a fixed threshold.
63/// Using DT or LI allows us to answer more quickly. LI reduces the cost of
64/// an entire loop of any number of blocks to be the same as the cost of a
65/// single block. DT reduces the cost by allowing the search to terminate when
66/// we find a block that dominates the block containing 'To'. DT is most useful
67/// on branchy code but not loops, and LI is most useful on code with loops but
68/// does not help on branchy code outside loops.
69bool isPotentiallyReachable(
70 const Instruction *From, const Instruction *To,
71 const SmallPtrSetImpl<BasicBlock *> *ExclusionSet = nullptr,
72 const DominatorTree *DT = nullptr, const LoopInfo *LI = nullptr);
73
74/// Determine whether block 'To' is reachable from 'From', returning
75/// true if uncertain.
76///
77/// Determine whether there is a path from From to To within a single function.
78/// Returns false only if we can prove that once 'From' has been reached then
79/// 'To' can not be executed. Conservatively returns true.
80bool isPotentiallyReachable(
81 const BasicBlock *From, const BasicBlock *To,
82 const SmallPtrSetImpl<BasicBlock *> *ExclusionSet = nullptr,
83 const DominatorTree *DT = nullptr, const LoopInfo *LI = nullptr);
84
85/// Determine whether there is at least one path from a block in
86/// 'Worklist' to 'StopBB' without passing through any blocks in
87/// 'ExclusionSet', returning true if uncertain.
88///
89/// Determine whether there is a path from at least one block in Worklist to
90/// StopBB within a single function without passing through any of the blocks
91/// in 'ExclusionSet'. Returns false only if we can prove that once any block
92/// in 'Worklist' has been reached then 'StopBB' can not be executed.
93/// Conservatively returns true.
94bool isPotentiallyReachableFromMany(
95 SmallVectorImpl<BasicBlock *> &Worklist, BasicBlock *StopBB,
96 const SmallPtrSetImpl<BasicBlock *> *ExclusionSet,
97 const DominatorTree *DT = nullptr, const LoopInfo *LI = nullptr);
98
99/// Return true if the control flow in \p RPOTraversal is irreducible.
100///
101/// This is a generic implementation to detect CFG irreducibility based on loop
102/// info analysis. It can be used for any kind of CFG (Loop, MachineLoop,
103/// Function, MachineFunction, etc.) by providing an RPO traversal (\p
104/// RPOTraversal) and the loop info analysis (\p LI) of the CFG. This utility
105/// function is only recommended when loop info analysis is available. If loop
106/// info analysis isn't available, please, don't compute it explicitly for this
107/// purpose. There are more efficient ways to detect CFG irreducibility that
108/// don't require recomputing loop info analysis (e.g., T1/T2 or Tarjan's
109/// algorithm).
110///
111/// Requirements:
112/// 1) GraphTraits must be implemented for NodeT type. It is used to access
113/// NodeT successors.
114/// 2) \p RPOTraversal must be a valid reverse post-order traversal of the
115/// target CFG with begin()/end() iterator interfaces.
116/// 3) \p LI must be a valid LoopInfoBase that contains up-to-date loop
117/// analysis information of the CFG.
118///
119/// This algorithm uses the information about reducible loop back-edges already
120/// computed in \p LI. When a back-edge is found during the RPO traversal, the
121/// algorithm checks whether the back-edge is one of the reducible back-edges in
122/// loop info. If it isn't, the CFG is irreducible. For example, for the CFG
123/// below (canonical irreducible graph) loop info won't contain any loop, so the
124/// algorithm will return that the CFG is irreducible when checking the B <-
125/// -> C back-edge.
126///
127/// (A->B, A->C, B->C, C->B, C->D)
128///     A
129///    / \
130///   B<- ->C
131///        |
132///        D
133///
134template <class NodeT, class RPOTraversalT, class LoopInfoT,
135 class GT = GraphTraits<NodeT>>
136bool containsIrreducibleCFG(RPOTraversalT &RPOTraversal, const LoopInfoT &LI) {
137 /// Check whether the edge (\p Src, \p Dst) is a reducible loop backedge
138 /// according to LI. I.e., check if there exists a loop that contains Src and
139 /// where Dst is the loop header.
140 auto isProperBackedge = [&](NodeT Src, NodeT Dst) {
141 for (const auto *Lp = LI.getLoopFor(Src); Lp; Lp = Lp->getParentLoop()) {
142 if (Lp->getHeader() == Dst)
143 return true;
144 }
145 return false;
146 };
147
148 SmallPtrSet<NodeT, 32> Visited;
149 for (NodeT Node : RPOTraversal) {
150 Visited.insert(Node);
151 for (NodeT Succ : make_range(GT::child_begin(Node), GT::child_end(Node))) {
152 // Succ hasn't been visited yet
153 if (!Visited.count(Succ))
154 continue;
155 // We already visited Succ, thus Node->Succ must be a backedge. Check that
156 // the head matches what we have in the loop information. Otherwise, we
157 // have an irreducible graph.
158 if (!isProperBackedge(Node, Succ))
159 return true;
160 }
161 }
162
163 return false;
15
Returning zero, which participates in a condition later
164}
165} // End llvm namespace
166
167#endif
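A usage sketch for containsIrreducibleCFG as documented above, assuming an IR Function and an up-to-date LoopInfo are already available (e.g. inside a pass); the helper name is hypothetical:

#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/Function.h"

static bool hasIrreducibleControlFlow(const llvm::Function &F,
                                      const llvm::LoopInfo &LI) {
  // RPO traversal of the basic blocks; GraphTraits<const BasicBlock *>
  // provides the successor iteration required by the template.
  llvm::ReversePostOrderTraversal<const llvm::Function *> RPOT(&F);
  return llvm::containsIrreducibleCFG<const llvm::BasicBlock *>(RPOT, LI);
}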

/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/IR/PatternMatch.h

1//===- PatternMatch.h - Match on the LLVM IR --------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file provides a simple and efficient mechanism for performing general
10// tree-based pattern matches on the LLVM IR. The power of these routines is
11// that it allows you to write concise patterns that are expressive and easy to
12// understand. The other major advantage of this is that it allows you to
13// trivially capture/bind elements in the pattern to variables. For example,
14// you can do something like this:
15//
16// Value *Exp = ...
17// Value *X, *Y; ConstantInt *C1, *C2; // (X & C1) | (Y & C2)
18// if (match(Exp, m_Or(m_And(m_Value(X), m_ConstantInt(C1)),
19// m_And(m_Value(Y), m_ConstantInt(C2))))) {
20// ... Pattern is matched and variables are bound ...
21// }
22//
23// This is primarily useful to things like the instruction combiner, but can
24// also be useful for static analysis tools or code generators.
25//
26//===----------------------------------------------------------------------===//
27
28#ifndef LLVM_IR_PATTERNMATCH_H
29#define LLVM_IR_PATTERNMATCH_H
30
31#include "llvm/ADT/APFloat.h"
32#include "llvm/ADT/APInt.h"
33#include "llvm/IR/Constant.h"
34#include "llvm/IR/Constants.h"
35#include "llvm/IR/DataLayout.h"
36#include "llvm/IR/InstrTypes.h"
37#include "llvm/IR/Instruction.h"
38#include "llvm/IR/Instructions.h"
39#include "llvm/IR/IntrinsicInst.h"
40#include "llvm/IR/Intrinsics.h"
41#include "llvm/IR/Operator.h"
42#include "llvm/IR/Value.h"
43#include "llvm/Support/Casting.h"
44#include <cstdint>
45
46namespace llvm {
47namespace PatternMatch {
48
49template <typename Val, typename Pattern> bool match(Val *V, const Pattern &P) {
50 return const_cast<Pattern &>(P).match(V);
33
Calling 'LogicalOp_match::match'
37
Returning from 'LogicalOp_match::match'
38
Returning zero, which participates in a condition later
42
Calling 'LogicalOp_match::match'
46
Returning from 'LogicalOp_match::match'
47
Returning zero, which participates in a condition later
51}
52
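A self-contained version of the (X & C1) | (Y & C2) example from the file comment above, assuming Exp is an arbitrary Value* obtained elsewhere:

#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace llvm::PatternMatch;

void inspect(Value *Exp) {
  Value *X, *Y;
  ConstantInt *C1, *C2;
  // On success the m_Value/m_ConstantInt matchers have bound X, Y, C1, C2
  // to the corresponding operands of the matched expression tree.
  if (match(Exp, m_Or(m_And(m_Value(X), m_ConstantInt(C1)),
                      m_And(m_Value(Y), m_ConstantInt(C2))))) {
    // ... use X, Y, C1, C2 ...
  }
}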
53template <typename Pattern> bool match(ArrayRef<int> Mask, const Pattern &P) {
54 return const_cast<Pattern &>(P).match(Mask);
55}
56
57template <typename SubPattern_t> struct OneUse_match {
58 SubPattern_t SubPattern;
59
60 OneUse_match(const SubPattern_t &SP) : SubPattern(SP) {}
61
62 template <typename OpTy> bool match(OpTy *V) {
63 return V->hasOneUse() && SubPattern.match(V);
64 }
65};
66
67template <typename T> inline OneUse_match<T> m_OneUse(const T &SubPattern) {
68 return SubPattern;
69}
70
71template <typename Class> struct class_match {
72 template <typename ITy> bool match(ITy *V) { return isa<Class>(V); }
73};
74
75/// Match an arbitrary value and ignore it.
76inline class_match<Value> m_Value() { return class_match<Value>(); }
77
78/// Match an arbitrary unary operation and ignore it.
79inline class_match<UnaryOperator> m_UnOp() {
80 return class_match<UnaryOperator>();
81}
82
83/// Match an arbitrary binary operation and ignore it.
84inline class_match<BinaryOperator> m_BinOp() {
85 return class_match<BinaryOperator>();
86}
87
88/// Match any compare instruction and ignore it.
89inline class_match<CmpInst> m_Cmp() { return class_match<CmpInst>(); }
90
91struct undef_match {
92 static bool check(const Value *V) {
93 if (isa<UndefValue>(V))
94 return true;
95
96 const auto *CA = dyn_cast<ConstantAggregate>(V);
97 if (!CA)
98 return false;
99
100 SmallPtrSet<const ConstantAggregate *, 8> Seen;
101 SmallVector<const ConstantAggregate *, 8> Worklist;
102
103 // Either UndefValue, PoisonValue, or an aggregate that only contains
104 // these is accepted by matcher.
105 // CheckValue returns false if CA cannot satisfy this constraint.
106 auto CheckValue = [&](const ConstantAggregate *CA) {
107 for (const Value *Op : CA->operand_values()) {
108 if (isa<UndefValue>(Op))
109 continue;
110
111 const auto *CA = dyn_cast<ConstantAggregate>(Op);
112 if (!CA)
113 return false;
114 if (Seen.insert(CA).second)
115 Worklist.emplace_back(CA);
116 }
117
118 return true;
119 };
120
121 if (!CheckValue(CA))
122 return false;
123
124 while (!Worklist.empty()) {
125 if (!CheckValue(Worklist.pop_back_val()))
126 return false;
127 }
128 return true;
129 }
130 template <typename ITy> bool match(ITy *V) { return check(V); }
131};
132
133/// Match an arbitrary undef constant. This matches poison as well.
134/// If this is an aggregate and contains a non-aggregate element that is
135/// neither undef nor poison, the aggregate is not matched.
136inline auto m_Undef() { return undef_match(); }
137
138/// Match an arbitrary poison constant.
139inline class_match<PoisonValue> m_Poison() { return class_match<PoisonValue>(); }
140
141/// Match an arbitrary Constant and ignore it.
142inline class_match<Constant> m_Constant() { return class_match<Constant>(); }
143
144/// Match an arbitrary ConstantInt and ignore it.
145inline class_match<ConstantInt> m_ConstantInt() {
146 return class_match<ConstantInt>();
147}
148
149/// Match an arbitrary ConstantFP and ignore it.
150inline class_match<ConstantFP> m_ConstantFP() {
151 return class_match<ConstantFP>();
152}
153
154/// Match an arbitrary ConstantExpr and ignore it.
155inline class_match<ConstantExpr> m_ConstantExpr() {
156 return class_match<ConstantExpr>();
157}
158
159/// Match an arbitrary basic block value and ignore it.
160inline class_match<BasicBlock> m_BasicBlock() {
161 return class_match<BasicBlock>();
162}
163
164/// Inverting matcher
165template <typename Ty> struct match_unless {
166 Ty M;
167
168 match_unless(const Ty &Matcher) : M(Matcher) {}
169
170 template <typename ITy> bool match(ITy *V) { return !M.match(V); }
171};
172
173/// Match if the inner matcher does *NOT* match.
174template <typename Ty> inline match_unless<Ty> m_Unless(const Ty &M) {
175 return match_unless<Ty>(M);
176}
177
178/// Matching combinators
179template <typename LTy, typename RTy> struct match_combine_or {
180 LTy L;
181 RTy R;
182
183 match_combine_or(const LTy &Left, const RTy &Right) : L(Left), R(Right) {}
184
185 template <typename ITy> bool match(ITy *V) {
186 if (L.match(V))
187 return true;
188 if (R.match(V))
189 return true;
190 return false;
191 }
192};
193
194template <typename LTy, typename RTy> struct match_combine_and {
195 LTy L;
196 RTy R;
197
198 match_combine_and(const LTy &Left, const RTy &Right) : L(Left), R(Right) {}
199
200 template <typename ITy> bool match(ITy *V) {
201 if (L.match(V))
202 if (R.match(V))
203 return true;
204 return false;
205 }
206};
207
208/// Combine two pattern matchers matching L || R
209template <typename LTy, typename RTy>
210inline match_combine_or<LTy, RTy> m_CombineOr(const LTy &L, const RTy &R) {
211 return match_combine_or<LTy, RTy>(L, R);
212}
213
214/// Combine two pattern matchers matching L && R
215template <typename LTy, typename RTy>
216inline match_combine_and<LTy, RTy> m_CombineAnd(const LTy &L, const RTy &R) {
217 return match_combine_and<LTy, RTy>(L, R);
218}
219
220struct apint_match {
221 const APInt *&Res;
222 bool AllowUndef;
223
224 apint_match(const APInt *&Res, bool AllowUndef)
225 : Res(Res), AllowUndef(AllowUndef) {}
226
227 template <typename ITy> bool match(ITy *V) {
228 if (auto *CI = dyn_cast<ConstantInt>(V)) {
229 Res = &CI->getValue();
230 return true;
231 }
232 if (V->getType()->isVectorTy())
233 if (const auto *C = dyn_cast<Constant>(V))
234 if (auto *CI = dyn_cast_or_null<ConstantInt>(
235 C->getSplatValue(AllowUndef))) {
236 Res = &CI->getValue();
237 return true;
238 }
239 return false;
240 }
241};
242// Either constexpr if or renaming ConstantFP::getValueAPF to
243// ConstantFP::getValue is needed to do it via single template
244// function for both apint/apfloat.
245struct apfloat_match {
246 const APFloat *&Res;
247 bool AllowUndef;
248
249 apfloat_match(const APFloat *&Res, bool AllowUndef)
250 : Res(Res), AllowUndef(AllowUndef) {}
251
252 template <typename ITy> bool match(ITy *V) {
253 if (auto *CI = dyn_cast<ConstantFP>(V)) {
254 Res = &CI->getValueAPF();
255 return true;
256 }
257 if (V->getType()->isVectorTy())
258 if (const auto *C = dyn_cast<Constant>(V))
259 if (auto *CI = dyn_cast_or_null<ConstantFP>(
260 C->getSplatValue(AllowUndef))) {
261 Res = &CI->getValueAPF();
262 return true;
263 }
264 return false;
265 }
266};
267
268/// Match a ConstantInt or splatted ConstantVector, binding the
269/// specified pointer to the contained APInt.
270inline apint_match m_APInt(const APInt *&Res) {
271 // Forbid undefs by default to maintain previous behavior.
272 return apint_match(Res, /* AllowUndef */ false);
273}
274
275/// Match APInt while allowing undefs in splat vector constants.
276inline apint_match m_APIntAllowUndef(const APInt *&Res) {
277 return apint_match(Res, /* AllowUndef */ true);
278}
279
280/// Match APInt while forbidding undefs in splat vector constants.
281inline apint_match m_APIntForbidUndef(const APInt *&Res) {
282 return apint_match(Res, /* AllowUndef */ false);
283}
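A small sketch of binding through m_APInt (the helper name isAddOfConstant is hypothetical); the same pattern also covers splatted vector constants, and the AllowUndef/ForbidUndef variants differ only in how undef lanes in such splats are treated:

#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace llvm::PatternMatch;

static bool isAddOfConstant(Value *V, const APInt *&C) {
  Value *X;
  // Matches "add X, C" where C is a ConstantInt or a splatted vector
  // constant; on success C points at the bound APInt.
  return match(V, m_Add(m_Value(X), m_APInt(C)));
}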
284
285/// Match a ConstantFP or splatted ConstantVector, binding the
286/// specified pointer to the contained APFloat.
287inline apfloat_match m_APFloat(const APFloat *&Res) {
288 // Forbid undefs by default to maintain previous behavior.
289 return apfloat_match(Res, /* AllowUndef */ false);
290}
291
292/// Match APFloat while allowing undefs in splat vector constants.
293inline apfloat_match m_APFloatAllowUndef(const APFloat *&Res) {
294 return apfloat_match(Res, /* AllowUndef */ true);
295}
296
297/// Match APFloat while forbidding undefs in splat vector constants.
298inline apfloat_match m_APFloatForbidUndef(const APFloat *&Res) {
299 return apfloat_match(Res, /* AllowUndef */ false);
300}
301
302template <int64_t Val> struct constantint_match {
303 template <typename ITy> bool match(ITy *V) {
304 if (const auto *CI = dyn_cast<ConstantInt>(V)) {
305 const APInt &CIV = CI->getValue();
306 if (Val >= 0)
307 return CIV == static_cast<uint64_t>(Val);
308 // If Val is negative, and CI is shorter than it, truncate to the right
309 // number of bits. If it is larger, then we have to sign extend. Just
310 // compare their negated values.
311 return -CIV == -Val;
312 }
313 return false;
314 }
315};
316
317/// Match a ConstantInt with a specific value.
318template <int64_t Val> inline constantint_match<Val> m_ConstantInt() {
319 return constantint_match<Val>();
320}
321
322/// This helper class is used to match constant scalars, vector splats,
323/// and fixed width vectors that satisfy a specified predicate.
324/// For fixed width vector constants, undefined elements are ignored.
325template <typename Predicate, typename ConstantVal>
326struct cstval_pred_ty : public Predicate {
327 template <typename ITy> bool match(ITy *V) {
328 if (const auto *CV = dyn_cast<ConstantVal>(V))
329 return this->isValue(CV->getValue());
330 if (const auto *VTy = dyn_cast<VectorType>(V->getType())) {
331 if (const auto *C = dyn_cast<Constant>(V)) {
332 if (const auto *CV = dyn_cast_or_null<ConstantVal>(C->getSplatValue()))
333 return this->isValue(CV->getValue());
334
335 // Number of elements of a scalable vector unknown at compile time
336 auto *FVTy = dyn_cast<FixedVectorType>(VTy);
337 if (!FVTy)
338 return false;
339
340 // Non-splat vector constant: check each element for a match.
341 unsigned NumElts = FVTy->getNumElements();
342 assert(NumElts != 0 && "Constant vector with no elements?");
343 bool HasNonUndefElements = false;
344 for (unsigned i = 0; i != NumElts; ++i) {
345 Constant *Elt = C->getAggregateElement(i);
346 if (!Elt)
347 return false;
348 if (isa<UndefValue>(Elt))
349 continue;
350 auto *CV = dyn_cast<ConstantVal>(Elt);
351 if (!CV || !this->isValue(CV->getValue()))
352 return false;
353 HasNonUndefElements = true;
354 }
355 return HasNonUndefElements;
356 }
357 }
358 return false;
359 }
360};
361
362/// specialization of cstval_pred_ty for ConstantInt
363template <typename Predicate>
364using cst_pred_ty = cstval_pred_ty<Predicate, ConstantInt>;
365
366/// specialization of cstval_pred_ty for ConstantFP
367template <typename Predicate>
368using cstfp_pred_ty = cstval_pred_ty<Predicate, ConstantFP>;
369
370/// This helper class is used to match scalar and vector constants that
371/// satisfy a specified predicate, and bind them to an APInt.
372template <typename Predicate> struct api_pred_ty : public Predicate {
373 const APInt *&Res;
374
375 api_pred_ty(const APInt *&R) : Res(R) {}
376
377 template <typename ITy> bool match(ITy *V) {
378 if (const auto *CI = dyn_cast<ConstantInt>(V))
379 if (this->isValue(CI->getValue())) {
380 Res = &CI->getValue();
381 return true;
382 }
383 if (V->getType()->isVectorTy())
384 if (const auto *C = dyn_cast<Constant>(V))
385 if (auto *CI = dyn_cast_or_null<ConstantInt>(C->getSplatValue()))
386 if (this->isValue(CI->getValue())) {
387 Res = &CI->getValue();
388 return true;
389 }
390
391 return false;
392 }
393};
394
395/// This helper class is used to match scalar and vector constants that
396/// satisfy a specified predicate, and bind them to an APFloat.
397/// Undefs are allowed in splat vector constants.
398template <typename Predicate> struct apf_pred_ty : public Predicate {
399 const APFloat *&Res;
400
401 apf_pred_ty(const APFloat *&R) : Res(R) {}
402
403 template <typename ITy> bool match(ITy *V) {
404 if (const auto *CI = dyn_cast<ConstantFP>(V))
405 if (this->isValue(CI->getValue())) {
406 Res = &CI->getValue();
407 return true;
408 }
409 if (V->getType()->isVectorTy())
410 if (const auto *C = dyn_cast<Constant>(V))
411 if (auto *CI = dyn_cast_or_null<ConstantFP>(
412 C->getSplatValue(/* AllowUndef */ true)))
413 if (this->isValue(CI->getValue())) {
414 Res = &CI->getValue();
415 return true;
416 }
417
418 return false;
419 }
420};
421
422///////////////////////////////////////////////////////////////////////////////
423//
424// Encapsulate constant value queries for use in templated predicate matchers.
425// This allows checking if constants match using compound predicates and works
426// with vector constants, possibly with relaxed constraints. For example, ignore
427// undef values.
428//
429///////////////////////////////////////////////////////////////////////////////
430
431struct is_any_apint {
432 bool isValue(const APInt &C) { return true; }
433};
434/// Match an integer or vector with any integral constant.
435/// For vectors, this includes constants with undefined elements.
436inline cst_pred_ty<is_any_apint> m_AnyIntegralConstant() {
437 return cst_pred_ty<is_any_apint>();
438}
439
440struct is_all_ones {
441 bool isValue(const APInt &C) { return C.isAllOnesValue(); }
442};
443/// Match an integer or vector with all bits set.
444/// For vectors, this includes constants with undefined elements.
445inline cst_pred_ty<is_all_ones> m_AllOnes() {
446 return cst_pred_ty<is_all_ones>();
447}
448
449struct is_maxsignedvalue {
450 bool isValue(const APInt &C) { return C.isMaxSignedValue(); }
451};
452/// Match an integer or vector with values having all bits except for the high
453/// bit set (0x7f...).
454/// For vectors, this includes constants with undefined elements.
455inline cst_pred_ty<is_maxsignedvalue> m_MaxSignedValue() {
456 return cst_pred_ty<is_maxsignedvalue>();
457}
458inline api_pred_ty<is_maxsignedvalue> m_MaxSignedValue(const APInt *&V) {
459 return V;
460}
461
462struct is_negative {
463 bool isValue(const APInt &C) { return C.isNegative(); }
464};
465/// Match an integer or vector of negative values.
466/// For vectors, this includes constants with undefined elements.
467inline cst_pred_ty<is_negative> m_Negative() {
468 return cst_pred_ty<is_negative>();
469}
470inline api_pred_ty<is_negative> m_Negative(const APInt *&V) {
471 return V;
472}
473
474struct is_nonnegative {
475 bool isValue(const APInt &C) { return C.isNonNegative(); }
476};
477/// Match an integer or vector of non-negative values.
478/// For vectors, this includes constants with undefined elements.
479inline cst_pred_ty<is_nonnegative> m_NonNegative() {
480 return cst_pred_ty<is_nonnegative>();
481}
482inline api_pred_ty<is_nonnegative> m_NonNegative(const APInt *&V) {
483 return V;
484}
485
486struct is_strictlypositive {
487 bool isValue(const APInt &C) { return C.isStrictlyPositive(); }
488};
489/// Match an integer or vector of strictly positive values.
490/// For vectors, this includes constants with undefined elements.
491inline cst_pred_ty<is_strictlypositive> m_StrictlyPositive() {
492 return cst_pred_ty<is_strictlypositive>();
493}
494inline api_pred_ty<is_strictlypositive> m_StrictlyPositive(const APInt *&V) {
495 return V;
496}
497
498struct is_nonpositive {
499 bool isValue(const APInt &C) { return C.isNonPositive(); }
500};
501/// Match an integer or vector of non-positive values.
502/// For vectors, this includes constants with undefined elements.
503inline cst_pred_ty<is_nonpositive> m_NonPositive() {
504 return cst_pred_ty<is_nonpositive>();
505}
506inline api_pred_ty<is_nonpositive> m_NonPositive(const APInt *&V) { return V; }
507
508struct is_one {
509 bool isValue(const APInt &C) { return C.isOneValue(); }
510};
511/// Match an integer 1 or a vector with all elements equal to 1.
512/// For vectors, this includes constants with undefined elements.
513inline cst_pred_ty<is_one> m_One() {
514 return cst_pred_ty<is_one>();
515}
516
517struct is_zero_int {
518 bool isValue(const APInt &C) { return C.isNullValue(); }
519};
520/// Match an integer 0 or a vector with all elements equal to 0.
521/// For vectors, this includes constants with undefined elements.
522inline cst_pred_ty<is_zero_int> m_ZeroInt() {
523 return cst_pred_ty<is_zero_int>();
524}
525
526struct is_zero {
527 template <typename ITy> bool match(ITy *V) {
528 auto *C = dyn_cast<Constant>(V);
529 // FIXME: this should be able to do something for scalable vectors
530 return C && (C->isNullValue() || cst_pred_ty<is_zero_int>().match(C));
531 }
532};
533/// Match any null constant or a vector with all elements equal to 0.
534/// For vectors, this includes constants with undefined elements.
535inline is_zero m_Zero() {
536 return is_zero();
537}
538
539struct is_power2 {
540 bool isValue(const APInt &C) { return C.isPowerOf2(); }
541};
542/// Match an integer or vector power-of-2.
543/// For vectors, this includes constants with undefined elements.
544inline cst_pred_ty<is_power2> m_Power2() {
545 return cst_pred_ty<is_power2>();
546}
547inline api_pred_ty<is_power2> m_Power2(const APInt *&V) {
548 return V;
549}
550
551struct is_negated_power2 {
552 bool isValue(const APInt &C) { return (-C).isPowerOf2(); }
553};
554/// Match an integer or vector negated power-of-2.
555/// For vectors, this includes constants with undefined elements.
556inline cst_pred_ty<is_negated_power2> m_NegatedPower2() {
557 return cst_pred_ty<is_negated_power2>();
558}
559inline api_pred_ty<is_negated_power2> m_NegatedPower2(const APInt *&V) {
560 return V;
561}
562
563struct is_power2_or_zero {
564 bool isValue(const APInt &C) { return !C || C.isPowerOf2(); }
565};
566/// Match an integer or vector of 0 or power-of-2 values.
567/// For vectors, this includes constants with undefined elements.
568inline cst_pred_ty<is_power2_or_zero> m_Power2OrZero() {
569 return cst_pred_ty<is_power2_or_zero>();
570}
571inline api_pred_ty<is_power2_or_zero> m_Power2OrZero(const APInt *&V) {
572 return V;
573}
574
575struct is_sign_mask {
576 bool isValue(const APInt &C) { return C.isSignMask(); }
577};
578/// Match an integer or vector with only the sign bit(s) set.
579/// For vectors, this includes constants with undefined elements.
580inline cst_pred_ty<is_sign_mask> m_SignMask() {
581 return cst_pred_ty<is_sign_mask>();
582}
583
584struct is_lowbit_mask {
585 bool isValue(const APInt &C) { return C.isMask(); }
586};
587/// Match an integer or vector with only the low bit(s) set.
588/// For vectors, this includes constants with undefined elements.
589inline cst_pred_ty<is_lowbit_mask> m_LowBitMask() {
590 return cst_pred_ty<is_lowbit_mask>();
591}
592
593struct icmp_pred_with_threshold {
594 ICmpInst::Predicate Pred;
595 const APInt *Thr;
596 bool isValue(const APInt &C) {
597 switch (Pred) {
598 case ICmpInst::Predicate::ICMP_EQ:
599 return C.eq(*Thr);
600 case ICmpInst::Predicate::ICMP_NE:
601 return C.ne(*Thr);
602 case ICmpInst::Predicate::ICMP_UGT:
603 return C.ugt(*Thr);
604 case ICmpInst::Predicate::ICMP_UGE:
605 return C.uge(*Thr);
606 case ICmpInst::Predicate::ICMP_ULT:
607 return C.ult(*Thr);
608 case ICmpInst::Predicate::ICMP_ULE:
609 return C.ule(*Thr);
610 case ICmpInst::Predicate::ICMP_SGT:
611 return C.sgt(*Thr);
612 case ICmpInst::Predicate::ICMP_SGE:
613 return C.sge(*Thr);
614 case ICmpInst::Predicate::ICMP_SLT:
615 return C.slt(*Thr);
616 case ICmpInst::Predicate::ICMP_SLE:
617 return C.sle(*Thr);
618 default:
619 llvm_unreachable("Unhandled ICmp predicate");
620 }
621 }
622};
623/// Match an integer or vector with every element comparing 'pred' (eq/ne/...)
624/// to Threshold. For vectors, this includes constants with undefined elements.
625inline cst_pred_ty<icmp_pred_with_threshold>
626m_SpecificInt_ICMP(ICmpInst::Predicate Predicate, const APInt &Threshold) {
627 cst_pred_ty<icmp_pred_with_threshold> P;
628 P.Pred = Predicate;
629 P.Thr = &Threshold;
630 return P;
631}
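A sketch of m_SpecificInt_ICMP, assuming V is an integer-typed Value*; note that the threshold APInt must use the same bit width as the constant being tested:

#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace llvm::PatternMatch;

// Hypothetical helper: recognise "shl X, C" where every element of C is
// unsigned-less-than 8.
static bool isShiftByLessThan8(Value *V) {
  Value *X;
  unsigned BW = V->getType()->getScalarSizeInBits();
  return match(V, m_Shl(m_Value(X),
                        m_SpecificInt_ICMP(ICmpInst::ICMP_ULT, APInt(BW, 8))));
}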
632
633struct is_nan {
634 bool isValue(const APFloat &C) { return C.isNaN(); }
635};
636/// Match an arbitrary NaN constant. This includes quiet and signaling NaNs.
637/// For vectors, this includes constants with undefined elements.
638inline cstfp_pred_ty<is_nan> m_NaN() {
639 return cstfp_pred_ty<is_nan>();
640}
641
642struct is_nonnan {
643 bool isValue(const APFloat &C) { return !C.isNaN(); }
644};
645/// Match a non-NaN FP constant.
646/// For vectors, this includes constants with undefined elements.
647inline cstfp_pred_ty<is_nonnan> m_NonNaN() {
648 return cstfp_pred_ty<is_nonnan>();
649}
650
651struct is_inf {
652 bool isValue(const APFloat &C) { return C.isInfinity(); }
653};
654/// Match a positive or negative infinity FP constant.
655/// For vectors, this includes constants with undefined elements.
656inline cstfp_pred_ty<is_inf> m_Inf() {
657 return cstfp_pred_ty<is_inf>();
658}
659
660struct is_noninf {
661 bool isValue(const APFloat &C) { return !C.isInfinity(); }
662};
663/// Match a non-infinity FP constant, i.e. finite or NaN.
664/// For vectors, this includes constants with undefined elements.
665inline cstfp_pred_ty<is_noninf> m_NonInf() {
666 return cstfp_pred_ty<is_noninf>();
667}
668
669struct is_finite {
670 bool isValue(const APFloat &C) { return C.isFinite(); }
671};
672/// Match a finite FP constant, i.e. not infinity or NaN.
673/// For vectors, this includes constants with undefined elements.
674inline cstfp_pred_ty<is_finite> m_Finite() {
675 return cstfp_pred_ty<is_finite>();
676}
677inline apf_pred_ty<is_finite> m_Finite(const APFloat *&V) { return V; }
678
679struct is_finitenonzero {
680 bool isValue(const APFloat &C) { return C.isFiniteNonZero(); }
681};
682/// Match a finite non-zero FP constant.
683/// For vectors, this includes constants with undefined elements.
684inline cstfp_pred_ty<is_finitenonzero> m_FiniteNonZero() {
685 return cstfp_pred_ty<is_finitenonzero>();
686}
687inline apf_pred_ty<is_finitenonzero> m_FiniteNonZero(const APFloat *&V) {
688 return V;
689}
690
691struct is_any_zero_fp {
692 bool isValue(const APFloat &C) { return C.isZero(); }
693};
694/// Match a floating-point negative zero or positive zero.
695/// For vectors, this includes constants with undefined elements.
696inline cstfp_pred_ty<is_any_zero_fp> m_AnyZeroFP() {
697 return cstfp_pred_ty<is_any_zero_fp>();
698}
699
700struct is_pos_zero_fp {
701 bool isValue(const APFloat &C) { return C.isPosZero(); }
702};
703/// Match a floating-point positive zero.
704/// For vectors, this includes constants with undefined elements.
705inline cstfp_pred_ty<is_pos_zero_fp> m_PosZeroFP() {
706 return cstfp_pred_ty<is_pos_zero_fp>();
707}
708
709struct is_neg_zero_fp {
710 bool isValue(const APFloat &C) { return C.isNegZero(); }
711};
712/// Match a floating-point negative zero.
713/// For vectors, this includes constants with undefined elements.
714inline cstfp_pred_ty<is_neg_zero_fp> m_NegZeroFP() {
715 return cstfp_pred_ty<is_neg_zero_fp>();
716}
717
718struct is_non_zero_fp {
719 bool isValue(const APFloat &C) { return C.isNonZero(); }
720};
721/// Match a floating-point non-zero.
722/// For vectors, this includes constants with undefined elements.
723inline cstfp_pred_ty<is_non_zero_fp> m_NonZeroFP() {
724 return cstfp_pred_ty<is_non_zero_fp>();
725}
726
727///////////////////////////////////////////////////////////////////////////////
728
729template <typename Class> struct bind_ty {
730 Class *&VR;
731
732 bind_ty(Class *&V) : VR(V) {}
733
734 template <typename ITy> bool match(ITy *V) {
735 if (auto *CV = dyn_cast<Class>(V)) {
736 VR = CV;
737 return true;
738 }
739 return false;
740 }
741};
742
743/// Match a value, capturing it if we match.
744inline bind_ty<Value> m_Value(Value *&V) { return V; }
745inline bind_ty<const Value> m_Value(const Value *&V) { return V; }
746
747/// Match an instruction, capturing it if we match.
748inline bind_ty<Instruction> m_Instruction(Instruction *&I) { return I; }
749/// Match a unary operator, capturing it if we match.
750inline bind_ty<UnaryOperator> m_UnOp(UnaryOperator *&I) { return I; }
751/// Match a binary operator, capturing it if we match.
752inline bind_ty<BinaryOperator> m_BinOp(BinaryOperator *&I) { return I; }
753/// Match a with overflow intrinsic, capturing it if we match.
754inline bind_ty<WithOverflowInst> m_WithOverflowInst(WithOverflowInst *&I) { return I; }
755inline bind_ty<const WithOverflowInst>
756m_WithOverflowInst(const WithOverflowInst *&I) {
757 return I;
758}
759
760/// Match a Constant, capturing the value if we match.
761inline bind_ty<Constant> m_Constant(Constant *&C) { return C; }
762
763/// Match a ConstantInt, capturing the value if we match.
764inline bind_ty<ConstantInt> m_ConstantInt(ConstantInt *&CI) { return CI; }
765
766/// Match a ConstantFP, capturing the value if we match.
767inline bind_ty<ConstantFP> m_ConstantFP(ConstantFP *&C) { return C; }
768
769/// Match a ConstantExpr, capturing the value if we match.
770inline bind_ty<ConstantExpr> m_ConstantExpr(ConstantExpr *&C) { return C; }
771
772/// Match a basic block value, capturing it if we match.
773inline bind_ty<BasicBlock> m_BasicBlock(BasicBlock *&V) { return V; }
774inline bind_ty<const BasicBlock> m_BasicBlock(const BasicBlock *&V) {
775 return V;
776}
777
778/// Match an arbitrary immediate Constant and ignore it.
779inline match_combine_and<class_match<Constant>,
780 match_unless<class_match<ConstantExpr>>>
781m_ImmConstant() {
782 return m_CombineAnd(m_Constant(), m_Unless(m_ConstantExpr()));
783}
784
785/// Match an immediate Constant, capturing the value if we match.
786inline match_combine_and<bind_ty<Constant>,
787 match_unless<class_match<ConstantExpr>>>
788m_ImmConstant(Constant *&C) {
789 return m_CombineAnd(m_Constant(C), m_Unless(m_ConstantExpr()));
790}
791
792/// Match a specified Value*.
793struct specificval_ty {
794 const Value *Val;
795
796 specificval_ty(const Value *V) : Val(V) {}
797
798 template <typename ITy> bool match(ITy *V) { return V == Val; }
799};
800
801/// Match if we have a specific specified value.
802inline specificval_ty m_Specific(const Value *V) { return V; }
803
804/// Stores a reference to the Value *, not the Value * itself,
805/// thus can be used in commutative matchers.
806template <typename Class> struct deferredval_ty {
807 Class *const &Val;
808
809 deferredval_ty(Class *const &V) : Val(V) {}
810
811 template <typename ITy> bool match(ITy *const V) { return V == Val; }
812};
813
814/// Like m_Specific(), but works if the specific value to match is determined
815/// as part of the same match() expression. For example:
816/// m_Add(m_Value(X), m_Specific(X)) is incorrect, because m_Specific() will
817/// bind X before the pattern match starts.
818/// m_Add(m_Value(X), m_Deferred(X)) is correct, and will check against
819/// whichever value m_Value(X) populated.
820inline deferredval_ty<Value> m_Deferred(Value *const &V) { return V; }
821inline deferredval_ty<const Value> m_Deferred(const Value *const &V) {
822 return V;
823}
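The distinction above in a compilable sketch (helper name hypothetical): m_Deferred(X) tests against the value bound by m_Value(X) within the same match() call, which m_Specific(X) cannot do:

#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace llvm::PatternMatch;

// Recognise "add X, X"; m_Specific(X) here would compare against whatever
// X happened to hold before match() started.
static bool isSelfAdd(Value *V) {
  Value *X;
  return match(V, m_Add(m_Value(X), m_Deferred(X)));
}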
824
825/// Match a specified floating point value or a vector with all elements
826/// equal to that value.
827struct specific_fpval {
828 double Val;
829
830 specific_fpval(double V) : Val(V) {}
831
832 template <typename ITy> bool match(ITy *V) {
833 if (const auto *CFP = dyn_cast<ConstantFP>(V))
834 return CFP->isExactlyValue(Val);
835 if (V->getType()->isVectorTy())
836 if (const auto *C = dyn_cast<Constant>(V))
837 if (auto *CFP = dyn_cast_or_null<ConstantFP>(C->getSplatValue()))
838 return CFP->isExactlyValue(Val);
839 return false;
840 }
841};
842
843/// Match a specific floating point value or vector with all elements
844/// equal to the value.
845inline specific_fpval m_SpecificFP(double V) { return specific_fpval(V); }
846
847/// Match a float 1.0 or vector with all elements equal to 1.0.
848inline specific_fpval m_FPOne() { return m_SpecificFP(1.0); }
849
850struct bind_const_intval_ty {
851 uint64_t &VR;
852
853 bind_const_intval_ty(uint64_t &V) : VR(V) {}
854
855 template <typename ITy> bool match(ITy *V) {
856 if (const auto *CV = dyn_cast<ConstantInt>(V))
857 if (CV->getValue().ule(UINT64_MAX)) {
858 VR = CV->getZExtValue();
859 return true;
860 }
861 return false;
862 }
863};
864
865/// Match a specified integer value or a vector with all elements equal
866/// to that value.
867template <bool AllowUndefs>
868struct specific_intval {
869 APInt Val;
870
871 specific_intval(APInt V) : Val(std::move(V)) {}
872
873 template <typename ITy> bool match(ITy *V) {
874 const auto *CI = dyn_cast<ConstantInt>(V);
875 if (!CI && V->getType()->isVectorTy())
876 if (const auto *C = dyn_cast<Constant>(V))
877 CI = dyn_cast_or_null<ConstantInt>(C->getSplatValue(AllowUndefs));
878
879 return CI && APInt::isSameValue(CI->getValue(), Val);
880 }
881};
882
883/// Match a specific integer value or vector with all elements equal to
884/// the value.
885inline specific_intval<false> m_SpecificInt(APInt V) {
886 return specific_intval<false>(std::move(V));
887}
888
889inline specific_intval<false> m_SpecificInt(uint64_t V) {
890 return m_SpecificInt(APInt(64, V));
891}
892
893inline specific_intval<true> m_SpecificIntAllowUndef(APInt V) {
894 return specific_intval<true>(std::move(V));
895}
896
897inline specific_intval<true> m_SpecificIntAllowUndef(uint64_t V) {
898 return m_SpecificIntAllowUndef(APInt(64, V));
899}
900
901/// Match a ConstantInt and bind to its value. This does not match
902/// ConstantInts wider than 64-bits.
903inline bind_const_intval_ty m_ConstantInt(uint64_t &V) { return V; }
904
905/// Match a specified basic block value.
906struct specific_bbval {
907 BasicBlock *Val;
908
909 specific_bbval(BasicBlock *Val) : Val(Val) {}
910
911 template <typename ITy> bool match(ITy *V) {
912 const auto *BB = dyn_cast<BasicBlock>(V);
913 return BB && BB == Val;
914 }
915};
916
917/// Match a specific basic block value.
918inline specific_bbval m_SpecificBB(BasicBlock *BB) {
919 return specific_bbval(BB);
920}
921
922/// A commutative-friendly version of m_Specific().
923inline deferredval_ty<BasicBlock> m_Deferred(BasicBlock *const &BB) {
924 return BB;
925}
926inline deferredval_ty<const BasicBlock>
927m_Deferred(const BasicBlock *const &BB) {
928 return BB;
929}
930
931//===----------------------------------------------------------------------===//
932// Matcher for any binary operator.
933//
934template <typename LHS_t, typename RHS_t, bool Commutable = false>
935struct AnyBinaryOp_match {
936 LHS_t L;
937 RHS_t R;
938
939 // The evaluation order is always stable, regardless of Commutability.
940 // The LHS is always matched first.
941 AnyBinaryOp_match(const LHS_t &LHS, const RHS_t &RHS) : L(LHS), R(RHS) {}
942
943 template <typename OpTy> bool match(OpTy *V) {
944 if (auto *I = dyn_cast<BinaryOperator>(V))
945 return (L.match(I->getOperand(0)) && R.match(I->getOperand(1))) ||
946 (Commutable && L.match(I->getOperand(1)) &&
947 R.match(I->getOperand(0)));
948 return false;
949 }
950};
951
952template <typename LHS, typename RHS>
953inline AnyBinaryOp_match<LHS, RHS> m_BinOp(const LHS &L, const RHS &R) {
954 return AnyBinaryOp_match<LHS, RHS>(L, R);
955}
956
957//===----------------------------------------------------------------------===//
958// Matcher for any unary operator.
959// TODO fuse unary, binary matcher into n-ary matcher
960//
961template <typename OP_t> struct AnyUnaryOp_match {
962 OP_t X;
963
964 AnyUnaryOp_match(const OP_t &X) : X(X) {}
965
966 template <typename OpTy> bool match(OpTy *V) {
967 if (auto *I = dyn_cast<UnaryOperator>(V))
968 return X.match(I->getOperand(0));
969 return false;
970 }
971};
972
973template <typename OP_t> inline AnyUnaryOp_match<OP_t> m_UnOp(const OP_t &X) {
974 return AnyUnaryOp_match<OP_t>(X);
975}
976
977//===----------------------------------------------------------------------===//
978// Matchers for specific binary operators.
979//
980
981template <typename LHS_t, typename RHS_t, unsigned Opcode,
982 bool Commutable = false>
983struct BinaryOp_match {
984 LHS_t L;
985 RHS_t R;
986
987 // The evaluation order is always stable, regardless of Commutability.
988 // The LHS is always matched first.
989 BinaryOp_match(const LHS_t &LHS, const RHS_t &RHS) : L(LHS), R(RHS) {}
990
991 template <typename OpTy> bool match(OpTy *V) {
992 if (V->getValueID() == Value::InstructionVal + Opcode) {
993 auto *I = cast<BinaryOperator>(V);
994 return (L.match(I->getOperand(0)) && R.match(I->getOperand(1))) ||
995 (Commutable && L.match(I->getOperand(1)) &&
996 R.match(I->getOperand(0)));
997 }
998 if (auto *CE = dyn_cast<ConstantExpr>(V))
999 return CE->getOpcode() == Opcode &&
1000 ((L.match(CE->getOperand(0)) && R.match(CE->getOperand(1))) ||
1001 (Commutable && L.match(CE->getOperand(1)) &&
1002 R.match(CE->getOperand(0))));
1003 return false;
1004 }
1005};
1006
1007template <typename LHS, typename RHS>
1008inline BinaryOp_match<LHS, RHS, Instruction::Add> m_Add(const LHS &L,
1009 const RHS &R) {
1010 return BinaryOp_match<LHS, RHS, Instruction::Add>(L, R);
1011}
1012
1013template <typename LHS, typename RHS>
1014inline BinaryOp_match<LHS, RHS, Instruction::FAdd> m_FAdd(const LHS &L,
1015 const RHS &R) {
1016 return BinaryOp_match<LHS, RHS, Instruction::FAdd>(L, R);
1017}
1018
1019template <typename LHS, typename RHS>
1020inline BinaryOp_match<LHS, RHS, Instruction::Sub> m_Sub(const LHS &L,
1021 const RHS &R) {
1022 return BinaryOp_match<LHS, RHS, Instruction::Sub>(L, R);
1023}
1024
1025template <typename LHS, typename RHS>
1026inline BinaryOp_match<LHS, RHS, Instruction::FSub> m_FSub(const LHS &L,
1027 const RHS &R) {
1028 return BinaryOp_match<LHS, RHS, Instruction::FSub>(L, R);
1029}
1030
1031template <typename Op_t> struct FNeg_match {
1032 Op_t X;
1033
1034 FNeg_match(const Op_t &Op) : X(Op) {}
1035 template <typename OpTy> bool match(OpTy *V) {
1036 auto *FPMO = dyn_cast<FPMathOperator>(V);
1037 if (!FPMO) return false;
1038
1039 if (FPMO->getOpcode() == Instruction::FNeg)
1040 return X.match(FPMO->getOperand(0));
1041
1042 if (FPMO->getOpcode() == Instruction::FSub) {
1043 if (FPMO->hasNoSignedZeros()) {
1044 // With 'nsz', any zero goes.
1045 if (!cstfp_pred_ty<is_any_zero_fp>().match(FPMO->getOperand(0)))
1046 return false;
1047 } else {
1048 // Without 'nsz', we need fsub -0.0, X exactly.
1049 if (!cstfp_pred_ty<is_neg_zero_fp>().match(FPMO->getOperand(0)))
1050 return false;
1051 }
1052
1053 return X.match(FPMO->getOperand(1));
1054 }
1055
1056 return false;
1057 }
1058};
1059
1060/// Match 'fneg X' as 'fsub -0.0, X'.
1061template <typename OpTy>
1062inline FNeg_match<OpTy>
1063m_FNeg(const OpTy &X) {
1064 return FNeg_match<OpTy>(X);
1065}
1066
1067/// Match 'fneg X' as 'fsub +-0.0, X'.
1068template <typename RHS>
1069inline BinaryOp_match<cstfp_pred_ty<is_any_zero_fp>, RHS, Instruction::FSub>
1070m_FNegNSZ(const RHS &X) {
1071 return m_FSub(m_AnyZeroFP(), X);
1072}
1073
1074template <typename LHS, typename RHS>
1075inline BinaryOp_match<LHS, RHS, Instruction::Mul> m_Mul(const LHS &L,
1076 const RHS &R) {
1077 return BinaryOp_match<LHS, RHS, Instruction::Mul>(L, R);
1078}
1079
1080template <typename LHS, typename RHS>
1081inline BinaryOp_match<LHS, RHS, Instruction::FMul> m_FMul(const LHS &L,
1082 const RHS &R) {
1083 return BinaryOp_match<LHS, RHS, Instruction::FMul>(L, R);
1084}
1085
1086template <typename LHS, typename RHS>
1087inline BinaryOp_match<LHS, RHS, Instruction::UDiv> m_UDiv(const LHS &L,
1088 const RHS &R) {
1089 return BinaryOp_match<LHS, RHS, Instruction::UDiv>(L, R);
1090}
1091
1092template <typename LHS, typename RHS>
1093inline BinaryOp_match<LHS, RHS, Instruction::SDiv> m_SDiv(const LHS &L,
1094 const RHS &R) {
1095 return BinaryOp_match<LHS, RHS, Instruction::SDiv>(L, R);
1096}
1097
1098template <typename LHS, typename RHS>
1099inline BinaryOp_match<LHS, RHS, Instruction::FDiv> m_FDiv(const LHS &L,
1100 const RHS &R) {
1101 return BinaryOp_match<LHS, RHS, Instruction::FDiv>(L, R);
1102}
1103
1104template <typename LHS, typename RHS>
1105inline BinaryOp_match<LHS, RHS, Instruction::URem> m_URem(const LHS &L,
1106 const RHS &R) {
1107 return BinaryOp_match<LHS, RHS, Instruction::URem>(L, R);
1108}
1109
1110template <typename LHS, typename RHS>
1111inline BinaryOp_match<LHS, RHS, Instruction::SRem> m_SRem(const LHS &L,
1112 const RHS &R) {
1113 return BinaryOp_match<LHS, RHS, Instruction::SRem>(L, R);
1114}
1115
1116template <typename LHS, typename RHS>
1117inline BinaryOp_match<LHS, RHS, Instruction::FRem> m_FRem(const LHS &L,
1118 const RHS &R) {
1119 return BinaryOp_match<LHS, RHS, Instruction::FRem>(L, R);
1120}
1121
1122template <typename LHS, typename RHS>
1123inline BinaryOp_match<LHS, RHS, Instruction::And> m_And(const LHS &L,
1124 const RHS &R) {
1125 return BinaryOp_match<LHS, RHS, Instruction::And>(L, R);
1126}
1127
1128template <typename LHS, typename RHS>
1129inline BinaryOp_match<LHS, RHS, Instruction::Or> m_Or(const LHS &L,
1130 const RHS &R) {
1131 return BinaryOp_match<LHS, RHS, Instruction::Or>(L, R);
1132}
1133
1134template <typename LHS, typename RHS>
1135inline BinaryOp_match<LHS, RHS, Instruction::Xor> m_Xor(const LHS &L,
1136 const RHS &R) {
1137 return BinaryOp_match<LHS, RHS, Instruction::Xor>(L, R);
1138}
1139
1140template <typename LHS, typename RHS>
1141inline BinaryOp_match<LHS, RHS, Instruction::Shl> m_Shl(const LHS &L,
1142 const RHS &R) {
1143 return BinaryOp_match<LHS, RHS, Instruction::Shl>(L, R);
1144}
1145
1146template <typename LHS, typename RHS>
1147inline BinaryOp_match<LHS, RHS, Instruction::LShr> m_LShr(const LHS &L,
1148 const RHS &R) {
1149 return BinaryOp_match<LHS, RHS, Instruction::LShr>(L, R);
1150}
1151
1152template <typename LHS, typename RHS>
1153inline BinaryOp_match<LHS, RHS, Instruction::AShr> m_AShr(const LHS &L,
1154 const RHS &R) {
1155 return BinaryOp_match<LHS, RHS, Instruction::AShr>(L, R);
1156}
1157
1158template <typename LHS_t, typename RHS_t, unsigned Opcode,
1159 unsigned WrapFlags = 0>
1160struct OverflowingBinaryOp_match {
1161 LHS_t L;
1162 RHS_t R;
1163
1164 OverflowingBinaryOp_match(const LHS_t &LHS, const RHS_t &RHS)
1165 : L(LHS), R(RHS) {}
1166
1167 template <typename OpTy> bool match(OpTy *V) {
1168 if (auto *Op = dyn_cast<OverflowingBinaryOperator>(V)) {
1169 if (Op->getOpcode() != Opcode)
1170 return false;
1171 if ((WrapFlags & OverflowingBinaryOperator::NoUnsignedWrap) &&
1172 !Op->hasNoUnsignedWrap())
1173 return false;
1174 if ((WrapFlags & OverflowingBinaryOperator::NoSignedWrap) &&
1175 !Op->hasNoSignedWrap())
1176 return false;
1177 return L.match(Op->getOperand(0)) && R.match(Op->getOperand(1));
1178 }
1179 return false;
1180 }
1181};
1182
1183template <typename LHS, typename RHS>
1184inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Add,
1185 OverflowingBinaryOperator::NoSignedWrap>
1186m_NSWAdd(const LHS &L, const RHS &R) {
1187 return OverflowingBinaryOp_match<LHS, RHS, Instruction::Add,
1188 OverflowingBinaryOperator::NoSignedWrap>(
1189 L, R);
1190}
1191template <typename LHS, typename RHS>
1192inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Sub,
1193 OverflowingBinaryOperator::NoSignedWrap>
1194m_NSWSub(const LHS &L, const RHS &R) {
1195 return OverflowingBinaryOp_match<LHS, RHS, Instruction::Sub,
1196 OverflowingBinaryOperator::NoSignedWrap>(
1197 L, R);
1198}
1199template <typename LHS, typename RHS>
1200inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Mul,
1201 OverflowingBinaryOperator::NoSignedWrap>
1202m_NSWMul(const LHS &L, const RHS &R) {
1203 return OverflowingBinaryOp_match<LHS, RHS, Instruction::Mul,
1204 OverflowingBinaryOperator::NoSignedWrap>(
1205 L, R);
1206}
1207template <typename LHS, typename RHS>
1208inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Shl,
1209 OverflowingBinaryOperator::NoSignedWrap>
1210m_NSWShl(const LHS &L, const RHS &R) {
1211 return OverflowingBinaryOp_match<LHS, RHS, Instruction::Shl,
1212 OverflowingBinaryOperator::NoSignedWrap>(
1213 L, R);
1214}
1215
1216template <typename LHS, typename RHS>
1217inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Add,
1218 OverflowingBinaryOperator::NoUnsignedWrap>
1219m_NUWAdd(const LHS &L, const RHS &R) {
1220 return OverflowingBinaryOp_match<LHS, RHS, Instruction::Add,
1221 OverflowingBinaryOperator::NoUnsignedWrap>(
1222 L, R);
1223}
1224template <typename LHS, typename RHS>
1225inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Sub,
1226 OverflowingBinaryOperator::NoUnsignedWrap>
1227m_NUWSub(const LHS &L, const RHS &R) {
1228 return OverflowingBinaryOp_match<LHS, RHS, Instruction::Sub,
1229 OverflowingBinaryOperator::NoUnsignedWrap>(
1230 L, R);
1231}
1232template <typename LHS, typename RHS>
1233inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Mul,
1234 OverflowingBinaryOperator::NoUnsignedWrap>
1235m_NUWMul(const LHS &L, const RHS &R) {
1236 return OverflowingBinaryOp_match<LHS, RHS, Instruction::Mul,
1237 OverflowingBinaryOperator::NoUnsignedWrap>(
1238 L, R);
1239}
1240template <typename LHS, typename RHS>
1241inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Shl,
1242 OverflowingBinaryOperator::NoUnsignedWrap>
1243m_NUWShl(const LHS &L, const RHS &R) {
1244 return OverflowingBinaryOp_match<LHS, RHS, Instruction::Shl,
1245 OverflowingBinaryOperator::NoUnsignedWrap>(
1246 L, R);
1247}
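A sketch using one of the overflow-aware matchers above (helper name hypothetical): the match succeeds only when the add carries the nsw flag, in addition to the operand patterns matching:

#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace llvm::PatternMatch;

// Recognise "add nsw X, 1" (an increment known not to signed-wrap).
static bool isNSWIncrement(Value *V, Value *&X) {
  return match(V, m_NSWAdd(m_Value(X), m_One()));
}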
1248
1249//===----------------------------------------------------------------------===//
1250// Class that matches a group of binary opcodes.
1251//
1252template <typename LHS_t, typename RHS_t, typename Predicate>
1253struct BinOpPred_match : Predicate {
1254 LHS_t L;
1255 RHS_t R;
1256
1257 BinOpPred_match(const LHS_t &LHS, const RHS_t &RHS) : L(LHS), R(RHS) {}
1258
1259 template <typename OpTy> bool match(OpTy *V) {
1260 if (auto *I = dyn_cast<Instruction>(V))
1261 return this->isOpType(I->getOpcode()) && L.match(I->getOperand(0)) &&
1262 R.match(I->getOperand(1));
1263 if (auto *CE = dyn_cast<ConstantExpr>(V))
1264 return this->isOpType(CE->getOpcode()) && L.match(CE->getOperand(0)) &&
1265 R.match(CE->getOperand(1));
1266 return false;
1267 }
1268};
1269
1270struct is_shift_op {
1271 bool isOpType(unsigned Opcode) { return Instruction::isShift(Opcode); }
1272};
1273
1274struct is_right_shift_op {
1275 bool isOpType(unsigned Opcode) {
1276 return Opcode == Instruction::LShr || Opcode == Instruction::AShr;
1277 }
1278};
1279
1280struct is_logical_shift_op {
1281 bool isOpType(unsigned Opcode) {
1282 return Opcode == Instruction::LShr || Opcode == Instruction::Shl;
1283 }
1284};
1285
1286struct is_bitwiselogic_op {
1287 bool isOpType(unsigned Opcode) {
1288 return Instruction::isBitwiseLogicOp(Opcode);
1289 }
1290};
1291
1292struct is_idiv_op {
1293 bool isOpType(unsigned Opcode) {
1294 return Opcode == Instruction::SDiv || Opcode == Instruction::UDiv;
1295 }
1296};
1297
1298struct is_irem_op {
1299 bool isOpType(unsigned Opcode) {
1300 return Opcode == Instruction::SRem || Opcode == Instruction::URem;
1301 }
1302};
1303
1304/// Matches shift operations.
1305template <typename LHS, typename RHS>
1306inline BinOpPred_match<LHS, RHS, is_shift_op> m_Shift(const LHS &L,
1307 const RHS &R) {
1308 return BinOpPred_match<LHS, RHS, is_shift_op>(L, R);
1309}
1310
1311/// Matches right shift operations (lshr or ashr).
1312template <typename LHS, typename RHS>
1313inline BinOpPred_match<LHS, RHS, is_right_shift_op> m_Shr(const LHS &L,
1314 const RHS &R) {
1315 return BinOpPred_match<LHS, RHS, is_right_shift_op>(L, R);
1316}
1317
1318/// Matches logical shift operations.
1319template <typename LHS, typename RHS>
1320inline BinOpPred_match<LHS, RHS, is_logical_shift_op>
1321m_LogicalShift(const LHS &L, const RHS &R) {
1322 return BinOpPred_match<LHS, RHS, is_logical_shift_op>(L, R);
1323}
1324
1325/// Matches bitwise logic operations.
1326template <typename LHS, typename RHS>
1327inline BinOpPred_match<LHS, RHS, is_bitwiselogic_op>
1328m_BitwiseLogic(const LHS &L, const RHS &R) {
1329 return BinOpPred_match<LHS, RHS, is_bitwiselogic_op>(L, R);
1330}
1331
1332/// Matches integer division operations.
1333template <typename LHS, typename RHS>
1334inline BinOpPred_match<LHS, RHS, is_idiv_op> m_IDiv(const LHS &L,
1335 const RHS &R) {
1336 return BinOpPred_match<LHS, RHS, is_idiv_op>(L, R);
1337}
1338
1339/// Matches integer remainder operations.
1340template <typename LHS, typename RHS>
1341inline BinOpPred_match<LHS, RHS, is_irem_op> m_IRem(const LHS &L,
1342 const RHS &R) {
1343 return BinOpPred_match<LHS, RHS, is_irem_op>(L, R);
1344}
1345
1346//===----------------------------------------------------------------------===//
1347// Class that matches exact binary ops.
1348//
1349template <typename SubPattern_t> struct Exact_match {
1350 SubPattern_t SubPattern;
1351
1352 Exact_match(const SubPattern_t &SP) : SubPattern(SP) {}
1353
1354 template <typename OpTy> bool match(OpTy *V) {
1355 if (auto *PEO = dyn_cast<PossiblyExactOperator>(V))
1356 return PEO->isExact() && SubPattern.match(V);
1357 return false;
1358 }
1359};
1360
1361template <typename T> inline Exact_match<T> m_Exact(const T &SubPattern) {
1362 return SubPattern;
1363}
1364
1365//===----------------------------------------------------------------------===//
1366// Matchers for CmpInst classes
1367//
1368
1369template <typename LHS_t, typename RHS_t, typename Class, typename PredicateTy,
1370 bool Commutable = false>
1371struct CmpClass_match {
1372 PredicateTy &Predicate;
1373 LHS_t L;
1374 RHS_t R;
1375
1376 // The evaluation order is always stable, regardless of Commutability.
1377 // The LHS is always matched first.
1378 CmpClass_match(PredicateTy &Pred, const LHS_t &LHS, const RHS_t &RHS)
1379 : Predicate(Pred), L(LHS), R(RHS) {}
1380
1381 template <typename OpTy> bool match(OpTy *V) {
1382 if (auto *I = dyn_cast<Class>(V)) {
1383 if (L.match(I->getOperand(0)) && R.match(I->getOperand(1))) {
1384 Predicate = I->getPredicate();
1385 return true;
1386 } else if (Commutable && L.match(I->getOperand(1)) &&
1387 R.match(I->getOperand(0))) {
1388 Predicate = I->getSwappedPredicate();
1389 return true;
1390 }
1391 }
1392 return false;
1393 }
1394};
1395
1396template <typename LHS, typename RHS>
1397inline CmpClass_match<LHS, RHS, CmpInst, CmpInst::Predicate>
1398m_Cmp(CmpInst::Predicate &Pred, const LHS &L, const RHS &R) {
1399 return CmpClass_match<LHS, RHS, CmpInst, CmpInst::Predicate>(Pred, L, R);
1400}
1401
1402template <typename LHS, typename RHS>
1403inline CmpClass_match<LHS, RHS, ICmpInst, ICmpInst::Predicate>
1404m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R) {
1405 return CmpClass_match<LHS, RHS, ICmpInst, ICmpInst::Predicate>(Pred, L, R);
1406}
1407
1408template <typename LHS, typename RHS>
1409inline CmpClass_match<LHS, RHS, FCmpInst, FCmpInst::Predicate>
1410m_FCmp(FCmpInst::Predicate &Pred, const LHS &L, const RHS &R) {
1411 return CmpClass_match<LHS, RHS, FCmpInst, FCmpInst::Predicate>(Pred, L, R);
1412}
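A sketch of predicate capture with m_ICmp (helper name hypothetical); the operands are matched in order, and on success Pred holds the compare's predicate:

#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace llvm::PatternMatch;

// Recognise "icmp <pred> X, 0" and report the predicate and LHS.
static bool isCompareWithZero(Value *V, ICmpInst::Predicate &Pred, Value *&X) {
  return match(V, m_ICmp(Pred, m_Value(X), m_ZeroInt()));
}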
1413
1414//===----------------------------------------------------------------------===//
1415// Matchers for instructions with a given opcode and number of operands.
1416//
1417
1418/// Matches instructions with Opcode and one operand.
1419template <typename T0, unsigned Opcode> struct OneOps_match {
1420 T0 Op1;
1421
1422 OneOps_match(const T0 &Op1) : Op1(Op1) {}
1423
1424 template <typename OpTy> bool match(OpTy *V) {
1425 if (V->getValueID() == Value::InstructionVal + Opcode) {
1426 auto *I = cast<Instruction>(V);
1427 return Op1.match(I->getOperand(0));
1428 }
1429 return false;
1430 }
1431};
1432
1433/// Matches instructions with Opcode and two operands.
1434template <typename T0, typename T1, unsigned Opcode> struct TwoOps_match {
1435 T0 Op1;
1436 T1 Op2;
1437
1438 TwoOps_match(const T0 &Op1, const T1 &Op2) : Op1(Op1), Op2(Op2) {}
1439
1440 template <typename OpTy> bool match(OpTy *V) {
1441 if (V->getValueID() == Value::InstructionVal + Opcode) {
1442 auto *I = cast<Instruction>(V);
1443 return Op1.match(I->getOperand(0)) && Op2.match(I->getOperand(1));
1444 }
1445 return false;
1446 }
1447};
1448
1449/// Matches instructions with Opcode and three operands.
1450template <typename T0, typename T1, typename T2, unsigned Opcode>
1451struct ThreeOps_match {
1452 T0 Op1;
1453 T1 Op2;
1454 T2 Op3;
1455
1456 ThreeOps_match(const T0 &Op1, const T1 &Op2, const T2 &Op3)
1457 : Op1(Op1), Op2(Op2), Op3(Op3) {}
1458
1459 template <typename OpTy> bool match(OpTy *V) {
1460 if (V->getValueID() == Value::InstructionVal + Opcode) {
1461 auto *I = cast<Instruction>(V);
1462 return Op1.match(I->getOperand(0)) && Op2.match(I->getOperand(1)) &&
1463 Op3.match(I->getOperand(2));
1464 }
1465 return false;
1466 }
1467};
1468
1469/// Matches SelectInst.
1470template <typename Cond, typename LHS, typename RHS>
1471inline ThreeOps_match<Cond, LHS, RHS, Instruction::Select>
1472m_Select(const Cond &C, const LHS &L, const RHS &R) {
1473 return ThreeOps_match<Cond, LHS, RHS, Instruction::Select>(C, L, R);
1474}
1475
1476/// This matches a select of two constants, e.g.:
1477/// m_SelectCst<-1, 0>(m_Value(V))
1478template <int64_t L, int64_t R, typename Cond>
1479inline ThreeOps_match<Cond, constantint_match<L>, constantint_match<R>,
1480 Instruction::Select>
1481m_SelectCst(const Cond &C) {
1482 return m_Select(C, m_ConstantInt<L>(), m_ConstantInt<R>());
1483}
1484
1485/// Matches FreezeInst.
1486template <typename OpTy>
1487inline OneOps_match<OpTy, Instruction::Freeze> m_Freeze(const OpTy &Op) {
1488 return OneOps_match<OpTy, Instruction::Freeze>(Op);
1489}
1490
1491/// Matches InsertElementInst.
1492template <typename Val_t, typename Elt_t, typename Idx_t>
1493inline ThreeOps_match<Val_t, Elt_t, Idx_t, Instruction::InsertElement>
1494m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx) {
1495 return ThreeOps_match<Val_t, Elt_t, Idx_t, Instruction::InsertElement>(
1496 Val, Elt, Idx);
1497}
1498
1499/// Matches ExtractElementInst.
1500template <typename Val_t, typename Idx_t>
1501inline TwoOps_match<Val_t, Idx_t, Instruction::ExtractElement>
1502m_ExtractElt(const Val_t &Val, const Idx_t &Idx) {
1503 return TwoOps_match<Val_t, Idx_t, Instruction::ExtractElement>(Val, Idx);
1504}
1505
1506/// Matches shuffle.
1507template <typename T0, typename T1, typename T2> struct Shuffle_match {
1508 T0 Op1;
1509 T1 Op2;
1510 T2 Mask;
1511
1512 Shuffle_match(const T0 &Op1, const T1 &Op2, const T2 &Mask)
1513 : Op1(Op1), Op2(Op2), Mask(Mask) {}
1514
1515 template <typename OpTy> bool match(OpTy *V) {
1516 if (auto *I = dyn_cast<ShuffleVectorInst>(V)) {
1517 return Op1.match(I->getOperand(0)) && Op2.match(I->getOperand(1)) &&
1518 Mask.match(I->getShuffleMask());
1519 }
1520 return false;
1521 }
1522};
1523
1524struct m_Mask {
1525 ArrayRef<int> &MaskRef;
1526 m_Mask(ArrayRef<int> &MaskRef) : MaskRef(MaskRef) {}
1527 bool match(ArrayRef<int> Mask) {
1528 MaskRef = Mask;
1529 return true;
1530 }
1531};
1532
1533struct m_ZeroMask {
1534 bool match(ArrayRef<int> Mask) {
1535 return all_of(Mask, [](int Elem) { return Elem == 0 || Elem == -1; });
1536 }
1537};
1538
1539struct m_SpecificMask {
1540 ArrayRef<int> &MaskRef;
1541 m_SpecificMask(ArrayRef<int> &MaskRef) : MaskRef(MaskRef) {}
1542 bool match(ArrayRef<int> Mask) { return MaskRef == Mask; }
1543};
1544
1545struct m_SplatOrUndefMask {
1546 int &SplatIndex;
1547 m_SplatOrUndefMask(int &SplatIndex) : SplatIndex(SplatIndex) {}
1548 bool match(ArrayRef<int> Mask) {
1549 auto First = find_if(Mask, [](int Elem) { return Elem != -1; });
1550 if (First == Mask.end())
1551 return false;
1552 SplatIndex = *First;
1553 return all_of(Mask,
1554 [First](int Elem) { return Elem == *First || Elem == -1; });
1555 }
1556};
1557
1558/// Matches ShuffleVectorInst independently of mask value.
1559template <typename V1_t, typename V2_t>
1560inline TwoOps_match<V1_t, V2_t, Instruction::ShuffleVector>
1561m_Shuffle(const V1_t &v1, const V2_t &v2) {
1562 return TwoOps_match<V1_t, V2_t, Instruction::ShuffleVector>(v1, v2);
1563}
1564
1565template <typename V1_t, typename V2_t, typename Mask_t>
1566inline Shuffle_match<V1_t, V2_t, Mask_t>
1567m_Shuffle(const V1_t &v1, const V2_t &v2, const Mask_t &mask) {
1568 return Shuffle_match<V1_t, V2_t, Mask_t>(v1, v2, mask);
1569}
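
Illustrative sketch of the mask-aware overload (hypothetical helper, not from the analyzed source): combining m_Shuffle with m_SplatOrUndefMask recognizes a shuffle that broadcasts a single source lane.

#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace llvm::PatternMatch;

// Returns true when V is a shufflevector whose mask repeats one lane
// (undef lanes allowed); the repeated lane index is written to SplatIdx.
static bool isSplatShuffle(Value *V, Value *&Src, int &SplatIdx) {
  return match(V, m_Shuffle(m_Value(Src), m_Undef(),
                            m_SplatOrUndefMask(SplatIdx)));
}
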
1570
1571/// Matches LoadInst.
1572template <typename OpTy>
1573inline OneOps_match<OpTy, Instruction::Load> m_Load(const OpTy &Op) {
1574 return OneOps_match<OpTy, Instruction::Load>(Op);
1575}
1576
1577/// Matches StoreInst.
1578template <typename ValueOpTy, typename PointerOpTy>
1579inline TwoOps_match<ValueOpTy, PointerOpTy, Instruction::Store>
1580m_Store(const ValueOpTy &ValueOp, const PointerOpTy &PointerOp) {
1581 return TwoOps_match<ValueOpTy, PointerOpTy, Instruction::Store>(ValueOp,
1582 PointerOp);
1583}
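
A brief, hypothetical example of the memory matchers: m_Store binds the stored value and the pointer operand of a StoreInst (m_Load is used the same way for loads).

#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace llvm::PatternMatch;

// Returns true and binds StoredVal/Ptr when I is a store instruction.
static bool bindStore(Value *I, Value *&StoredVal, Value *&Ptr) {
  return match(I, m_Store(m_Value(StoredVal), m_Value(Ptr)));
}
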
1584
1585//===----------------------------------------------------------------------===//
1586// Matchers for CastInst classes
1587//
1588
1589template <typename Op_t, unsigned Opcode> struct CastClass_match {
1590 Op_t Op;
1591
1592 CastClass_match(const Op_t &OpMatch) : Op(OpMatch) {}
1593
1594 template <typename OpTy> bool match(OpTy *V) {
1595 if (auto *O = dyn_cast<Operator>(V))
1596 return O->getOpcode() == Opcode && Op.match(O->getOperand(0));
1597 return false;
1598 }
1599};
1600
1601/// Matches BitCast.
1602template <typename OpTy>
1603inline CastClass_match<OpTy, Instruction::BitCast> m_BitCast(const OpTy &Op) {
1604 return CastClass_match<OpTy, Instruction::BitCast>(Op);
1605}
1606
1607/// Matches PtrToInt.
1608template <typename OpTy>
1609inline CastClass_match<OpTy, Instruction::PtrToInt> m_PtrToInt(const OpTy &Op) {
1610 return CastClass_match<OpTy, Instruction::PtrToInt>(Op);
1611}
1612
1613/// Matches IntToPtr.
1614template <typename OpTy>
1615inline CastClass_match<OpTy, Instruction::IntToPtr> m_IntToPtr(const OpTy &Op) {
1616 return CastClass_match<OpTy, Instruction::IntToPtr>(Op);
1617}
1618
1619/// Matches Trunc.
1620template <typename OpTy>
1621inline CastClass_match<OpTy, Instruction::Trunc> m_Trunc(const OpTy &Op) {
1622 return CastClass_match<OpTy, Instruction::Trunc>(Op);
1623}
1624
1625template <typename OpTy>
1626inline match_combine_or<CastClass_match<OpTy, Instruction::Trunc>, OpTy>
1627m_TruncOrSelf(const OpTy &Op) {
1628 return m_CombineOr(m_Trunc(Op), Op);
1629}
1630
1631/// Matches SExt.
1632template <typename OpTy>
1633inline CastClass_match<OpTy, Instruction::SExt> m_SExt(const OpTy &Op) {
1634 return CastClass_match<OpTy, Instruction::SExt>(Op);
1635}
1636
1637/// Matches ZExt.
1638template <typename OpTy>
1639inline CastClass_match<OpTy, Instruction::ZExt> m_ZExt(const OpTy &Op) {
1640 return CastClass_match<OpTy, Instruction::ZExt>(Op);
1641}
1642
1643template <typename OpTy>
1644inline match_combine_or<CastClass_match<OpTy, Instruction::ZExt>, OpTy>
1645m_ZExtOrSelf(const OpTy &Op) {
1646 return m_CombineOr(m_ZExt(Op), Op);
1647}
1648
1649template <typename OpTy>
1650inline match_combine_or<CastClass_match<OpTy, Instruction::SExt>, OpTy>
1651m_SExtOrSelf(const OpTy &Op) {
1652 return m_CombineOr(m_SExt(Op), Op);
1653}
1654
1655template <typename OpTy>
1656inline match_combine_or<CastClass_match<OpTy, Instruction::ZExt>,
1657 CastClass_match<OpTy, Instruction::SExt>>
1658m_ZExtOrSExt(const OpTy &Op) {
1659 return m_CombineOr(m_ZExt(Op), m_SExt(Op));
1660}
1661
1662template <typename OpTy>
1663inline match_combine_or<
1664 match_combine_or<CastClass_match<OpTy, Instruction::ZExt>,
1665 CastClass_match<OpTy, Instruction::SExt>>,
1666 OpTy>
1667m_ZExtOrSExtOrSelf(const OpTy &Op) {
1668 return m_CombineOr(m_ZExtOrSExt(Op), Op);
1669}
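
Hedged example of the cast matchers (helper name invented for illustration): m_ZExtOrSExtOrSelf looks through an optional integer extension, binding the underlying value either way.

#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace llvm::PatternMatch;

// Returns true when V is X itself or a zext/sext of X; X is bound in all cases.
static bool stripIntExt(Value *V, Value *&X) {
  return match(V, m_ZExtOrSExtOrSelf(m_Value(X)));
}
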
1670
1671template <typename OpTy>
1672inline CastClass_match<OpTy, Instruction::UIToFP> m_UIToFP(const OpTy &Op) {
1673 return CastClass_match<OpTy, Instruction::UIToFP>(Op);
1674}
1675
1676template <typename OpTy>
1677inline CastClass_match<OpTy, Instruction::SIToFP> m_SIToFP(const OpTy &Op) {
1678 return CastClass_match<OpTy, Instruction::SIToFP>(Op);
1679}
1680
1681template <typename OpTy>
1682inline CastClass_match<OpTy, Instruction::FPToUI> m_FPToUI(const OpTy &Op) {
1683 return CastClass_match<OpTy, Instruction::FPToUI>(Op);
1684}
1685
1686template <typename OpTy>
1687inline CastClass_match<OpTy, Instruction::FPToSI> m_FPToSI(const OpTy &Op) {
1688 return CastClass_match<OpTy, Instruction::FPToSI>(Op);
1689}
1690
1691template <typename OpTy>
1692inline CastClass_match<OpTy, Instruction::FPTrunc> m_FPTrunc(const OpTy &Op) {
1693 return CastClass_match<OpTy, Instruction::FPTrunc>(Op);
1694}
1695
1696template <typename OpTy>
1697inline CastClass_match<OpTy, Instruction::FPExt> m_FPExt(const OpTy &Op) {
1698 return CastClass_match<OpTy, Instruction::FPExt>(Op);
1699}
1700
1701//===----------------------------------------------------------------------===//
1702// Matchers for control flow.
1703//
1704
1705struct br_match {
1706 BasicBlock *&Succ;
1707
1708 br_match(BasicBlock *&Succ) : Succ(Succ) {}
1709
1710 template <typename OpTy> bool match(OpTy *V) {
1711 if (auto *BI = dyn_cast<BranchInst>(V))
1712 if (BI->isUnconditional()) {
1713 Succ = BI->getSuccessor(0);
1714 return true;
1715 }
1716 return false;
1717 }
1718};
1719
1720inline br_match m_UnconditionalBr(BasicBlock *&Succ) { return br_match(Succ); }
1721
1722template <typename Cond_t, typename TrueBlock_t, typename FalseBlock_t>
1723struct brc_match {
1724 Cond_t Cond;
1725 TrueBlock_t T;
1726 FalseBlock_t F;
1727
1728 brc_match(const Cond_t &C, const TrueBlock_t &t, const FalseBlock_t &f)
1729 : Cond(C), T(t), F(f) {}
1730
1731 template <typename OpTy> bool match(OpTy *V) {
1732 if (auto *BI = dyn_cast<BranchInst>(V))
1733 if (BI->isConditional() && Cond.match(BI->getCondition()))
1734 return T.match(BI->getSuccessor(0)) && F.match(BI->getSuccessor(1));
1735 return false;
1736 }
1737};
1738
1739template <typename Cond_t>
1740inline brc_match<Cond_t, bind_ty<BasicBlock>, bind_ty<BasicBlock>>
1741m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F) {
1742 return brc_match<Cond_t, bind_ty<BasicBlock>, bind_ty<BasicBlock>>(
1743 C, m_BasicBlock(T), m_BasicBlock(F));
1744}
1745
1746template <typename Cond_t, typename TrueBlock_t, typename FalseBlock_t>
1747inline brc_match<Cond_t, TrueBlock_t, FalseBlock_t>
1748m_Br(const Cond_t &C, const TrueBlock_t &T, const FalseBlock_t &F) {
1749 return brc_match<Cond_t, TrueBlock_t, FalseBlock_t>(C, T, F);
1750}
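
Hypothetical sketch of the control-flow matchers: the m_Br overload taking BasicBlock pointers binds a conditional branch's condition and both successors.

#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace llvm::PatternMatch;

// Returns true for a conditional branch, binding its condition and successors.
static bool bindCondBranch(Value *Term, Value *&Cond,
                           BasicBlock *&TrueBB, BasicBlock *&FalseBB) {
  return match(Term, m_Br(m_Value(Cond), TrueBB, FalseBB));
}
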
1751
1752//===----------------------------------------------------------------------===//
1753// Matchers for max/min idioms, e.g. "select (sgt x, y), x, y" -> smax(x, y).
1754//
1755
1756template <typename CmpInst_t, typename LHS_t, typename RHS_t, typename Pred_t,
1757 bool Commutable = false>
1758struct MaxMin_match {
1759 using PredType = Pred_t;
1760 LHS_t L;
1761 RHS_t R;
1762
1763 // The evaluation order is always stable, regardless of Commutability.
1764 // The LHS is always matched first.
1765 MaxMin_match(const LHS_t &LHS, const RHS_t &RHS) : L(LHS), R(RHS) {}
1766
1767 template <typename OpTy> bool match(OpTy *V) {
1768 if (auto *II = dyn_cast<IntrinsicInst>(V)) {
1769 Intrinsic::ID IID = II->getIntrinsicID();
1770 if ((IID == Intrinsic::smax && Pred_t::match(ICmpInst::ICMP_SGT)) ||
1771 (IID == Intrinsic::smin && Pred_t::match(ICmpInst::ICMP_SLT)) ||
1772 (IID == Intrinsic::umax && Pred_t::match(ICmpInst::ICMP_UGT)) ||
1773 (IID == Intrinsic::umin && Pred_t::match(ICmpInst::ICMP_ULT))) {
1774 Value *LHS = II->getOperand(0), *RHS = II->getOperand(1);
1775 return (L.match(LHS) && R.match(RHS)) ||
1776 (Commutable && L.match(RHS) && R.match(LHS));
1777 }
1778 }
1779 // Look for "(x pred y) ? x : y" or "(x pred y) ? y : x".
1780 auto *SI = dyn_cast<SelectInst>(V);
1781 if (!SI)
1782 return false;
1783 auto *Cmp = dyn_cast<CmpInst_t>(SI->getCondition());
1784 if (!Cmp)
1785 return false;
1786 // At this point we have a select conditioned on a comparison. Check that
1787 // it is the values returned by the select that are being compared.
1788 auto *TrueVal = SI->getTrueValue();
1789 auto *FalseVal = SI->getFalseValue();
1790 auto *LHS = Cmp->getOperand(0);
1791 auto *RHS = Cmp->getOperand(1);
1792 if ((TrueVal != LHS || FalseVal != RHS) &&
1793 (TrueVal != RHS || FalseVal != LHS))
1794 return false;
1795 typename CmpInst_t::Predicate Pred =
1796 LHS == TrueVal ? Cmp->getPredicate() : Cmp->getInversePredicate();
1797 // Does "(x pred y) ? x : y" represent the desired max/min operation?
1798 if (!Pred_t::match(Pred))
1799 return false;
1800 // It does! Bind the operands.
1801 return (L.match(LHS) && R.match(RHS)) ||
1802 (Commutable && L.match(RHS) && R.match(LHS));
1803 }
1804};
1805
1806/// Helper class for identifying signed max predicates.
1807struct smax_pred_ty {
1808 static bool match(ICmpInst::Predicate Pred) {
1809 return Pred == CmpInst::ICMP_SGT || Pred == CmpInst::ICMP_SGE;
1810 }
1811};
1812
1813/// Helper class for identifying signed min predicates.
1814struct smin_pred_ty {
1815 static bool match(ICmpInst::Predicate Pred) {
1816 return Pred == CmpInst::ICMP_SLT || Pred == CmpInst::ICMP_SLE;
1817 }
1818};
1819
1820/// Helper class for identifying unsigned max predicates.
1821struct umax_pred_ty {
1822 static bool match(ICmpInst::Predicate Pred) {
1823 return Pred == CmpInst::ICMP_UGT || Pred == CmpInst::ICMP_UGE;
1824 }
1825};
1826
1827/// Helper class for identifying unsigned min predicates.
1828struct umin_pred_ty {
1829 static bool match(ICmpInst::Predicate Pred) {
1830 return Pred == CmpInst::ICMP_ULT || Pred == CmpInst::ICMP_ULE;
1831 }
1832};
1833
1834/// Helper class for identifying ordered max predicates.
1835struct ofmax_pred_ty {
1836 static bool match(FCmpInst::Predicate Pred) {
1837 return Pred == CmpInst::FCMP_OGT || Pred == CmpInst::FCMP_OGE;
1838 }
1839};
1840
1841/// Helper class for identifying ordered min predicates.
1842struct ofmin_pred_ty {
1843 static bool match(FCmpInst::Predicate Pred) {
1844 return Pred == CmpInst::FCMP_OLT || Pred == CmpInst::FCMP_OLE;
1845 }
1846};
1847
1848/// Helper class for identifying unordered max predicates.
1849struct ufmax_pred_ty {
1850 static bool match(FCmpInst::Predicate Pred) {
1851 return Pred == CmpInst::FCMP_UGT || Pred == CmpInst::FCMP_UGE;
1852 }
1853};
1854
1855/// Helper class for identifying unordered min predicates.
1856struct ufmin_pred_ty {
1857 static bool match(FCmpInst::Predicate Pred) {
1858 return Pred == CmpInst::FCMP_ULT || Pred == CmpInst::FCMP_ULE;
1859 }
1860};
1861
1862template <typename LHS, typename RHS>
1863inline MaxMin_match<ICmpInst, LHS, RHS, smax_pred_ty> m_SMax(const LHS &L,
1864 const RHS &R) {
1865 return MaxMin_match<ICmpInst, LHS, RHS, smax_pred_ty>(L, R);
1866}
1867
1868template <typename LHS, typename RHS>
1869inline MaxMin_match<ICmpInst, LHS, RHS, smin_pred_ty> m_SMin(const LHS &L,
1870 const RHS &R) {
1871 return MaxMin_match<ICmpInst, LHS, RHS, smin_pred_ty>(L, R);
1872}
1873
1874template <typename LHS, typename RHS>
1875inline MaxMin_match<ICmpInst, LHS, RHS, umax_pred_ty> m_UMax(const LHS &L,
1876 const RHS &R) {
1877 return MaxMin_match<ICmpInst, LHS, RHS, umax_pred_ty>(L, R);
1878}
1879
1880template <typename LHS, typename RHS>
1881inline MaxMin_match<ICmpInst, LHS, RHS, umin_pred_ty> m_UMin(const LHS &L,
1882 const RHS &R) {
1883 return MaxMin_match<ICmpInst, LHS, RHS, umin_pred_ty>(L, R);
1884}
1885
1886template <typename LHS, typename RHS>
1887inline match_combine_or<
1888 match_combine_or<MaxMin_match<ICmpInst, LHS, RHS, smax_pred_ty>,
1889 MaxMin_match<ICmpInst, LHS, RHS, smin_pred_ty>>,
1890 match_combine_or<MaxMin_match<ICmpInst, LHS, RHS, umax_pred_ty>,
1891 MaxMin_match<ICmpInst, LHS, RHS, umin_pred_ty>>>
1892m_MaxOrMin(const LHS &L, const RHS &R) {
1893 return m_CombineOr(m_CombineOr(m_SMax(L, R), m_SMin(L, R)),
1894 m_CombineOr(m_UMax(L, R), m_UMin(L, R)));
1895}
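
Illustrative only (the helper below is not part of this header): m_SMax recognizes both the llvm.smax intrinsic and the select-of-icmp idiom handled by MaxMin_match.

#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace llvm::PatternMatch;

// Returns true when V computes smax(A, B), either as the llvm.smax intrinsic
// or as the "select (icmp sgt A, B), A, B" idiom.
static bool isSMaxIdiom(Value *V, Value *&A, Value *&B) {
  return match(V, m_SMax(m_Value(A), m_Value(B)));
}
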
1896
1897/// Match an 'ordered' floating point maximum function.
1898/// Floating point has one special value 'NaN'. Therefore, there is no total
1899/// order. However, if we can ignore the 'NaN' value (for example, because of a
1900/// 'no-nans-float-math' flag) a combination of a fcmp and select has 'maximum'
1901/// semantics. In the presence of 'NaN' we have to preserve the original
1902/// select(fcmp(ogt/ge, L, R), L, R) semantics matched by this predicate.
1903///
1904/// max(L, R) iff L and R are not NaN
1905/// m_OrdFMax(L, R) = R iff L or R are NaN
1906template <typename LHS, typename RHS>
1907inline MaxMin_match<FCmpInst, LHS, RHS, ofmax_pred_ty> m_OrdFMax(const LHS &L,
1908 const RHS &R) {
1909 return MaxMin_match<FCmpInst, LHS, RHS, ofmax_pred_ty>(L, R);
1910}
1911
1912/// Match an 'ordered' floating point minimum function.
1913/// Floating point has one special value 'NaN'. Therefore, there is no total
1914/// order. However, if we can ignore the 'NaN' value (for example, because of a
1915/// 'no-nans-float-math' flag) a combination of a fcmp and select has 'minimum'
1916/// semantics. In the presence of 'NaN' we have to preserve the original
1917/// select(fcmp(olt/le, L, R), L, R) semantics matched by this predicate.
1918///
1919/// min(L, R) iff L and R are not NaN
1920/// m_OrdFMin(L, R) = R iff L or R are NaN
1921template <typename LHS, typename RHS>
1922inline MaxMin_match<FCmpInst, LHS, RHS, ofmin_pred_ty> m_OrdFMin(const LHS &L,
1923 const RHS &R) {
1924 return MaxMin_match<FCmpInst, LHS, RHS, ofmin_pred_ty>(L, R);
1925}
1926
1927/// Match an 'unordered' floating point maximum function.
1928/// Floating point has one special value 'NaN'. Therefore, there is no total
1929/// order. However, if we can ignore the 'NaN' value (for example, because of a
1930/// 'no-nans-float-math' flag) a combination of a fcmp and select has 'maximum'
1931/// semantics. In the presence of 'NaN' we have to preserve the original
1932/// select(fcmp(ugt/ge, L, R), L, R) semantics matched by this predicate.
1933///
1934/// max(L, R) iff L and R are not NaN
1935/// m_UnordFMax(L, R) = L iff L or R are NaN
1936template <typename LHS, typename RHS>
1937inline MaxMin_match<FCmpInst, LHS, RHS, ufmax_pred_ty>
1938m_UnordFMax(const LHS &L, const RHS &R) {
1939 return MaxMin_match<FCmpInst, LHS, RHS, ufmax_pred_ty>(L, R);
1940}
1941
1942/// Match an 'unordered' floating point minimum function.
1943/// Floating point has one special value 'NaN'. Therefore, there is no total
1944/// order. However, if we can ignore the 'NaN' value (for example, because of a
1945/// 'no-nans-float-math' flag) a combination of a fcmp and select has 'minimum'
1946/// semantics. In the presence of 'NaN' we have to preserve the original
1947/// select(fcmp(ult/le, L, R), L, R) semantics matched by this predicate.
1948///
1949/// min(L, R) iff L and R are not NaN
1950/// m_UnordFMin(L, R) = L iff L or R are NaN
1951template <typename LHS, typename RHS>
1952inline MaxMin_match<FCmpInst, LHS, RHS, ufmin_pred_ty>
1953m_UnordFMin(const LHS &L, const RHS &R) {
1954 return MaxMin_match<FCmpInst, LHS, RHS, ufmin_pred_ty>(L, R);
1955}
1956
1957//===----------------------------------------------------------------------===//
1958// Matchers for overflow check patterns: e.g. (a + b) u< a, (a ^ -1) <u b
1959// Note that S might be matched to instructions other than AddInst.
1960//
1961
1962template <typename LHS_t, typename RHS_t, typename Sum_t>
1963struct UAddWithOverflow_match {
1964 LHS_t L;
1965 RHS_t R;
1966 Sum_t S;
1967
1968 UAddWithOverflow_match(const LHS_t &L, const RHS_t &R, const Sum_t &S)
1969 : L(L), R(R), S(S) {}
1970
1971 template <typename OpTy> bool match(OpTy *V) {
1972 Value *ICmpLHS, *ICmpRHS;
1973 ICmpInst::Predicate Pred;
1974 if (!m_ICmp(Pred, m_Value(ICmpLHS), m_Value(ICmpRHS)).match(V))
1975 return false;
1976
1977 Value *AddLHS, *AddRHS;
1978 auto AddExpr = m_Add(m_Value(AddLHS), m_Value(AddRHS));
1979
1980 // (a + b) u< a, (a + b) u< b
1981 if (Pred == ICmpInst::ICMP_ULT)
1982 if (AddExpr.match(ICmpLHS) && (ICmpRHS == AddLHS || ICmpRHS == AddRHS))
1983 return L.match(AddLHS) && R.match(AddRHS) && S.match(ICmpLHS);
1984
1985 // a >u (a + b), b >u (a + b)
1986 if (Pred == ICmpInst::ICMP_UGT)
1987 if (AddExpr.match(ICmpRHS) && (ICmpLHS == AddLHS || ICmpLHS == AddRHS))
1988 return L.match(AddLHS) && R.match(AddRHS) && S.match(ICmpRHS);
1989
1990 Value *Op1;
1991 auto XorExpr = m_OneUse(m_Xor(m_Value(Op1), m_AllOnes()));
1992 // (a ^ -1) <u b
1993 if (Pred == ICmpInst::ICMP_ULT) {
1994 if (XorExpr.match(ICmpLHS))
1995 return L.match(Op1) && R.match(ICmpRHS) && S.match(ICmpLHS);
1996 }
1997 // b >u (a ^ -1)
1998 if (Pred == ICmpInst::ICMP_UGT) {
1999 if (XorExpr.match(ICmpRHS))
2000 return L.match(Op1) && R.match(ICmpLHS) && S.match(ICmpRHS);
2001 }
2002
2003 // Match special-case for increment-by-1.
2004 if (Pred == ICmpInst::ICMP_EQ) {
2005 // (a + 1) == 0
2006 // (1 + a) == 0
2007 if (AddExpr.match(ICmpLHS) && m_ZeroInt().match(ICmpRHS) &&
2008 (m_One().match(AddLHS) || m_One().match(AddRHS)))
2009 return L.match(AddLHS) && R.match(AddRHS) && S.match(ICmpLHS);
2010 // 0 == (a + 1)
2011 // 0 == (1 + a)
2012 if (m_ZeroInt().match(ICmpLHS) && AddExpr.match(ICmpRHS) &&
2013 (m_One().match(AddLHS) || m_One().match(AddRHS)))
2014 return L.match(AddLHS) && R.match(AddRHS) && S.match(ICmpRHS);
2015 }
2016
2017 return false;
2018 }
2019};
2020
2021/// Match an icmp instruction checking for unsigned overflow on addition.
2022///
2023/// S is matched to the addition whose result is being checked for overflow, and
2024/// L and R are matched to the LHS and RHS of S.
2025template <typename LHS_t, typename RHS_t, typename Sum_t>
2026UAddWithOverflow_match<LHS_t, RHS_t, Sum_t>
2027m_UAddWithOverflow(const LHS_t &L, const RHS_t &R, const Sum_t &S) {
2028 return UAddWithOverflow_match<LHS_t, RHS_t, Sum_t>(L, R, S);
2029}
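
A hedged usage sketch (helper invented for illustration): m_UAddWithOverflow binds the operands of an unsigned-add overflow check such as "(A + B) u< A".

#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace llvm::PatternMatch;

// Returns true when V is an icmp checking unsigned-add overflow; binds the
// addends and the add (or xor, per the note above) whose result is checked.
static bool isUAddOverflowCheck(Value *V, Value *&A, Value *&B, Value *&Sum) {
  return match(V, m_UAddWithOverflow(m_Value(A), m_Value(B), m_Value(Sum)));
}
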
2030
2031template <typename Opnd_t> struct Argument_match {
2032 unsigned OpI;
2033 Opnd_t Val;
2034
2035 Argument_match(unsigned OpIdx, const Opnd_t &V) : OpI(OpIdx), Val(V) {}
2036
2037 template <typename OpTy> bool match(OpTy *V) {
2038 // FIXME: Should likely be switched to use `CallBase`.
2039 if (const auto *CI = dyn_cast<CallInst>(V))
2040 return Val.match(CI->getArgOperand(OpI));
2041 return false;
2042 }
2043};
2044
2045/// Match an argument.
2046template <unsigned OpI, typename Opnd_t>
2047inline Argument_match<Opnd_t> m_Argument(const Opnd_t &Op) {
2048 return Argument_match<Opnd_t>(OpI, Op);
2049}
2050
2051/// Intrinsic matchers.
2052struct IntrinsicID_match {
2053 unsigned ID;
2054
2055 IntrinsicID_match(Intrinsic::ID IntrID) : ID(IntrID) {}
2056
2057 template <typename OpTy> bool match(OpTy *V) {
2058 if (const auto *CI = dyn_cast<CallInst>(V))
2059 if (const auto *F = CI->getCalledFunction())
2060 return F->getIntrinsicID() == ID;
2061 return false;
2062 }
2063};
2064
2065/// Intrinsic matchers are combinations of ID matchers and argument
2066/// matchers. Higher-arity matchers are defined recursively by and-ing
2067/// them with lower-arity matchers. Here are some convenient typedefs for up to
2068/// several arguments; more can be added as needed.
2069template <typename T0 = void, typename T1 = void, typename T2 = void,
2070 typename T3 = void, typename T4 = void, typename T5 = void,
2071 typename T6 = void, typename T7 = void, typename T8 = void,
2072 typename T9 = void, typename T10 = void>
2073struct m_Intrinsic_Ty;
2074template <typename T0> struct m_Intrinsic_Ty<T0> {
2075 using Ty = match_combine_and<IntrinsicID_match, Argument_match<T0>>;
2076};
2077template <typename T0, typename T1> struct m_Intrinsic_Ty<T0, T1> {
2078 using Ty =
2079 match_combine_and<typename m_Intrinsic_Ty<T0>::Ty, Argument_match<T1>>;
2080};
2081template <typename T0, typename T1, typename T2>
2082struct m_Intrinsic_Ty<T0, T1, T2> {
2083 using Ty =
2084 match_combine_and<typename m_Intrinsic_Ty<T0, T1>::Ty,
2085 Argument_match<T2>>;
2086};
2087template <typename T0, typename T1, typename T2, typename T3>
2088struct m_Intrinsic_Ty<T0, T1, T2, T3> {
2089 using Ty =
2090 match_combine_and<typename m_Intrinsic_Ty<T0, T1, T2>::Ty,
2091 Argument_match<T3>>;
2092};
2093
2094template <typename T0, typename T1, typename T2, typename T3, typename T4>
2095struct m_Intrinsic_Ty<T0, T1, T2, T3, T4> {
2096 using Ty = match_combine_and<typename m_Intrinsic_Ty<T0, T1, T2, T3>::Ty,
2097 Argument_match<T4>>;
2098};
2099
2100template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5>
2101struct m_Intrinsic_Ty<T0, T1, T2, T3, T4, T5> {
2102 using Ty = match_combine_and<typename m_Intrinsic_Ty<T0, T1, T2, T3, T4>::Ty,
2103 Argument_match<T5>>;
2104};
2105
2106/// Match intrinsic calls like this:
2107/// m_Intrinsic<Intrinsic::fabs>(m_Value(X))
2108template <Intrinsic::ID IntrID> inline IntrinsicID_match m_Intrinsic() {
2109 return IntrinsicID_match(IntrID);
2110}
2111
2112/// Matches MaskedLoad Intrinsic.
2113template <typename Opnd0, typename Opnd1, typename Opnd2, typename Opnd3>
2114inline typename m_Intrinsic_Ty<Opnd0, Opnd1, Opnd2, Opnd3>::Ty
2115m_MaskedLoad(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2,
2116 const Opnd3 &Op3) {
2117 return m_Intrinsic<Intrinsic::masked_load>(Op0, Op1, Op2, Op3);
2118}
2119
2120template <Intrinsic::ID IntrID, typename T0>
2121inline typename m_Intrinsic_Ty<T0>::Ty m_Intrinsic(const T0 &Op0) {
2122 return m_CombineAnd(m_Intrinsic<IntrID>(), m_Argument<0>(Op0));
2123}
2124
2125template <Intrinsic::ID IntrID, typename T0, typename T1>
2126inline typename m_Intrinsic_Ty<T0, T1>::Ty m_Intrinsic(const T0 &Op0,
2127 const T1 &Op1) {
2128 return m_CombineAnd(m_Intrinsic<IntrID>(Op0), m_Argument<1>(Op1));
2129}
2130
2131template <Intrinsic::ID IntrID, typename T0, typename T1, typename T2>
2132inline typename m_Intrinsic_Ty<T0, T1, T2>::Ty
2133m_Intrinsic(const T0 &Op0, const T1 &Op1, const T2 &Op2) {
2134 return m_CombineAnd(m_Intrinsic<IntrID>(Op0, Op1), m_Argument<2>(Op2));
2135}
2136
2137template <Intrinsic::ID IntrID, typename T0, typename T1, typename T2,
2138 typename T3>
2139inline typename m_Intrinsic_Ty<T0, T1, T2, T3>::Ty
2140m_Intrinsic(const T0 &Op0, const T1 &Op1, const T2 &Op2, const T3 &Op3) {
2141 return m_CombineAnd(m_Intrinsic<IntrID>(Op0, Op1, Op2), m_Argument<3>(Op3));
2142}
2143
2144template <Intrinsic::ID IntrID, typename T0, typename T1, typename T2,
2145 typename T3, typename T4>
2146inline typename m_Intrinsic_Ty<T0, T1, T2, T3, T4>::Ty
2147m_Intrinsic(const T0 &Op0, const T1 &Op1, const T2 &Op2, const T3 &Op3,
2148 const T4 &Op4) {
2149 return m_CombineAnd(m_Intrinsic<IntrID>(Op0, Op1, Op2, Op3),
2150 m_Argument<4>(Op4));
2151}
2152
2153template <Intrinsic::ID IntrID, typename T0, typename T1, typename T2,
2154 typename T3, typename T4, typename T5>
2155inline typename m_Intrinsic_Ty<T0, T1, T2, T3, T4, T5>::Ty
2156m_Intrinsic(const T0 &Op0, const T1 &Op1, const T2 &Op2, const T3 &Op3,
2157 const T4 &Op4, const T5 &Op5) {
2158 return m_CombineAnd(m_Intrinsic<IntrID>(Op0, Op1, Op2, Op3, Op4),
2159 m_Argument<5>(Op5));
2160}
2161
2162// Helper intrinsic matching specializations.
2163template <typename Opnd0>
2164inline typename m_Intrinsic_Ty<Opnd0>::Ty m_BitReverse(const Opnd0 &Op0) {
2165 return m_Intrinsic<Intrinsic::bitreverse>(Op0);
2166}
2167
2168template <typename Opnd0>
2169inline typename m_Intrinsic_Ty<Opnd0>::Ty m_BSwap(const Opnd0 &Op0) {
2170 return m_Intrinsic<Intrinsic::bswap>(Op0);
2171}
2172
2173template <typename Opnd0>
2174inline typename m_Intrinsic_Ty<Opnd0>::Ty m_FAbs(const Opnd0 &Op0) {
2175 return m_Intrinsic<Intrinsic::fabs>(Op0);
2176}
2177
2178template <typename Opnd0>
2179inline typename m_Intrinsic_Ty<Opnd0>::Ty m_FCanonicalize(const Opnd0 &Op0) {
2180 return m_Intrinsic<Intrinsic::canonicalize>(Op0);
2181}
2182
2183template <typename Opnd0, typename Opnd1>
2184inline typename m_Intrinsic_Ty<Opnd0, Opnd1>::Ty m_FMin(const Opnd0 &Op0,
2185 const Opnd1 &Op1) {
2186 return m_Intrinsic<Intrinsic::minnum>(Op0, Op1);
2187}
2188
2189template <typename Opnd0, typename Opnd1>
2190inline typename m_Intrinsic_Ty<Opnd0, Opnd1>::Ty m_FMax(const Opnd0 &Op0,
2191 const Opnd1 &Op1) {
2192 return m_Intrinsic<Intrinsic::maxnum>(Op0, Op1);
2193}
2194
2195template <typename Opnd0, typename Opnd1, typename Opnd2>
2196inline typename m_Intrinsic_Ty<Opnd0, Opnd1, Opnd2>::Ty
2197m_FShl(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2) {
2198 return m_Intrinsic<Intrinsic::fshl>(Op0, Op1, Op2);
2199}
2200
2201template <typename Opnd0, typename Opnd1, typename Opnd2>
2202inline typename m_Intrinsic_Ty<Opnd0, Opnd1, Opnd2>::Ty
2203m_FShr(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2) {
2204 return m_Intrinsic<Intrinsic::fshr>(Op0, Op1, Op2);
2205}
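
Sketch under the assumption that a left rotate is expressed as fshl(X, X, Amt); the helper below is illustrative, not part of this header. m_Deferred re-checks the value bound by the preceding m_Value.

#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace llvm::PatternMatch;

// Returns true when V is a call to llvm.fshl whose first two operands are the
// same value, i.e. a rotate-left; binds the rotated value and the shift amount.
static bool isRotateLeft(Value *V, Value *&X, Value *&Amt) {
  return match(V, m_FShl(m_Value(X), m_Deferred(X), m_Value(Amt)));
}
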
2206
2207//===----------------------------------------------------------------------===//
2208// Matchers for two-operands operators with the operators in either order
2209//
2210
2211/// Matches a BinaryOperator with LHS and RHS in either order.
2212template <typename LHS, typename RHS>
2213inline AnyBinaryOp_match<LHS, RHS, true> m_c_BinOp(const LHS &L, const RHS &R) {
2214 return AnyBinaryOp_match<LHS, RHS, true>(L, R);
2215}
2216
2217/// Matches an ICmp with a predicate over LHS and RHS in either order.
2218/// Swaps the predicate if operands are commuted.
2219template <typename LHS, typename RHS>
2220inline CmpClass_match<LHS, RHS, ICmpInst, ICmpInst::Predicate, true>
2221m_c_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R) {
2222 return CmpClass_match<LHS, RHS, ICmpInst, ICmpInst::Predicate, true>(Pred, L,
2223 R);
2224}
2225
2226/// Matches an Add with LHS and RHS in either order.
2227template <typename LHS, typename RHS>
2228inline BinaryOp_match<LHS, RHS, Instruction::Add, true> m_c_Add(const LHS &L,
2229 const RHS &R) {
2230 return BinaryOp_match<LHS, RHS, Instruction::Add, true>(L, R);
2231}
2232
2233/// Matches a Mul with LHS and RHS in either order.
2234template <typename LHS, typename RHS>
2235inline BinaryOp_match<LHS, RHS, Instruction::Mul, true> m_c_Mul(const LHS &L,
2236 const RHS &R) {
2237 return BinaryOp_match<LHS, RHS, Instruction::Mul, true>(L, R);
2238}
2239
2240/// Matches an And with LHS and RHS in either order.
2241template <typename LHS, typename RHS>
2242inline BinaryOp_match<LHS, RHS, Instruction::And, true> m_c_And(const LHS &L,
2243 const RHS &R) {
2244 return BinaryOp_match<LHS, RHS, Instruction::And, true>(L, R);
2245}
2246
2247/// Matches an Or with LHS and RHS in either order.
2248template <typename LHS, typename RHS>
2249inline BinaryOp_match<LHS, RHS, Instruction::Or, true> m_c_Or(const LHS &L,
2250 const RHS &R) {
2251 return BinaryOp_match<LHS, RHS, Instruction::Or, true>(L, R);
2252}
2253
2254/// Matches an Xor with LHS and RHS in either order.
2255template <typename LHS, typename RHS>
2256inline BinaryOp_match<LHS, RHS, Instruction::Xor, true> m_c_Xor(const LHS &L,
2257 const RHS &R) {
2258 return BinaryOp_match<LHS, RHS, Instruction::Xor, true>(L, R);
2259}
2260
2261/// Matches a 'Neg' as 'sub 0, V'.
2262template <typename ValTy>
2263inline BinaryOp_match<cst_pred_ty<is_zero_int>, ValTy, Instruction::Sub>
2264m_Neg(const ValTy &V) {
2265 return m_Sub(m_ZeroInt(), V);
2266}
2267
2268/// Matches a 'Neg' as 'sub nsw 0, V'.
2269template <typename ValTy>
2270inline OverflowingBinaryOp_match<cst_pred_ty<is_zero_int>, ValTy,
2271 Instruction::Sub,
2272 OverflowingBinaryOperator::NoSignedWrap>
2273m_NSWNeg(const ValTy &V) {
2274 return m_NSWSub(m_ZeroInt(), V);
2275}
2276
2277/// Matches a 'Not' as 'xor V, -1' or 'xor -1, V'.
2278template <typename ValTy>
2279inline BinaryOp_match<ValTy, cst_pred_ty<is_all_ones>, Instruction::Xor, true>
2280m_Not(const ValTy &V) {
2281 return m_c_Xor(V, m_AllOnes());
2282}
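
Minimal illustration (hypothetical helper): m_Not matches the xor-with-all-ones idiom with the constant on either side.

#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace llvm::PatternMatch;

// Returns true when V is a bitwise negation, i.e. "xor X, -1" in either
// operand order; binds X.
static bool isBitwiseNot(Value *V, Value *&X) {
  return match(V, m_Not(m_Value(X)));
}
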
2283
2284/// Matches an SMin with LHS and RHS in either order.
2285template <typename LHS, typename RHS>
2286inline MaxMin_match<ICmpInst, LHS, RHS, smin_pred_ty, true>
2287m_c_SMin(const LHS &L, const RHS &R) {
2288 return MaxMin_match<ICmpInst, LHS, RHS, smin_pred_ty, true>(L, R);
2289}
2290/// Matches an SMax with LHS and RHS in either order.
2291template <typename LHS, typename RHS>
2292inline MaxMin_match<ICmpInst, LHS, RHS, smax_pred_ty, true>
2293m_c_SMax(const LHS &L, const RHS &R) {
2294 return MaxMin_match<ICmpInst, LHS, RHS, smax_pred_ty, true>(L, R);
2295}
2296/// Matches a UMin with LHS and RHS in either order.
2297template <typename LHS, typename RHS>
2298inline MaxMin_match<ICmpInst, LHS, RHS, umin_pred_ty, true>
2299m_c_UMin(const LHS &L, const RHS &R) {
2300 return MaxMin_match<ICmpInst, LHS, RHS, umin_pred_ty, true>(L, R);
2301}
2302/// Matches a UMax with LHS and RHS in either order.
2303template <typename LHS, typename RHS>
2304inline MaxMin_match<ICmpInst, LHS, RHS, umax_pred_ty, true>
2305m_c_UMax(const LHS &L, const RHS &R) {
2306 return MaxMin_match<ICmpInst, LHS, RHS, umax_pred_ty, true>(L, R);
2307}
2308
2309template <typename LHS, typename RHS>
2310inline match_combine_or<
2311 match_combine_or<MaxMin_match<ICmpInst, LHS, RHS, smax_pred_ty, true>,
2312 MaxMin_match<ICmpInst, LHS, RHS, smin_pred_ty, true>>,
2313 match_combine_or<MaxMin_match<ICmpInst, LHS, RHS, umax_pred_ty, true>,
2314 MaxMin_match<ICmpInst, LHS, RHS, umin_pred_ty, true>>>
2315m_c_MaxOrMin(const LHS &L, const RHS &R) {
2316 return m_CombineOr(m_CombineOr(m_c_SMax(L, R), m_c_SMin(L, R)),
2317 m_CombineOr(m_c_UMax(L, R), m_c_UMin(L, R)));
2318}
2319
2320/// Matches FAdd with LHS and RHS in either order.
2321template <typename LHS, typename RHS>
2322inline BinaryOp_match<LHS, RHS, Instruction::FAdd, true>
2323m_c_FAdd(const LHS &L, const RHS &R) {
2324 return BinaryOp_match<LHS, RHS, Instruction::FAdd, true>(L, R);
2325}
2326
2327/// Matches FMul with LHS and RHS in either order.
2328template <typename LHS, typename RHS>
2329inline BinaryOp_match<LHS, RHS, Instruction::FMul, true>
2330m_c_FMul(const LHS &L, const RHS &R) {
2331 return BinaryOp_match<LHS, RHS, Instruction::FMul, true>(L, R);
2332}
2333
2334template <typename Opnd_t> struct Signum_match {
2335 Opnd_t Val;
2336 Signum_match(const Opnd_t &V) : Val(V) {}
2337
2338 template <typename OpTy> bool match(OpTy *V) {
2339 unsigned TypeSize = V->getType()->getScalarSizeInBits();
2340 if (TypeSize == 0)
2341 return false;
2342
2343 unsigned ShiftWidth = TypeSize - 1;
2344 Value *OpL = nullptr, *OpR = nullptr;
2345
2346 // This is the representation of signum we match:
2347 //
2348 // signum(x) == (x >> 63) | (-x >>u 63)
2349 //
2350 // An i1 value is its own signum, so it's correct to match
2351 //
2352 // signum(x) == (x >> 0) | (-x >>u 0)
2353 //
2354 // for i1 values.
2355
2356 auto LHS = m_AShr(m_Value(OpL), m_SpecificInt(ShiftWidth));
2357 auto RHS = m_LShr(m_Neg(m_Value(OpR)), m_SpecificInt(ShiftWidth));
2358 auto Signum = m_Or(LHS, RHS);
2359
2360 return Signum.match(V) && OpL == OpR && Val.match(OpL);
2361 }
2362};
2363
2364/// Matches a signum pattern.
2365///
2366/// signum(x) =
2367/// x > 0 -> 1
2368/// x == 0 -> 0
2369/// x < 0 -> -1
2370template <typename Val_t> inline Signum_match<Val_t> m_Signum(const Val_t &V) {
2371 return Signum_match<Val_t>(V);
2372}
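
Hypothetical usage of the signum matcher defined above (editorial example only).

#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace llvm::PatternMatch;

// Returns true when V is the "(x >> (w-1)) | (-x >>u (w-1))" signum idiom;
// binds the operand x.
static bool isSignum(Value *V, Value *&X) {
  return match(V, m_Signum(m_Value(X)));
}
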
2373
2374template <int Ind, typename Opnd_t> struct ExtractValue_match {
2375 Opnd_t Val;
2376 ExtractValue_match(const Opnd_t &V) : Val(V) {}
2377
2378 template <typename OpTy> bool match(OpTy *V) {
2379 if (auto *I = dyn_cast<ExtractValueInst>(V)) {
2380 // If Ind is -1, don't inspect indices
2381 if (Ind != -1 &&
2382 !(I->getNumIndices() == 1 && I->getIndices()[0] == (unsigned)Ind))
2383 return false;
2384 return Val.match(I->getAggregateOperand());
2385 }
2386 return false;
2387 }
2388};
2389
2390/// Match a single index ExtractValue instruction.
2391/// For example m_ExtractValue<1>(...)
2392template <int Ind, typename Val_t>
2393inline ExtractValue_match<Ind, Val_t> m_ExtractValue(const Val_t &V) {
2394 return ExtractValue_match<Ind, Val_t>(V);
2395}
2396
2397/// Match an ExtractValue instruction with any index.
2398/// For example m_ExtractValue(...)
2399template <typename Val_t>
2400inline ExtractValue_match<-1, Val_t> m_ExtractValue(const Val_t &V) {
2401 return ExtractValue_match<-1, Val_t>(V);
2402}
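
An illustrative combination (not from the analyzed source): nesting m_Intrinsic inside m_ExtractValue<1> picks out the overflow bit of an llvm.uadd.with.overflow call.

#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace llvm::PatternMatch;

// Returns true when V extracts field 1 (the overflow flag) of a
// uadd.with.overflow result; binds the two addends.
static bool isUAddOverflowBit(Value *V, Value *&A, Value *&B) {
  return match(V, m_ExtractValue<1>(
                      m_Intrinsic<Intrinsic::uadd_with_overflow>(
                          m_Value(A), m_Value(B))));
}
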
2403
2404/// Matcher for a single index InsertValue instruction.
2405template <int Ind, typename T0, typename T1> struct InsertValue_match {
2406 T0 Op0;
2407 T1 Op1;
2408
2409 InsertValue_match(const T0 &Op0, const T1 &Op1) : Op0(Op0), Op1(Op1) {}
2410
2411 template <typename OpTy> bool match(OpTy *V) {
2412 if (auto *I = dyn_cast<InsertValueInst>(V)) {
2413 return Op0.match(I->getOperand(0)) && Op1.match(I->getOperand(1)) &&
2414 I->getNumIndices() == 1 && Ind == I->getIndices()[0];
2415 }
2416 return false;
2417 }
2418};
2419
2420/// Matches a single index InsertValue instruction.
2421template <int Ind, typename Val_t, typename Elt_t>
2422inline InsertValue_match<Ind, Val_t, Elt_t> m_InsertValue(const Val_t &Val,
2423 const Elt_t &Elt) {
2424 return InsertValue_match<Ind, Val_t, Elt_t>(Val, Elt);
2425}
2426
2427/// Matches patterns for `vscale`. This can either be a call to `llvm.vscale` or
2428/// the constant expression
2429/// `ptrtoint(gep <vscale x 1 x i8>, <vscale x 1 x i8>* null, i32 1)`
2430/// under the right conditions determined by DataLayout.
2431struct VScaleVal_match {
2432 const DataLayout &DL;
2433 VScaleVal_match(const DataLayout &DL) : DL(DL) {}
2434
2435 template <typename ITy> bool match(ITy *V) {
2436 if (m_Intrinsic<Intrinsic::vscale>().match(V))
2437 return true;
2438
2439 Value *Ptr;
2440 if (m_PtrToInt(m_Value(Ptr)).match(V)) {
2441 if (auto *GEP = dyn_cast<GEPOperator>(Ptr)) {
2442 auto *DerefTy = GEP->getSourceElementType();
2443 if (GEP->getNumIndices() == 1 && isa<ScalableVectorType>(DerefTy) &&
2444 m_Zero().match(GEP->getPointerOperand()) &&
2445 m_SpecificInt(1).match(GEP->idx_begin()->get()) &&
2446 DL.getTypeAllocSizeInBits(DerefTy).getKnownMinSize() == 8)
2447 return true;
2448 }
2449 }
2450
2451 return false;
2452 }
2453};
2454
2455inline VScaleVal_match m_VScale(const DataLayout &DL) {
2456 return VScaleVal_match(DL);
2457}
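
A short, hypothetical wrapper around m_VScale; the caller is assumed to have the module's DataLayout available.

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace llvm::PatternMatch;

// Returns true when V is a vscale value: a call to llvm.vscale or the
// equivalent ptrtoint-of-gep constant expression described above.
static bool isVScale(Value *V, const DataLayout &DL) {
  return match(V, m_VScale(DL));
}
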
2458
2459template <typename LHS, typename RHS, unsigned Opcode>
2460struct LogicalOp_match {
2461 LHS L;
2462 RHS R;
2463
2464 LogicalOp_match(const LHS &L, const RHS &R) : L(L), R(R) {}
2465
2466 template <typename T> bool match(T *V) {
2467 if (auto *I = dyn_cast<Instruction>(V)) {
34.1
'I' is null
43.1
'I' is null
34
Assuming 'V' is not a 'Instruction'
35
Taking false branch
43
'V' is not a 'Instruction'
44
Taking false branch
2468 if (!I->getType()->isIntOrIntVectorTy(1))
2469 return false;
2470
2471 if (I->getOpcode() == Opcode && L.match(I->getOperand(0)) &&
2472 R.match(I->getOperand(1)))
2473 return true;
2474
2475 if (auto *SI = dyn_cast<SelectInst>(I)) {
2476 if (Opcode == Instruction::And) {
2477 if (const auto *C = dyn_cast<Constant>(SI->getFalseValue()))
2478 if (C->isNullValue() && L.match(SI->getCondition()) &&
2479 R.match(SI->getTrueValue()))
2480 return true;
2481 } else {
2482 assert(Opcode == Instruction::Or);
2483 if (const auto *C = dyn_cast<Constant>(SI->getTrueValue()))
2484 if (C->isOneValue() && L.match(SI->getCondition()) &&
2485 R.match(SI->getFalseValue()))
2486 return true;
2487 }
2488 }
2489 }
2490
2491 return false;
36
Returning zero, which participates in a condition later
45
Returning zero, which participates in a condition later
2492 }
2493};
2494
2495/// Matches L && R either in the form of L & R or L ? R : false.
2496/// Note that the latter form is poison-blocking.
2497template <typename LHS, typename RHS>
2498inline LogicalOp_match<LHS, RHS, Instruction::And>
2499m_LogicalAnd(const LHS &L, const RHS &R) {
2500 return LogicalOp_match<LHS, RHS, Instruction::And>(L, R);
2501}
2502
2503/// Matches L && R where L and R are arbitrary values.
2504inline auto m_LogicalAnd() { return m_LogicalAnd(m_Value(), m_Value()); }
2505
2506/// Matches L || R either in the form of L | R or L ? true : R.
2507/// Note that the latter form is poison-blocking.
2508template <typename LHS, typename RHS>
2509inline LogicalOp_match<LHS, RHS, Instruction::Or>
2510m_LogicalOr(const LHS &L, const RHS &R) {
2511 return LogicalOp_match<LHS, RHS, Instruction::Or>(L, R);
2512}
2513
2514/// Matches L || R where L and R are arbitrary values.
2515inline auto m_LogicalOr() {
2516 return m_LogicalOr(m_Value(), m_Value());
2517}
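
Illustrative sketch (hypothetical helper) of the poison-aware logical matcher.

#include "llvm/IR/PatternMatch.h"
using namespace llvm;
using namespace llvm::PatternMatch;

// Returns true when V is a logical AND of A and B: either the bitwise
// "and A, B" on i1 (or an i1 vector) or the poison-blocking
// "select A, B, false" form.
static bool isLogicalAnd(Value *V, Value *&A, Value *&B) {
  return match(V, m_LogicalAnd(m_Value(A), m_Value(B)));
}
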
2518
2519} // end namespace PatternMatch
2520} // end namespace llvm
2521
2522#endif // LLVM_IR_PATTERNMATCH_H