Bug Summary

File: src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
Warning: line 5321, column 24
Called C++ object pointer is null
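
The "Called C++ object pointer is null" checker fires when a member function is
invoked through a pointer that the analyzer believes may be null on at least one
path. A minimal sketch of the kind of pattern it flags, using the common
dyn_cast idiom (an assumed illustration, not the code at line 5321):

    // dyn_cast<> returns nullptr when the runtime type does not match.
    BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator());
    if (BI->isConditional())        // call through possibly-null 'BI'
      ...
    // Guarding the call removes the warning:
    if (BI && BI->isConditional())
      ...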

Annotated Source Code


clang -cc1 -cc1 -triple amd64-unknown-openbsd7.0 -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name SimplifyCFG.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model static -mframe-pointer=all -relaxed-aliasing -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/usr/src/gnu/usr.bin/clang/libLLVM/obj -resource-dir /usr/local/lib/clang/13.0.0 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Analysis -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ASMParser -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/BinaryFormat -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Bitcode -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Bitcode -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Bitstream -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /include/llvm/CodeGen -I /include/llvm/CodeGen/PBQP -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/IR -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/IR -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Coroutines -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ProfileData/Coverage -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/CodeView -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/DWARF -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/MSF -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/PDB -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Demangle -I 
/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ExecutionEngine -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ExecutionEngine/JITLink -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ExecutionEngine/Orc -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend/OpenACC -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend/OpenMP -I /include/llvm/CodeGen/GlobalISel -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/IRReader -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/InstCombine -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/Transforms/InstCombine -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/LTO -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Linker -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/MC -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/MC/MCParser -I /include/llvm/CodeGen/MIRParser -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Object -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Option -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Passes -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ProfileData -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Scalar -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ADT -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Support -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/Symbolize -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Target -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Utils -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Vectorize -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/IPO -I 
/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include -I /usr/src/gnu/usr.bin/clang/libLLVM/../include -I /usr/src/gnu/usr.bin/clang/libLLVM/obj -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include -D NDEBUG -D __STDC_LIMIT_MACROS -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D LLVM_PREFIX="/usr" -internal-isystem /usr/include/c++/v1 -internal-isystem /usr/local/lib/clang/13.0.0/include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/usr/src/gnu/usr.bin/clang/libLLVM/obj -ferror-limit 19 -fvisibility-inlines-hidden -fwrapv -stack-protector 2 -fno-rtti -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-valloc -fno-builtin-free -fno-builtin-strdup -fno-builtin-strndup -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /home/ben/Projects/vmm/scan-build/2022-01-12-194120-40624-1 -x c++ /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Transforms/Utils/SimplifyCFG.cpp
1//===- SimplifyCFG.cpp - Code to perform CFG simplification ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Peephole optimize the CFG.
10//
11//===----------------------------------------------------------------------===//
12
13#include "llvm/ADT/APInt.h"
14#include "llvm/ADT/ArrayRef.h"
15#include "llvm/ADT/DenseMap.h"
16#include "llvm/ADT/MapVector.h"
17#include "llvm/ADT/Optional.h"
18#include "llvm/ADT/STLExtras.h"
19#include "llvm/ADT/ScopeExit.h"
20#include "llvm/ADT/Sequence.h"
21#include "llvm/ADT/SetOperations.h"
22#include "llvm/ADT/SetVector.h"
23#include "llvm/ADT/SmallPtrSet.h"
24#include "llvm/ADT/SmallVector.h"
25#include "llvm/ADT/Statistic.h"
26#include "llvm/ADT/StringRef.h"
27#include "llvm/Analysis/AssumptionCache.h"
28#include "llvm/Analysis/ConstantFolding.h"
29#include "llvm/Analysis/EHPersonalities.h"
30#include "llvm/Analysis/GuardUtils.h"
31#include "llvm/Analysis/InstructionSimplify.h"
32#include "llvm/Analysis/MemorySSA.h"
33#include "llvm/Analysis/MemorySSAUpdater.h"
34#include "llvm/Analysis/TargetTransformInfo.h"
35#include "llvm/Analysis/ValueTracking.h"
36#include "llvm/IR/Attributes.h"
37#include "llvm/IR/BasicBlock.h"
38#include "llvm/IR/CFG.h"
39#include "llvm/IR/Constant.h"
40#include "llvm/IR/ConstantRange.h"
41#include "llvm/IR/Constants.h"
42#include "llvm/IR/DataLayout.h"
43#include "llvm/IR/DerivedTypes.h"
44#include "llvm/IR/Function.h"
45#include "llvm/IR/GlobalValue.h"
46#include "llvm/IR/GlobalVariable.h"
47#include "llvm/IR/IRBuilder.h"
48#include "llvm/IR/InstrTypes.h"
49#include "llvm/IR/Instruction.h"
50#include "llvm/IR/Instructions.h"
51#include "llvm/IR/IntrinsicInst.h"
52#include "llvm/IR/Intrinsics.h"
53#include "llvm/IR/LLVMContext.h"
54#include "llvm/IR/MDBuilder.h"
55#include "llvm/IR/Metadata.h"
56#include "llvm/IR/Module.h"
57#include "llvm/IR/NoFolder.h"
58#include "llvm/IR/Operator.h"
59#include "llvm/IR/PatternMatch.h"
60#include "llvm/IR/PseudoProbe.h"
61#include "llvm/IR/Type.h"
62#include "llvm/IR/Use.h"
63#include "llvm/IR/User.h"
64#include "llvm/IR/Value.h"
65#include "llvm/IR/ValueHandle.h"
66#include "llvm/Support/BranchProbability.h"
67#include "llvm/Support/Casting.h"
68#include "llvm/Support/CommandLine.h"
69#include "llvm/Support/Debug.h"
70#include "llvm/Support/ErrorHandling.h"
71#include "llvm/Support/KnownBits.h"
72#include "llvm/Support/MathExtras.h"
73#include "llvm/Support/raw_ostream.h"
74#include "llvm/Transforms/Utils/BasicBlockUtils.h"
75#include "llvm/Transforms/Utils/Local.h"
76#include "llvm/Transforms/Utils/SSAUpdater.h"
77#include "llvm/Transforms/Utils/ValueMapper.h"
78#include <algorithm>
79#include <cassert>
80#include <climits>
81#include <cstddef>
82#include <cstdint>
83#include <iterator>
84#include <map>
85#include <set>
86#include <tuple>
87#include <utility>
88#include <vector>
89
90using namespace llvm;
91using namespace PatternMatch;
92
93#define DEBUG_TYPE "simplifycfg"
94
95cl::opt<bool> llvm::RequireAndPreserveDomTree(
96 "simplifycfg-require-and-preserve-domtree", cl::Hidden, cl::ZeroOrMore,
97 cl::init(false),
98 cl::desc("Temorary development switch used to gradually uplift SimplifyCFG "
99 "into preserving DomTree,"));
100
101// Chosen as 2 so as to be cheap, but still to have enough power to fold
102// a select, so the "clamp" idiom (of a min followed by a max) will be caught.
103// To catch this, we need to fold a compare and a select, hence '2' being the
104// minimum reasonable default.
105static cl::opt<unsigned> PHINodeFoldingThreshold(
106 "phi-node-folding-threshold", cl::Hidden, cl::init(2),
107 cl::desc(
108 "Control the amount of phi node folding to perform (default = 2)"));
109
110static cl::opt<unsigned> TwoEntryPHINodeFoldingThreshold(
111 "two-entry-phi-node-folding-threshold", cl::Hidden, cl::init(4),
112 cl::desc("Control the maximal total instruction cost that we are willing "
113 "to speculatively execute to fold a 2-entry PHI node into a "
114 "select (default = 4)"));
115
116static cl::opt<bool>
117 HoistCommon("simplifycfg-hoist-common", cl::Hidden, cl::init(true),
118 cl::desc("Hoist common instructions up to the parent block"));
119
120static cl::opt<bool>
121 SinkCommon("simplifycfg-sink-common", cl::Hidden, cl::init(true),
122 cl::desc("Sink common instructions down to the end block"));
123
124static cl::opt<bool> HoistCondStores(
125 "simplifycfg-hoist-cond-stores", cl::Hidden, cl::init(true),
126 cl::desc("Hoist conditional stores if an unconditional store precedes"));
127
128static cl::opt<bool> MergeCondStores(
129 "simplifycfg-merge-cond-stores", cl::Hidden, cl::init(true),
130 cl::desc("Hoist conditional stores even if an unconditional store does not "
131 "precede - hoist multiple conditional stores into a single "
132 "predicated store"));
133
134static cl::opt<bool> MergeCondStoresAggressively(
135 "simplifycfg-merge-cond-stores-aggressively", cl::Hidden, cl::init(false),
136 cl::desc("When merging conditional stores, do so even if the resultant "
137 "basic blocks are unlikely to be if-converted as a result"));
138
139static cl::opt<bool> SpeculateOneExpensiveInst(
140 "speculate-one-expensive-inst", cl::Hidden, cl::init(true),
141 cl::desc("Allow exactly one expensive instruction to be speculatively "
142 "executed"));
143
144static cl::opt<unsigned> MaxSpeculationDepth(
145 "max-speculation-depth", cl::Hidden, cl::init(10),
146 cl::desc("Limit maximum recursion depth when calculating costs of "
147 "speculatively executed instructions"));
148
149static cl::opt<int>
150 MaxSmallBlockSize("simplifycfg-max-small-block-size", cl::Hidden,
151 cl::init(10),
152 cl::desc("Max size of a block which is still considered "
153 "small enough to thread through"));
154
155// Two is chosen to allow one negation and a logical combine.
156static cl::opt<unsigned>
157 BranchFoldThreshold("simplifycfg-branch-fold-threshold", cl::Hidden,
158 cl::init(2),
159 cl::desc("Maximum cost of combining conditions when "
160 "folding branches"));
161
162STATISTIC(NumBitMaps, "Number of switch instructions turned into bitmaps");
163STATISTIC(NumLinearMaps,
164 "Number of switch instructions turned into linear mapping");
165STATISTIC(NumLookupTables,
166 "Number of switch instructions turned into lookup tables");
167STATISTIC(
168 NumLookupTablesHoles,
169 "Number of switch instructions turned into lookup tables (holes checked)");
170STATISTIC(NumTableCmpReuses, "Number of reused switch table lookup compares");
171STATISTIC(NumFoldValueComparisonIntoPredecessors,
172 "Number of value comparisons folded into predecessor basic blocks");
173STATISTIC(NumFoldBranchToCommonDest,
174 "Number of branches folded into predecessor basic block");
175STATISTIC(
176 NumHoistCommonCode,
177 "Number of common instruction 'blocks' hoisted up to the begin block");
178STATISTIC(NumHoistCommonInstrs,
179 "Number of common instructions hoisted up to the begin block");
180STATISTIC(NumSinkCommonCode,
181 "Number of common instruction 'blocks' sunk down to the end block");
182STATISTIC(NumSinkCommonInstrs,
183 "Number of common instructions sunk down to the end block");
184STATISTIC(NumSpeculations, "Number of speculative executed instructions");
185STATISTIC(NumInvokes,
186 "Number of invokes with empty resume blocks simplified into calls");
187
188namespace {
189
190// The first field contains the value that the switch produces when a certain
191// case group is selected, and the second field is a vector containing the
192// cases composing the case group.
193using SwitchCaseResultVectorTy =
194 SmallVector<std::pair<Constant *, SmallVector<ConstantInt *, 4>>, 2>;
195
196// The first field contains the phi node that generates a result of the switch
197// and the second field contains the value generated for a certain case in the
198// switch for that PHI.
199using SwitchCaseResultsTy = SmallVector<std::pair<PHINode *, Constant *>, 4>;
200
201/// ValueEqualityComparisonCase - Represents a case of a switch.
202struct ValueEqualityComparisonCase {
203 ConstantInt *Value;
204 BasicBlock *Dest;
205
206 ValueEqualityComparisonCase(ConstantInt *Value, BasicBlock *Dest)
207 : Value(Value), Dest(Dest) {}
208
209 bool operator<(ValueEqualityComparisonCase RHS) const {
210 // Comparing pointers is ok as we only rely on the order for uniquing.
211 return Value < RHS.Value;
212 }
213
214 bool operator==(BasicBlock *RHSDest) const { return Dest == RHSDest; }
215};
216
217class SimplifyCFGOpt {
218 const TargetTransformInfo &TTI;
219 DomTreeUpdater *DTU;
220 const DataLayout &DL;
221 ArrayRef<WeakVH> LoopHeaders;
222 const SimplifyCFGOptions &Options;
223 bool Resimplify;
224
225 Value *isValueEqualityComparison(Instruction *TI);
226 BasicBlock *GetValueEqualityComparisonCases(
227 Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases);
228 bool SimplifyEqualityComparisonWithOnlyPredecessor(Instruction *TI,
229 BasicBlock *Pred,
230 IRBuilder<> &Builder);
231 bool PerformValueComparisonIntoPredecessorFolding(Instruction *TI, Value *&CV,
232 Instruction *PTI,
233 IRBuilder<> &Builder);
234 bool FoldValueComparisonIntoPredecessors(Instruction *TI,
235 IRBuilder<> &Builder);
236
237 bool simplifyResume(ResumeInst *RI, IRBuilder<> &Builder);
238 bool simplifySingleResume(ResumeInst *RI);
239 bool simplifyCommonResume(ResumeInst *RI);
240 bool simplifyCleanupReturn(CleanupReturnInst *RI);
241 bool simplifyUnreachable(UnreachableInst *UI);
242 bool simplifySwitch(SwitchInst *SI, IRBuilder<> &Builder);
243 bool simplifyIndirectBr(IndirectBrInst *IBI);
244 bool simplifyBranch(BranchInst *Branch, IRBuilder<> &Builder);
245 bool simplifyUncondBranch(BranchInst *BI, IRBuilder<> &Builder);
246 bool simplifyCondBranch(BranchInst *BI, IRBuilder<> &Builder);
247
248 bool tryToSimplifyUncondBranchWithICmpInIt(ICmpInst *ICI,
249 IRBuilder<> &Builder);
250
251 bool HoistThenElseCodeToIf(BranchInst *BI, const TargetTransformInfo &TTI,
252 bool EqTermsOnly);
253 bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
254 const TargetTransformInfo &TTI);
255 bool SimplifyTerminatorOnSelect(Instruction *OldTerm, Value *Cond,
256 BasicBlock *TrueBB, BasicBlock *FalseBB,
257 uint32_t TrueWeight, uint32_t FalseWeight);
258 bool SimplifyBranchOnICmpChain(BranchInst *BI, IRBuilder<> &Builder,
259 const DataLayout &DL);
260 bool SimplifySwitchOnSelect(SwitchInst *SI, SelectInst *Select);
261 bool SimplifyIndirectBrOnSelect(IndirectBrInst *IBI, SelectInst *SI);
262 bool TurnSwitchRangeIntoICmp(SwitchInst *SI, IRBuilder<> &Builder);
263
264public:
265 SimplifyCFGOpt(const TargetTransformInfo &TTI, DomTreeUpdater *DTU,
266 const DataLayout &DL, ArrayRef<WeakVH> LoopHeaders,
267 const SimplifyCFGOptions &Opts)
268 : TTI(TTI), DTU(DTU), DL(DL), LoopHeaders(LoopHeaders), Options(Opts) {
269 assert((!DTU || !DTU->hasPostDomTree()) &&
270 "SimplifyCFG is not yet capable of maintaining validity of a "
271 "PostDomTree, so don't ask for it.");
272 }
273
274 bool simplifyOnce(BasicBlock *BB);
275 bool simplifyOnceImpl(BasicBlock *BB);
276 bool run(BasicBlock *BB);
277
278 // Helper to set Resimplify and return change indication.
279 bool requestResimplify() {
280 Resimplify = true;
281 return true;
282 }
283};
284
285} // end anonymous namespace
286
287/// Return true if it is safe to merge these two
288/// terminator instructions together.
289static bool
290SafeToMergeTerminators(Instruction *SI1, Instruction *SI2,
291 SmallSetVector<BasicBlock *, 4> *FailBlocks = nullptr) {
292 if (SI1 == SI2)
293 return false; // Can't merge with self!
294
295 // It is not safe to merge these two switch instructions if they have a common
296 // successor, and if that successor has a PHI node, and if *that* PHI node has
297 // conflicting incoming values from the two switch blocks.
298 BasicBlock *SI1BB = SI1->getParent();
299 BasicBlock *SI2BB = SI2->getParent();
300
301 SmallPtrSet<BasicBlock *, 16> SI1Succs(succ_begin(SI1BB), succ_end(SI1BB));
302 bool Fail = false;
303 for (BasicBlock *Succ : successors(SI2BB))
304 if (SI1Succs.count(Succ))
305 for (BasicBlock::iterator BBI = Succ->begin(); isa<PHINode>(BBI); ++BBI) {
306 PHINode *PN = cast<PHINode>(BBI);
307 if (PN->getIncomingValueForBlock(SI1BB) !=
308 PN->getIncomingValueForBlock(SI2BB)) {
309 if (FailBlocks)
310 FailBlocks->insert(Succ);
311 Fail = true;
312 }
313 }
314
315 return !Fail;
316}
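// Illustrative sketch (assumed example): merging is rejected when a shared
// successor's PHI would receive conflicting values from the two terminator
// blocks, e.g.
//
//   merge:
//     %p = phi i32 [ 1, %si1bb ], [ 2, %si2bb ]
//
// Redirecting %si2bb's edge through %si1bb would leave %p needing both 1 and 2
// for the same predecessor, so %merge is recorded in FailBlocks instead.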
317
318/// Update PHI nodes in Succ to indicate that there will now be entries in it
319/// from the 'NewPred' block. The values that will be flowing into the PHI nodes
320/// will be the same as those coming in from ExistPred, an existing predecessor
321/// of Succ.
322static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
323 BasicBlock *ExistPred,
324 MemorySSAUpdater *MSSAU = nullptr) {
325 for (PHINode &PN : Succ->phis())
326 PN.addIncoming(PN.getIncomingValueForBlock(ExistPred), NewPred);
327 if (MSSAU)
328 if (auto *MPhi = MSSAU->getMemorySSA()->getMemoryAccess(Succ))
329 MPhi->addIncoming(MPhi->getIncomingValueForBlock(ExistPred), NewPred);
330}
331
332/// Compute an abstract "cost" of speculating the given instruction,
333/// which is assumed to be safe to speculate. TCC_Free means cheap,
334/// TCC_Basic means less cheap, and TCC_Expensive means prohibitively
335/// expensive.
336static InstructionCost computeSpeculationCost(const User *I,
337 const TargetTransformInfo &TTI) {
338 assert(isSafeToSpeculativelyExecute(I) &&
339 "Instruction is not safe to speculatively execute!");
340 return TTI.getUserCost(I, TargetTransformInfo::TCK_SizeAndLatency);
341}
342
343/// If we have a merge point of an "if condition" as accepted above,
344/// return true if the specified value dominates the block. We
345/// don't handle the true generality of domination here, just a special case
346/// which works well enough for us.
347///
348/// If AggressiveInsts is non-null, and if V does not dominate BB, we check to
349/// see if V (which must be an instruction) and its recursive operands
350/// that do not dominate BB have a combined cost lower than Budget and
351/// are non-trapping. If both are true, the instruction is inserted into the
352/// set and true is returned.
353///
354/// The cost for most non-trapping instructions is defined as 1 except for
355/// Select whose cost is 2.
356///
357/// After this function returns, Cost is increased by the cost of
358/// V plus its non-dominating operands. If that cost is greater than
359/// Budget, false is returned and Cost is undefined.
360static bool dominatesMergePoint(Value *V, BasicBlock *BB,
361 SmallPtrSetImpl<Instruction *> &AggressiveInsts,
362 InstructionCost &Cost,
363 InstructionCost Budget,
364 const TargetTransformInfo &TTI,
365 unsigned Depth = 0) {
366 // It is possible to hit a zero-cost cycle (phi/gep instructions for example),
367 // so limit the recursion depth.
368 // TODO: While this recursion limit does prevent pathological behavior, it
369 // would be better to track visited instructions to avoid cycles.
370 if (Depth == MaxSpeculationDepth)
371 return false;
372
373 Instruction *I = dyn_cast<Instruction>(V);
374 if (!I) {
375 // Non-instructions all dominate instructions, but not all constantexprs
376 // can be executed unconditionally.
377 if (ConstantExpr *C = dyn_cast<ConstantExpr>(V))
378 if (C->canTrap())
379 return false;
380 return true;
381 }
382 BasicBlock *PBB = I->getParent();
383
384 // We don't want to allow weird loops that might have the "if condition" in
385 // the bottom of this block.
386 if (PBB == BB)
387 return false;
388
389 // If this instruction is defined in a block that contains an unconditional
390 // branch to BB, then it must be in the 'conditional' part of the "if
391 // statement". If not, it definitely dominates the region.
392 BranchInst *BI = dyn_cast<BranchInst>(PBB->getTerminator());
393 if (!BI || BI->isConditional() || BI->getSuccessor(0) != BB)
394 return true;
395
396 // If we have seen this instruction before, don't count it again.
397 if (AggressiveInsts.count(I))
398 return true;
399
400 // Okay, it looks like the instruction IS in the "condition". Check to
401 // see if it's a cheap instruction to unconditionally compute, and if it
402 // only uses stuff defined outside of the condition. If so, hoist it out.
403 if (!isSafeToSpeculativelyExecute(I))
404 return false;
405
406 Cost += computeSpeculationCost(I, TTI);
407
408 // Allow exactly one instruction to be speculated regardless of its cost
409 // (as long as it is safe to do so).
410 // This is intended to flatten the CFG even if the instruction is a division
411 // or other expensive operation. The speculation of an expensive instruction
412 // is expected to be undone in CodeGenPrepare if the speculation has not
413 // enabled further IR optimizations.
414 if (Cost > Budget &&
415 (!SpeculateOneExpensiveInst || !AggressiveInsts.empty() || Depth > 0 ||
416 !Cost.isValid()))
417 return false;
418
419 // Okay, we can only really hoist these out if their operands do
420 // not take us over the cost threshold.
421 for (Use &Op : I->operands())
422 if (!dominatesMergePoint(Op, BB, AggressiveInsts, Cost, Budget, TTI,
423 Depth + 1))
424 return false;
425 // Okay, it's safe to do this! Remember this instruction.
426 AggressiveInsts.insert(I);
427 return true;
428}
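// Illustrative sketch (assumed example): given
//
//   if (c) { tmp = a + 1; }     // 'a + 1' is defined only in the then-block
//   x = c ? tmp : b;            // two-entry PHI at the merge point
//
// the add does not dominate the merge point, but it is safe to speculate and
// its cost fits the default budget, so it lands in AggressiveInsts and the
// PHI can later be folded into a select.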
429
430/// Extract ConstantInt from value, looking through IntToPtr
431/// and PointerNullValue. Return NULL if value is not a constant int.
432static ConstantInt *GetConstantInt(Value *V, const DataLayout &DL) {
433 // Normal constant int.
434 ConstantInt *CI = dyn_cast<ConstantInt>(V);
435 if (CI || !isa<Constant>(V) || !V->getType()->isPointerTy())
436 return CI;
437
438 // This is some kind of pointer constant. Turn it into a pointer-sized
439 // ConstantInt if possible.
440 IntegerType *PtrTy = cast<IntegerType>(DL.getIntPtrType(V->getType()));
441
442 // Null pointer means 0, see SelectionDAGBuilder::getValue(const Value*).
443 if (isa<ConstantPointerNull>(V))
444 return ConstantInt::get(PtrTy, 0);
445
446 // IntToPtr const int.
447 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
448 if (CE->getOpcode() == Instruction::IntToPtr)
449 if (ConstantInt *CI = dyn_cast<ConstantInt>(CE->getOperand(0))) {
450 // The constant is very likely to have the right type already.
451 if (CI->getType() == PtrTy)
452 return CI;
453 else
454 return cast<ConstantInt>(
455 ConstantExpr::getIntegerCast(CI, PtrTy, /*isSigned=*/false));
456 }
457 return nullptr;
458}
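// Illustrative sketch (assumed examples) of what GetConstantInt accepts:
//   i32 7                        --> the ConstantInt 7 itself
//   i8* null                     --> a pointer-sized ConstantInt 0
//   inttoptr (i64 42 to i8*)     --> ConstantInt 42, cast to the pointer width
// Any other value (globals, non-constants, ...) yields nullptr.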
459
460namespace {
461
462/// Given a chain of or (||) or and (&&) comparisons of a value against a
463/// constant, this will try to recover the information required for a switch
464/// structure.
465/// It will depth-first traverse the chain of comparisons, looking for patterns
466/// like %a == 12 or %a < 4, and combine them to produce a set of integers
467/// representing the different cases for the switch.
468/// Note that if the chain is composed of '||' it will build the set of elements
469/// that match the comparisons (i.e. any of these values validates the chain),
470/// while for a chain of '&&' it will build the set of elements that make the
471/// test fail.
472struct ConstantComparesGatherer {
473 const DataLayout &DL;
474
475 /// Value found for the switch comparison
476 Value *CompValue = nullptr;
477
478 /// Extra clause to be checked before the switch
479 Value *Extra = nullptr;
480
481 /// Set of integers to match in switch
482 SmallVector<ConstantInt *, 8> Vals;
483
484 /// Number of comparisons matched in the and/or chain
485 unsigned UsedICmps = 0;
486
487 /// Construct and compute the result for the comparison instruction Cond
488 ConstantComparesGatherer(Instruction *Cond, const DataLayout &DL) : DL(DL) {
489 gather(Cond);
490 }
491
492 ConstantComparesGatherer(const ConstantComparesGatherer &) = delete;
493 ConstantComparesGatherer &
494 operator=(const ConstantComparesGatherer &) = delete;
495
496private:
497 /// Try to set the current value used for the comparison, it succeeds only if
498 /// it wasn't set before or if the new value is the same as the old one
499 bool setValueOnce(Value *NewVal) {
500 if (CompValue && CompValue != NewVal)
501 return false;
502 CompValue = NewVal;
503 return (CompValue != nullptr);
504 }
505
506 /// Try to match Instruction "I" as a comparison against a constant and
507 /// populates the array Vals with the set of values that match (or do not
508 /// match depending on isEQ).
509 /// Return false on failure. On success, the Value the comparison matched
510 /// against is placed in CompValue.
511 /// If CompValue is already set, the function is expected to fail if a match
512 /// is found but the value compared to is different.
513 bool matchInstruction(Instruction *I, bool isEQ) {
514 // If this is an icmp against a constant, handle this as one of the cases.
515 ICmpInst *ICI;
516 ConstantInt *C;
517 if (!((ICI = dyn_cast<ICmpInst>(I)) &&
518 (C = GetConstantInt(I->getOperand(1), DL)))) {
519 return false;
520 }
521
522 Value *RHSVal;
523 const APInt *RHSC;
524
525 // Pattern match a special case
526 // (x & ~2^z) == y --> x == y || x == y|2^z
527 // This undoes a transformation done by instcombine to fuse 2 compares.
528 if (ICI->getPredicate() == (isEQ ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE)) {
529 // It's a little bit hard to see why the following transformations are
530 // correct. Here is a CVC3 program to verify them for 64-bit values:
531
532 /*
533 ONE : BITVECTOR(64) = BVZEROEXTEND(0bin1, 63);
534 x : BITVECTOR(64);
535 y : BITVECTOR(64);
536 z : BITVECTOR(64);
537 mask : BITVECTOR(64) = BVSHL(ONE, z);
538 QUERY( (y & ~mask = y) =>
539 ((x & ~mask = y) <=> (x = y OR x = (y | mask)))
540 );
541 QUERY( (y | mask = y) =>
542 ((x | mask = y) <=> (x = y OR x = (y & ~mask)))
543 );
544 */
545
546 // Please note that each pattern must be a dual implication (<--> or
547 // iff). One directional implication can create spurious matches. If the
548 // implication is only one-way, an unsatisfiable condition on the left
549 // side can imply a satisfiable condition on the right side. Dual
550 // implication ensures that satisfiable conditions are transformed to
551 // other satisfiable conditions and unsatisfiable conditions are
552 // transformed to other unsatisfiable conditions.
553
554 // Here is a concrete example of an unsatisfiable condition on the left
555 // implying a satisfiable condition on the right:
556 //
557 // mask = (1 << z)
558 // (x & ~mask) == y --> (x == y || x == (y | mask))
559 //
560 // Substituting y = 3, z = 0 yields:
561 // (x & -2) == 3 --> (x == 3 || x == 2)
562
563 // Pattern match a special case:
564 /*
565 QUERY( (y & ~mask = y) =>
566 ((x & ~mask = y) <=> (x = y OR x = (y | mask)))
567 );
568 */
569 if (match(ICI->getOperand(0),
570 m_And(m_Value(RHSVal), m_APInt(RHSC)))) {
571 APInt Mask = ~*RHSC;
572 if (Mask.isPowerOf2() && (C->getValue() & ~Mask) == C->getValue()) {
573 // If we already have a value for the switch, it has to match!
574 if (!setValueOnce(RHSVal))
575 return false;
576
577 Vals.push_back(C);
578 Vals.push_back(
579 ConstantInt::get(C->getContext(),
580 C->getValue() | Mask));
581 UsedICmps++;
582 return true;
583 }
584 }
585
586 // Pattern match a special case:
587 /*
588 QUERY( (y | mask = y) =>
589 ((x | mask = y) <=> (x = y OR x = (y & ~mask)))
590 );
591 */
592 if (match(ICI->getOperand(0),
593 m_Or(m_Value(RHSVal), m_APInt(RHSC)))) {
594 APInt Mask = *RHSC;
595 if (Mask.isPowerOf2() && (C->getValue() | Mask) == C->getValue()) {
596 // If we already have a value for the switch, it has to match!
597 if (!setValueOnce(RHSVal))
598 return false;
599
600 Vals.push_back(C);
601 Vals.push_back(ConstantInt::get(C->getContext(),
602 C->getValue() & ~Mask));
603 UsedICmps++;
604 return true;
605 }
606 }
607
608 // If we already have a value for the switch, it has to match!
609 if (!setValueOnce(ICI->getOperand(0)))
610 return false;
611
612 UsedICmps++;
613 Vals.push_back(C);
614 return ICI->getOperand(0);
615 }
616
617 // If we have "x ult 3", for example, then we can add 0,1,2 to the set.
618 ConstantRange Span =
619 ConstantRange::makeExactICmpRegion(ICI->getPredicate(), C->getValue());
620
621 // Shift the range if the compare is fed by an add. This is the range
622 // compare idiom as emitted by instcombine.
623 Value *CandidateVal = I->getOperand(0);
624 if (match(I->getOperand(0), m_Add(m_Value(RHSVal), m_APInt(RHSC)))) {
625 Span = Span.subtract(*RHSC);
626 CandidateVal = RHSVal;
627 }
628
629 // If this is an and/!= check, then we are looking to build the set of
630 // values that *don't* pass the and chain. I.e. to turn "x ugt 2" into
631 // x != 0 && x != 1.
632 if (!isEQ)
633 Span = Span.inverse();
634
635 // If there are a ton of values, we don't want to make a ginormous switch.
636 if (Span.isSizeLargerThan(8) || Span.isEmptySet()) {
637 return false;
638 }
639
640 // If we already have a value for the switch, it has to match!
641 if (!setValueOnce(CandidateVal))
642 return false;
643
644 // Add all values from the range to the set
645 for (APInt Tmp = Span.getLower(); Tmp != Span.getUpper(); ++Tmp)
646 Vals.push_back(ConstantInt::get(I->getContext(), Tmp));
647
648 UsedICmps++;
649 return true;
650 }
651
652 /// Given a potentially 'or'd or 'and'd together collection of icmp
653 /// eq/ne/lt/gt instructions that compare a value against a constant, extract
654 /// the value being compared, and stick the list constants into the Vals
655 /// vector.
656 /// One "Extra" case is allowed to differ from the other.
657 void gather(Value *V) {
658 bool isEQ = match(V, m_LogicalOr(m_Value(), m_Value()));
659
660 // Keep a stack (SmallVector for efficiency) for depth-first traversal
661 SmallVector<Value *, 8> DFT;
662 SmallPtrSet<Value *, 8> Visited;
663
664 // Initialize
665 Visited.insert(V);
666 DFT.push_back(V);
667
668 while (!DFT.empty()) {
669 V = DFT.pop_back_val();
670
671 if (Instruction *I = dyn_cast<Instruction>(V)) {
672 // If it is a || (or && depending on isEQ), process the operands.
673 Value *Op0, *Op1;
674 if (isEQ ? match(I, m_LogicalOr(m_Value(Op0), m_Value(Op1)))
675 : match(I, m_LogicalAnd(m_Value(Op0), m_Value(Op1)))) {
676 if (Visited.insert(Op1).second)
677 DFT.push_back(Op1);
678 if (Visited.insert(Op0).second)
679 DFT.push_back(Op0);
680
681 continue;
682 }
683
684 // Try to match the current instruction
685 if (matchInstruction(I, isEQ))
686 // Match succeed, continue the loop
687 continue;
688 }
689
690 // One element of the sequence of || (or &&) could not be matched as a
691 // comparison against the same value as the others.
692 // We allow only one "Extra" case to be checked before the switch.
693 if (!Extra) {
694 Extra = V;
695 continue;
696 }
697 // Failed to parse a proper sequence, abort now
698 CompValue = nullptr;
699 break;
700 }
701 }
702};
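// Illustrative sketch (assumed example): for a condition such as
//
//   if (x == 12 || x == 35 || x < 4u || y == 0) ...
//
// gather() visits the x comparisons first and ends with CompValue = x,
// Vals = {12, 35, 0, 1, 2, 3} and UsedICmps = 3, while 'y == 0' is parked in
// Extra as the single clause allowed to be checked before the switch.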
703
704} // end anonymous namespace
705
706static void EraseTerminatorAndDCECond(Instruction *TI,
707 MemorySSAUpdater *MSSAU = nullptr) {
708 Instruction *Cond = nullptr;
709 if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
710 Cond = dyn_cast<Instruction>(SI->getCondition());
711 } else if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
712 if (BI->isConditional())
713 Cond = dyn_cast<Instruction>(BI->getCondition());
714 } else if (IndirectBrInst *IBI = dyn_cast<IndirectBrInst>(TI)) {
715 Cond = dyn_cast<Instruction>(IBI->getAddress());
716 }
717
718 TI->eraseFromParent();
719 if (Cond)
720 RecursivelyDeleteTriviallyDeadInstructions(Cond, nullptr, MSSAU);
721}
722
723/// If the specified terminator checks to see if a value is equal to a constant
724/// integer value, return the value being compared; otherwise return null.
725Value *SimplifyCFGOpt::isValueEqualityComparison(Instruction *TI) {
726 Value *CV = nullptr;
727 if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
728 // Do not permit merging of large switch instructions into their
729 // predecessors unless there is only one predecessor.
730 if (!SI->getParent()->hasNPredecessorsOrMore(128 / SI->getNumSuccessors()))
731 CV = SI->getCondition();
732 } else if (BranchInst *BI = dyn_cast<BranchInst>(TI))
733 if (BI->isConditional() && BI->getCondition()->hasOneUse())
734 if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition())) {
735 if (ICI->isEquality() && GetConstantInt(ICI->getOperand(1), DL))
736 CV = ICI->getOperand(0);
737 }
738
739 // Unwrap any lossless ptrtoint cast.
740 if (CV) {
741 if (PtrToIntInst *PTII = dyn_cast<PtrToIntInst>(CV)) {
742 Value *Ptr = PTII->getPointerOperand();
743 if (PTII->getType() == DL.getIntPtrType(Ptr->getType()))
744 CV = Ptr;
745 }
746 }
747 return CV;
748}
749
750/// Given a value comparison instruction,
751/// decode all of the 'cases' that it represents and return the 'default' block.
752BasicBlock *SimplifyCFGOpt::GetValueEqualityComparisonCases(
753 Instruction *TI, std::vector<ValueEqualityComparisonCase> &Cases) {
754 if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) {
755 Cases.reserve(SI->getNumCases());
756 for (auto Case : SI->cases())
757 Cases.push_back(ValueEqualityComparisonCase(Case.getCaseValue(),
758 Case.getCaseSuccessor()));
759 return SI->getDefaultDest();
760 }
761
762 BranchInst *BI = cast<BranchInst>(TI);
763 ICmpInst *ICI = cast<ICmpInst>(BI->getCondition());
764 BasicBlock *Succ = BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_NE);
765 Cases.push_back(ValueEqualityComparisonCase(
766 GetConstantInt(ICI->getOperand(1), DL), Succ));
767 return BI->getSuccessor(ICI->getPredicate() == ICmpInst::ICMP_EQ);
768}
769
770/// Given a vector of bb/value pairs, remove any entries
771/// in the list that match the specified block.
772static void
773EliminateBlockCases(BasicBlock *BB,
774 std::vector<ValueEqualityComparisonCase> &Cases) {
775 llvm::erase_value(Cases, BB);
776}
777
778/// Return true if there are any keys in C1 that exist in C2 as well.
779static bool ValuesOverlap(std::vector<ValueEqualityComparisonCase> &C1,
780 std::vector<ValueEqualityComparisonCase> &C2) {
781 std::vector<ValueEqualityComparisonCase> *V1 = &C1, *V2 = &C2;
782
783 // Make V1 be smaller than V2.
784 if (V1->size() > V2->size())
785 std::swap(V1, V2);
786
787 if (V1->empty())
788 return false;
789 if (V1->size() == 1) {
790 // Just scan V2.
791 ConstantInt *TheVal = (*V1)[0].Value;
792 for (unsigned i = 0, e = V2->size(); i != e; ++i)
793 if (TheVal == (*V2)[i].Value)
794 return true;
795 }
796
797 // Otherwise, just sort both lists and compare element by element.
798 array_pod_sort(V1->begin(), V1->end());
799 array_pod_sort(V2->begin(), V2->end());
800 unsigned i1 = 0, i2 = 0, e1 = V1->size(), e2 = V2->size();
801 while (i1 != e1 && i2 != e2) {
802 if ((*V1)[i1].Value == (*V2)[i2].Value)
803 return true;
804 if ((*V1)[i1].Value < (*V2)[i2].Value)
805 ++i1;
806 else
807 ++i2;
808 }
809 return false;
810}
811
812// Set branch weights on SwitchInst. This sets the metadata if there is at
813// least one non-zero weight.
814static void setBranchWeights(SwitchInst *SI, ArrayRef<uint32_t> Weights) {
815 // Check that there is at least one non-zero weight. Otherwise, pass
816 // nullptr to setMetadata which will erase the existing metadata.
817 MDNode *N = nullptr;
818 if (llvm::any_of(Weights, [](uint32_t W) { return W != 0; }))
819 N = MDBuilder(SI->getParent()->getContext()).createBranchWeights(Weights);
820 SI->setMetadata(LLVMContext::MD_prof, N);
821}
822
823// Similar to the above, but for branch and select instructions that take
824// exactly 2 weights.
825static void setBranchWeights(Instruction *I, uint32_t TrueWeight,
826 uint32_t FalseWeight) {
827 assert(isa<BranchInst>(I) || isa<SelectInst>(I));
828 // Check that there is at least one non-zero weight. Otherwise, pass
829 // nullptr to setMetadata which will erase the existing metadata.
830 MDNode *N = nullptr;
831 if (TrueWeight || FalseWeight)
832 N = MDBuilder(I->getParent()->getContext())
833 .createBranchWeights(TrueWeight, FalseWeight);
834 I->setMetadata(LLVMContext::MD_prof, N);
835}
836
837/// If TI is known to be a terminator instruction and its block is known to
838/// only have a single predecessor block, check to see if that predecessor is
839/// also a value comparison with the same value, and if that comparison
840/// determines the outcome of this comparison. If so, simplify TI. This does a
841/// very limited form of jump threading.
842bool SimplifyCFGOpt::SimplifyEqualityComparisonWithOnlyPredecessor(
843 Instruction *TI, BasicBlock *Pred, IRBuilder<> &Builder) {
844 Value *PredVal = isValueEqualityComparison(Pred->getTerminator());
845 if (!PredVal)
846 return false; // Not a value comparison in predecessor.
847
848 Value *ThisVal = isValueEqualityComparison(TI);
849 assert(ThisVal && "This isn't a value comparison!!");
850 if (ThisVal != PredVal)
851 return false; // Different predicates.
852
853 // TODO: Preserve branch weight metadata, similarly to how
854 // FoldValueComparisonIntoPredecessors preserves it.
855
856 // Find out information about when control will move from Pred to TI's block.
857 std::vector<ValueEqualityComparisonCase> PredCases;
858 BasicBlock *PredDef =
859 GetValueEqualityComparisonCases(Pred->getTerminator(), PredCases);
860 EliminateBlockCases(PredDef, PredCases); // Remove default from cases.
861
862 // Find information about how control leaves this block.
863 std::vector<ValueEqualityComparisonCase> ThisCases;
864 BasicBlock *ThisDef = GetValueEqualityComparisonCases(TI, ThisCases);
865 EliminateBlockCases(ThisDef, ThisCases); // Remove default from cases.
866
867 // If TI's block is the default block from Pred's comparison, potentially
868 // simplify TI based on this knowledge.
869 if (PredDef == TI->getParent()) {
870 // If we are here, we know that the value is none of those cases listed in
871 // PredCases. If there are any cases in ThisCases that are in PredCases, we
872 // can simplify TI.
873 if (!ValuesOverlap(PredCases, ThisCases))
874 return false;
875
876 if (isa<BranchInst>(TI)) {
877 // Okay, one of the successors of this condbr is dead. Convert it to an
878 // uncond br.
879 assert(ThisCases.size() == 1 && "Branch can only have one case!");
880 // Insert the new branch.
881 Instruction *NI = Builder.CreateBr(ThisDef);
882 (void)NI;
883
884 // Remove PHI node entries for the dead edge.
885 ThisCases[0].Dest->removePredecessor(PredDef);
886
887 LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
888 << "Through successor TI: " << *TI << "Leaving: " << *NI
889 << "\n");
890
891 EraseTerminatorAndDCECond(TI);
892
893 if (DTU)
894 DTU->applyUpdates(
895 {{DominatorTree::Delete, PredDef, ThisCases[0].Dest}});
896
897 return true;
898 }
899
900 SwitchInstProfUpdateWrapper SI = *cast<SwitchInst>(TI);
901 // Okay, TI has cases that are statically dead, prune them away.
902 SmallPtrSet<Constant *, 16> DeadCases;
903 for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
904 DeadCases.insert(PredCases[i].Value);
905
906 LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
907 << "Through successor TI: " << *TI);
908
909 SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
910 for (SwitchInst::CaseIt i = SI->case_end(), e = SI->case_begin(); i != e;) {
911 --i;
912 auto *Successor = i->getCaseSuccessor();
913 if (DTU)
914 ++NumPerSuccessorCases[Successor];
915 if (DeadCases.count(i->getCaseValue())) {
916 Successor->removePredecessor(PredDef);
917 SI.removeCase(i);
918 if (DTU)
919 --NumPerSuccessorCases[Successor];
920 }
921 }
922
923 if (DTU) {
924 std::vector<DominatorTree::UpdateType> Updates;
925 for (const std::pair<BasicBlock *, int> &I : NumPerSuccessorCases)
926 if (I.second == 0)
927 Updates.push_back({DominatorTree::Delete, PredDef, I.first});
928 DTU->applyUpdates(Updates);
929 }
930
931 LLVM_DEBUG(dbgs() << "Leaving: " << *TI << "\n");
932 return true;
933 }
934
935 // Otherwise, TI's block must correspond to some matched value. Find out
936 // which value (or set of values) this is.
937 ConstantInt *TIV = nullptr;
938 BasicBlock *TIBB = TI->getParent();
939 for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
940 if (PredCases[i].Dest == TIBB) {
941 if (TIV)
942 return false; // Cannot handle multiple values coming to this block.
943 TIV = PredCases[i].Value;
944 }
945 assert(TIV && "No edge from pred to succ?");
946
947 // Okay, we found the one constant that our value can be if we get into TI's
948 // BB. Find out which successor will unconditionally be branched to.
949 BasicBlock *TheRealDest = nullptr;
950 for (unsigned i = 0, e = ThisCases.size(); i != e; ++i)
951 if (ThisCases[i].Value == TIV) {
952 TheRealDest = ThisCases[i].Dest;
953 break;
954 }
955
956 // If not handled by any explicit cases, it is handled by the default case.
957 if (!TheRealDest)
958 TheRealDest = ThisDef;
959
960 SmallPtrSet<BasicBlock *, 2> RemovedSuccs;
961
962 // Remove PHI node entries for dead edges.
963 BasicBlock *CheckEdge = TheRealDest;
964 for (BasicBlock *Succ : successors(TIBB))
965 if (Succ != CheckEdge) {
966 if (Succ != TheRealDest)
967 RemovedSuccs.insert(Succ);
968 Succ->removePredecessor(TIBB);
969 } else
970 CheckEdge = nullptr;
971
972 // Insert the new branch.
973 Instruction *NI = Builder.CreateBr(TheRealDest);
974 (void)NI;
975
976 LLVM_DEBUG(dbgs() << "Threading pred instr: " << *Pred->getTerminator()
977 << "Through successor TI: " << *TI << "Leaving: " << *NI
978 << "\n");
979
980 EraseTerminatorAndDCECond(TI);
981 if (DTU) {
982 SmallVector<DominatorTree::UpdateType, 2> Updates;
983 Updates.reserve(RemovedSuccs.size());
984 for (auto *RemovedSucc : RemovedSuccs)
985 Updates.push_back({DominatorTree::Delete, TIBB, RemovedSucc});
986 DTU->applyUpdates(Updates);
987 }
988 return true;
989}
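// Illustrative sketch (assumed example) of the limited jump threading above:
//
//   pred:                           ; switch i32 %x [ 5 -> %other ], default -> %bb
//   bb:                             ; sole predecessor is pred's default edge
//     %c = icmp eq i32 %x, 5        ; provably false here: %x == 5 went to %other
//     br i1 %c, label %t, label %f
//
// Because pred already dispatched on %x, the conditional branch in %bb is
// rewritten into an unconditional branch to %f and the dead edge to %t is
// removed from %t's PHI nodes.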
990
991namespace {
992
993/// This class implements a stable ordering of constant
994/// integers that does not depend on their address. This is important for
995/// applications that sort ConstantInt's to ensure uniqueness.
996struct ConstantIntOrdering {
997 bool operator()(const ConstantInt *LHS, const ConstantInt *RHS) const {
998 return LHS->getValue().ult(RHS->getValue());
999 }
1000};
1001
1002} // end anonymous namespace
1003
1004static int ConstantIntSortPredicate(ConstantInt *const *P1,
1005 ConstantInt *const *P2) {
1006 const ConstantInt *LHS = *P1;
1007 const ConstantInt *RHS = *P2;
1008 if (LHS == RHS)
1009 return 0;
1010 return LHS->getValue().ult(RHS->getValue()) ? 1 : -1;
1011}
1012
1013static inline bool HasBranchWeights(const Instruction *I) {
1014 MDNode *ProfMD = I->getMetadata(LLVMContext::MD_prof);
1015 if (ProfMD && ProfMD->getOperand(0))
1016 if (MDString *MDS = dyn_cast<MDString>(ProfMD->getOperand(0)))
1017 return MDS->getString().equals("branch_weights");
1018
1019 return false;
1020}
1021
1022/// Get Weights of a given terminator, the default weight is at the front
1023/// of the vector. If TI is a conditional eq, we need to swap the branch-weight
1024/// metadata.
1025static void GetBranchWeights(Instruction *TI,
1026 SmallVectorImpl<uint64_t> &Weights) {
1027 MDNode *MD = TI->getMetadata(LLVMContext::MD_prof);
1028 assert(MD);
1029 for (unsigned i = 1, e = MD->getNumOperands(); i < e; ++i) {
1030 ConstantInt *CI = mdconst::extract<ConstantInt>(MD->getOperand(i));
1031 Weights.push_back(CI->getValue().getZExtValue());
1032 }
1033
1034 // If TI is a conditional eq, the default case is the false case,
1035 // and the corresponding branch-weight data is at index 2. We swap the
1036 // default weight to be the first entry.
1037 if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
1038 assert(Weights.size() == 2);
1039 ICmpInst *ICI = cast<ICmpInst>(BI->getCondition());
1040 if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
1041 std::swap(Weights.front(), Weights.back());
1042 }
1043}
1044
1045/// Keep halving the weights until all can fit in uint32_t.
1046static void FitWeights(MutableArrayRef<uint64_t> Weights) {
1047 uint64_t Max = *std::max_element(Weights.begin(), Weights.end());
1048 if (Max > UINT_MAX) {
1049 unsigned Offset = 32 - countLeadingZeros(Max);
1050 for (uint64_t &I : Weights)
1051 I >>= Offset;
1052 }
1053}
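// Illustrative worked example (assumed numbers): with Weights = {1, 1 << 40},
// Max = 2^40 has 23 leading zeros in 64 bits, so Offset = 32 - 23 = 9 and every
// weight is shifted right by 9. 2^40 becomes 2^31, which fits in uint32_t,
// while the small weight of 1 collapses to 0.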
1054
1055static void CloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(
1056 BasicBlock *BB, BasicBlock *PredBlock, ValueToValueMapTy &VMap) {
1057 Instruction *PTI = PredBlock->getTerminator();
1058
1059 // If we have bonus instructions, clone them into the predecessor block.
1060 // Note that there may be multiple predecessor blocks, so we cannot move
1061 // bonus instructions to a predecessor block.
1062 for (Instruction &BonusInst : *BB) {
1063 if (isa<DbgInfoIntrinsic>(BonusInst) || BonusInst.isTerminator())
1064 continue;
1065
1066 Instruction *NewBonusInst = BonusInst.clone();
1067
1068 if (PTI->getDebugLoc() != NewBonusInst->getDebugLoc()) {
1069 // Unless the instruction has the same !dbg location as the original
1070 // branch, drop it. When we fold the bonus instructions we want to make
1071 // sure we reset their debug locations in order to avoid stepping on
1072 // dead code caused by folding dead branches.
1073 NewBonusInst->setDebugLoc(DebugLoc());
1074 }
1075
1076 RemapInstruction(NewBonusInst, VMap,
1077 RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
1078 VMap[&BonusInst] = NewBonusInst;
1079
1080 // If we moved a load, we cannot any longer claim any knowledge about
1081 // its potential value. The previous information might have been valid
1082 // only given the branch precondition.
1083 // For an analogous reason, we must also drop all the metadata whose
1084 // semantics we don't understand. We *can* preserve !annotation, because
1085 // it is tied to the instruction itself, not the value or position.
1086 // Similarly strip attributes on call parameters that may cause UB in
1087 // location the call is moved to.
1088 NewBonusInst->dropUndefImplyingAttrsAndUnknownMetadata(
1089 LLVMContext::MD_annotation);
1090
1091 PredBlock->getInstList().insert(PTI->getIterator(), NewBonusInst);
1092 NewBonusInst->takeName(&BonusInst);
1093 BonusInst.setName(NewBonusInst->getName() + ".old");
1094
1095 // Update (liveout) uses of bonus instructions,
1096 // now that the bonus instruction has been cloned into predecessor.
1097 // Note that we expect to be in a block-closed SSA form for this to work!
1098 for (Use &U : make_early_inc_range(BonusInst.uses())) {
1099 auto *UI = cast<Instruction>(U.getUser());
1100 auto *PN = dyn_cast<PHINode>(UI);
1101 if (!PN) {
1102 assert(UI->getParent() == BB && BonusInst.comesBefore(UI) &&
1103 "If the user is not a PHI node, then it should be in the same "
1104 "block as, and come after, the original bonus instruction.");
1105 continue; // Keep using the original bonus instruction.
1106 }
1107 // Is this the block-closed SSA form PHI node?
1108 if (PN->getIncomingBlock(U) == BB)
1109 continue; // Great, keep using the original bonus instruction.
1110 // The only other alternative is a "use" when coming from
1111 // the predecessor block - here we should refer to the cloned bonus instr.
1112 assert(PN->getIncomingBlock(U) == PredBlock &&
1113 "Not in block-closed SSA form?");
1114 U.set(NewBonusInst);
1115 }
1116 }
1117}
1118
1119bool SimplifyCFGOpt::PerformValueComparisonIntoPredecessorFolding(
1120 Instruction *TI, Value *&CV, Instruction *PTI, IRBuilder<> &Builder) {
1121 BasicBlock *BB = TI->getParent();
1122 BasicBlock *Pred = PTI->getParent();
1123
1124 SmallVector<DominatorTree::UpdateType, 32> Updates;
1125
1126 // Figure out which 'cases' to copy from SI to PSI.
1127 std::vector<ValueEqualityComparisonCase> BBCases;
1128 BasicBlock *BBDefault = GetValueEqualityComparisonCases(TI, BBCases);
1129
1130 std::vector<ValueEqualityComparisonCase> PredCases;
1131 BasicBlock *PredDefault = GetValueEqualityComparisonCases(PTI, PredCases);
1132
1133 // Based on whether the default edge from PTI goes to BB or not, fill in
1134 // PredCases and PredDefault with the new switch cases we would like to
1135 // build.
1136 SmallMapVector<BasicBlock *, int, 8> NewSuccessors;
1137
1138 // Update the branch weight metadata along the way
1139 SmallVector<uint64_t, 8> Weights;
1140 bool PredHasWeights = HasBranchWeights(PTI);
1141 bool SuccHasWeights = HasBranchWeights(TI);
1142
1143 if (PredHasWeights) {
1144 GetBranchWeights(PTI, Weights);
1145 // branch-weight metadata is inconsistent here.
1146 if (Weights.size() != 1 + PredCases.size())
1147 PredHasWeights = SuccHasWeights = false;
1148 } else if (SuccHasWeights)
1149 // If there are no predecessor weights but there are successor weights,
1150 // populate Weights with 1, which will later be scaled to the sum of
1151 // successor's weights
1152 Weights.assign(1 + PredCases.size(), 1);
1153
1154 SmallVector<uint64_t, 8> SuccWeights;
1155 if (SuccHasWeights) {
1156 GetBranchWeights(TI, SuccWeights);
1157 // branch-weight metadata is inconsistent here.
1158 if (SuccWeights.size() != 1 + BBCases.size())
1159 PredHasWeights = SuccHasWeights = false;
1160 } else if (PredHasWeights)
1161 SuccWeights.assign(1 + BBCases.size(), 1);
1162
1163 if (PredDefault == BB) {
1164 // If this is the default destination from PTI, only the edges in TI
1165 // that don't occur in PTI, or that branch to BB will be activated.
1166 std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
1167 for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
1168 if (PredCases[i].Dest != BB)
1169 PTIHandled.insert(PredCases[i].Value);
1170 else {
1171 // The default destination is BB, we don't need explicit targets.
1172 std::swap(PredCases[i], PredCases.back());
1173
1174 if (PredHasWeights || SuccHasWeights) {
1175 // Increase weight for the default case.
1176 Weights[0] += Weights[i + 1];
1177 std::swap(Weights[i + 1], Weights.back());
1178 Weights.pop_back();
1179 }
1180
1181 PredCases.pop_back();
1182 --i;
1183 --e;
1184 }
1185
1186 // Reconstruct the new switch statement we will be building.
1187 if (PredDefault != BBDefault) {
1188 PredDefault->removePredecessor(Pred);
1189 if (DTU && PredDefault != BB)
1190 Updates.push_back({DominatorTree::Delete, Pred, PredDefault});
1191 PredDefault = BBDefault;
1192 ++NewSuccessors[BBDefault];
1193 }
1194
1195 unsigned CasesFromPred = Weights.size();
1196 uint64_t ValidTotalSuccWeight = 0;
1197 for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
1198 if (!PTIHandled.count(BBCases[i].Value) && BBCases[i].Dest != BBDefault) {
1199 PredCases.push_back(BBCases[i]);
1200 ++NewSuccessors[BBCases[i].Dest];
1201 if (SuccHasWeights || PredHasWeights) {
1202 // The default weight is at index 0, so weight for the ith case
1203 // should be at index i+1. Scale the cases from successor by
1204 // PredDefaultWeight (Weights[0]).
1205 Weights.push_back(Weights[0] * SuccWeights[i + 1]);
1206 ValidTotalSuccWeight += SuccWeights[i + 1];
1207 }
1208 }
1209
1210 if (SuccHasWeights || PredHasWeights) {
1211 ValidTotalSuccWeight += SuccWeights[0];
1212 // Scale the cases from predecessor by ValidTotalSuccWeight.
1213 for (unsigned i = 1; i < CasesFromPred; ++i)
1214 Weights[i] *= ValidTotalSuccWeight;
1215 // Scale the default weight by SuccDefaultWeight (SuccWeights[0]).
1216 Weights[0] *= SuccWeights[0];
1217 }
1218 } else {
1219 // If this is not the default destination from PSI, only the edges
1220 // in SI that occur in PSI with a destination of BB will be
1221 // activated.
1222 std::set<ConstantInt *, ConstantIntOrdering> PTIHandled;
1223 std::map<ConstantInt *, uint64_t> WeightsForHandled;
1224 for (unsigned i = 0, e = PredCases.size(); i != e; ++i)
1225 if (PredCases[i].Dest == BB) {
1226 PTIHandled.insert(PredCases[i].Value);
1227
1228 if (PredHasWeights || SuccHasWeights) {
1229 WeightsForHandled[PredCases[i].Value] = Weights[i + 1];
1230 std::swap(Weights[i + 1], Weights.back());
1231 Weights.pop_back();
1232 }
1233
1234 std::swap(PredCases[i], PredCases.back());
1235 PredCases.pop_back();
1236 --i;
1237 --e;
1238 }
1239
1240 // Okay, now we know which constants were sent to BB from the
1241 // predecessor. Figure out where they will all go now.
1242 for (unsigned i = 0, e = BBCases.size(); i != e; ++i)
1243 if (PTIHandled.count(BBCases[i].Value)) {
1244 // If this is one we are capable of getting...
1245 if (PredHasWeights || SuccHasWeights)
1246 Weights.push_back(WeightsForHandled[BBCases[i].Value]);
1247 PredCases.push_back(BBCases[i]);
1248 ++NewSuccessors[BBCases[i].Dest];
1249 PTIHandled.erase(BBCases[i].Value); // This constant is taken care of
1250 }
1251
1252 // If there are any constants vectored to BB that TI doesn't handle,
1253 // they must go to the default destination of TI.
1254 for (ConstantInt *I : PTIHandled) {
1255 if (PredHasWeights || SuccHasWeights)
1256 Weights.push_back(WeightsForHandled[I]);
1257 PredCases.push_back(ValueEqualityComparisonCase(I, BBDefault));
1258 ++NewSuccessors[BBDefault];
1259 }
1260 }
1261
1262 // Okay, at this point, we know which new successors Pred will get. Make
1263 // sure we update the number of entries in the PHI nodes for these
1264 // successors.
1265 SmallPtrSet<BasicBlock *, 2> SuccsOfPred;
1266 if (DTU) {
1267 SuccsOfPred = {succ_begin(Pred), succ_end(Pred)};
1268 Updates.reserve(Updates.size() + NewSuccessors.size());
1269 }
1270 for (const std::pair<BasicBlock *, int /*Num*/> &NewSuccessor :
1271 NewSuccessors) {
1272 for (auto I : seq(0, NewSuccessor.second)) {
1273 (void)I;
1274 AddPredecessorToBlock(NewSuccessor.first, Pred, BB);
1275 }
1276 if (DTU && !SuccsOfPred.contains(NewSuccessor.first))
1277 Updates.push_back({DominatorTree::Insert, Pred, NewSuccessor.first});
1278 }
1279
1280 Builder.SetInsertPoint(PTI);
1281 // Convert pointer to int before we switch.
1282 if (CV->getType()->isPointerTy()) {
1283 CV =
1284 Builder.CreatePtrToInt(CV, DL.getIntPtrType(CV->getType()), "magicptr");
1285 }
1286
1287 // Now that the successors are updated, create the new Switch instruction.
1288 SwitchInst *NewSI = Builder.CreateSwitch(CV, PredDefault, PredCases.size());
1289 NewSI->setDebugLoc(PTI->getDebugLoc());
1290 for (ValueEqualityComparisonCase &V : PredCases)
1291 NewSI->addCase(V.Value, V.Dest);
1292
1293 if (PredHasWeights || SuccHasWeights) {
1294 // Halve the weights if any of them cannot fit in a uint32_t
1295 FitWeights(Weights);
1296
1297 SmallVector<uint32_t, 8> MDWeights(Weights.begin(), Weights.end());
1298
1299 setBranchWeights(NewSI, MDWeights);
1300 }
1301
1302 EraseTerminatorAndDCECond(PTI);
1303
1304 // Okay, last check. If BB is still a successor of PSI, then we must
1305 // have an infinite loop case. If so, add an infinitely looping block
1306 // to handle the case to preserve the behavior of the code.
1307 BasicBlock *InfLoopBlock = nullptr;
1308 for (unsigned i = 0, e = NewSI->getNumSuccessors(); i != e; ++i)
1309 if (NewSI->getSuccessor(i) == BB) {
1310 if (!InfLoopBlock) {
1311 // Insert it at the end of the function, because it's either dead code,
1312 // or it won't matter if it's hot. :)
1313 InfLoopBlock =
1314 BasicBlock::Create(BB->getContext(), "infloop", BB->getParent());
1315 BranchInst::Create(InfLoopBlock, InfLoopBlock);
1316 if (DTU)
1317 Updates.push_back(
1318 {DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
1319 }
1320 NewSI->setSuccessor(i, InfLoopBlock);
1321 }
1322
1323 if (DTU) {
1324 if (InfLoopBlock)
1325 Updates.push_back({DominatorTree::Insert, Pred, InfLoopBlock});
1326
1327 Updates.push_back({DominatorTree::Delete, Pred, BB});
1328
1329 DTU->applyUpdates(Updates);
1330 }
1331
1332 ++NumFoldValueComparisonIntoPredecessors;
1333 return true;
1334}
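// A hypothetical before/after sketch of the fold above, for the case where the
// predecessor's default edge goes to BB (all names are illustrative):
//
//   pred:
//     switch i32 %x, label %bb [ i32 1, label %a ]
//   bb:
//     switch i32 %x, label %def [ i32 2, label %b ]
//
// after folding bb's terminator into the predecessor:
//
//   pred:
//     switch i32 %x, label %def [ i32 1, label %a
//                                 i32 2, label %b ]
//
// Branch weights, PHI entries in the newly reachable successors, and DomTree
// updates are adjusted as in the code above; a case that still targets %bb
// would instead be redirected to the "infloop" block created above.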
1335
1336/// The specified terminator is a value equality comparison instruction
1337/// (either a switch or a branch on "X == c").
1338/// See if any of the predecessors of the terminator block are value comparisons
1339/// on the same value. If so, and if safe to do so, fold them together.
1340bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(Instruction *TI,
1341 IRBuilder<> &Builder) {
1342 BasicBlock *BB = TI->getParent();
1343 Value *CV = isValueEqualityComparison(TI); // CondVal
1344 assert(CV && "Not a comparison?");
1345
1346 bool Changed = false;
1347
1348 SmallSetVector<BasicBlock *, 16> Preds(pred_begin(BB), pred_end(BB));
1349 while (!Preds.empty()) {
1350 BasicBlock *Pred = Preds.pop_back_val();
1351 Instruction *PTI = Pred->getTerminator();
1352
1353 // Don't try to fold into itself.
1354 if (Pred == BB)
1355 continue;
1356
1357 // See if the predecessor is a comparison with the same value.
1358 Value *PCV = isValueEqualityComparison(PTI); // PredCondVal
1359 if (PCV != CV)
1360 continue;
1361
1362 SmallSetVector<BasicBlock *, 4> FailBlocks;
1363 if (!SafeToMergeTerminators(TI, PTI, &FailBlocks)) {
1364 for (auto *Succ : FailBlocks) {
1365 if (!SplitBlockPredecessors(Succ, TI->getParent(), ".fold.split", DTU))
1366 return false;
1367 }
1368 }
1369
1370 PerformValueComparisonIntoPredecessorFolding(TI, CV, PTI, Builder);
1371 Changed = true;
1372 }
1373 return Changed;
1374}
1375
1376// If we would need to insert a select that uses the value of this invoke
1377// (comments in HoistThenElseCodeToIf explain why we would need to do this), we
1378// can't hoist the invoke, as there is nowhere to put the select in this case.
1379static bool isSafeToHoistInvoke(BasicBlock *BB1, BasicBlock *BB2,
1380 Instruction *I1, Instruction *I2) {
1381 for (BasicBlock *Succ : successors(BB1)) {
1382 for (const PHINode &PN : Succ->phis()) {
1383 Value *BB1V = PN.getIncomingValueForBlock(BB1);
1384 Value *BB2V = PN.getIncomingValueForBlock(BB2);
1385 if (BB1V != BB2V && (BB1V == I1 || BB2V == I2)) {
1386 return false;
1387 }
1388 }
1389 }
1390 return true;
1391}
1392
1393static bool passingValueIsAlwaysUndefined(Value *V, Instruction *I, bool PtrValueMayBeModified = false);
1394
1395/// Given a conditional branch that goes to BB1 and BB2, hoist any common code
1396/// in the two blocks up into the branch block. The caller of this function
1397/// guarantees that BI's block dominates BB1 and BB2. If EqTermsOnly is given,
1398/// only perform hoisting in case both blocks only contain a terminator. In that
1399/// case, only the original BI will be replaced and selects for PHIs are added.
1400bool SimplifyCFGOpt::HoistThenElseCodeToIf(BranchInst *BI,
1401 const TargetTransformInfo &TTI,
1402 bool EqTermsOnly) {
1403 // This does very trivial matching, with limited scanning, to find identical
1404 // instructions in the two blocks. In particular, we don't want to get into
1405 // O(M*N) situations here where M and N are the sizes of BB1 and BB2. As
1406 // such, we currently just scan for obviously identical instructions in an
1407 // identical order.
1408 BasicBlock *BB1 = BI->getSuccessor(0); // The true destination.
1409 BasicBlock *BB2 = BI->getSuccessor(1); // The false destination
1410
1411 // If either of the blocks has its address taken, then we can't do this fold,
1412 // because the code we'd hoist would no longer run when we jump into the block
1413 // by its address.
1414 if (BB1->hasAddressTaken() || BB2->hasAddressTaken())
1415 return false;
1416
1417 BasicBlock::iterator BB1_Itr = BB1->begin();
1418 BasicBlock::iterator BB2_Itr = BB2->begin();
1419
1420 Instruction *I1 = &*BB1_Itr++, *I2 = &*BB2_Itr++;
1421 // Skip debug info if it is not identical.
1422 DbgInfoIntrinsic *DBI1 = dyn_cast<DbgInfoIntrinsic>(I1);
1423 DbgInfoIntrinsic *DBI2 = dyn_cast<DbgInfoIntrinsic>(I2);
1424 if (!DBI1 || !DBI2 || !DBI1->isIdenticalToWhenDefined(DBI2)) {
1425 while (isa<DbgInfoIntrinsic>(I1))
1426 I1 = &*BB1_Itr++;
1427 while (isa<DbgInfoIntrinsic>(I2))
1428 I2 = &*BB2_Itr++;
1429 }
1430 // FIXME: Can we define a safety predicate for CallBr?
1431 if (isa<PHINode>(I1) || !I1->isIdenticalToWhenDefined(I2) ||
1432 (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2)) ||
1433 isa<CallBrInst>(I1))
1434 return false;
1435
1436 BasicBlock *BIParent = BI->getParent();
1437
1438 bool Changed = false;
1439
1440 auto _ = make_scope_exit([&]() {
1441 if (Changed)
1442 ++NumHoistCommonCode;
1443 });
1444
1445 // Check if only hoisting terminators is allowed. This does not add new
1446 // instructions to the hoist location.
1447 if (EqTermsOnly) {
1448 // Skip any debug intrinsics, as they are free to hoist.
1449 auto *I1NonDbg = &*skipDebugIntrinsics(I1->getIterator());
1450 auto *I2NonDbg = &*skipDebugIntrinsics(I2->getIterator());
1451 if (!I1NonDbg->isIdenticalToWhenDefined(I2NonDbg))
1452 return false;
1453 if (!I1NonDbg->isTerminator())
1454 return false;
1455 // Now we know that we only need to hoist debug intrinsics and the
1456 // terminator. Let the loop below handle those 2 cases.
1457 }
1458
1459 do {
1460 // If we are hoisting the terminator instruction, don't move it (that would
1461 // make a broken BB); instead clone it, and remove BI.
1462 if (I1->isTerminator())
1463 goto HoistTerminator;
1464
1465 // If we're going to hoist a call, make sure that the two instructions we're
1466 // commoning/hoisting are both marked with musttail, or neither of them is
1467 // marked as such. Otherwise, we might end up in a situation where we hoist
1468 // from a block where the terminator is a `ret` to a block where the terminator
1469 // is a `br`, and `musttail` calls expect to be followed by a return.
1470 auto *C1 = dyn_cast<CallInst>(I1);
1471 auto *C2 = dyn_cast<CallInst>(I2);
1472 if (C1 && C2)
1473 if (C1->isMustTailCall() != C2->isMustTailCall())
1474 return Changed;
1475
1476 if (!TTI.isProfitableToHoist(I1) || !TTI.isProfitableToHoist(I2))
1477 return Changed;
1478
1479 // If either of the two call sites has the nomerge attribute, stop hoisting.
1480 if (const auto *CB1 = dyn_cast<CallBase>(I1))
1481 if (CB1->cannotMerge())
1482 return Changed;
1483 if (const auto *CB2 = dyn_cast<CallBase>(I2))
1484 if (CB2->cannotMerge())
1485 return Changed;
1486
1487 if (isa<DbgInfoIntrinsic>(I1) || isa<DbgInfoIntrinsic>(I2)) {
1488 assert(isa<DbgInfoIntrinsic>(I1) && isa<DbgInfoIntrinsic>(I2));
1489 // The debug location is an integral part of a debug info intrinsic
1490 // and can't be separated from it or replaced. Instead of attempting
1491 // to merge locations, simply hoist both copies of the intrinsic.
1492 BIParent->getInstList().splice(BI->getIterator(),
1493 BB1->getInstList(), I1);
1494 BIParent->getInstList().splice(BI->getIterator(),
1495 BB2->getInstList(), I2);
1496 Changed = true;
1497 } else {
1498 // For a normal instruction, we just move one to right before the branch,
1499 // then replace all uses of the other with the first. Finally, we remove
1500 // the now redundant second instruction.
1501 BIParent->getInstList().splice(BI->getIterator(),
1502 BB1->getInstList(), I1);
1503 if (!I2->use_empty())
1504 I2->replaceAllUsesWith(I1);
1505 I1->andIRFlags(I2);
1506 unsigned KnownIDs[] = {LLVMContext::MD_tbaa,
1507 LLVMContext::MD_range,
1508 LLVMContext::MD_fpmath,
1509 LLVMContext::MD_invariant_load,
1510 LLVMContext::MD_nonnull,
1511 LLVMContext::MD_invariant_group,
1512 LLVMContext::MD_align,
1513 LLVMContext::MD_dereferenceable,
1514 LLVMContext::MD_dereferenceable_or_null,
1515 LLVMContext::MD_mem_parallel_loop_access,
1516 LLVMContext::MD_access_group,
1517 LLVMContext::MD_preserve_access_index};
1518 combineMetadata(I1, I2, KnownIDs, true);
1519
1520 // I1 and I2 are being combined into a single instruction. Its debug
1521 // location is the merged locations of the original instructions.
1522 I1->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc());
1523
1524 I2->eraseFromParent();
1525 Changed = true;
1526 }
1527 ++NumHoistCommonInstrs;
1528
1529 I1 = &*BB1_Itr++;
1530 I2 = &*BB2_Itr++;
1531 // Skip debug info if it is not identical.
1532 DbgInfoIntrinsic *DBI1 = dyn_cast<DbgInfoIntrinsic>(I1);
1533 DbgInfoIntrinsic *DBI2 = dyn_cast<DbgInfoIntrinsic>(I2);
1534 if (!DBI1 || !DBI2 || !DBI1->isIdenticalToWhenDefined(DBI2)) {
1535 while (isa<DbgInfoIntrinsic>(I1))
1536 I1 = &*BB1_Itr++;
1537 while (isa<DbgInfoIntrinsic>(I2))
1538 I2 = &*BB2_Itr++;
1539 }
1540 } while (I1->isIdenticalToWhenDefined(I2));
1541
1542 return true;
1543
1544HoistTerminator:
1545 // It may not be possible to hoist an invoke.
1546 // FIXME: Can we define a safety predicate for CallBr?
1547 if (isa<InvokeInst>(I1) && !isSafeToHoistInvoke(BB1, BB2, I1, I2))
1548 return Changed;
1549
1550 // TODO: callbr hoisting currently disabled pending further study.
1551 if (isa<CallBrInst>(I1))
1552 return Changed;
1553
1554 for (BasicBlock *Succ : successors(BB1)) {
1555 for (PHINode &PN : Succ->phis()) {
1556 Value *BB1V = PN.getIncomingValueForBlock(BB1);
1557 Value *BB2V = PN.getIncomingValueForBlock(BB2);
1558 if (BB1V == BB2V)
1559 continue;
1560
1561 // Check for passingValueIsAlwaysUndefined here because we would rather
1562 // eliminate undefined control flow than convert it to a select.
1563 if (passingValueIsAlwaysUndefined(BB1V, &PN) ||
1564 passingValueIsAlwaysUndefined(BB2V, &PN))
1565 return Changed;
1566
1567 if (isa<ConstantExpr>(BB1V) && !isSafeToSpeculativelyExecute(BB1V))
1568 return Changed;
1569 if (isa<ConstantExpr>(BB2V) && !isSafeToSpeculativelyExecute(BB2V))
1570 return Changed;
1571 }
1572 }
1573
1574 // Okay, it is safe to hoist the terminator.
1575 Instruction *NT = I1->clone();
1576 BIParent->getInstList().insert(BI->getIterator(), NT);
1577 if (!NT->getType()->isVoidTy()) {
1578 I1->replaceAllUsesWith(NT);
1579 I2->replaceAllUsesWith(NT);
1580 NT->takeName(I1);
1581 }
1582 Changed = true;
1583 ++NumHoistCommonInstrs;
1584
1585 // Ensure terminator gets a debug location, even an unknown one, in case
1586 // it involves inlinable calls.
1587 NT->applyMergedLocation(I1->getDebugLoc(), I2->getDebugLoc());
1588
1589 // PHIs created below will adopt NT's merged DebugLoc.
1590 IRBuilder<NoFolder> Builder(NT);
1591
1592 // Hoisting one of the terminators from our successor is a great thing.
1593 // Unfortunately, the successors of the if/else blocks may have PHI nodes in
1594 // them. If they do, all PHI entries for BB1/BB2 must agree for all PHI
1595 // nodes, so we insert a select instruction to compute the final result.
1596 std::map<std::pair<Value *, Value *>, SelectInst *> InsertedSelects;
1597 for (BasicBlock *Succ : successors(BB1)) {
1598 for (PHINode &PN : Succ->phis()) {
1599 Value *BB1V = PN.getIncomingValueForBlock(BB1);
1600 Value *BB2V = PN.getIncomingValueForBlock(BB2);
1601 if (BB1V == BB2V)
1602 continue;
1603
1604 // These values do not agree. Insert a select instruction before NT
1605 // that determines the right value.
1606 SelectInst *&SI = InsertedSelects[std::make_pair(BB1V, BB2V)];
1607 if (!SI) {
1608 // Propagate fast-math-flags from phi node to its replacement select.
1609 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
1610 if (isa<FPMathOperator>(PN))
1611 Builder.setFastMathFlags(PN.getFastMathFlags());
1612
1613 SI = cast<SelectInst>(
1614 Builder.CreateSelect(BI->getCondition(), BB1V, BB2V,
1615 BB1V->getName() + "." + BB2V->getName(), BI));
1616 }
1617
1618 // Make the PHI node use the select for all incoming values for BB1/BB2
1619 for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
1620 if (PN.getIncomingBlock(i) == BB1 || PN.getIncomingBlock(i) == BB2)
1621 PN.setIncomingValue(i, SI);
1622 }
1623 }
1624
1625 SmallVector<DominatorTree::UpdateType, 4> Updates;
1626
1627 // Update any PHI nodes in our new successors.
1628 for (BasicBlock *Succ : successors(BB1)) {
1629 AddPredecessorToBlock(Succ, BIParent, BB1);
1630 if (DTU)
1631 Updates.push_back({DominatorTree::Insert, BIParent, Succ});
1632 }
1633
1634 if (DTU)
1635 for (BasicBlock *Succ : successors(BI))
1636 Updates.push_back({DominatorTree::Delete, BIParent, Succ});
1637
1638 EraseTerminatorAndDCECond(BI);
1639 if (DTU)
1640 DTU->applyUpdates(Updates);
1641 return Changed;
1642}
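// A minimal, hypothetical illustration of the select insertion above when a
// successor PHI disagrees between the two arms (names are illustrative):
//
//   br i1 %c, label %bb1, label %bb2
// bb1:
//   br label %end
// bb2:
//   br label %end
// end:
//   %p = phi i32 [ %x, %bb1 ], [ %y, %bb2 ]
//
// When the common terminator is hoisted, the disagreeing incoming values are
// replaced by
//   %x.y = select i1 %c, i32 %x, i32 %y
// and %p's entries for %bb1 and %bb2 are both set to %x.y, matching the loop
// over the PHI's incoming values above.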
1643
1644// Check lifetime markers.
1645static bool isLifeTimeMarker(const Instruction *I) {
1646 if (auto II = dyn_cast<IntrinsicInst>(I)) {
1647 switch (II->getIntrinsicID()) {
1648 default:
1649 break;
1650 case Intrinsic::lifetime_start:
1651 case Intrinsic::lifetime_end:
1652 return true;
1653 }
1654 }
1655 return false;
1656}
1657
1658// TODO: Refine this. This should avoid cases like turning constant memcpy sizes
1659// into variables.
1660static bool replacingOperandWithVariableIsCheap(const Instruction *I,
1661 int OpIdx) {
1662 return !isa<IntrinsicInst>(I);
1663}
1664
1665// All instructions in Insts belong to different blocks that all unconditionally
1666// branch to a common successor. Analyze each instruction and return true if it
1667// would be possible to sink them into their successor, creating one common
1668// instruction instead. For every value that would be required to be provided by
1669 // a PHI node (because an operand varies in each input block), add it to PHIOperands.
1670static bool canSinkInstructions(
1671 ArrayRef<Instruction *> Insts,
1672 DenseMap<Instruction *, SmallVector<Value *, 4>> &PHIOperands) {
1673 // Prune out obviously bad instructions to move. Each instruction must have
1674 // exactly zero or one use, and we check later that use is by a single, common
1675 // PHI instruction in the successor.
1676 bool HasUse = !Insts.front()->user_empty();
1677 for (auto *I : Insts) {
1678 // These instructions may change or break semantics if moved.
1679 if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
1680 I->getType()->isTokenTy())
1681 return false;
1682
1683 // Do not try to sink an instruction in an infinite loop - it can cause
1684 // this algorithm to infinite loop.
1685 if (I->getParent()->getSingleSuccessor() == I->getParent())
1686 return false;
1687
1688 // Conservatively return false if I is an inline-asm instruction. Sinking
1689 // and merging inline-asm instructions can potentially create arguments
1690 // that cannot satisfy the inline-asm constraints.
1691 // If the instruction has nomerge attribute, return false.
1692 if (const auto *C = dyn_cast<CallBase>(I))
1693 if (C->isInlineAsm() || C->cannotMerge())
1694 return false;
1695
1696 // Each instruction must have zero or one use.
1697 if (HasUse && !I->hasOneUse())
1698 return false;
1699 if (!HasUse && !I->user_empty())
1700 return false;
1701 }
1702
1703 const Instruction *I0 = Insts.front();
1704 for (auto *I : Insts)
1705 if (!I->isSameOperationAs(I0))
1706 return false;
1707
1708 // All instructions in Insts are known to be the same opcode. If they have a
1709 // use, check that the only user is a PHI or in the same block as the
1710 // instruction, because if a user is in the same block as an instruction we're
1711 // contemplating sinking, it must already be determined to be sinkable.
1712 if (HasUse) {
1713 auto *PNUse = dyn_cast<PHINode>(*I0->user_begin());
1714 auto *Succ = I0->getParent()->getTerminator()->getSuccessor(0);
1715 if (!all_of(Insts, [&PNUse,&Succ](const Instruction *I) -> bool {
1716 auto *U = cast<Instruction>(*I->user_begin());
1717 return (PNUse &&
1718 PNUse->getParent() == Succ &&
1719 PNUse->getIncomingValueForBlock(I->getParent()) == I) ||
1720 U->getParent() == I->getParent();
1721 }))
1722 return false;
1723 }
1724
1725 // Because SROA can't handle speculating stores of selects, try not to sink
1726 // loads, stores or lifetime markers of allocas when we'd have to create a
1727 // PHI for the address operand. Also, because it is likely that loads or
1728 // stores of allocas will disappear when Mem2Reg/SROA is run, don't sink
1729 // them.
1730 // This can cause code churn which can have unintended consequences down
1731 // the line - see https://llvm.org/bugs/show_bug.cgi?id=30244.
1732 // FIXME: This is a workaround for a deficiency in SROA - see
1733 // https://llvm.org/bugs/show_bug.cgi?id=30188
1734 if (isa<StoreInst>(I0) && any_of(Insts, [](const Instruction *I) {
1735 return isa<AllocaInst>(I->getOperand(1)->stripPointerCasts());
1736 }))
1737 return false;
1738 if (isa<LoadInst>(I0) && any_of(Insts, [](const Instruction *I) {
1739 return isa<AllocaInst>(I->getOperand(0)->stripPointerCasts());
1740 }))
1741 return false;
1742 if (isLifeTimeMarker(I0) && any_of(Insts, [](const Instruction *I) {
1743 return isa<AllocaInst>(I->getOperand(1)->stripPointerCasts());
1744 }))
1745 return false;
1746
1747 // For calls to be sinkable, they must all be indirect, or have the same callee.
1748 // I.e. if we have two direct calls to different callees, we don't want to
1749 // turn that into an indirect call. Likewise, if we have an indirect call,
1750 // and a direct call, we don't actually want to have a single indirect call.
1751 if (isa<CallBase>(I0)) {
1752 auto IsIndirectCall = [](const Instruction *I) {
1753 return cast<CallBase>(I)->isIndirectCall();
1754 };
1755 bool HaveIndirectCalls = any_of(Insts, IsIndirectCall);
1756 bool AllCallsAreIndirect = all_of(Insts, IsIndirectCall);
1757 if (HaveIndirectCalls) {
1758 if (!AllCallsAreIndirect)
1759 return false;
1760 } else {
1761 // All callees must be identical.
1762 Value *Callee = nullptr;
1763 for (const Instruction *I : Insts) {
1764 Value *CurrCallee = cast<CallBase>(I)->getCalledOperand();
1765 if (!Callee)
1766 Callee = CurrCallee;
1767 else if (Callee != CurrCallee)
1768 return false;
1769 }
1770 }
1771 }
1772
1773 for (unsigned OI = 0, OE = I0->getNumOperands(); OI != OE; ++OI) {
1774 Value *Op = I0->getOperand(OI);
1775 if (Op->getType()->isTokenTy())
1776 // Don't touch any operand of token type.
1777 return false;
1778
1779 auto SameAsI0 = [&I0, OI](const Instruction *I) {
1780 assert(I->getNumOperands() == I0->getNumOperands());
1781 return I->getOperand(OI) == I0->getOperand(OI);
1782 };
1783 if (!all_of(Insts, SameAsI0)) {
1784 if ((isa<Constant>(Op) && !replacingOperandWithVariableIsCheap(I0, OI)) ||
1785 !canReplaceOperandWithVariable(I0, OI))
1786 // We can't create a PHI from this GEP.
1787 return false;
1788 for (auto *I : Insts)
1789 PHIOperands[I].push_back(I->getOperand(OI));
1790 }
1791 }
1792 return true;
1793}
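// A hypothetical example of inputs this predicate accepts (names are
// illustrative). Two stores to the same, non-alloca pointer differ only in the
// stored value, so the value operand is recorded in PHIOperands:
//
// b1:
//   store i32 %x, i32* %p
//   br label %end
// b2:
//   store i32 %y, i32* %p
//   br label %end
//
// Here PHIOperands maps the store in b1 to {%x} and the store in b2 to {%y}.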
1794
1795// Assuming canSinkInstructions(Blocks) has returned true, sink the last
1796// instruction of every block in Blocks to their common successor, commoning
1797// into one instruction.
1798static bool sinkLastInstruction(ArrayRef<BasicBlock*> Blocks) {
1799 auto *BBEnd = Blocks[0]->getTerminator()->getSuccessor(0);
1800
1801 // canSinkInstructions returning true guarantees that every block has at
1802 // least one non-terminator instruction.
1803 SmallVector<Instruction*,4> Insts;
1804 for (auto *BB : Blocks) {
1805 Instruction *I = BB->getTerminator();
1806 do {
1807 I = I->getPrevNode();
1808 } while (isa<DbgInfoIntrinsic>(I) && I != &BB->front());
1809 if (!isa<DbgInfoIntrinsic>(I))
1810 Insts.push_back(I);
1811 }
1812
1813 // The only checking we need to do now is that all users of all instructions
1814 // are the same PHI node. canSinkInstructions should have checked this but
1815 // it is slightly over-aggressive - it gets confused by commutative
1816 // instructions so double-check it here.
1817 Instruction *I0 = Insts.front();
1818 if (!I0->user_empty()) {
1819 auto *PNUse = dyn_cast<PHINode>(*I0->user_begin());
1820 if (!all_of(Insts, [&PNUse](const Instruction *I) -> bool {
1821 auto *U = cast<Instruction>(*I->user_begin());
1822 return U == PNUse;
1823 }))
1824 return false;
1825 }
1826
1827 // We don't need to do any more checking here; canSinkInstructions should
1828 // have done it all for us.
1829 SmallVector<Value*, 4> NewOperands;
1830 for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
1831 // This check is different to that in canSinkInstructions. There, we
1832 // cared about the global view once simplifycfg (and instcombine) have
1833 // completed - it takes into account PHIs that become trivially
1834 // simplifiable. However here we need a more local view; if an operand
1835 // differs we create a PHI and rely on instcombine to clean up the very
1836 // small mess we may make.
1837 bool NeedPHI = any_of(Insts, [&I0, O](const Instruction *I) {
1838 return I->getOperand(O) != I0->getOperand(O);
1839 });
1840 if (!NeedPHI) {
1841 NewOperands.push_back(I0->getOperand(O));
1842 continue;
1843 }
1844
1845 // Create a new PHI in the successor block and populate it.
1846 auto *Op = I0->getOperand(O);
1847 assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
1848 auto *PN = PHINode::Create(Op->getType(), Insts.size(),
1849 Op->getName() + ".sink", &BBEnd->front());
1850 for (auto *I : Insts)
1851 PN->addIncoming(I->getOperand(O), I->getParent());
1852 NewOperands.push_back(PN);
1853 }
1854
1855 // Arbitrarily use I0 as the new "common" instruction; remap its operands
1856 // and move it to the start of the successor block.
1857 for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
1858 I0->getOperandUse(O).set(NewOperands[O]);
1859 I0->moveBefore(&*BBEnd->getFirstInsertionPt());
1860
1861 // Update metadata and IR flags, and merge debug locations.
1862 for (auto *I : Insts)
1863 if (I != I0) {
1864 // The debug location for the "common" instruction is the merged locations
1865 // of all the commoned instructions. We start with the original location
1866 // of the "common" instruction and iteratively merge each location in the
1867 // loop below.
1868 // This is an N-way merge, which will be inefficient if I0 is a CallInst.
1869 // However, as N-way merges for CallInst are rare, we use the simplified
1870 // API instead of the complex N-way merge API.
1871 I0->applyMergedLocation(I0->getDebugLoc(), I->getDebugLoc());
1872 combineMetadataForCSE(I0, I, true);
1873 I0->andIRFlags(I);
1874 }
1875
1876 if (!I0->user_empty()) {
1877 // canSinkInstructions checked that all instructions were used by
1878 // one and only one PHI node. Find that now, RAUW it to our common
1879 // instruction and nuke it.
1880 auto *PN = cast<PHINode>(*I0->user_begin());
1881 PN->replaceAllUsesWith(I0);
1882 PN->eraseFromParent();
1883 }
1884
1885 // Finally nuke all instructions apart from the common instruction.
1886 for (auto *I : Insts)
1887 if (I != I0)
1888 I->eraseFromParent();
1889
1890 return true;
1891}
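// Continuing the hypothetical two-store example above, sinking the last
// instruction of each block into the common successor produces, conceptually:
//
// end:
//   %x.sink = phi i32 [ %x, %b1 ], [ %y, %b2 ]
//   store i32 %x.sink, i32* %p
//
// where the ".sink" PHI corresponds to the one created in the loop above.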
1892
1893namespace {
1894
1895 // LockstepReverseIterator - Iterates through instructions
1896 // in a set of blocks in reverse order from the first non-terminator.
1897 // For example (assume all blocks have size n):
1898 // LockstepReverseIterator I([B1, B2, B3]);
1899 // *I-- = [B1[n], B2[n], B3[n]];
1900 // *I-- = [B1[n-1], B2[n-1], B3[n-1]];
1901 // *I-- = [B1[n-2], B2[n-2], B3[n-2]];
1902 // ...
1903 class LockstepReverseIterator {
1904 ArrayRef<BasicBlock*> Blocks;
1905 SmallVector<Instruction*,4> Insts;
1906 bool Fail;
1907
1908 public:
1909 LockstepReverseIterator(ArrayRef<BasicBlock*> Blocks) : Blocks(Blocks) {
1910 reset();
1911 }
1912
1913 void reset() {
1914 Fail = false;
1915 Insts.clear();
1916 for (auto *BB : Blocks) {
1917 Instruction *Inst = BB->getTerminator();
1918 for (Inst = Inst->getPrevNode(); Inst && isa<DbgInfoIntrinsic>(Inst);)
1919 Inst = Inst->getPrevNode();
1920 if (!Inst) {
1921 // Block wasn't big enough.
1922 Fail = true;
1923 return;
1924 }
1925 Insts.push_back(Inst);
1926 }
1927 }
1928
1929 bool isValid() const {
1930 return !Fail;
1931 }
1932
1933 void operator--() {
1934 if (Fail)
1935 return;
1936 for (auto *&Inst : Insts) {
1937 for (Inst = Inst->getPrevNode(); Inst && isa<DbgInfoIntrinsic>(Inst);)
1938 Inst = Inst->getPrevNode();
1939 // Already at beginning of block.
1940 if (!Inst) {
1941 Fail = true;
1942 return;
1943 }
1944 }
1945 }
1946
1947 void operator++() {
1948 if (Fail)
1949 return;
1950 for (auto *&Inst : Insts) {
1951 for (Inst = Inst->getNextNode(); Inst && isa<DbgInfoIntrinsic>(Inst);)
1952 Inst = Inst->getNextNode();
1953 // Already at end of block.
1954 if (!Inst) {
1955 Fail = true;
1956 return;
1957 }
1958 }
1959 }
1960
1961 ArrayRef<Instruction*> operator * () const {
1962 return Insts;
1963 }
1964 };
1965
1966} // end anonymous namespace
1967
1968/// Check whether BB's predecessors end with unconditional branches. If they
1969/// do, sink any common code from the predecessors to BB.
1970static bool SinkCommonCodeFromPredecessors(BasicBlock *BB,
1971 DomTreeUpdater *DTU) {
1972 // We support two situations:
1973 // (1) all incoming arcs are unconditional
1974 // (2) there are non-unconditional incoming arcs
1975 //
1976 // (2) is very common in switch defaults and
1977 // else-if patterns;
1978 //
1979 // if (a) f(1);
1980 // else if (b) f(2);
1981 //
1982 // produces:
1983 //
1984 // [if]
1985 // / \
1986 // [f(1)] [if]
1987 // | | \
1988 // | | |
1989 // | [f(2)]|
1990 // \ | /
1991 // [ end ]
1992 //
1993 // [end] has two unconditional predecessor arcs and one conditional. The
1994 // conditional refers to the implicit empty 'else' arc. This conditional
1995 // arc can also be caused by an empty default block in a switch.
1996 //
1997 // In this case, we attempt to sink code from all *unconditional* arcs.
1998 // If we can sink instructions from these arcs (determined during the scan
1999 // phase below) we insert a common successor for all unconditional arcs and
2000 // connect that to [end], to enable sinking:
2001 //
2002 // [if]
2003 // / \
2004 // [x(1)] [if]
2005 // | | \
2006 // | | \
2007 // | [x(2)] |
2008 // \ / |
2009 // [sink.split] |
2010 // \ /
2011 // [ end ]
2012 //
2013 SmallVector<BasicBlock*,4> UnconditionalPreds;
2014 bool HaveNonUnconditionalPredecessors = false;
2015 for (auto *PredBB : predecessors(BB)) {
2016 auto *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator());
2017 if (PredBr && PredBr->isUnconditional())
2018 UnconditionalPreds.push_back(PredBB);
2019 else
2020 HaveNonUnconditionalPredecessors = true;
2021 }
2022 if (UnconditionalPreds.size() < 2)
2023 return false;
2024
2025 // We take a two-step approach to tail sinking. First we scan from the end of
2026 // each block upwards in lockstep. If the n'th instruction from the end of each
2027 // block can be sunk, those instructions are added to ValuesToSink and we
2028 // carry on. If we can sink an instruction but need to PHI-merge some operands
2029 // (because they're not identical in each instruction) we add these to
2030 // PHIOperands.
2031 int ScanIdx = 0;
2032 SmallPtrSet<Value*,4> InstructionsToSink;
2033 DenseMap<Instruction*, SmallVector<Value*,4>> PHIOperands;
2034 LockstepReverseIterator LRI(UnconditionalPreds);
2035 while (LRI.isValid() &&
2036 canSinkInstructions(*LRI, PHIOperands)) {
2037 LLVM_DEBUG(dbgs() << "SINK: instruction can be sunk: " << *(*LRI)[0]
2038 << "\n");
2039 InstructionsToSink.insert((*LRI).begin(), (*LRI).end());
2040 ++ScanIdx;
2041 --LRI;
2042 }
2043
2044 // If no instructions can be sunk, early-return.
2045 if (ScanIdx == 0)
2046 return false;
2047
2048 // Okay, we *could* sink the last ScanIdx instructions. But how many can we
2049 // actually sink before encountering an instruction that is unprofitable to sink?
2050 auto ProfitableToSinkInstruction = [&](LockstepReverseIterator &LRI) {
2051 unsigned NumPHIdValues = 0;
2052 for (auto *I : *LRI)
2053 for (auto *V : PHIOperands[I]) {
2054 if (InstructionsToSink.count(V) == 0)
2055 ++NumPHIdValues;
2056 // FIXME: this check is overly optimistic. We may end up not sinking
2057 // said instruction, due to the very same profitability check.
2058 // See @creating_too_many_phis in sink-common-code.ll.
2059 }
2060 LLVM_DEBUG(dbgs() << "SINK: #phid values: " << NumPHIdValues << "\n");
2061 unsigned NumPHIInsts = NumPHIdValues / UnconditionalPreds.size();
2062 if ((NumPHIdValues % UnconditionalPreds.size()) != 0)
2063 NumPHIInsts++;
2064
2065 return NumPHIInsts <= 1;
2066 };
2067
2068 // We've determined that we are going to sink the last ScanIdx instructions,
2069 // and recorded them in InstructionsToSink. Now, some instructions may be
2070 // unprofitable to sink. But that determination depends on the instructions
2071 // that we are going to sink.
2072
2073 // First, forward scan: find the first instruction unprofitable to sink,
2074 // recording all the ones that are profitable to sink.
2075 // FIXME: would it be better, after we detect that not all are profitable,
2076 // to either record the profitable ones, or erase the unprofitable ones?
2077 // Maybe we need to choose (at runtime) the one that will touch the fewest instrs?
2078 LRI.reset();
2079 int Idx = 0;
2080 SmallPtrSet<Value *, 4> InstructionsProfitableToSink;
2081 while (Idx < ScanIdx) {
2082 if (!ProfitableToSinkInstruction(LRI)) {
2083 // Too many PHIs would be created.
2084 LLVM_DEBUG(
2085 dbgs() << "SINK: stopping here, too many PHIs would be created!\n");
2086 break;
2087 }
2088 InstructionsProfitableToSink.insert((*LRI).begin(), (*LRI).end());
2089 --LRI;
2090 ++Idx;
2091 }
2092
2093 // If no instructions can be sunk, early-return.
2094 if (Idx == 0)
2095 return false;
2096
2097 // Did we determine that (only) some instructions are unprofitable to sink?
2098 if (Idx < ScanIdx) {
2099 // Okay, some instructions are unprofitable.
2100 ScanIdx = Idx;
2101 InstructionsToSink = InstructionsProfitableToSink;
2102
2103 // But, that may make other instructions unprofitable, too.
2104 // So do a backward scan: do any earlier instructions become unprofitable?
2105 assert(!ProfitableToSinkInstruction(LRI) &&
2106 "We already know that the last instruction is unprofitable to sink");
2107 ++LRI;
2108 --Idx;
2109 while (Idx >= 0) {
2110 // If we detect that an instruction becomes unprofitable to sink,
2111 // all earlier instructions won't be sunk either,
2112 // so preemptively keep InstructionsProfitableToSink in sync.
2113 // FIXME: is this the most performant approach?
2114 for (auto *I : *LRI)
2115 InstructionsProfitableToSink.erase(I);
2116 if (!ProfitableToSinkInstruction(LRI)) {
2117 // Everything starting with this instruction won't be sunk.
2118 ScanIdx = Idx;
2119 InstructionsToSink = InstructionsProfitableToSink;
2120 }
2121 ++LRI;
2122 --Idx;
2123 }
2124 }
2125
2126 // If no instructions can be sunk, early-return.
2127 if (ScanIdx == 0)
2128 return false;
2129
2130 bool Changed = false;
2131
2132 if (HaveNonUnconditionalPredecessors) {
2133 // It is always legal to sink common instructions from unconditional
2134 // predecessors. However, if not all predecessors are unconditional,
2135 // this transformation might be pessimizing. So as a rule of thumb,
2136 // don't do it unless we'd sink at least one non-speculatable instruction.
2137 // See https://bugs.llvm.org/show_bug.cgi?id=30244
2138 LRI.reset();
2139 int Idx = 0;
2140 bool Profitable = false;
2141 while (Idx < ScanIdx) {
2142 if (!isSafeToSpeculativelyExecute((*LRI)[0])) {
2143 Profitable = true;
2144 break;
2145 }
2146 --LRI;
2147 ++Idx;
2148 }
2149 if (!Profitable)
2150 return false;
2151
2152 LLVM_DEBUG(dbgs() << "SINK: Splitting edge\n");
2153 // We have a conditional edge and we're going to sink some instructions.
2154 // Insert a new block postdominating all blocks we're going to sink from.
2155 if (!SplitBlockPredecessors(BB, UnconditionalPreds, ".sink.split", DTU))
2156 // Edges couldn't be split.
2157 return false;
2158 Changed = true;
2159 }
2160
2161 // Now that we've analyzed all potential sinking candidates, perform the
2162 // actual sink. We iteratively sink the last non-terminator of the source
2163 // blocks into their common successor unless doing so would require too
2164 // many PHI instructions to be generated (currently only one PHI is allowed
2165 // per sunk instruction).
2166 //
2167 // We can use InstructionsToSink to discount values needing PHI-merging that will
2168 // actually be sunk in a later iteration. This allows us to be more
2169 // aggressive in what we sink. This does allow a false positive where we
2170 // sink presuming a later value will also be sunk, but stop halfway through
2171 // and never actually sink it, which means we produce more PHIs than intended.
2172 // This is unlikely in practice though.
2173 int SinkIdx = 0;
2174 for (; SinkIdx != ScanIdx; ++SinkIdx) {
2175 LLVM_DEBUG(dbgs() << "SINK: Sink: "
2176 << *UnconditionalPreds[0]->getTerminator()->getPrevNode()
2177 << "\n");
2178
2179 // Because we've sunk every instruction in turn, the current instruction to
2180 // sink is always at index 0.
2181 LRI.reset();
2182
2183 if (!sinkLastInstruction(UnconditionalPreds)) {
2184 LLVM_DEBUG(
2185 dbgs()
2186 << "SINK: stopping here, failed to actually sink instruction!\n");
2187 break;
2188 }
2189
2190 NumSinkCommonInstrs++;
2191 Changed = true;
2192 }
2193 if (SinkIdx != 0)
2194 ++NumSinkCommonCode;
2195 return Changed;
2196}
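// Tying this back to the hypothetical f(1)/f(2) sketch in the comment at the
// top of this function: a call is not speculatable, so the check for
// non-unconditional predecessors passes, the two unconditional arcs are split
// into a common ".sink.split" block, and the call is commoned there, roughly:
//
// sink.split:
//   %v.sink = phi i32 [ 1, %if.then ], [ 2, %if.then2 ]
//   call void @f(i32 %v.sink)
//   br label %end
//
// Block and value names here are illustrative only.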
2197
2198/// Determine if we can hoist a sole store instruction out of a
2199/// conditional block.
2200///
2201/// We are looking for code like the following:
2202/// BrBB:
2203/// store i32 %add, i32* %arrayidx2
2204/// ... // No other stores or function calls (we could be calling a memory
2205/// ... // function).
2206/// %cmp = icmp ult %x, %y
2207/// br i1 %cmp, label %EndBB, label %ThenBB
2208/// ThenBB:
2209/// store i32 %add5, i32* %arrayidx2
2210/// br label EndBB
2211/// EndBB:
2212/// ...
2213/// We are going to transform this into:
2214/// BrBB:
2215/// store i32 %add, i32* %arrayidx2
2216/// ... //
2217/// %cmp = icmp ult %x, %y
2218/// %add.add5 = select i1 %cmp, i32 %add, %add5
2219/// store i32 %add.add5, i32* %arrayidx2
2220/// ...
2221///
2222/// \return The pointer to the value of the previous store if the store can be
2223/// hoisted into the predecessor block, or nullptr otherwise.
2224static Value *isSafeToSpeculateStore(Instruction *I, BasicBlock *BrBB,
2225 BasicBlock *StoreBB, BasicBlock *EndBB) {
2226 StoreInst *StoreToHoist = dyn_cast<StoreInst>(I);
2227 if (!StoreToHoist)
2228 return nullptr;
2229
2230 // Volatile or atomic.
2231 if (!StoreToHoist->isSimple())
2232 return nullptr;
2233
2234 Value *StorePtr = StoreToHoist->getPointerOperand();
2235 Type *StoreTy = StoreToHoist->getValueOperand()->getType();
2236
2237 // Look for a store to the same pointer in BrBB.
2238 unsigned MaxNumInstToLookAt = 9;
2239 // Skip pseudo probe intrinsic calls which are not really killing any memory
2240 // accesses.
2241 for (Instruction &CurI : reverse(BrBB->instructionsWithoutDebug(true))) {
2242 if (!MaxNumInstToLookAt)
2243 break;
2244 --MaxNumInstToLookAt;
2245
2246 // Could be calling an instruction that affects memory like free().
2247 if (CurI.mayWriteToMemory() && !isa<StoreInst>(CurI))
2248 return nullptr;
2249
2250 if (auto *SI = dyn_cast<StoreInst>(&CurI)) {
2251 // Found the previous store to same location and type. Make sure it is
2252 // simple, to avoid introducing a spurious non-atomic write after an
2253 // atomic write.
2254 if (SI->getPointerOperand() == StorePtr &&
2255 SI->getValueOperand()->getType() == StoreTy && SI->isSimple())
2256 // Found the previous store, return its value operand.
2257 return SI->getValueOperand();
2258 return nullptr; // Unknown store.
2259 }
2260 }
2261
2262 return nullptr;
2263}
2264
2265/// Estimate the cost of the insertion(s) and check that the PHI nodes can be
2266/// converted to selects.
2267static bool validateAndCostRequiredSelects(BasicBlock *BB, BasicBlock *ThenBB,
2268 BasicBlock *EndBB,
2269 unsigned &SpeculatedInstructions,
2270 InstructionCost &Cost,
2271 const TargetTransformInfo &TTI) {
2272 TargetTransformInfo::TargetCostKind CostKind =
2273 BB->getParent()->hasMinSize()
2274 ? TargetTransformInfo::TCK_CodeSize
2275 : TargetTransformInfo::TCK_SizeAndLatency;
2276
2277 bool HaveRewritablePHIs = false;
2278 for (PHINode &PN : EndBB->phis()) {
2279 Value *OrigV = PN.getIncomingValueForBlock(BB);
2280 Value *ThenV = PN.getIncomingValueForBlock(ThenBB);
2281
2282 // FIXME: Try to remove some of the duplication with HoistThenElseCodeToIf.
2283 // Skip PHIs which are trivial.
2284 if (ThenV == OrigV)
2285 continue;
2286
2287 Cost += TTI.getCmpSelInstrCost(Instruction::Select, PN.getType(), nullptr,
2288 CmpInst::BAD_ICMP_PREDICATE, CostKind);
2289
2290 // Don't convert to selects if we could remove undefined behavior instead.
2291 if (passingValueIsAlwaysUndefined(OrigV, &PN) ||
2292 passingValueIsAlwaysUndefined(ThenV, &PN))
2293 return false;
2294
2295 HaveRewritablePHIs = true;
2296 ConstantExpr *OrigCE = dyn_cast<ConstantExpr>(OrigV);
2297 ConstantExpr *ThenCE = dyn_cast<ConstantExpr>(ThenV);
2298 if (!OrigCE && !ThenCE)
2299 continue; // Known safe and cheap.
2300
2301 if ((ThenCE && !isSafeToSpeculativelyExecute(ThenCE)) ||
2302 (OrigCE && !isSafeToSpeculativelyExecute(OrigCE)))
2303 return false;
2304 InstructionCost OrigCost = OrigCE ? computeSpeculationCost(OrigCE, TTI) : 0;
2305 InstructionCost ThenCost = ThenCE ? computeSpeculationCost(ThenCE, TTI) : 0;
2306 InstructionCost MaxCost =
2307 2 * PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
2308 if (OrigCost + ThenCost > MaxCost)
2309 return false;
2310
2311 // Account for the cost of an unfolded ConstantExpr which could end up
2312 // getting expanded into Instructions.
2313 // FIXME: This doesn't account for how many operations are combined in the
2314 // constant expression.
2315 ++SpeculatedInstructions;
2316 if (SpeculatedInstructions > 1)
2317 return false;
2318 }
2319
2320 return HaveRewritablePHIs;
2321}
2322
2323/// Speculate a conditional basic block flattening the CFG.
2324///
2325/// Note that this is a very risky transform currently. Speculating
2326/// instructions like this is most often not desirable. Instead, there is an MI
2327/// pass which can do it with full awareness of the resource constraints.
2328/// However, some cases are "obvious" and we should handle directly. An example of
2329/// this is speculating a single, reasonably cheap instruction.
2330///
2331/// There is only one distinct advantage to flattening the CFG at the IR level:
2332/// it makes very common but simplistic optimizations such as are common in
2333/// instcombine and the DAG combiner more powerful by removing CFG edges and
2334/// modeling their effects with easier to reason about SSA value graphs.
2335///
2336///
2337/// An illustration of this transform is turning this IR:
2338/// \code
2339/// BB:
2340/// %cmp = icmp ult %x, %y
2341/// br i1 %cmp, label %EndBB, label %ThenBB
2342/// ThenBB:
2343/// %sub = sub %x, %y
2344/// br label EndBB
2345/// EndBB:
2346/// %phi = phi [ %sub, %ThenBB ], [ 0, %BB ]
2347/// ...
2348/// \endcode
2349///
2350/// Into this IR:
2351/// \code
2352/// BB:
2353/// %cmp = icmp ult %x, %y
2354/// %sub = sub %x, %y
2355/// %cond = select i1 %cmp, 0, %sub
2356/// ...
2357/// \endcode
2358///
2359/// \returns true if the conditional block is removed.
2360bool SimplifyCFGOpt::SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *ThenBB,
2361 const TargetTransformInfo &TTI) {
2362 // Be conservative for now. FP select instruction can often be expensive.
2363 Value *BrCond = BI->getCondition();
2364 if (isa<FCmpInst>(BrCond))
2365 return false;
2366
2367 BasicBlock *BB = BI->getParent();
2368 BasicBlock *EndBB = ThenBB->getTerminator()->getSuccessor(0);
2369 InstructionCost Budget =
2370 PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
2371
2372 // If ThenBB is actually on the false edge of the conditional branch, remember
2373 // to swap the select operands later.
2374 bool Invert = false;
2375 if (ThenBB != BI->getSuccessor(0)) {
2376 assert(ThenBB == BI->getSuccessor(1) && "No edge from 'if' block?");
2377 Invert = true;
2378 }
2379 assert(EndBB == BI->getSuccessor(!Invert) && "No edge from 'if' block to end block");
2380
2381 // If the branch is non-unpredictable, and is predicted to *not* branch to
2382 // the `then` block, then avoid speculating it.
2383 if (!BI->getMetadata(LLVMContext::MD_unpredictable)) {
2384 uint64_t TWeight, FWeight;
2385 if (BI->extractProfMetadata(TWeight, FWeight) && (TWeight + FWeight) != 0) {
2386 uint64_t EndWeight = Invert ? TWeight : FWeight;
2387 BranchProbability BIEndProb =
2388 BranchProbability::getBranchProbability(EndWeight, TWeight + FWeight);
2389 BranchProbability Likely = TTI.getPredictableBranchThreshold();
2390 if (BIEndProb >= Likely)
2391 return false;
2392 }
2393 }
2394
2395 // Keep a count of how many times instructions are used within ThenBB when
2396 // they are candidates for sinking into ThenBB. Specifically:
2397 // - They are defined in BB, and
2398 // - They have no side effects, and
2399 // - All of their uses are in ThenBB.
2400 SmallDenseMap<Instruction *, unsigned, 4> SinkCandidateUseCounts;
2401
2402 SmallVector<Instruction *, 4> SpeculatedDbgIntrinsics;
2403
2404 unsigned SpeculatedInstructions = 0;
2405 Value *SpeculatedStoreValue = nullptr;
2406 StoreInst *SpeculatedStore = nullptr;
2407 for (BasicBlock::iterator BBI = ThenBB->begin(),
2408 BBE = std::prev(ThenBB->end());
2409 BBI != BBE; ++BBI) {
2410 Instruction *I = &*BBI;
2411 // Skip debug info.
2412 if (isa<DbgInfoIntrinsic>(I)) {
2413 SpeculatedDbgIntrinsics.push_back(I);
2414 continue;
2415 }
2416
2417 // Skip pseudo probes. The consequence is we lose track of the branch
2418 // probability for ThenBB, which is fine since the optimization here takes
2419 // place regardless of the branch probability.
2420 if (isa<PseudoProbeInst>(I)) {
2421 // The probe should be deleted so that it will not be over-counted when
2422 // the samples collected on the non-conditional path are counted towards
2423 // the conditional path. We leave it for the counts inference algorithm to
2424 // figure out a proper count for an unknown probe.
2425 SpeculatedDbgIntrinsics.push_back(I);
2426 continue;
2427 }
2428
2429 // Only speculatively execute a single instruction (not counting the
2430 // terminator) for now.
2431 ++SpeculatedInstructions;
2432 if (SpeculatedInstructions > 1)
2433 return false;
2434
2435 // Don't hoist the instruction if it's unsafe or expensive.
2436 if (!isSafeToSpeculativelyExecute(I) &&
2437 !(HoistCondStores && (SpeculatedStoreValue = isSafeToSpeculateStore(
2438 I, BB, ThenBB, EndBB))))
2439 return false;
2440 if (!SpeculatedStoreValue &&
2441 computeSpeculationCost(I, TTI) >
2442 PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic)
2443 return false;
2444
2445 // Store the store speculation candidate.
2446 if (SpeculatedStoreValue)
2447 SpeculatedStore = cast<StoreInst>(I);
2448
2449 // Do not hoist the instruction if any of its operands are defined but not
2450 // used in BB. The transformation will prevent the operand from
2451 // being sunk into the use block.
2452 for (Use &Op : I->operands()) {
2453 Instruction *OpI = dyn_cast<Instruction>(Op);
2454 if (!OpI || OpI->getParent() != BB || OpI->mayHaveSideEffects())
2455 continue; // Not a candidate for sinking.
2456
2457 ++SinkCandidateUseCounts[OpI];
2458 }
2459 }
2460
2461 // Consider any sink candidates which are only used in ThenBB as costs for
2462 // speculation. Note, while we iterate over a DenseMap here, we are summing
2463 // and so iteration order isn't significant.
2464 for (SmallDenseMap<Instruction *, unsigned, 4>::iterator
2465 I = SinkCandidateUseCounts.begin(),
2466 E = SinkCandidateUseCounts.end();
2467 I != E; ++I)
2468 if (I->first->hasNUses(I->second)) {
2469 ++SpeculatedInstructions;
2470 if (SpeculatedInstructions > 1)
2471 return false;
2472 }
2473
2474 // Check that we can insert the selects and that it's not too expensive to do
2475 // so.
2476 bool Convert = SpeculatedStore != nullptr;
2477 InstructionCost Cost = 0;
2478 Convert |= validateAndCostRequiredSelects(BB, ThenBB, EndBB,
2479 SpeculatedInstructions,
2480 Cost, TTI);
2481 if (!Convert || Cost > Budget)
2482 return false;
2483
2484 // If we get here, we can hoist the instruction and if-convert.
2485 LLVM_DEBUG(dbgs() << "SPECULATIVELY EXECUTING BB" << *ThenBB << "\n";);
2486
2487 // Insert a select of the value of the speculated store.
2488 if (SpeculatedStoreValue) {
2489 IRBuilder<NoFolder> Builder(BI);
2490 Value *TrueV = SpeculatedStore->getValueOperand();
2491 Value *FalseV = SpeculatedStoreValue;
2492 if (Invert)
2493 std::swap(TrueV, FalseV);
2494 Value *S = Builder.CreateSelect(
2495 BrCond, TrueV, FalseV, "spec.store.select", BI);
2496 SpeculatedStore->setOperand(0, S);
2497 SpeculatedStore->applyMergedLocation(BI->getDebugLoc(),
2498 SpeculatedStore->getDebugLoc());
2499 }
2500
2501 // Metadata can be dependent on the condition we are hoisting above.
2502 // Conservatively strip all metadata on the instruction. Drop the debug loc
2503 // to avoid making it appear as if the condition is a constant, which would
2504 // be misleading while debugging.
2505 // Similarly strip attributes that may be dependent on the condition we are
2506 // hoisting above.
2507 for (auto &I : *ThenBB) {
2508 if (!SpeculatedStoreValue || &I != SpeculatedStore)
2509 I.setDebugLoc(DebugLoc());
2510 I.dropUndefImplyingAttrsAndUnknownMetadata();
2511 }
2512
2513 // Hoist the instructions.
2514 BB->getInstList().splice(BI->getIterator(), ThenBB->getInstList(),
2515 ThenBB->begin(), std::prev(ThenBB->end()));
2516
2517 // Insert selects and rewrite the PHI operands.
2518 IRBuilder<NoFolder> Builder(BI);
2519 for (PHINode &PN : EndBB->phis()) {
2520 unsigned OrigI = PN.getBasicBlockIndex(BB);
2521 unsigned ThenI = PN.getBasicBlockIndex(ThenBB);
2522 Value *OrigV = PN.getIncomingValue(OrigI);
2523 Value *ThenV = PN.getIncomingValue(ThenI);
2524
2525 // Skip PHIs which are trivial.
2526 if (OrigV == ThenV)
2527 continue;
2528
2529 // Create a select whose true value is the speculatively executed value and
2530 // false value is the pre-existing value. Swap them if the branch
2531 // destinations were inverted.
2532 Value *TrueV = ThenV, *FalseV = OrigV;
2533 if (Invert)
2534 std::swap(TrueV, FalseV);
2535 Value *V = Builder.CreateSelect(BrCond, TrueV, FalseV, "spec.select", BI);
2536 PN.setIncomingValue(OrigI, V);
2537 PN.setIncomingValue(ThenI, V);
2538 }
2539
2540 // Remove speculated dbg intrinsics.
2541 // FIXME: Is it possible to do this in a more elegant way? Moving/merging the
2542 // dbg value for the different flows and inserting it after the select.
2543 for (Instruction *I : SpeculatedDbgIntrinsics)
2544 I->eraseFromParent();
2545
2546 ++NumSpeculations;
2547 return true;
2548}
2549
2550/// Return true if we can thread a branch across this block.
2551static bool BlockIsSimpleEnoughToThreadThrough(BasicBlock *BB) {
2552 int Size = 0;
2553
2554 SmallPtrSet<const Value *, 32> EphValues;
2555 auto IsEphemeral = [&](const Value *V) {
2556 if (isa<AssumeInst>(V))
2557 return true;
2558 return isSafeToSpeculativelyExecute(V) &&
2559 all_of(V->users(),
2560 [&](const User *U) { return EphValues.count(U); });
2561 };
2562
2563 // Walk the block in reverse so that we can identify ephemeral values properly
2564 // (values only feeding assumes).
2565 for (Instruction &I : reverse(BB->instructionsWithoutDebug())) {
2566 // Can't fold blocks that contain noduplicate or convergent calls.
2567 if (CallInst *CI = dyn_cast<CallInst>(&I))
2568 if (CI->cannotDuplicate() || CI->isConvergent())
2569 return false;
2570
2571 // Ignore ephemeral values which are deleted during codegen.
2572 if (IsEphemeral(&I))
2573 EphValues.insert(&I);
2574 // We will delete Phis while threading, so Phis should not be counted in
2575 // the block's size.
2576 else if (!isa<PHINode>(I)) {
2577 if (Size++ > MaxSmallBlockSize)
2578 return false; // Don't clone large BB's.
2579 }
2580
2581 // We can only support instructions that do not define values that are
2582 // live outside of the current basic block.
2583 for (User *U : I.users()) {
2584 Instruction *UI = cast<Instruction>(U);
2585 if (UI->getParent() != BB || isa<PHINode>(UI))
2586 return false;
2587 }
2588
2589 // Looks ok, continue checking.
2590 }
2591
2592 return true;
2593}
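// A hypothetical example of an "ephemeral" value that the size check above
// does not count against MaxSmallBlockSize, because it only feeds an assume:
//
//   %cmp = icmp sgt i32 %x, 0
//   call void @llvm.assume(i1 %cmp)
//
// Walking the block in reverse registers the assume first, so %cmp's only user
// is already in EphValues when %cmp itself is visited.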
2594
2595/// If we have a conditional branch on a PHI node value that is defined in the
2596/// same block as the branch and if any PHI entries are constants, thread the
2597/// edges corresponding to those entries to branch to their ultimate destination.
2598static bool FoldCondBranchOnPHI(BranchInst *BI, DomTreeUpdater *DTU,
2599 const DataLayout &DL, AssumptionCache *AC) {
2600 BasicBlock *BB = BI->getParent();
2601 PHINode *PN = dyn_cast<PHINode>(BI->getCondition());
2602 // NOTE: we currently cannot transform this case if the PHI node is used
2603 // outside of the block.
2604 if (!PN || PN->getParent() != BB || !PN->hasOneUse())
2605 return false;
2606
2607 // Degenerate case of a single entry PHI.
2608 if (PN->getNumIncomingValues() == 1) {
2609 FoldSingleEntryPHINodes(PN->getParent());
2610 return true;
2611 }
2612
2613 // Now we know that this block has multiple preds and two succs.
2614 if (!BlockIsSimpleEnoughToThreadThrough(BB))
2615 return false;
2616
2617 // Okay, this is a simple enough basic block. See if any phi values are
2618 // constants.
2619 for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
2620 ConstantInt *CB = dyn_cast<ConstantInt>(PN->getIncomingValue(i));
2621 if (!CB || !CB->getType()->isIntegerTy(1))
2622 continue;
2623
2624 // Okay, we now know that all edges from PredBB should be revectored to
2625 // branch to RealDest.
2626 BasicBlock *PredBB = PN->getIncomingBlock(i);
2627 BasicBlock *RealDest = BI->getSuccessor(!CB->getZExtValue());
2628
2629 if (RealDest == BB)
2630 continue; // Skip self loops.
2631 // Skip if the predecessor's terminator is an indirect branch.
2632 if (isa<IndirectBrInst>(PredBB->getTerminator()))
2633 continue;
2634
2635 SmallVector<DominatorTree::UpdateType, 3> Updates;
2636
2637 // The dest block might have PHI nodes, other predecessors and other
2638 // difficult cases. Instead of being smart about this, just insert a new
2639 // block that jumps to the destination block, effectively splitting
2640 // the edge we are about to create.
2641 BasicBlock *EdgeBB =
2642 BasicBlock::Create(BB->getContext(), RealDest->getName() + ".critedge",
2643 RealDest->getParent(), RealDest);
2644 BranchInst *CritEdgeBranch = BranchInst::Create(RealDest, EdgeBB);
2645 if (DTU)
2646 Updates.push_back({DominatorTree::Insert, EdgeBB, RealDest});
2647 CritEdgeBranch->setDebugLoc(BI->getDebugLoc());
2648
2649 // Update PHI nodes.
2650 AddPredecessorToBlock(RealDest, EdgeBB, BB);
2651
2652 // BB may have instructions that are being threaded over. Clone these
2653 // instructions into EdgeBB. We know that there will be no uses of the
2654 // cloned instructions outside of EdgeBB.
2655 BasicBlock::iterator InsertPt = EdgeBB->begin();
2656 DenseMap<Value *, Value *> TranslateMap; // Track translated values.
2657 for (BasicBlock::iterator BBI = BB->begin(); &*BBI != BI; ++BBI) {
2658 if (PHINode *PN = dyn_cast<PHINode>(BBI)) {
2659 TranslateMap[PN] = PN->getIncomingValueForBlock(PredBB);
2660 continue;
2661 }
2662 // Clone the instruction.
2663 Instruction *N = BBI->clone();
2664 if (BBI->hasName())
2665 N->setName(BBI->getName() + ".c");
2666
2667 // Update operands due to translation.
2668 for (Use &Op : N->operands()) {
2669 DenseMap<Value *, Value *>::iterator PI = TranslateMap.find(Op);
2670 if (PI != TranslateMap.end())
2671 Op = PI->second;
2672 }
2673
2674 // Check for trivial simplification.
2675 if (Value *V = SimplifyInstruction(N, {DL, nullptr, nullptr, AC})) {
2676 if (!BBI->use_empty())
2677 TranslateMap[&*BBI] = V;
2678 if (!N->mayHaveSideEffects()) {
2679 N->deleteValue(); // Instruction folded away, don't need actual inst
2680 N = nullptr;
2681 }
2682 } else {
2683 if (!BBI->use_empty())
2684 TranslateMap[&*BBI] = N;
2685 }
2686 if (N) {
2687 // Insert the new instruction into its new home.
2688 EdgeBB->getInstList().insert(InsertPt, N);
2689
2690 // Register the new instruction with the assumption cache if necessary.
2691 if (auto *Assume = dyn_cast<AssumeInst>(N))
2692 if (AC)
2693 AC->registerAssumption(Assume);
2694 }
2695 }
2696
2697 // Loop over all of the edges from PredBB to BB, changing them to branch
2698 // to EdgeBB instead.
2699 Instruction *PredBBTI = PredBB->getTerminator();
2700 for (unsigned i = 0, e = PredBBTI->getNumSuccessors(); i != e; ++i)
2701 if (PredBBTI->getSuccessor(i) == BB) {
2702 BB->removePredecessor(PredBB);
2703 PredBBTI->setSuccessor(i, EdgeBB);
2704 }
2705
2706 if (DTU) {
2707 Updates.push_back({DominatorTree::Insert, PredBB, EdgeBB});
2708 Updates.push_back({DominatorTree::Delete, PredBB, BB});
2709
2710 DTU->applyUpdates(Updates);
2711 }
2712
2713 // Recurse, simplifying any other constants.
2714 return FoldCondBranchOnPHI(BI, DTU, DL, AC) || true;
2715 }
2716
2717 return false;
2718}
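// A minimal before/after sketch of the transform above (illustrative IR, block
// and value names invented). Before, %pred feeds a known-true value into the
// phi that %bb branches on:
//
//   bb:
//     %c = phi i1 [ true, %pred ], [ %x, %other ]
//     br i1 %c, label %t, label %f
//
// After, the %pred edge bypasses %bb entirely via a fresh critical-edge block:
//
//   t.critedge:                 ; preds: %pred
//     br label %t
//
// and %pred is removed from %bb's phi. Any instructions between the phis and
// the branch are cloned into the new block with their phi operands translated.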
2719
2720/// Given a BB that starts with the specified two-entry PHI node,
2721/// see if we can eliminate it.
2722static bool FoldTwoEntryPHINode(PHINode *PN, const TargetTransformInfo &TTI,
2723 DomTreeUpdater *DTU, const DataLayout &DL) {
2724 // Ok, this is a two entry PHI node. Check to see if this is a simple "if
2725 // statement", which has a very simple dominance structure. Basically, we
2726 // are trying to find the condition that is being branched on, which
2727 // subsequently causes this merge to happen. We really want control
2728 // dependence information for this check, but simplifycfg can't keep it up
2729 // to date, and this catches most of the cases we care about anyway.
2730 BasicBlock *BB = PN->getParent();
2731
2732 BasicBlock *IfTrue, *IfFalse;
2733 BranchInst *DomBI = GetIfCondition(BB, IfTrue, IfFalse);
2734 if (!DomBI)
2735 return false;
2736 Value *IfCond = DomBI->getCondition();
2737 // Don't bother if the branch will be constant folded trivially.
2738 if (isa<ConstantInt>(IfCond))
2739 return false;
2740
2741 BasicBlock *DomBlock = DomBI->getParent();
2742 SmallVector<BasicBlock *, 2> IfBlocks;
2743 llvm::copy_if(
2744 PN->blocks(), std::back_inserter(IfBlocks), [](BasicBlock *IfBlock) {
2745 return cast<BranchInst>(IfBlock->getTerminator())->isUnconditional();
2746 });
2747 assert((IfBlocks.size() == 1 || IfBlocks.size() == 2) &&
2748 "Will have either one or two blocks to speculate.");
2749
2750 // If the branch is non-unpredictable, see if we either predictably jump to
2751 // the merge bb (if we have only a single 'then' block), or if we predictably
2752 // jump to one specific 'then' block (if we have two of them).
2753 // It isn't beneficial to speculatively execute the code
2754 // from the block that we know is predictably not entered.
2755 if (!DomBI->getMetadata(LLVMContext::MD_unpredictable)) {
2756 uint64_t TWeight, FWeight;
2757 if (DomBI->extractProfMetadata(TWeight, FWeight) &&
2758 (TWeight + FWeight) != 0) {
2759 BranchProbability BITrueProb =
2760 BranchProbability::getBranchProbability(TWeight, TWeight + FWeight);
2761 BranchProbability Likely = TTI.getPredictableBranchThreshold();
2762 BranchProbability BIFalseProb = BITrueProb.getCompl();
2763 if (IfBlocks.size() == 1) {
2764 BranchProbability BIBBProb =
2765 DomBI->getSuccessor(0) == BB ? BITrueProb : BIFalseProb;
2766 if (BIBBProb >= Likely)
2767 return false;
2768 } else {
2769 if (BITrueProb >= Likely || BIFalseProb >= Likely)
2770 return false;
2771 }
2772 }
2773 }
2774
2775 // Don't try to fold an unreachable block. For example, the phi node itself
2776 // can't be the candidate if-condition for a select that we want to form.
2777 if (auto *IfCondPhiInst = dyn_cast<PHINode>(IfCond))
2778 if (IfCondPhiInst->getParent() == BB)
2779 return false;
2780
2781 // Okay, we found that we can merge this two-entry phi node into a select.
2782 // Doing so would require us to fold *all* two entry phi nodes in this block.
2783 // At some point this becomes non-profitable (particularly if the target
2784 // doesn't support cmov's). Only do this transformation if there are two or
2785 // fewer PHI nodes in this block.
2786 unsigned NumPhis = 0;
2787 for (BasicBlock::iterator I = BB->begin(); isa<PHINode>(I); ++NumPhis, ++I)
2788 if (NumPhis > 2)
2789 return false;
2790
2791 // Loop over the PHI's seeing if we can promote them all to select
2792 // instructions. While we are at it, keep track of the instructions
2793 // that need to be moved to the dominating block.
2794 SmallPtrSet<Instruction *, 4> AggressiveInsts;
2795 InstructionCost Cost = 0;
2796 InstructionCost Budget =
2797 TwoEntryPHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
2798
2799 bool Changed = false;
2800 for (BasicBlock::iterator II = BB->begin(); isa<PHINode>(II);) {
2801 PHINode *PN = cast<PHINode>(II++);
2802 if (Value *V = SimplifyInstruction(PN, {DL, PN})) {
2803 PN->replaceAllUsesWith(V);
2804 PN->eraseFromParent();
2805 Changed = true;
2806 continue;
2807 }
2808
2809 if (!dominatesMergePoint(PN->getIncomingValue(0), BB, AggressiveInsts,
2810 Cost, Budget, TTI) ||
2811 !dominatesMergePoint(PN->getIncomingValue(1), BB, AggressiveInsts,
2812 Cost, Budget, TTI))
2813 return Changed;
2814 }
2815
2816 // If we folded the first phi, PN dangles at this point. Refresh it. If
2817 // we ran out of PHIs then we simplified them all.
2818 PN = dyn_cast<PHINode>(BB->begin());
2819 if (!PN)
2820 return true;
2821
2822 // Return true if at least one of these is a 'not', and another is either
2823 // a 'not' too, or a constant.
2824 auto CanHoistNotFromBothValues = [](Value *V0, Value *V1) {
2825 if (!match(V0, m_Not(m_Value())))
2826 std::swap(V0, V1);
2827 auto Invertible = m_CombineOr(m_Not(m_Value()), m_AnyIntegralConstant());
2828 return match(V0, m_Not(m_Value())) && match(V1, Invertible);
2829 };
2830
2831 // Don't fold i1 branches on PHIs which contain binary operators or
2832 // (possibly inverted) select form of or/ands, unless one of
2833 // the incoming values is a 'not' and another one is freely invertible.
2834 // These can often be turned into switches and other things.
2835 auto IsBinOpOrAnd = [](Value *V) {
2836 return match(
2837 V, m_CombineOr(
2838 m_BinOp(),
2839 m_CombineOr(m_Select(m_Value(), m_ImmConstant(), m_Value()),
2840 m_Select(m_Value(), m_Value(), m_ImmConstant()))));
2841 };
2842 if (PN->getType()->isIntegerTy(1) &&
2843 (IsBinOpOrAnd(PN->getIncomingValue(0)) ||
2844 IsBinOpOrAnd(PN->getIncomingValue(1)) || IsBinOpOrAnd(IfCond)) &&
2845 !CanHoistNotFromBothValues(PN->getIncomingValue(0),
2846 PN->getIncomingValue(1)))
2847 return Changed;
2848
2849 // If all PHI nodes are promotable, check to make sure that all instructions
2850 // in the predecessor blocks can be promoted as well. If not, we won't be able
2851 // to get rid of the control flow, so it's not worth promoting to select
2852 // instructions.
2853 for (BasicBlock *IfBlock : IfBlocks)
2854 for (BasicBlock::iterator I = IfBlock->begin(); !I->isTerminator(); ++I)
2855 if (!AggressiveInsts.count(&*I) && !isa<DbgInfoIntrinsic>(I) &&
2856 !isa<PseudoProbeInst>(I)) {
2857 // This is not an aggressive instruction that we can promote.
2858 // Because of this, we won't be able to get rid of the control flow, so
2859 // the xform is not worth it.
2860 return Changed;
2861 }
2862
2863 // If either of the blocks has its address taken, we can't do this fold.
2864 if (any_of(IfBlocks,
2865 [](BasicBlock *IfBlock) { return IfBlock->hasAddressTaken(); }))
2866 return Changed;
2867
2868 LLVM_DEBUG(dbgs() << "FOUND IF CONDITION! " << *IfCond
2869 << " T: " << IfTrue->getName()
2870 << " F: " << IfFalse->getName() << "\n");
2871
2872 // If we can still promote the PHI nodes after this gauntlet of tests,
2873 // do all of the PHI's now.
2874
2875 // Move all 'aggressive' instructions, which are defined in the
2876 // conditional parts of the if's up to the dominating block.
2877 for (BasicBlock *IfBlock : IfBlocks)
2878 hoistAllInstructionsInto(DomBlock, DomBI, IfBlock);
2879
2880 IRBuilder<NoFolder> Builder(DomBI);
2881 // Propagate fast-math-flags from phi nodes to replacement selects.
2882 IRBuilder<>::FastMathFlagGuard FMFGuard(Builder);
2883 while (PHINode *PN = dyn_cast<PHINode>(BB->begin())) {
2884 if (isa<FPMathOperator>(PN))
2885 Builder.setFastMathFlags(PN->getFastMathFlags());
2886
2887 // Change the PHI node into a select instruction.
2888 Value *TrueVal = PN->getIncomingValueForBlock(IfTrue);
2889 Value *FalseVal = PN->getIncomingValueForBlock(IfFalse);
2890
2891 Value *Sel = Builder.CreateSelect(IfCond, TrueVal, FalseVal, "", DomBI);
2892 PN->replaceAllUsesWith(Sel);
2893 Sel->takeName(PN);
2894 PN->eraseFromParent();
2895 }
2896
2897 // At this point, all IfBlocks are empty, so our if statement
2898 // has been flattened. Change DomBlock to jump directly to our new block to
2899 // avoid other simplifycfg's kicking in on the diamond.
2900 Builder.CreateBr(BB);
2901
2902 SmallVector<DominatorTree::UpdateType, 3> Updates;
2903 if (DTU) {
2904 Updates.push_back({DominatorTree::Insert, DomBlock, BB});
2905 for (auto *Successor : successors(DomBlock))
2906 Updates.push_back({DominatorTree::Delete, DomBlock, Successor});
2907 }
2908
2909 DomBI->eraseFromParent();
2910 if (DTU)
2911 DTU->applyUpdates(Updates);
2912
2913 return true;
2914}
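// Rough shape of the fold above (illustrative IR, names invented). A two-entry
// phi fed by an if/else diamond:
//
//   entry:  br i1 %cond, label %then, label %else
//   then:   br label %merge
//   else:   br label %merge
//   merge:  %r = phi i32 [ %a, %then ], [ %b, %else ]
//
// becomes a select in the dominating block, and the diamond collapses:
//
//   entry:  %r = select i1 %cond, i32 %a, i32 %b
//           br label %merge
//
// The cost gate (TwoEntryPHINodeFoldingThreshold) and the dominatesMergePoint
// checks above decide whether speculating the incoming values into the entry
// block is cheap enough to be worthwhile.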
2915
2916static Value *createLogicalOp(IRBuilderBase &Builder,
2917 Instruction::BinaryOps Opc, Value *LHS,
2918 Value *RHS, const Twine &Name = "") {
2919 // Try to relax logical op to binary op.
2920 if (impliesPoison(RHS, LHS))
2921 return Builder.CreateBinOp(Opc, LHS, RHS, Name);
2922 if (Opc == Instruction::And)
2923 return Builder.CreateLogicalAnd(LHS, RHS, Name);
2924 if (Opc == Instruction::Or)
2925 return Builder.CreateLogicalOr(LHS, RHS, Name);
2926 llvm_unreachable("Invalid logical opcode");
2927}
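// Note on the helper above: a plain 'and i1 %a, %b' is poison if either operand
// is poison, whereas the select form 'select i1 %a, i1 %b, i1 false' (a
// "logical and") does not propagate poison from %b when %a is false. The
// impliesPoison(RHS, LHS) check allows the plain binary form only when that
// relaxation cannot introduce new poison; the same reasoning applies to the
// or / logical-or pair.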
2928
2929/// Return true if either PBI or BI has branch weight available, and store
2930/// the weights in {Pred|Succ}{True|False}Weight. If one of PBI and BI does
2931/// not have branch weight, use 1:1 as its weight.
2932static bool extractPredSuccWeights(BranchInst *PBI, BranchInst *BI,
2933 uint64_t &PredTrueWeight,
2934 uint64_t &PredFalseWeight,
2935 uint64_t &SuccTrueWeight,
2936 uint64_t &SuccFalseWeight) {
2937 bool PredHasWeights =
2938 PBI->extractProfMetadata(PredTrueWeight, PredFalseWeight);
2939 bool SuccHasWeights =
2940 BI->extractProfMetadata(SuccTrueWeight, SuccFalseWeight);
2941 if (PredHasWeights || SuccHasWeights) {
2942 if (!PredHasWeights)
2943 PredTrueWeight = PredFalseWeight = 1;
2944 if (!SuccHasWeights)
2945 SuccTrueWeight = SuccFalseWeight = 1;
2946 return true;
2947 } else {
2948 return false;
2949 }
2950}
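// Small worked example for the helper above (numbers invented): if PBI carries
// !prof weights 30:70 and BI carries none, the function returns true with
// PredTrueWeight=30, PredFalseWeight=70, SuccTrueWeight=1, SuccFalseWeight=1,
// i.e. the branch with missing metadata is treated as a 1:1 coin flip.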
2951
2952/// Determine if the two branches share a common destination and deduce a glue
2953/// that joins the branches' conditions to arrive at the common destination if
2954/// that would be profitable.
2955static Optional<std::pair<Instruction::BinaryOps, bool>>
2956shouldFoldCondBranchesToCommonDestination(BranchInst *BI, BranchInst *PBI,
2957 const TargetTransformInfo *TTI) {
2958 assert(BI && PBI && BI->isConditional() && PBI->isConditional() &&
2959 "Both blocks must end with a conditional branch.");
2960 assert(is_contained(predecessors(BI->getParent()), PBI->getParent()) &&
2961 "PredBB must be a predecessor of BB.");
2962
2963 // We have the potential to fold the conditions together, but if the
2964 // predecessor branch is predictable, we may not want to merge them.
2965 uint64_t PTWeight, PFWeight;
2966 BranchProbability PBITrueProb, Likely;
2967 if (TTI && !PBI->getMetadata(LLVMContext::MD_unpredictable) &&
2968 PBI->extractProfMetadata(PTWeight, PFWeight) &&
2969 (PTWeight + PFWeight) != 0) {
2970 PBITrueProb =
2971 BranchProbability::getBranchProbability(PTWeight, PTWeight + PFWeight);
2972 Likely = TTI->getPredictableBranchThreshold();
2973 }
2974
2975 if (PBI->getSuccessor(0) == BI->getSuccessor(0)) {
2976 // Speculate the 2nd condition unless the 1st is probably true.
2977 if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
2978 return {{Instruction::Or, false}};
2979 } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) {
2980 // Speculate the 2nd condition unless the 1st is probably false.
2981 if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
2982 return {{Instruction::And, false}};
2983 } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) {
2984 // Speculate the 2nd condition unless the 1st is probably true.
2985 if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
2986 return {{Instruction::And, true}};
2987 } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) {
2988 // Speculate the 2nd condition unless the 1st is probably false.
2989 if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
2990 return {{Instruction::Or, true}};
2991 }
2992 return None;
2993}
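// Summary of the four cases above (illustrative): with PBI in the predecessor
// block and BI in the current block,
//
//   PBI.succ(0) == BI.succ(0)  ->  Or,  predecessor condition kept as-is
//   PBI.succ(1) == BI.succ(1)  ->  And, predecessor condition kept as-is
//   PBI.succ(0) == BI.succ(1)  ->  And, predecessor condition inverted
//   PBI.succ(1) == BI.succ(0)  ->  Or,  predecessor condition inverted
//
// In every case the fold is skipped when profile data says the predecessor
// branch is already predictable (probability >= the TTI threshold), since
// speculating the second condition would then buy nothing.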
2994
2995static bool performBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI,
2996 DomTreeUpdater *DTU,
2997 MemorySSAUpdater *MSSAU,
2998 const TargetTransformInfo *TTI) {
2999 BasicBlock *BB = BI->getParent();
3000 BasicBlock *PredBlock = PBI->getParent();
3001
3002 // Determine if the two branches share a common destination.
3003 Instruction::BinaryOps Opc;
3004 bool InvertPredCond;
3005 std::tie(Opc, InvertPredCond) =
3006 *shouldFoldCondBranchesToCommonDestination(BI, PBI, TTI);
3007
3008 LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);
3009
3010 IRBuilder<> Builder(PBI);
3011 // The builder is used to create instructions to eliminate the branch in BB.
3012 // If BB's terminator has !annotation metadata, add it to the new
3013 // instructions.
3014 Builder.CollectMetadataToCopy(BB->getTerminator(),
3015 {LLVMContext::MD_annotation});
3016
3017 // If we need to invert the condition in the pred block to match, do so now.
3018 if (InvertPredCond) {
3019 Value *NewCond = PBI->getCondition();
3020 if (NewCond->hasOneUse() && isa<CmpInst>(NewCond)) {
3021 CmpInst *CI = cast<CmpInst>(NewCond);
3022 CI->setPredicate(CI->getInversePredicate());
3023 } else {
3024 NewCond =
3025 Builder.CreateNot(NewCond, PBI->getCondition()->getName() + ".not");
3026 }
3027
3028 PBI->setCondition(NewCond);
3029 PBI->swapSuccessors();
3030 }
3031
3032 BasicBlock *UniqueSucc =
3033 PBI->getSuccessor(0) == BB ? BI->getSuccessor(0) : BI->getSuccessor(1);
3034
3035 // Before cloning instructions, notify the successor basic block that it
3036 // is about to have a new predecessor. This will update PHI nodes,
3037 // which will allow us to update live-out uses of bonus instructions.
3038 AddPredecessorToBlock(UniqueSucc, PredBlock, BB, MSSAU);
3039
3040 // Try to update branch weights.
3041 uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
3042 if (extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
3043 SuccTrueWeight, SuccFalseWeight)) {
3044 SmallVector<uint64_t, 8> NewWeights;
3045
3046 if (PBI->getSuccessor(0) == BB) {
3047 // PBI: br i1 %x, BB, FalseDest
3048 // BI: br i1 %y, UniqueSucc, FalseDest
3049 // TrueWeight is TrueWeight for PBI * TrueWeight for BI.
3050 NewWeights.push_back(PredTrueWeight * SuccTrueWeight);
3051 // FalseWeight is FalseWeight for PBI * TotalWeight for BI +
3052 // TrueWeight for PBI * FalseWeight for BI.
3053 // We assume that total weights of a BranchInst can fit into 32 bits.
3054 // Therefore, we will not have overflow using 64-bit arithmetic.
3055 NewWeights.push_back(PredFalseWeight *
3056 (SuccFalseWeight + SuccTrueWeight) +
3057 PredTrueWeight * SuccFalseWeight);
3058 } else {
3059 // PBI: br i1 %x, TrueDest, BB
3060 // BI: br i1 %y, TrueDest, UniqueSucc
3061 // TrueWeight is TrueWeight for PBI * TotalWeight for BI +
3062 // FalseWeight for PBI * TrueWeight for BI.
3063 NewWeights.push_back(PredTrueWeight * (SuccFalseWeight + SuccTrueWeight) +
3064 PredFalseWeight * SuccTrueWeight);
3065 // FalseWeight is FalseWeight for PBI * FalseWeight for BI.
3066 NewWeights.push_back(PredFalseWeight * SuccFalseWeight);
3067 }
3068
3069 // Halve the weights if any of them cannot fit in a uint32_t.
3070 FitWeights(NewWeights);
3071
3072 SmallVector<uint32_t, 8> MDWeights(NewWeights.begin(), NewWeights.end());
3073 setBranchWeights(PBI, MDWeights[0], MDWeights[1]);
3074
3075 // TODO: If BB is reachable from all paths through PredBlock, then we
3076 // could replace PBI's branch probabilities with BI's.
3077 } else
3078 PBI->setMetadata(LLVMContext::MD_prof, nullptr);
3079
3080 // Now, update the CFG.
3081 PBI->setSuccessor(PBI->getSuccessor(0) != BB, UniqueSucc);
3082
3083 if (DTU)
3084 DTU->applyUpdates({{DominatorTree::Insert, PredBlock, UniqueSucc},
3085 {DominatorTree::Delete, PredBlock, BB}});
3086
3087 // If BI was a loop latch, it may have had associated loop metadata.
3088 // We need to copy it to the new latch, that is, PBI.
3089 if (MDNode *LoopMD = BI->getMetadata(LLVMContext::MD_loop))
3090 PBI->setMetadata(LLVMContext::MD_loop, LoopMD);
3091
3092 ValueToValueMapTy VMap; // maps original values to cloned values
3093 CloneInstructionsIntoPredecessorBlockAndUpdateSSAUses(BB, PredBlock, VMap);
3094
3095 // Now that the Cond was cloned into the predecessor basic block,
3096 // or/and the two conditions together.
3097 Value *BICond = VMap[BI->getCondition()];
3098 PBI->setCondition(
3099 createLogicalOp(Builder, Opc, PBI->getCondition(), BICond, "or.cond"));
3100
3101 // Copy any debug value intrinsics into the end of PredBlock.
3102 for (Instruction &I : *BB) {
3103 if (isa<DbgInfoIntrinsic>(I)) {
3104 Instruction *NewI = I.clone();
3105 RemapInstruction(NewI, VMap,
3106 RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
3107 NewI->insertBefore(PBI);
3108 }
3109 }
3110
3111 ++NumFoldBranchToCommonDest;
3112 return true;
3113}
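// Worked example for the weight math above (numbers invented): if PBI has
// weights 10:90 with BB on its true edge, and BI has weights 3:1, the merged
// branch gets TrueWeight = 10*3 = 30 and
// FalseWeight = 90*(3+1) + 10*1 = 370, i.e. the weight of reaching the common
// successor through both conditions versus falling out on either one.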
3114
3115/// If this basic block is simple enough, and if a predecessor branches to us
3116/// and one of our successors, fold the block into the predecessor and use
3117/// logical operations to pick the right destination.
3118bool llvm::FoldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU,
3119 MemorySSAUpdater *MSSAU,
3120 const TargetTransformInfo *TTI,
3121 unsigned BonusInstThreshold) {
3122 // If this block ends with an unconditional branch,
3123 // let SpeculativelyExecuteBB() deal with it.
3124 if (!BI->isConditional())
3125 return false;
3126
3127 BasicBlock *BB = BI->getParent();
3128 TargetTransformInfo::TargetCostKind CostKind =
3129 BB->getParent()->hasMinSize() ? TargetTransformInfo::TCK_CodeSize
3130 : TargetTransformInfo::TCK_SizeAndLatency;
3131
3132 Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());
3133
3134 if (!Cond || (!isa<CmpInst>(Cond) && !isa<BinaryOperator>(Cond)) ||
3135 Cond->getParent() != BB || !Cond->hasOneUse())
3136 return false;
3137
3138 // Cond is known to be a compare or binary operator. Check to make sure that
3139 // neither operand is a potentially-trapping constant expression.
3140 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Cond->getOperand(0)))
3141 if (CE->canTrap())
3142 return false;
3143 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Cond->getOperand(1)))
3144 if (CE->canTrap())
3145 return false;
3146
3147 // Finally, don't infinitely unroll conditional loops.
3148 if (is_contained(successors(BB), BB))
3149 return false;
3150
3151 // Which predecessors do we want to deal with?
3152 SmallVector<BasicBlock *, 8> Preds;
3153 for (BasicBlock *PredBlock : predecessors(BB)) {
3154 BranchInst *PBI = dyn_cast<BranchInst>(PredBlock->getTerminator());
3155
3156 // Check that we have two conditional branches. If there is a PHI node in
3157 // the common successor, verify that the same value flows in from both
3158 // blocks.
3159 if (!PBI || PBI->isUnconditional() || !SafeToMergeTerminators(BI, PBI))
3160 continue;
3161
3162 // Determine if the two branches share a common destination.
3163 Instruction::BinaryOps Opc;
3164 bool InvertPredCond;
3165 if (auto Recipe = shouldFoldCondBranchesToCommonDestination(BI, PBI, TTI))
3166 std::tie(Opc, InvertPredCond) = *Recipe;
3167 else
3168 continue;
3169
3170 // Check the cost of inserting the necessary logic before performing the
3171 // transformation.
3172 if (TTI) {
3173 Type *Ty = BI->getCondition()->getType();
3174 InstructionCost Cost = TTI->getArithmeticInstrCost(Opc, Ty, CostKind);
3175 if (InvertPredCond && (!PBI->getCondition()->hasOneUse() ||
3176 !isa<CmpInst>(PBI->getCondition())))
3177 Cost += TTI->getArithmeticInstrCost(Instruction::Xor, Ty, CostKind);
3178
3179 if (Cost > BranchFoldThreshold)
3180 continue;
3181 }
3182
3183 // Ok, we do want to deal with this predecessor. Record it.
3184 Preds.emplace_back(PredBlock);
3185 }
3186
3187 // If there aren't any predecessors into which we can fold,
3188 // don't bother checking the cost.
3189 if (Preds.empty())
3190 return false;
3191
3192 // Only allow this transformation if computing the condition doesn't involve
3193 // too many instructions and these involved instructions can be executed
3194 // unconditionally. We denote all involved instructions except the condition
3195 // as "bonus instructions", and only allow this transformation when the
3196 // number of the bonus instructions we'll need to create when cloning into
3197 // each predecessor does not exceed a certain threshold.
3198 unsigned NumBonusInsts = 0;
3199 const unsigned PredCount = Preds.size();
3200 for (Instruction &I : *BB) {
3201 // Don't check the branch condition comparison itself.
3202 if (&I == Cond)
3203 continue;
3204 // Ignore dbg intrinsics, and the terminator.
3205 if (isa<DbgInfoIntrinsic>(I) || isa<BranchInst>(I))
3206 continue;
3207 // I must be safe to execute unconditionally.
3208 if (!isSafeToSpeculativelyExecute(&I))
3209 return false;
3210
3211 // Account for the cost of duplicating this instruction into each
3212 // predecessor.
3213 NumBonusInsts += PredCount;
3214 // Early exit once we reach the limit.
3215 if (NumBonusInsts > BonusInstThreshold)
3216 return false;
3217
3218 auto IsBCSSAUse = [BB, &I](Use &U) {
3219 auto *UI = cast<Instruction>(U.getUser());
3220 if (auto *PN = dyn_cast<PHINode>(UI))
3221 return PN->getIncomingBlock(U) == BB;
3222 return UI->getParent() == BB && I.comesBefore(UI);
3223 };
3224
3225 // Does this instruction require rewriting of uses?
3226 if (!all_of(I.uses(), IsBCSSAUse))
3227 return false;
3228 }
3229
3230 // Ok, we have the budget. Perform the transformation.
3231 for (BasicBlock *PredBlock : Preds) {
3232 auto *PBI = cast<BranchInst>(PredBlock->getTerminator());
3233 return performBranchToCommonDestFolding(BI, PBI, DTU, MSSAU, TTI);
3234 }
3235 return false;
3236}
3237
3238 // If there is exactly one store across BB1 and BB2, return it; otherwise
3239 // return nullptr.
3240static StoreInst *findUniqueStoreInBlocks(BasicBlock *BB1, BasicBlock *BB2) {
3241 StoreInst *S = nullptr;
3242 for (auto *BB : {BB1, BB2}) {
3243 if (!BB)
3244 continue;
3245 for (auto &I : *BB)
3246 if (auto *SI = dyn_cast<StoreInst>(&I)) {
3247 if (S)
3248 // Multiple stores seen.
3249 return nullptr;
3250 else
3251 S = SI;
3252 }
3253 }
3254 return S;
3255}
3256
3257static Value *ensureValueAvailableInSuccessor(Value *V, BasicBlock *BB,
3258 Value *AlternativeV = nullptr) {
3259 // PHI is going to be a PHI node that allows the value V that is defined in
3260 // BB to be referenced in BB's only successor.
3261 //
3262 // If AlternativeV is nullptr, the only value we care about in PHI is V. It
3263 // doesn't matter to us what the other operand is (it'll never get used). We
3264 // could just create a new PHI with an undef incoming value, but that could
3265 // increase register pressure if EarlyCSE/InstCombine can't fold it with some
3266 // other PHI. So here we directly look for some PHI in BB's successor with V
3267 // as an incoming operand. If we find one, we use it, else we create a new
3268 // one.
3269 //
3270 // If AlternativeV is not nullptr, we care about both incoming values in PHI.
3271 // PHI must be exactly: phi <ty> [ %V, %BB ], [ %AlternativeV, %OtherBB ]
3272 // where OtherBB is the single other predecessor of BB's only successor.
3273 PHINode *PHI = nullptr;
3274 BasicBlock *Succ = BB->getSingleSuccessor();
3275
3276 for (auto I = Succ->begin(); isa<PHINode>(I); ++I)
3277 if (cast<PHINode>(I)->getIncomingValueForBlock(BB) == V) {
3278 PHI = cast<PHINode>(I);
3279 if (!AlternativeV)
3280 break;
3281
3282 assert(Succ->hasNPredecessors(2));
3283 auto PredI = pred_begin(Succ);
3284 BasicBlock *OtherPredBB = *PredI == BB ? *++PredI : *PredI;
3285 if (PHI->getIncomingValueForBlock(OtherPredBB) == AlternativeV)
3286 break;
3287 PHI = nullptr;
3288 }
3289 if (PHI)
3290 return PHI;
3291
3292 // If V is not an instruction defined in BB, just return it.
3293 if (!AlternativeV &&
3294 (!isa<Instruction>(V) || cast<Instruction>(V)->getParent() != BB))
3295 return V;
3296
3297 PHI = PHINode::Create(V->getType(), 2, "simplifycfg.merge", &Succ->front());
3298 PHI->addIncoming(V, BB);
3299 for (BasicBlock *PredBB : predecessors(Succ))
3300 if (PredBB != BB)
3301 PHI->addIncoming(
3302 AlternativeV ? AlternativeV : UndefValue::get(V->getType()), PredBB);
3303 return PHI;
3304}
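// Illustrative result of the helper above (names invented): after it runs for
// a value %v defined in %bb whose single successor is %post, %post contains
//
//   %simplifycfg.merge = phi i32 [ %v, %bb ], [ undef, %other ]
//
// (or [ %alt, %other ] when AlternativeV is supplied), unless a suitable phi
// already existed, in which case that phi is reused to avoid extra register
// pressure.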
3305
3306static bool mergeConditionalStoreToAddress(
3307 BasicBlock *PTB, BasicBlock *PFB, BasicBlock *QTB, BasicBlock *QFB,
3308 BasicBlock *PostBB, Value *Address, bool InvertPCond, bool InvertQCond,
3309 DomTreeUpdater *DTU, const DataLayout &DL, const TargetTransformInfo &TTI) {
3310 // For every pointer, there must be exactly two stores, one coming from
3311 // PTB or PFB, and the other from QTB or QFB. We don't support more than one
3312 // store (to any address) in PTB,PFB or QTB,QFB.
3313 // FIXME: We could relax this restriction with a bit more work and performance
3314 // testing.
3315 StoreInst *PStore = findUniqueStoreInBlocks(PTB, PFB);
3316 StoreInst *QStore = findUniqueStoreInBlocks(QTB, QFB);
3317 if (!PStore || !QStore)
3318 return false;
3319
3320 // Now check the stores are compatible.
3321 if (!QStore->isUnordered() || !PStore->isUnordered())
3322 return false;
3323
3324 // Check that sinking the store won't cause program behavior changes. Sinking
3325 // the store out of the Q blocks won't change any behavior as we're sinking
3326 // from a block to its unconditional successor. But we're moving a store from
3327 // the P blocks down through the middle block (QBI) and past both QFB and QTB.
3328 // So we need to check that there are no aliasing loads or stores in
3329 // QBI, QTB and QFB. We also need to check there are no conflicting memory
3330 // operations between PStore and the end of its parent block.
3331 //
3332 // The ideal way to do this is to query AliasAnalysis, but we don't
3333 // preserve AA currently so that is dangerous. Be super safe and just
3334 // check there are no other memory operations at all.
3335 for (auto &I : *QFB->getSinglePredecessor())
3336 if (I.mayReadOrWriteMemory())
3337 return false;
3338 for (auto &I : *QFB)
3339 if (&I != QStore && I.mayReadOrWriteMemory())
3340 return false;
3341 if (QTB)
3342 for (auto &I : *QTB)
3343 if (&I != QStore && I.mayReadOrWriteMemory())
3344 return false;
3345 for (auto I = BasicBlock::iterator(PStore), E = PStore->getParent()->end();
3346 I != E; ++I)
3347 if (&*I != PStore && I->mayReadOrWriteMemory())
3348 return false;
3349
3350 // If we're not in aggressive mode, we only optimize if we have some
3351 // confidence that by optimizing we'll allow P and/or Q to be if-converted.
3352 auto IsWorthwhile = [&](BasicBlock *BB, ArrayRef<StoreInst *> FreeStores) {
3353 if (!BB)
3354 return true;
3355 // Heuristic: if the block can be if-converted/phi-folded and the
3356 // instructions inside are all cheap (arithmetic/GEPs), it's worthwhile to
3357 // thread this store.
3358 InstructionCost Cost = 0;
3359 InstructionCost Budget =
3360 PHINodeFoldingThreshold * TargetTransformInfo::TCC_Basic;
3361 for (auto &I : BB->instructionsWithoutDebug()) {
3362 // Consider the terminator instruction to be free.
3363 if (I.isTerminator())
3364 continue;
3365 // If this is one of the stores that we want to speculate out of this BB,
3366 // then don't count its cost; consider it to be free.
3367 if (auto *S = dyn_cast<StoreInst>(&I))
3368 if (llvm::is_contained(FreeStores, S))
3369 continue;
3370 // Else, we have a white-list of instructions that we are okay speculating.
3371 if (!isa<BinaryOperator>(I) && !isa<GetElementPtrInst>(I))
3372 return false; // Not in white-list - not worthwhile folding.
3373 // And finally, if this is a non-free instruction that we are okay
3374 // speculating, ensure that we consider the speculation budget.
3375 Cost += TTI.getUserCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
3376 if (Cost > Budget)
3377 return false; // Eagerly refuse to fold as soon as we're out of budget.
3378 }
3379 assert(Cost <= Budget &&
3380 "When we run out of budget we will eagerly return from within the "
3381 "per-instruction loop.");
3382 return true;
3383 };
3384
3385 const std::array<StoreInst *, 2> FreeStores = {PStore, QStore};
3386 if (!MergeCondStoresAggressively &&
3387 (!IsWorthwhile(PTB, FreeStores) || !IsWorthwhile(PFB, FreeStores) ||
3388 !IsWorthwhile(QTB, FreeStores) || !IsWorthwhile(QFB, FreeStores)))
3389 return false;
3390
3391 // If PostBB has more than two predecessors, we need to split it so we can
3392 // sink the store.
3393 if (std::next(pred_begin(PostBB), 2) != pred_end(PostBB)) {
3394 // We know that QFB's only successor is PostBB. And QFB has a single
3395 // predecessor. If QTB exists, then its only successor is also PostBB.
3396 // If QTB does not exist, then QFB's only predecessor has a conditional
3397 // branch to QFB and PostBB.
3398 BasicBlock *TruePred = QTB ? QTB : QFB->getSinglePredecessor();
3399 BasicBlock *NewBB =
3400 SplitBlockPredecessors(PostBB, {QFB, TruePred}, "condstore.split", DTU);
3401 if (!NewBB)
3402 return false;
3403 PostBB = NewBB;
3404 }
3405
3406 // OK, we're going to sink the stores to PostBB. The store has to be
3407 // conditional though, so first create the predicate.
3408 Value *PCond = cast<BranchInst>(PFB->getSinglePredecessor()->getTerminator())
3409 ->getCondition();
3410 Value *QCond = cast<BranchInst>(QFB->getSinglePredecessor()->getTerminator())
3411 ->getCondition();
3412
3413 Value *PPHI = ensureValueAvailableInSuccessor(PStore->getValueOperand(),
3414 PStore->getParent());
3415 Value *QPHI = ensureValueAvailableInSuccessor(QStore->getValueOperand(),
3416 QStore->getParent(), PPHI);
3417
3418 IRBuilder<> QB(&*PostBB->getFirstInsertionPt());
3419
3420 Value *PPred = PStore->getParent() == PTB ? PCond : QB.CreateNot(PCond);
3421 Value *QPred = QStore->getParent() == QTB ? QCond : QB.CreateNot(QCond);
3422
3423 if (InvertPCond)
3424 PPred = QB.CreateNot(PPred);
3425 if (InvertQCond)
3426 QPred = QB.CreateNot(QPred);
3427 Value *CombinedPred = QB.CreateOr(PPred, QPred);
3428
3429 auto *T = SplitBlockAndInsertIfThen(CombinedPred, &*QB.GetInsertPoint(),
3430 /*Unreachable=*/false,
3431 /*BranchWeights=*/nullptr, DTU);
3432 QB.SetInsertPoint(T);
3433 StoreInst *SI = cast<StoreInst>(QB.CreateStore(QPHI, Address));
3434 AAMDNodes AAMD;
3435 PStore->getAAMetadata(AAMD, /*Merge=*/false);
3436 QStore->getAAMetadata(AAMD, /*Merge=*/true);
3437 SI->setAAMetadata(AAMD);
3438 // Choose the minimum alignment. If we could prove both stores execute, we
3439 // could use the larger one. In this case, though, we only know that one of
3440 // the stores executes. And we don't know it's safe to take the alignment
3441 // from a store that doesn't execute.
3442 SI->setAlignment(std::min(PStore->getAlign(), QStore->getAlign()));
3443
3444 QStore->eraseFromParent();
3445 PStore->eraseFromParent();
3446
3447 return true;
3448}
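// Net effect of the merge above, roughly (illustrative IR, names invented):
// two stores to the same %addr that were guarded by the P and Q conditions
// are replaced by a single store in a new block guarded by the OR of the
// (possibly negated) conditions:
//
//   %spec = or i1 %ppred, %qpred
//   br i1 %spec, label %store.bb, label %post
//   store.bb:
//     store i32 %merged, i32* %addr   ; %merged comes from the phis built by
//     br label %post                  ; ensureValueAvailableInSuccessor
//
// with the new store's alignment clamped to the smaller of the two originals.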
3449
3450static bool mergeConditionalStores(BranchInst *PBI, BranchInst *QBI,
3451 DomTreeUpdater *DTU, const DataLayout &DL,
3452 const TargetTransformInfo &TTI) {
3453 // The intention here is to find diamonds or triangles (see below) where each
3454 // conditional block contains a store to the same address. Both of these
3455 // stores are conditional, so they can't be unconditionally sunk. But it may
3456 // be profitable to speculatively sink the stores into one merged store at the
3457 // end, and predicate the merged store on the union of the two conditions of
3458 // PBI and QBI.
3459 //
3460 // This can reduce the number of stores executed if both of the conditions are
3461 // true, and can allow the blocks to become small enough to be if-converted.
3462 // This optimization will also chain, so that ladders of test-and-set
3463 // sequences can be if-converted away.
3464 //
3465 // We only deal with simple diamonds or triangles:
3466 //
3467 //     PBI       or      PBI        or a combination of the two
3468 //    /   \               | \
3469 //   PTB  PFB             |  PFB
3470 //    \   /               | /
3471 //     QBI                QBI
3472 //    /   \               | \
3473 //   QTB  QFB             |  QFB
3474 //    \   /               | /
3475 //    PostBB            PostBB
3476 //
3477 // We model triangles as a type of diamond with a nullptr "true" block.
3478 // Triangles are canonicalized so that the fallthrough edge is represented by
3479 // a true condition, as in the diagram above.
3480 BasicBlock *PTB = PBI->getSuccessor(0);
3481 BasicBlock *PFB = PBI->getSuccessor(1);
3482 BasicBlock *QTB = QBI->getSuccessor(0);
3483 BasicBlock *QFB = QBI->getSuccessor(1);
3484 BasicBlock *PostBB = QFB->getSingleSuccessor();
3485
3486 // Make sure we have a good guess for PostBB. If QTB's only successor is
3487 // QFB, then QFB is a better PostBB.
3488 if (QTB->getSingleSuccessor() == QFB)
3489 PostBB = QFB;
3490
3491 // If we couldn't find a good PostBB, stop.
3492 if (!PostBB)
3493 return false;
3494
3495 bool InvertPCond = false, InvertQCond = false;
3496 // Canonicalize fallthroughs to the true branches.
3497 if (PFB == QBI->getParent()) {
3498 std::swap(PFB, PTB);
3499 InvertPCond = true;
3500 }
3501 if (QFB == PostBB) {
3502 std::swap(QFB, QTB);
3503 InvertQCond = true;
3504 }
3505
3506 // From this point on we can assume PTB or QTB may be fallthroughs but PFB
3507 // and QFB may not. Model fallthroughs as a nullptr block.
3508 if (PTB == QBI->getParent())
3509 PTB = nullptr;
3510 if (QTB == PostBB)
3511 QTB = nullptr;
3512
3513 // Legality bailouts. We must have at least the non-fallthrough blocks and
3514 // the post-dominating block, and the non-fallthroughs must only have one
3515 // predecessor.
3516 auto HasOnePredAndOneSucc = [](BasicBlock *BB, BasicBlock *P, BasicBlock *S) {
3517 return BB->getSinglePredecessor() == P && BB->getSingleSuccessor() == S;
3518 };
3519 if (!HasOnePredAndOneSucc(PFB, PBI->getParent(), QBI->getParent()) ||
3520 !HasOnePredAndOneSucc(QFB, QBI->getParent(), PostBB))
3521 return false;
3522 if ((PTB && !HasOnePredAndOneSucc(PTB, PBI->getParent(), QBI->getParent())) ||
3523 (QTB && !HasOnePredAndOneSucc(QTB, QBI->getParent(), PostBB)))
3524 return false;
3525 if (!QBI->getParent()->hasNUses(2))
3526 return false;
3527
3528 // OK, this is a sequence of two diamonds or triangles.
3529 // Check if there are stores in PTB or PFB that are repeated in QTB or QFB.
3530 SmallPtrSet<Value *, 4> PStoreAddresses, QStoreAddresses;
3531 for (auto *BB : {PTB, PFB}) {
3532 if (!BB)
3533 continue;
3534 for (auto &I : *BB)
3535 if (StoreInst *SI = dyn_cast<StoreInst>(&I))
3536 PStoreAddresses.insert(SI->getPointerOperand());
3537 }
3538 for (auto *BB : {QTB, QFB}) {
3539 if (!BB)
3540 continue;
3541 for (auto &I : *BB)
3542 if (StoreInst *SI = dyn_cast<StoreInst>(&I))
3543 QStoreAddresses.insert(SI->getPointerOperand());
3544 }
3545
3546 set_intersect(PStoreAddresses, QStoreAddresses);
3547 // set_intersect mutates PStoreAddresses in place. Rename it here to make it
3548 // clear what it contains.
3549 auto &CommonAddresses = PStoreAddresses;
3550
3551 bool Changed = false;
3552 for (auto *Address : CommonAddresses)
3553 Changed |=
3554 mergeConditionalStoreToAddress(PTB, PFB, QTB, QFB, PostBB, Address,
3555 InvertPCond, InvertQCond, DTU, DL, TTI);
3556 return Changed;
3557}
3558
3559 /// If the previous block ended with a widenable branch, determine if reusing
3560 /// the target block is profitable and legal. This will have the effect of
3561 /// "widening" PBI, but doesn't require us to reason about hoisting safety.
3562static bool tryWidenCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
3563 DomTreeUpdater *DTU) {
3564 // TODO: This can be generalized in two important ways:
3565 // 1) We can allow phi nodes in IfFalseBB and simply reuse all the input
3566 // values from the PBI edge.
3567 // 2) We can sink side-effecting instructions into BI's fallthrough
3568 // successor provided they don't contribute to the computation of
3569 // BI's condition.
3570 Value *CondWB, *WC;
3571 BasicBlock *IfTrueBB, *IfFalseBB;
3572 if (!parseWidenableBranch(PBI, CondWB, WC, IfTrueBB, IfFalseBB) ||
3573 IfTrueBB != BI->getParent() || !BI->getParent()->getSinglePredecessor())
3574 return false;
3575 if (!IfFalseBB->phis().empty())
3576 return false; // TODO
3577 // Use lambda to lazily compute expensive condition after cheap ones.
3578 auto NoSideEffects = [](BasicBlock &BB) {
3579 return !llvm::any_of(BB, [](const Instruction &I) {
3580 return I.mayWriteToMemory() || I.mayHaveSideEffects();
3581 });
3582 };
3583 if (BI->getSuccessor(1) != IfFalseBB && // no inf looping
3584 BI->getSuccessor(1)->getTerminatingDeoptimizeCall() && // profitability
3585 NoSideEffects(*BI->getParent())) {
3586 auto *OldSuccessor = BI->getSuccessor(1);
3587 OldSuccessor->removePredecessor(BI->getParent());
3588 BI->setSuccessor(1, IfFalseBB);
3589 if (DTU)
3590 DTU->applyUpdates(
3591 {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
3592 {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
3593 return true;
3594 }
3595 if (BI->getSuccessor(0) != IfFalseBB && // no inf looping
3596 BI->getSuccessor(0)->getTerminatingDeoptimizeCall() && // profitability
3597 NoSideEffects(*BI->getParent())) {
3598 auto *OldSuccessor = BI->getSuccessor(0);
3599 OldSuccessor->removePredecessor(BI->getParent());
3600 BI->setSuccessor(0, IfFalseBB);
3601 if (DTU)
3602 DTU->applyUpdates(
3603 {{DominatorTree::Insert, BI->getParent(), IfFalseBB},
3604 {DominatorTree::Delete, BI->getParent(), OldSuccessor}});
3605 return true;
3606 }
3607 return false;
3608}
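// Intuition for the rewrite above: PBI is a widenable branch (a guard), and a
// widenable branch may legally be made to fail (branch to IfFalseBB) more
// often than it currently does. So if BI's other successor just deoptimizes
// anyway, retargeting that edge at IfFalseBB "widens" the guard without having
// to hoist anything or reason about BI's condition; the DomTree updates simply
// record the swapped edge.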
3609
3610/// If we have a conditional branch as a predecessor of another block,
3611/// this function tries to simplify it. We know
3612/// that PBI and BI are both conditional branches, and BI is in one of the
3613/// successor blocks of PBI - PBI branches to BI.
3614static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI,
3615 DomTreeUpdater *DTU,
3616 const DataLayout &DL,
3617 const TargetTransformInfo &TTI) {
3618 assert(PBI->isConditional() && BI->isConditional());
3619 BasicBlock *BB = BI->getParent();
3620
3621 // If this block ends with a branch instruction, and if there is a
3622 // predecessor that ends on a branch of the same condition, make
3623 // this conditional branch redundant.
3624 if (PBI->getCondition() == BI->getCondition() &&
3625 PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
3626 // Okay, the outcome of this conditional branch is statically
3627 // knowable. If this block had a single pred, handle specially.
3628 if (BB->getSinglePredecessor()) {
3629 // Turn this into a branch on constant.
3630 bool CondIsTrue = PBI->getSuccessor(0) == BB;
3631 BI->setCondition(
3632 ConstantInt::get(Type::getInt1Ty(BB->getContext()), CondIsTrue));
3633 return true; // Nuke the branch on constant.
3634 }
3635
3636 // Otherwise, if there are multiple predecessors, insert a PHI that merges
3637 // in the constant and simplify the block result. Subsequent passes of
3638 // simplifycfg will thread the block.
3639 if (BlockIsSimpleEnoughToThreadThrough(BB)) {
3640 pred_iterator PB = pred_begin(BB), PE = pred_end(BB);
3641 PHINode *NewPN = PHINode::Create(
3642 Type::getInt1Ty(BB->getContext()), std::distance(PB, PE),
3643 BI->getCondition()->getName() + ".pr", &BB->front());
3644 // Okay, we're going to insert the PHI node. Since PBI is not the only
3645 // predecessor, compute the PHI'd conditional value for all of the preds.
3646 // Any predecessor where the condition is not computable we keep symbolic.
3647 for (pred_iterator PI = PB; PI != PE; ++PI) {
3648 BasicBlock *P = *PI;
3649 if ((PBI = dyn_cast<BranchInst>(P->getTerminator())) && PBI != BI &&
3650 PBI->isConditional() && PBI->getCondition() == BI->getCondition() &&
3651 PBI->getSuccessor(0) != PBI->getSuccessor(1)) {
3652 bool CondIsTrue = PBI->getSuccessor(0) == BB;
3653 NewPN->addIncoming(
3654 ConstantInt::get(Type::getInt1Ty(BB->getContext()), CondIsTrue),
3655 P);
3656 } else {
3657 NewPN->addIncoming(BI->getCondition(), P);
3658 }
3659 }
3660
3661 BI->setCondition(NewPN);
3662 return true;
3663 }
3664 }
3665
3666 // If the previous block ended with a widenable branch, determine if reusing
3667 // the target block is profitable and legal. This will have the effect of
3668 // "widening" PBI, but doesn't require us to reason about hoisting safety.
3669 if (tryWidenCondBranchToCondBranch(PBI, BI, DTU))
3670 return true;
3671
3672 if (auto *CE = dyn_cast<ConstantExpr>(BI->getCondition()))
3673 if (CE->canTrap())
3674 return false;
3675
3676 // If both branches are conditional and both contain stores to the same
3677 // address, remove the stores from the conditionals and create a conditional
3678 // merged store at the end.
3679 if (MergeCondStores && mergeConditionalStores(PBI, BI, DTU, DL, TTI))
3680 return true;
3681
3682 // If this is a conditional branch in an empty block, and if any
3683 // predecessors are a conditional branch to one of our destinations,
3684 // fold the conditions into logical ops and one cond br.
3685
3686 // Ignore dbg intrinsics.
3687 if (&*BB->instructionsWithoutDebug().begin() != BI)
3688 return false;
3689
3690 int PBIOp, BIOp;
3691 if (PBI->getSuccessor(0) == BI->getSuccessor(0)) {
3692 PBIOp = 0;
3693 BIOp = 0;
3694 } else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) {
3695 PBIOp = 0;
3696 BIOp = 1;
3697 } else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) {
3698 PBIOp = 1;
3699 BIOp = 0;
3700 } else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) {
3701 PBIOp = 1;
3702 BIOp = 1;
3703 } else {
3704 return false;
3705 }
3706
3707 // Check to make sure that the other destination of this branch
3708 // isn't BB itself. If so, this is an infinite loop that will
3709 // keep getting unwound.
3710 if (PBI->getSuccessor(PBIOp) == BB)
3711 return false;
3712
3713 // Do not perform this transformation if it would require
3714 // insertion of a large number of select instructions. For targets
3715 // without predication/cmovs, this is a big pessimization.
3716
3717 // Also do not perform this transformation if any phi node in the common
3718 // destination block can trap when reached by BB or PBB (PR17073). In that
3719 // case, it would be unsafe to hoist the operation into a select instruction.
3720
3721 BasicBlock *CommonDest = PBI->getSuccessor(PBIOp);
3722 BasicBlock *RemovedDest = PBI->getSuccessor(PBIOp ^ 1);
3723 unsigned NumPhis = 0;
3724 for (BasicBlock::iterator II = CommonDest->begin(); isa<PHINode>(II);
3725 ++II, ++NumPhis) {
3726 if (NumPhis > 2) // Disable this xform.
3727 return false;
3728
3729 PHINode *PN = cast<PHINode>(II);
3730 Value *BIV = PN->getIncomingValueForBlock(BB);
3731 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(BIV))
3732 if (CE->canTrap())
3733 return false;
3734
3735 unsigned PBBIdx = PN->getBasicBlockIndex(PBI->getParent());
3736 Value *PBIV = PN->getIncomingValue(PBBIdx);
3737 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(PBIV))
3738 if (CE->canTrap())
3739 return false;
3740 }
3741
3742 // Finally, if everything is ok, fold the branches to logical ops.
3743 BasicBlock *OtherDest = BI->getSuccessor(BIOp ^ 1);
3744
3745 LLVM_DEBUG(dbgs() << "FOLDING BRs:" << *PBI->getParent()
3746 << "AND: " << *BI->getParent());
3747
3748 SmallVector<DominatorTree::UpdateType, 5> Updates;
3749
3750 // If OtherDest *is* BB, then BB is a basic block with a single conditional
3751 // branch in it, where one edge (OtherDest) goes back to itself but the other
3752 // exits. We don't *know* that the program avoids the infinite loop
3753 // (even though that seems likely). If we do this xform naively, we'll end up
3754 // recursively unpeeling the loop. Since we know that (after the xform is
3755 // done) the block *is* infinite if reached, we just make it an obviously
3756 // infinite loop with no cond branch.
3757 if (OtherDest == BB) {
3758 // Insert it at the end of the function, because it's either code,
3759 // or it won't matter if it's hot. :)
3760 BasicBlock *InfLoopBlock =
3761 BasicBlock::Create(BB->getContext(), "infloop", BB->getParent());
3762 BranchInst::Create(InfLoopBlock, InfLoopBlock);
3763 if (DTU)
3764 Updates.push_back({DominatorTree::Insert, InfLoopBlock, InfLoopBlock});
3765 OtherDest = InfLoopBlock;
3766 }
3767
3768 LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());
3769
3770 // BI may have other predecessors. Because of this, we leave
3771 // it alone, but modify PBI.
3772
3773 // Make sure we get to CommonDest on True&True directions.
3774 Value *PBICond = PBI->getCondition();
3775 IRBuilder<NoFolder> Builder(PBI);
3776 if (PBIOp)
3777 PBICond = Builder.CreateNot(PBICond, PBICond->getName() + ".not");
3778
3779 Value *BICond = BI->getCondition();
3780 if (BIOp)
3781 BICond = Builder.CreateNot(BICond, BICond->getName() + ".not");
3782
3783 // Merge the conditions.
3784 Value *Cond =
3785 createLogicalOp(Builder, Instruction::Or, PBICond, BICond, "brmerge");
3786
3787 // Modify PBI to branch on the new condition to the new dests.
3788 PBI->setCondition(Cond);
3789 PBI->setSuccessor(0, CommonDest);
3790 PBI->setSuccessor(1, OtherDest);
3791
3792 if (DTU) {
3793 Updates.push_back({DominatorTree::Insert, PBI->getParent(), OtherDest});
3794 Updates.push_back({DominatorTree::Delete, PBI->getParent(), RemovedDest});
3795
3796 DTU->applyUpdates(Updates);
3797 }
3798
3799 // Update branch weight for PBI.
3800 uint64_t PredTrueWeight, PredFalseWeight, SuccTrueWeight, SuccFalseWeight;
3801 uint64_t PredCommon, PredOther, SuccCommon, SuccOther;
3802 bool HasWeights =
3803 extractPredSuccWeights(PBI, BI, PredTrueWeight, PredFalseWeight,
3804 SuccTrueWeight, SuccFalseWeight);
3805 if (HasWeights) {
3806 PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight;
3807 PredOther = PBIOp ? PredTrueWeight : PredFalseWeight;
3808 SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight;
3809 SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight;
3810 // The weight to CommonDest should be PredCommon * SuccTotal +
3811 // PredOther * SuccCommon.
3812 // The weight to OtherDest should be PredOther * SuccOther.
3813 uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther) +
3814 PredOther * SuccCommon,
3815 PredOther * SuccOther};
3816 // Halve the weights if any of them cannot fit in a uint32_t.
3817 FitWeights(NewWeights);
3818
3819 setBranchWeights(PBI, NewWeights[0], NewWeights[1]);
3820 }
3821
3822 // OtherDest may have phi nodes. If so, add entries from PBI's
3823 // block that are identical to the entries for BI's block.
3824 AddPredecessorToBlock(OtherDest, PBI->getParent(), BB);
3825
3826 // We know that the CommonDest already had an edge from PBI to
3827 // it. If it has PHIs though, the PHIs may have different
3828 // entries for BB and PBI's BB. If so, insert a select to make
3829 // them agree.
3830 for (PHINode &PN : CommonDest->phis()) {
3831 Value *BIV = PN.getIncomingValueForBlock(BB);
3832 unsigned PBBIdx = PN.getBasicBlockIndex(PBI->getParent());
3833 Value *PBIV = PN.getIncomingValue(PBBIdx);
3834 if (BIV != PBIV) {
3835 // Insert a select in PBI to pick the right value.
3836 SelectInst *NV = cast<SelectInst>(
3837 Builder.CreateSelect(PBICond, PBIV, BIV, PBIV->getName() + ".mux"));
3838 PN.setIncomingValue(PBBIdx, NV);
3839 // Although the select has the same condition as PBI, the original branch
3840 // weights for PBI do not apply to the new select because the select's
3841 // 'logical' edges are incoming edges of the phi that is eliminated, not
3842 // the outgoing edges of PBI.
3843 if (HasWeights) {
3844 uint64_t PredCommon = PBIOp ? PredFalseWeight : PredTrueWeight;
3845 uint64_t PredOther = PBIOp ? PredTrueWeight : PredFalseWeight;
3846 uint64_t SuccCommon = BIOp ? SuccFalseWeight : SuccTrueWeight;
3847 uint64_t SuccOther = BIOp ? SuccTrueWeight : SuccFalseWeight;
3848 // The weight to PredCommonDest should be PredCommon * SuccTotal.
3849 // The weight to PredOtherDest should be PredOther * SuccCommon.
3850 uint64_t NewWeights[2] = {PredCommon * (SuccCommon + SuccOther),
3851 PredOther * SuccCommon};
3852
3853 FitWeights(NewWeights);
3854
3855 setBranchWeights(NV, NewWeights[0], NewWeights[1]);
3856 }
3857 }
3858 }
3859
3860 LLVM_DEBUG(dbgs() << "INTO: " << *PBI->getParent());
3861 LLVM_DEBUG(dbgs() << *PBI->getParent()->getParent());
3862
3863 // This basic block is probably dead. We know it has at least
3864 // one fewer predecessor.
3865 return true;
3866}
3867
3868// Simplifies a terminator by replacing it with a branch to TrueBB if Cond is
3869// true or to FalseBB if Cond is false.
3870// Takes care of updating the successors and removing the old terminator.
3871// Also makes sure not to introduce new successors by assuming that edges to
3872// non-successor TrueBBs and FalseBBs aren't reachable.
3873bool SimplifyCFGOpt::SimplifyTerminatorOnSelect(Instruction *OldTerm,
3874 Value *Cond, BasicBlock *TrueBB,
3875 BasicBlock *FalseBB,
3876 uint32_t TrueWeight,
3877 uint32_t FalseWeight) {
3878 auto *BB = OldTerm->getParent();
3879 // Remove any superfluous successor edges from the CFG.
3880 // First, figure out which successors to preserve.
3881 // If TrueBB and FalseBB are equal, only try to preserve one copy of that
3882 // successor.
3883 BasicBlock *KeepEdge1 = TrueBB;
3884 BasicBlock *KeepEdge2 = TrueBB != FalseBB ? FalseBB : nullptr;
3885
3886 SmallPtrSet<BasicBlock *, 2> RemovedSuccessors;
3887
3888 // Then remove the rest.
3889 for (BasicBlock *Succ : successors(OldTerm)) {
3890 // Make sure only to keep exactly one copy of each edge.
3891 if (Succ == KeepEdge1)
3892 KeepEdge1 = nullptr;
3893 else if (Succ == KeepEdge2)
3894 KeepEdge2 = nullptr;
3895 else {
3896 Succ->removePredecessor(BB,
3897 /*KeepOneInputPHIs=*/true);
3898
3899 if (Succ != TrueBB && Succ != FalseBB)
3900 RemovedSuccessors.insert(Succ);
3901 }
3902 }
3903
3904 IRBuilder<> Builder(OldTerm);
3905 Builder.SetCurrentDebugLocation(OldTerm->getDebugLoc());
3906
3907 // Insert an appropriate new terminator.
3908 if (!KeepEdge1 && !KeepEdge2) {
3909 if (TrueBB == FalseBB) {
3910 // We were only looking for one successor, and it was present.
3911 // Create an unconditional branch to it.
3912 Builder.CreateBr(TrueBB);
3913 } else {
3914 // We found both of the successors we were looking for.
3915 // Create a conditional branch sharing the condition of the select.
3916 BranchInst *NewBI = Builder.CreateCondBr(Cond, TrueBB, FalseBB);
3917 if (TrueWeight != FalseWeight)
3918 setBranchWeights(NewBI, TrueWeight, FalseWeight);
3919 }
3920 } else if (KeepEdge1 && (KeepEdge2 || TrueBB == FalseBB)) {
3921 // Neither of the selected blocks was a successor, so this
3922 // terminator must be unreachable.
3923 new UnreachableInst(OldTerm->getContext(), OldTerm);
3924 } else {
3925 // One of the selected values was a successor, but the other wasn't.
3926 // Insert an unconditional branch to the one that was found;
3927 // the edge to the one that wasn't must be unreachable.
3928 if (!KeepEdge1) {
3929 // Only TrueBB was found.
3930 Builder.CreateBr(TrueBB);
3931 } else {
3932 // Only FalseBB was found.
3933 Builder.CreateBr(FalseBB);
3934 }
3935 }
3936
3937 EraseTerminatorAndDCECond(OldTerm);
3938
3939 if (DTU) {
3940 SmallVector<DominatorTree::UpdateType, 2> Updates;
3941 Updates.reserve(RemovedSuccessors.size());
3942 for (auto *RemovedSuccessor : RemovedSuccessors)
3943 Updates.push_back({DominatorTree::Delete, BB, RemovedSuccessor});
3944 DTU->applyUpdates(Updates);
3945 }
3946
3947 return true;
3948}
3949
3950// Replaces
3951// (switch (select cond, X, Y)) on constant X, Y
3952// with a branch - conditional if X and Y lead to distinct BBs,
3953// unconditional otherwise.
3954bool SimplifyCFGOpt::SimplifySwitchOnSelect(SwitchInst *SI,
3955 SelectInst *Select) {
3956 // Check for constant integer values in the select.
3957 ConstantInt *TrueVal = dyn_cast<ConstantInt>(Select->getTrueValue());
3958 ConstantInt *FalseVal = dyn_cast<ConstantInt>(Select->getFalseValue());
3959 if (!TrueVal || !FalseVal)
3960 return false;
3961
3962 // Find the relevant condition and destinations.
3963 Value *Condition = Select->getCondition();
3964 BasicBlock *TrueBB = SI->findCaseValue(TrueVal)->getCaseSuccessor();
3965 BasicBlock *FalseBB = SI->findCaseValue(FalseVal)->getCaseSuccessor();
3966
3967 // Get weight for TrueBB and FalseBB.
3968 uint32_t TrueWeight = 0, FalseWeight = 0;
3969 SmallVector<uint64_t, 8> Weights;
3970 bool HasWeights = HasBranchWeights(SI);
3971 if (HasWeights) {
3972 GetBranchWeights(SI, Weights);
3973 if (Weights.size() == 1 + SI->getNumCases()) {
3974 TrueWeight =
3975 (uint32_t)Weights[SI->findCaseValue(TrueVal)->getSuccessorIndex()];
3976 FalseWeight =
3977 (uint32_t)Weights[SI->findCaseValue(FalseVal)->getSuccessorIndex()];
3978 }
3979 }
3980
3981 // Perform the actual simplification.
3982 return SimplifyTerminatorOnSelect(SI, Condition, TrueBB, FalseBB, TrueWeight,
3983 FalseWeight);
3984}
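// Example of the rewrite above (illustrative, pseudo-IR): for
//
//   switch i32 (select i1 %c, i32 1, i32 4), default %d [ 1 -> %bb1, 4 -> %bb4 ]
//
// both select arms are constants, so the switch collapses to
// 'br i1 %c, label %bb1, label %bb4', with the per-case profile weights
// carried over when the switch had branch_weights metadata.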
3985
3986// Replaces
3987// (indirectbr (select cond, blockaddress(@fn, BlockA),
3988// blockaddress(@fn, BlockB)))
3989// with
3990// (br cond, BlockA, BlockB).
3991bool SimplifyCFGOpt::SimplifyIndirectBrOnSelect(IndirectBrInst *IBI,
3992 SelectInst *SI) {
3993 // Check that both operands of the select are block addresses.
3994 BlockAddress *TBA = dyn_cast<BlockAddress>(SI->getTrueValue());
3995 BlockAddress *FBA = dyn_cast<BlockAddress>(SI->getFalseValue());
3996 if (!TBA || !FBA)
3997 return false;
3998
3999 // Extract the actual blocks.
4000 BasicBlock *TrueBB = TBA->getBasicBlock();
4001 BasicBlock *FalseBB = FBA->getBasicBlock();
4002
4003 // Perform the actual simplification.
4004 return SimplifyTerminatorOnSelect(IBI, SI->getCondition(), TrueBB, FalseBB, 0,
4005 0);
4006}
4007
4008/// This is called when we find an icmp instruction
4009/// (a seteq/setne with a constant) as the only instruction in a
4010/// block that ends with an uncond branch. We are looking for a very specific
4011/// pattern that occurs when "A == 1 || A == 2 || A == 3" gets simplified. In
4012/// this case, we merge the first two "or's of icmp" into a switch, but then the
4013 /// default value goes to an uncond block with a seteq in it, so we get something
4014/// like:
4015///
4016/// switch i8 %A, label %DEFAULT [ i8 1, label %end i8 2, label %end ]
4017/// DEFAULT:
4018/// %tmp = icmp eq i8 %A, 92
4019/// br label %end
4020/// end:
4021/// ... = phi i1 [ true, %entry ], [ %tmp, %DEFAULT ], [ true, %entry ]
4022///
4023/// We prefer to split the edge to 'end' so that there is a true/false entry to
4024/// the PHI, merging the third icmp into the switch.
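Continuing the example above, the rewritten form would look roughly like this (a hedged sketch; the %switch.edge name matches the block created below, and the phi operand order is illustrative):

  switch i8 %A, label %DEFAULT [ i8 1, label %end  i8 2, label %end  i8 92, label %switch.edge ]
  DEFAULT:                       ; %A is known not to be 92 on this path
    br label %end
  switch.edge:
    br label %end
  end:
    ... = phi i1 [ true, %entry ], [ false, %DEFAULT ], [ true, %entry ], [ true, %switch.edge ]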
4025bool SimplifyCFGOpt::tryToSimplifyUncondBranchWithICmpInIt(
4026 ICmpInst *ICI, IRBuilder<> &Builder) {
4027 BasicBlock *BB = ICI->getParent();
4028
4029 // If the block has any PHIs in it or the icmp has multiple uses, it is too
4030 // complex.
4031 if (isa<PHINode>(BB->begin()) || !ICI->hasOneUse())
4032 return false;
4033
4034 Value *V = ICI->getOperand(0);
4035 ConstantInt *Cst = cast<ConstantInt>(ICI->getOperand(1));
4036
4037 // The pattern we're looking for is where our only predecessor is a switch on
4038 // 'V' and this block is the default case for the switch. In this case we can
4039 // fold the compared value into the switch to simplify things.
4040 BasicBlock *Pred = BB->getSinglePredecessor();
4041 if (!Pred || !isa<SwitchInst>(Pred->getTerminator()))
4042 return false;
4043
4044 SwitchInst *SI = cast<SwitchInst>(Pred->getTerminator());
4045 if (SI->getCondition() != V)
4046 return false;
4047
4048 // If BB is reachable on a non-default case, then we simply know the value of
4049 // V in this block. Substitute it and constant fold the icmp instruction
4050 // away.
4051 if (SI->getDefaultDest() != BB) {
4052 ConstantInt *VVal = SI->findCaseDest(BB);
4053 assert(VVal && "Should have a unique destination value");
4054 ICI->setOperand(0, VVal);
4055
4056 if (Value *V = SimplifyInstruction(ICI, {DL, ICI})) {
4057 ICI->replaceAllUsesWith(V);
4058 ICI->eraseFromParent();
4059 }
4060 // BB is now empty, so it is likely to simplify away.
4061 return requestResimplify();
4062 }
4063
4064 // Ok, the block is reachable from the default dest. If the constant we're
4065 // comparing exists in one of the other edges, then we can constant fold ICI
4066 // and zap it.
4067 if (SI->findCaseValue(Cst) != SI->case_default()) {
4068 Value *V;
4069 if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
4070 V = ConstantInt::getFalse(BB->getContext());
4071 else
4072 V = ConstantInt::getTrue(BB->getContext());
4073
4074 ICI->replaceAllUsesWith(V);
4075 ICI->eraseFromParent();
4076 // BB is now empty, so it is likely to simplify away.
4077 return requestResimplify();
4078 }
4079
4080 // The use of the icmp has to be in the 'end' block, by the only PHI node in
4081 // the block.
4082 BasicBlock *SuccBlock = BB->getTerminator()->getSuccessor(0);
4083 PHINode *PHIUse = dyn_cast<PHINode>(ICI->user_back());
4084 if (PHIUse == nullptr || PHIUse != &SuccBlock->front() ||
4085 isa<PHINode>(++BasicBlock::iterator(PHIUse)))
4086 return false;
4087
4088 // If the icmp is a SETEQ, then the default dest gets false, the new edge gets
4089 // true in the PHI.
4090 Constant *DefaultCst = ConstantInt::getTrue(BB->getContext());
4091 Constant *NewCst = ConstantInt::getFalse(BB->getContext());
4092
4093 if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
4094 std::swap(DefaultCst, NewCst);
4095
4096 // Replace ICI (which is used by the PHI for the default value) with true or
4097 // false depending on if it is EQ or NE.
4098 ICI->replaceAllUsesWith(DefaultCst);
4099 ICI->eraseFromParent();
4100
4101 SmallVector<DominatorTree::UpdateType, 2> Updates;
4102
4103 // Okay, the switch goes to this block on a default value. Add an edge from
4104 // the switch to the merge point on the compared value.
4105 BasicBlock *NewBB =
4106 BasicBlock::Create(BB->getContext(), "switch.edge", BB->getParent(), BB);
4107 {
4108 SwitchInstProfUpdateWrapper SIW(*SI);
4109 auto W0 = SIW.getSuccessorWeight(0);
4110 SwitchInstProfUpdateWrapper::CaseWeightOpt NewW;
4111 if (W0) {
4112 NewW = ((uint64_t(*W0) + 1) >> 1);
4113 SIW.setSuccessorWeight(0, *NewW);
4114 }
4115 SIW.addCase(Cst, NewBB, NewW);
4116 if (DTU)
4117 Updates.push_back({DominatorTree::Insert, Pred, NewBB});
4118 }
4119
4120 // NewBB branches to the phi block, add the uncond branch and the phi entry.
4121 Builder.SetInsertPoint(NewBB);
4122 Builder.SetCurrentDebugLocation(SI->getDebugLoc());
4123 Builder.CreateBr(SuccBlock);
4124 PHIUse->addIncoming(NewCst, NewBB);
4125 if (DTU) {
4126 Updates.push_back({DominatorTree::Insert, NewBB, SuccBlock});
4127 DTU->applyUpdates(Updates);
4128 }
4129 return true;
4130}
4131
4132/// The specified branch is a conditional branch.
4133/// Check to see if it is branching on an or/and chain of icmp instructions, and
4134/// fold it into a switch instruction if so.
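A minimal sketch of the basic rewrite (names are made up; the ExtraCase/freeze path handled below is not shown):

  ;; before
  %c0 = icmp eq i32 %x, 0
  %c1 = icmp eq i32 %x, 1
  %or = or i1 %c0, %c1
  br i1 %or, label %T, label %F

  ;; after
  switch i32 %x, label %F [
    i32 0, label %T
    i32 1, label %T
  ]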
4135bool SimplifyCFGOpt::SimplifyBranchOnICmpChain(BranchInst *BI,
4136 IRBuilder<> &Builder,
4137 const DataLayout &DL) {
4138 Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());
4139 if (!Cond)
4140 return false;
4141
4142 // Change br (X == 0 | X == 1), T, F into a switch instruction.
4143 // If this is a bunch of seteq's or'd together, or if it's a bunch of
4144 // 'setne's and'ed together, collect them.
4145
4146 // Try to gather values from a chain of and/or to be turned into a switch
4147 ConstantComparesGatherer ConstantCompare(Cond, DL);
4148 // Unpack the result
4149 SmallVectorImpl<ConstantInt *> &Values = ConstantCompare.Vals;
4150 Value *CompVal = ConstantCompare.CompValue;
4151 unsigned UsedICmps = ConstantCompare.UsedICmps;
4152 Value *ExtraCase = ConstantCompare.Extra;
4153
4154 // If we didn't find a value that is compared against multiple constants, fail.
4155 if (!CompVal)
4156 return false;
4157
4158 // Avoid turning single icmps into a switch.
4159 if (UsedICmps <= 1)
4160 return false;
4161
4162 bool TrueWhenEqual = match(Cond, m_LogicalOr(m_Value(), m_Value()));
4163
4164 // There might be duplicate constants in the list, which the switch
4165 // instruction can't handle; remove them now.
4166 array_pod_sort(Values.begin(), Values.end(), ConstantIntSortPredicate);
4167 Values.erase(std::unique(Values.begin(), Values.end()), Values.end());
4168
4169 // If Extra was used, we require at least two switch values to do the
4170 // transformation. A switch with one value is just a conditional branch.
4171 if (ExtraCase && Values.size() < 2)
4172 return false;
4173
4174 // TODO: Preserve branch weight metadata, similarly to how
4175 // FoldValueComparisonIntoPredecessors preserves it.
4176
4177 // Figure out which block is which destination.
4178 BasicBlock *DefaultBB = BI->getSuccessor(1);
4179 BasicBlock *EdgeBB = BI->getSuccessor(0);
4180 if (!TrueWhenEqual)
4181 std::swap(DefaultBB, EdgeBB);
4182
4183 BasicBlock *BB = BI->getParent();
4184
4185 LLVM_DEBUG(dbgs() << "Converting 'icmp' chain with " << Values.size()do { } while (false)
4186 << " cases into SWITCH. BB is:\n"do { } while (false)
4187 << *BB)do { } while (false);
4188
4189 SmallVector<DominatorTree::UpdateType, 2> Updates;
4190
4191 // If there are any extra values that couldn't be folded into the switch
4192 // then we evaluate them with an explicit branch first. Split the block
4193 // right before the condbr to handle it.
4194 if (ExtraCase) {
4195 BasicBlock *NewBB = SplitBlock(BB, BI, DTU, /*LI=*/nullptr,
4196 /*MSSAU=*/nullptr, "switch.early.test");
4197
4198 // Remove the uncond branch added to the old block.
4199 Instruction *OldTI = BB->getTerminator();
4200 Builder.SetInsertPoint(OldTI);
4201
4202 // There can be an unintended UB if extra values are Poison. Before the
4203 // transformation, extra values may not be evaluated according to the
4204 // condition, and it will not raise UB. But after transformation, we are
4205 // evaluating extra values before checking the condition, and it will raise
4206 // UB. It can be solved by adding freeze instruction to extra values.
4207 AssumptionCache *AC = Options.AC;
4208
4209 if (!isGuaranteedNotToBeUndefOrPoison(ExtraCase, AC, BI, nullptr))
4210 ExtraCase = Builder.CreateFreeze(ExtraCase);
4211
4212 if (TrueWhenEqual)
4213 Builder.CreateCondBr(ExtraCase, EdgeBB, NewBB);
4214 else
4215 Builder.CreateCondBr(ExtraCase, NewBB, EdgeBB);
4216
4217 OldTI->eraseFromParent();
4218
4219 if (DTU)
4220 Updates.push_back({DominatorTree::Insert, BB, EdgeBB});
4221
4222 // If there are PHI nodes in EdgeBB, then we need to add a new entry to them
4223 // for the edge we just added.
4224 AddPredecessorToBlock(EdgeBB, BB, NewBB);
4225
4226 LLVM_DEBUG(dbgs() << " ** 'icmp' chain unhandled condition: " << *ExtraCase
4227 << "\nEXTRABB = " << *BB);
4228 BB = NewBB;
4229 }
4230
4231 Builder.SetInsertPoint(BI);
4232 // Convert pointer to int before we switch.
4233 if (CompVal->getType()->isPointerTy()) {
4234 CompVal = Builder.CreatePtrToInt(
4235 CompVal, DL.getIntPtrType(CompVal->getType()), "magicptr");
4236 }
4237
4238 // Create the new switch instruction now.
4239 SwitchInst *New = Builder.CreateSwitch(CompVal, DefaultBB, Values.size());
4240
4241 // Add all of the 'cases' to the switch instruction.
4242 for (unsigned i = 0, e = Values.size(); i != e; ++i)
4243 New->addCase(Values[i], EdgeBB);
4244
4245 // We added edges from PI to the EdgeBB. As such, if there were any
4246 // PHI nodes in EdgeBB, they need entries to be added corresponding to
4247 // the number of edges added.
4248 for (BasicBlock::iterator BBI = EdgeBB->begin(); isa<PHINode>(BBI); ++BBI) {
4249 PHINode *PN = cast<PHINode>(BBI);
4250 Value *InVal = PN->getIncomingValueForBlock(BB);
4251 for (unsigned i = 0, e = Values.size() - 1; i != e; ++i)
4252 PN->addIncoming(InVal, BB);
4253 }
4254
4255 // Erase the old branch instruction.
4256 EraseTerminatorAndDCECond(BI);
4257 if (DTU)
4258 DTU->applyUpdates(Updates);
4259
4260 LLVM_DEBUG(dbgs() << " ** 'icmp' chain result is:\n" << *BB << '\n');
4261 return true;
4262}
4263
4264bool SimplifyCFGOpt::simplifyResume(ResumeInst *RI, IRBuilder<> &Builder) {
4265 if (isa<PHINode>(RI->getValue()))
4266 return simplifyCommonResume(RI);
4267 else if (isa<LandingPadInst>(RI->getParent()->getFirstNonPHI()) &&
4268 RI->getValue() == RI->getParent()->getFirstNonPHI())
4269 // The resume must unwind the exception that caused control to branch here.
4270 return simplifySingleResume(RI);
4271
4272 return false;
4273}
4274
4275// Check if cleanup block is empty
4276static bool isCleanupBlockEmpty(iterator_range<BasicBlock::iterator> R) {
4277 for (Instruction &I : R) {
4278 auto *II = dyn_cast<IntrinsicInst>(&I);
4279 if (!II)
4280 return false;
4281
4282 Intrinsic::ID IntrinsicID = II->getIntrinsicID();
4283 switch (IntrinsicID) {
4284 case Intrinsic::dbg_declare:
4285 case Intrinsic::dbg_value:
4286 case Intrinsic::dbg_label:
4287 case Intrinsic::lifetime_end:
4288 break;
4289 default:
4290 return false;
4291 }
4292 }
4293 return true;
4294}
4295
4296// Simplify resume that is shared by several landing pads (phi of landing pad).
4297bool SimplifyCFGOpt::simplifyCommonResume(ResumeInst *RI) {
4298 BasicBlock *BB = RI->getParent();
4299
4300 // Check that there are no other instructions except for debug and lifetime
4301 // intrinsics between the phi's and resume instruction.
4302 if (!isCleanupBlockEmpty(
4303 make_range(RI->getParent()->getFirstNonPHI(), BB->getTerminator())))
4304 return false;
4305
4306 SmallSetVector<BasicBlock *, 4> TrivialUnwindBlocks;
4307 auto *PhiLPInst = cast<PHINode>(RI->getValue());
4308
4309 // Check incoming blocks to see if any of them are trivial.
4310 for (unsigned Idx = 0, End = PhiLPInst->getNumIncomingValues(); Idx != End;
4311 Idx++) {
4312 auto *IncomingBB = PhiLPInst->getIncomingBlock(Idx);
4313 auto *IncomingValue = PhiLPInst->getIncomingValue(Idx);
4314
4315 // If the block has other successors, we can not delete it because
4316 // it has other dependents.
4317 if (IncomingBB->getUniqueSuccessor() != BB)
4318 continue;
4319
4320 auto *LandingPad = dyn_cast<LandingPadInst>(IncomingBB->getFirstNonPHI());
4321 // Not the landing pad that caused the control to branch here.
4322 if (IncomingValue != LandingPad)
4323 continue;
4324
4325 if (isCleanupBlockEmpty(
4326 make_range(LandingPad->getNextNode(), IncomingBB->getTerminator())))
4327 TrivialUnwindBlocks.insert(IncomingBB);
4328 }
4329
4330 // If no trivial unwind blocks, don't do any simplifications.
4331 if (TrivialUnwindBlocks.empty())
4332 return false;
4333
4334 // Turn all invokes that unwind here into calls.
4335 for (auto *TrivialBB : TrivialUnwindBlocks) {
4336 // Blocks that will be simplified should be removed from the phi node.
4337 // Note there could be multiple edges to the resume block, and we need
4338 // to remove them all.
4339 while (PhiLPInst->getBasicBlockIndex(TrivialBB) != -1)
4340 BB->removePredecessor(TrivialBB, true);
4341
4342 for (BasicBlock *Pred :
4343 llvm::make_early_inc_range(predecessors(TrivialBB))) {
4344 removeUnwindEdge(Pred, DTU);
4345 ++NumInvokes;
4346 }
4347
4348 // In each SimplifyCFG run, only the current processed block can be erased.
4349 // Otherwise, it will break the iteration of SimplifyCFG pass. So instead
4350 // of erasing TrivialBB, we only remove the branch to the common resume
4351 // block so that we can later erase the resume block since it has no
4352 // predecessors.
4353 TrivialBB->getTerminator()->eraseFromParent();
4354 new UnreachableInst(RI->getContext(), TrivialBB);
4355 if (DTU)
4356 DTU->applyUpdates({{DominatorTree::Delete, TrivialBB, BB}});
4357 }
4358
4359 // Delete the resume block if all its predecessors have been removed.
4360 if (pred_empty(BB))
4361 DeleteDeadBlock(BB, DTU);
4362
4363 return !TrivialUnwindBlocks.empty();
4364}
4365
4366// Simplify resume that is only used by a single (non-phi) landing pad.
4367bool SimplifyCFGOpt::simplifySingleResume(ResumeInst *RI) {
4368 BasicBlock *BB = RI->getParent();
4369 auto *LPInst = cast<LandingPadInst>(BB->getFirstNonPHI());
4370 assert(RI->getValue() == LPInst &&
4371 "Resume must unwind the exception that caused control to here");
4372
4373 // Check that there are no other instructions except for debug intrinsics.
4374 if (!isCleanupBlockEmpty(
4375 make_range<Instruction *>(LPInst->getNextNode(), RI)))
4376 return false;
4377
4378 // Turn all invokes that unwind here into calls and delete the basic block.
4379 for (BasicBlock *Pred : llvm::make_early_inc_range(predecessors(BB))) {
4380 removeUnwindEdge(Pred, DTU);
4381 ++NumInvokes;
4382 }
4383
4384 // The landingpad is now unreachable. Zap it.
4385 DeleteDeadBlock(BB, DTU);
4386 return true;
4387}
4388
4389static bool removeEmptyCleanup(CleanupReturnInst *RI, DomTreeUpdater *DTU) {
4390 // If this is a trivial cleanup pad that executes no instructions, it can be
4391 // eliminated. If the cleanup pad continues to the caller, any predecessor
4392 // that is an EH pad will be updated to continue to the caller and any
4393 // predecessor that terminates with an invoke instruction will have its invoke
4394 // instruction converted to a call instruction. If the cleanup pad being
4395 // simplified does not continue to the caller, each predecessor will be
4396 // updated to continue to the unwind destination of the cleanup pad being
4397 // simplified.
4398 BasicBlock *BB = RI->getParent();
4399 CleanupPadInst *CPInst = RI->getCleanupPad();
4400 if (CPInst->getParent() != BB)
4401 // This isn't an empty cleanup.
4402 return false;
4403
4404 // We cannot kill the pad if it has multiple uses. This typically arises
4405 // from unreachable basic blocks.
4406 if (!CPInst->hasOneUse())
4407 return false;
4408
4409 // Check that there are no other instructions except for benign intrinsics.
4410 if (!isCleanupBlockEmpty(
4411 make_range<Instruction *>(CPInst->getNextNode(), RI)))
4412 return false;
4413
4414 // If the cleanup return we are simplifying unwinds to the caller, this will
4415 // set UnwindDest to nullptr.
4416 BasicBlock *UnwindDest = RI->getUnwindDest();
4417 Instruction *DestEHPad = UnwindDest ? UnwindDest->getFirstNonPHI() : nullptr;
4418
4419 // We're about to remove BB from the control flow. Before we do, sink any
4420 // PHINodes into the unwind destination. Doing this before changing the
4421 // control flow avoids some potentially slow checks, since we can currently
4422 // be certain that UnwindDest and BB have no common predecessors (since they
4423 // are both EH pads).
4424 if (UnwindDest) {
4425 // First, go through the PHI nodes in UnwindDest and update any nodes that
4426 // reference the block we are removing
4427 for (PHINode &DestPN : UnwindDest->phis()) {
4428 int Idx = DestPN.getBasicBlockIndex(BB);
4429 // Since BB unwinds to UnwindDest, it has to be in the PHI node.
4430 assert(Idx != -1);
4431 // This PHI node has an incoming value that corresponds to a control
4432 // path through the cleanup pad we are removing. If the incoming
4433 // value is in the cleanup pad, it must be a PHINode (because we
4434 // verified above that the block is otherwise empty). Otherwise, the
4435 // value is either a constant or a value that dominates the cleanup
4436 // pad being removed.
4437 //
4438 // Because BB and UnwindDest are both EH pads, all of their
4439 // predecessors must unwind to these blocks, and since no instruction
4440 // can have multiple unwind destinations, there will be no overlap in
4441 // incoming blocks between SrcPN and DestPN.
4442 Value *SrcVal = DestPN.getIncomingValue(Idx);
4443 PHINode *SrcPN = dyn_cast<PHINode>(SrcVal);
4444
4445 bool NeedPHITranslation = SrcPN && SrcPN->getParent() == BB;
4446 for (auto *Pred : predecessors(BB)) {
4447 Value *Incoming =
4448 NeedPHITranslation ? SrcPN->getIncomingValueForBlock(Pred) : SrcVal;
4449 DestPN.addIncoming(Incoming, Pred);
4450 }
4451 }
4452
4453 // Sink any remaining PHI nodes directly into UnwindDest.
4454 Instruction *InsertPt = DestEHPad;
4455 for (PHINode &PN : make_early_inc_range(BB->phis())) {
4456 if (PN.use_empty() || !PN.isUsedOutsideOfBlock(BB))
4457 // If the PHI node has no uses or all of its uses are in this basic
4458 // block (meaning they are debug or lifetime intrinsics), just leave
4459 // it. It will be erased when we erase BB below.
4460 continue;
4461
4462 // Otherwise, sink this PHI node into UnwindDest.
4463 // Any predecessors to UnwindDest which are not already represented
4464 // must be back edges which inherit the value from the path through
4465 // BB. In this case, the PHI value must reference itself.
4466 for (auto *pred : predecessors(UnwindDest))
4467 if (pred != BB)
4468 PN.addIncoming(&PN, pred);
4469 PN.moveBefore(InsertPt);
4470 // Also, add a dummy incoming value for the original BB itself,
4471 // so that the PHI is well-formed until we drop said predecessor.
4472 PN.addIncoming(UndefValue::get(PN.getType()), BB);
4473 }
4474 }
4475
4476 std::vector<DominatorTree::UpdateType> Updates;
4477
4478 // We use make_early_inc_range here because we will remove all predecessors.
4479 for (BasicBlock *PredBB : llvm::make_early_inc_range(predecessors(BB))) {
4480 if (UnwindDest == nullptr) {
4481 if (DTU) {
4482 DTU->applyUpdates(Updates);
4483 Updates.clear();
4484 }
4485 removeUnwindEdge(PredBB, DTU);
4486 ++NumInvokes;
4487 } else {
4488 BB->removePredecessor(PredBB);
4489 Instruction *TI = PredBB->getTerminator();
4490 TI->replaceUsesOfWith(BB, UnwindDest);
4491 if (DTU) {
4492 Updates.push_back({DominatorTree::Insert, PredBB, UnwindDest});
4493 Updates.push_back({DominatorTree::Delete, PredBB, BB});
4494 }
4495 }
4496 }
4497
4498 if (DTU)
4499 DTU->applyUpdates(Updates);
4500
4501 DeleteDeadBlock(BB, DTU);
4502
4503 return true;
4504}
4505
4506// Try to merge two cleanuppads together.
4507static bool mergeCleanupPad(CleanupReturnInst *RI) {
4508 // Skip any cleanuprets which unwind to caller, there is nothing to merge
4509 // with.
4510 BasicBlock *UnwindDest = RI->getUnwindDest();
4511 if (!UnwindDest)
4512 return false;
4513
4514 // If this cleanupret isn't the only predecessor of this cleanuppad, it
4515 // wouldn't be safe to merge without code duplication.
4516 if (UnwindDest->getSinglePredecessor() != RI->getParent())
4517 return false;
4518
4519 // Verify that our cleanuppad's unwind destination is another cleanuppad.
4520 auto *SuccessorCleanupPad = dyn_cast<CleanupPadInst>(&UnwindDest->front());
4521 if (!SuccessorCleanupPad)
4522 return false;
4523
4524 CleanupPadInst *PredecessorCleanupPad = RI->getCleanupPad();
4525 // Replace any uses of the successor cleanuppad with the predecessor pad.
4526 // The only uses of the successor cleanuppad should be its cleanupret and
4527 // funclet bundle operands.
4528 SuccessorCleanupPad->replaceAllUsesWith(PredecessorCleanupPad);
4529 // Remove the old cleanuppad.
4530 SuccessorCleanupPad->eraseFromParent();
4531 // Now, we simply replace the cleanupret with a branch to the unwind
4532 // destination.
4533 BranchInst::Create(UnwindDest, RI->getParent());
4534 RI->eraseFromParent();
4535
4536 return true;
4537}
4538
4539bool SimplifyCFGOpt::simplifyCleanupReturn(CleanupReturnInst *RI) {
4540 // It is possible to transiently have an undef cleanuppad operand because we
4541 // have deleted some, but not all, dead blocks.
4542 // Eventually, this block will be deleted.
4543 if (isa<UndefValue>(RI->getOperand(0)))
4544 return false;
4545
4546 if (mergeCleanupPad(RI))
4547 return true;
4548
4549 if (removeEmptyCleanup(RI, DTU))
4550 return true;
4551
4552 return false;
4553}
4554
4555// WARNING: keep in sync with InstCombinerImpl::visitUnreachableInst()!
4556bool SimplifyCFGOpt::simplifyUnreachable(UnreachableInst *UI) {
4557 BasicBlock *BB = UI->getParent();
4558
4559 bool Changed = false;
4560
4561 // If there are any instructions immediately before the unreachable that can
4562 // be removed, do so.
4563 while (UI->getIterator() != BB->begin()) {
4564 BasicBlock::iterator BBI = UI->getIterator();
4565 --BBI;
4566
4567 if (!isGuaranteedToTransferExecutionToSuccessor(&*BBI))
4568 break; // Can not drop any more instructions. We're done here.
4569 // Otherwise, this instruction can be freely erased,
4570 // even if it is not side-effect free.
4571
4572 // Note that deleting EH's here is in fact okay, although it involves a bit
4573 // of subtle reasoning. If this inst is an EH, all the predecessors of this
4574 // block will be the unwind edges of Invoke/CatchSwitch/CleanupReturn,
4575 // and we can therefore guarantee this block will be erased.
4576
4577 // Delete this instruction (any uses are guaranteed to be dead)
4578 BBI->replaceAllUsesWith(PoisonValue::get(BBI->getType()));
4579 BBI->eraseFromParent();
4580 Changed = true;
4581 }
4582
4583 // If the unreachable instruction is the first in the block, take a gander
4584 // at all of the predecessors of this instruction, and simplify them.
4585 if (&BB->front() != UI)
4586 return Changed;
4587
4588 std::vector<DominatorTree::UpdateType> Updates;
4589
4590 SmallSetVector<BasicBlock *, 8> Preds(pred_begin(BB), pred_end(BB));
4591 for (unsigned i = 0, e = Preds.size(); i != e; ++i) {
4592 auto *Predecessor = Preds[i];
4593 Instruction *TI = Predecessor->getTerminator();
4594 IRBuilder<> Builder(TI);
4595 if (auto *BI = dyn_cast<BranchInst>(TI)) {
4596 // We could either have a proper unconditional branch,
4597 // or a degenerate conditional branch with matching destinations.
4598 if (all_of(BI->successors(),
4599 [BB](auto *Successor) { return Successor == BB; })) {
4600 new UnreachableInst(TI->getContext(), TI);
4601 TI->eraseFromParent();
4602 Changed = true;
4603 } else {
4604 assert(BI->isConditional() && "Can't get here with an uncond branch.");
4605 Value* Cond = BI->getCondition();
4606 assert(BI->getSuccessor(0) != BI->getSuccessor(1) &&
4607 "The destinations are guaranteed to be different here.");
4608 if (BI->getSuccessor(0) == BB) {
4609 Builder.CreateAssumption(Builder.CreateNot(Cond));
4610 Builder.CreateBr(BI->getSuccessor(1));
4611 } else {
4612 assert(BI->getSuccessor(1) == BB && "Incorrect CFG");
4613 Builder.CreateAssumption(Cond);
4614 Builder.CreateBr(BI->getSuccessor(0));
4615 }
4616 EraseTerminatorAndDCECond(BI);
4617 Changed = true;
4618 }
4619 if (DTU)
4620 Updates.push_back({DominatorTree::Delete, Predecessor, BB});
4621 } else if (auto *SI = dyn_cast<SwitchInst>(TI)) {
4622 SwitchInstProfUpdateWrapper SU(*SI);
4623 for (auto i = SU->case_begin(), e = SU->case_end(); i != e;) {
4624 if (i->getCaseSuccessor() != BB) {
4625 ++i;
4626 continue;
4627 }
4628 BB->removePredecessor(SU->getParent());
4629 i = SU.removeCase(i);
4630 e = SU->case_end();
4631 Changed = true;
4632 }
4633 // Note that the default destination can't be removed!
4634 if (DTU && SI->getDefaultDest() != BB)
4635 Updates.push_back({DominatorTree::Delete, Predecessor, BB});
4636 } else if (auto *II = dyn_cast<InvokeInst>(TI)) {
4637 if (II->getUnwindDest() == BB) {
4638 if (DTU) {
4639 DTU->applyUpdates(Updates);
4640 Updates.clear();
4641 }
4642 removeUnwindEdge(TI->getParent(), DTU);
4643 Changed = true;
4644 }
4645 } else if (auto *CSI = dyn_cast<CatchSwitchInst>(TI)) {
4646 if (CSI->getUnwindDest() == BB) {
4647 if (DTU) {
4648 DTU->applyUpdates(Updates);
4649 Updates.clear();
4650 }
4651 removeUnwindEdge(TI->getParent(), DTU);
4652 Changed = true;
4653 continue;
4654 }
4655
4656 for (CatchSwitchInst::handler_iterator I = CSI->handler_begin(),
4657 E = CSI->handler_end();
4658 I != E; ++I) {
4659 if (*I == BB) {
4660 CSI->removeHandler(I);
4661 --I;
4662 --E;
4663 Changed = true;
4664 }
4665 }
4666 if (DTU)
4667 Updates.push_back({DominatorTree::Delete, Predecessor, BB});
4668 if (CSI->getNumHandlers() == 0) {
4669 if (CSI->hasUnwindDest()) {
4670 // Redirect all predecessors of the block containing CatchSwitchInst
4671 // to instead branch to the CatchSwitchInst's unwind destination.
4672 if (DTU) {
4673 for (auto *PredecessorOfPredecessor : predecessors(Predecessor)) {
4674 Updates.push_back({DominatorTree::Insert,
4675 PredecessorOfPredecessor,
4676 CSI->getUnwindDest()});
4677 Updates.push_back({DominatorTree::Delete,
4678 PredecessorOfPredecessor, Predecessor});
4679 }
4680 }
4681 Predecessor->replaceAllUsesWith(CSI->getUnwindDest());
4682 } else {
4683 // Rewrite all preds to unwind to caller (or from invoke to call).
4684 if (DTU) {
4685 DTU->applyUpdates(Updates);
4686 Updates.clear();
4687 }
4688 SmallVector<BasicBlock *, 8> EHPreds(predecessors(Predecessor));
4689 for (BasicBlock *EHPred : EHPreds)
4690 removeUnwindEdge(EHPred, DTU);
4691 }
4692 // The catchswitch is no longer reachable.
4693 new UnreachableInst(CSI->getContext(), CSI);
4694 CSI->eraseFromParent();
4695 Changed = true;
4696 }
4697 } else if (auto *CRI = dyn_cast<CleanupReturnInst>(TI)) {
4698 (void)CRI;
4699 assert(CRI->hasUnwindDest() && CRI->getUnwindDest() == BB &&
4700 "Expected to always have an unwind to BB.");
4701 if (DTU)
4702 Updates.push_back({DominatorTree::Delete, Predecessor, BB});
4703 new UnreachableInst(TI->getContext(), TI);
4704 TI->eraseFromParent();
4705 Changed = true;
4706 }
4707 }
4708
4709 if (DTU)
4710 DTU->applyUpdates(Updates);
4711
4712 // If this block is now dead, remove it.
4713 if (pred_empty(BB) && BB != &BB->getParent()->getEntryBlock()) {
4714 DeleteDeadBlock(BB, DTU);
4715 return true;
4716 }
4717
4718 return Changed;
4719}
4720
4721static bool CasesAreContiguous(SmallVectorImpl<ConstantInt *> &Cases) {
4722 assert(Cases.size() >= 1);
4723
4724 array_pod_sort(Cases.begin(), Cases.end(), ConstantIntSortPredicate);
4725 for (size_t I = 1, E = Cases.size(); I != E; ++I) {
4726 if (Cases[I - 1]->getValue() != Cases[I]->getValue() + 1)
4727 return false;
4728 }
4729 return true;
4730}
4731
4732static void createUnreachableSwitchDefault(SwitchInst *Switch,
4733 DomTreeUpdater *DTU) {
4734 LLVM_DEBUG(dbgs() << "SimplifyCFG: switch default is dead.\n")do { } while (false);
4735 auto *BB = Switch->getParent();
4736 BasicBlock *NewDefaultBlock = SplitBlockPredecessors(
4737 Switch->getDefaultDest(), Switch->getParent(), "", DTU);
4738 auto *OrigDefaultBlock = Switch->getDefaultDest();
4739 Switch->setDefaultDest(&*NewDefaultBlock);
4740 if (DTU)
4741 DTU->applyUpdates({{DominatorTree::Insert, BB, &*NewDefaultBlock},
4742 {DominatorTree::Delete, BB, OrigDefaultBlock}});
4743 SplitBlock(&*NewDefaultBlock, &NewDefaultBlock->front(), DTU);
4744 SmallVector<DominatorTree::UpdateType, 2> Updates;
4745 if (DTU)
4746 for (auto *Successor : successors(NewDefaultBlock))
4747 Updates.push_back({DominatorTree::Delete, NewDefaultBlock, Successor});
4748 auto *NewTerminator = NewDefaultBlock->getTerminator();
4749 new UnreachableInst(Switch->getContext(), NewTerminator);
4750 EraseTerminatorAndDCECond(NewTerminator);
4751 if (DTU)
4752 DTU->applyUpdates(Updates);
4753}
4754
4755/// Turn a switch with two reachable destinations into an integer range
4756/// comparison and branch.
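For instance (illustrative values; the ".off" and "switch" names match what the code below creates):

  ;; before: cases 10..12 all reach %dest, everything else reaches %other
  switch i32 %x, label %other [
    i32 10, label %dest
    i32 11, label %dest
    i32 12, label %dest
  ]

  ;; after: a single subtract-and-range check
  %x.off = add i32 %x, -10
  %switch = icmp ult i32 %x.off, 3
  br i1 %switch, label %dest, label %other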
4757bool SimplifyCFGOpt::TurnSwitchRangeIntoICmp(SwitchInst *SI,
4758 IRBuilder<> &Builder) {
4759 assert(SI->getNumCases() > 1 && "Degenerate switch?");
4760
4761 bool HasDefault =
4762 !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
4763
4764 auto *BB = SI->getParent();
4765
4766 // Partition the cases into two sets with different destinations.
4767 BasicBlock *DestA = HasDefault ? SI->getDefaultDest() : nullptr;
4768 BasicBlock *DestB = nullptr;
4769 SmallVector<ConstantInt *, 16> CasesA;
4770 SmallVector<ConstantInt *, 16> CasesB;
4771
4772 for (auto Case : SI->cases()) {
4773 BasicBlock *Dest = Case.getCaseSuccessor();
4774 if (!DestA)
4775 DestA = Dest;
4776 if (Dest == DestA) {
4777 CasesA.push_back(Case.getCaseValue());
4778 continue;
4779 }
4780 if (!DestB)
4781 DestB = Dest;
4782 if (Dest == DestB) {
4783 CasesB.push_back(Case.getCaseValue());
4784 continue;
4785 }
4786 return false; // More than two destinations.
4787 }
4788
4789 assert(DestA && DestB &&
4790 "Single-destination switch should have been folded.");
4791 assert(DestA != DestB);
4792 assert(DestB != SI->getDefaultDest());
4793 assert(!CasesB.empty() && "There must be non-default cases.");
4794 assert(!CasesA.empty() || HasDefault);
4795
4796 // Figure out if one of the sets of cases form a contiguous range.
4797 SmallVectorImpl<ConstantInt *> *ContiguousCases = nullptr;
4798 BasicBlock *ContiguousDest = nullptr;
4799 BasicBlock *OtherDest = nullptr;
4800 if (!CasesA.empty() && CasesAreContiguous(CasesA)) {
4801 ContiguousCases = &CasesA;
4802 ContiguousDest = DestA;
4803 OtherDest = DestB;
4804 } else if (CasesAreContiguous(CasesB)) {
4805 ContiguousCases = &CasesB;
4806 ContiguousDest = DestB;
4807 OtherDest = DestA;
4808 } else
4809 return false;
4810
4811 // Start building the compare and branch.
4812
4813 Constant *Offset = ConstantExpr::getNeg(ContiguousCases->back());
4814 Constant *NumCases =
4815 ConstantInt::get(Offset->getType(), ContiguousCases->size());
4816
4817 Value *Sub = SI->getCondition();
4818 if (!Offset->isNullValue())
4819 Sub = Builder.CreateAdd(Sub, Offset, Sub->getName() + ".off");
4820
4821 Value *Cmp;
4822 // If NumCases overflowed, then all possible values jump to the successor.
4823 if (NumCases->isNullValue() && !ContiguousCases->empty())
4824 Cmp = ConstantInt::getTrue(SI->getContext());
4825 else
4826 Cmp = Builder.CreateICmpULT(Sub, NumCases, "switch");
4827 BranchInst *NewBI = Builder.CreateCondBr(Cmp, ContiguousDest, OtherDest);
4828
4829 // Update weight for the newly-created conditional branch.
4830 if (HasBranchWeights(SI)) {
4831 SmallVector<uint64_t, 8> Weights;
4832 GetBranchWeights(SI, Weights);
4833 if (Weights.size() == 1 + SI->getNumCases()) {
4834 uint64_t TrueWeight = 0;
4835 uint64_t FalseWeight = 0;
4836 for (size_t I = 0, E = Weights.size(); I != E; ++I) {
4837 if (SI->getSuccessor(I) == ContiguousDest)
4838 TrueWeight += Weights[I];
4839 else
4840 FalseWeight += Weights[I];
4841 }
4842 while (TrueWeight > UINT32_MAX || FalseWeight > UINT32_MAX) {
4843 TrueWeight /= 2;
4844 FalseWeight /= 2;
4845 }
4846 setBranchWeights(NewBI, TrueWeight, FalseWeight);
4847 }
4848 }
4849
4850 // Prune obsolete incoming values off the successors' PHI nodes.
4851 for (auto BBI = ContiguousDest->begin(); isa<PHINode>(BBI); ++BBI) {
4852 unsigned PreviousEdges = ContiguousCases->size();
4853 if (ContiguousDest == SI->getDefaultDest())
4854 ++PreviousEdges;
4855 for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
4856 cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
4857 }
4858 for (auto BBI = OtherDest->begin(); isa<PHINode>(BBI); ++BBI) {
4859 unsigned PreviousEdges = SI->getNumCases() - ContiguousCases->size();
4860 if (OtherDest == SI->getDefaultDest())
4861 ++PreviousEdges;
4862 for (unsigned I = 0, E = PreviousEdges - 1; I != E; ++I)
4863 cast<PHINode>(BBI)->removeIncomingValue(SI->getParent());
4864 }
4865
4866 // Clean up the default block - it may have phis or other instructions before
4867 // the unreachable terminator.
4868 if (!HasDefault)
4869 createUnreachableSwitchDefault(SI, DTU);
4870
4871 auto *UnreachableDefault = SI->getDefaultDest();
4872
4873 // Drop the switch.
4874 SI->eraseFromParent();
4875
4876 if (!HasDefault && DTU)
4877 DTU->applyUpdates({{DominatorTree::Delete, BB, UnreachableDefault}});
4878
4879 return true;
4880}
4881
4882/// Compute masked bits for the condition of a switch
4883/// and use it to remove dead cases.
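For example, when known bits of the condition rule a case value out entirely, that case is removed (an illustrative fragment, not taken from the source):

  %x = shl i32 %y, 1              ; low bit of %x is known to be zero
  switch i32 %x, label %default [
    i32 4, label %a
    i32 5, label %b               ; dead: 5 has the low bit set
  ]
  ;; case 5 is deleted and %b loses this predecessor edge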
4884static bool eliminateDeadSwitchCases(SwitchInst *SI, DomTreeUpdater *DTU,
4885 AssumptionCache *AC,
4886 const DataLayout &DL) {
4887 Value *Cond = SI->getCondition();
4888 unsigned Bits = Cond->getType()->getIntegerBitWidth();
4889 KnownBits Known = computeKnownBits(Cond, DL, 0, AC, SI);
4890
4891 // We can also eliminate cases by determining that their values are outside of
4892 // the limited range of the condition based on how many significant (non-sign)
4893 // bits are in the condition value.
4894 unsigned ExtraSignBits = ComputeNumSignBits(Cond, DL, 0, AC, SI) - 1;
4895 unsigned MaxSignificantBitsInCond = Bits - ExtraSignBits;
4896
4897 // Gather dead cases.
4898 SmallVector<ConstantInt *, 8> DeadCases;
4899 SmallDenseMap<BasicBlock *, int, 8> NumPerSuccessorCases;
4900 for (auto &Case : SI->cases()) {
4901 auto *Successor = Case.getCaseSuccessor();
4902 if (DTU)
4903 ++NumPerSuccessorCases[Successor];
4904 const APInt &CaseVal = Case.getCaseValue()->getValue();
4905 if (Known.Zero.intersects(CaseVal) || !Known.One.isSubsetOf(CaseVal) ||
4906 (CaseVal.getMinSignedBits() > MaxSignificantBitsInCond)) {
4907 DeadCases.push_back(Case.getCaseValue());
4908 if (DTU)
4909 --NumPerSuccessorCases[Successor];
4910 LLVM_DEBUG(dbgs() << "SimplifyCFG: switch case " << CaseValdo { } while (false)
4911 << " is dead.\n")do { } while (false);
4912 }
4913 }
4914
4915 // If we can prove that the cases must cover all possible values, the
4916 // default destination becomes dead and we can remove it. If we know some
4917 // of the bits in the value, we can use that to more precisely compute the
4918 // number of possible unique case values.
4919 bool HasDefault =
4920 !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
4921 const unsigned NumUnknownBits =
4922 Bits - (Known.Zero | Known.One).countPopulation();
4923 assert(NumUnknownBits <= Bits);
4924 if (HasDefault && DeadCases.empty() &&
4925 NumUnknownBits < 64 /* avoid overflow */ &&
4926 SI->getNumCases() == (1ULL << NumUnknownBits)) {
4927 createUnreachableSwitchDefault(SI, DTU);
4928 return true;
4929 }
4930
4931 if (DeadCases.empty())
4932 return false;
4933
4934 SwitchInstProfUpdateWrapper SIW(*SI);
4935 for (ConstantInt *DeadCase : DeadCases) {
4936 SwitchInst::CaseIt CaseI = SI->findCaseValue(DeadCase);
4937 assert(CaseI != SI->case_default() &&
4938 "Case was not found. Probably mistake in DeadCases forming.");
4939 // Prune unused values from PHI nodes.
4940 CaseI->getCaseSuccessor()->removePredecessor(SI->getParent());
4941 SIW.removeCase(CaseI);
4942 }
4943
4944 if (DTU) {
4945 std::vector<DominatorTree::UpdateType> Updates;
4946 for (const std::pair<BasicBlock *, int> &I : NumPerSuccessorCases)
4947 if (I.second == 0)
4948 Updates.push_back({DominatorTree::Delete, SI->getParent(), I.first});
4949 DTU->applyUpdates(Updates);
4950 }
4951
4952 return true;
4953}
4954
4955/// If BB would be eligible for simplification by
4956/// TryToSimplifyUncondBranchFromEmptyBlock (i.e. it is empty and terminated
4957/// by an unconditional branch), look at the phi node for BB in the successor
4958/// block and see if the incoming value is equal to CaseValue. If so, return
4959/// the phi node, and set PhiIndex to BB's index in the phi node.
4960static PHINode *FindPHIForConditionForwarding(ConstantInt *CaseValue,
4961 BasicBlock *BB, int *PhiIndex) {
4962 if (BB->getFirstNonPHIOrDbg() != BB->getTerminator())
4963 return nullptr; // BB must be empty to be a candidate for simplification.
4964 if (!BB->getSinglePredecessor())
4965 return nullptr; // BB must be dominated by the switch.
4966
4967 BranchInst *Branch = dyn_cast<BranchInst>(BB->getTerminator());
4968 if (!Branch || !Branch->isUnconditional())
4969 return nullptr; // Terminator must be unconditional branch.
4970
4971 BasicBlock *Succ = Branch->getSuccessor(0);
4972
4973 for (PHINode &PHI : Succ->phis()) {
4974 int Idx = PHI.getBasicBlockIndex(BB);
4975 assert(Idx >= 0 && "PHI has no entry for predecessor?");
4976
4977 Value *InValue = PHI.getIncomingValue(Idx);
4978 if (InValue != CaseValue)
4979 continue;
4980
4981 *PhiIndex = Idx;
4982 return &PHI;
4983 }
4984
4985 return nullptr;
4986}
4987
4988/// Try to forward the condition of a switch instruction to a phi node
4989/// dominated by the switch, if that would mean that some of the destination
4990/// blocks of the switch can be folded away. Return true if a change is made.
4991static bool ForwardSwitchConditionToPHI(SwitchInst *SI) {
4992 using ForwardingNodesMap = DenseMap<PHINode *, SmallVector<int, 4>>;
4993
4994 ForwardingNodesMap ForwardingNodes;
4995 BasicBlock *SwitchBlock = SI->getParent();
4996 bool Changed = false;
4997 for (auto &Case : SI->cases()) {
4998 ConstantInt *CaseValue = Case.getCaseValue();
4999 BasicBlock *CaseDest = Case.getCaseSuccessor();
5000
5001 // Replace phi operands in successor blocks that are using the constant case
5002 // value rather than the switch condition variable:
5003 // switchbb:
5004 // switch i32 %x, label %default [
5005 // i32 17, label %succ
5006 // ...
5007 // succ:
5008 // %r = phi i32 ... [ 17, %switchbb ] ...
5009 // -->
5010 // %r = phi i32 ... [ %x, %switchbb ] ...
5011
5012 for (PHINode &Phi : CaseDest->phis()) {
5013 // This only works if there is exactly 1 incoming edge from the switch to
5014 // a phi. If there is >1, that means multiple cases of the switch map to 1
5015 // value in the phi, and that phi value is not the switch condition. Thus,
5016 // this transform would not make sense (the phi would be invalid because
5017 // a phi can't have different incoming values from the same block).
5018 int SwitchBBIdx = Phi.getBasicBlockIndex(SwitchBlock);
5019 if (Phi.getIncomingValue(SwitchBBIdx) == CaseValue &&
5020 count(Phi.blocks(), SwitchBlock) == 1) {
5021 Phi.setIncomingValue(SwitchBBIdx, SI->getCondition());
5022 Changed = true;
5023 }
5024 }
5025
5026 // Collect phi nodes that are indirectly using this switch's case constants.
5027 int PhiIdx;
5028 if (auto *Phi = FindPHIForConditionForwarding(CaseValue, CaseDest, &PhiIdx))
5029 ForwardingNodes[Phi].push_back(PhiIdx);
5030 }
5031
5032 for (auto &ForwardingNode : ForwardingNodes) {
5033 PHINode *Phi = ForwardingNode.first;
5034 SmallVectorImpl<int> &Indexes = ForwardingNode.second;
5035 if (Indexes.size() < 2)
5036 continue;
5037
5038 for (int Index : Indexes)
5039 Phi->setIncomingValue(Index, SI->getCondition());
5040 Changed = true;
5041 }
5042
5043 return Changed;
5044}
5045
5046/// Return true if the backend will be able to handle
5047/// initializing an array of constants like C.
5048static bool ValidLookupTableConstant(Constant *C, const TargetTransformInfo &TTI) {
5049 if (C->isThreadDependent())
5050 return false;
5051 if (C->isDLLImportDependent())
5052 return false;
5053
5054 if (!isa<ConstantFP>(C) && !isa<ConstantInt>(C) &&
5055 !isa<ConstantPointerNull>(C) && !isa<GlobalValue>(C) &&
5056 !isa<UndefValue>(C) && !isa<ConstantExpr>(C))
5057 return false;
5058
5059 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
5060 if (!CE->isGEPWithNoNotionalOverIndexing())
5061 return false;
5062 if (!ValidLookupTableConstant(CE->getOperand(0), TTI))
5063 return false;
5064 }
5065
5066 if (!TTI.shouldBuildLookupTablesForConstant(C))
5067 return false;
5068
5069 return true;
5070}
5071
5072/// If V is a Constant, return it. Otherwise, try to look up
5073/// its constant value in ConstantPool, returning 0 if it's not there.
5074static Constant *
5075LookupConstant(Value *V,
5076 const SmallDenseMap<Value *, Constant *> &ConstantPool) {
5077 if (Constant *C = dyn_cast<Constant>(V))
5078 return C;
5079 return ConstantPool.lookup(V);
5080}
5081
5082/// Try to fold instruction I into a constant. This works for
5083/// simple instructions such as binary operations where both operands are
5084/// constant or can be replaced by constants from the ConstantPool. Returns the
5085/// resulting constant on success, 0 otherwise.
5086static Constant *
5087ConstantFold(Instruction *I, const DataLayout &DL,
5088 const SmallDenseMap<Value *, Constant *> &ConstantPool) {
5089 if (SelectInst *Select = dyn_cast<SelectInst>(I)) {
5090 Constant *A = LookupConstant(Select->getCondition(), ConstantPool);
5091 if (!A)
5092 return nullptr;
5093 if (A->isAllOnesValue())
5094 return LookupConstant(Select->getTrueValue(), ConstantPool);
5095 if (A->isNullValue())
5096 return LookupConstant(Select->getFalseValue(), ConstantPool);
5097 return nullptr;
5098 }
5099
5100 SmallVector<Constant *, 4> COps;
5101 for (unsigned N = 0, E = I->getNumOperands(); N != E; ++N) {
5102 if (Constant *A = LookupConstant(I->getOperand(N), ConstantPool))
5103 COps.push_back(A);
5104 else
5105 return nullptr;
5106 }
5107
5108 if (CmpInst *Cmp = dyn_cast<CmpInst>(I)) {
5109 return ConstantFoldCompareInstOperands(Cmp->getPredicate(), COps[0],
5110 COps[1], DL);
5111 }
5112
5113 return ConstantFoldInstOperands(I, COps, DL);
5114}
5115
5116/// Try to determine the resulting constant values in phi nodes
5117/// at the common destination basic block, *CommonDest, for one of the case
5118 /// destinations CaseDest corresponding to value CaseVal (0 for the default
5119/// case), of a switch instruction SI.
5120static bool
5121GetCaseResults(SwitchInst *SI, ConstantInt *CaseVal, BasicBlock *CaseDest,
5122 BasicBlock **CommonDest,
5123 SmallVectorImpl<std::pair<PHINode *, Constant *>> &Res,
5124 const DataLayout &DL, const TargetTransformInfo &TTI) {
5125 // The block from which we enter the common destination.
5126 BasicBlock *Pred = SI->getParent();
5127
5128 // If CaseDest is empty except for some side-effect free instructions through
5129 // which we can constant-propagate the CaseVal, continue to its successor.
5130 SmallDenseMap<Value *, Constant *> ConstantPool;
5131 ConstantPool.insert(std::make_pair(SI->getCondition(), CaseVal));
5132 for (Instruction &I : CaseDest->instructionsWithoutDebug()) {
5133 if (I.isTerminator()) {
5134 // If the terminator is a simple branch, continue to the next block.
5135 if (I.getNumSuccessors() != 1 || I.isExceptionalTerminator())
5136 return false;
5137 Pred = CaseDest;
5138 CaseDest = I.getSuccessor(0);
5139 } else if (Constant *C = ConstantFold(&I, DL, ConstantPool)) {
5140 // Instruction is side-effect free and constant.
5141
5142 // If the instruction has uses outside this block or a phi node slot for
5143 // the block, it is not safe to bypass the instruction since it would then
5144 // no longer dominate all its uses.
5145 for (auto &Use : I.uses()) {
5146 User *User = Use.getUser();
5147 if (Instruction *I = dyn_cast<Instruction>(User))
5148 if (I->getParent() == CaseDest)
5149 continue;
5150 if (PHINode *Phi = dyn_cast<PHINode>(User))
5151 if (Phi->getIncomingBlock(Use) == CaseDest)
5152 continue;
5153 return false;
5154 }
5155
5156 ConstantPool.insert(std::make_pair(&I, C));
5157 } else {
5158 break;
5159 }
5160 }
5161
5162 // If we did not have a CommonDest before, use the current one.
5163 if (!*CommonDest)
5164 *CommonDest = CaseDest;
5165 // If the destination isn't the common one, abort.
5166 if (CaseDest != *CommonDest)
5167 return false;
5168
5169 // Get the values for this case from phi nodes in the destination block.
5170 for (PHINode &PHI : (*CommonDest)->phis()) {
5171 int Idx = PHI.getBasicBlockIndex(Pred);
5172 if (Idx == -1)
5173 continue;
5174
5175 Constant *ConstVal =
5176 LookupConstant(PHI.getIncomingValue(Idx), ConstantPool);
5177 if (!ConstVal)
5178 return false;
5179
5180 // Be conservative about which kinds of constants we support.
5181 if (!ValidLookupTableConstant(ConstVal, TTI))
5182 return false;
5183
5184 Res.push_back(std::make_pair(&PHI, ConstVal));
5185 }
5186
5187 return Res.size() > 0;
5188}
5189
5190// Helper function used to add CaseVal to the list of cases that generate
5191// Result. Returns the updated number of cases that generate this result.
5192static uintptr_t MapCaseToResult(ConstantInt *CaseVal,
5193 SwitchCaseResultVectorTy &UniqueResults,
5194 Constant *Result) {
5195 for (auto &I : UniqueResults) {
5196 if (I.first == Result) {
5197 I.second.push_back(CaseVal);
5198 return I.second.size();
5199 }
5200 }
5201 UniqueResults.push_back(
5202 std::make_pair(Result, SmallVector<ConstantInt *, 4>(1, CaseVal)));
5203 return 1;
5204}
5205
5206// Helper function that initializes a map containing
5207// results for the PHI node of the common destination block for a switch
5208// instruction. Returns false if multiple PHI nodes have been found or if
5209// there is not a common destination block for the switch.
5210static bool
5211InitializeUniqueCases(SwitchInst *SI, PHINode *&PHI, BasicBlock *&CommonDest,
5212 SwitchCaseResultVectorTy &UniqueResults,
5213 Constant *&DefaultResult, const DataLayout &DL,
5214 const TargetTransformInfo &TTI,
5215 uintptr_t MaxUniqueResults, uintptr_t MaxCasesPerResult) {
5216 for (auto &I : SI->cases()) {
5217 ConstantInt *CaseVal = I.getCaseValue();
5218
5219 // Resulting value at phi nodes for this case value.
5220 SwitchCaseResultsTy Results;
5221 if (!GetCaseResults(SI, CaseVal, I.getCaseSuccessor(), &CommonDest, Results,
5222 DL, TTI))
5223 return false;
5224
5225 // Only one value per case is permitted.
5226 if (Results.size() > 1)
5227 return false;
5228
5229 // Add the case->result mapping to UniqueResults.
5230 const uintptr_t NumCasesForResult =
5231 MapCaseToResult(CaseVal, UniqueResults, Results.begin()->second);
5232
5233 // Early out if there are too many cases for this result.
5234 if (NumCasesForResult > MaxCasesPerResult)
5235 return false;
5236
5237 // Early out if there are too many unique results.
5238 if (UniqueResults.size() > MaxUniqueResults)
5239 return false;
5240
5241 // Check the PHI consistency.
5242 if (!PHI)
5243 PHI = Results[0].first;
5244 else if (PHI != Results[0].first)
5245 return false;
5246 }
5247 // Find the default result value.
5248 SmallVector<std::pair<PHINode *, Constant *>, 1> DefaultResults;
5249 BasicBlock *DefaultDest = SI->getDefaultDest();
5250 GetCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest, DefaultResults,
5251 DL, TTI);
5252 // If the default value is not found abort unless the default destination
5253 // is unreachable.
5254 DefaultResult =
5255 DefaultResults.size() == 1 ? DefaultResults.begin()->second : nullptr;
21: Assuming the condition is false
22: '?' condition is false
5256 if ((!DefaultResult &&
22.1: 'DefaultResult' is null
24: Taking false branch
5257 !isa<UnreachableInst>(DefaultDest->getFirstNonPHIOrDbg())))
23: Assuming the object is a 'UnreachableInst'
5258 return false;
5259
5260 return true;
25: Returning without writing to 'PHI'
26: Returning the value 1, which participates in a condition later
5261}
5262
5263// Helper function that checks if it is possible to transform a switch with only
5264// two cases (or two cases + default) that produces a result into a select.
5265// Example:
5266// switch (a) {
5267// case 10: %0 = icmp eq i32 %a, 10
5268// return 10; %1 = select i1 %0, i32 10, i32 4
5269// case 20: ----> %2 = icmp eq i32 %a, 20
5270// return 2; %3 = select i1 %2, i32 2, i32 %1
5271// default:
5272// return 4;
5273// }
5274static Value *ConvertTwoCaseSwitch(const SwitchCaseResultVectorTy &ResultVector,
5275 Constant *DefaultResult, Value *Condition,
5276 IRBuilder<> &Builder) {
5277 // If we are selecting between only two cases transform into a simple
5278 // select or a two-way select if default is possible.
5279 if (ResultVector.size() == 2 && ResultVector[0].second.size() == 1 &&
30: Assuming the condition is true
31: Assuming the condition is true
32: Assuming the condition is true
33: Taking true branch
5280 ResultVector[1].second.size() == 1) {
5281 ConstantInt *const FirstCase = ResultVector[0].second[0];
5282 ConstantInt *const SecondCase = ResultVector[1].second[0];
5283
5284 bool DefaultCanTrigger = DefaultResult;
5285 Value *SelectValue = ResultVector[1].first;
5286 if (DefaultCanTrigger) {
33.1: 'DefaultCanTrigger' is false
34: Taking false branch
5287 Value *const ValueCompare =
5288 Builder.CreateICmpEQ(Condition, SecondCase, "switch.selectcmp");
5289 SelectValue = Builder.CreateSelect(ValueCompare, ResultVector[1].first,
5290 DefaultResult, "switch.select");
5291 }
5292 Value *const ValueCompare =
5293 Builder.CreateICmpEQ(Condition, FirstCase, "switch.selectcmp");
5294 return Builder.CreateSelect(ValueCompare, ResultVector[0].first,
5295 SelectValue, "switch.select");
35: Returning pointer, which participates in a condition later
5296 }
5297
5298 // Handle the degenerate case where two cases have the same value.
5299 if (ResultVector.size() == 1 && ResultVector[0].second.size() == 2 &&
5300 DefaultResult) {
5301 Value *Cmp1 = Builder.CreateICmpEQ(
5302 Condition, ResultVector[0].second[0], "switch.selectcmp.case1");
5303 Value *Cmp2 = Builder.CreateICmpEQ(
5304 Condition, ResultVector[0].second[1], "switch.selectcmp.case2");
5305 Value *Cmp = Builder.CreateOr(Cmp1, Cmp2, "switch.selectcmp");
5306 return Builder.CreateSelect(Cmp, ResultVector[0].first, DefaultResult);
5307 }
5308
5309 return nullptr;
5310}
5311
5312// Helper function to cleanup a switch instruction that has been converted into
5313// a select, fixing up PHI nodes and basic blocks.
5314static void RemoveSwitchAfterSelectConversion(SwitchInst *SI, PHINode *PHI,
5315 Value *SelectValue,
5316 IRBuilder<> &Builder,
5317 DomTreeUpdater *DTU) {
5318 std::vector<DominatorTree::UpdateType> Updates;
5319
5320 BasicBlock *SelectBB = SI->getParent();
5321 BasicBlock *DestBB = PHI->getParent();
41: Called C++ object pointer is null
5322
5323 if (DTU && !is_contained(predecessors(DestBB), SelectBB))
5324 Updates.push_back({DominatorTree::Insert, SelectBB, DestBB});
5325 Builder.CreateBr(DestBB);
5326
5327 // Remove the switch.
5328
5329 while (PHI->getBasicBlockIndex(SelectBB) >= 0)
5330 PHI->removeIncomingValue(SelectBB);
5331 PHI->addIncoming(SelectValue, SelectBB);
5332
5333 SmallPtrSet<BasicBlock *, 4> RemovedSuccessors;
5334 for (unsigned i = 0, e = SI->getNumSuccessors(); i < e; ++i) {
5335 BasicBlock *Succ = SI->getSuccessor(i);
5336
5337 if (Succ == DestBB)
5338 continue;
5339 Succ->removePredecessor(SelectBB);
5340 if (DTU && RemovedSuccessors.insert(Succ).second)
5341 Updates.push_back({DominatorTree::Delete, SelectBB, Succ});
5342 }
5343 SI->eraseFromParent();
5344 if (DTU)
5345 DTU->applyUpdates(Updates);
5346}
5347
5348/// If the switch is only used to initialize one or more
5349/// phi nodes in a common successor block with only two different
5350/// constant values, replace the switch with select.
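In IR terms, the pattern being matched looks roughly like this (block and value names are illustrative; the produced icmp/select chain corresponds to the ConvertTwoCaseSwitch example above):

  ;; before: every case feeds the same phi in %end with a constant
  switch i32 %a, label %default [
    i32 10, label %case10
    i32 20, label %case20
  ]
  case10:  br label %end
  case20:  br label %end
  default: br label %end
  end:
    %r = phi i32 [ 10, %case10 ], [ 2, %case20 ], [ 4, %default ]

  ;; after: the switch block computes the value and branches straight to %end
  %c20 = icmp eq i32 %a, 20
  %s1  = select i1 %c20, i32 2, i32 4
  %c10 = icmp eq i32 %a, 10
  %s2  = select i1 %c10, i32 10, i32 %s1
  br label %end          ; the phi in %end now takes %s2 from this block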
5351static bool switchToSelect(SwitchInst *SI, IRBuilder<> &Builder,
5352 DomTreeUpdater *DTU, const DataLayout &DL,
5353 const TargetTransformInfo &TTI) {
5354 Value *const Cond = SI->getCondition();
5355 PHINode *PHI = nullptr;
19: 'PHI' initialized to a null pointer value
5356 BasicBlock *CommonDest = nullptr;
5357 Constant *DefaultResult;
5358 SwitchCaseResultVectorTy UniqueResults;
5359 // Collect all the cases that will deliver the same value from the switch.
5360 if (!InitializeUniqueCases(SI, PHI, CommonDest, UniqueResults, DefaultResult,
20: Calling 'InitializeUniqueCases'
27: Returning from 'InitializeUniqueCases'
28: Taking false branch
5361 DL, TTI, /*MaxUniqueResults*/2,
5362 /*MaxCasesPerResult*/2))
5363 return false;
5364 assert(PHI != nullptr && "PHI for value select not found");
5365
5366 Builder.SetInsertPoint(SI);
5367 Value *SelectValue =
5368 ConvertTwoCaseSwitch(UniqueResults, DefaultResult, Cond, Builder);
29: Calling 'ConvertTwoCaseSwitch'
36: Returning from 'ConvertTwoCaseSwitch'
5369 if (SelectValue) {
37: Assuming 'SelectValue' is non-null
38: Taking true branch
5370 RemoveSwitchAfterSelectConversion(SI, PHI, SelectValue, Builder, DTU);
39: Passing null pointer value via 2nd parameter 'PHI'
40: Calling 'RemoveSwitchAfterSelectConversion'
5371 return true;
5372 }
5373 // The switch couldn't be converted into a select.
5374 return false;
5375}
5376
5377namespace {
5378
5379/// This class represents a lookup table that can be used to replace a switch.
5380class SwitchLookupTable {
5381public:
5382 /// Create a lookup table to use as a switch replacement with the contents
5383 /// of Values, using DefaultValue to fill any holes in the table.
5384 SwitchLookupTable(
5385 Module &M, uint64_t TableSize, ConstantInt *Offset,
5386 const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
5387 Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName);
5388
5389 /// Build instructions with Builder to retrieve the value at
5390 /// the position given by Index in the lookup table.
5391 Value *BuildLookup(Value *Index, IRBuilder<> &Builder);
5392
5393 /// Return true if a table with TableSize elements of
5394 /// type ElementType would fit in a target-legal register.
5395 static bool WouldFitInRegister(const DataLayout &DL, uint64_t TableSize,
5396 Type *ElementType);
5397
5398private:
5399 // Depending on the contents of the table, it can be represented in
5400 // different ways.
5401 enum {
5402 // For tables where each element contains the same value, we just have to
5403 // store that single value and return it for each lookup.
5404 SingleValueKind,
5405
5406 // For tables where there is a linear relationship between table index
5407 // and values. We calculate the result with a simple multiplication
5408 // and addition instead of a table lookup.
5409 LinearMapKind,
5410
5411 // For small tables with integer elements, we can pack them into a bitmap
5412 // that fits into a target-legal register. Values are retrieved by
5413 // shift and mask operations.
5414 BitMapKind,
5415
5416 // The table is stored as an array of values. Values are retrieved by load
5417 // instructions from the table.
5418 ArrayKind
5419 } Kind;
5420
5421 // For SingleValueKind, this is the single value.
5422 Constant *SingleValue = nullptr;
5423
5424 // For BitMapKind, this is the bitmap.
5425 ConstantInt *BitMap = nullptr;
5426 IntegerType *BitMapElementTy = nullptr;
5427
5428 // For LinearMapKind, these are the constants used to derive the value.
5429 ConstantInt *LinearOffset = nullptr;
5430 ConstantInt *LinearMultiplier = nullptr;
5431
5432 // For ArrayKind, this is the array.
5433 GlobalVariable *Array = nullptr;
5434};
5435
5436} // end anonymous namespace
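As a rough illustration of the two compact representations described in the enum above (the exact IR that BuildLookup emits may differ; the value names here are invented):

  ;; LinearMapKind: Result = LinearOffset + Index * LinearMultiplier
  %lin.mul = mul i32 %index, %linear.multiplier
  %result  = add i32 %lin.mul, %linear.offset

  ;; BitMapKind: shift the packed bitmap down by Index * element-width
  ;; and truncate to the element type (8-bit elements assumed here).
  %idx.ext = zext i32 %index to i64
  %shamt   = mul i64 %idx.ext, 8
  %shifted = lshr i64 %bitmap, %shamt
  %value   = trunc i64 %shifted to i8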
5437
5438SwitchLookupTable::SwitchLookupTable(
5439 Module &M, uint64_t TableSize, ConstantInt *Offset,
5440 const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values,
5441 Constant *DefaultValue, const DataLayout &DL, const StringRef &FuncName) {
5442 assert(Values.size() && "Can't build lookup table without values!");
5443 assert(TableSize >= Values.size() && "Can't fit values in table!");
5444
5445 // If all values in the table are equal, this is that value.
5446 SingleValue = Values.begin()->second;
5447
5448 Type *ValueType = Values.begin()->second->getType();
5449
5450 // Build up the table contents.
5451 SmallVector<Constant *, 64> TableContents(TableSize);
5452 for (size_t I = 0, E = Values.size(); I != E; ++I) {
5453 ConstantInt *CaseVal = Values[I].first;
5454 Constant *CaseRes = Values[I].second;
5455 assert(CaseRes->getType() == ValueType);
5456
5457 uint64_t Idx = (CaseVal->getValue() - Offset->getValue()).getLimitedValue();
5458 TableContents[Idx] = CaseRes;
5459
5460 if (CaseRes != SingleValue)
5461 SingleValue = nullptr;
5462 }
5463
5464 // Fill in any holes in the table with the default result.
5465 if (Values.size() < TableSize) {
5466 assert(DefaultValue &&
5467 "Need a default value to fill the lookup table holes.");
5468 assert(DefaultValue->getType() == ValueType);
5469 for (uint64_t I = 0; I < TableSize; ++I) {
5470 if (!TableContents[I])
5471 TableContents[I] = DefaultValue;
5472 }
5473
5474 if (DefaultValue != SingleValue)
5475 SingleValue = nullptr;
5476 }
5477
5478 // If each element in the table contains the same value, we only need to store
5479 // that single value.
5480 if (SingleValue) {
5481 Kind = SingleValueKind;
5482 return;
5483 }
5484
5485 // Check if we can derive the value with a linear transformation from the
5486 // table index.
5487 if (isa<IntegerType>(ValueType)) {
5488 bool LinearMappingPossible = true;
5489 APInt PrevVal;
5490 APInt DistToPrev;
5491 assert(TableSize >= 2 && "Should be a SingleValue table.");
5492 // Check if there is the same distance between two consecutive values.
5493 for (uint64_t I = 0; I < TableSize; ++I) {
5494 ConstantInt *ConstVal = dyn_cast<ConstantInt>(TableContents[I]);
5495 if (!ConstVal) {
5496 // This is an undef. We could deal with it, but undefs in lookup tables
5497 // are very rare. It's probably not worth the additional complexity.
5498 LinearMappingPossible = false;
5499 break;
5500 }
5501 const APInt &Val = ConstVal->getValue();
5502 if (I != 0) {
5503 APInt Dist = Val - PrevVal;
5504 if (I == 1) {
5505 DistToPrev = Dist;
5506 } else if (Dist != DistToPrev) {
5507 LinearMappingPossible = false;
5508 break;
5509 }
5510 }
5511 PrevVal = Val;
5512 }
5513 if (LinearMappingPossible) {
5514 LinearOffset = cast<ConstantInt>(TableContents[0]);
5515 LinearMultiplier = ConstantInt::get(M.getContext(), DistToPrev);
5516 Kind = LinearMapKind;
5517 ++NumLinearMaps;
5518 return;
5519 }
5520 }
5521
5522 // If the type is integer and the table fits in a register, build a bitmap.
5523 if (WouldFitInRegister(DL, TableSize, ValueType)) {
5524 IntegerType *IT = cast<IntegerType>(ValueType);
5525 APInt TableInt(TableSize * IT->getBitWidth(), 0);
5526 for (uint64_t I = TableSize; I > 0; --I) {
5527 TableInt <<= IT->getBitWidth();
5528 // Insert values into the bitmap. Undef values are set to zero.
5529 if (!isa<UndefValue>(TableContents[I - 1])) {
5530 ConstantInt *Val = cast<ConstantInt>(TableContents[I - 1]);
5531 TableInt |= Val->getValue().zext(TableInt.getBitWidth());
5532 }
5533 }
5534 BitMap = ConstantInt::get(M.getContext(), TableInt);
5535 BitMapElementTy = IT;
5536 Kind = BitMapKind;
5537 ++NumBitMaps;
5538 return;
5539 }
5540
5541 // Store the table in an array.
5542 ArrayType *ArrayTy = ArrayType::get(ValueType, TableSize);
5543 Constant *Initializer = ConstantArray::get(ArrayTy, TableContents);
5544
5545 Array = new GlobalVariable(M, ArrayTy, /*isConstant=*/true,
5546 GlobalVariable::PrivateLinkage, Initializer,
5547 "switch.table." + FuncName);
5548 Array->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
5549 // Set the alignment to that of the array's elements. We will only be loading
5550 // one value out of it.
5551 Array->setAlignment(Align(DL.getPrefTypeAlignment(ValueType)));
5552 Kind = ArrayKind;
5553}
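As a reading aid for the constructor above, here is a minimal standalone sketch of the same representation choice, written against plain std:: types; classifyTable and its parameters are made-up names for illustration and are not part of the pass.

#include <cstdint>
#include <vector>

enum class TableKind { SingleValue, LinearMap, BitMapOrArray };

// Mirrors the decision logic for a dense table of integer results: the same
// value everywhere -> SingleValue; a constant distance between consecutive
// entries -> LinearMap (value = Offset + index * Multiplier); otherwise fall
// back to a bitmap or an array.
TableKind classifyTable(const std::vector<int64_t> &Table,
                        int64_t &Offset, int64_t &Multiplier) {
  bool AllEqual = true;
  bool Linear = Table.size() >= 2;
  int64_t Stride = 0;
  for (size_t I = 1; I < Table.size(); ++I) {
    if (Table[I] != Table[0])
      AllEqual = false;
    int64_t Dist = Table[I] - Table[I - 1];
    if (I == 1)
      Stride = Dist;
    else if (Dist != Stride)
      Linear = false;
  }
  if (AllEqual)
    return TableKind::SingleValue;
  if (Linear) {
    Offset = Table[0];
    Multiplier = Stride;
    return TableKind::LinearMap;       // value(i) = Offset + i * Multiplier
  }
  return TableKind::BitMapOrArray;
}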
5554
5555Value *SwitchLookupTable::BuildLookup(Value *Index, IRBuilder<> &Builder) {
5556 switch (Kind) {
5557 case SingleValueKind:
5558 return SingleValue;
5559 case LinearMapKind: {
5560 // Derive the result value from the input value.
5561 Value *Result = Builder.CreateIntCast(Index, LinearMultiplier->getType(),
5562 false, "switch.idx.cast");
5563 if (!LinearMultiplier->isOne())
5564 Result = Builder.CreateMul(Result, LinearMultiplier, "switch.idx.mult");
5565 if (!LinearOffset->isZero())
5566 Result = Builder.CreateAdd(Result, LinearOffset, "switch.offset");
5567 return Result;
5568 }
5569 case BitMapKind: {
5570 // Type of the bitmap (e.g. i59).
5571 IntegerType *MapTy = BitMap->getType();
5572
5573 // Cast Index to the same type as the bitmap.
5574 // Note: The Index is <= the number of elements in the table, so
5575 // truncating it to the width of the bitmask is safe.
5576 Value *ShiftAmt = Builder.CreateZExtOrTrunc(Index, MapTy, "switch.cast");
5577
5578 // Multiply the shift amount by the element width.
5579 ShiftAmt = Builder.CreateMul(
5580 ShiftAmt, ConstantInt::get(MapTy, BitMapElementTy->getBitWidth()),
5581 "switch.shiftamt");
5582
5583 // Shift down.
5584 Value *DownShifted =
5585 Builder.CreateLShr(BitMap, ShiftAmt, "switch.downshift");
5586 // Mask off.
5587 return Builder.CreateTrunc(DownShifted, BitMapElementTy, "switch.masked");
5588 }
5589 case ArrayKind: {
5590 // Make sure the table index will not overflow when treated as signed.
5591 IntegerType *IT = cast<IntegerType>(Index->getType());
5592 uint64_t TableSize =
5593 Array->getInitializer()->getType()->getArrayNumElements();
5594 if (TableSize > (1ULL << (IT->getBitWidth() - 1)))
5595 Index = Builder.CreateZExt(
5596 Index, IntegerType::get(IT->getContext(), IT->getBitWidth() + 1),
5597 "switch.tableidx.zext");
5598
5599 Value *GEPIndices[] = {Builder.getInt32(0), Index};
5600 Value *GEP = Builder.CreateInBoundsGEP(Array->getValueType(), Array,
5601 GEPIndices, "switch.gep");
5602 return Builder.CreateLoad(
5603 cast<ArrayType>(Array->getValueType())->getElementType(), GEP,
5604 "switch.load");
5605 }
5606 }
5607 llvm_unreachable("Unknown lookup table kind!");
5608}
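The BitMapKind branch above is plain shift-and-mask arithmetic on one wide integer. Below is a self-contained sketch of the same idea on a single 64-bit word; the function names and the fixed 64-bit width are assumptions for illustration, not the pass's API.

#include <cassert>
#include <cstdint>

// Pack TableSize elements of ElemBits each into one 64-bit word, lowest index
// in the least significant bits, then read element Index back with a shift
// and a mask -- the same arithmetic the BitMapKind path emits as IR.
uint64_t packBitmap(const uint8_t *Elems, unsigned TableSize, unsigned ElemBits) {
  assert(ElemBits > 0 && ElemBits < 64 && TableSize * ElemBits <= 64 &&
         "table must fit in one 64-bit register");
  uint64_t Bits = 0;
  for (unsigned I = TableSize; I > 0; --I) {
    Bits <<= ElemBits;                  // make room, highest index first
    Bits |= Elems[I - 1];               // values are assumed < 2^ElemBits
  }
  return Bits;
}

uint64_t lookupBitmap(uint64_t Bits, unsigned Index, unsigned ElemBits) {
  uint64_t Shifted = Bits >> (Index * ElemBits);    // "switch.downshift"
  uint64_t Mask = (1ULL << ElemBits) - 1;
  return Shifted & Mask;                            // "switch.masked"
}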
5609
5610bool SwitchLookupTable::WouldFitInRegister(const DataLayout &DL,
5611 uint64_t TableSize,
5612 Type *ElementType) {
5613 auto *IT = dyn_cast<IntegerType>(ElementType);
5614 if (!IT)
5615 return false;
5616 // FIXME: If the type is wider than it needs to be, e.g. i8 but all values
5617 // are <= 15, we could try to narrow the type.
5618
5619 // Avoid overflow, fitsInLegalInteger uses unsigned int for the width.
5620 if (TableSize >= UINT_MAX / IT->getBitWidth())
5621 return false;
5622 return DL.fitsInLegalInteger(TableSize * IT->getBitWidth());
5623}
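A worked version of this guard, with the target's widest legal integer hard-coded to 64 bits instead of queried from the DataLayout (an assumption that happens to match typical 64-bit targets): an i8-element table fits as a bitmap up to 8 entries, not 9.

#include <climits>
#include <cstdint>

// Sketch of the same check with a fixed 64-bit "legal integer" width.
bool wouldFitInRegister64(uint64_t TableSize, unsigned ElemBits) {
  if (ElemBits == 0)
    return false;
  // Avoid overflow in the multiplication below, like the UINT_MAX guard above.
  if (TableSize >= UINT_MAX / ElemBits)
    return false;
  return TableSize * ElemBits <= 64;   // e.g. 8 x i8 fits, 9 x i8 does not
}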
5624
5625/// Determine whether a lookup table should be built for this switch, based on
5626/// the number of cases, size of the table, and the types of the results.
5627static bool
5628ShouldBuildLookupTable(SwitchInst *SI, uint64_t TableSize,
5629 const TargetTransformInfo &TTI, const DataLayout &DL,
5630 const SmallDenseMap<PHINode *, Type *> &ResultTypes) {
5631 if (SI->getNumCases() > TableSize || TableSize >= UINT64_MAX / 10)
5632 return false; // TableSize overflowed, or mul below might overflow.
5633
5634 bool AllTablesFitInRegister = true;
5635 bool HasIllegalType = false;
5636 for (const auto &I : ResultTypes) {
5637 Type *Ty = I.second;
5638
5639 // Saturate this flag to true.
5640 HasIllegalType = HasIllegalType || !TTI.isTypeLegal(Ty);
5641
5642 // Saturate this flag to false.
5643 AllTablesFitInRegister =
5644 AllTablesFitInRegister &&
5645 SwitchLookupTable::WouldFitInRegister(DL, TableSize, Ty);
5646
5647 // If both flags saturate, we're done. NOTE: This *only* works with
5648 // saturating flags, and all flags have to saturate first due to the
5649 // non-deterministic behavior of iterating over a dense map.
5650 if (HasIllegalType && !AllTablesFitInRegister)
5651 break;
5652 }
5653
5654 // If each table would fit in a register, we should build it anyway.
5655 if (AllTablesFitInRegister)
5656 return true;
5657
5658 // Don't build a table that doesn't fit in-register if it has illegal types.
5659 if (HasIllegalType)
5660 return false;
5661
5662 // The table density should be at least 40%. This is the same criterion as for
5663 // jump tables, see SelectionDAGBuilder::handleJTSwitchCase.
5664 // FIXME: Find the best cut-off.
5665 return SI->getNumCases() * 10 >= TableSize * 4;
5666}
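The final test above encodes a density cut-off of 40%. A small worked example with made-up numbers: 5 cases spanning values 0..9 give TableSize = 10 and 5 * 10 >= 10 * 4, so the table is built; 4 cases spanning 0..10 give 4 * 10 < 11 * 4, and the switch is left alone.

#include <cstdint>

// The same cut-off as the return statement above, as a standalone predicate;
// NumCases and TableSize mirror SI->getNumCases() and TableSize.
bool denseEnough(uint64_t NumCases, uint64_t TableSize) {
  return NumCases * 10 >= TableSize * 4;   // i.e. density >= 40%
}
// denseEnough(5, 10) -> true  (50% dense)
// denseEnough(4, 11) -> false (~36% dense)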
5667
5668/// Try to reuse the switch table index compare. Following pattern:
5669/// \code
5670/// if (idx < tablesize)
5671/// r = table[idx]; // table does not contain default_value
5672/// else
5673/// r = default_value;
5674/// if (r != default_value)
5675/// ...
5676/// \endcode
5677/// Is optimized to:
5678/// \code
5679/// cond = idx < tablesize;
5680/// if (cond)
5681/// r = table[idx];
5682/// else
5683/// r = default_value;
5684/// if (cond)
5685/// ...
5686/// \endcode
5687/// Jump threading will then eliminate the second if(cond).
5688static void reuseTableCompare(
5689 User *PhiUser, BasicBlock *PhiBlock, BranchInst *RangeCheckBranch,
5690 Constant *DefaultValue,
5691 const SmallVectorImpl<std::pair<ConstantInt *, Constant *>> &Values) {
5692 ICmpInst *CmpInst = dyn_cast<ICmpInst>(PhiUser);
5693 if (!CmpInst)
5694 return;
5695
5696 // We require that the compare is in the same block as the phi so that jump
5697 // threading can do its work afterwards.
5698 if (CmpInst->getParent() != PhiBlock)
5699 return;
5700
5701 Constant *CmpOp1 = dyn_cast<Constant>(CmpInst->getOperand(1));
5702 if (!CmpOp1)
5703 return;
5704
5705 Value *RangeCmp = RangeCheckBranch->getCondition();
5706 Constant *TrueConst = ConstantInt::getTrue(RangeCmp->getType());
5707 Constant *FalseConst = ConstantInt::getFalse(RangeCmp->getType());
5708
5709 // Check if the compare with the default value is constant true or false.
5710 Constant *DefaultConst = ConstantExpr::getICmp(CmpInst->getPredicate(),
5711 DefaultValue, CmpOp1, true);
5712 if (DefaultConst != TrueConst && DefaultConst != FalseConst)
5713 return;
5714
5715 // Check if the compare with the case values is distinct from the default
5716 // compare result.
5717 for (auto ValuePair : Values) {
5718 Constant *CaseConst = ConstantExpr::getICmp(CmpInst->getPredicate(),
5719 ValuePair.second, CmpOp1, true);
5720 if (!CaseConst || CaseConst == DefaultConst || isa<UndefValue>(CaseConst))
5721 return;
5722 assert((CaseConst == TrueConst || CaseConst == FalseConst) &&
5723 "Expect true or false as compare result.");
5724 }
5725
5726 // Check if the branch instruction dominates the phi node. It's a simple
5727 // dominance check, but sufficient for our needs.
5728 // Although this check is invariant in the calling loops, it's better to do it
5729 // at this late stage. Practically we do it at most once for a switch.
5730 BasicBlock *BranchBlock = RangeCheckBranch->getParent();
5731 for (BasicBlock *Pred : predecessors(PhiBlock)) {
5732 if (Pred != BranchBlock && Pred->getUniquePredecessor() != BranchBlock)
5733 return;
5734 }
5735
5736 if (DefaultConst == FalseConst) {
5737 // The compare yields the same result. We can replace it.
5738 CmpInst->replaceAllUsesWith(RangeCmp);
5739 ++NumTableCmpReuses;
5740 } else {
5741 // The compare yields the same result, just inverted. We can replace it.
5742 Value *InvertedTableCmp = BinaryOperator::CreateXor(
5743 RangeCmp, ConstantInt::get(RangeCmp->getType(), 1), "inverted.cmp",
5744 RangeCheckBranch);
5745 CmpInst->replaceAllUsesWith(InvertedTableCmp);
5746 ++NumTableCmpReuses;
5747 }
5748}
5749
5750/// If the switch is only used to initialize one or more phi nodes in a common
5751/// successor block with different constant values, replace the switch with
5752/// lookup tables.
5753static bool SwitchToLookupTable(SwitchInst *SI, IRBuilder<> &Builder,
5754 DomTreeUpdater *DTU, const DataLayout &DL,
5755 const TargetTransformInfo &TTI) {
5756 assert(SI->getNumCases() > 1 && "Degenerate switch?");
5757
5758 BasicBlock *BB = SI->getParent();
5759 Function *Fn = BB->getParent();
5760 // Only build a lookup table when the target supports it and the
5761 // "no-jump-tables" attribute is not set.
5762 if (!TTI.shouldBuildLookupTables() ||
5763 (Fn->getFnAttribute("no-jump-tables").getValueAsBool()))
5764 return false;
5765
5766 // FIXME: If the switch is too sparse for a lookup table, perhaps we could
5767 // split off a dense part and build a lookup table for that.
5768
5769 // FIXME: This creates arrays of GEPs to constant strings, which means each
5770 // GEP needs a runtime relocation in PIC code. We should just build one big
5771 // string and lookup indices into that.
5772
5773 // Ignore switches with fewer than three cases. Lookup tables will not make
5774 // them faster, so we don't analyze them.
5775 if (SI->getNumCases() < 3)
5776 return false;
5777
5778 // Figure out the corresponding result for each case value and phi node in the
5779 // common destination, as well as the min and max case values.
5780 assert(!SI->cases().empty());
5781 SwitchInst::CaseIt CI = SI->case_begin();
5782 ConstantInt *MinCaseVal = CI->getCaseValue();
5783 ConstantInt *MaxCaseVal = CI->getCaseValue();
5784
5785 BasicBlock *CommonDest = nullptr;
5786
5787 using ResultListTy = SmallVector<std::pair<ConstantInt *, Constant *>, 4>;
5788 SmallDenseMap<PHINode *, ResultListTy> ResultLists;
5789
5790 SmallDenseMap<PHINode *, Constant *> DefaultResults;
5791 SmallDenseMap<PHINode *, Type *> ResultTypes;
5792 SmallVector<PHINode *, 4> PHIs;
5793
5794 for (SwitchInst::CaseIt E = SI->case_end(); CI != E; ++CI) {
5795 ConstantInt *CaseVal = CI->getCaseValue();
5796 if (CaseVal->getValue().slt(MinCaseVal->getValue()))
5797 MinCaseVal = CaseVal;
5798 if (CaseVal->getValue().sgt(MaxCaseVal->getValue()))
5799 MaxCaseVal = CaseVal;
5800
5801 // Resulting value at phi nodes for this case value.
5802 using ResultsTy = SmallVector<std::pair<PHINode *, Constant *>, 4>;
5803 ResultsTy Results;
5804 if (!GetCaseResults(SI, CaseVal, CI->getCaseSuccessor(), &CommonDest,
5805 Results, DL, TTI))
5806 return false;
5807
5808 // Append the result from this case to the list for each phi.
5809 for (const auto &I : Results) {
5810 PHINode *PHI = I.first;
5811 Constant *Value = I.second;
5812 if (!ResultLists.count(PHI))
5813 PHIs.push_back(PHI);
5814 ResultLists[PHI].push_back(std::make_pair(CaseVal, Value));
5815 }
5816 }
5817
5818 // Keep track of the result types.
5819 for (PHINode *PHI : PHIs) {
5820 ResultTypes[PHI] = ResultLists[PHI][0].second->getType();
5821 }
5822
5823 uint64_t NumResults = ResultLists[PHIs[0]].size();
5824 APInt RangeSpread = MaxCaseVal->getValue() - MinCaseVal->getValue();
5825 uint64_t TableSize = RangeSpread.getLimitedValue() + 1;
5826 bool TableHasHoles = (NumResults < TableSize);
5827
5828 // If the table has holes, we need a constant result for the default case
5829 // or a bitmask that fits in a register.
5830 SmallVector<std::pair<PHINode *, Constant *>, 4> DefaultResultsList;
5831 bool HasDefaultResults =
5832 GetCaseResults(SI, nullptr, SI->getDefaultDest(), &CommonDest,
5833 DefaultResultsList, DL, TTI);
5834
5835 bool NeedMask = (TableHasHoles && !HasDefaultResults);
5836 if (NeedMask) {
5837 // As an extra penalty for the validity test we require more cases.
5838 if (SI->getNumCases() < 4) // FIXME: Find best threshold value (benchmark).
5839 return false;
5840 if (!DL.fitsInLegalInteger(TableSize))
5841 return false;
5842 }
5843
5844 for (const auto &I : DefaultResultsList) {
5845 PHINode *PHI = I.first;
5846 Constant *Result = I.second;
5847 DefaultResults[PHI] = Result;
5848 }
5849
5850 if (!ShouldBuildLookupTable(SI, TableSize, TTI, DL, ResultTypes))
5851 return false;
5852
5853 std::vector<DominatorTree::UpdateType> Updates;
5854
5855 // Create the BB that does the lookups.
5856 Module &Mod = *CommonDest->getParent()->getParent();
5857 BasicBlock *LookupBB = BasicBlock::Create(
5858 Mod.getContext(), "switch.lookup", CommonDest->getParent(), CommonDest);
5859
5860 // Compute the table index value.
5861 Builder.SetInsertPoint(SI);
5862 Value *TableIndex;
5863 if (MinCaseVal->isNullValue())
5864 TableIndex = SI->getCondition();
5865 else
5866 TableIndex = Builder.CreateSub(SI->getCondition(), MinCaseVal,
5867 "switch.tableidx");
5868
5869 // Compute the maximum table size representable by the integer type we are
5870 // switching upon.
5871 unsigned CaseSize = MinCaseVal->getType()->getPrimitiveSizeInBits();
5872 uint64_t MaxTableSize = CaseSize > 63 ? UINT64_MAX : 1ULL << CaseSize;
5873 assert(MaxTableSize >= TableSize &&
5874 "It is impossible for a switch to have more entries than the max "
5875 "representable value of its input integer type's size.");
5876
5877 // If the default destination is unreachable, or if the lookup table covers
5878 // all values of the conditional variable, branch directly to the lookup table
5879 // BB. Otherwise, check that the condition is within the case range.
5880 const bool DefaultIsReachable =
5881 !isa<UnreachableInst>(SI->getDefaultDest()->getFirstNonPHIOrDbg());
5882 const bool GeneratingCoveredLookupTable = (MaxTableSize == TableSize);
5883 BranchInst *RangeCheckBranch = nullptr;
5884
5885 if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
5886 Builder.CreateBr(LookupBB);
5887 if (DTU)
5888 Updates.push_back({DominatorTree::Insert, BB, LookupBB});
5889 // Note: We call removePredecessor later since we need to be able to get the
5890 // PHI value for the default case in case we're using a bit mask.
5891 } else {
5892 Value *Cmp = Builder.CreateICmpULT(
5893 TableIndex, ConstantInt::get(MinCaseVal->getType(), TableSize));
5894 RangeCheckBranch =
5895 Builder.CreateCondBr(Cmp, LookupBB, SI->getDefaultDest());
5896 if (DTU)
5897 Updates.push_back({DominatorTree::Insert, BB, LookupBB});
5898 }
5899
5900 // Populate the BB that does the lookups.
5901 Builder.SetInsertPoint(LookupBB);
5902
5903 if (NeedMask) {
5904 // Before doing the lookup, we do the hole check. The LookupBB is therefore
5905 // re-purposed to do the hole check, and we create a new LookupBB.
5906 BasicBlock *MaskBB = LookupBB;
5907 MaskBB->setName("switch.hole_check");
5908 LookupBB = BasicBlock::Create(Mod.getContext(), "switch.lookup",
5909 CommonDest->getParent(), CommonDest);
5910
5911 // Make the mask's bitwidth at least 8-bit and a power-of-2 to avoid
5912 // unnecessary illegal types.
5913 uint64_t TableSizePowOf2 = NextPowerOf2(std::max(7ULL, TableSize - 1ULL));
5914 APInt MaskInt(TableSizePowOf2, 0);
5915 APInt One(TableSizePowOf2, 1);
5916 // Build bitmask; fill in a 1 bit for every case.
5917 const ResultListTy &ResultList = ResultLists[PHIs[0]];
5918 for (size_t I = 0, E = ResultList.size(); I != E; ++I) {
5919 uint64_t Idx = (ResultList[I].first->getValue() - MinCaseVal->getValue())
5920 .getLimitedValue();
5921 MaskInt |= One << Idx;
5922 }
5923 ConstantInt *TableMask = ConstantInt::get(Mod.getContext(), MaskInt);
5924
5925 // Get the TableIndex'th bit of the bitmask.
5926 // If this bit is 0 (meaning hole) jump to the default destination,
5927 // else continue with table lookup.
5928 IntegerType *MapTy = TableMask->getType();
5929 Value *MaskIndex =
5930 Builder.CreateZExtOrTrunc(TableIndex, MapTy, "switch.maskindex");
5931 Value *Shifted = Builder.CreateLShr(TableMask, MaskIndex, "switch.shifted");
5932 Value *LoBit = Builder.CreateTrunc(
5933 Shifted, Type::getInt1Ty(Mod.getContext()), "switch.lobit");
5934 Builder.CreateCondBr(LoBit, LookupBB, SI->getDefaultDest());
5935 if (DTU) {
5936 Updates.push_back({DominatorTree::Insert, MaskBB, LookupBB});
5937 Updates.push_back({DominatorTree::Insert, MaskBB, SI->getDefaultDest()});
5938 }
5939 Builder.SetInsertPoint(LookupBB);
5940 AddPredecessorToBlock(SI->getDefaultDest(), MaskBB, BB);
5941 }
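The hole check built in this block is the one-bit-per-case variant of the bitmap trick. Below is a standalone sketch under the assumption that the whole case range fits in 64 bits; the pass instead widens the mask to the next power of two of the table size, and these function names are illustrative.

#include <cstdint>
#include <vector>

// Build a mask with bit (CaseValue - MinCaseVal) set for every real case,
// then test the TableIndex'th bit before trusting the table: 0 means "hole",
// i.e. fall through to the switch's default destination.
uint64_t buildHoleMask(const std::vector<uint64_t> &CaseValues,
                       uint64_t MinCaseVal) {
  uint64_t Mask = 0;
  for (uint64_t V : CaseValues)
    Mask |= 1ULL << (V - MinCaseVal);   // assumes the case span is < 64
  return Mask;
}

bool indexHitsRealCase(uint64_t Mask, uint64_t TableIndex) {
  return (Mask >> TableIndex) & 1;      // "switch.shifted" + "switch.lobit"
}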
5942
5943 if (!DefaultIsReachable || GeneratingCoveredLookupTable) {
5944 // We cached PHINodes in PHIs. To avoid accessing deleted PHINodes later,
5945 // do not delete PHINodes here.
5946 SI->getDefaultDest()->removePredecessor(BB,
5947 /*KeepOneInputPHIs=*/true);
5948 if (DTU)
5949 Updates.push_back({DominatorTree::Delete, BB, SI->getDefaultDest()});
5950 }
5951
5952 for (PHINode *PHI : PHIs) {
5953 const ResultListTy &ResultList = ResultLists[PHI];
5954
5955 // If using a bitmask, use any value to fill the lookup table holes.
5956 Constant *DV = NeedMask ? ResultLists[PHI][0].second : DefaultResults[PHI];
5957 StringRef FuncName = Fn->getName();
5958 SwitchLookupTable Table(Mod, TableSize, MinCaseVal, ResultList, DV, DL,
5959 FuncName);
5960
5961 Value *Result = Table.BuildLookup(TableIndex, Builder);
5962
5963 // Do a small peephole optimization: re-use the switch table compare if
5964 // possible.
5965 if (!TableHasHoles && HasDefaultResults && RangeCheckBranch) {
5966 BasicBlock *PhiBlock = PHI->getParent();
5967 // Search for compare instructions which use the phi.
5968 for (auto *User : PHI->users()) {
5969