Bug Summary

File:src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp
Warning:line 939, column 42
Called C++ object pointer is null

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple amd64-unknown-openbsd7.0 -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name ControlHeightReduction.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model static -mframe-pointer=all -relaxed-aliasing -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/usr/src/gnu/usr.bin/clang/libLLVM/obj -resource-dir /usr/local/lib/clang/13.0.0 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I 
/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Analysis -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ASMParser -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/BinaryFormat -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Bitcode -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Bitcode -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Bitstream -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /include/llvm/CodeGen -I /include/llvm/CodeGen/PBQP -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/IR -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/IR -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Coroutines -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ProfileData/Coverage -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/CodeView -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/DWARF -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/MSF -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/PDB -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Demangle -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ExecutionEngine -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ExecutionEngine/JITLink -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ExecutionEngine/Orc -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend/OpenACC -I 
/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend/OpenMP -I /include/llvm/CodeGen/GlobalISel -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/IRReader -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/InstCombine -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/Transforms/InstCombine -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/LTO -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Linker -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/MC -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/MC/MCParser -I /include/llvm/CodeGen/MIRParser -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Object -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Option -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Passes -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ProfileData -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Scalar -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ADT -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Support -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/Symbolize -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Target -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I 
/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Utils -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Vectorize -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/IPO -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include -I /usr/src/gnu/usr.bin/clang/libLLVM/../include -I /usr/src/gnu/usr.bin/clang/libLLVM/obj -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include -D NDEBUG -D __STDC_LIMIT_MACROS -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D LLVM_PREFIX="/usr" -internal-isystem /usr/include/c++/v1 -internal-isystem /usr/local/lib/clang/13.0.0/include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/usr/src/gnu/usr.bin/clang/libLLVM/obj -ferror-limit 19 -fvisibility-inlines-hidden -fwrapv -stack-protector 2 -fno-rtti -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-valloc -fno-builtin-free -fno-builtin-strdup 
-fno-builtin-strndup -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /home/ben/Projects/vmm/scan-build/2022-01-12-194120-40624-1 -x c++ /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp

/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp

1//===-- ControlHeightReduction.cpp - Control Height Reduction -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass merges conditional blocks of code and reduces the number of
10// conditional branches in the hot paths based on profiles.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/Transforms/Instrumentation/ControlHeightReduction.h"
15#include "llvm/ADT/DenseMap.h"
16#include "llvm/ADT/DenseSet.h"
17#include "llvm/ADT/SmallVector.h"
18#include "llvm/ADT/StringSet.h"
19#include "llvm/Analysis/BlockFrequencyInfo.h"
20#include "llvm/Analysis/GlobalsModRef.h"
21#include "llvm/Analysis/OptimizationRemarkEmitter.h"
22#include "llvm/Analysis/ProfileSummaryInfo.h"
23#include "llvm/Analysis/RegionInfo.h"
24#include "llvm/Analysis/RegionIterator.h"
25#include "llvm/Analysis/ValueTracking.h"
26#include "llvm/IR/CFG.h"
27#include "llvm/IR/Dominators.h"
28#include "llvm/IR/IRBuilder.h"
29#include "llvm/IR/MDBuilder.h"
30#include "llvm/IR/PassManager.h"
31#include "llvm/InitializePasses.h"
32#include "llvm/Support/BranchProbability.h"
33#include "llvm/Support/CommandLine.h"
34#include "llvm/Support/MemoryBuffer.h"
35#include "llvm/Transforms/Utils.h"
36#include "llvm/Transforms/Utils/BasicBlockUtils.h"
37#include "llvm/Transforms/Utils/Cloning.h"
38#include "llvm/Transforms/Utils/ValueMapper.h"
39
40#include <set>
41#include <sstream>
42
43using namespace llvm;
44
// Debug category name for this file.
#define DEBUG_TYPE "chr"

// File-local spelling of LLVM_DEBUG used for all of CHR's debug output.
#define CHR_DEBUG(X) LLVM_DEBUG(X)
48
// Hidden command-line knobs for the CHR pass.

// -force-chr: when set, shouldApply() returns true for every function.
49static cl::opt<bool> ForceCHR("force-chr", cl::init(false), cl::Hidden,
50 cl::desc("Apply CHR for all functions"));
51
// -chr-bias-threshold: ratio above which a branch counts as biased
// (default 0.99).
52static cl::opt<double> CHRBiasThreshold(
53 "chr-bias-threshold", cl::init(0.99), cl::Hidden,
54 cl::desc("CHR considers a branch bias greater than this ratio as biased"));
55
// -chr-merge-threshold: minimum group size of branches/selects to merge
// (default 2).
56static cl::opt<unsigned> CHRMergeThreshold(
57 "chr-merge-threshold", cl::init(2), cl::Hidden,
58 cl::desc("CHR merges a group of N branches/selects where N >= this value"));
59
// -chr-module-list: path of a file listing module names, one per line; read by
// parseCHRFilterFiles().
60static cl::opt<std::string> CHRModuleList(
61 "chr-module-list", cl::init(""), cl::Hidden,
62 cl::desc("Specify file to retrieve the list of modules to apply CHR to"));
63
// -chr-function-list: path of a file listing function names, one per line;
// read by parseCHRFilterFiles().
64static cl::opt<std::string> CHRFunctionList(
65 "chr-function-list", cl::init(""), cl::Hidden,
66 cl::desc("Specify file to retrieve the list of functions to apply CHR to"));
67
// Names loaded from the two list files above. Filled by parseCHRFilterFiles()
// and consulted by shouldApply().
68static StringSet<> CHRModules;
69static StringSet<> CHRFunctions;
70
71static void parseCHRFilterFiles() {
72 if (!CHRModuleList.empty()) {
73 auto FileOrErr = MemoryBuffer::getFile(CHRModuleList);
74 if (!FileOrErr) {
75 errs() << "Error: Couldn't read the chr-module-list file " << CHRModuleList << "\n";
76 std::exit(1);
77 }
78 StringRef Buf = FileOrErr->get()->getBuffer();
79 SmallVector<StringRef, 0> Lines;
80 Buf.split(Lines, '\n');
81 for (StringRef Line : Lines) {
82 Line = Line.trim();
83 if (!Line.empty())
84 CHRModules.insert(Line);
85 }
86 }
87 if (!CHRFunctionList.empty()) {
88 auto FileOrErr = MemoryBuffer::getFile(CHRFunctionList);
89 if (!FileOrErr) {
90 errs() << "Error: Couldn't read the chr-function-list file " << CHRFunctionList << "\n";
91 std::exit(1);
92 }
93 StringRef Buf = FileOrErr->get()->getBuffer();
94 SmallVector<StringRef, 0> Lines;
95 Buf.split(Lines, '\n');
96 for (StringRef Line : Lines) {
97 Line = Line.trim();
98 if (!Line.empty())
99 CHRFunctions.insert(Line);
100 }
101 }
102}
103
104namespace {
// Legacy pass-manager wrapper for CHR. runOnFunction is defined elsewhere in
// the file.
105class ControlHeightReductionLegacyPass : public FunctionPass {
106public:
107 static char ID;
108
// Registers the pass with the global PassRegistry and (re)loads the module /
// function filter lists each time a pass object is constructed.
109 ControlHeightReductionLegacyPass() : FunctionPass(ID) {
110 initializeControlHeightReductionLegacyPassPass(
111 *PassRegistry::getPassRegistry());
112 parseCHRFilterFiles();
113 }
114
115 bool runOnFunction(Function &F) override;
// Requires block frequency, dominator tree, profile summary and region info;
// declares GlobalsAA as preserved.
116 void getAnalysisUsage(AnalysisUsage &AU) const override {
117 AU.addRequired<BlockFrequencyInfoWrapperPass>();
118 AU.addRequired<DominatorTreeWrapperPass>();
119 AU.addRequired<ProfileSummaryInfoWrapperPass>();
120 AU.addRequired<RegionInfoPass>();
121 AU.addPreserved<GlobalsAAWrapperPass>();
122 }
123};
124} // end anonymous namespace
125
126char ControlHeightReductionLegacyPass::ID = 0;
127
128INITIALIZE_PASS_BEGIN(ControlHeightReductionLegacyPass,static void *initializeControlHeightReductionLegacyPassPassOnce
(PassRegistry &Registry) {
129 "chr",static void *initializeControlHeightReductionLegacyPassPassOnce
(PassRegistry &Registry) {
130 "Reduce control height in the hot paths",static void *initializeControlHeightReductionLegacyPassPassOnce
(PassRegistry &Registry) {
131 false, false)static void *initializeControlHeightReductionLegacyPassPassOnce
(PassRegistry &Registry) {
132INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)initializeBlockFrequencyInfoWrapperPassPass(Registry);
133INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)initializeDominatorTreeWrapperPassPass(Registry);
134INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)initializeProfileSummaryInfoWrapperPassPass(Registry);
135INITIALIZE_PASS_DEPENDENCY(RegionInfoPass)initializeRegionInfoPassPass(Registry);
136INITIALIZE_PASS_END(ControlHeightReductionLegacyPass,PassInfo *PI = new PassInfo( "Reduce control height in the hot paths"
, "chr", &ControlHeightReductionLegacyPass::ID, PassInfo::
NormalCtor_t(callDefaultCtor<ControlHeightReductionLegacyPass
>), false, false); Registry.registerPass(*PI, true); return
PI; } static llvm::once_flag InitializeControlHeightReductionLegacyPassPassFlag
; void llvm::initializeControlHeightReductionLegacyPassPass(PassRegistry
&Registry) { llvm::call_once(InitializeControlHeightReductionLegacyPassPassFlag
, initializeControlHeightReductionLegacyPassPassOnce, std::ref
(Registry)); }
137 "chr",PassInfo *PI = new PassInfo( "Reduce control height in the hot paths"
, "chr", &ControlHeightReductionLegacyPass::ID, PassInfo::
NormalCtor_t(callDefaultCtor<ControlHeightReductionLegacyPass
>), false, false); Registry.registerPass(*PI, true); return
PI; } static llvm::once_flag InitializeControlHeightReductionLegacyPassPassFlag
; void llvm::initializeControlHeightReductionLegacyPassPass(PassRegistry
&Registry) { llvm::call_once(InitializeControlHeightReductionLegacyPassPassFlag
, initializeControlHeightReductionLegacyPassPassOnce, std::ref
(Registry)); }
138 "Reduce control height in the hot paths",PassInfo *PI = new PassInfo( "Reduce control height in the hot paths"
, "chr", &ControlHeightReductionLegacyPass::ID, PassInfo::
NormalCtor_t(callDefaultCtor<ControlHeightReductionLegacyPass
>), false, false); Registry.registerPass(*PI, true); return
PI; } static llvm::once_flag InitializeControlHeightReductionLegacyPassPassFlag
; void llvm::initializeControlHeightReductionLegacyPassPass(PassRegistry
&Registry) { llvm::call_once(InitializeControlHeightReductionLegacyPassPassFlag
, initializeControlHeightReductionLegacyPassPassOnce, std::ref
(Registry)); }
139 false, false)PassInfo *PI = new PassInfo( "Reduce control height in the hot paths"
, "chr", &ControlHeightReductionLegacyPass::ID, PassInfo::
NormalCtor_t(callDefaultCtor<ControlHeightReductionLegacyPass
>), false, false); Registry.registerPass(*PI, true); return
PI; } static llvm::once_flag InitializeControlHeightReductionLegacyPassPassFlag
; void llvm::initializeControlHeightReductionLegacyPassPass(PassRegistry
&Registry) { llvm::call_once(InitializeControlHeightReductionLegacyPassPassFlag
, initializeControlHeightReductionLegacyPassPassOnce, std::ref
(Registry)); }
140
141FunctionPass *llvm::createControlHeightReductionLegacyPass() {
142 return new ControlHeightReductionLegacyPass();
143}
144
145namespace {
146
147struct CHRStats {
148 CHRStats() : NumBranches(0), NumBranchesDelta(0),
149 WeightedNumBranchesDelta(0) {}
150 void print(raw_ostream &OS) const {
151 OS << "CHRStats: NumBranches " << NumBranches
152 << " NumBranchesDelta " << NumBranchesDelta
153 << " WeightedNumBranchesDelta " << WeightedNumBranchesDelta;
154 }
155 uint64_t NumBranches; // The original number of conditional branches /
156 // selects
157 uint64_t NumBranchesDelta; // The decrease of the number of conditional
158 // branches / selects in the hot paths due to CHR.
159 uint64_t WeightedNumBranchesDelta; // NumBranchesDelta weighted by the profile
160 // count at the scope entry.
161};
162
163// RegInfo - some properties of a Region.
164struct RegInfo {
165 RegInfo() : R(nullptr), HasBranch(false) {}
166 RegInfo(Region *RegionIn) : R(RegionIn), HasBranch(false) {}
167 Region *R;
168 bool HasBranch;
169 SmallVector<SelectInst *, 8> Selects;
170};
171
172typedef DenseMap<Region *, DenseSet<Instruction *>> HoistStopMapTy;
173
174// CHRScope - a sequence of regions to CHR together. It corresponds to a
175// sequence of conditional blocks. It can have subscopes which correspond to
176// nested conditional blocks. Nested CHRScopes form a tree.
177class CHRScope {
178 public:
179 CHRScope(RegInfo RI) : BranchInsertPoint(nullptr) {
180 assert(RI.R && "Null RegionIn")((void)0);
181 RegInfos.push_back(RI);
182 }
183
184 Region *getParentRegion() {
185 assert(RegInfos.size() > 0 && "Empty CHRScope")((void)0);
186 Region *Parent = RegInfos[0].R->getParent();
187 assert(Parent && "Unexpected to call this on the top-level region")((void)0);
188 return Parent;
189 }
190
191 BasicBlock *getEntryBlock() {
192 assert(RegInfos.size() > 0 && "Empty CHRScope")((void)0);
193 return RegInfos.front().R->getEntry();
194 }
195
196 BasicBlock *getExitBlock() {
197 assert(RegInfos.size() > 0 && "Empty CHRScope")((void)0);
198 return RegInfos.back().R->getExit();
199 }
200
201 bool appendable(CHRScope *Next) {
202 // The next scope is appendable only if this scope is directly connected to
203 // it (which implies it post-dominates this scope) and this scope dominates
204 // it (no edge to the next scope outside this scope).
205 BasicBlock *NextEntry = Next->getEntryBlock();
206 if (getExitBlock() != NextEntry)
207 // Not directly connected.
208 return false;
209 Region *LastRegion = RegInfos.back().R;
210 for (BasicBlock *Pred : predecessors(NextEntry))
211 if (!LastRegion->contains(Pred))
212 // There's an edge going into the entry of the next scope from outside
213 // of this scope.
214 return false;
215 return true;
216 }
217
218 void append(CHRScope *Next) {
219 assert(RegInfos.size() > 0 && "Empty CHRScope")((void)0);
220 assert(Next->RegInfos.size() > 0 && "Empty CHRScope")((void)0);
221 assert(getParentRegion() == Next->getParentRegion() &&((void)0)
222 "Must be siblings")((void)0);
223 assert(getExitBlock() == Next->getEntryBlock() &&((void)0)
224 "Must be adjacent")((void)0);
225 RegInfos.append(Next->RegInfos.begin(), Next->RegInfos.end());
226 Subs.append(Next->Subs.begin(), Next->Subs.end());
227 }
228
229 void addSub(CHRScope *SubIn) {
230#ifndef NDEBUG1
231 bool IsChild = false;
232 for (RegInfo &RI : RegInfos)
233 if (RI.R == SubIn->getParentRegion()) {
234 IsChild = true;
235 break;
236 }
237 assert(IsChild && "Must be a child")((void)0);
238#endif
239 Subs.push_back(SubIn);
240 }
241
242 // Split this scope at the boundary region into two, which will belong to the
243 // tail and returns the tail.
244 CHRScope *split(Region *Boundary) {
245 assert(Boundary && "Boundary null")((void)0);
246 assert(RegInfos.begin()->R != Boundary &&((void)0)
247 "Can't be split at beginning")((void)0);
248 auto BoundaryIt = llvm::find_if(
249 RegInfos, [&Boundary](const RegInfo &RI) { return Boundary == RI.R; });
250 if (BoundaryIt == RegInfos.end())
251 return nullptr;
252 ArrayRef<RegInfo> TailRegInfos(BoundaryIt, RegInfos.end());
253 DenseSet<Region *> TailRegionSet;
254 for (const RegInfo &RI : TailRegInfos)
255 TailRegionSet.insert(RI.R);
256
257 auto TailIt =
258 std::stable_partition(Subs.begin(), Subs.end(), [&](CHRScope *Sub) {
259 assert(Sub && "null Sub")((void)0);
260 Region *Parent = Sub->getParentRegion();
261 if (TailRegionSet.count(Parent))
262 return false;
263
264 assert(llvm::any_of(((void)0)
265 RegInfos,((void)0)
266 [&Parent](const RegInfo &RI) { return Parent == RI.R; }) &&((void)0)
267 "Must be in head")((void)0);
268 return true;
269 });
270 ArrayRef<CHRScope *> TailSubs(TailIt, Subs.end());
271
272 assert(HoistStopMap.empty() && "MapHoistStops must be empty")((void)0);
273 auto *Scope = new CHRScope(TailRegInfos, TailSubs);
274 RegInfos.erase(BoundaryIt, RegInfos.end());
275 Subs.erase(TailIt, Subs.end());
276 return Scope;
277 }
278
279 bool contains(Instruction *I) const {
280 BasicBlock *Parent = I->getParent();
281 for (const RegInfo &RI : RegInfos)
282 if (RI.R->contains(Parent))
283 return true;
284 return false;
285 }
286
287 void print(raw_ostream &OS) const;
288
289 SmallVector<RegInfo, 8> RegInfos; // Regions that belong to this scope
290 SmallVector<CHRScope *, 8> Subs; // Subscopes.
291
292 // The instruction at which to insert the CHR conditional branch (and hoist
293 // the dependent condition values).
294 Instruction *BranchInsertPoint;
295
296 // True-biased and false-biased regions (conditional blocks),
297 // respectively. Used only for the outermost scope and includes regions in
298 // subscopes. The rest are unbiased.
299 DenseSet<Region *> TrueBiasedRegions;
300 DenseSet<Region *> FalseBiasedRegions;
301 // Among the biased regions, the regions that get CHRed.
302 SmallVector<RegInfo, 8> CHRRegions;
303
304 // True-biased and false-biased selects, respectively. Used only for the
305 // outermost scope and includes ones in subscopes.
306 DenseSet<SelectInst *> TrueBiasedSelects;
307 DenseSet<SelectInst *> FalseBiasedSelects;
308
309 // Map from one of the above regions to the instructions to stop
310 // hoisting instructions at through use-def chains.
311 HoistStopMapTy HoistStopMap;
312
313 private:
314 CHRScope(ArrayRef<RegInfo> RegInfosIn, ArrayRef<CHRScope *> SubsIn)
315 : RegInfos(RegInfosIn.begin(), RegInfosIn.end()),
316 Subs(SubsIn.begin(), SubsIn.end()), BranchInsertPoint(nullptr) {}
317};
318
319class CHR {
320 public:
321 CHR(Function &Fin, BlockFrequencyInfo &BFIin, DominatorTree &DTin,
322 ProfileSummaryInfo &PSIin, RegionInfo &RIin,
323 OptimizationRemarkEmitter &OREin)
324 : F(Fin), BFI(BFIin), DT(DTin), PSI(PSIin), RI(RIin), ORE(OREin) {}
325
326 ~CHR() {
327 for (CHRScope *Scope : Scopes) {
328 delete Scope;
329 }
330 }
331
332 bool run();
333
334 private:
335 // See the comments in CHR::run() for the high level flow of the algorithm and
336 // what the following functions do.
337
338 void findScopes(SmallVectorImpl<CHRScope *> &Output) {
339 Region *R = RI.getTopLevelRegion();
340 if (CHRScope *Scope = findScopes(R, nullptr, nullptr, Output)) {
341 Output.push_back(Scope);
342 }
343 }
344 CHRScope *findScopes(Region *R, Region *NextRegion, Region *ParentRegion,
345 SmallVectorImpl<CHRScope *> &Scopes);
346 CHRScope *findScope(Region *R);
347 void checkScopeHoistable(CHRScope *Scope);
348
349 void splitScopes(SmallVectorImpl<CHRScope *> &Input,
350 SmallVectorImpl<CHRScope *> &Output);
351 SmallVector<CHRScope *, 8> splitScope(CHRScope *Scope,
352 CHRScope *Outer,
353 DenseSet<Value *> *OuterConditionValues,
354 Instruction *OuterInsertPoint,
355 SmallVectorImpl<CHRScope *> &Output,
356 DenseSet<Instruction *> &Unhoistables);
357
358 void classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes);
359 void classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope);
360
361 void filterScopes(SmallVectorImpl<CHRScope *> &Input,
362 SmallVectorImpl<CHRScope *> &Output);
363
364 void setCHRRegions(SmallVectorImpl<CHRScope *> &Input,
365 SmallVectorImpl<CHRScope *> &Output);
366 void setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope);
367
368 void sortScopes(SmallVectorImpl<CHRScope *> &Input,
369 SmallVectorImpl<CHRScope *> &Output);
370
371 void transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes);
372 void transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs);
373 void cloneScopeBlocks(CHRScope *Scope,
374 BasicBlock *PreEntryBlock,
375 BasicBlock *ExitBlock,
376 Region *LastRegion,
377 ValueToValueMapTy &VMap);
378 BranchInst *createMergedBranch(BasicBlock *PreEntryBlock,
379 BasicBlock *EntryBlock,
380 BasicBlock *NewEntryBlock,
381 ValueToValueMapTy &VMap);
382 void fixupBranchesAndSelects(CHRScope *Scope,
383 BasicBlock *PreEntryBlock,
384 BranchInst *MergedBR,
385 uint64_t ProfileCount);
386 void fixupBranch(Region *R,
387 CHRScope *Scope,
388 IRBuilder<> &IRB,
389 Value *&MergedCondition, BranchProbability &CHRBranchBias);
390 void fixupSelect(SelectInst* SI,
391 CHRScope *Scope,
392 IRBuilder<> &IRB,
393 Value *&MergedCondition, BranchProbability &CHRBranchBias);
394 void addToMergedCondition(bool IsTrueBiased, Value *Cond,
395 Instruction *BranchOrSelect,
396 CHRScope *Scope,
397 IRBuilder<> &IRB,
398 Value *&MergedCondition);
399
400 Function &F;
401 BlockFrequencyInfo &BFI;
402 DominatorTree &DT;
403 ProfileSummaryInfo &PSI;
404 RegionInfo &RI;
405 OptimizationRemarkEmitter &ORE;
406 CHRStats Stats;
407
408 // All the true-biased regions in the function
409 DenseSet<Region *> TrueBiasedRegionsGlobal;
410 // All the false-biased regions in the function
411 DenseSet<Region *> FalseBiasedRegionsGlobal;
412 // All the true-biased selects in the function
413 DenseSet<SelectInst *> TrueBiasedSelectsGlobal;
414 // All the false-biased selects in the function
415 DenseSet<SelectInst *> FalseBiasedSelectsGlobal;
416 // A map from biased regions to their branch bias
417 DenseMap<Region *, BranchProbability> BranchBiasMap;
418 // A map from biased selects to their branch bias
419 DenseMap<SelectInst *, BranchProbability> SelectBiasMap;
420 // All the scopes.
421 DenseSet<CHRScope *> Scopes;
422};
423
424} // end anonymous namespace
425
// Stream a CHRStats by delegating to CHRStats::print; returns OS so calls can
// be chained.
426static inline
427raw_ostream LLVM_ATTRIBUTE_UNUSED__attribute__((__unused__)) &operator<<(raw_ostream &OS,
428 const CHRStats &Stats) {
429 Stats.print(OS);
430 return OS;
431}
432
// Stream a CHRScope by delegating to CHRScope::print; returns OS so calls can
// be chained.
433static inline
434raw_ostream &operator<<(raw_ostream &OS, const CHRScope &Scope) {
435 Scope.print(OS);
436 return OS;
437}
438
439static bool shouldApply(Function &F, ProfileSummaryInfo& PSI) {
440 if (ForceCHR)
441 return true;
442
443 if (!CHRModuleList.empty() || !CHRFunctionList.empty()) {
444 if (CHRModules.count(F.getParent()->getName()))
445 return true;
446 return CHRFunctions.count(F.getName());
447 }
448
449 assert(PSI.hasProfileSummary() && "Empty PSI?")((void)0);
450 return PSI.isFunctionEntryHot(&F);
451}
452
453static void LLVM_ATTRIBUTE_UNUSED__attribute__((__unused__)) dumpIR(Function &F, const char *Label,
454 CHRStats *Stats) {
455 StringRef FuncName = F.getName();
456 StringRef ModuleName = F.getParent()->getName();
457 (void)(FuncName); // Unused in release build.
458 (void)(ModuleName); // Unused in release build.
459 CHR_DEBUG(dbgs() << "CHR IR dump " << Label << " " << ModuleName << " "do { } while (false)
460 << FuncName)do { } while (false);
461 if (Stats)
462 CHR_DEBUG(dbgs() << " " << *Stats)do { } while (false);
463 CHR_DEBUG(dbgs() << "\n")do { } while (false);
464 CHR_DEBUG(F.dump())do { } while (false);
465}
466
467void CHRScope::print(raw_ostream &OS) const {
468 assert(RegInfos.size() > 0 && "Empty CHRScope")((void)0);
469 OS << "CHRScope[";
470 OS << RegInfos.size() << ", Regions[";
471 for (const RegInfo &RI : RegInfos) {
472 OS << RI.R->getNameStr();
473 if (RI.HasBranch)
474 OS << " B";
475 if (RI.Selects.size() > 0)
476 OS << " S" << RI.Selects.size();
477 OS << ", ";
478 }
479 if (RegInfos[0].R->getParent()) {
480 OS << "], Parent " << RegInfos[0].R->getParent()->getNameStr();
481 } else {
482 // top level region
483 OS << "]";
484 }
485 OS << ", Subs[";
486 for (CHRScope *Sub : Subs) {
487 OS << *Sub << ", ";
488 }
489 OS << "]]";
490}
491
492// Return true if the given instruction type can be hoisted by CHR.
493static bool isHoistableInstructionType(Instruction *I) {
494 return isa<BinaryOperator>(I) || isa<CastInst>(I) || isa<SelectInst>(I) ||
495 isa<GetElementPtrInst>(I) || isa<CmpInst>(I) ||
496 isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) ||
497 isa<ShuffleVectorInst>(I) || isa<ExtractValueInst>(I) ||
498 isa<InsertValueInst>(I);
499}
500
501// Return true if the given instruction can be hoisted by CHR.
502static bool isHoistable(Instruction *I, DominatorTree &DT) {
503 if (!isHoistableInstructionType(I))
504 return false;
505 return isSafeToSpeculativelyExecute(I, nullptr, &DT);
506}
507
508// Recursively traverse the use-def chains of the given value and return a set
509// of the unhoistable base values defined within the scope (excluding the
510// first-region entry block) or the (hoistable or unhoistable) base values that
511// are defined outside (including the first-region entry block) of the
512// scope. The returned set doesn't include constants.
513static const std::set<Value *> &
514getBaseValues(Value *V, DominatorTree &DT,
515 DenseMap<Value *, std::set<Value *>> &Visited) {
516 auto It = Visited.find(V);
517 if (It != Visited.end()) {
518 return It->second;
519 }
520 std::set<Value *> Result;
521 if (auto *I = dyn_cast<Instruction>(V)) {
522 // We don't stop at a block that's not in the Scope because we would miss
523 // some instructions that are based on the same base values if we stop
524 // there.
525 if (!isHoistable(I, DT)) {
526 Result.insert(I);
527 return Visited.insert(std::make_pair(V, std::move(Result))).first->second;
528 }
529 // I is hoistable above the Scope.
530 for (Value *Op : I->operands()) {
531 const std::set<Value *> &OpResult = getBaseValues(Op, DT, Visited);
532 Result.insert(OpResult.begin(), OpResult.end());
533 }
534 return Visited.insert(std::make_pair(V, std::move(Result))).first->second;
535 }
536 if (isa<Argument>(V)) {
537 Result.insert(V);
538 }
539 // We don't include others like constants because those won't lead to any
540 // chance of folding of conditions (eg two bit checks merged into one check)
541 // after CHR.
542 return Visited.insert(std::make_pair(V, std::move(Result))).first->second;
543}
544
545// Return true if V is already hoisted or can be hoisted (along with its
546// operands) above the insert point. When it returns true and HoistStops is
547// non-null, the instructions to stop hoisting at through the use-def chains are
548// inserted into HoistStops.
549static bool
550checkHoistValue(Value *V, Instruction *InsertPoint, DominatorTree &DT,
551 DenseSet<Instruction *> &Unhoistables,
552 DenseSet<Instruction *> *HoistStops,
553 DenseMap<Instruction *, bool> &Visited) {
554 assert(InsertPoint && "Null InsertPoint")((void)0);
555 if (auto *I = dyn_cast<Instruction>(V)) {
556 auto It = Visited.find(I);
557 if (It != Visited.end()) {
558 return It->second;
559 }
560 assert(DT.getNode(I->getParent()) && "DT must contain I's parent block")((void)0);
561 assert(DT.getNode(InsertPoint->getParent()) && "DT must contain Destination")((void)0);
562 if (Unhoistables.count(I)) {
563 // Don't hoist if they are not to be hoisted.
564 Visited[I] = false;
565 return false;
566 }
567 if (DT.dominates(I, InsertPoint)) {
568 // We are already above the insert point. Stop here.
569 if (HoistStops)
570 HoistStops->insert(I);
571 Visited[I] = true;
572 return true;
573 }
574 // We aren't not above the insert point, check if we can hoist it above the
575 // insert point.
576 if (isHoistable(I, DT)) {
577 // Check operands first.
578 DenseSet<Instruction *> OpsHoistStops;
579 bool AllOpsHoisted = true;
580 for (Value *Op : I->operands()) {
581 if (!checkHoistValue(Op, InsertPoint, DT, Unhoistables, &OpsHoistStops,
582 Visited)) {
583 AllOpsHoisted = false;
584 break;
585 }
586 }
587 if (AllOpsHoisted) {
588 CHR_DEBUG(dbgs() << "checkHoistValue " << *I << "\n")do { } while (false);
589 if (HoistStops)
590 HoistStops->insert(OpsHoistStops.begin(), OpsHoistStops.end());
591 Visited[I] = true;
592 return true;
593 }
594 }
595 Visited[I] = false;
596 return false;
597 }
598 // Non-instructions are considered hoistable.
599 return true;
600}
601
602// Returns true and sets the true probability and false probability of an
603// MD_prof metadata if it's well-formed.
604static bool checkMDProf(MDNode *MD, BranchProbability &TrueProb,
605 BranchProbability &FalseProb) {
606 if (!MD) return false;
607 MDString *MDName = cast<MDString>(MD->getOperand(0));
608 if (MDName->getString() != "branch_weights" ||
609 MD->getNumOperands() != 3)
610 return false;
611 ConstantInt *TrueWeight = mdconst::extract<ConstantInt>(MD->getOperand(1));
612 ConstantInt *FalseWeight = mdconst::extract<ConstantInt>(MD->getOperand(2));
613 if (!TrueWeight || !FalseWeight)
614 return false;
615 uint64_t TrueWt = TrueWeight->getValue().getZExtValue();
616 uint64_t FalseWt = FalseWeight->getValue().getZExtValue();
617 uint64_t SumWt = TrueWt + FalseWt;
618
619 assert(SumWt >= TrueWt && SumWt >= FalseWt &&((void)0)
620 "Overflow calculating branch probabilities.")((void)0);
621
622 // Guard against 0-to-0 branch weights to avoid a division-by-zero crash.
623 if (SumWt == 0)
624 return false;
625
626 TrueProb = BranchProbability::getBranchProbability(TrueWt, SumWt);
627 FalseProb = BranchProbability::getBranchProbability(FalseWt, SumWt);
628 return true;
629}
630
631static BranchProbability getCHRBiasThreshold() {
632 return BranchProbability::getBranchProbability(
633 static_cast<uint64_t>(CHRBiasThreshold * 1000000), 1000000);
634}
635
636// A helper for CheckBiasedBranch and CheckBiasedSelect. If TrueProb >=
637// CHRBiasThreshold, put Key into TrueSet and return true. If FalseProb >=
638// CHRBiasThreshold, put Key into FalseSet and return true. Otherwise, return
639// false.
640template <typename K, typename S, typename M>
641static bool checkBias(K *Key, BranchProbability TrueProb,
642 BranchProbability FalseProb, S &TrueSet, S &FalseSet,
643 M &BiasMap) {
644 BranchProbability Threshold = getCHRBiasThreshold();
645 if (TrueProb >= Threshold) {
646 TrueSet.insert(Key);
647 BiasMap[Key] = TrueProb;
648 return true;
649 } else if (FalseProb >= Threshold) {
650 FalseSet.insert(Key);
651 BiasMap[Key] = FalseProb;
652 return true;
653 }
654 return false;
655}
656
657// Returns true and insert a region into the right biased set and the map if the
658// branch of the region is biased.
659static bool checkBiasedBranch(BranchInst *BI, Region *R,
660 DenseSet<Region *> &TrueBiasedRegionsGlobal,
661 DenseSet<Region *> &FalseBiasedRegionsGlobal,
662 DenseMap<Region *, BranchProbability> &BranchBiasMap) {
663 if (!BI->isConditional())
664 return false;
665 BranchProbability ThenProb, ElseProb;
666 if (!checkMDProf(BI->getMetadata(LLVMContext::MD_prof),
667 ThenProb, ElseProb))
668 return false;
669 BasicBlock *IfThen = BI->getSuccessor(0);
670 BasicBlock *IfElse = BI->getSuccessor(1);
671 assert((IfThen == R->getExit() || IfElse == R->getExit()) &&((void)0)
672 IfThen != IfElse &&((void)0)
673 "Invariant from findScopes")((void)0);
674 if (IfThen == R->getExit()) {
675 // Swap them so that IfThen/ThenProb means going into the conditional code
676 // and IfElse/ElseProb means skipping it.
677 std::swap(IfThen, IfElse);
678 std::swap(ThenProb, ElseProb);
679 }
680 CHR_DEBUG(dbgs() << "BI " << *BI << " ")do { } while (false);
681 CHR_DEBUG(dbgs() << "ThenProb " << ThenProb << " ")do { } while (false);
682 CHR_DEBUG(dbgs() << "ElseProb " << ElseProb << "\n")do { } while (false);
683 return checkBias(R, ThenProb, ElseProb,
684 TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal,
685 BranchBiasMap);
686}
687
688// Returns true and insert a select into the right biased set and the map if the
689// select is biased.
690static bool checkBiasedSelect(
691 SelectInst *SI, Region *R,
692 DenseSet<SelectInst *> &TrueBiasedSelectsGlobal,
693 DenseSet<SelectInst *> &FalseBiasedSelectsGlobal,
694 DenseMap<SelectInst *, BranchProbability> &SelectBiasMap) {
695 BranchProbability TrueProb, FalseProb;
696 if (!checkMDProf(SI->getMetadata(LLVMContext::MD_prof),
697 TrueProb, FalseProb))
698 return false;
699 CHR_DEBUG(dbgs() << "SI " << *SI << " ")do { } while (false);
700 CHR_DEBUG(dbgs() << "TrueProb " << TrueProb << " ")do { } while (false);
701 CHR_DEBUG(dbgs() << "FalseProb " << FalseProb << "\n")do { } while (false);
702 return checkBias(SI, TrueProb, FalseProb,
703 TrueBiasedSelectsGlobal, FalseBiasedSelectsGlobal,
704 SelectBiasMap);
705}
706
707// Returns the instruction at which to hoist the dependent condition values and
708// insert the CHR branch for a region. This is the terminator branch in the
709// entry block or the first select in the entry block, if any.
710static Instruction* getBranchInsertPoint(RegInfo &RI) {
711 Region *R = RI.R;
712 BasicBlock *EntryBB = R->getEntry();
713 // The hoist point is by default the terminator of the entry block, which is
714 // the same as the branch instruction if RI.HasBranch is true.
715 Instruction *HoistPoint = EntryBB->getTerminator();
716 for (SelectInst *SI : RI.Selects) {
717 if (SI->getParent() == EntryBB) {
718 // Pick the first select in Selects in the entry block. Note Selects is
719 // sorted in the instruction order within a block (asserted below).
720 HoistPoint = SI;
721 break;
722 }
723 }
724 assert(HoistPoint && "Null HoistPoint")((void)0);
725#ifndef NDEBUG1
726 // Check that HoistPoint is the first one in Selects in the entry block,
727 // if any.
728 DenseSet<Instruction *> EntryBlockSelectSet;
729 for (SelectInst *SI : RI.Selects) {
730 if (SI->getParent() == EntryBB) {
731 EntryBlockSelectSet.insert(SI);
732 }
733 }
734 for (Instruction &I : *EntryBB) {
735 if (EntryBlockSelectSet.contains(&I)) {
736 assert(&I == HoistPoint &&((void)0)
737 "HoistPoint must be the first one in Selects")((void)0);
738 break;
739 }
740 }
741#endif
742 return HoistPoint;
743}
744
745// Find a CHR scope in the given region.
746CHRScope * CHR::findScope(Region *R) {
747 CHRScope *Result = nullptr;
748 BasicBlock *Entry = R->getEntry();
749 BasicBlock *Exit = R->getExit(); // null if top level.
750 assert(Entry && "Entry must not be null")((void)0);
751 assert((Exit == nullptr) == (R->isTopLevelRegion()) &&((void)0)
752 "Only top level region has a null exit")((void)0);
753 if (Entry)
754 CHR_DEBUG(dbgs() << "Entry " << Entry->getName() << "\n")do { } while (false);
755 else
756 CHR_DEBUG(dbgs() << "Entry null\n")do { } while (false);
757 if (Exit)
758 CHR_DEBUG(dbgs() << "Exit " << Exit->getName() << "\n")do { } while (false);
759 else
760 CHR_DEBUG(dbgs() << "Exit null\n")do { } while (false);
761 // Exclude cases where Entry is part of a subregion (hence it doesn't belong
762 // to this region).
763 bool EntryInSubregion = RI.getRegionFor(Entry) != R;
764 if (EntryInSubregion)
765 return nullptr;
766 // Exclude loops
767 for (BasicBlock *Pred : predecessors(Entry))
768 if (R->contains(Pred))
769 return nullptr;
770 // If any of the basic blocks have address taken, we must skip this region
771 // because we cannot clone basic blocks that have address taken.
772 for (BasicBlock *BB : R->blocks())
773 if (BB->hasAddressTaken())
774 return nullptr;
775 if (Exit) {
776 // Try to find an if-then block (check if R is an if-then).
777 // if (cond) {
778 // ...
779 // }
780 auto *BI = dyn_cast<BranchInst>(Entry->getTerminator());
781 if (BI)
782 CHR_DEBUG(dbgs() << "BI.isConditional " << BI->isConditional() << "\n")do { } while (false);
783 else
784 CHR_DEBUG(dbgs() << "BI null\n")do { } while (false);
785 if (BI && BI->isConditional()) {
786 BasicBlock *S0 = BI->getSuccessor(0);
787 BasicBlock *S1 = BI->getSuccessor(1);
788 CHR_DEBUG(dbgs() << "S0 " << S0->getName() << "\n")do { } while (false);
789 CHR_DEBUG(dbgs() << "S1 " << S1->getName() << "\n")do { } while (false);
790 if (S0 != S1 && (S0 == Exit || S1 == Exit)) {
791 RegInfo RI(R);
792 RI.HasBranch = checkBiasedBranch(
793 BI, R, TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal,
794 BranchBiasMap);
795 Result = new CHRScope(RI);
796 Scopes.insert(Result);
797 CHR_DEBUG(dbgs() << "Found a region with a branch\n")do { } while (false);
798 ++Stats.NumBranches;
799 if (!RI.HasBranch) {
800 ORE.emit([&]() {
801 return OptimizationRemarkMissed(DEBUG_TYPE"chr", "BranchNotBiased", BI)
802 << "Branch not biased";
803 });
804 }
805 }
806 }
807 }
808 {
809 // Try to look for selects in the direct child blocks (as opposed to in
810 // subregions) of R.
811 // ...
812 // if (..) { // Some subregion
813 // ...
814 // }
815 // if (..) { // Some subregion
816 // ...
817 // }
818 // ...
819 // a = cond ? b : c;
820 // ...
821 SmallVector<SelectInst *, 8> Selects;
822 for (RegionNode *E : R->elements()) {
823 if (E->isSubRegion())
824 continue;
825 // This returns the basic block of E if E is a direct child of R (not a
826 // subregion.)
827 BasicBlock *BB = E->getEntry();
828 // Need to push in the order to make it easier to find the first Select
829 // later.
830 for (Instruction &I : *BB) {
831 if (auto *SI = dyn_cast<SelectInst>(&I)) {
832 Selects.push_back(SI);
833 ++Stats.NumBranches;
834 }
835 }
836 }
837 if (Selects.size() > 0) {
838 auto AddSelects = [&](RegInfo &RI) {
839 for (auto *SI : Selects)
840 if (checkBiasedSelect(SI, RI.R,
841 TrueBiasedSelectsGlobal,
842 FalseBiasedSelectsGlobal,
843 SelectBiasMap))
844 RI.Selects.push_back(SI);
845 else
846 ORE.emit([&]() {
847 return OptimizationRemarkMissed(DEBUG_TYPE"chr", "SelectNotBiased", SI)
848 << "Select not biased";
849 });
850 };
851 if (!Result) {
852 CHR_DEBUG(dbgs() << "Found a select-only region\n")do { } while (false);
853 RegInfo RI(R);
854 AddSelects(RI);
855 Result = new CHRScope(RI);
856 Scopes.insert(Result);
857 } else {
858 CHR_DEBUG(dbgs() << "Found select(s) in a region with a branch\n")do { } while (false);
859 AddSelects(Result->RegInfos[0]);
860 }
861 }
862 }
863
864 if (Result) {
865 checkScopeHoistable(Result);
866 }
867 return Result;
868}
869
870// Check that any of the branch and the selects in the region could be
871// hoisted above the the CHR branch insert point (the most dominating of
872// them, either the branch (at the end of the first block) or the first
873// select in the first block). If the branch can't be hoisted, drop the
874// selects in the first blocks.
875//
876// For example, for the following scope/region with selects, we want to insert
877// the merged branch right before the first select in the first/entry block by
878// hoisting c1, c2, c3, and c4.
879//
880// // Branch insert point here.
881// a = c1 ? b : c; // Select 1
882// d = c2 ? e : f; // Select 2
883// if (c3) { // Branch
884// ...
885// c4 = foo() // A call.
886// g = c4 ? h : i; // Select 3
887// }
888//
889// But suppose we can't hoist c4 because it's dependent on the preceding
890// call. Then, we drop Select 3. Furthermore, if we can't hoist c2, we also drop
891// Select 2. If we can't hoist c3, we drop Selects 1 & 2.
892void CHR::checkScopeHoistable(CHRScope *Scope) {
893 RegInfo &RI = Scope->RegInfos[0];
894 Region *R = RI.R;
895 BasicBlock *EntryBB = R->getEntry();
896 auto *Branch = RI.HasBranch ?
1
Assuming field 'HasBranch' is false
2
'?' condition is false
3
'Branch' initialized to a null pointer value
897 cast<BranchInst>(EntryBB->getTerminator()) : nullptr;
898 SmallVector<SelectInst *, 8> &Selects = RI.Selects;
899 if (RI.HasBranch
3.1
Field 'HasBranch' is false
3.1
Field 'HasBranch' is false
|| !Selects.empty()) {
4
Calling 'SmallVectorBase::empty'
7
Returning from 'SmallVectorBase::empty'
8
Taking true branch
900 Instruction *InsertPoint = getBranchInsertPoint(RI);
901 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n")do { } while (false);
9
Loop condition is false. Exiting loop
902 // Avoid a data dependence from a select or a branch to a(nother)
903 // select. Note no instruction can't data-depend on a branch (a branch
904 // instruction doesn't produce a value).
905 DenseSet<Instruction *> Unhoistables;
906 // Initialize Unhoistables with the selects.
907 for (SelectInst *SI : Selects) {
10
Assuming '__begin2' is equal to '__end2'
908 Unhoistables.insert(SI);
909 }
910 // Remove Selects that can't be hoisted.
911 for (auto it = Selects.begin(); it != Selects.end(); ) {
11
Assuming the condition is false
12
Loop condition is false. Execution continues on line 935
912 SelectInst *SI = *it;
913 if (SI == InsertPoint) {
914 ++it;
915 continue;
916 }
917 DenseMap<Instruction *, bool> Visited;
918 bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint,
919 DT, Unhoistables, nullptr, Visited);
920 if (!IsHoistable) {
921 CHR_DEBUG(dbgs() << "Dropping select " << *SI << "\n")do { } while (false);
922 ORE.emit([&]() {
923 return OptimizationRemarkMissed(DEBUG_TYPE"chr",
924 "DropUnhoistableSelect", SI)
925 << "Dropped unhoistable select";
926 });
927 it = Selects.erase(it);
928 // Since we are dropping the select here, we also drop it from
929 // Unhoistables.
930 Unhoistables.erase(SI);
931 } else
932 ++it;
933 }
934 // Update InsertPoint after potentially removing selects.
935 InsertPoint = getBranchInsertPoint(RI);
936 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n")do { } while (false);
13
Loop condition is false. Exiting loop
937 if (RI.HasBranch && InsertPoint != Branch) {
14
Assuming field 'HasBranch' is true
15
Assuming 'InsertPoint' is not equal to 'Branch'
16
Taking true branch
938 DenseMap<Instruction *, bool> Visited;
939 bool IsHoistable = checkHoistValue(Branch->getCondition(), InsertPoint,
17
Called C++ object pointer is null
940 DT, Unhoistables, nullptr, Visited);
941 if (!IsHoistable) {
942 // If the branch isn't hoistable, drop the selects in the entry
943 // block, preferring the branch, which makes the branch the hoist
944 // point.
945 assert(InsertPoint != Branch && "Branch must not be the hoist point")((void)0);
946 CHR_DEBUG(dbgs() << "Dropping selects in entry block \n")do { } while (false);
947 CHR_DEBUG(do { } while (false)
948 for (SelectInst *SI : Selects) {do { } while (false)
949 dbgs() << "SI " << *SI << "\n";do { } while (false)
950 })do { } while (false);
951 for (SelectInst *SI : Selects) {
952 ORE.emit([&]() {
953 return OptimizationRemarkMissed(DEBUG_TYPE"chr",
954 "DropSelectUnhoistableBranch", SI)
955 << "Dropped select due to unhoistable branch";
956 });
957 }
958 llvm::erase_if(Selects, [EntryBB](SelectInst *SI) {
959 return SI->getParent() == EntryBB;
960 });
961 Unhoistables.clear();
962 InsertPoint = Branch;
963 }
964 }
965 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n")do { } while (false);
966#ifndef NDEBUG1
967 if (RI.HasBranch) {
968 assert(!DT.dominates(Branch, InsertPoint) &&((void)0)
969 "Branch can't be already above the hoist point")((void)0);
970 DenseMap<Instruction *, bool> Visited;
971 assert(checkHoistValue(Branch->getCondition(), InsertPoint,((void)0)
972 DT, Unhoistables, nullptr, Visited) &&((void)0)
973 "checkHoistValue for branch")((void)0);
974 }
975 for (auto *SI : Selects) {
976 assert(!DT.dominates(SI, InsertPoint) &&((void)0)
977 "SI can't be already above the hoist point")((void)0);
978 DenseMap<Instruction *, bool> Visited;
979 assert(checkHoistValue(SI->getCondition(), InsertPoint, DT,((void)0)
980 Unhoistables, nullptr, Visited) &&((void)0)
981 "checkHoistValue for selects")((void)0);
982 }
983 CHR_DEBUG(dbgs() << "Result\n")do { } while (false);
984 if (RI.HasBranch) {
985 CHR_DEBUG(dbgs() << "BI " << *Branch << "\n")do { } while (false);
986 }
987 for (auto *SI : Selects) {
988 CHR_DEBUG(dbgs() << "SI " << *SI << "\n")do { } while (false);
989 }
990#endif
991 }
992}
993
994// Traverse the region tree, find all nested scopes and merge them if possible.
995CHRScope * CHR::findScopes(Region *R, Region *NextRegion, Region *ParentRegion,
996 SmallVectorImpl<CHRScope *> &Scopes) {
997 CHR_DEBUG(dbgs() << "findScopes " << R->getNameStr() << "\n")do { } while (false);
998 CHRScope *Result = findScope(R);
999 // Visit subscopes.
1000 CHRScope *ConsecutiveSubscope = nullptr;
1001 SmallVector<CHRScope *, 8> Subscopes;
1002 for (auto It = R->begin(); It != R->end(); ++It) {
1003 const std::unique_ptr<Region> &SubR = *It;
1004 auto NextIt = std::next(It);
1005 Region *NextSubR = NextIt != R->end() ? NextIt->get() : nullptr;
1006 CHR_DEBUG(dbgs() << "Looking at subregion " << SubR.get()->getNameStr()do { } while (false)
1007 << "\n")do { } while (false);
1008 CHRScope *SubCHRScope = findScopes(SubR.get(), NextSubR, R, Scopes);
1009 if (SubCHRScope) {
1010 CHR_DEBUG(dbgs() << "Subregion Scope " << *SubCHRScope << "\n")do { } while (false);
1011 } else {
1012 CHR_DEBUG(dbgs() << "Subregion Scope null\n")do { } while (false);
1013 }
1014 if (SubCHRScope) {
1015 if (!ConsecutiveSubscope)
1016 ConsecutiveSubscope = SubCHRScope;
1017 else if (!ConsecutiveSubscope->appendable(SubCHRScope)) {
1018 Subscopes.push_back(ConsecutiveSubscope);
1019 ConsecutiveSubscope = SubCHRScope;
1020 } else
1021 ConsecutiveSubscope->append(SubCHRScope);
1022 } else {
1023 if (ConsecutiveSubscope) {
1024 Subscopes.push_back(ConsecutiveSubscope);
1025 }
1026 ConsecutiveSubscope = nullptr;
1027 }
1028 }
1029 if (ConsecutiveSubscope) {
1030 Subscopes.push_back(ConsecutiveSubscope);
1031 }
1032 for (CHRScope *Sub : Subscopes) {
1033 if (Result) {
1034 // Combine it with the parent.
1035 Result->addSub(Sub);
1036 } else {
1037 // Push Subscopes as they won't be combined with the parent.
1038 Scopes.push_back(Sub);
1039 }
1040 }
1041 return Result;
1042}
1043
1044static DenseSet<Value *> getCHRConditionValuesForRegion(RegInfo &RI) {
1045 DenseSet<Value *> ConditionValues;
1046 if (RI.HasBranch) {
1047 auto *BI = cast<BranchInst>(RI.R->getEntry()->getTerminator());
1048 ConditionValues.insert(BI->getCondition());
1049 }
1050 for (SelectInst *SI : RI.Selects) {
1051 ConditionValues.insert(SI->getCondition());
1052 }
1053 return ConditionValues;
1054}
1055
1056
1057// Determine whether to split a scope depending on the sets of the branch
1058// condition values of the previous region and the current region. We split
1059// (return true) it if 1) the condition values of the inner/lower scope can't be
1060// hoisted up to the outer/upper scope, or 2) the two sets of the condition
1061// values have an empty intersection (because the combined branch conditions
1062// won't probably lead to a simpler combined condition).
1063static bool shouldSplit(Instruction *InsertPoint,
1064 DenseSet<Value *> &PrevConditionValues,
1065 DenseSet<Value *> &ConditionValues,
1066 DominatorTree &DT,
1067 DenseSet<Instruction *> &Unhoistables) {
1068 assert(InsertPoint && "Null InsertPoint")((void)0);
1069 CHR_DEBUG(do { } while (false)
1070 dbgs() << "shouldSplit " << *InsertPoint << " PrevConditionValues ";do { } while (false)
1071 for (Value *V : PrevConditionValues) {do { } while (false)
1072 dbgs() << *V << ", ";do { } while (false)
1073 }do { } while (false)
1074 dbgs() << " ConditionValues ";do { } while (false)
1075 for (Value *V : ConditionValues) {do { } while (false)
1076 dbgs() << *V << ", ";do { } while (false)
1077 }do { } while (false)
1078 dbgs() << "\n")do { } while (false);
1079 // If any of Bases isn't hoistable to the hoist point, split.
1080 for (Value *V : ConditionValues) {
1081 DenseMap<Instruction *, bool> Visited;
1082 if (!checkHoistValue(V, InsertPoint, DT, Unhoistables, nullptr, Visited)) {
1083 CHR_DEBUG(dbgs() << "Split. checkHoistValue false " << *V << "\n")do { } while (false);
1084 return true; // Not hoistable, split.
1085 }
1086 }
1087 // If PrevConditionValues or ConditionValues is empty, don't split to avoid
1088 // unnecessary splits at scopes with no branch/selects. If
1089 // PrevConditionValues and ConditionValues don't intersect at all, split.
1090 if (!PrevConditionValues.empty() && !ConditionValues.empty()) {
1091 // Use std::set as DenseSet doesn't work with set_intersection.
1092 std::set<Value *> PrevBases, Bases;
1093 DenseMap<Value *, std::set<Value *>> Visited;
1094 for (Value *V : PrevConditionValues) {
1095 const std::set<Value *> &BaseValues = getBaseValues(V, DT, Visited);
1096 PrevBases.insert(BaseValues.begin(), BaseValues.end());
1097 }
1098 for (Value *V : ConditionValues) {
1099 const std::set<Value *> &BaseValues = getBaseValues(V, DT, Visited);
1100 Bases.insert(BaseValues.begin(), BaseValues.end());
1101 }
1102 CHR_DEBUG(do { } while (false)
1103 dbgs() << "PrevBases ";do { } while (false)
1104 for (Value *V : PrevBases) {do { } while (false)
1105 dbgs() << *V << ", ";do { } while (false)
1106 }do { } while (false)
1107 dbgs() << " Bases ";do { } while (false)
1108 for (Value *V : Bases) {do { } while (false)
1109 dbgs() << *V << ", ";do { } while (false)
1110 }do { } while (false)
1111 dbgs() << "\n")do { } while (false);
1112 std::vector<Value *> Intersection;
1113 std::set_intersection(PrevBases.begin(), PrevBases.end(), Bases.begin(),
1114 Bases.end(), std::back_inserter(Intersection));
1115 if (Intersection.empty()) {
1116 // Empty intersection, split.
1117 CHR_DEBUG(dbgs() << "Split. Intersection empty\n")do { } while (false);
1118 return true;
1119 }
1120 }
1121 CHR_DEBUG(dbgs() << "No split\n")do { } while (false);
1122 return false; // Don't split.
1123}
1124
1125static void getSelectsInScope(CHRScope *Scope,
1126 DenseSet<Instruction *> &Output) {
1127 for (RegInfo &RI : Scope->RegInfos)
1128 for (SelectInst *SI : RI.Selects)
1129 Output.insert(SI);
1130 for (CHRScope *Sub : Scope->Subs)
1131 getSelectsInScope(Sub, Output);
1132}
1133
1134void CHR::splitScopes(SmallVectorImpl<CHRScope *> &Input,
1135 SmallVectorImpl<CHRScope *> &Output) {
1136 for (CHRScope *Scope : Input) {
1137 assert(!Scope->BranchInsertPoint &&((void)0)
1138 "BranchInsertPoint must not be set")((void)0);
1139 DenseSet<Instruction *> Unhoistables;
1140 getSelectsInScope(Scope, Unhoistables);
1141 splitScope(Scope, nullptr, nullptr, nullptr, Output, Unhoistables);
1142 }
1143#ifndef NDEBUG1
1144 for (CHRScope *Scope : Output) {
1145 assert(Scope->BranchInsertPoint && "BranchInsertPoint must be set")((void)0);
1146 }
1147#endif
1148}
1149
// Splits Scope into pieces whose regions can share one hoist point, then
// recursively does the same for the sub-scopes of every piece.
//
// Scope                 - the scope to split; may be split in place via
//                         CHRScope::split, with each tail registered in the
//                         member Scopes set.
// Outer                 - the enclosing scope, or null at the top level.
// OuterConditionValues  - condition values of Outer; must be non-null when
//                         Outer is non-null (asserted).
// OuterInsertPoint      - insert point of Outer; must be non-null when Outer
//                         is non-null (asserted).
// Output                - receives every piece that is split from its outer
//                         scope; BranchInsertPoint is set on those pieces.
// Unhoistables          - instructions that must not be hoisted, forwarded to
//                         shouldSplit.
//
// Returns the pieces that stay connected to Outer (expected to be empty when
// Outer is null, as asserted at the end).
1150SmallVector<CHRScope *, 8> CHR::splitScope(
1151 CHRScope *Scope,
1152 CHRScope *Outer,
1153 DenseSet<Value *> *OuterConditionValues,
1154 Instruction *OuterInsertPoint,
1155 SmallVectorImpl<CHRScope *> &Output,
1156 DenseSet<Instruction *> &Unhoistables) {
1157 if (Outer) {
1158 assert(OuterConditionValues && "Null OuterConditionValues")((void)0);
1159 assert(OuterInsertPoint && "Null OuterInsertPoint")((void)0);
1160 }
// State of the piece currently being accumulated: whether it is split from
// the outer scope, the union of its condition values, and its insert point.
1161 bool PrevSplitFromOuter = true;
1162 DenseSet<Value *> PrevConditionValues;
1163 Instruction *PrevInsertPoint = nullptr;
// Parallel vectors, one entry per finished piece (sizes are asserted to match
// after the loop).
1164 SmallVector<CHRScope *, 8> Splits;
1165 SmallVector<bool, 8> SplitsSplitFromOuter;
1166 SmallVector<DenseSet<Value *>, 8> SplitsConditionValues;
1167 SmallVector<Instruction *, 8> SplitsInsertPoints;
1168 SmallVector<RegInfo, 8> RegInfos(Scope->RegInfos); // Copy, so that the loop below does not iterate the list of the Scope object it calls split() on
1169 for (RegInfo &RI : RegInfos) {
1170 Instruction *InsertPoint = getBranchInsertPoint(RI);
1171 DenseSet<Value *> ConditionValues = getCHRConditionValuesForRegion(RI);
1172 CHR_DEBUG(do { } while (false)
1173 dbgs() << "ConditionValues ";do { } while (false)
1174 for (Value *V : ConditionValues) {do { } while (false)
1175 dbgs() << *V << ", ";do { } while (false)
1176 }do { } while (false)
1177 dbgs() << "\n")do { } while (false);
1178 if (RI.R == RegInfos[0].R) {
1179 // First iteration. Check to see if we should split from the outer.
1180 if (Outer) {
1181 CHR_DEBUG(dbgs() << "Outer " << *Outer << "\n")do { } while (false);
1182 CHR_DEBUG(dbgs() << "Should split from outer at "do { } while (false)
1183 << RI.R->getNameStr() << "\n")do { } while (false);
1184 if (shouldSplit(OuterInsertPoint, *OuterConditionValues,
1185 ConditionValues, DT, Unhoistables)) {
// Splitting from the outer: start from this region's own condition values and
// insert point (PrevSplitFromOuter keeps its initial value, true).
1186 PrevConditionValues = ConditionValues;
1187 PrevInsertPoint = InsertPoint;
1188 ORE.emit([&]() {
1189 return OptimizationRemarkMissed(DEBUG_TYPE"chr",
1190 "SplitScopeFromOuter",
1191 RI.R->getEntry()->getTerminator())
1192 << "Split scope from outer due to unhoistable branch/select "
1193 << "and/or lack of common condition values";
1194 });
1195 } else {
1196 // Not splitting from the outer. Use the outer bases and insert
1197 // point. Union the bases.
1198 PrevSplitFromOuter = false;
1199 PrevConditionValues = *OuterConditionValues;
1200 PrevConditionValues.insert(ConditionValues.begin(),
1201 ConditionValues.end());
1202 PrevInsertPoint = OuterInsertPoint;
1203 }
1204 } else {
// No outer scope: start from this region's own condition values and insert
// point.
1205 CHR_DEBUG(dbgs() << "Outer null\n")do { } while (false);
1206 PrevConditionValues = ConditionValues;
1207 PrevInsertPoint = InsertPoint;
1208 }
1209 } else {
// Later regions: decide whether this region can join the piece accumulated so
// far.
1210 CHR_DEBUG(dbgs() << "Should split from prev at "do { } while (false)
1211 << RI.R->getNameStr() << "\n")do { } while (false);
1212 if (shouldSplit(PrevInsertPoint, PrevConditionValues, ConditionValues,
1213 DT, Unhoistables)) {
// Cut the scope at this region: the part before it is finished and recorded,
// and accumulation restarts with the tail.
1214 CHRScope *Tail = Scope->split(RI.R);
1215 Scopes.insert(Tail);
1216 Splits.push_back(Scope);
1217 SplitsSplitFromOuter.push_back(PrevSplitFromOuter);
1218 SplitsConditionValues.push_back(PrevConditionValues);
1219 SplitsInsertPoints.push_back(PrevInsertPoint);
1220 Scope = Tail;
1221 PrevConditionValues = ConditionValues;
1222 PrevInsertPoint = InsertPoint;
1223 PrevSplitFromOuter = true;
1224 ORE.emit([&]() {
1225 return OptimizationRemarkMissed(DEBUG_TYPE"chr",
1226 "SplitScopeFromPrev",
1227 RI.R->getEntry()->getTerminator())
1228 << "Split scope from previous due to unhoistable branch/select "
1229 << "and/or lack of common condition values";
1230 });
1231 } else {
1232 // Not splitting. Union the bases. Keep the hoist point.
1233 PrevConditionValues.insert(ConditionValues.begin(), ConditionValues.end());
1234 }
1235 }
1236 }
// Record the last (or only) piece.
1237 Splits.push_back(Scope);
1238 SplitsSplitFromOuter.push_back(PrevSplitFromOuter);
1239 SplitsConditionValues.push_back(PrevConditionValues);
1240 assert(PrevInsertPoint && "Null PrevInsertPoint")((void)0);
1241 SplitsInsertPoints.push_back(PrevInsertPoint);
1242 assert(Splits.size() == SplitsConditionValues.size() &&((void)0)
1243 Splits.size() == SplitsSplitFromOuter.size() &&((void)0)
1244 Splits.size() == SplitsInsertPoints.size() && "Mismatching sizes")((void)0);
// Recurse into the sub-scopes of every piece, with the piece acting as their
// outer scope. The selects of the piece are the unhoistables for that
// recursion. Each piece's Subs list is replaced by the sub-scopes that stay
// connected to it.
1245 for (size_t I = 0; I < Splits.size(); ++I) {
1246 CHRScope *Split = Splits[I];
1247 DenseSet<Value *> &SplitConditionValues = SplitsConditionValues[I];
1248 Instruction *SplitInsertPoint = SplitsInsertPoints[I];
1249 SmallVector<CHRScope *, 8> NewSubs;
1250 DenseSet<Instruction *> SplitUnhoistables;
1251 getSelectsInScope(Split, SplitUnhoistables);
1252 for (CHRScope *Sub : Split->Subs) {
1253 SmallVector<CHRScope *, 8> SubSplits = splitScope(
1254 Sub, Split, &SplitConditionValues, SplitInsertPoint, Output,
1255 SplitUnhoistables);
1256 llvm::append_range(NewSubs, SubSplits);
1257 }
1258 Split->Subs = NewSubs;
1259 }
// Pieces split from the outer become independent output scopes with their own
// branch insert point; the rest are handed back to the caller.
1260 SmallVector<CHRScope *, 8> Result;
1261 for (size_t I = 0; I < Splits.size(); ++I) {
1262 CHRScope *Split = Splits[I];
1263 if (SplitsSplitFromOuter[I]) {
1264 // Split from the outer.
1265 Output.push_back(Split);
1266 Split->BranchInsertPoint = SplitsInsertPoints[I];
1267 CHR_DEBUG(dbgs() << "BranchInsertPoint " << *SplitsInsertPoints[I]do { } while (false)
1268 << "\n")do { } while (false);
1269 } else {
1270 // Connected to the outer.
1271 Result.push_back(Split);
1272 }
1273 }
1274 if (!Outer)
1275 assert(Result.empty() &&((void)0)
1276 "If no outer (top-level), must return no nested ones")((void)0);
1277 return Result;
1278}
1279
1280void CHR::classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes) {
1281 for (CHRScope *Scope : Scopes) {
1282 assert(Scope->TrueBiasedRegions.empty() && Scope->FalseBiasedRegions.empty() && "Empty")((void)0);
1283 classifyBiasedScopes(Scope, Scope);
1284 CHR_DEBUG(do { } while (false)
1285 dbgs() << "classifyBiasedScopes " << *Scope << "\n";do { } while (false)
1286 dbgs() << "TrueBiasedRegions ";do { } while (false)
1287 for (Region *R : Scope->TrueBiasedRegions) {do { } while (false)
1288 dbgs() << R->getNameStr() << ", ";do { } while (false)
1289 }do { } while (false)
1290 dbgs() << "\n";do { } while (false)
1291 dbgs() << "FalseBiasedRegions ";do { } while (false)
1292 for (Region *R : Scope->FalseBiasedRegions) {do { } while (false)
1293 dbgs() << R->getNameStr() << ", ";do { } while (false)
1294 }do { } while (false)
1295 dbgs() << "\n";do { } while (false)
1296 dbgs() << "TrueBiasedSelects ";do { } while (false)
1297 for (SelectInst *SI : Scope->TrueBiasedSelects) {do { } while (false)
1298 dbgs() << *SI << ", ";do { } while (false)
1299 }do { } while (false)
1300 dbgs() << "\n";do { } while (false)
1301 dbgs() << "FalseBiasedSelects ";do { } while (false)
1302 for (SelectInst *SI : Scope->FalseBiasedSelects) {do { } while (false)
1303 dbgs() << *SI << ", ";do { } while (false)
1304 }do { } while (false)
1305 dbgs() << "\n";)do { } while (false);
1306 }
1307}
1308
1309void CHR::classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope) {
1310 for (RegInfo &RI : Scope->RegInfos) {
1311 if (RI.HasBranch) {
1312 Region *R = RI.R;
1313 if (TrueBiasedRegionsGlobal.contains(R))
1314 OutermostScope->TrueBiasedRegions.insert(R);
1315 else if (FalseBiasedRegionsGlobal.contains(R))
1316 OutermostScope->FalseBiasedRegions.insert(R);
1317 else
1318 llvm_unreachable("Must be biased")__builtin_unreachable();
1319 }
1320 for (SelectInst *SI : RI.Selects) {
1321 if (TrueBiasedSelectsGlobal.contains(SI))
1322 OutermostScope->TrueBiasedSelects.insert(SI);
1323 else if (FalseBiasedSelectsGlobal.contains(SI))
1324 OutermostScope->FalseBiasedSelects.insert(SI);
1325 else
1326 llvm_unreachable("Must be biased")__builtin_unreachable();
1327 }
1328 }
1329 for (CHRScope *Sub : Scope->Subs) {
1330 classifyBiasedScopes(Sub, OutermostScope);
1331 }
1332}
1333
1334static bool hasAtLeastTwoBiasedBranches(CHRScope *Scope) {
1335 unsigned NumBiased = Scope->TrueBiasedRegions.size() +
1336 Scope->FalseBiasedRegions.size() +
1337 Scope->TrueBiasedSelects.size() +
1338 Scope->FalseBiasedSelects.size();
1339 return NumBiased >= CHRMergeThreshold;
1340}
1341
1342void CHR::filterScopes(SmallVectorImpl<CHRScope *> &Input,
1343 SmallVectorImpl<CHRScope *> &Output) {
1344 for (CHRScope *Scope : Input) {
1345 // Filter out the ones with only one region and no subs.
1346 if (!hasAtLeastTwoBiasedBranches(Scope)) {
1347 CHR_DEBUG(dbgs() << "Filtered out by biased branches truthy-regions "do { } while (false)
1348 << Scope->TrueBiasedRegions.size()do { } while (false)
1349 << " falsy-regions " << Scope->FalseBiasedRegions.size()do { } while (false)
1350 << " true-selects " << Scope->TrueBiasedSelects.size()do { } while (false)
1351 << " false-selects " << Scope->FalseBiasedSelects.size() << "\n")do { } while (false);
1352 ORE.emit([&]() {
1353 return OptimizationRemarkMissed(
1354 DEBUG_TYPE"chr",
1355 "DropScopeWithOneBranchOrSelect",
1356 Scope->RegInfos[0].R->getEntry()->getTerminator())
1357 << "Drop scope with < "
1358 << ore::NV("CHRMergeThreshold", CHRMergeThreshold)
1359 << " biased branch(es) or select(s)";
1360 });
1361 continue;
1362 }
1363 Output.push_back(Scope);
1364 }
1365}
1366
1367void CHR::setCHRRegions(SmallVectorImpl<CHRScope *> &Input,
1368 SmallVectorImpl<CHRScope *> &Output) {
1369 for (CHRScope *Scope : Input) {
1370 assert(Scope->HoistStopMap.empty() && Scope->CHRRegions.empty() &&((void)0)
1371 "Empty")((void)0);
1372 setCHRRegions(Scope, Scope);
1373 Output.push_back(Scope);
1374 CHR_DEBUG(do { } while (false)
1375 dbgs() << "setCHRRegions HoistStopMap " << *Scope << "\n";do { } while (false)
1376 for (auto pair : Scope->HoistStopMap) {do { } while (false)
1377 Region *R = pair.first;do { } while (false)
1378 dbgs() << "Region " << R->getNameStr() << "\n";do { } while (false)
1379 for (Instruction *I : pair.second) {do { } while (false)
1380 dbgs() << "HoistStop " << *I << "\n";do { } while (false)
1381 }do { } while (false)
1382 }do { } while (false)
1383 dbgs() << "CHRRegions" << "\n";do { } while (false)
1384 for (RegInfo &RI : Scope->CHRRegions) {do { } while (false)
1385 dbgs() << RI.R->getNameStr() << "\n";do { } while (false)
1386 })do { } while (false);
1387 }
1388}
1389
1390void CHR::setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope) {
1391 DenseSet<Instruction *> Unhoistables;
1392 // Put the biased selects in Unhoistables because they should stay where they
1393 // are and constant-folded after CHR (in case one biased select or a branch
1394 // can depend on another biased select.)
1395 for (RegInfo &RI : Scope->RegInfos) {
1396 for (SelectInst *SI : RI.Selects) {
1397 Unhoistables.insert(SI);
1398 }
1399 }
1400 Instruction *InsertPoint = OutermostScope->BranchInsertPoint;
1401 for (RegInfo &RI : Scope->RegInfos) {
1402 Region *R = RI.R;
1403 DenseSet<Instruction *> HoistStops;
1404 bool IsHoisted = false;
1405 if (RI.HasBranch) {
1406 assert((OutermostScope->TrueBiasedRegions.contains(R) ||((void)0)
1407 OutermostScope->FalseBiasedRegions.contains(R)) &&((void)0)
1408 "Must be truthy or falsy")((void)0);
1409 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
1410 // Note checkHoistValue fills in HoistStops.
1411 DenseMap<Instruction *, bool> Visited;
1412 bool IsHoistable = checkHoistValue(BI->getCondition(), InsertPoint, DT,
1413 Unhoistables, &HoistStops, Visited);
1414 assert(IsHoistable && "Must be hoistable")((void)0);
1415 (void)(IsHoistable); // Unused in release build
1416 IsHoisted = true;
1417 }
1418 for (SelectInst *SI : RI.Selects) {
1419 assert((OutermostScope->TrueBiasedSelects.contains(SI) ||((void)0)
1420 OutermostScope->FalseBiasedSelects.contains(SI)) &&((void)0)
1421 "Must be true or false biased")((void)0);
1422 // Note checkHoistValue fills in HoistStops.
1423 DenseMap<Instruction *, bool> Visited;
1424 bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint, DT,
1425 Unhoistables, &HoistStops, Visited);
1426 assert(IsHoistable && "Must be hoistable")((void)0);
1427 (void)(IsHoistable); // Unused in release build
1428 IsHoisted = true;
1429 }
1430 if (IsHoisted) {
1431 OutermostScope->CHRRegions.push_back(RI);
1432 OutermostScope->HoistStopMap[R] = HoistStops;
1433 }
1434 }
1435 for (CHRScope *Sub : Scope->Subs)
1436 setCHRRegions(Sub, OutermostScope);
1437}
1438
1439static bool CHRScopeSorter(CHRScope *Scope1, CHRScope *Scope2) {
1440 return Scope1->RegInfos[0].R->getDepth() < Scope2->RegInfos[0].R->getDepth();
1441}
1442
1443void CHR::sortScopes(SmallVectorImpl<CHRScope *> &Input,
1444 SmallVectorImpl<CHRScope *> &Output) {
1445 Output.resize(Input.size());
1446 llvm::copy(Input, Output.begin());
1447 llvm::stable_sort(Output, CHRScopeSorter);
1448}
1449
1450// Return true if V is already hoisted or was hoisted (along with its operands)
1451// to the insert point.
1452static void hoistValue(Value *V, Instruction *HoistPoint, Region *R,
1453 HoistStopMapTy &HoistStopMap,
1454 DenseSet<Instruction *> &HoistedSet,
1455 DenseSet<PHINode *> &TrivialPHIs,
1456 DominatorTree &DT) {
1457 auto IT = HoistStopMap.find(R);
1458 assert(IT != HoistStopMap.end() && "Region must be in hoist stop map")((void)0);
1459 DenseSet<Instruction *> &HoistStops = IT->second;
1460 if (auto *I = dyn_cast<Instruction>(V)) {
1461 if (I == HoistPoint)
1462 return;
1463 if (HoistStops.count(I))
1464 return;
1465 if (auto *PN = dyn_cast<PHINode>(I))
1466 if (TrivialPHIs.count(PN))
1467 // The trivial phi inserted by the previous CHR scope could replace a
1468 // non-phi in HoistStops. Note that since this phi is at the exit of a
1469 // previous CHR scope, which dominates this scope, it's safe to stop
1470 // hoisting there.
1471 return;
1472 if (HoistedSet.count(I))
1473 // Already hoisted, return.
1474 return;
1475 assert(isHoistableInstructionType(I) && "Unhoistable instruction type")((void)0);
1476 assert(DT.getNode(I->getParent()) && "DT must contain I's block")((void)0);
1477 assert(DT.getNode(HoistPoint->getParent()) &&((void)0)
1478 "DT must contain HoistPoint block")((void)0);
1479 if (DT.dominates(I, HoistPoint))
1480 // We are already above the hoist point. Stop here. This may be necessary
1481 // when multiple scopes would independently hoist the same
1482 // instruction. Since an outer (dominating) scope would hoist it to its
1483 // entry before an inner (dominated) scope would to its entry, the inner
1484 // scope may see the instruction already hoisted, in which case it
1485 // potentially wrong for the inner scope to hoist it and could cause bad
1486 // IR (non-dominating def), but safe to skip hoisting it instead because
1487 // it's already in a block that dominates the inner scope.
1488 return;
1489 for (Value *Op : I->operands()) {
1490 hoistValue(Op, HoistPoint, R, HoistStopMap, HoistedSet, TrivialPHIs, DT);
1491 }
1492 I->moveBefore(HoistPoint);
1493 HoistedSet.insert(I);
1494 CHR_DEBUG(dbgs() << "hoistValue " << *I << "\n")do { } while (false);
1495 }
1496}
1497
1498// Hoist the dependent condition values of the branches and the selects in the
1499// scope to the insert point.
1500static void hoistScopeConditions(CHRScope *Scope, Instruction *HoistPoint,
1501 DenseSet<PHINode *> &TrivialPHIs,
1502 DominatorTree &DT) {
1503 DenseSet<Instruction *> HoistedSet;
1504 for (const RegInfo &RI : Scope->CHRRegions) {
1505 Region *R = RI.R;
1506 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
1507 bool IsFalseBiased = Scope->FalseBiasedRegions.count(R);
1508 if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) {
1509 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
1510 hoistValue(BI->getCondition(), HoistPoint, R, Scope->HoistStopMap,
1511 HoistedSet, TrivialPHIs, DT);
1512 }
1513 for (SelectInst *SI : RI.Selects) {
1514 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
1515 bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI);
1516 if (!(IsTrueBiased || IsFalseBiased))
1517 continue;
1518 hoistValue(SI->getCondition(), HoistPoint, R, Scope->HoistStopMap,
1519 HoistedSet, TrivialPHIs, DT);
1520 }
1521 }
1522}
1523
1524// Negate the predicate if an ICmp if it's used only by branches or selects by
1525// swapping the operands of the branches or the selects. Returns true if success.
1526static bool negateICmpIfUsedByBranchOrSelectOnly(ICmpInst *ICmp,
1527 Instruction *ExcludedUser,
1528 CHRScope *Scope) {
1529 for (User *U : ICmp->users()) {
1530 if (U == ExcludedUser)
1531 continue;
1532 if (isa<BranchInst>(U) && cast<BranchInst>(U)->isConditional())
1533 continue;
1534 if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == ICmp)
1535 continue;
1536 return false;
1537 }
1538 for (User *U : ICmp->users()) {
1539 if (U == ExcludedUser)
1540 continue;
1541 if (auto *BI = dyn_cast<BranchInst>(U)) {
1542 assert(BI->isConditional() && "Must be conditional")((void)0);
1543 BI->swapSuccessors();
1544 // Don't need to swap this in terms of
1545 // TrueBiasedRegions/FalseBiasedRegions because true-based/false-based
1546 // mean whehter the branch is likely go into the if-then rather than
1547 // successor0/successor1 and because we can tell which edge is the then or
1548 // the else one by comparing the destination to the region exit block.
1549 continue;
1550 }
1551 if (auto *SI = dyn_cast<SelectInst>(U)) {
1552 // Swap operands
1553 SI->swapValues();
1554 SI->swapProfMetadata();
1555 if (Scope->TrueBiasedSelects.count(SI)) {
1556 assert(Scope->FalseBiasedSelects.count(SI) == 0 &&((void)0)
1557 "Must not be already in")((void)0);
1558 Scope->FalseBiasedSelects.insert(SI);
1559 } else if (Scope->FalseBiasedSelects.count(SI)) {
1560 assert(Scope->TrueBiasedSelects.count(SI) == 0 &&((void)0)
1561 "Must not be already in")((void)0);
1562 Scope->TrueBiasedSelects.insert(SI);
1563 }
1564 continue;
1565 }
1566 llvm_unreachable("Must be a branch or a select")__builtin_unreachable();
1567 }
1568 ICmp->setPredicate(CmpInst::getInversePredicate(ICmp->getPredicate()));
1569 return true;
1570}
1571
1572// A helper for transformScopes. Insert a trivial phi at the scope exit block
1573// for a value that's defined in the scope but used outside it (meaning it's
1574// alive at the exit block).
1575static void insertTrivialPHIs(CHRScope *Scope,
1576 BasicBlock *EntryBlock, BasicBlock *ExitBlock,
1577 DenseSet<PHINode *> &TrivialPHIs) {
1578 SmallSetVector<BasicBlock *, 8> BlocksInScope;
1579 for (RegInfo &RI : Scope->RegInfos) {
1580 for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the
1581 // sub-Scopes.
1582 BlocksInScope.insert(BB);
1583 }
1584 }
1585 CHR_DEBUG({do { } while (false)
1586 dbgs() << "Inserting redundant phis\n";do { } while (false)
1587 for (BasicBlock *BB : BlocksInScope)do { } while (false)
1588 dbgs() << "BlockInScope " << BB->getName() << "\n";do { } while (false)
1589 })do { } while (false);
1590 for (BasicBlock *BB : BlocksInScope) {
1591 for (Instruction &I : *BB) {
1592 SmallVector<Instruction *, 8> Users;
1593 for (User *U : I.users()) {
1594 if (auto *UI = dyn_cast<Instruction>(U)) {
1595 if (BlocksInScope.count(UI->getParent()) == 0 &&
1596 // Unless there's already a phi for I at the exit block.
1597 !(isa<PHINode>(UI) && UI->getParent() == ExitBlock)) {
1598 CHR_DEBUG(dbgs() << "V " << I << "\n")do { } while (false);
1599 CHR_DEBUG(dbgs() << "Used outside scope by user " << *UI << "\n")do { } while (false);
1600 Users.push_back(UI);
1601 } else if (UI->getParent() == EntryBlock && isa<PHINode>(UI)) {
1602 // There's a loop backedge from a block that's dominated by this
1603 // scope to the entry block.
1604 CHR_DEBUG(dbgs() << "V " << I << "\n")do { } while (false);
1605 CHR_DEBUG(dbgs()do { } while (false)
1606 << "Used at entry block (for a back edge) by a phi user "do { } while (false)
1607 << *UI << "\n")do { } while (false);
1608 Users.push_back(UI);
1609 }
1610 }
1611 }
1612 if (Users.size() > 0) {
1613 // Insert a trivial phi for I (phi [&I, P0], [&I, P1], ...) at
1614 // ExitBlock. Replace I with the new phi in UI unless UI is another
1615 // phi at ExitBlock.
1616 PHINode *PN = PHINode::Create(I.getType(), pred_size(ExitBlock), "",
1617 &ExitBlock->front());
1618 for (BasicBlock *Pred : predecessors(ExitBlock)) {
1619 PN->addIncoming(&I, Pred);
1620 }
1621 TrivialPHIs.insert(PN);
1622 CHR_DEBUG(dbgs() << "Insert phi " << *PN << "\n")do { } while (false);
1623 for (Instruction *UI : Users) {
1624 for (unsigned J = 0, NumOps = UI->getNumOperands(); J < NumOps; ++J) {
1625 if (UI->getOperand(J) == &I) {
1626 UI->setOperand(J, PN);
1627 }
1628 }
1629 CHR_DEBUG(dbgs() << "Updated user " << *UI << "\n")do { } while (false);
1630 }
1631 }
1632 }
1633 }
1634}
1635
1636// Assert that all the CHR regions of the scope have a biased branch or select.
1637static void LLVM_ATTRIBUTE_UNUSED__attribute__((__unused__))
1638assertCHRRegionsHaveBiasedBranchOrSelect(CHRScope *Scope) {
1639#ifndef NDEBUG1
1640 auto HasBiasedBranchOrSelect = [](RegInfo &RI, CHRScope *Scope) {
1641 if (Scope->TrueBiasedRegions.count(RI.R) ||
1642 Scope->FalseBiasedRegions.count(RI.R))
1643 return true;
1644 for (SelectInst *SI : RI.Selects)
1645 if (Scope->TrueBiasedSelects.count(SI) ||
1646 Scope->FalseBiasedSelects.count(SI))
1647 return true;
1648 return false;
1649 };
1650 for (RegInfo &RI : Scope->CHRRegions) {
1651 assert(HasBiasedBranchOrSelect(RI, Scope) &&((void)0)
1652 "Must have biased branch or select")((void)0);
1653 }
1654#endif
1655}
1656
1657// Assert that all the condition values of the biased branches and selects have
1658// been hoisted to the pre-entry block or outside of the scope.
1659static void LLVM_ATTRIBUTE_UNUSED__attribute__((__unused__)) assertBranchOrSelectConditionHoisted(
1660 CHRScope *Scope, BasicBlock *PreEntryBlock) {
1661 CHR_DEBUG(dbgs() << "Biased regions condition values \n")do { } while (false);
1662 for (RegInfo &RI : Scope->CHRRegions) {
1663 Region *R = RI.R;
1664 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
1665 bool IsFalseBiased = Scope->FalseBiasedRegions.count(R);
1666 if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) {
1667 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
1668 Value *V = BI->getCondition();
1669 CHR_DEBUG(dbgs() << *V << "\n")do { } while (false);
1670 if (auto *I = dyn_cast<Instruction>(V)) {
1671 (void)(I); // Unused in release build.
1672 assert((I->getParent() == PreEntryBlock ||((void)0)
1673 !Scope->contains(I)) &&((void)0)
1674 "Must have been hoisted to PreEntryBlock or outside the scope")((void)0);
1675 }
1676 }
1677 for (SelectInst *SI : RI.Selects) {
1678 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
1679 bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI);
1680 if (!(IsTrueBiased || IsFalseBiased))
1681 continue;
1682 Value *V = SI->getCondition();
1683 CHR_DEBUG(dbgs() << *V << "\n")do { } while (false);
1684 if (auto *I = dyn_cast<Instruction>(V)) {
1685 (void)(I); // Unused in release build.
1686 assert((I->getParent() == PreEntryBlock ||((void)0)
1687 !Scope->contains(I)) &&((void)0)
1688 "Must have been hoisted to PreEntryBlock or outside the scope")((void)0);
1689 }
1690 }
1691 }
1692}
1693
1694void CHR::transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs) {
1695 CHR_DEBUG(dbgs() << "transformScopes " << *Scope << "\n")do { } while (false);
1696
1697 assert(Scope->RegInfos.size() >= 1 && "Should have at least one Region")((void)0);
1698 Region *FirstRegion = Scope->RegInfos[0].R;
1699 BasicBlock *EntryBlock = FirstRegion->getEntry();
1700 Region *LastRegion = Scope->RegInfos[Scope->RegInfos.size() - 1].R;
1701 BasicBlock *ExitBlock = LastRegion->getExit();
1702 Optional<uint64_t> ProfileCount = BFI.getBlockProfileCount(EntryBlock);
1703
1704 if (ExitBlock) {
1705 // Insert a trivial phi at the exit block (where the CHR hot path and the
1706 // cold path merges) for a value that's defined in the scope but used
1707 // outside it (meaning it's alive at the exit block). We will add the
1708 // incoming values for the CHR cold paths to it below. Without this, we'd
1709 // miss updating phi's for such values unless there happens to already be a
1710 // phi for that value there.
1711 insertTrivialPHIs(Scope, EntryBlock, ExitBlock, TrivialPHIs);
1712 }
1713
1714 // Split the entry block of the first region. The new block becomes the new
1715 // entry block of the first region. The old entry block becomes the block to
1716 // insert the CHR branch into. Note DT gets updated. Since DT gets updated
1717 // through the split, we update the entry of the first region after the split,
1718 // and Region only points to the entry and the exit blocks, rather than
1719 // keeping everything in a list or set, the blocks membership and the
1720 // entry/exit blocks of the region are still valid after the split.
1721 CHR_DEBUG(dbgs() << "Splitting entry block " << EntryBlock->getName()do { } while (false)
1722 << " at " << *Scope->BranchInsertPoint << "\n")do { } while (false);
1723 BasicBlock *NewEntryBlock =
1724 SplitBlock(EntryBlock, Scope->BranchInsertPoint, &DT);
1725 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&((void)0)
1726 "NewEntryBlock's only pred must be EntryBlock")((void)0);
1727 FirstRegion->replaceEntryRecursive(NewEntryBlock);
1728 BasicBlock *PreEntryBlock = EntryBlock;
1729
1730 ValueToValueMapTy VMap;
1731 // Clone the blocks in the scope (excluding the PreEntryBlock) to split into a
1732 // hot path (originals) and a cold path (clones) and update the PHIs at the
1733 // exit block.
1734 cloneScopeBlocks(Scope, PreEntryBlock, ExitBlock, LastRegion, VMap);
1735
1736 // Replace the old (placeholder) branch with the new (merged) conditional
1737 // branch.
1738 BranchInst *MergedBr = createMergedBranch(PreEntryBlock, EntryBlock,
1739 NewEntryBlock, VMap);
1740
1741#ifndef NDEBUG1
1742 assertCHRRegionsHaveBiasedBranchOrSelect(Scope);
1743#endif
1744
1745 // Hoist the conditional values of the branches/selects.
1746 hoistScopeConditions(Scope, PreEntryBlock->getTerminator(), TrivialPHIs, DT);
1747
1748#ifndef NDEBUG1
1749 assertBranchOrSelectConditionHoisted(Scope, PreEntryBlock);
1750#endif
1751
1752 // Create the combined branch condition and constant-fold the branches/selects
1753 // in the hot path.
1754 fixupBranchesAndSelects(Scope, PreEntryBlock, MergedBr,
1755 ProfileCount ? ProfileCount.getValue() : 0);
1756}
1757
1758// A helper for transformScopes. Clone the blocks in the scope (excluding the
1759// PreEntryBlock) to split into a hot path and a cold path and update the PHIs
1760// at the exit block.
1761void CHR::cloneScopeBlocks(CHRScope *Scope,
1762 BasicBlock *PreEntryBlock,
1763 BasicBlock *ExitBlock,
1764 Region *LastRegion,
1765 ValueToValueMapTy &VMap) {
1766 // Clone all the blocks. The original blocks will be the hot-path
1767 // CHR-optimized code and the cloned blocks will be the original unoptimized
1768 // code. This is so that the block pointers from the
1769 // CHRScope/Region/RegionInfo can stay valid in pointing to the hot-path code
1770 // which CHR should apply to.
1771 SmallVector<BasicBlock*, 8> NewBlocks;
1772 for (RegInfo &RI : Scope->RegInfos)
1773 for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the
1774 // sub-Scopes.
1775 assert(BB != PreEntryBlock && "Don't copy the preetntry block")((void)0);
1776 BasicBlock *NewBB = CloneBasicBlock(BB, VMap, ".nonchr", &F);
1777 NewBlocks.push_back(NewBB);
1778 VMap[BB] = NewBB;
1779 }
1780
1781 // Place the cloned blocks right after the original blocks (right before the
1782 // exit block of.)
1783 if (ExitBlock)
1784 F.getBasicBlockList().splice(ExitBlock->getIterator(),
1785 F.getBasicBlockList(),
1786 NewBlocks[0]->getIterator(), F.end());
1787
1788 // Update the cloned blocks/instructions to refer to themselves.
1789 for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i)
1790 for (Instruction &I : *NewBlocks[i])
1791 RemapInstruction(&I, VMap,
1792 RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
1793
1794 // Add the cloned blocks to the PHIs of the exit blocks. ExitBlock is null for
1795 // the top-level region but we don't need to add PHIs. The trivial PHIs
1796 // inserted above will be updated here.
1797 if (ExitBlock)
1798 for (PHINode &PN : ExitBlock->phis())
1799 for (unsigned I = 0, NumOps = PN.getNumIncomingValues(); I < NumOps;
1800 ++I) {
1801 BasicBlock *Pred = PN.getIncomingBlock(I);
1802 if (LastRegion->contains(Pred)) {
1803 Value *V = PN.getIncomingValue(I);
1804 auto It = VMap.find(V);
1805 if (It != VMap.end()) V = It->second;
1806 assert(VMap.find(Pred) != VMap.end() && "Pred must have been cloned")((void)0);
1807 PN.addIncoming(V, cast<BasicBlock>(VMap[Pred]));
1808 }
1809 }
1810}
1811
1812// A helper for transformScope. Replace the old (placeholder) branch with the
1813// new (merged) conditional branch.
1814BranchInst *CHR::createMergedBranch(BasicBlock *PreEntryBlock,
1815 BasicBlock *EntryBlock,
1816 BasicBlock *NewEntryBlock,
1817 ValueToValueMapTy &VMap) {
1818 BranchInst *OldBR = cast<BranchInst>(PreEntryBlock->getTerminator());
1819 assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == NewEntryBlock &&((void)0)
1820 "SplitBlock did not work correctly!")((void)0);
1821 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&((void)0)
1822 "NewEntryBlock's only pred must be EntryBlock")((void)0);
1823 assert(VMap.find(NewEntryBlock) != VMap.end() &&((void)0)
1824 "NewEntryBlock must have been copied")((void)0);
1825 OldBR->dropAllReferences();
1826 OldBR->eraseFromParent();
1827 // The true predicate is a placeholder. It will be replaced later in
1828 // fixupBranchesAndSelects().
1829 BranchInst *NewBR = BranchInst::Create(NewEntryBlock,
1830 cast<BasicBlock>(VMap[NewEntryBlock]),
1831 ConstantInt::getTrue(F.getContext()));
1832 PreEntryBlock->getInstList().push_back(NewBR);
1833 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&((void)0)
1834 "NewEntryBlock's only pred must be EntryBlock")((void)0);
1835 return NewBR;
1836}
1837
1838// A helper for transformScopes. Create the combined branch condition and
1839// constant-fold the branches/selects in the hot path.
1840void CHR::fixupBranchesAndSelects(CHRScope *Scope,
1841 BasicBlock *PreEntryBlock,
1842 BranchInst *MergedBR,
1843 uint64_t ProfileCount) {
1844 Value *MergedCondition = ConstantInt::getTrue(F.getContext());
1845 BranchProbability CHRBranchBias(1, 1);
1846 uint64_t NumCHRedBranches = 0;
1847 IRBuilder<> IRB(PreEntryBlock->getTerminator());
1848 for (RegInfo &RI : Scope->CHRRegions) {
1849 Region *R = RI.R;
1850 if (RI.HasBranch) {
1851 fixupBranch(R, Scope, IRB, MergedCondition, CHRBranchBias);
1852 ++NumCHRedBranches;
1853 }
1854 for (SelectInst *SI : RI.Selects) {
1855 fixupSelect(SI, Scope, IRB, MergedCondition, CHRBranchBias);
1856 ++NumCHRedBranches;
1857 }
1858 }
1859 Stats.NumBranchesDelta += NumCHRedBranches - 1;
1860 Stats.WeightedNumBranchesDelta += (NumCHRedBranches - 1) * ProfileCount;
1861 ORE.emit([&]() {
1862 return OptimizationRemark(DEBUG_TYPE"chr",
1863 "CHR",
1864 // Refer to the hot (original) path
1865 MergedBR->getSuccessor(0)->getTerminator())
1866 << "Merged " << ore::NV("NumCHRedBranches", NumCHRedBranches)
1867 << " branches or selects";
1868 });
1869 MergedBR->setCondition(MergedCondition);
1870 uint32_t Weights[] = {
1871 static_cast<uint32_t>(CHRBranchBias.scale(1000)),
1872 static_cast<uint32_t>(CHRBranchBias.getCompl().scale(1000)),
1873 };
1874 MDBuilder MDB(F.getContext());
1875 MergedBR->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
1876 CHR_DEBUG(dbgs() << "CHR branch bias " << Weights[0] << ":" << Weights[1]do { } while (false)
1877 << "\n")do { } while (false);
1878}
1879
1880// A helper for fixupBranchesAndSelects. Add to the combined branch condition
1881// and constant-fold a branch in the hot path.
1882void CHR::fixupBranch(Region *R, CHRScope *Scope,
1883 IRBuilder<> &IRB,
1884 Value *&MergedCondition,
1885 BranchProbability &CHRBranchBias) {
1886 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
1887 assert((IsTrueBiased || Scope->FalseBiasedRegions.count(R)) &&((void)0)
1888 "Must be truthy or falsy")((void)0);
1889 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
1890 assert(BranchBiasMap.find(R) != BranchBiasMap.end() &&((void)0)
1891 "Must be in the bias map")((void)0);
1892 BranchProbability Bias = BranchBiasMap[R];
1893 assert(Bias >= getCHRBiasThreshold() && "Must be highly biased")((void)0);
1894 // Take the min.
1895 if (CHRBranchBias > Bias)
1896 CHRBranchBias = Bias;
1897 BasicBlock *IfThen = BI->getSuccessor(1);
1898 BasicBlock *IfElse = BI->getSuccessor(0);
1899 BasicBlock *RegionExitBlock = R->getExit();
1900 assert(RegionExitBlock && "Null ExitBlock")((void)0);
1901 assert((IfThen == RegionExitBlock || IfElse == RegionExitBlock) &&((void)0)
1902 IfThen != IfElse && "Invariant from findScopes")((void)0);
1903 if (IfThen == RegionExitBlock) {
1904 // Swap them so that IfThen means going into it and IfElse means skipping
1905 // it.
1906 std::swap(IfThen, IfElse);
1907 }
1908 CHR_DEBUG(dbgs() << "IfThen " << IfThen->getName()do { } while (false)
1909 << " IfElse " << IfElse->getName() << "\n")do { } while (false);
1910 Value *Cond = BI->getCondition();
1911 BasicBlock *HotTarget = IsTrueBiased ? IfThen : IfElse;
1912 bool ConditionTrue = HotTarget == BI->getSuccessor(0);
1913 addToMergedCondition(ConditionTrue, Cond, BI, Scope, IRB,
1914 MergedCondition);
1915 // Constant-fold the branch at ClonedEntryBlock.
1916 assert(ConditionTrue == (HotTarget == BI->getSuccessor(0)) &&((void)0)
1917 "The successor shouldn't change")((void)0);
1918 Value *NewCondition = ConditionTrue ?
1919 ConstantInt::getTrue(F.getContext()) :
1920 ConstantInt::getFalse(F.getContext());
1921 BI->setCondition(NewCondition);
1922}
1923
1924// A helper for fixupBranchesAndSelects. Add to the combined branch condition
1925// and constant-fold a select in the hot path.
1926void CHR::fixupSelect(SelectInst *SI, CHRScope *Scope,
1927 IRBuilder<> &IRB,
1928 Value *&MergedCondition,
1929 BranchProbability &CHRBranchBias) {
1930 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
1931 assert((IsTrueBiased ||((void)0)
1932 Scope->FalseBiasedSelects.count(SI)) && "Must be biased")((void)0);
1933 assert(SelectBiasMap.find(SI) != SelectBiasMap.end() &&((void)0)
1934 "Must be in the bias map")((void)0);
1935 BranchProbability Bias = SelectBiasMap[SI];
1936 assert(Bias >= getCHRBiasThreshold() && "Must be highly biased")((void)0);
1937 // Take the min.
1938 if (CHRBranchBias > Bias)
1939 CHRBranchBias = Bias;
1940 Value *Cond = SI->getCondition();
1941 addToMergedCondition(IsTrueBiased, Cond, SI, Scope, IRB,
1942 MergedCondition);
1943 Value *NewCondition = IsTrueBiased ?
1944 ConstantInt::getTrue(F.getContext()) :
1945 ConstantInt::getFalse(F.getContext());
1946 SI->setCondition(NewCondition);
1947}
1948
1949// A helper for fixupBranch/fixupSelect. Add a branch condition to the merged
1950// condition.
1951void CHR::addToMergedCondition(bool IsTrueBiased, Value *Cond,
1952 Instruction *BranchOrSelect,
1953 CHRScope *Scope,
1954 IRBuilder<> &IRB,
1955 Value *&MergedCondition) {
1956 if (IsTrueBiased) {
1957 MergedCondition = IRB.CreateAnd(MergedCondition, Cond);
1958 } else {
1959 // If Cond is an icmp and all users of V except for BranchOrSelect is a
1960 // branch, negate the icmp predicate and swap the branch targets and avoid
1961 // inserting an Xor to negate Cond.
1962 bool Done = false;
1963 if (auto *ICmp = dyn_cast<ICmpInst>(Cond))
1964 if (negateICmpIfUsedByBranchOrSelectOnly(ICmp, BranchOrSelect, Scope)) {
1965 MergedCondition = IRB.CreateAnd(MergedCondition, Cond);
1966 Done = true;
1967 }
1968 if (!Done) {
1969 Value *Negate = IRB.CreateXor(
1970 ConstantInt::getTrue(F.getContext()), Cond);
1971 MergedCondition = IRB.CreateAnd(MergedCondition, Negate);
1972 }
1973 }
1974}
1975
1976void CHR::transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes) {
1977 unsigned I = 0;
1978 DenseSet<PHINode *> TrivialPHIs;
1979 for (CHRScope *Scope : CHRScopes) {
1980 transformScopes(Scope, TrivialPHIs);
1981 CHR_DEBUG(do { } while (false)
1982 std::ostringstream oss;do { } while (false)
1983 oss << " after transformScopes " << I++;do { } while (false)
1984 dumpIR(F, oss.str().c_str(), nullptr))do { } while (false);
1985 (void)I;
1986 }
1987}
1988
1989static void LLVM_ATTRIBUTE_UNUSED__attribute__((__unused__))
1990dumpScopes(SmallVectorImpl<CHRScope *> &Scopes, const char *Label) {
1991 dbgs() << Label << " " << Scopes.size() << "\n";
1992 for (CHRScope *Scope : Scopes) {
1993 dbgs() << *Scope << "\n";
1994 }
1995}
1996
1997bool CHR::run() {
1998 if (!shouldApply(F, PSI))
1999 return false;
2000
2001 CHR_DEBUG(dumpIR(F, "before", nullptr))do { } while (false);
2002
2003 bool Changed = false;
2004 {
2005 CHR_DEBUG(do { } while (false)
2006 dbgs() << "RegionInfo:\n";do { } while (false)
2007 RI.print(dbgs()))do { } while (false);
2008
2009 // Recursively traverse the region tree and find regions that have biased
2010 // branches and/or selects and create scopes.
2011 SmallVector<CHRScope *, 8> AllScopes;
2012 findScopes(AllScopes);
2013 CHR_DEBUG(dumpScopes(AllScopes, "All scopes"))do { } while (false);
2014
2015 // Split the scopes if 1) the conditiona values of the biased
2016 // branches/selects of the inner/lower scope can't be hoisted up to the
2017 // outermost/uppermost scope entry, or 2) the condition values of the biased
2018 // branches/selects in a scope (including subscopes) don't share at least
2019 // one common value.
2020 SmallVector<CHRScope *, 8> SplitScopes;
2021 splitScopes(AllScopes, SplitScopes);
2022 CHR_DEBUG(dumpScopes(SplitScopes, "Split scopes"))do { } while (false);
2023
2024 // After splitting, set the biased regions and selects of a scope (a tree
2025 // root) that include those of the subscopes.
2026 classifyBiasedScopes(SplitScopes);
2027 CHR_DEBUG(dbgs() << "Set per-scope bias " << SplitScopes.size() << "\n")do { } while (false);
2028
2029 // Filter out the scopes that has only one biased region or select (CHR
2030 // isn't useful in such a case).
2031 SmallVector<CHRScope *, 8> FilteredScopes;
2032 filterScopes(SplitScopes, FilteredScopes);
2033 CHR_DEBUG(dumpScopes(FilteredScopes, "Filtered scopes"))do { } while (false);
2034
2035 // Set the regions to be CHR'ed and their hoist stops for each scope.
2036 SmallVector<CHRScope *, 8> SetScopes;
2037 setCHRRegions(FilteredScopes, SetScopes);
2038 CHR_DEBUG(dumpScopes(SetScopes, "Set CHR regions"))do { } while (false);
2039
2040 // Sort CHRScopes by the depth so that outer CHRScopes comes before inner
2041 // ones. We need to apply CHR from outer to inner so that we apply CHR only
2042 // to the hot path, rather than both hot and cold paths.
2043 SmallVector<CHRScope *, 8> SortedScopes;
2044 sortScopes(SetScopes, SortedScopes);
2045 CHR_DEBUG(dumpScopes(SortedScopes, "Sorted scopes"))do { } while (false);
2046
2047 CHR_DEBUG(do { } while (false)
2048 dbgs() << "RegionInfo:\n";do { } while (false)
2049 RI.print(dbgs()))do { } while (false);
2050
2051 // Apply the CHR transformation.
2052 if (!SortedScopes.empty()) {
2053 transformScopes(SortedScopes);
2054 Changed = true;
2055 }
2056 }
2057
2058 if (Changed) {
2059 CHR_DEBUG(dumpIR(F, "after", &Stats))do { } while (false);
2060 ORE.emit([&]() {
2061 return OptimizationRemark(DEBUG_TYPE"chr", "Stats", &F)
2062 << ore::NV("Function", &F) << " "
2063 << "Reduced the number of branches in hot paths by "
2064 << ore::NV("NumBranchesDelta", Stats.NumBranchesDelta)
2065 << " (static) and "
2066 << ore::NV("WeightedNumBranchesDelta", Stats.WeightedNumBranchesDelta)
2067 << " (weighted by PGO count)";
2068 });
2069 }
2070
2071 return Changed;
2072}
2073
2074bool ControlHeightReductionLegacyPass::runOnFunction(Function &F) {
2075 BlockFrequencyInfo &BFI =
2076 getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI();
2077 DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2078 ProfileSummaryInfo &PSI =
2079 getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
2080 RegionInfo &RI = getAnalysis<RegionInfoPass>().getRegionInfo();
2081 std::unique_ptr<OptimizationRemarkEmitter> OwnedORE =
2082 std::make_unique<OptimizationRemarkEmitter>(&F);
2083 return CHR(F, BFI, DT, PSI, RI, *OwnedORE.get()).run();
2084}
2085
2086namespace llvm {
2087
2088ControlHeightReductionPass::ControlHeightReductionPass() {
2089 parseCHRFilterFiles();
2090}
2091
2092PreservedAnalyses ControlHeightReductionPass::run(
2093 Function &F,
2094 FunctionAnalysisManager &FAM) {
2095 auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
2096 auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
2097 auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
2098 auto &PSI = *MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
2099 auto &RI = FAM.getResult<RegionInfoAnalysis>(F);
2100 auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
2101 bool Changed = CHR(F, BFI, DT, PSI, RI, ORE).run();
2102 if (!Changed)
2103 return PreservedAnalyses::all();
2104 return PreservedAnalyses::none();
2105}
2106
2107} // namespace llvm

/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ADT/SmallVector.h

1//===- llvm/ADT/SmallVector.h - 'Normally small' vectors --------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the SmallVector class.
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_ADT_SMALLVECTOR_H
14#define LLVM_ADT_SMALLVECTOR_H
15
16#include "llvm/ADT/iterator_range.h"
17#include "llvm/Support/Compiler.h"
18#include "llvm/Support/ErrorHandling.h"
19#include "llvm/Support/MemAlloc.h"
20#include "llvm/Support/type_traits.h"
21#include <algorithm>
22#include <cassert>
23#include <cstddef>
24#include <cstdlib>
25#include <cstring>
26#include <functional>
27#include <initializer_list>
28#include <iterator>
29#include <limits>
30#include <memory>
31#include <new>
32#include <type_traits>
33#include <utility>
34
35namespace llvm {
36
37/// This is all the stuff common to all SmallVectors.
38///
39/// The template parameter specifies the type which should be used to hold the
40/// Size and Capacity of the SmallVector, so it can be adjusted.
41/// Using 32 bit size is desirable to shrink the size of the SmallVector.
42/// Using 64 bit size is desirable for cases like SmallVector<char>, where a
43/// 32 bit size would limit the vector to ~4GB. SmallVectors are used for
44/// buffering bitcode output - which can exceed 4GB.
45template <class Size_T> class SmallVectorBase {
46protected:
 // Untyped pointer to the start of the element buffer; initialised from the
 // FirstEl constructor argument.
47 void *BeginX;
 // Size is the value reported by size() and starts at 0. Capacity is the
 // value reported by capacity() and is supplied by the constructor.
48 Size_T Size = 0, Capacity;
49
50 /// The maximum value of the Size_T used.
51 static constexpr size_t SizeTypeMax() {
52 return std::numeric_limits<Size_T>::max();
53 }
54
 // No default construction: a buffer pointer and a capacity must be given.
55 SmallVectorBase() = delete;
56 SmallVectorBase(void *FirstEl, size_t TotalCapacity)
57 : BeginX(FirstEl), Capacity(TotalCapacity) {}
58
59 /// This is a helper for \a grow() that's out of line to reduce code
60 /// duplication. This function will report a fatal error if it can't grow at
61 /// least to \p MinSize.
62 void *mallocForGrow(size_t MinSize, size_t TSize, size_t &NewCapacity);
63
64 /// This is an implementation of the grow() method which only works
65 /// on POD-like data types and is out of line to reduce code duplication.
66 /// This function will report a fatal error if it cannot increase capacity.
67 void grow_pod(void *FirstEl, size_t MinSize, size_t TSize);
68
69public:
70 size_t size() const { return Size; }
71 size_t capacity() const { return Capacity; }
72
 // True exactly when Size is zero.
73 LLVM_NODISCARD[[clang::warn_unused_result]] bool empty() const { return !Size; }
5
Assuming field 'Size' is not equal to 0, which participates in a condition later
6
Returning zero, which participates in a condition later
74
75 /// Set the array size to \p N, which the current array must have enough
76 /// capacity for.
77 ///
78 /// This does not construct or destroy any elements in the vector.
79 ///
80 /// Clients can use this in conjunction with capacity() to write past the end
81 /// of the buffer when they know that more elements are available, and only
82 /// update the size later. This avoids the cost of value initializing elements
83 /// which will only be overwritten.
84 void set_size(size_t N) {
 // In this listing the assert is expanded to ((void)0), so N is not
 // range-checked against capacity() here.
85 assert(N <= capacity())((void)0);
86 Size = N;
87 }
88};
89
// Integer type used for the size and capacity fields of a vector of T:
// uint64_t when sizeof(T) < 4 and pointers are at least 8 bytes wide,
// uint32_t in every other case.
90template <class T>
91using SmallVectorSizeType =
92 typename std::conditional<sizeof(T) < 4 && sizeof(void *) >= 8, uint64_t,
93 uint32_t>::type;
94
95/// Figure out the offset of the first element.
// Layout stand-in, never used to hold data: Base has the size and alignment
// of the SmallVectorBase header for T, and FirstEl has the size and alignment
// of one T. The offset of FirstEl within this struct is what
// SmallVectorTemplateCommon::getFirstEl() adds to `this`.
96template <class T, typename = void> struct SmallVectorAlignmentAndSize {
97 alignas(SmallVectorBase<SmallVectorSizeType<T>>) char Base[sizeof(
98 SmallVectorBase<SmallVectorSizeType<T>>)];
99 alignas(T) char FirstEl[sizeof(T)];
100};
101
102/// This is the part of SmallVectorTemplateBase which does not depend on whether
103/// the type T is a POD. The extra dummy template argument is used by ArrayRef
104/// to avoid unnecessarily requiring T to be complete.
105template <typename T, typename = void>
106class SmallVectorTemplateCommon
107 : public SmallVectorBase<SmallVectorSizeType<T>> {
108 using Base = SmallVectorBase<SmallVectorSizeType<T>>;
109
110 /// Find the address of the first element. For this pointer math to be valid
111 /// with small-size of 0 for T with lots of alignment, it's important that
112 /// SmallVectorStorage is properly-aligned even for small-size of 0.
 // The address is derived from `this` plus the offset of FirstEl in
 // SmallVectorAlignmentAndSize<T>; it is computed on demand, not stored.
113 void *getFirstEl() const {
114 return const_cast<void *>(reinterpret_cast<const void *>(
115 reinterpret_cast<const char *>(this) +
116 offsetof(SmallVectorAlignmentAndSize<T>, FirstEl)__builtin_offsetof(SmallVectorAlignmentAndSize<T>, FirstEl
)
));
117 }
118 // Space after 'FirstEl' is clobbered, do not add any instance vars after it.
119
120protected:
 // Note: the parameter named Size is forwarded to the base class as its
 // TotalCapacity argument, together with the getFirstEl() address as the
 // buffer start. It is not the element count.
121 SmallVectorTemplateCommon(size_t Size) : Base(getFirstEl(), Size) {}
122
123 void grow_pod(size_t MinSize, size_t TSize) {
124 Base::grow_pod(getFirstEl(), MinSize, TSize);
125 }
126
127 /// Return true if this is a smallvector which has not had dynamic
128 /// memory allocated for it.
129 bool isSmall() const { return this->BeginX == getFirstEl(); }
130
131 /// Put this vector in a state of being small.
132 void resetToSmall() {
133 this->BeginX = getFirstEl();
134 this->Size = this->Capacity = 0; // FIXME: Setting Capacity to 0 is suspect.
135 }
136
137 /// Return true if V is an internal reference to the given range.
 // Half-open test: First <= V < Last.
138 bool isReferenceToRange(const void *V, const void *First, const void *Last) const {
139 // Use std::less to avoid UB.
140 std::less<> LessThan;
141 return !LessThan(V, First) && LessThan(V, Last);
142 }
143
144 /// Return true if V is an internal reference to this vector.
145 bool isReferenceToStorage(const void *V) const {
146 return isReferenceToRange(V, this->begin(), this->end());
147 }
148
149 /// Return true if First and Last form a valid (possibly empty) range in this
150 /// vector's storage.
 // Requires begin() <= First <= Last <= end().
151 bool isRangeInStorage(const void *First, const void *Last) const {
152 // Use std::less to avoid UB.
153 std::less<> LessThan;
154 return !LessThan(First, this->begin()) && !LessThan(Last, First) &&
155 !LessThan(this->end(), Last);
156 }
157
158 /// Return true unless Elt will be invalidated by resizing the vector to
159 /// NewSize.
160 bool isSafeToReferenceAfterResize(const void *Elt, size_t NewSize) {
161 // Past the end.
162 if (LLVM_LIKELY(!isReferenceToStorage(Elt))__builtin_expect((bool)(!isReferenceToStorage(Elt)), true))
163 return true;
164
165 // Return false if Elt will be destroyed by shrinking.
166 if (NewSize <= this->size())
167 return Elt < this->begin() + NewSize;
168
169 // Return false if we need to grow.
170 return NewSize <= this->capacity();
171 }
172
173 /// Check whether Elt will be invalidated by resizing the vector to NewSize.
 // In this listing the assert is expanded to ((void)0), so this function and
 // the assertSafeTo* helpers that call it perform no check here.
174 void assertSafeToReferenceAfterResize(const void *Elt, size_t NewSize) {
175 assert(isSafeToReferenceAfterResize(Elt, NewSize) &&((void)0)
176 "Attempting to reference an element of the vector in an operation "((void)0)
177 "that invalidates it")((void)0);
178 }
179
180 /// Check whether Elt will be invalidated by increasing the size of the
181 /// vector by N.
182 void assertSafeToAdd(const void *Elt, size_t N = 1) {
183 this->assertSafeToReferenceAfterResize(Elt, this->size() + N);
184 }
185
186 /// Check whether any part of the range will be invalidated by clearing.
 // Only the two ends of the range, From and To - 1, are examined.
187 void assertSafeToReferenceAfterClear(const T *From, const T *To) {
188 if (From == To)
189 return;
190 this->assertSafeToReferenceAfterResize(From, 0);
191 this->assertSafeToReferenceAfterResize(To - 1, 0);
192 }
 // Overload selected for iterator types other than T* / const T*: no check.
193 template <
194 class ItTy,
195 std::enable_if_t<!std::is_same<std::remove_const_t<ItTy>, T *>::value,
196 bool> = false>
197 void assertSafeToReferenceAfterClear(ItTy, ItTy) {}
198
199 /// Check whether any part of the range will be invalidated by growing.
 // Only the two ends of the range, From and To - 1, are examined.
200 void assertSafeToAddRange(const T *From, const T *To) {
201 if (From == To)
202 return;
203 this->assertSafeToAdd(From, To - From);
204 this->assertSafeToAdd(To - 1, To - From);
205 }
 // Overload selected for iterator types other than T* / const T*: no check.
206 template <
207 class ItTy,
208 std::enable_if_t<!std::is_same<std::remove_const_t<ItTy>, T *>::value,
209 bool> = false>
210 void assertSafeToAddRange(ItTy, ItTy) {}
211
212 /// Reserve enough space to add one element, and return the updated element
213 /// pointer in case it was a reference to the storage.
 // This points at the concrete vector (type U) so that U::TakesParamByValue
 // and U::grow can be used. N is the number of elements about to be added.
214 template <class U>
215 static const T *reserveForParamAndGetAddressImpl(U *This, const T &Elt,
216 size_t N) {
217 size_t NewSize = This->size() + N;
 // Fast path: the existing capacity suffices, grow() is not called, and the
 // address of Elt is returned unchanged.
218 if (LLVM_LIKELY(NewSize <= This->capacity())__builtin_expect((bool)(NewSize <= This->capacity()), true
)
)
219 return &Elt;
220
 // grow() is about to be called. If Elt lives inside the current storage,
 // remember its index so its address can be recomputed from the new begin().
 // The check is skipped when U takes its parameters by value.
221 bool ReferencesStorage = false;
222 int64_t Index = -1;
223 if (!U::TakesParamByValue) {
224 if (LLVM_UNLIKELY(This->isReferenceToStorage(&Elt))__builtin_expect((bool)(This->isReferenceToStorage(&Elt
)), false)
) {
225 ReferencesStorage = true;
226 Index = &Elt - This->begin();
227 }
228 }
229 This->grow(NewSize);
230 return ReferencesStorage ? This->begin() + Index : &Elt;
231 }
232
233public:
234 using size_type = size_t;
235 using difference_type = ptrdiff_t;
236 using value_type = T;
237 using iterator = T *;
238 using const_iterator = const T *;
239
240 using const_reverse_iterator = std::reverse_iterator<const_iterator>;
241 using reverse_iterator = std::reverse_iterator<iterator>;
242
243 using reference = T &;
244 using const_reference = const T &;
245 using pointer = T *;
246 using const_pointer = const T *;
247
248 using Base::capacity;
249 using Base::empty;
250 using Base::size;
251
252 // forward iterator creation methods.
 // Iterators are raw pointers: begin() is BeginX cast to T*, and end() is
 // begin() + size().
253 iterator begin() { return (iterator)this->BeginX; }
254 const_iterator begin() const { return (const_iterator)this->BeginX; }
255 iterator end() { return begin() + size(); }
256 const_iterator end() const { return begin() + size(); }
257
258 // reverse iterator creation methods.
259 reverse_iterator rbegin() { return reverse_iterator(end()); }
260 const_reverse_iterator rbegin() const{ return const_reverse_iterator(end()); }
261 reverse_iterator rend() { return reverse_iterator(begin()); }
262 const_reverse_iterator rend() const { return const_reverse_iterator(begin());}
263
264 size_type size_in_bytes() const { return size() * sizeof(T); }
 // The smaller of the largest count the size field type can represent and the
 // number of T objects that fit in size_type(-1) bytes.
265 size_type max_size() const {
266 return std::min(this->SizeTypeMax(), size_type(-1) / sizeof(T));
267 }
268
269 size_t capacity_in_bytes() const { return capacity() * sizeof(T); }
270
271 /// Return a pointer to the vector's buffer, even if empty().
272 pointer data() { return pointer(begin()); }
273 /// Return a pointer to the vector's buffer, even if empty().
274 const_pointer data() const { return const_pointer(begin()); }
275
 // The bounds and non-empty asserts in the accessors below are expanded to
 // ((void)0) in this listing, so operator[], front() and back() are
 // unchecked here.
276 reference operator[](size_type idx) {
277 assert(idx < size())((void)0);
278 return begin()[idx];
279 }
280 const_reference operator[](size_type idx) const {
281 assert(idx < size())((void)0);
282 return begin()[idx];
283 }
284
285 reference front() {
286 assert(!empty())((void)0);
287 return begin()[0];
288 }
289 const_reference front() const {
290 assert(!empty())((void)0);
291 return begin()[0];
292 }
293
294 reference back() {
295 assert(!empty())((void)0);
296 return end()[-1];
297 }
298 const_reference back() const {
299 assert(!empty())((void)0);
300 return end()[-1];
301 }
302};
303
304/// SmallVectorTemplateBase<TriviallyCopyable = false> - This is where we put
305/// method implementations that are designed to work with non-trivial T's.
306///
307/// We approximate is_trivially_copyable with trivial move/copy construction and
308/// trivial destruction. While the standard doesn't specify that you're allowed
309/// copy these types with memcpy, there is no way for the type to observe this.
310/// This catches the important case of std::pair<POD, POD>, which is not
311/// trivially assignable.
312template <typename T, bool = (is_trivially_copy_constructible<T>::value) &&
313 (is_trivially_move_constructible<T>::value) &&
314 std::is_trivially_destructible<T>::value>
315class SmallVectorTemplateBase : public SmallVectorTemplateCommon<T> {
316 friend class SmallVectorTemplateCommon<T>;
317
318protected:
 // This primary template always takes elements by const reference.
319 static constexpr bool TakesParamByValue = false;
320 using ValueParamT = const T &;
321
322 SmallVectorTemplateBase(size_t Size) : SmallVectorTemplateCommon<T>(Size) {}
323
 // Runs destructors on [S, E) from back to front: E - 1 first, S last.
324 static void destroy_range(T *S, T *E) {
325 while (S != E) {
326 --E;
327 E->~T();
328 }
329 }
330
331 /// Move the range [I, E) into the uninitialized memory starting with "Dest",
332 /// constructing elements as needed.
 // Implemented as std::uninitialized_copy over move iterators.
333 template<typename It1, typename It2>
334 static void uninitialized_move(It1 I, It1 E, It2 Dest) {
335 std::uninitialized_copy(std::make_move_iterator(I),
336 std::make_move_iterator(E), Dest);
337 }
338
339 /// Copy the range [I, E) onto the uninitialized memory starting with "Dest",
340 /// constructing elements as needed.
341 template<typename It1, typename It2>
342 static void uninitialized_copy(It1 I, It1 E, It2 Dest) {
343 std::uninitialized_copy(I, E, Dest);
344 }
345
346 /// Grow the allocated memory (without initializing new elements), doubling
347 /// the size of the allocated memory. Guarantees space for at least one more
348 /// element, or MinSize more elements if specified.
349 void grow(size_t MinSize = 0);
350
351 /// Create a new allocation big enough for \p MinSize and pass back its size
352 /// in \p NewCapacity. This is the first section of \a grow().
 // Forwards to the untyped base-class helper with sizeof(T) as element size.
353 T *mallocForGrow(size_t MinSize, size_t &NewCapacity) {
354 return static_cast<T *>(
355 SmallVectorBase<SmallVectorSizeType<T>>::mallocForGrow(
356 MinSize, sizeof(T), NewCapacity));
357 }
358
359 /// Move existing elements over to the new allocation \p NewElts, the middle
360 /// section of \a grow().
361 void moveElementsForGrow(T *NewElts);
362
363 /// Transfer ownership of the allocation, finishing up \a grow().
364 void takeAllocationForGrow(T *NewElts, size_t NewCapacity);
365
366 /// Reserve enough space to add one element, and return the updated element
367 /// pointer in case it was a reference to the storage.
368 const T *reserveForParamAndGetAddress(const T &Elt, size_t N = 1) {
369 return this->reserveForParamAndGetAddressImpl(this, Elt, N);
370 }
371
372 /// Reserve enough space to add one element, and return the updated element
373 /// pointer in case it was a reference to the storage.
 // Non-const overload: same computation, with constness cast away on return.
374 T *reserveForParamAndGetAddress(T &Elt, size_t N = 1) {
375 return const_cast<T *>(
376 this->reserveForParamAndGetAddressImpl(this, Elt, N));
377 }
378
379 static T &&forward_value_param(T &&V) { return std::move(V); }
380 static const T &forward_value_param(const T &V) { return V; }
381
382 void growAndAssign(size_t NumElts, const T &Elt) {
383 // Grow manually in case Elt is an internal reference.
384 size_t NewCapacity;
385 T *NewElts = mallocForGrow(NumElts, NewCapacity);
 // The new buffer is filled from Elt before the old elements are destroyed,
 // so Elt is still readable if it refers to one of them.
386 std::uninitialized_fill_n(NewElts, NumElts, Elt);
387 this->destroy_range(this->begin(), this->end());
388 takeAllocationForGrow(NewElts, NewCapacity);
389 this->set_size(NumElts);
390 }
391
392 template <typename... ArgTypes> T &growAndEmplaceBack(ArgTypes &&... Args) {
393 // Grow manually in case one of Args is an internal reference.
394 size_t NewCapacity;
395 T *NewElts = mallocForGrow(0, NewCapacity);
 // The new element is constructed in its final slot of the new buffer before
 // the existing elements are moved out of the old one.
396 ::new ((void *)(NewElts + this->size())) T(std::forward<ArgTypes>(Args)...);
397 moveElementsForGrow(NewElts);
398 takeAllocationForGrow(NewElts, NewCapacity);
399 this->set_size(this->size() + 1);
400 return this->back();
401 }
402
403public:
 // Copy-constructs a new element at end() from *EltPtr, which is the address
 // of Elt after any growth performed while reserving.
404 void push_back(const T &Elt) {
405 const T *EltPtr = reserveForParamAndGetAddress(Elt);
406 ::new ((void *)this->end()) T(*EltPtr);
407 this->set_size(this->size() + 1);
408 }
409
 // Same as above, but move-constructs the new element.
410 void push_back(T &&Elt) {
411 T *EltPtr = reserveForParamAndGetAddress(Elt);
412 ::new ((void *)this->end()) T(::std::move(*EltPtr));
413 this->set_size(this->size() + 1);
414 }
415
 // The size is reduced first, so end() then addresses the element being
 // removed, which is destroyed in place. There is no emptiness check here.
416 void pop_back() {
417 this->set_size(this->size() - 1);
418 this->end()->~T();
419 }
420};
421
422// Define this out-of-line to dissuade the C++ compiler from inlining it.
// Three steps: obtain a new allocation sized for MinSize, transfer the
// existing elements into it, then adopt it as this vector's buffer.
423template <typename T, bool TriviallyCopyable>
424void SmallVectorTemplateBase<T, TriviallyCopyable>::grow(size_t MinSize) {
425 size_t NewCapacity;
426 T *NewElts = mallocForGrow(MinSize, NewCapacity);
427 moveElementsForGrow(NewElts);
428 takeAllocationForGrow(NewElts, NewCapacity);
429}
430
431// Define this out-of-line to dissuade the C++ compiler from inlining it.
// Transfers [begin(), end()) into the uninitialized memory at NewElts and
// then destroys the originals. BeginX, Size and Capacity are not modified.
432template <typename T, bool TriviallyCopyable>
433void SmallVectorTemplateBase<T, TriviallyCopyable>::moveElementsForGrow(
434 T *NewElts) {
435 // Move the elements over.
436 this->uninitialized_move(this->begin(), this->end(), NewElts);
437
438 // Destroy the original elements.
439 destroy_range(this->begin(), this->end());
440}
441
442// Define this out-of-line to dissuade the C++ compiler from inlining it.
// Makes NewElts the vector's buffer and records NewCapacity. The previous
// buffer is freed only when isSmall() is false. Size is not modified here.
443template <typename T, bool TriviallyCopyable>
444void SmallVectorTemplateBase<T, TriviallyCopyable>::takeAllocationForGrow(
445 T *NewElts, size_t NewCapacity) {
446 // If this wasn't grown from the inline copy, deallocate the old space.
447 if (!this->isSmall())
448 free(this->begin());
449
450 this->BeginX = NewElts;
451 this->Capacity = NewCapacity;
452}
453
454/// SmallVectorTemplateBase<TriviallyCopyable = true> - This is where we put
455/// method implementations that are designed to work with trivially copyable
456/// T's. This allows using memcpy in place of copy/move construction and
457/// skipping destruction.
458template <typename T>
459class SmallVectorTemplateBase<T, true> : public SmallVectorTemplateCommon<T> {
460 friend class SmallVectorTemplateCommon<T>;
461
462protected:
463 /// True if it's cheap enough to take parameters by value. Doing so avoids
464 /// overhead related to mitigations for reference invalidation.
 // The threshold is two pointers worth of bytes.
465 static constexpr bool TakesParamByValue = sizeof(T) <= 2 * sizeof(void *);
466
467 /// Either const T& or T, depending on whether it's cheap enough to take
468 /// parameters by value.
469 using ValueParamT =
470 typename std::conditional<TakesParamByValue, T, const T &>::type;
471
472 SmallVectorTemplateBase(size_t Size) : SmallVectorTemplateCommon<T>(Size) {}
473
474 // No need to do a destroy loop for POD's.
475 static void destroy_range(T *, T *) {}
476
477 /// Move the range [I, E) onto the uninitialized memory
478 /// starting with "Dest", constructing elements into it as needed.
479 template<typename It1, typename It2>
480 static void uninitialized_move(It1 I, It1 E, It2 Dest) {
481 // Just do a copy.
482 uninitialized_copy(I, E, Dest);
483 }
484
485 /// Copy the range [I, E) onto the uninitialized memory
486 /// starting with "Dest", constructing elements into it as needed.
487 template<typename It1, typename It2>
488 static void uninitialized_copy(It1 I, It1 E, It2 Dest) {
489 // Arbitrary iterator types; just use the basic implementation.
490 std::uninitialized_copy(I, E, Dest);
491 }
492
493 /// Copy the range [I, E) onto the uninitialized memory
494 /// starting with "Dest", constructing elements into it as needed.
 // Overload for raw pointers whose pointee type, ignoring const on the
 // source, matches the destination type.
495 template <typename T1, typename T2>
496 static void uninitialized_copy(
497 T1 *I, T1 *E, T2 *Dest,
498 std::enable_if_t<std::is_same<typename std::remove_const<T1>::type,
499 T2>::value> * = nullptr) {
500 // Use memcpy for PODs iterated by pointers (which includes SmallVector
501 // iterators): std::uninitialized_copy optimizes to memmove, but we can
502 // use memcpy here. Note that I and E are iterators and thus might be
503 // invalid for memcpy if they are equal.
504 if (I != E)
505 memcpy(reinterpret_cast<void *>(Dest), I, (E - I) * sizeof(T));
506 }
507
508 /// Double the size of the allocated memory, guaranteeing space for at
509 /// least one more element or MinSize if specified.
510 void grow(size_t MinSize = 0) { this->grow_pod(MinSize, sizeof(T)); }
511
512 /// Reserve enough space to add one element, and return the updated element
513 /// pointer in case it was a reference to the storage.
514 const T *reserveForParamAndGetAddress(const T &Elt, size_t N = 1) {
515 return this->reserveForParamAndGetAddressImpl(this, Elt, N);
516 }
517
518 /// Reserve enough space to add one element, and return the updated element
519 /// pointer in case it was a reference to the storage.
 // Non-const overload: same computation, with constness cast away on return.
520 T *reserveForParamAndGetAddress(T &Elt, size_t N = 1) {
521 return const_cast<T *>(
522 this->reserveForParamAndGetAddressImpl(this, Elt, N));
523 }
524
525 /// Copy \p V or return a reference, depending on \a ValueParamT.
526 static ValueParamT forward_value_param(ValueParamT V) { return V; }
527
528 void growAndAssign(size_t NumElts, T Elt) {
529 // Elt has been copied in case it's an internal reference, side-stepping
530 // reference invalidation problems without losing the realloc optimization.
 // The size is set to 0 before growing and to NumElts after the fill.
531 this->set_size(0);
532 this->grow(NumElts);
533 std::uninitialized_fill_n(this->begin(), NumElts, Elt);
534 this->set_size(NumElts);
535 }
536
537 template <typename... ArgTypes> T &growAndEmplaceBack(ArgTypes &&... Args) {
538 // Use push_back with a copy in case Args has an internal reference,
539 // side-stepping reference invalidation problems without losing the realloc
540 // optimization.
541 push_back(T(std::forward<ArgTypes>(Args)...));
542 return this->back();
543 }
544
545public:
 // Appends by copying sizeof(T) raw bytes into the slot at end(). EltPtr is
 // the address of Elt after any growth performed while reserving.
546 void push_back(ValueParamT Elt) {
547 const T *EltPtr = reserveForParamAndGetAddress(Elt);
548 memcpy(reinterpret_cast<void *>(this->end()), EltPtr, sizeof(T));
549 this->set_size(this->size() + 1);
550 }
551
 // Only the size changes; no destructor is invoked and there is no emptiness
 // check here.
552 void pop_back() { this->set_size(this->size() - 1); }
553};
554
555/// This class consists of common code factored out of the SmallVector class to
556/// reduce code duplication based on the SmallVector 'N' template parameter.
557template <typename T>
558class SmallVectorImpl : public SmallVectorTemplateBase<T> {
559 using SuperClass = SmallVectorTemplateBase<T>;
560
561public:
562 using iterator = typename SuperClass::iterator;
563 using const_iterator = typename SuperClass::const_iterator;
564 using reference = typename SuperClass::reference;
565 using size_type = typename SuperClass::size_type;
566
567protected:
568 using SmallVectorTemplateBase<T>::TakesParamByValue;
569 using ValueParamT = typename SuperClass::ValueParamT;
570
571 // Default ctor - Initialize to empty.
572 explicit SmallVectorImpl(unsigned N)
573 : SmallVectorTemplateBase<T>(N) {}
574
575public:
576 SmallVectorImpl(const SmallVectorImpl &) = delete;
577
578 ~SmallVectorImpl() {
579 // Subclass has already destructed this vector's elements.
580 // If this wasn't grown from the inline copy, deallocate the old space.
581 if (!this->isSmall())
582 free(this->begin());
583 }
584
585 void clear() {
586 this->destroy_range(this->begin(), this->end());
587 this->Size = 0;
588 }
589
590private:
591 template <bool ForOverwrite> void resizeImpl(size_type N) {
592 if (N < this->size()) {
593 this->pop_back_n(this->size() - N);
594 } else if (N > this->size()) {
595 this->reserve(N);
596 for (auto I = this->end(), E = this->begin() + N; I != E; ++I)
597 if (ForOverwrite)
598 new (&*I) T;
599 else
600 new (&*I) T();
601 this->set_size(N);
602 }
603 }
604
605public:
606 void resize(size_type N) { resizeImpl<false>(N); }
607
608 /// Like resize, but \ref T is POD, the new values won't be initialized.
609 void resize_for_overwrite(size_type N) { resizeImpl<true>(N); }
610
611 void resize(size_type N, ValueParamT NV) {
612 if (N == this->size())
613 return;
614
615 if (N < this->size()) {
616 this->pop_back_n(this->size() - N);
617 return;
618 }
619
620 // N > this->size(). Defer to append.
621 this->append(N - this->size(), NV);
622 }
623
624 void reserve(size_type N) {
625 if (this->capacity() < N)
626 this->grow(N);
627 }
628
629 void pop_back_n(size_type NumItems) {
630 assert(this->size() >= NumItems)((void)0);
631 this->destroy_range(this->end() - NumItems, this->end());
632 this->set_size(this->size() - NumItems);
633 }
634
635 LLVM_NODISCARD[[clang::warn_unused_result]] T pop_back_val() {
636 T Result = ::std::move(this->back());
637 this->pop_back();
638 return Result;
639 }
640
641 void swap(SmallVectorImpl &RHS);
642
643 /// Add the specified range to the end of the SmallVector.
644 template <typename in_iter,
645 typename = std::enable_if_t<std::is_convertible<
646 typename std::iterator_traits<in_iter>::iterator_category,
647 std::input_iterator_tag>::value>>
648 void append(in_iter in_start, in_iter in_end) {
649 this->assertSafeToAddRange(in_start, in_end);
650 size_type NumInputs = std::distance(in_start, in_end);
651 this->reserve(this->size() + NumInputs);
652 this->uninitialized_copy(in_start, in_end, this->end());
653 this->set_size(this->size() + NumInputs);
654 }
655
656 /// Append \p NumInputs copies of \p Elt to the end.
657 void append(size_type NumInputs, ValueParamT Elt) {
658 const T *EltPtr = this->reserveForParamAndGetAddress(Elt, NumInputs);
659 std::uninitialized_fill_n(this->end(), NumInputs, *EltPtr);
660 this->set_size(this->size() + NumInputs);
661 }
662
663 void append(std::initializer_list<T> IL) {
664 append(IL.begin(), IL.end());
665 }
666
667 void append(const SmallVectorImpl &RHS) { append(RHS.begin(), RHS.end()); }
668
669 void assign(size_type NumElts, ValueParamT Elt) {
670 // Note that Elt could be an internal reference.
671 if (NumElts > this->capacity()) {
672 this->growAndAssign(NumElts, Elt);
673 return;
674 }
675
676 // Assign over existing elements.
677 std::fill_n(this->begin(), std::min(NumElts, this->size()), Elt);
678 if (NumElts > this->size())
679 std::uninitialized_fill_n(this->end(), NumElts - this->size(), Elt);
680 else if (NumElts < this->size())
681 this->destroy_range(this->begin() + NumElts, this->end());
682 this->set_size(NumElts);
683 }
684
685 // FIXME: Consider assigning over existing elements, rather than clearing &
686 // re-initializing them - for all assign(...) variants.
687
688 template <typename in_iter,
689 typename = std::enable_if_t<std::is_convertible<
690 typename std::iterator_traits<in_iter>::iterator_category,
691 std::input_iterator_tag>::value>>
692 void assign(in_iter in_start, in_iter in_end) {
693 this->assertSafeToReferenceAfterClear(in_start, in_end);
694 clear();
695 append(in_start, in_end);
696 }
697
698 void assign(std::initializer_list<T> IL) {
699 clear();
700 append(IL);
701 }
702
703 void assign(const SmallVectorImpl &RHS) { assign(RHS.begin(), RHS.end()); }
704
705 iterator erase(const_iterator CI) {
706 // Just cast away constness because this is a non-const member function.
707 iterator I = const_cast<iterator>(CI);
708
709 assert(this->isReferenceToStorage(CI) && "Iterator to erase is out of bounds.")((void)0);
710
711 iterator N = I;
712 // Shift all elts down one.
713 std::move(I+1, this->end(), I);
714 // Drop the last elt.
715 this->pop_back();
716 return(N);
717 }
718
719 iterator erase(const_iterator CS, const_iterator CE) {
720 // Just cast away constness because this is a non-const member function.
721 iterator S = const_cast<iterator>(CS);
722 iterator E = const_cast<iterator>(CE);
723
724 assert(this->isRangeInStorage(S, E) && "Range to erase is out of bounds.")((void)0);
725
726 iterator N = S;
727 // Shift all elts down.
728 iterator I = std::move(E, this->end(), S);
729 // Drop the last elts.
730 this->destroy_range(I, this->end());
731 this->set_size(I - this->begin());
732 return(N);
733 }
734
735private:
736 template <class ArgType> iterator insert_one_impl(iterator I, ArgType &&Elt) {
737 // Callers ensure that ArgType is derived from T.
738 static_assert(
739 std::is_same<std::remove_const_t<std::remove_reference_t<ArgType>>,
740 T>::value,
741 "ArgType must be derived from T!");
742
743 if (I == this->end()) { // Important special case for empty vector.
744 this->push_back(::std::forward<ArgType>(Elt));
745 return this->end()-1;
746 }
747
748 assert(this->isReferenceToStorage(I) && "Insertion iterator is out of bounds.")((void)0);
749
750 // Grow if necessary.
751 size_t Index = I - this->begin();
752 std::remove_reference_t<ArgType> *EltPtr =
753 this->reserveForParamAndGetAddress(Elt);
754 I = this->begin() + Index;
755
756 ::new ((void*) this->end()) T(::std::move(this->back()));
757 // Push everything else over.
758 std::move_backward(I, this->end()-1, this->end());
759 this->set_size(this->size() + 1);
760
761 // If we just moved the element we're inserting, be sure to update
762 // the reference (never happens if TakesParamByValue).
763 static_assert(!TakesParamByValue || std::is_same<ArgType, T>::value,
764 "ArgType must be 'T' when taking by value!");
765 if (!TakesParamByValue && this->isReferenceToRange(EltPtr, I, this->end()))
766 ++EltPtr;
767
768 *I = ::std::forward<ArgType>(*EltPtr);
769 return I;
770 }
771
772public:
773 iterator insert(iterator I, T &&Elt) {
774 return insert_one_impl(I, this->forward_value_param(std::move(Elt)));
775 }
776
777 iterator insert(iterator I, const T &Elt) {
778 return insert_one_impl(I, this->forward_value_param(Elt));
779 }
780
781 iterator insert(iterator I, size_type NumToInsert, ValueParamT Elt) {
782 // Convert iterator to elt# to avoid invalidating iterator when we reserve()
783 size_t InsertElt = I - this->begin();
784
785 if (I == this->end()) { // Important special case for empty vector.
786 append(NumToInsert, Elt);
787 return this->begin()+InsertElt;
788 }
789
790 assert(this->isReferenceToStorage(I) && "Insertion iterator is out of bounds.")((void)0);
791
792 // Ensure there is enough space, and get the (maybe updated) address of
793 // Elt.
794 const T *EltPtr = this->reserveForParamAndGetAddress(Elt, NumToInsert);
795
796 // Uninvalidate the iterator.
797 I = this->begin()+InsertElt;
798
799 // If there are more elements between the insertion point and the end of the
800 // range than there are being inserted, we can use a simple approach to
801 // insertion. Since we already reserved space, we know that this won't
802 // reallocate the vector.
803 if (size_t(this->end()-I) >= NumToInsert) {
804 T *OldEnd = this->end();
805 append(std::move_iterator<iterator>(this->end() - NumToInsert),
806 std::move_iterator<iterator>(this->end()));
807
808 // Copy the existing elements that get replaced.
809 std::move_backward(I, OldEnd-NumToInsert, OldEnd);
810
811 // If we just moved the element we're inserting, be sure to update
812 // the reference (never happens if TakesParamByValue).
813 if (!TakesParamByValue && I <= EltPtr && EltPtr < this->end())
814 EltPtr += NumToInsert;
815
816 std::fill_n(I, NumToInsert, *EltPtr);
817 return I;
818 }
819
820 // Otherwise, we're inserting more elements than exist already, and we're
821 // not inserting at the end.
822
823 // Move over the elements that we're about to overwrite.
824 T *OldEnd = this->end();
825 this->set_size(this->size() + NumToInsert);
826 size_t NumOverwritten = OldEnd-I;
827 this->uninitialized_move(I, OldEnd, this->end()-NumOverwritten);
828
829 // If we just moved the element we're inserting, be sure to update
830 // the reference (never happens if TakesParamByValue).
831 if (!TakesParamByValue && I <= EltPtr && EltPtr < this->end())
832 EltPtr += NumToInsert;
833
834 // Replace the overwritten part.
835 std::fill_n(I, NumOverwritten, *EltPtr);
836
837 // Insert the non-overwritten middle part.
838 std::uninitialized_fill_n(OldEnd, NumToInsert - NumOverwritten, *EltPtr);
839 return I;
840 }
841
842 template <typename ItTy,
843 typename = std::enable_if_t<std::is_convertible<
844 typename std::iterator_traits<ItTy>::iterator_category,
845 std::input_iterator_tag>::value>>
846 iterator insert(iterator I, ItTy From, ItTy To) {
847 // Convert iterator to elt# to avoid invalidating iterator when we reserve()
848 size_t InsertElt = I - this->begin();
849
850 if (I == this->end()) { // Important special case for empty vector.
851 append(From, To);
852 return this->begin()+InsertElt;
853 }
854
855 assert(this->isReferenceToStorage(I) && "Insertion iterator is out of bounds.")((void)0);
856
857 // Check that the reserve that follows doesn't invalidate the iterators.
858 this->assertSafeToAddRange(From, To);
859
860 size_t NumToInsert = std::distance(From, To);
861
862 // Ensure there is enough space.
863 reserve(this->size() + NumToInsert);
864
865 // Uninvalidate the iterator.
866 I = this->begin()+InsertElt;
867
868 // If there are more elements between the insertion point and the end of the
869 // range than there are being inserted, we can use a simple approach to
870 // insertion. Since we already reserved space, we know that this won't
871 // reallocate the vector.
872 if (size_t(this->end()-I) >= NumToInsert) {
873 T *OldEnd = this->end();
874 append(std::move_iterator<iterator>(this->end() - NumToInsert),
875 std::move_iterator<iterator>(this->end()));
876
877 // Copy the existing elements that get replaced.
878 std::move_backward(I, OldEnd-NumToInsert, OldEnd);
879
880 std::copy(From, To, I);
881 return I;
882 }
883
884 // Otherwise, we're inserting more elements than exist already, and we're
885 // not inserting at the end.
886
887 // Move over the elements that we're about to overwrite.
888 T *OldEnd = this->end();
889 this->set_size(this->size() + NumToInsert);
890 size_t NumOverwritten = OldEnd-I;
891 this->uninitialized_move(I, OldEnd, this->end()-NumOverwritten);
892
893 // Replace the overwritten part.
894 for (T *J = I; NumOverwritten > 0; --NumOverwritten) {
895 *J = *From;
896 ++J; ++From;
897 }
898
899 // Insert the non-overwritten middle part.
900 this->uninitialized_copy(From, To, OldEnd);
901 return I;
902 }
903
904 void insert(iterator I, std::initializer_list<T> IL) {
905 insert(I, IL.begin(), IL.end());
906 }
907
908 template <typename... ArgTypes> reference emplace_back(ArgTypes &&... Args) {
909 if (LLVM_UNLIKELY(this->size() >= this->capacity())__builtin_expect((bool)(this->size() >= this->capacity
()), false)
)
910 return this->growAndEmplaceBack(std::forward<ArgTypes>(Args)...);
911
912 ::new ((void *)this->end()) T(std::forward<ArgTypes>(Args)...);
913 this->set_size(this->size() + 1);
914 return this->back();
915 }
916
917 SmallVectorImpl &operator=(const SmallVectorImpl &RHS);
918
919 SmallVectorImpl &operator=(SmallVectorImpl &&RHS);
920
921 bool operator==(const SmallVectorImpl &RHS) const {
922 if (this->size() != RHS.size()) return false;
923 return std::equal(this->begin(), this->end(), RHS.begin());
924 }
925 bool operator!=(const SmallVectorImpl &RHS) const {
926 return !(*this == RHS);
927 }
928
929 bool operator<(const SmallVectorImpl &RHS) const {
930 return std::lexicographical_compare(this->begin(), this->end(),
931 RHS.begin(), RHS.end());
932 }
933};
934
935template <typename T>
936void SmallVectorImpl<T>::swap(SmallVectorImpl<T> &RHS) {
937 if (this == &RHS) return;
938
939 // We can only avoid copying elements if neither vector is small.
940 if (!this->isSmall() && !RHS.isSmall()) {
941 std::swap(this->BeginX, RHS.BeginX);
942 std::swap(this->Size, RHS.Size);
943 std::swap(this->Capacity, RHS.Capacity);
944 return;
945 }
946 this->reserve(RHS.size());
947 RHS.reserve(this->size());
948
949 // Swap the shared elements.
950 size_t NumShared = this->size();
951 if (NumShared > RHS.size()) NumShared = RHS.size();
952 for (size_type i = 0; i != NumShared; ++i)
953 std::swap((*this)[i], RHS[i]);
954
955 // Copy over the extra elts.
956 if (this->size() > RHS.size()) {
957 size_t EltDiff = this->size() - RHS.size();
958 this->uninitialized_copy(this->begin()+NumShared, this->end(), RHS.end());
959 RHS.set_size(RHS.size() + EltDiff);
960 this->destroy_range(this->begin()+NumShared, this->end());
961 this->set_size(NumShared);
962 } else if (RHS.size() > this->size()) {
963 size_t EltDiff = RHS.size() - this->size();
964 this->uninitialized_copy(RHS.begin()+NumShared, RHS.end(), this->end());
965 this->set_size(this->size() + EltDiff);
966 this->destroy_range(RHS.begin()+NumShared, RHS.end());
967 RHS.set_size(NumShared);
968 }
969}
970
971template <typename T>
972SmallVectorImpl<T> &SmallVectorImpl<T>::
973 operator=(const SmallVectorImpl<T> &RHS) {
974 // Avoid self-assignment.
975 if (this == &RHS) return *this;
976
977 // If we already have sufficient space, assign the common elements, then
978 // destroy any excess.
979 size_t RHSSize = RHS.size();
980 size_t CurSize = this->size();
981 if (CurSize >= RHSSize) {
982 // Assign common elements.
983 iterator NewEnd;
984 if (RHSSize)
985 NewEnd = std::copy(RHS.begin(), RHS.begin()+RHSSize, this->begin());
986 else
987 NewEnd = this->begin();
988
989 // Destroy excess elements.
990 this->destroy_range(NewEnd, this->end());
991
992 // Trim.
993 this->set_size(RHSSize);
994 return *this;
995 }
996
997 // If we have to grow to have enough elements, destroy the current elements.
998 // This allows us to avoid copying them during the grow.
999 // FIXME: don't do this if they're efficiently moveable.
1000 if (this->capacity() < RHSSize) {
1001 // Destroy current elements.
1002 this->clear();
1003 CurSize = 0;
1004 this->grow(RHSSize);
1005 } else if (CurSize) {
1006 // Otherwise, use assignment for the already-constructed elements.
1007 std::copy(RHS.begin(), RHS.begin()+CurSize, this->begin());
1008 }
1009
1010 // Copy construct the new elements in place.
1011 this->uninitialized_copy(RHS.begin()+CurSize, RHS.end(),
1012 this->begin()+CurSize);
1013
1014 // Set end.
1015 this->set_size(RHSSize);
1016 return *this;
1017}
1018
1019template <typename T>
1020SmallVectorImpl<T> &SmallVectorImpl<T>::operator=(SmallVectorImpl<T> &&RHS) {
1021 // Avoid self-assignment.
1022 if (this == &RHS) return *this;
1023
1024 // If the RHS isn't small, clear this vector and then steal its buffer.
1025 if (!RHS.isSmall()) {
1026 this->destroy_range(this->begin(), this->end());
1027 if (!this->isSmall()) free(this->begin());
1028 this->BeginX = RHS.BeginX;
1029 this->Size = RHS.Size;
1030 this->Capacity = RHS.Capacity;
1031 RHS.resetToSmall();
1032 return *this;
1033 }
1034
1035 // If we already have sufficient space, assign the common elements, then
1036 // destroy any excess.
1037 size_t RHSSize = RHS.size();
1038 size_t CurSize = this->size();
1039 if (CurSize >= RHSSize) {
1040 // Assign common elements.
1041 iterator NewEnd = this->begin();
1042 if (RHSSize)
1043 NewEnd = std::move(RHS.begin(), RHS.end(), NewEnd);
1044
1045 // Destroy excess elements and trim the bounds.
1046 this->destroy_range(NewEnd, this->end());
1047 this->set_size(RHSSize);
1048
1049 // Clear the RHS.
1050 RHS.clear();
1051
1052 return *this;
1053 }
1054
1055 // If we have to grow to have enough elements, destroy the current elements.
1056 // This allows us to avoid copying them during the grow.
1057 // FIXME: this may not actually make any sense if we can efficiently move
1058 // elements.
1059 if (this->capacity() < RHSSize) {
1060 // Destroy current elements.
1061 this->clear();
1062 CurSize = 0;
1063 this->grow(RHSSize);
1064 } else if (CurSize) {
1065 // Otherwise, use assignment for the already-constructed elements.
1066 std::move(RHS.begin(), RHS.begin()+CurSize, this->begin());
1067 }
1068
1069 // Move-construct the new elements in place.
1070 this->uninitialized_move(RHS.begin()+CurSize, RHS.end(),
1071 this->begin()+CurSize);
1072
1073 // Set end.
1074 this->set_size(RHSSize);
1075
1076 RHS.clear();
1077 return *this;
1078}
1079
/// Raw, suitably aligned inline buffer for \p N elements of type \p T. A
/// separate specialization handles N == 0 so that no array is declared in
/// that case.
template <typename T, unsigned N>
struct SmallVectorStorage {
  alignas(alignof(T)) char InlineElts[sizeof(T) * N];
};

/// The N == 0 case holds no bytes but keeps the alignment of \p T, so that the
/// pointer math in \a SmallVectorTemplateCommon::getFirstEl() stays
/// well-defined.
template <typename T> struct alignas(T) SmallVectorStorage<T, 0> {};
1091
/// Forward declaration of SmallVector so that
/// calculateSmallVectorDefaultInlinedElements can reference
/// `sizeof(SmallVector<T, 0>)`.
template <typename T, unsigned N> class LLVM_GSL_OWNER SmallVector;
1096
1097/// Helper class for calculating the default number of inline elements for
1098/// `SmallVector<T>`.
1099///
1100/// This should be migrated to a constexpr function when our minimum
1101/// compiler support is enough for multi-statement constexpr functions.
1102template <typename T> struct CalculateSmallVectorDefaultInlinedElements {
1103 // Parameter controlling the default number of inlined elements
1104 // for `SmallVector<T>`.
1105 //
1106 // The default number of inlined elements ensures that
1107 // 1. There is at least one inlined element.
1108 // 2. `sizeof(SmallVector<T>) <= kPreferredSmallVectorSizeof` unless
1109 // it contradicts 1.
1110 static constexpr size_t kPreferredSmallVectorSizeof = 64;
1111
1112 // static_assert that sizeof(T) is not "too big".
1113 //
1114 // Because our policy guarantees at least one inlined element, it is possible
1115 // for an arbitrarily large inlined element to allocate an arbitrarily large
1116 // amount of inline storage. We generally consider it an antipattern for a
1117 // SmallVector to allocate an excessive amount of inline storage, so we want
1118 // to call attention to these cases and make sure that users are making an
1119 // intentional decision if they request a lot of inline storage.
1120 //
1121 // We want this assertion to trigger in pathological cases, but otherwise
1122 // not be too easy to hit. To accomplish that, the cutoff is actually somewhat
1123 // larger than kPreferredSmallVectorSizeof (otherwise,
1124 // `SmallVector<SmallVector<T>>` would be one easy way to trip it, and that
1125 // pattern seems useful in practice).
1126 //
1127 // One wrinkle is that this assertion is in theory non-portable, since
1128 // sizeof(T) is in general platform-dependent. However, we don't expect this
1129 // to be much of an issue, because most LLVM development happens on 64-bit
1130 // hosts, and therefore sizeof(T) is expected to *decrease* when compiled for
1131 // 32-bit hosts, dodging the issue. The reverse situation, where development
1132 // happens on a 32-bit host and then fails due to sizeof(T) *increasing* on a
1133 // 64-bit host, is expected to be very rare.
1134 static_assert(
1135 sizeof(T) <= 256,
1136 "You are trying to use a default number of inlined elements for "
1137 "`SmallVector<T>` but `sizeof(T)` is really big! Please use an "
1138 "explicit number of inlined elements with `SmallVector<T, N>` to make "
1139 "sure you really want that much inline storage.");
1140
1141 // Discount the size of the header itself when calculating the maximum inline
1142 // bytes.
1143 static constexpr size_t PreferredInlineBytes =
1144 kPreferredSmallVectorSizeof - sizeof(SmallVector<T, 0>);
1145 static constexpr size_t NumElementsThatFit = PreferredInlineBytes / sizeof(T);
1146 static constexpr size_t value =
1147 NumElementsThatFit == 0 ? 1 : NumElementsThatFit;
1148};
1149
1150/// This is a 'vector' (really, a variable-sized array), optimized
1151/// for the case when the array is small. It contains some number of elements
1152/// in-place, which allows it to avoid heap allocation when the actual number of
1153/// elements is below that threshold. This allows normal "small" cases to be
1154/// fast without losing generality for large inputs.
1155///
1156/// \note
1157/// In the absence of a well-motivated choice for the number of inlined
1158/// elements \p N, it is recommended to use \c SmallVector<T> (that is,
1159/// omitting the \p N). This will choose a default number of inlined elements
1160/// reasonable for allocation on the stack (for example, trying to keep \c
1161/// sizeof(SmallVector<T>) around 64 bytes).
1162///
1163/// \warning This does not attempt to be exception safe.
1164///
1165/// \see https://llvm.org/docs/ProgrammersManual.html#llvm-adt-smallvector-h
1166template <typename T,
1167 unsigned N = CalculateSmallVectorDefaultInlinedElements<T>::value>
1168class LLVM_GSL_OWNER[[gsl::Owner]] SmallVector : public SmallVectorImpl<T>,
1169 SmallVectorStorage<T, N> {
1170public:
1171 SmallVector() : SmallVectorImpl<T>(N) {}
1172
1173 ~SmallVector() {
1174 // Destroy the constructed elements in the vector.
1175 this->destroy_range(this->begin(), this->end());
1176 }
1177
1178 explicit SmallVector(size_t Size, const T &Value = T())
1179 : SmallVectorImpl<T>(N) {
1180 this->assign(Size, Value);
1181 }
1182
1183 template <typename ItTy,
1184 typename = std::enable_if_t<std::is_convertible<
1185 typename std::iterator_traits<ItTy>::iterator_category,
1186 std::input_iterator_tag>::value>>
1187 SmallVector(ItTy S, ItTy E) : SmallVectorImpl<T>(N) {
1188 this->append(S, E);
1189 }
1190
1191 template <typename RangeTy>
1192 explicit SmallVector(const iterator_range<RangeTy> &R)
1193 : SmallVectorImpl<T>(N) {
1194 this->append(R.begin(), R.end());
1195 }
1196
1197 SmallVector(std::initializer_list<T> IL) : SmallVectorImpl<T>(N) {
1198 this->assign(IL);
1199 }
1200
1201 SmallVector(const SmallVector &RHS) : SmallVectorImpl<T>(N) {
1202 if (!RHS.empty())
1203 SmallVectorImpl<T>::operator=(RHS);
1204 }
1205
1206 SmallVector &operator=(const SmallVector &RHS) {
1207 SmallVectorImpl<T>::operator=(RHS);
1208 return *this;
1209 }
1210
1211 SmallVector(SmallVector &&RHS) : SmallVectorImpl<T>(N) {
1212 if (!RHS.empty())
1213 SmallVectorImpl<T>::operator=(::std::move(RHS));
1214 }
1215
1216 SmallVector(SmallVectorImpl<T> &&RHS) : SmallVectorImpl<T>(N) {
1217 if (!RHS.empty())
1218 SmallVectorImpl<T>::operator=(::std::move(RHS));
1219 }
1220
1221 SmallVector &operator=(SmallVector &&RHS) {
1222 SmallVectorImpl<T>::operator=(::std::move(RHS));
1223 return *this;
1224 }
1225
1226 SmallVector &operator=(SmallVectorImpl<T> &&RHS) {
1227 SmallVectorImpl<T>::operator=(::std::move(RHS));
1228 return *this;
1229 }
1230
1231 SmallVector &operator=(std::initializer_list<T> IL) {
1232 this->assign(IL);
1233 return *this;
1234 }
1235};
1236
1237template <typename T, unsigned N>
1238inline size_t capacity_in_bytes(const SmallVector<T, N> &X) {
1239 return X.capacity_in_bytes();
1240}
1241
1242/// Given a range of type R, iterate the entire range and return a
1243/// SmallVector with elements of the vector. This is useful, for example,
1244/// when you want to iterate a range and then sort the results.
1245template <unsigned Size, typename R>
1246SmallVector<typename std::remove_const<typename std::remove_reference<
1247 decltype(*std::begin(std::declval<R &>()))>::type>::type,
1248 Size>
1249to_vector(R &&Range) {
1250 return {std::begin(Range), std::end(Range)};
1251}
1252
1253} // end namespace llvm
1254
1255namespace std {
1256
1257 /// Implement std::swap in terms of SmallVector swap.
1258 template<typename T>
1259 inline void
1260 swap(llvm::SmallVectorImpl<T> &LHS, llvm::SmallVectorImpl<T> &RHS) {
1261 LHS.swap(RHS);
1262 }
1263
1264 /// Implement std::swap in terms of SmallVector swap.
1265 template<typename T, unsigned N>
1266 inline void
1267 swap(llvm::SmallVector<T, N> &LHS, llvm::SmallVector<T, N> &RHS) {
1268 LHS.swap(RHS);
1269 }
1270
1271} // end namespace std
1272
1273#endif // LLVM_ADT_SMALLVECTOR_H