Bug Summary

File:src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Transforms/IPO/PartialInlining.cpp
Warning:line 986, column 18
Called C++ object pointer is null

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple amd64-unknown-openbsd7.0 -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name PartialInlining.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 1 -fhalf-no-semantic-interposition -mframe-pointer=all -relaxed-aliasing -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/usr/src/gnu/usr.bin/clang/libLLVM/obj -resource-dir /usr/local/lib/clang/13.0.0 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Analysis -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ASMParser -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/BinaryFormat -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Bitcode -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Bitcode -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Bitstream -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /include/llvm/CodeGen -I /include/llvm/CodeGen/PBQP -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/IR -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/IR -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Coroutines -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ProfileData/Coverage -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/CodeView -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/DWARF -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/MSF -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/PDB -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Demangle -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ExecutionEngine -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ExecutionEngine/JITLink -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ExecutionEngine/Orc -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend/OpenACC -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend/OpenMP -I /include/llvm/CodeGen/GlobalISel -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/IRReader -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/InstCombine -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/Transforms/InstCombine -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/LTO -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Linker -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/MC -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/MC/MCParser -I /include/llvm/CodeGen/MIRParser -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Object -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Option -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Passes -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ProfileData -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Scalar -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ADT -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Support -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/Symbolize -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Target -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Utils -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Vectorize -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/IPO -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include -I /usr/src/gnu/usr.bin/clang/libLLVM/../include -I /usr/src/gnu/usr.bin/clang/libLLVM/obj -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include -D NDEBUG -D __STDC_LIMIT_MACROS -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D LLVM_PREFIX="/usr" -D PIC -internal-isystem /usr/include/c++/v1 -internal-isystem /usr/local/lib/clang/13.0.0/include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/usr/src/gnu/usr.bin/clang/libLLVM/obj -ferror-limit 19 -fvisibility-inlines-hidden -fwrapv -D_RET_PROTECTOR -ret-protector -fno-rtti -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-valloc -fno-builtin-free -fno-builtin-strdup -fno-builtin-strndup -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /home/ben/Projects/vmm/scan-build/2022-01-12-194120-40624-1 -x c++ /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Transforms/IPO/PartialInlining.cpp
1//===- PartialInlining.cpp - Inline parts of functions --------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass performs partial inlining, typically by inlining an if statement
10// that surrounds the body of the function.
11//
12//===----------------------------------------------------------------------===//
13
14#include "llvm/Transforms/IPO/PartialInlining.h"
15#include "llvm/ADT/DenseMap.h"
16#include "llvm/ADT/DenseSet.h"
17#include "llvm/ADT/None.h"
18#include "llvm/ADT/Optional.h"
19#include "llvm/ADT/STLExtras.h"
20#include "llvm/ADT/SmallVector.h"
21#include "llvm/ADT/Statistic.h"
22#include "llvm/Analysis/BlockFrequencyInfo.h"
23#include "llvm/Analysis/BranchProbabilityInfo.h"
24#include "llvm/Analysis/InlineCost.h"
25#include "llvm/Analysis/LoopInfo.h"
26#include "llvm/Analysis/OptimizationRemarkEmitter.h"
27#include "llvm/Analysis/ProfileSummaryInfo.h"
28#include "llvm/Analysis/TargetLibraryInfo.h"
29#include "llvm/Analysis/TargetTransformInfo.h"
30#include "llvm/IR/Attributes.h"
31#include "llvm/IR/BasicBlock.h"
32#include "llvm/IR/CFG.h"
33#include "llvm/IR/DebugLoc.h"
34#include "llvm/IR/DiagnosticInfo.h"
35#include "llvm/IR/Dominators.h"
36#include "llvm/IR/Function.h"
37#include "llvm/IR/InstrTypes.h"
38#include "llvm/IR/Instruction.h"
39#include "llvm/IR/Instructions.h"
40#include "llvm/IR/IntrinsicInst.h"
41#include "llvm/IR/Intrinsics.h"
42#include "llvm/IR/Module.h"
43#include "llvm/IR/User.h"
44#include "llvm/InitializePasses.h"
45#include "llvm/Pass.h"
46#include "llvm/Support/BlockFrequency.h"
47#include "llvm/Support/BranchProbability.h"
48#include "llvm/Support/Casting.h"
49#include "llvm/Support/CommandLine.h"
50#include "llvm/Support/ErrorHandling.h"
51#include "llvm/Transforms/IPO.h"
52#include "llvm/Transforms/Utils/Cloning.h"
53#include "llvm/Transforms/Utils/CodeExtractor.h"
54#include "llvm/Transforms/Utils/ValueMapper.h"
55#include <algorithm>
56#include <cassert>
57#include <cstdint>
58#include <functional>
59#include <iterator>
60#include <memory>
61#include <tuple>
62#include <vector>
63
64using namespace llvm;
65
66#define DEBUG_TYPE"partial-inlining" "partial-inlining"
67
68STATISTIC(NumPartialInlined,static llvm::Statistic NumPartialInlined = {"partial-inlining"
, "NumPartialInlined", "Number of callsites functions partially inlined into."
}
69 "Number of callsites functions partially inlined into.")static llvm::Statistic NumPartialInlined = {"partial-inlining"
, "NumPartialInlined", "Number of callsites functions partially inlined into."
}
;
70STATISTIC(NumColdOutlinePartialInlined, "Number of times functions with "static llvm::Statistic NumColdOutlinePartialInlined = {"partial-inlining"
, "NumColdOutlinePartialInlined", "Number of times functions with "
"cold outlined regions were partially " "inlined into its caller(s)."
}
71 "cold outlined regions were partially "static llvm::Statistic NumColdOutlinePartialInlined = {"partial-inlining"
, "NumColdOutlinePartialInlined", "Number of times functions with "
"cold outlined regions were partially " "inlined into its caller(s)."
}
72 "inlined into its caller(s).")static llvm::Statistic NumColdOutlinePartialInlined = {"partial-inlining"
, "NumColdOutlinePartialInlined", "Number of times functions with "
"cold outlined regions were partially " "inlined into its caller(s)."
}
;
73STATISTIC(NumColdRegionsFound,static llvm::Statistic NumColdRegionsFound = {"partial-inlining"
, "NumColdRegionsFound", "Number of cold single entry/exit regions found."
}
74 "Number of cold single entry/exit regions found.")static llvm::Statistic NumColdRegionsFound = {"partial-inlining"
, "NumColdRegionsFound", "Number of cold single entry/exit regions found."
}
;
75STATISTIC(NumColdRegionsOutlined,static llvm::Statistic NumColdRegionsOutlined = {"partial-inlining"
, "NumColdRegionsOutlined", "Number of cold single entry/exit regions outlined."
}
76 "Number of cold single entry/exit regions outlined.")static llvm::Statistic NumColdRegionsOutlined = {"partial-inlining"
, "NumColdRegionsOutlined", "Number of cold single entry/exit regions outlined."
}
;
77
78// Command line option to disable partial-inlining. The default is false:
79static cl::opt<bool>
80 DisablePartialInlining("disable-partial-inlining", cl::init(false),
81 cl::Hidden, cl::desc("Disable partial inlining"));
82// Command line option to disable multi-region partial-inlining. The default is
83// false:
84static cl::opt<bool> DisableMultiRegionPartialInline(
85 "disable-mr-partial-inlining", cl::init(false), cl::Hidden,
86 cl::desc("Disable multi-region partial inlining"));
87
88// Command line option to force outlining in regions with live exit variables.
89// The default is false:
90static cl::opt<bool>
91 ForceLiveExit("pi-force-live-exit-outline", cl::init(false), cl::Hidden,
92 cl::desc("Force outline regions with live exits"));
93
94// Command line option to enable marking outline functions with Cold Calling
95// Convention. The default is false:
96static cl::opt<bool>
97 MarkOutlinedColdCC("pi-mark-coldcc", cl::init(false), cl::Hidden,
98 cl::desc("Mark outline function calls with ColdCC"));
99
100// This is an option used by testing:
101static cl::opt<bool> SkipCostAnalysis("skip-partial-inlining-cost-analysis",
102 cl::init(false), cl::ZeroOrMore,
103 cl::ReallyHidden,
104 cl::desc("Skip Cost Analysis"));
105// Used to determine if a cold region is worth outlining based on
106// its inlining cost compared to the original function. Default is set at 10%.
107// ie. if the cold region reduces the inlining cost of the original function by
108// at least 10%.
109static cl::opt<float> MinRegionSizeRatio(
110 "min-region-size-ratio", cl::init(0.1), cl::Hidden,
111 cl::desc("Minimum ratio comparing relative sizes of each "
112 "outline candidate and original function"));
113// Used to tune the minimum number of execution counts needed in the predecessor
114// block to the cold edge. ie. confidence interval.
115static cl::opt<unsigned>
116 MinBlockCounterExecution("min-block-execution", cl::init(100), cl::Hidden,
117 cl::desc("Minimum block executions to consider "
118 "its BranchProbabilityInfo valid"));
119// Used to determine when an edge is considered cold. Default is set to 10%. ie.
120// if the branch probability is 10% or less, then it is deemed as 'cold'.
121static cl::opt<float> ColdBranchRatio(
122 "cold-branch-ratio", cl::init(0.1), cl::Hidden,
123 cl::desc("Minimum BranchProbability to consider a region cold."));
124
125static cl::opt<unsigned> MaxNumInlineBlocks(
126 "max-num-inline-blocks", cl::init(5), cl::Hidden,
127 cl::desc("Max number of blocks to be partially inlined"));
128
129// Command line option to set the maximum number of partial inlining allowed
130// for the module. The default value of -1 means no limit.
131static cl::opt<int> MaxNumPartialInlining(
132 "max-partial-inlining", cl::init(-1), cl::Hidden, cl::ZeroOrMore,
133 cl::desc("Max number of partial inlining. The default is unlimited"));
134
135// Used only when PGO or user annotated branch data is absent. It is
136// the least value that is used to weigh the outline region. If BFI
137// produces larger value, the BFI value will be used.
138static cl::opt<int>
139 OutlineRegionFreqPercent("outline-region-freq-percent", cl::init(75),
140 cl::Hidden, cl::ZeroOrMore,
141 cl::desc("Relative frequency of outline region to "
142 "the entry block"));
143
144static cl::opt<unsigned> ExtraOutliningPenalty(
145 "partial-inlining-extra-penalty", cl::init(0), cl::Hidden,
146 cl::desc("A debug option to add additional penalty to the computed one."));
147
148namespace {
149
150struct FunctionOutliningInfo {
151 FunctionOutliningInfo() = default;
152
153 // Returns the number of blocks to be inlined including all blocks
154 // in Entries and one return block.
155 unsigned getNumInlinedBlocks() const { return Entries.size() + 1; }
156
157 // A set of blocks including the function entry that guard
158 // the region to be outlined.
159 SmallVector<BasicBlock *, 4> Entries;
160
161 // The return block that is not included in the outlined region.
162 BasicBlock *ReturnBlock = nullptr;
163
164 // The dominating block of the region to be outlined.
165 BasicBlock *NonReturnBlock = nullptr;
166
167 // The set of blocks in Entries that that are predecessors to ReturnBlock
168 SmallVector<BasicBlock *, 4> ReturnBlockPreds;
169};
170
171struct FunctionOutliningMultiRegionInfo {
172 FunctionOutliningMultiRegionInfo()
173 : ORI() {}
174
175 // Container for outline regions
176 struct OutlineRegionInfo {
177 OutlineRegionInfo(ArrayRef<BasicBlock *> Region,
178 BasicBlock *EntryBlock, BasicBlock *ExitBlock,
179 BasicBlock *ReturnBlock)
180 : Region(Region.begin(), Region.end()), EntryBlock(EntryBlock),
181 ExitBlock(ExitBlock), ReturnBlock(ReturnBlock) {}
182 SmallVector<BasicBlock *, 8> Region;
183 BasicBlock *EntryBlock;
184 BasicBlock *ExitBlock;
185 BasicBlock *ReturnBlock;
186 };
187
188 SmallVector<OutlineRegionInfo, 4> ORI;
189};
190
191struct PartialInlinerImpl {
192
193 PartialInlinerImpl(
194 function_ref<AssumptionCache &(Function &)> GetAC,
195 function_ref<AssumptionCache *(Function &)> LookupAC,
196 function_ref<TargetTransformInfo &(Function &)> GTTI,
197 function_ref<const TargetLibraryInfo &(Function &)> GTLI,
198 ProfileSummaryInfo &ProfSI,
199 function_ref<BlockFrequencyInfo &(Function &)> GBFI = nullptr)
200 : GetAssumptionCache(GetAC), LookupAssumptionCache(LookupAC),
201 GetTTI(GTTI), GetBFI(GBFI), GetTLI(GTLI), PSI(ProfSI) {}
202
203 bool run(Module &M);
204 // Main part of the transformation that calls helper functions to find
205 // outlining candidates, clone & outline the function, and attempt to
206 // partially inline the resulting function. Returns true if
207 // inlining was successful, false otherwise. Also returns the outline
208 // function (only if we partially inlined early returns) as there is a
209 // possibility to further "peel" early return statements that were left in the
210 // outline function due to code size.
211 std::pair<bool, Function *> unswitchFunction(Function &F);
212
213 // This class speculatively clones the function to be partial inlined.
214 // At the end of partial inlining, the remaining callsites to the cloned
215 // function that are not partially inlined will be fixed up to reference
216 // the original function, and the cloned function will be erased.
217 struct FunctionCloner {
218 // Two constructors, one for single region outlining, the other for
219 // multi-region outlining.
220 FunctionCloner(Function *F, FunctionOutliningInfo *OI,
221 OptimizationRemarkEmitter &ORE,
222 function_ref<AssumptionCache *(Function &)> LookupAC,
223 function_ref<TargetTransformInfo &(Function &)> GetTTI);
224 FunctionCloner(Function *F, FunctionOutliningMultiRegionInfo *OMRI,
225 OptimizationRemarkEmitter &ORE,
226 function_ref<AssumptionCache *(Function &)> LookupAC,
227 function_ref<TargetTransformInfo &(Function &)> GetTTI);
228
229 ~FunctionCloner();
230
231 // Prepare for function outlining: making sure there is only
232 // one incoming edge from the extracted/outlined region to
233 // the return block.
234 void normalizeReturnBlock() const;
235
236 // Do function outlining for cold regions.
237 bool doMultiRegionFunctionOutlining();
238 // Do function outlining for region after early return block(s).
239 // NOTE: For vararg functions that do the vararg handling in the outlined
240 // function, we temporarily generate IR that does not properly
241 // forward varargs to the outlined function. Calling InlineFunction
242 // will update calls to the outlined functions to properly forward
243 // the varargs.
244 Function *doSingleRegionFunctionOutlining();
245
246 Function *OrigFunc = nullptr;
247 Function *ClonedFunc = nullptr;
248
249 typedef std::pair<Function *, BasicBlock *> FuncBodyCallerPair;
250 // Keep track of Outlined Functions and the basic block they're called from.
251 SmallVector<FuncBodyCallerPair, 4> OutlinedFunctions;
252
253 // ClonedFunc is inlined in one of its callers after function
254 // outlining.
255 bool IsFunctionInlined = false;
256 // The cost of the region to be outlined.
257 InstructionCost OutlinedRegionCost = 0;
258 // ClonedOI is specific to outlining non-early return blocks.
259 std::unique_ptr<FunctionOutliningInfo> ClonedOI = nullptr;
260 // ClonedOMRI is specific to outlining cold regions.
261 std::unique_ptr<FunctionOutliningMultiRegionInfo> ClonedOMRI = nullptr;
262 std::unique_ptr<BlockFrequencyInfo> ClonedFuncBFI = nullptr;
263 OptimizationRemarkEmitter &ORE;
264 function_ref<AssumptionCache *(Function &)> LookupAC;
265 function_ref<TargetTransformInfo &(Function &)> GetTTI;
266 };
267
268private:
269 int NumPartialInlining = 0;
270 function_ref<AssumptionCache &(Function &)> GetAssumptionCache;
271 function_ref<AssumptionCache *(Function &)> LookupAssumptionCache;
272 function_ref<TargetTransformInfo &(Function &)> GetTTI;
273 function_ref<BlockFrequencyInfo &(Function &)> GetBFI;
274 function_ref<const TargetLibraryInfo &(Function &)> GetTLI;
275 ProfileSummaryInfo &PSI;
276
277 // Return the frequency of the OutlininingBB relative to F's entry point.
278 // The result is no larger than 1 and is represented using BP.
279 // (Note that the outlined region's 'head' block can only have incoming
280 // edges from the guarding entry blocks).
281 BranchProbability
282 getOutliningCallBBRelativeFreq(FunctionCloner &Cloner) const;
283
284 // Return true if the callee of CB should be partially inlined with
285 // profit.
286 bool shouldPartialInline(CallBase &CB, FunctionCloner &Cloner,
287 BlockFrequency WeightedOutliningRcost,
288 OptimizationRemarkEmitter &ORE) const;
289
290 // Try to inline DuplicateFunction (cloned from F with call to
291 // the OutlinedFunction into its callers. Return true
292 // if there is any successful inlining.
293 bool tryPartialInline(FunctionCloner &Cloner);
294
295 // Compute the mapping from use site of DuplicationFunction to the enclosing
296 // BB's profile count.
297 void
298 computeCallsiteToProfCountMap(Function *DuplicateFunction,
299 DenseMap<User *, uint64_t> &SiteCountMap) const;
300
301 bool isLimitReached() const {
302 return (MaxNumPartialInlining != -1 &&
303 NumPartialInlining >= MaxNumPartialInlining);
304 }
305
306 static CallBase *getSupportedCallBase(User *U) {
307 if (isa<CallInst>(U) || isa<InvokeInst>(U))
308 return cast<CallBase>(U);
309 llvm_unreachable("All uses must be calls")__builtin_unreachable();
310 return nullptr;
311 }
312
313 static CallBase *getOneCallSiteTo(Function &F) {
314 User *User = *F.user_begin();
315 return getSupportedCallBase(User);
316 }
317
318 std::tuple<DebugLoc, BasicBlock *> getOneDebugLoc(Function &F) const {
319 CallBase *CB = getOneCallSiteTo(F);
320 DebugLoc DLoc = CB->getDebugLoc();
321 BasicBlock *Block = CB->getParent();
322 return std::make_tuple(DLoc, Block);
323 }
324
325 // Returns the costs associated with function outlining:
326 // - The first value is the non-weighted runtime cost for making the call
327 // to the outlined function, including the addtional setup cost in the
328 // outlined function itself;
329 // - The second value is the estimated size of the new call sequence in
330 // basic block Cloner.OutliningCallBB;
331 std::tuple<InstructionCost, InstructionCost>
332 computeOutliningCosts(FunctionCloner &Cloner) const;
333
334 // Compute the 'InlineCost' of block BB. InlineCost is a proxy used to
335 // approximate both the size and runtime cost (Note that in the current
336 // inline cost analysis, there is no clear distinction there either).
337 static InstructionCost computeBBInlineCost(BasicBlock *BB,
338 TargetTransformInfo *TTI);
339
340 std::unique_ptr<FunctionOutliningInfo>
341 computeOutliningInfo(Function &F) const;
342
343 std::unique_ptr<FunctionOutliningMultiRegionInfo>
344 computeOutliningColdRegionsInfo(Function &F,
345 OptimizationRemarkEmitter &ORE) const;
346};
347
348struct PartialInlinerLegacyPass : public ModulePass {
349 static char ID; // Pass identification, replacement for typeid
350
351 PartialInlinerLegacyPass() : ModulePass(ID) {
352 initializePartialInlinerLegacyPassPass(*PassRegistry::getPassRegistry());
353 }
354
355 void getAnalysisUsage(AnalysisUsage &AU) const override {
356 AU.addRequired<AssumptionCacheTracker>();
357 AU.addRequired<ProfileSummaryInfoWrapperPass>();
358 AU.addRequired<TargetTransformInfoWrapperPass>();
359 AU.addRequired<TargetLibraryInfoWrapperPass>();
360 }
361
362 bool runOnModule(Module &M) override {
363 if (skipModule(M))
364 return false;
365
366 AssumptionCacheTracker *ACT = &getAnalysis<AssumptionCacheTracker>();
367 TargetTransformInfoWrapperPass *TTIWP =
368 &getAnalysis<TargetTransformInfoWrapperPass>();
369 ProfileSummaryInfo &PSI =
370 getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
371
372 auto GetAssumptionCache = [&ACT](Function &F) -> AssumptionCache & {
373 return ACT->getAssumptionCache(F);
374 };
375
376 auto LookupAssumptionCache = [ACT](Function &F) -> AssumptionCache * {
377 return ACT->lookupAssumptionCache(F);
378 };
379
380 auto GetTTI = [&TTIWP](Function &F) -> TargetTransformInfo & {
381 return TTIWP->getTTI(F);
382 };
383
384 auto GetTLI = [this](Function &F) -> TargetLibraryInfo & {
385 return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
386 };
387
388 return PartialInlinerImpl(GetAssumptionCache, LookupAssumptionCache, GetTTI,
389 GetTLI, PSI)
390 .run(M);
391 }
392};
393
394} // end anonymous namespace
395
396std::unique_ptr<FunctionOutliningMultiRegionInfo>
397PartialInlinerImpl::computeOutliningColdRegionsInfo(
398 Function &F, OptimizationRemarkEmitter &ORE) const {
399 BasicBlock *EntryBlock = &F.front();
400
401 DominatorTree DT(F);
402 LoopInfo LI(DT);
403 BranchProbabilityInfo BPI(F, LI);
404 std::unique_ptr<BlockFrequencyInfo> ScopedBFI;
405 BlockFrequencyInfo *BFI;
406 if (!GetBFI) {
407 ScopedBFI.reset(new BlockFrequencyInfo(F, BPI, LI));
408 BFI = ScopedBFI.get();
409 } else
410 BFI = &(GetBFI(F));
411
412 // Return if we don't have profiling information.
413 if (!PSI.hasInstrumentationProfile())
414 return std::unique_ptr<FunctionOutliningMultiRegionInfo>();
415
416 std::unique_ptr<FunctionOutliningMultiRegionInfo> OutliningInfo =
417 std::make_unique<FunctionOutliningMultiRegionInfo>();
418
419 auto IsSingleExit =
420 [&ORE](SmallVectorImpl<BasicBlock *> &BlockList) -> BasicBlock * {
421 BasicBlock *ExitBlock = nullptr;
422 for (auto *Block : BlockList) {
423 for (BasicBlock *Succ : successors(Block)) {
424 if (!is_contained(BlockList, Succ)) {
425 if (ExitBlock) {
426 ORE.emit([&]() {
427 return OptimizationRemarkMissed(DEBUG_TYPE"partial-inlining", "MultiExitRegion",
428 &Succ->front())
429 << "Region dominated by "
430 << ore::NV("Block", BlockList.front()->getName())
431 << " has more than one region exit edge.";
432 });
433 return nullptr;
434 }
435
436 ExitBlock = Block;
437 }
438 }
439 }
440 return ExitBlock;
441 };
442
443 auto BBProfileCount = [BFI](BasicBlock *BB) {
444 return BFI->getBlockProfileCount(BB)
445 ? BFI->getBlockProfileCount(BB).getValue()
446 : 0;
447 };
448
449 // Use the same computeBBInlineCost function to compute the cost savings of
450 // the outlining the candidate region.
451 TargetTransformInfo *FTTI = &GetTTI(F);
452 InstructionCost OverallFunctionCost = 0;
453 for (auto &BB : F)
454 OverallFunctionCost += computeBBInlineCost(&BB, FTTI);
455
456 LLVM_DEBUG(dbgs() << "OverallFunctionCost = " << OverallFunctionCostdo { } while (false)
457 << "\n";)do { } while (false);
458
459 InstructionCost MinOutlineRegionCost = OverallFunctionCost.map(
460 [&](auto Cost) { return Cost * MinRegionSizeRatio; });
461
462 BranchProbability MinBranchProbability(
463 static_cast<int>(ColdBranchRatio * MinBlockCounterExecution),
464 MinBlockCounterExecution);
465 bool ColdCandidateFound = false;
466 BasicBlock *CurrEntry = EntryBlock;
467 std::vector<BasicBlock *> DFS;
468 DenseMap<BasicBlock *, bool> VisitedMap;
469 DFS.push_back(CurrEntry);
470 VisitedMap[CurrEntry] = true;
471
472 // Use Depth First Search on the basic blocks to find CFG edges that are
473 // considered cold.
474 // Cold regions considered must also have its inline cost compared to the
475 // overall inline cost of the original function. The region is outlined only
476 // if it reduced the inline cost of the function by 'MinOutlineRegionCost' or
477 // more.
478 while (!DFS.empty()) {
479 auto *ThisBB = DFS.back();
480 DFS.pop_back();
481 // Only consider regions with predecessor blocks that are considered
482 // not-cold (default: part of the top 99.99% of all block counters)
483 // AND greater than our minimum block execution count (default: 100).
484 if (PSI.isColdBlock(ThisBB, BFI) ||
485 BBProfileCount(ThisBB) < MinBlockCounterExecution)
486 continue;
487 for (auto SI = succ_begin(ThisBB); SI != succ_end(ThisBB); ++SI) {
488 if (VisitedMap[*SI])
489 continue;
490 VisitedMap[*SI] = true;
491 DFS.push_back(*SI);
492 // If branch isn't cold, we skip to the next one.
493 BranchProbability SuccProb = BPI.getEdgeProbability(ThisBB, *SI);
494 if (SuccProb > MinBranchProbability)
495 continue;
496
497 LLVM_DEBUG(dbgs() << "Found cold edge: " << ThisBB->getName() << "->"do { } while (false)
498 << SI->getName()do { } while (false)
499 << "\nBranch Probability = " << SuccProb << "\n";)do { } while (false);
500
501 SmallVector<BasicBlock *, 8> DominateVector;
502 DT.getDescendants(*SI, DominateVector);
503 assert(!DominateVector.empty() &&((void)0)
504 "SI should be reachable and have at least itself as descendant")((void)0);
505
506 // We can only outline single entry regions (for now).
507 if (!DominateVector.front()->hasNPredecessors(1)) {
508 LLVM_DEBUG(dbgs() << "ABORT: Block " << SI->getName()do { } while (false)
509 << " doesn't have a single predecessor in the "do { } while (false)
510 "dominator tree\n";)do { } while (false);
511 continue;
512 }
513
514 BasicBlock *ExitBlock = nullptr;
515 // We can only outline single exit regions (for now).
516 if (!(ExitBlock = IsSingleExit(DominateVector))) {
517 LLVM_DEBUG(dbgs() << "ABORT: Block " << SI->getName()do { } while (false)
518 << " doesn't have a unique successor\n";)do { } while (false);
519 continue;
520 }
521
522 InstructionCost OutlineRegionCost = 0;
523 for (auto *BB : DominateVector)
524 OutlineRegionCost += computeBBInlineCost(BB, &GetTTI(*BB->getParent()));
525
526 LLVM_DEBUG(dbgs() << "OutlineRegionCost = " << OutlineRegionCostdo { } while (false)
527 << "\n";)do { } while (false);
528
529 if (!SkipCostAnalysis && OutlineRegionCost < MinOutlineRegionCost) {
530 ORE.emit([&]() {
531 return OptimizationRemarkAnalysis(DEBUG_TYPE"partial-inlining", "TooCostly",
532 &SI->front())
533 << ore::NV("Callee", &F)
534 << " inline cost-savings smaller than "
535 << ore::NV("Cost", MinOutlineRegionCost);
536 });
537
538 LLVM_DEBUG(dbgs() << "ABORT: Outline region cost is smaller than "do { } while (false)
539 << MinOutlineRegionCost << "\n";)do { } while (false);
540 continue;
541 }
542
543 // For now, ignore blocks that belong to a SISE region that is a
544 // candidate for outlining. In the future, we may want to look
545 // at inner regions because the outer region may have live-exit
546 // variables.
547 for (auto *BB : DominateVector)
548 VisitedMap[BB] = true;
549
550 // ReturnBlock here means the block after the outline call
551 BasicBlock *ReturnBlock = ExitBlock->getSingleSuccessor();
552 FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegInfo(
553 DominateVector, DominateVector.front(), ExitBlock, ReturnBlock);
554 OutliningInfo->ORI.push_back(RegInfo);
555 LLVM_DEBUG(dbgs() << "Found Cold Candidate starting at block: "do { } while (false)
556 << DominateVector.front()->getName() << "\n";)do { } while (false);
557 ColdCandidateFound = true;
558 NumColdRegionsFound++;
559 }
560 }
561
562 if (ColdCandidateFound)
563 return OutliningInfo;
564
565 return std::unique_ptr<FunctionOutliningMultiRegionInfo>();
566}
567
568std::unique_ptr<FunctionOutliningInfo>
569PartialInlinerImpl::computeOutliningInfo(Function &F) const {
570 BasicBlock *EntryBlock = &F.front();
571 BranchInst *BR = dyn_cast<BranchInst>(EntryBlock->getTerminator());
572 if (!BR || BR->isUnconditional())
573 return std::unique_ptr<FunctionOutliningInfo>();
574
575 // Returns true if Succ is BB's successor
576 auto IsSuccessor = [](BasicBlock *Succ, BasicBlock *BB) {
577 return is_contained(successors(BB), Succ);
578 };
579
580 auto IsReturnBlock = [](BasicBlock *BB) {
581 Instruction *TI = BB->getTerminator();
582 return isa<ReturnInst>(TI);
583 };
584
585 auto GetReturnBlock = [&](BasicBlock *Succ1, BasicBlock *Succ2) {
586 if (IsReturnBlock(Succ1))
587 return std::make_tuple(Succ1, Succ2);
588 if (IsReturnBlock(Succ2))
589 return std::make_tuple(Succ2, Succ1);
590
591 return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr);
592 };
593
594 // Detect a triangular shape:
595 auto GetCommonSucc = [&](BasicBlock *Succ1, BasicBlock *Succ2) {
596 if (IsSuccessor(Succ1, Succ2))
597 return std::make_tuple(Succ1, Succ2);
598 if (IsSuccessor(Succ2, Succ1))
599 return std::make_tuple(Succ2, Succ1);
600
601 return std::make_tuple<BasicBlock *, BasicBlock *>(nullptr, nullptr);
602 };
603
604 std::unique_ptr<FunctionOutliningInfo> OutliningInfo =
605 std::make_unique<FunctionOutliningInfo>();
606
607 BasicBlock *CurrEntry = EntryBlock;
608 bool CandidateFound = false;
609 do {
610 // The number of blocks to be inlined has already reached
611 // the limit. When MaxNumInlineBlocks is set to 0 or 1, this
612 // disables partial inlining for the function.
613 if (OutliningInfo->getNumInlinedBlocks() >= MaxNumInlineBlocks)
614 break;
615
616 if (succ_size(CurrEntry) != 2)
617 break;
618
619 BasicBlock *Succ1 = *succ_begin(CurrEntry);
620 BasicBlock *Succ2 = *(succ_begin(CurrEntry) + 1);
621
622 BasicBlock *ReturnBlock, *NonReturnBlock;
623 std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);
624
625 if (ReturnBlock) {
626 OutliningInfo->Entries.push_back(CurrEntry);
627 OutliningInfo->ReturnBlock = ReturnBlock;
628 OutliningInfo->NonReturnBlock = NonReturnBlock;
629 CandidateFound = true;
630 break;
631 }
632
633 BasicBlock *CommSucc, *OtherSucc;
634 std::tie(CommSucc, OtherSucc) = GetCommonSucc(Succ1, Succ2);
635
636 if (!CommSucc)
637 break;
638
639 OutliningInfo->Entries.push_back(CurrEntry);
640 CurrEntry = OtherSucc;
641 } while (true);
642
643 if (!CandidateFound)
644 return std::unique_ptr<FunctionOutliningInfo>();
645
646 // Do sanity check of the entries: threre should not
647 // be any successors (not in the entry set) other than
648 // {ReturnBlock, NonReturnBlock}
649 assert(OutliningInfo->Entries[0] == &F.front() &&((void)0)
650 "Function Entry must be the first in Entries vector")((void)0);
651 DenseSet<BasicBlock *> Entries;
652 for (BasicBlock *E : OutliningInfo->Entries)
653 Entries.insert(E);
654
655 // Returns true of BB has Predecessor which is not
656 // in Entries set.
657 auto HasNonEntryPred = [Entries](BasicBlock *BB) {
658 for (auto *Pred : predecessors(BB)) {
659 if (!Entries.count(Pred))
660 return true;
661 }
662 return false;
663 };
664 auto CheckAndNormalizeCandidate =
665 [Entries, HasNonEntryPred](FunctionOutliningInfo *OutliningInfo) {
666 for (BasicBlock *E : OutliningInfo->Entries) {
667 for (auto *Succ : successors(E)) {
668 if (Entries.count(Succ))
669 continue;
670 if (Succ == OutliningInfo->ReturnBlock)
671 OutliningInfo->ReturnBlockPreds.push_back(E);
672 else if (Succ != OutliningInfo->NonReturnBlock)
673 return false;
674 }
675 // There should not be any outside incoming edges either:
676 if (HasNonEntryPred(E))
677 return false;
678 }
679 return true;
680 };
681
682 if (!CheckAndNormalizeCandidate(OutliningInfo.get()))
683 return std::unique_ptr<FunctionOutliningInfo>();
684
685 // Now further growing the candidate's inlining region by
686 // peeling off dominating blocks from the outlining region:
687 while (OutliningInfo->getNumInlinedBlocks() < MaxNumInlineBlocks) {
688 BasicBlock *Cand = OutliningInfo->NonReturnBlock;
689 if (succ_size(Cand) != 2)
690 break;
691
692 if (HasNonEntryPred(Cand))
693 break;
694
695 BasicBlock *Succ1 = *succ_begin(Cand);
696 BasicBlock *Succ2 = *(succ_begin(Cand) + 1);
697
698 BasicBlock *ReturnBlock, *NonReturnBlock;
699 std::tie(ReturnBlock, NonReturnBlock) = GetReturnBlock(Succ1, Succ2);
700 if (!ReturnBlock || ReturnBlock != OutliningInfo->ReturnBlock)
701 break;
702
703 if (NonReturnBlock->getSinglePredecessor() != Cand)
704 break;
705
706 // Now grow and update OutlininigInfo:
707 OutliningInfo->Entries.push_back(Cand);
708 OutliningInfo->NonReturnBlock = NonReturnBlock;
709 OutliningInfo->ReturnBlockPreds.push_back(Cand);
710 Entries.insert(Cand);
711 }
712
713 return OutliningInfo;
714}
715
716// Check if there is PGO data or user annotated branch data:
717static bool hasProfileData(const Function &F, const FunctionOutliningInfo &OI) {
718 if (F.hasProfileData())
719 return true;
720 // Now check if any of the entry block has MD_prof data:
721 for (auto *E : OI.Entries) {
722 BranchInst *BR = dyn_cast<BranchInst>(E->getTerminator());
723 if (!BR || BR->isUnconditional())
724 continue;
725 uint64_t T, F;
726 if (BR->extractProfMetadata(T, F))
727 return true;
728 }
729 return false;
730}
731
732BranchProbability PartialInlinerImpl::getOutliningCallBBRelativeFreq(
733 FunctionCloner &Cloner) const {
734 BasicBlock *OutliningCallBB = Cloner.OutlinedFunctions.back().second;
735 auto EntryFreq =
736 Cloner.ClonedFuncBFI->getBlockFreq(&Cloner.ClonedFunc->getEntryBlock());
737 auto OutliningCallFreq =
738 Cloner.ClonedFuncBFI->getBlockFreq(OutliningCallBB);
739 // FIXME Hackery needed because ClonedFuncBFI is based on the function BEFORE
740 // we outlined any regions, so we may encounter situations where the
741 // OutliningCallFreq is *slightly* bigger than the EntryFreq.
742 if (OutliningCallFreq.getFrequency() > EntryFreq.getFrequency())
743 OutliningCallFreq = EntryFreq;
744
745 auto OutlineRegionRelFreq = BranchProbability::getBranchProbability(
746 OutliningCallFreq.getFrequency(), EntryFreq.getFrequency());
747
748 if (hasProfileData(*Cloner.OrigFunc, *Cloner.ClonedOI.get()))
749 return OutlineRegionRelFreq;
750
751 // When profile data is not available, we need to be conservative in
752 // estimating the overall savings. Static branch prediction can usually
753 // guess the branch direction right (taken/non-taken), but the guessed
754 // branch probability is usually not biased enough. In case when the
755 // outlined region is predicted to be likely, its probability needs
756 // to be made higher (more biased) to not under-estimate the cost of
757 // function outlining. On the other hand, if the outlined region
758 // is predicted to be less likely, the predicted probablity is usually
759 // higher than the actual. For instance, the actual probability of the
760 // less likely target is only 5%, but the guessed probablity can be
761 // 40%. In the latter case, there is no need for further adjustement.
762 // FIXME: add an option for this.
763 if (OutlineRegionRelFreq < BranchProbability(45, 100))
764 return OutlineRegionRelFreq;
765
766 OutlineRegionRelFreq = std::max(
767 OutlineRegionRelFreq, BranchProbability(OutlineRegionFreqPercent, 100));
768
769 return OutlineRegionRelFreq;
770}
771
772bool PartialInlinerImpl::shouldPartialInline(
773 CallBase &CB, FunctionCloner &Cloner, BlockFrequency WeightedOutliningRcost,
774 OptimizationRemarkEmitter &ORE) const {
775 using namespace ore;
776
777 Function *Callee = CB.getCalledFunction();
778 assert(Callee == Cloner.ClonedFunc)((void)0);
779
780 if (SkipCostAnalysis)
781 return isInlineViable(*Callee).isSuccess();
782
783 Function *Caller = CB.getCaller();
784 auto &CalleeTTI = GetTTI(*Callee);
785 bool RemarksEnabled =
786 Callee->getContext().getDiagHandlerPtr()->isMissedOptRemarkEnabled(
787 DEBUG_TYPE"partial-inlining");
788 InlineCost IC =
789 getInlineCost(CB, getInlineParams(), CalleeTTI, GetAssumptionCache,
790 GetTLI, GetBFI, &PSI, RemarksEnabled ? &ORE : nullptr);
791
792 if (IC.isAlways()) {
793 ORE.emit([&]() {
794 return OptimizationRemarkAnalysis(DEBUG_TYPE"partial-inlining", "AlwaysInline", &CB)
795 << NV("Callee", Cloner.OrigFunc)
796 << " should always be fully inlined, not partially";
797 });
798 return false;
799 }
800
801 if (IC.isNever()) {
802 ORE.emit([&]() {
803 return OptimizationRemarkMissed(DEBUG_TYPE"partial-inlining", "NeverInline", &CB)
804 << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
805 << NV("Caller", Caller)
806 << " because it should never be inlined (cost=never)";
807 });
808 return false;
809 }
810
811 if (!IC) {
812 ORE.emit([&]() {
813 return OptimizationRemarkAnalysis(DEBUG_TYPE"partial-inlining", "TooCostly", &CB)
814 << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
815 << NV("Caller", Caller) << " because too costly to inline (cost="
816 << NV("Cost", IC.getCost()) << ", threshold="
817 << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")";
818 });
819 return false;
820 }
821 const DataLayout &DL = Caller->getParent()->getDataLayout();
822
823 // The savings of eliminating the call:
824 int NonWeightedSavings = getCallsiteCost(CB, DL);
825 BlockFrequency NormWeightedSavings(NonWeightedSavings);
826
827 // Weighted saving is smaller than weighted cost, return false
828 if (NormWeightedSavings < WeightedOutliningRcost) {
829 ORE.emit([&]() {
830 return OptimizationRemarkAnalysis(DEBUG_TYPE"partial-inlining", "OutliningCallcostTooHigh",
831 &CB)
832 << NV("Callee", Cloner.OrigFunc) << " not partially inlined into "
833 << NV("Caller", Caller) << " runtime overhead (overhead="
834 << NV("Overhead", (unsigned)WeightedOutliningRcost.getFrequency())
835 << ", savings="
836 << NV("Savings", (unsigned)NormWeightedSavings.getFrequency())
837 << ")"
838 << " of making the outlined call is too high";
839 });
840
841 return false;
842 }
843
844 ORE.emit([&]() {
845 return OptimizationRemarkAnalysis(DEBUG_TYPE"partial-inlining", "CanBePartiallyInlined", &CB)
846 << NV("Callee", Cloner.OrigFunc) << " can be partially inlined into "
847 << NV("Caller", Caller) << " with cost=" << NV("Cost", IC.getCost())
848 << " (threshold="
849 << NV("Threshold", IC.getCostDelta() + IC.getCost()) << ")";
850 });
851 return true;
852}
853
854// TODO: Ideally we should share Inliner's InlineCost Analysis code.
855// For now use a simplified version. The returned 'InlineCost' will be used
856// to esimate the size cost as well as runtime cost of the BB.
857InstructionCost
858PartialInlinerImpl::computeBBInlineCost(BasicBlock *BB,
859 TargetTransformInfo *TTI) {
860 InstructionCost InlineCost = 0;
861 const DataLayout &DL = BB->getParent()->getParent()->getDataLayout();
862 for (Instruction &I : BB->instructionsWithoutDebug()) {
863 // Skip free instructions.
864 switch (I.getOpcode()) {
865 case Instruction::BitCast:
866 case Instruction::PtrToInt:
867 case Instruction::IntToPtr:
868 case Instruction::Alloca:
869 case Instruction::PHI:
870 continue;
871 case Instruction::GetElementPtr:
872 if (cast<GetElementPtrInst>(&I)->hasAllZeroIndices())
873 continue;
874 break;
875 default:
876 break;
877 }
878
879 if (I.isLifetimeStartOrEnd())
880 continue;
881
882 if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
883 Intrinsic::ID IID = II->getIntrinsicID();
884 SmallVector<Type *, 4> Tys;
885 FastMathFlags FMF;
886 for (Value *Val : II->args())
887 Tys.push_back(Val->getType());
888
889 if (auto *FPMO = dyn_cast<FPMathOperator>(II))
890 FMF = FPMO->getFastMathFlags();
891
892 IntrinsicCostAttributes ICA(IID, II->getType(), Tys, FMF);
893 InlineCost += TTI->getIntrinsicInstrCost(ICA, TTI::TCK_SizeAndLatency);
894 continue;
895 }
896
897 if (CallInst *CI = dyn_cast<CallInst>(&I)) {
898 InlineCost += getCallsiteCost(*CI, DL);
899 continue;
900 }
901
902 if (InvokeInst *II = dyn_cast<InvokeInst>(&I)) {
903 InlineCost += getCallsiteCost(*II, DL);
904 continue;
905 }
906
907 if (SwitchInst *SI = dyn_cast<SwitchInst>(&I)) {
908 InlineCost += (SI->getNumCases() + 1) * InlineConstants::InstrCost;
909 continue;
910 }
911 InlineCost += InlineConstants::InstrCost;
912 }
913
914 return InlineCost;
915}
916
917std::tuple<InstructionCost, InstructionCost>
918PartialInlinerImpl::computeOutliningCosts(FunctionCloner &Cloner) const {
919 InstructionCost OutliningFuncCallCost = 0, OutlinedFunctionCost = 0;
920 for (auto FuncBBPair : Cloner.OutlinedFunctions) {
921 Function *OutlinedFunc = FuncBBPair.first;
922 BasicBlock* OutliningCallBB = FuncBBPair.second;
923 // Now compute the cost of the call sequence to the outlined function
924 // 'OutlinedFunction' in BB 'OutliningCallBB':
925 auto *OutlinedFuncTTI = &GetTTI(*OutlinedFunc);
926 OutliningFuncCallCost +=
927 computeBBInlineCost(OutliningCallBB, OutlinedFuncTTI);
928
929 // Now compute the cost of the extracted/outlined function itself:
930 for (BasicBlock &BB : *OutlinedFunc)
931 OutlinedFunctionCost += computeBBInlineCost(&BB, OutlinedFuncTTI);
932 }
933 assert(OutlinedFunctionCost >= Cloner.OutlinedRegionCost &&((void)0)
934 "Outlined function cost should be no less than the outlined region")((void)0);
935
936 // The code extractor introduces a new root and exit stub blocks with
937 // additional unconditional branches. Those branches will be eliminated
938 // later with bb layout. The cost should be adjusted accordingly:
939 OutlinedFunctionCost -=
940 2 * InlineConstants::InstrCost * Cloner.OutlinedFunctions.size();
941
942 InstructionCost OutliningRuntimeOverhead =
943 OutliningFuncCallCost +
944 (OutlinedFunctionCost - Cloner.OutlinedRegionCost) +
945 ExtraOutliningPenalty.getValue();
946
947 return std::make_tuple(OutliningFuncCallCost, OutliningRuntimeOverhead);
948}
949
950// Create the callsite to profile count map which is
951// used to update the original function's entry count,
952// after the function is partially inlined into the callsite.
953void PartialInlinerImpl::computeCallsiteToProfCountMap(
954 Function *DuplicateFunction,
955 DenseMap<User *, uint64_t> &CallSiteToProfCountMap) const {
956 std::vector<User *> Users(DuplicateFunction->user_begin(),
957 DuplicateFunction->user_end());
958 Function *CurrentCaller = nullptr;
959 std::unique_ptr<BlockFrequencyInfo> TempBFI;
960 BlockFrequencyInfo *CurrentCallerBFI = nullptr;
32
'CurrentCallerBFI' initialized to a null pointer value
961
962 auto ComputeCurrBFI = [&,this](Function *Caller) {
963 // For the old pass manager:
964 if (!GetBFI) {
965 DominatorTree DT(*Caller);
966 LoopInfo LI(DT);
967 BranchProbabilityInfo BPI(*Caller, LI);
968 TempBFI.reset(new BlockFrequencyInfo(*Caller, BPI, LI));
969 CurrentCallerBFI = TempBFI.get();
970 } else {
971 // New pass manager:
972 CurrentCallerBFI = &(GetBFI(*Caller));
973 }
974 };
975
976 for (User *User : Users) {
977 CallBase *CB = getSupportedCallBase(User);
978 Function *Caller = CB->getCaller();
979 if (CurrentCaller != Caller) {
33
Assuming 'CurrentCaller' is equal to 'Caller'
34
Taking false branch
980 CurrentCaller = Caller;
981 ComputeCurrBFI(Caller);
982 } else {
983 assert(CurrentCallerBFI && "CallerBFI is not set")((void)0);
984 }
985 BasicBlock *CallBB = CB->getParent();
986 auto Count = CurrentCallerBFI->getBlockProfileCount(CallBB);
35
Called C++ object pointer is null
987 if (Count)
988 CallSiteToProfCountMap[User] = *Count;
989 else
990 CallSiteToProfCountMap[User] = 0;
991 }
992}
993
994PartialInlinerImpl::FunctionCloner::FunctionCloner(
995 Function *F, FunctionOutliningInfo *OI, OptimizationRemarkEmitter &ORE,
996 function_ref<AssumptionCache *(Function &)> LookupAC,
997 function_ref<TargetTransformInfo &(Function &)> GetTTI)
998 : OrigFunc(F), ORE(ORE), LookupAC(LookupAC), GetTTI(GetTTI) {
999 ClonedOI = std::make_unique<FunctionOutliningInfo>();
1000
1001 // Clone the function, so that we can hack away on it.
1002 ValueToValueMapTy VMap;
1003 ClonedFunc = CloneFunction(F, VMap);
1004
1005 ClonedOI->ReturnBlock = cast<BasicBlock>(VMap[OI->ReturnBlock]);
1006 ClonedOI->NonReturnBlock = cast<BasicBlock>(VMap[OI->NonReturnBlock]);
1007 for (BasicBlock *BB : OI->Entries)
1008 ClonedOI->Entries.push_back(cast<BasicBlock>(VMap[BB]));
1009
1010 for (BasicBlock *E : OI->ReturnBlockPreds) {
1011 BasicBlock *NewE = cast<BasicBlock>(VMap[E]);
1012 ClonedOI->ReturnBlockPreds.push_back(NewE);
1013 }
1014 // Go ahead and update all uses to the duplicate, so that we can just
1015 // use the inliner functionality when we're done hacking.
1016 F->replaceAllUsesWith(ClonedFunc);
1017}
1018
1019PartialInlinerImpl::FunctionCloner::FunctionCloner(
1020 Function *F, FunctionOutliningMultiRegionInfo *OI,
1021 OptimizationRemarkEmitter &ORE,
1022 function_ref<AssumptionCache *(Function &)> LookupAC,
1023 function_ref<TargetTransformInfo &(Function &)> GetTTI)
1024 : OrigFunc(F), ORE(ORE), LookupAC(LookupAC), GetTTI(GetTTI) {
1025 ClonedOMRI = std::make_unique<FunctionOutliningMultiRegionInfo>();
1026
1027 // Clone the function, so that we can hack away on it.
1028 ValueToValueMapTy VMap;
1029 ClonedFunc = CloneFunction(F, VMap);
1030
1031 // Go through all Outline Candidate Regions and update all BasicBlock
1032 // information.
1033 for (FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegionInfo :
1034 OI->ORI) {
1035 SmallVector<BasicBlock *, 8> Region;
1036 for (BasicBlock *BB : RegionInfo.Region)
1037 Region.push_back(cast<BasicBlock>(VMap[BB]));
1038
1039 BasicBlock *NewEntryBlock = cast<BasicBlock>(VMap[RegionInfo.EntryBlock]);
1040 BasicBlock *NewExitBlock = cast<BasicBlock>(VMap[RegionInfo.ExitBlock]);
1041 BasicBlock *NewReturnBlock = nullptr;
1042 if (RegionInfo.ReturnBlock)
1043 NewReturnBlock = cast<BasicBlock>(VMap[RegionInfo.ReturnBlock]);
1044 FunctionOutliningMultiRegionInfo::OutlineRegionInfo MappedRegionInfo(
1045 Region, NewEntryBlock, NewExitBlock, NewReturnBlock);
1046 ClonedOMRI->ORI.push_back(MappedRegionInfo);
1047 }
1048 // Go ahead and update all uses to the duplicate, so that we can just
1049 // use the inliner functionality when we're done hacking.
1050 F->replaceAllUsesWith(ClonedFunc);
1051}
1052
1053void PartialInlinerImpl::FunctionCloner::normalizeReturnBlock() const {
1054 auto GetFirstPHI = [](BasicBlock *BB) {
1055 BasicBlock::iterator I = BB->begin();
1056 PHINode *FirstPhi = nullptr;
1057 while (I != BB->end()) {
1058 PHINode *Phi = dyn_cast<PHINode>(I);
1059 if (!Phi)
1060 break;
1061 if (!FirstPhi) {
1062 FirstPhi = Phi;
1063 break;
1064 }
1065 }
1066 return FirstPhi;
1067 };
1068
1069 // Shouldn't need to normalize PHIs if we're not outlining non-early return
1070 // blocks.
1071 if (!ClonedOI)
1072 return;
1073
1074 // Special hackery is needed with PHI nodes that have inputs from more than
1075 // one extracted block. For simplicity, just split the PHIs into a two-level
1076 // sequence of PHIs, some of which will go in the extracted region, and some
1077 // of which will go outside.
1078 BasicBlock *PreReturn = ClonedOI->ReturnBlock;
1079 // only split block when necessary:
1080 PHINode *FirstPhi = GetFirstPHI(PreReturn);
1081 unsigned NumPredsFromEntries = ClonedOI->ReturnBlockPreds.size();
1082
1083 if (!FirstPhi || FirstPhi->getNumIncomingValues() <= NumPredsFromEntries + 1)
1084 return;
1085
1086 auto IsTrivialPhi = [](PHINode *PN) -> Value * {
1087 Value *CommonValue = PN->getIncomingValue(0);
1088 if (all_of(PN->incoming_values(),
1089 [&](Value *V) { return V == CommonValue; }))
1090 return CommonValue;
1091 return nullptr;
1092 };
1093
1094 ClonedOI->ReturnBlock = ClonedOI->ReturnBlock->splitBasicBlock(
1095 ClonedOI->ReturnBlock->getFirstNonPHI()->getIterator());
1096 BasicBlock::iterator I = PreReturn->begin();
1097 Instruction *Ins = &ClonedOI->ReturnBlock->front();
1098 SmallVector<Instruction *, 4> DeadPhis;
1099 while (I != PreReturn->end()) {
1100 PHINode *OldPhi = dyn_cast<PHINode>(I);
1101 if (!OldPhi)
1102 break;
1103
1104 PHINode *RetPhi =
1105 PHINode::Create(OldPhi->getType(), NumPredsFromEntries + 1, "", Ins);
1106 OldPhi->replaceAllUsesWith(RetPhi);
1107 Ins = ClonedOI->ReturnBlock->getFirstNonPHI();
1108
1109 RetPhi->addIncoming(&*I, PreReturn);
1110 for (BasicBlock *E : ClonedOI->ReturnBlockPreds) {
1111 RetPhi->addIncoming(OldPhi->getIncomingValueForBlock(E), E);
1112 OldPhi->removeIncomingValue(E);
1113 }
1114
1115 // After incoming values splitting, the old phi may become trivial.
1116 // Keeping the trivial phi can introduce definition inside the outline
1117 // region which is live-out, causing necessary overhead (load, store
1118 // arg passing etc).
1119 if (auto *OldPhiVal = IsTrivialPhi(OldPhi)) {
1120 OldPhi->replaceAllUsesWith(OldPhiVal);
1121 DeadPhis.push_back(OldPhi);
1122 }
1123 ++I;
1124 }
1125 for (auto *DP : DeadPhis)
1126 DP->eraseFromParent();
1127
1128 for (auto *E : ClonedOI->ReturnBlockPreds)
1129 E->getTerminator()->replaceUsesOfWith(PreReturn, ClonedOI->ReturnBlock);
1130}
1131
1132bool PartialInlinerImpl::FunctionCloner::doMultiRegionFunctionOutlining() {
1133
1134 auto ComputeRegionCost =
1135 [&](SmallVectorImpl<BasicBlock *> &Region) -> InstructionCost {
1136 InstructionCost Cost = 0;
1137 for (BasicBlock* BB : Region)
1138 Cost += computeBBInlineCost(BB, &GetTTI(*BB->getParent()));
1139 return Cost;
1140 };
1141
1142 assert(ClonedOMRI && "Expecting OutlineInfo for multi region outline")((void)0);
1143
1144 if (ClonedOMRI->ORI.empty())
1145 return false;
1146
1147 // The CodeExtractor needs a dominator tree.
1148 DominatorTree DT;
1149 DT.recalculate(*ClonedFunc);
1150
1151 // Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo.
1152 LoopInfo LI(DT);
1153 BranchProbabilityInfo BPI(*ClonedFunc, LI);
1154 ClonedFuncBFI.reset(new BlockFrequencyInfo(*ClonedFunc, BPI, LI));
1155
1156 // Cache and recycle the CodeExtractor analysis to avoid O(n^2) compile-time.
1157 CodeExtractorAnalysisCache CEAC(*ClonedFunc);
1158
1159 SetVector<Value *> Inputs, Outputs, Sinks;
1160 for (FunctionOutliningMultiRegionInfo::OutlineRegionInfo RegionInfo :
1161 ClonedOMRI->ORI) {
1162 InstructionCost CurrentOutlinedRegionCost =
1163 ComputeRegionCost(RegionInfo.Region);
1164
1165 CodeExtractor CE(RegionInfo.Region, &DT, /*AggregateArgs*/ false,
1166 ClonedFuncBFI.get(), &BPI,
1167 LookupAC(*RegionInfo.EntryBlock->getParent()),
1168 /* AllowVarargs */ false);
1169
1170 CE.findInputsOutputs(Inputs, Outputs, Sinks);
1171
1172 LLVM_DEBUG({do { } while (false)
1173 dbgs() << "inputs: " << Inputs.size() << "\n";do { } while (false)
1174 dbgs() << "outputs: " << Outputs.size() << "\n";do { } while (false)
1175 for (Value *value : Inputs)do { } while (false)
1176 dbgs() << "value used in func: " << *value << "\n";do { } while (false)
1177 for (Value *output : Outputs)do { } while (false)
1178 dbgs() << "instr used in func: " << *output << "\n";do { } while (false)
1179 })do { } while (false);
1180
1181 // Do not extract regions that have live exit variables.
1182 if (Outputs.size() > 0 && !ForceLiveExit)
1183 continue;
1184
1185 if (Function *OutlinedFunc = CE.extractCodeRegion(CEAC)) {
1186 CallBase *OCS = PartialInlinerImpl::getOneCallSiteTo(*OutlinedFunc);
1187 BasicBlock *OutliningCallBB = OCS->getParent();
1188 assert(OutliningCallBB->getParent() == ClonedFunc)((void)0);
1189 OutlinedFunctions.push_back(std::make_pair(OutlinedFunc,OutliningCallBB));
1190 NumColdRegionsOutlined++;
1191 OutlinedRegionCost += CurrentOutlinedRegionCost;
1192
1193 if (MarkOutlinedColdCC) {
1194 OutlinedFunc->setCallingConv(CallingConv::Cold);
1195 OCS->setCallingConv(CallingConv::Cold);
1196 }
1197 } else
1198 ORE.emit([&]() {
1199 return OptimizationRemarkMissed(DEBUG_TYPE"partial-inlining", "ExtractFailed",
1200 &RegionInfo.Region.front()->front())
1201 << "Failed to extract region at block "
1202 << ore::NV("Block", RegionInfo.Region.front());
1203 });
1204 }
1205
1206 return !OutlinedFunctions.empty();
1207}
1208
1209Function *
1210PartialInlinerImpl::FunctionCloner::doSingleRegionFunctionOutlining() {
1211 // Returns true if the block is to be partial inlined into the caller
1212 // (i.e. not to be extracted to the out of line function)
1213 auto ToBeInlined = [&, this](BasicBlock *BB) {
1214 return BB == ClonedOI->ReturnBlock ||
1215 llvm::is_contained(ClonedOI->Entries, BB);
1216 };
1217
1218 assert(ClonedOI && "Expecting OutlineInfo for single region outline")((void)0);
1219 // The CodeExtractor needs a dominator tree.
1220 DominatorTree DT;
1221 DT.recalculate(*ClonedFunc);
1222
1223 // Manually calculate a BlockFrequencyInfo and BranchProbabilityInfo.
1224 LoopInfo LI(DT);
1225 BranchProbabilityInfo BPI(*ClonedFunc, LI);
1226 ClonedFuncBFI.reset(new BlockFrequencyInfo(*ClonedFunc, BPI, LI));
1227
1228 // Gather up the blocks that we're going to extract.
1229 std::vector<BasicBlock *> ToExtract;
1230 auto *ClonedFuncTTI = &GetTTI(*ClonedFunc);
1231 ToExtract.push_back(ClonedOI->NonReturnBlock);
1232 OutlinedRegionCost += PartialInlinerImpl::computeBBInlineCost(
1233 ClonedOI->NonReturnBlock, ClonedFuncTTI);
1234 for (BasicBlock &BB : *ClonedFunc)
1235 if (!ToBeInlined(&BB) && &BB != ClonedOI->NonReturnBlock) {
1236 ToExtract.push_back(&BB);
1237 // FIXME: the code extractor may hoist/sink more code
1238 // into the outlined function which may make the outlining
1239 // overhead (the difference of the outlined function cost
1240 // and OutliningRegionCost) look larger.
1241 OutlinedRegionCost += computeBBInlineCost(&BB, ClonedFuncTTI);
1242 }
1243
1244 // Extract the body of the if.
1245 CodeExtractorAnalysisCache CEAC(*ClonedFunc);
1246 Function *OutlinedFunc =
1247 CodeExtractor(ToExtract, &DT, /*AggregateArgs*/ false,
1248 ClonedFuncBFI.get(), &BPI, LookupAC(*ClonedFunc),
1249 /* AllowVarargs */ true)
1250 .extractCodeRegion(CEAC);
1251
1252 if (OutlinedFunc) {
1253 BasicBlock *OutliningCallBB =
1254 PartialInlinerImpl::getOneCallSiteTo(*OutlinedFunc)->getParent();
1255 assert(OutliningCallBB->getParent() == ClonedFunc)((void)0);
1256 OutlinedFunctions.push_back(std::make_pair(OutlinedFunc, OutliningCallBB));
1257 } else
1258 ORE.emit([&]() {
1259 return OptimizationRemarkMissed(DEBUG_TYPE"partial-inlining", "ExtractFailed",
1260 &ToExtract.front()->front())
1261 << "Failed to extract region at block "
1262 << ore::NV("Block", ToExtract.front());
1263 });
1264
1265 return OutlinedFunc;
1266}
1267
1268PartialInlinerImpl::FunctionCloner::~FunctionCloner() {
1269 // Ditch the duplicate, since we're done with it, and rewrite all remaining
1270 // users (function pointers, etc.) back to the original function.
1271 ClonedFunc->replaceAllUsesWith(OrigFunc);
1272 ClonedFunc->eraseFromParent();
1273 if (!IsFunctionInlined) {
1274 // Remove each function that was speculatively created if there is no
1275 // reference.
1276 for (auto FuncBBPair : OutlinedFunctions) {
1277 Function *Func = FuncBBPair.first;
1278 Func->eraseFromParent();
1279 }
1280 }
1281}
1282
1283std::pair<bool, Function *> PartialInlinerImpl::unswitchFunction(Function &F) {
1284 if (F.hasAddressTaken())
14
Assuming the condition is false
15
Taking false branch
1285 return {false, nullptr};
1286
1287 // Let inliner handle it
1288 if (F.hasFnAttribute(Attribute::AlwaysInline))
16
Assuming the condition is false
17
Taking false branch
1289 return {false, nullptr};
1290
1291 if (F.hasFnAttribute(Attribute::NoInline))
18
Assuming the condition is false
19
Taking false branch
1292 return {false, nullptr};
1293
1294 if (PSI.isFunctionEntryCold(&F))
20
Assuming the condition is false
21
Taking false branch
1295 return {false, nullptr};
1296
1297 if (F.users().empty())
22
Assuming the condition is false
23
Taking false branch
1298 return {false, nullptr};
1299
1300 OptimizationRemarkEmitter ORE(&F);
1301
1302 // Only try to outline cold regions if we have a profile summary, which
1303 // implies we have profiling information.
1304 if (PSI.hasProfileSummary() && F.hasProfileData() &&
1305 !DisableMultiRegionPartialInline) {
1306 std::unique_ptr<FunctionOutliningMultiRegionInfo> OMRI =
1307 computeOutliningColdRegionsInfo(F, ORE);
1308 if (OMRI) {
1309 FunctionCloner Cloner(&F, OMRI.get(), ORE, LookupAssumptionCache, GetTTI);
1310
1311 LLVM_DEBUG({do { } while (false)
1312 dbgs() << "HotCountThreshold = " << PSI.getHotCountThreshold() << "\n";do { } while (false)
1313 dbgs() << "ColdCountThreshold = " << PSI.getColdCountThreshold()do { } while (false)
1314 << "\n";do { } while (false)
1315 })do { } while (false);
1316
1317 bool DidOutline = Cloner.doMultiRegionFunctionOutlining();
1318
1319 if (DidOutline) {
1320 LLVM_DEBUG({do { } while (false)
1321 dbgs() << ">>>>>> Outlined (Cloned) Function >>>>>>\n";do { } while (false)
1322 Cloner.ClonedFunc->print(dbgs());do { } while (false)
1323 dbgs() << "<<<<<< Outlined (Cloned) Function <<<<<<\n";do { } while (false)
1324 })do { } while (false);
1325
1326 if (tryPartialInline(Cloner))
1327 return {true, nullptr};
1328 }
1329 }
1330 }
1331
1332 // Fall-thru to regular partial inlining if we:
1333 // i) can't find any cold regions to outline, or
1334 // ii) can't inline the outlined function anywhere.
1335 std::unique_ptr<FunctionOutliningInfo> OI = computeOutliningInfo(F);
1336 if (!OI)
24
Taking false branch
1337 return {false, nullptr};
1338
1339 FunctionCloner Cloner(&F, OI.get(), ORE, LookupAssumptionCache, GetTTI);
1340 Cloner.normalizeReturnBlock();
1341
1342 Function *OutlinedFunction = Cloner.doSingleRegionFunctionOutlining();
1343
1344 if (!OutlinedFunction
24.1
'OutlinedFunction' is non-null
)
25
Taking false branch
1345 return {false, nullptr};
1346
1347 if (tryPartialInline(Cloner))
26
Calling 'PartialInlinerImpl::tryPartialInline'
1348 return {true, OutlinedFunction};
1349
1350 return {false, nullptr};
1351}
1352
1353bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) {
1354 if (Cloner.OutlinedFunctions.empty())
27
Taking false branch
1355 return false;
1356
1357 int SizeCost = 0;
1358 BlockFrequency WeightedRcost;
1359 int NonWeightedRcost;
1360
1361 auto OutliningCosts = computeOutliningCosts(Cloner);
1362 assert(std::get<0>(OutliningCosts).isValid() &&((void)0)
1363 std::get<1>(OutliningCosts).isValid() && "Expected valid costs")((void)0);
1364
1365 SizeCost = *std::get<0>(OutliningCosts).getValue();
1366 NonWeightedRcost = *std::get<1>(OutliningCosts).getValue();
1367
1368 // Only calculate RelativeToEntryFreq when we are doing single region
1369 // outlining.
1370 BranchProbability RelativeToEntryFreq;
1371 if (Cloner.ClonedOI)
28
Taking false branch
1372 RelativeToEntryFreq = getOutliningCallBBRelativeFreq(Cloner);
1373 else
1374 // RelativeToEntryFreq doesn't make sense when we have more than one
1375 // outlined call because each call will have a different relative frequency
1376 // to the entry block. We can consider using the average, but the
1377 // usefulness of that information is questionable. For now, assume we never
1378 // execute the calls to outlined functions.
1379 RelativeToEntryFreq = BranchProbability(0, 1);
1380
1381 WeightedRcost = BlockFrequency(NonWeightedRcost) * RelativeToEntryFreq;
1382
1383 // The call sequence(s) to the outlined function(s) are larger than the sum of
1384 // the original outlined region size(s), it does not increase the chances of
1385 // inlining the function with outlining (The inliner uses the size increase to
1386 // model the cost of inlining a callee).
1387 if (!SkipCostAnalysis && Cloner.OutlinedRegionCost < SizeCost) {
29
Assuming the condition is false
1388 OptimizationRemarkEmitter OrigFuncORE(Cloner.OrigFunc);
1389 DebugLoc DLoc;
1390 BasicBlock *Block;
1391 std::tie(DLoc, Block) = getOneDebugLoc(*Cloner.ClonedFunc);
1392 OrigFuncORE.emit([&]() {
1393 return OptimizationRemarkAnalysis(DEBUG_TYPE"partial-inlining", "OutlineRegionTooSmall",
1394 DLoc, Block)
1395 << ore::NV("Function", Cloner.OrigFunc)
1396 << " not partially inlined into callers (Original Size = "
1397 << ore::NV("OutlinedRegionOriginalSize", Cloner.OutlinedRegionCost)
1398 << ", Size of call sequence to outlined function = "
1399 << ore::NV("NewSize", SizeCost) << ")";
1400 });
1401 return false;
1402 }
1403
1404 assert(Cloner.OrigFunc->users().empty() &&((void)0)
1405 "F's users should all be replaced!")((void)0);
1406
1407 std::vector<User *> Users(Cloner.ClonedFunc->user_begin(),
1408 Cloner.ClonedFunc->user_end());
1409
1410 DenseMap<User *, uint64_t> CallSiteToProfCountMap;
1411 auto CalleeEntryCount = Cloner.OrigFunc->getEntryCount();
1412 if (CalleeEntryCount)
30
Taking true branch
1413 computeCallsiteToProfCountMap(Cloner.ClonedFunc, CallSiteToProfCountMap);
31
Calling 'PartialInlinerImpl::computeCallsiteToProfCountMap'
1414
1415 uint64_t CalleeEntryCountV =
1416 (CalleeEntryCount ? CalleeEntryCount.getCount() : 0);
1417
1418 bool AnyInline = false;
1419 for (User *User : Users) {
1420 CallBase *CB = getSupportedCallBase(User);
1421
1422 if (isLimitReached())
1423 continue;
1424
1425 OptimizationRemarkEmitter CallerORE(CB->getCaller());
1426 if (!shouldPartialInline(*CB, Cloner, WeightedRcost, CallerORE))
1427 continue;
1428
1429 // Construct remark before doing the inlining, as after successful inlining
1430 // the callsite is removed.
1431 OptimizationRemark OR(DEBUG_TYPE"partial-inlining", "PartiallyInlined", CB);
1432 OR << ore::NV("Callee", Cloner.OrigFunc) << " partially inlined into "
1433 << ore::NV("Caller", CB->getCaller());
1434
1435 InlineFunctionInfo IFI(nullptr, GetAssumptionCache, &PSI);
1436 // We can only forward varargs when we outlined a single region, else we
1437 // bail on vararg functions.
1438 if (!InlineFunction(*CB, IFI, nullptr, true,
1439 (Cloner.ClonedOI ? Cloner.OutlinedFunctions.back().first
1440 : nullptr))
1441 .isSuccess())
1442 continue;
1443
1444 CallerORE.emit(OR);
1445
1446 // Now update the entry count:
1447 if (CalleeEntryCountV && CallSiteToProfCountMap.count(User)) {
1448 uint64_t CallSiteCount = CallSiteToProfCountMap[User];
1449 CalleeEntryCountV -= std::min(CalleeEntryCountV, CallSiteCount);
1450 }
1451
1452 AnyInline = true;
1453 NumPartialInlining++;
1454 // Update the stats
1455 if (Cloner.ClonedOI)
1456 NumPartialInlined++;
1457 else
1458 NumColdOutlinePartialInlined++;
1459 }
1460
1461 if (AnyInline) {
1462 Cloner.IsFunctionInlined = true;
1463 if (CalleeEntryCount)
1464 Cloner.OrigFunc->setEntryCount(
1465 CalleeEntryCount.setCount(CalleeEntryCountV));
1466 OptimizationRemarkEmitter OrigFuncORE(Cloner.OrigFunc);
1467 OrigFuncORE.emit([&]() {
1468 return OptimizationRemark(DEBUG_TYPE"partial-inlining", "PartiallyInlined", Cloner.OrigFunc)
1469 << "Partially inlined into at least one caller";
1470 });
1471 }
1472
1473 return AnyInline;
1474}
1475
1476bool PartialInlinerImpl::run(Module &M) {
1477 if (DisablePartialInlining)
2
Assuming the condition is false
3
Taking false branch
1478 return false;
1479
1480 std::vector<Function *> Worklist;
1481 Worklist.reserve(M.size());
1482 for (Function &F : M)
1483 if (!F.use_empty() && !F.isDeclaration())
1484 Worklist.push_back(&F);
1485
1486 bool Changed = false;
1487 while (!Worklist.empty()) {
4
Assuming the condition is true
5
Loop condition is true. Entering loop body
9
Assuming the condition is true
10
Loop condition is true. Entering loop body
1488 Function *CurrFunc = Worklist.back();
1489 Worklist.pop_back();
1490
1491 if (CurrFunc->use_empty())
6
Taking false branch
11
Taking false branch
1492 continue;
1493
1494 bool Recursive = false;
1495 for (User *U : CurrFunc->users())
1496 if (Instruction *I = dyn_cast<Instruction>(U))
1497 if (I->getParent()->getParent() == CurrFunc) {
1498 Recursive = true;
1499 break;
1500 }
1501 if (Recursive
6.1
'Recursive' is false
11.1
'Recursive' is false
)
7
Taking false branch
12
Taking false branch
1502 continue;
1503
1504 std::pair<bool, Function *> Result = unswitchFunction(*CurrFunc);
13
Calling 'PartialInlinerImpl::unswitchFunction'
1505 if (Result.second
7.1
Field 'second' is null
)
8
Taking false branch
1506 Worklist.push_back(Result.second);
1507 Changed |= Result.first;
1508 }
1509
1510 return Changed;
1511}
1512
1513char PartialInlinerLegacyPass::ID = 0;
1514
1515INITIALIZE_PASS_BEGIN(PartialInlinerLegacyPass, "partial-inliner",static void *initializePartialInlinerLegacyPassPassOnce(PassRegistry
&Registry) {
1516 "Partial Inliner", false, false)static void *initializePartialInlinerLegacyPassPassOnce(PassRegistry
&Registry) {
1517INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)initializeAssumptionCacheTrackerPass(Registry);
1518INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)initializeProfileSummaryInfoWrapperPassPass(Registry);
1519INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)initializeTargetTransformInfoWrapperPassPass(Registry);
1520INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)initializeTargetLibraryInfoWrapperPassPass(Registry);
1521INITIALIZE_PASS_END(PartialInlinerLegacyPass, "partial-inliner",PassInfo *PI = new PassInfo( "Partial Inliner", "partial-inliner"
, &PartialInlinerLegacyPass::ID, PassInfo::NormalCtor_t(callDefaultCtor
<PartialInlinerLegacyPass>), false, false); Registry.registerPass
(*PI, true); return PI; } static llvm::once_flag InitializePartialInlinerLegacyPassPassFlag
; void llvm::initializePartialInlinerLegacyPassPass(PassRegistry
&Registry) { llvm::call_once(InitializePartialInlinerLegacyPassPassFlag
, initializePartialInlinerLegacyPassPassOnce, std::ref(Registry
)); }
1522 "Partial Inliner", false, false)PassInfo *PI = new PassInfo( "Partial Inliner", "partial-inliner"
, &PartialInlinerLegacyPass::ID, PassInfo::NormalCtor_t(callDefaultCtor
<PartialInlinerLegacyPass>), false, false); Registry.registerPass
(*PI, true); return PI; } static llvm::once_flag InitializePartialInlinerLegacyPassPassFlag
; void llvm::initializePartialInlinerLegacyPassPass(PassRegistry
&Registry) { llvm::call_once(InitializePartialInlinerLegacyPassPassFlag
, initializePartialInlinerLegacyPassPassOnce, std::ref(Registry
)); }
1523
1524ModulePass *llvm::createPartialInliningPass() {
1525 return new PartialInlinerLegacyPass();
1526}
1527
1528PreservedAnalyses PartialInlinerPass::run(Module &M,
1529 ModuleAnalysisManager &AM) {
1530 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
1531
1532 auto GetAssumptionCache = [&FAM](Function &F) -> AssumptionCache & {
1533 return FAM.getResult<AssumptionAnalysis>(F);
1534 };
1535
1536 auto LookupAssumptionCache = [&FAM](Function &F) -> AssumptionCache * {
1537 return FAM.getCachedResult<AssumptionAnalysis>(F);
1538 };
1539
1540 auto GetBFI = [&FAM](Function &F) -> BlockFrequencyInfo & {
1541 return FAM.getResult<BlockFrequencyAnalysis>(F);
1542 };
1543
1544 auto GetTTI = [&FAM](Function &F) -> TargetTransformInfo & {
1545 return FAM.getResult<TargetIRAnalysis>(F);
1546 };
1547
1548 auto GetTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
1549 return FAM.getResult<TargetLibraryAnalysis>(F);
1550 };
1551
1552 ProfileSummaryInfo &PSI = AM.getResult<ProfileSummaryAnalysis>(M);
1553
1554 if (PartialInlinerImpl(GetAssumptionCache, LookupAssumptionCache, GetTTI,
1
Calling 'PartialInlinerImpl::run'
1555 GetTLI, PSI, GetBFI)
1556 .run(M))
1557 return PreservedAnalyses::none();
1558 return PreservedAnalyses::all();
1559}