Bug Summary

File:src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
Warning:line 883, column 48
Called C++ object pointer is null

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple amd64-unknown-openbsd7.0 -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name PGOInstrumentation.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model static -mframe-pointer=all -relaxed-aliasing -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/usr/src/gnu/usr.bin/clang/libLLVM/obj -resource-dir /usr/local/lib/clang/13.0.0 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I 
/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Analysis -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ASMParser -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/BinaryFormat -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Bitcode -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Bitcode -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Bitstream -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /include/llvm/CodeGen -I /include/llvm/CodeGen/PBQP -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/IR -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/IR -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Coroutines -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ProfileData/Coverage -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/CodeView -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/DWARF -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/MSF -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/PDB -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Demangle -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ExecutionEngine -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ExecutionEngine/JITLink -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ExecutionEngine/Orc -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend/OpenACC -I 
/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend/OpenMP -I /include/llvm/CodeGen/GlobalISel -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/IRReader -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/InstCombine -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/Transforms/InstCombine -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/LTO -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Linker -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/MC -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/MC/MCParser -I /include/llvm/CodeGen/MIRParser -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Object -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Option -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Passes -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ProfileData -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Scalar -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ADT -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Support -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/Symbolize -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Target -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I 
/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Utils -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Vectorize -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/IPO -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include -I /usr/src/gnu/usr.bin/clang/libLLVM/../include -I /usr/src/gnu/usr.bin/clang/libLLVM/obj -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include -D NDEBUG -D __STDC_LIMIT_MACROS -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D LLVM_PREFIX="/usr" -internal-isystem /usr/include/c++/v1 -internal-isystem /usr/local/lib/clang/13.0.0/include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/usr/src/gnu/usr.bin/clang/libLLVM/obj -ferror-limit 19 -fvisibility-inlines-hidden -fwrapv -stack-protector 2 -fno-rtti -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-valloc -fno-builtin-free -fno-builtin-strdup 
-fno-builtin-strndup -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /home/ben/Projects/vmm/scan-build/2022-01-12-194120-40624-1 -x c++ /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
1//===- PGOInstrumentation.cpp - MST-based PGO Instrumentation -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements PGO instrumentation using a minimum spanning tree based
10// on the following paper:
11// [1] Donald E. Knuth, Francis R. Stevenson. Optimal measurement of points
12// for program frequency counts. BIT Numerical Mathematics 1973, Volume 13,
13// Issue 3, pp 313-322
14// The idea of the algorithm is based on the fact that for each node (except for
15// the entry and exit), the sum of incoming edge counts equals the sum of
16// outgoing edge counts. The count of edge on spanning tree can be derived from
17// those edges not on the spanning tree. Knuth proves this method instruments
18// the minimum number of edges.
19//
20// The minimal spanning tree here is actually a maximum weight tree -- on-tree
21// edges have higher frequencies (more likely to execute). The idea is to
22// instrument those less frequently executed edges to reduce the runtime
23// overhead of instrumented binaries.
24//
25// This file contains two passes:
26// (1) Pass PGOInstrumentationGen which instruments the IR to generate edge
27// count profile, and generates the instrumentation for indirect call
28// profiling.
29// (2) Pass PGOInstrumentationUse which reads the edge count profile and
30// annotates the branch weights. It also reads the indirect call value
31// profiling records and annotates the indirect call instructions.
32//
33// To get the precise counter information, these two passes need to be invoked
34// at the same compilation point (so they see the same IR). For pass
35// PGOInstrumentationGen, the real work is done in instrumentOneFunc(). For
36// pass PGOInstrumentationUse, the real work is done in class PGOUseFunc and
37// the profile is opened at module level and passed to each PGOUseFunc instance.
38// The shared code for PGOInstrumentationGen and PGOInstrumentationUse is put
39// in class FuncPGOInstrumentation.
40//
41// Class PGOEdge represents a CFG edge and some auxiliary information. Class
42// BBInfo contains auxiliary information for each BB. These two classes are used
43// in pass PGOInstrumentationGen. Classes PGOUseEdge and UseBBInfo are derived
44// from PGOEdge and BBInfo, respectively. They contain extra data structures
45// used in populating profile counters.
46// The MST implementation is in Class CFGMST (CFGMST.h).
47//
48//===----------------------------------------------------------------------===//
49
50#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
51#include "CFGMST.h"
52#include "ValueProfileCollector.h"
53#include "llvm/ADT/APInt.h"
54#include "llvm/ADT/ArrayRef.h"
55#include "llvm/ADT/MapVector.h"
56#include "llvm/ADT/STLExtras.h"
57#include "llvm/ADT/SmallVector.h"
58#include "llvm/ADT/Statistic.h"
59#include "llvm/ADT/StringRef.h"
60#include "llvm/ADT/Triple.h"
61#include "llvm/ADT/Twine.h"
62#include "llvm/ADT/iterator.h"
63#include "llvm/ADT/iterator_range.h"
64#include "llvm/Analysis/BlockFrequencyInfo.h"
65#include "llvm/Analysis/BranchProbabilityInfo.h"
66#include "llvm/Analysis/CFG.h"
67#include "llvm/Analysis/EHPersonalities.h"
68#include "llvm/Analysis/LoopInfo.h"
69#include "llvm/Analysis/OptimizationRemarkEmitter.h"
70#include "llvm/Analysis/ProfileSummaryInfo.h"
71#include "llvm/IR/Attributes.h"
72#include "llvm/IR/BasicBlock.h"
73#include "llvm/IR/CFG.h"
74#include "llvm/IR/Comdat.h"
75#include "llvm/IR/Constant.h"
76#include "llvm/IR/Constants.h"
77#include "llvm/IR/DiagnosticInfo.h"
78#include "llvm/IR/Dominators.h"
79#include "llvm/IR/Function.h"
80#include "llvm/IR/GlobalAlias.h"
81#include "llvm/IR/GlobalValue.h"
82#include "llvm/IR/GlobalVariable.h"
83#include "llvm/IR/IRBuilder.h"
84#include "llvm/IR/InstVisitor.h"
85#include "llvm/IR/InstrTypes.h"
86#include "llvm/IR/Instruction.h"
87#include "llvm/IR/Instructions.h"
88#include "llvm/IR/IntrinsicInst.h"
89#include "llvm/IR/Intrinsics.h"
90#include "llvm/IR/LLVMContext.h"
91#include "llvm/IR/MDBuilder.h"
92#include "llvm/IR/Module.h"
93#include "llvm/IR/PassManager.h"
94#include "llvm/IR/ProfileSummary.h"
95#include "llvm/IR/Type.h"
96#include "llvm/IR/Value.h"
97#include "llvm/InitializePasses.h"
98#include "llvm/Pass.h"
99#include "llvm/ProfileData/InstrProf.h"
100#include "llvm/ProfileData/InstrProfReader.h"
101#include "llvm/Support/BranchProbability.h"
102#include "llvm/Support/CRC.h"
103#include "llvm/Support/Casting.h"
104#include "llvm/Support/CommandLine.h"
105#include "llvm/Support/DOTGraphTraits.h"
106#include "llvm/Support/Debug.h"
107#include "llvm/Support/Error.h"
108#include "llvm/Support/ErrorHandling.h"
109#include "llvm/Support/GraphWriter.h"
110#include "llvm/Support/raw_ostream.h"
111#include "llvm/Transforms/Instrumentation.h"
112#include "llvm/Transforms/Utils/BasicBlockUtils.h"
113#include <algorithm>
114#include <cassert>
115#include <cstdint>
116#include <memory>
117#include <numeric>
118#include <string>
119#include <unordered_map>
120#include <utility>
121#include <vector>
122
// Shorthand aliases for Function::ProfileCount and
// ValueProfileCollector::CandidateInfo.
123using namespace llvm;
124using ProfileCount = Function::ProfileCount;
125using VPCandidateInfo = ValueProfileCollector::CandidateInfo;
126
// Debug type string; it appears as the first field of every Statistic
// initializer that follows.
127#define DEBUG_TYPE"pgo-instrumentation" "pgo-instrumentation"
128
// Pass statistics: each STATISTIC defines a static llvm::Statistic registered
// under the "pgo-instrumentation" debug type. Every NumOfPGO* counter that
// covers edges, selects, mem intrinsics, basic blocks, splits, functions,
// mismatches and missing profiles has a NumOfCSPGO* counterpart whose
// description ends in "in CSPGO". NumOfPGOICall has no CSPGO counterpart here.
129STATISTIC(NumOfPGOInstrument, "Number of edges instrumented.")static llvm::Statistic NumOfPGOInstrument = {"pgo-instrumentation"
, "NumOfPGOInstrument", "Number of edges instrumented."}
;
130STATISTIC(NumOfPGOSelectInsts, "Number of select instruction instrumented.")static llvm::Statistic NumOfPGOSelectInsts = {"pgo-instrumentation"
, "NumOfPGOSelectInsts", "Number of select instruction instrumented."
}
;
131STATISTIC(NumOfPGOMemIntrinsics, "Number of mem intrinsics instrumented.")static llvm::Statistic NumOfPGOMemIntrinsics = {"pgo-instrumentation"
, "NumOfPGOMemIntrinsics", "Number of mem intrinsics instrumented."
}
;
132STATISTIC(NumOfPGOEdge, "Number of edges.")static llvm::Statistic NumOfPGOEdge = {"pgo-instrumentation",
"NumOfPGOEdge", "Number of edges."}
;
133STATISTIC(NumOfPGOBB, "Number of basic-blocks.")static llvm::Statistic NumOfPGOBB = {"pgo-instrumentation", "NumOfPGOBB"
, "Number of basic-blocks."}
;
134STATISTIC(NumOfPGOSplit, "Number of critical edge splits.")static llvm::Statistic NumOfPGOSplit = {"pgo-instrumentation"
, "NumOfPGOSplit", "Number of critical edge splits."}
;
135STATISTIC(NumOfPGOFunc, "Number of functions having valid profile counts.")static llvm::Statistic NumOfPGOFunc = {"pgo-instrumentation",
"NumOfPGOFunc", "Number of functions having valid profile counts."
}
;
136STATISTIC(NumOfPGOMismatch, "Number of functions having mismatch profile.")static llvm::Statistic NumOfPGOMismatch = {"pgo-instrumentation"
, "NumOfPGOMismatch", "Number of functions having mismatch profile."
}
;
137STATISTIC(NumOfPGOMissing, "Number of functions without profile.")static llvm::Statistic NumOfPGOMissing = {"pgo-instrumentation"
, "NumOfPGOMissing", "Number of functions without profile."}
;
138STATISTIC(NumOfPGOICall, "Number of indirect call value instrumentations.")static llvm::Statistic NumOfPGOICall = {"pgo-instrumentation"
, "NumOfPGOICall", "Number of indirect call value instrumentations."
}
;
139STATISTIC(NumOfCSPGOInstrument, "Number of edges instrumented in CSPGO.")static llvm::Statistic NumOfCSPGOInstrument = {"pgo-instrumentation"
, "NumOfCSPGOInstrument", "Number of edges instrumented in CSPGO."
}
;
140STATISTIC(NumOfCSPGOSelectInsts,static llvm::Statistic NumOfCSPGOSelectInsts = {"pgo-instrumentation"
, "NumOfCSPGOSelectInsts", "Number of select instruction instrumented in CSPGO."
}
141 "Number of select instruction instrumented in CSPGO.")static llvm::Statistic NumOfCSPGOSelectInsts = {"pgo-instrumentation"
, "NumOfCSPGOSelectInsts", "Number of select instruction instrumented in CSPGO."
}
;
142STATISTIC(NumOfCSPGOMemIntrinsics,static llvm::Statistic NumOfCSPGOMemIntrinsics = {"pgo-instrumentation"
, "NumOfCSPGOMemIntrinsics", "Number of mem intrinsics instrumented in CSPGO."
}
143 "Number of mem intrinsics instrumented in CSPGO.")static llvm::Statistic NumOfCSPGOMemIntrinsics = {"pgo-instrumentation"
, "NumOfCSPGOMemIntrinsics", "Number of mem intrinsics instrumented in CSPGO."
}
;
144STATISTIC(NumOfCSPGOEdge, "Number of edges in CSPGO.")static llvm::Statistic NumOfCSPGOEdge = {"pgo-instrumentation"
, "NumOfCSPGOEdge", "Number of edges in CSPGO."}
;
145STATISTIC(NumOfCSPGOBB, "Number of basic-blocks in CSPGO.")static llvm::Statistic NumOfCSPGOBB = {"pgo-instrumentation",
"NumOfCSPGOBB", "Number of basic-blocks in CSPGO."}
;
146STATISTIC(NumOfCSPGOSplit, "Number of critical edge splits in CSPGO.")static llvm::Statistic NumOfCSPGOSplit = {"pgo-instrumentation"
, "NumOfCSPGOSplit", "Number of critical edge splits in CSPGO."
}
;
147STATISTIC(NumOfCSPGOFunc,static llvm::Statistic NumOfCSPGOFunc = {"pgo-instrumentation"
, "NumOfCSPGOFunc", "Number of functions having valid profile counts in CSPGO."
}
148 "Number of functions having valid profile counts in CSPGO.")static llvm::Statistic NumOfCSPGOFunc = {"pgo-instrumentation"
, "NumOfCSPGOFunc", "Number of functions having valid profile counts in CSPGO."
}
;
149STATISTIC(NumOfCSPGOMismatch,static llvm::Statistic NumOfCSPGOMismatch = {"pgo-instrumentation"
, "NumOfCSPGOMismatch", "Number of functions having mismatch profile in CSPGO."
}
150 "Number of functions having mismatch profile in CSPGO.")static llvm::Statistic NumOfCSPGOMismatch = {"pgo-instrumentation"
, "NumOfCSPGOMismatch", "Number of functions having mismatch profile in CSPGO."
}
;
151STATISTIC(NumOfCSPGOMissing, "Number of functions without profile in CSPGO.")static llvm::Statistic NumOfCSPGOMissing = {"pgo-instrumentation"
, "NumOfCSPGOMissing", "Number of functions without profile in CSPGO."
}
;
152
153// Command line option to specify the file to read profile from. This is
154// mainly used for testing. Hidden option; the default is the empty string.
// Adjacent string literals are concatenated verbatim, so the first fragment
// of the description must end with a space.
155static cl::opt<std::string>
156 PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden,
157 cl::value_desc("filename"),
158 cl::desc("Specify the path of profile data file. This is "
159 "mainly for test purpose."));
// Hidden command line option naming the profile remapping file. The default
// is the empty string.
160static cl::opt<std::string> PGOTestProfileRemappingFile(
161 "pgo-test-profile-remapping-file", cl::init(""), cl::Hidden,
162 cl::value_desc("filename"),
163 cl::desc("Specify the path of profile remapping file. This is mainly for "
164 "test purpose."));
165
166// Command line option to disable value profiling. The default is false,
167// i.e. value profiling is enabled by default. This is intended for debugging.
168static cl::opt<bool> DisableValueProfiling("disable-vp", cl::init(false),
169 cl::Hidden,
170 cl::desc("Disable Value Profiling"));
171
172// Command line option to set the maximum number of VP annotations to write to
173// the metadata for a single indirect call callsite. The default is 3.
174static cl::opt<unsigned> MaxNumAnnotations(
175 "icp-max-annotations", cl::init(3), cl::Hidden, cl::ZeroOrMore,
176 cl::desc("Max number of annotations for a single indirect "
177 "call callsite"));
178
179// Command line option to set the maximum number of value annotations
180// to write to the metadata for a single memop intrinsic. The default is 4.
// Adjacent string literals are concatenated verbatim, so the first fragment
// of the description must end with a space.
181static cl::opt<unsigned> MaxNumMemOPAnnotations(
182 "memop-max-annotations", cl::init(4), cl::Hidden, cl::ZeroOrMore,
183 cl::desc("Max number of precise value annotations for a single memop "
184 "intrinsic"));
185
186// Command line option to control appending FunctionHash to the name of a COMDAT
187// function. This is to avoid the hash mismatch caused by the preinliner.
// Hidden option; off by default.
188static cl::opt<bool> DoComdatRenaming(
189 "do-comdat-renaming", cl::init(false), cl::Hidden,
190 cl::desc("Append function hash to the name of COMDAT function to avoid "
191 "function hash mismatch due to the preinliner"));
192
193// Command line option to enable/disable the warning about missing profile
194// information. Hidden option; the default is false.
195static cl::opt<bool>
196 PGOWarnMissing("pgo-warn-missing-function", cl::init(false), cl::Hidden,
197 cl::desc("Use this option to turn on/off "
198 "warnings about missing profile data for "
199 "functions."));
200
201// Command line option to enable/disable the warning about a hash mismatch in
202// the profile data. Hidden option; the default is false.
203static cl::opt<bool>
204 NoPGOWarnMismatch("no-pgo-warn-mismatch", cl::init(false), cl::Hidden,
205 cl::desc("Use this option to turn off/on "
206 "warnings about profile cfg mismatch."));
207
208// Command line option to enable/disable the warning about a hash mismatch in
209// the profile data for Comdat functions, which often turns out to be a false
210// positive due to the pre-instrumentation inline. Unlike the option
// "no-pgo-warn-mismatch", this one defaults to true.
211static cl::opt<bool>
212 NoPGOWarnMismatchComdat("no-pgo-warn-mismatch-comdat", cl::init(true),
213 cl::Hidden,
214 cl::desc("The option is used to turn on/off "
215 "warnings about hash mismatch for comdat "
216 "functions."));
217
218// Command line option to enable/disable select instruction instrumentation.
// Hidden option; on by default.
219static cl::opt<bool>
220 PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden,
221 cl::desc("Use this option to turn on/off SELECT "
222 "instruction instrumentation. "));
223
224// Command line option to turn on CFG dot or text dump of raw profile counts.
// The option is an enumeration accepting "none", "graph" or "text".
// NOTE(review): the description string still calls it "A boolean option";
// consider rewording the help text.
225static cl::opt<PGOViewCountsType> PGOViewRawCounts(
226 "pgo-view-raw-counts", cl::Hidden,
227 cl::desc("A boolean option to show CFG dag or text "
228 "with raw profile counts from "
229 "profile data. See also option "
230 "-pgo-view-counts. To limit graph "
231 "display to only one function, use "
232 "filtering option -view-bfi-func-name."),
233 cl::values(clEnumValN(PGOVCT_None, "none", "do not show.")llvm::cl::OptionEnumValue { "none", int(PGOVCT_None), "do not show."
}
,
234 clEnumValN(PGOVCT_Graph, "graph", "show a graph.")llvm::cl::OptionEnumValue { "graph", int(PGOVCT_Graph), "show a graph."
}
,
235 clEnumValN(PGOVCT_Text, "text", "show in text.")llvm::cl::OptionEnumValue { "text", int(PGOVCT_Text), "show in text."
}
));
236
237// Command line option to enable/disable memop intrinsic call.size profiling.
// Hidden option; on by default.
238static cl::opt<bool>
239 PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden,
240 cl::desc("Use this option to turn on/off "
241 "memory intrinsic size profiling."));
242
243// Emit branch probability as optimization remarks. Hidden option; off by
// default.
244static cl::opt<bool>
245 EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden,
246 cl::desc("When this option is on, the annotated "
247 "branch probability will be emitted as "
248 "optimization remarks: -{Rpass|"
249 "pass-remarks}=pgo-instrumentation"));
250
// Hidden command line option that forces instrumentation of the function
// entry basic block. Off by default.
251static cl::opt<bool> PGOInstrumentEntry(
252 "pgo-instrument-entry", cl::init(false), cl::Hidden,
253 cl::desc("Force to instrument function entry basicblock."));
254
// Hidden command line option to fix the function entry count in profile use.
// On by default.
255static cl::opt<bool>
256 PGOFixEntryCount("pgo-fix-entry-count", cl::init(true), cl::Hidden,
257 cl::desc("Fix function entry count in profile use."));
258
// Hidden command line option (off by default) to report BFI counts whose
// hot/cold classification disagrees with the raw profile count. The help text
// must spell the remarks option as -pass-remarks-analysis so that users can
// copy it verbatim.
259static cl::opt<bool> PGOVerifyHotBFI(
260 "pgo-verify-hot-bfi", cl::init(false), cl::Hidden,
261 cl::desc("Print out the non-match BFI count if a hot raw profile count "
262 "becomes non-hot, or a cold raw profile count becomes hot. "
263 "The print is enabled under -Rpass-analysis=pgo, or "
264 "internal option -pass-remarks-analysis=pgo."));
265
// Hidden command line option (off by default) to print mismatched BFI counts
// after profile metadata has been set. The description fragments are
// concatenated verbatim, so the sentence break after "metadata" has to be
// written out explicitly.
266static cl::opt<bool> PGOVerifyBFI(
267 "pgo-verify-bfi", cl::init(false), cl::Hidden,
268 cl::desc("Print out mismatched BFI counts after setting profile "
269 "metadata. The print is enabled under -Rpass-analysis=pgo, or "
270 "internal option -pass-remarks-analysis=pgo."));
271
// Hidden command line option giving the percentage threshold used with
// -pgo-verify-bfi. The default is 5.
272static cl::opt<unsigned> PGOVerifyBFIRatio(
273 "pgo-verify-bfi-ratio", cl::init(5), cl::Hidden,
274 cl::desc("Set the threshold for pgo-verify-bfi -- only print out "
275 "mismatched BFI if the difference percentage is greater than "
276 "this value (in percentage)."));
277
// Hidden command line option giving the profile count cutoff used with
// -pgo-verify-bfi. The default is 1.
278static cl::opt<unsigned> PGOVerifyBFICutoff(
279 "pgo-verify-bfi-cutoff", cl::init(1), cl::Hidden,
280 cl::desc("Set the threshold for pgo-verify-bfi -- skip the counts whose "
281 "profile count value is below."));
282
// Declarations of command line options that are defined in another source
// file; only the extern declarations live here.
283namespace llvm {
284// Command line option to turn on CFG dot dump after profile annotation.
285// Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts
286extern cl::opt<PGOViewCountsType> PGOViewCounts;
287
288// Command line option to specify the name of the function for CFG dump.
289// Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name=
290extern cl::opt<std::string> ViewBlockFreqFuncName;
291} // namespace llvm
292
// Hidden command line option to select the old CFG function hashing. Off by
// default.
293static cl::opt<bool>
294 PGOOldCFGHashing("pgo-instr-old-cfg-hashing", cl::init(false), cl::Hidden,
295 cl::desc("Use the old CFG function hashing"));
296
297// Return a string describing the branch condition of \p TI that can be
298// used in static branch probability heuristics.
//
// The result has the form "<predicate name>_<type of operand 0>", followed by
// "_Zero", "_One", "_MinusOne" or "_Const" when operand 1 of the compare is a
// ConstantInt. An empty string is returned unless \p TI is a conditional
// BranchInst whose condition is an ICmpInst.
299static std::string getBranchCondString(Instruction *TI) {
300 BranchInst *BI = dyn_cast<BranchInst>(TI);
301 if (!BI || !BI->isConditional())
302 return std::string();
303
304 Value *Cond = BI->getCondition();
305 ICmpInst *CI = dyn_cast<ICmpInst>(Cond);
306 if (!CI) // Only integer compares are described.
307 return std::string();
308
309 std::string result;
310 raw_string_ostream OS(result); // Writes into `result`.
311 OS << CmpInst::getPredicateName(CI->getPredicate()) << "_";
312 CI->getOperand(0)->getType()->print(OS, true);
313
314 Value *RHS = CI->getOperand(1);
315 ConstantInt *CV = dyn_cast<ConstantInt>(RHS);
316 if (CV) { // Classify a constant right-hand side; otherwise add no suffix.
317 if (CV->isZero())
318 OS << "_Zero";
319 else if (CV->isOne())
320 OS << "_One";
321 else if (CV->isMinusOne())
322 OS << "_MinusOne";
323 else
324 OS << "_Const";
325 }
326 OS.flush(); // Flush the stream before `result` is returned.
327 return result;
328}
329
// Table of description strings: VALUE_PROF_KIND is defined to expand to its
// Descr argument, so including InstrProfData.inc yields one string per entry.
// Presumably indexed by the value profile kind -- confirm against its users.
330static const char *ValueProfKindDescr[] = {
331#define VALUE_PROF_KIND(Enumerator, Value, Descr) Descr,
332#include "llvm/ProfileData/InstrProfData.inc"
333};
334
335namespace {
336
337/// The select instruction visitor plays three roles specified
338/// by the mode. In \c VM_counting mode, it simply counts the number of
339/// select instructions. In \c VM_instrument mode, it inserts code to count
340/// the number of times TrueValue of select is taken. In \c VM_annotate mode,
341/// it reads the profile data and annotates the select instruction with metadata.
342enum VisitMode { VM_counting, VM_instrument, VM_annotate };
// Forward declaration; the visitor only holds a pointer to PGOUseFunc.
343class PGOUseFunc;
344
345/// Instruction Visitor class to visit select instructions. The current
/// VisitMode, set by countSelects(), instrumentSelects() or annotateSelects(),
/// selects what a visit does.
346struct SelectInstVisitor : public InstVisitor<SelectInstVisitor> {
347 Function &F;
348 unsigned NSIs = 0; // Number of select instructions instrumented.
349 VisitMode Mode = VM_counting; // Visiting mode.
350 unsigned *CurCtrIdx = nullptr; // Pointer to current counter index.
351 unsigned TotalNumCtrs = 0; // Total number of counters
352 GlobalVariable *FuncNameVar = nullptr; // Set by instrumentSelects().
353 uint64_t FuncHash = 0; // Set by instrumentSelects().
354 PGOUseFunc *UseFunc = nullptr; // Set by annotateSelects().
355
356 SelectInstVisitor(Function &Func) : F(Func) {}
357
 // Reset the select counter, switch to counting mode and visit \p Func.
358 void countSelects(Function &Func) {
359 NSIs = 0;
360 Mode = VM_counting;
361 visit(Func);
362 }
363
364 // Visit the IR stream and instrument all select instructions. \p
365 // Ind is a pointer to the counter index variable; \p TotalNC
366 // is the total number of counters; \p FNV is the pointer to the
367 // PGO function name var; \p FHash is the function hash.
368 void instrumentSelects(Function &Func, unsigned *Ind, unsigned TotalNC,
369 GlobalVariable *FNV, uint64_t FHash) {
370 Mode = VM_instrument;
371 CurCtrIdx = Ind;
372 TotalNumCtrs = TotalNC;
373 FuncHash = FHash;
374 FuncNameVar = FNV;
375 visit(Func);
376 }
377
378 // Visit the IR stream and annotate all select instructions. \p UF is
 // stored in UseFunc and \p Ind in CurCtrIdx before \p Func is visited.
379 void annotateSelects(Function &Func, PGOUseFunc *UF, unsigned *Ind) {
380 Mode = VM_annotate;
381 UseFunc = UF;
382 CurCtrIdx = Ind;
383 visit(Func);
384 }
385
 // Per-instruction workers; defined outside this struct.
386 void instrumentOneSelectInst(SelectInst &SI);
387 void annotateOneSelectInst(SelectInst &SI);
388
389 // Visit \p SI instruction and perform tasks according to visit mode.
390 void visitSelectInst(SelectInst &SI);
391
392 // Return the number of select instructions. This needs to be called after
393 // countSelects().
394 unsigned getNumOfSelectInsts() const { return NSIs; }
395};
396
397
// Module pass reporting the name "PGOInstrumentationGenPass". It declares
// BlockFrequencyInfoWrapperPass and TargetLibraryInfoWrapperPass as required
// analyses; runOnModule() is defined outside the class.
398class PGOInstrumentationGenLegacyPass : public ModulePass {
399public:
400 static char ID;
401
 // \p IsCS is stored in the member of the same name. The constructor also
 // registers the pass with the global PassRegistry.
402 PGOInstrumentationGenLegacyPass(bool IsCS = false)
403 : ModulePass(ID), IsCS(IsCS) {
404 initializePGOInstrumentationGenLegacyPassPass(
405 *PassRegistry::getPassRegistry());
406 }
407
408 StringRef getPassName() const override { return "PGOInstrumentationGenPass"; }
409
410private:
411 // Whether this is context-sensitive instrumentation.
412 bool IsCS;
413 bool runOnModule(Module &M) override;
414
 // Declare the analyses this pass requires.
415 void getAnalysisUsage(AnalysisUsage &AU) const override {
416 AU.addRequired<BlockFrequencyInfoWrapperPass>();
417 AU.addRequired<TargetLibraryInfoWrapperPass>();
418 }
419};
420
// Module pass reporting the name "PGOInstrumentationUsePass". It declares
// ProfileSummaryInfoWrapperPass, BlockFrequencyInfoWrapperPass and
// TargetLibraryInfoWrapperPass as required analyses; runOnModule() is defined
// outside the class.
421class PGOInstrumentationUseLegacyPass : public ModulePass {
422public:
423 static char ID;
424
425 // Provide the profile filename as the parameter. A non-empty
 // -pgo-test-profile-file option overrides \p Filename. The constructor also
 // registers the pass with the global PassRegistry.
426 PGOInstrumentationUseLegacyPass(std::string Filename = "", bool IsCS = false)
427 : ModulePass(ID), ProfileFileName(std::move(Filename)), IsCS(IsCS) {
428 if (!PGOTestProfileFile.empty())
429 ProfileFileName = PGOTestProfileFile;
430 initializePGOInstrumentationUseLegacyPassPass(
431 *PassRegistry::getPassRegistry());
432 }
433
434 StringRef getPassName() const override { return "PGOInstrumentationUsePass"; }
435
436private:
 // Path of the profile file, after the command line override is applied.
437 std::string ProfileFileName;
438 // Whether this is context-sensitive instrumentation use.
439 bool IsCS;
440
441 bool runOnModule(Module &M) override;
442
 // Declare the analyses this pass requires.
443 void getAnalysisUsage(AnalysisUsage &AU) const override {
444 AU.addRequired<ProfileSummaryInfoWrapperPass>();
445 AU.addRequired<BlockFrequencyInfoWrapperPass>();
446 AU.addRequired<TargetLibraryInfoWrapperPass>();
447 }
448};
449
// Module pass reporting the name "PGOInstrumentationGenCreateVarPass". Its
// runOnModule() calls createProfileFileNameVar() with the stored output name
// and createIRLevelProfileFlagVar() with IsCS set to true, then returns false.
450class PGOInstrumentationGenCreateVarLegacyPass : public ModulePass {
451public:
452 static char ID;
453 StringRef getPassName() const override {
454 return "PGOInstrumentationGenCreateVarPass";
455 }
 // \p CSInstrName is taken by value, so it is moved into the member rather
 // than copied a second time.
456 PGOInstrumentationGenCreateVarLegacyPass(std::string CSInstrName = "")
457 : ModulePass(ID), InstrProfileOutput(std::move(CSInstrName)) {
458 initializePGOInstrumentationGenCreateVarLegacyPassPass(
459 *PassRegistry::getPassRegistry());
460 }
461
462private:
463 bool runOnModule(Module &M) override {
464 createProfileFileNameVar(M, InstrProfileOutput);
465 createIRLevelProfileFlagVar(M, /* IsCS */ true, PGOInstrumentEntry);
466 return false;
467 }
 // Name handed to createProfileFileNameVar().
468 std::string InstrProfileOutput;
469};
470
471} // end anonymous namespace
472
// Pass ID and registration of PGOInstrumentationGenLegacyPass under the
// command line name "pgo-instr-gen". The registration initializes the
// BlockFrequencyInfo, BranchProbabilityInfo and TargetLibraryInfo wrapper
// passes first, then hands the address of ID to the PassInfo it registers.
473char PGOInstrumentationGenLegacyPass::ID = 0;
474
475INITIALIZE_PASS_BEGIN(PGOInstrumentationGenLegacyPass, "pgo-instr-gen",static void *initializePGOInstrumentationGenLegacyPassPassOnce
(PassRegistry &Registry) {
476 "PGO instrumentation.", false, false)static void *initializePGOInstrumentationGenLegacyPassPassOnce
(PassRegistry &Registry) {
477INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)initializeBlockFrequencyInfoWrapperPassPass(Registry);
478INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass)initializeBranchProbabilityInfoWrapperPassPass(Registry);
479INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)initializeTargetLibraryInfoWrapperPassPass(Registry);
480INITIALIZE_PASS_END(PGOInstrumentationGenLegacyPass, "pgo-instr-gen",PassInfo *PI = new PassInfo( "PGO instrumentation.", "pgo-instr-gen"
, &PGOInstrumentationGenLegacyPass::ID, PassInfo::NormalCtor_t
(callDefaultCtor<PGOInstrumentationGenLegacyPass>), false
, false); Registry.registerPass(*PI, true); return PI; } static
llvm::once_flag InitializePGOInstrumentationGenLegacyPassPassFlag
; void llvm::initializePGOInstrumentationGenLegacyPassPass(PassRegistry
&Registry) { llvm::call_once(InitializePGOInstrumentationGenLegacyPassPassFlag
, initializePGOInstrumentationGenLegacyPassPassOnce, std::ref
(Registry)); }
481 "PGO instrumentation.", false, false)PassInfo *PI = new PassInfo( "PGO instrumentation.", "pgo-instr-gen"
, &PGOInstrumentationGenLegacyPass::ID, PassInfo::NormalCtor_t
(callDefaultCtor<PGOInstrumentationGenLegacyPass>), false
, false); Registry.registerPass(*PI, true); return PI; } static
llvm::once_flag InitializePGOInstrumentationGenLegacyPassPassFlag
; void llvm::initializePGOInstrumentationGenLegacyPassPass(PassRegistry
&Registry) { llvm::call_once(InitializePGOInstrumentationGenLegacyPassPassFlag
, initializePGOInstrumentationGenLegacyPassPassOnce, std::ref
(Registry)); }
482
483ModulePass *llvm::createPGOInstrumentationGenLegacyPass(bool IsCS) {
484 return new PGOInstrumentationGenLegacyPass(IsCS);
485}
486
487char PGOInstrumentationUseLegacyPass::ID = 0;
488
489INITIALIZE_PASS_BEGIN(PGOInstrumentationUseLegacyPass, "pgo-instr-use",static void *initializePGOInstrumentationUseLegacyPassPassOnce
(PassRegistry &Registry) {
490 "Read PGO instrumentation profile.", false, false)static void *initializePGOInstrumentationUseLegacyPassPassOnce
(PassRegistry &Registry) {
491INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)initializeBlockFrequencyInfoWrapperPassPass(Registry);
492INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass)initializeBranchProbabilityInfoWrapperPassPass(Registry);
493INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)initializeProfileSummaryInfoWrapperPassPass(Registry);
494INITIALIZE_PASS_END(PGOInstrumentationUseLegacyPass, "pgo-instr-use",PassInfo *PI = new PassInfo( "Read PGO instrumentation profile."
, "pgo-instr-use", &PGOInstrumentationUseLegacyPass::ID, PassInfo
::NormalCtor_t(callDefaultCtor<PGOInstrumentationUseLegacyPass
>), false, false); Registry.registerPass(*PI, true); return
PI; } static llvm::once_flag InitializePGOInstrumentationUseLegacyPassPassFlag
; void llvm::initializePGOInstrumentationUseLegacyPassPass(PassRegistry
&Registry) { llvm::call_once(InitializePGOInstrumentationUseLegacyPassPassFlag
, initializePGOInstrumentationUseLegacyPassPassOnce, std::ref
(Registry)); }
495 "Read PGO instrumentation profile.", false, false)PassInfo *PI = new PassInfo( "Read PGO instrumentation profile."
, "pgo-instr-use", &PGOInstrumentationUseLegacyPass::ID, PassInfo
::NormalCtor_t(callDefaultCtor<PGOInstrumentationUseLegacyPass
>), false, false); Registry.registerPass(*PI, true); return
PI; } static llvm::once_flag InitializePGOInstrumentationUseLegacyPassPassFlag
; void llvm::initializePGOInstrumentationUseLegacyPassPass(PassRegistry
&Registry) { llvm::call_once(InitializePGOInstrumentationUseLegacyPassPassFlag
, initializePGOInstrumentationUseLegacyPassPassOnce, std::ref
(Registry)); }
496
497ModulePass *llvm::createPGOInstrumentationUseLegacyPass(StringRef Filename,
498 bool IsCS) {
499 return new PGOInstrumentationUseLegacyPass(Filename.str(), IsCS);
500}
501
502char PGOInstrumentationGenCreateVarLegacyPass::ID = 0;
503
504INITIALIZE_PASS(PGOInstrumentationGenCreateVarLegacyPass,static void *initializePGOInstrumentationGenCreateVarLegacyPassPassOnce
(PassRegistry &Registry) { PassInfo *PI = new PassInfo( "Create PGO instrumentation version variable for CSPGO."
, "pgo-instr-gen-create-var", &PGOInstrumentationGenCreateVarLegacyPass
::ID, PassInfo::NormalCtor_t(callDefaultCtor<PGOInstrumentationGenCreateVarLegacyPass
>), false, false); Registry.registerPass(*PI, true); return
PI; } static llvm::once_flag InitializePGOInstrumentationGenCreateVarLegacyPassPassFlag
; void llvm::initializePGOInstrumentationGenCreateVarLegacyPassPass
(PassRegistry &Registry) { llvm::call_once(InitializePGOInstrumentationGenCreateVarLegacyPassPassFlag
, initializePGOInstrumentationGenCreateVarLegacyPassPassOnce,
std::ref(Registry)); }
505 "pgo-instr-gen-create-var",static void *initializePGOInstrumentationGenCreateVarLegacyPassPassOnce
(PassRegistry &Registry) { PassInfo *PI = new PassInfo( "Create PGO instrumentation version variable for CSPGO."
, "pgo-instr-gen-create-var", &PGOInstrumentationGenCreateVarLegacyPass
::ID, PassInfo::NormalCtor_t(callDefaultCtor<PGOInstrumentationGenCreateVarLegacyPass
>), false, false); Registry.registerPass(*PI, true); return
PI; } static llvm::once_flag InitializePGOInstrumentationGenCreateVarLegacyPassPassFlag
; void llvm::initializePGOInstrumentationGenCreateVarLegacyPassPass
(PassRegistry &Registry) { llvm::call_once(InitializePGOInstrumentationGenCreateVarLegacyPassPassFlag
, initializePGOInstrumentationGenCreateVarLegacyPassPassOnce,
std::ref(Registry)); }
506 "Create PGO instrumentation version variable for CSPGO.", false,static void *initializePGOInstrumentationGenCreateVarLegacyPassPassOnce
(PassRegistry &Registry) { PassInfo *PI = new PassInfo( "Create PGO instrumentation version variable for CSPGO."
, "pgo-instr-gen-create-var", &PGOInstrumentationGenCreateVarLegacyPass
::ID, PassInfo::NormalCtor_t(callDefaultCtor<PGOInstrumentationGenCreateVarLegacyPass
>), false, false); Registry.registerPass(*PI, true); return
PI; } static llvm::once_flag InitializePGOInstrumentationGenCreateVarLegacyPassPassFlag
; void llvm::initializePGOInstrumentationGenCreateVarLegacyPassPass
(PassRegistry &Registry) { llvm::call_once(InitializePGOInstrumentationGenCreateVarLegacyPassPassFlag
, initializePGOInstrumentationGenCreateVarLegacyPassPassOnce,
std::ref(Registry)); }
507 false)static void *initializePGOInstrumentationGenCreateVarLegacyPassPassOnce
(PassRegistry &Registry) { PassInfo *PI = new PassInfo( "Create PGO instrumentation version variable for CSPGO."
, "pgo-instr-gen-create-var", &PGOInstrumentationGenCreateVarLegacyPass
::ID, PassInfo::NormalCtor_t(callDefaultCtor<PGOInstrumentationGenCreateVarLegacyPass
>), false, false); Registry.registerPass(*PI, true); return
PI; } static llvm::once_flag InitializePGOInstrumentationGenCreateVarLegacyPassPassFlag
; void llvm::initializePGOInstrumentationGenCreateVarLegacyPassPass
(PassRegistry &Registry) { llvm::call_once(InitializePGOInstrumentationGenCreateVarLegacyPassPassFlag
, initializePGOInstrumentationGenCreateVarLegacyPassPassOnce,
std::ref(Registry)); }
508
509ModulePass *
510llvm::createPGOInstrumentationGenCreateVarLegacyPass(StringRef CSInstrName) {
511 return new PGOInstrumentationGenCreateVarLegacyPass(std::string(CSInstrName));
512}
513
514namespace {
515
516/// An MST based instrumentation for PGO
517///
518/// Implements a Minimum Spanning Tree (MST) based instrumentation for PGO
519/// in the function level.
520struct PGOEdge {
521 // This class implements the CFG edges. Note the CFG can be a multi-graph.
522 // So there might be multiple edges with same SrcBB and DestBB.
523 const BasicBlock *SrcBB;
524 const BasicBlock *DestBB;
525 uint64_t Weight;
526 bool InMST = false;
527 bool Removed = false;
528 bool IsCritical = false;
529
530 PGOEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W = 1)
531 : SrcBB(Src), DestBB(Dest), Weight(W) {}
532
533 // Return the information string of an edge.
534 std::string infoString() const {
535 return (Twine(Removed ? "-" : " ") + (InMST ? " " : "*") +
536 (IsCritical ? "c" : " ") + " W=" + Twine(Weight)).str();
537 }
538};
539
540// This class stores the auxiliary information for each BB.
541struct BBInfo {
542 BBInfo *Group;
543 uint32_t Index;
544 uint32_t Rank = 0;
545
546 BBInfo(unsigned IX) : Group(this), Index(IX) {}
547
548 // Return the information string of this object.
549 std::string infoString() const {
550 return (Twine("Index=") + Twine(Index)).str();
551 }
552
553 // Empty function -- only applicable to UseBBInfo.
554 void addOutEdge(PGOEdge *E LLVM_ATTRIBUTE_UNUSED__attribute__((__unused__))) {}
555
556 // Empty function -- only applicable to UseBBInfo.
557 void addInEdge(PGOEdge *E LLVM_ATTRIBUTE_UNUSED__attribute__((__unused__))) {}
558};
559
560// This class implements the CFG edges. Note the CFG can be a multi-graph.
561template <class Edge, class BBInfo> class FuncPGOInstrumentation {
562private:
563 Function &F;
564
565 // Is this is context-sensitive instrumentation.
566 bool IsCS;
567
568 // A map that stores the Comdat group in function F.
569 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers;
570
571 ValueProfileCollector VPC;
572
573 void computeCFGHash();
574 void renameComdatFunction();
575
576public:
577 std::vector<std::vector<VPCandidateInfo>> ValueSites;
578 SelectInstVisitor SIVisitor;
579 std::string FuncName;
580 GlobalVariable *FuncNameVar;
581
582 // CFG hash value for this function.
583 uint64_t FunctionHash = 0;
584
585 // The Minimum Spanning Tree of function CFG.
586 CFGMST<Edge, BBInfo> MST;
587
588 // Collect all the BBs that will be instrumented, and store them in
589 // InstrumentBBs.
590 void getInstrumentBBs(std::vector<BasicBlock *> &InstrumentBBs);
591
592 // Give an edge, find the BB that will be instrumented.
593 // Return nullptr if there is no BB to be instrumented.
594 BasicBlock *getInstrBB(Edge *E);
595
596 // Return the auxiliary BB information.
597 BBInfo &getBBInfo(const BasicBlock *BB) const { return MST.getBBInfo(BB); }
598
599 // Return the auxiliary BB information if available.
600 BBInfo *findBBInfo(const BasicBlock *BB) const { return MST.findBBInfo(BB); }
601
602 // Dump edges and BB information.
603 void dumpInfo(std::string Str = "") const {
604 MST.dumpEdges(dbgs(), Twine("Dump Function ") + FuncName + " Hash: " +
605 Twine(FunctionHash) + "\t" + Str);
606 }
607
608 FuncPGOInstrumentation(
609 Function &Func, TargetLibraryInfo &TLI,
610 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
611 bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr,
612 BlockFrequencyInfo *BFI = nullptr, bool IsCS = false,
613 bool InstrumentFuncEntry = true)
614 : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), VPC(Func, TLI),
615 ValueSites(IPVK_Last + 1), SIVisitor(Func),
616 MST(F, InstrumentFuncEntry, BPI, BFI) {
617 // This should be done before CFG hash computation.
618 SIVisitor.countSelects(Func);
619 ValueSites[IPVK_MemOPSize] = VPC.get(IPVK_MemOPSize);
620 if (!IsCS) {
621 NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
622 NumOfPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size();
623 NumOfPGOBB += MST.BBInfos.size();
624 ValueSites[IPVK_IndirectCallTarget] = VPC.get(IPVK_IndirectCallTarget);
625 } else {
626 NumOfCSPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
627 NumOfCSPGOMemIntrinsics += ValueSites[IPVK_MemOPSize].size();
628 NumOfCSPGOBB += MST.BBInfos.size();
629 }
630
631 FuncName = getPGOFuncName(F);
632 computeCFGHash();
633 if (!ComdatMembers.empty())
634 renameComdatFunction();
635 LLVM_DEBUG(dumpInfo("after CFGMST"))do { } while (false);
636
637 for (auto &E : MST.AllEdges) {
638 if (E->Removed)
639 continue;
640 IsCS ? NumOfCSPGOEdge++ : NumOfPGOEdge++;
641 if (!E->InMST)
642 IsCS ? NumOfCSPGOInstrument++ : NumOfPGOInstrument++;
643 }
644
645 if (CreateGlobalVar)
646 FuncNameVar = createPGOFuncNameVar(F, FuncName);
647 }
648};
649
650} // end anonymous namespace
651
652// Compute Hash value for the CFG: the lower 32 bits are CRC32 of the index
653// value of each BB in the CFG. The higher 32 bits are the CRC32 of the numbers
654// of selects, indirect calls, mem ops and edges.
655template <class Edge, class BBInfo>
656void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() {
657 std::vector<uint8_t> Indexes;
658 JamCRC JC;
659 for (auto &BB : F) {
660 const Instruction *TI = BB.getTerminator();
661 for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) {
662 BasicBlock *Succ = TI->getSuccessor(I);
663 auto BI = findBBInfo(Succ);
664 if (BI == nullptr)
665 continue;
666 uint32_t Index = BI->Index;
667 for (int J = 0; J < 4; J++)
668 Indexes.push_back((uint8_t)(Index >> (J * 8)));
669 }
670 }
671 JC.update(Indexes);
672
673 JamCRC JCH;
674 if (PGOOldCFGHashing) {
675 // Hash format for context sensitive profile. Reserve 4 bits for other
676 // information.
677 FunctionHash = (uint64_t)SIVisitor.getNumOfSelectInsts() << 56 |
678 (uint64_t)ValueSites[IPVK_IndirectCallTarget].size() << 48 |
679 //(uint64_t)ValueSites[IPVK_MemOPSize].size() << 40 |
680 (uint64_t)MST.AllEdges.size() << 32 | JC.getCRC();
681 } else {
682 // The higher 32 bits.
683 auto updateJCH = [&JCH](uint64_t Num) {
684 uint8_t Data[8];
685 support::endian::write64le(Data, Num);
686 JCH.update(Data);
687 };
688 updateJCH((uint64_t)SIVisitor.getNumOfSelectInsts());
689 updateJCH((uint64_t)ValueSites[IPVK_IndirectCallTarget].size());
690 updateJCH((uint64_t)ValueSites[IPVK_MemOPSize].size());
691 updateJCH((uint64_t)MST.AllEdges.size());
692
693 // Hash format for context sensitive profile. Reserve 4 bits for other
694 // information.
695 FunctionHash = (((uint64_t)JCH.getCRC()) << 28) + JC.getCRC();
696 }
697
698 // Reserve bit 60-63 for other information purpose.
699 FunctionHash &= 0x0FFFFFFFFFFFFFFF;
700 if (IsCS)
701 NamedInstrProfRecord::setCSFlagInHash(FunctionHash);
702 LLVM_DEBUG(dbgs() << "Function Hash Computation for " << F.getName() << ":\n"do { } while (false)
703 << " CRC = " << JC.getCRC()do { } while (false)
704 << ", Selects = " << SIVisitor.getNumOfSelectInsts()do { } while (false)
705 << ", Edges = " << MST.AllEdges.size() << ", ICSites = "do { } while (false)
706 << ValueSites[IPVK_IndirectCallTarget].size())do { } while (false);
707 if (!PGOOldCFGHashing) {
708 LLVM_DEBUG(dbgs() << ", Memops = " << ValueSites[IPVK_MemOPSize].size()do { } while (false)
709 << ", High32 CRC = " << JCH.getCRC())do { } while (false);
710 }
711 LLVM_DEBUG(dbgs() << ", Hash = " << FunctionHash << "\n";)do { } while (false);
712}
713
714// Check if we can safely rename this Comdat function.
715static bool canRenameComdat(
716 Function &F,
717 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
718 if (!DoComdatRenaming || !canRenameComdatFunc(F, true))
719 return false;
720
721 // FIXME: Current only handle those Comdat groups that only containing one
722 // function.
723 // (1) For a Comdat group containing multiple functions, we need to have a
724 // unique postfix based on the hashes for each function. There is a
725 // non-trivial code refactoring to do this efficiently.
726 // (2) Variables can not be renamed, so we can not rename Comdat function in a
727 // group including global vars.
728 Comdat *C = F.getComdat();
729 for (auto &&CM : make_range(ComdatMembers.equal_range(C))) {
730 assert(!isa<GlobalAlias>(CM.second))((void)0);
731 Function *FM = dyn_cast<Function>(CM.second);
732 if (FM != &F)
733 return false;
734 }
735 return true;
736}
737
738// Append the CFGHash to the Comdat function name.
739template <class Edge, class BBInfo>
740void FuncPGOInstrumentation<Edge, BBInfo>::renameComdatFunction() {
741 if (!canRenameComdat(F, ComdatMembers))
742 return;
743 std::string OrigName = F.getName().str();
744 std::string NewFuncName =
745 Twine(F.getName() + "." + Twine(FunctionHash)).str();
746 F.setName(Twine(NewFuncName));
747 GlobalAlias::create(GlobalValue::WeakAnyLinkage, OrigName, &F);
748 FuncName = Twine(FuncName + "." + Twine(FunctionHash)).str();
749 Comdat *NewComdat;
750 Module *M = F.getParent();
751 // For AvailableExternallyLinkage functions, change the linkage to
752 // LinkOnceODR and put them into comdat. This is because after renaming, there
753 // is no backup external copy available for the function.
754 if (!F.hasComdat()) {
755 assert(F.getLinkage() == GlobalValue::AvailableExternallyLinkage)((void)0);
756 NewComdat = M->getOrInsertComdat(StringRef(NewFuncName));
757 F.setLinkage(GlobalValue::LinkOnceODRLinkage);
758 F.setComdat(NewComdat);
759 return;
760 }
761
762 // This function belongs to a single function Comdat group.
763 Comdat *OrigComdat = F.getComdat();
764 std::string NewComdatName =
765 Twine(OrigComdat->getName() + "." + Twine(FunctionHash)).str();
766 NewComdat = M->getOrInsertComdat(StringRef(NewComdatName));
767 NewComdat->setSelectionKind(OrigComdat->getSelectionKind());
768
769 for (auto &&CM : make_range(ComdatMembers.equal_range(OrigComdat))) {
770 // Must be a function.
771 cast<Function>(CM.second)->setComdat(NewComdat);
772 }
773}
774
775// Collect all the BBs that will be instruments and return them in
776// InstrumentBBs and setup InEdges/OutEdge for UseBBInfo.
777template <class Edge, class BBInfo>
778void FuncPGOInstrumentation<Edge, BBInfo>::getInstrumentBBs(
779 std::vector<BasicBlock *> &InstrumentBBs) {
780 // Use a worklist as we will update the vector during the iteration.
781 std::vector<Edge *> EdgeList;
782 EdgeList.reserve(MST.AllEdges.size());
783 for (auto &E : MST.AllEdges)
784 EdgeList.push_back(E.get());
785
786 for (auto &E : EdgeList) {
787 BasicBlock *InstrBB = getInstrBB(E);
788 if (InstrBB)
789 InstrumentBBs.push_back(InstrBB);
790 }
791
792 // Set up InEdges/OutEdges for all BBs.
793 for (auto &E : MST.AllEdges) {
794 if (E->Removed)
795 continue;
796 const BasicBlock *SrcBB = E->SrcBB;
797 const BasicBlock *DestBB = E->DestBB;
798 BBInfo &SrcInfo = getBBInfo(SrcBB);
799 BBInfo &DestInfo = getBBInfo(DestBB);
800 SrcInfo.addOutEdge(E.get());
801 DestInfo.addInEdge(E.get());
802 }
803}
804
805// Given a CFG E to be instrumented, find which BB to place the instrumented
806// code. The function will split the critical edge if necessary.
807template <class Edge, class BBInfo>
808BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) {
809 if (E->InMST || E->Removed)
810 return nullptr;
811
812 BasicBlock *SrcBB = const_cast<BasicBlock *>(E->SrcBB);
813 BasicBlock *DestBB = const_cast<BasicBlock *>(E->DestBB);
814 // For a fake edge, instrument the real BB.
815 if (SrcBB == nullptr)
816 return DestBB;
817 if (DestBB == nullptr)
818 return SrcBB;
819
820 auto canInstrument = [](BasicBlock *BB) -> BasicBlock * {
821 // There are basic blocks (such as catchswitch) cannot be instrumented.
822 // If the returned first insertion point is the end of BB, skip this BB.
823 if (BB->getFirstInsertionPt() == BB->end())
824 return nullptr;
825 return BB;
826 };
827
828 // Instrument the SrcBB if it has a single successor,
829 // otherwise, the DestBB if this is not a critical edge.
830 Instruction *TI = SrcBB->getTerminator();
831 if (TI->getNumSuccessors() <= 1)
832 return canInstrument(SrcBB);
833 if (!E->IsCritical)
834 return canInstrument(DestBB);
835
836 // Some IndirectBr critical edges cannot be split by the previous
837 // SplitIndirectBrCriticalEdges call. Bail out.
838 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
839 BasicBlock *InstrBB =
840 isa<IndirectBrInst>(TI) ? nullptr : SplitCriticalEdge(TI, SuccNum);
841 if (!InstrBB) {
842 LLVM_DEBUG(do { } while (false)
843 dbgs() << "Fail to split critical edge: not instrument this edge.\n")do { } while (false);
844 return nullptr;
845 }
846 // For a critical edge, we have to split. Instrument the newly
847 // created BB.
848 IsCS ? NumOfCSPGOSplit++ : NumOfPGOSplit++;
849 LLVM_DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Indexdo { } while (false)
850 << " --> " << getBBInfo(DestBB).Index << "\n")do { } while (false);
851 // Need to add two new edges. First one: Add new edge of SrcBB->InstrBB.
852 MST.addEdge(SrcBB, InstrBB, 0);
853 // Second one: Add new edge of InstrBB->DestBB.
854 Edge &NewEdge1 = MST.addEdge(InstrBB, DestBB, 0);
855 NewEdge1.InMST = true;
856 E->Removed = true;
857
858 return canInstrument(InstrBB);
859}
860
861// When generating value profiling calls on Windows routines that make use of
862// handler funclets for exception processing an operand bundle needs to attached
863// to the called function. This routine will set \p OpBundles to contain the
864// funclet information, if any is needed, that should be placed on the generated
865// value profiling call for the value profile candidate call.
866static void
867populateEHOperandBundle(VPCandidateInfo &Cand,
868 DenseMap<BasicBlock *, ColorVector> &BlockColors,
869 SmallVectorImpl<OperandBundleDef> &OpBundles) {
870 auto *OrigCall = dyn_cast<CallBase>(Cand.AnnotatedInst);
17
Assuming field 'AnnotatedInst' is not a 'CallBase'
18
'OrigCall' initialized to a null pointer value
871 if (OrigCall
18.1
'OrigCall' is null
&& !isa<IntrinsicInst>(OrigCall)) {
872 // The instrumentation call should belong to the same funclet as a
873 // non-intrinsic call, so just copy the operand bundle, if any exists.
874 Optional<OperandBundleUse> ParentFunclet =
875 OrigCall->getOperandBundle(LLVMContext::OB_funclet);
876 if (ParentFunclet)
877 OpBundles.emplace_back(OperandBundleDef(*ParentFunclet));
878 } else {
879 // Intrinsics or other instructions do not get funclet information from the
880 // front-end. Need to use the BlockColors that was computed by the routine
881 // colorEHFunclets to determine whether a funclet is needed.
882 if (!BlockColors.empty()) {
19
Assuming the condition is true
20
Taking true branch
883 const ColorVector &CV = BlockColors.find(OrigCall->getParent())->second;
21
Called C++ object pointer is null
884 assert(CV.size() == 1 && "non-unique color for block!")((void)0);
885 Instruction *EHPad = CV.front()->getFirstNonPHI();
886 if (EHPad->isEHPad())
887 OpBundles.emplace_back("funclet", EHPad);
888 }
889 }
890}
891
892// Visit all edge and instrument the edges not in MST, and do value profiling.
893// Critical edges will be split.
894static void instrumentOneFunc(
895 Function &F, Module *M, TargetLibraryInfo &TLI, BranchProbabilityInfo *BPI,
896 BlockFrequencyInfo *BFI,
897 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
898 bool IsCS) {
899 // Split indirectbr critical edges here before computing the MST rather than
900 // later in getInstrBB() to avoid invalidating it.
901 SplitIndirectBrCriticalEdges(F, BPI, BFI);
902
903 FuncPGOInstrumentation<PGOEdge, BBInfo> FuncInfo(
904 F, TLI, ComdatMembers, true, BPI, BFI, IsCS, PGOInstrumentEntry);
905 std::vector<BasicBlock *> InstrumentBBs;
906 FuncInfo.getInstrumentBBs(InstrumentBBs);
907 unsigned NumCounters =
908 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
909
910 uint32_t I = 0;
911 Type *I8PtrTy = Type::getInt8PtrTy(M->getContext());
912 for (auto *InstrBB : InstrumentBBs) {
913 IRBuilder<> Builder(InstrBB, InstrBB->getFirstInsertionPt());
914 assert(Builder.GetInsertPoint() != InstrBB->end() &&((void)0)
915 "Cannot get the Instrumentation point")((void)0);
916 Builder.CreateCall(
917 Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment),
918 {ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy),
919 Builder.getInt64(FuncInfo.FunctionHash), Builder.getInt32(NumCounters),
920 Builder.getInt32(I++)});
921 }
922
923 // Now instrument select instructions:
924 FuncInfo.SIVisitor.instrumentSelects(F, &I, NumCounters, FuncInfo.FuncNameVar,
925 FuncInfo.FunctionHash);
926 assert(I == NumCounters)((void)0);
927
928 if (DisableValueProfiling)
9
Assuming the condition is false
10
Taking false branch
929 return;
930
931 NumOfPGOICall += FuncInfo.ValueSites[IPVK_IndirectCallTarget].size();
932
933 // Intrinsic function calls do not have funclet operand bundles needed for
934 // Windows exception handling attached to them. However, if value profiling is
935 // inserted for one of these calls, then a funclet value will need to be set
936 // on the instrumentation call based on the funclet coloring.
937 DenseMap<BasicBlock *, ColorVector> BlockColors;
938 if (F.hasPersonalityFn() &&
11
Assuming the condition is false
939 isFuncletEHPersonality(classifyEHPersonality(F.getPersonalityFn())))
940 BlockColors = colorEHFunclets(F);
941
942 // For each VP Kind, walk the VP candidates and instrument each one.
943 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) {
12
Loop condition is true. Entering loop body
944 unsigned SiteIndex = 0;
945 if (Kind
12.1
'Kind' is not equal to IPVK_MemOPSize
== IPVK_MemOPSize && !PGOInstrMemOP)
946 continue;
947
948 for (VPCandidateInfo Cand : FuncInfo.ValueSites[Kind]) {
949 LLVM_DEBUG(dbgs() << "Instrument one VP " << ValueProfKindDescr[Kind]do { } while (false)
13
Loop condition is false. Exiting loop
950 << " site: CallSite Index = " << SiteIndex << "\n")do { } while (false);
951
952 IRBuilder<> Builder(Cand.InsertPt);
953 assert(Builder.GetInsertPoint() != Cand.InsertPt->getParent()->end() &&((void)0)
954 "Cannot get the Instrumentation point")((void)0);
955
956 Value *ToProfile = nullptr;
957 if (Cand.V->getType()->isIntegerTy())
14
Taking false branch
958 ToProfile = Builder.CreateZExtOrTrunc(Cand.V, Builder.getInt64Ty());
959 else if (Cand.V->getType()->isPointerTy())
15
Taking false branch
960 ToProfile = Builder.CreatePtrToInt(Cand.V, Builder.getInt64Ty());
961 assert(ToProfile && "value profiling Value is of unexpected type")((void)0);
962
963 SmallVector<OperandBundleDef, 1> OpBundles;
964 populateEHOperandBundle(Cand, BlockColors, OpBundles);
16
Calling 'populateEHOperandBundle'
965 Builder.CreateCall(
966 Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile),
967 {ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy),
968 Builder.getInt64(FuncInfo.FunctionHash), ToProfile,
969 Builder.getInt32(Kind), Builder.getInt32(SiteIndex++)},
970 OpBundles);
971 }
972 } // IPVK_First <= Kind <= IPVK_Last
973}
974
975namespace {
976
977// This class represents a CFG edge in profile use compilation.
978struct PGOUseEdge : public PGOEdge {
979 bool CountValid = false;
980 uint64_t CountValue = 0;
981
982 PGOUseEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W = 1)
983 : PGOEdge(Src, Dest, W) {}
984
985 // Set edge count value
986 void setEdgeCount(uint64_t Value) {
987 CountValue = Value;
988 CountValid = true;
989 }
990
991 // Return the information string for this object.
992 std::string infoString() const {
993 if (!CountValid)
994 return PGOEdge::infoString();
995 return (Twine(PGOEdge::infoString()) + " Count=" + Twine(CountValue))
996 .str();
997 }
998};
999
1000using DirectEdges = SmallVector<PGOUseEdge *, 2>;
1001
1002// This class stores the auxiliary information for each BB.
1003struct UseBBInfo : public BBInfo {
1004 uint64_t CountValue = 0;
1005 bool CountValid;
1006 int32_t UnknownCountInEdge = 0;
1007 int32_t UnknownCountOutEdge = 0;
1008 DirectEdges InEdges;
1009 DirectEdges OutEdges;
1010
1011 UseBBInfo(unsigned IX) : BBInfo(IX), CountValid(false) {}
1012
1013 UseBBInfo(unsigned IX, uint64_t C)
1014 : BBInfo(IX), CountValue(C), CountValid(true) {}
1015
1016 // Set the profile count value for this BB.
1017 void setBBInfoCount(uint64_t Value) {
1018 CountValue = Value;
1019 CountValid = true;
1020 }
1021
1022 // Return the information string of this object.
1023 std::string infoString() const {
1024 if (!CountValid)
1025 return BBInfo::infoString();
1026 return (Twine(BBInfo::infoString()) + " Count=" + Twine(CountValue)).str();
1027 }
1028
1029 // Add an OutEdge and update the edge count.
1030 void addOutEdge(PGOUseEdge *E) {
1031 OutEdges.push_back(E);
1032 UnknownCountOutEdge++;
1033 }
1034
1035 // Add an InEdge and update the edge count.
1036 void addInEdge(PGOUseEdge *E) {
1037 InEdges.push_back(E);
1038 UnknownCountInEdge++;
1039 }
1040};
1041
1042} // end anonymous namespace
1043
1044// Sum up the count values for all the edges.
1045static uint64_t sumEdgeCount(const ArrayRef<PGOUseEdge *> Edges) {
1046 uint64_t Total = 0;
1047 for (auto &E : Edges) {
1048 if (E->Removed)
1049 continue;
1050 Total += E->CountValue;
1051 }
1052 return Total;
1053}
1054
1055namespace {
1056
1057class PGOUseFunc {
1058public:
1059 PGOUseFunc(Function &Func, Module *Modu, TargetLibraryInfo &TLI,
1060 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
1061 BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFIin,
1062 ProfileSummaryInfo *PSI, bool IsCS, bool InstrumentFuncEntry)
1063 : F(Func), M(Modu), BFI(BFIin), PSI(PSI),
1064 FuncInfo(Func, TLI, ComdatMembers, false, BPI, BFIin, IsCS,
1065 InstrumentFuncEntry),
1066 FreqAttr(FFA_Normal), IsCS(IsCS) {}
1067
1068 // Read counts for the instrumented BB from profile.
1069 bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros,
1070 bool &AllMinusOnes);
1071
1072 // Populate the counts for all BBs.
1073 void populateCounters();
1074
1075 // Set the branch weights based on the count values.
1076 void setBranchWeights();
1077
1078 // Annotate the value profile call sites for all value kind.
1079 void annotateValueSites();
1080
1081 // Annotate the value profile call sites for one value kind.
1082 void annotateValueSites(uint32_t Kind);
1083
1084 // Annotate the irreducible loop header weights.
1085 void annotateIrrLoopHeaderWeights();
1086
1087 // The hotness of the function from the profile count.
1088 enum FuncFreqAttr { FFA_Normal, FFA_Cold, FFA_Hot };
1089
1090 // Return the function hotness from the profile.
1091 FuncFreqAttr getFuncFreqAttr() const { return FreqAttr; }
1092
1093 // Return the function hash.
1094 uint64_t getFuncHash() const { return FuncInfo.FunctionHash; }
1095
1096 // Return the profile record for this function;
1097 InstrProfRecord &getProfileRecord() { return ProfileRecord; }
1098
1099 // Return the auxiliary BB information.
1100 UseBBInfo &getBBInfo(const BasicBlock *BB) const {
1101 return FuncInfo.getBBInfo(BB);
1102 }
1103
1104 // Return the auxiliary BB information if available.
1105 UseBBInfo *findBBInfo(const BasicBlock *BB) const {
1106 return FuncInfo.findBBInfo(BB);
1107 }
1108
1109 Function &getFunc() const { return F; }
1110
1111 void dumpInfo(std::string Str = "") const {
1112 FuncInfo.dumpInfo(Str);
1113 }
1114
1115 uint64_t getProgramMaxCount() const { return ProgramMaxCount; }
1116private:
1117 Function &F;
1118 Module *M;
1119 BlockFrequencyInfo *BFI;
1120 ProfileSummaryInfo *PSI;
1121
1122 // This member stores the shared information with class PGOGenFunc.
1123 FuncPGOInstrumentation<PGOUseEdge, UseBBInfo> FuncInfo;
1124
1125 // The maximum count value in the profile. This is only used in PGO use
1126 // compilation.
1127 uint64_t ProgramMaxCount;
1128
1129 // Position of counter that remains to be read.
1130 uint32_t CountPosition = 0;
1131
1132 // Total size of the profile count for this function.
1133 uint32_t ProfileCountSize = 0;
1134
1135 // ProfileRecord for this function.
1136 InstrProfRecord ProfileRecord;
1137
1138 // Function hotness info derived from profile.
1139 FuncFreqAttr FreqAttr;
1140
1141 // Is to use the context sensitive profile.
1142 bool IsCS;
1143
1144 // Find the Instrumented BB and set the value. Return false on error.
1145 bool setInstrumentedCounts(const std::vector<uint64_t> &CountFromProfile);
1146
1147 // Set the edge counter value for the unknown edge -- there should be only
1148 // one unknown edge.
1149 void setEdgeCount(DirectEdges &Edges, uint64_t Value);
1150
1151 // Return FuncName string;
1152 std::string getFuncName() const { return FuncInfo.FuncName; }
1153
1154 // Set the hot/cold inline hints based on the count values.
1155 // FIXME: This function should be removed once the functionality in
1156 // the inliner is implemented.
1157 void markFunctionAttributes(uint64_t EntryCount, uint64_t MaxCount) {
1158 if (PSI->isHotCount(EntryCount))
1159 FreqAttr = FFA_Hot;
1160 else if (PSI->isColdCount(MaxCount))
1161 FreqAttr = FFA_Cold;
1162 }
1163};
1164
1165} // end anonymous namespace
1166
1167// Visit all the edges and assign the count value for the instrumented
1168// edges and the BB. Return false on error.
1169bool PGOUseFunc::setInstrumentedCounts(
1170 const std::vector<uint64_t> &CountFromProfile) {
1171
1172 std::vector<BasicBlock *> InstrumentBBs;
1173 FuncInfo.getInstrumentBBs(InstrumentBBs);
1174 unsigned NumCounters =
1175 InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
1176 // The number of counters here should match the number of counters
1177 // in profile. Return if they mismatch.
1178 if (NumCounters != CountFromProfile.size()) {
1179 return false;
1180 }
1181 auto *FuncEntry = &*F.begin();
1182
1183 // Set the profile count to the Instrumented BBs.
1184 uint32_t I = 0;
1185 for (BasicBlock *InstrBB : InstrumentBBs) {
1186 uint64_t CountValue = CountFromProfile[I++];
1187 UseBBInfo &Info = getBBInfo(InstrBB);
1188 // If we reach here, we know that we have some nonzero count
1189 // values in this function. The entry count should not be 0.
1190 // Fix it if necessary.
1191 if (InstrBB == FuncEntry && CountValue == 0)
1192 CountValue = 1;
1193 Info.setBBInfoCount(CountValue);
1194 }
1195 ProfileCountSize = CountFromProfile.size();
1196 CountPosition = I;
1197
1198 // Set the edge count and update the count of unknown edges for BBs.
1199 auto setEdgeCount = [this](PGOUseEdge *E, uint64_t Value) -> void {
1200 E->setEdgeCount(Value);
1201 this->getBBInfo(E->SrcBB).UnknownCountOutEdge--;
1202 this->getBBInfo(E->DestBB).UnknownCountInEdge--;
1203 };
1204
1205 // Set the profile count the Instrumented edges. There are BBs that not in
1206 // MST but not instrumented. Need to set the edge count value so that we can
1207 // populate the profile counts later.
1208 for (auto &E : FuncInfo.MST.AllEdges) {
1209 if (E->Removed || E->InMST)
1210 continue;
1211 const BasicBlock *SrcBB = E->SrcBB;
1212 UseBBInfo &SrcInfo = getBBInfo(SrcBB);
1213
1214 // If only one out-edge, the edge profile count should be the same as BB
1215 // profile count.
1216 if (SrcInfo.CountValid && SrcInfo.OutEdges.size() == 1)
1217 setEdgeCount(E.get(), SrcInfo.CountValue);
1218 else {
1219 const BasicBlock *DestBB = E->DestBB;
1220 UseBBInfo &DestInfo = getBBInfo(DestBB);
1221 // If only one in-edge, the edge profile count should be the same as BB
1222 // profile count.
1223 if (DestInfo.CountValid && DestInfo.InEdges.size() == 1)
1224 setEdgeCount(E.get(), DestInfo.CountValue);
1225 }
1226 if (E->CountValid)
1227 continue;
1228 // E's count should have been set from profile. If not, this meenas E skips
1229 // the instrumentation. We set the count to 0.
1230 setEdgeCount(E.get(), 0);
1231 }
1232 return true;
1233}
1234
1235// Set the count value for the unknown edge. There should be one and only one
1236// unknown edge in Edges vector.
1237void PGOUseFunc::setEdgeCount(DirectEdges &Edges, uint64_t Value) {
1238 for (auto &E : Edges) {
1239 if (E->CountValid)
1240 continue;
1241 E->setEdgeCount(Value);
1242
1243 getBBInfo(E->SrcBB).UnknownCountOutEdge--;
1244 getBBInfo(E->DestBB).UnknownCountInEdge--;
1245 return;
1246 }
1247 llvm_unreachable("Cannot find the unknown count edge")__builtin_unreachable();
1248}
1249
1250// Emit function metadata indicating PGO profile mismatch.
1251static void annotateFunctionWithHashMismatch(Function &F,
1252 LLVMContext &ctx) {
1253 const char MetadataName[] = "instr_prof_hash_mismatch";
1254 SmallVector<Metadata *, 2> Names;
1255 // If this metadata already exists, ignore.
1256 auto *Existing = F.getMetadata(LLVMContext::MD_annotation);
1257 if (Existing) {
1258 MDTuple *Tuple = cast<MDTuple>(Existing);
1259 for (auto &N : Tuple->operands()) {
1260 if (cast<MDString>(N.get())->getString() == MetadataName)
1261 return;
1262 Names.push_back(N.get());
1263 }
1264 }
1265
1266 MDBuilder MDB(ctx);
1267 Names.push_back(MDB.createString(MetadataName));
1268 MDNode *MD = MDTuple::get(ctx, Names);
1269 F.setMetadata(LLVMContext::MD_annotation, MD);
1270}
1271
1272// Read the profile from ProfileFileName and assign the value to the
1273// instrumented BB and the edges. This function also updates ProgramMaxCount.
1274// Return true if the profile are successfully read, and false on errors.
1275bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros,
1276 bool &AllMinusOnes) {
1277 auto &Ctx = M->getContext();
1278 Expected<InstrProfRecord> Result =
1279 PGOReader->getInstrProfRecord(FuncInfo.FuncName, FuncInfo.FunctionHash);
1280 if (Error E = Result.takeError()) {
1281 handleAllErrors(std::move(E), [&](const InstrProfError &IPE) {
1282 auto Err = IPE.get();
1283 bool SkipWarning = false;
1284 LLVM_DEBUG(dbgs() << "Error in reading profile for Func "do { } while (false)
1285 << FuncInfo.FuncName << ": ")do { } while (false);
1286 if (Err == instrprof_error::unknown_function) {
1287 IsCS ? NumOfCSPGOMissing++ : NumOfPGOMissing++;
1288 SkipWarning = !PGOWarnMissing;
1289 LLVM_DEBUG(dbgs() << "unknown function")do { } while (false);
1290 } else if (Err == instrprof_error::hash_mismatch ||
1291 Err == instrprof_error::malformed) {
1292 IsCS ? NumOfCSPGOMismatch++ : NumOfPGOMismatch++;
1293 SkipWarning =
1294 NoPGOWarnMismatch ||
1295 (NoPGOWarnMismatchComdat &&
1296 (F.hasComdat() ||
1297 F.getLinkage() == GlobalValue::AvailableExternallyLinkage));
1298 LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")")do { } while (false);
1299 // Emit function metadata indicating PGO profile mismatch.
1300 annotateFunctionWithHashMismatch(F, M->getContext());
1301 }
1302
1303 LLVM_DEBUG(dbgs() << " IsCS=" << IsCS << "\n")do { } while (false);
1304 if (SkipWarning)
1305 return;
1306
1307 std::string Msg = IPE.message() + std::string(" ") + F.getName().str() +
1308 std::string(" Hash = ") +
1309 std::to_string(FuncInfo.FunctionHash);
1310
1311 Ctx.diagnose(
1312 DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning));
1313 });
1314 return false;
1315 }
1316 ProfileRecord = std::move(Result.get());
1317 std::vector<uint64_t> &CountFromProfile = ProfileRecord.Counts;
1318
1319 IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++;
1320 LLVM_DEBUG(dbgs() << CountFromProfile.size() << " counts\n")do { } while (false);
1321 AllMinusOnes = (CountFromProfile.size() > 0);
1322 uint64_t ValueSum = 0;
1323 for (unsigned I = 0, S = CountFromProfile.size(); I < S; I++) {
1324 LLVM_DEBUG(dbgs() << " " << I << ": " << CountFromProfile[I] << "\n")do { } while (false);
1325 ValueSum += CountFromProfile[I];
1326 if (CountFromProfile[I] != (uint64_t)-1)
1327 AllMinusOnes = false;
1328 }
1329 AllZeros = (ValueSum == 0);
1330
1331 LLVM_DEBUG(dbgs() << "SUM = " << ValueSum << "\n")do { } while (false);
1332
1333 getBBInfo(nullptr).UnknownCountOutEdge = 2;
1334 getBBInfo(nullptr).UnknownCountInEdge = 2;
1335
1336 if (!setInstrumentedCounts(CountFromProfile)) {
1337 LLVM_DEBUG(do { } while (false)
1338 dbgs() << "Inconsistent number of counts, skipping this function")do { } while (false);
1339 Ctx.diagnose(DiagnosticInfoPGOProfile(
1340 M->getName().data(),
1341 Twine("Inconsistent number of counts in ") + F.getName().str()
1342 + Twine(": the profile may be stale or there is a function name collision."),
1343 DS_Warning));
1344 return false;
1345 }
1346 ProgramMaxCount = PGOReader->getMaximumFunctionCount(IsCS);
1347 return true;
1348}
1349
1350// Populate the counters from instrumented BBs to all BBs.
1351// In the end of this operation, all BBs should have a valid count value.
1352void PGOUseFunc::populateCounters() {
1353 bool Changes = true;
1354 unsigned NumPasses = 0;
1355 while (Changes) {
1356 NumPasses++;
1357 Changes = false;
1358
1359 // For efficient traversal, it's better to start from the end as most
1360 // of the instrumented edges are at the end.
1361 for (auto &BB : reverse(F)) {
1362 UseBBInfo *Count = findBBInfo(&BB);
1363 if (Count == nullptr)
1364 continue;
1365 if (!Count->CountValid) {
1366 if (Count->UnknownCountOutEdge == 0) {
1367 Count->CountValue = sumEdgeCount(Count->OutEdges);
1368 Count->CountValid = true;
1369 Changes = true;
1370 } else if (Count->UnknownCountInEdge == 0) {
1371 Count->CountValue = sumEdgeCount(Count->InEdges);
1372 Count->CountValid = true;
1373 Changes = true;
1374 }
1375 }
1376 if (Count->CountValid) {
1377 if (Count->UnknownCountOutEdge == 1) {
1378 uint64_t Total = 0;
1379 uint64_t OutSum = sumEdgeCount(Count->OutEdges);
1380 // If the one of the successor block can early terminate (no-return),
1381 // we can end up with situation where out edge sum count is larger as
1382 // the source BB's count is collected by a post-dominated block.
1383 if (Count->CountValue > OutSum)
1384 Total = Count->CountValue - OutSum;
1385 setEdgeCount(Count->OutEdges, Total);
1386 Changes = true;
1387 }
1388 if (Count->UnknownCountInEdge == 1) {
1389 uint64_t Total = 0;
1390 uint64_t InSum = sumEdgeCount(Count->InEdges);
1391 if (Count->CountValue > InSum)
1392 Total = Count->CountValue - InSum;
1393 setEdgeCount(Count->InEdges, Total);
1394 Changes = true;
1395 }
1396 }
1397 }
1398 }
1399
1400 LLVM_DEBUG(dbgs() << "Populate counts in " << NumPasses << " passes.\n")do { } while (false);
1401#ifndef NDEBUG1
1402 // Assert every BB has a valid counter.
1403 for (auto &BB : F) {
1404 auto BI = findBBInfo(&BB);
1405 if (BI == nullptr)
1406 continue;
1407 assert(BI->CountValid && "BB count is not valid")((void)0);
1408 }
1409#endif
1410 uint64_t FuncEntryCount = getBBInfo(&*F.begin()).CountValue;
1411 uint64_t FuncMaxCount = FuncEntryCount;
1412 for (auto &BB : F) {
1413 auto BI = findBBInfo(&BB);
1414 if (BI == nullptr)
1415 continue;
1416 FuncMaxCount = std::max(FuncMaxCount, BI->CountValue);
1417 }
1418
1419 // Fix the obviously inconsistent entry count.
1420 if (FuncMaxCount > 0 && FuncEntryCount == 0)
1421 FuncEntryCount = 1;
1422 F.setEntryCount(ProfileCount(FuncEntryCount, Function::PCT_Real));
1423 markFunctionAttributes(FuncEntryCount, FuncMaxCount);
1424
1425 // Now annotate select instructions
1426 FuncInfo.SIVisitor.annotateSelects(F, this, &CountPosition);
1427 assert(CountPosition == ProfileCountSize)((void)0);
1428
1429 LLVM_DEBUG(FuncInfo.dumpInfo("after reading profile."))do { } while (false);
1430}
1431
1432// Assign the scaled count values to the BB with multiple out edges.
1433void PGOUseFunc::setBranchWeights() {
1434 // Generate MD_prof metadata for every branch instruction.
1435 LLVM_DEBUG(dbgs() << "\nSetting branch weights for func " << F.getName()do { } while (false)
1436 << " IsCS=" << IsCS << "\n")do { } while (false);
1437 for (auto &BB : F) {
1438 Instruction *TI = BB.getTerminator();
1439 if (TI->getNumSuccessors() < 2)
1440 continue;
1441 if (!(isa<BranchInst>(TI) || isa<SwitchInst>(TI) ||
1442 isa<IndirectBrInst>(TI) || isa<InvokeInst>(TI)))
1443 continue;
1444
1445 if (getBBInfo(&BB).CountValue == 0)
1446 continue;
1447
1448 // We have a non-zero Branch BB.
1449 const UseBBInfo &BBCountInfo = getBBInfo(&BB);
1450 unsigned Size = BBCountInfo.OutEdges.size();
1451 SmallVector<uint64_t, 2> EdgeCounts(Size, 0);
1452 uint64_t MaxCount = 0;
1453 for (unsigned s = 0; s < Size; s++) {
1454 const PGOUseEdge *E = BBCountInfo.OutEdges[s];
1455 const BasicBlock *SrcBB = E->SrcBB;
1456 const BasicBlock *DestBB = E->DestBB;
1457 if (DestBB == nullptr)
1458 continue;
1459 unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
1460 uint64_t EdgeCount = E->CountValue;
1461 if (EdgeCount > MaxCount)
1462 MaxCount = EdgeCount;
1463 EdgeCounts[SuccNum] = EdgeCount;
1464 }
1465 setProfMetadata(M, TI, EdgeCounts, MaxCount);
1466 }
1467}
1468
1469static bool isIndirectBrTarget(BasicBlock *BB) {
1470 for (BasicBlock *Pred : predecessors(BB)) {
1471 if (isa<IndirectBrInst>(Pred->getTerminator()))
1472 return true;
1473 }
1474 return false;
1475}
1476
1477void PGOUseFunc::annotateIrrLoopHeaderWeights() {
1478 LLVM_DEBUG(dbgs() << "\nAnnotating irreducible loop header weights.\n")do { } while (false);
1479 // Find irr loop headers
1480 for (auto &BB : F) {
1481 // As a heuristic also annotate indrectbr targets as they have a high chance
1482 // to become an irreducible loop header after the indirectbr tail
1483 // duplication.
1484 if (BFI->isIrrLoopHeader(&BB) || isIndirectBrTarget(&BB)) {
1485 Instruction *TI = BB.getTerminator();
1486 const UseBBInfo &BBCountInfo = getBBInfo(&BB);
1487 setIrrLoopHeaderMetadata(M, TI, BBCountInfo.CountValue);
1488 }
1489 }
1490}
1491
1492void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) {
1493 Module *M = F.getParent();
1494 IRBuilder<> Builder(&SI);
1495 Type *Int64Ty = Builder.getInt64Ty();
1496 Type *I8PtrTy = Builder.getInt8PtrTy();
1497 auto *Step = Builder.CreateZExt(SI.getCondition(), Int64Ty);
1498 Builder.CreateCall(
1499 Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment_step),
1500 {ConstantExpr::getBitCast(FuncNameVar, I8PtrTy),
1501 Builder.getInt64(FuncHash), Builder.getInt32(TotalNumCtrs),
1502 Builder.getInt32(*CurCtrIdx), Step});
1503 ++(*CurCtrIdx);
1504}
1505
1506void SelectInstVisitor::annotateOneSelectInst(SelectInst &SI) {
1507 std::vector<uint64_t> &CountFromProfile = UseFunc->getProfileRecord().Counts;
1508 assert(*CurCtrIdx < CountFromProfile.size() &&((void)0)
1509 "Out of bound access of counters")((void)0);
1510 uint64_t SCounts[2];
1511 SCounts[0] = CountFromProfile[*CurCtrIdx]; // True count
1512 ++(*CurCtrIdx);
1513 uint64_t TotalCount = 0;
1514 auto BI = UseFunc->findBBInfo(SI.getParent());
1515 if (BI != nullptr)
1516 TotalCount = BI->CountValue;
1517 // False Count
1518 SCounts[1] = (TotalCount > SCounts[0] ? TotalCount - SCounts[0] : 0);
1519 uint64_t MaxCount = std::max(SCounts[0], SCounts[1]);
1520 if (MaxCount)
1521 setProfMetadata(F.getParent(), &SI, SCounts, MaxCount);
1522}
1523
1524void SelectInstVisitor::visitSelectInst(SelectInst &SI) {
1525 if (!PGOInstrSelect)
1526 return;
1527 // FIXME: do not handle this yet.
1528 if (SI.getCondition()->getType()->isVectorTy())
1529 return;
1530
1531 switch (Mode) {
1532 case VM_counting:
1533 NSIs++;
1534 return;
1535 case VM_instrument:
1536 instrumentOneSelectInst(SI);
1537 return;
1538 case VM_annotate:
1539 annotateOneSelectInst(SI);
1540 return;
1541 }
1542
1543 llvm_unreachable("Unknown visiting mode")__builtin_unreachable();
1544}
1545
1546// Traverse all valuesites and annotate the instructions for all value kind.
1547void PGOUseFunc::annotateValueSites() {
1548 if (DisableValueProfiling)
1549 return;
1550
1551 // Create the PGOFuncName meta data.
1552 createPGOFuncNameMetadata(F, FuncInfo.FuncName);
1553
1554 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
1555 annotateValueSites(Kind);
1556}
1557
1558// Annotate the instructions for a specific value kind.
1559void PGOUseFunc::annotateValueSites(uint32_t Kind) {
1560 assert(Kind <= IPVK_Last)((void)0);
1561 unsigned ValueSiteIndex = 0;
1562 auto &ValueSites = FuncInfo.ValueSites[Kind];
1563 unsigned NumValueSites = ProfileRecord.getNumValueSites(Kind);
1564 if (NumValueSites != ValueSites.size()) {
1565 auto &Ctx = M->getContext();
1566 Ctx.diagnose(DiagnosticInfoPGOProfile(
1567 M->getName().data(),
1568 Twine("Inconsistent number of value sites for ") +
1569 Twine(ValueProfKindDescr[Kind]) +
1570 Twine(" profiling in \"") + F.getName().str() +
1571 Twine("\", possibly due to the use of a stale profile."),
1572 DS_Warning));
1573 return;
1574 }
1575
1576 for (VPCandidateInfo &I : ValueSites) {
1577 LLVM_DEBUG(dbgs() << "Read one value site profile (kind = " << Kinddo { } while (false)
1578 << "): Index = " << ValueSiteIndex << " out of "do { } while (false)
1579 << NumValueSites << "\n")do { } while (false);
1580 annotateValueSite(*M, *I.AnnotatedInst, ProfileRecord,
1581 static_cast<InstrProfValueKind>(Kind), ValueSiteIndex,
1582 Kind == IPVK_MemOPSize ? MaxNumMemOPAnnotations
1583 : MaxNumAnnotations);
1584 ValueSiteIndex++;
1585 }
1586}
1587
1588// Collect the set of members for each Comdat in module M and store
1589// in ComdatMembers.
1590static void collectComdatMembers(
1591 Module &M,
1592 std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
1593 if (!DoComdatRenaming)
1594 return;
1595 for (Function &F : M)
1596 if (Comdat *C = F.getComdat())
1597 ComdatMembers.insert(std::make_pair(C, &F));
1598 for (GlobalVariable &GV : M.globals())
1599 if (Comdat *C = GV.getComdat())
1600 ComdatMembers.insert(std::make_pair(C, &GV));
1601 for (GlobalAlias &GA : M.aliases())
1602 if (Comdat *C = GA.getComdat())
1603 ComdatMembers.insert(std::make_pair(C, &GA));
1604}
1605
1606static bool InstrumentAllFunctions(
1607 Module &M, function_ref<TargetLibraryInfo &(Function &)> LookupTLI,
1608 function_ref<BranchProbabilityInfo *(Function &)> LookupBPI,
1609 function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS) {
1610 // For the context-sensitve instrumentation, we should have a separated pass
1611 // (before LTO/ThinLTO linking) to create these variables.
1612 if (!IsCS)
2
Assuming 'IsCS' is true
3
Taking false branch
1613 createIRLevelProfileFlagVar(M, /* IsCS */ false, PGOInstrumentEntry);
1614 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
1615 collectComdatMembers(M, ComdatMembers);
1616
1617 for (auto &F : M) {
1618 if (F.isDeclaration())
4
Assuming the condition is false
5
Taking false branch
1619 continue;
1620 if (F.hasFnAttribute(llvm::Attribute::NoProfile))
6
Assuming the condition is false
7
Taking false branch
1621 continue;
1622 auto &TLI = LookupTLI(F);
1623 auto *BPI = LookupBPI(F);
1624 auto *BFI = LookupBFI(F);
1625 instrumentOneFunc(F, &M, TLI, BPI, BFI, ComdatMembers, IsCS);
8
Calling 'instrumentOneFunc'
1626 }
1627 return true;
1628}
1629
1630PreservedAnalyses
1631PGOInstrumentationGenCreateVar::run(Module &M, ModuleAnalysisManager &AM) {
1632 createProfileFileNameVar(M, CSInstrName);
1633 createIRLevelProfileFlagVar(M, /* IsCS */ true, PGOInstrumentEntry);
1634 return PreservedAnalyses::all();
1635}
1636
1637bool PGOInstrumentationGenLegacyPass::runOnModule(Module &M) {
1638 if (skipModule(M))
1639 return false;
1640
1641 auto LookupTLI = [this](Function &F) -> TargetLibraryInfo & {
1642 return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
1643 };
1644 auto LookupBPI = [this](Function &F) {
1645 return &this->getAnalysis<BranchProbabilityInfoWrapperPass>(F).getBPI();
1646 };
1647 auto LookupBFI = [this](Function &F) {
1648 return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI();
1649 };
1650 return InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI, IsCS);
1651}
1652
1653PreservedAnalyses PGOInstrumentationGen::run(Module &M,
1654 ModuleAnalysisManager &AM) {
1655 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
1656 auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
1657 return FAM.getResult<TargetLibraryAnalysis>(F);
1658 };
1659 auto LookupBPI = [&FAM](Function &F) {
1660 return &FAM.getResult<BranchProbabilityAnalysis>(F);
1661 };
1662 auto LookupBFI = [&FAM](Function &F) {
1663 return &FAM.getResult<BlockFrequencyAnalysis>(F);
1664 };
1665
1666 if (!InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI, IsCS))
1
Calling 'InstrumentAllFunctions'
1667 return PreservedAnalyses::all();
1668
1669 return PreservedAnalyses::none();
1670}
1671
1672// Using the ratio b/w sums of profile count values and BFI count values to
1673// adjust the func entry count.
1674static void fixFuncEntryCount(PGOUseFunc &Func, LoopInfo &LI,
1675 BranchProbabilityInfo &NBPI) {
1676 Function &F = Func.getFunc();
1677 BlockFrequencyInfo NBFI(F, NBPI, LI);
1678#ifndef NDEBUG1
1679 auto BFIEntryCount = F.getEntryCount();
1680 assert(BFIEntryCount.hasValue() && (BFIEntryCount.getCount() > 0) &&((void)0)
1681 "Invalid BFI Entrycount")((void)0);
1682#endif
1683 auto SumCount = APFloat::getZero(APFloat::IEEEdouble());
1684 auto SumBFICount = APFloat::getZero(APFloat::IEEEdouble());
1685 for (auto &BBI : F) {
1686 uint64_t CountValue = 0;
1687 uint64_t BFICountValue = 0;
1688 if (!Func.findBBInfo(&BBI))
1689 continue;
1690 auto BFICount = NBFI.getBlockProfileCount(&BBI);
1691 CountValue = Func.getBBInfo(&BBI).CountValue;
1692 BFICountValue = BFICount.getValue();
1693 SumCount.add(APFloat(CountValue * 1.0), APFloat::rmNearestTiesToEven);
1694 SumBFICount.add(APFloat(BFICountValue * 1.0), APFloat::rmNearestTiesToEven);
1695 }
1696 if (SumCount.isZero())
1697 return;
1698
1699 assert(SumBFICount.compare(APFloat(0.0)) == APFloat::cmpGreaterThan &&((void)0)
1700 "Incorrect sum of BFI counts")((void)0);
1701 if (SumBFICount.compare(SumCount) == APFloat::cmpEqual)
1702 return;
1703 double Scale = (SumCount / SumBFICount).convertToDouble();
1704 if (Scale < 1.001 && Scale > 0.999)
1705 return;
1706
1707 uint64_t FuncEntryCount = Func.getBBInfo(&*F.begin()).CountValue;
1708 uint64_t NewEntryCount = 0.5 + FuncEntryCount * Scale;
1709 if (NewEntryCount == 0)
1710 NewEntryCount = 1;
1711 if (NewEntryCount != FuncEntryCount) {
1712 F.setEntryCount(ProfileCount(NewEntryCount, Function::PCT_Real));
1713 LLVM_DEBUG(dbgs() << "FixFuncEntryCount: in " << F.getName()do { } while (false)
1714 << ", entry_count " << FuncEntryCount << " --> "do { } while (false)
1715 << NewEntryCount << "\n")do { } while (false);
1716 }
1717}
1718
1719// Compare the profile count values with BFI count values, and print out
1720// the non-matching ones.
1721static void verifyFuncBFI(PGOUseFunc &Func, LoopInfo &LI,
1722 BranchProbabilityInfo &NBPI,
1723 uint64_t HotCountThreshold,
1724 uint64_t ColdCountThreshold) {
1725 Function &F = Func.getFunc();
1726 BlockFrequencyInfo NBFI(F, NBPI, LI);
1727 // bool PrintFunc = false;
1728 bool HotBBOnly = PGOVerifyHotBFI;
1729 std::string Msg;
1730 OptimizationRemarkEmitter ORE(&F);
1731
1732 unsigned BBNum = 0, BBMisMatchNum = 0, NonZeroBBNum = 0;
1733 for (auto &BBI : F) {
1734 uint64_t CountValue = 0;
1735 uint64_t BFICountValue = 0;
1736
1737 if (Func.getBBInfo(&BBI).CountValid)
1738 CountValue = Func.getBBInfo(&BBI).CountValue;
1739
1740 BBNum++;
1741 if (CountValue)
1742 NonZeroBBNum++;
1743 auto BFICount = NBFI.getBlockProfileCount(&BBI);
1744 if (BFICount)
1745 BFICountValue = BFICount.getValue();
1746
1747 if (HotBBOnly) {
1748 bool rawIsHot = CountValue >= HotCountThreshold;
1749 bool BFIIsHot = BFICountValue >= HotCountThreshold;
1750 bool rawIsCold = CountValue <= ColdCountThreshold;
1751 bool ShowCount = false;
1752 if (rawIsHot && !BFIIsHot) {
1753 Msg = "raw-Hot to BFI-nonHot";
1754 ShowCount = true;
1755 } else if (rawIsCold && BFIIsHot) {
1756 Msg = "raw-Cold to BFI-Hot";
1757 ShowCount = true;
1758 }
1759 if (!ShowCount)
1760 continue;
1761 } else {
1762 if ((CountValue < PGOVerifyBFICutoff) &&
1763 (BFICountValue < PGOVerifyBFICutoff))
1764 continue;
1765 uint64_t Diff = (BFICountValue >= CountValue)
1766 ? BFICountValue - CountValue
1767 : CountValue - BFICountValue;
1768 if (Diff < CountValue / 100 * PGOVerifyBFIRatio)
1769 continue;
1770 }
1771 BBMisMatchNum++;
1772
1773 ORE.emit([&]() {
1774 OptimizationRemarkAnalysis Remark(DEBUG_TYPE"pgo-instrumentation", "bfi-verify",
1775 F.getSubprogram(), &BBI);
1776 Remark << "BB " << ore::NV("Block", BBI.getName())
1777 << " Count=" << ore::NV("Count", CountValue)
1778 << " BFI_Count=" << ore::NV("Count", BFICountValue);
1779 if (!Msg.empty())
1780 Remark << " (" << Msg << ")";
1781 return Remark;
1782 });
1783 }
1784 if (BBMisMatchNum)
1785 ORE.emit([&]() {
1786 return OptimizationRemarkAnalysis(DEBUG_TYPE"pgo-instrumentation", "bfi-verify",
1787 F.getSubprogram(), &F.getEntryBlock())
1788 << "In Func " << ore::NV("Function", F.getName())
1789 << ": Num_of_BB=" << ore::NV("Count", BBNum)
1790 << ", Num_of_non_zerovalue_BB=" << ore::NV("Count", NonZeroBBNum)
1791 << ", Num_of_mis_matching_BB=" << ore::NV("Count", BBMisMatchNum);
1792 });
1793}
1794
1795static bool annotateAllFunctions(
1796 Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName,
1797 function_ref<TargetLibraryInfo &(Function &)> LookupTLI,
1798 function_ref<BranchProbabilityInfo *(Function &)> LookupBPI,
1799 function_ref<BlockFrequencyInfo *(Function &)> LookupBFI,
1800 ProfileSummaryInfo *PSI, bool IsCS) {
1801 LLVM_DEBUG(dbgs() << "Read in profile counters: ")do { } while (false);
1802 auto &Ctx = M.getContext();
1803 // Read the counter array from file.
1804 auto ReaderOrErr =
1805 IndexedInstrProfReader::create(ProfileFileName, ProfileRemappingFileName);
1806 if (Error E = ReaderOrErr.takeError()) {
1807 handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) {
1808 Ctx.diagnose(
1809 DiagnosticInfoPGOProfile(ProfileFileName.data(), EI.message()));
1810 });
1811 return false;
1812 }
1813
1814 std::unique_ptr<IndexedInstrProfReader> PGOReader =
1815 std::move(ReaderOrErr.get());
1816 if (!PGOReader) {
1817 Ctx.diagnose(DiagnosticInfoPGOProfile(ProfileFileName.data(),
1818 StringRef("Cannot get PGOReader")));
1819 return false;
1820 }
1821 if (!PGOReader->hasCSIRLevelProfile() && IsCS)
1822 return false;
1823
1824 // TODO: might need to change the warning once the clang option is finalized.
1825 if (!PGOReader->isIRLevelProfile()) {
1826 Ctx.diagnose(DiagnosticInfoPGOProfile(
1827 ProfileFileName.data(), "Not an IR level instrumentation profile"));
1828 return false;
1829 }
1830
1831 // Add the profile summary (read from the header of the indexed summary) here
1832 // so that we can use it below when reading counters (which checks if the
1833 // function should be marked with a cold or inlinehint attribute).
1834 M.setProfileSummary(PGOReader->getSummary(IsCS).getMD(M.getContext()),
1835 IsCS ? ProfileSummary::PSK_CSInstr
1836 : ProfileSummary::PSK_Instr);
1837 PSI->refresh();
1838
1839 std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
1840 collectComdatMembers(M, ComdatMembers);
1841 std::vector<Function *> HotFunctions;
1842 std::vector<Function *> ColdFunctions;
1843
1844 // If the profile marked as always instrument the entry BB, do the
1845 // same. Note this can be overwritten by the internal option in CFGMST.h
1846 bool InstrumentFuncEntry = PGOReader->instrEntryBBEnabled();
1847 if (PGOInstrumentEntry.getNumOccurrences() > 0)
1848 InstrumentFuncEntry = PGOInstrumentEntry;
1849 for (auto &F : M) {
1850 if (F.isDeclaration())
1851 continue;
1852 auto &TLI = LookupTLI(F);
1853 auto *BPI = LookupBPI(F);
1854 auto *BFI = LookupBFI(F);
1855 // Split indirectbr critical edges here before computing the MST rather than
1856 // later in getInstrBB() to avoid invalidating it.
1857 SplitIndirectBrCriticalEdges(F, BPI, BFI);
1858 PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, PSI, IsCS,
1859 InstrumentFuncEntry);
1860 // When AllMinusOnes is true, it means the profile for the function
1861 // is unrepresentative and this function is actually hot. Set the
1862 // entry count of the function to be multiple times of hot threshold
1863 // and drop all its internal counters.
1864 bool AllMinusOnes = false;
1865 bool AllZeros = false;
1866 if (!Func.readCounters(PGOReader.get(), AllZeros, AllMinusOnes))
1867 continue;
1868 if (AllZeros) {
1869 F.setEntryCount(ProfileCount(0, Function::PCT_Real));
1870 if (Func.getProgramMaxCount() != 0)
1871 ColdFunctions.push_back(&F);
1872 continue;
1873 }
1874 const unsigned MultiplyFactor = 3;
1875 if (AllMinusOnes) {
1876 uint64_t HotThreshold = PSI->getHotCountThreshold();
1877 if (HotThreshold)
1878 F.setEntryCount(
1879 ProfileCount(HotThreshold * MultiplyFactor, Function::PCT_Real));
1880 HotFunctions.push_back(&F);
1881 continue;
1882 }
1883 Func.populateCounters();
1884 Func.setBranchWeights();
1885 Func.annotateValueSites();
1886 Func.annotateIrrLoopHeaderWeights();
1887 PGOUseFunc::FuncFreqAttr FreqAttr = Func.getFuncFreqAttr();
1888 if (FreqAttr == PGOUseFunc::FFA_Cold)
1889 ColdFunctions.push_back(&F);
1890 else if (FreqAttr == PGOUseFunc::FFA_Hot)
1891 HotFunctions.push_back(&F);
1892 if (PGOViewCounts != PGOVCT_None &&
1893 (ViewBlockFreqFuncName.empty() ||
1894 F.getName().equals(ViewBlockFreqFuncName))) {
1895 LoopInfo LI{DominatorTree(F)};
1896 std::unique_ptr<BranchProbabilityInfo> NewBPI =
1897 std::make_unique<BranchProbabilityInfo>(F, LI);
1898 std::unique_ptr<BlockFrequencyInfo> NewBFI =
1899 std::make_unique<BlockFrequencyInfo>(F, *NewBPI, LI);
1900 if (PGOViewCounts == PGOVCT_Graph)
1901 NewBFI->view();
1902 else if (PGOViewCounts == PGOVCT_Text) {
1903 dbgs() << "pgo-view-counts: " << Func.getFunc().getName() << "\n";
1904 NewBFI->print(dbgs());
1905 }
1906 }
1907 if (PGOViewRawCounts != PGOVCT_None &&
1908 (ViewBlockFreqFuncName.empty() ||
1909 F.getName().equals(ViewBlockFreqFuncName))) {
1910 if (PGOViewRawCounts == PGOVCT_Graph)
1911 if (ViewBlockFreqFuncName.empty())
1912 WriteGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName());
1913 else
1914 ViewGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName());
1915 else if (PGOViewRawCounts == PGOVCT_Text) {
1916 dbgs() << "pgo-view-raw-counts: " << Func.getFunc().getName() << "\n";
1917 Func.dumpInfo();
1918 }
1919 }
1920
1921 if (PGOVerifyBFI || PGOVerifyHotBFI || PGOFixEntryCount) {
1922 LoopInfo LI{DominatorTree(F)};
1923 BranchProbabilityInfo NBPI(F, LI);
1924
1925 // Fix func entry count.
1926 if (PGOFixEntryCount)
1927 fixFuncEntryCount(Func, LI, NBPI);
1928
1929 // Verify BlockFrequency information.
1930 uint64_t HotCountThreshold = 0, ColdCountThreshold = 0;
1931 if (PGOVerifyHotBFI) {
1932 HotCountThreshold = PSI->getOrCompHotCountThreshold();
1933 ColdCountThreshold = PSI->getOrCompColdCountThreshold();
1934 }
1935 verifyFuncBFI(Func, LI, NBPI, HotCountThreshold, ColdCountThreshold);
1936 }
1937 }
1938
1939 // Set function hotness attribute from the profile.
1940 // We have to apply these attributes at the end because their presence
1941 // can affect the BranchProbabilityInfo of any callers, resulting in an
1942 // inconsistent MST between prof-gen and prof-use.
1943 for (auto &F : HotFunctions) {
1944 F->addFnAttr(Attribute::InlineHint);
1945 LLVM_DEBUG(dbgs() << "Set inline attribute to function: " << F->getName()do { } while (false)
1946 << "\n")do { } while (false);
1947 }
1948 for (auto &F : ColdFunctions) {
1949 // Only set when there is no Attribute::Hot set by the user. For Hot
1950 // attribute, user's annotation has the precedence over the profile.
1951 if (F->hasFnAttribute(Attribute::Hot)) {
1952 auto &Ctx = M.getContext();
1953 std::string Msg = std::string("Function ") + F->getName().str() +
1954 std::string(" is annotated as a hot function but"
1955 " the profile is cold");
1956 Ctx.diagnose(
1957 DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning));
1958 continue;
1959 }
1960 F->addFnAttr(Attribute::Cold);
1961 LLVM_DEBUG(dbgs() << "Set cold attribute to function: " << F->getName()do { } while (false)
1962 << "\n")do { } while (false);
1963 }
1964 return true;
1965}
1966
1967PGOInstrumentationUse::PGOInstrumentationUse(std::string Filename,
1968 std::string RemappingFilename,
1969 bool IsCS)
1970 : ProfileFileName(std::move(Filename)),
1971 ProfileRemappingFileName(std::move(RemappingFilename)), IsCS(IsCS) {
1972 if (!PGOTestProfileFile.empty())
1973 ProfileFileName = PGOTestProfileFile;
1974 if (!PGOTestProfileRemappingFile.empty())
1975 ProfileRemappingFileName = PGOTestProfileRemappingFile;
1976}
1977
1978PreservedAnalyses PGOInstrumentationUse::run(Module &M,
1979 ModuleAnalysisManager &AM) {
1980
1981 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
1982 auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
1983 return FAM.getResult<TargetLibraryAnalysis>(F);
1984 };
1985 auto LookupBPI = [&FAM](Function &F) {
1986 return &FAM.getResult<BranchProbabilityAnalysis>(F);
1987 };
1988 auto LookupBFI = [&FAM](Function &F) {
1989 return &FAM.getResult<BlockFrequencyAnalysis>(F);
1990 };
1991
1992 auto *PSI = &AM.getResult<ProfileSummaryAnalysis>(M);
1993
1994 if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName,
1995 LookupTLI, LookupBPI, LookupBFI, PSI, IsCS))
1996 return PreservedAnalyses::all();
1997
1998 return PreservedAnalyses::none();
1999}
2000
2001bool PGOInstrumentationUseLegacyPass::runOnModule(Module &M) {
2002 if (skipModule(M))
2003 return false;
2004
2005 auto LookupTLI = [this](Function &F) -> TargetLibraryInfo & {
2006 return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
2007 };
2008 auto LookupBPI = [this](Function &F) {
2009 return &this->getAnalysis<BranchProbabilityInfoWrapperPass>(F).getBPI();
2010 };
2011 auto LookupBFI = [this](Function &F) {
2012 return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI();
2013 };
2014
2015 auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
2016 return annotateAllFunctions(M, ProfileFileName, "", LookupTLI, LookupBPI,
2017 LookupBFI, PSI, IsCS);
2018}
2019
2020static std::string getSimpleNodeName(const BasicBlock *Node) {
2021 if (!Node->getName().empty())
2022 return std::string(Node->getName());
2023
2024 std::string SimpleNodeName;
2025 raw_string_ostream OS(SimpleNodeName);
2026 Node->printAsOperand(OS, false);
2027 return OS.str();
2028}
2029
2030void llvm::setProfMetadata(Module *M, Instruction *TI,
2031 ArrayRef<uint64_t> EdgeCounts,
2032 uint64_t MaxCount) {
2033 MDBuilder MDB(M->getContext());
2034 assert(MaxCount > 0 && "Bad max count")((void)0);
2035 uint64_t Scale = calculateCountScale(MaxCount);
2036 SmallVector<unsigned, 4> Weights;
2037 for (const auto &ECI : EdgeCounts)
2038 Weights.push_back(scaleBranchCount(ECI, Scale));
2039
2040 LLVM_DEBUG(dbgs() << "Weight is: "; for (const auto &Wdo { } while (false)
2041 : Weights) {do { } while (false)
2042 dbgs() << W << " ";do { } while (false)
2043 } dbgs() << "\n";)do { } while (false);
2044
2045 TI->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
2046 if (EmitBranchProbability) {
2047 std::string BrCondStr = getBranchCondString(TI);
2048 if (BrCondStr.empty())
2049 return;
2050
2051 uint64_t WSum =
2052 std::accumulate(Weights.begin(), Weights.end(), (uint64_t)0,
2053 [](uint64_t w1, uint64_t w2) { return w1 + w2; });
2054 uint64_t TotalCount =
2055 std::accumulate(EdgeCounts.begin(), EdgeCounts.end(), (uint64_t)0,
2056 [](uint64_t c1, uint64_t c2) { return c1 + c2; });
2057 Scale = calculateCountScale(WSum);
2058 BranchProbability BP(scaleBranchCount(Weights[0], Scale),
2059 scaleBranchCount(WSum, Scale));
2060 std::string BranchProbStr;
2061 raw_string_ostream OS(BranchProbStr);
2062 OS << BP;
2063 OS << " (total count : " << TotalCount << ")";
2064 OS.flush();
2065 Function *F = TI->getParent()->getParent();
2066 OptimizationRemarkEmitter ORE(F);
2067 ORE.emit([&]() {
2068 return OptimizationRemark(DEBUG_TYPE"pgo-instrumentation", "pgo-instrumentation", TI)
2069 << BrCondStr << " is true with probability : " << BranchProbStr;
2070 });
2071 }
2072}
2073
2074namespace llvm {
2075
2076void setIrrLoopHeaderMetadata(Module *M, Instruction *TI, uint64_t Count) {
2077 MDBuilder MDB(M->getContext());
2078 TI->setMetadata(llvm::LLVMContext::MD_irr_loop,
2079 MDB.createIrrLoopHeaderWeight(Count));
2080}
2081
2082template <> struct GraphTraits<PGOUseFunc *> {
2083 using NodeRef = const BasicBlock *;
2084 using ChildIteratorType = const_succ_iterator;
2085 using nodes_iterator = pointer_iterator<Function::const_iterator>;
2086
2087 static NodeRef getEntryNode(const PGOUseFunc *G) {
2088 return &G->getFunc().front();
2089 }
2090
2091 static ChildIteratorType child_begin(const NodeRef N) {
2092 return succ_begin(N);
2093 }
2094
2095 static ChildIteratorType child_end(const NodeRef N) { return succ_end(N); }
2096
2097 static nodes_iterator nodes_begin(const PGOUseFunc *G) {
2098 return nodes_iterator(G->getFunc().begin());
2099 }
2100
2101 static nodes_iterator nodes_end(const PGOUseFunc *G) {
2102 return nodes_iterator(G->getFunc().end());
2103 }
2104};
2105
2106template <> struct DOTGraphTraits<PGOUseFunc *> : DefaultDOTGraphTraits {
2107 explicit DOTGraphTraits(bool isSimple = false)
2108 : DefaultDOTGraphTraits(isSimple) {}
2109
2110 static std::string getGraphName(const PGOUseFunc *G) {
2111 return std::string(G->getFunc().getName());
2112 }
2113
2114 std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph) {
2115 std::string Result;
2116 raw_string_ostream OS(Result);
2117
2118 OS << getSimpleNodeName(Node) << ":\\l";
2119 UseBBInfo *BI = Graph->findBBInfo(Node);
2120 OS << "Count : ";
2121 if (BI && BI->CountValid)
2122 OS << BI->CountValue << "\\l";
2123 else
2124 OS << "Unknown\\l";
2125
2126 if (!PGOInstrSelect)
2127 return Result;
2128
2129 for (const Instruction &I : *Node) {
2130 if (!isa<SelectInst>(&I))
2131 continue;
2132 // Display scaled counts for SELECT instruction:
2133 OS << "SELECT : { T = ";
2134 uint64_t TC, FC;
2135 bool HasProf = I.extractProfMetadata(TC, FC);
2136 if (!HasProf)
2137 OS << "Unknown, F = Unknown }\\l";
2138 else
2139 OS << TC << ", F = " << FC << " }\\l";
2140 }
2141 return Result;
2142 }
2143};
2144
2145} // end namespace llvm