Bug Summary

File: src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
Warning: line 3396, column 9
Called C++ object pointer is null

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple amd64-unknown-openbsd7.0 -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name OpenMPOpt.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model static -mframe-pointer=all -relaxed-aliasing -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/usr/src/gnu/usr.bin/clang/libLLVM/obj -resource-dir /usr/local/lib/clang/13.0.0 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I 
/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Analysis -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ASMParser -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/BinaryFormat -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Bitcode -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Bitcode -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Bitstream -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /include/llvm/CodeGen -I /include/llvm/CodeGen/PBQP -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/IR -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/IR -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Coroutines -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ProfileData/Coverage -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/CodeView -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/DWARF -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/MSF -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/PDB -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Demangle -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ExecutionEngine -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ExecutionEngine/JITLink -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ExecutionEngine/Orc -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend/OpenACC -I 
/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend/OpenMP -I /include/llvm/CodeGen/GlobalISel -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/IRReader -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/InstCombine -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/Transforms/InstCombine -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/LTO -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Linker -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/MC -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/MC/MCParser -I /include/llvm/CodeGen/MIRParser -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Object -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Option -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Passes -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ProfileData -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Scalar -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ADT -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Support -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/Symbolize -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Target -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I 
/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Utils -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Vectorize -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/IPO -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include -I /usr/src/gnu/usr.bin/clang/libLLVM/../include -I /usr/src/gnu/usr.bin/clang/libLLVM/obj -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include -D NDEBUG -D __STDC_LIMIT_MACROS -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D LLVM_PREFIX="/usr" -internal-isystem /usr/include/c++/v1 -internal-isystem /usr/local/lib/clang/13.0.0/include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/usr/src/gnu/usr.bin/clang/libLLVM/obj -ferror-limit 19 -fvisibility-inlines-hidden -fwrapv -stack-protector 2 -fno-rtti -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-valloc -fno-builtin-free -fno-builtin-strdup 
-fno-builtin-strndup -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /home/ben/Projects/vmm/scan-build/2022-01-12-194120-40624-1 -x c++ /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Transforms/IPO/OpenMPOpt.cpp

/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Transforms/IPO/OpenMPOpt.cpp

1//===-- IPO/OpenMPOpt.cpp - Collection of OpenMP specific optimizations ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// OpenMP specific optimizations:
10//
11// - Deduplication of runtime calls, e.g., omp_get_thread_num.
12// - Replacing globalized device memory with stack memory.
13// - Replacing globalized device memory with shared memory.
14// - Parallel region merging.
15// - Transforming generic-mode device kernels to SPMD mode.
16// - Specializing the state machine for generic-mode device kernels.
17//
18//===----------------------------------------------------------------------===//
19
20#include "llvm/Transforms/IPO/OpenMPOpt.h"
21
22#include "llvm/ADT/EnumeratedArray.h"
23#include "llvm/ADT/PostOrderIterator.h"
24#include "llvm/ADT/Statistic.h"
25#include "llvm/Analysis/CallGraph.h"
26#include "llvm/Analysis/CallGraphSCCPass.h"
27#include "llvm/Analysis/OptimizationRemarkEmitter.h"
28#include "llvm/Analysis/ValueTracking.h"
29#include "llvm/Frontend/OpenMP/OMPConstants.h"
30#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
31#include "llvm/IR/Assumptions.h"
32#include "llvm/IR/DiagnosticInfo.h"
33#include "llvm/IR/GlobalValue.h"
34#include "llvm/IR/Instruction.h"
35#include "llvm/IR/IntrinsicInst.h"
36#include "llvm/InitializePasses.h"
37#include "llvm/Support/CommandLine.h"
38#include "llvm/Transforms/IPO.h"
39#include "llvm/Transforms/IPO/Attributor.h"
40#include "llvm/Transforms/Utils/BasicBlockUtils.h"
41#include "llvm/Transforms/Utils/CallGraphUpdater.h"
42#include "llvm/Transforms/Utils/CodeExtractor.h"
43
44using namespace llvm;
45using namespace omp;
46
47#define DEBUG_TYPE"openmp-opt" "openmp-opt"
48
// Command-line switches for this pass. Every option below is a boolean
// cl::opt, is marked cl::Hidden, and is initialized to false.

// "openmp-opt-disable": per its description, disables the OpenMP specific
// optimizations.
49static cl::opt<bool> DisableOpenMPOptimizations(
50 "openmp-opt-disable", cl::ZeroOrMore,
51 cl::desc("Disable OpenMP specific optimizations."), cl::Hidden,
52 cl::init(false));
53
// "openmp-opt-enable-merging": per its description, enables the OpenMP
// (parallel) region merging optimization.
54static cl::opt<bool> EnableParallelRegionMerging(
55 "openmp-opt-enable-merging", cl::ZeroOrMore,
56 cl::desc("Enable the OpenMP region merging optimization."), cl::Hidden,
57 cl::init(false));
58
// "openmp-opt-disable-internalization": per its description, disables
// function internalization.
59static cl::opt<bool>
60 DisableInternalization("openmp-opt-disable-internalization", cl::ZeroOrMore,
61 cl::desc("Disable function internalization."),
62 cl::Hidden, cl::init(false));
63
// "openmp-print-icv-values" and "openmp-print-gpu-kernels" carry no cl::desc.
// NOTE(review): judging by the flag names only, these presumably request
// printing of ICV values / GPU kernels; the consumers are outside this excerpt.
64static cl::opt<bool> PrintICVValues("openmp-print-icv-values", cl::init(false),
65 cl::Hidden);
66static cl::opt<bool> PrintOpenMPKernels("openmp-print-gpu-kernels",
67 cl::init(false), cl::Hidden);
68
// "openmp-hide-memory-transfer-latency": marked "[WIP]" in its own
// description; tries to hide the latency of host to device memory transfers.
69static cl::opt<bool> HideMemoryTransferLatency(
70 "openmp-hide-memory-transfer-latency",
71 cl::desc("[WIP] Tries to hide the latency of host to device memory"
72 " transfers"),
73 cl::Hidden, cl::init(false));
74
// Pass statistics. Each STATISTIC(Name, Description) invocation is shown here
// together with the analyzer's inline rendering of its expansion, i.e. a
// `static llvm::Statistic Name = {"openmp-opt", "Name", Description}` object.
// The expansion text is repeated once per source line that the invocation
// spans, which is why every counter appears two or three times below. The
// counters are bumped elsewhere in the file (e.g. collectUses increments
// NumOpenMPRuntimeFunctionsIdentified and
// NumOpenMPRuntimeFunctionUsesIdentified).
75STATISTIC(NumOpenMPRuntimeCallsDeduplicated,static llvm::Statistic NumOpenMPRuntimeCallsDeduplicated = {"openmp-opt"
, "NumOpenMPRuntimeCallsDeduplicated", "Number of OpenMP runtime calls deduplicated"
}
76 "Number of OpenMP runtime calls deduplicated")static llvm::Statistic NumOpenMPRuntimeCallsDeduplicated = {"openmp-opt"
, "NumOpenMPRuntimeCallsDeduplicated", "Number of OpenMP runtime calls deduplicated"
}
;
77STATISTIC(NumOpenMPParallelRegionsDeleted,static llvm::Statistic NumOpenMPParallelRegionsDeleted = {"openmp-opt"
, "NumOpenMPParallelRegionsDeleted", "Number of OpenMP parallel regions deleted"
}
78 "Number of OpenMP parallel regions deleted")static llvm::Statistic NumOpenMPParallelRegionsDeleted = {"openmp-opt"
, "NumOpenMPParallelRegionsDeleted", "Number of OpenMP parallel regions deleted"
}
;
79STATISTIC(NumOpenMPRuntimeFunctionsIdentified,static llvm::Statistic NumOpenMPRuntimeFunctionsIdentified = {
"openmp-opt", "NumOpenMPRuntimeFunctionsIdentified", "Number of OpenMP runtime functions identified"
}
80 "Number of OpenMP runtime functions identified")static llvm::Statistic NumOpenMPRuntimeFunctionsIdentified = {
"openmp-opt", "NumOpenMPRuntimeFunctionsIdentified", "Number of OpenMP runtime functions identified"
}
;
81STATISTIC(NumOpenMPRuntimeFunctionUsesIdentified,static llvm::Statistic NumOpenMPRuntimeFunctionUsesIdentified
= {"openmp-opt", "NumOpenMPRuntimeFunctionUsesIdentified", "Number of OpenMP runtime function uses identified"
}
82 "Number of OpenMP runtime function uses identified")static llvm::Statistic NumOpenMPRuntimeFunctionUsesIdentified
= {"openmp-opt", "NumOpenMPRuntimeFunctionUsesIdentified", "Number of OpenMP runtime function uses identified"
}
;
83STATISTIC(NumOpenMPTargetRegionKernels,static llvm::Statistic NumOpenMPTargetRegionKernels = {"openmp-opt"
, "NumOpenMPTargetRegionKernels", "Number of OpenMP target region entry points (=kernels) identified"
}
84 "Number of OpenMP target region entry points (=kernels) identified")static llvm::Statistic NumOpenMPTargetRegionKernels = {"openmp-opt"
, "NumOpenMPTargetRegionKernels", "Number of OpenMP target region entry points (=kernels) identified"
}
;
85STATISTIC(NumOpenMPTargetRegionKernelsSPMD,static llvm::Statistic NumOpenMPTargetRegionKernelsSPMD = {"openmp-opt"
, "NumOpenMPTargetRegionKernelsSPMD", "Number of OpenMP target region entry points (=kernels) executed in "
"SPMD-mode instead of generic-mode"}
86 "Number of OpenMP target region entry points (=kernels) executed in "static llvm::Statistic NumOpenMPTargetRegionKernelsSPMD = {"openmp-opt"
, "NumOpenMPTargetRegionKernelsSPMD", "Number of OpenMP target region entry points (=kernels) executed in "
"SPMD-mode instead of generic-mode"}
87 "SPMD-mode instead of generic-mode")static llvm::Statistic NumOpenMPTargetRegionKernelsSPMD = {"openmp-opt"
, "NumOpenMPTargetRegionKernelsSPMD", "Number of OpenMP target region entry points (=kernels) executed in "
"SPMD-mode instead of generic-mode"}
;
88STATISTIC(NumOpenMPTargetRegionKernelsWithoutStateMachine,static llvm::Statistic NumOpenMPTargetRegionKernelsWithoutStateMachine
= {"openmp-opt", "NumOpenMPTargetRegionKernelsWithoutStateMachine"
, "Number of OpenMP target region entry points (=kernels) executed in "
"generic-mode without a state machines"}
89 "Number of OpenMP target region entry points (=kernels) executed in "static llvm::Statistic NumOpenMPTargetRegionKernelsWithoutStateMachine
= {"openmp-opt", "NumOpenMPTargetRegionKernelsWithoutStateMachine"
, "Number of OpenMP target region entry points (=kernels) executed in "
"generic-mode without a state machines"}
90 "generic-mode without a state machines")static llvm::Statistic NumOpenMPTargetRegionKernelsWithoutStateMachine
= {"openmp-opt", "NumOpenMPTargetRegionKernelsWithoutStateMachine"
, "Number of OpenMP target region entry points (=kernels) executed in "
"generic-mode without a state machines"}
;
91STATISTIC(NumOpenMPTargetRegionKernelsCustomStateMachineWithFallback,static llvm::Statistic NumOpenMPTargetRegionKernelsCustomStateMachineWithFallback
= {"openmp-opt", "NumOpenMPTargetRegionKernelsCustomStateMachineWithFallback"
, "Number of OpenMP target region entry points (=kernels) executed in "
"generic-mode with customized state machines with fallback"}
92 "Number of OpenMP target region entry points (=kernels) executed in "static llvm::Statistic NumOpenMPTargetRegionKernelsCustomStateMachineWithFallback
= {"openmp-opt", "NumOpenMPTargetRegionKernelsCustomStateMachineWithFallback"
, "Number of OpenMP target region entry points (=kernels) executed in "
"generic-mode with customized state machines with fallback"}
93 "generic-mode with customized state machines with fallback")static llvm::Statistic NumOpenMPTargetRegionKernelsCustomStateMachineWithFallback
= {"openmp-opt", "NumOpenMPTargetRegionKernelsCustomStateMachineWithFallback"
, "Number of OpenMP target region entry points (=kernels) executed in "
"generic-mode with customized state machines with fallback"}
;
94STATISTIC(NumOpenMPTargetRegionKernelsCustomStateMachineWithoutFallback,static llvm::Statistic NumOpenMPTargetRegionKernelsCustomStateMachineWithoutFallback
= {"openmp-opt", "NumOpenMPTargetRegionKernelsCustomStateMachineWithoutFallback"
, "Number of OpenMP target region entry points (=kernels) executed in "
"generic-mode with customized state machines without fallback"
}
95 "Number of OpenMP target region entry points (=kernels) executed in "static llvm::Statistic NumOpenMPTargetRegionKernelsCustomStateMachineWithoutFallback
= {"openmp-opt", "NumOpenMPTargetRegionKernelsCustomStateMachineWithoutFallback"
, "Number of OpenMP target region entry points (=kernels) executed in "
"generic-mode with customized state machines without fallback"
}
96 "generic-mode with customized state machines without fallback")static llvm::Statistic NumOpenMPTargetRegionKernelsCustomStateMachineWithoutFallback
= {"openmp-opt", "NumOpenMPTargetRegionKernelsCustomStateMachineWithoutFallback"
, "Number of OpenMP target region entry points (=kernels) executed in "
"generic-mode with customized state machines without fallback"
}
;
97STATISTIC(static llvm::Statistic NumOpenMPParallelRegionsReplacedInGPUStateMachine
= {"openmp-opt", "NumOpenMPParallelRegionsReplacedInGPUStateMachine"
, "Number of OpenMP parallel regions replaced with ID in GPU state machines"
}
98 NumOpenMPParallelRegionsReplacedInGPUStateMachine,static llvm::Statistic NumOpenMPParallelRegionsReplacedInGPUStateMachine
= {"openmp-opt", "NumOpenMPParallelRegionsReplacedInGPUStateMachine"
, "Number of OpenMP parallel regions replaced with ID in GPU state machines"
}
99 "Number of OpenMP parallel regions replaced with ID in GPU state machines")static llvm::Statistic NumOpenMPParallelRegionsReplacedInGPUStateMachine
= {"openmp-opt", "NumOpenMPParallelRegionsReplacedInGPUStateMachine"
, "Number of OpenMP parallel regions replaced with ID in GPU state machines"
}
;
100STATISTIC(NumOpenMPParallelRegionsMerged,static llvm::Statistic NumOpenMPParallelRegionsMerged = {"openmp-opt"
, "NumOpenMPParallelRegionsMerged", "Number of OpenMP parallel regions merged"
}
101 "Number of OpenMP parallel regions merged")static llvm::Statistic NumOpenMPParallelRegionsMerged = {"openmp-opt"
, "NumOpenMPParallelRegionsMerged", "Number of OpenMP parallel regions merged"
}
;
102STATISTIC(NumBytesMovedToSharedMemory,static llvm::Statistic NumBytesMovedToSharedMemory = {"openmp-opt"
, "NumBytesMovedToSharedMemory", "Amount of memory pushed to shared memory"
}
103 "Amount of memory pushed to shared memory")static llvm::Statistic NumBytesMovedToSharedMemory = {"openmp-opt"
, "NumBytesMovedToSharedMemory", "Amount of memory pushed to shared memory"
}
;
104
// Prefix for this pass's debug output: "[" followed by DEBUG_TYPE followed by
// "]". It is streamed to dbgs() inside the LLVM_DEBUG block of the OMP_RTL
// macro in initializeRuntimeFunctions, and is only declared when the guard
// below holds.
// NOTE(review): the trailing "1" after NDEBUG and the duplicated "openmp-opt"
// literal look like the analyzer's inline macro-expansion annotations rather
// than source text; confirm against the original file.
105#if !defined(NDEBUG1)
106static constexpr auto TAG = "[" DEBUG_TYPE"openmp-opt" "]";
107#endif
108
109namespace {
110
/// Named numeric address-space identifiers, stored as `unsigned`. Note that
/// the value 2 is not assigned to any enumerator.
/// NOTE(review): presumably these mirror the device (GPU) target's address
/// space numbering; the users are outside this excerpt, so confirm.
111enum class AddressSpace : unsigned {
112 Generic = 0,
113 Global = 1,
114 Shared = 3,
115 Constant = 4,
116 Local = 5,
117};
118
// Forward declarations only; neither type is defined within this excerpt.
119struct AAHeapToShared;
120
121struct AAICVTracker;
122
123/// OpenMP specific information. For now, stores RFIs and ICVs also needed for
124/// Attributor runs.
125struct OMPInformationCache : public InformationCache {
/// Construct the cache.
///
/// \p M, \p AG, \p Allocator and the address of \p CGSCC are forwarded to the
/// InformationCache base class; \p M is also used to construct OMPBuilder.
/// \p Kernels is bound to a reference member, so the caller's set must outlive
/// this object.
126 OMPInformationCache(Module &M, AnalysisGetter &AG,
127 BumpPtrAllocator &Allocator, SetVector<Function *> &CGSCC,
128 SmallPtrSetImpl<Kernel> &Kernels)
129 : InformationCache(M, AG, Allocator, &CGSCC), OMPBuilder(M),
130 Kernels(Kernels) {
131
    // OMPBuilder is initialized first: both helpers below read members of
    // OMPBuilder (its type handles) while filling RFIs and ICVs.
132 OMPBuilder.initialize();
133 initializeRuntimeFunctions();
134 initializeInternalControlVars();
135 }
136
137 /// Description of one internal control variable (ICV). Instances live in
    /// the ICVs array and are filled in by initializeInternalControlVars().
    /// No member has a default initializer.
138 struct InternalControlVarInfo {
139 /// The kind, as described by InternalControlVar enum.
140 InternalControlVar Kind;
141
142 /// The name of the ICV.
143 StringRef Name;
144
145 /// Environment variable associated with this ICV.
146 StringRef EnvVarName;
147
148 /// Initial value kind.
149 ICVInitValue InitKind;
150
151 /// Initial value. Set to nullptr by initializeInternalControlVars() when
    /// InitKind is ICV_IMPLEMENTATION_DEFINED.
152 ConstantInt *InitValue;
153
154 /// Setter RTL function associated with this ICV.
155 RuntimeFunction Setter;
156
157 /// Getter RTL function associated with this ICV.
158 RuntimeFunction Getter;
159
160 /// RTL Function corresponding to the override clause of this ICV
161 RuntimeFunction Clause;
162 };
163
164 /// Description of one OpenMP runtime function plus a per-function index of
    /// its uses. The descriptive members are filled in by the OMP_RTL macro in
    /// initializeRuntimeFunctions(); the use index is filled by collectUses().
165 struct RuntimeFunctionInfo {
166
167 /// The kind, as described by the RuntimeFunction enum.
168 RuntimeFunction Kind;
169
170 /// The name of the function.
171 StringRef Name;
172
173 /// Flag to indicate a variadic function.
174 bool IsVarArg;
175
176 /// The return type of the function.
177 Type *ReturnType;
178
179 /// The argument types of the function.
180 SmallVector<Type *, 8> ArgumentTypes;
181
182 /// The declaration if available; stays nullptr when the function was not
    /// found (or did not match) in the module.
183 Function *Declaration = nullptr;
184
185 /// Uses of this runtime function per function containing the use.
186 using UseVector = SmallVector<Use *, 16>;
187
188 /// Drop every recorded use (all per-function vectors).
189 void clearUsesMap() { UsesMap.clear(); }
190
191 /// Boolean conversion that is true if the runtime function was found,
    /// i.e. Declaration is non-null. The conversion is not `explicit`.
192 operator bool() const { return Declaration; }
193
194 /// Return the vector of uses in function \p F, creating an empty one on
    /// first access. \p F may be nullptr: collectUses() files uses whose user
    /// is not an Instruction under the nullptr key.
195 UseVector &getOrCreateUseVector(Function *F) {
196 std::shared_ptr<UseVector> &UV = UsesMap[F];
197 if (!UV)
198 UV = std::make_shared<UseVector>();
199 return *UV;
200 }
201
202 /// Return the vector of uses in function \p F or `nullptr` if there are
203 /// none. Unlike getOrCreateUseVector() this never inserts into the map.
204 const UseVector *getUseVector(Function &F) const {
205 auto I = UsesMap.find(&F);
206 if (I != UsesMap.end())
207 return I->second.get();
208 return nullptr;
209 }
210
211 /// Return how many functions contain uses of this runtime function. This
    /// is the number of map entries, so entries whose vector is empty (see
    /// foreachUse) are counted as well.
212 size_t getNumFunctionsWithUses() const { return UsesMap.size(); }
213
214 /// Return the number of arguments (or the minimal number for variadic
215 /// functions).
216 size_t getNumArgs() const { return ArgumentTypes.size(); }
217
218 /// Run the callback \p CB on each use and forget the use if the result is
219 /// true. The callback will be fed the function in which the use was
220 /// encountered as second argument. Note the parameter order is the reverse
    /// of the single-function overload below.
221 void foreachUse(SmallVectorImpl<Function *> &SCC,
222 function_ref<bool(Use &, Function &)> CB) {
223 for (Function *F : SCC)
224 foreachUse(CB, F);
225 }
226
227 /// Run the callback \p CB on each use within the function \p F and forget
228 /// the use if the result is true. \p F is dereferenced when invoking \p CB,
    /// so it must be non-null whenever a use is recorded for it.
229 void foreachUse(function_ref<bool(Use &, Function &)> CB, Function *F) {
230 SmallVector<unsigned, 8> ToBeDeleted;
    // NOTE(review): clear() on a freshly constructed vector is a no-op.
231 ToBeDeleted.clear();
232
233 unsigned Idx = 0;
    // NOTE(review): this goes through the creating accessor, so visiting a
    // function without recorded uses leaves an empty entry in UsesMap.
234 UseVector &UV = getOrCreateUseVector(F);
235
    // First pass: only record the indices to drop; UV is not modified while
    // it is being iterated.
236 for (Use *U : UV) {
237 if (CB(*U, *F))
238 ToBeDeleted.push_back(Idx);
239 ++Idx;
240 }
241
242 // Remove the to-be-deleted indices in reverse order as prior
243 // modifications will not modify the smaller indices.
    // Removal overwrites the slot with the last element and shrinks by one,
    // so the relative order of the remaining uses is not preserved. The
    // inner Idx shadows the counter declared above.
244 while (!ToBeDeleted.empty()) {
245 unsigned Idx = ToBeDeleted.pop_back_val();
246 UV[Idx] = UV.back();
247 UV.pop_back();
248 }
249 }
250
251 private:
252 /// Map from functions to all uses of this runtime function contained in
253 /// them.
254 DenseMap<Function *, std::shared_ptr<UseVector>> UsesMap;
255
256 public:
257 /// Iterators for the uses of this runtime function (over UsesMap entries,
    /// i.e. function / use-vector pairs).
258 decltype(UsesMap)::iterator begin() { return UsesMap.begin(); }
259 decltype(UsesMap)::iterator end() { return UsesMap.end(); }
260 };
261
262 /// An OpenMP-IR-Builder instance, constructed from the module and
    /// initialized in the constructor; its type members are read by the
    /// initialize* helpers.
263 OpenMPIRBuilder OMPBuilder;
264
265 /// Map from runtime function kind to the runtime function description.
    /// Indexed by RuntimeFunction, with OMPRTL___last as the upper bound.
266 EnumeratedArray<RuntimeFunctionInfo, RuntimeFunction,
267 RuntimeFunction::OMPRTL___last>
268 RFIs;
269
270 /// Map from function declarations/definitions to their runtime enum type.
    /// Entries are added by initializeRuntimeFunctions() for functions whose
    /// signature matched.
271 DenseMap<Function *, RuntimeFunction> RuntimeFunctionIDMap;
272
273 /// Map from ICV kind to the ICV description.
    /// Indexed by InternalControlVar, with ICV___last as the upper bound.
274 EnumeratedArray<InternalControlVarInfo, InternalControlVar,
275 InternalControlVar::ICV___last>
276 ICVs;
277
278 /// Helper to initialize all internal control variable information for those
279 /// defined in OMPKinds.def.
    ///
    /// Works by defining the three ICV_* macros below and then including
    /// OMPKinds.def, so every ICV entry in that file expands into an
    /// assignment on the matching element of ICVs. None of the macros defined
    /// here assigns InternalControlVarInfo::Clause.
280 void initializeInternalControlVars() {
    // Record the setter runtime function of an ICV.
281#define ICV_RT_SET(_Name, RTL) \
282 { \
283 auto &ICV = ICVs[_Name]; \
284 ICV.Setter = RTL; \
285 }
    // Record the getter runtime function of an ICV.
286#define ICV_RT_GET(Name, RTL) \
287 { \
288 auto &ICV = ICVs[Name]; \
289 ICV.Getter = RTL; \
290 }
    // Record name, kind, environment variable and initial value of an ICV.
    // The initial value is nullptr for ICV_IMPLEMENTATION_DEFINED, a 32-bit
    // integer 0 for ICV_ZERO and the `false` constant for ICV_FALSE; for
    // ICV_LAST the switch assigns nothing, so InitValue is left untouched.
291#define ICV_DATA_ENV(Enum, _Name, _EnvVarName, Init) \
292 { \
293 auto &ICV = ICVs[Enum]; \
294 ICV.Name = _Name; \
295 ICV.Kind = Enum; \
296 ICV.InitKind = Init; \
297 ICV.EnvVarName = _EnvVarName; \
298 switch (ICV.InitKind) { \
299 case ICV_IMPLEMENTATION_DEFINED: \
300 ICV.InitValue = nullptr; \
301 break; \
302 case ICV_ZERO: \
303 ICV.InitValue = ConstantInt::get( \
304 Type::getInt32Ty(OMPBuilder.Int32->getContext()), 0); \
305 break; \
306 case ICV_FALSE: \
307 ICV.InitValue = ConstantInt::getFalse(OMPBuilder.Int1->getContext()); \
308 break; \
309 case ICV_LAST: \
310 break; \
311 } \
312 }
313#include "llvm/Frontend/OpenMP/OMPKinds.def"
314 }
315
316 /// Returns true if the function declaration \p F matches the runtime
317 /// function types, that is, return type \p RTFRetType, and argument types
318 /// \p RTFArgTypes.
    ///
    /// A null \p F never matches. Types are compared by pointer identity, and
    /// the argument count must equal RTFArgTypes.size() exactly. Whether the
    /// function is variadic is not examined here.
319 static bool declMatchesRTFTypes(Function *F, Type *RTFRetType,
320 SmallVector<Type *, 8> &RTFArgTypes) {
321 // TODO: We should output information to the user (under debug output
322 // and via remarks).
323
324 if (!F)
325 return false;
326 if (F->getReturnType() != RTFRetType)
327 return false;
328 if (F->arg_size() != RTFArgTypes.size())
329 return false;
330
    // The size check above guarantees RTFTyIt stays in range while walking
    // the formal arguments in lock-step with the expected types.
331 auto RTFTyIt = RTFArgTypes.begin();
332 for (Argument &Arg : F->args()) {
333 if (Arg.getType() != *RTFTyIt)
334 return false;
335
336 ++RTFTyIt;
337 }
338
339 return true;
340 }
341
342 // Helper to collect all uses of the declaration in the UsesMap.
    //
    // Returns the number of uses recorded; 0 if \p RFI has no declaration.
    // Also applies OMPBuilder.addAttributes() to the declaration. When
    // \p CollectStats is true the statistics counters are bumped; note the
    // use counter is increased by the declaration's total use count, which
    // may exceed the number of uses actually recorded below.
343 unsigned collectUses(RuntimeFunctionInfo &RFI, bool CollectStats = true) {
344 unsigned NumUses = 0;
345 if (!RFI.Declaration)
346 return NumUses;
347 OMPBuilder.addAttributes(RFI.Kind, *RFI.Declaration);
348
349 if (CollectStats) {
350 NumOpenMPRuntimeFunctionsIdentified += 1;
351 NumOpenMPRuntimeFunctionUsesIdentified += RFI.Declaration->getNumUses();
352 }
353
354 // TODO: We directly convert uses into proper calls and unknown uses.
    // Uses by instructions are recorded under the enclosing function, but
    // only if that function is in ModuleSlice; other instruction uses are
    // skipped. Uses by non-instruction users are recorded under the nullptr
    // key.
355 for (Use &U : RFI.Declaration->uses()) {
356 if (Instruction *UserI = dyn_cast<Instruction>(U.getUser())) {
357 if (ModuleSlice.count(UserI->getFunction())) {
358 RFI.getOrCreateUseVector(UserI->getFunction()).push_back(&U);
359 ++NumUses;
360 }
361 } else {
362 RFI.getOrCreateUseVector(nullptr).push_back(&U);
363 ++NumUses;
364 }
365 }
366 return NumUses;
367 }
368
369 // Helper function to recollect uses of a runtime function: discards the
    // recorded uses of \p RTF and collects them again, without touching the
    // statistics counters.
370 void recollectUsesForFunction(RuntimeFunction RTF) {
371 auto &RFI = RFIs[RTF];
372 RFI.clearUsesMap();
373 collectUses(RFI, /*CollectStats*/ false);
374 }
375
376 // Helper function to recollect uses of all runtime functions: every index
    // in [0, RFIs.size()) is cast to RuntimeFunction and recollected in turn.
377 void recollectUses() {
378 for (int Idx = 0; Idx < RFIs.size(); ++Idx)
379 recollectUsesForFunction(static_cast<RuntimeFunction>(Idx));
380 }
381
382 /// Helper to initialize all runtime function information for those defined
383 /// in OMPKinds.def.
    ///
    /// Defines the OMP_* macros below and includes OMPKinds.def, so that each
    /// runtime-function entry in that file is looked up by name in the module
    /// and, if its signature matches, recorded in RFIs and
    /// RuntimeFunctionIDMap together with its uses.
384 void initializeRuntimeFunctions() {
    // The module is obtained from the first function in ModuleSlice.
    // NOTE(review): this dereferences ModuleSlice.begin() unconditionally, so
    // it relies on ModuleSlice being non-empty; confirm callers guarantee it.
385 Module &M = *((*ModuleSlice.begin())->getParent());
386
387 // Helper macros for handling __VA_ARGS__ in OMP_RTL
    // Each OMP_*TYPE macro introduces local variables named like the
    // corresponding OMPBuilder members so the type names used in the OMP_RTL
    // argument lists resolve; the (void) casts silence unused warnings.
388#define OMP_TYPE(VarName, ...) \
389 Type *VarName = OMPBuilder.VarName; \
390 (void)VarName;
391
392#define OMP_ARRAY_TYPE(VarName, ...) \
393 ArrayType *VarName##Ty = OMPBuilder.VarName##Ty; \
394 (void)VarName##Ty; \
395 PointerType *VarName##PtrTy = OMPBuilder.VarName##PtrTy; \
396 (void)VarName##PtrTy;
397
398#define OMP_FUNCTION_TYPE(VarName, ...) \
399 FunctionType *VarName = OMPBuilder.VarName; \
400 (void)VarName; \
401 PointerType *VarName##Ptr = OMPBuilder.VarName##Ptr; \
402 (void)VarName##Ptr;
403
404#define OMP_STRUCT_TYPE(VarName, ...) \
405 StructType *VarName = OMPBuilder.VarName; \
406 (void)VarName; \
407 PointerType *VarName##Ptr = OMPBuilder.VarName##Ptr; \
408 (void)VarName##Ptr;
409
    // Per runtime function: look the name up in the module, remember the
    // result in RTLFunctions, and if the declaration matches the expected
    // return/argument types, drop its NoInline attribute, fill in the
    // RuntimeFunctionInfo and collect its uses.
    // NOTE(review): RTLFunctions.insert(F) runs before the match check, which
    // itself treats a null F as "no match"; so a null pointer can end up in
    // RTLFunctions when the module does not contain the function.
    // NOTE(review): _IsVarArg is stored but not compared against F.
    // The LLVM_DEBUG lines below carry the analyzer's `do { } while (false)`
    // expansion residue fused after each continuation backslash.
410#define OMP_RTL(_Enum, _Name, _IsVarArg, _ReturnType, ...) \
411 { \
412 SmallVector<Type *, 8> ArgsTypes({__VA_ARGS__}); \
413 Function *F = M.getFunction(_Name); \
414 RTLFunctions.insert(F); \
415 if (declMatchesRTFTypes(F, OMPBuilder._ReturnType, ArgsTypes)) { \
416 RuntimeFunctionIDMap[F] = _Enum; \
417 F->removeFnAttr(Attribute::NoInline); \
418 auto &RFI = RFIs[_Enum]; \
419 RFI.Kind = _Enum; \
420 RFI.Name = _Name; \
421 RFI.IsVarArg = _IsVarArg; \
422 RFI.ReturnType = OMPBuilder._ReturnType; \
423 RFI.ArgumentTypes = std::move(ArgsTypes); \
424 RFI.Declaration = F; \
425 unsigned NumUses = collectUses(RFI); \
426 (void)NumUses; \
427 LLVM_DEBUG({ \do { } while (false)
428 dbgs() << TAG << RFI.Name << (RFI.Declaration ? "" : " not") \do { } while (false)
429 << " found\n"; \do { } while (false)
430 if (RFI.Declaration) \do { } while (false)
431 dbgs() << TAG << "-> got " << NumUses << " uses in " \do { } while (false)
432 << RFI.getNumFunctionsWithUses() \do { } while (false)
433 << " different functions.\n"; \do { } while (false)
434 })do { } while (false); \
435 } \
436 }
437#include "llvm/Frontend/OpenMP/OMPKinds.def"
438
439 // TODO: We should attach the attributes defined in OMPKinds.def.
440 }
441
442 /// Collection of known kernels (\see Kernel) in the module. Held by
    /// reference; the set itself is owned by whoever constructed this cache.
443 SmallPtrSetImpl<Kernel> &Kernels;
444
445 /// Collection of known OpenMP runtime functions. Populated by
    /// initializeRuntimeFunctions() with the module lookup result of every
    /// runtime function name, before the signature check.
446 DenseSet<const Function *> RTLFunctions;
447};
448
/// A BooleanState combined with an insertion-ordered set of \p Ty elements.
///
/// When \p InsertInvalidates is true (the default), every call to insert()
/// first calls BooleanState::indicatePessimisticFixpoint(), whether or not
/// the element is new.
449template <typename Ty, bool InsertInvalidates = true>
450struct BooleanStateWithSetVector : public BooleanState {
    /// Return true if \p Elem is in the set.
451 bool contains(const Ty &Elem) const { return Set.contains(Elem); }
    /// Insert \p Elem and return the result of the underlying set insertion.
452 bool insert(const Ty &Elem) {
453 if (InsertInvalidates)
454 BooleanState::indicatePessimisticFixpoint();
455 return Set.insert(Elem);
456 }
457
    /// Positional access into the set; \p Idx is not range-checked here.
458 const Ty &operator[](int Idx) const { return Set[Idx]; }
    /// Two states are equal if both the boolean part and the sets are equal.
459 bool operator==(const BooleanStateWithSetVector &RHS) const {
460 return BooleanState::operator==(RHS) && Set == RHS.Set;
461 }
462 bool operator!=(const BooleanStateWithSetVector &RHS) const {
463 return !(*this == RHS);
464 }
465
466 bool empty() const { return Set.empty(); }
467 size_t size() const { return Set.size(); }
468
469 /// "Clamp" this state with \p RHS: apply BooleanState::operator^= to the
    /// boolean part and add all elements of RHS's set to this set.
470 BooleanStateWithSetVector &operator^=(const BooleanStateWithSetVector &RHS) {
471 BooleanState::operator^=(RHS);
472 Set.insert(RHS.Set.begin(), RHS.Set.end());
473 return *this;
474 }
475
476private:
477 /// A set to keep track of elements.
478 SetVector<Ty> Set;
479
480public:
    // Declared after Set so that decltype(Set) can name the iterator types.
481 typename decltype(Set)::iterator begin() { return Set.begin(); }
482 typename decltype(Set)::iterator end() { return Set.end(); }
483 typename decltype(Set)::const_iterator begin() const { return Set.begin(); }
484 typename decltype(Set)::const_iterator end() const { return Set.end(); }
485};
486
/// Convenience alias: a BooleanStateWithSetVector whose elements are pointers
/// to \p Ty, with the same InsertInvalidates default (true).
487template <typename Ty, bool InsertInvalidates = true>
488using BooleanStateWithPtrSetVector =
489 BooleanStateWithSetVector<Ty *, InsertInvalidates>;
490
491struct KernelInfoState : AbstractState {
492 /// Flag to track if we reached a fixpoint.
493 bool IsAtFixpoint = false;
494
495 /// The parallel regions (identified by the outlined parallel functions) that
496 /// can be reached from the associated function.
497 BooleanStateWithPtrSetVector<Function, /* InsertInvalidates */ false>
498 ReachedKnownParallelRegions;
499
500 /// State to track what parallel region we might reach.
501 BooleanStateWithPtrSetVector<CallBase> ReachedUnknownParallelRegions;
502
503 /// State to track if we are in SPMD-mode, assumed or known, and why we decided
504 /// we cannot be. If it is assumed, then RequiresFullRuntime should also be
505 /// false.
506 BooleanStateWithPtrSetVector<Instruction> SPMDCompatibilityTracker;
507
508 /// The __kmpc_target_init call in this kernel, if any. If we find more than
509 /// one we abort as the kernel is malformed.
510 CallBase *KernelInitCB = nullptr;
511
512 /// The __kmpc_target_deinit call in this kernel, if any. If we find more than
513 /// one we abort as the kernel is malformed.
514 CallBase *KernelDeinitCB = nullptr;
515
516 /// Flag to indicate if the associated function is a kernel entry.
517 bool IsKernelEntry = false;
518
519 /// State to track what kernel entries can reach the associated function.
520 BooleanStateWithPtrSetVector<Function, false> ReachingKernelEntries;
521
522 /// State to indicate if we can track parallel level of the associated
523 /// function. We will give up tracking if we encounter unknown caller or the
524 /// caller is __kmpc_parallel_51.
525 BooleanStateWithSetVector<uint8_t> ParallelLevels;
526
527 /// Abstract State interface
528 ///{
529
530 KernelInfoState() {}
531 KernelInfoState(bool BestState) {
532 if (!BestState)
533 indicatePessimisticFixpoint();
534 }
535
536 /// See AbstractState::isValidState(...)
537 bool isValidState() const override { return true; }
538
539 /// See AbstractState::isAtFixpoint(...)
540 bool isAtFixpoint() const override { return IsAtFixpoint; }
541
542 /// See AbstractState::indicatePessimisticFixpoint(...)
543 ChangeStatus indicatePessimisticFixpoint() override {
544 IsAtFixpoint = true;
545 SPMDCompatibilityTracker.indicatePessimisticFixpoint();
546 ReachedUnknownParallelRegions.indicatePessimisticFixpoint();
547 return ChangeStatus::CHANGED;
548 }
549
550 /// See AbstractState::indicateOptimisticFixpoint(...)
551 ChangeStatus indicateOptimisticFixpoint() override {
552 IsAtFixpoint = true;
553 return ChangeStatus::UNCHANGED;
554 }
555
556 /// Return the assumed state
557 KernelInfoState &getAssumed() { return *this; }
558 const KernelInfoState &getAssumed() const { return *this; }
559
560 bool operator==(const KernelInfoState &RHS) const {
561 if (SPMDCompatibilityTracker != RHS.SPMDCompatibilityTracker)
562 return false;
563 if (ReachedKnownParallelRegions != RHS.ReachedKnownParallelRegions)
564 return false;
565 if (ReachedUnknownParallelRegions != RHS.ReachedUnknownParallelRegions)
566 return false;
567 if (ReachingKernelEntries != RHS.ReachingKernelEntries)
568 return false;
569 return true;
570 }
571
572 /// Return empty set as the best state of potential values.
573 static KernelInfoState getBestState() { return KernelInfoState(true); }
574
575 static KernelInfoState getBestState(KernelInfoState &KIS) {
576 return getBestState();
577 }
578
579 /// Return full set as the worst state of potential values.
580 static KernelInfoState getWorstState() { return KernelInfoState(false); }
581
582 /// "Clamp" this state with \p KIS.
583 KernelInfoState operator^=(const KernelInfoState &KIS) {
584 // Do not merge two different _init and _deinit call sites.
585 if (KIS.KernelInitCB) {
586 if (KernelInitCB && KernelInitCB != KIS.KernelInitCB)
587 indicatePessimisticFixpoint();
588 KernelInitCB = KIS.KernelInitCB;
589 }
590 if (KIS.KernelDeinitCB) {
591 if (KernelDeinitCB && KernelDeinitCB != KIS.KernelDeinitCB)
592 indicatePessimisticFixpoint();
593 KernelDeinitCB = KIS.KernelDeinitCB;
594 }
595 SPMDCompatibilityTracker ^= KIS.SPMDCompatibilityTracker;
596 ReachedKnownParallelRegions ^= KIS.ReachedKnownParallelRegions;
597 ReachedUnknownParallelRegions ^= KIS.ReachedUnknownParallelRegions;
598 return *this;
599 }
600
601 KernelInfoState operator&=(const KernelInfoState &KIS) {
602 return (*this ^= KIS);
603 }
604
605 ///}
606};
607
608/// Used to map the values physically (in the IR) stored in an offload
609/// array, to a vector in memory.
610struct OffloadArray {
611 /// Physical array (in the IR).
612 AllocaInst *Array = nullptr;
613 /// Mapped values.
614 SmallVector<Value *, 8> StoredValues;
615 /// Last stores made in the offload array.
616 SmallVector<StoreInst *, 8> LastAccesses;
617
618 OffloadArray() = default;
619
620 /// Initializes the OffloadArray with the values stored in \p Array before
621 /// instruction \p Before is reached. Returns false if the initialization
622 /// fails.
623 /// This MUST be used immediately after the construction of the object.
624 bool initialize(AllocaInst &Array, Instruction &Before) {
625 if (!Array.getAllocatedType()->isArrayTy())
626 return false;
627
628 if (!getValues(Array, Before))
629 return false;
630
631 this->Array = &Array;
632 return true;
633 }
634
635 static const unsigned DeviceIDArgNum = 1;
636 static const unsigned BasePtrsArgNum = 3;
637 static const unsigned PtrsArgNum = 4;
638 static const unsigned SizesArgNum = 5;
639
640private:
641 /// Traverses the BasicBlock where \p Array is, collecting the stores made to
642 /// \p Array, leaving StoredValues with the values stored before the
643 /// instruction \p Before is reached.
644 bool getValues(AllocaInst &Array, Instruction &Before) {
645 // Initialize container.
646 const uint64_t NumValues = Array.getAllocatedType()->getArrayNumElements();
647 StoredValues.assign(NumValues, nullptr);
648 LastAccesses.assign(NumValues, nullptr);
649
650 // TODO: This assumes the instruction \p Before is in the same
651 // BasicBlock as Array. Make it general, for any control flow graph.
652 BasicBlock *BB = Array.getParent();
653 if (BB != Before.getParent())
654 return false;
655
656 const DataLayout &DL = Array.getModule()->getDataLayout();
657 const unsigned int PointerSize = DL.getPointerSize();
658
659 for (Instruction &I : *BB) {
660 if (&I == &Before)
661 break;
662
663 if (!isa<StoreInst>(&I))
664 continue;
665
666 auto *S = cast<StoreInst>(&I);
667 int64_t Offset = -1;
668 auto *Dst =
669 GetPointerBaseWithConstantOffset(S->getPointerOperand(), Offset, DL);
670 if (Dst == &Array) {
671 int64_t Idx = Offset / PointerSize;
672 StoredValues[Idx] = getUnderlyingObject(S->getValueOperand());
673 LastAccesses[Idx] = S;
674 }
675 }
676
677 return isFilled();
678 }
679
680 /// Returns true if all values in StoredValues and
681 /// LastAccesses are not nullptrs.
682 bool isFilled() {
683 const unsigned NumValues = StoredValues.size();
684 for (unsigned I = 0; I < NumValues; ++I) {
685 if (!StoredValues[I] || !LastAccesses[I])
686 return false;
687 }
688
689 return true;
690 }
691};
692
693struct OpenMPOpt {
694
695 using OptimizationRemarkGetter =
696 function_ref<OptimizationRemarkEmitter &(Function *)>;
697
698 OpenMPOpt(SmallVectorImpl<Function *> &SCC, CallGraphUpdater &CGUpdater,
699 OptimizationRemarkGetter OREGetter,
700 OMPInformationCache &OMPInfoCache, Attributor &A)
701 : M(*(*SCC.begin())->getParent()), SCC(SCC), CGUpdater(CGUpdater),
702 OREGetter(OREGetter), OMPInfoCache(OMPInfoCache), A(A) {}
703
704 /// Check if any remarks are enabled for openmp-opt
705 bool remarksEnabled() {
706 auto &Ctx = M.getContext();
707 return Ctx.getDiagHandlerPtr()->isAnyRemarkEnabled(DEBUG_TYPE"openmp-opt");
708 }
709
710 /// Run all OpenMP optimizations on the underlying SCC/ModuleSlice.
711 bool run(bool IsModulePass) {
712 if (SCC.empty())
713 return false;
714
715 bool Changed = false;
716
717 LLVM_DEBUG(dbgs() << TAG << "Run on SCC with " << SCC.size()do { } while (false)
718 << " functions in a slice with "do { } while (false)
719 << OMPInfoCache.ModuleSlice.size() << " functions\n")do { } while (false);
720
721 if (IsModulePass) {
722 Changed |= runAttributor(IsModulePass);
723
724 // Recollect uses, in case Attributor deleted any.
725 OMPInfoCache.recollectUses();
726
727 // TODO: This should be folded into buildCustomStateMachine.
728 Changed |= rewriteDeviceCodeStateMachine();
729
730 if (remarksEnabled())
731 analysisGlobalization();
732 } else {
733 if (PrintICVValues)
734 printICVs();
735 if (PrintOpenMPKernels)
736 printKernels();
737
738 Changed |= runAttributor(IsModulePass);
739
740 // Recollect uses, in case Attributor deleted any.
741 OMPInfoCache.recollectUses();
742
743 Changed |= deleteParallelRegions();
744
745 if (HideMemoryTransferLatency)
746 Changed |= hideMemTransfersLatency();
747 Changed |= deduplicateRuntimeCalls();
748 if (EnableParallelRegionMerging) {
749 if (mergeParallelRegions()) {
750 deduplicateRuntimeCalls();
751 Changed = true;
752 }
753 }
754 }
755
756 return Changed;
757 }
758
759 /// Print initial ICV values for testing.
760 /// FIXME: This should be done from the Attributor once it is added.
761 void printICVs() const {
762 InternalControlVar ICVs[] = {ICV_nthreads, ICV_active_levels, ICV_cancel,
763 ICV_proc_bind};
764
765 for (Function *F : OMPInfoCache.ModuleSlice) {
766 for (auto ICV : ICVs) {
767 auto ICVInfo = OMPInfoCache.ICVs[ICV];
768 auto Remark = [&](OptimizationRemarkAnalysis ORA) {
769 return ORA << "OpenMP ICV " << ore::NV("OpenMPICV", ICVInfo.Name)
770 << " Value: "
771 << (ICVInfo.InitValue
772 ? toString(ICVInfo.InitValue->getValue(), 10, true)
773 : "IMPLEMENTATION_DEFINED");
774 };
775
776 emitRemark<OptimizationRemarkAnalysis>(F, "OpenMPICVTracker", Remark);
777 }
778 }
779 }
780
781 /// Print OpenMP GPU kernels for testing.
782 void printKernels() const {
783 for (Function *F : SCC) {
784 if (!OMPInfoCache.Kernels.count(F))
785 continue;
786
787 auto Remark = [&](OptimizationRemarkAnalysis ORA) {
788 return ORA << "OpenMP GPU kernel "
789 << ore::NV("OpenMPGPUKernel", F->getName()) << "\n";
790 };
791
792 emitRemark<OptimizationRemarkAnalysis>(F, "OpenMPGPU", Remark);
793 }
794 }
795
796 /// Return the call if \p U is a callee use in a regular call. If \p RFI is
797 /// given it has to be the callee or a nullptr is returned.
798 static CallInst *getCallIfRegularCall(
799 Use &U, OMPInformationCache::RuntimeFunctionInfo *RFI = nullptr) {
800 CallInst *CI = dyn_cast<CallInst>(U.getUser());
801 if (CI && CI->isCallee(&U) && !CI->hasOperandBundles() &&
802 (!RFI ||
803 (RFI->Declaration && CI->getCalledFunction() == RFI->Declaration)))
804 return CI;
805 return nullptr;
806 }
807
808 /// Return the call if \p V is a regular call. If \p RFI is given it has to be
809 /// the callee or a nullptr is returned.
810 static CallInst *getCallIfRegularCall(
811 Value &V, OMPInformationCache::RuntimeFunctionInfo *RFI = nullptr) {
812 CallInst *CI = dyn_cast<CallInst>(&V);
813 if (CI && !CI->hasOperandBundles() &&
814 (!RFI ||
815 (RFI->Declaration && CI->getCalledFunction() == RFI->Declaration)))
816 return CI;
817 return nullptr;
818 }
819
820private:
821 /// Merge parallel regions when it is safe.
822 bool mergeParallelRegions() {
823 const unsigned CallbackCalleeOperand = 2;
824 const unsigned CallbackFirstArgOperand = 3;
825 using InsertPointTy = OpenMPIRBuilder::InsertPointTy;
826
827 // Check if there are any __kmpc_fork_call calls to merge.
828 OMPInformationCache::RuntimeFunctionInfo &RFI =
829 OMPInfoCache.RFIs[OMPRTL___kmpc_fork_call];
830
831 if (!RFI.Declaration)
832 return false;
833
834 // Unmergable calls that prevent merging a parallel region.
835 OMPInformationCache::RuntimeFunctionInfo UnmergableCallsInfo[] = {
836 OMPInfoCache.RFIs[OMPRTL___kmpc_push_proc_bind],
837 OMPInfoCache.RFIs[OMPRTL___kmpc_push_num_threads],
838 };
839
840 bool Changed = false;
841 LoopInfo *LI = nullptr;
842 DominatorTree *DT = nullptr;
843
844 SmallDenseMap<BasicBlock *, SmallPtrSet<Instruction *, 4>> BB2PRMap;
845
846 BasicBlock *StartBB = nullptr, *EndBB = nullptr;
847 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
848 BasicBlock &ContinuationIP) {
849 BasicBlock *CGStartBB = CodeGenIP.getBlock();
850 BasicBlock *CGEndBB =
851 SplitBlock(CGStartBB, &*CodeGenIP.getPoint(), DT, LI);
852 assert(StartBB != nullptr && "StartBB should not be null")((void)0);
853 CGStartBB->getTerminator()->setSuccessor(0, StartBB);
854 assert(EndBB != nullptr && "EndBB should not be null")((void)0);
855 EndBB->getTerminator()->setSuccessor(0, CGEndBB);
856 };
857
858 auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &,
859 Value &Inner, Value *&ReplacementValue) -> InsertPointTy {
860 ReplacementValue = &Inner;
861 return CodeGenIP;
862 };
863
864 auto FiniCB = [&](InsertPointTy CodeGenIP) {};
865
866 /// Create a sequential execution region within a merged parallel region,
867 /// encapsulated in a master construct with a barrier for synchronization.
868 auto CreateSequentialRegion = [&](Function *OuterFn,
869 BasicBlock *OuterPredBB,
870 Instruction *SeqStartI,
871 Instruction *SeqEndI) {
872 // Isolate the instructions of the sequential region to a separate
873 // block.
874 BasicBlock *ParentBB = SeqStartI->getParent();
875 BasicBlock *SeqEndBB =
876 SplitBlock(ParentBB, SeqEndI->getNextNode(), DT, LI);
877 BasicBlock *SeqAfterBB =
878 SplitBlock(SeqEndBB, &*SeqEndBB->getFirstInsertionPt(), DT, LI);
879 BasicBlock *SeqStartBB =
880 SplitBlock(ParentBB, SeqStartI, DT, LI, nullptr, "seq.par.merged");
881
882 assert(ParentBB->getUniqueSuccessor() == SeqStartBB &&((void)0)
883 "Expected a different CFG")((void)0);
884 const DebugLoc DL = ParentBB->getTerminator()->getDebugLoc();
885 ParentBB->getTerminator()->eraseFromParent();
886
887 auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
888 BasicBlock &ContinuationIP) {
889 BasicBlock *CGStartBB = CodeGenIP.getBlock();
890 BasicBlock *CGEndBB =
891 SplitBlock(CGStartBB, &*CodeGenIP.getPoint(), DT, LI);
892 assert(SeqStartBB != nullptr && "SeqStartBB should not be null")((void)0);
893 CGStartBB->getTerminator()->setSuccessor(0, SeqStartBB);
894 assert(SeqEndBB != nullptr && "SeqEndBB should not be null")((void)0);
895 SeqEndBB->getTerminator()->setSuccessor(0, CGEndBB);
896 };
897 auto FiniCB = [&](InsertPointTy CodeGenIP) {};
898
899 // Find outputs from the sequential region to outside users and
900 // broadcast their values to them.
901 for (Instruction &I : *SeqStartBB) {
902 SmallPtrSet<Instruction *, 4> OutsideUsers;
903 for (User *Usr : I.users()) {
904 Instruction &UsrI = *cast<Instruction>(Usr);
905 // Ignore outputs to LT intrinsics, code extraction for the merged
906 // parallel region will fix them.
907 if (UsrI.isLifetimeStartOrEnd())
908 continue;
909
910 if (UsrI.getParent() != SeqStartBB)
911 OutsideUsers.insert(&UsrI);
912 }
913
914 if (OutsideUsers.empty())
915 continue;
916
917 // Emit an alloca in the outer region to store the broadcasted
918 // value.
919 const DataLayout &DL = M.getDataLayout();
920 AllocaInst *AllocaI = new AllocaInst(
921 I.getType(), DL.getAllocaAddrSpace(), nullptr,
922 I.getName() + ".seq.output.alloc", &OuterFn->front().front());
923
924 // Emit a store instruction in the sequential BB to update the
925 // value.
926 new StoreInst(&I, AllocaI, SeqStartBB->getTerminator());
927
928 // Emit a load instruction and replace the use of the output value
929 // with it.
930 for (Instruction *UsrI : OutsideUsers) {
931 LoadInst *LoadI = new LoadInst(
932 I.getType(), AllocaI, I.getName() + ".seq.output.load", UsrI);
933 UsrI->replaceUsesOfWith(&I, LoadI);
934 }
935 }
936
937 OpenMPIRBuilder::LocationDescription Loc(
938 InsertPointTy(ParentBB, ParentBB->end()), DL);
939 InsertPointTy SeqAfterIP =
940 OMPInfoCache.OMPBuilder.createMaster(Loc, BodyGenCB, FiniCB);
941
942 OMPInfoCache.OMPBuilder.createBarrier(SeqAfterIP, OMPD_parallel);
943
944 BranchInst::Create(SeqAfterBB, SeqAfterIP.getBlock());
945
946 LLVM_DEBUG(dbgs() << TAG << "After sequential inlining " << *OuterFndo { } while (false)
947 << "\n")do { } while (false);
948 };
949
950 // Helper to merge the __kmpc_fork_call calls in MergableCIs. They are all
951 // contained in BB and only separated by instructions that can be
952 // redundantly executed in parallel. The block BB is split before the first
953 // call (in MergableCIs) and after the last so the entire region we merge
954 // into a single parallel region is contained in a single basic block
955 // without any other instructions. We use the OpenMPIRBuilder to outline
956 // that block and call the resulting function via __kmpc_fork_call.
957 auto Merge = [&](SmallVectorImpl<CallInst *> &MergableCIs, BasicBlock *BB) {
958 // TODO: Change the interface to allow single CIs expanded, e.g, to
959 // include an outer loop.
960 assert(MergableCIs.size() > 1 && "Assumed multiple mergable CIs")((void)0);
961
962 auto Remark = [&](OptimizationRemark OR) {
963 OR << "Parallel region merged with parallel region"
964 << (MergableCIs.size() > 2 ? "s" : "") << " at ";
965 for (auto *CI : llvm::drop_begin(MergableCIs)) {
966 OR << ore::NV("OpenMPParallelMerge", CI->getDebugLoc());
967 if (CI != MergableCIs.back())
968 OR << ", ";
969 }
970 return OR << ".";
971 };
972
973 emitRemark<OptimizationRemark>(MergableCIs.front(), "OMP150", Remark);
974
975 Function *OriginalFn = BB->getParent();
976 LLVM_DEBUG(dbgs() << TAG << "Merge " << MergableCIs.size()do { } while (false)
977 << " parallel regions in " << OriginalFn->getName()do { } while (false)
978 << "\n")do { } while (false);
979
980 // Isolate the calls to merge in a separate block.
981 EndBB = SplitBlock(BB, MergableCIs.back()->getNextNode(), DT, LI);
982 BasicBlock *AfterBB =
983 SplitBlock(EndBB, &*EndBB->getFirstInsertionPt(), DT, LI);
984 StartBB = SplitBlock(BB, MergableCIs.front(), DT, LI, nullptr,
985 "omp.par.merged");
986
987 assert(BB->getUniqueSuccessor() == StartBB && "Expected a different CFG")((void)0);
988 const DebugLoc DL = BB->getTerminator()->getDebugLoc();
989 BB->getTerminator()->eraseFromParent();
990
991 // Create sequential regions for sequential instructions that are
992 // in-between mergable parallel regions.
993 for (auto *It = MergableCIs.begin(), *End = MergableCIs.end() - 1;
994 It != End; ++It) {
995 Instruction *ForkCI = *It;
996 Instruction *NextForkCI = *(It + 1);
997
998 // Continue if there are not in-between instructions.
999 if (ForkCI->getNextNode() == NextForkCI)
1000 continue;
1001
1002 CreateSequentialRegion(OriginalFn, BB, ForkCI->getNextNode(),
1003 NextForkCI->getPrevNode());
1004 }
1005
1006 OpenMPIRBuilder::LocationDescription Loc(InsertPointTy(BB, BB->end()),
1007 DL);
1008 IRBuilder<>::InsertPoint AllocaIP(
1009 &OriginalFn->getEntryBlock(),
1010 OriginalFn->getEntryBlock().getFirstInsertionPt());
1011 // Create the merged parallel region with default proc binding, to
1012 // avoid overriding binding settings, and without explicit cancellation.
1013 InsertPointTy AfterIP = OMPInfoCache.OMPBuilder.createParallel(
1014 Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, nullptr, nullptr,
1015 OMP_PROC_BIND_default, /* IsCancellable */ false);
1016 BranchInst::Create(AfterBB, AfterIP.getBlock());
1017
1018 // Perform the actual outlining.
1019 OMPInfoCache.OMPBuilder.finalize(OriginalFn,
1020 /* AllowExtractorSinking */ true);
1021
1022 Function *OutlinedFn = MergableCIs.front()->getCaller();
1023
1024 // Replace the __kmpc_fork_call calls with direct calls to the outlined
1025 // callbacks.
1026 SmallVector<Value *, 8> Args;
1027 for (auto *CI : MergableCIs) {
1028 Value *Callee =
1029 CI->getArgOperand(CallbackCalleeOperand)->stripPointerCasts();
1030 FunctionType *FT =
1031 cast<FunctionType>(Callee->getType()->getPointerElementType());
1032 Args.clear();
1033 Args.push_back(OutlinedFn->getArg(0));
1034 Args.push_back(OutlinedFn->getArg(1));
1035 for (unsigned U = CallbackFirstArgOperand, E = CI->getNumArgOperands();
1036 U < E; ++U)
1037 Args.push_back(CI->getArgOperand(U));
1038
1039 CallInst *NewCI = CallInst::Create(FT, Callee, Args, "", CI);
1040 if (CI->getDebugLoc())
1041 NewCI->setDebugLoc(CI->getDebugLoc());
1042
1043 // Forward parameter attributes from the callback to the callee.
1044 for (unsigned U = CallbackFirstArgOperand, E = CI->getNumArgOperands();
1045 U < E; ++U)
1046 for (const Attribute &A : CI->getAttributes().getParamAttributes(U))
1047 NewCI->addParamAttr(
1048 U - (CallbackFirstArgOperand - CallbackCalleeOperand), A);
1049
1050 // Emit an explicit barrier to replace the implicit fork-join barrier.
1051 if (CI != MergableCIs.back()) {
1052 // TODO: Remove barrier if the merged parallel region includes the
1053 // 'nowait' clause.
1054 OMPInfoCache.OMPBuilder.createBarrier(
1055 InsertPointTy(NewCI->getParent(),
1056 NewCI->getNextNode()->getIterator()),
1057 OMPD_parallel);
1058 }
1059
1060 CI->eraseFromParent();
1061 }
1062
1063 assert(OutlinedFn != OriginalFn && "Outlining failed")((void)0);
1064 CGUpdater.registerOutlinedFunction(*OriginalFn, *OutlinedFn);
1065 CGUpdater.reanalyzeFunction(*OriginalFn);
1066
1067 NumOpenMPParallelRegionsMerged += MergableCIs.size();
1068
1069 return true;
1070 };
1071
1072 // Helper function that identifes sequences of
1073 // __kmpc_fork_call uses in a basic block.
1074 auto DetectPRsCB = [&](Use &U, Function &F) {
1075 CallInst *CI = getCallIfRegularCall(U, &RFI);
1076 BB2PRMap[CI->getParent()].insert(CI);
1077
1078 return false;
1079 };
1080
1081 BB2PRMap.clear();
1082 RFI.foreachUse(SCC, DetectPRsCB);
1083 SmallVector<SmallVector<CallInst *, 4>, 4> MergableCIsVector;
1084 // Find mergable parallel regions within a basic block that are
1085 // safe to merge, that is any in-between instructions can safely
1086 // execute in parallel after merging.
1087 // TODO: support merging across basic-blocks.
1088 for (auto &It : BB2PRMap) {
1089 auto &CIs = It.getSecond();
1090 if (CIs.size() < 2)
1091 continue;
1092
1093 BasicBlock *BB = It.getFirst();
1094 SmallVector<CallInst *, 4> MergableCIs;
1095
1096 /// Returns true if the instruction is mergable, false otherwise.
1097 /// A terminator instruction is unmergable by definition since merging
1098 /// works within a BB. Instructions before the mergable region are
1099 /// mergable if they are not calls to OpenMP runtime functions that may
1100 /// set different execution parameters for subsequent parallel regions.
1101 /// Instructions in-between parallel regions are mergable if they are not
1102 /// calls to any non-intrinsic function since that may call a non-mergable
1103 /// OpenMP runtime function.
1104 auto IsMergable = [&](Instruction &I, bool IsBeforeMergableRegion) {
1105 // We do not merge across BBs, hence return false (unmergable) if the
1106 // instruction is a terminator.
1107 if (I.isTerminator())
1108 return false;
1109
1110 if (!isa<CallInst>(&I))
1111 return true;
1112
1113 CallInst *CI = cast<CallInst>(&I);
1114 if (IsBeforeMergableRegion) {
1115 Function *CalledFunction = CI->getCalledFunction();
1116 if (!CalledFunction)
1117 return false;
1118 // Return false (unmergable) if the call before the parallel
1119 // region calls an explicit affinity (proc_bind) or number of
1120 // threads (num_threads) compiler-generated function. Those settings
1121 // may be incompatible with following parallel regions.
1122 // TODO: ICV tracking to detect compatibility.
1123 for (const auto &RFI : UnmergableCallsInfo) {
1124 if (CalledFunction == RFI.Declaration)
1125 return false;
1126 }
1127 } else {
1128 // Return false (unmergable) if there is a call instruction
1129 // in-between parallel regions when it is not an intrinsic. It
1130 // may call an unmergable OpenMP runtime function in its callpath.
1131 // TODO: Keep track of possible OpenMP calls in the callpath.
1132 if (!isa<IntrinsicInst>(CI))
1133 return false;
1134 }
1135
1136 return true;
1137 };
1138 // Find maximal number of parallel region CIs that are safe to merge.
1139 for (auto It = BB->begin(), End = BB->end(); It != End;) {
1140 Instruction &I = *It;
1141 ++It;
1142
1143 if (CIs.count(&I)) {
1144 MergableCIs.push_back(cast<CallInst>(&I));
1145 continue;
1146 }
1147
1148 // Continue expanding if the instruction is mergable.
1149 if (IsMergable(I, MergableCIs.empty()))
1150 continue;
1151
1152 // Forward the instruction iterator to skip the next parallel region
1153 // since there is an unmergable instruction which can affect it.
1154 for (; It != End; ++It) {
1155 Instruction &SkipI = *It;
1156 if (CIs.count(&SkipI)) {
1157 LLVM_DEBUG(dbgs() << TAG << "Skip parallel region " << SkipIdo { } while (false)
1158 << " due to " << I << "\n")do { } while (false);
1159 ++It;
1160 break;
1161 }
1162 }
1163
1164 // Store mergable regions found.
1165 if (MergableCIs.size() > 1) {
1166 MergableCIsVector.push_back(MergableCIs);
1167 LLVM_DEBUG(dbgs() << TAG << "Found " << MergableCIs.size()do { } while (false)
1168 << " parallel regions in block " << BB->getName()do { } while (false)
1169 << " of function " << BB->getParent()->getName()do { } while (false)
1170 << "\n";)do { } while (false);
1171 }
1172
1173 MergableCIs.clear();
1174 }
1175
1176 if (!MergableCIsVector.empty()) {
1177 Changed = true;
1178
1179 for (auto &MergableCIs : MergableCIsVector)
1180 Merge(MergableCIs, BB);
1181 MergableCIsVector.clear();
1182 }
1183 }
1184
1185 if (Changed) {
1186 /// Re-collect use for fork calls, emitted barrier calls, and
1187 /// any emitted master/end_master calls.
1188 OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_fork_call);
1189 OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_barrier);
1190 OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_master);
1191 OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_end_master);
1192 }
1193
1194 return Changed;
1195 }
1196
1197 /// Try to delete parallel regions if possible.
1198 bool deleteParallelRegions() {
1199 const unsigned CallbackCalleeOperand = 2;
1200
1201 OMPInformationCache::RuntimeFunctionInfo &RFI =
1202 OMPInfoCache.RFIs[OMPRTL___kmpc_fork_call];
1203
1204 if (!RFI.Declaration)
1205 return false;
1206
1207 bool Changed = false;
1208 auto DeleteCallCB = [&](Use &U, Function &) {
1209 CallInst *CI = getCallIfRegularCall(U);
1210 if (!CI)
1211 return false;
1212 auto *Fn = dyn_cast<Function>(
1213 CI->getArgOperand(CallbackCalleeOperand)->stripPointerCasts());
1214 if (!Fn)
1215 return false;
1216 if (!Fn->onlyReadsMemory())
1217 return false;
1218 if (!Fn->hasFnAttribute(Attribute::WillReturn))
1219 return false;
1220
1221 LLVM_DEBUG(dbgs() << TAG << "Delete read-only parallel region in "do { } while (false)
1222 << CI->getCaller()->getName() << "\n")do { } while (false);
1223
1224 auto Remark = [&](OptimizationRemark OR) {
1225 return OR << "Removing parallel region with no side-effects.";
1226 };
1227 emitRemark<OptimizationRemark>(CI, "OMP160", Remark);
1228
1229 CGUpdater.removeCallSite(*CI);
1230 CI->eraseFromParent();
1231 Changed = true;
1232 ++NumOpenMPParallelRegionsDeleted;
1233 return true;
1234 };
1235
1236 RFI.foreachUse(SCC, DeleteCallCB);
1237
1238 return Changed;
1239 }
1240
1241 /// Try to eliminate runtime calls by reusing existing ones.
1242 bool deduplicateRuntimeCalls() {
1243 bool Changed = false;
1244
1245 RuntimeFunction DeduplicableRuntimeCallIDs[] = {
1246 OMPRTL_omp_get_num_threads,
1247 OMPRTL_omp_in_parallel,
1248 OMPRTL_omp_get_cancellation,
1249 OMPRTL_omp_get_thread_limit,
1250 OMPRTL_omp_get_supported_active_levels,
1251 OMPRTL_omp_get_level,
1252 OMPRTL_omp_get_ancestor_thread_num,
1253 OMPRTL_omp_get_team_size,
1254 OMPRTL_omp_get_active_level,
1255 OMPRTL_omp_in_final,
1256 OMPRTL_omp_get_proc_bind,
1257 OMPRTL_omp_get_num_places,
1258 OMPRTL_omp_get_num_procs,
1259 OMPRTL_omp_get_place_num,
1260 OMPRTL_omp_get_partition_num_places,
1261 OMPRTL_omp_get_partition_place_nums};
1262
1263 // Global-tid is handled separately.
1264 SmallSetVector<Value *, 16> GTIdArgs;
1265 collectGlobalThreadIdArguments(GTIdArgs);
1266 LLVM_DEBUG(dbgs() << TAG << "Found " << GTIdArgs.size()do { } while (false)
1267 << " global thread ID arguments\n")do { } while (false);
1268
1269 for (Function *F : SCC) {
1270 for (auto DeduplicableRuntimeCallID : DeduplicableRuntimeCallIDs)
1271 Changed |= deduplicateRuntimeCalls(
1272 *F, OMPInfoCache.RFIs[DeduplicableRuntimeCallID]);
1273
1274 // __kmpc_global_thread_num is special as we can replace it with an
1275 // argument in enough cases to make it worth trying.
1276 Value *GTIdArg = nullptr;
1277 for (Argument &Arg : F->args())
1278 if (GTIdArgs.count(&Arg)) {
1279 GTIdArg = &Arg;
1280 break;
1281 }
1282 Changed |= deduplicateRuntimeCalls(
1283 *F, OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num], GTIdArg);
1284 }
1285
1286 return Changed;
1287 }
1288
1289 /// Tries to hide the latency of runtime calls that involve host to
1290 /// device memory transfers by splitting them into their "issue" and "wait"
1291 /// versions. The "issue" is moved upwards as much as possible. The "wait" is
1292 /// moved downards as much as possible. The "issue" issues the memory transfer
1293 /// asynchronously, returning a handle. The "wait" waits in the returned
1294 /// handle for the memory transfer to finish.
1295 bool hideMemTransfersLatency() {
1296 auto &RFI = OMPInfoCache.RFIs[OMPRTL___tgt_target_data_begin_mapper];
1297 bool Changed = false;
1298 auto SplitMemTransfers = [&](Use &U, Function &Decl) {
1299 auto *RTCall = getCallIfRegularCall(U, &RFI);
1300 if (!RTCall)
1301 return false;
1302
1303 OffloadArray OffloadArrays[3];
1304 if (!getValuesInOffloadArrays(*RTCall, OffloadArrays))
1305 return false;
1306
1307 LLVM_DEBUG(dumpValuesInOffloadArrays(OffloadArrays))do { } while (false);
1308
1309 // TODO: Check if can be moved upwards.
1310 bool WasSplit = false;
1311 Instruction *WaitMovementPoint = canBeMovedDownwards(*RTCall);
1312 if (WaitMovementPoint)
1313 WasSplit = splitTargetDataBeginRTC(*RTCall, *WaitMovementPoint);
1314
1315 Changed |= WasSplit;
1316 return WasSplit;
1317 };
1318 RFI.foreachUse(SCC, SplitMemTransfers);
1319
1320 return Changed;
1321 }
1322
1323 void analysisGlobalization() {
1324 auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared];
1325
1326 auto CheckGlobalization = [&](Use &U, Function &Decl) {
1327 if (CallInst *CI = getCallIfRegularCall(U, &RFI)) {
1328 auto Remark = [&](OptimizationRemarkMissed ORM) {
1329 return ORM
1330 << "Found thread data sharing on the GPU. "
1331 << "Expect degraded performance due to data globalization.";
1332 };
1333 emitRemark<OptimizationRemarkMissed>(CI, "OMP112", Remark);
1334 }
1335
1336 return false;
1337 };
1338
1339 RFI.foreachUse(SCC, CheckGlobalization);
1340 }
1341
1342 /// Maps the values stored in the offload arrays passed as arguments to
1343 /// \p RuntimeCall into the offload arrays in \p OAs.
1344 bool getValuesInOffloadArrays(CallInst &RuntimeCall,
1345 MutableArrayRef<OffloadArray> OAs) {
1346 assert(OAs.size() == 3 && "Need space for three offload arrays!")((void)0);
1347
1348 // A runtime call that involves memory offloading looks something like:
1349 // call void @__tgt_target_data_begin_mapper(arg0, arg1,
1350 // i8** %offload_baseptrs, i8** %offload_ptrs, i64* %offload_sizes,
1351 // ...)
1352 // So, the idea is to access the allocas that allocate space for these
1353 // offload arrays, offload_baseptrs, offload_ptrs, offload_sizes.
1354 // Therefore:
1355 // i8** %offload_baseptrs.
1356 Value *BasePtrsArg =
1357 RuntimeCall.getArgOperand(OffloadArray::BasePtrsArgNum);
1358 // i8** %offload_ptrs.
1359 Value *PtrsArg = RuntimeCall.getArgOperand(OffloadArray::PtrsArgNum);
1360 // i8** %offload_sizes.
1361 Value *SizesArg = RuntimeCall.getArgOperand(OffloadArray::SizesArgNum);
1362
1363 // Get values stored in **offload_baseptrs.
1364 auto *V = getUnderlyingObject(BasePtrsArg);
1365 if (!isa<AllocaInst>(V))
1366 return false;
1367 auto *BasePtrsArray = cast<AllocaInst>(V);
1368 if (!OAs[0].initialize(*BasePtrsArray, RuntimeCall))
1369 return false;
1370
1371 // Get values stored in **offload_baseptrs.
1372 V = getUnderlyingObject(PtrsArg);
1373 if (!isa<AllocaInst>(V))
1374 return false;
1375 auto *PtrsArray = cast<AllocaInst>(V);
1376 if (!OAs[1].initialize(*PtrsArray, RuntimeCall))
1377 return false;
1378
1379 // Get values stored in **offload_sizes.
1380 V = getUnderlyingObject(SizesArg);
1381 // If it's a [constant] global array don't analyze it.
1382 if (isa<GlobalValue>(V))
1383 return isa<Constant>(V);
1384 if (!isa<AllocaInst>(V))
1385 return false;
1386
1387 auto *SizesArray = cast<AllocaInst>(V);
1388 if (!OAs[2].initialize(*SizesArray, RuntimeCall))
1389 return false;
1390
1391 return true;
1392 }
1393
1394 /// Prints the values in the OffloadArrays \p OAs using LLVM_DEBUG.
1395 /// For now this is a way to test that the function getValuesInOffloadArrays
1396 /// is working properly.
1397 /// TODO: Move this to a unittest when unittests are available for OpenMPOpt.
1398 void dumpValuesInOffloadArrays(ArrayRef<OffloadArray> OAs) {
1399 assert(OAs.size() == 3 && "There are three offload arrays to debug!")((void)0);
1400
1401 LLVM_DEBUG(dbgs() << TAG << " Successfully got offload values:\n")do { } while (false);
1402 std::string ValuesStr;
1403 raw_string_ostream Printer(ValuesStr);
1404 std::string Separator = " --- ";
1405
1406 for (auto *BP : OAs[0].StoredValues) {
1407 BP->print(Printer);
1408 Printer << Separator;
1409 }
1410 LLVM_DEBUG(dbgs() << "\t\toffload_baseptrs: " << Printer.str() << "\n")do { } while (false);
1411 ValuesStr.clear();
1412
1413 for (auto *P : OAs[1].StoredValues) {
1414 P->print(Printer);
1415 Printer << Separator;
1416 }
1417 LLVM_DEBUG(dbgs() << "\t\toffload_ptrs: " << Printer.str() << "\n")do { } while (false);
1418 ValuesStr.clear();
1419
1420 for (auto *S : OAs[2].StoredValues) {
1421 S->print(Printer);
1422 Printer << Separator;
1423 }
1424 LLVM_DEBUG(dbgs() << "\t\toffload_sizes: " << Printer.str() << "\n")do { } while (false);
1425 }
1426
1427 /// Returns the instruction where the "wait" counterpart \p RuntimeCall can be
1428 /// moved. Returns nullptr if the movement is not possible, or not worth it.
1429 Instruction *canBeMovedDownwards(CallInst &RuntimeCall) {
1430 // FIXME: This traverses only the BasicBlock where RuntimeCall is.
1431 // Make it traverse the CFG.
1432
1433 Instruction *CurrentI = &RuntimeCall;
1434 bool IsWorthIt = false;
1435 while ((CurrentI = CurrentI->getNextNode())) {
1436
1437 // TODO: Once we detect the regions to be offloaded we should use the
1438 // alias analysis manager to check if CurrentI may modify one of
1439 // the offloaded regions.
1440 if (CurrentI->mayHaveSideEffects() || CurrentI->mayReadFromMemory()) {
1441 if (IsWorthIt)
1442 return CurrentI;
1443
1444 return nullptr;
1445 }
1446
1447 // FIXME: For now if we move it over anything without side effect
1448 // is worth it.
1449 IsWorthIt = true;
1450 }
1451
1452 // Return end of BasicBlock.
1453 return RuntimeCall.getParent()->getTerminator();
1454 }
1455
1456 /// Splits \p RuntimeCall into its "issue" and "wait" counterparts.
1457 bool splitTargetDataBeginRTC(CallInst &RuntimeCall,
1458 Instruction &WaitMovementPoint) {
1459 // Create stack allocated handle (__tgt_async_info) at the beginning of the
1460 // function. Used for storing information of the async transfer, allowing to
1461 // wait on it later.
1462 auto &IRBuilder = OMPInfoCache.OMPBuilder;
1463 auto *F = RuntimeCall.getCaller();
1464 Instruction *FirstInst = &(F->getEntryBlock().front());
1465 AllocaInst *Handle = new AllocaInst(
1466 IRBuilder.AsyncInfo, F->getAddressSpace(), "handle", FirstInst);
1467
1468 // Add "issue" runtime call declaration:
1469 // declare %struct.tgt_async_info @__tgt_target_data_begin_issue(i64, i32,
1470 // i8**, i8**, i64*, i64*)
1471 FunctionCallee IssueDecl = IRBuilder.getOrCreateRuntimeFunction(
1472 M, OMPRTL___tgt_target_data_begin_mapper_issue);
1473
1474 // Change RuntimeCall call site for its asynchronous version.
1475 SmallVector<Value *, 16> Args;
1476 for (auto &Arg : RuntimeCall.args())
1477 Args.push_back(Arg.get());
1478 Args.push_back(Handle);
1479
1480 CallInst *IssueCallsite =
1481 CallInst::Create(IssueDecl, Args, /*NameStr=*/"", &RuntimeCall);
1482 RuntimeCall.eraseFromParent();
1483
1484 // Add "wait" runtime call declaration:
1485 // declare void @__tgt_target_data_begin_wait(i64, %struct.__tgt_async_info)
1486 FunctionCallee WaitDecl = IRBuilder.getOrCreateRuntimeFunction(
1487 M, OMPRTL___tgt_target_data_begin_mapper_wait);
1488
1489 Value *WaitParams[2] = {
1490 IssueCallsite->getArgOperand(
1491 OffloadArray::DeviceIDArgNum), // device_id.
1492 Handle // handle to wait on.
1493 };
1494 CallInst::Create(WaitDecl, WaitParams, /*NameStr=*/"", &WaitMovementPoint);
1495
1496 return true;
1497 }
1498
1499 static Value *combinedIdentStruct(Value *CurrentIdent, Value *NextIdent,
1500 bool GlobalOnly, bool &SingleChoice) {
1501 if (CurrentIdent == NextIdent)
1502 return CurrentIdent;
1503
1504 // TODO: Figure out how to actually combine multiple debug locations. For
1505 // now we just keep an existing one if there is a single choice.
1506 if (!GlobalOnly || isa<GlobalValue>(NextIdent)) {
1507 SingleChoice = !CurrentIdent;
1508 return NextIdent;
1509 }
1510 return nullptr;
1511 }
1512
1513 /// Return an `struct ident_t*` value that represents the ones used in the
1514 /// calls of \p RFI inside of \p F. If \p GlobalOnly is true, we will not
1515 /// return a local `struct ident_t*`. For now, if we cannot find a suitable
1516 /// return value we create one from scratch. We also do not yet combine
1517 /// information, e.g., the source locations, see combinedIdentStruct.
1518 Value *
1519 getCombinedIdentFromCallUsesIn(OMPInformationCache::RuntimeFunctionInfo &RFI,
1520 Function &F, bool GlobalOnly) {
1521 bool SingleChoice = true;
1522 Value *Ident = nullptr;
1523 auto CombineIdentStruct = [&](Use &U, Function &Caller) {
1524 CallInst *CI = getCallIfRegularCall(U, &RFI);
1525 if (!CI || &F != &Caller)
1526 return false;
1527 Ident = combinedIdentStruct(Ident, CI->getArgOperand(0),
1528 /* GlobalOnly */ true, SingleChoice);
1529 return false;
1530 };
1531 RFI.foreachUse(SCC, CombineIdentStruct);
1532
1533 if (!Ident || !SingleChoice) {
1534 // The IRBuilder uses the insertion block to get to the module, this is
1535 // unfortunate but we work around it for now.
1536 if (!OMPInfoCache.OMPBuilder.getInsertionPoint().getBlock())
1537 OMPInfoCache.OMPBuilder.updateToLocation(OpenMPIRBuilder::InsertPointTy(
1538 &F.getEntryBlock(), F.getEntryBlock().begin()));
1539 // Create a fallback location if non was found.
1540 // TODO: Use the debug locations of the calls instead.
1541 Constant *Loc = OMPInfoCache.OMPBuilder.getOrCreateDefaultSrcLocStr();
1542 Ident = OMPInfoCache.OMPBuilder.getOrCreateIdent(Loc);
1543 }
1544 return Ident;
1545 }
1546
1547 /// Try to eliminate calls of \p RFI in \p F by reusing an existing one or
1548 /// \p ReplVal if given.
1549 bool deduplicateRuntimeCalls(Function &F,
1550 OMPInformationCache::RuntimeFunctionInfo &RFI,
1551 Value *ReplVal = nullptr) {
1552 auto *UV = RFI.getUseVector(F);
1553 if (!UV || UV->size() + (ReplVal != nullptr) < 2)
1554 return false;
1555
1556 LLVM_DEBUG(do { } while (false)
1557 dbgs() << TAG << "Deduplicate " << UV->size() << " uses of " << RFI.Namedo { } while (false)
1558 << (ReplVal ? " with an existing value\n" : "\n") << "\n")do { } while (false);
1559
1560 assert((!ReplVal || (isa<Argument>(ReplVal) &&((void)0)
1561 cast<Argument>(ReplVal)->getParent() == &F)) &&((void)0)
1562 "Unexpected replacement value!")((void)0);
1563
1564 // TODO: Use dominance to find a good position instead.
1565 auto CanBeMoved = [this](CallBase &CB) {
1566 unsigned NumArgs = CB.getNumArgOperands();
1567 if (NumArgs == 0)
1568 return true;
1569 if (CB.getArgOperand(0)->getType() != OMPInfoCache.OMPBuilder.IdentPtr)
1570 return false;
1571 for (unsigned u = 1; u < NumArgs; ++u)
1572 if (isa<Instruction>(CB.getArgOperand(u)))
1573 return false;
1574 return true;
1575 };
1576
1577 if (!ReplVal) {
1578 for (Use *U : *UV)
1579 if (CallInst *CI = getCallIfRegularCall(*U, &RFI)) {
1580 if (!CanBeMoved(*CI))
1581 continue;
1582
1583 // If the function is a kernel, dedup will move
1584 // the runtime call right after the kernel init callsite. Otherwise,
1585 // it will move it to the beginning of the caller function.
1586 if (isKernel(F)) {
1587 auto &KernelInitRFI = OMPInfoCache.RFIs[OMPRTL___kmpc_target_init];
1588 auto *KernelInitUV = KernelInitRFI.getUseVector(F);
1589
1590 if (KernelInitUV->empty())
1591 continue;
1592
1593 assert(KernelInitUV->size() == 1 &&((void)0)
1594 "Expected a single __kmpc_target_init in kernel\n")((void)0);
1595
1596 CallInst *KernelInitCI =
1597 getCallIfRegularCall(*KernelInitUV->front(), &KernelInitRFI);
1598 assert(KernelInitCI &&((void)0)
1599 "Expected a call to __kmpc_target_init in kernel\n")((void)0);
1600
1601 CI->moveAfter(KernelInitCI);
1602 } else
1603 CI->moveBefore(&*F.getEntryBlock().getFirstInsertionPt());
1604 ReplVal = CI;
1605 break;
1606 }
1607 if (!ReplVal)
1608 return false;
1609 }
1610
1611 // If we use a call as a replacement value we need to make sure the ident is
1612 // valid at the new location. For now we just pick a global one, either
1613 // existing and used by one of the calls, or created from scratch.
1614 if (CallBase *CI = dyn_cast<CallBase>(ReplVal)) {
1615 if (CI->getNumArgOperands() > 0 &&
1616 CI->getArgOperand(0)->getType() == OMPInfoCache.OMPBuilder.IdentPtr) {
1617 Value *Ident = getCombinedIdentFromCallUsesIn(RFI, F,
1618 /* GlobalOnly */ true);
1619 CI->setArgOperand(0, Ident);
1620 }
1621 }
1622
1623 bool Changed = false;
1624 auto ReplaceAndDeleteCB = [&](Use &U, Function &Caller) {
1625 CallInst *CI = getCallIfRegularCall(U, &RFI);
1626 if (!CI || CI == ReplVal || &F != &Caller)
1627 return false;
1628 assert(CI->getCaller() == &F && "Unexpected call!")((void)0);
1629
1630 auto Remark = [&](OptimizationRemark OR) {
1631 return OR << "OpenMP runtime call "
1632 << ore::NV("OpenMPOptRuntime", RFI.Name) << " deduplicated.";
1633 };
1634 if (CI->getDebugLoc())
1635 emitRemark<OptimizationRemark>(CI, "OMP170", Remark);
1636 else
1637 emitRemark<OptimizationRemark>(&F, "OMP170", Remark);
1638
1639 CGUpdater.removeCallSite(*CI);
1640 CI->replaceAllUsesWith(ReplVal);
1641 CI->eraseFromParent();
1642 ++NumOpenMPRuntimeCallsDeduplicated;
1643 Changed = true;
1644 return true;
1645 };
1646 RFI.foreachUse(SCC, ReplaceAndDeleteCB);
1647
1648 return Changed;
1649 }
1650
1651 /// Collect arguments that represent the global thread id in \p GTIdArgs.
1652 void collectGlobalThreadIdArguments(SmallSetVector<Value *, 16> &GTIdArgs) {
1653 // TODO: Below we basically perform a fixpoint iteration with a pessimistic
1654 // initialization. We could define an AbstractAttribute instead and
1655 // run the Attributor here once it can be run as an SCC pass.
1656
1657 // Helper to check the argument \p ArgNo at all call sites of \p F for
1658 // a GTId.
1659 auto CallArgOpIsGTId = [&](Function &F, unsigned ArgNo, CallInst &RefCI) {
1660 if (!F.hasLocalLinkage())
1661 return false;
1662 for (Use &U : F.uses()) {
1663 if (CallInst *CI = getCallIfRegularCall(U)) {
1664 Value *ArgOp = CI->getArgOperand(ArgNo);
1665 if (CI == &RefCI || GTIdArgs.count(ArgOp) ||
1666 getCallIfRegularCall(
1667 *ArgOp, &OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num]))
1668 continue;
1669 }
1670 return false;
1671 }
1672 return true;
1673 };
1674
1675 // Helper to identify uses of a GTId as GTId arguments.
1676 auto AddUserArgs = [&](Value &GTId) {
1677 for (Use &U : GTId.uses())
1678 if (CallInst *CI = dyn_cast<CallInst>(U.getUser()))
1679 if (CI->isArgOperand(&U))
1680 if (Function *Callee = CI->getCalledFunction())
1681 if (CallArgOpIsGTId(*Callee, U.getOperandNo(), *CI))
1682 GTIdArgs.insert(Callee->getArg(U.getOperandNo()));
1683 };
1684
1685 // The argument users of __kmpc_global_thread_num calls are GTIds.
1686 OMPInformationCache::RuntimeFunctionInfo &GlobThreadNumRFI =
1687 OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num];
1688
1689 GlobThreadNumRFI.foreachUse(SCC, [&](Use &U, Function &F) {
1690 if (CallInst *CI = getCallIfRegularCall(U, &GlobThreadNumRFI))
1691 AddUserArgs(*CI);
1692 return false;
1693 });
1694
1695 // Transitively search for more arguments by looking at the users of the
1696 // ones we know already. During the search the GTIdArgs vector is extended
1697 // so we cannot cache the size nor can we use a range based for.
1698 for (unsigned u = 0; u < GTIdArgs.size(); ++u)
1699 AddUserArgs(*GTIdArgs[u]);
1700 }
1701
1702 /// Kernel (=GPU) optimizations and utility functions
1703 ///
1704 ///{{
1705
1706 /// Check if \p F is a kernel, hence entry point for target offloading.
1707 bool isKernel(Function &F) { return OMPInfoCache.Kernels.count(&F); }
1708
1709 /// Cache to remember the unique kernel for a function.
1710 DenseMap<Function *, Optional<Kernel>> UniqueKernelMap;
1711
1712 /// Find the unique kernel that will execute \p F, if any.
1713 Kernel getUniqueKernelFor(Function &F);
1714
1715 /// Find the unique kernel that will execute \p I, if any.
1716 Kernel getUniqueKernelFor(Instruction &I) {
1717 return getUniqueKernelFor(*I.getFunction());
1718 }
1719
1720 /// Rewrite the device (=GPU) code state machine created in non-SPMD mode in
1721 /// the cases where we can avoid taking the address of a function.
1722 bool rewriteDeviceCodeStateMachine();
1723
1724 ///
1725 ///}}
1726
1727 /// Emit a remark generically
1728 ///
1729 /// This template function can be used to generically emit a remark. The
1730 /// RemarkKind should be one of the following:
1731 /// - OptimizationRemark to indicate a successful optimization attempt
1732 /// - OptimizationRemarkMissed to report a failed optimization attempt
1733 /// - OptimizationRemarkAnalysis to provide additional information about an
1734 /// optimization attempt
1735 ///
1736 /// The remark is built using a callback function provided by the caller that
1737 /// takes a RemarkKind as input and returns a RemarkKind.
1738 template <typename RemarkKind, typename RemarkCallBack>
1739 void emitRemark(Instruction *I, StringRef RemarkName,
1740 RemarkCallBack &&RemarkCB) const {
1741 Function *F = I->getParent()->getParent();
1742 auto &ORE = OREGetter(F);
1743
1744 if (RemarkName.startswith("OMP"))
1745 ORE.emit([&]() {
1746 return RemarkCB(RemarkKind(DEBUG_TYPE"openmp-opt", RemarkName, I))
1747 << " [" << RemarkName << "]";
1748 });
1749 else
1750 ORE.emit(
1751 [&]() { return RemarkCB(RemarkKind(DEBUG_TYPE"openmp-opt", RemarkName, I)); });
1752 }
1753
1754 /// Emit a remark on a function.
1755 template <typename RemarkKind, typename RemarkCallBack>
1756 void emitRemark(Function *F, StringRef RemarkName,
1757 RemarkCallBack &&RemarkCB) const {
1758 auto &ORE = OREGetter(F);
1759
1760 if (RemarkName.startswith("OMP"))
1761 ORE.emit([&]() {
1762 return RemarkCB(RemarkKind(DEBUG_TYPE"openmp-opt", RemarkName, F))
1763 << " [" << RemarkName << "]";
1764 });
1765 else
1766 ORE.emit(
1767 [&]() { return RemarkCB(RemarkKind(DEBUG_TYPE"openmp-opt", RemarkName, F)); });
1768 }
1769
1770 /// RAII struct to temporarily change an RTL function's linkage to external.
1771 /// This prevents it from being mistakenly removed by other optimizations.
1772 struct ExternalizationRAII {
1773 ExternalizationRAII(OMPInformationCache &OMPInfoCache,
1774 RuntimeFunction RFKind)
1775 : Declaration(OMPInfoCache.RFIs[RFKind].Declaration) {
1776 if (!Declaration)
1777 return;
1778
1779 LinkageType = Declaration->getLinkage();
1780 Declaration->setLinkage(GlobalValue::ExternalLinkage);
1781 }
1782
1783 ~ExternalizationRAII() {
1784 if (!Declaration)
1785 return;
1786
1787 Declaration->setLinkage(LinkageType);
1788 }
1789
1790 Function *Declaration;
1791 GlobalValue::LinkageTypes LinkageType;
1792 };
1793
1794 /// The underlying module.
1795 Module &M;
1796
1797 /// The SCC we are operating on.
1798 SmallVectorImpl<Function *> &SCC;
1799
1800 /// Callback to update the call graph, the first argument is a removed call,
1801 /// the second an optional replacement call.
1802 CallGraphUpdater &CGUpdater;
1803
1804 /// Callback to get an OptimizationRemarkEmitter from a Function *
1805 OptimizationRemarkGetter OREGetter;
1806
1807 /// OpenMP-specific information cache. Also Used for Attributor runs.
1808 OMPInformationCache &OMPInfoCache;
1809
1810 /// Attributor instance.
1811 Attributor &A;
1812
1813 /// Helper function to run Attributor on SCC.
1814 bool runAttributor(bool IsModulePass) {
1815 if (SCC.empty())
1816 return false;
1817
1818 // Temporarily make these function have external linkage so the Attributor
1819 // doesn't remove them when we try to look them up later.
1820 ExternalizationRAII Parallel(OMPInfoCache, OMPRTL___kmpc_kernel_parallel);
1821 ExternalizationRAII EndParallel(OMPInfoCache,
1822 OMPRTL___kmpc_kernel_end_parallel);
1823 ExternalizationRAII BarrierSPMD(OMPInfoCache,
1824 OMPRTL___kmpc_barrier_simple_spmd);
1825
1826 registerAAs(IsModulePass);
1827
1828 ChangeStatus Changed = A.run();
1829
1830 LLVM_DEBUG(dbgs() << "[Attributor] Done with " << SCC.size()do { } while (false)
1831 << " functions, result: " << Changed << ".\n")do { } while (false);
1832
1833 return Changed == ChangeStatus::CHANGED;
1834 }
1835
1836 void registerFoldRuntimeCall(RuntimeFunction RF);
1837
1838 /// Populate the Attributor with abstract attribute opportunities in the
1839 /// function.
1840 void registerAAs(bool IsModulePass);
1841};
1842
1843Kernel OpenMPOpt::getUniqueKernelFor(Function &F) {
1844 if (!OMPInfoCache.ModuleSlice.count(&F))
1845 return nullptr;
1846
1847 // Use a scope to keep the lifetime of the CachedKernel short.
1848 {
1849 Optional<Kernel> &CachedKernel = UniqueKernelMap[&F];
1850 if (CachedKernel)
1851 return *CachedKernel;
1852
1853 // TODO: We should use an AA to create an (optimistic and callback
1854 // call-aware) call graph. For now we stick to simple patterns that
1855 // are less powerful, basically the worst fixpoint.
1856 if (isKernel(F)) {
1857 CachedKernel = Kernel(&F);
1858 return *CachedKernel;
1859 }
1860
1861 CachedKernel = nullptr;
1862 if (!F.hasLocalLinkage()) {
1863
1864 // See https://openmp.llvm.org/remarks/OptimizationRemarks.html
1865 auto Remark = [&](OptimizationRemarkAnalysis ORA) {
1866 return ORA << "Potentially unknown OpenMP target region caller.";
1867 };
1868 emitRemark<OptimizationRemarkAnalysis>(&F, "OMP100", Remark);
1869
1870 return nullptr;
1871 }
1872 }
1873
1874 auto GetUniqueKernelForUse = [&](const Use &U) -> Kernel {
1875 if (auto *Cmp = dyn_cast<ICmpInst>(U.getUser())) {
1876 // Allow use in equality comparisons.
1877 if (Cmp->isEquality())
1878 return getUniqueKernelFor(*Cmp);
1879 return nullptr;
1880 }
1881 if (auto *CB = dyn_cast<CallBase>(U.getUser())) {
1882 // Allow direct calls.
1883 if (CB->isCallee(&U))
1884 return getUniqueKernelFor(*CB);
1885
1886 OMPInformationCache::RuntimeFunctionInfo &KernelParallelRFI =
1887 OMPInfoCache.RFIs[OMPRTL___kmpc_parallel_51];
1888 // Allow the use in __kmpc_parallel_51 calls.
1889 if (OpenMPOpt::getCallIfRegularCall(*U.getUser(), &KernelParallelRFI))
1890 return getUniqueKernelFor(*CB);
1891 return nullptr;
1892 }
1893 // Disallow every other use.
1894 return nullptr;
1895 };
1896
1897 // TODO: In the future we want to track more than just a unique kernel.
1898 SmallPtrSet<Kernel, 2> PotentialKernels;
1899 OMPInformationCache::foreachUse(F, [&](const Use &U) {
1900 PotentialKernels.insert(GetUniqueKernelForUse(U));
1901 });
1902
1903 Kernel K = nullptr;
1904 if (PotentialKernels.size() == 1)
1905 K = *PotentialKernels.begin();
1906
1907 // Cache the result.
1908 UniqueKernelMap[&F] = K;
1909
1910 return K;
1911}
1912
1913bool OpenMPOpt::rewriteDeviceCodeStateMachine() {
1914 OMPInformationCache::RuntimeFunctionInfo &KernelParallelRFI =
1915 OMPInfoCache.RFIs[OMPRTL___kmpc_parallel_51];
1916
1917 bool Changed = false;
1918 if (!KernelParallelRFI)
1919 return Changed;
1920
1921 for (Function *F : SCC) {
1922
1923 // Check if the function is a use in a __kmpc_parallel_51 call at
1924 // all.
1925 bool UnknownUse = false;
1926 bool KernelParallelUse = false;
1927 unsigned NumDirectCalls = 0;
1928
1929 SmallVector<Use *, 2> ToBeReplacedStateMachineUses;
1930 OMPInformationCache::foreachUse(*F, [&](Use &U) {
1931 if (auto *CB = dyn_cast<CallBase>(U.getUser()))
1932 if (CB->isCallee(&U)) {
1933 ++NumDirectCalls;
1934 return;
1935 }
1936
1937 if (isa<ICmpInst>(U.getUser())) {
1938 ToBeReplacedStateMachineUses.push_back(&U);
1939 return;
1940 }
1941
1942 // Find wrapper functions that represent parallel kernels.
1943 CallInst *CI =
1944 OpenMPOpt::getCallIfRegularCall(*U.getUser(), &KernelParallelRFI);
1945 const unsigned int WrapperFunctionArgNo = 6;
1946 if (!KernelParallelUse && CI &&
1947 CI->getArgOperandNo(&U) == WrapperFunctionArgNo) {
1948 KernelParallelUse = true;
1949 ToBeReplacedStateMachineUses.push_back(&U);
1950 return;
1951 }
1952 UnknownUse = true;
1953 });
1954
1955 // Do not emit a remark if we haven't seen a __kmpc_parallel_51
1956 // use.
1957 if (!KernelParallelUse)
1958 continue;
1959
1960 // If this ever hits, we should investigate.
1961 // TODO: Checking the number of uses is not a necessary restriction and
1962 // should be lifted.
1963 if (UnknownUse || NumDirectCalls != 1 ||
1964 ToBeReplacedStateMachineUses.size() > 2) {
1965 auto Remark = [&](OptimizationRemarkAnalysis ORA) {
1966 return ORA << "Parallel region is used in "
1967 << (UnknownUse ? "unknown" : "unexpected")
1968 << " ways. Will not attempt to rewrite the state machine.";
1969 };
1970 emitRemark<OptimizationRemarkAnalysis>(F, "OMP101", Remark);
1971 continue;
1972 }
1973
1974 // Even if we have __kmpc_parallel_51 calls, we (for now) give
1975 // up if the function is not called from a unique kernel.
1976 Kernel K = getUniqueKernelFor(*F);
1977 if (!K) {
1978 auto Remark = [&](OptimizationRemarkAnalysis ORA) {
1979 return ORA << "Parallel region is not called from a unique kernel. "
1980 "Will not attempt to rewrite the state machine.";
1981 };
1982 emitRemark<OptimizationRemarkAnalysis>(F, "OMP102", Remark);
1983 continue;
1984 }
1985
1986 // We now know F is a parallel body function called only from the kernel K.
1987 // We also identified the state machine uses in which we replace the
1988 // function pointer by a new global symbol for identification purposes. This
1989 // ensures only direct calls to the function are left.
1990
1991 Module &M = *F->getParent();
1992 Type *Int8Ty = Type::getInt8Ty(M.getContext());
1993
1994 auto *ID = new GlobalVariable(
1995 M, Int8Ty, /* isConstant */ true, GlobalValue::PrivateLinkage,
1996 UndefValue::get(Int8Ty), F->getName() + ".ID");
1997
1998 for (Use *U : ToBeReplacedStateMachineUses)
1999 U->set(ConstantExpr::getPointerBitCastOrAddrSpaceCast(
2000 ID, U->get()->getType()));
2001
2002 ++NumOpenMPParallelRegionsReplacedInGPUStateMachine;
2003
2004 Changed = true;
2005 }
2006
2007 return Changed;
2008}
2009
2010/// Abstract Attribute for tracking ICV values.
2011struct AAICVTracker : public StateWrapper<BooleanState, AbstractAttribute> {
2012 using Base = StateWrapper<BooleanState, AbstractAttribute>;
2013 AAICVTracker(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
2014
2015 void initialize(Attributor &A) override {
2016 Function *F = getAnchorScope();
2017 if (!F || !A.isFunctionIPOAmendable(*F))
2018 indicatePessimisticFixpoint();
2019 }
2020
2021 /// Returns true if value is assumed to be tracked.
2022 bool isAssumedTracked() const { return getAssumed(); }
2023
2024 /// Returns true if value is known to be tracked.
2025 bool isKnownTracked() const { return getAssumed(); }
2026
2027 /// Create an abstract attribute biew for the position \p IRP.
2028 static AAICVTracker &createForPosition(const IRPosition &IRP, Attributor &A);
2029
2030 /// Return the value with which \p I can be replaced for specific \p ICV.
2031 virtual Optional<Value *> getReplacementValue(InternalControlVar ICV,
2032 const Instruction *I,
2033 Attributor &A) const {
2034 return None;
2035 }
2036
2037 /// Return an assumed unique ICV value if a single candidate is found. If
2038 /// there cannot be one, return a nullptr. If it is not clear yet, return the
2039 /// Optional::NoneType.
2040 virtual Optional<Value *>
2041 getUniqueReplacementValue(InternalControlVar ICV) const = 0;
2042
2043 // Currently only nthreads is being tracked.
2044 // this array will only grow with time.
2045 InternalControlVar TrackableICVs[1] = {ICV_nthreads};
2046
2047 /// See AbstractAttribute::getName()
2048 const std::string getName() const override { return "AAICVTracker"; }
2049
2050 /// See AbstractAttribute::getIdAddr()
2051 const char *getIdAddr() const override { return &ID; }
2052
2053 /// This function should return true if the type of the \p AA is AAICVTracker
2054 static bool classof(const AbstractAttribute *AA) {
2055 return (AA->getIdAddr() == &ID);
2056 }
2057
2058 static const char ID;
2059};
2060
2061struct AAICVTrackerFunction : public AAICVTracker {
2062 AAICVTrackerFunction(const IRPosition &IRP, Attributor &A)
2063 : AAICVTracker(IRP, A) {}
2064
2065 // FIXME: come up with better string.
2066 const std::string getAsStr() const override { return "ICVTrackerFunction"; }
2067
2068 // FIXME: come up with some stats.
2069 void trackStatistics() const override {}
2070
2071 /// We don't manifest anything for this AA.
2072 ChangeStatus manifest(Attributor &A) override {
2073 return ChangeStatus::UNCHANGED;
2074 }
2075
2076 // Map of ICV to their values at specific program point.
2077 EnumeratedArray<DenseMap<Instruction *, Value *>, InternalControlVar,
2078 InternalControlVar::ICV___last>
2079 ICVReplacementValuesMap;
2080
2081 ChangeStatus updateImpl(Attributor &A) override {
2082 ChangeStatus HasChanged = ChangeStatus::UNCHANGED;
2083
2084 Function *F = getAnchorScope();
2085
2086 auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
2087
2088 for (InternalControlVar ICV : TrackableICVs) {
2089 auto &SetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Setter];
2090
2091 auto &ValuesMap = ICVReplacementValuesMap[ICV];
2092 auto TrackValues = [&](Use &U, Function &) {
2093 CallInst *CI = OpenMPOpt::getCallIfRegularCall(U);
2094 if (!CI)
2095 return false;
2096
2097 // FIXME: handle setters with more that 1 arguments.
2098 /// Track new value.
2099 if (ValuesMap.insert(std::make_pair(CI, CI->getArgOperand(0))).second)
2100 HasChanged = ChangeStatus::CHANGED;
2101
2102 return false;
2103 };
2104
2105 auto CallCheck = [&](Instruction &I) {
2106 Optional<Value *> ReplVal = getValueForCall(A, &I, ICV);
2107 if (ReplVal.hasValue() &&
2108 ValuesMap.insert(std::make_pair(&I, *ReplVal)).second)
2109 HasChanged = ChangeStatus::CHANGED;
2110
2111 return true;
2112 };
2113
2114 // Track all changes of an ICV.
2115 SetterRFI.foreachUse(TrackValues, F);
2116
2117 bool UsedAssumedInformation = false;
2118 A.checkForAllInstructions(CallCheck, *this, {Instruction::Call},
2119 UsedAssumedInformation,
2120 /* CheckBBLivenessOnly */ true);
2121
2122 /// TODO: Figure out a way to avoid adding entry in
2123 /// ICVReplacementValuesMap
2124 Instruction *Entry = &F->getEntryBlock().front();
2125 if (HasChanged == ChangeStatus::CHANGED && !ValuesMap.count(Entry))
2126 ValuesMap.insert(std::make_pair(Entry, nullptr));
2127 }
2128
2129 return HasChanged;
2130 }
2131
2132 /// Hepler to check if \p I is a call and get the value for it if it is
2133 /// unique.
2134 Optional<Value *> getValueForCall(Attributor &A, const Instruction *I,
2135 InternalControlVar &ICV) const {
2136
2137 const auto *CB = dyn_cast<CallBase>(I);
2138 if (!CB || CB->hasFnAttr("no_openmp") ||
2139 CB->hasFnAttr("no_openmp_routines"))
2140 return None;
2141
2142 auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
2143 auto &GetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Getter];
2144 auto &SetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Setter];
2145 Function *CalledFunction = CB->getCalledFunction();
2146
2147 // Indirect call, assume ICV changes.
2148 if (CalledFunction == nullptr)
2149 return nullptr;
2150 if (CalledFunction == GetterRFI.Declaration)
2151 return None;
2152 if (CalledFunction == SetterRFI.Declaration) {
2153 if (ICVReplacementValuesMap[ICV].count(I))
2154 return ICVReplacementValuesMap[ICV].lookup(I);
2155
2156 return nullptr;
2157 }
2158
2159 // Since we don't know, assume it changes the ICV.
2160 if (CalledFunction->isDeclaration())
2161 return nullptr;
2162
2163 const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>(
2164 *this, IRPosition::callsite_returned(*CB), DepClassTy::REQUIRED);
2165
2166 if (ICVTrackingAA.isAssumedTracked())
2167 return ICVTrackingAA.getUniqueReplacementValue(ICV);
2168
2169 // If we don't know, assume it changes.
2170 return nullptr;
2171 }
2172
2173 // We don't check unique value for a function, so return None.
2174 Optional<Value *>
2175 getUniqueReplacementValue(InternalControlVar ICV) const override {
2176 return None;
2177 }
2178
2179 /// Return the value with which \p I can be replaced for specific \p ICV.
2180 Optional<Value *> getReplacementValue(InternalControlVar ICV,
2181 const Instruction *I,
2182 Attributor &A) const override {
2183 const auto &ValuesMap = ICVReplacementValuesMap[ICV];
2184 if (ValuesMap.count(I))
2185 return ValuesMap.lookup(I);
2186
2187 SmallVector<const Instruction *, 16> Worklist;
2188 SmallPtrSet<const Instruction *, 16> Visited;
2189 Worklist.push_back(I);
2190
2191 Optional<Value *> ReplVal;
2192
2193 while (!Worklist.empty()) {
2194 const Instruction *CurrInst = Worklist.pop_back_val();
2195 if (!Visited.insert(CurrInst).second)
2196 continue;
2197
2198 const BasicBlock *CurrBB = CurrInst->getParent();
2199
2200 // Go up and look for all potential setters/calls that might change the
2201 // ICV.
2202 while ((CurrInst = CurrInst->getPrevNode())) {
2203 if (ValuesMap.count(CurrInst)) {
2204 Optional<Value *> NewReplVal = ValuesMap.lookup(CurrInst);
2205 // Unknown value, track new.
2206 if (!ReplVal.hasValue()) {
2207 ReplVal = NewReplVal;
2208 break;
2209 }
2210
2211 // If we found a new value, we can't know the icv value anymore.
2212 if (NewReplVal.hasValue())
2213 if (ReplVal != NewReplVal)
2214 return nullptr;
2215
2216 break;
2217 }
2218
2219 Optional<Value *> NewReplVal = getValueForCall(A, CurrInst, ICV);
2220 if (!NewReplVal.hasValue())
2221 continue;
2222
2223 // Unknown value, track new.
2224 if (!ReplVal.hasValue()) {
2225 ReplVal = NewReplVal;
2226 break;
2227 }
2228
2229 // if (NewReplVal.hasValue())
2230 // We found a new value, we can't know the icv value anymore.
2231 if (ReplVal != NewReplVal)
2232 return nullptr;
2233 }
2234
2235 // If we are in the same BB and we have a value, we are done.
2236 if (CurrBB == I->getParent() && ReplVal.hasValue())
2237 return ReplVal;
2238
2239 // Go through all predecessors and add terminators for analysis.
2240 for (const BasicBlock *Pred : predecessors(CurrBB))
2241 if (const Instruction *Terminator = Pred->getTerminator())
2242 Worklist.push_back(Terminator);
2243 }
2244
2245 return ReplVal;
2246 }
2247};
2248
2249struct AAICVTrackerFunctionReturned : AAICVTracker {
2250 AAICVTrackerFunctionReturned(const IRPosition &IRP, Attributor &A)
2251 : AAICVTracker(IRP, A) {}
2252
2253 // FIXME: come up with better string.
2254 const std::string getAsStr() const override {
2255 return "ICVTrackerFunctionReturned";
2256 }
2257
2258 // FIXME: come up with some stats.
2259 void trackStatistics() const override {}
2260
2261 /// We don't manifest anything for this AA.
2262 ChangeStatus manifest(Attributor &A) override {
2263 return ChangeStatus::UNCHANGED;
2264 }
2265
2266 // Map of ICV to their values at specific program point.
2267 EnumeratedArray<Optional<Value *>, InternalControlVar,
2268 InternalControlVar::ICV___last>
2269 ICVReplacementValuesMap;
2270
2271 /// Return the value with which \p I can be replaced for specific \p ICV.
2272 Optional<Value *>
2273 getUniqueReplacementValue(InternalControlVar ICV) const override {
2274 return ICVReplacementValuesMap[ICV];
2275 }
2276
2277 ChangeStatus updateImpl(Attributor &A) override {
2278 ChangeStatus Changed = ChangeStatus::UNCHANGED;
2279 const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>(
2280 *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED);
2281
2282 if (!ICVTrackingAA.isAssumedTracked())
2283 return indicatePessimisticFixpoint();
2284
2285 for (InternalControlVar ICV : TrackableICVs) {
2286 Optional<Value *> &ReplVal = ICVReplacementValuesMap[ICV];
2287 Optional<Value *> UniqueICVValue;
2288
2289 auto CheckReturnInst = [&](Instruction &I) {
2290 Optional<Value *> NewReplVal =
2291 ICVTrackingAA.getReplacementValue(ICV, &I, A);
2292
2293 // If we found a second ICV value there is no unique returned value.
2294 if (UniqueICVValue.hasValue() && UniqueICVValue != NewReplVal)
2295 return false;
2296
2297 UniqueICVValue = NewReplVal;
2298
2299 return true;
2300 };
2301
2302 bool UsedAssumedInformation = false;
2303 if (!A.checkForAllInstructions(CheckReturnInst, *this, {Instruction::Ret},
2304 UsedAssumedInformation,
2305 /* CheckBBLivenessOnly */ true))
2306 UniqueICVValue = nullptr;
2307
2308 if (UniqueICVValue == ReplVal)
2309 continue;
2310
2311 ReplVal = UniqueICVValue;
2312 Changed = ChangeStatus::CHANGED;
2313 }
2314
2315 return Changed;
2316 }
2317};
2318
2319struct AAICVTrackerCallSite : AAICVTracker {
2320 AAICVTrackerCallSite(const IRPosition &IRP, Attributor &A)
2321 : AAICVTracker(IRP, A) {}
2322
2323 void initialize(Attributor &A) override {
2324 Function *F = getAnchorScope();
2325 if (!F || !A.isFunctionIPOAmendable(*F))
2326 indicatePessimisticFixpoint();
2327
2328 // We only initialize this AA for getters, so we need to know which ICV it
2329 // gets.
2330 auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
2331 for (InternalControlVar ICV : TrackableICVs) {
2332 auto ICVInfo = OMPInfoCache.ICVs[ICV];
2333 auto &Getter = OMPInfoCache.RFIs[ICVInfo.Getter];
2334 if (Getter.Declaration == getAssociatedFunction()) {
2335 AssociatedICV = ICVInfo.Kind;
2336 return;
2337 }
2338 }
2339
2340 /// Unknown ICV.
2341 indicatePessimisticFixpoint();
2342 }
2343
2344 ChangeStatus manifest(Attributor &A) override {
2345 if (!ReplVal.hasValue() || !ReplVal.getValue())
2346 return ChangeStatus::UNCHANGED;
2347
2348 A.changeValueAfterManifest(*getCtxI(), **ReplVal);
2349 A.deleteAfterManifest(*getCtxI());
2350
2351 return ChangeStatus::CHANGED;
2352 }
2353
2354 // FIXME: come up with better string.
2355 const std::string getAsStr() const override { return "ICVTrackerCallSite"; }
2356
2357 // FIXME: come up with some stats.
2358 void trackStatistics() const override {}
2359
2360 InternalControlVar AssociatedICV;
2361 Optional<Value *> ReplVal;
2362
2363 ChangeStatus updateImpl(Attributor &A) override {
2364 const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>(
2365 *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED);
2366
2367 // We don't have any information, so we assume it changes the ICV.
2368 if (!ICVTrackingAA.isAssumedTracked())
2369 return indicatePessimisticFixpoint();
2370
2371 Optional<Value *> NewReplVal =
2372 ICVTrackingAA.getReplacementValue(AssociatedICV, getCtxI(), A);
2373
2374 if (ReplVal == NewReplVal)
2375 return ChangeStatus::UNCHANGED;
2376
2377 ReplVal = NewReplVal;
2378 return ChangeStatus::CHANGED;
2379 }
2380
2381 // Return the value with which associated value can be replaced for specific
2382 // \p ICV.
2383 Optional<Value *>
2384 getUniqueReplacementValue(InternalControlVar ICV) const override {
2385 return ReplVal;
2386 }
2387};
2388
2389struct AAICVTrackerCallSiteReturned : AAICVTracker {
2390 AAICVTrackerCallSiteReturned(const IRPosition &IRP, Attributor &A)
2391 : AAICVTracker(IRP, A) {}
2392
2393 // FIXME: come up with better string.
2394 const std::string getAsStr() const override {
2395 return "ICVTrackerCallSiteReturned";
2396 }
2397
2398 // FIXME: come up with some stats.
2399 void trackStatistics() const override {}
2400
2401 /// We don't manifest anything for this AA.
2402 ChangeStatus manifest(Attributor &A) override {
2403 return ChangeStatus::UNCHANGED;
2404 }
2405
2406 // Map of ICV to their values at specific program point.
2407 EnumeratedArray<Optional<Value *>, InternalControlVar,
2408 InternalControlVar::ICV___last>
2409 ICVReplacementValuesMap;
2410
2411 /// Return the value with which associated value can be replaced for specific
2412 /// \p ICV.
2413 Optional<Value *>
2414 getUniqueReplacementValue(InternalControlVar ICV) const override {
2415 return ICVReplacementValuesMap[ICV];
2416 }
2417
2418 ChangeStatus updateImpl(Attributor &A) override {
2419 ChangeStatus Changed = ChangeStatus::UNCHANGED;
2420 const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>(
2421 *this, IRPosition::returned(*getAssociatedFunction()),
2422 DepClassTy::REQUIRED);
2423
2424 // We don't have any information, so we assume it changes the ICV.
2425 if (!ICVTrackingAA.isAssumedTracked())
2426 return indicatePessimisticFixpoint();
2427
2428 for (InternalControlVar ICV : TrackableICVs) {
2429 Optional<Value *> &ReplVal = ICVReplacementValuesMap[ICV];
2430 Optional<Value *> NewReplVal =
2431 ICVTrackingAA.getUniqueReplacementValue(ICV);
2432
2433 if (ReplVal == NewReplVal)
2434 continue;
2435
2436 ReplVal = NewReplVal;
2437 Changed = ChangeStatus::CHANGED;
2438 }
2439 return Changed;
2440 }
2441};
2442
2443struct AAExecutionDomainFunction : public AAExecutionDomain {
2444 AAExecutionDomainFunction(const IRPosition &IRP, Attributor &A)
2445 : AAExecutionDomain(IRP, A) {}
2446
2447 const std::string getAsStr() const override {
2448 return "[AAExecutionDomain] " + std::to_string(SingleThreadedBBs.size()) +
2449 "/" + std::to_string(NumBBs) + " BBs thread 0 only.";
2450 }
2451
2452 /// See AbstractAttribute::trackStatistics().
2453 void trackStatistics() const override {}
2454
2455 void initialize(Attributor &A) override {
2456 Function *F = getAnchorScope();
2457 for (const auto &BB : *F)
2458 SingleThreadedBBs.insert(&BB);
2459 NumBBs = SingleThreadedBBs.size();
2460 }
2461
2462 ChangeStatus manifest(Attributor &A) override {
2463 LLVM_DEBUG({do { } while (false)
2464 for (const BasicBlock *BB : SingleThreadedBBs)do { } while (false)
2465 dbgs() << TAG << " Basic block @" << getAnchorScope()->getName() << " "do { } while (false)
2466 << BB->getName() << " is executed by a single thread.\n";do { } while (false)
2467 })do { } while (false);
2468 return ChangeStatus::UNCHANGED;
2469 }
2470
2471 ChangeStatus updateImpl(Attributor &A) override;
2472
2473 /// Check if an instruction is executed by a single thread.
2474 bool isExecutedByInitialThreadOnly(const Instruction &I) const override {
2475 return isExecutedByInitialThreadOnly(*I.getParent());
2476 }
2477
2478 bool isExecutedByInitialThreadOnly(const BasicBlock &BB) const override {
2479 return isValidState() && SingleThreadedBBs.contains(&BB);
2480 }
2481
2482 /// Set of basic blocks that are executed by a single thread.
2483 DenseSet<const BasicBlock *> SingleThreadedBBs;
2484
2485 /// Total number of basic blocks in this function.
2486 long unsigned NumBBs;
2487};
2488
2489ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) {
2490 Function *F = getAnchorScope();
2491 ReversePostOrderTraversal<Function *> RPOT(F);
2492 auto NumSingleThreadedBBs = SingleThreadedBBs.size();
2493
2494 bool AllCallSitesKnown;
2495 auto PredForCallSite = [&](AbstractCallSite ACS) {
2496 const auto &ExecutionDomainAA = A.getAAFor<AAExecutionDomain>(
2497 *this, IRPosition::function(*ACS.getInstruction()->getFunction()),
2498 DepClassTy::REQUIRED);
2499 return ACS.isDirectCall() &&
2500 ExecutionDomainAA.isExecutedByInitialThreadOnly(
2501 *ACS.getInstruction());
2502 };
2503
2504 if (!A.checkForAllCallSites(PredForCallSite, *this,
2505 /* RequiresAllCallSites */ true,
2506 AllCallSitesKnown))
2507 SingleThreadedBBs.erase(&F->getEntryBlock());
2508
2509 auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
2510 auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_target_init];
2511
2512 // Check if the edge into the successor block compares the __kmpc_target_init
2513 // result with -1. If we are in non-SPMD-mode that signals only the main
2514 // thread will execute the edge.
2515 auto IsInitialThreadOnly = [&](BranchInst *Edge, BasicBlock *SuccessorBB) {
2516 if (!Edge || !Edge->isConditional())
2517 return false;
2518 if (Edge->getSuccessor(0) != SuccessorBB)
2519 return false;
2520
2521 auto *Cmp = dyn_cast<CmpInst>(Edge->getCondition());
2522 if (!Cmp || !Cmp->isTrueWhenEqual() || !Cmp->isEquality())
2523 return false;
2524
2525 ConstantInt *C = dyn_cast<ConstantInt>(Cmp->getOperand(1));
2526 if (!C)
2527 return false;
2528
2529 // Match: -1 == __kmpc_target_init (for non-SPMD kernels only!)
2530 if (C->isAllOnesValue()) {
2531 auto *CB = dyn_cast<CallBase>(Cmp->getOperand(0));
2532 CB = CB ? OpenMPOpt::getCallIfRegularCall(*CB, &RFI) : nullptr;
2533 if (!CB)
2534 return false;
2535 const int InitIsSPMDArgNo = 1;
2536 auto *IsSPMDModeCI =
2537 dyn_cast<ConstantInt>(CB->getOperand(InitIsSPMDArgNo));
2538 return IsSPMDModeCI && IsSPMDModeCI->isZero();
2539 }
2540
2541 return false;
2542 };
2543
2544 // Merge all the predecessor states into the current basic block. A basic
2545 // block is executed by a single thread if all of its predecessors are.
2546 auto MergePredecessorStates = [&](BasicBlock *BB) {
2547 if (pred_begin(BB) == pred_end(BB))
2548 return SingleThreadedBBs.contains(BB);
2549
2550 bool IsInitialThread = true;
2551 for (auto PredBB = pred_begin(BB), PredEndBB = pred_end(BB);
2552 PredBB != PredEndBB; ++PredBB) {
2553 if (!IsInitialThreadOnly(dyn_cast<BranchInst>((*PredBB)->getTerminator()),
2554 BB))
2555 IsInitialThread &= SingleThreadedBBs.contains(*PredBB);
2556 }
2557
2558 return IsInitialThread;
2559 };
2560
2561 for (auto *BB : RPOT) {
2562 if (!MergePredecessorStates(BB))
2563 SingleThreadedBBs.erase(BB);
2564 }
2565
2566 return (NumSingleThreadedBBs == SingleThreadedBBs.size())
2567 ? ChangeStatus::UNCHANGED
2568 : ChangeStatus::CHANGED;
2569}
2570
2571/// Try to replace memory allocation calls called by a single thread with a
2572/// static buffer of shared memory.
2573struct AAHeapToShared : public StateWrapper<BooleanState, AbstractAttribute> {
2574 using Base = StateWrapper<BooleanState, AbstractAttribute>;
2575 AAHeapToShared(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
2576
2577 /// Create an abstract attribute view for the position \p IRP.
2578 static AAHeapToShared &createForPosition(const IRPosition &IRP,
2579 Attributor &A);
2580
2581 /// Returns true if HeapToShared conversion is assumed to be possible.
2582 virtual bool isAssumedHeapToShared(CallBase &CB) const = 0;
2583
2584 /// Returns true if HeapToShared conversion is assumed and the CB is a
2585 /// callsite to a free operation to be removed.
2586 virtual bool isAssumedHeapToSharedRemovedFree(CallBase &CB) const = 0;
2587
2588 /// See AbstractAttribute::getName().
2589 const std::string getName() const override { return "AAHeapToShared"; }
2590
2591 /// See AbstractAttribute::getIdAddr().
2592 const char *getIdAddr() const override { return &ID; }
2593
2594 /// This function should return true if the type of the \p AA is
2595 /// AAHeapToShared.
2596 static bool classof(const AbstractAttribute *AA) {
2597 return (AA->getIdAddr() == &ID);
2598 }
2599
2600 /// Unique ID (due to the unique address)
2601 static const char ID;
2602};
2603
2604struct AAHeapToSharedFunction : public AAHeapToShared {
2605 AAHeapToSharedFunction(const IRPosition &IRP, Attributor &A)
2606 : AAHeapToShared(IRP, A) {}
2607
2608 const std::string getAsStr() const override {
2609 return "[AAHeapToShared] " + std::to_string(MallocCalls.size()) +
2610 " malloc calls eligible.";
2611 }
2612
2613 /// See AbstractAttribute::trackStatistics().
2614 void trackStatistics() const override {}
2615
2616 /// This functions finds free calls that will be removed by the
2617 /// HeapToShared transformation.
2618 void findPotentialRemovedFreeCalls(Attributor &A) {
2619 auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
2620 auto &FreeRFI = OMPInfoCache.RFIs[OMPRTL___kmpc_free_shared];
2621
2622 PotentialRemovedFreeCalls.clear();
2623 // Update free call users of found malloc calls.
2624 for (CallBase *CB : MallocCalls) {
2625 SmallVector<CallBase *, 4> FreeCalls;
2626 for (auto *U : CB->users()) {
2627 CallBase *C = dyn_cast<CallBase>(U);
2628 if (C && C->getCalledFunction() == FreeRFI.Declaration)
2629 FreeCalls.push_back(C);
2630 }
2631
2632 if (FreeCalls.size() != 1)
2633 continue;
2634
2635 PotentialRemovedFreeCalls.insert(FreeCalls.front());
2636 }
2637 }
2638
2639 void initialize(Attributor &A) override {
2640 auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
2641 auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared];
2642
2643 for (User *U : RFI.Declaration->users())
2644 if (CallBase *CB = dyn_cast<CallBase>(U))
2645 MallocCalls.insert(CB);
2646
2647 findPotentialRemovedFreeCalls(A);
2648 }
2649
2650 bool isAssumedHeapToShared(CallBase &CB) const override {
2651 return isValidState() && MallocCalls.count(&CB);
2652 }
2653
2654 bool isAssumedHeapToSharedRemovedFree(CallBase &CB) const override {
2655 return isValidState() && PotentialRemovedFreeCalls.count(&CB);
2656 }
2657
2658 ChangeStatus manifest(Attributor &A) override {
2659 if (MallocCalls.empty())
2660 return ChangeStatus::UNCHANGED;
2661
2662 auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
2663 auto &FreeCall = OMPInfoCache.RFIs[OMPRTL___kmpc_free_shared];
2664
2665 Function *F = getAnchorScope();
2666 auto *HS = A.lookupAAFor<AAHeapToStack>(IRPosition::function(*F), this,
2667 DepClassTy::OPTIONAL);
2668
2669 ChangeStatus Changed = ChangeStatus::UNCHANGED;
2670 for (CallBase *CB : MallocCalls) {
2671 // Skip replacing this if HeapToStack has already claimed it.
2672 if (HS && HS->isAssumedHeapToStack(*CB))
2673 continue;
2674
2675 // Find the unique free call to remove it.
2676 SmallVector<CallBase *, 4> FreeCalls;
2677 for (auto *U : CB->users()) {
2678 CallBase *C = dyn_cast<CallBase>(U);
2679 if (C && C->getCalledFunction() == FreeCall.Declaration)
2680 FreeCalls.push_back(C);
2681 }
2682 if (FreeCalls.size() != 1)
2683 continue;
2684
2685 ConstantInt *AllocSize = dyn_cast<ConstantInt>(CB->getArgOperand(0));
2686
2687 LLVM_DEBUG(dbgs() << TAG << "Replace globalization call in "do { } while (false)
2688 << CB->getCaller()->getName() << " with "do { } while (false)
2689 << AllocSize->getZExtValue()do { } while (false)
2690 << " bytes of shared memory\n")do { } while (false);
2691
2692 // Create a new shared memory buffer of the same size as the allocation
2693 // and replace all the uses of the original allocation with it.
2694 Module *M = CB->getModule();
2695 Type *Int8Ty = Type::getInt8Ty(M->getContext());
2696 Type *Int8ArrTy = ArrayType::get(Int8Ty, AllocSize->getZExtValue());
2697 auto *SharedMem = new GlobalVariable(
2698 *M, Int8ArrTy, /* IsConstant */ false, GlobalValue::InternalLinkage,
2699 UndefValue::get(Int8ArrTy), CB->getName(), nullptr,
2700 GlobalValue::NotThreadLocal,
2701 static_cast<unsigned>(AddressSpace::Shared));
2702 auto *NewBuffer =
2703 ConstantExpr::getPointerCast(SharedMem, Int8Ty->getPointerTo());
2704
2705 auto Remark = [&](OptimizationRemark OR) {
2706 return OR << "Replaced globalized variable with "
2707 << ore::NV("SharedMemory", AllocSize->getZExtValue())
2708 << ((AllocSize->getZExtValue() != 1) ? " bytes " : " byte ")
2709 << "of shared memory.";
2710 };
2711 A.emitRemark<OptimizationRemark>(CB, "OMP111", Remark);
2712
2713 SharedMem->setAlignment(MaybeAlign(32));
2714
2715 A.changeValueAfterManifest(*CB, *NewBuffer);
2716 A.deleteAfterManifest(*CB);
2717 A.deleteAfterManifest(*FreeCalls.front());
2718
2719 NumBytesMovedToSharedMemory += AllocSize->getZExtValue();
2720 Changed = ChangeStatus::CHANGED;
2721 }
2722
2723 return Changed;
2724 }
2725
2726 ChangeStatus updateImpl(Attributor &A) override {
2727 auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
2728 auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared];
2729 Function *F = getAnchorScope();
2730
2731 auto NumMallocCalls = MallocCalls.size();
2732
2733 // Only consider malloc calls executed by a single thread with a constant.
2734 for (User *U : RFI.Declaration->users()) {
2735 const auto &ED = A.getAAFor<AAExecutionDomain>(
2736 *this, IRPosition::function(*F), DepClassTy::REQUIRED);
2737 if (CallBase *CB = dyn_cast<CallBase>(U))
2738 if (!dyn_cast<ConstantInt>(CB->getArgOperand(0)) ||
2739 !ED.isExecutedByInitialThreadOnly(*CB))
2740 MallocCalls.erase(CB);
2741 }
2742
2743 findPotentialRemovedFreeCalls(A);
2744
2745 if (NumMallocCalls != MallocCalls.size())
2746 return ChangeStatus::CHANGED;
2747
2748 return ChangeStatus::UNCHANGED;
2749 }
2750
2751 /// Collection of all malloc calls in a function.
2752 SmallPtrSet<CallBase *, 4> MallocCalls;
2753 /// Collection of potentially removed free calls in a function.
2754 SmallPtrSet<CallBase *, 4> PotentialRemovedFreeCalls;
2755};
2756
2757struct AAKernelInfo : public StateWrapper<KernelInfoState, AbstractAttribute> {
2758 using Base = StateWrapper<KernelInfoState, AbstractAttribute>;
2759 AAKernelInfo(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
2760
2761 /// Statistics are tracked as part of manifest for now.
2762 void trackStatistics() const override {}
2763
2764 /// See AbstractAttribute::getAsStr()
2765 const std::string getAsStr() const override {
2766 if (!isValidState())
2767 return "<invalid>";
2768 return std::string(SPMDCompatibilityTracker.isAssumed() ? "SPMD"
2769 : "generic") +
2770 std::string(SPMDCompatibilityTracker.isAtFixpoint() ? " [FIX]"
2771 : "") +
2772 std::string(" #PRs: ") +
2773 std::to_string(ReachedKnownParallelRegions.size()) +
2774 ", #Unknown PRs: " +
2775 std::to_string(ReachedUnknownParallelRegions.size());
2776 }
2777
2778 /// Create an abstract attribute biew for the position \p IRP.
2779 static AAKernelInfo &createForPosition(const IRPosition &IRP, Attributor &A);
2780
2781 /// See AbstractAttribute::getName()
2782 const std::string getName() const override { return "AAKernelInfo"; }
2783
2784 /// See AbstractAttribute::getIdAddr()
2785 const char *getIdAddr() const override { return &ID; }
2786
2787 /// This function should return true if the type of the \p AA is AAKernelInfo
2788 static bool classof(const AbstractAttribute *AA) {
2789 return (AA->getIdAddr() == &ID);
2790 }
2791
2792 static const char ID;
2793};
2794
2795/// The function kernel info abstract attribute, basically, what can we say
2796/// about a function with regards to the KernelInfoState.
2797struct AAKernelInfoFunction : AAKernelInfo {
2798 AAKernelInfoFunction(const IRPosition &IRP, Attributor &A)
2799 : AAKernelInfo(IRP, A) {}
2800
2801 /// See AbstractAttribute::initialize(...).
2802 void initialize(Attributor &A) override {
2803 // This is a high-level transform that might change the constant arguments
2804 // of the init and dinit calls. We need to tell the Attributor about this
2805 // to avoid other parts using the current constant value for simpliication.
2806 auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
2807
2808 Function *Fn = getAnchorScope();
2809 if (!OMPInfoCache.Kernels.count(Fn))
2810 return;
2811
2812 // Add itself to the reaching kernel and set IsKernelEntry.
2813 ReachingKernelEntries.insert(Fn);
2814 IsKernelEntry = true;
2815
2816 OMPInformationCache::RuntimeFunctionInfo &InitRFI =
2817 OMPInfoCache.RFIs[OMPRTL___kmpc_target_init];
2818 OMPInformationCache::RuntimeFunctionInfo &DeinitRFI =
2819 OMPInfoCache.RFIs[OMPRTL___kmpc_target_deinit];
2820
2821 // For kernels we perform more initialization work, first we find the init
2822 // and deinit calls.
2823 auto StoreCallBase = [](Use &U,
2824 OMPInformationCache::RuntimeFunctionInfo &RFI,
2825 CallBase *&Storage) {
2826 CallBase *CB = OpenMPOpt::getCallIfRegularCall(U, &RFI);
2827 assert(CB &&((void)0)
2828 "Unexpected use of __kmpc_target_init or __kmpc_target_deinit!")((void)0);
2829 assert(!Storage &&((void)0)
2830 "Multiple uses of __kmpc_target_init or __kmpc_target_deinit!")((void)0);
2831 Storage = CB;
2832 return false;
2833 };
2834 InitRFI.foreachUse(
2835 [&](Use &U, Function &) {
2836 StoreCallBase(U, InitRFI, KernelInitCB);
2837 return false;
2838 },
2839 Fn);
2840 DeinitRFI.foreachUse(
2841 [&](Use &U, Function &) {
2842 StoreCallBase(U, DeinitRFI, KernelDeinitCB);
2843 return false;
2844 },
2845 Fn);
2846
2847 assert((KernelInitCB && KernelDeinitCB) &&((void)0)
2848 "Kernel without __kmpc_target_init or __kmpc_target_deinit!")((void)0);
2849
2850 // For kernels we might need to initialize/finalize the IsSPMD state and
2851 // we need to register a simplification callback so that the Attributor
2852 // knows the constant arguments to __kmpc_target_init and
2853 // __kmpc_target_deinit might actually change.
2854
2855 Attributor::SimplifictionCallbackTy StateMachineSimplifyCB =
2856 [&](const IRPosition &IRP, const AbstractAttribute *AA,
2857 bool &UsedAssumedInformation) -> Optional<Value *> {
2858 // IRP represents the "use generic state machine" argument of an
2859 // __kmpc_target_init call. We will answer this one with the internal
2860 // state. As long as we are not in an invalid state, we will create a
2861 // custom state machine so the value should be a `i1 false`. If we are
2862 // in an invalid state, we won't change the value that is in the IR.
2863 if (!isValidState())
2864 return nullptr;
2865 if (AA)
2866 A.recordDependence(*this, *AA, DepClassTy::OPTIONAL);
2867 UsedAssumedInformation = !isAtFixpoint();
2868 auto *FalseVal =
2869 ConstantInt::getBool(IRP.getAnchorValue().getContext(), 0);
2870 return FalseVal;
2871 };
2872
2873 Attributor::SimplifictionCallbackTy IsSPMDModeSimplifyCB =
2874 [&](const IRPosition &IRP, const AbstractAttribute *AA,
2875 bool &UsedAssumedInformation) -> Optional<Value *> {
2876 // IRP represents the "SPMDCompatibilityTracker" argument of an
2877 // __kmpc_target_init or
2878 // __kmpc_target_deinit call. We will answer this one with the internal
2879 // state.
2880 if (!SPMDCompatibilityTracker.isValidState())
2881 return nullptr;
2882 if (!SPMDCompatibilityTracker.isAtFixpoint()) {
2883 if (AA)
2884 A.recordDependence(*this, *AA, DepClassTy::OPTIONAL);
2885 UsedAssumedInformation = true;
2886 } else {
2887 UsedAssumedInformation = false;
2888 }
2889 auto *Val = ConstantInt::getBool(IRP.getAnchorValue().getContext(),
2890 SPMDCompatibilityTracker.isAssumed());
2891 return Val;
2892 };
2893
2894 Attributor::SimplifictionCallbackTy IsGenericModeSimplifyCB =
2895 [&](const IRPosition &IRP, const AbstractAttribute *AA,
2896 bool &UsedAssumedInformation) -> Optional<Value *> {
2897 // IRP represents the "RequiresFullRuntime" argument of an
2898 // __kmpc_target_init or __kmpc_target_deinit call. We will answer this
2899 // one with the internal state of the SPMDCompatibilityTracker, so if
2900 // generic then true, if SPMD then false.
2901 if (!SPMDCompatibilityTracker.isValidState())
2902 return nullptr;
2903 if (!SPMDCompatibilityTracker.isAtFixpoint()) {
2904 if (AA)
2905 A.recordDependence(*this, *AA, DepClassTy::OPTIONAL);
2906 UsedAssumedInformation = true;
2907 } else {
2908 UsedAssumedInformation = false;
2909 }
2910 auto *Val = ConstantInt::getBool(IRP.getAnchorValue().getContext(),
2911 !SPMDCompatibilityTracker.isAssumed());
2912 return Val;
2913 };
2914
2915 constexpr const int InitIsSPMDArgNo = 1;
2916 constexpr const int DeinitIsSPMDArgNo = 1;
2917 constexpr const int InitUseStateMachineArgNo = 2;
2918 constexpr const int InitRequiresFullRuntimeArgNo = 3;
2919 constexpr const int DeinitRequiresFullRuntimeArgNo = 2;
2920 A.registerSimplificationCallback(
2921 IRPosition::callsite_argument(*KernelInitCB, InitUseStateMachineArgNo),
2922 StateMachineSimplifyCB);
2923 A.registerSimplificationCallback(
2924 IRPosition::callsite_argument(*KernelInitCB, InitIsSPMDArgNo),
2925 IsSPMDModeSimplifyCB);
2926 A.registerSimplificationCallback(
2927 IRPosition::callsite_argument(*KernelDeinitCB, DeinitIsSPMDArgNo),
2928 IsSPMDModeSimplifyCB);
2929 A.registerSimplificationCallback(
2930 IRPosition::callsite_argument(*KernelInitCB,
2931 InitRequiresFullRuntimeArgNo),
2932 IsGenericModeSimplifyCB);
2933 A.registerSimplificationCallback(
2934 IRPosition::callsite_argument(*KernelDeinitCB,
2935 DeinitRequiresFullRuntimeArgNo),
2936 IsGenericModeSimplifyCB);
2937
2938 // Check if we know we are in SPMD-mode already.
2939 ConstantInt *IsSPMDArg =
2940 dyn_cast<ConstantInt>(KernelInitCB->getArgOperand(InitIsSPMDArgNo));
2941 if (IsSPMDArg && !IsSPMDArg->isZero())
2942 SPMDCompatibilityTracker.indicateOptimisticFixpoint();
2943 }
2944
2945 /// Modify the IR based on the KernelInfoState as the fixpoint iteration is
2946 /// finished now.
2947 ChangeStatus manifest(Attributor &A) override {
2948 // If we are not looking at a kernel with __kmpc_target_init and
2949 // __kmpc_target_deinit call we cannot actually manifest the information.
2950 if (!KernelInitCB || !KernelDeinitCB)
2951 return ChangeStatus::UNCHANGED;
2952
2953 // Known SPMD-mode kernels need no manifest changes.
2954 if (SPMDCompatibilityTracker.isKnown())
2955 return ChangeStatus::UNCHANGED;
2956
2957 // If we can we change the execution mode to SPMD-mode otherwise we build a
2958 // custom state machine.
2959 if (!changeToSPMDMode(A))
2960 buildCustomStateMachine(A);
2961
2962 return ChangeStatus::CHANGED;
2963 }
2964
2965 bool changeToSPMDMode(Attributor &A) {
2966 auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
2967
2968 if (!SPMDCompatibilityTracker.isAssumed()) {
2969 for (Instruction *NonCompatibleI : SPMDCompatibilityTracker) {
2970 if (!NonCompatibleI)
2971 continue;
2972
2973 // Skip diagnostics on calls to known OpenMP runtime functions for now.
2974 if (auto *CB = dyn_cast<CallBase>(NonCompatibleI))
2975 if (OMPInfoCache.RTLFunctions.contains(CB->getCalledFunction()))
2976 continue;
2977
2978 auto Remark = [&](OptimizationRemarkAnalysis ORA) {
2979 ORA << "Value has potential side effects preventing SPMD-mode "
2980 "execution";
2981 if (isa<CallBase>(NonCompatibleI)) {
2982 ORA << ". Add `__attribute__((assume(\"ompx_spmd_amenable\")))` to "
2983 "the called function to override";
2984 }
2985 return ORA << ".";
2986 };
2987 A.emitRemark<OptimizationRemarkAnalysis>(NonCompatibleI, "OMP121",
2988 Remark);
2989
2990 LLVM_DEBUG(dbgs() << TAG << "SPMD-incompatible side-effect: "do { } while (false)
2991 << *NonCompatibleI << "\n")do { } while (false);
2992 }
2993
2994 return false;
2995 }
2996
2997 // Adjust the global exec mode flag that tells the runtime what mode this
2998 // kernel is executed in.
2999 Function *Kernel = getAnchorScope();
3000 GlobalVariable *ExecMode = Kernel->getParent()->getGlobalVariable(
3001 (Kernel->getName() + "_exec_mode").str());
3002 assert(ExecMode && "Kernel without exec mode?")((void)0);
3003 assert(ExecMode->getInitializer() &&((void)0)
3004 ExecMode->getInitializer()->isOneValue() &&((void)0)
3005 "Initially non-SPMD kernel has SPMD exec mode!")((void)0);
3006
3007 // Set the global exec mode flag to indicate SPMD-Generic mode.
3008 constexpr int SPMDGeneric = 2;
3009 if (!ExecMode->getInitializer()->isZeroValue())
3010 ExecMode->setInitializer(
3011 ConstantInt::get(ExecMode->getInitializer()->getType(), SPMDGeneric));
3012
3013 // Next rewrite the init and deinit calls to indicate we use SPMD-mode now.
3014 const int InitIsSPMDArgNo = 1;
3015 const int DeinitIsSPMDArgNo = 1;
3016 const int InitUseStateMachineArgNo = 2;
3017 const int InitRequiresFullRuntimeArgNo = 3;
3018 const int DeinitRequiresFullRuntimeArgNo = 2;
3019
3020 auto &Ctx = getAnchorValue().getContext();
3021 A.changeUseAfterManifest(KernelInitCB->getArgOperandUse(InitIsSPMDArgNo),
3022 *ConstantInt::getBool(Ctx, 1));
3023 A.changeUseAfterManifest(
3024 KernelInitCB->getArgOperandUse(InitUseStateMachineArgNo),
3025 *ConstantInt::getBool(Ctx, 0));
3026 A.changeUseAfterManifest(
3027 KernelDeinitCB->getArgOperandUse(DeinitIsSPMDArgNo),
3028 *ConstantInt::getBool(Ctx, 1));
3029 A.changeUseAfterManifest(
3030 KernelInitCB->getArgOperandUse(InitRequiresFullRuntimeArgNo),
3031 *ConstantInt::getBool(Ctx, 0));
3032 A.changeUseAfterManifest(
3033 KernelDeinitCB->getArgOperandUse(DeinitRequiresFullRuntimeArgNo),
3034 *ConstantInt::getBool(Ctx, 0));
3035
3036 ++NumOpenMPTargetRegionKernelsSPMD;
3037
3038 auto Remark = [&](OptimizationRemark OR) {
3039 return OR << "Transformed generic-mode kernel to SPMD-mode.";
3040 };
3041 A.emitRemark<OptimizationRemark>(KernelInitCB, "OMP120", Remark);
3042 return true;
3043 };
3044
3045 ChangeStatus buildCustomStateMachine(Attributor &A) {
3046 assert(ReachedKnownParallelRegions.isValidState() &&((void)0)
3047 "Custom state machine with invalid parallel region states?")((void)0);
3048
3049 const int InitIsSPMDArgNo = 1;
3050 const int InitUseStateMachineArgNo = 2;
3051
3052 // Check if the current configuration is non-SPMD and generic state machine.
3053 // If we already have SPMD mode or a custom state machine we do not need to
3054 // go any further. If it is anything but a constant something is weird and
3055 // we give up.
3056 ConstantInt *UseStateMachine = dyn_cast<ConstantInt>(
3057 KernelInitCB->getArgOperand(InitUseStateMachineArgNo));
3058 ConstantInt *IsSPMD =
3059 dyn_cast<ConstantInt>(KernelInitCB->getArgOperand(InitIsSPMDArgNo));
3060
3061 // If we are stuck with generic mode, try to create a custom device (=GPU)
3062 // state machine which is specialized for the parallel regions that are
3063 // reachable by the kernel.
3064 if (!UseStateMachine || UseStateMachine->isZero() || !IsSPMD ||
3065 !IsSPMD->isZero())
3066 return ChangeStatus::UNCHANGED;
3067
3068 // If not SPMD mode, indicate we use a custom state machine now.
3069 auto &Ctx = getAnchorValue().getContext();
3070 auto *FalseVal = ConstantInt::getBool(Ctx, 0);
3071 A.changeUseAfterManifest(
3072 KernelInitCB->getArgOperandUse(InitUseStateMachineArgNo), *FalseVal);
3073
3074 // If we don't actually need a state machine we are done here. This can
3075 // happen if there simply are no parallel regions. In the resulting kernel
3076 // all worker threads will simply exit right away, leaving the main thread
3077 // to do the work alone.
3078 if (ReachedKnownParallelRegions.empty() &&
3079 ReachedUnknownParallelRegions.empty()) {
3080 ++NumOpenMPTargetRegionKernelsWithoutStateMachine;
3081
3082 auto Remark = [&](OptimizationRemark OR) {
3083 return OR << "Removing unused state machine from generic-mode kernel.";
3084 };
3085 A.emitRemark<OptimizationRemark>(KernelInitCB, "OMP130", Remark);
3086
3087 return ChangeStatus::CHANGED;
3088 }
3089
3090 // Keep track in the statistics of our new shiny custom state machine.
3091 if (ReachedUnknownParallelRegions.empty()) {
3092 ++NumOpenMPTargetRegionKernelsCustomStateMachineWithoutFallback;
3093
3094 auto Remark = [&](OptimizationRemark OR) {
3095 return OR << "Rewriting generic-mode kernel with a customized state "
3096 "machine.";
3097 };
3098 A.emitRemark<OptimizationRemark>(KernelInitCB, "OMP131", Remark);
3099 } else {
3100 ++NumOpenMPTargetRegionKernelsCustomStateMachineWithFallback;
3101
3102 auto Remark = [&](OptimizationRemarkAnalysis OR) {
3103 return OR << "Generic-mode kernel is executed with a customized state "
3104 "machine that requires a fallback.";
3105 };
3106 A.emitRemark<OptimizationRemarkAnalysis>(KernelInitCB, "OMP132", Remark);
3107
3108 // Tell the user why we ended up with a fallback.
3109 for (CallBase *UnknownParallelRegionCB : ReachedUnknownParallelRegions) {
3110 if (!UnknownParallelRegionCB)
3111 continue;
3112 auto Remark = [&](OptimizationRemarkAnalysis ORA) {
3113 return ORA << "Call may contain unknown parallel regions. Use "
3114 << "`__attribute__((assume(\"omp_no_parallelism\")))` to "
3115 "override.";
3116 };
3117 A.emitRemark<OptimizationRemarkAnalysis>(UnknownParallelRegionCB,
3118 "OMP133", Remark);
3119 }
3120 }
3121
3122 // Create all the blocks:
3123 //
3124 // InitCB = __kmpc_target_init(...)
3125 // bool IsWorker = InitCB >= 0;
3126 // if (IsWorker) {
3127 // SMBeginBB: __kmpc_barrier_simple_spmd(...);
3128 // void *WorkFn;
3129 // bool Active = __kmpc_kernel_parallel(&WorkFn);
3130 // if (!WorkFn) return;
3131 // SMIsActiveCheckBB: if (Active) {
3132 // SMIfCascadeCurrentBB: if (WorkFn == <ParFn0>)
3133 // ParFn0(...);
3134 // SMIfCascadeCurrentBB: else if (WorkFn == <ParFn1>)
3135 // ParFn1(...);
3136 // ...
3137 // SMIfCascadeCurrentBB: else
3138 // ((WorkFnTy*)WorkFn)(...);
3139 // SMEndParallelBB: __kmpc_kernel_end_parallel(...);
3140 // }
3141 // SMDoneBB: __kmpc_barrier_simple_spmd(...);
3142 // goto SMBeginBB;
3143 // }
3144 // UserCodeEntryBB: // user code
3145 // __kmpc_target_deinit(...)
3146 //
3147 Function *Kernel = getAssociatedFunction();
3148 assert(Kernel && "Expected an associated function!")((void)0);
3149
3150 BasicBlock *InitBB = KernelInitCB->getParent();
3151 BasicBlock *UserCodeEntryBB = InitBB->splitBasicBlock(
3152 KernelInitCB->getNextNode(), "thread.user_code.check");
3153 BasicBlock *StateMachineBeginBB = BasicBlock::Create(
3154 Ctx, "worker_state_machine.begin", Kernel, UserCodeEntryBB);
3155 BasicBlock *StateMachineFinishedBB = BasicBlock::Create(
3156 Ctx, "worker_state_machine.finished", Kernel, UserCodeEntryBB);
3157 BasicBlock *StateMachineIsActiveCheckBB = BasicBlock::Create(
3158 Ctx, "worker_state_machine.is_active.check", Kernel, UserCodeEntryBB);
3159 BasicBlock *StateMachineIfCascadeCurrentBB =
3160 BasicBlock::Create(Ctx, "worker_state_machine.parallel_region.check",
3161 Kernel, UserCodeEntryBB);
3162 BasicBlock *StateMachineEndParallelBB =
3163 BasicBlock::Create(Ctx, "worker_state_machine.parallel_region.end",
3164 Kernel, UserCodeEntryBB);
3165 BasicBlock *StateMachineDoneBarrierBB = BasicBlock::Create(
3166 Ctx, "worker_state_machine.done.barrier", Kernel, UserCodeEntryBB);
3167 A.registerManifestAddedBasicBlock(*InitBB);
3168 A.registerManifestAddedBasicBlock(*UserCodeEntryBB);
3169 A.registerManifestAddedBasicBlock(*StateMachineBeginBB);
3170 A.registerManifestAddedBasicBlock(*StateMachineFinishedBB);
3171 A.registerManifestAddedBasicBlock(*StateMachineIsActiveCheckBB);
3172 A.registerManifestAddedBasicBlock(*StateMachineIfCascadeCurrentBB);
3173 A.registerManifestAddedBasicBlock(*StateMachineEndParallelBB);
3174 A.registerManifestAddedBasicBlock(*StateMachineDoneBarrierBB);
3175
3176 const DebugLoc &DLoc = KernelInitCB->getDebugLoc();
3177 ReturnInst::Create(Ctx, StateMachineFinishedBB)->setDebugLoc(DLoc);
3178
3179 InitBB->getTerminator()->eraseFromParent();
3180 Instruction *IsWorker =
3181 ICmpInst::Create(ICmpInst::ICmp, llvm::CmpInst::ICMP_NE, KernelInitCB,
3182 ConstantInt::get(KernelInitCB->getType(), -1),
3183 "thread.is_worker", InitBB);
3184 IsWorker->setDebugLoc(DLoc);
3185 BranchInst::Create(StateMachineBeginBB, UserCodeEntryBB, IsWorker, InitBB);
3186
3187 Module &M = *Kernel->getParent();
3188
3189 // Create local storage for the work function pointer.
3190 const DataLayout &DL = M.getDataLayout();
3191 Type *VoidPtrTy = Type::getInt8PtrTy(Ctx);
3192 Instruction *WorkFnAI =
3193 new AllocaInst(VoidPtrTy, DL.getAllocaAddrSpace(), nullptr,
3194 "worker.work_fn.addr", &Kernel->getEntryBlock().front());
3195 WorkFnAI->setDebugLoc(DLoc);
3196
3197 auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
3198 OMPInfoCache.OMPBuilder.updateToLocation(
3199 OpenMPIRBuilder::LocationDescription(
3200 IRBuilder<>::InsertPoint(StateMachineBeginBB,
3201 StateMachineBeginBB->end()),
3202 DLoc));
3203
3204 Value *Ident = KernelInitCB->getArgOperand(0);
3205 Value *GTid = KernelInitCB;
3206
3207 FunctionCallee BarrierFn =
3208 OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
3209 M, OMPRTL___kmpc_barrier_simple_spmd);
3210 CallInst::Create(BarrierFn, {Ident, GTid}, "", StateMachineBeginBB)
3211 ->setDebugLoc(DLoc);
3212
3213 if (WorkFnAI->getType()->getPointerAddressSpace() !=
3214 (unsigned int)AddressSpace::Generic) {
3215 WorkFnAI = new AddrSpaceCastInst(
3216 WorkFnAI,
3217 PointerType::getWithSamePointeeType(
3218 cast<PointerType>(WorkFnAI->getType()),
3219 (unsigned int)AddressSpace::Generic),
3220 WorkFnAI->getName() + ".generic", StateMachineBeginBB);
3221 WorkFnAI->setDebugLoc(DLoc);
3222 }
3223
3224 FunctionCallee KernelParallelFn =
3225 OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
3226 M, OMPRTL___kmpc_kernel_parallel);
3227 Instruction *IsActiveWorker = CallInst::Create(
3228 KernelParallelFn, {WorkFnAI}, "worker.is_active", StateMachineBeginBB);
3229 IsActiveWorker->setDebugLoc(DLoc);
3230 Instruction *WorkFn = new LoadInst(VoidPtrTy, WorkFnAI, "worker.work_fn",
3231 StateMachineBeginBB);
3232 WorkFn->setDebugLoc(DLoc);
3233
3234 FunctionType *ParallelRegionFnTy = FunctionType::get(
3235 Type::getVoidTy(Ctx), {Type::getInt16Ty(Ctx), Type::getInt32Ty(Ctx)},
3236 false);
3237 Value *WorkFnCast = BitCastInst::CreatePointerBitCastOrAddrSpaceCast(
3238 WorkFn, ParallelRegionFnTy->getPointerTo(), "worker.work_fn.addr_cast",
3239 StateMachineBeginBB);
3240
3241 Instruction *IsDone =
3242 ICmpInst::Create(ICmpInst::ICmp, llvm::CmpInst::ICMP_EQ, WorkFn,
3243 Constant::getNullValue(VoidPtrTy), "worker.is_done",
3244 StateMachineBeginBB);
3245 IsDone->setDebugLoc(DLoc);
3246 BranchInst::Create(StateMachineFinishedBB, StateMachineIsActiveCheckBB,
3247 IsDone, StateMachineBeginBB)
3248 ->setDebugLoc(DLoc);
3249
3250 BranchInst::Create(StateMachineIfCascadeCurrentBB,
3251 StateMachineDoneBarrierBB, IsActiveWorker,
3252 StateMachineIsActiveCheckBB)
3253 ->setDebugLoc(DLoc);
3254
3255 Value *ZeroArg =
3256 Constant::getNullValue(ParallelRegionFnTy->getParamType(0));
3257
3258 // Now that we have most of the CFG skeleton it is time for the if-cascade
3259 // that checks the function pointer we got from the runtime against the
3260 // parallel regions we expect, if there are any.
3261 for (int i = 0, e = ReachedKnownParallelRegions.size(); i < e; ++i) {
3262 auto *ParallelRegion = ReachedKnownParallelRegions[i];
3263 BasicBlock *PRExecuteBB = BasicBlock::Create(
3264 Ctx, "worker_state_machine.parallel_region.execute", Kernel,
3265 StateMachineEndParallelBB);
3266 CallInst::Create(ParallelRegion, {ZeroArg, GTid}, "", PRExecuteBB)
3267 ->setDebugLoc(DLoc);
3268 BranchInst::Create(StateMachineEndParallelBB, PRExecuteBB)
3269 ->setDebugLoc(DLoc);
3270
3271 BasicBlock *PRNextBB =
3272 BasicBlock::Create(Ctx, "worker_state_machine.parallel_region.check",
3273 Kernel, StateMachineEndParallelBB);
3274
3275 // Check if we need to compare the pointer at all or if we can just
3276 // call the parallel region function.
3277 Value *IsPR;
3278 if (i + 1 < e || !ReachedUnknownParallelRegions.empty()) {
3279 Instruction *CmpI = ICmpInst::Create(
3280 ICmpInst::ICmp, llvm::CmpInst::ICMP_EQ, WorkFnCast, ParallelRegion,
3281 "worker.check_parallel_region", StateMachineIfCascadeCurrentBB);
3282 CmpI->setDebugLoc(DLoc);
3283 IsPR = CmpI;
3284 } else {
3285 IsPR = ConstantInt::getTrue(Ctx);
3286 }
3287
3288 BranchInst::Create(PRExecuteBB, PRNextBB, IsPR,
3289 StateMachineIfCascadeCurrentBB)
3290 ->setDebugLoc(DLoc);
3291 StateMachineIfCascadeCurrentBB = PRNextBB;
3292 }
3293
3294 // At the end of the if-cascade we place the indirect function pointer call
3295 // in case we might need it, that is if there can be parallel regions we
3296 // have not handled in the if-cascade above.
3297 if (!ReachedUnknownParallelRegions.empty()) {
3298 StateMachineIfCascadeCurrentBB->setName(
3299 "worker_state_machine.parallel_region.fallback.execute");
3300 CallInst::Create(ParallelRegionFnTy, WorkFnCast, {ZeroArg, GTid}, "",
3301 StateMachineIfCascadeCurrentBB)
3302 ->setDebugLoc(DLoc);
3303 }
3304 BranchInst::Create(StateMachineEndParallelBB,
3305 StateMachineIfCascadeCurrentBB)
3306 ->setDebugLoc(DLoc);
3307
3308 CallInst::Create(OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
3309 M, OMPRTL___kmpc_kernel_end_parallel),
3310 {}, "", StateMachineEndParallelBB)
3311 ->setDebugLoc(DLoc);
3312 BranchInst::Create(StateMachineDoneBarrierBB, StateMachineEndParallelBB)
3313 ->setDebugLoc(DLoc);
3314
3315 CallInst::Create(BarrierFn, {Ident, GTid}, "", StateMachineDoneBarrierBB)
3316 ->setDebugLoc(DLoc);
3317 BranchInst::Create(StateMachineBeginBB, StateMachineDoneBarrierBB)
3318 ->setDebugLoc(DLoc);
3319
3320 return ChangeStatus::CHANGED;
3321 }
3322
3323 /// Fixpoint iteration update function. Will be called every time a dependence
3324 /// changed its state (and in the beginning).
3325 ChangeStatus updateImpl(Attributor &A) override {
3326 KernelInfoState StateBefore = getState();
3327
3328 // Callback to check a read/write instruction.
3329 auto CheckRWInst = [&](Instruction &I) {
3330 // We handle calls later.
3331 if (isa<CallBase>(I))
3332 return true;
3333 // We only care about write effects.
3334 if (!I.mayWriteToMemory())
3335 return true;
3336 if (auto *SI = dyn_cast<StoreInst>(&I)) {
3337 SmallVector<const Value *> Objects;
3338 getUnderlyingObjects(SI->getPointerOperand(), Objects);
3339 if (llvm::all_of(Objects,
3340 [](const Value *Obj) { return isa<AllocaInst>(Obj); }))
3341 return true;
3342 }
3343 // For now we give up on everything but stores.
3344 SPMDCompatibilityTracker.insert(&I);
3345 return true;
3346 };
3347
3348 bool UsedAssumedInformationInCheckRWInst = false;
3349 if (!SPMDCompatibilityTracker.isAtFixpoint())
3350 if (!A.checkForAllReadWriteInstructions(
3351 CheckRWInst, *this, UsedAssumedInformationInCheckRWInst))
3352 SPMDCompatibilityTracker.indicatePessimisticFixpoint();
3353
3354 if (!IsKernelEntry) {
3355 updateReachingKernelEntries(A);
3356 updateParallelLevels(A);
3357 }
3358
3359 // Callback to check a call instruction.
3360 bool AllSPMDStatesWereFixed = true;
3361 auto CheckCallInst = [&](Instruction &I) {
3362 auto &CB = cast<CallBase>(I);
3363 auto &CBAA = A.getAAFor<AAKernelInfo>(
3364 *this, IRPosition::callsite_function(CB), DepClassTy::OPTIONAL);
3365 getState() ^= CBAA.getState();
3366 AllSPMDStatesWereFixed &= CBAA.SPMDCompatibilityTracker.isAtFixpoint();
3367 return true;
3368 };
3369
3370 bool UsedAssumedInformationInCheckCallInst = false;
3371 if (!A.checkForAllCallLikeInstructions(
3372 CheckCallInst, *this, UsedAssumedInformationInCheckCallInst))
3373 return indicatePessimisticFixpoint();
3374
3375 // If we haven't used any assumed information for the SPMD state we can fix
3376 // it.
3377 if (!UsedAssumedInformationInCheckRWInst &&
3378 !UsedAssumedInformationInCheckCallInst && AllSPMDStatesWereFixed)
3379 SPMDCompatibilityTracker.indicateOptimisticFixpoint();
3380
3381 return StateBefore == getState() ? ChangeStatus::UNCHANGED
3382 : ChangeStatus::CHANGED;
3383 }
3384
3385private:
3386 /// Update info regarding reaching kernels.
3387 void updateReachingKernelEntries(Attributor &A) {
3388 auto PredCallSite = [&](AbstractCallSite ACS) {
3389 Function *Caller = ACS.getInstruction()->getFunction();
3390
3391 assert(Caller && "Caller is nullptr")((void)0);
3392
3393 auto &CAA = A.getOrCreateAAFor<AAKernelInfo>(
3394 IRPosition::function(*Caller), this, DepClassTy::REQUIRED);
3395 if (CAA.ReachingKernelEntries.isValidState()) {
1
Calling 'IntegerStateBase::isValidState'
4
Returning from 'IntegerStateBase::isValidState'
5
Taking true branch
3396 ReachingKernelEntries ^= CAA.ReachingKernelEntries;
6
Called C++ object pointer is null
3397 return true;
3398 }
3399
3400 // We lost track of the caller of the associated function, any kernel
3401 // could reach now.
3402 ReachingKernelEntries.indicatePessimisticFixpoint();
3403
3404 return true;
3405 };
3406
3407 bool AllCallSitesKnown;
3408 if (!A.checkForAllCallSites(PredCallSite, *this,
3409 true /* RequireAllCallSites */,
3410 AllCallSitesKnown))
3411 ReachingKernelEntries.indicatePessimisticFixpoint();
3412 }
3413
3414 /// Update info regarding parallel levels.
3415 void updateParallelLevels(Attributor &A) {
3416 auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
3417 OMPInformationCache::RuntimeFunctionInfo &Parallel51RFI =
3418 OMPInfoCache.RFIs[OMPRTL___kmpc_parallel_51];
3419
3420 auto PredCallSite = [&](AbstractCallSite ACS) {
3421 Function *Caller = ACS.getInstruction()->getFunction();
3422
3423 assert(Caller && "Caller is nullptr")((void)0);
3424
3425 auto &CAA =
3426 A.getOrCreateAAFor<AAKernelInfo>(IRPosition::function(*Caller));
3427 if (CAA.ParallelLevels.isValidState()) {
3428 // Any function that is called by `__kmpc_parallel_51` will not be
3429 // folded as the parallel level in the function is updated. In order to
3430 // get it right, all the analysis would depend on the implentation. That
3431 // said, if in the future any change to the implementation, the analysis
3432 // could be wrong. As a consequence, we are just conservative here.
3433 if (Caller == Parallel51RFI.Declaration) {
3434 ParallelLevels.indicatePessimisticFixpoint();
3435 return true;
3436 }
3437
3438 ParallelLevels ^= CAA.ParallelLevels;
3439
3440 return true;
3441 }
3442
3443 // We lost track of the caller of the associated function, any kernel
3444 // could reach now.
3445 ParallelLevels.indicatePessimisticFixpoint();
3446
3447 return true;
3448 };
3449
3450 bool AllCallSitesKnown = true;
3451 if (!A.checkForAllCallSites(PredCallSite, *this,
3452 true /* RequireAllCallSites */,
3453 AllCallSitesKnown))
3454 ParallelLevels.indicatePessimisticFixpoint();
3455 }
3456};
3457
3458/// The call site kernel info abstract attribute, basically, what can we say
3459/// about a call site with regards to the KernelInfoState. For now this simply
3460/// forwards the information from the callee.
3461struct AAKernelInfoCallSite : AAKernelInfo {
3462 AAKernelInfoCallSite(const IRPosition &IRP, Attributor &A)
3463 : AAKernelInfo(IRP, A) {}
3464
3465 /// See AbstractAttribute::initialize(...).
3466 void initialize(Attributor &A) override {
3467 AAKernelInfo::initialize(A);
3468
3469 CallBase &CB = cast<CallBase>(getAssociatedValue());
3470 Function *Callee = getAssociatedFunction();
3471
3472 // Helper to lookup an assumption string.
3473 auto HasAssumption = [](Function *Fn, StringRef AssumptionStr) {
3474 return Fn && hasAssumption(*Fn, AssumptionStr);
3475 };
3476
3477 // Check for SPMD-mode assumptions.
3478 if (HasAssumption(Callee, "ompx_spmd_amenable"))
3479 SPMDCompatibilityTracker.indicateOptimisticFixpoint();
3480
3481 // First weed out calls we do not care about, that is readonly/readnone
3482 // calls, intrinsics, and "no_openmp" calls. Neither of these can reach a
3483 // parallel region or anything else we are looking for.
3484 if (!CB.mayWriteToMemory() || isa<IntrinsicInst>(CB)) {
3485 indicateOptimisticFixpoint();
3486 return;
3487 }
3488
3489 // Next we check if we know the callee. If it is a known OpenMP function
3490 // we will handle them explicitly in the switch below. If it is not, we
3491 // will use an AAKernelInfo object on the callee to gather information and
3492 // merge that into the current state. The latter happens in the updateImpl.
3493 auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
3494 const auto &It = OMPInfoCache.RuntimeFunctionIDMap.find(Callee);
3495 if (It == OMPInfoCache.RuntimeFunctionIDMap.end()) {
3496 // Unknown caller or declarations are not analyzable, we give up.
3497 if (!Callee || !A.isFunctionIPOAmendable(*Callee)) {
3498
3499 // Unknown callees might contain parallel regions, except if they have
3500 // an appropriate assumption attached.
3501 if (!(HasAssumption(Callee, "omp_no_openmp") ||
3502 HasAssumption(Callee, "omp_no_parallelism")))
3503 ReachedUnknownParallelRegions.insert(&CB);
3504
3505 // If SPMDCompatibilityTracker is not fixed, we need to give up on the
3506 // idea we can run something unknown in SPMD-mode.
3507 if (!SPMDCompatibilityTracker.isAtFixpoint())
3508 SPMDCompatibilityTracker.insert(&CB);
3509
3510 // We have updated the state for this unknown call properly, there won't
3511 // be any change so we indicate a fixpoint.
3512 indicateOptimisticFixpoint();
3513 }
3514 // If the callee is known and can be used in IPO, we will update the state
3515 // based on the callee state in updateImpl.
3516 return;
3517 }
3518
3519 const unsigned int WrapperFunctionArgNo = 6;
3520 RuntimeFunction RF = It->getSecond();
3521 switch (RF) {
3522 // All the functions we know are compatible with SPMD mode.
3523 case OMPRTL___kmpc_is_spmd_exec_mode:
3524 case OMPRTL___kmpc_for_static_fini:
3525 case OMPRTL___kmpc_global_thread_num:
3526 case OMPRTL___kmpc_get_hardware_num_threads_in_block:
3527 case OMPRTL___kmpc_get_hardware_num_blocks:
3528 case OMPRTL___kmpc_single:
3529 case OMPRTL___kmpc_end_single:
3530 case OMPRTL___kmpc_master:
3531 case OMPRTL___kmpc_end_master:
3532 case OMPRTL___kmpc_barrier:
3533 break;
3534 case OMPRTL___kmpc_for_static_init_4:
3535 case OMPRTL___kmpc_for_static_init_4u:
3536 case OMPRTL___kmpc_for_static_init_8:
3537 case OMPRTL___kmpc_for_static_init_8u: {
3538 // Check the schedule and allow static schedule in SPMD mode.
3539 unsigned ScheduleArgOpNo = 2;
3540 auto *ScheduleTypeCI =
3541 dyn_cast<ConstantInt>(CB.getArgOperand(ScheduleArgOpNo));
3542 unsigned ScheduleTypeVal =
3543 ScheduleTypeCI ? ScheduleTypeCI->getZExtValue() : 0;
3544 switch (OMPScheduleType(ScheduleTypeVal)) {
3545 case OMPScheduleType::Static:
3546 case OMPScheduleType::StaticChunked:
3547 case OMPScheduleType::Distribute:
3548 case OMPScheduleType::DistributeChunked:
3549 break;
3550 default:
3551 SPMDCompatibilityTracker.insert(&CB);
3552 break;
3553 };
3554 } break;
3555 case OMPRTL___kmpc_target_init:
3556 KernelInitCB = &CB;
3557 break;
3558 case OMPRTL___kmpc_target_deinit:
3559 KernelDeinitCB = &CB;
3560 break;
3561 case OMPRTL___kmpc_parallel_51:
3562 if (auto *ParallelRegion = dyn_cast<Function>(
3563 CB.getArgOperand(WrapperFunctionArgNo)->stripPointerCasts())) {
3564 ReachedKnownParallelRegions.insert(ParallelRegion);
3565 break;
3566 }
3567 // The condition above should usually get the parallel region function
3568 // pointer and record it. In the off chance it doesn't we assume the
3569 // worst.
3570 ReachedUnknownParallelRegions.insert(&CB);
3571 break;
3572 case OMPRTL___kmpc_omp_task:
3573 // We do not look into tasks right now, just give up.
3574 SPMDCompatibilityTracker.insert(&CB);
3575 ReachedUnknownParallelRegions.insert(&CB);
3576 break;
3577 case OMPRTL___kmpc_alloc_shared:
3578 case OMPRTL___kmpc_free_shared:
3579 // Return without setting a fixpoint, to be resolved in updateImpl.
3580 return;
3581 default:
3582 // Unknown OpenMP runtime calls cannot be executed in SPMD-mode,
3583 // generally.
3584 SPMDCompatibilityTracker.insert(&CB);
3585 break;
3586 }
3587 // All other OpenMP runtime calls will not reach parallel regions so they
3588 // can be safely ignored for now. Since it is a known OpenMP runtime call we
3589 // have now modeled all effects and there is no need for any update.
3590 indicateOptimisticFixpoint();
3591 }
3592
3593 ChangeStatus updateImpl(Attributor &A) override {
3594 // TODO: Once we have call site specific value information we can provide
3595 // call site specific liveness information and then it makes
3596 // sense to specialize attributes for call sites arguments instead of
3597 // redirecting requests to the callee argument.
3598 Function *F = getAssociatedFunction();
3599
3600 auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
3601 const auto &It = OMPInfoCache.RuntimeFunctionIDMap.find(F);
3602
3603 // If F is not a runtime function, propagate the AAKernelInfo of the callee.
3604 if (It == OMPInfoCache.RuntimeFunctionIDMap.end()) {
3605 const IRPosition &FnPos = IRPosition::function(*F);
3606 auto &FnAA = A.getAAFor<AAKernelInfo>(*this, FnPos, DepClassTy::REQUIRED);
3607 if (getState() == FnAA.getState())
3608 return ChangeStatus::UNCHANGED;
3609 getState() = FnAA.getState();
3610 return ChangeStatus::CHANGED;
3611 }
3612
3613 // F is a runtime function that allocates or frees memory, check
3614 // AAHeapToStack and AAHeapToShared.
3615 KernelInfoState StateBefore = getState();
3616 assert((It->getSecond() == OMPRTL___kmpc_alloc_shared ||((void)0)
3617 It->getSecond() == OMPRTL___kmpc_free_shared) &&((void)0)
3618 "Expected a __kmpc_alloc_shared or __kmpc_free_shared runtime call")((void)0);
3619
3620 CallBase &CB = cast<CallBase>(getAssociatedValue());
3621
3622 auto &HeapToStackAA = A.getAAFor<AAHeapToStack>(
3623 *this, IRPosition::function(*CB.getCaller()), DepClassTy::OPTIONAL);
3624 auto &HeapToSharedAA = A.getAAFor<AAHeapToShared>(
3625 *this, IRPosition::function(*CB.getCaller()), DepClassTy::OPTIONAL);
3626
3627 RuntimeFunction RF = It->getSecond();
3628
3629 switch (RF) {
3630 // If neither HeapToStack nor HeapToShared assume the call is removed,
3631 // assume SPMD incompatibility.
3632 case OMPRTL___kmpc_alloc_shared:
3633 if (!HeapToStackAA.isAssumedHeapToStack(CB) &&
3634 !HeapToSharedAA.isAssumedHeapToShared(CB))
3635 SPMDCompatibilityTracker.insert(&CB);
3636 break;
3637 case OMPRTL___kmpc_free_shared:
3638 if (!HeapToStackAA.isAssumedHeapToStackRemovedFree(CB) &&
3639 !HeapToSharedAA.isAssumedHeapToSharedRemovedFree(CB))
3640 SPMDCompatibilityTracker.insert(&CB);
3641 break;
3642 default:
3643 SPMDCompatibilityTracker.insert(&CB);
3644 }
3645
3646 return StateBefore == getState() ? ChangeStatus::UNCHANGED
3647 : ChangeStatus::CHANGED;
3648 }
3649};
3650
3651struct AAFoldRuntimeCall
3652 : public StateWrapper<BooleanState, AbstractAttribute> {
3653 using Base = StateWrapper<BooleanState, AbstractAttribute>;
3654
3655 AAFoldRuntimeCall(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
3656
3657 /// Statistics are tracked as part of manifest for now.
3658 void trackStatistics() const override {}
3659
3660 /// Create an abstract attribute biew for the position \p IRP.
3661 static AAFoldRuntimeCall &createForPosition(const IRPosition &IRP,
3662 Attributor &A);
3663
3664 /// See AbstractAttribute::getName()
3665 const std::string getName() const override { return "AAFoldRuntimeCall"; }
3666
3667 /// See AbstractAttribute::getIdAddr()
3668 const char *getIdAddr() const override { return &ID; }
3669
3670 /// This function should return true if the type of the \p AA is
3671 /// AAFoldRuntimeCall
3672 static bool classof(const AbstractAttribute *AA) {
3673 return (AA->getIdAddr() == &ID);
3674 }
3675
3676 static const char ID;
3677};
3678
3679struct AAFoldRuntimeCallCallSiteReturned : AAFoldRuntimeCall {
3680 AAFoldRuntimeCallCallSiteReturned(const IRPosition &IRP, Attributor &A)
3681 : AAFoldRuntimeCall(IRP, A) {}
3682
3683 /// See AbstractAttribute::getAsStr()
3684 const std::string getAsStr() const override {
3685 if (!isValidState())
3686 return "<invalid>";
3687
3688 std::string Str("simplified value: ");
3689
3690 if (!SimplifiedValue.hasValue())
3691 return Str + std::string("none");
3692
3693 if (!SimplifiedValue.getValue())
3694 return Str + std::string("nullptr");
3695
3696 if (ConstantInt *CI = dyn_cast<ConstantInt>(SimplifiedValue.getValue()))
3697 return Str + std::to_string(CI->getSExtValue());
3698
3699 return Str + std::string("unknown");
3700 }
3701
3702 void initialize(Attributor &A) override {
3703 Function *Callee = getAssociatedFunction();
3704
3705 auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
3706 const auto &It = OMPInfoCache.RuntimeFunctionIDMap.find(Callee);
3707 assert(It != OMPInfoCache.RuntimeFunctionIDMap.end() &&((void)0)
3708 "Expected a known OpenMP runtime function")((void)0);
3709
3710 RFKind = It->getSecond();
3711
3712 CallBase &CB = cast<CallBase>(getAssociatedValue());
3713 A.registerSimplificationCallback(
3714 IRPosition::callsite_returned(CB),
3715 [&](const IRPosition &IRP, const AbstractAttribute *AA,
3716 bool &UsedAssumedInformation) -> Optional<Value *> {
3717 assert((isValidState() || (SimplifiedValue.hasValue() &&((void)0)
3718 SimplifiedValue.getValue() == nullptr)) &&((void)0)
3719 "Unexpected invalid state!")((void)0);
3720
3721 if (!isAtFixpoint()) {
3722 UsedAssumedInformation = true;
3723 if (AA)
3724 A.recordDependence(*this, *AA, DepClassTy::OPTIONAL);
3725 }
3726 return SimplifiedValue;
3727 });
3728 }
3729
3730 ChangeStatus updateImpl(Attributor &A) override {
3731 ChangeStatus Changed = ChangeStatus::UNCHANGED;
3732 switch (RFKind) {
3733 case OMPRTL___kmpc_is_spmd_exec_mode:
3734 Changed |= foldIsSPMDExecMode(A);
3735 break;
3736 case OMPRTL___kmpc_is_generic_main_thread_id:
3737 Changed |= foldIsGenericMainThread(A);
3738 break;
3739 case OMPRTL___kmpc_parallel_level:
3740 Changed |= foldParallelLevel(A);
3741 break;
3742 case OMPRTL___kmpc_get_hardware_num_threads_in_block:
3743 Changed = Changed | foldKernelFnAttribute(A, "omp_target_thread_limit");
3744 break;
3745 case OMPRTL___kmpc_get_hardware_num_blocks:
3746 Changed = Changed | foldKernelFnAttribute(A, "omp_target_num_teams");
3747 break;
3748 default:
3749 llvm_unreachable("Unhandled OpenMP runtime function!")__builtin_unreachable();
3750 }
3751
3752 return Changed;
3753 }
3754
3755 ChangeStatus manifest(Attributor &A) override {
3756 ChangeStatus Changed = ChangeStatus::UNCHANGED;
3757
3758 if (SimplifiedValue.hasValue() && SimplifiedValue.getValue()) {
3759 Instruction &CB = *getCtxI();
3760 A.changeValueAfterManifest(CB, **SimplifiedValue);
3761 A.deleteAfterManifest(CB);
3762
3763 LLVM_DEBUG(dbgs() << TAG << "Folding runtime call: " << CB << " with "do { } while (false)
3764 << **SimplifiedValue << "\n")do { } while (false);
3765
3766 Changed = ChangeStatus::CHANGED;
3767 }
3768
3769 return Changed;
3770 }
3771
3772 ChangeStatus indicatePessimisticFixpoint() override {
3773 SimplifiedValue = nullptr;
3774 return AAFoldRuntimeCall::indicatePessimisticFixpoint();
3775 }
3776
3777private:
3778 /// Fold __kmpc_is_spmd_exec_mode into a constant if possible.
3779 ChangeStatus foldIsSPMDExecMode(Attributor &A) {
3780 Optional<Value *> SimplifiedValueBefore = SimplifiedValue;
3781
3782 unsigned AssumedSPMDCount = 0, KnownSPMDCount = 0;
3783 unsigned AssumedNonSPMDCount = 0, KnownNonSPMDCount = 0;
3784 auto &CallerKernelInfoAA = A.getAAFor<AAKernelInfo>(
3785 *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED);
3786
3787 if (!CallerKernelInfoAA.ReachingKernelEntries.isValidState())
3788 return indicatePessimisticFixpoint();
3789
3790 for (Kernel K : CallerKernelInfoAA.ReachingKernelEntries) {
3791 auto &AA = A.getAAFor<AAKernelInfo>(*this, IRPosition::function(*K),
3792 DepClassTy::REQUIRED);
3793
3794 if (!AA.isValidState()) {
3795 SimplifiedValue = nullptr;
3796 return indicatePessimisticFixpoint();
3797 }
3798
3799 if (AA.SPMDCompatibilityTracker.isAssumed()) {
3800 if (AA.SPMDCompatibilityTracker.isAtFixpoint())
3801 ++KnownSPMDCount;
3802 else
3803 ++AssumedSPMDCount;
3804 } else {
3805 if (AA.SPMDCompatibilityTracker.isAtFixpoint())
3806 ++KnownNonSPMDCount;
3807 else
3808 ++AssumedNonSPMDCount;
3809 }
3810 }
3811
3812 if ((AssumedSPMDCount + KnownSPMDCount) &&
3813 (AssumedNonSPMDCount + KnownNonSPMDCount))
3814 return indicatePessimisticFixpoint();
3815
3816 auto &Ctx = getAnchorValue().getContext();
3817 if (KnownSPMDCount || AssumedSPMDCount) {
3818 assert(KnownNonSPMDCount == 0 && AssumedNonSPMDCount == 0 &&((void)0)
3819 "Expected only SPMD kernels!")((void)0);
3820 // All reaching kernels are in SPMD mode. Update all function calls to
3821 // __kmpc_is_spmd_exec_mode to 1.
3822 SimplifiedValue = ConstantInt::get(Type::getInt8Ty(Ctx), true);
3823 } else if (KnownNonSPMDCount || AssumedNonSPMDCount) {
3824 assert(KnownSPMDCount == 0 && AssumedSPMDCount == 0 &&((void)0)
3825 "Expected only non-SPMD kernels!")((void)0);
3826 // All reaching kernels are in non-SPMD mode. Update all function
3827 // calls to __kmpc_is_spmd_exec_mode to 0.
3828 SimplifiedValue = ConstantInt::get(Type::getInt8Ty(Ctx), false);
3829 } else {
3830 // We have empty reaching kernels, therefore we cannot tell if the
3831 // associated call site can be folded. At this moment, SimplifiedValue
3832 // must be none.
3833 assert(!SimplifiedValue.hasValue() && "SimplifiedValue should be none")((void)0);
3834 }
3835
3836 return SimplifiedValue == SimplifiedValueBefore ? ChangeStatus::UNCHANGED
3837 : ChangeStatus::CHANGED;
3838 }
3839
3840 /// Fold __kmpc_is_generic_main_thread_id into a constant if possible.
3841 ChangeStatus foldIsGenericMainThread(Attributor &A) {
3842 Optional<Value *> SimplifiedValueBefore = SimplifiedValue;
3843
3844 CallBase &CB = cast<CallBase>(getAssociatedValue());
3845 Function *F = CB.getFunction();
3846 const auto &ExecutionDomainAA = A.getAAFor<AAExecutionDomain>(
3847 *this, IRPosition::function(*F), DepClassTy::REQUIRED);
3848
3849 if (!ExecutionDomainAA.isValidState())
3850 return indicatePessimisticFixpoint();
3851
3852 auto &Ctx = getAnchorValue().getContext();
3853 if (ExecutionDomainAA.isExecutedByInitialThreadOnly(CB))
3854 SimplifiedValue = ConstantInt::get(Type::getInt8Ty(Ctx), true);
3855 else
3856 return indicatePessimisticFixpoint();
3857
3858 return SimplifiedValue == SimplifiedValueBefore ? ChangeStatus::UNCHANGED
3859 : ChangeStatus::CHANGED;
3860 }
3861
3862 /// Fold __kmpc_parallel_level into a constant if possible.
3863 ChangeStatus foldParallelLevel(Attributor &A) {
3864 Optional<Value *> SimplifiedValueBefore = SimplifiedValue;
3865
3866 auto &CallerKernelInfoAA = A.getAAFor<AAKernelInfo>(
3867 *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED);
3868
3869 if (!CallerKernelInfoAA.ParallelLevels.isValidState())
3870 return indicatePessimisticFixpoint();
3871
3872 if (!CallerKernelInfoAA.ReachingKernelEntries.isValidState())
3873 return indicatePessimisticFixpoint();
3874
3875 if (CallerKernelInfoAA.ReachingKernelEntries.empty()) {
3876 assert(!SimplifiedValue.hasValue() &&((void)0)
3877 "SimplifiedValue should keep none at this point")((void)0);
3878 return ChangeStatus::UNCHANGED;
3879 }
3880
3881 unsigned AssumedSPMDCount = 0, KnownSPMDCount = 0;
3882 unsigned AssumedNonSPMDCount = 0, KnownNonSPMDCount = 0;
3883 for (Kernel K : CallerKernelInfoAA.ReachingKernelEntries) {
3884 auto &AA = A.getAAFor<AAKernelInfo>(*this, IRPosition::function(*K),
3885 DepClassTy::REQUIRED);
3886 if (!AA.SPMDCompatibilityTracker.isValidState())
3887 return indicatePessimisticFixpoint();
3888
3889 if (AA.SPMDCompatibilityTracker.isAssumed()) {
3890 if (AA.SPMDCompatibilityTracker.isAtFixpoint())
3891 ++KnownSPMDCount;
3892 else
3893 ++AssumedSPMDCount;
3894 } else {
3895 if (AA.SPMDCompatibilityTracker.isAtFixpoint())
3896 ++KnownNonSPMDCount;
3897 else
3898 ++AssumedNonSPMDCount;
3899 }
3900 }
3901
3902 if ((AssumedSPMDCount + KnownSPMDCount) &&
3903 (AssumedNonSPMDCount + KnownNonSPMDCount))
3904 return indicatePessimisticFixpoint();
3905
3906 auto &Ctx = getAnchorValue().getContext();
3907 // If the caller can only be reached by SPMD kernel entries, the parallel
3908 // level is 1. Similarly, if the caller can only be reached by non-SPMD
3909 // kernel entries, it is 0.
3910 if (AssumedSPMDCount || KnownSPMDCount) {
3911 assert(KnownNonSPMDCount == 0 && AssumedNonSPMDCount == 0 &&((void)0)
3912 "Expected only SPMD kernels!")((void)0);
3913 SimplifiedValue = ConstantInt::get(Type::getInt8Ty(Ctx), 1);
3914 } else {
3915 assert(KnownSPMDCount == 0 && AssumedSPMDCount == 0 &&((void)0)
3916 "Expected only non-SPMD kernels!")((void)0);
3917 SimplifiedValue = ConstantInt::get(Type::getInt8Ty(Ctx), 0);
3918 }
3919 return SimplifiedValue == SimplifiedValueBefore ? ChangeStatus::UNCHANGED
3920 : ChangeStatus::CHANGED;
3921 }
3922
3923 ChangeStatus foldKernelFnAttribute(Attributor &A, llvm::StringRef Attr) {
3924 // Specialize only if all the calls agree with the attribute constant value
3925 int32_t CurrentAttrValue = -1;
3926 Optional<Value *> SimplifiedValueBefore = SimplifiedValue;
3927
3928 auto &CallerKernelInfoAA = A.getAAFor<AAKernelInfo>(
3929 *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED);
3930
3931 if (!CallerKernelInfoAA.ReachingKernelEntries.isValidState())
3932 return indicatePessimisticFixpoint();
3933
3934 // Iterate over the kernels that reach this function
3935 for (Kernel K : CallerKernelInfoAA.ReachingKernelEntries) {
3936 int32_t NextAttrVal = -1;
3937 if (K->hasFnAttribute(Attr))
3938 NextAttrVal =
3939 std::stoi(K->getFnAttribute(Attr).getValueAsString().str());
3940
3941 if (NextAttrVal == -1 ||
3942 (CurrentAttrValue != -1 && CurrentAttrValue != NextAttrVal))
3943 return indicatePessimisticFixpoint();
3944 CurrentAttrValue = NextAttrVal;
3945 }
3946
3947 if (CurrentAttrValue != -1) {
3948 auto &Ctx = getAnchorValue().getContext();
3949 SimplifiedValue =
3950 ConstantInt::get(Type::getInt32Ty(Ctx), CurrentAttrValue);
3951 }
3952 return SimplifiedValue == SimplifiedValueBefore ? ChangeStatus::UNCHANGED
3953 : ChangeStatus::CHANGED;
3954 }
3955
3956 /// An optional value the associated value is assumed to fold to. That is, we
3957 /// assume the associated value (which is a call) can be replaced by this
3958 /// simplified value.
3959 Optional<Value *> SimplifiedValue;
3960
3961 /// The runtime function kind of the callee of the associated call site.
3962 RuntimeFunction RFKind;
3963};
3964
3965} // namespace
3966
3967/// Register folding callsite
3968void OpenMPOpt::registerFoldRuntimeCall(RuntimeFunction RF) {
3969 auto &RFI = OMPInfoCache.RFIs[RF];
3970 RFI.foreachUse(SCC, [&](Use &U, Function &F) {
3971 CallInst *CI = OpenMPOpt::getCallIfRegularCall(U, &RFI);
3972 if (!CI)
3973 return false;
3974 A.getOrCreateAAFor<AAFoldRuntimeCall>(
3975 IRPosition::callsite_returned(*CI), /* QueryingAA */ nullptr,
3976 DepClassTy::NONE, /* ForceUpdate */ false,
3977 /* UpdateAfterInit */ false);
3978 return false;
3979 });
3980}
3981
3982void OpenMPOpt::registerAAs(bool IsModulePass) {
3983 if (SCC.empty())
3984
3985 return;
3986 if (IsModulePass) {
3987 // Ensure we create the AAKernelInfo AAs first and without triggering an
3988 // update. This will make sure we register all value simplification
3989 // callbacks before any other AA has the chance to create an AAValueSimplify
3990 // or similar.
3991 for (Function *Kernel : OMPInfoCache.Kernels)
3992 A.getOrCreateAAFor<AAKernelInfo>(
3993 IRPosition::function(*Kernel), /* QueryingAA */ nullptr,
3994 DepClassTy::NONE, /* ForceUpdate */ false,
3995 /* UpdateAfterInit */ false);
3996
3997
3998 registerFoldRuntimeCall(OMPRTL___kmpc_is_generic_main_thread_id);
3999 registerFoldRuntimeCall(OMPRTL___kmpc_is_spmd_exec_mode);
4000 registerFoldRuntimeCall(OMPRTL___kmpc_parallel_level);
4001 registerFoldRuntimeCall(OMPRTL___kmpc_get_hardware_num_threads_in_block);
4002 registerFoldRuntimeCall(OMPRTL___kmpc_get_hardware_num_blocks);
4003 }
4004
4005 // Create CallSite AA for all Getters.
4006 for (int Idx = 0; Idx < OMPInfoCache.ICVs.size() - 1; ++Idx) {
4007 auto ICVInfo = OMPInfoCache.ICVs[static_cast<InternalControlVar>(Idx)];
4008
4009 auto &GetterRFI = OMPInfoCache.RFIs[ICVInfo.Getter];
4010
4011 auto CreateAA = [&](Use &U, Function &Caller) {
4012 CallInst *CI = OpenMPOpt::getCallIfRegularCall(U, &GetterRFI);
4013 if (!CI)
4014 return false;
4015
4016 auto &CB = cast<CallBase>(*CI);
4017
4018 IRPosition CBPos = IRPosition::callsite_function(CB);
4019 A.getOrCreateAAFor<AAICVTracker>(CBPos);
4020 return false;
4021 };
4022
4023 GetterRFI.foreachUse(SCC, CreateAA);
4024 }
4025 auto &GlobalizationRFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared];
4026 auto CreateAA = [&](Use &U, Function &F) {
4027 A.getOrCreateAAFor<AAHeapToShared>(IRPosition::function(F));
4028 return false;
4029 };
4030 GlobalizationRFI.foreachUse(SCC, CreateAA);
4031
4032 // Create an ExecutionDomain AA for every function and a HeapToStack AA for
4033 // every function if there is a device kernel.
4034 if (!isOpenMPDevice(M))
4035 return;
4036
4037 for (auto *F : SCC) {
4038 if (F->isDeclaration())
4039 continue;
4040
4041 A.getOrCreateAAFor<AAExecutionDomain>(IRPosition::function(*F));
4042 A.getOrCreateAAFor<AAHeapToStack>(IRPosition::function(*F));
4043
4044 for (auto &I : instructions(*F)) {
4045 if (auto *LI = dyn_cast<LoadInst>(&I)) {
4046 bool UsedAssumedInformation = false;
4047 A.getAssumedSimplified(IRPosition::value(*LI), /* AA */ nullptr,
4048 UsedAssumedInformation);
4049 }
4050 }
4051 }
4052}
4053
// Out-of-class definitions of the static `ID` members of the abstract
// attribute classes used by this pass; each one is initialized to 0.
// NOTE(review): presumably only the address of each ID serves as the unique
// identifier of the attribute class -- confirm against Attributor.h.
4054const char AAICVTracker::ID = 0;
4055const char AAKernelInfo::ID = 0;
4056const char AAExecutionDomain::ID = 0;
4057const char AAHeapToShared::ID = 0;
4058const char AAFoldRuntimeCall::ID = 0;
4059
4060AAICVTracker &AAICVTracker::createForPosition(const IRPosition &IRP,
4061 Attributor &A) {
4062 AAICVTracker *AA = nullptr;
4063 switch (IRP.getPositionKind()) {
4064 case IRPosition::IRP_INVALID:
4065 case IRPosition::IRP_FLOAT:
4066 case IRPosition::IRP_ARGUMENT:
4067 case IRPosition::IRP_CALL_SITE_ARGUMENT:
4068 llvm_unreachable("ICVTracker can only be created for function position!")__builtin_unreachable();
4069 case IRPosition::IRP_RETURNED:
4070 AA = new (A.Allocator) AAICVTrackerFunctionReturned(IRP, A);
4071 break;
4072 case IRPosition::IRP_CALL_SITE_RETURNED:
4073 AA = new (A.Allocator) AAICVTrackerCallSiteReturned(IRP, A);
4074 break;
4075 case IRPosition::IRP_CALL_SITE:
4076 AA = new (A.Allocator) AAICVTrackerCallSite(IRP, A);
4077 break;
4078 case IRPosition::IRP_FUNCTION:
4079 AA = new (A.Allocator) AAICVTrackerFunction(IRP, A);
4080 break;
4081 }
4082
4083 return *AA;
4084}
4085
4086AAExecutionDomain &AAExecutionDomain::createForPosition(const IRPosition &IRP,
4087 Attributor &A) {
4088 AAExecutionDomainFunction *AA = nullptr;
4089 switch (IRP.getPositionKind()) {
4090 case IRPosition::IRP_INVALID:
4091 case IRPosition::IRP_FLOAT:
4092 case IRPosition::IRP_ARGUMENT:
4093 case IRPosition::IRP_CALL_SITE_ARGUMENT:
4094 case IRPosition::IRP_RETURNED:
4095 case IRPosition::IRP_CALL_SITE_RETURNED:
4096 case IRPosition::IRP_CALL_SITE:
4097 llvm_unreachable(__builtin_unreachable()
4098 "AAExecutionDomain can only be created for function position!")__builtin_unreachable();
4099 case IRPosition::IRP_FUNCTION:
4100 AA = new (A.Allocator) AAExecutionDomainFunction(IRP, A);
4101 break;
4102 }
4103
4104 return *AA;
4105}
4106
4107AAHeapToShared &AAHeapToShared::createForPosition(const IRPosition &IRP,
4108 Attributor &A) {
4109 AAHeapToSharedFunction *AA = nullptr;
4110 switch (IRP.getPositionKind()) {
4111 case IRPosition::IRP_INVALID:
4112 case IRPosition::IRP_FLOAT:
4113 case IRPosition::IRP_ARGUMENT:
4114 case IRPosition::IRP_CALL_SITE_ARGUMENT:
4115 case IRPosition::IRP_RETURNED:
4116 case IRPosition::IRP_CALL_SITE_RETURNED:
4117 case IRPosition::IRP_CALL_SITE:
4118 llvm_unreachable(__builtin_unreachable()
4119 "AAHeapToShared can only be created for function position!")__builtin_unreachable();
4120 case IRPosition::IRP_FUNCTION:
4121 AA = new (A.Allocator) AAHeapToSharedFunction(IRP, A);
4122 break;
4123 }
4124
4125 return *AA;
4126}
4127
4128AAKernelInfo &AAKernelInfo::createForPosition(const IRPosition &IRP,
4129 Attributor &A) {
4130 AAKernelInfo *AA = nullptr;
4131 switch (IRP.getPositionKind()) {
4132 case IRPosition::IRP_INVALID:
4133 case IRPosition::IRP_FLOAT:
4134 case IRPosition::IRP_ARGUMENT:
4135 case IRPosition::IRP_RETURNED:
4136 case IRPosition::IRP_CALL_SITE_RETURNED:
4137 case IRPosition::IRP_CALL_SITE_ARGUMENT:
4138 llvm_unreachable("KernelInfo can only be created for function position!")__builtin_unreachable();
4139 case IRPosition::IRP_CALL_SITE:
4140 AA = new (A.Allocator) AAKernelInfoCallSite(IRP, A);
4141 break;
4142 case IRPosition::IRP_FUNCTION:
4143 AA = new (A.Allocator) AAKernelInfoFunction(IRP, A);
4144 break;
4145 }
4146
4147 return *AA;
4148}
4149
4150AAFoldRuntimeCall &AAFoldRuntimeCall::createForPosition(const IRPosition &IRP,
4151 Attributor &A) {
4152 AAFoldRuntimeCall *AA = nullptr;
4153 switch (IRP.getPositionKind()) {
4154 case IRPosition::IRP_INVALID:
4155 case IRPosition::IRP_FLOAT:
4156 case IRPosition::IRP_ARGUMENT:
4157 case IRPosition::IRP_RETURNED:
4158 case IRPosition::IRP_FUNCTION:
4159 case IRPosition::IRP_CALL_SITE:
4160 case IRPosition::IRP_CALL_SITE_ARGUMENT:
4161 llvm_unreachable("KernelInfo can only be created for call site position!")__builtin_unreachable();
4162 case IRPosition::IRP_CALL_SITE_RETURNED:
4163 AA = new (A.Allocator) AAFoldRuntimeCallCallSiteReturned(IRP, A);
4164 break;
4165 }
4166
4167 return *AA;
4168}
4169
4170PreservedAnalyses OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) {
4171 if (!containsOpenMP(M))
4172 return PreservedAnalyses::all();
4173 if (DisableOpenMPOptimizations)
4174 return PreservedAnalyses::all();
4175
4176 FunctionAnalysisManager &FAM =
4177 AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
4178 KernelSet Kernels = getDeviceKernels(M);
4179
4180 auto IsCalled = [&](Function &F) {
4181 if (Kernels.contains(&F))
4182 return true;
4183 for (const User *U : F.users())
4184 if (!isa<BlockAddress>(U))
4185 return true;
4186 return false;
4187 };
4188
4189 auto EmitRemark = [&](Function &F) {
4190 auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
4191 ORE.emit([&]() {
4192 OptimizationRemarkAnalysis ORA(DEBUG_TYPE"openmp-opt", "OMP140", &F);
4193 return ORA << "Could not internalize function. "
4194 << "Some optimizations may not be possible. [OMP140]";
4195 });
4196 };
4197
4198 // Create internal copies of each function if this is a kernel Module. This
4199 // allows iterprocedural passes to see every call edge.
4200 DenseMap<Function *, Function *> InternalizedMap;
4201 if (isOpenMPDevice(M)) {
4202 SmallPtrSet<Function *, 16> InternalizeFns;
4203 for (Function &F : M)
4204 if (!F.isDeclaration() && !Kernels.contains(&F) && IsCalled(F) &&
4205 !DisableInternalization) {
4206 if (Attributor::isInternalizable(F)) {
4207 InternalizeFns.insert(&F);
4208 } else if (!F.hasLocalLinkage() && !F.hasFnAttribute(Attribute::Cold)) {
4209 EmitRemark(F);
4210 }
4211 }
4212
4213 Attributor::internalizeFunctions(InternalizeFns, InternalizedMap);
4214 }
4215
4216 // Look at every function in the Module unless it was internalized.
4217 SmallVector<Function *, 16> SCC;
4218 for (Function &F : M)
4219 if (!F.isDeclaration() && !InternalizedMap.lookup(&F))
4220 SCC.push_back(&F);
4221
4222 if (SCC.empty())
4223 return PreservedAnalyses::all();
4224
4225 AnalysisGetter AG(FAM);
4226
4227 auto OREGetter = [&FAM](Function *F) -> OptimizationRemarkEmitter & {
4228 return FAM.getResult<OptimizationRemarkEmitterAnalysis>(*F);
4229 };
4230
4231 BumpPtrAllocator Allocator;
4232 CallGraphUpdater CGUpdater;
4233
4234 SetVector<Function *> Functions(SCC.begin(), SCC.end());
4235 OMPInformationCache InfoCache(M, AG, Allocator, /*CGSCC*/ Functions, Kernels);
4236
4237 unsigned MaxFixpointIterations = (isOpenMPDevice(M)) ? 128 : 32;
4238 Attributor A(Functions, InfoCache, CGUpdater, nullptr, true, false,
4239 MaxFixpointIterations, OREGetter, DEBUG_TYPE"openmp-opt");
4240
4241 OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A);
4242 bool Changed = OMPOpt.run(true);
4243 if (Changed)
4244 return PreservedAnalyses::none();
4245
4246 return PreservedAnalyses::all();
4247}
4248
4249PreservedAnalyses OpenMPOptCGSCCPass::run(LazyCallGraph::SCC &C,
4250 CGSCCAnalysisManager &AM,
4251 LazyCallGraph &CG,
4252 CGSCCUpdateResult &UR) {
4253 if (!containsOpenMP(*C.begin()->getFunction().getParent()))
4254 return PreservedAnalyses::all();
4255 if (DisableOpenMPOptimizations)
4256 return PreservedAnalyses::all();
4257
4258 SmallVector<Function *, 16> SCC;
4259 // If there are kernels in the module, we have to run on all SCC's.
4260 for (LazyCallGraph::Node &N : C) {
4261 Function *Fn = &N.getFunction();
4262 SCC.push_back(Fn);
4263 }
4264
4265 if (SCC.empty())
4266 return PreservedAnalyses::all();
4267
4268 Module &M = *C.begin()->getFunction().getParent();
4269
4270 KernelSet Kernels = getDeviceKernels(M);
4271
4272 FunctionAnalysisManager &FAM =
4273 AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();
4274
4275 AnalysisGetter AG(FAM);
4276
4277 auto OREGetter = [&FAM](Function *F) -> OptimizationRemarkEmitter & {
4278 return FAM.getResult<OptimizationRemarkEmitterAnalysis>(*F);
4279 };
4280
4281 BumpPtrAllocator Allocator;
4282 CallGraphUpdater CGUpdater;
4283 CGUpdater.initialize(CG, C, AM, UR);
4284
4285 SetVector<Function *> Functions(SCC.begin(), SCC.end());
4286 OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG, Allocator,
4287 /*CGSCC*/ Functions, Kernels);
4288
4289 unsigned MaxFixpointIterations = (isOpenMPDevice(M)) ? 128 : 32;
4290 Attributor A(Functions, InfoCache, CGUpdater, nullptr, false, true,
4291 MaxFixpointIterations, OREGetter, DEBUG_TYPE"openmp-opt");
4292
4293 OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A);
4294 bool Changed = OMPOpt.run(false);
4295 if (Changed)
4296 return PreservedAnalyses::none();
4297
4298 return PreservedAnalyses::all();
4299}
4300
4301namespace {
4302
4303struct OpenMPOptCGSCCLegacyPass : public CallGraphSCCPass {
4304 CallGraphUpdater CGUpdater;
4305 static char ID;
4306
4307 OpenMPOptCGSCCLegacyPass() : CallGraphSCCPass(ID) {
4308 initializeOpenMPOptCGSCCLegacyPassPass(*PassRegistry::getPassRegistry());
4309 }
4310
4311 void getAnalysisUsage(AnalysisUsage &AU) const override {
4312 CallGraphSCCPass::getAnalysisUsage(AU);
4313 }
4314
4315 bool runOnSCC(CallGraphSCC &CGSCC) override {
4316 if (!containsOpenMP(CGSCC.getCallGraph().getModule()))
4317 return false;
4318 if (DisableOpenMPOptimizations || skipSCC(CGSCC))
4319 return false;
4320
4321 SmallVector<Function *, 16> SCC;
4322 // If there are kernels in the module, we have to run on all SCC's.
4323 for (CallGraphNode *CGN : CGSCC) {
4324 Function *Fn = CGN->getFunction();
4325 if (!Fn || Fn->isDeclaration())
4326 continue;
4327 SCC.push_back(Fn);
4328 }
4329
4330 if (SCC.empty())
4331 return false;
4332
4333 Module &M = CGSCC.getCallGraph().getModule();
4334 KernelSet Kernels = getDeviceKernels(M);
4335
4336 CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
4337 CGUpdater.initialize(CG, CGSCC);
4338
4339 // Maintain a map of functions to avoid rebuilding the ORE
4340 DenseMap<Function *, std::unique_ptr<OptimizationRemarkEmitter>> OREMap;
4341 auto OREGetter = [&OREMap](Function *F) -> OptimizationRemarkEmitter & {
4342 std::unique_ptr<OptimizationRemarkEmitter> &ORE = OREMap[F];
4343 if (!ORE)
4344 ORE = std::make_unique<OptimizationRemarkEmitter>(F);
4345 return *ORE;
4346 };
4347
4348 AnalysisGetter AG;
4349 SetVector<Function *> Functions(SCC.begin(), SCC.end());
4350 BumpPtrAllocator Allocator;
4351 OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG,
4352 Allocator,
4353 /*CGSCC*/ Functions, Kernels);
4354
4355 unsigned MaxFixpointIterations = (isOpenMPDevice(M)) ? 128 : 32;
4356 Attributor A(Functions, InfoCache, CGUpdater, nullptr, false, true,
4357 MaxFixpointIterations, OREGetter, DEBUG_TYPE"openmp-opt");
4358
4359 OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A);
4360 return OMPOpt.run(false);
4361 }
4362
4363 bool doFinalization(CallGraph &CG) override { return CGUpdater.finalize(); }
4364};
4365
4366} // end anonymous namespace
4367
4368KernelSet llvm::omp::getDeviceKernels(Module &M) {
4369 // TODO: Create a more cross-platform way of determining device kernels.
4370 NamedMDNode *MD = M.getOrInsertNamedMetadata("nvvm.annotations");
4371 KernelSet Kernels;
4372
4373 if (!MD)
4374 return Kernels;
4375
4376 for (auto *Op : MD->operands()) {
4377 if (Op->getNumOperands() < 2)
4378 continue;
4379 MDString *KindID = dyn_cast<MDString>(Op->getOperand(1));
4380 if (!KindID || KindID->getString() != "kernel")
4381 continue;
4382
4383 Function *KernelFn =
4384 mdconst::dyn_extract_or_null<Function>(Op->getOperand(0));
4385 if (!KernelFn)
4386 continue;
4387
4388 ++NumOpenMPTargetRegionKernels;
4389
4390 Kernels.insert(KernelFn);
4391 }
4392
4393 return Kernels;
4394}
4395
4396bool llvm::omp::containsOpenMP(Module &M) {
4397 Metadata *MD = M.getModuleFlag("openmp");
4398 if (!MD)
4399 return false;
4400
4401 return true;
4402}
4403
4404bool llvm::omp::isOpenMPDevice(Module &M) {
4405 Metadata *MD = M.getModuleFlag("openmp-device");
4406 if (!MD)
4407 return false;
4408
4409 return true;
4410}
4411
4412char OpenMPOptCGSCCLegacyPass::ID = 0;
4413
4414INITIALIZE_PASS_BEGIN(OpenMPOptCGSCCLegacyPass, "openmp-opt-cgscc",static void *initializeOpenMPOptCGSCCLegacyPassPassOnce(PassRegistry
&Registry) {
4415 "OpenMP specific optimizations", false, false)static void *initializeOpenMPOptCGSCCLegacyPassPassOnce(PassRegistry
&Registry) {
4416INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)initializeCallGraphWrapperPassPass(Registry);
4417INITIALIZE_PASS_END(OpenMPOptCGSCCLegacyPass, "openmp-opt-cgscc",PassInfo *PI = new PassInfo( "OpenMP specific optimizations",
"openmp-opt-cgscc", &OpenMPOptCGSCCLegacyPass::ID, PassInfo
::NormalCtor_t(callDefaultCtor<OpenMPOptCGSCCLegacyPass>
), false, false); Registry.registerPass(*PI, true); return PI
; } static llvm::once_flag InitializeOpenMPOptCGSCCLegacyPassPassFlag
; void llvm::initializeOpenMPOptCGSCCLegacyPassPass(PassRegistry
&Registry) { llvm::call_once(InitializeOpenMPOptCGSCCLegacyPassPassFlag
, initializeOpenMPOptCGSCCLegacyPassPassOnce, std::ref(Registry
)); }
4418 "OpenMP specific optimizations", false, false)PassInfo *PI = new PassInfo( "OpenMP specific optimizations",
"openmp-opt-cgscc", &OpenMPOptCGSCCLegacyPass::ID, PassInfo
::NormalCtor_t(callDefaultCtor<OpenMPOptCGSCCLegacyPass>
), false, false); Registry.registerPass(*PI, true); return PI
; } static llvm::once_flag InitializeOpenMPOptCGSCCLegacyPassPassFlag
; void llvm::initializeOpenMPOptCGSCCLegacyPassPass(PassRegistry
&Registry) { llvm::call_once(InitializeOpenMPOptCGSCCLegacyPassPassFlag
, initializeOpenMPOptCGSCCLegacyPassPassOnce, std::ref(Registry
)); }
4419
/// Factory for the legacy pass manager: returns a newly heap-allocated
/// OpenMPOptCGSCCLegacyPass.
/// NOTE(review): presumably ownership passes to the caller / pass manager --
/// confirm against the callers.
4420Pass *llvm::createOpenMPOptCGSCCLegacyPass() {
4421 return new OpenMPOptCGSCCLegacyPass();
4422}

/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/IPO/Attributor.h

1//===- Attributor.h --- Module-wide attribute deduction ---------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Attributor: An inter procedural (abstract) "attribute" deduction framework.
10//
11// The Attributor framework is an inter procedural abstract analysis (fixpoint
12// iteration analysis). The goal is to allow easy deduction of new attributes as
13// well as information exchange between abstract attributes in-flight.
14//
15// The Attributor class is the driver and the link between the various abstract
16// attributes. The Attributor will iterate until a fixpoint state is reached by
17// all abstract attributes in-flight, or until it will enforce a pessimistic fix
18// point because an iteration limit is reached.
19//
20// Abstract attributes, derived from the AbstractAttribute class, actually
21// describe properties of the code. They can correspond to actual LLVM-IR
22// attributes, or they can be more general, ultimately unrelated to LLVM-IR
23// attributes. The latter is useful when an abstract attribute provides
24// information to other abstract attributes in-flight but we might not want to
25// manifest the information. The Attributor allows to query in-flight abstract
26// attributes through the `Attributor::getAAFor` method (see the method
27// description for an example). If the method is used by an abstract attribute
28// P, and it results in an abstract attribute Q, the Attributor will
29// automatically capture a potential dependence from Q to P. This dependence
30// will cause P to be reevaluated whenever Q changes in the future.
31//
32// The Attributor will only reevaluate abstract attributes that might have
33// changed since the last iteration. That means that the Attributor will not
34// revisit all instructions/blocks/functions in the module but only query
35// an update from a subset of the abstract attributes.
36//
37// The update method `AbstractAttribute::updateImpl` is implemented by the
38// specific "abstract attribute" subclasses. The method is invoked whenever the
39// currently assumed state (see the AbstractState class) might not be valid
40// anymore. This can, for example, happen if the state was dependent on another
41// abstract attribute that changed. In every invocation, the update method has
42// to adjust the internal state of an abstract attribute to a point that is
43// justifiable by the underlying IR and the current state of abstract attributes
44// in-flight. Since the IR is given and assumed to be valid, the information
45// derived from it can be assumed to hold. However, information derived from
46// other abstract attributes is conditional on various things. If the justifying
47// state changed, the `updateImpl` has to revisit the situation and potentially
48// find another justification or limit the optimistic assumptions made.
49//
50// Change is the key in this framework. Until a state of no-change, thus a
51// fixpoint, is reached, the Attributor will query the abstract attributes
52// in-flight to re-evaluate their state. If the (current) state is too
53// optimistic, hence it cannot be justified anymore through other abstract
54// attributes or the state of the IR, the state of the abstract attribute will
55// have to change. Generally, we assume abstract attribute state to be a finite
56// height lattice and the update function to be monotone. However, these
57// conditions are not enforced because the iteration limit will guarantee
58// termination. If an optimistic fixpoint is reached, or a pessimistic fix
59// point is enforced after a timeout, the abstract attributes are tasked to
60// manifest their result in the IR for passes to come.
61//
62// Attribute manifestation is not mandatory. If desired, there is support to
63// generate a single or multiple LLVM-IR attributes already in the helper struct
64// IRAttribute. In the simplest case, a subclass inherits from IRAttribute with
65// a proper Attribute::AttrKind as template parameter. The Attributor
66// manifestation framework will then create and place a new attribute if it is
67// allowed to do so (based on the abstract state). Other use cases can be
68// achieved by overloading AbstractAttribute or IRAttribute methods.
69//
70//
71// The "mechanics" of adding a new "abstract attribute":
72// - Define a class (transitively) inheriting from AbstractAttribute and one
73// (which could be the same) that (transitively) inherits from AbstractState.
74// For the latter, consider the already available BooleanState and
75// {Inc,Dec,Bit}IntegerState if they fit your needs, e.g., you require only a
76// number tracking or bit-encoding.
77// - Implement all pure methods. Also use overloading if the attribute is not
78// conforming with the "default" behavior: A (set of) LLVM-IR attribute(s) for
79// an argument, call site argument, function return value, or function. See
80// the class and method descriptions for more information on the two
81// "Abstract" classes and their respective methods.
82// - Register opportunities for the new abstract attribute in the
83// `Attributor::identifyDefaultAbstractAttributes` method if it should be
84// counted as a 'default' attribute.
85// - Add sufficient tests.
86// - Add a Statistics object for bookkeeping. If it is a simple (set of)
87// attribute(s) manifested through the Attributor manifestation framework, see
88// the bookkeeping function in Attributor.cpp.
89// - If instructions with a certain opcode are interesting to the attribute, add
90// that opcode to the switch in `Attributor::identifyAbstractAttributes`. This
91// will make it possible to query all those instructions through the
92// `InformationCache::getOpcodeInstMapForFunction` interface and eliminate the
93// need to traverse the IR repeatedly.
94//
95//===----------------------------------------------------------------------===//
96
97#ifndef LLVM_TRANSFORMS_IPO_ATTRIBUTOR_H
98#define LLVM_TRANSFORMS_IPO_ATTRIBUTOR_H
99
100#include "llvm/ADT/DenseSet.h"
101#include "llvm/ADT/GraphTraits.h"
102#include "llvm/ADT/MapVector.h"
103#include "llvm/ADT/STLExtras.h"
104#include "llvm/ADT/SetVector.h"
105#include "llvm/ADT/Triple.h"
106#include "llvm/ADT/iterator.h"
107#include "llvm/Analysis/AssumeBundleQueries.h"
108#include "llvm/Analysis/CFG.h"
109#include "llvm/Analysis/CGSCCPassManager.h"
110#include "llvm/Analysis/LazyCallGraph.h"
111#include "llvm/Analysis/LoopInfo.h"
112#include "llvm/Analysis/MustExecute.h"
113#include "llvm/Analysis/OptimizationRemarkEmitter.h"
114#include "llvm/Analysis/PostDominators.h"
115#include "llvm/Analysis/TargetLibraryInfo.h"
116#include "llvm/IR/AbstractCallSite.h"
117#include "llvm/IR/ConstantRange.h"
118#include "llvm/IR/PassManager.h"
119#include "llvm/Support/Allocator.h"
120#include "llvm/Support/Casting.h"
121#include "llvm/Support/GraphWriter.h"
122#include "llvm/Support/TimeProfiler.h"
123#include "llvm/Transforms/Utils/CallGraphUpdater.h"
124
125namespace llvm {
126
127struct AADepGraphNode;
128struct AADepGraph;
129struct Attributor;
130struct AbstractAttribute;
131struct InformationCache;
132struct AAIsDead;
133struct AttributorCallGraph;
134
135class AAManager;
136class AAResults;
137class Function;
138
139/// Abstract Attribute helper functions.
140namespace AA {
141
142/// Return true if \p V is dynamically unique, that is, there are no two
143/// "instances" of \p V at runtime with different values.
144bool isDynamicallyUnique(Attributor &A, const AbstractAttribute &QueryingAA,
145 const Value &V);
146
147/// Return true if \p V is a valid value in \p Scope, that is a constant or an
148/// instruction/argument of \p Scope.
149bool isValidInScope(const Value &V, const Function *Scope);
150
151/// Return true if \p V is a valid value at position \p CtxI, that is a
152/// constant, an argument of the same function as \p CtxI, or an instruction in
153/// that function that dominates \p CtxI.
154bool isValidAtPosition(const Value &V, const Instruction &CtxI,
155 InformationCache &InfoCache);
156
157/// Try to convert \p V to type \p Ty without introducing new instructions. If
158/// this is not possible return `nullptr`. Note: this function basically knows
159/// how to cast various constants.
160Value *getWithType(Value &V, Type &Ty);
161
162/// Return the combination of \p A and \p B such that the result is a possible
163/// value of both. \p B is potentially casted to match the type \p Ty or the
164/// type of \p A if \p Ty is null.
165///
166/// Examples:
167/// X + none => X
168/// not_none + undef => not_none
169/// V1 + V2 => nullptr
170Optional<Value *>
171combineOptionalValuesInAAValueLatice(const Optional<Value *> &A,
172 const Optional<Value *> &B, Type *Ty);
173
174/// Return the initial value of \p Obj with type \p Ty if that is a constant.
175Constant *getInitialValueForObj(Value &Obj, Type &Ty);
176
177/// Collect all potential underlying objects of \p Ptr at position \p CtxI in
178/// \p Objects. Assumed information is used and dependences onto \p QueryingAA
179/// are added appropriately.
180///
181/// \returns True if \p Objects contains all assumed underlying objects, and
182/// false if something went wrong and the objects could not be
183/// determined.
184bool getAssumedUnderlyingObjects(Attributor &A, const Value &Ptr,
185 SmallVectorImpl<Value *> &Objects,
186 const AbstractAttribute &QueryingAA,
187 const Instruction *CtxI);
188
189/// Collect all potential values of the one stored by \p SI into
190/// \p PotentialCopies. That is, the only copies that were made via the
191/// store are assumed to be known and all in \p PotentialCopies. Dependences
192/// onto \p QueryingAA are properly tracked, \p UsedAssumedInformation will
193/// inform the caller if assumed information was used.
194///
195/// \returns True if the assumed potential copies are all in \p PotentialCopies,
196/// false if something went wrong and the copies could not be
197/// determined.
198bool getPotentialCopiesOfStoredValue(
199 Attributor &A, StoreInst &SI, SmallSetVector<Value *, 4> &PotentialCopies,
200 const AbstractAttribute &QueryingAA, bool &UsedAssumedInformation);
201
202} // namespace AA
203
204/// The value passed to the command line option that defines the maximal initialization
205/// chain length.
206extern unsigned MaxInitializationChainLength;
207
208///{
/// Two-valued status reporting whether something changed. In the OpenMPOpt
/// fold helpers it is CHANGED when the simplified value differs from its
/// previous value and UNCHANGED otherwise.
209enum class ChangeStatus {
210 CHANGED,
211 UNCHANGED,
212};
213
/// Operators to combine ChangeStatus values; only declared here.
/// NOTE(review): presumably `|` yields CHANGED if either operand is CHANGED
/// and `&` only if both are -- confirm against the definitions.
214ChangeStatus operator|(ChangeStatus l, ChangeStatus r);
215ChangeStatus &operator|=(ChangeStatus &l, ChangeStatus r);
216ChangeStatus operator&(ChangeStatus l, ChangeStatus r);
217ChangeStatus &operator&=(ChangeStatus &l, ChangeStatus r);
218
/// Classification of a dependence between a source and a target; see the
/// individual enumerators.
219enum class DepClassTy {
220 REQUIRED, ///< The target cannot be valid if the source is not.
221 OPTIONAL, ///< The target may be valid if the source is not.
222 NONE, ///< Do not track a dependence between source and target.
223};
224///}
225
226/// The data structure for the nodes of a dependency graph
227struct AADepGraphNode {
228public:
229 virtual ~AADepGraphNode(){};
230 using DepTy = PointerIntPair<AADepGraphNode *, 1>;
231
232protected:
233 /// Set of dependency graph nodes which should be updated if this one
234 /// is updated. The bit encodes if it is optional.
235 TinyPtrVector<DepTy> Deps;
236
237 static AADepGraphNode *DepGetVal(DepTy &DT) { return DT.getPointer(); }
238 static AbstractAttribute *DepGetValAA(DepTy &DT) {
239 return cast<AbstractAttribute>(DT.getPointer());
240 }
241
242 operator AbstractAttribute *() { return cast<AbstractAttribute>(this); }
243
244public:
245 using iterator =
246 mapped_iterator<TinyPtrVector<DepTy>::iterator, decltype(&DepGetVal)>;
247 using aaiterator =
248 mapped_iterator<TinyPtrVector<DepTy>::iterator, decltype(&DepGetValAA)>;
249
250 aaiterator begin() { return aaiterator(Deps.begin(), &DepGetValAA); }
251 aaiterator end() { return aaiterator(Deps.end(), &DepGetValAA); }
252 iterator child_begin() { return iterator(Deps.begin(), &DepGetVal); }
253 iterator child_end() { return iterator(Deps.end(), &DepGetVal); }
254
255 virtual void print(raw_ostream &OS) const { OS << "AADepNode Impl\n"; }
256 TinyPtrVector<DepTy> &getDeps() { return Deps; }
257
258 friend struct Attributor;
259 friend struct AADepGraph;
260};
261
262/// The data structure for the dependency graph
263///
264/// Note that in this graph if there is an edge from A to B (A -> B),
265/// then it means that B depends on A, and when the state of A is
266/// updated, node B should also be updated
267struct AADepGraph {
268 AADepGraph() {}
269 ~AADepGraph() {}
270
271 using DepTy = AADepGraphNode::DepTy;
272 static AADepGraphNode *DepGetVal(DepTy &DT) { return DT.getPointer(); }
273 using iterator =
274 mapped_iterator<TinyPtrVector<DepTy>::iterator, decltype(&DepGetVal)>;
275
276 /// There is no root node for the dependency graph. But the SCCIterator
277 /// requires a single entry point, so we maintain a fake("synthetic") root
278 /// node that depends on every node.
279 AADepGraphNode SyntheticRoot;
280 AADepGraphNode *GetEntryNode() { return &SyntheticRoot; }
281
282 iterator begin() { return SyntheticRoot.child_begin(); }
283 iterator end() { return SyntheticRoot.child_end(); }
284
285 void viewGraph();
286
287 /// Dump graph to file
288 void dumpGraph();
289
290 /// Print dependency graph
291 void print();
292};
293
294/// Helper to describe and deal with positions in the LLVM-IR.
295///
296/// A position in the IR is described by an anchor value and an "offset" that
297/// could be the argument number, for call sites and arguments, or an indicator
298/// of the "position kind". The kinds, specified in the Kind enum below, include
299/// the locations in the attribute list, i.a., function scope and return value,
300/// as well as a distinction between call sites and functions. Finally, there
301/// are floating values that do not have a corresponding attribute list
302/// position.
303struct IRPosition {
304 // NOTE: In the future this definition can be changed to support recursive
305 // functions.
306 using CallBaseContext = CallBase;
307
308 /// The positions we distinguish in the IR.
309 enum Kind : char {
310 IRP_INVALID, ///< An invalid position.
311 IRP_FLOAT, ///< A position that is not associated with a spot suitable
312 ///< for attributes. This could be any value or instruction.
313 IRP_RETURNED, ///< An attribute for the function return value.
314 IRP_CALL_SITE_RETURNED, ///< An attribute for a call site return value.
315 IRP_FUNCTION, ///< An attribute for a function (scope).
316 IRP_CALL_SITE, ///< An attribute for a call site (function scope).
317 IRP_ARGUMENT, ///< An attribute for a function argument.
318 IRP_CALL_SITE_ARGUMENT, ///< An attribute for a call site argument.
319 };
320
321 /// Default constructor available to create invalid positions implicitly. All
322 /// other positions need to be created explicitly through the appropriate
323 /// static member function.
324 IRPosition() : Enc(nullptr, ENC_VALUE) { verify(); }
325
326 /// Create a position describing the value of \p V.
327 static const IRPosition value(const Value &V,
328 const CallBaseContext *CBContext = nullptr) {
329 if (auto *Arg = dyn_cast<Argument>(&V))
330 return IRPosition::argument(*Arg, CBContext);
331 if (auto *CB = dyn_cast<CallBase>(&V))
332 return IRPosition::callsite_returned(*CB);
333 return IRPosition(const_cast<Value &>(V), IRP_FLOAT, CBContext);
334 }
335
336 /// Create a position describing the function scope of \p F.
337 /// \p CBContext is used for call base specific analysis.
338 static const IRPosition function(const Function &F,
339 const CallBaseContext *CBContext = nullptr) {
340 return IRPosition(const_cast<Function &>(F), IRP_FUNCTION, CBContext);
341 }
342
343 /// Create a position describing the returned value of \p F.
344 /// \p CBContext is used for call base specific analysis.
345 static const IRPosition returned(const Function &F,
346 const CallBaseContext *CBContext = nullptr) {
347 return IRPosition(const_cast<Function &>(F), IRP_RETURNED, CBContext);
348 }
349
350 /// Create a position describing the argument \p Arg.
351 /// \p CBContext is used for call base specific analysis.
352 static const IRPosition argument(const Argument &Arg,
353 const CallBaseContext *CBContext = nullptr) {
354 return IRPosition(const_cast<Argument &>(Arg), IRP_ARGUMENT, CBContext);
355 }
356
357 /// Create a position describing the function scope of \p CB.
358 static const IRPosition callsite_function(const CallBase &CB) {
359 return IRPosition(const_cast<CallBase &>(CB), IRP_CALL_SITE);
360 }
361
362 /// Create a position describing the returned value of \p CB.
363 static const IRPosition callsite_returned(const CallBase &CB) {
364 return IRPosition(const_cast<CallBase &>(CB), IRP_CALL_SITE_RETURNED);
365 }
366
367 /// Create a position describing the argument of \p CB at position \p ArgNo.
368 static const IRPosition callsite_argument(const CallBase &CB,
369 unsigned ArgNo) {
370 return IRPosition(const_cast<Use &>(CB.getArgOperandUse(ArgNo)),
371 IRP_CALL_SITE_ARGUMENT);
372 }
373
374 /// Create a position describing the argument of \p ACS at position \p ArgNo.
375 static const IRPosition callsite_argument(AbstractCallSite ACS,
376 unsigned ArgNo) {
377 if (ACS.getNumArgOperands() <= ArgNo)
378 return IRPosition();
379 int CSArgNo = ACS.getCallArgOperandNo(ArgNo);
380 if (CSArgNo >= 0)
381 return IRPosition::callsite_argument(
382 cast<CallBase>(*ACS.getInstruction()), CSArgNo);
383 return IRPosition();
384 }
385
386 /// Create a position with function scope matching the "context" of \p IRP.
387 /// If \p IRP is a call site (see isAnyCallSitePosition()) then the result
388 /// will be a call site position, otherwise the function position of the
389 /// associated function.
390 static const IRPosition
391 function_scope(const IRPosition &IRP,
392 const CallBaseContext *CBContext = nullptr) {
393 if (IRP.isAnyCallSitePosition()) {
394 return IRPosition::callsite_function(
395 cast<CallBase>(IRP.getAnchorValue()));
396 }
397 assert(IRP.getAssociatedFunction())((void)0);
398 return IRPosition::function(*IRP.getAssociatedFunction(), CBContext);
399 }
400
401 bool operator==(const IRPosition &RHS) const {
402 return Enc == RHS.Enc && RHS.CBContext == CBContext;
403 }
404 bool operator!=(const IRPosition &RHS) const { return !(*this == RHS); }
405
406 /// Return the value this abstract attribute is anchored with.
407 ///
408 /// The anchor value might not be the associated value if the latter is not
409 /// sufficient to determine where arguments will be manifested. This is, so
410 /// far, only the case for call site arguments as the value is not sufficient
411 /// to pinpoint them. Instead, we can use the call site as an anchor.
412 Value &getAnchorValue() const {
413 switch (getEncodingBits()) {
414 case ENC_VALUE:
415 case ENC_RETURNED_VALUE:
416 case ENC_FLOATING_FUNCTION:
417 return *getAsValuePtr();
418 case ENC_CALL_SITE_ARGUMENT_USE:
419 return *(getAsUsePtr()->getUser());
420 default:
421 llvm_unreachable("Unkown encoding!")__builtin_unreachable();
422 };
423 }
424
425 /// Return the associated function, if any.
426 Function *getAssociatedFunction() const {
427 if (auto *CB = dyn_cast<CallBase>(&getAnchorValue())) {
428 // We reuse the logic that associates callback calles to arguments of a
429 // call site here to identify the callback callee as the associated
430 // function.
431 if (Argument *Arg = getAssociatedArgument())
432 return Arg->getParent();
433 return CB->getCalledFunction();
434 }
435 return getAnchorScope();
436 }
437
438 /// Return the associated argument, if any.
439 Argument *getAssociatedArgument() const;
440
441 /// Return true if the position refers to a function interface, that is the
442 /// function scope, the function return, or an argument.
443 bool isFnInterfaceKind() const {
444 switch (getPositionKind()) {
445 case IRPosition::IRP_FUNCTION:
446 case IRPosition::IRP_RETURNED:
447 case IRPosition::IRP_ARGUMENT:
448 return true;
449 default:
450 return false;
451 }
452 }
453
454 /// Return the Function surrounding the anchor value.
455 Function *getAnchorScope() const {
456 Value &V = getAnchorValue();
457 if (isa<Function>(V))
458 return &cast<Function>(V);
459 if (isa<Argument>(V))
460 return cast<Argument>(V).getParent();
461 if (isa<Instruction>(V))
462 return cast<Instruction>(V).getFunction();
463 return nullptr;
464 }
465
466 /// Return the context instruction, if any.
467 Instruction *getCtxI() const {
468 Value &V = getAnchorValue();
469 if (auto *I = dyn_cast<Instruction>(&V))
470 return I;
471 if (auto *Arg = dyn_cast<Argument>(&V))
472 if (!Arg->getParent()->isDeclaration())
473 return &Arg->getParent()->getEntryBlock().front();
474 if (auto *F = dyn_cast<Function>(&V))
475 if (!F->isDeclaration())
476 return &(F->getEntryBlock().front());
477 return nullptr;
478 }
479
480 /// Return the value this abstract attribute is associated with.
481 Value &getAssociatedValue() const {
482 if (getCallSiteArgNo() < 0 || isa<Argument>(&getAnchorValue()))
483 return getAnchorValue();
484 assert(isa<CallBase>(&getAnchorValue()) && "Expected a call base!")((void)0);
485 return *cast<CallBase>(&getAnchorValue())
486 ->getArgOperand(getCallSiteArgNo());
487 }
488
489 /// Return the type this abstract attribute is associated with.
490 Type *getAssociatedType() const {
491 if (getPositionKind() == IRPosition::IRP_RETURNED)
492 return getAssociatedFunction()->getReturnType();
493 return getAssociatedValue().getType();
494 }
495
496 /// Return the callee argument number of the associated value if it is an
497 /// argument or call site argument, otherwise a negative value. In contrast to
498 /// `getCallSiteArgNo` this method will always return the "argument number"
499 /// from the perspective of the callee. This may not be the same as the call
500 /// site perspective if this is a callback call.
501 int getCalleeArgNo() const {
502 return getArgNo(/* CallbackCalleeArgIfApplicable */ true);
503 }
504
505 /// Return the call site argument number of the associated value if it is an
506 /// argument or call site argument, otherwise a negative value. In contrast to
507 /// `getCalleeArgNo` this method will always return the "operand number" from
508 /// the perspective of the call site. This may not be the same as the callee
509 /// perspective if this is a callback call.
510 int getCallSiteArgNo() const {
511 return getArgNo(/* CallbackCalleeArgIfApplicable */ false);
512 }
513
514 /// Return the index in the attribute list for this position.
515 unsigned getAttrIdx() const {
516 switch (getPositionKind()) {
517 case IRPosition::IRP_INVALID:
518 case IRPosition::IRP_FLOAT:
519 break;
520 case IRPosition::IRP_FUNCTION:
521 case IRPosition::IRP_CALL_SITE:
522 return AttributeList::FunctionIndex;
523 case IRPosition::IRP_RETURNED:
524 case IRPosition::IRP_CALL_SITE_RETURNED:
525 return AttributeList::ReturnIndex;
526 case IRPosition::IRP_ARGUMENT:
527 case IRPosition::IRP_CALL_SITE_ARGUMENT:
528 return getCallSiteArgNo() + AttributeList::FirstArgIndex;
529 }
530 llvm_unreachable(__builtin_unreachable()
531 "There is no attribute index for a floating or invalid position!")__builtin_unreachable();
532 }
533
534 /// Return the associated position kind.
535 Kind getPositionKind() const {
536 char EncodingBits = getEncodingBits();
537 if (EncodingBits == ENC_CALL_SITE_ARGUMENT_USE)
538 return IRP_CALL_SITE_ARGUMENT;
539 if (EncodingBits == ENC_FLOATING_FUNCTION)
540 return IRP_FLOAT;
541
542 Value *V = getAsValuePtr();
543 if (!V)
544 return IRP_INVALID;
545 if (isa<Argument>(V))
546 return IRP_ARGUMENT;
547 if (isa<Function>(V))
548 return isReturnPosition(EncodingBits) ? IRP_RETURNED : IRP_FUNCTION;
549 if (isa<CallBase>(V))
550 return isReturnPosition(EncodingBits) ? IRP_CALL_SITE_RETURNED
551 : IRP_CALL_SITE;
552 return IRP_FLOAT;
553 }
554
555 /// TODO: Figure out if the attribute related helper functions should live
556 /// here or somewhere else.
557
558 /// Return true if any kind in \p AKs existing in the IR at a position that
559 /// will affect this one. See also getAttrs(...).
560 /// \param IgnoreSubsumingPositions Flag to determine if subsuming positions,
561 /// e.g., the function position if this is an
562 /// argument position, should be ignored.
563 bool hasAttr(ArrayRef<Attribute::AttrKind> AKs,
564 bool IgnoreSubsumingPositions = false,
565 Attributor *A = nullptr) const;
566
567 /// Return the attributes of any kind in \p AKs existing in the IR at a
568 /// position that will affect this one. While each position can only have a
569 /// single attribute of any kind in \p AKs, there are "subsuming" positions
570 /// that could have an attribute as well. This method returns all attributes
571 /// found in \p Attrs.
572 /// \param IgnoreSubsumingPositions Flag to determine if subsuming positions,
573 /// e.g., the function position if this is an
574 /// argument position, should be ignored.
575 void getAttrs(ArrayRef<Attribute::AttrKind> AKs,
576 SmallVectorImpl<Attribute> &Attrs,
577 bool IgnoreSubsumingPositions = false,
578 Attributor *A = nullptr) const;
579
580 /// Remove the attribute of kind \p AKs existing in the IR at this position.
581 void removeAttrs(ArrayRef<Attribute::AttrKind> AKs) const {
582 if (getPositionKind() == IRP_INVALID || getPositionKind() == IRP_FLOAT)
583 return;
584
585 AttributeList AttrList;
586 auto *CB = dyn_cast<CallBase>(&getAnchorValue());
587 if (CB)
588 AttrList = CB->getAttributes();
589 else
590 AttrList = getAssociatedFunction()->getAttributes();
591
592 LLVMContext &Ctx = getAnchorValue().getContext();
593 for (Attribute::AttrKind AK : AKs)
594 AttrList = AttrList.removeAttribute(Ctx, getAttrIdx(), AK);
595
596 if (CB)
597 CB->setAttributes(AttrList);
598 else
599 getAssociatedFunction()->setAttributes(AttrList);
600 }
601
602 bool isAnyCallSitePosition() const {
603 switch (getPositionKind()) {
604 case IRPosition::IRP_CALL_SITE:
605 case IRPosition::IRP_CALL_SITE_RETURNED:
606 case IRPosition::IRP_CALL_SITE_ARGUMENT:
607 return true;
608 default:
609 return false;
610 }
611 }
612
613 /// Return true if the position is an argument or call site argument.
614 bool isArgumentPosition() const {
615 switch (getPositionKind()) {
616 case IRPosition::IRP_ARGUMENT:
617 case IRPosition::IRP_CALL_SITE_ARGUMENT:
618 return true;
619 default:
620 return false;
621 }
622 }
623
624 /// Return the same position without the call base context.
625 IRPosition stripCallBaseContext() const {
626 IRPosition Result = *this;
627 Result.CBContext = nullptr;
628 return Result;
629 }
630
631 /// Get the call base context from the position.
632 const CallBaseContext *getCallBaseContext() const { return CBContext; }
633
634 /// Check if the position has any call base context.
635 bool hasCallBaseContext() const { return CBContext != nullptr; }
636
637 /// Special DenseMap key values.
638 ///
639 ///{
640 static const IRPosition EmptyKey;
641 static const IRPosition TombstoneKey;
642 ///}
643
644 /// Conversion into a void * to allow reuse of pointer hashing.
645 operator void *() const { return Enc.getOpaqueValue(); }
646
647private:
648 /// Private constructor for special values only!
649 explicit IRPosition(void *Ptr, const CallBaseContext *CBContext = nullptr)
650 : CBContext(CBContext) {
651 Enc.setFromOpaqueValue(Ptr);
652 }
653
654 /// IRPosition anchored at \p AnchorVal with kind/argument numbet \p PK.
655 explicit IRPosition(Value &AnchorVal, Kind PK,
656 const CallBaseContext *CBContext = nullptr)
657 : CBContext(CBContext) {
658 switch (PK) {
659 case IRPosition::IRP_INVALID:
660 llvm_unreachable("Cannot create invalid IRP with an anchor value!")__builtin_unreachable();
661 break;
662 case IRPosition::IRP_FLOAT:
663 // Special case for floating functions.
664 if (isa<Function>(AnchorVal))
665 Enc = {&AnchorVal, ENC_FLOATING_FUNCTION};
666 else
667 Enc = {&AnchorVal, ENC_VALUE};
668 break;
669 case IRPosition::IRP_FUNCTION:
670 case IRPosition::IRP_CALL_SITE:
671 Enc = {&AnchorVal, ENC_VALUE};
672 break;
673 case IRPosition::IRP_RETURNED:
674 case IRPosition::IRP_CALL_SITE_RETURNED:
675 Enc = {&AnchorVal, ENC_RETURNED_VALUE};
676 break;
677 case IRPosition::IRP_ARGUMENT:
678 Enc = {&AnchorVal, ENC_VALUE};
679 break;
680 case IRPosition::IRP_CALL_SITE_ARGUMENT:
681 llvm_unreachable(__builtin_unreachable()
682 "Cannot create call site argument IRP with an anchor value!")__builtin_unreachable();
683 break;
684 }
685 verify();
686 }
687
688 /// Return the callee argument number of the associated value if it is an
689 /// argument or call site argument. See also `getCalleeArgNo` and
690 /// `getCallSiteArgNo`.
691 int getArgNo(bool CallbackCalleeArgIfApplicable) const {
692 if (CallbackCalleeArgIfApplicable)
693 if (Argument *Arg = getAssociatedArgument())
694 return Arg->getArgNo();
695 switch (getPositionKind()) {
696 case IRPosition::IRP_ARGUMENT:
697 return cast<Argument>(getAsValuePtr())->getArgNo();
698 case IRPosition::IRP_CALL_SITE_ARGUMENT: {
699 Use &U = *getAsUsePtr();
700 return cast<CallBase>(U.getUser())->getArgOperandNo(&U);
701 }
702 default:
703 return -1;
704 }
705 }
706
707 /// IRPosition for the use \p U. The position kind \p PK needs to be
708 /// IRP_CALL_SITE_ARGUMENT, the anchor value is the user, the associated value
709 /// the used value.
710 explicit IRPosition(Use &U, Kind PK) {
711 assert(PK == IRP_CALL_SITE_ARGUMENT &&((void)0)
712 "Use constructor is for call site arguments only!")((void)0);
713 Enc = {&U, ENC_CALL_SITE_ARGUMENT_USE};
714 verify();
715 }
716
717 /// Verify internal invariants.
718 void verify();
719
720 /// Return the attributes of kind \p AK existing in the IR as attribute.
721 bool getAttrsFromIRAttr(Attribute::AttrKind AK,
722 SmallVectorImpl<Attribute> &Attrs) const;
723
724 /// Return the attributes of kind \p AK existing in the IR as operand bundles
725 /// of an llvm.assume.
726 bool getAttrsFromAssumes(Attribute::AttrKind AK,
727 SmallVectorImpl<Attribute> &Attrs,
728 Attributor &A) const;
729
730 /// Return the underlying pointer as Value *, valid for all positions but
731 /// IRP_CALL_SITE_ARGUMENT.
732 Value *getAsValuePtr() const {
733 assert(getEncodingBits() != ENC_CALL_SITE_ARGUMENT_USE &&((void)0)
734 "Not a value pointer!")((void)0);
735 return reinterpret_cast<Value *>(Enc.getPointer());
736 }
737
738 /// Return the underlying pointer as Use *, valid only for
739 /// IRP_CALL_SITE_ARGUMENT positions.
740 Use *getAsUsePtr() const {
741 assert(getEncodingBits() == ENC_CALL_SITE_ARGUMENT_USE &&((void)0)
742 "Not a value pointer!")((void)0);
743 return reinterpret_cast<Use *>(Enc.getPointer());
744 }
745
746 /// Return true if \p EncodingBits describe a returned or call site returned
747 /// position.
748 static bool isReturnPosition(char EncodingBits) {
749 return EncodingBits == ENC_RETURNED_VALUE;
750 }
751
752 /// Return true if the encoding bits describe a returned or call site returned
753 /// position.
754 bool isReturnPosition() const { return isReturnPosition(getEncodingBits()); }
755
756 /// The encoding of the IRPosition is a combination of a pointer and two
757 /// encoding bits. The values of the encoding bits are defined in the enum
758 /// below. The pointer is either a Value* (for the first three encoding bit
759 /// combinations) or Use* (for ENC_CALL_SITE_ARGUMENT_USE).
760 ///
761 ///{
762 enum {
763 ENC_VALUE = 0b00,
764 ENC_RETURNED_VALUE = 0b01,
765 ENC_FLOATING_FUNCTION = 0b10,
766 ENC_CALL_SITE_ARGUMENT_USE = 0b11,
767 };
768
769 // Reserve the maximal amount of bits so there is no need to mask out the
770 // remaining ones. We will not encode anything else in the pointer anyway.
771 static constexpr int NumEncodingBits =
772 PointerLikeTypeTraits<void *>::NumLowBitsAvailable;
773 static_assert(NumEncodingBits >= 2, "At least two bits are required!");
774
775 /// The pointer with the encoding bits.
776 PointerIntPair<void *, NumEncodingBits, char> Enc;
777 ///}
778
779 /// Call base context. Used for callsite specific analysis.
780 const CallBaseContext *CBContext = nullptr;
781
782 /// Return the encoding bits.
783 char getEncodingBits() const { return Enc.getInt(); }
784};
785
786/// Helper that allows IRPosition as a key in a DenseMap.
787template <> struct DenseMapInfo<IRPosition> {
788 static inline IRPosition getEmptyKey() { return IRPosition::EmptyKey; }
789 static inline IRPosition getTombstoneKey() {
790 return IRPosition::TombstoneKey;
791 }
792 static unsigned getHashValue(const IRPosition &IRP) {
793 return (DenseMapInfo<void *>::getHashValue(IRP) << 4) ^
794 (DenseMapInfo<Value *>::getHashValue(IRP.getCallBaseContext()));
795 }
796
797 static bool isEqual(const IRPosition &a, const IRPosition &b) {
798 return a == b;
799 }
800};
801
802/// A visitor class for IR positions.
803///
804/// Given a position P, the SubsumingPositionIterator allows to visit "subsuming
805/// positions" wrt. attributes/information. Thus, if a piece of information
806/// holds for a subsuming position, it also holds for the position P.
807///
808/// The subsuming positions always include the initial position and then,
809/// depending on the position kind, additionally the following ones:
810/// - for IRP_RETURNED:
811/// - the function (IRP_FUNCTION)
812/// - for IRP_ARGUMENT:
813/// - the function (IRP_FUNCTION)
814/// - for IRP_CALL_SITE:
815/// - the callee (IRP_FUNCTION), if known
816/// - for IRP_CALL_SITE_RETURNED:
817/// - the callee (IRP_RETURNED), if known
818/// - the call site (IRP_FUNCTION)
819/// - the callee (IRP_FUNCTION), if known
820/// - for IRP_CALL_SITE_ARGUMENT:
821/// - the argument of the callee (IRP_ARGUMENT), if known
822/// - the callee (IRP_FUNCTION), if known
823/// - the position the call site argument is associated with if it is not
824/// anchored to the call site, e.g., if it is an argument then the argument
825/// (IRP_ARGUMENT)
826class SubsumingPositionIterator {
827 SmallVector<IRPosition, 4> IRPositions;
828 using iterator = decltype(IRPositions)::iterator;
829
830public:
831 SubsumingPositionIterator(const IRPosition &IRP);
832 iterator begin() { return IRPositions.begin(); }
833 iterator end() { return IRPositions.end(); }
834};
835
836/// Wrapper for FunctoinAnalysisManager.
837struct AnalysisGetter {
838 template <typename Analysis>
839 typename Analysis::Result *getAnalysis(const Function &F) {
840 if (!FAM || !F.getParent())
841 return nullptr;
842 return &FAM->getResult<Analysis>(const_cast<Function &>(F));
843 }
844
845 AnalysisGetter(FunctionAnalysisManager &FAM) : FAM(&FAM) {}
846 AnalysisGetter() {}
847
848private:
849 FunctionAnalysisManager *FAM = nullptr;
850};
851
852/// Data structure to hold cached (LLVM-IR) information.
853///
854/// All attributes are given an InformationCache object at creation time to
855/// avoid inspection of the IR by all of them individually. This default
856/// InformationCache will hold information required by 'default' attributes,
857/// thus the ones deduced when Attributor::identifyDefaultAbstractAttributes(..)
858/// is called.
859///
860/// If custom abstract attributes, registered manually through
861/// Attributor::registerAA(...), need more information, especially if it is not
862/// reusable, it is advised to inherit from the InformationCache and cast the
863/// instance down in the abstract attributes.
864struct InformationCache {
865 InformationCache(const Module &M, AnalysisGetter &AG,
866 BumpPtrAllocator &Allocator, SetVector<Function *> *CGSCC)
867 : DL(M.getDataLayout()), Allocator(Allocator),
868 Explorer(
869 /* ExploreInterBlock */ true, /* ExploreCFGForward */ true,
870 /* ExploreCFGBackward */ true,
871 /* LIGetter */
872 [&](const Function &F) { return AG.getAnalysis<LoopAnalysis>(F); },
873 /* DTGetter */
874 [&](const Function &F) {
875 return AG.getAnalysis<DominatorTreeAnalysis>(F);
876 },
877 /* PDTGetter */
878 [&](const Function &F) {
879 return AG.getAnalysis<PostDominatorTreeAnalysis>(F);
880 }),
881 AG(AG), CGSCC(CGSCC), TargetTriple(M.getTargetTriple()) {
882 if (CGSCC)
883 initializeModuleSlice(*CGSCC);
884 }
885
886 ~InformationCache() {
887 // The FunctionInfo objects are allocated via a BumpPtrAllocator, we call
888 // the destructor manually.
889 for (auto &It : FuncInfoMap)
890 It.getSecond()->~FunctionInfo();
891 }
892
893 /// Apply \p CB to all uses of \p F. If \p LookThroughConstantExprUses is
894 /// true, constant expression users are not given to \p CB but their uses are
895 /// traversed transitively.
896 template <typename CBTy>
897 static void foreachUse(Function &F, CBTy CB,
898 bool LookThroughConstantExprUses = true) {
899 SmallVector<Use *, 8> Worklist(make_pointer_range(F.uses()));
900
901 for (unsigned Idx = 0; Idx < Worklist.size(); ++Idx) {
902 Use &U = *Worklist[Idx];
903
904 // Allow use in constant bitcasts and simply look through them.
905 if (LookThroughConstantExprUses && isa<ConstantExpr>(U.getUser())) {
906 for (Use &CEU : cast<ConstantExpr>(U.getUser())->uses())
907 Worklist.push_back(&CEU);
908 continue;
909 }
910
911 CB(U);
912 }
913 }
914
915 /// Initialize the ModuleSlice member based on \p SCC. ModuleSlices contains
916 /// (a subset of) all functions that we can look at during this SCC traversal.
917 /// This includes functions (transitively) called from the SCC and the
918 /// (transitive) callers of SCC functions. We also can look at a function if
919 /// there is a "reference edge", i.a., if the function somehow uses (!=calls)
920 /// a function in the SCC or a caller of a function in the SCC.
921 void initializeModuleSlice(SetVector<Function *> &SCC) {
922 ModuleSlice.insert(SCC.begin(), SCC.end());
923
924 SmallPtrSet<Function *, 16> Seen;
925 SmallVector<Function *, 16> Worklist(SCC.begin(), SCC.end());
926 while (!Worklist.empty()) {
927 Function *F = Worklist.pop_back_val();
928 ModuleSlice.insert(F);
929
930 for (Instruction &I : instructions(*F))
931 if (auto *CB = dyn_cast<CallBase>(&I))
932 if (Function *Callee = CB->getCalledFunction())
933 if (Seen.insert(Callee).second)
934 Worklist.push_back(Callee);
935 }
936
937 Seen.clear();
938 Worklist.append(SCC.begin(), SCC.end());
939 while (!Worklist.empty()) {
940 Function *F = Worklist.pop_back_val();
941 ModuleSlice.insert(F);
942
943 // Traverse all transitive uses.
944 foreachUse(*F, [&](Use &U) {
945 if (auto *UsrI = dyn_cast<Instruction>(U.getUser()))
946 if (Seen.insert(UsrI->getFunction()).second)
947 Worklist.push_back(UsrI->getFunction());
948 });
949 }
950 }
951
952 /// The slice of the module we are allowed to look at.
953 SmallPtrSet<Function *, 8> ModuleSlice;
954
955 /// A vector type to hold instructions.
956 using InstructionVectorTy = SmallVector<Instruction *, 8>;
957
958 /// A map type from opcodes to instructions with this opcode.
959 using OpcodeInstMapTy = DenseMap<unsigned, InstructionVectorTy *>;
960
961 /// Return the map that relates "interesting" opcodes with all instructions
962 /// with that opcode in \p F.
963 OpcodeInstMapTy &getOpcodeInstMapForFunction(const Function &F) {
964 return getFunctionInfo(F).OpcodeInstMap;
965 }
966
967 /// Return the instructions in \p F that may read or write memory.
968 InstructionVectorTy &getReadOrWriteInstsForFunction(const Function &F) {
969 return getFunctionInfo(F).RWInsts;
970 }
971
972 /// Return MustBeExecutedContextExplorer
973 MustBeExecutedContextExplorer &getMustBeExecutedContextExplorer() {
974 return Explorer;
975 }
976
977 /// Return TargetLibraryInfo for function \p F.
978 TargetLibraryInfo *getTargetLibraryInfoForFunction(const Function &F) {
979 return AG.getAnalysis<TargetLibraryAnalysis>(F);
980 }
981
982 /// Return AliasAnalysis Result for function \p F.
983 AAResults *getAAResultsForFunction(const Function &F);
984
985 /// Return true if \p Arg is involved in a must-tail call, thus the argument
986 /// of the caller or callee.
987 bool isInvolvedInMustTailCall(const Argument &Arg) {
988 FunctionInfo &FI = getFunctionInfo(*Arg.getParent());
989 return FI.CalledViaMustTail || FI.ContainsMustTailCall;
990 }
991
992 /// Return the analysis result from a pass \p AP for function \p F.
993 template <typename AP>
994 typename AP::Result *getAnalysisResultForFunction(const Function &F) {
995 return AG.getAnalysis<AP>(F);
996 }
997
998 /// Return SCC size on call graph for function \p F or 0 if unknown.
999 unsigned getSccSize(const Function &F) {
1000 if (CGSCC && CGSCC->count(const_cast<Function *>(&F)))
1001 return CGSCC->size();
1002 return 0;
1003 }
1004
1005 /// Return datalayout used in the module.
1006 const DataLayout &getDL() { return DL; }
1007
1008 /// Return the map containing all the knowledge we have from `llvm.assume`s.
1009 const RetainedKnowledgeMap &getKnowledgeMap() const { return KnowledgeMap; }
1010
1011 /// Return if \p To is potentially reachable form \p From or not
1012 /// If the same query was answered, return cached result
1013 bool getPotentiallyReachable(const Instruction &From, const Instruction &To) {
1014 auto KeyPair = std::make_pair(&From, &To);
1015 auto Iter = PotentiallyReachableMap.find(KeyPair);
1016 if (Iter != PotentiallyReachableMap.end())
1017 return Iter->second;
1018 const Function &F = *From.getFunction();
1019 bool Result = true;
1020 if (From.getFunction() == To.getFunction())
1021 Result = isPotentiallyReachable(&From, &To, nullptr,
1022 AG.getAnalysis<DominatorTreeAnalysis>(F),
1023 AG.getAnalysis<LoopAnalysis>(F));
1024 PotentiallyReachableMap.insert(std::make_pair(KeyPair, Result));
1025 return Result;
1026 }
1027
1028 /// Check whether \p F is part of module slice.
1029 bool isInModuleSlice(const Function &F) {
1030 return ModuleSlice.count(const_cast<Function *>(&F));
1031 }
1032
1033 /// Return true if the stack (llvm::Alloca) can be accessed by other threads.
1034 bool stackIsAccessibleByOtherThreads() { return !targetIsGPU(); }
1035
1036 /// Return true if the target is a GPU.
1037 bool targetIsGPU() {
1038 return TargetTriple.isAMDGPU() || TargetTriple.isNVPTX();
1039 }
1040
1041private:
1042 struct FunctionInfo {
1043 ~FunctionInfo();
1044
1045 /// A nested map that remembers all instructions in a function with a
1046 /// certain instruction opcode (Instruction::getOpcode()).
1047 OpcodeInstMapTy OpcodeInstMap;
1048
1049 /// A map from functions to their instructions that may read or write
1050 /// memory.
1051 InstructionVectorTy RWInsts;
1052
1053 /// Function is called by a `musttail` call.
1054 bool CalledViaMustTail;
1055
1056 /// Function contains a `musttail` call.
1057 bool ContainsMustTailCall;
1058 };
1059
1060 /// A map from functions to the information cached about them.
1061 DenseMap<const Function *, FunctionInfo *> FuncInfoMap;
1062
1063 /// Return information about the function \p F, potentially by creating it.
1064 FunctionInfo &getFunctionInfo(const Function &F) {
1065 FunctionInfo *&FI = FuncInfoMap[&F];
1066 if (!FI) {
1067 FI = new (Allocator) FunctionInfo();
1068 initializeInformationCache(F, *FI);
1069 }
1070 return *FI;
1071 }
1072
1073 /// Initialize the function information cache \p FI for the function \p F.
1074 ///
1075 /// This method needs to be called for all function that might be looked at
1076 /// through the information cache interface *prior* to looking at them.
1077 void initializeInformationCache(const Function &F, FunctionInfo &FI);
1078
1079 /// The datalayout used in the module.
1080 const DataLayout &DL;
1081
1082 /// The allocator used to allocate memory, e.g. for `FunctionInfo`s.
1083 BumpPtrAllocator &Allocator;
1084
1085 /// MustBeExecutedContextExplorer
1086 MustBeExecutedContextExplorer Explorer;
1087
1088 /// A map with knowledge retained in `llvm.assume` instructions.
1089 RetainedKnowledgeMap KnowledgeMap;
1090
1091 /// Getters for analysis.
1092 AnalysisGetter &AG;
1093
1094 /// The underlying CGSCC, or null if not available.
1095 SetVector<Function *> *CGSCC;
1096
1097 /// Set of inlineable functions
1098 SmallPtrSet<const Function *, 8> InlineableFunctions;
1099
1100 /// A map for caching results of queries for isPotentiallyReachable
1101 DenseMap<std::pair<const Instruction *, const Instruction *>, bool>
1102 PotentiallyReachableMap;
1103
1104 /// The triple describing the target machine.
1105 Triple TargetTriple;
1106
1107 /// Give the Attributor access to the members so
1108 /// Attributor::identifyDefaultAbstractAttributes(...) can initialize them.
1109 friend struct Attributor;
1110};
1111
1112/// The fixpoint analysis framework that orchestrates the attribute deduction.
1113///
1114/// The Attributor provides a general abstract analysis framework (guided
1115/// fixpoint iteration) as well as helper functions for the deduction of
1116/// (LLVM-IR) attributes. However, also other code properties can be deduced,
1117/// propagated, and ultimately manifested through the Attributor framework. This
1118/// is particularly useful if these properties interact with attributes and a
1119/// co-scheduled deduction allows to improve the solution. Even if not, thus if
1120/// attributes/properties are completely isolated, they should use the
1121/// Attributor framework to reduce the number of fixpoint iteration frameworks
1122/// in the code base. Note that the Attributor design makes sure that isolated
1123/// attributes are not impacted, in any way, by others derived at the same time
1124/// if there is no cross-reasoning performed.
1125///
1126/// The public facing interface of the Attributor is kept simple and basically
1127/// allows abstract attributes to one thing, query abstract attributes
1128/// in-flight. There are two reasons to do this:
1129/// a) The optimistic state of one abstract attribute can justify an
1130/// optimistic state of another, allowing to framework to end up with an
1131/// optimistic (=best possible) fixpoint instead of one based solely on
1132/// information in the IR.
1133/// b) This avoids reimplementing various kinds of lookups, e.g., to check
1134/// for existing IR attributes, in favor of a single lookups interface
1135/// provided by an abstract attribute subclass.
1136///
1137/// NOTE: The mechanics of adding a new "concrete" abstract attribute are
1138/// described in the file comment.
1139struct Attributor {
1140
1141 using OptimizationRemarkGetter =
1142 function_ref<OptimizationRemarkEmitter &(Function *)>;
1143
1144 /// Constructor
1145 ///
1146 /// \param Functions The set of functions we are deriving attributes for.
1147 /// \param InfoCache Cache to hold various information accessible for
1148 /// the abstract attributes.
1149 /// \param CGUpdater Helper to update an underlying call graph.
1150 /// \param Allowed If not null, a set limiting the attribute opportunities.
1151 /// \param DeleteFns Whether to delete functions.
1152 /// \param RewriteSignatures Whether to rewrite function signatures.
1153 /// The maximum number of fixpoint iterations is left unset (None) by this
1154 /// constructor.
// Convenience constructor. Besides the forwarded arguments it records no
// iteration limit (None), no remark getter (None), and an empty pass name.
// NOTE(review): the initializer order presumably mirrors the member
// declaration order, which is not visible from here -- confirm before
// reordering.
1155 Attributor(SetVector<Function *> &Functions, InformationCache &InfoCache,
1156 CallGraphUpdater &CGUpdater,
1157 DenseSet<const char *> *Allowed = nullptr, bool DeleteFns = true,
1158 bool RewriteSignatures = true)
1159 : Allocator(InfoCache.Allocator), Functions(Functions),
1160 InfoCache(InfoCache), CGUpdater(CGUpdater), Allowed(Allowed),
1161 DeleteFns(DeleteFns), RewriteSignatures(RewriteSignatures),
1162 MaxFixpointIterations(None), OREGetter(None), PassName("") {}
1163
1164 /// Constructor
1165 ///
1166 /// \param Functions The set of functions we are deriving attributes for.
1167 /// \param InfoCache Cache to hold various information accessible for
1168 /// the abstract attributes.
1169 /// \param CGUpdater Helper to update an underlying call graph.
1170 /// \param Allowed If not null, a set limiting the attribute opportunities.
1171 /// \param DeleteFns Whether to delete functions.
1172 /// \param RewriteSignatures Whether to rewrite function signatures.
1173 /// \param MaxFixpointIterations Maximum number of iterations to run until fixpoint.
1174 /// \param OREGetter A callback function that returns an ORE object from a
1175 /// Function pointer.
1176 /// \param PassName The name of the pass emitting remarks.
// Full constructor. In addition to what the shorter constructor stores, it
// records the caller-provided iteration limit, wraps the remark getter in an
// Optional, and keeps the pass name pointer as given.
// NOTE(review): the initializer order presumably mirrors the member
// declaration order, which is not visible from here -- confirm before
// reordering.
1177 Attributor(SetVector<Function *> &Functions, InformationCache &InfoCache,
1178 CallGraphUpdater &CGUpdater, DenseSet<const char *> *Allowed,
1179 bool DeleteFns, bool RewriteSignatures,
1180 Optional<unsigned> MaxFixpointIterations,
1181 OptimizationRemarkGetter OREGetter, const char *PassName)
1182 : Allocator(InfoCache.Allocator), Functions(Functions),
1183 InfoCache(InfoCache), CGUpdater(CGUpdater), Allowed(Allowed),
1184 DeleteFns(DeleteFns), RewriteSignatures(RewriteSignatures),
1185 MaxFixpointIterations(MaxFixpointIterations),
1186 OREGetter(Optional<OptimizationRemarkGetter>(OREGetter)),
1187 PassName(PassName) {}
1188
1189 ~Attributor();
1190
1191 /// Run the analyses until a fixpoint is reached or enforced (timeout).
1192 ///
1193 /// The attributes registered with this Attributor can be used after as long
1194 /// as the Attributor is not destroyed (it owns the attributes now).
1195 ///
1196 /// \returns CHANGED if the IR was changed, otherwise UNCHANGED.
1197 ChangeStatus run();
1198
1199 /// Lookup an abstract attribute of type \p AAType at position \p IRP. While
1200 /// no abstract attribute is found equivalent positions are checked, see
1201 /// SubsumingPositionIterator. Thus, the returned abstract attribute
1202 /// might be anchored at a different position, e.g., the callee if \p IRP is a
1203 /// call base.
1204 ///
1205 /// This method is the only (supported) way an abstract attribute can retrieve
1206 /// information from another abstract attribute. As an example, take an
1207 /// abstract attribute that determines the memory access behavior for a
1208 /// argument (readnone, readonly, ...). It should use `getAAFor` to get the
1209 /// most optimistic information for other abstract attributes in-flight, e.g.
1210 /// the one reasoning about the "captured" state for the argument or the one
1211 /// reasoning on the memory access behavior of the function as a whole.
1212 ///
1213 /// If the DepClass enum is set to `DepClassTy::None` the dependence from
1214 /// \p QueryingAA to the return abstract attribute is not automatically
1215 /// recorded. This should only be used if the caller will record the
1216 /// dependence explicitly if necessary, thus if the returned abstract
1217 /// attribute is used for reasoning. To record the dependences explicitly use
1218 /// the `Attributor::recordDependence` method.
1219 template <typename AAType>
1220 const AAType &getAAFor(const AbstractAttribute &QueryingAA,
1221 const IRPosition &IRP, DepClassTy DepClass) {
1222 return getOrCreateAAFor<AAType>(IRP, &QueryingAA, DepClass,
1223 /* ForceUpdate */ false);
1224 }
1225
1226 /// Similar to getAAFor but the return abstract attribute will be updated (via
1227 /// `AbstractAttribute::update`) even if it is found in the cache. This is
1228 /// especially useful for AAIsDead as changes in liveness can make updates
1229 /// possible/useful that were not happening before as the abstract attribute
1230 /// was assumed dead.
1231 template <typename AAType>
1232 const AAType &getAndUpdateAAFor(const AbstractAttribute &QueryingAA,
1233 const IRPosition &IRP, DepClassTy DepClass) {
1234 return getOrCreateAAFor<AAType>(IRP, &QueryingAA, DepClass,
1235 /* ForceUpdate */ true);
1236 }
1237
1238 /// The version of getAAFor that allows to omit a querying abstract
1239 /// attribute. Using this after Attributor started running is restricted to
1240 /// only the Attributor itself. Initial seeding of AAs can be done via this
1241 /// function.
1242 /// NOTE: ForceUpdate is ignored in any stage other than the update stage.
// Look up the abstract attribute of type AAType at \p IRP and create,
// register, initialize, and (optionally) update it if none exists yet.
//
// \param IRP             Position to query; taken by value because the call
//                        base context may be stripped from it below.
// \param QueryingAA      Attribute asking for the result, may be null.
// \param DepClass        Dependence class used when a dependence from the
//                        result to \p QueryingAA is recorded.
// \param ForceUpdate     Update an already existing attribute; only honored
//                        while in the UPDATE phase.
// \param UpdateAfterInit Run an initial update on a freshly created
//                        attribute.
// \returns A reference to the existing or newly created attribute. On several
//          paths below the new attribute is returned at a pessimistic
//          fixpoint.
1243 template <typename AAType>
1244 const AAType &getOrCreateAAFor(IRPosition IRP,
1245 const AbstractAttribute *QueryingAA,
1246 DepClassTy DepClass, bool ForceUpdate = false,
1247 bool UpdateAfterInit = true) {
1248 if (!shouldPropagateCallBaseContext(IRP))
1249 IRP = IRP.stripCallBaseContext();
1250
// Fast path: reuse an existing attribute, even one in an invalid state.
1251 if (AAType *AAPtr = lookupAAFor<AAType>(IRP, QueryingAA, DepClass,
1252 /* AllowInvalidState */ true)) {
1253 if (ForceUpdate && Phase == AttributorPhase::UPDATE)
1254 updateAA(*AAPtr);
1255 return *AAPtr;
1256 }
1257
1258 // No matching attribute found, create one.
1259 // Use the static create method.
1260 auto &AA = AAType::createForPosition(IRP, *this);
1261
1262 // If we are currently seeding attributes, enforce seeding rules.
// Note that this early return happens before registerAA is called.
1263 if (Phase == AttributorPhase::SEEDING && !shouldSeedAttribute(AA)) {
1264 AA.getState().indicatePessimisticFixpoint();
1265 return AA;
1266 }
1267
1268 registerAA(AA);
1269
1270 // For now we ignore naked and optnone functions.
// Attributes whose ID is not in a provided Allowed set are invalidated too.
1271 bool Invalidate = Allowed && !Allowed->count(&AAType::ID);
1272 const Function *FnScope = IRP.getAnchorScope();
1273 if (FnScope)
1274 Invalidate |= FnScope->hasFnAttribute(Attribute::Naked) ||
1275 FnScope->hasFnAttribute(Attribute::OptimizeNone);
1276
1277 // Avoid too many nested initializations to prevent a stack overflow.
1278 Invalidate |= InitializationChainLength > MaxInitializationChainLength;
1279
1280 // Bootstrap the new attribute with an initial update to propagate
1281 // information, e.g., function -> call site. If it is not on a given
1282 // Allowed we will not perform updates at all.
1283 if (Invalidate) {
1284 AA.getState().indicatePessimisticFixpoint();
1285 return AA;
1286 }
1287
1288 {
// The counter brackets the initialize call so nested creations triggered
// from within it are seen by the chain length check above.
1289 TimeTraceScope TimeScope(AA.getName() + "::initialize");
1290 ++InitializationChainLength;
1291 AA.initialize(*this);
1292 --InitializationChainLength;
1293 }
1294
1295 // Initialize and update is allowed for code outside of the current function
1296 // set, but only if it is part of module slice we are allowed to look at.
1297 // Only exception is AAIsDeadFunction whose initialization is prevented
1298 // directly, since we don't want to compute it twice.
1299 if (FnScope && !Functions.count(const_cast<Function *>(FnScope))) {
1300 if (!getInfoCache().isInModuleSlice(*FnScope)) {
1301 AA.getState().indicatePessimisticFixpoint();
1302 return AA;
1303 }
1304 }
1305
1306 // If this is queried in the manifest stage, we force the AA to indicate
1307 // pessimistic fixpoint immediately.
1308 if (Phase == AttributorPhase::MANIFEST) {
1309 AA.getState().indicatePessimisticFixpoint();
1310 return AA;
1311 }
1312
1313 // Allow seeded attributes to declare dependencies.
1314 // Remember the seeding state.
1315 if (UpdateAfterInit) {
// Temporarily switch to the UPDATE phase for the initial update, then
// restore whatever phase was active before.
1316 AttributorPhase OldPhase = Phase;
1317 Phase = AttributorPhase::UPDATE;
1318
1319 updateAA(AA);
1320
1321 Phase = OldPhase;
1322 }
1323
// Only a valid result is worth depending on.
1324 if (QueryingAA && AA.getState().isValidState())
1325 recordDependence(AA, const_cast<AbstractAttribute &>(*QueryingAA),
1326 DepClass);
1327 return AA;
1328 }
1329 template <typename AAType>
1330 const AAType &getOrCreateAAFor(const IRPosition &IRP) {
1331 return getOrCreateAAFor<AAType>(IRP, /* QueryingAA */ nullptr,
1332 DepClassTy::NONE);
1333 }
1334
1335 /// Return the attribute of \p AAType for \p IRP if existing and valid. This
1336 /// also allows non-AA users lookup.
1337 template <typename AAType>
1338 AAType *lookupAAFor(const IRPosition &IRP,
1339 const AbstractAttribute *QueryingAA = nullptr,
1340 DepClassTy DepClass = DepClassTy::OPTIONAL,
1341 bool AllowInvalidState = false) {
1342 static_assert(std::is_base_of<AbstractAttribute, AAType>::value,
1343 "Cannot query an attribute with a type not derived from "
1344 "'AbstractAttribute'!");
1345 // Lookup the abstract attribute of type AAType. If found, return it after
1346 // registering a dependence of QueryingAA on the one returned attribute.
1347 AbstractAttribute *AAPtr = AAMap.lookup({&AAType::ID, IRP});
1348 if (!AAPtr)
1349 return nullptr;
1350
1351 AAType *AA = static_cast<AAType *>(AAPtr);
1352
1353 // Do not register a dependence on an attribute with an invalid state.
1354 if (DepClass != DepClassTy::NONE && QueryingAA &&
1355 AA->getState().isValidState())
1356 recordDependence(*AA, const_cast<AbstractAttribute &>(*QueryingAA),
1357 DepClass);
1358
1359 // Return nullptr if this attribute has an invalid state.
1360 if (!AllowInvalidState && !AA->getState().isValidState())
1361 return nullptr;
1362 return AA;
1363 }
1364
1365 /// Explicitly record a dependence from \p FromAA to \p ToAA, that is if
1366 /// \p FromAA changes \p ToAA should be updated as well.
1367 ///
1368 /// This method should be used in conjunction with the `getAAFor` method and
1369 /// with the DepClass enum passed to the method set to None. This can
1370 /// be beneficial to avoid false dependences but it requires the users of
1371 /// `getAAFor` to explicitly record true dependences through this method.
1372 /// The \p DepClass flag indicates if the dependence is strictly necessary.
1373 /// That means for required dependences, if \p FromAA changes to an invalid
1374 /// state, \p ToAA can be moved to a pessimistic fixpoint because it required
1375 /// information from \p FromAA but none are available anymore.
1376 void recordDependence(const AbstractAttribute &FromAA,
1377 const AbstractAttribute &ToAA, DepClassTy DepClass);
1378
1379 /// Introduce a new abstract attribute into the fixpoint analysis.
1380 ///
1381 /// Note that ownership of the attribute is given to the Attributor. It will
1382 /// invoke delete for the Attributor on destruction of the Attributor.
1383 ///
1384 /// Attributes are identified by their IR position (AAType::getIRPosition())
1385 /// and the address of their static member (see AAType::ID).
1386 template <typename AAType> AAType &registerAA(AAType &AA) {
1387 static_assert(std::is_base_of<AbstractAttribute, AAType>::value,
1388 "Cannot register an attribute with a type not derived from "
1389 "'AbstractAttribute'!");
1390 // Put the attribute in the lookup map structure and the container we use to
1391 // keep track of all attributes.
1392 const IRPosition &IRP = AA.getIRPosition();
1393 AbstractAttribute *&AAPtr = AAMap[{&AAType::ID, IRP}];
1394
1395 assert(!AAPtr && "Attribute already in map!")((void)0);
1396 AAPtr = &AA;
1397
1398 // Register AA with the synthetic root only before the manifest stage.
1399 if (Phase == AttributorPhase::SEEDING || Phase == AttributorPhase::UPDATE)
1400 DG.SyntheticRoot.Deps.push_back(
1401 AADepGraphNode::DepTy(&AA, unsigned(DepClassTy::REQUIRED)));
1402
1403 return AA;
1404 }
1405
1406 /// Return the internal information cache.
1407 InformationCache &getInfoCache() { return InfoCache; }
1408
1409 /// Return true if this is a module pass, false otherwise.
1410 bool isModulePass() const {
1411 return !Functions.empty() &&
1412 Functions.size() == Functions.front()->getParent()->size();
1413 }
1414
1415 /// Return true if we derive attributes for \p Fn
1416 bool isRunOn(Function &Fn) const {
1417 return Functions.empty() || Functions.count(&Fn);
1418 }
1419
1420 /// Determine opportunities to derive 'default' attributes in \p F and create
1421 /// abstract attribute objects for them.
1422 ///
1423 /// \param F The function that is checked for attribute opportunities.
1424 ///
1425 /// Note that abstract attribute instances are generally created even if the
1426 /// IR already contains the information they would deduce. The most important
1427 /// reason for this is the single interface, the one of the abstract attribute
1428 /// instance, which can be queried without the need to look at the IR in
1429 /// various places.
1430 void identifyDefaultAbstractAttributes(Function &F);
1431
1432 /// Determine whether the function \p F is IPO amendable
1433 ///
1434 /// If a function is exactly defined or it has alwaysinline attribute
1435 /// and is viable to be inlined, we say it is IPO amendable
1436 bool isFunctionIPOAmendable(const Function &F) {
1437 return F.hasExactDefinition() || InfoCache.InlineableFunctions.count(&F);
1438 }
1439
1440 /// Mark the internal function \p F as live.
1441 ///
1442 /// This will trigger the identification and initialization of attributes for
1443 /// \p F.
1444 void markLiveInternalFunction(const Function &F) {
1445 assert(F.hasLocalLinkage() &&((void)0)
1446 "Only local linkage is assumed dead initially.")((void)0);
1447
1448 identifyDefaultAbstractAttributes(const_cast<Function &>(F));
1449 }
1450
1451 /// Helper function to remove callsite.
1452 void removeCallSite(CallInst *CI) {
1453 if (!CI)
1454 return;
1455
1456 CGUpdater.removeCallSite(*CI);
1457 }
1458
1459 /// Record that \p U is to be replaced with \p NV after information was
1460 /// manifested. This also triggers deletion of trivially dead instructions.
1461 bool changeUseAfterManifest(Use &U, Value &NV) {
1462 Value *&V = ToBeChangedUses[&U];
1463 if (V && (V->stripPointerCasts() == NV.stripPointerCasts() ||
1464 isa_and_nonnull<UndefValue>(V)))
1465 return false;
1466 assert((!V || V == &NV || isa<UndefValue>(NV)) &&((void)0)
1467 "Use was registered twice for replacement with different values!")((void)0);
1468 V = &NV;
1469 return true;
1470 }
1471
1472 /// Helper function to replace all uses of \p V with \p NV. Return true if
1473 /// there is any change. The flag \p ChangeDroppable indicates if droppable
1474 /// uses should be changed too.
1475 bool changeValueAfterManifest(Value &V, Value &NV,
1476 bool ChangeDroppable = true) {
1477 auto &Entry = ToBeChangedValues[&V];
1478 Value *&CurNV = Entry.first;
1479 if (CurNV && (CurNV->stripPointerCasts() == NV.stripPointerCasts() ||
1480 isa<UndefValue>(CurNV)))
1481 return false;
1482 assert((!CurNV || CurNV == &NV || isa<UndefValue>(NV)) &&((void)0)
1483 "Value replacement was registered twice with different values!")((void)0);
1484 CurNV = &NV;
1485 Entry.second = ChangeDroppable;
1486 return true;
1487 }
1488
1489 /// Record that \p I is to be replaced with `unreachable` after information
1490 /// was manifested.
1491 void changeToUnreachableAfterManifest(Instruction *I) {
1492 ToBeChangedToUnreachableInsts.insert(I);
1493 }
1494
1495 /// Record that \p II has at least one dead successor block. This information
1496 /// is used, e.g., to replace \p II with a call, after information was
1497 /// manifested.
1498 void registerInvokeWithDeadSuccessor(InvokeInst &II) {
1499 InvokeWithDeadSuccessor.push_back(&II);
1500 }
1501
1502 /// Record that \p I is deleted after information was manifested. This also
1503 /// triggers deletion of trivially dead instructions.
1504 void deleteAfterManifest(Instruction &I) { ToBeDeletedInsts.insert(&I); }
1505
1506 /// Record that \p BB is deleted after information was manifested. This also
1507 /// triggers deletion of trivially dead instructions.
1508 void deleteAfterManifest(BasicBlock &BB) { ToBeDeletedBlocks.insert(&BB); }
1509
1510 // Record that \p BB is added during the manifest of an AA. Added basic blocks
1511 // are preserved in the IR.
1512 void registerManifestAddedBasicBlock(BasicBlock &BB) {
1513 ManifestAddedBlocks.insert(&BB);
1514 }
1515
1516 /// Record that \p F is deleted after information was manifested.
1517 void deleteAfterManifest(Function &F) {
1518 if (DeleteFns)
1519 ToBeDeletedFunctions.insert(&F);
1520 }
1521
1522 /// If \p IRP is assumed to be a constant, return it, if it is unclear yet,
1523 /// return None, otherwise return `nullptr`.
1524 Optional<Constant *> getAssumedConstant(const IRPosition &IRP,
1525 const AbstractAttribute &AA,
1526 bool &UsedAssumedInformation);
1527 Optional<Constant *> getAssumedConstant(const Value &V,
1528 const AbstractAttribute &AA,
1529 bool &UsedAssumedInformation) {
1530 return getAssumedConstant(IRPosition::value(V), AA, UsedAssumedInformation);
1531 }
1532
1533 /// If \p V is assumed simplified, return it, if it is unclear yet,
1534 /// return None, otherwise return `nullptr`.
1535 Optional<Value *> getAssumedSimplified(const IRPosition &IRP,
1536 const AbstractAttribute &AA,
1537 bool &UsedAssumedInformation) {
1538 return getAssumedSimplified(IRP, &AA, UsedAssumedInformation);
1539 }
1540 Optional<Value *> getAssumedSimplified(const Value &V,
1541 const AbstractAttribute &AA,
1542 bool &UsedAssumedInformation) {
1543 return getAssumedSimplified(IRPosition::value(V), AA,
1544 UsedAssumedInformation);
1545 }
1546
1547 /// If \p V is assumed simplified, return it, if it is unclear yet,
1548 /// return None, otherwise return `nullptr`. Same as the public version
1549 /// except that it can be used without recording dependences on any \p AA.
1550 Optional<Value *> getAssumedSimplified(const IRPosition &V,
1551 const AbstractAttribute *AA,
1552 bool &UsedAssumedInformation);
1553
1554 /// Register \p CB as a simplification callback.
1555 /// `Attributor::getAssumedSimplified` will use these callbacks before
1556 /// it will ask `AAValueSimplify`. It is important to ensure this
1557 /// is called before `identifyDefaultAbstractAttributes`, assuming the
1558 /// latter is called at all.
1559 using SimplifictionCallbackTy = std::function<Optional<Value *>(
1560 const IRPosition &, const AbstractAttribute *, bool &)>;
1561 void registerSimplificationCallback(const IRPosition &IRP,
1562 const SimplifictionCallbackTy &CB) {
1563 SimplificationCallbacks[IRP].emplace_back(CB);
1564 }
1565
1566 /// Return true if there is a simplification callback for \p IRP.
1567 bool hasSimplificationCallback(const IRPosition &IRP) {
1568 return SimplificationCallbacks.count(IRP);
1569 }
1570
1571private:
1572 /// The vector with all simplification callbacks registered by outside AAs.
1573 DenseMap<IRPosition, SmallVector<SimplifictionCallbackTy, 1>>
1574 SimplificationCallbacks;
1575
1576public:
1577 /// Translate \p V from the callee context into the call site context.
1578 Optional<Value *>
1579 translateArgumentToCallSiteContent(Optional<Value *> V, CallBase &CB,
1580 const AbstractAttribute &AA,
1581 bool &UsedAssumedInformation);
1582
1583 /// Return true if \p AA (or its context instruction) is assumed dead.
1584 ///
1585 /// If \p LivenessAA is not provided it is queried.
1586 bool isAssumedDead(const AbstractAttribute &AA, const AAIsDead *LivenessAA,
1587 bool &UsedAssumedInformation,
1588 bool CheckBBLivenessOnly = false,
1589 DepClassTy DepClass = DepClassTy::OPTIONAL);
1590
1591 /// Return true if \p I is assumed dead.
1592 ///
1593 /// If \p LivenessAA is not provided it is queried.
1594 bool isAssumedDead(const Instruction &I, const AbstractAttribute *QueryingAA,
1595 const AAIsDead *LivenessAA, bool &UsedAssumedInformation,
1596 bool CheckBBLivenessOnly = false,
1597 DepClassTy DepClass = DepClassTy::OPTIONAL);
1598
1599 /// Return true if \p U is assumed dead.
1600 ///
1601 /// If \p FnLivenessAA is not provided it is queried.
1602 bool isAssumedDead(const Use &U, const AbstractAttribute *QueryingAA,
1603 const AAIsDead *FnLivenessAA, bool &UsedAssumedInformation,
1604 bool CheckBBLivenessOnly = false,
1605 DepClassTy DepClass = DepClassTy::OPTIONAL);
1606
1607 /// Return true if \p IRP is assumed dead.
1608 ///
1609 /// If \p FnLivenessAA is not provided it is queried.
1610 bool isAssumedDead(const IRPosition &IRP, const AbstractAttribute *QueryingAA,
1611 const AAIsDead *FnLivenessAA, bool &UsedAssumedInformation,
1612 bool CheckBBLivenessOnly = false,
1613 DepClassTy DepClass = DepClassTy::OPTIONAL);
1614
1615 /// Return true if \p BB is assumed dead.
1616 ///
1617 /// If \p FnLivenessAA is not provided it is queried.
1618 bool isAssumedDead(const BasicBlock &BB, const AbstractAttribute *QueryingAA,
1619 const AAIsDead *FnLivenessAA,
1620 DepClassTy DepClass = DepClassTy::OPTIONAL);
1621
1622 /// Check \p Pred on all (transitive) uses of \p V.
1623 ///
1624 /// This method will evaluate \p Pred on all (transitive) uses of the
1625 /// associated value and return true if \p Pred holds every time.
1626 bool checkForAllUses(function_ref<bool(const Use &, bool &)> Pred,
1627 const AbstractAttribute &QueryingAA, const Value &V,
1628 bool CheckBBLivenessOnly = false,
1629 DepClassTy LivenessDepClass = DepClassTy::OPTIONAL);
1630
1631 /// Emit a remark generically.
1632 ///
1633 /// This template function can be used to generically emit a remark. The
1634 /// RemarkKind should be one of the following:
1635 /// - OptimizationRemark to indicate a successful optimization attempt
1636 /// - OptimizationRemarkMissed to report a failed optimization attempt
1637 /// - OptimizationRemarkAnalysis to provide additional information about an
1638 /// optimization attempt
1639 ///
1640 /// The remark is built using a callback function \p RemarkCB that takes a
1641 /// RemarkKind as input and returns a RemarkKind.
1642 template <typename RemarkKind, typename RemarkCallBack>
1643 void emitRemark(Instruction *I, StringRef RemarkName,
1644 RemarkCallBack &&RemarkCB) const {
1645 if (!OREGetter)
1646 return;
1647
1648 Function *F = I->getFunction();
1649 auto &ORE = OREGetter.getValue()(F);
1650
1651 if (RemarkName.startswith("OMP"))
1652 ORE.emit([&]() {
1653 return RemarkCB(RemarkKind(PassName, RemarkName, I))
1654 << " [" << RemarkName << "]";
1655 });
1656 else
1657 ORE.emit([&]() { return RemarkCB(RemarkKind(PassName, RemarkName, I)); });
1658 }
1659
1660 /// Emit a remark on a function.
1661 template <typename RemarkKind, typename RemarkCallBack>
1662 void emitRemark(Function *F, StringRef RemarkName,
1663 RemarkCallBack &&RemarkCB) const {
1664 if (!OREGetter)
1665 return;
1666
1667 auto &ORE = OREGetter.getValue()(F);
1668
1669 if (RemarkName.startswith("OMP"))
1670 ORE.emit([&]() {
1671 return RemarkCB(RemarkKind(PassName, RemarkName, F))
1672 << " [" << RemarkName << "]";
1673 });
1674 else
1675 ORE.emit([&]() { return RemarkCB(RemarkKind(PassName, RemarkName, F)); });
1676 }
1677
1678 /// Helper struct used in the communication between an abstract attribute (AA)
1679 /// that wants to change the signature of a function and the Attributor which
1680 /// applies the changes. The struct is partially initialized with the
1681 /// information from the AA (see the constructor). All other members are
1682 /// provided by the Attributor prior to invoking any callbacks.
1683 struct ArgumentReplacementInfo {
1684 /// Callee repair callback type
1685 ///
1686 /// The function repair callback is invoked once to rewire the replacement
1687 /// arguments in the body of the new function. The argument replacement info
1688 /// is passed, as build from the registerFunctionSignatureRewrite call, as
1689 /// well as the replacement function and an iteratore to the first
1690 /// replacement argument.
1691 using CalleeRepairCBTy = std::function<void(
1692 const ArgumentReplacementInfo &, Function &, Function::arg_iterator)>;
1693
1694 /// Abstract call site (ACS) repair callback type
1695 ///
1696 /// The abstract call site repair callback is invoked once on every abstract
1697 /// call site of the replaced function (\see ReplacedFn). The callback needs
1698 /// to provide the operands for the call to the new replacement function.
1699 /// The number and type of the operands appended to the provided vector
1700 /// (second argument) is defined by the number and types determined through
1701 /// the replacement type vector (\see ReplacementTypes). The first argument
1702 /// is the ArgumentReplacementInfo object registered with the Attributor
1703 /// through the registerFunctionSignatureRewrite call.
1704 using ACSRepairCBTy =
1705 std::function<void(const ArgumentReplacementInfo &, AbstractCallSite,
1706 SmallVectorImpl<Value *> &)>;
1707
1708 /// Simple getters, see the corresponding members for details.
1709 ///{
1710
1711 Attributor &getAttributor() const { return A; }
1712 const Function &getReplacedFn() const { return ReplacedFn; }
1713 const Argument &getReplacedArg() const { return ReplacedArg; }
1714 unsigned getNumReplacementArgs() const { return ReplacementTypes.size(); }
1715 const SmallVectorImpl<Type *> &getReplacementTypes() const {
1716 return ReplacementTypes;
1717 }
1718
1719 ///}
1720
1721 private:
1722 /// Constructor that takes the argument to be replaced, the types of
1723 /// the replacement arguments, as well as callbacks to repair the call sites
1724 /// and new function after the replacement happened.
1725 ArgumentReplacementInfo(Attributor &A, Argument &Arg,
1726 ArrayRef<Type *> ReplacementTypes,
1727 CalleeRepairCBTy &&CalleeRepairCB,
1728 ACSRepairCBTy &&ACSRepairCB)
1729 : A(A), ReplacedFn(*Arg.getParent()), ReplacedArg(Arg),
1730 ReplacementTypes(ReplacementTypes.begin(), ReplacementTypes.end()),
1731 CalleeRepairCB(std::move(CalleeRepairCB)),
1732 ACSRepairCB(std::move(ACSRepairCB)) {}
1733
1734 /// Reference to the attributor to allow access from the callbacks.
1735 Attributor &A;
1736
1737 /// The "old" function replaced by ReplacementFn.
1738 const Function &ReplacedFn;
1739
1740 /// The "old" argument replaced by new ones defined via ReplacementTypes.
1741 const Argument &ReplacedArg;
1742
1743 /// The types of the arguments replacing ReplacedArg.
1744 const SmallVector<Type *, 8> ReplacementTypes;
1745
1746 /// Callee repair callback, see CalleeRepairCBTy.
1747 const CalleeRepairCBTy CalleeRepairCB;
1748
1749 /// Abstract call site (ACS) repair callback, see ACSRepairCBTy.
1750 const ACSRepairCBTy ACSRepairCB;
1751
1752 /// Allow access to the private members from the Attributor.
1753 friend struct Attributor;
1754 };
1755
1756 /// Check if we can rewrite a function signature.
1757 ///
1758 /// The argument \p Arg is replaced with new ones defined by the number,
1759 /// order, and types in \p ReplacementTypes.
1760 ///
1761 /// \returns True, if the replacement can be registered, via
1762 /// registerFunctionSignatureRewrite, false otherwise.
1763 bool isValidFunctionSignatureRewrite(Argument &Arg,
1764 ArrayRef<Type *> ReplacementTypes);
1765
1766 /// Register a rewrite for a function signature.
1767 ///
1768 /// The argument \p Arg is replaced with new ones defined by the number,
1769 /// order, and types in \p ReplacementTypes. The rewiring at the call sites is
1770 /// done through \p ACSRepairCB and at the callee site through
1771 /// \p CalleeRepairCB.
1772 ///
1773 /// \returns True, if the replacement was registered, false otherwise.
1774 bool registerFunctionSignatureRewrite(
1775 Argument &Arg, ArrayRef<Type *> ReplacementTypes,
1776 ArgumentReplacementInfo::CalleeRepairCBTy &&CalleeRepairCB,
1777 ArgumentReplacementInfo::ACSRepairCBTy &&ACSRepairCB);
1778
1779 /// Check \p Pred on all function call sites.
1780 ///
1781 /// This method will evaluate \p Pred on call sites and return
1782 /// true if \p Pred holds in every call site. However, this is only possible
1783 /// if all call sites are known, hence the function has internal linkage.
1784 /// If true is returned, \p AllCallSitesKnown is set if all possible call
1785 /// sites of the function have been visited.
1786 bool checkForAllCallSites(function_ref<bool(AbstractCallSite)> Pred,
1787 const AbstractAttribute &QueryingAA,
1788 bool RequireAllCallSites, bool &AllCallSitesKnown);
1789
1790 /// Check \p Pred on all values potentially returned by \p F.
1791 ///
1792 /// This method will evaluate \p Pred on all values potentially returned by
1793 /// the function associated with \p QueryingAA. The returned values are
1794 /// matched with their respective return instructions. Returns true if \p Pred
1795 /// holds on all of them.
1796 bool checkForAllReturnedValuesAndReturnInsts(
1797 function_ref<bool(Value &, const SmallSetVector<ReturnInst *, 4> &)> Pred,
1798 const AbstractAttribute &QueryingAA);
1799
1800 /// Check \p Pred on all values potentially returned by the function
1801 /// associated with \p QueryingAA.
1802 ///
1803 /// This is the context insensitive version of the method above.
1804 bool checkForAllReturnedValues(function_ref<bool(Value &)> Pred,
1805 const AbstractAttribute &QueryingAA);
1806
1807 /// Check \p Pred on all instructions with an opcode present in \p Opcodes.
1808 ///
1809 /// This method will evaluate \p Pred on all instructions with an opcode
1810 /// present in \p Opcode and return true if \p Pred holds on all of them.
1811 bool checkForAllInstructions(function_ref<bool(Instruction &)> Pred,
1812 const AbstractAttribute &QueryingAA,
1813 const ArrayRef<unsigned> &Opcodes,
1814 bool &UsedAssumedInformation,
1815 bool CheckBBLivenessOnly = false,
1816 bool CheckPotentiallyDead = false);
1817
1818 /// Check \p Pred on all call-like instructions (=CallBase derived).
1819 ///
1820 /// See checkForAllInstructions(...) for more information.
1821 bool checkForAllCallLikeInstructions(function_ref<bool(Instruction &)> Pred,
1822 const AbstractAttribute &QueryingAA,
1823 bool &UsedAssumedInformation,
1824 bool CheckBBLivenessOnly = false,
1825 bool CheckPotentiallyDead = false) {
1826 return checkForAllInstructions(
1827 Pred, QueryingAA,
1828 {(unsigned)Instruction::Invoke, (unsigned)Instruction::CallBr,
1829 (unsigned)Instruction::Call},
1830 UsedAssumedInformation, CheckBBLivenessOnly, CheckPotentiallyDead);
1831 }
1832
1833 /// Check \p Pred on all Read/Write instructions.
1834 ///
1835 /// This method will evaluate \p Pred on all instructions that read or write
1836 /// to memory present in the information cache and return true if \p Pred
1837 /// holds on all of them.
1838 bool checkForAllReadWriteInstructions(function_ref<bool(Instruction &)> Pred,
1839 AbstractAttribute &QueryingAA,
1840 bool &UsedAssumedInformation);
1841
1842 /// Create a shallow wrapper for \p F such that \p F has internal linkage
1843 /// afterwards. It also sets the original \p F 's name to anonymous
1844 ///
1845 /// A wrapper is a function with the same type (and attributes) as \p F
1846 /// that will only call \p F and return the result, if any.
1847 ///
1848 /// Assuming the declaration of \p F looks like:
1849 /// rty F(aty0 arg0, ..., atyN argN);
1850 ///
1851 /// The wrapper will then look as follows:
1852 /// rty wrapper(aty0 arg0, ..., atyN argN) {
1853 /// return F(arg0, ..., argN);
1854 /// }
1855 ///
1856 static void createShallowWrapper(Function &F);
1857
1858 /// Returns true if the function \p F can be internalized. i.e. it has a
1859 /// compatible linkage.
1860 static bool isInternalizable(Function &F);
1861
1862 /// Make another copy of the function \p F such that the copied version has
1863 /// internal linkage afterwards and can be analysed. Then we replace all uses
1864 /// of the original function with the copied one.
1865 ///
1866 /// Only non-locally linked functions that have `linkonce_odr` or `weak_odr`
1867 /// linkage can be internalized because these linkages guarantee that other
1868 /// definitions with the same name have the same semantics as this one.
1869 ///
1870 /// This will only be run if the `attributor-allow-deep-wrappers` option is
1871 /// set, or if the function is called with \p Force set to true.
1872 ///
1873 /// If the function \p F failed to be internalized the return value will be a
1874 /// null pointer.
1875 static Function *internalizeFunction(Function &F, bool Force = false);
1876
1877 /// Make copies of each function in the set \p FnSet such that the copied
1878 /// version has internal linkage afterwards and can be analysed. Then we
1879 /// replace all uses of the original function to the copied one. The map
1880 /// \p FnMap contains a mapping of functions to their internalized versions.
1881 ///
1882 /// Only non-locally linked functions that have `linkonce_odr` or `weak_odr`
1883 /// linkage can be internalized because these linkages guarantee that other
1884 /// definitions with the same name have the same semantics as this one.
1885 ///
1886 /// This version will internalize all the functions in the set \p FnSet at
1887 /// once and then replace the uses. This prevents internalized functions being
1888 /// called by external functions when there is an internalized version in the
1889 /// module.
1890 static bool internalizeFunctions(SmallPtrSetImpl<Function *> &FnSet,
1891 DenseMap<Function *, Function *> &FnMap);
1892
1893 /// Return the data layout associated with the anchor scope.
1894 const DataLayout &getDataLayout() const { return InfoCache.DL; }
1895
1896 /// The allocator used to allocate memory, e.g. for `AbstractAttribute`s.
1897 BumpPtrAllocator &Allocator;
1898
1899private:
1900 /// This method will do fixpoint iteration until fixpoint or the
1901 /// maximum iteration count is reached.
1902 ///
1903 /// If the maximum iteration count is reached, this method will
1904 /// indicate pessimistic fixpoint on attributes that transitively depend
1905 /// on attributes that were scheduled for an update.
1906 void runTillFixpoint();
1907
1908 /// Gets called after scheduling, manifests attributes to the LLVM IR.
1909 ChangeStatus manifestAttributes();
1910
1911 /// Gets called after attributes have been manifested, cleans up the IR.
1912 /// Deletes dead functions, blocks and instructions.
1913 /// Rewrites function signatures and updates the call graph.
1914 ChangeStatus cleanupIR();
1915
1916 /// Identify internal functions that are effectively dead, thus not reachable
1917 /// from a live entry point. The functions are added to ToBeDeletedFunctions.
1918 void identifyDeadInternalFunctions();
1919
1920 /// Run `::update` on \p AA and track the dependences queried while doing so.
1921 /// Also adjust the state if we know further updates are not necessary.
1922 ChangeStatus updateAA(AbstractAttribute &AA);
1923
1924 /// Remember the dependences on the top of the dependence stack such that they
1925 /// may trigger further updates. (\see DependenceStack)
1926 void rememberDependences();
1927
1928 /// Check \p Pred on all call sites of \p Fn.
1929 ///
1930 /// This method will evaluate \p Pred on call sites and return
1931 /// true if \p Pred holds in every call site. However, this is only possible
1932 /// if all call sites are known, hence the function has internal linkage.
1933 /// If true is returned, \p AllCallSitesKnown is set if all possible call
1934 /// sites of the function have been visited.
1935 bool checkForAllCallSites(function_ref<bool(AbstractCallSite)> Pred,
1936 const Function &Fn, bool RequireAllCallSites,
1937 const AbstractAttribute *QueryingAA,
1938 bool &AllCallSitesKnown);
1939
1940 /// Determine if CallBase context in \p IRP should be propagated.
1941 bool shouldPropagateCallBaseContext(const IRPosition &IRP);
1942
1943 /// Apply all requested function signature rewrites
1944 /// (\see registerFunctionSignatureRewrite) and return Changed if the module
1945 /// was altered.
1946 ChangeStatus
1947 rewriteFunctionSignatures(SmallPtrSetImpl<Function *> &ModifiedFns);
1948
1949 /// Check if the Attribute \p AA should be seeded.
1950 /// See getOrCreateAAFor.
1951 bool shouldSeedAttribute(AbstractAttribute &AA);
1952
1953 /// A nested map to lookup abstract attributes based on the argument position
1954 /// on the outer level, and the addresses of the static member (AAType::ID) on
1955 /// the inner level.
1956 ///{
1957 using AAMapKeyTy = std::pair<const char *, IRPosition>;
1958 DenseMap<AAMapKeyTy, AbstractAttribute *> AAMap;
1959 ///}
1960
1961 /// Map to remember all requested signature changes (= argument replacements).
1962 DenseMap<Function *, SmallVector<std::unique_ptr<ArgumentReplacementInfo>, 8>>
1963 ArgumentReplacementMap;
1964
1965 /// The set of functions we are deriving attributes for.
1966 SetVector<Function *> &Functions;
1967
1968 /// The information cache that holds pre-processed (LLVM-IR) information.
1969 InformationCache &InfoCache;
1970
1971 /// Helper to update an underlying call graph.
1972 CallGraphUpdater &CGUpdater;
1973
1974 /// Abstract Attribute dependency graph
1975 AADepGraph DG;
1976
1977 /// Set of functions for which we modified the content such that it might
1978 /// impact the call graph.
1979 SmallPtrSet<Function *, 8> CGModifiedFunctions;
1980
1981 /// Record of a single dependence between two abstract attributes: if
1982 /// FromAA is changed, ToAA needs to be updated as well.
1983 struct DepInfo {
1984 const AbstractAttribute *FromAA; ///< Attribute whose change triggers the update.
1985 const AbstractAttribute *ToAA; ///< Attribute that has to be updated in response.
1986 DepClassTy DepClass; ///< Class of this dependence (see DepClassTy).
1987 };
1988
1989 /// The dependence stack is used to track dependences during an
1990 /// `AbstractAttribute::update` call. As `AbstractAttribute::update` can be
1991 /// recursive we might have multiple vectors of dependences in here. The stack
1992 /// size, should be adjusted according to the expected recursion depth and the
1993 /// inner dependence vector size to the expected number of dependences per
1994 /// abstract attribute. Since the inner vectors are actually allocated on the
1995 /// stack we can be generous with their size.
1996 using DependenceVector = SmallVector<DepInfo, 8>;
1997 SmallVector<DependenceVector *, 16> DependenceStack;
1998
1999 /// If not null, a set limiting the attribute opportunities.
2000 const DenseSet<const char *> *Allowed;
2001
2002 /// Whether to delete functions.
2003 const bool DeleteFns;
2004
2005 /// Whether to rewrite signatures.
2006 const bool RewriteSignatures;
2007
2008 /// Maximum number of fixedpoint iterations.
2009 Optional<unsigned> MaxFixpointIterations;
2010
2011 /// A set to remember the functions we already assume to be live and visited.
2012 DenseSet<const Function *> VisitedFunctions;
2013
2014 /// Uses we replace with a new value after manifest is done. We will remove
2015 /// then trivially dead instructions as well.
2016 DenseMap<Use *, Value *> ToBeChangedUses;
2017
2018 /// Values we replace with a new value after manifest is done. We will remove
2019 /// then trivially dead instructions as well.
2020 DenseMap<Value *, std::pair<Value *, bool>> ToBeChangedValues;
2021
2022 /// Instructions we replace with `unreachable` insts after manifest is done.
2023 SmallDenseSet<WeakVH, 16> ToBeChangedToUnreachableInsts;
2024
2025 /// Invoke instructions with at least a single dead successor block.
2026 SmallVector<WeakVH, 16> InvokeWithDeadSuccessor;
2027
2028 /// A flag that indicates which stage of the process we are in. The member
2029 /// `Phase` starts out as SEEDING (see its initializer) and is changed in `Attributor::run()`.
2030 enum class AttributorPhase {
2031 SEEDING,
2032 UPDATE,
2033 MANIFEST,
2034 CLEANUP,
2035 } Phase = AttributorPhase::SEEDING;
2036
2037 /// The current initialization chain length. Tracked to avoid stack overflows.
2038 unsigned InitializationChainLength = 0;
2039
2040 /// Functions, blocks, and instructions we delete after manifest is done.
2041 ///
2042 ///{
2043 SmallPtrSet<Function *, 8> ToBeDeletedFunctions;
2044 SmallPtrSet<BasicBlock *, 8> ToBeDeletedBlocks;
2045 SmallPtrSet<BasicBlock *, 8> ManifestAddedBlocks;
2046 SmallDenseSet<WeakVH, 8> ToBeDeletedInsts;
2047 ///}
2048
2049 /// Callback to get an OptimizationRemarkEmitter from a Function *.
2050 Optional<OptimizationRemarkGetter> OREGetter;
2051
2052 /// The name of the pass to emit remarks for.
2053 const char *PassName = "";
2054
2055 friend AADepGraph;
2056 friend AttributorCallGraph;
2057};
2058
2059/// An interface to query the internal state of an abstract attribute.
2060///
2061/// The abstract state is a minimal interface that allows the Attributor to
2062/// communicate with the abstract attributes about their internal state without
2063/// enforcing or exposing implementation details, e.g., the (existence of an)
2064/// underlying lattice.
2065///
2066/// It is sufficient to be able to query if a state is (1) valid or invalid, (2)
2067/// at a fixpoint, and to indicate to the state that (3) an optimistic fixpoint
2068/// was reached or (4) a pessimistic fixpoint was enforced.
2069///
2070/// All methods need to be implemented by the subclass. For the common use case,
2071/// a single boolean state or a bit-encoded state, the BooleanState and
2072/// {Inc,Dec,Bit}IntegerState classes are already provided. An abstract
2073/// attribute can inherit from them to get the abstract state interface and
2074/// additional methods to directly modify the state based if needed. See the
2075/// class comments for help.
2076struct AbstractState {
2077 virtual ~AbstractState() {}
2078
2079 /// Return if this abstract state is in a valid state. If false, no
2080 /// information provided should be used.
2081 virtual bool isValidState() const = 0;
2082
2083 /// Return if this abstract state is fixed, thus does not need to be updated
2084 /// if information changes as it cannot change itself.
2085 virtual bool isAtFixpoint() const = 0;
2086
2087 /// Indicate that the abstract state should converge to the optimistic state.
2088 ///
2089 /// This will usually make the optimistically assumed state the known to be
2090 /// true state.
2091 ///
2092 /// \returns ChangeStatus::UNCHANGED as the assumed value should not change.
2093 virtual ChangeStatus indicateOptimisticFixpoint() = 0;
2094
2095 /// Indicate that the abstract state should converge to the pessimistic state.
2096 ///
2097 /// This will usually revert the optimistically assumed state to the known to
2098 /// be true state.
2099 ///
2100 /// \returns ChangeStatus::CHANGED as the assumed value may change.
2101 virtual ChangeStatus indicatePessimisticFixpoint() = 0;
2102};
2103
2104/// Simple state with integers encoding.
2105///
2106/// The interface ensures that the assumed bits are always a subset of the known
2107/// bits. Users can only add known bits and, except through adding known bits,
2108/// they can only remove assumed bits. This should guarantee monotoniticy and
2109/// thereby the existence of a fixpoint (if used corretly). The fixpoint is
2110/// reached when the assumed and known state/bits are equal. Users can
2111/// force/inidicate a fixpoint. If an optimistic one is indicated, the known
2112/// state will catch up with the assumed one, for a pessimistic fixpoint it is
2113/// the other way around.
2114template <typename base_ty, base_ty BestState, base_ty WorstState>
2115struct IntegerStateBase : public AbstractState {
2116 using base_t = base_ty;
2117
2118 IntegerStateBase() {}
2119 IntegerStateBase(base_t Assumed) : Assumed(Assumed) {}
2120
2121 /// Return the best possible representable state.
2122 static constexpr base_t getBestState() { return BestState; }
2123 static constexpr base_t getBestState(const IntegerStateBase &) {
2124 return getBestState();
2125 }
2126
2127 /// Return the worst possible representable state.
2128 static constexpr base_t getWorstState() { return WorstState; }
2129 static constexpr base_t getWorstState(const IntegerStateBase &) {
2130 return getWorstState();
2131 }
2132
2133 /// See AbstractState::isValidState()
2134 /// NOTE: For now we simply pretend that the worst possible state is invalid.
2135 bool isValidState() const override { return Assumed != getWorstState(); }
2
Assuming the condition is true
3
Returning the value 1, which participates in a condition later
2136
2137 /// See AbstractState::isAtFixpoint()
2138 bool isAtFixpoint() const override { return Assumed == Known; }
2139
2140 /// See AbstractState::indicateOptimisticFixpoint(...)
2141 ChangeStatus indicateOptimisticFixpoint() override {
2142 Known = Assumed;
2143 return ChangeStatus::UNCHANGED;
2144 }
2145
2146 /// See AbstractState::indicatePessimisticFixpoint(...)
2147 ChangeStatus indicatePessimisticFixpoint() override {
2148 Assumed = Known;
2149 return ChangeStatus::CHANGED;
2150 }
2151
2152 /// Return the known state encoding
2153 base_t getKnown() const { return Known; }
2154
2155 /// Return the assumed state encoding.
2156 base_t getAssumed() const { return Assumed; }
2157
2158 /// Equality for IntegerStateBase.
2159 bool
2160 operator==(const IntegerStateBase<base_t, BestState, WorstState> &R) const {
2161 return this->getAssumed() == R.getAssumed() &&
2162 this->getKnown() == R.getKnown();
2163 }
2164
2165 /// Inequality for IntegerStateBase.
2166 bool
2167 operator!=(const IntegerStateBase<base_t, BestState, WorstState> &R) const {
2168 return !(*this == R);
2169 }
2170
2171 /// "Clamp" this state with \p R. The result is subtype dependent but it is
2172 /// intended that only information assumed in both states will be assumed in
2173 /// this one afterwards.
2174 void operator^=(const IntegerStateBase<base_t, BestState, WorstState> &R) {
2175 handleNewAssumedValue(R.getAssumed());
2176 }
2177
2178 /// "Clamp" this state with \p R. The result is subtype dependent but it is
2179 /// intended that information known in either state will be known in
2180 /// this one afterwards.
2181 void operator+=(const IntegerStateBase<base_t, BestState, WorstState> &R) {
2182 handleNewKnownValue(R.getKnown());
2183 }
2184
2185 void operator|=(const IntegerStateBase<base_t, BestState, WorstState> &R) {
2186 joinOR(R.getAssumed(), R.getKnown());
2187 }
2188
2189 void operator&=(const IntegerStateBase<base_t, BestState, WorstState> &R) {
2190 joinAND(R.getAssumed(), R.getKnown());
2191 }
2192
2193protected:
2194 /// Handle a new assumed value \p Value. Subtype dependent.
2195 virtual void handleNewAssumedValue(base_t Value) = 0;
2196
2197 /// Handle a new known value \p Value. Subtype dependent.
2198 virtual void handleNewKnownValue(base_t Value) = 0;
2199
2200 /// Handle a value \p Value. Subtype dependent.
2201 virtual void joinOR(base_t AssumedValue, base_t KnownValue) = 0;
2202
2203 /// Handle a new assumed value \p Value. Subtype dependent.
2204 virtual void joinAND(base_t AssumedValue, base_t KnownValue) = 0;
2205
2206 /// The known state encoding in an integer of type base_t.
2207 base_t Known = getWorstState();
2208
2209 /// The assumed state encoding in an integer of type base_t.
2210 base_t Assumed = getBestState();
2211};
2212
2213/// Specialization of the integer state for a bit-wise encoding.
2214template <typename base_ty = uint32_t, base_ty BestState = ~base_ty(0),
2215 base_ty WorstState = 0>
2216struct BitIntegerState
2217 : public IntegerStateBase<base_ty, BestState, WorstState> {
2218 using base_t = base_ty;
2219
2220 /// Return true if the bits set in \p BitsEncoding are "known bits".
2221 bool isKnown(base_t BitsEncoding) const {
2222 return (this->Known & BitsEncoding) == BitsEncoding;
2223 }
2224
2225 /// Return true if the bits set in \p BitsEncoding are "assumed bits".
2226 bool isAssumed(base_t BitsEncoding) const {
2227 return (this->Assumed & BitsEncoding) == BitsEncoding;
2228 }
2229
2230 /// Add the bits in \p BitsEncoding to the "known bits".
2231 BitIntegerState &addKnownBits(base_t Bits) {
2232 // Make sure we never miss any "known bits".
2233 this->Assumed |= Bits;
2234 this->Known |= Bits;
2235 return *this;
2236 }
2237
2238 /// Remove the bits in \p BitsEncoding from the "assumed bits" if not known.
2239 BitIntegerState &removeAssumedBits(base_t BitsEncoding) {
2240 return intersectAssumedBits(~BitsEncoding);
2241 }
2242
2243 /// Remove the bits in \p BitsEncoding from the "known bits".
2244 BitIntegerState &removeKnownBits(base_t BitsEncoding) {
2245 this->Known = (this->Known & ~BitsEncoding);
2246 return *this;
2247 }
2248
2249 /// Keep only "assumed bits" also set in \p BitsEncoding but all known ones.
2250 BitIntegerState &intersectAssumedBits(base_t BitsEncoding) {
2251 // Make sure we never loose any "known bits".
2252 this->Assumed = (this->Assumed & BitsEncoding) | this->Known;
2253 return *this;
2254 }
2255
2256private:
2257 void handleNewAssumedValue(base_t Value) override {
2258 intersectAssumedBits(Value);
2259 }
2260 void handleNewKnownValue(base_t Value) override { addKnownBits(Value); }
2261 void joinOR(base_t AssumedValue, base_t KnownValue) override {
2262 this->Known |= KnownValue;
2263 this->Assumed |= AssumedValue;
2264 }
2265 void joinAND(base_t AssumedValue, base_t KnownValue) override {
2266 this->Known &= KnownValue;
2267 this->Assumed &= AssumedValue;
2268 }
2269};
2270
2271/// Specialization of the integer state for an increasing value, hence ~0u is
2272/// the best state and 0 the worst.
2273template <typename base_ty = uint32_t, base_ty BestState = ~base_ty(0),
2274 base_ty WorstState = 0>
2275struct IncIntegerState
2276 : public IntegerStateBase<base_ty, BestState, WorstState> {
2277 using super = IntegerStateBase<base_ty, BestState, WorstState>;
2278 using base_t = base_ty;
2279
2280 IncIntegerState() : super() {}
2281 IncIntegerState(base_t Assumed) : super(Assumed) {}
2282
2283 /// Return the best possible representable state.
2284 static constexpr base_t getBestState() { return BestState; }
2285 static constexpr base_t
2286 getBestState(const IncIntegerState<base_ty, BestState, WorstState> &) {
2287 return getBestState();
2288 }
2289
2290 /// Take minimum of assumed and \p Value.
2291 IncIntegerState &takeAssumedMinimum(base_t Value) {
2292 // Make sure we never loose "known value".
2293 this->Assumed = std::max(std::min(this->Assumed, Value), this->Known);
2294 return *this;
2295 }
2296
2297 /// Take maximum of known and \p Value.
2298 IncIntegerState &takeKnownMaximum(base_t Value) {
2299 // Make sure we never loose "known value".
2300 this->Assumed = std::max(Value, this->Assumed);
2301 this->Known = std::max(Value, this->Known);
2302 return *this;
2303 }
2304
2305private:
2306 void handleNewAssumedValue(base_t Value) override {
2307 takeAssumedMinimum(Value);
2308 }
2309 void handleNewKnownValue(base_t Value) override { takeKnownMaximum(Value); }
2310 void joinOR(base_t AssumedValue, base_t KnownValue) override {
2311 this->Known = std::max(this->Known, KnownValue);
2312 this->Assumed = std::max(this->Assumed, AssumedValue);
2313 }
2314 void joinAND(base_t AssumedValue, base_t KnownValue) override {
2315 this->Known = std::min(this->Known, KnownValue);
2316 this->Assumed = std::min(this->Assumed, AssumedValue);
2317 }
2318};
2319
2320/// Specialization of the integer state for a decreasing value, hence 0 is the
2321/// best state and ~0u the worst.
2322template <typename base_ty = uint32_t>
2323struct DecIntegerState : public IntegerStateBase<base_ty, 0, ~base_ty(0)> {
2324 using base_t = base_ty;
2325
2326 /// Take maximum of assumed and \p Value.
2327 DecIntegerState &takeAssumedMaximum(base_t Value) {
2328 // Make sure we never loose "known value".
2329 this->Assumed = std::min(std::max(this->Assumed, Value), this->Known);
2330 return *this;
2331 }
2332
2333 /// Take minimum of known and \p Value.
2334 DecIntegerState &takeKnownMinimum(base_t Value) {
2335 // Make sure we never loose "known value".
2336 this->Assumed = std::min(Value, this->Assumed);
2337 this->Known = std::min(Value, this->Known);
2338 return *this;
2339 }
2340
2341private:
2342 void handleNewAssumedValue(base_t Value) override {
2343 takeAssumedMaximum(Value);
2344 }
2345 void handleNewKnownValue(base_t Value) override { takeKnownMinimum(Value); }
2346 void joinOR(base_t AssumedValue, base_t KnownValue) override {
2347 this->Assumed = std::min(this->Assumed, KnownValue);
2348 this->Assumed = std::min(this->Assumed, AssumedValue);
2349 }
2350 void joinAND(base_t AssumedValue, base_t KnownValue) override {
2351 this->Assumed = std::max(this->Assumed, KnownValue);
2352 this->Assumed = std::max(this->Assumed, AssumedValue);
2353 }
2354};
2355
2356/// Simple wrapper for a single bit (boolean) state.
2357struct BooleanState : public IntegerStateBase<bool, 1, 0> {
2358 using super = IntegerStateBase<bool, 1, 0>;
2359 using base_t = IntegerStateBase::base_t;
2360
2361 BooleanState() : super() {}
2362 BooleanState(base_t Assumed) : super(Assumed) {}
2363
2364 /// Set the assumed value to \p Value but never below the known one.
2365 void setAssumed(bool Value) { Assumed &= (Known | Value); }
2366
2367 /// Set the known and asssumed value to \p Value.
2368 void setKnown(bool Value) {
2369 Known |= Value;
2370 Assumed |= Value;
2371 }
2372
2373 /// Return true if the state is assumed to hold.
2374 bool isAssumed() const { return getAssumed(); }
2375
2376 /// Return true if the state is known to hold.
2377 bool isKnown() const { return getKnown(); }
2378
2379private:
2380 void handleNewAssumedValue(base_t Value) override {
2381 if (!Value)
2382 Assumed = Known;
2383 }
2384 void handleNewKnownValue(base_t Value) override {
2385 if (Value)
2386 Known = (Assumed = Value);
2387 }
2388 void joinOR(base_t AssumedValue, base_t KnownValue) override {
2389 Known |= KnownValue;
2390 Assumed |= AssumedValue;
2391 }
2392 void joinAND(base_t AssumedValue, base_t KnownValue) override {
2393 Known &= KnownValue;
2394 Assumed &= AssumedValue;
2395 }
2396};
2397
2398/// State for an integer range.
2399struct IntegerRangeState : public AbstractState {
2400
2401 /// Bitwidth of the associated value.
2402 uint32_t BitWidth;
2403
2404 /// State representing assumed range, initially set to empty.
2405 ConstantRange Assumed;
2406
2407 /// State representing known range, initially set to [-inf, inf].
2408 ConstantRange Known;
2409
2410 IntegerRangeState(uint32_t BitWidth)
2411 : BitWidth(BitWidth), Assumed(ConstantRange::getEmpty(BitWidth)),
2412 Known(ConstantRange::getFull(BitWidth)) {}
2413
2414 IntegerRangeState(const ConstantRange &CR)
2415 : BitWidth(CR.getBitWidth()), Assumed(CR),
2416 Known(getWorstState(CR.getBitWidth())) {}
2417
2418 /// Return the worst possible representable state.
2419 static ConstantRange getWorstState(uint32_t BitWidth) {
2420 return ConstantRange::getFull(BitWidth);
2421 }
2422
2423 /// Return the best possible representable state.
2424 static ConstantRange getBestState(uint32_t BitWidth) {
2425 return ConstantRange::getEmpty(BitWidth);
2426 }
2427 static ConstantRange getBestState(const IntegerRangeState &IRS) {
2428 return getBestState(IRS.getBitWidth());
2429 }
2430
2431 /// Return associated values' bit width.
2432 uint32_t getBitWidth() const { return BitWidth; }
2433
2434 /// See AbstractState::isValidState()
2435 bool isValidState() const override {
2436 return BitWidth > 0 && !Assumed.isFullSet();
2437 }
2438
2439 /// See AbstractState::isAtFixpoint()
2440 bool isAtFixpoint() const override { return Assumed == Known; }
2441
2442 /// See AbstractState::indicateOptimisticFixpoint(...)
2443 ChangeStatus indicateOptimisticFixpoint() override {
2444 Known = Assumed;
2445 return ChangeStatus::CHANGED;
2446 }
2447
2448 /// See AbstractState::indicatePessimisticFixpoint(...)
2449 ChangeStatus indicatePessimisticFixpoint() override {
2450 Assumed = Known;
2451 return ChangeStatus::CHANGED;
2452 }
2453
2454 /// Return the known state encoding
2455 ConstantRange getKnown() const { return Known; }
2456
2457 /// Return the assumed state encoding.
2458 ConstantRange getAssumed() const { return Assumed; }
2459
2460 /// Unite assumed range with the passed state.
2461 void unionAssumed(const ConstantRange &R) {
2462 // Don't loose a known range.
2463 Assumed = Assumed.unionWith(R).intersectWith(Known);
2464 }
2465
2466 /// See IntegerRangeState::unionAssumed(..).
2467 void unionAssumed(const IntegerRangeState &R) {
2468 unionAssumed(R.getAssumed());
2469 }
2470
2471 /// Unite known range with the passed state.
2472 void unionKnown(const ConstantRange &R) {
2473 // Don't loose a known range.
2474 Known = Known.unionWith(R);
2475 Assumed = Assumed.unionWith(Known);
2476 }
2477
2478 /// See IntegerRangeState::unionKnown(..).
2479 void unionKnown(const IntegerRangeState &R) { unionKnown(R.getKnown()); }
2480
2481 /// Intersect known range with the passed state.
2482 void intersectKnown(const ConstantRange &R) {
2483 Assumed = Assumed.intersectWith(R);
2484 Known = Known.intersectWith(R);
2485 }
2486
2487 /// See IntegerRangeState::intersectKnown(..).
2488 void intersectKnown(const IntegerRangeState &R) {
2489 intersectKnown(R.getKnown());
2490 }
2491
2492 /// Equality for IntegerRangeState.
2493 bool operator==(const IntegerRangeState &R) const {
2494 return getAssumed() == R.getAssumed() && getKnown() == R.getKnown();
2495 }
2496
2497 /// "Clamp" this state with \p R. The result is subtype dependent but it is
2498 /// intended that only information assumed in both states will be assumed in
2499 /// this one afterwards.
2500 IntegerRangeState operator^=(const IntegerRangeState &R) {
2501 // NOTE: `^=` operator seems like `intersect` but in this case, we need to
2502 // take `union`.
2503 unionAssumed(R);
2504 return *this;
2505 }
2506
2507 IntegerRangeState operator&=(const IntegerRangeState &R) {
2508 // NOTE: `&=` operator seems like `intersect` but in this case, we need to
2509 // take `union`.
2510 unionKnown(R);
2511 unionAssumed(R);
2512 return *this;
2513 }
2514};
2515 /// Helper struct that hosts manifestAttrs as a static, non-virtual function. It is necessary
2516 /// as the modular build fails if the virtual method IRAttribute::manifest is defined in Attributor.cpp.
2517 struct IRAttributeManifest {
2518 static ChangeStatus manifestAttrs(Attributor &A, const IRPosition &IRP,
2519 const ArrayRef<Attribute> &DeducedAttrs,
2520 bool ForceReplace = false);
2521 };
2522
2523/// Helper to tie a abstract state implementation to an abstract attribute.
2524template <typename StateTy, typename BaseType, class... Ts>
2525struct StateWrapper : public BaseType, public StateTy {
2526 /// Provide static access to the type of the state.
2527 using StateType = StateTy;
2528
2529 StateWrapper(const IRPosition &IRP, Ts... Args)
2530 : BaseType(IRP), StateTy(Args...) {}
2531
2532 /// See AbstractAttribute::getState(...).
2533 StateType &getState() override { return *this; }
2534
2535 /// See AbstractAttribute::getState(...).
2536 const StateType &getState() const override { return *this; }
2537};
2538
2539/// Helper class that provides common functionality to manifest IR attributes.
2540template <Attribute::AttrKind AK, typename BaseType>
2541struct IRAttribute : public BaseType {
2542 IRAttribute(const IRPosition &IRP) : BaseType(IRP) {}
2543
2544 /// See AbstractAttribute::initialize(...).
2545 virtual void initialize(Attributor &A) override {
2546 const IRPosition &IRP = this->getIRPosition();
2547 if (isa<UndefValue>(IRP.getAssociatedValue()) ||
2548 this->hasAttr(getAttrKind(), /* IgnoreSubsumingPositions */ false,
2549 &A)) {
2550 this->getState().indicateOptimisticFixpoint();
2551 return;
2552 }
2553
2554 bool IsFnInterface = IRP.isFnInterfaceKind();
2555 const Function *FnScope = IRP.getAnchorScope();
2556 // TODO: Not all attributes require an exact definition. Find a way to
2557 // enable deduction for some but not all attributes in case the
2558 // definition might be changed at runtime, see also
2559 // http://lists.llvm.org/pipermail/llvm-dev/2018-February/121275.html.
2560 // TODO: We could always determine abstract attributes and if sufficient
2561 // information was found we could duplicate the functions that do not
2562 // have an exact definition.
2563 if (IsFnInterface && (!FnScope || !A.isFunctionIPOAmendable(*FnScope)))
2564 this->getState().indicatePessimisticFixpoint();
2565 }
2566
2567 /// See AbstractAttribute::manifest(...).
2568 ChangeStatus manifest(Attributor &A) override {
2569 if (isa<UndefValue>(this->getIRPosition().getAssociatedValue()))
2570 return ChangeStatus::UNCHANGED;
2571 SmallVector<Attribute, 4> DeducedAttrs;
2572 getDeducedAttributes(this->getAnchorValue().getContext(), DeducedAttrs);
2573 return IRAttributeManifest::manifestAttrs(A, this->getIRPosition(),
2574 DeducedAttrs);
2575 }
2576
2577 /// Return the kind that identifies the abstract attribute implementation.
2578 Attribute::AttrKind getAttrKind() const { return AK; }
2579
2580 /// Return the deduced attributes in \p Attrs.
2581 virtual void getDeducedAttributes(LLVMContext &Ctx,
2582 SmallVectorImpl<Attribute> &Attrs) const {
2583 Attrs.emplace_back(Attribute::get(Ctx, getAttrKind()));
2584 }
2585};
2586
2587/// Base struct for all "concrete attribute" deductions.
2588///
2589/// The abstract attribute is a minimal interface that allows the Attributor to
2590/// orchestrate the abstract/fixpoint analysis. The design allows to hide away
2591/// implementation choices made for the subclasses but also to structure their
2592/// implementation and simplify the use of other abstract attributes in-flight.
2593///
2594/// To allow easy creation of new attributes, most methods have default
2595/// implementations. The ones that do not are generally straight forward, except
2596/// `AbstractAttribute::updateImpl` which is the location of most reasoning
2597/// associated with the abstract attribute. The update is invoked by the
2598/// Attributor in case the situation used to justify the current optimistic
2599/// state might have changed. The Attributor determines this automatically
2600/// by monitoring the `Attributor::getAAFor` calls made by abstract attributes.
2601///
2602/// The `updateImpl` method should inspect the IR and other abstract attributes
2603/// in-flight to justify the best possible (=optimistic) state. The actual
2604/// implementation is, similar to the underlying abstract state encoding, not
2605/// exposed. In the most common case, the `updateImpl` will go through a list of
2606/// reasons why its optimistic state is valid given the current information. If
2607/// any combination of them holds and is sufficient to justify the current
2608 /// optimistic state, the method shall return UNCHANGED. If not, the optimistic
2609/// state is adjusted to the situation and the method shall return CHANGED.
2610///
2611/// If the manifestation of the "concrete attribute" deduced by the subclass
2612/// differs from the "default" behavior, which is a (set of) LLVM-IR
2613/// attribute(s) for an argument, call site argument, function return value, or
2614/// function, the `AbstractAttribute::manifest` method should be overloaded.
2615///
2616/// NOTE: If the state obtained via getState() is INVALID, thus if
2617/// AbstractAttribute::getState().isValidState() returns false, no
2618/// information provided by the methods of this class should be used.
2619/// NOTE: The Attributor currently has certain limitations to what we can do.
2620/// As a general rule of thumb, "concrete" abstract attributes should *for
2621/// now* only perform "backward" information propagation. That means
2622/// optimistic information obtained through abstract attributes should
2623/// only be used at positions that precede the origin of the information
2624/// with regards to the program flow. More practically, information can
2625/// *now* be propagated from instructions to their enclosing function, but
2626/// *not* from call sites to the called function. The mechanisms to allow
2627/// both directions will be added in the future.
2628/// NOTE: The mechanics of adding a new "concrete" abstract attribute are
2629/// described in the file comment.
2630struct AbstractAttribute : public IRPosition, public AADepGraphNode {
2631 using StateType = AbstractState;
2632
     /// Construct the attribute for the position \p IRP. The position is copied
     /// into the IRPosition base sub-object.
2633 AbstractAttribute(const IRPosition &IRP) : IRPosition(IRP) {}
2634
2635 /// Virtual destructor.
2636 virtual ~AbstractAttribute() {}
2637
2638 /// This function is used to identify if a \p DGN is of type
2639 /// AbstractAttribute so that the dyn_cast and cast can use such information
2640 /// to cast an AADepGraphNode to an AbstractAttribute.
2641 ///
2642 /// We eagerly return true here because all AADepGraphNodes except for the
2643 /// Synthesis Node are of type AbstractAttribute. \p DGN is not inspected.
2644 static bool classof(const AADepGraphNode *DGN) { return true; }
2645
2646 /// Initialize the state with the information in the Attributor \p A.
2647 ///
2648 /// This function is called by the Attributor once all abstract attributes
2649 /// have been identified. It can and shall be used for tasks like:
2650 /// - identify existing knowledge in the IR and use it for the "known state"
2651 /// - perform any work that is not going to change over time, e.g., determine
2652 /// a subset of the IR, or attributes in-flight, that have to be looked at
2653 /// in the `updateImpl` method.
     ///
     /// The default implementation does nothing.
2654 virtual void initialize(Attributor &A) {}
2655
2656 /// Return the internal abstract state for inspection.
2657 virtual StateType &getState() = 0;
2658 virtual const StateType &getState() const = 0;
2659
2660 /// Return an IR position, see struct IRPosition. The returned reference is
     /// this object itself, viewed through its IRPosition base.
     /// NOTE(review): the `;` after each function body below is a redundant
     /// empty declaration.
2661 const IRPosition &getIRPosition() const { return *this; };
2662 IRPosition &getIRPosition() { return *this; };
2663
2664 /// Helper functions, for debug purposes only.
2665 ///{
2666 void print(raw_ostream &OS) const override;
2667 virtual void printWithDeps(raw_ostream &OS) const;
     /// Print this attribute to the debug stream `dbgs()`.
2668 void dump() const { print(dbgs()); }
2669
2670 /// This function should return the "summarized" assumed state as string.
2671 virtual const std::string getAsStr() const = 0;
2672
2673 /// This function should return the name of the AbstractAttribute
2674 virtual const std::string getName() const = 0;
2675
2676 /// This function should return the address of the ID of the AbstractAttribute
2677 virtual const char *getIdAddr() const = 0;
2678 ///}
2679
2680 /// Allow the Attributor access to the protected methods.
2681 friend struct Attributor;
2682
2683protected:
2684 /// Hook for the Attributor to trigger an update of the internal state.
2685 ///
2686 /// If this attribute is already fixed, this method will return UNCHANGED,
2687 /// otherwise it delegates to `AbstractAttribute::updateImpl`.
2688 ///
2689 /// \return CHANGED if the internal state changed, otherwise UNCHANGED.
2690 ChangeStatus update(Attributor &A);
2691
2692 /// Hook for the Attributor to trigger the manifestation of the information
2693 /// represented by the abstract attribute in the LLVM-IR.
2694 ///
     /// The default implementation alters nothing and reports UNCHANGED.
     ///
2695 /// \return CHANGED if the IR was altered, otherwise UNCHANGED.
2696 virtual ChangeStatus manifest(Attributor &A) {
2697 return ChangeStatus::UNCHANGED;
2698 }
2699
2700 /// Hook to enable custom statistic tracking, called after manifest that
2701 /// resulted in a change if statistics are enabled.
2702 ///
2703 /// We require subclasses to provide an implementation so we remember to
2704 /// add statistics for them.
2705 virtual void trackStatistics() const = 0;
2706
2707 /// The actual update/transfer function which has to be implemented by the
2708 /// derived classes.
2709 ///
2710 /// If it is called, the environment has changed and we have to determine if
2711 /// the current information is still valid or adjust it otherwise.
2712 ///
2713 /// \return CHANGED if the internal state changed, otherwise UNCHANGED.
2714 virtual ChangeStatus updateImpl(Attributor &A) = 0;
2715};
2716
2717/// Forward declarations of output stream operators for debug purposes.
2718///
2719///{
2720raw_ostream &operator<<(raw_ostream &OS, const AbstractAttribute &AA);
2721raw_ostream &operator<<(raw_ostream &OS, ChangeStatus S);
2722raw_ostream &operator<<(raw_ostream &OS, IRPosition::Kind);
2723raw_ostream &operator<<(raw_ostream &OS, const IRPosition &);
2724raw_ostream &operator<<(raw_ostream &OS, const AbstractState &State);
    /// Print an integer state \p S as "(<known>-<assumed>)", immediately followed
    /// by whatever the AbstractState overload prints for the same object.
2725template <typename base_ty, base_ty BestState, base_ty WorstState>
2726raw_ostream &
2727operator<<(raw_ostream &OS,
2728 const IntegerStateBase<base_ty, BestState, WorstState> &S) {
2729 return OS << "(" << S.getKnown() << "-" << S.getAssumed() << ")"
2730 << static_cast<const AbstractState &>(S);
2731}
2732raw_ostream &operator<<(raw_ostream &OS, const IntegerRangeState &State);
2733///}
2734
    /// Pass type declared through PassInfoMixin. Its `run` entry point takes a
    /// Module and a ModuleAnalysisManager and returns the PreservedAnalyses; the
    /// definition is not part of this listing.
2735struct AttributorPass : public PassInfoMixin<AttributorPass> {
2736 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
2737};
    /// Pass type declared through PassInfoMixin. Its `run` entry point takes a
    /// LazyCallGraph::SCC together with the CGSCC analysis manager, the call
    /// graph, and the update result, and returns the PreservedAnalyses; the
    /// definition is not part of this listing.
2738struct AttributorCGSCCPass : public PassInfoMixin<AttributorCGSCCPass> {
2739 PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM,
2740 LazyCallGraph &CG, CGSCCUpdateResult &UR);
2741};
2742
    /// Factory declarations returning a `Pass *`; the definitions are not part of
    /// this listing. Presumably these create the legacy-pass-manager variants of
    /// the two passes above (going by the names) -- TODO confirm. Ownership of the
    /// returned pointer is not visible here.
2743Pass *createAttributorLegacyPass();
2744Pass *createAttributorCGSCCLegacyPass();
2745
2746/// Helper function to clamp a state \p S of type \p StateType with the
2747/// information in \p R and indicate/return if \p S did change (as in: an
2748/// update is required to be run again).
    ///
    /// \p S is combined with \p R through `S ^= R`. The result is UNCHANGED if
    /// the assumed value of \p S compares equal before and after that
    /// combination, and CHANGED otherwise. \p StateType must therefore provide
    /// `getAssumed()` (with an equality-comparable result) and `operator^=`.
2749template <typename StateType>
2750ChangeStatus clampStateAndIndicateChange(StateType &S, const StateType &R) {
     // Snapshot the assumed value so the effect of the clamp can be detected.
2751 auto Assumed = S.getAssumed();
2752 S ^= R;
2753 return Assumed == S.getAssumed() ? ChangeStatus::UNCHANGED
2754 : ChangeStatus::CHANGED;
2755}
2756
2757/// ----------------------------------------------------------------------------
2758/// Abstract Attribute Classes
2759/// ----------------------------------------------------------------------------
2760
2761/// An abstract attribute for the returned values of a function.
    ///
    /// The interface is tied to Attribute::Returned through IRAttribute. The
    /// Attributor argument of the constructor is not used by this class itself.
2762struct AAReturnedValues
2763 : public IRAttribute<Attribute::Returned, AbstractAttribute> {
2764 AAReturnedValues(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}
2765
2766 /// Return an assumed unique return value if a single candidate is found. If
2767 /// there cannot be one, return a nullptr. If it is not clear yet, return the
2768 /// Optional::NoneType.
2769 Optional<Value *> getAssumedUniqueReturnValue(Attributor &A) const;
2770
2771 /// Check \p Pred on all returned values.
2772 ///
2773 /// This method will evaluate \p Pred on returned values and return
2774 /// true if (1) all returned values are known, and (2) \p Pred returned true
2775 /// for all returned values.
2776 ///
2777 /// Note: Unlike the Attributor::checkForAllReturnedValuesAndReturnInsts
2778 /// method, this one will not filter dead return instructions.
2779 virtual bool checkForAllReturnedValuesAndReturnInsts(
2780 function_ref<bool(Value &, const SmallSetVector<ReturnInst *, 4> &)> Pred)
2781 const = 0;
2782
     /// Iterator types over a map from a returned value to the set of return
     /// instructions associated with it.
2783 using iterator =
2784 MapVector<Value *, SmallSetVector<ReturnInst *, 4>>::iterator;
2785 using const_iterator =
2786 MapVector<Value *, SmallSetVector<ReturnInst *, 4>>::const_iterator;
     /// Return a range over the (value, return instructions) entries.
2787 virtual llvm::iterator_range<iterator> returned_values() = 0;
2788 virtual llvm::iterator_range<const_iterator> returned_values() const = 0;
2789
     /// Return the number of return values; the exact counting rule is defined
     /// by the implementing class.
2790 virtual size_t getNumReturnValues() const = 0;
2791
2792 /// Create an abstract attribute view for the position \p IRP.
2793 static AAReturnedValues &createForPosition(const IRPosition &IRP,
2794 Attributor &A);
2795
2796 /// See AbstractAttribute::getName()
2797 const std::string getName() const override { return "AAReturnedValues"; }
2798
2799 /// See AbstractAttribute::getIdAddr()
2800 const char *getIdAddr() const override { return &ID; }
2801
2802 /// This function should return true if the type of the \p AA is
2803 /// AAReturnedValues. The check compares the ID address reported by \p AA
     /// with the address of this class's ID; \p AA must not be null.
2804 static bool classof(const AbstractAttribute *AA) {
2805 return (AA->getIdAddr() == &ID);
2806 }
2807
2808 /// Unique ID (due to the unique address)
2809 static const char ID;
2810};
2811
    /// An abstract interface for the nounwind attribute.
    ///
    /// The state is a BooleanState, tied to Attribute::NoUnwind through
    /// IRAttribute. The Attributor argument of the constructor is not used by
    /// this class itself.
2812struct AANoUnwind
2813 : public IRAttribute<Attribute::NoUnwind,
2814 StateWrapper<BooleanState, AbstractAttribute>> {
2815 AANoUnwind(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}
2816
2817 /// Returns true if nounwind is assumed.
2818 bool isAssumedNoUnwind() const { return getAssumed(); }
2819
2820 /// Returns true if nounwind is known.
2821 bool isKnownNoUnwind() const { return getKnown(); }
2822
2823 /// Create an abstract attribute view for the position \p IRP.
2824 static AANoUnwind &createForPosition(const IRPosition &IRP, Attributor &A);
2825
2826 /// See AbstractAttribute::getName()
2827 const std::string getName() const override { return "AANoUnwind"; }
2828
2829 /// See AbstractAttribute::getIdAddr()
2830 const char *getIdAddr() const override { return &ID; }
2831
2832 /// This function should return true if the type of the \p AA is AANoUnwind.
     /// The check compares ID addresses; \p AA must not be null.
2833 static bool classof(const AbstractAttribute *AA) {
2834 return (AA->getIdAddr() == &ID);
2835 }
2836
2837 /// Unique ID (due to the unique address)
2838 static const char ID;
2839};
2840
    /// An abstract interface for the nosync attribute.
    ///
    /// The state is a BooleanState, tied to Attribute::NoSync through
    /// IRAttribute. The Attributor argument of the constructor is not used by
    /// this class itself.
2841struct AANoSync
2842 : public IRAttribute<Attribute::NoSync,
2843 StateWrapper<BooleanState, AbstractAttribute>> {
2844 AANoSync(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}
2845
2846 /// Returns true if "nosync" is assumed.
2847 bool isAssumedNoSync() const { return getAssumed(); }
2848
2849 /// Returns true if "nosync" is known.
2850 bool isKnownNoSync() const { return getKnown(); }
2851
2852 /// Create an abstract attribute view for the position \p IRP.
2853 static AANoSync &createForPosition(const IRPosition &IRP, Attributor &A);
2854
2855 /// See AbstractAttribute::getName()
2856 const std::string getName() const override { return "AANoSync"; }
2857
2858 /// See AbstractAttribute::getIdAddr()
2859 const char *getIdAddr() const override { return &ID; }
2860
2861 /// This function should return true if the type of the \p AA is AANoSync.
     /// The check compares ID addresses; \p AA must not be null.
2862 static bool classof(const AbstractAttribute *AA) {
2863 return (AA->getIdAddr() == &ID);
2864 }
2865
2866 /// Unique ID (due to the unique address)
2867 static const char ID;
2868};
2869
2870/// An abstract interface for all nonnull attributes.
    ///
    /// The state is a BooleanState, tied to Attribute::NonNull through
    /// IRAttribute. The Attributor argument of the constructor is not used by
    /// this class itself.
2871struct AANonNull
2872 : public IRAttribute<Attribute::NonNull,
2873 StateWrapper<BooleanState, AbstractAttribute>> {
2874 AANonNull(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}
2875
2876 /// Return true if we assume that the underlying value is nonnull.
2877 bool isAssumedNonNull() const { return getAssumed(); }
2878
2879 /// Return true if we know that the underlying value is nonnull.
2880 bool isKnownNonNull() const { return getKnown(); }
2881
2882 /// Create an abstract attribute view for the position \p IRP.
2883 static AANonNull &createForPosition(const IRPosition &IRP, Attributor &A);
2884
2885 /// See AbstractAttribute::getName()
2886 const std::string getName() const override { return "AANonNull"; }
2887
2888 /// See AbstractAttribute::getIdAddr()
2889 const char *getIdAddr() const override { return &ID; }
2890
2891 /// This function should return true if the type of the \p AA is AANonNull.
     /// The check compares ID addresses; \p AA must not be null.
2892 static bool classof(const AbstractAttribute *AA) {
2893 return (AA->getIdAddr() == &ID);
2894 }
2895
2896 /// Unique ID (due to the unique address)
2897 static const char ID;
2898};
2899
2900/// An abstract attribute for norecurse.
    ///
    /// The state is a BooleanState, tied to Attribute::NoRecurse through
    /// IRAttribute. The Attributor argument of the constructor is not used by
    /// this class itself.
2901struct AANoRecurse
2902 : public IRAttribute<Attribute::NoRecurse,
2903 StateWrapper<BooleanState, AbstractAttribute>> {
2904 AANoRecurse(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}
2905
2906 /// Return true if "norecurse" is assumed.
2907 bool isAssumedNoRecurse() const { return getAssumed(); }
2908
2909 /// Return true if "norecurse" is known.
2910 bool isKnownNoRecurse() const { return getKnown(); }
2911
2912 /// Create an abstract attribute view for the position \p IRP.
2913 static AANoRecurse &createForPosition(const IRPosition &IRP, Attributor &A);
2914
2915 /// See AbstractAttribute::getName()
2916 const std::string getName() const override { return "AANoRecurse"; }
2917
2918 /// See AbstractAttribute::getIdAddr()
2919 const char *getIdAddr() const override { return &ID; }
2920
2921 /// This function should return true if the type of the \p AA is AANoRecurse.
     /// The check compares ID addresses; \p AA must not be null.
2922 static bool classof(const AbstractAttribute *AA) {
2923 return (AA->getIdAddr() == &ID);
2924 }
2925
2926 /// Unique ID (due to the unique address)
2927 static const char ID;
2928};
2929
2930/// An abstract attribute for willreturn.
    ///
    /// The state is a BooleanState, tied to Attribute::WillReturn through
    /// IRAttribute. The Attributor argument of the constructor is not used by
    /// this class itself.
2931struct AAWillReturn
2932 : public IRAttribute<Attribute::WillReturn,
2933 StateWrapper<BooleanState, AbstractAttribute>> {
2934 AAWillReturn(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}
2935
2936 /// Return true if "willreturn" is assumed.
2937 bool isAssumedWillReturn() const { return getAssumed(); }
2938
2939 /// Return true if "willreturn" is known.
2940 bool isKnownWillReturn() const { return getKnown(); }
2941
2942 /// Create an abstract attribute view for the position \p IRP.
2943 static AAWillReturn &createForPosition(const IRPosition &IRP, Attributor &A);
2944
2945 /// See AbstractAttribute::getName()
2946 const std::string getName() const override { return "AAWillReturn"; }
2947
2948 /// See AbstractAttribute::getIdAddr()
2949 const char *getIdAddr() const override { return &ID; }
2950
2951 /// This function should return true if the type of the \p AA is AAWillReturn.
     /// The check compares ID addresses; \p AA must not be null.
2952 static bool classof(const AbstractAttribute *AA) {
2953 return (AA->getIdAddr() == &ID);
2954 }
2955
2956 /// Unique ID (due to the unique address)
2957 static const char ID;
2958};
2959
2960/// An abstract attribute for undefined behavior.
    ///
    /// Unlike the IRAttribute-based interfaces in this file, this one derives
    /// directly from StateWrapper<BooleanState, AbstractAttribute>. The
    /// Attributor argument of the constructor is not used by this class itself.
2961struct AAUndefinedBehavior
2962 : public StateWrapper<BooleanState, AbstractAttribute> {
2963 using Base = StateWrapper<BooleanState, AbstractAttribute>;
2964 AAUndefinedBehavior(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
2965
2966 /// Return true if "undefined behavior" is assumed.
2967 bool isAssumedToCauseUB() const { return getAssumed(); }
2968
2969 /// Return true if "undefined behavior" is assumed for a specific instruction.
2970 virtual bool isAssumedToCauseUB(Instruction *I) const = 0;
2971
2972 /// Return true if "undefined behavior" is known.
2973 bool isKnownToCauseUB() const { return getKnown(); }
2974
2975 /// Return true if "undefined behavior" is known for a specific instruction.
2976 virtual bool isKnownToCauseUB(Instruction *I) const = 0;
2977
2978 /// Create an abstract attribute view for the position \p IRP.
2979 static AAUndefinedBehavior &createForPosition(const IRPosition &IRP,
2980 Attributor &A);
2981
2982 /// See AbstractAttribute::getName()
2983 const std::string getName() const override { return "AAUndefinedBehavior"; }
2984
2985 /// See AbstractAttribute::getIdAddr()
2986 const char *getIdAddr() const override { return &ID; }
2987
2988 /// This function should return true if the type of the \p AA is
2989 /// AAUndefinedBehavior. The check compares ID addresses; \p AA must not be
     /// null.
2990 static bool classof(const AbstractAttribute *AA) {
2991 return (AA->getIdAddr() == &ID);
2992 }
2993
2994 /// Unique ID (due to the unique address)
2995 static const char ID;
2996};
2997
2998/// An abstract interface to determine reachability of point A to B.
    ///
    /// Both queries below forward to the same InfoCache lookup whenever the
    /// state is valid; they only differ in what they answer for an invalid
    /// state (assumed: reachable, known: not reachable).
2999struct AAReachability : public StateWrapper<BooleanState, AbstractAttribute> {
3000 using Base = StateWrapper<BooleanState, AbstractAttribute>;
3001 AAReachability(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
3002
3003 /// Returns true if the 'From' instruction is assumed to reach the 'To'
3004 /// instruction. Users should provide the two positions they are interested
3005 /// in. NOTE(review): any caching presumably happens inside the InfoCache
     /// query this forwards to -- it is not visible in this class.
3006 bool isAssumedReachable(Attributor &A, const Instruction &From,
3007 const Instruction &To) const {
     // Without a valid state nothing can be ruled out, so answer "reachable".
3008 if (!getState().isValidState())
3009 return true;
3010 return A.getInfoCache().getPotentiallyReachable(From, To);
3011 }
3012
3013 /// Returns true if the 'From' instruction is known to reach the 'To'
3014 /// instruction. Users should provide the two positions they are interested
3015 /// in. NOTE(review): any caching presumably happens inside the InfoCache
     /// query this forwards to -- it is not visible in this class.
3016 bool isKnownReachable(Attributor &A, const Instruction &From,
3017 const Instruction &To) const {
     // Without a valid state nothing is known, so answer "not reachable".
3018 if (!getState().isValidState())
3019 return false;
3020 return A.getInfoCache().getPotentiallyReachable(From, To);
3021 }
3022
3023 /// Create an abstract attribute view for the position \p IRP.
3024 static AAReachability &createForPosition(const IRPosition &IRP,
3025 Attributor &A);
3026
3027 /// See AbstractAttribute::getName()
3028 const std::string getName() const override { return "AAReachability"; }
3029
3030 /// See AbstractAttribute::getIdAddr()
3031 const char *getIdAddr() const override { return &ID; }
3032
3033 /// This function should return true if the type of the \p AA is
3034 /// AAReachability. The check compares ID addresses; \p AA must not be null.
3035 static bool classof(const AbstractAttribute *AA) {
3036 return (AA->getIdAddr() == &ID);
3037 }
3038
3039 /// Unique ID (due to the unique address)
3040 static const char ID;
3041};
3042
3043/// An abstract interface for all noalias attributes.
    ///
    /// The state is a BooleanState, tied to Attribute::NoAlias through
    /// IRAttribute. The Attributor argument of the constructor is not used by
    /// this class itself.
3044struct AANoAlias
3045 : public IRAttribute<Attribute::NoAlias,
3046 StateWrapper<BooleanState, AbstractAttribute>> {
3047 AANoAlias(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}
3048
3049 /// Return true if we assume that the underlying value is noalias.
3050 bool isAssumedNoAlias() const { return getAssumed(); }
3051
3052 /// Return true if we know that the underlying value is noalias.
3053 bool isKnownNoAlias() const { return getKnown(); }
3054
3055 /// Create an abstract attribute view for the position \p IRP.
3056 static AANoAlias &createForPosition(const IRPosition &IRP, Attributor &A);
3057
3058 /// See AbstractAttribute::getName()
3059 const std::string getName() const override { return "AANoAlias"; }
3060
3061 /// See AbstractAttribute::getIdAddr()
3062 const char *getIdAddr() const override { return &ID; }
3063
3064 /// This function should return true if the type of the \p AA is AANoAlias.
     /// The check compares ID addresses; \p AA must not be null.
3065 static bool classof(const AbstractAttribute *AA) {
3066 return (AA->getIdAddr() == &ID);
3067 }
3068
3069 /// Unique ID (due to the unique address)
3070 static const char ID;
3071};
3072
3073/// An AbstractAttribute for nofree.
    ///
    /// The state is a BooleanState, tied to Attribute::NoFree through
    /// IRAttribute. The Attributor argument of the constructor is not used by
    /// this class itself.
3074struct AANoFree
3075 : public IRAttribute<Attribute::NoFree,
3076 StateWrapper<BooleanState, AbstractAttribute>> {
3077 AANoFree(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}
3078
3079 /// Return true if "nofree" is assumed.
3080 bool isAssumedNoFree() const { return getAssumed(); }
3081
3082 /// Return true if "nofree" is known.
3083 bool isKnownNoFree() const { return getKnown(); }
3084
3085 /// Create an abstract attribute view for the position \p IRP.
3086 static AANoFree &createForPosition(const IRPosition &IRP, Attributor &A);
3087
3088 /// See AbstractAttribute::getName()
3089 const std::string getName() const override { return "AANoFree"; }
3090
3091 /// See AbstractAttribute::getIdAddr()
3092 const char *getIdAddr() const override { return &ID; }
3093
3094 /// This function should return true if the type of the \p AA is AANoFree.
     /// The check compares ID addresses; \p AA must not be null.
3095 static bool classof(const AbstractAttribute *AA) {
3096 return (AA->getIdAddr() == &ID);
3097 }
3098
3099 /// Unique ID (due to the unique address)
3100 static const char ID;
3101};
3102
3103/// An AbstractAttribute for noreturn.
    ///
    /// The state is a BooleanState, tied to Attribute::NoReturn through
    /// IRAttribute. The Attributor argument of the constructor is not used by
    /// this class itself.
3104struct AANoReturn
3105 : public IRAttribute<Attribute::NoReturn,
3106 StateWrapper<BooleanState, AbstractAttribute>> {
3107 AANoReturn(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}
3108
3109 /// Return true if the underlying object is assumed to never return.
3110 bool isAssumedNoReturn() const { return getAssumed(); }
3111
3112 /// Return true if the underlying object is known to never return.
3113 bool isKnownNoReturn() const { return getKnown(); }
3114
3115 /// Create an abstract attribute view for the position \p IRP.
3116 static AANoReturn &createForPosition(const IRPosition &IRP, Attributor &A);
3117
3118 /// See AbstractAttribute::getName()
3119 const std::string getName() const override { return "AANoReturn"; }
3120
3121 /// See AbstractAttribute::getIdAddr()
3122 const char *getIdAddr() const override { return &ID; }
3123
3124 /// This function should return true if the type of the \p AA is AANoReturn.
     /// The check compares ID addresses; \p AA must not be null.
3125 static bool classof(const AbstractAttribute *AA) {
3126 return (AA->getIdAddr() == &ID);
3127 }
3128
3129 /// Unique ID (due to the unique address)
3130 static const char ID;
3131};
3132
3133/// An abstract interface for liveness abstract attribute.
    ///
    /// The state is a BitIntegerState over uint8_t declared with the template
    /// arguments <3, 0>; the static_assert below pins the best state to IS_DEAD
    /// (both bits set).
3134struct AAIsDead
3135 : public StateWrapper<BitIntegerState<uint8_t, 3, 0>, AbstractAttribute> {
3136 using Base = StateWrapper<BitIntegerState<uint8_t, 3, 0>, AbstractAttribute>;
3137 AAIsDead(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
3138
3139 /// State encoding bits. A set bit in the state means the property holds.
3140 enum {
3141 HAS_NO_EFFECT = 1 << 0,
3142 IS_REMOVABLE = 1 << 1,
3143
     // Both properties together make up "dead".
3144 IS_DEAD = HAS_NO_EFFECT | IS_REMOVABLE,
3145 };
3146 static_assert(IS_DEAD == getBestState(), "Unexpected BEST_STATE value");
3147
3148protected:
3149 /// The query functions are protected such that other attributes need to go
3150 /// through the Attributor interfaces: `Attributor::isAssumedDead(...)`
3151
3152 /// Returns true if the underlying value is assumed dead.
3153 virtual bool isAssumedDead() const = 0;
3154
3155 /// Returns true if the underlying value is known dead.
3156 virtual bool isKnownDead() const = 0;
3157
3158 /// Returns true if \p BB is assumed dead.
3159 virtual bool isAssumedDead(const BasicBlock *BB) const = 0;
3160
3161 /// Returns true if \p BB is known dead.
3162 virtual bool isKnownDead(const BasicBlock *BB) const = 0;
3163
3164 /// Returns true if \p I is assumed dead.
3165 virtual bool isAssumedDead(const Instruction *I) const = 0;
3166
3167 /// Returns true if \p I is known dead.
3168 virtual bool isKnownDead(const Instruction *I) const = 0;
3169
3170 /// This method is used to check if at least one instruction in a collection
3171 /// of instructions is live, i.e., not assumed dead. The range [begin, end)
     /// is scanned in order and the scan stops at the first live instruction;
     /// an empty range yields false.
3172 template <typename T> bool isLiveInstSet(T begin, T end) const {
3173 for (const auto &I : llvm::make_range(begin, end)) {
     // NOTE(review): the `((void)0)` fragments in the assert below look like
     // macro-expansion residue from the report rendering rather than original
     // source text -- confirm against the real header.
3174 assert(I->getFunction() == getIRPosition().getAssociatedFunction() &&((void)0)
3175 "Instruction must be in the same anchor scope function.")((void)0);
3176
3177 if (!isAssumedDead(I))
3178 return true;
3179 }
3180
3181 return false;
3182 }
3183
3184public:
3185 /// Create an abstract attribute view for the position \p IRP.
3186 static AAIsDead &createForPosition(const IRPosition &IRP, Attributor &A);
3187
3188 /// Determine if \p F might catch asynchronous exceptions. This holds when
     /// \p F has a personality function and canSimplifyInvokeNoUnwind(&F) is
     /// false.
3189 static bool mayCatchAsynchronousExceptions(const Function &F) {
3190 return F.hasPersonalityFn() && !canSimplifyInvokeNoUnwind(&F);
3191 }
3192
3193 /// Return if the edge from \p From BB to \p To BB is assumed dead.
3194 /// This is specifically useful in AAReachability. The default
     /// implementation treats no edge as dead.
3195 virtual bool isEdgeDead(const BasicBlock *From, const BasicBlock *To) const {
3196 return false;
3197 }
3198
3199 /// See AbstractAttribute::getName()
3200 const std::string getName() const override { return "AAIsDead"; }
3201
3202 /// See AbstractAttribute::getIdAddr()
3203 const char *getIdAddr() const override { return &ID; }
3204
3205 /// This function should return true if the type of the \p AA is AAIsDead.
     /// The check compares ID addresses; \p AA must not be null.
3206 static bool classof(const AbstractAttribute *AA) {
3207 return (AA->getIdAddr() == &ID);
3208 }
3209
3210 /// Unique ID (due to the unique address)
3211 static const char ID;
3212
     /// Allow the Attributor access to the protected query functions.
3213 friend struct Attributor;
3214};
3215
3216/// State for dereferenceable attribute
    ///
    /// Combines a byte-count state (DerefBytesState), a boolean "globally
    /// dereferenceable" state (GlobalState), and a map of accessed offsets that
    /// is used to raise the known byte count.
3217struct DerefState : AbstractState {
3218
     /// Return the best possible representable state (a default-constructed
     /// DerefState).
3219 static DerefState getBestState() { return DerefState(); }
3220 static DerefState getBestState(const DerefState &) { return getBestState(); }
3221
3222 /// Return the worst possible representable state.
3223 static DerefState getWorstState() {
3224 DerefState DS;
3225 DS.indicatePessimisticFixpoint();
3226 return DS;
3227 }
3228 static DerefState getWorstState(const DerefState &) {
3229 return getWorstState();
3230 }
3231
3232 /// State representing the dereferenceable bytes.
3233 IncIntegerState<> DerefBytesState;
3234
3235 /// Map representing the accessed memory offsets and sizes.
3236 /// A key is an offset and a value is a size.
3237 /// If there is a load/store instruction like,
3238 /// p[offset] = v;
3239 /// (offset, sizeof(v)) will be inserted to this map.
3240 /// std::map is used because we want to iterate keys in ascending order.
3241 std::map<int64_t, uint64_t> AccessedBytesMap;
3242
3243 /// Helper function to calculate dereferenceable bytes from current known
3244 /// bytes and accessed bytes.
3245 /// ```
3246 /// int f(int *A){
3247 /// *A = 0;
3248 /// *(A+2) = 2;
3249 /// *(A+1) = 1;
3250 /// *(A+10) = 10;
3251 /// }
3252 /// ```
3253 /// In that case, AccessedBytesMap is `{0:4, 4:4, 8:4, 40:4}`.
3254 /// AccessedBytesMap is std::map so it is iterated in ascending order on
3255 /// key(Offset). So KnownBytes will be updated like this:
3256 ///
3257 /// |Access | KnownBytes
3258 /// |(0, 4)| 0 -> 4
3259 /// |(4, 4)| 4 -> 8
3260 /// |(8, 4)| 8 -> 12
3261 /// |(40, 4) | 12 (break)
3262 void computeKnownDerefBytesFromAccessedMap() {
3263 int64_t KnownBytes = DerefBytesState.getKnown();
3264 for (auto &Access : AccessedBytesMap) {
     // An access starting beyond the bytes known so far leaves a gap; later
     // keys are larger still, so stop extending here.
3265 if (KnownBytes < Access.first)
3266 break;
     // NOTE(review): signed addition with a C-style cast of the unsigned size;
     // extreme offset/size values are not guarded against here.
3267 KnownBytes = std::max(KnownBytes, Access.first + (int64_t)Access.second);
3268 }
3269
3270 DerefBytesState.takeKnownMaximum(KnownBytes);
3271 }
3272
3273 /// State representing whether the value is globally dereferenceable.
3274 BooleanState GlobalState;
3275
3276 /// See AbstractState::isValidState(). Validity is decided by
     /// DerefBytesState alone.
3277 bool isValidState() const override { return DerefBytesState.isValidState(); }
3278
3279 /// See AbstractState::isAtFixpoint(). An invalid state counts as being at
     /// a fixpoint; otherwise both sub-states have to be at a fixpoint.
3280 bool isAtFixpoint() const override {
3281 return !isValidState() ||
3282 (DerefBytesState.isAtFixpoint() && GlobalState.isAtFixpoint());
3283 }
3284
3285 /// See AbstractState::indicateOptimisticFixpoint(...). Forwards to both
     /// sub-states and always reports UNCHANGED.
3286 ChangeStatus indicateOptimisticFixpoint() override {
3287 DerefBytesState.indicateOptimisticFixpoint();
3288 GlobalState.indicateOptimisticFixpoint();
3289 return ChangeStatus::UNCHANGED;
3290 }
3291
3292 /// See AbstractState::indicatePessimisticFixpoint(...). Forwards to both
     /// sub-states and always reports CHANGED.
3293 ChangeStatus indicatePessimisticFixpoint() override {
3294 DerefBytesState.indicatePessimisticFixpoint();
3295 GlobalState.indicatePessimisticFixpoint();
3296 return ChangeStatus::CHANGED;
3297 }
3298
3299 /// Update known dereferenceable bytes.
3300 void takeKnownDerefBytesMaximum(uint64_t Bytes) {
3301 DerefBytesState.takeKnownMaximum(Bytes);
3302
3303 // Known bytes might increase.
3304 computeKnownDerefBytesFromAccessedMap();
3305 }
3306
3307 /// Update assumed dereferenceable bytes.
3308 void takeAssumedDerefBytesMinimum(uint64_t Bytes) {
3309 DerefBytesState.takeAssumedMinimum(Bytes);
3310 }
3311
3312 /// Add accessed bytes to the map. If \p Offset is already present, the
     /// larger of the recorded and the new \p Size is kept.
3313 void addAccessedBytes(int64_t Offset, uint64_t Size) {
     // operator[] value-initializes the size to 0 for a new offset.
3314 uint64_t &AccessedBytes = AccessedBytesMap[Offset];
3315 AccessedBytes = std::max(AccessedBytes, Size);
3316
3317 // Known bytes might increase.
3318 computeKnownDerefBytesFromAccessedMap();
3319 }
3320
3321 /// Equality for DerefState. Only DerefBytesState and GlobalState are
     /// compared; AccessedBytesMap does not take part in the comparison.
3322 bool operator==(const DerefState &R) const {
3323 return this->DerefBytesState == R.DerefBytesState &&
3324 this->GlobalState == R.GlobalState;
3325 }
3326
3327 /// Inequality for DerefState.
3328 bool operator!=(const DerefState &R) const { return !(*this == R); }
3329
     // NOTE(review): the four compound operators below apply the operator to
     // DerefBytesState and GlobalState only, and return a copy of *this (by
     // value) rather than a reference.
3330 /// See IntegerStateBase::operator^=
3331 DerefState operator^=(const DerefState &R) {
3332 DerefBytesState ^= R.DerefBytesState;
3333 GlobalState ^= R.GlobalState;
3334 return *this;
3335 }
3336
3337 /// See IntegerStateBase::operator+=
3338 DerefState operator+=(const DerefState &R) {
3339 DerefBytesState += R.DerefBytesState;
3340 GlobalState += R.GlobalState;
3341 return *this;
3342 }
3343
3344 /// See IntegerStateBase::operator&=
3345 DerefState operator&=(const DerefState &R) {
3346 DerefBytesState &= R.DerefBytesState;
3347 GlobalState &= R.GlobalState;
3348 return *this;
3349 }
3350
3351 /// See IntegerStateBase::operator|=
3352 DerefState operator|=(const DerefState &R) {
3353 DerefBytesState |= R.DerefBytesState;
3354 GlobalState |= R.GlobalState;
3355 return *this;
3356 }
3357
3358protected:
     /// Optional non-null attribute consulted by derived classes; null by
     /// default. Nothing in this struct assigns it.
3359 const AANonNull *NonNullAA = nullptr;
3360};
3361
3362/// An abstract interface for all dereferenceable attributes.
    ///
    /// The state is a DerefState, tied to Attribute::Dereferenceable through
    /// IRAttribute. The Attributor argument of the constructor is not used by
    /// this class itself.
3363struct AADereferenceable
3364 : public IRAttribute<Attribute::Dereferenceable,
3365 StateWrapper<DerefState, AbstractAttribute>> {
3366 AADereferenceable(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}
3367
3368 /// Return true if we assume that the underlying value is nonnull. Yields
     /// false while no NonNullAA is set.
3369 bool isAssumedNonNull() const {
3370 return NonNullAA && NonNullAA->isAssumedNonNull();
3371 }
3372
3373 /// Return true if we know that the underlying value is nonnull. Yields
     /// false while no NonNullAA is set.
3374 bool isKnownNonNull() const {
3375 return NonNullAA && NonNullAA->isKnownNonNull();
3376 }
3377
3378 /// Return true if we assume that underlying value is
3379 /// dereferenceable(_or_null) globally.
3380 bool isAssumedGlobal() const { return GlobalState.getAssumed(); }
3381
3382 /// Return true if we know that underlying value is
3383 /// dereferenceable(_or_null) globally.
3384 bool isKnownGlobal() const { return GlobalState.getKnown(); }
3385
3386 /// Return assumed dereferenceable bytes (as a uint32_t).
3387 uint32_t getAssumedDereferenceableBytes() const {
3388 return DerefBytesState.getAssumed();
3389 }
3390
3391 /// Return known dereferenceable bytes (as a uint32_t).
3392 uint32_t getKnownDereferenceableBytes() const {
3393 return DerefBytesState.getKnown();
3394 }
3395
3396 /// Create an abstract attribute view for the position \p IRP.
3397 static AADereferenceable &createForPosition(const IRPosition &IRP,
3398 Attributor &A);
3399
3400 /// See AbstractAttribute::getName()
3401 const std::string getName() const override { return "AADereferenceable"; }
3402
3403 /// See AbstractAttribute::getIdAddr()
3404 const char *getIdAddr() const override { return &ID; }
3405
3406 /// This function should return true if the type of the \p AA is
3407 /// AADereferenceable. The check compares ID addresses; \p AA must not be
     /// null.
3408 static bool classof(const AbstractAttribute *AA) {
3409 return (AA->getIdAddr() == &ID);
3410 }
3411
3412 /// Unique ID (due to the unique address)
3413 static const char ID;
3414};
3415
    /// State type used by AAAlign: an IncIntegerState over uint32_t declared with
    /// the template arguments <Value::MaximumAlignment, 1>.
3416using AAAlignmentStateType =
3417 IncIntegerState<uint32_t, Value::MaximumAlignment, 1>;
3418/// An abstract interface for all align attributes.
    ///
    /// The state is an AAAlignmentStateType, tied to Attribute::Alignment through
    /// IRAttribute. The Attributor argument of the constructor is not used by
    /// this class itself.
3419struct AAAlign : public IRAttribute<
3420 Attribute::Alignment,
3421 StateWrapper<AAAlignmentStateType, AbstractAttribute>> {
3422 AAAlign(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}
3423
3424 /// Return assumed alignment.
3425 unsigned getAssumedAlign() const { return getAssumed(); }
3426
3427 /// Return known alignment.
3428 unsigned getKnownAlign() const { return getKnown(); }
3429
3430 /// See AbstractAttribute::getName()
3431 const std::string getName() const override { return "AAAlign"; }
3432
3433 /// See AbstractAttribute::getIdAddr()
3434 const char *getIdAddr() const override { return &ID; }
3435
3436 /// This function should return true if the type of the \p AA is AAAlign.
     /// The check compares ID addresses; \p AA must not be null.
3437 static bool classof(const AbstractAttribute *AA) {
3438 return (AA->getIdAddr() == &ID);
3439 }
3440
3441 /// Create an abstract attribute view for the position \p IRP.
3442 static AAAlign &createForPosition(const IRPosition &IRP, Attributor &A);
3443
3444 /// Unique ID (due to the unique address)
3445 static const char ID;
3446};
3447
3448/// An abstract interface for all nocapture attributes.
///
/// The state is a BitIntegerState<uint16_t, 7, 0>; the value 7 equals
/// NO_CAPTURE, i.e., the union of the three NOT_CAPTURED_IN_* bits below.
3449struct AANoCapture
3450 : public IRAttribute<
3451 Attribute::NoCapture,
3452 StateWrapper<BitIntegerState<uint16_t, 7, 0>, AbstractAttribute>> {
 /// Construct the attribute for the position \p IRP. The Attributor \p A is
 /// not used by this constructor.
3453 AANoCapture(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}
3454
3455 /// State encoding bits. A set bit in the state means the property holds.
3456 /// NO_CAPTURE is the best possible state, 0 the worst possible state.
3457 enum {
3458 NOT_CAPTURED_IN_MEM = 1 << 0,
3459 NOT_CAPTURED_IN_INT = 1 << 1,
3460 NOT_CAPTURED_IN_RET = 1 << 2,
3461
3462 /// If we do not capture the value in memory or through integers we can only
3463 /// communicate it back as a derived pointer.
3464 NO_CAPTURE_MAYBE_RETURNED = NOT_CAPTURED_IN_MEM | NOT_CAPTURED_IN_INT,
3465
3466 /// If we do not capture the value in memory, through integers, or as a
3467 /// derived pointer we know it is not captured.
3468 NO_CAPTURE =
3469 NOT_CAPTURED_IN_MEM | NOT_CAPTURED_IN_INT | NOT_CAPTURED_IN_RET,
3470 };
3471
3472 /// Return true if we know that the underlying value is not captured in its
3473 /// respective scope (all NO_CAPTURE bits are known).
3474 bool isKnownNoCapture() const { return isKnown(NO_CAPTURE); }
3475
3476 /// Return true if we assume that the underlying value is not captured in its
3477 /// respective scope (all NO_CAPTURE bits are assumed).
3478 bool isAssumedNoCapture() const { return isAssumed(NO_CAPTURE); }
3479
3480 /// Return true if we know that the underlying value is not captured in its
3481 /// respective scope but we allow it to escape through a "return".
3482 bool isKnownNoCaptureMaybeReturned() const {
3483 return isKnown(NO_CAPTURE_MAYBE_RETURNED);
3484 }
3485
3486 /// Return true if we assume that the underlying value is not captured in its
3487 /// respective scope but we allow it to escape through a "return".
3488 bool isAssumedNoCaptureMaybeReturned() const {
3489 return isAssumed(NO_CAPTURE_MAYBE_RETURNED);
3490 }
3491
3492 /// Create an abstract attribute view for the position \p IRP.
3493 static AANoCapture &createForPosition(const IRPosition &IRP, Attributor &A);
3494
3495 /// See AbstractAttribute::getName()
3496 const std::string getName() const override { return "AANoCapture"; }
3497
3498 /// See AbstractAttribute::getIdAddr()
3499 const char *getIdAddr() const override { return &ID; }
3500
3501 /// This function should return true if the type of the \p AA is AANoCapture.
3502 static bool classof(const AbstractAttribute *AA) {
3503 return (AA->getIdAddr() == &ID);
3504 }
3505
3506 /// Unique ID (due to the unique address)
3507 static const char ID;
3508};
3509
/// State used for value simplification. It combines a BooleanState that
/// tracks validity and fixpoint, an optional simplified value, and the type of
/// the original value.
3510struct ValueSimplifyStateType : public AbstractState {
3511
 /// Create a state for a value of type \p Ty; the other members are default
 /// initialized.
3512 ValueSimplifyStateType(Type *Ty) : Ty(Ty) {}
3513
 /// Return the best state for type \p Ty, which is a freshly constructed
 /// state.
3514 static ValueSimplifyStateType getBestState(Type *Ty) {
3515 return ValueSimplifyStateType(Ty);
3516 }
 /// Return the best state for the type stored in \p VS.
3517 static ValueSimplifyStateType getBestState(const ValueSimplifyStateType &VS) {
3518 return getBestState(VS.Ty);
3519 }
3520
3521 /// Return the worst possible representable state: a fresh state for \p Ty on
 /// which a pessimistic fixpoint has been indicated.
3522 static ValueSimplifyStateType getWorstState(Type *Ty) {
3523 ValueSimplifyStateType DS(Ty);
3524 DS.indicatePessimisticFixpoint();
3525 return DS;
3526 }
 /// Return the worst possible state for the type stored in \p VS.
3527 static ValueSimplifyStateType
3528 getWorstState(const ValueSimplifyStateType &VS) {
3529 return getWorstState(VS.Ty);
3530 }
3531
3532 /// See AbstractState::isValidState(...)
3533 bool isValidState() const override { return BS.isValidState(); }
3534
3535 /// See AbstractState::isAtFixpoint(...)
3536 bool isAtFixpoint() const override { return BS.isAtFixpoint(); }
3537
3538 /// Return the assumed state encoding. Note that the non-const overload
 /// returns a copy of this state while the const overload returns a reference.
3539 ValueSimplifyStateType getAssumed() { return *this; }
3540 const ValueSimplifyStateType &getAssumed() const { return *this; }
3541
3542 /// See AbstractState::indicatePessimisticFixpoint(...)
3543 ChangeStatus indicatePessimisticFixpoint() override {
3544 return BS.indicatePessimisticFixpoint();
3545 }
3546
3547 /// See AbstractState::indicateOptimisticFixpoint(...)
3548 ChangeStatus indicateOptimisticFixpoint() override {
3549 return BS.indicateOptimisticFixpoint();
3550 }
3551
3552 /// "Clamp" this state with \p VS: the boolean helper state is clamped with
 /// the one of \p VS and the simplified value of \p VS is merged into this
 /// state via unionAssumed. The result is returned by value (a copy).
3553 ValueSimplifyStateType operator^=(const ValueSimplifyStateType &VS) {
3554 BS ^= VS.BS;
3555 unionAssumed(VS.SimplifiedAssociatedValue);
3556 return *this;
3557 }
3558
 /// Compare two states. States of different validity are never equal, two
 /// invalid states are always equal, and two valid states are equal if their
 /// simplified values are equal.
3559 bool operator==(const ValueSimplifyStateType &RHS) const {
3560 if (isValidState() != RHS.isValidState())
3561 return false;
3562 if (!isValidState() && !RHS.isValidState())
3563 return true;
3564 return SimplifiedAssociatedValue == RHS.SimplifiedAssociatedValue;
3565 }
3566
3567protected:
3568 /// The type of the original value.
3569 Type *Ty;
3570
3571 /// Merge \p Other into the currently assumed simplified value
3572 bool unionAssumed(Optional<Value *> Other);
3573
3574 /// Helper to track validity and fixpoint
3575 BooleanState BS;
3576
3577 /// An assumed simplified value. Initially, it is set to Optional::None, which
3578 /// means that the value is not clear under current assumption. If in the
3579 /// pessimistic state, getAssumedSimplifiedValue doesn't return this value but
3580 /// returns the original associated value.
3581 Optional<Value *> SimplifiedAssociatedValue;
3582};
3583
3584/// An abstract interface for value simplify abstract attribute.
3585struct AAValueSimplify
3586 : public StateWrapper<ValueSimplifyStateType, AbstractAttribute, Type *> {
3587 using Base = StateWrapper<ValueSimplifyStateType, AbstractAttribute, Type *>;
 /// Construct the attribute for the position \p IRP. The associated type of
 /// \p IRP is passed to the base class as an additional argument; the
 /// Attributor \p A is not used by this constructor.
3588 AAValueSimplify(const IRPosition &IRP, Attributor &A)
3589 : Base(IRP, IRP.getAssociatedType()) {}
3590
3591 /// Create an abstract attribute view for the position \p IRP.
3592 static AAValueSimplify &createForPosition(const IRPosition &IRP,
3593 Attributor &A);
3594
3595 /// See AbstractAttribute::getName()
3596 const std::string getName() const override { return "AAValueSimplify"; }
3597
3598 /// See AbstractAttribute::getIdAddr()
3599 const char *getIdAddr() const override { return &ID; }
3600
3601 /// This function should return true if the type of the \p AA is
3602 /// AAValueSimplify
3603 static bool classof(const AbstractAttribute *AA) {
3604 return (AA->getIdAddr() == &ID);
3605 }
3606
3607 /// Unique ID (due to the unique address)
3608 static const char ID;
3609
3610private:
3611 /// Return an assumed simplified value if a single candidate is found. If
3612 /// there cannot be one, return original value. If it is not clear yet, return
3613 /// the Optional::NoneType.
3614 ///
3615 /// Use `Attributor::getAssumedSimplified` for value simplification. This
 /// member is private; the Attributor is granted access through the friend
 /// declaration below.
3616 virtual Optional<Value *> getAssumedSimplifiedValue(Attributor &A) const = 0;
3617
3618 friend struct Attributor;
3619};
3620
/// An abstract interface for heap-to-stack conversion queries, backed by a
/// BooleanState.
3621struct AAHeapToStack : public StateWrapper<BooleanState, AbstractAttribute> {
3622 using Base = StateWrapper<BooleanState, AbstractAttribute>;
 /// Construct the attribute for the position \p IRP. The Attributor \p A is
 /// not used by this constructor.
3623 AAHeapToStack(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
3624
3625 /// Returns true if HeapToStack conversion is assumed to be possible.
 /// The query is made for the call \p CB.
3626 virtual bool isAssumedHeapToStack(const CallBase &CB) const = 0;
3627
3628 /// Returns true if HeapToStack conversion is assumed and the CB is a
3629 /// callsite to a free operation to be removed.
3630 virtual bool isAssumedHeapToStackRemovedFree(CallBase &CB) const = 0;
3631
3632 /// Create an abstract attribute view for the position \p IRP.
3633 static AAHeapToStack &createForPosition(const IRPosition &IRP, Attributor &A);
3634
3635 /// See AbstractAttribute::getName()
3636 const std::string getName() const override { return "AAHeapToStack"; }
3637
3638 /// See AbstractAttribute::getIdAddr()
3639 const char *getIdAddr() const override { return &ID; }
3640
3641 /// This function should return true if the type of the \p AA is AAHeapToStack
3642 static bool classof(const AbstractAttribute *AA) {
3643 return (AA->getIdAddr() == &ID);
3644 }
3645
3646 /// Unique ID (due to the unique address)
3647 static const char ID;
3648};
3649
3650/// An abstract interface for privatizability.
3651///
3652/// A pointer is privatizable if it can be replaced by a new, private one.
3653/// Privatizing a pointer reduces the use count and the interaction between
3654/// unrelated code parts.
3655///
3656/// In order for a pointer to be privatizable its value cannot be observed
3657/// (=nocapture), it is (for now) not written (=readonly & noalias), we know
3658/// what values are necessary to make the private copy look like the original
3659/// one, and the values we need can be loaded (=dereferenceable).
3660struct AAPrivatizablePtr
3661 : public StateWrapper<BooleanState, AbstractAttribute> {
3662 using Base = StateWrapper<BooleanState, AbstractAttribute>;
 /// Construct the attribute for the position \p IRP. The Attributor \p A is
 /// not used by this constructor.
3663 AAPrivatizablePtr(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
3664
3665 /// Returns true if pointer privatization is assumed to be possible.
3666 bool isAssumedPrivatizablePtr() const { return getAssumed(); }
3667
3668 /// Returns true if pointer privatization is known to be possible.
3669 bool isKnownPrivatizablePtr() const { return getKnown(); }
3670
3671 /// Return the type we can choose for a private copy of the underlying
3672 /// value. None means it is not clear yet, nullptr means there is none.
3673 virtual Optional<Type *> getPrivatizableType() const = 0;
3674
3675 /// Create an abstract attribute view for the position \p IRP.
3676 static AAPrivatizablePtr &createForPosition(const IRPosition &IRP,
3677 Attributor &A);
3678
3679 /// See AbstractAttribute::getName()
3680 const std::string getName() const override { return "AAPrivatizablePtr"; }
3681
3682 /// See AbstractAttribute::getIdAddr()
3683 const char *getIdAddr() const override { return &ID; }
3684
3685 /// This function should return true if the type of the \p AA is
3686 /// AAPrivatizablePtr
3687 static bool classof(const AbstractAttribute *AA) {
3688 return (AA->getIdAddr() == &ID);
3689 }
3690
3691 /// Unique ID (due to the unique address)
3692 static const char ID;
3693};
3694
3695/// An abstract interface for memory access kind related attributes
3696/// (readnone/readonly/writeonly).
3697struct AAMemoryBehavior
3698 : public IRAttribute<
3699 Attribute::ReadNone,
3700 StateWrapper<BitIntegerState<uint8_t, 3>, AbstractAttribute>> {
 /// Construct the attribute for the position \p IRP. The Attributor \p A is
 /// not used by this constructor.
3701 AAMemoryBehavior(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}
3702
3703 /// State encoding bits. A set bit in the state means the property holds.
3704 /// BEST_STATE is the best possible state, 0 the worst possible state.
3705 enum {
3706 NO_READS = 1 << 0,
3707 NO_WRITES = 1 << 1,
3708 NO_ACCESSES = NO_READS | NO_WRITES,
3709
3710 BEST_STATE = NO_ACCESSES,
3711 };
 // Keep the enum in sync with the best state of the underlying integer state.
3712 static_assert(BEST_STATE == getBestState(), "Unexpected BEST_STATE value");
3713
3714 /// Return true if we know that the underlying value is neither read nor
3715 /// written in its respective scope.
3716 bool isKnownReadNone() const { return isKnown(NO_ACCESSES); }
3717
3718 /// Return true if we assume that the underlying value is neither read nor
3719 /// written in its respective scope.
3720 bool isAssumedReadNone() const { return isAssumed(NO_ACCESSES); }
3721
3722 /// Return true if we know that the underlying value is not written in its
3723 /// respective scope.
3724 bool isKnownReadOnly() const { return isKnown(NO_WRITES); }
3725
3726 /// Return true if we assume that the underlying value is not written in its
3727 /// respective scope.
3728 bool isAssumedReadOnly() const { return isAssumed(NO_WRITES); }
3729
3730 /// Return true if we know that the underlying value is not read in its
3731 /// respective scope.
3732 bool isKnownWriteOnly() const { return isKnown(NO_READS); }
3733
3734 /// Return true if we assume that the underlying value is not read in its
3735 /// respective scope.
3736 bool isAssumedWriteOnly() const { return isAssumed(NO_READS); }
3737
3738 /// Create an abstract attribute view for the position \p IRP.
3739 static AAMemoryBehavior &createForPosition(const IRPosition &IRP,
3740 Attributor &A);
3741
3742 /// See AbstractAttribute::getName()
3743 const std::string getName() const override { return "AAMemoryBehavior"; }
3744
3745 /// See AbstractAttribute::getIdAddr()
3746 const char *getIdAddr() const override { return &ID; }
3747
3748 /// This function should return true if the type of the \p AA is
3749 /// AAMemoryBehavior
3750 static bool classof(const AbstractAttribute *AA) {
3751 return (AA->getIdAddr() == &ID);
3752 }
3753
3754 /// Unique ID (due to the unique address)
3755 static const char ID;
3756};
3757
3758/// An abstract interface for all memory location attributes
3759/// (readnone/argmemonly/inaccessiblememonly/inaccessibleorargmemonly).
3760struct AAMemoryLocation
3761 : public IRAttribute<
3762 Attribute::ReadNone,
3763 StateWrapper<BitIntegerState<uint32_t, 511>, AbstractAttribute>> {
3764 using MemoryLocationsKind = StateType::base_t;
3765
 /// Construct the attribute for the position \p IRP. The Attributor \p A is
 /// not used by this constructor.
3766 AAMemoryLocation(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}
3767
3768 /// Encoding of different locations that could be accessed by a memory
3769 /// access. A set NO_XXX bit means the location kind XXX is not accessed.
3770 enum {
3771 ALL_LOCATIONS = 0,
3772 NO_LOCAL_MEM = 1 << 0,
3773 NO_CONST_MEM = 1 << 1,
3774 NO_GLOBAL_INTERNAL_MEM = 1 << 2,
3775 NO_GLOBAL_EXTERNAL_MEM = 1 << 3,
3776 NO_GLOBAL_MEM = NO_GLOBAL_INTERNAL_MEM | NO_GLOBAL_EXTERNAL_MEM,
3777 NO_ARGUMENT_MEM = 1 << 4,
3778 NO_INACCESSIBLE_MEM = 1 << 5,
3779 NO_MALLOCED_MEM = 1 << 6,
3780 NO_UNKOWN_MEM = 1 << 7,
3781 NO_LOCATIONS = NO_LOCAL_MEM | NO_CONST_MEM | NO_GLOBAL_INTERNAL_MEM |
3782 NO_GLOBAL_EXTERNAL_MEM | NO_ARGUMENT_MEM |
3783 NO_INACCESSIBLE_MEM | NO_MALLOCED_MEM | NO_UNKOWN_MEM,
3784
3785 // Helper bit to track if we gave up or not. NO_LOCATIONS has bits 0-7
 // set, so NO_LOCATIONS + 1 is the single bit 1 << 8.
3786 VALID_STATE = NO_LOCATIONS + 1,
3787
3788 BEST_STATE = NO_LOCATIONS | VALID_STATE,
3789 };
 // Keep the enum in sync with the best state of the underlying integer state.
3790 static_assert(BEST_STATE == getBestState(), "Unexpected BEST_STATE value");
3791
3792 /// Return true if we know that the associated function has no observable
3793 /// accesses.
3794 bool isKnownReadNone() const { return isKnown(NO_LOCATIONS); }
3795
3796 /// Return true if we assume that the associated function has no observable
3797 /// accesses. Stack-only accesses are treated as not observable here.
3798 bool isAssumedReadNone() const {
3799 return isAssumed(NO_LOCATIONS) || isAssumedStackOnly();
3800 }
3801
3802 /// Return true if we know that the associated function has at most
3803 /// local/stack accesses.
3804 bool isKnowStackOnly() const {
3805 return isKnown(inverseLocation(NO_LOCAL_MEM, true, true));
3806 }
3807
3808 /// Return true if we assume that the associated function has at most
3809 /// local/stack accesses.
3810 bool isAssumedStackOnly() const {
3811 return isAssumed(inverseLocation(NO_LOCAL_MEM, true, true));
3812 }
3813
3814 /// Return true if we know that the underlying value will only access
3815 /// inaccessible memory (see Attribute::InaccessibleMemOnly).
3816 bool isKnownInaccessibleMemOnly() const {
3817 return isKnown(inverseLocation(NO_INACCESSIBLE_MEM, true, true));
3818 }
3819
3820 /// Return true if we assume that the underlying value will only access
3821 /// inaccessible memory (see Attribute::InaccessibleMemOnly).
3822 bool isAssumedInaccessibleMemOnly() const {
3823 return isAssumed(inverseLocation(NO_INACCESSIBLE_MEM, true, true));
3824 }
3825
3826 /// Return true if we know that the underlying value will only access
3827 /// argument pointees (see Attribute::ArgMemOnly).
3828 bool isKnownArgMemOnly() const {
3829 return isKnown(inverseLocation(NO_ARGUMENT_MEM, true, true));
3830 }
3831
3832 /// Return true if we assume that the underlying value will only access
3833 /// argument pointees (see Attribute::ArgMemOnly).
3834 bool isAssumedArgMemOnly() const {
3835 return isAssumed(inverseLocation(NO_ARGUMENT_MEM, true, true));
3836 }
3837
3838 /// Return true if we know that the underlying value will only access
3839 /// inaccessible memory or argument pointees (see
3840 /// Attribute::InaccessibleOrArgMemOnly).
3841 bool isKnownInaccessibleOrArgMemOnly() const {
3842 return isKnown(
3843 inverseLocation(NO_INACCESSIBLE_MEM | NO_ARGUMENT_MEM, true, true));
3844 }
3845
3846 /// Return true if we assume that the underlying value will only access
3847 /// inaccessible memory or argument pointees (see
3848 /// Attribute::InaccessibleOrArgMemOnly).
3849 bool isAssumedInaccessibleOrArgMemOnly() const {
3850 return isAssumed(
3851 inverseLocation(NO_INACCESSIBLE_MEM | NO_ARGUMENT_MEM, true, true));
3852 }
3853
3854 /// Return true if the underlying value may access memory through argument
3855 /// pointers of the associated function, if any.
3856 bool mayAccessArgMem() const { return !isAssumed(NO_ARGUMENT_MEM); }
3857
3858 /// Return true if only the memory locations specified by \p MLK are assumed
3859 /// to be accessed by the associated function.
3860 bool isAssumedSpecifiedMemOnly(MemoryLocationsKind MLK) const {
3861 return isAssumed(MLK);
3862 }
3863
3864 /// Return the locations that are assumed to be not accessed by the associated
3865 /// function, if any.
3866 MemoryLocationsKind getAssumedNotAccessedLocation() const {
3867 return getAssumed();
3868 }
3869
3870 /// Return the inverse of location \p Loc, thus for NO_XXX the return
3871 /// describes ONLY_XXX. The flags \p AndLocalMem and \p AndConstMem determine
3872 /// if local (=stack) and constant memory are allowed as well. Most of the
3873 /// time we do want them to be included, e.g., argmemonly allows accesses via
3874 /// argument pointers or local or constant memory accesses.
3875 static MemoryLocationsKind
3876 inverseLocation(MemoryLocationsKind Loc, bool AndLocalMem, bool AndConstMem) {
3877 return NO_LOCATIONS & ~(Loc | (AndLocalMem ? NO_LOCAL_MEM : 0) |
3878 (AndConstMem ? NO_CONST_MEM : 0));
3879 }
3880
3881 /// Return the locations encoded by \p MLK as a readable string.
3882 static std::string getMemoryLocationsAsStr(MemoryLocationsKind MLK);
3883
3884 /// Simple enum to distinguish read/write/read-write accesses.
3885 enum AccessKind {
3886 NONE = 0,
3887 READ = 1 << 0,
3888 WRITE = 1 << 1,
3889 READ_WRITE = READ | WRITE,
3890 };
3891
3892 /// Check \p Pred on all accesses to the memory kinds specified by \p MLK.
3893 ///
3894 /// This method will evaluate \p Pred on all accesses (access instruction +
3895 /// underlying accessed memory pointer) and it will return true if \p Pred
3896 /// holds every time.
3897 virtual bool checkForAllAccessesToMemoryKind(
3898 function_ref<bool(const Instruction *, const Value *, AccessKind,
3899 MemoryLocationsKind)>
3900 Pred,
3901 MemoryLocationsKind MLK) const = 0;
3902
3903 /// Create an abstract attribute view for the position \p IRP.
3904 static AAMemoryLocation &createForPosition(const IRPosition &IRP,
3905 Attributor &A);
3906
3907 /// See AbstractState::getAsStr().
3908 const std::string getAsStr() const override {
3909 return getMemoryLocationsAsStr(getAssumedNotAccessedLocation());
3910 }
3911
3912 /// See AbstractAttribute::getName()
3913 const std::string getName() const override { return "AAMemoryLocation"; }
3914
3915 /// See AbstractAttribute::getIdAddr()
3916 const char *getIdAddr() const override { return &ID; }
3917
3918 /// This function should return true if the type of the \p AA is
3919 /// AAMemoryLocation
3920 static bool classof(const AbstractAttribute *AA) {
3921 return (AA->getIdAddr() == &ID);
3922 }
3923
3924 /// Unique ID (due to the unique address)
3925 static const char ID;
3926};
3927
3928/// An abstract interface for range value analysis.
3929struct AAValueConstantRange
3930 : public StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t> {
3931 using Base = StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t>;
 /// Construct the attribute for the position \p IRP. The integer bit width of
 /// the associated type of \p IRP is passed to the base class as an
 /// additional argument; the Attributor \p A is not used by this constructor.
3932 AAValueConstantRange(const IRPosition &IRP, Attributor &A)
3933 : Base(IRP, IRP.getAssociatedType()->getIntegerBitWidth()) {}
3934
3935 /// See AbstractAttribute::getState(...).
3936 IntegerRangeState &getState() override { return *this; }
3937 const IntegerRangeState &getState() const override { return *this; }
3938
3939 /// Create an abstract attribute view for the position \p IRP.
3940 static AAValueConstantRange &createForPosition(const IRPosition &IRP,
3941 Attributor &A);
3942
3943 /// Return an assumed range for the associated value at a program point
3944 /// \p CtxI. If \p CtxI is nullptr, simply return an assumed range.
3945 virtual ConstantRange
3946 getAssumedConstantRange(Attributor &A,
3947 const Instruction *CtxI = nullptr) const = 0;
3948
3949 /// Return a known range for the associated value at a program point \p CtxI.
3950 /// If \p CtxI is nullptr, simply return a known range.
3951 virtual ConstantRange
3952 getKnownConstantRange(Attributor &A,
3953 const Instruction *CtxI = nullptr) const = 0;
3954
3955 /// Return an assumed constant for the associated value at a program point
3956 /// \p CtxI. If the assumed range has a single element, that element is
 /// returned as a ConstantInt of the associated value's type. If the assumed
 /// range is the empty set, None is returned. Otherwise nullptr is returned.
3957 Optional<ConstantInt *>
3958 getAssumedConstantInt(Attributor &A,
3959 const Instruction *CtxI = nullptr) const {
3960 ConstantRange RangeV = getAssumedConstantRange(A, CtxI);
3961 if (auto *C = RangeV.getSingleElement())
3962 return cast<ConstantInt>(
3963 ConstantInt::get(getAssociatedValue().getType(), *C));
3964 if (RangeV.isEmptySet())
3965 return llvm::None;
3966 return nullptr;
3967 }
3968
3969 /// See AbstractAttribute::getName()
3970 const std::string getName() const override { return "AAValueConstantRange"; }
3971
3972 /// See AbstractAttribute::getIdAddr()
3973 const char *getIdAddr() const override { return &ID; }
3974
3975 /// This function should return true if the type of the \p AA is
3976 /// AAValueConstantRange
3977 static bool classof(const AbstractAttribute *AA) {
3978 return (AA->getIdAddr() == &ID);
3979 }
3980
3981 /// Unique ID (due to the unique address)
3982 static const char ID;
3983};
3984
3985/// A class for a set state.
3986/// The assumed boolean state indicates whether the corresponding set is full
3987/// set or not. If the assumed state is false, this is the worst state. The
3988/// worst state (invalid state) of set of potential values is when the set
3989/// contains every possible value (i.e. we cannot in any way limit the value
3990/// that the target position can take). That never happens naturally, we only
3991/// force it. As for the conditions under which we force it, see
3992/// AAPotentialValues.
3993template <typename MemberTy, typename KeyInfo = DenseMapInfo<MemberTy>>
3994struct PotentialValuesState : AbstractState {
3995 using SetTy = DenseSet<MemberTy, KeyInfo>;
3996
 /// Create a valid state with an empty set that does not contain undef.
3997 PotentialValuesState() : IsValidState(true), UndefIsContained(false) {}
3998
 /// Create a state whose validity is \p IsValid; the set is empty and undef
 /// is not contained.
3999 PotentialValuesState(bool IsValid)
4000 : IsValidState(IsValid), UndefIsContained(false) {}
4001
4002 /// See AbstractState::isValidState(...)
4003 bool isValidState() const override { return IsValidState.isValidState(); }
4004
4005 /// See AbstractState::isAtFixpoint(...)
4006 bool isAtFixpoint() const override { return IsValidState.isAtFixpoint(); }
4007
4008 /// See AbstractState::indicatePessimisticFixpoint(...)
4009 ChangeStatus indicatePessimisticFixpoint() override {
4010 return IsValidState.indicatePessimisticFixpoint();
4011 }
4012
4013 /// See AbstractState::indicateOptimisticFixpoint(...)
4014 ChangeStatus indicateOptimisticFixpoint() override {
4015 return IsValidState.indicateOptimisticFixpoint();
4016 }
4017
4018 /// Return the assumed state
4019 PotentialValuesState &getAssumed() { return *this; }
4020 const PotentialValuesState &getAssumed() const { return *this; }
4021
4022 /// Return this set. We should check whether this set is valid or not by
4023 /// isValidState() before calling this function.
4024 const SetTy &getAssumedSet() const {
4025 assert(isValidState() && "This set shoud not be used when it is invalid!")((void)0);
4026 return Set;
4027 }
4028
4029 /// Returns whether this state contains an undef value or not.
4030 bool undefIsContained() const {
4031 assert(isValidState() && "This flag shoud not be used when it is invalid!")((void)0);
4032 return UndefIsContained;
4033 }
4034
 /// Compare two states. States of different validity are never equal and two
 /// invalid states are always equal. Two valid states are equal if they agree
 /// on the undef flag and contain the same set.
4035 bool operator==(const PotentialValuesState &RHS) const {
4036 if (isValidState() != RHS.isValidState())
4037 return false;
4038 if (!isValidState() && !RHS.isValidState())
4039 return true;
4040 if (undefIsContained() != RHS.undefIsContained())
4041 return false;
4042 return Set == RHS.getAssumedSet();
4043 }
4044
4045 /// Maximum number of potential values to be tracked.
4046 /// This is set by -attributor-max-potential-values command line option
4047 static unsigned MaxPotentialValues;
4048
4049 /// Return empty set as the best state of potential values.
4050 static PotentialValuesState getBestState() {
4051 return PotentialValuesState(true);
4052 }
4053
 /// Return the best state; the argument \p PVS is not inspected.
4054 static PotentialValuesState getBestState(PotentialValuesState &PVS) {
4055 return getBestState();
4056 }
4057
4058 /// Return full set as the worst state of potential values.
4059 static PotentialValuesState getWorstState() {
4060 return PotentialValuesState(false);
4061 }
4062
4063 /// Union assumed set with the passed value.
4064 void unionAssumed(const MemberTy &C) { insert(C); }
4065
4066 /// Union assumed set with assumed set of the passed state \p PVS.
4067 void unionAssumed(const PotentialValuesState &PVS) { unionWith(PVS); }
4068
4069 /// Union assumed set with an undef value.
4070 void unionAssumedWithUndef() { unionWithUndef(); }
4071
4072 /// "Clamp" this state with \p PVS.
4073 PotentialValuesState operator^=(const PotentialValuesState &PVS) {
4074 IsValidState ^= PVS.IsValidState;
4075 unionAssumed(PVS);
4076 return *this;
4077 }
4078
 /// Combine the validity helper with the one of \p PVS via &= and take the
 /// union of the assumed sets.
4079 PotentialValuesState operator&=(const PotentialValuesState &PVS) {
4080 IsValidState &= PVS.IsValidState;
4081 unionAssumed(PVS);
4082 return *this;
4083 }
4084
4085private:
4086 /// Check the size of this set, and invalidate when the size is no
4087 /// less than \p MaxPotentialValues threshold.
4088 void checkAndInvalidate() {
4089 if (Set.size() >= MaxPotentialValues)
4090 indicatePessimisticFixpoint();
4091 else
4092 reduceUndefValue();
4093 }
4094
4095 /// If this state contains both undef and not undef, we can reduce
4096 /// undef to the not undef value. The flag stays set only while the set is
 /// empty.
4097 void reduceUndefValue() { UndefIsContained = UndefIsContained && Set.empty(); }
4098
4099 /// Insert an element into this set.
4100 void insert(const MemberTy &C) {
4101 if (!isValidState())
4102 return;
4103 Set.insert(C);
4104 checkAndInvalidate();
4105 }
4106
4107 /// Take union with R.
4108 void unionWith(const PotentialValuesState &R) {
4109 /// If this is a full set, do nothing.
4110 if (!isValidState())
4111 return;
4112 /// If R is full set, change L to a full set.
4113 if (!R.isValidState()) {
4114 indicatePessimisticFixpoint();
4115 return;
4116 }
4117 for (const MemberTy &C : R.Set)
4118 Set.insert(C);
4119 UndefIsContained |= R.undefIsContained();
4120 checkAndInvalidate();
4121 }
4122
4123 /// Take union with an undef value.
4124 void unionWithUndef() {
4125 UndefIsContained = true;
4126 reduceUndefValue();
4127 }
4128
4129 /// Take intersection with R.
4130 void intersectWith(const PotentialValuesState &R) {
4131 /// If R is a full set, do nothing.
4132 if (!R.isValidState())
4133 return;
4134 /// If this is a full set, change this to R.
4135 if (!isValidState()) {
4136 *this = R;
4137 return;
4138 }
4139 SetTy IntersectSet;
4140 for (const MemberTy &C : Set) {
4141 if (R.Set.count(C))
4142 IntersectSet.insert(C);
4143 }
4144 Set = IntersectSet;
4145 UndefIsContained &= R.undefIsContained();
4146 reduceUndefValue();
4147 }
4148
4149 /// A helper state which indicate whether this state is valid or not.
4150 BooleanState IsValidState;
4151
4152 /// Container for potential values
4153 SetTy Set;
4154
4155 /// Flag for undef value
4156 bool UndefIsContained;
4157};
4158
/// Potential values state specialized for APInt members.
4159using PotentialConstantIntValuesState = PotentialValuesState<APInt>;
4160
/// Stream output operator for a PotentialConstantIntValuesState \p R
/// (declared here, defined elsewhere).
4161raw_ostream &operator<<(raw_ostream &OS,
4162 const PotentialConstantIntValuesState &R);
4163
4164/// An abstract interface for potential values analysis.
4165///
4166/// This AA collects potential values for each IR position.
4167/// An assumed set of potential values is initialized with the empty set (the
4168/// best state) and it will grow monotonically as we find more potential values
4169/// for this position.
4170/// The set might be forced to the worst state, that is, to contain every
4171/// possible value for this position in 2 cases.
4172/// 1. We surpassed the \p MaxPotentialValues threshold. This includes the
4173/// case that this position is affected (e.g. because of an operation) by a
4174/// Value that is in the worst state.
4175/// 2. We tried to initialize on a Value that we cannot handle (e.g. an
4176/// operator we do not currently handle).
4177///
4178/// TODO: Support values other than constant integers.
4179struct AAPotentialValues
4180 : public StateWrapper<PotentialConstantIntValuesState, AbstractAttribute> {
4181 using Base = StateWrapper<PotentialConstantIntValuesState, AbstractAttribute>;
 /// Construct the attribute for the position \p IRP. The Attributor \p A is
 /// not used by this constructor.
4182 AAPotentialValues(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
4183
4184 /// See AbstractAttribute::getState(...).
4185 PotentialConstantIntValuesState &getState() override { return *this; }
4186 const PotentialConstantIntValuesState &getState() const override {
4187 return *this;
4188 }
4189
4190 /// Create an abstract attribute view for the position \p IRP.
4191 static AAPotentialValues &createForPosition(const IRPosition &IRP,
4192 Attributor &A);
4193
4194 /// Return assumed constant for the associated value.
 ///
 /// Returns nullptr if the state is invalid or the assumed set has more than
 /// one element. If the set has exactly one element, that element is returned
 /// as a ConstantInt of the associated value's type. If the set is empty, a
 /// zero constant is returned when undef is contained and None otherwise.
 /// Neither \p A nor \p CtxI is used in this implementation.
4195 Optional<ConstantInt *>
4196 getAssumedConstantInt(Attributor &A,
4197 const Instruction *CtxI = nullptr) const {
4198 if (!isValidState())
4199 return nullptr;
4200 if (getAssumedSet().size() == 1)
4201 return cast<ConstantInt>(ConstantInt::get(getAssociatedValue().getType(),
4202 *(getAssumedSet().begin())));
4203 if (getAssumedSet().size() == 0) {
4204 if (undefIsContained())
4205 return cast<ConstantInt>(
4206 ConstantInt::get(getAssociatedValue().getType(), 0));
4207 return llvm::None;
4208 }
4209
4210 return nullptr;
4211 }
4212
4213 /// See AbstractAttribute::getName()
4214 const std::string getName() const override { return "AAPotentialValues"; }
4215
4216 /// See AbstractAttribute::getIdAddr()
4217 const char *getIdAddr() const override { return &ID; }
4218
4219 /// This function should return true if the type of the \p AA is
4220 /// AAPotentialValues
4221 static bool classof(const AbstractAttribute *AA) {
4222 return (AA->getIdAddr() == &ID);
4223 }
4224
4225 /// Unique ID (due to the unique address)
4226 static const char ID;
4227};
4228
4229/// An abstract interface for all noundef attributes.
4230struct AANoUndef
4231 : public IRAttribute<Attribute::NoUndef,
4232 StateWrapper<BooleanState, AbstractAttribute>> {
 /// Construct the attribute for the position \p IRP. The Attributor \p A is
 /// not used by this constructor.
4233 AANoUndef(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}
4234
4235 /// Return true if we assume that the underlying value is noundef.
4236 bool isAssumedNoUndef() const { return getAssumed(); }
4237
4238 /// Return true if we know that the underlying value is noundef.
4239 bool isKnownNoUndef() const { return getKnown(); }
4240
4241 /// Create an abstract attribute view for the position \p IRP.
4242 static AANoUndef &createForPosition(const IRPosition &IRP, Attributor &A);
4243
4244 /// See AbstractAttribute::getName()
4245 const std::string getName() const override { return "AANoUndef"; }
4246
4247 /// See AbstractAttribute::getIdAddr()
4248 const char *getIdAddr() const override { return &ID; }
4249
4250 /// This function should return true if the type of the \p AA is AANoUndef.
4251 static bool classof(const AbstractAttribute *AA) {
4252 return (AA->getIdAddr() == &ID);
4253 }
4254
4255 /// Unique ID (due to the unique address)
4256 static const char ID;
4257};
4258
// Forward declarations; both types are defined below.
4259struct AACallGraphNode;
4260struct AACallEdges;
4261
4262/// An iterator for call edges that creates AACallEdges attributes in a lazy
4263/// way. This iterator becomes invalid if the underlying edge list changes,
4264/// so it should not outlive an iteration of the Attributor.
4265class AACallEdgeIterator
4266 : public iterator_adaptor_base<AACallEdgeIterator,
4267 SetVector<Function *>::iterator> {
 /// Private constructor wrapping the underlying iterator \p Begin; only the
 /// friends declared below can create instances.
4268 AACallEdgeIterator(Attributor &A, SetVector<Function *>::iterator Begin)
4269 : iterator_adaptor_base(Begin), A(A) {}
4270
4271public:
 /// Return the call graph node for the current edge (defined elsewhere).
4272 AACallGraphNode *operator*() const;
4273
4274private:
 /// The Attributor this iterator was created with.
4275 Attributor &A;
4276 friend AACallEdges;
4277 friend AttributorCallGraph;
4278};
4279
/// Abstract base for nodes of the call graph. Derived classes provide the
/// begin/end iterators over their optimistic call edges.
4280struct AACallGraphNode {
 /// Create a node that keeps a reference to the Attributor \p A.
4281 AACallGraphNode(Attributor &A) : A(A) {}
4282 virtual ~AACallGraphNode() {}
4283
 /// Return iterators to the beginning and the end of the optimistic call
 /// edges of this node.
4284 virtual AACallEdgeIterator optimisticEdgesBegin() const = 0;
4285 virtual AACallEdgeIterator optimisticEdgesEnd() const = 0;
4286
4287 /// Iterator range for exploring the call graph, spanning
 /// [optimisticEdgesBegin(), optimisticEdgesEnd()).
4288 iterator_range<AACallEdgeIterator> optimisticEdgesRange() const {
4289 return iterator_range<AACallEdgeIterator>(optimisticEdgesBegin(),
4290 optimisticEdgesEnd());
4291 }
4292
4293protected:
4294 /// Reference to Attributor needed for GraphTraits implementation.
4295 Attributor &A;
4296};
4297
4298/// An abstract state for querying live call edges.
4299/// This interface uses the Attributor's optimistic liveness
4300/// information to compute the edges that are alive.
4301struct AACallEdges : public StateWrapper<BooleanState, AbstractAttribute>,
4302 AACallGraphNode {
4303 using Base = StateWrapper<BooleanState, AbstractAttribute>;
4304
4305 AACallEdges(const IRPosition &IRP, Attributor &A)
4306 : Base(IRP), AACallGraphNode(A) {}
4307
4308 /// Get the optimistic edges.
4309 virtual const SetVector<Function *> &getOptimisticEdges() const = 0;
4310
4311 /// Is there any call with a unknown callee.
4312 virtual bool hasUnknownCallee() const = 0;
4313
4314 /// Is there any call with a unknown callee, excluding any inline asm.
4315 virtual bool hasNonAsmUnknownCallee() const = 0;
4316
4317 /// Iterator for exploring the call graph.
4318 AACallEdgeIterator optimisticEdgesBegin() const override {
4319 return AACallEdgeIterator(A, getOptimisticEdges().begin());
4320 }
4321
4322 /// Iterator for exploring the call graph.
4323 AACallEdgeIterator optimisticEdgesEnd() const override {
4324 return AACallEdgeIterator(A, getOptimisticEdges().end());
4325 }
4326
4327 /// Create an abstract attribute view for the position \p IRP.
4328 static AACallEdges &createForPosition(const IRPosition &IRP, Attributor &A);
4329
4330 /// See AbstractAttribute::getName()
4331 const std::string getName() const override { return "AACallEdges"; }
4332
4333 /// See AbstractAttribute::getIdAddr()
4334 const char *getIdAddr() const override { return &ID; }
4335
4336 /// This function should return true if the type of the \p AA is AACallEdges.
4337 static bool classof(const AbstractAttribute *AA) {
4338 return (AA->getIdAddr() == &ID);
4339 }
4340
4341 /// Unique ID (due to the unique address)
4342 static const char ID;
4343};
4344
4345// Synthetic root node for the Attributor's internal call graph.
4346struct AttributorCallGraph : public AACallGraphNode {
4347 AttributorCallGraph(Attributor &A) : AACallGraphNode(A) {}
4348 virtual ~AttributorCallGraph() {}
4349
4350 AACallEdgeIterator optimisticEdgesBegin() const override {
4351 return AACallEdgeIterator(A, A.Functions.begin());
4352 }
4353
4354 AACallEdgeIterator optimisticEdgesEnd() const override {
4355 return AACallEdgeIterator(A, A.Functions.end());
4356 }
4357
4358 /// Force populate the entire call graph.
4359 void populateAll() const {
4360 for (const AACallGraphNode *AA : optimisticEdgesRange()) {
4361 // Nothing else to do here.
4362 (void)AA;
4363 }
4364 }
4365
4366 void print();
4367};
4368
4369template <> struct GraphTraits<AACallGraphNode *> {
4370 using NodeRef = AACallGraphNode *;
4371 using ChildIteratorType = AACallEdgeIterator;
4372
4373 static AACallEdgeIterator child_begin(AACallGraphNode *Node) {
4374 return Node->optimisticEdgesBegin();
4375 }
4376
4377 static AACallEdgeIterator child_end(AACallGraphNode *Node) {
4378 return Node->optimisticEdgesEnd();
4379 }
4380};
4381
4382template <>
4383struct GraphTraits<AttributorCallGraph *>
4384 : public GraphTraits<AACallGraphNode *> {
4385 using nodes_iterator = AACallEdgeIterator;
4386
4387 static AACallGraphNode *getEntryNode(AttributorCallGraph *G) {
4388 return static_cast<AACallGraphNode *>(G);
4389 }
4390
4391 static AACallEdgeIterator nodes_begin(const AttributorCallGraph *G) {
4392 return G->optimisticEdgesBegin();
4393 }
4394
4395 static AACallEdgeIterator nodes_end(const AttributorCallGraph *G) {
4396 return G->optimisticEdgesEnd();
4397 }
4398};
4399
4400template <>
4401struct DOTGraphTraits<AttributorCallGraph *> : public DefaultDOTGraphTraits {
4402 DOTGraphTraits(bool Simple = false) : DefaultDOTGraphTraits(Simple) {}
4403
4404 std::string getNodeLabel(const AACallGraphNode *Node,
4405 const AttributorCallGraph *Graph) {
4406 const AACallEdges *AACE = static_cast<const AACallEdges *>(Node);
4407 return AACE->getAssociatedFunction()->getName().str();
4408 }
4409
4410 static bool isNodeHidden(const AACallGraphNode *Node,
4411 const AttributorCallGraph *Graph) {
4412 // Hide the synth root.
4413 return static_cast<const AACallGraphNode *>(Graph) == Node;
4414 }
4415};
4416
4417struct AAExecutionDomain
4418 : public StateWrapper<BooleanState, AbstractAttribute> {
4419 using Base = StateWrapper<BooleanState, AbstractAttribute>;
4420 AAExecutionDomain(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
4421
4422 /// Create an abstract attribute view for the position \p IRP.
4423 static AAExecutionDomain &createForPosition(const IRPosition &IRP,
4424 Attributor &A);
4425
4426 /// See AbstractAttribute::getName().
4427 const std::string getName() const override { return "AAExecutionDomain"; }
4428
4429 /// See AbstractAttribute::getIdAddr().
4430 const char *getIdAddr() const override { return &ID; }
4431
4432 /// Check if an instruction is executed only by the initial thread.
4433 virtual bool isExecutedByInitialThreadOnly(const Instruction &) const = 0;
4434
4435 /// Check if a basic block is executed only by the initial thread.
4436 virtual bool isExecutedByInitialThreadOnly(const BasicBlock &) const = 0;
4437
4438 /// This function should return true if the type of the \p AA is
4439 /// AAExecutionDomain.
4440 static bool classof(const AbstractAttribute *AA) {
4441 return (AA->getIdAddr() == &ID);
4442 }
4443
4444 /// Unique ID (due to the unique address)
4445 static const char ID;
4446};
4447
4448/// An abstract Attribute for computing reachability between functions.
4449struct AAFunctionReachability
4450 : public StateWrapper<BooleanState, AbstractAttribute> {
4451 using Base = StateWrapper<BooleanState, AbstractAttribute>;
4452
4453 AAFunctionReachability(const IRPosition &IRP, Attributor &A) : Base(IRP) {}
4454
4455 /// If the function represented by this possition can reach \p Fn.
4456 virtual bool canReach(Attributor &A, Function *Fn) const = 0;
4457
4458 /// Create an abstract attribute view for the position \p IRP.
4459 static AAFunctionReachability &createForPosition(const IRPosition &IRP,
4460 Attributor &A);
4461
4462 /// See AbstractAttribute::getName()
4463 const std::string getName() const override { return "AAFuncitonReacability"; }
4464
4465 /// See AbstractAttribute::getIdAddr()
4466 const char *getIdAddr() const override { return &ID; }
4467
4468 /// This function should return true if the type of the \p AA is AACallEdges.
4469 static bool classof(const AbstractAttribute *AA) {
4470 return (AA->getIdAddr() == &ID);
4471 }
4472
4473 /// Unique ID (due to the unique address)
4474 static const char ID;
4475
4476private:
4477 /// Can this function reach a call with unknown calee.
4478 virtual bool canReachUnknownCallee() const = 0;
4479};
4480
4481/// An abstract interface for struct information.
4482struct AAPointerInfo : public AbstractAttribute {
4483 AAPointerInfo(const IRPosition &IRP) : AbstractAttribute(IRP) {}
4484
4485 enum AccessKind {
4486 AK_READ = 1 << 0,
4487 AK_WRITE = 1 << 1,
4488 AK_READ_WRITE = AK_READ | AK_WRITE,
4489 };
4490
4491 /// An access description.
4492 struct Access {
4493 Access(Instruction *I, Optional<Value *> Content, AccessKind Kind, Type *Ty)
4494 : LocalI(I), RemoteI(I), Content(Content), Kind(Kind), Ty(Ty) {}
4495 Access(Instruction *LocalI, Instruction *RemoteI, Optional<Value *> Content,
4496 AccessKind Kind, Type *Ty)
4497 : LocalI(LocalI), RemoteI(RemoteI), Content(Content), Kind(Kind),
4498 Ty(Ty) {}
4499 Access(const Access &Other)
4500 : LocalI(Other.LocalI), RemoteI(Other.RemoteI), Content(Other.Content),
4501 Kind(Other.Kind), Ty(Other.Ty) {}
4502 Access(const Access &&Other)
4503 : LocalI(Other.LocalI), RemoteI(Other.RemoteI), Content(Other.Content),
4504 Kind(Other.Kind), Ty(Other.Ty) {}
4505
4506 Access &operator=(const Access &Other) {
4507 LocalI = Other.LocalI;
4508 RemoteI = Other.RemoteI;
4509 Content = Other.Content;
4510 Kind = Other.Kind;
4511 Ty = Other.Ty;
4512 return *this;
4513 }
4514 bool operator==(const Access &R) const {
4515 return LocalI == R.LocalI && RemoteI == R.RemoteI &&
4516 Content == R.Content && Kind == R.Kind;
4517 }
4518 bool operator!=(const Access &R) const { return !(*this == R); }
4519
4520 Access &operator&=(const Access &R) {
4521 assert(RemoteI == R.RemoteI && "Expected same instruction!")((void)0);
4522 Content =
4523 AA::combineOptionalValuesInAAValueLatice(Content, R.Content, Ty);
4524 Kind = AccessKind(Kind | R.Kind);
4525 return *this;
4526 }
4527
4528 /// Return the access kind.
4529 AccessKind getKind() const { return Kind; }
4530
4531 /// Return true if this is a read access.
4532 bool isRead() const { return Kind & AK_READ; }
4533
4534 /// Return true if this is a write access.
4535 bool isWrite() const { return Kind & AK_WRITE; }
4536
4537 /// Return the instruction that causes the access with respect to the local
4538 /// scope of the associated attribute.
4539 Instruction *getLocalInst() const { return LocalI; }
4540
4541 /// Return the actual instruction that causes the access.
4542 Instruction *getRemoteInst() const { return RemoteI; }
4543
4544 /// Return true if the value written is not known yet.
4545 bool isWrittenValueYetUndetermined() const { return !Content.hasValue(); }
4546
4547 /// Return true if the value written cannot be determined at all.
4548 bool isWrittenValueUnknown() const {
4549 return Content.hasValue() && !*Content;
4550 }
4551
4552 /// Return the type associated with the access, if known.
4553 Type *getType() const { return Ty; }
4554
4555 /// Return the value writen, if any. As long as
4556 /// isWrittenValueYetUndetermined return true this function shall not be
4557 /// called.
4558 Value *getWrittenValue() const { return *Content; }
4559
4560 /// Return the written value which can be `llvm::null` if it is not yet
4561 /// determined.
4562 Optional<Value *> getContent() const { return Content; }
4563
4564 private:
4565 /// The instruction responsible for the access with respect to the local
4566 /// scope of the associated attribute.
4567 Instruction *LocalI;
4568
4569 /// The instruction responsible for the access.
4570 Instruction *RemoteI;
4571
4572 /// The value written, if any. `llvm::none` means "not known yet", `nullptr`
4573 /// cannot be determined.
4574 Optional<Value *> Content;
4575
4576 /// The access kind, e.g., READ, as bitset (could be more than one).
4577 AccessKind Kind;
4578
4579 /// The type of the content, thus the type read/written, can be null if not
4580 /// available.
4581 Type *Ty;
4582 };
4583
4584 /// Create an abstract attribute view for the position \p IRP.
4585 static AAPointerInfo &createForPosition(const IRPosition &IRP, Attributor &A);
4586
4587 /// See AbstractAttribute::getName()
4588 const std::string getName() const override { return "AAPointerInfo"; }
4589
4590 /// See AbstractAttribute::getIdAddr()
4591 const char *getIdAddr() const override { return &ID; }
4592
4593 /// Call \p CB on all accesses that might interfere with \p LI and return true
4594 /// if all such accesses were known and the callback returned true for all of
4595 /// them, false otherwise.
4596 virtual bool forallInterferingAccesses(
4597 LoadInst &LI, function_ref<bool(const Access &, bool)> CB) const = 0;
4598 virtual bool forallInterferingAccesses(
4599 StoreInst &SI, function_ref<bool(const Access &, bool)> CB) const = 0;
4600
4601 /// This function should return true if the type of the \p AA is AAPointerInfo
4602 static bool classof(const AbstractAttribute *AA) {
4603 return (AA->getIdAddr() == &ID);
4604 }
4605
4606 /// Unique ID (due to the unique address)
4607 static const char ID;
4608};
4609
4610raw_ostream &operator<<(raw_ostream &, const AAPointerInfo::Access &);
4611
/// Run options, used by the pass manager. The values are bit flags, so ALL is
/// the union of the two individual options.
enum AttributorRunOption {
  NONE = 0x0,           ///< No bit set.
  MODULE = 0x1,         ///< Bit 0.
  CGSCC = 0x2,          ///< Bit 1.
  ALL = CGSCC | MODULE, ///< Both bits set.
};
4619
4620} // end namespace llvm
4621
4622#endif // LLVM_TRANSFORMS_IPO_ATTRIBUTOR_H