File: src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Support/Alignment.h
Warning: line 85, column 47: The result of the left shift is undefined due to shifting by '255', which is greater or equal to the width of type 'uint64_t'
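For context, the warning points into llvm::Align, which stores the log2 of an alignment and reconstructs the byte value with a left shift (uint64_t(1) << ShiftValue in upstream LLVM). The analyzer does not model the class invariant that the stored shift is always below 64, so it assumes the uint8_t field could be as large as 255 and reports the shift as undefined. A minimal, self-contained sketch of the flagged pattern (the names follow upstream LLVM's Align, but the snippet is illustrative, not the verbatim header):

  #include <cstdint>

  struct AlignSketch {
    // Log2 of the alignment; the real class guarantees this is < 64, but the
    // static analyzer cannot see that invariant.
    uint8_t ShiftValue = 0;
    // Shifting a 64-bit value by 64 or more is undefined behaviour, so an
    // assumed ShiftValue of 255 makes this expression the reported defect.
    uint64_t value() const { return uint64_t(1) << ShiftValue; }
  };

The numbered listing that follows is OpenMPOpt.cpp, presumably the translation unit being compiled when the analyzer reached the header.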
| 1 | //===-- IPO/OpenMPOpt.cpp - Collection of OpenMP specific optimizations ---===// | |||
| 2 | // | |||
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |||
| 4 | // See https://llvm.org/LICENSE.txt for license information. | |||
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |||
| 6 | // | |||
| 7 | //===----------------------------------------------------------------------===// | |||
| 8 | // | |||
| 9 | // OpenMP specific optimizations: | |||
| 10 | // | |||
| 11 | // - Deduplication of runtime calls, e.g., omp_get_thread_num. | |||
| 12 | // - Replacing globalized device memory with stack memory. | |||
| 13 | // - Replacing globalized device memory with shared memory. | |||
| 14 | // - Parallel region merging. | |||
| 15 | // - Transforming generic-mode device kernels to SPMD mode. | |||
| 16 | // - Specializing the state machine for generic-mode device kernels. | |||
| 17 | // | |||
| 18 | //===----------------------------------------------------------------------===// | |||
| 19 | ||||
| 20 | #include "llvm/Transforms/IPO/OpenMPOpt.h" | |||
| 21 | ||||
| 22 | #include "llvm/ADT/EnumeratedArray.h" | |||
| 23 | #include "llvm/ADT/PostOrderIterator.h" | |||
| 24 | #include "llvm/ADT/Statistic.h" | |||
| 25 | #include "llvm/Analysis/CallGraph.h" | |||
| 26 | #include "llvm/Analysis/CallGraphSCCPass.h" | |||
| 27 | #include "llvm/Analysis/OptimizationRemarkEmitter.h" | |||
| 28 | #include "llvm/Analysis/ValueTracking.h" | |||
| 29 | #include "llvm/Frontend/OpenMP/OMPConstants.h" | |||
| 30 | #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" | |||
| 31 | #include "llvm/IR/Assumptions.h" | |||
| 32 | #include "llvm/IR/DiagnosticInfo.h" | |||
| 33 | #include "llvm/IR/GlobalValue.h" | |||
| 34 | #include "llvm/IR/Instruction.h" | |||
| 35 | #include "llvm/IR/IntrinsicInst.h" | |||
| 36 | #include "llvm/InitializePasses.h" | |||
| 37 | #include "llvm/Support/CommandLine.h" | |||
| 38 | #include "llvm/Transforms/IPO.h" | |||
| 39 | #include "llvm/Transforms/IPO/Attributor.h" | |||
| 40 | #include "llvm/Transforms/Utils/BasicBlockUtils.h" | |||
| 41 | #include "llvm/Transforms/Utils/CallGraphUpdater.h" | |||
| 42 | #include "llvm/Transforms/Utils/CodeExtractor.h" | |||
| 43 | ||||
| 44 | using namespace llvm; | |||
| 45 | using namespace omp; | |||
| 46 | ||||
| 47 | #define DEBUG_TYPE "openmp-opt" | |||
| 48 | ||||
| 49 | static cl::opt<bool> DisableOpenMPOptimizations( | |||
| 50 | "openmp-opt-disable", cl::ZeroOrMore, | |||
| 51 | cl::desc("Disable OpenMP specific optimizations."), cl::Hidden, | |||
| 52 | cl::init(false)); | |||
| 53 | ||||
| 54 | static cl::opt<bool> EnableParallelRegionMerging( | |||
| 55 | "openmp-opt-enable-merging", cl::ZeroOrMore, | |||
| 56 | cl::desc("Enable the OpenMP region merging optimization."), cl::Hidden, | |||
| 57 | cl::init(false)); | |||
| 58 | ||||
| 59 | static cl::opt<bool> | |||
| 60 | DisableInternalization("openmp-opt-disable-internalization", cl::ZeroOrMore, | |||
| 61 | cl::desc("Disable function internalization."), | |||
| 62 | cl::Hidden, cl::init(false)); | |||
| 63 | ||||
| 64 | static cl::opt<bool> PrintICVValues("openmp-print-icv-values", cl::init(false), | |||
| 65 | cl::Hidden); | |||
| 66 | static cl::opt<bool> PrintOpenMPKernels("openmp-print-gpu-kernels", | |||
| 67 | cl::init(false), cl::Hidden); | |||
| 68 | ||||
| 69 | static cl::opt<bool> HideMemoryTransferLatency( | |||
| 70 | "openmp-hide-memory-transfer-latency", | |||
| 71 | cl::desc("[WIP] Tries to hide the latency of host to device memory" | |||
| 72 | " transfers"), | |||
| 73 | cl::Hidden, cl::init(false)); | |||
| 74 | ||||
| 75 | STATISTIC(NumOpenMPRuntimeCallsDeduplicated, | |||
| 76 | "Number of OpenMP runtime calls deduplicated"); | |||
| 77 | STATISTIC(NumOpenMPParallelRegionsDeleted, | |||
| 78 | "Number of OpenMP parallel regions deleted"); | |||
| 79 | STATISTIC(NumOpenMPRuntimeFunctionsIdentified, | |||
| 80 | "Number of OpenMP runtime functions identified"); | |||
| 81 | STATISTIC(NumOpenMPRuntimeFunctionUsesIdentified, | |||
| 82 | "Number of OpenMP runtime function uses identified"); | |||
| 83 | STATISTIC(NumOpenMPTargetRegionKernels, | |||
| 84 | "Number of OpenMP target region entry points (=kernels) identified"); | |||
| 85 | STATISTIC(NumOpenMPTargetRegionKernelsSPMD, | |||
| 86 | "Number of OpenMP target region entry points (=kernels) executed in " | |||
| 87 | "SPMD-mode instead of generic-mode"); | |||
| 88 | STATISTIC(NumOpenMPTargetRegionKernelsWithoutStateMachine, | |||
| 89 | "Number of OpenMP target region entry points (=kernels) executed in " | |||
| 90 | "generic-mode without a state machine"); | |||
| 91 | STATISTIC(NumOpenMPTargetRegionKernelsCustomStateMachineWithFallback, | |||
| 92 | "Number of OpenMP target region entry points (=kernels) executed in " | |||
| 93 | "generic-mode with customized state machines with fallback"); | |||
| 94 | STATISTIC(NumOpenMPTargetRegionKernelsCustomStateMachineWithoutFallback, | |||
| 95 | "Number of OpenMP target region entry points (=kernels) executed in " | |||
| 96 | "generic-mode with customized state machines without fallback"); | |||
| 97 | STATISTIC( | |||
| 98 | NumOpenMPParallelRegionsReplacedInGPUStateMachine, | |||
| 99 | "Number of OpenMP parallel regions replaced with ID in GPU state machines"); | |||
| 100 | STATISTIC(NumOpenMPParallelRegionsMerged, | |||
| 101 | "Number of OpenMP parallel regions merged"); | |||
| 102 | STATISTIC(NumBytesMovedToSharedMemory, | |||
| 103 | "Amount of memory pushed to shared memory"); | |||
| 104 | ||||
| 105 | #if !defined(NDEBUG) | |||
| 106 | static constexpr auto TAG = "[" DEBUG_TYPE "]"; | |||
| 107 | #endif | |||
| 108 | ||||
| 109 | namespace { | |||
| 110 | ||||
| 111 | enum class AddressSpace : unsigned { | |||
| 112 | Generic = 0, | |||
| 113 | Global = 1, | |||
| 114 | Shared = 3, | |||
| 115 | Constant = 4, | |||
| 116 | Local = 5, | |||
| 117 | }; | |||
| 118 | ||||
| 119 | struct AAHeapToShared; | |||
| 120 | ||||
| 121 | struct AAICVTracker; | |||
| 122 | ||||
| 123 | /// OpenMP specific information. For now, stores RFIs and ICVs also needed for | |||
| 124 | /// Attributor runs. | |||
| 125 | struct OMPInformationCache : public InformationCache { | |||
| 126 | OMPInformationCache(Module &M, AnalysisGetter &AG, | |||
| 127 | BumpPtrAllocator &Allocator, SetVector<Function *> &CGSCC, | |||
| 128 | SmallPtrSetImpl<Kernel> &Kernels) | |||
| 129 | : InformationCache(M, AG, Allocator, &CGSCC), OMPBuilder(M), | |||
| 130 | Kernels(Kernels) { | |||
| 131 | ||||
| 132 | OMPBuilder.initialize(); | |||
| 133 | initializeRuntimeFunctions(); | |||
| 134 | initializeInternalControlVars(); | |||
| 135 | } | |||
| 136 | ||||
| 137 | /// Generic information that describes an internal control variable. | |||
| 138 | struct InternalControlVarInfo { | |||
| 139 | /// The kind, as described by InternalControlVar enum. | |||
| 140 | InternalControlVar Kind; | |||
| 141 | ||||
| 142 | /// The name of the ICV. | |||
| 143 | StringRef Name; | |||
| 144 | ||||
| 145 | /// Environment variable associated with this ICV. | |||
| 146 | StringRef EnvVarName; | |||
| 147 | ||||
| 148 | /// Initial value kind. | |||
| 149 | ICVInitValue InitKind; | |||
| 150 | ||||
| 151 | /// Initial value. | |||
| 152 | ConstantInt *InitValue; | |||
| 153 | ||||
| 154 | /// Setter RTL function associated with this ICV. | |||
| 155 | RuntimeFunction Setter; | |||
| 156 | ||||
| 157 | /// Getter RTL function associated with this ICV. | |||
| 158 | RuntimeFunction Getter; | |||
| 159 | ||||
| 160 | /// RTL Function corresponding to the override clause of this ICV | |||
| 161 | RuntimeFunction Clause; | |||
| 162 | }; | |||
| 163 | ||||
| 164 | /// Generic information that describes a runtime function | |||
| 165 | struct RuntimeFunctionInfo { | |||
| 166 | ||||
| 167 | /// The kind, as described by the RuntimeFunction enum. | |||
| 168 | RuntimeFunction Kind; | |||
| 169 | ||||
| 170 | /// The name of the function. | |||
| 171 | StringRef Name; | |||
| 172 | ||||
| 173 | /// Flag to indicate a variadic function. | |||
| 174 | bool IsVarArg; | |||
| 175 | ||||
| 176 | /// The return type of the function. | |||
| 177 | Type *ReturnType; | |||
| 178 | ||||
| 179 | /// The argument types of the function. | |||
| 180 | SmallVector<Type *, 8> ArgumentTypes; | |||
| 181 | ||||
| 182 | /// The declaration if available. | |||
| 183 | Function *Declaration = nullptr; | |||
| 184 | ||||
| 185 | /// Uses of this runtime function per function containing the use. | |||
| 186 | using UseVector = SmallVector<Use *, 16>; | |||
| 187 | ||||
| 188 | /// Clear UsesMap for runtime function. | |||
| 189 | void clearUsesMap() { UsesMap.clear(); } | |||
| 190 | ||||
| 191 | /// Boolean conversion that is true if the runtime function was found. | |||
| 192 | operator bool() const { return Declaration; } | |||
| 193 | ||||
| 194 | /// Return the vector of uses in function \p F. | |||
| 195 | UseVector &getOrCreateUseVector(Function *F) { | |||
| 196 | std::shared_ptr<UseVector> &UV = UsesMap[F]; | |||
| 197 | if (!UV) | |||
| 198 | UV = std::make_shared<UseVector>(); | |||
| 199 | return *UV; | |||
| 200 | } | |||
| 201 | ||||
| 202 | /// Return the vector of uses in function \p F or `nullptr` if there are | |||
| 203 | /// none. | |||
| 204 | const UseVector *getUseVector(Function &F) const { | |||
| 205 | auto I = UsesMap.find(&F); | |||
| 206 | if (I != UsesMap.end()) | |||
| 207 | return I->second.get(); | |||
| 208 | return nullptr; | |||
| 209 | } | |||
| 210 | ||||
| 211 | /// Return how many functions contain uses of this runtime function. | |||
| 212 | size_t getNumFunctionsWithUses() const { return UsesMap.size(); } | |||
| 213 | ||||
| 214 | /// Return the number of arguments (or the minimal number for variadic | |||
| 215 | /// functions). | |||
| 216 | size_t getNumArgs() const { return ArgumentTypes.size(); } | |||
| 217 | ||||
| 218 | /// Run the callback \p CB on each use and forget the use if the result is | |||
| 219 | /// true. The callback will be fed the function in which the use was | |||
| 220 | /// encountered as second argument. | |||
| 221 | void foreachUse(SmallVectorImpl<Function *> &SCC, | |||
| 222 | function_ref<bool(Use &, Function &)> CB) { | |||
| 223 | for (Function *F : SCC) | |||
| 224 | foreachUse(CB, F); | |||
| 225 | } | |||
| 226 | ||||
| 227 | /// Run the callback \p CB on each use within the function \p F and forget | |||
| 228 | /// the use if the result is true. | |||
| 229 | void foreachUse(function_ref<bool(Use &, Function &)> CB, Function *F) { | |||
| 230 | SmallVector<unsigned, 8> ToBeDeleted; | |||
| 231 | ToBeDeleted.clear(); | |||
| 232 | ||||
| 233 | unsigned Idx = 0; | |||
| 234 | UseVector &UV = getOrCreateUseVector(F); | |||
| 235 | ||||
| 236 | for (Use *U : UV) { | |||
| 237 | if (CB(*U, *F)) | |||
| 238 | ToBeDeleted.push_back(Idx); | |||
| 239 | ++Idx; | |||
| 240 | } | |||
| 241 | ||||
| 242 | // Remove the to-be-deleted indices in reverse order as prior | |||
| 243 | // modifications will not modify the smaller indices. | |||
| 244 | while (!ToBeDeleted.empty()) { | |||
| 245 | unsigned Idx = ToBeDeleted.pop_back_val(); | |||
| 246 | UV[Idx] = UV.back(); | |||
| 247 | UV.pop_back(); | |||
| 248 | } | |||
| 249 | } | |||
| 250 | ||||
| 251 | private: | |||
| 252 | /// Map from functions to all uses of this runtime function contained in | |||
| 253 | /// them. | |||
| 254 | DenseMap<Function *, std::shared_ptr<UseVector>> UsesMap; | |||
| 255 | ||||
| 256 | public: | |||
| 257 | /// Iterators for the uses of this runtime function. | |||
| 258 | decltype(UsesMap)::iterator begin() { return UsesMap.begin(); } | |||
| 259 | decltype(UsesMap)::iterator end() { return UsesMap.end(); } | |||
| 260 | }; | |||
| 261 | ||||
| 262 | /// An OpenMP-IR-Builder instance | |||
| 263 | OpenMPIRBuilder OMPBuilder; | |||
| 264 | ||||
| 265 | /// Map from runtime function kind to the runtime function description. | |||
| 266 | EnumeratedArray<RuntimeFunctionInfo, RuntimeFunction, | |||
| 267 | RuntimeFunction::OMPRTL___last> | |||
| 268 | RFIs; | |||
| 269 | ||||
| 270 | /// Map from function declarations/definitions to their runtime enum type. | |||
| 271 | DenseMap<Function *, RuntimeFunction> RuntimeFunctionIDMap; | |||
| 272 | ||||
| 273 | /// Map from ICV kind to the ICV description. | |||
| 274 | EnumeratedArray<InternalControlVarInfo, InternalControlVar, | |||
| 275 | InternalControlVar::ICV___last> | |||
| 276 | ICVs; | |||
| 277 | ||||
| 278 | /// Helper to initialize all internal control variable information for those | |||
| 279 | /// defined in OMPKinds.def. | |||
| 280 | void initializeInternalControlVars() { | |||
| 281 | #define ICV_RT_SET(_Name, RTL) \ | |||
| 282 | { \ | |||
| 283 | auto &ICV = ICVs[_Name]; \ | |||
| 284 | ICV.Setter = RTL; \ | |||
| 285 | } | |||
| 286 | #define ICV_RT_GET(Name, RTL) \ | |||
| 287 | { \ | |||
| 288 | auto &ICV = ICVs[Name]; \ | |||
| 289 | ICV.Getter = RTL; \ | |||
| 290 | } | |||
| 291 | #define ICV_DATA_ENV(Enum, _Name, _EnvVarName, Init) \ | |||
| 292 | { \ | |||
| 293 | auto &ICV = ICVs[Enum]; \ | |||
| 294 | ICV.Name = _Name; \ | |||
| 295 | ICV.Kind = Enum; \ | |||
| 296 | ICV.InitKind = Init; \ | |||
| 297 | ICV.EnvVarName = _EnvVarName; \ | |||
| 298 | switch (ICV.InitKind) { \ | |||
| 299 | case ICV_IMPLEMENTATION_DEFINED: \ | |||
| 300 | ICV.InitValue = nullptr; \ | |||
| 301 | break; \ | |||
| 302 | case ICV_ZERO: \ | |||
| 303 | ICV.InitValue = ConstantInt::get( \ | |||
| 304 | Type::getInt32Ty(OMPBuilder.Int32->getContext()), 0); \ | |||
| 305 | break; \ | |||
| 306 | case ICV_FALSE: \ | |||
| 307 | ICV.InitValue = ConstantInt::getFalse(OMPBuilder.Int1->getContext()); \ | |||
| 308 | break; \ | |||
| 309 | case ICV_LAST: \ | |||
| 310 | break; \ | |||
| 311 | } \ | |||
| 312 | } | |||
| 313 | #include "llvm/Frontend/OpenMP/OMPKinds.def" | |||
| 314 | } | |||
| 315 | ||||
| 316 | /// Returns true if the function declaration \p F matches the runtime | |||
| 317 | /// function types, that is, return type \p RTFRetType, and argument types | |||
| 318 | /// \p RTFArgTypes. | |||
| 319 | static bool declMatchesRTFTypes(Function *F, Type *RTFRetType, | |||
| 320 | SmallVector<Type *, 8> &RTFArgTypes) { | |||
| 321 | // TODO: We should output information to the user (under debug output | |||
| 322 | // and via remarks). | |||
| 323 | ||||
| 324 | if (!F) | |||
| 325 | return false; | |||
| 326 | if (F->getReturnType() != RTFRetType) | |||
| 327 | return false; | |||
| 328 | if (F->arg_size() != RTFArgTypes.size()) | |||
| 329 | return false; | |||
| 330 | ||||
| 331 | auto RTFTyIt = RTFArgTypes.begin(); | |||
| 332 | for (Argument &Arg : F->args()) { | |||
| 333 | if (Arg.getType() != *RTFTyIt) | |||
| 334 | return false; | |||
| 335 | ||||
| 336 | ++RTFTyIt; | |||
| 337 | } | |||
| 338 | ||||
| 339 | return true; | |||
| 340 | } | |||
| 341 | ||||
| 342 | // Helper to collect all uses of the declaration in the UsesMap. | |||
| 343 | unsigned collectUses(RuntimeFunctionInfo &RFI, bool CollectStats = true) { | |||
| 344 | unsigned NumUses = 0; | |||
| 345 | if (!RFI.Declaration) | |||
| 346 | return NumUses; | |||
| 347 | OMPBuilder.addAttributes(RFI.Kind, *RFI.Declaration); | |||
| 348 | ||||
| 349 | if (CollectStats) { | |||
| 350 | NumOpenMPRuntimeFunctionsIdentified += 1; | |||
| 351 | NumOpenMPRuntimeFunctionUsesIdentified += RFI.Declaration->getNumUses(); | |||
| 352 | } | |||
| 353 | ||||
| 354 | // TODO: We directly convert uses into proper calls and unknown uses. | |||
| 355 | for (Use &U : RFI.Declaration->uses()) { | |||
| 356 | if (Instruction *UserI = dyn_cast<Instruction>(U.getUser())) { | |||
| 357 | if (ModuleSlice.count(UserI->getFunction())) { | |||
| 358 | RFI.getOrCreateUseVector(UserI->getFunction()).push_back(&U); | |||
| 359 | ++NumUses; | |||
| 360 | } | |||
| 361 | } else { | |||
| 362 | RFI.getOrCreateUseVector(nullptr).push_back(&U); | |||
| 363 | ++NumUses; | |||
| 364 | } | |||
| 365 | } | |||
| 366 | return NumUses; | |||
| 367 | } | |||
| 368 | ||||
| 369 | // Helper function to recollect uses of a runtime function. | |||
| 370 | void recollectUsesForFunction(RuntimeFunction RTF) { | |||
| 371 | auto &RFI = RFIs[RTF]; | |||
| 372 | RFI.clearUsesMap(); | |||
| 373 | collectUses(RFI, /*CollectStats*/ false); | |||
| 374 | } | |||
| 375 | ||||
| 376 | // Helper function to recollect uses of all runtime functions. | |||
| 377 | void recollectUses() { | |||
| 378 | for (int Idx = 0; Idx < RFIs.size(); ++Idx) | |||
| 379 | recollectUsesForFunction(static_cast<RuntimeFunction>(Idx)); | |||
| 380 | } | |||
| 381 | ||||
| 382 | /// Helper to initialize all runtime function information for those defined | |||
| 383 | /// in OpenMPKinds.def. | |||
| 384 | void initializeRuntimeFunctions() { | |||
| 385 | Module &M = *((*ModuleSlice.begin())->getParent()); | |||
| 386 | ||||
| 387 | // Helper macros for handling __VA_ARGS__ in OMP_RTL | |||
| 388 | #define OMP_TYPE(VarName, ...) \ | |||
| 389 | Type *VarName = OMPBuilder.VarName; \ | |||
| 390 | (void)VarName; | |||
| 391 | ||||
| 392 | #define OMP_ARRAY_TYPE(VarName, ...) \ | |||
| 393 | ArrayType *VarName##Ty = OMPBuilder.VarName##Ty; \ | |||
| 394 | (void)VarName##Ty; \ | |||
| 395 | PointerType *VarName##PtrTy = OMPBuilder.VarName##PtrTy; \ | |||
| 396 | (void)VarName##PtrTy; | |||
| 397 | ||||
| 398 | #define OMP_FUNCTION_TYPE(VarName, ...) \ | |||
| 399 | FunctionType *VarName = OMPBuilder.VarName; \ | |||
| 400 | (void)VarName; \ | |||
| 401 | PointerType *VarName##Ptr = OMPBuilder.VarName##Ptr; \ | |||
| 402 | (void)VarName##Ptr; | |||
| 403 | ||||
| 404 | #define OMP_STRUCT_TYPE(VarName, ...) \ | |||
| 405 | StructType *VarName = OMPBuilder.VarName; \ | |||
| 406 | (void)VarName; \ | |||
| 407 | PointerType *VarName##Ptr = OMPBuilder.VarName##Ptr; \ | |||
| 408 | (void)VarName##Ptr; | |||
| 409 | ||||
| 410 | #define OMP_RTL(_Enum, _Name, _IsVarArg, _ReturnType, ...) \ | |||
| 411 | { \ | |||
| 412 | SmallVector<Type *, 8> ArgsTypes({__VA_ARGS__}); \ | |||
| 413 | Function *F = M.getFunction(_Name); \ | |||
| 414 | RTLFunctions.insert(F); \ | |||
| 415 | if (declMatchesRTFTypes(F, OMPBuilder._ReturnType, ArgsTypes)) { \ | |||
| 416 | RuntimeFunctionIDMap[F] = _Enum; \ | |||
| 417 | F->removeFnAttr(Attribute::NoInline); \ | |||
| 418 | auto &RFI = RFIs[_Enum]; \ | |||
| 419 | RFI.Kind = _Enum; \ | |||
| 420 | RFI.Name = _Name; \ | |||
| 421 | RFI.IsVarArg = _IsVarArg; \ | |||
| 422 | RFI.ReturnType = OMPBuilder._ReturnType; \ | |||
| 423 | RFI.ArgumentTypes = std::move(ArgsTypes); \ | |||
| 424 | RFI.Declaration = F; \ | |||
| 425 | unsigned NumUses = collectUses(RFI); \ | |||
| 426 | (void)NumUses; \ | |||
| 427 | LLVM_DEBUG({ \ | |||
| 428 | dbgs() << TAG << RFI.Name << (RFI.Declaration ? "" : " not") \ | |||
| 429 | << " found\n"; \ | |||
| 430 | if (RFI.Declaration) \ | |||
| 431 | dbgs() << TAG << "-> got " << NumUses << " uses in " \ | |||
| 432 | << RFI.getNumFunctionsWithUses() \ | |||
| 433 | << " different functions.\n"; \ | |||
| 434 | }); \ | |||
| 435 | } \ | |||
| 436 | } | |||
| 437 | #include "llvm/Frontend/OpenMP/OMPKinds.def" | |||
| 438 | ||||
| 439 | // TODO: We should attach the attributes defined in OMPKinds.def. | |||
| 440 | } | |||
| 441 | ||||
| 442 | /// Collection of known kernels (\see Kernel) in the module. | |||
| 443 | SmallPtrSetImpl<Kernel> &Kernels; | |||
| 444 | ||||
| 445 | /// Collection of known OpenMP runtime functions. | |||
| 446 | DenseSet<const Function *> RTLFunctions; | |||
| 447 | }; | |||
| 448 | ||||
| 449 | template <typename Ty, bool InsertInvalidates = true> | |||
| 450 | struct BooleanStateWithSetVector : public BooleanState { | |||
| 451 | bool contains(const Ty &Elem) const { return Set.contains(Elem); } | |||
| 452 | bool insert(const Ty &Elem) { | |||
| 453 | if (InsertInvalidates) | |||
| 454 | BooleanState::indicatePessimisticFixpoint(); | |||
| 455 | return Set.insert(Elem); | |||
| 456 | } | |||
| 457 | ||||
| 458 | const Ty &operator[](int Idx) const { return Set[Idx]; } | |||
| 459 | bool operator==(const BooleanStateWithSetVector &RHS) const { | |||
| 460 | return BooleanState::operator==(RHS) && Set == RHS.Set; | |||
| 461 | } | |||
| 462 | bool operator!=(const BooleanStateWithSetVector &RHS) const { | |||
| 463 | return !(*this == RHS); | |||
| 464 | } | |||
| 465 | ||||
| 466 | bool empty() const { return Set.empty(); } | |||
| 467 | size_t size() const { return Set.size(); } | |||
| 468 | ||||
| 469 | /// "Clamp" this state with \p RHS. | |||
| 470 | BooleanStateWithSetVector &operator^=(const BooleanStateWithSetVector &RHS) { | |||
| 471 | BooleanState::operator^=(RHS); | |||
| 472 | Set.insert(RHS.Set.begin(), RHS.Set.end()); | |||
| 473 | return *this; | |||
| 474 | } | |||
| 475 | ||||
| 476 | private: | |||
| 477 | /// A set to keep track of elements. | |||
| 478 | SetVector<Ty> Set; | |||
| 479 | ||||
| 480 | public: | |||
| 481 | typename decltype(Set)::iterator begin() { return Set.begin(); } | |||
| 482 | typename decltype(Set)::iterator end() { return Set.end(); } | |||
| 483 | typename decltype(Set)::const_iterator begin() const { return Set.begin(); } | |||
| 484 | typename decltype(Set)::const_iterator end() const { return Set.end(); } | |||
| 485 | }; | |||
| 486 | ||||
| 487 | template <typename Ty, bool InsertInvalidates = true> | |||
| 488 | using BooleanStateWithPtrSetVector = | |||
| 489 | BooleanStateWithSetVector<Ty *, InsertInvalidates>; | |||
| 490 | ||||
| 491 | struct KernelInfoState : AbstractState { | |||
| 492 | /// Flag to track if we reached a fixpoint. | |||
| 493 | bool IsAtFixpoint = false; | |||
| 494 | ||||
| 495 | /// The parallel regions (identified by the outlined parallel functions) that | |||
| 496 | /// can be reached from the associated function. | |||
| 497 | BooleanStateWithPtrSetVector<Function, /* InsertInvalidates */ false> | |||
| 498 | ReachedKnownParallelRegions; | |||
| 499 | ||||
| 500 | /// State to track what parallel region we might reach. | |||
| 501 | BooleanStateWithPtrSetVector<CallBase> ReachedUnknownParallelRegions; | |||
| 502 | ||||
| 503 | /// State to track if we are in SPMD-mode, assumed or known, and why we decided | |||
| 504 | /// we cannot be. If it is assumed, then RequiresFullRuntime should also be | |||
| 505 | /// false. | |||
| 506 | BooleanStateWithPtrSetVector<Instruction> SPMDCompatibilityTracker; | |||
| 507 | ||||
| 508 | /// The __kmpc_target_init call in this kernel, if any. If we find more than | |||
| 509 | /// one we abort as the kernel is malformed. | |||
| 510 | CallBase *KernelInitCB = nullptr; | |||
| 511 | ||||
| 512 | /// The __kmpc_target_deinit call in this kernel, if any. If we find more than | |||
| 513 | /// one we abort as the kernel is malformed. | |||
| 514 | CallBase *KernelDeinitCB = nullptr; | |||
| 515 | ||||
| 516 | /// Flag to indicate if the associated function is a kernel entry. | |||
| 517 | bool IsKernelEntry = false; | |||
| 518 | ||||
| 519 | /// State to track what kernel entries can reach the associated function. | |||
| 520 | BooleanStateWithPtrSetVector<Function, false> ReachingKernelEntries; | |||
| 521 | ||||
| 522 | /// State to indicate if we can track parallel level of the associated | |||
| 523 | /// function. We will give up tracking if we encounter unknown caller or the | |||
| 524 | /// caller is __kmpc_parallel_51. | |||
| 525 | BooleanStateWithSetVector<uint8_t> ParallelLevels; | |||
| 526 | ||||
| 527 | /// Abstract State interface | |||
| 528 | ///{ | |||
| 529 | ||||
| 530 | KernelInfoState() {} | |||
| 531 | KernelInfoState(bool BestState) { | |||
| 532 | if (!BestState) | |||
| 533 | indicatePessimisticFixpoint(); | |||
| 534 | } | |||
| 535 | ||||
| 536 | /// See AbstractState::isValidState(...) | |||
| 537 | bool isValidState() const override { return true; } | |||
| 538 | ||||
| 539 | /// See AbstractState::isAtFixpoint(...) | |||
| 540 | bool isAtFixpoint() const override { return IsAtFixpoint; } | |||
| 541 | ||||
| 542 | /// See AbstractState::indicatePessimisticFixpoint(...) | |||
| 543 | ChangeStatus indicatePessimisticFixpoint() override { | |||
| 544 | IsAtFixpoint = true; | |||
| 545 | SPMDCompatibilityTracker.indicatePessimisticFixpoint(); | |||
| 546 | ReachedUnknownParallelRegions.indicatePessimisticFixpoint(); | |||
| 547 | return ChangeStatus::CHANGED; | |||
| 548 | } | |||
| 549 | ||||
| 550 | /// See AbstractState::indicateOptimisticFixpoint(...) | |||
| 551 | ChangeStatus indicateOptimisticFixpoint() override { | |||
| 552 | IsAtFixpoint = true; | |||
| 553 | return ChangeStatus::UNCHANGED; | |||
| 554 | } | |||
| 555 | ||||
| 556 | /// Return the assumed state | |||
| 557 | KernelInfoState &getAssumed() { return *this; } | |||
| 558 | const KernelInfoState &getAssumed() const { return *this; } | |||
| 559 | ||||
| 560 | bool operator==(const KernelInfoState &RHS) const { | |||
| 561 | if (SPMDCompatibilityTracker != RHS.SPMDCompatibilityTracker) | |||
| 562 | return false; | |||
| 563 | if (ReachedKnownParallelRegions != RHS.ReachedKnownParallelRegions) | |||
| 564 | return false; | |||
| 565 | if (ReachedUnknownParallelRegions != RHS.ReachedUnknownParallelRegions) | |||
| 566 | return false; | |||
| 567 | if (ReachingKernelEntries != RHS.ReachingKernelEntries) | |||
| 568 | return false; | |||
| 569 | return true; | |||
| 570 | } | |||
| 571 | ||||
| 572 | /// Return empty set as the best state of potential values. | |||
| 573 | static KernelInfoState getBestState() { return KernelInfoState(true); } | |||
| 574 | ||||
| 575 | static KernelInfoState getBestState(KernelInfoState &KIS) { | |||
| 576 | return getBestState(); | |||
| 577 | } | |||
| 578 | ||||
| 579 | /// Return full set as the worst state of potential values. | |||
| 580 | static KernelInfoState getWorstState() { return KernelInfoState(false); } | |||
| 581 | ||||
| 582 | /// "Clamp" this state with \p KIS. | |||
| 583 | KernelInfoState operator^=(const KernelInfoState &KIS) { | |||
| 584 | // Do not merge two different _init and _deinit call sites. | |||
| 585 | if (KIS.KernelInitCB) { | |||
| 586 | if (KernelInitCB && KernelInitCB != KIS.KernelInitCB) | |||
| 587 | indicatePessimisticFixpoint(); | |||
| 588 | KernelInitCB = KIS.KernelInitCB; | |||
| 589 | } | |||
| 590 | if (KIS.KernelDeinitCB) { | |||
| 591 | if (KernelDeinitCB && KernelDeinitCB != KIS.KernelDeinitCB) | |||
| 592 | indicatePessimisticFixpoint(); | |||
| 593 | KernelDeinitCB = KIS.KernelDeinitCB; | |||
| 594 | } | |||
| 595 | SPMDCompatibilityTracker ^= KIS.SPMDCompatibilityTracker; | |||
| 596 | ReachedKnownParallelRegions ^= KIS.ReachedKnownParallelRegions; | |||
| 597 | ReachedUnknownParallelRegions ^= KIS.ReachedUnknownParallelRegions; | |||
| 598 | return *this; | |||
| 599 | } | |||
| 600 | ||||
| 601 | KernelInfoState operator&=(const KernelInfoState &KIS) { | |||
| 602 | return (*this ^= KIS); | |||
| 603 | } | |||
| 604 | ||||
| 605 | ///} | |||
| 606 | }; | |||
| 607 | ||||
| 608 | /// Used to map the values physically (in the IR) stored in an offload | |||
| 609 | /// array, to a vector in memory. | |||
| 610 | struct OffloadArray { | |||
| 611 | /// Physical array (in the IR). | |||
| 612 | AllocaInst *Array = nullptr; | |||
| 613 | /// Mapped values. | |||
| 614 | SmallVector<Value *, 8> StoredValues; | |||
| 615 | /// Last stores made in the offload array. | |||
| 616 | SmallVector<StoreInst *, 8> LastAccesses; | |||
| 617 | ||||
| 618 | OffloadArray() = default; | |||
| 619 | ||||
| 620 | /// Initializes the OffloadArray with the values stored in \p Array before | |||
| 621 | /// instruction \p Before is reached. Returns false if the initialization | |||
| 622 | /// fails. | |||
| 623 | /// This MUST be used immediately after the construction of the object. | |||
| 624 | bool initialize(AllocaInst &Array, Instruction &Before) { | |||
| 625 | if (!Array.getAllocatedType()->isArrayTy()) | |||
| 626 | return false; | |||
| 627 | ||||
| 628 | if (!getValues(Array, Before)) | |||
| 629 | return false; | |||
| 630 | ||||
| 631 | this->Array = &Array; | |||
| 632 | return true; | |||
| 633 | } | |||
| 634 | ||||
| 635 | static const unsigned DeviceIDArgNum = 1; | |||
| 636 | static const unsigned BasePtrsArgNum = 3; | |||
| 637 | static const unsigned PtrsArgNum = 4; | |||
| 638 | static const unsigned SizesArgNum = 5; | |||
| 639 | ||||
| 640 | private: | |||
| 641 | /// Traverses the BasicBlock where \p Array is, collecting the stores made to | |||
| 642 | /// \p Array, leaving StoredValues with the values stored before the | |||
| 643 | /// instruction \p Before is reached. | |||
| 644 | bool getValues(AllocaInst &Array, Instruction &Before) { | |||
| 645 | // Initialize container. | |||
| 646 | const uint64_t NumValues = Array.getAllocatedType()->getArrayNumElements(); | |||
| 647 | StoredValues.assign(NumValues, nullptr); | |||
| 648 | LastAccesses.assign(NumValues, nullptr); | |||
| 649 | ||||
| 650 | // TODO: This assumes the instruction \p Before is in the same | |||
| 651 | // BasicBlock as Array. Make it general, for any control flow graph. | |||
| 652 | BasicBlock *BB = Array.getParent(); | |||
| 653 | if (BB != Before.getParent()) | |||
| 654 | return false; | |||
| 655 | ||||
| 656 | const DataLayout &DL = Array.getModule()->getDataLayout(); | |||
| 657 | const unsigned int PointerSize = DL.getPointerSize(); | |||
| 658 | ||||
| 659 | for (Instruction &I : *BB) { | |||
| 660 | if (&I == &Before) | |||
| 661 | break; | |||
| 662 | ||||
| 663 | if (!isa<StoreInst>(&I)) | |||
| 664 | continue; | |||
| 665 | ||||
| 666 | auto *S = cast<StoreInst>(&I); | |||
| 667 | int64_t Offset = -1; | |||
| 668 | auto *Dst = | |||
| 669 | GetPointerBaseWithConstantOffset(S->getPointerOperand(), Offset, DL); | |||
| 670 | if (Dst == &Array) { | |||
| 671 | int64_t Idx = Offset / PointerSize; | |||
| 672 | StoredValues[Idx] = getUnderlyingObject(S->getValueOperand()); | |||
| 673 | LastAccesses[Idx] = S; | |||
| 674 | } | |||
| 675 | } | |||
| 676 | ||||
| 677 | return isFilled(); | |||
| 678 | } | |||
| 679 | ||||
| 680 | /// Returns true if all values in StoredValues and | |||
| 681 | /// LastAccesses are not nullptrs. | |||
| 682 | bool isFilled() { | |||
| 683 | const unsigned NumValues = StoredValues.size(); | |||
| 684 | for (unsigned I = 0; I < NumValues; ++I) { | |||
| 685 | if (!StoredValues[I] || !LastAccesses[I]) | |||
| 686 | return false; | |||
| 687 | } | |||
| 688 | ||||
| 689 | return true; | |||
| 690 | } | |||
| 691 | }; | |||
| 692 | ||||
| 693 | struct OpenMPOpt { | |||
| 694 | ||||
| 695 | using OptimizationRemarkGetter = | |||
| 696 | function_ref<OptimizationRemarkEmitter &(Function *)>; | |||
| 697 | ||||
| 698 | OpenMPOpt(SmallVectorImpl<Function *> &SCC, CallGraphUpdater &CGUpdater, | |||
| 699 | OptimizationRemarkGetter OREGetter, | |||
| 700 | OMPInformationCache &OMPInfoCache, Attributor &A) | |||
| 701 | : M(*(*SCC.begin())->getParent()), SCC(SCC), CGUpdater(CGUpdater), | |||
| 702 | OREGetter(OREGetter), OMPInfoCache(OMPInfoCache), A(A) {} | |||
| 703 | ||||
| 704 | /// Check if any remarks are enabled for openmp-opt | |||
| 705 | bool remarksEnabled() { | |||
| 706 | auto &Ctx = M.getContext(); | |||
| 707 | return Ctx.getDiagHandlerPtr()->isAnyRemarkEnabled(DEBUG_TYPE); | |||
| 708 | } | |||
| 709 | ||||
| 710 | /// Run all OpenMP optimizations on the underlying SCC/ModuleSlice. | |||
| 711 | bool run(bool IsModulePass) { | |||
| 712 | if (SCC.empty()) | |||
| 713 | return false; | |||
| 714 | ||||
| 715 | bool Changed = false; | |||
| 716 | ||||
| 717 | LLVM_DEBUG(dbgs() << TAG << "Run on SCC with " << SCC.size() | |||
| 718 | << " functions in a slice with " | |||
| 719 | << OMPInfoCache.ModuleSlice.size() << " functions\n"); | |||
| 720 | ||||
| 721 | if (IsModulePass) { | |||
| 722 | Changed |= runAttributor(IsModulePass); | |||
| 723 | ||||
| 724 | // Recollect uses, in case Attributor deleted any. | |||
| 725 | OMPInfoCache.recollectUses(); | |||
| 726 | ||||
| 727 | // TODO: This should be folded into buildCustomStateMachine. | |||
| 728 | Changed |= rewriteDeviceCodeStateMachine(); | |||
| 729 | ||||
| 730 | if (remarksEnabled()) | |||
| 731 | analysisGlobalization(); | |||
| 732 | } else { | |||
| 733 | if (PrintICVValues) | |||
| 734 | printICVs(); | |||
| 735 | if (PrintOpenMPKernels) | |||
| 736 | printKernels(); | |||
| 737 | ||||
| 738 | Changed |= runAttributor(IsModulePass); | |||
| 739 | ||||
| 740 | // Recollect uses, in case Attributor deleted any. | |||
| 741 | OMPInfoCache.recollectUses(); | |||
| 742 | ||||
| 743 | Changed |= deleteParallelRegions(); | |||
| 744 | ||||
| 745 | if (HideMemoryTransferLatency) | |||
| 746 | Changed |= hideMemTransfersLatency(); | |||
| 747 | Changed |= deduplicateRuntimeCalls(); | |||
| 748 | if (EnableParallelRegionMerging) { | |||
| 749 | if (mergeParallelRegions()) { | |||
| 750 | deduplicateRuntimeCalls(); | |||
| 751 | Changed = true; | |||
| 752 | } | |||
| 753 | } | |||
| 754 | } | |||
| 755 | ||||
| 756 | return Changed; | |||
| 757 | } | |||
| 758 | ||||
| 759 | /// Print initial ICV values for testing. | |||
| 760 | /// FIXME: This should be done from the Attributor once it is added. | |||
| 761 | void printICVs() const { | |||
| 762 | InternalControlVar ICVs[] = {ICV_nthreads, ICV_active_levels, ICV_cancel, | |||
| 763 | ICV_proc_bind}; | |||
| 764 | ||||
| 765 | for (Function *F : OMPInfoCache.ModuleSlice) { | |||
| 766 | for (auto ICV : ICVs) { | |||
| 767 | auto ICVInfo = OMPInfoCache.ICVs[ICV]; | |||
| 768 | auto Remark = [&](OptimizationRemarkAnalysis ORA) { | |||
| 769 | return ORA << "OpenMP ICV " << ore::NV("OpenMPICV", ICVInfo.Name) | |||
| 770 | << " Value: " | |||
| 771 | << (ICVInfo.InitValue | |||
| 772 | ? toString(ICVInfo.InitValue->getValue(), 10, true) | |||
| 773 | : "IMPLEMENTATION_DEFINED"); | |||
| 774 | }; | |||
| 775 | ||||
| 776 | emitRemark<OptimizationRemarkAnalysis>(F, "OpenMPICVTracker", Remark); | |||
| 777 | } | |||
| 778 | } | |||
| 779 | } | |||
| 780 | ||||
| 781 | /// Print OpenMP GPU kernels for testing. | |||
| 782 | void printKernels() const { | |||
| 783 | for (Function *F : SCC) { | |||
| 784 | if (!OMPInfoCache.Kernels.count(F)) | |||
| 785 | continue; | |||
| 786 | ||||
| 787 | auto Remark = [&](OptimizationRemarkAnalysis ORA) { | |||
| 788 | return ORA << "OpenMP GPU kernel " | |||
| 789 | << ore::NV("OpenMPGPUKernel", F->getName()) << "\n"; | |||
| 790 | }; | |||
| 791 | ||||
| 792 | emitRemark<OptimizationRemarkAnalysis>(F, "OpenMPGPU", Remark); | |||
| 793 | } | |||
| 794 | } | |||
| 795 | ||||
| 796 | /// Return the call if \p U is a callee use in a regular call. If \p RFI is | |||
| 797 | /// given it has to be the callee or a nullptr is returned. | |||
| 798 | static CallInst *getCallIfRegularCall( | |||
| 799 | Use &U, OMPInformationCache::RuntimeFunctionInfo *RFI = nullptr) { | |||
| 800 | CallInst *CI = dyn_cast<CallInst>(U.getUser()); | |||
| 801 | if (CI && CI->isCallee(&U) && !CI->hasOperandBundles() && | |||
| 802 | (!RFI || | |||
| 803 | (RFI->Declaration && CI->getCalledFunction() == RFI->Declaration))) | |||
| 804 | return CI; | |||
| 805 | return nullptr; | |||
| 806 | } | |||
| 807 | ||||
| 808 | /// Return the call if \p V is a regular call. If \p RFI is given it has to be | |||
| 809 | /// the callee or a nullptr is returned. | |||
| 810 | static CallInst *getCallIfRegularCall( | |||
| 811 | Value &V, OMPInformationCache::RuntimeFunctionInfo *RFI = nullptr) { | |||
| 812 | CallInst *CI = dyn_cast<CallInst>(&V); | |||
| 813 | if (CI && !CI->hasOperandBundles() && | |||
| 814 | (!RFI || | |||
| 815 | (RFI->Declaration && CI->getCalledFunction() == RFI->Declaration))) | |||
| 816 | return CI; | |||
| 817 | return nullptr; | |||
| 818 | } | |||
| 819 | ||||
| 820 | private: | |||
| 821 | /// Merge parallel regions when it is safe. | |||
| 822 | bool mergeParallelRegions() { | |||
| 823 | const unsigned CallbackCalleeOperand = 2; | |||
| 824 | const unsigned CallbackFirstArgOperand = 3; | |||
| 825 | using InsertPointTy = OpenMPIRBuilder::InsertPointTy; | |||
| 826 | ||||
| 827 | // Check if there are any __kmpc_fork_call calls to merge. | |||
| 828 | OMPInformationCache::RuntimeFunctionInfo &RFI = | |||
| 829 | OMPInfoCache.RFIs[OMPRTL___kmpc_fork_call]; | |||
| 830 | ||||
| 831 | if (!RFI.Declaration) | |||
| 832 | return false; | |||
| 833 | ||||
| 834 | // Unmergable calls that prevent merging a parallel region. | |||
| 835 | OMPInformationCache::RuntimeFunctionInfo UnmergableCallsInfo[] = { | |||
| 836 | OMPInfoCache.RFIs[OMPRTL___kmpc_push_proc_bind], | |||
| 837 | OMPInfoCache.RFIs[OMPRTL___kmpc_push_num_threads], | |||
| 838 | }; | |||
| 839 | ||||
| 840 | bool Changed = false; | |||
| 841 | LoopInfo *LI = nullptr; | |||
| 842 | DominatorTree *DT = nullptr; | |||
| 843 | ||||
| 844 | SmallDenseMap<BasicBlock *, SmallPtrSet<Instruction *, 4>> BB2PRMap; | |||
| 845 | ||||
| 846 | BasicBlock *StartBB = nullptr, *EndBB = nullptr; | |||
| 847 | auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, | |||
| 848 | BasicBlock &ContinuationIP) { | |||
| 849 | BasicBlock *CGStartBB = CodeGenIP.getBlock(); | |||
| 850 | BasicBlock *CGEndBB = | |||
| 851 | SplitBlock(CGStartBB, &*CodeGenIP.getPoint(), DT, LI); | |||
| 852 | assert(StartBB != nullptr && "StartBB should not be null"); | |||
| 853 | CGStartBB->getTerminator()->setSuccessor(0, StartBB); | |||
| 854 | assert(EndBB != nullptr && "EndBB should not be null"); | |||
| 855 | EndBB->getTerminator()->setSuccessor(0, CGEndBB); | |||
| 856 | }; | |||
| 857 | ||||
| 858 | auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &, | |||
| 859 | Value &Inner, Value *&ReplacementValue) -> InsertPointTy { | |||
| 860 | ReplacementValue = &Inner; | |||
| 861 | return CodeGenIP; | |||
| 862 | }; | |||
| 863 | ||||
| 864 | auto FiniCB = [&](InsertPointTy CodeGenIP) {}; | |||
| 865 | ||||
| 866 | /// Create a sequential execution region within a merged parallel region, | |||
| 867 | /// encapsulated in a master construct with a barrier for synchronization. | |||
| 868 | auto CreateSequentialRegion = [&](Function *OuterFn, | |||
| 869 | BasicBlock *OuterPredBB, | |||
| 870 | Instruction *SeqStartI, | |||
| 871 | Instruction *SeqEndI) { | |||
| 872 | // Isolate the instructions of the sequential region to a separate | |||
| 873 | // block. | |||
| 874 | BasicBlock *ParentBB = SeqStartI->getParent(); | |||
| 875 | BasicBlock *SeqEndBB = | |||
| 876 | SplitBlock(ParentBB, SeqEndI->getNextNode(), DT, LI); | |||
| 877 | BasicBlock *SeqAfterBB = | |||
| 878 | SplitBlock(SeqEndBB, &*SeqEndBB->getFirstInsertionPt(), DT, LI); | |||
| 879 | BasicBlock *SeqStartBB = | |||
| 880 | SplitBlock(ParentBB, SeqStartI, DT, LI, nullptr, "seq.par.merged"); | |||
| 881 | ||||
| 882 | assert(ParentBB->getUniqueSuccessor() == SeqStartBB && | |||
| 883 | "Expected a different CFG"); | |||
| 884 | const DebugLoc DL = ParentBB->getTerminator()->getDebugLoc(); | |||
| 885 | ParentBB->getTerminator()->eraseFromParent(); | |||
| 886 | ||||
| 887 | auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, | |||
| 888 | BasicBlock &ContinuationIP) { | |||
| 889 | BasicBlock *CGStartBB = CodeGenIP.getBlock(); | |||
| 890 | BasicBlock *CGEndBB = | |||
| 891 | SplitBlock(CGStartBB, &*CodeGenIP.getPoint(), DT, LI); | |||
| 892 | assert(SeqStartBB != nullptr && "SeqStartBB should not be null"); | |||
| 893 | CGStartBB->getTerminator()->setSuccessor(0, SeqStartBB); | |||
| 894 | assert(SeqEndBB != nullptr && "SeqEndBB should not be null"); | |||
| 895 | SeqEndBB->getTerminator()->setSuccessor(0, CGEndBB); | |||
| 896 | }; | |||
| 897 | auto FiniCB = [&](InsertPointTy CodeGenIP) {}; | |||
| 898 | ||||
| 899 | // Find outputs from the sequential region to outside users and | |||
| 900 | // broadcast their values to them. | |||
| 901 | for (Instruction &I : *SeqStartBB) { | |||
| 902 | SmallPtrSet<Instruction *, 4> OutsideUsers; | |||
| 903 | for (User *Usr : I.users()) { | |||
| 904 | Instruction &UsrI = *cast<Instruction>(Usr); | |||
| 905 | // Ignore outputs to LT intrinsics, code extraction for the merged | |||
| 906 | // parallel region will fix them. | |||
| 907 | if (UsrI.isLifetimeStartOrEnd()) | |||
| 908 | continue; | |||
| 909 | ||||
| 910 | if (UsrI.getParent() != SeqStartBB) | |||
| 911 | OutsideUsers.insert(&UsrI); | |||
| 912 | } | |||
| 913 | ||||
| 914 | if (OutsideUsers.empty()) | |||
| 915 | continue; | |||
| 916 | ||||
| 917 | // Emit an alloca in the outer region to store the broadcasted | |||
| 918 | // value. | |||
| 919 | const DataLayout &DL = M.getDataLayout(); | |||
| 920 | AllocaInst *AllocaI = new AllocaInst( | |||
| 921 | I.getType(), DL.getAllocaAddrSpace(), nullptr, | |||
| 922 | I.getName() + ".seq.output.alloc", &OuterFn->front().front()); | |||
| 923 | ||||
| 924 | // Emit a store instruction in the sequential BB to update the | |||
| 925 | // value. | |||
| 926 | new StoreInst(&I, AllocaI, SeqStartBB->getTerminator()); | |||
| 927 | ||||
| 928 | // Emit a load instruction and replace the use of the output value | |||
| 929 | // with it. | |||
| 930 | for (Instruction *UsrI : OutsideUsers) { | |||
| 931 | LoadInst *LoadI = new LoadInst( | |||
| 932 | I.getType(), AllocaI, I.getName() + ".seq.output.load", UsrI); | |||
| 933 | UsrI->replaceUsesOfWith(&I, LoadI); | |||
| 934 | } | |||
| 935 | } | |||
| 936 | ||||
| 937 | OpenMPIRBuilder::LocationDescription Loc( | |||
| 938 | InsertPointTy(ParentBB, ParentBB->end()), DL); | |||
| 939 | InsertPointTy SeqAfterIP = | |||
| 940 | OMPInfoCache.OMPBuilder.createMaster(Loc, BodyGenCB, FiniCB); | |||
| 941 | ||||
| 942 | OMPInfoCache.OMPBuilder.createBarrier(SeqAfterIP, OMPD_parallel); | |||
| 943 | ||||
| 944 | BranchInst::Create(SeqAfterBB, SeqAfterIP.getBlock()); | |||
| 945 | ||||
| 946 | LLVM_DEBUG(dbgs() << TAG << "After sequential inlining " << *OuterFn | |||
| 947 | << "\n"); | |||
| 948 | }; | |||
| 949 | ||||
| 950 | // Helper to merge the __kmpc_fork_call calls in MergableCIs. They are all | |||
| 951 | // contained in BB and only separated by instructions that can be | |||
| 952 | // redundantly executed in parallel. The block BB is split before the first | |||
| 953 | // call (in MergableCIs) and after the last so the entire region we merge | |||
| 954 | // into a single parallel region is contained in a single basic block | |||
| 955 | // without any other instructions. We use the OpenMPIRBuilder to outline | |||
| 956 | // that block and call the resulting function via __kmpc_fork_call. | |||
| 957 | auto Merge = [&](SmallVectorImpl<CallInst *> &MergableCIs, BasicBlock *BB) { | |||
| 958 | // TODO: Change the interface to allow single CIs expanded, e.g., to | |||
| 959 | // include an outer loop. | |||
| 960 | assert(MergableCIs.size() > 1 && "Assumed multiple mergable CIs"); | |||
| 961 | ||||
| 962 | auto Remark = [&](OptimizationRemark OR) { | |||
| 963 | OR << "Parallel region merged with parallel region" | |||
| 964 | << (MergableCIs.size() > 2 ? "s" : "") << " at "; | |||
| 965 | for (auto *CI : llvm::drop_begin(MergableCIs)) { | |||
| 966 | OR << ore::NV("OpenMPParallelMerge", CI->getDebugLoc()); | |||
| 967 | if (CI != MergableCIs.back()) | |||
| 968 | OR << ", "; | |||
| 969 | } | |||
| 970 | return OR << "."; | |||
| 971 | }; | |||
| 972 | ||||
| 973 | emitRemark<OptimizationRemark>(MergableCIs.front(), "OMP150", Remark); | |||
| 974 | ||||
| 975 | Function *OriginalFn = BB->getParent(); | |||
| 976 | LLVM_DEBUG(dbgs() << TAG << "Merge " << MergableCIs.size() | |||
| 977 | << " parallel regions in " << OriginalFn->getName() | |||
| 978 | << "\n"); | |||
| 979 | ||||
| 980 | // Isolate the calls to merge in a separate block. | |||
| 981 | EndBB = SplitBlock(BB, MergableCIs.back()->getNextNode(), DT, LI); | |||
| 982 | BasicBlock *AfterBB = | |||
| 983 | SplitBlock(EndBB, &*EndBB->getFirstInsertionPt(), DT, LI); | |||
| 984 | StartBB = SplitBlock(BB, MergableCIs.front(), DT, LI, nullptr, | |||
| 985 | "omp.par.merged"); | |||
| 986 | ||||
| 987 | assert(BB->getUniqueSuccessor() == StartBB && "Expected a different CFG"); | |||
| 988 | const DebugLoc DL = BB->getTerminator()->getDebugLoc(); | |||
| 989 | BB->getTerminator()->eraseFromParent(); | |||
| 990 | ||||
| 991 | // Create sequential regions for sequential instructions that are | |||
| 992 | // in-between mergable parallel regions. | |||
| 993 | for (auto *It = MergableCIs.begin(), *End = MergableCIs.end() - 1; | |||
| 994 | It != End; ++It) { | |||
| 995 | Instruction *ForkCI = *It; | |||
| 996 | Instruction *NextForkCI = *(It + 1); | |||
| 997 | ||||
| 998 | // Continue if there are no in-between instructions. | |||
| 999 | if (ForkCI->getNextNode() == NextForkCI) | |||
| 1000 | continue; | |||
| 1001 | ||||
| 1002 | CreateSequentialRegion(OriginalFn, BB, ForkCI->getNextNode(), | |||
| 1003 | NextForkCI->getPrevNode()); | |||
| 1004 | } | |||
| 1005 | ||||
| 1006 | OpenMPIRBuilder::LocationDescription Loc(InsertPointTy(BB, BB->end()), | |||
| 1007 | DL); | |||
| 1008 | IRBuilder<>::InsertPoint AllocaIP( | |||
| 1009 | &OriginalFn->getEntryBlock(), | |||
| 1010 | OriginalFn->getEntryBlock().getFirstInsertionPt()); | |||
| 1011 | // Create the merged parallel region with default proc binding, to | |||
| 1012 | // avoid overriding binding settings, and without explicit cancellation. | |||
| 1013 | InsertPointTy AfterIP = OMPInfoCache.OMPBuilder.createParallel( | |||
| 1014 | Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, nullptr, nullptr, | |||
| 1015 | OMP_PROC_BIND_default, /* IsCancellable */ false); | |||
| 1016 | BranchInst::Create(AfterBB, AfterIP.getBlock()); | |||
| 1017 | ||||
| 1018 | // Perform the actual outlining. | |||
| 1019 | OMPInfoCache.OMPBuilder.finalize(OriginalFn, | |||
| 1020 | /* AllowExtractorSinking */ true); | |||
| 1021 | ||||
| 1022 | Function *OutlinedFn = MergableCIs.front()->getCaller(); | |||
| 1023 | ||||
| 1024 | // Replace the __kmpc_fork_call calls with direct calls to the outlined | |||
| 1025 | // callbacks. | |||
| 1026 | SmallVector<Value *, 8> Args; | |||
| 1027 | for (auto *CI : MergableCIs) { | |||
| 1028 | Value *Callee = | |||
| 1029 | CI->getArgOperand(CallbackCalleeOperand)->stripPointerCasts(); | |||
| 1030 | FunctionType *FT = | |||
| 1031 | cast<FunctionType>(Callee->getType()->getPointerElementType()); | |||
| 1032 | Args.clear(); | |||
| 1033 | Args.push_back(OutlinedFn->getArg(0)); | |||
| 1034 | Args.push_back(OutlinedFn->getArg(1)); | |||
| 1035 | for (unsigned U = CallbackFirstArgOperand, E = CI->getNumArgOperands(); | |||
| 1036 | U < E; ++U) | |||
| 1037 | Args.push_back(CI->getArgOperand(U)); | |||
| 1038 | ||||
| 1039 | CallInst *NewCI = CallInst::Create(FT, Callee, Args, "", CI); | |||
| 1040 | if (CI->getDebugLoc()) | |||
| 1041 | NewCI->setDebugLoc(CI->getDebugLoc()); | |||
| 1042 | ||||
| 1043 | // Forward parameter attributes from the callback to the callee. | |||
| 1044 | for (unsigned U = CallbackFirstArgOperand, E = CI->getNumArgOperands(); | |||
| 1045 | U < E; ++U) | |||
| 1046 | for (const Attribute &A : CI->getAttributes().getParamAttributes(U)) | |||
| 1047 | NewCI->addParamAttr( | |||
| 1048 | U - (CallbackFirstArgOperand - CallbackCalleeOperand), A); | |||
| 1049 | ||||
| 1050 | // Emit an explicit barrier to replace the implicit fork-join barrier. | |||
| 1051 | if (CI != MergableCIs.back()) { | |||
| 1052 | // TODO: Remove barrier if the merged parallel region includes the | |||
| 1053 | // 'nowait' clause. | |||
| 1054 | OMPInfoCache.OMPBuilder.createBarrier( | |||
| 1055 | InsertPointTy(NewCI->getParent(), | |||
| 1056 | NewCI->getNextNode()->getIterator()), | |||
| 1057 | OMPD_parallel); | |||
| 1058 | } | |||
| 1059 | ||||
| 1060 | CI->eraseFromParent(); | |||
| 1061 | } | |||
| 1062 | ||||
| 1063 | assert(OutlinedFn != OriginalFn && "Outlining failed"); | |||
| 1064 | CGUpdater.registerOutlinedFunction(*OriginalFn, *OutlinedFn); | |||
| 1065 | CGUpdater.reanalyzeFunction(*OriginalFn); | |||
| 1066 | ||||
| 1067 | NumOpenMPParallelRegionsMerged += MergableCIs.size(); | |||
| 1068 | ||||
| 1069 | return true; | |||
| 1070 | }; | |||
| 1071 | ||||
| 1072 | // Helper function that identifies sequences of | |||
| 1073 | // __kmpc_fork_call uses in a basic block. | |||
| 1074 | auto DetectPRsCB = [&](Use &U, Function &F) { | |||
| 1075 | CallInst *CI = getCallIfRegularCall(U, &RFI); | |||
| 1076 | BB2PRMap[CI->getParent()].insert(CI); | |||
| 1077 | ||||
| 1078 | return false; | |||
| 1079 | }; | |||
| 1080 | ||||
| 1081 | BB2PRMap.clear(); | |||
| 1082 | RFI.foreachUse(SCC, DetectPRsCB); | |||
| 1083 | SmallVector<SmallVector<CallInst *, 4>, 4> MergableCIsVector; | |||
| 1084 | // Find mergable parallel regions within a basic block that are | |||
| 1085 | // safe to merge, that is any in-between instructions can safely | |||
| 1086 | // execute in parallel after merging. | |||
| 1087 | // TODO: support merging across basic-blocks. | |||
| 1088 | for (auto &It : BB2PRMap) { | |||
| 1089 | auto &CIs = It.getSecond(); | |||
| 1090 | if (CIs.size() < 2) | |||
| 1091 | continue; | |||
| 1092 | ||||
| 1093 | BasicBlock *BB = It.getFirst(); | |||
| 1094 | SmallVector<CallInst *, 4> MergableCIs; | |||
| 1095 | ||||
| 1096 | /// Returns true if the instruction is mergable, false otherwise. | |||
| 1097 | /// A terminator instruction is unmergable by definition since merging | |||
| 1098 | /// works within a BB. Instructions before the mergable region are | |||
| 1099 | /// mergable if they are not calls to OpenMP runtime functions that may | |||
| 1100 | /// set different execution parameters for subsequent parallel regions. | |||
| 1101 | /// Instructions in-between parallel regions are mergable if they are not | |||
| 1102 | /// calls to any non-intrinsic function since that may call a non-mergable | |||
| 1103 | /// OpenMP runtime function. | |||
| 1104 | auto IsMergable = [&](Instruction &I, bool IsBeforeMergableRegion) { | |||
| 1105 | // We do not merge across BBs, hence return false (unmergable) if the | |||
| 1106 | // instruction is a terminator. | |||
| 1107 | if (I.isTerminator()) | |||
| 1108 | return false; | |||
| 1109 | ||||
| 1110 | if (!isa<CallInst>(&I)) | |||
| 1111 | return true; | |||
| 1112 | ||||
| 1113 | CallInst *CI = cast<CallInst>(&I); | |||
| 1114 | if (IsBeforeMergableRegion) { | |||
| 1115 | Function *CalledFunction = CI->getCalledFunction(); | |||
| 1116 | if (!CalledFunction) | |||
| 1117 | return false; | |||
| 1118 | // Return false (unmergable) if the call before the parallel | |||
| 1119 | // region calls an explicit affinity (proc_bind) or number of | |||
| 1120 | // threads (num_threads) compiler-generated function. Those settings | |||
| 1121 | // may be incompatible with following parallel regions. | |||
| 1122 | // TODO: ICV tracking to detect compatibility. | |||
| 1123 | for (const auto &RFI : UnmergableCallsInfo) { | |||
| 1124 | if (CalledFunction == RFI.Declaration) | |||
| 1125 | return false; | |||
| 1126 | } | |||
| 1127 | } else { | |||
| 1128 | // Return false (unmergable) if there is a call instruction | |||
| 1129 | // in-between parallel regions when it is not an intrinsic. It | |||
| 1130 | // may call an unmergable OpenMP runtime function in its callpath. | |||
| 1131 | // TODO: Keep track of possible OpenMP calls in the callpath. | |||
| 1132 | if (!isa<IntrinsicInst>(CI)) | |||
| 1133 | return false; | |||
| 1134 | } | |||
| 1135 | ||||
| 1136 | return true; | |||
| 1137 | }; | |||
| 1138 | // Find maximal number of parallel region CIs that are safe to merge. | |||
| 1139 | for (auto It = BB->begin(), End = BB->end(); It != End;) { | |||
| 1140 | Instruction &I = *It; | |||
| 1141 | ++It; | |||
| 1142 | ||||
| 1143 | if (CIs.count(&I)) { | |||
| 1144 | MergableCIs.push_back(cast<CallInst>(&I)); | |||
| 1145 | continue; | |||
| 1146 | } | |||
| 1147 | ||||
| 1148 | // Continue expanding if the instruction is mergable. | |||
| 1149 | if (IsMergable(I, MergableCIs.empty())) | |||
| 1150 | continue; | |||
| 1151 | ||||
| 1152 | // Forward the instruction iterator to skip the next parallel region | |||
| 1153 | // since there is an unmergable instruction which can affect it. | |||
| 1154 | for (; It != End; ++It) { | |||
| 1155 | Instruction &SkipI = *It; | |||
| 1156 | if (CIs.count(&SkipI)) { | |||
| 1157 | LLVM_DEBUG(dbgs() << TAG << "Skip parallel region " << SkipIdo { } while (false) | |||
| 1158 | << " due to " << I << "\n")do { } while (false); | |||
| 1159 | ++It; | |||
| 1160 | break; | |||
| 1161 | } | |||
| 1162 | } | |||
| 1163 | ||||
| 1164 | // Store mergable regions found. | |||
| 1165 | if (MergableCIs.size() > 1) { | |||
| 1166 | MergableCIsVector.push_back(MergableCIs); | |||
| 1167 | LLVM_DEBUG(dbgs() << TAG << "Found " << MergableCIs.size()do { } while (false) | |||
| 1168 | << " parallel regions in block " << BB->getName()do { } while (false) | |||
| 1169 | << " of function " << BB->getParent()->getName()do { } while (false) | |||
| 1170 | << "\n";)do { } while (false); | |||
| 1171 | } | |||
| 1172 | ||||
| 1173 | MergableCIs.clear(); | |||
| 1174 | } | |||
| 1175 | ||||
| 1176 | if (!MergableCIsVector.empty()) { | |||
| 1177 | Changed = true; | |||
| 1178 | ||||
| 1179 | for (auto &MergableCIs : MergableCIsVector) | |||
| 1180 | Merge(MergableCIs, BB); | |||
| 1181 | MergableCIsVector.clear(); | |||
| 1182 | } | |||
| 1183 | } | |||
| 1184 | ||||
| 1185 | if (Changed) { | |||
| 1186 | /// Re-collect uses for fork calls, emitted barrier calls, and | |||
| 1187 | /// any emitted master/end_master calls. | |||
| 1188 | OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_fork_call); | |||
| 1189 | OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_barrier); | |||
| 1190 | OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_master); | |||
| 1191 | OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_end_master); | |||
| 1192 | } | |||
| 1193 | ||||
| 1194 | return Changed; | |||
| 1195 | } | |||
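| | // Illustrative sketch of the effect of merging (simplified IR, hypothetical | |||
| | // outlined-function names): | |||
| | //   call void @__kmpc_fork_call(%ident, 0, @outlined1) | |||
| | //   call void @__kmpc_fork_call(%ident, 0, @outlined2) | |||
| | // becomes a single fork of a new outlined function that calls @outlined1, | |||
| | // then an explicit __kmpc_barrier (replacing the implicit fork-join | |||
| | // barrier), then @outlined2. | |||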
| 1196 | ||||
| 1197 | /// Try to delete parallel regions if possible. | |||
| 1198 | bool deleteParallelRegions() { | |||
| 1199 | const unsigned CallbackCalleeOperand = 2; | |||
| 1200 | ||||
| 1201 | OMPInformationCache::RuntimeFunctionInfo &RFI = | |||
| 1202 | OMPInfoCache.RFIs[OMPRTL___kmpc_fork_call]; | |||
| 1203 | ||||
| 1204 | if (!RFI.Declaration) | |||
| 1205 | return false; | |||
| 1206 | ||||
| 1207 | bool Changed = false; | |||
| 1208 | auto DeleteCallCB = [&](Use &U, Function &) { | |||
| 1209 | CallInst *CI = getCallIfRegularCall(U); | |||
| 1210 | if (!CI) | |||
| 1211 | return false; | |||
| 1212 | auto *Fn = dyn_cast<Function>( | |||
| 1213 | CI->getArgOperand(CallbackCalleeOperand)->stripPointerCasts()); | |||
| 1214 | if (!Fn) | |||
| 1215 | return false; | |||
| 1216 | if (!Fn->onlyReadsMemory()) | |||
| 1217 | return false; | |||
| 1218 | if (!Fn->hasFnAttribute(Attribute::WillReturn)) | |||
| 1219 | return false; | |||
| 1220 | ||||
| 1221 | LLVM_DEBUG(dbgs() << TAG << "Delete read-only parallel region in "do { } while (false) | |||
| 1222 | << CI->getCaller()->getName() << "\n")do { } while (false); | |||
| 1223 | ||||
| 1224 | auto Remark = [&](OptimizationRemark OR) { | |||
| 1225 | return OR << "Removing parallel region with no side-effects."; | |||
| 1226 | }; | |||
| 1227 | emitRemark<OptimizationRemark>(CI, "OMP160", Remark); | |||
| 1228 | ||||
| 1229 | CGUpdater.removeCallSite(*CI); | |||
| 1230 | CI->eraseFromParent(); | |||
| 1231 | Changed = true; | |||
| 1232 | ++NumOpenMPParallelRegionsDeleted; | |||
| 1233 | return true; | |||
| 1234 | }; | |||
| 1235 | ||||
| 1236 | RFI.foreachUse(SCC, DeleteCallCB); | |||
| 1237 | ||||
| 1238 | return Changed; | |||
| 1239 | } | |||
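| | // Illustrative sketch: a fork call whose outlined callee only reads memory | |||
| | // and is guaranteed to return has no observable effect, e.g. (hypothetical | |||
| | // user code): | |||
| | //   #pragma omp parallel | |||
| | //   { /* empty body, nothing written, nothing escapes */ } | |||
| | // so the whole __kmpc_fork_call site can be erased (remark OMP160). | |||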
| 1240 | ||||
| 1241 | /// Try to eliminate runtime calls by reusing existing ones. | |||
| 1242 | bool deduplicateRuntimeCalls() { | |||
| 1243 | bool Changed = false; | |||
| 1244 | ||||
| 1245 | RuntimeFunction DeduplicableRuntimeCallIDs[] = { | |||
| 1246 | OMPRTL_omp_get_num_threads, | |||
| 1247 | OMPRTL_omp_in_parallel, | |||
| 1248 | OMPRTL_omp_get_cancellation, | |||
| 1249 | OMPRTL_omp_get_thread_limit, | |||
| 1250 | OMPRTL_omp_get_supported_active_levels, | |||
| 1251 | OMPRTL_omp_get_level, | |||
| 1252 | OMPRTL_omp_get_ancestor_thread_num, | |||
| 1253 | OMPRTL_omp_get_team_size, | |||
| 1254 | OMPRTL_omp_get_active_level, | |||
| 1255 | OMPRTL_omp_in_final, | |||
| 1256 | OMPRTL_omp_get_proc_bind, | |||
| 1257 | OMPRTL_omp_get_num_places, | |||
| 1258 | OMPRTL_omp_get_num_procs, | |||
| 1259 | OMPRTL_omp_get_place_num, | |||
| 1260 | OMPRTL_omp_get_partition_num_places, | |||
| 1261 | OMPRTL_omp_get_partition_place_nums}; | |||
| 1262 | ||||
| 1263 | // Global-tid is handled separately. | |||
| 1264 | SmallSetVector<Value *, 16> GTIdArgs; | |||
| 1265 | collectGlobalThreadIdArguments(GTIdArgs); | |||
| 1266 | LLVM_DEBUG(dbgs() << TAG << "Found " << GTIdArgs.size()do { } while (false) | |||
| 1267 | << " global thread ID arguments\n")do { } while (false); | |||
| 1268 | ||||
| 1269 | for (Function *F : SCC) { | |||
| 1270 | for (auto DeduplicableRuntimeCallID : DeduplicableRuntimeCallIDs) | |||
| 1271 | Changed |= deduplicateRuntimeCalls( | |||
| 1272 | *F, OMPInfoCache.RFIs[DeduplicableRuntimeCallID]); | |||
| 1273 | ||||
| 1274 | // __kmpc_global_thread_num is special as we can replace it with an | |||
| 1275 | // argument in enough cases to make it worth trying. | |||
| 1276 | Value *GTIdArg = nullptr; | |||
| 1277 | for (Argument &Arg : F->args()) | |||
| 1278 | if (GTIdArgs.count(&Arg)) { | |||
| 1279 | GTIdArg = &Arg; | |||
| 1280 | break; | |||
| 1281 | } | |||
| 1282 | Changed |= deduplicateRuntimeCalls( | |||
| 1283 | *F, OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num], GTIdArg); | |||
| 1284 | } | |||
| 1285 | ||||
| 1286 | return Changed; | |||
| 1287 | } | |||
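| | // Illustrative sketch (simplified IR): repeated side-effect-free runtime | |||
| | // queries in one function, e.g. | |||
| | //   %a = call i32 @omp_get_level() | |||
| | //   %b = call i32 @omp_get_level() | |||
| | // collapse to a single call whose result replaces both %a and %b, while a | |||
| | // __kmpc_global_thread_num call may instead be replaced by a known | |||
| | // global-thread-id argument of the enclosing function. | |||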
| 1288 | ||||
| 1289 | /// Tries to hide the latency of runtime calls that involve host to | |||
| 1290 | /// device memory transfers by splitting them into their "issue" and "wait" | |||
| 1291 | /// versions. The "issue" is moved upwards as much as possible. The "wait" is | |||
| 1292 | /// moved downwards as much as possible. The "issue" issues the memory transfer | |||
| 1293 | /// asynchronously, returning a handle. The "wait" waits in the returned | |||
| 1294 | /// handle for the memory transfer to finish. | |||
| 1295 | bool hideMemTransfersLatency() { | |||
| 1296 | auto &RFI = OMPInfoCache.RFIs[OMPRTL___tgt_target_data_begin_mapper]; | |||
| 1297 | bool Changed = false; | |||
| 1298 | auto SplitMemTransfers = [&](Use &U, Function &Decl) { | |||
| 1299 | auto *RTCall = getCallIfRegularCall(U, &RFI); | |||
| 1300 | if (!RTCall) | |||
| 1301 | return false; | |||
| 1302 | ||||
| 1303 | OffloadArray OffloadArrays[3]; | |||
| 1304 | if (!getValuesInOffloadArrays(*RTCall, OffloadArrays)) | |||
| 1305 | return false; | |||
| 1306 | ||||
| 1307 | LLVM_DEBUG(dumpValuesInOffloadArrays(OffloadArrays))do { } while (false); | |||
| 1308 | ||||
| 1309 | // TODO: Check if can be moved upwards. | |||
| 1310 | bool WasSplit = false; | |||
| 1311 | Instruction *WaitMovementPoint = canBeMovedDownwards(*RTCall); | |||
| 1312 | if (WaitMovementPoint) | |||
| 1313 | WasSplit = splitTargetDataBeginRTC(*RTCall, *WaitMovementPoint); | |||
| 1314 | ||||
| 1315 | Changed |= WasSplit; | |||
| 1316 | return WasSplit; | |||
| 1317 | }; | |||
| 1318 | RFI.foreachUse(SCC, SplitMemTransfers); | |||
| 1319 | ||||
| 1320 | return Changed; | |||
| 1321 | } | |||
| 1322 | ||||
| 1323 | void analysisGlobalization() { | |||
| 1324 | auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared]; | |||
| 1325 | ||||
| 1326 | auto CheckGlobalization = [&](Use &U, Function &Decl) { | |||
| 1327 | if (CallInst *CI = getCallIfRegularCall(U, &RFI)) { | |||
| 1328 | auto Remark = [&](OptimizationRemarkMissed ORM) { | |||
| 1329 | return ORM | |||
| 1330 | << "Found thread data sharing on the GPU. " | |||
| 1331 | << "Expect degraded performance due to data globalization."; | |||
| 1332 | }; | |||
| 1333 | emitRemark<OptimizationRemarkMissed>(CI, "OMP112", Remark); | |||
| 1334 | } | |||
| 1335 | ||||
| 1336 | return false; | |||
| 1337 | }; | |||
| 1338 | ||||
| 1339 | RFI.foreachUse(SCC, CheckGlobalization); | |||
| 1340 | } | |||
| 1341 | ||||
| 1342 | /// Maps the values stored in the offload arrays passed as arguments to | |||
| 1343 | /// \p RuntimeCall into the offload arrays in \p OAs. | |||
| 1344 | bool getValuesInOffloadArrays(CallInst &RuntimeCall, | |||
| 1345 | MutableArrayRef<OffloadArray> OAs) { | |||
| 1346 | assert(OAs.size() == 3 && "Need space for three offload arrays!")((void)0); | |||
| 1347 | ||||
| 1348 | // A runtime call that involves memory offloading looks something like: | |||
| 1349 | // call void @__tgt_target_data_begin_mapper(arg0, arg1, | |||
| 1350 | // i8** %offload_baseptrs, i8** %offload_ptrs, i64* %offload_sizes, | |||
| 1351 | // ...) | |||
| 1352 | // So, the idea is to access the allocas that allocate space for these | |||
| 1353 | // offload arrays, offload_baseptrs, offload_ptrs, offload_sizes. | |||
| 1354 | // Therefore: | |||
| 1355 | // i8** %offload_baseptrs. | |||
| 1356 | Value *BasePtrsArg = | |||
| 1357 | RuntimeCall.getArgOperand(OffloadArray::BasePtrsArgNum); | |||
| 1358 | // i8** %offload_ptrs. | |||
| 1359 | Value *PtrsArg = RuntimeCall.getArgOperand(OffloadArray::PtrsArgNum); | |||
| 1360 | // i8** %offload_sizes. | |||
| 1361 | Value *SizesArg = RuntimeCall.getArgOperand(OffloadArray::SizesArgNum); | |||
| 1362 | ||||
| 1363 | // Get values stored in **offload_baseptrs. | |||
| 1364 | auto *V = getUnderlyingObject(BasePtrsArg); | |||
| 1365 | if (!isa<AllocaInst>(V)) | |||
| 1366 | return false; | |||
| 1367 | auto *BasePtrsArray = cast<AllocaInst>(V); | |||
| 1368 | if (!OAs[0].initialize(*BasePtrsArray, RuntimeCall)) | |||
| 1369 | return false; | |||
| 1370 | ||||
| 1371 | // Get values stored in **offload_ptrs. | |||
| 1372 | V = getUnderlyingObject(PtrsArg); | |||
| 1373 | if (!isa<AllocaInst>(V)) | |||
| 1374 | return false; | |||
| 1375 | auto *PtrsArray = cast<AllocaInst>(V); | |||
| 1376 | if (!OAs[1].initialize(*PtrsArray, RuntimeCall)) | |||
| 1377 | return false; | |||
| 1378 | ||||
| 1379 | // Get values stored in **offload_sizes. | |||
| 1380 | V = getUnderlyingObject(SizesArg); | |||
| 1381 | // If it's a [constant] global array don't analyze it. | |||
| 1382 | if (isa<GlobalValue>(V)) | |||
| 1383 | return isa<Constant>(V); | |||
| 1384 | if (!isa<AllocaInst>(V)) | |||
| 1385 | return false; | |||
| 1386 | ||||
| 1387 | auto *SizesArray = cast<AllocaInst>(V); | |||
| 1388 | if (!OAs[2].initialize(*SizesArray, RuntimeCall)) | |||
| 1389 | return false; | |||
| 1390 | ||||
| 1391 | return true; | |||
| 1392 | } | |||
| 1393 | ||||
| 1394 | /// Prints the values in the OffloadArrays \p OAs using LLVM_DEBUG. | |||
| 1395 | /// For now this is a way to test that the function getValuesInOffloadArrays | |||
| 1396 | /// is working properly. | |||
| 1397 | /// TODO: Move this to a unittest when unittests are available for OpenMPOpt. | |||
| 1398 | void dumpValuesInOffloadArrays(ArrayRef<OffloadArray> OAs) { | |||
| 1399 | assert(OAs.size() == 3 && "There are three offload arrays to debug!")((void)0); | |||
| 1400 | ||||
| 1401 | LLVM_DEBUG(dbgs() << TAG << " Successfully got offload values:\n")do { } while (false); | |||
| 1402 | std::string ValuesStr; | |||
| 1403 | raw_string_ostream Printer(ValuesStr); | |||
| 1404 | std::string Separator = " --- "; | |||
| 1405 | ||||
| 1406 | for (auto *BP : OAs[0].StoredValues) { | |||
| 1407 | BP->print(Printer); | |||
| 1408 | Printer << Separator; | |||
| 1409 | } | |||
| 1410 | LLVM_DEBUG(dbgs() << "\t\toffload_baseptrs: " << Printer.str() << "\n")do { } while (false); | |||
| 1411 | ValuesStr.clear(); | |||
| 1412 | ||||
| 1413 | for (auto *P : OAs[1].StoredValues) { | |||
| 1414 | P->print(Printer); | |||
| 1415 | Printer << Separator; | |||
| 1416 | } | |||
| 1417 | LLVM_DEBUG(dbgs() << "\t\toffload_ptrs: " << Printer.str() << "\n")do { } while (false); | |||
| 1418 | ValuesStr.clear(); | |||
| 1419 | ||||
| 1420 | for (auto *S : OAs[2].StoredValues) { | |||
| 1421 | S->print(Printer); | |||
| 1422 | Printer << Separator; | |||
| 1423 | } | |||
| 1424 | LLVM_DEBUG(dbgs() << "\t\toffload_sizes: " << Printer.str() << "\n")do { } while (false); | |||
| 1425 | } | |||
| 1426 | ||||
| 1427 | /// Returns the instruction where the "wait" counterpart of \p RuntimeCall can be | |||
| 1428 | /// moved. Returns nullptr if the movement is not possible, or not worth it. | |||
| 1429 | Instruction *canBeMovedDownwards(CallInst &RuntimeCall) { | |||
| 1430 | // FIXME: This traverses only the BasicBlock where RuntimeCall is. | |||
| 1431 | // Make it traverse the CFG. | |||
| 1432 | ||||
| 1433 | Instruction *CurrentI = &RuntimeCall; | |||
| 1434 | bool IsWorthIt = false; | |||
| 1435 | while ((CurrentI = CurrentI->getNextNode())) { | |||
| 1436 | ||||
| 1437 | // TODO: Once we detect the regions to be offloaded we should use the | |||
| 1438 | // alias analysis manager to check if CurrentI may modify one of | |||
| 1439 | // the offloaded regions. | |||
| 1440 | if (CurrentI->mayHaveSideEffects() || CurrentI->mayReadFromMemory()) { | |||
| 1441 | if (IsWorthIt) | |||
| 1442 | return CurrentI; | |||
| 1443 | ||||
| 1444 | return nullptr; | |||
| 1445 | } | |||
| 1446 | ||||
| 1447 | // FIXME: For now, moving the wait over anything without side effects | |||
| 1448 | // is considered worth it. | |||
| 1449 | IsWorthIt = true; | |||
| 1450 | } | |||
| 1451 | ||||
| 1452 | // Return end of BasicBlock. | |||
| 1453 | return RuntimeCall.getParent()->getTerminator(); | |||
| 1454 | } | |||
| 1455 | ||||
| 1456 | /// Splits \p RuntimeCall into its "issue" and "wait" counterparts. | |||
| 1457 | bool splitTargetDataBeginRTC(CallInst &RuntimeCall, | |||
| 1458 | Instruction &WaitMovementPoint) { | |||
| 1459 | // Create stack allocated handle (__tgt_async_info) at the beginning of the | |||
| 1460 | // function. Used for storing information of the async transfer, allowing us | |||
| 1461 | // to wait on it later. | |||
| 1462 | auto &IRBuilder = OMPInfoCache.OMPBuilder; | |||
| 1463 | auto *F = RuntimeCall.getCaller(); | |||
| 1464 | Instruction *FirstInst = &(F->getEntryBlock().front()); | |||
| 1465 | AllocaInst *Handle = new AllocaInst( | |||
| 1466 | IRBuilder.AsyncInfo, F->getAddressSpace(), "handle", FirstInst); | |||
| 1467 | ||||
| 1468 | // Add "issue" runtime call declaration: | |||
| 1469 | // declare %struct.tgt_async_info @__tgt_target_data_begin_issue(i64, i32, | |||
| 1470 | // i8**, i8**, i64*, i64*) | |||
| 1471 | FunctionCallee IssueDecl = IRBuilder.getOrCreateRuntimeFunction( | |||
| 1472 | M, OMPRTL___tgt_target_data_begin_mapper_issue); | |||
| 1473 | ||||
| 1474 | // Change RuntimeCall call site for its asynchronous version. | |||
| 1475 | SmallVector<Value *, 16> Args; | |||
| 1476 | for (auto &Arg : RuntimeCall.args()) | |||
| 1477 | Args.push_back(Arg.get()); | |||
| 1478 | Args.push_back(Handle); | |||
| 1479 | ||||
| 1480 | CallInst *IssueCallsite = | |||
| 1481 | CallInst::Create(IssueDecl, Args, /*NameStr=*/"", &RuntimeCall); | |||
| 1482 | RuntimeCall.eraseFromParent(); | |||
| 1483 | ||||
| 1484 | // Add "wait" runtime call declaration: | |||
| 1485 | // declare void @__tgt_target_data_begin_wait(i64, %struct.__tgt_async_info) | |||
| 1486 | FunctionCallee WaitDecl = IRBuilder.getOrCreateRuntimeFunction( | |||
| 1487 | M, OMPRTL___tgt_target_data_begin_mapper_wait); | |||
| 1488 | ||||
| 1489 | Value *WaitParams[2] = { | |||
| 1490 | IssueCallsite->getArgOperand( | |||
| 1491 | OffloadArray::DeviceIDArgNum), // device_id. | |||
| 1492 | Handle // handle to wait on. | |||
| 1493 | }; | |||
| 1494 | CallInst::Create(WaitDecl, WaitParams, /*NameStr=*/"", &WaitMovementPoint); | |||
| 1495 | ||||
| 1496 | return true; | |||
| 1497 | } | |||
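| | // Illustrative sketch (simplified, types elided): the synchronous | |||
| | //   @__tgt_target_data_begin_mapper(...) | |||
| | // becomes, with %handle a stack-allocated __tgt_async_info, | |||
| | //   @__tgt_target_data_begin_mapper_issue(..., %handle) | |||
| | //   ... independent instructions the wait can be moved past ... | |||
| | //   @__tgt_target_data_begin_mapper_wait(%device_id, %handle) | |||
| | // so the transfer overlaps with the code executed in between. | |||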
| 1498 | ||||
| 1499 | static Value *combinedIdentStruct(Value *CurrentIdent, Value *NextIdent, | |||
| 1500 | bool GlobalOnly, bool &SingleChoice) { | |||
| 1501 | if (CurrentIdent == NextIdent) | |||
| 1502 | return CurrentIdent; | |||
| 1503 | ||||
| 1504 | // TODO: Figure out how to actually combine multiple debug locations. For | |||
| 1505 | // now we just keep an existing one if there is a single choice. | |||
| 1506 | if (!GlobalOnly || isa<GlobalValue>(NextIdent)) { | |||
| 1507 | SingleChoice = !CurrentIdent; | |||
| 1508 | return NextIdent; | |||
| 1509 | } | |||
| 1510 | return nullptr; | |||
| 1511 | } | |||
| 1512 | ||||
| 1513 | /// Return a `struct ident_t*` value that represents the ones used in the | |||
| 1514 | /// calls of \p RFI inside of \p F. If \p GlobalOnly is true, we will not | |||
| 1515 | /// return a local `struct ident_t*`. For now, if we cannot find a suitable | |||
| 1516 | /// return value we create one from scratch. We also do not yet combine | |||
| 1517 | /// information, e.g., the source locations, see combinedIdentStruct. | |||
| 1518 | Value * | |||
| 1519 | getCombinedIdentFromCallUsesIn(OMPInformationCache::RuntimeFunctionInfo &RFI, | |||
| 1520 | Function &F, bool GlobalOnly) { | |||
| 1521 | bool SingleChoice = true; | |||
| 1522 | Value *Ident = nullptr; | |||
| 1523 | auto CombineIdentStruct = [&](Use &U, Function &Caller) { | |||
| 1524 | CallInst *CI = getCallIfRegularCall(U, &RFI); | |||
| 1525 | if (!CI || &F != &Caller) | |||
| 1526 | return false; | |||
| 1527 | Ident = combinedIdentStruct(Ident, CI->getArgOperand(0), | |||
| 1528 | /* GlobalOnly */ true, SingleChoice); | |||
| 1529 | return false; | |||
| 1530 | }; | |||
| 1531 | RFI.foreachUse(SCC, CombineIdentStruct); | |||
| 1532 | ||||
| 1533 | if (!Ident || !SingleChoice) { | |||
| 1534 | // The IRBuilder uses the insertion block to get to the module; this is | |||
| 1535 | // unfortunate, but we work around it for now. | |||
| 1536 | if (!OMPInfoCache.OMPBuilder.getInsertionPoint().getBlock()) | |||
| 1537 | OMPInfoCache.OMPBuilder.updateToLocation(OpenMPIRBuilder::InsertPointTy( | |||
| 1538 | &F.getEntryBlock(), F.getEntryBlock().begin())); | |||
| 1539 | // Create a fallback location if none was found. | |||
| 1540 | // TODO: Use the debug locations of the calls instead. | |||
| 1541 | Constant *Loc = OMPInfoCache.OMPBuilder.getOrCreateDefaultSrcLocStr(); | |||
| 1542 | Ident = OMPInfoCache.OMPBuilder.getOrCreateIdent(Loc); | |||
| 1543 | } | |||
| 1544 | return Ident; | |||
| 1545 | } | |||
| 1546 | ||||
| 1547 | /// Try to eliminate calls of \p RFI in \p F by reusing an existing one or | |||
| 1548 | /// \p ReplVal if given. | |||
| 1549 | bool deduplicateRuntimeCalls(Function &F, | |||
| 1550 | OMPInformationCache::RuntimeFunctionInfo &RFI, | |||
| 1551 | Value *ReplVal = nullptr) { | |||
| 1552 | auto *UV = RFI.getUseVector(F); | |||
| 1553 | if (!UV || UV->size() + (ReplVal != nullptr) < 2) | |||
| 1554 | return false; | |||
| 1555 | ||||
| 1556 | LLVM_DEBUG(do { } while (false) | |||
| 1557 | dbgs() << TAG << "Deduplicate " << UV->size() << " uses of " << RFI.Namedo { } while (false) | |||
| 1558 | << (ReplVal ? " with an existing value\n" : "\n") << "\n")do { } while (false); | |||
| 1559 | ||||
| 1560 | assert((!ReplVal || (isa<Argument>(ReplVal) &&((void)0) | |||
| 1561 | cast<Argument>(ReplVal)->getParent() == &F)) &&((void)0) | |||
| 1562 | "Unexpected replacement value!")((void)0); | |||
| 1563 | ||||
| 1564 | // TODO: Use dominance to find a good position instead. | |||
| 1565 | auto CanBeMoved = [this](CallBase &CB) { | |||
| 1566 | unsigned NumArgs = CB.getNumArgOperands(); | |||
| 1567 | if (NumArgs == 0) | |||
| 1568 | return true; | |||
| 1569 | if (CB.getArgOperand(0)->getType() != OMPInfoCache.OMPBuilder.IdentPtr) | |||
| 1570 | return false; | |||
| 1571 | for (unsigned u = 1; u < NumArgs; ++u) | |||
| 1572 | if (isa<Instruction>(CB.getArgOperand(u))) | |||
| 1573 | return false; | |||
| 1574 | return true; | |||
| 1575 | }; | |||
| 1576 | ||||
| 1577 | if (!ReplVal) { | |||
| 1578 | for (Use *U : *UV) | |||
| 1579 | if (CallInst *CI = getCallIfRegularCall(*U, &RFI)) { | |||
| 1580 | if (!CanBeMoved(*CI)) | |||
| 1581 | continue; | |||
| 1582 | ||||
| 1583 | // If the function is a kernel, dedup will move | |||
| 1584 | // the runtime call right after the kernel init callsite. Otherwise, | |||
| 1585 | // it will move it to the beginning of the caller function. | |||
| 1586 | if (isKernel(F)) { | |||
| 1587 | auto &KernelInitRFI = OMPInfoCache.RFIs[OMPRTL___kmpc_target_init]; | |||
| 1588 | auto *KernelInitUV = KernelInitRFI.getUseVector(F); | |||
| 1589 | ||||
| 1590 | if (KernelInitUV->empty()) | |||
| 1591 | continue; | |||
| 1592 | ||||
| 1593 | assert(KernelInitUV->size() == 1 &&((void)0) | |||
| 1594 | "Expected a single __kmpc_target_init in kernel\n")((void)0); | |||
| 1595 | ||||
| 1596 | CallInst *KernelInitCI = | |||
| 1597 | getCallIfRegularCall(*KernelInitUV->front(), &KernelInitRFI); | |||
| 1598 | assert(KernelInitCI &&((void)0) | |||
| 1599 | "Expected a call to __kmpc_target_init in kernel\n")((void)0); | |||
| 1600 | ||||
| 1601 | CI->moveAfter(KernelInitCI); | |||
| 1602 | } else | |||
| 1603 | CI->moveBefore(&*F.getEntryBlock().getFirstInsertionPt()); | |||
| 1604 | ReplVal = CI; | |||
| 1605 | break; | |||
| 1606 | } | |||
| 1607 | if (!ReplVal) | |||
| 1608 | return false; | |||
| 1609 | } | |||
| 1610 | ||||
| 1611 | // If we use a call as a replacement value we need to make sure the ident is | |||
| 1612 | // valid at the new location. For now we just pick a global one, either | |||
| 1613 | // existing and used by one of the calls, or created from scratch. | |||
| 1614 | if (CallBase *CI = dyn_cast<CallBase>(ReplVal)) { | |||
| 1615 | if (CI->getNumArgOperands() > 0 && | |||
| 1616 | CI->getArgOperand(0)->getType() == OMPInfoCache.OMPBuilder.IdentPtr) { | |||
| 1617 | Value *Ident = getCombinedIdentFromCallUsesIn(RFI, F, | |||
| 1618 | /* GlobalOnly */ true); | |||
| 1619 | CI->setArgOperand(0, Ident); | |||
| 1620 | } | |||
| 1621 | } | |||
| 1622 | ||||
| 1623 | bool Changed = false; | |||
| 1624 | auto ReplaceAndDeleteCB = [&](Use &U, Function &Caller) { | |||
| 1625 | CallInst *CI = getCallIfRegularCall(U, &RFI); | |||
| 1626 | if (!CI || CI == ReplVal || &F != &Caller) | |||
| 1627 | return false; | |||
| 1628 | assert(CI->getCaller() == &F && "Unexpected call!")((void)0); | |||
| 1629 | ||||
| 1630 | auto Remark = [&](OptimizationRemark OR) { | |||
| 1631 | return OR << "OpenMP runtime call " | |||
| 1632 | << ore::NV("OpenMPOptRuntime", RFI.Name) << " deduplicated."; | |||
| 1633 | }; | |||
| 1634 | if (CI->getDebugLoc()) | |||
| 1635 | emitRemark<OptimizationRemark>(CI, "OMP170", Remark); | |||
| 1636 | else | |||
| 1637 | emitRemark<OptimizationRemark>(&F, "OMP170", Remark); | |||
| 1638 | ||||
| 1639 | CGUpdater.removeCallSite(*CI); | |||
| 1640 | CI->replaceAllUsesWith(ReplVal); | |||
| 1641 | CI->eraseFromParent(); | |||
| 1642 | ++NumOpenMPRuntimeCallsDeduplicated; | |||
| 1643 | Changed = true; | |||
| 1644 | return true; | |||
| 1645 | }; | |||
| 1646 | RFI.foreachUse(SCC, ReplaceAndDeleteCB); | |||
| 1647 | ||||
| 1648 | return Changed; | |||
| 1649 | } | |||
| 1650 | ||||
| 1651 | /// Collect arguments that represent the global thread id in \p GTIdArgs. | |||
| 1652 | void collectGlobalThreadIdArguments(SmallSetVector<Value *, 16> &GTIdArgs) { | |||
| 1653 | // TODO: Below we basically perform a fixpoint iteration with a pessimistic | |||
| 1654 | // initialization. We could define an AbstractAttribute instead and | |||
| 1655 | // run the Attributor here once it can be run as an SCC pass. | |||
| 1656 | ||||
| 1657 | // Helper to check the argument \p ArgNo at all call sites of \p F for | |||
| 1658 | // a GTId. | |||
| 1659 | auto CallArgOpIsGTId = [&](Function &F, unsigned ArgNo, CallInst &RefCI) { | |||
| 1660 | if (!F.hasLocalLinkage()) | |||
| 1661 | return false; | |||
| 1662 | for (Use &U : F.uses()) { | |||
| 1663 | if (CallInst *CI = getCallIfRegularCall(U)) { | |||
| 1664 | Value *ArgOp = CI->getArgOperand(ArgNo); | |||
| 1665 | if (CI == &RefCI || GTIdArgs.count(ArgOp) || | |||
| 1666 | getCallIfRegularCall( | |||
| 1667 | *ArgOp, &OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num])) | |||
| 1668 | continue; | |||
| 1669 | } | |||
| 1670 | return false; | |||
| 1671 | } | |||
| 1672 | return true; | |||
| 1673 | }; | |||
| 1674 | ||||
| 1675 | // Helper to identify uses of a GTId as GTId arguments. | |||
| 1676 | auto AddUserArgs = [&](Value &GTId) { | |||
| 1677 | for (Use &U : GTId.uses()) | |||
| 1678 | if (CallInst *CI = dyn_cast<CallInst>(U.getUser())) | |||
| 1679 | if (CI->isArgOperand(&U)) | |||
| 1680 | if (Function *Callee = CI->getCalledFunction()) | |||
| 1681 | if (CallArgOpIsGTId(*Callee, U.getOperandNo(), *CI)) | |||
| 1682 | GTIdArgs.insert(Callee->getArg(U.getOperandNo())); | |||
| 1683 | }; | |||
| 1684 | ||||
| 1685 | // The argument users of __kmpc_global_thread_num calls are GTIds. | |||
| 1686 | OMPInformationCache::RuntimeFunctionInfo &GlobThreadNumRFI = | |||
| 1687 | OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num]; | |||
| 1688 | ||||
| 1689 | GlobThreadNumRFI.foreachUse(SCC, [&](Use &U, Function &F) { | |||
| 1690 | if (CallInst *CI = getCallIfRegularCall(U, &GlobThreadNumRFI)) | |||
| 1691 | AddUserArgs(*CI); | |||
| 1692 | return false; | |||
| 1693 | }); | |||
| 1694 | ||||
| 1695 | // Transitively search for more arguments by looking at the users of the | |||
| 1696 | // ones we know already. During the search the GTIdArgs vector is extended | |||
| 1697 | // so we cannot cache the size nor can we use a range based for. | |||
| 1698 | for (unsigned u = 0; u < GTIdArgs.size(); ++u) | |||
| 1699 | AddUserArgs(*GTIdArgs[u]); | |||
| 1700 | } | |||
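| | // Illustrative sketch (simplified IR, hypothetical callee name): an argument | |||
| | // is classified as a global thread id if every call site of its local-linkage | |||
| | // callee passes either a __kmpc_global_thread_num result or an already-known | |||
| | // GTId in that position, e.g. | |||
| | //   %gtid = call i32 @__kmpc_global_thread_num(%ident) | |||
| | //   call void @helper(i32 %gtid) | |||
| | // makes @helper's first argument a GTId, and the set then grows transitively. | |||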
| 1701 | ||||
| 1702 | /// Kernel (=GPU) optimizations and utility functions | |||
| 1703 | /// | |||
| 1704 | ///{{ | |||
| 1705 | ||||
| 1706 | /// Check if \p F is a kernel, hence entry point for target offloading. | |||
| 1707 | bool isKernel(Function &F) { return OMPInfoCache.Kernels.count(&F); } | |||
| 1708 | ||||
| 1709 | /// Cache to remember the unique kernel for a function. | |||
| 1710 | DenseMap<Function *, Optional<Kernel>> UniqueKernelMap; | |||
| 1711 | ||||
| 1712 | /// Find the unique kernel that will execute \p F, if any. | |||
| 1713 | Kernel getUniqueKernelFor(Function &F); | |||
| 1714 | ||||
| 1715 | /// Find the unique kernel that will execute \p I, if any. | |||
| 1716 | Kernel getUniqueKernelFor(Instruction &I) { | |||
| 1717 | return getUniqueKernelFor(*I.getFunction()); | |||
| 1718 | } | |||
| 1719 | ||||
| 1720 | /// Rewrite the device (=GPU) code state machine created in non-SPMD mode in | |||
| 1721 | /// the cases where we can avoid taking the address of a function. | |||
| 1722 | bool rewriteDeviceCodeStateMachine(); | |||
| 1723 | ||||
| 1724 | /// | |||
| 1725 | ///}} | |||
| 1726 | ||||
| 1727 | /// Emit a remark generically | |||
| 1728 | /// | |||
| 1729 | /// This template function can be used to generically emit a remark. The | |||
| 1730 | /// RemarkKind should be one of the following: | |||
| 1731 | /// - OptimizationRemark to indicate a successful optimization attempt | |||
| 1732 | /// - OptimizationRemarkMissed to report a failed optimization attempt | |||
| 1733 | /// - OptimizationRemarkAnalysis to provide additional information about an | |||
| 1734 | /// optimization attempt | |||
| 1735 | /// | |||
| 1736 | /// The remark is built using a callback function provided by the caller that | |||
| 1737 | /// takes a RemarkKind as input and returns a RemarkKind. | |||
| 1738 | template <typename RemarkKind, typename RemarkCallBack> | |||
| 1739 | void emitRemark(Instruction *I, StringRef RemarkName, | |||
| 1740 | RemarkCallBack &&RemarkCB) const { | |||
| 1741 | Function *F = I->getParent()->getParent(); | |||
| 1742 | auto &ORE = OREGetter(F); | |||
| 1743 | ||||
| 1744 | if (RemarkName.startswith("OMP")) | |||
| 1745 | ORE.emit([&]() { | |||
| 1746 | return RemarkCB(RemarkKind(DEBUG_TYPE"openmp-opt", RemarkName, I)) | |||
| 1747 | << " [" << RemarkName << "]"; | |||
| 1748 | }); | |||
| 1749 | else | |||
| 1750 | ORE.emit( | |||
| 1751 | [&]() { return RemarkCB(RemarkKind(DEBUG_TYPE"openmp-opt", RemarkName, I)); }); | |||
| 1752 | } | |||
| 1753 | ||||
| 1754 | /// Emit a remark on a function. | |||
| 1755 | template <typename RemarkKind, typename RemarkCallBack> | |||
| 1756 | void emitRemark(Function *F, StringRef RemarkName, | |||
| 1757 | RemarkCallBack &&RemarkCB) const { | |||
| 1758 | auto &ORE = OREGetter(F); | |||
| 1759 | ||||
| 1760 | if (RemarkName.startswith("OMP")) | |||
| 1761 | ORE.emit([&]() { | |||
| 1762 | return RemarkCB(RemarkKind(DEBUG_TYPE"openmp-opt", RemarkName, F)) | |||
| 1763 | << " [" << RemarkName << "]"; | |||
| 1764 | }); | |||
| 1765 | else | |||
| 1766 | ORE.emit( | |||
| 1767 | [&]() { return RemarkCB(RemarkKind(DEBUG_TYPE"openmp-opt", RemarkName, F)); }); | |||
| 1768 | } | |||
| 1769 | ||||
| 1770 | /// RAII struct to temporarily change an RTL function's linkage to external. | |||
| 1771 | /// This prevents it from being mistakenly removed by other optimizations. | |||
| 1772 | struct ExternalizationRAII { | |||
| 1773 | ExternalizationRAII(OMPInformationCache &OMPInfoCache, | |||
| 1774 | RuntimeFunction RFKind) | |||
| 1775 | : Declaration(OMPInfoCache.RFIs[RFKind].Declaration) { | |||
| 1776 | if (!Declaration) | |||
| 1777 | return; | |||
| 1778 | ||||
| 1779 | LinkageType = Declaration->getLinkage(); | |||
| 1780 | Declaration->setLinkage(GlobalValue::ExternalLinkage); | |||
| 1781 | } | |||
| 1782 | ||||
| 1783 | ~ExternalizationRAII() { | |||
| 1784 | if (!Declaration) | |||
| 1785 | return; | |||
| 1786 | ||||
| 1787 | Declaration->setLinkage(LinkageType); | |||
| 1788 | } | |||
| 1789 | ||||
| 1790 | Function *Declaration; | |||
| 1791 | GlobalValue::LinkageTypes LinkageType; | |||
| 1792 | }; | |||
| 1793 | ||||
| 1794 | /// The underlying module. | |||
| 1795 | Module &M; | |||
| 1796 | ||||
| 1797 | /// The SCC we are operating on. | |||
| 1798 | SmallVectorImpl<Function *> &SCC; | |||
| 1799 | ||||
| 1800 | /// Callback to update the call graph, the first argument is a removed call, | |||
| 1801 | /// the second an optional replacement call. | |||
| 1802 | CallGraphUpdater &CGUpdater; | |||
| 1803 | ||||
| 1804 | /// Callback to get an OptimizationRemarkEmitter from a Function * | |||
| 1805 | OptimizationRemarkGetter OREGetter; | |||
| 1806 | ||||
| 1807 | /// OpenMP-specific information cache. Also used for Attributor runs. | |||
| 1808 | OMPInformationCache &OMPInfoCache; | |||
| 1809 | ||||
| 1810 | /// Attributor instance. | |||
| 1811 | Attributor &A; | |||
| 1812 | ||||
| 1813 | /// Helper function to run Attributor on SCC. | |||
| 1814 | bool runAttributor(bool IsModulePass) { | |||
| 1815 | if (SCC.empty()) | |||
| 1816 | return false; | |||
| 1817 | ||||
| 1818 | // Temporarily make these functions have external linkage so the Attributor | |||
| 1819 | // doesn't remove them when we try to look them up later. | |||
| 1820 | ExternalizationRAII Parallel(OMPInfoCache, OMPRTL___kmpc_kernel_parallel); | |||
| 1821 | ExternalizationRAII EndParallel(OMPInfoCache, | |||
| 1822 | OMPRTL___kmpc_kernel_end_parallel); | |||
| 1823 | ExternalizationRAII BarrierSPMD(OMPInfoCache, | |||
| 1824 | OMPRTL___kmpc_barrier_simple_spmd); | |||
| 1825 | ||||
| 1826 | registerAAs(IsModulePass); | |||
| 1827 | ||||
| 1828 | ChangeStatus Changed = A.run(); | |||
| 1829 | ||||
| 1830 | LLVM_DEBUG(dbgs() << "[Attributor] Done with " << SCC.size()do { } while (false) | |||
| 1831 | << " functions, result: " << Changed << ".\n")do { } while (false); | |||
| 1832 | ||||
| 1833 | return Changed == ChangeStatus::CHANGED; | |||
| 1834 | } | |||
| 1835 | ||||
| 1836 | void registerFoldRuntimeCall(RuntimeFunction RF); | |||
| 1837 | ||||
| 1838 | /// Populate the Attributor with abstract attribute opportunities in the | |||
| 1839 | /// function. | |||
| 1840 | void registerAAs(bool IsModulePass); | |||
| 1841 | }; | |||
| 1842 | ||||
| 1843 | Kernel OpenMPOpt::getUniqueKernelFor(Function &F) { | |||
| 1844 | if (!OMPInfoCache.ModuleSlice.count(&F)) | |||
| 1845 | return nullptr; | |||
| 1846 | ||||
| 1847 | // Use a scope to keep the lifetime of the CachedKernel short. | |||
| 1848 | { | |||
| 1849 | Optional<Kernel> &CachedKernel = UniqueKernelMap[&F]; | |||
| 1850 | if (CachedKernel) | |||
| 1851 | return *CachedKernel; | |||
| 1852 | ||||
| 1853 | // TODO: We should use an AA to create an (optimistic and callback | |||
| 1854 | // call-aware) call graph. For now we stick to simple patterns that | |||
| 1855 | // are less powerful, basically the worst fixpoint. | |||
| 1856 | if (isKernel(F)) { | |||
| 1857 | CachedKernel = Kernel(&F); | |||
| 1858 | return *CachedKernel; | |||
| 1859 | } | |||
| 1860 | ||||
| 1861 | CachedKernel = nullptr; | |||
| 1862 | if (!F.hasLocalLinkage()) { | |||
| 1863 | ||||
| 1864 | // See https://openmp.llvm.org/remarks/OptimizationRemarks.html | |||
| 1865 | auto Remark = [&](OptimizationRemarkAnalysis ORA) { | |||
| 1866 | return ORA << "Potentially unknown OpenMP target region caller."; | |||
| 1867 | }; | |||
| 1868 | emitRemark<OptimizationRemarkAnalysis>(&F, "OMP100", Remark); | |||
| 1869 | ||||
| 1870 | return nullptr; | |||
| 1871 | } | |||
| 1872 | } | |||
| 1873 | ||||
| 1874 | auto GetUniqueKernelForUse = [&](const Use &U) -> Kernel { | |||
| 1875 | if (auto *Cmp = dyn_cast<ICmpInst>(U.getUser())) { | |||
| 1876 | // Allow use in equality comparisons. | |||
| 1877 | if (Cmp->isEquality()) | |||
| 1878 | return getUniqueKernelFor(*Cmp); | |||
| 1879 | return nullptr; | |||
| 1880 | } | |||
| 1881 | if (auto *CB = dyn_cast<CallBase>(U.getUser())) { | |||
| 1882 | // Allow direct calls. | |||
| 1883 | if (CB->isCallee(&U)) | |||
| 1884 | return getUniqueKernelFor(*CB); | |||
| 1885 | ||||
| 1886 | OMPInformationCache::RuntimeFunctionInfo &KernelParallelRFI = | |||
| 1887 | OMPInfoCache.RFIs[OMPRTL___kmpc_parallel_51]; | |||
| 1888 | // Allow the use in __kmpc_parallel_51 calls. | |||
| 1889 | if (OpenMPOpt::getCallIfRegularCall(*U.getUser(), &KernelParallelRFI)) | |||
| 1890 | return getUniqueKernelFor(*CB); | |||
| 1891 | return nullptr; | |||
| 1892 | } | |||
| 1893 | // Disallow every other use. | |||
| 1894 | return nullptr; | |||
| 1895 | }; | |||
| 1896 | ||||
| 1897 | // TODO: In the future we want to track more than just a unique kernel. | |||
| 1898 | SmallPtrSet<Kernel, 2> PotentialKernels; | |||
| 1899 | OMPInformationCache::foreachUse(F, [&](const Use &U) { | |||
| 1900 | PotentialKernels.insert(GetUniqueKernelForUse(U)); | |||
| 1901 | }); | |||
| 1902 | ||||
| 1903 | Kernel K = nullptr; | |||
| 1904 | if (PotentialKernels.size() == 1) | |||
| 1905 | K = *PotentialKernels.begin(); | |||
| 1906 | ||||
| 1907 | // Cache the result. | |||
| 1908 | UniqueKernelMap[&F] = K; | |||
| 1909 | ||||
| 1910 | return K; | |||
| 1911 | } | |||
| 1912 | ||||
| 1913 | bool OpenMPOpt::rewriteDeviceCodeStateMachine() { | |||
| 1914 | OMPInformationCache::RuntimeFunctionInfo &KernelParallelRFI = | |||
| 1915 | OMPInfoCache.RFIs[OMPRTL___kmpc_parallel_51]; | |||
| 1916 | ||||
| 1917 | bool Changed = false; | |||
| 1918 | if (!KernelParallelRFI) | |||
| 1919 | return Changed; | |||
| 1920 | ||||
| 1921 | for (Function *F : SCC) { | |||
| 1922 | ||||
| 1923 | // Check if the function is a use in a __kmpc_parallel_51 call at | |||
| 1924 | // all. | |||
| 1925 | bool UnknownUse = false; | |||
| 1926 | bool KernelParallelUse = false; | |||
| 1927 | unsigned NumDirectCalls = 0; | |||
| 1928 | ||||
| 1929 | SmallVector<Use *, 2> ToBeReplacedStateMachineUses; | |||
| 1930 | OMPInformationCache::foreachUse(*F, [&](Use &U) { | |||
| 1931 | if (auto *CB = dyn_cast<CallBase>(U.getUser())) | |||
| 1932 | if (CB->isCallee(&U)) { | |||
| 1933 | ++NumDirectCalls; | |||
| 1934 | return; | |||
| 1935 | } | |||
| 1936 | ||||
| 1937 | if (isa<ICmpInst>(U.getUser())) { | |||
| 1938 | ToBeReplacedStateMachineUses.push_back(&U); | |||
| 1939 | return; | |||
| 1940 | } | |||
| 1941 | ||||
| 1942 | // Find wrapper functions that represent parallel kernels. | |||
| 1943 | CallInst *CI = | |||
| 1944 | OpenMPOpt::getCallIfRegularCall(*U.getUser(), &KernelParallelRFI); | |||
| 1945 | const unsigned int WrapperFunctionArgNo = 6; | |||
| 1946 | if (!KernelParallelUse && CI && | |||
| 1947 | CI->getArgOperandNo(&U) == WrapperFunctionArgNo) { | |||
| 1948 | KernelParallelUse = true; | |||
| 1949 | ToBeReplacedStateMachineUses.push_back(&U); | |||
| 1950 | return; | |||
| 1951 | } | |||
| 1952 | UnknownUse = true; | |||
| 1953 | }); | |||
| 1954 | ||||
| 1955 | // Do not emit a remark if we haven't seen a __kmpc_parallel_51 | |||
| 1956 | // use. | |||
| 1957 | if (!KernelParallelUse) | |||
| 1958 | continue; | |||
| 1959 | ||||
| 1960 | // If this ever hits, we should investigate. | |||
| 1961 | // TODO: Checking the number of uses is not a necessary restriction and | |||
| 1962 | // should be lifted. | |||
| 1963 | if (UnknownUse || NumDirectCalls != 1 || | |||
| 1964 | ToBeReplacedStateMachineUses.size() > 2) { | |||
| 1965 | auto Remark = [&](OptimizationRemarkAnalysis ORA) { | |||
| 1966 | return ORA << "Parallel region is used in " | |||
| 1967 | << (UnknownUse ? "unknown" : "unexpected") | |||
| 1968 | << " ways. Will not attempt to rewrite the state machine."; | |||
| 1969 | }; | |||
| 1970 | emitRemark<OptimizationRemarkAnalysis>(F, "OMP101", Remark); | |||
| 1971 | continue; | |||
| 1972 | } | |||
| 1973 | ||||
| 1974 | // Even if we have __kmpc_parallel_51 calls, we (for now) give | |||
| 1975 | // up if the function is not called from a unique kernel. | |||
| 1976 | Kernel K = getUniqueKernelFor(*F); | |||
| 1977 | if (!K) { | |||
| 1978 | auto Remark = [&](OptimizationRemarkAnalysis ORA) { | |||
| 1979 | return ORA << "Parallel region is not called from a unique kernel. " | |||
| 1980 | "Will not attempt to rewrite the state machine."; | |||
| 1981 | }; | |||
| 1982 | emitRemark<OptimizationRemarkAnalysis>(F, "OMP102", Remark); | |||
| 1983 | continue; | |||
| 1984 | } | |||
| 1985 | ||||
| 1986 | // We now know F is a parallel body function called only from the kernel K. | |||
| 1987 | // We also identified the state machine uses in which we replace the | |||
| 1988 | // function pointer by a new global symbol for identification purposes. This | |||
| 1989 | // ensures only direct calls to the function are left. | |||
| 1990 | ||||
| 1991 | Module &M = *F->getParent(); | |||
| 1992 | Type *Int8Ty = Type::getInt8Ty(M.getContext()); | |||
| 1993 | ||||
| 1994 | auto *ID = new GlobalVariable( | |||
| 1995 | M, Int8Ty, /* isConstant */ true, GlobalValue::PrivateLinkage, | |||
| 1996 | UndefValue::get(Int8Ty), F->getName() + ".ID"); | |||
| 1997 | ||||
| 1998 | for (Use *U : ToBeReplacedStateMachineUses) | |||
| 1999 | U->set(ConstantExpr::getPointerBitCastOrAddrSpaceCast( | |||
| 2000 | ID, U->get()->getType())); | |||
| 2001 | ||||
| 2002 | ++NumOpenMPParallelRegionsReplacedInGPUStateMachine; | |||
| 2003 | ||||
| 2004 | Changed = true; | |||
| 2005 | } | |||
| 2006 | ||||
| 2007 | return Changed; | |||
| 2008 | } | |||
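| | // Illustrative sketch (hypothetical names): for a parallel wrapper function | |||
| | // @__omp_outlined_wrapper used once directly and otherwise only in the | |||
| | // kernel's state machine, the pass creates | |||
| | //   @__omp_outlined_wrapper.ID = private constant i8 undef | |||
| | // and redirects the state-machine uses (the equality compares and the | |||
| | // __kmpc_parallel_51 wrapper operand) to this ID, so the function's address | |||
| | // no longer needs to be taken there. | |||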
| 2009 | ||||
| 2010 | /// Abstract Attribute for tracking ICV values. | |||
| 2011 | struct AAICVTracker : public StateWrapper<BooleanState, AbstractAttribute> { | |||
| 2012 | using Base = StateWrapper<BooleanState, AbstractAttribute>; | |||
| 2013 | AAICVTracker(const IRPosition &IRP, Attributor &A) : Base(IRP) {} | |||
| 2014 | ||||
| 2015 | void initialize(Attributor &A) override { | |||
| 2016 | Function *F = getAnchorScope(); | |||
| 2017 | if (!F || !A.isFunctionIPOAmendable(*F)) | |||
| 2018 | indicatePessimisticFixpoint(); | |||
| 2019 | } | |||
| 2020 | ||||
| 2021 | /// Returns true if value is assumed to be tracked. | |||
| 2022 | bool isAssumedTracked() const { return getAssumed(); } | |||
| 2023 | ||||
| 2024 | /// Returns true if value is known to be tracked. | |||
| 2025 | bool isKnownTracked() const { return getAssumed(); } | |||
| 2026 | ||||
| 2027 | /// Create an abstract attribute view for the position \p IRP. | |||
| 2028 | static AAICVTracker &createForPosition(const IRPosition &IRP, Attributor &A); | |||
| 2029 | ||||
| 2030 | /// Return the value with which \p I can be replaced for specific \p ICV. | |||
| 2031 | virtual Optional<Value *> getReplacementValue(InternalControlVar ICV, | |||
| 2032 | const Instruction *I, | |||
| 2033 | Attributor &A) const { | |||
| 2034 | return None; | |||
| 2035 | } | |||
| 2036 | ||||
| 2037 | /// Return an assumed unique ICV value if a single candidate is found. If | |||
| 2038 | /// there cannot be one, return a nullptr. If it is not clear yet, return the | |||
| 2039 | /// Optional::NoneType. | |||
| 2040 | virtual Optional<Value *> | |||
| 2041 | getUniqueReplacementValue(InternalControlVar ICV) const = 0; | |||
| 2042 | ||||
| 2043 | // Currently only nthreads is being tracked. | |||
| 2044 | // This array will only grow over time. | |||
| 2045 | InternalControlVar TrackableICVs[1] = {ICV_nthreads}; | |||
| 2046 | ||||
| 2047 | /// See AbstractAttribute::getName() | |||
| 2048 | const std::string getName() const override { return "AAICVTracker"; } | |||
| 2049 | ||||
| 2050 | /// See AbstractAttribute::getIdAddr() | |||
| 2051 | const char *getIdAddr() const override { return &ID; } | |||
| 2052 | ||||
| 2053 | /// This function should return true if the type of the \p AA is AAICVTracker | |||
| 2054 | static bool classof(const AbstractAttribute *AA) { | |||
| 2055 | return (AA->getIdAddr() == &ID); | |||
| 2056 | } | |||
| 2057 | ||||
| 2058 | static const char ID; | |||
| 2059 | }; | |||
| 2060 | ||||
| 2061 | struct AAICVTrackerFunction : public AAICVTracker { | |||
| 2062 | AAICVTrackerFunction(const IRPosition &IRP, Attributor &A) | |||
| 2063 | : AAICVTracker(IRP, A) {} | |||
| 2064 | ||||
| 2065 | // FIXME: come up with better string. | |||
| 2066 | const std::string getAsStr() const override { return "ICVTrackerFunction"; } | |||
| 2067 | ||||
| 2068 | // FIXME: come up with some stats. | |||
| 2069 | void trackStatistics() const override {} | |||
| 2070 | ||||
| 2071 | /// We don't manifest anything for this AA. | |||
| 2072 | ChangeStatus manifest(Attributor &A) override { | |||
| 2073 | return ChangeStatus::UNCHANGED; | |||
| 2074 | } | |||
| 2075 | ||||
| 2076 | // Map of ICV to their values at specific program point. | |||
| 2077 | EnumeratedArray<DenseMap<Instruction *, Value *>, InternalControlVar, | |||
| 2078 | InternalControlVar::ICV___last> | |||
| 2079 | ICVReplacementValuesMap; | |||
| 2080 | ||||
| 2081 | ChangeStatus updateImpl(Attributor &A) override { | |||
| 2082 | ChangeStatus HasChanged = ChangeStatus::UNCHANGED; | |||
| 2083 | ||||
| 2084 | Function *F = getAnchorScope(); | |||
| 2085 | ||||
| 2086 | auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); | |||
| 2087 | ||||
| 2088 | for (InternalControlVar ICV : TrackableICVs) { | |||
| 2089 | auto &SetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Setter]; | |||
| 2090 | ||||
| 2091 | auto &ValuesMap = ICVReplacementValuesMap[ICV]; | |||
| 2092 | auto TrackValues = [&](Use &U, Function &) { | |||
| 2093 | CallInst *CI = OpenMPOpt::getCallIfRegularCall(U); | |||
| 2094 | if (!CI) | |||
| 2095 | return false; | |||
| 2096 | ||||
| 2097 | // FIXME: handle setters with more than one argument. | |||
| 2098 | /// Track new value. | |||
| 2099 | if (ValuesMap.insert(std::make_pair(CI, CI->getArgOperand(0))).second) | |||
| 2100 | HasChanged = ChangeStatus::CHANGED; | |||
| 2101 | ||||
| 2102 | return false; | |||
| 2103 | }; | |||
| 2104 | ||||
| 2105 | auto CallCheck = [&](Instruction &I) { | |||
| 2106 | Optional<Value *> ReplVal = getValueForCall(A, &I, ICV); | |||
| 2107 | if (ReplVal.hasValue() && | |||
| 2108 | ValuesMap.insert(std::make_pair(&I, *ReplVal)).second) | |||
| 2109 | HasChanged = ChangeStatus::CHANGED; | |||
| 2110 | ||||
| 2111 | return true; | |||
| 2112 | }; | |||
| 2113 | ||||
| 2114 | // Track all changes of an ICV. | |||
| 2115 | SetterRFI.foreachUse(TrackValues, F); | |||
| 2116 | ||||
| 2117 | bool UsedAssumedInformation = false; | |||
| 2118 | A.checkForAllInstructions(CallCheck, *this, {Instruction::Call}, | |||
| 2119 | UsedAssumedInformation, | |||
| 2120 | /* CheckBBLivenessOnly */ true); | |||
| 2121 | ||||
| 2122 | /// TODO: Figure out a way to avoid adding entry in | |||
| 2123 | /// ICVReplacementValuesMap | |||
| 2124 | Instruction *Entry = &F->getEntryBlock().front(); | |||
| 2125 | if (HasChanged == ChangeStatus::CHANGED && !ValuesMap.count(Entry)) | |||
| 2126 | ValuesMap.insert(std::make_pair(Entry, nullptr)); | |||
| 2127 | } | |||
| 2128 | ||||
| 2129 | return HasChanged; | |||
| 2130 | } | |||
| 2131 | ||||
| 2132 | /// Helper to check if \p I is a call and get the value for it if it is | |||
| 2133 | /// unique. | |||
| 2134 | Optional<Value *> getValueForCall(Attributor &A, const Instruction *I, | |||
| 2135 | InternalControlVar &ICV) const { | |||
| 2136 | ||||
| 2137 | const auto *CB = dyn_cast<CallBase>(I); | |||
| 2138 | if (!CB || CB->hasFnAttr("no_openmp") || | |||
| 2139 | CB->hasFnAttr("no_openmp_routines")) | |||
| 2140 | return None; | |||
| 2141 | ||||
| 2142 | auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); | |||
| 2143 | auto &GetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Getter]; | |||
| 2144 | auto &SetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Setter]; | |||
| 2145 | Function *CalledFunction = CB->getCalledFunction(); | |||
| 2146 | ||||
| 2147 | // Indirect call, assume ICV changes. | |||
| 2148 | if (CalledFunction == nullptr) | |||
| 2149 | return nullptr; | |||
| 2150 | if (CalledFunction == GetterRFI.Declaration) | |||
| 2151 | return None; | |||
| 2152 | if (CalledFunction == SetterRFI.Declaration) { | |||
| 2153 | if (ICVReplacementValuesMap[ICV].count(I)) | |||
| 2154 | return ICVReplacementValuesMap[ICV].lookup(I); | |||
| 2155 | ||||
| 2156 | return nullptr; | |||
| 2157 | } | |||
| 2158 | ||||
| 2159 | // Since we don't know, assume it changes the ICV. | |||
| 2160 | if (CalledFunction->isDeclaration()) | |||
| 2161 | return nullptr; | |||
| 2162 | ||||
| 2163 | const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>( | |||
| 2164 | *this, IRPosition::callsite_returned(*CB), DepClassTy::REQUIRED); | |||
| 2165 | ||||
| 2166 | if (ICVTrackingAA.isAssumedTracked()) | |||
| 2167 | return ICVTrackingAA.getUniqueReplacementValue(ICV); | |||
| 2168 | ||||
| 2169 | // If we don't know, assume it changes. | |||
| 2170 | return nullptr; | |||
| 2171 | } | |||
| 2172 | ||||
| 2173 | // We don't check unique value for a function, so return None. | |||
| 2174 | Optional<Value *> | |||
| 2175 | getUniqueReplacementValue(InternalControlVar ICV) const override { | |||
| 2176 | return None; | |||
| 2177 | } | |||
| 2178 | ||||
| 2179 | /// Return the value with which \p I can be replaced for specific \p ICV. | |||
| 2180 | Optional<Value *> getReplacementValue(InternalControlVar ICV, | |||
| 2181 | const Instruction *I, | |||
| 2182 | Attributor &A) const override { | |||
| 2183 | const auto &ValuesMap = ICVReplacementValuesMap[ICV]; | |||
| 2184 | if (ValuesMap.count(I)) | |||
| 2185 | return ValuesMap.lookup(I); | |||
| 2186 | ||||
| 2187 | SmallVector<const Instruction *, 16> Worklist; | |||
| 2188 | SmallPtrSet<const Instruction *, 16> Visited; | |||
| 2189 | Worklist.push_back(I); | |||
| 2190 | ||||
| 2191 | Optional<Value *> ReplVal; | |||
| 2192 | ||||
| 2193 | while (!Worklist.empty()) { | |||
| 2194 | const Instruction *CurrInst = Worklist.pop_back_val(); | |||
| 2195 | if (!Visited.insert(CurrInst).second) | |||
| 2196 | continue; | |||
| 2197 | ||||
| 2198 | const BasicBlock *CurrBB = CurrInst->getParent(); | |||
| 2199 | ||||
| 2200 | // Go up and look for all potential setters/calls that might change the | |||
| 2201 | // ICV. | |||
| 2202 | while ((CurrInst = CurrInst->getPrevNode())) { | |||
| 2203 | if (ValuesMap.count(CurrInst)) { | |||
| 2204 | Optional<Value *> NewReplVal = ValuesMap.lookup(CurrInst); | |||
| 2205 | // Unknown value, track new. | |||
| 2206 | if (!ReplVal.hasValue()) { | |||
| 2207 | ReplVal = NewReplVal; | |||
| 2208 | break; | |||
| 2209 | } | |||
| 2210 | ||||
| 2211 | // If we found a new value, we can't know the ICV value anymore. | |||
| 2212 | if (NewReplVal.hasValue()) | |||
| 2213 | if (ReplVal != NewReplVal) | |||
| 2214 | return nullptr; | |||
| 2215 | ||||
| 2216 | break; | |||
| 2217 | } | |||
| 2218 | ||||
| 2219 | Optional<Value *> NewReplVal = getValueForCall(A, CurrInst, ICV); | |||
| 2220 | if (!NewReplVal.hasValue()) | |||
| 2221 | continue; | |||
| 2222 | ||||
| 2223 | // Unknown value, track new. | |||
| 2224 | if (!ReplVal.hasValue()) { | |||
| 2225 | ReplVal = NewReplVal; | |||
| 2226 | break; | |||
| 2227 | } | |||
| 2228 | ||||
| 2229 | // if (NewReplVal.hasValue()) | |||
| 2230 | // We found a new value, we can't know the ICV value anymore. | |||
| 2231 | if (ReplVal != NewReplVal) | |||
| 2232 | return nullptr; | |||
| 2233 | } | |||
| 2234 | ||||
| 2235 | // If we are in the same BB and we have a value, we are done. | |||
| 2236 | if (CurrBB == I->getParent() && ReplVal.hasValue()) | |||
| 2237 | return ReplVal; | |||
| 2238 | ||||
| 2239 | // Go through all predecessors and add terminators for analysis. | |||
| 2240 | for (const BasicBlock *Pred : predecessors(CurrBB)) | |||
| 2241 | if (const Instruction *Terminator = Pred->getTerminator()) | |||
| 2242 | Worklist.push_back(Terminator); | |||
| 2243 | } | |||
| 2244 | ||||
| 2245 | return ReplVal; | |||
| 2246 | } | |||
| 2247 | }; | |||
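| | // Illustrative sketch of the tracking above, for the one ICV currently | |||
| | // tracked (nthreads): after | |||
| | //   call void @omp_set_num_threads(i32 4) | |||
| | // a later getter in the same function that only reaches this setter can be | |||
| | // folded to the constant 4 via the backwards walk over ValuesMap, while any | |||
| | // intervening unknown (non-intrinsic) call conservatively resets the value | |||
| | // to "unknown". | |||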
| 2248 | ||||
| 2249 | struct AAICVTrackerFunctionReturned : AAICVTracker { | |||
| 2250 | AAICVTrackerFunctionReturned(const IRPosition &IRP, Attributor &A) | |||
| 2251 | : AAICVTracker(IRP, A) {} | |||
| 2252 | ||||
| 2253 | // FIXME: come up with better string. | |||
| 2254 | const std::string getAsStr() const override { | |||
| 2255 | return "ICVTrackerFunctionReturned"; | |||
| 2256 | } | |||
| 2257 | ||||
| 2258 | // FIXME: come up with some stats. | |||
| 2259 | void trackStatistics() const override {} | |||
| 2260 | ||||
| 2261 | /// We don't manifest anything for this AA. | |||
| 2262 | ChangeStatus manifest(Attributor &A) override { | |||
| 2263 | return ChangeStatus::UNCHANGED; | |||
| 2264 | } | |||
| 2265 | ||||
| 2266 | // Map of ICV to their values at specific program point. | |||
| 2267 | EnumeratedArray<Optional<Value *>, InternalControlVar, | |||
| 2268 | InternalControlVar::ICV___last> | |||
| 2269 | ICVReplacementValuesMap; | |||
| 2270 | ||||
| 2271 | /// Return the value with which \p I can be replaced for specific \p ICV. | |||
| 2272 | Optional<Value *> | |||
| 2273 | getUniqueReplacementValue(InternalControlVar ICV) const override { | |||
| 2274 | return ICVReplacementValuesMap[ICV]; | |||
| 2275 | } | |||
| 2276 | ||||
| 2277 | ChangeStatus updateImpl(Attributor &A) override { | |||
| 2278 | ChangeStatus Changed = ChangeStatus::UNCHANGED; | |||
| 2279 | const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>( | |||
| 2280 | *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED); | |||
| 2281 | ||||
| 2282 | if (!ICVTrackingAA.isAssumedTracked()) | |||
| 2283 | return indicatePessimisticFixpoint(); | |||
| 2284 | ||||
| 2285 | for (InternalControlVar ICV : TrackableICVs) { | |||
| 2286 | Optional<Value *> &ReplVal = ICVReplacementValuesMap[ICV]; | |||
| 2287 | Optional<Value *> UniqueICVValue; | |||
| 2288 | ||||
| 2289 | auto CheckReturnInst = [&](Instruction &I) { | |||
| 2290 | Optional<Value *> NewReplVal = | |||
| 2291 | ICVTrackingAA.getReplacementValue(ICV, &I, A); | |||
| 2292 | ||||
| 2293 | // If we found a second ICV value there is no unique returned value. | |||
| 2294 | if (UniqueICVValue.hasValue() && UniqueICVValue != NewReplVal) | |||
| 2295 | return false; | |||
| 2296 | ||||
| 2297 | UniqueICVValue = NewReplVal; | |||
| 2298 | ||||
| 2299 | return true; | |||
| 2300 | }; | |||
| 2301 | ||||
| 2302 | bool UsedAssumedInformation = false; | |||
| 2303 | if (!A.checkForAllInstructions(CheckReturnInst, *this, {Instruction::Ret}, | |||
| 2304 | UsedAssumedInformation, | |||
| 2305 | /* CheckBBLivenessOnly */ true)) | |||
| 2306 | UniqueICVValue = nullptr; | |||
| 2307 | ||||
| 2308 | if (UniqueICVValue == ReplVal) | |||
| 2309 | continue; | |||
| 2310 | ||||
| 2311 | ReplVal = UniqueICVValue; | |||
| 2312 | Changed = ChangeStatus::CHANGED; | |||
| 2313 | } | |||
| 2314 | ||||
| 2315 | return Changed; | |||
| 2316 | } | |||
| 2317 | }; | |||
| 2318 | ||||
| 2319 | struct AAICVTrackerCallSite : AAICVTracker { | |||
| 2320 | AAICVTrackerCallSite(const IRPosition &IRP, Attributor &A) | |||
| 2321 | : AAICVTracker(IRP, A) {} | |||
| 2322 | ||||
| 2323 | void initialize(Attributor &A) override { | |||
| 2324 | Function *F = getAnchorScope(); | |||
| 2325 | if (!F || !A.isFunctionIPOAmendable(*F)) | |||
| 2326 | indicatePessimisticFixpoint(); | |||
| 2327 | ||||
| 2328 | // We only initialize this AA for getters, so we need to know which ICV it | |||
| 2329 | // gets. | |||
| 2330 | auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); | |||
| 2331 | for (InternalControlVar ICV : TrackableICVs) { | |||
| 2332 | auto ICVInfo = OMPInfoCache.ICVs[ICV]; | |||
| 2333 | auto &Getter = OMPInfoCache.RFIs[ICVInfo.Getter]; | |||
| 2334 | if (Getter.Declaration == getAssociatedFunction()) { | |||
| 2335 | AssociatedICV = ICVInfo.Kind; | |||
| 2336 | return; | |||
| 2337 | } | |||
| 2338 | } | |||
| 2339 | ||||
| 2340 | /// Unknown ICV. | |||
| 2341 | indicatePessimisticFixpoint(); | |||
| 2342 | } | |||
| 2343 | ||||
| 2344 | ChangeStatus manifest(Attributor &A) override { | |||
| 2345 | if (!ReplVal.hasValue() || !ReplVal.getValue()) | |||
| 2346 | return ChangeStatus::UNCHANGED; | |||
| 2347 | ||||
| 2348 | A.changeValueAfterManifest(*getCtxI(), **ReplVal); | |||
| 2349 | A.deleteAfterManifest(*getCtxI()); | |||
| 2350 | ||||
| 2351 | return ChangeStatus::CHANGED; | |||
| 2352 | } | |||
| 2353 | ||||
| 2354 | // FIXME: come up with better string. | |||
| 2355 | const std::string getAsStr() const override { return "ICVTrackerCallSite"; } | |||
| 2356 | ||||
| 2357 | // FIXME: come up with some stats. | |||
| 2358 | void trackStatistics() const override {} | |||
| 2359 | ||||
| 2360 | InternalControlVar AssociatedICV; | |||
| 2361 | Optional<Value *> ReplVal; | |||
| 2362 | ||||
| 2363 | ChangeStatus updateImpl(Attributor &A) override { | |||
| 2364 | const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>( | |||
| 2365 | *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED); | |||
| 2366 | ||||
| 2367 | // We don't have any information, so we assume it changes the ICV. | |||
| 2368 | if (!ICVTrackingAA.isAssumedTracked()) | |||
| 2369 | return indicatePessimisticFixpoint(); | |||
| 2370 | ||||
| 2371 | Optional<Value *> NewReplVal = | |||
| 2372 | ICVTrackingAA.getReplacementValue(AssociatedICV, getCtxI(), A); | |||
| 2373 | ||||
| 2374 | if (ReplVal == NewReplVal) | |||
| 2375 | return ChangeStatus::UNCHANGED; | |||
| 2376 | ||||
| 2377 | ReplVal = NewReplVal; | |||
| 2378 | return ChangeStatus::CHANGED; | |||
| 2379 | } | |||
| 2380 | ||||
| 2381 | // Return the value with which the associated value can be replaced for the | |||
| 2382 | // specific \p ICV. | |||
| 2383 | Optional<Value *> | |||
| 2384 | getUniqueReplacementValue(InternalControlVar ICV) const override { | |||
| 2385 | return ReplVal; | |||
| 2386 | } | |||
| 2387 | }; | |||
| 2388 | ||||
| 2389 | struct AAICVTrackerCallSiteReturned : AAICVTracker { | |||
| 2390 | AAICVTrackerCallSiteReturned(const IRPosition &IRP, Attributor &A) | |||
| 2391 | : AAICVTracker(IRP, A) {} | |||
| 2392 | ||||
| 2393 | // FIXME: come up with better string. | |||
| 2394 | const std::string getAsStr() const override { | |||
| 2395 | return "ICVTrackerCallSiteReturned"; | |||
| 2396 | } | |||
| 2397 | ||||
| 2398 | // FIXME: come up with some stats. | |||
| 2399 | void trackStatistics() const override {} | |||
| 2400 | ||||
| 2401 | /// We don't manifest anything for this AA. | |||
| 2402 | ChangeStatus manifest(Attributor &A) override { | |||
| 2403 | return ChangeStatus::UNCHANGED; | |||
| 2404 | } | |||
| 2405 | ||||
| 2406 | // Map of ICVs to their values at a specific program point. | |||
| 2407 | EnumeratedArray<Optional<Value *>, InternalControlVar, | |||
| 2408 | InternalControlVar::ICV___last> | |||
| 2409 | ICVReplacementValuesMap; | |||
| 2410 | ||||
| 2411 | /// Return the value with which the associated value can be replaced for the | |||
| 2412 | /// specific \p ICV. | |||
| 2413 | Optional<Value *> | |||
| 2414 | getUniqueReplacementValue(InternalControlVar ICV) const override { | |||
| 2415 | return ICVReplacementValuesMap[ICV]; | |||
| 2416 | } | |||
| 2417 | ||||
| 2418 | ChangeStatus updateImpl(Attributor &A) override { | |||
| 2419 | ChangeStatus Changed = ChangeStatus::UNCHANGED; | |||
| 2420 | const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>( | |||
| 2421 | *this, IRPosition::returned(*getAssociatedFunction()), | |||
| 2422 | DepClassTy::REQUIRED); | |||
| 2423 | ||||
| 2424 | // We don't have any information, so we assume it changes the ICV. | |||
| 2425 | if (!ICVTrackingAA.isAssumedTracked()) | |||
| 2426 | return indicatePessimisticFixpoint(); | |||
| 2427 | ||||
| 2428 | for (InternalControlVar ICV : TrackableICVs) { | |||
| 2429 | Optional<Value *> &ReplVal = ICVReplacementValuesMap[ICV]; | |||
| 2430 | Optional<Value *> NewReplVal = | |||
| 2431 | ICVTrackingAA.getUniqueReplacementValue(ICV); | |||
| 2432 | ||||
| 2433 | if (ReplVal == NewReplVal) | |||
| 2434 | continue; | |||
| 2435 | ||||
| 2436 | ReplVal = NewReplVal; | |||
| 2437 | Changed = ChangeStatus::CHANGED; | |||
| 2438 | } | |||
| 2439 | return Changed; | |||
| 2440 | } | |||
| 2441 | }; | |||
| 2442 | ||||
| 2443 | struct AAExecutionDomainFunction : public AAExecutionDomain { | |||
| 2444 | AAExecutionDomainFunction(const IRPosition &IRP, Attributor &A) | |||
| 2445 | : AAExecutionDomain(IRP, A) {} | |||
| 2446 | ||||
| 2447 | const std::string getAsStr() const override { | |||
| 2448 | return "[AAExecutionDomain] " + std::to_string(SingleThreadedBBs.size()) + | |||
| 2449 | "/" + std::to_string(NumBBs) + " BBs thread 0 only."; | |||
| 2450 | } | |||
| 2451 | ||||
| 2452 | /// See AbstractAttribute::trackStatistics(). | |||
| 2453 | void trackStatistics() const override {} | |||
| 2454 | ||||
| 2455 | void initialize(Attributor &A) override { | |||
| 2456 | Function *F = getAnchorScope(); | |||
| 2457 | for (const auto &BB : *F) | |||
| 2458 | SingleThreadedBBs.insert(&BB); | |||
| 2459 | NumBBs = SingleThreadedBBs.size(); | |||
| 2460 | } | |||
| 2461 | ||||
| 2462 | ChangeStatus manifest(Attributor &A) override { | |||
| 2463 | LLVM_DEBUG({ | |||
| 2464 | for (const BasicBlock *BB : SingleThreadedBBs) | |||
| 2465 | dbgs() << TAG << " Basic block @" << getAnchorScope()->getName() << " " | |||
| 2466 | << BB->getName() << " is executed by a single thread.\n"; | |||
| 2467 | }); | |||
| 2468 | return ChangeStatus::UNCHANGED; | |||
| 2469 | } | |||
| 2470 | ||||
| 2471 | ChangeStatus updateImpl(Attributor &A) override; | |||
| 2472 | ||||
| 2473 | /// Check if an instruction is executed by a single thread. | |||
| 2474 | bool isExecutedByInitialThreadOnly(const Instruction &I) const override { | |||
| 2475 | return isExecutedByInitialThreadOnly(*I.getParent()); | |||
| 2476 | } | |||
| 2477 | ||||
| 2478 | bool isExecutedByInitialThreadOnly(const BasicBlock &BB) const override { | |||
| 2479 | return isValidState() && SingleThreadedBBs.contains(&BB); | |||
| 2480 | } | |||
| 2481 | ||||
| 2482 | /// Set of basic blocks that are executed by a single thread. | |||
| 2483 | DenseSet<const BasicBlock *> SingleThreadedBBs; | |||
| 2484 | ||||
| 2485 | /// Total number of basic blocks in this function. | |||
| 2486 | long unsigned NumBBs; | |||
| 2487 | }; | |||
| 2488 | ||||
| 2489 | ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) { | |||
| 2490 | Function *F = getAnchorScope(); | |||
| 2491 | ReversePostOrderTraversal<Function *> RPOT(F); | |||
| 2492 | auto NumSingleThreadedBBs = SingleThreadedBBs.size(); | |||
| 2493 | ||||
| 2494 | bool AllCallSitesKnown; | |||
| 2495 | auto PredForCallSite = [&](AbstractCallSite ACS) { | |||
| 2496 | const auto &ExecutionDomainAA = A.getAAFor<AAExecutionDomain>( | |||
| 2497 | *this, IRPosition::function(*ACS.getInstruction()->getFunction()), | |||
| 2498 | DepClassTy::REQUIRED); | |||
| 2499 | return ACS.isDirectCall() && | |||
| 2500 | ExecutionDomainAA.isExecutedByInitialThreadOnly( | |||
| 2501 | *ACS.getInstruction()); | |||
| 2502 | }; | |||
| 2503 | ||||
| 2504 | if (!A.checkForAllCallSites(PredForCallSite, *this, | |||
| 2505 | /* RequiresAllCallSites */ true, | |||
| 2506 | AllCallSitesKnown)) | |||
| 2507 | SingleThreadedBBs.erase(&F->getEntryBlock()); | |||
| 2508 | ||||
| 2509 | auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); | |||
| 2510 | auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_target_init]; | |||
| 2511 | ||||
| 2512 | // Check if the edge into the successor block compares the __kmpc_target_init | |||
| 2513 | // result with -1. If we are in non-SPMD-mode, that signals that only the | |||
| 2514 | // main thread will execute the edge. | |||
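| | // A sketch of the IR pattern matched below (names illustrative only): | |||
| | //   %tid = call i32 @__kmpc_target_init(%struct.ident_t* @loc, i1 false, ...) | |||
| | //   %cmp = icmp eq i32 %tid, -1 | |||
| | //   br i1 %cmp, label %main.thread.only, label %workers | |||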
| 2515 | auto IsInitialThreadOnly = [&](BranchInst *Edge, BasicBlock *SuccessorBB) { | |||
| 2516 | if (!Edge || !Edge->isConditional()) | |||
| 2517 | return false; | |||
| 2518 | if (Edge->getSuccessor(0) != SuccessorBB) | |||
| 2519 | return false; | |||
| 2520 | ||||
| 2521 | auto *Cmp = dyn_cast<CmpInst>(Edge->getCondition()); | |||
| 2522 | if (!Cmp || !Cmp->isTrueWhenEqual() || !Cmp->isEquality()) | |||
| 2523 | return false; | |||
| 2524 | ||||
| 2525 | ConstantInt *C = dyn_cast<ConstantInt>(Cmp->getOperand(1)); | |||
| 2526 | if (!C) | |||
| 2527 | return false; | |||
| 2528 | ||||
| 2529 | // Match: -1 == __kmpc_target_init (for non-SPMD kernels only!) | |||
| 2530 | if (C->isAllOnesValue()) { | |||
| 2531 | auto *CB = dyn_cast<CallBase>(Cmp->getOperand(0)); | |||
| 2532 | CB = CB ? OpenMPOpt::getCallIfRegularCall(*CB, &RFI) : nullptr; | |||
| 2533 | if (!CB) | |||
| 2534 | return false; | |||
| 2535 | const int InitIsSPMDArgNo = 1; | |||
| 2536 | auto *IsSPMDModeCI = | |||
| 2537 | dyn_cast<ConstantInt>(CB->getOperand(InitIsSPMDArgNo)); | |||
| 2538 | return IsSPMDModeCI && IsSPMDModeCI->isZero(); | |||
| 2539 | } | |||
| 2540 | ||||
| 2541 | return false; | |||
| 2542 | }; | |||
| 2543 | ||||
| 2544 | // Merge all the predecessor states into the current basic block. A basic | |||
| 2545 | // block is executed by a single thread if all of its predecessors are. | |||
| 2546 | auto MergePredecessorStates = [&](BasicBlock *BB) { | |||
| 2547 | if (pred_begin(BB) == pred_end(BB)) | |||
| 2548 | return SingleThreadedBBs.contains(BB); | |||
| 2549 | ||||
| 2550 | bool IsInitialThread = true; | |||
| 2551 | for (auto PredBB = pred_begin(BB), PredEndBB = pred_end(BB); | |||
| 2552 | PredBB != PredEndBB; ++PredBB) { | |||
| 2553 | if (!IsInitialThreadOnly(dyn_cast<BranchInst>((*PredBB)->getTerminator()), | |||
| 2554 | BB)) | |||
| 2555 | IsInitialThread &= SingleThreadedBBs.contains(*PredBB); | |||
| 2556 | } | |||
| 2557 | ||||
| 2558 | return IsInitialThread; | |||
| 2559 | }; | |||
| 2560 | ||||
| 2561 | for (auto *BB : RPOT) { | |||
| 2562 | if (!MergePredecessorStates(BB)) | |||
| 2563 | SingleThreadedBBs.erase(BB); | |||
| 2564 | } | |||
| 2565 | ||||
| 2566 | return (NumSingleThreadedBBs == SingleThreadedBBs.size()) | |||
| 2567 | ? ChangeStatus::UNCHANGED | |||
| 2568 | : ChangeStatus::CHANGED; | |||
| 2569 | } | |||
| 2570 | ||||
| 2571 | /// Try to replace memory allocation calls called by a single thread with a | |||
| 2572 | /// static buffer of shared memory. | |||
| 2573 | struct AAHeapToShared : public StateWrapper<BooleanState, AbstractAttribute> { | |||
| 2574 | using Base = StateWrapper<BooleanState, AbstractAttribute>; | |||
| 2575 | AAHeapToShared(const IRPosition &IRP, Attributor &A) : Base(IRP) {} | |||
| 2576 | ||||
| 2577 | /// Create an abstract attribute view for the position \p IRP. | |||
| 2578 | static AAHeapToShared &createForPosition(const IRPosition &IRP, | |||
| 2579 | Attributor &A); | |||
| 2580 | ||||
| 2581 | /// Returns true if HeapToShared conversion is assumed to be possible. | |||
| 2582 | virtual bool isAssumedHeapToShared(CallBase &CB) const = 0; | |||
| 2583 | ||||
| 2584 | /// Returns true if HeapToShared conversion is assumed and the CB is a | |||
| 2585 | /// callsite to a free operation to be removed. | |||
| 2586 | virtual bool isAssumedHeapToSharedRemovedFree(CallBase &CB) const = 0; | |||
| 2587 | ||||
| 2588 | /// See AbstractAttribute::getName(). | |||
| 2589 | const std::string getName() const override { return "AAHeapToShared"; } | |||
| 2590 | ||||
| 2591 | /// See AbstractAttribute::getIdAddr(). | |||
| 2592 | const char *getIdAddr() const override { return &ID; } | |||
| 2593 | ||||
| 2594 | /// This function should return true if the type of the \p AA is | |||
| 2595 | /// AAHeapToShared. | |||
| 2596 | static bool classof(const AbstractAttribute *AA) { | |||
| 2597 | return (AA->getIdAddr() == &ID); | |||
| 2598 | } | |||
| 2599 | ||||
| 2600 | /// Unique ID (due to the unique address) | |||
| 2601 | static const char ID; | |||
| 2602 | }; | |||
| 2603 | ||||
| 2604 | struct AAHeapToSharedFunction : public AAHeapToShared { | |||
| 2605 | AAHeapToSharedFunction(const IRPosition &IRP, Attributor &A) | |||
| 2606 | : AAHeapToShared(IRP, A) {} | |||
| 2607 | ||||
| 2608 | const std::string getAsStr() const override { | |||
| 2609 | return "[AAHeapToShared] " + std::to_string(MallocCalls.size()) + | |||
| 2610 | " malloc calls eligible."; | |||
| 2611 | } | |||
| 2612 | ||||
| 2613 | /// See AbstractAttribute::trackStatistics(). | |||
| 2614 | void trackStatistics() const override {} | |||
| 2615 | ||||
| 2616 | /// This function finds free calls that will be removed by the | |||
| 2617 | /// HeapToShared transformation. | |||
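| | /// Roughly: for a `%p = call i8* @__kmpc_alloc_shared(...)` with exactly one | |||
| | /// `__kmpc_free_shared` user on `%p`, that free call becomes removable | |||
| | /// (sketch; the loop below performs the actual check). | |||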
| 2618 | void findPotentialRemovedFreeCalls(Attributor &A) { | |||
| 2619 | auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); | |||
| 2620 | auto &FreeRFI = OMPInfoCache.RFIs[OMPRTL___kmpc_free_shared]; | |||
| 2621 | ||||
| 2622 | PotentialRemovedFreeCalls.clear(); | |||
| 2623 | // Update free call users of found malloc calls. | |||
| 2624 | for (CallBase *CB : MallocCalls) { | |||
| 2625 | SmallVector<CallBase *, 4> FreeCalls; | |||
| 2626 | for (auto *U : CB->users()) { | |||
| 2627 | CallBase *C = dyn_cast<CallBase>(U); | |||
| 2628 | if (C && C->getCalledFunction() == FreeRFI.Declaration) | |||
| 2629 | FreeCalls.push_back(C); | |||
| 2630 | } | |||
| 2631 | ||||
| 2632 | if (FreeCalls.size() != 1) | |||
| 2633 | continue; | |||
| 2634 | ||||
| 2635 | PotentialRemovedFreeCalls.insert(FreeCalls.front()); | |||
| 2636 | } | |||
| 2637 | } | |||
| 2638 | ||||
| 2639 | void initialize(Attributor &A) override { | |||
| 2640 | auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); | |||
| 2641 | auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared]; | |||
| 2642 | ||||
| 2643 | for (User *U : RFI.Declaration->users()) | |||
| 2644 | if (CallBase *CB = dyn_cast<CallBase>(U)) | |||
| 2645 | MallocCalls.insert(CB); | |||
| 2646 | ||||
| 2647 | findPotentialRemovedFreeCalls(A); | |||
| 2648 | } | |||
| 2649 | ||||
| 2650 | bool isAssumedHeapToShared(CallBase &CB) const override { | |||
| 2651 | return isValidState() && MallocCalls.count(&CB); | |||
| 2652 | } | |||
| 2653 | ||||
| 2654 | bool isAssumedHeapToSharedRemovedFree(CallBase &CB) const override { | |||
| 2655 | return isValidState() && PotentialRemovedFreeCalls.count(&CB); | |||
| 2656 | } | |||
| 2657 | ||||
| 2658 | ChangeStatus manifest(Attributor &A) override { | |||
| 2659 | if (MallocCalls.empty()) | |||
| 2660 | return ChangeStatus::UNCHANGED; | |||
| 2661 | ||||
| 2662 | auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); | |||
| 2663 | auto &FreeCall = OMPInfoCache.RFIs[OMPRTL___kmpc_free_shared]; | |||
| 2664 | ||||
| 2665 | Function *F = getAnchorScope(); | |||
| 2666 | auto *HS = A.lookupAAFor<AAHeapToStack>(IRPosition::function(*F), this, | |||
| 2667 | DepClassTy::OPTIONAL); | |||
| 2668 | ||||
| 2669 | ChangeStatus Changed = ChangeStatus::UNCHANGED; | |||
| 2670 | for (CallBase *CB : MallocCalls) { | |||
| 2671 | // Skip replacing this if HeapToStack has already claimed it. | |||
| 2672 | if (HS && HS->isAssumedHeapToStack(*CB)) | |||
| 2673 | continue; | |||
| 2674 | ||||
| 2675 | // Find the unique free call to remove it. | |||
| 2676 | SmallVector<CallBase *, 4> FreeCalls; | |||
| 2677 | for (auto *U : CB->users()) { | |||
| 2678 | CallBase *C = dyn_cast<CallBase>(U); | |||
| 2679 | if (C && C->getCalledFunction() == FreeCall.Declaration) | |||
| 2680 | FreeCalls.push_back(C); | |||
| 2681 | } | |||
| 2682 | if (FreeCalls.size() != 1) | |||
| 2683 | continue; | |||
| 2684 | ||||
| 2685 | ConstantInt *AllocSize = dyn_cast<ConstantInt>(CB->getArgOperand(0)); | |||
| 2686 | ||||
| 2687 | LLVM_DEBUG(dbgs() << TAG << "Replace globalization call in " | |||
| 2688 | << CB->getCaller()->getName() << " with " | |||
| 2689 | << AllocSize->getZExtValue() | |||
| 2690 | << " bytes of shared memory\n"); | |||
| 2691 | ||||
| 2692 | // Create a new shared memory buffer of the same size as the allocation | |||
| 2693 | // and replace all the uses of the original allocation with it. | |||
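| | // Roughly (illustrative IR, exact names and casts assumed): | |||
| | //   %p = call i8* @__kmpc_alloc_shared(i64 16) | |||
| | // is replaced by a pointer cast of | |||
| | //   @<callee>_shared = internal addrspace(<Shared>) global [16 x i8] undef | |||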
| 2694 | Module *M = CB->getModule(); | |||
| 2695 | Type *Int8Ty = Type::getInt8Ty(M->getContext()); | |||
| 2696 | Type *Int8ArrTy = ArrayType::get(Int8Ty, AllocSize->getZExtValue()); | |||
| 2697 | auto *SharedMem = new GlobalVariable( | |||
| 2698 | *M, Int8ArrTy, /* IsConstant */ false, GlobalValue::InternalLinkage, | |||
| 2699 | UndefValue::get(Int8ArrTy), CB->getName(), nullptr, | |||
| 2700 | GlobalValue::NotThreadLocal, | |||
| 2701 | static_cast<unsigned>(AddressSpace::Shared)); | |||
| 2702 | auto *NewBuffer = | |||
| 2703 | ConstantExpr::getPointerCast(SharedMem, Int8Ty->getPointerTo()); | |||
| 2704 | ||||
| 2705 | auto Remark = [&](OptimizationRemark OR) { | |||
| 2706 | return OR << "Replaced globalized variable with " | |||
| 2707 | << ore::NV("SharedMemory", AllocSize->getZExtValue()) | |||
| 2708 | << ((AllocSize->getZExtValue() != 1) ? " bytes " : " byte ") | |||
| 2709 | << "of shared memory."; | |||
| 2710 | }; | |||
| 2711 | A.emitRemark<OptimizationRemark>(CB, "OMP111", Remark); | |||
| 2712 | ||||
| 2713 | SharedMem->setAlignment(MaybeAlign(32)); | |||
| 2714 | ||||
| 2715 | A.changeValueAfterManifest(*CB, *NewBuffer); | |||
| 2716 | A.deleteAfterManifest(*CB); | |||
| 2717 | A.deleteAfterManifest(*FreeCalls.front()); | |||
| 2718 | ||||
| 2719 | NumBytesMovedToSharedMemory += AllocSize->getZExtValue(); | |||
| 2720 | Changed = ChangeStatus::CHANGED; | |||
| 2721 | } | |||
| 2722 | ||||
| 2723 | return Changed; | |||
| 2724 | } | |||
| 2725 | ||||
| 2726 | ChangeStatus updateImpl(Attributor &A) override { | |||
| 2727 | auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); | |||
| 2728 | auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared]; | |||
| 2729 | Function *F = getAnchorScope(); | |||
| 2730 | ||||
| 2731 | auto NumMallocCalls = MallocCalls.size(); | |||
| 2732 | ||||
| 2733 | // Only consider malloc calls executed by a single thread with a constant size. | |||
| 2734 | for (User *U : RFI.Declaration->users()) { | |||
| 2735 | const auto &ED = A.getAAFor<AAExecutionDomain>( | |||
| 2736 | *this, IRPosition::function(*F), DepClassTy::REQUIRED); | |||
| 2737 | if (CallBase *CB = dyn_cast<CallBase>(U)) | |||
| 2738 | if (!dyn_cast<ConstantInt>(CB->getArgOperand(0)) || | |||
| 2739 | !ED.isExecutedByInitialThreadOnly(*CB)) | |||
| 2740 | MallocCalls.erase(CB); | |||
| 2741 | } | |||
| 2742 | ||||
| 2743 | findPotentialRemovedFreeCalls(A); | |||
| 2744 | ||||
| 2745 | if (NumMallocCalls != MallocCalls.size()) | |||
| 2746 | return ChangeStatus::CHANGED; | |||
| 2747 | ||||
| 2748 | return ChangeStatus::UNCHANGED; | |||
| 2749 | } | |||
| 2750 | ||||
| 2751 | /// Collection of all malloc calls in a function. | |||
| 2752 | SmallPtrSet<CallBase *, 4> MallocCalls; | |||
| 2753 | /// Collection of potentially removed free calls in a function. | |||
| 2754 | SmallPtrSet<CallBase *, 4> PotentialRemovedFreeCalls; | |||
| 2755 | }; | |||
| 2756 | ||||
| 2757 | struct AAKernelInfo : public StateWrapper<KernelInfoState, AbstractAttribute> { | |||
| 2758 | using Base = StateWrapper<KernelInfoState, AbstractAttribute>; | |||
| 2759 | AAKernelInfo(const IRPosition &IRP, Attributor &A) : Base(IRP) {} | |||
| 2760 | ||||
| 2761 | /// Statistics are tracked as part of manifest for now. | |||
| 2762 | void trackStatistics() const override {} | |||
| 2763 | ||||
| 2764 | /// See AbstractAttribute::getAsStr() | |||
| 2765 | const std::string getAsStr() const override { | |||
| 2766 | if (!isValidState()) | |||
| 2767 | return "<invalid>"; | |||
| 2768 | return std::string(SPMDCompatibilityTracker.isAssumed() ? "SPMD" | |||
| 2769 | : "generic") + | |||
| 2770 | std::string(SPMDCompatibilityTracker.isAtFixpoint() ? " [FIX]" | |||
| 2771 | : "") + | |||
| 2772 | std::string(" #PRs: ") + | |||
| 2773 | std::to_string(ReachedKnownParallelRegions.size()) + | |||
| 2774 | ", #Unknown PRs: " + | |||
| 2775 | std::to_string(ReachedUnknownParallelRegions.size()); | |||
| 2776 | } | |||
| 2777 | ||||
| 2778 | /// Create an abstract attribute view for the position \p IRP. | |||
| 2779 | static AAKernelInfo &createForPosition(const IRPosition &IRP, Attributor &A); | |||
| 2780 | ||||
| 2781 | /// See AbstractAttribute::getName() | |||
| 2782 | const std::string getName() const override { return "AAKernelInfo"; } | |||
| 2783 | ||||
| 2784 | /// See AbstractAttribute::getIdAddr() | |||
| 2785 | const char *getIdAddr() const override { return &ID; } | |||
| 2786 | ||||
| 2787 | /// This function should return true if the type of the \p AA is AAKernelInfo | |||
| 2788 | static bool classof(const AbstractAttribute *AA) { | |||
| 2789 | return (AA->getIdAddr() == &ID); | |||
| 2790 | } | |||
| 2791 | ||||
| 2792 | static const char ID; | |||
| 2793 | }; | |||
| 2794 | ||||
| 2795 | /// The function kernel info abstract attribute, basically, what can we say | |||
| 2796 | /// about a function with regards to the KernelInfoState. | |||
| 2797 | struct AAKernelInfoFunction : AAKernelInfo { | |||
| 2798 | AAKernelInfoFunction(const IRPosition &IRP, Attributor &A) | |||
| 2799 | : AAKernelInfo(IRP, A) {} | |||
| 2800 | ||||
| 2801 | /// See AbstractAttribute::initialize(...). | |||
| 2802 | void initialize(Attributor &A) override { | |||
| 2803 | // This is a high-level transform that might change the constant arguments | |||
| 2804 | // of the init and deinit calls. We need to tell the Attributor about this | |||
| 2805 | // to avoid other parts using the current constant value for simplification. | |||
| 2806 | auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); | |||
| 2807 | ||||
| 2808 | Function *Fn = getAnchorScope(); | |||
| 2809 | if (!OMPInfoCache.Kernels.count(Fn)) | |||
| 2810 | return; | |||
| 2811 | ||||
| 2812 | // Add itself to the reaching kernel and set IsKernelEntry. | |||
| 2813 | ReachingKernelEntries.insert(Fn); | |||
| 2814 | IsKernelEntry = true; | |||
| 2815 | ||||
| 2816 | OMPInformationCache::RuntimeFunctionInfo &InitRFI = | |||
| 2817 | OMPInfoCache.RFIs[OMPRTL___kmpc_target_init]; | |||
| 2818 | OMPInformationCache::RuntimeFunctionInfo &DeinitRFI = | |||
| 2819 | OMPInfoCache.RFIs[OMPRTL___kmpc_target_deinit]; | |||
| 2820 | ||||
| 2821 | // For kernels we perform more initialization work; first we find the init | |||
| 2822 | // and deinit calls. | |||
| 2823 | auto StoreCallBase = [](Use &U, | |||
| 2824 | OMPInformationCache::RuntimeFunctionInfo &RFI, | |||
| 2825 | CallBase *&Storage) { | |||
| 2826 | CallBase *CB = OpenMPOpt::getCallIfRegularCall(U, &RFI); | |||
| 2827 | assert(CB && | |||
| 2828 | "Unexpected use of __kmpc_target_init or __kmpc_target_deinit!"); | |||
| 2829 | assert(!Storage && | |||
| 2830 | "Multiple uses of __kmpc_target_init or __kmpc_target_deinit!"); | |||
| 2831 | Storage = CB; | |||
| 2832 | return false; | |||
| 2833 | }; | |||
| 2834 | InitRFI.foreachUse( | |||
| 2835 | [&](Use &U, Function &) { | |||
| 2836 | StoreCallBase(U, InitRFI, KernelInitCB); | |||
| 2837 | return false; | |||
| 2838 | }, | |||
| 2839 | Fn); | |||
| 2840 | DeinitRFI.foreachUse( | |||
| 2841 | [&](Use &U, Function &) { | |||
| 2842 | StoreCallBase(U, DeinitRFI, KernelDeinitCB); | |||
| 2843 | return false; | |||
| 2844 | }, | |||
| 2845 | Fn); | |||
| 2846 | ||||
| 2847 | assert((KernelInitCB && KernelDeinitCB) && | |||
| 2848 | "Kernel without __kmpc_target_init or __kmpc_target_deinit!"); | |||
| 2849 | ||||
| 2850 | // For kernels we might need to initialize/finalize the IsSPMD state and | |||
| 2851 | // we need to register a simplification callback so that the Attributor | |||
| 2852 | // knows the constant arguments to __kmpc_target_init and | |||
| 2853 | // __kmpc_target_deinit might actually change. | |||
| 2854 | ||||
| 2855 | Attributor::SimplifictionCallbackTy StateMachineSimplifyCB = | |||
| 2856 | [&](const IRPosition &IRP, const AbstractAttribute *AA, | |||
| 2857 | bool &UsedAssumedInformation) -> Optional<Value *> { | |||
| 2858 | // IRP represents the "use generic state machine" argument of an | |||
| 2859 | // __kmpc_target_init call. We will answer this one with the internal | |||
| 2860 | // state. As long as we are not in an invalid state, we will create a | |||
| 2861 | // custom state machine so the value should be a `i1 false`. If we are | |||
| 2862 | // in an invalid state, we won't change the value that is in the IR. | |||
| 2863 | if (!isValidState()) | |||
| 2864 | return nullptr; | |||
| 2865 | if (AA) | |||
| 2866 | A.recordDependence(*this, *AA, DepClassTy::OPTIONAL); | |||
| 2867 | UsedAssumedInformation = !isAtFixpoint(); | |||
| 2868 | auto *FalseVal = | |||
| 2869 | ConstantInt::getBool(IRP.getAnchorValue().getContext(), 0); | |||
| 2870 | return FalseVal; | |||
| 2871 | }; | |||
| 2872 | ||||
| 2873 | Attributor::SimplifictionCallbackTy IsSPMDModeSimplifyCB = | |||
| 2874 | [&](const IRPosition &IRP, const AbstractAttribute *AA, | |||
| 2875 | bool &UsedAssumedInformation) -> Optional<Value *> { | |||
| 2876 | // IRP represents the "SPMDCompatibilityTracker" argument of an | |||
| 2877 | // __kmpc_target_init or | |||
| 2878 | // __kmpc_target_deinit call. We will answer this one with the internal | |||
| 2879 | // state. | |||
| 2880 | if (!SPMDCompatibilityTracker.isValidState()) | |||
| 2881 | return nullptr; | |||
| 2882 | if (!SPMDCompatibilityTracker.isAtFixpoint()) { | |||
| 2883 | if (AA) | |||
| 2884 | A.recordDependence(*this, *AA, DepClassTy::OPTIONAL); | |||
| 2885 | UsedAssumedInformation = true; | |||
| 2886 | } else { | |||
| 2887 | UsedAssumedInformation = false; | |||
| 2888 | } | |||
| 2889 | auto *Val = ConstantInt::getBool(IRP.getAnchorValue().getContext(), | |||
| 2890 | SPMDCompatibilityTracker.isAssumed()); | |||
| 2891 | return Val; | |||
| 2892 | }; | |||
| 2893 | ||||
| 2894 | Attributor::SimplifictionCallbackTy IsGenericModeSimplifyCB = | |||
| 2895 | [&](const IRPosition &IRP, const AbstractAttribute *AA, | |||
| 2896 | bool &UsedAssumedInformation) -> Optional<Value *> { | |||
| 2897 | // IRP represents the "RequiresFullRuntime" argument of an | |||
| 2898 | // __kmpc_target_init or __kmpc_target_deinit call. We will answer this | |||
| 2899 | // one with the internal state of the SPMDCompatibilityTracker, so if | |||
| 2900 | // generic then true, if SPMD then false. | |||
| 2901 | if (!SPMDCompatibilityTracker.isValidState()) | |||
| 2902 | return nullptr; | |||
| 2903 | if (!SPMDCompatibilityTracker.isAtFixpoint()) { | |||
| 2904 | if (AA) | |||
| 2905 | A.recordDependence(*this, *AA, DepClassTy::OPTIONAL); | |||
| 2906 | UsedAssumedInformation = true; | |||
| 2907 | } else { | |||
| 2908 | UsedAssumedInformation = false; | |||
| 2909 | } | |||
| 2910 | auto *Val = ConstantInt::getBool(IRP.getAnchorValue().getContext(), | |||
| 2911 | !SPMDCompatibilityTracker.isAssumed()); | |||
| 2912 | return Val; | |||
| 2913 | }; | |||
| 2914 | ||||
| 2915 | constexpr const int InitIsSPMDArgNo = 1; | |||
| 2916 | constexpr const int DeinitIsSPMDArgNo = 1; | |||
| 2917 | constexpr const int InitUseStateMachineArgNo = 2; | |||
| 2918 | constexpr const int InitRequiresFullRuntimeArgNo = 3; | |||
| 2919 | constexpr const int DeinitRequiresFullRuntimeArgNo = 2; | |||
| 2920 | A.registerSimplificationCallback( | |||
| 2921 | IRPosition::callsite_argument(*KernelInitCB, InitUseStateMachineArgNo), | |||
| 2922 | StateMachineSimplifyCB); | |||
| 2923 | A.registerSimplificationCallback( | |||
| 2924 | IRPosition::callsite_argument(*KernelInitCB, InitIsSPMDArgNo), | |||
| 2925 | IsSPMDModeSimplifyCB); | |||
| 2926 | A.registerSimplificationCallback( | |||
| 2927 | IRPosition::callsite_argument(*KernelDeinitCB, DeinitIsSPMDArgNo), | |||
| 2928 | IsSPMDModeSimplifyCB); | |||
| 2929 | A.registerSimplificationCallback( | |||
| 2930 | IRPosition::callsite_argument(*KernelInitCB, | |||
| 2931 | InitRequiresFullRuntimeArgNo), | |||
| 2932 | IsGenericModeSimplifyCB); | |||
| 2933 | A.registerSimplificationCallback( | |||
| 2934 | IRPosition::callsite_argument(*KernelDeinitCB, | |||
| 2935 | DeinitRequiresFullRuntimeArgNo), | |||
| 2936 | IsGenericModeSimplifyCB); | |||
| 2937 | ||||
| 2938 | // Check if we know we are in SPMD-mode already. | |||
| 2939 | ConstantInt *IsSPMDArg = | |||
| 2940 | dyn_cast<ConstantInt>(KernelInitCB->getArgOperand(InitIsSPMDArgNo)); | |||
| 2941 | if (IsSPMDArg && !IsSPMDArg->isZero()) | |||
| 2942 | SPMDCompatibilityTracker.indicateOptimisticFixpoint(); | |||
| 2943 | } | |||
| 2944 | ||||
| 2945 | /// Modify the IR based on the KernelInfoState as the fixpoint iteration is | |||
| 2946 | /// finished now. | |||
| 2947 | ChangeStatus manifest(Attributor &A) override { | |||
| 2948 | // If we are not looking at a kernel with __kmpc_target_init and | |||
| 2949 | // __kmpc_target_deinit calls, we cannot actually manifest the information. | |||
| 2950 | if (!KernelInitCB || !KernelDeinitCB) | |||
| 2951 | return ChangeStatus::UNCHANGED; | |||
| 2952 | ||||
| 2953 | // Known SPMD-mode kernels need no manifest changes. | |||
| 2954 | if (SPMDCompatibilityTracker.isKnown()) | |||
| 2955 | return ChangeStatus::UNCHANGED; | |||
| 2956 | ||||
| 2957 | // If we can, we change the execution mode to SPMD-mode; otherwise we build | |||
| 2958 | // a custom state machine. | |||
| 2959 | if (!changeToSPMDMode(A)) | |||
| 2960 | buildCustomStateMachine(A); | |||
| 2961 | ||||
| 2962 | return ChangeStatus::CHANGED; | |||
| 2963 | } | |||
| 2964 | ||||
| 2965 | bool changeToSPMDMode(Attributor &A) { | |||
| 2966 | auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); | |||
| 2967 | ||||
| 2968 | if (!SPMDCompatibilityTracker.isAssumed()) { | |||
| 2969 | for (Instruction *NonCompatibleI : SPMDCompatibilityTracker) { | |||
| 2970 | if (!NonCompatibleI) | |||
| 2971 | continue; | |||
| 2972 | ||||
| 2973 | // Skip diagnostics on calls to known OpenMP runtime functions for now. | |||
| 2974 | if (auto *CB = dyn_cast<CallBase>(NonCompatibleI)) | |||
| 2975 | if (OMPInfoCache.RTLFunctions.contains(CB->getCalledFunction())) | |||
| 2976 | continue; | |||
| 2977 | ||||
| 2978 | auto Remark = [&](OptimizationRemarkAnalysis ORA) { | |||
| 2979 | ORA << "Value has potential side effects preventing SPMD-mode " | |||
| 2980 | "execution"; | |||
| 2981 | if (isa<CallBase>(NonCompatibleI)) { | |||
| 2982 | ORA << ". Add `__attribute__((assume(\"ompx_spmd_amenable\")))` to " | |||
| 2983 | "the called function to override"; | |||
| 2984 | } | |||
| 2985 | return ORA << "."; | |||
| 2986 | }; | |||
| 2987 | A.emitRemark<OptimizationRemarkAnalysis>(NonCompatibleI, "OMP121", | |||
| 2988 | Remark); | |||
| 2989 | ||||
| 2990 | LLVM_DEBUG(dbgs() << TAG << "SPMD-incompatible side-effect: "do { } while (false) | |||
| 2991 | << *NonCompatibleI << "\n")do { } while (false); | |||
| 2992 | } | |||
| 2993 | ||||
| 2994 | return false; | |||
| 2995 | } | |||
| 2996 | ||||
| 2997 | // Adjust the global exec mode flag that tells the runtime what mode this | |||
| 2998 | // kernel is executed in. | |||
| 2999 | Function *Kernel = getAnchorScope(); | |||
| 3000 | GlobalVariable *ExecMode = Kernel->getParent()->getGlobalVariable( | |||
| 3001 | (Kernel->getName() + "_exec_mode").str()); | |||
| 3002 | assert(ExecMode && "Kernel without exec mode?"); | |||
| 3003 | assert(ExecMode->getInitializer() && | |||
| 3004 | ExecMode->getInitializer()->isOneValue() && | |||
| 3005 | "Initially non-SPMD kernel has SPMD exec mode!"); | |||
| 3006 | ||||
| 3007 | // Set the global exec mode flag to indicate SPMD-Generic mode. | |||
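| | // Illustratively, assuming the usual i8 flag emitted by the frontend: | |||
| | //   @foo_kernel_exec_mode = global i8 1   ; generic | |||
| | // becomes | |||
| | //   @foo_kernel_exec_mode = global i8 2   ; SPMD-Generic | |||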
| 3008 | constexpr int SPMDGeneric = 2; | |||
| 3009 | if (!ExecMode->getInitializer()->isZeroValue()) | |||
| 3010 | ExecMode->setInitializer( | |||
| 3011 | ConstantInt::get(ExecMode->getInitializer()->getType(), SPMDGeneric)); | |||
| 3012 | ||||
| 3013 | // Next rewrite the init and deinit calls to indicate we use SPMD-mode now. | |||
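| | // Sketch of the rewritten call (argument positions follow the constants | |||
| | // below, other operands elided): | |||
| | //   __kmpc_target_init(..., /*IsSPMD=*/true, /*UseStateMachine=*/false, | |||
| | //                      /*RequiresFullRuntime=*/false) | |||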
| 3014 | const int InitIsSPMDArgNo = 1; | |||
| 3015 | const int DeinitIsSPMDArgNo = 1; | |||
| 3016 | const int InitUseStateMachineArgNo = 2; | |||
| 3017 | const int InitRequiresFullRuntimeArgNo = 3; | |||
| 3018 | const int DeinitRequiresFullRuntimeArgNo = 2; | |||
| 3019 | ||||
| 3020 | auto &Ctx = getAnchorValue().getContext(); | |||
| 3021 | A.changeUseAfterManifest(KernelInitCB->getArgOperandUse(InitIsSPMDArgNo), | |||
| 3022 | *ConstantInt::getBool(Ctx, 1)); | |||
| 3023 | A.changeUseAfterManifest( | |||
| 3024 | KernelInitCB->getArgOperandUse(InitUseStateMachineArgNo), | |||
| 3025 | *ConstantInt::getBool(Ctx, 0)); | |||
| 3026 | A.changeUseAfterManifest( | |||
| 3027 | KernelDeinitCB->getArgOperandUse(DeinitIsSPMDArgNo), | |||
| 3028 | *ConstantInt::getBool(Ctx, 1)); | |||
| 3029 | A.changeUseAfterManifest( | |||
| 3030 | KernelInitCB->getArgOperandUse(InitRequiresFullRuntimeArgNo), | |||
| 3031 | *ConstantInt::getBool(Ctx, 0)); | |||
| 3032 | A.changeUseAfterManifest( | |||
| 3033 | KernelDeinitCB->getArgOperandUse(DeinitRequiresFullRuntimeArgNo), | |||
| 3034 | *ConstantInt::getBool(Ctx, 0)); | |||
| 3035 | ||||
| 3036 | ++NumOpenMPTargetRegionKernelsSPMD; | |||
| 3037 | ||||
| 3038 | auto Remark = [&](OptimizationRemark OR) { | |||
| 3039 | return OR << "Transformed generic-mode kernel to SPMD-mode."; | |||
| 3040 | }; | |||
| 3041 | A.emitRemark<OptimizationRemark>(KernelInitCB, "OMP120", Remark); | |||
| 3042 | return true; | |||
| 3043 | }; | |||
| 3044 | ||||
| 3045 | ChangeStatus buildCustomStateMachine(Attributor &A) { | |||
| 3046 | assert(ReachedKnownParallelRegions.isValidState() && | |||
| 3047 | "Custom state machine with invalid parallel region states?"); | |||
| 3048 | ||||
| 3049 | const int InitIsSPMDArgNo = 1; | |||
| 3050 | const int InitUseStateMachineArgNo = 2; | |||
| 3051 | ||||
| 3052 | // Check if the current configuration is non-SPMD mode with the generic state | |||
| 3053 | // machine. If we already have SPMD mode or a custom state machine we do not | |||
| 3054 | // need to go any further. If either argument is anything but a constant, | |||
| 3055 | // something is weird and we give up. | |||
| 3056 | ConstantInt *UseStateMachine = dyn_cast<ConstantInt>( | |||
| 3057 | KernelInitCB->getArgOperand(InitUseStateMachineArgNo)); | |||
| 3058 | ConstantInt *IsSPMD = | |||
| 3059 | dyn_cast<ConstantInt>(KernelInitCB->getArgOperand(InitIsSPMDArgNo)); | |||
| 3060 | ||||
| 3061 | // If we are stuck with generic mode, try to create a custom device (=GPU) | |||
| 3062 | // state machine which is specialized for the parallel regions that are | |||
| 3063 | // reachable by the kernel. | |||
| 3064 | if (!UseStateMachine || UseStateMachine->isZero() || !IsSPMD || | |||
| 3065 | !IsSPMD->isZero()) | |||
| 3066 | return ChangeStatus::UNCHANGED; | |||
| 3067 | ||||
| 3068 | // If not SPMD mode, indicate we use a custom state machine now. | |||
| 3069 | auto &Ctx = getAnchorValue().getContext(); | |||
| 3070 | auto *FalseVal = ConstantInt::getBool(Ctx, 0); | |||
| 3071 | A.changeUseAfterManifest( | |||
| 3072 | KernelInitCB->getArgOperandUse(InitUseStateMachineArgNo), *FalseVal); | |||
| 3073 | ||||
| 3074 | // If we don't actually need a state machine we are done here. This can | |||
| 3075 | // happen if there simply are no parallel regions. In the resulting kernel | |||
| 3076 | // all worker threads will simply exit right away, leaving the main thread | |||
| 3077 | // to do the work alone. | |||
| 3078 | if (ReachedKnownParallelRegions.empty() && | |||
| 3079 | ReachedUnknownParallelRegions.empty()) { | |||
| 3080 | ++NumOpenMPTargetRegionKernelsWithoutStateMachine; | |||
| 3081 | ||||
| 3082 | auto Remark = [&](OptimizationRemark OR) { | |||
| 3083 | return OR << "Removing unused state machine from generic-mode kernel."; | |||
| 3084 | }; | |||
| 3085 | A.emitRemark<OptimizationRemark>(KernelInitCB, "OMP130", Remark); | |||
| 3086 | ||||
| 3087 | return ChangeStatus::CHANGED; | |||
| 3088 | } | |||
| 3089 | ||||
| 3090 | // Keep track in the statistics of our new shiny custom state machine. | |||
| 3091 | if (ReachedUnknownParallelRegions.empty()) { | |||
| 3092 | ++NumOpenMPTargetRegionKernelsCustomStateMachineWithoutFallback; | |||
| 3093 | ||||
| 3094 | auto Remark = [&](OptimizationRemark OR) { | |||
| 3095 | return OR << "Rewriting generic-mode kernel with a customized state " | |||
| 3096 | "machine."; | |||
| 3097 | }; | |||
| 3098 | A.emitRemark<OptimizationRemark>(KernelInitCB, "OMP131", Remark); | |||
| 3099 | } else { | |||
| 3100 | ++NumOpenMPTargetRegionKernelsCustomStateMachineWithFallback; | |||
| 3101 | ||||
| 3102 | auto Remark = [&](OptimizationRemarkAnalysis OR) { | |||
| 3103 | return OR << "Generic-mode kernel is executed with a customized state " | |||
| 3104 | "machine that requires a fallback."; | |||
| 3105 | }; | |||
| 3106 | A.emitRemark<OptimizationRemarkAnalysis>(KernelInitCB, "OMP132", Remark); | |||
| 3107 | ||||
| 3108 | // Tell the user why we ended up with a fallback. | |||
| 3109 | for (CallBase *UnknownParallelRegionCB : ReachedUnknownParallelRegions) { | |||
| 3110 | if (!UnknownParallelRegionCB) | |||
| 3111 | continue; | |||
| 3112 | auto Remark = [&](OptimizationRemarkAnalysis ORA) { | |||
| 3113 | return ORA << "Call may contain unknown parallel regions. Use " | |||
| 3114 | << "`__attribute__((assume(\"omp_no_parallelism\")))` to " | |||
| 3115 | "override."; | |||
| 3116 | }; | |||
| 3117 | A.emitRemark<OptimizationRemarkAnalysis>(UnknownParallelRegionCB, | |||
| 3118 | "OMP133", Remark); | |||
| 3119 | } | |||
| 3120 | } | |||
| 3121 | ||||
| 3122 | // Create all the blocks: | |||
| 3123 | // | |||
| 3124 | // InitCB = __kmpc_target_init(...) | |||
| 3125 | // bool IsWorker = InitCB >= 0; | |||
| 3126 | // if (IsWorker) { | |||
| 3127 | // SMBeginBB: __kmpc_barrier_simple_spmd(...); | |||
| 3128 | // void *WorkFn; | |||
| 3129 | // bool Active = __kmpc_kernel_parallel(&WorkFn); | |||
| 3130 | // if (!WorkFn) return; | |||
| 3131 | // SMIsActiveCheckBB: if (Active) { | |||
| 3132 | // SMIfCascadeCurrentBB: if (WorkFn == <ParFn0>) | |||
| 3133 | // ParFn0(...); | |||
| 3134 | // SMIfCascadeCurrentBB: else if (WorkFn == <ParFn1>) | |||
| 3135 | // ParFn1(...); | |||
| 3136 | // ... | |||
| 3137 | // SMIfCascadeCurrentBB: else | |||
| 3138 | // ((WorkFnTy*)WorkFn)(...); | |||
| 3139 | // SMEndParallelBB: __kmpc_kernel_end_parallel(...); | |||
| 3140 | // } | |||
| 3141 | // SMDoneBB: __kmpc_barrier_simple_spmd(...); | |||
| 3142 | // goto SMBeginBB; | |||
| 3143 | // } | |||
| 3144 | // UserCodeEntryBB: // user code | |||
| 3145 | // __kmpc_target_deinit(...) | |||
| 3146 | // | |||
| 3147 | Function *Kernel = getAssociatedFunction(); | |||
| 3148 | assert(Kernel && "Expected an associated function!"); | |||
| 3149 | ||||
| 3150 | BasicBlock *InitBB = KernelInitCB->getParent(); | |||
| 3151 | BasicBlock *UserCodeEntryBB = InitBB->splitBasicBlock( | |||
| 3152 | KernelInitCB->getNextNode(), "thread.user_code.check"); | |||
| 3153 | BasicBlock *StateMachineBeginBB = BasicBlock::Create( | |||
| 3154 | Ctx, "worker_state_machine.begin", Kernel, UserCodeEntryBB); | |||
| 3155 | BasicBlock *StateMachineFinishedBB = BasicBlock::Create( | |||
| 3156 | Ctx, "worker_state_machine.finished", Kernel, UserCodeEntryBB); | |||
| 3157 | BasicBlock *StateMachineIsActiveCheckBB = BasicBlock::Create( | |||
| 3158 | Ctx, "worker_state_machine.is_active.check", Kernel, UserCodeEntryBB); | |||
| 3159 | BasicBlock *StateMachineIfCascadeCurrentBB = | |||
| 3160 | BasicBlock::Create(Ctx, "worker_state_machine.parallel_region.check", | |||
| 3161 | Kernel, UserCodeEntryBB); | |||
| 3162 | BasicBlock *StateMachineEndParallelBB = | |||
| 3163 | BasicBlock::Create(Ctx, "worker_state_machine.parallel_region.end", | |||
| 3164 | Kernel, UserCodeEntryBB); | |||
| 3165 | BasicBlock *StateMachineDoneBarrierBB = BasicBlock::Create( | |||
| 3166 | Ctx, "worker_state_machine.done.barrier", Kernel, UserCodeEntryBB); | |||
| 3167 | A.registerManifestAddedBasicBlock(*InitBB); | |||
| 3168 | A.registerManifestAddedBasicBlock(*UserCodeEntryBB); | |||
| 3169 | A.registerManifestAddedBasicBlock(*StateMachineBeginBB); | |||
| 3170 | A.registerManifestAddedBasicBlock(*StateMachineFinishedBB); | |||
| 3171 | A.registerManifestAddedBasicBlock(*StateMachineIsActiveCheckBB); | |||
| 3172 | A.registerManifestAddedBasicBlock(*StateMachineIfCascadeCurrentBB); | |||
| 3173 | A.registerManifestAddedBasicBlock(*StateMachineEndParallelBB); | |||
| 3174 | A.registerManifestAddedBasicBlock(*StateMachineDoneBarrierBB); | |||
| 3175 | ||||
| 3176 | const DebugLoc &DLoc = KernelInitCB->getDebugLoc(); | |||
| 3177 | ReturnInst::Create(Ctx, StateMachineFinishedBB)->setDebugLoc(DLoc); | |||
| 3178 | ||||
| 3179 | InitBB->getTerminator()->eraseFromParent(); | |||
| 3180 | Instruction *IsWorker = | |||
| 3181 | ICmpInst::Create(ICmpInst::ICmp, llvm::CmpInst::ICMP_NE, KernelInitCB, | |||
| 3182 | ConstantInt::get(KernelInitCB->getType(), -1), | |||
| 3183 | "thread.is_worker", InitBB); | |||
| 3184 | IsWorker->setDebugLoc(DLoc); | |||
| 3185 | BranchInst::Create(StateMachineBeginBB, UserCodeEntryBB, IsWorker, InitBB); | |||
| 3186 | ||||
| 3187 | Module &M = *Kernel->getParent(); | |||
| 3188 | ||||
| 3189 | // Create local storage for the work function pointer. | |||
| 3190 | const DataLayout &DL = M.getDataLayout(); | |||
| 3191 | Type *VoidPtrTy = Type::getInt8PtrTy(Ctx); | |||
| 3192 | Instruction *WorkFnAI = | |||
| 3193 | new AllocaInst(VoidPtrTy, DL.getAllocaAddrSpace(), nullptr, | |||
| 3194 | "worker.work_fn.addr", &Kernel->getEntryBlock().front()); | |||
| 3195 | WorkFnAI->setDebugLoc(DLoc); | |||
| 3196 | ||||
| 3197 | auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); | |||
| 3198 | OMPInfoCache.OMPBuilder.updateToLocation( | |||
| 3199 | OpenMPIRBuilder::LocationDescription( | |||
| 3200 | IRBuilder<>::InsertPoint(StateMachineBeginBB, | |||
| 3201 | StateMachineBeginBB->end()), | |||
| 3202 | DLoc)); | |||
| 3203 | ||||
| 3204 | Value *Ident = KernelInitCB->getArgOperand(0); | |||
| 3205 | Value *GTid = KernelInitCB; | |||
| 3206 | ||||
| 3207 | FunctionCallee BarrierFn = | |||
| 3208 | OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction( | |||
| 3209 | M, OMPRTL___kmpc_barrier_simple_spmd); | |||
| 3210 | CallInst::Create(BarrierFn, {Ident, GTid}, "", StateMachineBeginBB) | |||
| 3211 | ->setDebugLoc(DLoc); | |||
| 3212 | ||||
| 3213 | if (WorkFnAI->getType()->getPointerAddressSpace() != | |||
| 3214 | (unsigned int)AddressSpace::Generic) { | |||
| 3215 | WorkFnAI = new AddrSpaceCastInst( | |||
| 3216 | WorkFnAI, | |||
| 3217 | PointerType::getWithSamePointeeType( | |||
| 3218 | cast<PointerType>(WorkFnAI->getType()), | |||
| 3219 | (unsigned int)AddressSpace::Generic), | |||
| 3220 | WorkFnAI->getName() + ".generic", StateMachineBeginBB); | |||
| 3221 | WorkFnAI->setDebugLoc(DLoc); | |||
| 3222 | } | |||
| 3223 | ||||
| 3224 | FunctionCallee KernelParallelFn = | |||
| 3225 | OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction( | |||
| 3226 | M, OMPRTL___kmpc_kernel_parallel); | |||
| 3227 | Instruction *IsActiveWorker = CallInst::Create( | |||
| 3228 | KernelParallelFn, {WorkFnAI}, "worker.is_active", StateMachineBeginBB); | |||
| 3229 | IsActiveWorker->setDebugLoc(DLoc); | |||
| 3230 | Instruction *WorkFn = new LoadInst(VoidPtrTy, WorkFnAI, "worker.work_fn", | |||
| 3231 | StateMachineBeginBB); | |||
| 3232 | WorkFn->setDebugLoc(DLoc); | |||
| 3233 | ||||
| 3234 | FunctionType *ParallelRegionFnTy = FunctionType::get( | |||
| 3235 | Type::getVoidTy(Ctx), {Type::getInt16Ty(Ctx), Type::getInt32Ty(Ctx)}, | |||
| 3236 | false); | |||
| 3237 | Value *WorkFnCast = BitCastInst::CreatePointerBitCastOrAddrSpaceCast( | |||
| 3238 | WorkFn, ParallelRegionFnTy->getPointerTo(), "worker.work_fn.addr_cast", | |||
| 3239 | StateMachineBeginBB); | |||
| 3240 | ||||
| 3241 | Instruction *IsDone = | |||
| 3242 | ICmpInst::Create(ICmpInst::ICmp, llvm::CmpInst::ICMP_EQ, WorkFn, | |||
| 3243 | Constant::getNullValue(VoidPtrTy), "worker.is_done", | |||
| 3244 | StateMachineBeginBB); | |||
| 3245 | IsDone->setDebugLoc(DLoc); | |||
| 3246 | BranchInst::Create(StateMachineFinishedBB, StateMachineIsActiveCheckBB, | |||
| 3247 | IsDone, StateMachineBeginBB) | |||
| 3248 | ->setDebugLoc(DLoc); | |||
| 3249 | ||||
| 3250 | BranchInst::Create(StateMachineIfCascadeCurrentBB, | |||
| 3251 | StateMachineDoneBarrierBB, IsActiveWorker, | |||
| 3252 | StateMachineIsActiveCheckBB) | |||
| 3253 | ->setDebugLoc(DLoc); | |||
| 3254 | ||||
| 3255 | Value *ZeroArg = | |||
| 3256 | Constant::getNullValue(ParallelRegionFnTy->getParamType(0)); | |||
| 3257 | ||||
| 3258 | // Now that we have most of the CFG skeleton it is time for the if-cascade | |||
| 3259 | // that checks the function pointer we got from the runtime against the | |||
| 3260 | // parallel regions we expect, if there are any. | |||
| 3261 | for (int i = 0, e = ReachedKnownParallelRegions.size(); i < e; ++i) { | |||
| 3262 | auto *ParallelRegion = ReachedKnownParallelRegions[i]; | |||
| 3263 | BasicBlock *PRExecuteBB = BasicBlock::Create( | |||
| 3264 | Ctx, "worker_state_machine.parallel_region.execute", Kernel, | |||
| 3265 | StateMachineEndParallelBB); | |||
| 3266 | CallInst::Create(ParallelRegion, {ZeroArg, GTid}, "", PRExecuteBB) | |||
| 3267 | ->setDebugLoc(DLoc); | |||
| 3268 | BranchInst::Create(StateMachineEndParallelBB, PRExecuteBB) | |||
| 3269 | ->setDebugLoc(DLoc); | |||
| 3270 | ||||
| 3271 | BasicBlock *PRNextBB = | |||
| 3272 | BasicBlock::Create(Ctx, "worker_state_machine.parallel_region.check", | |||
| 3273 | Kernel, StateMachineEndParallelBB); | |||
| 3274 | ||||
| 3275 | // Check if we need to compare the pointer at all or if we can just | |||
| 3276 | // call the parallel region function. | |||
| 3277 | Value *IsPR; | |||
| 3278 | if (i + 1 < e || !ReachedUnknownParallelRegions.empty()) { | |||
| 3279 | Instruction *CmpI = ICmpInst::Create( | |||
| 3280 | ICmpInst::ICmp, llvm::CmpInst::ICMP_EQ, WorkFnCast, ParallelRegion, | |||
| 3281 | "worker.check_parallel_region", StateMachineIfCascadeCurrentBB); | |||
| 3282 | CmpI->setDebugLoc(DLoc); | |||
| 3283 | IsPR = CmpI; | |||
| 3284 | } else { | |||
| 3285 | IsPR = ConstantInt::getTrue(Ctx); | |||
| 3286 | } | |||
| 3287 | ||||
| 3288 | BranchInst::Create(PRExecuteBB, PRNextBB, IsPR, | |||
| 3289 | StateMachineIfCascadeCurrentBB) | |||
| 3290 | ->setDebugLoc(DLoc); | |||
| 3291 | StateMachineIfCascadeCurrentBB = PRNextBB; | |||
| 3292 | } | |||
| 3293 | ||||
| 3294 | // At the end of the if-cascade we place the indirect function pointer call | |||
| 3295 | // in case we might need it, that is if there can be parallel regions we | |||
| 3296 | // have not handled in the if-cascade above. | |||
| 3297 | if (!ReachedUnknownParallelRegions.empty()) { | |||
| 3298 | StateMachineIfCascadeCurrentBB->setName( | |||
| 3299 | "worker_state_machine.parallel_region.fallback.execute"); | |||
| 3300 | CallInst::Create(ParallelRegionFnTy, WorkFnCast, {ZeroArg, GTid}, "", | |||
| 3301 | StateMachineIfCascadeCurrentBB) | |||
| 3302 | ->setDebugLoc(DLoc); | |||
| 3303 | } | |||
| 3304 | BranchInst::Create(StateMachineEndParallelBB, | |||
| 3305 | StateMachineIfCascadeCurrentBB) | |||
| 3306 | ->setDebugLoc(DLoc); | |||
| 3307 | ||||
| 3308 | CallInst::Create(OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction( | |||
| 3309 | M, OMPRTL___kmpc_kernel_end_parallel), | |||
| 3310 | {}, "", StateMachineEndParallelBB) | |||
| 3311 | ->setDebugLoc(DLoc); | |||
| 3312 | BranchInst::Create(StateMachineDoneBarrierBB, StateMachineEndParallelBB) | |||
| 3313 | ->setDebugLoc(DLoc); | |||
| 3314 | ||||
| 3315 | CallInst::Create(BarrierFn, {Ident, GTid}, "", StateMachineDoneBarrierBB) | |||
| 3316 | ->setDebugLoc(DLoc); | |||
| 3317 | BranchInst::Create(StateMachineBeginBB, StateMachineDoneBarrierBB) | |||
| 3318 | ->setDebugLoc(DLoc); | |||
| 3319 | ||||
| 3320 | return ChangeStatus::CHANGED; | |||
| 3321 | } | |||
| 3322 | ||||
| 3323 | /// Fixpoint iteration update function. Will be called every time a dependence | |||
| 3324 | /// changed its state (and in the beginning). | |||
| 3325 | ChangeStatus updateImpl(Attributor &A) override { | |||
| 3326 | KernelInfoState StateBefore = getState(); | |||
| 3327 | ||||
| 3328 | // Callback to check a read/write instruction. | |||
| 3329 | auto CheckRWInst = [&](Instruction &I) { | |||
| 3330 | // We handle calls later. | |||
| 3331 | if (isa<CallBase>(I)) | |||
| 3332 | return true; | |||
| 3333 | // We only care about write effects. | |||
| 3334 | if (!I.mayWriteToMemory()) | |||
| 3335 | return true; | |||
| 3336 | if (auto *SI = dyn_cast<StoreInst>(&I)) { | |||
| 3337 | SmallVector<const Value *> Objects; | |||
| 3338 | getUnderlyingObjects(SI->getPointerOperand(), Objects); | |||
| 3339 | if (llvm::all_of(Objects, | |||
| 3340 | [](const Value *Obj) { return isa<AllocaInst>(Obj); })) | |||
| 3341 | return true; | |||
| 3342 | } | |||
| 3343 | // For now we give up on everything but stores. | |||
| 3344 | SPMDCompatibilityTracker.insert(&I); | |||
| 3345 | return true; | |||
| 3346 | }; | |||
| 3347 | ||||
| 3348 | bool UsedAssumedInformationInCheckRWInst = false; | |||
| 3349 | if (!SPMDCompatibilityTracker.isAtFixpoint()) | |||
| 3350 | if (!A.checkForAllReadWriteInstructions( | |||
| 3351 | CheckRWInst, *this, UsedAssumedInformationInCheckRWInst)) | |||
| 3352 | SPMDCompatibilityTracker.indicatePessimisticFixpoint(); | |||
| 3353 | ||||
| 3354 | if (!IsKernelEntry) { | |||
| 3355 | updateReachingKernelEntries(A); | |||
| 3356 | updateParallelLevels(A); | |||
| 3357 | } | |||
| 3358 | ||||
| 3359 | // Callback to check a call instruction. | |||
| 3360 | bool AllSPMDStatesWereFixed = true; | |||
| 3361 | auto CheckCallInst = [&](Instruction &I) { | |||
| 3362 | auto &CB = cast<CallBase>(I); | |||
| 3363 | auto &CBAA = A.getAAFor<AAKernelInfo>( | |||
| 3364 | *this, IRPosition::callsite_function(CB), DepClassTy::OPTIONAL); | |||
| 3365 | getState() ^= CBAA.getState(); | |||
| 3366 | AllSPMDStatesWereFixed &= CBAA.SPMDCompatibilityTracker.isAtFixpoint(); | |||
| 3367 | return true; | |||
| 3368 | }; | |||
| 3369 | ||||
| 3370 | bool UsedAssumedInformationInCheckCallInst = false; | |||
| 3371 | if (!A.checkForAllCallLikeInstructions( | |||
| 3372 | CheckCallInst, *this, UsedAssumedInformationInCheckCallInst)) | |||
| 3373 | return indicatePessimisticFixpoint(); | |||
| 3374 | ||||
| 3375 | // If we haven't used any assumed information for the SPMD state we can fix | |||
| 3376 | // it. | |||
| 3377 | if (!UsedAssumedInformationInCheckRWInst && | |||
| 3378 | !UsedAssumedInformationInCheckCallInst && AllSPMDStatesWereFixed) | |||
| 3379 | SPMDCompatibilityTracker.indicateOptimisticFixpoint(); | |||
| 3380 | ||||
| 3381 | return StateBefore == getState() ? ChangeStatus::UNCHANGED | |||
| 3382 | : ChangeStatus::CHANGED; | |||
| 3383 | } | |||
| 3384 | ||||
| 3385 | private: | |||
| 3386 | /// Update info regarding reaching kernels. | |||
| 3387 | void updateReachingKernelEntries(Attributor &A) { | |||
| 3388 | auto PredCallSite = [&](AbstractCallSite ACS) { | |||
| 3389 | Function *Caller = ACS.getInstruction()->getFunction(); | |||
| 3390 | ||||
| 3391 | assert(Caller && "Caller is nullptr"); | |||
| 3392 | ||||
| 3393 | auto &CAA = A.getOrCreateAAFor<AAKernelInfo>( | |||
| 3394 | IRPosition::function(*Caller), this, DepClassTy::REQUIRED); | |||
| 3395 | if (CAA.ReachingKernelEntries.isValidState()) { | |||
| 3396 | ReachingKernelEntries ^= CAA.ReachingKernelEntries; | |||
| 3397 | return true; | |||
| 3398 | } | |||
| 3399 | ||||
| 3400 | // We lost track of the caller of the associated function; any kernel | |||
| 3401 | // could reach it now. | |||
| 3402 | ReachingKernelEntries.indicatePessimisticFixpoint(); | |||
| 3403 | ||||
| 3404 | return true; | |||
| 3405 | }; | |||
| 3406 | ||||
| 3407 | bool AllCallSitesKnown; | |||
| 3408 | if (!A.checkForAllCallSites(PredCallSite, *this, | |||
| 3409 | true /* RequireAllCallSites */, | |||
| 3410 | AllCallSitesKnown)) | |||
| 3411 | ReachingKernelEntries.indicatePessimisticFixpoint(); | |||
| 3412 | } | |||
| 3413 | ||||
| 3414 | /// Update info regarding parallel levels. | |||
| 3415 | void updateParallelLevels(Attributor &A) { | |||
| 3416 | auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); | |||
| 3417 | OMPInformationCache::RuntimeFunctionInfo &Parallel51RFI = | |||
| 3418 | OMPInfoCache.RFIs[OMPRTL___kmpc_parallel_51]; | |||
| 3419 | ||||
| 3420 | auto PredCallSite = [&](AbstractCallSite ACS) { | |||
| 3421 | Function *Caller = ACS.getInstruction()->getFunction(); | |||
| 3422 | ||||
| 3423 | assert(Caller && "Caller is nullptr"); | |||
| 3424 | ||||
| 3425 | auto &CAA = | |||
| 3426 | A.getOrCreateAAFor<AAKernelInfo>(IRPosition::function(*Caller)); | |||
| 3427 | if (CAA.ParallelLevels.isValidState()) { | |||
| 3428 | // Any function that is called by `__kmpc_parallel_51` will not be | |||
| 3429 | // folded, as the parallel level in such a function is updated at runtime. | |||
| 3430 | // Getting this right would make the analysis depend on the runtime | |||
| 3431 | // implementation, and any future change to that implementation could | |||
| 3432 | // invalidate the analysis. As a consequence, we are just conservative here. | |||
| 3433 | if (Caller == Parallel51RFI.Declaration) { | |||
| 3434 | ParallelLevels.indicatePessimisticFixpoint(); | |||
| 3435 | return true; | |||
| 3436 | } | |||
| 3437 | ||||
| 3438 | ParallelLevels ^= CAA.ParallelLevels; | |||
| 3439 | ||||
| 3440 | return true; | |||
| 3441 | } | |||
| 3442 | ||||
| 3443 | // We lost track of the caller of the associated function; any kernel | |||
| 3444 | // could reach it now. | |||
| 3445 | ParallelLevels.indicatePessimisticFixpoint(); | |||
| 3446 | ||||
| 3447 | return true; | |||
| 3448 | }; | |||
| 3449 | ||||
| 3450 | bool AllCallSitesKnown = true; | |||
| 3451 | if (!A.checkForAllCallSites(PredCallSite, *this, | |||
| 3452 | true /* RequireAllCallSites */, | |||
| 3453 | AllCallSitesKnown)) | |||
| 3454 | ParallelLevels.indicatePessimisticFixpoint(); | |||
| 3455 | } | |||
| 3456 | }; | |||
| 3457 | ||||
| 3458 | /// The call site kernel info abstract attribute, basically, what can we say | |||
| 3459 | /// about a call site with regards to the KernelInfoState. For now this simply | |||
| 3460 | /// forwards the information from the callee. | |||
| 3461 | struct AAKernelInfoCallSite : AAKernelInfo { | |||
| 3462 | AAKernelInfoCallSite(const IRPosition &IRP, Attributor &A) | |||
| 3463 | : AAKernelInfo(IRP, A) {} | |||
| 3464 | ||||
| 3465 | /// See AbstractAttribute::initialize(...). | |||
| 3466 | void initialize(Attributor &A) override { | |||
| 3467 | AAKernelInfo::initialize(A); | |||
| 3468 | ||||
| 3469 | CallBase &CB = cast<CallBase>(getAssociatedValue()); | |||
| 3470 | Function *Callee = getAssociatedFunction(); | |||
| 3471 | ||||
| 3472 | // Helper to lookup an assumption string. | |||
| 3473 | auto HasAssumption = [](Function *Fn, StringRef AssumptionStr) { | |||
| 3474 | return Fn && hasAssumption(*Fn, AssumptionStr); | |||
| 3475 | }; | |||
| 3476 | ||||
| 3477 | // Check for SPMD-mode assumptions. | |||
| 3478 | if (HasAssumption(Callee, "ompx_spmd_amenable")) | |||
| 3479 | SPMDCompatibilityTracker.indicateOptimisticFixpoint(); | |||
| 3480 | ||||
| 3481 | // First weed out calls we do not care about, that is readonly/readnone | |||
| 3482 | // calls, intrinsics, and "no_openmp" calls. None of these can reach a | |||
| 3483 | // parallel region or anything else we are looking for. | |||
| 3484 | if (!CB.mayWriteToMemory() || isa<IntrinsicInst>(CB)) { | |||
| 3485 | indicateOptimisticFixpoint(); | |||
| 3486 | return; | |||
| 3487 | } | |||
| 3488 | ||||
| 3489 | // Next we check if we know the callee. If it is a known OpenMP function | |||
| 3490 | // we will handle them explicitly in the switch below. If it is not, we | |||
| 3491 | // will use an AAKernelInfo object on the callee to gather information and | |||
| 3492 | // merge that into the current state. The latter happens in the updateImpl. | |||
| 3493 | auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); | |||
| 3494 | const auto &It = OMPInfoCache.RuntimeFunctionIDMap.find(Callee); | |||
| 3495 | if (It == OMPInfoCache.RuntimeFunctionIDMap.end()) { | |||
| 3496 | // Unknown callees or declarations are not analyzable; we give up. | |||
| 3497 | if (!Callee || !A.isFunctionIPOAmendable(*Callee)) { | |||
| 3498 | ||||
| 3499 | // Unknown callees might contain parallel regions, except if they have | |||
| 3500 | // an appropriate assumption attached. | |||
| 3501 | if (!(HasAssumption(Callee, "omp_no_openmp") || | |||
| 3502 | HasAssumption(Callee, "omp_no_parallelism"))) | |||
| 3503 | ReachedUnknownParallelRegions.insert(&CB); | |||
| 3504 | ||||
| 3505 | // If SPMDCompatibilityTracker is not fixed, we need to give up on the | |||
| 3506 | // idea we can run something unknown in SPMD-mode. | |||
| 3507 | if (!SPMDCompatibilityTracker.isAtFixpoint()) | |||
| 3508 | SPMDCompatibilityTracker.insert(&CB); | |||
| 3509 | ||||
| 3510 | // We have updated the state for this unknown call properly; there won't | |||
| 3511 | // be any change, so we indicate a fixpoint. | |||
| 3512 | indicateOptimisticFixpoint(); | |||
| 3513 | } | |||
| 3514 | // If the callee is known and can be used in IPO, we will update the state | |||
| 3515 | // based on the callee state in updateImpl. | |||
| 3516 | return; | |||
| 3517 | } | |||
| 3518 | ||||
| 3519 | const unsigned int WrapperFunctionArgNo = 6; | |||
| 3520 | RuntimeFunction RF = It->getSecond(); | |||
| 3521 | switch (RF) { | |||
| 3522 | // All the functions we know are compatible with SPMD mode. | |||
| 3523 | case OMPRTL___kmpc_is_spmd_exec_mode: | |||
| 3524 | case OMPRTL___kmpc_for_static_fini: | |||
| 3525 | case OMPRTL___kmpc_global_thread_num: | |||
| 3526 | case OMPRTL___kmpc_get_hardware_num_threads_in_block: | |||
| 3527 | case OMPRTL___kmpc_get_hardware_num_blocks: | |||
| 3528 | case OMPRTL___kmpc_single: | |||
| 3529 | case OMPRTL___kmpc_end_single: | |||
| 3530 | case OMPRTL___kmpc_master: | |||
| 3531 | case OMPRTL___kmpc_end_master: | |||
| 3532 | case OMPRTL___kmpc_barrier: | |||
| 3533 | break; | |||
| 3534 | case OMPRTL___kmpc_for_static_init_4: | |||
| 3535 | case OMPRTL___kmpc_for_static_init_4u: | |||
| 3536 | case OMPRTL___kmpc_for_static_init_8: | |||
| 3537 | case OMPRTL___kmpc_for_static_init_8u: { | |||
| 3538 | // Check the schedule and allow static schedule in SPMD mode. | |||
| 3539 | unsigned ScheduleArgOpNo = 2; | |||
| 3540 | auto *ScheduleTypeCI = | |||
| 3541 | dyn_cast<ConstantInt>(CB.getArgOperand(ScheduleArgOpNo)); | |||
| 3542 | unsigned ScheduleTypeVal = | |||
| 3543 | ScheduleTypeCI ? ScheduleTypeCI->getZExtValue() : 0; | |||
| 3544 | switch (OMPScheduleType(ScheduleTypeVal)) { | |||
| 3545 | case OMPScheduleType::Static: | |||
| 3546 | case OMPScheduleType::StaticChunked: | |||
| 3547 | case OMPScheduleType::Distribute: | |||
| 3548 | case OMPScheduleType::DistributeChunked: | |||
| 3549 | break; | |||
| 3550 | default: | |||
| 3551 | SPMDCompatibilityTracker.insert(&CB); | |||
| 3552 | break; | |||
| 3553 | }; | |||
| 3554 | } break; | |||
| 3555 | case OMPRTL___kmpc_target_init: | |||
| 3556 | KernelInitCB = &CB; | |||
| 3557 | break; | |||
| 3558 | case OMPRTL___kmpc_target_deinit: | |||
| 3559 | KernelDeinitCB = &CB; | |||
| 3560 | break; | |||
| 3561 | case OMPRTL___kmpc_parallel_51: | |||
| 3562 | if (auto *ParallelRegion = dyn_cast<Function>( | |||
| 3563 | CB.getArgOperand(WrapperFunctionArgNo)->stripPointerCasts())) { | |||
| 3564 | ReachedKnownParallelRegions.insert(ParallelRegion); | |||
| 3565 | break; | |||
| 3566 | } | |||
| 3567 | // The condition above should usually get the parallel region function | |||
| 3568 | // pointer and record it. On the off chance it doesn't, we assume the | |||
| 3569 | // worst. | |||
| 3570 | ReachedUnknownParallelRegions.insert(&CB); | |||
| 3571 | break; | |||
| 3572 | case OMPRTL___kmpc_omp_task: | |||
| 3573 | // We do not look into tasks right now, just give up. | |||
| 3574 | SPMDCompatibilityTracker.insert(&CB); | |||
| 3575 | ReachedUnknownParallelRegions.insert(&CB); | |||
| 3576 | break; | |||
| 3577 | case OMPRTL___kmpc_alloc_shared: | |||
| 3578 | case OMPRTL___kmpc_free_shared: | |||
| 3579 | // Return without setting a fixpoint, to be resolved in updateImpl. | |||
| 3580 | return; | |||
| 3581 | default: | |||
| 3582 | // Unknown OpenMP runtime calls cannot be executed in SPMD-mode, | |||
| 3583 | // generally. | |||
| 3584 | SPMDCompatibilityTracker.insert(&CB); | |||
| 3585 | break; | |||
| 3586 | } | |||
| 3587 | // All other OpenMP runtime calls will not reach parallel regions, so they | |||
| 3588 | // can be safely ignored for now. Since this is a known OpenMP runtime call, we | |||
| 3589 | // have now modeled all effects and there is no need for any update. | |||
| 3590 | indicateOptimisticFixpoint(); | |||
| 3591 | } | |||
| 3592 | ||||
| 3593 | ChangeStatus updateImpl(Attributor &A) override { | |||
| 3594 | // TODO: Once we have call site specific value information we can provide | |||
| 3595 | // call site specific liveness information and then it makes | |||
| 3596 | // sense to specialize attributes for call site arguments instead of | |||
| 3597 | // redirecting requests to the callee argument. | |||
| 3598 | Function *F = getAssociatedFunction(); | |||
| 3599 | ||||
| 3600 | auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); | |||
| 3601 | const auto &It = OMPInfoCache.RuntimeFunctionIDMap.find(F); | |||
| 3602 | ||||
| 3603 | // If F is not a runtime function, propagate the AAKernelInfo of the callee. | |||
| 3604 | if (It == OMPInfoCache.RuntimeFunctionIDMap.end()) { | |||
| 3605 | const IRPosition &FnPos = IRPosition::function(*F); | |||
| 3606 | auto &FnAA = A.getAAFor<AAKernelInfo>(*this, FnPos, DepClassTy::REQUIRED); | |||
| 3607 | if (getState() == FnAA.getState()) | |||
| 3608 | return ChangeStatus::UNCHANGED; | |||
| 3609 | getState() = FnAA.getState(); | |||
| 3610 | return ChangeStatus::CHANGED; | |||
| 3611 | } | |||
| 3612 | ||||
| 3613 | // F is a runtime function that allocates or frees memory, check | |||
| 3614 | // AAHeapToStack and AAHeapToShared. | |||
| 3615 | KernelInfoState StateBefore = getState(); | |||
| 3616 | assert((It->getSecond() == OMPRTL___kmpc_alloc_shared ||((void)0) | |||
| 3617 | It->getSecond() == OMPRTL___kmpc_free_shared) &&((void)0) | |||
| 3618 | "Expected a __kmpc_alloc_shared or __kmpc_free_shared runtime call")((void)0); | |||
| 3619 | ||||
| 3620 | CallBase &CB = cast<CallBase>(getAssociatedValue()); | |||
| 3621 | ||||
| 3622 | auto &HeapToStackAA = A.getAAFor<AAHeapToStack>( | |||
| 3623 | *this, IRPosition::function(*CB.getCaller()), DepClassTy::OPTIONAL); | |||
| 3624 | auto &HeapToSharedAA = A.getAAFor<AAHeapToShared>( | |||
| 3625 | *this, IRPosition::function(*CB.getCaller()), DepClassTy::OPTIONAL); | |||
| 3626 | ||||
| 3627 | RuntimeFunction RF = It->getSecond(); | |||
| 3628 | ||||
| 3629 | switch (RF) { | |||
| 3630 | // If neither HeapToStack nor HeapToShared assume the call is removed, | |||
| 3631 | // assume SPMD incompatibility. | |||
| 3632 | case OMPRTL___kmpc_alloc_shared: | |||
| 3633 | if (!HeapToStackAA.isAssumedHeapToStack(CB) && | |||
| 3634 | !HeapToSharedAA.isAssumedHeapToShared(CB)) | |||
| 3635 | SPMDCompatibilityTracker.insert(&CB); | |||
| 3636 | break; | |||
| 3637 | case OMPRTL___kmpc_free_shared: | |||
| 3638 | if (!HeapToStackAA.isAssumedHeapToStackRemovedFree(CB) && | |||
| 3639 | !HeapToSharedAA.isAssumedHeapToSharedRemovedFree(CB)) | |||
| 3640 | SPMDCompatibilityTracker.insert(&CB); | |||
| 3641 | break; | |||
| 3642 | default: | |||
| 3643 | SPMDCompatibilityTracker.insert(&CB); | |||
| 3644 | } | |||
| 3645 | ||||
| 3646 | return StateBefore == getState() ? ChangeStatus::UNCHANGED | |||
| 3647 | : ChangeStatus::CHANGED; | |||
| 3648 | } | |||
| 3649 | }; | |||
| 3650 | ||||
| 3651 | struct AAFoldRuntimeCall | |||
| 3652 | : public StateWrapper<BooleanState, AbstractAttribute> { | |||
| 3653 | using Base = StateWrapper<BooleanState, AbstractAttribute>; | |||
| 3654 | ||||
| 3655 | AAFoldRuntimeCall(const IRPosition &IRP, Attributor &A) : Base(IRP) {} | |||
| 3656 | ||||
| 3657 | /// Statistics are tracked as part of manifest for now. | |||
| 3658 | void trackStatistics() const override {} | |||
| 3659 | ||||
| 3660 | /// Create an abstract attribute view for the position \p IRP. | |||
| 3661 | static AAFoldRuntimeCall &createForPosition(const IRPosition &IRP, | |||
| 3662 | Attributor &A); | |||
| 3663 | ||||
| 3664 | /// See AbstractAttribute::getName() | |||
| 3665 | const std::string getName() const override { return "AAFoldRuntimeCall"; } | |||
| 3666 | ||||
| 3667 | /// See AbstractAttribute::getIdAddr() | |||
| 3668 | const char *getIdAddr() const override { return &ID; } | |||
| 3669 | ||||
| 3670 | /// This function should return true if the type of the \p AA is | |||
| 3671 | /// AAFoldRuntimeCall | |||
| 3672 | static bool classof(const AbstractAttribute *AA) { | |||
| 3673 | return (AA->getIdAddr() == &ID); | |||
| 3674 | } | |||
| 3675 | ||||
| 3676 | static const char ID; | |||
| 3677 | }; | |||
| 3678 | ||||
| 3679 | struct AAFoldRuntimeCallCallSiteReturned : AAFoldRuntimeCall { | |||
| 3680 | AAFoldRuntimeCallCallSiteReturned(const IRPosition &IRP, Attributor &A) | |||
| 3681 | : AAFoldRuntimeCall(IRP, A) {} | |||
| 3682 | ||||
| 3683 | /// See AbstractAttribute::getAsStr() | |||
| 3684 | const std::string getAsStr() const override { | |||
| 3685 | if (!isValidState()) | |||
| 3686 | return "<invalid>"; | |||
| 3687 | ||||
| 3688 | std::string Str("simplified value: "); | |||
| 3689 | ||||
| 3690 | if (!SimplifiedValue.hasValue()) | |||
| 3691 | return Str + std::string("none"); | |||
| 3692 | ||||
| 3693 | if (!SimplifiedValue.getValue()) | |||
| 3694 | return Str + std::string("nullptr"); | |||
| 3695 | ||||
| 3696 | if (ConstantInt *CI = dyn_cast<ConstantInt>(SimplifiedValue.getValue())) | |||
| 3697 | return Str + std::to_string(CI->getSExtValue()); | |||
| 3698 | ||||
| 3699 | return Str + std::string("unknown"); | |||
| 3700 | } | |||
| 3701 | ||||
| 3702 | void initialize(Attributor &A) override { | |||
| 3703 | Function *Callee = getAssociatedFunction(); | |||
| 3704 | ||||
| 3705 | auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache()); | |||
| 3706 | const auto &It = OMPInfoCache.RuntimeFunctionIDMap.find(Callee); | |||
| 3707 | assert(It != OMPInfoCache.RuntimeFunctionIDMap.end() &&((void)0) | |||
| 3708 | "Expected a known OpenMP runtime function")((void)0); | |||
| 3709 | ||||
| 3710 | RFKind = It->getSecond(); | |||
| 3711 | ||||
| 3712 | CallBase &CB = cast<CallBase>(getAssociatedValue()); | |||
| 3713 | A.registerSimplificationCallback( | |||
| 3714 | IRPosition::callsite_returned(CB), | |||
| 3715 | [&](const IRPosition &IRP, const AbstractAttribute *AA, | |||
| 3716 | bool &UsedAssumedInformation) -> Optional<Value *> { | |||
| 3717 | assert((isValidState() || (SimplifiedValue.hasValue() &&((void)0) | |||
| 3718 | SimplifiedValue.getValue() == nullptr)) &&((void)0) | |||
| 3719 | "Unexpected invalid state!")((void)0); | |||
| 3720 | ||||
| 3721 | if (!isAtFixpoint()) { | |||
| 3722 | UsedAssumedInformation = true; | |||
| 3723 | if (AA) | |||
| 3724 | A.recordDependence(*this, *AA, DepClassTy::OPTIONAL); | |||
| 3725 | } | |||
| 3726 | return SimplifiedValue; | |||
| 3727 | }); | |||
| 3728 | } | |||
| 3729 | ||||
| 3730 | ChangeStatus updateImpl(Attributor &A) override { | |||
| 3731 | ChangeStatus Changed = ChangeStatus::UNCHANGED; | |||
| 3732 | switch (RFKind) { | |||
| 3733 | case OMPRTL___kmpc_is_spmd_exec_mode: | |||
| 3734 | Changed |= foldIsSPMDExecMode(A); | |||
| 3735 | break; | |||
| 3736 | case OMPRTL___kmpc_is_generic_main_thread_id: | |||
| 3737 | Changed |= foldIsGenericMainThread(A); | |||
| 3738 | break; | |||
| 3739 | case OMPRTL___kmpc_parallel_level: | |||
| 3740 | Changed |= foldParallelLevel(A); | |||
| 3741 | break; | |||
| 3742 | case OMPRTL___kmpc_get_hardware_num_threads_in_block: | |||
| 3743 | Changed = Changed | foldKernelFnAttribute(A, "omp_target_thread_limit"); | |||
| 3744 | break; | |||
| 3745 | case OMPRTL___kmpc_get_hardware_num_blocks: | |||
| 3746 | Changed = Changed | foldKernelFnAttribute(A, "omp_target_num_teams"); | |||
| 3747 | break; | |||
| 3748 | default: | |||
| 3749 | llvm_unreachable("Unhandled OpenMP runtime function!")__builtin_unreachable(); | |||
| 3750 | } | |||
| 3751 | ||||
| 3752 | return Changed; | |||
| 3753 | } | |||
| 3754 | ||||
| 3755 | ChangeStatus manifest(Attributor &A) override { | |||
| 3756 | ChangeStatus Changed = ChangeStatus::UNCHANGED; | |||
| 3757 | ||||
| 3758 | if (SimplifiedValue.hasValue() && SimplifiedValue.getValue()) { | |||
| 3759 | Instruction &CB = *getCtxI(); | |||
| 3760 | A.changeValueAfterManifest(CB, **SimplifiedValue); | |||
| 3761 | A.deleteAfterManifest(CB); | |||
| 3762 | ||||
| 3763 | LLVM_DEBUG(dbgs() << TAG << "Folding runtime call: " << CB << " with "do { } while (false) | |||
| 3764 | << **SimplifiedValue << "\n")do { } while (false); | |||
| 3765 | ||||
| 3766 | Changed = ChangeStatus::CHANGED; | |||
| 3767 | } | |||
| 3768 | ||||
| 3769 | return Changed; | |||
| 3770 | } | |||
| 3771 | ||||
| 3772 | ChangeStatus indicatePessimisticFixpoint() override { | |||
| 3773 | SimplifiedValue = nullptr; | |||
| 3774 | return AAFoldRuntimeCall::indicatePessimisticFixpoint(); | |||
| 3775 | } | |||
| 3776 | ||||
| 3777 | private: | |||
| 3778 | /// Fold __kmpc_is_spmd_exec_mode into a constant if possible. | |||
| 3779 | ChangeStatus foldIsSPMDExecMode(Attributor &A) { | |||
| 3780 | Optional<Value *> SimplifiedValueBefore = SimplifiedValue; | |||
| 3781 | ||||
| 3782 | unsigned AssumedSPMDCount = 0, KnownSPMDCount = 0; | |||
| 3783 | unsigned AssumedNonSPMDCount = 0, KnownNonSPMDCount = 0; | |||
| 3784 | auto &CallerKernelInfoAA = A.getAAFor<AAKernelInfo>( | |||
| 3785 | *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED); | |||
| 3786 | ||||
| 3787 | if (!CallerKernelInfoAA.ReachingKernelEntries.isValidState()) | |||
| 3788 | return indicatePessimisticFixpoint(); | |||
| 3789 | ||||
| 3790 | for (Kernel K : CallerKernelInfoAA.ReachingKernelEntries) { | |||
| 3791 | auto &AA = A.getAAFor<AAKernelInfo>(*this, IRPosition::function(*K), | |||
| 3792 | DepClassTy::REQUIRED); | |||
| 3793 | ||||
| 3794 | if (!AA.isValidState()) { | |||
| 3795 | SimplifiedValue = nullptr; | |||
| 3796 | return indicatePessimisticFixpoint(); | |||
| 3797 | } | |||
| 3798 | ||||
| 3799 | if (AA.SPMDCompatibilityTracker.isAssumed()) { | |||
| 3800 | if (AA.SPMDCompatibilityTracker.isAtFixpoint()) | |||
| 3801 | ++KnownSPMDCount; | |||
| 3802 | else | |||
| 3803 | ++AssumedSPMDCount; | |||
| 3804 | } else { | |||
| 3805 | if (AA.SPMDCompatibilityTracker.isAtFixpoint()) | |||
| 3806 | ++KnownNonSPMDCount; | |||
| 3807 | else | |||
| 3808 | ++AssumedNonSPMDCount; | |||
| 3809 | } | |||
| 3810 | } | |||
| 3811 | ||||
| 3812 | if ((AssumedSPMDCount + KnownSPMDCount) && | |||
| 3813 | (AssumedNonSPMDCount + KnownNonSPMDCount)) | |||
| 3814 | return indicatePessimisticFixpoint(); | |||
| 3815 | ||||
| 3816 | auto &Ctx = getAnchorValue().getContext(); | |||
| 3817 | if (KnownSPMDCount || AssumedSPMDCount) { | |||
| 3818 | assert(KnownNonSPMDCount == 0 && AssumedNonSPMDCount == 0 &&((void)0) | |||
| 3819 | "Expected only SPMD kernels!")((void)0); | |||
| 3820 | // All reaching kernels are in SPMD mode. Update all function calls to | |||
| 3821 | // __kmpc_is_spmd_exec_mode to 1. | |||
| 3822 | SimplifiedValue = ConstantInt::get(Type::getInt8Ty(Ctx), true); | |||
| 3823 | } else if (KnownNonSPMDCount || AssumedNonSPMDCount) { | |||
| 3824 | assert(KnownSPMDCount == 0 && AssumedSPMDCount == 0 &&((void)0) | |||
| 3825 | "Expected only non-SPMD kernels!")((void)0); | |||
| 3826 | // All reaching kernels are in non-SPMD mode. Update all function | |||
| 3827 | // calls to __kmpc_is_spmd_exec_mode to 0. | |||
| 3828 | SimplifiedValue = ConstantInt::get(Type::getInt8Ty(Ctx), false); | |||
| 3829 | } else { | |||
| 3830 | // We have empty reaching kernels, therefore we cannot tell if the | |||
| 3831 | // associated call site can be folded. At this moment, SimplifiedValue | |||
| 3832 | // must be none. | |||
| 3833 | assert(!SimplifiedValue.hasValue() && "SimplifiedValue should be none")((void)0); | |||
| 3834 | } | |||
| 3835 | ||||
| 3836 | return SimplifiedValue == SimplifiedValueBefore ? ChangeStatus::UNCHANGED | |||
| 3837 | : ChangeStatus::CHANGED; | |||
| 3838 | } | |||
| 3839 | ||||
| 3840 | /// Fold __kmpc_is_generic_main_thread_id into a constant if possible. | |||
| 3841 | ChangeStatus foldIsGenericMainThread(Attributor &A) { | |||
| 3842 | Optional<Value *> SimplifiedValueBefore = SimplifiedValue; | |||
| 3843 | ||||
| 3844 | CallBase &CB = cast<CallBase>(getAssociatedValue()); | |||
| 3845 | Function *F = CB.getFunction(); | |||
| 3846 | const auto &ExecutionDomainAA = A.getAAFor<AAExecutionDomain>( | |||
| 3847 | *this, IRPosition::function(*F), DepClassTy::REQUIRED); | |||
| 3848 | ||||
| 3849 | if (!ExecutionDomainAA.isValidState()) | |||
| 3850 | return indicatePessimisticFixpoint(); | |||
| 3851 | ||||
| 3852 | auto &Ctx = getAnchorValue().getContext(); | |||
| 3853 | if (ExecutionDomainAA.isExecutedByInitialThreadOnly(CB)) | |||
| 3854 | SimplifiedValue = ConstantInt::get(Type::getInt8Ty(Ctx), true); | |||
| 3855 | else | |||
| 3856 | return indicatePessimisticFixpoint(); | |||
| 3857 | ||||
| 3858 | return SimplifiedValue == SimplifiedValueBefore ? ChangeStatus::UNCHANGED | |||
| 3859 | : ChangeStatus::CHANGED; | |||
| 3860 | } | |||
| 3861 | ||||
| 3862 | /// Fold __kmpc_parallel_level into a constant if possible. | |||
| 3863 | ChangeStatus foldParallelLevel(Attributor &A) { | |||
| 3864 | Optional<Value *> SimplifiedValueBefore = SimplifiedValue; | |||
| 3865 | ||||
| 3866 | auto &CallerKernelInfoAA = A.getAAFor<AAKernelInfo>( | |||
| 3867 | *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED); | |||
| 3868 | ||||
| 3869 | if (!CallerKernelInfoAA.ParallelLevels.isValidState()) | |||
| 3870 | return indicatePessimisticFixpoint(); | |||
| 3871 | ||||
| 3872 | if (!CallerKernelInfoAA.ReachingKernelEntries.isValidState()) | |||
| 3873 | return indicatePessimisticFixpoint(); | |||
| 3874 | ||||
| 3875 | if (CallerKernelInfoAA.ReachingKernelEntries.empty()) { | |||
| 3876 | assert(!SimplifiedValue.hasValue() &&((void)0) | |||
| 3877 | "SimplifiedValue should keep none at this point")((void)0); | |||
| 3878 | return ChangeStatus::UNCHANGED; | |||
| 3879 | } | |||
| 3880 | ||||
| 3881 | unsigned AssumedSPMDCount = 0, KnownSPMDCount = 0; | |||
| 3882 | unsigned AssumedNonSPMDCount = 0, KnownNonSPMDCount = 0; | |||
| 3883 | for (Kernel K : CallerKernelInfoAA.ReachingKernelEntries) { | |||
| 3884 | auto &AA = A.getAAFor<AAKernelInfo>(*this, IRPosition::function(*K), | |||
| 3885 | DepClassTy::REQUIRED); | |||
| 3886 | if (!AA.SPMDCompatibilityTracker.isValidState()) | |||
| 3887 | return indicatePessimisticFixpoint(); | |||
| 3888 | ||||
| 3889 | if (AA.SPMDCompatibilityTracker.isAssumed()) { | |||
| 3890 | if (AA.SPMDCompatibilityTracker.isAtFixpoint()) | |||
| 3891 | ++KnownSPMDCount; | |||
| 3892 | else | |||
| 3893 | ++AssumedSPMDCount; | |||
| 3894 | } else { | |||
| 3895 | if (AA.SPMDCompatibilityTracker.isAtFixpoint()) | |||
| 3896 | ++KnownNonSPMDCount; | |||
| 3897 | else | |||
| 3898 | ++AssumedNonSPMDCount; | |||
| 3899 | } | |||
| 3900 | } | |||
| 3901 | ||||
| 3902 | if ((AssumedSPMDCount + KnownSPMDCount) && | |||
| 3903 | (AssumedNonSPMDCount + KnownNonSPMDCount)) | |||
| 3904 | return indicatePessimisticFixpoint(); | |||
| 3905 | ||||
| 3906 | auto &Ctx = getAnchorValue().getContext(); | |||
| 3907 | // If the caller can only be reached by SPMD kernel entries, the parallel | |||
| 3908 | // level is 1. Similarly, if the caller can only be reached by non-SPMD | |||
| 3909 | // kernel entries, it is 0. | |||
| 3910 | if (AssumedSPMDCount || KnownSPMDCount) { | |||
| 3911 | assert(KnownNonSPMDCount == 0 && AssumedNonSPMDCount == 0 &&((void)0) | |||
| 3912 | "Expected only SPMD kernels!")((void)0); | |||
| 3913 | SimplifiedValue = ConstantInt::get(Type::getInt8Ty(Ctx), 1); | |||
| 3914 | } else { | |||
| 3915 | assert(KnownSPMDCount == 0 && AssumedSPMDCount == 0 &&((void)0) | |||
| 3916 | "Expected only non-SPMD kernels!")((void)0); | |||
| 3917 | SimplifiedValue = ConstantInt::get(Type::getInt8Ty(Ctx), 0); | |||
| 3918 | } | |||
| 3919 | return SimplifiedValue == SimplifiedValueBefore ? ChangeStatus::UNCHANGED | |||
| 3920 | : ChangeStatus::CHANGED; | |||
| 3921 | } | |||
| 3922 | ||||
| 3923 | ChangeStatus foldKernelFnAttribute(Attributor &A, llvm::StringRef Attr) { | |||
| 3924 | // Specialize only if all the calls agree with the attribute constant value | |||
| 3925 | int32_t CurrentAttrValue = -1; | |||
| 3926 | Optional<Value *> SimplifiedValueBefore = SimplifiedValue; | |||
| 3927 | ||||
| 3928 | auto &CallerKernelInfoAA = A.getAAFor<AAKernelInfo>( | |||
| 3929 | *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED); | |||
| 3930 | ||||
| 3931 | if (!CallerKernelInfoAA.ReachingKernelEntries.isValidState()) | |||
| 3932 | return indicatePessimisticFixpoint(); | |||
| 3933 | ||||
| 3934 | // Iterate over the kernels that reach this function | |||
| 3935 | for (Kernel K : CallerKernelInfoAA.ReachingKernelEntries) { | |||
| 3936 | int32_t NextAttrVal = -1; | |||
| 3937 | if (K->hasFnAttribute(Attr)) | |||
| 3938 | NextAttrVal = | |||
| 3939 | std::stoi(K->getFnAttribute(Attr).getValueAsString().str()); | |||
| 3940 | ||||
| 3941 | if (NextAttrVal == -1 || | |||
| 3942 | (CurrentAttrValue != -1 && CurrentAttrValue != NextAttrVal)) | |||
| 3943 | return indicatePessimisticFixpoint(); | |||
| 3944 | CurrentAttrValue = NextAttrVal; | |||
| 3945 | } | |||
| 3946 | ||||
| 3947 | if (CurrentAttrValue != -1) { | |||
| 3948 | auto &Ctx = getAnchorValue().getContext(); | |||
| 3949 | SimplifiedValue = | |||
| 3950 | ConstantInt::get(Type::getInt32Ty(Ctx), CurrentAttrValue); | |||
| 3951 | } | |||
| 3952 | return SimplifiedValue == SimplifiedValueBefore ? ChangeStatus::UNCHANGED | |||
| 3953 | : ChangeStatus::CHANGED; | |||
| 3954 | } | |||
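  // (Illustration, not part of the original source: the attributes consulted
  //  by foldKernelFnAttribute above are plain string function attributes that
  //  carry an integer value, so a frontend could attach one roughly like
  //
  //    KernelFn->addFnAttr("omp_target_thread_limit", "128");
  //
  //  where KernelFn and the value "128" are purely illustrative.)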
| 3955 | ||||
| 3956 | /// An optional value the associated value is assumed to fold to. That is, we | |||
| 3957 | /// assume the associated value (which is a call) can be replaced by this | |||
| 3958 | /// simplified value. | |||
| 3959 | Optional<Value *> SimplifiedValue; | |||
| 3960 | ||||
| 3961 | /// The runtime function kind of the callee of the associated call site. | |||
| 3962 | RuntimeFunction RFKind; | |||
| 3963 | }; | |||
| 3964 | ||||
| 3965 | } // namespace | |||
| 3966 | ||||
| 3967 | /// Register folding callsite | |||
| 3968 | void OpenMPOpt::registerFoldRuntimeCall(RuntimeFunction RF) { | |||
| 3969 | auto &RFI = OMPInfoCache.RFIs[RF]; | |||
| 3970 | RFI.foreachUse(SCC, [&](Use &U, Function &F) { | |||
| 3971 | CallInst *CI = OpenMPOpt::getCallIfRegularCall(U, &RFI); | |||
| 3972 | if (!CI) | |||
| 3973 | return false; | |||
| 3974 | A.getOrCreateAAFor<AAFoldRuntimeCall>( | |||
| 3975 | IRPosition::callsite_returned(*CI), /* QueryingAA */ nullptr, | |||
| 3976 | DepClassTy::NONE, /* ForceUpdate */ false, | |||
| 3977 | /* UpdateAfterInit */ false); | |||
| 3978 | return false; | |||
| 3979 | }); | |||
| 3980 | } | |||
| 3981 | ||||
| 3982 | void OpenMPOpt::registerAAs(bool IsModulePass) { | |||
| 3983 | if (SCC.empty()) | |||
| 3984 | ||||
| 3985 | return; | |||
| 3986 | if (IsModulePass) { | |||
| 3987 | // Ensure we create the AAKernelInfo AAs first and without triggering an | |||
| 3988 | // update. This will make sure we register all value simplification | |||
| 3989 | // callbacks before any other AA has the chance to create an AAValueSimplify | |||
| 3990 | // or similar. | |||
| 3991 | for (Function *Kernel : OMPInfoCache.Kernels) | |||
| 3992 | A.getOrCreateAAFor<AAKernelInfo>( | |||
| 3993 | IRPosition::function(*Kernel), /* QueryingAA */ nullptr, | |||
| 3994 | DepClassTy::NONE, /* ForceUpdate */ false, | |||
| 3995 | /* UpdateAfterInit */ false); | |||
| 3996 | ||||
| 3997 | ||||
| 3998 | registerFoldRuntimeCall(OMPRTL___kmpc_is_generic_main_thread_id); | |||
| 3999 | registerFoldRuntimeCall(OMPRTL___kmpc_is_spmd_exec_mode); | |||
| 4000 | registerFoldRuntimeCall(OMPRTL___kmpc_parallel_level); | |||
| 4001 | registerFoldRuntimeCall(OMPRTL___kmpc_get_hardware_num_threads_in_block); | |||
| 4002 | registerFoldRuntimeCall(OMPRTL___kmpc_get_hardware_num_blocks); | |||
| 4003 | } | |||
| 4004 | ||||
| 4005 | // Create CallSite AA for all Getters. | |||
| 4006 | for (int Idx = 0; Idx < OMPInfoCache.ICVs.size() - 1; ++Idx) { | |||
| 4007 | auto ICVInfo = OMPInfoCache.ICVs[static_cast<InternalControlVar>(Idx)]; | |||
| 4008 | ||||
| 4009 | auto &GetterRFI = OMPInfoCache.RFIs[ICVInfo.Getter]; | |||
| 4010 | ||||
| 4011 | auto CreateAA = [&](Use &U, Function &Caller) { | |||
| 4012 | CallInst *CI = OpenMPOpt::getCallIfRegularCall(U, &GetterRFI); | |||
| 4013 | if (!CI) | |||
| 4014 | return false; | |||
| 4015 | ||||
| 4016 | auto &CB = cast<CallBase>(*CI); | |||
| 4017 | ||||
| 4018 | IRPosition CBPos = IRPosition::callsite_function(CB); | |||
| 4019 | A.getOrCreateAAFor<AAICVTracker>(CBPos); | |||
| 4020 | return false; | |||
| 4021 | }; | |||
| 4022 | ||||
| 4023 | GetterRFI.foreachUse(SCC, CreateAA); | |||
| 4024 | } | |||
| 4025 | auto &GlobalizationRFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared]; | |||
| 4026 | auto CreateAA = [&](Use &U, Function &F) { | |||
| 4027 | A.getOrCreateAAFor<AAHeapToShared>(IRPosition::function(F)); | |||
| 4028 | return false; | |||
| 4029 | }; | |||
| 4030 | GlobalizationRFI.foreachUse(SCC, CreateAA); | |||
| 4031 | ||||
| 4032 | // Create an ExecutionDomain AA for every function and a HeapToStack AA for | |||
| 4033 | // every function if there is a device kernel. | |||
| 4034 | if (!isOpenMPDevice(M)) | |||
| 4035 | return; | |||
| 4036 | ||||
| 4037 | for (auto *F : SCC) { | |||
| 4038 | if (F->isDeclaration()) | |||
| 4039 | continue; | |||
| 4040 | ||||
| 4041 | A.getOrCreateAAFor<AAExecutionDomain>(IRPosition::function(*F)); | |||
| 4042 | A.getOrCreateAAFor<AAHeapToStack>(IRPosition::function(*F)); | |||
| 4043 | ||||
| 4044 | for (auto &I : instructions(*F)) { | |||
| 4045 | if (auto *LI = dyn_cast<LoadInst>(&I)) { | |||
| 4046 | bool UsedAssumedInformation = false; | |||
| 4047 | A.getAssumedSimplified(IRPosition::value(*LI), /* AA */ nullptr, | |||
| 4048 | UsedAssumedInformation); | |||
| 4049 | } | |||
| 4050 | } | |||
| 4051 | } | |||
| 4052 | } | |||
| 4053 | ||||
| 4054 | const char AAICVTracker::ID = 0; | |||
| 4055 | const char AAKernelInfo::ID = 0; | |||
| 4056 | const char AAExecutionDomain::ID = 0; | |||
| 4057 | const char AAHeapToShared::ID = 0; | |||
| 4058 | const char AAFoldRuntimeCall::ID = 0; | |||
| 4059 | ||||
| 4060 | AAICVTracker &AAICVTracker::createForPosition(const IRPosition &IRP, | |||
| 4061 | Attributor &A) { | |||
| 4062 | AAICVTracker *AA = nullptr; | |||
| 4063 | switch (IRP.getPositionKind()) { | |||
| 4064 | case IRPosition::IRP_INVALID: | |||
| 4065 | case IRPosition::IRP_FLOAT: | |||
| 4066 | case IRPosition::IRP_ARGUMENT: | |||
| 4067 | case IRPosition::IRP_CALL_SITE_ARGUMENT: | |||
| 4068 | llvm_unreachable("ICVTracker can only be created for function position!")__builtin_unreachable(); | |||
| 4069 | case IRPosition::IRP_RETURNED: | |||
| 4070 | AA = new (A.Allocator) AAICVTrackerFunctionReturned(IRP, A); | |||
| 4071 | break; | |||
| 4072 | case IRPosition::IRP_CALL_SITE_RETURNED: | |||
| 4073 | AA = new (A.Allocator) AAICVTrackerCallSiteReturned(IRP, A); | |||
| 4074 | break; | |||
| 4075 | case IRPosition::IRP_CALL_SITE: | |||
| 4076 | AA = new (A.Allocator) AAICVTrackerCallSite(IRP, A); | |||
| 4077 | break; | |||
| 4078 | case IRPosition::IRP_FUNCTION: | |||
| 4079 | AA = new (A.Allocator) AAICVTrackerFunction(IRP, A); | |||
| 4080 | break; | |||
| 4081 | } | |||
| 4082 | ||||
| 4083 | return *AA; | |||
| 4084 | } | |||
| 4085 | ||||
| 4086 | AAExecutionDomain &AAExecutionDomain::createForPosition(const IRPosition &IRP, | |||
| 4087 | Attributor &A) { | |||
| 4088 | AAExecutionDomainFunction *AA = nullptr; | |||
| 4089 | switch (IRP.getPositionKind()) { | |||
| 4090 | case IRPosition::IRP_INVALID: | |||
| 4091 | case IRPosition::IRP_FLOAT: | |||
| 4092 | case IRPosition::IRP_ARGUMENT: | |||
| 4093 | case IRPosition::IRP_CALL_SITE_ARGUMENT: | |||
| 4094 | case IRPosition::IRP_RETURNED: | |||
| 4095 | case IRPosition::IRP_CALL_SITE_RETURNED: | |||
| 4096 | case IRPosition::IRP_CALL_SITE: | |||
| 4097 | llvm_unreachable(__builtin_unreachable() | |||
| 4098 | "AAExecutionDomain can only be created for function position!")__builtin_unreachable(); | |||
| 4099 | case IRPosition::IRP_FUNCTION: | |||
| 4100 | AA = new (A.Allocator) AAExecutionDomainFunction(IRP, A); | |||
| 4101 | break; | |||
| 4102 | } | |||
| 4103 | ||||
| 4104 | return *AA; | |||
| 4105 | } | |||
| 4106 | ||||
| 4107 | AAHeapToShared &AAHeapToShared::createForPosition(const IRPosition &IRP, | |||
| 4108 | Attributor &A) { | |||
| 4109 | AAHeapToSharedFunction *AA = nullptr; | |||
| 4110 | switch (IRP.getPositionKind()) { | |||
| 4111 | case IRPosition::IRP_INVALID: | |||
| 4112 | case IRPosition::IRP_FLOAT: | |||
| 4113 | case IRPosition::IRP_ARGUMENT: | |||
| 4114 | case IRPosition::IRP_CALL_SITE_ARGUMENT: | |||
| 4115 | case IRPosition::IRP_RETURNED: | |||
| 4116 | case IRPosition::IRP_CALL_SITE_RETURNED: | |||
| 4117 | case IRPosition::IRP_CALL_SITE: | |||
| 4118 | llvm_unreachable(__builtin_unreachable() | |||
| 4119 | "AAHeapToShared can only be created for function position!")__builtin_unreachable(); | |||
| 4120 | case IRPosition::IRP_FUNCTION: | |||
| 4121 | AA = new (A.Allocator) AAHeapToSharedFunction(IRP, A); | |||
| 4122 | break; | |||
| 4123 | } | |||
| 4124 | ||||
| 4125 | return *AA; | |||
| 4126 | } | |||
| 4127 | ||||
| 4128 | AAKernelInfo &AAKernelInfo::createForPosition(const IRPosition &IRP, | |||
| 4129 | Attributor &A) { | |||
| 4130 | AAKernelInfo *AA = nullptr; | |||
| 4131 | switch (IRP.getPositionKind()) { | |||
| 4132 | case IRPosition::IRP_INVALID: | |||
| 4133 | case IRPosition::IRP_FLOAT: | |||
| 4134 | case IRPosition::IRP_ARGUMENT: | |||
| 4135 | case IRPosition::IRP_RETURNED: | |||
| 4136 | case IRPosition::IRP_CALL_SITE_RETURNED: | |||
| 4137 | case IRPosition::IRP_CALL_SITE_ARGUMENT: | |||
| 4138 | llvm_unreachable("KernelInfo can only be created for function position!")__builtin_unreachable(); | |||
| 4139 | case IRPosition::IRP_CALL_SITE: | |||
| 4140 | AA = new (A.Allocator) AAKernelInfoCallSite(IRP, A); | |||
| 4141 | break; | |||
| 4142 | case IRPosition::IRP_FUNCTION: | |||
| 4143 | AA = new (A.Allocator) AAKernelInfoFunction(IRP, A); | |||
| 4144 | break; | |||
| 4145 | } | |||
| 4146 | ||||
| 4147 | return *AA; | |||
| 4148 | } | |||
| 4149 | ||||
| 4150 | AAFoldRuntimeCall &AAFoldRuntimeCall::createForPosition(const IRPosition &IRP, | |||
| 4151 | Attributor &A) { | |||
| 4152 | AAFoldRuntimeCall *AA = nullptr; | |||
| 4153 | switch (IRP.getPositionKind()) { | |||
| 4154 | case IRPosition::IRP_INVALID: | |||
| 4155 | case IRPosition::IRP_FLOAT: | |||
| 4156 | case IRPosition::IRP_ARGUMENT: | |||
| 4157 | case IRPosition::IRP_RETURNED: | |||
| 4158 | case IRPosition::IRP_FUNCTION: | |||
| 4159 | case IRPosition::IRP_CALL_SITE: | |||
| 4160 | case IRPosition::IRP_CALL_SITE_ARGUMENT: | |||
| 4161 | llvm_unreachable("KernelInfo can only be created for call site position!")__builtin_unreachable(); | |||
| 4162 | case IRPosition::IRP_CALL_SITE_RETURNED: | |||
| 4163 | AA = new (A.Allocator) AAFoldRuntimeCallCallSiteReturned(IRP, A); | |||
| 4164 | break; | |||
| 4165 | } | |||
| 4166 | ||||
| 4167 | return *AA; | |||
| 4168 | } | |||
| 4169 | ||||
| 4170 | PreservedAnalyses OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) { | |||
| 4171 | if (!containsOpenMP(M)) | |||
| 4172 | return PreservedAnalyses::all(); | |||
| 4173 | if (DisableOpenMPOptimizations) | |||
| 4174 | return PreservedAnalyses::all(); | |||
| 4175 | ||||
| 4176 | FunctionAnalysisManager &FAM = | |||
| 4177 | AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); | |||
| 4178 | KernelSet Kernels = getDeviceKernels(M); | |||
| 4179 | ||||
| 4180 | auto IsCalled = [&](Function &F) { | |||
| 4181 | if (Kernels.contains(&F)) | |||
| 4182 | return true; | |||
| 4183 | for (const User *U : F.users()) | |||
| 4184 | if (!isa<BlockAddress>(U)) | |||
| 4185 | return true; | |||
| 4186 | return false; | |||
| 4187 | }; | |||
| 4188 | ||||
| 4189 | auto EmitRemark = [&](Function &F) { | |||
| 4190 | auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F); | |||
| 4191 | ORE.emit([&]() { | |||
| 4192 | OptimizationRemarkAnalysis ORA(DEBUG_TYPE"openmp-opt", "OMP140", &F); | |||
| 4193 | return ORA << "Could not internalize function. " | |||
| 4194 | << "Some optimizations may not be possible. [OMP140]"; | |||
| 4195 | }); | |||
| 4196 | }; | |||
| 4197 | ||||
| 4198 | // Create internal copies of each function if this is a kernel Module. This | |||
| 4199 | // allows interprocedural passes to see every call edge. | |||
| 4200 | DenseMap<Function *, Function *> InternalizedMap; | |||
| 4201 | if (isOpenMPDevice(M)) { | |||
| 4202 | SmallPtrSet<Function *, 16> InternalizeFns; | |||
| 4203 | for (Function &F : M) | |||
| 4204 | if (!F.isDeclaration() && !Kernels.contains(&F) && IsCalled(F) && | |||
| 4205 | !DisableInternalization) { | |||
| 4206 | if (Attributor::isInternalizable(F)) { | |||
| 4207 | InternalizeFns.insert(&F); | |||
| 4208 | } else if (!F.hasLocalLinkage() && !F.hasFnAttribute(Attribute::Cold)) { | |||
| 4209 | EmitRemark(F); | |||
| 4210 | } | |||
| 4211 | } | |||
| 4212 | ||||
| 4213 | Attributor::internalizeFunctions(InternalizeFns, InternalizedMap); | |||
| 4214 | } | |||
| 4215 | ||||
| 4216 | // Look at every function in the Module unless it was internalized. | |||
| 4217 | SmallVector<Function *, 16> SCC; | |||
| 4218 | for (Function &F : M) | |||
| 4219 | if (!F.isDeclaration() && !InternalizedMap.lookup(&F)) | |||
| 4220 | SCC.push_back(&F); | |||
| 4221 | ||||
| 4222 | if (SCC.empty()) | |||
| 4223 | return PreservedAnalyses::all(); | |||
| 4224 | ||||
| 4225 | AnalysisGetter AG(FAM); | |||
| 4226 | ||||
| 4227 | auto OREGetter = [&FAM](Function *F) -> OptimizationRemarkEmitter & { | |||
| 4228 | return FAM.getResult<OptimizationRemarkEmitterAnalysis>(*F); | |||
| 4229 | }; | |||
| 4230 | ||||
| 4231 | BumpPtrAllocator Allocator; | |||
| 4232 | CallGraphUpdater CGUpdater; | |||
| 4233 | ||||
| 4234 | SetVector<Function *> Functions(SCC.begin(), SCC.end()); | |||
| 4235 | OMPInformationCache InfoCache(M, AG, Allocator, /*CGSCC*/ Functions, Kernels); | |||
| 4236 | ||||
| 4237 | unsigned MaxFixpointIterations = (isOpenMPDevice(M)) ? 128 : 32; | |||
| 4238 | Attributor A(Functions, InfoCache, CGUpdater, nullptr, true, false, | |||
| 4239 | MaxFixpointIterations, OREGetter, DEBUG_TYPE"openmp-opt"); | |||
| 4240 | ||||
| 4241 | OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A); | |||
| 4242 | bool Changed = OMPOpt.run(true); | |||
| 4243 | if (Changed) | |||
| 4244 | return PreservedAnalyses::none(); | |||
| 4245 | ||||
| 4246 | return PreservedAnalyses::all(); | |||
| 4247 | } | |||
| 4248 | ||||
| 4249 | PreservedAnalyses OpenMPOptCGSCCPass::run(LazyCallGraph::SCC &C, | |||
| 4250 | CGSCCAnalysisManager &AM, | |||
| 4251 | LazyCallGraph &CG, | |||
| 4252 | CGSCCUpdateResult &UR) { | |||
| 4253 | if (!containsOpenMP(*C.begin()->getFunction().getParent())) | |||
| 4254 | return PreservedAnalyses::all(); | |||
| 4255 | if (DisableOpenMPOptimizations) | |||
| 4256 | return PreservedAnalyses::all(); | |||
| 4257 | ||||
| 4258 | SmallVector<Function *, 16> SCC; | |||
| 4259 | // If there are kernels in the module, we have to run on all SCCs. | |||
| 4260 | for (LazyCallGraph::Node &N : C) { | |||
| 4261 | Function *Fn = &N.getFunction(); | |||
| 4262 | SCC.push_back(Fn); | |||
| 4263 | } | |||
| 4264 | ||||
| 4265 | if (SCC.empty()) | |||
| 4266 | return PreservedAnalyses::all(); | |||
| 4267 | ||||
| 4268 | Module &M = *C.begin()->getFunction().getParent(); | |||
| 4269 | ||||
| 4270 | KernelSet Kernels = getDeviceKernels(M); | |||
| 4271 | ||||
| 4272 | FunctionAnalysisManager &FAM = | |||
| 4273 | AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager(); | |||
| 4274 | ||||
| 4275 | AnalysisGetter AG(FAM); | |||
| 4276 | ||||
| 4277 | auto OREGetter = [&FAM](Function *F) -> OptimizationRemarkEmitter & { | |||
| 4278 | return FAM.getResult<OptimizationRemarkEmitterAnalysis>(*F); | |||
| 4279 | }; | |||
| 4280 | ||||
| 4281 | BumpPtrAllocator Allocator; | |||
| 4282 | CallGraphUpdater CGUpdater; | |||
| 4283 | CGUpdater.initialize(CG, C, AM, UR); | |||
| 4284 | ||||
| 4285 | SetVector<Function *> Functions(SCC.begin(), SCC.end()); | |||
| 4286 | OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG, Allocator, | |||
| 4287 | /*CGSCC*/ Functions, Kernels); | |||
| 4288 | ||||
| 4289 | unsigned MaxFixpointIterations = (isOpenMPDevice(M)) ? 128 : 32; | |||
| 4290 | Attributor A(Functions, InfoCache, CGUpdater, nullptr, false, true, | |||
| 4291 | MaxFixpointIterations, OREGetter, DEBUG_TYPE"openmp-opt"); | |||
| 4292 | ||||
| 4293 | OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A); | |||
| 4294 | bool Changed = OMPOpt.run(false); | |||
| 4295 | if (Changed) | |||
| 4296 | return PreservedAnalyses::none(); | |||
| 4297 | ||||
| 4298 | return PreservedAnalyses::all(); | |||
| 4299 | } | |||
| 4300 | ||||
| 4301 | namespace { | |||
| 4302 | ||||
| 4303 | struct OpenMPOptCGSCCLegacyPass : public CallGraphSCCPass { | |||
| 4304 | CallGraphUpdater CGUpdater; | |||
| 4305 | static char ID; | |||
| 4306 | ||||
| 4307 | OpenMPOptCGSCCLegacyPass() : CallGraphSCCPass(ID) { | |||
| 4308 | initializeOpenMPOptCGSCCLegacyPassPass(*PassRegistry::getPassRegistry()); | |||
| 4309 | } | |||
| 4310 | ||||
| 4311 | void getAnalysisUsage(AnalysisUsage &AU) const override { | |||
| 4312 | CallGraphSCCPass::getAnalysisUsage(AU); | |||
| 4313 | } | |||
| 4314 | ||||
| 4315 | bool runOnSCC(CallGraphSCC &CGSCC) override { | |||
| 4316 | if (!containsOpenMP(CGSCC.getCallGraph().getModule())) | |||
| 4317 | return false; | |||
| 4318 | if (DisableOpenMPOptimizations || skipSCC(CGSCC)) | |||
| 4319 | return false; | |||
| 4320 | ||||
| 4321 | SmallVector<Function *, 16> SCC; | |||
| 4322 | // If there are kernels in the module, we have to run on all SCCs. | |||
| 4323 | for (CallGraphNode *CGN : CGSCC) { | |||
| 4324 | Function *Fn = CGN->getFunction(); | |||
| 4325 | if (!Fn || Fn->isDeclaration()) | |||
| 4326 | continue; | |||
| 4327 | SCC.push_back(Fn); | |||
| 4328 | } | |||
| 4329 | ||||
| 4330 | if (SCC.empty()) | |||
| 4331 | return false; | |||
| 4332 | ||||
| 4333 | Module &M = CGSCC.getCallGraph().getModule(); | |||
| 4334 | KernelSet Kernels = getDeviceKernels(M); | |||
| 4335 | ||||
| 4336 | CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph(); | |||
| 4337 | CGUpdater.initialize(CG, CGSCC); | |||
| 4338 | ||||
| 4339 | // Maintain a map of functions to avoid rebuilding the ORE | |||
| 4340 | DenseMap<Function *, std::unique_ptr<OptimizationRemarkEmitter>> OREMap; | |||
| 4341 | auto OREGetter = [&OREMap](Function *F) -> OptimizationRemarkEmitter & { | |||
| 4342 | std::unique_ptr<OptimizationRemarkEmitter> &ORE = OREMap[F]; | |||
| 4343 | if (!ORE) | |||
| 4344 | ORE = std::make_unique<OptimizationRemarkEmitter>(F); | |||
| 4345 | return *ORE; | |||
| 4346 | }; | |||
| 4347 | ||||
| 4348 | AnalysisGetter AG; | |||
| 4349 | SetVector<Function *> Functions(SCC.begin(), SCC.end()); | |||
| 4350 | BumpPtrAllocator Allocator; | |||
| 4351 | OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG, | |||
| 4352 | Allocator, | |||
| 4353 | /*CGSCC*/ Functions, Kernels); | |||
| 4354 | ||||
| 4355 | unsigned MaxFixpointIterations = (isOpenMPDevice(M)) ? 128 : 32; | |||
| 4356 | Attributor A(Functions, InfoCache, CGUpdater, nullptr, false, true, | |||
| 4357 | MaxFixpointIterations, OREGetter, DEBUG_TYPE"openmp-opt"); | |||
| 4358 | ||||
| 4359 | OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A); | |||
| 4360 | return OMPOpt.run(false); | |||
| 4361 | } | |||
| 4362 | ||||
| 4363 | bool doFinalization(CallGraph &CG) override { return CGUpdater.finalize(); } | |||
| 4364 | }; | |||
| 4365 | ||||
| 4366 | } // end anonymous namespace | |||
| 4367 | ||||
| 4368 | KernelSet llvm::omp::getDeviceKernels(Module &M) { | |||
| 4369 | // TODO: Create a more cross-platform way of determining device kernels. | |||
| 4370 | NamedMDNode *MD = M.getOrInsertNamedMetadata("nvvm.annotations"); | |||
| 4371 | KernelSet Kernels; | |||
| 4372 | ||||
| 4373 | if (!MD) | |||
| 4374 | return Kernels; | |||
| 4375 | ||||
| 4376 | for (auto *Op : MD->operands()) { | |||
| 4377 | if (Op->getNumOperands() < 2) | |||
| 4378 | continue; | |||
| 4379 | MDString *KindID = dyn_cast<MDString>(Op->getOperand(1)); | |||
| 4380 | if (!KindID || KindID->getString() != "kernel") | |||
| 4381 | continue; | |||
| 4382 | ||||
| 4383 | Function *KernelFn = | |||
| 4384 | mdconst::dyn_extract_or_null<Function>(Op->getOperand(0)); | |||
| 4385 | if (!KernelFn) | |||
| 4386 | continue; | |||
| 4387 | ||||
| 4388 | ++NumOpenMPTargetRegionKernels; | |||
| 4389 | ||||
| 4390 | Kernels.insert(KernelFn); | |||
| 4391 | } | |||
| 4392 | ||||
| 4393 | return Kernels; | |||
| 4394 | } | |||
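As a point of reference for the metadata walk in getDeviceKernels above, the entries it matches can be produced with the ordinary metadata APIs. The sketch below is illustrative only and not part of the original file; M and KernelFn stand for an existing Module and kernel Function, and the trailing i32 1 operand mirrors the usual NVPTX-style annotation.

  // Illustrative sketch: record KernelFn so that getDeviceKernels() finds it
  // (operand 0 = the function, operand 1 = the "kernel" kind string).
  LLVMContext &Ctx = M.getContext();
  NamedMDNode *Annots = M.getOrInsertNamedMetadata("nvvm.annotations");
  Metadata *Ops[] = {
      ConstantAsMetadata::get(KernelFn), MDString::get(Ctx, "kernel"),
      ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(Ctx), 1))};
  Annots->addOperand(MDNode::get(Ctx, Ops));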
| 4395 | ||||
| 4396 | bool llvm::omp::containsOpenMP(Module &M) { | |||
| 4397 | Metadata *MD = M.getModuleFlag("openmp"); | |||
| 4398 | if (!MD) | |||
| 4399 | return false; | |||
| 4400 | ||||
| 4401 | return true; | |||
| 4402 | } | |||
| 4403 | ||||
| 4404 | bool llvm::omp::isOpenMPDevice(Module &M) { | |||
| 4405 | Metadata *MD = M.getModuleFlag("openmp-device"); | |||
| 4406 | if (!MD) | |||
| 4407 | return false; | |||
| 4408 | ||||
| 4409 | return true; | |||
| 4410 | } | |||
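Both predicates above only test for the presence of a module flag. The following sketch, illustrative and not part of the original source, shows how a module could be tagged so that containsOpenMP and isOpenMPDevice return true; the flag values are examples (the frontend normally stores the OpenMP version there), and nothing in this pass depends on the concrete numbers.

  // Illustrative only: mark M as an OpenMP device module.
  M.addModuleFlag(Module::Max, "openmp", 50);
  M.addModuleFlag(Module::Max, "openmp-device", 50);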
| 4411 | ||||
| 4412 | char OpenMPOptCGSCCLegacyPass::ID = 0; | |||
| 4413 | ||||
| 4414 | INITIALIZE_PASS_BEGIN(OpenMPOptCGSCCLegacyPass, "openmp-opt-cgscc",static void *initializeOpenMPOptCGSCCLegacyPassPassOnce(PassRegistry &Registry) { | |||
| 4415 | "OpenMP specific optimizations", false, false)static void *initializeOpenMPOptCGSCCLegacyPassPassOnce(PassRegistry &Registry) { | |||
| 4416 | INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)initializeCallGraphWrapperPassPass(Registry); | |||
| 4417 | INITIALIZE_PASS_END(OpenMPOptCGSCCLegacyPass, "openmp-opt-cgscc",PassInfo *PI = new PassInfo( "OpenMP specific optimizations", "openmp-opt-cgscc", &OpenMPOptCGSCCLegacyPass::ID, PassInfo ::NormalCtor_t(callDefaultCtor<OpenMPOptCGSCCLegacyPass> ), false, false); Registry.registerPass(*PI, true); return PI ; } static llvm::once_flag InitializeOpenMPOptCGSCCLegacyPassPassFlag ; void llvm::initializeOpenMPOptCGSCCLegacyPassPass(PassRegistry &Registry) { llvm::call_once(InitializeOpenMPOptCGSCCLegacyPassPassFlag , initializeOpenMPOptCGSCCLegacyPassPassOnce, std::ref(Registry )); } | |||
| 4418 | "OpenMP specific optimizations", false, false)PassInfo *PI = new PassInfo( "OpenMP specific optimizations", "openmp-opt-cgscc", &OpenMPOptCGSCCLegacyPass::ID, PassInfo ::NormalCtor_t(callDefaultCtor<OpenMPOptCGSCCLegacyPass> ), false, false); Registry.registerPass(*PI, true); return PI ; } static llvm::once_flag InitializeOpenMPOptCGSCCLegacyPassPassFlag ; void llvm::initializeOpenMPOptCGSCCLegacyPassPass(PassRegistry &Registry) { llvm::call_once(InitializeOpenMPOptCGSCCLegacyPassPassFlag , initializeOpenMPOptCGSCCLegacyPassPassOnce, std::ref(Registry )); } | |||
| 4419 | ||||
| 4420 | Pass *llvm::createOpenMPOptCGSCCLegacyPass() { | |||
| 4421 | return new OpenMPOptCGSCCLegacyPass(); | |||
| 4422 | } |
| File: | llvm/include/llvm/Transforms/IPO/Attributor.h |
| 1 | //===- Attributor.h --- Module-wide attribute deduction ---------*- C++ -*-===// | ||||||||
| 2 | // | ||||||||
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||||||||
| 4 | // See https://llvm.org/LICENSE.txt for license information. | ||||||||
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||||||||
| 6 | // | ||||||||
| 7 | //===----------------------------------------------------------------------===// | ||||||||
| 8 | // | ||||||||
| 9 | // Attributor: An inter procedural (abstract) "attribute" deduction framework. | ||||||||
| 10 | // | ||||||||
| 11 | // The Attributor framework is an inter procedural abstract analysis (fixpoint | ||||||||
| 12 | // iteration analysis). The goal is to allow easy deduction of new attributes as | ||||||||
| 13 | // well as information exchange between abstract attributes in-flight. | ||||||||
| 14 | // | ||||||||
| 15 | // The Attributor class is the driver and the link between the various abstract | ||||||||
| 16 | // attributes. The Attributor will iterate until a fixpoint state is reached by | ||||||||
| 17 | // all abstract attributes in-flight, or until it will enforce a pessimistic fix | ||||||||
| 18 | // point because an iteration limit is reached. | ||||||||
| 19 | // | ||||||||
| 20 | // Abstract attributes, derived from the AbstractAttribute class, actually | ||||||||
| 21 | // describe properties of the code. They can correspond to actual LLVM-IR | ||||||||
| 22 | // attributes, or they can be more general, ultimately unrelated to LLVM-IR | ||||||||
| 23 | // attributes. The latter is useful when an abstract attribute provides | ||||||||
| 24 | // information to other abstract attributes in-flight but we might not want to | ||||||||
| 25 | // manifest the information. The Attributor allows to query in-flight abstract | ||||||||
| 26 | // attributes through the `Attributor::getAAFor` method (see the method | ||||||||
| 27 | // description for an example). If the method is used by an abstract attribute | ||||||||
| 28 | // P, and it results in an abstract attribute Q, the Attributor will | ||||||||
| 29 | // automatically capture a potential dependence from Q to P. This dependence | ||||||||
| 30 | // will cause P to be reevaluated whenever Q changes in the future. | ||||||||
| 31 | // | ||||||||
| 32 | // The Attributor will only reevaluate abstract attributes that might have | ||||||||
| 33 | // changed since the last iteration. That means that the Attributor will not | ||||||||
| 34 | // revisit all instructions/blocks/functions in the module but only query | ||||||||
| 35 | // an update from a subset of the abstract attributes. | ||||||||
| 36 | // | ||||||||
| 37 | // The update method `AbstractAttribute::updateImpl` is implemented by the | ||||||||
| 38 | // specific "abstract attribute" subclasses. The method is invoked whenever the | ||||||||
| 39 | // currently assumed state (see the AbstractState class) might not be valid | ||||||||
| 40 | // anymore. This can, for example, happen if the state was dependent on another | ||||||||
| 41 | // abstract attribute that changed. In every invocation, the update method has | ||||||||
| 42 | // to adjust the internal state of an abstract attribute to a point that is | ||||||||
| 43 | // justifiable by the underlying IR and the current state of abstract attributes | ||||||||
| 44 | // in-flight. Since the IR is given and assumed to be valid, the information | ||||||||
| 45 | // derived from it can be assumed to hold. However, information derived from | ||||||||
| 46 | // other abstract attributes is conditional on various things. If the justifying | ||||||||
| 47 | // state changed, the `updateImpl` has to revisit the situation and potentially | ||||||||
| 48 | // find another justification or limit the optimistic assumptions made. | ||||||||
| 49 | // | ||||||||
| 50 | // Change is the key in this framework. Until a state of no-change, thus a | ||||||||
| 51 | // fixpoint, is reached, the Attributor will query the abstract attributes | ||||||||
| 52 | // in-flight to re-evaluate their state. If the (current) state is too | ||||||||
| 53 | // optimistic, hence it cannot be justified anymore through other abstract | ||||||||
| 54 | // attributes or the state of the IR, the state of the abstract attribute will | ||||||||
| 55 | // have to change. Generally, we assume abstract attribute state to be a finite | ||||||||
| 56 | // height lattice and the update function to be monotone. However, these | ||||||||
| 57 | // conditions are not enforced because the iteration limit will guarantee | ||||||||
| 58 | // termination. If an optimistic fixpoint is reached, or a pessimistic fix | ||||||||
| 59 | // point is enforced after a timeout, the abstract attributes are tasked to | ||||||||
| 60 | // manifest their result in the IR for passes to come. | ||||||||
| 61 | // | ||||||||
| 62 | // Attribute manifestation is not mandatory. If desired, there is support to | ||||||||
| 63 | // generate a single or multiple LLVM-IR attributes already in the helper struct | ||||||||
| 64 | // IRAttribute. In the simplest case, a subclass inherits from IRAttribute with | ||||||||
| 65 | // a proper Attribute::AttrKind as template parameter. The Attributor | ||||||||
| 66 | // manifestation framework will then create and place a new attribute if it is | ||||||||
| 67 | // allowed to do so (based on the abstract state). Other use cases can be | ||||||||
| 68 | // achieved by overloading AbstractAttribute or IRAttribute methods. | ||||||||
| 69 | // | ||||||||
| 70 | // | ||||||||
| 71 | // The "mechanics" of adding a new "abstract attribute": | ||||||||
| 72 | // - Define a class (transitively) inheriting from AbstractAttribute and one | ||||||||
| 73 | // (which could be the same) that (transitively) inherits from AbstractState. | ||||||||
| 74 | // For the latter, consider the already available BooleanState and | ||||||||
| 75 | // {Inc,Dec,Bit}IntegerState if they fit your needs, e.g., if you require only | ||||||||
| 76 | // number tracking or bit-encoding. | ||||||||
| 77 | // - Implement all pure methods. Also use overloading if the attribute is not | ||||||||
| 78 | // conforming with the "default" behavior: A (set of) LLVM-IR attribute(s) for | ||||||||
| 79 | // an argument, call site argument, function return value, or function. See | ||||||||
| 80 | // the class and method descriptions for more information on the two | ||||||||
| 81 | // "Abstract" classes and their respective methods. | ||||||||
| 82 | // - Register opportunities for the new abstract attribute in the | ||||||||
| 83 | // `Attributor::identifyDefaultAbstractAttributes` method if it should be | ||||||||
| 84 | // counted as a 'default' attribute. | ||||||||
| 85 | // - Add sufficient tests. | ||||||||
| 86 | // - Add a Statistics object for bookkeeping. If it is a simple (set of) | ||||||||
| 87 | // attribute(s) manifested through the Attributor manifestation framework, see | ||||||||
| 88 | // the bookkeeping function in Attributor.cpp. | ||||||||
| 89 | // - If instructions with a certain opcode are interesting to the attribute, add | ||||||||
| 90 | // that opcode to the switch in `Attributor::identifyAbstractAttributes`. This | ||||||||
| 91 | // will make it possible to query all those instructions through the | ||||||||
| 92 | // `InformationCache::getOpcodeInstMapForFunction` interface and eliminate the | ||||||||
| 93 | // need to traverse the IR repeatedly. | ||||||||
| 94 | // | ||||||||
| 95 | //===----------------------------------------------------------------------===// | ||||||||
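To make the checklist above concrete, here is a deliberately minimal sketch of the shape such a subclass usually takes. It is an illustration only, not part of this header; the name AAAlwaysSimple is invented, and it leans on the StateWrapper, BooleanState, IRPosition, Attributor, and ChangeStatus helpers declared later in this file.

// Minimal illustrative abstract attribute (hypothetical, for exposition only).
struct AAAlwaysSimple : public StateWrapper<BooleanState, AbstractAttribute> {
  using Base = StateWrapper<BooleanState, AbstractAttribute>;
  AAAlwaysSimple(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

  // Factory the Attributor uses to create this attribute for a position.
  static AAAlwaysSimple &createForPosition(const IRPosition &IRP, Attributor &A);

  // Re-justify the assumed state; return CHANGED if it had to be adjusted.
  // Querying other attributes via A.getAAFor<...>() here records the
  // dependences described above.
  ChangeStatus updateImpl(Attributor &A) override {
    return ChangeStatus::UNCHANGED;
  }

  // Write the deduced result back into the IR once a fixpoint is reached.
  ChangeStatus manifest(Attributor &A) override {
    return ChangeStatus::UNCHANGED;
  }

  const std::string getAsStr() const override { return "always-simple"; }
  const std::string getName() const override { return "AAAlwaysSimple"; }
  const char *getIdAddr() const override { return &ID; }
  void trackStatistics() const override {}
  static bool classof(const AbstractAttribute *AA) {
    return AA->getIdAddr() == &ID;
  }
  static const char ID;
};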
| 96 | |||||||||
| 97 | #ifndef LLVM_TRANSFORMS_IPO_ATTRIBUTOR_H | ||||||||
| 98 | #define LLVM_TRANSFORMS_IPO_ATTRIBUTOR_H | ||||||||
| 99 | |||||||||
| 100 | #include "llvm/ADT/DenseSet.h" | ||||||||
| 101 | #include "llvm/ADT/GraphTraits.h" | ||||||||
| 102 | #include "llvm/ADT/MapVector.h" | ||||||||
| 103 | #include "llvm/ADT/STLExtras.h" | ||||||||
| 104 | #include "llvm/ADT/SetVector.h" | ||||||||
| 105 | #include "llvm/ADT/Triple.h" | ||||||||
| 106 | #include "llvm/ADT/iterator.h" | ||||||||
| 107 | #include "llvm/Analysis/AssumeBundleQueries.h" | ||||||||
| 108 | #include "llvm/Analysis/CFG.h" | ||||||||
| 109 | #include "llvm/Analysis/CGSCCPassManager.h" | ||||||||
| 110 | #include "llvm/Analysis/LazyCallGraph.h" | ||||||||
| 111 | #include "llvm/Analysis/LoopInfo.h" | ||||||||
| 112 | #include "llvm/Analysis/MustExecute.h" | ||||||||
| 113 | #include "llvm/Analysis/OptimizationRemarkEmitter.h" | ||||||||
| 114 | #include "llvm/Analysis/PostDominators.h" | ||||||||
| 115 | #include "llvm/Analysis/TargetLibraryInfo.h" | ||||||||
| 116 | #include "llvm/IR/AbstractCallSite.h" | ||||||||
| 117 | #include "llvm/IR/ConstantRange.h" | ||||||||
| 118 | #include "llvm/IR/PassManager.h" | ||||||||
| 119 | #include "llvm/Support/Allocator.h" | ||||||||
| 120 | #include "llvm/Support/Casting.h" | ||||||||
| 121 | #include "llvm/Support/GraphWriter.h" | ||||||||
| 122 | #include "llvm/Support/TimeProfiler.h" | ||||||||
| 123 | #include "llvm/Transforms/Utils/CallGraphUpdater.h" | ||||||||
| 124 | |||||||||
| 125 | namespace llvm { | ||||||||
| 126 | |||||||||
| 127 | struct AADepGraphNode; | ||||||||
| 128 | struct AADepGraph; | ||||||||
| 129 | struct Attributor; | ||||||||
| 130 | struct AbstractAttribute; | ||||||||
| 131 | struct InformationCache; | ||||||||
| 132 | struct AAIsDead; | ||||||||
| 133 | struct AttributorCallGraph; | ||||||||
| 134 | |||||||||
| 135 | class AAManager; | ||||||||
| 136 | class AAResults; | ||||||||
| 137 | class Function; | ||||||||
| 138 | |||||||||
| 139 | /// Abstract Attribute helper functions. | ||||||||
| 140 | namespace AA { | ||||||||
| 141 | |||||||||
| 142 | /// Return true if \p V is dynamically unique, that is, there are no two | ||||||||
| 143 | /// "instances" of \p V at runtime with different values. | ||||||||
| 144 | bool isDynamicallyUnique(Attributor &A, const AbstractAttribute &QueryingAA, | ||||||||
| 145 | const Value &V); | ||||||||
| 146 | |||||||||
| 147 | /// Return true if \p V is a valid value in \p Scope, that is a constant or an | ||||||||
| 148 | /// instruction/argument of \p Scope. | ||||||||
| 149 | bool isValidInScope(const Value &V, const Function *Scope); | ||||||||
| 150 | |||||||||
| 151 | /// Return true if \p V is a valid value at position \p CtxI, that is a | ||||||||
| 152 | /// constant, an argument of the same function as \p CtxI, or an instruction in | ||||||||
| 153 | /// that function that dominates \p CtxI. | ||||||||
| 154 | bool isValidAtPosition(const Value &V, const Instruction &CtxI, | ||||||||
| 155 | InformationCache &InfoCache); | ||||||||
| 156 | |||||||||
| 157 | /// Try to convert \p V to type \p Ty without introducing new instructions. If | ||||||||
| 158 | /// this is not possible return `nullptr`. Note: this function basically knows | ||||||||
| 159 | /// how to cast various constants. | ||||||||
| 160 | Value *getWithType(Value &V, Type &Ty); | ||||||||
| 161 | |||||||||
| 162 | /// Return the combination of \p A and \p B such that the result is a possible | ||||||||
| 163 | /// value of both. \p B is potentially casted to match the type \p Ty or the | ||||||||
| 164 | /// type of \p A if \p Ty is null. | ||||||||
| 165 | /// | ||||||||
| 166 | /// Examples: | ||||||||
| 167 | /// X + none => X | ||||||||
| 168 | /// not_none + undef => not_none | ||||||||
| 169 | /// V1 + V2 => nullptr | ||||||||
| 170 | Optional<Value *> | ||||||||
| 171 | combineOptionalValuesInAAValueLatice(const Optional<Value *> &A, | ||||||||
| 172 | const Optional<Value *> &B, Type *Ty); | ||||||||
| 173 | |||||||||
| 174 | /// Return the initial value of \p Obj with type \p Ty if that is a constant. | ||||||||
| 175 | Constant *getInitialValueForObj(Value &Obj, Type &Ty); | ||||||||
| 176 | |||||||||
| 177 | /// Collect all potential underlying objects of \p Ptr at position \p CtxI in | ||||||||
| 178 | /// \p Objects. Assumed information is used and dependences onto \p QueryingAA | ||||||||
| 179 | /// are added appropriately. | ||||||||
| 180 | /// | ||||||||
| 181 | /// \returns True if \p Objects contains all assumed underlying objects, and | ||||||||
| 182 | /// false if something went wrong and the objects could not be | ||||||||
| 183 | /// determined. | ||||||||
| 184 | bool getAssumedUnderlyingObjects(Attributor &A, const Value &Ptr, | ||||||||
| 185 | SmallVectorImpl<Value *> &Objects, | ||||||||
| 186 | const AbstractAttribute &QueryingAA, | ||||||||
| 187 | const Instruction *CtxI); | ||||||||
| 188 | |||||||||
| 189 | /// Collect all potential values of the one stored by \p SI into | ||||||||
| 190 | /// \p PotentialCopies. That is, the only copies that were made via the | ||||||||
| 191 | /// store are assumed to be known and all in \p PotentialCopies. Dependences | ||||||||
| 192 | /// onto \p QueryingAA are properly tracked, \p UsedAssumedInformation will | ||||||||
| 193 | /// inform the caller if assumed information was used. | ||||||||
| 194 | /// | ||||||||
| 195 | /// \returns True if the assumed potential copies are all in \p PotentialCopies, | ||||||||
| 196 | /// false if something went wrong and the copies could not be | ||||||||
| 197 | /// determined. | ||||||||
| 198 | bool getPotentialCopiesOfStoredValue( | ||||||||
| 199 | Attributor &A, StoreInst &SI, SmallSetVector<Value *, 4> &PotentialCopies, | ||||||||
| 200 | const AbstractAttribute &QueryingAA, bool &UsedAssumedInformation); | ||||||||
| 201 | |||||||||
| 202 | } // namespace AA | ||||||||
| 203 | |||||||||
| 204 | /// The value passed to the line option that defines the maximal initialization | ||||||||
| 205 | /// chain length. | ||||||||
| 206 | extern unsigned MaxInitializationChainLength; | ||||||||
| 207 | |||||||||
| 208 | ///{ | ||||||||
| 209 | enum class ChangeStatus { | ||||||||
| 210 | CHANGED, | ||||||||
| 211 | UNCHANGED, | ||||||||
| 212 | }; | ||||||||
| 213 | |||||||||
| 214 | ChangeStatus operator|(ChangeStatus l, ChangeStatus r); | ||||||||
| 215 | ChangeStatus &operator|=(ChangeStatus &l, ChangeStatus r); | ||||||||
| 216 | ChangeStatus operator&(ChangeStatus l, ChangeStatus r); | ||||||||
| 217 | ChangeStatus &operator&=(ChangeStatus &l, ChangeStatus r); | ||||||||
| 218 | |||||||||
| 219 | enum class DepClassTy { | ||||||||
| 220 | REQUIRED, ///< The target cannot be valid if the source is not. | ||||||||
| 221 | OPTIONAL, ///< The target may be valid if the source is not. | ||||||||
| 222 | NONE, ///< Do not track a dependence between source and target. | ||||||||
| 223 | }; | ||||||||
| 224 | ///} | ||||||||
| 225 | |||||||||
| 226 | /// The data structure for the nodes of a dependency graph | ||||||||
| 227 | struct AADepGraphNode { | ||||||||
| 228 | public: | ||||||||
| 229 | virtual ~AADepGraphNode(){}; | ||||||||
| 230 | using DepTy = PointerIntPair<AADepGraphNode *, 1>; | ||||||||
| 231 | |||||||||
| 232 | protected: | ||||||||
| 233 | /// Set of dependency graph nodes which should be updated if this one | ||||||||
| 234 | /// is updated. The bit encodes if it is optional. | ||||||||
| 235 | TinyPtrVector<DepTy> Deps; | ||||||||
| 236 | |||||||||
| 237 | static AADepGraphNode *DepGetVal(DepTy &DT) { return DT.getPointer(); } | ||||||||
| 238 | static AbstractAttribute *DepGetValAA(DepTy &DT) { | ||||||||
| 239 | return cast<AbstractAttribute>(DT.getPointer()); | ||||||||
| 240 | } | ||||||||
| 241 | |||||||||
| 242 | operator AbstractAttribute *() { return cast<AbstractAttribute>(this); } | ||||||||
| 243 | |||||||||
| 244 | public: | ||||||||
| 245 | using iterator = | ||||||||
| 246 | mapped_iterator<TinyPtrVector<DepTy>::iterator, decltype(&DepGetVal)>; | ||||||||
| 247 | using aaiterator = | ||||||||
| 248 | mapped_iterator<TinyPtrVector<DepTy>::iterator, decltype(&DepGetValAA)>; | ||||||||
| 249 | |||||||||
| 250 | aaiterator begin() { return aaiterator(Deps.begin(), &DepGetValAA); } | ||||||||
| 251 | aaiterator end() { return aaiterator(Deps.end(), &DepGetValAA); } | ||||||||
| 252 | iterator child_begin() { return iterator(Deps.begin(), &DepGetVal); } | ||||||||
| 253 | iterator child_end() { return iterator(Deps.end(), &DepGetVal); } | ||||||||
| 254 | |||||||||
| 255 | virtual void print(raw_ostream &OS) const { OS << "AADepNode Impl\n"; } | ||||||||
| 256 | TinyPtrVector<DepTy> &getDeps() { return Deps; } | ||||||||
| 257 | |||||||||
| 258 | friend struct Attributor; | ||||||||
| 259 | friend struct AADepGraph; | ||||||||
| 260 | }; | ||||||||
| 261 | |||||||||
| 262 | /// The data structure for the dependency graph | ||||||||
| 263 | /// | ||||||||
| 264 | /// Note that in this graph if there is an edge from A to B (A -> B), | ||||||||
| 265 | /// then it means that B depends on A, and when the state of A is | ||||||||
| 266 | /// updated, node B should also be updated | ||||||||
| 267 | struct AADepGraph { | ||||||||
| 268 | AADepGraph() {} | ||||||||
| 269 | ~AADepGraph() {} | ||||||||
| 270 | |||||||||
| 271 | using DepTy = AADepGraphNode::DepTy; | ||||||||
| 272 | static AADepGraphNode *DepGetVal(DepTy &DT) { return DT.getPointer(); } | ||||||||
| 273 | using iterator = | ||||||||
| 274 | mapped_iterator<TinyPtrVector<DepTy>::iterator, decltype(&DepGetVal)>; | ||||||||
| 275 | |||||||||
| 276 | /// There is no root node for the dependency graph. But the SCCIterator | ||||||||
| 277 | /// requires a single entry point, so we maintain a fake("synthetic") root | ||||||||
| 278 | /// node that depends on every node. | ||||||||
| 279 | AADepGraphNode SyntheticRoot; | ||||||||
| 280 | AADepGraphNode *GetEntryNode() { return &SyntheticRoot; } | ||||||||
| 281 | |||||||||
| 282 | iterator begin() { return SyntheticRoot.child_begin(); } | ||||||||
| 283 | iterator end() { return SyntheticRoot.child_end(); } | ||||||||
| 284 | |||||||||
| 285 | void viewGraph(); | ||||||||
| 286 | |||||||||
| 287 | /// Dump graph to file | ||||||||
| 288 | void dumpGraph(); | ||||||||
| 289 | |||||||||
| 290 | /// Print dependency graph | ||||||||
| 291 | void print(); | ||||||||
| 292 | }; | ||||||||
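A minimal sketch of walking the graph through its synthetic root; errs() is the usual raw_ostream from llvm/Support/raw_ostream.h and the helper name is illustrative:

static void printDepGraphNodes(AADepGraph &G) {
  // begin()/end() iterate the children of the synthetic root, i.e., every
  // node the synthetic root was wired to depend on.
  for (AADepGraphNode *Node : G)
    Node->print(errs());
}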
| 293 | |||||||||
| 294 | /// Helper to describe and deal with positions in the LLVM-IR. | ||||||||
| 295 | /// | ||||||||
| 296 | /// A position in the IR is described by an anchor value and an "offset" that | ||||||||
| 297 | /// could be the argument number, for call sites and arguments, or an indicator | ||||||||
| 298 | /// of the "position kind". The kinds, specified in the Kind enum below, include | ||||||||
| 299 | /// the locations in the attribute list, i.a., function scope and return value, | ||||||||
| 300 | /// as well as a distinction between call sites and functions. Finally, there | ||||||||
| 301 | /// are floating values that do not have a corresponding attribute list | ||||||||
| 302 | /// position. | ||||||||
| 303 | struct IRPosition { | ||||||||
| 304 | // NOTE: In the future this definition can be changed to support recursive | ||||||||
| 305 | // functions. | ||||||||
| 306 | using CallBaseContext = CallBase; | ||||||||
| 307 | |||||||||
| 308 | /// The positions we distinguish in the IR. | ||||||||
| 309 | enum Kind : char { | ||||||||
| 310 | IRP_INVALID, ///< An invalid position. | ||||||||
| 311 | IRP_FLOAT, ///< A position that is not associated with a spot suitable | ||||||||
| 312 | ///< for attributes. This could be any value or instruction. | ||||||||
| 313 | IRP_RETURNED, ///< An attribute for the function return value. | ||||||||
| 314 | IRP_CALL_SITE_RETURNED, ///< An attribute for a call site return value. | ||||||||
| 315 | IRP_FUNCTION, ///< An attribute for a function (scope). | ||||||||
| 316 | IRP_CALL_SITE, ///< An attribute for a call site (function scope). | ||||||||
| 317 | IRP_ARGUMENT, ///< An attribute for a function argument. | ||||||||
| 318 | IRP_CALL_SITE_ARGUMENT, ///< An attribute for a call site argument. | ||||||||
| 319 | }; | ||||||||
| 320 | |||||||||
| 321 | /// Default constructor available to create invalid positions implicitly. All | ||||||||
| 322 | /// other positions need to be created explicitly through the appropriate | ||||||||
| 323 | /// static member function. | ||||||||
| 324 | IRPosition() : Enc(nullptr, ENC_VALUE) { verify(); } | ||||||||
| 325 | |||||||||
| 326 | /// Create a position describing the value of \p V. | ||||||||
| 327 | static const IRPosition value(const Value &V, | ||||||||
| 328 | const CallBaseContext *CBContext = nullptr) { | ||||||||
| 329 | if (auto *Arg = dyn_cast<Argument>(&V)) | ||||||||
| 330 | return IRPosition::argument(*Arg, CBContext); | ||||||||
| 331 | if (auto *CB = dyn_cast<CallBase>(&V)) | ||||||||
| 332 | return IRPosition::callsite_returned(*CB); | ||||||||
| 333 | return IRPosition(const_cast<Value &>(V), IRP_FLOAT, CBContext); | ||||||||
| 334 | } | ||||||||
| 335 | |||||||||
| 336 | /// Create a position describing the function scope of \p F. | ||||||||
| 337 | /// \p CBContext is used for call base specific analysis. | ||||||||
| 338 | static const IRPosition function(const Function &F, | ||||||||
| 339 | const CallBaseContext *CBContext = nullptr) { | ||||||||
| 340 | return IRPosition(const_cast<Function &>(F), IRP_FUNCTION, CBContext); | ||||||||
| 341 | } | ||||||||
| 342 | |||||||||
| 343 | /// Create a position describing the returned value of \p F. | ||||||||
| 344 | /// \p CBContext is used for call base specific analysis. | ||||||||
| 345 | static const IRPosition returned(const Function &F, | ||||||||
| 346 | const CallBaseContext *CBContext = nullptr) { | ||||||||
| 347 | return IRPosition(const_cast<Function &>(F), IRP_RETURNED, CBContext); | ||||||||
| 348 | } | ||||||||
| 349 | |||||||||
| 350 | /// Create a position describing the argument \p Arg. | ||||||||
| 351 | /// \p CBContext is used for call base specific analysis. | ||||||||
| 352 | static const IRPosition argument(const Argument &Arg, | ||||||||
| 353 | const CallBaseContext *CBContext = nullptr) { | ||||||||
| 354 | return IRPosition(const_cast<Argument &>(Arg), IRP_ARGUMENT, CBContext); | ||||||||
| 355 | } | ||||||||
| 356 | |||||||||
| 357 | /// Create a position describing the function scope of \p CB. | ||||||||
| 358 | static const IRPosition callsite_function(const CallBase &CB) { | ||||||||
| 359 | return IRPosition(const_cast<CallBase &>(CB), IRP_CALL_SITE); | ||||||||
| 360 | } | ||||||||
| 361 | |||||||||
| 362 | /// Create a position describing the returned value of \p CB. | ||||||||
| 363 | static const IRPosition callsite_returned(const CallBase &CB) { | ||||||||
| 364 | return IRPosition(const_cast<CallBase &>(CB), IRP_CALL_SITE_RETURNED); | ||||||||
| 365 | } | ||||||||
| 366 | |||||||||
| 367 | /// Create a position describing the argument of \p CB at position \p ArgNo. | ||||||||
| 368 | static const IRPosition callsite_argument(const CallBase &CB, | ||||||||
| 369 | unsigned ArgNo) { | ||||||||
| 370 | return IRPosition(const_cast<Use &>(CB.getArgOperandUse(ArgNo)), | ||||||||
| 371 | IRP_CALL_SITE_ARGUMENT); | ||||||||
| 372 | } | ||||||||
| 373 | |||||||||
| 374 | /// Create a position describing the argument of \p ACS at position \p ArgNo. | ||||||||
| 375 | static const IRPosition callsite_argument(AbstractCallSite ACS, | ||||||||
| 376 | unsigned ArgNo) { | ||||||||
| 377 | if (ACS.getNumArgOperands() <= ArgNo) | ||||||||
| 378 | return IRPosition(); | ||||||||
| 379 | int CSArgNo = ACS.getCallArgOperandNo(ArgNo); | ||||||||
| 380 | if (CSArgNo >= 0) | ||||||||
| 381 | return IRPosition::callsite_argument( | ||||||||
| 382 | cast<CallBase>(*ACS.getInstruction()), CSArgNo); | ||||||||
| 383 | return IRPosition(); | ||||||||
| 384 | } | ||||||||
| 385 | |||||||||
| 386 | /// Create a position with function scope matching the "context" of \p IRP. | ||||||||
| 387 | /// If \p IRP is a call site (see isAnyCallSitePosition()) then the result | ||||||||
| 388 | /// will be a call site position, otherwise the function position of the | ||||||||
| 389 | /// associated function. | ||||||||
| 390 | static const IRPosition | ||||||||
| 391 | function_scope(const IRPosition &IRP, | ||||||||
| 392 | const CallBaseContext *CBContext = nullptr) { | ||||||||
| 393 | if (IRP.isAnyCallSitePosition()) { | ||||||||
| 394 | return IRPosition::callsite_function( | ||||||||
| 395 | cast<CallBase>(IRP.getAnchorValue())); | ||||||||
| 396 | } | ||||||||
| 397 | assert(IRP.getAssociatedFunction()); | ||||||||
| 398 | return IRPosition::function(*IRP.getAssociatedFunction(), CBContext); | ||||||||
| 399 | } | ||||||||
| 400 | |||||||||
| 401 | bool operator==(const IRPosition &RHS) const { | ||||||||
| 402 | return Enc == RHS.Enc && RHS.CBContext == CBContext; | ||||||||
| 403 | } | ||||||||
| 404 | bool operator!=(const IRPosition &RHS) const { return !(*this == RHS); } | ||||||||
| 405 | |||||||||
| 406 | /// Return the value this abstract attribute is anchored with. | ||||||||
| 407 | /// | ||||||||
| 408 | /// The anchor value might not be the associated value if the latter is not | ||||||||
| 409 | /// sufficient to determine where arguments will be manifested. This is, so | ||||||||
| 410 | /// far, only the case for call site arguments as the value is not sufficient | ||||||||
| 411 | /// to pinpoint them. Instead, we can use the call site as an anchor. | ||||||||
| 412 | Value &getAnchorValue() const { | ||||||||
| 413 | switch (getEncodingBits()) { | ||||||||
| 414 | case ENC_VALUE: | ||||||||
| 415 | case ENC_RETURNED_VALUE: | ||||||||
| 416 | case ENC_FLOATING_FUNCTION: | ||||||||
| 417 | return *getAsValuePtr(); | ||||||||
| 418 | case ENC_CALL_SITE_ARGUMENT_USE: | ||||||||
| 419 | return *(getAsUsePtr()->getUser()); | ||||||||
| 420 | default: | ||||||||
| 421 | llvm_unreachable("Unkown encoding!")__builtin_unreachable(); | ||||||||
| 422 | }; | ||||||||
| 423 | } | ||||||||
| 424 | |||||||||
| 425 | /// Return the associated function, if any. | ||||||||
| 426 | Function *getAssociatedFunction() const { | ||||||||
| 427 | if (auto *CB = dyn_cast<CallBase>(&getAnchorValue())) { | ||||||||
| 428 | // We reuse the logic that associates callback callees to arguments of a | ||||||||
| 429 | // call site here to identify the callback callee as the associated | ||||||||
| 430 | // function. | ||||||||
| 431 | if (Argument *Arg = getAssociatedArgument()) | ||||||||
| 432 | return Arg->getParent(); | ||||||||
| 433 | return CB->getCalledFunction(); | ||||||||
| 434 | } | ||||||||
| 435 | return getAnchorScope(); | ||||||||
| 436 | } | ||||||||
| 437 | |||||||||
| 438 | /// Return the associated argument, if any. | ||||||||
| 439 | Argument *getAssociatedArgument() const; | ||||||||
| 440 | |||||||||
| 441 | /// Return true if the position refers to a function interface, that is the | ||||||||
| 442 | /// function scope, the function return, or an argument. | ||||||||
| 443 | bool isFnInterfaceKind() const { | ||||||||
| 444 | switch (getPositionKind()) { | ||||||||
| 445 | case IRPosition::IRP_FUNCTION: | ||||||||
| 446 | case IRPosition::IRP_RETURNED: | ||||||||
| 447 | case IRPosition::IRP_ARGUMENT: | ||||||||
| 448 | return true; | ||||||||
| 449 | default: | ||||||||
| 450 | return false; | ||||||||
| 451 | } | ||||||||
| 452 | } | ||||||||
| 453 | |||||||||
| 454 | /// Return the Function surrounding the anchor value. | ||||||||
| 455 | Function *getAnchorScope() const { | ||||||||
| 456 | Value &V = getAnchorValue(); | ||||||||
| 457 | if (isa<Function>(V)) | ||||||||
| 458 | return &cast<Function>(V); | ||||||||
| 459 | if (isa<Argument>(V)) | ||||||||
| 460 | return cast<Argument>(V).getParent(); | ||||||||
| 461 | if (isa<Instruction>(V)) | ||||||||
| 462 | return cast<Instruction>(V).getFunction(); | ||||||||
| 463 | return nullptr; | ||||||||
| 464 | } | ||||||||
| 465 | |||||||||
| 466 | /// Return the context instruction, if any. | ||||||||
| 467 | Instruction *getCtxI() const { | ||||||||
| 468 | Value &V = getAnchorValue(); | ||||||||
| 469 | if (auto *I = dyn_cast<Instruction>(&V)) | ||||||||
| 470 | return I; | ||||||||
| 471 | if (auto *Arg = dyn_cast<Argument>(&V)) | ||||||||
| 472 | if (!Arg->getParent()->isDeclaration()) | ||||||||
| 473 | return &Arg->getParent()->getEntryBlock().front(); | ||||||||
| 474 | if (auto *F = dyn_cast<Function>(&V)) | ||||||||
| 475 | if (!F->isDeclaration()) | ||||||||
| 476 | return &(F->getEntryBlock().front()); | ||||||||
| 477 | return nullptr; | ||||||||
| 478 | } | ||||||||
| 479 | |||||||||
| 480 | /// Return the value this abstract attribute is associated with. | ||||||||
| 481 | Value &getAssociatedValue() const { | ||||||||
| 482 | if (getCallSiteArgNo() < 0 || isa<Argument>(&getAnchorValue())) | ||||||||
| 483 | return getAnchorValue(); | ||||||||
| 484 | assert(isa<CallBase>(&getAnchorValue()) && "Expected a call base!"); | ||||||||
| 485 | return *cast<CallBase>(&getAnchorValue()) | ||||||||
| 486 | ->getArgOperand(getCallSiteArgNo()); | ||||||||
| 487 | } | ||||||||
| 488 | |||||||||
| 489 | /// Return the type this abstract attribute is associated with. | ||||||||
| 490 | Type *getAssociatedType() const { | ||||||||
| 491 | if (getPositionKind() == IRPosition::IRP_RETURNED) | ||||||||
| 492 | return getAssociatedFunction()->getReturnType(); | ||||||||
| 493 | return getAssociatedValue().getType(); | ||||||||
| 494 | } | ||||||||
| 495 | |||||||||
| 496 | /// Return the callee argument number of the associated value if it is an | ||||||||
| 497 | /// argument or call site argument, otherwise a negative value. In contrast to | ||||||||
| 498 | /// `getCallSiteArgNo` this method will always return the "argument number" | ||||||||
| 499 | /// from the perspective of the callee. This may not be the same as the call site | ||||||||
| 500 | /// if this is a callback call. | ||||||||
| 501 | int getCalleeArgNo() const { | ||||||||
| 502 | return getArgNo(/* CallbackCalleeArgIfApplicable */ true); | ||||||||
| 503 | } | ||||||||
| 504 | |||||||||
| 505 | /// Return the call site argument number of the associated value if it is an | ||||||||
| 506 | /// argument or call site argument, otherwise a negative value. In contrast to | ||||||||
| 507 | /// `getCalleeArgNo` this method will always return the "operand number" from | ||||||||
| 508 | /// the perspective of the call site. This may not be the same as the callee | ||||||||
| 509 | /// perspective if this is a callback call. | ||||||||
| 510 | int getCallSiteArgNo() const { | ||||||||
| 511 | return getArgNo(/* CallbackCalleeArgIfApplicable */ false); | ||||||||
| 512 | } | ||||||||
| 513 | |||||||||
| 514 | /// Return the index in the attribute list for this position. | ||||||||
| 515 | unsigned getAttrIdx() const { | ||||||||
| 516 | switch (getPositionKind()) { | ||||||||
| 517 | case IRPosition::IRP_INVALID: | ||||||||
| 518 | case IRPosition::IRP_FLOAT: | ||||||||
| 519 | break; | ||||||||
| 520 | case IRPosition::IRP_FUNCTION: | ||||||||
| 521 | case IRPosition::IRP_CALL_SITE: | ||||||||
| 522 | return AttributeList::FunctionIndex; | ||||||||
| 523 | case IRPosition::IRP_RETURNED: | ||||||||
| 524 | case IRPosition::IRP_CALL_SITE_RETURNED: | ||||||||
| 525 | return AttributeList::ReturnIndex; | ||||||||
| 526 | case IRPosition::IRP_ARGUMENT: | ||||||||
| 527 | case IRPosition::IRP_CALL_SITE_ARGUMENT: | ||||||||
| 528 | return getCallSiteArgNo() + AttributeList::FirstArgIndex; | ||||||||
| 529 | } | ||||||||
| 530 | llvm_unreachable( | ||||||||
| 531 | "There is no attribute index for a floating or invalid position!"); | ||||||||
| 532 | } | ||||||||
| 533 | |||||||||
| 534 | /// Return the associated position kind. | ||||||||
| 535 | Kind getPositionKind() const { | ||||||||
| 536 | char EncodingBits = getEncodingBits(); | ||||||||
| 537 | if (EncodingBits == ENC_CALL_SITE_ARGUMENT_USE) | ||||||||
| 538 | return IRP_CALL_SITE_ARGUMENT; | ||||||||
| 539 | if (EncodingBits == ENC_FLOATING_FUNCTION) | ||||||||
| 540 | return IRP_FLOAT; | ||||||||
| 541 | |||||||||
| 542 | Value *V = getAsValuePtr(); | ||||||||
| 543 | if (!V) | ||||||||
| 544 | return IRP_INVALID; | ||||||||
| 545 | if (isa<Argument>(V)) | ||||||||
| 546 | return IRP_ARGUMENT; | ||||||||
| 547 | if (isa<Function>(V)) | ||||||||
| 548 | return isReturnPosition(EncodingBits) ? IRP_RETURNED : IRP_FUNCTION; | ||||||||
| 549 | if (isa<CallBase>(V)) | ||||||||
| 550 | return isReturnPosition(EncodingBits) ? IRP_CALL_SITE_RETURNED | ||||||||
| 551 | : IRP_CALL_SITE; | ||||||||
| 552 | return IRP_FLOAT; | ||||||||
| 553 | } | ||||||||
| 554 | |||||||||
| 555 | /// TODO: Figure out if the attribute related helper functions should live | ||||||||
| 556 | /// here or somewhere else. | ||||||||
| 557 | |||||||||
| 558 | /// Return true if an attribute of any kind in \p AKs exists in the IR at a | ||||||||
| 559 | /// position that will affect this one. See also getAttrs(...). | ||||||||
| 560 | /// \param IgnoreSubsumingPositions Flag to determine if subsuming positions, | ||||||||
| 561 | /// e.g., the function position if this is an | ||||||||
| 562 | /// argument position, should be ignored. | ||||||||
| 563 | bool hasAttr(ArrayRef<Attribute::AttrKind> AKs, | ||||||||
| 564 | bool IgnoreSubsumingPositions = false, | ||||||||
| 565 | Attributor *A = nullptr) const; | ||||||||
| 566 | |||||||||
| 567 | /// Return the attributes of any kind in \p AKs existing in the IR at a | ||||||||
| 568 | /// position that will affect this one. While each position can only have a | ||||||||
| 569 | /// single attribute of any kind in \p AKs, there are "subsuming" positions | ||||||||
| 570 | /// that could have an attribute as well. This method returns all attributes | ||||||||
| 571 | /// found in \p Attrs. | ||||||||
| 572 | /// \param IgnoreSubsumingPositions Flag to determine if subsuming positions, | ||||||||
| 573 | /// e.g., the function position if this is an | ||||||||
| 574 | /// argument position, should be ignored. | ||||||||
| 575 | void getAttrs(ArrayRef<Attribute::AttrKind> AKs, | ||||||||
| 576 | SmallVectorImpl<Attribute> &Attrs, | ||||||||
| 577 | bool IgnoreSubsumingPositions = false, | ||||||||
| 578 | Attributor *A = nullptr) const; | ||||||||
| 579 | |||||||||
| 580 | /// Remove the attributes of the kinds in \p AKs existing in the IR at this position. | ||||||||
| 581 | void removeAttrs(ArrayRef<Attribute::AttrKind> AKs) const { | ||||||||
| 582 | if (getPositionKind() == IRP_INVALID || getPositionKind() == IRP_FLOAT) | ||||||||
| 583 | return; | ||||||||
| 584 | |||||||||
| 585 | AttributeList AttrList; | ||||||||
| 586 | auto *CB = dyn_cast<CallBase>(&getAnchorValue()); | ||||||||
| 587 | if (CB) | ||||||||
| 588 | AttrList = CB->getAttributes(); | ||||||||
| 589 | else | ||||||||
| 590 | AttrList = getAssociatedFunction()->getAttributes(); | ||||||||
| 591 | |||||||||
| 592 | LLVMContext &Ctx = getAnchorValue().getContext(); | ||||||||
| 593 | for (Attribute::AttrKind AK : AKs) | ||||||||
| 594 | AttrList = AttrList.removeAttribute(Ctx, getAttrIdx(), AK); | ||||||||
| 595 | |||||||||
| 596 | if (CB) | ||||||||
| 597 | CB->setAttributes(AttrList); | ||||||||
| 598 | else | ||||||||
| 599 | getAssociatedFunction()->setAttributes(AttrList); | ||||||||
| 600 | } | ||||||||
| 601 | |||||||||
| 602 | bool isAnyCallSitePosition() const { | ||||||||
| 603 | switch (getPositionKind()) { | ||||||||
| 604 | case IRPosition::IRP_CALL_SITE: | ||||||||
| 605 | case IRPosition::IRP_CALL_SITE_RETURNED: | ||||||||
| 606 | case IRPosition::IRP_CALL_SITE_ARGUMENT: | ||||||||
| 607 | return true; | ||||||||
| 608 | default: | ||||||||
| 609 | return false; | ||||||||
| 610 | } | ||||||||
| 611 | } | ||||||||
| 612 | |||||||||
| 613 | /// Return true if the position is an argument or call site argument. | ||||||||
| 614 | bool isArgumentPosition() const { | ||||||||
| 615 | switch (getPositionKind()) { | ||||||||
| 616 | case IRPosition::IRP_ARGUMENT: | ||||||||
| 617 | case IRPosition::IRP_CALL_SITE_ARGUMENT: | ||||||||
| 618 | return true; | ||||||||
| 619 | default: | ||||||||
| 620 | return false; | ||||||||
| 621 | } | ||||||||
| 622 | } | ||||||||
| 623 | |||||||||
| 624 | /// Return the same position without the call base context. | ||||||||
| 625 | IRPosition stripCallBaseContext() const { | ||||||||
| 626 | IRPosition Result = *this; | ||||||||
| 627 | Result.CBContext = nullptr; | ||||||||
| 628 | return Result; | ||||||||
| 629 | } | ||||||||
| 630 | |||||||||
| 631 | /// Get the call base context from the position. | ||||||||
| 632 | const CallBaseContext *getCallBaseContext() const { return CBContext; } | ||||||||
| 633 | |||||||||
| 634 | /// Check if the position has any call base context. | ||||||||
| 635 | bool hasCallBaseContext() const { return CBContext != nullptr; } | ||||||||
| 636 | |||||||||
| 637 | /// Special DenseMap key values. | ||||||||
| 638 | /// | ||||||||
| 639 | ///{ | ||||||||
| 640 | static const IRPosition EmptyKey; | ||||||||
| 641 | static const IRPosition TombstoneKey; | ||||||||
| 642 | ///} | ||||||||
| 643 | |||||||||
| 644 | /// Conversion into a void * to allow reuse of pointer hashing. | ||||||||
| 645 | operator void *() const { return Enc.getOpaqueValue(); } | ||||||||
| 646 | |||||||||
| 647 | private: | ||||||||
| 648 | /// Private constructor for special values only! | ||||||||
| 649 | explicit IRPosition(void *Ptr, const CallBaseContext *CBContext = nullptr) | ||||||||
| 650 | : CBContext(CBContext) { | ||||||||
| 651 | Enc.setFromOpaqueValue(Ptr); | ||||||||
| 652 | } | ||||||||
| 653 | |||||||||
| 654 | /// IRPosition anchored at \p AnchorVal with kind/argument number \p PK. | ||||||||
| 655 | explicit IRPosition(Value &AnchorVal, Kind PK, | ||||||||
| 656 | const CallBaseContext *CBContext = nullptr) | ||||||||
| 657 | : CBContext(CBContext) { | ||||||||
| 658 | switch (PK) { | ||||||||
| 659 | case IRPosition::IRP_INVALID: | ||||||||
| 660 | llvm_unreachable("Cannot create invalid IRP with an anchor value!")__builtin_unreachable(); | ||||||||
| 661 | break; | ||||||||
| 662 | case IRPosition::IRP_FLOAT: | ||||||||
| 663 | // Special case for floating functions. | ||||||||
| 664 | if (isa<Function>(AnchorVal)) | ||||||||
| 665 | Enc = {&AnchorVal, ENC_FLOATING_FUNCTION}; | ||||||||
| 666 | else | ||||||||
| 667 | Enc = {&AnchorVal, ENC_VALUE}; | ||||||||
| 668 | break; | ||||||||
| 669 | case IRPosition::IRP_FUNCTION: | ||||||||
| 670 | case IRPosition::IRP_CALL_SITE: | ||||||||
| 671 | Enc = {&AnchorVal, ENC_VALUE}; | ||||||||
| 672 | break; | ||||||||
| 673 | case IRPosition::IRP_RETURNED: | ||||||||
| 674 | case IRPosition::IRP_CALL_SITE_RETURNED: | ||||||||
| 675 | Enc = {&AnchorVal, ENC_RETURNED_VALUE}; | ||||||||
| 676 | break; | ||||||||
| 677 | case IRPosition::IRP_ARGUMENT: | ||||||||
| 678 | Enc = {&AnchorVal, ENC_VALUE}; | ||||||||
| 679 | break; | ||||||||
| 680 | case IRPosition::IRP_CALL_SITE_ARGUMENT: | ||||||||
| 681 | llvm_unreachable( | ||||||||
| 682 | "Cannot create call site argument IRP with an anchor value!"); | ||||||||
| 683 | break; | ||||||||
| 684 | } | ||||||||
| 685 | verify(); | ||||||||
| 686 | } | ||||||||
| 687 | |||||||||
| 688 | /// Return the callee argument number of the associated value if it is an | ||||||||
| 689 | /// argument or call site argument. See also `getCalleeArgNo` and | ||||||||
| 690 | /// `getCallSiteArgNo`. | ||||||||
| 691 | int getArgNo(bool CallbackCalleeArgIfApplicable) const { | ||||||||
| 692 | if (CallbackCalleeArgIfApplicable) | ||||||||
| 693 | if (Argument *Arg = getAssociatedArgument()) | ||||||||
| 694 | return Arg->getArgNo(); | ||||||||
| 695 | switch (getPositionKind()) { | ||||||||
| 696 | case IRPosition::IRP_ARGUMENT: | ||||||||
| 697 | return cast<Argument>(getAsValuePtr())->getArgNo(); | ||||||||
| 698 | case IRPosition::IRP_CALL_SITE_ARGUMENT: { | ||||||||
| 699 | Use &U = *getAsUsePtr(); | ||||||||
| 700 | return cast<CallBase>(U.getUser())->getArgOperandNo(&U); | ||||||||
| 701 | } | ||||||||
| 702 | default: | ||||||||
| 703 | return -1; | ||||||||
| 704 | } | ||||||||
| 705 | } | ||||||||
| 706 | |||||||||
| 707 | /// IRPosition for the use \p U. The position kind \p PK needs to be | ||||||||
| 708 | /// IRP_CALL_SITE_ARGUMENT, the anchor value is the user, the associated value | ||||||||
| 709 | /// the used value. | ||||||||
| 710 | explicit IRPosition(Use &U, Kind PK) { | ||||||||
| 711 | assert(PK == IRP_CALL_SITE_ARGUMENT && | ||||||||
| 712 | "Use constructor is for call site arguments only!"); | ||||||||
| 713 | Enc = {&U, ENC_CALL_SITE_ARGUMENT_USE}; | ||||||||
| 714 | verify(); | ||||||||
| 715 | } | ||||||||
| 716 | |||||||||
| 717 | /// Verify internal invariants. | ||||||||
| 718 | void verify(); | ||||||||
| 719 | |||||||||
| 720 | /// Return the attributes of kind \p AK existing in the IR as attribute. | ||||||||
| 721 | bool getAttrsFromIRAttr(Attribute::AttrKind AK, | ||||||||
| 722 | SmallVectorImpl<Attribute> &Attrs) const; | ||||||||
| 723 | |||||||||
| 724 | /// Return the attributes of kind \p AK existing in the IR as operand bundles | ||||||||
| 725 | /// of an llvm.assume. | ||||||||
| 726 | bool getAttrsFromAssumes(Attribute::AttrKind AK, | ||||||||
| 727 | SmallVectorImpl<Attribute> &Attrs, | ||||||||
| 728 | Attributor &A) const; | ||||||||
| 729 | |||||||||
| 730 | /// Return the underlying pointer as Value *, valid for all positions but | ||||||||
| 731 | /// IRP_CALL_SITE_ARGUMENT. | ||||||||
| 732 | Value *getAsValuePtr() const { | ||||||||
| 733 | assert(getEncodingBits() != ENC_CALL_SITE_ARGUMENT_USE && | ||||||||
| 734 | "Not a value pointer!"); | ||||||||
| 735 | return reinterpret_cast<Value *>(Enc.getPointer()); | ||||||||
| 736 | } | ||||||||
| 737 | |||||||||
| 738 | /// Return the underlying pointer as Use *, valid only for | ||||||||
| 739 | /// IRP_CALL_SITE_ARGUMENT positions. | ||||||||
| 740 | Use *getAsUsePtr() const { | ||||||||
| 741 | assert(getEncodingBits() == ENC_CALL_SITE_ARGUMENT_USE && | ||||||||
| 742 | "Not a value pointer!"); | ||||||||
| 743 | return reinterpret_cast<Use *>(Enc.getPointer()); | ||||||||
| 744 | } | ||||||||
| 745 | |||||||||
| 746 | /// Return true if \p EncodingBits describe a returned or call site returned | ||||||||
| 747 | /// position. | ||||||||
| 748 | static bool isReturnPosition(char EncodingBits) { | ||||||||
| 749 | return EncodingBits == ENC_RETURNED_VALUE; | ||||||||
| 750 | } | ||||||||
| 751 | |||||||||
| 752 | /// Return true if the encoding bits describe a returned or call site returned | ||||||||
| 753 | /// position. | ||||||||
| 754 | bool isReturnPosition() const { return isReturnPosition(getEncodingBits()); } | ||||||||
| 755 | |||||||||
| 756 | /// The encoding of the IRPosition is a combination of a pointer and two | ||||||||
| 757 | /// encoding bits. The values of the encoding bits are defined in the enum | ||||||||
| 758 | /// below. The pointer is either a Value* (for the first three encoding bit | ||||||||
| 759 | /// combinations) or Use* (for ENC_CALL_SITE_ARGUMENT_USE). | ||||||||
| 760 | /// | ||||||||
| 761 | ///{ | ||||||||
| 762 | enum { | ||||||||
| 763 | ENC_VALUE = 0b00, | ||||||||
| 764 | ENC_RETURNED_VALUE = 0b01, | ||||||||
| 765 | ENC_FLOATING_FUNCTION = 0b10, | ||||||||
| 766 | ENC_CALL_SITE_ARGUMENT_USE = 0b11, | ||||||||
| 767 | }; | ||||||||
| 768 | |||||||||
| 769 | // Reserve the maximal amount of bits so there is no need to mask out the | ||||||||
| 770 | // remaining ones. We will not encode anything else in the pointer anyway. | ||||||||
| 771 | static constexpr int NumEncodingBits = | ||||||||
| 772 | PointerLikeTypeTraits<void *>::NumLowBitsAvailable; | ||||||||
| 773 | static_assert(NumEncodingBits >= 2, "At least two bits are required!"); | ||||||||
| 774 | |||||||||
| 775 | /// The pointer with the encoding bits. | ||||||||
| 776 | PointerIntPair<void *, NumEncodingBits, char> Enc; | ||||||||
| 777 | ///} | ||||||||
| 778 | |||||||||
| 779 | /// Call base context. Used for callsite specific analysis. | ||||||||
| 780 | const CallBaseContext *CBContext = nullptr; | ||||||||
| 781 | |||||||||
| 782 | /// Return the encoding bits. | ||||||||
| 783 | char getEncodingBits() const { return Enc.getInt(); } | ||||||||
| 784 | }; | ||||||||
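A brief sketch of the factory interface above; F and CB are assumed to be a Function and a CallBase obtained elsewhere, CB is assumed to have at least one argument operand, and the helper name is illustrative:

static void irPositionFactorySketch(Function &F, CallBase &CB) {
  IRPosition FnPos  = IRPosition::function(F);              // function scope
  IRPosition RetPos = IRPosition::returned(F);              // return value of F
  IRPosition CSArg  = IRPosition::callsite_argument(CB, 0); // first call operand
  // Generic values fall back to IRP_FLOAT unless they are arguments or call bases.
  IRPosition ValPos = IRPosition::value(*CB.getArgOperand(0));
  (void)FnPos; (void)RetPos; (void)CSArg; (void)ValPos;
}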
| 785 | |||||||||
| 786 | /// Helper that allows IRPosition as a key in a DenseMap. | ||||||||
| 787 | template <> struct DenseMapInfo<IRPosition> { | ||||||||
| 788 | static inline IRPosition getEmptyKey() { return IRPosition::EmptyKey; } | ||||||||
| 789 | static inline IRPosition getTombstoneKey() { | ||||||||
| 790 | return IRPosition::TombstoneKey; | ||||||||
| 791 | } | ||||||||
| 792 | static unsigned getHashValue(const IRPosition &IRP) { | ||||||||
| 793 | return (DenseMapInfo<void *>::getHashValue(IRP) << 4) ^ | ||||||||
| 794 | (DenseMapInfo<Value *>::getHashValue(IRP.getCallBaseContext())); | ||||||||
| 795 | } | ||||||||
| 796 | |||||||||
| 797 | static bool isEqual(const IRPosition &a, const IRPosition &b) { | ||||||||
| 798 | return a == b; | ||||||||
| 799 | } | ||||||||
| 800 | }; | ||||||||
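With the specialization above, IRPosition can be used directly as a DenseMap key; a small sketch (the map and helper are illustrative):

static unsigned recordQuery(DenseMap<IRPosition, unsigned> &QueryCounts,
                            const IRPosition &IRP) {
  // Hashing combines the encoded anchor pointer with the call base context.
  return ++QueryCounts[IRP];
}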
| 801 | |||||||||
| 802 | /// A visitor class for IR positions. | ||||||||
| 803 | /// | ||||||||
| 804 | /// Given a position P, the SubsumingPositionIterator allows visiting "subsuming | ||||||||
| 805 | /// positions" wrt. attributes/information. Thus, if a piece of information | ||||||||
| 806 | /// holds for a subsuming position, it also holds for the position P. | ||||||||
| 807 | /// | ||||||||
| 808 | /// The subsuming positions always include the initial position and then, | ||||||||
| 809 | /// depending on the position kind, additionally the following ones: | ||||||||
| 810 | /// - for IRP_RETURNED: | ||||||||
| 811 | /// - the function (IRP_FUNCTION) | ||||||||
| 812 | /// - for IRP_ARGUMENT: | ||||||||
| 813 | /// - the function (IRP_FUNCTION) | ||||||||
| 814 | /// - for IRP_CALL_SITE: | ||||||||
| 815 | /// - the callee (IRP_FUNCTION), if known | ||||||||
| 816 | /// - for IRP_CALL_SITE_RETURNED: | ||||||||
| 817 | /// - the callee (IRP_RETURNED), if known | ||||||||
| 818 | /// - the call site (IRP_FUNCTION) | ||||||||
| 819 | /// - the callee (IRP_FUNCTION), if known | ||||||||
| 820 | /// - for IRP_CALL_SITE_ARGUMENT: | ||||||||
| 821 | /// - the argument of the callee (IRP_ARGUMENT), if known | ||||||||
| 822 | /// - the callee (IRP_FUNCTION), if known | ||||||||
| 823 | /// - the position the call site argument is associated with if it is not | ||||||||
| 824 | /// anchored to the call site, e.g., if it is an argument then the argument | ||||||||
| 825 | /// (IRP_ARGUMENT) | ||||||||
| 826 | class SubsumingPositionIterator { | ||||||||
| 827 | SmallVector<IRPosition, 4> IRPositions; | ||||||||
| 828 | using iterator = decltype(IRPositions)::iterator; | ||||||||
| 829 | |||||||||
| 830 | public: | ||||||||
| 831 | SubsumingPositionIterator(const IRPosition &IRP); | ||||||||
| 832 | iterator begin() { return IRPositions.begin(); } | ||||||||
| 833 | iterator end() { return IRPositions.end(); } | ||||||||
| 834 | }; | ||||||||
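A sketch of visiting the subsuming positions of a given position, e.g., to collect the kinds of all positions whose information would also apply to it (the helper name is illustrative):

static void collectSubsumingKinds(const IRPosition &IRP,
                                  SmallVectorImpl<IRPosition::Kind> &Kinds) {
  // The initial position is always part of the subsuming set.
  for (const IRPosition &SubIRP : SubsumingPositionIterator(IRP))
    Kinds.push_back(SubIRP.getPositionKind());
}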
| 835 | |||||||||
| 836 | /// Wrapper for FunctionAnalysisManager. | ||||||||
| 837 | struct AnalysisGetter { | ||||||||
| 838 | template <typename Analysis> | ||||||||
| 839 | typename Analysis::Result *getAnalysis(const Function &F) { | ||||||||
| 840 | if (!FAM || !F.getParent()) | ||||||||
| 841 | return nullptr; | ||||||||
| 842 | return &FAM->getResult<Analysis>(const_cast<Function &>(F)); | ||||||||
| 843 | } | ||||||||
| 844 | |||||||||
| 845 | AnalysisGetter(FunctionAnalysisManager &FAM) : FAM(&FAM) {} | ||||||||
| 846 | AnalysisGetter() {} | ||||||||
| 847 | |||||||||
| 848 | private: | ||||||||
| 849 | FunctionAnalysisManager *FAM = nullptr; | ||||||||
| 850 | }; | ||||||||
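A small sketch of wiring a FunctionAnalysisManager into the AnalysisGetter above and querying a function analysis through it; it returns null when the function is not part of a module, as the getter does:

static DominatorTree *getDomTreeVia(FunctionAnalysisManager &FAM,
                                    const Function &F) {
  AnalysisGetter AG(FAM);
  return AG.getAnalysis<DominatorTreeAnalysis>(F);
}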
| 851 | |||||||||
| 852 | /// Data structure to hold cached (LLVM-IR) information. | ||||||||
| 853 | /// | ||||||||
| 854 | /// All attributes are given an InformationCache object at creation time to | ||||||||
| 855 | /// avoid inspection of the IR by all of them individually. This default | ||||||||
| 856 | /// InformationCache will hold information required by 'default' attributes, | ||||||||
| 857 | /// thus the ones deduced when Attributor::identifyDefaultAbstractAttributes(..) | ||||||||
| 858 | /// is called. | ||||||||
| 859 | /// | ||||||||
| 860 | /// If custom abstract attributes, registered manually through | ||||||||
| 861 | /// Attributor::registerAA(...), need more information, especially if it is not | ||||||||
| 862 | /// reusable, it is advised to inherit from the InformationCache and cast the | ||||||||
| 863 | /// instance down in the abstract attributes. | ||||||||
| 864 | struct InformationCache { | ||||||||
| 865 | InformationCache(const Module &M, AnalysisGetter &AG, | ||||||||
| 866 | BumpPtrAllocator &Allocator, SetVector<Function *> *CGSCC) | ||||||||
| 867 | : DL(M.getDataLayout()), Allocator(Allocator), | ||||||||
| 868 | Explorer( | ||||||||
| 869 | /* ExploreInterBlock */ true, /* ExploreCFGForward */ true, | ||||||||
| 870 | /* ExploreCFGBackward */ true, | ||||||||
| 871 | /* LIGetter */ | ||||||||
| 872 | [&](const Function &F) { return AG.getAnalysis<LoopAnalysis>(F); }, | ||||||||
| 873 | /* DTGetter */ | ||||||||
| 874 | [&](const Function &F) { | ||||||||
| 875 | return AG.getAnalysis<DominatorTreeAnalysis>(F); | ||||||||
| 876 | }, | ||||||||
| 877 | /* PDTGetter */ | ||||||||
| 878 | [&](const Function &F) { | ||||||||
| 879 | return AG.getAnalysis<PostDominatorTreeAnalysis>(F); | ||||||||
| 880 | }), | ||||||||
| 881 | AG(AG), CGSCC(CGSCC), TargetTriple(M.getTargetTriple()) { | ||||||||
| 882 | if (CGSCC) | ||||||||
| 883 | initializeModuleSlice(*CGSCC); | ||||||||
| 884 | } | ||||||||
| 885 | |||||||||
| 886 | ~InformationCache() { | ||||||||
| 887 | // The FunctionInfo objects are allocated via a BumpPtrAllocator, we call | ||||||||
| 888 | // the destructor manually. | ||||||||
| 889 | for (auto &It : FuncInfoMap) | ||||||||
| 890 | It.getSecond()->~FunctionInfo(); | ||||||||
| 891 | } | ||||||||
| 892 | |||||||||
| 893 | /// Apply \p CB to all uses of \p F. If \p LookThroughConstantExprUses is | ||||||||
| 894 | /// true, constant expression users are not given to \p CB but their uses are | ||||||||
| 895 | /// traversed transitively. | ||||||||
| 896 | template <typename CBTy> | ||||||||
| 897 | static void foreachUse(Function &F, CBTy CB, | ||||||||
| 898 | bool LookThroughConstantExprUses = true) { | ||||||||
| 899 | SmallVector<Use *, 8> Worklist(make_pointer_range(F.uses())); | ||||||||
| 900 | |||||||||
| 901 | for (unsigned Idx = 0; Idx < Worklist.size(); ++Idx) { | ||||||||
| 902 | Use &U = *Worklist[Idx]; | ||||||||
| 903 | |||||||||
| 904 | // Allow use in constant bitcasts and simply look through them. | ||||||||
| 905 | if (LookThroughConstantExprUses && isa<ConstantExpr>(U.getUser())) { | ||||||||
| 906 | for (Use &CEU : cast<ConstantExpr>(U.getUser())->uses()) | ||||||||
| 907 | Worklist.push_back(&CEU); | ||||||||
| 908 | continue; | ||||||||
| 909 | } | ||||||||
| 910 | |||||||||
| 911 | CB(U); | ||||||||
| 912 | } | ||||||||
| 913 | } | ||||||||
| 914 | |||||||||
| 915 | /// Initialize the ModuleSlice member based on \p SCC. ModuleSlice contains | ||||||||
| 916 | /// (a subset of) all functions that we can look at during this SCC traversal. | ||||||||
| 917 | /// This includes functions (transitively) called from the SCC and the | ||||||||
| 918 | /// (transitive) callers of SCC functions. We can also look at a function if | ||||||||
| 919 | /// there is a "reference edge", i.a., if the function somehow uses (!=calls) | ||||||||
| 920 | /// a function in the SCC or a caller of a function in the SCC. | ||||||||
| 921 | void initializeModuleSlice(SetVector<Function *> &SCC) { | ||||||||
| 922 | ModuleSlice.insert(SCC.begin(), SCC.end()); | ||||||||
| 923 | |||||||||
| 924 | SmallPtrSet<Function *, 16> Seen; | ||||||||
| 925 | SmallVector<Function *, 16> Worklist(SCC.begin(), SCC.end()); | ||||||||
| 926 | while (!Worklist.empty()) { | ||||||||
| 927 | Function *F = Worklist.pop_back_val(); | ||||||||
| 928 | ModuleSlice.insert(F); | ||||||||
| 929 | |||||||||
| 930 | for (Instruction &I : instructions(*F)) | ||||||||
| 931 | if (auto *CB = dyn_cast<CallBase>(&I)) | ||||||||
| 932 | if (Function *Callee = CB->getCalledFunction()) | ||||||||
| 933 | if (Seen.insert(Callee).second) | ||||||||
| 934 | Worklist.push_back(Callee); | ||||||||
| 935 | } | ||||||||
| 936 | |||||||||
| 937 | Seen.clear(); | ||||||||
| 938 | Worklist.append(SCC.begin(), SCC.end()); | ||||||||
| 939 | while (!Worklist.empty()) { | ||||||||
| 940 | Function *F = Worklist.pop_back_val(); | ||||||||
| 941 | ModuleSlice.insert(F); | ||||||||
| 942 | |||||||||
| 943 | // Traverse all transitive uses. | ||||||||
| 944 | foreachUse(*F, [&](Use &U) { | ||||||||
| 945 | if (auto *UsrI = dyn_cast<Instruction>(U.getUser())) | ||||||||
| 946 | if (Seen.insert(UsrI->getFunction()).second) | ||||||||
| 947 | Worklist.push_back(UsrI->getFunction()); | ||||||||
| 948 | }); | ||||||||
| 949 | } | ||||||||
| 950 | } | ||||||||
| 951 | |||||||||
| 952 | /// The slice of the module we are allowed to look at. | ||||||||
| 953 | SmallPtrSet<Function *, 8> ModuleSlice; | ||||||||
| 954 | |||||||||
| 955 | /// A vector type to hold instructions. | ||||||||
| 956 | using InstructionVectorTy = SmallVector<Instruction *, 8>; | ||||||||
| 957 | |||||||||
| 958 | /// A map type from opcodes to instructions with this opcode. | ||||||||
| 959 | using OpcodeInstMapTy = DenseMap<unsigned, InstructionVectorTy *>; | ||||||||
| 960 | |||||||||
| 961 | /// Return the map that relates "interesting" opcodes with all instructions | ||||||||
| 962 | /// with that opcode in \p F. | ||||||||
| 963 | OpcodeInstMapTy &getOpcodeInstMapForFunction(const Function &F) { | ||||||||
| 964 | return getFunctionInfo(F).OpcodeInstMap; | ||||||||
| 965 | } | ||||||||
| 966 | |||||||||
| 967 | /// Return the instructions in \p F that may read or write memory. | ||||||||
| 968 | InstructionVectorTy &getReadOrWriteInstsForFunction(const Function &F) { | ||||||||
| 969 | return getFunctionInfo(F).RWInsts; | ||||||||
| 970 | } | ||||||||
| 971 | |||||||||
| 972 | /// Return MustBeExecutedContextExplorer | ||||||||
| 973 | MustBeExecutedContextExplorer &getMustBeExecutedContextExplorer() { | ||||||||
| 974 | return Explorer; | ||||||||
| 975 | } | ||||||||
| 976 | |||||||||
| 977 | /// Return TargetLibraryInfo for function \p F. | ||||||||
| 978 | TargetLibraryInfo *getTargetLibraryInfoForFunction(const Function &F) { | ||||||||
| 979 | return AG.getAnalysis<TargetLibraryAnalysis>(F); | ||||||||
| 980 | } | ||||||||
| 981 | |||||||||
| 982 | /// Return AliasAnalysis Result for function \p F. | ||||||||
| 983 | AAResults *getAAResultsForFunction(const Function &F); | ||||||||
| 984 | |||||||||
| 985 | /// Return true if \p Arg is involved in a must-tail call, i.e., it is an | ||||||||
| 986 | /// argument of the caller or the callee of such a call. | ||||||||
| 987 | bool isInvolvedInMustTailCall(const Argument &Arg) { | ||||||||
| 988 | FunctionInfo &FI = getFunctionInfo(*Arg.getParent()); | ||||||||
| 989 | return FI.CalledViaMustTail || FI.ContainsMustTailCall; | ||||||||
| 990 | } | ||||||||
| 991 | |||||||||
| 992 | /// Return the analysis result from a pass \p AP for function \p F. | ||||||||
| 993 | template <typename AP> | ||||||||
| 994 | typename AP::Result *getAnalysisResultForFunction(const Function &F) { | ||||||||
| 995 | return AG.getAnalysis<AP>(F); | ||||||||
| 996 | } | ||||||||
| 997 | |||||||||
| 998 | /// Return SCC size on call graph for function \p F or 0 if unknown. | ||||||||
| 999 | unsigned getSccSize(const Function &F) { | ||||||||
| 1000 | if (CGSCC && CGSCC->count(const_cast<Function *>(&F))) | ||||||||
| 1001 | return CGSCC->size(); | ||||||||
| 1002 | return 0; | ||||||||
| 1003 | } | ||||||||
| 1004 | |||||||||
| 1005 | /// Return datalayout used in the module. | ||||||||
| 1006 | const DataLayout &getDL() { return DL; } | ||||||||
| 1007 | |||||||||
| 1008 | /// Return the map containing all the knowledge we have from `llvm.assume`s. | ||||||||
| 1009 | const RetainedKnowledgeMap &getKnowledgeMap() const { return KnowledgeMap; } | ||||||||
| 1010 | |||||||||
| 1011 | /// Return whether \p To is potentially reachable from \p From. | ||||||||
| 1012 | /// If the same query was answered before, the cached result is returned. | ||||||||
| 1013 | bool getPotentiallyReachable(const Instruction &From, const Instruction &To) { | ||||||||
| 1014 | auto KeyPair = std::make_pair(&From, &To); | ||||||||
| 1015 | auto Iter = PotentiallyReachableMap.find(KeyPair); | ||||||||
| 1016 | if (Iter != PotentiallyReachableMap.end()) | ||||||||
| 1017 | return Iter->second; | ||||||||
| 1018 | const Function &F = *From.getFunction(); | ||||||||
| 1019 | bool Result = true; | ||||||||
| 1020 | if (From.getFunction() == To.getFunction()) | ||||||||
| 1021 | Result = isPotentiallyReachable(&From, &To, nullptr, | ||||||||
| 1022 | AG.getAnalysis<DominatorTreeAnalysis>(F), | ||||||||
| 1023 | AG.getAnalysis<LoopAnalysis>(F)); | ||||||||
| 1024 | PotentiallyReachableMap.insert(std::make_pair(KeyPair, Result)); | ||||||||
| 1025 | return Result; | ||||||||
| 1026 | } | ||||||||
| 1027 | |||||||||
| 1028 | /// Check whether \p F is part of module slice. | ||||||||
| 1029 | bool isInModuleSlice(const Function &F) { | ||||||||
| 1030 | return ModuleSlice.count(const_cast<Function *>(&F)); | ||||||||
| 1031 | } | ||||||||
| 1032 | |||||||||
| 1033 | /// Return true if the stack (llvm::Alloca) can be accessed by other threads. | ||||||||
| 1034 | bool stackIsAccessibleByOtherThreads() { return !targetIsGPU(); } | ||||||||
| 1035 | |||||||||
| 1036 | /// Return true if the target is a GPU. | ||||||||
| 1037 | bool targetIsGPU() { | ||||||||
| 1038 | return TargetTriple.isAMDGPU() || TargetTriple.isNVPTX(); | ||||||||
| 1039 | } | ||||||||
| 1040 | |||||||||
| 1041 | private: | ||||||||
| 1042 | struct FunctionInfo { | ||||||||
| 1043 | ~FunctionInfo(); | ||||||||
| 1044 | |||||||||
| 1045 | /// A nested map that remembers all instructions in a function with a | ||||||||
| 1046 | /// certain instruction opcode (Instruction::getOpcode()). | ||||||||
| 1047 | OpcodeInstMapTy OpcodeInstMap; | ||||||||
| 1048 | |||||||||
| 1049 | /// A map from functions to their instructions that may read or write | ||||||||
| 1050 | /// memory. | ||||||||
| 1051 | InstructionVectorTy RWInsts; | ||||||||
| 1052 | |||||||||
| 1053 | /// Function is called by a `musttail` call. | ||||||||
| 1054 | bool CalledViaMustTail; | ||||||||
| 1055 | |||||||||
| 1056 | /// Function contains a `musttail` call. | ||||||||
| 1057 | bool ContainsMustTailCall; | ||||||||
| 1058 | }; | ||||||||
| 1059 | |||||||||
| 1060 | /// A map type from functions to information about them. | ||||||||
| 1061 | DenseMap<const Function *, FunctionInfo *> FuncInfoMap; | ||||||||
| 1062 | |||||||||
| 1063 | /// Return information about the function \p F, potentially by creating it. | ||||||||
| 1064 | FunctionInfo &getFunctionInfo(const Function &F) { | ||||||||
| 1065 | FunctionInfo *&FI = FuncInfoMap[&F]; | ||||||||
| 1066 | if (!FI) { | ||||||||
| 1067 | FI = new (Allocator) FunctionInfo(); | ||||||||
| 1068 | initializeInformationCache(F, *FI); | ||||||||
| 1069 | } | ||||||||
| 1070 | return *FI; | ||||||||
| 1071 | } | ||||||||
| 1072 | |||||||||
| 1073 | /// Initialize the function information cache \p FI for the function \p F. | ||||||||
| 1074 | /// | ||||||||
| 1075 | /// This method needs to be called for all functions that might be looked at | ||||||||
| 1076 | /// through the information cache interface *prior* to looking at them. | ||||||||
| 1077 | void initializeInformationCache(const Function &F, FunctionInfo &FI); | ||||||||
| 1078 | |||||||||
| 1079 | /// The datalayout used in the module. | ||||||||
| 1080 | const DataLayout &DL; | ||||||||
| 1081 | |||||||||
| 1082 | /// The allocator used to allocate memory, e.g. for `FunctionInfo`s. | ||||||||
| 1083 | BumpPtrAllocator &Allocator; | ||||||||
| 1084 | |||||||||
| 1085 | /// MustBeExecutedContextExplorer | ||||||||
| 1086 | MustBeExecutedContextExplorer Explorer; | ||||||||
| 1087 | |||||||||
| 1088 | /// A map with knowledge retained in `llvm.assume` instructions. | ||||||||
| 1089 | RetainedKnowledgeMap KnowledgeMap; | ||||||||
| 1090 | |||||||||
| 1091 | /// Getters for analysis. | ||||||||
| 1092 | AnalysisGetter &AG; | ||||||||
| 1093 | |||||||||
| 1094 | /// The underlying CGSCC, or null if not available. | ||||||||
| 1095 | SetVector<Function *> *CGSCC; | ||||||||
| 1096 | |||||||||
| 1097 | /// Set of inlineable functions | ||||||||
| 1098 | SmallPtrSet<const Function *, 8> InlineableFunctions; | ||||||||
| 1099 | |||||||||
| 1100 | /// A map for caching results of queries for isPotentiallyReachable | ||||||||
| 1101 | DenseMap<std::pair<const Instruction *, const Instruction *>, bool> | ||||||||
| 1102 | PotentiallyReachableMap; | ||||||||
| 1103 | |||||||||
| 1104 | /// The triple describing the target machine. | ||||||||
| 1105 | Triple TargetTriple; | ||||||||
| 1106 | |||||||||
| 1107 | /// Give the Attributor access to the members so | ||||||||
| 1108 | /// Attributor::identifyDefaultAbstractAttributes(...) can initialize them. | ||||||||
| 1109 | friend struct Attributor; | ||||||||
| 1110 | }; | ||||||||
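A minimal sketch that uses the static foreachUse helper above to count direct call sites of a function, looking through constant expression users as the helper does by default (the counting function itself is illustrative):

static unsigned countDirectCallSites(Function &F) {
  unsigned NumCallSites = 0;
  InformationCache::foreachUse(F, [&](Use &U) {
    // Only count uses where F is the called operand, not uses where F escapes
    // as an argument or is otherwise referenced.
    if (auto *CB = dyn_cast<CallBase>(U.getUser()))
      if (CB->isCallee(&U))
        ++NumCallSites;
  });
  return NumCallSites;
}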
| 1111 | |||||||||
| 1112 | /// The fixpoint analysis framework that orchestrates the attribute deduction. | ||||||||
| 1113 | /// | ||||||||
| 1114 | /// The Attributor provides a general abstract analysis framework (guided | ||||||||
| 1115 | /// fixpoint iteration) as well as helper functions for the deduction of | ||||||||
| 1116 | /// (LLVM-IR) attributes. However, other code properties can also be deduced, | ||||||||
| 1117 | /// propagated, and ultimately manifested through the Attributor framework. This | ||||||||
| 1118 | /// is particularly useful if these properties interact with attributes and a | ||||||||
| 1119 | /// co-scheduled deduction can improve the solution. Even if not, that is, if | ||||||||
| 1120 | /// attributes/properties are completely isolated, they should use the | ||||||||
| 1121 | /// Attributor framework to reduce the number of fixpoint iteration frameworks | ||||||||
| 1122 | /// in the code base. Note that the Attributor design makes sure that isolated | ||||||||
| 1123 | /// attributes are not impacted, in any way, by others derived at the same time | ||||||||
| 1124 | /// if there is no cross-reasoning performed. | ||||||||
| 1125 | /// | ||||||||
| 1126 | /// The public facing interface of the Attributor is kept simple and basically | ||||||||
| 1127 | /// allows abstract attributes to do one thing: query other abstract attributes | ||||||||
| 1128 | /// in-flight. There are two reasons to do this: | ||||||||
| 1129 | /// a) The optimistic state of one abstract attribute can justify an | ||||||||
| 1130 | /// optimistic state of another, allowing the framework to end up with an | ||||||||
| 1131 | /// optimistic (=best possible) fixpoint instead of one based solely on | ||||||||
| 1132 | /// information in the IR. | ||||||||
| 1133 | /// b) This avoids reimplementing various kinds of lookups, e.g., to check | ||||||||
| 1134 | /// for existing IR attributes, in favor of a single lookup interface | ||||||||
| 1135 | /// provided by an abstract attribute subclass. | ||||||||
| 1136 | /// | ||||||||
| 1137 | /// NOTE: The mechanics of adding a new "concrete" abstract attribute are | ||||||||
| 1138 | /// described in the file comment. | ||||||||
| 1139 | struct Attributor { | ||||||||
| 1140 | |||||||||
| 1141 | using OptimizationRemarkGetter = | ||||||||
| 1142 | function_ref<OptimizationRemarkEmitter &(Function *)>; | ||||||||
| 1143 | |||||||||
| 1144 | /// Constructor | ||||||||
| 1145 | /// | ||||||||
| 1146 | /// \param Functions The set of functions we are deriving attributes for. | ||||||||
| 1147 | /// \param InfoCache Cache to hold various information accessible for | ||||||||
| 1148 | /// the abstract attributes. | ||||||||
| 1149 | /// \param CGUpdater Helper to update an underlying call graph. | ||||||||
| 1150 | /// \param Allowed If not null, a set limiting the attribute opportunities. | ||||||||
| 1151 | /// \param DeleteFns Whether to delete functions. | ||||||||
| 1152 | /// \param RewriteSignatures Whether to rewrite function signatures. | ||||||||
| 1153 | /// \param MaxFixedPointIterations Maximum number of iterations to run until | ||||||||
| 1154 | /// fixpoint. | ||||||||
| 1155 | Attributor(SetVector<Function *> &Functions, InformationCache &InfoCache, | ||||||||
| 1156 | CallGraphUpdater &CGUpdater, | ||||||||
| 1157 | DenseSet<const char *> *Allowed = nullptr, bool DeleteFns = true, | ||||||||
| 1158 | bool RewriteSignatures = true) | ||||||||
| 1159 | : Allocator(InfoCache.Allocator), Functions(Functions), | ||||||||
| 1160 | InfoCache(InfoCache), CGUpdater(CGUpdater), Allowed(Allowed), | ||||||||
| 1161 | DeleteFns(DeleteFns), RewriteSignatures(RewriteSignatures), | ||||||||
| 1162 | MaxFixpointIterations(None), OREGetter(None), PassName("") {} | ||||||||
| 1163 | |||||||||
| 1164 | /// Constructor | ||||||||
| 1165 | /// | ||||||||
| 1166 | /// \param Functions The set of functions we are deriving attributes for. | ||||||||
| 1167 | /// \param InfoCache Cache to hold various information accessible for | ||||||||
| 1168 | /// the abstract attributes. | ||||||||
| 1169 | /// \param CGUpdater Helper to update an underlying call graph. | ||||||||
| 1170 | /// \param Allowed If not null, a set limiting the attribute opportunities. | ||||||||
| 1171 | /// \param DeleteFns Whether to delete functions | ||||||||
| 1172 | /// \param MaxFixpointIterations Maximum number of iterations to run until | ||||||||
| 1173 | /// fixpoint. | ||||||||
| 1174 | /// \param OREGetter A callback function that returns an ORE object from a | ||||||||
| 1175 | /// Function pointer. | ||||||||
| 1176 | /// \param PassName The name of the pass emitting remarks. | ||||||||
| 1177 | Attributor(SetVector<Function *> &Functions, InformationCache &InfoCache, | ||||||||
| 1178 | CallGraphUpdater &CGUpdater, DenseSet<const char *> *Allowed, | ||||||||
| 1179 | bool DeleteFns, bool RewriteSignatures, | ||||||||
| 1180 | Optional<unsigned> MaxFixpointIterations, | ||||||||
| 1181 | OptimizationRemarkGetter OREGetter, const char *PassName) | ||||||||
| 1182 | : Allocator(InfoCache.Allocator), Functions(Functions), | ||||||||
| 1183 | InfoCache(InfoCache), CGUpdater(CGUpdater), Allowed(Allowed), | ||||||||
| 1184 | DeleteFns(DeleteFns), RewriteSignatures(RewriteSignatures), | ||||||||
| 1185 | MaxFixpointIterations(MaxFixpointIterations), | ||||||||
| 1186 | OREGetter(Optional<OptimizationRemarkGetter>(OREGetter)), | ||||||||
| 1187 | PassName(PassName) {} | ||||||||
| 1188 | |||||||||
| 1189 | ~Attributor(); | ||||||||
| 1190 | |||||||||
| 1191 | /// Run the analyses until a fixpoint is reached or enforced (timeout). | ||||||||
| 1192 | /// | ||||||||
| 1193 | /// The attributes registered with this Attributor can be used afterwards as long | ||||||||
| 1194 | /// as the Attributor is not destroyed (it owns the attributes now). | ||||||||
| 1195 | /// | ||||||||
| 1196 | /// \Returns CHANGED if the IR was changed, otherwise UNCHANGED. | ||||||||
| 1197 | ChangeStatus run(); | ||||||||
| 1198 | |||||||||
| 1199 | /// Lookup an abstract attribute of type \p AAType at position \p IRP. If | ||||||||
| 1200 | /// no abstract attribute is found, equivalent positions are checked, see | ||||||||
| 1201 | /// SubsumingPositionIterator. Thus, the returned abstract attribute | ||||||||
| 1202 | /// might be anchored at a different position, e.g., the callee if \p IRP is a | ||||||||
| 1203 | /// call base. | ||||||||
| 1204 | /// | ||||||||
| 1205 | /// This method is the only (supported) way an abstract attribute can retrieve | ||||||||
| 1206 | /// information from another abstract attribute. As an example, take an | ||||||||
| 1207 | /// abstract attribute that determines the memory access behavior for a | ||||||||
| 1208 | /// argument (readnone, readonly, ...). It should use `getAAFor` to get the | ||||||||
| 1209 | /// most optimistic information for other abstract attributes in-flight, e.g. | ||||||||
| 1210 | /// the one reasoning about the "captured" state for the argument or the one | ||||||||
| 1211 | /// reasoning on the memory access behavior of the function as a whole. | ||||||||
| 1212 | /// | ||||||||
| 1213 | /// If the DepClass enum is set to `DepClassTy::None` the dependence from | ||||||||
| 1214 | /// \p QueryingAA to the returned abstract attribute is not automatically | ||||||||
| 1215 | /// recorded. This should only be used if the caller will record the | ||||||||
| 1216 | /// dependence explicitly if necessary, that is, if the returned abstract | ||||||||
| 1217 | /// attribute is used for reasoning. To record the dependences explicitly use | ||||||||
| 1218 | /// the `Attributor::recordDependence` method. | ||||||||
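///
/// For illustration (with `AAFoo` and `isAssumedFoo` as placeholder names,
/// not entities declared in this header), a querying attribute would
/// typically do the following inside its own update:
///
///   const auto &FooAA =
///       A.getAAFor<AAFoo>(*this, IRPosition::value(V), DepClassTy::REQUIRED);
///   if (FooAA.isAssumedFoo())
///     ...; // Use the optimistic in-flight information; the REQUIRED
///          // dependence makes sure we are re-run if FooAA changes.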
| 1219 | template <typename AAType> | ||||||||
| 1220 | const AAType &getAAFor(const AbstractAttribute &QueryingAA, | ||||||||
| 1221 | const IRPosition &IRP, DepClassTy DepClass) { | ||||||||
| 1222 | return getOrCreateAAFor<AAType>(IRP, &QueryingAA, DepClass, | ||||||||
| 1223 | /* ForceUpdate */ false); | ||||||||
| 1224 | } | ||||||||
| 1225 | |||||||||
| 1226 | /// Similar to getAAFor but the return abstract attribute will be updated (via | ||||||||
| 1227 | /// `AbstractAttribute::update`) even if it is found in the cache. This is | ||||||||
| 1228 | /// especially useful for AAIsDead as changes in liveness can make updates | ||||||||
| 1229 | /// possible/useful that were not happening before as the abstract attribute | ||||||||
| 1230 | /// was assumed dead. | ||||||||
| 1231 | template <typename AAType> | ||||||||
| 1232 | const AAType &getAndUpdateAAFor(const AbstractAttribute &QueryingAA, | ||||||||
| 1233 | const IRPosition &IRP, DepClassTy DepClass) { | ||||||||
| 1234 | return getOrCreateAAFor<AAType>(IRP, &QueryingAA, DepClass, | ||||||||
| 1235 | /* ForceUpdate */ true); | ||||||||
| 1236 | } | ||||||||
| 1237 | |||||||||
| 1238 | /// The version of getAAFor that allows omitting the querying abstract | ||||||||
| 1239 | /// attribute. Using this after the Attributor has started running is restricted | ||||||||
| 1240 | /// to the Attributor itself. Initial seeding of AAs can be done via this | ||||||||
| 1241 | /// function. | ||||||||
| 1242 | /// NOTE: ForceUpdate is ignored in any stage other than the update stage. | ||||||||
| 1243 | template <typename AAType> | ||||||||
| 1244 | const AAType &getOrCreateAAFor(IRPosition IRP, | ||||||||
| 1245 | const AbstractAttribute *QueryingAA, | ||||||||
| 1246 | DepClassTy DepClass, bool ForceUpdate = false, | ||||||||
| 1247 | bool UpdateAfterInit = true) { | ||||||||
| 1248 | if (!shouldPropagateCallBaseContext(IRP)) | ||||||||
| 1249 | IRP = IRP.stripCallBaseContext(); | ||||||||
| 1250 | |||||||||
| 1251 | if (AAType *AAPtr = lookupAAFor<AAType>(IRP, QueryingAA, DepClass, | ||||||||
| 1252 | /* AllowInvalidState */ true)) { | ||||||||
| 1253 | if (ForceUpdate && Phase == AttributorPhase::UPDATE) | ||||||||
| 1254 | updateAA(*AAPtr); | ||||||||
| 1255 | return *AAPtr; | ||||||||
| 1256 | } | ||||||||
| 1257 | |||||||||
| 1258 | // No matching attribute found, create one. | ||||||||
| 1259 | // Use the static create method. | ||||||||
| 1260 | auto &AA = AAType::createForPosition(IRP, *this); | ||||||||
| 1261 | |||||||||
| 1262 | // If we are currently seeding attributes, enforce seeding rules. | ||||||||
| 1263 | if (Phase == AttributorPhase::SEEDING && !shouldSeedAttribute(AA)) { | ||||||||
| 1264 | AA.getState().indicatePessimisticFixpoint(); | ||||||||
| 1265 | return AA; | ||||||||
| 1266 | } | ||||||||
| 1267 | |||||||||
| 1268 | registerAA(AA); | ||||||||
| 1269 | |||||||||
| 1270 | // For now we ignore naked and optnone functions. | ||||||||
| 1271 | bool Invalidate = Allowed && !Allowed->count(&AAType::ID); | ||||||||
| 1272 | const Function *FnScope = IRP.getAnchorScope(); | ||||||||
| 1273 | if (FnScope) | ||||||||
| 1274 | Invalidate |= FnScope->hasFnAttribute(Attribute::Naked) || | ||||||||
| 1275 | FnScope->hasFnAttribute(Attribute::OptimizeNone); | ||||||||
| 1276 | |||||||||
| 1277 | // Avoid too many nested initializations to prevent a stack overflow. | ||||||||
| 1278 | Invalidate |= InitializationChainLength > MaxInitializationChainLength; | ||||||||
| 1279 | |||||||||
| 1280 | // Bootstrap the new attribute with an initial update to propagate | ||||||||
| 1281 | // information, e.g., function -> call site. If it is not in the given | ||||||||
| 1282 | // Allowed set we will not perform updates at all. | ||||||||
| 1283 | if (Invalidate) { | ||||||||
| 1284 | AA.getState().indicatePessimisticFixpoint(); | ||||||||
| 1285 | return AA; | ||||||||
| 1286 | } | ||||||||
| 1287 | |||||||||
| 1288 | { | ||||||||
| 1289 | TimeTraceScope TimeScope(AA.getName() + "::initialize"); | ||||||||
| 1290 | ++InitializationChainLength; | ||||||||
| 1291 | AA.initialize(*this); | ||||||||
| 1292 | --InitializationChainLength; | ||||||||
| 1293 | } | ||||||||
| 1294 | |||||||||
| 1295 | // Initialization and update are allowed for code outside of the current | ||||||||
| 1296 | // function set, but only if it is part of the module slice we are allowed | ||||||||
| 1297 | // to look at. The only exception is AAIsDeadFunction, whose initialization | ||||||||
| 1298 | // is prevented directly since we don't want to compute it twice. | ||||||||
| 1299 | if (FnScope && !Functions.count(const_cast<Function *>(FnScope))) { | ||||||||
| 1300 | if (!getInfoCache().isInModuleSlice(*FnScope)) { | ||||||||
| 1301 | AA.getState().indicatePessimisticFixpoint(); | ||||||||
| 1302 | return AA; | ||||||||
| 1303 | } | ||||||||
| 1304 | } | ||||||||
| 1305 | |||||||||
| 1306 | // If this is queried in the manifest stage, we force the AA to indicate | ||||||||
| 1307 | // pessimistic fixpoint immediately. | ||||||||
| 1308 | if (Phase == AttributorPhase::MANIFEST) { | ||||||||
| 1309 | AA.getState().indicatePessimisticFixpoint(); | ||||||||
| 1310 | return AA; | ||||||||
| 1311 | } | ||||||||
| 1312 | |||||||||
| 1313 | // Allow seeded attributes to declare dependencies. | ||||||||
| 1314 | // Remember the seeding state. | ||||||||
| 1315 | if (UpdateAfterInit) { | ||||||||
| 1316 | AttributorPhase OldPhase = Phase; | ||||||||
| 1317 | Phase = AttributorPhase::UPDATE; | ||||||||
| 1318 | |||||||||
| 1319 | updateAA(AA); | ||||||||
| 1320 | |||||||||
| 1321 | Phase = OldPhase; | ||||||||
| 1322 | } | ||||||||
| 1323 | |||||||||
| 1324 | if (QueryingAA && AA.getState().isValidState()) | ||||||||
| 1325 | recordDependence(AA, const_cast<AbstractAttribute &>(*QueryingAA), | ||||||||
| 1326 | DepClass); | ||||||||
| 1327 | return AA; | ||||||||
| 1328 | } | ||||||||
| 1329 | template <typename AAType> | ||||||||
| 1330 | const AAType &getOrCreateAAFor(const IRPosition &IRP) { | ||||||||
| 1331 | return getOrCreateAAFor<AAType>(IRP, /* QueryingAA */ nullptr, | ||||||||
| 1332 | DepClassTy::NONE); | ||||||||
| 1333 | } | ||||||||
| 1334 | |||||||||
| 1335 | /// Return the attribute of \p AAType for \p IRP if existing and valid. This | ||||||||
| 1336 | /// also allows non-AA users lookup. | ||||||||
| 1337 | template <typename AAType> | ||||||||
| 1338 | AAType *lookupAAFor(const IRPosition &IRP, | ||||||||
| 1339 | const AbstractAttribute *QueryingAA = nullptr, | ||||||||
| 1340 | DepClassTy DepClass = DepClassTy::OPTIONAL, | ||||||||
| 1341 | bool AllowInvalidState = false) { | ||||||||
| 1342 | static_assert(std::is_base_of<AbstractAttribute, AAType>::value, | ||||||||
| 1343 | "Cannot query an attribute with a type not derived from " | ||||||||
| 1344 | "'AbstractAttribute'!"); | ||||||||
| 1345 | // Lookup the abstract attribute of type AAType. If found, return it after | ||||||||
| 1346 | // registering a dependence of QueryingAA on the returned attribute. | ||||||||
| 1347 | AbstractAttribute *AAPtr = AAMap.lookup({&AAType::ID, IRP}); | ||||||||
| 1348 | if (!AAPtr) | ||||||||
| 1349 | return nullptr; | ||||||||
| 1350 | |||||||||
| 1351 | AAType *AA = static_cast<AAType *>(AAPtr); | ||||||||
| 1352 | |||||||||
| 1353 | // Do not register a dependence on an attribute with an invalid state. | ||||||||
| 1354 | if (DepClass != DepClassTy::NONE && QueryingAA && | ||||||||
| 1355 | AA->getState().isValidState()) | ||||||||
| 1356 | recordDependence(*AA, const_cast<AbstractAttribute &>(*QueryingAA), | ||||||||
| 1357 | DepClass); | ||||||||
| 1358 | |||||||||
| 1359 | // Return nullptr if this attribute has an invalid state. | ||||||||
| 1360 | if (!AllowInvalidState && !AA->getState().isValidState()) | ||||||||
| 1361 | return nullptr; | ||||||||
| 1362 | return AA; | ||||||||
| 1363 | } | ||||||||
| 1364 | |||||||||
| 1365 | /// Explicitly record a dependence from \p FromAA to \p ToAA, that is if | ||||||||
| 1366 | /// \p FromAA changes \p ToAA should be updated as well. | ||||||||
| 1367 | /// | ||||||||
| 1368 | /// This method should be used in conjunction with the `getAAFor` method and | ||||||||
| 1369 | /// with the DepClass enum passed to the method set to None. This can | ||||||||
| 1370 | /// be beneficial to avoid false dependences but it requires the users of | ||||||||
| 1371 | /// `getAAFor` to explicitly record true dependences through this method. | ||||||||
| 1372 | /// The \p DepClass flag indicates if the dependence is strictly necessary. | ||||||||
| 1373 | /// That means for required dependences, if \p FromAA changes to an invalid | ||||||||
| 1374 | /// state, \p ToAA can be moved to a pessimistic fixpoint because it required | ||||||||
| 1375 | /// information from \p FromAA but none are available anymore. | ||||||||
| 1376 | void recordDependence(const AbstractAttribute &FromAA, | ||||||||
| 1377 | const AbstractAttribute &ToAA, DepClassTy DepClass); | ||||||||
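// Illustrative sketch of the DepClassTy::NONE pattern described above: the
// dependence is recorded manually and only when the queried information is
// actually used. The attribute type AAExample is hypothetical.
//
//   const auto &OtherAA =
//       A.getAAFor<AAExample>(*this, getIRPosition(), DepClassTy::NONE);
//   if (OtherAA.getState().isValidState()) {
//     // The optimistic information was consumed, record the true dependence.
//     A.recordDependence(OtherAA, *this, DepClassTy::REQUIRED);
//   }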
| 1378 | |||||||||
| 1379 | /// Introduce a new abstract attribute into the fixpoint analysis. | ||||||||
| 1380 | /// | ||||||||
| 1381 | /// Note that ownership of the attribute is given to the Attributor. It will | ||||||||
| 1382 | /// destroy the attribute when the Attributor itself is destroyed. | ||||||||
| 1383 | /// | ||||||||
| 1384 | /// Attributes are identified by their IR position (AAType::getIRPosition()) | ||||||||
| 1385 | /// and the address of their static member (see AAType::ID). | ||||||||
| 1386 | template <typename AAType> AAType ®isterAA(AAType &AA) { | ||||||||
| 1387 | static_assert(std::is_base_of<AbstractAttribute, AAType>::value, | ||||||||
| 1388 | "Cannot register an attribute with a type not derived from " | ||||||||
| 1389 | "'AbstractAttribute'!"); | ||||||||
| 1390 | // Put the attribute in the lookup map structure and the container we use to | ||||||||
| 1391 | // keep track of all attributes. | ||||||||
| 1392 | const IRPosition &IRP = AA.getIRPosition(); | ||||||||
| 1393 | AbstractAttribute *&AAPtr = AAMap[{&AAType::ID, IRP}]; | ||||||||
| 1394 | |||||||||
| 1395 | assert(!AAPtr && "Attribute already in map!")((void)0); | ||||||||
| 1396 | AAPtr = &AA; | ||||||||
| 1397 | |||||||||
| 1398 | // Register AA with the synthetic root only before the manifest stage. | ||||||||
| 1399 | if (Phase == AttributorPhase::SEEDING || Phase == AttributorPhase::UPDATE) | ||||||||
| 1400 | DG.SyntheticRoot.Deps.push_back( | ||||||||
| 1401 | AADepGraphNode::DepTy(&AA, unsigned(DepClassTy::REQUIRED))); | ||||||||
| 1402 | |||||||||
| 1403 | return AA; | ||||||||
| 1404 | } | ||||||||
| 1405 | |||||||||
| 1406 | /// Return the internal information cache. | ||||||||
| 1407 | InformationCache &getInfoCache() { return InfoCache; } | ||||||||
| 1408 | |||||||||
| 1409 | /// Return true if this is a module pass, false otherwise. | ||||||||
| 1410 | bool isModulePass() const { | ||||||||
| 1411 | return !Functions.empty() && | ||||||||
| 1412 | Functions.size() == Functions.front()->getParent()->size(); | ||||||||
| 1413 | } | ||||||||
| 1414 | |||||||||
| 1415 | /// Return true if we derive attributes for \p Fn | ||||||||
| 1416 | bool isRunOn(Function &Fn) const { | ||||||||
| 1417 | return Functions.empty() || Functions.count(&Fn); | ||||||||
| 1418 | } | ||||||||
| 1419 | |||||||||
| 1420 | /// Determine opportunities to derive 'default' attributes in \p F and create | ||||||||
| 1421 | /// abstract attribute objects for them. | ||||||||
| 1422 | /// | ||||||||
| 1423 | /// \param F The function that is checked for attribute opportunities. | ||||||||
| 1424 | /// | ||||||||
| 1425 | /// Note that abstract attribute instances are generally created even if the | ||||||||
| 1426 | /// IR already contains the information they would deduce. The most important | ||||||||
| 1427 | /// reason for this is the single interface, the one of the abstract attribute | ||||||||
| 1428 | /// instance, which can be queried without the need to look at the IR in | ||||||||
| 1429 | /// various places. | ||||||||
| 1430 | void identifyDefaultAbstractAttributes(Function &F); | ||||||||
| 1431 | |||||||||
| 1432 | /// Determine whether the function \p F is IPO amendable | ||||||||
| 1433 | /// | ||||||||
| 1434 | /// If a function is exactly defined or it has the alwaysinline attribute | ||||||||
| 1435 | /// and is viable to be inlined, we say it is IPO amendable. | ||||||||
| 1436 | bool isFunctionIPOAmendable(const Function &F) { | ||||||||
| 1437 | return F.hasExactDefinition() || InfoCache.InlineableFunctions.count(&F); | ||||||||
| 1438 | } | ||||||||
| 1439 | |||||||||
| 1440 | /// Mark the internal function \p F as live. | ||||||||
| 1441 | /// | ||||||||
| 1442 | /// This will trigger the identification and initialization of attributes for | ||||||||
| 1443 | /// \p F. | ||||||||
| 1444 | void markLiveInternalFunction(const Function &F) { | ||||||||
| 1445 | assert(F.hasLocalLinkage() &&((void)0) | ||||||||
| 1446 | "Only local linkage is assumed dead initially.")((void)0); | ||||||||
| 1447 | |||||||||
| 1448 | identifyDefaultAbstractAttributes(const_cast<Function &>(F)); | ||||||||
| 1449 | } | ||||||||
| 1450 | |||||||||
| 1451 | /// Helper function to remove a call site. | ||||||||
| 1452 | void removeCallSite(CallInst *CI) { | ||||||||
| 1453 | if (!CI) | ||||||||
| 1454 | return; | ||||||||
| 1455 | |||||||||
| 1456 | CGUpdater.removeCallSite(*CI); | ||||||||
| 1457 | } | ||||||||
| 1458 | |||||||||
| 1459 | /// Record that \p U is to be replaced with \p NV after information was | ||||||||
| 1460 | /// manifested. This also triggers deletion of trivially dead instructions. | ||||||||
| 1461 | bool changeUseAfterManifest(Use &U, Value &NV) { | ||||||||
| 1462 | Value *&V = ToBeChangedUses[&U]; | ||||||||
| 1463 | if (V && (V->stripPointerCasts() == NV.stripPointerCasts() || | ||||||||
| 1464 | isa_and_nonnull<UndefValue>(V))) | ||||||||
| 1465 | return false; | ||||||||
| 1466 | assert((!V || V == &NV || isa<UndefValue>(NV)) &&((void)0) | ||||||||
| 1467 | "Use was registered twice for replacement with different values!")((void)0); | ||||||||
| 1468 | V = &NV; | ||||||||
| 1469 | return true; | ||||||||
| 1470 | } | ||||||||
| 1471 | |||||||||
| 1472 | /// Helper function to replace all uses of \p V with \p NV. Return true if | ||||||||
| 1473 | /// there is any change. The flag \p ChangeDroppable indicates if droppable | ||||||||
| 1474 | /// uses should be changed too. | ||||||||
| 1475 | bool changeValueAfterManifest(Value &V, Value &NV, | ||||||||
| 1476 | bool ChangeDroppable = true) { | ||||||||
| 1477 | auto &Entry = ToBeChangedValues[&V]; | ||||||||
| 1478 | Value *&CurNV = Entry.first; | ||||||||
| 1479 | if (CurNV && (CurNV->stripPointerCasts() == NV.stripPointerCasts() || | ||||||||
| 1480 | isa<UndefValue>(CurNV))) | ||||||||
| 1481 | return false; | ||||||||
| 1482 | assert((!CurNV || CurNV == &NV || isa<UndefValue>(NV)) &&((void)0) | ||||||||
| 1483 | "Value replacement was registered twice with different values!")((void)0); | ||||||||
| 1484 | CurNV = &NV; | ||||||||
| 1485 | Entry.second = ChangeDroppable; | ||||||||
| 1486 | return true; | ||||||||
| 1487 | } | ||||||||
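// Illustrative sketch of registering replacements during manifest: the
// values OldV/NewV, the instruction DeadI, and the Changed flag are
// hypothetical placeholders.
//
//   if (A.changeValueAfterManifest(*OldV, *NewV))
//     Changed = ChangeStatus::CHANGED;
//   A.deleteAfterManifest(*DeadI);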
| 1488 | |||||||||
| 1489 | /// Record that \p I is to be replaced with `unreachable` after information | ||||||||
| 1490 | /// was manifested. | ||||||||
| 1491 | void changeToUnreachableAfterManifest(Instruction *I) { | ||||||||
| 1492 | ToBeChangedToUnreachableInsts.insert(I); | ||||||||
| 1493 | } | ||||||||
| 1494 | |||||||||
| 1495 | /// Record that \p II has at least one dead successor block. This information | ||||||||
| 1496 | /// is used, e.g., to replace \p II with a call, after information was | ||||||||
| 1497 | /// manifested. | ||||||||
| 1498 | void registerInvokeWithDeadSuccessor(InvokeInst &II) { | ||||||||
| 1499 | InvokeWithDeadSuccessor.push_back(&II); | ||||||||
| 1500 | } | ||||||||
| 1501 | |||||||||
| 1502 | /// Record that \p I is deleted after information was manifested. This also | ||||||||
| 1503 | /// triggers deletion of trivially dead instructions. | ||||||||
| 1504 | void deleteAfterManifest(Instruction &I) { ToBeDeletedInsts.insert(&I); } | ||||||||
| 1505 | |||||||||
| 1506 | /// Record that \p BB is deleted after information was manifested. This also | ||||||||
| 1507 | /// triggers deletion of trivially dead instructions. | ||||||||
| 1508 | void deleteAfterManifest(BasicBlock &BB) { ToBeDeletedBlocks.insert(&BB); } | ||||||||
| 1509 | |||||||||
| 1510 | // Record that \p BB is added during the manifest of an AA. Added basic blocks | ||||||||
| 1511 | // are preserved in the IR. | ||||||||
| 1512 | void registerManifestAddedBasicBlock(BasicBlock &BB) { | ||||||||
| 1513 | ManifestAddedBlocks.insert(&BB); | ||||||||
| 1514 | } | ||||||||
| 1515 | |||||||||
| 1516 | /// Record that \p F is deleted after information was manifested. | ||||||||
| 1517 | void deleteAfterManifest(Function &F) { | ||||||||
| 1518 | if (DeleteFns) | ||||||||
| 1519 | ToBeDeletedFunctions.insert(&F); | ||||||||
| 1520 | } | ||||||||
| 1521 | |||||||||
| 1522 | /// If \p IRP is assumed to be a constant, return it, if it is unclear yet, | ||||||||
| 1523 | /// return None, otherwise return `nullptr`. | ||||||||
| 1524 | Optional<Constant *> getAssumedConstant(const IRPosition &IRP, | ||||||||
| 1525 | const AbstractAttribute &AA, | ||||||||
| 1526 | bool &UsedAssumedInformation); | ||||||||
| 1527 | Optional<Constant *> getAssumedConstant(const Value &V, | ||||||||
| 1528 | const AbstractAttribute &AA, | ||||||||
| 1529 | bool &UsedAssumedInformation) { | ||||||||
| 1530 | return getAssumedConstant(IRPosition::value(V), AA, UsedAssumedInformation); | ||||||||
| 1531 | } | ||||||||
| 1532 | |||||||||
| 1533 | /// If \p V is assumed simplified, return it, if it is unclear yet, | ||||||||
| 1534 | /// return None, otherwise return `nullptr`. | ||||||||
| 1535 | Optional<Value *> getAssumedSimplified(const IRPosition &IRP, | ||||||||
| 1536 | const AbstractAttribute &AA, | ||||||||
| 1537 | bool &UsedAssumedInformation) { | ||||||||
| 1538 | return getAssumedSimplified(IRP, &AA, UsedAssumedInformation); | ||||||||
| 1539 | } | ||||||||
| 1540 | Optional<Value *> getAssumedSimplified(const Value &V, | ||||||||
| 1541 | const AbstractAttribute &AA, | ||||||||
| 1542 | bool &UsedAssumedInformation) { | ||||||||
| 1543 | return getAssumedSimplified(IRPosition::value(V), AA, | ||||||||
| 1544 | UsedAssumedInformation); | ||||||||
| 1545 | } | ||||||||
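// Illustrative sketch of handling the tri-state result of
// getAssumedSimplified; the value V is a hypothetical placeholder.
//
//   bool UsedAssumedInformation = false;
//   Optional<Value *> SimplifiedV =
//       A.getAssumedSimplified(V, *this, UsedAssumedInformation);
//   if (!SimplifiedV.hasValue()) {
//     // Unclear yet; optimistically wait for a later update.
//   } else if (Value *NewV = SimplifiedV.getValue()) {
//     // V is assumed to simplify to NewV.
//   } else {
//     // nullptr: no simplification is assumed.
//   }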
| 1546 | |||||||||
| 1547 | /// If \p V is assumed simplified, return it, if it is unclear yet, | ||||||||
| 1548 | /// return None, otherwise return `nullptr`. Same as the public version | ||||||||
| 1549 | /// except that it can be used without recording dependences on any \p AA. | ||||||||
| 1550 | Optional<Value *> getAssumedSimplified(const IRPosition &V, | ||||||||
| 1551 | const AbstractAttribute *AA, | ||||||||
| 1552 | bool &UsedAssumedInformation); | ||||||||
| 1553 | |||||||||
| 1554 | /// Register \p CB as a simplification callback. | ||||||||
| 1555 | /// `Attributor::getAssumedSimplified` will use these callbacks before | ||||||||
| 1556 | /// it will ask `AAValueSimplify`. It is important to ensure this | ||||||||
| 1557 | /// is called before `identifyDefaultAbstractAttributes`, assuming the | ||||||||
| 1558 | /// latter is called at all. | ||||||||
| 1559 | using SimplifictionCallbackTy = std::function<Optional<Value *>( | ||||||||
| 1560 | const IRPosition &, const AbstractAttribute *, bool &)>; | ||||||||
| 1561 | void registerSimplificationCallback(const IRPosition &IRP, | ||||||||
| 1562 | const SimplifictionCallbackTy &CB) { | ||||||||
| 1563 | SimplificationCallbacks[IRP].emplace_back(CB); | ||||||||
| 1564 | } | ||||||||
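// Illustrative sketch of registering a simplification callback that always
// maps the position of a value V to a replacement NewV; both values are
// hypothetical placeholders.
//
//   auto SCB = [NewV](const IRPosition &, const AbstractAttribute *,
//                     bool &) -> Optional<Value *> { return NewV; };
//   A.registerSimplificationCallback(IRPosition::value(*V), SCB);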
| 1565 | |||||||||
| 1566 | /// Return true if there is a simplification callback for \p IRP. | ||||||||
| 1567 | bool hasSimplificationCallback(const IRPosition &IRP) { | ||||||||
| 1568 | return SimplificationCallbacks.count(IRP); | ||||||||
| 1569 | } | ||||||||
| 1570 | |||||||||
| 1571 | private: | ||||||||
| 1572 | /// The vector with all simplification callbacks registered by outside AAs. | ||||||||
| 1573 | DenseMap<IRPosition, SmallVector<SimplifictionCallbackTy, 1>> | ||||||||
| 1574 | SimplificationCallbacks; | ||||||||
| 1575 | |||||||||
| 1576 | public: | ||||||||
| 1577 | /// Translate \p V from the callee context into the call site context. | ||||||||
| 1578 | Optional<Value *> | ||||||||
| 1579 | translateArgumentToCallSiteContent(Optional<Value *> V, CallBase &CB, | ||||||||
| 1580 | const AbstractAttribute &AA, | ||||||||
| 1581 | bool &UsedAssumedInformation); | ||||||||
| 1582 | |||||||||
| 1583 | /// Return true if \p AA (or its context instruction) is assumed dead. | ||||||||
| 1584 | /// | ||||||||
| 1585 | /// If \p LivenessAA is not provided it is queried. | ||||||||
| 1586 | bool isAssumedDead(const AbstractAttribute &AA, const AAIsDead *LivenessAA, | ||||||||
| 1587 | bool &UsedAssumedInformation, | ||||||||
| 1588 | bool CheckBBLivenessOnly = false, | ||||||||
| 1589 | DepClassTy DepClass = DepClassTy::OPTIONAL); | ||||||||
| 1590 | |||||||||
| 1591 | /// Return true if \p I is assumed dead. | ||||||||
| 1592 | /// | ||||||||
| 1593 | /// If \p LivenessAA is not provided it is queried. | ||||||||
| 1594 | bool isAssumedDead(const Instruction &I, const AbstractAttribute *QueryingAA, | ||||||||
| 1595 | const AAIsDead *LivenessAA, bool &UsedAssumedInformation, | ||||||||
| 1596 | bool CheckBBLivenessOnly = false, | ||||||||
| 1597 | DepClassTy DepClass = DepClassTy::OPTIONAL); | ||||||||
| 1598 | |||||||||
| 1599 | /// Return true if \p U is assumed dead. | ||||||||
| 1600 | /// | ||||||||
| 1601 | /// If \p FnLivenessAA is not provided it is queried. | ||||||||
| 1602 | bool isAssumedDead(const Use &U, const AbstractAttribute *QueryingAA, | ||||||||
| 1603 | const AAIsDead *FnLivenessAA, bool &UsedAssumedInformation, | ||||||||
| 1604 | bool CheckBBLivenessOnly = false, | ||||||||
| 1605 | DepClassTy DepClass = DepClassTy::OPTIONAL); | ||||||||
| 1606 | |||||||||
| 1607 | /// Return true if \p IRP is assumed dead. | ||||||||
| 1608 | /// | ||||||||
| 1609 | /// If \p FnLivenessAA is not provided it is queried. | ||||||||
| 1610 | bool isAssumedDead(const IRPosition &IRP, const AbstractAttribute *QueryingAA, | ||||||||
| 1611 | const AAIsDead *FnLivenessAA, bool &UsedAssumedInformation, | ||||||||
| 1612 | bool CheckBBLivenessOnly = false, | ||||||||
| 1613 | DepClassTy DepClass = DepClassTy::OPTIONAL); | ||||||||
| 1614 | |||||||||
| 1615 | /// Return true if \p BB is assumed dead. | ||||||||
| 1616 | /// | ||||||||
| 1617 | /// If \p FnLivenessAA is not provided it is queried. | ||||||||
| 1618 | bool isAssumedDead(const BasicBlock &BB, const AbstractAttribute *QueryingAA, | ||||||||
| 1619 | const AAIsDead *FnLivenessAA, | ||||||||
| 1620 | DepClassTy DepClass = DepClassTy::OPTIONAL); | ||||||||
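// Illustrative sketch: predicates commonly skip instructions that are
// assumed dead; the instruction I is a hypothetical placeholder and the
// snippet is meant to run inside such a predicate.
//
//   bool UsedAssumedInformation = false;
//   if (A.isAssumedDead(I, this, /* LivenessAA */ nullptr,
//                       UsedAssumedInformation))
//     return true; // Dead code does not have to satisfy the predicate.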
| 1621 | |||||||||
| 1622 | /// Check \p Pred on all (transitive) uses of \p V. | ||||||||
| 1623 | /// | ||||||||
| 1624 | /// This method will evaluate \p Pred on all (transitive) uses of the | ||||||||
| 1625 | /// associated value and return true if \p Pred holds every time. | ||||||||
| 1626 | bool checkForAllUses(function_ref<bool(const Use &, bool &)> Pred, | ||||||||
| 1627 | const AbstractAttribute &QueryingAA, const Value &V, | ||||||||
| 1628 | bool CheckBBLivenessOnly = false, | ||||||||
| 1629 | DepClassTy LivenessDepClass = DepClassTy::OPTIONAL); | ||||||||
| 1630 | |||||||||
| 1631 | /// Emit a remark generically. | ||||||||
| 1632 | /// | ||||||||
| 1633 | /// This template function can be used to generically emit a remark. The | ||||||||
| 1634 | /// RemarkKind should be one of the following: | ||||||||
| 1635 | /// - OptimizationRemark to indicate a successful optimization attempt | ||||||||
| 1636 | /// - OptimizationRemarkMissed to report a failed optimization attempt | ||||||||
| 1637 | /// - OptimizationRemarkAnalysis to provide additional information about an | ||||||||
| 1638 | /// optimization attempt | ||||||||
| 1639 | /// | ||||||||
| 1640 | /// The remark is built using a callback function \p RemarkCB that takes a | ||||||||
| 1641 | /// RemarkKind as input and returns a RemarkKind. | ||||||||
| 1642 | template <typename RemarkKind, typename RemarkCallBack> | ||||||||
| 1643 | void emitRemark(Instruction *I, StringRef RemarkName, | ||||||||
| 1644 | RemarkCallBack &&RemarkCB) const { | ||||||||
| 1645 | if (!OREGetter) | ||||||||
| 1646 | return; | ||||||||
| 1647 | |||||||||
| 1648 | Function *F = I->getFunction(); | ||||||||
| 1649 | auto &ORE = OREGetter.getValue()(F); | ||||||||
| 1650 | |||||||||
| 1651 | if (RemarkName.startswith("OMP")) | ||||||||
| 1652 | ORE.emit([&]() { | ||||||||
| 1653 | return RemarkCB(RemarkKind(PassName, RemarkName, I)) | ||||||||
| 1654 | << " [" << RemarkName << "]"; | ||||||||
| 1655 | }); | ||||||||
| 1656 | else | ||||||||
| 1657 | ORE.emit([&]() { return RemarkCB(RemarkKind(PassName, RemarkName, I)); }); | ||||||||
| 1658 | } | ||||||||
| 1659 | |||||||||
| 1660 | /// Emit a remark on a function. | ||||||||
| 1661 | template <typename RemarkKind, typename RemarkCallBack> | ||||||||
| 1662 | void emitRemark(Function *F, StringRef RemarkName, | ||||||||
| 1663 | RemarkCallBack &&RemarkCB) const { | ||||||||
| 1664 | if (!OREGetter) | ||||||||
| 1665 | return; | ||||||||
| 1666 | |||||||||
| 1667 | auto &ORE = OREGetter.getValue()(F); | ||||||||
| 1668 | |||||||||
| 1669 | if (RemarkName.startswith("OMP")) | ||||||||
| 1670 | ORE.emit([&]() { | ||||||||
| 1671 | return RemarkCB(RemarkKind(PassName, RemarkName, F)) | ||||||||
| 1672 | << " [" << RemarkName << "]"; | ||||||||
| 1673 | }); | ||||||||
| 1674 | else | ||||||||
| 1675 | ORE.emit([&]() { return RemarkCB(RemarkKind(PassName, RemarkName, F)); }); | ||||||||
| 1676 | } | ||||||||
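// Illustrative sketch of emitting a remark for a function; the function F,
// the remark name "OMPExample", and the message are hypothetical
// placeholders.
//
//   auto Remark = [&](OptimizationRemarkAnalysis ORA) {
//     return ORA << "example analysis message";
//   };
//   A.emitRemark<OptimizationRemarkAnalysis>(F, "OMPExample", Remark);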
| 1677 | |||||||||
| 1678 | /// Helper struct used in the communication between an abstract attribute (AA) | ||||||||
| 1679 | /// that wants to change the signature of a function and the Attributor which | ||||||||
| 1680 | /// applies the changes. The struct is partially initialized with the | ||||||||
| 1681 | /// information from the AA (see the constructor). All other members are | ||||||||
| 1682 | /// provided by the Attributor prior to invoking any callbacks. | ||||||||
| 1683 | struct ArgumentReplacementInfo { | ||||||||
| 1684 | /// Callee repair callback type | ||||||||
| 1685 | /// | ||||||||
| 1686 | /// The function repair callback is invoked once to rewire the replacement | ||||||||
| 1687 | /// arguments in the body of the new function. The argument replacement info | ||||||||
| 1688 | /// is passed, as built from the registerFunctionSignatureRewrite call, as | ||||||||
| 1689 | /// well as the replacement function and an iterator to the first | ||||||||
| 1690 | /// replacement argument. | ||||||||
| 1691 | using CalleeRepairCBTy = std::function<void( | ||||||||
| 1692 | const ArgumentReplacementInfo &, Function &, Function::arg_iterator)>; | ||||||||
| 1693 | |||||||||
| 1694 | /// Abstract call site (ACS) repair callback type | ||||||||
| 1695 | /// | ||||||||
| 1696 | /// The abstract call site repair callback is invoked once on every abstract | ||||||||
| 1697 | /// call site of the replaced function (\see ReplacedFn). The callback needs | ||||||||
| 1698 | /// to provide the operands for the call to the new replacement function. | ||||||||
| 1699 | /// The number and type of the operands appended to the provided vector | ||||||||
| 1700 | /// (second argument) is defined by the number and types determined through | ||||||||
| 1701 | /// the replacement type vector (\see ReplacementTypes). The first argument | ||||||||
| 1702 | /// is the ArgumentReplacementInfo object registered with the Attributor | ||||||||
| 1703 | /// through the registerFunctionSignatureRewrite call. | ||||||||
| 1704 | using ACSRepairCBTy = | ||||||||
| 1705 | std::function<void(const ArgumentReplacementInfo &, AbstractCallSite, | ||||||||
| 1706 | SmallVectorImpl<Value *> &)>; | ||||||||
| 1707 | |||||||||
| 1708 | /// Simple getters, see the corresponding members for details. | ||||||||
| 1709 | ///{ | ||||||||
| 1710 | |||||||||
| 1711 | Attributor &getAttributor() const { return A; } | ||||||||
| 1712 | const Function &getReplacedFn() const { return ReplacedFn; } | ||||||||
| 1713 | const Argument &getReplacedArg() const { return ReplacedArg; } | ||||||||
| 1714 | unsigned getNumReplacementArgs() const { return ReplacementTypes.size(); } | ||||||||
| 1715 | const SmallVectorImpl<Type *> &getReplacementTypes() const { | ||||||||
| 1716 | return ReplacementTypes; | ||||||||
| 1717 | } | ||||||||
| 1718 | |||||||||
| 1719 | ///} | ||||||||
| 1720 | |||||||||
| 1721 | private: | ||||||||
| 1722 | /// Constructor that takes the argument to be replaced, the types of | ||||||||
| 1723 | /// the replacement arguments, as well as callbacks to repair the call sites | ||||||||
| 1724 | /// and new function after the replacement happened. | ||||||||
| 1725 | ArgumentReplacementInfo(Attributor &A, Argument &Arg, | ||||||||
| 1726 | ArrayRef<Type *> ReplacementTypes, | ||||||||
| 1727 | CalleeRepairCBTy &&CalleeRepairCB, | ||||||||
| 1728 | ACSRepairCBTy &&ACSRepairCB) | ||||||||
| 1729 | : A(A), ReplacedFn(*Arg.getParent()), ReplacedArg(Arg), | ||||||||
| 1730 | ReplacementTypes(ReplacementTypes.begin(), ReplacementTypes.end()), | ||||||||
| 1731 | CalleeRepairCB(std::move(CalleeRepairCB)), | ||||||||
| 1732 | ACSRepairCB(std::move(ACSRepairCB)) {} | ||||||||
| 1733 | |||||||||
| 1734 | /// Reference to the attributor to allow access from the callbacks. | ||||||||
| 1735 | Attributor &A; | ||||||||
| 1736 | |||||||||
| 1737 | /// The "old" function replaced by ReplacementFn. | ||||||||
| 1738 | const Function &ReplacedFn; | ||||||||
| 1739 | |||||||||
| 1740 | /// The "old" argument replaced by new ones defined via ReplacementTypes. | ||||||||
| 1741 | const Argument &ReplacedArg; | ||||||||
| 1742 | |||||||||
| 1743 | /// The types of the arguments replacing ReplacedArg. | ||||||||
| 1744 | const SmallVector<Type *, 8> ReplacementTypes; | ||||||||
| 1745 | |||||||||
| 1746 | /// Callee repair callback, see CalleeRepairCBTy. | ||||||||
| 1747 | const CalleeRepairCBTy CalleeRepairCB; | ||||||||
| 1748 | |||||||||
| 1749 | /// Abstract call site (ACS) repair callback, see ACSRepairCBTy. | ||||||||
| 1750 | const ACSRepairCBTy ACSRepairCB; | ||||||||
| 1751 | |||||||||
| 1752 | /// Allow access to the private members from the Attributor. | ||||||||
| 1753 | friend struct Attributor; | ||||||||
| 1754 | }; | ||||||||
| 1755 | |||||||||
| 1756 | /// Check if we can rewrite a function signature. | ||||||||
| 1757 | /// | ||||||||
| 1758 | /// The argument \p Arg is replaced with new ones defined by the number, | ||||||||
| 1759 | /// order, and types in \p ReplacementTypes. | ||||||||
| 1760 | /// | ||||||||
| 1761 | /// \returns True, if the replacement can be registered, via | ||||||||
| 1762 | /// registerFunctionSignatureRewrite, false otherwise. | ||||||||
| 1763 | bool isValidFunctionSignatureRewrite(Argument &Arg, | ||||||||
| 1764 | ArrayRef<Type *> ReplacementTypes); | ||||||||
| 1765 | |||||||||
| 1766 | /// Register a rewrite for a function signature. | ||||||||
| 1767 | /// | ||||||||
| 1768 | /// The argument \p Arg is replaced with new ones defined by the number, | ||||||||
| 1769 | /// order, and types in \p ReplacementTypes. The rewiring at the call sites is | ||||||||
| 1770 | /// done through \p ACSRepairCB and at the callee site through | ||||||||
| 1771 | /// \p CalleeRepairCB. | ||||||||
| 1772 | /// | ||||||||
| 1773 | /// \returns True, if the replacement was registered, false otherwise. | ||||||||
| 1774 | bool registerFunctionSignatureRewrite( | ||||||||
| 1775 | Argument &Arg, ArrayRef<Type *> ReplacementTypes, | ||||||||
| 1776 | ArgumentReplacementInfo::CalleeRepairCBTy &&CalleeRepairCB, | ||||||||
| 1777 | ArgumentReplacementInfo::ACSRepairCBTy &&ACSRepairCB); | ||||||||
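// Illustrative sketch of a signature rewrite registration; Arg and NewTy are
// hypothetical placeholders and the callback bodies only indicate the shape
// of the repair work.
//
//   auto CalleeRepairCB = [](const Attributor::ArgumentReplacementInfo &ARI,
//                            Function &NewFn, Function::arg_iterator ArgIt) {
//     // Rewire the body of NewFn to use *ArgIt instead of the old argument.
//   };
//   auto ACSRepairCB = [](const Attributor::ArgumentReplacementInfo &ARI,
//                         AbstractCallSite ACS,
//                         SmallVectorImpl<Value *> &NewArgOperands) {
//     // Append the operand(s) for the replacement argument(s).
//   };
//   if (A.isValidFunctionSignatureRewrite(Arg, {NewTy}))
//     A.registerFunctionSignatureRewrite(Arg, {NewTy},
//                                        std::move(CalleeRepairCB),
//                                        std::move(ACSRepairCB));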
| 1778 | |||||||||
| 1779 | /// Check \p Pred on all function call sites. | ||||||||
| 1780 | /// | ||||||||
| 1781 | /// This method will evaluate \p Pred on call sites and return | ||||||||
| 1782 | /// true if \p Pred holds in every call site. However, this is only possible | ||||||||
| 1783 | /// if all call sites are known, hence the function has internal linkage. | ||||||||
| 1784 | /// If true is returned, \p AllCallSitesKnown is set if all possible call | ||||||||
| 1785 | /// sites of the function have been visited. | ||||||||
| 1786 | bool checkForAllCallSites(function_ref<bool(AbstractCallSite)> Pred, | ||||||||
| 1787 | const AbstractAttribute &QueryingAA, | ||||||||
| 1788 | bool RequireAllCallSites, bool &AllCallSitesKnown); | ||||||||
| 1789 | |||||||||
| 1790 | /// Check \p Pred on all values potentially returned by \p F. | ||||||||
| 1791 | /// | ||||||||
| 1792 | /// This method will evaluate \p Pred on all values potentially returned by | ||||||||
| 1793 | /// the function associated with \p QueryingAA. The returned values are | ||||||||
| 1794 | /// matched with their respective return instructions. Returns true if \p Pred | ||||||||
| 1795 | /// holds on all of them. | ||||||||
| 1796 | bool checkForAllReturnedValuesAndReturnInsts( | ||||||||
| 1797 | function_ref<bool(Value &, const SmallSetVector<ReturnInst *, 4> &)> Pred, | ||||||||
| 1798 | const AbstractAttribute &QueryingAA); | ||||||||
| 1799 | |||||||||
| 1800 | /// Check \p Pred on all values potentially returned by the function | ||||||||
| 1801 | /// associated with \p QueryingAA. | ||||||||
| 1802 | /// | ||||||||
| 1803 | /// This is the context insensitive version of the method above. | ||||||||
| 1804 | bool checkForAllReturnedValues(function_ref<bool(Value &)> Pred, | ||||||||
| 1805 | const AbstractAttribute &QueryingAA); | ||||||||
| 1806 | |||||||||
| 1807 | /// Check \p Pred on all instructions with an opcode present in \p Opcodes. | ||||||||
| 1808 | /// | ||||||||
| 1809 | /// This method will evaluate \p Pred on all instructions with an opcode | ||||||||
| 1810 | /// present in \p Opcodes and return true if \p Pred holds on all of them. | ||||||||
| 1811 | bool checkForAllInstructions(function_ref<bool(Instruction &)> Pred, | ||||||||
| 1812 | const AbstractAttribute &QueryingAA, | ||||||||
| 1813 | const ArrayRef<unsigned> &Opcodes, | ||||||||
| 1814 | bool &UsedAssumedInformation, | ||||||||
| 1815 | bool CheckBBLivenessOnly = false, | ||||||||
| 1816 | bool CheckPotentiallyDead = false); | ||||||||
| 1817 | |||||||||
| 1818 | /// Check \p Pred on all call-like instructions (=CallBase derived). | ||||||||
| 1819 | /// | ||||||||
| 1820 | /// See checkForAllInstructions(...) for more information. | ||||||||
| 1821 | bool checkForAllCallLikeInstructions(function_ref<bool(Instruction &)> Pred, | ||||||||
| 1822 | const AbstractAttribute &QueryingAA, | ||||||||
| 1823 | bool &UsedAssumedInformation, | ||||||||
| 1824 | bool CheckBBLivenessOnly = false, | ||||||||
| 1825 | bool CheckPotentiallyDead = false) { | ||||||||
| 1826 | return checkForAllInstructions( | ||||||||
| 1827 | Pred, QueryingAA, | ||||||||
| 1828 | {(unsigned)Instruction::Invoke, (unsigned)Instruction::CallBr, | ||||||||
| 1829 | (unsigned)Instruction::Call}, | ||||||||
| 1830 | UsedAssumedInformation, CheckBBLivenessOnly, CheckPotentiallyDead); | ||||||||
| 1831 | } | ||||||||
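// Illustrative sketch of checking a property on every call-like instruction;
// the property check itself is a hypothetical placeholder.
//
//   bool UsedAssumedInformation = false;
//   auto CheckCall = [&](Instruction &I) {
//     auto &CB = cast<CallBase>(I);
//     return /* property holds for CB */ true;
//   };
//   if (!A.checkForAllCallLikeInstructions(CheckCall, *this,
//                                          UsedAssumedInformation))
//     return getState().indicatePessimisticFixpoint();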
| 1832 | |||||||||
| 1833 | /// Check \p Pred on all Read/Write instructions. | ||||||||
| 1834 | /// | ||||||||
| 1835 | /// This method will evaluate \p Pred on all instructions that read or write | ||||||||
| 1836 | /// to memory present in the information cache and return true if \p Pred | ||||||||
| 1837 | /// holds on all of them. | ||||||||
| 1838 | bool checkForAllReadWriteInstructions(function_ref<bool(Instruction &)> Pred, | ||||||||
| 1839 | AbstractAttribute &QueryingAA, | ||||||||
| 1840 | bool &UsedAssumedInformation); | ||||||||
| 1841 | |||||||||
| 1842 | /// Create a shallow wrapper for \p F such that \p F has internal linkage | ||||||||
| 1843 | /// afterwards. It also renames the original \p F to an anonymous name. | ||||||||
| 1844 | /// | ||||||||
| 1845 | /// A wrapper is a function with the same type (and attributes) as \p F | ||||||||
| 1846 | /// that will only call \p F and return the result, if any. | ||||||||
| 1847 | /// | ||||||||
| 1848 | /// Assuming the declaration of \p F looks like: | ||||||||
| 1849 | /// rty F(aty0 arg0, ..., atyN argN); | ||||||||
| 1850 | /// | ||||||||
| 1851 | /// The wrapper will then look as follows: | ||||||||
| 1852 | /// rty wrapper(aty0 arg0, ..., atyN argN) { | ||||||||
| 1853 | /// return F(arg0, ..., argN); | ||||||||
| 1854 | /// } | ||||||||
| 1855 | /// | ||||||||
| 1856 | static void createShallowWrapper(Function &F); | ||||||||
| 1857 | |||||||||
| 1858 | /// Returns true if the function \p F can be internalized, i.e., it has a | ||||||||
| 1859 | /// compatible linkage. | ||||||||
| 1860 | static bool isInternalizable(Function &F); | ||||||||
| 1861 | |||||||||
| 1862 | /// Make another copy of the function \p F such that the copied version has | ||||||||
| 1863 | /// internal linkage afterwards and can be analysed. Then we replace all uses | ||||||||
| 1864 | /// of the original function with the copied one. | ||||||||
| 1865 | /// | ||||||||
| 1866 | /// Only non-locally linked functions that have `linkonce_odr` or `weak_odr` | ||||||||
| 1867 | /// linkage can be internalized because these linkages guarantee that other | ||||||||
| 1868 | /// definitions with the same name have the same semantics as this one. | ||||||||
| 1869 | /// | ||||||||
| 1870 | /// This will only be run if the `attributor-allow-deep-wrappers` option is | ||||||||
| 1871 | /// set, or if the function is called with \p Force set to true. | ||||||||
| 1872 | /// | ||||||||
| 1873 | /// If the function \p F failed to be internalized the return value will be a | ||||||||
| 1874 | /// null pointer. | ||||||||
| 1875 | static Function *internalizeFunction(Function &F, bool Force = false); | ||||||||
| 1876 | |||||||||
| 1877 | /// Make copies of each function in the set \p FnSet such that the copied | ||||||||
| 1878 | /// version has internal linkage afterwards and can be analysed. Then we | ||||||||
| 1879 | /// replace all uses of the original functions with the copied ones. The map | ||||||||
| 1880 | /// \p FnMap contains a mapping of functions to their internalized versions. | ||||||||
| 1881 | /// | ||||||||
| 1882 | /// Only non-locally linked functions that have `linkonce_odr` or `weak_odr` | ||||||||
| 1883 | /// linkage can be internalized because these linkages guarantee that other | ||||||||
| 1884 | /// definitions with the same name have the same semantics as this one. | ||||||||
| 1885 | /// | ||||||||
| 1886 | /// This version will internalize all the functions in the set \p FnSet at | ||||||||
| 1887 | /// once and then replace the uses. This prevents internalized functions being | ||||||||
| 1888 | /// called by external functions when there is an internalized version in the | ||||||||
| 1889 | /// module. | ||||||||
| 1890 | static bool internalizeFunctions(SmallPtrSetImpl<Function *> &FnSet, | ||||||||
| 1891 | DenseMap<Function *, Function *> &FnMap); | ||||||||
| 1892 | |||||||||
| 1893 | /// Return the data layout associated with the anchor scope. | ||||||||
| 1894 | const DataLayout &getDataLayout() const { return InfoCache.DL; } | ||||||||
| 1895 | |||||||||
| 1896 | /// The allocator used to allocate memory, e.g. for `AbstractAttribute`s. | ||||||||
| 1897 | BumpPtrAllocator &Allocator; | ||||||||
| 1898 | |||||||||
| 1899 | private: | ||||||||
| 1900 | /// This method will do fixpoint iteration until fixpoint or the | ||||||||
| 1901 | /// maximum iteration count is reached. | ||||||||
| 1902 | /// | ||||||||
| 1903 | /// If the maximum iteration count is reached, this method will | ||||||||
| 1904 | /// indicate pessimistic fixpoint on attributes that transitively depend | ||||||||
| 1905 | /// on attributes that were scheduled for an update. | ||||||||
| 1906 | void runTillFixpoint(); | ||||||||
| 1907 | |||||||||
| 1908 | /// Gets called after scheduling, manifests attributes to the LLVM IR. | ||||||||
| 1909 | ChangeStatus manifestAttributes(); | ||||||||
| 1910 | |||||||||
| 1911 | /// Gets called after attributes have been manifested, cleans up the IR. | ||||||||
| 1912 | /// Deletes dead functions, blocks and instructions. | ||||||||
| 1913 | /// Rewrites function signatures and updates the call graph. | ||||||||
| 1914 | ChangeStatus cleanupIR(); | ||||||||
| 1915 | |||||||||
| 1916 | /// Identify internal functions that are effectively dead, thus not reachable | ||||||||
| 1917 | /// from a live entry point. The functions are added to ToBeDeletedFunctions. | ||||||||
| 1918 | void identifyDeadInternalFunctions(); | ||||||||
| 1919 | |||||||||
| 1920 | /// Run `::update` on \p AA and track the dependences queried while doing so. | ||||||||
| 1921 | /// Also adjust the state if we know further updates are not necessary. | ||||||||
| 1922 | ChangeStatus updateAA(AbstractAttribute &AA); | ||||||||
| 1923 | |||||||||
| 1924 | /// Remember the dependences on the top of the dependence stack such that they | ||||||||
| 1925 | /// may trigger further updates. (\see DependenceStack) | ||||||||
| 1926 | void rememberDependences(); | ||||||||
| 1927 | |||||||||
| 1928 | /// Check \p Pred on all call sites of \p Fn. | ||||||||
| 1929 | /// | ||||||||
| 1930 | /// This method will evaluate \p Pred on call sites and return | ||||||||
| 1931 | /// true if \p Pred holds in every call site. However, this is only possible | ||||||||
| 1932 | /// if all call sites are known, hence the function has internal linkage. | ||||||||
| 1933 | /// If true is returned, \p AllCallSitesKnown is set if all possible call | ||||||||
| 1934 | /// sites of the function have been visited. | ||||||||
| 1935 | bool checkForAllCallSites(function_ref<bool(AbstractCallSite)> Pred, | ||||||||
| 1936 | const Function &Fn, bool RequireAllCallSites, | ||||||||
| 1937 | const AbstractAttribute *QueryingAA, | ||||||||
| 1938 | bool &AllCallSitesKnown); | ||||||||
| 1939 | |||||||||
| 1940 | /// Determine if CallBase context in \p IRP should be propagated. | ||||||||
| 1941 | bool shouldPropagateCallBaseContext(const IRPosition &IRP); | ||||||||
| 1942 | |||||||||
| 1943 | /// Apply all requested function signature rewrites | ||||||||
| 1944 | /// (\see registerFunctionSignatureRewrite) and return Changed if the module | ||||||||
| 1945 | /// was altered. | ||||||||
| 1946 | ChangeStatus | ||||||||
| 1947 | rewriteFunctionSignatures(SmallPtrSetImpl<Function *> &ModifiedFns); | ||||||||
| 1948 | |||||||||
| 1949 | /// Check if the Attribute \p AA should be seeded. | ||||||||
| 1950 | /// See getOrCreateAAFor. | ||||||||
| 1951 | bool shouldSeedAttribute(AbstractAttribute &AA); | ||||||||
| 1952 | |||||||||
| 1953 | /// A nested map to lookup abstract attributes based on the argument position | ||||||||
| 1954 | /// on the outer level, and the addresses of the static member (AAType::ID) on | ||||||||
| 1955 | /// the inner level. | ||||||||
| 1956 | ///{ | ||||||||
| 1957 | using AAMapKeyTy = std::pair<const char *, IRPosition>; | ||||||||
| 1958 | DenseMap<AAMapKeyTy, AbstractAttribute *> AAMap; | ||||||||
| 1959 | ///} | ||||||||
| 1960 | |||||||||
| 1961 | /// Map to remember all requested signature changes (= argument replacements). | ||||||||
| 1962 | DenseMap<Function *, SmallVector<std::unique_ptr<ArgumentReplacementInfo>, 8>> | ||||||||
| 1963 | ArgumentReplacementMap; | ||||||||
| 1964 | |||||||||
| 1965 | /// The set of functions we are deriving attributes for. | ||||||||
| 1966 | SetVector<Function *> &Functions; | ||||||||
| 1967 | |||||||||
| 1968 | /// The information cache that holds pre-processed (LLVM-IR) information. | ||||||||
| 1969 | InformationCache &InfoCache; | ||||||||
| 1970 | |||||||||
| 1971 | /// Helper to update an underlying call graph. | ||||||||
| 1972 | CallGraphUpdater &CGUpdater; | ||||||||
| 1973 | |||||||||
| 1974 | /// Abstract Attribute dependency graph | ||||||||
| 1975 | AADepGraph DG; | ||||||||
| 1976 | |||||||||
| 1977 | /// Set of functions for which we modified the content such that it might | ||||||||
| 1978 | /// impact the call graph. | ||||||||
| 1979 | SmallPtrSet<Function *, 8> CGModifiedFunctions; | ||||||||
| 1980 | |||||||||
| 1981 | /// Information about a dependence. If FromAA is changed ToAA needs to be | ||||||||
| 1982 | /// updated as well. | ||||||||
| 1983 | struct DepInfo { | ||||||||
| 1984 | const AbstractAttribute *FromAA; | ||||||||
| 1985 | const AbstractAttribute *ToAA; | ||||||||
| 1986 | DepClassTy DepClass; | ||||||||
| 1987 | }; | ||||||||
| 1988 | |||||||||
| 1989 | /// The dependence stack is used to track dependences during an | ||||||||
| 1990 | /// `AbstractAttribute::update` call. As `AbstractAttribute::update` can be | ||||||||
| 1991 | /// recursive we might have multiple vectors of dependences in here. The stack | ||||||||
| 1992 | /// size should be adjusted according to the expected recursion depth and the | ||||||||
| 1993 | /// inner dependence vector size to the expected number of dependences per | ||||||||
| 1994 | /// abstract attribute. Since the inner vectors are actually allocated on the | ||||||||
| 1995 | /// stack we can be generous with their size. | ||||||||
| 1996 | using DependenceVector = SmallVector<DepInfo, 8>; | ||||||||
| 1997 | SmallVector<DependenceVector *, 16> DependenceStack; | ||||||||
| 1998 | |||||||||
| 1999 | /// If not null, a set limiting the attribute opportunities. | ||||||||
| 2000 | const DenseSet<const char *> *Allowed; | ||||||||
| 2001 | |||||||||
| 2002 | /// Whether to delete functions. | ||||||||
| 2003 | const bool DeleteFns; | ||||||||
| 2004 | |||||||||
| 2005 | /// Whether to rewrite signatures. | ||||||||
| 2006 | const bool RewriteSignatures; | ||||||||
| 2007 | |||||||||
| 2008 | /// Maximum number of fixpoint iterations. | ||||||||
| 2009 | Optional<unsigned> MaxFixpointIterations; | ||||||||
| 2010 | |||||||||
| 2011 | /// A set to remember the functions we already assume to be live and visited. | ||||||||
| 2012 | DenseSet<const Function *> VisitedFunctions; | ||||||||
| 2013 | |||||||||
| 2014 | /// Uses we replace with a new value after manifest is done. We will then | ||||||||
| 2015 | /// remove trivially dead instructions as well. | ||||||||
| 2016 | DenseMap<Use *, Value *> ToBeChangedUses; | ||||||||
| 2017 | |||||||||
| 2018 | /// Values we replace with a new value after manifest is done. We will then | ||||||||
| 2019 | /// remove trivially dead instructions as well. | ||||||||
| 2020 | DenseMap<Value *, std::pair<Value *, bool>> ToBeChangedValues; | ||||||||
| 2021 | |||||||||
| 2022 | /// Instructions we replace with `unreachable` insts after manifest is done. | ||||||||
| 2023 | SmallDenseSet<WeakVH, 16> ToBeChangedToUnreachableInsts; | ||||||||
| 2024 | |||||||||
| 2025 | /// Invoke instructions with at least a single dead successor block. | ||||||||
| 2026 | SmallVector<WeakVH, 16> InvokeWithDeadSuccessor; | ||||||||
| 2027 | |||||||||
| 2028 | /// A flag that indicates which stage of the process we are in. Initially, the | ||||||||
| 2029 | /// phase is SEEDING. The phase is changed in `Attributor::run()`. | ||||||||
| 2030 | enum class AttributorPhase { | ||||||||
| 2031 | SEEDING, | ||||||||
| 2032 | UPDATE, | ||||||||
| 2033 | MANIFEST, | ||||||||
| 2034 | CLEANUP, | ||||||||
| 2035 | } Phase = AttributorPhase::SEEDING; | ||||||||
| 2036 | |||||||||
| 2037 | /// The current initialization chain length. Tracked to avoid stack overflows. | ||||||||
| 2038 | unsigned InitializationChainLength = 0; | ||||||||
| 2039 | |||||||||
| 2040 | /// Functions, blocks, and instructions we delete after manifest is done. | ||||||||
| 2041 | /// | ||||||||
| 2042 | ///{ | ||||||||
| 2043 | SmallPtrSet<Function *, 8> ToBeDeletedFunctions; | ||||||||
| 2044 | SmallPtrSet<BasicBlock *, 8> ToBeDeletedBlocks; | ||||||||
| 2045 | SmallPtrSet<BasicBlock *, 8> ManifestAddedBlocks; | ||||||||
| 2046 | SmallDenseSet<WeakVH, 8> ToBeDeletedInsts; | ||||||||
| 2047 | ///} | ||||||||
| 2048 | |||||||||
| 2049 | /// Callback to get an OptimizationRemarkEmitter from a Function *. | ||||||||
| 2050 | Optional<OptimizationRemarkGetter> OREGetter; | ||||||||
| 2051 | |||||||||
| 2052 | /// The name of the pass to emit remarks for. | ||||||||
| 2053 | const char *PassName = ""; | ||||||||
| 2054 | |||||||||
| 2055 | friend AADepGraph; | ||||||||
| 2056 | friend AttributorCallGraph; | ||||||||
| 2057 | }; | ||||||||
| 2058 | |||||||||
| 2059 | /// An interface to query the internal state of an abstract attribute. | ||||||||
| 2060 | /// | ||||||||
| 2061 | /// The abstract state is a minimal interface that allows the Attributor to | ||||||||
| 2062 | /// communicate with the abstract attributes about their internal state without | ||||||||
| 2063 | /// enforcing or exposing implementation details, e.g., the (existence of an) | ||||||||
| 2064 | /// underlying lattice. | ||||||||
| 2065 | /// | ||||||||
| 2066 | /// It is sufficient to be able to query if a state is (1) valid or invalid, (2) | ||||||||
| 2067 | /// at a fixpoint, and to indicate to the state that (3) an optimistic fixpoint | ||||||||
| 2068 | /// was reached or (4) a pessimistic fixpoint was enforced. | ||||||||
| 2069 | /// | ||||||||
| 2070 | /// All methods need to be implemented by the subclass. For the common use case, | ||||||||
| 2071 | /// a single boolean state or a bit-encoded state, the BooleanState and | ||||||||
| 2072 | /// {Inc,Dec,Bit}IntegerState classes are already provided. An abstract | ||||||||
| 2073 | /// attribute can inherit from them to get the abstract state interface and | ||||||||
| 2074 | /// additional methods to directly modify the state if needed. See the | ||||||||
| 2075 | /// class comments for help. | ||||||||
| 2076 | struct AbstractState { | ||||||||
| 2077 | virtual ~AbstractState() {} | ||||||||
| 2078 | |||||||||
| 2079 | /// Return if this abstract state is in a valid state. If false, no | ||||||||
| 2080 | /// information provided should be used. | ||||||||
| 2081 | virtual bool isValidState() const = 0; | ||||||||
| 2082 | |||||||||
| 2083 | /// Return if this abstract state is fixed, thus does not need to be updated | ||||||||
| 2084 | /// if information changes as it cannot change itself. | ||||||||
| 2085 | virtual bool isAtFixpoint() const = 0; | ||||||||
| 2086 | |||||||||
| 2087 | /// Indicate that the abstract state should converge to the optimistic state. | ||||||||
| 2088 | /// | ||||||||
| 2089 | /// This will usually make the optimistically assumed state the known to be | ||||||||
| 2090 | /// true state. | ||||||||
| 2091 | /// | ||||||||
| 2092 | /// \returns ChangeStatus::UNCHANGED as the assumed value should not change. | ||||||||
| 2093 | virtual ChangeStatus indicateOptimisticFixpoint() = 0; | ||||||||
| 2094 | |||||||||
| 2095 | /// Indicate that the abstract state should converge to the pessimistic state. | ||||||||
| 2096 | /// | ||||||||
| 2097 | /// This will usually revert the optimistically assumed state to the known to | ||||||||
| 2098 | /// be true state. | ||||||||
| 2099 | /// | ||||||||
| 2100 | /// \returns ChangeStatus::CHANGED as the assumed value may change. | ||||||||
| 2101 | virtual ChangeStatus indicatePessimisticFixpoint() = 0; | ||||||||
| 2102 | }; | ||||||||
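// Illustrative sketch of a minimal state implementing the interface above;
// real attributes would typically reuse BooleanState or the IntegerState
// classes below instead. The name ExampleBoolState is hypothetical.
//
//   struct ExampleBoolState : public AbstractState {
//     bool Known = false;  // Proven property.
//     bool Assumed = true; // Optimistically assumed property.
//     bool isValidState() const override { return Assumed; }
//     bool isAtFixpoint() const override { return Known == Assumed; }
//     ChangeStatus indicateOptimisticFixpoint() override {
//       Known = Assumed;
//       return ChangeStatus::UNCHANGED;
//     }
//     ChangeStatus indicatePessimisticFixpoint() override {
//       Assumed = Known;
//       return ChangeStatus::CHANGED;
//     }
//   };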
| 2103 | |||||||||
| 2104 | /// Simple state with an integer encoding. | ||||||||
| 2105 | /// | ||||||||
| 2106 | /// The interface ensures that the assumed bits are always a subset of the known | ||||||||
| 2107 | /// bits. Users can only add known bits and, except through adding known bits, | ||||||||
| 2108 | /// they can only remove assumed bits. This should guarantee monotonicity and | ||||||||
| 2109 | /// thereby the existence of a fixpoint (if used correctly). The fixpoint is | ||||||||
| 2110 | /// reached when the assumed and known state/bits are equal. Users can | ||||||||
| 2111 | /// force/indicate a fixpoint. If an optimistic one is indicated, the known | ||||||||
| 2112 | /// state will catch up with the assumed one, for a pessimistic fixpoint it is | ||||||||
| 2113 | /// the other way around. | ||||||||
| 2114 | template <typename base_ty, base_ty BestState, base_ty WorstState> | ||||||||
| 2115 | struct IntegerStateBase : public AbstractState { | ||||||||
| 2116 | using base_t = base_ty; | ||||||||
| 2117 | |||||||||
| 2118 | IntegerStateBase() {} | ||||||||
| 2119 | IntegerStateBase(base_t Assumed) : Assumed(Assumed) {} | ||||||||
| 2120 | |||||||||
| 2121 | /// Return the best possible representable state. | ||||||||
| 2122 | static constexpr base_t getBestState() { return BestState; } | ||||||||
| 2123 | static constexpr base_t getBestState(const IntegerStateBase &) { | ||||||||
| 2124 | return getBestState(); | ||||||||
| 2125 | } | ||||||||
| 2126 | |||||||||
| 2127 | /// Return the worst possible representable state. | ||||||||
| 2128 | static constexpr base_t getWorstState() { return WorstState; } | ||||||||
| 2129 | static constexpr base_t getWorstState(const IntegerStateBase &) { | ||||||||
| 2130 | return getWorstState(); | ||||||||
| 2131 | } | ||||||||
| 2132 | |||||||||
| 2133 | /// See AbstractState::isValidState() | ||||||||
| 2134 | /// NOTE: For now we simply pretend that the worst possible state is invalid. | ||||||||
| 2135 | bool isValidState() const override { return Assumed != getWorstState(); } | ||||||||
| 2136 | |||||||||
| 2137 | /// See AbstractState::isAtFixpoint() | ||||||||
| 2138 | bool isAtFixpoint() const override { return Assumed == Known; } | ||||||||
| 2139 | |||||||||
| 2140 | /// See AbstractState::indicateOptimisticFixpoint(...) | ||||||||
| 2141 | ChangeStatus indicateOptimisticFixpoint() override { | ||||||||
| 2142 | Known = Assumed; | ||||||||
| 2143 | return ChangeStatus::UNCHANGED; | ||||||||
| 2144 | } | ||||||||
| 2145 | |||||||||
| 2146 | /// See AbstractState::indicatePessimisticFixpoint(...) | ||||||||
| 2147 | ChangeStatus indicatePessimisticFixpoint() override { | ||||||||
| 2148 | Assumed = Known; | ||||||||
| 2149 | return ChangeStatus::CHANGED; | ||||||||
| 2150 | } | ||||||||
| 2151 | |||||||||
| 2152 | /// Return the known state encoding. | ||||||||
| 2153 | base_t getKnown() const { return Known; } | ||||||||
| 2154 | |||||||||
| 2155 | /// Return the assumed state encoding. | ||||||||
| 2156 | base_t getAssumed() const { return Assumed; } | ||||||||
| 2157 | |||||||||
| 2158 | /// Equality for IntegerStateBase. | ||||||||
| 2159 | bool | ||||||||
| 2160 | operator==(const IntegerStateBase<base_t, BestState, WorstState> &R) const { | ||||||||
| 2161 | return this->getAssumed() == R.getAssumed() && | ||||||||
| 2162 | this->getKnown() == R.getKnown(); | ||||||||
| 2163 | } | ||||||||
| 2164 | |||||||||
| 2165 | /// Inequality for IntegerStateBase. | ||||||||
| 2166 | bool | ||||||||
| 2167 | operator!=(const IntegerStateBase<base_t, BestState, WorstState> &R) const { | ||||||||
| 2168 | return !(*this == R); | ||||||||
| 2169 | } | ||||||||
| 2170 | |||||||||
| 2171 | /// "Clamp" this state with \p R. The result is subtype dependent but it is | ||||||||
| 2172 | /// intended that only information assumed in both states will be assumed in | ||||||||
| 2173 | /// this one afterwards. | ||||||||
| 2174 | void operator^=(const IntegerStateBase<base_t, BestState, WorstState> &R) { | ||||||||
| 2175 | handleNewAssumedValue(R.getAssumed()); | ||||||||
| 2176 | } | ||||||||
| 2177 | |||||||||
| 2178 | /// "Clamp" this state with \p R. The result is subtype dependent but it is | ||||||||
| 2179 | /// intended that information known in either state will be known in | ||||||||
| 2180 | /// this one afterwards. | ||||||||
| 2181 | void operator+=(const IntegerStateBase<base_t, BestState, WorstState> &R) { | ||||||||
| 2182 | handleNewKnownValue(R.getKnown()); | ||||||||
| 2183 | } | ||||||||
| 2184 | |||||||||
| 2185 | void operator|=(const IntegerStateBase<base_t, BestState, WorstState> &R) { | ||||||||
| 2186 | joinOR(R.getAssumed(), R.getKnown()); | ||||||||
| 2187 | } | ||||||||
| 2188 | |||||||||
| 2189 | void operator&=(const IntegerStateBase<base_t, BestState, WorstState> &R) { | ||||||||
| 2190 | joinAND(R.getAssumed(), R.getKnown()); | ||||||||
| 2191 | } | ||||||||
| 2192 | |||||||||
| 2193 | protected: | ||||||||
| 2194 | /// Handle a new assumed value \p Value. Subtype dependent. | ||||||||
| 2195 | virtual void handleNewAssumedValue(base_t Value) = 0; | ||||||||
| 2196 | |||||||||
| 2197 | /// Handle a new known value \p Value. Subtype dependent. | ||||||||
| 2198 | virtual void handleNewKnownValue(base_t Value) = 0; | ||||||||
| 2199 | |||||||||
| 2200 | /// Join with \p AssumedValue and \p KnownValue (OR-like). Subtype dependent. | ||||||||
| 2201 | virtual void joinOR(base_t AssumedValue, base_t KnownValue) = 0; | ||||||||
| 2202 | |||||||||
| 2203 | /// Join with \p AssumedValue and \p KnownValue (AND-like). Subtype dependent. | ||||||||
| 2204 | virtual void joinAND(base_t AssumedValue, base_t KnownValue) = 0; | ||||||||
| 2205 | |||||||||
| 2206 | /// The known state encoding in an integer of type base_t. | ||||||||
| 2207 | base_t Known = getWorstState(); | ||||||||
| 2208 | |||||||||
| 2209 | /// The assumed state encoding in an integer of type base_t. | ||||||||
| 2210 | base_t Assumed = getBestState(); | ||||||||
| 2211 | }; | ||||||||
| 2212 | |||||||||
| 2213 | /// Specialization of the integer state for a bit-wise encoding. | ||||||||
| 2214 | template <typename base_ty = uint32_t, base_ty BestState = ~base_ty(0), | ||||||||
| 2215 | base_ty WorstState = 0> | ||||||||
| 2216 | struct BitIntegerState | ||||||||
| 2217 | : public IntegerStateBase<base_ty, BestState, WorstState> { | ||||||||
| 2218 | using base_t = base_ty; | ||||||||
| 2219 | |||||||||
| 2220 | /// Return true if the bits set in \p BitsEncoding are "known bits". | ||||||||
| 2221 | bool isKnown(base_t BitsEncoding) const { | ||||||||
| 2222 | return (this->Known & BitsEncoding) == BitsEncoding; | ||||||||
| 2223 | } | ||||||||
| 2224 | |||||||||
| 2225 | /// Return true if the bits set in \p BitsEncoding are "assumed bits". | ||||||||
| 2226 | bool isAssumed(base_t BitsEncoding) const { | ||||||||
| 2227 | return (this->Assumed & BitsEncoding) == BitsEncoding; | ||||||||
| 2228 | } | ||||||||
| 2229 | |||||||||
| 2230 | /// Add the bits in \p Bits to the "known bits". | ||||||||
| 2231 | BitIntegerState &addKnownBits(base_t Bits) { | ||||||||
| 2232 | // Make sure we never miss any "known bits". | ||||||||
| 2233 | this->Assumed |= Bits; | ||||||||
| 2234 | this->Known |= Bits; | ||||||||
| 2235 | return *this; | ||||||||
| 2236 | } | ||||||||
| 2237 | |||||||||
| 2238 | /// Remove the bits in \p BitsEncoding from the "assumed bits" if not known. | ||||||||
| 2239 | BitIntegerState &removeAssumedBits(base_t BitsEncoding) { | ||||||||
| 2240 | return intersectAssumedBits(~BitsEncoding); | ||||||||
| 2241 | } | ||||||||
| 2242 | |||||||||
| 2243 | /// Remove the bits in \p BitsEncoding from the "known bits". | ||||||||
| 2244 | BitIntegerState &removeKnownBits(base_t BitsEncoding) { | ||||||||
| 2245 | this->Known = (this->Known & ~BitsEncoding); | ||||||||
| 2246 | return *this; | ||||||||
| 2247 | } | ||||||||
| 2248 | |||||||||
| 2249 | /// Keep only the "assumed bits" that are also set in \p BitsEncoding, but never drop "known bits". | ||||||||
| 2250 | BitIntegerState &intersectAssumedBits(base_t BitsEncoding) { | ||||||||
| 2251 | // Make sure we never lose any "known bits". | ||||||||
| 2252 | this->Assumed = (this->Assumed & BitsEncoding) | this->Known; | ||||||||
| 2253 | return *this; | ||||||||
| 2254 | } | ||||||||
| 2255 | |||||||||
| 2256 | private: | ||||||||
| 2257 | void handleNewAssumedValue(base_t Value) override { | ||||||||
| 2258 | intersectAssumedBits(Value); | ||||||||
| 2259 | } | ||||||||
| 2260 | void handleNewKnownValue(base_t Value) override { addKnownBits(Value); } | ||||||||
| 2261 | void joinOR(base_t AssumedValue, base_t KnownValue) override { | ||||||||
| 2262 | this->Known |= KnownValue; | ||||||||
| 2263 | this->Assumed |= AssumedValue; | ||||||||
| 2264 | } | ||||||||
| 2265 | void joinAND(base_t AssumedValue, base_t KnownValue) override { | ||||||||
| 2266 | this->Known &= KnownValue; | ||||||||
| 2267 | this->Assumed &= AssumedValue; | ||||||||
| 2268 | } | ||||||||
| 2269 | }; | ||||||||
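A minimal usage sketch (not part of this header) of the bit-wise encoding above; the PROP_* bits are made up for illustration, and the key point is that known bits survive any removal from the assumed set:

```cpp
#include "llvm/Transforms/IPO/Attributor.h"
#include <cassert>

// Illustrative property bits; any two distinct bits would do.
enum : uint8_t { PROP_A = 1 << 0, PROP_B = 1 << 1 };

void bitStateSketch() {
  llvm::BitIntegerState<uint8_t, /*BestState=*/3, /*WorstState=*/0> S;
  S.addKnownBits(PROP_A);       // PROP_A becomes known (and stays assumed).
  S.removeAssumedBits(PROP_A);  // No effect: known bits are never dropped.
  S.removeAssumedBits(PROP_B);  // PROP_B is no longer assumed.
  assert(S.isKnown(PROP_A) && S.isAssumed(PROP_A) && !S.isAssumed(PROP_B));
}
```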
| 2270 | |||||||||
| 2271 | /// Specialization of the integer state for an increasing value, hence ~0u is | ||||||||
| 2272 | /// the best state and 0 the worst. | ||||||||
| 2273 | template <typename base_ty = uint32_t, base_ty BestState = ~base_ty(0), | ||||||||
| 2274 | base_ty WorstState = 0> | ||||||||
| 2275 | struct IncIntegerState | ||||||||
| 2276 | : public IntegerStateBase<base_ty, BestState, WorstState> { | ||||||||
| 2277 | using super = IntegerStateBase<base_ty, BestState, WorstState>; | ||||||||
| 2278 | using base_t = base_ty; | ||||||||
| 2279 | |||||||||
| 2280 | IncIntegerState() : super() {} | ||||||||
| 2281 | IncIntegerState(base_t Assumed) : super(Assumed) {} | ||||||||
| 2282 | |||||||||
| 2283 | /// Return the best possible representable state. | ||||||||
| 2284 | static constexpr base_t getBestState() { return BestState; } | ||||||||
| 2285 | static constexpr base_t | ||||||||
| 2286 | getBestState(const IncIntegerState<base_ty, BestState, WorstState> &) { | ||||||||
| 2287 | return getBestState(); | ||||||||
| 2288 | } | ||||||||
| 2289 | |||||||||
| 2290 | /// Take minimum of assumed and \p Value. | ||||||||
| 2291 | IncIntegerState &takeAssumedMinimum(base_t Value) { | ||||||||
| 2292 | // Make sure we never lose the "known value". | ||||||||
| 2293 | this->Assumed = std::max(std::min(this->Assumed, Value), this->Known); | ||||||||
| 2294 | return *this; | ||||||||
| 2295 | } | ||||||||
| 2296 | |||||||||
| 2297 | /// Take maximum of known and \p Value. | ||||||||
| 2298 | IncIntegerState &takeKnownMaximum(base_t Value) { | ||||||||
| 2299 | // Make sure we never lose the "known value". | ||||||||
| 2300 | this->Assumed = std::max(Value, this->Assumed); | ||||||||
| 2301 | this->Known = std::max(Value, this->Known); | ||||||||
| 2302 | return *this; | ||||||||
| 2303 | } | ||||||||
| 2304 | |||||||||
| 2305 | private: | ||||||||
| 2306 | void handleNewAssumedValue(base_t Value) override { | ||||||||
| 2307 | takeAssumedMinimum(Value); | ||||||||
| 2308 | } | ||||||||
| 2309 | void handleNewKnownValue(base_t Value) override { takeKnownMaximum(Value); } | ||||||||
| 2310 | void joinOR(base_t AssumedValue, base_t KnownValue) override { | ||||||||
| 2311 | this->Known = std::max(this->Known, KnownValue); | ||||||||
| 2312 | this->Assumed = std::max(this->Assumed, AssumedValue); | ||||||||
| 2313 | } | ||||||||
| 2314 | void joinAND(base_t AssumedValue, base_t KnownValue) override { | ||||||||
| 2315 | this->Known = std::min(this->Known, KnownValue); | ||||||||
| 2316 | this->Assumed = std::min(this->Assumed, AssumedValue); | ||||||||
| 2317 | } | ||||||||
| 2318 | }; | ||||||||
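A sketch of the increasing-integer state with the default `~0u`/`0` best/worst bounds (not part of the header); assumed values are clamped so they never drop below what is already known:

```cpp
#include "llvm/Transforms/IPO/Attributor.h"
#include <cassert>

void incStateSketch() {
  llvm::IncIntegerState<uint32_t> S; // Assumed = ~0u (best), Known = 0 (worst).
  S.takeKnownMaximum(4);             // Known: 0 -> 4.
  S.takeAssumedMinimum(16);          // Assumed: ~0u -> 16.
  S.takeAssumedMinimum(2);           // Clamped by Known: Assumed becomes 4, not 2.
  assert(S.getKnown() == 4 && S.getAssumed() == 4);
}
```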
| 2319 | |||||||||
| 2320 | /// Specialization of the integer state for a decreasing value, hence 0 is the | ||||||||
| 2321 | /// best state and ~0u the worst. | ||||||||
| 2322 | template <typename base_ty = uint32_t> | ||||||||
| 2323 | struct DecIntegerState : public IntegerStateBase<base_ty, 0, ~base_ty(0)> { | ||||||||
| 2324 | using base_t = base_ty; | ||||||||
| 2325 | |||||||||
| 2326 | /// Take maximum of assumed and \p Value. | ||||||||
| 2327 | DecIntegerState &takeAssumedMaximum(base_t Value) { | ||||||||
| 2328 | // Make sure we never lose the "known value". | ||||||||
| 2329 | this->Assumed = std::min(std::max(this->Assumed, Value), this->Known); | ||||||||
| 2330 | return *this; | ||||||||
| 2331 | } | ||||||||
| 2332 | |||||||||
| 2333 | /// Take minimum of known and \p Value. | ||||||||
| 2334 | DecIntegerState &takeKnownMinimum(base_t Value) { | ||||||||
| 2335 | // Make sure we never lose the "known value". | ||||||||
| 2336 | this->Assumed = std::min(Value, this->Assumed); | ||||||||
| 2337 | this->Known = std::min(Value, this->Known); | ||||||||
| 2338 | return *this; | ||||||||
| 2339 | } | ||||||||
| 2340 | |||||||||
| 2341 | private: | ||||||||
| 2342 | void handleNewAssumedValue(base_t Value) override { | ||||||||
| 2343 | takeAssumedMaximum(Value); | ||||||||
| 2344 | } | ||||||||
| 2345 | void handleNewKnownValue(base_t Value) override { takeKnownMinimum(Value); } | ||||||||
| 2346 | void joinOR(base_t AssumedValue, base_t KnownValue) override { | ||||||||
| 2347 | this->Assumed = std::min(this->Assumed, KnownValue); | ||||||||
| 2348 | this->Assumed = std::min(this->Assumed, AssumedValue); | ||||||||
| 2349 | } | ||||||||
| 2350 | void joinAND(base_t AssumedValue, base_t KnownValue) override { | ||||||||
| 2351 | this->Assumed = std::max(this->Assumed, KnownValue); | ||||||||
| 2352 | this->Assumed = std::max(this->Assumed, AssumedValue); | ||||||||
| 2353 | } | ||||||||
| 2354 | }; | ||||||||
| 2355 | |||||||||
| 2356 | /// Simple wrapper for a single bit (boolean) state. | ||||||||
| 2357 | struct BooleanState : public IntegerStateBase<bool, 1, 0> { | ||||||||
| 2358 | using super = IntegerStateBase<bool, 1, 0>; | ||||||||
| 2359 | using base_t = IntegerStateBase::base_t; | ||||||||
| 2360 | |||||||||
| 2361 | BooleanState() : super() {} | ||||||||
| 2362 | BooleanState(base_t Assumed) : super(Assumed) {} | ||||||||
| 2363 | |||||||||
| 2364 | /// Set the assumed value to \p Value but never below the known one. | ||||||||
| 2365 | void setAssumed(bool Value) { Assumed &= (Known | Value); } | ||||||||
| 2366 | |||||||||
| 2367 | /// Set the known and assumed value to \p Value. | ||||||||
| 2368 | void setKnown(bool Value) { | ||||||||
| 2369 | Known |= Value; | ||||||||
| 2370 | Assumed |= Value; | ||||||||
| 2371 | } | ||||||||
| 2372 | |||||||||
| 2373 | /// Return true if the state is assumed to hold. | ||||||||
| 2374 | bool isAssumed() const { return getAssumed(); } | ||||||||
| 2375 | |||||||||
| 2376 | /// Return true if the state is known to hold. | ||||||||
| 2377 | bool isKnown() const { return getKnown(); } | ||||||||
| 2378 | |||||||||
| 2379 | private: | ||||||||
| 2380 | void handleNewAssumedValue(base_t Value) override { | ||||||||
| 2381 | if (!Value) | ||||||||
| 2382 | Assumed = Known; | ||||||||
| 2383 | } | ||||||||
| 2384 | void handleNewKnownValue(base_t Value) override { | ||||||||
| 2385 | if (Value) | ||||||||
| 2386 | Known = (Assumed = Value); | ||||||||
| 2387 | } | ||||||||
| 2388 | void joinOR(base_t AssumedValue, base_t KnownValue) override { | ||||||||
| 2389 | Known |= KnownValue; | ||||||||
| 2390 | Assumed |= AssumedValue; | ||||||||
| 2391 | } | ||||||||
| 2392 | void joinAND(base_t AssumedValue, base_t KnownValue) override { | ||||||||
| 2393 | Known &= KnownValue; | ||||||||
| 2394 | Assumed &= AssumedValue; | ||||||||
| 2395 | } | ||||||||
| 2396 | }; | ||||||||
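A sketch of the boolean state (not part of the header): the optimistic assumption can be withdrawn, but a known `true` can no longer be revoked through `setAssumed`:

```cpp
#include "llvm/Transforms/IPO/Attributor.h"
#include <cassert>

void booleanStateSketch() {
  llvm::BooleanState B;  // Assumed = true (best), Known = false (worst).
  B.setAssumed(false);   // Give up the optimistic assumption.
  assert(!B.isAssumed() && !B.isKnown());

  llvm::BooleanState C;
  C.setKnown(true);      // Once known, a later setAssumed(false) has no effect.
  C.setAssumed(false);
  assert(C.isAssumed() && C.isKnown());
}
```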
| 2397 | |||||||||
| 2398 | /// State for an integer range. | ||||||||
| 2399 | struct IntegerRangeState : public AbstractState { | ||||||||
| 2400 | |||||||||
| 2401 | /// Bitwidth of the associated value. | ||||||||
| 2402 | uint32_t BitWidth; | ||||||||
| 2403 | |||||||||
| 2404 | /// State representing assumed range, initially set to empty. | ||||||||
| 2405 | ConstantRange Assumed; | ||||||||
| 2406 | |||||||||
| 2407 | /// State representing known range, initially set to [-inf, inf]. | ||||||||
| 2408 | ConstantRange Known; | ||||||||
| 2409 | |||||||||
| 2410 | IntegerRangeState(uint32_t BitWidth) | ||||||||
| 2411 | : BitWidth(BitWidth), Assumed(ConstantRange::getEmpty(BitWidth)), | ||||||||
| 2412 | Known(ConstantRange::getFull(BitWidth)) {} | ||||||||
| 2413 | |||||||||
| 2414 | IntegerRangeState(const ConstantRange &CR) | ||||||||
| 2415 | : BitWidth(CR.getBitWidth()), Assumed(CR), | ||||||||
| 2416 | Known(getWorstState(CR.getBitWidth())) {} | ||||||||
| 2417 | |||||||||
| 2418 | /// Return the worst possible representable state. | ||||||||
| 2419 | static ConstantRange getWorstState(uint32_t BitWidth) { | ||||||||
| 2420 | return ConstantRange::getFull(BitWidth); | ||||||||
| 2421 | } | ||||||||
| 2422 | |||||||||
| 2423 | /// Return the best possible representable state. | ||||||||
| 2424 | static ConstantRange getBestState(uint32_t BitWidth) { | ||||||||
| 2425 | return ConstantRange::getEmpty(BitWidth); | ||||||||
| 2426 | } | ||||||||
| 2427 | static ConstantRange getBestState(const IntegerRangeState &IRS) { | ||||||||
| 2428 | return getBestState(IRS.getBitWidth()); | ||||||||
| 2429 | } | ||||||||
| 2430 | |||||||||
| 2431 | /// Return the bit width of the associated value. | ||||||||
| 2432 | uint32_t getBitWidth() const { return BitWidth; } | ||||||||
| 2433 | |||||||||
| 2434 | /// See AbstractState::isValidState() | ||||||||
| 2435 | bool isValidState() const override { | ||||||||
| 2436 | return BitWidth > 0 && !Assumed.isFullSet(); | ||||||||
| 2437 | } | ||||||||
| 2438 | |||||||||
| 2439 | /// See AbstractState::isAtFixpoint() | ||||||||
| 2440 | bool isAtFixpoint() const override { return Assumed == Known; } | ||||||||
| 2441 | |||||||||
| 2442 | /// See AbstractState::indicateOptimisticFixpoint(...) | ||||||||
| 2443 | ChangeStatus indicateOptimisticFixpoint() override { | ||||||||
| 2444 | Known = Assumed; | ||||||||
| 2445 | return ChangeStatus::CHANGED; | ||||||||
| 2446 | } | ||||||||
| 2447 | |||||||||
| 2448 | /// See AbstractState::indicatePessimisticFixpoint(...) | ||||||||
| 2449 | ChangeStatus indicatePessimisticFixpoint() override { | ||||||||
| 2450 | Assumed = Known; | ||||||||
| 2451 | return ChangeStatus::CHANGED; | ||||||||
| 2452 | } | ||||||||
| 2453 | |||||||||
| 2454 | /// Return the known state encoding | ||||||||
| 2455 | ConstantRange getKnown() const { return Known; } | ||||||||
| 2456 | |||||||||
| 2457 | /// Return the assumed state encoding. | ||||||||
| 2458 | ConstantRange getAssumed() const { return Assumed; } | ||||||||
| 2459 | |||||||||
| 2460 | /// Unite the assumed range with the passed range \p R. | ||||||||
| 2461 | void unionAssumed(const ConstantRange &R) { | ||||||||
| 2462 | // Don't lose a known range. | ||||||||
| 2463 | Assumed = Assumed.unionWith(R).intersectWith(Known); | ||||||||
| 2464 | } | ||||||||
| 2465 | |||||||||
| 2466 | /// See IntegerRangeState::unionAssumed(..). | ||||||||
| 2467 | void unionAssumed(const IntegerRangeState &R) { | ||||||||
| 2468 | unionAssumed(R.getAssumed()); | ||||||||
| 2469 | } | ||||||||
| 2470 | |||||||||
| 2471 | /// Unite the known range with the passed range \p R. | ||||||||
| 2472 | void unionKnown(const ConstantRange &R) { | ||||||||
| 2473 | // Don't lose a known range. | ||||||||
| 2474 | Known = Known.unionWith(R); | ||||||||
| 2475 | Assumed = Assumed.unionWith(Known); | ||||||||
| 2476 | } | ||||||||
| 2477 | |||||||||
| 2478 | /// See IntegerRangeState::unionKnown(..). | ||||||||
| 2479 | void unionKnown(const IntegerRangeState &R) { unionKnown(R.getKnown()); } | ||||||||
| 2480 | |||||||||
| 2481 | /// Intersect known range with the passed state. | ||||||||
| 2482 | void intersectKnown(const ConstantRange &R) { | ||||||||
| 2483 | Assumed = Assumed.intersectWith(R); | ||||||||
| 2484 | Known = Known.intersectWith(R); | ||||||||
| 2485 | } | ||||||||
| 2486 | |||||||||
| 2487 | /// See IntegerRangeState::intersectKnown(..). | ||||||||
| 2488 | void intersectKnown(const IntegerRangeState &R) { | ||||||||
| 2489 | intersectKnown(R.getKnown()); | ||||||||
| 2490 | } | ||||||||
| 2491 | |||||||||
| 2492 | /// Equality for IntegerRangeState. | ||||||||
| 2493 | bool operator==(const IntegerRangeState &R) const { | ||||||||
| 2494 | return getAssumed() == R.getAssumed() && getKnown() == R.getKnown(); | ||||||||
| 2495 | } | ||||||||
| 2496 | |||||||||
| 2497 | /// "Clamp" this state with \p R. The result is subtype dependent but it is | ||||||||
| 2498 | /// intended that only information assumed in both states will be assumed in | ||||||||
| 2499 | /// this one afterwards. | ||||||||
| 2500 | IntegerRangeState operator^=(const IntegerRangeState &R) { | ||||||||
| 2501 | // NOTE: The `^=` operator looks like an `intersect`, but for range states we | ||||||||
| 2502 | // need to take the `union` so no assumed information is lost. | ||||||||
| 2503 | unionAssumed(R); | ||||||||
| 2504 | return *this; | ||||||||
| 2505 | } | ||||||||
| 2506 | |||||||||
| 2507 | IntegerRangeState operator&=(const IntegerRangeState &R) { | ||||||||
| 2509 | // NOTE: The `&=` operator looks like an `intersect`, but for range states we | ||||||||
| 2510 | // need to take the `union` of both the known and the assumed range. | ||||||||
| 2510 | unionKnown(R); | ||||||||
| 2511 | unionAssumed(R); | ||||||||
| 2512 | return *this; | ||||||||
| 2513 | } | ||||||||
| 2514 | }; | ||||||||
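A sketch of the range state (the ranges below are chosen arbitrarily, not part of the header); as the NOTE above explains, `^=` unions the assumed ranges, and the result stays bounded by the known range:

```cpp
#include "llvm/IR/ConstantRange.h"
#include "llvm/Transforms/IPO/Attributor.h"
#include <cassert>

void rangeStateSketch() {
  using namespace llvm;
  IntegerRangeState S(/*BitWidth=*/32);                        // Assumed empty, Known full.
  S.unionAssumed(ConstantRange(APInt(32, 0), APInt(32, 10)));  // Assume [0, 10).
  IntegerRangeState R(ConstantRange(APInt(32, 5), APInt(32, 20)));
  S ^= R;                                                      // Assumed grows to [0, 20).
  assert(S.getAssumed().contains(APInt(32, 15)));
}
```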
| 2515 | /// Helper struct necessary as the modular build fails if the virtual method | ||||||||
| 2516 | /// IRAttribute::manifest is defined in Attributor.cpp. | ||||||||
| 2517 | struct IRAttributeManifest { | ||||||||
| 2518 | static ChangeStatus manifestAttrs(Attributor &A, const IRPosition &IRP, | ||||||||
| 2519 | const ArrayRef<Attribute> &DeducedAttrs, | ||||||||
| 2520 | bool ForceReplace = false); | ||||||||
| 2521 | }; | ||||||||
| 2522 | |||||||||
| 2523 | /// Helper to tie an abstract state implementation to an abstract attribute. | ||||||||
| 2524 | template <typename StateTy, typename BaseType, class... Ts> | ||||||||
| 2525 | struct StateWrapper : public BaseType, public StateTy { | ||||||||
| 2526 | /// Provide static access to the type of the state. | ||||||||
| 2527 | using StateType = StateTy; | ||||||||
| 2528 | |||||||||
| 2529 | StateWrapper(const IRPosition &IRP, Ts... Args) | ||||||||
| 2530 | : BaseType(IRP), StateTy(Args...) {} | ||||||||
| 2531 | |||||||||
| 2532 | /// See AbstractAttribute::getState(...). | ||||||||
| 2533 | StateType &getState() override { return *this; } | ||||||||
| 2534 | |||||||||
| 2535 | /// See AbstractAttribute::getState(...). | ||||||||
| 2536 | const StateType &getState() const override { return *this; } | ||||||||
| 2537 | }; | ||||||||
| 2538 | |||||||||
| 2539 | /// Helper class that provides common functionality to manifest IR attributes. | ||||||||
| 2540 | template <Attribute::AttrKind AK, typename BaseType> | ||||||||
| 2541 | struct IRAttribute : public BaseType { | ||||||||
| 2542 | IRAttribute(const IRPosition &IRP) : BaseType(IRP) {} | ||||||||
| 2543 | |||||||||
| 2544 | /// See AbstractAttribute::initialize(...). | ||||||||
| 2545 | virtual void initialize(Attributor &A) override { | ||||||||
| 2546 | const IRPosition &IRP = this->getIRPosition(); | ||||||||
| 2547 | if (isa<UndefValue>(IRP.getAssociatedValue()) || | ||||||||
| 2548 | this->hasAttr(getAttrKind(), /* IgnoreSubsumingPositions */ false, | ||||||||
| 2549 | &A)) { | ||||||||
| 2550 | this->getState().indicateOptimisticFixpoint(); | ||||||||
| 2551 | return; | ||||||||
| 2552 | } | ||||||||
| 2553 | |||||||||
| 2554 | bool IsFnInterface = IRP.isFnInterfaceKind(); | ||||||||
| 2555 | const Function *FnScope = IRP.getAnchorScope(); | ||||||||
| 2556 | // TODO: Not all attributes require an exact definition. Find a way to | ||||||||
| 2557 | // enable deduction for some but not all attributes in case the | ||||||||
| 2558 | // definition might be changed at runtime, see also | ||||||||
| 2559 | // http://lists.llvm.org/pipermail/llvm-dev/2018-February/121275.html. | ||||||||
| 2560 | // TODO: We could always determine abstract attributes and if sufficient | ||||||||
| 2561 | // information was found we could duplicate the functions that do not | ||||||||
| 2562 | // have an exact definition. | ||||||||
| 2563 | if (IsFnInterface && (!FnScope || !A.isFunctionIPOAmendable(*FnScope))) | ||||||||
| 2564 | this->getState().indicatePessimisticFixpoint(); | ||||||||
| 2565 | } | ||||||||
| 2566 | |||||||||
| 2567 | /// See AbstractAttribute::manifest(...). | ||||||||
| 2568 | ChangeStatus manifest(Attributor &A) override { | ||||||||
| 2569 | if (isa<UndefValue>(this->getIRPosition().getAssociatedValue())) | ||||||||
| 2570 | return ChangeStatus::UNCHANGED; | ||||||||
| 2571 | SmallVector<Attribute, 4> DeducedAttrs; | ||||||||
| 2572 | getDeducedAttributes(this->getAnchorValue().getContext(), DeducedAttrs); | ||||||||
| 2573 | return IRAttributeManifest::manifestAttrs(A, this->getIRPosition(), | ||||||||
| 2574 | DeducedAttrs); | ||||||||
| 2575 | } | ||||||||
| 2576 | |||||||||
| 2577 | /// Return the kind that identifies the abstract attribute implementation. | ||||||||
| 2578 | Attribute::AttrKind getAttrKind() const { return AK; } | ||||||||
| 2579 | |||||||||
| 2580 | /// Return the deduced attributes in \p Attrs. | ||||||||
| 2581 | virtual void getDeducedAttributes(LLVMContext &Ctx, | ||||||||
| 2582 | SmallVectorImpl<Attribute> &Attrs) const { | ||||||||
| 2583 | Attrs.emplace_back(Attribute::get(Ctx, getAttrKind())); | ||||||||
| 2584 | } | ||||||||
| 2585 | }; | ||||||||
| 2586 | |||||||||
| 2587 | /// Base struct for all "concrete attribute" deductions. | ||||||||
| 2588 | /// | ||||||||
| 2589 | /// The abstract attribute is a minimal interface that allows the Attributor to | ||||||||
| 2590 | /// orchestrate the abstract/fixpoint analysis. The design allows us to hide away | ||||||||
| 2591 | /// implementation choices made for the subclasses but also to structure their | ||||||||
| 2592 | /// implementation and simplify the use of other abstract attributes in-flight. | ||||||||
| 2593 | /// | ||||||||
| 2594 | /// To allow easy creation of new attributes, most methods have default | ||||||||
| 2595 | /// implementations. The ones that do not are generally straightforward, except | ||||||||
| 2596 | /// `AbstractAttribute::updateImpl` which is the location of most reasoning | ||||||||
| 2597 | /// associated with the abstract attribute. The update is invoked by the | ||||||||
| 2598 | /// Attributor in case the situation used to justify the current optimistic | ||||||||
| 2599 | /// state might have changed. The Attributor determines this automatically | ||||||||
| 2600 | /// by monitoring the `Attributor::getAAFor` calls made by abstract attributes. | ||||||||
| 2601 | /// | ||||||||
| 2602 | /// The `updateImpl` method should inspect the IR and other abstract attributes | ||||||||
| 2603 | /// in-flight to justify the best possible (=optimistic) state. The actual | ||||||||
| 2604 | /// implementation is, similar to the underlying abstract state encoding, not | ||||||||
| 2605 | /// exposed. In the most common case, the `updateImpl` will go through a list of | ||||||||
| 2606 | /// reasons why its optimistic state is valid given the current information. If | ||||||||
| 2607 | /// any combination of them holds and is sufficient to justify the current | ||||||||
| 2608 | /// optimistic state, the method shall return UNCHANGED. If not, the optimistic | ||||||||
| 2609 | /// state is adjusted to the situation and the method shall return CHANGED. | ||||||||
| 2610 | /// | ||||||||
| 2611 | /// If the manifestation of the "concrete attribute" deduced by the subclass | ||||||||
| 2612 | /// differs from the "default" behavior, which is a (set of) LLVM-IR | ||||||||
| 2613 | /// attribute(s) for an argument, call site argument, function return value, or | ||||||||
| 2614 | /// function, the `AbstractAttribute::manifest` method should be overloaded. | ||||||||
| 2615 | /// | ||||||||
| 2616 | /// NOTE: If the state obtained via getState() is INVALID, i.e., if | ||||||||
| 2617 | /// AbstractAttribute::getState().isValidState() returns false, no | ||||||||
| 2618 | /// information provided by the methods of this class should be used. | ||||||||
| 2619 | /// NOTE: The Attributor currently has certain limitations on what we can do. | ||||||||
| 2620 | /// As a general rule of thumb, "concrete" abstract attributes should *for | ||||||||
| 2621 | /// now* only perform "backward" information propagation. That means | ||||||||
| 2622 | /// optimistic information obtained through abstract attributes should | ||||||||
| 2623 | /// only be used at positions that precede the origin of the information | ||||||||
| 2624 | /// with regards to the program flow. More practically, information can | ||||||||
| 2625 | /// *now* be propagated from instructions to their enclosing function, but | ||||||||
| 2626 | /// *not* from call sites to the called function. The mechanisms to allow | ||||||||
| 2627 | /// both directions will be added in the future. | ||||||||
| 2628 | /// NOTE: The mechanics of adding a new "concrete" abstract attribute are | ||||||||
| 2629 | /// described in the file comment. | ||||||||
| 2630 | struct AbstractAttribute : public IRPosition, public AADepGraphNode { | ||||||||
| 2631 | using StateType = AbstractState; | ||||||||
| 2632 | |||||||||
| 2633 | AbstractAttribute(const IRPosition &IRP) : IRPosition(IRP) {} | ||||||||
| 2634 | |||||||||
| 2635 | /// Virtual destructor. | ||||||||
| 2636 | virtual ~AbstractAttribute() {} | ||||||||
| 2637 | |||||||||
| 2638 | /// This function is used to identify if an \p DGN is of type | ||||||||
| 2639 | /// AbstractAttribute so that the dyn_cast and cast can use such information | ||||||||
| 2640 | /// to cast an AADepGraphNode to an AbstractAttribute. | ||||||||
| 2641 | /// | ||||||||
| 2642 | /// We eagerly return true here because all AADepGraphNodes except for the | ||||||||
| 2643 | /// Synthesis Node are of type AbstractAttribute. | ||||||||
| 2644 | static bool classof(const AADepGraphNode *DGN) { return true; } | ||||||||
| 2645 | |||||||||
| 2646 | /// Initialize the state with the information in the Attributor \p A. | ||||||||
| 2647 | /// | ||||||||
| 2648 | /// This function is called by the Attributor once all abstract attributes | ||||||||
| 2649 | /// have been identified. It can and shall be used for tasks like: | ||||||||
| 2650 | /// - identify existing knowledge in the IR and use it for the "known state" | ||||||||
| 2651 | /// - perform any work that is not going to change over time, e.g., determine | ||||||||
| 2652 | /// a subset of the IR, or attributes in-flight, that have to be looked at | ||||||||
| 2653 | /// in the `updateImpl` method. | ||||||||
| 2654 | virtual void initialize(Attributor &A) {} | ||||||||
| 2655 | |||||||||
| 2656 | /// Return the internal abstract state for inspection. | ||||||||
| 2657 | virtual StateType &getState() = 0; | ||||||||
| 2658 | virtual const StateType &getState() const = 0; | ||||||||
| 2659 | |||||||||
| 2660 | /// Return an IR position, see struct IRPosition. | ||||||||
| 2661 | const IRPosition &getIRPosition() const { return *this; }; | ||||||||
| 2662 | IRPosition &getIRPosition() { return *this; }; | ||||||||
| 2663 | |||||||||
| 2664 | /// Helper functions, for debug purposes only. | ||||||||
| 2665 | ///{ | ||||||||
| 2666 | void print(raw_ostream &OS) const override; | ||||||||
| 2667 | virtual void printWithDeps(raw_ostream &OS) const; | ||||||||
| 2668 | void dump() const { print(dbgs()); } | ||||||||
| 2669 | |||||||||
| 2670 | /// This function should return the "summarized" assumed state as a string. | ||||||||
| 2671 | virtual const std::string getAsStr() const = 0; | ||||||||
| 2672 | |||||||||
| 2673 | /// This function should return the name of the AbstractAttribute | ||||||||
| 2674 | virtual const std::string getName() const = 0; | ||||||||
| 2675 | |||||||||
| 2676 | /// This function should return the address of the ID of the AbstractAttribute | ||||||||
| 2677 | virtual const char *getIdAddr() const = 0; | ||||||||
| 2678 | ///} | ||||||||
| 2679 | |||||||||
| 2680 | /// Allow the Attributor access to the protected methods. | ||||||||
| 2681 | friend struct Attributor; | ||||||||
| 2682 | |||||||||
| 2683 | protected: | ||||||||
| 2684 | /// Hook for the Attributor to trigger an update of the internal state. | ||||||||
| 2685 | /// | ||||||||
| 2686 | /// If this attribute is already fixed, this method will return UNCHANGED, | ||||||||
| 2687 | /// otherwise it delegates to `AbstractAttribute::updateImpl`. | ||||||||
| 2688 | /// | ||||||||
| 2689 | /// \return CHANGED if the internal state changed, otherwise UNCHANGED. | ||||||||
| 2690 | ChangeStatus update(Attributor &A); | ||||||||
| 2691 | |||||||||
| 2692 | /// Hook for the Attributor to trigger the manifestation of the information | ||||||||
| 2693 | /// represented by the abstract attribute in the LLVM-IR. | ||||||||
| 2694 | /// | ||||||||
| 2695 | /// \return CHANGED if the IR was altered, otherwise UNCHANGED. | ||||||||
| 2696 | virtual ChangeStatus manifest(Attributor &A) { | ||||||||
| 2697 | return ChangeStatus::UNCHANGED; | ||||||||
| 2698 | } | ||||||||
| 2699 | |||||||||
| 2700 | /// Hook to enable custom statistic tracking. It is called after a manifest | ||||||||
| 2701 | /// that resulted in a change, if statistics are enabled. | ||||||||
| 2702 | /// | ||||||||
| 2703 | /// We require subclasses to provide an implementation so we remember to | ||||||||
| 2704 | /// add statistics for them. | ||||||||
| 2705 | virtual void trackStatistics() const = 0; | ||||||||
| 2706 | |||||||||
| 2707 | /// The actual update/transfer function which has to be implemented by the | ||||||||
| 2708 | /// derived classes. | ||||||||
| 2709 | /// | ||||||||
| 2710 | /// If it is called, the environment has changed and we have to determine if | ||||||||
| 2711 | /// the current information is still valid or adjust it otherwise. | ||||||||
| 2712 | /// | ||||||||
| 2713 | /// \return CHANGED if the internal state changed, otherwise UNCHANGED. | ||||||||
| 2714 | virtual ChangeStatus updateImpl(Attributor &A) = 0; | ||||||||
| 2715 | }; | ||||||||
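A hypothetical skeleton (names and deduction logic are illustrative only, not an attribute defined in this header) showing the minimal surface a concrete abstract attribute provides on top of `StateWrapper`:

```cpp
#include "llvm/Transforms/IPO/Attributor.h"
#include <string>

namespace {
struct AAExample
    : public llvm::StateWrapper<llvm::BooleanState, llvm::AbstractAttribute> {
  using Base = llvm::StateWrapper<llvm::BooleanState, llvm::AbstractAttribute>;
  AAExample(const llvm::IRPosition &IRP, llvm::Attributor &A) : Base(IRP) {}

  /// See AbstractAttribute::updateImpl(...).
  llvm::ChangeStatus updateImpl(llvm::Attributor &A) override {
    // Re-justify the optimistic assumption here; give up if that fails.
    return indicatePessimisticFixpoint();
  }

  /// See AbstractAttribute::trackStatistics().
  void trackStatistics() const override {}

  /// See AbstractAttribute::getAsStr().
  const std::string getAsStr() const override {
    return getAssumed() ? "example" : "no-example";
  }

  /// See AbstractAttribute::getName().
  const std::string getName() const override { return "AAExample"; }

  /// See AbstractAttribute::getIdAddr().
  const char *getIdAddr() const override { return &ID; }

  /// Unique ID (due to the unique address).
  static const char ID;
};
const char AAExample::ID = 0;
} // namespace
```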
| 2716 | |||||||||
| 2717 | /// Declarations of output stream operators for debug purposes. | ||||||||
| 2718 | /// | ||||||||
| 2719 | ///{ | ||||||||
| 2720 | raw_ostream &operator<<(raw_ostream &OS, const AbstractAttribute &AA); | ||||||||
| 2721 | raw_ostream &operator<<(raw_ostream &OS, ChangeStatus S); | ||||||||
| 2722 | raw_ostream &operator<<(raw_ostream &OS, IRPosition::Kind); | ||||||||
| 2723 | raw_ostream &operator<<(raw_ostream &OS, const IRPosition &); | ||||||||
| 2724 | raw_ostream &operator<<(raw_ostream &OS, const AbstractState &State); | ||||||||
| 2725 | template <typename base_ty, base_ty BestState, base_ty WorstState> | ||||||||
| 2726 | raw_ostream & | ||||||||
| 2727 | operator<<(raw_ostream &OS, | ||||||||
| 2728 | const IntegerStateBase<base_ty, BestState, WorstState> &S) { | ||||||||
| 2729 | return OS << "(" << S.getKnown() << "-" << S.getAssumed() << ")" | ||||||||
| 2730 | << static_cast<const AbstractState &>(S); | ||||||||
| 2731 | } | ||||||||
| 2732 | raw_ostream &operator<<(raw_ostream &OS, const IntegerRangeState &State); | ||||||||
| 2733 | ///} | ||||||||
| 2734 | |||||||||
| 2735 | struct AttributorPass : public PassInfoMixin<AttributorPass> { | ||||||||
| 2736 | PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); | ||||||||
| 2737 | }; | ||||||||
| 2738 | struct AttributorCGSCCPass : public PassInfoMixin<AttributorCGSCCPass> { | ||||||||
| 2739 | PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM, | ||||||||
| 2740 | LazyCallGraph &CG, CGSCCUpdateResult &UR); | ||||||||
| 2741 | }; | ||||||||
| 2742 | |||||||||
| 2743 | Pass *createAttributorLegacyPass(); | ||||||||
| 2744 | Pass *createAttributorCGSCCLegacyPass(); | ||||||||
| 2745 | |||||||||
| 2746 | /// Helper function to clamp a state \p S of type \p StateType with the | ||||||||
| 2747 | /// information in \p R and indicate/return if \p S did change (as in, an update | ||||||||
| 2748 | /// is required to be run again). | ||||||||
| 2749 | template <typename StateType> | ||||||||
| 2750 | ChangeStatus clampStateAndIndicateChange(StateType &S, const StateType &R) { | ||||||||
| 2751 | auto Assumed = S.getAssumed(); | ||||||||
| 2752 | S ^= R; | ||||||||
| 2753 | return Assumed == S.getAssumed() ? ChangeStatus::UNCHANGED | ||||||||
| 2754 | : ChangeStatus::CHANGED; | ||||||||
| 2755 | } | ||||||||
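A sketch of the clamp helper applied to hand-built `BooleanState`s (not part of the header); in a real attribute the second state would typically come from another abstract attribute's `getState()`:

```cpp
#include "llvm/Transforms/IPO/Attributor.h"
#include <cassert>

void clampSketch() {
  llvm::BooleanState S;  // Optimistic: assumed to hold.
  llvm::BooleanState R;
  R.setAssumed(false);   // Deduced elsewhere: the assumption does not hold.
  llvm::ChangeStatus CS = llvm::clampStateAndIndicateChange(S, R);
  assert(CS == llvm::ChangeStatus::CHANGED && !S.isAssumed());
}
```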
| 2756 | |||||||||
| 2757 | /// ---------------------------------------------------------------------------- | ||||||||
| 2758 | /// Abstract Attribute Classes | ||||||||
| 2759 | /// ---------------------------------------------------------------------------- | ||||||||
| 2760 | |||||||||
| 2761 | /// An abstract attribute for the returned values of a function. | ||||||||
| 2762 | struct AAReturnedValues | ||||||||
| 2763 | : public IRAttribute<Attribute::Returned, AbstractAttribute> { | ||||||||
| 2764 | AAReturnedValues(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {} | ||||||||
| 2765 | |||||||||
| 2766 | /// Return an assumed unique return value if a single candidate is found. If | ||||||||
| 2767 | /// there cannot be one, return a nullptr. If it is not clear yet, return the | ||||||||
| 2768 | /// Optional::NoneType. | ||||||||
| 2769 | Optional<Value *> getAssumedUniqueReturnValue(Attributor &A) const; | ||||||||
| 2770 | |||||||||
| 2771 | /// Check \p Pred on all returned values. | ||||||||
| 2772 | /// | ||||||||
| 2773 | /// This method will evaluate \p Pred on returned values and return | ||||||||
| 2774 | /// true if (1) all returned values are known, and (2) \p Pred returned true | ||||||||
| 2775 | /// for all returned values. | ||||||||
| 2776 | /// | ||||||||
| 2777 | /// Note: Unlike the Attributor::checkForAllReturnedValuesAndReturnInsts | ||||||||
| 2778 | /// method, this one will not filter dead return instructions. | ||||||||
| 2779 | virtual bool checkForAllReturnedValuesAndReturnInsts( | ||||||||
| 2780 | function_ref<bool(Value &, const SmallSetVector<ReturnInst *, 4> &)> Pred) | ||||||||
| 2781 | const = 0; | ||||||||
| 2782 | |||||||||
| 2783 | using iterator = | ||||||||
| 2784 | MapVector<Value *, SmallSetVector<ReturnInst *, 4>>::iterator; | ||||||||
| 2785 | using const_iterator = | ||||||||
| 2786 | MapVector<Value *, SmallSetVector<ReturnInst *, 4>>::const_iterator; | ||||||||
| 2787 | virtual llvm::iterator_range<iterator> returned_values() = 0; | ||||||||
| 2788 | virtual llvm::iterator_range<const_iterator> returned_values() const = 0; | ||||||||
| 2789 | |||||||||
| 2790 | virtual size_t getNumReturnValues() const = 0; | ||||||||
| 2791 | |||||||||
| 2792 | /// Create an abstract attribute view for the position \p IRP. | ||||||||
| 2793 | static AAReturnedValues &createForPosition(const IRPosition &IRP, | ||||||||
| 2794 | Attributor &A); | ||||||||
| 2795 | |||||||||
| 2796 | /// See AbstractAttribute::getName() | ||||||||
| 2797 | const std::string getName() const override { return "AAReturnedValues"; } | ||||||||
| 2798 | |||||||||
| 2799 | /// See AbstractAttribute::getIdAddr() | ||||||||
| 2800 | const char *getIdAddr() const override { return &ID; } | ||||||||
| 2801 | |||||||||
| 2802 | /// This function should return true if the type of the \p AA is | ||||||||
| 2803 | /// AAReturnedValues | ||||||||
| 2804 | static bool classof(const AbstractAttribute *AA) { | ||||||||
| 2805 | return (AA->getIdAddr() == &ID); | ||||||||
| 2806 | } | ||||||||
| 2807 | |||||||||
| 2808 | /// Unique ID (due to the unique address) | ||||||||
| 2809 | static const char ID; | ||||||||
| 2810 | }; | ||||||||
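A sketch of how the predicate-based interface above is typically driven; the helper below is hypothetical and merely checks whether every known returned value is a `Constant`:

```cpp
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Transforms/IPO/Attributor.h"

bool allReturnsAreConstant(const llvm::AAReturnedValues &RVAA) {
  auto Pred = [](llvm::Value &RV,
                 const llvm::SmallSetVector<llvm::ReturnInst *, 4> &RetInsts) {
    // Returning false aborts the traversal and the whole query fails.
    return llvm::isa<llvm::Constant>(RV);
  };
  return RVAA.checkForAllReturnedValuesAndReturnInsts(Pred);
}
```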
| 2811 | |||||||||
| 2812 | struct AANoUnwind | ||||||||
| 2813 | : public IRAttribute<Attribute::NoUnwind, | ||||||||
| 2814 | StateWrapper<BooleanState, AbstractAttribute>> { | ||||||||
| 2815 | AANoUnwind(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {} | ||||||||
| 2816 | |||||||||
| 2817 | /// Returns true if nounwind is assumed. | ||||||||
| 2818 | bool isAssumedNoUnwind() const { return getAssumed(); } | ||||||||
| 2819 | |||||||||
| 2820 | /// Returns true if nounwind is known. | ||||||||
| 2821 | bool isKnownNoUnwind() const { return getKnown(); } | ||||||||
| 2822 | |||||||||
| 2823 | /// Create an abstract attribute view for the position \p IRP. | ||||||||
| 2824 | static AANoUnwind &createForPosition(const IRPosition &IRP, Attributor &A); | ||||||||
| 2825 | |||||||||
| 2826 | /// See AbstractAttribute::getName() | ||||||||
| 2827 | const std::string getName() const override { return "AANoUnwind"; } | ||||||||
| 2828 | |||||||||
| 2829 | /// See AbstractAttribute::getIdAddr() | ||||||||
| 2830 | const char *getIdAddr() const override { return &ID; } | ||||||||
| 2831 | |||||||||
| 2832 | /// This function should return true if the type of the \p AA is AANoUnwind | ||||||||
| 2833 | static bool classof(const AbstractAttribute *AA) { | ||||||||
| 2834 | return (AA->getIdAddr() == &ID); | ||||||||
| 2835 | } | ||||||||
| 2836 | |||||||||
| 2837 | /// Unique ID (due to the unique address) | ||||||||
| 2838 | static const char ID; | ||||||||
| 2839 | }; | ||||||||
| 2840 | |||||||||
| 2841 | struct AANoSync | ||||||||
| 2842 | : public IRAttribute<Attribute::NoSync, | ||||||||
| 2843 | StateWrapper<BooleanState, AbstractAttribute>> { | ||||||||
| 2844 | AANoSync(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {} | ||||||||
| 2845 | |||||||||
| 2846 | /// Returns true if "nosync" is assumed. | ||||||||
| 2847 | bool isAssumedNoSync() const { return getAssumed(); } | ||||||||
| 2848 | |||||||||
| 2849 | /// Returns true if "nosync" is known. | ||||||||
| 2850 | bool isKnownNoSync() const { return getKnown(); } | ||||||||
| 2851 | |||||||||
| 2852 | /// Create an abstract attribute view for the position \p IRP. | ||||||||
| 2853 | static AANoSync &createForPosition(const IRPosition &IRP, Attributor &A); | ||||||||
| 2854 | |||||||||
| 2855 | /// See AbstractAttribute::getName() | ||||||||
| 2856 | const std::string getName() const override { return "AANoSync"; } | ||||||||
| 2857 | |||||||||
| 2858 | /// See AbstractAttribute::getIdAddr() | ||||||||
| 2859 | const char *getIdAddr() const override { return &ID; } | ||||||||
| 2860 | |||||||||
| 2861 | /// This function should return true if the type of the \p AA is AANoSync | ||||||||
| 2862 | static bool classof(const AbstractAttribute *AA) { | ||||||||
| 2863 | return (AA->getIdAddr() == &ID); | ||||||||
| 2864 | } | ||||||||
| 2865 | |||||||||
| 2866 | /// Unique ID (due to the unique address) | ||||||||
| 2867 | static const char ID; | ||||||||
| 2868 | }; | ||||||||
| 2869 | |||||||||
| 2870 | /// An abstract interface for all nonnull attributes. | ||||||||
| 2871 | struct AANonNull | ||||||||
| 2872 | : public IRAttribute<Attribute::NonNull, | ||||||||
| 2873 | StateWrapper<BooleanState, AbstractAttribute>> { | ||||||||
| 2874 | AANonNull(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {} | ||||||||
| 2875 | |||||||||
| 2876 | /// Return true if we assume that the underlying value is nonnull. | ||||||||
| 2877 | bool isAssumedNonNull() const { return getAssumed(); } | ||||||||
| 2878 | |||||||||
| 2880 | /// Return true if we know that the underlying value is nonnull. | ||||||||
| 2880 | bool isKnownNonNull() const { return getKnown(); } | ||||||||
| 2881 | |||||||||
| 2882 | /// Create an abstract attribute view for the position \p IRP. | ||||||||
| 2883 | static AANonNull &createForPosition(const IRPosition &IRP, Attributor &A); | ||||||||
| 2884 | |||||||||
| 2885 | /// See AbstractAttribute::getName() | ||||||||
| 2886 | const std::string getName() const override { return "AANonNull"; } | ||||||||
| 2887 | |||||||||
| 2888 | /// See AbstractAttribute::getIdAddr() | ||||||||
| 2889 | const char *getIdAddr() const override { return &ID; } | ||||||||
| 2890 | |||||||||
| 2891 | /// This function should return true if the type of the \p AA is AANonNull | ||||||||
| 2892 | static bool classof(const AbstractAttribute *AA) { | ||||||||
| 2893 | return (AA->getIdAddr() == &ID); | ||||||||
| 2894 | } | ||||||||
| 2895 | |||||||||
| 2896 | /// Unique ID (due to the unique address) | ||||||||
| 2897 | static const char ID; | ||||||||
| 2898 | }; | ||||||||
| 2899 | |||||||||
| 2900 | /// An abstract attribute for norecurse. | ||||||||
| 2901 | struct AANoRecurse | ||||||||
| 2902 | : public IRAttribute<Attribute::NoRecurse, | ||||||||
| 2903 | StateWrapper<BooleanState, AbstractAttribute>> { | ||||||||
| 2904 | AANoRecurse(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {} | ||||||||
| 2905 | |||||||||
| 2906 | /// Return true if "norecurse" is assumed. | ||||||||
| 2907 | bool isAssumedNoRecurse() const { return getAssumed(); } | ||||||||
| 2908 | |||||||||
| 2909 | /// Return true if "norecurse" is known. | ||||||||
| 2910 | bool isKnownNoRecurse() const { return getKnown(); } | ||||||||
| 2911 | |||||||||
| 2912 | /// Create an abstract attribute view for the position \p IRP. | ||||||||
| 2913 | static AANoRecurse &createForPosition(const IRPosition &IRP, Attributor &A); | ||||||||
| 2914 | |||||||||
| 2915 | /// See AbstractAttribute::getName() | ||||||||
| 2916 | const std::string getName() const override { return "AANoRecurse"; } | ||||||||
| 2917 | |||||||||
| 2918 | /// See AbstractAttribute::getIdAddr() | ||||||||
| 2919 | const char *getIdAddr() const override { return &ID; } | ||||||||
| 2920 | |||||||||
| 2921 | /// This function should return true if the type of the \p AA is AANoRecurse | ||||||||
| 2922 | static bool classof(const AbstractAttribute *AA) { | ||||||||
| 2923 | return (AA->getIdAddr() == &ID); | ||||||||
| 2924 | } | ||||||||
| 2925 | |||||||||
| 2926 | /// Unique ID (due to the unique address) | ||||||||
| 2927 | static const char ID; | ||||||||
| 2928 | }; | ||||||||
| 2929 | |||||||||
| 2930 | /// An abstract attribute for willreturn. | ||||||||
| 2931 | struct AAWillReturn | ||||||||
| 2932 | : public IRAttribute<Attribute::WillReturn, | ||||||||
| 2933 | StateWrapper<BooleanState, AbstractAttribute>> { | ||||||||
| 2934 | AAWillReturn(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {} | ||||||||
| 2935 | |||||||||
| 2936 | /// Return true if "willreturn" is assumed. | ||||||||
| 2937 | bool isAssumedWillReturn() const { return getAssumed(); } | ||||||||
| 2938 | |||||||||
| 2939 | /// Return true if "willreturn" is known. | ||||||||
| 2940 | bool isKnownWillReturn() const { return getKnown(); } | ||||||||
| 2941 | |||||||||
| 2942 | /// Create an abstract attribute view for the position \p IRP. | ||||||||
| 2943 | static AAWillReturn &createForPosition(const IRPosition &IRP, Attributor &A); | ||||||||
| 2944 | |||||||||
| 2945 | /// See AbstractAttribute::getName() | ||||||||
| 2946 | const std::string getName() const override { return "AAWillReturn"; } | ||||||||
| 2947 | |||||||||
| 2948 | /// See AbstractAttribute::getIdAddr() | ||||||||
| 2949 | const char *getIdAddr() const override { return &ID; } | ||||||||
| 2950 | |||||||||
| 2951 | /// This function should return true if the type of the \p AA is AAWillReturn | ||||||||
| 2952 | static bool classof(const AbstractAttribute *AA) { | ||||||||
| 2953 | return (AA->getIdAddr() == &ID); | ||||||||
| 2954 | } | ||||||||
| 2955 | |||||||||
| 2956 | /// Unique ID (due to the unique address) | ||||||||
| 2957 | static const char ID; | ||||||||
| 2958 | }; | ||||||||
| 2959 | |||||||||
| 2960 | /// An abstract attribute for undefined behavior. | ||||||||
| 2961 | struct AAUndefinedBehavior | ||||||||
| 2962 | : public StateWrapper<BooleanState, AbstractAttribute> { | ||||||||
| 2963 | using Base = StateWrapper<BooleanState, AbstractAttribute>; | ||||||||
| 2964 | AAUndefinedBehavior(const IRPosition &IRP, Attributor &A) : Base(IRP) {} | ||||||||
| 2965 | |||||||||
| 2966 | /// Return true if "undefined behavior" is assumed. | ||||||||
| 2967 | bool isAssumedToCauseUB() const { return getAssumed(); } | ||||||||
| 2968 | |||||||||
| 2969 | /// Return true if "undefined behavior" is assumed for a specific instruction. | ||||||||
| 2970 | virtual bool isAssumedToCauseUB(Instruction *I) const = 0; | ||||||||
| 2971 | |||||||||
| 2972 | /// Return true if "undefined behavior" is known. | ||||||||
| 2973 | bool isKnownToCauseUB() const { return getKnown(); } | ||||||||
| 2974 | |||||||||
| 2975 | /// Return true if "undefined behavior" is known for a specific instruction. | ||||||||
| 2976 | virtual bool isKnownToCauseUB(Instruction *I) const = 0; | ||||||||
| 2977 | |||||||||
| 2978 | /// Create an abstract attribute view for the position \p IRP. | ||||||||
| 2979 | static AAUndefinedBehavior &createForPosition(const IRPosition &IRP, | ||||||||
| 2980 | Attributor &A); | ||||||||
| 2981 | |||||||||
| 2982 | /// See AbstractAttribute::getName() | ||||||||
| 2983 | const std::string getName() const override { return "AAUndefinedBehavior"; } | ||||||||
| 2984 | |||||||||
| 2985 | /// See AbstractAttribute::getIdAddr() | ||||||||
| 2986 | const char *getIdAddr() const override { return &ID; } | ||||||||
| 2987 | |||||||||
| 2988 | /// This function should return true if the type of the \p AA is | ||||||||
| 2989 | /// AAUndefinedBehavior | ||||||||
| 2990 | static bool classof(const AbstractAttribute *AA) { | ||||||||
| 2991 | return (AA->getIdAddr() == &ID); | ||||||||
| 2992 | } | ||||||||
| 2993 | |||||||||
| 2994 | /// Unique ID (due to the unique address) | ||||||||
| 2995 | static const char ID; | ||||||||
| 2996 | }; | ||||||||
| 2997 | |||||||||
| 2998 | /// An abstract interface to determine reachability of point A to B. | ||||||||
| 2999 | struct AAReachability : public StateWrapper<BooleanState, AbstractAttribute> { | ||||||||
| 3000 | using Base = StateWrapper<BooleanState, AbstractAttribute>; | ||||||||
| 3001 | AAReachability(const IRPosition &IRP, Attributor &A) : Base(IRP) {} | ||||||||
| 3002 | |||||||||
| 3003 | /// Returns true if the 'From' instruction is assumed to reach the 'To' instruction. | ||||||||
| 3004 | /// Users should provide two positions they are interested in, and the class | ||||||||
| 3005 | /// determines (and caches) reachability. | ||||||||
| 3006 | bool isAssumedReachable(Attributor &A, const Instruction &From, | ||||||||
| 3007 | const Instruction &To) const { | ||||||||
| 3008 | if (!getState().isValidState()) | ||||||||
| 3009 | return true; | ||||||||
| 3010 | return A.getInfoCache().getPotentiallyReachable(From, To); | ||||||||
| 3011 | } | ||||||||
| 3012 | |||||||||
| 3013 | /// Returns true if the 'From' instruction is known to reach the 'To' instruction. | ||||||||
| 3014 | /// Users should provide two positions they are interested in, and the class | ||||||||
| 3015 | /// determines (and caches) reachability. | ||||||||
| 3016 | bool isKnownReachable(Attributor &A, const Instruction &From, | ||||||||
| 3017 | const Instruction &To) const { | ||||||||
| 3018 | if (!getState().isValidState()) | ||||||||
| 3019 | return false; | ||||||||
| 3020 | return A.getInfoCache().getPotentiallyReachable(From, To); | ||||||||
| 3021 | } | ||||||||
| 3022 | |||||||||
| 3023 | /// Create an abstract attribute view for the position \p IRP. | ||||||||
| 3024 | static AAReachability &createForPosition(const IRPosition &IRP, | ||||||||
| 3025 | Attributor &A); | ||||||||
| 3026 | |||||||||
| 3027 | /// See AbstractAttribute::getName() | ||||||||
| 3028 | const std::string getName() const override { return "AAReachability"; } | ||||||||
| 3029 | |||||||||
| 3030 | /// See AbstractAttribute::getIdAddr() | ||||||||
| 3031 | const char *getIdAddr() const override { return &ID; } | ||||||||
| 3032 | |||||||||
| 3033 | /// This function should return true if the type of the \p AA is | ||||||||
| 3034 | /// AAReachability | ||||||||
| 3035 | static bool classof(const AbstractAttribute *AA) { | ||||||||
| 3036 | return (AA->getIdAddr() == &ID); | ||||||||
| 3037 | } | ||||||||
| 3038 | |||||||||
| 3039 | /// Unique ID (due to the unique address) | ||||||||
| 3040 | static const char ID; | ||||||||
| 3041 | }; | ||||||||
| 3042 | |||||||||
| 3043 | /// An abstract interface for all noalias attributes. | ||||||||
| 3044 | struct AANoAlias | ||||||||
| 3045 | : public IRAttribute<Attribute::NoAlias, | ||||||||
| 3046 | StateWrapper<BooleanState, AbstractAttribute>> { | ||||||||
| 3047 | AANoAlias(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {} | ||||||||
| 3048 | |||||||||
| 3049 | /// Return true if we assume that the underlying value is noalias. | ||||||||
| 3050 | bool isAssumedNoAlias() const { return getAssumed(); } | ||||||||
| 3051 | |||||||||
| 3052 | /// Return true if we know that the underlying value is noalias. | ||||||||
| 3053 | bool isKnownNoAlias() const { return getKnown(); } | ||||||||
| 3054 | |||||||||
| 3055 | /// Create an abstract attribute view for the position \p IRP. | ||||||||
| 3056 | static AANoAlias &createForPosition(const IRPosition &IRP, Attributor &A); | ||||||||
| 3057 | |||||||||
| 3058 | /// See AbstractAttribute::getName() | ||||||||
| 3059 | const std::string getName() const override { return "AANoAlias"; } | ||||||||
| 3060 | |||||||||
| 3061 | /// See AbstractAttribute::getIdAddr() | ||||||||
| 3062 | const char *getIdAddr() const override { return &ID; } | ||||||||
| 3063 | |||||||||
| 3064 | /// This function should return true if the type of the \p AA is AANoAlias | ||||||||
| 3065 | static bool classof(const AbstractAttribute *AA) { | ||||||||
| 3066 | return (AA->getIdAddr() == &ID); | ||||||||
| 3067 | } | ||||||||
| 3068 | |||||||||
| 3069 | /// Unique ID (due to the unique address) | ||||||||
| 3070 | static const char ID; | ||||||||
| 3071 | }; | ||||||||
| 3072 | |||||||||
| 3073 | /// An AbstractAttribute for nofree. | ||||||||
| 3074 | struct AANoFree | ||||||||
| 3075 | : public IRAttribute<Attribute::NoFree, | ||||||||
| 3076 | StateWrapper<BooleanState, AbstractAttribute>> { | ||||||||
| 3077 | AANoFree(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {} | ||||||||
| 3078 | |||||||||
| 3079 | /// Return true if "nofree" is assumed. | ||||||||
| 3080 | bool isAssumedNoFree() const { return getAssumed(); } | ||||||||
| 3081 | |||||||||
| 3082 | /// Return true if "nofree" is known. | ||||||||
| 3083 | bool isKnownNoFree() const { return getKnown(); } | ||||||||
| 3084 | |||||||||
| 3085 | /// Create an abstract attribute view for the position \p IRP. | ||||||||
| 3086 | static AANoFree &createForPosition(const IRPosition &IRP, Attributor &A); | ||||||||
| 3087 | |||||||||
| 3088 | /// See AbstractAttribute::getName() | ||||||||
| 3089 | const std::string getName() const override { return "AANoFree"; } | ||||||||
| 3090 | |||||||||
| 3091 | /// See AbstractAttribute::getIdAddr() | ||||||||
| 3092 | const char *getIdAddr() const override { return &ID; } | ||||||||
| 3093 | |||||||||
| 3094 | /// This function should return true if the type of the \p AA is AANoFree | ||||||||
| 3095 | static bool classof(const AbstractAttribute *AA) { | ||||||||
| 3096 | return (AA->getIdAddr() == &ID); | ||||||||
| 3097 | } | ||||||||
| 3098 | |||||||||
| 3099 | /// Unique ID (due to the unique address) | ||||||||
| 3100 | static const char ID; | ||||||||
| 3101 | }; | ||||||||
| 3102 | |||||||||
| 3103 | /// An AbstractAttribute for noreturn. | ||||||||
| 3104 | struct AANoReturn | ||||||||
| 3105 | : public IRAttribute<Attribute::NoReturn, | ||||||||
| 3106 | StateWrapper<BooleanState, AbstractAttribute>> { | ||||||||
| 3107 | AANoReturn(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {} | ||||||||
| 3108 | |||||||||
| 3109 | /// Return true if the underlying object is assumed to never return. | ||||||||
| 3110 | bool isAssumedNoReturn() const { return getAssumed(); } | ||||||||
| 3111 | |||||||||
| 3112 | /// Return true if the underlying object is known to never return. | ||||||||
| 3113 | bool isKnownNoReturn() const { return getKnown(); } | ||||||||
| 3114 | |||||||||
| 3115 | /// Create an abstract attribute view for the position \p IRP. | ||||||||
| 3116 | static AANoReturn &createForPosition(const IRPosition &IRP, Attributor &A); | ||||||||
| 3117 | |||||||||
| 3118 | /// See AbstractAttribute::getName() | ||||||||
| 3119 | const std::string getName() const override { return "AANoReturn"; } | ||||||||
| 3120 | |||||||||
| 3121 | /// See AbstractAttribute::getIdAddr() | ||||||||
| 3122 | const char *getIdAddr() const override { return &ID; } | ||||||||
| 3123 | |||||||||
| 3124 | /// This function should return true if the type of the \p AA is AANoReturn | ||||||||
| 3125 | static bool classof(const AbstractAttribute *AA) { | ||||||||
| 3126 | return (AA->getIdAddr() == &ID); | ||||||||
| 3127 | } | ||||||||
| 3128 | |||||||||
| 3129 | /// Unique ID (due to the unique address) | ||||||||
| 3130 | static const char ID; | ||||||||
| 3131 | }; | ||||||||
| 3132 | |||||||||
| 3133 | /// An abstract interface for the liveness abstract attribute. | ||||||||
| 3134 | struct AAIsDead | ||||||||
| 3135 | : public StateWrapper<BitIntegerState<uint8_t, 3, 0>, AbstractAttribute> { | ||||||||
| 3136 | using Base = StateWrapper<BitIntegerState<uint8_t, 3, 0>, AbstractAttribute>; | ||||||||
| 3137 | AAIsDead(const IRPosition &IRP, Attributor &A) : Base(IRP) {} | ||||||||
| 3138 | |||||||||
| 3139 | /// State encoding bits. A set bit in the state means the property holds. | ||||||||
| 3140 | enum { | ||||||||
| 3141 | HAS_NO_EFFECT = 1 << 0, | ||||||||
| 3142 | IS_REMOVABLE = 1 << 1, | ||||||||
| 3143 | |||||||||
| 3144 | IS_DEAD = HAS_NO_EFFECT | IS_REMOVABLE, | ||||||||
| 3145 | }; | ||||||||
| 3146 | static_assert(IS_DEAD == getBestState(), "Unexpected BEST_STATE value"); | ||||||||
| 3147 | |||||||||
| 3148 | protected: | ||||||||
| 3149 | /// The query functions are protected such that other attributes need to go | ||||||||
| 3150 | /// through the Attributor interfaces: `Attributor::isAssumedDead(...)` | ||||||||
| 3151 | |||||||||
| 3152 | /// Returns true if the underlying value is assumed dead. | ||||||||
| 3153 | virtual bool isAssumedDead() const = 0; | ||||||||
| 3154 | |||||||||
| 3155 | /// Returns true if the underlying value is known dead. | ||||||||
| 3156 | virtual bool isKnownDead() const = 0; | ||||||||
| 3157 | |||||||||
| 3158 | /// Returns true if \p BB is assumed dead. | ||||||||
| 3159 | virtual bool isAssumedDead(const BasicBlock *BB) const = 0; | ||||||||
| 3160 | |||||||||
| 3161 | /// Returns true if \p BB is known dead. | ||||||||
| 3162 | virtual bool isKnownDead(const BasicBlock *BB) const = 0; | ||||||||
| 3163 | |||||||||
| 3164 | /// Returns true if \p I is assumed dead. | ||||||||
| 3165 | virtual bool isAssumedDead(const Instruction *I) const = 0; | ||||||||
| 3166 | |||||||||
| 3167 | /// Returns true if \p I is known dead. | ||||||||
| 3168 | virtual bool isKnownDead(const Instruction *I) const = 0; | ||||||||
| 3169 | |||||||||
| 3170 | /// This method is used to check if at least one instruction in a collection | ||||||||
| 3171 | /// of instructions is live. | ||||||||
| 3172 | template <typename T> bool isLiveInstSet(T begin, T end) const { | ||||||||
| 3173 | for (const auto &I : llvm::make_range(begin, end)) { | ||||||||
| 3174 | assert(I->getFunction() == getIRPosition().getAssociatedFunction() && | ||||||||
| 3175 |        "Instruction must be in the same anchor scope function."); | ||||||||
| 3176 | |||||||||
| 3177 | if (!isAssumedDead(I)) | ||||||||
| 3178 | return true; | ||||||||
| 3179 | } | ||||||||
| 3180 | |||||||||
| 3181 | return false; | ||||||||
| 3182 | } | ||||||||
| 3183 | |||||||||
| 3184 | public: | ||||||||
| 3185 | /// Create an abstract attribute view for the position \p IRP. | ||||||||
| 3186 | static AAIsDead &createForPosition(const IRPosition &IRP, Attributor &A); | ||||||||
| 3187 | |||||||||
| 3188 | /// Determine if \p F might catch asynchronous exceptions. | ||||||||
| 3189 | static bool mayCatchAsynchronousExceptions(const Function &F) { | ||||||||
| 3190 | return F.hasPersonalityFn() && !canSimplifyInvokeNoUnwind(&F); | ||||||||
| 3191 | } | ||||||||
| 3192 | |||||||||
| 3193 | /// Return true if the edge from the \p From BB to the \p To BB is assumed dead. | ||||||||
| 3194 | /// This is specifically useful in AAReachability. | ||||||||
| 3195 | virtual bool isEdgeDead(const BasicBlock *From, const BasicBlock *To) const { | ||||||||
| 3196 | return false; | ||||||||
| 3197 | } | ||||||||
| 3198 | |||||||||
| 3199 | /// See AbstractAttribute::getName() | ||||||||
| 3200 | const std::string getName() const override { return "AAIsDead"; } | ||||||||
| 3201 | |||||||||
| 3202 | /// See AbstractAttribute::getIdAddr() | ||||||||
| 3203 | const char *getIdAddr() const override { return &ID; } | ||||||||
| 3204 | |||||||||
| 3205 | /// This function should return true if the type of the \p AA is AAIsDead | ||||||||
| 3206 | static bool classof(const AbstractAttribute *AA) { | ||||||||
| 3207 | return (AA->getIdAddr() == &ID); | ||||||||
| 3208 | } | ||||||||
| 3209 | |||||||||
| 3210 | /// Unique ID (due to the unique address) | ||||||||
| 3211 | static const char ID; | ||||||||
| 3212 | |||||||||
| 3213 | friend struct Attributor; | ||||||||
| 3214 | }; | ||||||||
| 3215 | |||||||||
| 3216 | /// State for dereferenceable attribute | ||||||||
| 3217 | struct DerefState : AbstractState { | ||||||||
| 3218 | |||||||||
| 3219 | static DerefState getBestState() { return DerefState(); } | ||||||||
| 3220 | static DerefState getBestState(const DerefState &) { return getBestState(); } | ||||||||
| 3221 | |||||||||
| 3222 | /// Return the worst possible representable state. | ||||||||
| 3223 | static DerefState getWorstState() { | ||||||||
| 3224 | DerefState DS; | ||||||||
| 3225 | DS.indicatePessimisticFixpoint(); | ||||||||
| 3226 | return DS; | ||||||||
| 3227 | } | ||||||||
| 3228 | static DerefState getWorstState(const DerefState &) { | ||||||||
| 3229 | return getWorstState(); | ||||||||
| 3230 | } | ||||||||
| 3231 | |||||||||
| 3232 | /// State representing the number of dereferenceable bytes. | ||||||||
| 3233 | IncIntegerState<> DerefBytesState; | ||||||||
| 3234 | |||||||||
| 3235 | /// Map representing accessed memory offsets and sizes. | ||||||||
| 3236 | /// A key is the offset and a value is the size. | ||||||||
| 3237 | /// If there is a load/store instruction like | ||||||||
| 3238 | /// p[offset] = v; | ||||||||
| 3239 | /// (offset, sizeof(v)) will be inserted to this map. | ||||||||
| 3240 | /// std::map is used because we want to iterate keys in ascending order. | ||||||||
| 3241 | std::map<int64_t, uint64_t> AccessedBytesMap; | ||||||||
| 3242 | |||||||||
| 3243 | /// Helper function to calculate dereferenceable bytes from current known | ||||||||
| 3244 | /// bytes and accessed bytes. | ||||||||
| 3245 | /// ``` | ||||||||
| 3246 | /// int f(int *A){ | ||||||||
| 3247 | ///    *A = 0; | ||||||||
| 3248 | ///    *(A+2) = 2; | ||||||||
| 3249 | ///    *(A+1) = 1; | ||||||||
| 3250 | ///    *(A+10) = 10; | ||||||||
| 3251 | /// } | ||||||||
| 3252 | /// ``` | ||||||||
| 3253 | /// In that case, AccessedBytesMap is `{0:4, 4:4, 8:4, 40:4}`. | ||||||||
| 3254 | /// AccessedBytesMap is a std::map so it is iterated in ascending order of | ||||||||
| 3255 | /// the key (Offset). So KnownBytes will be updated like this: | ||||||||
| 3256 | /// | ||||||||
| 3257 | /// | Access  | KnownBytes | ||||||||
| 3258 | /// | (0, 4)  | 0 -> 4 | ||||||||
| 3259 | /// | (4, 4)  | 4 -> 8 | ||||||||
| 3260 | /// | (8, 4)  | 8 -> 12 | ||||||||
| 3261 | /// | (40, 4) | 12 (break) | ||||||||
| 3262 | void computeKnownDerefBytesFromAccessedMap() { | ||||||||
| 3263 | int64_t KnownBytes = DerefBytesState.getKnown(); | ||||||||
| 3264 | for (auto &Access : AccessedBytesMap) { | ||||||||
| 3265 | if (KnownBytes < Access.first) | ||||||||
| 3266 | break; | ||||||||
| 3267 | KnownBytes = std::max(KnownBytes, Access.first + (int64_t)Access.second); | ||||||||
| 3268 | } | ||||||||
| 3269 | |||||||||
| 3270 | DerefBytesState.takeKnownMaximum(KnownBytes); | ||||||||
| 3271 | } | ||||||||
| 3272 | |||||||||
| 3273 | /// State representing whether the value is globally dereferenceable. | ||||||||
| 3274 | BooleanState GlobalState; | ||||||||
| 3275 | |||||||||
| 3276 | /// See AbstractState::isValidState() | ||||||||
| 3277 | bool isValidState() const override { return DerefBytesState.isValidState(); } | ||||||||
| 3278 | |||||||||
| 3279 | /// See AbstractState::isAtFixpoint() | ||||||||
| 3280 | bool isAtFixpoint() const override { | ||||||||
| 3281 | return !isValidState() || | ||||||||
| 3282 | (DerefBytesState.isAtFixpoint() && GlobalState.isAtFixpoint()); | ||||||||
| 3283 | } | ||||||||
| 3284 | |||||||||
| 3285 | /// See AbstractState::indicateOptimisticFixpoint(...) | ||||||||
| 3286 | ChangeStatus indicateOptimisticFixpoint() override { | ||||||||
| 3287 | DerefBytesState.indicateOptimisticFixpoint(); | ||||||||
| 3288 | GlobalState.indicateOptimisticFixpoint(); | ||||||||
| 3289 | return ChangeStatus::UNCHANGED; | ||||||||
| 3290 | } | ||||||||
| 3291 | |||||||||
| 3292 | /// See AbstractState::indicatePessimisticFixpoint(...) | ||||||||
| 3293 | ChangeStatus indicatePessimisticFixpoint() override { | ||||||||
| 3294 | DerefBytesState.indicatePessimisticFixpoint(); | ||||||||
| 3295 | GlobalState.indicatePessimisticFixpoint(); | ||||||||
| 3296 | return ChangeStatus::CHANGED; | ||||||||
| 3297 | } | ||||||||
| 3298 | |||||||||
| 3299 | /// Update known dereferenceable bytes. | ||||||||
| 3300 | void takeKnownDerefBytesMaximum(uint64_t Bytes) { | ||||||||
| 3301 | DerefBytesState.takeKnownMaximum(Bytes); | ||||||||
| 3302 | |||||||||
| 3303 | // Known bytes might increase. | ||||||||
| 3304 | computeKnownDerefBytesFromAccessedMap(); | ||||||||
| 3305 | } | ||||||||
| 3306 | |||||||||
| 3307 | /// Update assumed dereferenceable bytes. | ||||||||
| 3308 | void takeAssumedDerefBytesMinimum(uint64_t Bytes) { | ||||||||
| 3309 | DerefBytesState.takeAssumedMinimum(Bytes); | ||||||||
| 3310 | } | ||||||||
| 3311 | |||||||||
| 3312 | /// Add accessed bytes to the map. | ||||||||
| 3313 | void addAccessedBytes(int64_t Offset, uint64_t Size) { | ||||||||
| 3314 | uint64_t &AccessedBytes = AccessedBytesMap[Offset]; | ||||||||
| 3315 | AccessedBytes = std::max(AccessedBytes, Size); | ||||||||
| 3316 | |||||||||
| 3317 | // Known bytes might increase. | ||||||||
| 3318 | computeKnownDerefBytesFromAccessedMap(); | ||||||||
| 3319 | } | ||||||||
| 3320 | |||||||||
| 3321 | /// Equality for DerefState. | ||||||||
| 3322 | bool operator==(const DerefState &R) const { | ||||||||
| 3323 | return this->DerefBytesState == R.DerefBytesState && | ||||||||
| 3324 | this->GlobalState == R.GlobalState; | ||||||||
| 3325 | } | ||||||||
| 3326 | |||||||||
| 3327 | /// Inequality for DerefState. | ||||||||
| 3328 | bool operator!=(const DerefState &R) const { return !(*this == R); } | ||||||||
| 3329 | |||||||||
| 3330 | /// See IntegerStateBase::operator^= | ||||||||
| 3331 | DerefState operator^=(const DerefState &R) { | ||||||||
| 3332 | DerefBytesState ^= R.DerefBytesState; | ||||||||
| 3333 | GlobalState ^= R.GlobalState; | ||||||||
| 3334 | return *this; | ||||||||
| 3335 | } | ||||||||
| 3336 | |||||||||
| 3337 | /// See IntegerStateBase::operator+= | ||||||||
| 3338 | DerefState operator+=(const DerefState &R) { | ||||||||
| 3339 | DerefBytesState += R.DerefBytesState; | ||||||||
| 3340 | GlobalState += R.GlobalState; | ||||||||
| 3341 | return *this; | ||||||||
| 3342 | } | ||||||||
| 3343 | |||||||||
| 3344 | /// See IntegerStateBase::operator&= | ||||||||
| 3345 | DerefState operator&=(const DerefState &R) { | ||||||||
| 3346 | DerefBytesState &= R.DerefBytesState; | ||||||||
| 3347 | GlobalState &= R.GlobalState; | ||||||||
| 3348 | return *this; | ||||||||
| 3349 | } | ||||||||
| 3350 | |||||||||
| 3351 | /// See IntegerStateBase::operator|= | ||||||||
| 3352 | DerefState operator|=(const DerefState &R) { | ||||||||
| 3353 | DerefBytesState |= R.DerefBytesState; | ||||||||
| 3354 | GlobalState |= R.GlobalState; | ||||||||
| 3355 | return *this; | ||||||||
| 3356 | } | ||||||||
| 3357 | |||||||||
| 3358 | protected: | ||||||||
| 3359 | const AANonNull *NonNullAA = nullptr; | ||||||||
| 3360 | }; | ||||||||
| 3361 | |||||||||
| 3362 | /// An abstract interface for all dereferenceable attributes. | ||||||||
| 3363 | struct AADereferenceable | ||||||||
| 3364 | : public IRAttribute<Attribute::Dereferenceable, | ||||||||
| 3365 | StateWrapper<DerefState, AbstractAttribute>> { | ||||||||
| 3366 | AADereferenceable(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {} | ||||||||
| 3367 | |||||||||
| 3368 | /// Return true if we assume that the underlying value is nonnull. | ||||||||
| 3369 | bool isAssumedNonNull() const { | ||||||||
| 3370 | return NonNullAA && NonNullAA->isAssumedNonNull(); | ||||||||
| 3371 | } | ||||||||
| 3372 | |||||||||
| 3373 | /// Return true if we know that the underlying value is nonnull. | ||||||||
| 3374 | bool isKnownNonNull() const { | ||||||||
| 3375 | return NonNullAA && NonNullAA->isKnownNonNull(); | ||||||||
| 3376 | } | ||||||||
| 3377 | |||||||||
| 3378 | /// Return true if we assume that the underlying value is | ||||||||
| 3379 | /// dereferenceable(_or_null) globally. | ||||||||
| 3380 | bool isAssumedGlobal() const { return GlobalState.getAssumed(); } | ||||||||
| 3381 | |||||||||
| 3382 | /// Return true if we know that the underlying value is | ||||||||
| 3383 | /// dereferenceable(_or_null) globally. | ||||||||
| 3384 | bool isKnownGlobal() const { return GlobalState.getKnown(); } | ||||||||
| 3385 | |||||||||
| 3386 | /// Return assumed dereferenceable bytes. | ||||||||
| 3387 | uint32_t getAssumedDereferenceableBytes() const { | ||||||||
| 3388 | return DerefBytesState.getAssumed(); | ||||||||
| 3389 | } | ||||||||
| 3390 | |||||||||
| 3391 | /// Return known dereferenceable bytes. | ||||||||
| 3392 | uint32_t getKnownDereferenceableBytes() const { | ||||||||
| 3393 | return DerefBytesState.getKnown(); | ||||||||
| 3394 | } | ||||||||
| 3395 | |||||||||
| 3396 | /// Create an abstract attribute view for the position \p IRP. | ||||||||
| 3397 | static AADereferenceable &createForPosition(const IRPosition &IRP, | ||||||||
| 3398 | Attributor &A); | ||||||||
| 3399 | |||||||||
| 3400 | /// See AbstractAttribute::getName() | ||||||||
| 3401 | const std::string getName() const override { return "AADereferenceable"; } | ||||||||
| 3402 | |||||||||
| 3403 | /// See AbstractAttribute::getIdAddr() | ||||||||
| 3404 | const char *getIdAddr() const override { return &ID; } | ||||||||
| 3405 | |||||||||
| 3406 | /// This function should return true if the type of the \p AA is | ||||||||
| 3407 | /// AADereferenceable | ||||||||
| 3408 | static bool classof(const AbstractAttribute *AA) { | ||||||||
| 3409 | return (AA->getIdAddr() == &ID); | ||||||||
| 3410 | } | ||||||||
| 3411 | |||||||||
| 3412 | /// Unique ID (due to the unique address) | ||||||||
| 3413 | static const char ID; | ||||||||
| 3414 | }; | ||||||||
| 3415 | |||||||||
| 3416 | using AAAlignmentStateType = | ||||||||
| 3417 | IncIntegerState<uint32_t, Value::MaximumAlignment, 1>; | ||||||||
| 3418 | /// An abstract interface for all align attributes. | ||||||||
| 3419 | struct AAAlign : public IRAttribute< | ||||||||
| 3420 | Attribute::Alignment, | ||||||||
| 3421 | StateWrapper<AAAlignmentStateType, AbstractAttribute>> { | ||||||||
| 3422 | AAAlign(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {} | ||||||||
| 3423 | |||||||||
| 3424 | /// Return assumed alignment. | ||||||||
| 3425 | unsigned getAssumedAlign() const { return getAssumed(); } | ||||||||
| 3426 | |||||||||
| 3427 | /// Return known alignment. | ||||||||
| 3428 | unsigned getKnownAlign() const { return getKnown(); } | ||||||||
| 3429 | |||||||||
| 3430 | /// See AbstractAttribute::getName() | ||||||||
| 3431 | const std::string getName() const override { return "AAAlign"; } | ||||||||
| 3432 | |||||||||
| 3433 | /// See AbstractAttribute::getIdAddr() | ||||||||
| 3434 | const char *getIdAddr() const override { return &ID; } | ||||||||
| 3435 | |||||||||
| 3436 | /// This function should return true if the type of the \p AA is AAAlign | ||||||||
| 3437 | static bool classof(const AbstractAttribute *AA) { | ||||||||
| 3438 | return (AA->getIdAddr() == &ID); | ||||||||
| 3439 | } | ||||||||
| 3440 | |||||||||
| 3441 | /// Create an abstract attribute view for the position \p IRP. | ||||||||
| 3442 | static AAAlign &createForPosition(const IRPosition &IRP, Attributor &A); | ||||||||
| 3443 | |||||||||
| 3444 | /// Unique ID (due to the unique address) | ||||||||
| 3445 | static const char ID; | ||||||||
| 3446 | }; | ||||||||
| 3447 | |||||||||
| 3448 | /// An abstract interface for all nocapture attributes. | ||||||||
| 3449 | struct AANoCapture | ||||||||
| 3450 | : public IRAttribute< | ||||||||
| 3451 | Attribute::NoCapture, | ||||||||
| 3452 | StateWrapper<BitIntegerState<uint16_t, 7, 0>, AbstractAttribute>> { | ||||||||
| 3453 | AANoCapture(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {} | ||||||||
| 3454 | |||||||||
| 3455 | /// State encoding bits. A set bit in the state means the property holds. | ||||||||
| 3456 | /// NO_CAPTURE is the best possible state, 0 the worst possible state. | ||||||||
| 3457 | enum { | ||||||||
| 3458 | NOT_CAPTURED_IN_MEM = 1 << 0, | ||||||||
| 3459 | NOT_CAPTURED_IN_INT = 1 << 1, | ||||||||
| 3460 | NOT_CAPTURED_IN_RET = 1 << 2, | ||||||||
| 3461 | |||||||||
| 3462 | /// If we do not capture the value in memory or through integers we can only | ||||||||
| 3463 | /// communicate it back as a derived pointer. | ||||||||
| 3464 | NO_CAPTURE_MAYBE_RETURNED = NOT_CAPTURED_IN_MEM | NOT_CAPTURED_IN_INT, | ||||||||
| 3465 | |||||||||
| 3466 | /// If we do not capture the value in memory, through integers, or as a | ||||||||
| 3467 | /// derived pointer we know it is not captured. | ||||||||
| 3468 | NO_CAPTURE = | ||||||||
| 3469 | NOT_CAPTURED_IN_MEM | NOT_CAPTURED_IN_INT | NOT_CAPTURED_IN_RET, | ||||||||
| 3470 | }; | ||||||||
| 3471 | |||||||||
| 3472 | /// Return true if we know that the underlying value is not captured in its | ||||||||
| 3473 | /// respective scope. | ||||||||
| 3474 | bool isKnownNoCapture() const { return isKnown(NO_CAPTURE); } | ||||||||
| 3475 | |||||||||
| 3476 | /// Return true if we assume that the underlying value is not captured in its | ||||||||
| 3477 | /// respective scope. | ||||||||
| 3478 | bool isAssumedNoCapture() const { return isAssumed(NO_CAPTURE); } | ||||||||
| 3479 | |||||||||
| 3480 | /// Return true if we know that the underlying value is not captured in its | ||||||||
| 3481 | /// respective scope but we allow it to escape through a "return". | ||||||||
| 3482 | bool isKnownNoCaptureMaybeReturned() const { | ||||||||
| 3483 | return isKnown(NO_CAPTURE_MAYBE_RETURNED); | ||||||||
| 3484 | } | ||||||||
| 3485 | |||||||||
| 3486 | /// Return true if we assume that the underlying value is not captured in its | ||||||||
| 3487 | /// respective scope but we allow it to escape through a "return". | ||||||||
| 3488 | bool isAssumedNoCaptureMaybeReturned() const { | ||||||||
| 3489 | return isAssumed(NO_CAPTURE_MAYBE_RETURNED); | ||||||||
| 3490 | } | ||||||||
| 3491 | |||||||||
| 3492 | /// Create an abstract attribute view for the position \p IRP. | ||||||||
| 3493 | static AANoCapture &createForPosition(const IRPosition &IRP, Attributor &A); | ||||||||
| 3494 | |||||||||
| 3495 | /// See AbstractAttribute::getName() | ||||||||
| 3496 | const std::string getName() const override { return "AANoCapture"; } | ||||||||
| 3497 | |||||||||
| 3498 | /// See AbstractAttribute::getIdAddr() | ||||||||
| 3499 | const char *getIdAddr() const override { return &ID; } | ||||||||
| 3500 | |||||||||
| 3501 | /// This function should return true if the type of the \p AA is AANoCapture | ||||||||
| 3502 | static bool classof(const AbstractAttribute *AA) { | ||||||||
| 3503 | return (AA->getIdAddr() == &ID); | ||||||||
| 3504 | } | ||||||||
| 3505 | |||||||||
| 3506 | /// Unique ID (due to the unique address) | ||||||||
| 3507 | static const char ID; | ||||||||
| 3508 | }; | ||||||||
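The capture encoding above composes three independent bits. The following hypothetical, self-contained sketch (plain C++, not the Attributor's BitIntegerState) shows how the derived states NO_CAPTURE_MAYBE_RETURNED and NO_CAPTURE relate to the individual bits:

```cpp
#include <cassert>
#include <cstdint>

// Hypothetical illustration of the capture-state bits (not the LLVM types).
enum : uint16_t {
  NOT_CAPTURED_IN_MEM = 1 << 0,
  NOT_CAPTURED_IN_INT = 1 << 1,
  NOT_CAPTURED_IN_RET = 1 << 2,
  NO_CAPTURE_MAYBE_RETURNED = NOT_CAPTURED_IN_MEM | NOT_CAPTURED_IN_INT,
  NO_CAPTURE = NO_CAPTURE_MAYBE_RETURNED | NOT_CAPTURED_IN_RET,
};

int main() {
  // A pointer that may escape only by being returned: the MEM and INT bits
  // hold, the RET bit does not.
  uint16_t Assumed = NO_CAPTURE_MAYBE_RETURNED;
  assert((Assumed & NO_CAPTURE_MAYBE_RETURNED) == NO_CAPTURE_MAYBE_RETURNED);
  assert((Assumed & NO_CAPTURE) != NO_CAPTURE); // not fully nocapture
  return 0;
}
```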
| 3509 | |||||||||
| 3510 | struct ValueSimplifyStateType : public AbstractState { | ||||||||
| 3511 | |||||||||
| 3512 | ValueSimplifyStateType(Type *Ty) : Ty(Ty) {} | ||||||||
| 3513 | |||||||||
| 3514 | static ValueSimplifyStateType getBestState(Type *Ty) { | ||||||||
| 3515 | return ValueSimplifyStateType(Ty); | ||||||||
| 3516 | } | ||||||||
| 3517 | static ValueSimplifyStateType getBestState(const ValueSimplifyStateType &VS) { | ||||||||
| 3518 | return getBestState(VS.Ty); | ||||||||
| 3519 | } | ||||||||
| 3520 | |||||||||
| 3521 | /// Return the worst possible representable state. | ||||||||
| 3522 | static ValueSimplifyStateType getWorstState(Type *Ty) { | ||||||||
| 3523 | ValueSimplifyStateType DS(Ty); | ||||||||
| 3524 | DS.indicatePessimisticFixpoint(); | ||||||||
| 3525 | return DS; | ||||||||
| 3526 | } | ||||||||
| 3527 | static ValueSimplifyStateType | ||||||||
| 3528 | getWorstState(const ValueSimplifyStateType &VS) { | ||||||||
| 3529 | return getWorstState(VS.Ty); | ||||||||
| 3530 | } | ||||||||
| 3531 | |||||||||
| 3532 | /// See AbstractState::isValidState(...) | ||||||||
| 3533 | bool isValidState() const override { return BS.isValidState(); } | ||||||||
| 3534 | |||||||||
| 3535 | /// See AbstractState::isAtFixpoint(...) | ||||||||
| 3536 | bool isAtFixpoint() const override { return BS.isAtFixpoint(); } | ||||||||
| 3537 | |||||||||
| 3538 | /// Return the assumed state encoding. | ||||||||
| 3539 | ValueSimplifyStateType getAssumed() { return *this; } | ||||||||
| 3540 | const ValueSimplifyStateType &getAssumed() const { return *this; } | ||||||||
| 3541 | |||||||||
| 3542 | /// See AbstractState::indicatePessimisticFixpoint(...) | ||||||||
| 3543 | ChangeStatus indicatePessimisticFixpoint() override { | ||||||||
| 3544 | return BS.indicatePessimisticFixpoint(); | ||||||||
| 3545 | } | ||||||||
| 3546 | |||||||||
| 3547 | /// See AbstractState::indicateOptimisticFixpoint(...) | ||||||||
| 3548 | ChangeStatus indicateOptimisticFixpoint() override { | ||||||||
| 3549 | return BS.indicateOptimisticFixpoint(); | ||||||||
| 3550 | } | ||||||||
| 3551 | |||||||||
| 3552 | /// "Clamp" this state with \p PVS. | ||||||||
| 3553 | ValueSimplifyStateType operator^=(const ValueSimplifyStateType &VS) { | ||||||||
| 3554 | BS ^= VS.BS; | ||||||||
| 3555 | unionAssumed(VS.SimplifiedAssociatedValue); | ||||||||
| 3556 | return *this; | ||||||||
| 3557 | } | ||||||||
| 3558 | |||||||||
| 3559 | bool operator==(const ValueSimplifyStateType &RHS) const { | ||||||||
| 3560 | if (isValidState() != RHS.isValidState()) | ||||||||
| 3561 | return false; | ||||||||
| 3562 | if (!isValidState() && !RHS.isValidState()) | ||||||||
| 3563 | return true; | ||||||||
| 3564 | return SimplifiedAssociatedValue == RHS.SimplifiedAssociatedValue; | ||||||||
| 3565 | } | ||||||||
| 3566 | |||||||||
| 3567 | protected: | ||||||||
| 3568 | /// The type of the original value. | ||||||||
| 3569 | Type *Ty; | ||||||||
| 3570 | |||||||||
| 3571 | /// Merge \p Other into the currently assumed simplified value | ||||||||
| 3572 | bool unionAssumed(Optional<Value *> Other); | ||||||||
| 3573 | |||||||||
| 3574 | /// Helper to track validity and fixpoint | ||||||||
| 3575 | BooleanState BS; | ||||||||
| 3576 | |||||||||
| 3577 | /// An assumed simplified value. Initially, it is set to Optional::None, which | ||||||||
| 3578 | /// means that the value is not clear under the current assumption. If in the | ||||||||
| 3579 | /// pessimistic state, getAssumedSimplifiedValue doesn't return this value but | ||||||||
| 3580 | /// returns the original associated value. | ||||||||
| 3581 | Optional<Value *> SimplifiedAssociatedValue; | ||||||||
| 3582 | }; | ||||||||
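The tri-state bookkeeping of SimplifiedAssociatedValue (still unclear, a single candidate, or given up) can be sketched in isolation. The toy below is an assumption-laden simplification for illustration only, not LLVM's unionAssumed implementation:

```cpp
#include <cstdio>
#include <optional>

// Toy three-state merge: std::nullopt == "not clear yet", a pointer == a
// single simplification candidate, Valid == false == "gave up".
struct ToySimplifyState {
  std::optional<int *> Simplified; // std::nullopt means not clear yet
  bool Valid = true;

  void unionAssumed(std::optional<int *> Other) {
    if (!Valid || !Other) // nothing to learn from an unclear candidate
      return;
    if (!Simplified) {    // first concrete candidate
      Simplified = Other;
      return;
    }
    if (*Simplified != *Other) // conflicting candidates -> give up
      Valid = false;
  }
};

int main() {
  int A = 1, B = 2;
  ToySimplifyState S;
  S.unionAssumed(&A);
  S.unionAssumed(&A);
  std::printf("valid=%d\n", S.Valid); // 1: still a single candidate
  S.unionAssumed(&B);
  std::printf("valid=%d\n", S.Valid); // 0: conflict
}
```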
| 3583 | |||||||||
| 3584 | /// An abstract interface for value simplify abstract attribute. | ||||||||
| 3585 | struct AAValueSimplify | ||||||||
| 3586 | : public StateWrapper<ValueSimplifyStateType, AbstractAttribute, Type *> { | ||||||||
| 3587 | using Base = StateWrapper<ValueSimplifyStateType, AbstractAttribute, Type *>; | ||||||||
| 3588 | AAValueSimplify(const IRPosition &IRP, Attributor &A) | ||||||||
| 3589 | : Base(IRP, IRP.getAssociatedType()) {} | ||||||||
| 3590 | |||||||||
| 3591 | /// Create an abstract attribute view for the position \p IRP. | ||||||||
| 3592 | static AAValueSimplify &createForPosition(const IRPosition &IRP, | ||||||||
| 3593 | Attributor &A); | ||||||||
| 3594 | |||||||||
| 3595 | /// See AbstractAttribute::getName() | ||||||||
| 3596 | const std::string getName() const override { return "AAValueSimplify"; } | ||||||||
| 3597 | |||||||||
| 3598 | /// See AbstractAttribute::getIdAddr() | ||||||||
| 3599 | const char *getIdAddr() const override { return &ID; } | ||||||||
| 3600 | |||||||||
| 3601 | /// This function should return true if the type of the \p AA is | ||||||||
| 3602 | /// AAValueSimplify | ||||||||
| 3603 | static bool classof(const AbstractAttribute *AA) { | ||||||||
| 3604 | return (AA->getIdAddr() == &ID); | ||||||||
| 3605 | } | ||||||||
| 3606 | |||||||||
| 3607 | /// Unique ID (due to the unique address) | ||||||||
| 3608 | static const char ID; | ||||||||
| 3609 | |||||||||
| 3610 | private: | ||||||||
| 3611 | /// Return an assumed simplified value if a single candidate is found. If | ||||||||
| 3612 | /// there cannot be one, return the original value. If it is not clear yet, | ||||||||
| 3613 | /// return Optional::None. | ||||||||
| 3614 | /// | ||||||||
| 3615 | /// Use `Attributor::getAssumedSimplified` for value simplification. | ||||||||
| 3616 | virtual Optional<Value *> getAssumedSimplifiedValue(Attributor &A) const = 0; | ||||||||
| 3617 | |||||||||
| 3618 | friend struct Attributor; | ||||||||
| 3619 | }; | ||||||||
| 3620 | |||||||||
| 3621 | struct AAHeapToStack : public StateWrapper<BooleanState, AbstractAttribute> { | ||||||||
| 3622 | using Base = StateWrapper<BooleanState, AbstractAttribute>; | ||||||||
| 3623 | AAHeapToStack(const IRPosition &IRP, Attributor &A) : Base(IRP) {} | ||||||||
| 3624 | |||||||||
| 3625 | /// Returns true if HeapToStack conversion is assumed to be possible. | ||||||||
| 3626 | virtual bool isAssumedHeapToStack(const CallBase &CB) const = 0; | ||||||||
| 3627 | |||||||||
| 3628 | /// Returns true if HeapToStack conversion is assumed and the CB is a | ||||||||
| 3629 | /// callsite to a free operation to be removed. | ||||||||
| 3630 | virtual bool isAssumedHeapToStackRemovedFree(CallBase &CB) const = 0; | ||||||||
| 3631 | |||||||||
| 3632 | /// Create an abstract attribute view for the position \p IRP. | ||||||||
| 3633 | static AAHeapToStack &createForPosition(const IRPosition &IRP, Attributor &A); | ||||||||
| 3634 | |||||||||
| 3635 | /// See AbstractAttribute::getName() | ||||||||
| 3636 | const std::string getName() const override { return "AAHeapToStack"; } | ||||||||
| 3637 | |||||||||
| 3638 | /// See AbstractAttribute::getIdAddr() | ||||||||
| 3639 | const char *getIdAddr() const override { return &ID; } | ||||||||
| 3640 | |||||||||
| 3641 | /// This function should return true if the type of the \p AA is AAHeapToStack | ||||||||
| 3642 | static bool classof(const AbstractAttribute *AA) { | ||||||||
| 3643 | return (AA->getIdAddr() == &ID); | ||||||||
| 3644 | } | ||||||||
| 3645 | |||||||||
| 3646 | /// Unique ID (due to the unique address) | ||||||||
| 3647 | static const char ID; | ||||||||
| 3648 | }; | ||||||||
| 3649 | |||||||||
| 3650 | /// An abstract interface for privatizability. | ||||||||
| 3651 | /// | ||||||||
| 3652 | /// A pointer is privatizable if it can be replaced by a new, private one. | ||||||||
| 3653 | /// Privatizing a pointer reduces its use count and the interaction between | ||||||||
| 3654 | /// unrelated code parts. | ||||||||
| 3655 | /// | ||||||||
| 3656 | /// In order for a pointer to be privatizable its value cannot be observed | ||||||||
| 3657 | /// (=nocapture), it is (for now) not written (=readonly & noalias), we know | ||||||||
| 3658 | /// what values are necessary to make the private copy look like the original | ||||||||
| 3659 | /// one, and the values we need can be loaded (=dereferenceable). | ||||||||
| 3660 | struct AAPrivatizablePtr | ||||||||
| 3661 | : public StateWrapper<BooleanState, AbstractAttribute> { | ||||||||
| 3662 | using Base = StateWrapper<BooleanState, AbstractAttribute>; | ||||||||
| 3663 | AAPrivatizablePtr(const IRPosition &IRP, Attributor &A) : Base(IRP) {} | ||||||||
| 3664 | |||||||||
| 3665 | /// Returns true if pointer privatization is assumed to be possible. | ||||||||
| 3666 | bool isAssumedPrivatizablePtr() const { return getAssumed(); } | ||||||||
| 3667 | |||||||||
| 3668 | /// Returns true if pointer privatization is known to be possible. | ||||||||
| 3669 | bool isKnownPrivatizablePtr() const { return getKnown(); } | ||||||||
| 3670 | |||||||||
| 3671 | /// Return the type we can choose for a private copy of the underlying | ||||||||
| 3672 | /// value. None means it is not clear yet, nullptr means there is none. | ||||||||
| 3673 | virtual Optional<Type *> getPrivatizableType() const = 0; | ||||||||
| 3674 | |||||||||
| 3675 | /// Create an abstract attribute view for the position \p IRP. | ||||||||
| 3676 | static AAPrivatizablePtr &createForPosition(const IRPosition &IRP, | ||||||||
| 3677 | Attributor &A); | ||||||||
| 3678 | |||||||||
| 3679 | /// See AbstractAttribute::getName() | ||||||||
| 3680 | const std::string getName() const override { return "AAPrivatizablePtr"; } | ||||||||
| 3681 | |||||||||
| 3682 | /// See AbstractAttribute::getIdAddr() | ||||||||
| 3683 | const char *getIdAddr() const override { return &ID; } | ||||||||
| 3684 | |||||||||
| 3685 | /// This function should return true if the type of the \p AA is | ||||||||
| 3686 | /// AAPrivatizablePtr | ||||||||
| 3687 | static bool classof(const AbstractAttribute *AA) { | ||||||||
| 3688 | return (AA->getIdAddr() == &ID); | ||||||||
| 3689 | } | ||||||||
| 3690 | |||||||||
| 3691 | /// Unique ID (due to the unique address) | ||||||||
| 3692 | static const char ID; | ||||||||
| 3693 | }; | ||||||||
| 3694 | |||||||||
| 3695 | /// An abstract interface for memory access kind related attributes | ||||||||
| 3696 | /// (readnone/readonly/writeonly). | ||||||||
| 3697 | struct AAMemoryBehavior | ||||||||
| 3698 | : public IRAttribute< | ||||||||
| 3699 | Attribute::ReadNone, | ||||||||
| 3700 | StateWrapper<BitIntegerState<uint8_t, 3>, AbstractAttribute>> { | ||||||||
| 3701 | AAMemoryBehavior(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {} | ||||||||
| 3702 | |||||||||
| 3703 | /// State encoding bits. A set bit in the state means the property holds. | ||||||||
| 3704 | /// BEST_STATE is the best possible state, 0 the worst possible state. | ||||||||
| 3705 | enum { | ||||||||
| 3706 | NO_READS = 1 << 0, | ||||||||
| 3707 | NO_WRITES = 1 << 1, | ||||||||
| 3708 | NO_ACCESSES = NO_READS | NO_WRITES, | ||||||||
| 3709 | |||||||||
| 3710 | BEST_STATE = NO_ACCESSES, | ||||||||
| 3711 | }; | ||||||||
| 3712 | static_assert(BEST_STATE == getBestState(), "Unexpected BEST_STATE value"); | ||||||||
| 3713 | |||||||||
| 3714 | /// Return true if we know that the underlying value is not read or written | ||||||||
| 3715 | /// in its respective scope. | ||||||||
| 3716 | bool isKnownReadNone() const { return isKnown(NO_ACCESSES); } | ||||||||
| 3717 | |||||||||
| 3718 | /// Return true if we assume that the underlying value is not read or written | ||||||||
| 3719 | /// in its respective scope. | ||||||||
| 3720 | bool isAssumedReadNone() const { return isAssumed(NO_ACCESSES); } | ||||||||
| 3721 | |||||||||
| 3722 | /// Return true if we know that the underlying value is not written in its | ||||||||
| 3723 | /// respective scope. | ||||||||
| 3724 | bool isKnownReadOnly() const { return isKnown(NO_WRITES); } | ||||||||
| 3725 | |||||||||
| 3726 | /// Return true if we assume that the underlying value is not written in its | ||||||||
| 3727 | /// respective scope. | ||||||||
| 3728 | bool isAssumedReadOnly() const { return isAssumed(NO_WRITES); } | ||||||||
| 3729 | |||||||||
| 3730 | /// Return true if we know that the underlying value is not read in its | ||||||||
| 3731 | /// respective scope. | ||||||||
| 3732 | bool isKnownWriteOnly() const { return isKnown(NO_READS); } | ||||||||
| 3733 | |||||||||
| 3734 | /// Return true if we assume that the underlying value is not read in its | ||||||||
| 3735 | /// respective scope. | ||||||||
| 3736 | bool isAssumedWriteOnly() const { return isAssumed(NO_READS); } | ||||||||
| 3737 | |||||||||
| 3738 | /// Create an abstract attribute view for the position \p IRP. | ||||||||
| 3739 | static AAMemoryBehavior &createForPosition(const IRPosition &IRP, | ||||||||
| 3740 | Attributor &A); | ||||||||
| 3741 | |||||||||
| 3742 | /// See AbstractAttribute::getName() | ||||||||
| 3743 | const std::string getName() const override { return "AAMemoryBehavior"; } | ||||||||
| 3744 | |||||||||
| 3745 | /// See AbstractAttribute::getIdAddr() | ||||||||
| 3746 | const char *getIdAddr() const override { return &ID; } | ||||||||
| 3747 | |||||||||
| 3748 | /// This function should return true if the type of the \p AA is | ||||||||
| 3749 | /// AAMemoryBehavior | ||||||||
| 3750 | static bool classof(const AbstractAttribute *AA) { | ||||||||
| 3751 | return (AA->getIdAddr() == &ID); | ||||||||
| 3752 | } | ||||||||
| 3753 | |||||||||
| 3754 | /// Unique ID (due to the unique address) | ||||||||
| 3755 | static const char ID; | ||||||||
| 3756 | }; | ||||||||
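The NO_READS/NO_WRITES encoding maps directly onto the readnone/readonly/writeonly queries above. A small hypothetical demo (plain C++, not the Attributor API) of that mapping:

```cpp
#include <cstdint>
#include <cstdio>

// Hypothetical demo of the NO_READS / NO_WRITES encoding (not LLVM code).
enum : uint8_t {
  NO_READS = 1 << 0,
  NO_WRITES = 1 << 1,
  NO_ACCESSES = NO_READS | NO_WRITES,
};

static const char *describe(uint8_t Assumed) {
  if ((Assumed & NO_ACCESSES) == NO_ACCESSES)
    return "readnone";  // neither reads nor writes
  if (Assumed & NO_WRITES)
    return "readonly";  // may read, never writes
  if (Assumed & NO_READS)
    return "writeonly"; // may write, never reads
  return "may read and write";
}

int main() {
  std::printf("%s\n", describe(NO_ACCESSES)); // readnone
  std::printf("%s\n", describe(NO_WRITES));   // readonly
  std::printf("%s\n", describe(NO_READS));    // writeonly
  std::printf("%s\n", describe(0));           // may read and write
}
```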
| 3757 | |||||||||
| 3758 | /// An abstract interface for all memory location attributes | ||||||||
| 3759 | /// (readnone/argmemonly/inaccessiblememonly/inaccessibleorargmemonly). | ||||||||
| 3760 | struct AAMemoryLocation | ||||||||
| 3761 | : public IRAttribute< | ||||||||
| 3762 | Attribute::ReadNone, | ||||||||
| 3763 | StateWrapper<BitIntegerState<uint32_t, 511>, AbstractAttribute>> { | ||||||||
| 3764 | using MemoryLocationsKind = StateType::base_t; | ||||||||
| 3765 | |||||||||
| 3766 | AAMemoryLocation(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {} | ||||||||
| 3767 | |||||||||
| 3768 | /// Encoding of different locations that could be accessed by a memory | ||||||||
| 3769 | /// access. | ||||||||
| 3770 | enum { | ||||||||
| 3771 | ALL_LOCATIONS = 0, | ||||||||
| 3772 | NO_LOCAL_MEM = 1 << 0, | ||||||||
| 3773 | NO_CONST_MEM = 1 << 1, | ||||||||
| 3774 | NO_GLOBAL_INTERNAL_MEM = 1 << 2, | ||||||||
| 3775 | NO_GLOBAL_EXTERNAL_MEM = 1 << 3, | ||||||||
| 3776 | NO_GLOBAL_MEM = NO_GLOBAL_INTERNAL_MEM | NO_GLOBAL_EXTERNAL_MEM, | ||||||||
| 3777 | NO_ARGUMENT_MEM = 1 << 4, | ||||||||
| 3778 | NO_INACCESSIBLE_MEM = 1 << 5, | ||||||||
| 3779 | NO_MALLOCED_MEM = 1 << 6, | ||||||||
| 3780 | NO_UNKOWN_MEM = 1 << 7, | ||||||||
| 3781 | NO_LOCATIONS = NO_LOCAL_MEM | NO_CONST_MEM | NO_GLOBAL_INTERNAL_MEM | | ||||||||
| 3782 | NO_GLOBAL_EXTERNAL_MEM | NO_ARGUMENT_MEM | | ||||||||
| 3783 | NO_INACCESSIBLE_MEM | NO_MALLOCED_MEM | NO_UNKOWN_MEM, | ||||||||
| 3784 | |||||||||
| 3785 | // Helper bit to track if we gave up or not. | ||||||||
| 3786 | VALID_STATE = NO_LOCATIONS + 1, | ||||||||
| 3787 | |||||||||
| 3788 | BEST_STATE = NO_LOCATIONS | VALID_STATE, | ||||||||
| 3789 | }; | ||||||||
| 3790 | static_assert(BEST_STATE == getBestState(), "Unexpected BEST_STATE value"); | ||||||||
| 3791 | |||||||||
| 3792 | /// Return true if we know that the associated function has no observable | ||||||||
| 3793 | /// accesses. | ||||||||
| 3794 | bool isKnownReadNone() const { return isKnown(NO_LOCATIONS); } | ||||||||
| 3795 | |||||||||
| 3796 | /// Return true if we assume that the associated function has no observable | ||||||||
| 3797 | /// accesses. | ||||||||
| 3798 | bool isAssumedReadNone() const { | ||||||||
| 3799 | return isAssumed(NO_LOCATIONS) | isAssumedStackOnly(); | ||||||||
| 3800 | } | ||||||||
| 3801 | |||||||||
| 3802 | /// Return true if we know that the associated function has at most | ||||||||
| 3803 | /// local/stack accesses. | ||||||||
| 3804 | bool isKnowStackOnly() const { | ||||||||
| 3805 | return isKnown(inverseLocation(NO_LOCAL_MEM, true, true)); | ||||||||
| 3806 | } | ||||||||
| 3807 | |||||||||
| 3808 | /// Return true if we assume that the associated function has at most | ||||||||
| 3809 | /// local/stack accesses. | ||||||||
| 3810 | bool isAssumedStackOnly() const { | ||||||||
| 3811 | return isAssumed(inverseLocation(NO_LOCAL_MEM, true, true)); | ||||||||
| 3812 | } | ||||||||
| 3813 | |||||||||
| 3814 | /// Return true if we know that the underlying value will only access | ||||||||
| 3815 | /// inaccessible memory (see Attribute::InaccessibleMemOnly). | ||||||||
| 3816 | bool isKnownInaccessibleMemOnly() const { | ||||||||
| 3817 | return isKnown(inverseLocation(NO_INACCESSIBLE_MEM, true, true)); | ||||||||
| 3818 | } | ||||||||
| 3819 | |||||||||
| 3820 | /// Return true if we assume that the underlying value will only access | ||||||||
| 3821 | /// inaccessible memory (see Attribute::InaccessibleMemOnly). | ||||||||
| 3822 | bool isAssumedInaccessibleMemOnly() const { | ||||||||
| 3823 | return isAssumed(inverseLocation(NO_INACCESSIBLE_MEM, true, true)); | ||||||||
| 3824 | } | ||||||||
| 3825 | |||||||||
| 3826 | /// Return true if we know that the underlying value will only access | ||||||||
| 3827 | /// argument pointees (see Attribute::ArgMemOnly). | ||||||||
| 3828 | bool isKnownArgMemOnly() const { | ||||||||
| 3829 | return isKnown(inverseLocation(NO_ARGUMENT_MEM, true, true)); | ||||||||
| 3830 | } | ||||||||
| 3831 | |||||||||
| 3832 | /// Return true if we assume that the underlying value will only access | ||||||||
| 3833 | /// argument pointees (see Attribute::ArgMemOnly). | ||||||||
| 3834 | bool isAssumedArgMemOnly() const { | ||||||||
| 3835 | return isAssumed(inverseLocation(NO_ARGUMENT_MEM, true, true)); | ||||||||
| 3836 | } | ||||||||
| 3837 | |||||||||
| 3838 | /// Return true if we know that the underlying value will only access | ||||||||
| 3839 | /// inaccessible memory or argument pointees (see | ||||||||
| 3840 | /// Attribute::InaccessibleOrArgMemOnly). | ||||||||
| 3841 | bool isKnownInaccessibleOrArgMemOnly() const { | ||||||||
| 3842 | return isKnown( | ||||||||
| 3843 | inverseLocation(NO_INACCESSIBLE_MEM | NO_ARGUMENT_MEM, true, true)); | ||||||||
| 3844 | } | ||||||||
| 3845 | |||||||||
| 3846 | /// Return true if we assume that the underlying value will only access | ||||||||
| 3847 | /// inaccessible memory or argument pointees (see | ||||||||
| 3848 | /// Attribute::InaccessibleOrArgMemOnly). | ||||||||
| 3849 | bool isAssumedInaccessibleOrArgMemOnly() const { | ||||||||
| 3850 | return isAssumed( | ||||||||
| 3851 | inverseLocation(NO_INACCESSIBLE_MEM | NO_ARGUMENT_MEM, true, true)); | ||||||||
| 3852 | } | ||||||||
| 3853 | |||||||||
| 3854 | /// Return true if the underlying value may access memory through argument | ||||||||
| 3855 | /// pointers of the associated function, if any. | ||||||||
| 3856 | bool mayAccessArgMem() const { return !isAssumed(NO_ARGUMENT_MEM); } | ||||||||
| 3857 | |||||||||
| 3858 | /// Return true if only the memory locations specified by \p MLK are assumed | ||||||||
| 3859 | /// to be accessed by the associated function. | ||||||||
| 3860 | bool isAssumedSpecifiedMemOnly(MemoryLocationsKind MLK) const { | ||||||||
| 3861 | return isAssumed(MLK); | ||||||||
| 3862 | } | ||||||||
| 3863 | |||||||||
| 3864 | /// Return the locations that are assumed to be not accessed by the associated | ||||||||
| 3865 | /// function, if any. | ||||||||
| 3866 | MemoryLocationsKind getAssumedNotAccessedLocation() const { | ||||||||
| 3867 | return getAssumed(); | ||||||||
| 3868 | } | ||||||||
| 3869 | |||||||||
| 3870 | /// Return the inverse of location \p Loc, thus for NO_XXX the return | ||||||||
| 3871 | /// describes ONLY_XXX. The flags \p AndLocalMem and \p AndConstMem determine | ||||||||
| 3872 | /// if local (=stack) and constant memory are allowed as well. Most of the | ||||||||
| 3873 | /// time we do want them to be included, e.g., argmemonly allows accesses via | ||||||||
| 3874 | /// argument pointers or local or constant memory accesses. | ||||||||
| 3875 | static MemoryLocationsKind | ||||||||
| 3876 | inverseLocation(MemoryLocationsKind Loc, bool AndLocalMem, bool AndConstMem) { | ||||||||
| 3877 | return NO_LOCATIONS & ~(Loc | (AndLocalMem ? NO_LOCAL_MEM : 0) | | ||||||||
| 3878 | (AndConstMem ? NO_CONST_MEM : 0)); | ||||||||
| 3879 | }; | ||||||||
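As a sanity check of inverseLocation, the following standalone sketch copies the location bits and the helper (the enum duplication and the `main` driver are assumptions for illustration only) and computes the "argmemonly" mask described in the comment above:

```cpp
#include <cstdint>
#include <cstdio>

// Hypothetical standalone copy of the location bits and inverseLocation(),
// mirroring the declarations above; not the LLVM header itself.
enum : uint32_t {
  NO_LOCAL_MEM = 1 << 0,
  NO_CONST_MEM = 1 << 1,
  NO_GLOBAL_INTERNAL_MEM = 1 << 2,
  NO_GLOBAL_EXTERNAL_MEM = 1 << 3,
  NO_ARGUMENT_MEM = 1 << 4,
  NO_INACCESSIBLE_MEM = 1 << 5,
  NO_MALLOCED_MEM = 1 << 6,
  NO_UNKOWN_MEM = 1 << 7,
  NO_LOCATIONS = NO_LOCAL_MEM | NO_CONST_MEM | NO_GLOBAL_INTERNAL_MEM |
                 NO_GLOBAL_EXTERNAL_MEM | NO_ARGUMENT_MEM |
                 NO_INACCESSIBLE_MEM | NO_MALLOCED_MEM | NO_UNKOWN_MEM,
};

static uint32_t inverseLocation(uint32_t Loc, bool AndLocalMem, bool AndConstMem) {
  return NO_LOCATIONS & ~(Loc | (AndLocalMem ? NO_LOCAL_MEM : 0) |
                          (AndConstMem ? NO_CONST_MEM : 0));
}

int main() {
  // "argmemonly": every location is denied except argument pointees,
  // local (stack) memory, and constant memory.
  uint32_t ArgMemOnly = inverseLocation(NO_ARGUMENT_MEM, true, true);
  std::printf("argmemonly mask: 0x%02x\n", (unsigned)ArgMemOnly);               // 0xec
  std::printf("denies global mem:   %d\n", (ArgMemOnly & NO_GLOBAL_INTERNAL_MEM) != 0);
  std::printf("allows argument mem: %d\n", (ArgMemOnly & NO_ARGUMENT_MEM) == 0);
}
```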
| 3880 | |||||||||
| 3881 | /// Return the locations encoded by \p MLK as a readable string. | ||||||||
| 3882 | static std::string getMemoryLocationsAsStr(MemoryLocationsKind MLK); | ||||||||
| 3883 | |||||||||
| 3884 | /// Simple enum to distinguish read/write/read-write accesses. | ||||||||
| 3885 | enum AccessKind { | ||||||||
| 3886 | NONE = 0, | ||||||||
| 3887 | READ = 1 << 0, | ||||||||
| 3888 | WRITE = 1 << 1, | ||||||||
| 3889 | READ_WRITE = READ | WRITE, | ||||||||
| 3890 | }; | ||||||||
| 3891 | |||||||||
| 3892 | /// Check \p Pred on all accesses to the memory kinds specified by \p MLK. | ||||||||
| 3893 | /// | ||||||||
| 3894 | /// This method will evaluate \p Pred on all accesses (access instruction + | ||||||||
| 3895 | /// underlying accessed memory pointer) and it will return true if \p Pred | ||||||||
| 3896 | /// holds every time. | ||||||||
| 3897 | virtual bool checkForAllAccessesToMemoryKind( | ||||||||
| 3898 | function_ref<bool(const Instruction *, const Value *, AccessKind, | ||||||||
| 3899 | MemoryLocationsKind)> | ||||||||
| 3900 | Pred, | ||||||||
| 3901 | MemoryLocationsKind MLK) const = 0; | ||||||||
| 3902 | |||||||||
| 3903 | /// Create an abstract attribute view for the position \p IRP. | ||||||||
| 3904 | static AAMemoryLocation &createForPosition(const IRPosition &IRP, | ||||||||
| 3905 | Attributor &A); | ||||||||
| 3906 | |||||||||
| 3907 | /// See AbstractState::getAsStr(). | ||||||||
| 3908 | const std::string getAsStr() const override { | ||||||||
| 3909 | return getMemoryLocationsAsStr(getAssumedNotAccessedLocation()); | ||||||||
| 3910 | } | ||||||||
| 3911 | |||||||||
| 3912 | /// See AbstractAttribute::getName() | ||||||||
| 3913 | const std::string getName() const override { return "AAMemoryLocation"; } | ||||||||
| 3914 | |||||||||
| 3915 | /// See AbstractAttribute::getIdAddr() | ||||||||
| 3916 | const char *getIdAddr() const override { return &ID; } | ||||||||
| 3917 | |||||||||
| 3918 | /// This function should return true if the type of the \p AA is | ||||||||
| 3919 | /// AAMemoryLocation | ||||||||
| 3920 | static bool classof(const AbstractAttribute *AA) { | ||||||||
| 3921 | return (AA->getIdAddr() == &ID); | ||||||||
| 3922 | } | ||||||||
| 3923 | |||||||||
| 3924 | /// Unique ID (due to the unique address) | ||||||||
| 3925 | static const char ID; | ||||||||
| 3926 | }; | ||||||||
| 3927 | |||||||||
| 3928 | /// An abstract interface for range value analysis. | ||||||||
| 3929 | struct AAValueConstantRange | ||||||||
| 3930 | : public StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t> { | ||||||||
| 3931 | using Base = StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t>; | ||||||||
| 3932 | AAValueConstantRange(const IRPosition &IRP, Attributor &A) | ||||||||
| 3933 | : Base(IRP, IRP.getAssociatedType()->getIntegerBitWidth()) {} | ||||||||
| 3934 | |||||||||
| 3935 | /// See AbstractAttribute::getState(...). | ||||||||
| 3936 | IntegerRangeState &getState() override { return *this; } | ||||||||
| 3937 | const IntegerRangeState &getState() const override { return *this; } | ||||||||
| 3938 | |||||||||
| 3939 | /// Create an abstract attribute view for the position \p IRP. | ||||||||
| 3940 | static AAValueConstantRange &createForPosition(const IRPosition &IRP, | ||||||||
| 3941 | Attributor &A); | ||||||||
| 3942 | |||||||||
| 3943 | /// Return an assumed range for the associated value at a program point \p CtxI. | ||||||||
| 3944 | /// If \p I is nullptr, simply return an assumed range. | ||||||||
| 3945 | virtual ConstantRange | ||||||||
| 3946 | getAssumedConstantRange(Attributor &A, | ||||||||
| 3947 | const Instruction *CtxI = nullptr) const = 0; | ||||||||
| 3948 | |||||||||
| 3949 | /// Return a known range for the associated value at a program point \p CtxI. | ||||||||
| 3950 | /// If \p I is nullptr, simply return a known range. | ||||||||
| 3951 | virtual ConstantRange | ||||||||
| 3952 | getKnownConstantRange(Attributor &A, | ||||||||
| 3953 | const Instruction *CtxI = nullptr) const = 0; | ||||||||
| 3954 | |||||||||
| 3955 | /// Return an assumed constant for the associated value at a program point \p | ||||||||
| 3956 | /// CtxI. | ||||||||
| 3957 | Optional<ConstantInt *> | ||||||||
| 3958 | getAssumedConstantInt(Attributor &A, | ||||||||
| 3959 | const Instruction *CtxI = nullptr) const { | ||||||||
| 3960 | ConstantRange RangeV = getAssumedConstantRange(A, CtxI); | ||||||||
| 3961 | if (auto *C = RangeV.getSingleElement()) | ||||||||
| 3962 | return cast<ConstantInt>( | ||||||||
| 3963 | ConstantInt::get(getAssociatedValue().getType(), *C)); | ||||||||
| 3964 | if (RangeV.isEmptySet()) | ||||||||
| 3965 | return llvm::None; | ||||||||
| 3966 | return nullptr; | ||||||||
| 3967 | } | ||||||||
| 3968 | |||||||||
| 3969 | /// See AbstractAttribute::getName() | ||||||||
| 3970 | const std::string getName() const override { return "AAValueConstantRange"; } | ||||||||
| 3971 | |||||||||
| 3972 | /// See AbstractAttribute::getIdAddr() | ||||||||
| 3973 | const char *getIdAddr() const override { return &ID; } | ||||||||
| 3974 | |||||||||
| 3975 | /// This function should return true if the type of the \p AA is | ||||||||
| 3976 | /// AAValueConstantRange | ||||||||
| 3977 | static bool classof(const AbstractAttribute *AA) { | ||||||||
| 3978 | return (AA->getIdAddr() == &ID); | ||||||||
| 3979 | } | ||||||||
| 3980 | |||||||||
| 3981 | /// Unique ID (due to the unique address) | ||||||||
| 3982 | static const char ID; | ||||||||
| 3983 | }; | ||||||||
| 3984 | |||||||||
| 3985 | /// A class for a set state. | ||||||||
| 3986 | /// The assumed boolean state indicates whether the corresponding set is a full | ||||||||
| 3987 | /// set or not. If the assumed state is false, this is the worst state. The | ||||||||
| 3988 | /// worst state (invalid state) of a set of potential values is when the set | ||||||||
| 3989 | /// contains every possible value (i.e. we cannot in any way limit the value | ||||||||
| 3990 | /// that the target position can take). That never happens naturally, we only | ||||||||
| 3991 | /// force it. As for the conditions under which we force it, see | ||||||||
| 3992 | /// AAPotentialValues. | ||||||||
| 3993 | template <typename MemberTy, typename KeyInfo = DenseMapInfo<MemberTy>> | ||||||||
| 3994 | struct PotentialValuesState : AbstractState { | ||||||||
| 3995 | using SetTy = DenseSet<MemberTy, KeyInfo>; | ||||||||
| 3996 | |||||||||
| 3997 | PotentialValuesState() : IsValidState(true), UndefIsContained(false) {} | ||||||||
| 3998 | |||||||||
| 3999 | PotentialValuesState(bool IsValid) | ||||||||
| 4000 | : IsValidState(IsValid), UndefIsContained(false) {} | ||||||||
| 4001 | |||||||||
| 4002 | /// See AbstractState::isValidState(...) | ||||||||
| 4003 | bool isValidState() const override { return IsValidState.isValidState(); } | ||||||||
| 4004 | |||||||||
| 4005 | /// See AbstractState::isAtFixpoint(...) | ||||||||
| 4006 | bool isAtFixpoint() const override { return IsValidState.isAtFixpoint(); } | ||||||||
| 4007 | |||||||||
| 4008 | /// See AbstractState::indicatePessimisticFixpoint(...) | ||||||||
| 4009 | ChangeStatus indicatePessimisticFixpoint() override { | ||||||||
| 4010 | return IsValidState.indicatePessimisticFixpoint(); | ||||||||
| 4011 | } | ||||||||
| 4012 | |||||||||
| 4013 | /// See AbstractState::indicateOptimisticFixpoint(...) | ||||||||
| 4014 | ChangeStatus indicateOptimisticFixpoint() override { | ||||||||
| 4015 | return IsValidState.indicateOptimisticFixpoint(); | ||||||||
| 4016 | } | ||||||||
| 4017 | |||||||||
| 4018 | /// Return the assumed state | ||||||||
| 4019 | PotentialValuesState &getAssumed() { return *this; } | ||||||||
| 4020 | const PotentialValuesState &getAssumed() const { return *this; } | ||||||||
| 4021 | |||||||||
| 4022 | /// Return this set. We should check whether this set is valid or not by | ||||||||
| 4023 | /// isValidState() before calling this function. | ||||||||
| 4024 | const SetTy &getAssumedSet() const { | ||||||||
| 4025 | assert(isValidState() && "This set should not be used when it is invalid!")((void)0); | ||||||||
| 4026 | return Set; | ||||||||
| 4027 | } | ||||||||
| 4028 | |||||||||
| 4029 | /// Returns whether this state contains an undef value or not. | ||||||||
| 4030 | bool undefIsContained() const { | ||||||||
| 4031 | assert(isValidState() && "This flag should not be used when it is invalid!")((void)0); | ||||||||
| 4032 | return UndefIsContained; | ||||||||
| 4033 | } | ||||||||
| 4034 | |||||||||
| 4035 | bool operator==(const PotentialValuesState &RHS) const { | ||||||||
| 4036 | if (isValidState() != RHS.isValidState()) | ||||||||
| 4037 | return false; | ||||||||
| 4038 | if (!isValidState() && !RHS.isValidState()) | ||||||||
| 4039 | return true; | ||||||||
| 4040 | if (undefIsContained() != RHS.undefIsContained()) | ||||||||
| 4041 | return false; | ||||||||
| 4042 | return Set == RHS.getAssumedSet(); | ||||||||
| 4043 | } | ||||||||
| 4044 | |||||||||
| 4045 | /// Maximum number of potential values to be tracked. | ||||||||
| 4046 | /// This is set by the -attributor-max-potential-values command line option. | ||||||||
| 4047 | static unsigned MaxPotentialValues; | ||||||||
| 4048 | |||||||||
| 4049 | /// Return empty set as the best state of potential values. | ||||||||
| 4050 | static PotentialValuesState getBestState() { | ||||||||
| 4051 | return PotentialValuesState(true); | ||||||||
| 4052 | } | ||||||||
| 4053 | |||||||||
| 4054 | static PotentialValuesState getBestState(PotentialValuesState &PVS) { | ||||||||
| 4055 | return getBestState(); | ||||||||
| 4056 | } | ||||||||
| 4057 | |||||||||
| 4058 | /// Return full set as the worst state of potential values. | ||||||||
| 4059 | static PotentialValuesState getWorstState() { | ||||||||
| 4060 | return PotentialValuesState(false); | ||||||||
| 4061 | } | ||||||||
| 4062 | |||||||||
| 4063 | /// Union assumed set with the passed value. | ||||||||
| 4064 | void unionAssumed(const MemberTy &C) { insert(C); } | ||||||||
| 4065 | |||||||||
| 4066 | /// Union assumed set with assumed set of the passed state \p PVS. | ||||||||
| 4067 | void unionAssumed(const PotentialValuesState &PVS) { unionWith(PVS); } | ||||||||
| 4068 | |||||||||
| 4069 | /// Union assumed set with an undef value. | ||||||||
| 4070 | void unionAssumedWithUndef() { unionWithUndef(); } | ||||||||
| 4071 | |||||||||
| 4072 | /// "Clamp" this state with \p PVS. | ||||||||
| 4073 | PotentialValuesState operator^=(const PotentialValuesState &PVS) { | ||||||||
| 4074 | IsValidState ^= PVS.IsValidState; | ||||||||
| 4075 | unionAssumed(PVS); | ||||||||
| 4076 | return *this; | ||||||||
| 4077 | } | ||||||||
| 4078 | |||||||||
| 4079 | PotentialValuesState operator&=(const PotentialValuesState &PVS) { | ||||||||
| 4080 | IsValidState &= PVS.IsValidState; | ||||||||
| 4081 | unionAssumed(PVS); | ||||||||
| 4082 | return *this; | ||||||||
| 4083 | } | ||||||||
| 4084 | |||||||||
| 4085 | private: | ||||||||
| 4086 | /// Check the size of this set, and invalidate it when the size is no | ||||||||
| 4087 | /// less than the \p MaxPotentialValues threshold. | ||||||||
| 4088 | void checkAndInvalidate() { | ||||||||
| 4089 | if (Set.size() >= MaxPotentialValues) | ||||||||
| 4090 | indicatePessimisticFixpoint(); | ||||||||
| 4091 | else | ||||||||
| 4092 | reduceUndefValue(); | ||||||||
| 4093 | } | ||||||||
| 4094 | |||||||||
| 4095 | /// If this state contains both undef and non-undef values, we can drop the | ||||||||
| 4096 | /// undef flag since undef can be treated as one of the concrete values. | ||||||||
| 4097 | void reduceUndefValue() { UndefIsContained = UndefIsContained & Set.empty(); } | ||||||||
| 4098 | |||||||||
| 4099 | /// Insert an element into this set. | ||||||||
| 4100 | void insert(const MemberTy &C) { | ||||||||
| 4101 | if (!isValidState()) | ||||||||
| 4102 | return; | ||||||||
| 4103 | Set.insert(C); | ||||||||
| 4104 | checkAndInvalidate(); | ||||||||
| 4105 | } | ||||||||
| 4106 | |||||||||
| 4107 | /// Take union with R. | ||||||||
| 4108 | void unionWith(const PotentialValuesState &R) { | ||||||||
| 4109 | /// If this is a full set, do nothing. | ||||||||
| 4110 | if (!isValidState()) | ||||||||
| 4111 | return; | ||||||||
| 4112 | /// If R is a full set, change this state to a full set as well. | ||||||||
| 4113 | if (!R.isValidState()) { | ||||||||
| 4114 | indicatePessimisticFixpoint(); | ||||||||
| 4115 | return; | ||||||||
| 4116 | } | ||||||||
| 4117 | for (const MemberTy &C : R.Set) | ||||||||
| 4118 | Set.insert(C); | ||||||||
| 4119 | UndefIsContained |= R.undefIsContained(); | ||||||||
| 4120 | checkAndInvalidate(); | ||||||||
| 4121 | } | ||||||||
| 4122 | |||||||||
| 4123 | /// Take union with an undef value. | ||||||||
| 4124 | void unionWithUndef() { | ||||||||
| 4125 | UndefIsContained = true; | ||||||||
| 4126 | reduceUndefValue(); | ||||||||
| 4127 | } | ||||||||
| 4128 | |||||||||
| 4129 | /// Take intersection with R. | ||||||||
| 4130 | void intersectWith(const PotentialValuesState &R) { | ||||||||
| 4131 | /// If R is a full set, do nothing. | ||||||||
| 4132 | if (!R.isValidState()) | ||||||||
| 4133 | return; | ||||||||
| 4134 | /// If this is a full set, change this to R. | ||||||||
| 4135 | if (!isValidState()) { | ||||||||
| 4136 | *this = R; | ||||||||
| 4137 | return; | ||||||||
| 4138 | } | ||||||||
| 4139 | SetTy IntersectSet; | ||||||||
| 4140 | for (const MemberTy &C : Set) { | ||||||||
| 4141 | if (R.Set.count(C)) | ||||||||
| 4142 | IntersectSet.insert(C); | ||||||||
| 4143 | } | ||||||||
| 4144 | Set = IntersectSet; | ||||||||
| 4145 | UndefIsContained &= R.undefIsContained(); | ||||||||
| 4146 | reduceUndefValue(); | ||||||||
| 4147 | } | ||||||||
| 4148 | |||||||||
| 4149 | /// A helper state which indicates whether this state is valid or not. | ||||||||
| 4150 | BooleanState IsValidState; | ||||||||
| 4151 | |||||||||
| 4152 | /// Container for potential values | ||||||||
| 4153 | SetTy Set; | ||||||||
| 4154 | |||||||||
| 4155 | /// Flag for undef value | ||||||||
| 4156 | bool UndefIsContained; | ||||||||
| 4157 | }; | ||||||||
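The invalidation-at-threshold behaviour described for checkAndInvalidate can be sketched with a plain std::set. The threshold value and the struct below are assumptions for illustration, not LLVM's PotentialValuesState:

```cpp
#include <cstdio>
#include <set>

// Toy sketch of the potential-values bookkeeping: collect candidates until a
// threshold is hit, then fall to the "full set" (invalid) worst state.
struct ToyPotentialValues {
  static constexpr unsigned MaxPotentialValues = 3; // assumed threshold
  std::set<int> Set;
  bool Valid = true; // false == "full set" / worst state

  void insert(int C) {
    if (!Valid)
      return;
    Set.insert(C);
    // Give up once the set grows to the threshold: the position may take
    // too many values to be useful.
    if (Set.size() >= MaxPotentialValues)
      Valid = false;
  }
};

int main() {
  ToyPotentialValues S;
  S.insert(1);
  S.insert(2);
  std::printf("valid=%d size=%zu\n", S.Valid, S.Set.size()); // valid=1 size=2
  S.insert(3); // hits the threshold -> pessimistic fixpoint
  std::printf("valid=%d\n", S.Valid);                        // valid=0
}
```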
| 4158 | |||||||||
| 4159 | using PotentialConstantIntValuesState = PotentialValuesState<APInt>; | ||||||||
| 4160 | |||||||||
| 4161 | raw_ostream &operator<<(raw_ostream &OS, | ||||||||
| 4162 | const PotentialConstantIntValuesState &R); | ||||||||
| 4163 | |||||||||
| 4164 | /// An abstract interface for potential values analysis. | ||||||||
| 4165 | /// | ||||||||
| 4166 | /// This AA collects potential values for each IR position. | ||||||||
| 4167 | /// An assumed set of potential values is initialized with the empty set (the | ||||||||
| 4168 | /// best state) and it will grow monotonically as we find more potential values | ||||||||
| 4169 | /// for this position. | ||||||||
| 4170 | /// The set might be forced to the worst state, that is, to contain every | ||||||||
| 4171 | /// possible value for this position in 2 cases. | ||||||||
| 4172 | /// 1. We surpassed the \p MaxPotentialValues threshold. This includes the | ||||||||
| 4173 | /// case that this position is affected (e.g. because of an operation) by a | ||||||||
| 4174 | /// Value that is in the worst state. | ||||||||
| 4175 | /// 2. We tried to initialize on a Value that we cannot handle (e.g. an | ||||||||
| 4176 | /// operator we do not currently handle). | ||||||||
| 4177 | /// | ||||||||
| 4178 | /// TODO: Support values other than constant integers. | ||||||||
| 4179 | struct AAPotentialValues | ||||||||
| 4180 | : public StateWrapper<PotentialConstantIntValuesState, AbstractAttribute> { | ||||||||
| 4181 | using Base = StateWrapper<PotentialConstantIntValuesState, AbstractAttribute>; | ||||||||
| 4182 | AAPotentialValues(const IRPosition &IRP, Attributor &A) : Base(IRP) {} | ||||||||
| 4183 | |||||||||
| 4184 | /// See AbstractAttribute::getState(...). | ||||||||
| 4185 | PotentialConstantIntValuesState &getState() override { return *this; } | ||||||||
| 4186 | const PotentialConstantIntValuesState &getState() const override { | ||||||||
| 4187 | return *this; | ||||||||
| 4188 | } | ||||||||
| 4189 | |||||||||
| 4190 | /// Create an abstract attribute view for the position \p IRP. | ||||||||
| 4191 | static AAPotentialValues &createForPosition(const IRPosition &IRP, | ||||||||
| 4192 | Attributor &A); | ||||||||
| 4193 | |||||||||
| 4194 | /// Return assumed constant for the associated value | ||||||||
| 4195 | Optional<ConstantInt *> | ||||||||
| 4196 | getAssumedConstantInt(Attributor &A, | ||||||||
| 4197 | const Instruction *CtxI = nullptr) const { | ||||||||
| 4198 | if (!isValidState()) | ||||||||
| 4199 | return nullptr; | ||||||||
| 4200 | if (getAssumedSet().size() == 1) | ||||||||
| 4201 | return cast<ConstantInt>(ConstantInt::get(getAssociatedValue().getType(), | ||||||||
| 4202 | *(getAssumedSet().begin()))); | ||||||||
| 4203 | if (getAssumedSet().size() == 0) { | ||||||||
| 4204 | if (undefIsContained()) | ||||||||
| 4205 | return cast<ConstantInt>( | ||||||||
| 4206 | ConstantInt::get(getAssociatedValue().getType(), 0)); | ||||||||
| 4207 | return llvm::None; | ||||||||
| 4208 | } | ||||||||
| 4209 | |||||||||
| 4210 | return nullptr; | ||||||||
| 4211 | } | ||||||||
| 4212 | |||||||||
| 4213 | /// See AbstractAttribute::getName() | ||||||||
| 4214 | const std::string getName() const override { return "AAPotentialValues"; } | ||||||||
| 4215 | |||||||||
| 4216 | /// See AbstractAttribute::getIdAddr() | ||||||||
| 4217 | const char *getIdAddr() const override { return &ID; } | ||||||||
| 4218 | |||||||||
| 4219 | /// This function should return true if the type of the \p AA is | ||||||||
| 4220 | /// AAPotentialValues | ||||||||
| 4221 | static bool classof(const AbstractAttribute *AA) { | ||||||||
| 4222 | return (AA->getIdAddr() == &ID); | ||||||||
| 4223 | } | ||||||||
| 4224 | |||||||||
| 4225 | /// Unique ID (due to the unique address) | ||||||||
| 4226 | static const char ID; | ||||||||
| 4227 | }; | ||||||||
| 4228 | |||||||||
| 4229 | /// An abstract interface for all noundef attributes. | ||||||||
| 4230 | struct AANoUndef | ||||||||
| 4231 | : public IRAttribute<Attribute::NoUndef, | ||||||||
| 4232 | StateWrapper<BooleanState, AbstractAttribute>> { | ||||||||
| 4233 | AANoUndef(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {} | ||||||||
| 4234 | |||||||||
| 4235 | /// Return true if we assume that the underlying value is noundef. | ||||||||
| 4236 | bool isAssumedNoUndef() const { return getAssumed(); } | ||||||||
| 4237 | |||||||||
| 4238 | /// Return true if we know that the underlying value is noundef. | ||||||||
| 4239 | bool isKnownNoUndef() const { return getKnown(); } | ||||||||
| 4240 | |||||||||
| 4241 | /// Create an abstract attribute view for the position \p IRP. | ||||||||
| 4242 | static AANoUndef &createForPosition(const IRPosition &IRP, Attributor &A); | ||||||||
| 4243 | |||||||||
| 4244 | /// See AbstractAttribute::getName() | ||||||||
| 4245 | const std::string getName() const override { return "AANoUndef"; } | ||||||||
| 4246 | |||||||||
| 4247 | /// See AbstractAttribute::getIdAddr() | ||||||||
| 4248 | const char *getIdAddr() const override { return &ID; } | ||||||||
| 4249 | |||||||||
| 4250 | /// This function should return true if the type of the \p AA is AANoUndef | ||||||||
| 4251 | static bool classof(const AbstractAttribute *AA) { | ||||||||
| 4252 | return (AA->getIdAddr() == &ID); | ||||||||
| 4253 | } | ||||||||
| 4254 | |||||||||
| 4255 | /// Unique ID (due to the unique address) | ||||||||
| 4256 | static const char ID; | ||||||||
| 4257 | }; | ||||||||
| 4258 | |||||||||
| 4259 | struct AACallGraphNode; | ||||||||
| 4260 | struct AACallEdges; | ||||||||
| 4261 | |||||||||
| 4262 | /// An iterator for call edges that creates AACallEdges attributes lazily. | ||||||||
| 4263 | /// This iterator becomes invalid if the underlying edge list changes, | ||||||||
| 4264 | /// so it shouldn't outlive an iteration of the Attributor. | ||||||||
| 4265 | class AACallEdgeIterator | ||||||||
| 4266 | : public iterator_adaptor_base<AACallEdgeIterator, | ||||||||
| 4267 | SetVector<Function *>::iterator> { | ||||||||
| 4268 | AACallEdgeIterator(Attributor &A, SetVector<Function *>::iterator Begin) | ||||||||
| 4269 | : iterator_adaptor_base(Begin), A(A) {} | ||||||||
| 4270 | |||||||||
| 4271 | public: | ||||||||
| 4272 | AACallGraphNode *operator*() const; | ||||||||
| 4273 | |||||||||
| 4274 | private: | ||||||||
| 4275 | Attributor &A; | ||||||||
| 4276 | friend AACallEdges; | ||||||||
| 4277 | friend AttributorCallGraph; | ||||||||
| 4278 | }; | ||||||||
| 4279 | |||||||||
| 4280 | struct AACallGraphNode { | ||||||||
| 4281 | AACallGraphNode(Attributor &A) : A(A) {} | ||||||||
| 4282 | virtual ~AACallGraphNode() {} | ||||||||
| 4283 | |||||||||
| 4284 | virtual AACallEdgeIterator optimisticEdgesBegin() const = 0; | ||||||||
| 4285 | virtual AACallEdgeIterator optimisticEdgesEnd() const = 0; | ||||||||
| 4286 | |||||||||
| 4287 | /// Iterator range for exploring the call graph. | ||||||||
| 4288 | iterator_range<AACallEdgeIterator> optimisticEdgesRange() const { | ||||||||
| 4289 | return iterator_range<AACallEdgeIterator>(optimisticEdgesBegin(), | ||||||||
| 4290 | optimisticEdgesEnd()); | ||||||||
| 4291 | } | ||||||||
| 4292 | |||||||||
| 4293 | protected: | ||||||||
| 4294 | /// Reference to Attributor needed for GraphTraits implementation. | ||||||||
| 4295 | Attributor &A; | ||||||||
| 4296 | }; | ||||||||
| 4297 | |||||||||
| 4298 | /// An abstract state for querying live call edges. | ||||||||
| 4299 | /// This interface uses the Attributor's optimistic liveness | ||||||||
| 4300 | /// information to compute the edges that are alive. | ||||||||
| 4301 | struct AACallEdges : public StateWrapper<BooleanState, AbstractAttribute>, | ||||||||
| 4302 | AACallGraphNode { | ||||||||
| 4303 | using Base = StateWrapper<BooleanState, AbstractAttribute>; | ||||||||
| 4304 | |||||||||
| 4305 | AACallEdges(const IRPosition &IRP, Attributor &A) | ||||||||
| 4306 | : Base(IRP), AACallGraphNode(A) {} | ||||||||
| 4307 | |||||||||
| 4308 | /// Get the optimistic edges. | ||||||||
| 4309 | virtual const SetVector<Function *> &getOptimisticEdges() const = 0; | ||||||||
| 4310 | |||||||||
| 4311 | /// Is there any call with an unknown callee? | ||||||||
| 4312 | virtual bool hasUnknownCallee() const = 0; | ||||||||
| 4313 | |||||||||
| 4314 | /// Is there any call with an unknown callee, excluding inline asm? | ||||||||
| 4315 | virtual bool hasNonAsmUnknownCallee() const = 0; | ||||||||
| 4316 | |||||||||
| 4317 | /// Iterator for exploring the call graph. | ||||||||
| 4318 | AACallEdgeIterator optimisticEdgesBegin() const override { | ||||||||
| 4319 | return AACallEdgeIterator(A, getOptimisticEdges().begin()); | ||||||||
| 4320 | } | ||||||||
| 4321 | |||||||||
| 4322 | /// Iterator for exploring the call graph. | ||||||||
| 4323 | AACallEdgeIterator optimisticEdgesEnd() const override { | ||||||||
| 4324 | return AACallEdgeIterator(A, getOptimisticEdges().end()); | ||||||||
| 4325 | } | ||||||||
| 4326 | |||||||||
| 4327 | /// Create an abstract attribute view for the position \p IRP. | ||||||||
| 4328 | static AACallEdges &createForPosition(const IRPosition &IRP, Attributor &A); | ||||||||
| 4329 | |||||||||
| 4330 | /// See AbstractAttribute::getName() | ||||||||
| 4331 | const std::string getName() const override { return "AACallEdges"; } | ||||||||
| 4332 | |||||||||
| 4333 | /// See AbstractAttribute::getIdAddr() | ||||||||
| 4334 | const char *getIdAddr() const override { return &ID; } | ||||||||
| 4335 | |||||||||
| 4336 | /// This function should return true if the type of the \p AA is AACallEdges. | ||||||||
| 4337 | static bool classof(const AbstractAttribute *AA) { | ||||||||
| 4338 | return (AA->getIdAddr() == &ID); | ||||||||
| 4339 | } | ||||||||
| 4340 | |||||||||
| 4341 | /// Unique ID (due to the unique address) | ||||||||
| 4342 | static const char ID; | ||||||||
| 4343 | }; | ||||||||
| 4344 | |||||||||
| 4345 | // Synthetic root node for the Attributor's internal call graph. | ||||||||
| 4346 | struct AttributorCallGraph : public AACallGraphNode { | ||||||||
| 4347 | AttributorCallGraph(Attributor &A) : AACallGraphNode(A) {} | ||||||||
| 4348 | virtual ~AttributorCallGraph() {} | ||||||||
| 4349 | |||||||||
| 4350 | AACallEdgeIterator optimisticEdgesBegin() const override { | ||||||||
| 4351 | return AACallEdgeIterator(A, A.Functions.begin()); | ||||||||
| 4352 | } | ||||||||
| 4353 | |||||||||
| 4354 | AACallEdgeIterator optimisticEdgesEnd() const override { | ||||||||
| 4355 | return AACallEdgeIterator(A, A.Functions.end()); | ||||||||
| 4356 | } | ||||||||
| 4357 | |||||||||
| 4358 | /// Force populate the entire call graph. | ||||||||
| 4359 | void populateAll() const { | ||||||||
| 4360 | for (const AACallGraphNode *AA : optimisticEdgesRange()) { | ||||||||
| 4361 | // Nothing else to do here. | ||||||||
| 4362 | (void)AA; | ||||||||
| 4363 | } | ||||||||
| 4364 | } | ||||||||
| 4365 | |||||||||
| 4366 | void print(); | ||||||||
| 4367 | }; | ||||||||
| 4368 | |||||||||
| 4369 | template <> struct GraphTraits<AACallGraphNode *> { | ||||||||
| 4370 | using NodeRef = AACallGraphNode *; | ||||||||
| 4371 | using ChildIteratorType = AACallEdgeIterator; | ||||||||
| 4372 | |||||||||
| 4373 | static AACallEdgeIterator child_begin(AACallGraphNode *Node) { | ||||||||
| 4374 | return Node->optimisticEdgesBegin(); | ||||||||
| 4375 | } | ||||||||
| 4376 | |||||||||
| 4377 | static AACallEdgeIterator child_end(AACallGraphNode *Node) { | ||||||||
| 4378 | return Node->optimisticEdgesEnd(); | ||||||||
| 4379 | } | ||||||||
| 4380 | }; | ||||||||
| 4381 | |||||||||
| 4382 | template <> | ||||||||
| 4383 | struct GraphTraits<AttributorCallGraph *> | ||||||||
| 4384 | : public GraphTraits<AACallGraphNode *> { | ||||||||
| 4385 | using nodes_iterator = AACallEdgeIterator; | ||||||||
| 4386 | |||||||||
| 4387 | static AACallGraphNode *getEntryNode(AttributorCallGraph *G) { | ||||||||
| 4388 | return static_cast<AACallGraphNode *>(G); | ||||||||
| 4389 | } | ||||||||
| 4390 | |||||||||
| 4391 | static AACallEdgeIterator nodes_begin(const AttributorCallGraph *G) { | ||||||||
| 4392 | return G->optimisticEdgesBegin(); | ||||||||
| 4393 | } | ||||||||
| 4394 | |||||||||
| 4395 | static AACallEdgeIterator nodes_end(const AttributorCallGraph *G) { | ||||||||
| 4396 | return G->optimisticEdgesEnd(); | ||||||||
| 4397 | } | ||||||||
| 4398 | }; | ||||||||
| 4399 | |||||||||
| 4400 | template <> | ||||||||
| 4401 | struct DOTGraphTraits<AttributorCallGraph *> : public DefaultDOTGraphTraits { | ||||||||
| 4402 | DOTGraphTraits(bool Simple = false) : DefaultDOTGraphTraits(Simple) {} | ||||||||
| 4403 | |||||||||
| 4404 | std::string getNodeLabel(const AACallGraphNode *Node, | ||||||||
| 4405 | const AttributorCallGraph *Graph) { | ||||||||
| 4406 | const AACallEdges *AACE = static_cast<const AACallEdges *>(Node); | ||||||||
| 4407 | return AACE->getAssociatedFunction()->getName().str(); | ||||||||
| 4408 | } | ||||||||
| 4409 | |||||||||
| 4410 | static bool isNodeHidden(const AACallGraphNode *Node, | ||||||||
| 4411 | const AttributorCallGraph *Graph) { | ||||||||
| 4412 | // Hide the synth root. | ||||||||
| 4413 | return static_cast<const AACallGraphNode *>(Graph) == Node; | ||||||||
| 4414 | } | ||||||||
| 4415 | }; | ||||||||
| 4416 | |||||||||
| 4417 | struct AAExecutionDomain | ||||||||
| 4418 | : public StateWrapper<BooleanState, AbstractAttribute> { | ||||||||
| 4419 | using Base = StateWrapper<BooleanState, AbstractAttribute>; | ||||||||
| 4420 | AAExecutionDomain(const IRPosition &IRP, Attributor &A) : Base(IRP) {} | ||||||||
| 4421 | |||||||||
| 4422 | /// Create an abstract attribute view for the position \p IRP. | ||||||||
| 4423 | static AAExecutionDomain &createForPosition(const IRPosition &IRP, | ||||||||
| 4424 | Attributor &A); | ||||||||
| 4425 | |||||||||
| 4426 | /// See AbstractAttribute::getName(). | ||||||||
| 4427 | const std::string getName() const override { return "AAExecutionDomain"; } | ||||||||
| 4428 | |||||||||
| 4429 | /// See AbstractAttribute::getIdAddr(). | ||||||||
| 4430 | const char *getIdAddr() const override { return &ID; } | ||||||||
| 4431 | |||||||||
| 4432 | /// Check if an instruction is executed only by the initial thread. | ||||||||
| 4433 | virtual bool isExecutedByInitialThreadOnly(const Instruction &) const = 0; | ||||||||
| 4434 | |||||||||
| 4435 | /// Check if a basic block is executed only by the initial thread. | ||||||||
| 4436 | virtual bool isExecutedByInitialThreadOnly(const BasicBlock &) const = 0; | ||||||||
| 4437 | |||||||||
| 4438 | /// This function should return true if the type of the \p AA is | ||||||||
| 4439 | /// AAExecutionDomain. | ||||||||
| 4440 | static bool classof(const AbstractAttribute *AA) { | ||||||||
| 4441 | return (AA->getIdAddr() == &ID); | ||||||||
| 4442 | } | ||||||||
| 4443 | |||||||||
| 4444 | /// Unique ID (due to the unique address) | ||||||||
| 4445 | static const char ID; | ||||||||
| 4446 | }; | ||||||||
| 4447 | |||||||||
| 4448 | /// An abstract Attribute for computing reachability between functions. | ||||||||
| 4449 | struct AAFunctionReachability | ||||||||
| 4450 | : public StateWrapper<BooleanState, AbstractAttribute> { | ||||||||
| 4451 | using Base = StateWrapper<BooleanState, AbstractAttribute>; | ||||||||
| 4452 | |||||||||
| 4453 | AAFunctionReachability(const IRPosition &IRP, Attributor &A) : Base(IRP) {} | ||||||||
| 4454 | |||||||||
| 4455 | /// Returns true if the function represented by this position can reach \p Fn. | ||||||||
| 4456 | virtual bool canReach(Attributor &A, Function *Fn) const = 0; | ||||||||
| 4457 | |||||||||
| 4458 | /// Create an abstract attribute view for the position \p IRP. | ||||||||
| 4459 | static AAFunctionReachability &createForPosition(const IRPosition &IRP, | ||||||||
| 4460 | Attributor &A); | ||||||||
| 4461 | |||||||||
| 4462 | /// See AbstractAttribute::getName() | ||||||||
| 4463 | const std::string getName() const override { return "AAFunctionReachability"; } | ||||||||
| 4464 | |||||||||
| 4465 | /// See AbstractAttribute::getIdAddr() | ||||||||
| 4466 | const char *getIdAddr() const override { return &ID; } | ||||||||
| 4467 | |||||||||
| 4468 | /// This function should return true if the type of the \p AA is AAFunctionReachability. | ||||||||
| 4469 | static bool classof(const AbstractAttribute *AA) { | ||||||||
| 4470 | return (AA->getIdAddr() == &ID); | ||||||||
| 4471 | } | ||||||||
| 4472 | |||||||||
| 4473 | /// Unique ID (due to the unique address) | ||||||||
| 4474 | static const char ID; | ||||||||
| 4475 | |||||||||
| 4476 | private: | ||||||||
| 4477 | /// Can this function reach a call with an unknown callee. | ||||||||
| 4478 | virtual bool canReachUnknownCallee() const = 0; | ||||||||
| 4479 | }; | ||||||||
| 4480 | |||||||||
| 4481 | /// An abstract interface for struct information. | ||||||||
| 4482 | struct AAPointerInfo : public AbstractAttribute { | ||||||||
| 4483 | AAPointerInfo(const IRPosition &IRP) : AbstractAttribute(IRP) {} | ||||||||
| 4484 | |||||||||
| 4485 | enum AccessKind { | ||||||||
| 4486 | AK_READ = 1 << 0, | ||||||||
| 4487 | AK_WRITE = 1 << 1, | ||||||||
| 4488 | AK_READ_WRITE = AK_READ | AK_WRITE, | ||||||||
| 4489 | }; | ||||||||
| 4490 | |||||||||
| 4491 | /// An access description. | ||||||||
| 4492 | struct Access { | ||||||||
| 4493 | Access(Instruction *I, Optional<Value *> Content, AccessKind Kind, Type *Ty) | ||||||||
| 4494 | : LocalI(I), RemoteI(I), Content(Content), Kind(Kind), Ty(Ty) {} | ||||||||
| 4495 | Access(Instruction *LocalI, Instruction *RemoteI, Optional<Value *> Content, | ||||||||
| 4496 | AccessKind Kind, Type *Ty) | ||||||||
| 4497 | : LocalI(LocalI), RemoteI(RemoteI), Content(Content), Kind(Kind), | ||||||||
| 4498 | Ty(Ty) {} | ||||||||
| 4499 | Access(const Access &Other) | ||||||||
| 4500 | : LocalI(Other.LocalI), RemoteI(Other.RemoteI), Content(Other.Content), | ||||||||
| 4501 | Kind(Other.Kind), Ty(Other.Ty) {} | ||||||||
| 4502 | Access(const Access &&Other) | ||||||||
| 4503 | : LocalI(Other.LocalI), RemoteI(Other.RemoteI), Content(Other.Content), | ||||||||
| 4504 | Kind(Other.Kind), Ty(Other.Ty) {} | ||||||||
| 4505 | |||||||||
| 4506 | Access &operator=(const Access &Other) { | ||||||||
| 4507 | LocalI = Other.LocalI; | ||||||||
| 4508 | RemoteI = Other.RemoteI; | ||||||||
| 4509 | Content = Other.Content; | ||||||||
| 4510 | Kind = Other.Kind; | ||||||||
| 4511 | Ty = Other.Ty; | ||||||||
| 4512 | return *this; | ||||||||
| 4513 | } | ||||||||
| 4514 | bool operator==(const Access &R) const { | ||||||||
| 4515 | return LocalI == R.LocalI && RemoteI == R.RemoteI && | ||||||||
| 4516 | Content == R.Content && Kind == R.Kind; | ||||||||
| 4517 | } | ||||||||
| 4518 | bool operator!=(const Access &R) const { return !(*this == R); } | ||||||||
| 4519 | |||||||||
| 4520 | Access &operator&=(const Access &R) { | ||||||||
| 4521 | assert(RemoteI == R.RemoteI && "Expected same instruction!")((void)0); | ||||||||
| 4522 | Content = | ||||||||
| 4523 | AA::combineOptionalValuesInAAValueLatice(Content, R.Content, Ty); | ||||||||
| 4524 | Kind = AccessKind(Kind | R.Kind); | ||||||||
| 4525 | return *this; | ||||||||
| 4526 | } | ||||||||
| 4527 | |||||||||
| 4528 | /// Return the access kind. | ||||||||
| 4529 | AccessKind getKind() const { return Kind; } | ||||||||
| 4530 | |||||||||
| 4531 | /// Return true if this is a read access. | ||||||||
| 4532 | bool isRead() const { return Kind & AK_READ; } | ||||||||
| 4533 | |||||||||
| 4534 | /// Return true if this is a write access. | ||||||||
| 4535 | bool isWrite() const { return Kind & AK_WRITE; } | ||||||||
| 4536 | |||||||||
| 4537 | /// Return the instruction that causes the access with respect to the local | ||||||||
| 4538 | /// scope of the associated attribute. | ||||||||
| 4539 | Instruction *getLocalInst() const { return LocalI; } | ||||||||
| 4540 | |||||||||
| 4541 | /// Return the actual instruction that causes the access. | ||||||||
| 4542 | Instruction *getRemoteInst() const { return RemoteI; } | ||||||||
| 4543 | |||||||||
| 4544 | /// Return true if the value written is not known yet. | ||||||||
| 4545 | bool isWrittenValueYetUndetermined() const { return !Content.hasValue(); } | ||||||||
| 4546 | |||||||||
| 4547 | /// Return true if the value written cannot be determined at all. | ||||||||
| 4548 | bool isWrittenValueUnknown() const { | ||||||||
| 4549 | return Content.hasValue() && !*Content; | ||||||||
| 4550 | } | ||||||||
| 4551 | |||||||||
| 4552 | /// Return the type associated with the access, if known. | ||||||||
| 4553 | Type *getType() const { return Ty; } | ||||||||
| 4554 | |||||||||
| 4555 | /// Return the value written, if any. As long as | ||||||||
| 4556 | /// isWrittenValueYetUndetermined returns true this function shall not be | ||||||||
| 4557 | /// called. | ||||||||
| 4558 | Value *getWrittenValue() const { return *Content; } | ||||||||
| 4559 | |||||||||
| 4560 | /// Return the written value, which can be `llvm::None` if it is not yet | ||||||||
| 4561 | /// determined. | ||||||||
| 4562 | Optional<Value *> getContent() const { return Content; } | ||||||||
| 4563 | |||||||||
| 4564 | private: | ||||||||
| 4565 | /// The instruction responsible for the access with respect to the local | ||||||||
| 4566 | /// scope of the associated attribute. | ||||||||
| 4567 | Instruction *LocalI; | ||||||||
| 4568 | |||||||||
| 4569 | /// The instruction responsible for the access. | ||||||||
| 4570 | Instruction *RemoteI; | ||||||||
| 4571 | |||||||||
| 4572 | /// The value written, if any. `llvm::None` means "not known yet", `nullptr` | ||||||||
| 4573 | /// means it cannot be determined. | ||||||||
| 4574 | Optional<Value *> Content; | ||||||||
| 4575 | |||||||||
| 4576 | /// The access kind, e.g., READ, as bitset (could be more than one). | ||||||||
| 4577 | AccessKind Kind; | ||||||||
| 4578 | |||||||||
| 4579 | /// The type of the content, thus the type read/written, can be null if not | ||||||||
| 4580 | /// available. | ||||||||
| 4581 | Type *Ty; | ||||||||
| 4582 | }; | ||||||||
| 4583 | |||||||||
| 4584 | /// Create an abstract attribute view for the position \p IRP. | ||||||||
| 4585 | static AAPointerInfo &createForPosition(const IRPosition &IRP, Attributor &A); | ||||||||
| 4586 | |||||||||
| 4587 | /// See AbstractAttribute::getName() | ||||||||
| 4588 | const std::string getName() const override { return "AAPointerInfo"; } | ||||||||
| 4589 | |||||||||
| 4590 | /// See AbstractAttribute::getIdAddr() | ||||||||
| 4591 | const char *getIdAddr() const override { return &ID; } | ||||||||
| 4592 | |||||||||
| 4593 | /// Call \p CB on all accesses that might interfere with \p LI or \p SI and return true | ||||||||
| 4594 | /// if all such accesses were known and the callback returned true for all of | ||||||||
| 4595 | /// them, false otherwise. | ||||||||
| 4596 | virtual bool forallInterferingAccesses( | ||||||||
| 4597 | LoadInst &LI, function_ref<bool(const Access &, bool)> CB) const = 0; | ||||||||
| 4598 | virtual bool forallInterferingAccesses( | ||||||||
| 4599 | StoreInst &SI, function_ref<bool(const Access &, bool)> CB) const = 0; | ||||||||
| 4600 | |||||||||
| 4601 | /// This function should return true if the type of the \p AA is AAPointerInfo | ||||||||
| 4602 | static bool classof(const AbstractAttribute *AA) { | ||||||||
| 4603 | return (AA->getIdAddr() == &ID); | ||||||||
| 4604 | } | ||||||||
| 4605 | |||||||||
| 4606 | /// Unique ID (due to the unique address) | ||||||||
| 4607 | static const char ID; | ||||||||
| 4608 | }; | ||||||||
| 4609 | |||||||||
| 4610 | raw_ostream &operator<<(raw_ostream &, const AAPointerInfo::Access &); | ||||||||
| 4611 | |||||||||
| 4612 | /// Run options, used by the pass manager. | ||||||||
| 4613 | enum AttributorRunOption { | ||||||||
| 4614 | NONE = 0, | ||||||||
| 4615 | MODULE = 1 << 0, | ||||||||
| 4616 | CGSCC = 1 << 1, | ||||||||
| 4617 | ALL = MODULE | CGSCC | ||||||||
| 4618 | }; | ||||||||
| 4619 | |||||||||
| 4620 | } // end namespace llvm | ||||||||
| 4621 | |||||||||
| 4622 | #endif // LLVM_TRANSFORMS_IPO_ATTRIBUTOR_H |
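To make the access bookkeeping above concrete, the following standalone sketch (plain C++, not part of the headers listed here; everything outside the copied enum values is hypothetical example code) mirrors the AccessKind bit flags: combining a read with a write yields AK_READ_WRITE, which is what Access::operator&= produces when it ORs the kinds of two accesses to the same remote instruction.

#include <cassert>

// Mirror of the AccessKind flags from AAPointerInfo (illustration only).
enum AccessKind {
  AK_READ = 1 << 0,
  AK_WRITE = 1 << 1,
  AK_READ_WRITE = AK_READ | AK_WRITE,
};

int main() {
  AccessKind Kind = AK_READ;
  // Merge in a write access, as operator&= does when folding two accesses
  // to the same remote instruction into one description.
  Kind = AccessKind(Kind | AK_WRITE);
  assert(Kind == AK_READ_WRITE);
  assert((Kind & AK_READ) && (Kind & AK_WRITE)); // isRead() and isWrite()
  return 0;
}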
| 1 | //===- Allocator.h - Simple memory allocation abstraction -------*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | /// \file |
| 9 | /// |
| 10 | /// This file defines the BumpPtrAllocator interface. BumpPtrAllocator conforms |
| 11 | /// to the LLVM "Allocator" concept and is similar to MallocAllocator, but |
| 12 | /// objects cannot be deallocated. Their lifetime is tied to the lifetime of the |
| 13 | /// allocator. |
| 14 | /// |
| 15 | //===----------------------------------------------------------------------===// |
| 16 | |
| 17 | #ifndef LLVM_SUPPORT_ALLOCATOR_H |
| 18 | #define LLVM_SUPPORT_ALLOCATOR_H |
| 19 | |
| 20 | #include "llvm/ADT/Optional.h" |
| 21 | #include "llvm/ADT/SmallVector.h" |
| 22 | #include "llvm/Support/Alignment.h" |
| 23 | #include "llvm/Support/AllocatorBase.h" |
| 24 | #include "llvm/Support/Compiler.h" |
| 25 | #include "llvm/Support/ErrorHandling.h" |
| 26 | #include "llvm/Support/MathExtras.h" |
| 27 | #include "llvm/Support/MemAlloc.h" |
| 28 | #include <algorithm> |
| 29 | #include <cassert> |
| 30 | #include <cstddef> |
| 31 | #include <cstdint> |
| 32 | #include <cstdlib> |
| 33 | #include <iterator> |
| 34 | #include <type_traits> |
| 35 | #include <utility> |
| 36 | |
| 37 | namespace llvm { |
| 38 | |
| 39 | namespace detail { |
| 40 | |
| 41 | // We call out to an external function to actually print the message as the |
| 42 | // printing code uses Allocator.h in its implementation. |
| 43 | void printBumpPtrAllocatorStats(unsigned NumSlabs, size_t BytesAllocated, |
| 44 | size_t TotalMemory); |
| 45 | |
| 46 | } // end namespace detail |
| 47 | |
| 48 | /// Allocate memory in an ever growing pool, as if by bump-pointer. |
| 49 | /// |
| 50 | /// This isn't strictly a bump-pointer allocator as it uses backing slabs of |
| 51 | /// memory rather than relying on a boundless contiguous heap. However, it has |
| 52 | /// bump-pointer semantics in that it is a monotonically growing pool of memory |
| 53 | /// where every allocation is found by merely allocating the next N bytes in |
| 54 | /// the slab, or the next N bytes in the next slab. |
| 55 | /// |
| 56 | /// Note that this also has a threshold for forcing allocations above a certain |
| 57 | /// size into their own slab. |
| 58 | /// |
| 59 | /// The BumpPtrAllocatorImpl template defaults to using a MallocAllocator |
| 60 | /// object, which wraps malloc, to allocate memory, but it can be changed to |
| 61 | /// use a custom allocator. |
| 62 | /// |
| 63 | /// The GrowthDelay specifies after how many allocated slabs the allocator |
| 64 | /// increases the size of the slabs. |
| 65 | template <typename AllocatorT = MallocAllocator, size_t SlabSize = 4096, |
| 66 | size_t SizeThreshold = SlabSize, size_t GrowthDelay = 128> |
| 67 | class BumpPtrAllocatorImpl |
| 68 | : public AllocatorBase<BumpPtrAllocatorImpl<AllocatorT, SlabSize, |
| 69 | SizeThreshold, GrowthDelay>>, |
| 70 | private AllocatorT { |
| 71 | public: |
| 72 | static_assert(SizeThreshold <= SlabSize, |
| 73 | "The SizeThreshold must be at most the SlabSize to ensure " |
| 74 | "that objects larger than a slab go into their own memory " |
| 75 | "allocation."); |
| 76 | static_assert(GrowthDelay > 0, |
| 77 | "GrowthDelay must be at least 1 which already increases the" |
| 78 | "slab size after each allocated slab."); |
| 79 | |
| 80 | BumpPtrAllocatorImpl() = default; |
| 81 | |
| 82 | template <typename T> |
| 83 | BumpPtrAllocatorImpl(T &&Allocator) |
| 84 | : AllocatorT(std::forward<T &&>(Allocator)) {} |
| 85 | |
| 86 | // Manually implement a move constructor as we must clear the old allocator's |
| 87 | // slabs as a matter of correctness. |
| 88 | BumpPtrAllocatorImpl(BumpPtrAllocatorImpl &&Old) |
| 89 | : AllocatorT(static_cast<AllocatorT &&>(Old)), CurPtr(Old.CurPtr), |
| 90 | End(Old.End), Slabs(std::move(Old.Slabs)), |
| 91 | CustomSizedSlabs(std::move(Old.CustomSizedSlabs)), |
| 92 | BytesAllocated(Old.BytesAllocated), RedZoneSize(Old.RedZoneSize) { |
| 93 | Old.CurPtr = Old.End = nullptr; |
| 94 | Old.BytesAllocated = 0; |
| 95 | Old.Slabs.clear(); |
| 96 | Old.CustomSizedSlabs.clear(); |
| 97 | } |
| 98 | |
| 99 | ~BumpPtrAllocatorImpl() { |
| 100 | DeallocateSlabs(Slabs.begin(), Slabs.end()); |
| 101 | DeallocateCustomSizedSlabs(); |
| 102 | } |
| 103 | |
| 104 | BumpPtrAllocatorImpl &operator=(BumpPtrAllocatorImpl &&RHS) { |
| 105 | DeallocateSlabs(Slabs.begin(), Slabs.end()); |
| 106 | DeallocateCustomSizedSlabs(); |
| 107 | |
| 108 | CurPtr = RHS.CurPtr; |
| 109 | End = RHS.End; |
| 110 | BytesAllocated = RHS.BytesAllocated; |
| 111 | RedZoneSize = RHS.RedZoneSize; |
| 112 | Slabs = std::move(RHS.Slabs); |
| 113 | CustomSizedSlabs = std::move(RHS.CustomSizedSlabs); |
| 114 | AllocatorT::operator=(static_cast<AllocatorT &&>(RHS)); |
| 115 | |
| 116 | RHS.CurPtr = RHS.End = nullptr; |
| 117 | RHS.BytesAllocated = 0; |
| 118 | RHS.Slabs.clear(); |
| 119 | RHS.CustomSizedSlabs.clear(); |
| 120 | return *this; |
| 121 | } |
| 122 | |
| 123 | /// Deallocate all but the current slab and reset the current pointer |
| 124 | /// to the beginning of it, freeing all memory allocated so far. |
| 125 | void Reset() { |
| 126 | // Deallocate all but the first slab, and deallocate all custom-sized slabs. |
| 127 | DeallocateCustomSizedSlabs(); |
| 128 | CustomSizedSlabs.clear(); |
| 129 | |
| 130 | if (Slabs.empty()) |
| 131 | return; |
| 132 | |
| 133 | // Reset the state. |
| 134 | BytesAllocated = 0; |
| 135 | CurPtr = (char *)Slabs.front(); |
| 136 | End = CurPtr + SlabSize; |
| 137 | |
| 138 | __asan_poison_memory_region(*Slabs.begin(), computeSlabSize(0)); |
| 139 | DeallocateSlabs(std::next(Slabs.begin()), Slabs.end()); |
| 140 | Slabs.erase(std::next(Slabs.begin()), Slabs.end()); |
| 141 | } |
| 142 | |
| 143 | /// Allocate space at the specified alignment. |
| 144 | LLVM_ATTRIBUTE_RETURNS_NONNULL__attribute__((returns_nonnull)) LLVM_ATTRIBUTE_RETURNS_NOALIAS__attribute__((__malloc__)) void * |
| 145 | Allocate(size_t Size, Align Alignment) { |
| 146 | // Keep track of how many bytes we've allocated. |
| 147 | BytesAllocated += Size; |
| 148 | |
| 149 | size_t Adjustment = offsetToAlignedAddr(CurPtr, Alignment); |
| 150 | assert(Adjustment + Size >= Size && "Adjustment + Size must not overflow")((void)0); |
| 151 | |
| 152 | size_t SizeToAllocate = Size; |
| 153 | #if LLVM_ADDRESS_SANITIZER_BUILD0 |
| 154 | // Add trailing bytes as a "red zone" under ASan. |
| 155 | SizeToAllocate += RedZoneSize; |
| 156 | #endif |
| 157 | |
| 158 | // Check if we have enough space. |
| 159 | if (Adjustment + SizeToAllocate <= size_t(End - CurPtr)) { |
| 160 | char *AlignedPtr = CurPtr + Adjustment; |
| 161 | CurPtr = AlignedPtr + SizeToAllocate; |
| 162 | // Update the allocation point of this memory block in MemorySanitizer. |
| 163 | // Without this, MemorySanitizer messages for values originated from here |
| 164 | // will point to the allocation of the entire slab. |
| 165 | __msan_allocated_memory(AlignedPtr, Size); |
| 166 | // Similarly, tell ASan about this space. |
| 167 | __asan_unpoison_memory_region(AlignedPtr, Size); |
| 168 | return AlignedPtr; |
| 169 | } |
| 170 | |
| 171 | // If Size is really big, allocate a separate slab for it. |
| 172 | size_t PaddedSize = SizeToAllocate + Alignment.value() - 1; |
| 173 | if (PaddedSize > SizeThreshold) { |
| 174 | void *NewSlab = |
| 175 | AllocatorT::Allocate(PaddedSize, alignof(std::max_align_t)); |
| 176 | // We own the new slab and don't want anyone reading anything other than |
| 177 | // pieces returned from this method. So poison the whole slab. |
| 178 | __asan_poison_memory_region(NewSlab, PaddedSize); |
| 179 | CustomSizedSlabs.push_back(std::make_pair(NewSlab, PaddedSize)); |
| 180 | |
| 181 | uintptr_t AlignedAddr = alignAddr(NewSlab, Alignment); |
| 182 | assert(AlignedAddr + Size <= (uintptr_t)NewSlab + PaddedSize)((void)0); |
| 183 | char *AlignedPtr = (char*)AlignedAddr; |
| 184 | __msan_allocated_memory(AlignedPtr, Size); |
| 185 | __asan_unpoison_memory_region(AlignedPtr, Size); |
| 186 | return AlignedPtr; |
| 187 | } |
| 188 | |
| 189 | // Otherwise, start a new slab and try again. |
| 190 | StartNewSlab(); |
| 191 | uintptr_t AlignedAddr = alignAddr(CurPtr, Alignment); |
| 192 | assert(AlignedAddr + SizeToAllocate <= (uintptr_t)End &&((void)0) |
| 193 | "Unable to allocate memory!")((void)0); |
| 194 | char *AlignedPtr = (char*)AlignedAddr; |
| 195 | CurPtr = AlignedPtr + SizeToAllocate; |
| 196 | __msan_allocated_memory(AlignedPtr, Size); |
| 197 | __asan_unpoison_memory_region(AlignedPtr, Size); |
| 198 | return AlignedPtr; |
| 199 | } |
| 200 | |
| 201 | inline LLVM_ATTRIBUTE_RETURNS_NONNULL__attribute__((returns_nonnull)) LLVM_ATTRIBUTE_RETURNS_NOALIAS__attribute__((__malloc__)) void * |
| 202 | Allocate(size_t Size, size_t Alignment) { |
| 203 | assert(Alignment > 0 && "0-byte alignment is not allowed. Use 1 instead.")((void)0); |
| 204 | return Allocate(Size, Align(Alignment)); |
| 205 | } |
| 206 | |
| 207 | // Pull in base class overloads. |
| 208 | using AllocatorBase<BumpPtrAllocatorImpl>::Allocate; |
| 209 | |
| 210 | // Bump pointer allocators are expected to never free their storage; and |
| 211 | // clients expect pointers to remain valid for non-dereferencing uses even |
| 212 | // after deallocation. |
| 213 | void Deallocate(const void *Ptr, size_t Size, size_t /*Alignment*/) { |
| 214 | __asan_poison_memory_region(Ptr, Size); |
| 215 | } |
| 216 | |
| 217 | // Pull in base class overloads. |
| 218 | using AllocatorBase<BumpPtrAllocatorImpl>::Deallocate; |
| 219 | |
| 220 | size_t GetNumSlabs() const { return Slabs.size() + CustomSizedSlabs.size(); } |
| 221 | |
| 222 | /// \return An index uniquely and reproducibly identifying |
| 223 | /// an input pointer \p Ptr in the given allocator. |
| 224 | /// The returned value is negative iff the object is inside a custom-size |
| 225 | /// slab. |
| 226 | /// Returns an empty optional if the pointer is not found in the allocator. |
| 227 | llvm::Optional<int64_t> identifyObject(const void *Ptr) { |
| 228 | const char *P = static_cast<const char *>(Ptr); |
| 229 | int64_t InSlabIdx = 0; |
| 230 | for (size_t Idx = 0, E = Slabs.size(); Idx < E; Idx++) { |
| 231 | const char *S = static_cast<const char *>(Slabs[Idx]); |
| 232 | if (P >= S && P < S + computeSlabSize(Idx)) |
| 233 | return InSlabIdx + static_cast<int64_t>(P - S); |
| 234 | InSlabIdx += static_cast<int64_t>(computeSlabSize(Idx)); |
| 235 | } |
| 236 | |
| 237 | // Use negative index to denote custom sized slabs. |
| 238 | int64_t InCustomSizedSlabIdx = -1; |
| 239 | for (size_t Idx = 0, E = CustomSizedSlabs.size(); Idx < E; Idx++) { |
| 240 | const char *S = static_cast<const char *>(CustomSizedSlabs[Idx].first); |
| 241 | size_t Size = CustomSizedSlabs[Idx].second; |
| 242 | if (P >= S && P < S + Size) |
| 243 | return InCustomSizedSlabIdx - static_cast<int64_t>(P - S); |
| 244 | InCustomSizedSlabIdx -= static_cast<int64_t>(Size); |
| 245 | } |
| 246 | return None; |
| 247 | } |
| 248 | |
| 249 | /// A wrapper around identifyObject that additionally asserts that |
| 250 | /// the object is indeed within the allocator. |
| 251 | /// \return An index uniquely and reproducibly identifying |
| 252 | /// an input pointer \p Ptr in the given allocator. |
| 253 | int64_t identifyKnownObject(const void *Ptr) { |
| 254 | Optional<int64_t> Out = identifyObject(Ptr); |
| 255 | assert(Out && "Wrong allocator used")((void)0); |
| 256 | return *Out; |
| 257 | } |
| 258 | |
| 259 | /// A wrapper around identifyKnownObject. Accepts type information |
| 260 | /// about the object and produces a smaller identifier by relying on |
| 261 | /// the alignment information. Note that sub-classes may have different |
| 262 | /// alignment, so the most base class should be passed as template parameter |
| 263 | /// in order to obtain correct results. For that reason automatic template |
| 264 | /// parameter deduction is disabled. |
| 265 | /// \return An index uniquely and reproducibly identifying |
| 266 | /// an input pointer \p Ptr in the given allocator. This identifier is |
| 267 | /// different from the ones produced by identifyObject and |
| 268 | /// identifyAlignedObject. |
| 269 | template <typename T> |
| 270 | int64_t identifyKnownAlignedObject(const void *Ptr) { |
| 271 | int64_t Out = identifyKnownObject(Ptr); |
| 272 | assert(Out % alignof(T) == 0 && "Wrong alignment information")((void)0); |
| 273 | return Out / alignof(T); |
| 274 | } |
| 275 | |
| 276 | size_t getTotalMemory() const { |
| 277 | size_t TotalMemory = 0; |
| 278 | for (auto I = Slabs.begin(), E = Slabs.end(); I != E; ++I) |
| 279 | TotalMemory += computeSlabSize(std::distance(Slabs.begin(), I)); |
| 280 | for (auto &PtrAndSize : CustomSizedSlabs) |
| 281 | TotalMemory += PtrAndSize.second; |
| 282 | return TotalMemory; |
| 283 | } |
| 284 | |
| 285 | size_t getBytesAllocated() const { return BytesAllocated; } |
| 286 | |
| 287 | void setRedZoneSize(size_t NewSize) { |
| 288 | RedZoneSize = NewSize; |
| 289 | } |
| 290 | |
| 291 | void PrintStats() const { |
| 292 | detail::printBumpPtrAllocatorStats(Slabs.size(), BytesAllocated, |
| 293 | getTotalMemory()); |
| 294 | } |
| 295 | |
| 296 | private: |
| 297 | /// The current pointer into the current slab. |
| 298 | /// |
| 299 | /// This points to the next free byte in the slab. |
| 300 | char *CurPtr = nullptr; |
| 301 | |
| 302 | /// The end of the current slab. |
| 303 | char *End = nullptr; |
| 304 | |
| 305 | /// The slabs allocated so far. |
| 306 | SmallVector<void *, 4> Slabs; |
| 307 | |
| 308 | /// Custom-sized slabs allocated for too-large allocation requests. |
| 309 | SmallVector<std::pair<void *, size_t>, 0> CustomSizedSlabs; |
| 310 | |
| 311 | /// How many bytes we've allocated. |
| 312 | /// |
| 313 | /// Used so that we can compute how much space was wasted. |
| 314 | size_t BytesAllocated = 0; |
| 315 | |
| 316 | /// The number of bytes to put between allocations when running under |
| 317 | /// a sanitizer. |
| 318 | size_t RedZoneSize = 1; |
| 319 | |
| 320 | static size_t computeSlabSize(unsigned SlabIdx) { |
| 321 | // Scale the actual allocated slab size based on the number of slabs |
| 322 | // allocated. Every GrowthDelay slabs allocated, we double |
| 323 | // the allocated size to reduce allocation frequency, but saturate at |
| 324 | // multiplying the slab size by 2^30. |
| 325 | return SlabSize * |
| 326 | ((size_t)1 << std::min<size_t>(30, SlabIdx / GrowthDelay)); |
| 327 | } |
| 328 | |
| 329 | /// Allocate a new slab and move the bump pointers over into the new |
| 330 | /// slab, modifying CurPtr and End. |
| 331 | void StartNewSlab() { |
| 332 | size_t AllocatedSlabSize = computeSlabSize(Slabs.size()); |
| 333 | |
| 334 | void *NewSlab = |
| 335 | AllocatorT::Allocate(AllocatedSlabSize, alignof(std::max_align_t)); |
| 336 | // We own the new slab and don't want anyone reading anything other than |
| 337 | // pieces returned from this method. So poison the whole slab. |
| 338 | __asan_poison_memory_region(NewSlab, AllocatedSlabSize); |
| 339 | |
| 340 | Slabs.push_back(NewSlab); |
| 341 | CurPtr = (char *)(NewSlab); |
| 342 | End = ((char *)NewSlab) + AllocatedSlabSize; |
| 343 | } |
| 344 | |
| 345 | /// Deallocate a sequence of slabs. |
| 346 | void DeallocateSlabs(SmallVectorImpl<void *>::iterator I, |
| 347 | SmallVectorImpl<void *>::iterator E) { |
| 348 | for (; I != E; ++I) { |
| 349 | size_t AllocatedSlabSize = |
| 350 | computeSlabSize(std::distance(Slabs.begin(), I)); |
| 351 | AllocatorT::Deallocate(*I, AllocatedSlabSize, alignof(std::max_align_t)); |
| 352 | } |
| 353 | } |
| 354 | |
| 355 | /// Deallocate all memory for custom sized slabs. |
| 356 | void DeallocateCustomSizedSlabs() { |
| 357 | for (auto &PtrAndSize : CustomSizedSlabs) { |
| 358 | void *Ptr = PtrAndSize.first; |
| 359 | size_t Size = PtrAndSize.second; |
| 360 | AllocatorT::Deallocate(Ptr, Size, alignof(std::max_align_t)); |
| 361 | } |
| 362 | } |
| 363 | |
| 364 | template <typename T> friend class SpecificBumpPtrAllocator; |
| 365 | }; |
| 366 | |
| 367 | /// The standard BumpPtrAllocator which just uses the default template |
| 368 | /// parameters. |
| 369 | typedef BumpPtrAllocatorImpl<> BumpPtrAllocator; |
| 370 | |
| 371 | /// A BumpPtrAllocator that allows only elements of a specific type to be |
| 372 | /// allocated. |
| 373 | /// |
| 374 | /// This allows calling the destructor in DestroyAll() and when the allocator is |
| 375 | /// destroyed. |
| 376 | template <typename T> class SpecificBumpPtrAllocator { |
| 377 | BumpPtrAllocator Allocator; |
| 378 | |
| 379 | public: |
| 380 | SpecificBumpPtrAllocator() { |
| 381 | // Because SpecificBumpPtrAllocator walks the memory to call destructors, |
| 382 | // it can't have red zones between allocations. |
| 383 | Allocator.setRedZoneSize(0); |
| 384 | } |
| 385 | SpecificBumpPtrAllocator(SpecificBumpPtrAllocator &&Old) |
| 386 | : Allocator(std::move(Old.Allocator)) {} |
| 387 | ~SpecificBumpPtrAllocator() { DestroyAll(); } |
| 388 | |
| 389 | SpecificBumpPtrAllocator &operator=(SpecificBumpPtrAllocator &&RHS) { |
| 390 | Allocator = std::move(RHS.Allocator); |
| 391 | return *this; |
| 392 | } |
| 393 | |
| 394 | /// Call the destructor of each allocated object and deallocate all but the |
| 395 | /// current slab and reset the current pointer to the beginning of it, freeing |
| 396 | /// all memory allocated so far. |
| 397 | void DestroyAll() { |
| 398 | auto DestroyElements = [](char *Begin, char *End) { |
| 399 | assert(Begin == (char *)alignAddr(Begin, Align::Of<T>()))((void)0); |
| 400 | for (char *Ptr = Begin; Ptr + sizeof(T) <= End; Ptr += sizeof(T)) |
| 401 | reinterpret_cast<T *>(Ptr)->~T(); |
| 402 | }; |
| 403 | |
| 404 | for (auto I = Allocator.Slabs.begin(), E = Allocator.Slabs.end(); I != E; |
| 405 | ++I) { |
| 406 | size_t AllocatedSlabSize = BumpPtrAllocator::computeSlabSize( |
| 407 | std::distance(Allocator.Slabs.begin(), I)); |
| 408 | char *Begin = (char *)alignAddr(*I, Align::Of<T>()); |
| 409 | char *End = *I == Allocator.Slabs.back() ? Allocator.CurPtr |
| 410 | : (char *)*I + AllocatedSlabSize; |
| 411 | |
| 412 | DestroyElements(Begin, End); |
| 413 | } |
| 414 | |
| 415 | for (auto &PtrAndSize : Allocator.CustomSizedSlabs) { |
| 416 | void *Ptr = PtrAndSize.first; |
| 417 | size_t Size = PtrAndSize.second; |
| 418 | DestroyElements((char *)alignAddr(Ptr, Align::Of<T>()), |
| 419 | (char *)Ptr + Size); |
| 420 | } |
| 421 | |
| 422 | Allocator.Reset(); |
| 423 | } |
| 424 | |
| 425 | /// Allocate space for an array of objects without constructing them. |
| 426 | T *Allocate(size_t num = 1) { return Allocator.Allocate<T>(num); } |
| 427 | }; |
| 428 | |
| 429 | } // end namespace llvm |
| 430 | |
| 431 | template <typename AllocatorT, size_t SlabSize, size_t SizeThreshold, |
| 432 | size_t GrowthDelay> |
| 433 | void * |
| 434 | operator new(size_t Size, |
| 435 | llvm::BumpPtrAllocatorImpl<AllocatorT, SlabSize, SizeThreshold, |
| 436 | GrowthDelay> &Allocator) { |
| 437 | return Allocator.Allocate(Size, std::min((size_t)llvm::NextPowerOf2(Size), |
| 438 | alignof(std::max_align_t))); |
| 439 | } |
| 440 | |
| 441 | template <typename AllocatorT, size_t SlabSize, size_t SizeThreshold, |
| 442 | size_t GrowthDelay> |
| 443 | void operator delete(void *, |
| 444 | llvm::BumpPtrAllocatorImpl<AllocatorT, SlabSize, |
| 445 | SizeThreshold, GrowthDelay> &) { |
| 446 | } |
| 447 | |
| 448 | #endif // LLVM_SUPPORT_ALLOCATOR_H |
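As a usage illustration for the allocator declared above, here is a short sketch that assumes an LLVM build is available; the struct Point and the concrete sizes are made up for the example. Objects are carved out of slabs by bumping a pointer, there is no per-object deallocation, and everything is released together by Reset() or the destructor. With the default parameters (SlabSize = 4096, GrowthDelay = 128), computeSlabSize gives 4096 bytes for the first 128 slabs, 8192 bytes for the next 128, and so on.

#include "llvm/Support/Allocator.h"

struct Point { int X, Y; };

int main() {
  llvm::BumpPtrAllocator Alloc;

  // Typed allocation via the AllocatorBase overloads pulled in with 'using'.
  int *Ints = Alloc.Allocate<int>(16); // uninitialized space for 16 ints
  Ints[0] = 42;

  // Placement new through the operator new overload at the end of the header.
  Point *P = new (Alloc) Point{1, 2};
  (void)P;

  // No per-object free; the whole pool is released at once.
  Alloc.Reset();
  return 0;
}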
| 1 | //===-- llvm/Support/Alignment.h - Useful alignment functions ---*- C++ -*-===// | |||
| 2 | // | |||
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |||
| 4 | // See https://llvm.org/LICENSE.txt for license information. | |||
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |||
| 6 | // | |||
| 7 | //===----------------------------------------------------------------------===// | |||
| 8 | // | |||
| 9 | // This file contains types to represent alignments. | |||
| 10 | // They are instrumented to guarantee some invariants are preserved and prevent | |||
| 11 | // invalid manipulations. | |||
| 12 | // | |||
| 13 | // - Align represents an alignment in bytes, it is always set and always a valid | |||
| 14 | // power of two, its minimum value is 1 which means no alignment requirements. | |||
| 15 | // | |||
| 16 | // - MaybeAlign is an optional type, it may be undefined or set. When it's set | |||
| 17 | // you can get the underlying Align type by using the getValue() method. | |||
| 18 | // | |||
| 19 | //===----------------------------------------------------------------------===// | |||
| 20 | ||||
| 21 | #ifndef LLVM_SUPPORT_ALIGNMENT_H_ | |||
| 22 | #define LLVM_SUPPORT_ALIGNMENT_H_ | |||
| 23 | ||||
| 24 | #include "llvm/ADT/Optional.h" | |||
| 25 | #include "llvm/Support/MathExtras.h" | |||
| 26 | #include <cassert> | |||
| 27 | #ifndef NDEBUG1 | |||
| 28 | #include <string> | |||
| 29 | #endif // NDEBUG | |||
| 30 | ||||
| 31 | namespace llvm { | |||
| 32 | ||||
| 33 | #define ALIGN_CHECK_ISPOSITIVE(decl) \ | |||
| 34 | assert(decl > 0 && (#decl " should be defined"))((void)0) | |||
| 35 | ||||
| 36 | /// This struct is a compact representation of a valid (non-zero power of two) | |||
| 37 | /// alignment. | |||
| 38 | /// It is suitable for use as static global constants. | |||
| 39 | struct Align { | |||
| 40 | private: | |||
| 41 | uint8_t ShiftValue = 0; /// The log2 of the required alignment. | |||
| 42 | /// ShiftValue is less than 64 by construction. | |||
| 43 | ||||
| 44 | friend struct MaybeAlign; | |||
| 45 | friend unsigned Log2(Align); | |||
| 46 | friend bool operator==(Align Lhs, Align Rhs); | |||
| 47 | friend bool operator!=(Align Lhs, Align Rhs); | |||
| 48 | friend bool operator<=(Align Lhs, Align Rhs); | |||
| 49 | friend bool operator>=(Align Lhs, Align Rhs); | |||
| 50 | friend bool operator<(Align Lhs, Align Rhs); | |||
| 51 | friend bool operator>(Align Lhs, Align Rhs); | |||
| 52 | friend unsigned encode(struct MaybeAlign A); | |||
| 53 | friend struct MaybeAlign decodeMaybeAlign(unsigned Value); | |||
| 54 | ||||
| 55 | /// A trivial type to allow construction of constexpr Align. | |||
| 56 | /// This is currently needed to workaround a bug in GCC 5.3 which prevents | |||
| 57 | /// definition of constexpr assign operators. | |||
| 58 | /// https://stackoverflow.com/questions/46756288/explicitly-defaulted-function-cannot-be-declared-as-constexpr-because-the-implic | |||
| 59 | /// FIXME: Remove this, make all assign operators constexpr and introduce user | |||
| 60 | /// defined literals when we don't have to support GCC 5.3 anymore. | |||
| 61 | /// https://llvm.org/docs/GettingStarted.html#getting-a-modern-host-c-toolchain | |||
| 62 | struct LogValue { | |||
| 63 | uint8_t Log; | |||
| 64 | }; | |||
| 65 | ||||
| 66 | public: | |||
| 67 | /// Default is byte-aligned. | |||
| 68 | constexpr Align() = default; | |||
| 69 | /// Do not perform checks in case of copy/move construct/assign, because the | |||
| 70 | /// checks have been performed when building `Other`. | |||
| 71 | constexpr Align(const Align &Other) = default; | |||
| 72 | constexpr Align(Align &&Other) = default; | |||
| 73 | Align &operator=(const Align &Other) = default; | |||
| 74 | Align &operator=(Align &&Other) = default; | |||
| 75 | ||||
| 76 | explicit Align(uint64_t Value) { | |||
| 77 | assert(Value > 0 && "Value must not be 0")((void)0); | |||
| 78 | assert(llvm::isPowerOf2_64(Value) && "Alignment is not a power of 2")((void)0); | |||
| 79 | ShiftValue = Log2_64(Value); | |||
| 80 | assert(ShiftValue < 64 && "Broken invariant")((void)0); | |||
| 81 | } | |||
| 82 | ||||
| 83 | /// This is a hole in the type system and should not be abused. | |||
| 84 | /// Needed to interact with C for instance. | |||
| 85 | uint64_t value() const { return uint64_t(1) << ShiftValue; } | |||
| 86 | ||||
| 87 | /// Allow constructions of constexpr Align. | |||
| 88 | template <size_t kValue> constexpr static LogValue Constant() { | |||
| 89 | return LogValue{static_cast<uint8_t>(CTLog2<kValue>())}; | |||
| 90 | } | |||
| 91 | ||||
| 92 | /// Allow constructions of constexpr Align from types. | |||
| 93 | /// Compile time equivalent to Align(alignof(T)). | |||
| 94 | template <typename T> constexpr static LogValue Of() { | |||
| 95 | return Constant<std::alignment_of<T>::value>(); | |||
| 96 | } | |||
| 97 | ||||
| 98 | /// Constexpr constructor from LogValue type. | |||
| 99 | constexpr Align(LogValue CA) : ShiftValue(CA.Log) {} | |||
| 100 | }; | |||
| 101 | ||||
| 102 | /// Treats the value 0 as a 1, so Align is always at least 1. | |||
| 103 | inline Align assumeAligned(uint64_t Value) { | |||
| 104 | return Value ? Align(Value) : Align(); | |||
| 105 | } | |||
| 106 | ||||
| 107 | /// This struct is a compact representation of a valid (power of two) or | |||
| 108 | /// undefined (0) alignment. | |||
| 109 | struct MaybeAlign : public llvm::Optional<Align> { | |||
| 110 | private: | |||
| 111 | using UP = llvm::Optional<Align>; | |||
| 112 | ||||
| 113 | public: | |||
| 114 | /// Default is undefined. | |||
| 115 | MaybeAlign() = default; | |||
| 116 | /// Do not perform checks in case of copy/move construct/assign, because the | |||
| 117 | /// checks have been performed when building `Other`. | |||
| 118 | MaybeAlign(const MaybeAlign &Other) = default; | |||
| 119 | MaybeAlign &operator=(const MaybeAlign &Other) = default; | |||
| 120 | MaybeAlign(MaybeAlign &&Other) = default; | |||
| 121 | MaybeAlign &operator=(MaybeAlign &&Other) = default; | |||
| 122 | ||||
| 123 | /// Use llvm::Optional<Align> constructor. | |||
| 124 | using UP::UP; | |||
| 125 | ||||
| 126 | explicit MaybeAlign(uint64_t Value) { | |||
| 127 | assert((Value == 0 || llvm::isPowerOf2_64(Value)) &&((void)0) | |||
| 128 | "Alignment is neither 0 nor a power of 2")((void)0); | |||
| 129 | if (Value) | |||
| 130 | emplace(Value); | |||
| 131 | } | |||
| 132 | ||||
| 133 | /// For convenience, returns a valid alignment or 1 if undefined. | |||
| 134 | Align valueOrOne() const { return hasValue() ? getValue() : Align(); } | |||
| 135 | }; | |||
| 136 | ||||
| 137 | /// Checks that SizeInBytes is a multiple of the alignment. | |||
| 138 | inline bool isAligned(Align Lhs, uint64_t SizeInBytes) { | |||
| 139 | return SizeInBytes % Lhs.value() == 0; | |||
| 140 | } | |||
| 141 | ||||
| 142 | /// Checks that Addr is a multiple of the alignment. | |||
| 143 | inline bool isAddrAligned(Align Lhs, const void *Addr) { | |||
| 144 | return isAligned(Lhs, reinterpret_cast<uintptr_t>(Addr)); | |||
| 145 | } | |||
| 146 | ||||
| 147 | /// Returns a multiple of A needed to store `Size` bytes. | |||
| 148 | inline uint64_t alignTo(uint64_t Size, Align A) { | |||
| 149 | const uint64_t Value = A.value(); | |||
| 150 | // The following line is equivalent to `(Size + Value - 1) / Value * Value`. | |||
| 151 | ||||
| 152 | // The division followed by a multiplication can be thought of as a right | |||
| 153 | // shift followed by a left shift which zeros out the extra bits produced in | |||
| 154 | // the bump; `~(Value - 1)` is a mask where all those bits being zeroed out | |||
| 155 | // are just zero. | |||
| 156 | ||||
| 157 | // Most compilers can generate this code but the pattern may be missed when | |||
| 158 | // multiple functions get inlined. | |||
| 159 | return (Size + Value - 1) & ~(Value - 1U); | |||
| 160 | } | |||
| 161 | ||||
| 162 | /// If non-zero \p Skew is specified, the return value will be a minimal integer | |||
| 163 | /// that is greater than or equal to \p Size and equal to \p A * N + \p Skew for | |||
| 164 | /// some integer N. If \p Skew is larger than \p A, its value is adjusted to '\p | |||
| 165 | /// Skew mod \p A'. | |||
| 166 | /// | |||
| 167 | /// Examples: | |||
| 168 | /// \code | |||
| 169 | /// alignTo(5, Align(8), 7) = 7 | |||
| 170 | /// alignTo(17, Align(8), 1) = 17 | |||
| 171 | /// alignTo(~0LL, Align(8), 3) = 3 | |||
| 172 | /// \endcode | |||
| 173 | inline uint64_t alignTo(uint64_t Size, Align A, uint64_t Skew) { | |||
| 174 | const uint64_t Value = A.value(); | |||
| 175 | Skew %= Value; | |||
| 176 | return ((Size + Value - 1 - Skew) & ~(Value - 1U)) + Skew; | |||
| 177 | } | |||
| 178 | ||||
| 179 | /// Returns a multiple of A needed to store `Size` bytes. | |||
| 180 | /// Returns `Size` if current alignment is undefined. | |||
| 181 | inline uint64_t alignTo(uint64_t Size, MaybeAlign A) { | |||
| 182 | return A ? alignTo(Size, A.getValue()) : Size; | |||
| 183 | } | |||
| 184 | ||||
| 185 | /// Aligns `Addr` to `Alignment` bytes, rounding up. | |||
| 186 | inline uintptr_t alignAddr(const void *Addr, Align Alignment) { | |||
| 187 | uintptr_t ArithAddr = reinterpret_cast<uintptr_t>(Addr); | |||
| 188 | assert(static_cast<uintptr_t>(ArithAddr + Alignment.value() - 1) >=((void)0) | |||
| 189 | ArithAddr &&((void)0) | |||
| 190 | "Overflow")((void)0); | |||
| 191 | return alignTo(ArithAddr, Alignment); | |||
| 192 | } | |||
| 193 | ||||
| 194 | /// Returns the offset to the next integer (mod 2**64) that is greater than | |||
| 195 | /// or equal to \p Value and is a multiple of \p Align. | |||
| 196 | inline uint64_t offsetToAlignment(uint64_t Value, Align Alignment) { | |||
| 197 | return alignTo(Value, Alignment) - Value; | |||
| 198 | } | |||
| 199 | ||||
| 200 | /// Returns the necessary adjustment for aligning `Addr` to `Alignment` | |||
| 201 | /// bytes, rounding up. | |||
| 202 | inline uint64_t offsetToAlignedAddr(const void *Addr, Align Alignment) { | |||
| 203 | return offsetToAlignment(reinterpret_cast<uintptr_t>(Addr), Alignment); | |||
| 204 | } | |||
| 205 | ||||
| 206 | /// Returns the log2 of the alignment. | |||
| 207 | inline unsigned Log2(Align A) { return A.ShiftValue; } | |||
| 208 | ||||
| 209 | /// Returns the alignment that satisfies both alignments. | |||
| 210 | /// Same semantic as MinAlign. | |||
| 211 | inline Align commonAlignment(Align A, Align B) { return std::min(A, B); } | |||
| 212 | ||||
| 213 | /// Returns the alignment that satisfies both alignments. | |||
| 214 | /// Same semantic as MinAlign. | |||
| 215 | inline Align commonAlignment(Align A, uint64_t Offset) { | |||
| 216 | return Align(MinAlign(A.value(), Offset)); | |||
| 217 | } | |||
| 218 | ||||
| 219 | /// Returns the alignment that satisfies both alignments. | |||
| 220 | /// Same semantic as MinAlign. | |||
| 221 | inline MaybeAlign commonAlignment(MaybeAlign A, MaybeAlign B) { | |||
| 222 | return A && B ? commonAlignment(*A, *B) : A ? A : B; | |||
| 223 | } | |||
| 224 | ||||
| 225 | /// Returns the alignment that satisfies both alignments. | |||
| 226 | /// Same semantic as MinAlign. | |||
| 227 | inline MaybeAlign commonAlignment(MaybeAlign A, uint64_t Offset) { | |||
| 228 | return MaybeAlign(MinAlign((*A).value(), Offset)); | |||
| 229 | } | |||
| 230 | ||||
| 231 | /// Returns a representation of the alignment that encodes undefined as 0. | |||
| 232 | inline unsigned encode(MaybeAlign A) { return A ? A->ShiftValue + 1 : 0; } | |||
| 233 | ||||
| 234 | /// Dual operation of the encode function above. | |||
| 235 | inline MaybeAlign decodeMaybeAlign(unsigned Value) { | |||
| 236 | if (Value == 0) | |||
| 237 | return MaybeAlign(); | |||
| 238 | Align Out; | |||
| 239 | Out.ShiftValue = Value - 1; | |||
| 240 | return Out; | |||
| 241 | } | |||
| 242 | ||||
| 243 | /// Returns a representation of the alignment; the encoded value is positive by | |||
| 244 | /// definition. | |||
| 245 | inline unsigned encode(Align A) { return encode(MaybeAlign(A)); } | |||
| 246 | ||||
| 247 | /// Comparisons between Align and scalars. Rhs must be positive. | |||
| 248 | inline bool operator==(Align Lhs, uint64_t Rhs) { | |||
| 249 | ALIGN_CHECK_ISPOSITIVE(Rhs); | |||
| 250 | return Lhs.value() == Rhs; | |||
| 251 | } | |||
| 252 | inline bool operator!=(Align Lhs, uint64_t Rhs) { | |||
| 253 | ALIGN_CHECK_ISPOSITIVE(Rhs); | |||
| 254 | return Lhs.value() != Rhs; | |||
| 255 | } | |||
| 256 | inline bool operator<=(Align Lhs, uint64_t Rhs) { | |||
| 257 | ALIGN_CHECK_ISPOSITIVE(Rhs); | |||
| 258 | return Lhs.value() <= Rhs; | |||
| 259 | } | |||
| 260 | inline bool operator>=(Align Lhs, uint64_t Rhs) { | |||
| 261 | ALIGN_CHECK_ISPOSITIVE(Rhs); | |||
| 262 | return Lhs.value() >= Rhs; | |||
| 263 | } | |||
| 264 | inline bool operator<(Align Lhs, uint64_t Rhs) { | |||
| 265 | ALIGN_CHECK_ISPOSITIVE(Rhs); | |||
| 266 | return Lhs.value() < Rhs; | |||
| 267 | } | |||
| 268 | inline bool operator>(Align Lhs, uint64_t Rhs) { | |||
| 269 | ALIGN_CHECK_ISPOSITIVE(Rhs); | |||
| 270 | return Lhs.value() > Rhs; | |||
| 271 | } | |||
| 272 | ||||
| 273 | /// Comparisons between MaybeAlign and scalars. | |||
| 274 | inline bool operator==(MaybeAlign Lhs, uint64_t Rhs) { | |||
| 275 | return Lhs ? (*Lhs).value() == Rhs : Rhs == 0; | |||
| 276 | } | |||
| 277 | inline bool operator!=(MaybeAlign Lhs, uint64_t Rhs) { | |||
| 278 | return Lhs ? (*Lhs).value() != Rhs : Rhs != 0; | |||
| 279 | } | |||
| 280 | ||||
| 281 | /// Comparisons operators between Align. | |||
| 282 | inline bool operator==(Align Lhs, Align Rhs) { | |||
| 283 | return Lhs.ShiftValue == Rhs.ShiftValue; | |||
| 284 | } | |||
| 285 | inline bool operator!=(Align Lhs, Align Rhs) { | |||
| 286 | return Lhs.ShiftValue != Rhs.ShiftValue; | |||
| 287 | } | |||
| 288 | inline bool operator<=(Align Lhs, Align Rhs) { | |||
| 289 | return Lhs.ShiftValue <= Rhs.ShiftValue; | |||
| 290 | } | |||
| 291 | inline bool operator>=(Align Lhs, Align Rhs) { | |||
| 292 | return Lhs.ShiftValue >= Rhs.ShiftValue; | |||
| 293 | } | |||
| 294 | inline bool operator<(Align Lhs, Align Rhs) { | |||
| 295 | return Lhs.ShiftValue < Rhs.ShiftValue; | |||
| 296 | } | |||
| 297 | inline bool operator>(Align Lhs, Align Rhs) { | |||
| 298 | return Lhs.ShiftValue > Rhs.ShiftValue; | |||
| 299 | } | |||
| 300 | ||||
| 301 | // Don't allow relational comparisons with MaybeAlign. | |||
| 302 | bool operator<=(Align Lhs, MaybeAlign Rhs) = delete; | |||
| 303 | bool operator>=(Align Lhs, MaybeAlign Rhs) = delete; | |||
| 304 | bool operator<(Align Lhs, MaybeAlign Rhs) = delete; | |||
| 305 | bool operator>(Align Lhs, MaybeAlign Rhs) = delete; | |||
| 306 | ||||
| 307 | bool operator<=(MaybeAlign Lhs, Align Rhs) = delete; | |||
| 308 | bool operator>=(MaybeAlign Lhs, Align Rhs) = delete; | |||
| 309 | bool operator<(MaybeAlign Lhs, Align Rhs) = delete; | |||
| 310 | bool operator>(MaybeAlign Lhs, Align Rhs) = delete; | |||
| 311 | ||||
| 312 | bool operator<=(MaybeAlign Lhs, MaybeAlign Rhs) = delete; | |||
| 313 | bool operator>=(MaybeAlign Lhs, MaybeAlign Rhs) = delete; | |||
| 314 | bool operator<(MaybeAlign Lhs, MaybeAlign Rhs) = delete; | |||
| 315 | bool operator>(MaybeAlign Lhs, MaybeAlign Rhs) = delete; | |||
| 316 | ||||
| 317 | inline Align operator*(Align Lhs, uint64_t Rhs) { | |||
| 318 | assert(Rhs > 0 && "Rhs must be positive")((void)0); | |||
| 319 | return Align(Lhs.value() * Rhs); | |||
| 320 | } | |||
| 321 | ||||
| 322 | inline MaybeAlign operator*(MaybeAlign Lhs, uint64_t Rhs) { | |||
| 323 | assert(Rhs > 0 && "Rhs must be positive")((void)0); | |||
| 324 | return Lhs ? Lhs.getValue() * Rhs : MaybeAlign(); | |||
| 325 | } | |||
| 326 | ||||
| 327 | inline Align operator/(Align Lhs, uint64_t Divisor) { | |||
| 328 | assert(llvm::isPowerOf2_64(Divisor) &&((void)0) | |||
| 329 | "Divisor must be positive and a power of 2")((void)0); | |||
| 330 | assert(Lhs != 1 && "Can't halve byte alignment")((void)0); | |||
| 331 | return Align(Lhs.value() / Divisor); | |||
| 332 | } | |||
| 333 | ||||
| 334 | inline MaybeAlign operator/(MaybeAlign Lhs, uint64_t Divisor) { | |||
| 335 | assert(llvm::isPowerOf2_64(Divisor) &&((void)0) | |||
| 336 | "Divisor must be positive and a power of 2")((void)0); | |||
| 337 | return Lhs ? Lhs.getValue() / Divisor : MaybeAlign(); | |||
| 338 | } | |||
| 339 | ||||
| 340 | inline Align max(MaybeAlign Lhs, Align Rhs) { | |||
| 341 | return Lhs && *Lhs > Rhs ? *Lhs : Rhs; | |||
| 342 | } | |||
| 343 | ||||
| 344 | inline Align max(Align Lhs, MaybeAlign Rhs) { | |||
| 345 | return Rhs && *Rhs > Lhs ? *Rhs : Lhs; | |||
| 346 | } | |||
| 347 | ||||
| 348 | #ifndef NDEBUG1 | |||
| 349 | // For usage in LLVM_DEBUG macros. | |||
| 350 | inline std::string DebugStr(const Align &A) { | |||
| 351 | return std::to_string(A.value()); | |||
| 352 | } | |||
| 353 | // For usage in LLVM_DEBUG macros. | |||
| 354 | inline std::string DebugStr(const MaybeAlign &MA) { | |||
| 355 | if (MA) | |||
| 356 | return std::to_string(MA->value()); | |||
| 357 | return "None"; | |||
| 358 | } | |||
| 359 | #endif // NDEBUG | |||
| 360 | ||||
| 361 | #undef ALIGN_CHECK_ISPOSITIVE | |||
| 362 | ||||
| 363 | } // namespace llvm | |||
| 364 | ||||
| 365 | #endif // LLVM_SUPPORT_ALIGNMENT_H_ |
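Finally, a standalone sketch (plain C++ reproducing the formulas from the header above; the concrete numbers are made up) of the Align arithmetic: value() computes 1 << ShiftValue, and alignTo rounds a size up to the next multiple of the alignment with a mask. The shift is only well-defined because ShiftValue stays below 64 by construction, as the struct documents; shifting a uint64_t by 64 or more is undefined behavior in C++.

#include <cassert>
#include <cstdint>

int main() {
  const uint8_t ShiftValue = 4;                      // log2 of a 16-byte alignment
  const uint64_t Value = uint64_t(1) << ShiftValue;  // OK only because 4 < 64
  assert(Value == 16);

  // alignTo(Size, Align): (Size + Value - 1) & ~(Value - 1)
  const uint64_t Size = 37;
  const uint64_t Aligned = (Size + Value - 1) & ~(Value - 1);
  assert(Aligned == 48);

  // offsetToAlignment(Size, Align): distance to the next aligned value.
  assert(Aligned - Size == 11);
  return 0;
}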