/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Transforms/IPO/OpenMPOpt.cpp

Bug Summary

File:	src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Transforms/IPO/OpenMPOpt.cpp
Warning:	line 3396, column 9 Called C++ object pointer is null

Annotated Source Code

Press '?' to see keyboard shortcuts

Show analyzer invocation

clang -cc1 -cc1 -triple amd64-unknown-openbsd7.0 -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name OpenMPOpt.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model static -mframe-pointer=all -relaxed-aliasing -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/usr/src/gnu/usr.bin/clang/libLLVM/obj -resource-dir /usr/local/lib/clang/13.0.0 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I 
/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Analysis -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ASMParser -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/BinaryFormat -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Bitcode -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Bitcode -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Bitstream -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /include/llvm/CodeGen -I /include/llvm/CodeGen/PBQP -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/IR -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/IR -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Coroutines -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ProfileData/Coverage -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/CodeView -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/DWARF -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/MSF -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/PDB -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Demangle -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ExecutionEngine -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ExecutionEngine/JITLink -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ExecutionEngine/Orc -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend/OpenACC -I 
/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend/OpenMP -I /include/llvm/CodeGen/GlobalISel -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/IRReader -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/InstCombine -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/Transforms/InstCombine -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/LTO -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Linker -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/MC -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/MC/MCParser -I /include/llvm/CodeGen/MIRParser -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Object -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Option -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Passes -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ProfileData -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Scalar -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ADT -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Support -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/Symbolize -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Target -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I 
/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Utils -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Vectorize -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/IPO -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include -I /usr/src/gnu/usr.bin/clang/libLLVM/../include -I /usr/src/gnu/usr.bin/clang/libLLVM/obj -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include -D NDEBUG -D __STDC_LIMIT_MACROS -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D LLVM_PREFIX="/usr" -internal-isystem /usr/include/c++/v1 -internal-isystem /usr/local/lib/clang/13.0.0/include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/usr/src/gnu/usr.bin/clang/libLLVM/obj -ferror-limit 19 -fvisibility-inlines-hidden -fwrapv -stack-protector 2 -fno-rtti -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -fno-builtin-malloc -fno-builtin-calloc -fno-builtin-realloc -fno-builtin-valloc -fno-builtin-free -fno-builtin-strdup 
-fno-builtin-strndup -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /home/ben/Projects/vmm/scan-build/2022-01-12-194120-40624-1 -x c++ /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Transforms/IPO/OpenMPOpt.cpp

/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Transforms/IPO/OpenMPOpt.cpp

→

1//===-- IPO/OpenMPOpt.cpp - Collection of OpenMP specific optimizations ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// OpenMP specific optimizations:
10//
11// - Deduplication of runtime calls, e.g., omp_get_thread_num.
12// - Replacing globalized device memory with stack memory.
13// - Replacing globalized device memory with shared memory.
14// - Parallel region merging.
15// - Transforming generic-mode device kernels to SPMD mode.
16// - Specializing the state machine for generic-mode device kernels.
17//
18//===----------------------------------------------------------------------===//

20#include "llvm/Transforms/IPO/OpenMPOpt.h"

22#include "llvm/ADT/EnumeratedArray.h"
23#include "llvm/ADT/PostOrderIterator.h"
24#include "llvm/ADT/Statistic.h"
25#include "llvm/Analysis/CallGraph.h"
26#include "llvm/Analysis/CallGraphSCCPass.h"
27#include "llvm/Analysis/OptimizationRemarkEmitter.h"
28#include "llvm/Analysis/ValueTracking.h"
29#include "llvm/Frontend/OpenMP/OMPConstants.h"
30#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
31#include "llvm/IR/Assumptions.h"
32#include "llvm/IR/DiagnosticInfo.h"
33#include "llvm/IR/GlobalValue.h"
34#include "llvm/IR/Instruction.h"
35#include "llvm/IR/IntrinsicInst.h"
36#include "llvm/InitializePasses.h"
37#include "llvm/Support/CommandLine.h"
38#include "llvm/Transforms/IPO.h"
39#include "llvm/Transforms/IPO/Attributor.h"
40#include "llvm/Transforms/Utils/BasicBlockUtils.h"
41#include "llvm/Transforms/Utils/CallGraphUpdater.h"
42#include "llvm/Transforms/Utils/CodeExtractor.h"

44using namespace llvm;
45using namespace omp;

// Category string of this pass; the debug TAG defined further down is built
// from it. (A leaked macro-expansion tooltip had duplicated the literal here,
// which would have made the macro expand to "openmp-optopenmp-opt".)
47#define DEBUG_TYPE "openmp-opt"

// Flag "openmp-opt-disable": disables the OpenMP specific optimizations.
// Hidden, may be given zero or more times, defaults to false.
49static cl::opt<bool> DisableOpenMPOptimizations(
  "openmp-opt-disable", cl::ZeroOrMore,
  cl::desc("Disable OpenMP specific optimizations."), cl::Hidden,
  cl::init(false));

// Flag "openmp-opt-enable-merging": opts in to the parallel region merging
// optimization. Hidden, defaults to false.
54static cl::opt<bool> EnableParallelRegionMerging(
  "openmp-opt-enable-merging", cl::ZeroOrMore,
  cl::desc("Enable the OpenMP region merging optimization."), cl::Hidden,
  cl::init(false));

// Flag "openmp-opt-disable-internalization": disables function
// internalization. Hidden, defaults to false.
59static cl::opt<bool>
  DisableInternalization("openmp-opt-disable-internalization", cl::ZeroOrMore,
                         cl::desc("Disable function internalization."),
                         cl::Hidden, cl::init(false));

// Flags "openmp-print-icv-values" and "openmp-print-gpu-kernels": hidden,
// default false, no description string.
// NOTE(review): judging by the names these request diagnostic printing of ICV
// values and of the identified GPU kernels — confirm at the use sites.
64static cl::opt<bool> PrintICVValues("openmp-print-icv-values", cl::init(false),
                                  cl::Hidden);
66static cl::opt<bool> PrintOpenMPKernels("openmp-print-gpu-kernels",
                                      cl::init(false), cl::Hidden);

// Flag "openmp-hide-memory-transfer-latency": work-in-progress option that
// tries to hide host-to-device transfer latency. Hidden, defaults to false.
69static cl::opt<bool> HideMemoryTransferLatency(
  "openmp-hide-memory-transfer-latency",
  cl::desc("[WIP] Tries to hide the latency of host to device memory"
           " transfers"),
  cl::Hidden, cl::init(false));

// Statistic counters of this pass. Each STATISTIC(Name, Desc) declares a
// static llvm::Statistic named Name in the "openmp-opt" group with the given
// description. The description strings are emitted at runtime and are kept
// verbatim.
75STATISTIC(NumOpenMPRuntimeCallsDeduplicated,
        "Number of OpenMP runtime calls deduplicated");
77STATISTIC(NumOpenMPParallelRegionsDeleted,
        "Number of OpenMP parallel regions deleted");
79STATISTIC(NumOpenMPRuntimeFunctionsIdentified,
        "Number of OpenMP runtime functions identified");
81STATISTIC(NumOpenMPRuntimeFunctionUsesIdentified,
        "Number of OpenMP runtime function uses identified");
83STATISTIC(NumOpenMPTargetRegionKernels,
        "Number of OpenMP target region entry points (=kernels) identified");
85STATISTIC(NumOpenMPTargetRegionKernelsSPMD,
        "Number of OpenMP target region entry points (=kernels) executed in "
        "SPMD-mode instead of generic-mode");
88STATISTIC(NumOpenMPTargetRegionKernelsWithoutStateMachine,
        "Number of OpenMP target region entry points (=kernels) executed in "
        "generic-mode without a state machines");
91STATISTIC(NumOpenMPTargetRegionKernelsCustomStateMachineWithFallback,
        "Number of OpenMP target region entry points (=kernels) executed in "
        "generic-mode with customized state machines with fallback");
94STATISTIC(NumOpenMPTargetRegionKernelsCustomStateMachineWithoutFallback,
        "Number of OpenMP target region entry points (=kernels) executed in "
        "generic-mode with customized state machines without fallback");
97STATISTIC(
  NumOpenMPParallelRegionsReplacedInGPUStateMachine,
  "Number of OpenMP parallel regions replaced with ID in GPU state machines");
100STATISTIC(NumOpenMPParallelRegionsMerged,
        "Number of OpenMP parallel regions merged");
102STATISTIC(NumBytesMovedToSharedMemory,
        "Amount of memory pushed to shared memory");

// Prefix for this pass's debug output ("[" + DEBUG_TYPE + "]"). Only defined
// when NDEBUG is not set. (The guard had been corrupted to test `NDEBUG1`,
// and the literal was duplicated by a leaked macro-expansion tooltip.)
105#if !defined(NDEBUG)
106static constexpr auto TAG = "[" DEBUG_TYPE "]";
107#endif

109namespace {

/// Named address-space numbers used by this file.
/// NOTE(review): the numeric values presumably mirror the address-space
/// numbering of the GPU targets this pass supports — confirm against the
/// target definitions before changing any of them.
111enum class AddressSpace : unsigned {
Generic = 0,
Global = 1,
Shared = 3,
Constant = 4,
Local = 5,
117};

119struct AAHeapToShared;

121struct AAICVTracker;

123/// OpenMP specific information. For now, stores RFIs and ICVs also needed for
124/// Attributor runs.
125struct OMPInformationCache : public InformationCache {
/// Build the cache for module \p M. \p M, \p AG, \p Allocator and the address
/// of \p CGSCC are forwarded to the InformationCache base; \p M also seeds the
/// OpenMP IR builder, and a reference to \p Kernels is retained. The body then
/// fills the runtime function table (RFIs) and the ICV table (ICVs).
OMPInformationCache(Module &M, AnalysisGetter &AG,
                    BumpPtrAllocator &Allocator, SetVector<Function *> &CGSCC,
                    SmallPtrSetImpl<Kernel> &Kernels)
    : InformationCache(M, AG, Allocator, &CGSCC), OMPBuilder(M),
      Kernels(Kernels) {

  // Both initializers below read types from OMPBuilder, so the builder has to
  // be initialized first.
  OMPBuilder.initialize();
  initializeRuntimeFunctions();
  initializeInternalControlVars();
}

/// Generic information that describes an internal control variable (ICV).
/// Entries live in the ICVs table and are filled field by field by the ICV_*
/// macros in initializeInternalControlVars().
struct InternalControlVarInfo {
  /// The kind, as described by InternalControlVar enum.
  InternalControlVar Kind;

  /// The name of the ICV.
  StringRef Name;

  /// Environment variable associated with this ICV.
  StringRef EnvVarName;

  /// Initial value kind.
  ICVInitValue InitKind;

  /// Initial value. Defaults to null so that entries which are never
  /// assigned one (e.g. the ICV_LAST init kind, which sets nothing) do not
  /// carry an indeterminate pointer.
  ConstantInt *InitValue = nullptr;

  /// Setter RTL function associated with this ICV.
  RuntimeFunction Setter;

  /// Getter RTL function associated with this ICV.
  RuntimeFunction Getter;

  /// RTL Function corresponding to the override clause of this ICV
  RuntimeFunction Clause;
};

/// Generic information that describes a runtime function, together with the
/// uses of its declaration grouped by the function that contains them.
struct RuntimeFunctionInfo {

  /// The kind, as described by the RuntimeFunction enum. Only assigned
  /// together with Declaration, i.e. once a matching declaration was found.
  RuntimeFunction Kind;

  /// The name of the function.
  StringRef Name;

  /// Flag to indicate a variadic function.
  bool IsVarArg = false;

  /// The return type of the function.
  Type *ReturnType = nullptr;

  /// The argument types of the function.
  SmallVector<Type *, 8> ArgumentTypes;

  /// The declaration if available.
  Function *Declaration = nullptr;

  /// Uses of this runtime function per function containing the use.
  using UseVector = SmallVector<Use *, 16>;

  /// Clear UsesMap for runtime function.
  void clearUsesMap() { UsesMap.clear(); }

  /// Boolean conversion that is true if the runtime function was found.
  operator bool() const { return Declaration; }

  /// Return the vector of uses in function \p F, creating an empty one (and
  /// with it a map entry for \p F) if none exists yet.
  UseVector &getOrCreateUseVector(Function *F) {
    std::shared_ptr<UseVector> &UV = UsesMap[F];
    if (!UV)
      UV = std::make_shared<UseVector>();
    return *UV;
  }

  /// Return the vector of uses in function \p F or `nullptr` if there are
  /// none. Unlike getOrCreateUseVector this never adds a map entry.
  const UseVector *getUseVector(Function &F) const {
    auto I = UsesMap.find(&F);
    if (I != UsesMap.end())
      return I->second.get();
    return nullptr;
  }

  /// Return how many functions contain uses of this runtime function. This
  /// is the number of map entries, including entries whose vector is empty.
  size_t getNumFunctionsWithUses() const { return UsesMap.size(); }

  /// Return the number of arguments (or the minimal number for variadic
  /// functions).
  size_t getNumArgs() const { return ArgumentTypes.size(); }

  /// Run the callback \p CB on each use and forget the use if the result is
  /// true. The callback will be fed the function in which the use was
  /// encountered as second argument.
  void foreachUse(SmallVectorImpl<Function *> &SCC,
                  function_ref<bool(Use &, Function &)> CB) {
    for (Function *F : SCC)
      foreachUse(CB, F);
  }

  /// Run the callback \p CB on each use within the function \p F and forget
  /// the use if the result is true. \p F is dereferenced for the callback
  /// and must not be null. Forgetting a use may reorder the remaining uses
  /// of \p F.
  void foreachUse(function_ref<bool(Use &, Function &)> CB, Function *F) {
    // Positions (ascending) of the uses the callback asked us to forget.
    SmallVector<unsigned, 8> ToBeDeleted;

    unsigned Idx = 0;
    UseVector &UV = getOrCreateUseVector(F);

    for (Use *U : UV) {
      if (CB(*U, *F))
        ToBeDeleted.push_back(Idx);
      ++Idx;
    }

    // Remove the to-be-deleted indices in reverse order as prior
    // modifications will not modify the smaller indices. Each removal
    // overwrites the slot with the last element and shrinks the vector.
    while (!ToBeDeleted.empty()) {
      unsigned DeadIdx = ToBeDeleted.pop_back_val();
      UV[DeadIdx] = UV.back();
      UV.pop_back();
    }
  }

private:
  /// Map from functions to all uses of this runtime function contained in
  /// them.
  DenseMap<Function *, std::shared_ptr<UseVector>> UsesMap;

public:
  /// Iterators for the uses of this runtime function.
  decltype(UsesMap)::iterator begin() { return UsesMap.begin(); }
  decltype(UsesMap)::iterator end() { return UsesMap.end(); }
};

/// An OpenMP-IR-Builder instance. It is constructed from the module and
/// initialized in the constructor before the tables below are filled; the
/// table initializers take their types from it.
OpenMPIRBuilder OMPBuilder;

/// Map from runtime function kind to the runtime function description.
/// Entries for which no matching declaration was found keep a null
/// Declaration.
EnumeratedArray<RuntimeFunctionInfo, RuntimeFunction,
                RuntimeFunction::OMPRTL___last>
    RFIs;

/// Map from function declarations/definitions to their runtime enum type.
/// Only functions whose signature matched the expected one are recorded.
DenseMap<Function *, RuntimeFunction> RuntimeFunctionIDMap;

/// Map from ICV kind to the ICV description.
EnumeratedArray<InternalControlVarInfo, InternalControlVar,
                InternalControlVar::ICV___last>
    ICVs;

/// Helper to initialize all internal control variable information for those
/// defined in OMPKinds.def.
///
/// The function body consists of three hook macros followed by the inclusion
/// of OMPKinds.def; each hook writes into the ICVs table.
/// NOTE(review): OMPKinds.def is presumably what invokes the hooks, once per
/// ICV entry — confirm in that file.
void initializeInternalControlVars() {
// Hook: record RTL as the setter runtime function of ICV _Name.
281#define ICV_RT_SET(_Name, RTL)                                                 \
{                                                                            \
  auto &ICV = ICVs[_Name];                                                   \
  ICV.Setter = RTL;                                                          \
}
// Hook: record RTL as the getter runtime function of ICV Name.
286#define ICV_RT_GET(Name, RTL)                                                  \
{                                                                            \
  auto &ICV = ICVs[Name];                                                    \
  ICV.Getter = RTL;                                                          \
}
// Hook: fill in name, kind, environment variable and initial value of ICV
// Enum. The initial value is null for ICV_IMPLEMENTATION_DEFINED, an i32 zero
// for ICV_ZERO and the i1 false constant for ICV_FALSE; ICV_LAST leaves
// InitValue untouched.
291#define ICV_DATA_ENV(Enum, _Name, _EnvVarName, Init)                           \
{                                                                            \
  auto &ICV = ICVs[Enum];                                                    \
  ICV.Name = _Name;                                                          \
  ICV.Kind = Enum;                                                           \
  ICV.InitKind = Init;                                                       \
  ICV.EnvVarName = _EnvVarName;                                              \
  switch (ICV.InitKind) {                                                    \
  case ICV_IMPLEMENTATION_DEFINED:                                           \
    ICV.InitValue = nullptr;                                                 \
    break;                                                                   \
  case ICV_ZERO:                                                             \
    ICV.InitValue = ConstantInt::get(                                        \
        Type::getInt32Ty(OMPBuilder.Int32->getContext()), 0);                \
    break;                                                                   \
  case ICV_FALSE:                                                            \
    ICV.InitValue = ConstantInt::getFalse(OMPBuilder.Int1->getContext());    \
    break;                                                                   \
  case ICV_LAST:                                                             \
    break;                                                                   \
  }                                                                          \
}
313#include "llvm/Frontend/OpenMP/OMPKinds.def"
}

/// Check the declaration \p F against an expected runtime function
/// signature: the return type must be \p RTFRetType and the parameter types
/// must equal \p RTFArgTypes, in order and in number. A null \p F never
/// matches.
static bool declMatchesRTFTypes(Function *F, Type *RTFRetType,
                                SmallVector<Type *, 8> &RTFArgTypes) {
  // TODO: We should output information to the user (under debug output
  //       and via remarks).

  // Cheap checks first: existence, return type and arity.
  const bool ShapeMatches = F && F->getReturnType() == RTFRetType &&
                            F->arg_size() == RTFArgTypes.size();
  if (!ShapeMatches)
    return false;

  // The arity is known to agree, so the index stays in bounds.
  unsigned ArgNo = 0;
  for (Argument &Arg : F->args())
    if (Arg.getType() != RTFArgTypes[ArgNo++])
      return false;

  return true;
}

// Helper to collect all uses of the declaration in the UsesMap. Returns the
// number of uses recorded; nothing is recorded (and 0 is returned) when the
// runtime function has no declaration. With \p CollectStats set, the
// identified-function and identified-use statistics are bumped as well.
unsigned collectUses(RuntimeFunctionInfo &RFI, bool CollectStats = true) {
  Function *Decl = RFI.Declaration;
  if (!Decl)
    return 0;

  OMPBuilder.addAttributes(RFI.Kind, *Decl);

  if (CollectStats) {
    NumOpenMPRuntimeFunctionsIdentified += 1;
    NumOpenMPRuntimeFunctionUsesIdentified += Decl->getNumUses();
  }

  // TODO: We directly convert uses into proper calls and unknown uses.
  unsigned Recorded = 0;
  for (Use &U : Decl->uses()) {
    // Uses by non-instructions are filed under the nullptr key; uses by
    // instructions are filed under their function, but only if that function
    // is part of the module slice.
    Function *Key = nullptr;
    if (Instruction *UserI = dyn_cast<Instruction>(U.getUser())) {
      Key = UserI->getFunction();
      if (!ModuleSlice.count(Key))
        continue;
    }
    RFI.getOrCreateUseVector(Key).push_back(&U);
    ++Recorded;
  }
  return Recorded;
}

// Helper function to recollect uses of a runtime function: drops the uses
// recorded for \p RTF and gathers them again, without touching statistics.
void recollectUsesForFunction(RuntimeFunction RTF) {
  RuntimeFunctionInfo &Info = RFIs[RTF];
  Info.clearUsesMap();
  (void)collectUses(Info, /*CollectStats*/ false);
}

// Helper function to recollect uses of all runtime functions, visiting the
// RFIs table slot by slot.
void recollectUses() {
  int Idx = 0;
  while (Idx < RFIs.size()) {
    recollectUsesForFunction(static_cast<RuntimeFunction>(Idx));
    ++Idx;
  }
}

/// Helper to initialize all runtime function information for those defined
/// in OpenMPKinds.def.
///
/// The module is taken from the first function in ModuleSlice, so the slice
/// must not be empty when this runs. The body defines the hook macros below
/// and then includes OMPKinds.def.
/// NOTE(review): OMPKinds.def is presumably what invokes the hooks, once per
/// type / runtime function entry — confirm in that file.
void initializeRuntimeFunctions() {
  Module &M = *((*ModuleSlice.begin())->getParent());

  // Helper macros for handling __VA_ARGS__ in OMP_RTL. Each one introduces
  // local aliases for the like-named OMPBuilder members so that the argument
  // lists handed to OMP_RTL can name them directly; the (void) casts silence
  // unused-variable warnings for aliases that end up unreferenced.
388#define OMP_TYPE(VarName, ...)                                                 \
Type *VarName = OMPBuilder.VarName;                                          \
(void)VarName;

392#define OMP_ARRAY_TYPE(VarName, ...)                                           \
ArrayType *VarName##Ty = OMPBuilder.VarName##Ty;                             \
(void)VarName##Ty;                                                           \
PointerType *VarName##PtrTy = OMPBuilder.VarName##PtrTy;                     \
(void)VarName##PtrTy;

398#define OMP_FUNCTION_TYPE(VarName, ...)                                        \
FunctionType *VarName = OMPBuilder.VarName;                                  \
(void)VarName;                                                               \
PointerType *VarName##Ptr = OMPBuilder.VarName##Ptr;                         \
(void)VarName##Ptr;

404#define OMP_STRUCT_TYPE(VarName, ...)                                          \
StructType *VarName = OMPBuilder.VarName;                                    \
(void)VarName;                                                               \
PointerType *VarName##Ptr = OMPBuilder.VarName##Ptr;                         \
(void)VarName##Ptr;

  // Per runtime function: look the name up in the module and remember the
  // result in RTLFunctions (this happens before the null/signature check, so
  // a null pointer is inserted for names the module does not declare). If the
  // signature matches, fill the RFIs slot, drop the NoInline attribute from
  // the declaration and collect its uses.
410#define OMP_RTL(_Enum, _Name, _IsVarArg, _ReturnType, ...)                     \
{                                                                            \
  SmallVector<Type *, 8> ArgsTypes({__VA_ARGS__});                           \
  Function *F = M.getFunction(_Name);                                        \
  RTLFunctions.insert(F);                                                    \
  if (declMatchesRTFTypes(F, OMPBuilder._ReturnType, ArgsTypes)) {           \
    RuntimeFunctionIDMap[F] = _Enum;                                         \
    F->removeFnAttr(Attribute::NoInline);                                    \
    auto &RFI = RFIs[_Enum];                                                 \
    RFI.Kind = _Enum;                                                        \
    RFI.Name = _Name;                                                        \
    RFI.IsVarArg = _IsVarArg;                                                \
    RFI.ReturnType = OMPBuilder._ReturnType;                                 \
    RFI.ArgumentTypes = std::move(ArgsTypes);                                \
    RFI.Declaration = F;                                                     \
    unsigned NumUses = collectUses(RFI);                                     \
    (void)NumUses;                                                           \
    LLVM_DEBUG({                                                             \
      dbgs() << TAG << RFI.Name << (RFI.Declaration ? "" : " not")           \
             << " found\n";                                                  \
      if (RFI.Declaration)                                                   \
        dbgs() << TAG << "-> got " << NumUses << " uses in "                 \
               << RFI.getNumFunctionsWithUses()                              \
               << " different functions.\n";                                 \
    });                                                                      \
  }                                                                          \
}
437#include "llvm/Frontend/OpenMP/OMPKinds.def"

  // TODO: We should attach the attributes defined in OMPKinds.def.
}

/// Collection of known kernels (\see Kernel) in the module.
SmallPtrSetImpl<Kernel> &Kernels;

/// Collection of known OpenMP runtime functions.
DenseSet<const Function *> RTLFunctions;
447};

449template <typename Ty, bool InsertInvalidates = true>
450struct BooleanStateWithSetVector : public BooleanState {
bool contains(const Ty &Elem) const { return Set.contains(Elem); }
bool insert(const Ty &Elem) {
  if (InsertInvalidates)
    BooleanState::indicatePessimisticFixpoint();
  return Set.insert(Elem);
}

const Ty &operator[](int Idx) const { return Set[Idx]; }
bool operator==(const BooleanStateWithSetVector &RHS) const {
  return BooleanState::operator==(RHS) && Set == RHS.Set;
}
bool operator!=(const BooleanStateWithSetVector &RHS) const {
  return !(*this == RHS);
}

bool empty() const { return Set.empty(); }
size_t size() const { return Set.size(); }

/// "Clamp" this state with \p RHS.
BooleanStateWithSetVector &operator^=(const BooleanStateWithSetVector &RHS) {
  BooleanState::operator^=(RHS);
  Set.insert(RHS.Set.begin(), RHS.Set.end());
  return *this;
}

476private:
/// A set to keep track of elements.
SetVector<Ty> Set;

480public:
typename decltype(Set)::iterator begin() { return Set.begin(); }
typename decltype(Set)::iterator end() { return Set.end(); }
typename decltype(Set)::const_iterator begin() const { return Set.begin(); }
typename decltype(Set)::const_iterator end() const { return Set.end(); }
485};

487template <typename Ty, bool InsertInvalidates = true>
488using BooleanStateWithPtrSetVector =
  BooleanStateWithSetVector<Ty *, InsertInvalidates>;

491struct KernelInfoState : AbstractState {
/// Flag to track if we reached a fixpoint.
bool IsAtFixpoint = false;

/// The parallel regions (identified by the outlined parallel functions) that
/// can be reached from the associated function.
BooleanStateWithPtrSetVector<Function, /* InsertInvalidates */ false>
    ReachedKnownParallelRegions;

/// State to track what parallel region we might reach.
BooleanStateWithPtrSetVector<CallBase> ReachedUnknownParallelRegions;

/// State to track if we are in SPMD-mode, assumed or know, and why we decided
/// we cannot be. If it is assumed, then RequiresFullRuntime should also be
/// false.
BooleanStateWithPtrSetVector<Instruction> SPMDCompatibilityTracker;

/// The __kmpc_target_init call in this kernel, if any. If we find more than
/// one we abort as the kernel is malformed.
CallBase *KernelInitCB = nullptr;

/// The __kmpc_target_deinit call in this kernel, if any. If we find more than
/// one we abort as the kernel is malformed.
CallBase *KernelDeinitCB = nullptr;

/// Flag to indicate if the associated function is a kernel entry.
bool IsKernelEntry = false;

/// State to track what kernel entries can reach the associated function.
BooleanStateWithPtrSetVector<Function, false> ReachingKernelEntries;

/// State to indicate if we can track parallel level of the associated
/// function. We will give up tracking if we encounter unknown caller or the
/// caller is __kmpc_parallel_51.
BooleanStateWithSetVector<uint8_t> ParallelLevels;

/// Abstract State interface
///{

KernelInfoState() {}
KernelInfoState(bool BestState) {
  if (!BestState)
    indicatePessimisticFixpoint();
}

/// See AbstractState::isValidState(...)
bool isValidState() const override { return true; }

/// See AbstractState::isAtFixpoint(...)
bool isAtFixpoint() const override { return IsAtFixpoint; }

/// See AbstractState::indicatePessimisticFixpoint(...)
ChangeStatus indicatePessimisticFixpoint() override {
  IsAtFixpoint = true;
  SPMDCompatibilityTracker.indicatePessimisticFixpoint();
  ReachedUnknownParallelRegions.indicatePessimisticFixpoint();
  return ChangeStatus::CHANGED;
}

/// See AbstractState::indicateOptimisticFixpoint(...)
ChangeStatus indicateOptimisticFixpoint() override {
  IsAtFixpoint = true;
  return ChangeStatus::UNCHANGED;
}

/// Return the assumed state
KernelInfoState &getAssumed() { return *this; }
const KernelInfoState &getAssumed() const { return *this; }

bool operator==(const KernelInfoState &RHS) const {
  if (SPMDCompatibilityTracker != RHS.SPMDCompatibilityTracker)
    return false;
  if (ReachedKnownParallelRegions != RHS.ReachedKnownParallelRegions)
    return false;
  if (ReachedUnknownParallelRegions != RHS.ReachedUnknownParallelRegions)
    return false;
  if (ReachingKernelEntries != RHS.ReachingKernelEntries)
    return false;
  return true;
}

/// Return empty set as the best state of potential values.
static KernelInfoState getBestState() { return KernelInfoState(true); }

static KernelInfoState getBestState(KernelInfoState &KIS) {
  return getBestState();
}

/// Return full set as the worst state of potential values.
static KernelInfoState getWorstState() { return KernelInfoState(false); }

/// "Clamp" this state with \p KIS.
KernelInfoState operator^=(const KernelInfoState &KIS) {
  // Do not merge two different _init and _deinit call sites.
  if (KIS.KernelInitCB) {
    if (KernelInitCB && KernelInitCB != KIS.KernelInitCB)
      indicatePessimisticFixpoint();
    KernelInitCB = KIS.KernelInitCB;
  }
  if (KIS.KernelDeinitCB) {
    if (KernelDeinitCB && KernelDeinitCB != KIS.KernelDeinitCB)
      indicatePessimisticFixpoint();
    KernelDeinitCB = KIS.KernelDeinitCB;
  }
  SPMDCompatibilityTracker ^= KIS.SPMDCompatibilityTracker;
  ReachedKnownParallelRegions ^= KIS.ReachedKnownParallelRegions;
  ReachedUnknownParallelRegions ^= KIS.ReachedUnknownParallelRegions;
  return *this;
}

KernelInfoState operator&=(const KernelInfoState &KIS) {
  return (*this ^= KIS);
}

///}
606};

608/// Used to map the values physically (in the IR) stored in an offload
609/// array, to a vector in memory.
610struct OffloadArray {
/// Physical array (in the IR).
AllocaInst *Array = nullptr;
/// Mapped values.
SmallVector<Value *, 8> StoredValues;
/// Last stores made in the offload array.
SmallVector<StoreInst *, 8> LastAccesses;

OffloadArray() = default;

/// Initializes the OffloadArray with the values stored in \p Array before
/// instruction \p Before is reached. Returns false if the initialization
/// fails.
/// This MUST be used immediately after the construction of the object.
bool initialize(AllocaInst &Array, Instruction &Before) {
  if (!Array.getAllocatedType()->isArrayTy())
    return false;

  if (!getValues(Array, Before))
    return false;

  this->Array = &Array;
  return true;
}

static const unsigned DeviceIDArgNum = 1;
static const unsigned BasePtrsArgNum = 3;
static const unsigned PtrsArgNum = 4;
static const unsigned SizesArgNum = 5;

640private:
/// Traverses the BasicBlock where \p Array is, collecting the stores made to
/// \p Array, leaving StoredValues with the values stored before the
/// instruction \p Before is reached.
bool getValues(AllocaInst &Array, Instruction &Before) {
  // Initialize container.
  const uint64_t NumValues = Array.getAllocatedType()->getArrayNumElements();
  StoredValues.assign(NumValues, nullptr);
  LastAccesses.assign(NumValues, nullptr);

  // TODO: This assumes the instruction \p Before is in the same
  //  BasicBlock as Array. Make it general, for any control flow graph.
  BasicBlock *BB = Array.getParent();
  if (BB != Before.getParent())
    return false;

  const DataLayout &DL = Array.getModule()->getDataLayout();
  const unsigned int PointerSize = DL.getPointerSize();

  for (Instruction &I : *BB) {
    if (&I == &Before)
      break;

    if (!isa<StoreInst>(&I))
      continue;

    auto *S = cast<StoreInst>(&I);
    int64_t Offset = -1;
    auto *Dst =
        GetPointerBaseWithConstantOffset(S->getPointerOperand(), Offset, DL);
    if (Dst == &Array) {
      int64_t Idx = Offset / PointerSize;
      StoredValues[Idx] = getUnderlyingObject(S->getValueOperand());
      LastAccesses[Idx] = S;
    }
  }

  return isFilled();
}

/// Returns true if all values in StoredValues and
/// LastAccesses are not nullptrs.
bool isFilled() {
  const unsigned NumValues = StoredValues.size();
  for (unsigned I = 0; I < NumValues; ++I) {
    if (!StoredValues[I] || !LastAccesses[I])
      return false;
  }

  return true;
}
691};

693struct OpenMPOpt {

/// Callback type: given a Function pointer, returns a reference to an
/// OptimizationRemarkEmitter.
using OptimizationRemarkGetter =
    function_ref<OptimizationRemarkEmitter &(Function *)>;

/// Store the given SCC, call graph updater, remark getter, information cache
/// and Attributor. The module M is taken from the parent of the first function
/// in \p SCC.
/// NOTE(review): SCC.begin() is dereferenced here unconditionally, before
/// run() performs its SCC.empty() check -- presumably callers never pass an
/// empty SCC; confirm.
OpenMPOpt(SmallVectorImpl<Function *> &SCC, CallGraphUpdater &CGUpdater,
          OptimizationRemarkGetter OREGetter,
          OMPInformationCache &OMPInfoCache, Attributor &A)
    : M(*(*SCC.begin())->getParent()), SCC(SCC), CGUpdater(CGUpdater),
      OREGetter(OREGetter), OMPInfoCache(OMPInfoCache), A(A) {}

/// Check if any remarks are enabled for openmp-opt.
///
/// \returns the result of querying the context's diagnostic handler for the
/// pass name given by DEBUG_TYPE.
bool remarksEnabled() {
  auto &Ctx = M.getContext();
  return Ctx.getDiagHandlerPtr()->isAnyRemarkEnabled(DEBUG_TYPE);
}

/// Run all OpenMP optimizations on the underlying SCC/ModuleSlice.
///
/// \param IsModulePass selects which set of transformations is applied; see
/// the two branches below.
/// \returns true if any of the transformations reported a change, false
/// otherwise (including when the SCC is empty).
bool run(bool IsModulePass) {
  if (SCC.empty())
    return false;

  bool Changed = false;

  LLVM_DEBUG(dbgs() << TAG << "Run on SCC with " << SCC.size()
                    << " functions in a slice with "
                    << OMPInfoCache.ModuleSlice.size() << " functions\n");

  if (IsModulePass) {
    Changed |= runAttributor(IsModulePass);

    // Recollect uses, in case Attributor deleted any.
    OMPInfoCache.recollectUses();

    // TODO: This should be folded into buildCustomStateMachine.
    Changed |= rewriteDeviceCodeStateMachine();

    if (remarksEnabled())
      analysisGlobalization();
  } else {
    if (PrintICVValues)
      printICVs();
    if (PrintOpenMPKernels)
      printKernels();

    Changed |= runAttributor(IsModulePass);

    // Recollect uses, in case Attributor deleted any.
    OMPInfoCache.recollectUses();

    Changed |= deleteParallelRegions();

    if (HideMemoryTransferLatency)
      Changed |= hideMemTransfersLatency();
    Changed |= deduplicateRuntimeCalls();
    if (EnableParallelRegionMerging) {
      if (mergeParallelRegions()) {
        // Merging emits new runtime calls, so deduplicate once more. The
        // result is not needed as Changed is set unconditionally.
        deduplicateRuntimeCalls();
        Changed = true;
      }
    }
  }

  return Changed;
}

/// Print initial ICV values for testing.
/// FIXME: This should be done from the Attributor once it is added.
void printICVs() const {
  InternalControlVar ICVs[] = {ICV_nthreads, ICV_active_levels, ICV_cancel,
                               ICV_proc_bind};

  for (Function *F : OMPInfoCache.ModuleSlice) {
    for (auto ICV : ICVs) {
      auto ICVInfo = OMPInfoCache.ICVs[ICV];
      auto Remark = [&](OptimizationRemarkAnalysis ORA) {
        return ORA << "OpenMP ICV " << ore::NV("OpenMPICV", ICVInfo.Name)
                   << " Value: "
                   << (ICVInfo.InitValue
                           ? toString(ICVInfo.InitValue->getValue(), 10, true)
                           : "IMPLEMENTATION_DEFINED");
      };

      emitRemark<OptimizationRemarkAnalysis>(F, "OpenMPICVTracker", Remark);
    }
  }
}

/// Print OpenMP GPU kernels for testing.
void printKernels() const {
  for (Function *F : SCC) {
    if (!OMPInfoCache.Kernels.count(F))
      continue;

    auto Remark = [&](OptimizationRemarkAnalysis ORA) {
      return ORA << "OpenMP GPU kernel "
                 << ore::NV("OpenMPGPUKernel", F->getName()) << "\n";
    };

    emitRemark<OptimizationRemarkAnalysis>(F, "OpenMPGPU", Remark);
  }
}

/// Return the call if \p U is a callee use in a regular call. If \p RFI is
/// given it has to be the callee or a nullptr is returned.
///
/// A call with operand bundles is not considered regular.
static CallInst *getCallIfRegularCall(
    Use &U, OMPInformationCache::RuntimeFunctionInfo *RFI = nullptr) {
  auto *Call = dyn_cast<CallInst>(U.getUser());

  // The user must be a call that uses U as its callee and has no bundles.
  if (!Call || !Call->isCallee(&U) || Call->hasOperandBundles())
    return nullptr;

  // Without a runtime function to match against, any such call qualifies.
  if (!RFI)
    return Call;

  // Otherwise the call must target the known declaration of RFI.
  if (!RFI->Declaration || Call->getCalledFunction() != RFI->Declaration)
    return nullptr;

  return Call;
}

/// Return the call if \p V is a regular call. If \p RFI is given it has to be
/// the callee or a nullptr is returned.
///
/// A call with operand bundles is not considered regular.
static CallInst *getCallIfRegularCall(
    Value &V, OMPInformationCache::RuntimeFunctionInfo *RFI = nullptr) {
  auto *Call = dyn_cast<CallInst>(&V);

  // The value must be a call instruction without operand bundles.
  if (!Call || Call->hasOperandBundles())
    return nullptr;

  // Without a runtime function to match against, any such call qualifies.
  if (!RFI)
    return Call;

  // Otherwise the call must target the known declaration of RFI.
  if (!RFI->Declaration || Call->getCalledFunction() != RFI->Declaration)
    return nullptr;

  return Call;
}

820private:
/// Merge parallel regions when it is safe.
bool mergeParallelRegions() {
  const unsigned CallbackCalleeOperand = 2;
  const unsigned CallbackFirstArgOperand = 3;
  using InsertPointTy = OpenMPIRBuilder::InsertPointTy;

  // Check if there are any __kmpc_fork_call calls to merge.
  OMPInformationCache::RuntimeFunctionInfo &RFI =
      OMPInfoCache.RFIs[OMPRTL___kmpc_fork_call];

  if (!RFI.Declaration)
    return false;

  // Unmergable calls that prevent merging a parallel region.
  OMPInformationCache::RuntimeFunctionInfo UnmergableCallsInfo[] = {
      OMPInfoCache.RFIs[OMPRTL___kmpc_push_proc_bind],
      OMPInfoCache.RFIs[OMPRTL___kmpc_push_num_threads],
  };

  bool Changed = false;
  LoopInfo *LI = nullptr;
  DominatorTree *DT = nullptr;

  SmallDenseMap<BasicBlock *, SmallPtrSet<Instruction *, 4>> BB2PRMap;

  BasicBlock *StartBB = nullptr, *EndBB = nullptr;
  auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
                       BasicBlock &ContinuationIP) {
    BasicBlock *CGStartBB = CodeGenIP.getBlock();
    BasicBlock *CGEndBB =
        SplitBlock(CGStartBB, &*CodeGenIP.getPoint(), DT, LI);
    assert(StartBB != nullptr && "StartBB should not be null")((void)0);
    CGStartBB->getTerminator()->setSuccessor(0, StartBB);
    assert(EndBB != nullptr && "EndBB should not be null")((void)0);
    EndBB->getTerminator()->setSuccessor(0, CGEndBB);
  };

  auto PrivCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &,
                    Value &Inner, Value *&ReplacementValue) -> InsertPointTy {
    ReplacementValue = &Inner;
    return CodeGenIP;
  };

  auto FiniCB = [&](InsertPointTy CodeGenIP) {};

  /// Create a sequential execution region within a merged parallel region,
  /// encapsulated in a master construct with a barrier for synchronization.
  auto CreateSequentialRegion = [&](Function *OuterFn,
                                    BasicBlock *OuterPredBB,
                                    Instruction *SeqStartI,
                                    Instruction *SeqEndI) {
    // Isolate the instructions of the sequential region to a separate
    // block.
    BasicBlock *ParentBB = SeqStartI->getParent();
    BasicBlock *SeqEndBB =
        SplitBlock(ParentBB, SeqEndI->getNextNode(), DT, LI);
    BasicBlock *SeqAfterBB =
        SplitBlock(SeqEndBB, &*SeqEndBB->getFirstInsertionPt(), DT, LI);
    BasicBlock *SeqStartBB =
        SplitBlock(ParentBB, SeqStartI, DT, LI, nullptr, "seq.par.merged");

    assert(ParentBB->getUniqueSuccessor() == SeqStartBB &&((void)0)
           "Expected a different CFG")((void)0);
    const DebugLoc DL = ParentBB->getTerminator()->getDebugLoc();
    ParentBB->getTerminator()->eraseFromParent();

    auto BodyGenCB = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP,
                         BasicBlock &ContinuationIP) {
      BasicBlock *CGStartBB = CodeGenIP.getBlock();
      BasicBlock *CGEndBB =
          SplitBlock(CGStartBB, &*CodeGenIP.getPoint(), DT, LI);
      assert(SeqStartBB != nullptr && "SeqStartBB should not be null")((void)0);
      CGStartBB->getTerminator()->setSuccessor(0, SeqStartBB);
      assert(SeqEndBB != nullptr && "SeqEndBB should not be null")((void)0);
      SeqEndBB->getTerminator()->setSuccessor(0, CGEndBB);
    };
    auto FiniCB = [&](InsertPointTy CodeGenIP) {};

    // Find outputs from the sequential region to outside users and
    // broadcast their values to them.
    for (Instruction &I : *SeqStartBB) {
      SmallPtrSet<Instruction *, 4> OutsideUsers;
      for (User *Usr : I.users()) {
        Instruction &UsrI = *cast<Instruction>(Usr);
        // Ignore outputs to LT intrinsics, code extraction for the merged
        // parallel region will fix them.
        if (UsrI.isLifetimeStartOrEnd())
          continue;

        if (UsrI.getParent() != SeqStartBB)
          OutsideUsers.insert(&UsrI);
      }

      if (OutsideUsers.empty())
        continue;

      // Emit an alloca in the outer region to store the broadcasted
      // value.
      const DataLayout &DL = M.getDataLayout();
      AllocaInst *AllocaI = new AllocaInst(
          I.getType(), DL.getAllocaAddrSpace(), nullptr,
          I.getName() + ".seq.output.alloc", &OuterFn->front().front());

      // Emit a store instruction in the sequential BB to update the
      // value.
      new StoreInst(&I, AllocaI, SeqStartBB->getTerminator());

      // Emit a load instruction and replace the use of the output value
      // with it.
      for (Instruction *UsrI : OutsideUsers) {
        LoadInst *LoadI = new LoadInst(
            I.getType(), AllocaI, I.getName() + ".seq.output.load", UsrI);
        UsrI->replaceUsesOfWith(&I, LoadI);
      }
    }

    OpenMPIRBuilder::LocationDescription Loc(
        InsertPointTy(ParentBB, ParentBB->end()), DL);
    InsertPointTy SeqAfterIP =
        OMPInfoCache.OMPBuilder.createMaster(Loc, BodyGenCB, FiniCB);

    OMPInfoCache.OMPBuilder.createBarrier(SeqAfterIP, OMPD_parallel);

    BranchInst::Create(SeqAfterBB, SeqAfterIP.getBlock());

    LLVM_DEBUG(dbgs() << TAG << "After sequential inlining " << *OuterFndo { } while (false)
                      << "\n")do { } while (false);
  };

  // Helper to merge the __kmpc_fork_call calls in MergableCIs. They are all
  // contained in BB and only separated by instructions that can be
  // redundantly executed in parallel. The block BB is split before the first
  // call (in MergableCIs) and after the last so the entire region we merge
  // into a single parallel region is contained in a single basic block
  // without any other instructions. We use the OpenMPIRBuilder to outline
  // that block and call the resulting function via __kmpc_fork_call.
  auto Merge = [&](SmallVectorImpl<CallInst *> &MergableCIs, BasicBlock *BB) {
    // TODO: Change the interface to allow single CIs expanded, e.g, to
    // include an outer loop.
    assert(MergableCIs.size() > 1 && "Assumed multiple mergable CIs")((void)0);

    auto Remark = [&](OptimizationRemark OR) {
      OR << "Parallel region merged with parallel region"
         << (MergableCIs.size() > 2 ? "s" : "") << " at ";
      for (auto *CI : llvm::drop_begin(MergableCIs)) {
        OR << ore::NV("OpenMPParallelMerge", CI->getDebugLoc());
        if (CI != MergableCIs.back())
          OR << ", ";
      }
      return OR << ".";
    };

    emitRemark<OptimizationRemark>(MergableCIs.front(), "OMP150", Remark);

    Function *OriginalFn = BB->getParent();
    LLVM_DEBUG(dbgs() << TAG << "Merge " << MergableCIs.size()do { } while (false)
                      << " parallel regions in " << OriginalFn->getName()do { } while (false)
                      << "\n")do { } while (false);

    // Isolate the calls to merge in a separate block.
    EndBB = SplitBlock(BB, MergableCIs.back()->getNextNode(), DT, LI);
    BasicBlock *AfterBB =
        SplitBlock(EndBB, &*EndBB->getFirstInsertionPt(), DT, LI);
    StartBB = SplitBlock(BB, MergableCIs.front(), DT, LI, nullptr,
                         "omp.par.merged");

    assert(BB->getUniqueSuccessor() == StartBB && "Expected a different CFG")((void)0);
    const DebugLoc DL = BB->getTerminator()->getDebugLoc();
    BB->getTerminator()->eraseFromParent();

    // Create sequential regions for sequential instructions that are
    // in-between mergable parallel regions.
    for (auto *It = MergableCIs.begin(), *End = MergableCIs.end() - 1;
         It != End; ++It) {
      Instruction *ForkCI = *It;
      Instruction *NextForkCI = *(It + 1);

      // Continue if there are not in-between instructions.
      if (ForkCI->getNextNode() == NextForkCI)
        continue;

      CreateSequentialRegion(OriginalFn, BB, ForkCI->getNextNode(),
                             NextForkCI->getPrevNode());
    }

    OpenMPIRBuilder::LocationDescription Loc(InsertPointTy(BB, BB->end()),
                                             DL);
    IRBuilder<>::InsertPoint AllocaIP(
        &OriginalFn->getEntryBlock(),
        OriginalFn->getEntryBlock().getFirstInsertionPt());
    // Create the merged parallel region with default proc binding, to
    // avoid overriding binding settings, and without explicit cancellation.
    InsertPointTy AfterIP = OMPInfoCache.OMPBuilder.createParallel(
        Loc, AllocaIP, BodyGenCB, PrivCB, FiniCB, nullptr, nullptr,
        OMP_PROC_BIND_default, /* IsCancellable */ false);
    BranchInst::Create(AfterBB, AfterIP.getBlock());

    // Perform the actual outlining.
    OMPInfoCache.OMPBuilder.finalize(OriginalFn,
                                     /* AllowExtractorSinking */ true);

    Function *OutlinedFn = MergableCIs.front()->getCaller();

    // Replace the __kmpc_fork_call calls with direct calls to the outlined
    // callbacks.
    SmallVector<Value *, 8> Args;
    for (auto *CI : MergableCIs) {
      Value *Callee =
          CI->getArgOperand(CallbackCalleeOperand)->stripPointerCasts();
      FunctionType *FT =
          cast<FunctionType>(Callee->getType()->getPointerElementType());
      Args.clear();
      Args.push_back(OutlinedFn->getArg(0));
      Args.push_back(OutlinedFn->getArg(1));
      for (unsigned U = CallbackFirstArgOperand, E = CI->getNumArgOperands();
           U < E; ++U)
        Args.push_back(CI->getArgOperand(U));

      CallInst *NewCI = CallInst::Create(FT, Callee, Args, "", CI);
      if (CI->getDebugLoc())
        NewCI->setDebugLoc(CI->getDebugLoc());

      // Forward parameter attributes from the callback to the callee.
      for (unsigned U = CallbackFirstArgOperand, E = CI->getNumArgOperands();
           U < E; ++U)
        for (const Attribute &A : CI->getAttributes().getParamAttributes(U))
          NewCI->addParamAttr(
              U - (CallbackFirstArgOperand - CallbackCalleeOperand), A);

      // Emit an explicit barrier to replace the implicit fork-join barrier.
      if (CI != MergableCIs.back()) {
        // TODO: Remove barrier if the merged parallel region includes the
        // 'nowait' clause.
        OMPInfoCache.OMPBuilder.createBarrier(
            InsertPointTy(NewCI->getParent(),
                          NewCI->getNextNode()->getIterator()),
            OMPD_parallel);
      }

      CI->eraseFromParent();
    }

    assert(OutlinedFn != OriginalFn && "Outlining failed")((void)0);
    CGUpdater.registerOutlinedFunction(*OriginalFn, *OutlinedFn);
    CGUpdater.reanalyzeFunction(*OriginalFn);

    NumOpenMPParallelRegionsMerged += MergableCIs.size();

    return true;
  };

  // Helper function that identifes sequences of
  // __kmpc_fork_call uses in a basic block.
  auto DetectPRsCB = [&](Use &U, Function &F) {
    CallInst *CI = getCallIfRegularCall(U, &RFI);
    BB2PRMap[CI->getParent()].insert(CI);

    return false;
  };

  BB2PRMap.clear();
  RFI.foreachUse(SCC, DetectPRsCB);
  SmallVector<SmallVector<CallInst *, 4>, 4> MergableCIsVector;
  // Find mergable parallel regions within a basic block that are
  // safe to merge, that is any in-between instructions can safely
  // execute in parallel after merging.
  // TODO: support merging across basic-blocks.
  for (auto &It : BB2PRMap) {
    auto &CIs = It.getSecond();
    if (CIs.size() < 2)
      continue;

    BasicBlock *BB = It.getFirst();
    SmallVector<CallInst *, 4> MergableCIs;

    /// Returns true if the instruction is mergable, false otherwise.
    /// A terminator instruction is unmergable by definition since merging
    /// works within a BB. Instructions before the mergable region are
    /// mergable if they are not calls to OpenMP runtime functions that may
    /// set different execution parameters for subsequent parallel regions.
    /// Instructions in-between parallel regions are mergable if they are not
    /// calls to any non-intrinsic function since that may call a non-mergable
    /// OpenMP runtime function.
    auto IsMergable = [&](Instruction &I, bool IsBeforeMergableRegion) {
      // We do not merge across BBs, hence return false (unmergable) if the
      // instruction is a terminator.
      if (I.isTerminator())
        return false;

      if (!isa<CallInst>(&I))
        return true;

      CallInst *CI = cast<CallInst>(&I);
      if (IsBeforeMergableRegion) {
        Function *CalledFunction = CI->getCalledFunction();
        if (!CalledFunction)
          return false;
        // Return false (unmergable) if the call before the parallel
        // region calls an explicit affinity (proc_bind) or number of
        // threads (num_threads) compiler-generated function. Those settings
        // may be incompatible with following parallel regions.
        // TODO: ICV tracking to detect compatibility.
        for (const auto &RFI : UnmergableCallsInfo) {
          if (CalledFunction == RFI.Declaration)
            return false;
        }
      } else {
        // Return false (unmergable) if there is a call instruction
        // in-between parallel regions when it is not an intrinsic. It
        // may call an unmergable OpenMP runtime function in its callpath.
        // TODO: Keep track of possible OpenMP calls in the callpath.
        if (!isa<IntrinsicInst>(CI))
          return false;
      }

      return true;
    };
    // Find maximal number of parallel region CIs that are safe to merge.
    for (auto It = BB->begin(), End = BB->end(); It != End;) {
      Instruction &I = *It;
      ++It;

      if (CIs.count(&I)) {
        MergableCIs.push_back(cast<CallInst>(&I));
        continue;
      }

      // Continue expanding if the instruction is mergable.
      if (IsMergable(I, MergableCIs.empty()))
        continue;

      // Forward the instruction iterator to skip the next parallel region
      // since there is an unmergable instruction which can affect it.
      for (; It != End; ++It) {
        Instruction &SkipI = *It;
        if (CIs.count(&SkipI)) {
          LLVM_DEBUG(dbgs() << TAG << "Skip parallel region " << SkipIdo { } while (false)
                            << " due to " << I << "\n")do { } while (false);
          ++It;
          break;
        }
      }

      // Store mergable regions found.
      if (MergableCIs.size() > 1) {
        MergableCIsVector.push_back(MergableCIs);
        LLVM_DEBUG(dbgs() << TAG << "Found " << MergableCIs.size()do { } while (false)
                          << " parallel regions in block " << BB->getName()do { } while (false)
                          << " of function " << BB->getParent()->getName()do { } while (false)
                          << "\n";)do { } while (false);
      }

      MergableCIs.clear();
    }

    if (!MergableCIsVector.empty()) {
      Changed = true;

      for (auto &MergableCIs : MergableCIsVector)
        Merge(MergableCIs, BB);
      MergableCIsVector.clear();
    }
  }

  if (Changed) {
    /// Re-collect use for fork calls, emitted barrier calls, and
    /// any emitted master/end_master calls.
    OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_fork_call);
    OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_barrier);
    OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_master);
    OMPInfoCache.recollectUsesForFunction(OMPRTL___kmpc_end_master);
  }

  return Changed;
}

/// Try to delete parallel regions if possible.
///
/// A regular call of __kmpc_fork_call is removed when its callback operand
/// resolves (through pointer casts) to a function that only reads memory and
/// carries the WillReturn attribute.
///
/// \returns true if at least one call was deleted.
bool deleteParallelRegions() {
  const unsigned CallbackCalleeOperand = 2;

  OMPInformationCache::RuntimeFunctionInfo &RFI =
      OMPInfoCache.RFIs[OMPRTL___kmpc_fork_call];

  if (!RFI.Declaration)
    return false;

  bool Changed = false;
  auto DeleteCallCB = [&](Use &U, Function &) {
    CallInst *CI = getCallIfRegularCall(U);
    if (!CI)
      return false;
    auto *Fn = dyn_cast<Function>(
        CI->getArgOperand(CallbackCalleeOperand)->stripPointerCasts());
    if (!Fn)
      return false;
    if (!Fn->onlyReadsMemory())
      return false;
    if (!Fn->hasFnAttribute(Attribute::WillReturn))
      return false;

    LLVM_DEBUG(dbgs() << TAG << "Delete read-only parallel region in "
                      << CI->getCaller()->getName() << "\n");

    auto Remark = [&](OptimizationRemark OR) {
      return OR << "Removing parallel region with no side-effects.";
    };
    emitRemark<OptimizationRemark>(CI, "OMP160", Remark);

    // Keep the call graph in sync before the instruction goes away.
    CGUpdater.removeCallSite(*CI);
    CI->eraseFromParent();
    Changed = true;
    ++NumOpenMPParallelRegionsDeleted;
    return true;
  };

  RFI.foreachUse(SCC, DeleteCallCB);

  return Changed;
}

/// Try to eliminate runtime calls by reusing existing ones.
///
/// For every function in the SCC, the per-function overload is invoked for
/// each runtime function listed below, and once more for
/// __kmpc_global_thread_num with an optional replacement argument.
///
/// \returns true if any per-function invocation reported a change.
bool deduplicateRuntimeCalls() {
  bool Changed = false;

  RuntimeFunction DeduplicableRuntimeCallIDs[] = {
      OMPRTL_omp_get_num_threads,
      OMPRTL_omp_in_parallel,
      OMPRTL_omp_get_cancellation,
      OMPRTL_omp_get_thread_limit,
      OMPRTL_omp_get_supported_active_levels,
      OMPRTL_omp_get_level,
      OMPRTL_omp_get_ancestor_thread_num,
      OMPRTL_omp_get_team_size,
      OMPRTL_omp_get_active_level,
      OMPRTL_omp_in_final,
      OMPRTL_omp_get_proc_bind,
      OMPRTL_omp_get_num_places,
      OMPRTL_omp_get_num_procs,
      OMPRTL_omp_get_place_num,
      OMPRTL_omp_get_partition_num_places,
      OMPRTL_omp_get_partition_place_nums};

  // Global-tid is handled separately.
  SmallSetVector<Value *, 16> GTIdArgs;
  collectGlobalThreadIdArguments(GTIdArgs);
  LLVM_DEBUG(dbgs() << TAG << "Found " << GTIdArgs.size()
                    << " global thread ID arguments\n");

  for (Function *F : SCC) {
    for (auto DeduplicableRuntimeCallID : DeduplicableRuntimeCallIDs)
      Changed |= deduplicateRuntimeCalls(
          *F, OMPInfoCache.RFIs[DeduplicableRuntimeCallID]);

    // __kmpc_global_thread_num is special as we can replace it with an
    // argument in enough cases to make it worth trying.
    // The first argument of F found in GTIdArgs is used; GTIdArg stays null
    // if there is none.
    Value *GTIdArg = nullptr;
    for (Argument &Arg : F->args())
      if (GTIdArgs.count(&Arg)) {
        GTIdArg = &Arg;
        break;
      }
    Changed |= deduplicateRuntimeCalls(
        *F, OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num], GTIdArg);
  }

  return Changed;
}

/// Tries to hide the latency of runtime calls that involve host to
/// device memory transfers by splitting them into their "issue" and "wait"
/// versions. The "issue" is moved upwards as much as possible. The "wait" is
/// moved downwards as much as possible. The "issue" issues the memory transfer
/// asynchronously, returning a handle. The "wait" waits in the returned
/// handle for the memory transfer to finish.
///
/// Only regular calls of __tgt_target_data_begin_mapper whose offload arrays
/// can be mapped by getValuesInOffloadArrays are considered.
///
/// \returns true if at least one runtime call was split.
bool hideMemTransfersLatency() {
  auto &RFI = OMPInfoCache.RFIs[OMPRTL___tgt_target_data_begin_mapper];
  bool Changed = false;
  auto SplitMemTransfers = [&](Use &U, Function &Decl) {
    auto *RTCall = getCallIfRegularCall(U, &RFI);
    if (!RTCall)
      return false;

    OffloadArray OffloadArrays[3];
    if (!getValuesInOffloadArrays(*RTCall, OffloadArrays))
      return false;

    LLVM_DEBUG(dumpValuesInOffloadArrays(OffloadArrays));

    // TODO: Check if can be moved upwards.
    bool WasSplit = false;
    Instruction *WaitMovementPoint = canBeMovedDownwards(*RTCall);
    if (WaitMovementPoint)
      WasSplit = splitTargetDataBeginRTC(*RTCall, *WaitMovementPoint);

    Changed |= WasSplit;
    return WasSplit;
  };
  RFI.foreachUse(SCC, SplitMemTransfers);

  return Changed;
}

void analysisGlobalization() {
  auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared];

  auto CheckGlobalization = [&](Use &U, Function &Decl) {
    if (CallInst *CI = getCallIfRegularCall(U, &RFI)) {
      auto Remark = [&](OptimizationRemarkMissed ORM) {
        return ORM
               << "Found thread data sharing on the GPU. "
               << "Expect degraded performance due to data globalization.";
      };
      emitRemark<OptimizationRemarkMissed>(CI, "OMP112", Remark);
    }

    return false;
  };

  RFI.foreachUse(SCC, CheckGlobalization);
}

/// Maps the values stored in the offload arrays passed as arguments to
/// \p RuntimeCall into the offload arrays in \p OAs.
///
/// \p OAs must provide room for exactly three arrays: [0] receives the base
/// pointers, [1] the pointers and [2] the sizes.
///
/// \returns false if an underlying object is not an alloca (other than a
/// global sizes array) or if any OffloadArray::initialize() call fails.
bool getValuesInOffloadArrays(CallInst &RuntimeCall,
                              MutableArrayRef<OffloadArray> OAs) {
  assert(OAs.size() == 3 && "Need space for three offload arrays!");

  // A runtime call that involves memory offloading looks something like:
  // call void @__tgt_target_data_begin_mapper(arg0, arg1,
  //   i8** %offload_baseptrs, i8** %offload_ptrs, i64* %offload_sizes,
  // ...)
  // So, the idea is to access the allocas that allocate space for these
  // offload arrays, offload_baseptrs, offload_ptrs, offload_sizes.
  // Therefore:
  // i8** %offload_baseptrs.
  Value *BasePtrsArg =
      RuntimeCall.getArgOperand(OffloadArray::BasePtrsArgNum);
  // i8** %offload_ptrs.
  Value *PtrsArg = RuntimeCall.getArgOperand(OffloadArray::PtrsArgNum);
  // i64* %offload_sizes.
  Value *SizesArg = RuntimeCall.getArgOperand(OffloadArray::SizesArgNum);

  // Get values stored in **offload_baseptrs.
  auto *BasePtrsArray = dyn_cast<AllocaInst>(getUnderlyingObject(BasePtrsArg));
  if (!BasePtrsArray)
    return false;
  if (!OAs[0].initialize(*BasePtrsArray, RuntimeCall))
    return false;

  // Get values stored in **offload_ptrs.
  auto *PtrsArray = dyn_cast<AllocaInst>(getUnderlyingObject(PtrsArg));
  if (!PtrsArray)
    return false;
  if (!OAs[1].initialize(*PtrsArray, RuntimeCall))
    return false;

  // Get values stored in *offload_sizes.
  Value *SizesObject = getUnderlyingObject(SizesArg);
  // If it's a [constant] global array don't analyze it. In that case OAs[2]
  // is left untouched.
  // NOTE(review): this tests isa<Constant> on the global itself, not whether
  // the global is marked constant -- confirm that this is the intent.
  if (isa<GlobalValue>(SizesObject))
    return isa<Constant>(SizesObject);

  auto *SizesArray = dyn_cast<AllocaInst>(SizesObject);
  if (!SizesArray)
    return false;
  if (!OAs[2].initialize(*SizesArray, RuntimeCall))
    return false;

  return true;
}

/// Debug helper: prints, via LLVM_DEBUG, the values stored in each of the
/// three offload arrays in \p OAs (base pointers, pointers, sizes). It exists
/// to check that getValuesInOffloadArrays works as intended.
/// TODO: Move this to a unittest when unittests are available for OpenMPOpt.
void dumpValuesInOffloadArrays(ArrayRef<OffloadArray> OAs) {
  assert(OAs.size() == 3 && "There are three offload arrays to debug!");

  LLVM_DEBUG(dbgs() << TAG << " Successfully got offload values:\n");
  std::string ValuesStr;
  raw_string_ostream Printer(ValuesStr);
  std::string Separator = " --- ";

  // Appends every value stored in \p OA, each followed by the separator, to
  // the shared string stream.
  auto RenderStoredValues = [&](const OffloadArray &OA) {
    for (auto *Stored : OA.StoredValues) {
      Stored->print(Printer);
      Printer << Separator;
    }
  };

  RenderStoredValues(OAs[0]);
  LLVM_DEBUG(dbgs() << "\t\toffload_baseptrs: " << Printer.str() << "\n");
  ValuesStr.clear();

  RenderStoredValues(OAs[1]);
  LLVM_DEBUG(dbgs() << "\t\toffload_ptrs: " << Printer.str() << "\n");
  ValuesStr.clear();

  RenderStoredValues(OAs[2]);
  LLVM_DEBUG(dbgs() << "\t\toffload_sizes: " << Printer.str() << "\n");
}

/// Determines where the "wait" counterpart of \p RuntimeCall can be placed.
///
/// Walks the instructions that follow \p RuntimeCall in its basic block and
/// stops at the first one that may have side effects or may read memory.
///
/// \returns that instruction if at least one instruction was skipped before
/// it, nullptr if it directly follows \p RuntimeCall (moving is not worth
/// it), or the block terminator if no such instruction was encountered.
Instruction *canBeMovedDownwards(CallInst &RuntimeCall) {
  // FIXME: This traverses only the BasicBlock where RuntimeCall is.
  //  Make it traverse the CFG.

  bool SkippedAny = false;
  for (Instruction *Candidate = RuntimeCall.getNextNode(); Candidate;
       Candidate = Candidate->getNextNode()) {

    // TODO: Once we detect the regions to be offloaded we should use the
    //  alias analysis manager to check if Candidate may modify one of
    //  the offloaded regions.
    const bool BlocksMovement =
        Candidate->mayHaveSideEffects() || Candidate->mayReadFromMemory();
    if (BlocksMovement)
      return SkippedAny ? Candidate : nullptr;

    // FIXME: For now if we move it over anything without side effect
    //  is worth it.
    SkippedAny = true;
  }

  // Return end of BasicBlock.
  return RuntimeCall.getParent()->getTerminator();
}

/// Splits \p RuntimeCall into its "issue" and "wait" counterparts.
///
/// The "issue" call replaces \p RuntimeCall in place and receives all of its
/// arguments plus a freshly allocated handle. The "wait" call is inserted
/// right before \p WaitMovementPoint and receives the device id and the same
/// handle. \p RuntimeCall is erased.
///
/// \returns true, unconditionally.
bool splitTargetDataBeginRTC(CallInst &RuntimeCall,
                             Instruction &WaitMovementPoint) {
  auto &OMPBuilder = OMPInfoCache.OMPBuilder;
  Function *Caller = RuntimeCall.getCaller();

  // Stack allocated handle (__tgt_async_info), placed in front of the first
  // instruction of the entry block. It stores the information of the async
  // transfer so it can be waited on later.
  // NOTE(review): the alloca uses the function's address space; confirm this
  // matches the address space expected for allocas on the target.
  Instruction *EntryFront = &(Caller->getEntryBlock().front());
  AllocaInst *Handle = new AllocaInst(
      OMPBuilder.AsyncInfo, Caller->getAddressSpace(), "handle", EntryFront);

  // Add "issue" runtime call declaration:
  // declare %struct.tgt_async_info @__tgt_target_data_begin_issue(i64, i32,
  //   i8**, i8**, i64*, i64*)
  FunctionCallee IssueDecl = OMPBuilder.getOrCreateRuntimeFunction(
      M, OMPRTL___tgt_target_data_begin_mapper_issue);

  // The asynchronous version takes the original arguments followed by the
  // handle.
  SmallVector<Value *, 16> IssueArgs(RuntimeCall.arg_begin(),
                                     RuntimeCall.arg_end());
  IssueArgs.push_back(Handle);

  // Change RuntimeCall call site for its asynchronous version.
  CallInst *IssueCall =
      CallInst::Create(IssueDecl, IssueArgs, /*NameStr=*/"", &RuntimeCall);
  RuntimeCall.eraseFromParent();

  // Add "wait" runtime call declaration:
  // declare void @__tgt_target_data_begin_wait(i64, %struct.__tgt_async_info)
  FunctionCallee WaitDecl = OMPBuilder.getOrCreateRuntimeFunction(
      M, OMPRTL___tgt_target_data_begin_mapper_wait);

  Value *DeviceID = IssueCall->getArgOperand(OffloadArray::DeviceIDArgNum);
  Value *WaitArgs[2] = {DeviceID, // device_id.
                        Handle};  // handle to wait on.
  CallInst::Create(WaitDecl, WaitArgs, /*NameStr=*/"", &WaitMovementPoint);

  return true;
}

/// Folds \p NextIdent into the running choice \p CurrentIdent.
///
/// If both are the same value it is returned unchanged. Otherwise
/// \p NextIdent is returned when it is acceptable, i.e., \p GlobalOnly is
/// false or \p NextIdent is a GlobalValue; in that case \p SingleChoice is
/// set to true only if there was no \p CurrentIdent before. Returns nullptr
/// if \p NextIdent is not acceptable.
static Value *combinedIdentStruct(Value *CurrentIdent, Value *NextIdent,
                                  bool GlobalOnly, bool &SingleChoice) {
  if (CurrentIdent == NextIdent)
    return CurrentIdent;

  const bool NextIsAcceptable = !GlobalOnly || isa<GlobalValue>(NextIdent);
  if (!NextIsAcceptable)
    return nullptr;

  // TODO: Figure out how to actually combine multiple debug locations. For
  //       now we just keep an existing one if there is a single choice.
  SingleChoice = !CurrentIdent;
  return NextIdent;
}

/// Return an `struct ident_t*` value that represents the ones used in the
/// calls of \p RFI inside of \p F. If \p GlobalOnly is true, we will not
/// return a local `struct ident_t*`. For now, if we cannot find a suitable
/// return value, or if there was more than a single choice, we create one
/// from scratch. We also do not yet combine information, e.g., the source
/// locations, see combinedIdentStruct.
Value *
getCombinedIdentFromCallUsesIn(OMPInformationCache::RuntimeFunctionInfo &RFI,
                               Function &F, bool GlobalOnly) {
  bool SingleChoice = true;
  Value *Ident = nullptr;
  auto CombineIdentStruct = [&](Use &U, Function &Caller) {
    CallInst *CI = getCallIfRegularCall(U, &RFI);
    // Only calls of RFI located in F are of interest.
    if (!CI || &F != &Caller)
      return false;
    // Forward the caller's GlobalOnly request; it used to be ignored in favor
    // of a hard-coded `true`.
    Ident = combinedIdentStruct(Ident, CI->getArgOperand(0), GlobalOnly,
                                SingleChoice);
    return false;
  };
  RFI.foreachUse(SCC, CombineIdentStruct);

  if (!Ident || !SingleChoice) {
    // The IRBuilder uses the insertion block to get to the module, this is
    // unfortunate but we work around it for now.
    if (!OMPInfoCache.OMPBuilder.getInsertionPoint().getBlock())
      OMPInfoCache.OMPBuilder.updateToLocation(OpenMPIRBuilder::InsertPointTy(
          &F.getEntryBlock(), F.getEntryBlock().begin()));
    // Create a fallback location if none was found.
    // TODO: Use the debug locations of the calls instead.
    Constant *Loc = OMPInfoCache.OMPBuilder.getOrCreateDefaultSrcLocStr();
    Ident = OMPInfoCache.OMPBuilder.getOrCreateIdent(Loc);
  }
  return Ident;
}

/// Try to eliminate calls of \p RFI in \p F by reusing an existing one or
/// \p ReplVal if given.
///
/// If no \p ReplVal is given, the first movable call of \p RFI in \p F is
/// moved (right after the __kmpc_target_init call for kernels, otherwise to
/// the first insertion point of the entry block) and used as the replacement.
/// All other regular calls of \p RFI in \p F are then replaced by it and
/// erased.
///
/// \returns true if at least one call was replaced.
bool deduplicateRuntimeCalls(Function &F,
                             OMPInformationCache::RuntimeFunctionInfo &RFI,
                             Value *ReplVal = nullptr) {
  auto *UV = RFI.getUseVector(F);
  // We need at least two candidates (counting ReplVal) to deduplicate.
  if (!UV || UV->size() + (ReplVal != nullptr) < 2)
    return false;

  LLVM_DEBUG(
      dbgs() << TAG << "Deduplicate " << UV->size() << " uses of " << RFI.Name
             << (ReplVal ? " with an existing value\n" : "\n") << "\n");

  assert((!ReplVal || (isa<Argument>(ReplVal) &&
                       cast<Argument>(ReplVal)->getParent() == &F)) &&
         "Unexpected replacement value!");

  // A call can be moved if it has no arguments, or if its first argument is
  // an ident pointer and none of the remaining arguments is an instruction.
  // TODO: Use dominance to find a good position instead.
  auto CanBeMoved = [this](CallBase &CB) {
    unsigned NumArgs = CB.getNumArgOperands();
    if (NumArgs == 0)
      return true;
    if (CB.getArgOperand(0)->getType() != OMPInfoCache.OMPBuilder.IdentPtr)
      return false;
    for (unsigned u = 1; u < NumArgs; ++u)
      if (isa<Instruction>(CB.getArgOperand(u)))
        return false;
    return true;
  };

  if (!ReplVal) {
    for (Use *U : *UV)
      if (CallInst *CI = getCallIfRegularCall(*U, &RFI)) {
        if (!CanBeMoved(*CI))
          continue;

        // If the function is a kernel, dedup will move
        // the runtime call right after the kernel init callsite. Otherwise,
        // it will move it to the beginning of the caller function.
        if (isKernel(F)) {
          auto &KernelInitRFI = OMPInfoCache.RFIs[OMPRTL___kmpc_target_init];
          auto *KernelInitUV = KernelInitRFI.getUseVector(F);

          // getUseVector may return a nullptr (see the check on UV above),
          // so guard against it before looking at the vector.
          if (!KernelInitUV || KernelInitUV->empty())
            continue;

          assert(KernelInitUV->size() == 1 &&
                 "Expected a single __kmpc_target_init in kernel\n");

          CallInst *KernelInitCI =
              getCallIfRegularCall(*KernelInitUV->front(), &KernelInitRFI);
          assert(KernelInitCI &&
                 "Expected a call to __kmpc_target_init in kernel\n");

          CI->moveAfter(KernelInitCI);
        } else
          CI->moveBefore(&*F.getEntryBlock().getFirstInsertionPt());
        ReplVal = CI;
        break;
      }
    if (!ReplVal)
      return false;
  }

  // If we use a call as a replacement value we need to make sure the ident is
  // valid at the new location. For now we just pick a global one, either
  // existing and used by one of the calls, or created from scratch.
  if (CallBase *CI = dyn_cast<CallBase>(ReplVal)) {
    if (CI->getNumArgOperands() > 0 &&
        CI->getArgOperand(0)->getType() == OMPInfoCache.OMPBuilder.IdentPtr) {
      Value *Ident = getCombinedIdentFromCallUsesIn(RFI, F,
                                                    /* GlobalOnly */ true);
      CI->setArgOperand(0, Ident);
    }
  }

  bool Changed = false;
  auto ReplaceAndDeleteCB = [&](Use &U, Function &Caller) {
    CallInst *CI = getCallIfRegularCall(U, &RFI);
    // Skip non-calls, the replacement itself, and calls outside of F.
    if (!CI || CI == ReplVal || &F != &Caller)
      return false;
    assert(CI->getCaller() == &F && "Unexpected call!");

    auto Remark = [&](OptimizationRemark OR) {
      return OR << "OpenMP runtime call "
                << ore::NV("OpenMPOptRuntime", RFI.Name) << " deduplicated.";
    };
    // Attach the remark to the call if it has a debug location, otherwise to
    // the function.
    if (CI->getDebugLoc())
      emitRemark<OptimizationRemark>(CI, "OMP170", Remark);
    else
      emitRemark<OptimizationRemark>(&F, "OMP170", Remark);

    CGUpdater.removeCallSite(*CI);
    CI->replaceAllUsesWith(ReplVal);
    CI->eraseFromParent();
    ++NumOpenMPRuntimeCallsDeduplicated;
    Changed = true;
    return true;
  };
  RFI.foreachUse(SCC, ReplaceAndDeleteCB);

  return Changed;
}

/// Collect arguments that represent the global thread id in \p GTIdArgs.
///
/// Starting from the results of __kmpc_global_thread_num calls, a callee
/// argument is added if the callee has local linkage and every use of the
/// callee is a regular call that passes a known global thread id in that
/// argument position. The search is repeated for all arguments added.
void collectGlobalThreadIdArguments(SmallSetVector<Value *, 16> &GTIdArgs) {
  // TODO: Below we basically perform a fixpoint iteration with a pessimistic
  //       initialization. We could define an AbstractAttribute instead and
  //       run the Attributor here once it can be run as an SCC pass.

  OMPInformationCache::RuntimeFunctionInfo &GlobThreadNumRFI =
      OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num];

  // Helper to check the argument \p ArgNo at all call sites of \p F for
  // a GTId. The call \p RefCI is the one we came from and is accepted as is.
  auto AllCallSitesPassGTId = [&](Function &F, unsigned ArgNo,
                                  CallInst &RefCI) {
    if (!F.hasLocalLinkage())
      return false;
    for (Use &U : F.uses()) {
      CallInst *CI = getCallIfRegularCall(U);
      // Any use that is not a regular call defeats the analysis.
      if (!CI)
        return false;

      Value *ArgOp = CI->getArgOperand(ArgNo);
      const bool PassesGTId =
          CI == &RefCI || GTIdArgs.count(ArgOp) ||
          getCallIfRegularCall(*ArgOp, &GlobThreadNumRFI);
      if (!PassesGTId)
        return false;
    }
    return true;
  };

  // Helper to identify uses of a GTId as GTId arguments.
  auto AddUserArgs = [&](Value &GTId) {
    for (Use &U : GTId.uses()) {
      auto *CI = dyn_cast<CallInst>(U.getUser());
      if (!CI || !CI->isArgOperand(&U))
        continue;

      Function *Callee = CI->getCalledFunction();
      if (!Callee)
        continue;

      if (AllCallSitesPassGTId(*Callee, U.getOperandNo(), *CI))
        GTIdArgs.insert(Callee->getArg(U.getOperandNo()));
    }
  };

  // The argument users of __kmpc_global_thread_num calls are GTIds.
  GlobThreadNumRFI.foreachUse(SCC, [&](Use &U, Function &) {
    if (CallInst *CI = getCallIfRegularCall(U, &GlobThreadNumRFI))
      AddUserArgs(*CI);
    return false;
  });

  // Transitively search for more arguments by looking at the users of the
  // ones we know already. During the search the GTIdArgs vector is extended
  // so we cannot cache the size nor can we use a range based for.
  for (unsigned Idx = 0; Idx < GTIdArgs.size(); ++Idx)
    AddUserArgs(*GTIdArgs[Idx]);
}

/// Kernel (=GPU) optimizations and utility functions
///
///{{

/// Returns true if \p F is contained in the cached set of kernels, i.e., it
/// is an entry point for target offloading.
bool isKernel(Function &F) {
  return OMPInfoCache.Kernels.count(&F) != 0;
}

/// Cache to remember the unique kernel for a function.
DenseMap<Function *, Optional<Kernel>> UniqueKernelMap;

/// Find the unique kernel that will execute \p F, if any.
Kernel getUniqueKernelFor(Function &F);

/// Find the unique kernel that will execute \p I, if any. This is the unique
/// kernel of the function that contains \p I.
Kernel getUniqueKernelFor(Instruction &I) {
  Function &Parent = *I.getFunction();
  return getUniqueKernelFor(Parent);
}

/// Rewrite the device (=GPU) code state machine created in non-SPMD mode in
/// the cases where we can avoid taking the address of a function.
bool rewriteDeviceCodeStateMachine();

///
///}}

/// Emit a remark attached to the instruction \p I.
///
/// \tparam RemarkKind one of
///   - OptimizationRemark for a successful optimization attempt,
///   - OptimizationRemarkMissed for a failed optimization attempt,
///   - OptimizationRemarkAnalysis for additional information about an
///     optimization attempt.
///
/// \p RemarkCB receives a freshly built RemarkKind and returns the remark to
/// emit. If \p RemarkName starts with "OMP", the name is appended to the
/// message in square brackets.
template <typename RemarkKind, typename RemarkCallBack>
void emitRemark(Instruction *I, StringRef RemarkName,
                RemarkCallBack &&RemarkCB) const {
  Function *F = I->getFunction();
  auto &ORE = OREGetter(F);

  if (!RemarkName.startswith("OMP")) {
    ORE.emit(
        [&]() { return RemarkCB(RemarkKind(DEBUG_TYPE, RemarkName, I)); });
    return;
  }

  ORE.emit([&]() {
    return RemarkCB(RemarkKind(DEBUG_TYPE, RemarkName, I))
           << " [" << RemarkName << "]";
  });
}

/// Emit a remark attached to the function \p F.
///
/// \p RemarkCB receives a freshly built RemarkKind and returns the remark to
/// emit. If \p RemarkName starts with "OMP", the name is appended to the
/// message in square brackets.
template <typename RemarkKind, typename RemarkCallBack>
void emitRemark(Function *F, StringRef RemarkName,
                RemarkCallBack &&RemarkCB) const {
  auto &ORE = OREGetter(F);

  if (!RemarkName.startswith("OMP")) {
    ORE.emit(
        [&]() { return RemarkCB(RemarkKind(DEBUG_TYPE, RemarkName, F)); });
    return;
  }

  ORE.emit([&]() {
    return RemarkCB(RemarkKind(DEBUG_TYPE, RemarkName, F))
           << " [" << RemarkName << "]";
  });
}

/// RAII struct to temporarily change an RTL function's linkage to external.
/// This prevents it from being mistakenly removed by other optimizations.
///
/// If the runtime function has no declaration, construction and destruction
/// do nothing.
struct ExternalizationRAII {
  /// Remember the current linkage of the declaration of \p RFKind, if there
  /// is one, and switch it to external linkage.
  ExternalizationRAII(OMPInformationCache &OMPInfoCache,
                      RuntimeFunction RFKind)
      : Declaration(OMPInfoCache.RFIs[RFKind].Declaration) {
    if (!Declaration)
      return;

    LinkageType = Declaration->getLinkage();
    Declaration->setLinkage(GlobalValue::ExternalLinkage);
  }

  // A copy would restore the linkage a second time at an unrelated point, so
  // the guard is not copyable.
  ExternalizationRAII(const ExternalizationRAII &) = delete;
  ExternalizationRAII &operator=(const ExternalizationRAII &) = delete;

  /// Restore the linkage remembered by the constructor.
  ~ExternalizationRAII() {
    if (!Declaration)
      return;

    Declaration->setLinkage(LinkageType);
  }

  /// The declaration whose linkage is changed; may be null.
  Function *Declaration;

  /// The linkage to restore. Only meaningful if Declaration is non-null; the
  /// initializer merely avoids an indeterminate value.
  GlobalValue::LinkageTypes LinkageType = GlobalValue::ExternalLinkage;
};

/// The underlying module.
Module &M;

/// The SCC we are operating on.
SmallVectorImpl<Function *> &SCC;

/// Callback to update the call graph, the first argument is a removed call,
/// the second an optional replacement call.
CallGraphUpdater &CGUpdater;

/// Callback to get an OptimizationRemarkEmitter from a Function *
OptimizationRemarkGetter OREGetter;

/// OpenMP-specific information cache. Also used for Attributor runs.
OMPInformationCache &OMPInfoCache;

/// Attributor instance.
Attributor &A;

/// Helper function to run Attributor on SCC.
bool runAttributor(bool IsModulePass) {
  if (SCC.empty())
    return false;

  // Temporarily make these function have external linkage so the Attributor
  // doesn't remove them when we try to look them up later.
  ExternalizationRAII Parallel(OMPInfoCache, OMPRTL___kmpc_kernel_parallel);
  ExternalizationRAII EndParallel(OMPInfoCache,
                                  OMPRTL___kmpc_kernel_end_parallel);
  ExternalizationRAII BarrierSPMD(OMPInfoCache,
                                  OMPRTL___kmpc_barrier_simple_spmd);

  registerAAs(IsModulePass);

  ChangeStatus Changed = A.run();

  LLVM_DEBUG(dbgs() << "[Attributor] Done with " << SCC.size()do { } while (false)
                    << " functions, result: " << Changed << ".\n")do { } while (false);

  return Changed == ChangeStatus::CHANGED;
}

void registerFoldRuntimeCall(RuntimeFunction RF);

/// Populate the Attributor with abstract attribute opportunities in the
/// function.
void registerAAs(bool IsModulePass);
1841};

1843Kernel OpenMPOpt::getUniqueKernelFor(Function &F) {
if (!OMPInfoCache.ModuleSlice.count(&F))
  return nullptr;

// Use a scope to keep the lifetime of the CachedKernel short.
{
  Optional<Kernel> &CachedKernel = UniqueKernelMap[&F];
  if (CachedKernel)
    return *CachedKernel;

  // TODO: We should use an AA to create an (optimistic and callback
  //       call-aware) call graph. For now we stick to simple patterns that
  //       are less powerful, basically the worst fixpoint.
  if (isKernel(F)) {
    CachedKernel = Kernel(&F);
    return *CachedKernel;
  }

  CachedKernel = nullptr;
  if (!F.hasLocalLinkage()) {

    // See https://openmp.llvm.org/remarks/OptimizationRemarks.html
    auto Remark = [&](OptimizationRemarkAnalysis ORA) {
      return ORA << "Potentially unknown OpenMP target region caller.";
    };
    emitRemark<OptimizationRemarkAnalysis>(&F, "OMP100", Remark);

    return nullptr;
  }
}

auto GetUniqueKernelForUse = [&](const Use &U) -> Kernel {
  if (auto *Cmp = dyn_cast<ICmpInst>(U.getUser())) {
    // Allow use in equality comparisons.
    if (Cmp->isEquality())
      return getUniqueKernelFor(*Cmp);
    return nullptr;
  }
  if (auto *CB = dyn_cast<CallBase>(U.getUser())) {
    // Allow direct calls.
    if (CB->isCallee(&U))
      return getUniqueKernelFor(*CB);

    OMPInformationCache::RuntimeFunctionInfo &KernelParallelRFI =
        OMPInfoCache.RFIs[OMPRTL___kmpc_parallel_51];
    // Allow the use in __kmpc_parallel_51 calls.
    if (OpenMPOpt::getCallIfRegularCall(*U.getUser(), &KernelParallelRFI))
      return getUniqueKernelFor(*CB);
    return nullptr;
  }
  // Disallow every other use.
  return nullptr;
};

// TODO: In the future we want to track more than just a unique kernel.
SmallPtrSet<Kernel, 2> PotentialKernels;
OMPInformationCache::foreachUse(F, [&](const Use &U) {
  PotentialKernels.insert(GetUniqueKernelForUse(U));
});

Kernel K = nullptr;
if (PotentialKernels.size() == 1)
  K = *PotentialKernels.begin();

// Cache the result.
UniqueKernelMap[&F] = K;

return K;
1911}

1913bool OpenMPOpt::rewriteDeviceCodeStateMachine() {
OMPInformationCache::RuntimeFunctionInfo &KernelParallelRFI =
    OMPInfoCache.RFIs[OMPRTL___kmpc_parallel_51];

bool Changed = false;
if (!KernelParallelRFI)
  return Changed;

for (Function *F : SCC) {

  // Check if the function is a use in a __kmpc_parallel_51 call at
  // all.
  bool UnknownUse = false;
  bool KernelParallelUse = false;
  unsigned NumDirectCalls = 0;

  SmallVector<Use *, 2> ToBeReplacedStateMachineUses;
  OMPInformationCache::foreachUse(*F, [&](Use &U) {
    if (auto *CB = dyn_cast<CallBase>(U.getUser()))
      if (CB->isCallee(&U)) {
        ++NumDirectCalls;
        return;
      }

    if (isa<ICmpInst>(U.getUser())) {
      ToBeReplacedStateMachineUses.push_back(&U);
      return;
    }

    // Find wrapper functions that represent parallel kernels.
    CallInst *CI =
        OpenMPOpt::getCallIfRegularCall(*U.getUser(), &KernelParallelRFI);
    const unsigned int WrapperFunctionArgNo = 6;
    if (!KernelParallelUse && CI &&
        CI->getArgOperandNo(&U) == WrapperFunctionArgNo) {
      KernelParallelUse = true;
      ToBeReplacedStateMachineUses.push_back(&U);
      return;
    }
    UnknownUse = true;
  });

  // Do not emit a remark if we haven't seen a __kmpc_parallel_51
  // use.
  if (!KernelParallelUse)
    continue;

  // If this ever hits, we should investigate.
  // TODO: Checking the number of uses is not a necessary restriction and
  // should be lifted.
  if (UnknownUse || NumDirectCalls != 1 ||
      ToBeReplacedStateMachineUses.size() > 2) {
    auto Remark = [&](OptimizationRemarkAnalysis ORA) {
      return ORA << "Parallel region is used in "
                 << (UnknownUse ? "unknown" : "unexpected")
                 << " ways. Will not attempt to rewrite the state machine.";
    };
    emitRemark<OptimizationRemarkAnalysis>(F, "OMP101", Remark);
    continue;
  }

  // Even if we have __kmpc_parallel_51 calls, we (for now) give
  // up if the function is not called from a unique kernel.
  Kernel K = getUniqueKernelFor(*F);
  if (!K) {
    auto Remark = [&](OptimizationRemarkAnalysis ORA) {
      return ORA << "Parallel region is not called from a unique kernel. "
                    "Will not attempt to rewrite the state machine.";
    };
    emitRemark<OptimizationRemarkAnalysis>(F, "OMP102", Remark);
    continue;
  }

  // We now know F is a parallel body function called only from the kernel K.
  // We also identified the state machine uses in which we replace the
  // function pointer by a new global symbol for identification purposes. This
  // ensures only direct calls to the function are left.

  Module &M = *F->getParent();
  Type *Int8Ty = Type::getInt8Ty(M.getContext());

  auto *ID = new GlobalVariable(
      M, Int8Ty, /* isConstant */ true, GlobalValue::PrivateLinkage,
      UndefValue::get(Int8Ty), F->getName() + ".ID");

  for (Use *U : ToBeReplacedStateMachineUses)
    U->set(ConstantExpr::getPointerBitCastOrAddrSpaceCast(
        ID, U->get()->getType()));

  ++NumOpenMPParallelRegionsReplacedInGPUStateMachine;

  Changed = true;
}

return Changed;
2008}

2010/// Abstract Attribute for tracking ICV values.
2011struct AAICVTracker : public StateWrapper<BooleanState, AbstractAttribute> {
using Base = StateWrapper<BooleanState, AbstractAttribute>;
AAICVTracker(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

void initialize(Attributor &A) override {
  Function *F = getAnchorScope();
  if (!F || !A.isFunctionIPOAmendable(*F))
    indicatePessimisticFixpoint();
}

/// Returns true if value is assumed to be tracked.
bool isAssumedTracked() const { return getAssumed(); }

/// Returns true if value is known to be tracked.
bool isKnownTracked() const { return getAssumed(); }

/// Create an abstract attribute biew for the position \p IRP.
static AAICVTracker &createForPosition(const IRPosition &IRP, Attributor &A);

/// Return the value with which \p I can be replaced for specific \p ICV.
virtual Optional<Value *> getReplacementValue(InternalControlVar ICV,
                                              const Instruction *I,
                                              Attributor &A) const {
  return None;
}

/// Return an assumed unique ICV value if a single candidate is found. If
/// there cannot be one, return a nullptr. If it is not clear yet, return the
/// Optional::NoneType.
virtual Optional<Value *>
getUniqueReplacementValue(InternalControlVar ICV) const = 0;

// Currently only nthreads is being tracked.
// this array will only grow with time.
InternalControlVar TrackableICVs[1] = {ICV_nthreads};

/// See AbstractAttribute::getName()
const std::string getName() const override { return "AAICVTracker"; }

/// See AbstractAttribute::getIdAddr()
const char *getIdAddr() const override { return &ID; }

/// This function should return true if the type of the \p AA is AAICVTracker
static bool classof(const AbstractAttribute *AA) {
  return (AA->getIdAddr() == &ID);
}

static const char ID;
2059};

2061struct AAICVTrackerFunction : public AAICVTracker {
AAICVTrackerFunction(const IRPosition &IRP, Attributor &A)
    : AAICVTracker(IRP, A) {}

// FIXME: come up with better string.
const std::string getAsStr() const override { return "ICVTrackerFunction"; }

// FIXME: come up with some stats.
void trackStatistics() const override {}

/// We don't manifest anything for this AA.
ChangeStatus manifest(Attributor &A) override {
  return ChangeStatus::UNCHANGED;
}

// Map of ICV to their values at specific program point.
EnumeratedArray<DenseMap<Instruction *, Value *>, InternalControlVar,
                InternalControlVar::ICV___last>
    ICVReplacementValuesMap;

ChangeStatus updateImpl(Attributor &A) override {
  ChangeStatus HasChanged = ChangeStatus::UNCHANGED;

  Function *F = getAnchorScope();

  auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());

  for (InternalControlVar ICV : TrackableICVs) {
    auto &SetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Setter];

    auto &ValuesMap = ICVReplacementValuesMap[ICV];
    auto TrackValues = [&](Use &U, Function &) {
      CallInst *CI = OpenMPOpt::getCallIfRegularCall(U);
      if (!CI)
        return false;

      // FIXME: handle setters with more that 1 arguments.
      /// Track new value.
      if (ValuesMap.insert(std::make_pair(CI, CI->getArgOperand(0))).second)
        HasChanged = ChangeStatus::CHANGED;

      return false;
    };

    auto CallCheck = [&](Instruction &I) {
      Optional<Value *> ReplVal = getValueForCall(A, &I, ICV);
      if (ReplVal.hasValue() &&
          ValuesMap.insert(std::make_pair(&I, *ReplVal)).second)
        HasChanged = ChangeStatus::CHANGED;

      return true;
    };

    // Track all changes of an ICV.
    SetterRFI.foreachUse(TrackValues, F);

    bool UsedAssumedInformation = false;
    A.checkForAllInstructions(CallCheck, *this, {Instruction::Call},
                              UsedAssumedInformation,
                              /* CheckBBLivenessOnly */ true);

    /// TODO: Figure out a way to avoid adding entry in
    /// ICVReplacementValuesMap
    Instruction *Entry = &F->getEntryBlock().front();
    if (HasChanged == ChangeStatus::CHANGED && !ValuesMap.count(Entry))
      ValuesMap.insert(std::make_pair(Entry, nullptr));
  }

  return HasChanged;
}

/// Hepler to check if \p I is a call and get the value for it if it is
/// unique.
Optional<Value *> getValueForCall(Attributor &A, const Instruction *I,
                                  InternalControlVar &ICV) const {

  const auto *CB = dyn_cast<CallBase>(I);
  if (!CB || CB->hasFnAttr("no_openmp") ||
      CB->hasFnAttr("no_openmp_routines"))
    return None;

  auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
  auto &GetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Getter];
  auto &SetterRFI = OMPInfoCache.RFIs[OMPInfoCache.ICVs[ICV].Setter];
  Function *CalledFunction = CB->getCalledFunction();

  // Indirect call, assume ICV changes.
  if (CalledFunction == nullptr)
    return nullptr;
  if (CalledFunction == GetterRFI.Declaration)
    return None;
  if (CalledFunction == SetterRFI.Declaration) {
    if (ICVReplacementValuesMap[ICV].count(I))
      return ICVReplacementValuesMap[ICV].lookup(I);

    return nullptr;
  }

  // Since we don't know, assume it changes the ICV.
  if (CalledFunction->isDeclaration())
    return nullptr;

  const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>(
      *this, IRPosition::callsite_returned(*CB), DepClassTy::REQUIRED);

  if (ICVTrackingAA.isAssumedTracked())
    return ICVTrackingAA.getUniqueReplacementValue(ICV);

  // If we don't know, assume it changes.
  return nullptr;
}

// We don't check unique value for a function, so return None.
Optional<Value *>
getUniqueReplacementValue(InternalControlVar ICV) const override {
  return None;
}

/// Return the value with which \p I can be replaced for specific \p ICV.
Optional<Value *> getReplacementValue(InternalControlVar ICV,
                                      const Instruction *I,
                                      Attributor &A) const override {
  const auto &ValuesMap = ICVReplacementValuesMap[ICV];
  if (ValuesMap.count(I))
    return ValuesMap.lookup(I);

  SmallVector<const Instruction *, 16> Worklist;
  SmallPtrSet<const Instruction *, 16> Visited;
  Worklist.push_back(I);

  Optional<Value *> ReplVal;

  while (!Worklist.empty()) {
    const Instruction *CurrInst = Worklist.pop_back_val();
    if (!Visited.insert(CurrInst).second)
      continue;

    const BasicBlock *CurrBB = CurrInst->getParent();

    // Go up and look for all potential setters/calls that might change the
    // ICV.
    while ((CurrInst = CurrInst->getPrevNode())) {
      if (ValuesMap.count(CurrInst)) {
        Optional<Value *> NewReplVal = ValuesMap.lookup(CurrInst);
        // Unknown value, track new.
        if (!ReplVal.hasValue()) {
          ReplVal = NewReplVal;
          break;
        }

        // If we found a new value, we can't know the icv value anymore.
        if (NewReplVal.hasValue())
          if (ReplVal != NewReplVal)
            return nullptr;

        break;
      }

      Optional<Value *> NewReplVal = getValueForCall(A, CurrInst, ICV);
      if (!NewReplVal.hasValue())
        continue;

      // Unknown value, track new.
      if (!ReplVal.hasValue()) {
        ReplVal = NewReplVal;
        break;
      }

      // if (NewReplVal.hasValue())
      // We found a new value, we can't know the icv value anymore.
      if (ReplVal != NewReplVal)
        return nullptr;
    }

    // If we are in the same BB and we have a value, we are done.
    if (CurrBB == I->getParent() && ReplVal.hasValue())
      return ReplVal;

    // Go through all predecessors and add terminators for analysis.
    for (const BasicBlock *Pred : predecessors(CurrBB))
      if (const Instruction *Terminator = Pred->getTerminator())
        Worklist.push_back(Terminator);
  }

  return ReplVal;
}
2247};

2249struct AAICVTrackerFunctionReturned : AAICVTracker {
AAICVTrackerFunctionReturned(const IRPosition &IRP, Attributor &A)
    : AAICVTracker(IRP, A) {}

// FIXME: come up with better string.
const std::string getAsStr() const override {
  return "ICVTrackerFunctionReturned";
}

// FIXME: come up with some stats.
void trackStatistics() const override {}

/// We don't manifest anything for this AA.
ChangeStatus manifest(Attributor &A) override {
  return ChangeStatus::UNCHANGED;
}

// Map of ICV to their values at specific program point.
EnumeratedArray<Optional<Value *>, InternalControlVar,
                InternalControlVar::ICV___last>
    ICVReplacementValuesMap;

/// Return the value with which \p I can be replaced for specific \p ICV.
Optional<Value *>
getUniqueReplacementValue(InternalControlVar ICV) const override {
  return ICVReplacementValuesMap[ICV];
}

ChangeStatus updateImpl(Attributor &A) override {
  ChangeStatus Changed = ChangeStatus::UNCHANGED;
  const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>(
      *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED);

  if (!ICVTrackingAA.isAssumedTracked())
    return indicatePessimisticFixpoint();

  for (InternalControlVar ICV : TrackableICVs) {
    Optional<Value *> &ReplVal = ICVReplacementValuesMap[ICV];
    Optional<Value *> UniqueICVValue;

    auto CheckReturnInst = [&](Instruction &I) {
      Optional<Value *> NewReplVal =
          ICVTrackingAA.getReplacementValue(ICV, &I, A);

      // If we found a second ICV value there is no unique returned value.
      if (UniqueICVValue.hasValue() && UniqueICVValue != NewReplVal)
        return false;

      UniqueICVValue = NewReplVal;

      return true;
    };

    bool UsedAssumedInformation = false;
    if (!A.checkForAllInstructions(CheckReturnInst, *this, {Instruction::Ret},
                                   UsedAssumedInformation,
                                   /* CheckBBLivenessOnly */ true))
      UniqueICVValue = nullptr;

    if (UniqueICVValue == ReplVal)
      continue;

    ReplVal = UniqueICVValue;
    Changed = ChangeStatus::CHANGED;
  }

  return Changed;
}
2317};

2319struct AAICVTrackerCallSite : AAICVTracker {
AAICVTrackerCallSite(const IRPosition &IRP, Attributor &A)
    : AAICVTracker(IRP, A) {}

void initialize(Attributor &A) override {
  Function *F = getAnchorScope();
  if (!F || !A.isFunctionIPOAmendable(*F))
    indicatePessimisticFixpoint();

  // We only initialize this AA for getters, so we need to know which ICV it
  // gets.
  auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
  for (InternalControlVar ICV : TrackableICVs) {
    auto ICVInfo = OMPInfoCache.ICVs[ICV];
    auto &Getter = OMPInfoCache.RFIs[ICVInfo.Getter];
    if (Getter.Declaration == getAssociatedFunction()) {
      AssociatedICV = ICVInfo.Kind;
      return;
    }
  }

  /// Unknown ICV.
  indicatePessimisticFixpoint();
}

ChangeStatus manifest(Attributor &A) override {
  if (!ReplVal.hasValue() || !ReplVal.getValue())
    return ChangeStatus::UNCHANGED;

  A.changeValueAfterManifest(*getCtxI(), **ReplVal);
  A.deleteAfterManifest(*getCtxI());

  return ChangeStatus::CHANGED;
}

// FIXME: come up with better string.
const std::string getAsStr() const override { return "ICVTrackerCallSite"; }

// FIXME: come up with some stats.
void trackStatistics() const override {}

InternalControlVar AssociatedICV;
Optional<Value *> ReplVal;

ChangeStatus updateImpl(Attributor &A) override {
  const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>(
      *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED);

  // We don't have any information, so we assume it changes the ICV.
  if (!ICVTrackingAA.isAssumedTracked())
    return indicatePessimisticFixpoint();

  Optional<Value *> NewReplVal =
      ICVTrackingAA.getReplacementValue(AssociatedICV, getCtxI(), A);

  if (ReplVal == NewReplVal)
    return ChangeStatus::UNCHANGED;

  ReplVal = NewReplVal;
  return ChangeStatus::CHANGED;
}

// Return the value with which associated value can be replaced for specific
// \p ICV.
Optional<Value *>
getUniqueReplacementValue(InternalControlVar ICV) const override {
  return ReplVal;
}
2387};

2389struct AAICVTrackerCallSiteReturned : AAICVTracker {
AAICVTrackerCallSiteReturned(const IRPosition &IRP, Attributor &A)
    : AAICVTracker(IRP, A) {}

// FIXME: come up with better string.
const std::string getAsStr() const override {
  return "ICVTrackerCallSiteReturned";
}

// FIXME: come up with some stats.
void trackStatistics() const override {}

/// We don't manifest anything for this AA.
ChangeStatus manifest(Attributor &A) override {
  return ChangeStatus::UNCHANGED;
}

// Map of ICV to their values at specific program point.
EnumeratedArray<Optional<Value *>, InternalControlVar,
                InternalControlVar::ICV___last>
    ICVReplacementValuesMap;

/// Return the value with which associated value can be replaced for specific
/// \p ICV.
Optional<Value *>
getUniqueReplacementValue(InternalControlVar ICV) const override {
  return ICVReplacementValuesMap[ICV];
}

ChangeStatus updateImpl(Attributor &A) override {
  ChangeStatus Changed = ChangeStatus::UNCHANGED;
  const auto &ICVTrackingAA = A.getAAFor<AAICVTracker>(
      *this, IRPosition::returned(*getAssociatedFunction()),
      DepClassTy::REQUIRED);

  // We don't have any information, so we assume it changes the ICV.
  if (!ICVTrackingAA.isAssumedTracked())
    return indicatePessimisticFixpoint();

  for (InternalControlVar ICV : TrackableICVs) {
    Optional<Value *> &ReplVal = ICVReplacementValuesMap[ICV];
    Optional<Value *> NewReplVal =
        ICVTrackingAA.getUniqueReplacementValue(ICV);

    if (ReplVal == NewReplVal)
      continue;

    ReplVal = NewReplVal;
    Changed = ChangeStatus::CHANGED;
  }
  return Changed;
}
2441};

2443struct AAExecutionDomainFunction : public AAExecutionDomain {
AAExecutionDomainFunction(const IRPosition &IRP, Attributor &A)
    : AAExecutionDomain(IRP, A) {}

const std::string getAsStr() const override {
  return "[AAExecutionDomain] " + std::to_string(SingleThreadedBBs.size()) +
         "/" + std::to_string(NumBBs) + " BBs thread 0 only.";
}

/// See AbstractAttribute::trackStatistics().
void trackStatistics() const override {}

void initialize(Attributor &A) override {
  Function *F = getAnchorScope();
  for (const auto &BB : *F)
    SingleThreadedBBs.insert(&BB);
  NumBBs = SingleThreadedBBs.size();
}

ChangeStatus manifest(Attributor &A) override {
  LLVM_DEBUG({do { } while (false)
    for (const BasicBlock *BB : SingleThreadedBBs)do { } while (false)
      dbgs() << TAG << " Basic block @" << getAnchorScope()->getName() << " "do { } while (false)
             << BB->getName() << " is executed by a single thread.\n";do { } while (false)
  })do { } while (false);
  return ChangeStatus::UNCHANGED;
}

ChangeStatus updateImpl(Attributor &A) override;

/// Check if an instruction is executed by a single thread.
bool isExecutedByInitialThreadOnly(const Instruction &I) const override {
  return isExecutedByInitialThreadOnly(*I.getParent());
}

bool isExecutedByInitialThreadOnly(const BasicBlock &BB) const override {
  return isValidState() && SingleThreadedBBs.contains(&BB);
}

/// Set of basic blocks that are executed by a single thread.
DenseSet<const BasicBlock *> SingleThreadedBBs;

/// Total number of basic blocks in this function.
long unsigned NumBBs;
2487};

2489ChangeStatus AAExecutionDomainFunction::updateImpl(Attributor &A) {
Function *F = getAnchorScope();
ReversePostOrderTraversal<Function *> RPOT(F);
auto NumSingleThreadedBBs = SingleThreadedBBs.size();

bool AllCallSitesKnown;
auto PredForCallSite = [&](AbstractCallSite ACS) {
  const auto &ExecutionDomainAA = A.getAAFor<AAExecutionDomain>(
      *this, IRPosition::function(*ACS.getInstruction()->getFunction()),
      DepClassTy::REQUIRED);
  return ACS.isDirectCall() &&
         ExecutionDomainAA.isExecutedByInitialThreadOnly(
             *ACS.getInstruction());
};

if (!A.checkForAllCallSites(PredForCallSite, *this,
                            /* RequiresAllCallSites */ true,
                            AllCallSitesKnown))
  SingleThreadedBBs.erase(&F->getEntryBlock());

auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_target_init];

// Check if the edge into the successor block compares the __kmpc_target_init
// result with -1. If we are in non-SPMD-mode that signals only the main
// thread will execute the edge.
auto IsInitialThreadOnly = [&](BranchInst *Edge, BasicBlock *SuccessorBB) {
  if (!Edge || !Edge->isConditional())
    return false;
  if (Edge->getSuccessor(0) != SuccessorBB)
    return false;

  auto *Cmp = dyn_cast<CmpInst>(Edge->getCondition());
  if (!Cmp || !Cmp->isTrueWhenEqual() || !Cmp->isEquality())
    return false;

  ConstantInt *C = dyn_cast<ConstantInt>(Cmp->getOperand(1));
  if (!C)
    return false;

  // Match:  -1 == __kmpc_target_init (for non-SPMD kernels only!)
  if (C->isAllOnesValue()) {
    auto *CB = dyn_cast<CallBase>(Cmp->getOperand(0));
    CB = CB ? OpenMPOpt::getCallIfRegularCall(*CB, &RFI) : nullptr;
    if (!CB)
      return false;
    const int InitIsSPMDArgNo = 1;
    auto *IsSPMDModeCI =
        dyn_cast<ConstantInt>(CB->getOperand(InitIsSPMDArgNo));
    return IsSPMDModeCI && IsSPMDModeCI->isZero();
  }

  return false;
};

// Merge all the predecessor states into the current basic block. A basic
// block is executed by a single thread if all of its predecessors are.
auto MergePredecessorStates = [&](BasicBlock *BB) {
  if (pred_begin(BB) == pred_end(BB))
    return SingleThreadedBBs.contains(BB);

  bool IsInitialThread = true;
  for (auto PredBB = pred_begin(BB), PredEndBB = pred_end(BB);
       PredBB != PredEndBB; ++PredBB) {
    if (!IsInitialThreadOnly(dyn_cast<BranchInst>((*PredBB)->getTerminator()),
                             BB))
      IsInitialThread &= SingleThreadedBBs.contains(*PredBB);
  }

  return IsInitialThread;
};

for (auto *BB : RPOT) {
  if (!MergePredecessorStates(BB))
    SingleThreadedBBs.erase(BB);
}

return (NumSingleThreadedBBs == SingleThreadedBBs.size())
           ? ChangeStatus::UNCHANGED
           : ChangeStatus::CHANGED;
2569}

2571/// Try to replace memory allocation calls called by a single thread with a
2572/// static buffer of shared memory.
2573struct AAHeapToShared : public StateWrapper<BooleanState, AbstractAttribute> {
using Base = StateWrapper<BooleanState, AbstractAttribute>;
AAHeapToShared(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

/// Create an abstract attribute view for the position \p IRP.
static AAHeapToShared &createForPosition(const IRPosition &IRP,
                                         Attributor &A);

/// Returns true if HeapToShared conversion is assumed to be possible.
virtual bool isAssumedHeapToShared(CallBase &CB) const = 0;

/// Returns true if HeapToShared conversion is assumed and the CB is a
/// callsite to a free operation to be removed.
virtual bool isAssumedHeapToSharedRemovedFree(CallBase &CB) const = 0;

/// See AbstractAttribute::getName().
const std::string getName() const override { return "AAHeapToShared"; }

/// See AbstractAttribute::getIdAddr().
const char *getIdAddr() const override { return &ID; }

/// This function should return true if the type of the \p AA is
/// AAHeapToShared.
static bool classof(const AbstractAttribute *AA) {
  return (AA->getIdAddr() == &ID);
}

/// Unique ID (due to the unique address)
static const char ID;
2602};

2604struct AAHeapToSharedFunction : public AAHeapToShared {
AAHeapToSharedFunction(const IRPosition &IRP, Attributor &A)
    : AAHeapToShared(IRP, A) {}

const std::string getAsStr() const override {
  return "[AAHeapToShared] " + std::to_string(MallocCalls.size()) +
         " malloc calls eligible.";
}

/// See AbstractAttribute::trackStatistics().
void trackStatistics() const override {}

/// This functions finds free calls that will be removed by the
/// HeapToShared transformation.
void findPotentialRemovedFreeCalls(Attributor &A) {
  auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
  auto &FreeRFI = OMPInfoCache.RFIs[OMPRTL___kmpc_free_shared];

  PotentialRemovedFreeCalls.clear();
  // Update free call users of found malloc calls.
  for (CallBase *CB : MallocCalls) {
    SmallVector<CallBase *, 4> FreeCalls;
    for (auto *U : CB->users()) {
      CallBase *C = dyn_cast<CallBase>(U);
      if (C && C->getCalledFunction() == FreeRFI.Declaration)
        FreeCalls.push_back(C);
    }

    if (FreeCalls.size() != 1)
      continue;

    PotentialRemovedFreeCalls.insert(FreeCalls.front());
  }
}

void initialize(Attributor &A) override {
  auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
  auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared];

  for (User *U : RFI.Declaration->users())
    if (CallBase *CB = dyn_cast<CallBase>(U))
      MallocCalls.insert(CB);

  findPotentialRemovedFreeCalls(A);
}

bool isAssumedHeapToShared(CallBase &CB) const override {
  return isValidState() && MallocCalls.count(&CB);
}

bool isAssumedHeapToSharedRemovedFree(CallBase &CB) const override {
  return isValidState() && PotentialRemovedFreeCalls.count(&CB);
}

ChangeStatus manifest(Attributor &A) override {
  if (MallocCalls.empty())
    return ChangeStatus::UNCHANGED;

  auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
  auto &FreeCall = OMPInfoCache.RFIs[OMPRTL___kmpc_free_shared];

  Function *F = getAnchorScope();
  auto *HS = A.lookupAAFor<AAHeapToStack>(IRPosition::function(*F), this,
                                          DepClassTy::OPTIONAL);

  ChangeStatus Changed = ChangeStatus::UNCHANGED;
  for (CallBase *CB : MallocCalls) {
    // Skip replacing this if HeapToStack has already claimed it.
    if (HS && HS->isAssumedHeapToStack(*CB))
      continue;

    // Find the unique free call to remove it.
    SmallVector<CallBase *, 4> FreeCalls;
    for (auto *U : CB->users()) {
      CallBase *C = dyn_cast<CallBase>(U);
      if (C && C->getCalledFunction() == FreeCall.Declaration)
        FreeCalls.push_back(C);
    }
    if (FreeCalls.size() != 1)
      continue;

    ConstantInt *AllocSize = dyn_cast<ConstantInt>(CB->getArgOperand(0));

    LLVM_DEBUG(dbgs() << TAG << "Replace globalization call in "do { } while (false)
                      << CB->getCaller()->getName() << " with "do { } while (false)
                      << AllocSize->getZExtValue()do { } while (false)
                      << " bytes of shared memory\n")do { } while (false);

    // Create a new shared memory buffer of the same size as the allocation
    // and replace all the uses of the original allocation with it.
    Module *M = CB->getModule();
    Type *Int8Ty = Type::getInt8Ty(M->getContext());
    Type *Int8ArrTy = ArrayType::get(Int8Ty, AllocSize->getZExtValue());
    auto *SharedMem = new GlobalVariable(
        *M, Int8ArrTy, /* IsConstant */ false, GlobalValue::InternalLinkage,
        UndefValue::get(Int8ArrTy), CB->getName(), nullptr,
        GlobalValue::NotThreadLocal,
        static_cast<unsigned>(AddressSpace::Shared));
    auto *NewBuffer =
        ConstantExpr::getPointerCast(SharedMem, Int8Ty->getPointerTo());

    auto Remark = [&](OptimizationRemark OR) {
      return OR << "Replaced globalized variable with "
                << ore::NV("SharedMemory", AllocSize->getZExtValue())
                << ((AllocSize->getZExtValue() != 1) ? " bytes " : " byte ")
                << "of shared memory.";
    };
    A.emitRemark<OptimizationRemark>(CB, "OMP111", Remark);

    SharedMem->setAlignment(MaybeAlign(32));

    A.changeValueAfterManifest(*CB, *NewBuffer);
    A.deleteAfterManifest(*CB);
    A.deleteAfterManifest(*FreeCalls.front());

    NumBytesMovedToSharedMemory += AllocSize->getZExtValue();
    Changed = ChangeStatus::CHANGED;
  }

  return Changed;
}

ChangeStatus updateImpl(Attributor &A) override {
  auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
  auto &RFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared];
  Function *F = getAnchorScope();

  auto NumMallocCalls = MallocCalls.size();

  // Only consider malloc calls executed by a single thread with a constant.
  for (User *U : RFI.Declaration->users()) {
    const auto &ED = A.getAAFor<AAExecutionDomain>(
        *this, IRPosition::function(*F), DepClassTy::REQUIRED);
    if (CallBase *CB = dyn_cast<CallBase>(U))
      if (!dyn_cast<ConstantInt>(CB->getArgOperand(0)) ||
          !ED.isExecutedByInitialThreadOnly(*CB))
        MallocCalls.erase(CB);
  }

  findPotentialRemovedFreeCalls(A);

  if (NumMallocCalls != MallocCalls.size())
    return ChangeStatus::CHANGED;

  return ChangeStatus::UNCHANGED;
}

/// Collection of all malloc calls in a function.
SmallPtrSet<CallBase *, 4> MallocCalls;
/// Collection of potentially removed free calls in a function.
SmallPtrSet<CallBase *, 4> PotentialRemovedFreeCalls;
2755};

2757struct AAKernelInfo : public StateWrapper<KernelInfoState, AbstractAttribute> {
using Base = StateWrapper<KernelInfoState, AbstractAttribute>;
AAKernelInfo(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

/// Statistics are tracked as part of manifest for now.
void trackStatistics() const override {}

/// See AbstractAttribute::getAsStr()
const std::string getAsStr() const override {
  if (!isValidState())
    return "<invalid>";
  return std::string(SPMDCompatibilityTracker.isAssumed() ? "SPMD"
                                                          : "generic") +
         std::string(SPMDCompatibilityTracker.isAtFixpoint() ? " [FIX]"
                                                             : "") +
         std::string(" #PRs: ") +
         std::to_string(ReachedKnownParallelRegions.size()) +
         ", #Unknown PRs: " +
         std::to_string(ReachedUnknownParallelRegions.size());
}

/// Create an abstract attribute biew for the position \p IRP.
static AAKernelInfo &createForPosition(const IRPosition &IRP, Attributor &A);

/// See AbstractAttribute::getName()
const std::string getName() const override { return "AAKernelInfo"; }

/// See AbstractAttribute::getIdAddr()
const char *getIdAddr() const override { return &ID; }

/// This function should return true if the type of the \p AA is AAKernelInfo
static bool classof(const AbstractAttribute *AA) {
  return (AA->getIdAddr() == &ID);
}

static const char ID;
2793};

/// The function kernel info abstract attribute, basically, what can we say
/// about a function with regards to the KernelInfoState.
2797struct AAKernelInfoFunction : AAKernelInfo {
/// Build the function-level kernel info attribute for \p IRP; all state lives
/// in the AAKernelInfo base, to which both arguments are forwarded.
AAKernelInfoFunction(const IRPosition &IRP, Attributor &A)
    : AAKernelInfo(IRP, A) {
}

/// See AbstractAttribute::initialize(...).
///
/// For functions that are kernels this locates the unique __kmpc_target_init
/// and __kmpc_target_deinit calls, registers simplification callbacks for
/// their mode-related arguments, and fixes the SPMD state if the kernel is
/// already in SPMD mode. Non-kernel functions are left untouched.
void initialize(Attributor &A) override {
  // This is a high-level transform that might change the constant arguments
  // of the init and dinit calls. We need to tell the Attributor about this
  // to avoid other parts using the current constant value for simplification.
  auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());

  Function *Fn = getAnchorScope();
  if (!OMPInfoCache.Kernels.count(Fn))
    return;

  // Add itself to the reaching kernel and set IsKernelEntry.
  ReachingKernelEntries.insert(Fn);
  IsKernelEntry = true;

  OMPInformationCache::RuntimeFunctionInfo &InitRFI =
      OMPInfoCache.RFIs[OMPRTL___kmpc_target_init];
  OMPInformationCache::RuntimeFunctionInfo &DeinitRFI =
      OMPInfoCache.RFIs[OMPRTL___kmpc_target_deinit];

  // For kernels we perform more initialization work, first we find the init
  // and deinit calls.
  auto StoreCallBase = [](Use &U,
                          OMPInformationCache::RuntimeFunctionInfo &RFI,
                          CallBase *&Storage) {
    CallBase *CB = OpenMPOpt::getCallIfRegularCall(U, &RFI);
    assert(CB &&
           "Unexpected use of __kmpc_target_init or __kmpc_target_deinit!");
    assert(!Storage &&
           "Multiple uses of __kmpc_target_init or __kmpc_target_deinit!");
    Storage = CB;
    return false;
  };
  InitRFI.foreachUse(
      [&](Use &U, Function &) {
        StoreCallBase(U, InitRFI, KernelInitCB);
        return false;
      },
      Fn);
  DeinitRFI.foreachUse(
      [&](Use &U, Function &) {
        StoreCallBase(U, DeinitRFI, KernelDeinitCB);
        return false;
      },
      Fn);

  assert((KernelInitCB && KernelDeinitCB) &&
         "Kernel without __kmpc_target_init or __kmpc_target_deinit!");

  // With assertions compiled out the check above vanishes; bail out instead
  // of dereferencing a null call below. manifest() likewise treats a missing
  // init or deinit call as "nothing to do".
  if (!KernelInitCB || !KernelDeinitCB)
    return;

  // For kernels we might need to initialize/finalize the IsSPMD state and
  // we need to register a simplification callback so that the Attributor
  // knows the constant arguments to __kmpc_target_init and
  // __kmpc_target_deinit might actually change.

  Attributor::SimplifictionCallbackTy StateMachineSimplifyCB =
      [&](const IRPosition &IRP, const AbstractAttribute *AA,
          bool &UsedAssumedInformation) -> Optional<Value *> {
    // IRP represents the "use generic state machine" argument of an
    // __kmpc_target_init call. We will answer this one with the internal
    // state. As long as we are not in an invalid state, we will create a
    // custom state machine so the value should be a `i1 false`. If we are
    // in an invalid state, we won't change the value that is in the IR.
    if (!isValidState())
      return nullptr;
    if (AA)
      A.recordDependence(*this, *AA, DepClassTy::OPTIONAL);
    UsedAssumedInformation = !isAtFixpoint();
    auto *FalseVal =
        ConstantInt::getBool(IRP.getAnchorValue().getContext(), 0);
    return FalseVal;
  };

  Attributor::SimplifictionCallbackTy IsSPMDModeSimplifyCB =
      [&](const IRPosition &IRP, const AbstractAttribute *AA,
          bool &UsedAssumedInformation) -> Optional<Value *> {
    // IRP represents the "SPMDCompatibilityTracker" argument of an
    // __kmpc_target_init or
    // __kmpc_target_deinit call. We will answer this one with the internal
    // state.
    if (!SPMDCompatibilityTracker.isValidState())
      return nullptr;
    if (!SPMDCompatibilityTracker.isAtFixpoint()) {
      if (AA)
        A.recordDependence(*this, *AA, DepClassTy::OPTIONAL);
      UsedAssumedInformation = true;
    } else {
      UsedAssumedInformation = false;
    }
    auto *Val = ConstantInt::getBool(IRP.getAnchorValue().getContext(),
                                     SPMDCompatibilityTracker.isAssumed());
    return Val;
  };

  Attributor::SimplifictionCallbackTy IsGenericModeSimplifyCB =
      [&](const IRPosition &IRP, const AbstractAttribute *AA,
          bool &UsedAssumedInformation) -> Optional<Value *> {
    // IRP represents the "RequiresFullRuntime" argument of an
    // __kmpc_target_init or __kmpc_target_deinit call. We will answer this
    // one with the internal state of the SPMDCompatibilityTracker, so if
    // generic then true, if SPMD then false.
    if (!SPMDCompatibilityTracker.isValidState())
      return nullptr;
    if (!SPMDCompatibilityTracker.isAtFixpoint()) {
      if (AA)
        A.recordDependence(*this, *AA, DepClassTy::OPTIONAL);
      UsedAssumedInformation = true;
    } else {
      UsedAssumedInformation = false;
    }
    auto *Val = ConstantInt::getBool(IRP.getAnchorValue().getContext(),
                                     !SPMDCompatibilityTracker.isAssumed());
    return Val;
  };

  // Argument positions used below for the init and deinit calls.
  constexpr const int InitIsSPMDArgNo = 1;
  constexpr const int DeinitIsSPMDArgNo = 1;
  constexpr const int InitUseStateMachineArgNo = 2;
  constexpr const int InitRequiresFullRuntimeArgNo = 3;
  constexpr const int DeinitRequiresFullRuntimeArgNo = 2;
  A.registerSimplificationCallback(
      IRPosition::callsite_argument(*KernelInitCB, InitUseStateMachineArgNo),
      StateMachineSimplifyCB);
  A.registerSimplificationCallback(
      IRPosition::callsite_argument(*KernelInitCB, InitIsSPMDArgNo),
      IsSPMDModeSimplifyCB);
  A.registerSimplificationCallback(
      IRPosition::callsite_argument(*KernelDeinitCB, DeinitIsSPMDArgNo),
      IsSPMDModeSimplifyCB);
  A.registerSimplificationCallback(
      IRPosition::callsite_argument(*KernelInitCB,
                                    InitRequiresFullRuntimeArgNo),
      IsGenericModeSimplifyCB);
  A.registerSimplificationCallback(
      IRPosition::callsite_argument(*KernelDeinitCB,
                                    DeinitRequiresFullRuntimeArgNo),
      IsGenericModeSimplifyCB);

  // Check if we know we are in SPMD-mode already.
  ConstantInt *IsSPMDArg =
      dyn_cast<ConstantInt>(KernelInitCB->getArgOperand(InitIsSPMDArgNo));
  if (IsSPMDArg && !IsSPMDArg->isZero())
    SPMDCompatibilityTracker.indicateOptimisticFixpoint();
}

/// Apply the final KernelInfoState to the IR once the fixpoint iteration has
/// finished.
///
/// \returns UNCHANGED when there is no init/deinit call pair to rewrite or
/// the kernel is already known to be SPMD; CHANGED otherwise.
ChangeStatus manifest(Attributor &A) override {
  // Without both a __kmpc_target_init and a __kmpc_target_deinit call there
  // is nothing we could rewrite, and kernels already known to run in
  // SPMD-mode need no changes either.
  const bool HasInitAndDeinit = KernelInitCB && KernelDeinitCB;
  if (!HasInitAndDeinit || SPMDCompatibilityTracker.isKnown())
    return ChangeStatus::UNCHANGED;

  // Prefer switching the execution mode to SPMD; fall back to building a
  // custom state machine when that is not possible.
  const bool SwitchedToSPMD = changeToSPMDMode(A);
  if (!SwitchedToSPMD)
    buildCustomStateMachine(A);

  return ChangeStatus::CHANGED;
}

/// Try to switch the kernel to SPMD mode.
///
/// If the kernel is not assumed SPMD-compatible, emit an OMP121 remark for
/// each recorded incompatible instruction (skipping calls to known OpenMP
/// runtime functions) and leave the IR alone. Otherwise update the
/// "<kernel>_exec_mode" global and schedule rewrites of the mode-related
/// arguments of the init and deinit calls.
///
/// \returns true if the kernel was switched to SPMD mode, false otherwise.
bool changeToSPMDMode(Attributor &A) {
  auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());

  if (!SPMDCompatibilityTracker.isAssumed()) {
    for (Instruction *NonCompatibleI : SPMDCompatibilityTracker) {
      if (!NonCompatibleI)
        continue;

      // Skip diagnostics on calls to known OpenMP runtime functions for now.
      if (auto *CB = dyn_cast<CallBase>(NonCompatibleI))
        if (OMPInfoCache.RTLFunctions.contains(CB->getCalledFunction()))
          continue;

      auto Remark = [&](OptimizationRemarkAnalysis ORA) {
        ORA << "Value has potential side effects preventing SPMD-mode "
               "execution";
        if (isa<CallBase>(NonCompatibleI)) {
          ORA << ". Add `__attribute__((assume(\"ompx_spmd_amenable\")))` to "
                 "the called function to override";
        }
        return ORA << ".";
      };
      A.emitRemark<OptimizationRemarkAnalysis>(NonCompatibleI, "OMP121",
                                               Remark);

      LLVM_DEBUG(dbgs() << TAG << "SPMD-incompatible side-effect: "
                        << *NonCompatibleI << "\n");
    }

    return false;
  }

  // Adjust the global exec mode flag that tells the runtime what mode this
  // kernel is executed in.
  Function *Kernel = getAnchorScope();
  GlobalVariable *ExecMode = Kernel->getParent()->getGlobalVariable(
      (Kernel->getName() + "_exec_mode").str());
  assert(ExecMode && "Kernel without exec mode?");
  assert(ExecMode->getInitializer() &&
         ExecMode->getInitializer()->isOneValue() &&
         "Initially non-SPMD kernel has SPMD exec mode!");

  // Set the global exec mode flag to indicate SPMD-Generic mode.
  constexpr int SPMDGeneric = 2;
  if (!ExecMode->getInitializer()->isZeroValue())
    ExecMode->setInitializer(
        ConstantInt::get(ExecMode->getInitializer()->getType(), SPMDGeneric));

  // Next rewrite the init and deinit calls to indicate we use SPMD-mode now.
  const int InitIsSPMDArgNo = 1;
  const int DeinitIsSPMDArgNo = 1;
  const int InitUseStateMachineArgNo = 2;
  const int InitRequiresFullRuntimeArgNo = 3;
  const int DeinitRequiresFullRuntimeArgNo = 2;

  auto &Ctx = getAnchorValue().getContext();
  A.changeUseAfterManifest(KernelInitCB->getArgOperandUse(InitIsSPMDArgNo),
                           *ConstantInt::getBool(Ctx, 1));
  A.changeUseAfterManifest(
      KernelInitCB->getArgOperandUse(InitUseStateMachineArgNo),
      *ConstantInt::getBool(Ctx, 0));
  A.changeUseAfterManifest(
      KernelDeinitCB->getArgOperandUse(DeinitIsSPMDArgNo),
      *ConstantInt::getBool(Ctx, 1));
  A.changeUseAfterManifest(
      KernelInitCB->getArgOperandUse(InitRequiresFullRuntimeArgNo),
      *ConstantInt::getBool(Ctx, 0));
  A.changeUseAfterManifest(
      KernelDeinitCB->getArgOperandUse(DeinitRequiresFullRuntimeArgNo),
      *ConstantInt::getBool(Ctx, 0));

  ++NumOpenMPTargetRegionKernelsSPMD;

  auto Remark = [&](OptimizationRemark OR) {
    return OR << "Transformed generic-mode kernel to SPMD-mode.";
  };
  A.emitRemark<OptimizationRemark>(KernelInitCB, "OMP120", Remark);
  return true;
}

ChangeStatus buildCustomStateMachine(Attributor &A) {
  assert(ReachedKnownParallelRegions.isValidState() &&((void)0)
         "Custom state machine with invalid parallel region states?")((void)0);

  const int InitIsSPMDArgNo = 1;
  const int InitUseStateMachineArgNo = 2;

  // Check if the current configuration is non-SPMD and generic state machine.
  // If we already have SPMD mode or a custom state machine we do not need to
  // go any further. If it is anything but a constant something is weird and
  // we give up.
  ConstantInt *UseStateMachine = dyn_cast<ConstantInt>(
      KernelInitCB->getArgOperand(InitUseStateMachineArgNo));
  ConstantInt *IsSPMD =
      dyn_cast<ConstantInt>(KernelInitCB->getArgOperand(InitIsSPMDArgNo));

  // If we are stuck with generic mode, try to create a custom device (=GPU)
  // state machine which is specialized for the parallel regions that are
  // reachable by the kernel.
  if (!UseStateMachine || UseStateMachine->isZero() || !IsSPMD ||
      !IsSPMD->isZero())
    return ChangeStatus::UNCHANGED;

  // If not SPMD mode, indicate we use a custom state machine now.
  auto &Ctx = getAnchorValue().getContext();
  auto *FalseVal = ConstantInt::getBool(Ctx, 0);
  A.changeUseAfterManifest(
      KernelInitCB->getArgOperandUse(InitUseStateMachineArgNo), *FalseVal);

  // If we don't actually need a state machine we are done here. This can
  // happen if there simply are no parallel regions. In the resulting kernel
  // all worker threads will simply exit right away, leaving the main thread
  // to do the work alone.
  if (ReachedKnownParallelRegions.empty() &&
      ReachedUnknownParallelRegions.empty()) {
    ++NumOpenMPTargetRegionKernelsWithoutStateMachine;

    auto Remark = [&](OptimizationRemark OR) {
      return OR << "Removing unused state machine from generic-mode kernel.";
    };
    A.emitRemark<OptimizationRemark>(KernelInitCB, "OMP130", Remark);

    return ChangeStatus::CHANGED;
  }

  // Keep track in the statistics of our new shiny custom state machine.
  if (ReachedUnknownParallelRegions.empty()) {
    ++NumOpenMPTargetRegionKernelsCustomStateMachineWithoutFallback;

    auto Remark = [&](OptimizationRemark OR) {
      return OR << "Rewriting generic-mode kernel with a customized state "
                   "machine.";
    };
    A.emitRemark<OptimizationRemark>(KernelInitCB, "OMP131", Remark);
  } else {
    ++NumOpenMPTargetRegionKernelsCustomStateMachineWithFallback;

    auto Remark = [&](OptimizationRemarkAnalysis OR) {
      return OR << "Generic-mode kernel is executed with a customized state "
                   "machine that requires a fallback.";
    };
    A.emitRemark<OptimizationRemarkAnalysis>(KernelInitCB, "OMP132", Remark);

    // Tell the user why we ended up with a fallback.
    for (CallBase *UnknownParallelRegionCB : ReachedUnknownParallelRegions) {
      if (!UnknownParallelRegionCB)
        continue;
      auto Remark = [&](OptimizationRemarkAnalysis ORA) {
        return ORA << "Call may contain unknown parallel regions. Use "
                   << "`__attribute__((assume(\"omp_no_parallelism\")))` to "
                      "override.";
      };
      A.emitRemark<OptimizationRemarkAnalysis>(UnknownParallelRegionCB,
                                               "OMP133", Remark);
    }
  }

  // Create all the blocks:
  //
  //                       InitCB = __kmpc_target_init(...)
  //                       bool IsWorker = InitCB >= 0;
  //                       if (IsWorker) {
  // SMBeginBB:               __kmpc_barrier_simple_spmd(...);
  //                         void *WorkFn;
  //                         bool Active = __kmpc_kernel_parallel(&WorkFn);
  //                         if (!WorkFn) return;
  // SMIsActiveCheckBB:       if (Active) {
  // SMIfCascadeCurrentBB:      if      (WorkFn == <ParFn0>)
  //                              ParFn0(...);
  // SMIfCascadeCurrentBB:      else if (WorkFn == <ParFn1>)
  //                              ParFn1(...);
  //                            ...
  // SMIfCascadeCurrentBB:      else
  //                              ((WorkFnTy*)WorkFn)(...);
  // SMEndParallelBB:           __kmpc_kernel_end_parallel(...);
  //                          }
  // SMDoneBB:                __kmpc_barrier_simple_spmd(...);
  //                          goto SMBeginBB;
  //                       }
  // UserCodeEntryBB:      // user code
  //                       __kmpc_target_deinit(...)
  //
  Function *Kernel = getAssociatedFunction();
  assert(Kernel && "Expected an associated function!")((void)0);

  BasicBlock *InitBB = KernelInitCB->getParent();
  BasicBlock *UserCodeEntryBB = InitBB->splitBasicBlock(
      KernelInitCB->getNextNode(), "thread.user_code.check");
  BasicBlock *StateMachineBeginBB = BasicBlock::Create(
      Ctx, "worker_state_machine.begin", Kernel, UserCodeEntryBB);
  BasicBlock *StateMachineFinishedBB = BasicBlock::Create(
      Ctx, "worker_state_machine.finished", Kernel, UserCodeEntryBB);
  BasicBlock *StateMachineIsActiveCheckBB = BasicBlock::Create(
      Ctx, "worker_state_machine.is_active.check", Kernel, UserCodeEntryBB);
  BasicBlock *StateMachineIfCascadeCurrentBB =
      BasicBlock::Create(Ctx, "worker_state_machine.parallel_region.check",
                         Kernel, UserCodeEntryBB);
  BasicBlock *StateMachineEndParallelBB =
      BasicBlock::Create(Ctx, "worker_state_machine.parallel_region.end",
                         Kernel, UserCodeEntryBB);
  BasicBlock *StateMachineDoneBarrierBB = BasicBlock::Create(
      Ctx, "worker_state_machine.done.barrier", Kernel, UserCodeEntryBB);
  A.registerManifestAddedBasicBlock(*InitBB);
  A.registerManifestAddedBasicBlock(*UserCodeEntryBB);
  A.registerManifestAddedBasicBlock(*StateMachineBeginBB);
  A.registerManifestAddedBasicBlock(*StateMachineFinishedBB);
  A.registerManifestAddedBasicBlock(*StateMachineIsActiveCheckBB);
  A.registerManifestAddedBasicBlock(*StateMachineIfCascadeCurrentBB);
  A.registerManifestAddedBasicBlock(*StateMachineEndParallelBB);
  A.registerManifestAddedBasicBlock(*StateMachineDoneBarrierBB);

  const DebugLoc &DLoc = KernelInitCB->getDebugLoc();
  ReturnInst::Create(Ctx, StateMachineFinishedBB)->setDebugLoc(DLoc);

  InitBB->getTerminator()->eraseFromParent();
  Instruction *IsWorker =
      ICmpInst::Create(ICmpInst::ICmp, llvm::CmpInst::ICMP_NE, KernelInitCB,
                       ConstantInt::get(KernelInitCB->getType(), -1),
                       "thread.is_worker", InitBB);
  IsWorker->setDebugLoc(DLoc);
  BranchInst::Create(StateMachineBeginBB, UserCodeEntryBB, IsWorker, InitBB);

  Module &M = *Kernel->getParent();

  // Create local storage for the work function pointer.
  const DataLayout &DL = M.getDataLayout();
  Type *VoidPtrTy = Type::getInt8PtrTy(Ctx);
  Instruction *WorkFnAI =
      new AllocaInst(VoidPtrTy, DL.getAllocaAddrSpace(), nullptr,
                     "worker.work_fn.addr", &Kernel->getEntryBlock().front());
  WorkFnAI->setDebugLoc(DLoc);

  auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
  OMPInfoCache.OMPBuilder.updateToLocation(
      OpenMPIRBuilder::LocationDescription(
          IRBuilder<>::InsertPoint(StateMachineBeginBB,
                                   StateMachineBeginBB->end()),
          DLoc));

  Value *Ident = KernelInitCB->getArgOperand(0);
  Value *GTid = KernelInitCB;

  FunctionCallee BarrierFn =
      OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
          M, OMPRTL___kmpc_barrier_simple_spmd);
  CallInst::Create(BarrierFn, {Ident, GTid}, "", StateMachineBeginBB)
      ->setDebugLoc(DLoc);

  if (WorkFnAI->getType()->getPointerAddressSpace() !=
      (unsigned int)AddressSpace::Generic) {
    WorkFnAI = new AddrSpaceCastInst(
        WorkFnAI,
        PointerType::getWithSamePointeeType(
            cast<PointerType>(WorkFnAI->getType()),
            (unsigned int)AddressSpace::Generic),
        WorkFnAI->getName() + ".generic", StateMachineBeginBB);
    WorkFnAI->setDebugLoc(DLoc);
  }

  FunctionCallee KernelParallelFn =
      OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
          M, OMPRTL___kmpc_kernel_parallel);
  Instruction *IsActiveWorker = CallInst::Create(
      KernelParallelFn, {WorkFnAI}, "worker.is_active", StateMachineBeginBB);
  IsActiveWorker->setDebugLoc(DLoc);
  Instruction *WorkFn = new LoadInst(VoidPtrTy, WorkFnAI, "worker.work_fn",
                                     StateMachineBeginBB);
  WorkFn->setDebugLoc(DLoc);

  FunctionType *ParallelRegionFnTy = FunctionType::get(
      Type::getVoidTy(Ctx), {Type::getInt16Ty(Ctx), Type::getInt32Ty(Ctx)},
      false);
  Value *WorkFnCast = BitCastInst::CreatePointerBitCastOrAddrSpaceCast(
      WorkFn, ParallelRegionFnTy->getPointerTo(), "worker.work_fn.addr_cast",
      StateMachineBeginBB);

  Instruction *IsDone =
      ICmpInst::Create(ICmpInst::ICmp, llvm::CmpInst::ICMP_EQ, WorkFn,
                       Constant::getNullValue(VoidPtrTy), "worker.is_done",
                       StateMachineBeginBB);
  IsDone->setDebugLoc(DLoc);
  BranchInst::Create(StateMachineFinishedBB, StateMachineIsActiveCheckBB,
                     IsDone, StateMachineBeginBB)
      ->setDebugLoc(DLoc);

  BranchInst::Create(StateMachineIfCascadeCurrentBB,
                     StateMachineDoneBarrierBB, IsActiveWorker,
                     StateMachineIsActiveCheckBB)
      ->setDebugLoc(DLoc);

  Value *ZeroArg =
      Constant::getNullValue(ParallelRegionFnTy->getParamType(0));

  // Now that we have most of the CFG skeleton it is time for the if-cascade
  // that checks the function pointer we got from the runtime against the
  // parallel regions we expect, if there are any.
  for (int i = 0, e = ReachedKnownParallelRegions.size(); i < e; ++i) {
    auto *ParallelRegion = ReachedKnownParallelRegions[i];
    BasicBlock *PRExecuteBB = BasicBlock::Create(
        Ctx, "worker_state_machine.parallel_region.execute", Kernel,
        StateMachineEndParallelBB);
    CallInst::Create(ParallelRegion, {ZeroArg, GTid}, "", PRExecuteBB)
        ->setDebugLoc(DLoc);
    BranchInst::Create(StateMachineEndParallelBB, PRExecuteBB)
        ->setDebugLoc(DLoc);

    BasicBlock *PRNextBB =
        BasicBlock::Create(Ctx, "worker_state_machine.parallel_region.check",
                           Kernel, StateMachineEndParallelBB);

    // Check if we need to compare the pointer at all or if we can just
    // call the parallel region function.
    Value *IsPR;
    if (i + 1 < e || !ReachedUnknownParallelRegions.empty()) {
      Instruction *CmpI = ICmpInst::Create(
          ICmpInst::ICmp, llvm::CmpInst::ICMP_EQ, WorkFnCast, ParallelRegion,
          "worker.check_parallel_region", StateMachineIfCascadeCurrentBB);
      CmpI->setDebugLoc(DLoc);
      IsPR = CmpI;
    } else {
      IsPR = ConstantInt::getTrue(Ctx);
    }

    BranchInst::Create(PRExecuteBB, PRNextBB, IsPR,
                       StateMachineIfCascadeCurrentBB)
        ->setDebugLoc(DLoc);
    StateMachineIfCascadeCurrentBB = PRNextBB;
  }

  // At the end of the if-cascade we place the indirect function pointer call
  // in case we might need it, that is if there can be parallel regions we
  // have not handled in the if-cascade above.
  if (!ReachedUnknownParallelRegions.empty()) {
    StateMachineIfCascadeCurrentBB->setName(
        "worker_state_machine.parallel_region.fallback.execute");
    CallInst::Create(ParallelRegionFnTy, WorkFnCast, {ZeroArg, GTid}, "",
                     StateMachineIfCascadeCurrentBB)
        ->setDebugLoc(DLoc);
  }
  BranchInst::Create(StateMachineEndParallelBB,
                     StateMachineIfCascadeCurrentBB)
      ->setDebugLoc(DLoc);

  CallInst::Create(OMPInfoCache.OMPBuilder.getOrCreateRuntimeFunction(
                       M, OMPRTL___kmpc_kernel_end_parallel),
                   {}, "", StateMachineEndParallelBB)
      ->setDebugLoc(DLoc);
  BranchInst::Create(StateMachineDoneBarrierBB, StateMachineEndParallelBB)
      ->setDebugLoc(DLoc);

  CallInst::Create(BarrierFn, {Ident, GTid}, "", StateMachineDoneBarrierBB)
      ->setDebugLoc(DLoc);
  BranchInst::Create(StateMachineBeginBB, StateMachineDoneBarrierBB)
      ->setDebugLoc(DLoc);

  return ChangeStatus::CHANGED;
}

/// Fixpoint iteration update function. Will be called every time a dependence
/// changed its state (and in the beginning).
ChangeStatus updateImpl(Attributor &A) override {
  // Copy of the state on entry; compared with the state on exit to decide
  // whether this update changed anything.
  KernelInfoState OldState = getState();

  // Visitor for read/write instructions. Every visited instruction yields
  // `true`; instructions that may write memory and are neither calls nor
  // stores whose underlying objects are all allocas are recorded in the
  // SPMD compatibility tracker.
  auto VisitRWInst = [&](Instruction &Inst) {
    // Calls are inspected by the call visitor further down, and instructions
    // without write effects are of no interest here.
    if (isa<CallBase>(Inst) || !Inst.mayWriteToMemory())
      return true;

    // A store is acceptable when all underlying objects of its pointer
    // operand are allocas.
    if (auto *Store = dyn_cast<StoreInst>(&Inst)) {
      SmallVector<const Value *> UnderlyingObjs;
      getUnderlyingObjects(Store->getPointerOperand(), UnderlyingObjs);
      bool OnlyAllocas = llvm::all_of(
          UnderlyingObjs, [](const Value *V) { return isa<AllocaInst>(V); });
      if (OnlyAllocas)
        return true;
    }

    // Every other writing instruction is recorded in the tracker.
    SPMDCompatibilityTracker.insert(&Inst);
    return true;
  };

  // Only scan read/write instructions while the tracker is still open; a
  // failed scan fixes the tracker pessimistically.
  bool RWUsedAssumedInfo = false;
  if (!SPMDCompatibilityTracker.isAtFixpoint() &&
      !A.checkForAllReadWriteInstructions(VisitRWInst, *this,
                                          RWUsedAssumedInfo))
    SPMDCompatibilityTracker.indicatePessimisticFixpoint();

  // Functions that are not kernel entries additionally refresh the caller
  // derived information.
  if (!IsKernelEntry) {
    updateReachingKernelEntries(A);
    updateParallelLevels(A);
  }

  // Visitor for call-like instructions: merge the call site's kernel info
  // into our state and remember whether every call site's SPMD tracker was
  // already at a fixpoint.
  bool CallSiteSPMDStatesFixed = true;
  auto VisitCall = [&](Instruction &Inst) {
    auto &Call = cast<CallBase>(Inst);
    auto &CallSiteAA = A.getAAFor<AAKernelInfo>(
        *this, IRPosition::callsite_function(Call), DepClassTy::OPTIONAL);
    getState() ^= CallSiteAA.getState();
    if (!CallSiteAA.SPMDCompatibilityTracker.isAtFixpoint())
      CallSiteSPMDStatesFixed = false;
    return true;
  };

  bool CallsUsedAssumedInfo = false;
  bool VisitedAllCalls = A.checkForAllCallLikeInstructions(
      VisitCall, *this, CallsUsedAssumedInfo);
  if (!VisitedAllCalls)
    return indicatePessimisticFixpoint();

  // Without any assumed information involved, and with all call site SPMD
  // states fixed, the SPMD state can be fixed as well.
  bool NoAssumedInfoUsed = !(RWUsedAssumedInfo || CallsUsedAssumedInfo);
  if (NoAssumedInfoUsed && CallSiteSPMDStatesFixed)
    SPMDCompatibilityTracker.indicateOptimisticFixpoint();

  if (OldState == getState())
    return ChangeStatus::UNCHANGED;
  return ChangeStatus::CHANGED;
}

3385private:
/// Update info regarding reaching kernels.
void updateReachingKernelEntries(Attributor &A) {
  // Callback run for each call site of the associated function. It merges the
  // caller's set of reaching kernel entries into ours, or fixes our set
  // pessimistically when the caller's set is not in a valid state. It returns
  // true in both cases (presumably so that the remaining call sites are still
  // visited -- confirm against Attributor::checkForAllCallSites).
  auto PredCallSite = [&](AbstractCallSite ACS) {
    Function *Caller = ACS.getInstruction()->getFunction();

    assert(Caller && "Caller is nullptr");

    auto &CAA = A.getOrCreateAAFor<AAKernelInfo>(
        IRPosition::function(*Caller), this, DepClassTy::REQUIRED);
    if (CAA.ReachingKernelEntries.isValidState()) {
      // NOTE(review): the static-analyzer report this listing was recovered
      // from flags the statement below with "Called C++ object pointer is
      // null" (reached after IntegerStateBase::isValidState returned true).
      // Whether that path is feasible cannot be decided from this function
      // alone -- confirm against the state's operator^=.
      ReachingKernelEntries ^= CAA.ReachingKernelEntries;
      return true;
    }

    // We lost track of the caller of the associated function, any kernel
    // could reach now.
    ReachingKernelEntries.indicatePessimisticFixpoint();

    return true;
  };

  // Initialized so the flag never holds an indeterminate value, matching
  // updateParallelLevels.
  bool AllCallSitesKnown = true;
  if (!A.checkForAllCallSites(PredCallSite, *this,
                              true /* RequireAllCallSites */,
                              AllCallSitesKnown))
    ReachingKernelEntries.indicatePessimisticFixpoint();
}

/// Update info regarding parallel levels.
void updateParallelLevels(Attributor &A) {
  auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
  OMPInformationCache::RuntimeFunctionInfo &Parallel51RFI =
      OMPInfoCache.RFIs[OMPRTL___kmpc_parallel_51];

  // Callback run for each call site of the associated function. It merges the
  // caller's parallel levels into ours, or fixes ours pessimistically when
  // the caller is `__kmpc_parallel_51` or the caller's levels are not in a
  // valid state. It returns true in every case.
  auto PredCallSite = [&](AbstractCallSite ACS) {
    Function *Caller = ACS.getInstruction()->getFunction();

    assert(Caller && "Caller is nullptr");

    // NOTE(review): unlike updateReachingKernelEntries, no querying AA or
    // dependence class is passed here -- confirm this is intentional.
    auto &CAA =
        A.getOrCreateAAFor<AAKernelInfo>(IRPosition::function(*Caller));
    if (CAA.ParallelLevels.isValidState()) {
      // Any function that is called by `__kmpc_parallel_51` will not be
      // folded as the parallel level in the function is updated. Getting this
      // right would make the analysis depend on the runtime implementation,
      // and any future change to that implementation could invalidate the
      // analysis. As a consequence, we are just conservative here.
      if (Caller == Parallel51RFI.Declaration) {
        ParallelLevels.indicatePessimisticFixpoint();
        return true;
      }

      ParallelLevels ^= CAA.ParallelLevels;

      return true;
    }

    // We lost track of the caller of the associated function, any kernel
    // could reach now.
    ParallelLevels.indicatePessimisticFixpoint();

    return true;
  };

  bool AllCallSitesKnown = true;
  if (!A.checkForAllCallSites(PredCallSite, *this,
                              true /* RequireAllCallSites */,
                              AllCallSitesKnown))
    ParallelLevels.indicatePessimisticFixpoint();
}
3456};

3458/// The call site kernel info abstract attribute, basically, what can we say
3459/// about a call site with regards to the KernelInfoState. For now this simply
3460/// forwards the information from the callee.
3461struct AAKernelInfoCallSite : AAKernelInfo {
AAKernelInfoCallSite(const IRPosition &IRP, Attributor &A)
    : AAKernelInfo(IRP, A) {}

/// See AbstractAttribute::initialize(...).
void initialize(Attributor &A) override {
  AAKernelInfo::initialize(A);

  CallBase &CB = cast<CallBase>(getAssociatedValue());
  Function *Callee = getAssociatedFunction();

  // Helper to lookup an assumption string.
  auto HasAssumption = [](Function *Fn, StringRef AssumptionStr) {
    return Fn && hasAssumption(*Fn, AssumptionStr);
  };

  // Check for SPMD-mode assumptions.
  if (HasAssumption(Callee, "ompx_spmd_amenable"))
    SPMDCompatibilityTracker.indicateOptimisticFixpoint();

  // First weed out calls we do not care about, that is readonly/readnone
  // calls, intrinsics, and "no_openmp" calls. Neither of these can reach a
  // parallel region or anything else we are looking for.
  if (!CB.mayWriteToMemory() || isa<IntrinsicInst>(CB)) {
    indicateOptimisticFixpoint();
    return;
  }

  // Next we check if we know the callee. If it is a known OpenMP function
  // we will handle them explicitly in the switch below. If it is not, we
  // will use an AAKernelInfo object on the callee to gather information and
  // merge that into the current state. The latter happens in the updateImpl.
  auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
  const auto &It = OMPInfoCache.RuntimeFunctionIDMap.find(Callee);
  if (It == OMPInfoCache.RuntimeFunctionIDMap.end()) {
    // Unknown caller or declarations are not analyzable, we give up.
    if (!Callee || !A.isFunctionIPOAmendable(*Callee)) {

      // Unknown callees might contain parallel regions, except if they have
      // an appropriate assumption attached.
      if (!(HasAssumption(Callee, "omp_no_openmp") ||
            HasAssumption(Callee, "omp_no_parallelism")))
        ReachedUnknownParallelRegions.insert(&CB);

      // If SPMDCompatibilityTracker is not fixed, we need to give up on the
      // idea we can run something unknown in SPMD-mode.
      if (!SPMDCompatibilityTracker.isAtFixpoint())
        SPMDCompatibilityTracker.insert(&CB);

      // We have updated the state for this unknown call properly, there won't
      // be any change so we indicate a fixpoint.
      indicateOptimisticFixpoint();
    }
    // If the callee is known and can be used in IPO, we will update the state
    // based on the callee state in updateImpl.
    return;
  }

  const unsigned int WrapperFunctionArgNo = 6;
  RuntimeFunction RF = It->getSecond();
  switch (RF) {
  // All the functions we know are compatible with SPMD mode.
  case OMPRTL___kmpc_is_spmd_exec_mode:
  case OMPRTL___kmpc_for_static_fini:
  case OMPRTL___kmpc_global_thread_num:
  case OMPRTL___kmpc_get_hardware_num_threads_in_block:
  case OMPRTL___kmpc_get_hardware_num_blocks:
  case OMPRTL___kmpc_single:
  case OMPRTL___kmpc_end_single:
  case OMPRTL___kmpc_master:
  case OMPRTL___kmpc_end_master:
  case OMPRTL___kmpc_barrier:
    break;
  case OMPRTL___kmpc_for_static_init_4:
  case OMPRTL___kmpc_for_static_init_4u:
  case OMPRTL___kmpc_for_static_init_8:
  case OMPRTL___kmpc_for_static_init_8u: {
    // Check the schedule and allow static schedule in SPMD mode.
    unsigned ScheduleArgOpNo = 2;
    auto *ScheduleTypeCI =
        dyn_cast<ConstantInt>(CB.getArgOperand(ScheduleArgOpNo));
    unsigned ScheduleTypeVal =
        ScheduleTypeCI ? ScheduleTypeCI->getZExtValue() : 0;
    switch (OMPScheduleType(ScheduleTypeVal)) {
    case OMPScheduleType::Static:
    case OMPScheduleType::StaticChunked:
    case OMPScheduleType::Distribute:
    case OMPScheduleType::DistributeChunked:
      break;
    default:
      SPMDCompatibilityTracker.insert(&CB);
      break;
    };
  } break;
  case OMPRTL___kmpc_target_init:
    KernelInitCB = &CB;
    break;
  case OMPRTL___kmpc_target_deinit:
    KernelDeinitCB = &CB;
    break;
  case OMPRTL___kmpc_parallel_51:
    if (auto *ParallelRegion = dyn_cast<Function>(
            CB.getArgOperand(WrapperFunctionArgNo)->stripPointerCasts())) {
      ReachedKnownParallelRegions.insert(ParallelRegion);
      break;
    }
    // The condition above should usually get the parallel region function
    // pointer and record it. In the off chance it doesn't we assume the
    // worst.
    ReachedUnknownParallelRegions.insert(&CB);
    break;
  case OMPRTL___kmpc_omp_task:
    // We do not look into tasks right now, just give up.
    SPMDCompatibilityTracker.insert(&CB);
    ReachedUnknownParallelRegions.insert(&CB);
    break;
  case OMPRTL___kmpc_alloc_shared:
  case OMPRTL___kmpc_free_shared:
    // Return without setting a fixpoint, to be resolved in updateImpl.
    return;
  default:
    // Unknown OpenMP runtime calls cannot be executed in SPMD-mode,
    // generally.
    SPMDCompatibilityTracker.insert(&CB);
    break;
  }
  // All other OpenMP runtime calls will not reach parallel regions so they
  // can be safely ignored for now. Since it is a known OpenMP runtime call we
  // have now modeled all effects and there is no need for any update.
  indicateOptimisticFixpoint();
}

ChangeStatus updateImpl(Attributor &A) override {
  // TODO: Once we have call site specific value information we can provide
  //       call site specific liveness information and then it makes
  //       sense to specialize attributes for call sites arguments instead of
  //       redirecting requests to the callee argument.
  Function *F = getAssociatedFunction();

  auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
  const auto &It = OMPInfoCache.RuntimeFunctionIDMap.find(F);

  // If F is not a runtime function, propagate the AAKernelInfo of the callee.
  if (It == OMPInfoCache.RuntimeFunctionIDMap.end()) {
    const IRPosition &FnPos = IRPosition::function(*F);
    auto &FnAA = A.getAAFor<AAKernelInfo>(*this, FnPos, DepClassTy::REQUIRED);
    if (getState() == FnAA.getState())
      return ChangeStatus::UNCHANGED;
    getState() = FnAA.getState();
    return ChangeStatus::CHANGED;
  }

  // F is a runtime function that allocates or frees memory, check
  // AAHeapToStack and AAHeapToShared.
  KernelInfoState StateBefore = getState();
  assert((It->getSecond() == OMPRTL___kmpc_alloc_shared ||((void)0)
          It->getSecond() == OMPRTL___kmpc_free_shared) &&((void)0)
         "Expected a __kmpc_alloc_shared or __kmpc_free_shared runtime call")((void)0);

  CallBase &CB = cast<CallBase>(getAssociatedValue());

  auto &HeapToStackAA = A.getAAFor<AAHeapToStack>(
      *this, IRPosition::function(*CB.getCaller()), DepClassTy::OPTIONAL);
  auto &HeapToSharedAA = A.getAAFor<AAHeapToShared>(
      *this, IRPosition::function(*CB.getCaller()), DepClassTy::OPTIONAL);

  RuntimeFunction RF = It->getSecond();

  switch (RF) {
  // If neither HeapToStack nor HeapToShared assume the call is removed,
  // assume SPMD incompatibility.
  case OMPRTL___kmpc_alloc_shared:
    if (!HeapToStackAA.isAssumedHeapToStack(CB) &&
        !HeapToSharedAA.isAssumedHeapToShared(CB))
      SPMDCompatibilityTracker.insert(&CB);
    break;
  case OMPRTL___kmpc_free_shared:
    if (!HeapToStackAA.isAssumedHeapToStackRemovedFree(CB) &&
        !HeapToSharedAA.isAssumedHeapToSharedRemovedFree(CB))
      SPMDCompatibilityTracker.insert(&CB);
    break;
  default:
    SPMDCompatibilityTracker.insert(&CB);
  }

  return StateBefore == getState() ? ChangeStatus::UNCHANGED
                                   : ChangeStatus::CHANGED;
}
3649};

3651struct AAFoldRuntimeCall
  : public StateWrapper<BooleanState, AbstractAttribute> {
using Base = StateWrapper<BooleanState, AbstractAttribute>;

AAFoldRuntimeCall(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

/// Statistics are tracked as part of manifest for now.
void trackStatistics() const override {}

/// Create an abstract attribute biew for the position \p IRP.
static AAFoldRuntimeCall &createForPosition(const IRPosition &IRP,
                                            Attributor &A);

/// See AbstractAttribute::getName()
const std::string getName() const override { return "AAFoldRuntimeCall"; }

/// See AbstractAttribute::getIdAddr()
const char *getIdAddr() const override { return &ID; }

/// This function should return true if the type of the \p AA is
/// AAFoldRuntimeCall
static bool classof(const AbstractAttribute *AA) {
  return (AA->getIdAddr() == &ID);
}

static const char ID;
3677};

3679struct AAFoldRuntimeCallCallSiteReturned : AAFoldRuntimeCall {
AAFoldRuntimeCallCallSiteReturned(const IRPosition &IRP, Attributor &A)
    : AAFoldRuntimeCall(IRP, A) {}

/// See AbstractAttribute::getAsStr()
const std::string getAsStr() const override {
  if (!isValidState())
    return "<invalid>";

  std::string Str("simplified value: ");

  if (!SimplifiedValue.hasValue())
    return Str + std::string("none");

  if (!SimplifiedValue.getValue())
    return Str + std::string("nullptr");

  if (ConstantInt *CI = dyn_cast<ConstantInt>(SimplifiedValue.getValue()))
    return Str + std::to_string(CI->getSExtValue());

  return Str + std::string("unknown");
}

void initialize(Attributor &A) override {
  Function *Callee = getAssociatedFunction();

  auto &OMPInfoCache = static_cast<OMPInformationCache &>(A.getInfoCache());
  const auto &It = OMPInfoCache.RuntimeFunctionIDMap.find(Callee);
  assert(It != OMPInfoCache.RuntimeFunctionIDMap.end() &&((void)0)
         "Expected a known OpenMP runtime function")((void)0);

  RFKind = It->getSecond();

  CallBase &CB = cast<CallBase>(getAssociatedValue());
  A.registerSimplificationCallback(
      IRPosition::callsite_returned(CB),
      [&](const IRPosition &IRP, const AbstractAttribute *AA,
          bool &UsedAssumedInformation) -> Optional<Value *> {
        assert((isValidState() || (SimplifiedValue.hasValue() &&((void)0)
                                   SimplifiedValue.getValue() == nullptr)) &&((void)0)
               "Unexpected invalid state!")((void)0);

        if (!isAtFixpoint()) {
          UsedAssumedInformation = true;
          if (AA)
            A.recordDependence(*this, *AA, DepClassTy::OPTIONAL);
        }
        return SimplifiedValue;
      });
}

ChangeStatus updateImpl(Attributor &A) override {
  ChangeStatus Changed = ChangeStatus::UNCHANGED;
  switch (RFKind) {
  case OMPRTL___kmpc_is_spmd_exec_mode:
    Changed |= foldIsSPMDExecMode(A);
    break;
  case OMPRTL___kmpc_is_generic_main_thread_id:
    Changed |= foldIsGenericMainThread(A);
    break;
  case OMPRTL___kmpc_parallel_level:
    Changed |= foldParallelLevel(A);
    break;
  case OMPRTL___kmpc_get_hardware_num_threads_in_block:
    Changed = Changed | foldKernelFnAttribute(A, "omp_target_thread_limit");
    break;
  case OMPRTL___kmpc_get_hardware_num_blocks:
    Changed = Changed | foldKernelFnAttribute(A, "omp_target_num_teams");
    break;
  default:
    llvm_unreachable("Unhandled OpenMP runtime function!")__builtin_unreachable();
  }

  return Changed;
}

ChangeStatus manifest(Attributor &A) override {
  ChangeStatus Changed = ChangeStatus::UNCHANGED;

  if (SimplifiedValue.hasValue() && SimplifiedValue.getValue()) {
    Instruction &CB = *getCtxI();
    A.changeValueAfterManifest(CB, **SimplifiedValue);
    A.deleteAfterManifest(CB);

    LLVM_DEBUG(dbgs() << TAG << "Folding runtime call: " << CB << " with "do { } while (false)
                      << **SimplifiedValue << "\n")do { } while (false);

    Changed = ChangeStatus::CHANGED;
  }

  return Changed;
}

ChangeStatus indicatePessimisticFixpoint() override {
  SimplifiedValue = nullptr;
  return AAFoldRuntimeCall::indicatePessimisticFixpoint();
}

3777private:
/// Fold __kmpc_is_spmd_exec_mode into a constant if possible.
ChangeStatus foldIsSPMDExecMode(Attributor &A) {
  Optional<Value *> SimplifiedValueBefore = SimplifiedValue;

  unsigned AssumedSPMDCount = 0, KnownSPMDCount = 0;
  unsigned AssumedNonSPMDCount = 0, KnownNonSPMDCount = 0;
  auto &CallerKernelInfoAA = A.getAAFor<AAKernelInfo>(
      *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED);

  if (!CallerKernelInfoAA.ReachingKernelEntries.isValidState())
    return indicatePessimisticFixpoint();

  for (Kernel K : CallerKernelInfoAA.ReachingKernelEntries) {
    auto &AA = A.getAAFor<AAKernelInfo>(*this, IRPosition::function(*K),
                                        DepClassTy::REQUIRED);

    if (!AA.isValidState()) {
      SimplifiedValue = nullptr;
      return indicatePessimisticFixpoint();
    }

    if (AA.SPMDCompatibilityTracker.isAssumed()) {
      if (AA.SPMDCompatibilityTracker.isAtFixpoint())
        ++KnownSPMDCount;
      else
        ++AssumedSPMDCount;
    } else {
      if (AA.SPMDCompatibilityTracker.isAtFixpoint())
        ++KnownNonSPMDCount;
      else
        ++AssumedNonSPMDCount;
    }
  }

  if ((AssumedSPMDCount + KnownSPMDCount) &&
      (AssumedNonSPMDCount + KnownNonSPMDCount))
    return indicatePessimisticFixpoint();

  auto &Ctx = getAnchorValue().getContext();
  if (KnownSPMDCount || AssumedSPMDCount) {
    assert(KnownNonSPMDCount == 0 && AssumedNonSPMDCount == 0 &&((void)0)
           "Expected only SPMD kernels!")((void)0);
    // All reaching kernels are in SPMD mode. Update all function calls to
    // __kmpc_is_spmd_exec_mode to 1.
    SimplifiedValue = ConstantInt::get(Type::getInt8Ty(Ctx), true);
  } else if (KnownNonSPMDCount || AssumedNonSPMDCount) {
    assert(KnownSPMDCount == 0 && AssumedSPMDCount == 0 &&((void)0)
           "Expected only non-SPMD kernels!")((void)0);
    // All reaching kernels are in non-SPMD mode. Update all function
    // calls to __kmpc_is_spmd_exec_mode to 0.
    SimplifiedValue = ConstantInt::get(Type::getInt8Ty(Ctx), false);
  } else {
    // We have empty reaching kernels, therefore we cannot tell if the
    // associated call site can be folded. At this moment, SimplifiedValue
    // must be none.
    assert(!SimplifiedValue.hasValue() && "SimplifiedValue should be none")((void)0);
  }

  return SimplifiedValue == SimplifiedValueBefore ? ChangeStatus::UNCHANGED
                                                  : ChangeStatus::CHANGED;
}

/// Fold __kmpc_is_generic_main_thread_id into a constant if possible.
ChangeStatus foldIsGenericMainThread(Attributor &A) {
  Optional<Value *> SimplifiedValueBefore = SimplifiedValue;

  CallBase &CB = cast<CallBase>(getAssociatedValue());
  Function *F = CB.getFunction();
  const auto &ExecutionDomainAA = A.getAAFor<AAExecutionDomain>(
      *this, IRPosition::function(*F), DepClassTy::REQUIRED);

  if (!ExecutionDomainAA.isValidState())
    return indicatePessimisticFixpoint();

  auto &Ctx = getAnchorValue().getContext();
  if (ExecutionDomainAA.isExecutedByInitialThreadOnly(CB))
    SimplifiedValue = ConstantInt::get(Type::getInt8Ty(Ctx), true);
  else
    return indicatePessimisticFixpoint();

  return SimplifiedValue == SimplifiedValueBefore ? ChangeStatus::UNCHANGED
                                                  : ChangeStatus::CHANGED;
}

/// Fold __kmpc_parallel_level into a constant if possible.
ChangeStatus foldParallelLevel(Attributor &A) {
  Optional<Value *> SimplifiedValueBefore = SimplifiedValue;

  auto &CallerKernelInfoAA = A.getAAFor<AAKernelInfo>(
      *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED);

  if (!CallerKernelInfoAA.ParallelLevels.isValidState())
    return indicatePessimisticFixpoint();

  if (!CallerKernelInfoAA.ReachingKernelEntries.isValidState())
    return indicatePessimisticFixpoint();

  if (CallerKernelInfoAA.ReachingKernelEntries.empty()) {
    assert(!SimplifiedValue.hasValue() &&((void)0)
           "SimplifiedValue should keep none at this point")((void)0);
    return ChangeStatus::UNCHANGED;
  }

  unsigned AssumedSPMDCount = 0, KnownSPMDCount = 0;
  unsigned AssumedNonSPMDCount = 0, KnownNonSPMDCount = 0;
  for (Kernel K : CallerKernelInfoAA.ReachingKernelEntries) {
    auto &AA = A.getAAFor<AAKernelInfo>(*this, IRPosition::function(*K),
                                        DepClassTy::REQUIRED);
    if (!AA.SPMDCompatibilityTracker.isValidState())
      return indicatePessimisticFixpoint();

    if (AA.SPMDCompatibilityTracker.isAssumed()) {
      if (AA.SPMDCompatibilityTracker.isAtFixpoint())
        ++KnownSPMDCount;
      else
        ++AssumedSPMDCount;
    } else {
      if (AA.SPMDCompatibilityTracker.isAtFixpoint())
        ++KnownNonSPMDCount;
      else
        ++AssumedNonSPMDCount;
    }
  }

  if ((AssumedSPMDCount + KnownSPMDCount) &&
      (AssumedNonSPMDCount + KnownNonSPMDCount))
    return indicatePessimisticFixpoint();

  auto &Ctx = getAnchorValue().getContext();
  // If the caller can only be reached by SPMD kernel entries, the parallel
  // level is 1. Similarly, if the caller can only be reached by non-SPMD
  // kernel entries, it is 0.
  if (AssumedSPMDCount || KnownSPMDCount) {
    assert(KnownNonSPMDCount == 0 && AssumedNonSPMDCount == 0 &&((void)0)
           "Expected only SPMD kernels!")((void)0);
    SimplifiedValue = ConstantInt::get(Type::getInt8Ty(Ctx), 1);
  } else {
    assert(KnownSPMDCount == 0 && AssumedSPMDCount == 0 &&((void)0)
           "Expected only non-SPMD kernels!")((void)0);
    SimplifiedValue = ConstantInt::get(Type::getInt8Ty(Ctx), 0);
  }
  return SimplifiedValue == SimplifiedValueBefore ? ChangeStatus::UNCHANGED
                                                  : ChangeStatus::CHANGED;
}

ChangeStatus foldKernelFnAttribute(Attributor &A, llvm::StringRef Attr) {
  // Specialize only if all the calls agree with the attribute constant value
  int32_t CurrentAttrValue = -1;
  Optional<Value *> SimplifiedValueBefore = SimplifiedValue;

  auto &CallerKernelInfoAA = A.getAAFor<AAKernelInfo>(
      *this, IRPosition::function(*getAnchorScope()), DepClassTy::REQUIRED);

  if (!CallerKernelInfoAA.ReachingKernelEntries.isValidState())
    return indicatePessimisticFixpoint();

  // Iterate over the kernels that reach this function
  for (Kernel K : CallerKernelInfoAA.ReachingKernelEntries) {
    int32_t NextAttrVal = -1;
    if (K->hasFnAttribute(Attr))
      NextAttrVal =
          std::stoi(K->getFnAttribute(Attr).getValueAsString().str());

    if (NextAttrVal == -1 ||
        (CurrentAttrValue != -1 && CurrentAttrValue != NextAttrVal))
      return indicatePessimisticFixpoint();
    CurrentAttrValue = NextAttrVal;
  }

  if (CurrentAttrValue != -1) {
    auto &Ctx = getAnchorValue().getContext();
    SimplifiedValue =
        ConstantInt::get(Type::getInt32Ty(Ctx), CurrentAttrValue);
  }
  return SimplifiedValue == SimplifiedValueBefore ? ChangeStatus::UNCHANGED
                                                  : ChangeStatus::CHANGED;
}

/// An optional value the associated value is assumed to fold to. That is, we
/// assume the associated value (which is a call) can be replaced by this
/// simplified value.
Optional<Value *> SimplifiedValue;

/// The runtime function kind of the callee of the associated call site.
RuntimeFunction RFKind;
3963};

3965} // namespace

3967/// Register folding callsite
3968void OpenMPOpt::registerFoldRuntimeCall(RuntimeFunction RF) {
auto &RFI = OMPInfoCache.RFIs[RF];
RFI.foreachUse(SCC, [&](Use &U, Function &F) {
  CallInst *CI = OpenMPOpt::getCallIfRegularCall(U, &RFI);
  if (!CI)
    return false;
  A.getOrCreateAAFor<AAFoldRuntimeCall>(
      IRPosition::callsite_returned(*CI), /* QueryingAA */ nullptr,
      DepClassTy::NONE, /* ForceUpdate */ false,
      /* UpdateAfterInit */ false);
  return false;
});
3980}

/// Create the abstract attributes this pass wants the Attributor `A` to
/// track for the functions in `SCC`.
///
/// \param IsModulePass When true, additionally create an AAKernelInfo for
///        every kernel in the information cache and register runtime-call
///        folding for a fixed list of runtime functions.
3982void OpenMPOpt::registerAAs(bool IsModulePass) {
// Nothing to register when the SCC holds no functions.
if (SCC.empty())

  return;
if (IsModulePass) {
  // Ensure we create the AAKernelInfo AAs first and without triggering an
  // update. This will make sure we register all value simplification
  // callbacks before any other AA has the chance to create an AAValueSimplify
  // or similar.
  for (Function *Kernel : OMPInfoCache.Kernels)
    A.getOrCreateAAFor<AAKernelInfo>(
        IRPosition::function(*Kernel), /* QueryingAA */ nullptr,
        DepClassTy::NONE, /* ForceUpdate */ false,
        /* UpdateAfterInit */ false);


  // Each call creates an AAFoldRuntimeCall for the regular calls to the named
  // runtime function.
  registerFoldRuntimeCall(OMPRTL___kmpc_is_generic_main_thread_id);
  registerFoldRuntimeCall(OMPRTL___kmpc_is_spmd_exec_mode);
  registerFoldRuntimeCall(OMPRTL___kmpc_parallel_level);
  registerFoldRuntimeCall(OMPRTL___kmpc_get_hardware_num_threads_in_block);
  registerFoldRuntimeCall(OMPRTL___kmpc_get_hardware_num_blocks);
}

// Create CallSite AA for all Getters.
// Note the bound is size() - 1, so the last entry of ICVs is not visited.
for (int Idx = 0; Idx < OMPInfoCache.ICVs.size() - 1; ++Idx) {
  auto ICVInfo = OMPInfoCache.ICVs[static_cast<InternalControlVar>(Idx)];

  auto &GetterRFI = OMPInfoCache.RFIs[ICVInfo.Getter];

  // Creates an AAICVTracker at the call-site position of each regular call to
  // the getter; other uses are skipped.
  // NOTE(review): the callback always returns false; what foreachUse does with
  // that value is not visible here -- confirm.
  auto CreateAA = [&](Use &U, Function &Caller) {
    CallInst *CI = OpenMPOpt::getCallIfRegularCall(U, &GetterRFI);
    if (!CI)
      return false;

    auto &CB = cast<CallBase>(*CI);

    IRPosition CBPos = IRPosition::callsite_function(CB);
    A.getOrCreateAAFor<AAICVTracker>(CBPos);
    return false;
  };

  GetterRFI.foreachUse(SCC, CreateAA);
}
// Create an AAHeapToShared at the function position of every function handed
// to the callback for a use of __kmpc_alloc_shared.
// NOTE(review): presumably F is the function containing the use -- confirm.
auto &GlobalizationRFI = OMPInfoCache.RFIs[OMPRTL___kmpc_alloc_shared];
auto CreateAA = [&](Use &U, Function &F) {
  A.getOrCreateAAFor<AAHeapToShared>(IRPosition::function(F));
  return false;
};
GlobalizationRFI.foreachUse(SCC, CreateAA);

// Create an ExecutionDomain AA for every function and a HeapToStack AA for
// every function if there is a device kernel.
if (!isOpenMPDevice(M))
  return;

for (auto *F : SCC) {
  // Declarations have no body to analyze.
  if (F->isDeclaration())
    continue;

  A.getOrCreateAAFor<AAExecutionDomain>(IRPosition::function(*F));
  A.getOrCreateAAFor<AAHeapToStack>(IRPosition::function(*F));

  // Query the simplified value of every load in the function. Both the
  // returned value and UsedAssumedInformation are discarded here.
  for (auto &I : instructions(*F)) {
    if (auto *LI = dyn_cast<LoadInst>(&I)) {
      bool UsedAssumedInformation = false;
      A.getAssumedSimplified(IRPosition::value(*LI), /* AA */ nullptr,
                             UsedAssumedInformation);
    }
  }
}
4052}

// Out-of-class definitions of the static `ID` member of each abstract
// attribute class declared for this pass.
// NOTE(review): presumably only the address of each ID is significant and the
// value 0 is irrelevant -- confirm against the Attributor's use of ID.
4054const char AAICVTracker::ID = 0;
4055const char AAKernelInfo::ID = 0;
4056const char AAExecutionDomain::ID = 0;
4057const char AAHeapToShared::ID = 0;
4058const char AAFoldRuntimeCall::ID = 0;

4060AAICVTracker &AAICVTracker::createForPosition(const IRPosition &IRP,
                                            Attributor &A) {
AAICVTracker *AA = nullptr;
switch (IRP.getPositionKind()) {
case IRPosition::IRP_INVALID:
case IRPosition::IRP_FLOAT:
case IRPosition::IRP_ARGUMENT:
case IRPosition::IRP_CALL_SITE_ARGUMENT:
  llvm_unreachable("ICVTracker can only be created for function position!")__builtin_unreachable();
case IRPosition::IRP_RETURNED:
  AA = new (A.Allocator) AAICVTrackerFunctionReturned(IRP, A);
  break;
case IRPosition::IRP_CALL_SITE_RETURNED:
  AA = new (A.Allocator) AAICVTrackerCallSiteReturned(IRP, A);
  break;
case IRPosition::IRP_CALL_SITE:
  AA = new (A.Allocator) AAICVTrackerCallSite(IRP, A);
  break;
case IRPosition::IRP_FUNCTION:
  AA = new (A.Allocator) AAICVTrackerFunction(IRP, A);
  break;
}

return *AA;
4084}

4086AAExecutionDomain &AAExecutionDomain::createForPosition(const IRPosition &IRP,
                                                      Attributor &A) {
AAExecutionDomainFunction *AA = nullptr;
switch (IRP.getPositionKind()) {
case IRPosition::IRP_INVALID:
case IRPosition::IRP_FLOAT:
case IRPosition::IRP_ARGUMENT:
case IRPosition::IRP_CALL_SITE_ARGUMENT:
case IRPosition::IRP_RETURNED:
case IRPosition::IRP_CALL_SITE_RETURNED:
case IRPosition::IRP_CALL_SITE:
  llvm_unreachable(__builtin_unreachable()
      "AAExecutionDomain can only be created for function position!")__builtin_unreachable();
case IRPosition::IRP_FUNCTION:
  AA = new (A.Allocator) AAExecutionDomainFunction(IRP, A);
  break;
}

return *AA;
4105}

4107AAHeapToShared &AAHeapToShared::createForPosition(const IRPosition &IRP,
                                                Attributor &A) {
AAHeapToSharedFunction *AA = nullptr;
switch (IRP.getPositionKind()) {
case IRPosition::IRP_INVALID:
case IRPosition::IRP_FLOAT:
case IRPosition::IRP_ARGUMENT:
case IRPosition::IRP_CALL_SITE_ARGUMENT:
case IRPosition::IRP_RETURNED:
case IRPosition::IRP_CALL_SITE_RETURNED:
case IRPosition::IRP_CALL_SITE:
  llvm_unreachable(__builtin_unreachable()
      "AAHeapToShared can only be created for function position!")__builtin_unreachable();
case IRPosition::IRP_FUNCTION:
  AA = new (A.Allocator) AAHeapToSharedFunction(IRP, A);
  break;
}

return *AA;
4126}

4128AAKernelInfo &AAKernelInfo::createForPosition(const IRPosition &IRP,
                                            Attributor &A) {
AAKernelInfo *AA = nullptr;
switch (IRP.getPositionKind()) {
case IRPosition::IRP_INVALID:
case IRPosition::IRP_FLOAT:
case IRPosition::IRP_ARGUMENT:
case IRPosition::IRP_RETURNED:
case IRPosition::IRP_CALL_SITE_RETURNED:
case IRPosition::IRP_CALL_SITE_ARGUMENT:
  llvm_unreachable("KernelInfo can only be created for function position!")__builtin_unreachable();
case IRPosition::IRP_CALL_SITE:
  AA = new (A.Allocator) AAKernelInfoCallSite(IRP, A);
  break;
case IRPosition::IRP_FUNCTION:
  AA = new (A.Allocator) AAKernelInfoFunction(IRP, A);
  break;
}

return *AA;
4148}

4150AAFoldRuntimeCall &AAFoldRuntimeCall::createForPosition(const IRPosition &IRP,
                                                      Attributor &A) {
AAFoldRuntimeCall *AA = nullptr;
switch (IRP.getPositionKind()) {
case IRPosition::IRP_INVALID:
case IRPosition::IRP_FLOAT:
case IRPosition::IRP_ARGUMENT:
case IRPosition::IRP_RETURNED:
case IRPosition::IRP_FUNCTION:
case IRPosition::IRP_CALL_SITE:
case IRPosition::IRP_CALL_SITE_ARGUMENT:
  llvm_unreachable("KernelInfo can only be created for call site position!")__builtin_unreachable();
case IRPosition::IRP_CALL_SITE_RETURNED:
  AA = new (A.Allocator) AAFoldRuntimeCallCallSiteReturned(IRP, A);
  break;
}

return *AA;
4168}

4170PreservedAnalyses OpenMPOptPass::run(Module &M, ModuleAnalysisManager &AM) {
if (!containsOpenMP(M))
  return PreservedAnalyses::all();
if (DisableOpenMPOptimizations)
  return PreservedAnalyses::all();

FunctionAnalysisManager &FAM =
    AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
KernelSet Kernels = getDeviceKernels(M);

auto IsCalled = [&](Function &F) {
  if (Kernels.contains(&F))
    return true;
  for (const User *U : F.users())
    if (!isa<BlockAddress>(U))
      return true;
  return false;
};

auto EmitRemark = [&](Function &F) {
  auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
  ORE.emit([&]() {
    OptimizationRemarkAnalysis ORA(DEBUG_TYPE"openmp-opt", "OMP140", &F);
    return ORA << "Could not internalize function. "
               << "Some optimizations may not be possible. [OMP140]";
  });
};

// Create internal copies of each function if this is a kernel Module. This
// allows iterprocedural passes to see every call edge.
DenseMap<Function *, Function *> InternalizedMap;
if (isOpenMPDevice(M)) {
  SmallPtrSet<Function *, 16> InternalizeFns;
  for (Function &F : M)
    if (!F.isDeclaration() && !Kernels.contains(&F) && IsCalled(F) &&
        !DisableInternalization) {
      if (Attributor::isInternalizable(F)) {
        InternalizeFns.insert(&F);
      } else if (!F.hasLocalLinkage() && !F.hasFnAttribute(Attribute::Cold)) {
        EmitRemark(F);
      }
    }

  Attributor::internalizeFunctions(InternalizeFns, InternalizedMap);
}

// Look at every function in the Module unless it was internalized.
SmallVector<Function *, 16> SCC;
for (Function &F : M)
  if (!F.isDeclaration() && !InternalizedMap.lookup(&F))
    SCC.push_back(&F);

if (SCC.empty())
  return PreservedAnalyses::all();

AnalysisGetter AG(FAM);

auto OREGetter = [&FAM](Function *F) -> OptimizationRemarkEmitter & {
  return FAM.getResult<OptimizationRemarkEmitterAnalysis>(*F);
};

BumpPtrAllocator Allocator;
CallGraphUpdater CGUpdater;

SetVector<Function *> Functions(SCC.begin(), SCC.end());
OMPInformationCache InfoCache(M, AG, Allocator, /*CGSCC*/ Functions, Kernels);

unsigned MaxFixpointIterations = (isOpenMPDevice(M)) ? 128 : 32;
Attributor A(Functions, InfoCache, CGUpdater, nullptr, true, false,
             MaxFixpointIterations, OREGetter, DEBUG_TYPE"openmp-opt");

OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A);
bool Changed = OMPOpt.run(true);
if (Changed)
  return PreservedAnalyses::none();

return PreservedAnalyses::all();
4247}

4249PreservedAnalyses OpenMPOptCGSCCPass::run(LazyCallGraph::SCC &C,
                                        CGSCCAnalysisManager &AM,
                                        LazyCallGraph &CG,
                                        CGSCCUpdateResult &UR) {
if (!containsOpenMP(*C.begin()->getFunction().getParent()))
  return PreservedAnalyses::all();
if (DisableOpenMPOptimizations)
  return PreservedAnalyses::all();

SmallVector<Function *, 16> SCC;
// If there are kernels in the module, we have to run on all SCC's.
for (LazyCallGraph::Node &N : C) {
  Function *Fn = &N.getFunction();
  SCC.push_back(Fn);
}

if (SCC.empty())
  return PreservedAnalyses::all();

Module &M = *C.begin()->getFunction().getParent();

KernelSet Kernels = getDeviceKernels(M);

FunctionAnalysisManager &FAM =
    AM.getResult<FunctionAnalysisManagerCGSCCProxy>(C, CG).getManager();

AnalysisGetter AG(FAM);

auto OREGetter = [&FAM](Function *F) -> OptimizationRemarkEmitter & {
  return FAM.getResult<OptimizationRemarkEmitterAnalysis>(*F);
};

BumpPtrAllocator Allocator;
CallGraphUpdater CGUpdater;
CGUpdater.initialize(CG, C, AM, UR);

SetVector<Function *> Functions(SCC.begin(), SCC.end());
OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG, Allocator,
                              /*CGSCC*/ Functions, Kernels);

unsigned MaxFixpointIterations = (isOpenMPDevice(M)) ? 128 : 32;
Attributor A(Functions, InfoCache, CGUpdater, nullptr, false, true,
             MaxFixpointIterations, OREGetter, DEBUG_TYPE"openmp-opt");

OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A);
bool Changed = OMPOpt.run(false);
if (Changed)
  return PreservedAnalyses::none();

return PreservedAnalyses::all();
4299}

4301namespace {

4303struct OpenMPOptCGSCCLegacyPass : public CallGraphSCCPass {
CallGraphUpdater CGUpdater;
static char ID;

OpenMPOptCGSCCLegacyPass() : CallGraphSCCPass(ID) {
  initializeOpenMPOptCGSCCLegacyPassPass(*PassRegistry::getPassRegistry());
}

void getAnalysisUsage(AnalysisUsage &AU) const override {
  CallGraphSCCPass::getAnalysisUsage(AU);
}

bool runOnSCC(CallGraphSCC &CGSCC) override {
  if (!containsOpenMP(CGSCC.getCallGraph().getModule()))
    return false;
  if (DisableOpenMPOptimizations || skipSCC(CGSCC))
    return false;

  SmallVector<Function *, 16> SCC;
  // If there are kernels in the module, we have to run on all SCC's.
  for (CallGraphNode *CGN : CGSCC) {
    Function *Fn = CGN->getFunction();
    if (!Fn || Fn->isDeclaration())
      continue;
    SCC.push_back(Fn);
  }

  if (SCC.empty())
    return false;

  Module &M = CGSCC.getCallGraph().getModule();
  KernelSet Kernels = getDeviceKernels(M);

  CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
  CGUpdater.initialize(CG, CGSCC);

  // Maintain a map of functions to avoid rebuilding the ORE
  DenseMap<Function *, std::unique_ptr<OptimizationRemarkEmitter>> OREMap;
  auto OREGetter = [&OREMap](Function *F) -> OptimizationRemarkEmitter & {
    std::unique_ptr<OptimizationRemarkEmitter> &ORE = OREMap[F];
    if (!ORE)
      ORE = std::make_unique<OptimizationRemarkEmitter>(F);
    return *ORE;
  };

  AnalysisGetter AG;
  SetVector<Function *> Functions(SCC.begin(), SCC.end());
  BumpPtrAllocator Allocator;
  OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG,
                                Allocator,
                                /*CGSCC*/ Functions, Kernels);

  unsigned MaxFixpointIterations = (isOpenMPDevice(M)) ? 128 : 32;
  Attributor A(Functions, InfoCache, CGUpdater, nullptr, false, true,
               MaxFixpointIterations, OREGetter, DEBUG_TYPE"openmp-opt");

  OpenMPOpt OMPOpt(SCC, CGUpdater, OREGetter, InfoCache, A);
  return OMPOpt.run(false);
}

bool doFinalization(CallGraph &CG) override { return CGUpdater.finalize(); }
4364};

4366} // end anonymous namespace

4368KernelSet llvm::omp::getDeviceKernels(Module &M) {
// TODO: Create a more cross-platform way of determining device kernels.
NamedMDNode *MD = M.getOrInsertNamedMetadata("nvvm.annotations");
KernelSet Kernels;

if (!MD)
  return Kernels;

for (auto *Op : MD->operands()) {
  if (Op->getNumOperands() < 2)
    continue;
  MDString *KindID = dyn_cast<MDString>(Op->getOperand(1));
  if (!KindID || KindID->getString() != "kernel")
    continue;

  Function *KernelFn =
      mdconst::dyn_extract_or_null<Function>(Op->getOperand(0));
  if (!KernelFn)
    continue;

  ++NumOpenMPTargetRegionKernels;

  Kernels.insert(KernelFn);
}

return Kernels;
4394}

4396bool llvm::omp::containsOpenMP(Module &M) {
Metadata *MD = M.getModuleFlag("openmp");
if (!MD)
  return false;

return true;
4402}

4404bool llvm::omp::isOpenMPDevice(Module &M) {
Metadata *MD = M.getModuleFlag("openmp-device");
if (!MD)
  return false;

return true;
4410}

4412char OpenMPOptCGSCCLegacyPass::ID = 0;

4414INITIALIZE_PASS_BEGIN(OpenMPOptCGSCCLegacyPass, "openmp-opt-cgscc",static void *initializeOpenMPOptCGSCCLegacyPassPassOnce(PassRegistry
 &Registry) {
                    "OpenMP specific optimizations", false, false)static void *initializeOpenMPOptCGSCCLegacyPassPassOnce(PassRegistry
 &Registry) {
4416INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)initializeCallGraphWrapperPassPass(Registry);
4417INITIALIZE_PASS_END(OpenMPOptCGSCCLegacyPass, "openmp-opt-cgscc",PassInfo *PI = new PassInfo( "OpenMP specific optimizations",
 "openmp-opt-cgscc", &OpenMPOptCGSCCLegacyPass::ID, PassInfo
::NormalCtor_t(callDefaultCtor<OpenMPOptCGSCCLegacyPass>
), false, false); Registry.registerPass(*PI, true); return PI
; } static llvm::once_flag InitializeOpenMPOptCGSCCLegacyPassPassFlag
; void llvm::initializeOpenMPOptCGSCCLegacyPassPass(PassRegistry
 &Registry) { llvm::call_once(InitializeOpenMPOptCGSCCLegacyPassPassFlag
, initializeOpenMPOptCGSCCLegacyPassPassOnce, std::ref(Registry
)); }
                  "OpenMP specific optimizations", false, false)PassInfo *PI = new PassInfo( "OpenMP specific optimizations",
 "openmp-opt-cgscc", &OpenMPOptCGSCCLegacyPass::ID, PassInfo
::NormalCtor_t(callDefaultCtor<OpenMPOptCGSCCLegacyPass>
), false, false); Registry.registerPass(*PI, true); return PI
; } static llvm::once_flag InitializeOpenMPOptCGSCCLegacyPassPassFlag
; void llvm::initializeOpenMPOptCGSCCLegacyPassPass(PassRegistry
 &Registry) { llvm::call_once(InitializeOpenMPOptCGSCCLegacyPassPassFlag
, initializeOpenMPOptCGSCCLegacyPassPassOnce, std::ref(Registry
)); }

4420Pass *llvm::createOpenMPOptCGSCCLegacyPass() {
return new OpenMPOptCGSCCLegacyPass();
4422}

←

/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/IPO/Attributor.h

1//===- Attributor.h --- Module-wide attribute deduction ---------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Attributor: An inter procedural (abstract) "attribute" deduction framework.
10//
11// The Attributor framework is an inter procedural abstract analysis (fixpoint
12// iteration analysis). The goal is to allow easy deduction of new attributes as
13// well as information exchange between abstract attributes in-flight.
14//
15// The Attributor class is the driver and the link between the various abstract
16// attributes. The Attributor will iterate until a fixpoint state is reached by
17// all abstract attributes in-flight, or until it will enforce a pessimistic fix
18// point because an iteration limit is reached.
19//
20// Abstract attributes, derived from the AbstractAttribute class, actually
21// describe properties of the code. They can correspond to actual LLVM-IR
22// attributes, or they can be more general, ultimately unrelated to LLVM-IR
23// attributes. The latter is useful when an abstract attribute provides
24// information to other abstract attributes in-flight but we might not want to
25// manifest the information. The Attributor allows to query in-flight abstract
26// attributes through the `Attributor::getAAFor` method (see the method
27// description for an example). If the method is used by an abstract attribute
28// P, and it results in an abstract attribute Q, the Attributor will
29// automatically capture a potential dependence from Q to P. This dependence
30// will cause P to be reevaluated whenever Q changes in the future.
31//
32// The Attributor will only reevaluate abstract attributes that might have
33// changed since the last iteration. That means that the Attributor will not
34// revisit all instructions/blocks/functions in the module but only query
35// an update from a subset of the abstract attributes.
36//
37// The update method `AbstractAttribute::updateImpl` is implemented by the
38// specific "abstract attribute" subclasses. The method is invoked whenever the
39// currently assumed state (see the AbstractState class) might not be valid
40// anymore. This can, for example, happen if the state was dependent on another
41// abstract attribute that changed. In every invocation, the update method has
42// to adjust the internal state of an abstract attribute to a point that is
43// justifiable by the underlying IR and the current state of abstract attributes
44// in-flight. Since the IR is given and assumed to be valid, the information
45// derived from it can be assumed to hold. However, information derived from
46// other abstract attributes is conditional on various things. If the justifying
47// state changed, the `updateImpl` has to revisit the situation and potentially
48// find another justification or limit the optimistic assumptions made.
49//
50// Change is the key in this framework. Until a state of no-change, thus a
51// fixpoint, is reached, the Attributor will query the abstract attributes
52// in-flight to re-evaluate their state. If the (current) state is too
53// optimistic, hence it cannot be justified anymore through other abstract
54// attributes or the state of the IR, the state of the abstract attribute will
55// have to change. Generally, we assume abstract attribute state to be a finite
56// height lattice and the update function to be monotone. However, these
57// conditions are not enforced because the iteration limit will guarantee
58// termination. If an optimistic fixpoint is reached, or a pessimistic fix
59// point is enforced after a timeout, the abstract attributes are tasked to
60// manifest their result in the IR for passes to come.
61//
62// Attribute manifestation is not mandatory. If desired, there is support to
63// generate a single or multiple LLVM-IR attributes already in the helper struct
64// IRAttribute. In the simplest case, a subclass inherits from IRAttribute with
65// a proper Attribute::AttrKind as template parameter. The Attributor
66// manifestation framework will then create and place a new attribute if it is
67// allowed to do so (based on the abstract state). Other use cases can be
68// achieved by overloading AbstractAttribute or IRAttribute methods.
69//
70//
71// The "mechanics" of adding a new "abstract attribute":
72// - Define a class (transitively) inheriting from AbstractAttribute and one
73//   (which could be the same) that (transitively) inherits from AbstractState.
74//   For the latter, consider the already available BooleanState and
75//   {Inc,Dec,Bit}IntegerState if they fit your needs, e.g., you require only a
76//   number tracking or bit-encoding.
77// - Implement all pure methods. Also use overloading if the attribute is not
78//   conforming with the "default" behavior: A (set of) LLVM-IR attribute(s) for
79//   an argument, call site argument, function return value, or function. See
80//   the class and method descriptions for more information on the two
81//   "Abstract" classes and their respective methods.
82// - Register opportunities for the new abstract attribute in the
83//   `Attributor::identifyDefaultAbstractAttributes` method if it should be
84//   counted as a 'default' attribute.
85// - Add sufficient tests.
86// - Add a Statistics object for bookkeeping. If it is a simple (set of)
87//   attribute(s) manifested through the Attributor manifestation framework, see
88//   the bookkeeping function in Attributor.cpp.
89// - If instructions with a certain opcode are interesting to the attribute, add
90//   that opcode to the switch in `Attributor::identifyAbstractAttributes`. This
91//   will make it possible to query all those instructions through the
92//   `InformationCache::getOpcodeInstMapForFunction` interface and eliminate the
93//   need to traverse the IR repeatedly.
94//
95//===----------------------------------------------------------------------===//

97#ifndef LLVM_TRANSFORMS_IPO_ATTRIBUTOR_H
98#define LLVM_TRANSFORMS_IPO_ATTRIBUTOR_H

100#include "llvm/ADT/DenseSet.h"
101#include "llvm/ADT/GraphTraits.h"
102#include "llvm/ADT/MapVector.h"
103#include "llvm/ADT/STLExtras.h"
104#include "llvm/ADT/SetVector.h"
105#include "llvm/ADT/Triple.h"
106#include "llvm/ADT/iterator.h"
107#include "llvm/Analysis/AssumeBundleQueries.h"
108#include "llvm/Analysis/CFG.h"
109#include "llvm/Analysis/CGSCCPassManager.h"
110#include "llvm/Analysis/LazyCallGraph.h"
111#include "llvm/Analysis/LoopInfo.h"
112#include "llvm/Analysis/MustExecute.h"
113#include "llvm/Analysis/OptimizationRemarkEmitter.h"
114#include "llvm/Analysis/PostDominators.h"
115#include "llvm/Analysis/TargetLibraryInfo.h"
116#include "llvm/IR/AbstractCallSite.h"
117#include "llvm/IR/ConstantRange.h"
118#include "llvm/IR/PassManager.h"
119#include "llvm/Support/Allocator.h"
120#include "llvm/Support/Casting.h"
121#include "llvm/Support/GraphWriter.h"
122#include "llvm/Support/TimeProfiler.h"
123#include "llvm/Transforms/Utils/CallGraphUpdater.h"

125namespace llvm {

127struct AADepGraphNode;
128struct AADepGraph;
129struct Attributor;
130struct AbstractAttribute;
131struct InformationCache;
132struct AAIsDead;
133struct AttributorCallGraph;

135class AAManager;
136class AAResults;
137class Function;

139/// Abstract Attribute helper functions.
// Only declarations appear in this namespace here; the definitions live
// elsewhere.
140namespace AA {

142/// Return true if \p V is dynamically unique, that is, there are no two
143/// "instances" of \p V at runtime with different values.
144bool isDynamicallyUnique(Attributor &A, const AbstractAttribute &QueryingAA,
                       const Value &V);

147/// Return true if \p V is a valid value in \p Scope, that is a constant or an
148/// instruction/argument of \p Scope.
149bool isValidInScope(const Value &V, const Function *Scope);

151/// Return true if \p V is a valid value at position \p CtxI, that is a
152/// constant, an argument of the same function as \p CtxI, or an instruction in
153/// that function that dominates \p CtxI.
154bool isValidAtPosition(const Value &V, const Instruction &CtxI,
                     InformationCache &InfoCache);

157/// Try to convert \p V to type \p Ty without introducing new instructions. If
158/// this is not possible return `nullptr`. Note: this function basically knows
159/// how to cast various constants.
160Value *getWithType(Value &V, Type &Ty);

162/// Return the combination of \p A and \p B such that the result is a possible
163/// value of both. \p B is potentially casted to match the type \p Ty or the
164/// type of \p A if \p Ty is null.
165///
166/// Examples:
167///        X + none  => X
168/// not_none + undef => not_none
169///          V1 + V2 => nullptr
// NOTE(review): "Latice" in the name is presumably a misspelling of
// "Lattice"; the identifier is part of the interface, so it is left as is.
170Optional<Value *>
171combineOptionalValuesInAAValueLatice(const Optional<Value *> &A,
                                   const Optional<Value *> &B, Type *Ty);

174/// Return the initial value of \p Obj with type \p Ty if that is a constant.
175Constant *getInitialValueForObj(Value &Obj, Type &Ty);

177/// Collect all potential underlying objects of \p Ptr at position \p CtxI in
178/// \p Objects. Assumed information is used and dependences onto \p QueryingAA
179/// are added appropriately.
180///
181/// \returns True if \p Objects contains all assumed underlying objects, and
182///          false if something went wrong and the objects could not be
183///          determined.
184bool getAssumedUnderlyingObjects(Attributor &A, const Value &Ptr,
                               SmallVectorImpl<Value *> &Objects,
                               const AbstractAttribute &QueryingAA,
                               const Instruction *CtxI);

189/// Collect all potential values of the one stored by \p SI into
190/// \p PotentialCopies. That is, the only copies that were made via the
191/// store are assumed to be known and all in \p PotentialCopies. Dependences
192/// onto \p QueryingAA are properly tracked, \p UsedAssumedInformation will
193/// inform the caller if assumed information was used.
194///
195/// \returns True if the assumed potential copies are all in \p PotentialCopies,
196///          false if something went wrong and the copies could not be
197///          determined.
198bool getPotentialCopiesOfStoredValue(
  Attributor &A, StoreInst &SI, SmallSetVector<Value *, 4> &PotentialCopies,
  const AbstractAttribute &QueryingAA, bool &UsedAssumedInformation);

202} // namespace AA

204/// The value passed to the command line option that defines the maximal
205/// initialization chain length.
206extern unsigned MaxInitializationChainLength;

208///{
/// Two-valued status with the enumerators CHANGED and UNCHANGED.
/// NOTE(review): presumably reports whether an update or manifest step
/// modified anything -- confirm against the users of this type.
209enum class ChangeStatus {
CHANGED,
UNCHANGED,
212};

/// Operators to combine two ChangeStatus values; only declared here.
/// NOTE(review): the combination rules are defined elsewhere -- confirm before
/// relying on a particular meaning of `|` and `&`.
214ChangeStatus operator|(ChangeStatus l, ChangeStatus r);
215ChangeStatus &operator|=(ChangeStatus &l, ChangeStatus r);
216ChangeStatus operator&(ChangeStatus l, ChangeStatus r);
217ChangeStatus &operator&=(ChangeStatus &l, ChangeStatus r);

/// Classification of a dependence between two abstract attributes, called
/// "source" and "target" in the enumerator descriptions below.
219enum class DepClassTy {
REQUIRED, ///< The target cannot be valid if the source is not.
OPTIONAL, ///< The target may be valid if the source is not.
NONE,     ///< Do not track a dependence between source and target.
223};
224///}

226/// The data structure for the nodes of a dependency graph
227struct AADepGraphNode {
228public:
virtual ~AADepGraphNode(){};
/// A dependence edge: a pointer to the dependent node plus one extra bit.
using DepTy = PointerIntPair<AADepGraphNode *, 1>;

232protected:
/// Set of dependency graph nodes which should be updated if this one
/// is updated. The bit encodes if it is optional.
TinyPtrVector<DepTy> Deps;

/// Extract the node pointer from a dependence edge.
static AADepGraphNode *DepGetVal(DepTy &DT) { return DT.getPointer(); }
/// Extract the node pointer from a dependence edge and cast it to an
/// AbstractAttribute.
static AbstractAttribute *DepGetValAA(DepTy &DT) {
  return cast<AbstractAttribute>(DT.getPointer());
}

/// Conversion of this node to an AbstractAttribute pointer via cast<>.
operator AbstractAttribute *() { return cast<AbstractAttribute>(this); }

244public:
/// Iterator over the dependent nodes as AADepGraphNode pointers.
using iterator =
    mapped_iterator<TinyPtrVector<DepTy>::iterator, decltype(&DepGetVal)>;
/// Iterator over the dependent nodes as AbstractAttribute pointers.
using aaiterator =
    mapped_iterator<TinyPtrVector<DepTy>::iterator, decltype(&DepGetValAA)>;

// begin()/end() walk Deps as abstract attributes, child_begin()/child_end()
// walk the same edges as plain graph nodes.
aaiterator begin() { return aaiterator(Deps.begin(), &DepGetValAA); }
aaiterator end() { return aaiterator(Deps.end(), &DepGetValAA); }
iterator child_begin() { return iterator(Deps.begin(), &DepGetVal); }
iterator child_end() { return iterator(Deps.end(), &DepGetVal); }

/// Print a placeholder description of this node to \p OS.
virtual void print(raw_ostream &OS) const { OS << "AADepNode Impl\n"; }
/// Mutable access to the dependence edges of this node.
TinyPtrVector<DepTy> &getDeps() { return Deps; }

friend struct Attributor;
friend struct AADepGraph;
260};

262/// The data structure for the dependency graph
263///
264/// Note that in this graph if there is an edge from A to B (A -> B),
265/// then it means that B depends on A, and when the state of A is
266/// updated, node B should also be updated
267struct AADepGraph {
AADepGraph() {}
~AADepGraph() {}

using DepTy = AADepGraphNode::DepTy;
/// Extract the node pointer from a dependence edge.
static AADepGraphNode *DepGetVal(DepTy &DT) { return DT.getPointer(); }
/// Iterator over graph nodes as AADepGraphNode pointers.
using iterator =
    mapped_iterator<TinyPtrVector<DepTy>::iterator, decltype(&DepGetVal)>;

/// There is no root node for the dependency graph. But the SCCIterator
/// requires a single entry point, so we maintain a fake("synthetic") root
/// node that depends on every node.
AADepGraphNode SyntheticRoot;
/// Return the synthetic root as the entry node of the graph.
AADepGraphNode *GetEntryNode() { return &SyntheticRoot; }

// Iterating the graph walks the children of the synthetic root.
iterator begin() { return SyntheticRoot.child_begin(); }
iterator end() { return SyntheticRoot.child_end(); }

// Declared only; the definition is not part of this struct body.
void viewGraph();

/// Dump graph to file
void dumpGraph();

/// Print dependency graph
void print();
292};

294/// Helper to describe and deal with positions in the LLVM-IR.
295///
296/// A position in the IR is described by an anchor value and an "offset" that
297/// could be the argument number, for call sites and arguments, or an indicator
298/// of the "position kind". The kinds, specified in the Kind enum below, include
299/// the locations in the attribute list, i.a., function scope and return value,
300/// as well as a distinction between call sites and functions. Finally, there
301/// are floating values that do not have a corresponding attribute list
302/// position.
303struct IRPosition {
// NOTE: In the future this definition can be changed to support recursive
// functions.
using CallBaseContext = CallBase;

/// The positions we distinguish in the IR.
enum Kind : char {
  IRP_INVALID,  ///< An invalid position.
  IRP_FLOAT,    ///< A position that is not associated with a spot suitable
                ///< for attributes. This could be any value or instruction.
  IRP_RETURNED, ///< An attribute for the function return value.
  IRP_CALL_SITE_RETURNED, ///< An attribute for a call site return value.
  IRP_FUNCTION,           ///< An attribute for a function (scope).
  IRP_CALL_SITE,          ///< An attribute for a call site (function scope).
  IRP_ARGUMENT,           ///< An attribute for a function argument.
  IRP_CALL_SITE_ARGUMENT, ///< An attribute for a call site argument.
};

/// Default constructor available to create invalid positions implicitly. All
/// other positions need to be created explicitly through the appropriate
/// static member function.
IRPosition() : Enc(nullptr, ENC_VALUE) { verify(); }

/// Create a position describing the value of \p V.
static const IRPosition value(const Value &V,
                              const CallBaseContext *CBContext = nullptr) {
  if (auto *Arg = dyn_cast<Argument>(&V))
    return IRPosition::argument(*Arg, CBContext);
  if (auto *CB = dyn_cast<CallBase>(&V))
    return IRPosition::callsite_returned(*CB);
  return IRPosition(const_cast<Value &>(V), IRP_FLOAT, CBContext);
}

/// Create a position describing the function scope of \p F.
/// \p CBContext is used for call base specific analysis.
static const IRPosition function(const Function &F,
                                 const CallBaseContext *CBContext = nullptr) {
  return IRPosition(const_cast<Function &>(F), IRP_FUNCTION, CBContext);
}

/// Create a position describing the returned value of \p F.
/// \p CBContext is used for call base specific analysis.
static const IRPosition returned(const Function &F,
                                 const CallBaseContext *CBContext = nullptr) {
  return IRPosition(const_cast<Function &>(F), IRP_RETURNED, CBContext);
}

/// Create a position describing the argument \p Arg.
/// \p CBContext is used for call base specific analysis.
static const IRPosition argument(const Argument &Arg,
                                 const CallBaseContext *CBContext = nullptr) {
  return IRPosition(const_cast<Argument &>(Arg), IRP_ARGUMENT, CBContext);
}

/// Create a position describing the function scope of \p CB.
static const IRPosition callsite_function(const CallBase &CB) {
  return IRPosition(const_cast<CallBase &>(CB), IRP_CALL_SITE);
}

/// Create a position describing the returned value of \p CB.
static const IRPosition callsite_returned(const CallBase &CB) {
  return IRPosition(const_cast<CallBase &>(CB), IRP_CALL_SITE_RETURNED);
}

/// Create a position describing the argument of \p CB at position \p ArgNo.
static const IRPosition callsite_argument(const CallBase &CB,
                                          unsigned ArgNo) {
  return IRPosition(const_cast<Use &>(CB.getArgOperandUse(ArgNo)),
                    IRP_CALL_SITE_ARGUMENT);
}

/// Create a position describing the argument of \p ACS at position \p ArgNo.
static const IRPosition callsite_argument(AbstractCallSite ACS,
                                          unsigned ArgNo) {
  if (ACS.getNumArgOperands() <= ArgNo)
    return IRPosition();
  int CSArgNo = ACS.getCallArgOperandNo(ArgNo);
  if (CSArgNo >= 0)
    return IRPosition::callsite_argument(
        cast<CallBase>(*ACS.getInstruction()), CSArgNo);
  return IRPosition();
}

/// Create a position with function scope matching the "context" of \p IRP.
/// If \p IRP is a call site (see isAnyCallSitePosition()) then the result
/// will be a call site position, otherwise the function position of the
/// associated function.
static const IRPosition
function_scope(const IRPosition &IRP,
               const CallBaseContext *CBContext = nullptr) {
  if (IRP.isAnyCallSitePosition()) {
    return IRPosition::callsite_function(
        cast<CallBase>(IRP.getAnchorValue()));
  }
  assert(IRP.getAssociatedFunction())((void)0);
  return IRPosition::function(*IRP.getAssociatedFunction(), CBContext);
}

bool operator==(const IRPosition &RHS) const {
  return Enc == RHS.Enc && RHS.CBContext == CBContext;
}
bool operator!=(const IRPosition &RHS) const { return !(*this == RHS); }

/// Return the value this abstract attribute is anchored with.
///
/// The anchor value might not be the associated value if the latter is not
/// sufficient to determine where arguments will be manifested. This is, so
/// far, only the case for call site arguments as the value is not sufficient
/// to pinpoint them. Instead, we can use the call site as an anchor.
Value &getAnchorValue() const {
  switch (getEncodingBits()) {
  case ENC_VALUE:
  case ENC_RETURNED_VALUE:
  case ENC_FLOATING_FUNCTION:
    return *getAsValuePtr();
  case ENC_CALL_SITE_ARGUMENT_USE:
    return *(getAsUsePtr()->getUser());
  default:
    llvm_unreachable("Unkown encoding!")__builtin_unreachable();
  };
}

/// Return the associated function, if any.
Function *getAssociatedFunction() const {
  if (auto *CB = dyn_cast<CallBase>(&getAnchorValue())) {
    // We reuse the logic that associates callback calles to arguments of a
    // call site here to identify the callback callee as the associated
    // function.
    if (Argument *Arg = getAssociatedArgument())
      return Arg->getParent();
    return CB->getCalledFunction();
  }
  return getAnchorScope();
}

/// Return the associated argument, if any.
Argument *getAssociatedArgument() const;

/// Return true if the position refers to a function interface, that is the
/// function scope, the function return, or an argument.
bool isFnInterfaceKind() const {
  switch (getPositionKind()) {
  case IRPosition::IRP_FUNCTION:
  case IRPosition::IRP_RETURNED:
  case IRPosition::IRP_ARGUMENT:
    return true;
  default:
    return false;
  }
}

/// Return the Function surrounding the anchor value.
Function *getAnchorScope() const {
  Value &V = getAnchorValue();
  if (isa<Function>(V))
    return &cast<Function>(V);
  if (isa<Argument>(V))
    return cast<Argument>(V).getParent();
  if (isa<Instruction>(V))
    return cast<Instruction>(V).getFunction();
  return nullptr;
}

/// Return the context instruction, if any.
Instruction *getCtxI() const {
  Value &V = getAnchorValue();
  if (auto *I = dyn_cast<Instruction>(&V))
    return I;
  if (auto *Arg = dyn_cast<Argument>(&V))
    if (!Arg->getParent()->isDeclaration())
      return &Arg->getParent()->getEntryBlock().front();
  if (auto *F = dyn_cast<Function>(&V))
    if (!F->isDeclaration())
      return &(F->getEntryBlock().front());
  return nullptr;
}

/// Return the value this abstract attribute is associated with.
Value &getAssociatedValue() const {
  if (getCallSiteArgNo() < 0 || isa<Argument>(&getAnchorValue()))
    return getAnchorValue();
  assert(isa<CallBase>(&getAnchorValue()) && "Expected a call base!")((void)0);
  return *cast<CallBase>(&getAnchorValue())
              ->getArgOperand(getCallSiteArgNo());
}

/// Return the type this abstract attribute is associated with.
Type *getAssociatedType() const {
  if (getPositionKind() == IRPosition::IRP_RETURNED)
    return getAssociatedFunction()->getReturnType();
  return getAssociatedValue().getType();
}

/// Return the callee argument number of the associated value if it is an
/// argument or call site argument, otherwise a negative value. In contrast to
/// `getCallSiteArgNo` this method will always return the "argument number"
/// from the perspective of the callee. This may not the same as the call site
/// if this is a callback call.
int getCalleeArgNo() const {
  return getArgNo(/* CallbackCalleeArgIfApplicable */ true);
}

/// Return the call site argument number of the associated value if it is an
/// argument or call site argument, otherwise a negative value. In contrast to
/// `getCalleArgNo` this method will always return the "operand number" from
/// the perspective of the call site. This may not the same as the callee
/// perspective if this is a callback call.
int getCallSiteArgNo() const {
  return getArgNo(/* CallbackCalleeArgIfApplicable */ false);
}

/// Return the index in the attribute list for this position.
unsigned getAttrIdx() const {
  switch (getPositionKind()) {
  case IRPosition::IRP_INVALID:
  case IRPosition::IRP_FLOAT:
    break;
  case IRPosition::IRP_FUNCTION:
  case IRPosition::IRP_CALL_SITE:
    return AttributeList::FunctionIndex;
  case IRPosition::IRP_RETURNED:
  case IRPosition::IRP_CALL_SITE_RETURNED:
    return AttributeList::ReturnIndex;
  case IRPosition::IRP_ARGUMENT:
  case IRPosition::IRP_CALL_SITE_ARGUMENT:
    return getCallSiteArgNo() + AttributeList::FirstArgIndex;
  }
  llvm_unreachable(__builtin_unreachable()
      "There is no attribute index for a floating or invalid position!")__builtin_unreachable();
}

/// Return the associated position kind.
Kind getPositionKind() const {
  char EncodingBits = getEncodingBits();
  if (EncodingBits == ENC_CALL_SITE_ARGUMENT_USE)
    return IRP_CALL_SITE_ARGUMENT;
  if (EncodingBits == ENC_FLOATING_FUNCTION)
    return IRP_FLOAT;

  Value *V = getAsValuePtr();
  if (!V)
    return IRP_INVALID;
  if (isa<Argument>(V))
    return IRP_ARGUMENT;
  if (isa<Function>(V))
    return isReturnPosition(EncodingBits) ? IRP_RETURNED : IRP_FUNCTION;
  if (isa<CallBase>(V))
    return isReturnPosition(EncodingBits) ? IRP_CALL_SITE_RETURNED
                                          : IRP_CALL_SITE;
  return IRP_FLOAT;
}

/// TODO: Figure out if the attribute related helper functions should live
///       here or somewhere else.

/// Return true if any kind in \p AKs existing in the IR at a position that
/// will affect this one. See also getAttrs(...).
/// \param IgnoreSubsumingPositions Flag to determine if subsuming positions,
///                                 e.g., the function position if this is an
///                                 argument position, should be ignored.
bool hasAttr(ArrayRef<Attribute::AttrKind> AKs,
             bool IgnoreSubsumingPositions = false,
             Attributor *A = nullptr) const;

/// Return the attributes of any kind in \p AKs existing in the IR at a
/// position that will affect this one. While each position can only have a
/// single attribute of any kind in \p AKs, there are "subsuming" positions
/// that could have an attribute as well. This method returns all attributes
/// found in \p Attrs.
/// \param IgnoreSubsumingPositions Flag to determine if subsuming positions,
///                                 e.g., the function position if this is an
///                                 argument position, should be ignored.
void getAttrs(ArrayRef<Attribute::AttrKind> AKs,
              SmallVectorImpl<Attribute> &Attrs,
              bool IgnoreSubsumingPositions = false,
              Attributor *A = nullptr) const;

/// Remove the attribute of kind \p AKs existing in the IR at this position.
void removeAttrs(ArrayRef<Attribute::AttrKind> AKs) const {
  if (getPositionKind() == IRP_INVALID || getPositionKind() == IRP_FLOAT)
    return;

  AttributeList AttrList;
  auto *CB = dyn_cast<CallBase>(&getAnchorValue());
  if (CB)
    AttrList = CB->getAttributes();
  else
    AttrList = getAssociatedFunction()->getAttributes();

  LLVMContext &Ctx = getAnchorValue().getContext();
  for (Attribute::AttrKind AK : AKs)
    AttrList = AttrList.removeAttribute(Ctx, getAttrIdx(), AK);

  if (CB)
    CB->setAttributes(AttrList);
  else
    getAssociatedFunction()->setAttributes(AttrList);
}

bool isAnyCallSitePosition() const {
  switch (getPositionKind()) {
  case IRPosition::IRP_CALL_SITE:
  case IRPosition::IRP_CALL_SITE_RETURNED:
  case IRPosition::IRP_CALL_SITE_ARGUMENT:
    return true;
  default:
    return false;
  }
}

/// Return true if the position is an argument or call site argument.
bool isArgumentPosition() const {
  switch (getPositionKind()) {
  case IRPosition::IRP_ARGUMENT:
  case IRPosition::IRP_CALL_SITE_ARGUMENT:
    return true;
  default:
    return false;
  }
}

/// Return the same position without the call base context.
IRPosition stripCallBaseContext() const {
  IRPosition Result = *this;
  Result.CBContext = nullptr;
  return Result;
}

/// Get the call base context from the position.
const CallBaseContext *getCallBaseContext() const { return CBContext; }

/// Check if the position has any call base context.
bool hasCallBaseContext() const { return CBContext != nullptr; }

/// Special DenseMap key values.
///
///{
static const IRPosition EmptyKey;
static const IRPosition TombstoneKey;
///}

/// Conversion into a void * to allow reuse of pointer hashing.
operator void *() const { return Enc.getOpaqueValue(); }

647private:
/// Private constructor for special values only!
explicit IRPosition(void *Ptr, const CallBaseContext *CBContext = nullptr)
    : CBContext(CBContext) {
  Enc.setFromOpaqueValue(Ptr);
}

/// IRPosition anchored at \p AnchorVal with kind/argument numbet \p PK.
explicit IRPosition(Value &AnchorVal, Kind PK,
                    const CallBaseContext *CBContext = nullptr)
    : CBContext(CBContext) {
  switch (PK) {
  case IRPosition::IRP_INVALID:
    llvm_unreachable("Cannot create invalid IRP with an anchor value!")__builtin_unreachable();
    break;
  case IRPosition::IRP_FLOAT:
    // Special case for floating functions.
    if (isa<Function>(AnchorVal))
      Enc = {&AnchorVal, ENC_FLOATING_FUNCTION};
    else
      Enc = {&AnchorVal, ENC_VALUE};
    break;
  case IRPosition::IRP_FUNCTION:
  case IRPosition::IRP_CALL_SITE:
    Enc = {&AnchorVal, ENC_VALUE};
    break;
  case IRPosition::IRP_RETURNED:
  case IRPosition::IRP_CALL_SITE_RETURNED:
    Enc = {&AnchorVal, ENC_RETURNED_VALUE};
    break;
  case IRPosition::IRP_ARGUMENT:
    Enc = {&AnchorVal, ENC_VALUE};
    break;
  case IRPosition::IRP_CALL_SITE_ARGUMENT:
    llvm_unreachable(__builtin_unreachable()
        "Cannot create call site argument IRP with an anchor value!")__builtin_unreachable();
    break;
  }
  verify();
}

/// Return the callee argument number of the associated value if it is an
/// argument or call site argument. See also `getCalleeArgNo` and
/// `getCallSiteArgNo`.
int getArgNo(bool CallbackCalleeArgIfApplicable) const {
  if (CallbackCalleeArgIfApplicable)
    if (Argument *Arg = getAssociatedArgument())
      return Arg->getArgNo();
  switch (getPositionKind()) {
  case IRPosition::IRP_ARGUMENT:
    return cast<Argument>(getAsValuePtr())->getArgNo();
  case IRPosition::IRP_CALL_SITE_ARGUMENT: {
    Use &U = *getAsUsePtr();
    return cast<CallBase>(U.getUser())->getArgOperandNo(&U);
  }
  default:
    return -1;
  }
}

/// IRPosition for the use \p U. The position kind \p PK needs to be
/// IRP_CALL_SITE_ARGUMENT, the anchor value is the user, the associated value
/// the used value.
explicit IRPosition(Use &U, Kind PK) {
  assert(PK == IRP_CALL_SITE_ARGUMENT &&((void)0)
         "Use constructor is for call site arguments only!")((void)0);
  Enc = {&U, ENC_CALL_SITE_ARGUMENT_USE};
  verify();
}

/// Verify internal invariants.
void verify();

/// Return the attributes of kind \p AK existing in the IR as attribute.
bool getAttrsFromIRAttr(Attribute::AttrKind AK,
                        SmallVectorImpl<Attribute> &Attrs) const;

/// Return the attributes of kind \p AK existing in the IR as operand bundles
/// of an llvm.assume.
bool getAttrsFromAssumes(Attribute::AttrKind AK,
                         SmallVectorImpl<Attribute> &Attrs,
                         Attributor &A) const;

/// Return the underlying pointer as Value *, valid for all positions but
/// IRP_CALL_SITE_ARGUMENT.
Value *getAsValuePtr() const {
  assert(getEncodingBits() != ENC_CALL_SITE_ARGUMENT_USE &&((void)0)
         "Not a value pointer!")((void)0);
  return reinterpret_cast<Value *>(Enc.getPointer());
}

/// Return the underlying pointer as Use *, valid only for
/// IRP_CALL_SITE_ARGUMENT positions.
Use *getAsUsePtr() const {
  assert(getEncodingBits() == ENC_CALL_SITE_ARGUMENT_USE &&((void)0)
         "Not a value pointer!")((void)0);
  return reinterpret_cast<Use *>(Enc.getPointer());
}

/// Return true if \p EncodingBits describe a returned or call site returned
/// position.
static bool isReturnPosition(char EncodingBits) {
  return EncodingBits == ENC_RETURNED_VALUE;
}

/// Return true if the encoding bits describe a returned or call site returned
/// position.
bool isReturnPosition() const { return isReturnPosition(getEncodingBits()); }

/// The encoding of the IRPosition is a combination of a pointer and two
/// encoding bits. The values of the encoding bits are defined in the enum
/// below. The pointer is either a Value* (for the first three encoding bit
/// combinations) or Use* (for ENC_CALL_SITE_ARGUMENT_USE).
///
///{
enum {
  ENC_VALUE = 0b00,
  ENC_RETURNED_VALUE = 0b01,
  ENC_FLOATING_FUNCTION = 0b10,
  ENC_CALL_SITE_ARGUMENT_USE = 0b11,
};

// Reserve the maximal amount of bits so there is no need to mask out the
// remaining ones. We will not encode anything else in the pointer anyway.
static constexpr int NumEncodingBits =
    PointerLikeTypeTraits<void *>::NumLowBitsAvailable;
static_assert(NumEncodingBits >= 2, "At least two bits are required!");

/// The pointer with the encoding bits.
PointerIntPair<void *, NumEncodingBits, char> Enc;
///}

/// Call base context. Used for callsite specific analysis.
const CallBaseContext *CBContext = nullptr;

/// Return the encoding bits.
char getEncodingBits() const { return Enc.getInt(); }
784};

786/// Helper that allows IRPosition as a key in a DenseMap.
787template <> struct DenseMapInfo<IRPosition> {
static inline IRPosition getEmptyKey() { return IRPosition::EmptyKey; }
static inline IRPosition getTombstoneKey() {
  return IRPosition::TombstoneKey;
}
static unsigned getHashValue(const IRPosition &IRP) {
  return (DenseMapInfo<void *>::getHashValue(IRP) << 4) ^
         (DenseMapInfo<Value *>::getHashValue(IRP.getCallBaseContext()));
}

static bool isEqual(const IRPosition &a, const IRPosition &b) {
  return a == b;
}
800};

802/// A visitor class for IR positions.
803///
804/// Given a position P, the SubsumingPositionIterator allows to visit "subsuming
805/// positions" wrt. attributes/information. Thus, if a piece of information
806/// holds for a subsuming position, it also holds for the position P.
807///
808/// The subsuming positions always include the initial position and then,
809/// depending on the position kind, additionally the following ones:
810/// - for IRP_RETURNED:
811///   - the function (IRP_FUNCTION)
812/// - for IRP_ARGUMENT:
813///   - the function (IRP_FUNCTION)
814/// - for IRP_CALL_SITE:
815///   - the callee (IRP_FUNCTION), if known
816/// - for IRP_CALL_SITE_RETURNED:
817///   - the callee (IRP_RETURNED), if known
818///   - the call site (IRP_FUNCTION)
819///   - the callee (IRP_FUNCTION), if known
820/// - for IRP_CALL_SITE_ARGUMENT:
821///   - the argument of the callee (IRP_ARGUMENT), if known
822///   - the callee (IRP_FUNCTION), if known
823///   - the position the call site argument is associated with if it is not
824///     anchored to the call site, e.g., if it is an argument then the argument
825///     (IRP_ARGUMENT)
826class SubsumingPositionIterator {
SmallVector<IRPosition, 4> IRPositions;
using iterator = decltype(IRPositions)::iterator;

830public:
SubsumingPositionIterator(const IRPosition &IRP);
iterator begin() { return IRPositions.begin(); }
iterator end() { return IRPositions.end(); }
834};

836/// Wrapper for FunctoinAnalysisManager.
837struct AnalysisGetter {
template <typename Analysis>
typename Analysis::Result *getAnalysis(const Function &F) {
  if (!FAM || !F.getParent())
    return nullptr;
  return &FAM->getResult<Analysis>(const_cast<Function &>(F));
}

AnalysisGetter(FunctionAnalysisManager &FAM) : FAM(&FAM) {}
AnalysisGetter() {}

848private:
FunctionAnalysisManager *FAM = nullptr;
850};

852/// Data structure to hold cached (LLVM-IR) information.
853///
854/// All attributes are given an InformationCache object at creation time to
855/// avoid inspection of the IR by all of them individually. This default
856/// InformationCache will hold information required by 'default' attributes,
857/// thus the ones deduced when Attributor::identifyDefaultAbstractAttributes(..)
858/// is called.
859///
860/// If custom abstract attributes, registered manually through
861/// Attributor::registerAA(...), need more information, especially if it is not
862/// reusable, it is advised to inherit from the InformationCache and cast the
863/// instance down in the abstract attributes.
864struct InformationCache {
InformationCache(const Module &M, AnalysisGetter &AG,
                 BumpPtrAllocator &Allocator, SetVector<Function *> *CGSCC)
    : DL(M.getDataLayout()), Allocator(Allocator),
      Explorer(
          /* ExploreInterBlock */ true, /* ExploreCFGForward */ true,
          /* ExploreCFGBackward */ true,
          /* LIGetter */
          [&](const Function &F) { return AG.getAnalysis<LoopAnalysis>(F); },
          /* DTGetter */
          [&](const Function &F) {
            return AG.getAnalysis<DominatorTreeAnalysis>(F);
          },
          /* PDTGetter */
          [&](const Function &F) {
            return AG.getAnalysis<PostDominatorTreeAnalysis>(F);
          }),
      AG(AG), CGSCC(CGSCC), TargetTriple(M.getTargetTriple()) {
  if (CGSCC)
    initializeModuleSlice(*CGSCC);
}

~InformationCache() {
  // The FunctionInfo objects are allocated via a BumpPtrAllocator, we call
  // the destructor manually.
  for (auto &It : FuncInfoMap)
    It.getSecond()->~FunctionInfo();
}

/// Apply \p CB to all uses of \p F. If \p LookThroughConstantExprUses is
/// true, constant expression users are not given to \p CB but their uses are
/// traversed transitively.
template <typename CBTy>
static void foreachUse(Function &F, CBTy CB,
                       bool LookThroughConstantExprUses = true) {
  SmallVector<Use *, 8> Worklist(make_pointer_range(F.uses()));

  for (unsigned Idx = 0; Idx < Worklist.size(); ++Idx) {
    Use &U = *Worklist[Idx];

    // Allow use in constant bitcasts and simply look through them.
    if (LookThroughConstantExprUses && isa<ConstantExpr>(U.getUser())) {
      for (Use &CEU : cast<ConstantExpr>(U.getUser())->uses())
        Worklist.push_back(&CEU);
      continue;
    }

    CB(U);
  }
}

/// Initialize the ModuleSlice member based on \p SCC. ModuleSlices contains
/// (a subset of) all functions that we can look at during this SCC traversal.
/// This includes functions (transitively) called from the SCC and the
/// (transitive) callers of SCC functions. We also can look at a function if
/// there is a "reference edge", i.a., if the function somehow uses (!=calls)
/// a function in the SCC or a caller of a function in the SCC.
void initializeModuleSlice(SetVector<Function *> &SCC) {
  ModuleSlice.insert(SCC.begin(), SCC.end());

  SmallPtrSet<Function *, 16> Seen;
  SmallVector<Function *, 16> Worklist(SCC.begin(), SCC.end());
  while (!Worklist.empty()) {
    Function *F = Worklist.pop_back_val();
    ModuleSlice.insert(F);

    for (Instruction &I : instructions(*F))
      if (auto *CB = dyn_cast<CallBase>(&I))
        if (Function *Callee = CB->getCalledFunction())
          if (Seen.insert(Callee).second)
            Worklist.push_back(Callee);
  }

  Seen.clear();
  Worklist.append(SCC.begin(), SCC.end());
  while (!Worklist.empty()) {
    Function *F = Worklist.pop_back_val();
    ModuleSlice.insert(F);

    // Traverse all transitive uses.
    foreachUse(*F, [&](Use &U) {
      if (auto *UsrI = dyn_cast<Instruction>(U.getUser()))
        if (Seen.insert(UsrI->getFunction()).second)
          Worklist.push_back(UsrI->getFunction());
    });
  }
}

/// The slice of the module we are allowed to look at.
SmallPtrSet<Function *, 8> ModuleSlice;

/// A vector type to hold instructions.
using InstructionVectorTy = SmallVector<Instruction *, 8>;

/// A map type from opcodes to instructions with this opcode.
using OpcodeInstMapTy = DenseMap<unsigned, InstructionVectorTy *>;

/// Return the map that relates "interesting" opcodes with all instructions
/// with that opcode in \p F.
OpcodeInstMapTy &getOpcodeInstMapForFunction(const Function &F) {
  return getFunctionInfo(F).OpcodeInstMap;
}

/// Return the instructions in \p F that may read or write memory.
InstructionVectorTy &getReadOrWriteInstsForFunction(const Function &F) {
  return getFunctionInfo(F).RWInsts;
}

/// Return MustBeExecutedContextExplorer
MustBeExecutedContextExplorer &getMustBeExecutedContextExplorer() {
  return Explorer;
}

/// Return TargetLibraryInfo for function \p F.
TargetLibraryInfo *getTargetLibraryInfoForFunction(const Function &F) {
  return AG.getAnalysis<TargetLibraryAnalysis>(F);
}

/// Return AliasAnalysis Result for function \p F.
AAResults *getAAResultsForFunction(const Function &F);

/// Return true if \p Arg is involved in a must-tail call, thus the argument
/// of the caller or callee.
bool isInvolvedInMustTailCall(const Argument &Arg) {
  FunctionInfo &FI = getFunctionInfo(*Arg.getParent());
  return FI.CalledViaMustTail || FI.ContainsMustTailCall;
}

/// Return the analysis result from a pass \p AP for function \p F.
template <typename AP>
typename AP::Result *getAnalysisResultForFunction(const Function &F) {
  return AG.getAnalysis<AP>(F);
}

/// Return SCC size on call graph for function \p F or 0 if unknown.
unsigned getSccSize(const Function &F) {
  if (CGSCC && CGSCC->count(const_cast<Function *>(&F)))
    return CGSCC->size();
  return 0;
}

/// Return datalayout used in the module.
const DataLayout &getDL() { return DL; }

/// Return the map conaining all the knowledge we have from `llvm.assume`s.
const RetainedKnowledgeMap &getKnowledgeMap() const { return KnowledgeMap; }

/// Return if \p To is potentially reachable form \p From or not
/// If the same query was answered, return cached result
bool getPotentiallyReachable(const Instruction &From, const Instruction &To) {
  auto KeyPair = std::make_pair(&From, &To);
  auto Iter = PotentiallyReachableMap.find(KeyPair);
  if (Iter != PotentiallyReachableMap.end())
    return Iter->second;
  const Function &F = *From.getFunction();
  bool Result = true;
  if (From.getFunction() == To.getFunction())
    Result = isPotentiallyReachable(&From, &To, nullptr,
                                    AG.getAnalysis<DominatorTreeAnalysis>(F),
                                    AG.getAnalysis<LoopAnalysis>(F));
  PotentiallyReachableMap.insert(std::make_pair(KeyPair, Result));
  return Result;
}

/// Check whether \p F is part of module slice.
bool isInModuleSlice(const Function &F) {
  return ModuleSlice.count(const_cast<Function *>(&F));
}

/// Return true if the stack (llvm::Alloca) can be accessed by other threads.
bool stackIsAccessibleByOtherThreads() { return !targetIsGPU(); }

/// Return true if the target is a GPU.
bool targetIsGPU() {
  return TargetTriple.isAMDGPU() || TargetTriple.isNVPTX();
}

1041private:
struct FunctionInfo {
  ~FunctionInfo();

  /// A nested map that remembers all instructions in a function with a
  /// certain instruction opcode (Instruction::getOpcode()).
  OpcodeInstMapTy OpcodeInstMap;

  /// A map from functions to their instructions that may read or write
  /// memory.
  InstructionVectorTy RWInsts;

  /// Function is called by a `musttail` call.
  bool CalledViaMustTail;

  /// Function contains a `musttail` call.
  bool ContainsMustTailCall;
};

/// A map type from functions to informatio about it.
DenseMap<const Function *, FunctionInfo *> FuncInfoMap;

/// Return information about the function \p F, potentially by creating it.
FunctionInfo &getFunctionInfo(const Function &F) {
  FunctionInfo *&FI = FuncInfoMap[&F];
  if (!FI) {
    FI = new (Allocator) FunctionInfo();
    initializeInformationCache(F, *FI);
  }
  return *FI;
}

/// Initialize the function information cache \p FI for the function \p F.
///
/// This method needs to be called for all function that might be looked at
/// through the information cache interface *prior* to looking at them.
void initializeInformationCache(const Function &F, FunctionInfo &FI);

/// The datalayout used in the module.
const DataLayout &DL;

/// The allocator used to allocate memory, e.g. for `FunctionInfo`s.
BumpPtrAllocator &Allocator;

/// MustBeExecutedContextExplorer
MustBeExecutedContextExplorer Explorer;

/// A map with knowledge retained in `llvm.assume` instructions.
RetainedKnowledgeMap KnowledgeMap;

/// Getters for analysis.
AnalysisGetter &AG;

/// The underlying CGSCC, or null if not available.
SetVector<Function *> *CGSCC;

/// Set of inlineable functions
SmallPtrSet<const Function *, 8> InlineableFunctions;

/// A map for caching results of queries for isPotentiallyReachable
DenseMap<std::pair<const Instruction *, const Instruction *>, bool>
    PotentiallyReachableMap;

/// The triple describing the target machine.
Triple TargetTriple;

/// Give the Attributor access to the members so
/// Attributor::identifyDefaultAbstractAttributes(...) can initialize them.
friend struct Attributor;
1110};

/// The fixpoint analysis framework that orchestrates the attribute deduction.
///
/// The Attributor provides a general abstract analysis framework (guided
/// fixpoint iteration) as well as helper functions for the deduction of
/// (LLVM-IR) attributes. However, also other code properties can be deduced,
/// propagated, and ultimately manifested through the Attributor framework. This
/// is particularly useful if these properties interact with attributes and a
/// co-scheduled deduction allows improving the solution. Even if not, thus if
/// attributes/properties are completely isolated, they should use the
/// Attributor framework to reduce the number of fixpoint iteration frameworks
/// in the code base. Note that the Attributor design makes sure that isolated
/// attributes are not impacted, in any way, by others derived at the same time
/// if there is no cross-reasoning performed.
///
/// The public facing interface of the Attributor is kept simple and basically
/// allows abstract attributes to do one thing: query abstract attributes
/// in-flight. There are two reasons to do this:
///    a) The optimistic state of one abstract attribute can justify an
///       optimistic state of another, allowing the framework to end up with an
///       optimistic (=best possible) fixpoint instead of one based solely on
///       information in the IR.
///    b) This avoids reimplementing various kinds of lookups, e.g., to check
///       for existing IR attributes, in favor of a single lookup interface
///       provided by an abstract attribute subclass.
///
/// NOTE: The mechanics of adding a new "concrete" abstract attribute are
///       described in the file comment.
1139struct Attributor {

using OptimizationRemarkGetter =
    function_ref<OptimizationRemarkEmitter &(Function *)>;

/// Constructor
///
/// This overload takes no iteration limit, remark getter, or pass name; it
/// initializes `MaxFixpointIterations` and `OREGetter` to `None` and
/// `PassName` to the empty string.
///
/// \param Functions The set of functions we are deriving attributes for.
/// \param InfoCache Cache to hold various information accessible for
///                  the abstract attributes.
/// \param CGUpdater Helper to update an underlying call graph.
/// \param Allowed If not null, a set limiting the attribute opportunities.
/// \param DeleteFns Whether to delete functions.
/// \param RewriteSignatures Whether to rewrite function signatures.
Attributor(SetVector<Function *> &Functions, InformationCache &InfoCache,
           CallGraphUpdater &CGUpdater,
           DenseSet<const char *> *Allowed = nullptr, bool DeleteFns = true,
           bool RewriteSignatures = true)
    : Allocator(InfoCache.Allocator), Functions(Functions),
      InfoCache(InfoCache), CGUpdater(CGUpdater), Allowed(Allowed),
      DeleteFns(DeleteFns), RewriteSignatures(RewriteSignatures),
      MaxFixpointIterations(None), OREGetter(None), PassName("") {}

/// Constructor
///
/// \param Functions The set of functions we are deriving attributes for.
/// \param InfoCache Cache to hold various information accessible for
///                  the abstract attributes.
/// \param CGUpdater Helper to update an underlying call graph.
/// \param Allowed If not null, a set limiting the attribute opportunities.
/// \param DeleteFns Whether to delete functions
/// \param RewriteSignatures Whether to rewrite function signatures.
/// \param MaxFixpointIterations Maximum number of iterations to run until
///                              fixpoint.
/// \param OREGetter A callback function that returns an ORE object from a
///                  Function pointer.
/// \param PassName  The name of the pass emitting remarks.
Attributor(SetVector<Function *> &Functions, InformationCache &InfoCache,
           CallGraphUpdater &CGUpdater, DenseSet<const char *> *Allowed,
           bool DeleteFns, bool RewriteSignatures,
           Optional<unsigned> MaxFixpointIterations,
           OptimizationRemarkGetter OREGetter, const char *PassName)
    : Allocator(InfoCache.Allocator), Functions(Functions),
      InfoCache(InfoCache), CGUpdater(CGUpdater), Allowed(Allowed),
      DeleteFns(DeleteFns), RewriteSignatures(RewriteSignatures),
      MaxFixpointIterations(MaxFixpointIterations),
      OREGetter(Optional<OptimizationRemarkGetter>(OREGetter)),
      PassName(PassName) {}

~Attributor();

/// Run the analyses until a fixpoint is reached or enforced (timeout).
///
/// The attributes registered with this Attributor can be used after as long
/// as the Attributor is not destroyed (it owns the attributes now).
///
/// \Returns CHANGED if the IR was changed, otherwise UNCHANGED.
ChangeStatus run();

/// Lookup an abstract attribute of type \p AAType at position \p IRP. While
/// no abstract attribute is found equivalent positions are checked, see
/// SubsumingPositionIterator. Thus, the returned abstract attribute
/// might be anchored at a different position, e.g., the callee if \p IRP is a
/// call base.
///
/// This method is the only (supported) way an abstract attribute can retrieve
/// information from another abstract attribute. As an example, take an
/// abstract attribute that determines the memory access behavior for a
/// argument (readnone, readonly, ...). It should use `getAAFor` to get the
/// most optimistic information for other abstract attributes in-flight, e.g.
/// the one reasoning about the "captured" state for the argument or the one
/// reasoning on the memory access behavior of the function as a whole.
///
/// If the DepClass enum is set to `DepClassTy::None` the dependence from
/// \p QueryingAA to the return abstract attribute is not automatically
/// recorded. This should only be used if the caller will record the
/// dependence explicitly if necessary, thus if it the returned abstract
/// attribute is used for reasoning. To record the dependences explicitly use
/// the `Attributor::recordDependence` method.
template <typename AAType>
const AAType &getAAFor(const AbstractAttribute &QueryingAA,
                       const IRPosition &IRP, DepClassTy DepClass) {
  return getOrCreateAAFor<AAType>(IRP, &QueryingAA, DepClass,
                                  /* ForceUpdate */ false);
}

/// Similar to getAAFor but the return abstract attribute will be updated (via
/// `AbstractAttribute::update`) even if it is found in the cache. This is
/// especially useful for AAIsDead as changes in liveness can make updates
/// possible/useful that were not happening before as the abstract attribute
/// was assumed dead.
template <typename AAType>
const AAType &getAndUpdateAAFor(const AbstractAttribute &QueryingAA,
                                const IRPosition &IRP, DepClassTy DepClass) {
  return getOrCreateAAFor<AAType>(IRP, &QueryingAA, DepClass,
                                  /* ForceUpdate */ true);
}

/// The version of getAAFor that allows omitting a querying abstract
/// attribute. Using this after the Attributor started running is restricted
/// to the Attributor itself. Initial seeding of AAs can be done via this
/// function.
///
/// \param IRP The position to look up or create the attribute for. Its call
///            base context is stripped unless shouldPropagateCallBaseContext
///            says otherwise.
/// \param QueryingAA The attribute issuing the query, or nullptr.
/// \param DepClass The dependence class used when a dependence from the
///                 returned attribute to \p QueryingAA is recorded.
/// \param ForceUpdate If true, an attribute found in the cache is updated
///                    before it is returned.
/// \param UpdateAfterInit If true, a newly created attribute is updated once
///                        right after its initialization.
///
/// NOTE: ForceUpdate is ignored in any stage other than the update stage.
template <typename AAType>
const AAType &getOrCreateAAFor(IRPosition IRP,
                               const AbstractAttribute *QueryingAA,
                               DepClassTy DepClass, bool ForceUpdate = false,
                               bool UpdateAfterInit = true) {
  if (!shouldPropagateCallBaseContext(IRP))
    IRP = IRP.stripCallBaseContext();

  // Fast path: the attribute already exists. Invalid states are allowed here
  // so that an existing (invalid) attribute is returned rather than recreated.
  if (AAType *AAPtr = lookupAAFor<AAType>(IRP, QueryingAA, DepClass,
                                          /* AllowInvalidState */ true)) {
    if (ForceUpdate && Phase == AttributorPhase::UPDATE)
      updateAA(*AAPtr);
    return *AAPtr;
  }

  // No matching attribute found, create one.
  // Use the static create method.
  auto &AA = AAType::createForPosition(IRP, *this);

  // If we are currently seeding attributes, enforce seeding rules. Note that
  // an attribute rejected here is returned without being registered.
  if (Phase == AttributorPhase::SEEDING && !shouldSeedAttribute(AA)) {
    AA.getState().indicatePessimisticFixpoint();
    return AA;
  }

  registerAA(AA);

  // Attributes not in the Allowed set (if one is given) are invalidated, and
  // for now we ignore naked and optnone functions.
  bool Invalidate = Allowed && !Allowed->count(&AAType::ID);
  const Function *FnScope = IRP.getAnchorScope();
  if (FnScope)
    Invalidate |= FnScope->hasFnAttribute(Attribute::Naked) ||
                  FnScope->hasFnAttribute(Attribute::OptimizeNone);

  // Avoid too many nested initializations to prevent a stack overflow.
  Invalidate |= InitializationChainLength > MaxInitializationChainLength;

  // Bootstrap the new attribute with an initial update to propagate
  // information, e.g., function -> call site. If the attribute was
  // invalidated above we will not perform updates at all.
  if (Invalidate) {
    AA.getState().indicatePessimisticFixpoint();
    return AA;
  }

  {
    // The counter tracks how deeply initializations are currently nested; it
    // is compared against MaxInitializationChainLength above.
    TimeTraceScope TimeScope(AA.getName() + "::initialize");
    ++InitializationChainLength;
    AA.initialize(*this);
    --InitializationChainLength;
  }

  // Initialize and update is allowed for code outside of the current function
  // set, but only if it is part of module slice we are allowed to look at.
  // Only exception is AAIsDeadFunction whose initialization is prevented
  // directly, since we don't want to compute it twice.
  if (FnScope && !Functions.count(const_cast<Function *>(FnScope))) {
    if (!getInfoCache().isInModuleSlice(*FnScope)) {
      AA.getState().indicatePessimisticFixpoint();
      return AA;
    }
  }

  // If this is queried in the manifest stage, we force the AA to indicate
  // pessimistic fixpoint immediately.
  if (Phase == AttributorPhase::MANIFEST) {
    AA.getState().indicatePessimisticFixpoint();
    return AA;
  }

  // Allow seeded attributes to declare dependencies.
  // Remember the current phase, run the update in the UPDATE phase, and
  // restore the previous phase afterwards.
  if (UpdateAfterInit) {
    AttributorPhase OldPhase = Phase;
    Phase = AttributorPhase::UPDATE;

    updateAA(AA);

    Phase = OldPhase;
  }

  // Only a valid attribute is recorded as a dependence of the querying one.
  if (QueryingAA && AA.getState().isValidState())
    recordDependence(AA, const_cast<AbstractAttribute &>(*QueryingAA),
                     DepClass);
  return AA;
}
template <typename AAType>
const AAType &getOrCreateAAFor(const IRPosition &IRP) {
  return getOrCreateAAFor<AAType>(IRP, /* QueryingAA */ nullptr,
                                  DepClassTy::NONE);
}

/// Return the attribute of \p AAType for \p IRP if existing and valid. This
/// also allows non-AA users lookup.
template <typename AAType>
AAType *lookupAAFor(const IRPosition &IRP,
                    const AbstractAttribute *QueryingAA = nullptr,
                    DepClassTy DepClass = DepClassTy::OPTIONAL,
                    bool AllowInvalidState = false) {
  static_assert(std::is_base_of<AbstractAttribute, AAType>::value,
                "Cannot query an attribute with a type not derived from "
                "'AbstractAttribute'!");
  // Lookup the abstract attribute of type AAType. If found, return it after
  // registering a dependence of QueryingAA on the one returned attribute.
  AbstractAttribute *AAPtr = AAMap.lookup({&AAType::ID, IRP});
  if (!AAPtr)
    return nullptr;

  AAType *AA = static_cast<AAType *>(AAPtr);

  // Do not register a dependence on an attribute with an invalid state.
  if (DepClass != DepClassTy::NONE && QueryingAA &&
      AA->getState().isValidState())
    recordDependence(*AA, const_cast<AbstractAttribute &>(*QueryingAA),
                     DepClass);

  // Return nullptr if this attribute has an invalid state.
  if (!AllowInvalidState && !AA->getState().isValidState())
    return nullptr;
  return AA;
}

/// Explicitly record a dependence from \p FromAA to \p ToAA, that is if
/// \p FromAA changes \p ToAA should be updated as well.
///
/// This method should be used in conjunction with the `getAAFor` method and
/// with the DepClass enum passed to the method set to None. This can
/// be beneficial to avoid false dependences but it requires the users of
/// `getAAFor` to explicitly record true dependences through this method.
/// The \p DepClass flag indicates if the dependence is strictly necessary.
/// That means for required dependences, if \p FromAA changes to an invalid
/// state, \p ToAA can be moved to a pessimistic fixpoint because it required
/// information from \p FromAA but none are available anymore.
void recordDependence(const AbstractAttribute &FromAA,
                      const AbstractAttribute &ToAA, DepClassTy DepClass);

/// Introduce a new abstract attribute into the fixpoint analysis.
///
/// Note that ownership of the attribute is given to the Attributor, which
/// takes care of releasing it when the Attributor itself is destroyed.
///
/// Attributes are identified by their IR position (AAType::getIRPosition())
/// and the address of their static member (see AAType::ID). Registering a
/// second attribute with the same identity is a programming error (asserted).
///
/// \param AA The attribute to register.
/// \returns \p AA, to allow chaining.
template <typename AAType> AAType &registerAA(AAType &AA) {
  static_assert(std::is_base_of<AbstractAttribute, AAType>::value,
                "Cannot register an attribute with a type not derived from "
                "'AbstractAttribute'!");
  // Put the attribute in the lookup map structure and the container we use to
  // keep track of all attributes.
  const IRPosition &IRP = AA.getIRPosition();
  AbstractAttribute *&AAPtr = AAMap[{&AAType::ID, IRP}];

  assert(!AAPtr && "Attribute already in map!");
  AAPtr = &AA;

  // Register AA with the synthetic root only before the manifest stage.
  if (Phase == AttributorPhase::SEEDING || Phase == AttributorPhase::UPDATE)
    DG.SyntheticRoot.Deps.push_back(
        AADepGraphNode::DepTy(&AA, unsigned(DepClassTy::REQUIRED)));

  return AA;
}

/// Accessor for the information cache used by this Attributor.
InformationCache &getInfoCache() {
  return InfoCache;
}

/// Return true if this is a module pass, false otherwise. That is the case
/// when the function set is non-empty and contains as many functions as the
/// module of its first element.
bool isModulePass() const {
  if (Functions.empty())
    return false;
  const auto *ParentModule = Functions.front()->getParent();
  return Functions.size() == ParentModule->size();
}

/// Return true if attributes are derived for \p Fn. An empty function set
/// places no restriction, so every function qualifies in that case.
bool isRunOn(Function &Fn) const {
  if (Functions.empty())
    return true;
  return Functions.count(&Fn) != 0;
}

/// Determine opportunities to derive 'default' attributes in \p F and create
/// abstract attribute objects for them.
///
/// \param F The function that is checked for attribute opportunities.
///
/// Note that abstract attribute instances are generally created even if the
/// IR already contains the information they would deduce. The most important
/// reason for this is the single interface, the one of the abstract attribute
/// instance, which can be queried without the need to look at the IR in
/// various places.
void identifyDefaultAbstractAttributes(Function &F);

/// Determine whether the function \p F is IPO amendable.
///
/// A function is IPO amendable if it is exactly defined, or if it is recorded
/// in the information cache as inlineable (alwaysinline and viable to be
/// inlined).
bool isFunctionIPOAmendable(const Function &F) {
  if (F.hasExactDefinition())
    return true;
  return InfoCache.InlineableFunctions.count(&F) != 0;
}

/// Mark the internal function \p F as live.
///
/// This will trigger the identification and initialization of attributes for
/// \p F. Only functions with local linkage may be passed (asserted), since
/// only those are assumed dead initially.
void markLiveInternalFunction(const Function &F) {
  assert(F.hasLocalLinkage() &&
         "Only local linkage is assumed dead initially.");

  // identifyDefaultAbstractAttributes takes a mutable reference.
  identifyDefaultAbstractAttributes(const_cast<Function &>(F));
}

/// Helper that removes the call site \p CI through the call graph updater.
/// A null \p CI is silently ignored.
void removeCallSite(CallInst *CI) {
  if (CI != nullptr)
    CGUpdater.removeCallSite(*CI);
}

/// Record that \p U is to be replaced with \p NV after information was
/// manifested. This also triggers deletion of trivially dead instructions.
///
/// \returns False if a replacement is already registered for \p U that is
/// either equal to \p NV modulo pointer casts or an undef value; true if
/// \p NV was recorded as the (new) replacement.
bool changeUseAfterManifest(Use &U, Value &NV) {
  Value *&V = ToBeChangedUses[&U];
  // V is known to be non-null inside the parenthesized condition, so a plain
  // isa<> suffices (matching changeValueAfterManifest).
  if (V && (V->stripPointerCasts() == NV.stripPointerCasts() ||
            isa<UndefValue>(V)))
    return false;
  assert((!V || V == &NV || isa<UndefValue>(NV)) &&
         "Use was registered twice for replacement with different values!");
  V = &NV;
  return true;
}

/// Helper function to replace all uses of \p V with \p NV after information
/// was manifested.
///
/// \param V The value whose uses are to be replaced.
/// \param NV The replacement value.
/// \param ChangeDroppable Whether droppable uses should be changed too.
///
/// \returns False if a replacement is already registered for \p V that is
/// either equal to \p NV modulo pointer casts or an undef value; true if
/// \p NV was recorded as the (new) replacement.
bool changeValueAfterManifest(Value &V, Value &NV,
                              bool ChangeDroppable = true) {
  auto &Entry = ToBeChangedValues[&V];
  Value *&CurNV = Entry.first;
  if (CurNV && (CurNV->stripPointerCasts() == NV.stripPointerCasts() ||
                isa<UndefValue>(CurNV)))
    return false;
  assert((!CurNV || CurNV == &NV || isa<UndefValue>(NV)) &&
         "Value replacement was registered twice with different values!");
  CurNV = &NV;
  Entry.second = ChangeDroppable;
  return true;
}

/// Remember \p I as an instruction that is to be replaced with `unreachable`
/// once information was manifested.
void changeToUnreachableAfterManifest(Instruction *I) {
  Instruction *ToReplace = I;
  ToBeChangedToUnreachableInsts.insert(ToReplace);
}

/// Remember that the invoke \p II has at least one dead successor block. The
/// information is used after manifestation, e.g., to replace \p II with a
/// call.
void registerInvokeWithDeadSuccessor(InvokeInst &II) {
  InvokeInst *Invoke = &II;
  InvokeWithDeadSuccessor.push_back(Invoke);
}

/// Schedule the instruction \p I for deletion after information was
/// manifested. This also triggers deletion of trivially dead instructions.
void deleteAfterManifest(Instruction &I) {
  ToBeDeletedInsts.insert(&I);
}

/// Schedule the basic block \p BB for deletion after information was
/// manifested. This also triggers deletion of trivially dead instructions.
void deleteAfterManifest(BasicBlock &BB) {
  ToBeDeletedBlocks.insert(&BB);
}

/// Remember that \p BB was added while manifesting an abstract attribute.
/// Basic blocks registered this way are preserved in the IR.
void registerManifestAddedBasicBlock(BasicBlock &BB) {
  BasicBlock *Added = &BB;
  ManifestAddedBlocks.insert(Added);
}

/// Schedule the function \p F for deletion after information was manifested.
/// Nothing is recorded when function deletion is disabled (DeleteFns).
void deleteAfterManifest(Function &F) {
  if (!DeleteFns)
    return;
  ToBeDeletedFunctions.insert(&F);
}

/// If \p IRP is assumed to be a constant, return it, if it is unclear yet,
/// return None, otherwise return `nullptr`.
Optional<Constant *> getAssumedConstant(const IRPosition &IRP,
                                        const AbstractAttribute &AA,
                                        bool &UsedAssumedInformation);
/// Value-based convenience overload: forwards to the IRPosition-based
/// getAssumedConstant using the value position of \p V.
Optional<Constant *> getAssumedConstant(const Value &V,
                                        const AbstractAttribute &AA,
                                        bool &UsedAssumedInformation) {
  const IRPosition ValuePos = IRPosition::value(V);
  return getAssumedConstant(ValuePos, AA, UsedAssumedInformation);
}

/// If the value at \p IRP is assumed simplified, return the simplified value;
/// if it is unclear yet, return None; otherwise return `nullptr`.
/// Forwards to the pointer-based overload with the address of \p AA.
Optional<Value *> getAssumedSimplified(const IRPosition &IRP,
                                       const AbstractAttribute &AA,
                                       bool &UsedAssumedInformation) {
  const AbstractAttribute *QueryingAA = &AA;
  return getAssumedSimplified(IRP, QueryingAA, UsedAssumedInformation);
}
/// Value-based convenience overload: forwards to the IRPosition-based
/// getAssumedSimplified using the value position of \p V.
Optional<Value *> getAssumedSimplified(const Value &V,
                                       const AbstractAttribute &AA,
                                       bool &UsedAssumedInformation) {
  const IRPosition ValuePos = IRPosition::value(V);
  return getAssumedSimplified(ValuePos, AA, UsedAssumedInformation);
}

/// If \p V is assumed simplified, return it, if it is unclear yet,
/// return None, otherwise return `nullptr`. Same as the public version
/// except that it can be used without recording dependences on any \p AA.
Optional<Value *> getAssumedSimplified(const IRPosition &V,
                                       const AbstractAttribute *AA,
                                       bool &UsedAssumedInformation);

/// Signature of a simplification callback: it receives the position, the
/// (possibly null) querying attribute, and the "used assumed information"
/// flag, and returns an optional simplified value.
using SimplifictionCallbackTy = std::function<Optional<Value *>(
    const IRPosition &, const AbstractAttribute *, bool &)>;

/// Register \p CB as a simplification callback for \p IRP.
/// `Attributor::getAssumedSimplified` consults these callbacks before it
/// asks `AAValueSimplify`. It is important to call this before
/// `identifyDefaultAbstractAttributes`, assuming the latter is called at all.
void registerSimplificationCallback(const IRPosition &IRP,
                                    const SimplifictionCallbackTy &CB) {
  auto &CallbacksForIRP = SimplificationCallbacks[IRP];
  CallbacksForIRP.emplace_back(CB);
}

/// Return true if at least one simplification callback entry exists for
/// \p IRP.
bool hasSimplificationCallback(const IRPosition &IRP) {
  return SimplificationCallbacks.count(IRP) != 0;
}

private:
/// The vector with all simplification callbacks registered by outside AAs.
DenseMap<IRPosition, SmallVector<SimplifictionCallbackTy, 1>>
    SimplificationCallbacks;

public:
/// Translate \p V from the callee context into the call site context.
Optional<Value *>
translateArgumentToCallSiteContent(Optional<Value *> V, CallBase &CB,
                                   const AbstractAttribute &AA,
                                   bool &UsedAssumedInformation);

/// Return true if \p AA (or its context instruction) is assumed dead.
///
/// If \p LivenessAA is not provided it is queried.
bool isAssumedDead(const AbstractAttribute &AA, const AAIsDead *LivenessAA,
                   bool &UsedAssumedInformation,
                   bool CheckBBLivenessOnly = false,
                   DepClassTy DepClass = DepClassTy::OPTIONAL);

/// Return true if \p I is assumed dead.
///
/// If \p LivenessAA is not provided it is queried.
bool isAssumedDead(const Instruction &I, const AbstractAttribute *QueryingAA,
                   const AAIsDead *LivenessAA, bool &UsedAssumedInformation,
                   bool CheckBBLivenessOnly = false,
                   DepClassTy DepClass = DepClassTy::OPTIONAL);

/// Return true if \p U is assumed dead.
///
/// If \p FnLivenessAA is not provided it is queried.
bool isAssumedDead(const Use &U, const AbstractAttribute *QueryingAA,
                   const AAIsDead *FnLivenessAA, bool &UsedAssumedInformation,
                   bool CheckBBLivenessOnly = false,
                   DepClassTy DepClass = DepClassTy::OPTIONAL);

/// Return true if \p IRP is assumed dead.
///
/// If \p FnLivenessAA is not provided it is queried.
bool isAssumedDead(const IRPosition &IRP, const AbstractAttribute *QueryingAA,
                   const AAIsDead *FnLivenessAA, bool &UsedAssumedInformation,
                   bool CheckBBLivenessOnly = false,
                   DepClassTy DepClass = DepClassTy::OPTIONAL);

/// Return true if \p BB is assumed dead.
///
/// If \p FnLivenessAA is not provided it is queried.
bool isAssumedDead(const BasicBlock &BB, const AbstractAttribute *QueryingAA,
                   const AAIsDead *FnLivenessAA,
                   DepClassTy DepClass = DepClassTy::OPTIONAL);

/// Check \p Pred on all (transitive) uses of \p V.
///
/// This method will evaluate \p Pred on all (transitive) uses of the
/// associated value and return true if \p Pred holds every time.
bool checkForAllUses(function_ref<bool(const Use &, bool &)> Pred,
                     const AbstractAttribute &QueryingAA, const Value &V,
                     bool CheckBBLivenessOnly = false,
                     DepClassTy LivenessDepClass = DepClassTy::OPTIONAL);

/// Emit a remark anchored at the instruction \p I.
///
/// \tparam RemarkKind One of:
///   - OptimizationRemark to indicate a successful optimization attempt
///   - OptimizationRemarkMissed to report a failed optimization attempt
///   - OptimizationRemarkAnalysis to provide additional information about an
///     optimization attempt
///
/// The remark is built by the callback \p RemarkCB, which takes a RemarkKind
/// and returns a RemarkKind. Remarks whose \p RemarkName starts with "OMP"
/// additionally get " [<RemarkName>]" appended. Nothing is emitted if no
/// remark getter was provided to the Attributor.
template <typename RemarkKind, typename RemarkCallBack>
void emitRemark(Instruction *I, StringRef RemarkName,
                RemarkCallBack &&RemarkCB) const {
  if (!OREGetter.hasValue())
    return;

  Function *F = I->getFunction();
  auto &ORE = (*OREGetter)(F);

  const bool IsOpenMPRemark = RemarkName.startswith("OMP");
  if (!IsOpenMPRemark) {
    ORE.emit([&]() { return RemarkCB(RemarkKind(PassName, RemarkName, I)); });
    return;
  }

  // OpenMP remarks carry their identifier as a suffix.
  ORE.emit([&]() {
    return RemarkCB(RemarkKind(PassName, RemarkName, I))
           << " [" << RemarkName << "]";
  });
}

/// Emit a remark anchored at the function \p F.
///
/// The remark is built by the callback \p RemarkCB from a RemarkKind
/// constructed with the pass name, \p RemarkName, and \p F. Remarks whose
/// \p RemarkName starts with "OMP" additionally get " [<RemarkName>]"
/// appended. Nothing is emitted if no remark getter was provided to the
/// Attributor.
template <typename RemarkKind, typename RemarkCallBack>
void emitRemark(Function *F, StringRef RemarkName,
                RemarkCallBack &&RemarkCB) const {
  if (!OREGetter.hasValue())
    return;

  auto &ORE = (*OREGetter)(F);

  const bool IsOpenMPRemark = RemarkName.startswith("OMP");
  if (!IsOpenMPRemark) {
    ORE.emit([&]() { return RemarkCB(RemarkKind(PassName, RemarkName, F)); });
    return;
  }

  // OpenMP remarks carry their identifier as a suffix.
  ORE.emit([&]() {
    return RemarkCB(RemarkKind(PassName, RemarkName, F))
           << " [" << RemarkName << "]";
  });
}

/// Helper struct used in the communication between an abstract attribute (AA)
/// that wants to change the signature of a function and the Attributor which
/// applies the changes. The struct is partially initialized with the
/// information from the AA (see the constructor). All other members are
/// provided by the Attributor prior to invoking any callbacks.
struct ArgumentReplacementInfo {
  /// Callee repair callback type
  ///
  /// The function repair callback is invoked once to rewire the replacement
  /// arguments in the body of the new function. The argument replacement info
  /// is passed, as built from the registerFunctionSignatureRewrite call, as
  /// well as the replacement function and an iterator to the first
  /// replacement argument.
  using CalleeRepairCBTy = std::function<void(
      const ArgumentReplacementInfo &, Function &, Function::arg_iterator)>;

  /// Abstract call site (ACS) repair callback type
  ///
  /// The abstract call site repair callback is invoked once on every abstract
  /// call site of the replaced function (\see ReplacedFn). The callback needs
  /// to provide the operands for the call to the new replacement function.
  /// The number and type of the operands appended to the provided vector
  /// (second argument) is defined by the number and types determined through
  /// the replacement type vector (\see ReplacementTypes). The first argument
  /// is the ArgumentReplacementInfo object registered with the Attributor
  /// through the registerFunctionSignatureRewrite call.
  using ACSRepairCBTy =
      std::function<void(const ArgumentReplacementInfo &, AbstractCallSite,
                         SmallVectorImpl<Value *> &)>;

  /// Simple getters, see the corresponding members for details.
  ///{

  Attributor &getAttributor() const { return A; }
  const Function &getReplacedFn() const { return ReplacedFn; }
  const Argument &getReplacedArg() const { return ReplacedArg; }
  unsigned getNumReplacementArgs() const { return ReplacementTypes.size(); }
  const SmallVectorImpl<Type *> &getReplacementTypes() const {
    return ReplacementTypes;
  }

  ///}

private:
  /// Constructor that takes the argument to be replaced, the types of
  /// the replacement arguments, as well as callbacks to repair the call sites
  /// and new function after the replacement happened.
  ///
  /// The constructor is private; instances are created by the Attributor,
  /// which is declared a friend below. The replaced function is taken to be
  /// the parent of \p Arg, and the replacement types are copied.
  ArgumentReplacementInfo(Attributor &A, Argument &Arg,
                          ArrayRef<Type *> ReplacementTypes,
                          CalleeRepairCBTy &&CalleeRepairCB,
                          ACSRepairCBTy &&ACSRepairCB)
      : A(A), ReplacedFn(*Arg.getParent()), ReplacedArg(Arg),
        ReplacementTypes(ReplacementTypes.begin(), ReplacementTypes.end()),
        CalleeRepairCB(std::move(CalleeRepairCB)),
        ACSRepairCB(std::move(ACSRepairCB)) {}

  /// Reference to the attributor to allow access from the callbacks.
  Attributor &A;

  /// The "old" function replaced by ReplacementFn.
  const Function &ReplacedFn;

  /// The "old" argument replaced by new ones defined via ReplacementTypes.
  const Argument &ReplacedArg;

  /// The types of the arguments replacing ReplacedArg.
  const SmallVector<Type *, 8> ReplacementTypes;

  /// Callee repair callback, see CalleeRepairCBTy.
  const CalleeRepairCBTy CalleeRepairCB;

  /// Abstract call site (ACS) repair callback, see ACSRepairCBTy.
  const ACSRepairCBTy ACSRepairCB;

  /// Allow access to the private members from the Attributor.
  friend struct Attributor;
};

/// Check if we can rewrite a function signature.
///
/// The argument \p Arg is replaced with new ones defined by the number,
/// order, and types in \p ReplacementTypes.
///
/// \returns True, if the replacement can be registered, via
/// registerFunctionSignatureRewrite, false otherwise.
bool isValidFunctionSignatureRewrite(Argument &Arg,
                                     ArrayRef<Type *> ReplacementTypes);

/// Register a rewrite for a function signature.
///
/// The argument \p Arg is replaced with new ones defined by the number,
/// order, and types in \p ReplacementTypes. The rewiring at the call sites is
/// done through \p ACSRepairCB and at the callee site through
/// \p CalleeRepairCB.
///
/// \returns True, if the replacement was registered, false otherwise.
bool registerFunctionSignatureRewrite(
    Argument &Arg, ArrayRef<Type *> ReplacementTypes,
    ArgumentReplacementInfo::CalleeRepairCBTy &&CalleeRepairCB,
    ArgumentReplacementInfo::ACSRepairCBTy &&ACSRepairCB);

/// Check \p Pred on all function call sites.
///
/// This method will evaluate \p Pred on call sites and return
/// true if \p Pred holds for every call site. However, this is only possible
/// if all call sites are known, hence the function has internal linkage.
/// If true is returned, \p AllCallSitesKnown is set if all possible call
/// sites of the function have been visited.
bool checkForAllCallSites(function_ref<bool(AbstractCallSite)> Pred,
                          const AbstractAttribute &QueryingAA,
                          bool RequireAllCallSites, bool &AllCallSitesKnown);

/// Check \p Pred on all values potentially returned by \p F.
///
/// This method will evaluate \p Pred on all values potentially returned by
/// the function associated with \p QueryingAA. The returned values are
/// matched with their respective return instructions. Returns true if \p Pred
/// holds on all of them.
bool checkForAllReturnedValuesAndReturnInsts(
    function_ref<bool(Value &, const SmallSetVector<ReturnInst *, 4> &)> Pred,
    const AbstractAttribute &QueryingAA);

/// Check \p Pred on all values potentially returned by the function
/// associated with \p QueryingAA.
///
/// This is the context insensitive version of the method above.
bool checkForAllReturnedValues(function_ref<bool(Value &)> Pred,
                               const AbstractAttribute &QueryingAA);

/// Check \p Pred on all instructions with an opcode present in \p Opcodes.
///
/// This method will evaluate \p Pred on all instructions with an opcode
/// present in \p Opcodes and return true if \p Pred holds on all of them.
bool checkForAllInstructions(function_ref<bool(Instruction &)> Pred,
                             const AbstractAttribute &QueryingAA,
                             const ArrayRef<unsigned> &Opcodes,
                             bool &UsedAssumedInformation,
                             bool CheckBBLivenessOnly = false,
                             bool CheckPotentiallyDead = false);

/// Check \p Pred on all call-like instructions, i.e., those with an Invoke,
/// CallBr, or Call opcode.
///
/// This is a thin wrapper; see checkForAllInstructions(...) for the meaning
/// of the remaining parameters and of the return value.
bool checkForAllCallLikeInstructions(function_ref<bool(Instruction &)> Pred,
                                     const AbstractAttribute &QueryingAA,
                                     bool &UsedAssumedInformation,
                                     bool CheckBBLivenessOnly = false,
                                     bool CheckPotentiallyDead = false) {
  const unsigned CallLikeOpcodes[] = {unsigned(Instruction::Invoke),
                                      unsigned(Instruction::CallBr),
                                      unsigned(Instruction::Call)};
  return checkForAllInstructions(Pred, QueryingAA, CallLikeOpcodes,
                                 UsedAssumedInformation, CheckBBLivenessOnly,
                                 CheckPotentiallyDead);
}

/// Check \p Pred on all Read/Write instructions.
///
/// This method will evaluate \p Pred on all instructions that read or write
/// to memory present in the information cache and return true if \p Pred
/// holds on all of them.
bool checkForAllReadWriteInstructions(function_ref<bool(Instruction &)> Pred,
                                      AbstractAttribute &QueryingAA,
                                      bool &UsedAssumedInformation);

/// Create a shallow wrapper for \p F such that \p F has internal linkage
/// afterwards. It also sets the original \p F 's name to anonymous
///
/// A wrapper is a function with the same type (and attributes) as \p F
/// that will only call \p F and return the result, if any.
///
/// Assuming the declaration of \p F looks like:
///   rty F(aty0 arg0, ..., atyN argN);
///
/// The wrapper will then look as follows:
///   rty wrapper(aty0 arg0, ..., atyN argN) {
///     return F(arg0, ..., argN);
///   }
///
static void createShallowWrapper(Function &F);

/// Returns true if the function \p F can be internalized. i.e. it has a
/// compatible linkage.
static bool isInternalizable(Function &F);

/// Make another copy of the function \p F such that the copied version has
/// internal linkage afterwards and can be analysed. Then we replace all uses
/// of the original function to the copied one
///
/// Only non-locally linked functions that have `linkonce_odr` or `weak_odr`
/// linkage can be internalized because these linkages guarantee that other
/// definitions with the same name have the same semantics as this one.
///
/// This will only be run if the `attributor-allow-deep-wrappers` option is
/// set, or if the function is called with \p Force set to true.
///
/// If the function \p F failed to be internalized the return value will be a
/// null pointer.
static Function *internalizeFunction(Function &F, bool Force = false);

/// Make copies of each function in the set \p FnSet such that the copied
/// version has internal linkage afterwards and can be analysed. Then we
/// replace all uses of the original function to the copied one. The map
/// \p FnMap contains a mapping of functions to their internalized versions.
///
/// Only non-locally linked functions that have `linkonce_odr` or `weak_odr`
/// linkage can be internalized because these linkages guarantee that other
/// definitions with the same name have the same semantics as this one.
///
/// This version will internalize all the functions in the set \p FnSet at
/// once and then replace the uses. This prevents internalized functions being
/// called by external functions when there is an internalized version in the
/// module.
static bool internalizeFunctions(SmallPtrSetImpl<Function *> &FnSet,
                                 DenseMap<Function *, Function *> &FnMap);

/// Accessor for the data layout held by the information cache.
const DataLayout &getDataLayout() const {
  return InfoCache.DL;
}

/// The allocator used to allocate memory, e.g. for `AbstractAttribute`s.
BumpPtrAllocator &Allocator;

private:
/// This method will do fixpoint iteration until fixpoint or the
/// maximum iteration count is reached.
///
/// If the maximum iteration count is reached, This method will
/// indicate pessimistic fixpoint on attributes that transitively depend
/// on attributes that were scheduled for an update.
void runTillFixpoint();

/// Gets called after scheduling, manifests attributes to the LLVM IR.
ChangeStatus manifestAttributes();

/// Gets called after attributes have been manifested, cleans up the IR.
/// Deletes dead functions, blocks and instructions.
/// Rewrites function signatures and updates the call graph.
ChangeStatus cleanupIR();

/// Identify internal functions that are effectively dead, thus not reachable
/// from a live entry point. The functions are added to ToBeDeletedFunctions.
void identifyDeadInternalFunctions();

/// Run `::update` on \p AA and track the dependences queried while doing so.
/// Also adjust the state if we know further updates are not necessary.
ChangeStatus updateAA(AbstractAttribute &AA);

/// Remember the dependences on the top of the dependence stack such that they
/// may trigger further updates. (\see DependenceStack)
void rememberDependences();

/// Check \p Pred on all call sites of \p Fn.
///
/// This method will evaluate \p Pred on call sites and return
/// true if \p Pred holds for every call site. However, this is only possible
/// if all call sites are known, hence the function has internal linkage.
/// If true is returned, \p AllCallSitesKnown is set if all possible call
/// sites of the function have been visited.
bool checkForAllCallSites(function_ref<bool(AbstractCallSite)> Pred,
                          const Function &Fn, bool RequireAllCallSites,
                          const AbstractAttribute *QueryingAA,
                          bool &AllCallSitesKnown);

/// Determine if CallBase context in \p IRP should be propagated.
bool shouldPropagateCallBaseContext(const IRPosition &IRP);

/// Apply all requested function signature rewrites
/// (\see registerFunctionSignatureRewrite) and return Changed if the module
/// was altered.
ChangeStatus
rewriteFunctionSignatures(SmallPtrSetImpl<Function *> &ModifiedFns);

/// Check if the Attribute \p AA should be seeded.
/// See getOrCreateAAFor.
bool shouldSeedAttribute(AbstractAttribute &AA);

/// A nested map to lookup abstract attributes based on the argument position
/// on the outer level, and the addresses of the static member (AAType::ID) on
/// the inner level.
///{
using AAMapKeyTy = std::pair<const char *, IRPosition>;
DenseMap<AAMapKeyTy, AbstractAttribute *> AAMap;
///}

/// Map to remember all requested signature changes (= argument replacements).
DenseMap<Function *, SmallVector<std::unique_ptr<ArgumentReplacementInfo>, 8>>
    ArgumentReplacementMap;

/// The set of functions we are deriving attributes for.
SetVector<Function *> &Functions;

/// The information cache that holds pre-processed (LLVM-IR) information.
InformationCache &InfoCache;

/// Helper to update an underlying call graph.
CallGraphUpdater &CGUpdater;

/// Abstract Attribute dependency graph
AADepGraph DG;

/// Set of functions for which we modified the content such that it might
/// impact the call graph.
SmallPtrSet<Function *, 8> CGModifiedFunctions;

/// Information about a dependence. If FromAA is changed ToAA needs to be
/// updated as well.
struct DepInfo {
  /// The attribute whose change triggers the update.
  const AbstractAttribute *FromAA;
  /// The attribute that has to be updated when FromAA changes.
  const AbstractAttribute *ToAA;
  /// The class of this dependence.
  /// NOTE(review): DepClassTy is declared outside this excerpt; presumably it
  /// decides how a change of FromAA is propagated to ToAA - confirm.
  DepClassTy DepClass;
};

/// The dependence stack is used to track dependences during an
/// `AbstractAttribute::update` call. As `AbstractAttribute::update` can be
/// recursive we might have multiple vectors of dependences in here. The stack
/// size, should be adjusted according to the expected recursion depth and the
/// inner dependence vector size to the expected number of dependences per
/// abstract attribute. Since the inner vectors are actually allocated on the
/// stack we can be generous with their size.
using DependenceVector = SmallVector<DepInfo, 8>;
SmallVector<DependenceVector *, 16> DependenceStack;

/// If not null, a set limiting the attribute opportunities.
const DenseSet<const char *> *Allowed;

/// Whether to delete functions.
const bool DeleteFns;

/// Whether to rewrite signatures.
const bool RewriteSignatures;

/// Maximum number of fixedpoint iterations.
Optional<unsigned> MaxFixpointIterations;

/// A set to remember the functions we already assume to be live and visited.
DenseSet<const Function *> VisitedFunctions;

/// Uses we replace with a new value after manifest is done. We will remove
/// then trivially dead instructions as well.
DenseMap<Use *, Value *> ToBeChangedUses;

/// Values we replace with a new value after manifest is done. We will remove
/// then trivially dead instructions as well.
DenseMap<Value *, std::pair<Value *, bool>> ToBeChangedValues;

/// Instructions we replace with `unreachable` insts after manifest is done.
SmallDenseSet<WeakVH, 16> ToBeChangedToUnreachableInsts;

/// Invoke instructions with at least a single dead successor block.
SmallVector<WeakVH, 16> InvokeWithDeadSuccessor;

/// A flag that indicates which stage of the process we are in. Initially, the
/// phase is SEEDING. Phase is changed in `Attributor::run()`
///
/// The enumeration and the member `Phase` that stores the current value are
/// declared together in a single statement.
enum class AttributorPhase {
  // The initial phase; `Phase` is default-initialized to this value.
  SEEDING,
  UPDATE,
  MANIFEST,
  CLEANUP,
} Phase = AttributorPhase::SEEDING;

/// The current initialization chain length. Tracked to avoid stack overflows.
unsigned InitializationChainLength = 0;

/// Functions, blocks, and instructions we delete after manifest is done.
///
///{
SmallPtrSet<Function *, 8> ToBeDeletedFunctions;
SmallPtrSet<BasicBlock *, 8> ToBeDeletedBlocks;
SmallPtrSet<BasicBlock *, 8> ManifestAddedBlocks;
SmallDenseSet<WeakVH, 8> ToBeDeletedInsts;
///}

/// Callback to get an OptimizationRemarkEmitter from a Function *.
Optional<OptimizationRemarkGetter> OREGetter;

/// The name of the pass to emit remarks for.
const char *PassName = "";

friend AADepGraph;
friend AttributorCallGraph;
2057};

2059/// An interface to query the internal state of an abstract attribute.
2060///
2061/// The abstract state is a minimal interface that allows the Attributor to
2062/// communicate with the abstract attributes about their internal state without
2063/// enforcing or exposing implementation details, e.g., the (existence of an)
2064/// underlying lattice.
2065///
2066/// It is sufficient to be able to query if a state is (1) valid or invalid, (2)
2067/// at a fixpoint, and to indicate to the state that (3) an optimistic fixpoint
2068/// was reached or (4) a pessimistic fixpoint was enforced.
2069///
2070/// All methods need to be implemented by the subclass. For the common use case,
2071/// a single boolean state or a bit-encoded state, the BooleanState and
2072/// {Inc,Dec,Bit}IntegerState classes are already provided. An abstract
2073/// attribute can inherit from them to get the abstract state interface and
2074/// additional methods to directly modify the state based if needed. See the
2075/// class comments for help.
2076struct AbstractState {
virtual ~AbstractState() {}

/// Return if this abstract state is in a valid state. If false, no
/// information provided should be used.
virtual bool isValidState() const = 0;

/// Return if this abstract state is fixed, thus does not need to be updated
/// if information changes as it cannot change itself.
virtual bool isAtFixpoint() const = 0;

/// Indicate that the abstract state should converge to the optimistic state.
///
/// This will usually make the optimistically assumed state the known to be
/// true state.
///
/// \returns ChangeStatus::UNCHANGED as the assumed value should not change.
virtual ChangeStatus indicateOptimisticFixpoint() = 0;

/// Indicate that the abstract state should converge to the pessimistic state.
///
/// This will usually revert the optimistically assumed state to the known to
/// be true state.
///
/// \returns ChangeStatus::CHANGED as the assumed value may change.
virtual ChangeStatus indicatePessimisticFixpoint() = 0;
2102};

2104/// Simple state with integers encoding.
2105///
2106/// The interface ensures that the assumed bits are always a subset of the known
2107/// bits. Users can only add known bits and, except through adding known bits,
2108/// they can only remove assumed bits. This should guarantee monotoniticy and
2109/// thereby the existence of a fixpoint (if used corretly). The fixpoint is
2110/// reached when the assumed and known state/bits are equal. Users can
2111/// force/inidicate a fixpoint. If an optimistic one is indicated, the known
2112/// state will catch up with the assumed one, for a pessimistic fixpoint it is
2113/// the other way around.
2114template <typename base_ty, base_ty BestState, base_ty WorstState>
2115struct IntegerStateBase : public AbstractState {
using base_t = base_ty;

IntegerStateBase() {}
IntegerStateBase(base_t Assumed) : Assumed(Assumed) {}

/// Return the best possible representable state.
static constexpr base_t getBestState() { return BestState; }
static constexpr base_t getBestState(const IntegerStateBase &) {
  return getBestState();
}

/// Return the worst possible representable state.
static constexpr base_t getWorstState() { return WorstState; }
static constexpr base_t getWorstState(const IntegerStateBase &) {
  return getWorstState();
}

/// See AbstractState::isValidState()
/// NOTE: For now we simply pretend that the worst possible state is invalid.
bool isValidState() const override { return Assumed != getWorstState(); }
2
←
Assuming the condition is true→
3
←
Returning the value 1, which participates in a condition later→

/// See AbstractState::isAtFixpoint()
bool isAtFixpoint() const override { return Assumed == Known; }

/// See AbstractState::indicateOptimisticFixpoint(...)
ChangeStatus indicateOptimisticFixpoint() override {
  Known = Assumed;
  return ChangeStatus::UNCHANGED;
}

/// See AbstractState::indicatePessimisticFixpoint(...)
ChangeStatus indicatePessimisticFixpoint() override {
  Assumed = Known;
  return ChangeStatus::CHANGED;
}

/// Return the known state encoding
base_t getKnown() const { return Known; }

/// Return the assumed state encoding.
base_t getAssumed() const { return Assumed; }

/// Equality for IntegerStateBase.
bool
operator==(const IntegerStateBase<base_t, BestState, WorstState> &R) const {
  return this->getAssumed() == R.getAssumed() &&
         this->getKnown() == R.getKnown();
}

/// Inequality for IntegerStateBase.
bool
operator!=(const IntegerStateBase<base_t, BestState, WorstState> &R) const {
  return !(*this == R);
}

/// "Clamp" this state with \p R. The result is subtype dependent but it is
/// intended that only information assumed in both states will be assumed in
/// this one afterwards.
void operator^=(const IntegerStateBase<base_t, BestState, WorstState> &R) {
  handleNewAssumedValue(R.getAssumed());
}

/// "Clamp" this state with \p R. The result is subtype dependent but it is
/// intended that information known in either state will be known in
/// this one afterwards.
void operator+=(const IntegerStateBase<base_t, BestState, WorstState> &R) {
  handleNewKnownValue(R.getKnown());
}

void operator|=(const IntegerStateBase<base_t, BestState, WorstState> &R) {
  joinOR(R.getAssumed(), R.getKnown());
}

void operator&=(const IntegerStateBase<base_t, BestState, WorstState> &R) {
  joinAND(R.getAssumed(), R.getKnown());
}

2193protected:
/// Handle a new assumed value \p Value. Subtype dependent.
virtual void handleNewAssumedValue(base_t Value) = 0;

/// Handle a new known value \p Value. Subtype dependent.
virtual void handleNewKnownValue(base_t Value) = 0;

/// Handle a  value \p Value. Subtype dependent.
virtual void joinOR(base_t AssumedValue, base_t KnownValue) = 0;

/// Handle a new assumed value \p Value. Subtype dependent.
virtual void joinAND(base_t AssumedValue, base_t KnownValue) = 0;

/// The known state encoding in an integer of type base_t.
base_t Known = getWorstState();

/// The assumed state encoding in an integer of type base_t.
base_t Assumed = getBestState();
2211};

2213/// Specialization of the integer state for a bit-wise encoding.
2214template <typename base_ty = uint32_t, base_ty BestState = ~base_ty(0),
        base_ty WorstState = 0>
2216struct BitIntegerState
  : public IntegerStateBase<base_ty, BestState, WorstState> {
using base_t = base_ty;

/// Return true if the bits set in \p BitsEncoding are "known bits".
bool isKnown(base_t BitsEncoding) const {
  return (this->Known & BitsEncoding) == BitsEncoding;
}

/// Return true if the bits set in \p BitsEncoding are "assumed bits".
bool isAssumed(base_t BitsEncoding) const {
  return (this->Assumed & BitsEncoding) == BitsEncoding;
}

/// Add the bits in \p BitsEncoding to the "known bits".
BitIntegerState &addKnownBits(base_t Bits) {
  // Make sure we never miss any "known bits".
  this->Assumed |= Bits;
  this->Known |= Bits;
  return *this;
}

/// Remove the bits in \p BitsEncoding from the "assumed bits" if not known.
BitIntegerState &removeAssumedBits(base_t BitsEncoding) {
  return intersectAssumedBits(~BitsEncoding);
}

/// Remove the bits in \p BitsEncoding from the "known bits".
BitIntegerState &removeKnownBits(base_t BitsEncoding) {
  this->Known = (this->Known & ~BitsEncoding);
  return *this;
}

/// Keep only "assumed bits" also set in \p BitsEncoding but all known ones.
BitIntegerState &intersectAssumedBits(base_t BitsEncoding) {
  // Make sure we never loose any "known bits".
  this->Assumed = (this->Assumed & BitsEncoding) | this->Known;
  return *this;
}

2256private:
void handleNewAssumedValue(base_t Value) override {
  intersectAssumedBits(Value);
}
void handleNewKnownValue(base_t Value) override { addKnownBits(Value); }
void joinOR(base_t AssumedValue, base_t KnownValue) override {
  this->Known |= KnownValue;
  this->Assumed |= AssumedValue;
}
void joinAND(base_t AssumedValue, base_t KnownValue) override {
  this->Known &= KnownValue;
  this->Assumed &= AssumedValue;
}
2269};

2271/// Specialization of the integer state for an increasing value, hence ~0u is
2272/// the best state and 0 the worst.
2273template <typename base_ty = uint32_t, base_ty BestState = ~base_ty(0),
        base_ty WorstState = 0>
2275struct IncIntegerState
  : public IntegerStateBase<base_ty, BestState, WorstState> {
using super = IntegerStateBase<base_ty, BestState, WorstState>;
using base_t = base_ty;

IncIntegerState() : super() {}
IncIntegerState(base_t Assumed) : super(Assumed) {}

/// Return the best possible representable state.
static constexpr base_t getBestState() { return BestState; }
static constexpr base_t
getBestState(const IncIntegerState<base_ty, BestState, WorstState> &) {
  return getBestState();
}

/// Take minimum of assumed and \p Value.
IncIntegerState &takeAssumedMinimum(base_t Value) {
  // Make sure we never loose "known value".
  this->Assumed = std::max(std::min(this->Assumed, Value), this->Known);
  return *this;
}

/// Take maximum of known and \p Value.
IncIntegerState &takeKnownMaximum(base_t Value) {
  // Make sure we never loose "known value".
  this->Assumed = std::max(Value, this->Assumed);
  this->Known = std::max(Value, this->Known);
  return *this;
}

2305private:
void handleNewAssumedValue(base_t Value) override {
  takeAssumedMinimum(Value);
}
void handleNewKnownValue(base_t Value) override { takeKnownMaximum(Value); }
void joinOR(base_t AssumedValue, base_t KnownValue) override {
  this->Known = std::max(this->Known, KnownValue);
  this->Assumed = std::max(this->Assumed, AssumedValue);
}
void joinAND(base_t AssumedValue, base_t KnownValue) override {
  this->Known = std::min(this->Known, KnownValue);
  this->Assumed = std::min(this->Assumed, AssumedValue);
}
2318};

2320/// Specialization of the integer state for a decreasing value, hence 0 is the
2321/// best state and ~0u the worst.
2322template <typename base_ty = uint32_t>
2323struct DecIntegerState : public IntegerStateBase<base_ty, 0, ~base_ty(0)> {
using base_t = base_ty;

/// Take maximum of assumed and \p Value.
DecIntegerState &takeAssumedMaximum(base_t Value) {
  // Make sure we never loose "known value".
  this->Assumed = std::min(std::max(this->Assumed, Value), this->Known);
  return *this;
}

/// Take minimum of known and \p Value.
DecIntegerState &takeKnownMinimum(base_t Value) {
  // Make sure we never loose "known value".
  this->Assumed = std::min(Value, this->Assumed);
  this->Known = std::min(Value, this->Known);
  return *this;
}

2341private:
void handleNewAssumedValue(base_t Value) override {
  takeAssumedMaximum(Value);
}
void handleNewKnownValue(base_t Value) override { takeKnownMinimum(Value); }
void joinOR(base_t AssumedValue, base_t KnownValue) override {
  this->Assumed = std::min(this->Assumed, KnownValue);
  this->Assumed = std::min(this->Assumed, AssumedValue);
}
void joinAND(base_t AssumedValue, base_t KnownValue) override {
  this->Assumed = std::max(this->Assumed, KnownValue);
  this->Assumed = std::max(this->Assumed, AssumedValue);
}
2354};

2356/// Simple wrapper for a single bit (boolean) state.
2357struct BooleanState : public IntegerStateBase<bool, 1, 0> {
using super = IntegerStateBase<bool, 1, 0>;
using base_t = IntegerStateBase::base_t;

BooleanState() : super() {}
BooleanState(base_t Assumed) : super(Assumed) {}

/// Set the assumed value to \p Value but never below the known one.
void setAssumed(bool Value) { Assumed &= (Known | Value); }

/// Set the known and asssumed value to \p Value.
void setKnown(bool Value) {
  Known |= Value;
  Assumed |= Value;
}

/// Return true if the state is assumed to hold.
bool isAssumed() const { return getAssumed(); }

/// Return true if the state is known to hold.
bool isKnown() const { return getKnown(); }

2379private:
void handleNewAssumedValue(base_t Value) override {
  if (!Value)
    Assumed = Known;
}
void handleNewKnownValue(base_t Value) override {
  if (Value)
    Known = (Assumed = Value);
}
void joinOR(base_t AssumedValue, base_t KnownValue) override {
  Known |= KnownValue;
  Assumed |= AssumedValue;
}
void joinAND(base_t AssumedValue, base_t KnownValue) override {
  Known &= KnownValue;
  Assumed &= AssumedValue;
}
2396};

2398/// State for an integer range.
2399struct IntegerRangeState : public AbstractState {

/// Bitwidth of the associated value.
uint32_t BitWidth;

/// State representing assumed range, initially set to empty.
ConstantRange Assumed;

/// State representing known range, initially set to [-inf, inf].
ConstantRange Known;

IntegerRangeState(uint32_t BitWidth)
    : BitWidth(BitWidth), Assumed(ConstantRange::getEmpty(BitWidth)),
      Known(ConstantRange::getFull(BitWidth)) {}

IntegerRangeState(const ConstantRange &CR)
    : BitWidth(CR.getBitWidth()), Assumed(CR),
      Known(getWorstState(CR.getBitWidth())) {}

/// Return the worst possible representable state.
static ConstantRange getWorstState(uint32_t BitWidth) {
  return ConstantRange::getFull(BitWidth);
}

/// Return the best possible representable state.
static ConstantRange getBestState(uint32_t BitWidth) {
  return ConstantRange::getEmpty(BitWidth);
}
static ConstantRange getBestState(const IntegerRangeState &IRS) {
  return getBestState(IRS.getBitWidth());
}

/// Return associated values' bit width.
uint32_t getBitWidth() const { return BitWidth; }

/// See AbstractState::isValidState()
bool isValidState() const override {
  return BitWidth > 0 && !Assumed.isFullSet();
}

/// See AbstractState::isAtFixpoint()
bool isAtFixpoint() const override { return Assumed == Known; }

/// See AbstractState::indicateOptimisticFixpoint(...)
ChangeStatus indicateOptimisticFixpoint() override {
  Known = Assumed;
  return ChangeStatus::CHANGED;
}

/// See AbstractState::indicatePessimisticFixpoint(...)
ChangeStatus indicatePessimisticFixpoint() override {
  Assumed = Known;
  return ChangeStatus::CHANGED;
}

/// Return the known state encoding
ConstantRange getKnown() const { return Known; }

/// Return the assumed state encoding.
ConstantRange getAssumed() const { return Assumed; }

/// Unite assumed range with the passed state.
void unionAssumed(const ConstantRange &R) {
  // Don't loose a known range.
  Assumed = Assumed.unionWith(R).intersectWith(Known);
}

/// See IntegerRangeState::unionAssumed(..).
void unionAssumed(const IntegerRangeState &R) {
  unionAssumed(R.getAssumed());
}

/// Unite known range with the passed state.
void unionKnown(const ConstantRange &R) {
  // Don't loose a known range.
  Known = Known.unionWith(R);
  Assumed = Assumed.unionWith(Known);
}

/// See IntegerRangeState::unionKnown(..).
void unionKnown(const IntegerRangeState &R) { unionKnown(R.getKnown()); }

/// Intersect known range with the passed state.
void intersectKnown(const ConstantRange &R) {
  Assumed = Assumed.intersectWith(R);
  Known = Known.intersectWith(R);
}

/// See IntegerRangeState::intersectKnown(..).
void intersectKnown(const IntegerRangeState &R) {
  intersectKnown(R.getKnown());
}

/// Equality for IntegerRangeState.
bool operator==(const IntegerRangeState &R) const {
  return getAssumed() == R.getAssumed() && getKnown() == R.getKnown();
}

/// "Clamp" this state with \p R. The result is subtype dependent but it is
/// intended that only information assumed in both states will be assumed in
/// this one afterwards.
IntegerRangeState operator^=(const IntegerRangeState &R) {
  // NOTE: `^=` operator seems like `intersect` but in this case, we need to
  // take `union`.
  unionAssumed(R);
  return *this;
}

IntegerRangeState operator&=(const IntegerRangeState &R) {
  // NOTE: `&=` operator seems like `intersect` but in this case, we need to
  // take `union`.
  unionKnown(R);
  unionAssumed(R);
  return *this;
}
2514};
2515/// Helper struct necessary as the modular build fails if the virtual method
2516/// IRAttribute::manifest is defined in the Attributor.cpp.
2517struct IRAttributeManifest {
static ChangeStatus manifestAttrs(Attributor &A, const IRPosition &IRP,
                                  const ArrayRef<Attribute> &DeducedAttrs,
                                  bool ForceReplace = false);
2521};

2523/// Helper to tie a abstract state implementation to an abstract attribute.
2524template <typename StateTy, typename BaseType, class... Ts>
2525struct StateWrapper : public BaseType, public StateTy {
/// Provide static access to the type of the state.
using StateType = StateTy;

StateWrapper(const IRPosition &IRP, Ts... Args)
    : BaseType(IRP), StateTy(Args...) {}

/// See AbstractAttribute::getState(...).
StateType &getState() override { return *this; }

/// See AbstractAttribute::getState(...).
const StateType &getState() const override { return *this; }
2537};

2539/// Helper class that provides common functionality to manifest IR attributes.
2540template <Attribute::AttrKind AK, typename BaseType>
2541struct IRAttribute : public BaseType {
IRAttribute(const IRPosition &IRP) : BaseType(IRP) {}

/// See AbstractAttribute::initialize(...).
virtual void initialize(Attributor &A) override {
  const IRPosition &IRP = this->getIRPosition();
  if (isa<UndefValue>(IRP.getAssociatedValue()) ||
      this->hasAttr(getAttrKind(), /* IgnoreSubsumingPositions */ false,
                    &A)) {
    this->getState().indicateOptimisticFixpoint();
    return;
  }

  bool IsFnInterface = IRP.isFnInterfaceKind();
  const Function *FnScope = IRP.getAnchorScope();
  // TODO: Not all attributes require an exact definition. Find a way to
  //       enable deduction for some but not all attributes in case the
  //       definition might be changed at runtime, see also
  //       http://lists.llvm.org/pipermail/llvm-dev/2018-February/121275.html.
  // TODO: We could always determine abstract attributes and if sufficient
  //       information was found we could duplicate the functions that do not
  //       have an exact definition.
  if (IsFnInterface && (!FnScope || !A.isFunctionIPOAmendable(*FnScope)))
    this->getState().indicatePessimisticFixpoint();
}

/// See AbstractAttribute::manifest(...).
ChangeStatus manifest(Attributor &A) override {
  if (isa<UndefValue>(this->getIRPosition().getAssociatedValue()))
    return ChangeStatus::UNCHANGED;
  SmallVector<Attribute, 4> DeducedAttrs;
  getDeducedAttributes(this->getAnchorValue().getContext(), DeducedAttrs);
  return IRAttributeManifest::manifestAttrs(A, this->getIRPosition(),
                                            DeducedAttrs);
}

/// Return the kind that identifies the abstract attribute implementation.
Attribute::AttrKind getAttrKind() const { return AK; }

/// Return the deduced attributes in \p Attrs.
virtual void getDeducedAttributes(LLVMContext &Ctx,
                                  SmallVectorImpl<Attribute> &Attrs) const {
  Attrs.emplace_back(Attribute::get(Ctx, getAttrKind()));
}
2585};

2587/// Base struct for all "concrete attribute" deductions.
2588///
2589/// The abstract attribute is a minimal interface that allows the Attributor to
2590/// orchestrate the abstract/fixpoint analysis. The design allows to hide away
2591/// implementation choices made for the subclasses but also to structure their
2592/// implementation and simplify the use of other abstract attributes in-flight.
2593///
2594/// To allow easy creation of new attributes, most methods have default
2595/// implementations. The ones that do not are generally straight forward, except
2596/// `AbstractAttribute::updateImpl` which is the location of most reasoning
2597/// associated with the abstract attribute. The update is invoked by the
2598/// Attributor in case the situation used to justify the current optimistic
2599/// state might have changed. The Attributor determines this automatically
2600/// by monitoring the `Attributor::getAAFor` calls made by abstract attributes.
2601///
2602/// The `updateImpl` method should inspect the IR and other abstract attributes
2603/// in-flight to justify the best possible (=optimistic) state. The actual
2604/// implementation is, similar to the underlying abstract state encoding, not
2605/// exposed. In the most common case, the `updateImpl` will go through a list of
2606/// reasons why its optimistic state is valid given the current information. If
2607/// any combination of them holds and is sufficient to justify the current
2608/// optimistic state, the method shall return UNCHAGED. If not, the optimistic
2609/// state is adjusted to the situation and the method shall return CHANGED.
2610///
2611/// If the manifestation of the "concrete attribute" deduced by the subclass
2612/// differs from the "default" behavior, which is a (set of) LLVM-IR
2613/// attribute(s) for an argument, call site argument, function return value, or
2614/// function, the `AbstractAttribute::manifest` method should be overloaded.
2615///
2616/// NOTE: If the state obtained via getState() is INVALID, thus if
2617///       AbstractAttribute::getState().isValidState() returns false, no
2618///       information provided by the methods of this class should be used.
2619/// NOTE: The Attributor currently has certain limitations to what we can do.
2620///       As a general rule of thumb, "concrete" abstract attributes should *for
2621///       now* only perform "backward" information propagation. That means
2622///       optimistic information obtained through abstract attributes should
2623///       only be used at positions that precede the origin of the information
2624///       with regards to the program flow. More practically, information can
2625///       *now* be propagated from instructions to their enclosing function, but
2626///       *not* from call sites to the called function. The mechanisms to allow
2627///       both directions will be added in the future.
2628/// NOTE: The mechanics of adding a new "concrete" abstract attribute are
2629///       described in the file comment.
2630struct AbstractAttribute : public IRPosition, public AADepGraphNode {
using StateType = AbstractState;

AbstractAttribute(const IRPosition &IRP) : IRPosition(IRP) {}

/// Virtual destructor.
virtual ~AbstractAttribute() {}

/// This function is used to identify if an \p DGN is of type
/// AbstractAttribute so that the dyn_cast and cast can use such information
/// to cast an AADepGraphNode to an AbstractAttribute.
///
/// We eagerly return true here because all AADepGraphNodes except for the
/// Synthethis Node are of type AbstractAttribute
static bool classof(const AADepGraphNode *DGN) { return true; }

/// Initialize the state with the information in the Attributor \p A.
///
/// This function is called by the Attributor once all abstract attributes
/// have been identified. It can and shall be used for task like:
///  - identify existing knowledge in the IR and use it for the "known state"
///  - perform any work that is not going to change over time, e.g., determine
///    a subset of the IR, or attributes in-flight, that have to be looked at
///    in the `updateImpl` method.
virtual void initialize(Attributor &A) {}

/// Return the internal abstract state for inspection.
virtual StateType &getState() = 0;
virtual const StateType &getState() const = 0;

/// Return an IR position, see struct IRPosition.
const IRPosition &getIRPosition() const { return *this; };
IRPosition &getIRPosition() { return *this; };

/// Helper functions, for debug purposes only.
///{
void print(raw_ostream &OS) const override;
virtual void printWithDeps(raw_ostream &OS) const;
void dump() const { print(dbgs()); }

/// This function should return the "summarized" assumed state as string.
virtual const std::string getAsStr() const = 0;

/// This function should return the name of the AbstractAttribute
virtual const std::string getName() const = 0;

/// This function should return the address of the ID of the AbstractAttribute
virtual const char *getIdAddr() const = 0;
///}

/// Allow the Attributor access to the protected methods.
friend struct Attributor;

2683protected:
/// Hook for the Attributor to trigger an update of the internal state.
///
/// If this attribute is already fixed, this method will return UNCHANGED,
/// otherwise it delegates to `AbstractAttribute::updateImpl`.
///
/// \Return CHANGED if the internal state changed, otherwise UNCHANGED.
ChangeStatus update(Attributor &A);

/// Hook for the Attributor to trigger the manifestation of the information
/// represented by the abstract attribute in the LLVM-IR.
///
/// \Return CHANGED if the IR was altered, otherwise UNCHANGED.
virtual ChangeStatus manifest(Attributor &A) {
  return ChangeStatus::UNCHANGED;
}

/// Hook to enable custom statistic tracking, called after manifest that
/// resulted in a change if statistics are enabled.
///
/// We require subclasses to provide an implementation so we remember to
/// add statistics for them.
virtual void trackStatistics() const = 0;

/// The actual update/transfer function which has to be implemented by the
/// derived classes.
///
/// If it is called, the environment has changed and we have to determine if
/// the current information is still valid or adjust it otherwise.
///
/// \Return CHANGED if the internal state changed, otherwise UNCHANGED.
virtual ChangeStatus updateImpl(Attributor &A) = 0;
2715};

2717/// Forward declarations of output streams for debug purposes.
2718///
2719///{
2720raw_ostream &operator<<(raw_ostream &OS, const AbstractAttribute &AA);
2721raw_ostream &operator<<(raw_ostream &OS, ChangeStatus S);
2722raw_ostream &operator<<(raw_ostream &OS, IRPosition::Kind);
2723raw_ostream &operator<<(raw_ostream &OS, const IRPosition &);
2724raw_ostream &operator<<(raw_ostream &OS, const AbstractState &State);
2725template <typename base_ty, base_ty BestState, base_ty WorstState>
2726raw_ostream &
2727operator<<(raw_ostream &OS,
         const IntegerStateBase<base_ty, BestState, WorstState> &S) {
return OS << "(" << S.getKnown() << "-" << S.getAssumed() << ")"
          << static_cast<const AbstractState &>(S);
2731}
2732raw_ostream &operator<<(raw_ostream &OS, const IntegerRangeState &State);
2733///}

2735struct AttributorPass : public PassInfoMixin<AttributorPass> {
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
2737};
2738struct AttributorCGSCCPass : public PassInfoMixin<AttributorCGSCCPass> {
PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM,
                      LazyCallGraph &CG, CGSCCUpdateResult &UR);
2741};

2743Pass *createAttributorLegacyPass();
2744Pass *createAttributorCGSCCLegacyPass();

2746/// Helper function to clamp a state \p S of type \p StateType with the
2747/// information in \p R and indicate/return if \p S did change (as-in update is
2748/// required to be run again).
2749template <typename StateType>
2750ChangeStatus clampStateAndIndicateChange(StateType &S, const StateType &R) {
auto Assumed = S.getAssumed();
S ^= R;
return Assumed == S.getAssumed() ? ChangeStatus::UNCHANGED
                                 : ChangeStatus::CHANGED;
2755}

/// ----------------------------------------------------------------------------
///                       Abstract Attribute Classes
/// ----------------------------------------------------------------------------

2761/// An abstract attribute for the returned values of a function.
2762struct AAReturnedValues
  : public IRAttribute<Attribute::Returned, AbstractAttribute> {
AAReturnedValues(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}

/// Return an assumed unique return value if a single candidate is found. If
/// there cannot be one, return a nullptr. If it is not clear yet, return the
/// Optional::NoneType.
Optional<Value *> getAssumedUniqueReturnValue(Attributor &A) const;

/// Check \p Pred on all returned values.
///
/// This method will evaluate \p Pred on returned values and return
/// true if (1) all returned values are known, and (2) \p Pred returned true
/// for all returned values.
///
/// Note: Unlike the Attributor::checkForAllReturnedValuesAndReturnInsts
/// method, this one will not filter dead return instructions.
virtual bool checkForAllReturnedValuesAndReturnInsts(
    function_ref<bool(Value &, const SmallSetVector<ReturnInst *, 4> &)> Pred)
    const = 0;

using iterator =
    MapVector<Value *, SmallSetVector<ReturnInst *, 4>>::iterator;
using const_iterator =
    MapVector<Value *, SmallSetVector<ReturnInst *, 4>>::const_iterator;
virtual llvm::iterator_range<iterator> returned_values() = 0;
virtual llvm::iterator_range<const_iterator> returned_values() const = 0;

virtual size_t getNumReturnValues() const = 0;

/// Create an abstract attribute view for the position \p IRP.
static AAReturnedValues &createForPosition(const IRPosition &IRP,
                                           Attributor &A);

/// See AbstractAttribute::getName()
const std::string getName() const override { return "AAReturnedValues"; }

/// See AbstractAttribute::getIdAddr()
const char *getIdAddr() const override { return &ID; }

/// This function should return true if the type of the \p AA is
/// AAReturnedValues
static bool classof(const AbstractAttribute *AA) {
  return (AA->getIdAddr() == &ID);
}

/// Unique ID (due to the unique address)
static const char ID;
2810};

2812struct AANoUnwind
  : public IRAttribute<Attribute::NoUnwind,
                       StateWrapper<BooleanState, AbstractAttribute>> {
AANoUnwind(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}

/// Returns true if nounwind is assumed.
bool isAssumedNoUnwind() const { return getAssumed(); }

/// Returns true if nounwind is known.
bool isKnownNoUnwind() const { return getKnown(); }

/// Create an abstract attribute view for the position \p IRP.
static AANoUnwind &createForPosition(const IRPosition &IRP, Attributor &A);

/// See AbstractAttribute::getName()
const std::string getName() const override { return "AANoUnwind"; }

/// See AbstractAttribute::getIdAddr()
const char *getIdAddr() const override { return &ID; }

/// This function should return true if the type of the \p AA is AANoUnwind
static bool classof(const AbstractAttribute *AA) {
  return (AA->getIdAddr() == &ID);
}

/// Unique ID (due to the unique address)
static const char ID;
2839};

2841struct AANoSync
  : public IRAttribute<Attribute::NoSync,
                       StateWrapper<BooleanState, AbstractAttribute>> {
AANoSync(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}

/// Returns true if "nosync" is assumed.
bool isAssumedNoSync() const { return getAssumed(); }

/// Returns true if "nosync" is known.
bool isKnownNoSync() const { return getKnown(); }

/// Create an abstract attribute view for the position \p IRP.
static AANoSync &createForPosition(const IRPosition &IRP, Attributor &A);

/// See AbstractAttribute::getName()
const std::string getName() const override { return "AANoSync"; }

/// See AbstractAttribute::getIdAddr()
const char *getIdAddr() const override { return &ID; }

/// This function should return true if the type of the \p AA is AANoSync
static bool classof(const AbstractAttribute *AA) {
  return (AA->getIdAddr() == &ID);
}

/// Unique ID (due to the unique address)
static const char ID;
2868};

2870/// An abstract interface for all nonnull attributes.
2871struct AANonNull
  : public IRAttribute<Attribute::NonNull,
                       StateWrapper<BooleanState, AbstractAttribute>> {
AANonNull(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}

/// Return true if we assume that the underlying value is nonnull.
bool isAssumedNonNull() const { return getAssumed(); }

/// Return true if we know that underlying value is nonnull.
bool isKnownNonNull() const { return getKnown(); }

/// Create an abstract attribute view for the position \p IRP.
static AANonNull &createForPosition(const IRPosition &IRP, Attributor &A);

/// See AbstractAttribute::getName()
const std::string getName() const override { return "AANonNull"; }

/// See AbstractAttribute::getIdAddr()
const char *getIdAddr() const override { return &ID; }

/// This function should return true if the type of the \p AA is AANonNull
static bool classof(const AbstractAttribute *AA) {
  return (AA->getIdAddr() == &ID);
}

/// Unique ID (due to the unique address)
static const char ID;
2898};

2900/// An abstract attribute for norecurse.
2901struct AANoRecurse
  : public IRAttribute<Attribute::NoRecurse,
                       StateWrapper<BooleanState, AbstractAttribute>> {
AANoRecurse(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}

/// Return true if "norecurse" is assumed.
bool isAssumedNoRecurse() const { return getAssumed(); }

/// Return true if "norecurse" is known.
bool isKnownNoRecurse() const { return getKnown(); }

/// Create an abstract attribute view for the position \p IRP.
static AANoRecurse &createForPosition(const IRPosition &IRP, Attributor &A);

/// See AbstractAttribute::getName()
const std::string getName() const override { return "AANoRecurse"; }

/// See AbstractAttribute::getIdAddr()
const char *getIdAddr() const override { return &ID; }

/// This function should return true if the type of the \p AA is AANoRecurse
static bool classof(const AbstractAttribute *AA) {
  return (AA->getIdAddr() == &ID);
}

/// Unique ID (due to the unique address)
static const char ID;
2928};

2930/// An abstract attribute for willreturn.
2931struct AAWillReturn
  : public IRAttribute<Attribute::WillReturn,
                       StateWrapper<BooleanState, AbstractAttribute>> {
AAWillReturn(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}

/// Return true if "willreturn" is assumed.
bool isAssumedWillReturn() const { return getAssumed(); }

/// Return true if "willreturn" is known.
bool isKnownWillReturn() const { return getKnown(); }

/// Create an abstract attribute view for the position \p IRP.
static AAWillReturn &createForPosition(const IRPosition &IRP, Attributor &A);

/// See AbstractAttribute::getName()
const std::string getName() const override { return "AAWillReturn"; }

/// See AbstractAttribute::getIdAddr()
const char *getIdAddr() const override { return &ID; }

/// This function should return true if the type of the \p AA is AAWillReturn
static bool classof(const AbstractAttribute *AA) {
  return (AA->getIdAddr() == &ID);
}

/// Unique ID (due to the unique address)
static const char ID;
2958};

2960/// An abstract attribute for undefined behavior.
2961struct AAUndefinedBehavior
  : public StateWrapper<BooleanState, AbstractAttribute> {
using Base = StateWrapper<BooleanState, AbstractAttribute>;
AAUndefinedBehavior(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

/// Return true if "undefined behavior" is assumed.
bool isAssumedToCauseUB() const { return getAssumed(); }

/// Return true if "undefined behavior" is assumed for a specific instruction.
virtual bool isAssumedToCauseUB(Instruction *I) const = 0;

/// Return true if "undefined behavior" is known.
bool isKnownToCauseUB() const { return getKnown(); }

/// Return true if "undefined behavior" is known for a specific instruction.
virtual bool isKnownToCauseUB(Instruction *I) const = 0;

/// Create an abstract attribute view for the position \p IRP.
static AAUndefinedBehavior &createForPosition(const IRPosition &IRP,
                                              Attributor &A);

/// See AbstractAttribute::getName()
const std::string getName() const override { return "AAUndefinedBehavior"; }

/// See AbstractAttribute::getIdAddr()
const char *getIdAddr() const override { return &ID; }

/// This function should return true if the type of the \p AA is
/// AAUndefineBehavior
static bool classof(const AbstractAttribute *AA) {
  return (AA->getIdAddr() == &ID);
}

/// Unique ID (due to the unique address)
static const char ID;
2996};

2998/// An abstract interface to determine reachability of point A to B.
2999struct AAReachability : public StateWrapper<BooleanState, AbstractAttribute> {
using Base = StateWrapper<BooleanState, AbstractAttribute>;
AAReachability(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

/// Returns true if 'From' instruction is assumed to reach, 'To' instruction.
/// Users should provide two positions they are interested in, and the class
/// determines (and caches) reachability.
bool isAssumedReachable(Attributor &A, const Instruction &From,
                        const Instruction &To) const {
  if (!getState().isValidState())
    return true;
  return A.getInfoCache().getPotentiallyReachable(From, To);
}

/// Returns true if 'From' instruction is known to reach, 'To' instruction.
/// Users should provide two positions they are interested in, and the class
/// determines (and caches) reachability.
bool isKnownReachable(Attributor &A, const Instruction &From,
                      const Instruction &To) const {
  if (!getState().isValidState())
    return false;
  return A.getInfoCache().getPotentiallyReachable(From, To);
}

/// Create an abstract attribute view for the position \p IRP.
static AAReachability &createForPosition(const IRPosition &IRP,
                                         Attributor &A);

/// See AbstractAttribute::getName()
const std::string getName() const override { return "AAReachability"; }

/// See AbstractAttribute::getIdAddr()
const char *getIdAddr() const override { return &ID; }

/// This function should return true if the type of the \p AA is
/// AAReachability
static bool classof(const AbstractAttribute *AA) {
  return (AA->getIdAddr() == &ID);
}

/// Unique ID (due to the unique address)
static const char ID;
3041};

3043/// An abstract interface for all noalias attributes.
3044struct AANoAlias
  : public IRAttribute<Attribute::NoAlias,
                       StateWrapper<BooleanState, AbstractAttribute>> {
AANoAlias(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}

/// Return true if we assume that the underlying value is alias.
bool isAssumedNoAlias() const { return getAssumed(); }

/// Return true if we know that underlying value is noalias.
bool isKnownNoAlias() const { return getKnown(); }

/// Create an abstract attribute view for the position \p IRP.
static AANoAlias &createForPosition(const IRPosition &IRP, Attributor &A);

/// See AbstractAttribute::getName()
const std::string getName() const override { return "AANoAlias"; }

/// See AbstractAttribute::getIdAddr()
const char *getIdAddr() const override { return &ID; }

/// This function should return true if the type of the \p AA is AANoAlias
static bool classof(const AbstractAttribute *AA) {
  return (AA->getIdAddr() == &ID);
}

/// Unique ID (due to the unique address)
static const char ID;
3071};

3073/// An AbstractAttribute for nofree.
3074struct AANoFree
  : public IRAttribute<Attribute::NoFree,
                       StateWrapper<BooleanState, AbstractAttribute>> {
AANoFree(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}

/// Return true if "nofree" is assumed.
bool isAssumedNoFree() const { return getAssumed(); }

/// Return true if "nofree" is known.
bool isKnownNoFree() const { return getKnown(); }

/// Create an abstract attribute view for the position \p IRP.
static AANoFree &createForPosition(const IRPosition &IRP, Attributor &A);

/// See AbstractAttribute::getName()
const std::string getName() const override { return "AANoFree"; }

/// See AbstractAttribute::getIdAddr()
const char *getIdAddr() const override { return &ID; }

/// This function should return true if the type of the \p AA is AANoFree
static bool classof(const AbstractAttribute *AA) {
  return (AA->getIdAddr() == &ID);
}

/// Unique ID (due to the unique address)
static const char ID;
3101};

3103/// An AbstractAttribute for noreturn.
3104struct AANoReturn
  : public IRAttribute<Attribute::NoReturn,
                       StateWrapper<BooleanState, AbstractAttribute>> {
AANoReturn(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}

/// Return true if the underlying object is assumed to never return.
bool isAssumedNoReturn() const { return getAssumed(); }

/// Return true if the underlying object is known to never return.
bool isKnownNoReturn() const { return getKnown(); }

/// Create an abstract attribute view for the position \p IRP.
static AANoReturn &createForPosition(const IRPosition &IRP, Attributor &A);

/// See AbstractAttribute::getName()
const std::string getName() const override { return "AANoReturn"; }

/// See AbstractAttribute::getIdAddr()
const char *getIdAddr() const override { return &ID; }

/// This function should return true if the type of the \p AA is AANoReturn
static bool classof(const AbstractAttribute *AA) {
  return (AA->getIdAddr() == &ID);
}

/// Unique ID (due to the unique address)
static const char ID;
3131};

3133/// An abstract interface for liveness abstract attribute.
3134struct AAIsDead
  : public StateWrapper<BitIntegerState<uint8_t, 3, 0>, AbstractAttribute> {
using Base = StateWrapper<BitIntegerState<uint8_t, 3, 0>, AbstractAttribute>;
AAIsDead(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

/// State encoding bits. A set bit in the state means the property holds.
enum {
  HAS_NO_EFFECT = 1 << 0,
  IS_REMOVABLE = 1 << 1,

  IS_DEAD = HAS_NO_EFFECT | IS_REMOVABLE,
};
static_assert(IS_DEAD == getBestState(), "Unexpected BEST_STATE value");

3148protected:
/// The query functions are protected such that other attributes need to go
/// through the Attributor interfaces: `Attributor::isAssumedDead(...)`

/// Returns true if the underlying value is assumed dead.
virtual bool isAssumedDead() const = 0;

/// Returns true if the underlying value is known dead.
virtual bool isKnownDead() const = 0;

/// Returns true if \p BB is assumed dead.
virtual bool isAssumedDead(const BasicBlock *BB) const = 0;

/// Returns true if \p BB is known dead.
virtual bool isKnownDead(const BasicBlock *BB) const = 0;

/// Returns true if \p I is assumed dead.
virtual bool isAssumedDead(const Instruction *I) const = 0;

/// Returns true if \p I is known dead.
virtual bool isKnownDead(const Instruction *I) const = 0;

/// This method is used to check if at least one instruction in a collection
/// of instructions is live.
template <typename T> bool isLiveInstSet(T begin, T end) const {
  for (const auto &I : llvm::make_range(begin, end)) {
    assert(I->getFunction() == getIRPosition().getAssociatedFunction() &&((void)0)
           "Instruction must be in the same anchor scope function.")((void)0);

    if (!isAssumedDead(I))
      return true;
  }

  return false;
}

3184public:
/// Create an abstract attribute view for the position \p IRP.
static AAIsDead &createForPosition(const IRPosition &IRP, Attributor &A);

/// Determine if \p F might catch asynchronous exceptions.
static bool mayCatchAsynchronousExceptions(const Function &F) {
  return F.hasPersonalityFn() && !canSimplifyInvokeNoUnwind(&F);
}

/// Return if the edge from \p From BB to \p To BB is assumed dead.
/// This is specifically useful in AAReachability.
virtual bool isEdgeDead(const BasicBlock *From, const BasicBlock *To) const {
  return false;
}

/// See AbstractAttribute::getName()
const std::string getName() const override { return "AAIsDead"; }

/// See AbstractAttribute::getIdAddr()
const char *getIdAddr() const override { return &ID; }

/// This function should return true if the type of the \p AA is AAIsDead
static bool classof(const AbstractAttribute *AA) {
  return (AA->getIdAddr() == &ID);
}

/// Unique ID (due to the unique address)
static const char ID;

friend struct Attributor;
3214};

3216/// State for dereferenceable attribute
3217struct DerefState : AbstractState {

static DerefState getBestState() { return DerefState(); }
static DerefState getBestState(const DerefState &) { return getBestState(); }

/// Return the worst possible representable state.
static DerefState getWorstState() {
  DerefState DS;
  DS.indicatePessimisticFixpoint();
  return DS;
}
static DerefState getWorstState(const DerefState &) {
  return getWorstState();
}

/// State representing for dereferenceable bytes.
IncIntegerState<> DerefBytesState;

/// Map representing for accessed memory offsets and sizes.
/// A key is Offset and a value is size.
/// If there is a load/store instruction something like,
///   p[offset] = v;
/// (offset, sizeof(v)) will be inserted to this map.
/// std::map is used because we want to iterate keys in ascending order.
std::map<int64_t, uint64_t> AccessedBytesMap;

/// Helper function to calculate dereferenceable bytes from current known
/// bytes and accessed bytes.
///
/// int f(int *A){
///    *A = 0;
///    *(A+2) = 2;
///    *(A+1) = 1;
///    *(A+10) = 10;
/// }
/// ```
/// In that case, AccessedBytesMap is `{0:4, 4:4, 8:4, 40:4}`.
/// AccessedBytesMap is std::map so it is iterated in accending order on
/// key(Offset). So KnownBytes will be updated like this:
///
/// |Access | KnownBytes
/// |(0, 4)| 0 -> 4
/// |(4, 4)| 4 -> 8
/// |(8, 4)| 8 -> 12
/// |(40, 4) | 12 (break)
void computeKnownDerefBytesFromAccessedMap() {
  int64_t KnownBytes = DerefBytesState.getKnown();
  for (auto &Access : AccessedBytesMap) {
    if (KnownBytes < Access.first)
      break;
    KnownBytes = std::max(KnownBytes, Access.first + (int64_t)Access.second);
  }

  DerefBytesState.takeKnownMaximum(KnownBytes);
}

/// State representing that whether the value is globaly dereferenceable.
BooleanState GlobalState;

/// See AbstractState::isValidState()
bool isValidState() const override { return DerefBytesState.isValidState(); }

/// See AbstractState::isAtFixpoint()
bool isAtFixpoint() const override {
  return !isValidState() ||
         (DerefBytesState.isAtFixpoint() && GlobalState.isAtFixpoint());
}

/// See AbstractState::indicateOptimisticFixpoint(...)
ChangeStatus indicateOptimisticFixpoint() override {
  DerefBytesState.indicateOptimisticFixpoint();
  GlobalState.indicateOptimisticFixpoint();
  return ChangeStatus::UNCHANGED;
}

/// See AbstractState::indicatePessimisticFixpoint(...)
ChangeStatus indicatePessimisticFixpoint() override {
  DerefBytesState.indicatePessimisticFixpoint();
  GlobalState.indicatePessimisticFixpoint();
  return ChangeStatus::CHANGED;
}

/// Update known dereferenceable bytes.
void takeKnownDerefBytesMaximum(uint64_t Bytes) {
  DerefBytesState.takeKnownMaximum(Bytes);

  // Known bytes might increase.
  computeKnownDerefBytesFromAccessedMap();
}

/// Update assumed dereferenceable bytes.
void takeAssumedDerefBytesMinimum(uint64_t Bytes) {
  DerefBytesState.takeAssumedMinimum(Bytes);
}

/// Add accessed bytes to the map.
void addAccessedBytes(int64_t Offset, uint64_t Size) {
  uint64_t &AccessedBytes = AccessedBytesMap[Offset];
  AccessedBytes = std::max(AccessedBytes, Size);

  // Known bytes might increase.
  computeKnownDerefBytesFromAccessedMap();
}

/// Equality for DerefState.
bool operator==(const DerefState &R) const {
  return this->DerefBytesState == R.DerefBytesState &&
         this->GlobalState == R.GlobalState;
}

/// Inequality for DerefState.
bool operator!=(const DerefState &R) const { return !(*this == R); }

/// See IntegerStateBase::operator^=
DerefState operator^=(const DerefState &R) {
  DerefBytesState ^= R.DerefBytesState;
  GlobalState ^= R.GlobalState;
  return *this;
}

/// See IntegerStateBase::operator+=
DerefState operator+=(const DerefState &R) {
  DerefBytesState += R.DerefBytesState;
  GlobalState += R.GlobalState;
  return *this;
}

/// See IntegerStateBase::operator&=
DerefState operator&=(const DerefState &R) {
  DerefBytesState &= R.DerefBytesState;
  GlobalState &= R.GlobalState;
  return *this;
}

/// See IntegerStateBase::operator|=
DerefState operator|=(const DerefState &R) {
  DerefBytesState |= R.DerefBytesState;
  GlobalState |= R.GlobalState;
  return *this;
}

3358protected:
const AANonNull *NonNullAA = nullptr;
3360};

3362/// An abstract interface for all dereferenceable attribute.
3363struct AADereferenceable
  : public IRAttribute<Attribute::Dereferenceable,
                       StateWrapper<DerefState, AbstractAttribute>> {
AADereferenceable(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}

/// Return true if we assume that the underlying value is nonnull.
bool isAssumedNonNull() const {
  return NonNullAA && NonNullAA->isAssumedNonNull();
}

/// Return true if we know that the underlying value is nonnull.
bool isKnownNonNull() const {
  return NonNullAA && NonNullAA->isKnownNonNull();
}

/// Return true if we assume that underlying value is
/// dereferenceable(_or_null) globally.
bool isAssumedGlobal() const { return GlobalState.getAssumed(); }

/// Return true if we know that underlying value is
/// dereferenceable(_or_null) globally.
bool isKnownGlobal() const { return GlobalState.getKnown(); }

/// Return assumed dereferenceable bytes.
uint32_t getAssumedDereferenceableBytes() const {
  return DerefBytesState.getAssumed();
}

/// Return known dereferenceable bytes.
uint32_t getKnownDereferenceableBytes() const {
  return DerefBytesState.getKnown();
}

/// Create an abstract attribute view for the position \p IRP.
static AADereferenceable &createForPosition(const IRPosition &IRP,
                                            Attributor &A);

/// See AbstractAttribute::getName()
const std::string getName() const override { return "AADereferenceable"; }

/// See AbstractAttribute::getIdAddr()
const char *getIdAddr() const override { return &ID; }

/// This function should return true if the type of the \p AA is
/// AADereferenceable
static bool classof(const AbstractAttribute *AA) {
  return (AA->getIdAddr() == &ID);
}

/// Unique ID (due to the unique address)
static const char ID;
3414};

/// State type used by AAAlign: an IncIntegerState over uint32_t instantiated
/// with Value::MaximumAlignment and 1.
// NOTE(review): presumably these two arguments are the best and worst
// alignment values; confirm against the IncIntegerState template parameters.
3416using AAAlignmentStateType =
  IncIntegerState<uint32_t, Value::MaximumAlignment, 1>;
3418/// An abstract interface for all align attributes.
3419struct AAAlign : public IRAttribute<
                   Attribute::Alignment,
                   StateWrapper<AAAlignmentStateType, AbstractAttribute>> {
AAAlign(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}

/// Return assumed alignment.
unsigned getAssumedAlign() const { return getAssumed(); }

/// Return known alignment.
unsigned getKnownAlign() const { return getKnown(); }

/// See AbstractAttribute::getName()
const std::string getName() const override { return "AAAlign"; }

/// See AbstractAttribute::getIdAddr()
const char *getIdAddr() const override { return &ID; }

/// This function should return true if the type of the \p AA is AAAlign
static bool classof(const AbstractAttribute *AA) {
  return (AA->getIdAddr() == &ID);
}

/// Create an abstract attribute view for the position \p IRP.
static AAAlign &createForPosition(const IRPosition &IRP, Attributor &A);

/// Unique ID (due to the unique address)
static const char ID;
3446};

3448/// An abstract interface for all nocapture attributes.
3449struct AANoCapture
  : public IRAttribute<
        Attribute::NoCapture,
        StateWrapper<BitIntegerState<uint16_t, 7, 0>, AbstractAttribute>> {
AANoCapture(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}

/// State encoding bits. A set bit in the state means the property holds.
/// NO_CAPTURE is the best possible state, 0 the worst possible state.
enum {
  NOT_CAPTURED_IN_MEM = 1 << 0,
  NOT_CAPTURED_IN_INT = 1 << 1,
  NOT_CAPTURED_IN_RET = 1 << 2,

  /// If we do not capture the value in memory or through integers we can only
  /// communicate it back as a derived pointer.
  NO_CAPTURE_MAYBE_RETURNED = NOT_CAPTURED_IN_MEM | NOT_CAPTURED_IN_INT,

  /// If we do not capture the value in memory, through integers, or as a
  /// derived pointer we know it is not captured.
  NO_CAPTURE =
      NOT_CAPTURED_IN_MEM | NOT_CAPTURED_IN_INT | NOT_CAPTURED_IN_RET,
};

/// Return true if we know that the underlying value is not captured in its
/// respective scope.
bool isKnownNoCapture() const { return isKnown(NO_CAPTURE); }

/// Return true if we assume that the underlying value is not captured in its
/// respective scope.
bool isAssumedNoCapture() const { return isAssumed(NO_CAPTURE); }

/// Return true if we know that the underlying value is not captured in its
/// respective scope but we allow it to escape through a "return".
bool isKnownNoCaptureMaybeReturned() const {
  return isKnown(NO_CAPTURE_MAYBE_RETURNED);
}

/// Return true if we assume that the underlying value is not captured in its
/// respective scope but we allow it to escape through a "return".
bool isAssumedNoCaptureMaybeReturned() const {
  return isAssumed(NO_CAPTURE_MAYBE_RETURNED);
}

/// Create an abstract attribute view for the position \p IRP.
static AANoCapture &createForPosition(const IRPosition &IRP, Attributor &A);

/// See AbstractAttribute::getName()
const std::string getName() const override { return "AANoCapture"; }

/// See AbstractAttribute::getIdAddr()
const char *getIdAddr() const override { return &ID; }

/// This function should return true if the type of the \p AA is AANoCapture
static bool classof(const AbstractAttribute *AA) {
  return (AA->getIdAddr() == &ID);
}

/// Unique ID (due to the unique address)
static const char ID;
3508};

3510struct ValueSimplifyStateType : public AbstractState {

ValueSimplifyStateType(Type *Ty) : Ty(Ty) {}

static ValueSimplifyStateType getBestState(Type *Ty) {
  return ValueSimplifyStateType(Ty);
}
static ValueSimplifyStateType getBestState(const ValueSimplifyStateType &VS) {
  return getBestState(VS.Ty);
}

/// Return the worst possible representable state.
static ValueSimplifyStateType getWorstState(Type *Ty) {
  ValueSimplifyStateType DS(Ty);
  DS.indicatePessimisticFixpoint();
  return DS;
}
static ValueSimplifyStateType
getWorstState(const ValueSimplifyStateType &VS) {
  return getWorstState(VS.Ty);
}

/// See AbstractState::isValidState(...)
bool isValidState() const override { return BS.isValidState(); }

/// See AbstractState::isAtFixpoint(...)
bool isAtFixpoint() const override { return BS.isAtFixpoint(); }

/// Return the assumed state encoding.
ValueSimplifyStateType getAssumed() { return *this; }
const ValueSimplifyStateType &getAssumed() const { return *this; }

/// See AbstractState::indicatePessimisticFixpoint(...)
ChangeStatus indicatePessimisticFixpoint() override {
  return BS.indicatePessimisticFixpoint();
}

/// See AbstractState::indicateOptimisticFixpoint(...)
ChangeStatus indicateOptimisticFixpoint() override {
  return BS.indicateOptimisticFixpoint();
}

/// "Clamp" this state with \p PVS.
ValueSimplifyStateType operator^=(const ValueSimplifyStateType &VS) {
  BS ^= VS.BS;
  unionAssumed(VS.SimplifiedAssociatedValue);
  return *this;
}

bool operator==(const ValueSimplifyStateType &RHS) const {
  if (isValidState() != RHS.isValidState())
    return false;
  if (!isValidState() && !RHS.isValidState())
    return true;
  return SimplifiedAssociatedValue == RHS.SimplifiedAssociatedValue;
}

3567protected:
/// The type of the original value.
Type *Ty;

/// Merge \p Other into the currently assumed simplified value
bool unionAssumed(Optional<Value *> Other);

/// Helper to track validity and fixpoint
BooleanState BS;

/// An assumed simplified value. Initially, it is set to Optional::None, which
/// means that the value is not clear under current assumption. If in the
/// pessimistic state, getAssumedSimplifiedValue doesn't return this value but
/// returns orignal associated value.
Optional<Value *> SimplifiedAssociatedValue;
3582};

3584/// An abstract interface for value simplify abstract attribute.
3585struct AAValueSimplify
  : public StateWrapper<ValueSimplifyStateType, AbstractAttribute, Type *> {
using Base = StateWrapper<ValueSimplifyStateType, AbstractAttribute, Type *>;
AAValueSimplify(const IRPosition &IRP, Attributor &A)
    : Base(IRP, IRP.getAssociatedType()) {}

/// Create an abstract attribute view for the position \p IRP.
static AAValueSimplify &createForPosition(const IRPosition &IRP,
                                          Attributor &A);

/// See AbstractAttribute::getName()
const std::string getName() const override { return "AAValueSimplify"; }

/// See AbstractAttribute::getIdAddr()
const char *getIdAddr() const override { return &ID; }

/// This function should return true if the type of the \p AA is
/// AAValueSimplify
static bool classof(const AbstractAttribute *AA) {
  return (AA->getIdAddr() == &ID);
}

/// Unique ID (due to the unique address)
static const char ID;

3610private:
/// Return an assumed simplified value if a single candidate is found. If
/// there cannot be one, return original value. If it is not clear yet, return
/// the Optional::NoneType.
///
/// Use `Attributor::getAssumedSimplified` for value simplification.
virtual Optional<Value *> getAssumedSimplifiedValue(Attributor &A) const = 0;

friend struct Attributor;
3619};

3621struct AAHeapToStack : public StateWrapper<BooleanState, AbstractAttribute> {
using Base = StateWrapper<BooleanState, AbstractAttribute>;
AAHeapToStack(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

/// Returns true if HeapToStack conversion is assumed to be possible.
virtual bool isAssumedHeapToStack(const CallBase &CB) const = 0;

/// Returns true if HeapToStack conversion is assumed and the CB is a
/// callsite to a free operation to be removed.
virtual bool isAssumedHeapToStackRemovedFree(CallBase &CB) const = 0;

/// Create an abstract attribute view for the position \p IRP.
static AAHeapToStack &createForPosition(const IRPosition &IRP, Attributor &A);

/// See AbstractAttribute::getName()
const std::string getName() const override { return "AAHeapToStack"; }

/// See AbstractAttribute::getIdAddr()
const char *getIdAddr() const override { return &ID; }

/// This function should return true if the type of the \p AA is AAHeapToStack
static bool classof(const AbstractAttribute *AA) {
  return (AA->getIdAddr() == &ID);
}

/// Unique ID (due to the unique address)
static const char ID;
3648};

3650/// An abstract interface for privatizability.
3651///
3652/// A pointer is privatizable if it can be replaced by a new, private one.
3653/// Privatizing pointer reduces the use count, interaction between unrelated
3654/// code parts.
3655///
3656/// In order for a pointer to be privatizable its value cannot be observed
3657/// (=nocapture), it is (for now) not written (=readonly & noalias), we know
3658/// what values are necessary to make the private copy look like the original
3659/// one, and the values we need can be loaded (=dereferenceable).
3660struct AAPrivatizablePtr
  : public StateWrapper<BooleanState, AbstractAttribute> {
using Base = StateWrapper<BooleanState, AbstractAttribute>;
AAPrivatizablePtr(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

/// Returns true if pointer privatization is assumed to be possible.
bool isAssumedPrivatizablePtr() const { return getAssumed(); }

/// Returns true if pointer privatization is known to be possible.
bool isKnownPrivatizablePtr() const { return getKnown(); }

/// Return the type we can choose for a private copy of the underlying
/// value. None means it is not clear yet, nullptr means there is none.
virtual Optional<Type *> getPrivatizableType() const = 0;

/// Create an abstract attribute view for the position \p IRP.
static AAPrivatizablePtr &createForPosition(const IRPosition &IRP,
                                            Attributor &A);

/// See AbstractAttribute::getName()
const std::string getName() const override { return "AAPrivatizablePtr"; }

/// See AbstractAttribute::getIdAddr()
const char *getIdAddr() const override { return &ID; }

/// This function should return true if the type of the \p AA is
/// AAPricatizablePtr
static bool classof(const AbstractAttribute *AA) {
  return (AA->getIdAddr() == &ID);
}

/// Unique ID (due to the unique address)
static const char ID;
3693};

3695/// An abstract interface for memory access kind related attributes
3696/// (readnone/readonly/writeonly).
3697struct AAMemoryBehavior
  : public IRAttribute<
        Attribute::ReadNone,
        StateWrapper<BitIntegerState<uint8_t, 3>, AbstractAttribute>> {
AAMemoryBehavior(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}

/// State encoding bits. A set bit in the state means the property holds.
/// BEST_STATE is the best possible state, 0 the worst possible state.
enum {
  NO_READS = 1 << 0,
  NO_WRITES = 1 << 1,
  NO_ACCESSES = NO_READS | NO_WRITES,

  BEST_STATE = NO_ACCESSES,
};
static_assert(BEST_STATE == getBestState(), "Unexpected BEST_STATE value");

/// Return true if we know that the underlying value is not read or accessed
/// in its respective scope.
bool isKnownReadNone() const { return isKnown(NO_ACCESSES); }

/// Return true if we assume that the underlying value is not read or accessed
/// in its respective scope.
bool isAssumedReadNone() const { return isAssumed(NO_ACCESSES); }

/// Return true if we know that the underlying value is not accessed
/// (=written) in its respective scope.
bool isKnownReadOnly() const { return isKnown(NO_WRITES); }

/// Return true if we assume that the underlying value is not accessed
/// (=written) in its respective scope.
bool isAssumedReadOnly() const { return isAssumed(NO_WRITES); }

/// Return true if we know that the underlying value is not read in its
/// respective scope.
bool isKnownWriteOnly() const { return isKnown(NO_READS); }

/// Return true if we assume that the underlying value is not read in its
/// respective scope.
bool isAssumedWriteOnly() const { return isAssumed(NO_READS); }

/// Create an abstract attribute view for the position \p IRP.
static AAMemoryBehavior &createForPosition(const IRPosition &IRP,
                                           Attributor &A);

/// See AbstractAttribute::getName()
const std::string getName() const override { return "AAMemoryBehavior"; }

/// See AbstractAttribute::getIdAddr()
const char *getIdAddr() const override { return &ID; }

/// This function should return true if the type of the \p AA is
/// AAMemoryBehavior
static bool classof(const AbstractAttribute *AA) {
  return (AA->getIdAddr() == &ID);
}

/// Unique ID (due to the unique address)
static const char ID;
3756};

3758/// An abstract interface for all memory location attributes
3759/// (readnone/argmemonly/inaccessiblememonly/inaccessibleorargmemonly).
3760struct AAMemoryLocation
  : public IRAttribute<
        Attribute::ReadNone,
        StateWrapper<BitIntegerState<uint32_t, 511>, AbstractAttribute>> {
using MemoryLocationsKind = StateType::base_t;

AAMemoryLocation(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}

/// Encoding of different locations that could be accessed by a memory
/// access.
enum {
  ALL_LOCATIONS = 0,
  NO_LOCAL_MEM = 1 << 0,
  NO_CONST_MEM = 1 << 1,
  NO_GLOBAL_INTERNAL_MEM = 1 << 2,
  NO_GLOBAL_EXTERNAL_MEM = 1 << 3,
  NO_GLOBAL_MEM = NO_GLOBAL_INTERNAL_MEM | NO_GLOBAL_EXTERNAL_MEM,
  NO_ARGUMENT_MEM = 1 << 4,
  NO_INACCESSIBLE_MEM = 1 << 5,
  NO_MALLOCED_MEM = 1 << 6,
  NO_UNKOWN_MEM = 1 << 7,
  NO_LOCATIONS = NO_LOCAL_MEM | NO_CONST_MEM | NO_GLOBAL_INTERNAL_MEM |
                 NO_GLOBAL_EXTERNAL_MEM | NO_ARGUMENT_MEM |
                 NO_INACCESSIBLE_MEM | NO_MALLOCED_MEM | NO_UNKOWN_MEM,

  // Helper bit to track if we gave up or not.
  VALID_STATE = NO_LOCATIONS + 1,

  BEST_STATE = NO_LOCATIONS | VALID_STATE,
};
static_assert(BEST_STATE == getBestState(), "Unexpected BEST_STATE value");

/// Return true if we know that the associated functions has no observable
/// accesses.
bool isKnownReadNone() const { return isKnown(NO_LOCATIONS); }

/// Return true if we assume that the associated functions has no observable
/// accesses.
bool isAssumedReadNone() const {
  return isAssumed(NO_LOCATIONS) | isAssumedStackOnly();
}

/// Return true if we know that the associated functions has at most
/// local/stack accesses.
bool isKnowStackOnly() const {
  return isKnown(inverseLocation(NO_LOCAL_MEM, true, true));
}

/// Return true if we assume that the associated functions has at most
/// local/stack accesses.
bool isAssumedStackOnly() const {
  return isAssumed(inverseLocation(NO_LOCAL_MEM, true, true));
}

/// Return true if we know that the underlying value will only access
/// inaccesible memory only (see Attribute::InaccessibleMemOnly).
bool isKnownInaccessibleMemOnly() const {
  return isKnown(inverseLocation(NO_INACCESSIBLE_MEM, true, true));
}

/// Return true if we assume that the underlying value will only access
/// inaccesible memory only (see Attribute::InaccessibleMemOnly).
bool isAssumedInaccessibleMemOnly() const {
  return isAssumed(inverseLocation(NO_INACCESSIBLE_MEM, true, true));
}

/// Return true if we know that the underlying value will only access
/// argument pointees (see Attribute::ArgMemOnly).
bool isKnownArgMemOnly() const {
  return isKnown(inverseLocation(NO_ARGUMENT_MEM, true, true));
}

/// Return true if we assume that the underlying value will only access
/// argument pointees (see Attribute::ArgMemOnly).
bool isAssumedArgMemOnly() const {
  return isAssumed(inverseLocation(NO_ARGUMENT_MEM, true, true));
}

/// Return true if we know that the underlying value will only access
/// inaccesible memory or argument pointees (see
/// Attribute::InaccessibleOrArgMemOnly).
bool isKnownInaccessibleOrArgMemOnly() const {
  return isKnown(
      inverseLocation(NO_INACCESSIBLE_MEM | NO_ARGUMENT_MEM, true, true));
}

/// Return true if we assume that the underlying value will only access
/// inaccesible memory or argument pointees (see
/// Attribute::InaccessibleOrArgMemOnly).
bool isAssumedInaccessibleOrArgMemOnly() const {
  return isAssumed(
      inverseLocation(NO_INACCESSIBLE_MEM | NO_ARGUMENT_MEM, true, true));
}

/// Return true if the underlying value may access memory through arguement
/// pointers of the associated function, if any.
bool mayAccessArgMem() const { return !isAssumed(NO_ARGUMENT_MEM); }

/// Return true if only the memory locations specififed by \p MLK are assumed
/// to be accessed by the associated function.
bool isAssumedSpecifiedMemOnly(MemoryLocationsKind MLK) const {
  return isAssumed(MLK);
}

/// Return the locations that are assumed to be not accessed by the associated
/// function, if any.
MemoryLocationsKind getAssumedNotAccessedLocation() const {
  return getAssumed();
}

/// Return the inverse of location \p Loc, thus for NO_XXX the return
/// describes ONLY_XXX. The flags \p AndLocalMem and \p AndConstMem determine
/// if local (=stack) and constant memory are allowed as well. Most of the
/// time we do want them to be included, e.g., argmemonly allows accesses via
/// argument pointers or local or constant memory accesses.
static MemoryLocationsKind
inverseLocation(MemoryLocationsKind Loc, bool AndLocalMem, bool AndConstMem) {
  return NO_LOCATIONS & ~(Loc | (AndLocalMem ? NO_LOCAL_MEM : 0) |
                          (AndConstMem ? NO_CONST_MEM : 0));
};

/// Return the locations encoded by \p MLK as a readable string.
static std::string getMemoryLocationsAsStr(MemoryLocationsKind MLK);

/// Simple enum to distinguish read/write/read-write accesses.
enum AccessKind {
  NONE = 0,
  READ = 1 << 0,
  WRITE = 1 << 1,
  READ_WRITE = READ | WRITE,
};

/// Check \p Pred on all accesses to the memory kinds specified by \p MLK.
///
/// This method will evaluate \p Pred on all accesses (access instruction +
/// underlying accessed memory pointer) and it will return true if \p Pred
/// holds every time.
virtual bool checkForAllAccessesToMemoryKind(
    function_ref<bool(const Instruction *, const Value *, AccessKind,
                      MemoryLocationsKind)>
        Pred,
    MemoryLocationsKind MLK) const = 0;

/// Create an abstract attribute view for the position \p IRP.
static AAMemoryLocation &createForPosition(const IRPosition &IRP,
                                           Attributor &A);

/// See AbstractState::getAsStr().
const std::string getAsStr() const override {
  return getMemoryLocationsAsStr(getAssumedNotAccessedLocation());
}

/// See AbstractAttribute::getName()
const std::string getName() const override { return "AAMemoryLocation"; }

/// See AbstractAttribute::getIdAddr()
const char *getIdAddr() const override { return &ID; }

/// This function should return true if the type of the \p AA is
/// AAMemoryLocation
static bool classof(const AbstractAttribute *AA) {
  return (AA->getIdAddr() == &ID);
}

/// Unique ID (due to the unique address)
static const char ID;
3926};

3928/// An abstract interface for range value analysis.
3929struct AAValueConstantRange
  : public StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t> {
using Base = StateWrapper<IntegerRangeState, AbstractAttribute, uint32_t>;
AAValueConstantRange(const IRPosition &IRP, Attributor &A)
    : Base(IRP, IRP.getAssociatedType()->getIntegerBitWidth()) {}

/// See AbstractAttribute::getState(...).
IntegerRangeState &getState() override { return *this; }
const IntegerRangeState &getState() const override { return *this; }

/// Create an abstract attribute view for the position \p IRP.
static AAValueConstantRange &createForPosition(const IRPosition &IRP,
                                               Attributor &A);

/// Return an assumed range for the assocaited value a program point \p CtxI.
/// If \p I is nullptr, simply return an assumed range.
virtual ConstantRange
getAssumedConstantRange(Attributor &A,
                        const Instruction *CtxI = nullptr) const = 0;

/// Return a known range for the assocaited value at a program point \p CtxI.
/// If \p I is nullptr, simply return a known range.
virtual ConstantRange
getKnownConstantRange(Attributor &A,
                      const Instruction *CtxI = nullptr) const = 0;

/// Return an assumed constant for the assocaited value a program point \p
/// CtxI.
Optional<ConstantInt *>
getAssumedConstantInt(Attributor &A,
                      const Instruction *CtxI = nullptr) const {
  ConstantRange RangeV = getAssumedConstantRange(A, CtxI);
  if (auto *C = RangeV.getSingleElement())
    return cast<ConstantInt>(
        ConstantInt::get(getAssociatedValue().getType(), *C));
  if (RangeV.isEmptySet())
    return llvm::None;
  return nullptr;
}

/// See AbstractAttribute::getName()
const std::string getName() const override { return "AAValueConstantRange"; }

/// See AbstractAttribute::getIdAddr()
const char *getIdAddr() const override { return &ID; }

/// This function should return true if the type of the \p AA is
/// AAValueConstantRange
static bool classof(const AbstractAttribute *AA) {
  return (AA->getIdAddr() == &ID);
}

/// Unique ID (due to the unique address)
static const char ID;
3983};

3985/// A class for a set state.
3986/// The assumed boolean state indicates whether the corresponding set is full
3987/// set or not. If the assumed state is false, this is the worst state. The
3988/// worst state (invalid state) of set of potential values is when the set
3989/// contains every possible value (i.e. we cannot in any way limit the value
3990/// that the target position can take). That never happens naturally, we only
3991/// force it. As for the conditions under which we force it, see
3992/// AAPotentialValues.
3993template <typename MemberTy, typename KeyInfo = DenseMapInfo<MemberTy>>
3994struct PotentialValuesState : AbstractState {
using SetTy = DenseSet<MemberTy, KeyInfo>;

PotentialValuesState() : IsValidState(true), UndefIsContained(false) {}

PotentialValuesState(bool IsValid)
    : IsValidState(IsValid), UndefIsContained(false) {}

/// See AbstractState::isValidState(...)
bool isValidState() const override { return IsValidState.isValidState(); }

/// See AbstractState::isAtFixpoint(...)
bool isAtFixpoint() const override { return IsValidState.isAtFixpoint(); }

/// See AbstractState::indicatePessimisticFixpoint(...)
ChangeStatus indicatePessimisticFixpoint() override {
  return IsValidState.indicatePessimisticFixpoint();
}

/// See AbstractState::indicateOptimisticFixpoint(...)
ChangeStatus indicateOptimisticFixpoint() override {
  return IsValidState.indicateOptimisticFixpoint();
}

/// Return the assumed state
PotentialValuesState &getAssumed() { return *this; }
const PotentialValuesState &getAssumed() const { return *this; }

/// Return this set. We should check whether this set is valid or not by
/// isValidState() before calling this function.
const SetTy &getAssumedSet() const {
  assert(isValidState() && "This set shoud not be used when it is invalid!")((void)0);
  return Set;
}

/// Returns whether this state contains an undef value or not.
bool undefIsContained() const {
  assert(isValidState() && "This flag shoud not be used when it is invalid!")((void)0);
  return UndefIsContained;
}

bool operator==(const PotentialValuesState &RHS) const {
  if (isValidState() != RHS.isValidState())
    return false;
  if (!isValidState() && !RHS.isValidState())
    return true;
  if (undefIsContained() != RHS.undefIsContained())
    return false;
  return Set == RHS.getAssumedSet();
}

/// Maximum number of potential values to be tracked.
/// This is set by -attributor-max-potential-values command line option
static unsigned MaxPotentialValues;

/// Return empty set as the best state of potential values.
static PotentialValuesState getBestState() {
  return PotentialValuesState(true);
}

static PotentialValuesState getBestState(PotentialValuesState &PVS) {
  return getBestState();
}

/// Return full set as the worst state of potential values.
static PotentialValuesState getWorstState() {
  return PotentialValuesState(false);
}

/// Union assumed set with the passed value.
void unionAssumed(const MemberTy &C) { insert(C); }

/// Union assumed set with assumed set of the passed state \p PVS.
void unionAssumed(const PotentialValuesState &PVS) { unionWith(PVS); }

/// Union assumed set with an undef value.
void unionAssumedWithUndef() { unionWithUndef(); }

/// "Clamp" this state with \p PVS.
PotentialValuesState operator^=(const PotentialValuesState &PVS) {
  IsValidState ^= PVS.IsValidState;
  unionAssumed(PVS);
  return *this;
}

PotentialValuesState operator&=(const PotentialValuesState &PVS) {
  IsValidState &= PVS.IsValidState;
  unionAssumed(PVS);
  return *this;
}

4085private:
/// Check the size of this set, and invalidate when the size is no
/// less than \p MaxPotentialValues threshold.
void checkAndInvalidate() {
  if (Set.size() >= MaxPotentialValues)
    indicatePessimisticFixpoint();
  else
    reduceUndefValue();
}

/// If this state contains both undef and not undef, we can reduce
/// undef to the not undef value.
void reduceUndefValue() { UndefIsContained = UndefIsContained & Set.empty(); }

/// Insert an element into this set.
void insert(const MemberTy &C) {
  if (!isValidState())
    return;
  Set.insert(C);
  checkAndInvalidate();
}

/// Take union with R.
void unionWith(const PotentialValuesState &R) {
  /// If this is a full set, do nothing.
  if (!isValidState())
    return;
  /// If R is full set, change L to a full set.
  if (!R.isValidState()) {
    indicatePessimisticFixpoint();
    return;
  }
  for (const MemberTy &C : R.Set)
    Set.insert(C);
  UndefIsContained |= R.undefIsContained();
  checkAndInvalidate();
}

/// Take union with an undef value.
void unionWithUndef() {
  UndefIsContained = true;
  reduceUndefValue();
}

/// Take intersection with R.
void intersectWith(const PotentialValuesState &R) {
  /// If R is a full set, do nothing.
  if (!R.isValidState())
    return;
  /// If this is a full set, change this to R.
  if (!isValidState()) {
    *this = R;
    return;
  }
  SetTy IntersectSet;
  for (const MemberTy &C : Set) {
    if (R.Set.count(C))
      IntersectSet.insert(C);
  }
  Set = IntersectSet;
  UndefIsContained &= R.undefIsContained();
  reduceUndefValue();
}

/// A helper state which indicate whether this state is valid or not.
BooleanState IsValidState;

/// Container for potential values
SetTy Set;

/// Flag for undef value
bool UndefIsContained;
4157};

4159using PotentialConstantIntValuesState = PotentialValuesState<APInt>;

4161raw_ostream &operator<<(raw_ostream &OS,
                      const PotentialConstantIntValuesState &R);

4164/// An abstract interface for potential values analysis.
4165///
4166/// This AA collects potential values for each IR position.
4167/// An assumed set of potential values is initialized with the empty set (the
4168/// best state) and it will grow monotonically as we find more potential values
4169/// for this position.
4170/// The set might be forced to the worst state, that is, to contain every
4171/// possible value for this position in 2 cases.
4172///   1. We surpassed the \p MaxPotentialValues threshold. This includes the
4173///      case that this position is affected (e.g. because of an operation) by a
4174///      Value that is in the worst state.
4175///   2. We tried to initialize on a Value that we cannot handle (e.g. an
4176///      operator we do not currently handle).
4177///
4178/// TODO: Support values other than constant integers.
4179struct AAPotentialValues
  : public StateWrapper<PotentialConstantIntValuesState, AbstractAttribute> {
using Base = StateWrapper<PotentialConstantIntValuesState, AbstractAttribute>;
AAPotentialValues(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

/// See AbstractAttribute::getState(...).
PotentialConstantIntValuesState &getState() override { return *this; }
const PotentialConstantIntValuesState &getState() const override {
  return *this;
}

/// Create an abstract attribute view for the position \p IRP.
static AAPotentialValues &createForPosition(const IRPosition &IRP,
                                            Attributor &A);

/// Return assumed constant for the associated value
Optional<ConstantInt *>
getAssumedConstantInt(Attributor &A,
                      const Instruction *CtxI = nullptr) const {
  if (!isValidState())
    return nullptr;
  if (getAssumedSet().size() == 1)
    return cast<ConstantInt>(ConstantInt::get(getAssociatedValue().getType(),
                                              *(getAssumedSet().begin())));
  if (getAssumedSet().size() == 0) {
    if (undefIsContained())
      return cast<ConstantInt>(
          ConstantInt::get(getAssociatedValue().getType(), 0));
    return llvm::None;
  }

  return nullptr;
}

/// See AbstractAttribute::getName()
const std::string getName() const override { return "AAPotentialValues"; }

/// See AbstractAttribute::getIdAddr()
const char *getIdAddr() const override { return &ID; }

/// This function should return true if the type of the \p AA is
/// AAPotentialValues
static bool classof(const AbstractAttribute *AA) {
  return (AA->getIdAddr() == &ID);
}

/// Unique ID (due to the unique address)
static const char ID;
4227};

4229/// An abstract interface for all noundef attributes.
4230struct AANoUndef
  : public IRAttribute<Attribute::NoUndef,
                       StateWrapper<BooleanState, AbstractAttribute>> {
AANoUndef(const IRPosition &IRP, Attributor &A) : IRAttribute(IRP) {}

/// Return true if we assume that the underlying value is noundef.
bool isAssumedNoUndef() const { return getAssumed(); }

/// Return true if we know that underlying value is noundef.
bool isKnownNoUndef() const { return getKnown(); }

/// Create an abstract attribute view for the position \p IRP.
static AANoUndef &createForPosition(const IRPosition &IRP, Attributor &A);

/// See AbstractAttribute::getName()
const std::string getName() const override { return "AANoUndef"; }

/// See AbstractAttribute::getIdAddr()
const char *getIdAddr() const override { return &ID; }

/// This function should return true if the type of the \p AA is AANoUndef
static bool classof(const AbstractAttribute *AA) {
  return (AA->getIdAddr() == &ID);
}

/// Unique ID (due to the unique address)
static const char ID;
4257};

4259struct AACallGraphNode;
4260struct AACallEdges;

4262/// An Iterator for call edges, creates AACallEdges attributes in a lazy way.
4263/// This iterator becomes invalid if the underlying edge list changes.
4264/// So This shouldn't outlive a iteration of Attributor.
4265class AACallEdgeIterator
  : public iterator_adaptor_base<AACallEdgeIterator,
                                 SetVector<Function *>::iterator> {
AACallEdgeIterator(Attributor &A, SetVector<Function *>::iterator Begin)
    : iterator_adaptor_base(Begin), A(A) {}

4271public:
AACallGraphNode *operator*() const;

4274private:
Attributor &A;
friend AACallEdges;
friend AttributorCallGraph;
4278};

4280struct AACallGraphNode {
AACallGraphNode(Attributor &A) : A(A) {}
virtual ~AACallGraphNode() {}

virtual AACallEdgeIterator optimisticEdgesBegin() const = 0;
virtual AACallEdgeIterator optimisticEdgesEnd() const = 0;

/// Iterator range for exploring the call graph.
iterator_range<AACallEdgeIterator> optimisticEdgesRange() const {
  return iterator_range<AACallEdgeIterator>(optimisticEdgesBegin(),
                                            optimisticEdgesEnd());
}

4293protected:
/// Reference to Attributor needed for GraphTraits implementation.
Attributor &A;
4296};

4298/// An abstract state for querying live call edges.
4299/// This interface uses the Attributor's optimistic liveness
4300/// information to compute the edges that are alive.
4301struct AACallEdges : public StateWrapper<BooleanState, AbstractAttribute>,
                   AACallGraphNode {
using Base = StateWrapper<BooleanState, AbstractAttribute>;

AACallEdges(const IRPosition &IRP, Attributor &A)
    : Base(IRP), AACallGraphNode(A) {}

/// Get the optimistic edges.
virtual const SetVector<Function *> &getOptimisticEdges() const = 0;

/// Is there any call with a unknown callee.
virtual bool hasUnknownCallee() const = 0;

/// Is there any call with a unknown callee, excluding any inline asm.
virtual bool hasNonAsmUnknownCallee() const = 0;

/// Iterator for exploring the call graph.
AACallEdgeIterator optimisticEdgesBegin() const override {
  return AACallEdgeIterator(A, getOptimisticEdges().begin());
}

/// Iterator for exploring the call graph.
AACallEdgeIterator optimisticEdgesEnd() const override {
  return AACallEdgeIterator(A, getOptimisticEdges().end());
}

/// Create an abstract attribute view for the position \p IRP.
static AACallEdges &createForPosition(const IRPosition &IRP, Attributor &A);

/// See AbstractAttribute::getName()
const std::string getName() const override { return "AACallEdges"; }

/// See AbstractAttribute::getIdAddr()
const char *getIdAddr() const override { return &ID; }

/// This function should return true if the type of the \p AA is AACallEdges.
static bool classof(const AbstractAttribute *AA) {
  return (AA->getIdAddr() == &ID);
}

/// Unique ID (due to the unique address)
static const char ID;
4343};

4345// Synthetic root node for the Attributor's internal call graph.
4346struct AttributorCallGraph : public AACallGraphNode {
AttributorCallGraph(Attributor &A) : AACallGraphNode(A) {}
virtual ~AttributorCallGraph() {}

AACallEdgeIterator optimisticEdgesBegin() const override {
  return AACallEdgeIterator(A, A.Functions.begin());
}

AACallEdgeIterator optimisticEdgesEnd() const override {
  return AACallEdgeIterator(A, A.Functions.end());
}

/// Force populate the entire call graph.
void populateAll() const {
  for (const AACallGraphNode *AA : optimisticEdgesRange()) {
    // Nothing else to do here.
    (void)AA;
  }
}

void print();
4367};

4369template <> struct GraphTraits<AACallGraphNode *> {
using NodeRef = AACallGraphNode *;
using ChildIteratorType = AACallEdgeIterator;

static AACallEdgeIterator child_begin(AACallGraphNode *Node) {
  return Node->optimisticEdgesBegin();
}

static AACallEdgeIterator child_end(AACallGraphNode *Node) {
  return Node->optimisticEdgesEnd();
}
4380};

4382template <>
4383struct GraphTraits<AttributorCallGraph *>
  : public GraphTraits<AACallGraphNode *> {
using nodes_iterator = AACallEdgeIterator;

static AACallGraphNode *getEntryNode(AttributorCallGraph *G) {
  return static_cast<AACallGraphNode *>(G);
}

static AACallEdgeIterator nodes_begin(const AttributorCallGraph *G) {
  return G->optimisticEdgesBegin();
}

static AACallEdgeIterator nodes_end(const AttributorCallGraph *G) {
  return G->optimisticEdgesEnd();
}
4398};

4400template <>
4401struct DOTGraphTraits<AttributorCallGraph *> : public DefaultDOTGraphTraits {
DOTGraphTraits(bool Simple = false) : DefaultDOTGraphTraits(Simple) {}

std::string getNodeLabel(const AACallGraphNode *Node,
                         const AttributorCallGraph *Graph) {
  const AACallEdges *AACE = static_cast<const AACallEdges *>(Node);
  return AACE->getAssociatedFunction()->getName().str();
}

static bool isNodeHidden(const AACallGraphNode *Node,
                         const AttributorCallGraph *Graph) {
  // Hide the synth root.
  return static_cast<const AACallGraphNode *>(Graph) == Node;
}
4415};

4417struct AAExecutionDomain
  : public StateWrapper<BooleanState, AbstractAttribute> {
using Base = StateWrapper<BooleanState, AbstractAttribute>;
AAExecutionDomain(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

/// Create an abstract attribute view for the position \p IRP.
static AAExecutionDomain &createForPosition(const IRPosition &IRP,
                                            Attributor &A);

/// See AbstractAttribute::getName().
const std::string getName() const override { return "AAExecutionDomain"; }

/// See AbstractAttribute::getIdAddr().
const char *getIdAddr() const override { return &ID; }

/// Check if an instruction is executed only by the initial thread.
virtual bool isExecutedByInitialThreadOnly(const Instruction &) const = 0;

/// Check if a basic block is executed only by the initial thread.
virtual bool isExecutedByInitialThreadOnly(const BasicBlock &) const = 0;

/// This function should return true if the type of the \p AA is
/// AAExecutionDomain.
static bool classof(const AbstractAttribute *AA) {
  return (AA->getIdAddr() == &ID);
}

/// Unique ID (due to the unique address)
static const char ID;
4446};

4448/// An abstract Attribute for computing reachability between functions.
4449struct AAFunctionReachability
  : public StateWrapper<BooleanState, AbstractAttribute> {
using Base = StateWrapper<BooleanState, AbstractAttribute>;

AAFunctionReachability(const IRPosition &IRP, Attributor &A) : Base(IRP) {}

/// If the function represented by this possition can reach \p Fn.
virtual bool canReach(Attributor &A, Function *Fn) const = 0;

/// Create an abstract attribute view for the position \p IRP.
static AAFunctionReachability &createForPosition(const IRPosition &IRP,
                                                 Attributor &A);

/// See AbstractAttribute::getName()
const std::string getName() const override { return "AAFuncitonReacability"; }

/// See AbstractAttribute::getIdAddr()
const char *getIdAddr() const override { return &ID; }

/// This function should return true if the type of the \p AA is AACallEdges.
static bool classof(const AbstractAttribute *AA) {
  return (AA->getIdAddr() == &ID);
}

/// Unique ID (due to the unique address)
static const char ID;

4476private:
/// Can this function reach a call with unknown calee.
virtual bool canReachUnknownCallee() const = 0;
4479};

4481/// An abstract interface for struct information.
4482struct AAPointerInfo : public AbstractAttribute {
AAPointerInfo(const IRPosition &IRP) : AbstractAttribute(IRP) {}

enum AccessKind {
  AK_READ = 1 << 0,
  AK_WRITE = 1 << 1,
  AK_READ_WRITE = AK_READ | AK_WRITE,
};

/// An access description.
struct Access {
  Access(Instruction *I, Optional<Value *> Content, AccessKind Kind, Type *Ty)
      : LocalI(I), RemoteI(I), Content(Content), Kind(Kind), Ty(Ty) {}
  Access(Instruction *LocalI, Instruction *RemoteI, Optional<Value *> Content,
         AccessKind Kind, Type *Ty)
      : LocalI(LocalI), RemoteI(RemoteI), Content(Content), Kind(Kind),
        Ty(Ty) {}
  Access(const Access &Other)
      : LocalI(Other.LocalI), RemoteI(Other.RemoteI), Content(Other.Content),
        Kind(Other.Kind), Ty(Other.Ty) {}
  Access(const Access &&Other)
      : LocalI(Other.LocalI), RemoteI(Other.RemoteI), Content(Other.Content),
        Kind(Other.Kind), Ty(Other.Ty) {}

  Access &operator=(const Access &Other) {
    LocalI = Other.LocalI;
    RemoteI = Other.RemoteI;
    Content = Other.Content;
    Kind = Other.Kind;
    Ty = Other.Ty;
    return *this;
  }
  bool operator==(const Access &R) const {
    return LocalI == R.LocalI && RemoteI == R.RemoteI &&
           Content == R.Content && Kind == R.Kind;
  }
  bool operator!=(const Access &R) const { return !(*this == R); }

  Access &operator&=(const Access &R) {
    assert(RemoteI == R.RemoteI && "Expected same instruction!")((void)0);
    Content =
        AA::combineOptionalValuesInAAValueLatice(Content, R.Content, Ty);
    Kind = AccessKind(Kind | R.Kind);
    return *this;
  }

  /// Return the access kind.
  AccessKind getKind() const { return Kind; }

  /// Return true if this is a read access.
  bool isRead() const { return Kind & AK_READ; }

  /// Return true if this is a write access.
  bool isWrite() const { return Kind & AK_WRITE; }

  /// Return the instruction that causes the access with respect to the local
  /// scope of the associated attribute.
  Instruction *getLocalInst() const { return LocalI; }

  /// Return the actual instruction that causes the access.
  Instruction *getRemoteInst() const { return RemoteI; }

  /// Return true if the value written is not known yet.
  bool isWrittenValueYetUndetermined() const { return !Content.hasValue(); }

  /// Return true if the value written cannot be determined at all.
  bool isWrittenValueUnknown() const {
    return Content.hasValue() && !*Content;
  }

  /// Return the type associated with the access, if known.
  Type *getType() const { return Ty; }

  /// Return the value writen, if any. As long as
  /// isWrittenValueYetUndetermined return true this function shall not be
  /// called.
  Value *getWrittenValue() const { return *Content; }

  /// Return the written value which can be `llvm::null` if it is not yet
  /// determined.
  Optional<Value *> getContent() const { return Content; }

private:
  /// The instruction responsible for the access with respect to the local
  /// scope of the associated attribute.
  Instruction *LocalI;

  /// The instruction responsible for the access.
  Instruction *RemoteI;

  /// The value written, if any. `llvm::none` means "not known yet", `nullptr`
  /// cannot be determined.
  Optional<Value *> Content;

  /// The access kind, e.g., READ, as bitset (could be more than one).
  AccessKind Kind;

  /// The type of the content, thus the type read/written, can be null if not
  /// available.
  Type *Ty;
};

/// Create an abstract attribute view for the position \p IRP.
static AAPointerInfo &createForPosition(const IRPosition &IRP, Attributor &A);

/// See AbstractAttribute::getName()
const std::string getName() const override { return "AAPointerInfo"; }

/// See AbstractAttribute::getIdAddr()
const char *getIdAddr() const override { return &ID; }

/// Call \p CB on all accesses that might interfere with \p LI and return true
/// if all such accesses were known and the callback returned true for all of
/// them, false otherwise.
virtual bool forallInterferingAccesses(
    LoadInst &LI, function_ref<bool(const Access &, bool)> CB) const = 0;
virtual bool forallInterferingAccesses(
    StoreInst &SI, function_ref<bool(const Access &, bool)> CB) const = 0;

/// This function should return true if the type of the \p AA is AAPointerInfo
static bool classof(const AbstractAttribute *AA) {
  return (AA->getIdAddr() == &ID);
}

/// Unique ID (due to the unique address)
static const char ID;
4608};

4610raw_ostream &operator<<(raw_ostream &, const AAPointerInfo::Access &);

/// Run options, used by the pass manager. MODULE and CGSCC are independent
/// bits; ALL sets both.
enum AttributorRunOption {
  NONE = 0,            // no bit set
  MODULE = 1 << 0,     // bit 0
  CGSCC = 1 << 1,      // bit 1
  ALL = MODULE | CGSCC // both bits
};

4620} // end namespace llvm

4622#endif // LLVM_TRANSFORMS_IPO_ATTRIBUTOR_H