/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp

Bug Summary

File:	src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
Warning:	line 2883, column 21 Called C++ object pointer is null

Annotated Source Code

Press '?' to see keyboard shortcuts

Show analyzer invocation

clang -cc1 -cc1 -triple amd64-unknown-openbsd7.0 -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name SimpleLoopUnswitch.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -mrelocation-model pic -pic-level 1 -fhalf-no-semantic-interposition -mframe-pointer=all -relaxed-aliasing -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -fcoverage-compilation-dir=/usr/src/gnu/usr.bin/clang/libLLVM/obj -resource-dir /usr/local/lib/clang/13.0.0 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/AMDGPU -I 
/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/AMDGPU -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Analysis -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ASMParser -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/BinaryFormat -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Bitcode -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Bitcode -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Bitstream -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /include/llvm/CodeGen -I /include/llvm/CodeGen/PBQP -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/IR -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/IR -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Coroutines -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ProfileData/Coverage -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/CodeView -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/DWARF -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/MSF -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/PDB -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Demangle -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ExecutionEngine -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ExecutionEngine/JITLink -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ExecutionEngine/Orc -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend -I 
/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend/OpenACC -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Frontend/OpenMP -I /include/llvm/CodeGen/GlobalISel -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/IRReader -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/InstCombine -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/Transforms/InstCombine -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/LTO -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Linker -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/MC -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/MC/MCParser -I /include/llvm/CodeGen/MIRParser -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Object -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Option -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Passes -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ProfileData -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Scalar -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ADT -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Support -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/DebugInfo/Symbolize -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Target -I 
/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Utils -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Vectorize -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include/llvm/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Target/X86 -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/IPO -I /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include -I /usr/src/gnu/usr.bin/clang/libLLVM/../include -I /usr/src/gnu/usr.bin/clang/libLLVM/obj -I /usr/src/gnu/usr.bin/clang/libLLVM/obj/../include -D NDEBUG -D __STDC_LIMIT_MACROS -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D LLVM_PREFIX="/usr" -D PIC -internal-isystem /usr/include/c++/v1 -internal-isystem /usr/local/lib/clang/13.0.0/include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/usr/src/gnu/usr.bin/clang/libLLVM/obj -ferror-limit 19 -fvisibility-inlines-hidden -fwrapv -D_RET_PROTECTOR -ret-protector -fno-rtti -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -fno-builtin-malloc 
-fno-builtin-calloc -fno-builtin-realloc -fno-builtin-valloc -fno-builtin-free -fno-builtin-strdup -fno-builtin-strndup -analyzer-output=html -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /home/ben/Projects/vmm/scan-build/2022-01-12-194120-40624-1 -x c++ /usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp

/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp

→

1///===- SimpleLoopUnswitch.cpp - Hoist loop-invariant control flow ---------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//

9#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
10#include "llvm/ADT/DenseMap.h"
11#include "llvm/ADT/STLExtras.h"
12#include "llvm/ADT/Sequence.h"
13#include "llvm/ADT/SetVector.h"
14#include "llvm/ADT/SmallPtrSet.h"
15#include "llvm/ADT/SmallVector.h"
16#include "llvm/ADT/Statistic.h"
17#include "llvm/ADT/Twine.h"
18#include "llvm/Analysis/AssumptionCache.h"
19#include "llvm/Analysis/CFG.h"
20#include "llvm/Analysis/CodeMetrics.h"
21#include "llvm/Analysis/GuardUtils.h"
22#include "llvm/Analysis/InstructionSimplify.h"
23#include "llvm/Analysis/LoopAnalysisManager.h"
24#include "llvm/Analysis/LoopInfo.h"
25#include "llvm/Analysis/LoopIterator.h"
26#include "llvm/Analysis/LoopPass.h"
27#include "llvm/Analysis/MemorySSA.h"
28#include "llvm/Analysis/MemorySSAUpdater.h"
29#include "llvm/Analysis/MustExecute.h"
30#include "llvm/Analysis/ScalarEvolution.h"
31#include "llvm/IR/BasicBlock.h"
32#include "llvm/IR/Constant.h"
33#include "llvm/IR/Constants.h"
34#include "llvm/IR/Dominators.h"
35#include "llvm/IR/Function.h"
36#include "llvm/IR/IRBuilder.h"
37#include "llvm/IR/InstrTypes.h"
38#include "llvm/IR/Instruction.h"
39#include "llvm/IR/Instructions.h"
40#include "llvm/IR/IntrinsicInst.h"
41#include "llvm/IR/PatternMatch.h"
42#include "llvm/IR/Use.h"
43#include "llvm/IR/Value.h"
44#include "llvm/InitializePasses.h"
45#include "llvm/Pass.h"
46#include "llvm/Support/Casting.h"
47#include "llvm/Support/CommandLine.h"
48#include "llvm/Support/Debug.h"
49#include "llvm/Support/ErrorHandling.h"
50#include "llvm/Support/GenericDomTree.h"
51#include "llvm/Support/raw_ostream.h"
52#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
53#include "llvm/Transforms/Utils/BasicBlockUtils.h"
54#include "llvm/Transforms/Utils/Cloning.h"
55#include "llvm/Transforms/Utils/Local.h"
56#include "llvm/Transforms/Utils/LoopUtils.h"
57#include "llvm/Transforms/Utils/ValueMapper.h"
58#include <algorithm>
59#include <cassert>
60#include <iterator>
61#include <numeric>
62#include <utility>

64#define DEBUG_TYPE"simple-loop-unswitch" "simple-loop-unswitch"

66using namespace llvm;
67using namespace llvm::PatternMatch;

69STATISTIC(NumBranches, "Number of branches unswitched")static llvm::Statistic NumBranches = {"simple-loop-unswitch",
 "NumBranches", "Number of branches unswitched"};
70STATISTIC(NumSwitches, "Number of switches unswitched")static llvm::Statistic NumSwitches = {"simple-loop-unswitch",
 "NumSwitches", "Number of switches unswitched"};
71STATISTIC(NumGuards, "Number of guards turned into branches for unswitching")static llvm::Statistic NumGuards = {"simple-loop-unswitch", "NumGuards"
, "Number of guards turned into branches for unswitching"};
72STATISTIC(NumTrivial, "Number of unswitches that are trivial")static llvm::Statistic NumTrivial = {"simple-loop-unswitch", "NumTrivial"
, "Number of unswitches that are trivial"};
73STATISTIC(static llvm::Statistic NumCostMultiplierSkipped = {"simple-loop-unswitch"
, "NumCostMultiplierSkipped", "Number of unswitch candidates that had their cost multiplier skipped"
}
  NumCostMultiplierSkipped,static llvm::Statistic NumCostMultiplierSkipped = {"simple-loop-unswitch"
, "NumCostMultiplierSkipped", "Number of unswitch candidates that had their cost multiplier skipped"
}
  "Number of unswitch candidates that had their cost multiplier skipped")static llvm::Statistic NumCostMultiplierSkipped = {"simple-loop-unswitch"
, "NumCostMultiplierSkipped", "Number of unswitch candidates that had their cost multiplier skipped"
};

77static cl::opt<bool> EnableNonTrivialUnswitch(
  "enable-nontrivial-unswitch", cl::init(false), cl::Hidden,
  cl::desc("Forcibly enables non-trivial loop unswitching rather than "
           "following the configuration passed into the pass."));

82static cl::opt<int>
  UnswitchThreshold("unswitch-threshold", cl::init(50), cl::Hidden,
                    cl::desc("The cost threshold for unswitching a loop."));

86static cl::opt<bool> EnableUnswitchCostMultiplier(
  "enable-unswitch-cost-multiplier", cl::init(true), cl::Hidden,
  cl::desc("Enable unswitch cost multiplier that prohibits exponential "
           "explosion in nontrivial unswitch."));
90static cl::opt<int> UnswitchSiblingsToplevelDiv(
  "unswitch-siblings-toplevel-div", cl::init(2), cl::Hidden,
  cl::desc("Toplevel siblings divisor for cost multiplier."));
93static cl::opt<int> UnswitchNumInitialUnscaledCandidates(
  "unswitch-num-initial-unscaled-candidates", cl::init(8), cl::Hidden,
  cl::desc("Number of unswitch candidates that are ignored when calculating "
           "cost multiplier."));
97static cl::opt<bool> UnswitchGuards(
  "simple-loop-unswitch-guards", cl::init(true), cl::Hidden,
  cl::desc("If enabled, simple loop unswitching will also consider "
           "llvm.experimental.guard intrinsics as unswitch candidates."));
101static cl::opt<bool> DropNonTrivialImplicitNullChecks(
  "simple-loop-unswitch-drop-non-trivial-implicit-null-checks",
  cl::init(false), cl::Hidden,
  cl::desc("If enabled, drop make.implicit metadata in unswitched implicit "
           "null checks to save time analyzing if we can keep it."));
106static cl::opt<unsigned>
  MSSAThreshold("simple-loop-unswitch-memoryssa-threshold",
                cl::desc("Max number of memory uses to explore during "
                         "partial unswitching analysis"),
                cl::init(100), cl::Hidden);

112/// Collect all of the loop invariant input values transitively used by the
113/// homogeneous instruction graph from a given root.
114///
115/// This essentially walks from a root recursively through loop variant operands
116/// which have the exact same opcode and finds all inputs which are loop
117/// invariant. For some operations these can be re-associated and unswitched out
118/// of the loop entirely.
119static TinyPtrVector<Value *>
120collectHomogenousInstGraphLoopInvariants(Loop &L, Instruction &Root,
                                       LoopInfo &LI) {
assert(!L.isLoopInvariant(&Root) &&((void)0)
       "Only need to walk the graph if root itself is not invariant.")((void)0);
TinyPtrVector<Value *> Invariants;

bool IsRootAnd = match(&Root, m_LogicalAnd());
bool IsRootOr  = match(&Root, m_LogicalOr());

// Build a worklist and recurse through operators collecting invariants.
SmallVector<Instruction *, 4> Worklist;
SmallPtrSet<Instruction *, 8> Visited;
Worklist.push_back(&Root);
Visited.insert(&Root);
do {
  Instruction &I = *Worklist.pop_back_val();
  for (Value *OpV : I.operand_values()) {
    // Skip constants as unswitching isn't interesting for them.
    if (isa<Constant>(OpV))
      continue;

    // Add it to our result if loop invariant.
    if (L.isLoopInvariant(OpV)) {
      Invariants.push_back(OpV);
      continue;
    }

    // If not an instruction with the same opcode, nothing we can do.
    Instruction *OpI = dyn_cast<Instruction>(OpV);

    if (OpI && ((IsRootAnd && match(OpI, m_LogicalAnd())) ||
                (IsRootOr  && match(OpI, m_LogicalOr())))) {
      // Visit this operand.
      if (Visited.insert(OpI).second)
        Worklist.push_back(OpI);
    }
  }
} while (!Worklist.empty());

return Invariants;
160}

162static void replaceLoopInvariantUses(Loop &L, Value *Invariant,
                                   Constant &Replacement) {
assert(!isa<Constant>(Invariant) && "Why are we unswitching on a constant?")((void)0);

// Replace uses of LIC in the loop with the given constant.
// We use make_early_inc_range as set invalidates the iterator.
for (Use &U : llvm::make_early_inc_range(Invariant->uses())) {
  Instruction *UserI = dyn_cast<Instruction>(U.getUser());

  // Replace this use within the loop body.
  if (UserI && L.contains(UserI))
    U.set(&Replacement);
}
175}

177/// Check that all the LCSSA PHI nodes in the loop exit block have trivial
178/// incoming values along this edge.
179static bool areLoopExitPHIsLoopInvariant(Loop &L, BasicBlock &ExitingBB,
                                       BasicBlock &ExitBB) {
for (Instruction &I : ExitBB) {
  auto *PN = dyn_cast<PHINode>(&I);
  if (!PN)
    // No more PHIs to check.
    return true;

  // If the incoming value for this edge isn't loop invariant the unswitch
  // won't be trivial.
  if (!L.isLoopInvariant(PN->getIncomingValueForBlock(&ExitingBB)))
    return false;
}
llvm_unreachable("Basic blocks should never be empty!")__builtin_unreachable();
193}

195/// Copy a set of loop invariant values \p ToDuplicate and insert them at the
196/// end of \p BB and conditionally branch on the copied condition. We only
197/// branch on a single value.
198static void buildPartialUnswitchConditionalBranch(BasicBlock &BB,
                                                ArrayRef<Value *> Invariants,
                                                bool Direction,
                                                BasicBlock &UnswitchedSucc,
                                                BasicBlock &NormalSucc) {
IRBuilder<> IRB(&BB);

Value *Cond = Direction ? IRB.CreateOr(Invariants) :
  IRB.CreateAnd(Invariants);
IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
                 Direction ? &NormalSucc : &UnswitchedSucc);
209}

211/// Copy a set of loop invariant values, and conditionally branch on them.
212static void buildPartialInvariantUnswitchConditionalBranch(
  BasicBlock &BB, ArrayRef<Value *> ToDuplicate, bool Direction,
  BasicBlock &UnswitchedSucc, BasicBlock &NormalSucc, Loop &L,
  MemorySSAUpdater *MSSAU) {
ValueToValueMapTy VMap;
for (auto *Val : reverse(ToDuplicate)) {
  Instruction *Inst = cast<Instruction>(Val);
  Instruction *NewInst = Inst->clone();
  BB.getInstList().insert(BB.end(), NewInst);
  RemapInstruction(NewInst, VMap,
                   RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
  VMap[Val] = NewInst;

  if (!MSSAU)
    continue;

  MemorySSA *MSSA = MSSAU->getMemorySSA();
  if (auto *MemUse =
          dyn_cast_or_null<MemoryUse>(MSSA->getMemoryAccess(Inst))) {
    auto *DefiningAccess = MemUse->getDefiningAccess();
    // Get the first defining access before the loop.
    while (L.contains(DefiningAccess->getBlock())) {
      // If the defining access is a MemoryPhi, get the incoming
      // value for the pre-header as defining access.
      if (auto *MemPhi = dyn_cast<MemoryPhi>(DefiningAccess))
        DefiningAccess =
            MemPhi->getIncomingValueForBlock(L.getLoopPreheader());
      else
        DefiningAccess = cast<MemoryDef>(DefiningAccess)->getDefiningAccess();
    }
    MSSAU->createMemoryAccessInBB(NewInst, DefiningAccess,
                                  NewInst->getParent(),
                                  MemorySSA::BeforeTerminator);
  }
}

IRBuilder<> IRB(&BB);
Value *Cond = VMap[ToDuplicate[0]];
IRB.CreateCondBr(Cond, Direction ? &UnswitchedSucc : &NormalSucc,
                 Direction ? &NormalSucc : &UnswitchedSucc);
252}

254/// Rewrite the PHI nodes in an unswitched loop exit basic block.
255///
256/// Requires that the loop exit and unswitched basic block are the same, and
257/// that the exiting block was a unique predecessor of that block. Rewrites the
258/// PHI nodes in that block such that what were LCSSA PHI nodes become trivial
259/// PHI nodes from the old preheader that now contains the unswitched
260/// terminator.
261static void rewritePHINodesForUnswitchedExitBlock(BasicBlock &UnswitchedBB,
                                                BasicBlock &OldExitingBB,
                                                BasicBlock &OldPH) {
for (PHINode &PN : UnswitchedBB.phis()) {
  // When the loop exit is directly unswitched we just need to update the
  // incoming basic block. We loop to handle weird cases with repeated
  // incoming blocks, but expect to typically only have one operand here.
  for (auto i : seq<int>(0, PN.getNumOperands())) {
    assert(PN.getIncomingBlock(i) == &OldExitingBB &&((void)0)
           "Found incoming block different from unique predecessor!")((void)0);
    PN.setIncomingBlock(i, &OldPH);
  }
}
274}

276/// Rewrite the PHI nodes in the loop exit basic block and the split off
277/// unswitched block.
278///
279/// Because the exit block remains an exit from the loop, this rewrites the
280/// LCSSA PHI nodes in it to remove the unswitched edge and introduces PHI
281/// nodes into the unswitched basic block to select between the value in the
282/// old preheader and the loop exit.
283static void rewritePHINodesForExitAndUnswitchedBlocks(BasicBlock &ExitBB,
                                                    BasicBlock &UnswitchedBB,
                                                    BasicBlock &OldExitingBB,
                                                    BasicBlock &OldPH,
                                                    bool FullUnswitch) {
assert(&ExitBB != &UnswitchedBB &&((void)0)
       "Must have different loop exit and unswitched blocks!")((void)0);
Instruction *InsertPt = &*UnswitchedBB.begin();
for (PHINode &PN : ExitBB.phis()) {
  auto *NewPN = PHINode::Create(PN.getType(), /*NumReservedValues*/ 2,
                                PN.getName() + ".split", InsertPt);

  // Walk backwards over the old PHI node's inputs to minimize the cost of
  // removing each one. We have to do this weird loop manually so that we
  // create the same number of new incoming edges in the new PHI as we expect
  // each case-based edge to be included in the unswitched switch in some
  // cases.
  // FIXME: This is really, really gross. It would be much cleaner if LLVM
  // allowed us to create a single entry for a predecessor block without
  // having separate entries for each "edge" even though these edges are
  // required to produce identical results.
  for (int i = PN.getNumIncomingValues() - 1; i >= 0; --i) {
    if (PN.getIncomingBlock(i) != &OldExitingBB)
      continue;

    Value *Incoming = PN.getIncomingValue(i);
    if (FullUnswitch)
      // No more edge from the old exiting block to the exit block.
      PN.removeIncomingValue(i);

    NewPN->addIncoming(Incoming, &OldPH);
  }

  // Now replace the old PHI with the new one and wire the old one in as an
  // input to the new one.
  PN.replaceAllUsesWith(NewPN);
  NewPN->addIncoming(&PN, &ExitBB);
}
321}

323/// Hoist the current loop up to the innermost loop containing a remaining exit.
324///
325/// Because we've removed an exit from the loop, we may have changed the set of
326/// loops reachable and need to move the current loop up the loop nest or even
327/// to an entirely separate nest.
328static void hoistLoopToNewParent(Loop &L, BasicBlock &Preheader,
                               DominatorTree &DT, LoopInfo &LI,
                               MemorySSAUpdater *MSSAU, ScalarEvolution *SE) {
// If the loop is already at the top level, we can't hoist it anywhere.
Loop *OldParentL = L.getParentLoop();
if (!OldParentL)
  return;

SmallVector<BasicBlock *, 4> Exits;
L.getExitBlocks(Exits);
Loop *NewParentL = nullptr;
for (auto *ExitBB : Exits)
  if (Loop *ExitL = LI.getLoopFor(ExitBB))
    if (!NewParentL || NewParentL->contains(ExitL))
      NewParentL = ExitL;

if (NewParentL == OldParentL)
  return;

// The new parent loop (if different) should always contain the old one.
if (NewParentL)
  assert(NewParentL->contains(OldParentL) &&((void)0)
         "Can only hoist this loop up the nest!")((void)0);

// The preheader will need to move with the body of this loop. However,
// because it isn't in this loop we also need to update the primary loop map.
assert(OldParentL == LI.getLoopFor(&Preheader) &&((void)0)
       "Parent loop of this loop should contain this loop's preheader!")((void)0);
LI.changeLoopFor(&Preheader, NewParentL);

// Remove this loop from its old parent.
OldParentL->removeChildLoop(&L);

// Add the loop either to the new parent or as a top-level loop.
if (NewParentL)
  NewParentL->addChildLoop(&L);
else
  LI.addTopLevelLoop(&L);

// Remove this loops blocks from the old parent and every other loop up the
// nest until reaching the new parent. Also update all of these
// no-longer-containing loops to reflect the nesting change.
for (Loop *OldContainingL = OldParentL; OldContainingL != NewParentL;
     OldContainingL = OldContainingL->getParentLoop()) {
  llvm::erase_if(OldContainingL->getBlocksVector(),
                 [&](const BasicBlock *BB) {
                   return BB == &Preheader || L.contains(BB);
                 });

  OldContainingL->getBlocksSet().erase(&Preheader);
  for (BasicBlock *BB : L.blocks())
    OldContainingL->getBlocksSet().erase(BB);

  // Because we just hoisted a loop out of this one, we have essentially
  // created new exit paths from it. That means we need to form LCSSA PHI
  // nodes for values used in the no-longer-nested loop.
  formLCSSA(*OldContainingL, DT, &LI, SE);

  // We shouldn't need to form dedicated exits because the exit introduced
  // here is the (just split by unswitching) preheader. However, after trivial
  // unswitching it is possible to get new non-dedicated exits out of parent
  // loop so let's conservatively form dedicated exit blocks and figure out
  // if we can optimize later.
  formDedicatedExitBlocks(OldContainingL, &DT, &LI, MSSAU,
                          /*PreserveLCSSA*/ true);
}
394}

396// Return the top-most loop containing ExitBB and having ExitBB as exiting block
397// or the loop containing ExitBB, if there is no parent loop containing ExitBB
398// as exiting block.
399static Loop *getTopMostExitingLoop(BasicBlock *ExitBB, LoopInfo &LI) {
Loop *TopMost = LI.getLoopFor(ExitBB);
Loop *Current = TopMost;
while (Current) {
  if (Current->isLoopExiting(ExitBB))
    TopMost = Current;
  Current = Current->getParentLoop();
}
return TopMost;
408}

410/// Unswitch a trivial branch if the condition is loop invariant.
411///
412/// This routine should only be called when loop code leading to the branch has
413/// been validated as trivial (no side effects). This routine checks if the
414/// condition is invariant and one of the successors is a loop exit. This
415/// allows us to unswitch without duplicating the loop, making it trivial.
416///
417/// If this routine fails to unswitch the branch it returns false.
418///
419/// If the branch can be unswitched, this routine splits the preheader and
420/// hoists the branch above that split. Preserves loop simplified form
421/// (splitting the exit block as necessary). It simplifies the branch within
422/// the loop to an unconditional branch but doesn't remove it entirely. Further
423/// cleanup can be done with some simplifycfg like pass.
424///
425/// If `SE` is not null, it will be updated based on the potential loop SCEVs
426/// invalidated by this.
427static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT,
                                LoopInfo &LI, ScalarEvolution *SE,
                                MemorySSAUpdater *MSSAU) {
assert(BI.isConditional() && "Can only unswitch a conditional branch!")((void)0);
LLVM_DEBUG(dbgs() << "  Trying to unswitch branch: " << BI << "\n")do { } while (false);

// The loop invariant values that we want to unswitch.
TinyPtrVector<Value *> Invariants;

// When true, we're fully unswitching the branch rather than just unswitching
// some input conditions to the branch.
bool FullUnswitch = false;

if (L.isLoopInvariant(BI.getCondition())) {
  Invariants.push_back(BI.getCondition());
  FullUnswitch = true;
} else {
  if (auto *CondInst = dyn_cast<Instruction>(BI.getCondition()))
    Invariants = collectHomogenousInstGraphLoopInvariants(L, *CondInst, LI);
  if (Invariants.empty()) {
    LLVM_DEBUG(dbgs() << "   Couldn't find invariant inputs!\n")do { } while (false);
    return false;
  }
}

// Check that one of the branch's successors exits, and which one.
bool ExitDirection = true;
int LoopExitSuccIdx = 0;
auto *LoopExitBB = BI.getSuccessor(0);
if (L.contains(LoopExitBB)) {
  ExitDirection = false;
  LoopExitSuccIdx = 1;
  LoopExitBB = BI.getSuccessor(1);
  if (L.contains(LoopExitBB)) {
    LLVM_DEBUG(dbgs() << "   Branch doesn't exit the loop!\n")do { } while (false);
    return false;
  }
}
auto *ContinueBB = BI.getSuccessor(1 - LoopExitSuccIdx);
auto *ParentBB = BI.getParent();
if (!areLoopExitPHIsLoopInvariant(L, *ParentBB, *LoopExitBB)) {
  LLVM_DEBUG(dbgs() << "   Loop exit PHI's aren't loop-invariant!\n")do { } while (false);
  return false;
}

// When unswitching only part of the branch's condition, we need the exit
// block to be reached directly from the partially unswitched input. This can
// be done when the exit block is along the true edge and the branch condition
// is a graph of `or` operations, or the exit block is along the false edge
// and the condition is a graph of `and` operations.
if (!FullUnswitch) {
  if (ExitDirection ? !match(BI.getCondition(), m_LogicalOr())
                    : !match(BI.getCondition(), m_LogicalAnd())) {
    LLVM_DEBUG(dbgs() << "   Branch condition is in improper form for "do { } while (false)
                         "non-full unswitch!\n")do { } while (false);
    return false;
  }
}

LLVM_DEBUG({do { } while (false)
  dbgs() << "    unswitching trivial invariant conditions for: " << BIdo { } while (false)
         << "\n";do { } while (false)
  for (Value *Invariant : Invariants) {do { } while (false)
    dbgs() << "      " << *Invariant << " == true";do { } while (false)
    if (Invariant != Invariants.back())do { } while (false)
      dbgs() << " ||";do { } while (false)
    dbgs() << "\n";do { } while (false)
  }do { } while (false)
})do { } while (false);

// If we have scalar evolutions, we need to invalidate them including this
// loop, the loop containing the exit block and the topmost parent loop
// exiting via LoopExitBB.
if (SE) {
  if (Loop *ExitL = getTopMostExitingLoop(LoopExitBB, LI))
    SE->forgetLoop(ExitL);
  else
    // Forget the entire nest as this exits the entire nest.
    SE->forgetTopmostLoop(&L);
}

if (MSSAU && VerifyMemorySSA)
  MSSAU->getMemorySSA()->verifyMemorySSA();

// Split the preheader, so that we know that there is a safe place to insert
// the conditional branch. We will change the preheader to have a conditional
// branch on LoopCond.
BasicBlock *OldPH = L.getLoopPreheader();
BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI, MSSAU);

// Now that we have a place to insert the conditional branch, create a place
// to branch to: this is the exit block out of the loop that we are
// unswitching. We need to split this if there are other loop predecessors.
// Because the loop is in simplified form, *any* other predecessor is enough.
BasicBlock *UnswitchedBB;
if (FullUnswitch && LoopExitBB->getUniquePredecessor()) {
  assert(LoopExitBB->getUniquePredecessor() == BI.getParent() &&((void)0)
         "A branch's parent isn't a predecessor!")((void)0);
  UnswitchedBB = LoopExitBB;
} else {
  UnswitchedBB =
      SplitBlock(LoopExitBB, &LoopExitBB->front(), &DT, &LI, MSSAU);
}

if (MSSAU && VerifyMemorySSA)
  MSSAU->getMemorySSA()->verifyMemorySSA();

// Actually move the invariant uses into the unswitched position. If possible,
// we do this by moving the instructions, but when doing partial unswitching
// we do it by building a new merge of the values in the unswitched position.
OldPH->getTerminator()->eraseFromParent();
if (FullUnswitch) {
  // If fully unswitching, we can use the existing branch instruction.
  // Splice it into the old PH to gate reaching the new preheader and re-point
  // its successors.
  OldPH->getInstList().splice(OldPH->end(), BI.getParent()->getInstList(),
                              BI);
  if (MSSAU) {
    // Temporarily clone the terminator, to make MSSA update cheaper by
    // separating "insert edge" updates from "remove edge" ones.
    ParentBB->getInstList().push_back(BI.clone());
  } else {
    // Create a new unconditional branch that will continue the loop as a new
    // terminator.
    BranchInst::Create(ContinueBB, ParentBB);
  }
  BI.setSuccessor(LoopExitSuccIdx, UnswitchedBB);
  BI.setSuccessor(1 - LoopExitSuccIdx, NewPH);
} else {
  // Only unswitching a subset of inputs to the condition, so we will need to
  // build a new branch that merges the invariant inputs.
  if (ExitDirection)
    assert(match(BI.getCondition(), m_LogicalOr()) &&((void)0)
           "Must have an `or` of `i1`s or `select i1 X, true, Y`s for the "((void)0)
           "condition!")((void)0);
  else
    assert(match(BI.getCondition(), m_LogicalAnd()) &&((void)0)
           "Must have an `and` of `i1`s or `select i1 X, Y, false`s for the"((void)0)
           " condition!")((void)0);
  buildPartialUnswitchConditionalBranch(*OldPH, Invariants, ExitDirection,
                                        *UnswitchedBB, *NewPH);
}

// Update the dominator tree with the added edge.
DT.insertEdge(OldPH, UnswitchedBB);

// After the dominator tree was updated with the added edge, update MemorySSA
// if available.
if (MSSAU) {
  SmallVector<CFGUpdate, 1> Updates;
  Updates.push_back({cfg::UpdateKind::Insert, OldPH, UnswitchedBB});
  MSSAU->applyInsertUpdates(Updates, DT);
}

// Finish updating dominator tree and memory ssa for full unswitch.
if (FullUnswitch) {
  if (MSSAU) {
    // Remove the cloned branch instruction.
    ParentBB->getTerminator()->eraseFromParent();
    // Create unconditional branch now.
    BranchInst::Create(ContinueBB, ParentBB);
    MSSAU->removeEdge(ParentBB, LoopExitBB);
  }
  DT.deleteEdge(ParentBB, LoopExitBB);
}

if (MSSAU && VerifyMemorySSA)
  MSSAU->getMemorySSA()->verifyMemorySSA();

// Rewrite the relevant PHI nodes.
if (UnswitchedBB == LoopExitBB)
  rewritePHINodesForUnswitchedExitBlock(*UnswitchedBB, *ParentBB, *OldPH);
else
  rewritePHINodesForExitAndUnswitchedBlocks(*LoopExitBB, *UnswitchedBB,
                                            *ParentBB, *OldPH, FullUnswitch);

// The constant we can replace all of our invariants with inside the loop
// body. If any of the invariants have a value other than this the loop won't
// be entered.
ConstantInt *Replacement = ExitDirection
                               ? ConstantInt::getFalse(BI.getContext())
                               : ConstantInt::getTrue(BI.getContext());

// Since this is an i1 condition we can also trivially replace uses of it
// within the loop with a constant.
for (Value *Invariant : Invariants)
  replaceLoopInvariantUses(L, Invariant, *Replacement);

// If this was full unswitching, we may have changed the nesting relationship
// for this loop so hoist it to its correct parent if needed.
if (FullUnswitch)
  hoistLoopToNewParent(L, *NewPH, DT, LI, MSSAU, SE);

if (MSSAU && VerifyMemorySSA)
  MSSAU->getMemorySSA()->verifyMemorySSA();

LLVM_DEBUG(dbgs() << "    done: unswitching trivial branch...\n")do { } while (false);
++NumTrivial;
++NumBranches;
return true;
627}

629/// Unswitch a trivial switch if the condition is loop invariant.
630///
631/// This routine should only be called when loop code leading to the switch has
632/// been validated as trivial (no side effects). This routine checks if the
633/// condition is invariant and that at least one of the successors is a loop
634/// exit. This allows us to unswitch without duplicating the loop, making it
635/// trivial.
636///
637/// If this routine fails to unswitch the switch it returns false.
638///
639/// If the switch can be unswitched, this routine splits the preheader and
640/// copies the switch above that split. If the default case is one of the
641/// exiting cases, it copies the non-exiting cases and points them at the new
642/// preheader. If the default case is not exiting, it copies the exiting cases
643/// and points the default at the preheader. It preserves loop simplified form
644/// (splitting the exit blocks as necessary). It simplifies the switch within
645/// the loop by removing now-dead cases. If the default case is one of those
646/// unswitched, it replaces its destination with a new basic block containing
647/// only unreachable. Such basic blocks, while technically loop exits, are not
648/// considered for unswitching so this is a stable transform and the same
649/// switch will not be revisited. If after unswitching there is only a single
650/// in-loop successor, the switch is further simplified to an unconditional
651/// branch. Still more cleanup can be done with some simplifycfg like pass.
652///
653/// If `SE` is not null, it will be updated based on the potential loop SCEVs
654/// invalidated by this.
///
/// `MSSAU` may also be null; every dereference of it in this function is
/// guarded. When it is non-null the dominator tree updates are applied through
/// it, otherwise they are applied to `DT` directly. `LI` is handed to the
/// block-splitting and loop-hoisting helpers.
///
/// Returns true if the switch was unswitched, and false if the condition is
/// not loop invariant or if neither the default destination nor any case
/// successor is a trivially unswitchable exit block.
655static bool unswitchTrivialSwitch(Loop &L, SwitchInst &SI, DominatorTree &DT,
                                LoopInfo &LI, ScalarEvolution *SE,
                                MemorySSAUpdater *MSSAU) {
LLVM_DEBUG(dbgs() << "  Trying to unswitch switch: " << SI << "\n")do { } while (false);
Value *LoopCond = SI.getCondition();

// If this isn't switching on an invariant condition, we can't unswitch it.
if (!L.isLoopInvariant(LoopCond))
  return false;

auto *ParentBB = SI.getParent();

// The same check must be used both for the default and the exit cases. We
// should never leave edges from the switch instruction to a basic block that
// we are unswitching, hence the condition used to determine the default case
// needs to also be used to populate ExitCaseIndices, which is then used to
// remove cases from the switch.
auto IsTriviallyUnswitchableExitBlock = [&](BasicBlock &BBToCheck) {
  // BBToCheck is not an exit block if it is inside loop L.
  if (L.contains(&BBToCheck))
    return false;
  // BBToCheck is not trivial to unswitch if its phis aren't loop invariant.
  if (!areLoopExitPHIsLoopInvariant(L, *ParentBB, BBToCheck))
    return false;
  // We do not unswitch a block that only has an unreachable statement, as
  // it's possible this is a previously unswitched block. Only unswitch if
  // either the terminator is not unreachable, or, if it is, it's not the only
  // instruction in the block.
  auto *TI = BBToCheck.getTerminator();
  bool isUnreachable = isa<UnreachableInst>(TI);
  return !isUnreachable ||
         (isUnreachable && (BBToCheck.getFirstNonPHIOrDbg() != TI));
};

SmallVector<int, 4> ExitCaseIndices;
for (auto Case : SI.cases())
  if (IsTriviallyUnswitchableExitBlock(*Case.getCaseSuccessor()))
    ExitCaseIndices.push_back(Case.getCaseIndex());
BasicBlock *DefaultExitBB = nullptr;
// Read the weight of successor 0 (the default destination) before the switch
// is edited; it is reapplied to the unswitched switch further down.
SwitchInstProfUpdateWrapper::CaseWeightOpt DefaultCaseWeight =
    SwitchInstProfUpdateWrapper::getSuccessorWeight(SI, 0);
if (IsTriviallyUnswitchableExitBlock(*SI.getDefaultDest())) {
  DefaultExitBB = SI.getDefaultDest();
} else if (ExitCaseIndices.empty())
  return false;

LLVM_DEBUG(dbgs() << "    unswitching trivial switch...\n")do { } while (false);

if (MSSAU && VerifyMemorySSA)
  MSSAU->getMemorySSA()->verifyMemorySSA();

// We may need to invalidate SCEVs for the outermost loop reached by any of
// the exits.
Loop *OuterL = &L;

if (DefaultExitBB) {
  // Clear out the default destination temporarily to allow accurate
  // predecessor lists to be examined below.
  SI.setDefaultDest(nullptr);
  // Check the loop containing this exit.
  // A null `ExitL` makes `OuterL` null as well; the `SE` update below then
  // forgets the topmost loop of `L` instead of a specific loop.
  Loop *ExitL = LI.getLoopFor(DefaultExitBB);
  if (!ExitL || ExitL->contains(OuterL))
    OuterL = ExitL;
}

// Store the exit cases into a separate data structure and remove them from
// the switch.
SmallVector<std::tuple<ConstantInt *, BasicBlock *,
                       SwitchInstProfUpdateWrapper::CaseWeightOpt>,
            4> ExitCases;
ExitCases.reserve(ExitCaseIndices.size());
SwitchInstProfUpdateWrapper SIW(SI);
// We walk the case indices backwards so that we remove the last case first
// and don't disrupt the earlier indices.
for (unsigned Index : reverse(ExitCaseIndices)) {
  auto CaseI = SI.case_begin() + Index;
  // Compute the outer loop from this exit.
  Loop *ExitL = LI.getLoopFor(CaseI->getCaseSuccessor());
  if (!ExitL || ExitL->contains(OuterL))
    OuterL = ExitL;
  // Save the value of this case.
  auto W = SIW.getSuccessorWeight(CaseI->getSuccessorIndex());
  ExitCases.emplace_back(CaseI->getCaseValue(), CaseI->getCaseSuccessor(), W);
  // Delete the unswitched cases.
  SIW.removeCase(CaseI);
}

if (SE) {
  if (OuterL)
    SE->forgetLoop(OuterL);
  else
    SE->forgetTopmostLoop(&L);
}

// Check if after this all of the remaining cases point at the same
// successor.
BasicBlock *CommonSuccBB = nullptr;
if (SI.getNumCases() > 0 &&
    all_of(drop_begin(SI.cases()), [&SI](const SwitchInst::CaseHandle &Case) {
      return Case.getCaseSuccessor() == SI.case_begin()->getCaseSuccessor();
    }))
  CommonSuccBB = SI.case_begin()->getCaseSuccessor();
if (!DefaultExitBB) {
  // If we're not unswitching the default, we need it to match any cases to
  // have a common successor or if we have no cases it is the common
  // successor.
  if (SI.getNumCases() == 0)
    CommonSuccBB = SI.getDefaultDest();
  else if (SI.getDefaultDest() != CommonSuccBB)
    CommonSuccBB = nullptr;
}

// Split the preheader, so that we know that there is a safe place to insert
// the switch.
// NOTE(review): `OldPH` is used without a null check; presumably callers
// guarantee the loop has a preheader -- confirm.
BasicBlock *OldPH = L.getLoopPreheader();
BasicBlock *NewPH = SplitEdge(OldPH, L.getHeader(), &DT, &LI, MSSAU);
OldPH->getTerminator()->eraseFromParent();

// Now add the unswitched switch.
// It is created with `NewPH` as its default destination; the default is
// re-pointed below when the original default was an unswitched exit.
auto *NewSI = SwitchInst::Create(LoopCond, NewPH, ExitCases.size(), OldPH);
SwitchInstProfUpdateWrapper NewSIW(*NewSI);

// Rewrite the IR for the unswitched basic blocks. This requires two steps.
// First, we split any exit blocks with remaining in-loop predecessors. Then
// we update the PHIs in one of two ways depending on if there was a split.
// We walk in reverse so that we split in the same order as the cases
// appeared. This is purely for convenience of reading the resulting IR, but
// it doesn't cost anything really.
SmallPtrSet<BasicBlock *, 2> UnswitchedExitBBs;
SmallDenseMap<BasicBlock *, BasicBlock *, 2> SplitExitBBMap;
// Handle the default exit if necessary.
// FIXME: It'd be great if we could merge this with the loop below but LLVM's
// ranges aren't quite powerful enough yet.
if (DefaultExitBB) {
  // The default edge was cleared and the exit cases were removed above, so an
  // empty predecessor list means no other edge into this block remains.
  if (pred_empty(DefaultExitBB)) {
    UnswitchedExitBBs.insert(DefaultExitBB);
    rewritePHINodesForUnswitchedExitBlock(*DefaultExitBB, *ParentBB, *OldPH);
  } else {
    auto *SplitBB =
        SplitBlock(DefaultExitBB, &DefaultExitBB->front(), &DT, &LI, MSSAU);
    rewritePHINodesForExitAndUnswitchedBlocks(*DefaultExitBB, *SplitBB,
                                              *ParentBB, *OldPH,
                                              /*FullUnswitch*/ true);
    // Record the split (keyed by the original exit block) and make the
    // default exit refer to the split block from here on.
    DefaultExitBB = SplitExitBBMap[DefaultExitBB] = SplitBB;
  }
}
// Note that we must use a reference in the for loop so that we update the
// container.
for (auto &ExitCase : reverse(ExitCases)) {
  // Grab a reference to the exit block in the pair so that we can update it.
  BasicBlock *ExitBB = std::get<1>(ExitCase);

  // If this case is the last edge into the exit block, we can simply reuse it
  // as it will no longer be a loop exit. No mapping necessary.
  if (pred_empty(ExitBB)) {
    // Only rewrite once.
    if (UnswitchedExitBBs.insert(ExitBB).second)
      rewritePHINodesForUnswitchedExitBlock(*ExitBB, *ParentBB, *OldPH);
    continue;
  }

  // Otherwise we need to split the exit block so that we retain an exit
  // block from the loop and a target for the unswitched condition.
  BasicBlock *&SplitExitBB = SplitExitBBMap[ExitBB];
  if (!SplitExitBB) {
    // If this is the first time we see this, do the split and remember it.
    SplitExitBB = SplitBlock(ExitBB, &ExitBB->front(), &DT, &LI, MSSAU);
    rewritePHINodesForExitAndUnswitchedBlocks(*ExitBB, *SplitExitBB,
                                              *ParentBB, *OldPH,
                                              /*FullUnswitch*/ true);
  }
  // Update the case pair to point to the split block.
  std::get<1>(ExitCase) = SplitExitBB;
}

// Now add the unswitched cases. We do this in reverse order as we built them
// in reverse order.
for (auto &ExitCase : reverse(ExitCases)) {
  ConstantInt *CaseVal = std::get<0>(ExitCase);
  BasicBlock *UnswitchedBB = std::get<1>(ExitCase);

  NewSIW.addCase(CaseVal, UnswitchedBB, std::get<2>(ExitCase));
}

// If the default was unswitched, re-point it and add explicit cases for
// entering the loop.
if (DefaultExitBB) {
  NewSIW->setDefaultDest(DefaultExitBB);
  NewSIW.setSuccessorWeight(0, DefaultCaseWeight);

  // We removed all the exit cases, so we just copy the cases to the
  // unswitched switch.
  for (const auto &Case : SI.cases())
    NewSIW.addCase(Case.getCaseValue(), NewPH,
                   SIW.getSuccessorWeight(Case.getSuccessorIndex()));
} else if (DefaultCaseWeight) {
  // We have to set branch weight of the default case.
  // The new default enters the loop, so its weight is the old default weight
  // plus the weights of all cases still left on the in-loop switch.
  uint64_t SW = *DefaultCaseWeight;
  for (const auto &Case : SI.cases()) {
    auto W = SIW.getSuccessorWeight(Case.getSuccessorIndex());
    assert(W &&((void)0)
           "case weight must be defined as default case weight is defined")((void)0);
    SW += *W;
  }
  NewSIW.setSuccessorWeight(0, SW);
}

// If we ended up with a common successor for every path through the switch
// after unswitching, rewrite it to an unconditional branch to make it easy
// to recognize. Otherwise we potentially have to recognize the default case
// pointing at unreachable and other complexity.
if (CommonSuccBB) {
  BasicBlock *BB = SI.getParent();
  // We may have had multiple edges to this common successor block, so remove
  // them as predecessors. We skip the first one, either the default or the
  // actual first case.
  bool SkippedFirst = DefaultExitBB == nullptr;
  for (auto Case : SI.cases()) {
    assert(Case.getCaseSuccessor() == CommonSuccBB &&((void)0)
           "Non-common successor!")((void)0);
    (void)Case;
    if (!SkippedFirst) {
      SkippedFirst = true;
      continue;
    }
    CommonSuccBB->removePredecessor(BB,
                                    /*KeepOneInputPHIs*/ true);
  }
  // Now nuke the switch and replace it with a direct branch.
  SIW.eraseFromParent();
  BranchInst::Create(CommonSuccBB, BB);
} else if (DefaultExitBB) {
  assert(SI.getNumCases() > 0 &&((void)0)
         "If we had no cases we'd have a common successor!")((void)0);
  // Move the last case to the default successor. This is valid as if the
  // default got unswitched it cannot be reached. This has the advantage of
  // being simple and keeping the number of edges from this switch to
  // successors the same, and avoiding any PHI update complexity.
  auto LastCaseI = std::prev(SI.case_end());

  SI.setDefaultDest(LastCaseI->getCaseSuccessor());
  SIW.setSuccessorWeight(
      0, SIW.getSuccessorWeight(LastCaseI->getSuccessorIndex()));
  SIW.removeCase(LastCaseI);
}

// Walk the unswitched exit blocks and the unswitched split blocks and update
// the dominator tree based on the CFG edits. While we are walking unordered
// containers here, the API for applyUpdates takes an unordered list of
// updates and requires them to not contain duplicates.
SmallVector<DominatorTree::UpdateType, 4> DTUpdates;
for (auto *UnswitchedExitBB : UnswitchedExitBBs) {
  DTUpdates.push_back({DT.Delete, ParentBB, UnswitchedExitBB});
  DTUpdates.push_back({DT.Insert, OldPH, UnswitchedExitBB});
}
// Each map entry is (original exit block, split block): the edge from the
// switch's block to the original exit is deleted and an edge from the old
// preheader to the split block is inserted.
for (auto SplitUnswitchedPair : SplitExitBBMap) {
  DTUpdates.push_back({DT.Delete, ParentBB, SplitUnswitchedPair.first});
  DTUpdates.push_back({DT.Insert, OldPH, SplitUnswitchedPair.second});
}

if (MSSAU) {
  MSSAU->applyUpdates(DTUpdates, DT, /*UpdateDT=*/true);
  if (VerifyMemorySSA)
    MSSAU->getMemorySSA()->verifyMemorySSA();
} else {
  DT.applyUpdates(DTUpdates);
}

assert(DT.verify(DominatorTree::VerificationLevel::Fast))((void)0);

// We may have changed the nesting relationship for this loop so hoist it to
// its correct parent if needed.
hoistLoopToNewParent(L, *NewPH, DT, LI, MSSAU, SE);

if (MSSAU && VerifyMemorySSA)
  MSSAU->getMemorySSA()->verifyMemorySSA();

++NumTrivial;
++NumSwitches;
LLVM_DEBUG(dbgs() << "    done: unswitching trivial switch...\n")do { } while (false);
return true;
936}

938/// This routine scans the loop to find a branch or switch which occurs before
939/// any side effects occur. These can potentially be unswitched without
940/// duplicating the loop. If a branch or switch is successfully unswitched the
941/// scanning continues to see if subsequent branches or switches have become
942/// trivial. Once all trivial candidates have been unswitched, this routine
943/// returns.
944///
945/// The return value indicates whether anything was unswitched (and therefore
946/// changed).
947///
948/// If `SE` is not null, it will be updated based on the potential loop SCEVs
949/// invalidated by this.
950static bool unswitchAllTrivialConditions(Loop &L, DominatorTree &DT,
                                       LoopInfo &LI, ScalarEvolution *SE,
                                       MemorySSAUpdater *MSSAU) {
bool Changed = false;

// If loop header has only one reachable successor we should keep looking for
// trivial condition candidates in the successor as well. An alternative is
// to constant fold conditions and merge successors into loop header (then we
// only need to check header's terminator). The reason for not doing this in
// LoopUnswitch pass is that it could potentially break LoopPassManager's
// invariants. Folding dead branches could either eliminate the current loop
// or make other loops unreachable. LCSSA form might also not be preserved
// after deleting branches. The following code keeps traversing loop header's
// successors until it finds the trivial condition candidate (condition that
// is not a constant). Since unswitching generates branches with constant
// conditions, this scenario could be very common in practice.
BasicBlock *CurrentBB = L.getHeader();
SmallPtrSet<BasicBlock *, 8> Visited;
Visited.insert(CurrentBB);
do {
  // Check if there are any side-effecting instructions (e.g. stores, calls,
  // volatile loads) in the part of the loop that the code *would* execute
  // without unswitching.
  if (MSSAU) // Possible early exit with MSSA
    if (auto *Defs = MSSAU->getMemorySSA()->getBlockDefs(CurrentBB))
      if (!isa<MemoryPhi>(*Defs->begin()) || (++Defs->begin() != Defs->end()))
        return Changed;
  if (llvm::any_of(*CurrentBB,
                   [](Instruction &I) { return I.mayHaveSideEffects(); }))
    return Changed;

  Instruction *CurrentTerm = CurrentBB->getTerminator();

  if (auto *SI = dyn_cast<SwitchInst>(CurrentTerm)) {
    // Don't bother trying to unswitch past a switch with a constant
    // condition. This should be removed prior to running this pass by
    // simplifycfg.
    if (isa<Constant>(SI->getCondition()))
      return Changed;

    if (!unswitchTrivialSwitch(L, *SI, DT, LI, SE, MSSAU))
      // Couldn't unswitch this one so we're done.
      return Changed;

    // Mark that we managed to unswitch something.
    Changed = true;

    // If unswitching turned the terminator into an unconditional branch then
    // we can continue. The unswitching logic specifically works to fold any
    // cases it can into an unconditional branch to make it easier to
    // recognize here.
    auto *BI = dyn_cast<BranchInst>(CurrentBB->getTerminator());
    if (!BI || BI->isConditional())
      return Changed;

    CurrentBB = BI->getSuccessor(0);
    continue;
  }

  auto *BI = dyn_cast<BranchInst>(CurrentTerm);
  if (!BI)
    // We do not understand other terminator instructions.
    return Changed;

  // Don't bother trying to unswitch past an unconditional branch or a branch
  // with a constant value. These should be removed by simplifycfg prior to
  // running this pass.
  if (!BI->isConditional() || isa<Constant>(BI->getCondition()))
    return Changed;

  // Found a trivial condition candidate: non-foldable conditional branch. If
  // we fail to unswitch this, we can't do anything else that is trivial.
  if (!unswitchTrivialBranch(L, *BI, DT, LI, SE, MSSAU))
    return Changed;

  // Mark that we managed to unswitch something.
  Changed = true;

  // If we only unswitched some of the conditions feeding the branch, we won't
  // have collapsed it to a single successor.
  BI = cast<BranchInst>(CurrentBB->getTerminator());
  if (BI->isConditional())
    return Changed;

  // Follow the newly unconditional branch into its successor.
  CurrentBB = BI->getSuccessor(0);

  // When continuing, if we exit the loop or reach a previous visited block,
  // then we can not reach any trivial condition candidates (unfoldable
  // branch instructions or switch instructions) and no unswitch can happen.
} while (L.contains(CurrentBB) && Visited.insert(CurrentBB).second);

return Changed;
1043}

1045/// Build the cloned blocks for an unswitched copy of the given loop.
1046///
1047/// The cloned blocks are inserted before the loop preheader (`LoopPH`) and
1048/// after the split block (`SplitBB`) that will be used to select between the
1049/// cloned and original loop.
1050///
1051/// This routine handles cloning all of the necessary loop blocks and exit
1052/// blocks including rewriting their instructions and the relevant PHI nodes.
1053/// Any loop blocks or exit blocks which are dominated by a different successor
1054/// than the one for this clone of the loop blocks can be trivially skipped. We
1055/// use the `DominatingSucc` map to determine whether a block satisfies that
1056/// property with a simple map lookup.
1057///
1058/// It also correctly creates the unconditional branch in the cloned
1059/// unswitched parent block to only point at the unswitched successor.
1060///
1061/// This does not handle most of the necessary updates to `LoopInfo`. Only exit
1062/// block splitting is correctly reflected in `LoopInfo`, essentially all of
1063/// the cloned blocks (and their loops) are left without full `LoopInfo`
1064/// updates. This also doesn't fully update `DominatorTree`. It adds the cloned
1065/// blocks to them but doesn't create the cloned `DominatorTree` structure and
1066/// instead the caller must recompute an accurate DT. It *does* correctly
1067/// update the `AssumptionCache` provided in `AC`.
///
/// Every original-to-clone mapping is recorded in `VMap`, and one insert
/// update per distinct (cloned block, successor) edge is appended to
/// `DTUpdates`.
///
/// Returns the clone of `LoopPH`.
1068static BasicBlock *buildClonedLoopBlocks(
  Loop &L, BasicBlock *LoopPH, BasicBlock *SplitBB,
  ArrayRef<BasicBlock *> ExitBlocks, BasicBlock *ParentBB,
  BasicBlock *UnswitchedSuccBB, BasicBlock *ContinueSuccBB,
  const SmallDenseMap<BasicBlock *, BasicBlock *, 16> &DominatingSucc,
  ValueToValueMapTy &VMap,
  SmallVectorImpl<DominatorTree::UpdateType> &DTUpdates, AssumptionCache &AC,
  DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU) {
SmallVector<BasicBlock *, 4> NewBlocks;
// Capacity hint only: the cloned preheader is not counted here, and skipped
// blocks are never cloned at all.
NewBlocks.reserve(L.getNumBlocks() + ExitBlocks.size());

// We will need to clone a bunch of blocks, wrap up the clone operation in
// a helper.
auto CloneBlock = [&](BasicBlock *OldBB) {
  // Clone the basic block and insert it before the new preheader.
  BasicBlock *NewBB = CloneBasicBlock(OldBB, VMap, ".us", OldBB->getParent());
  NewBB->moveBefore(LoopPH);

  // Record this block and the mapping.
  NewBlocks.push_back(NewBB);
  VMap[OldBB] = NewBB;

  return NewBB;
};

// We skip cloning blocks when they have a dominating succ that is not the
// succ we are cloning for.
auto SkipBlock = [&](BasicBlock *BB) {
  auto It = DominatingSucc.find(BB);
  return It != DominatingSucc.end() && It->second != UnswitchedSuccBB;
};

// First, clone the preheader.
auto *ClonedPH = CloneBlock(LoopPH);

// Then clone all the loop blocks, skipping the ones that aren't necessary.
for (auto *LoopBB : L.blocks())
  if (!SkipBlock(LoopBB))
    CloneBlock(LoopBB);

// Split all the loop exit edges so that when we clone the exit blocks, if
// any of the exit blocks are *also* a preheader for some other loop, we
// don't create multiple predecessors entering the loop header.
for (auto *ExitBB : ExitBlocks) {
  if (SkipBlock(ExitBB))
    continue;

  // When we are going to clone an exit, we don't need to clone all the
  // instructions in the exit block and we want to ensure we have an easy
  // place to merge the CFG, so split the exit first. This is always safe to
  // do because there cannot be any non-loop predecessors of a loop exit in
  // loop simplified form.
  auto *MergeBB = SplitBlock(ExitBB, &ExitBB->front(), &DT, &LI, MSSAU);

  // Rearrange the names to make it easier to write test cases by having the
  // exit block carry the suffix rather than the merge block carrying the
  // suffix.
  MergeBB->takeName(ExitBB);
  ExitBB->setName(Twine(MergeBB->getName()) + ".split");

  // Now clone the original exit block.
  auto *ClonedExitBB = CloneBlock(ExitBB);
  assert(ClonedExitBB->getTerminator()->getNumSuccessors() == 1 &&((void)0)
         "Exit block should have been split to have one successor!")((void)0);
  assert(ClonedExitBB->getTerminator()->getSuccessor(0) == MergeBB &&((void)0)
         "Cloned exit block has the wrong successor!")((void)0);

  // Remap any cloned instructions and create a merge phi node for them.
  // Both ranges stop one short of `end()`, so the last instruction of each
  // block (its terminator) is left out of the pairing.
  for (auto ZippedInsts : llvm::zip_first(
           llvm::make_range(ExitBB->begin(), std::prev(ExitBB->end())),
           llvm::make_range(ClonedExitBB->begin(),
                            std::prev(ClonedExitBB->end())))) {
    Instruction &I = std::get<0>(ZippedInsts);
    Instruction &ClonedI = std::get<1>(ZippedInsts);

    // The only instructions in the exit block should be PHI nodes and
    // potentially a landing pad.
    assert(((void)0)
        (isa<PHINode>(I) || isa<LandingPadInst>(I) || isa<CatchPadInst>(I)) &&((void)0)
        "Bad instruction in exit block!")((void)0);
    // We should have a value map between the instruction and its clone.
    assert(VMap.lookup(&I) == &ClonedI && "Mismatch in the value map!")((void)0);

    // Replace all uses of the original first, then add the incoming values:
    // adding `I` as an incoming value before the replacement would cause that
    // use to be rewritten to the merge PHI itself.
    auto *MergePN =
        PHINode::Create(I.getType(), /*NumReservedValues*/ 2, ".us-phi",
                        &*MergeBB->getFirstInsertionPt());
    I.replaceAllUsesWith(MergePN);
    MergePN->addIncoming(&I, ExitBB);
    MergePN->addIncoming(&ClonedI, ClonedExitBB);
  }
}

// Rewrite the instructions in the cloned blocks to refer to the instructions
// in the cloned blocks. We have to do this as a second pass so that we have
// everything available. Also, we have inserted new instructions which may
// include assume intrinsics, so we update the assumption cache while
// processing this.
for (auto *ClonedBB : NewBlocks)
  for (Instruction &I : *ClonedBB) {
    RemapInstruction(&I, VMap,
                     RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
    if (auto *II = dyn_cast<AssumeInst>(&I))
      AC.registerAssumption(II);
  }

// Update any PHI nodes in the cloned successors of the skipped blocks to not
// have spurious incoming values.
// Successors with no entry in `VMap` were not cloned and need no fix-up.
for (auto *LoopBB : L.blocks())
  if (SkipBlock(LoopBB))
    for (auto *SuccBB : successors(LoopBB))
      if (auto *ClonedSuccBB = cast_or_null<BasicBlock>(VMap.lookup(SuccBB)))
        for (PHINode &PN : ClonedSuccBB->phis())
          PN.removeIncomingValue(LoopBB, /*DeletePHIIfEmpty*/ false);

// Remove the cloned parent as a predecessor of any successor we ended up
// cloning other than the unswitched one.
auto *ClonedParentBB = cast<BasicBlock>(VMap.lookup(ParentBB));
for (auto *SuccBB : successors(ParentBB)) {
  if (SuccBB == UnswitchedSuccBB)
    continue;

  auto *ClonedSuccBB = cast_or_null<BasicBlock>(VMap.lookup(SuccBB));
  if (!ClonedSuccBB)
    continue;

  ClonedSuccBB->removePredecessor(ClonedParentBB,
                                  /*KeepOneInputPHIs*/ true);
}

// Replace the cloned branch with an unconditional branch to the cloned
// unswitched successor.
auto *ClonedSuccBB = cast<BasicBlock>(VMap.lookup(UnswitchedSuccBB));
Instruction *ClonedTerminator = ClonedParentBB->getTerminator();
// Trivial Simplification. If Terminator is a conditional branch and
// condition becomes dead - erase it.
Value *ClonedConditionToErase = nullptr;
if (auto *BI = dyn_cast<BranchInst>(ClonedTerminator))
  ClonedConditionToErase = BI->getCondition();
else if (auto *SI = dyn_cast<SwitchInst>(ClonedTerminator))
  ClonedConditionToErase = SI->getCondition();

ClonedTerminator->eraseFromParent();
BranchInst::Create(ClonedSuccBB, ClonedParentBB);

// The condition is only examined after its user (the old terminator) has been
// erased above.
if (ClonedConditionToErase)
  RecursivelyDeleteTriviallyDeadInstructions(ClonedConditionToErase, nullptr,
                                             MSSAU);

// If there are duplicate entries in the PHI nodes because of multiple edges
// to the unswitched successor, we need to nuke all but one as we replaced it
// with a direct branch.
for (PHINode &PN : ClonedSuccBB->phis()) {
  bool Found = false;
  // Loop over the incoming operands backwards so we can easily delete as we
  // go without invalidating the index.
  // The index is signed so that the `>= 0` test ends the loop after index 0.
  for (int i = PN.getNumOperands() - 1; i >= 0; --i) {
    if (PN.getIncomingBlock(i) != ClonedParentBB)
      continue;
    if (!Found) {
      Found = true;
      continue;
    }
    PN.removeIncomingValue(i, /*DeletePHIIfEmpty*/ false);
  }
}

// Record the domtree updates for the new blocks.
// `SuccSet` de-duplicates the successors of one cloned block so that each
// edge is recorded once; it is cleared before moving to the next block.
SmallPtrSet<BasicBlock *, 4> SuccSet;
for (auto *ClonedBB : NewBlocks) {
  for (auto *SuccBB : successors(ClonedBB))
    if (SuccSet.insert(SuccBB).second)
      DTUpdates.push_back({DominatorTree::Insert, ClonedBB, SuccBB});
  SuccSet.clear();
}

return ClonedPH;
1244}

1246/// Recursively clone the specified loop and all of its children.
1247///
1248/// The target parent loop for the clone should be provided, or can be null if
1249/// the clone is a top-level loop. While cloning, all the blocks are mapped
1250/// with the provided value map. The entire original loop must be present in
1251/// the value map. The cloned loop is returned.
1252static Loop *cloneLoopNest(Loop &OrigRootL, Loop *RootParentL,
                         const ValueToValueMapTy &VMap, LoopInfo &LI) {
auto AddClonedBlocksToLoop = [&](Loop &OrigL, Loop &ClonedL) {
  assert(ClonedL.getBlocks().empty() && "Must start with an empty loop!")((void)0);
  ClonedL.reserveBlocks(OrigL.getNumBlocks());
  for (auto *BB : OrigL.blocks()) {
    auto *ClonedBB = cast<BasicBlock>(VMap.lookup(BB));
    ClonedL.addBlockEntry(ClonedBB);
    if (LI.getLoopFor(BB) == &OrigL)
      LI.changeLoopFor(ClonedBB, &ClonedL);
  }
};

// We specially handle the first loop because it may get cloned into
// a different parent and because we most commonly are cloning leaf loops.
Loop *ClonedRootL = LI.AllocateLoop();
if (RootParentL)
  RootParentL->addChildLoop(ClonedRootL);
else
  LI.addTopLevelLoop(ClonedRootL);
AddClonedBlocksToLoop(OrigRootL, *ClonedRootL);

if (OrigRootL.isInnermost())
  return ClonedRootL;

// If we have a nest, we can quickly clone the entire loop nest using an
// iterative approach because it is a tree. We keep the cloned parent in the
// data structure to avoid repeatedly querying through a map to find it.
SmallVector<std::pair<Loop *, Loop *>, 16> LoopsToClone;
// Build up the loops to clone in reverse order as we'll clone them from the
// back.
for (Loop *ChildL : llvm::reverse(OrigRootL))
  LoopsToClone.push_back({ClonedRootL, ChildL});
do {
  Loop *ClonedParentL, *L;
  std::tie(ClonedParentL, L) = LoopsToClone.pop_back_val();
  Loop *ClonedL = LI.AllocateLoop();
  ClonedParentL->addChildLoop(ClonedL);
  AddClonedBlocksToLoop(*L, *ClonedL);
  for (Loop *ChildL : llvm::reverse(*L))
    LoopsToClone.push_back({ClonedL, ChildL});
} while (!LoopsToClone.empty());

return ClonedRootL;
1296}

1298/// Build the cloned loops of an original loop from unswitching.
1299///
1300/// Because unswitching simplifies the CFG of the loop, this isn't a trivial
1301/// operation. We need to re-verify that there even is a loop (as the backedge
1302/// may not have been cloned), and even if there are remaining backedges the
1303/// backedge set may be different. However, we know that each child loop is
1304/// undisturbed, we only need to find where to place each child loop within
1305/// either any parent loop or within a cloned version of the original loop.
1306///
1307/// Because child loops may end up cloned outside of any cloned version of the
1308/// original loop, multiple cloned sibling loops may be created. All of them
1309/// are returned so that the newly introduced loop nest roots can be
1310/// identified.
1311static void buildClonedLoops(Loop &OrigL, ArrayRef<BasicBlock *> ExitBlocks,
                           const ValueToValueMapTy &VMap, LoopInfo &LI,
                           SmallVectorImpl<Loop *> &NonChildClonedLoops) {
Loop *ClonedL = nullptr;

auto *OrigPH = OrigL.getLoopPreheader();
auto *OrigHeader = OrigL.getHeader();

auto *ClonedPH = cast<BasicBlock>(VMap.lookup(OrigPH));
auto *ClonedHeader = cast<BasicBlock>(VMap.lookup(OrigHeader));

// We need to know the loops of the cloned exit blocks to even compute the
// accurate parent loop. If we only clone exits to some parent of the
// original parent, we want to clone into that outer loop. We also keep track
// of the loops that our cloned exit blocks participate in.
Loop *ParentL = nullptr;
SmallVector<BasicBlock *, 4> ClonedExitsInLoops;
SmallDenseMap<BasicBlock *, Loop *, 16> ExitLoopMap;
ClonedExitsInLoops.reserve(ExitBlocks.size());
for (auto *ExitBB : ExitBlocks)
  if (auto *ClonedExitBB = cast_or_null<BasicBlock>(VMap.lookup(ExitBB)))
    if (Loop *ExitL = LI.getLoopFor(ExitBB)) {
      ExitLoopMap[ClonedExitBB] = ExitL;
      ClonedExitsInLoops.push_back(ClonedExitBB);
      if (!ParentL || (ParentL != ExitL && ParentL->contains(ExitL)))
        ParentL = ExitL;
    }
assert((!ParentL || ParentL == OrigL.getParentLoop() ||((void)0)
        ParentL->contains(OrigL.getParentLoop())) &&((void)0)
       "The computed parent loop should always contain (or be) the parent of "((void)0)
       "the original loop.")((void)0);

// We build the set of blocks dominated by the cloned header from the set of
// cloned blocks out of the original loop. While not all of these will
// necessarily be in the cloned loop, it is enough to establish that they
// aren't in unreachable cycles, etc.
SmallSetVector<BasicBlock *, 16> ClonedLoopBlocks;
for (auto *BB : OrigL.blocks())
  if (auto *ClonedBB = cast_or_null<BasicBlock>(VMap.lookup(BB)))
    ClonedLoopBlocks.insert(ClonedBB);

// Rebuild the set of blocks that will end up in the cloned loop. We may have
// skipped cloning some region of this loop which can in turn skip some of
// the backedges so we have to rebuild the blocks in the loop based on the
// backedges that remain after cloning.
SmallVector<BasicBlock *, 16> Worklist;
SmallPtrSet<BasicBlock *, 16> BlocksInClonedLoop;
for (auto *Pred : predecessors(ClonedHeader)) {
  // The only possible non-loop header predecessor is the preheader because
  // we know we cloned the loop in simplified form.
  if (Pred == ClonedPH)
    continue;

  // Because the loop was in simplified form, the only non-loop predecessor
  // should be the preheader.
  assert(ClonedLoopBlocks.count(Pred) && "Found a predecessor of the loop "((void)0)
                                         "header other than the preheader "((void)0)
                                         "that is not part of the loop!")((void)0);

  // Insert this block into the loop set and on the first visit (and if it
  // isn't the header we're currently walking) put it into the worklist to
  // recurse through.
  if (BlocksInClonedLoop.insert(Pred).second && Pred != ClonedHeader)
    Worklist.push_back(Pred);
}

// If we had any backedges then there *is* a cloned loop. Put the header into
// the loop set and then walk the worklist backwards to find all the blocks
// that remain within the loop after cloning.
if (!BlocksInClonedLoop.empty()) {
  BlocksInClonedLoop.insert(ClonedHeader);

  while (!Worklist.empty()) {
    BasicBlock *BB = Worklist.pop_back_val();
    assert(BlocksInClonedLoop.count(BB) &&((void)0)
           "Didn't put block into the loop set!")((void)0);

    // Insert any predecessors that are in the possible set into the cloned
    // set, and if the insert is successful, add them to the worklist. Note
    // that we filter on the blocks that are definitely reachable via the
    // backedge to the loop header so we may prune out dead code within the
    // cloned loop.
    for (auto *Pred : predecessors(BB))
      if (ClonedLoopBlocks.count(Pred) &&
          BlocksInClonedLoop.insert(Pred).second)
        Worklist.push_back(Pred);
  }

  ClonedL = LI.AllocateLoop();
  if (ParentL) {
    ParentL->addBasicBlockToLoop(ClonedPH, LI);
    ParentL->addChildLoop(ClonedL);
  } else {
    LI.addTopLevelLoop(ClonedL);
  }
  NonChildClonedLoops.push_back(ClonedL);

  ClonedL->reserveBlocks(BlocksInClonedLoop.size());
  // We don't want to just add the cloned loop blocks based on how we
  // discovered them. The original order of blocks was carefully built in
  // a way that doesn't rely on predecessor ordering. Rather than re-invent
  // that logic, we just re-walk the original blocks (and those of the child
  // loops) and filter them as we add them into the cloned loop.
  for (auto *BB : OrigL.blocks()) {
    auto *ClonedBB = cast_or_null<BasicBlock>(VMap.lookup(BB));
    if (!ClonedBB || !BlocksInClonedLoop.count(ClonedBB))
      continue;

    // Directly add the blocks that are only in this loop.
    if (LI.getLoopFor(BB) == &OrigL) {
      ClonedL->addBasicBlockToLoop(ClonedBB, LI);
      continue;
    }

    // We want to manually add it to this loop and parents.
    // Registering it with LoopInfo will happen when we clone the top
    // loop for this block.
    for (Loop *PL = ClonedL; PL; PL = PL->getParentLoop())
      PL->addBlockEntry(ClonedBB);
  }

  // Now add each child loop whose header remains within the cloned loop. All
  // of the blocks within the loop must satisfy the same constraints as the
  // header so once we pass the header checks we can just clone the entire
  // child loop nest.
  for (Loop *ChildL : OrigL) {
    auto *ClonedChildHeader =
        cast_or_null<BasicBlock>(VMap.lookup(ChildL->getHeader()));
    if (!ClonedChildHeader || !BlocksInClonedLoop.count(ClonedChildHeader))
      continue;

1442#ifndef NDEBUG1
    // We should never have a cloned child loop header but fail to have
    // all of the blocks for that child loop.
    for (auto *ChildLoopBB : ChildL->blocks())
      assert(BlocksInClonedLoop.count(((void)0)
                 cast<BasicBlock>(VMap.lookup(ChildLoopBB))) &&((void)0)
             "Child cloned loop has a header within the cloned outer "((void)0)
             "loop but not all of its blocks!")((void)0);
1450#endif

    cloneLoopNest(*ChildL, ClonedL, VMap, LI);
  }
}

// Now that we've handled all the components of the original loop that were
// cloned into a new loop, we still need to handle anything from the original
// loop that wasn't in a cloned loop.

// Figure out what blocks are left to place within any loop nest containing
// the unswitched loop. If we never formed a loop, the cloned PH is one of
// them.
SmallPtrSet<BasicBlock *, 16> UnloopedBlockSet;
if (BlocksInClonedLoop.empty())
  UnloopedBlockSet.insert(ClonedPH);
for (auto *ClonedBB : ClonedLoopBlocks)
  if (!BlocksInClonedLoop.count(ClonedBB))
    UnloopedBlockSet.insert(ClonedBB);

// Copy the cloned exits and sort them in ascending loop depth, we'll work
// backwards across these to process them inside out. The order shouldn't
// matter as we're just trying to build up the map from inside-out; we use
// the map in a more stably ordered way below.
auto OrderedClonedExitsInLoops = ClonedExitsInLoops;
llvm::sort(OrderedClonedExitsInLoops, [&](BasicBlock *LHS, BasicBlock *RHS) {
  return ExitLoopMap.lookup(LHS)->getLoopDepth() <
         ExitLoopMap.lookup(RHS)->getLoopDepth();
});

// Populate the existing ExitLoopMap with everything reachable from each
// exit, starting from the inner most exit.
while (!UnloopedBlockSet.empty() && !OrderedClonedExitsInLoops.empty()) {
  assert(Worklist.empty() && "Didn't clear worklist!")((void)0);

  BasicBlock *ExitBB = OrderedClonedExitsInLoops.pop_back_val();
  Loop *ExitL = ExitLoopMap.lookup(ExitBB);

  // Walk the CFG back until we hit the cloned PH adding everything reachable
  // and in the unlooped set to this exit block's loop.
  Worklist.push_back(ExitBB);
  do {
    BasicBlock *BB = Worklist.pop_back_val();
    // We can stop recursing at the cloned preheader (if we get there).
    if (BB == ClonedPH)
      continue;

    for (BasicBlock *PredBB : predecessors(BB)) {
      // If this pred has already been moved to our set or is part of some
      // (inner) loop, no update needed.
      if (!UnloopedBlockSet.erase(PredBB)) {
        assert(((void)0)
            (BlocksInClonedLoop.count(PredBB) || ExitLoopMap.count(PredBB)) &&((void)0)
            "Predecessor not mapped to a loop!")((void)0);
        continue;
      }

      // We just insert into the loop set here. We'll add these blocks to the
      // exit loop after we build up the set in an order that doesn't rely on
      // predecessor order (which in turn relies on use list order).
      bool Inserted = ExitLoopMap.insert({PredBB, ExitL}).second;
      (void)Inserted;
      assert(Inserted && "Should only visit an unlooped block once!")((void)0);

      // And recurse through to its predecessors.
      Worklist.push_back(PredBB);
    }
  } while (!Worklist.empty());
}

// Now that the ExitLoopMap gives as  mapping for all the non-looping cloned
// blocks to their outer loops, walk the cloned blocks and the cloned exits
// in their original order adding them to the correct loop.

// We need a stable insertion order. We use the order of the original loop
// order and map into the correct parent loop.
for (auto *BB : llvm::concat<BasicBlock *const>(
         makeArrayRef(ClonedPH), ClonedLoopBlocks, ClonedExitsInLoops))
  if (Loop *OuterL = ExitLoopMap.lookup(BB))
    OuterL->addBasicBlockToLoop(BB, LI);

1531#ifndef NDEBUG1
for (auto &BBAndL : ExitLoopMap) {
  auto *BB = BBAndL.first;
  auto *OuterL = BBAndL.second;
  assert(LI.getLoopFor(BB) == OuterL &&((void)0)
         "Failed to put all blocks into outer loops!")((void)0);
}
1538#endif

// Now that all the blocks are placed into the correct containing loop in the
// absence of child loops, find all the potentially cloned child loops and
// clone them into whatever outer loop we placed their header into.
for (Loop *ChildL : OrigL) {
  auto *ClonedChildHeader =
      cast_or_null<BasicBlock>(VMap.lookup(ChildL->getHeader()));
  if (!ClonedChildHeader || BlocksInClonedLoop.count(ClonedChildHeader))
    continue;

1549#ifndef NDEBUG1
  for (auto *ChildLoopBB : ChildL->blocks())
    assert(VMap.count(ChildLoopBB) &&((void)0)
           "Cloned a child loop header but not all of that loops blocks!")((void)0);
1553#endif

  NonChildClonedLoops.push_back(cloneLoopNest(
      *ChildL, ExitLoopMap.lookup(ClonedChildHeader), VMap, LI));
}
1558}

1560static void
1561deleteDeadClonedBlocks(Loop &L, ArrayRef<BasicBlock *> ExitBlocks,
                     ArrayRef<std::unique_ptr<ValueToValueMapTy>> VMaps,
                     DominatorTree &DT, MemorySSAUpdater *MSSAU) {
// Find all the dead clones, and remove them from their successors.
SmallVector<BasicBlock *, 16> DeadBlocks;
for (BasicBlock *BB : llvm::concat<BasicBlock *const>(L.blocks(), ExitBlocks))
  for (auto &VMap : VMaps)
    if (BasicBlock *ClonedBB = cast_or_null<BasicBlock>(VMap->lookup(BB)))
      if (!DT.isReachableFromEntry(ClonedBB)) {
        for (BasicBlock *SuccBB : successors(ClonedBB))
          SuccBB->removePredecessor(ClonedBB);
        DeadBlocks.push_back(ClonedBB);
      }

// Remove all MemorySSA in the dead blocks
if (MSSAU) {
  SmallSetVector<BasicBlock *, 8> DeadBlockSet(DeadBlocks.begin(),
                                               DeadBlocks.end());
  MSSAU->removeBlocks(DeadBlockSet);
}

// Drop any remaining references to break cycles.
for (BasicBlock *BB : DeadBlocks)
  BB->dropAllReferences();
// Erase them from the IR.
for (BasicBlock *BB : DeadBlocks)
  BB->eraseFromParent();
1588}

1590static void
1591deleteDeadBlocksFromLoop(Loop &L,
                       SmallVectorImpl<BasicBlock *> &ExitBlocks,
                       DominatorTree &DT, LoopInfo &LI,
                       MemorySSAUpdater *MSSAU,
                       function_ref<void(Loop &, StringRef)> DestroyLoopCB) {
// Find all the dead blocks tied to this loop, and remove them from their
// successors.
SmallSetVector<BasicBlock *, 8> DeadBlockSet;

// Start with loop/exit blocks and get a transitive closure of reachable dead
// blocks.
SmallVector<BasicBlock *, 16> DeathCandidates(ExitBlocks.begin(),
                                              ExitBlocks.end());
DeathCandidates.append(L.blocks().begin(), L.blocks().end());
while (!DeathCandidates.empty()) {
  auto *BB = DeathCandidates.pop_back_val();
  if (!DeadBlockSet.count(BB) && !DT.isReachableFromEntry(BB)) {
    for (BasicBlock *SuccBB : successors(BB)) {
      SuccBB->removePredecessor(BB);
      DeathCandidates.push_back(SuccBB);
    }
    DeadBlockSet.insert(BB);
  }
}

// Remove all MemorySSA in the dead blocks
if (MSSAU)
  MSSAU->removeBlocks(DeadBlockSet);

// Filter out the dead blocks from the exit blocks list so that it can be
// used in the caller.
llvm::erase_if(ExitBlocks,
               [&](BasicBlock *BB) { return DeadBlockSet.count(BB); });

// Walk from this loop up through its parents removing all of the dead blocks.
for (Loop *ParentL = &L; ParentL; ParentL = ParentL->getParentLoop()) {
  for (auto *BB : DeadBlockSet)
    ParentL->getBlocksSet().erase(BB);
  llvm::erase_if(ParentL->getBlocksVector(),
                 [&](BasicBlock *BB) { return DeadBlockSet.count(BB); });
}

// Now delete the dead child loops. This raw delete will clear them
// recursively.
llvm::erase_if(L.getSubLoopsVector(), [&](Loop *ChildL) {
  if (!DeadBlockSet.count(ChildL->getHeader()))
    return false;

  assert(llvm::all_of(ChildL->blocks(),((void)0)
                      [&](BasicBlock *ChildBB) {((void)0)
                        return DeadBlockSet.count(ChildBB);((void)0)
                      }) &&((void)0)
         "If the child loop header is dead all blocks in the child loop must "((void)0)
         "be dead as well!")((void)0);
  DestroyLoopCB(*ChildL, ChildL->getName());
  LI.destroy(ChildL);
  return true;
});

// Remove the loop mappings for the dead blocks and drop all the references
// from these blocks to others to handle cyclic references as we start
// deleting the blocks themselves.
for (auto *BB : DeadBlockSet) {
  // Check that the dominator tree has already been updated.
  assert(!DT.getNode(BB) && "Should already have cleared domtree!")((void)0);
  LI.changeLoopFor(BB, nullptr);
  // Drop all uses of the instructions to make sure we won't have dangling
  // uses in other blocks.
  for (auto &I : *BB)
    if (!I.use_empty())
      I.replaceAllUsesWith(UndefValue::get(I.getType()));
  BB->dropAllReferences();
}

// Actually delete the blocks now that they've been fully unhooked from the
// IR.
for (auto *BB : DeadBlockSet)
  BB->eraseFromParent();
1669}

1671/// Recompute the set of blocks in a loop after unswitching.
1672///
1673/// This walks from the original headers predecessors to rebuild the loop. We
1674/// take advantage of the fact that new blocks can't have been added, and so we
1675/// filter by the original loop's blocks. This also handles potentially
1676/// unreachable code that we don't want to explore but might be found examining
1677/// the predecessors of the header.
1678///
1679/// If the original loop is no longer a loop, this will return an empty set. If
1680/// it remains a loop, all the blocks within it will be added to the set
1681/// (including those blocks in inner loops).
1682static SmallPtrSet<const BasicBlock *, 16> recomputeLoopBlockSet(Loop &L,
                                                               LoopInfo &LI) {
SmallPtrSet<const BasicBlock *, 16> LoopBlockSet;

auto *PH = L.getLoopPreheader();
auto *Header = L.getHeader();

// A worklist to use while walking backwards from the header.
SmallVector<BasicBlock *, 16> Worklist;

// First walk the predecessors of the header to find the backedges. This will
// form the basis of our walk.
for (auto *Pred : predecessors(Header)) {
  // Skip the preheader.
  if (Pred == PH)
    continue;

  // Because the loop was in simplified form, the only non-loop predecessor
  // is the preheader.
  assert(L.contains(Pred) && "Found a predecessor of the loop header other "((void)0)
                             "than the preheader that is not part of the "((void)0)
                             "loop!")((void)0);

  // Insert this block into the loop set and on the first visit and, if it
  // isn't the header we're currently walking, put it into the worklist to
  // recurse through.
  if (LoopBlockSet.insert(Pred).second && Pred != Header)
    Worklist.push_back(Pred);
}

// If no backedges were found, we're done.
if (LoopBlockSet.empty())
  return LoopBlockSet;

// We found backedges, recurse through them to identify the loop blocks.
while (!Worklist.empty()) {
  BasicBlock *BB = Worklist.pop_back_val();
  assert(LoopBlockSet.count(BB) && "Didn't put block into the loop set!")((void)0);

  // No need to walk past the header.
  if (BB == Header)
    continue;

  // Because we know the inner loop structure remains valid we can use the
  // loop structure to jump immediately across the entire nested loop.
  // Further, because it is in loop simplified form, we can directly jump
  // to its preheader afterward.
  if (Loop *InnerL = LI.getLoopFor(BB))
    if (InnerL != &L) {
      assert(L.contains(InnerL) &&((void)0)
             "Should not reach a loop *outside* this loop!")((void)0);
      // The preheader is the only possible predecessor of the loop so
      // insert it into the set and check whether it was already handled.
      auto *InnerPH = InnerL->getLoopPreheader();
      assert(L.contains(InnerPH) && "Cannot contain an inner loop block "((void)0)
                                    "but not contain the inner loop "((void)0)
                                    "preheader!")((void)0);
      if (!LoopBlockSet.insert(InnerPH).second)
        // The only way to reach the preheader is through the loop body
        // itself so if it has been visited the loop is already handled.
        continue;

      // Insert all of the blocks (other than those already present) into
      // the loop set. We expect at least the block that led us to find the
      // inner loop to be in the block set, but we may also have other loop
      // blocks if they were already enqueued as predecessors of some other
      // outer loop block.
      for (auto *InnerBB : InnerL->blocks()) {
        if (InnerBB == BB) {
          assert(LoopBlockSet.count(InnerBB) &&((void)0)
                 "Block should already be in the set!")((void)0);
          continue;
        }

        LoopBlockSet.insert(InnerBB);
      }

      // Add the preheader to the worklist so we will continue past the
      // loop body.
      Worklist.push_back(InnerPH);
      continue;
    }

  // Insert any predecessors that were in the original loop into the new
  // set, and if the insert is successful, add them to the worklist.
  for (auto *Pred : predecessors(BB))
    if (L.contains(Pred) && LoopBlockSet.insert(Pred).second)
      Worklist.push_back(Pred);
}

assert(LoopBlockSet.count(Header) && "Cannot fail to add the header!")((void)0);

// We've found all the blocks participating in the loop, return our completed
// set.
return LoopBlockSet;
1777}

1779/// Rebuild a loop after unswitching removes some subset of blocks and edges.
1780///
1781/// The removal may have removed some child loops entirely but cannot have
1782/// disturbed any remaining child loops. However, they may need to be hoisted
1783/// to the parent loop (or to be top-level loops). The original loop may be
1784/// completely removed.
1785///
1786/// The sibling loops resulting from this update are returned. If the original
1787/// loop remains a valid loop, it will be the first entry in this list with all
1788/// of the newly sibling loops following it.
1789///
1790/// Returns true if the loop remains a loop after unswitching, and false if it
1791/// is no longer a loop after unswitching (and should not continue to be
1792/// referenced).
1793static bool rebuildLoopAfterUnswitch(Loop &L, ArrayRef<BasicBlock *> ExitBlocks,
                                   LoopInfo &LI,
                                   SmallVectorImpl<Loop *> &HoistedLoops) {
auto *PH = L.getLoopPreheader();

// Compute the actual parent loop from the exit blocks. Because we may have
// pruned some exits the loop may be different from the original parent.
Loop *ParentL = nullptr;
SmallVector<Loop *, 4> ExitLoops;
SmallVector<BasicBlock *, 4> ExitsInLoops;
ExitsInLoops.reserve(ExitBlocks.size());
for (auto *ExitBB : ExitBlocks)
  if (Loop *ExitL = LI.getLoopFor(ExitBB)) {
    ExitLoops.push_back(ExitL);
    ExitsInLoops.push_back(ExitBB);
    if (!ParentL || (ParentL != ExitL && ParentL->contains(ExitL)))
      ParentL = ExitL;
  }

// Recompute the blocks participating in this loop. This may be empty if it
// is no longer a loop.
auto LoopBlockSet = recomputeLoopBlockSet(L, LI);

// If we still have a loop, we need to re-set the loop's parent as the exit
// block set changing may have moved it within the loop nest. Note that this
// can only happen when this loop has a parent as it can only hoist the loop
// *up* the nest.
if (!LoopBlockSet.empty() && L.getParentLoop() != ParentL) {
  // Remove this loop's (original) blocks from all of the intervening loops.
  for (Loop *IL = L.getParentLoop(); IL != ParentL;
       IL = IL->getParentLoop()) {
    IL->getBlocksSet().erase(PH);
    for (auto *BB : L.blocks())
      IL->getBlocksSet().erase(BB);
    llvm::erase_if(IL->getBlocksVector(), [&](BasicBlock *BB) {
      return BB == PH || L.contains(BB);
    });
  }

  LI.changeLoopFor(PH, ParentL);
  L.getParentLoop()->removeChildLoop(&L);
  if (ParentL)
    ParentL->addChildLoop(&L);
  else
    LI.addTopLevelLoop(&L);
}

// Now we update all the blocks which are no longer within the loop.
auto &Blocks = L.getBlocksVector();
auto BlocksSplitI =
    LoopBlockSet.empty()
        ? Blocks.begin()
        : std::stable_partition(
              Blocks.begin(), Blocks.end(),
              [&](BasicBlock *BB) { return LoopBlockSet.count(BB); });

// Before we erase the list of unlooped blocks, build a set of them.
SmallPtrSet<BasicBlock *, 16> UnloopedBlocks(BlocksSplitI, Blocks.end());
if (LoopBlockSet.empty())
  UnloopedBlocks.insert(PH);

// Now erase these blocks from the loop.
for (auto *BB : make_range(BlocksSplitI, Blocks.end()))
  L.getBlocksSet().erase(BB);
Blocks.erase(BlocksSplitI, Blocks.end());

// Sort the exits in ascending loop depth, we'll work backwards across these
// to process them inside out.
llvm::stable_sort(ExitsInLoops, [&](BasicBlock *LHS, BasicBlock *RHS) {
  return LI.getLoopDepth(LHS) < LI.getLoopDepth(RHS);
});

// We'll build up a set for each exit loop.
SmallPtrSet<BasicBlock *, 16> NewExitLoopBlocks;
Loop *PrevExitL = L.getParentLoop(); // The deepest possible exit loop.

auto RemoveUnloopedBlocksFromLoop =
    [](Loop &L, SmallPtrSetImpl<BasicBlock *> &UnloopedBlocks) {
      for (auto *BB : UnloopedBlocks)
        L.getBlocksSet().erase(BB);
      llvm::erase_if(L.getBlocksVector(), [&](BasicBlock *BB) {
        return UnloopedBlocks.count(BB);
      });
    };

SmallVector<BasicBlock *, 16> Worklist;
while (!UnloopedBlocks.empty() && !ExitsInLoops.empty()) {
  assert(Worklist.empty() && "Didn't clear worklist!")((void)0);
  assert(NewExitLoopBlocks.empty() && "Didn't clear loop set!")((void)0);

  // Grab the next exit block, in decreasing loop depth order.
  BasicBlock *ExitBB = ExitsInLoops.pop_back_val();
  Loop &ExitL = *LI.getLoopFor(ExitBB);
  assert(ExitL.contains(&L) && "Exit loop must contain the inner loop!")((void)0);

  // Erase all of the unlooped blocks from the loops between the previous
  // exit loop and this exit loop. This works because the ExitInLoops list is
  // sorted in increasing order of loop depth and thus we visit loops in
  // decreasing order of loop depth.
  for (; PrevExitL != &ExitL; PrevExitL = PrevExitL->getParentLoop())
    RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);

  // Walk the CFG back until we hit the cloned PH adding everything reachable
  // and in the unlooped set to this exit block's loop.
  Worklist.push_back(ExitBB);
  do {
    BasicBlock *BB = Worklist.pop_back_val();
    // We can stop recursing at the cloned preheader (if we get there).
    if (BB == PH)
      continue;

    for (BasicBlock *PredBB : predecessors(BB)) {
      // If this pred has already been moved to our set or is part of some
      // (inner) loop, no update needed.
      if (!UnloopedBlocks.erase(PredBB)) {
        assert((NewExitLoopBlocks.count(PredBB) ||((void)0)
                ExitL.contains(LI.getLoopFor(PredBB))) &&((void)0)
               "Predecessor not in a nested loop (or already visited)!")((void)0);
        continue;
      }

      // We just insert into the loop set here. We'll add these blocks to the
      // exit loop after we build up the set in a deterministic order rather
      // than the predecessor-influenced visit order.
      bool Inserted = NewExitLoopBlocks.insert(PredBB).second;
      (void)Inserted;
      assert(Inserted && "Should only visit an unlooped block once!")((void)0);

      // And recurse through to its predecessors.
      Worklist.push_back(PredBB);
    }
  } while (!Worklist.empty());

  // If blocks in this exit loop were directly part of the original loop (as
  // opposed to a child loop) update the map to point to this exit loop. This
  // just updates a map and so the fact that the order is unstable is fine.
  for (auto *BB : NewExitLoopBlocks)
    if (Loop *BBL = LI.getLoopFor(BB))
      if (BBL == &L || !L.contains(BBL))
        LI.changeLoopFor(BB, &ExitL);

  // We will remove the remaining unlooped blocks from this loop in the next
  // iteration or below.
  NewExitLoopBlocks.clear();
}

// Any remaining unlooped blocks are no longer part of any loop unless they
// are part of some child loop.
for (; PrevExitL; PrevExitL = PrevExitL->getParentLoop())
  RemoveUnloopedBlocksFromLoop(*PrevExitL, UnloopedBlocks);
for (auto *BB : UnloopedBlocks)
  if (Loop *BBL = LI.getLoopFor(BB))
    if (BBL == &L || !L.contains(BBL))
      LI.changeLoopFor(BB, nullptr);

// Sink all the child loops whose headers are no longer in the loop set to
// the parent (or to be top level loops). We reach into the loop and directly
// update its subloop vector to make this batch update efficient.
auto &SubLoops = L.getSubLoopsVector();
auto SubLoopsSplitI =
    LoopBlockSet.empty()
        ? SubLoops.begin()
        : std::stable_partition(
              SubLoops.begin(), SubLoops.end(), [&](Loop *SubL) {
                return LoopBlockSet.count(SubL->getHeader());
              });
for (auto *HoistedL : make_range(SubLoopsSplitI, SubLoops.end())) {
  HoistedLoops.push_back(HoistedL);
  HoistedL->setParentLoop(nullptr);

  // To compute the new parent of this hoisted loop we look at where we
  // placed the preheader above. We can't lookup the header itself because we
  // retained the mapping from the header to the hoisted loop. But the
  // preheader and header should have the exact same new parent computed
  // based on the set of exit blocks from the original loop as the preheader
  // is a predecessor of the header and so reached in the reverse walk. And
  // because the loops were all in simplified form the preheader of the
  // hoisted loop can't be part of some *other* loop.
  if (auto *NewParentL = LI.getLoopFor(HoistedL->getLoopPreheader()))
    NewParentL->addChildLoop(HoistedL);
  else
    LI.addTopLevelLoop(HoistedL);
}
SubLoops.erase(SubLoopsSplitI, SubLoops.end());

// Actually delete the loop if nothing remained within it.
if (Blocks.empty()) {
  assert(SubLoops.empty() &&((void)0)
         "Failed to remove all subloops from the original loop!")((void)0);
  if (Loop *ParentL = L.getParentLoop())
    ParentL->removeChildLoop(llvm::find(*ParentL, &L));
  else
    LI.removeLoop(llvm::find(LI, &L));
  // markLoopAsDeleted for L should be triggered by the caller (it is typically
  // done by using the UnswitchCB callback).
  LI.destroy(&L);
  return false;
}

return true;
1993}

1995/// Helper to visit a dominator subtree, invoking a callable on each node.
1996///
1997/// Returning false at any point will stop walking past that node of the tree.
1998template <typename CallableT>
1999void visitDomSubTree(DominatorTree &DT, BasicBlock *BB, CallableT Callable) {
SmallVector<DomTreeNode *, 4> DomWorklist;
DomWorklist.push_back(DT[BB]);
2002#ifndef NDEBUG1
SmallPtrSet<DomTreeNode *, 4> Visited;
Visited.insert(DT[BB]);
2005#endif
do {
  DomTreeNode *N = DomWorklist.pop_back_val();

  // Visit this node.
  if (!Callable(N->getBlock()))
    continue;

  // Accumulate the child nodes.
  for (DomTreeNode *ChildN : *N) {
    assert(Visited.insert(ChildN).second &&((void)0)
           "Cannot visit a node twice when walking a tree!")((void)0);
    DomWorklist.push_back(ChildN);
  }
} while (!DomWorklist.empty());
2020}

/// Unswitch the terminator \p TI of loop \p L on \p Invariants by cloning the
/// loop once per unswitched successor.
///
/// The loop preheader is split, the loop body is cloned for every unswitched
/// successor, and a terminator selecting between the original and the cloned
/// loops is placed in the split block: for a full unswitch \p TI itself is
/// moved there, otherwise a new conditional branch is built from
/// \p Invariants. The dominator tree, LoopInfo, LCSSA form and (when \p MSSAU
/// is non-null) MemorySSA are then updated to match.
///
/// \param L               Loop directly containing \p TI (asserted below).
/// \param TI              Conditional branch or switch being unswitched.
/// \param Invariants      Values to unswitch on; exactly one for a full
///                        unswitch (asserted below).
/// \param ExitBlocks      Exit blocks of \p L; forwarded to the cloning and
///                        cleanup helpers.
/// \param PartialIVInfo   A non-empty InstToDuplicate selects the partially
///                        invariant path.
/// \param UnswitchCB      Called at the end with: whether \p L is still a
///                        loop, whether this was a partially invariant
///                        unswitch, and the new loops that are siblings of
///                        \p L.
/// \param SE              Optional; cached info for the affected loops is
///                        forgotten before the CFG is changed.
/// \param MSSAU           Optional MemorySSA updater.
/// \param DestroyLoopCB   Forwarded to deleteDeadBlocksFromLoop.
2022static void unswitchNontrivialInvariants(
  Loop &L, Instruction &TI, ArrayRef<Value *> Invariants,
  SmallVectorImpl<BasicBlock *> &ExitBlocks, IVConditionInfo &PartialIVInfo,
  DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
  function_ref<void(bool, bool, ArrayRef<Loop *>)> UnswitchCB,
  ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
  function_ref<void(Loop &, StringRef)> DestroyLoopCB) {
auto *ParentBB = TI.getParent();
// Exactly one of BI / SI is non-null: the cast<> requires a switch whenever
// TI is not a branch.
BranchInst *BI = dyn_cast<BranchInst>(&TI);
SwitchInst *SI = BI ? nullptr : cast<SwitchInst>(&TI);

// We can only unswitch switches, conditional branches with an invariant
// condition, or combining invariant conditions with an instruction or
// partially invariant instructions.
assert((SI || (BI && BI->isConditional())) &&((void)0)
       "Can only unswitch switches and conditional branch!")((void)0);
bool PartiallyInvariant = !PartialIVInfo.InstToDuplicate.empty();
// A switch is always fully unswitched. A branch is fully unswitched only when
// its condition is itself the first invariant and we are not on the partially
// invariant path.
bool FullUnswitch =
    SI || (BI->getCondition() == Invariants[0] && !PartiallyInvariant);
if (FullUnswitch)
  assert(Invariants.size() == 1 &&((void)0)
         "Cannot have other invariants with full unswitching!")((void)0);
else
  assert(isa<Instruction>(BI->getCondition()) &&((void)0)
         "Partial unswitching requires an instruction as the condition!")((void)0);

if (MSSAU && VerifyMemorySSA)
  MSSAU->getMemorySSA()->verifyMemorySSA();

// Constant and BBs tracking the cloned and continuing successor. When we are
// unswitching the entire condition, this can just be trivially chosen to
// unswitch towards `true`. However, when we are unswitching a set of
// invariants combined with `and` or `or` or partially invariant instructions,
// the combining operation determines the best direction to unswitch: we want
// to unswitch the direction that will collapse the branch.
bool Direction = true;
int ClonedSucc = 0;
if (!FullUnswitch) {
  Value *Cond = BI->getCondition();
  (void)Cond;
  assert(((match(Cond, m_LogicalAnd()) ^ match(Cond, m_LogicalOr())) ||((void)0)
          PartiallyInvariant) &&((void)0)
         "Only `or`, `and`, an `select`, partially invariant instructions "((void)0)
         "can combine invariants being unswitched.")((void)0);
  // A logical `or` keeps the defaults (unswitch towards `true`, clone
  // successor 0). A logical `and`, or a partially invariant condition whose
  // known value is not one, flips both.
  if (!match(BI->getCondition(), m_LogicalOr())) {
    if (match(BI->getCondition(), m_LogicalAnd()) ||
        (PartiallyInvariant && !PartialIVInfo.KnownValue->isOneValue())) {
      Direction = false;
      ClonedSucc = 1;
    }
  }
}

// For a branch the retained successor is the one we do not clone for; for a
// switch it is always the default destination.
BasicBlock *RetainedSuccBB =
    BI ? BI->getSuccessor(1 - ClonedSucc) : SI->getDefaultDest();
SmallSetVector<BasicBlock *, 4> UnswitchedSuccBBs;
if (BI)
  UnswitchedSuccBBs.insert(BI->getSuccessor(ClonedSucc));
else
  for (auto Case : SI->cases())
    if (Case.getCaseSuccessor() != RetainedSuccBB)
      UnswitchedSuccBBs.insert(Case.getCaseSuccessor());

assert(!UnswitchedSuccBBs.count(RetainedSuccBB) &&((void)0)
       "Should not unswitch the same successor we are retaining!")((void)0);

// The branch should be in this exact loop. Any inner loop's invariant branch
// should be handled by unswitching that inner loop. The caller of this
// routine should filter out any candidates that remain (but were skipped for
// whatever reason).
assert(LI.getLoopFor(ParentBB) == &L && "Branch in an inner loop!")((void)0);

// Compute the parent loop now before we start hacking on things.
Loop *ParentL = L.getParentLoop();
// Get blocks in RPO order for MSSA update, before changing the CFG.
LoopBlocksRPO LBRPO(&L);
if (MSSAU)
  LBRPO.perform(&LI);

// Compute the outer-most loop containing one of our exit blocks. This is the
// furthest up our loopnest which can be mutated, which we will use below to
// update things.
Loop *OuterExitL = &L;
for (auto *ExitBB : ExitBlocks) {
  Loop *NewOuterExitL = LI.getLoopFor(ExitBB);
  if (!NewOuterExitL) {
    // We exited the entire nest with this block, so we're done.
    OuterExitL = nullptr;
    break;
  }
  if (NewOuterExitL != OuterExitL && NewOuterExitL->contains(OuterExitL))
    OuterExitL = NewOuterExitL;
}

// At this point, we're definitely going to unswitch something so invalidate
// any cached information in ScalarEvolution for the outer most loop
// containing an exit block and all nested loops.
if (SE) {
  if (OuterExitL)
    SE->forgetLoop(OuterExitL);
  else
    SE->forgetTopmostLoop(&L);
}

// If the edge from this terminator to a successor dominates that successor,
// store a map from each block in its dominator subtree to it. This lets us
// tell when cloning for a particular successor if a block is dominated by
// some *other* successor with a single data structure. We use this to
// significantly reduce cloning.
SmallDenseMap<BasicBlock *, BasicBlock *, 16> DominatingSucc;
for (auto *SuccBB : llvm::concat<BasicBlock *const>(
         makeArrayRef(RetainedSuccBB), UnswitchedSuccBBs))
  if (SuccBB->getUniquePredecessor() ||
      llvm::all_of(predecessors(SuccBB), [&](BasicBlock *PredBB) {
        return PredBB == ParentBB || DT.dominates(SuccBB, PredBB);
      }))
    visitDomSubTree(DT, SuccBB, [&](BasicBlock *BB) {
      DominatingSucc[BB] = SuccBB;
      return true;
    });

// Split the preheader, so that we know that there is a safe place to insert
// the conditional branch. We will change the preheader to have a conditional
// branch on LoopCond. The original preheader will become the split point
// between the unswitched versions, and we will have a new preheader for the
// original loop.
BasicBlock *SplitBB = L.getLoopPreheader();
BasicBlock *LoopPH = SplitEdge(SplitBB, L.getHeader(), &DT, &LI, MSSAU);

// Keep track of the dominator tree updates needed.
SmallVector<DominatorTree::UpdateType, 4> DTUpdates;

// Clone the loop for each unswitched successor.
SmallVector<std::unique_ptr<ValueToValueMapTy>, 4> VMaps;
VMaps.reserve(UnswitchedSuccBBs.size());
SmallDenseMap<BasicBlock *, BasicBlock *, 4> ClonedPHs;
for (auto *SuccBB : UnswitchedSuccBBs) {
  VMaps.emplace_back(new ValueToValueMapTy());
  ClonedPHs[SuccBB] = buildClonedLoopBlocks(
      L, LoopPH, SplitBB, ExitBlocks, ParentBB, SuccBB, RetainedSuccBB,
      DominatingSucc, *VMaps.back(), DTUpdates, AC, DT, LI, MSSAU);
}

// Drop metadata if we may break its semantics by moving this instr into the
// split block.
if (TI.getMetadata(LLVMContext::MD_make_implicit)) {
  if (DropNonTrivialImplicitNullChecks)
    // Do not spend time trying to understand if we can keep it, just drop it
    // to save compile time.
    TI.setMetadata(LLVMContext::MD_make_implicit, nullptr);
  else {
    // It is only legal to preserve make.implicit metadata if we are
    // guaranteed not to reach an implicit null check after following this
    // branch.
    ICFLoopSafetyInfo SafetyInfo;
    SafetyInfo.computeLoopSafetyInfo(&L);
    if (!SafetyInfo.isGuaranteedToExecute(TI, &DT, &L))
      TI.setMetadata(LLVMContext::MD_make_implicit, nullptr);
  }
}

// The stitching of the branched code back together depends on whether we're
// doing full unswitching or not with the exception that we always want to
// nuke the initial terminator placed in the split block.
SplitBB->getTerminator()->eraseFromParent();
if (FullUnswitch) {
  // Splice the terminator from the original loop and rewrite its
  // successors.
  SplitBB->getInstList().splice(SplitBB->end(), ParentBB->getInstList(), TI);

  // Keep a clone of the terminator for MSSA updates.
  Instruction *NewTI = TI.clone();
  ParentBB->getInstList().push_back(NewTI);

  // First wire up the moved terminator to the preheaders.
  if (BI) {
    BasicBlock *ClonedPH = ClonedPHs.begin()->second;
    BI->setSuccessor(ClonedSucc, ClonedPH);
    BI->setSuccessor(1 - ClonedSucc, LoopPH);
    DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
  } else {
    assert(SI && "Must either be a branch or switch!")((void)0);

    // Walk the cases and directly update their successors.
    assert(SI->getDefaultDest() == RetainedSuccBB &&((void)0)
           "Not retaining default successor!")((void)0);
    SI->setDefaultDest(LoopPH);
    for (auto &Case : SI->cases())
      if (Case.getCaseSuccessor() == RetainedSuccBB)
        Case.setSuccessor(LoopPH);
      else
        Case.setSuccessor(ClonedPHs.find(Case.getCaseSuccessor())->second);

    // We need to use the set to populate domtree updates as even when there
    // are multiple cases pointing at the same successor we only want to
    // remove and insert one edge in the domtree.
    for (BasicBlock *SuccBB : UnswitchedSuccBBs)
      DTUpdates.push_back(
          {DominatorTree::Insert, SplitBB, ClonedPHs.find(SuccBB)->second});
  }

  if (MSSAU) {
    // The pending domtree updates are applied here, before the MemorySSA
    // updates below, one of which takes DT as an argument.
    DT.applyUpdates(DTUpdates);
    DTUpdates.clear();

    // Remove all but one edge to the retained block and all unswitched
    // blocks. This is to avoid having duplicate entries in the cloned Phis,
    // when we know we only keep a single edge for each case.
    MSSAU->removeDuplicatePhiEdgesBetween(ParentBB, RetainedSuccBB);
    for (BasicBlock *SuccBB : UnswitchedSuccBBs)
      MSSAU->removeDuplicatePhiEdgesBetween(ParentBB, SuccBB);

    for (auto &VMap : VMaps)
      MSSAU->updateForClonedLoop(LBRPO, ExitBlocks, *VMap,
                                 /*IgnoreIncomingWithNoClones=*/true);
    MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMaps, DT);

    // Remove all edges to unswitched blocks.
    for (BasicBlock *SuccBB : UnswitchedSuccBBs)
      MSSAU->removeEdge(ParentBB, SuccBB);
  }

  // Now unhook the successor relationship as we'll be replacing
  // the terminator with a direct branch. This is much simpler for branches
  // than switches so we handle those first.
  if (BI) {
    // Remove the parent as a predecessor of the unswitched successor.
    assert(UnswitchedSuccBBs.size() == 1 &&((void)0)
           "Only one possible unswitched block for a branch!")((void)0);
    BasicBlock *UnswitchedSuccBB = *UnswitchedSuccBBs.begin();
    UnswitchedSuccBB->removePredecessor(ParentBB,
                                        /*KeepOneInputPHIs*/ true);
    DTUpdates.push_back({DominatorTree::Delete, ParentBB, UnswitchedSuccBB});
  } else {
    // Note that we actually want to remove the parent block as a predecessor
    // of *every* case successor. The case successor is either unswitched,
    // completely eliminating an edge from the parent to that successor, or it
    // is a duplicate edge to the retained successor as the retained successor
    // is always the default successor and as we'll replace this with a direct
    // branch we no longer need the duplicate entries in the PHI nodes.
    SwitchInst *NewSI = cast<SwitchInst>(NewTI);
    assert(NewSI->getDefaultDest() == RetainedSuccBB &&((void)0)
           "Not retaining default successor!")((void)0);
    for (auto &Case : NewSI->cases())
      Case.getCaseSuccessor()->removePredecessor(
          ParentBB,
          /*KeepOneInputPHIs*/ true);

    // We need to use the set to populate domtree updates as even when there
    // are multiple cases pointing at the same successor we only want to
    // remove and insert one edge in the domtree.
    for (BasicBlock *SuccBB : UnswitchedSuccBBs)
      DTUpdates.push_back({DominatorTree::Delete, ParentBB, SuccBB});
  }

  // After MSSAU update, remove the cloned terminator instruction NewTI.
  ParentBB->getTerminator()->eraseFromParent();

  // Create a new unconditional branch to the continuing block (as opposed to
  // the one cloned).
  BranchInst::Create(RetainedSuccBB, ParentBB);
} else {
  assert(BI && "Only branches have partial unswitching.")((void)0);
  assert(UnswitchedSuccBBs.size() == 1 &&((void)0)
         "Only one possible unswitched block for a branch!")((void)0);
  BasicBlock *ClonedPH = ClonedPHs.begin()->second;
  // When doing a partial unswitch, we have to do a bit more work to build up
  // the branch in the split block.
  if (PartiallyInvariant)
    buildPartialInvariantUnswitchConditionalBranch(
        *SplitBB, Invariants, Direction, *ClonedPH, *LoopPH, L, MSSAU);
  else
    buildPartialUnswitchConditionalBranch(*SplitBB, Invariants, Direction,
                                          *ClonedPH, *LoopPH);
  DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});

  if (MSSAU) {
    DT.applyUpdates(DTUpdates);
    DTUpdates.clear();

    // Perform MSSA cloning updates.
    for (auto &VMap : VMaps)
      MSSAU->updateForClonedLoop(LBRPO, ExitBlocks, *VMap,
                                 /*IgnoreIncomingWithNoClones=*/true);
    MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMaps, DT);
  }
}

// Apply the updates accumulated above to get an up-to-date dominator tree.
DT.applyUpdates(DTUpdates);

// Now that we have an accurate dominator tree, first delete the dead cloned
// blocks so that we can accurately build any cloned loops. It is important to
// not delete the blocks from the original loop yet because we still want to
// reference the original loop to understand the cloned loop's structure.
deleteDeadClonedBlocks(L, ExitBlocks, VMaps, DT, MSSAU);

// Build the cloned loop structure itself. This may be substantially
// different from the original structure due to the simplified CFG. This also
// handles inserting all the cloned blocks into the correct loops.
SmallVector<Loop *, 4> NonChildClonedLoops;
for (std::unique_ptr<ValueToValueMapTy> &VMap : VMaps)
  buildClonedLoops(L, ExitBlocks, *VMap, LI, NonChildClonedLoops);

// Now that our cloned loops have been built, we can update the original loop.
// First we delete the dead blocks from it and then we rebuild the loop
// structure taking these deletions into account.
deleteDeadBlocksFromLoop(L, ExitBlocks, DT, LI, MSSAU, DestroyLoopCB);

if (MSSAU && VerifyMemorySSA)
  MSSAU->getMemorySSA()->verifyMemorySSA();

SmallVector<Loop *, 4> HoistedLoops;
bool IsStillLoop = rebuildLoopAfterUnswitch(L, ExitBlocks, LI, HoistedLoops);

if (MSSAU && VerifyMemorySSA)
  MSSAU->getMemorySSA()->verifyMemorySSA();

// This transformation has a high risk of corrupting the dominator tree, and
// the below steps to rebuild loop structures will result in hard to debug
// errors in that case so verify that the dominator tree is sane first.
// FIXME: Remove this when the bugs stop showing up and rely on existing
// verification steps.
assert(DT.verify(DominatorTree::VerificationLevel::Fast))((void)0);

if (BI && !PartiallyInvariant) {
  // If we unswitched a branch which collapses the condition to a known
  // constant we want to replace all the uses of the invariants within both
  // the original and cloned blocks. We do this here so that we can use the
  // now updated dominator tree to identify which side the users are on.
  assert(UnswitchedSuccBBs.size() == 1 &&((void)0)
         "Only one possible unswitched block for a branch!")((void)0);
  BasicBlock *ClonedPH = ClonedPHs.begin()->second;

  // When considering multiple partially-unswitched invariants
  // we can't just go replace them with constants in both branches.
  //
  // For 'AND' we infer that true branch ("continue") means true
  // for each invariant operand.
  // For 'OR' we can infer that false branch ("continue") means false
  // for each invariant operand.
  // So it happens that for multiple-partial case we don't replace
  // in the unswitched branch.
  //
  // NOTE: PartiallyInvariant is known to be false inside this block (see the
  // enclosing condition), so only the first two operands can contribute.
  bool ReplaceUnswitched =
      FullUnswitch || (Invariants.size() == 1) || PartiallyInvariant;

  ConstantInt *UnswitchedReplacement =
      Direction ? ConstantInt::getTrue(BI->getContext())
                : ConstantInt::getFalse(BI->getContext());
  ConstantInt *ContinueReplacement =
      Direction ? ConstantInt::getFalse(BI->getContext())
                : ConstantInt::getTrue(BI->getContext());
  for (Value *Invariant : Invariants)
    // Use make_early_inc_range here as set invalidates the iterator.
    for (Use &U : llvm::make_early_inc_range(Invariant->uses())) {
      Instruction *UserI = dyn_cast<Instruction>(U.getUser());
      if (!UserI)
        continue;

      // Replace it with the 'continue' side if in the main loop body, and the
      // unswitched if in the cloned blocks.
      if (DT.dominates(LoopPH, UserI->getParent()))
        U.set(ContinueReplacement);
      else if (ReplaceUnswitched &&
               DT.dominates(ClonedPH, UserI->getParent()))
        U.set(UnswitchedReplacement);
    }
}

// We can change which blocks are exit blocks of all the cloned sibling
// loops, the current loop, and any parent loops which shared exit blocks
// with the current loop. As a consequence, we need to re-form LCSSA for
// them. But we shouldn't need to re-form LCSSA for any child loops.
// FIXME: This could be made more efficient by tracking which exit blocks are
// new, and focusing on them, but that isn't likely to be necessary.
//
// In order to reasonably rebuild LCSSA we need to walk inside-out across the
// loop nest and update every loop that could have had its exits changed. We
// also need to cover any intervening loops. We add all of these loops to
// a list and sort them by loop depth to achieve this without updating
// unnecessary loops.
auto UpdateLoop = [&](Loop &UpdateL) {
2403#ifndef NDEBUG1
  UpdateL.verifyLoop();
  for (Loop *ChildL : UpdateL) {
    ChildL->verifyLoop();
    assert(ChildL->isRecursivelyLCSSAForm(DT, LI) &&((void)0)
           "Perturbed a child loop's LCSSA form!")((void)0);
  }
2410#endif
  // First build LCSSA for this loop so that we can preserve it when
  // forming dedicated exits. We don't want to perturb some other loop's
  // LCSSA while doing that CFG edit.
  formLCSSA(UpdateL, DT, &LI, SE);

  // For loops reached by this loop's original exit blocks we may have
  // introduced new, non-dedicated exits. At least try to re-form dedicated
  // exits for these loops. This may fail if they couldn't have dedicated
  // exits to start with.
  formDedicatedExitBlocks(&UpdateL, &DT, &LI, MSSAU, /*PreserveLCSSA*/ true);
};

// For non-child cloned loops and hoisted loops, we just need to update LCSSA
// and we can do it in any order as they don't nest relative to each other.
//
// Also check if any of the loops we have updated have become top-level loops
// as that will necessitate widening the outer loop scope.
for (Loop *UpdatedL :
     llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops)) {
  UpdateLoop(*UpdatedL);
  if (UpdatedL->isOutermost())
    OuterExitL = nullptr;
}
if (IsStillLoop) {
  UpdateLoop(L);
  if (L.isOutermost())
    OuterExitL = nullptr;
}

// If the original loop had exit blocks, walk up through the outer most loop
// of those exit blocks to update LCSSA and form updated dedicated exits.
if (OuterExitL != &L)
  for (Loop *OuterL = ParentL; OuterL != OuterExitL;
       OuterL = OuterL->getParentLoop())
    UpdateLoop(*OuterL);

2447#ifndef NDEBUG1
// Verify the entire loop structure to catch any incorrect updates before we
// progress in the pass pipeline.
LI.verify(DT);
2451#endif

// Now that we've unswitched something, make callbacks to report the changes.
// For that we need to merge together the updated loops and the cloned loops
// and check whether the original loop survived.
SmallVector<Loop *, 4> SibLoops;
for (Loop *UpdatedL : llvm::concat<Loop *>(NonChildClonedLoops, HoistedLoops))
  if (UpdatedL->getParentLoop() == ParentL)
    SibLoops.push_back(UpdatedL);
UnswitchCB(IsStillLoop, PartiallyInvariant, SibLoops);

if (MSSAU && VerifyMemorySSA)
  MSSAU->getMemorySSA()->verifyMemorySSA();

// Bump the statistic matching the kind of terminator that was unswitched.
if (BI)
  ++NumBranches;
else
  ++NumSwitches;
2469}

2471/// Recursively compute the cost of a dominator subtree based on the per-block
2472/// cost map provided.
2473///
2474/// The recursive computation is memozied into the provided DT-indexed cost map
2475/// to allow querying it for most nodes in the domtree without it becoming
2476/// quadratic.
2477static InstructionCost computeDomSubtreeCost(
  DomTreeNode &N,
  const SmallDenseMap<BasicBlock *, InstructionCost, 4> &BBCostMap,
  SmallDenseMap<DomTreeNode *, InstructionCost, 4> &DTCostMap) {
// Don't accumulate cost (or recurse through) blocks not in our block cost
// map and thus not part of the duplication cost being considered.
auto BBCostIt = BBCostMap.find(N.getBlock());
if (BBCostIt == BBCostMap.end())
  return 0;

// Lookup this node to see if we already computed its cost.
auto DTCostIt = DTCostMap.find(&N);
if (DTCostIt != DTCostMap.end())
  return DTCostIt->second;

// If not, we have to compute it. We can't use insert above and update
// because computing the cost may insert more things into the map.
InstructionCost Cost = std::accumulate(
    N.begin(), N.end(), BBCostIt->second,
    [&](InstructionCost Sum, DomTreeNode *ChildN) -> InstructionCost {
      return Sum + computeDomSubtreeCost(*ChildN, BBCostMap, DTCostMap);
    });
bool Inserted = DTCostMap.insert({&N, Cost}).second;
(void)Inserted;
assert(Inserted && "Should not insert a node while visiting children!")((void)0);
return Cost;
2503}

2505/// Turns a llvm.experimental.guard intrinsic into implicit control flow branch,
2506/// making the following replacement:
2507///
2508///   --code before guard--
2509///   call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ]
2510///   --code after guard--
2511///
2512/// into
2513///
2514///   --code before guard--
2515///   br i1 %cond, label %guarded, label %deopt
2516///
2517/// guarded:
2518///   --code after guard--
2519///
2520/// deopt:
2521///   call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"() ]
2522///   unreachable
2523///
2524/// It also makes all relevant DT and LI updates, so that all structures are in
2525/// valid state after this transform.
2526static BranchInst *
2527turnGuardIntoBranch(IntrinsicInst *GI, Loop &L,
                  SmallVectorImpl<BasicBlock *> &ExitBlocks,
                  DominatorTree &DT, LoopInfo &LI, MemorySSAUpdater *MSSAU) {
SmallVector<DominatorTree::UpdateType, 4> DTUpdates;
LLVM_DEBUG(dbgs() << "Turning " << *GI << " into a branch.\n")do { } while (false);
BasicBlock *CheckBB = GI->getParent();

if (MSSAU && VerifyMemorySSA)
   MSSAU->getMemorySSA()->verifyMemorySSA();

// Remove all CheckBB's successors from DomTree. A block can be seen among
// successors more than once, but for DomTree it should be added only once.
SmallPtrSet<BasicBlock *, 4> Successors;
for (auto *Succ : successors(CheckBB))
  if (Successors.insert(Succ).second)
    DTUpdates.push_back({DominatorTree::Delete, CheckBB, Succ});

Instruction *DeoptBlockTerm =
    SplitBlockAndInsertIfThen(GI->getArgOperand(0), GI, true);
BranchInst *CheckBI = cast<BranchInst>(CheckBB->getTerminator());
// SplitBlockAndInsertIfThen inserts control flow that branches to
// DeoptBlockTerm if the condition is true.  We want the opposite.
CheckBI->swapSuccessors();

BasicBlock *GuardedBlock = CheckBI->getSuccessor(0);
GuardedBlock->setName("guarded");
CheckBI->getSuccessor(1)->setName("deopt");
BasicBlock *DeoptBlock = CheckBI->getSuccessor(1);

// We now have a new exit block.
ExitBlocks.push_back(CheckBI->getSuccessor(1));

if (MSSAU)
  MSSAU->moveAllAfterSpliceBlocks(CheckBB, GuardedBlock, GI);

GI->moveBefore(DeoptBlockTerm);
GI->setArgOperand(0, ConstantInt::getFalse(GI->getContext()));

// Add new successors of CheckBB into DomTree.
for (auto *Succ : successors(CheckBB))
  DTUpdates.push_back({DominatorTree::Insert, CheckBB, Succ});

// Now the blocks that used to be CheckBB's successors are GuardedBlock's
// successors.
for (auto *Succ : Successors)
  DTUpdates.push_back({DominatorTree::Insert, GuardedBlock, Succ});

// Make proper changes to DT.
DT.applyUpdates(DTUpdates);
// Inform LI of a new loop block.
L.addBasicBlockToLoop(GuardedBlock, LI);

if (MSSAU) {
  MemoryDef *MD = cast<MemoryDef>(MSSAU->getMemorySSA()->getMemoryAccess(GI));
  MSSAU->moveToPlace(MD, DeoptBlock, MemorySSA::BeforeTerminator);
  if (VerifyMemorySSA)
    MSSAU->getMemorySSA()->verifyMemorySSA();
}

++NumGuards;
return CheckBI;
2588}

2590/// Cost multiplier is a way to limit potentially exponential behavior
2591/// of loop-unswitch. Cost is multipied in proportion of 2^number of unswitch
2592/// candidates available. Also accounting for the number of "sibling" loops with
2593/// the idea to account for previous unswitches that already happened on this
2594/// cluster of loops. There was an attempt to keep this formula simple,
2595/// just enough to limit the worst case behavior. Even if it is not that simple
2596/// now it is still not an attempt to provide a detailed heuristic size
2597/// prediction.
2598///
2599/// TODO: Make a proper accounting of "explosion" effect for all kinds of
2600/// unswitch candidates, making adequate predictions instead of wild guesses.
2601/// That requires knowing not just the number of "remaining" candidates but
2602/// also costs of unswitching for each of these candidates.
2603static int CalculateUnswitchCostMultiplier(
  Instruction &TI, Loop &L, LoopInfo &LI, DominatorTree &DT,
  ArrayRef<std::pair<Instruction *, TinyPtrVector<Value *>>>
      UnswitchCandidates) {

// Guards and other exiting conditions do not contribute to exponential
// explosion as soon as they dominate the latch (otherwise there might be
// another path to the latch remaining that does not allow to eliminate the
// loop copy on unswitch).
BasicBlock *Latch = L.getLoopLatch();
BasicBlock *CondBlock = TI.getParent();
if (DT.dominates(CondBlock, Latch) &&
    (isGuard(&TI) ||
     llvm::count_if(successors(&TI), [&L](BasicBlock *SuccBB) {
       return L.contains(SuccBB);
     }) <= 1)) {
  NumCostMultiplierSkipped++;
  return 1;
}

auto *ParentL = L.getParentLoop();
int SiblingsCount = (ParentL ? ParentL->getSubLoopsVector().size()
                             : std::distance(LI.begin(), LI.end()));
// Count amount of clones that all the candidates might cause during
// unswitching. Branch/guard counts as 1, switch counts as log2 of its cases.
int UnswitchedClones = 0;
for (auto Candidate : UnswitchCandidates) {
  Instruction *CI = Candidate.first;
  BasicBlock *CondBlock = CI->getParent();
  bool SkipExitingSuccessors = DT.dominates(CondBlock, Latch);
  if (isGuard(CI)) {
    if (!SkipExitingSuccessors)
      UnswitchedClones++;
    continue;
  }
  int NonExitingSuccessors = llvm::count_if(
      successors(CondBlock), [SkipExitingSuccessors, &L](BasicBlock *SuccBB) {
        return !SkipExitingSuccessors || L.contains(SuccBB);
      });
  UnswitchedClones += Log2_32(NonExitingSuccessors);
}

// Ignore up to the "unscaled candidates" number of unswitch candidates
// when calculating the power-of-two scaling of the cost. The main idea
// with this control is to allow a small number of unswitches to happen
// and rely more on siblings multiplier (see below) when the number
// of candidates is small.
unsigned ClonesPower =
    std::max(UnswitchedClones - (int)UnswitchNumInitialUnscaledCandidates, 0);

// Allowing top-level loops to spread a bit more than nested ones.
int SiblingsMultiplier =
    std::max((ParentL ? SiblingsCount
                      : SiblingsCount / (int)UnswitchSiblingsToplevelDiv),
             1);
// Compute the cost multiplier in a way that won't overflow by saturating
// at an upper bound.
int CostMultiplier;
if (ClonesPower > Log2_32(UnswitchThreshold) ||
    SiblingsMultiplier > UnswitchThreshold)
  CostMultiplier = UnswitchThreshold;
else
  CostMultiplier = std::min(SiblingsMultiplier * (1 << ClonesPower),
                            (int)UnswitchThreshold);

LLVM_DEBUG(dbgs() << "  Computed multiplier  " << CostMultiplierdo { } while (false)
                  << " (siblings " << SiblingsMultiplier << " * clones "do { } while (false)
                  << (1 << ClonesPower) << ")"do { } while (false)
                  << " for unswitch candidate: " << TI << "\n")do { } while (false);
return CostMultiplier;
2673}

2675static bool unswitchBestCondition(
  Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
  AAResults &AA, TargetTransformInfo &TTI,
  function_ref<void(bool, bool, ArrayRef<Loop *>)> UnswitchCB,
  ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
  function_ref<void(Loop &, StringRef)> DestroyLoopCB) {
// Collect all invariant conditions within this loop (as opposed to an inner
// loop which would be handled when visiting that inner loop).
SmallVector<std::pair<Instruction *, TinyPtrVector<Value *>>, 4>
    UnswitchCandidates;

// Whether or not we should also collect guards in the loop.
bool CollectGuards = false;
if (UnswitchGuards) {
1
Assuming the condition is false→
2
←
Taking false branch→
  auto *GuardDecl = L.getHeader()->getParent()->getParent()->getFunction(
      Intrinsic::getName(Intrinsic::experimental_guard));
  if (GuardDecl && !GuardDecl->use_empty())
    CollectGuards = true;
}

IVConditionInfo PartialIVInfo;
3
←
Calling implicit default constructor for 'IVConditionInfo'→
5
←
Returning from default constructor for 'IVConditionInfo'→
for (auto *BB : L.blocks()) {
6
←
Assuming '__begin1' is equal to '__end1'→
  if (LI.getLoopFor(BB) != &L)
    continue;

  if (CollectGuards)
    for (auto &I : *BB)
      if (isGuard(&I)) {
        auto *Cond = cast<IntrinsicInst>(&I)->getArgOperand(0);
        // TODO: Support AND, OR conditions and partial unswitching.
        if (!isa<Constant>(Cond) && L.isLoopInvariant(Cond))
          UnswitchCandidates.push_back({&I, {Cond}});
      }

  if (auto *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
    // We can only consider fully loop-invariant switch conditions as we need
    // to completely eliminate the switch after unswitching.
    if (!isa<Constant>(SI->getCondition()) &&
        L.isLoopInvariant(SI->getCondition()) && !BB->getUniqueSuccessor())
      UnswitchCandidates.push_back({SI, {SI->getCondition()}});
    continue;
  }

  auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
  if (!BI || !BI->isConditional() || isa<Constant>(BI->getCondition()) ||
      BI->getSuccessor(0) == BI->getSuccessor(1))
    continue;

  // If BI's condition is 'select _, true, false', simplify it to confuse
  // matchers
  Value *Cond = BI->getCondition(), *CondNext;
  while (match(Cond, m_Select(m_Value(CondNext), m_One(), m_Zero())))
    Cond = CondNext;
  BI->setCondition(Cond);

  if (L.isLoopInvariant(BI->getCondition())) {
    UnswitchCandidates.push_back({BI, {BI->getCondition()}});
    continue;
  }

  Instruction &CondI = *cast<Instruction>(BI->getCondition());
  if (match(&CondI, m_CombineOr(m_LogicalAnd(), m_LogicalOr()))) {
    TinyPtrVector<Value *> Invariants =
        collectHomogenousInstGraphLoopInvariants(L, CondI, LI);
    if (Invariants.empty())
      continue;

    UnswitchCandidates.push_back({BI, std::move(Invariants)});
    continue;
  }
}

Instruction *PartialIVCondBranch = nullptr;
if (MSSAU && !findOptionMDForLoop(&L, "llvm.loop.unswitch.partial.disable") &&
7
←
Assuming 'MSSAU' is null→
8
←
Taking false branch→
    !any_of(UnswitchCandidates, [&L](auto &TerminatorAndInvariants) {
      return TerminatorAndInvariants.first == L.getHeader()->getTerminator();
    })) {
  MemorySSA *MSSA = MSSAU->getMemorySSA();
  if (auto Info = hasPartialIVCondition(L, MSSAThreshold, *MSSA, AA)) {
    LLVM_DEBUG(do { } while (false)
        dbgs() << "simple-loop-unswitch: Found partially invariant condition "do { } while (false)
               << *Info->InstToDuplicate[0] << "\n")do { } while (false);
    PartialIVInfo = *Info;
    PartialIVCondBranch = L.getHeader()->getTerminator();
    TinyPtrVector<Value *> ValsToDuplicate;
    for (auto *Inst : Info->InstToDuplicate)
      ValsToDuplicate.push_back(Inst);
    UnswitchCandidates.push_back(
        {L.getHeader()->getTerminator(), std::move(ValsToDuplicate)});
  }
}

// If we didn't find any candidates, we're done.
if (UnswitchCandidates.empty())
9
←
Calling 'SmallVectorBase::empty'→
12
←
Returning from 'SmallVectorBase::empty'→
13
←
Taking false branch→
  return false;

// Check if there are irreducible CFG cycles in this loop. If so, we cannot
// easily unswitch non-trivial edges out of the loop. Doing so might turn the
// irreducible control flow into reducible control flow and introduce new
// loops "out of thin air". If we ever discover important use cases for doing
// this, we can add support to loop unswitch, but it is a lot of complexity
// for what seems little or no real world benefit.
LoopBlocksRPO RPOT(&L);
RPOT.perform(&LI);
if (containsIrreducibleCFG<const BasicBlock *>(RPOT, LI))
14
←
Calling 'containsIrreducibleCFG<const llvm::BasicBlock *, llvm::LoopBlocksRPO, llvm::LoopInfo, llvm::GraphTraits<const llvm::BasicBlock *>>'→
16
←
Returning from 'containsIrreducibleCFG<const llvm::BasicBlock *, llvm::LoopBlocksRPO, llvm::LoopInfo, llvm::GraphTraits<const llvm::BasicBlock *>>'→
17
←
Taking false branch→
  return false;

SmallVector<BasicBlock *, 4> ExitBlocks;
L.getUniqueExitBlocks(ExitBlocks);

// We cannot unswitch if exit blocks contain a cleanuppad/catchswitch
// instruction as we don't know how to split those exit blocks.
// FIXME: We should teach SplitBlock to handle this and remove this
// restriction.
for (auto *ExitBB : ExitBlocks) {
18
←
Assuming '__begin1' is equal to '__end1'→
  auto *I = ExitBB->getFirstNonPHI();
  if (isa<CleanupPadInst>(I) || isa<CatchSwitchInst>(I)) {
    LLVM_DEBUG(dbgs() << "Cannot unswitch because of cleanuppad/catchswitch "do { } while (false)
                         "in exit block\n")do { } while (false);
    return false;
  }
}

LLVM_DEBUG(do { } while (false)
19
←
Loop condition is false.  Exiting loop→
    dbgs() << "Considering " << UnswitchCandidates.size()do { } while (false)
           << " non-trivial loop invariant conditions for unswitching.\n")do { } while (false);

// Unswitching these terminators will require duplicating parts of the loop,
// so we need to be able to model that cost. Compute the ephemeral
// values and set up a data structure to hold per-BB costs. We cache each
// block's cost so that we don't recompute this when considering different
// subsets of the loop for duplication during unswitching.
SmallPtrSet<const Value *, 4> EphValues;
CodeMetrics::collectEphemeralValues(&L, &AC, EphValues);
SmallDenseMap<BasicBlock *, InstructionCost, 4> BBCostMap;

// Compute the cost of each block, as well as the total loop cost. Also, bail
// out if we see instructions which are incompatible with loop unswitching
// (convergent, noduplicate, or cross-basic-block tokens).
// FIXME: We might be able to safely handle some of these in non-duplicated
// regions.
TargetTransformInfo::TargetCostKind CostKind =
    L.getHeader()->getParent()->hasMinSize()
20
←
Assuming the condition is false→
21
←
'?' condition is false→
    ? TargetTransformInfo::TCK_CodeSize
    : TargetTransformInfo::TCK_SizeAndLatency;
InstructionCost LoopCost = 0;
for (auto *BB : L.blocks()) {
22
←
Assuming '__begin1' is equal to '__end1'→
  InstructionCost Cost = 0;
  for (auto &I : *BB) {
    if (EphValues.count(&I))
      continue;

    if (I.getType()->isTokenTy() && I.isUsedOutsideOfBlock(BB))
      return false;
    if (auto *CB = dyn_cast<CallBase>(&I))
      if (CB->isConvergent() || CB->cannotDuplicate())
        return false;

    Cost += TTI.getUserCost(&I, CostKind);
  }
  assert(Cost >= 0 && "Must not have negative costs!")((void)0);
  LoopCost += Cost;
  assert(LoopCost >= 0 && "Must not have negative loop costs!")((void)0);
  BBCostMap[BB] = Cost;
}
LLVM_DEBUG(dbgs() << "  Total loop cost: " << LoopCost << "\n")do { } while (false);
23
←
Loop condition is false.  Exiting loop→

// Now we find the best candidate by searching for the one with the following
// properties in order:
//
// 1) An unswitching cost below the threshold
// 2) The smallest number of duplicated unswitch candidates (to avoid
//    creating redundant subsequent unswitching)
// 3) The smallest cost after unswitching.
//
// We prioritize reducing fanout of unswitch candidates provided the cost
// remains below the threshold because this has a multiplicative effect.
//
// This requires memoizing each dominator subtree to avoid redundant work.
//
// FIXME: Need to actually do the number of candidates part above.
SmallDenseMap<DomTreeNode *, InstructionCost, 4> DTCostMap;
// Given a terminator which might be unswitched, computes the non-duplicated
// cost for that terminator.
//
// `TI` is the candidate instruction (a terminator or, per the isGuard check
// at the end, a guard) and `FullUnswitch` is false for partial unswitch
// candidates. The value returned is
// `(LoopCost - Cost) * (SuccessorsCount - 1)`, where `Cost` accumulates the
// dominator-subtree cost (via computeDomSubtreeCost) of each distinct
// successor of the block containing `TI` that passes the checks below.
// Captures everything by reference; it reads LoopCost, BBCostMap, DT and
// PartialIVInfo and passes DTCostMap to computeDomSubtreeCost.
auto ComputeUnswitchedCost = [&](Instruction &TI,
                                 bool FullUnswitch) -> InstructionCost {
  BasicBlock &BB = *TI.getParent();
  SmallPtrSet<BasicBlock *, 4> Visited;

  InstructionCost Cost = 0;
  for (BasicBlock *SuccBB : successors(&BB)) {
    // Don't count successors more than once.
    if (!Visited.insert(SuccBB).second)
28
←
Assuming field 'second' is true→
29
←
Taking false branch→
      continue;

    // If this is a partial unswitch candidate, then it must be a conditional
    // branch with a condition of either `or`, `and`, their corresponding
    // select forms or partially invariant instructions. In that case, one of
    // the successors is necessarily duplicated, so don't even try to remove
    // its cost.
    if (!FullUnswitch29.1
'FullUnswitch' is false
1
'FullUnswitch' is false
1
'FullUnswitch' is false
1
'FullUnswitch' is false
1
'FullUnswitch' is false
) {
30
←
Taking true branch→
      auto &BI = cast<BranchInst>(TI);
31
←
'TI' is a 'BranchInst'→
      if (match(BI.getCondition(), m_LogicalAnd())) {
32
←
Calling 'match<llvm::Value, llvm::PatternMatch::LogicalOp_match<llvm::PatternMatch::class_match<llvm::Value>, llvm::PatternMatch::class_match<llvm::Value>, 28>>'→
39
←
Returning from 'match<llvm::Value, llvm::PatternMatch::LogicalOp_match<llvm::PatternMatch::class_match<llvm::Value>, llvm::PatternMatch::class_match<llvm::Value>, 28>>'→
40
←
Taking false branch→
        if (SuccBB == BI.getSuccessor(1))
          continue;
      } else if (match(BI.getCondition(), m_LogicalOr())) {
41
←
Calling 'match<llvm::Value, llvm::PatternMatch::LogicalOp_match<llvm::PatternMatch::class_match<llvm::Value>, llvm::PatternMatch::class_match<llvm::Value>, 29>>'→
48
←
Returning from 'match<llvm::Value, llvm::PatternMatch::LogicalOp_match<llvm::PatternMatch::class_match<llvm::Value>, llvm::PatternMatch::class_match<llvm::Value>, 29>>'→
49
←
Taking false branch→
        if (SuccBB == BI.getSuccessor(0))
          continue;
      // NOTE(review): this last branch is reached when the condition matches
      // neither m_LogicalAnd() nor m_LogicalOr(). It dereferences
      // PartialIVInfo.KnownValue without a null check, and the analyzer path
      // recorded in this listing ends here with that pointer being null.
      // Presumably only the partially-invariant candidate can get this far
      // and KnownValue is always set for it -- TODO confirm against the code
      // that fills PartialIVInfo, and consider guarding or asserting on it.
      } else if ((PartialIVInfo.KnownValue->isOneValue() &&
50
←
Called C++ object pointer is null
                  SuccBB == BI.getSuccessor(0)) ||
                 (!PartialIVInfo.KnownValue->isOneValue() &&
                  SuccBB == BI.getSuccessor(1)))
        continue;
    }

    // This successor's domtree will not need to be duplicated after
    // unswitching if the edge to the successor dominates it (and thus the
    // entire tree). This essentially means there is no other path into this
    // subtree and so it will end up live in only one clone of the loop.
    if (SuccBB->getUniquePredecessor() ||
        llvm::all_of(predecessors(SuccBB), [&](BasicBlock *PredBB) {
          return PredBB == &BB || DT.dominates(SuccBB, PredBB);
        })) {
      Cost += computeDomSubtreeCost(*DT[SuccBB], BBCostMap, DTCostMap);
      assert(Cost <= LoopCost &&((void)0)
             "Non-duplicated cost should never exceed total loop cost!")((void)0);
    }
  }

  // Now scale the cost by the number of unique successors minus one. We
  // subtract one because there is already at least one copy of the entire
  // loop. This is computing the new cost of unswitching a condition.
  // Note that guards always have 2 unique successors that are implicit and
  // will be materialized if we decide to unswitch it.
  int SuccessorsCount = isGuard(&TI) ? 2 : Visited.size();
  assert(SuccessorsCount > 1 &&((void)0)
         "Cannot unswitch a condition without multiple distinct successors!")((void)0);
  return (LoopCost - Cost) * (SuccessorsCount - 1);
};
Instruction *BestUnswitchTI = nullptr;
InstructionCost BestUnswitchCost = 0;
ArrayRef<Value *> BestUnswitchInvariants;
for (auto &TerminatorAndInvariants : UnswitchCandidates) {
24
←
Assuming '__begin1' is not equal to '__end1'→
  Instruction &TI = *TerminatorAndInvariants.first;
  ArrayRef<Value *> Invariants = TerminatorAndInvariants.second;
  BranchInst *BI = dyn_cast<BranchInst>(&TI);
25
←
Assuming the object is a 'BranchInst'→
  InstructionCost CandidateCost = ComputeUnswitchedCost(
27
←
Calling 'operator()'→
      TI, /*FullUnswitch*/ !BI25.1
'BI' is non-null
1
'BI' is non-null
1
'BI' is non-null
1
'BI' is non-null
1
'BI' is non-null
 || (Invariants.size() == 1 &&
26
←
Assuming the condition is false→
                                   Invariants[0] == BI->getCondition()));
  // Calculate cost multiplier which is a tool to limit potentially
  // exponential behavior of loop-unswitch.
  if (EnableUnswitchCostMultiplier) {
    int CostMultiplier =
        CalculateUnswitchCostMultiplier(TI, L, LI, DT, UnswitchCandidates);
    assert(((void)0)
        (CostMultiplier > 0 && CostMultiplier <= UnswitchThreshold) &&((void)0)
        "cost multiplier needs to be in the range of 1..UnswitchThreshold")((void)0);
    CandidateCost *= CostMultiplier;
    LLVM_DEBUG(dbgs() << "  Computed cost of " << CandidateCostdo { } while (false)
                      << " (multiplier: " << CostMultiplier << ")"do { } while (false)
                      << " for unswitch candidate: " << TI << "\n")do { } while (false);
  } else {
    LLVM_DEBUG(dbgs() << "  Computed cost of " << CandidateCostdo { } while (false)
                      << " for unswitch candidate: " << TI << "\n")do { } while (false);
  }

  if (!BestUnswitchTI || CandidateCost < BestUnswitchCost) {
    BestUnswitchTI = &TI;
    BestUnswitchCost = CandidateCost;
    BestUnswitchInvariants = Invariants;
  }
}
assert(BestUnswitchTI && "Failed to find loop unswitch candidate")((void)0);

if (BestUnswitchCost >= UnswitchThreshold) {
  LLVM_DEBUG(dbgs() << "Cannot unswitch, lowest cost found: "do { } while (false)
                    << BestUnswitchCost << "\n")do { } while (false);
  return false;
}

if (BestUnswitchTI != PartialIVCondBranch)
  PartialIVInfo.InstToDuplicate.clear();

// If the best candidate is a guard, turn it into a branch.
if (isGuard(BestUnswitchTI))
  BestUnswitchTI = turnGuardIntoBranch(cast<IntrinsicInst>(BestUnswitchTI), L,
                                       ExitBlocks, DT, LI, MSSAU);

LLVM_DEBUG(dbgs() << "  Unswitching non-trivial (cost = "do { } while (false)
                  << BestUnswitchCost << ") terminator: " << *BestUnswitchTIdo { } while (false)
                  << "\n")do { } while (false);
unswitchNontrivialInvariants(L, *BestUnswitchTI, BestUnswitchInvariants,
                             ExitBlocks, PartialIVInfo, DT, LI, AC,
                             UnswitchCB, SE, MSSAU, DestroyLoopCB);
return true;
2970}

2972/// Unswitch control flow predicated on loop invariant conditions.
2973///
2974/// This first hoists all branches or switches which are trivial (IE, do not
2975/// require duplicating any part of the loop) out of the loop body. It then
2976/// looks at other loop invariant control flows and tries to unswitch those as
2977/// well by cloning the loop if the result is small enough.
2978///
2979/// The `DT`, `LI`, `AC`, `AA`, `TTI` parameters are required analyses that are
2980/// also updated based on the unswitch. The `MSSA` analysis is also updated if
2981/// valid (i.e. its use is enabled).
2982///
2983/// If either `NonTrivial` is true or the flag `EnableNonTrivialUnswitch` is
2984/// true, we will attempt to do non-trivial unswitching as well as trivial
2985/// unswitching.
2986///
2987/// The `UnswitchCB` callback provided will be run after unswitching is
2988/// complete, with the first parameter set to `true` if the provided loop
2989/// remains a loop, and a list of new sibling loops created.
2990///
2991/// If `SE` is non-null, we will update that analysis based on the unswitching
2992/// done.
///
/// `Trivial` gates the trivial-unswitching step: when it is false,
/// `unswitchAllTrivialConditions` is never called. `DestroyLoopCB` is not
/// invoked in this function; it is only forwarded to
/// `unswitchBestCondition`.
///
/// Returns true when `unswitchAllTrivialConditions` or
/// `unswitchBestCondition` returned true, and false on every other path
/// (including all of the early bail-outs below).
2993static bool
2994unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
           AAResults &AA, TargetTransformInfo &TTI, bool Trivial,
           bool NonTrivial,
           function_ref<void(bool, bool, ArrayRef<Loop *>)> UnswitchCB,
           ScalarEvolution *SE, MemorySSAUpdater *MSSAU,
           function_ref<void(Loop &, StringRef)> DestroyLoopCB) {
assert(L.isRecursivelyLCSSAForm(DT, LI) &&((void)0)
       "Loops must be in LCSSA form before unswitching.")((void)0);

// Must be in loop simplified form: we need a preheader and dedicated exits.
if (!L.isLoopSimplifyForm())
  return false;

// Try trivial unswitch first before loop over other basic blocks in the loop.
if (Trivial && unswitchAllTrivialConditions(L, DT, LI, SE, MSSAU)) {
  // If we unswitched successfully we will want to clean up the loop before
  // processing it further so just mark it as unswitched and return.
  // The second argument is `false` and the list of new loops is empty; the
  // callbacks defined in this file name that second parameter
  // `PartiallyInvariant`.
  UnswitchCB(/*CurrentLoopValid*/ true, false, {});
  return true;
}

// Check whether we should continue with non-trivial conditions.
// EnableNonTrivialUnswitch: Global variable that forces non-trivial
//                           unswitching for testing and debugging.
// NonTrivial: Parameter that enables non-trivial unswitching for this
//             invocation of the transform. But this should be allowed only
//             for targets without branch divergence.
//
// FIXME: If divergence analysis becomes available to a loop
// transform, we should allow unswitching for non-trivial uniform
// branches even on targets that have divergence.
// https://bugs.llvm.org/show_bug.cgi?id=48819
bool ContinueWithNonTrivial =
    EnableNonTrivialUnswitch || (NonTrivial && !TTI.hasBranchDivergence());
if (!ContinueWithNonTrivial)
  return false;

// Skip non-trivial unswitching for optsize functions.
if (L.getHeader()->getParent()->hasOptSize())
  return false;

// Skip non-trivial unswitching for loops that cannot be cloned.
if (!L.isSafeToClone())
  return false;

// For non-trivial unswitching, because it often creates new loops, we rely on
// the pass manager to iterate on the loops rather than trying to immediately
// reach a fixed point. There is no substantial advantage to iterating
// internally, and if any of the new loops are simplified enough to contain
// trivial unswitching we want to prefer those.

// Try to unswitch the best invariant condition. We prefer this full unswitch to
// a partial unswitch when possible below the threshold.
if (unswitchBestCondition(L, DT, LI, AC, AA, TTI, UnswitchCB, SE, MSSAU,
                          DestroyLoopCB))
  return true;

// No other opportunities to unswitch.
return false;
3053}

/// Pass entry point for `SimpleLoopUnswitchPass`.
///
/// Builds the two callbacks through which `unswitchLoop` reports changes to
/// the `LPMUpdater`, wraps `AR.MSSA` in a `MemorySSAUpdater` when it is
/// non-null, and then calls `unswitchLoop`.
///
/// Returns `PreservedAnalyses::all()` when `unswitchLoop` returns false.
/// Otherwise returns the result of `getLoopPassPreservedAnalyses()`, with
/// `MemorySSAAnalysis` additionally marked preserved when `AR.MSSA` is
/// non-null.
3055PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM,
                                            LoopStandardAnalysisResults &AR,
                                            LPMUpdater &U) {
Function &F = *L.getHeader()->getParent();
// F is only referenced by the debug output below; the cast keeps it "used".
(void)F;

LLVM_DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << Ldo { } while (false)
                  << "\n")do { } while (false);

// Save the current loop name in a variable so that we can report it even
// after it has been deleted.
std::string LoopName = std::string(L.getName());

// Callback handed to unswitchLoop: registers any new loops as siblings and
// then either tags, revisits, or marks the current loop as deleted.
auto UnswitchCB = [&L, &U, &LoopName](bool CurrentLoopValid,
                                      bool PartiallyInvariant,
                                      ArrayRef<Loop *> NewLoops) {
  // If we did a non-trivial unswitch, we have added new (cloned) loops.
  if (!NewLoops.empty())
    U.addSiblingLoops(NewLoops);

  // If the current loop remains valid, we should revisit it to catch any
  // other unswitch opportunities. Otherwise, we need to mark it as deleted.
  if (CurrentLoopValid) {
    if (PartiallyInvariant) {
      // Mark the new loop as partially unswitched, to avoid unswitching on
      // the same condition again.
      // Note that in this case the loop is NOT queued for a revisit; only
      // its loop ID metadata is replaced.
      auto &Context = L.getHeader()->getContext();
      MDNode *DisableUnswitchMD = MDNode::get(
          Context,
          MDString::get(Context, "llvm.loop.unswitch.partial.disable"));
      MDNode *NewLoopID = makePostTransformationMetadata(
          Context, L.getLoopID(), {"llvm.loop.unswitch.partial"},
          {DisableUnswitchMD});
      L.setLoopID(NewLoopID);
    } else
      U.revisitCurrentLoop();
  } else
    U.markLoopAsDeleted(L, LoopName);
};

// Callback for loops other than the current one: the loop and its name are
// supplied by the caller and simply forwarded to the updater.
auto DestroyLoopCB = [&U](Loop &L, StringRef Name) {
  U.markLoopAsDeleted(L, Name);
};

// MSSAU stays empty when AR.MSSA is null, in which case nullptr is passed
// to unswitchLoop below.
Optional<MemorySSAUpdater> MSSAU;
if (AR.MSSA) {
  MSSAU = MemorySSAUpdater(AR.MSSA);
  if (VerifyMemorySSA)
    AR.MSSA->verifyMemorySSA();
}
// NOTE(review): `Trivial` and `NonTrivial` are not declared in this function;
// presumably they are members of SimpleLoopUnswitchPass -- confirm in the
// class declaration.
if (!unswitchLoop(L, AR.DT, AR.LI, AR.AC, AR.AA, AR.TTI, Trivial, NonTrivial,
                  UnswitchCB, &AR.SE,
                  MSSAU.hasValue() ? MSSAU.getPointer() : nullptr,
                  DestroyLoopCB))
  return PreservedAnalyses::all();

if (AR.MSSA && VerifyMemorySSA)
  AR.MSSA->verifyMemorySSA();

// Historically this pass has had issues with the dominator tree so verify it
// in asserts builds.
assert(AR.DT.verify(DominatorTree::VerificationLevel::Fast))((void)0);

auto PA = getLoopPassPreservedAnalyses();
if (AR.MSSA)
  PA.preserve<MemorySSAAnalysis>();
return PA;
3122}

3124namespace {

/// `LoopPass` wrapper around `unswitchLoop`; the actual work happens in
/// `runOnLoop`, which is defined out of line.
3126class SimpleLoopUnswitchLegacyPass : public LoopPass {
// Set once in the constructor and forwarded to unswitchLoop as its
// NonTrivial argument by runOnLoop.
bool NonTrivial;

3129public:
static char ID; // Pass ID, replacement for typeid

/// Stores `NonTrivial` (default false) and calls the pass's initialize
/// function on the global `PassRegistry`.
explicit SimpleLoopUnswitchLegacyPass(bool NonTrivial = false)
    : LoopPass(ID), NonTrivial(NonTrivial) {
  initializeSimpleLoopUnswitchLegacyPassPass(
      *PassRegistry::getPassRegistry());
}

bool runOnLoop(Loop *L, LPPassManager &LPM) override;

/// Declares the analyses this pass needs: AssumptionCacheTracker and
/// TargetTransformInfoWrapperPass always; MemorySSAWrapperPass is required
/// and preserved only when `EnableMSSALoopDependency` is set. Whatever
/// `getLoopAnalysisUsage` adds comes on top of that.
void getAnalysisUsage(AnalysisUsage &AU) const override {
  AU.addRequired<AssumptionCacheTracker>();
  AU.addRequired<TargetTransformInfoWrapperPass>();
  if (EnableMSSALoopDependency) {
    AU.addRequired<MemorySSAWrapperPass>();
    AU.addPreserved<MemorySSAWrapperPass>();
  }
  getLoopAnalysisUsage(AU);
}
3149};

3151} // end anonymous namespace

/// Runs loop unswitching on `L` for the legacy `LoopPass` interface.
///
/// Returns false immediately when `skipLoop(L)` is true. Otherwise it
/// fetches the analyses, builds callbacks that report new and deleted loops
/// to `LPM`, and returns the result of `unswitchLoop`. The `Trivial`
/// argument passed to `unswitchLoop` is always `true` here; `NonTrivial`
/// comes from the member set in the constructor.
3153bool SimpleLoopUnswitchLegacyPass::runOnLoop(Loop *L, LPPassManager &LPM) {
if (skipLoop(L))
  return false;

Function &F = *L->getHeader()->getParent();

LLVM_DEBUG(dbgs() << "Unswitching loop in " << F.getName() << ": " << *Ldo { } while (false)
                  << "\n")do { } while (false);

auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
// MemorySSA is only fetched (and an updater only created) when
// EnableMSSALoopDependency is set; otherwise both stay null/empty.
MemorySSA *MSSA = nullptr;
Optional<MemorySSAUpdater> MSSAU;
if (EnableMSSALoopDependency) {
  MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA();
  MSSAU = MemorySSAUpdater(MSSA);
}

// ScalarEvolution is optional here: SE is null when the wrapper pass is not
// available.
auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
auto *SE = SEWP ? &SEWP->getSE() : nullptr;

// Note: `L` (the pointer parameter) is captured by reference.
auto UnswitchCB = [&L, &LPM](bool CurrentLoopValid, bool PartiallyInvariant,
                             ArrayRef<Loop *> NewLoops) {
  // If we did a non-trivial unswitch, we have added new (cloned) loops.
  for (auto *NewL : NewLoops)
    LPM.addLoop(*NewL);

  // If the current loop remains valid, re-add it to the queue. This is
  // a little wasteful as we'll finish processing the current loop as well,
  // but it is the best we can do in the old PM.
  if (CurrentLoopValid) {
    // If the current loop has been unswitched using a partially invariant
    // condition, we should not re-add the current loop to avoid unswitching
    // on the same condition again.
    if (!PartiallyInvariant)
      LPM.addLoop(*L);
  } else
    LPM.markLoopAsDeleted(*L);
};

// The loop name supplied by the caller is ignored; only the loop itself is
// reported to LPM.
auto DestroyLoopCB = [&LPM](Loop &L, StringRef /* Name */) {
  LPM.markLoopAsDeleted(L);
};

if (MSSA && VerifyMemorySSA)
  MSSA->verifyMemorySSA();

bool Changed =
    unswitchLoop(*L, DT, LI, AC, AA, TTI, true, NonTrivial, UnswitchCB, SE,
                 MSSAU.hasValue() ? MSSAU.getPointer() : nullptr,
                 DestroyLoopCB);

if (MSSA && VerifyMemorySSA)
  MSSA->verifyMemorySSA();

// Historically this pass has had issues with the dominator tree so verify it
// in asserts builds.
assert(DT.verify(DominatorTree::VerificationLevel::Fast))((void)0);

return Changed;
3216}

// Out-of-class definition of the static pass ID, followed by the pass
// registration. The registration text below uses the argument string
// "simple-loop-unswitch", the description "Simple unswitch loops", and lists
// six dependencies; the address of `ID` is what gets handed to PassInfo.
// NOTE(review): in this listing each INITIALIZE_PASS_* macro invocation
// appears to be followed directly by its expansion -- confirm against the
// original source before editing any of these lines.
3218char SimpleLoopUnswitchLegacyPass::ID = 0;
3219INITIALIZE_PASS_BEGIN(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch",static void *initializeSimpleLoopUnswitchLegacyPassPassOnce(PassRegistry
 &Registry) {
                    "Simple unswitch loops", false, false)static void *initializeSimpleLoopUnswitchLegacyPassPassOnce(PassRegistry
 &Registry) {
3221INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)initializeAssumptionCacheTrackerPass(Registry);
3222INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)initializeDominatorTreeWrapperPassPass(Registry);
3223INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)initializeLoopInfoWrapperPassPass(Registry);
3224INITIALIZE_PASS_DEPENDENCY(LoopPass)initializeLoopPassPass(Registry);
3225INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)initializeMemorySSAWrapperPassPass(Registry);
3226INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)initializeTargetTransformInfoWrapperPassPass(Registry);
3227INITIALIZE_PASS_END(SimpleLoopUnswitchLegacyPass, "simple-loop-unswitch",PassInfo *PI = new PassInfo( "Simple unswitch loops", "simple-loop-unswitch"
, &SimpleLoopUnswitchLegacyPass::ID, PassInfo::NormalCtor_t
(callDefaultCtor<SimpleLoopUnswitchLegacyPass>), false,
 false); Registry.registerPass(*PI, true); return PI; } static
 llvm::once_flag InitializeSimpleLoopUnswitchLegacyPassPassFlag
; void llvm::initializeSimpleLoopUnswitchLegacyPassPass(PassRegistry
 &Registry) { llvm::call_once(InitializeSimpleLoopUnswitchLegacyPassPassFlag
, initializeSimpleLoopUnswitchLegacyPassPassOnce, std::ref(Registry
)); }
                  "Simple unswitch loops", false, false)PassInfo *PI = new PassInfo( "Simple unswitch loops", "simple-loop-unswitch"
, &SimpleLoopUnswitchLegacyPass::ID, PassInfo::NormalCtor_t
(callDefaultCtor<SimpleLoopUnswitchLegacyPass>), false,
 false); Registry.registerPass(*PI, true); return PI; } static
 llvm::once_flag InitializeSimpleLoopUnswitchLegacyPassPassFlag
; void llvm::initializeSimpleLoopUnswitchLegacyPassPass(PassRegistry
 &Registry) { llvm::call_once(InitializeSimpleLoopUnswitchLegacyPassPassFlag
, initializeSimpleLoopUnswitchLegacyPassPassOnce, std::ref(Registry
)); }

/// Factory for the legacy pass: allocates a `SimpleLoopUnswitchLegacyPass`
/// with `new`, forwarding `NonTrivial` to its constructor, and returns the
/// raw pointer. Nothing here frees it; presumably the caller (pass manager)
/// takes ownership -- TODO confirm.
3230Pass *llvm::createSimpleLoopUnswitchLegacyPass(bool NonTrivial) {
return new SimpleLoopUnswitchLegacyPass(NonTrivial);
3232}

←

/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Transforms/Utils/LoopUtils.h

→

1//===- llvm/Transforms/Utils/LoopUtils.h - Loop utilities -------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines some loop transformation utilities.
10//
11//===----------------------------------------------------------------------===//
12 
13#ifndef LLVM_TRANSFORMS_UTILS_LOOPUTILS_H
14#define LLVM_TRANSFORMS_UTILS_LOOPUTILS_H
15 
16#include "llvm/ADT/StringRef.h"
17#include "llvm/Analysis/IVDescriptors.h"
18#include "llvm/Analysis/TargetTransformInfo.h"
19#include "llvm/Transforms/Utils/ValueMapper.h"
20 
21namespace llvm {
22 
23template <typename T> class DomTreeNodeBase;
24using DomTreeNode = DomTreeNodeBase<BasicBlock>;
25class AAResults;
26class AliasSet;
27class AliasSetTracker;
28class BasicBlock;
29class BlockFrequencyInfo;
30class ICFLoopSafetyInfo;
31class IRBuilderBase;
32class Loop;
33class LoopInfo;
34class MemoryAccess;
35class MemorySSA;
36class MemorySSAUpdater;
37class OptimizationRemarkEmitter;
38class PredIteratorCache;
39class ScalarEvolution;
40class ScalarEvolutionExpander;
41class SCEV;
42class SCEVExpander;
43class TargetLibraryInfo;
44class LPPassManager;
45class Instruction;
46struct RuntimeCheckingPtrGroup;
47typedef std::pair<const RuntimeCheckingPtrGroup *,
48                  const RuntimeCheckingPtrGroup *>
49    RuntimePointerCheck;
50 
51template <typename T> class Optional;
52template <typename T, unsigned N> class SmallSetVector;
53template <typename T, unsigned N> class SmallVector;
54template <typename T> class SmallVectorImpl;
55template <typename T, unsigned N> class SmallPriorityWorklist;
56 
57BasicBlock *InsertPreheaderForLoop(Loop *L, DominatorTree *DT, LoopInfo *LI,
58                                   MemorySSAUpdater *MSSAU, bool PreserveLCSSA);
59 
60/// Ensure that all exit blocks of the loop are dedicated exits.
61///
62/// For any loop exit block with non-loop predecessors, we split the loop
63/// predecessors to use a dedicated loop exit block. We update the dominator
64/// tree and loop info if provided, and will preserve LCSSA if requested.
65bool formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI,
66                             MemorySSAUpdater *MSSAU, bool PreserveLCSSA);
67 
68/// Ensures LCSSA form for every instruction from the Worklist in the scope of
69/// innermost containing loop.
70///
71/// For the given instruction which have uses outside of the loop, an LCSSA PHI
72/// node is inserted and the uses outside the loop are rewritten to use this
73/// node.
74///
75/// LoopInfo and DominatorTree are required and, since the routine makes no
76/// changes to CFG, preserved.
77///
78/// Returns true if any modifications are made.
79///
80/// This function may introduce unused PHI nodes. If \p PHIsToRemove is not
81/// nullptr, those are added to it (before removing, the caller has to check if
82/// they still do not have any uses). Otherwise the PHIs are directly removed.
83bool formLCSSAForInstructions(
84    SmallVectorImpl<Instruction *> &Worklist, const DominatorTree &DT,
85    const LoopInfo &LI, ScalarEvolution *SE, IRBuilderBase &Builder,
86    SmallVectorImpl<PHINode *> *PHIsToRemove = nullptr);
87 
88/// Put loop into LCSSA form.
89///
90/// Looks at all instructions in the loop which have uses outside of the
91/// current loop. For each, an LCSSA PHI node is inserted and the uses outside
92/// the loop are rewritten to use this node. Sub-loops must be in LCSSA form
93/// already.
94///
95/// LoopInfo and DominatorTree are required and preserved.
96///
97/// If ScalarEvolution is passed in, it will be preserved.
98///
99/// Returns true if any modifications are made to the loop.
100bool formLCSSA(Loop &L, const DominatorTree &DT, const LoopInfo *LI,
101               ScalarEvolution *SE);
102 
103/// Put a loop nest into LCSSA form.
104///
105/// This recursively forms LCSSA for a loop nest.
106///
107/// LoopInfo and DominatorTree are required and preserved.
108///
109/// If ScalarEvolution is passed in, it will be preserved.
110///
111/// Returns true if any modifications are made to the loop.
112bool formLCSSARecursively(Loop &L, const DominatorTree &DT, const LoopInfo *LI,
113                          ScalarEvolution *SE);
114 
115/// Flags controlling how much is checked when sinking or hoisting
116/// instructions.  The number of memory access in the loop (and whether there
117/// are too many) is determined in the constructors when using MemorySSA.
118class SinkAndHoistLICMFlags {
119public:
120  // Explicitly set limits.
121  SinkAndHoistLICMFlags(unsigned LicmMssaOptCap,
122                        unsigned LicmMssaNoAccForPromotionCap, bool IsSink,
123                        Loop *L = nullptr, MemorySSA *MSSA = nullptr);
124  // Use default limits.
125  SinkAndHoistLICMFlags(bool IsSink, Loop *L = nullptr,
126                        MemorySSA *MSSA = nullptr);
127 
  // Plain setter/getter for the IsSink flag.
128  void setIsSink(bool B) { IsSink = B; }
129  bool getIsSink() { return IsSink; }
  // Returns the stored NoOfMemAccTooLarge flag; nothing in this class body
  // modifies it after its in-class initialisation to false.
130  bool tooManyMemoryAccesses() { return NoOfMemAccTooLarge; }
  // True once the counter bumped by incrementClobberingCalls() has reached
  // (or passed) LicmMssaOptCap.
131  bool tooManyClobberingCalls() { return LicmMssaOptCounter >= LicmMssaOptCap; }
132  void incrementClobberingCalls() { ++LicmMssaOptCounter; }
133 
134protected:
  // Only the first two members have in-class initialisers; the two caps and
  // IsSink are presumably set by the constructors, which are defined
  // elsewhere -- TODO confirm.
135  bool NoOfMemAccTooLarge = false;
136  unsigned LicmMssaOptCounter = 0;
137  unsigned LicmMssaOptCap;
138  unsigned LicmMssaNoAccForPromotionCap;
139  bool IsSink;
140};
141 
142/// Walk the specified region of the CFG (defined by all blocks
143/// dominated by the specified block, and that are in the current loop) in
144/// reverse depth first order w.r.t the DominatorTree. This allows us to visit
145/// uses before definitions, allowing us to sink a loop body in one pass without
146/// iteration. Takes DomTreeNode, AAResults, LoopInfo, DominatorTree,
147/// BlockFrequencyInfo, TargetLibraryInfo, Loop, AliasSet information for all
148/// instructions of the loop and loop safety information as
149/// arguments. Diagnostics is emitted via \p ORE. It returns changed status.
150bool sinkRegion(DomTreeNode *, AAResults *, LoopInfo *, DominatorTree *,
151                BlockFrequencyInfo *, TargetLibraryInfo *,
152                TargetTransformInfo *, Loop *, AliasSetTracker *,
153                MemorySSAUpdater *, ICFLoopSafetyInfo *,
154                SinkAndHoistLICMFlags &, OptimizationRemarkEmitter *);
155 
156/// Walk the specified region of the CFG (defined by all blocks
157/// dominated by the specified block, and that are in the current loop) in depth
158/// first order w.r.t the DominatorTree.  This allows us to visit definitions
159/// before uses, allowing us to hoist a loop body in one pass without iteration.
160/// Takes DomTreeNode, AAResults, LoopInfo, DominatorTree,
161/// BlockFrequencyInfo, TargetLibraryInfo, Loop, AliasSet information for all
162/// instructions of the loop and loop safety information as arguments.
163/// Diagnostics is emitted via \p ORE. It returns changed status.
164bool hoistRegion(DomTreeNode *, AAResults *, LoopInfo *, DominatorTree *,
165                 BlockFrequencyInfo *, TargetLibraryInfo *, Loop *,
166                 AliasSetTracker *, MemorySSAUpdater *, ScalarEvolution *,
167                 ICFLoopSafetyInfo *, SinkAndHoistLICMFlags &,
168                 OptimizationRemarkEmitter *, bool);
169 
170/// This function deletes dead loops. The caller of this function needs to
171/// guarantee that the loop is in fact dead.
172/// The function requires a bunch of prerequisites to be present:
173///   - The loop needs to be in LCSSA form
174///   - The loop needs to have a Preheader
175///   - A unique dedicated exit block must exist
176///
177/// This also updates the relevant analysis information in \p DT, \p SE, \p LI
178/// and \p MSSA if pointers to those are provided.
179/// It also updates the loop PM if an updater struct is provided.
180 
181void deleteDeadLoop(Loop *L, DominatorTree *DT, ScalarEvolution *SE,
182                    LoopInfo *LI, MemorySSA *MSSA = nullptr);
183 
184/// Remove the backedge of the specified loop.  Handles loop nests and general
185/// loop structures subject to the precondition that the loop has no parent
186/// loop and has a single latch block.  Preserves all listed analyses.
187void breakLoopBackedge(Loop *L, DominatorTree &DT, ScalarEvolution &SE,
188                       LoopInfo &LI, MemorySSA *MSSA);
189 
190/// Try to promote memory values to scalars by sinking stores out of
191/// the loop and moving loads to before the loop.  We do this by looping over
192/// the stores in the loop, looking for stores to Must pointers which are
193/// loop invariant. It takes a set of must-alias values, Loop exit blocks
194/// vector, loop exit blocks insertion point vector, PredIteratorCache,
195/// LoopInfo, DominatorTree, Loop, AliasSet information for all instructions
196/// of the loop and loop safety information as arguments.
197/// Diagnostics is emitted via \p ORE. It returns changed status.
198bool promoteLoopAccessesToScalars(
199    const SmallSetVector<Value *, 8> &, SmallVectorImpl<BasicBlock *> &,
200    SmallVectorImpl<Instruction *> &, SmallVectorImpl<MemoryAccess *> &,
201    PredIteratorCache &, LoopInfo *, DominatorTree *, const TargetLibraryInfo *,
202    Loop *, AliasSetTracker *, MemorySSAUpdater *, ICFLoopSafetyInfo *,
203    OptimizationRemarkEmitter *);
204 
205/// Does a BFS from a given node to all of its children inside a given loop.
206/// The returned vector of nodes includes the starting point.
207SmallVector<DomTreeNode *, 16> collectChildrenInLoop(DomTreeNode *N,
208                                                     const Loop *CurLoop);
209 
210/// Returns the instructions that use values defined in the loop.
211SmallVector<Instruction *, 8> findDefsUsedOutsideOfLoop(Loop *L);
212 
213/// Find a combination of metadata ("llvm.loop.vectorize.width" and
214/// "llvm.loop.vectorize.scalable.enable") for a loop and use it to construct a
215/// ElementCount. If the metadata "llvm.loop.vectorize.width" cannot be found
216/// then None is returned.
217Optional<ElementCount>
218getOptionalElementCountLoopAttribute(const Loop *TheLoop);
219 
220/// Create a new loop identifier for a loop created from a loop transformation.
221///
222/// @param OrigLoopID The loop ID of the loop before the transformation.
223/// @param FollowupAttrs List of attribute names that contain attributes to be
224///                      added to the new loop ID.
225/// @param InheritOptionsAttrsPrefix Selects which attributes should be inherited
226///                                  from the original loop. The following values
227///                                  are considered:
228///        nullptr   : Inherit all attributes from @p OrigLoopID.
229///        ""        : Do not inherit any attribute from @p OrigLoopID; only use
230///                    those specified by a followup attribute.
231///        "<prefix>": Inherit all attributes except those which start with
232///                    <prefix>; commonly used to remove metadata for the
233///                    applied transformation.
234/// @param AlwaysNew If true, do not try to reuse OrigLoopID and never return
235///                  None.
236///
237/// @return The loop ID for the after-transformation loop. The following values
238///         can be returned:
239///         None         : No followup attribute was found; it is up to the
240///                        transformation to choose attributes that make sense.
241///         @p OrigLoopID: The original identifier can be reused.
242///         nullptr      : The new loop has no attributes.
243///         MDNode*      : A new unique loop identifier.
244Optional<MDNode *>
245makeFollowupLoopID(MDNode *OrigLoopID, ArrayRef<StringRef> FollowupAttrs,
246                   const char *InheritOptionsAttrsPrefix = "",
247                   bool AlwaysNew = false);
248 
249/// Look for the loop attribute that disables all transformation heuristics.
250bool hasDisableAllTransformsHint(const Loop *L);
251 
252/// Look for the loop attribute that disables the LICM transformation heuristics.
253bool hasDisableLICMTransformsHint(const Loop *L);
254 
/// The mode sets how eagerly a transformation should be applied.
///
/// The enumerators double as a small bit set: TM_Force (0x04) is a flag that
/// is OR-ed onto TM_Enable or TM_Disable to form the two "by user" modes.
enum TransformationMode {
  /// The pass can use heuristics to determine whether a transformation should
  /// be applied. (Implicit value 0.)
  TM_Unspecified,

  /// The transformation should be applied without considering a cost model.
  /// (Implicit value 1.)
  TM_Enable,

  /// The transformation should not be applied. (Implicit value 2.)
  TM_Disable,

  /// Force is a flag and should not be used alone.
  TM_Force = 0x04,

  /// The transformation was directed by the user, e.g. by a #pragma in
  /// the source code. If the transformation could not be applied, a
  /// warning should be emitted. (TM_Enable | TM_Force == 5.)
  TM_ForcedByUser = TM_Enable | TM_Force,

  /// The transformation must not be applied. For instance, `#pragma clang loop
  /// unroll(disable)` explicitly forbids any unrolling to take place. Unlike
  /// general loop metadata, it must not be dropped. Most passes should not
  /// behave differently under TM_Disable and TM_SuppressedByUser.
  /// (TM_Disable | TM_Force == 6.)
  TM_SuppressedByUser = TM_Disable | TM_Force
};
281 
282/// @{
283/// Get the mode for LLVM's supported loop transformations.
284TransformationMode hasUnrollTransformation(const Loop *L);
285TransformationMode hasUnrollAndJamTransformation(const Loop *L);
286TransformationMode hasVectorizeTransformation(const Loop *L);
287TransformationMode hasDistributeTransformation(const Loop *L);
288TransformationMode hasLICMVersioningTransformation(const Loop *L);
289/// @}
290 
291/// Set the input string into the loop metadata, keeping other values intact.
292/// If the string is already in the loop metadata, update its value if it is
293/// different.
294void addStringMetadataToLoop(Loop *TheLoop, const char *MDString,
295                             unsigned V = 0);
296 
297/// Returns a loop's estimated trip count based on branch weight metadata.
298/// In addition if \p EstimatedLoopInvocationWeight is not null it is
299/// initialized with weight of loop's latch leading to the exit.
300/// Returns 0 when the count is estimated to be 0, or None when a meaningful
301/// estimate cannot be made.
302Optional<unsigned>
303getLoopEstimatedTripCount(Loop *L,
304                          unsigned *EstimatedLoopInvocationWeight = nullptr);
305 
306/// Set a loop's branch weight metadata to reflect that loop has \p
307/// EstimatedTripCount iterations and \p EstimatedLoopInvocationWeight exits
308/// through latch. Returns true if metadata is successfully updated, false
309/// otherwise. Note that loop must have a latch block which controls loop exit
310/// in order to succeed.
311bool setLoopEstimatedTripCount(Loop *L, unsigned EstimatedTripCount,
312                               unsigned EstimatedLoopInvocationWeight);
313 
314/// Check inner loop (L) backedge count is known to be invariant on all
315/// iterations of its outer loop. If the loop has no parent, this is trivially
316/// true.
317bool hasIterationCountInvariantInParent(Loop *L, ScalarEvolution &SE);
318 
319/// Helper to consistently add the set of standard passes to a loop pass's \c
320/// AnalysisUsage.
321///
322/// All loop passes should call this as part of implementing their \c
323/// getAnalysisUsage.
324void getLoopAnalysisUsage(AnalysisUsage &AU);
325 
326/// Returns true if it is legal to hoist or sink this instruction disregarding the
327/// possible introduction of faults.  Reasoning about potential faulting
328/// instructions is the responsibility of the caller since it is challenging to
329/// do efficiently from within this routine.
330/// \p TargetExecutesOncePerLoop is true only when it is guaranteed that the
331/// target executes at most once per execution of the loop body.  This is used
332/// to assess the legality of duplicating atomic loads.  Generally, this is
333/// true when moving out of loop and not true when moving into loops.
334/// If \p ORE is set use it to emit optimization remarks.
335bool canSinkOrHoistInst(Instruction &I, AAResults *AA, DominatorTree *DT,
336                        Loop *CurLoop, AliasSetTracker *CurAST,
337                        MemorySSAUpdater *MSSAU, bool TargetExecutesOncePerLoop,
338                        SinkAndHoistLICMFlags *LICMFlags = nullptr,
339                        OptimizationRemarkEmitter *ORE = nullptr);
340 
341/// Returns a Min/Max operation corresponding to MinMaxRecurrenceKind.
342/// The Builder's fast-math-flags must be set to propagate the expected values.
343Value *createMinMaxOp(IRBuilderBase &Builder, RecurKind RK, Value *Left,
344                      Value *Right);
345 
346/// Generates an ordered vector reduction using extracts to reduce the value.
347Value *getOrderedReduction(IRBuilderBase &Builder, Value *Acc, Value *Src,
348                           unsigned Op, RecurKind MinMaxKind = RecurKind::None,
349                           ArrayRef<Value *> RedOps = None);
350 
351/// Generates a vector reduction using shufflevectors to reduce the value.
352/// Fast-math-flags are propagated using the IRBuilder's setting.
353Value *getShuffleReduction(IRBuilderBase &Builder, Value *Src, unsigned Op,
354                           RecurKind MinMaxKind = RecurKind::None,
355                           ArrayRef<Value *> RedOps = None);
356 
357/// Create a target reduction of the given vector. The reduction operation
358/// is described by the \p Opcode parameter. min/max reductions require
359/// additional information supplied in \p RdxKind.
360/// The target is queried to determine if intrinsics or shuffle sequences are
361/// required to implement the reduction.
362/// Fast-math-flags are propagated using the IRBuilder's setting.
363Value *createSimpleTargetReduction(IRBuilderBase &B,
364                                   const TargetTransformInfo *TTI, Value *Src,
365                                   RecurKind RdxKind,
366                                   ArrayRef<Value *> RedOps = None);
367 
368/// Create a generic target reduction using a recurrence descriptor \p Desc
369/// The target is queried to determine if intrinsics or shuffle sequences are
370/// required to implement the reduction.
371/// Fast-math-flags are propagated using the RecurrenceDescriptor.
372Value *createTargetReduction(IRBuilderBase &B, const TargetTransformInfo *TTI,
373                             const RecurrenceDescriptor &Desc, Value *Src);
374 
375/// Create an ordered reduction intrinsic using the given recurrence
376/// descriptor \p Desc.
377Value *createOrderedReduction(IRBuilderBase &B,
378                              const RecurrenceDescriptor &Desc, Value *Src,
379                              Value *Start);
380 
381/// Get the intersection (logical and) of all of the potential IR flags
382/// of each scalar operation (VL) that will be converted into a vector (I).
383/// If OpValue is non-null, we only consider operations similar to OpValue
384/// when intersecting.
385/// Flag set: NSW, NUW, exact, and all of fast-math.
386void propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue = nullptr);
387 
388/// Returns true if we can prove that \p S is defined and always negative in
389/// loop \p L.
390bool isKnownNegativeInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE);
391 
392/// Returns true if we can prove that \p S is defined and always non-negative in
393/// loop \p L.
394bool isKnownNonNegativeInLoop(const SCEV *S, const Loop *L,
395                              ScalarEvolution &SE);
396 
397/// Returns true if \p S is defined and is never equal to signed/unsigned max.
398bool cannotBeMaxInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE,
399                       bool Signed);
400 
401/// Returns true if \p S is defined and is never equal to signed/unsigned min.
402bool cannotBeMinInLoop(const SCEV *S, const Loop *L, ScalarEvolution &SE,
403                       bool Signed);
404 
/// Policy selector used as the type of the ReplaceExitValue parameter of
/// rewriteLoopExitValues. The enumerators take the implicit values 0..3 in
/// declaration order.
enum ReplaceExitVal { NeverRepl, OnlyCheapRepl, NoHardUse, AlwaysRepl };
406 
407/// If the final value of any expressions that are recurrent in the loop can
408/// be computed, substitute the exit values from the loop into any instructions
409/// outside of the loop that use the final values of the current expressions.
410/// Return the number of loop exit values that have been replaced, and the
411/// corresponding phi node will be added to DeadInsts.
412int rewriteLoopExitValues(Loop *L, LoopInfo *LI, TargetLibraryInfo *TLI,
413                          ScalarEvolution *SE, const TargetTransformInfo *TTI,
414                          SCEVExpander &Rewriter, DominatorTree *DT,
415                          ReplaceExitVal ReplaceExitValue,
416                          SmallVector<WeakTrackingVH, 16> &DeadInsts);
417 
418/// Set weights for \p UnrolledLoop and \p RemainderLoop based on weights for
419/// \p OrigLoop and the following distribution of \p OrigLoop iteration among \p
420/// UnrolledLoop and \p RemainderLoop. \p UnrolledLoop receives weights that
421/// reflect TC/UF iterations, and \p RemainderLoop receives weights that reflect
422/// the remaining TC%UF iterations.
423///
424/// Note that \p OrigLoop may be equal to either \p UnrolledLoop or \p
425/// RemainderLoop in which case weights for \p OrigLoop are updated accordingly.
426/// Note also behavior is undefined if \p UnrolledLoop and \p RemainderLoop are
427/// equal. \p UF must be greater than zero.
428/// If \p OrigLoop has no profile info associated nothing happens.
429///
430/// This utility may be useful for optimizations such as the unroller and
431/// vectorizer, as it is a typical transformation for them.
432void setProfileInfoAfterUnrolling(Loop *OrigLoop, Loop *UnrolledLoop,
433                                  Loop *RemainderLoop, uint64_t UF);
434 
435/// Utility that implements appending of loops onto a worklist given a range.
436/// We want to process loops in postorder, but the worklist is a LIFO data
437/// structure, so we append to it in *reverse* postorder.
438/// For trees, a preorder traversal is a viable reverse postorder, so we
439/// actually append using a preorder walk algorithm.
440template <typename RangeT>
441void appendLoopsToWorklist(RangeT &&, SmallPriorityWorklist<Loop *, 4> &);
442/// Utility that implements appending of loops onto a worklist given a range.
443/// It has the same behavior as appendLoopsToWorklist, but assumes the range of
444/// loops has already been reversed, so it processes loops in the given order.
445template <typename RangeT>
446void appendReversedLoopsToWorklist(RangeT &&,
447                                   SmallPriorityWorklist<Loop *, 4> &);
448 
449/// Utility that implements appending of loops onto a worklist given LoopInfo.
450/// Calls the templated utility taking a Range of loops, handing it the Loops
451/// in LoopInfo, iterated in reverse. This is because the loops are stored in
452/// RPO w.r.t. the control flow graph in LoopInfo. For the purpose of unrolling,
453/// loop deletion, and LICM, we largely want to work forward across the CFG so
454/// that we visit defs before uses and can propagate simplifications from one
455/// loop nest into the next. Calls appendReversedLoopsToWorklist with the
456/// already reversed loops in LI.
457/// FIXME: Consider changing the order in LoopInfo.
458void appendLoopsToWorklist(LoopInfo &, SmallPriorityWorklist<Loop *, 4> &);
459 
460/// Recursively clone the specified loop and all of its children,
461/// mapping the blocks with the specified map.
462Loop *cloneLoop(Loop *L, Loop *PL, ValueToValueMapTy &VM,
463                LoopInfo *LI, LPPassManager *LPM);
464 
465/// Add code that checks at runtime if the accessed arrays in \p PointerChecks
466/// overlap.
467///
468/// Returns a pair of instructions where the first element is the first
469/// instruction generated in possibly a sequence of instructions and the
470/// second value is the final comparator value or NULL if no check is needed.
471std::pair<Instruction *, Instruction *>
472addRuntimeChecks(Instruction *Loc, Loop *TheLoop,
473                 const SmallVectorImpl<RuntimePointerCheck> &PointerChecks,
474                 SCEVExpander &Expander);
475 
476/// Struct to hold information about a partially invariant condition.
477struct IVConditionInfo {
478  /// Instructions that need to be duplicated and checked for the unswitching
479  /// condition.
480  SmallVector<Instruction *> InstToDuplicate;
481 
482  /// Constant to indicate for which value the condition is invariant.
483  Constant *KnownValue = nullptr;
4
←
Null pointer value stored to 'PartialIVInfo.KnownValue'→
484 
485  /// True if the partially invariant path is no-op (=does not have any
486  /// side-effects and no loop value is used outside the loop).
487  bool PathIsNoop = true;
488 
489  /// If the partially invariant path reaches a single exit block, ExitForPath
490  /// is set to that block. Otherwise it is nullptr.
491  BasicBlock *ExitForPath = nullptr;
492};
493 
494/// Check if the loop header has a conditional branch that is not
495/// loop-invariant, because it involves load instructions. If all paths from
496/// either the true or false successor to the header or loop exits do not
497/// modify the memory feeding the condition, perform 'partial unswitching'. That
498/// is, duplicate the instructions feeding the condition in the pre-header. Then
499/// unswitch on the duplicated condition. The condition is now known in the
500/// unswitched version for the 'invariant' path through the original loop.
501///
502/// If the branch condition of the header is partially invariant, return an
503/// IVConditionInfo holding the instructions to duplicate and a boolean Constant
504/// to update the condition in the loops created for the true or false successors.
505Optional<IVConditionInfo> hasPartialIVCondition(Loop &L, unsigned MSSAThreshold,
506                                                MemorySSA &MSSA, AAResults &AA);
507 
508} // end namespace llvm
509 
510#endif // LLVM_TRANSFORMS_UTILS_LOOPUTILS_H

←

/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/ADT/SmallVector.h

→

1//===- llvm/ADT/SmallVector.h - 'Normally small' vectors --------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the SmallVector class.
10//
11//===----------------------------------------------------------------------===//

13#ifndef LLVM_ADT_SMALLVECTOR_H
14#define LLVM_ADT_SMALLVECTOR_H

16#include "llvm/ADT/iterator_range.h"
17#include "llvm/Support/Compiler.h"
18#include "llvm/Support/ErrorHandling.h"
19#include "llvm/Support/MemAlloc.h"
20#include "llvm/Support/type_traits.h"
21#include <algorithm>
22#include <cassert>
23#include <cstddef>
24#include <cstdlib>
25#include <cstring>
26#include <functional>
27#include <initializer_list>
28#include <iterator>
29#include <limits>
30#include <memory>
31#include <new>
32#include <type_traits>
33#include <utility>

35namespace llvm {

37/// This is all the stuff common to all SmallVectors.
38///
39/// The template parameter specifies the type which should be used to hold the
40/// Size and Capacity of the SmallVector, so it can be adjusted.
41/// Using 32 bit size is desirable to shrink the size of the SmallVector.
42/// Using 64 bit size is desirable for cases like SmallVector<char>, where a
43/// 32 bit size would limit the vector to ~4GB. SmallVectors are used for
44/// buffering bitcode output - which can exceed 4GB.
45template <class Size_T> class SmallVectorBase {
46protected:
void *BeginX;
Size_T Size = 0, Capacity;

/// The maximum value of the Size_T used.
static constexpr size_t SizeTypeMax() {
  return std::numeric_limits<Size_T>::max();
}

SmallVectorBase() = delete;
SmallVectorBase(void *FirstEl, size_t TotalCapacity)
    : BeginX(FirstEl), Capacity(TotalCapacity) {}

/// This is a helper for \a grow() that's out of line to reduce code
/// duplication.  This function will report a fatal error if it can't grow at
/// least to \p MinSize.
void *mallocForGrow(size_t MinSize, size_t TSize, size_t &NewCapacity);

/// This is an implementation of the grow() method which only works
/// on POD-like data types and is out of line to reduce code duplication.
/// This function will report a fatal error if it cannot increase capacity.
void grow_pod(void *FirstEl, size_t MinSize, size_t TSize);

69public:
size_t size() const { return Size; }
size_t capacity() const { return Capacity; }

LLVM_NODISCARD[[clang::warn_unused_result]] bool empty() const { return !Size; }
10
←
Assuming field 'Size' is not equal to 0→
11
←
Returning zero, which participates in a condition later→

/// Set the array size to \p N, which the current array must have enough
/// capacity for.
///
/// This does not construct or destroy any elements in the vector.
///
/// Clients can use this in conjunction with capacity() to write past the end
/// of the buffer when they know that more elements are available, and only
/// update the size later. This avoids the cost of value initializing elements
/// which will only be overwritten.
void set_size(size_t N) {
  assert(N <= capacity())((void)0);
  Size = N;
}
88};

/// Integer type used for the Size and Capacity fields of a SmallVector of T:
/// uint64_t when T is smaller than 4 bytes and pointers are at least 8 bytes
/// wide, uint32_t otherwise.
template <class T>
using SmallVectorSizeType =
    typename std::conditional<sizeof(T) < 4 && sizeof(void *) >= 8, uint64_t,
                              uint32_t>::type;

95/// Figure out the offset of the first element.
96template <class T, typename = void> struct SmallVectorAlignmentAndSize {
alignas(SmallVectorBase<SmallVectorSizeType<T>>) char Base[sizeof(
    SmallVectorBase<SmallVectorSizeType<T>>)];
alignas(T) char FirstEl[sizeof(T)];
100};

102/// This is the part of SmallVectorTemplateBase which does not depend on whether
103/// the type T is a POD. The extra dummy template argument is used by ArrayRef
104/// to avoid unnecessarily requiring T to be complete.
105template <typename T, typename = void>
106class SmallVectorTemplateCommon
  : public SmallVectorBase<SmallVectorSizeType<T>> {
using Base = SmallVectorBase<SmallVectorSizeType<T>>;

/// Find the address of the first element.  For this pointer math to be valid
/// with small-size of 0 for T with lots of alignment, it's important that
/// SmallVectorStorage is properly-aligned even for small-size of 0.
void *getFirstEl() const {
  return const_cast<void *>(reinterpret_cast<const void *>(
      reinterpret_cast<const char *>(this) +
      offsetof(SmallVectorAlignmentAndSize<T>, FirstEl)__builtin_offsetof(SmallVectorAlignmentAndSize<T>, FirstEl
)));
}
// Space after 'FirstEl' is clobbered, do not add any instance vars after it.

120protected:
SmallVectorTemplateCommon(size_t Size) : Base(getFirstEl(), Size) {}

void grow_pod(size_t MinSize, size_t TSize) {
  Base::grow_pod(getFirstEl(), MinSize, TSize);
}

/// Return true if this is a smallvector which has not had dynamic
/// memory allocated for it.
bool isSmall() const { return this->BeginX == getFirstEl(); }

/// Put this vector in a state of being small.
void resetToSmall() {
  this->BeginX = getFirstEl();
  this->Size = this->Capacity = 0; // FIXME: Setting Capacity to 0 is suspect.
}

/// Return true if V is an internal reference to the given range.
bool isReferenceToRange(const void *V, const void *First, const void *Last) const {
  // Use std::less to avoid UB.
  std::less<> LessThan;
  return !LessThan(V, First) && LessThan(V, Last);
}

/// Return true if V is an internal reference to this vector.
bool isReferenceToStorage(const void *V) const {
  return isReferenceToRange(V, this->begin(), this->end());
}

/// Return true if First and Last form a valid (possibly empty) range in this
/// vector's storage.
bool isRangeInStorage(const void *First, const void *Last) const {
  // Use std::less to avoid UB.
  std::less<> LessThan;
  return !LessThan(First, this->begin()) && !LessThan(Last, First) &&
         !LessThan(this->end(), Last);
}

/// Return true unless Elt will be invalidated by resizing the vector to
/// NewSize.
bool isSafeToReferenceAfterResize(const void *Elt, size_t NewSize) {
  // Past the end.
  if (LLVM_LIKELY(!isReferenceToStorage(Elt))__builtin_expect((bool)(!isReferenceToStorage(Elt)), true))
    return true;

  // Return false if Elt will be destroyed by shrinking.
  if (NewSize <= this->size())
    return Elt < this->begin() + NewSize;

  // Return false if we need to grow.
  return NewSize <= this->capacity();
}

/// Check whether Elt will be invalidated by resizing the vector to NewSize.
void assertSafeToReferenceAfterResize(const void *Elt, size_t NewSize) {
  assert(isSafeToReferenceAfterResize(Elt, NewSize) &&((void)0)
         "Attempting to reference an element of the vector in an operation "((void)0)
         "that invalidates it")((void)0);
}

/// Check whether Elt will be invalidated by increasing the size of the
/// vector by N.
void assertSafeToAdd(const void *Elt, size_t N = 1) {
  this->assertSafeToReferenceAfterResize(Elt, this->size() + N);
}

/// Check whether any part of the range will be invalidated by clearing.
void assertSafeToReferenceAfterClear(const T *From, const T *To) {
  if (From == To)
    return;
  this->assertSafeToReferenceAfterResize(From, 0);
  this->assertSafeToReferenceAfterResize(To - 1, 0);
}
template <
    class ItTy,
    std::enable_if_t<!std::is_same<std::remove_const_t<ItTy>, T *>::value,
                     bool> = false>
void assertSafeToReferenceAfterClear(ItTy, ItTy) {}

/// Check whether any part of the range will be invalidated by growing.
void assertSafeToAddRange(const T *From, const T *To) {
  if (From == To)
    return;
  this->assertSafeToAdd(From, To - From);
  this->assertSafeToAdd(To - 1, To - From);
}
template <
    class ItTy,
    std::enable_if_t<!std::is_same<std::remove_const_t<ItTy>, T *>::value,
                     bool> = false>
void assertSafeToAddRange(ItTy, ItTy) {}

/// Reserve enough space to add one element, and return the updated element
/// pointer in case it was a reference to the storage.
template <class U>
static const T *reserveForParamAndGetAddressImpl(U *This, const T &Elt,
                                                 size_t N) {
  size_t NewSize = This->size() + N;
  if (LLVM_LIKELY(NewSize <= This->capacity())__builtin_expect((bool)(NewSize <= This->capacity()), true
))
    return &Elt;

  bool ReferencesStorage = false;
  int64_t Index = -1;
  if (!U::TakesParamByValue) {
    if (LLVM_UNLIKELY(This->isReferenceToStorage(&Elt))__builtin_expect((bool)(This->isReferenceToStorage(&Elt
)), false)) {
      ReferencesStorage = true;
      Index = &Elt - This->begin();
    }
  }
  This->grow(NewSize);
  return ReferencesStorage ? This->begin() + Index : &Elt;
}

233public:
using size_type = size_t;
using difference_type = ptrdiff_t;
using value_type = T;
using iterator = T *;
using const_iterator = const T *;

using const_reverse_iterator = std::reverse_iterator<const_iterator>;
using reverse_iterator = std::reverse_iterator<iterator>;

using reference = T &;
using const_reference = const T &;
using pointer = T *;
using const_pointer = const T *;

using Base::capacity;
using Base::empty;
using Base::size;

// forward iterator creation methods.
iterator begin() { return (iterator)this->BeginX; }
const_iterator begin() const { return (const_iterator)this->BeginX; }
iterator end() { return begin() + size(); }
const_iterator end() const { return begin() + size(); }

// reverse iterator creation methods.
reverse_iterator rbegin()            { return reverse_iterator(end()); }
const_reverse_iterator rbegin() const{ return const_reverse_iterator(end()); }
reverse_iterator rend()              { return reverse_iterator(begin()); }
const_reverse_iterator rend() const { return const_reverse_iterator(begin());}

size_type size_in_bytes() const { return size() * sizeof(T); }
size_type max_size() const {
  return std::min(this->SizeTypeMax(), size_type(-1) / sizeof(T));
}

size_t capacity_in_bytes() const { return capacity() * sizeof(T); }

/// Return a pointer to the vector's buffer, even if empty().
pointer data() { return pointer(begin()); }
/// Return a pointer to the vector's buffer, even if empty().
const_pointer data() const { return const_pointer(begin()); }

reference operator[](size_type idx) {
  assert(idx < size())((void)0);
  return begin()[idx];
}
const_reference operator[](size_type idx) const {
  assert(idx < size())((void)0);
  return begin()[idx];
}

reference front() {
  assert(!empty())((void)0);
  return begin()[0];
}
const_reference front() const {
  assert(!empty())((void)0);
  return begin()[0];
}

reference back() {
  assert(!empty())((void)0);
  return end()[-1];
}
const_reference back() const {
  assert(!empty())((void)0);
  return end()[-1];
}
302};

304/// SmallVectorTemplateBase<TriviallyCopyable = false> - This is where we put
305/// method implementations that are designed to work with non-trivial T's.
306///
307/// We approximate is_trivially_copyable with trivial move/copy construction and
308/// trivial destruction. While the standard doesn't specify that you're allowed
309/// copy these types with memcpy, there is no way for the type to observe this.
310/// This catches the important case of std::pair<POD, POD>, which is not
311/// trivially assignable.
312template <typename T, bool = (is_trivially_copy_constructible<T>::value) &&
                           (is_trivially_move_constructible<T>::value) &&
                           std::is_trivially_destructible<T>::value>
315class SmallVectorTemplateBase : public SmallVectorTemplateCommon<T> {
friend class SmallVectorTemplateCommon<T>;

318protected:
static constexpr bool TakesParamByValue = false;
using ValueParamT = const T &;

SmallVectorTemplateBase(size_t Size) : SmallVectorTemplateCommon<T>(Size) {}

static void destroy_range(T *S, T *E) {
  while (S != E) {
    --E;
    E->~T();
  }
}

/// Move the range [I, E) into the uninitialized memory starting with "Dest",
/// constructing elements as needed.
template<typename It1, typename It2>
static void uninitialized_move(It1 I, It1 E, It2 Dest) {
  std::uninitialized_copy(std::make_move_iterator(I),
                          std::make_move_iterator(E), Dest);
}

/// Copy the range [I, E) onto the uninitialized memory starting with "Dest",
/// constructing elements as needed.
template<typename It1, typename It2>
static void uninitialized_copy(It1 I, It1 E, It2 Dest) {
  std::uninitialized_copy(I, E, Dest);
}

/// Grow the allocated memory (without initializing new elements), doubling
/// the size of the allocated memory. Guarantees space for at least one more
/// element, or MinSize more elements if specified.
void grow(size_t MinSize = 0);

/// Create a new allocation big enough for \p MinSize and pass back its size
/// in \p NewCapacity. This is the first section of \a grow().
T *mallocForGrow(size_t MinSize, size_t &NewCapacity) {
  return static_cast<T *>(
      SmallVectorBase<SmallVectorSizeType<T>>::mallocForGrow(
          MinSize, sizeof(T), NewCapacity));
}

/// Move existing elements over to the new allocation \p NewElts, the middle
/// section of \a grow().
void moveElementsForGrow(T *NewElts);

/// Transfer ownership of the allocation, finishing up \a grow().
void takeAllocationForGrow(T *NewElts, size_t NewCapacity);

/// Reserve enough space to add one element, and return the updated element
/// pointer in case it was a reference to the storage.
const T *reserveForParamAndGetAddress(const T &Elt, size_t N = 1) {
  return this->reserveForParamAndGetAddressImpl(this, Elt, N);
}

/// Reserve enough space to add one element, and return the updated element
/// pointer in case it was a reference to the storage.
T *reserveForParamAndGetAddress(T &Elt, size_t N = 1) {
  return const_cast<T *>(
      this->reserveForParamAndGetAddressImpl(this, Elt, N));
}

static T &&forward_value_param(T &&V) { return std::move(V); }
static const T &forward_value_param(const T &V) { return V; }

void growAndAssign(size_t NumElts, const T &Elt) {
  // Grow manually in case Elt is an internal reference.
  size_t NewCapacity;
  T *NewElts = mallocForGrow(NumElts, NewCapacity);
  std::uninitialized_fill_n(NewElts, NumElts, Elt);
  this->destroy_range(this->begin(), this->end());
  takeAllocationForGrow(NewElts, NewCapacity);
  this->set_size(NumElts);
}

template <typename... ArgTypes> T &growAndEmplaceBack(ArgTypes &&... Args) {
  // Grow manually in case one of Args is an internal reference.
  size_t NewCapacity;
  T *NewElts = mallocForGrow(0, NewCapacity);
  ::new ((void *)(NewElts + this->size())) T(std::forward<ArgTypes>(Args)...);
  moveElementsForGrow(NewElts);
  takeAllocationForGrow(NewElts, NewCapacity);
  this->set_size(this->size() + 1);
  return this->back();
}

403public:
void push_back(const T &Elt) {
  const T *EltPtr = reserveForParamAndGetAddress(Elt);
  ::new ((void *)this->end()) T(*EltPtr);
  this->set_size(this->size() + 1);
}

void push_back(T &&Elt) {
  T *EltPtr = reserveForParamAndGetAddress(Elt);
  ::new ((void *)this->end()) T(::std::move(*EltPtr));
  this->set_size(this->size() + 1);
}

void pop_back() {
  this->set_size(this->size() - 1);
  this->end()->~T();
}
420};

422// Define this out-of-line to dissuade the C++ compiler from inlining it.
423template <typename T, bool TriviallyCopyable>
424void SmallVectorTemplateBase<T, TriviallyCopyable>::grow(size_t MinSize) {
size_t NewCapacity;
T *NewElts = mallocForGrow(MinSize, NewCapacity);
moveElementsForGrow(NewElts);
takeAllocationForGrow(NewElts, NewCapacity);
429}

431// Define this out-of-line to dissuade the C++ compiler from inlining it.
432template <typename T, bool TriviallyCopyable>
433void SmallVectorTemplateBase<T, TriviallyCopyable>::moveElementsForGrow(
  T *NewElts) {
// Move the elements over.
this->uninitialized_move(this->begin(), this->end(), NewElts);

// Destroy the original elements.
destroy_range(this->begin(), this->end());
440}

442// Define this out-of-line to dissuade the C++ compiler from inlining it.
443template <typename T, bool TriviallyCopyable>
444void SmallVectorTemplateBase<T, TriviallyCopyable>::takeAllocationForGrow(
  T *NewElts, size_t NewCapacity) {
// If this wasn't grown from the inline copy, deallocate the old space.
if (!this->isSmall())
  free(this->begin());

this->BeginX = NewElts;
this->Capacity = NewCapacity;
452}

454/// SmallVectorTemplateBase<TriviallyCopyable = true> - This is where we put
455/// method implementations that are designed to work with trivially copyable
456/// T's. This allows using memcpy in place of copy/move construction and
457/// skipping destruction.
458template <typename T>
459class SmallVectorTemplateBase<T, true> : public SmallVectorTemplateCommon<T> {
friend class SmallVectorTemplateCommon<T>;

462protected:
/// True if it's cheap enough to take parameters by value. Doing so avoids
/// overhead related to mitigations for reference invalidation.
static constexpr bool TakesParamByValue = sizeof(T) <= 2 * sizeof(void *);

/// Either const T& or T, depending on whether it's cheap enough to take
/// parameters by value.
using ValueParamT =
    typename std::conditional<TakesParamByValue, T, const T &>::type;

SmallVectorTemplateBase(size_t Size) : SmallVectorTemplateCommon<T>(Size) {}

// No need to do a destroy loop for POD's.
static void destroy_range(T *, T *) {}

/// Move the range [I, E) onto the uninitialized memory
/// starting with "Dest", constructing elements into it as needed.
template<typename It1, typename It2>
static void uninitialized_move(It1 I, It1 E, It2 Dest) {
  // Just do a copy.
  uninitialized_copy(I, E, Dest);
}

/// Copy the range [I, E) onto the uninitialized memory
/// starting with "Dest", constructing elements into it as needed.
template<typename It1, typename It2>
static void uninitialized_copy(It1 I, It1 E, It2 Dest) {
  // Arbitrary iterator types; just use the basic implementation.
  std::uninitialized_copy(I, E, Dest);
}

/// Copy the range [I, E) onto the uninitialized memory
/// starting with "Dest", constructing elements into it as needed.
template <typename T1, typename T2>
static void uninitialized_copy(
    T1 *I, T1 *E, T2 *Dest,
    std::enable_if_t<std::is_same<typename std::remove_const<T1>::type,
                                  T2>::value> * = nullptr) {
  // Use memcpy for PODs iterated by pointers (which includes SmallVector
  // iterators): std::uninitialized_copy optimizes to memmove, but we can
  // use memcpy here. Note that I and E are iterators and thus might be
  // invalid for memcpy if they are equal.
  if (I != E)
    memcpy(reinterpret_cast<void *>(Dest), I, (E - I) * sizeof(T));
}

/// Double the size of the allocated memory, guaranteeing space for at
/// least one more element or MinSize if specified.
void grow(size_t MinSize = 0) { this->grow_pod(MinSize, sizeof(T)); }

/// Reserve enough space to add one element, and return the updated element
/// pointer in case it was a reference to the storage.
const T *reserveForParamAndGetAddress(const T &Elt, size_t N = 1) {
  return this->reserveForParamAndGetAddressImpl(this, Elt, N);
}

/// Reserve enough space to add one element, and return the updated element
/// pointer in case it was a reference to the storage.
T *reserveForParamAndGetAddress(T &Elt, size_t N = 1) {
  return const_cast<T *>(
      this->reserveForParamAndGetAddressImpl(this, Elt, N));
}

/// Copy \p V or return a reference, depending on \a ValueParamT.
static ValueParamT forward_value_param(ValueParamT V) { return V; }

void growAndAssign(size_t NumElts, T Elt) {
  // Elt has been copied in case it's an internal reference, side-stepping
  // reference invalidation problems without losing the realloc optimization.
  this->set_size(0);
  this->grow(NumElts);
  std::uninitialized_fill_n(this->begin(), NumElts, Elt);
  this->set_size(NumElts);
}

template <typename... ArgTypes> T &growAndEmplaceBack(ArgTypes &&... Args) {
  // Use push_back with a copy in case Args has an internal reference,
  // side-stepping reference invalidation problems without losing the realloc
  // optimization.
  push_back(T(std::forward<ArgTypes>(Args)...));
  return this->back();
}

545public:
void push_back(ValueParamT Elt) {
  const T *EltPtr = reserveForParamAndGetAddress(Elt);
  memcpy(reinterpret_cast<void *>(this->end()), EltPtr, sizeof(T));
  this->set_size(this->size() + 1);
}

void pop_back() { this->set_size(this->size() - 1); }
553};

555/// This class consists of common code factored out of the SmallVector class to
556/// reduce code duplication based on the SmallVector 'N' template parameter.
557template <typename T>
558class SmallVectorImpl : public SmallVectorTemplateBase<T> {
using SuperClass = SmallVectorTemplateBase<T>;

561public:
using iterator = typename SuperClass::iterator;
using const_iterator = typename SuperClass::const_iterator;
using reference = typename SuperClass::reference;
using size_type = typename SuperClass::size_type;

567protected:
using SmallVectorTemplateBase<T>::TakesParamByValue;
using ValueParamT = typename SuperClass::ValueParamT;

// Default ctor - Initialize to empty.
explicit SmallVectorImpl(unsigned N)
    : SmallVectorTemplateBase<T>(N) {}

575public:
SmallVectorImpl(const SmallVectorImpl &) = delete;

~SmallVectorImpl() {
  // Subclass has already destructed this vector's elements.
  // If this wasn't grown from the inline copy, deallocate the old space.
  if (!this->isSmall())
    free(this->begin());
}

void clear() {
  this->destroy_range(this->begin(), this->end());
  this->Size = 0;
}

590private:
template <bool ForOverwrite> void resizeImpl(size_type N) {
  if (N < this->size()) {
    this->pop_back_n(this->size() - N);
  } else if (N > this->size()) {
    this->reserve(N);
    for (auto I = this->end(), E = this->begin() + N; I != E; ++I)
      if (ForOverwrite)
        new (&*I) T;
      else
        new (&*I) T();
    this->set_size(N);
  }
}

605public:
void resize(size_type N) { resizeImpl<false>(N); }

/// Like resize, but \ref T is POD, the new values won't be initialized.
void resize_for_overwrite(size_type N) { resizeImpl<true>(N); }

void resize(size_type N, ValueParamT NV) {
  if (N == this->size())
    return;

  if (N < this->size()) {
    this->pop_back_n(this->size() - N);
    return;
  }

  // N > this->size(). Defer to append.
  this->append(N - this->size(), NV);
}

void reserve(size_type N) {
  if (this->capacity() < N)
    this->grow(N);
}

void pop_back_n(size_type NumItems) {
  assert(this->size() >= NumItems)((void)0);
  this->destroy_range(this->end() - NumItems, this->end());
  this->set_size(this->size() - NumItems);
}

LLVM_NODISCARD[[clang::warn_unused_result]] T pop_back_val() {
  T Result = ::std::move(this->back());
  this->pop_back();
  return Result;
}

void swap(SmallVectorImpl &RHS);

/// Add the specified range to the end of the SmallVector.
template <typename in_iter,
          typename = std::enable_if_t<std::is_convertible<
              typename std::iterator_traits<in_iter>::iterator_category,
              std::input_iterator_tag>::value>>
void append(in_iter in_start, in_iter in_end) {
  this->assertSafeToAddRange(in_start, in_end);
  size_type NumInputs = std::distance(in_start, in_end);
  this->reserve(this->size() + NumInputs);
  this->uninitialized_copy(in_start, in_end, this->end());
  this->set_size(this->size() + NumInputs);
}

/// Append \p NumInputs copies of \p Elt to the end.
void append(size_type NumInputs, ValueParamT Elt) {
  const T *EltPtr = this->reserveForParamAndGetAddress(Elt, NumInputs);
  std::uninitialized_fill_n(this->end(), NumInputs, *EltPtr);
  this->set_size(this->size() + NumInputs);
}

void append(std::initializer_list<T> IL) {
  append(IL.begin(), IL.end());
}

void append(const SmallVectorImpl &RHS) { append(RHS.begin(), RHS.end()); }

void assign(size_type NumElts, ValueParamT Elt) {
  // Note that Elt could be an internal reference.
  if (NumElts > this->capacity()) {
    this->growAndAssign(NumElts, Elt);
    return;
  }

  // Assign over existing elements.
  std::fill_n(this->begin(), std::min(NumElts, this->size()), Elt);
  if (NumElts > this->size())
    std::uninitialized_fill_n(this->end(), NumElts - this->size(), Elt);
  else if (NumElts < this->size())
    this->destroy_range(this->begin() + NumElts, this->end());
  this->set_size(NumElts);
}

// FIXME: Consider assigning over existing elements, rather than clearing &
// re-initializing them - for all assign(...) variants.

template <typename in_iter,
          typename = std::enable_if_t<std::is_convertible<
              typename std::iterator_traits<in_iter>::iterator_category,
              std::input_iterator_tag>::value>>
void assign(in_iter in_start, in_iter in_end) {
  this->assertSafeToReferenceAfterClear(in_start, in_end);
  clear();
  append(in_start, in_end);
}

void assign(std::initializer_list<T> IL) {
  clear();
  append(IL);
}

void assign(const SmallVectorImpl &RHS) { assign(RHS.begin(), RHS.end()); }

iterator erase(const_iterator CI) {
  // Just cast away constness because this is a non-const member function.
  iterator I = const_cast<iterator>(CI);

  assert(this->isReferenceToStorage(CI) && "Iterator to erase is out of bounds.")((void)0);

  iterator N = I;
  // Shift all elts down one.
  std::move(I+1, this->end(), I);
  // Drop the last elt.
  this->pop_back();
  return(N);
}

iterator erase(const_iterator CS, const_iterator CE) {
  // Just cast away constness because this is a non-const member function.
  iterator S = const_cast<iterator>(CS);
  iterator E = const_cast<iterator>(CE);

  assert(this->isRangeInStorage(S, E) && "Range to erase is out of bounds.")((void)0);

  iterator N = S;
  // Shift all elts down.
  iterator I = std::move(E, this->end(), S);
  // Drop the last elts.
  this->destroy_range(I, this->end());
  this->set_size(I - this->begin());
  return(N);
}

735private:
template <class ArgType> iterator insert_one_impl(iterator I, ArgType &&Elt) {
  // Callers ensure that ArgType is derived from T.
  static_assert(
      std::is_same<std::remove_const_t<std::remove_reference_t<ArgType>>,
                   T>::value,
      "ArgType must be derived from T!");

  if (I == this->end()) {  // Important special case for empty vector.
    this->push_back(::std::forward<ArgType>(Elt));
    return this->end()-1;
  }

  assert(this->isReferenceToStorage(I) && "Insertion iterator is out of bounds.")((void)0);

  // Grow if necessary.
  size_t Index = I - this->begin();
  std::remove_reference_t<ArgType> *EltPtr =
      this->reserveForParamAndGetAddress(Elt);
  I = this->begin() + Index;

  ::new ((void*) this->end()) T(::std::move(this->back()));
  // Push everything else over.
  std::move_backward(I, this->end()-1, this->end());
  this->set_size(this->size() + 1);

  // If we just moved the element we're inserting, be sure to update
  // the reference (never happens if TakesParamByValue).
  static_assert(!TakesParamByValue || std::is_same<ArgType, T>::value,
                "ArgType must be 'T' when taking by value!");
  if (!TakesParamByValue && this->isReferenceToRange(EltPtr, I, this->end()))
    ++EltPtr;

  *I = ::std::forward<ArgType>(*EltPtr);
  return I;
}

772public:
iterator insert(iterator I, T &&Elt) {
  return insert_one_impl(I, this->forward_value_param(std::move(Elt)));
}

iterator insert(iterator I, const T &Elt) {
  return insert_one_impl(I, this->forward_value_param(Elt));
}

iterator insert(iterator I, size_type NumToInsert, ValueParamT Elt) {
  // Convert iterator to elt# to avoid invalidating iterator when we reserve()
  size_t InsertElt = I - this->begin();

  if (I == this->end()) {  // Important special case for empty vector.
    append(NumToInsert, Elt);
    return this->begin()+InsertElt;
  }

  assert(this->isReferenceToStorage(I) && "Insertion iterator is out of bounds.")((void)0);

  // Ensure there is enough space, and get the (maybe updated) address of
  // Elt.
  const T *EltPtr = this->reserveForParamAndGetAddress(Elt, NumToInsert);

  // Uninvalidate the iterator.
  I = this->begin()+InsertElt;

  // If there are more elements between the insertion point and the end of the
  // range than there are being inserted, we can use a simple approach to
  // insertion.  Since we already reserved space, we know that this won't
  // reallocate the vector.
  if (size_t(this->end()-I) >= NumToInsert) {
    T *OldEnd = this->end();
    append(std::move_iterator<iterator>(this->end() - NumToInsert),
           std::move_iterator<iterator>(this->end()));

    // Copy the existing elements that get replaced.
    std::move_backward(I, OldEnd-NumToInsert, OldEnd);

    // If we just moved the element we're inserting, be sure to update
    // the reference (never happens if TakesParamByValue).
    if (!TakesParamByValue && I <= EltPtr && EltPtr < this->end())
      EltPtr += NumToInsert;

    std::fill_n(I, NumToInsert, *EltPtr);
    return I;
  }

  // Otherwise, we're inserting more elements than exist already, and we're
  // not inserting at the end.

  // Move over the elements that we're about to overwrite.
  T *OldEnd = this->end();
  this->set_size(this->size() + NumToInsert);
  size_t NumOverwritten = OldEnd-I;
  this->uninitialized_move(I, OldEnd, this->end()-NumOverwritten);

  // If we just moved the element we're inserting, be sure to update
  // the reference (never happens if TakesParamByValue).
  if (!TakesParamByValue && I <= EltPtr && EltPtr < this->end())
    EltPtr += NumToInsert;

  // Replace the overwritten part.
  std::fill_n(I, NumOverwritten, *EltPtr);

  // Insert the non-overwritten middle part.
  std::uninitialized_fill_n(OldEnd, NumToInsert - NumOverwritten, *EltPtr);
  return I;
}

template <typename ItTy,
          typename = std::enable_if_t<std::is_convertible<
              typename std::iterator_traits<ItTy>::iterator_category,
              std::input_iterator_tag>::value>>
iterator insert(iterator I, ItTy From, ItTy To) {
  // Convert iterator to elt# to avoid invalidating iterator when we reserve()
  size_t InsertElt = I - this->begin();

  if (I == this->end()) {  // Important special case for empty vector.
    append(From, To);
    return this->begin()+InsertElt;
  }

  assert(this->isReferenceToStorage(I) && "Insertion iterator is out of bounds.")((void)0);

  // Check that the reserve that follows doesn't invalidate the iterators.
  this->assertSafeToAddRange(From, To);

  size_t NumToInsert = std::distance(From, To);

  // Ensure there is enough space.
  reserve(this->size() + NumToInsert);

  // Uninvalidate the iterator.
  I = this->begin()+InsertElt;

  // If there are more elements between the insertion point and the end of the
  // range than there are being inserted, we can use a simple approach to
  // insertion.  Since we already reserved space, we know that this won't
  // reallocate the vector.
  if (size_t(this->end()-I) >= NumToInsert) {
    T *OldEnd = this->end();
    append(std::move_iterator<iterator>(this->end() - NumToInsert),
           std::move_iterator<iterator>(this->end()));

    // Copy the existing elements that get replaced.
    std::move_backward(I, OldEnd-NumToInsert, OldEnd);

    std::copy(From, To, I);
    return I;
  }

  // Otherwise, we're inserting more elements than exist already, and we're
  // not inserting at the end.

  // Move over the elements that we're about to overwrite.
  T *OldEnd = this->end();
  this->set_size(this->size() + NumToInsert);
  size_t NumOverwritten = OldEnd-I;
  this->uninitialized_move(I, OldEnd, this->end()-NumOverwritten);

  // Replace the overwritten part.
  for (T *J = I; NumOverwritten > 0; --NumOverwritten) {
    *J = *From;
    ++J; ++From;
  }

  // Insert the non-overwritten middle part.
  this->uninitialized_copy(From, To, OldEnd);
  return I;
}

void insert(iterator I, std::initializer_list<T> IL) {
  insert(I, IL.begin(), IL.end());
}

template <typename... ArgTypes> reference emplace_back(ArgTypes &&... Args) {
  if (LLVM_UNLIKELY(this->size() >= this->capacity())__builtin_expect((bool)(this->size() >= this->capacity
()), false))
    return this->growAndEmplaceBack(std::forward<ArgTypes>(Args)...);

  ::new ((void *)this->end()) T(std::forward<ArgTypes>(Args)...);
  this->set_size(this->size() + 1);
  return this->back();
}

SmallVectorImpl &operator=(const SmallVectorImpl &RHS);

SmallVectorImpl &operator=(SmallVectorImpl &&RHS);

bool operator==(const SmallVectorImpl &RHS) const {
  if (this->size() != RHS.size()) return false;
  return std::equal(this->begin(), this->end(), RHS.begin());
}
bool operator!=(const SmallVectorImpl &RHS) const {
  return !(*this == RHS);
}

bool operator<(const SmallVectorImpl &RHS) const {
  return std::lexicographical_compare(this->begin(), this->end(),
                                      RHS.begin(), RHS.end());
}
933};

935template <typename T>
936void SmallVectorImpl<T>::swap(SmallVectorImpl<T> &RHS) {
if (this == &RHS) return;

// We can only avoid copying elements if neither vector is small.
if (!this->isSmall() && !RHS.isSmall()) {
  std::swap(this->BeginX, RHS.BeginX);
  std::swap(this->Size, RHS.Size);
  std::swap(this->Capacity, RHS.Capacity);
  return;
}
this->reserve(RHS.size());
RHS.reserve(this->size());

// Swap the shared elements.
size_t NumShared = this->size();
if (NumShared > RHS.size()) NumShared = RHS.size();
for (size_type i = 0; i != NumShared; ++i)
  std::swap((*this)[i], RHS[i]);

// Copy over the extra elts.
if (this->size() > RHS.size()) {
  size_t EltDiff = this->size() - RHS.size();
  this->uninitialized_copy(this->begin()+NumShared, this->end(), RHS.end());
  RHS.set_size(RHS.size() + EltDiff);
  this->destroy_range(this->begin()+NumShared, this->end());
  this->set_size(NumShared);
} else if (RHS.size() > this->size()) {
  size_t EltDiff = RHS.size() - this->size();
  this->uninitialized_copy(RHS.begin()+NumShared, RHS.end(), this->end());
  this->set_size(this->size() + EltDiff);
  this->destroy_range(RHS.begin()+NumShared, RHS.end());
  RHS.set_size(NumShared);
}
969}

971template <typename T>
972SmallVectorImpl<T> &SmallVectorImpl<T>::
operator=(const SmallVectorImpl<T> &RHS) {
// Avoid self-assignment.
if (this == &RHS) return *this;

// If we already have sufficient space, assign the common elements, then
// destroy any excess.
size_t RHSSize = RHS.size();
size_t CurSize = this->size();
if (CurSize >= RHSSize) {
  // Assign common elements.
  iterator NewEnd;
  if (RHSSize)
    NewEnd = std::copy(RHS.begin(), RHS.begin()+RHSSize, this->begin());
  else
    NewEnd = this->begin();

  // Destroy excess elements.
  this->destroy_range(NewEnd, this->end());

  // Trim.
  this->set_size(RHSSize);
  return *this;
}

// If we have to grow to have enough elements, destroy the current elements.
// This allows us to avoid copying them during the grow.
// FIXME: don't do this if they're efficiently moveable.
if (this->capacity() < RHSSize) {
  // Destroy current elements.
  this->clear();
  CurSize = 0;
  this->grow(RHSSize);
} else if (CurSize) {
  // Otherwise, use assignment for the already-constructed elements.
  std::copy(RHS.begin(), RHS.begin()+CurSize, this->begin());
}

// Copy construct the new elements in place.
this->uninitialized_copy(RHS.begin()+CurSize, RHS.end(),
                         this->begin()+CurSize);

// Set end.
this->set_size(RHSSize);
return *this;
1017}

1019template <typename T>
1020SmallVectorImpl<T> &SmallVectorImpl<T>::operator=(SmallVectorImpl<T> &&RHS) {
// Avoid self-assignment.
if (this == &RHS) return *this;

// If the RHS isn't small, clear this vector and then steal its buffer.
if (!RHS.isSmall()) {
  this->destroy_range(this->begin(), this->end());
  if (!this->isSmall()) free(this->begin());
  this->BeginX = RHS.BeginX;
  this->Size = RHS.Size;
  this->Capacity = RHS.Capacity;
  RHS.resetToSmall();
  return *this;
}

// If we already have sufficient space, assign the common elements, then
// destroy any excess.
size_t RHSSize = RHS.size();
size_t CurSize = this->size();
if (CurSize >= RHSSize) {
  // Assign common elements.
  iterator NewEnd = this->begin();
  if (RHSSize)
    NewEnd = std::move(RHS.begin(), RHS.end(), NewEnd);

  // Destroy excess elements and trim the bounds.
  this->destroy_range(NewEnd, this->end());
  this->set_size(RHSSize);

  // Clear the RHS.
  RHS.clear();

  return *this;
}

// If we have to grow to have enough elements, destroy the current elements.
// This allows us to avoid copying them during the grow.
// FIXME: this may not actually make any sense if we can efficiently move
// elements.
if (this->capacity() < RHSSize) {
  // Destroy current elements.
  this->clear();
  CurSize = 0;
  this->grow(RHSSize);
} else if (CurSize) {
  // Otherwise, use assignment for the already-constructed elements.
  std::move(RHS.begin(), RHS.begin()+CurSize, this->begin());
}

// Move-construct the new elements in place.
this->uninitialized_move(RHS.begin()+CurSize, RHS.end(),
                         this->begin()+CurSize);

// Set end.
this->set_size(RHSSize);

RHS.clear();
return *this;
1078}

/// Storage for the SmallVector elements.  This is specialized for the N=0 case
/// to avoid allocating unnecessary storage.
///
/// The buffer is raw bytes, sized for N objects of type T and aligned as T.
template <typename T, unsigned N>
struct SmallVectorStorage {
  alignas(T) char InlineElts[N * sizeof(T)];
};

/// We need the storage to be properly aligned even for small-size of 0 so that
/// the pointer math in \a SmallVectorTemplateCommon::getFirstEl() is
/// well-defined.
template <typename T> struct alignas(T) SmallVectorStorage<T, 0> {};

/// Forward declaration of SmallVector so that
/// calculateSmallVectorDefaultInlinedElements can reference
/// `sizeof(SmallVector<T, 0>)`.
template <typename T, unsigned N> class LLVM_GSL_OWNER SmallVector;

1097/// Helper class for calculating the default number of inline elements for
1098/// `SmallVector<T>`.
1099///
1100/// This should be migrated to a constexpr function when our minimum
1101/// compiler support is enough for multi-statement constexpr functions.
1102template <typename T> struct CalculateSmallVectorDefaultInlinedElements {
// Parameter controlling the default number of inlined elements
// for `SmallVector<T>`.
//
// The default number of inlined elements ensures that
// 1. There is at least one inlined element.
// 2. `sizeof(SmallVector<T>) <= kPreferredSmallVectorSizeof` unless
// it contradicts 1.
static constexpr size_t kPreferredSmallVectorSizeof = 64;

// static_assert that sizeof(T) is not "too big".
//
// Because our policy guarantees at least one inlined element, it is possible
// for an arbitrarily large inlined element to allocate an arbitrarily large
// amount of inline storage. We generally consider it an antipattern for a
// SmallVector to allocate an excessive amount of inline storage, so we want
// to call attention to these cases and make sure that users are making an
// intentional decision if they request a lot of inline storage.
//
// We want this assertion to trigger in pathological cases, but otherwise
// not be too easy to hit. To accomplish that, the cutoff is actually somewhat
// larger than kPreferredSmallVectorSizeof (otherwise,
// `SmallVector<SmallVector<T>>` would be one easy way to trip it, and that
// pattern seems useful in practice).
//
// One wrinkle is that this assertion is in theory non-portable, since
// sizeof(T) is in general platform-dependent. However, we don't expect this
// to be much of an issue, because most LLVM development happens on 64-bit
// hosts, and therefore sizeof(T) is expected to *decrease* when compiled for
// 32-bit hosts, dodging the issue. The reverse situation, where development
// happens on a 32-bit host and then fails due to sizeof(T) *increasing* on a
// 64-bit host, is expected to be very rare.
static_assert(
    sizeof(T) <= 256,
    "You are trying to use a default number of inlined elements for "
    "`SmallVector<T>` but `sizeof(T)` is really big! Please use an "
    "explicit number of inlined elements with `SmallVector<T, N>` to make "
    "sure you really want that much inline storage.");

// Discount the size of the header itself when calculating the maximum inline
// bytes.
static constexpr size_t PreferredInlineBytes =
    kPreferredSmallVectorSizeof - sizeof(SmallVector<T, 0>);
static constexpr size_t NumElementsThatFit = PreferredInlineBytes / sizeof(T);
static constexpr size_t value =
    NumElementsThatFit == 0 ? 1 : NumElementsThatFit;
1148};

1150/// This is a 'vector' (really, a variable-sized array), optimized
1151/// for the case when the array is small.  It contains some number of elements
1152/// in-place, which allows it to avoid heap allocation when the actual number of
1153/// elements is below that threshold.  This allows normal "small" cases to be
1154/// fast without losing generality for large inputs.
1155///
1156/// \note
1157/// In the absence of a well-motivated choice for the number of inlined
1158/// elements \p N, it is recommended to use \c SmallVector<T> (that is,
1159/// omitting the \p N). This will choose a default number of inlined elements
1160/// reasonable for allocation on the stack (for example, trying to keep \c
1161/// sizeof(SmallVector<T>) around 64 bytes).
1162///
1163/// \warning This does not attempt to be exception safe.
1164///
1165/// \see https://llvm.org/docs/ProgrammersManual.html#llvm-adt-smallvector-h
1166template <typename T,
        unsigned N = CalculateSmallVectorDefaultInlinedElements<T>::value>
1168class LLVM_GSL_OWNER[[gsl::Owner]] SmallVector : public SmallVectorImpl<T>,
                                 SmallVectorStorage<T, N> {
1170public:
SmallVector() : SmallVectorImpl<T>(N) {}

~SmallVector() {
  // Destroy the constructed elements in the vector.
  this->destroy_range(this->begin(), this->end());
}

explicit SmallVector(size_t Size, const T &Value = T())
  : SmallVectorImpl<T>(N) {
  this->assign(Size, Value);
}

template <typename ItTy,
          typename = std::enable_if_t<std::is_convertible<
              typename std::iterator_traits<ItTy>::iterator_category,
              std::input_iterator_tag>::value>>
SmallVector(ItTy S, ItTy E) : SmallVectorImpl<T>(N) {
  this->append(S, E);
}

template <typename RangeTy>
explicit SmallVector(const iterator_range<RangeTy> &R)
    : SmallVectorImpl<T>(N) {
  this->append(R.begin(), R.end());
}

SmallVector(std::initializer_list<T> IL) : SmallVectorImpl<T>(N) {
  this->assign(IL);
}

SmallVector(const SmallVector &RHS) : SmallVectorImpl<T>(N) {
  if (!RHS.empty())
    SmallVectorImpl<T>::operator=(RHS);
}

SmallVector &operator=(const SmallVector &RHS) {
  SmallVectorImpl<T>::operator=(RHS);
  return *this;
}

SmallVector(SmallVector &&RHS) : SmallVectorImpl<T>(N) {
  if (!RHS.empty())
    SmallVectorImpl<T>::operator=(::std::move(RHS));
}

SmallVector(SmallVectorImpl<T> &&RHS) : SmallVectorImpl<T>(N) {
  if (!RHS.empty())
    SmallVectorImpl<T>::operator=(::std::move(RHS));
}

SmallVector &operator=(SmallVector &&RHS) {
  SmallVectorImpl<T>::operator=(::std::move(RHS));
  return *this;
}

SmallVector &operator=(SmallVectorImpl<T> &&RHS) {
  SmallVectorImpl<T>::operator=(::std::move(RHS));
  return *this;
}

SmallVector &operator=(std::initializer_list<T> IL) {
  this->assign(IL);
  return *this;
}
1235};

1237template <typename T, unsigned N>
1238inline size_t capacity_in_bytes(const SmallVector<T, N> &X) {
return X.capacity_in_bytes();
1240}

1242/// Given a range of type R, iterate the entire range and return a
1243/// SmallVector with elements of the vector.  This is useful, for example,
1244/// when you want to iterate a range and then sort the results.
1245template <unsigned Size, typename R>
1246SmallVector<typename std::remove_const<typename std::remove_reference<
              decltype(*std::begin(std::declval<R &>()))>::type>::type,
          Size>
1249to_vector(R &&Range) {
return {std::begin(Range), std::end(Range)};
1251}

1253} // end namespace llvm

1255namespace std {

/// Implement std::swap in terms of SmallVector swap.
template<typename T>
inline void
swap(llvm::SmallVectorImpl<T> &LHS, llvm::SmallVectorImpl<T> &RHS) {
  LHS.swap(RHS);
}

/// std::swap overload for llvm::SmallVector<T, N>: delegates to the
/// container's own member swap.
template <typename T, unsigned N>
inline void swap(llvm::SmallVector<T, N> &LHS,
                 llvm::SmallVector<T, N> &RHS) {
  LHS.swap(RHS);
}

1271} // end namespace std

1273#endif // LLVM_ADT_SMALLVECTOR_H

←

/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/Analysis/CFG.h

→

1//===-- Analysis/CFG.h - BasicBlock Analyses --------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This family of functions performs analyses on basic blocks, and instructions
10// contained within basic blocks.
11//
12//===----------------------------------------------------------------------===//
13 
14#ifndef LLVM_ANALYSIS_CFG_H
15#define LLVM_ANALYSIS_CFG_H
16 
17#include "llvm/ADT/GraphTraits.h"
18#include "llvm/ADT/SmallPtrSet.h"
19#include <utility>
20 
21namespace llvm {
22 
23class BasicBlock;
24class DominatorTree;
25class Function;
26class Instruction;
27class LoopInfo;
28template <typename T> class SmallVectorImpl;
29 
30/// Analyze the specified function to find all of the loop backedges in the
31/// function and return them.  This is a relatively cheap (compared to
32/// computing dominators and loop info) analysis.
33///
34/// The output is added to Result, as pairs of <from,to> edge info.
35void FindFunctionBackedges(
36    const Function &F,
37    SmallVectorImpl<std::pair<const BasicBlock *, const BasicBlock *> > &
38        Result);
39 
40/// Search for the specified successor of basic block BB and return its position
41/// in the terminator instruction's list of successors.  It is an error to call
42/// this with a block that is not a successor.
43unsigned GetSuccessorNumber(const BasicBlock *BB, const BasicBlock *Succ);
44 
45/// Return true if the specified edge is a critical edge. Critical edges are
46/// edges from a block with multiple successors to a block with multiple
47/// predecessors.
48///
49bool isCriticalEdge(const Instruction *TI, unsigned SuccNum,
50                    bool AllowIdenticalEdges = false);
51bool isCriticalEdge(const Instruction *TI, const BasicBlock *Succ,
52                    bool AllowIdenticalEdges = false);
53 
54/// Determine whether instruction 'To' is reachable from 'From', without passing
55/// through any blocks in ExclusionSet, returning true if uncertain.
56///
57/// Determine whether there is a path from From to To within a single function.
58/// Returns false only if we can prove that once 'From' has been executed then
59/// 'To' can not be executed. Conservatively returns true.
60///
61/// This function is linear with respect to the number of blocks in the CFG,
62/// walking down successors from From to reach To, with a fixed threshold.
63/// Using DT or LI allows us to answer more quickly. LI reduces the cost of
64/// an entire loop of any number of blocks to be the same as the cost of a
65/// single block. DT reduces the cost by allowing the search to terminate when
66/// we find a block that dominates the block containing 'To'. DT is most useful
67/// on branchy code but not loops, and LI is most useful on code with loops but
68/// does not help on branchy code outside loops.
69bool isPotentiallyReachable(
70    const Instruction *From, const Instruction *To,
71    const SmallPtrSetImpl<BasicBlock *> *ExclusionSet = nullptr,
72    const DominatorTree *DT = nullptr, const LoopInfo *LI = nullptr);
73 
74/// Determine whether block 'To' is reachable from 'From', returning
75/// true if uncertain.
76///
77/// Determine whether there is a path from From to To within a single function.
78/// Returns false only if we can prove that once 'From' has been reached then
79/// 'To' can not be executed. Conservatively returns true.
80bool isPotentiallyReachable(
81    const BasicBlock *From, const BasicBlock *To,
82    const SmallPtrSetImpl<BasicBlock *> *ExclusionSet = nullptr,
83    const DominatorTree *DT = nullptr, const LoopInfo *LI = nullptr);
84 
85/// Determine whether there is at least one path from a block in
86/// 'Worklist' to 'StopBB' without passing through any blocks in
87/// 'ExclusionSet', returning true if uncertain.
88///
89/// Determine whether there is a path from at least one block in Worklist to
90/// StopBB within a single function without passing through any of the blocks
91/// in 'ExclusionSet'. Returns false only if we can prove that once any block
92/// in 'Worklist' has been reached then 'StopBB' can not be executed.
93/// Conservatively returns true.
94bool isPotentiallyReachableFromMany(
95    SmallVectorImpl<BasicBlock *> &Worklist, BasicBlock *StopBB,
96    const SmallPtrSetImpl<BasicBlock *> *ExclusionSet,
97    const DominatorTree *DT = nullptr, const LoopInfo *LI = nullptr);
98 
99/// Return true if the control flow in \p RPOTraversal is irreducible.
100///
101/// This is a generic implementation to detect CFG irreducibility based on loop
102/// info analysis. It can be used for any kind of CFG (Loop, MachineLoop,
103/// Function, MachineFunction, etc.) by providing an RPO traversal (\p
104/// RPOTraversal) and the loop info analysis (\p LI) of the CFG. This utility
105/// function is only recommended when loop info analysis is available. If loop
106/// info analysis isn't available, please, don't compute it explicitly for this
107/// purpose. There are more efficient ways to detect CFG irreducibility that
108/// don't require recomputing loop info analysis (e.g., T1/T2 or Tarjan's
109/// algorithm).
110///
111/// Requirements:
112///   1) GraphTraits must be implemented for NodeT type. It is used to access
113///      NodeT successors.
114///   2) \p RPOTraversal must be a valid reverse post-order traversal of the
115///      target CFG with begin()/end() iterator interfaces.
116///   3) \p LI must be a valid LoopInfoBase that contains up-to-date loop
117///      analysis information of the CFG.
118///
119/// This algorithm uses the information about reducible loop back-edges already
120/// computed in \p LI. When a back-edge is found during the RPO traversal, the
121/// algorithm checks whether the back-edge is one of the reducible back-edges in
122/// loop info. If it isn't, the CFG is irreducible. For example, for the CFG
123/// below (canonical irreducible graph) loop info won't contain any loop, so the
124/// algorithm will return that the CFG is irreducible when checking the B <-
125/// -> C back-edge.
126///
127/// (A->B, A->C, B->C, C->B, C->D)
128///    A
129///  /   \
130/// B<- ->C
131///       |
132///       D
133///
// Implementation summary: nodes are visited in the order given by
// RPOTraversal. Every edge that leads to an already-visited node is treated as
// a backedge and must be confirmed by isProperBackedge; the first edge that is
// not confirmed makes the function return true (irreducible). If the whole
// traversal finishes without such an edge the function returns false.
134template <class NodeT, class RPOTraversalT, class LoopInfoT,
135          class GT = GraphTraits<NodeT>>
136bool containsIrreducibleCFG(RPOTraversalT &RPOTraversal, const LoopInfoT &LI) {
137  /// Check whether the edge (\p Src, \p Dst) is a reducible loop backedge
138  /// according to LI. I.e., check if there exists a loop that contains Src and
139  /// where Dst is the loop header.
140  auto isProperBackedge = [&](NodeT Src, NodeT Dst) {
     // Start at the loop LI reports for Src and climb through its parent
     // loops until one has Dst as header or the chain ends.
141    for (const auto *Lp = LI.getLoopFor(Src); Lp; Lp = Lp->getParentLoop()) {
142      if (Lp->getHeader() == Dst)
143        return true;
144    }
145    return false;
146  };
147 
     // Nodes already reached by the traversal.
148  SmallPtrSet<NodeT, 32> Visited;
149  for (NodeT Node : RPOTraversal) {
     // Node is recorded before its successors are inspected, so a self-edge
     // is examined as a backedge as well.
150    Visited.insert(Node);
151    for (NodeT Succ : make_range(GT::child_begin(Node), GT::child_end(Node))) {
152      // Succ hasn't been visited yet
153      if (!Visited.count(Succ))
154        continue;
155      // We already visited Succ, thus Node->Succ must be a backedge. Check that
156      // the head matches what we have in the loop information. Otherwise, we
157      // have an irreducible graph.
158      if (!isProperBackedge(Node, Succ))
159        return true;
160    }
161  }
162 
163  return false;
// NOTE(review): the next three lines look like a static-analyzer path
// annotation (step 15) carried over from the report, not source text -
// confirm before treating them as code.
15
←
Returning zero, which participates in a condition later→
164}
165} // End llvm namespace
166 
167#endif

←

/usr/src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/include/llvm/IR/PatternMatch.h

1//===- PatternMatch.h - Match on the LLVM IR --------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file provides a simple and efficient mechanism for performing general
10// tree-based pattern matches on the LLVM IR. The power of these routines is
11// that it allows you to write concise patterns that are expressive and easy to
12// understand. The other major advantage of this is that it allows you to
13// trivially capture/bind elements in the pattern to variables. For example,
14// you can do something like this:
15//
16//  Value *Exp = ...
17//  Value *X, *Y;  ConstantInt *C1, *C2;      // (X & C1) | (Y & C2)
18//  if (match(Exp, m_Or(m_And(m_Value(X), m_ConstantInt(C1)),
19//                      m_And(m_Value(Y), m_ConstantInt(C2))))) {
20//    ... Pattern is matched and variables are bound ...
21//  }
22//
23// This is primarily useful to things like the instruction combiner, but can
24// also be useful for static analysis tools or code generators.
25//
26//===----------------------------------------------------------------------===//
27 
28#ifndef LLVM_IR_PATTERNMATCH_H
29#define LLVM_IR_PATTERNMATCH_H
30 
31#include "llvm/ADT/APFloat.h"
32#include "llvm/ADT/APInt.h"
33#include "llvm/IR/Constant.h"
34#include "llvm/IR/Constants.h"
35#include "llvm/IR/DataLayout.h"
36#include "llvm/IR/InstrTypes.h"
37#include "llvm/IR/Instruction.h"
38#include "llvm/IR/Instructions.h"
39#include "llvm/IR/IntrinsicInst.h"
40#include "llvm/IR/Intrinsics.h"
41#include "llvm/IR/Operator.h"
42#include "llvm/IR/Value.h"
43#include "llvm/Support/Casting.h"
44#include <cstdint>
45 
46namespace llvm {
47namespace PatternMatch {
48 
// Entry point for matching a value against a pattern: invokes P.match(V) and
// returns its result. P is taken by const reference and the const_cast strips
// that constness so the pattern's match() member can be called on it (the
// matcher structs in this file declare match() as a non-const member).
// NOTE(review): the bare numbers, arrows and "Calling ..."/"Returning ..."
// lines inside the body look like static-analyzer path annotations carried
// over from the report, not source text - confirm before treating them as code.
49template <typename Val, typename Pattern> bool match(Val *V, const Pattern &P) {
50  return const_cast<Pattern &>(P).match(V);
33
←
Calling 'LogicalOp_match::match'→
37
←
Returning from 'LogicalOp_match::match'→
38
←
Returning zero, which participates in a condition later→
42
←
Calling 'LogicalOp_match::match'→
46
←
Returning from 'LogicalOp_match::match'→
47
←
Returning zero, which participates in a condition later→
51}
52 
53template <typename Pattern> bool match(ArrayRef<int> Mask, const Pattern &P) {
54  return const_cast<Pattern &>(P).match(Mask);
55}
56 
/// Matcher adaptor: succeeds only when the value reports hasOneUse() and the
/// wrapped sub-pattern matches it as well. The sub-pattern is not consulted
/// when hasOneUse() is false.
template <typename SubPattern_t> struct OneUse_match {
  SubPattern_t SubPattern;

  OneUse_match(const SubPattern_t &SP) : SubPattern(SP) {}

  template <typename OpTy> bool match(OpTy *V) {
    if (!V->hasOneUse())
      return false;
    return SubPattern.match(V);
  }
};

/// Wrap \p SubPattern in a OneUse_match.
template <typename T> inline OneUse_match<T> m_OneUse(const T &SubPattern) {
  return OneUse_match<T>(SubPattern);
}
70 
71template <typename Class> struct class_match {
72  template <typename ITy> bool match(ITy *V) { return isa<Class>(V); }
73};
74 
75/// Match an arbitrary value and ignore it.
76inline class_match<Value> m_Value() { return class_match<Value>(); }
77 
78/// Match an arbitrary unary operation and ignore it.
79inline class_match<UnaryOperator> m_UnOp() {
80  return class_match<UnaryOperator>();
81}
82 
83/// Match an arbitrary binary operation and ignore it.
84inline class_match<BinaryOperator> m_BinOp() {
85  return class_match<BinaryOperator>();
86}
87 
88/// Matches any compare instruction and ignore it.
89inline class_match<CmpInst> m_Cmp() { return class_match<CmpInst>(); }
90 
91struct undef_match {
92  static bool check(const Value *V) {
93    if (isa<UndefValue>(V))
94      return true;
95 
96    const auto *CA = dyn_cast<ConstantAggregate>(V);
97    if (!CA)
98      return false;
99 
100    SmallPtrSet<const ConstantAggregate *, 8> Seen;
101    SmallVector<const ConstantAggregate *, 8> Worklist;
102 
103    // Either UndefValue, PoisonValue, or an aggregate that only contains
104    // these is accepted by matcher.
105    // CheckValue returns false if CA cannot satisfy this constraint.
106    auto CheckValue = [&](const ConstantAggregate *CA) {
107      for (const Value *Op : CA->operand_values()) {
108        if (isa<UndefValue>(Op))
109          continue;
110 
111        const auto *CA = dyn_cast<ConstantAggregate>(Op);
112        if (!CA)
113          return false;
114        if (Seen.insert(CA).second)
115          Worklist.emplace_back(CA);
116      }
117 
118      return true;
119    };
120 
121    if (!CheckValue(CA))
122      return false;
123 
124    while (!Worklist.empty()) {
125      if (!CheckValue(Worklist.pop_back_val()))
126        return false;
127    }
128    return true;
129  }
130  template <typename ITy> bool match(ITy *V) { return check(V); }
131};
132 
133/// Match an arbitrary undef constant. This matches poison as well.
134/// If this is an aggregate and contains a non-aggregate element that is
135/// neither undef nor poison, the aggregate is not matched.
136inline auto m_Undef() { return undef_match(); }
137 
138/// Match an arbitrary poison constant.
139inline class_match<PoisonValue> m_Poison() { return class_match<PoisonValue>(); }
140 
141/// Match an arbitrary Constant and ignore it.
142inline class_match<Constant> m_Constant() { return class_match<Constant>(); }
143 
144/// Match an arbitrary ConstantInt and ignore it.
145inline class_match<ConstantInt> m_ConstantInt() {
146  return class_match<ConstantInt>();
147}
148 
149/// Match an arbitrary ConstantFP and ignore it.
150inline class_match<ConstantFP> m_ConstantFP() {
151  return class_match<ConstantFP>();
152}
153 
154/// Match an arbitrary ConstantExpr and ignore it.
155inline class_match<ConstantExpr> m_ConstantExpr() {
156  return class_match<ConstantExpr>();
157}
158 
159/// Match an arbitrary basic block value and ignore it.
160inline class_match<BasicBlock> m_BasicBlock() {
161  return class_match<BasicBlock>();
162}
163 
/// Inverting matcher: succeeds exactly when the wrapped matcher fails.
template <typename Ty> struct match_unless {
  Ty M;

  match_unless(const Ty &Matcher) : M(Matcher) {}

  template <typename ITy> bool match(ITy *V) {
    if (M.match(V))
      return false;
    return true;
  }
};

/// Match if the inner matcher does *NOT* match.
template <typename Ty> inline match_unless<Ty> m_Unless(const Ty &M) {
  match_unless<Ty> Inverted(M);
  return Inverted;
}
177 
/// Matching combinators
///
/// Disjunction of two matchers: L is tried first and R is consulted only when
/// L fails.
template <typename LTy, typename RTy> struct match_combine_or {
  LTy L;
  RTy R;

  match_combine_or(const LTy &Left, const RTy &Right) : L(Left), R(Right) {}

  template <typename ITy> bool match(ITy *V) {
    return L.match(V) || R.match(V);
  }
};
193 
/// Conjunction of two matchers: L is tried first and R is consulted only when
/// L succeeds.
template <typename LTy, typename RTy> struct match_combine_and {
  LTy L;
  RTy R;

  match_combine_and(const LTy &Left, const RTy &Right) : L(Left), R(Right) {}

  template <typename ITy> bool match(ITy *V) {
    return L.match(V) && R.match(V);
  }
};
207 
208/// Combine two pattern matchers matching L || R
209template <typename LTy, typename RTy>
210inline match_combine_or<LTy, RTy> m_CombineOr(const LTy &L, const RTy &R) {
211  return match_combine_or<LTy, RTy>(L, R);
212}
213 
214/// Combine two pattern matchers matching L && R
215template <typename LTy, typename RTy>
216inline match_combine_and<LTy, RTy> m_CombineAnd(const LTy &L, const RTy &R) {
217  return match_combine_and<LTy, RTy>(L, R);
218}
219 
220struct apint_match {
221  const APInt *&Res;
222  bool AllowUndef;
223 
224  apint_match(const APInt *&Res, bool AllowUndef)
225    : Res(Res), AllowUndef(AllowUndef) {}
226 
227  template <typename ITy> bool match(ITy *V) {
228    if (auto *CI = dyn_cast<ConstantInt>(V)) {
229      Res = &CI->getValue();
230      return true;
231    }
232    if (V->getType()->isVectorTy())
233      if (const auto *C = dyn_cast<Constant>(V))
234        if (auto *CI = dyn_cast_or_null<ConstantInt>(
235                C->getSplatValue(AllowUndef))) {
236          Res = &CI->getValue();
237          return true;
238        }
239    return false;
240  }
241};
242// Either constexpr if or renaming ConstantFP::getValueAPF to
243// ConstantFP::getValue is needed to do it via single template
244// function for both apint/apfloat.
245struct apfloat_match {
246  const APFloat *&Res;
247  bool AllowUndef;
248 
249  apfloat_match(const APFloat *&Res, bool AllowUndef)
250      : Res(Res), AllowUndef(AllowUndef) {}
251 
252  template <typename ITy> bool match(ITy *V) {
253    if (auto *CI = dyn_cast<ConstantFP>(V)) {
254      Res = &CI->getValueAPF();
255      return true;
256    }
257    if (V->getType()->isVectorTy())
258      if (const auto *C = dyn_cast<Constant>(V))
259        if (auto *CI = dyn_cast_or_null<ConstantFP>(
260                C->getSplatValue(AllowUndef))) {
261          Res = &CI->getValueAPF();
262          return true;
263        }
264    return false;
265  }
266};
267 
268/// Match a ConstantInt or splatted ConstantVector, binding the
269/// specified pointer to the contained APInt.
270inline apint_match m_APInt(const APInt *&Res) {
271  // Forbid undefs by default to maintain previous behavior.
272  return apint_match(Res, /* AllowUndef */ false);
273}
274 
275/// Match APInt while allowing undefs in splat vector constants.
276inline apint_match m_APIntAllowUndef(const APInt *&Res) {
277  return apint_match(Res, /* AllowUndef */ true);
278}
279 
280/// Match APInt while forbidding undefs in splat vector constants.
281inline apint_match m_APIntForbidUndef(const APInt *&Res) {
282  return apint_match(Res, /* AllowUndef */ false);
283}
284 
285/// Match a ConstantFP or splatted ConstantVector, binding the
286/// specified pointer to the contained APFloat.
287inline apfloat_match m_APFloat(const APFloat *&Res) {
288  // Forbid undefs by default to maintain previous behavior.
289  return apfloat_match(Res, /* AllowUndef */ false);
290}
291 
292/// Match APFloat while allowing undefs in splat vector constants.
293inline apfloat_match m_APFloatAllowUndef(const APFloat *&Res) {
294  return apfloat_match(Res, /* AllowUndef */ true);
295}
296 
297/// Match APFloat while forbidding undefs in splat vector constants.
298inline apfloat_match m_APFloatForbidUndef(const APFloat *&Res) {
299  return apfloat_match(Res, /* AllowUndef */ false);
300}
301 
302template <int64_t Val> struct constantint_match {
303  template <typename ITy> bool match(ITy *V) {
304    if (const auto *CI = dyn_cast<ConstantInt>(V)) {
305      const APInt &CIV = CI->getValue();
306      if (Val >= 0)
307        return CIV == static_cast<uint64_t>(Val);
308      // If Val is negative, and CI is shorter than it, truncate to the right
309      // number of bits.  If it is larger, then we have to sign extend.  Just
310      // compare their negated values.
311      return -CIV == -Val;
312    }
313    return false;
314  }
315};
316 
317/// Match a ConstantInt with a specific value.
318template <int64_t Val> inline constantint_match<Val> m_ConstantInt() {
319  return constantint_match<Val>();
320}
321 
322/// This helper class is used to match constant scalars, vector splats,
323/// and fixed width vectors that satisfy a specified predicate.
324/// For fixed width vector constants, undefined elements are ignored.
325template <typename Predicate, typename ConstantVal>
326struct cstval_pred_ty : public Predicate {
327  template <typename ITy> bool match(ITy *V) {
328    if (const auto *CV = dyn_cast<ConstantVal>(V))
329      return this->isValue(CV->getValue());
330    if (const auto *VTy = dyn_cast<VectorType>(V->getType())) {
331      if (const auto *C = dyn_cast<Constant>(V)) {
332        if (const auto *CV = dyn_cast_or_null<ConstantVal>(C->getSplatValue()))
333          return this->isValue(CV->getValue());
334 
335        // Number of elements of a scalable vector unknown at compile time
336        auto *FVTy = dyn_cast<FixedVectorType>(VTy);
337        if (!FVTy)
338          return false;
339 
340        // Non-splat vector constant: check each element for a match.
341        unsigned NumElts = FVTy->getNumElements();
342        assert(NumElts != 0 && "Constant vector with no elements?")((void)0);
343        bool HasNonUndefElements = false;
344        for (unsigned i = 0; i != NumElts; ++i) {
345          Constant *Elt = C->getAggregateElement(i);
346          if (!Elt)
347            return false;
348          if (isa<UndefValue>(Elt))
349            continue;
350          auto *CV = dyn_cast<ConstantVal>(Elt);
351          if (!CV || !this->isValue(CV->getValue()))
352            return false;
353          HasNonUndefElements = true;
354        }
355        return HasNonUndefElements;
356      }
357    }
358    return false;
359  }
360};
361 
362/// specialization of cstval_pred_ty for ConstantInt
363template <typename Predicate>
364using cst_pred_ty = cstval_pred_ty<Predicate, ConstantInt>;
365 
366/// specialization of cstval_pred_ty for ConstantFP
367template <typename Predicate>
368using cstfp_pred_ty = cstval_pred_ty<Predicate, ConstantFP>;
369 
370/// This helper class is used to match scalar and vector constants that
371/// satisfy a specified predicate, and bind them to an APInt.
372template <typename Predicate> struct api_pred_ty : public Predicate {
373  const APInt *&Res;
374 
375  api_pred_ty(const APInt *&R) : Res(R) {}
376 
377  template <typename ITy> bool match(ITy *V) {
378    if (const auto *CI = dyn_cast<ConstantInt>(V))
379      if (this->isValue(CI->getValue())) {
380        Res = &CI->getValue();
381        return true;
382      }
383    if (V->getType()->isVectorTy())
384      if (const auto *C = dyn_cast<Constant>(V))
385        if (auto *CI = dyn_cast_or_null<ConstantInt>(C->getSplatValue()))
386          if (this->isValue(CI->getValue())) {
387            Res = &CI->getValue();
388            return true;
389          }
390 
391    return false;
392  }
393};
394 
395/// This helper class is used to match scalar and vector constants that
396/// satisfy a specified predicate, and bind them to an APFloat.
397/// Undefs are allowed in splat vector constants.
398template <typename Predicate> struct apf_pred_ty : public Predicate {
399  const APFloat *&Res;
400 
401  apf_pred_ty(const APFloat *&R) : Res(R) {}
402 
403  template <typename ITy> bool match(ITy *V) {
404    if (const auto *CI = dyn_cast<ConstantFP>(V))
405      if (this->isValue(CI->getValue())) {
406        Res = &CI->getValue();
407        return true;
408      }
409    if (V->getType()->isVectorTy())
410      if (const auto *C = dyn_cast<Constant>(V))
411        if (auto *CI = dyn_cast_or_null<ConstantFP>(
412                C->getSplatValue(/* AllowUndef */ true)))
413          if (this->isValue(CI->getValue())) {
414            Res = &CI->getValue();
415            return true;
416          }
417 
418    return false;
419  }
420};
421 
422///////////////////////////////////////////////////////////////////////////////
423//
424// Encapsulate constant value queries for use in templated predicate matchers.
425// This allows checking if constants match using compound predicates and works
426// with vector constants, possibly with relaxed constraints. For example, ignore
427// undef values.
428//
429///////////////////////////////////////////////////////////////////////////////
430 
431struct is_any_apint {
432  bool isValue(const APInt &C) { return true; }
433};
434/// Match an integer or vector with any integral constant.
435/// For vectors, this includes constants with undefined elements.
436inline cst_pred_ty<is_any_apint> m_AnyIntegralConstant() {
437  return cst_pred_ty<is_any_apint>();
438}
439 
440struct is_all_ones {
441  bool isValue(const APInt &C) { return C.isAllOnesValue(); }
442};
443/// Match an integer or vector with all bits set.
444/// For vectors, this includes constants with undefined elements.
445inline cst_pred_ty<is_all_ones> m_AllOnes() {
446  return cst_pred_ty<is_all_ones>();
447}
448 
449struct is_maxsignedvalue {
450  bool isValue(const APInt &C) { return C.isMaxSignedValue(); }
451};
452/// Match an integer or vector with values having all bits except for the high
453/// bit set (0x7f...).
454/// For vectors, this includes constants with undefined elements.
455inline cst_pred_ty<is_maxsignedvalue> m_MaxSignedValue() {
456  return cst_pred_ty<is_maxsignedvalue>();
457}
458inline api_pred_ty<is_maxsignedvalue> m_MaxSignedValue(const APInt *&V) {
459  return V;
460}
461 
462struct is_negative {
463  bool isValue(const APInt &C) { return C.isNegative(); }
464};
465/// Match an integer or vector of negative values.
466/// For vectors, this includes constants with undefined elements.
467inline cst_pred_ty<is_negative> m_Negative() {
468  return cst_pred_ty<is_negative>();
469}
470inline api_pred_ty<is_negative> m_Negative(const APInt *&V) {
471  return V;
472}
473 
474struct is_nonnegative {
475  bool isValue(const APInt &C) { return C.isNonNegative(); }
476};
477/// Match an integer or vector of non-negative values.
478/// For vectors, this includes constants with undefined elements.
479inline cst_pred_ty<is_nonnegative> m_NonNegative() {
480  return cst_pred_ty<is_nonnegative>();
481}
482inline api_pred_ty<is_nonnegative> m_NonNegative(const APInt *&V) {
483  return V;
484}
485 
486struct is_strictlypositive {
487  bool isValue(const APInt &C) { return C.isStrictlyPositive(); }
488};
489/// Match an integer or vector of strictly positive values.
490/// For vectors, this includes constants with undefined elements.
491inline cst_pred_ty<is_strictlypositive> m_StrictlyPositive() {
492  return cst_pred_ty<is_strictlypositive>();
493}
494inline api_pred_ty<is_strictlypositive> m_StrictlyPositive(const APInt *&V) {
495  return V;
496}
497 
498struct is_nonpositive {
499  bool isValue(const APInt &C) { return C.isNonPositive(); }
500};
501/// Match an integer or vector of non-positive values.
502/// For vectors, this includes constants with undefined elements.
503inline cst_pred_ty<is_nonpositive> m_NonPositive() {
504  return cst_pred_ty<is_nonpositive>();
505}
506inline api_pred_ty<is_nonpositive> m_NonPositive(const APInt *&V) { return V; }
507 
508struct is_one {
509  bool isValue(const APInt &C) { return C.isOneValue(); }
510};
511/// Match an integer 1 or a vector with all elements equal to 1.
512/// For vectors, this includes constants with undefined elements.
513inline cst_pred_ty<is_one> m_One() {
514  return cst_pred_ty<is_one>();
515}
516 
517struct is_zero_int {
518  bool isValue(const APInt &C) { return C.isNullValue(); }
519};
520/// Match an integer 0 or a vector with all elements equal to 0.
521/// For vectors, this includes constants with undefined elements.
522inline cst_pred_ty<is_zero_int> m_ZeroInt() {
523  return cst_pred_ty<is_zero_int>();
524}
525 
526struct is_zero {
527  template <typename ITy> bool match(ITy *V) {
528    auto *C = dyn_cast<Constant>(V);
529    // FIXME: this should be able to do something for scalable vectors
530    return C && (C->isNullValue() || cst_pred_ty<is_zero_int>().match(C));
531  }
532};
533/// Match any null constant or a vector with all elements equal to 0.
534/// For vectors, this includes constants with undefined elements.
535inline is_zero m_Zero() {
536  return is_zero();
537}
538 
539struct is_power2 {
540  bool isValue(const APInt &C) { return C.isPowerOf2(); }
541};
542/// Match an integer or vector power-of-2.
543/// For vectors, this includes constants with undefined elements.
544inline cst_pred_ty<is_power2> m_Power2() {
545  return cst_pred_ty<is_power2>();
546}
547inline api_pred_ty<is_power2> m_Power2(const APInt *&V) {
548  return V;
549}
550 
551struct is_negated_power2 {
552  bool isValue(const APInt &C) { return (-C).isPowerOf2(); }
553};
554/// Match a integer or vector negated power-of-2.
555/// For vectors, this includes constants with undefined elements.
556inline cst_pred_ty<is_negated_power2> m_NegatedPower2() {
557  return cst_pred_ty<is_negated_power2>();
558}
559inline api_pred_ty<is_negated_power2> m_NegatedPower2(const APInt *&V) {
560  return V;
561}
562 
563struct is_power2_or_zero {
564  bool isValue(const APInt &C) { return !C || C.isPowerOf2(); }
565};
566/// Match an integer or vector of 0 or power-of-2 values.
567/// For vectors, this includes constants with undefined elements.
568inline cst_pred_ty<is_power2_or_zero> m_Power2OrZero() {
569  return cst_pred_ty<is_power2_or_zero>();
570}
571inline api_pred_ty<is_power2_or_zero> m_Power2OrZero(const APInt *&V) {
572  return V;
573}
574 
575struct is_sign_mask {
576  bool isValue(const APInt &C) { return C.isSignMask(); }
577};
578/// Match an integer or vector with only the sign bit(s) set.
579/// For vectors, this includes constants with undefined elements.
580inline cst_pred_ty<is_sign_mask> m_SignMask() {
581  return cst_pred_ty<is_sign_mask>();
582}
583 
584struct is_lowbit_mask {
585  bool isValue(const APInt &C) { return C.isMask(); }
586};
587/// Match an integer or vector with only the low bit(s) set.
588/// For vectors, this includes constants with undefined elements.
589inline cst_pred_ty<is_lowbit_mask> m_LowBitMask() {
590  return cst_pred_ty<is_lowbit_mask>();
591}
592 
593struct icmp_pred_with_threshold {
594  ICmpInst::Predicate Pred;
595  const APInt *Thr;
596  bool isValue(const APInt &C) {
597    switch (Pred) {
598    case ICmpInst::Predicate::ICMP_EQ:
599      return C.eq(*Thr);
600    case ICmpInst::Predicate::ICMP_NE:
601      return C.ne(*Thr);
602    case ICmpInst::Predicate::ICMP_UGT:
603      return C.ugt(*Thr);
604    case ICmpInst::Predicate::ICMP_UGE:
605      return C.uge(*Thr);
606    case ICmpInst::Predicate::ICMP_ULT:
607      return C.ult(*Thr);
608    case ICmpInst::Predicate::ICMP_ULE:
609      return C.ule(*Thr);
610    case ICmpInst::Predicate::ICMP_SGT:
611      return C.sgt(*Thr);
612    case ICmpInst::Predicate::ICMP_SGE:
613      return C.sge(*Thr);
614    case ICmpInst::Predicate::ICMP_SLT:
615      return C.slt(*Thr);
616    case ICmpInst::Predicate::ICMP_SLE:
617      return C.sle(*Thr);
618    default:
619      llvm_unreachable("Unhandled ICmp predicate")__builtin_unreachable();
620    }
621  }
622};
623/// Match an integer or vector with every element comparing 'pred' (eg/ne/...)
624/// to Threshold. For vectors, this includes constants with undefined elements.
625inline cst_pred_ty<icmp_pred_with_threshold>
626m_SpecificInt_ICMP(ICmpInst::Predicate Predicate, const APInt &Threshold) {
627  cst_pred_ty<icmp_pred_with_threshold> P;
628  P.Pred = Predicate;
629  P.Thr = &Threshold;
630  return P;
631}
632 
633struct is_nan {
634  bool isValue(const APFloat &C) { return C.isNaN(); }
635};
636/// Match an arbitrary NaN constant. This includes quiet and signalling nans.
637/// For vectors, this includes constants with undefined elements.
638inline cstfp_pred_ty<is_nan> m_NaN() {
639  return cstfp_pred_ty<is_nan>();
640}
641 
642struct is_nonnan {
643  bool isValue(const APFloat &C) { return !C.isNaN(); }
644};
645/// Match a non-NaN FP constant.
646/// For vectors, this includes constants with undefined elements.
647inline cstfp_pred_ty<is_nonnan> m_NonNaN() {
648  return cstfp_pred_ty<is_nonnan>();
649}
650 
651struct is_inf {
652  bool isValue(const APFloat &C) { return C.isInfinity(); }
653};
654/// Match a positive or negative infinity FP constant.
655/// For vectors, this includes constants with undefined elements.
656inline cstfp_pred_ty<is_inf> m_Inf() {
657  return cstfp_pred_ty<is_inf>();
658}
659 
660struct is_noninf {
661  bool isValue(const APFloat &C) { return !C.isInfinity(); }
662};
663/// Match a non-infinity FP constant, i.e. finite or NaN.
664/// For vectors, this includes constants with undefined elements.
665inline cstfp_pred_ty<is_noninf> m_NonInf() {
666  return cstfp_pred_ty<is_noninf>();
667}
668 
669struct is_finite {
670  bool isValue(const APFloat &C) { return C.isFinite(); }
671};
672/// Match a finite FP constant, i.e. not infinity or NaN.
673/// For vectors, this includes constants with undefined elements.
674inline cstfp_pred_ty<is_finite> m_Finite() {
675  return cstfp_pred_ty<is_finite>();
676}
677inline apf_pred_ty<is_finite> m_Finite(const APFloat *&V) { return V; }
678 
679struct is_finitenonzero {
680  bool isValue(const APFloat &C) { return C.isFiniteNonZero(); }
681};
682/// Match a finite non-zero FP constant.
683/// For vectors, this includes constants with undefined elements.
684inline cstfp_pred_ty<is_finitenonzero> m_FiniteNonZero() {
685  return cstfp_pred_ty<is_finitenonzero>();
686}
687inline apf_pred_ty<is_finitenonzero> m_FiniteNonZero(const APFloat *&V) {
688  return V;
689}
690 
691struct is_any_zero_fp {
692  bool isValue(const APFloat &C) { return C.isZero(); }
693};
694/// Match a floating-point negative zero or positive zero.
695/// For vectors, this includes constants with undefined elements.
696inline cstfp_pred_ty<is_any_zero_fp> m_AnyZeroFP() {
697  return cstfp_pred_ty<is_any_zero_fp>();
698}
699 
700struct is_pos_zero_fp {
701  bool isValue(const APFloat &C) { return C.isPosZero(); }
702};
703/// Match a floating-point positive zero.
704/// For vectors, this includes constants with undefined elements.
705inline cstfp_pred_ty<is_pos_zero_fp> m_PosZeroFP() {
706  return cstfp_pred_ty<is_pos_zero_fp>();
707}
708 
709struct is_neg_zero_fp {
710  bool isValue(const APFloat &C) { return C.isNegZero(); }
711};
712/// Match a floating-point negative zero.
713/// For vectors, this includes constants with undefined elements.
714inline cstfp_pred_ty<is_neg_zero_fp> m_NegZeroFP() {
715  return cstfp_pred_ty<is_neg_zero_fp>();
716}
717 
718struct is_non_zero_fp {
719  bool isValue(const APFloat &C) { return C.isNonZero(); }
720};
721/// Match a floating-point non-zero.
722/// For vectors, this includes constants with undefined elements.
723inline cstfp_pred_ty<is_non_zero_fp> m_NonZeroFP() {
724  return cstfp_pred_ty<is_non_zero_fp>();
725}
726 
727///////////////////////////////////////////////////////////////////////////////
728 
729template <typename Class> struct bind_ty {
730  Class *&VR;
731 
732  bind_ty(Class *&V) : VR(V) {}
733 
734  template <typename ITy> bool match(ITy *V) {
735    if (auto *CV = dyn_cast<Class>(V)) {
736      VR = CV;
737      return true;
738    }
739    return false;
740  }
741};
742 
743/// Match a value, capturing it if we match.
744inline bind_ty<Value> m_Value(Value *&V) { return V; }
745inline bind_ty<const Value> m_Value(const Value *&V) { return V; }
746 
747/// Match an instruction, capturing it if we match.
748inline bind_ty<Instruction> m_Instruction(Instruction *&I) { return I; }
749/// Match a unary operator, capturing it if we match.
750inline bind_ty<UnaryOperator> m_UnOp(UnaryOperator *&I) { return I; }
751/// Match a binary operator, capturing it if we match.
752inline bind_ty<BinaryOperator> m_BinOp(BinaryOperator *&I) { return I; }
753/// Match a with overflow intrinsic, capturing it if we match.
754inline bind_ty<WithOverflowInst> m_WithOverflowInst(WithOverflowInst *&I) { return I; }
755inline bind_ty<const WithOverflowInst>
756m_WithOverflowInst(const WithOverflowInst *&I) {
757  return I;
758}
759 
760/// Match a Constant, capturing the value if we match.
761inline bind_ty<Constant> m_Constant(Constant *&C) { return C; }
762 
763/// Match a ConstantInt, capturing the value if we match.
764inline bind_ty<ConstantInt> m_ConstantInt(ConstantInt *&CI) { return CI; }
765 
766/// Match a ConstantFP, capturing the value if we match.
767inline bind_ty<ConstantFP> m_ConstantFP(ConstantFP *&C) { return C; }
768 
769/// Match a ConstantExpr, capturing the value if we match.
770inline bind_ty<ConstantExpr> m_ConstantExpr(ConstantExpr *&C) { return C; }
771 
772/// Match a basic block value, capturing it if we match.
773inline bind_ty<BasicBlock> m_BasicBlock(BasicBlock *&V) { return V; }
774inline bind_ty<const BasicBlock> m_BasicBlock(const BasicBlock *&V) {
775  return V;
776}
777 
778/// Match an arbitrary immediate Constant and ignore it.
779inline match_combine_and<class_match<Constant>,
780                         match_unless<class_match<ConstantExpr>>>
781m_ImmConstant() {
782  return m_CombineAnd(m_Constant(), m_Unless(m_ConstantExpr()));
783}
784 
785/// Match an immediate Constant, capturing the value if we match.
786inline match_combine_and<bind_ty<Constant>,
787                         match_unless<class_match<ConstantExpr>>>
788m_ImmConstant(Constant *&C) {
789  return m_CombineAnd(m_Constant(C), m_Unless(m_ConstantExpr()));
790}
791 
792/// Match a specified Value*.
793struct specificval_ty {
794  const Value *Val;
795 
796  specificval_ty(const Value *V) : Val(V) {}
797 
798  template <typename ITy> bool match(ITy *V) { return V == Val; }
799};
800 
801/// Match if we have a specific specified value.
802inline specificval_ty m_Specific(const Value *V) { return V; }
803 
/// Like specificval_ty, but holds a reference to the Value * variable rather
/// than a copy of the pointer. The comparison therefore uses whatever the
/// variable contains when match() runs, which makes it usable in commutative
/// matchers.
template <typename Class> struct deferredval_ty {
  /// Reference to the pointer variable read at match time.
  Class *const &Val;

  deferredval_ty(Class *const &V) : Val(V) {}

  template <typename ITy> bool match(ITy *const V) { return Val == V; }
};
813 
814/// Like m_Specific(), but works if the specific value to match is determined
815/// as part of the same match() expression. For example:
816/// m_Add(m_Value(X), m_Specific(X)) is incorrect, because m_Specific() will
817/// bind X before the pattern match starts.
818/// m_Add(m_Value(X), m_Deferred(X)) is correct, and will check against
819/// whichever value m_Value(X) populated.
820inline deferredval_ty<Value> m_Deferred(Value *const &V) { return V; }
821inline deferredval_ty<const Value> m_Deferred(const Value *const &V) {
822  return V;
823}
824 
825/// Match a specified floating point value or vector of all elements of
826/// that value.
827struct specific_fpval {
828  double Val;
829 
830  specific_fpval(double V) : Val(V) {}
831 
832  template <typename ITy> bool match(ITy *V) {
833    if (const auto *CFP = dyn_cast<ConstantFP>(V))
834      return CFP->isExactlyValue(Val);
835    if (V->getType()->isVectorTy())
836      if (const auto *C = dyn_cast<Constant>(V))
837        if (auto *CFP = dyn_cast_or_null<ConstantFP>(C->getSplatValue()))
838          return CFP->isExactlyValue(Val);
839    return false;
840  }
841};
842 
843/// Match a specific floating point value or vector with all elements
844/// equal to the value.
845inline specific_fpval m_SpecificFP(double V) { return specific_fpval(V); }
846 
847/// Match a float 1.0 or vector with all elements equal to 1.0.
848inline specific_fpval m_FPOne() { return m_SpecificFP(1.0); }
849 
850struct bind_const_intval_ty {
851  uint64_t &VR;
852 
853  bind_const_intval_ty(uint64_t &V) : VR(V) {}
854 
855  template <typename ITy> bool match(ITy *V) {
856    if (const auto *CV = dyn_cast<ConstantInt>(V))
857      if (CV->getValue().ule(UINT64_MAX0xffffffffffffffffULL)) {
858        VR = CV->getZExtValue();
859        return true;
860      }
861    return false;
862  }
863};
864 
865/// Match a specified integer value or vector of all elements of that
866/// value.
867template <bool AllowUndefs>
868struct specific_intval {
869  APInt Val;
870 
871  specific_intval(APInt V) : Val(std::move(V)) {}
872 
873  template <typename ITy> bool match(ITy *V) {
874    const auto *CI = dyn_cast<ConstantInt>(V);
875    if (!CI && V->getType()->isVectorTy())
876      if (const auto *C = dyn_cast<Constant>(V))
877        CI = dyn_cast_or_null<ConstantInt>(C->getSplatValue(AllowUndefs));
878 
879    return CI && APInt::isSameValue(CI->getValue(), Val);
880  }
881};
882 
883/// Match a specific integer value or vector with all elements equal to
884/// the value.
885inline specific_intval<false> m_SpecificInt(APInt V) {
886  return specific_intval<false>(std::move(V));
887}
888 
889inline specific_intval<false> m_SpecificInt(uint64_t V) {
890  return m_SpecificInt(APInt(64, V));
891}
892 
893inline specific_intval<true> m_SpecificIntAllowUndef(APInt V) {
894  return specific_intval<true>(std::move(V));
895}
896 
897inline specific_intval<true> m_SpecificIntAllowUndef(uint64_t V) {
898  return m_SpecificIntAllowUndef(APInt(64, V));
899}
900 
901/// Match a ConstantInt and bind to its value.  This does not match
902/// ConstantInts wider than 64-bits.
903inline bind_const_intval_ty m_ConstantInt(uint64_t &V) { return V; }
904 
905/// Match a specified basic block value.
906struct specific_bbval {
907  BasicBlock *Val;
908 
909  specific_bbval(BasicBlock *Val) : Val(Val) {}
910 
911  template <typename ITy> bool match(ITy *V) {
912    const auto *BB = dyn_cast<BasicBlock>(V);
913    return BB && BB == Val;
914  }
915};
916 
917/// Match a specific basic block value.
918inline specific_bbval m_SpecificBB(BasicBlock *BB) {
919  return specific_bbval(BB);
920}
921 
922/// A commutative-friendly version of m_Specific().
923inline deferredval_ty<BasicBlock> m_Deferred(BasicBlock *const &BB) {
924  return BB;
925}
926inline deferredval_ty<const BasicBlock>
927m_Deferred(const BasicBlock *const &BB) {
928  return BB;
929}
930 
931//===----------------------------------------------------------------------===//
932// Matcher for any binary operator.
933//
934template <typename LHS_t, typename RHS_t, bool Commutable = false>
935struct AnyBinaryOp_match {
936  LHS_t L;
937  RHS_t R;
938 
939  // The evaluation order is always stable, regardless of Commutability.
940  // The LHS is always matched first.
941  AnyBinaryOp_match(const LHS_t &LHS, const RHS_t &RHS) : L(LHS), R(RHS) {}
942 
943  template <typename OpTy> bool match(OpTy *V) {
944    if (auto *I = dyn_cast<BinaryOperator>(V))
945      return (L.match(I->getOperand(0)) && R.match(I->getOperand(1))) ||
946             (Commutable && L.match(I->getOperand(1)) &&
947              R.match(I->getOperand(0)));
948    return false;
949  }
950};
951 
952template <typename LHS, typename RHS>
953inline AnyBinaryOp_match<LHS, RHS> m_BinOp(const LHS &L, const RHS &R) {
954  return AnyBinaryOp_match<LHS, RHS>(L, R);
955}
956 
957//===----------------------------------------------------------------------===//
958// Matcher for any unary operator.
959// TODO fuse unary, binary matcher into n-ary matcher
960//
961template <typename OP_t> struct AnyUnaryOp_match {
962  OP_t X;
963 
964  AnyUnaryOp_match(const OP_t &X) : X(X) {}
965 
966  template <typename OpTy> bool match(OpTy *V) {
967    if (auto *I = dyn_cast<UnaryOperator>(V))
968      return X.match(I->getOperand(0));
969    return false;
970  }
971};
972 
973template <typename OP_t> inline AnyUnaryOp_match<OP_t> m_UnOp(const OP_t &X) {
974  return AnyUnaryOp_match<OP_t>(X);
975}
976 
977//===----------------------------------------------------------------------===//
978// Matchers for specific binary operators.
979//
980 
981template <typename LHS_t, typename RHS_t, unsigned Opcode,
982          bool Commutable = false>
983struct BinaryOp_match {
984  LHS_t L;
985  RHS_t R;
986 
987  // The evaluation order is always stable, regardless of Commutability.
988  // The LHS is always matched first.
989  BinaryOp_match(const LHS_t &LHS, const RHS_t &RHS) : L(LHS), R(RHS) {}
990 
991  template <typename OpTy> bool match(OpTy *V) {
992    if (V->getValueID() == Value::InstructionVal + Opcode) {
993      auto *I = cast<BinaryOperator>(V);
994      return (L.match(I->getOperand(0)) && R.match(I->getOperand(1))) ||
995             (Commutable && L.match(I->getOperand(1)) &&
996              R.match(I->getOperand(0)));
997    }
998    if (auto *CE = dyn_cast<ConstantExpr>(V))
999      return CE->getOpcode() == Opcode &&
1000             ((L.match(CE->getOperand(0)) && R.match(CE->getOperand(1))) ||
1001              (Commutable && L.match(CE->getOperand(1)) &&
1002               R.match(CE->getOperand(0))));
1003    return false;
1004  }
1005};
1006 
1007template <typename LHS, typename RHS>
1008inline BinaryOp_match<LHS, RHS, Instruction::Add> m_Add(const LHS &L,
1009                                                        const RHS &R) {
1010  return BinaryOp_match<LHS, RHS, Instruction::Add>(L, R);
1011}
1012 
1013template <typename LHS, typename RHS>
1014inline BinaryOp_match<LHS, RHS, Instruction::FAdd> m_FAdd(const LHS &L,
1015                                                          const RHS &R) {
1016  return BinaryOp_match<LHS, RHS, Instruction::FAdd>(L, R);
1017}
1018 
1019template <typename LHS, typename RHS>
1020inline BinaryOp_match<LHS, RHS, Instruction::Sub> m_Sub(const LHS &L,
1021                                                        const RHS &R) {
1022  return BinaryOp_match<LHS, RHS, Instruction::Sub>(L, R);
1023}
1024 
1025template <typename LHS, typename RHS>
1026inline BinaryOp_match<LHS, RHS, Instruction::FSub> m_FSub(const LHS &L,
1027                                                          const RHS &R) {
1028  return BinaryOp_match<LHS, RHS, Instruction::FSub>(L, R);
1029}
1030 
1031template <typename Op_t> struct FNeg_match {
1032  Op_t X;
1033 
1034  FNeg_match(const Op_t &Op) : X(Op) {}
1035  template <typename OpTy> bool match(OpTy *V) {
1036    auto *FPMO = dyn_cast<FPMathOperator>(V);
1037    if (!FPMO) return false;
1038 
1039    if (FPMO->getOpcode() == Instruction::FNeg)
1040      return X.match(FPMO->getOperand(0));
1041 
1042    if (FPMO->getOpcode() == Instruction::FSub) {
1043      if (FPMO->hasNoSignedZeros()) {
1044        // With 'nsz', any zero goes.
1045        if (!cstfp_pred_ty<is_any_zero_fp>().match(FPMO->getOperand(0)))
1046          return false;
1047      } else {
1048        // Without 'nsz', we need fsub -0.0, X exactly.
1049        if (!cstfp_pred_ty<is_neg_zero_fp>().match(FPMO->getOperand(0)))
1050          return false;
1051      }
1052 
1053      return X.match(FPMO->getOperand(1));
1054    }
1055 
1056    return false;
1057  }
1058};
1059 
1060/// Match 'fneg X' as 'fsub -0.0, X'.
1061template <typename OpTy>
1062inline FNeg_match<OpTy>
1063m_FNeg(const OpTy &X) {
1064  return FNeg_match<OpTy>(X);
1065}
1066 
1067/// Match 'fneg X' as 'fsub +-0.0, X'.
1068template <typename RHS>
1069inline BinaryOp_match<cstfp_pred_ty<is_any_zero_fp>, RHS, Instruction::FSub>
1070m_FNegNSZ(const RHS &X) {
1071  return m_FSub(m_AnyZeroFP(), X);
1072}
1073 
1074template <typename LHS, typename RHS>
1075inline BinaryOp_match<LHS, RHS, Instruction::Mul> m_Mul(const LHS &L,
1076                                                        const RHS &R) {
1077  return BinaryOp_match<LHS, RHS, Instruction::Mul>(L, R);
1078}
1079 
1080template <typename LHS, typename RHS>
1081inline BinaryOp_match<LHS, RHS, Instruction::FMul> m_FMul(const LHS &L,
1082                                                          const RHS &R) {
1083  return BinaryOp_match<LHS, RHS, Instruction::FMul>(L, R);
1084}
1085 
1086template <typename LHS, typename RHS>
1087inline BinaryOp_match<LHS, RHS, Instruction::UDiv> m_UDiv(const LHS &L,
1088                                                          const RHS &R) {
1089  return BinaryOp_match<LHS, RHS, Instruction::UDiv>(L, R);
1090}
1091 
1092template <typename LHS, typename RHS>
1093inline BinaryOp_match<LHS, RHS, Instruction::SDiv> m_SDiv(const LHS &L,
1094                                                          const RHS &R) {
1095  return BinaryOp_match<LHS, RHS, Instruction::SDiv>(L, R);
1096}
1097 
1098template <typename LHS, typename RHS>
1099inline BinaryOp_match<LHS, RHS, Instruction::FDiv> m_FDiv(const LHS &L,
1100                                                          const RHS &R) {
1101  return BinaryOp_match<LHS, RHS, Instruction::FDiv>(L, R);
1102}
1103 
1104template <typename LHS, typename RHS>
1105inline BinaryOp_match<LHS, RHS, Instruction::URem> m_URem(const LHS &L,
1106                                                          const RHS &R) {
1107  return BinaryOp_match<LHS, RHS, Instruction::URem>(L, R);
1108}
1109 
1110template <typename LHS, typename RHS>
1111inline BinaryOp_match<LHS, RHS, Instruction::SRem> m_SRem(const LHS &L,
1112                                                          const RHS &R) {
1113  return BinaryOp_match<LHS, RHS, Instruction::SRem>(L, R);
1114}
1115 
1116template <typename LHS, typename RHS>
1117inline BinaryOp_match<LHS, RHS, Instruction::FRem> m_FRem(const LHS &L,
1118                                                          const RHS &R) {
1119  return BinaryOp_match<LHS, RHS, Instruction::FRem>(L, R);
1120}
1121 
1122template <typename LHS, typename RHS>
1123inline BinaryOp_match<LHS, RHS, Instruction::And> m_And(const LHS &L,
1124                                                        const RHS &R) {
1125  return BinaryOp_match<LHS, RHS, Instruction::And>(L, R);
1126}
1127 
1128template <typename LHS, typename RHS>
1129inline BinaryOp_match<LHS, RHS, Instruction::Or> m_Or(const LHS &L,
1130                                                      const RHS &R) {
1131  return BinaryOp_match<LHS, RHS, Instruction::Or>(L, R);
1132}
1133 
1134template <typename LHS, typename RHS>
1135inline BinaryOp_match<LHS, RHS, Instruction::Xor> m_Xor(const LHS &L,
1136                                                        const RHS &R) {
1137  return BinaryOp_match<LHS, RHS, Instruction::Xor>(L, R);
1138}
1139 
1140template <typename LHS, typename RHS>
1141inline BinaryOp_match<LHS, RHS, Instruction::Shl> m_Shl(const LHS &L,
1142                                                        const RHS &R) {
1143  return BinaryOp_match<LHS, RHS, Instruction::Shl>(L, R);
1144}
1145 
1146template <typename LHS, typename RHS>
1147inline BinaryOp_match<LHS, RHS, Instruction::LShr> m_LShr(const LHS &L,
1148                                                          const RHS &R) {
1149  return BinaryOp_match<LHS, RHS, Instruction::LShr>(L, R);
1150}
1151 
1152template <typename LHS, typename RHS>
1153inline BinaryOp_match<LHS, RHS, Instruction::AShr> m_AShr(const LHS &L,
1154                                                          const RHS &R) {
1155  return BinaryOp_match<LHS, RHS, Instruction::AShr>(L, R);
1156}
1157 
1158template <typename LHS_t, typename RHS_t, unsigned Opcode,
1159          unsigned WrapFlags = 0>
1160struct OverflowingBinaryOp_match {
1161  LHS_t L;
1162  RHS_t R;
1163 
1164  OverflowingBinaryOp_match(const LHS_t &LHS, const RHS_t &RHS)
1165      : L(LHS), R(RHS) {}
1166 
1167  template <typename OpTy> bool match(OpTy *V) {
1168    if (auto *Op = dyn_cast<OverflowingBinaryOperator>(V)) {
1169      if (Op->getOpcode() != Opcode)
1170        return false;
1171      if ((WrapFlags & OverflowingBinaryOperator::NoUnsignedWrap) &&
1172          !Op->hasNoUnsignedWrap())
1173        return false;
1174      if ((WrapFlags & OverflowingBinaryOperator::NoSignedWrap) &&
1175          !Op->hasNoSignedWrap())
1176        return false;
1177      return L.match(Op->getOperand(0)) && R.match(Op->getOperand(1));
1178    }
1179    return false;
1180  }
1181};
1182 
1183template <typename LHS, typename RHS>
1184inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Add,
1185                                 OverflowingBinaryOperator::NoSignedWrap>
1186m_NSWAdd(const LHS &L, const RHS &R) {
1187  return OverflowingBinaryOp_match<LHS, RHS, Instruction::Add,
1188                                   OverflowingBinaryOperator::NoSignedWrap>(
1189      L, R);
1190}
1191template <typename LHS, typename RHS>
1192inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Sub,
1193                                 OverflowingBinaryOperator::NoSignedWrap>
1194m_NSWSub(const LHS &L, const RHS &R) {
1195  return OverflowingBinaryOp_match<LHS, RHS, Instruction::Sub,
1196                                   OverflowingBinaryOperator::NoSignedWrap>(
1197      L, R);
1198}
1199template <typename LHS, typename RHS>
1200inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Mul,
1201                                 OverflowingBinaryOperator::NoSignedWrap>
1202m_NSWMul(const LHS &L, const RHS &R) {
1203  return OverflowingBinaryOp_match<LHS, RHS, Instruction::Mul,
1204                                   OverflowingBinaryOperator::NoSignedWrap>(
1205      L, R);
1206}
1207template <typename LHS, typename RHS>
1208inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Shl,
1209                                 OverflowingBinaryOperator::NoSignedWrap>
1210m_NSWShl(const LHS &L, const RHS &R) {
1211  return OverflowingBinaryOp_match<LHS, RHS, Instruction::Shl,
1212                                   OverflowingBinaryOperator::NoSignedWrap>(
1213      L, R);
1214}
1215 
1216template <typename LHS, typename RHS>
1217inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Add,
1218                                 OverflowingBinaryOperator::NoUnsignedWrap>
1219m_NUWAdd(const LHS &L, const RHS &R) {
1220  return OverflowingBinaryOp_match<LHS, RHS, Instruction::Add,
1221                                   OverflowingBinaryOperator::NoUnsignedWrap>(
1222      L, R);
1223}
1224template <typename LHS, typename RHS>
1225inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Sub,
1226                                 OverflowingBinaryOperator::NoUnsignedWrap>
1227m_NUWSub(const LHS &L, const RHS &R) {
1228  return OverflowingBinaryOp_match<LHS, RHS, Instruction::Sub,
1229                                   OverflowingBinaryOperator::NoUnsignedWrap>(
1230      L, R);
1231}
1232template <typename LHS, typename RHS>
1233inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Mul,
1234                                 OverflowingBinaryOperator::NoUnsignedWrap>
1235m_NUWMul(const LHS &L, const RHS &R) {
1236  return OverflowingBinaryOp_match<LHS, RHS, Instruction::Mul,
1237                                   OverflowingBinaryOperator::NoUnsignedWrap>(
1238      L, R);
1239}
1240template <typename LHS, typename RHS>
1241inline OverflowingBinaryOp_match<LHS, RHS, Instruction::Shl,
1242                                 OverflowingBinaryOperator::NoUnsignedWrap>
1243m_NUWShl(const LHS &L, const RHS &R) {
1244  return OverflowingBinaryOp_match<LHS, RHS, Instruction::Shl,
1245                                   OverflowingBinaryOperator::NoUnsignedWrap>(
1246      L, R);
1247}
1248 
1249//===----------------------------------------------------------------------===//
1250// Class that matches a group of binary opcodes.
1251//
1252template <typename LHS_t, typename RHS_t, typename Predicate>
1253struct BinOpPred_match : Predicate {
1254  LHS_t L;
1255  RHS_t R;
1256 
1257  BinOpPred_match(const LHS_t &LHS, const RHS_t &RHS) : L(LHS), R(RHS) {}
1258 
1259  template <typename OpTy> bool match(OpTy *V) {
1260    if (auto *I = dyn_cast<Instruction>(V))
1261      return this->isOpType(I->getOpcode()) && L.match(I->getOperand(0)) &&
1262             R.match(I->getOperand(1));
1263    if (auto *CE = dyn_cast<ConstantExpr>(V))
1264      return this->isOpType(CE->getOpcode()) && L.match(CE->getOperand(0)) &&
1265             R.match(CE->getOperand(1));
1266    return false;
1267  }
1268};
1269 
1270struct is_shift_op {
1271  bool isOpType(unsigned Opcode) { return Instruction::isShift(Opcode); }
1272};
1273 
1274struct is_right_shift_op {
1275  bool isOpType(unsigned Opcode) {
1276    return Opcode == Instruction::LShr || Opcode == Instruction::AShr;
1277  }
1278};
1279 
1280struct is_logical_shift_op {
1281  bool isOpType(unsigned Opcode) {
1282    return Opcode == Instruction::LShr || Opcode == Instruction::Shl;
1283  }
1284};
1285 
1286struct is_bitwiselogic_op {
1287  bool isOpType(unsigned Opcode) {
1288    return Instruction::isBitwiseLogicOp(Opcode);
1289  }
1290};
1291 
1292struct is_idiv_op {
1293  bool isOpType(unsigned Opcode) {
1294    return Opcode == Instruction::SDiv || Opcode == Instruction::UDiv;
1295  }
1296};
1297 
1298struct is_irem_op {
1299  bool isOpType(unsigned Opcode) {
1300    return Opcode == Instruction::SRem || Opcode == Instruction::URem;
1301  }
1302};
1303 
1304/// Matches shift operations.
1305template <typename LHS, typename RHS>
1306inline BinOpPred_match<LHS, RHS, is_shift_op> m_Shift(const LHS &L,
1307                                                      const RHS &R) {
1308  return BinOpPred_match<LHS, RHS, is_shift_op>(L, R);
1309}
1310 
1311/// Matches logical shift operations.
1312template <typename LHS, typename RHS>
1313inline BinOpPred_match<LHS, RHS, is_right_shift_op> m_Shr(const LHS &L,
1314                                                          const RHS &R) {
1315  return BinOpPred_match<LHS, RHS, is_right_shift_op>(L, R);
1316}
1317 
1318/// Matches logical shift operations.
1319template <typename LHS, typename RHS>
1320inline BinOpPred_match<LHS, RHS, is_logical_shift_op>
1321m_LogicalShift(const LHS &L, const RHS &R) {
1322  return BinOpPred_match<LHS, RHS, is_logical_shift_op>(L, R);
1323}
1324 
1325/// Matches bitwise logic operations.
1326template <typename LHS, typename RHS>
1327inline BinOpPred_match<LHS, RHS, is_bitwiselogic_op>
1328m_BitwiseLogic(const LHS &L, const RHS &R) {
1329  return BinOpPred_match<LHS, RHS, is_bitwiselogic_op>(L, R);
1330}
1331 
1332/// Matches integer division operations.
1333template <typename LHS, typename RHS>
1334inline BinOpPred_match<LHS, RHS, is_idiv_op> m_IDiv(const LHS &L,
1335                                                    const RHS &R) {
1336  return BinOpPred_match<LHS, RHS, is_idiv_op>(L, R);
1337}
1338 
1339/// Matches integer remainder operations.
1340template <typename LHS, typename RHS>
1341inline BinOpPred_match<LHS, RHS, is_irem_op> m_IRem(const LHS &L,
1342                                                    const RHS &R) {
1343  return BinOpPred_match<LHS, RHS, is_irem_op>(L, R);
1344}
1345 
1346//===----------------------------------------------------------------------===//
1347// Class that matches exact binary ops.
1348//
1349template <typename SubPattern_t> struct Exact_match {
1350  SubPattern_t SubPattern;
1351 
1352  Exact_match(const SubPattern_t &SP) : SubPattern(SP) {}
1353 
1354  template <typename OpTy> bool match(OpTy *V) {
1355    if (auto *PEO = dyn_cast<PossiblyExactOperator>(V))
1356      return PEO->isExact() && SubPattern.match(V);
1357    return false;
1358  }
1359};
1360 
1361template <typename T> inline Exact_match<T> m_Exact(const T &SubPattern) {
1362  return SubPattern;
1363}
1364 
1365//===----------------------------------------------------------------------===//
1366// Matchers for CmpInst classes
1367//
1368 
1369template <typename LHS_t, typename RHS_t, typename Class, typename PredicateTy,
1370          bool Commutable = false>
1371struct CmpClass_match {
1372  PredicateTy &Predicate;
1373  LHS_t L;
1374  RHS_t R;
1375 
1376  // The evaluation order is always stable, regardless of Commutability.
1377  // The LHS is always matched first.
1378  CmpClass_match(PredicateTy &Pred, const LHS_t &LHS, const RHS_t &RHS)
1379      : Predicate(Pred), L(LHS), R(RHS) {}
1380 
1381  template <typename OpTy> bool match(OpTy *V) {
1382    if (auto *I = dyn_cast<Class>(V)) {
1383      if (L.match(I->getOperand(0)) && R.match(I->getOperand(1))) {
1384        Predicate = I->getPredicate();
1385        return true;
1386      } else if (Commutable && L.match(I->getOperand(1)) &&
1387           R.match(I->getOperand(0))) {
1388        Predicate = I->getSwappedPredicate();
1389        return true;
1390      }
1391    }
1392    return false;
1393  }
1394};
1395 
1396template <typename LHS, typename RHS>
1397inline CmpClass_match<LHS, RHS, CmpInst, CmpInst::Predicate>
1398m_Cmp(CmpInst::Predicate &Pred, const LHS &L, const RHS &R) {
1399  return CmpClass_match<LHS, RHS, CmpInst, CmpInst::Predicate>(Pred, L, R);
1400}
1401 
1402template <typename LHS, typename RHS>
1403inline CmpClass_match<LHS, RHS, ICmpInst, ICmpInst::Predicate>
1404m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R) {
1405  return CmpClass_match<LHS, RHS, ICmpInst, ICmpInst::Predicate>(Pred, L, R);
1406}
1407 
1408template <typename LHS, typename RHS>
1409inline CmpClass_match<LHS, RHS, FCmpInst, FCmpInst::Predicate>
1410m_FCmp(FCmpInst::Predicate &Pred, const LHS &L, const RHS &R) {
1411  return CmpClass_match<LHS, RHS, FCmpInst, FCmpInst::Predicate>(Pred, L, R);
1412}
1413 
1414//===----------------------------------------------------------------------===//
1415// Matchers for instructions with a given opcode and number of operands.
1416//
1417 
1418/// Matches instructions with Opcode and three operands.
1419template <typename T0, unsigned Opcode> struct OneOps_match {
1420  T0 Op1;
1421 
1422  OneOps_match(const T0 &Op1) : Op1(Op1) {}
1423 
1424  template <typename OpTy> bool match(OpTy *V) {
1425    if (V->getValueID() == Value::InstructionVal + Opcode) {
1426      auto *I = cast<Instruction>(V);
1427      return Op1.match(I->getOperand(0));
1428    }
1429    return false;
1430  }
1431};
1432 
1433/// Matches instructions with Opcode and three operands.
1434template <typename T0, typename T1, unsigned Opcode> struct TwoOps_match {
1435  T0 Op1;
1436  T1 Op2;
1437 
1438  TwoOps_match(const T0 &Op1, const T1 &Op2) : Op1(Op1), Op2(Op2) {}
1439 
1440  template <typename OpTy> bool match(OpTy *V) {
1441    if (V->getValueID() == Value::InstructionVal + Opcode) {
1442      auto *I = cast<Instruction>(V);
1443      return Op1.match(I->getOperand(0)) && Op2.match(I->getOperand(1));
1444    }
1445    return false;
1446  }
1447};
1448 
1449/// Matches instructions with Opcode and three operands.
1450template <typename T0, typename T1, typename T2, unsigned Opcode>
1451struct ThreeOps_match {
1452  T0 Op1;
1453  T1 Op2;
1454  T2 Op3;
1455 
1456  ThreeOps_match(const T0 &Op1, const T1 &Op2, const T2 &Op3)
1457      : Op1(Op1), Op2(Op2), Op3(Op3) {}
1458 
1459  template <typename OpTy> bool match(OpTy *V) {
1460    if (V->getValueID() == Value::InstructionVal + Opcode) {
1461      auto *I = cast<Instruction>(V);
1462      return Op1.match(I->getOperand(0)) && Op2.match(I->getOperand(1)) &&
1463             Op3.match(I->getOperand(2));
1464    }
1465    return false;
1466  }
1467};
1468 
1469/// Matches SelectInst.
1470template <typename Cond, typename LHS, typename RHS>
1471inline ThreeOps_match<Cond, LHS, RHS, Instruction::Select>
1472m_Select(const Cond &C, const LHS &L, const RHS &R) {
1473  return ThreeOps_match<Cond, LHS, RHS, Instruction::Select>(C, L, R);
1474}
1475 
1476/// This matches a select of two constants, e.g.:
1477/// m_SelectCst<-1, 0>(m_Value(V))
1478template <int64_t L, int64_t R, typename Cond>
1479inline ThreeOps_match<Cond, constantint_match<L>, constantint_match<R>,
1480                      Instruction::Select>
1481m_SelectCst(const Cond &C) {
1482  return m_Select(C, m_ConstantInt<L>(), m_ConstantInt<R>());
1483}
1484 
1485/// Matches FreezeInst.
1486template <typename OpTy>
1487inline OneOps_match<OpTy, Instruction::Freeze> m_Freeze(const OpTy &Op) {
1488  return OneOps_match<OpTy, Instruction::Freeze>(Op);
1489}
1490 
1491/// Matches InsertElementInst.
1492template <typename Val_t, typename Elt_t, typename Idx_t>
1493inline ThreeOps_match<Val_t, Elt_t, Idx_t, Instruction::InsertElement>
1494m_InsertElt(const Val_t &Val, const Elt_t &Elt, const Idx_t &Idx) {
1495  return ThreeOps_match<Val_t, Elt_t, Idx_t, Instruction::InsertElement>(
1496      Val, Elt, Idx);
1497}
1498 
1499/// Matches ExtractElementInst.
1500template <typename Val_t, typename Idx_t>
1501inline TwoOps_match<Val_t, Idx_t, Instruction::ExtractElement>
1502m_ExtractElt(const Val_t &Val, const Idx_t &Idx) {
1503  return TwoOps_match<Val_t, Idx_t, Instruction::ExtractElement>(Val, Idx);
1504}
1505 
1506/// Matches shuffle.
1507template <typename T0, typename T1, typename T2> struct Shuffle_match {
1508  T0 Op1;
1509  T1 Op2;
1510  T2 Mask;
1511 
1512  Shuffle_match(const T0 &Op1, const T1 &Op2, const T2 &Mask)
1513      : Op1(Op1), Op2(Op2), Mask(Mask) {}
1514 
1515  template <typename OpTy> bool match(OpTy *V) {
1516    if (auto *I = dyn_cast<ShuffleVectorInst>(V)) {
1517      return Op1.match(I->getOperand(0)) && Op2.match(I->getOperand(1)) &&
1518             Mask.match(I->getShuffleMask());
1519    }
1520    return false;
1521  }
1522};
1523 
1524struct m_Mask {
1525  ArrayRef<int> &MaskRef;
1526  m_Mask(ArrayRef<int> &MaskRef) : MaskRef(MaskRef) {}
1527  bool match(ArrayRef<int> Mask) {
1528    MaskRef = Mask;
1529    return true;
1530  }
1531};
1532 
1533struct m_ZeroMask {
1534  bool match(ArrayRef<int> Mask) {
1535    return all_of(Mask, [](int Elem) { return Elem == 0 || Elem == -1; });
1536  }
1537};
1538 
1539struct m_SpecificMask {
1540  ArrayRef<int> &MaskRef;
1541  m_SpecificMask(ArrayRef<int> &MaskRef) : MaskRef(MaskRef) {}
1542  bool match(ArrayRef<int> Mask) { return MaskRef == Mask; }
1543};
1544 
1545struct m_SplatOrUndefMask {
1546  int &SplatIndex;
1547  m_SplatOrUndefMask(int &SplatIndex) : SplatIndex(SplatIndex) {}
1548  bool match(ArrayRef<int> Mask) {
1549    auto First = find_if(Mask, [](int Elem) { return Elem != -1; });
1550    if (First == Mask.end())
1551      return false;
1552    SplatIndex = *First;
1553    return all_of(Mask,
1554                  [First](int Elem) { return Elem == *First || Elem == -1; });
1555  }
1556};
1557 
1558/// Matches ShuffleVectorInst independently of mask value.
1559template <typename V1_t, typename V2_t>
1560inline TwoOps_match<V1_t, V2_t, Instruction::ShuffleVector>
1561m_Shuffle(const V1_t &v1, const V2_t &v2) {
1562  return TwoOps_match<V1_t, V2_t, Instruction::ShuffleVector>(v1, v2);
1563}
1564 
1565template <typename V1_t, typename V2_t, typename Mask_t>
1566inline Shuffle_match<V1_t, V2_t, Mask_t>
1567m_Shuffle(const V1_t &v1, const V2_t &v2, const Mask_t &mask) {
1568  return Shuffle_match<V1_t, V2_t, Mask_t>(v1, v2, mask);
1569}
1570 
1571/// Matches LoadInst.
1572template <typename OpTy>
1573inline OneOps_match<OpTy, Instruction::Load> m_Load(const OpTy &Op) {
1574  return OneOps_match<OpTy, Instruction::Load>(Op);
1575}
1576 
1577/// Matches StoreInst.
1578template <typename ValueOpTy, typename PointerOpTy>
1579inline TwoOps_match<ValueOpTy, PointerOpTy, Instruction::Store>
1580m_Store(const ValueOpTy &ValueOp, const PointerOpTy &PointerOp) {
1581  return TwoOps_match<ValueOpTy, PointerOpTy, Instruction::Store>(ValueOp,
1582                                                                  PointerOp);
1583}
1584 
1585//===----------------------------------------------------------------------===//
1586// Matchers for CastInst classes
1587//
1588 
1589template <typename Op_t, unsigned Opcode> struct CastClass_match {
1590  Op_t Op;
1591 
1592  CastClass_match(const Op_t &OpMatch) : Op(OpMatch) {}
1593 
1594  template <typename OpTy> bool match(OpTy *V) {
1595    if (auto *O = dyn_cast<Operator>(V))
1596      return O->getOpcode() == Opcode && Op.match(O->getOperand(0));
1597    return false;
1598  }
1599};
1600 
1601/// Matches BitCast.
1602template <typename OpTy>
1603inline CastClass_match<OpTy, Instruction::BitCast> m_BitCast(const OpTy &Op) {
1604  return CastClass_match<OpTy, Instruction::BitCast>(Op);
1605}
1606 
1607/// Matches PtrToInt.
1608template <typename OpTy>
1609inline CastClass_match<OpTy, Instruction::PtrToInt> m_PtrToInt(const OpTy &Op) {
1610  return CastClass_match<OpTy, Instruction::PtrToInt>(Op);
1611}
1612 
1613/// Matches IntToPtr.
1614template <typename OpTy>
1615inline CastClass_match<OpTy, Instruction::IntToPtr> m_IntToPtr(const OpTy &Op) {
1616  return CastClass_match<OpTy, Instruction::IntToPtr>(Op);
1617}
1618 
1619/// Matches Trunc.
1620template <typename OpTy>
1621inline CastClass_match<OpTy, Instruction::Trunc> m_Trunc(const OpTy &Op) {
1622  return CastClass_match<OpTy, Instruction::Trunc>(Op);
1623}
1624 
1625template <typename OpTy>
1626inline match_combine_or<CastClass_match<OpTy, Instruction::Trunc>, OpTy>
1627m_TruncOrSelf(const OpTy &Op) {
1628  return m_CombineOr(m_Trunc(Op), Op);
1629}
1630 
1631/// Matches SExt.
1632template <typename OpTy>
1633inline CastClass_match<OpTy, Instruction::SExt> m_SExt(const OpTy &Op) {
1634  return CastClass_match<OpTy, Instruction::SExt>(Op);
1635}
1636 
1637/// Matches ZExt.
1638template <typename OpTy>
1639inline CastClass_match<OpTy, Instruction::ZExt> m_ZExt(const OpTy &Op) {
1640  return CastClass_match<OpTy, Instruction::ZExt>(Op);
1641}
1642 
1643template <typename OpTy>
1644inline match_combine_or<CastClass_match<OpTy, Instruction::ZExt>, OpTy>
1645m_ZExtOrSelf(const OpTy &Op) {
1646  return m_CombineOr(m_ZExt(Op), Op);
1647}
1648 
1649template <typename OpTy>
1650inline match_combine_or<CastClass_match<OpTy, Instruction::SExt>, OpTy>
1651m_SExtOrSelf(const OpTy &Op) {
1652  return m_CombineOr(m_SExt(Op), Op);
1653}
1654 
1655template <typename OpTy>
1656inline match_combine_or<CastClass_match<OpTy, Instruction::ZExt>,
1657                        CastClass_match<OpTy, Instruction::SExt>>
1658m_ZExtOrSExt(const OpTy &Op) {
1659  return m_CombineOr(m_ZExt(Op), m_SExt(Op));
1660}
1661 
1662template <typename OpTy>
1663inline match_combine_or<
1664    match_combine_or<CastClass_match<OpTy, Instruction::ZExt>,
1665                     CastClass_match<OpTy, Instruction::SExt>>,
1666    OpTy>
1667m_ZExtOrSExtOrSelf(const OpTy &Op) {
1668  return m_CombineOr(m_ZExtOrSExt(Op), Op);
1669}
1670 
1671template <typename OpTy>
1672inline CastClass_match<OpTy, Instruction::UIToFP> m_UIToFP(const OpTy &Op) {
1673  return CastClass_match<OpTy, Instruction::UIToFP>(Op);
1674}
1675 
1676template <typename OpTy>
1677inline CastClass_match<OpTy, Instruction::SIToFP> m_SIToFP(const OpTy &Op) {
1678  return CastClass_match<OpTy, Instruction::SIToFP>(Op);
1679}
1680 
1681template <typename OpTy>
1682inline CastClass_match<OpTy, Instruction::FPToUI> m_FPToUI(const OpTy &Op) {
1683  return CastClass_match<OpTy, Instruction::FPToUI>(Op);
1684}
1685 
1686template <typename OpTy>
1687inline CastClass_match<OpTy, Instruction::FPToSI> m_FPToSI(const OpTy &Op) {
1688  return CastClass_match<OpTy, Instruction::FPToSI>(Op);
1689}
1690 
1691template <typename OpTy>
1692inline CastClass_match<OpTy, Instruction::FPTrunc> m_FPTrunc(const OpTy &Op) {
1693  return CastClass_match<OpTy, Instruction::FPTrunc>(Op);
1694}
1695 
1696template <typename OpTy>
1697inline CastClass_match<OpTy, Instruction::FPExt> m_FPExt(const OpTy &Op) {
1698  return CastClass_match<OpTy, Instruction::FPExt>(Op);
1699}
1700 
1701//===----------------------------------------------------------------------===//
1702// Matchers for control flow.
1703//
1704 
1705struct br_match {
1706  BasicBlock *&Succ;
1707 
1708  br_match(BasicBlock *&Succ) : Succ(Succ) {}
1709 
1710  template <typename OpTy> bool match(OpTy *V) {
1711    if (auto *BI = dyn_cast<BranchInst>(V))
1712      if (BI->isUnconditional()) {
1713        Succ = BI->getSuccessor(0);
1714        return true;
1715      }
1716    return false;
1717  }
1718};
1719 
1720inline br_match m_UnconditionalBr(BasicBlock *&Succ) { return br_match(Succ); }
1721 
1722template <typename Cond_t, typename TrueBlock_t, typename FalseBlock_t>
1723struct brc_match {
1724  Cond_t Cond;
1725  TrueBlock_t T;
1726  FalseBlock_t F;
1727 
1728  brc_match(const Cond_t &C, const TrueBlock_t &t, const FalseBlock_t &f)
1729      : Cond(C), T(t), F(f) {}
1730 
1731  template <typename OpTy> bool match(OpTy *V) {
1732    if (auto *BI = dyn_cast<BranchInst>(V))
1733      if (BI->isConditional() && Cond.match(BI->getCondition()))
1734        return T.match(BI->getSuccessor(0)) && F.match(BI->getSuccessor(1));
1735    return false;
1736  }
1737};
1738 
1739template <typename Cond_t>
1740inline brc_match<Cond_t, bind_ty<BasicBlock>, bind_ty<BasicBlock>>
1741m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F) {
1742  return brc_match<Cond_t, bind_ty<BasicBlock>, bind_ty<BasicBlock>>(
1743      C, m_BasicBlock(T), m_BasicBlock(F));
1744}
1745 
1746template <typename Cond_t, typename TrueBlock_t, typename FalseBlock_t>
1747inline brc_match<Cond_t, TrueBlock_t, FalseBlock_t>
1748m_Br(const Cond_t &C, const TrueBlock_t &T, const FalseBlock_t &F) {
1749  return brc_match<Cond_t, TrueBlock_t, FalseBlock_t>(C, T, F);
1750}
1751 
1752//===----------------------------------------------------------------------===//
1753// Matchers for max/min idioms, eg: "select (sgt x, y), x, y" -> smax(x,y).
1754//
1755 
1756template <typename CmpInst_t, typename LHS_t, typename RHS_t, typename Pred_t,
1757          bool Commutable = false>
1758struct MaxMin_match {
1759  using PredType = Pred_t;
1760  LHS_t L;
1761  RHS_t R;
1762 
1763  // The evaluation order is always stable, regardless of Commutability.
1764  // The LHS is always matched first.
1765  MaxMin_match(const LHS_t &LHS, const RHS_t &RHS) : L(LHS), R(RHS) {}
1766 
1767  template <typename OpTy> bool match(OpTy *V) {
1768    if (auto *II = dyn_cast<IntrinsicInst>(V)) {
1769      Intrinsic::ID IID = II->getIntrinsicID();
1770      if ((IID == Intrinsic::smax && Pred_t::match(ICmpInst::ICMP_SGT)) ||
1771          (IID == Intrinsic::smin && Pred_t::match(ICmpInst::ICMP_SLT)) ||
1772          (IID == Intrinsic::umax && Pred_t::match(ICmpInst::ICMP_UGT)) ||
1773          (IID == Intrinsic::umin && Pred_t::match(ICmpInst::ICMP_ULT))) {
1774        Value *LHS = II->getOperand(0), *RHS = II->getOperand(1);
1775        return (L.match(LHS) && R.match(RHS)) ||
1776               (Commutable && L.match(RHS) && R.match(LHS));
1777      }
1778    }
1779    // Look for "(x pred y) ? x : y" or "(x pred y) ? y : x".
1780    auto *SI = dyn_cast<SelectInst>(V);
1781    if (!SI)
1782      return false;
1783    auto *Cmp = dyn_cast<CmpInst_t>(SI->getCondition());
1784    if (!Cmp)
1785      return false;
1786    // At this point we have a select conditioned on a comparison.  Check that
1787    // it is the values returned by the select that are being compared.
1788    auto *TrueVal = SI->getTrueValue();
1789    auto *FalseVal = SI->getFalseValue();
1790    auto *LHS = Cmp->getOperand(0);
1791    auto *RHS = Cmp->getOperand(1);
1792    if ((TrueVal != LHS || FalseVal != RHS) &&
1793        (TrueVal != RHS || FalseVal != LHS))
1794      return false;
1795    typename CmpInst_t::Predicate Pred =
1796        LHS == TrueVal ? Cmp->getPredicate() : Cmp->getInversePredicate();
1797    // Does "(x pred y) ? x : y" represent the desired max/min operation?
1798    if (!Pred_t::match(Pred))
1799      return false;
1800    // It does!  Bind the operands.
1801    return (L.match(LHS) && R.match(RHS)) ||
1802           (Commutable && L.match(RHS) && R.match(LHS));
1803  }
1804};
1805 
1806/// Helper class for identifying signed max predicates.
1807struct smax_pred_ty {
1808  static bool match(ICmpInst::Predicate Pred) {
1809    return Pred == CmpInst::ICMP_SGT || Pred == CmpInst::ICMP_SGE;
1810  }
1811};
1812 
1813/// Helper class for identifying signed min predicates.
1814struct smin_pred_ty {
1815  static bool match(ICmpInst::Predicate Pred) {
1816    return Pred == CmpInst::ICMP_SLT || Pred == CmpInst::ICMP_SLE;
1817  }
1818};
1819 
1820/// Helper class for identifying unsigned max predicates.
1821struct umax_pred_ty {
1822  static bool match(ICmpInst::Predicate Pred) {
1823    return Pred == CmpInst::ICMP_UGT || Pred == CmpInst::ICMP_UGE;
1824  }
1825};
1826 
1827/// Helper class for identifying unsigned min predicates.
1828struct umin_pred_ty {
1829  static bool match(ICmpInst::Predicate Pred) {
1830    return Pred == CmpInst::ICMP_ULT || Pred == CmpInst::ICMP_ULE;
1831  }
1832};
1833 
1834/// Helper class for identifying ordered max predicates.
1835struct ofmax_pred_ty {
1836  static bool match(FCmpInst::Predicate Pred) {
1837    return Pred == CmpInst::FCMP_OGT || Pred == CmpInst::FCMP_OGE;
1838  }
1839};
1840 
1841/// Helper class for identifying ordered min predicates.
1842struct ofmin_pred_ty {
1843  static bool match(FCmpInst::Predicate Pred) {
1844    return Pred == CmpInst::FCMP_OLT || Pred == CmpInst::FCMP_OLE;
1845  }
1846};
1847 
1848/// Helper class for identifying unordered max predicates.
1849struct ufmax_pred_ty {
1850  static bool match(FCmpInst::Predicate Pred) {
1851    return Pred == CmpInst::FCMP_UGT || Pred == CmpInst::FCMP_UGE;
1852  }
1853};
1854 
1855/// Helper class for identifying unordered min predicates.
1856struct ufmin_pred_ty {
1857  static bool match(FCmpInst::Predicate Pred) {
1858    return Pred == CmpInst::FCMP_ULT || Pred == CmpInst::FCMP_ULE;
1859  }
1860};
1861 
1862template <typename LHS, typename RHS>
1863inline MaxMin_match<ICmpInst, LHS, RHS, smax_pred_ty> m_SMax(const LHS &L,
1864                                                             const RHS &R) {
1865  return MaxMin_match<ICmpInst, LHS, RHS, smax_pred_ty>(L, R);
1866}
1867 
1868template <typename LHS, typename RHS>
1869inline MaxMin_match<ICmpInst, LHS, RHS, smin_pred_ty> m_SMin(const LHS &L,
1870                                                             const RHS &R) {
1871  return MaxMin_match<ICmpInst, LHS, RHS, smin_pred_ty>(L, R);
1872}
1873 
1874template <typename LHS, typename RHS>
1875inline MaxMin_match<ICmpInst, LHS, RHS, umax_pred_ty> m_UMax(const LHS &L,
1876                                                             const RHS &R) {
1877  return MaxMin_match<ICmpInst, LHS, RHS, umax_pred_ty>(L, R);
1878}
1879 
1880template <typename LHS, typename RHS>
1881inline MaxMin_match<ICmpInst, LHS, RHS, umin_pred_ty> m_UMin(const LHS &L,
1882                                                             const RHS &R) {
1883  return MaxMin_match<ICmpInst, LHS, RHS, umin_pred_ty>(L, R);
1884}
1885 
1886template <typename LHS, typename RHS>
1887inline match_combine_or<
1888    match_combine_or<MaxMin_match<ICmpInst, LHS, RHS, smax_pred_ty>,
1889                     MaxMin_match<ICmpInst, LHS, RHS, smin_pred_ty>>,
1890    match_combine_or<MaxMin_match<ICmpInst, LHS, RHS, umax_pred_ty>,
1891                     MaxMin_match<ICmpInst, LHS, RHS, umin_pred_ty>>>
1892m_MaxOrMin(const LHS &L, const RHS &R) {
1893  return m_CombineOr(m_CombineOr(m_SMax(L, R), m_SMin(L, R)),
1894                     m_CombineOr(m_UMax(L, R), m_UMin(L, R)));
1895}
1896 
1897/// Match an 'ordered' floating point maximum function.
1898/// Floating point has one special value 'NaN'. Therefore, there is no total
1899/// order. However, if we can ignore the 'NaN' value (for example, because of a
1900/// 'no-nans-float-math' flag) a combination of a fcmp and select has 'maximum'
1901/// semantics. In the presence of 'NaN' we have to preserve the original
1902/// select(fcmp(ogt/ge, L, R), L, R) semantics matched by this predicate.
1903///
1904///                         max(L, R)  iff L and R are not NaN
1905///  m_OrdFMax(L, R) =      R          iff L or R are NaN
1906template <typename LHS, typename RHS>
1907inline MaxMin_match<FCmpInst, LHS, RHS, ofmax_pred_ty> m_OrdFMax(const LHS &L,
1908                                                                 const RHS &R) {
1909  return MaxMin_match<FCmpInst, LHS, RHS, ofmax_pred_ty>(L, R);
1910}
1911 
1912/// Match an 'ordered' floating point minimum function.
1913/// Floating point has one special value 'NaN'. Therefore, there is no total
1914/// order. However, if we can ignore the 'NaN' value (for example, because of a
1915/// 'no-nans-float-math' flag) a combination of a fcmp and select has 'minimum'
1916/// semantics. In the presence of 'NaN' we have to preserve the original
1917/// select(fcmp(olt/le, L, R), L, R) semantics matched by this predicate.
1918///
1919///                         min(L, R)  iff L and R are not NaN
1920///  m_OrdFMin(L, R) =      R          iff L or R are NaN
1921template <typename LHS, typename RHS>
1922inline MaxMin_match<FCmpInst, LHS, RHS, ofmin_pred_ty> m_OrdFMin(const LHS &L,
1923                                                                 const RHS &R) {
1924  return MaxMin_match<FCmpInst, LHS, RHS, ofmin_pred_ty>(L, R);
1925}
1926 
1927/// Match an 'unordered' floating point maximum function.
1928/// Floating point has one special value 'NaN'. Therefore, there is no total
1929/// order. However, if we can ignore the 'NaN' value (for example, because of a
1930/// 'no-nans-float-math' flag) a combination of a fcmp and select has 'maximum'
1931/// semantics. In the presence of 'NaN' we have to preserve the original
1932/// select(fcmp(ugt/ge, L, R), L, R) semantics matched by this predicate.
1933///
1934///                         max(L, R)  iff L and R are not NaN
1935///  m_UnordFMax(L, R) =    L          iff L or R are NaN
1936template <typename LHS, typename RHS>
1937inline MaxMin_match<FCmpInst, LHS, RHS, ufmax_pred_ty>
1938m_UnordFMax(const LHS &L, const RHS &R) {
1939  return MaxMin_match<FCmpInst, LHS, RHS, ufmax_pred_ty>(L, R);
1940}
1941 
1942/// Match an 'unordered' floating point minimum function.
1943/// Floating point has one special value 'NaN'. Therefore, there is no total
1944/// order. However, if we can ignore the 'NaN' value (for example, because of a
1945/// 'no-nans-float-math' flag) a combination of a fcmp and select has 'minimum'
1946/// semantics. In the presence of 'NaN' we have to preserve the original
1947/// select(fcmp(ult/le, L, R), L, R) semantics matched by this predicate.
1948///
1949///                          min(L, R)  iff L and R are not NaN
1950///  m_UnordFMin(L, R) =     L          iff L or R are NaN
1951template <typename LHS, typename RHS>
1952inline MaxMin_match<FCmpInst, LHS, RHS, ufmin_pred_ty>
1953m_UnordFMin(const LHS &L, const RHS &R) {
1954  return MaxMin_match<FCmpInst, LHS, RHS, ufmin_pred_ty>(L, R);
1955}
1956 
1957//===----------------------------------------------------------------------===//
1958// Matchers for overflow check patterns: e.g. (a + b) u< a, (a ^ -1) <u b
1959// Note that S might be matched to other instructions than AddInst.
1960//
1961 
1962template <typename LHS_t, typename RHS_t, typename Sum_t>
1963struct UAddWithOverflow_match {
1964  LHS_t L;
1965  RHS_t R;
1966  Sum_t S;
1967 
1968  UAddWithOverflow_match(const LHS_t &L, const RHS_t &R, const Sum_t &S)
1969      : L(L), R(R), S(S) {}
1970 
1971  template <typename OpTy> bool match(OpTy *V) {
1972    Value *ICmpLHS, *ICmpRHS;
1973    ICmpInst::Predicate Pred;
1974    if (!m_ICmp(Pred, m_Value(ICmpLHS), m_Value(ICmpRHS)).match(V))
1975      return false;
1976 
1977    Value *AddLHS, *AddRHS;
1978    auto AddExpr = m_Add(m_Value(AddLHS), m_Value(AddRHS));
1979 
1980    // (a + b) u< a, (a + b) u< b
1981    if (Pred == ICmpInst::ICMP_ULT)
1982      if (AddExpr.match(ICmpLHS) && (ICmpRHS == AddLHS || ICmpRHS == AddRHS))
1983        return L.match(AddLHS) && R.match(AddRHS) && S.match(ICmpLHS);
1984 
1985    // a >u (a + b), b >u (a + b)
1986    if (Pred == ICmpInst::ICMP_UGT)
1987      if (AddExpr.match(ICmpRHS) && (ICmpLHS == AddLHS || ICmpLHS == AddRHS))
1988        return L.match(AddLHS) && R.match(AddRHS) && S.match(ICmpRHS);
1989 
1990    Value *Op1;
1991    auto XorExpr = m_OneUse(m_Xor(m_Value(Op1), m_AllOnes()));
1992    // (a ^ -1) <u b
1993    if (Pred == ICmpInst::ICMP_ULT) {
1994      if (XorExpr.match(ICmpLHS))
1995        return L.match(Op1) && R.match(ICmpRHS) && S.match(ICmpLHS);
1996    }
1997    //  b > u (a ^ -1)
1998    if (Pred == ICmpInst::ICMP_UGT) {
1999      if (XorExpr.match(ICmpRHS))
2000        return L.match(Op1) && R.match(ICmpLHS) && S.match(ICmpRHS);
2001    }
2002 
2003    // Match special-case for increment-by-1.
2004    if (Pred == ICmpInst::ICMP_EQ) {
2005      // (a + 1) == 0
2006      // (1 + a) == 0
2007      if (AddExpr.match(ICmpLHS) && m_ZeroInt().match(ICmpRHS) &&
2008          (m_One().match(AddLHS) || m_One().match(AddRHS)))
2009        return L.match(AddLHS) && R.match(AddRHS) && S.match(ICmpLHS);
2010      // 0 == (a + 1)
2011      // 0 == (1 + a)
2012      if (m_ZeroInt().match(ICmpLHS) && AddExpr.match(ICmpRHS) &&
2013          (m_One().match(AddLHS) || m_One().match(AddRHS)))
2014        return L.match(AddLHS) && R.match(AddRHS) && S.match(ICmpRHS);
2015    }
2016 
2017    return false;
2018  }
2019};
2020 
2021/// Match an icmp instruction checking for unsigned overflow on addition.
2022///
2023/// S is matched to the addition whose result is being checked for overflow, and
2024/// L and R are matched to the LHS and RHS of S.
2025template <typename LHS_t, typename RHS_t, typename Sum_t>
2026UAddWithOverflow_match<LHS_t, RHS_t, Sum_t>
2027m_UAddWithOverflow(const LHS_t &L, const RHS_t &R, const Sum_t &S) {
2028  return UAddWithOverflow_match<LHS_t, RHS_t, Sum_t>(L, R, S);
2029}
2030 
2031template <typename Opnd_t> struct Argument_match {
2032  unsigned OpI;
2033  Opnd_t Val;
2034 
2035  Argument_match(unsigned OpIdx, const Opnd_t &V) : OpI(OpIdx), Val(V) {}
2036 
2037  template <typename OpTy> bool match(OpTy *V) {
2038    // FIXME: Should likely be switched to use `CallBase`.
2039    if (const auto *CI = dyn_cast<CallInst>(V))
2040      return Val.match(CI->getArgOperand(OpI));
2041    return false;
2042  }
2043};
2044 
2045/// Match an argument.
2046template <unsigned OpI, typename Opnd_t>
2047inline Argument_match<Opnd_t> m_Argument(const Opnd_t &Op) {
2048  return Argument_match<Opnd_t>(OpI, Op);
2049}
2050 
2051/// Intrinsic matchers.
2052struct IntrinsicID_match {
2053  unsigned ID;
2054 
2055  IntrinsicID_match(Intrinsic::ID IntrID) : ID(IntrID) {}
2056 
2057  template <typename OpTy> bool match(OpTy *V) {
2058    if (const auto *CI = dyn_cast<CallInst>(V))
2059      if (const auto *F = CI->getCalledFunction())
2060        return F->getIntrinsicID() == ID;
2061    return false;
2062  }
2063};
2064 
2065/// Intrinsic matches are combinations of ID matchers, and argument
2066/// matchers. Higher arity matcher are defined recursively in terms of and-ing
2067/// them with lower arity matchers. Here's some convenient typedefs for up to
2068/// several arguments, and more can be added as needed
2069template <typename T0 = void, typename T1 = void, typename T2 = void,
2070          typename T3 = void, typename T4 = void, typename T5 = void,
2071          typename T6 = void, typename T7 = void, typename T8 = void,
2072          typename T9 = void, typename T10 = void>
2073struct m_Intrinsic_Ty;
2074template <typename T0> struct m_Intrinsic_Ty<T0> {
2075  using Ty = match_combine_and<IntrinsicID_match, Argument_match<T0>>;
2076};
2077template <typename T0, typename T1> struct m_Intrinsic_Ty<T0, T1> {
2078  using Ty =
2079      match_combine_and<typename m_Intrinsic_Ty<T0>::Ty, Argument_match<T1>>;
2080};
2081template <typename T0, typename T1, typename T2>
2082struct m_Intrinsic_Ty<T0, T1, T2> {
2083  using Ty =
2084      match_combine_and<typename m_Intrinsic_Ty<T0, T1>::Ty,
2085                        Argument_match<T2>>;
2086};
2087template <typename T0, typename T1, typename T2, typename T3>
2088struct m_Intrinsic_Ty<T0, T1, T2, T3> {
2089  using Ty =
2090      match_combine_and<typename m_Intrinsic_Ty<T0, T1, T2>::Ty,
2091                        Argument_match<T3>>;
2092};
2093 
2094template <typename T0, typename T1, typename T2, typename T3, typename T4>
2095struct m_Intrinsic_Ty<T0, T1, T2, T3, T4> {
2096  using Ty = match_combine_and<typename m_Intrinsic_Ty<T0, T1, T2, T3>::Ty,
2097                               Argument_match<T4>>;
2098};
2099 
2100template <typename T0, typename T1, typename T2, typename T3, typename T4, typename T5>
2101struct m_Intrinsic_Ty<T0, T1, T2, T3, T4, T5> {
2102  using Ty = match_combine_and<typename m_Intrinsic_Ty<T0, T1, T2, T3, T4>::Ty,
2103                               Argument_match<T5>>;
2104};
2105 
2106/// Match intrinsic calls like this:
2107/// m_Intrinsic<Intrinsic::fabs>(m_Value(X))
2108template <Intrinsic::ID IntrID> inline IntrinsicID_match m_Intrinsic() {
2109  return IntrinsicID_match(IntrID);
2110}
2111 
2112/// Matches MaskedLoad Intrinsic.
2113template <typename Opnd0, typename Opnd1, typename Opnd2, typename Opnd3>
2114inline typename m_Intrinsic_Ty<Opnd0, Opnd1, Opnd2, Opnd3>::Ty
2115m_MaskedLoad(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2,
2116             const Opnd3 &Op3) {
2117  return m_Intrinsic<Intrinsic::masked_load>(Op0, Op1, Op2, Op3);
2118}
2119 
2120template <Intrinsic::ID IntrID, typename T0>
2121inline typename m_Intrinsic_Ty<T0>::Ty m_Intrinsic(const T0 &Op0) {
2122  return m_CombineAnd(m_Intrinsic<IntrID>(), m_Argument<0>(Op0));
2123}
2124 
2125template <Intrinsic::ID IntrID, typename T0, typename T1>
2126inline typename m_Intrinsic_Ty<T0, T1>::Ty m_Intrinsic(const T0 &Op0,
2127                                                       const T1 &Op1) {
2128  return m_CombineAnd(m_Intrinsic<IntrID>(Op0), m_Argument<1>(Op1));
2129}
2130 
2131template <Intrinsic::ID IntrID, typename T0, typename T1, typename T2>
2132inline typename m_Intrinsic_Ty<T0, T1, T2>::Ty
2133m_Intrinsic(const T0 &Op0, const T1 &Op1, const T2 &Op2) {
2134  return m_CombineAnd(m_Intrinsic<IntrID>(Op0, Op1), m_Argument<2>(Op2));
2135}
2136 
2137template <Intrinsic::ID IntrID, typename T0, typename T1, typename T2,
2138          typename T3>
2139inline typename m_Intrinsic_Ty<T0, T1, T2, T3>::Ty
2140m_Intrinsic(const T0 &Op0, const T1 &Op1, const T2 &Op2, const T3 &Op3) {
2141  return m_CombineAnd(m_Intrinsic<IntrID>(Op0, Op1, Op2), m_Argument<3>(Op3));
2142}
2143 
2144template <Intrinsic::ID IntrID, typename T0, typename T1, typename T2,
2145          typename T3, typename T4>
2146inline typename m_Intrinsic_Ty<T0, T1, T2, T3, T4>::Ty
2147m_Intrinsic(const T0 &Op0, const T1 &Op1, const T2 &Op2, const T3 &Op3,
2148            const T4 &Op4) {
2149  return m_CombineAnd(m_Intrinsic<IntrID>(Op0, Op1, Op2, Op3),
2150                      m_Argument<4>(Op4));
2151}
2152 
2153template <Intrinsic::ID IntrID, typename T0, typename T1, typename T2,
2154          typename T3, typename T4, typename T5>
2155inline typename m_Intrinsic_Ty<T0, T1, T2, T3, T4, T5>::Ty
2156m_Intrinsic(const T0 &Op0, const T1 &Op1, const T2 &Op2, const T3 &Op3,
2157            const T4 &Op4, const T5 &Op5) {
2158  return m_CombineAnd(m_Intrinsic<IntrID>(Op0, Op1, Op2, Op3, Op4),
2159                      m_Argument<5>(Op5));
2160}
2161 
2162// Helper intrinsic matching specializations.
2163template <typename Opnd0>
2164inline typename m_Intrinsic_Ty<Opnd0>::Ty m_BitReverse(const Opnd0 &Op0) {
2165  return m_Intrinsic<Intrinsic::bitreverse>(Op0);
2166}
2167 
2168template <typename Opnd0>
2169inline typename m_Intrinsic_Ty<Opnd0>::Ty m_BSwap(const Opnd0 &Op0) {
2170  return m_Intrinsic<Intrinsic::bswap>(Op0);
2171}
2172 
2173template <typename Opnd0>
2174inline typename m_Intrinsic_Ty<Opnd0>::Ty m_FAbs(const Opnd0 &Op0) {
2175  return m_Intrinsic<Intrinsic::fabs>(Op0);
2176}
2177 
2178template <typename Opnd0>
2179inline typename m_Intrinsic_Ty<Opnd0>::Ty m_FCanonicalize(const Opnd0 &Op0) {
2180  return m_Intrinsic<Intrinsic::canonicalize>(Op0);
2181}
2182 
2183template <typename Opnd0, typename Opnd1>
2184inline typename m_Intrinsic_Ty<Opnd0, Opnd1>::Ty m_FMin(const Opnd0 &Op0,
2185                                                        const Opnd1 &Op1) {
2186  return m_Intrinsic<Intrinsic::minnum>(Op0, Op1);
2187}
2188 
2189template <typename Opnd0, typename Opnd1>
2190inline typename m_Intrinsic_Ty<Opnd0, Opnd1>::Ty m_FMax(const Opnd0 &Op0,
2191                                                        const Opnd1 &Op1) {
2192  return m_Intrinsic<Intrinsic::maxnum>(Op0, Op1);
2193}
2194 
2195template <typename Opnd0, typename Opnd1, typename Opnd2>
2196inline typename m_Intrinsic_Ty<Opnd0, Opnd1, Opnd2>::Ty
2197m_FShl(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2) {
2198  return m_Intrinsic<Intrinsic::fshl>(Op0, Op1, Op2);
2199}
2200 
2201template <typename Opnd0, typename Opnd1, typename Opnd2>
2202inline typename m_Intrinsic_Ty<Opnd0, Opnd1, Opnd2>::Ty
2203m_FShr(const Opnd0 &Op0, const Opnd1 &Op1, const Opnd2 &Op2) {
2204  return m_Intrinsic<Intrinsic::fshr>(Op0, Op1, Op2);
2205}
2206 
2207//===----------------------------------------------------------------------===//
2208// Matchers for two-operands operators with the operators in either order
2209//
2210 
2211/// Matches a BinaryOperator with LHS and RHS in either order.
2212template <typename LHS, typename RHS>
2213inline AnyBinaryOp_match<LHS, RHS, true> m_c_BinOp(const LHS &L, const RHS &R) {
2214  return AnyBinaryOp_match<LHS, RHS, true>(L, R);
2215}
2216 
2217/// Matches an ICmp with a predicate over LHS and RHS in either order.
2218/// Swaps the predicate if operands are commuted.
2219template <typename LHS, typename RHS>
2220inline CmpClass_match<LHS, RHS, ICmpInst, ICmpInst::Predicate, true>
2221m_c_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R) {
2222  return CmpClass_match<LHS, RHS, ICmpInst, ICmpInst::Predicate, true>(Pred, L,
2223                                                                       R);
2224}
2225 
2226/// Matches a Add with LHS and RHS in either order.
2227template <typename LHS, typename RHS>
2228inline BinaryOp_match<LHS, RHS, Instruction::Add, true> m_c_Add(const LHS &L,
2229                                                                const RHS &R) {
2230  return BinaryOp_match<LHS, RHS, Instruction::Add, true>(L, R);
2231}
2232 
2233/// Matches a Mul with LHS and RHS in either order.
2234template <typename LHS, typename RHS>
2235inline BinaryOp_match<LHS, RHS, Instruction::Mul, true> m_c_Mul(const LHS &L,
2236                                                                const RHS &R) {
2237  return BinaryOp_match<LHS, RHS, Instruction::Mul, true>(L, R);
2238}
2239 
2240/// Matches an And with LHS and RHS in either order.
2241template <typename LHS, typename RHS>
2242inline BinaryOp_match<LHS, RHS, Instruction::And, true> m_c_And(const LHS &L,
2243                                                                const RHS &R) {
2244  return BinaryOp_match<LHS, RHS, Instruction::And, true>(L, R);
2245}
2246 
2247/// Matches an Or with LHS and RHS in either order.
2248template <typename LHS, typename RHS>
2249inline BinaryOp_match<LHS, RHS, Instruction::Or, true> m_c_Or(const LHS &L,
2250                                                              const RHS &R) {
2251  return BinaryOp_match<LHS, RHS, Instruction::Or, true>(L, R);
2252}
2253 
2254/// Matches an Xor with LHS and RHS in either order.
2255template <typename LHS, typename RHS>
2256inline BinaryOp_match<LHS, RHS, Instruction::Xor, true> m_c_Xor(const LHS &L,
2257                                                                const RHS &R) {
2258  return BinaryOp_match<LHS, RHS, Instruction::Xor, true>(L, R);
2259}
2260 
2261/// Matches a 'Neg' as 'sub 0, V'.
2262template <typename ValTy>
2263inline BinaryOp_match<cst_pred_ty<is_zero_int>, ValTy, Instruction::Sub>
2264m_Neg(const ValTy &V) {
2265  return m_Sub(m_ZeroInt(), V);
2266}
2267 
2268/// Matches a 'Neg' as 'sub nsw 0, V'.
2269template <typename ValTy>
2270inline OverflowingBinaryOp_match<cst_pred_ty<is_zero_int>, ValTy,
2271                                 Instruction::Sub,
2272                                 OverflowingBinaryOperator::NoSignedWrap>
2273m_NSWNeg(const ValTy &V) {
2274  return m_NSWSub(m_ZeroInt(), V);
2275}
2276 
2277/// Matches a 'Not' as 'xor V, -1' or 'xor -1, V'.
2278template <typename ValTy>
2279inline BinaryOp_match<ValTy, cst_pred_ty<is_all_ones>, Instruction::Xor, true>
2280m_Not(const ValTy &V) {
2281  return m_c_Xor(V, m_AllOnes());
2282}
2283 
2284/// Matches an SMin with LHS and RHS in either order.
2285template <typename LHS, typename RHS>
2286inline MaxMin_match<ICmpInst, LHS, RHS, smin_pred_ty, true>
2287m_c_SMin(const LHS &L, const RHS &R) {
2288  return MaxMin_match<ICmpInst, LHS, RHS, smin_pred_ty, true>(L, R);
2289}
2290/// Matches an SMax with LHS and RHS in either order.
2291template <typename LHS, typename RHS>
2292inline MaxMin_match<ICmpInst, LHS, RHS, smax_pred_ty, true>
2293m_c_SMax(const LHS &L, const RHS &R) {
2294  return MaxMin_match<ICmpInst, LHS, RHS, smax_pred_ty, true>(L, R);
2295}
2296/// Matches a UMin with LHS and RHS in either order.
2297template <typename LHS, typename RHS>
2298inline MaxMin_match<ICmpInst, LHS, RHS, umin_pred_ty, true>
2299m_c_UMin(const LHS &L, const RHS &R) {
2300  return MaxMin_match<ICmpInst, LHS, RHS, umin_pred_ty, true>(L, R);
2301}
2302/// Matches a UMax with LHS and RHS in either order.
2303template <typename LHS, typename RHS>
2304inline MaxMin_match<ICmpInst, LHS, RHS, umax_pred_ty, true>
2305m_c_UMax(const LHS &L, const RHS &R) {
2306  return MaxMin_match<ICmpInst, LHS, RHS, umax_pred_ty, true>(L, R);
2307}
2308 
2309template <typename LHS, typename RHS>
2310inline match_combine_or<
2311    match_combine_or<MaxMin_match<ICmpInst, LHS, RHS, smax_pred_ty, true>,
2312                     MaxMin_match<ICmpInst, LHS, RHS, smin_pred_ty, true>>,
2313    match_combine_or<MaxMin_match<ICmpInst, LHS, RHS, umax_pred_ty, true>,
2314                     MaxMin_match<ICmpInst, LHS, RHS, umin_pred_ty, true>>>
2315m_c_MaxOrMin(const LHS &L, const RHS &R) {
2316  return m_CombineOr(m_CombineOr(m_c_SMax(L, R), m_c_SMin(L, R)),
2317                     m_CombineOr(m_c_UMax(L, R), m_c_UMin(L, R)));
2318}
2319 
2320/// Matches FAdd with LHS and RHS in either order.
2321template <typename LHS, typename RHS>
2322inline BinaryOp_match<LHS, RHS, Instruction::FAdd, true>
2323m_c_FAdd(const LHS &L, const RHS &R) {
2324  return BinaryOp_match<LHS, RHS, Instruction::FAdd, true>(L, R);
2325}
2326 
2327/// Matches FMul with LHS and RHS in either order.
2328template <typename LHS, typename RHS>
2329inline BinaryOp_match<LHS, RHS, Instruction::FMul, true>
2330m_c_FMul(const LHS &L, const RHS &R) {
2331  return BinaryOp_match<LHS, RHS, Instruction::FMul, true>(L, R);
2332}
2333 
2334template <typename Opnd_t> struct Signum_match {
2335  Opnd_t Val;
2336  Signum_match(const Opnd_t &V) : Val(V) {}
2337 
2338  template <typename OpTy> bool match(OpTy *V) {
2339    unsigned TypeSize = V->getType()->getScalarSizeInBits();
2340    if (TypeSize == 0)
2341      return false;
2342 
2343    unsigned ShiftWidth = TypeSize - 1;
2344    Value *OpL = nullptr, *OpR = nullptr;
2345 
2346    // This is the representation of signum we match:
2347    //
2348    //  signum(x) == (x >> 63) | (-x >>u 63)
2349    //
2350    // An i1 value is its own signum, so it's correct to match
2351    //
2352    //  signum(x) == (x >> 0)  | (-x >>u 0)
2353    //
2354    // for i1 values.
2355 
2356    auto LHS = m_AShr(m_Value(OpL), m_SpecificInt(ShiftWidth));
2357    auto RHS = m_LShr(m_Neg(m_Value(OpR)), m_SpecificInt(ShiftWidth));
2358    auto Signum = m_Or(LHS, RHS);
2359 
2360    return Signum.match(V) && OpL == OpR && Val.match(OpL);
2361  }
2362};
2363 
2364/// Matches a signum pattern.
2365///
2366/// signum(x) =
2367///      x >  0  ->  1
2368///      x == 0  ->  0
2369///      x <  0  -> -1
2370template <typename Val_t> inline Signum_match<Val_t> m_Signum(const Val_t &V) {
2371  return Signum_match<Val_t>(V);
2372}
2373 
2374template <int Ind, typename Opnd_t> struct ExtractValue_match {
2375  Opnd_t Val;
2376  ExtractValue_match(const Opnd_t &V) : Val(V) {}
2377 
2378  template <typename OpTy> bool match(OpTy *V) {
2379    if (auto *I = dyn_cast<ExtractValueInst>(V)) {
2380      // If Ind is -1, don't inspect indices
2381      if (Ind != -1 &&
2382          !(I->getNumIndices() == 1 && I->getIndices()[0] == (unsigned)Ind))
2383        return false;
2384      return Val.match(I->getAggregateOperand());
2385    }
2386    return false;
2387  }
2388};
2389 
2390/// Match a single index ExtractValue instruction.
2391/// For example m_ExtractValue<1>(...)
2392template <int Ind, typename Val_t>
2393inline ExtractValue_match<Ind, Val_t> m_ExtractValue(const Val_t &V) {
2394  return ExtractValue_match<Ind, Val_t>(V);
2395}
2396 
2397/// Match an ExtractValue instruction with any index.
2398/// For example m_ExtractValue(...)
2399template <typename Val_t>
2400inline ExtractValue_match<-1, Val_t> m_ExtractValue(const Val_t &V) {
2401  return ExtractValue_match<-1, Val_t>(V);
2402}
2403 
2404/// Matcher for a single index InsertValue instruction.
2405template <int Ind, typename T0, typename T1> struct InsertValue_match {
2406  T0 Op0;
2407  T1 Op1;
2408 
2409  InsertValue_match(const T0 &Op0, const T1 &Op1) : Op0(Op0), Op1(Op1) {}
2410 
2411  template <typename OpTy> bool match(OpTy *V) {
2412    if (auto *I = dyn_cast<InsertValueInst>(V)) {
2413      return Op0.match(I->getOperand(0)) && Op1.match(I->getOperand(1)) &&
2414             I->getNumIndices() == 1 && Ind == I->getIndices()[0];
2415    }
2416    return false;
2417  }
2418};
2419 
2420/// Matches a single index InsertValue instruction.
2421template <int Ind, typename Val_t, typename Elt_t>
2422inline InsertValue_match<Ind, Val_t, Elt_t> m_InsertValue(const Val_t &Val,
2423                                                          const Elt_t &Elt) {
2424  return InsertValue_match<Ind, Val_t, Elt_t>(Val, Elt);
2425}
2426 
2427/// Matches patterns for `vscale`. This can either be a call to `llvm.vscale` or
2428/// the constant expression
2429///  `ptrtoint(gep <vscale x 1 x i8>, <vscale x 1 x i8>* null, i32 1>`
2430/// under the right conditions determined by DataLayout.
2431struct VScaleVal_match {
2432  const DataLayout &DL;
2433  VScaleVal_match(const DataLayout &DL) : DL(DL) {}
2434 
2435  template <typename ITy> bool match(ITy *V) {
2436    if (m_Intrinsic<Intrinsic::vscale>().match(V))
2437      return true;
2438 
2439    Value *Ptr;
2440    if (m_PtrToInt(m_Value(Ptr)).match(V)) {
2441      if (auto *GEP = dyn_cast<GEPOperator>(Ptr)) {
2442        auto *DerefTy = GEP->getSourceElementType();
2443        if (GEP->getNumIndices() == 1 && isa<ScalableVectorType>(DerefTy) &&
2444            m_Zero().match(GEP->getPointerOperand()) &&
2445            m_SpecificInt(1).match(GEP->idx_begin()->get()) &&
2446            DL.getTypeAllocSizeInBits(DerefTy).getKnownMinSize() == 8)
2447          return true;
2448      }
2449    }
2450 
2451    return false;
2452  }
2453};
2454 
2455inline VScaleVal_match m_VScale(const DataLayout &DL) {
2456  return VScaleVal_match(DL);
2457}
2458 
2459template <typename LHS, typename RHS, unsigned Opcode>
2460struct LogicalOp_match {
2461  LHS L;
2462  RHS R;
2463 
2464  LogicalOp_match(const LHS &L, const RHS &R) : L(L), R(R) {}
2465 
2466  template <typename T> bool match(T *V) {
2467    if (auto *I34.1
'I' is null
43.1
'I' is null
34.1
'I' is null
43.1
'I' is null
34.1
'I' is null
43.1
'I' is null
34.1
'I' is null
43.1
'I' is null
34.1
'I' is null
43.1
'I' is null
 = dyn_cast<Instruction>(V)) {
34
←
Assuming 'V' is not a 'Instruction'→
35
←
Taking false branch→
43
←
'V' is not a 'Instruction'→
44
←
Taking false branch→
2468      if (!I->getType()->isIntOrIntVectorTy(1))
2469        return false;
2470 
2471      if (I->getOpcode() == Opcode && L.match(I->getOperand(0)) &&
2472          R.match(I->getOperand(1)))
2473        return true;
2474 
2475      if (auto *SI = dyn_cast<SelectInst>(I)) {
2476        if (Opcode == Instruction::And) {
2477          if (const auto *C = dyn_cast<Constant>(SI->getFalseValue()))
2478            if (C->isNullValue() && L.match(SI->getCondition()) &&
2479                R.match(SI->getTrueValue()))
2480              return true;
2481        } else {
2482          assert(Opcode == Instruction::Or)((void)0);
2483          if (const auto *C = dyn_cast<Constant>(SI->getTrueValue()))
2484            if (C->isOneValue() && L.match(SI->getCondition()) &&
2485                R.match(SI->getFalseValue()))
2486              return true;
2487        }
2488      }
2489    }
2490 
2491    return false;
36
←
Returning zero, which participates in a condition later→
45
←
Returning zero, which participates in a condition later→
2492  }
2493};
2494 
2495/// Matches L && R either in the form of L & R or L ? R : false.
2496/// Note that the latter form is poison-blocking.
2497template <typename LHS, typename RHS>
2498inline LogicalOp_match<LHS, RHS, Instruction::And>
2499m_LogicalAnd(const LHS &L, const RHS &R) {
2500  return LogicalOp_match<LHS, RHS, Instruction::And>(L, R);
2501}
2502 
2503/// Matches L && R where L and R are arbitrary values.
2504inline auto m_LogicalAnd() { return m_LogicalAnd(m_Value(), m_Value()); }
2505 
2506/// Matches L || R either in the form of L | R or L ? true : R.
2507/// Note that the latter form is poison-blocking.
2508template <typename LHS, typename RHS>
2509inline LogicalOp_match<LHS, RHS, Instruction::Or>
2510m_LogicalOr(const LHS &L, const RHS &R) {
2511  return LogicalOp_match<LHS, RHS, Instruction::Or>(L, R);
2512}
2513 
2514/// Matches L || R where L and R are arbitrary values.
2515inline auto m_LogicalOr() {
2516  return m_LogicalOr(m_Value(), m_Value());
2517}
2518 
2519} // end namespace PatternMatch
2520} // end namespace llvm
2521 
2522#endif // LLVM_IR_PATTERNMATCH_H