| File: | src/gnu/usr.bin/clang/libLLVM/../../../llvm/llvm/lib/Transforms/Scalar/LoopUnswitch.cpp |
| Warning: | line 1049, column 25 Called C++ object pointer is null |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
| 1 | //===- LoopUnswitch.cpp - Hoist loop-invariant conditionals in loop -------===// | |||
| 2 | // | |||
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | |||
| 4 | // See https://llvm.org/LICENSE.txt for license information. | |||
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | |||
| 6 | // | |||
| 7 | //===----------------------------------------------------------------------===// | |||
| 8 | // | |||
| 9 | // This pass transforms loops that contain branches on loop-invariant conditions | |||
| 10 | // to multiple loops. For example, it turns the left into the right code: | |||
| 11 | // | |||
| 12 | // for (...) if (lic) | |||
| 13 | // A for (...) | |||
| 14 | // if (lic) A; B; C | |||
| 15 | // B else | |||
| 16 | // C for (...) | |||
| 17 | // A; C | |||
| 18 | // | |||
| 19 | // This can increase the size of the code exponentially (doubling it every time | |||
| 20 | // a loop is unswitched) so we only unswitch if the resultant code will be | |||
| 21 | // smaller than a threshold. | |||
| 22 | // | |||
| 23 | // This pass expects LICM to be run before it to hoist invariant conditions out | |||
| 24 | // of the loop, to make the unswitching opportunity obvious. | |||
| 25 | // | |||
| 26 | //===----------------------------------------------------------------------===// | |||
| 27 | ||||
| 28 | #include "llvm/ADT/DenseMap.h" | |||
| 29 | #include "llvm/ADT/STLExtras.h" | |||
| 30 | #include "llvm/ADT/SmallPtrSet.h" | |||
| 31 | #include "llvm/ADT/SmallVector.h" | |||
| 32 | #include "llvm/ADT/Statistic.h" | |||
| 33 | #include "llvm/Analysis/AssumptionCache.h" | |||
| 34 | #include "llvm/Analysis/CodeMetrics.h" | |||
| 35 | #include "llvm/Analysis/InstructionSimplify.h" | |||
| 36 | #include "llvm/Analysis/LazyBlockFrequencyInfo.h" | |||
| 37 | #include "llvm/Analysis/LegacyDivergenceAnalysis.h" | |||
| 38 | #include "llvm/Analysis/LoopInfo.h" | |||
| 39 | #include "llvm/Analysis/LoopIterator.h" | |||
| 40 | #include "llvm/Analysis/LoopPass.h" | |||
| 41 | #include "llvm/Analysis/MemorySSA.h" | |||
| 42 | #include "llvm/Analysis/MemorySSAUpdater.h" | |||
| 43 | #include "llvm/Analysis/MustExecute.h" | |||
| 44 | #include "llvm/Analysis/ScalarEvolution.h" | |||
| 45 | #include "llvm/Analysis/TargetTransformInfo.h" | |||
| 46 | #include "llvm/IR/Attributes.h" | |||
| 47 | #include "llvm/IR/BasicBlock.h" | |||
| 48 | #include "llvm/IR/Constant.h" | |||
| 49 | #include "llvm/IR/Constants.h" | |||
| 50 | #include "llvm/IR/DerivedTypes.h" | |||
| 51 | #include "llvm/IR/Dominators.h" | |||
| 52 | #include "llvm/IR/Function.h" | |||
| 53 | #include "llvm/IR/IRBuilder.h" | |||
| 54 | #include "llvm/IR/InstrTypes.h" | |||
| 55 | #include "llvm/IR/Instruction.h" | |||
| 56 | #include "llvm/IR/Instructions.h" | |||
| 57 | #include "llvm/IR/IntrinsicInst.h" | |||
| 58 | #include "llvm/IR/Intrinsics.h" | |||
| 59 | #include "llvm/IR/Module.h" | |||
| 60 | #include "llvm/IR/Type.h" | |||
| 61 | #include "llvm/IR/User.h" | |||
| 62 | #include "llvm/IR/Value.h" | |||
| 63 | #include "llvm/IR/ValueHandle.h" | |||
| 64 | #include "llvm/InitializePasses.h" | |||
| 65 | #include "llvm/Pass.h" | |||
| 66 | #include "llvm/Support/Casting.h" | |||
| 67 | #include "llvm/Support/CommandLine.h" | |||
| 68 | #include "llvm/Support/Debug.h" | |||
| 69 | #include "llvm/Support/raw_ostream.h" | |||
| 70 | #include "llvm/Transforms/Scalar.h" | |||
| 71 | #include "llvm/Transforms/Scalar/LoopPassManager.h" | |||
| 72 | #include "llvm/Transforms/Utils/BasicBlockUtils.h" | |||
| 73 | #include "llvm/Transforms/Utils/Cloning.h" | |||
| 74 | #include "llvm/Transforms/Utils/Local.h" | |||
| 75 | #include "llvm/Transforms/Utils/LoopUtils.h" | |||
| 76 | #include "llvm/Transforms/Utils/ValueMapper.h" | |||
| 77 | #include <algorithm> | |||
| 78 | #include <cassert> | |||
| 79 | #include <map> | |||
| 80 | #include <set> | |||
| 81 | #include <tuple> | |||
| 82 | #include <utility> | |||
| 83 | #include <vector> | |||
| 84 | ||||
| 85 | using namespace llvm; | |||
| 86 | ||||
| 87 | #define DEBUG_TYPE"loop-unswitch" "loop-unswitch" | |||
| 88 | ||||
| 89 | STATISTIC(NumBranches, "Number of branches unswitched")static llvm::Statistic NumBranches = {"loop-unswitch", "NumBranches" , "Number of branches unswitched"}; | |||
| 90 | STATISTIC(NumSwitches, "Number of switches unswitched")static llvm::Statistic NumSwitches = {"loop-unswitch", "NumSwitches" , "Number of switches unswitched"}; | |||
| 91 | STATISTIC(NumGuards, "Number of guards unswitched")static llvm::Statistic NumGuards = {"loop-unswitch", "NumGuards" , "Number of guards unswitched"}; | |||
| 92 | STATISTIC(NumSelects , "Number of selects unswitched")static llvm::Statistic NumSelects = {"loop-unswitch", "NumSelects" , "Number of selects unswitched"}; | |||
| 93 | STATISTIC(NumTrivial , "Number of unswitches that are trivial")static llvm::Statistic NumTrivial = {"loop-unswitch", "NumTrivial" , "Number of unswitches that are trivial"}; | |||
| 94 | STATISTIC(NumSimplify, "Number of simplifications of unswitched code")static llvm::Statistic NumSimplify = {"loop-unswitch", "NumSimplify" , "Number of simplifications of unswitched code"}; | |||
| 95 | STATISTIC(TotalInsts, "Total number of instructions analyzed")static llvm::Statistic TotalInsts = {"loop-unswitch", "TotalInsts" , "Total number of instructions analyzed"}; | |||
| 96 | ||||
| 97 | // The specific value of 100 here was chosen based only on intuition and a | |||
| 98 | // few specific examples. | |||
| 99 | static cl::opt<unsigned> | |||
| 100 | Threshold("loop-unswitch-threshold", cl::desc("Max loop size to unswitch"), | |||
| 101 | cl::init(100), cl::Hidden); | |||
| 102 | ||||
| 103 | static cl::opt<unsigned> | |||
| 104 | MSSAThreshold("loop-unswitch-memoryssa-threshold", | |||
| 105 | cl::desc("Max number of memory uses to explore during " | |||
| 106 | "partial unswitching analysis"), | |||
| 107 | cl::init(100), cl::Hidden); | |||
| 108 | ||||
| 109 | namespace { | |||
| 110 | ||||
| 111 | class LUAnalysisCache { | |||
| 112 | using UnswitchedValsMap = | |||
| 113 | DenseMap<const SwitchInst *, SmallPtrSet<const Value *, 8>>; | |||
| 114 | using UnswitchedValsIt = UnswitchedValsMap::iterator; | |||
| 115 | ||||
| 116 | struct LoopProperties { | |||
| 117 | unsigned CanBeUnswitchedCount; | |||
| 118 | unsigned WasUnswitchedCount; | |||
| 119 | unsigned SizeEstimation; | |||
| 120 | UnswitchedValsMap UnswitchedVals; | |||
| 121 | }; | |||
| 122 | ||||
| 123 | // Here we use std::map instead of DenseMap, since we need to keep valid | |||
| 124 | // LoopProperties pointer for current loop for better performance. | |||
| 125 | using LoopPropsMap = std::map<const Loop *, LoopProperties>; | |||
| 126 | using LoopPropsMapIt = LoopPropsMap::iterator; | |||
| 127 | ||||
| 128 | LoopPropsMap LoopsProperties; | |||
| 129 | UnswitchedValsMap *CurLoopInstructions = nullptr; | |||
| 130 | LoopProperties *CurrentLoopProperties = nullptr; | |||
| 131 | ||||
| 132 | // A loop unswitching with an estimated cost above this threshold | |||
| 133 | // is not performed. MaxSize is turned into unswitching quota for | |||
| 134 | // the current loop, and reduced correspondingly, though note that | |||
| 135 | // the quota is returned by releaseMemory() when the loop has been | |||
| 136 | // processed, so that MaxSize will return to its previous | |||
| 137 | // value. So in most cases MaxSize will equal the Threshold flag | |||
| 138 | // when a new loop is processed. An exception to that is that | |||
| 139 | // MaxSize will have a smaller value while processing nested loops | |||
| 140 | // that were introduced due to loop unswitching of an outer loop. | |||
| 141 | // | |||
| 142 | // FIXME: The way that MaxSize works is subtle and depends on the | |||
| 143 | // pass manager processing loops and calling releaseMemory() in a | |||
| 144 | // specific order. It would be good to find a more straightforward | |||
| 145 | // way of doing what MaxSize does. | |||
| 146 | unsigned MaxSize; | |||
| 147 | ||||
| 148 | public: | |||
| 149 | LUAnalysisCache() : MaxSize(Threshold) {} | |||
| 150 | ||||
| 151 | // Analyze loop. Check its size, calculate is it possible to unswitch | |||
| 152 | // it. Returns true if we can unswitch this loop. | |||
| 153 | bool countLoop(const Loop *L, const TargetTransformInfo &TTI, | |||
| 154 | AssumptionCache *AC); | |||
| 155 | ||||
| 156 | // Clean all data related to given loop. | |||
| 157 | void forgetLoop(const Loop *L); | |||
| 158 | ||||
| 159 | // Mark case value as unswitched. | |||
| 160 | // Since SI instruction can be partly unswitched, in order to avoid | |||
| 161 | // extra unswitching in cloned loops keep track all unswitched values. | |||
| 162 | void setUnswitched(const SwitchInst *SI, const Value *V); | |||
| 163 | ||||
| 164 | // Check was this case value unswitched before or not. | |||
| 165 | bool isUnswitched(const SwitchInst *SI, const Value *V); | |||
| 166 | ||||
| 167 | // Returns true if another unswitching could be done within the cost | |||
| 168 | // threshold. | |||
| 169 | bool costAllowsUnswitching(); | |||
| 170 | ||||
| 171 | // Clone all loop-unswitch related loop properties. | |||
| 172 | // Redistribute unswitching quotas. | |||
| 173 | // Note, that new loop data is stored inside the VMap. | |||
| 174 | void cloneData(const Loop *NewLoop, const Loop *OldLoop, | |||
| 175 | const ValueToValueMapTy &VMap); | |||
| 176 | }; | |||
| 177 | ||||
| 178 | class LoopUnswitch : public LoopPass { | |||
| 179 | LoopInfo *LI; // Loop information | |||
| 180 | LPPassManager *LPM; | |||
| 181 | AssumptionCache *AC; | |||
| 182 | ||||
| 183 | // Used to check if second loop needs processing after | |||
| 184 | // rewriteLoopBodyWithConditionConstant rewrites first loop. | |||
| 185 | std::vector<Loop*> LoopProcessWorklist; | |||
| 186 | ||||
| 187 | LUAnalysisCache BranchesInfo; | |||
| 188 | ||||
| 189 | bool OptimizeForSize; | |||
| 190 | bool RedoLoop = false; | |||
| 191 | ||||
| 192 | Loop *CurrentLoop = nullptr; | |||
| 193 | DominatorTree *DT = nullptr; | |||
| 194 | MemorySSA *MSSA = nullptr; | |||
| 195 | AAResults *AA = nullptr; | |||
| 196 | std::unique_ptr<MemorySSAUpdater> MSSAU; | |||
| 197 | BasicBlock *LoopHeader = nullptr; | |||
| 198 | BasicBlock *LoopPreheader = nullptr; | |||
| 199 | ||||
| 200 | bool SanitizeMemory; | |||
| 201 | SimpleLoopSafetyInfo SafetyInfo; | |||
| 202 | ||||
| 203 | // LoopBlocks contains all of the basic blocks of the loop, including the | |||
| 204 | // preheader of the loop, the body of the loop, and the exit blocks of the | |||
| 205 | // loop, in that order. | |||
| 206 | std::vector<BasicBlock*> LoopBlocks; | |||
| 207 | // NewBlocks contained cloned copy of basic blocks from LoopBlocks. | |||
| 208 | std::vector<BasicBlock*> NewBlocks; | |||
| 209 | ||||
| 210 | bool HasBranchDivergence; | |||
| 211 | ||||
| 212 | public: | |||
| 213 | static char ID; // Pass ID, replacement for typeid | |||
| 214 | ||||
| 215 | explicit LoopUnswitch(bool Os = false, bool HasBranchDivergence = false) | |||
| 216 | : LoopPass(ID), OptimizeForSize(Os), | |||
| 217 | HasBranchDivergence(HasBranchDivergence) { | |||
| 218 | initializeLoopUnswitchPass(*PassRegistry::getPassRegistry()); | |||
| 219 | } | |||
| 220 | ||||
| 221 | bool runOnLoop(Loop *L, LPPassManager &LPM) override; | |||
| 222 | bool processCurrentLoop(); | |||
| 223 | bool isUnreachableDueToPreviousUnswitching(BasicBlock *); | |||
| 224 | ||||
| 225 | /// This transformation requires natural loop information & requires that | |||
| 226 | /// loop preheaders be inserted into the CFG. | |||
| 227 | /// | |||
| 228 | void getAnalysisUsage(AnalysisUsage &AU) const override { | |||
| 229 | // Lazy BFI and BPI are marked as preserved here so Loop Unswitching | |||
| 230 | // can remain part of the same loop pass as LICM | |||
| 231 | AU.addPreserved<LazyBlockFrequencyInfoPass>(); | |||
| 232 | AU.addPreserved<LazyBranchProbabilityInfoPass>(); | |||
| 233 | AU.addRequired<AssumptionCacheTracker>(); | |||
| 234 | AU.addRequired<TargetTransformInfoWrapperPass>(); | |||
| 235 | if (EnableMSSALoopDependency) { | |||
| 236 | AU.addRequired<MemorySSAWrapperPass>(); | |||
| 237 | AU.addPreserved<MemorySSAWrapperPass>(); | |||
| 238 | } | |||
| 239 | if (HasBranchDivergence) | |||
| 240 | AU.addRequired<LegacyDivergenceAnalysis>(); | |||
| 241 | getLoopAnalysisUsage(AU); | |||
| 242 | } | |||
| 243 | ||||
| 244 | private: | |||
| 245 | void releaseMemory() override { BranchesInfo.forgetLoop(CurrentLoop); } | |||
| 246 | ||||
| 247 | void initLoopData() { | |||
| 248 | LoopHeader = CurrentLoop->getHeader(); | |||
| 249 | LoopPreheader = CurrentLoop->getLoopPreheader(); | |||
| 250 | } | |||
| 251 | ||||
| 252 | /// Split all of the edges from inside the loop to their exit blocks. | |||
| 253 | /// Update the appropriate Phi nodes as we do so. | |||
| 254 | void splitExitEdges(Loop *L, | |||
| 255 | const SmallVectorImpl<BasicBlock *> &ExitBlocks); | |||
| 256 | ||||
| 257 | bool tryTrivialLoopUnswitch(bool &Changed); | |||
| 258 | ||||
| 259 | bool unswitchIfProfitable(Value *LoopCond, Constant *Val, | |||
| 260 | Instruction *TI = nullptr, | |||
| 261 | ArrayRef<Instruction *> ToDuplicate = {}); | |||
| 262 | void unswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val, | |||
| 263 | BasicBlock *ExitBlock, Instruction *TI); | |||
| 264 | void unswitchNontrivialCondition(Value *LIC, Constant *OnVal, Loop *L, | |||
| 265 | Instruction *TI, | |||
| 266 | ArrayRef<Instruction *> ToDuplicate = {}); | |||
| 267 | ||||
| 268 | void rewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC, | |||
| 269 | Constant *Val, bool IsEqual); | |||
| 270 | ||||
| 271 | void | |||
| 272 | emitPreheaderBranchOnCondition(Value *LIC, Constant *Val, | |||
| 273 | BasicBlock *TrueDest, BasicBlock *FalseDest, | |||
| 274 | BranchInst *OldBranch, Instruction *TI, | |||
| 275 | ArrayRef<Instruction *> ToDuplicate = {}); | |||
| 276 | ||||
| 277 | void simplifyCode(std::vector<Instruction *> &Worklist, Loop *L); | |||
| 278 | ||||
| 279 | /// Given that the Invariant is not equal to Val. Simplify instructions | |||
| 280 | /// in the loop. | |||
| 281 | Value *simplifyInstructionWithNotEqual(Instruction *Inst, Value *Invariant, | |||
| 282 | Constant *Val); | |||
| 283 | }; | |||
| 284 | ||||
| 285 | } // end anonymous namespace | |||
| 286 | ||||
| 287 | // Analyze loop. Check its size, calculate is it possible to unswitch | |||
| 288 | // it. Returns true if we can unswitch this loop. | |||
| 289 | bool LUAnalysisCache::countLoop(const Loop *L, const TargetTransformInfo &TTI, | |||
| 290 | AssumptionCache *AC) { | |||
| 291 | LoopPropsMapIt PropsIt; | |||
| 292 | bool Inserted; | |||
| 293 | std::tie(PropsIt, Inserted) = | |||
| 294 | LoopsProperties.insert(std::make_pair(L, LoopProperties())); | |||
| 295 | ||||
| 296 | LoopProperties &Props = PropsIt->second; | |||
| 297 | ||||
| 298 | if (Inserted) { | |||
| 299 | // New loop. | |||
| 300 | ||||
| 301 | // Limit the number of instructions to avoid causing significant code | |||
| 302 | // expansion, and the number of basic blocks, to avoid loops with | |||
| 303 | // large numbers of branches which cause loop unswitching to go crazy. | |||
| 304 | // This is a very ad-hoc heuristic. | |||
| 305 | ||||
| 306 | SmallPtrSet<const Value *, 32> EphValues; | |||
| 307 | CodeMetrics::collectEphemeralValues(L, AC, EphValues); | |||
| 308 | ||||
| 309 | // FIXME: This is overly conservative because it does not take into | |||
| 310 | // consideration code simplification opportunities and code that can | |||
| 311 | // be shared by the resultant unswitched loops. | |||
| 312 | CodeMetrics Metrics; | |||
| 313 | for (BasicBlock *BB : L->blocks()) | |||
| 314 | Metrics.analyzeBasicBlock(BB, TTI, EphValues); | |||
| 315 | ||||
| 316 | Props.SizeEstimation = Metrics.NumInsts; | |||
| 317 | Props.CanBeUnswitchedCount = MaxSize / (Props.SizeEstimation); | |||
| 318 | Props.WasUnswitchedCount = 0; | |||
| 319 | MaxSize -= Props.SizeEstimation * Props.CanBeUnswitchedCount; | |||
| 320 | ||||
| 321 | if (Metrics.notDuplicatable) { | |||
| 322 | LLVM_DEBUG(dbgs() << "NOT unswitching loop %" << L->getHeader()->getName()do { } while (false) | |||
| 323 | << ", contents cannot be "do { } while (false) | |||
| 324 | << "duplicated!\n")do { } while (false); | |||
| 325 | return false; | |||
| 326 | } | |||
| 327 | } | |||
| 328 | ||||
| 329 | // Be careful. This links are good only before new loop addition. | |||
| 330 | CurrentLoopProperties = &Props; | |||
| 331 | CurLoopInstructions = &Props.UnswitchedVals; | |||
| 332 | ||||
| 333 | return true; | |||
| 334 | } | |||
| 335 | ||||
| 336 | // Clean all data related to given loop. | |||
| 337 | void LUAnalysisCache::forgetLoop(const Loop *L) { | |||
| 338 | LoopPropsMapIt LIt = LoopsProperties.find(L); | |||
| 339 | ||||
| 340 | if (LIt != LoopsProperties.end()) { | |||
| 341 | LoopProperties &Props = LIt->second; | |||
| 342 | MaxSize += (Props.CanBeUnswitchedCount + Props.WasUnswitchedCount) * | |||
| 343 | Props.SizeEstimation; | |||
| 344 | LoopsProperties.erase(LIt); | |||
| 345 | } | |||
| 346 | ||||
| 347 | CurrentLoopProperties = nullptr; | |||
| 348 | CurLoopInstructions = nullptr; | |||
| 349 | } | |||
| 350 | ||||
| 351 | // Mark case value as unswitched. | |||
| 352 | // Since SI instruction can be partly unswitched, in order to avoid | |||
| 353 | // extra unswitching in cloned loops keep track all unswitched values. | |||
| 354 | void LUAnalysisCache::setUnswitched(const SwitchInst *SI, const Value *V) { | |||
| 355 | (*CurLoopInstructions)[SI].insert(V); | |||
| 356 | } | |||
| 357 | ||||
| 358 | // Check was this case value unswitched before or not. | |||
| 359 | bool LUAnalysisCache::isUnswitched(const SwitchInst *SI, const Value *V) { | |||
| 360 | return (*CurLoopInstructions)[SI].count(V); | |||
| 361 | } | |||
| 362 | ||||
| 363 | bool LUAnalysisCache::costAllowsUnswitching() { | |||
| 364 | return CurrentLoopProperties->CanBeUnswitchedCount > 0; | |||
| 365 | } | |||
| 366 | ||||
| 367 | // Clone all loop-unswitch related loop properties. | |||
| 368 | // Redistribute unswitching quotas. | |||
| 369 | // Note, that new loop data is stored inside the VMap. | |||
| 370 | void LUAnalysisCache::cloneData(const Loop *NewLoop, const Loop *OldLoop, | |||
| 371 | const ValueToValueMapTy &VMap) { | |||
| 372 | LoopProperties &NewLoopProps = LoopsProperties[NewLoop]; | |||
| 373 | LoopProperties &OldLoopProps = *CurrentLoopProperties; | |||
| 374 | UnswitchedValsMap &Insts = OldLoopProps.UnswitchedVals; | |||
| 375 | ||||
| 376 | // Reallocate "can-be-unswitched quota" | |||
| 377 | ||||
| 378 | --OldLoopProps.CanBeUnswitchedCount; | |||
| 379 | ++OldLoopProps.WasUnswitchedCount; | |||
| 380 | NewLoopProps.WasUnswitchedCount = 0; | |||
| 381 | unsigned Quota = OldLoopProps.CanBeUnswitchedCount; | |||
| 382 | NewLoopProps.CanBeUnswitchedCount = Quota / 2; | |||
| 383 | OldLoopProps.CanBeUnswitchedCount = Quota - Quota / 2; | |||
| 384 | ||||
| 385 | NewLoopProps.SizeEstimation = OldLoopProps.SizeEstimation; | |||
| 386 | ||||
| 387 | // Clone unswitched values info: | |||
| 388 | // for new loop switches we clone info about values that was | |||
| 389 | // already unswitched and has redundant successors. | |||
| 390 | for (const auto &I : Insts) { | |||
| 391 | const SwitchInst *OldInst = I.first; | |||
| 392 | Value *NewI = VMap.lookup(OldInst); | |||
| 393 | const SwitchInst *NewInst = cast_or_null<SwitchInst>(NewI); | |||
| 394 | assert(NewInst && "All instructions that are in SrcBB must be in VMap.")((void)0); | |||
| 395 | ||||
| 396 | NewLoopProps.UnswitchedVals[NewInst] = OldLoopProps.UnswitchedVals[OldInst]; | |||
| 397 | } | |||
| 398 | } | |||
| 399 | ||||
| 400 | char LoopUnswitch::ID = 0; | |||
| 401 | ||||
| 402 | INITIALIZE_PASS_BEGIN(LoopUnswitch, "loop-unswitch", "Unswitch loops",static void *initializeLoopUnswitchPassOnce(PassRegistry & Registry) { | |||
| 403 | false, false)static void *initializeLoopUnswitchPassOnce(PassRegistry & Registry) { | |||
| 404 | INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)initializeAssumptionCacheTrackerPass(Registry); | |||
| 405 | INITIALIZE_PASS_DEPENDENCY(LoopPass)initializeLoopPassPass(Registry); | |||
| 406 | INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)initializeTargetTransformInfoWrapperPassPass(Registry); | |||
| 407 | INITIALIZE_PASS_DEPENDENCY(LegacyDivergenceAnalysis)initializeLegacyDivergenceAnalysisPass(Registry); | |||
| 408 | INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)initializeMemorySSAWrapperPassPass(Registry); | |||
| 409 | INITIALIZE_PASS_END(LoopUnswitch, "loop-unswitch", "Unswitch loops",PassInfo *PI = new PassInfo( "Unswitch loops", "loop-unswitch" , &LoopUnswitch::ID, PassInfo::NormalCtor_t(callDefaultCtor <LoopUnswitch>), false, false); Registry.registerPass(* PI, true); return PI; } static llvm::once_flag InitializeLoopUnswitchPassFlag ; void llvm::initializeLoopUnswitchPass(PassRegistry &Registry ) { llvm::call_once(InitializeLoopUnswitchPassFlag, initializeLoopUnswitchPassOnce , std::ref(Registry)); } | |||
| 410 | false, false)PassInfo *PI = new PassInfo( "Unswitch loops", "loop-unswitch" , &LoopUnswitch::ID, PassInfo::NormalCtor_t(callDefaultCtor <LoopUnswitch>), false, false); Registry.registerPass(* PI, true); return PI; } static llvm::once_flag InitializeLoopUnswitchPassFlag ; void llvm::initializeLoopUnswitchPass(PassRegistry &Registry ) { llvm::call_once(InitializeLoopUnswitchPassFlag, initializeLoopUnswitchPassOnce , std::ref(Registry)); } | |||
| 411 | ||||
| 412 | Pass *llvm::createLoopUnswitchPass(bool Os, bool HasBranchDivergence) { | |||
| 413 | return new LoopUnswitch(Os, HasBranchDivergence); | |||
| 414 | } | |||
| 415 | ||||
| 416 | /// Operator chain lattice. | |||
| 417 | enum OperatorChain { | |||
| 418 | OC_OpChainNone, ///< There is no operator. | |||
| 419 | OC_OpChainOr, ///< There are only ORs. | |||
| 420 | OC_OpChainAnd, ///< There are only ANDs. | |||
| 421 | OC_OpChainMixed ///< There are ANDs and ORs. | |||
| 422 | }; | |||
| 423 | ||||
| 424 | /// Cond is a condition that occurs in L. If it is invariant in the loop, or has | |||
| 425 | /// an invariant piece, return the invariant. Otherwise, return null. | |||
| 426 | // | |||
| 427 | /// NOTE: findLIVLoopCondition will not return a partial LIV by walking up a | |||
| 428 | /// mixed operator chain, as we can not reliably find a value which will | |||
| 429 | /// simplify the operator chain. If the chain is AND-only or OR-only, we can use | |||
| 430 | /// 0 or ~0 to simplify the chain. | |||
| 431 | /// | |||
| 432 | /// NOTE: In case a partial LIV and a mixed operator chain, we may be able to | |||
| 433 | /// simplify the condition itself to a loop variant condition, but at the | |||
| 434 | /// cost of creating an entirely new loop. | |||
| 435 | static Value *findLIVLoopCondition(Value *Cond, Loop *L, bool &Changed, | |||
| 436 | OperatorChain &ParentChain, | |||
| 437 | DenseMap<Value *, Value *> &Cache, | |||
| 438 | MemorySSAUpdater *MSSAU) { | |||
| 439 | auto CacheIt = Cache.find(Cond); | |||
| 440 | if (CacheIt != Cache.end()) | |||
| 441 | return CacheIt->second; | |||
| 442 | ||||
| 443 | // We started analyze new instruction, increment scanned instructions counter. | |||
| 444 | ++TotalInsts; | |||
| 445 | ||||
| 446 | // We can never unswitch on vector conditions. | |||
| 447 | if (Cond->getType()->isVectorTy()) | |||
| 448 | return nullptr; | |||
| 449 | ||||
| 450 | // Constants should be folded, not unswitched on! | |||
| 451 | if (isa<Constant>(Cond)) return nullptr; | |||
| 452 | ||||
| 453 | // TODO: Handle: br (VARIANT|INVARIANT). | |||
| 454 | ||||
| 455 | // Hoist simple values out. | |||
| 456 | if (L->makeLoopInvariant(Cond, Changed, nullptr, MSSAU)) { | |||
| 457 | Cache[Cond] = Cond; | |||
| 458 | return Cond; | |||
| 459 | } | |||
| 460 | ||||
| 461 | // Walk up the operator chain to find partial invariant conditions. | |||
| 462 | if (BinaryOperator *BO = dyn_cast<BinaryOperator>(Cond)) | |||
| 463 | if (BO->getOpcode() == Instruction::And || | |||
| 464 | BO->getOpcode() == Instruction::Or) { | |||
| 465 | // Given the previous operator, compute the current operator chain status. | |||
| 466 | OperatorChain NewChain; | |||
| 467 | switch (ParentChain) { | |||
| 468 | case OC_OpChainNone: | |||
| 469 | NewChain = BO->getOpcode() == Instruction::And ? OC_OpChainAnd : | |||
| 470 | OC_OpChainOr; | |||
| 471 | break; | |||
| 472 | case OC_OpChainOr: | |||
| 473 | NewChain = BO->getOpcode() == Instruction::Or ? OC_OpChainOr : | |||
| 474 | OC_OpChainMixed; | |||
| 475 | break; | |||
| 476 | case OC_OpChainAnd: | |||
| 477 | NewChain = BO->getOpcode() == Instruction::And ? OC_OpChainAnd : | |||
| 478 | OC_OpChainMixed; | |||
| 479 | break; | |||
| 480 | case OC_OpChainMixed: | |||
| 481 | NewChain = OC_OpChainMixed; | |||
| 482 | break; | |||
| 483 | } | |||
| 484 | ||||
| 485 | // If we reach a Mixed state, we do not want to keep walking up as we can not | |||
| 486 | // reliably find a value that will simplify the chain. With this check, we | |||
| 487 | // will return null on the first sight of mixed chain and the caller will | |||
| 488 | // either backtrack to find partial LIV in other operand or return null. | |||
| 489 | if (NewChain != OC_OpChainMixed) { | |||
| 490 | // Update the current operator chain type before we search up the chain. | |||
| 491 | ParentChain = NewChain; | |||
| 492 | // If either the left or right side is invariant, we can unswitch on this, | |||
| 493 | // which will cause the branch to go away in one loop and the condition to | |||
| 494 | // simplify in the other one. | |||
| 495 | if (Value *LHS = findLIVLoopCondition(BO->getOperand(0), L, Changed, | |||
| 496 | ParentChain, Cache, MSSAU)) { | |||
| 497 | Cache[Cond] = LHS; | |||
| 498 | return LHS; | |||
| 499 | } | |||
| 500 | // We did not manage to find a partial LIV in operand(0). Backtrack and try | |||
| 501 | // operand(1). | |||
| 502 | ParentChain = NewChain; | |||
| 503 | if (Value *RHS = findLIVLoopCondition(BO->getOperand(1), L, Changed, | |||
| 504 | ParentChain, Cache, MSSAU)) { | |||
| 505 | Cache[Cond] = RHS; | |||
| 506 | return RHS; | |||
| 507 | } | |||
| 508 | } | |||
| 509 | } | |||
| 510 | ||||
| 511 | Cache[Cond] = nullptr; | |||
| 512 | return nullptr; | |||
| 513 | } | |||
| 514 | ||||
| 515 | /// Cond is a condition that occurs in L. If it is invariant in the loop, or has | |||
| 516 | /// an invariant piece, return the invariant along with the operator chain type. | |||
| 517 | /// Otherwise, return null. | |||
| 518 | static std::pair<Value *, OperatorChain> | |||
| 519 | findLIVLoopCondition(Value *Cond, Loop *L, bool &Changed, | |||
| 520 | MemorySSAUpdater *MSSAU) { | |||
| 521 | DenseMap<Value *, Value *> Cache; | |||
| 522 | OperatorChain OpChain = OC_OpChainNone; | |||
| 523 | Value *FCond = findLIVLoopCondition(Cond, L, Changed, OpChain, Cache, MSSAU); | |||
| 524 | ||||
| 525 | // In case we do find a LIV, it can not be obtained by walking up a mixed | |||
| 526 | // operator chain. | |||
| 527 | assert((!FCond || OpChain != OC_OpChainMixed) &&((void)0) | |||
| 528 | "Do not expect a partial LIV with mixed operator chain")((void)0); | |||
| 529 | return {FCond, OpChain}; | |||
| 530 | } | |||
| 531 | ||||
| 532 | bool LoopUnswitch::runOnLoop(Loop *L, LPPassManager &LPMRef) { | |||
| 533 | if (skipLoop(L)) | |||
| ||||
| 534 | return false; | |||
| 535 | ||||
| 536 | AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache( | |||
| 537 | *L->getHeader()->getParent()); | |||
| 538 | LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); | |||
| 539 | LPM = &LPMRef; | |||
| 540 | DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); | |||
| 541 | AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); | |||
| 542 | if (EnableMSSALoopDependency) { | |||
| 543 | MSSA = &getAnalysis<MemorySSAWrapperPass>().getMSSA(); | |||
| 544 | MSSAU = std::make_unique<MemorySSAUpdater>(MSSA); | |||
| 545 | assert(DT && "Cannot update MemorySSA without a valid DomTree.")((void)0); | |||
| 546 | } | |||
| 547 | CurrentLoop = L; | |||
| 548 | Function *F = CurrentLoop->getHeader()->getParent(); | |||
| 549 | ||||
| 550 | SanitizeMemory = F->hasFnAttribute(Attribute::SanitizeMemory); | |||
| 551 | if (SanitizeMemory) | |||
| 552 | SafetyInfo.computeLoopSafetyInfo(L); | |||
| 553 | ||||
| 554 | if (MSSA && VerifyMemorySSA) | |||
| 555 | MSSA->verifyMemorySSA(); | |||
| 556 | ||||
| 557 | bool Changed = false; | |||
| 558 | do { | |||
| 559 | assert(CurrentLoop->isLCSSAForm(*DT))((void)0); | |||
| 560 | if (MSSA
| |||
| 561 | MSSA->verifyMemorySSA(); | |||
| 562 | RedoLoop = false; | |||
| 563 | Changed |= processCurrentLoop(); | |||
| 564 | } while (RedoLoop); | |||
| 565 | ||||
| 566 | if (MSSA && VerifyMemorySSA) | |||
| 567 | MSSA->verifyMemorySSA(); | |||
| 568 | ||||
| 569 | return Changed; | |||
| 570 | } | |||
| 571 | ||||
| 572 | // Return true if the BasicBlock BB is unreachable from the loop header. | |||
| 573 | // Return false, otherwise. | |||
| 574 | bool LoopUnswitch::isUnreachableDueToPreviousUnswitching(BasicBlock *BB) { | |||
| 575 | auto *Node = DT->getNode(BB)->getIDom(); | |||
| 576 | BasicBlock *DomBB = Node->getBlock(); | |||
| 577 | while (CurrentLoop->contains(DomBB)) { | |||
| 578 | BranchInst *BInst = dyn_cast<BranchInst>(DomBB->getTerminator()); | |||
| 579 | ||||
| 580 | Node = DT->getNode(DomBB)->getIDom(); | |||
| 581 | DomBB = Node->getBlock(); | |||
| 582 | ||||
| 583 | if (!BInst || !BInst->isConditional()) | |||
| 584 | continue; | |||
| 585 | ||||
| 586 | Value *Cond = BInst->getCondition(); | |||
| 587 | if (!isa<ConstantInt>(Cond)) | |||
| 588 | continue; | |||
| 589 | ||||
| 590 | BasicBlock *UnreachableSucc = | |||
| 591 | Cond == ConstantInt::getTrue(Cond->getContext()) | |||
| 592 | ? BInst->getSuccessor(1) | |||
| 593 | : BInst->getSuccessor(0); | |||
| 594 | ||||
| 595 | if (DT->dominates(UnreachableSucc, BB)) | |||
| 596 | return true; | |||
| 597 | } | |||
| 598 | return false; | |||
| 599 | } | |||
| 600 | ||||
| 601 | /// FIXME: Remove this workaround when freeze related patches are done. | |||
| 602 | /// LoopUnswitch and Equality propagation in GVN have discrepancy about | |||
| 603 | /// whether branch on undef/poison has undefine behavior. Here it is to | |||
| 604 | /// rule out some common cases that we found such discrepancy already | |||
| 605 | /// causing problems. Detail could be found in PR31652. Note if the | |||
| 606 | /// func returns true, it is unsafe. But if it is false, it doesn't mean | |||
| 607 | /// it is necessarily safe. | |||
| 608 | static bool equalityPropUnSafe(Value &LoopCond) { | |||
| 609 | ICmpInst *CI = dyn_cast<ICmpInst>(&LoopCond); | |||
| 610 | if (!CI || !CI->isEquality()) | |||
| 611 | return false; | |||
| 612 | ||||
| 613 | Value *LHS = CI->getOperand(0); | |||
| 614 | Value *RHS = CI->getOperand(1); | |||
| 615 | if (isa<UndefValue>(LHS) || isa<UndefValue>(RHS)) | |||
| 616 | return true; | |||
| 617 | ||||
| 618 | auto HasUndefInPHI = [](PHINode &PN) { | |||
| 619 | for (Value *Opd : PN.incoming_values()) { | |||
| 620 | if (isa<UndefValue>(Opd)) | |||
| 621 | return true; | |||
| 622 | } | |||
| 623 | return false; | |||
| 624 | }; | |||
| 625 | PHINode *LPHI = dyn_cast<PHINode>(LHS); | |||
| 626 | PHINode *RPHI = dyn_cast<PHINode>(RHS); | |||
| 627 | if ((LPHI && HasUndefInPHI(*LPHI)) || (RPHI && HasUndefInPHI(*RPHI))) | |||
| 628 | return true; | |||
| 629 | ||||
| 630 | auto HasUndefInSelect = [](SelectInst &SI) { | |||
| 631 | if (isa<UndefValue>(SI.getTrueValue()) || | |||
| 632 | isa<UndefValue>(SI.getFalseValue())) | |||
| 633 | return true; | |||
| 634 | return false; | |||
| 635 | }; | |||
| 636 | SelectInst *LSI = dyn_cast<SelectInst>(LHS); | |||
| 637 | SelectInst *RSI = dyn_cast<SelectInst>(RHS); | |||
| 638 | if ((LSI && HasUndefInSelect(*LSI)) || (RSI && HasUndefInSelect(*RSI))) | |||
| 639 | return true; | |||
| 640 | return false; | |||
| 641 | } | |||
| 642 | ||||
| 643 | /// Do actual work and unswitch loop if possible and profitable. | |||
| 644 | bool LoopUnswitch::processCurrentLoop() { | |||
| 645 | bool Changed = false; | |||
| 646 | ||||
| 647 | initLoopData(); | |||
| 648 | ||||
| 649 | // If LoopSimplify was unable to form a preheader, don't do any unswitching. | |||
| 650 | if (!LoopPreheader) | |||
| 651 | return false; | |||
| 652 | ||||
| 653 | // Loops with indirectbr cannot be cloned. | |||
| 654 | if (!CurrentLoop->isSafeToClone()) | |||
| 655 | return false; | |||
| 656 | ||||
| 657 | // Without dedicated exits, splitting the exit edge may fail. | |||
| 658 | if (!CurrentLoop->hasDedicatedExits()) | |||
| 659 | return false; | |||
| 660 | ||||
| 661 | LLVMContext &Context = LoopHeader->getContext(); | |||
| 662 | ||||
| 663 | // Analyze loop cost, and stop unswitching if loop content can not be duplicated. | |||
| 664 | if (!BranchesInfo.countLoop( | |||
| 665 | CurrentLoop, | |||
| 666 | getAnalysis<TargetTransformInfoWrapperPass>().getTTI( | |||
| 667 | *CurrentLoop->getHeader()->getParent()), | |||
| 668 | AC)) | |||
| 669 | return false; | |||
| 670 | ||||
| 671 | // Try trivial unswitch first before loop over other basic blocks in the loop. | |||
| 672 | if (tryTrivialLoopUnswitch(Changed)) { | |||
| 673 | return true; | |||
| 674 | } | |||
| 675 | ||||
| 676 | // Do not do non-trivial unswitch while optimizing for size. | |||
| 677 | // FIXME: Use Function::hasOptSize(). | |||
| 678 | if (OptimizeForSize || | |||
| 679 | LoopHeader->getParent()->hasFnAttribute(Attribute::OptimizeForSize)) | |||
| 680 | return Changed; | |||
| 681 | ||||
| 682 | // Run through the instructions in the loop, keeping track of three things: | |||
| 683 | // | |||
| 684 | // - That we do not unswitch loops containing convergent operations, as we | |||
| 685 | // might be making them control dependent on the unswitch value when they | |||
| 686 | // were not before. | |||
| 687 | // FIXME: This could be refined to only bail if the convergent operation is | |||
| 688 | // not already control-dependent on the unswitch value. | |||
| 689 | // | |||
| 690 | // - That basic blocks in the loop contain invokes whose predecessor edges we | |||
| 691 | // cannot split. | |||
| 692 | // | |||
| 693 | // - The set of guard intrinsics encountered (these are non terminator | |||
| 694 | // instructions that are also profitable to be unswitched). | |||
| 695 | ||||
| 696 | SmallVector<IntrinsicInst *, 4> Guards; | |||
| 697 | ||||
| 698 | for (const auto BB : CurrentLoop->blocks()) { | |||
| 699 | for (auto &I : *BB) { | |||
| 700 | auto *CB = dyn_cast<CallBase>(&I); | |||
| 701 | if (!CB) | |||
| 702 | continue; | |||
| 703 | if (CB->isConvergent()) | |||
| 704 | return Changed; | |||
| 705 | if (auto *II = dyn_cast<InvokeInst>(&I)) | |||
| 706 | if (!II->getUnwindDest()->canSplitPredecessors()) | |||
| 707 | return Changed; | |||
| 708 | if (auto *II = dyn_cast<IntrinsicInst>(&I)) | |||
| 709 | if (II->getIntrinsicID() == Intrinsic::experimental_guard) | |||
| 710 | Guards.push_back(II); | |||
| 711 | } | |||
| 712 | } | |||
| 713 | ||||
| 714 | for (IntrinsicInst *Guard : Guards) { | |||
| 715 | Value *LoopCond = findLIVLoopCondition(Guard->getOperand(0), CurrentLoop, | |||
| 716 | Changed, MSSAU.get()) | |||
| 717 | .first; | |||
| 718 | if (LoopCond && | |||
| 719 | unswitchIfProfitable(LoopCond, ConstantInt::getTrue(Context))) { | |||
| 720 | // NB! Unswitching (if successful) could have erased some of the | |||
| 721 | // instructions in Guards leaving dangling pointers there. This is fine | |||
| 722 | // because we're returning now, and won't look at Guards again. | |||
| 723 | ++NumGuards; | |||
| 724 | return true; | |||
| 725 | } | |||
| 726 | } | |||
| 727 | ||||
| 728 | // Loop over all of the basic blocks in the loop. If we find an interior | |||
| 729 | // block that is branching on a loop-invariant condition, we can unswitch this | |||
| 730 | // loop. | |||
| 731 | for (Loop::block_iterator I = CurrentLoop->block_begin(), | |||
| 732 | E = CurrentLoop->block_end(); | |||
| 733 | I != E; ++I) { | |||
| 734 | Instruction *TI = (*I)->getTerminator(); | |||
| 735 | ||||
| 736 | // Unswitching on a potentially uninitialized predicate is not | |||
| 737 | // MSan-friendly. Limit this to the cases when the original predicate is | |||
| 738 | // guaranteed to execute, to avoid creating a use-of-uninitialized-value | |||
| 739 | // in the code that did not have one. | |||
| 740 | // This is a workaround for the discrepancy between LLVM IR and MSan | |||
| 741 | // semantics. See PR28054 for more details. | |||
| 742 | if (SanitizeMemory && | |||
| 743 | !SafetyInfo.isGuaranteedToExecute(*TI, DT, CurrentLoop)) | |||
| 744 | continue; | |||
| 745 | ||||
| 746 | if (BranchInst *BI = dyn_cast<BranchInst>(TI)) { | |||
| 747 | // Some branches may be rendered unreachable because of previous | |||
| 748 | // unswitching. | |||
| 749 | // Unswitch only those branches that are reachable. | |||
| 750 | if (isUnreachableDueToPreviousUnswitching(*I)) | |||
| 751 | continue; | |||
| 752 | ||||
| 753 | // If this isn't branching on an invariant condition, we can't unswitch | |||
| 754 | // it. | |||
| 755 | if (BI->isConditional()) { | |||
| 756 | // See if this, or some part of it, is loop invariant. If so, we can | |||
| 757 | // unswitch on it if we desire. | |||
| 758 | Value *LoopCond = findLIVLoopCondition(BI->getCondition(), CurrentLoop, | |||
| 759 | Changed, MSSAU.get()) | |||
| 760 | .first; | |||
| 761 | if (LoopCond && !equalityPropUnSafe(*LoopCond) && | |||
| 762 | unswitchIfProfitable(LoopCond, ConstantInt::getTrue(Context), TI)) { | |||
| 763 | ++NumBranches; | |||
| 764 | return true; | |||
| 765 | } | |||
| 766 | } | |||
| 767 | } else if (SwitchInst *SI = dyn_cast<SwitchInst>(TI)) { | |||
| 768 | Value *SC = SI->getCondition(); | |||
| 769 | Value *LoopCond; | |||
| 770 | OperatorChain OpChain; | |||
| 771 | std::tie(LoopCond, OpChain) = | |||
| 772 | findLIVLoopCondition(SC, CurrentLoop, Changed, MSSAU.get()); | |||
| 773 | ||||
| 774 | unsigned NumCases = SI->getNumCases(); | |||
| 775 | if (LoopCond && NumCases) { | |||
| 776 | // Find a value to unswitch on: | |||
| 777 | // FIXME: this should chose the most expensive case! | |||
| 778 | // FIXME: scan for a case with a non-critical edge? | |||
| 779 | Constant *UnswitchVal = nullptr; | |||
| 780 | // Find a case value such that at least one case value is unswitched | |||
| 781 | // out. | |||
| 782 | if (OpChain == OC_OpChainAnd) { | |||
| 783 | // If the chain only has ANDs and the switch has a case value of 0. | |||
| 784 | // Dropping in a 0 to the chain will unswitch out the 0-casevalue. | |||
| 785 | auto *AllZero = cast<ConstantInt>(Constant::getNullValue(SC->getType())); | |||
| 786 | if (BranchesInfo.isUnswitched(SI, AllZero)) | |||
| 787 | continue; | |||
| 788 | // We are unswitching 0 out. | |||
| 789 | UnswitchVal = AllZero; | |||
| 790 | } else if (OpChain == OC_OpChainOr) { | |||
| 791 | // If the chain only has ORs and the switch has a case value of ~0. | |||
| 792 | // Dropping in a ~0 to the chain will unswitch out the ~0-casevalue. | |||
| 793 | auto *AllOne = cast<ConstantInt>(Constant::getAllOnesValue(SC->getType())); | |||
| 794 | if (BranchesInfo.isUnswitched(SI, AllOne)) | |||
| 795 | continue; | |||
| 796 | // We are unswitching ~0 out. | |||
| 797 | UnswitchVal = AllOne; | |||
| 798 | } else { | |||
| 799 | assert(OpChain == OC_OpChainNone &&((void)0) | |||
| 800 | "Expect to unswitch on trivial chain")((void)0); | |||
| 801 | // Do not process same value again and again. | |||
| 802 | // At this point we have some cases already unswitched and | |||
| 803 | // some not yet unswitched. Let's find the first not yet unswitched one. | |||
| 804 | for (auto Case : SI->cases()) { | |||
| 805 | Constant *UnswitchValCandidate = Case.getCaseValue(); | |||
| 806 | if (!BranchesInfo.isUnswitched(SI, UnswitchValCandidate)) { | |||
| 807 | UnswitchVal = UnswitchValCandidate; | |||
| 808 | break; | |||
| 809 | } | |||
| 810 | } | |||
| 811 | } | |||
| 812 | ||||
| 813 | if (!UnswitchVal) | |||
| 814 | continue; | |||
| 815 | ||||
| 816 | if (unswitchIfProfitable(LoopCond, UnswitchVal)) { | |||
| 817 | ++NumSwitches; | |||
| 818 | // In case of a full LIV, UnswitchVal is the value we unswitched out. | |||
| 819 | // In case of a partial LIV, we only unswitch when its an AND-chain | |||
| 820 | // or OR-chain. In both cases switch input value simplifies to | |||
| 821 | // UnswitchVal. | |||
| 822 | BranchesInfo.setUnswitched(SI, UnswitchVal); | |||
| 823 | return true; | |||
| 824 | } | |||
| 825 | } | |||
| 826 | } | |||
| 827 | ||||
| 828 | // Scan the instructions to check for unswitchable values. | |||
| 829 | for (BasicBlock::iterator BBI = (*I)->begin(), E = (*I)->end(); | |||
| 830 | BBI != E; ++BBI) | |||
| 831 | if (SelectInst *SI = dyn_cast<SelectInst>(BBI)) { | |||
| 832 | Value *LoopCond = findLIVLoopCondition(SI->getCondition(), CurrentLoop, | |||
| 833 | Changed, MSSAU.get()) | |||
| 834 | .first; | |||
| 835 | if (LoopCond && | |||
| 836 | unswitchIfProfitable(LoopCond, ConstantInt::getTrue(Context))) { | |||
| 837 | ++NumSelects; | |||
| 838 | return true; | |||
| 839 | } | |||
| 840 | } | |||
| 841 | } | |||
| 842 | ||||
| 843 | // Check if there is a header condition that is invariant along the patch from | |||
| 844 | // either the true or false successors to the header. This allows unswitching | |||
| 845 | // conditions depending on memory accesses, if there's a path not clobbering | |||
| 846 | // the memory locations. Check if this transform has been disabled using | |||
| 847 | // metadata, to avoid unswitching the same loop multiple times. | |||
| 848 | if (MSSA && | |||
| 849 | !findOptionMDForLoop(CurrentLoop, "llvm.loop.unswitch.partial.disable")) { | |||
| 850 | if (auto Info = | |||
| 851 | hasPartialIVCondition(*CurrentLoop, MSSAThreshold, *MSSA, *AA)) { | |||
| 852 | assert(!Info->InstToDuplicate.empty() &&((void)0) | |||
| 853 | "need at least a partially invariant condition")((void)0); | |||
| 854 | LLVM_DEBUG(dbgs() << "loop-unswitch: Found partially invariant condition "do { } while (false) | |||
| 855 | << *Info->InstToDuplicate[0] << "\n")do { } while (false); | |||
| 856 | ||||
| 857 | Instruction *TI = CurrentLoop->getHeader()->getTerminator(); | |||
| 858 | Value *LoopCond = Info->InstToDuplicate[0]; | |||
| 859 | ||||
| 860 | // If the partially unswitched path is a no-op and has a single exit | |||
| 861 | // block, we do not need to do full unswitching. Instead, we can directly | |||
| 862 | // branch to the exit. | |||
| 863 | // TODO: Instead of duplicating the checks, we could also just directly | |||
| 864 | // branch to the exit from the conditional branch in the loop. | |||
| 865 | if (Info->PathIsNoop) { | |||
| 866 | if (HasBranchDivergence && | |||
| 867 | getAnalysis<LegacyDivergenceAnalysis>().isDivergent(LoopCond)) { | |||
| 868 | LLVM_DEBUG(dbgs() << "NOT unswitching loop %"do { } while (false) | |||
| 869 | << CurrentLoop->getHeader()->getName()do { } while (false) | |||
| 870 | << " at non-trivial condition '"do { } while (false) | |||
| 871 | << *Info->KnownValue << "' == " << *LoopCond << "\n"do { } while (false) | |||
| 872 | << ". Condition is divergent.\n")do { } while (false); | |||
| 873 | return false; | |||
| 874 | } | |||
| 875 | ||||
| 876 | ++NumBranches; | |||
| 877 | ||||
| 878 | BasicBlock *TrueDest = LoopHeader; | |||
| 879 | BasicBlock *FalseDest = Info->ExitForPath; | |||
| 880 | if (Info->KnownValue->isOneValue()) | |||
| 881 | std::swap(TrueDest, FalseDest); | |||
| 882 | ||||
| 883 | auto *OldBr = | |||
| 884 | cast<BranchInst>(CurrentLoop->getLoopPreheader()->getTerminator()); | |||
| 885 | emitPreheaderBranchOnCondition(LoopCond, Info->KnownValue, TrueDest, | |||
| 886 | FalseDest, OldBr, TI, | |||
| 887 | Info->InstToDuplicate); | |||
| 888 | delete OldBr; | |||
| 889 | RedoLoop = false; | |||
| 890 | return true; | |||
| 891 | } | |||
| 892 | ||||
| 893 | // Otherwise, the path is not a no-op. Run regular unswitching. | |||
| 894 | if (unswitchIfProfitable(LoopCond, Info->KnownValue, | |||
| 895 | CurrentLoop->getHeader()->getTerminator(), | |||
| 896 | Info->InstToDuplicate)) { | |||
| 897 | ++NumBranches; | |||
| 898 | RedoLoop = false; | |||
| 899 | return true; | |||
| 900 | } | |||
| 901 | } | |||
| 902 | } | |||
| 903 | ||||
| 904 | return Changed; | |||
| 905 | } | |||
| 906 | ||||
| 907 | /// Check to see if all paths from BB exit the loop with no side effects | |||
| 908 | /// (including infinite loops). | |||
| 909 | /// | |||
| 910 | /// If true, we return true and set ExitBB to the block we | |||
| 911 | /// exit through. | |||
| 912 | /// | |||
| 913 | static bool isTrivialLoopExitBlockHelper(Loop *L, BasicBlock *BB, | |||
| 914 | BasicBlock *&ExitBB, | |||
| 915 | std::set<BasicBlock*> &Visited) { | |||
| 916 | if (!Visited.insert(BB).second) { | |||
| 917 | // Already visited. Without more analysis, this could indicate an infinite | |||
| 918 | // loop. | |||
| 919 | return false; | |||
| 920 | } | |||
| 921 | if (!L->contains(BB)) { | |||
| 922 | // Otherwise, this is a loop exit, this is fine so long as this is the | |||
| 923 | // first exit. | |||
| 924 | if (ExitBB) return false; | |||
| 925 | ExitBB = BB; | |||
| 926 | return true; | |||
| 927 | } | |||
| 928 | ||||
| 929 | // Otherwise, this is an unvisited intra-loop node. Check all successors. | |||
| 930 | for (BasicBlock *Succ : successors(BB)) { | |||
| 931 | // Check to see if the successor is a trivial loop exit. | |||
| 932 | if (!isTrivialLoopExitBlockHelper(L, Succ, ExitBB, Visited)) | |||
| 933 | return false; | |||
| 934 | } | |||
| 935 | ||||
| 936 | // Okay, everything after this looks good, check to make sure that this block | |||
| 937 | // doesn't include any side effects. | |||
| 938 | for (Instruction &I : *BB) | |||
| 939 | if (I.mayHaveSideEffects()) | |||
| 940 | return false; | |||
| 941 | ||||
| 942 | return true; | |||
| 943 | } | |||
| 944 | ||||
| 945 | /// Return true if the specified block unconditionally leads to an exit from | |||
| 946 | /// the specified loop, and has no side-effects in the process. If so, return | |||
| 947 | /// the block that is exited to, otherwise return null. | |||
| 948 | static BasicBlock *isTrivialLoopExitBlock(Loop *L, BasicBlock *BB) { | |||
| 949 | std::set<BasicBlock*> Visited; | |||
| 950 | Visited.insert(L->getHeader()); // Branches to header make infinite loops. | |||
| 951 | BasicBlock *ExitBB = nullptr; | |||
| 952 | if (isTrivialLoopExitBlockHelper(L, BB, ExitBB, Visited)) | |||
| 953 | return ExitBB; | |||
| 954 | return nullptr; | |||
| 955 | } | |||
| 956 | ||||
| 957 | /// We have found that we can unswitch CurrentLoop when LoopCond == Val to | |||
| 958 | /// simplify the loop. If we decide that this is profitable, | |||
| 959 | /// unswitch the loop, reprocess the pieces, then return true. | |||
| 960 | bool LoopUnswitch::unswitchIfProfitable(Value *LoopCond, Constant *Val, | |||
| 961 | Instruction *TI, | |||
| 962 | ArrayRef<Instruction *> ToDuplicate) { | |||
| 963 | // Check to see if it would be profitable to unswitch current loop. | |||
| 964 | if (!BranchesInfo.costAllowsUnswitching()) { | |||
| 965 | LLVM_DEBUG(dbgs() << "NOT unswitching loop %"do { } while (false) | |||
| 966 | << CurrentLoop->getHeader()->getName()do { } while (false) | |||
| 967 | << " at non-trivial condition '" << *Valdo { } while (false) | |||
| 968 | << "' == " << *LoopCond << "\n"do { } while (false) | |||
| 969 | << ". Cost too high.\n")do { } while (false); | |||
| 970 | return false; | |||
| 971 | } | |||
| 972 | if (HasBranchDivergence && | |||
| 973 | getAnalysis<LegacyDivergenceAnalysis>().isDivergent(LoopCond)) { | |||
| 974 | LLVM_DEBUG(dbgs() << "NOT unswitching loop %"do { } while (false) | |||
| 975 | << CurrentLoop->getHeader()->getName()do { } while (false) | |||
| 976 | << " at non-trivial condition '" << *Valdo { } while (false) | |||
| 977 | << "' == " << *LoopCond << "\n"do { } while (false) | |||
| 978 | << ". Condition is divergent.\n")do { } while (false); | |||
| 979 | return false; | |||
| 980 | } | |||
| 981 | ||||
| 982 | unswitchNontrivialCondition(LoopCond, Val, CurrentLoop, TI, ToDuplicate); | |||
| 983 | return true; | |||
| 984 | } | |||
| 985 | ||||
| 986 | /// Emit a conditional branch on two values if LIC == Val, branch to TrueDst, | |||
| 987 | /// otherwise branch to FalseDest. Insert the code immediately before OldBranch | |||
| 988 | /// and remove (but not erase!) it from the function. | |||
| 989 | void LoopUnswitch::emitPreheaderBranchOnCondition( | |||
| 990 | Value *LIC, Constant *Val, BasicBlock *TrueDest, BasicBlock *FalseDest, | |||
| 991 | BranchInst *OldBranch, Instruction *TI, | |||
| 992 | ArrayRef<Instruction *> ToDuplicate) { | |||
| 993 | assert(OldBranch->isUnconditional() && "Preheader is not split correctly")((void)0); | |||
| 994 | assert(TrueDest != FalseDest && "Branch targets should be different")((void)0); | |||
| 995 | ||||
| 996 | // Insert a conditional branch on LIC to the two preheaders. The original | |||
| 997 | // code is the true version and the new code is the false version. | |||
| 998 | Value *BranchVal = LIC; | |||
| 999 | bool Swapped = false; | |||
| 1000 | ||||
| 1001 | if (!ToDuplicate.empty()) { | |||
| 1002 | ValueToValueMapTy Old2New; | |||
| 1003 | for (Instruction *I : reverse(ToDuplicate)) { | |||
| 1004 | auto *New = I->clone(); | |||
| 1005 | New->insertBefore(OldBranch); | |||
| 1006 | RemapInstruction(New, Old2New, | |||
| 1007 | RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); | |||
| 1008 | Old2New[I] = New; | |||
| 1009 | ||||
| 1010 | if (MSSAU) { | |||
| 1011 | MemorySSA *MSSA = MSSAU->getMemorySSA(); | |||
| 1012 | auto *MemA = dyn_cast_or_null<MemoryUse>(MSSA->getMemoryAccess(I)); | |||
| 1013 | if (!MemA) | |||
| 1014 | continue; | |||
| 1015 | ||||
| 1016 | Loop *L = LI->getLoopFor(I->getParent()); | |||
| 1017 | auto *DefiningAccess = MemA->getDefiningAccess(); | |||
| 1018 | // Get the first defining access before the loop. | |||
| 1019 | while (L->contains(DefiningAccess->getBlock())) { | |||
| 1020 | // If the defining access is a MemoryPhi, get the incoming | |||
| 1021 | // value for the pre-header as defining access. | |||
| 1022 | if (auto *MemPhi = dyn_cast<MemoryPhi>(DefiningAccess)) { | |||
| 1023 | DefiningAccess = | |||
| 1024 | MemPhi->getIncomingValueForBlock(L->getLoopPreheader()); | |||
| 1025 | } else { | |||
| 1026 | DefiningAccess = | |||
| 1027 | cast<MemoryDef>(DefiningAccess)->getDefiningAccess(); | |||
| 1028 | } | |||
| 1029 | } | |||
| 1030 | MSSAU->createMemoryAccessInBB(New, DefiningAccess, New->getParent(), | |||
| 1031 | MemorySSA::BeforeTerminator); | |||
| 1032 | } | |||
| 1033 | } | |||
| 1034 | BranchVal = Old2New[ToDuplicate[0]]; | |||
| 1035 | } else { | |||
| 1036 | ||||
| 1037 | if (!isa<ConstantInt>(Val) || | |||
| 1038 | Val->getType() != Type::getInt1Ty(LIC->getContext())) | |||
| 1039 | BranchVal = new ICmpInst(OldBranch, ICmpInst::ICMP_EQ, LIC, Val); | |||
| 1040 | else if (Val != ConstantInt::getTrue(Val->getContext())) { | |||
| 1041 | // We want to enter the new loop when the condition is true. | |||
| 1042 | std::swap(TrueDest, FalseDest); | |||
| 1043 | Swapped = true; | |||
| 1044 | } | |||
| 1045 | } | |||
| 1046 | ||||
| 1047 | // Old branch will be removed, so save its parent and successor to update the | |||
| 1048 | // DomTree. | |||
| 1049 | auto *OldBranchSucc = OldBranch->getSuccessor(0); | |||
| ||||
| 1050 | auto *OldBranchParent = OldBranch->getParent(); | |||
| 1051 | ||||
| 1052 | // Insert the new branch. | |||
| 1053 | BranchInst *BI = | |||
| 1054 | IRBuilder<>(OldBranch).CreateCondBr(BranchVal, TrueDest, FalseDest, TI); | |||
| 1055 | if (Swapped) | |||
| 1056 | BI->swapProfMetadata(); | |||
| 1057 | ||||
| 1058 | // Remove the old branch so there is only one branch at the end. This is | |||
| 1059 | // needed to perform DomTree's internal DFS walk on the function's CFG. | |||
| 1060 | OldBranch->removeFromParent(); | |||
| 1061 | ||||
| 1062 | // Inform the DT about the new branch. | |||
| 1063 | if (DT) { | |||
| 1064 | // First, add both successors. | |||
| 1065 | SmallVector<DominatorTree::UpdateType, 3> Updates; | |||
| 1066 | if (TrueDest != OldBranchSucc) | |||
| 1067 | Updates.push_back({DominatorTree::Insert, OldBranchParent, TrueDest}); | |||
| 1068 | if (FalseDest != OldBranchSucc) | |||
| 1069 | Updates.push_back({DominatorTree::Insert, OldBranchParent, FalseDest}); | |||
| 1070 | // If both of the new successors are different from the old one, inform the | |||
| 1071 | // DT that the edge was deleted. | |||
| 1072 | if (OldBranchSucc != TrueDest && OldBranchSucc != FalseDest) { | |||
| 1073 | Updates.push_back({DominatorTree::Delete, OldBranchParent, OldBranchSucc}); | |||
| 1074 | } | |||
| 1075 | ||||
| 1076 | if (MSSAU) | |||
| 1077 | MSSAU->applyUpdates(Updates, *DT, /*UpdateDT=*/true); | |||
| 1078 | else | |||
| 1079 | DT->applyUpdates(Updates); | |||
| 1080 | } | |||
| 1081 | ||||
| 1082 | // If either edge is critical, split it. This helps preserve LoopSimplify | |||
| 1083 | // form for enclosing loops. | |||
| 1084 | auto Options = | |||
| 1085 | CriticalEdgeSplittingOptions(DT, LI, MSSAU.get()).setPreserveLCSSA(); | |||
| 1086 | SplitCriticalEdge(BI, 0, Options); | |||
| 1087 | SplitCriticalEdge(BI, 1, Options); | |||
| 1088 | } | |||
| 1089 | ||||
| 1090 | /// Given a loop that has a trivial unswitchable condition in it (a cond branch | |||
| 1091 | /// from its header block to its latch block, where the path through the loop | |||
| 1092 | /// that doesn't execute its body has no side-effects), unswitch it. This | |||
| 1093 | /// doesn't involve any code duplication, just moving the conditional branch | |||
| 1094 | /// outside of the loop and updating loop info. | |||
| 1095 | void LoopUnswitch::unswitchTrivialCondition(Loop *L, Value *Cond, Constant *Val, | |||
| 1096 | BasicBlock *ExitBlock, | |||
| 1097 | Instruction *TI) { | |||
| 1098 | LLVM_DEBUG(dbgs() << "loop-unswitch: Trivial-Unswitch loop %"do { } while (false) | |||
| 1099 | << LoopHeader->getName() << " [" << L->getBlocks().size()do { } while (false) | |||
| 1100 | << " blocks] in Function "do { } while (false) | |||
| 1101 | << L->getHeader()->getParent()->getName()do { } while (false) | |||
| 1102 | << " on cond: " << *Val << " == " << *Cond << "\n")do { } while (false); | |||
| 1103 | // We are going to make essential changes to CFG. This may invalidate cached | |||
| 1104 | // information for L or one of its parent loops in SCEV. | |||
| 1105 | if (auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>()) | |||
| 1106 | SEWP->getSE().forgetTopmostLoop(L); | |||
| 1107 | ||||
| 1108 | // First step, split the preheader, so that we know that there is a safe place | |||
| 1109 | // to insert the conditional branch. We will change LoopPreheader to have a | |||
| 1110 | // conditional branch on Cond. | |||
| 1111 | BasicBlock *NewPH = SplitEdge(LoopPreheader, LoopHeader, DT, LI, MSSAU.get()); | |||
| 1112 | ||||
| 1113 | // Now that we have a place to insert the conditional branch, create a place | |||
| 1114 | // to branch to: this is the exit block out of the loop that we should | |||
| 1115 | // short-circuit to. | |||
| 1116 | ||||
| 1117 | // Split this block now, so that the loop maintains its exit block, and so | |||
| 1118 | // that the jump from the preheader can execute the contents of the exit block | |||
| 1119 | // without actually branching to it (the exit block should be dominated by the | |||
| 1120 | // loop header, not the preheader). | |||
| 1121 | assert(!L->contains(ExitBlock) && "Exit block is in the loop?")((void)0); | |||
| 1122 | BasicBlock *NewExit = | |||
| 1123 | SplitBlock(ExitBlock, &ExitBlock->front(), DT, LI, MSSAU.get()); | |||
| 1124 | ||||
| 1125 | // Okay, now we have a position to branch from and a position to branch to, | |||
| 1126 | // insert the new conditional branch. | |||
| 1127 | auto *OldBranch = dyn_cast<BranchInst>(LoopPreheader->getTerminator()); | |||
| 1128 | assert(OldBranch && "Failed to split the preheader")((void)0); | |||
| 1129 | emitPreheaderBranchOnCondition(Cond, Val, NewExit, NewPH, OldBranch, TI); | |||
| 1130 | ||||
| 1131 | // emitPreheaderBranchOnCondition removed the OldBranch from the function. | |||
| 1132 | // Delete it, as it is no longer needed. | |||
| 1133 | delete OldBranch; | |||
| 1134 | ||||
| 1135 | // We need to reprocess this loop, it could be unswitched again. | |||
| 1136 | RedoLoop = true; | |||
| 1137 | ||||
| 1138 | // Now that we know that the loop is never entered when this condition is a | |||
| 1139 | // particular value, rewrite the loop with this info. We know that this will | |||
| 1140 | // at least eliminate the old branch. | |||
| 1141 | rewriteLoopBodyWithConditionConstant(L, Cond, Val, /*IsEqual=*/false); | |||
| 1142 | ||||
| 1143 | ++NumTrivial; | |||
| 1144 | } | |||
| 1145 | ||||
| 1146 | /// Check if the first non-constant condition starting from the loop header is | |||
| 1147 | /// a trivial unswitch condition: that is, a condition controls whether or not | |||
| 1148 | /// the loop does anything at all. If it is a trivial condition, unswitching | |||
| 1149 | /// produces no code duplications (equivalently, it produces a simpler loop and | |||
| 1150 | /// a new empty loop, which gets deleted). Therefore always unswitch trivial | |||
| 1151 | /// condition. | |||
| 1152 | bool LoopUnswitch::tryTrivialLoopUnswitch(bool &Changed) { | |||
| 1153 | BasicBlock *CurrentBB = CurrentLoop->getHeader(); | |||
| 1154 | Instruction *CurrentTerm = CurrentBB->getTerminator(); | |||
| 1155 | LLVMContext &Context = CurrentBB->getContext(); | |||
| 1156 | ||||
| 1157 | // If loop header has only one reachable successor (currently via an | |||
| 1158 | // unconditional branch or constant foldable conditional branch, but | |||
| 1159 | // should also consider adding constant foldable switch instruction in | |||
| 1160 | // future), we should keep looking for trivial condition candidates in | |||
| 1161 | // the successor as well. An alternative is to constant fold conditions | |||
| 1162 | // and merge successors into loop header (then we only need to check header's | |||
| 1163 | // terminator). The reason for not doing this in LoopUnswitch pass is that | |||
| 1164 | // it could potentially break LoopPassManager's invariants. Folding dead | |||
| 1165 | // branches could either eliminate the current loop or make other loops | |||
| 1166 | // unreachable. LCSSA form might also not be preserved after deleting | |||
| 1167 | // branches. The following code keeps traversing loop header's successors | |||
| 1168 | // until it finds the trivial condition candidate (condition that is not a | |||
| 1169 | // constant). Since unswitching generates branches with constant conditions, | |||
| 1170 | // this scenario could be very common in practice. | |||
| 1171 | SmallPtrSet<BasicBlock*, 8> Visited; | |||
| 1172 | ||||
| 1173 | while (true) { | |||
| 1174 | // If we exit loop or reach a previous visited block, then | |||
| 1175 | // we can not reach any trivial condition candidates (unfoldable | |||
| 1176 | // branch instructions or switch instructions) and no unswitch | |||
| 1177 | // can happen. Exit and return false. | |||
| 1178 | if (!CurrentLoop->contains(CurrentBB) || !Visited.insert(CurrentBB).second) | |||
| 1179 | return false; | |||
| 1180 | ||||
| 1181 | // Check if this loop will execute any side-effecting instructions (e.g. | |||
| 1182 | // stores, calls, volatile loads) in the part of the loop that the code | |||
| 1183 | // *would* execute. Check the header first. | |||
| 1184 | for (Instruction &I : *CurrentBB) | |||
| 1185 | if (I.mayHaveSideEffects()) | |||
| 1186 | return false; | |||
| 1187 | ||||
| 1188 | if (BranchInst *BI
| |||
| 1189 | if (BI->isUnconditional()) { | |||
| 1190 | CurrentBB = BI->getSuccessor(0); | |||
| 1191 | } else if (BI->getCondition() == ConstantInt::getTrue(Context)) { | |||
| 1192 | CurrentBB = BI->getSuccessor(0); | |||
| 1193 | } else if (BI->getCondition() == ConstantInt::getFalse(Context)) { | |||
| 1194 | CurrentBB = BI->getSuccessor(1); | |||
| 1195 | } else { | |||
| 1196 | // Found a trivial condition candidate: non-foldable conditional branch. | |||
| 1197 | break; | |||
| 1198 | } | |||
| 1199 | } else if (SwitchInst *SI
| |||
| 1200 | // At this point, any constant-foldable instructions should have probably | |||
| 1201 | // been folded. | |||
| 1202 | ConstantInt *Cond = dyn_cast<ConstantInt>(SI->getCondition()); | |||
| 1203 | if (!Cond) | |||
| 1204 | break; | |||
| 1205 | // Find the target block we are definitely going to. | |||
| 1206 | CurrentBB = SI->findCaseValue(Cond)->getCaseSuccessor(); | |||
| 1207 | } else { | |||
| 1208 | // We do not understand these terminator instructions. | |||
| 1209 | break; | |||
| 1210 | } | |||
| 1211 | ||||
| 1212 | CurrentTerm = CurrentBB->getTerminator(); | |||
| 1213 | } | |||
| 1214 | ||||
| 1215 | // CondVal is the condition that controls the trivial condition. | |||
| 1216 | // LoopExitBB is the BasicBlock that loop exits when meets trivial condition. | |||
| 1217 | Constant *CondVal = nullptr; | |||
| 1218 | BasicBlock *LoopExitBB = nullptr; | |||
| 1219 | ||||
| 1220 | if (BranchInst *BI
| |||
| 1221 | // If this isn't branching on an invariant condition, we can't unswitch it. | |||
| 1222 | if (!BI->isConditional()) | |||
| 1223 | return false; | |||
| 1224 | ||||
| 1225 | Value *LoopCond = findLIVLoopCondition(BI->getCondition(), CurrentLoop, | |||
| 1226 | Changed, MSSAU.get()) | |||
| 1227 | .first; | |||
| 1228 | ||||
| 1229 | // Unswitch only if the trivial condition itself is an LIV (not | |||
| 1230 | // partial LIV which could occur in and/or) | |||
| 1231 | if (!LoopCond || LoopCond != BI->getCondition()) | |||
| 1232 | return false; | |||
| 1233 | ||||
| 1234 | // Check to see if a successor of the branch is guaranteed to | |||
| 1235 | // exit through a unique exit block without having any | |||
| 1236 | // side-effects. If so, determine the value of Cond that causes | |||
| 1237 | // it to do this. | |||
| 1238 | if ((LoopExitBB = | |||
| 1239 | isTrivialLoopExitBlock(CurrentLoop, BI->getSuccessor(0)))) { | |||
| 1240 | CondVal = ConstantInt::getTrue(Context); | |||
| 1241 | } else if ((LoopExitBB = | |||
| 1242 | isTrivialLoopExitBlock(CurrentLoop, BI->getSuccessor(1)))) { | |||
| 1243 | CondVal = ConstantInt::getFalse(Context); | |||
| 1244 | } | |||
| 1245 | ||||
| 1246 | // If we didn't find a single unique LoopExit block, or if the loop exit | |||
| 1247 | // block contains phi nodes, this isn't trivial. | |||
| 1248 | if (!LoopExitBB || isa<PHINode>(LoopExitBB->begin())) | |||
| 1249 | return false; // Can't handle this. | |||
| 1250 | ||||
| 1251 | if (equalityPropUnSafe(*LoopCond)) | |||
| 1252 | return false; | |||
| 1253 | ||||
| 1254 | unswitchTrivialCondition(CurrentLoop, LoopCond, CondVal, LoopExitBB, | |||
| 1255 | CurrentTerm); | |||
| 1256 | ++NumBranches; | |||
| 1257 | return true; | |||
| 1258 | } else if (SwitchInst *SI
| |||
| 1259 | // If this isn't switching on an invariant condition, we can't unswitch it. | |||
| 1260 | Value *LoopCond = findLIVLoopCondition(SI->getCondition(), CurrentLoop, | |||
| 1261 | Changed, MSSAU.get()) | |||
| 1262 | .first; | |||
| 1263 | ||||
| 1264 | // Unswitch only if the trivial condition itself is an LIV (not | |||
| 1265 | // partial LIV which could occur in and/or) | |||
| 1266 | if (!LoopCond || LoopCond != SI->getCondition()) | |||
| 1267 | return false; | |||
| 1268 | ||||
| 1269 | // Check to see if a successor of the switch is guaranteed to go to the | |||
| 1270 | // latch block or exit through a one exit block without having any | |||
| 1271 | // side-effects. If so, determine the value of Cond that causes it to do | |||
| 1272 | // this. | |||
| 1273 | // Note that we can't trivially unswitch on the default case or | |||
| 1274 | // on already unswitched cases. | |||
| 1275 | for (auto Case : SI->cases()) { | |||
| 1276 | BasicBlock *LoopExitCandidate; | |||
| 1277 | if ((LoopExitCandidate = | |||
| 1278 | isTrivialLoopExitBlock(CurrentLoop, Case.getCaseSuccessor()))) { | |||
| 1279 | // Okay, we found a trivial case, remember the value that is trivial. | |||
| 1280 | ConstantInt *CaseVal = Case.getCaseValue(); | |||
| 1281 | ||||
| 1282 | // Check that it was not unswitched before, since already unswitched | |||
| 1283 | // trivial vals are looks trivial too. | |||
| 1284 | if (BranchesInfo.isUnswitched(SI, CaseVal)) | |||
| 1285 | continue; | |||
| 1286 | LoopExitBB = LoopExitCandidate; | |||
| 1287 | CondVal = CaseVal; | |||
| 1288 | break; | |||
| 1289 | } | |||
| 1290 | } | |||
| 1291 | ||||
| 1292 | // If we didn't find a single unique LoopExit block, or if the loop exit | |||
| 1293 | // block contains phi nodes, this isn't trivial. | |||
| 1294 | if (!LoopExitBB
| |||
| 1295 | return false; // Can't handle this. | |||
| 1296 | ||||
| 1297 | unswitchTrivialCondition(CurrentLoop, LoopCond, CondVal, LoopExitBB, | |||
| 1298 | nullptr); | |||
| 1299 | ||||
| 1300 | // We are only unswitching full LIV. | |||
| 1301 | BranchesInfo.setUnswitched(SI, CondVal); | |||
| 1302 | ++NumSwitches; | |||
| 1303 | return true; | |||
| 1304 | } | |||
| 1305 | return false; | |||
| 1306 | } | |||
| 1307 | ||||
| 1308 | /// Split all of the edges from inside the loop to their exit blocks. | |||
| 1309 | /// Update the appropriate Phi nodes as we do so. | |||
| 1310 | void LoopUnswitch::splitExitEdges( | |||
| 1311 | Loop *L, const SmallVectorImpl<BasicBlock *> &ExitBlocks) { | |||
| 1312 | ||||
| 1313 | for (unsigned I = 0, E = ExitBlocks.size(); I != E; ++I) { | |||
| 1314 | BasicBlock *ExitBlock = ExitBlocks[I]; | |||
| 1315 | SmallVector<BasicBlock *, 4> Preds(pred_begin(ExitBlock), | |||
| 1316 | pred_end(ExitBlock)); | |||
| 1317 | ||||
| 1318 | // Although SplitBlockPredecessors doesn't preserve loop-simplify in | |||
| 1319 | // general, if we call it on all predecessors of all exits then it does. | |||
| 1320 | SplitBlockPredecessors(ExitBlock, Preds, ".us-lcssa", DT, LI, MSSAU.get(), | |||
| 1321 | /*PreserveLCSSA*/ true); | |||
| 1322 | } | |||
| 1323 | } | |||
| 1324 | ||||
| 1325 | /// We determined that the loop is profitable to unswitch when LIC equal Val. | |||
| 1326 | /// Split it into loop versions and test the condition outside of either loop. | |||
| 1327 | /// Return the loops created as Out1/Out2. | |||
| 1328 | void LoopUnswitch::unswitchNontrivialCondition( | |||
| 1329 | Value *LIC, Constant *Val, Loop *L, Instruction *TI, | |||
| 1330 | ArrayRef<Instruction *> ToDuplicate) { | |||
| 1331 | Function *F = LoopHeader->getParent(); | |||
| 1332 | LLVM_DEBUG(dbgs() << "loop-unswitch: Unswitching loop %"do { } while (false) | |||
| 1333 | << LoopHeader->getName() << " [" << L->getBlocks().size()do { } while (false) | |||
| 1334 | << " blocks] in Function " << F->getName() << " when '"do { } while (false) | |||
| 1335 | << *Val << "' == " << *LIC << "\n")do { } while (false); | |||
| 1336 | ||||
| 1337 | // We are going to make essential changes to CFG. This may invalidate cached | |||
| 1338 | // information for L or one of its parent loops in SCEV. | |||
| 1339 | if (auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>()) | |||
| 1340 | SEWP->getSE().forgetTopmostLoop(L); | |||
| 1341 | ||||
| 1342 | LoopBlocks.clear(); | |||
| 1343 | NewBlocks.clear(); | |||
| 1344 | ||||
| 1345 | if (MSSAU && VerifyMemorySSA) | |||
| 1346 | MSSA->verifyMemorySSA(); | |||
| 1347 | ||||
| 1348 | // First step, split the preheader and exit blocks, and add these blocks to | |||
| 1349 | // the LoopBlocks list. | |||
| 1350 | BasicBlock *NewPreheader = | |||
| 1351 | SplitEdge(LoopPreheader, LoopHeader, DT, LI, MSSAU.get()); | |||
| 1352 | LoopBlocks.push_back(NewPreheader); | |||
| 1353 | ||||
| 1354 | // We want the loop to come after the preheader, but before the exit blocks. | |||
| 1355 | llvm::append_range(LoopBlocks, L->blocks()); | |||
| 1356 | ||||
| 1357 | SmallVector<BasicBlock*, 8> ExitBlocks; | |||
| 1358 | L->getUniqueExitBlocks(ExitBlocks); | |||
| 1359 | ||||
| 1360 | // Split all of the edges from inside the loop to their exit blocks. Update | |||
| 1361 | // the appropriate Phi nodes as we do so. | |||
| 1362 | splitExitEdges(L, ExitBlocks); | |||
| 1363 | ||||
| 1364 | // The exit blocks may have been changed due to edge splitting, recompute. | |||
| 1365 | ExitBlocks.clear(); | |||
| 1366 | L->getUniqueExitBlocks(ExitBlocks); | |||
| 1367 | ||||
| 1368 | // Add exit blocks to the loop blocks. | |||
| 1369 | llvm::append_range(LoopBlocks, ExitBlocks); | |||
| 1370 | ||||
| 1371 | // Next step, clone all of the basic blocks that make up the loop (including | |||
| 1372 | // the loop preheader and exit blocks), keeping track of the mapping between | |||
| 1373 | // the instructions and blocks. | |||
| 1374 | NewBlocks.reserve(LoopBlocks.size()); | |||
| 1375 | ValueToValueMapTy VMap; | |||
| 1376 | for (unsigned I = 0, E = LoopBlocks.size(); I != E; ++I) { | |||
| 1377 | BasicBlock *NewBB = CloneBasicBlock(LoopBlocks[I], VMap, ".us", F); | |||
| 1378 | ||||
| 1379 | NewBlocks.push_back(NewBB); | |||
| 1380 | VMap[LoopBlocks[I]] = NewBB; // Keep the BB mapping. | |||
| 1381 | } | |||
| 1382 | ||||
| 1383 | // Splice the newly inserted blocks into the function right before the | |||
| 1384 | // original preheader. | |||
| 1385 | F->getBasicBlockList().splice(NewPreheader->getIterator(), | |||
| 1386 | F->getBasicBlockList(), | |||
| 1387 | NewBlocks[0]->getIterator(), F->end()); | |||
| 1388 | ||||
| 1389 | // Now we create the new Loop object for the versioned loop. | |||
| 1390 | Loop *NewLoop = cloneLoop(L, L->getParentLoop(), VMap, LI, LPM); | |||
| 1391 | ||||
| 1392 | // Recalculate unswitching quota, inherit simplified switches info for NewBB, | |||
| 1393 | // Probably clone more loop-unswitch related loop properties. | |||
| 1394 | BranchesInfo.cloneData(NewLoop, L, VMap); | |||
| 1395 | ||||
| 1396 | Loop *ParentLoop = L->getParentLoop(); | |||
| 1397 | if (ParentLoop) { | |||
| 1398 | // Make sure to add the cloned preheader and exit blocks to the parent loop | |||
| 1399 | // as well. | |||
| 1400 | ParentLoop->addBasicBlockToLoop(NewBlocks[0], *LI); | |||
| 1401 | } | |||
| 1402 | ||||
| 1403 | for (unsigned EBI = 0, EBE = ExitBlocks.size(); EBI != EBE; ++EBI) { | |||
| 1404 | BasicBlock *NewExit = cast<BasicBlock>(VMap[ExitBlocks[EBI]]); | |||
| 1405 | // The new exit block should be in the same loop as the old one. | |||
| 1406 | if (Loop *ExitBBLoop = LI->getLoopFor(ExitBlocks[EBI])) | |||
| 1407 | ExitBBLoop->addBasicBlockToLoop(NewExit, *LI); | |||
| 1408 | ||||
| 1409 | assert(NewExit->getTerminator()->getNumSuccessors() == 1 &&((void)0) | |||
| 1410 | "Exit block should have been split to have one successor!")((void)0); | |||
| 1411 | BasicBlock *ExitSucc = NewExit->getTerminator()->getSuccessor(0); | |||
| 1412 | ||||
| 1413 | // If the successor of the exit block had PHI nodes, add an entry for | |||
| 1414 | // NewExit. | |||
| 1415 | for (PHINode &PN : ExitSucc->phis()) { | |||
| 1416 | Value *V = PN.getIncomingValueForBlock(ExitBlocks[EBI]); | |||
| 1417 | ValueToValueMapTy::iterator It = VMap.find(V); | |||
| 1418 | if (It != VMap.end()) V = It->second; | |||
| 1419 | PN.addIncoming(V, NewExit); | |||
| 1420 | } | |||
| 1421 | ||||
| 1422 | if (LandingPadInst *LPad = NewExit->getLandingPadInst()) { | |||
| 1423 | PHINode *PN = PHINode::Create(LPad->getType(), 0, "", | |||
| 1424 | &*ExitSucc->getFirstInsertionPt()); | |||
| 1425 | ||||
| 1426 | for (BasicBlock *BB : predecessors(ExitSucc)) { | |||
| 1427 | LandingPadInst *LPI = BB->getLandingPadInst(); | |||
| 1428 | LPI->replaceAllUsesWith(PN); | |||
| 1429 | PN->addIncoming(LPI, BB); | |||
| 1430 | } | |||
| 1431 | } | |||
| 1432 | } | |||
| 1433 | ||||
| 1434 | // Rewrite the code to refer to itself. | |||
| 1435 | for (unsigned NBI = 0, NBE = NewBlocks.size(); NBI != NBE; ++NBI) { | |||
| 1436 | for (Instruction &I : *NewBlocks[NBI]) { | |||
| 1437 | RemapInstruction(&I, VMap, | |||
| 1438 | RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); | |||
| 1439 | if (auto *II = dyn_cast<AssumeInst>(&I)) | |||
| 1440 | AC->registerAssumption(II); | |||
| 1441 | } | |||
| 1442 | } | |||
| 1443 | ||||
| 1444 | // Rewrite the original preheader to select between versions of the loop. | |||
| 1445 | BranchInst *OldBR = cast<BranchInst>(LoopPreheader->getTerminator()); | |||
| 1446 | assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == LoopBlocks[0] &&((void)0) | |||
| 1447 | "Preheader splitting did not work correctly!")((void)0); | |||
| 1448 | ||||
| 1449 | if (MSSAU) { | |||
| 1450 | // Update MemorySSA after cloning, and before splitting to unreachables, | |||
| 1451 | // since that invalidates the 1:1 mapping of clones in VMap. | |||
| 1452 | LoopBlocksRPO LBRPO(L); | |||
| 1453 | LBRPO.perform(LI); | |||
| 1454 | MSSAU->updateForClonedLoop(LBRPO, ExitBlocks, VMap); | |||
| 1455 | } | |||
| 1456 | ||||
| 1457 | // Emit the new branch that selects between the two versions of this loop. | |||
| 1458 | emitPreheaderBranchOnCondition(LIC, Val, NewBlocks[0], LoopBlocks[0], OldBR, | |||
| 1459 | TI, ToDuplicate); | |||
| 1460 | if (MSSAU) { | |||
| 1461 | // Update MemoryPhis in Exit blocks. | |||
| 1462 | MSSAU->updateExitBlocksForClonedLoop(ExitBlocks, VMap, *DT); | |||
| 1463 | if (VerifyMemorySSA) | |||
| 1464 | MSSA->verifyMemorySSA(); | |||
| 1465 | } | |||
| 1466 | ||||
| 1467 | // The OldBr was replaced by a new one and removed (but not erased) by | |||
| 1468 | // emitPreheaderBranchOnCondition. It is no longer needed, so delete it. | |||
| 1469 | delete OldBR; | |||
| 1470 | ||||
| 1471 | LoopProcessWorklist.push_back(NewLoop); | |||
| 1472 | RedoLoop = true; | |||
| 1473 | ||||
| 1474 | // Keep a WeakTrackingVH holding onto LIC. If the first call to | |||
| 1475 | // RewriteLoopBody | |||
| 1476 | // deletes the instruction (for example by simplifying a PHI that feeds into | |||
| 1477 | // the condition that we're unswitching on), we don't rewrite the second | |||
| 1478 | // iteration. | |||
| 1479 | WeakTrackingVH LICHandle(LIC); | |||
| 1480 | ||||
| 1481 | if (ToDuplicate.empty()) { | |||
| 1482 | // Now we rewrite the original code to know that the condition is true and | |||
| 1483 | // the new code to know that the condition is false. | |||
| 1484 | rewriteLoopBodyWithConditionConstant(L, LIC, Val, /*IsEqual=*/false); | |||
| 1485 | ||||
| 1486 | // It's possible that simplifying one loop could cause the other to be | |||
| 1487 | // changed to another value or a constant. If its a constant, don't | |||
| 1488 | // simplify it. | |||
| 1489 | if (!LoopProcessWorklist.empty() && LoopProcessWorklist.back() == NewLoop && | |||
| 1490 | LICHandle && !isa<Constant>(LICHandle)) | |||
| 1491 | rewriteLoopBodyWithConditionConstant(NewLoop, LICHandle, Val, | |||
| 1492 | /*IsEqual=*/true); | |||
| 1493 | } else { | |||
| 1494 | // Partial unswitching. Update the condition in the right loop with the | |||
| 1495 | // constant. | |||
| 1496 | auto *CC = cast<ConstantInt>(Val); | |||
| 1497 | if (CC->isOneValue()) { | |||
| 1498 | rewriteLoopBodyWithConditionConstant(NewLoop, VMap[LIC], Val, | |||
| 1499 | /*IsEqual=*/true); | |||
| 1500 | } else | |||
| 1501 | rewriteLoopBodyWithConditionConstant(L, LIC, Val, /*IsEqual=*/true); | |||
| 1502 | ||||
| 1503 | // Mark the new loop as partially unswitched, to avoid unswitching on the | |||
| 1504 | // same condition again. | |||
| 1505 | auto &Context = NewLoop->getHeader()->getContext(); | |||
| 1506 | MDNode *DisableUnswitchMD = MDNode::get( | |||
| 1507 | Context, MDString::get(Context, "llvm.loop.unswitch.partial.disable")); | |||
| 1508 | MDNode *NewLoopID = makePostTransformationMetadata( | |||
| 1509 | Context, L->getLoopID(), {"llvm.loop.unswitch.partial"}, | |||
| 1510 | {DisableUnswitchMD}); | |||
| 1511 | NewLoop->setLoopID(NewLoopID); | |||
| 1512 | } | |||
| 1513 | ||||
| 1514 | if (MSSA && VerifyMemorySSA) | |||
| 1515 | MSSA->verifyMemorySSA(); | |||
| 1516 | } | |||
| 1517 | ||||
| 1518 | /// Remove all instances of I from the worklist vector specified. | |||
| 1519 | static void removeFromWorklist(Instruction *I, | |||
| 1520 | std::vector<Instruction *> &Worklist) { | |||
| 1521 | llvm::erase_value(Worklist, I); | |||
| 1522 | } | |||
| 1523 | ||||
| 1524 | /// When we find that I really equals V, remove I from the | |||
| 1525 | /// program, replacing all uses with V and update the worklist. | |||
| 1526 | static void replaceUsesOfWith(Instruction *I, Value *V, | |||
| 1527 | std::vector<Instruction *> &Worklist, Loop *L, | |||
| 1528 | LPPassManager *LPM, MemorySSAUpdater *MSSAU) { | |||
| 1529 | LLVM_DEBUG(dbgs() << "Replace with '" << *V << "': " << *I << "\n")do { } while (false); | |||
| 1530 | ||||
| 1531 | // Add uses to the worklist, which may be dead now. | |||
| 1532 | for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) | |||
| 1533 | if (Instruction *Use = dyn_cast<Instruction>(I->getOperand(i))) | |||
| 1534 | Worklist.push_back(Use); | |||
| 1535 | ||||
| 1536 | // Add users to the worklist which may be simplified now. | |||
| 1537 | for (User *U : I->users()) | |||
| 1538 | Worklist.push_back(cast<Instruction>(U)); | |||
| 1539 | removeFromWorklist(I, Worklist); | |||
| 1540 | I->replaceAllUsesWith(V); | |||
| 1541 | if (!I->mayHaveSideEffects()) { | |||
| 1542 | if (MSSAU) | |||
| 1543 | MSSAU->removeMemoryAccess(I); | |||
| 1544 | I->eraseFromParent(); | |||
| 1545 | } | |||
| 1546 | ++NumSimplify; | |||
| 1547 | } | |||
| 1548 | ||||
| 1549 | /// We know either that the value LIC has the value specified by Val in the | |||
| 1550 | /// specified loop, or we know it does NOT have that value. | |||
| 1551 | /// Rewrite any uses of LIC or of properties correlated to it. | |||
| 1552 | void LoopUnswitch::rewriteLoopBodyWithConditionConstant(Loop *L, Value *LIC, | |||
| 1553 | Constant *Val, | |||
| 1554 | bool IsEqual) { | |||
| 1555 | assert(!isa<Constant>(LIC) && "Why are we unswitching on a constant?")((void)0); | |||
| 1556 | ||||
| 1557 | // FIXME: Support correlated properties, like: | |||
| 1558 | // for (...) | |||
| 1559 | // if (li1 < li2) | |||
| 1560 | // ... | |||
| 1561 | // if (li1 > li2) | |||
| 1562 | // ... | |||
| 1563 | ||||
| 1564 | // FOLD boolean conditions (X|LIC), (X&LIC). Fold conditional branches, | |||
| 1565 | // selects, switches. | |||
| 1566 | std::vector<Instruction*> Worklist; | |||
| 1567 | LLVMContext &Context = Val->getContext(); | |||
| 1568 | ||||
| 1569 | // If we know that LIC == Val, or that LIC == NotVal, just replace uses of LIC | |||
| 1570 | // in the loop with the appropriate one directly. | |||
| 1571 | if (IsEqual || (isa<ConstantInt>(Val) && | |||
| 1572 | Val->getType()->isIntegerTy(1))) { | |||
| 1573 | Value *Replacement; | |||
| 1574 | if (IsEqual) | |||
| 1575 | Replacement = Val; | |||
| 1576 | else | |||
| 1577 | Replacement = ConstantInt::get(Type::getInt1Ty(Val->getContext()), | |||
| 1578 | !cast<ConstantInt>(Val)->getZExtValue()); | |||
| 1579 | ||||
| 1580 | for (User *U : LIC->users()) { | |||
| 1581 | Instruction *UI = dyn_cast<Instruction>(U); | |||
| 1582 | if (!UI || !L->contains(UI)) | |||
| 1583 | continue; | |||
| 1584 | Worklist.push_back(UI); | |||
| 1585 | } | |||
| 1586 | ||||
| 1587 | for (Instruction *UI : Worklist) | |||
| 1588 | UI->replaceUsesOfWith(LIC, Replacement); | |||
| 1589 | ||||
| 1590 | simplifyCode(Worklist, L); | |||
| 1591 | return; | |||
| 1592 | } | |||
| 1593 | ||||
| 1594 | // Otherwise, we don't know the precise value of LIC, but we do know that it | |||
| 1595 | // is certainly NOT "Val". As such, simplify any uses in the loop that we | |||
| 1596 | // can. This case occurs when we unswitch switch statements. | |||
| 1597 | for (User *U : LIC->users()) { | |||
| 1598 | Instruction *UI = dyn_cast<Instruction>(U); | |||
| 1599 | if (!UI || !L->contains(UI)) | |||
| 1600 | continue; | |||
| 1601 | ||||
| 1602 | // At this point, we know LIC is definitely not Val. Try to use some simple | |||
| 1603 | // logic to simplify the user w.r.t. to the context. | |||
| 1604 | if (Value *Replacement = simplifyInstructionWithNotEqual(UI, LIC, Val)) { | |||
| 1605 | if (LI->replacementPreservesLCSSAForm(UI, Replacement)) { | |||
| 1606 | // This in-loop instruction has been simplified w.r.t. its context, | |||
| 1607 | // i.e. LIC != Val, make sure we propagate its replacement value to | |||
| 1608 | // all its users. | |||
| 1609 | // | |||
| 1610 | // We can not yet delete UI, the LIC user, yet, because that would invalidate | |||
| 1611 | // the LIC->users() iterator !. However, we can make this instruction | |||
| 1612 | // dead by replacing all its users and push it onto the worklist so that | |||
| 1613 | // it can be properly deleted and its operands simplified. | |||
| 1614 | UI->replaceAllUsesWith(Replacement); | |||
| 1615 | } | |||
| 1616 | } | |||
| 1617 | ||||
| 1618 | // This is a LIC user, push it into the worklist so that simplifyCode can | |||
| 1619 | // attempt to simplify it. | |||
| 1620 | Worklist.push_back(UI); | |||
| 1621 | ||||
| 1622 | // If we know that LIC is not Val, use this info to simplify code. | |||
| 1623 | SwitchInst *SI = dyn_cast<SwitchInst>(UI); | |||
| 1624 | if (!SI || !isa<ConstantInt>(Val)) continue; | |||
| 1625 | ||||
| 1626 | // NOTE: if a case value for the switch is unswitched out, we record it | |||
| 1627 | // after the unswitch finishes. We can not record it here as the switch | |||
| 1628 | // is not a direct user of the partial LIV. | |||
| 1629 | SwitchInst::CaseHandle DeadCase = | |||
| 1630 | *SI->findCaseValue(cast<ConstantInt>(Val)); | |||
| 1631 | // Default case is live for multiple values. | |||
| 1632 | if (DeadCase == *SI->case_default()) | |||
| 1633 | continue; | |||
| 1634 | ||||
| 1635 | // Found a dead case value. Don't remove PHI nodes in the | |||
| 1636 | // successor if they become single-entry, those PHI nodes may | |||
| 1637 | // be in the Users list. | |||
| 1638 | ||||
| 1639 | BasicBlock *Switch = SI->getParent(); | |||
| 1640 | BasicBlock *SISucc = DeadCase.getCaseSuccessor(); | |||
| 1641 | BasicBlock *Latch = L->getLoopLatch(); | |||
| 1642 | ||||
| 1643 | if (!SI->findCaseDest(SISucc)) continue; // Edge is critical. | |||
| 1644 | // If the DeadCase successor dominates the loop latch, then the | |||
| 1645 | // transformation isn't safe since it will delete the sole predecessor edge | |||
| 1646 | // to the latch. | |||
| 1647 | if (Latch && DT->dominates(SISucc, Latch)) | |||
| 1648 | continue; | |||
| 1649 | ||||
| 1650 | // FIXME: This is a hack. We need to keep the successor around | |||
| 1651 | // and hooked up so as to preserve the loop structure, because | |||
| 1652 | // trying to update it is complicated. So instead we preserve the | |||
| 1653 | // loop structure and put the block on a dead code path. | |||
| 1654 | SplitEdge(Switch, SISucc, DT, LI, MSSAU.get()); | |||
| 1655 | // Compute the successors instead of relying on the return value | |||
| 1656 | // of SplitEdge, since it may have split the switch successor | |||
| 1657 | // after PHI nodes. | |||
| 1658 | BasicBlock *NewSISucc = DeadCase.getCaseSuccessor(); | |||
| 1659 | BasicBlock *OldSISucc = *succ_begin(NewSISucc); | |||
| 1660 | // Create an "unreachable" destination. | |||
| 1661 | BasicBlock *Abort = BasicBlock::Create(Context, "us-unreachable", | |||
| 1662 | Switch->getParent(), | |||
| 1663 | OldSISucc); | |||
| 1664 | new UnreachableInst(Context, Abort); | |||
| 1665 | // Force the new case destination to branch to the "unreachable" | |||
| 1666 | // block while maintaining a (dead) CFG edge to the old block. | |||
| 1667 | NewSISucc->getTerminator()->eraseFromParent(); | |||
| 1668 | BranchInst::Create(Abort, OldSISucc, | |||
| 1669 | ConstantInt::getTrue(Context), NewSISucc); | |||
| 1670 | // Release the PHI operands for this edge. | |||
| 1671 | for (PHINode &PN : NewSISucc->phis()) | |||
| 1672 | PN.setIncomingValueForBlock(Switch, UndefValue::get(PN.getType())); | |||
| 1673 | // Tell the domtree about the new block. We don't fully update the | |||
| 1674 | // domtree here -- instead we force it to do a full recomputation | |||
| 1675 | // after the pass is complete -- but we do need to inform it of | |||
| 1676 | // new blocks. | |||
| 1677 | DT->addNewBlock(Abort, NewSISucc); | |||
| 1678 | } | |||
| 1679 | ||||
| 1680 | simplifyCode(Worklist, L); | |||
| 1681 | } | |||
| 1682 | ||||
| 1683 | /// Now that we have simplified some instructions in the loop, walk over it and | |||
| 1684 | /// constant prop, dce, and fold control flow where possible. Note that this is | |||
| 1685 | /// effectively a very simple loop-structure-aware optimizer. During processing | |||
| 1686 | /// of this loop, L could very well be deleted, so it must not be used. | |||
| 1687 | /// | |||
| 1688 | /// FIXME: When the loop optimizer is more mature, separate this out to a new | |||
| 1689 | /// pass. | |||
| 1690 | /// | |||
| 1691 | void LoopUnswitch::simplifyCode(std::vector<Instruction *> &Worklist, Loop *L) { | |||
| 1692 | const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); | |||
| 1693 | while (!Worklist.empty()) { | |||
| 1694 | Instruction *I = Worklist.back(); | |||
| 1695 | Worklist.pop_back(); | |||
| 1696 | ||||
| 1697 | // Simple DCE. | |||
| 1698 | if (isInstructionTriviallyDead(I)) { | |||
| 1699 | LLVM_DEBUG(dbgs() << "Remove dead instruction '" << *I << "\n")do { } while (false); | |||
| 1700 | ||||
| 1701 | // Add uses to the worklist, which may be dead now. | |||
| 1702 | for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) | |||
| 1703 | if (Instruction *Use = dyn_cast<Instruction>(I->getOperand(i))) | |||
| 1704 | Worklist.push_back(Use); | |||
| 1705 | removeFromWorklist(I, Worklist); | |||
| 1706 | if (MSSAU) | |||
| 1707 | MSSAU->removeMemoryAccess(I); | |||
| 1708 | I->eraseFromParent(); | |||
| 1709 | ++NumSimplify; | |||
| 1710 | continue; | |||
| 1711 | } | |||
| 1712 | ||||
| 1713 | // See if instruction simplification can hack this up. This is common for | |||
| 1714 | // things like "select false, X, Y" after unswitching made the condition be | |||
| 1715 | // 'false'. TODO: update the domtree properly so we can pass it here. | |||
| 1716 | if (Value *V = SimplifyInstruction(I, DL)) | |||
| 1717 | if (LI->replacementPreservesLCSSAForm(I, V)) { | |||
| 1718 | replaceUsesOfWith(I, V, Worklist, L, LPM, MSSAU.get()); | |||
| 1719 | continue; | |||
| 1720 | } | |||
| 1721 | ||||
| 1722 | // Special case hacks that appear commonly in unswitched code. | |||
| 1723 | if (BranchInst *BI = dyn_cast<BranchInst>(I)) { | |||
| 1724 | if (BI->isUnconditional()) { | |||
| 1725 | // If BI's parent is the only pred of the successor, fold the two blocks | |||
| 1726 | // together. | |||
| 1727 | BasicBlock *Pred = BI->getParent(); | |||
| 1728 | (void)Pred; | |||
| 1729 | BasicBlock *Succ = BI->getSuccessor(0); | |||
| 1730 | BasicBlock *SinglePred = Succ->getSinglePredecessor(); | |||
| 1731 | if (!SinglePred) continue; // Nothing to do. | |||
| 1732 | assert(SinglePred == Pred && "CFG broken")((void)0); | |||
| 1733 | ||||
| 1734 | // Make the LPM and Worklist updates specific to LoopUnswitch. | |||
| 1735 | removeFromWorklist(BI, Worklist); | |||
| 1736 | auto SuccIt = Succ->begin(); | |||
| 1737 | while (PHINode *PN = dyn_cast<PHINode>(SuccIt++)) { | |||
| 1738 | for (unsigned It = 0, E = PN->getNumOperands(); It != E; ++It) | |||
| 1739 | if (Instruction *Use = dyn_cast<Instruction>(PN->getOperand(It))) | |||
| 1740 | Worklist.push_back(Use); | |||
| 1741 | for (User *U : PN->users()) | |||
| 1742 | Worklist.push_back(cast<Instruction>(U)); | |||
| 1743 | removeFromWorklist(PN, Worklist); | |||
| 1744 | ++NumSimplify; | |||
| 1745 | } | |||
| 1746 | // Merge the block and make the remaining analyses updates. | |||
| 1747 | DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); | |||
| 1748 | MergeBlockIntoPredecessor(Succ, &DTU, LI, MSSAU.get()); | |||
| 1749 | ++NumSimplify; | |||
| 1750 | continue; | |||
| 1751 | } | |||
| 1752 | ||||
| 1753 | continue; | |||
| 1754 | } | |||
| 1755 | } | |||
| 1756 | } | |||
| 1757 | ||||
| 1758 | /// Simple simplifications we can do given the information that Cond is | |||
| 1759 | /// definitely not equal to Val. | |||
| 1760 | Value *LoopUnswitch::simplifyInstructionWithNotEqual(Instruction *Inst, | |||
| 1761 | Value *Invariant, | |||
| 1762 | Constant *Val) { | |||
| 1763 | // icmp eq cond, val -> false | |||
| 1764 | ICmpInst *CI = dyn_cast<ICmpInst>(Inst); | |||
| 1765 | if (CI && CI->isEquality()) { | |||
| 1766 | Value *Op0 = CI->getOperand(0); | |||
| 1767 | Value *Op1 = CI->getOperand(1); | |||
| 1768 | if ((Op0 == Invariant && Op1 == Val) || (Op0 == Val && Op1 == Invariant)) { | |||
| 1769 | LLVMContext &Ctx = Inst->getContext(); | |||
| 1770 | if (CI->getPredicate() == CmpInst::ICMP_EQ) | |||
| 1771 | return ConstantInt::getFalse(Ctx); | |||
| 1772 | else | |||
| 1773 | return ConstantInt::getTrue(Ctx); | |||
| 1774 | } | |||
| 1775 | } | |||
| 1776 | ||||
| 1777 | // FIXME: there may be other opportunities, e.g. comparison with floating | |||
| 1778 | // point, or Invariant - Val != 0, etc. | |||
| 1779 | return nullptr; | |||
| 1780 | } |